;; Machine description for AArch64 SVE2.
;; Copyright (C) 2019-2022 Free Software Foundation, Inc.
;; Contributed by ARM Ltd.
;;
;; This file is part of GCC.
;;
;; GCC is free software; you can redistribute it and/or modify it
;; under the terms of the GNU General Public License as published by
;; the Free Software Foundation; either version 3, or (at your option)
;; any later version.
;;
;; GCC is distributed in the hope that it will be useful, but
;; WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
;; General Public License for more details.
;;
;; You should have received a copy of the GNU General Public License
;; along with GCC; see the file COPYING3.  If not see
;; <http://www.gnu.org/licenses/>.

;; The file is organised into the following sections (search for the full
;; line):
;;
;; == Moves
;; ---- Non-temporal gather loads
;; ---- Non-temporal scatter stores
;;
;; == Uniform binary arithmetic
;; ---- [INT] Multiplication
;; ---- [INT] Scaled high-part multiplication
;; ---- [INT] General binary arithmetic that maps to unspecs
;; ---- [INT] Saturating binary arithmetic
;; ---- [INT] Saturating left shifts
;;
;; == Uniform ternary arithmetic
;; ---- [INT] General ternary arithmetic that maps to unspecs
;; ---- [INT] Multiply-and-accumulate operations
;; ---- [INT] Binary logic operations with rotation
;; ---- [INT] Ternary logic operations
;; ---- [INT] Shift-and-accumulate operations
;; ---- [INT] Shift-and-insert operations
;; ---- [INT] Sum of absolute differences
;;
;; == Extending arithmetic
;; ---- [INT] Wide binary arithmetic
;; ---- [INT] Long binary arithmetic
;; ---- [INT] Long left shifts
;; ---- [INT] Long binary arithmetic with accumulation
;; ---- [FP] Long multiplication with accumulation
;;
;; == Narrowing arithmetic
;; ---- [INT] Narrowing unary arithmetic
;; ---- [INT] Narrowing binary arithmetic
;; ---- [INT] Narrowing right shifts
;;
;; == Pairwise arithmetic
;; ---- [INT] Pairwise arithmetic
;; ---- [FP] Pairwise arithmetic
;; ---- [INT] Pairwise arithmetic with accumulation
;;
;; == Complex arithmetic
;; ---- [INT] Complex binary operations
;; ---- [INT] Complex ternary operations
;; ---- [INT] Complex dot product
;;
;; == Conversions
;; ---- [FP<-FP] Widening conversions
;; ---- [FP<-FP] Narrowing conversions
;;
;; == Other arithmetic
;; ---- [INT] Reciprocal approximation
;; ---- [INT<-FP] Base-2 logarithm
;; ---- [INT] Polynomial multiplication
;;
;; == Permutation
;; ---- [INT,FP] General permutes
;; ---- [INT] Optional bit-permute extensions
;;
;; == General
;; ---- Check for aliases between pointers
;; ---- Histogram processing
;; ---- String matching
;;
;; == Cryptographic extensions
;; ---- Optional AES extensions
;; ---- Optional SHA-3 extensions
;; ---- Optional SM4 extensions

;; =========================================================================
;; == Moves
;; =========================================================================

;; -------------------------------------------------------------------------
;; ---- Non-temporal gather loads
;; -------------------------------------------------------------------------
;; Includes gather forms of:
;; - LDNT1B
;; - LDNT1D
;; - LDNT1H
;; - LDNT1W
;; -------------------------------------------------------------------------

;; Non-extending loads.
;; Non-extending gather loads: each lane of operand 3 supplies a vector
;; of addresses, optionally offset by scalar base operand 2.
(define_insn "@aarch64_gather_ldnt<mode>"
  [(set (match_operand:SVE_FULL_SD 0 "register_operand" "=w, w")
	(unspec:SVE_FULL_SD
	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
	   (match_operand:DI 2 "aarch64_reg_or_zero" "Z, r")
	   (match_operand:<V_INT_EQUIV> 3 "register_operand" "w, w")
	   (mem:BLK (scratch))]
	  UNSPEC_LDNT1_GATHER))]
  "TARGET_SVE2"
  "@
   ldnt1<Vesize>\t%0.<Vetype>, %1/z, [%3.<Vetype>]
   ldnt1<Vesize>\t%0.<Vetype>, %1/z, [%3.<Vetype>, %2]"
)

;; Extending loads.  Operand 4 is the predicate for the extension's
;; UNSPEC_PRED_X wrapper; the rewrite canonicalizes it to all-ones.
(define_insn_and_rewrite "@aarch64_gather_ldnt_<ANY_EXTEND:optab><SVE_FULL_SDI:mode><SVE_PARTIAL_I:mode>"
  [(set (match_operand:SVE_FULL_SDI 0 "register_operand" "=w, w")
	(unspec:SVE_FULL_SDI
	  [(match_operand:<SVE_FULL_SDI:VPRED> 4 "general_operand" "UplDnm, UplDnm")
	   (ANY_EXTEND:SVE_FULL_SDI
	     (unspec:SVE_PARTIAL_I
	       [(match_operand:<SVE_FULL_SDI:VPRED> 1 "register_operand" "Upl, Upl")
		(match_operand:DI 2 "aarch64_reg_or_zero" "Z, r")
		(match_operand:<SVE_FULL_SDI:V_INT_EQUIV> 3 "register_operand" "w, w")
		(mem:BLK (scratch))]
	       UNSPEC_LDNT1_GATHER))]
	  UNSPEC_PRED_X))]
  "TARGET_SVE2
   && (~<SVE_FULL_SDI:narrower_mask> & <SVE_PARTIAL_I:self_mask>) == 0"
  "@
   ldnt1<ANY_EXTEND:s><SVE_PARTIAL_I:Vesize>\t%0.<SVE_FULL_SDI:Vetype>, %1/z, [%3.<SVE_FULL_SDI:Vetype>]
   ldnt1<ANY_EXTEND:s><SVE_PARTIAL_I:Vesize>\t%0.<SVE_FULL_SDI:Vetype>, %1/z, [%3.<SVE_FULL_SDI:Vetype>, %2]"
  "&& !CONSTANT_P (operands[4])"
  {
    operands[4] = CONSTM1_RTX (<SVE_FULL_SDI:VPRED>mode);
  }
)

;; -------------------------------------------------------------------------
;; ---- Non-temporal scatter stores
;; -------------------------------------------------------------------------
;; Includes scatter forms of:
;; - STNT1B
;; - STNT1D
;; - STNT1H
;; - STNT1W
;; -------------------------------------------------------------------------

;; Non-truncating stores.
;; Non-truncating scatter stores.  STNT1 scatter forms are SVE2-only,
;; so the condition must be TARGET_SVE2 (was incorrectly TARGET_SVE),
;; matching the truncating pattern below.
(define_insn "@aarch64_scatter_stnt<mode>"
  [(set (mem:BLK (scratch))
	(unspec:BLK
	  [(match_operand:<VPRED> 0 "register_operand" "Upl, Upl")
	   (match_operand:DI 1 "aarch64_reg_or_zero" "Z, r")
	   (match_operand:<V_INT_EQUIV> 2 "register_operand" "w, w")
	   (match_operand:SVE_FULL_SD 3 "register_operand" "w, w")]
	  UNSPEC_STNT1_SCATTER))]
  "TARGET_SVE2"
  "@
   stnt1<Vesize>\t%3.<Vetype>, %0, [%2.<Vetype>]
   stnt1<Vesize>\t%3.<Vetype>, %0, [%2.<Vetype>, %1]"
)

;; Truncating stores.
(define_insn "@aarch64_scatter_stnt_<SVE_FULL_SDI:mode><SVE_PARTIAL_I:mode>"
  [(set (mem:BLK (scratch))
	(unspec:BLK
	  [(match_operand:<SVE_FULL_SDI:VPRED> 0 "register_operand" "Upl, Upl")
	   (match_operand:DI 1 "aarch64_reg_or_zero" "Z, r")
	   (match_operand:<SVE_FULL_SDI:V_INT_EQUIV> 2 "register_operand" "w, w")
	   (truncate:SVE_PARTIAL_I
	     (match_operand:SVE_FULL_SDI 3 "register_operand" "w, w"))]
	  UNSPEC_STNT1_SCATTER))]
  "TARGET_SVE2
   && (~<SVE_FULL_SDI:narrower_mask> & <SVE_PARTIAL_I:self_mask>) == 0"
  "@
   stnt1<SVE_PARTIAL_I:Vesize>\t%3.<SVE_FULL_SDI:Vetype>, %0, [%2.<SVE_FULL_SDI:Vetype>]
   stnt1<SVE_PARTIAL_I:Vesize>\t%3.<SVE_FULL_SDI:Vetype>, %0, [%2.<SVE_FULL_SDI:Vetype>, %1]"
)

;; =========================================================================
;; == Uniform binary arithmetic
;; =========================================================================

;; -------------------------------------------------------------------------
;; ---- [INT] Multiplication
;; -------------------------------------------------------------------------
;; Includes the lane forms of:
;; - MUL
;; -------------------------------------------------------------------------

(define_insn "@aarch64_mul_lane_<mode>"
  [(set (match_operand:SVE_FULL_HSDI 0 "register_operand" "=w")
	(mult:SVE_FULL_HSDI
	  (unspec:SVE_FULL_HSDI
	    [(match_operand:SVE_FULL_HSDI 2 "register_operand" "<sve_lane_con>")
	     (match_operand:SI 3 "const_int_operand")]
	    UNSPEC_SVE_LANE_SELECT)
	  (match_operand:SVE_FULL_HSDI 1 "register_operand" "w")))]
  "TARGET_SVE2"
  "mul\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>[%3]"
)

;; -------------------------------------------------------------------------
;; ---- [INT] Scaled high-part multiplication
;; -------------------------------------------------------------------------
;; The patterns in this section are synthetic.
;; -------------------------------------------------------------------------

;; Unpredicated integer multiply-high-with-(round-and-)scale.
;; Expands to [SU]MULLB + [SU]MULLT followed by [R]SHRNB + [R]SHRNT,
;; shifting the double-width products back down by elt_bits - 1.
(define_expand "<su>mulh<r>s<mode>3"
  [(set (match_operand:SVE_FULL_BHSI 0 "register_operand")
	(unspec:SVE_FULL_BHSI
	  [(match_dup 3)
	   (unspec:SVE_FULL_BHSI
	     [(match_operand:SVE_FULL_BHSI 1 "register_operand")
	      (match_operand:SVE_FULL_BHSI 2 "register_operand")]
	     MULHRS)]
	  UNSPEC_PRED_X))]
  "TARGET_SVE2"
  {
    operands[3] = aarch64_ptrue_reg (<VPRED>mode);

    rtx prod_b = gen_reg_rtx (<VWIDE>mode);
    rtx prod_t = gen_reg_rtx (<VWIDE>mode);
    emit_insn (gen_aarch64_sve_<su>mullb<Vwide> (prod_b, operands[1],
						 operands[2]));
    emit_insn (gen_aarch64_sve_<su>mullt<Vwide> (prod_t, operands[1],
						 operands[2]));

    rtx shift = GEN_INT (GET_MODE_UNIT_BITSIZE (<MODE>mode) - 1);
    emit_insn (gen_aarch64_sve_<r>shrnb<Vwide> (operands[0], prod_b, shift));
    emit_insn (gen_aarch64_sve_<r>shrnt<Vwide> (operands[0], operands[0],
						prod_t, shift));

    DONE;
  }
)

;; -------------------------------------------------------------------------
;; ---- [INT] General binary arithmetic that maps to unspecs
;; -------------------------------------------------------------------------
;; Includes:
;; - SHADD
;; - SHSUB
;; - SHSUBR
;; - SQRSHL
;; - SQRSHLR
;; - SRHADD
;; - SRSHL
;; - SRSHLR
;; - SUQADD
;; - UHADD
;; - UHSUB
;; - UHSUBR
;; - UQRSHL
;; - UQRSHLR
;; - URHADD
;; - URSHL
;; - URSHLR
;; - USQADD
;; -------------------------------------------------------------------------

;; Integer average (floor).
(define_expand "<u>avg<mode>3_floor"
  [(set (match_operand:SVE_FULL_I 0 "register_operand")
	(unspec:SVE_FULL_I
	  [(match_dup 3)
	   (unspec:SVE_FULL_I
	     [(match_operand:SVE_FULL_I 1 "register_operand")
	      (match_operand:SVE_FULL_I 2 "register_operand")]
	     HADD)]
	  UNSPEC_PRED_X))]
  "TARGET_SVE2"
  {
    operands[3] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode));
  }
)

;; Integer average (rounding).
(define_expand "<u>avg<mode>3_ceil"
  [(set (match_operand:SVE_FULL_I 0 "register_operand")
	(unspec:SVE_FULL_I
	  [(match_dup 3)
	   (unspec:SVE_FULL_I
	     [(match_operand:SVE_FULL_I 1 "register_operand")
	      (match_operand:SVE_FULL_I 2 "register_operand")]
	     RHADD)]
	  UNSPEC_PRED_X))]
  "TARGET_SVE2"
  {
    operands[3] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode));
  }
)

;; The immediate form of SQADD acts as an immediate form of SUQADD
;; over its full range.  In contrast to the ss_plus pattern, we do
;; not need to treat byte immediates specially.  E.g.:
;;
;;	SQADD	Z0.B, Z0.B, #128
;;
;; is equivalent to:
;;
;;	MOV	Z1.B, #128
;;	SUQADD	Z0.B, P0/M, Z0.B, Z1.B
;;
;; even though it's not equivalent to:
;;
;;	MOV	Z1.B, #128
;;	SQADD	Z0.B, P0/M, Z0.B, Z1.B	// Saturating subtraction of 128
(define_insn "@aarch64_sve_suqadd<mode>_const"
  [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
	(unspec:SVE_FULL_I
	  [(match_operand:SVE_FULL_I 1 "register_operand" "0, w")
	   (match_operand:SVE_FULL_I 2 "aarch64_sve_arith_immediate")]
	  UNSPEC_SUQADD))]
  "TARGET_SVE2"
  "@
   sqadd\t%0.<Vetype>, %0.<Vetype>, #%D2
   movprfx\t%0, %1\;sqadd\t%0.<Vetype>, %0.<Vetype>, #%D2"
  [(set_attr "movprfx" "*,yes")]
)

;; General predicated binary arithmetic.  All operations handled here
;; are commutative or have a reversed form.
(define_insn "@aarch64_pred_<sve_int_op><mode>"
  [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, w, ?&w")
	(unspec:SVE_FULL_I
	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
	   (unspec:SVE_FULL_I
	     [(match_operand:SVE_FULL_I 2 "register_operand" "0, w, w")
	      (match_operand:SVE_FULL_I 3 "register_operand" "w, 0, w")]
	     SVE2_COND_INT_BINARY_REV)]
	  UNSPEC_PRED_X))]
  "TARGET_SVE2"
  "@
   <sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
   <sve_int_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
   movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
  [(set_attr "movprfx" "*,*,yes")]
)

;; Predicated binary arithmetic with merging.
;; Predicated binary arithmetic with merging: operand 4 supplies the
;; value taken by inactive lanes.
(define_expand "@cond_<sve_int_op><mode>"
  [(set (match_operand:SVE_FULL_I 0 "register_operand")
	(unspec:SVE_FULL_I
	  [(match_operand:<VPRED> 1 "register_operand")
	   (unspec:SVE_FULL_I
	     [(match_dup 5)
	      (unspec:SVE_FULL_I
		[(match_operand:SVE_FULL_I 2 "register_operand")
		 (match_operand:SVE_FULL_I 3 "register_operand")]
		SVE2_COND_INT_BINARY)]
	     UNSPEC_PRED_X)
	   (match_operand:SVE_FULL_I 4 "aarch64_simd_reg_or_zero")]
	  UNSPEC_SEL))]
  "TARGET_SVE2"
  {
    operands[5] = CONSTM1_RTX (<MODE>mode);
  }
)

;; Predicated binary arithmetic, merging with the first input.
(define_insn_and_rewrite "*cond_<sve_int_op><mode>_2"
  [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
	(unspec:SVE_FULL_I
	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
	   (unspec:SVE_FULL_I
	     [(match_operand 4)
	      (unspec:SVE_FULL_I
		[(match_operand:SVE_FULL_I 2 "register_operand" "0, w")
		 (match_operand:SVE_FULL_I 3 "register_operand" "w, w")]
		SVE2_COND_INT_BINARY)]
	     UNSPEC_PRED_X)
	   (match_dup 2)]
	  UNSPEC_SEL))]
  "TARGET_SVE2"
  "@
   <sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
   movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
  "&& !CONSTANT_P (operands[4])"
  {
    operands[4] = CONSTM1_RTX (<VPRED>mode);
  }
  [(set_attr "movprfx" "*,yes")]
)

;; Predicated binary arithmetic, merging with the second input.
(define_insn_and_rewrite "*cond_<sve_int_op><mode>_3"
  [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
	(unspec:SVE_FULL_I
	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
	   (unspec:SVE_FULL_I
	     [(match_operand 4)
	      (unspec:SVE_FULL_I
		[(match_operand:SVE_FULL_I 2 "register_operand" "w, w")
		 (match_operand:SVE_FULL_I 3 "register_operand" "0, w")]
		SVE2_COND_INT_BINARY_REV)]
	     UNSPEC_PRED_X)
	   (match_dup 3)]
	  UNSPEC_SEL))]
  "TARGET_SVE2"
  "@
   <sve_int_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
   movprfx\t%0, %3\;<sve_int_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>"
  "&& !CONSTANT_P (operands[4])"
  {
    operands[4] = CONSTM1_RTX (<VPRED>mode);
  }
  [(set_attr "movprfx" "*,yes")]
)

;; Predicated binary operations, merging with an independent value.
(define_insn_and_rewrite "*cond_<sve_int_op><mode>_any"
  [(set (match_operand:SVE_FULL_I 0 "register_operand" "=&w, &w, &w, &w, ?&w")
	(unspec:SVE_FULL_I
	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl, Upl")
	   (unspec:SVE_FULL_I
	     [(match_operand 5)
	      (unspec:SVE_FULL_I
		[(match_operand:SVE_FULL_I 2 "register_operand" "0, w, w, w, w")
		 (match_operand:SVE_FULL_I 3 "register_operand" "w, 0, w, w, w")]
		SVE2_COND_INT_BINARY_REV)]
	     UNSPEC_PRED_X)
	   (match_operand:SVE_FULL_I 4 "aarch64_simd_reg_or_zero" "Dz, Dz, Dz, 0, w")]
	  UNSPEC_SEL))]
  "TARGET_SVE2
   && !rtx_equal_p (operands[2], operands[4])
   && !rtx_equal_p (operands[3], operands[4])"
  "@
   movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
   movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_int_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
   movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
   movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
   #"
  "&& 1"
  {
    if (reload_completed
	&& register_operand (operands[4], <MODE>mode)
	&& !rtx_equal_p (operands[0], operands[4]))
      {
	emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2],
						 operands[4], operands[1]));
	operands[4] = operands[2] = operands[0];
      }
    else if (!CONSTANT_P (operands[5]))
      operands[5] = CONSTM1_RTX (<VPRED>mode);
    else
      FAIL;
  }
  [(set_attr "movprfx" "yes")]
)

;; Predicated binary operations with no reverse form, merging with zero.
;; At present we don't generate these patterns via a cond_* optab,
;; so there's no correctness requirement to handle merging with an
;; independent value.
(define_insn_and_rewrite "*cond_<sve_int_op><mode>_z"
  [(set (match_operand:SVE_FULL_I 0 "register_operand" "=&w, &w")
	(unspec:SVE_FULL_I
	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
	   (unspec:SVE_FULL_I
	     [(match_operand 5)
	      (unspec:SVE_FULL_I
		[(match_operand:SVE_FULL_I 2 "register_operand" "0, w")
		 (match_operand:SVE_FULL_I 3 "register_operand" "w, w")]
		SVE2_COND_INT_BINARY_NOREV)]
	     UNSPEC_PRED_X)
	   (match_operand:SVE_FULL_I 4 "aarch64_simd_imm_zero")]
	  UNSPEC_SEL))]
  "TARGET_SVE2"
  "@
   movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
   movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
  "&& !CONSTANT_P (operands[5])"
  {
    operands[5] = CONSTM1_RTX (<VPRED>mode);
  }
  [(set_attr "movprfx" "yes")]
)

;; -------------------------------------------------------------------------
;; ---- [INT] Saturating binary arithmetic
;; -------------------------------------------------------------------------
;; Includes:
;; - SQDMULH
;; - SQRDMULH
;; -------------------------------------------------------------------------

(define_insn "@aarch64_sve_<sve_int_op><mode>"
  [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w")
	(unspec:SVE_FULL_I
	  [(match_operand:SVE_FULL_I 1 "register_operand" "w")
	   (match_operand:SVE_FULL_I 2 "register_operand" "w")]
	  SVE2_INT_BINARY))]
  "TARGET_SVE2"
  "<sve_int_op>\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>"
)

(define_insn "@aarch64_sve_<sve_int_op>_lane_<mode>"
  [(set (match_operand:SVE_FULL_HSDI 0 "register_operand" "=w")
	(unspec:SVE_FULL_HSDI
	  [(match_operand:SVE_FULL_HSDI 1 "register_operand" "w")
	   (unspec:SVE_FULL_HSDI
	     [(match_operand:SVE_FULL_HSDI 2 "register_operand" "<sve_lane_con>")
	      (match_operand:SI 3 "const_int_operand")]
	     UNSPEC_SVE_LANE_SELECT)]
	  SVE2_INT_BINARY_LANE))]
  "TARGET_SVE2"
  "<sve_int_op>\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>[%3]"
)

;; -------------------------------------------------------------------------
;; ---- [INT] Saturating left shifts
;; -------------------------------------------------------------------------
;; Includes:
;; - SQSHL
;; - SQSHLR
;; - UQSHL
;; - UQSHLR
;; -------------------------------------------------------------------------

;; Predicated left shifts.
;; Predicated saturating left shifts: immediate, vector and reversed
;; register alternatives, with movprfx fallbacks.
(define_insn "@aarch64_pred_<sve_int_op><mode>"
  [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, w, w, ?&w, ?&w")
	(unspec:SVE_FULL_I
	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl, Upl")
	   (unspec:SVE_FULL_I
	     [(match_operand:SVE_FULL_I 2 "register_operand" "0, 0, w, w, w")
	      (match_operand:SVE_FULL_I 3 "aarch64_sve_<lr>shift_operand" "D<lr>, w, 0, D<lr>, w")]
	     SVE2_COND_INT_SHIFT)]
	  UNSPEC_PRED_X))]
  "TARGET_SVE2"
  "@
   <sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
   <sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
   <sve_int_op>r\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
   movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
   movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
  [(set_attr "movprfx" "*,*,*,yes,yes")]
)

;; Predicated left shifts with merging.
(define_expand "@cond_<sve_int_op><mode>"
  [(set (match_operand:SVE_FULL_I 0 "register_operand")
	(unspec:SVE_FULL_I
	  [(match_operand:<VPRED> 1 "register_operand")
	   (unspec:SVE_FULL_I
	     [(match_dup 5)
	      (unspec:SVE_FULL_I
		[(match_operand:SVE_FULL_I 2 "register_operand")
		 (match_operand:SVE_FULL_I 3 "aarch64_sve_<lr>shift_operand")]
		SVE2_COND_INT_SHIFT)]
	     UNSPEC_PRED_X)
	   (match_operand:SVE_FULL_I 4 "register_operand")]
	  UNSPEC_SEL))]
  "TARGET_SVE2"
  {
    operands[5] = CONSTM1_RTX (<VPRED>mode);
  }
)

;; Predicated left shifts, merging with the first input.
(define_insn_and_rewrite "*cond_<sve_int_op><mode>_2"
  [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, w, ?&w, ?&w")
	(unspec:SVE_FULL_I
	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl")
	   (unspec:SVE_FULL_I
	     [(match_operand 4)
	      (unspec:SVE_FULL_I
		[(match_operand:SVE_FULL_I 2 "register_operand" "0, 0, w, w")
		 (match_operand:SVE_FULL_I 3 "aarch64_sve_<lr>shift_operand" "D<lr>, w, D<lr>, w")]
		SVE2_COND_INT_SHIFT)]
	     UNSPEC_PRED_X)
	   (match_dup 2)]
	  UNSPEC_SEL))]
  "TARGET_SVE2"
  "@
   <sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
   <sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
   movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
   movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
  "&& !CONSTANT_P (operands[4])"
  {
    operands[4] = CONSTM1_RTX (<VPRED>mode);
  }
  [(set_attr "movprfx" "*,*,yes,yes")]
)

;; Predicated left shifts, merging with the second input.
(define_insn_and_rewrite "*cond_<sve_int_op><mode>_3"
  [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
	(unspec:SVE_FULL_I
	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
	   (unspec:SVE_FULL_I
	     [(match_operand 4)
	      (unspec:SVE_FULL_I
		[(match_operand:SVE_FULL_I 2 "register_operand" "w, w")
		 (match_operand:SVE_FULL_I 3 "register_operand" "0, w")]
		SVE2_COND_INT_SHIFT)]
	     UNSPEC_PRED_X)
	   (match_dup 3)]
	  UNSPEC_SEL))]
  "TARGET_SVE2"
  "@
   <sve_int_op>r\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
   movprfx\t%0, %3\;<sve_int_op>r\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>"
  "&& !CONSTANT_P (operands[4])"
  {
    operands[4] = CONSTM1_RTX (<VPRED>mode);
  }
  [(set_attr "movprfx" "*,yes")]
)

;; Predicated left shifts, merging with an independent value.
(define_insn_and_rewrite "*cond_<sve_int_op><mode>_any"
  [(set (match_operand:SVE_FULL_I 0 "register_operand" "=&w, &w, &w, &w, &w, &w, &w, ?&w, ?&w")
	(unspec:SVE_FULL_I
	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl, Upl, Upl, Upl")
	   (unspec:SVE_FULL_I
	     [(match_operand 5)
	      (unspec:SVE_FULL_I
		[(match_operand:SVE_FULL_I 2 "register_operand" "0, 0, w, w, w, w, w, w, w")
		 (match_operand:SVE_FULL_I 3 "aarch64_sve_<lr>shift_operand" "D<lr>, w, 0, D<lr>, w, D<lr>, w, D<lr>, w")]
		SVE2_COND_INT_SHIFT)]
	     UNSPEC_PRED_X)
	   (match_operand:SVE_FULL_I 4 "aarch64_simd_reg_or_zero" "Dz, Dz, Dz, Dz, Dz, 0, 0, w, w")]
	  UNSPEC_SEL))]
  "TARGET_SVE2
   && !rtx_equal_p (operands[2], operands[4])
   && (CONSTANT_P (operands[4]) || !rtx_equal_p (operands[3], operands[4]))"
  "@
   movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
   movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
   movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_int_op>r\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
   movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
   movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
   movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
   movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
   #
   #"
  "&& 1"
  {
    if (reload_completed
	&& register_operand (operands[4], <MODE>mode)
	&& !rtx_equal_p (operands[0], operands[4]))
      {
	emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2],
						 operands[4], operands[1]));
	operands[4] = operands[2] = operands[0];
      }
    else if (!CONSTANT_P (operands[5]))
      operands[5] = CONSTM1_RTX (<VPRED>mode);
    else
      FAIL;
  }
  [(set_attr "movprfx" "yes")]
)

;; =========================================================================
;; == Uniform ternary arithmetic
;; =========================================================================

;; -------------------------------------------------------------------------
;; ---- [INT] General ternary arithmetic that maps to unspecs
;; -------------------------------------------------------------------------
;; Includes:
;; - ADCLB
;; - ADCLT
;; - EORBT
;; - EORTB
;; - SBCLB
;; - SBCLT
;; - SQRDMLAH
;; - SQRDMLSH
;; -------------------------------------------------------------------------

(define_insn "@aarch64_sve_<sve_int_op><mode>"
  [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
	(unspec:SVE_FULL_I
	  [(match_operand:SVE_FULL_I 2 "register_operand" "w, w")
	   (match_operand:SVE_FULL_I 3 "register_operand" "w, w")
	   (match_operand:SVE_FULL_I 1 "register_operand" "0, w")]
	  SVE2_INT_TERNARY))]
  "TARGET_SVE2"
  "@
   <sve_int_op>\t%0.<Vetype>, %2.<Vetype>, %3.<Vetype>
   movprfx\t%0, %1\;<sve_int_op>\t%0.<Vetype>, %2.<Vetype>, %3.<Vetype>"
  [(set_attr "movprfx" "*,yes")]
)

(define_insn "@aarch64_sve_<sve_int_op>_lane_<mode>"
  [(set (match_operand:SVE_FULL_HSDI 0 "register_operand" "=w, ?&w")
	(unspec:SVE_FULL_HSDI
	  [(match_operand:SVE_FULL_HSDI 2 "register_operand" "w, w")
	   (unspec:SVE_FULL_HSDI
	     [(match_operand:SVE_FULL_HSDI 3 "register_operand" "<sve_lane_con>, <sve_lane_con>")
	      (match_operand:SI 4 "const_int_operand")]
	     UNSPEC_SVE_LANE_SELECT)
	   (match_operand:SVE_FULL_HSDI 1 "register_operand" "0, w")]
	  SVE2_INT_TERNARY_LANE))]
  "TARGET_SVE2"
  "@
   <sve_int_op>\t%0.<Vetype>, %2.<Vetype>, %3.<Vetype>[%4]
   movprfx\t%0, %1\;<sve_int_op>\t%0.<Vetype>, %2.<Vetype>, %3.<Vetype>[%4]"
  [(set_attr "movprfx" "*,yes")]
)

;; -------------------------------------------------------------------------
;; ---- [INT] Multiply-and-accumulate operations
;; -------------------------------------------------------------------------
;; Includes the lane forms of:
;; - MLA
;; - MLS
;; -------------------------------------------------------------------------

(define_insn "@aarch64_sve_add_mul_lane_<mode>"
  [(set (match_operand:SVE_FULL_HSDI 0 "register_operand" "=w, ?&w")
	(plus:SVE_FULL_HSDI
	  (mult:SVE_FULL_HSDI
	    (unspec:SVE_FULL_HSDI
	      [(match_operand:SVE_FULL_HSDI 3 "register_operand" "<sve_lane_con>, <sve_lane_con>")
	       (match_operand:SI 4 "const_int_operand")]
	      UNSPEC_SVE_LANE_SELECT)
	    (match_operand:SVE_FULL_HSDI 2 "register_operand" "w, w"))
	  (match_operand:SVE_FULL_HSDI 1 "register_operand" "0, w")))]
  "TARGET_SVE2"
  "@
   mla\t%0.<Vetype>, %2.<Vetype>, %3.<Vetype>[%4]
   movprfx\t%0, %1\;mla\t%0.<Vetype>, %2.<Vetype>, %3.<Vetype>[%4]"
  [(set_attr "movprfx" "*,yes")]
)

(define_insn "@aarch64_sve_sub_mul_lane_<mode>"
  [(set (match_operand:SVE_FULL_HSDI 0 "register_operand" "=w, ?&w")
	(minus:SVE_FULL_HSDI
	  (match_operand:SVE_FULL_HSDI 1 "register_operand" "0, w")
	  (mult:SVE_FULL_HSDI
	    (unspec:SVE_FULL_HSDI
	      [(match_operand:SVE_FULL_HSDI 3 "register_operand" "<sve_lane_con>, <sve_lane_con>")
	       (match_operand:SI 4 "const_int_operand")]
	      UNSPEC_SVE_LANE_SELECT)
	    (match_operand:SVE_FULL_HSDI 2 "register_operand" "w, w"))))]
  "TARGET_SVE2"
  "@
   mls\t%0.<Vetype>, %2.<Vetype>, %3.<Vetype>[%4]
   movprfx\t%0, %1\;mls\t%0.<Vetype>, %2.<Vetype>, %3.<Vetype>[%4]"
  [(set_attr "movprfx" "*,yes")]
)

;; -------------------------------------------------------------------------
;; ---- [INT] Binary logic operations with rotation
;; -------------------------------------------------------------------------
;; Includes:
;; - XAR
;; -------------------------------------------------------------------------

(define_insn "@aarch64_sve2_xar<mode>"
  [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
	(rotatert:SVE_FULL_I
	  (xor:SVE_FULL_I
	    (match_operand:SVE_FULL_I 1 "register_operand" "%0, w")
	    (match_operand:SVE_FULL_I 2 "register_operand" "w, w"))
	  (match_operand:SVE_FULL_I 3 "aarch64_simd_rshift_imm")))]
  "TARGET_SVE2"
  "@
   xar\t%0.<Vetype>, %0.<Vetype>, %2.<Vetype>, #%3
   movprfx\t%0, %1\;xar\t%0.<Vetype>, %0.<Vetype>, %2.<Vetype>, #%3"
  [(set_attr "movprfx" "*,yes")]
)

;; -------------------------------------------------------------------------
;; ---- [INT] Ternary logic operations
;; -------------------------------------------------------------------------
;; Includes:
;; - BCAX
;; - BSL
;; - BSL1N
;; - BSL2N
;; - EOR3
;; - NBSL
;; -------------------------------------------------------------------------

;; Unpredicated exclusive OR of AND.
(define_expand "@aarch64_sve2_bcax<mode>"
  [(set (match_operand:SVE_FULL_I 0 "register_operand")
	(xor:SVE_FULL_I
	  (and:SVE_FULL_I
	    (unspec:SVE_FULL_I
	      [(match_dup 4)
	       (not:SVE_FULL_I
		 (match_operand:SVE_FULL_I 3 "register_operand"))]
	      UNSPEC_PRED_X)
	    (match_operand:SVE_FULL_I 2 "register_operand"))
	  (match_operand:SVE_FULL_I 1 "register_operand")))]
  "TARGET_SVE2"
  {
    operands[4] = CONSTM1_RTX (<VPRED>mode);
  }
)

(define_insn_and_rewrite "*aarch64_sve2_bcax<mode>"
  [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
	(xor:SVE_FULL_I
	  (and:SVE_FULL_I
	    (unspec:SVE_FULL_I
	      [(match_operand 4)
	       (not:SVE_FULL_I
		 (match_operand:SVE_FULL_I 3 "register_operand" "w, w"))]
	      UNSPEC_PRED_X)
	    (match_operand:SVE_FULL_I 2 "register_operand" "w, w"))
	  (match_operand:SVE_FULL_I 1 "register_operand" "0, w")))]
  "TARGET_SVE2"
  "@
   bcax\t%0.d, %0.d, %2.d, %3.d
   movprfx\t%0, %1\;bcax\t%0.d, %0.d, %2.d, %3.d"
  "&& !CONSTANT_P (operands[4])"
  {
    operands[4] = CONSTM1_RTX (<VPRED>mode);
  }
  [(set_attr "movprfx" "*,yes")]
)

;; Unpredicated 3-way exclusive OR.
(define_insn "@aarch64_sve2_eor3<mode>"
  [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, w, w, ?&w")
	(xor:SVE_FULL_I
	  (xor:SVE_FULL_I
	    (match_operand:SVE_FULL_I 1 "register_operand" "0, w, w, w")
	    (match_operand:SVE_FULL_I 2 "register_operand" "w, 0, w, w"))
	  (match_operand:SVE_FULL_I 3 "register_operand" "w, w, 0, w")))]
  "TARGET_SVE2"
  "@
   eor3\t%0.d, %0.d, %2.d, %3.d
   eor3\t%0.d, %0.d, %1.d, %3.d
   eor3\t%0.d, %0.d, %1.d, %2.d
   movprfx\t%0, %1\;eor3\t%0.d, %0.d, %2.d, %3.d"
  [(set_attr "movprfx" "*,*,*,yes")]
)

;; Use NBSL for vector NOR.
(define_insn_and_rewrite "*aarch64_sve2_nor<mode>"
  [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
	(unspec:SVE_FULL_I
	  [(match_operand 3)
	   (and:SVE_FULL_I
	     (not:SVE_FULL_I
	       (match_operand:SVE_FULL_I 1 "register_operand" "%0, w"))
	     (not:SVE_FULL_I
	       (match_operand:SVE_FULL_I 2 "register_operand" "w, w")))]
	  UNSPEC_PRED_X))]
  "TARGET_SVE2"
  "@
   nbsl\t%0.d, %0.d, %2.d, %0.d
   movprfx\t%0, %1\;nbsl\t%0.d, %0.d, %2.d, %0.d"
  "&& !CONSTANT_P (operands[3])"
  {
    operands[3] = CONSTM1_RTX (<VPRED>mode);
  }
  [(set_attr "movprfx" "*,yes")]
)

;; Use NBSL for vector NAND.
(define_insn_and_rewrite "*aarch64_sve2_nand<mode>"
  [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
	(unspec:SVE_FULL_I
	  [(match_operand 3)
	   (ior:SVE_FULL_I
	     (not:SVE_FULL_I
	       (match_operand:SVE_FULL_I 1 "register_operand" "%0, w"))
	     (not:SVE_FULL_I
	       (match_operand:SVE_FULL_I 2 "register_operand" "w, w")))]
	  UNSPEC_PRED_X))]
  "TARGET_SVE2"
  "@
   nbsl\t%0.d, %0.d, %2.d, %2.d
   movprfx\t%0, %1\;nbsl\t%0.d, %0.d, %2.d, %2.d"
  "&& !CONSTANT_P (operands[3])"
  {
    operands[3] = CONSTM1_RTX (<VPRED>mode);
  }
  [(set_attr "movprfx" "*,yes")]
)

;; Unpredicated bitwise select.
;; (op3 ? bsl_mov : bsl_dup) == (((bsl_mov ^ bsl_dup) & op3) ^ bsl_dup)
(define_expand "@aarch64_sve2_bsl<mode>"
  [(set (match_operand:SVE_FULL_I 0 "register_operand")
	(xor:SVE_FULL_I
	  (and:SVE_FULL_I
	    (xor:SVE_FULL_I
	      (match_operand:SVE_FULL_I 1 "register_operand")
	      (match_operand:SVE_FULL_I 2 "register_operand"))
	    (match_operand:SVE_FULL_I 3 "register_operand"))
	  (match_dup 2)))]
  "TARGET_SVE2"
)

;; BSL, allowing the final XOR operand to be either input of the inner
;; XOR (BSL_DUP selects which; bsl_mov is the other, tied input).
(define_insn "*aarch64_sve2_bsl<mode>"
  [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
	(xor:SVE_FULL_I
	  (and:SVE_FULL_I
	    (xor:SVE_FULL_I
	      (match_operand:SVE_FULL_I 1 "register_operand" "<bsl_1st>, w")
	      (match_operand:SVE_FULL_I 2 "register_operand" "<bsl_2nd>, w"))
	    (match_operand:SVE_FULL_I 3 "register_operand" "w, w"))
	  (match_dup BSL_DUP)))]
  "TARGET_SVE2"
  "@
   bsl\t%0.d, %0.d, %<bsl_dup>.d, %3.d
   movprfx\t%0, %<bsl_mov>\;bsl\t%0.d, %0.d, %<bsl_dup>.d, %3.d"
  [(set_attr "movprfx" "*,yes")]
)

;; Unpredicated bitwise inverted select.
;; (~(op3 ? bsl_mov : bsl_dup)) == (~(((bsl_mov ^ bsl_dup) & op3) ^ bsl_dup))
(define_expand "@aarch64_sve2_nbsl<mode>"
  [(set (match_operand:SVE_FULL_I 0 "register_operand")
	(unspec:SVE_FULL_I
	  [(match_dup 4)
	   (not:SVE_FULL_I
	     (xor:SVE_FULL_I
	       (and:SVE_FULL_I
		 (xor:SVE_FULL_I
		   (match_operand:SVE_FULL_I 1 "register_operand")
		   (match_operand:SVE_FULL_I 2 "register_operand"))
		 (match_operand:SVE_FULL_I 3 "register_operand"))
	       (match_dup 2)))]
	  UNSPEC_PRED_X))]
  "TARGET_SVE2"
  {
    operands[4] = CONSTM1_RTX (<VPRED>mode);
  }
)

(define_insn_and_rewrite "*aarch64_sve2_nbsl<mode>"
  [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
	(unspec:SVE_FULL_I
	  [(match_operand 4)
	   (not:SVE_FULL_I
	     (xor:SVE_FULL_I
	       (and:SVE_FULL_I
		 (xor:SVE_FULL_I
		   (match_operand:SVE_FULL_I 1 "register_operand" "<bsl_1st>, w")
		   (match_operand:SVE_FULL_I 2 "register_operand" "<bsl_2nd>, w"))
		 (match_operand:SVE_FULL_I 3 "register_operand" "w, w"))
	       (match_dup BSL_DUP)))]
	  UNSPEC_PRED_X))]
  "TARGET_SVE2"
  "@
   nbsl\t%0.d, %0.d, %<bsl_dup>.d, %3.d
   movprfx\t%0, %<bsl_mov>\;nbsl\t%0.d, %0.d, %<bsl_dup>.d, %3.d"
  "&& !CONSTANT_P (operands[4])"
  {
    operands[4] = CONSTM1_RTX (<VPRED>mode);
  }
  [(set_attr "movprfx" "*,yes")]
)

;; Unpredicated bitwise select with inverted first operand.
;; (op3 ? ~bsl_mov : bsl_dup) == ((~(bsl_mov ^ bsl_dup) & op3) ^ bsl_dup)
(define_expand "@aarch64_sve2_bsl1n<mode>"
  [(set (match_operand:SVE_FULL_I 0 "register_operand")
	(xor:SVE_FULL_I
	  (and:SVE_FULL_I
	    (unspec:SVE_FULL_I
	      [(match_dup 4)
	       (not:SVE_FULL_I
		 (xor:SVE_FULL_I
		   (match_operand:SVE_FULL_I 1 "register_operand")
		   (match_operand:SVE_FULL_I 2 "register_operand")))]
	      UNSPEC_PRED_X)
	    (match_operand:SVE_FULL_I 3 "register_operand"))
	  (match_dup 2)))]
  "TARGET_SVE2"
  {
    operands[4] = CONSTM1_RTX (<VPRED>mode);
  }
)

(define_insn_and_rewrite "*aarch64_sve2_bsl1n<mode>"
  [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
	(xor:SVE_FULL_I
	  (and:SVE_FULL_I
	    (unspec:SVE_FULL_I
	      [(match_operand 4)
	       (not:SVE_FULL_I
		 (xor:SVE_FULL_I
		   (match_operand:SVE_FULL_I 1 "register_operand" "<bsl_1st>, w")
		   (match_operand:SVE_FULL_I 2 "register_operand" "<bsl_2nd>, w")))]
	      UNSPEC_PRED_X)
	    (match_operand:SVE_FULL_I 3 "register_operand" "w, w"))
	  (match_dup BSL_DUP)))]
  "TARGET_SVE2"
  "@
   bsl1n\t%0.d, %0.d, %<bsl_dup>.d, %3.d
   movprfx\t%0, %<bsl_mov>\;bsl1n\t%0.d, %0.d, %<bsl_dup>.d, %3.d"
  "&& !CONSTANT_P (operands[4])"
  {
    operands[4] = CONSTM1_RTX (<VPRED>mode);
  }
  [(set_attr "movprfx" "*,yes")]
)

;; Unpredicated bitwise select with inverted second operand.
;; (bsl_dup ? bsl_mov : ~op3) == ((bsl_dup & bsl_mov) | (~op3 & ~bsl_dup))
(define_expand "@aarch64_sve2_bsl2n<mode>"
  [(set (match_operand:SVE_FULL_I 0 "register_operand")
	(ior:SVE_FULL_I
	  (and:SVE_FULL_I
	    (match_operand:SVE_FULL_I 1 "register_operand")
	    (match_operand:SVE_FULL_I 3 "register_operand"))
	  (unspec:SVE_FULL_I
	    [(match_dup 4)
	     (and:SVE_FULL_I
	       (not:SVE_FULL_I
		 (match_operand:SVE_FULL_I 2 "register_operand"))
	       (not:SVE_FULL_I
		 (match_dup 3)))]
	    UNSPEC_PRED_X)))]
  "TARGET_SVE2"
  {
    operands[4] = CONSTM1_RTX (<VPRED>mode);
  }
)

(define_insn_and_rewrite "*aarch64_sve2_bsl2n<mode>"
  [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
	(ior:SVE_FULL_I
	  (and:SVE_FULL_I
	    (match_operand:SVE_FULL_I 1 "register_operand" "<bsl_1st>, w")
	    (match_operand:SVE_FULL_I 2 "register_operand" "<bsl_2nd>, w"))
	  (unspec:SVE_FULL_I
	    [(match_operand 4)
	     (and:SVE_FULL_I
	       (not:SVE_FULL_I
		 (match_operand:SVE_FULL_I 3 "register_operand" "w, w"))
	       (not:SVE_FULL_I
		 (match_dup BSL_DUP)))]
	    UNSPEC_PRED_X)))]
  "TARGET_SVE2"
  "@
   bsl2n\t%0.d, %0.d, %3.d, %<bsl_dup>.d
   movprfx\t%0, %<bsl_mov>\;bsl2n\t%0.d, %0.d, %3.d, %<bsl_dup>.d"
  "&& !CONSTANT_P (operands[4])"
  {
    operands[4] = CONSTM1_RTX (<VPRED>mode);
  }
  [(set_attr "movprfx" "*,yes")]
)

;; Unpredicated bitwise select with inverted second operand, alternative form.
;; (bsl_dup ? bsl_mov : ~op3) == ((bsl_dup & bsl_mov) | (~bsl_dup & ~op3))
(define_insn_and_rewrite "*aarch64_sve2_bsl2n<mode>"
  [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
	(ior:SVE_FULL_I
	  (and:SVE_FULL_I
	    (match_operand:SVE_FULL_I 1 "register_operand" "<bsl_1st>, w")
	    (match_operand:SVE_FULL_I 2 "register_operand" "<bsl_2nd>, w"))
	  (unspec:SVE_FULL_I
	    [(match_operand 4)
	     (and:SVE_FULL_I
	       (not:SVE_FULL_I
		 (match_dup BSL_DUP))
	       (not:SVE_FULL_I
		 (match_operand:SVE_FULL_I 3 "register_operand" "w, w")))]
	    UNSPEC_PRED_X)))]
  "TARGET_SVE2"
  "@
   bsl2n\t%0.d, %0.d, %3.d, %<bsl_dup>.d
   movprfx\t%0, %<bsl_mov>\;bsl2n\t%0.d, %0.d, %3.d, %<bsl_dup>.d"
  "&& !CONSTANT_P (operands[4])"
  {
    operands[4] = CONSTM1_RTX (<VPRED>mode);
  }
  [(set_attr "movprfx" "*,yes")]
)

;; -------------------------------------------------------------------------
;; ---- [INT] Shift-and-accumulate operations
;; -------------------------------------------------------------------------
;; Includes:
;; - SRSRA
;; - SSRA
;; - URSRA
;; - USRA
;; -------------------------------------------------------------------------

;; Provide the natural unpredicated interface for SSRA and USRA.
(define_expand "@aarch64_sve_add_<sve_int_op><mode>"
  [(set (match_operand:SVE_FULL_I 0 "register_operand")
	(plus:SVE_FULL_I
	  (unspec:SVE_FULL_I
	    [(match_dup 4)
	     (SHIFTRT:SVE_FULL_I
	       (match_operand:SVE_FULL_I 2 "register_operand")
	       (match_operand:SVE_FULL_I 3 "aarch64_simd_rshift_imm"))]
	    UNSPEC_PRED_X)
	  (match_operand:SVE_FULL_I 1 "register_operand")))]
  "TARGET_SVE2"
  {
    operands[4] = CONSTM1_RTX (<VPRED>mode);
  }
)

;; Pattern-match SSRA and USRA as a predicated operation whose predicate
;; isn't needed.
(define_insn_and_rewrite "*aarch64_sve2_sra<mode>"
  [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
	(plus:SVE_FULL_I
	  (unspec:SVE_FULL_I
	    [(match_operand 4)
	     (SHIFTRT:SVE_FULL_I
	       (match_operand:SVE_FULL_I 2 "register_operand" "w, w")
	       (match_operand:SVE_FULL_I 3 "aarch64_simd_rshift_imm"))]
	    UNSPEC_PRED_X)
	  (match_operand:SVE_FULL_I 1 "register_operand" "0, w")))]
  "TARGET_SVE2"
  "@
   <sra_op>sra\t%0.<Vetype>, %2.<Vetype>, #%3
   movprfx\t%0, %1\;<sra_op>sra\t%0.<Vetype>, %2.<Vetype>, #%3"
  "&& !CONSTANT_P (operands[4])"
  {
    operands[4] = CONSTM1_RTX (<VPRED>mode);
  }
  [(set_attr "movprfx" "*,yes")]
)

;; SRSRA and URSRA.
(define_insn "@aarch64_sve_add_<sve_int_op><mode>"
  [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
	(plus:SVE_FULL_I
	  (unspec:SVE_FULL_I
	    [(match_operand:SVE_FULL_I 2 "register_operand" "w, w")
	     (match_operand:SVE_FULL_I 3 "aarch64_simd_rshift_imm")]
	    VRSHR_N)
	  (match_operand:SVE_FULL_I 1 "register_operand" "0, w")))]
  "TARGET_SVE2"
  "@
   <sur>sra\t%0.<Vetype>, %2.<Vetype>, #%3
   movprfx\t%0, %1\;<sur>sra\t%0.<Vetype>, %2.<Vetype>, #%3"
  [(set_attr "movprfx" "*,yes")]
)

;; -------------------------------------------------------------------------
;; ---- [INT] Shift-and-insert operations
;; -------------------------------------------------------------------------
;; Includes:
;; - SLI
;; - SRI
;; -------------------------------------------------------------------------

;; These instructions do not take MOVPRFX.
(define_insn "@aarch64_sve_<sve_int_op><mode>"
  [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w")
	(unspec:SVE_FULL_I
	  [(match_operand:SVE_FULL_I 1 "register_operand" "0")
	   (match_operand:SVE_FULL_I 2 "register_operand" "w")
	   (match_operand:SVE_FULL_I 3 "aarch64_simd_<lr>shift_imm")]
	  SVE2_INT_SHIFT_INSERT))]
  "TARGET_SVE2"
  "<sve_int_op>\t%0.<Vetype>, %2.<Vetype>, #%3"
)

;; -------------------------------------------------------------------------
;; ---- [INT] Sum of absolute differences
;; -------------------------------------------------------------------------
;; Includes:
;; - SABA
;; - UABA
;; -------------------------------------------------------------------------

;; Provide the natural unpredicated interface for SABA and UABA.
;; abs (a - b) is expressed as max (a, b) - min (a, b), with both
;; min and max wrapped in UNSPEC_PRED_X using the all-true predicate.
(define_expand "@aarch64_sve2_<su>aba<mode>"
  [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
	(plus:SVE_FULL_I
	  (minus:SVE_FULL_I
	    (unspec:SVE_FULL_I
	      [(match_dup 4)
	       (USMAX:SVE_FULL_I
		 (match_operand:SVE_FULL_I 2 "register_operand" "w, w")
		 (match_operand:SVE_FULL_I 3 "register_operand" "w, w"))]
	      UNSPEC_PRED_X)
	    (unspec:SVE_FULL_I
	      [(match_dup 4)
	       (<max_opp>:SVE_FULL_I
		 (match_dup 2)
		 (match_dup 3))]
	      UNSPEC_PRED_X))
	  (match_operand:SVE_FULL_I 1 "register_operand" "0, w")))]
  "TARGET_SVE2"
  {
    operands[4] = CONSTM1_RTX (<VPRED>mode);
  }
)

;; Pattern-match SABA and UABA as an absolute-difference-and-accumulate
;; operation whose predicates aren't needed.
(define_insn "*aarch64_sve2_<su>aba<mode>"
  [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
	(plus:SVE_FULL_I
	  (minus:SVE_FULL_I
	    (unspec:SVE_FULL_I
	      [(match_operand 4)
	       (USMAX:SVE_FULL_I
		 (match_operand:SVE_FULL_I 2 "register_operand" "w, w")
		 (match_operand:SVE_FULL_I 3 "register_operand" "w, w"))]
	      UNSPEC_PRED_X)
	    (unspec:SVE_FULL_I
	      [(match_operand 5)
	       (<max_opp>:SVE_FULL_I
		 (match_dup 2)
		 (match_dup 3))]
	      UNSPEC_PRED_X))
	  (match_operand:SVE_FULL_I 1 "register_operand" "0, w")))]
  "TARGET_SVE2"
  "@
   <su>aba\t%0.<Vetype>, %2.<Vetype>, %3.<Vetype>
   movprfx\t%0, %1\;<su>aba\t%0.<Vetype>, %2.<Vetype>, %3.<Vetype>"
  [(set_attr "movprfx" "*,yes")]
)

;; =========================================================================
;; == Extending arithmetic
;; =========================================================================

;; -------------------------------------------------------------------------
;; ---- [INT] Wide binary arithmetic
;; -------------------------------------------------------------------------
;; Includes:
;; - SADDWB
;; - SADDWT
;; - SSUBWB
;; - SSUBWT
;; - UADDWB
;; - UADDWT
;; - USUBWB
;; - USUBWT
;; -------------------------------------------------------------------------

(define_insn "@aarch64_sve_<sve_int_op><mode>"
  [(set (match_operand:SVE_FULL_HSDI 0 "register_operand" "=w")
	(unspec:SVE_FULL_HSDI
	  [(match_operand:SVE_FULL_HSDI 1 "register_operand" "w")
	   (match_operand:<VNARROW> 2 "register_operand" "w")]
	  SVE2_INT_BINARY_WIDE))]
  "TARGET_SVE2"
  "<sve_int_op>\t%0.<Vetype>, %1.<Vetype>, %2.<Ventype>"
)

;; -------------------------------------------------------------------------
;; ---- [INT] Long binary arithmetic
;; -------------------------------------------------------------------------
;; Includes:
;; - SABDLB
;; - SABDLT
;; - SADDLB
;; - SADDLBT
;; - SADDLT
;; - SMULLB
;; - SMULLT
;; - SQDMULLB
;; - SQDMULLT
;; - SSUBLB
;; - SSUBLBT
;; - SSUBLT
;; - SSUBLTB
;; - UABDLB
;; - UABDLT
;; - UADDLB
;; - UADDLT
;; - UMULLB
;; - UMULLT
;; - USUBLB
;; - USUBLT
;; -------------------------------------------------------------------------

(define_insn "@aarch64_sve_<sve_int_op><mode>"
  [(set (match_operand:SVE_FULL_HSDI 0 "register_operand" "=w")
	(unspec:SVE_FULL_HSDI
	  [(match_operand:<VNARROW> 1 "register_operand" "w")
	   (match_operand:<VNARROW> 2 "register_operand" "w")]
	  SVE2_INT_BINARY_LONG))]
  "TARGET_SVE2"
  "<sve_int_op>\t%0.<Vetype>, %1.<Ventype>, %2.<Ventype>"
)

;; Long binary arithmetic with one input taken from an indexed lane.
(define_insn "@aarch64_sve_<sve_int_op>_lane_<mode>"
  [(set (match_operand:SVE_FULL_SDI 0 "register_operand" "=w")
	(unspec:SVE_FULL_SDI
	  [(match_operand:<VNARROW> 1 "register_operand" "w")
	   (unspec:<VNARROW>
	     [(match_operand:<VNARROW> 2 "register_operand" "<sve_lane_con>")
	      (match_operand:SI 3 "const_int_operand")]
	     UNSPEC_SVE_LANE_SELECT)]
	  SVE2_INT_BINARY_LONG_LANE))]
  "TARGET_SVE2"
  "<sve_int_op>\t%0.<Vetype>, %1.<Ventype>, %2.<Ventype>[%3]"
)

;; -------------------------------------------------------------------------
;; ---- [INT] Long left shifts
;; -------------------------------------------------------------------------
;; Includes:
;; - SSHLLB
;; - SSHLLT
;; - USHLLB
;; - USHLLT
;; -------------------------------------------------------------------------

;; The immediate range is enforced before generating the instruction.
(define_insn "@aarch64_sve_<sve_int_op><mode>"
  [(set (match_operand:SVE_FULL_HSDI 0 "register_operand" "=w")
	(unspec:SVE_FULL_HSDI
	  [(match_operand:<VNARROW> 1 "register_operand" "w")
	   (match_operand:DI 2 "const_int_operand")]
	  SVE2_INT_SHIFT_IMM_LONG))]
  "TARGET_SVE2"
  "<sve_int_op>\t%0.<Vetype>, %1.<Ventype>, #%2"
)

;; -------------------------------------------------------------------------
;; ---- [INT] Long binary arithmetic with accumulation
;; -------------------------------------------------------------------------
;; Includes:
;; - SABALB
;; - SABALT
;; - SMLALB
;; - SMLALT
;; - SMLSLB
;; - SMLSLT
;; - SQDMLALB
;; - SQDMLALBT
;; - SQDMLALT
;; - SQDMLSLB
;; - SQDMLSLBT
;; - SQDMLSLT
;; - UABALB
;; - UABALT
;; - UMLALB
;; - UMLALT
;; - UMLSLB
;; - UMLSLT
;; -------------------------------------------------------------------------

;; Non-saturating MLA operations.
(define_insn "@aarch64_sve_add_<sve_int_op><mode>"
  [(set (match_operand:SVE_FULL_HSDI 0 "register_operand" "=w, ?&w")
	(plus:SVE_FULL_HSDI
	  (unspec:SVE_FULL_HSDI
	    [(match_operand:<VNARROW> 2 "register_operand" "w, w")
	     (match_operand:<VNARROW> 3 "register_operand" "w, w")]
	    SVE2_INT_ADD_BINARY_LONG)
	  (match_operand:SVE_FULL_HSDI 1 "register_operand" "0, w")))]
  "TARGET_SVE2"
  "@
   <sve_int_add_op>\t%0.<Vetype>, %2.<Ventype>, %3.<Ventype>
   movprfx\t%0, %1\;<sve_int_add_op>\t%0.<Vetype>, %2.<Ventype>, %3.<Ventype>"
  [(set_attr "movprfx" "*,yes")]
)

;; Non-saturating MLA operations with lane select.
(define_insn "@aarch64_sve_add_<sve_int_op>_lane_<mode>"
  [(set (match_operand:SVE_FULL_SDI 0 "register_operand" "=w, ?&w")
	(plus:SVE_FULL_SDI
	  (unspec:SVE_FULL_SDI
	    [(match_operand:<VNARROW> 2 "register_operand" "w, w")
	     (unspec:<VNARROW>
	       [(match_operand:<VNARROW> 3 "register_operand" "<sve_lane_con>, <sve_lane_con>")
		(match_operand:SI 4 "const_int_operand")]
	       UNSPEC_SVE_LANE_SELECT)]
	    SVE2_INT_ADD_BINARY_LONG_LANE)
	  (match_operand:SVE_FULL_SDI 1 "register_operand" "0, w")))]
  "TARGET_SVE2"
  "@
   <sve_int_add_op>\t%0.<Vetype>, %2.<Ventype>, %3.<Ventype>[%4]
   movprfx\t%0, %1\;<sve_int_add_op>\t%0.<Vetype>, %2.<Ventype>, %3.<Ventype>[%4]"
  [(set_attr "movprfx" "*,yes")]
)

;; Saturating MLA operations.
(define_insn "@aarch64_sve_qadd_<sve_int_op><mode>"
  [(set (match_operand:SVE_FULL_HSDI 0 "register_operand" "=w, ?&w")
	(ss_plus:SVE_FULL_HSDI
	  (unspec:SVE_FULL_HSDI
	    [(match_operand:<VNARROW> 2 "register_operand" "w, w")
	     (match_operand:<VNARROW> 3 "register_operand" "w, w")]
	    SVE2_INT_QADD_BINARY_LONG)
	  (match_operand:SVE_FULL_HSDI 1 "register_operand" "0, w")))]
  "TARGET_SVE2"
  "@
   <sve_int_qadd_op>\t%0.<Vetype>, %2.<Ventype>, %3.<Ventype>
   movprfx\t%0, %1\;<sve_int_qadd_op>\t%0.<Vetype>, %2.<Ventype>, %3.<Ventype>"
  [(set_attr "movprfx" "*,yes")]
)

;; Saturating MLA operations with lane select.
(define_insn "@aarch64_sve_qadd_<sve_int_op>_lane_<mode>"
  [(set (match_operand:SVE_FULL_SDI 0 "register_operand" "=w, ?&w")
	(ss_plus:SVE_FULL_SDI
	  (unspec:SVE_FULL_SDI
	    [(match_operand:<VNARROW> 2 "register_operand" "w, w")
	     (unspec:<VNARROW>
	       [(match_operand:<VNARROW> 3 "register_operand" "<sve_lane_con>, <sve_lane_con>")
		(match_operand:SI 4 "const_int_operand")]
	       UNSPEC_SVE_LANE_SELECT)]
	    SVE2_INT_QADD_BINARY_LONG_LANE)
	  (match_operand:SVE_FULL_SDI 1 "register_operand" "0, w")))]
  "TARGET_SVE2"
  "@
   <sve_int_qadd_op>\t%0.<Vetype>, %2.<Ventype>, %3.<Ventype>[%4]
   movprfx\t%0, %1\;<sve_int_qadd_op>\t%0.<Vetype>, %2.<Ventype>, %3.<Ventype>[%4]"
  [(set_attr "movprfx" "*,yes")]
)

;; Non-saturating MLS operations.
(define_insn "@aarch64_sve_sub_<sve_int_op><mode>"
  [(set (match_operand:SVE_FULL_HSDI 0 "register_operand" "=w, ?&w")
	(minus:SVE_FULL_HSDI
	  (match_operand:SVE_FULL_HSDI 1 "register_operand" "0, w")
	  (unspec:SVE_FULL_HSDI
	    [(match_operand:<VNARROW> 2 "register_operand" "w, w")
	     (match_operand:<VNARROW> 3 "register_operand" "w, w")]
	    SVE2_INT_SUB_BINARY_LONG)))]
  "TARGET_SVE2"
  "@
   <sve_int_sub_op>\t%0.<Vetype>, %2.<Ventype>, %3.<Ventype>
   movprfx\t%0, %1\;<sve_int_sub_op>\t%0.<Vetype>, %2.<Ventype>, %3.<Ventype>"
  [(set_attr "movprfx" "*,yes")]
)

;; Non-saturating MLS operations with lane select.
(define_insn "@aarch64_sve_sub_<sve_int_op>_lane_<mode>"
  [(set (match_operand:SVE_FULL_SDI 0 "register_operand" "=w, ?&w")
	(minus:SVE_FULL_SDI
	  (match_operand:SVE_FULL_SDI 1 "register_operand" "0, w")
	  (unspec:SVE_FULL_SDI
	    [(match_operand:<VNARROW> 2 "register_operand" "w, w")
	     (unspec:<VNARROW>
	       [(match_operand:<VNARROW> 3 "register_operand" "<sve_lane_con>, <sve_lane_con>")
		(match_operand:SI 4 "const_int_operand")]
	       UNSPEC_SVE_LANE_SELECT)]
	    SVE2_INT_SUB_BINARY_LONG_LANE)))]
  "TARGET_SVE2"
  "@
   <sve_int_sub_op>\t%0.<Vetype>, %2.<Ventype>, %3.<Ventype>[%4]
   movprfx\t%0, %1\;<sve_int_sub_op>\t%0.<Vetype>, %2.<Ventype>, %3.<Ventype>[%4]"
  [(set_attr "movprfx" "*,yes")]
)

;; Saturating MLS operations.
(define_insn "@aarch64_sve_qsub_<sve_int_op><mode>"
  [(set (match_operand:SVE_FULL_HSDI 0 "register_operand" "=w, ?&w")
	(ss_minus:SVE_FULL_HSDI
	  (match_operand:SVE_FULL_HSDI 1 "register_operand" "0, w")
	  (unspec:SVE_FULL_HSDI
	    [(match_operand:<VNARROW> 2 "register_operand" "w, w")
	     (match_operand:<VNARROW> 3 "register_operand" "w, w")]
	    SVE2_INT_QSUB_BINARY_LONG)))]
  "TARGET_SVE2"
  "@
   <sve_int_qsub_op>\t%0.<Vetype>, %2.<Ventype>, %3.<Ventype>
   movprfx\t%0, %1\;<sve_int_qsub_op>\t%0.<Vetype>, %2.<Ventype>, %3.<Ventype>"
  [(set_attr "movprfx" "*,yes")]
)

;; Saturating MLS operations with lane select.
(define_insn "@aarch64_sve_qsub_<sve_int_op>_lane_<mode>"
  [(set (match_operand:SVE_FULL_SDI 0 "register_operand" "=w, ?&w")
	(ss_minus:SVE_FULL_SDI
	  (match_operand:SVE_FULL_SDI 1 "register_operand" "0, w")
	  (unspec:SVE_FULL_SDI
	    [(match_operand:<VNARROW> 2 "register_operand" "w, w")
	     (unspec:<VNARROW>
	       [(match_operand:<VNARROW> 3 "register_operand" "<sve_lane_con>, <sve_lane_con>")
		(match_operand:SI 4 "const_int_operand")]
	       UNSPEC_SVE_LANE_SELECT)]
	    SVE2_INT_QSUB_BINARY_LONG_LANE)))]
  "TARGET_SVE2"
  "@
   <sve_int_qsub_op>\t%0.<Vetype>, %2.<Ventype>, %3.<Ventype>[%4]
   movprfx\t%0, %1\;<sve_int_qsub_op>\t%0.<Vetype>, %2.<Ventype>, %3.<Ventype>[%4]"
  [(set_attr "movprfx" "*,yes")]
)

;; -------------------------------------------------------------------------
;; ---- [FP] Long multiplication with accumulation
;; -------------------------------------------------------------------------
;; Includes:
;; - FMLALB
;; - FMLALT
;; - FMLSLB
;; - FMLSLT
;; -------------------------------------------------------------------------

(define_insn "@aarch64_sve_<sve_fp_op><mode>"
  [(set (match_operand:VNx4SF_ONLY 0 "register_operand" "=w, ?&w")
	(unspec:VNx4SF_ONLY
	  [(match_operand:<VNARROW> 1 "register_operand" "w, w")
	   (match_operand:<VNARROW> 2 "register_operand" "w, w")
	   (match_operand:VNx4SF_ONLY 3 "register_operand" "0, w")]
	  SVE2_FP_TERNARY_LONG))]
  "TARGET_SVE2"
  "@
   <sve_fp_op>\t%0.<Vetype>, %1.<Ventype>, %2.<Ventype>
   movprfx\t%0, %3\;<sve_fp_op>\t%0.<Vetype>, %1.<Ventype>, %2.<Ventype>"
  [(set_attr "movprfx" "*,yes")]
)

;; As above, but with the second multiplicand taken from an indexed lane.
(define_insn "@aarch64_<sve_fp_op>_lane_<mode>"
  [(set (match_operand:VNx4SF_ONLY 0 "register_operand" "=w, ?&w")
	(unspec:VNx4SF_ONLY
	  [(match_operand:<VNARROW> 1 "register_operand" "w, w")
	   (unspec:<VNARROW>
	     [(match_operand:<VNARROW> 2 "register_operand" "<sve_lane_con>, <sve_lane_con>")
	      (match_operand:SI 3 "const_int_operand")]
	     UNSPEC_SVE_LANE_SELECT)
	   (match_operand:VNx4SF_ONLY 4 "register_operand" "0, w")]
	  SVE2_FP_TERNARY_LONG_LANE))]
  "TARGET_SVE2"
  "@
   <sve_fp_op>\t%0.<Vetype>, %1.<Ventype>, %2.<Ventype>[%3]
   movprfx\t%0, %4\;<sve_fp_op>\t%0.<Vetype>, %1.<Ventype>, %2.<Ventype>[%3]"
  [(set_attr "movprfx" "*,yes")]
)

;; =========================================================================
;; == Narrowing arithmetic
;; =========================================================================

;; -------------------------------------------------------------------------
;; ---- [INT] Narrowing unary arithmetic
;; -------------------------------------------------------------------------
;; Includes:
;; - SQXTNB
;; - SQXTNT
;; - SQXTUNB
;; - SQXTUNT
;; - UQXTNB
;; - UQXTNT
;; -------------------------------------------------------------------------

(define_insn "@aarch64_sve_<sve_int_op><mode>"
  [(set (match_operand:<VNARROW> 0 "register_operand" "=w")
	(unspec:<VNARROW>
	  [(match_operand:SVE_FULL_HSDI 1 "register_operand" "w")]
	  SVE2_INT_UNARY_NARROWB))]
  "TARGET_SVE2"
  "<sve_int_op>\t%0.<Ventype>, %1.<Vetype>"
)

;; These instructions do not take MOVPRFX.
(define_insn "@aarch64_sve_<sve_int_op><mode>"
  [(set (match_operand:<VNARROW> 0 "register_operand" "=w")
	(unspec:<VNARROW>
	  [(match_operand:<VNARROW> 1 "register_operand" "0")
	   (match_operand:SVE_FULL_HSDI 2 "register_operand" "w")]
	  SVE2_INT_UNARY_NARROWT))]
  "TARGET_SVE2"
  "<sve_int_op>\t%0.<Ventype>, %2.<Vetype>"
)

;; -------------------------------------------------------------------------
;; ---- [INT] Narrowing binary arithmetic
;; -------------------------------------------------------------------------
;; Includes:
;; - ADDHNB
;; - ADDHNT
;; - RADDHNB
;; - RADDHNT
;; - RSUBHNB
;; - RSUBHNT
;; - SUBHNB
;; - SUBHNT
;; -------------------------------------------------------------------------

(define_insn "@aarch64_sve_<sve_int_op><mode>"
  [(set (match_operand:<VNARROW> 0 "register_operand" "=w")
	(unspec:<VNARROW>
	  [(match_operand:SVE_FULL_HSDI 1 "register_operand" "w")
	   (match_operand:SVE_FULL_HSDI 2 "register_operand" "w")]
	  SVE2_INT_BINARY_NARROWB))]
  "TARGET_SVE2"
  "<sve_int_op>\t%0.<Ventype>, %1.<Vetype>, %2.<Vetype>"
)

;; These instructions do not take MOVPRFX.
(define_insn "@aarch64_sve_<sve_int_op><mode>"
  [(set (match_operand:<VNARROW> 0 "register_operand" "=w")
	(unspec:<VNARROW>
	  [(match_operand:<VNARROW> 1 "register_operand" "0")
	   (match_operand:SVE_FULL_HSDI 2 "register_operand" "w")
	   (match_operand:SVE_FULL_HSDI 3 "register_operand" "w")]
	  SVE2_INT_BINARY_NARROWT))]
  "TARGET_SVE2"
  "<sve_int_op>\t%0.<Ventype>, %2.<Vetype>, %3.<Vetype>"
)

;; -------------------------------------------------------------------------
;; ---- [INT] Narrowing right shifts
;; -------------------------------------------------------------------------
;; Includes:
;; - RSHRNB
;; - RSHRNT
;; - SHRNB
;; - SHRNT
;; - SQRSHRNB
;; - SQRSHRNT
;; - SQRSHRUNB
;; - SQRSHRUNT
;; - SQSHRNB
;; - SQSHRNT
;; - SQSHRUNB
;; - SQSHRUNT
;; - UQRSHRNB
;; - UQRSHRNT
;; - UQSHRNB
;; - UQSHRNT
;; -------------------------------------------------------------------------

;; The immediate range is enforced before generating the instruction.
(define_insn "@aarch64_sve_<sve_int_op><mode>"
  [(set (match_operand:<VNARROW> 0 "register_operand" "=w")
	(unspec:<VNARROW>
	  [(match_operand:SVE_FULL_HSDI 1 "register_operand" "w")
	   (match_operand:DI 2 "const_int_operand")]
	  SVE2_INT_SHIFT_IMM_NARROWB))]
  "TARGET_SVE2"
  "<sve_int_op>\t%0.<Ventype>, %1.<Vetype>, #%2"
)

;; The immediate range is enforced before generating the instruction.
;; These instructions do not take MOVPRFX.
(define_insn "@aarch64_sve_<sve_int_op><mode>"
  [(set (match_operand:<VNARROW> 0 "register_operand" "=w")
	(unspec:<VNARROW>
	  [(match_operand:<VNARROW> 1 "register_operand" "0")
	   (match_operand:SVE_FULL_HSDI 2 "register_operand" "w")
	   (match_operand:DI 3 "const_int_operand")]
	  SVE2_INT_SHIFT_IMM_NARROWT))]
  "TARGET_SVE2"
  "<sve_int_op>\t%0.<Ventype>, %2.<Vetype>, #%3"
)

;; =========================================================================
;; == Pairwise arithmetic
;; =========================================================================

;; -------------------------------------------------------------------------
;; ---- [INT] Pairwise arithmetic
;; -------------------------------------------------------------------------
;; Includes:
;; - ADDP
;; - SMAXP
;; - SMINP
;; - UMAXP
;; - UMINP
;; -------------------------------------------------------------------------

(define_insn "@aarch64_pred_<sve_int_op><mode>"
  [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
	(unspec:SVE_FULL_I
	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
	   (match_operand:SVE_FULL_I 2 "register_operand" "0, w")
	   (match_operand:SVE_FULL_I 3 "register_operand" "w, w")]
	  SVE2_INT_BINARY_PAIR))]
  "TARGET_SVE2"
  "@
   <sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
   movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
  [(set_attr "movprfx" "*,yes")]
)

;; -------------------------------------------------------------------------
;; ---- [FP] Pairwise arithmetic
;; -------------------------------------------------------------------------
;; Includes:
;; - FADDP
;; - FMAXP
;; - FMAXNMP
;; - FMINP
;; - FMINNMP
;; -------------------------------------------------------------------------

(define_insn "@aarch64_pred_<sve_fp_op><mode>"
  [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w")
	(unspec:SVE_FULL_F
	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
	   (match_operand:SVE_FULL_F 2 "register_operand" "0, w")
	   (match_operand:SVE_FULL_F 3 "register_operand" "w, w")]
	  SVE2_FP_BINARY_PAIR))]
  "TARGET_SVE2"
  "@
   <sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
   movprfx\t%0, %2\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
  [(set_attr "movprfx" "*,yes")]
)

;; -------------------------------------------------------------------------
;; ---- [INT] Pairwise arithmetic with accumulation
;; -------------------------------------------------------------------------
;; Includes:
;; - SADALP
;; - UADALP
;; -------------------------------------------------------------------------

;; Predicated pairwise absolute difference and accumulate with merging.
(define_expand "@cond_<sve_int_op><mode>"
  [(set (match_operand:SVE_FULL_HSDI 0 "register_operand")
	(unspec:SVE_FULL_HSDI
	  [(match_operand:<VPRED> 1 "register_operand")
	   (unspec:SVE_FULL_HSDI
	     [(match_dup 1)
	      (match_operand:SVE_FULL_HSDI 2 "register_operand")
	      (match_operand:<VNARROW> 3 "register_operand")]
	     SVE2_INT_BINARY_PAIR_LONG)
	   (match_operand:SVE_FULL_HSDI 4 "aarch64_simd_reg_or_zero")]
	  UNSPEC_SEL))]
  "TARGET_SVE2"
{
  /* Only target code is aware of these operations, so we don't need
     to handle the fully-general case.  */
  gcc_assert (rtx_equal_p (operands[2], operands[4])
	      || CONSTANT_P (operands[4]));
})

;; Predicated pairwise absolute difference and accumulate, merging with
;; the first input.
;; Predicated pairwise widening add-and-accumulate ([SU]ADALP), merging with
;; the first input.  Operand 4 is the (possibly relaxed) governing predicate
;; of the inner operation; the rewrite clause below canonicalizes it to an
;; all-true predicate once the pattern has matched.
(define_insn_and_rewrite "*cond_<sve_int_op><mode>_2"
  [(set (match_operand:SVE_FULL_HSDI 0 "register_operand" "=w, ?&w")
	(unspec:SVE_FULL_HSDI
	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
	   (unspec:SVE_FULL_HSDI
	     [(match_operand 4)
	      (match_operand:SVE_FULL_HSDI 2 "register_operand" "0, w")
	      (match_operand:<VNARROW> 3 "register_operand" "w, w")]
	     SVE2_INT_BINARY_PAIR_LONG)
	   (match_dup 2)]
	  UNSPEC_SEL))]
  "TARGET_SVE2"
  "@
   <sve_int_op>\t%0.<Vetype>, %1/m, %3.<Ventype>
   movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %3.<Ventype>"
  "&& !CONSTANT_P (operands[4])"
  {
    operands[4] = CONSTM1_RTX (<VPRED>mode);
  }
  [(set_attr "movprfx" "*,yes")]
)

;; Predicated pairwise widening add-and-accumulate ([SU]ADALP), merging
;; with zero.  Inactive lanes of the result are zeroed by the zeroing
;; MOVPRFX, so the destination must not overlap the inputs (earlyclobber).
(define_insn_and_rewrite "*cond_<sve_int_op><mode>_z"
  [(set (match_operand:SVE_FULL_HSDI 0 "register_operand" "=&w, &w")
	(unspec:SVE_FULL_HSDI
	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
	   (unspec:SVE_FULL_HSDI
	     [(match_operand 5)
	      (match_operand:SVE_FULL_HSDI 2 "register_operand" "0, w")
	      (match_operand:<VNARROW> 3 "register_operand" "w, w")]
	     SVE2_INT_BINARY_PAIR_LONG)
	   (match_operand:SVE_FULL_HSDI 4 "aarch64_simd_imm_zero")]
	  UNSPEC_SEL))]
  "TARGET_SVE2"
  "@
   movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %3.<Ventype>
   movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %3.<Ventype>"
  "&& !CONSTANT_P (operands[5])"
  {
    operands[5] = CONSTM1_RTX (<VPRED>mode);
  }
  [(set_attr "movprfx" "yes")]
)

;; =========================================================================
;; == Complex arithmetic
;; =========================================================================

;; -------------------------------------------------------------------------
;; ---- [INT] Complex binary operations
;; -------------------------------------------------------------------------
;; Includes:
;; - CADD
;; - SQCADD
;; -------------------------------------------------------------------------

;; Complex integer add with rotation.  The rotation amount (#90 or #270)
;; is implied by the unspec and printed via <rot>.
(define_insn "@aarch64_sve_<optab><mode>"
  [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
	(unspec:SVE_FULL_I
	  [(match_operand:SVE_FULL_I 1 "register_operand" "0, w")
	   (match_operand:SVE_FULL_I 2 "register_operand" "w, w")]
	  SVE2_INT_CADD))]
  "TARGET_SVE2"
  "@
   <sve_int_op>\t%0.<Vetype>, %0.<Vetype>, %2.<Vetype>, #<rot>
   movprfx\t%0, %1\;<sve_int_op>\t%0.<Vetype>, %0.<Vetype>, %2.<Vetype>, #<rot>"
  [(set_attr "movprfx" "*,yes")]
)

;; unpredicated optab pattern for auto-vectorizer
(define_expand "cadd<rot><mode>3"
  [(set (match_operand:SVE_FULL_I 0 "register_operand")
	(unspec:SVE_FULL_I
	  [(match_operand:SVE_FULL_I 1 "register_operand")
	   (match_operand:SVE_FULL_I 2 "register_operand")]
	  SVE2_INT_CADD_OP))]
  "TARGET_SVE2"
)

;; -------------------------------------------------------------------------
;; ---- [INT] Complex ternary operations
;; -------------------------------------------------------------------------
;; Includes:
;; - CMLA
;; - SQRDCMLA
;; -------------------------------------------------------------------------

;; Complex integer multiply-add with rotation.  Operand 1 is the addend
;; and must be tied to the destination (or copied into it via MOVPRFX).
(define_insn "@aarch64_sve_<optab><mode>"
  [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
	(unspec:SVE_FULL_I
	  [(match_operand:SVE_FULL_I 1 "register_operand" "0, w")
	   (match_operand:SVE_FULL_I 2 "register_operand" "w, w")
	   (match_operand:SVE_FULL_I 3 "register_operand" "w, w")]
	  SVE2_INT_CMLA))]
  "TARGET_SVE2"
  "@
   <sve_int_op>\t%0.<Vetype>, %2.<Vetype>, %3.<Vetype>, #<rot>
   movprfx\t%0, %1\;<sve_int_op>\t%0.<Vetype>, %2.<Vetype>, %3.<Vetype>, #<rot>"
  [(set_attr "movprfx" "*,yes")]
)

;; Lane form of the above: operand 3 supplies the complex-number lane
;; selected by immediate operand 4.  <sve_lane_con> restricts operand 3
;; to the register range that lane-indexed forms can encode.
(define_insn "@aarch64_<optab>_lane_<mode>"
  [(set (match_operand:SVE_FULL_HSI 0 "register_operand" "=w, ?&w")
	(unspec:SVE_FULL_HSI
	  [(match_operand:SVE_FULL_HSI 1 "register_operand" "0, w")
	   (match_operand:SVE_FULL_HSI 2 "register_operand" "w, w")
	   (unspec:SVE_FULL_HSI
	     [(match_operand:SVE_FULL_HSI 3 "register_operand" "<sve_lane_con>, <sve_lane_con>")
	      (match_operand:SI 4 "const_int_operand")]
	     UNSPEC_SVE_LANE_SELECT)]
	  SVE2_INT_CMLA))]
  "TARGET_SVE2"
  "@
   <sve_int_op>\t%0.<Vetype>, %2.<Vetype>, %3.<Vetype>[%4], #<rot>
   movprfx\t%0, %1\;<sve_int_op>\t%0.<Vetype>, %2.<Vetype>, %3.<Vetype>[%4], #<rot>"
  [(set_attr "movprfx" "*,yes")]
)

;; unpredicated optab pattern for auto-vectorizer
;; The complex mla/mls operations always need to expand to two instructions.
;; The first operation does half the computation and the second does the
;; remainder.  Because of this, expand early.
(define_expand "cml<fcmac1><conj_op><mode>4"
  [(set (match_operand:SVE_FULL_I 0 "register_operand")
	(plus:SVE_FULL_I (match_operand:SVE_FULL_I 1 "register_operand")
	  (unspec:SVE_FULL_I
	    [(match_operand:SVE_FULL_I 2 "register_operand")
	     (match_operand:SVE_FULL_I 3 "register_operand")]
	    SVE2_INT_CMLA_OP)))]
  "TARGET_SVE2"
{
  rtx tmp = gen_reg_rtx (<MODE>mode);
  emit_insn (gen_aarch64_sve_cmla<sve_rot1><mode> (tmp, operands[1],
						   operands[3], operands[2]));
  emit_insn (gen_aarch64_sve_cmla<sve_rot2><mode> (operands[0], tmp,
						   operands[3], operands[2]));
  DONE;
})

;; unpredicated optab pattern for auto-vectorizer
;; The complex mul operations always need to expand to two instructions.
;; The first operation does half the computation and the second does the
;; remainder.  Because of this, expand early.
;; Complex multiply, synthesized as two CMLA instructions accumulating
;; onto a zeroed register (first CMLA does the first rotation, second
;; CMLA the remaining one).
(define_expand "cmul<conj_op><mode>3"
  [(set (match_operand:SVE_FULL_I 0 "register_operand")
	(unspec:SVE_FULL_I
	  [(match_operand:SVE_FULL_I 1 "register_operand")
	   (match_operand:SVE_FULL_I 2 "register_operand")]
	  SVE2_INT_CMUL_OP))]
  "TARGET_SVE2"
{
  rtx accum = force_reg (<MODE>mode, CONST0_RTX (<MODE>mode));
  rtx tmp = gen_reg_rtx (<MODE>mode);
  emit_insn (gen_aarch64_sve_cmla<sve_rot1><mode> (tmp, accum,
						   operands[2], operands[1]));
  emit_insn (gen_aarch64_sve_cmla<sve_rot2><mode> (operands[0], tmp,
						   operands[2], operands[1]));
  DONE;
})

;; -------------------------------------------------------------------------
;; ---- [INT] Complex dot product
;; -------------------------------------------------------------------------
;; Includes:
;; - CDOT
;; -------------------------------------------------------------------------

;; Complex dot product: accumulator operand 1 has elements four times the
;; width of the multiplicand elements in operands 2 and 3 (<VSI2QI>).
(define_insn "@aarch64_sve_<optab><mode>"
  [(set (match_operand:SVE_FULL_SDI 0 "register_operand" "=w, ?&w")
	(unspec:SVE_FULL_SDI
	  [(match_operand:SVE_FULL_SDI 1 "register_operand" "0, w")
	   (match_operand:<VSI2QI> 2 "register_operand" "w, w")
	   (match_operand:<VSI2QI> 3 "register_operand" "w, w")]
	  SVE2_INT_CDOT))]
  "TARGET_SVE2"
  "@
   <sve_int_op>\t%0.<Vetype>, %2.<Vetype_fourth>, %3.<Vetype_fourth>, #<rot>
   movprfx\t%0, %1\;<sve_int_op>\t%0.<Vetype>, %2.<Vetype_fourth>, %3.<Vetype_fourth>, #<rot>"
  [(set_attr "movprfx" "*,yes")]
)

;; Lane form of CDOT; immediate operand 4 selects the lane of operand 3.
(define_insn "@aarch64_<optab>_lane_<mode>"
  [(set (match_operand:SVE_FULL_SDI 0 "register_operand" "=w, ?&w")
	(unspec:SVE_FULL_SDI
	  [(match_operand:SVE_FULL_SDI 1 "register_operand" "0, w")
	   (match_operand:<VSI2QI> 2 "register_operand" "w, w")
	   (unspec:<VSI2QI>
	     [(match_operand:<VSI2QI> 3 "register_operand" "<sve_lane_con>, <sve_lane_con>")
	      (match_operand:SI 4 "const_int_operand")]
	     UNSPEC_SVE_LANE_SELECT)]
	  SVE2_INT_CDOT))]
  "TARGET_SVE2"
  "@
   <sve_int_op>\t%0.<Vetype>, %2.<Vetype_fourth>, %3.<Vetype_fourth>[%4], #<rot>
   movprfx\t%0, %1\;<sve_int_op>\t%0.<Vetype>, %2.<Vetype_fourth>, %3.<Vetype_fourth>[%4], #<rot>"
  [(set_attr "movprfx" "*,yes")]
)

;; =========================================================================
;; == Conversions
;; =========================================================================

;; -------------------------------------------------------------------------
;; ---- [FP<-FP] Widening conversions
;; -------------------------------------------------------------------------
;; Includes:
;; - FCVTLT
;; -------------------------------------------------------------------------

;; Predicated convert long top.  Reads the odd (top) elements of the
;; narrow input, which is tied to the destination; operand 3 records
;; whether the FP semantics are strict or relaxed.
(define_insn "@aarch64_pred_<sve_fp_op><mode>"
  [(set (match_operand:SVE_FULL_SDF 0 "register_operand" "=w")
	(unspec:SVE_FULL_SDF
	  [(match_operand:<VPRED> 1 "register_operand" "Upl")
	   (match_operand:SI 3 "aarch64_sve_gp_strictness")
	   (match_operand:<VNARROW> 2 "register_operand" "0")]
	  SVE2_COND_FP_UNARY_LONG))]
  "TARGET_SVE2"
  "<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Ventype>"
)

;; Predicated convert long top with merging.
(define_expand "@cond_<sve_fp_op><mode>"
  [(set (match_operand:SVE_FULL_SDF 0 "register_operand")
	(unspec:SVE_FULL_SDF
	  [(match_operand:<VPRED> 1 "register_operand")
	   (unspec:SVE_FULL_SDF
	     [(match_dup 1)
	      (const_int SVE_STRICT_GP)
	      (match_operand:<VNARROW> 2 "register_operand")]
	     SVE2_COND_FP_UNARY_LONG)
	   (match_operand:SVE_FULL_SDF 3 "register_operand")]
	  UNSPEC_SEL))]
  "TARGET_SVE2"
)

;; These instructions do not take MOVPRFX.
;; FCVTLT with merging, relaxed-FP form: the inner predicate (operand 4)
;; need not match the outer one, so the rewrite clause below replaces it
;; with a copy of operand 1 once matched.  Operand 3 (the fallback value)
;; must be tied to the destination since FCVTLT takes no MOVPRFX.
(define_insn_and_rewrite "*cond_<sve_fp_op><mode>_relaxed"
  [(set (match_operand:SVE_FULL_SDF 0 "register_operand" "=w")
	(unspec:SVE_FULL_SDF
	  [(match_operand:<VPRED> 1 "register_operand" "Upl")
	   (unspec:SVE_FULL_SDF
	     [(match_operand 4)
	      (const_int SVE_RELAXED_GP)
	      (match_operand:<VNARROW> 2 "register_operand" "w")]
	     SVE2_COND_FP_UNARY_LONG)
	   (match_operand:SVE_FULL_SDF 3 "register_operand" "0")]
	  UNSPEC_SEL))]
  "TARGET_SVE2"
  "<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Ventype>"
  "&& !rtx_equal_p (operands[1], operands[4])"
  {
    operands[4] = copy_rtx (operands[1]);
  }
)

;; FCVTLT with merging, strict-FP form: the inner predicate must be
;; exactly the outer one (match_dup 1), so no rewrite is needed.
(define_insn "*cond_<sve_fp_op><mode>_strict"
  [(set (match_operand:SVE_FULL_SDF 0 "register_operand" "=w")
	(unspec:SVE_FULL_SDF
	  [(match_operand:<VPRED> 1 "register_operand" "Upl")
	   (unspec:SVE_FULL_SDF
	     [(match_dup 1)
	      (const_int SVE_STRICT_GP)
	      (match_operand:<VNARROW> 2 "register_operand" "w")]
	     SVE2_COND_FP_UNARY_LONG)
	   (match_operand:SVE_FULL_SDF 3 "register_operand" "0")]
	  UNSPEC_SEL))]
  "TARGET_SVE2"
  "<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Ventype>"
)

;; -------------------------------------------------------------------------
;; ---- [FP<-FP] Narrowing conversions
;; -------------------------------------------------------------------------
;; Includes:
;; - FCVTNT
;; - FCVTX
;; - FCVTXNT
;; -------------------------------------------------------------------------

;; Predicated FCVTNT.  This doesn't give a natural aarch64_pred_*/cond_*
;; pair because the even elements always have to be supplied for active
;; elements, even if the inactive elements don't matter.
;;
;; These instructions do not take MOVPRFX.
;; FCVTNT: narrow operand 3 and write the results into the odd (top)
;; elements of operand 1, which is tied to the destination; even elements
;; pass through unchanged.
(define_insn "@aarch64_sve_cvtnt<mode>"
  [(set (match_operand:SVE_FULL_HSF 0 "register_operand" "=w")
	(unspec:SVE_FULL_HSF
	  [(match_operand:<VWIDE_PRED> 2 "register_operand" "Upl")
	   (const_int SVE_STRICT_GP)
	   (match_operand:SVE_FULL_HSF 1 "register_operand" "0")
	   (match_operand:<VWIDE> 3 "register_operand" "w")]
	  UNSPEC_COND_FCVTNT))]
  "TARGET_SVE2"
  "fcvtnt\t%0.<Vetype>, %2/m, %3.<Vewtype>"
)

;; Predicated FCVTX (equivalent to what would be FCVTXNB, except that
;; it supports MOVPRFX).
(define_insn "@aarch64_pred_<sve_fp_op><mode>"
  [(set (match_operand:VNx4SF_ONLY 0 "register_operand" "=w, ?&w")
	(unspec:VNx4SF_ONLY
	  [(match_operand:<VWIDE_PRED> 1 "register_operand" "Upl, Upl")
	   (match_operand:SI 3 "aarch64_sve_gp_strictness")
	   (match_operand:<VWIDE> 2 "register_operand" "0, w")]
	  SVE2_COND_FP_UNARY_NARROWB))]
  "TARGET_SVE2"
  "@
   <sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vewtype>
   movprfx\t%0, %2\;<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vewtype>"
  [(set_attr "movprfx" "*,yes")]
)

;; Predicated FCVTX with merging.
;; Expander for merging FCVTX; operand 3 is the fallback value for
;; inactive lanes (a register or zero).
(define_expand "@cond_<sve_fp_op><mode>"
  [(set (match_operand:VNx4SF_ONLY 0 "register_operand")
	(unspec:VNx4SF_ONLY
	  [(match_operand:<VWIDE_PRED> 1 "register_operand")
	   (unspec:VNx4SF_ONLY
	     [(match_dup 1)
	      (const_int SVE_STRICT_GP)
	      (match_operand:<VWIDE> 2 "register_operand")]
	     SVE2_COND_FP_UNARY_NARROWB)
	   (match_operand:VNx4SF_ONLY 3 "aarch64_simd_reg_or_zero")]
	  UNSPEC_SEL))]
  "TARGET_SVE2"
)

;; Merging FCVTX, relaxed-FP form.  The three alternatives handle the
;; fallback being the destination, zero, or another register; the
;; rewrite clause canonicalizes the relaxed inner predicate to the
;; outer one.
(define_insn_and_rewrite "*cond_<sve_fp_op><mode>_any_relaxed"
  [(set (match_operand:VNx4SF_ONLY 0 "register_operand" "=&w, &w, &w")
	(unspec:VNx4SF_ONLY
	  [(match_operand:<VWIDE_PRED> 1 "register_operand" "Upl, Upl, Upl")
	   (unspec:VNx4SF_ONLY
	     [(match_operand 4)
	      (const_int SVE_RELAXED_GP)
	      (match_operand:<VWIDE> 2 "register_operand" "w, w, w")]
	     SVE2_COND_FP_UNARY_NARROWB)
	   (match_operand:VNx4SF_ONLY 3 "aarch64_simd_reg_or_zero" "0, Dz, w")]
	  UNSPEC_SEL))]
  "TARGET_SVE2 && !rtx_equal_p (operands[2], operands[3])"
  "@
   <sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vewtype>
   movprfx\t%0.<Vewtype>, %1/z, %2.<Vewtype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vewtype>
   movprfx\t%0, %3\;<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vewtype>"
  "&& !rtx_equal_p (operands[1], operands[4])"
  {
    operands[4] = copy_rtx (operands[1]);
  }
  [(set_attr "movprfx" "*,yes,yes")]
)

;; Merging FCVTX, strict-FP form (inner predicate must equal the outer
;; one, so no rewrite clause).
(define_insn "*cond_<sve_fp_op><mode>_any_strict"
  [(set (match_operand:VNx4SF_ONLY 0 "register_operand" "=&w, &w, &w")
	(unspec:VNx4SF_ONLY
	  [(match_operand:<VWIDE_PRED> 1 "register_operand" "Upl, Upl, Upl")
	   (unspec:VNx4SF_ONLY
	     [(match_dup 1)
	      (const_int SVE_STRICT_GP)
	      (match_operand:<VWIDE> 2 "register_operand" "w, w, w")]
	     SVE2_COND_FP_UNARY_NARROWB)
	   (match_operand:VNx4SF_ONLY 3 "aarch64_simd_reg_or_zero" "0, Dz, w")]
	  UNSPEC_SEL))]
  "TARGET_SVE2 && !rtx_equal_p (operands[2], operands[3])"
  "@
   <sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vewtype>
   movprfx\t%0.<Vewtype>, %1/z, %2.<Vewtype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vewtype>
   movprfx\t%0, %3\;<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vewtype>"
  [(set_attr "movprfx" "*,yes,yes")]
)

;; Predicated FCVTXNT.  This doesn't give a natural aarch64_pred_*/cond_*
;; pair because the even elements always have to be supplied for active
;; elements, even if the inactive elements don't matter.
;;
;; These instructions do not take MOVPRFX.
(define_insn "@aarch64_sve2_cvtxnt<mode>"
  [(set (match_operand:<VNARROW> 0 "register_operand" "=w")
	(unspec:<VNARROW>
	  [(match_operand:<VPRED> 2 "register_operand" "Upl")
	   (const_int SVE_STRICT_GP)
	   (match_operand:<VNARROW> 1 "register_operand" "0")
	   (match_operand:VNx2DF_ONLY 3 "register_operand" "w")]
	  UNSPEC_COND_FCVTXNT))]
  "TARGET_SVE2"
  "fcvtxnt\t%0.<Ventype>, %2/m, %3.<Vetype>"
)

;; =========================================================================
;; == Other arithmetic
;; =========================================================================

;; -------------------------------------------------------------------------
;; ---- [INT] Reciprocal approximation
;; -------------------------------------------------------------------------
;; Includes:
;; - URECPE
;; - URSQRTE
;; -------------------------------------------------------------------------

;; Predicated integer unary operations.
;; URECPE/URSQRTE only exist for 32-bit elements, hence VNx4SI_ONLY.
;; UNSPEC_PRED_X marks the predicate as "don't care" for inactive lanes.
(define_insn "@aarch64_pred_<sve_int_op><mode>"
  [(set (match_operand:VNx4SI_ONLY 0 "register_operand" "=w, ?&w")
	(unspec:VNx4SI_ONLY
	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
	   (unspec:VNx4SI_ONLY
	     [(match_operand:VNx4SI_ONLY 2 "register_operand" "0, w")]
	     SVE2_U32_UNARY)]
	  UNSPEC_PRED_X))]
  "TARGET_SVE2"
  "@
   <sve_int_op>\t%0.<Vetype>, %1/m, %2.<Vetype>
   movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %2.<Vetype>"
  [(set_attr "movprfx" "*,yes")]
)

;; Predicated integer unary operations with merging.
(define_expand "@cond_<sve_int_op><mode>"
  [(set (match_operand:VNx4SI_ONLY 0 "register_operand")
	(unspec:VNx4SI_ONLY
	  [(match_operand:<VPRED> 1 "register_operand")
	   (unspec:VNx4SI_ONLY
	     [(match_dup 4)
	      (unspec:VNx4SI_ONLY
		[(match_operand:VNx4SI_ONLY 2 "register_operand")]
		SVE2_U32_UNARY)]
	     UNSPEC_PRED_X)
	   (match_operand:VNx4SI_ONLY 3 "aarch64_simd_reg_or_zero")]
	  UNSPEC_SEL))]
  "TARGET_SVE2"
  {
    operands[4] = CONSTM1_RTX (<MODE>mode);
  }
)

;; Merging form; the rewrite clause canonicalizes the inner "don't care"
;; predicate to all-true once the pattern has matched.
(define_insn_and_rewrite "*cond_<sve_int_op><mode>"
  [(set (match_operand:VNx4SI_ONLY 0 "register_operand" "=w, ?&w, ?&w")
	(unspec:VNx4SI_ONLY
	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
	   (unspec:VNx4SI_ONLY
	     [(match_operand 4)
	      (unspec:VNx4SI_ONLY
		[(match_operand:VNx4SI_ONLY 2 "register_operand" "w, w, w")]
		SVE2_U32_UNARY)]
	     UNSPEC_PRED_X)
	   (match_operand:VNx4SI_ONLY 3 "aarch64_simd_reg_or_zero" "0, Dz, w")]
	  UNSPEC_SEL))]
  "TARGET_SVE2"
  "@
   <sve_int_op>\t%0.<Vetype>, %1/m, %2.<Vetype>
   movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %2.<Vetype>
   movprfx\t%0, %3\;<sve_int_op>\t%0.<Vetype>, %1/m, %2.<Vetype>"
  "&& !CONSTANT_P (operands[4])"
  {
    operands[4] = CONSTM1_RTX (<VPRED>mode);
  }
  [(set_attr "movprfx" "*,yes,yes")]
)

;; -------------------------------------------------------------------------
;; ---- [INT<-FP] Base-2 logarithm
;; -------------------------------------------------------------------------
;; Includes:
;; - FLOGB
;; -------------------------------------------------------------------------

;; Predicated FLOGB.  The result is the integer mode with the same
;; element width as the FP input (<V_INT_EQUIV>).
(define_insn "@aarch64_pred_<sve_fp_op><mode>"
  [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w, ?&w")
	(unspec:<V_INT_EQUIV>
	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
	   (match_operand:SI 3 "aarch64_sve_gp_strictness")
	   (match_operand:SVE_FULL_F 2 "register_operand" "0, w")]
	  SVE2_COND_INT_UNARY_FP))]
  "TARGET_SVE2"
  "@
   <sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vetype>
   movprfx\t%0, %2\;<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vetype>"
  [(set_attr "movprfx" "*,yes")]
)

;; Predicated FLOGB with merging.
(define_expand "@cond_<sve_fp_op><mode>"
  [(set (match_operand:<V_INT_EQUIV> 0 "register_operand")
	(unspec:<V_INT_EQUIV>
	  [(match_operand:<VPRED> 1 "register_operand")
	   (unspec:<V_INT_EQUIV>
	     [(match_dup 1)
	      (const_int SVE_STRICT_GP)
	      (match_operand:SVE_FULL_F 2 "register_operand")]
	     SVE2_COND_INT_UNARY_FP)
	   (match_operand:<V_INT_EQUIV> 3 "aarch64_simd_reg_or_zero")]
	  UNSPEC_SEL))]
  "TARGET_SVE2"
)

;; Merging FLOGB, relaxed-FP form; the rewrite clause canonicalizes the
;; relaxed inner predicate to the outer one.
(define_insn_and_rewrite "*cond_<sve_fp_op><mode>"
  [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=&w, ?&w, ?&w")
	(unspec:<V_INT_EQUIV>
	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
	   (unspec:<V_INT_EQUIV>
	     [(match_operand 4)
	      (const_int SVE_RELAXED_GP)
	      (match_operand:SVE_FULL_F 2 "register_operand" "w, w, w")]
	     SVE2_COND_INT_UNARY_FP)
	   (match_operand:<V_INT_EQUIV> 3 "aarch64_simd_reg_or_zero" "0, Dz, w")]
	  UNSPEC_SEL))]
  "TARGET_SVE2 && !rtx_equal_p (operands[2], operands[3])"
  "@
   <sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vetype>
   movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vetype>
   movprfx\t%0, %3\;<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vetype>"
  "&& !rtx_equal_p (operands[1], operands[4])"
  {
    operands[4] = copy_rtx (operands[1]);
  }
  [(set_attr "movprfx" "*,yes,yes")]
)

;; Merging FLOGB, strict-FP form.
(define_insn "*cond_<sve_fp_op><mode>_strict"
  [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=&w, ?&w, ?&w")
	(unspec:<V_INT_EQUIV>
	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
	   (unspec:<V_INT_EQUIV>
	     [(match_dup 1)
	      (const_int SVE_STRICT_GP)
	      (match_operand:SVE_FULL_F 2 "register_operand" "w, w, w")]
	     SVE2_COND_INT_UNARY_FP)
	   (match_operand:<V_INT_EQUIV> 3 "aarch64_simd_reg_or_zero" "0, Dz, w")]
	  UNSPEC_SEL))]
  "TARGET_SVE2 && !rtx_equal_p (operands[2], operands[3])"
  "@
   <sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vetype>
   movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vetype>
   movprfx\t%0, %3\;<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vetype>"
  [(set_attr "movprfx" "*,yes,yes")]
)

;; -------------------------------------------------------------------------
;; ---- [INT] Polynomial multiplication
;; -------------------------------------------------------------------------
;; Includes:
;; - PMUL
;; - PMULLB
;; - PMULLT
;; -------------------------------------------------------------------------

;; Uniform PMUL.
(define_insn "@aarch64_sve2_pmul<mode>"
  [(set (match_operand:VNx16QI_ONLY 0 "register_operand" "=w")
	(unspec:VNx16QI_ONLY
	  [(match_operand:VNx16QI_ONLY 1 "register_operand" "w")
	   (match_operand:VNx16QI_ONLY 2 "register_operand" "w")]
	  UNSPEC_PMUL))]
  "TARGET_SVE2"
  "pmul\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>"
)

;; Extending PMUL, with the results modeled as wider vectors.
;; This representation is only possible for .H and .D, not .Q.
;; PMULLB/PMULLT producing elements twice the width of the inputs
;; (<VNARROW> inputs, SVE_FULL_HDI result).
(define_insn "@aarch64_sve_<optab><mode>"
  [(set (match_operand:SVE_FULL_HDI 0 "register_operand" "=w")
	(unspec:SVE_FULL_HDI
	  [(match_operand:<VNARROW> 1 "register_operand" "w")
	   (match_operand:<VNARROW> 2 "register_operand" "w")]
	  SVE2_PMULL))]
  "TARGET_SVE2"
  "<sve_int_op>\t%0.<Vetype>, %1.<Ventype>, %2.<Ventype>"
)

;; Extending PMUL, with the results modeled as pairs of values.
;; This representation works for .H, .D and .Q, with .Q requiring
;; the AES extension.  (This is enforced by the mode iterator.)
(define_insn "@aarch64_sve_<optab><mode>"
  [(set (match_operand:SVE2_PMULL_PAIR_I 0 "register_operand" "=w")
	(unspec:SVE2_PMULL_PAIR_I
	  [(match_operand:SVE2_PMULL_PAIR_I 1 "register_operand" "w")
	   (match_operand:SVE2_PMULL_PAIR_I 2 "register_operand" "w")]
	  SVE2_PMULL_PAIR))]
  "TARGET_SVE2"
  "<sve_int_op>\t%0.<Vewtype>, %1.<Vetype>, %2.<Vetype>"
)

;; =========================================================================
;; == Permutation
;; =========================================================================

;; -------------------------------------------------------------------------
;; ---- [INT,FP] General permutes
;; -------------------------------------------------------------------------
;; Includes:
;; - TBL (vector pair form)
;; - TBX
;; -------------------------------------------------------------------------

;; TBL on a pair of data vectors (operand 1 is a double-width register
;; pair; operand 2 supplies the integer indices).
(define_insn "@aarch64_sve2_tbl2<mode>"
  [(set (match_operand:SVE_FULL 0 "register_operand" "=w")
	(unspec:SVE_FULL
	  [(match_operand:<VDOUBLE> 1 "register_operand" "w")
	   (match_operand:<V_INT_EQUIV> 2 "register_operand" "w")]
	  UNSPEC_TBL2))]
  "TARGET_SVE2"
  "tbl\t%0.<Vetype>, %1, %2.<Vetype>"
)

;; TBX.  These instructions do not take MOVPRFX.
;; TBX: table lookup with fallback — out-of-range indices select the
;; corresponding element of operand 1, which is tied to the destination.
(define_insn "@aarch64_sve2_tbx<mode>"
  [(set (match_operand:SVE_FULL 0 "register_operand" "=w")
	(unspec:SVE_FULL
	  [(match_operand:SVE_FULL 1 "register_operand" "0")
	   (match_operand:SVE_FULL 2 "register_operand" "w")
	   (match_operand:<V_INT_EQUIV> 3 "register_operand" "w")]
	  UNSPEC_TBX))]
  "TARGET_SVE2"
  "tbx\t%0.<Vetype>, %2.<Vetype>, %3.<Vetype>"
)

;; -------------------------------------------------------------------------
;; ---- [INT] Optional bit-permute extensions
;; -------------------------------------------------------------------------
;; Includes:
;; - BDEP
;; - BEXT
;; - BGRP
;; -------------------------------------------------------------------------

;; Bit-permute operations; gated on the SVE2 bit-permute extension.
(define_insn "@aarch64_sve_<sve_int_op><mode>"
  [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w")
	(unspec:SVE_FULL_I
	  [(match_operand:SVE_FULL_I 1 "register_operand" "w")
	   (match_operand:SVE_FULL_I 2 "register_operand" "w")]
	  SVE2_INT_BITPERM))]
  "TARGET_SVE2_BITPERM"
  "<sve_int_op>\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>"
)

;; =========================================================================
;; == General
;; =========================================================================

;; -------------------------------------------------------------------------
;; ---- Check for aliases between pointers
;; -------------------------------------------------------------------------
;; The patterns in this section are synthetic: WHILERW and WHILEWR are
;; defined in aarch64-sve.md instead.
;; -------------------------------------------------------------------------

;; Use WHILERW and WHILEWR to accelerate alias checks.  This is only
;; possible if the accesses we're checking are exactly the same size
;; as an SVE vector.
;; Synthetic alias-check expander.  Operand 3 is the access size (one
;; SVE vector's worth of bytes) and operand 4 the guaranteed alignment
;; of the accesses; operand 0 receives 1 if no dependency exists.
(define_expand "check_<raw_war>_ptrs<mode>"
  [(match_operand:GPI 0 "register_operand")
   (unspec:VNx16BI
     [(match_operand:GPI 1 "register_operand")
      (match_operand:GPI 2 "register_operand")
      (match_operand:GPI 3 "aarch64_bytes_per_sve_vector_operand")
      (match_operand:GPI 4 "const_int_operand")]
     SVE2_WHILE_PTR)]
  "TARGET_SVE2"
{
  /* Use the widest predicate mode we can.  */
  unsigned int align = INTVAL (operands[4]);
  if (align > 8)
    align = 8;
  machine_mode pred_mode = aarch64_sve_pred_mode (align).require ();

  /* Emit a WHILERW or WHILEWR, setting the condition codes based on
     the result.  */
  emit_insn (gen_while_ptest
	     (<SVE2_WHILE_PTR:unspec>, <MODE>mode, pred_mode,
	      gen_rtx_SCRATCH (pred_mode), operands[1], operands[2],
	      CONSTM1_RTX (VNx16BImode), CONSTM1_RTX (pred_mode)));

  /* Set operand 0 to true if the last bit of the predicate result is set,
     i.e. if all elements are free of dependencies.  */
  rtx cc_reg = gen_rtx_REG (CC_NZCmode, CC_REGNUM);
  rtx cmp = gen_rtx_LTU (<MODE>mode, cc_reg, const0_rtx);
  emit_insn (gen_aarch64_cstore<mode> (operands[0], cmp, cc_reg));
  DONE;
})

;; -------------------------------------------------------------------------
;; ---- Histogram processing
;; -------------------------------------------------------------------------
;; Includes:
;; - HISTCNT
;; - HISTSEG
;; -------------------------------------------------------------------------

;; HISTCNT: per-element count of matching earlier active elements.
(define_insn "@aarch64_sve2_histcnt<mode>"
  [(set (match_operand:SVE_FULL_SDI 0 "register_operand" "=w")
	(unspec:SVE_FULL_SDI
	  [(match_operand:<VPRED> 1 "register_operand" "Upl")
	   (match_operand:SVE_FULL_SDI 2 "register_operand" "w")
	   (match_operand:SVE_FULL_SDI 3 "register_operand" "w")]
	  UNSPEC_HISTCNT))]
  "TARGET_SVE2"
  "histcnt\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>"
)

;; HISTSEG: byte-match counts within 64-bit segments (unpredicated).
(define_insn "@aarch64_sve2_histseg<mode>"
  [(set (match_operand:VNx16QI_ONLY 0 "register_operand" "=w")
	(unspec:VNx16QI_ONLY
	  [(match_operand:VNx16QI_ONLY 1 "register_operand" "w")
	   (match_operand:VNx16QI_ONLY 2 "register_operand" "w")]
	  UNSPEC_HISTSEG))]
  "TARGET_SVE2"
  "histseg\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>"
)

;; -------------------------------------------------------------------------
;; ---- String matching
;; -------------------------------------------------------------------------
;; Includes:
;; - MATCH
;; - NMATCH
;; -------------------------------------------------------------------------

;; Predicated string matching.
;; MATCH/NMATCH, producing a predicate result.  Operand 2 records what is
;; known about the governing predicate (ptrue or otherwise); the insn
;; also sets the NZCV flags, hence the clobber.
(define_insn "@aarch64_pred_<sve_int_op><mode>"
  [(set (match_operand:<VPRED> 0 "register_operand" "=Upa")
	(unspec:<VPRED>
	  [(match_operand:<VPRED> 1 "register_operand" "Upl")
	   (match_operand:SI 2 "aarch64_sve_ptrue_flag")
	   (unspec:<VPRED>
	     [(match_operand:SVE_FULL_BHI 3 "register_operand" "w")
	      (match_operand:SVE_FULL_BHI 4 "register_operand" "w")]
	     SVE2_MATCH)]
	  UNSPEC_PRED_Z))
   (clobber (reg:CC_NZC CC_REGNUM))]
  "TARGET_SVE2"
  "<sve_int_op>\t%0.<Vetype>, %1/z, %3.<Vetype>, %4.<Vetype>"
)

;; Predicated string matching in which both the flag and predicate results
;; are interesting.  The rewrite clause makes the PTEST predicate (4/5)
;; and the match predicate (6/7) literally identical once the pattern
;; has matched, keeping the RTL canonical.
(define_insn_and_rewrite "*aarch64_pred_<sve_int_op><mode>_cc"
  [(set (reg:CC_NZC CC_REGNUM)
	(unspec:CC_NZC
	  [(match_operand:VNx16BI 1 "register_operand" "Upl")
	   (match_operand 4)
	   (match_operand:SI 5 "aarch64_sve_ptrue_flag")
	   (unspec:<VPRED>
	     [(match_operand 6)
	      (match_operand:SI 7 "aarch64_sve_ptrue_flag")
	      (unspec:<VPRED>
		[(match_operand:SVE_FULL_BHI 2 "register_operand" "w")
		 (match_operand:SVE_FULL_BHI 3 "register_operand" "w")]
		SVE2_MATCH)]
	     UNSPEC_PRED_Z)]
	  UNSPEC_PTEST))
   (set (match_operand:<VPRED> 0 "register_operand" "=Upa")
	(unspec:<VPRED>
	  [(match_dup 6)
	   (match_dup 7)
	   (unspec:<VPRED>
	     [(match_dup 2)
	      (match_dup 3)]
	     SVE2_MATCH)]
	  UNSPEC_PRED_Z))]
  "TARGET_SVE2
   && aarch64_sve_same_pred_for_ptest_p (&operands[4], &operands[6])"
  "<sve_int_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>"
  "&& !rtx_equal_p (operands[4], operands[6])"
  {
    operands[6] = copy_rtx (operands[4]);
    operands[7] = operands[5];
  }
)

;; Predicated string matching in which only the flags result is interesting.
;; Flags-only variant of MATCH/NMATCH: the predicate result is dead, so
;; it is modeled as a scratch.  Same predicate-canonicalizing rewrite as
;; the _cc pattern above.
(define_insn_and_rewrite "*aarch64_pred_<sve_int_op><mode>_ptest"
  [(set (reg:CC_NZC CC_REGNUM)
	(unspec:CC_NZC
	  [(match_operand:VNx16BI 1 "register_operand" "Upl")
	   (match_operand 4)
	   (match_operand:SI 5 "aarch64_sve_ptrue_flag")
	   (unspec:<VPRED>
	     [(match_operand 6)
	      (match_operand:SI 7 "aarch64_sve_ptrue_flag")
	      (unspec:<VPRED>
		[(match_operand:SVE_FULL_BHI 2 "register_operand" "w")
		 (match_operand:SVE_FULL_BHI 3 "register_operand" "w")]
		SVE2_MATCH)]
	     UNSPEC_PRED_Z)]
	  UNSPEC_PTEST))
   (clobber (match_scratch:<VPRED> 0 "=Upa"))]
  "TARGET_SVE2
   && aarch64_sve_same_pred_for_ptest_p (&operands[4], &operands[6])"
  "<sve_int_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>"
  "&& !rtx_equal_p (operands[4], operands[6])"
  {
    operands[6] = copy_rtx (operands[4]);
    operands[7] = operands[5];
  }
)

;; =========================================================================
;; == Cryptographic extensions
;; =========================================================================

;; -------------------------------------------------------------------------
;; ---- Optional AES extensions
;; -------------------------------------------------------------------------
;; Includes:
;; - AESD
;; - AESE
;; - AESIMC
;; - AESMC
;; -------------------------------------------------------------------------

;; AESD and AESE.  The XOR of state and round key is folded into the
;; instruction; "%0" makes the XOR commutative for register allocation.
(define_insn "aarch64_sve2_aes<aes_op>"
  [(set (match_operand:VNx16QI 0 "register_operand" "=w")
	(unspec:VNx16QI
	  [(xor:VNx16QI
	     (match_operand:VNx16QI 1 "register_operand" "%0")
	     (match_operand:VNx16QI 2 "register_operand" "w"))]
	  CRYPTO_AES))]
  "TARGET_SVE2_AES"
  "aes<aes_op>\t%0.b, %0.b, %2.b"
  [(set_attr "type" "crypto_aese")]
)

;; AESMC and AESIMC.  These instructions do not take MOVPRFX.
;; AES MixColumns / InverseMixColumns; input tied to the destination.
(define_insn "aarch64_sve2_aes<aesmc_op>"
  [(set (match_operand:VNx16QI 0 "register_operand" "=w")
	(unspec:VNx16QI
	  [(match_operand:VNx16QI 1 "register_operand" "0")]
	  CRYPTO_AESMC))]
  "TARGET_SVE2_AES"
  "aes<aesmc_op>\t%0.b, %0.b"
  [(set_attr "type" "crypto_aesmc")]
)

;; When AESE/AESMC and AESD/AESIMC fusion is enabled, we really want
;; to keep the two together and enforce the register dependency without
;; scheduling or register allocation messing up the order or introducing
;; moves in between.  Mash the two together during combine.

;; Fused AESE+AESMC, emitted as one two-instruction pattern ("length" 8).
(define_insn "*aarch64_sve2_aese_fused"
  [(set (match_operand:VNx16QI 0 "register_operand" "=w")
	(unspec:VNx16QI
	  [(unspec:VNx16QI
	     [(xor:VNx16QI
		(match_operand:VNx16QI 1 "register_operand" "%0")
		(match_operand:VNx16QI 2 "register_operand" "w"))]
	     UNSPEC_AESE)]
	  UNSPEC_AESMC))]
  "TARGET_SVE2_AES && aarch64_fusion_enabled_p (AARCH64_FUSE_AES_AESMC)"
  "aese\t%0.b, %0.b, %2.b\;aesmc\t%0.b, %0.b"
  [(set_attr "type" "crypto_aese")
   (set_attr "length" "8")]
)

;; Fused AESD+AESIMC, the decryption counterpart of the pattern above.
(define_insn "*aarch64_sve2_aesd_fused"
  [(set (match_operand:VNx16QI 0 "register_operand" "=w")
	(unspec:VNx16QI
	  [(unspec:VNx16QI
	     [(xor:VNx16QI
		(match_operand:VNx16QI 1 "register_operand" "%0")
		(match_operand:VNx16QI 2 "register_operand" "w"))]
	     UNSPEC_AESD)]
	  UNSPEC_AESIMC))]
  "TARGET_SVE2_AES && aarch64_fusion_enabled_p (AARCH64_FUSE_AES_AESMC)"
  "aesd\t%0.b, %0.b, %2.b\;aesimc\t%0.b, %0.b"
  [(set_attr "type" "crypto_aese")
   (set_attr "length" "8")]
)

;; -------------------------------------------------------------------------
;; ---- Optional SHA-3 extensions
;; -------------------------------------------------------------------------
;; Includes:
;; - RAX1
;; -------------------------------------------------------------------------

;; RAX1: XOR with a left-rotate-by-one, expressed directly in RTL so
;; combine can form it from separate rotate and xor insns.
(define_insn "aarch64_sve2_rax1"
  [(set (match_operand:VNx2DI 0 "register_operand" "=w")
	(xor:VNx2DI
	  (rotate:VNx2DI
	    (match_operand:VNx2DI 2 "register_operand" "w")
	    (const_int 1))
	  (match_operand:VNx2DI 1 "register_operand" "w")))]
  "TARGET_SVE2_SHA3"
  "rax1\t%0.d, %1.d, %2.d"
  [(set_attr "type" "crypto_sha3")]
)

;; -------------------------------------------------------------------------
;; ---- Optional SM4 extensions
;; -------------------------------------------------------------------------
;; Includes:
;; - SM4E
;; - SM4EKEY
;; -------------------------------------------------------------------------

;; These instructions do not take MOVPRFX.
(define_insn "aarch64_sve2_sm4e"
  [(set (match_operand:VNx4SI 0 "register_operand" "=w")
	(unspec:VNx4SI
	  [(match_operand:VNx4SI 1 "register_operand" "0")
	   (match_operand:VNx4SI 2 "register_operand" "w")]
	  UNSPEC_SM4E))]
  "TARGET_SVE2_SM4"
  "sm4e\t%0.s, %0.s, %2.s"
  [(set_attr "type" "crypto_sm4")]
)

;; SM4 key schedule step (no tied operands, no MOVPRFX needed).
(define_insn "aarch64_sve2_sm4ekey"
  [(set (match_operand:VNx4SI 0 "register_operand" "=w")
	(unspec:VNx4SI
	  [(match_operand:VNx4SI 1 "register_operand" "w")
	   (match_operand:VNx4SI 2 "register_operand" "w")]
	  UNSPEC_SM4EKEY))]
  "TARGET_SVE2_SM4"
  "sm4ekey\t%0.s, %1.s, %2.s"
  [(set_attr "type" "crypto_sm4")]
)