;; Machine description for AArch64 SVE.
;; Copyright (C) 2009-2016 Free Software Foundation, Inc.
;; Contributed by ARM Ltd.
;;
;; This file is part of GCC.
;;
;; GCC is free software; you can redistribute it and/or modify it
;; under the terms of the GNU General Public License as published by
;; the Free Software Foundation; either version 3, or (at your option)
;; any later version.
;;
;; GCC is distributed in the hope that it will be useful, but
;; WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
;; General Public License for more details.
;;
;; You should have received a copy of the GNU General Public License
;; along with GCC; see the file COPYING3.  If not see
;; <http://www.gnu.org/licenses/>.

;; Note on the handling of big-endian SVE
;; --------------------------------------
;;
;; On big-endian systems, Advanced SIMD mov<mode> patterns act in the
;; same way as movdi or movti would: the first byte of memory goes
;; into the most significant byte of the register and the last byte
;; of memory goes into the least significant byte of the register.
;; This is the most natural ordering for Advanced SIMD and matches
;; the ABI layout for 64-bit and 128-bit vector types.
;;
;; As a result, the order of bytes within the register is what GCC
;; expects for a big-endian target, and subreg offsets therefore work
;; as expected, with the first element in memory having subreg offset 0
;; and the last element in memory having the subreg offset associated
;; with a big-endian lowpart.  However, this ordering also means that
;; GCC's lane numbering does not match the architecture's numbering:
;; GCC always treats the element at the lowest address in memory
;; (subreg offset 0) as element 0, while the architecture treats
;; the least significant end of the register as element 0.
;;
;; The situation for SVE is different.  We want the layout of the
;; SVE register to be the same for mov<mode> as it is for
;; maskload<mode>: logically, a mov<mode> load must be indistinguishable
;; from a maskload<mode> whose mask is all true.  We therefore need the
;; register layout to match LD1 rather than LDR.  The ABI layout of
;; SVE types also matches LD1 byte ordering rather than LDR byte ordering.
;;
;; As a result, the architecture lane numbering matches GCC's lane
;; numbering, with element 0 always being the first in memory.
;; However:
;;
;; - Applying a subreg offset to a register does not give the element
;;   that GCC expects: the first element in memory has the subreg offset
;;   associated with a big-endian lowpart while the last element in memory
;;   has subreg offset 0.  We handle this via TARGET_CAN_CHANGE_MODE_CLASS.
;;
;; - We cannot use LDR and STR for spill slots that might be accessed
;;   via subregs, since although the elements have the order GCC expects,
;;   the order of the bytes within the elements is different.  We instead
;;   access spill slots via LD1 and ST1, using secondary reloads to
;;   reserve a predicate register.
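
;; As an illustration of the difference (a sketch, not taken from the
;; architecture manual): consider 32-bit elements on a big-endian
;; target, with memory bytes b0 b1 b2 ... b15.  LD1W puts the first
;; element (b0-b3) in lane 0, with b0 as the most significant byte of
;; the lane.  LDR instead loads the register as a byte stream, so
;; lane 0 also holds b0-b3 but with b0 as the least significant byte.
;; The two layouts agree on element order but not on the byte order
;; within each element, which is why spill code must use LD1/ST1 when
;; subregs might be involved.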

;; SVE data moves.
(define_expand "mov<mode>"
  [(set (match_operand:SVE_ALL 0 "nonimmediate_operand")
        (match_operand:SVE_ALL 1 "general_operand"))]
  "TARGET_SVE"
  {
    /* Use the predicated load and store patterns where possible.
       This is required for big-endian targets (see the comment at the
       head of the file) and increases the addressing choices for
       little-endian.  */
    if ((MEM_P (operands[0]) || MEM_P (operands[1]))
        && can_create_pseudo_p ())
      {
        aarch64_expand_sve_mem_move (operands[0], operands[1], <VPRED>mode);
        DONE;
      }

    if (CONSTANT_P (operands[1]))
      {
        aarch64_expand_mov_immediate (operands[0], operands[1],
                                      gen_vec_duplicate<mode>);
        DONE;
      }

    /* Optimize subregs on big-endian targets: we can use REV[BHW]
       instead of going through memory.  */
    if (BYTES_BIG_ENDIAN
        && aarch64_maybe_expand_sve_subreg_move (operands[0], operands[1]))
      DONE;
  }
)

;; A pattern for optimizing SUBREGs that have a reinterpreting effect
;; on big-endian targets; see aarch64_maybe_expand_sve_subreg_move
;; for details.  We use a special predicate for operand 2 to reduce
;; the number of patterns.
(define_insn_and_split "*aarch64_sve_mov<mode>_subreg_be"
  [(set (match_operand:SVE_ALL 0 "aarch64_sve_nonimmediate_operand" "=w")
        (unspec:SVE_ALL
          [(match_operand:VNx16BI 1 "register_operand" "Upl")
           (match_operand 2 "aarch64_any_register_operand" "w")]
          UNSPEC_REV_SUBREG))]
  "TARGET_SVE && BYTES_BIG_ENDIAN"
  "#"
  "&& reload_completed"
  [(const_int 0)]
  {
    aarch64_split_sve_subreg_move (operands[0], operands[1], operands[2]);
    DONE;
  }
)

;; Unpredicated moves (little-endian).  Only allow memory operations
;; during and after RA; before RA we want the predicated load and
;; store patterns to be used instead.
(define_insn "*aarch64_sve_mov<mode>_le"
  [(set (match_operand:SVE_ALL 0 "aarch64_sve_nonimmediate_operand" "=w, Utr, w, w")
        (match_operand:SVE_ALL 1 "aarch64_sve_general_operand" "Utr, w, w, Dn"))]
  "TARGET_SVE
   && !BYTES_BIG_ENDIAN
   && ((lra_in_progress || reload_completed)
       || (register_operand (operands[0], <MODE>mode)
           && nonmemory_operand (operands[1], <MODE>mode)))"
  "@
   ldr\t%0, %1
   str\t%1, %0
   mov\t%0.d, %1.d
   * return aarch64_output_sve_mov_immediate (operands[1]);"
)

;; Unpredicated moves (big-endian).  Memory accesses require secondary
;; reloads.
(define_insn "*aarch64_sve_mov<mode>_be"
  [(set (match_operand:SVE_ALL 0 "register_operand" "=w, w")
        (match_operand:SVE_ALL 1 "aarch64_nonmemory_operand" "w, Dn"))]
  "TARGET_SVE && BYTES_BIG_ENDIAN"
  "@
   mov\t%0.d, %1.d
   * return aarch64_output_sve_mov_immediate (operands[1]);"
)

;; Handle big-endian memory reloads.  We use byte PTRUE for all modes
;; to try to encourage reuse.
(define_expand "aarch64_sve_reload_be"
  [(parallel
     [(set (match_operand 0)
           (match_operand 1))
      (clobber (match_operand:VNx16BI 2 "register_operand" "=Upl"))])]
  "TARGET_SVE && BYTES_BIG_ENDIAN"
  {
    /* Create a PTRUE.  */
    emit_move_insn (operands[2], CONSTM1_RTX (VNx16BImode));

    /* Refer to the PTRUE in the appropriate mode for this move.  */
    machine_mode mode = GET_MODE (operands[0]);
    machine_mode pred_mode
      = aarch64_sve_pred_mode (GET_MODE_UNIT_SIZE (mode)).require ();
    rtx pred = gen_lowpart (pred_mode, operands[2]);

    /* Emit a predicated load or store.  */
    aarch64_emit_sve_pred_move (operands[0], pred, operands[1]);
    DONE;
  }
)
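
;; For example (a sketch, register numbers and offsets invented):
;; reloading a VNx4SI value from a big-endian spill slot through the
;; pattern above might produce
;;
;;	ptrue	p0.b
;;	ld1w	z0.s, p0/z, [sp, #2, mul vl]
;;
;; rather than a single LDR, at the cost of tying up a predicate
;; register for the duration of the reload.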

;; A predicated load or store for which the predicate is known to be
;; all-true.  Note that this pattern is generated directly by
;; aarch64_emit_sve_pred_move, so changes to this pattern will
;; need changes there as well.
(define_insn "*pred_mov<mode>"
  [(set (match_operand:SVE_ALL 0 "nonimmediate_operand" "=w, m")
        (unspec:SVE_ALL
          [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
           (match_operand:SVE_ALL 2 "nonimmediate_operand" "m, w")]
          UNSPEC_MERGE_PTRUE))]
  "TARGET_SVE
   && (register_operand (operands[0], <MODE>mode)
       || register_operand (operands[2], <MODE>mode))"
  "@
   ld1<Vesize>\t%0.<Vetype>, %1/z, %2
   st1<Vesize>\t%2.<Vetype>, %1, %0"
)

(define_expand "movmisalign<mode>"
  [(set (match_operand:SVE_ALL 0 "nonimmediate_operand")
        (match_operand:SVE_ALL 1 "general_operand"))]
  "TARGET_SVE"
  {
    /* Equivalent to a normal move for our purposes.  */
    emit_move_insn (operands[0], operands[1]);
    DONE;
  }
)

(define_insn "maskload<mode><vpred>"
  [(set (match_operand:SVE_ALL 0 "register_operand" "=w")
        (unspec:SVE_ALL
          [(match_operand:<VPRED> 2 "register_operand" "Upl")
           (match_operand:SVE_ALL 1 "memory_operand" "m")]
          UNSPEC_LD1_SVE))]
  "TARGET_SVE"
  "ld1<Vesize>\t%0.<Vetype>, %2/z, %1"
)

(define_insn "maskstore<mode><vpred>"
  [(set (match_operand:SVE_ALL 0 "memory_operand" "+m")
        (unspec:SVE_ALL
          [(match_operand:<VPRED> 2 "register_operand" "Upl")
           (match_operand:SVE_ALL 1 "register_operand" "w")
           (match_dup 0)]
          UNSPEC_ST1_SVE))]
  "TARGET_SVE"
  "st1<Vesize>\t%1.<Vetype>, %2, %0"
)

;; Unpredicated gather loads.
(define_expand "gather_load<mode>"
  [(set (match_operand:SVE_SD 0 "register_operand")
        (unspec:SVE_SD
          [(match_dup 5)
           (match_operand:DI 1 "aarch64_reg_or_zero")
           (match_operand:<V_INT_EQUIV> 2 "register_operand")
           (match_operand:DI 3 "const_int_operand")
           (match_operand:DI 4 "aarch64_gather_scale_operand_<Vesize>")
           (mem:BLK (scratch))]
          UNSPEC_LD1_GATHER))]
  "TARGET_SVE"
  {
    operands[5] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode));
  }
)

;; Predicated gather loads for 32-bit elements.  Operand 3 is true for
;; unsigned extension and false for signed extension.
(define_insn "mask_gather_load<mode>"
  [(set (match_operand:SVE_S 0 "register_operand" "=w, w, w, w, w")
        (unspec:SVE_S
          [(match_operand:<VPRED> 5 "register_operand" "Upl, Upl, Upl, Upl, Upl")
           (match_operand:DI 1 "aarch64_reg_or_zero" "Z, rk, rk, rk, rk")
           (match_operand:<V_INT_EQUIV> 2 "register_operand" "w, w, w, w, w")
           (match_operand:DI 3 "const_int_operand" "i, Z, Ui1, Z, Ui1")
           (match_operand:DI 4 "aarch64_gather_scale_operand_w" "Ui1, Ui1, Ui1, i, i")
           (mem:BLK (scratch))]
          UNSPEC_LD1_GATHER))]
  "TARGET_SVE"
  "@
   ld1w\t%0.s, %5/z, [%2.s]
   ld1w\t%0.s, %5/z, [%1, %2.s, sxtw]
   ld1w\t%0.s, %5/z, [%1, %2.s, uxtw]
   ld1w\t%0.s, %5/z, [%1, %2.s, sxtw %p4]
   ld1w\t%0.s, %5/z, [%1, %2.s, uxtw %p4]"
)
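
;; For example (a sketch): a loop such as
;;
;;   for (int i = 0; i < n; ++i)
;;     dst[i] = base[idx[i]];
;;
;; with 32-bit data and 32-bit indices can use the scaled forms above,
;; choosing sxtw or uxtw according to whether idx is signed or unsigned
;; (which is what operand 3 records).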

;; Predicated gather loads for 64-bit elements.  The value of operand 3
;; doesn't matter in this case.
(define_insn "mask_gather_load<mode>"
  [(set (match_operand:SVE_D 0 "register_operand" "=w, w, w")
        (unspec:SVE_D
          [(match_operand:<VPRED> 5 "register_operand" "Upl, Upl, Upl")
           (match_operand:DI 1 "aarch64_reg_or_zero" "Z, rk, rk")
           (match_operand:<V_INT_EQUIV> 2 "register_operand" "w, w, w")
           (match_operand:DI 3 "const_int_operand")
           (match_operand:DI 4 "aarch64_gather_scale_operand_d" "Ui1, Ui1, i")
           (mem:BLK (scratch))]
          UNSPEC_LD1_GATHER))]
  "TARGET_SVE"
  "@
   ld1d\t%0.d, %5/z, [%2.d]
   ld1d\t%0.d, %5/z, [%1, %2.d]
   ld1d\t%0.d, %5/z, [%1, %2.d, lsl %p4]"
)

;; Unpredicated scatter store.
(define_expand "scatter_store<mode>"
  [(set (mem:BLK (scratch))
        (unspec:BLK
          [(match_dup 5)
           (match_operand:DI 0 "aarch64_reg_or_zero")
           (match_operand:<V_INT_EQUIV> 1 "register_operand")
           (match_operand:DI 2 "const_int_operand")
           (match_operand:DI 3 "aarch64_gather_scale_operand_<Vesize>")
           (match_operand:SVE_SD 4 "register_operand")]
          UNSPEC_ST1_SCATTER))]
  "TARGET_SVE"
  {
    operands[5] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode));
  }
)

;; Predicated scatter stores for 32-bit elements.  Operand 2 is true for
;; unsigned extension and false for signed extension.
(define_insn "mask_scatter_store<mode>"
  [(set (mem:BLK (scratch))
        (unspec:BLK
          [(match_operand:<VPRED> 5 "register_operand" "Upl, Upl, Upl, Upl, Upl")
           (match_operand:DI 0 "aarch64_reg_or_zero" "Z, rk, rk, rk, rk")
           (match_operand:<V_INT_EQUIV> 1 "register_operand" "w, w, w, w, w")
           (match_operand:DI 2 "const_int_operand" "i, Z, Ui1, Z, Ui1")
           (match_operand:DI 3 "aarch64_gather_scale_operand_w" "Ui1, Ui1, Ui1, i, i")
           (match_operand:SVE_S 4 "register_operand" "w, w, w, w, w")]
          UNSPEC_ST1_SCATTER))]
  "TARGET_SVE"
  "@
   st1w\t%4.s, %5, [%1.s]
   st1w\t%4.s, %5, [%0, %1.s, sxtw]
   st1w\t%4.s, %5, [%0, %1.s, uxtw]
   st1w\t%4.s, %5, [%0, %1.s, sxtw %p3]
   st1w\t%4.s, %5, [%0, %1.s, uxtw %p3]"
)

;; Predicated scatter stores for 64-bit elements.  The value of operand 2
;; doesn't matter in this case.
(define_insn "mask_scatter_store<mode>"
  [(set (mem:BLK (scratch))
        (unspec:BLK
          [(match_operand:<VPRED> 5 "register_operand" "Upl, Upl, Upl")
           (match_operand:DI 0 "aarch64_reg_or_zero" "Z, rk, rk")
           (match_operand:<V_INT_EQUIV> 1 "register_operand" "w, w, w")
           (match_operand:DI 2 "const_int_operand")
           (match_operand:DI 3 "aarch64_gather_scale_operand_d" "Ui1, Ui1, i")
           (match_operand:SVE_D 4 "register_operand" "w, w, w")]
          UNSPEC_ST1_SCATTER))]
  "TARGET_SVE"
  "@
   st1d\t%4.d, %5, [%1.d]
   st1d\t%4.d, %5, [%0, %1.d]
   st1d\t%4.d, %5, [%0, %1.d, lsl %p3]"
)
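
;; The scatter analogue of the gather example above (a sketch):
;;
;;   for (int i = 0; i < n; ++i)
;;     base[idx[i]] = src[i];
;;
;; stores each active element of src to base plus the scaled index.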

;; SVE structure moves.
(define_expand "mov<mode>"
  [(set (match_operand:SVE_STRUCT 0 "nonimmediate_operand")
        (match_operand:SVE_STRUCT 1 "general_operand"))]
  "TARGET_SVE"
  {
    /* Big-endian loads and stores need to be done via LD1 and ST1;
       see the comment at the head of the file for details.  */
    if ((MEM_P (operands[0]) || MEM_P (operands[1]))
        && BYTES_BIG_ENDIAN)
      {
        gcc_assert (can_create_pseudo_p ());
        aarch64_expand_sve_mem_move (operands[0], operands[1], <VPRED>mode);
        DONE;
      }

    if (CONSTANT_P (operands[1]))
      {
        aarch64_expand_mov_immediate (operands[0], operands[1]);
        DONE;
      }
  }
)

;; Unpredicated structure moves (little-endian).
(define_insn "*aarch64_sve_mov<mode>_le"
  [(set (match_operand:SVE_STRUCT 0 "aarch64_sve_nonimmediate_operand" "=w, Utr, w, w")
        (match_operand:SVE_STRUCT 1 "aarch64_sve_general_operand" "Utr, w, w, Dn"))]
  "TARGET_SVE && !BYTES_BIG_ENDIAN"
  "#"
  [(set_attr "length" "<insn_length>")]
)

;; Unpredicated structure moves (big-endian).  Memory accesses require
;; secondary reloads.
(define_insn "*aarch64_sve_mov<mode>_be"
  [(set (match_operand:SVE_STRUCT 0 "register_operand" "=w, w")
        (match_operand:SVE_STRUCT 1 "aarch64_nonmemory_operand" "w, Dn"))]
  "TARGET_SVE && BYTES_BIG_ENDIAN"
  "#"
  [(set_attr "length" "<insn_length>")]
)

;; Split unpredicated structure moves into pieces.  This is the same
;; for both big-endian and little-endian code, although it only needs
;; to handle memory operands for little-endian code.
(define_split
  [(set (match_operand:SVE_STRUCT 0 "aarch64_sve_nonimmediate_operand")
        (match_operand:SVE_STRUCT 1 "aarch64_sve_general_operand"))]
  "TARGET_SVE && reload_completed"
  [(const_int 0)]
  {
    rtx dest = operands[0];
    rtx src = operands[1];
    if (REG_P (dest) && REG_P (src))
      aarch64_simd_emit_reg_reg_move (operands, <VSINGLE>mode, <vector_count>);
    else
      for (unsigned int i = 0; i < <vector_count>; ++i)
        {
          rtx subdest = simplify_gen_subreg (<VSINGLE>mode, dest, <MODE>mode,
                                             i * BYTES_PER_SVE_VECTOR);
          rtx subsrc = simplify_gen_subreg (<VSINGLE>mode, src, <MODE>mode,
                                            i * BYTES_PER_SVE_VECTOR);
          emit_insn (gen_rtx_SET (subdest, subsrc));
        }
    DONE;
  }
)

;; Predicated structure moves.  This works for both endiannesses but in
;; practice is only useful for big-endian.
(define_insn_and_split "pred_mov<mode>"
  [(set (match_operand:SVE_STRUCT 0 "aarch64_sve_struct_nonimmediate_operand" "=w, Utx")
        (unspec:SVE_STRUCT
          [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
           (match_operand:SVE_STRUCT 2 "aarch64_sve_struct_nonimmediate_operand" "Utx, w")]
          UNSPEC_MERGE_PTRUE))]
  "TARGET_SVE
   && (register_operand (operands[0], <MODE>mode)
       || register_operand (operands[2], <MODE>mode))"
  "#"
  "&& reload_completed"
  [(const_int 0)]
  {
    for (unsigned int i = 0; i < <vector_count>; ++i)
      {
        rtx subdest = simplify_gen_subreg (<VSINGLE>mode, operands[0],
                                           <MODE>mode,
                                           i * BYTES_PER_SVE_VECTOR);
        rtx subsrc = simplify_gen_subreg (<VSINGLE>mode, operands[2],
                                          <MODE>mode,
                                          i * BYTES_PER_SVE_VECTOR);
        aarch64_emit_sve_pred_move (subdest, operands[1], subsrc);
      }
    DONE;
  }
  [(set_attr "length" "<insn_length>")]
)
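
;; For example (a sketch): after the split, a two-vector structure load
;; might become two predicated single-vector loads:
;;
;;	ld1w	z0.s, p0/z, [x0]
;;	ld1w	z1.s, p0/z, [x0, #1, mul vl]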

(define_expand "mov<mode>"
  [(set (match_operand:PRED_ALL 0 "nonimmediate_operand")
        (match_operand:PRED_ALL 1 "general_operand"))]
  "TARGET_SVE"
  {
    if (GET_CODE (operands[0]) == MEM)
      operands[1] = force_reg (<MODE>mode, operands[1]);
  }
)

(define_insn "*aarch64_sve_mov<mode>"
  [(set (match_operand:PRED_ALL 0 "nonimmediate_operand" "=Upa, m, Upa, Upa, Upa")
        (match_operand:PRED_ALL 1 "general_operand" "Upa, Upa, m, Dz, Dm"))]
  "TARGET_SVE
   && (register_operand (operands[0], <MODE>mode)
       || register_operand (operands[1], <MODE>mode))"
  "@
   mov\t%0.b, %1.b
   str\t%1, %0
   ldr\t%0, %1
   pfalse\t%0.b
   * return aarch64_output_ptrue (<MODE>mode, '<Vetype>');"
)

;; Handle extractions from a predicate by converting to an integer vector
;; and extracting from there.
(define_expand "vec_extract<vpred><Vel>"
  [(match_operand:<VEL> 0 "register_operand")
   (match_operand:<VPRED> 1 "register_operand")
   (match_operand:SI 2 "nonmemory_operand")
   ;; Dummy operand to which we can attach the iterator.
   (reg:SVE_I V0_REGNUM)]
  "TARGET_SVE"
  {
    rtx tmp = gen_reg_rtx (<MODE>mode);
    emit_insn (gen_aarch64_sve_dup<mode>_const (tmp, operands[1],
                                                CONST1_RTX (<MODE>mode),
                                                CONST0_RTX (<MODE>mode)));
    emit_insn (gen_vec_extract<mode><Vel> (operands[0], tmp, operands[2]));
    DONE;
  }
)

(define_expand "vec_extract<mode><Vel>"
  [(set (match_operand:<VEL> 0 "register_operand")
        (vec_select:<VEL>
          (match_operand:SVE_ALL 1 "register_operand")
          (parallel [(match_operand:SI 2 "nonmemory_operand")])))]
  "TARGET_SVE"
  {
    poly_int64 val;
    if (poly_int_rtx_p (operands[2], &val)
        && known_eq (val, GET_MODE_NUNITS (<MODE>mode) - 1))
      {
        /* The last element can be extracted with a LASTB and a false
           predicate.  */
        rtx sel = force_reg (<VPRED>mode, CONST0_RTX (<VPRED>mode));
        emit_insn (gen_extract_last_<mode> (operands[0], sel, operands[1]));
        DONE;
      }
    if (!CONST_INT_P (operands[2]))
      {
        /* Create an index with operand[2] as the base and -1 as the step.
           It will then be zero for the element we care about.  */
        rtx index = gen_lowpart (<VEL_INT>mode, operands[2]);
        index = force_reg (<VEL_INT>mode, index);
        rtx series = gen_reg_rtx (<V_INT_EQUIV>mode);
        emit_insn (gen_vec_series<v_int_equiv> (series, index, constm1_rtx));

        /* Get a predicate that is true for only that element.  */
        rtx zero = CONST0_RTX (<V_INT_EQUIV>mode);
        rtx cmp = gen_rtx_EQ (<V_INT_EQUIV>mode, series, zero);
        rtx sel = gen_reg_rtx (<VPRED>mode);
        emit_insn (gen_vec_cmp<v_int_equiv><vpred> (sel, cmp, series, zero));

        /* Select the element using LASTB.  */
        emit_insn (gen_extract_last_<mode> (operands[0], sel, operands[1]));
        DONE;
      }
  }
)
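
;; For example (a sketch): extracting the element at run-time index i
;; (in w1) from z0.s via the INDEX/CMPEQ/LASTB route above might be:
;;
;;	index	z1.s, w1, #-1		// { i, i-1, i-2, ... }
;;	ptrue	p0.s
;;	cmpeq	p1.s, p0/z, z1.s, #0	// single active lane, number i
;;	lastb	w0, p1, z0.s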

;; Extract element zero.  This is a special case because we want to force
;; the registers to be the same for the second alternative, and then
;; split the instruction into nothing after RA.
(define_insn_and_split "*vec_extract<mode><Vel>_0"
  [(set (match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand" "=r, w, Utv")
        (vec_select:<VEL>
          (match_operand:SVE_ALL 1 "register_operand" "w, 0, w")
          (parallel [(const_int 0)])))]
  "TARGET_SVE"
  {
    operands[1] = gen_rtx_REG (<V128>mode, REGNO (operands[1]));
    switch (which_alternative)
      {
        case 0:
          return "umov\\t%<vwcore>0, %1.<Vetype>[0]";
        case 1:
          return "#";
        case 2:
          return "st1\\t{%1.<Vetype>}[0], %0";
        default:
          gcc_unreachable ();
      }
  }
  "&& reload_completed
   && REG_P (operands[0])
   && REGNO (operands[0]) == REGNO (operands[1])"
  [(const_int 0)]
  {
    emit_note (NOTE_INSN_DELETED);
    DONE;
  }
  [(set_attr "type" "neon_to_gp_q, untyped, neon_store1_one_lane_q")]
)

;; Extract an element from the Advanced SIMD portion of the register.
;; We don't just reuse the aarch64-simd.md pattern because we don't
;; want any change in lane number on big-endian targets.
(define_insn "*vec_extract<mode><Vel>_v128"
  [(set (match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand" "=r, w, Utv")
        (vec_select:<VEL>
          (match_operand:SVE_ALL 1 "register_operand" "w, w, w")
          (parallel [(match_operand:SI 2 "const_int_operand")])))]
  "TARGET_SVE
   && IN_RANGE (INTVAL (operands[2]) * GET_MODE_SIZE (<VEL>mode), 1, 15)"
  {
    operands[1] = gen_rtx_REG (<V128>mode, REGNO (operands[1]));
    switch (which_alternative)
      {
        case 0:
          return "umov\\t%<vwcore>0, %1.<Vetype>[%2]";
        case 1:
          return "dup\\t%<Vetype>0, %1.<Vetype>[%2]";
        case 2:
          return "st1\\t{%1.<Vetype>}[%2], %0";
        default:
          gcc_unreachable ();
      }
  }
  [(set_attr "type" "neon_to_gp_q, neon_dup_q, neon_store1_one_lane_q")]
)

;; Extract an element in the range of DUP.  This pattern allows the
;; source and destination to be different.
(define_insn "*vec_extract<mode><Vel>_dup"
  [(set (match_operand:<VEL> 0 "register_operand" "=w")
        (vec_select:<VEL>
          (match_operand:SVE_ALL 1 "register_operand" "w")
          (parallel [(match_operand:SI 2 "const_int_operand")])))]
  "TARGET_SVE
   && IN_RANGE (INTVAL (operands[2]) * GET_MODE_SIZE (<VEL>mode), 16, 63)"
  {
    operands[0] = gen_rtx_REG (<MODE>mode, REGNO (operands[0]));
    return "dup\t%0.<Vetype>, %1.<Vetype>[%2]";
  }
)

;; Extract an element outside the range of DUP.  This pattern requires the
;; source and destination to be the same.
(define_insn "*vec_extract<mode><Vel>_ext"
  [(set (match_operand:<VEL> 0 "register_operand" "=w")
        (vec_select:<VEL>
          (match_operand:SVE_ALL 1 "register_operand" "0")
          (parallel [(match_operand:SI 2 "const_int_operand")])))]
  "TARGET_SVE && INTVAL (operands[2]) * GET_MODE_SIZE (<VEL>mode) >= 64"
  {
    operands[0] = gen_rtx_REG (<MODE>mode, REGNO (operands[0]));
    operands[2] = GEN_INT (INTVAL (operands[2]) * GET_MODE_SIZE (<VEL>mode));
    return "ext\t%0.b, %0.b, %0.b, #%2";
  }
)

;; Extract the last active element of operand 1 into operand 0.
;; If no elements are active, extract the last inactive element instead.
(define_insn "extract_last_<mode>"
  [(set (match_operand:<VEL> 0 "register_operand" "=r, w")
        (unspec:<VEL>
          [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
           (match_operand:SVE_ALL 2 "register_operand" "w, w")]
          UNSPEC_LASTB))]
  "TARGET_SVE"
  "@
   lastb\t%<vwcore>0, %1, %2.<Vetype>
   lastb\t%<Vetype>0, %1, %2.<Vetype>"
)

(define_expand "vec_duplicate<mode>"
  [(parallel
     [(set (match_operand:SVE_ALL 0 "register_operand")
           (vec_duplicate:SVE_ALL
             (match_operand:<VEL> 1 "aarch64_sve_dup_operand")))
      (clobber (scratch:<VPRED>))])]
  "TARGET_SVE"
  {
    if (MEM_P (operands[1]))
      {
        rtx ptrue = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode));
        emit_insn (gen_sve_ld1r<mode> (operands[0], ptrue, operands[1],
                                       CONST0_RTX (<MODE>mode)));
        DONE;
      }
  }
)
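
;; For example (a sketch), duplicating a 32-bit scalar:
;;
;;	mov	z0.s, w0		// from a general register
;;	mov	z0.s, s0		// from a vector register
;;	ld1rw	z0.s, p0/z, [x0]	// directly from memory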

;; Accept memory operands for the benefit of combine, and also in case
;; the scalar input gets spilled to memory during RA.  We want to split
;; the load at the first opportunity in order to allow the PTRUE to be
;; optimized with surrounding code.
(define_insn_and_split "*vec_duplicate<mode>_reg"
  [(set (match_operand:SVE_ALL 0 "register_operand" "=w, w, w")
        (vec_duplicate:SVE_ALL
          (match_operand:<VEL> 1 "aarch64_sve_dup_operand" "r, w, Uty")))
   (clobber (match_scratch:<VPRED> 2 "=X, X, Upl"))]
  "TARGET_SVE"
  "@
   mov\t%0.<Vetype>, %<vwcore>1
   mov\t%0.<Vetype>, %<Vetype>1
   #"
  "&& MEM_P (operands[1])"
  [(const_int 0)]
  {
    if (GET_CODE (operands[2]) == SCRATCH)
      operands[2] = gen_reg_rtx (<VPRED>mode);
    emit_move_insn (operands[2], CONSTM1_RTX (<VPRED>mode));
    emit_insn (gen_sve_ld1r<mode> (operands[0], operands[2], operands[1],
                                   CONST0_RTX (<MODE>mode)));
    DONE;
  }
  [(set_attr "length" "4,4,8")]
)

;; This is used for vec_duplicate<mode>s from memory, but can also
;; be used by combine to optimize selects of a vec_duplicate<mode>
;; with zero.
(define_insn "sve_ld1r<mode>"
  [(set (match_operand:SVE_ALL 0 "register_operand" "=w")
        (unspec:SVE_ALL
          [(match_operand:<VPRED> 1 "register_operand" "Upl")
           (vec_duplicate:SVE_ALL
             (match_operand:<VEL> 2 "aarch64_sve_ld1r_operand" "Uty"))
           (match_operand:SVE_ALL 3 "aarch64_simd_imm_zero")]
          UNSPEC_SEL))]
  "TARGET_SVE"
  "ld1r<Vesize>\t%0.<Vetype>, %1/z, %2"
)

;; Load 128 bits from memory and duplicate to fill a vector.  Since there
;; are so few operations on 128-bit "elements", we don't define a VNx1TI
;; and simply use vectors of bytes instead.
(define_insn "*sve_ld1rq<Vesize>"
  [(set (match_operand:SVE_ALL 0 "register_operand" "=w")
        (unspec:SVE_ALL
          [(match_operand:<VPRED> 1 "register_operand" "Upl")
           (match_operand:TI 2 "aarch64_sve_ld1r_operand" "Uty")]
          UNSPEC_LD1RQ))]
  "TARGET_SVE"
  "ld1rq<Vesize>\t%0.<Vetype>, %1/z, %2"
)

;; Implement a predicate broadcast by shifting the low bit of the scalar
;; input into the top bit and using a WHILELO.  An alternative would be to
;; duplicate the input and do a compare with zero.
(define_expand "vec_duplicate<mode>"
  [(set (match_operand:PRED_ALL 0 "register_operand")
        (vec_duplicate:PRED_ALL (match_operand 1 "register_operand")))]
  "TARGET_SVE"
  {
    rtx tmp = gen_reg_rtx (DImode);
    rtx op1 = gen_lowpart (DImode, operands[1]);
    emit_insn (gen_ashldi3 (tmp, op1, gen_int_mode (63, DImode)));
    emit_insn (gen_while_ultdi<mode> (operands[0], const0_rtx, tmp));
    DONE;
  }
)
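
;; For example (a sketch), broadcasting a boolean in w0:
;;
;;	lsl	x1, x0, #63		// move bit 0 to bit 63
;;	whilelo	p0.b, xzr, x1		// 2^63 exceeds any vector length,
;;					// so the result is all-true if the
;;					// bit was set and all-false if not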

(define_insn "vec_series<mode>"
  [(set (match_operand:SVE_I 0 "register_operand" "=w, w, w")
        (vec_series:SVE_I
          (match_operand:<VEL> 1 "aarch64_sve_index_operand" "Usi, r, r")
          (match_operand:<VEL> 2 "aarch64_sve_index_operand" "r, Usi, r")))]
  "TARGET_SVE"
  "@
   index\t%0.<Vetype>, #%1, %<vw>2
   index\t%0.<Vetype>, %<vw>1, #%2
   index\t%0.<Vetype>, %<vw>1, %<vw>2"
)

;; Optimize {x, x, x, x, ...} + {0, n, 2*n, 3*n, ...} if n is in range
;; of an INDEX instruction.
(define_insn "*vec_series<mode>_plus"
  [(set (match_operand:SVE_I 0 "register_operand" "=w")
        (plus:SVE_I
          (vec_duplicate:SVE_I
            (match_operand:<VEL> 1 "register_operand" "r"))
          (match_operand:SVE_I 2 "immediate_operand")))]
  "TARGET_SVE && aarch64_check_zero_based_sve_index_immediate (operands[2])"
  {
    operands[2] = aarch64_check_zero_based_sve_index_immediate (operands[2]);
    return "index\t%0.<Vetype>, %<vw>1, #%2";
  }
)

;; Unpredicated LD[234].
(define_expand "vec_load_lanes<mode><vsingle>"
  [(set (match_operand:SVE_STRUCT 0 "register_operand")
        (unspec:SVE_STRUCT
          [(match_dup 2)
           (match_operand:SVE_STRUCT 1 "memory_operand")]
          UNSPEC_LDN))]
  "TARGET_SVE"
  {
    operands[2] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode));
  }
)

;; Predicated LD[234].
(define_insn "vec_mask_load_lanes<mode><vsingle>"
  [(set (match_operand:SVE_STRUCT 0 "register_operand" "=w")
        (unspec:SVE_STRUCT
          [(match_operand:<VPRED> 2 "register_operand" "Upl")
           (match_operand:SVE_STRUCT 1 "memory_operand" "m")]
          UNSPEC_LDN))]
  "TARGET_SVE"
  "ld<vector_count><Vesize>\t%0, %2/z, %1"
)

;; Unpredicated ST[234].  This is always a full update, so the dependence
;; on the old value of the memory location (via (match_dup 0)) is redundant.
;; There doesn't seem to be any obvious benefit to treating the all-true
;; case differently though.  In particular, it's very unlikely that we'll
;; only find out during RTL that a store_lanes is dead.
(define_expand "vec_store_lanes<mode><vsingle>"
  [(set (match_operand:SVE_STRUCT 0 "memory_operand")
        (unspec:SVE_STRUCT
          [(match_dup 2)
           (match_operand:SVE_STRUCT 1 "register_operand")
           (match_dup 0)]
          UNSPEC_STN))]
  "TARGET_SVE"
  {
    operands[2] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode));
  }
)

;; Predicated ST[234].
(define_insn "vec_mask_store_lanes<mode><vsingle>"
  [(set (match_operand:SVE_STRUCT 0 "memory_operand" "+m")
        (unspec:SVE_STRUCT
          [(match_operand:<VPRED> 2 "register_operand" "Upl")
           (match_operand:SVE_STRUCT 1 "register_operand" "w")
           (match_dup 0)]
          UNSPEC_STN))]
  "TARGET_SVE"
  "st<vector_count><Vesize>\t%1, %2, %0"
)
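
;; For example (a sketch): deinterleaving a stream of pairs such as
;;
;;   for (int i = 0; i < n; ++i)
;;     { re[i] = p[2 * i]; im[i] = p[2 * i + 1]; }
;;
;; can use an LD2 of the form:
;;
;;	ld2w	{z0.s, z1.s}, p0/z, [x0]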

(define_expand "vec_perm<mode>"
  [(match_operand:SVE_ALL 0 "register_operand")
   (match_operand:SVE_ALL 1 "register_operand")
   (match_operand:SVE_ALL 2 "register_operand")
   (match_operand:<V_INT_EQUIV> 3 "aarch64_sve_vec_perm_operand")]
  "TARGET_SVE && GET_MODE_NUNITS (<MODE>mode).is_constant ()"
  {
    aarch64_expand_sve_vec_perm (operands[0], operands[1],
                                 operands[2], operands[3]);
    DONE;
  }
)

(define_insn "*aarch64_sve_tbl<mode>"
  [(set (match_operand:SVE_ALL 0 "register_operand" "=w")
        (unspec:SVE_ALL
          [(match_operand:SVE_ALL 1 "register_operand" "w")
           (match_operand:<V_INT_EQUIV> 2 "register_operand" "w")]
          UNSPEC_TBL))]
  "TARGET_SVE"
  "tbl\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>"
)

(define_insn "*aarch64_sve_<perm_insn><perm_hilo><mode>"
  [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
        (unspec:PRED_ALL
          [(match_operand:PRED_ALL 1 "register_operand" "Upa")
           (match_operand:PRED_ALL 2 "register_operand" "Upa")]
          PERMUTE))]
  "TARGET_SVE"
  "<perm_insn><perm_hilo>\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>"
)

(define_insn "aarch64_sve_<perm_insn><perm_hilo><mode>"
  [(set (match_operand:SVE_ALL 0 "register_operand" "=w")
        (unspec:SVE_ALL
          [(match_operand:SVE_ALL 1 "register_operand" "w")
           (match_operand:SVE_ALL 2 "register_operand" "w")]
          PERMUTE))]
  "TARGET_SVE"
  "<perm_insn><perm_hilo>\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>"
)

(define_insn "*aarch64_sve_rev64<mode>"
  [(set (match_operand:SVE_BHS 0 "register_operand" "=w")
        (unspec:SVE_BHS
          [(match_operand:VNx2BI 1 "register_operand" "Upl")
           (unspec:SVE_BHS
             [(match_operand:SVE_BHS 2 "register_operand" "w")]
             UNSPEC_REV64)]
          UNSPEC_MERGE_PTRUE))]
  "TARGET_SVE"
  "rev<Vesize>\t%0.d, %1/m, %2.d"
)

(define_insn "*aarch64_sve_rev32<mode>"
  [(set (match_operand:SVE_BH 0 "register_operand" "=w")
        (unspec:SVE_BH
          [(match_operand:VNx4BI 1 "register_operand" "Upl")
           (unspec:SVE_BH
             [(match_operand:SVE_BH 2 "register_operand" "w")]
             UNSPEC_REV32)]
          UNSPEC_MERGE_PTRUE))]
  "TARGET_SVE"
  "rev<Vesize>\t%0.s, %1/m, %2.s"
)

(define_insn "*aarch64_sve_rev16vnx16qi"
  [(set (match_operand:VNx16QI 0 "register_operand" "=w")
        (unspec:VNx16QI
          [(match_operand:VNx8BI 1 "register_operand" "Upl")
           (unspec:VNx16QI
             [(match_operand:VNx16QI 2 "register_operand" "w")]
             UNSPEC_REV16)]
          UNSPEC_MERGE_PTRUE))]
  "TARGET_SVE"
  "revb\t%0.h, %1/m, %2.h"
)

(define_insn "*aarch64_sve_rev<mode>"
  [(set (match_operand:SVE_ALL 0 "register_operand" "=w")
        (unspec:SVE_ALL
          [(match_operand:SVE_ALL 1 "register_operand" "w")]
          UNSPEC_REV))]
  "TARGET_SVE"
  "rev\t%0.<Vetype>, %1.<Vetype>"
)

(define_insn "*aarch64_sve_dup_lane<mode>"
  [(set (match_operand:SVE_ALL 0 "register_operand" "=w")
        (vec_duplicate:SVE_ALL
          (vec_select:<VEL>
            (match_operand:SVE_ALL 1 "register_operand" "w")
            (parallel [(match_operand:SI 2 "const_int_operand")]))))]
  "TARGET_SVE
   && IN_RANGE (INTVAL (operands[2]) * GET_MODE_SIZE (<VEL>mode), 0, 63)"
  "dup\t%0.<Vetype>, %1.<Vetype>[%2]"
)

;; Note that the immediate (third) operand is the lane index not
;; the byte index.
(define_insn "*aarch64_sve_ext<mode>"
  [(set (match_operand:SVE_ALL 0 "register_operand" "=w")
        (unspec:SVE_ALL
          [(match_operand:SVE_ALL 1 "register_operand" "0")
           (match_operand:SVE_ALL 2 "register_operand" "w")
           (match_operand:SI 3 "const_int_operand")]
          UNSPEC_EXT))]
  "TARGET_SVE
   && IN_RANGE (INTVAL (operands[3]) * GET_MODE_SIZE (<VEL>mode), 0, 255)"
  {
    operands[3] = GEN_INT (INTVAL (operands[3]) * GET_MODE_SIZE (<VEL>mode));
    return "ext\\t%0.b, %0.b, %2.b, #%3";
  }
)

(define_insn "add<mode>3"
  [(set (match_operand:SVE_I 0 "register_operand" "=w, w, w, w")
        (plus:SVE_I
          (match_operand:SVE_I 1 "register_operand" "%0, 0, 0, w")
          (match_operand:SVE_I 2 "aarch64_sve_add_operand" "vsa, vsn, vsi, w")))]
  "TARGET_SVE"
  "@
   add\t%0.<Vetype>, %0.<Vetype>, #%D2
   sub\t%0.<Vetype>, %0.<Vetype>, #%N2
   * return aarch64_output_sve_inc_dec_immediate (\"%0.<Vetype>\", operands[2]);
   add\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>"
)

(define_insn "sub<mode>3"
  [(set (match_operand:SVE_I 0 "register_operand" "=w, w")
        (minus:SVE_I
          (match_operand:SVE_I 1 "aarch64_sve_arith_operand" "w, vsa")
          (match_operand:SVE_I 2 "register_operand" "w, 0")))]
  "TARGET_SVE"
  "@
   sub\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>
   subr\t%0.<Vetype>, %0.<Vetype>, #%D1"
)
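
;; For example (a sketch), the first two add<mode>3 alternatives:
;;
;;	add	z0.s, z0.s, #15		// x + 15, via "vsa"
;;	sub	z0.s, z0.s, #16		// x + -16, rewritten as a subtract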

;; Unpredicated multiplication.
(define_expand "mul<mode>3"
  [(set (match_operand:SVE_I 0 "register_operand")
        (unspec:SVE_I
          [(match_dup 3)
           (mult:SVE_I
             (match_operand:SVE_I 1 "register_operand")
             (match_operand:SVE_I 2 "aarch64_sve_mul_operand"))]
          UNSPEC_MERGE_PTRUE))]
  "TARGET_SVE"
  {
    operands[3] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode));
  }
)

;; Multiplication predicated with a PTRUE.  We don't actually need the
;; predicate for the first alternative, but using Upa or X isn't likely
;; to gain much and would make the instruction seem less uniform to the
;; register allocator.
(define_insn "*mul<mode>3"
  [(set (match_operand:SVE_I 0 "register_operand" "=w, w")
        (unspec:SVE_I
          [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
           (mult:SVE_I
             (match_operand:SVE_I 2 "register_operand" "%0, 0")
             (match_operand:SVE_I 3 "aarch64_sve_mul_operand" "vsm, w"))]
          UNSPEC_MERGE_PTRUE))]
  "TARGET_SVE"
  "@
   mul\t%0.<Vetype>, %0.<Vetype>, #%3
   mul\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
)

(define_insn "*madd<mode>"
  [(set (match_operand:SVE_I 0 "register_operand" "=w, w")
        (plus:SVE_I
          (unspec:SVE_I
            [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
             (mult:SVE_I (match_operand:SVE_I 2 "register_operand" "%0, w")
                         (match_operand:SVE_I 3 "register_operand" "w, w"))]
            UNSPEC_MERGE_PTRUE)
          (match_operand:SVE_I 4 "register_operand" "w, 0")))]
  "TARGET_SVE"
  "@
   mad\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
   mla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>"
)

(define_insn "*msub<mode>3"
  [(set (match_operand:SVE_I 0 "register_operand" "=w, w")
        (minus:SVE_I
          (match_operand:SVE_I 4 "register_operand" "w, 0")
          (unspec:SVE_I
            [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
             (mult:SVE_I (match_operand:SVE_I 2 "register_operand" "%0, w")
                         (match_operand:SVE_I 3 "register_operand" "w, w"))]
            UNSPEC_MERGE_PTRUE)))]
  "TARGET_SVE"
  "@
   msb\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
   mls\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>"
)
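
;; The two multiply-accumulate alternatives differ only in which input
;; the destination is tied to (a sketch):
;;
;;	mad	z0.s, p0/m, z1.s, z2.s	// z0 = z0 * z1 + z2
;;	mla	z0.s, p0/m, z1.s, z2.s	// z0 = z0 + z1 * z2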

;; Unpredicated highpart multiplication.
(define_expand "<su>mul<mode>3_highpart"
  [(set (match_operand:SVE_I 0 "register_operand")
        (unspec:SVE_I
          [(match_dup 3)
           (unspec:SVE_I [(match_operand:SVE_I 1 "register_operand")
                          (match_operand:SVE_I 2 "register_operand")]
                         MUL_HIGHPART)]
          UNSPEC_MERGE_PTRUE))]
  "TARGET_SVE"
  {
    operands[3] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode));
  }
)

;; Predicated highpart multiplication.
(define_insn "*<su>mul<mode>3_highpart"
  [(set (match_operand:SVE_I 0 "register_operand" "=w")
        (unspec:SVE_I
          [(match_operand:<VPRED> 1 "register_operand" "Upl")
           (unspec:SVE_I [(match_operand:SVE_I 2 "register_operand" "%0")
                          (match_operand:SVE_I 3 "register_operand" "w")]
                         MUL_HIGHPART)]
          UNSPEC_MERGE_PTRUE))]
  "TARGET_SVE"
  "<su>mulh\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
)

;; Unpredicated NEG, NOT and POPCOUNT.
(define_expand "<optab><mode>2"
  [(set (match_operand:SVE_I 0 "register_operand")
        (unspec:SVE_I
          [(match_dup 2)
           (SVE_INT_UNARY:SVE_I
             (match_operand:SVE_I 1 "register_operand"))]
          UNSPEC_MERGE_PTRUE))]
  "TARGET_SVE"
  {
    operands[2] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode));
  }
)

;; NEG, NOT and POPCOUNT predicated with a PTRUE.
(define_insn "*<optab><mode>2"
  [(set (match_operand:SVE_I 0 "register_operand" "=w")
        (unspec:SVE_I
          [(match_operand:<VPRED> 1 "register_operand" "Upl")
           (SVE_INT_UNARY:SVE_I
             (match_operand:SVE_I 2 "register_operand" "w"))]
          UNSPEC_MERGE_PTRUE))]
  "TARGET_SVE"
  "<sve_int_op>\t%0.<Vetype>, %1/m, %2.<Vetype>"
)

;; Vector AND, ORR and XOR.
(define_insn "<optab><mode>3"
  [(set (match_operand:SVE_I 0 "register_operand" "=w, w")
        (LOGICAL:SVE_I
          (match_operand:SVE_I 1 "register_operand" "%0, w")
          (match_operand:SVE_I 2 "aarch64_sve_logical_operand" "vsl, w")))]
  "TARGET_SVE"
  "@
   <logical>\t%0.<Vetype>, %0.<Vetype>, #%C2
   <logical>\t%0.d, %1.d, %2.d"
)

;; Vector AND, ORR and XOR on floating-point modes.  We avoid subregs
;; by providing this, but we need to use UNSPECs since rtx logical ops
;; aren't defined for floating-point modes.
(define_insn "*<optab><mode>3"
  [(set (match_operand:SVE_F 0 "register_operand" "=w")
        (unspec:SVE_F [(match_operand:SVE_F 1 "register_operand" "w")
                       (match_operand:SVE_F 2 "register_operand" "w")]
                      LOGICALF))]
  "TARGET_SVE"
  "<logicalf_op>\t%0.d, %1.d, %2.d"
)

;; REG_EQUAL notes on "not<mode>3" should ensure that we can generate
;; this pattern even though the NOT instruction itself is predicated.
(define_insn "bic<mode>3"
  [(set (match_operand:SVE_I 0 "register_operand" "=w")
        (and:SVE_I
          (not:SVE_I (match_operand:SVE_I 1 "register_operand" "w"))
          (match_operand:SVE_I 2 "register_operand" "w")))]
  "TARGET_SVE"
  "bic\t%0.d, %2.d, %1.d"
)

;; Predicate AND.  We can reuse one of the inputs as the GP.
(define_insn "and<mode>3"
  [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
        (and:PRED_ALL (match_operand:PRED_ALL 1 "register_operand" "Upa")
                      (match_operand:PRED_ALL 2 "register_operand" "Upa")))]
  "TARGET_SVE"
  "and\t%0.b, %1/z, %1.b, %2.b"
)

;; Unpredicated predicate ORR and XOR.
(define_expand "<optab><mode>3"
  [(set (match_operand:PRED_ALL 0 "register_operand")
        (and:PRED_ALL
          (LOGICAL_OR:PRED_ALL
            (match_operand:PRED_ALL 1 "register_operand")
            (match_operand:PRED_ALL 2 "register_operand"))
          (match_dup 3)))]
  "TARGET_SVE"
  {
    operands[3] = force_reg (<MODE>mode, CONSTM1_RTX (<MODE>mode));
  }
)

;; Predicated predicate ORR and XOR.
(define_insn "pred_<optab><mode>3"
  [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
        (and:PRED_ALL
          (LOGICAL:PRED_ALL
            (match_operand:PRED_ALL 2 "register_operand" "Upa")
            (match_operand:PRED_ALL 3 "register_operand" "Upa"))
          (match_operand:PRED_ALL 1 "register_operand" "Upa")))]
  "TARGET_SVE"
  "<logical>\t%0.b, %1/z, %2.b, %3.b"
)

;; Perform a logical operation on operands 2 and 3, using operand 1 as
;; the GP (which is known to be a PTRUE).  Store the result in operand 0
;; and set the flags in the same way as for PTEST.  The (and ...) in the
;; UNSPEC_PTEST_PTRUE is logically redundant, but means that the tested
;; value is structurally equivalent to the rhs of the second set.
(define_insn "*<optab><mode>3_cc"
  [(set (reg:CC CC_REGNUM)
        (compare:CC
          (unspec:SI [(match_operand:PRED_ALL 1 "register_operand" "Upa")
                      (and:PRED_ALL
                        (LOGICAL:PRED_ALL
                          (match_operand:PRED_ALL 2 "register_operand" "Upa")
                          (match_operand:PRED_ALL 3 "register_operand" "Upa"))
                        (match_dup 1))]
                     UNSPEC_PTEST_PTRUE)
          (const_int 0)))
   (set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
        (and:PRED_ALL (LOGICAL:PRED_ALL (match_dup 2) (match_dup 3))
                      (match_dup 1)))]
  "TARGET_SVE"
  "<logical>s\t%0.b, %1/z, %2.b, %3.b"
)
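
;; For example (a sketch): the flag-setting form lets a predicate OR
;; feed a branch directly:
;;
;;	orrs	p0.b, p1/z, p2.b, p3.b
;;	b.none	.Lskip			// branch if the result is empty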

;; Unpredicated predicate inverse.
(define_expand "one_cmpl<mode>2"
  [(set (match_operand:PRED_ALL 0 "register_operand")
        (and:PRED_ALL
          (not:PRED_ALL (match_operand:PRED_ALL 1 "register_operand"))
          (match_dup 2)))]
  "TARGET_SVE"
  {
    operands[2] = force_reg (<MODE>mode, CONSTM1_RTX (<MODE>mode));
  }
)

;; Predicated predicate inverse.
(define_insn "*one_cmpl<mode>3"
  [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
        (and:PRED_ALL
          (not:PRED_ALL (match_operand:PRED_ALL 2 "register_operand" "Upa"))
          (match_operand:PRED_ALL 1 "register_operand" "Upa")))]
  "TARGET_SVE"
  "not\t%0.b, %1/z, %2.b"
)

;; Predicated predicate BIC and ORN.
(define_insn "*<nlogical><mode>3"
  [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
        (and:PRED_ALL
          (NLOGICAL:PRED_ALL
            (not:PRED_ALL (match_operand:PRED_ALL 2 "register_operand" "Upa"))
            (match_operand:PRED_ALL 3 "register_operand" "Upa"))
          (match_operand:PRED_ALL 1 "register_operand" "Upa")))]
  "TARGET_SVE"
  "<nlogical>\t%0.b, %1/z, %3.b, %2.b"
)

;; Predicated predicate NAND and NOR.
(define_insn "*<logical_nn><mode>3"
  [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
        (and:PRED_ALL
          (NLOGICAL:PRED_ALL
            (not:PRED_ALL (match_operand:PRED_ALL 2 "register_operand" "Upa"))
            (not:PRED_ALL (match_operand:PRED_ALL 3 "register_operand" "Upa")))
          (match_operand:PRED_ALL 1 "register_operand" "Upa")))]
  "TARGET_SVE"
  "<logical_nn>\t%0.b, %1/z, %2.b, %3.b"
)

;; Unpredicated LSL, LSR and ASR by a vector.
(define_expand "v<optab><mode>3"
  [(set (match_operand:SVE_I 0 "register_operand")
        (unspec:SVE_I
          [(match_dup 3)
           (ASHIFT:SVE_I
             (match_operand:SVE_I 1 "register_operand")
             (match_operand:SVE_I 2 "aarch64_sve_<lr>shift_operand"))]
          UNSPEC_MERGE_PTRUE))]
  "TARGET_SVE"
  {
    operands[3] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode));
  }
)

;; LSL, LSR and ASR by a vector, predicated with a PTRUE.  We don't
;; actually need the predicate for the first alternative, but using Upa
;; or X isn't likely to gain much and would make the instruction seem
;; less uniform to the register allocator.
(define_insn "*v<optab><mode>3"
  [(set (match_operand:SVE_I 0 "register_operand" "=w, w")
        (unspec:SVE_I
          [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
           (ASHIFT:SVE_I
             (match_operand:SVE_I 2 "register_operand" "w, 0")
             (match_operand:SVE_I 3 "aarch64_sve_<lr>shift_operand" "D<lr>, w"))]
          UNSPEC_MERGE_PTRUE))]
  "TARGET_SVE"
  "@
   <shift>\t%0.<Vetype>, %2.<Vetype>, #%3
   <shift>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
)
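
;; For example (a sketch), the two alternatives above for a left shift:
;;
;;	lsl	z0.s, z1.s, #3			// shift by constant
;;	lsl	z0.s, p0/m, z0.s, z2.s		// shift by vector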

;; LSL, LSR and ASR by a scalar, which expands into one of the vector
;; shifts above.
(define_expand "<ASHIFT:optab><mode>3"
  [(set (match_operand:SVE_I 0 "register_operand")
        (ASHIFT:SVE_I (match_operand:SVE_I 1 "register_operand")
                      (match_operand:<VEL> 2 "general_operand")))]
  "TARGET_SVE"
  {
    rtx amount;
    if (CONST_INT_P (operands[2]))
      {
        amount = gen_const_vec_duplicate (<MODE>mode, operands[2]);
        if (!aarch64_sve_<lr>shift_operand (operands[2], <MODE>mode))
          amount = force_reg (<MODE>mode, amount);
      }
    else
      {
        amount = gen_reg_rtx (<MODE>mode);
        emit_insn (gen_vec_duplicate<mode> (amount,
                                            convert_to_mode (<VEL>mode,
                                                             operands[2], 0)));
      }
    emit_insn (gen_v<optab><mode>3 (operands[0], operands[1], amount));
    DONE;
  }
)

;; Test all bits of operand 1.  Operand 0 is a GP that is known to hold PTRUE.
;;
;; Using UNSPEC_PTEST_PTRUE allows combine patterns to assume that the GP
;; is a PTRUE even if the optimizers haven't yet been able to propagate
;; the constant.  We would use a separate unspec code for PTESTs involving
;; GPs that might not be PTRUEs.
(define_insn "ptest_ptrue<mode>"
  [(set (reg:CC CC_REGNUM)
        (compare:CC
          (unspec:SI [(match_operand:PRED_ALL 0 "register_operand" "Upa")
                      (match_operand:PRED_ALL 1 "register_operand" "Upa")]
                     UNSPEC_PTEST_PTRUE)
          (const_int 0)))]
  "TARGET_SVE"
  "ptest\t%0, %1.b"
)

;; Set element I of the result if operand1 + J < operand2 for all J
;; in [0, I], with the comparison being unsigned.
(define_insn "while_ult<GPI:mode><PRED_ALL:mode>"
  [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
        (unspec:PRED_ALL [(match_operand:GPI 1 "aarch64_reg_or_zero" "rZ")
                          (match_operand:GPI 2 "aarch64_reg_or_zero" "rZ")]
                         UNSPEC_WHILE_LO))
   (clobber (reg:CC CC_REGNUM))]
  "TARGET_SVE"
  "whilelo\t%0.<PRED_ALL:Vetype>, %<w>1, %<w>2"
)

;; WHILELO sets the flags in the same way as a PTEST with a PTRUE GP.
;; Handle the case in which both results are useful.  The GP operand
;; to the PTEST isn't needed, so we allow it to be anything.
(define_insn_and_split "while_ult<GPI:mode><PRED_ALL:mode>_cc"
  [(set (reg:CC CC_REGNUM)
        (compare:CC
          (unspec:SI [(match_operand:PRED_ALL 1)
                      (unspec:PRED_ALL
                        [(match_operand:GPI 2 "aarch64_reg_or_zero" "rZ")
                         (match_operand:GPI 3 "aarch64_reg_or_zero" "rZ")]
                        UNSPEC_WHILE_LO)]
                     UNSPEC_PTEST_PTRUE)
          (const_int 0)))
   (set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
        (unspec:PRED_ALL [(match_dup 2)
                          (match_dup 3)]
                         UNSPEC_WHILE_LO))]
  "TARGET_SVE"
  "whilelo\t%0.<PRED_ALL:Vetype>, %<w>2, %<w>3"
  ;; Force the compiler to drop the unused predicate operand, so that we
  ;; don't have an unnecessary PTRUE.
  "&& !CONSTANT_P (operands[1])"
  [(const_int 0)]
  {
    emit_insn (gen_while_ult<GPI:mode><PRED_ALL:mode>_cc
               (operands[0], CONSTM1_RTX (<MODE>mode),
                operands[2], operands[3]));
    DONE;
  }
)
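
;; For example (a sketch), WHILELO is the natural way of creating the
;; governing predicate for a counted loop, with i in x0 and n in x1:
;;
;;	whilelo	p0.s, x0, x1		// lane J active iff x0 + J < x1
;;	b.none	.Lend			// exit if no lanes are active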

;; Predicated integer comparison.
(define_insn "*vec_cmp<cmp_op>_<mode>"
  [(set (match_operand:<VPRED> 0 "register_operand" "=Upa, Upa")
        (unspec:<VPRED>
          [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
           (match_operand:SVE_I 2 "register_operand" "w, w")
           (match_operand:SVE_I 3 "aarch64_sve_cmp_<imm_con>_operand" "<imm_con>, w")]
          SVE_COND_INT_CMP))
   (clobber (reg:CC CC_REGNUM))]
  "TARGET_SVE"
  "@
   cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, #%3
   cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>"
)

;; Predicated integer comparison in which only the flags result is
;; interesting.
(define_insn "*vec_cmp<cmp_op>_<mode>_ptest"
  [(set (reg:CC CC_REGNUM)
        (compare:CC
          (unspec:SI
            [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
             (unspec:<VPRED>
               [(match_dup 1)
                (match_operand:SVE_I 2 "register_operand" "w, w")
                (match_operand:SVE_I 3 "aarch64_sve_cmp_<imm_con>_operand" "<imm_con>, w")]
               SVE_COND_INT_CMP)]
            UNSPEC_PTEST_PTRUE)
          (const_int 0)))
   (clobber (match_scratch:<VPRED> 0 "=Upa, Upa"))]
  "TARGET_SVE"
  "@
   cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, #%3
   cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>"
)

;; Predicated comparison in which both the flag and predicate results
;; are interesting.
(define_insn "*vec_cmp<cmp_op>_<mode>_cc"
  [(set (reg:CC CC_REGNUM)
        (compare:CC
          (unspec:SI
            [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
             (unspec:<VPRED>
               [(match_dup 1)
                (match_operand:SVE_I 2 "register_operand" "w, w")
                (match_operand:SVE_I 3 "aarch64_sve_cmp_<imm_con>_operand" "<imm_con>, w")]
               SVE_COND_INT_CMP)]
            UNSPEC_PTEST_PTRUE)
          (const_int 0)))
   (set (match_operand:<VPRED> 0 "register_operand" "=Upa, Upa")
        (unspec:<VPRED>
          [(match_dup 1)
           (match_dup 2)
           (match_dup 3)]
          SVE_COND_INT_CMP))]
  "TARGET_SVE"
  "@
   cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, #%3
   cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>"
)

;; Predicated floating-point comparison (excluding FCMUO, which doesn't
;; allow #0.0 as an operand).
(define_insn "*vec_fcm<cmp_op><mode>"
  [(set (match_operand:<VPRED> 0 "register_operand" "=Upa, Upa")
        (unspec:<VPRED>
          [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
           (match_operand:SVE_F 2 "register_operand" "w, w")
           (match_operand:SVE_F 3 "aarch64_simd_reg_or_zero" "Dz, w")]
          SVE_COND_FP_CMP))]
  "TARGET_SVE"
  "@
   fcm<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, #0.0
   fcm<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>"
)

;; Predicated FCMUO.
(define_insn "*vec_fcmuo<mode>"
  [(set (match_operand:<VPRED> 0 "register_operand" "=Upa")
        (unspec:<VPRED>
          [(match_operand:<VPRED> 1 "register_operand" "Upl")
           (match_operand:SVE_F 2 "register_operand" "w")
           (match_operand:SVE_F 3 "register_operand" "w")]
          UNSPEC_COND_UO))]
  "TARGET_SVE"
  "fcmuo\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>"
)

;; vcond_mask operand order: true, false, mask
;; UNSPEC_SEL operand order: mask, true, false (as for VEC_COND_EXPR)
;; SEL operand order: mask, true, false
(define_insn "vcond_mask_<mode><vpred>"
  [(set (match_operand:SVE_ALL 0 "register_operand" "=w")
        (unspec:SVE_ALL
          [(match_operand:<VPRED> 3 "register_operand" "Upa")
           (match_operand:SVE_ALL 1 "register_operand" "w")
           (match_operand:SVE_ALL 2 "register_operand" "w")]
          UNSPEC_SEL))]
  "TARGET_SVE"
  "sel\t%0.<Vetype>, %3, %1.<Vetype>, %2.<Vetype>"
)

;; Selects between a duplicated immediate and zero.
(define_insn "aarch64_sve_dup<mode>_const"
  [(set (match_operand:SVE_I 0 "register_operand" "=w")
        (unspec:SVE_I
          [(match_operand:<VPRED> 1 "register_operand" "Upl")
           (match_operand:SVE_I 2 "aarch64_sve_dup_immediate")
           (match_operand:SVE_I 3 "aarch64_simd_imm_zero")]
          UNSPEC_SEL))]
  "TARGET_SVE"
  "mov\t%0.<Vetype>, %1/z, #%2"
)

;; Integer (signed) vcond.  Don't enforce an immediate range here, since it
;; depends on the comparison; leave it to aarch64_expand_sve_vcond instead.
(define_expand "vcond<mode><v_int_equiv>"
  [(set (match_operand:SVE_ALL 0 "register_operand")
        (if_then_else:SVE_ALL
          (match_operator 3 "comparison_operator"
            [(match_operand:<V_INT_EQUIV> 4 "register_operand")
             (match_operand:<V_INT_EQUIV> 5 "nonmemory_operand")])
          (match_operand:SVE_ALL 1 "register_operand")
          (match_operand:SVE_ALL 2 "register_operand")))]
  "TARGET_SVE"
  {
    aarch64_expand_sve_vcond (<MODE>mode, <V_INT_EQUIV>mode, operands);
    DONE;
  }
)

;; Integer vcondu.  Don't enforce an immediate range here, since it
;; depends on the comparison; leave it to aarch64_expand_sve_vcond instead.
(define_expand "vcondu<mode><v_int_equiv>"
  [(set (match_operand:SVE_ALL 0 "register_operand")
        (if_then_else:SVE_ALL
          (match_operator 3 "comparison_operator"
            [(match_operand:<V_INT_EQUIV> 4 "register_operand")
             (match_operand:<V_INT_EQUIV> 5 "nonmemory_operand")])
          (match_operand:SVE_ALL 1 "register_operand")
          (match_operand:SVE_ALL 2 "register_operand")))]
  "TARGET_SVE"
  {
    aarch64_expand_sve_vcond (<MODE>mode, <V_INT_EQUIV>mode, operands);
    DONE;
  }
)

;; Floating-point vcond.  All comparisons except FCMUO allow a zero
;; operand; aarch64_expand_sve_vcond handles the case of an FCMUO
;; with zero.
(define_expand "vcond<mode><v_fp_equiv>"
  [(set (match_operand:SVE_SD 0 "register_operand")
        (if_then_else:SVE_SD
          (match_operator 3 "comparison_operator"
            [(match_operand:<V_FP_EQUIV> 4 "register_operand")
             (match_operand:<V_FP_EQUIV> 5 "aarch64_simd_reg_or_zero")])
          (match_operand:SVE_SD 1 "register_operand")
          (match_operand:SVE_SD 2 "register_operand")))]
  "TARGET_SVE"
  {
    aarch64_expand_sve_vcond (<MODE>mode, <V_FP_EQUIV>mode, operands);
    DONE;
  }
)

;; Signed integer comparisons.  Don't enforce an immediate range here,
;; since it depends on the comparison; leave it to
;; aarch64_expand_sve_vec_cmp_int instead.
(define_expand "vec_cmp<mode><vpred>"
  [(parallel
     [(set (match_operand:<VPRED> 0 "register_operand")
           (match_operator:<VPRED> 1 "comparison_operator"
             [(match_operand:SVE_I 2 "register_operand")
              (match_operand:SVE_I 3 "nonmemory_operand")]))
      (clobber (reg:CC CC_REGNUM))])]
  "TARGET_SVE"
  {
    aarch64_expand_sve_vec_cmp_int (operands[0], GET_CODE (operands[1]),
                                    operands[2], operands[3]);
    DONE;
  }
)

;; Unsigned integer comparisons.  Don't enforce an immediate range here,
;; since it depends on the comparison; leave it to
;; aarch64_expand_sve_vec_cmp_int instead.
(define_expand "vec_cmpu<mode><vpred>"
  [(parallel
     [(set (match_operand:<VPRED> 0 "register_operand")
           (match_operator:<VPRED> 1 "comparison_operator"
             [(match_operand:SVE_I 2 "register_operand")
              (match_operand:SVE_I 3 "nonmemory_operand")]))
      (clobber (reg:CC CC_REGNUM))])]
  "TARGET_SVE"
  {
    aarch64_expand_sve_vec_cmp_int (operands[0], GET_CODE (operands[1]),
                                    operands[2], operands[3]);
    DONE;
  }
)

;; Floating-point comparisons.  All comparisons except FCMUO allow a zero
;; operand; aarch64_expand_sve_vec_cmp_float handles the case of an FCMUO
;; with zero.
(define_expand "vec_cmp<mode><vpred>"
  [(set (match_operand:<VPRED> 0 "register_operand")
        (match_operator:<VPRED> 1 "comparison_operator"
          [(match_operand:SVE_F 2 "register_operand")
           (match_operand:SVE_F 3 "aarch64_simd_reg_or_zero")]))]
  "TARGET_SVE"
  {
    aarch64_expand_sve_vec_cmp_float (operands[0], GET_CODE (operands[1]),
                                      operands[2], operands[3], false);
    DONE;
  }
)

;; Branch based on predicate equality or inequality.
(define_expand "cbranch<mode>4"
  [(set (pc)
        (if_then_else
          (match_operator 0 "aarch64_equality_operator"
            [(match_operand:PRED_ALL 1 "register_operand")
             (match_operand:PRED_ALL 2 "aarch64_simd_reg_or_zero")])
          (label_ref (match_operand 3 ""))
          (pc)))]
  ""
  {
    rtx ptrue = force_reg (<MODE>mode, CONSTM1_RTX (<MODE>mode));
    rtx pred;
    if (operands[2] == CONST0_RTX (<MODE>mode))
      pred = operands[1];
    else
      {
        pred = gen_reg_rtx (<MODE>mode);
        emit_insn (gen_pred_xor<mode>3 (pred, ptrue, operands[1],
                                        operands[2]));
      }
    emit_insn (gen_ptest_ptrue<mode> (ptrue, pred));
    operands[1] = gen_rtx_REG (CCmode, CC_REGNUM);
    operands[2] = const0_rtx;
  }
)

;; Unpredicated integer MIN/MAX.
(define_expand "<su><maxmin><mode>3"
  [(set (match_operand:SVE_I 0 "register_operand")
        (unspec:SVE_I
          [(match_dup 3)
           (MAXMIN:SVE_I (match_operand:SVE_I 1 "register_operand")
                         (match_operand:SVE_I 2 "register_operand"))]
          UNSPEC_MERGE_PTRUE))]
  "TARGET_SVE"
  {
    operands[3] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode));
  }
)

;; Integer MIN/MAX predicated with a PTRUE.
(define_insn "*<su><maxmin><mode>3"
  [(set (match_operand:SVE_I 0 "register_operand" "=w")
        (unspec:SVE_I
          [(match_operand:<VPRED> 1 "register_operand" "Upl")
           (MAXMIN:SVE_I (match_operand:SVE_I 2 "register_operand" "%0")
                         (match_operand:SVE_I 3 "register_operand" "w"))]
          UNSPEC_MERGE_PTRUE))]
  "TARGET_SVE"
  "<su><maxmin>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
)

;; Unpredicated floating-point MIN/MAX.
(define_expand "<su><maxmin><mode>3"
  [(set (match_operand:SVE_F 0 "register_operand")
        (unspec:SVE_F
          [(match_dup 3)
           (FMAXMIN:SVE_F (match_operand:SVE_F 1 "register_operand")
                          (match_operand:SVE_F 2 "register_operand"))]
          UNSPEC_MERGE_PTRUE))]
  "TARGET_SVE"
  {
    operands[3] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode));
  }
)

;; Floating-point MIN/MAX predicated with a PTRUE.
(define_insn "*<su><maxmin><mode>3"
  [(set (match_operand:SVE_F 0 "register_operand" "=w")
        (unspec:SVE_F
          [(match_operand:<VPRED> 1 "register_operand" "Upl")
           (FMAXMIN:SVE_F (match_operand:SVE_F 2 "register_operand" "%0")
                          (match_operand:SVE_F 3 "register_operand" "w"))]
          UNSPEC_MERGE_PTRUE))]
  "TARGET_SVE"
  "f<maxmin>nm\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
)

;; Unpredicated fmin/fmax.
(define_expand "<maxmin_uns><mode>3"
  [(set (match_operand:SVE_F 0 "register_operand")
        (unspec:SVE_F
          [(match_dup 3)
           (unspec:SVE_F [(match_operand:SVE_F 1 "register_operand")
                          (match_operand:SVE_F 2 "register_operand")]
                         FMAXMIN_UNS)]
          UNSPEC_MERGE_PTRUE))]
  "TARGET_SVE"
  {
    operands[3] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode));
  }
)

;; fmin/fmax predicated with a PTRUE.
(define_insn "*<maxmin_uns><mode>3"
  [(set (match_operand:SVE_F 0 "register_operand" "=w")
        (unspec:SVE_F
          [(match_operand:<VPRED> 1 "register_operand" "Upl")
           (unspec:SVE_F [(match_operand:SVE_F 2 "register_operand" "%0")
                          (match_operand:SVE_F 3 "register_operand" "w")]
                         FMAXMIN_UNS)]
          UNSPEC_MERGE_PTRUE))]
  "TARGET_SVE"
  "<maxmin_uns_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
)

;; Predicated integer operations.
(define_insn "cond_<optab><mode>"
  [(set (match_operand:SVE_I 0 "register_operand" "=w")
        (unspec:SVE_I
          [(match_operand:<VPRED> 1 "register_operand" "Upl")
           (match_operand:SVE_I 2 "register_operand" "0")
           (match_operand:SVE_I 3 "register_operand" "w")]
          SVE_COND_INT_OP))]
  "TARGET_SVE"
  "<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
)

;; Set operand 0 to the last active element in operand 3, or to tied
;; operand 1 if no elements are active.
(define_insn "fold_extract_last_<mode>"
  [(set (match_operand:<VEL> 0 "register_operand" "=r, w")
        (unspec:<VEL>
          [(match_operand:<VEL> 1 "register_operand" "0, 0")
           (match_operand:<VPRED> 2 "register_operand" "Upl, Upl")
           (match_operand:SVE_ALL 3 "register_operand" "w, w")]
          UNSPEC_CLASTB))]
  "TARGET_SVE"
  "@
   clastb\t%<vwcore>0, %2, %<vwcore>0, %3.<Vetype>
   clastb\t%<vw>0, %2, %<vw>0, %3.<Vetype>"
)
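
;; For example (a sketch):
;;
;;	clastb	w0, p0, w0, z1.s	// w0 = last active lane of z1,
;;					// or w0 unchanged if none active
;;
;; which is the form used when vectorizing conditional reductions.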
;; Unpredicated integer add reduction.
(define_expand "reduc_plus_scal_<mode>"
  [(set (match_operand:<VEL> 0 "register_operand")
        (unspec:<VEL> [(match_dup 2)
                       (match_operand:SVE_I 1 "register_operand")]
                      UNSPEC_ADDV))]
  "TARGET_SVE"
  {
    operands[2] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode));
  }
)

;; Predicated integer add reduction.  The result is always 64 bits.
(define_insn "*reduc_plus_scal_<mode>"
  [(set (match_operand:<VEL> 0 "register_operand" "=w")
        (unspec:<VEL> [(match_operand:<VPRED> 1 "register_operand" "Upl")
                       (match_operand:SVE_I 2 "register_operand" "w")]
                      UNSPEC_ADDV))]
  "TARGET_SVE"
  "uaddv\t%d0, %1, %2.<Vetype>"
)

;; Unpredicated floating-point add reduction.
(define_expand "reduc_plus_scal_<mode>"
  [(set (match_operand:<VEL> 0 "register_operand")
        (unspec:<VEL> [(match_dup 2)
                       (match_operand:SVE_F 1 "register_operand")]
                      UNSPEC_FADDV))]
  "TARGET_SVE"
  {
    operands[2] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode));
  }
)

;; Predicated floating-point add reduction.
(define_insn "*reduc_plus_scal_<mode>"
  [(set (match_operand:<VEL> 0 "register_operand" "=w")
        (unspec:<VEL> [(match_operand:<VPRED> 1 "register_operand" "Upl")
                       (match_operand:SVE_F 2 "register_operand" "w")]
                      UNSPEC_FADDV))]
  "TARGET_SVE"
  "faddv\t%<Vetype>0, %1, %2.<Vetype>"
)

;; Unpredicated integer MIN/MAX reduction.
(define_expand "reduc_<maxmin_uns>_scal_<mode>"
  [(set (match_operand:<VEL> 0 "register_operand")
        (unspec:<VEL> [(match_dup 2)
                       (match_operand:SVE_I 1 "register_operand")]
                      MAXMINV))]
  "TARGET_SVE"
  {
    operands[2] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode));
  }
)

;; Predicated integer MIN/MAX reduction.
(define_insn "*reduc_<maxmin_uns>_scal_<mode>"
  [(set (match_operand:<VEL> 0 "register_operand" "=w")
        (unspec:<VEL> [(match_operand:<VPRED> 1 "register_operand" "Upl")
                       (match_operand:SVE_I 2 "register_operand" "w")]
                      MAXMINV))]
  "TARGET_SVE"
  "<maxmin_uns_op>v\t%<Vetype>0, %1, %2.<Vetype>"
)

;; Unpredicated floating-point MIN/MAX reduction.
(define_expand "reduc_<maxmin_uns>_scal_<mode>"
  [(set (match_operand:<VEL> 0 "register_operand")
        (unspec:<VEL> [(match_dup 2)
                       (match_operand:SVE_F 1 "register_operand")]
                      FMAXMINV))]
  "TARGET_SVE"
  {
    operands[2] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode));
  }
)
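
;; Illustration only (not part of the original sources): the reduction
;; expanders above collapse a vector to a scalar, e.g. a plain sum for
;; reduc_plus_scal_<mode>:
;;
;;   int
;;   sum (int *x, int n)
;;   {
;;     int s = 0;
;;     for (int i = 0; i < n; ++i)
;;       s += x[i];  /* UADDV produces a 64-bit sum of the .S lanes.  */
;;     return s;
;;   }
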
;; Predicated floating-point MIN/MAX reduction.
(define_insn "*reduc_<maxmin_uns>_scal_<mode>"
  [(set (match_operand:<VEL> 0 "register_operand" "=w")
        (unspec:<VEL> [(match_operand:<VPRED> 1 "register_operand" "Upl")
                       (match_operand:SVE_F 2 "register_operand" "w")]
                      FMAXMINV))]
  "TARGET_SVE"
  "<maxmin_uns_op>v\t%<Vetype>0, %1, %2.<Vetype>"
)

;; Unpredicated bitwise (AND, IOR, XOR) integer reductions.
(define_expand "reduc_<optab>_scal_<mode>"
  [(set (match_operand:<VEL> 0 "register_operand")
        (unspec:<VEL> [(match_dup 2)
                       (match_operand:SVE_I 1 "register_operand")]
                      BITWISEV))]
  "TARGET_SVE"
  {
    operands[2] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode));
  }
)

;; Predicated bitwise (AND, IOR, XOR) integer reductions.
(define_insn "*reduc_<optab>_scal_<mode>"
  [(set (match_operand:<VEL> 0 "register_operand" "=w")
        (unspec:<VEL> [(match_operand:<VPRED> 1 "register_operand" "Upl")
                       (match_operand:SVE_I 2 "register_operand" "w")]
                      BITWISEV))]
  "TARGET_SVE"
  "<bit_reduc_op>\t%<Vetype>0, %1, %2.<Vetype>"
)

;; Unpredicated in-order FP reductions.
(define_expand "fold_left_plus_<mode>"
  [(set (match_operand:<VEL> 0 "register_operand")
        (unspec:<VEL> [(match_dup 3)
                       (match_operand:<VEL> 1 "register_operand")
                       (match_operand:SVE_F 2 "register_operand")]
                      UNSPEC_FADDA))]
  "TARGET_SVE"
  {
    operands[3] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode));
  }
)

;; In-order FP reductions predicated with PTRUE.
(define_insn "*fold_left_plus_<mode>"
  [(set (match_operand:<VEL> 0 "register_operand" "=w")
        (unspec:<VEL> [(match_operand:<VPRED> 1 "register_operand" "Upl")
                       (match_operand:<VEL> 2 "register_operand" "0")
                       (match_operand:SVE_F 3 "register_operand" "w")]
                      UNSPEC_FADDA))]
  "TARGET_SVE"
  "fadda\t%<Vetype>0, %1, %<Vetype>0, %3.<Vetype>"
)

;; Predicated form of the above in-order reduction.
(define_insn "*pred_fold_left_plus_<mode>"
  [(set (match_operand:<VEL> 0 "register_operand" "=w")
        (unspec:<VEL>
          [(match_operand:<VEL> 1 "register_operand" "0")
           (unspec:SVE_F
             [(match_operand:<VPRED> 2 "register_operand" "Upl")
              (match_operand:SVE_F 3 "register_operand" "w")
              (match_operand:SVE_F 4 "aarch64_simd_imm_zero")]
             UNSPEC_SEL)]
          UNSPEC_FADDA))]
  "TARGET_SVE"
  "fadda\t%<Vetype>0, %2, %<Vetype>0, %3.<Vetype>"
)

;; Unpredicated floating-point addition.
(define_expand "add<mode>3"
  [(set (match_operand:SVE_F 0 "register_operand")
        (unspec:SVE_F
          [(match_dup 3)
           (plus:SVE_F
             (match_operand:SVE_F 1 "register_operand")
             (match_operand:SVE_F 2 "aarch64_sve_float_arith_with_sub_operand"))]
          UNSPEC_MERGE_PTRUE))]
  "TARGET_SVE"
  {
    operands[3] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode));
  }
)

;; Floating-point addition predicated with a PTRUE.
(define_insn "*add<mode>3"
  [(set (match_operand:SVE_F 0 "register_operand" "=w, w, w")
        (unspec:SVE_F
          [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
           (plus:SVE_F
             (match_operand:SVE_F 2 "register_operand" "%0, 0, w")
             (match_operand:SVE_F 3 "aarch64_sve_float_arith_with_sub_operand" "vsA, vsN, w"))]
          UNSPEC_MERGE_PTRUE))]
  "TARGET_SVE"
  "@
   fadd\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
   fsub\t%0.<Vetype>, %1/m, %0.<Vetype>, #%N3
   fadd\t%0.<Vetype>, %2.<Vetype>, %3.<Vetype>"
)
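
;; Illustration only (not part of the original sources): the vsA/vsN
;; alternatives above enable the immediate forms of FADD/FSUB, which
;; accept only a restricted set of FP immediates (see the predicates
;; and constraints used above), e.g.:
;;
;;   void
;;   bump (float *x, int n)
;;   {
;;     for (int i = 0; i < n; ++i)
;;       x[i] += 1.0f;  /* FADD Z?.S, P?/M, Z?.S, #1.0  */
;;   }
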
;; Unpredicated floating-point subtraction.
(define_expand "sub<mode>3"
  [(set (match_operand:SVE_F 0 "register_operand")
        (unspec:SVE_F
          [(match_dup 3)
           (minus:SVE_F
             (match_operand:SVE_F 1 "aarch64_sve_float_arith_operand")
             (match_operand:SVE_F 2 "register_operand"))]
          UNSPEC_MERGE_PTRUE))]
  "TARGET_SVE"
  {
    operands[3] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode));
  }
)

;; Floating-point subtraction predicated with a PTRUE.
(define_insn "*sub<mode>3"
  [(set (match_operand:SVE_F 0 "register_operand" "=w, w, w, w")
        (unspec:SVE_F
          [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl")
           (minus:SVE_F
             (match_operand:SVE_F 2 "aarch64_sve_float_arith_operand" "0, 0, vsA, w")
             (match_operand:SVE_F 3 "aarch64_sve_float_arith_with_sub_operand" "vsA, vsN, 0, w"))]
          UNSPEC_MERGE_PTRUE))]
  "TARGET_SVE
   && (register_operand (operands[2], <MODE>mode)
       || register_operand (operands[3], <MODE>mode))"
  "@
   fsub\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
   fadd\t%0.<Vetype>, %1/m, %0.<Vetype>, #%N3
   fsubr\t%0.<Vetype>, %1/m, %0.<Vetype>, #%2
   fsub\t%0.<Vetype>, %2.<Vetype>, %3.<Vetype>"
)

;; Unpredicated floating-point multiplication.
(define_expand "mul<mode>3"
  [(set (match_operand:SVE_F 0 "register_operand")
        (unspec:SVE_F
          [(match_dup 3)
           (mult:SVE_F
             (match_operand:SVE_F 1 "register_operand")
             (match_operand:SVE_F 2 "aarch64_sve_float_mul_operand"))]
          UNSPEC_MERGE_PTRUE))]
  "TARGET_SVE"
  {
    operands[3] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode));
  }
)

;; Floating-point multiplication predicated with a PTRUE.
(define_insn "*mul<mode>3"
  [(set (match_operand:SVE_F 0 "register_operand" "=w, w")
        (unspec:SVE_F
          [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
           (mult:SVE_F
             (match_operand:SVE_F 2 "register_operand" "%0, w")
             (match_operand:SVE_F 3 "aarch64_sve_float_mul_operand" "vsM, w"))]
          UNSPEC_MERGE_PTRUE))]
  "TARGET_SVE"
  "@
   fmul\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
   fmul\t%0.<Vetype>, %2.<Vetype>, %3.<Vetype>"
)

;; Unpredicated fma (%0 = (%1 * %2) + %3).
(define_expand "fma<mode>4"
  [(set (match_operand:SVE_F 0 "register_operand")
        (unspec:SVE_F
          [(match_dup 4)
           (fma:SVE_F (match_operand:SVE_F 1 "register_operand")
                      (match_operand:SVE_F 2 "register_operand")
                      (match_operand:SVE_F 3 "register_operand"))]
          UNSPEC_MERGE_PTRUE))]
  "TARGET_SVE"
  {
    operands[4] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode));
  }
)

;; fma predicated with a PTRUE.
(define_insn "*fma<mode>4"
  [(set (match_operand:SVE_F 0 "register_operand" "=w, w")
        (unspec:SVE_F
          [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
           (fma:SVE_F (match_operand:SVE_F 3 "register_operand" "%0, w")
                      (match_operand:SVE_F 4 "register_operand" "w, w")
                      (match_operand:SVE_F 2 "register_operand" "w, 0"))]
          UNSPEC_MERGE_PTRUE))]
  "TARGET_SVE"
  "@
   fmad\t%0.<Vetype>, %1/m, %4.<Vetype>, %2.<Vetype>
   fmla\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>"
)
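
;; Illustration only (not part of the original sources): with FP
;; contraction enabled (or an explicit call to fma), a loop like the
;; one below can use the alternatives above; whether FMAD or FMLA is
;; emitted depends on which input the register allocator ties to the
;; destination.
;;
;;   void
;;   madd (double *restrict r, double *restrict a,
;;         double *restrict b, double *restrict c, int n)
;;   {
;;     for (int i = 0; i < n; ++i)
;;       r[i] = a[i] * b[i] + c[i];
;;   }
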
;; Unpredicated fnma (%0 = (-%1 * %2) + %3).
(define_expand "fnma<mode>4"
  [(set (match_operand:SVE_F 0 "register_operand")
        (unspec:SVE_F
          [(match_dup 4)
           (fma:SVE_F (neg:SVE_F
                        (match_operand:SVE_F 1 "register_operand"))
                      (match_operand:SVE_F 2 "register_operand")
                      (match_operand:SVE_F 3 "register_operand"))]
          UNSPEC_MERGE_PTRUE))]
  "TARGET_SVE"
  {
    operands[4] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode));
  }
)

;; fnma predicated with a PTRUE.
(define_insn "*fnma<mode>4"
  [(set (match_operand:SVE_F 0 "register_operand" "=w, w")
        (unspec:SVE_F
          [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
           (fma:SVE_F (neg:SVE_F
                        (match_operand:SVE_F 3 "register_operand" "%0, w"))
                      (match_operand:SVE_F 4 "register_operand" "w, w")
                      (match_operand:SVE_F 2 "register_operand" "w, 0"))]
          UNSPEC_MERGE_PTRUE))]
  "TARGET_SVE"
  "@
   fmsb\t%0.<Vetype>, %1/m, %4.<Vetype>, %2.<Vetype>
   fmls\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>"
)

;; Unpredicated fms (%0 = (%1 * %2) - %3).
(define_expand "fms<mode>4"
  [(set (match_operand:SVE_F 0 "register_operand")
        (unspec:SVE_F
          [(match_dup 4)
           (fma:SVE_F (match_operand:SVE_F 1 "register_operand")
                      (match_operand:SVE_F 2 "register_operand")
                      (neg:SVE_F
                        (match_operand:SVE_F 3 "register_operand")))]
          UNSPEC_MERGE_PTRUE))]
  "TARGET_SVE"
  {
    operands[4] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode));
  }
)

;; fms predicated with a PTRUE.
(define_insn "*fms<mode>4"
  [(set (match_operand:SVE_F 0 "register_operand" "=w, w")
        (unspec:SVE_F
          [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
           (fma:SVE_F (match_operand:SVE_F 3 "register_operand" "%0, w")
                      (match_operand:SVE_F 4 "register_operand" "w, w")
                      (neg:SVE_F
                        (match_operand:SVE_F 2 "register_operand" "w, 0")))]
          UNSPEC_MERGE_PTRUE))]
  "TARGET_SVE"
  "@
   fnmsb\t%0.<Vetype>, %1/m, %4.<Vetype>, %2.<Vetype>
   fnmls\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>"
)

;; Unpredicated fnms (%0 = (-%1 * %2) - %3).
(define_expand "fnms<mode>4"
  [(set (match_operand:SVE_F 0 "register_operand")
        (unspec:SVE_F
          [(match_dup 4)
           (fma:SVE_F (neg:SVE_F
                        (match_operand:SVE_F 1 "register_operand"))
                      (match_operand:SVE_F 2 "register_operand")
                      (neg:SVE_F
                        (match_operand:SVE_F 3 "register_operand")))]
          UNSPEC_MERGE_PTRUE))]
  "TARGET_SVE"
  {
    operands[4] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode));
  }
)

;; fnms predicated with a PTRUE.
(define_insn "*fnms<mode>4"
  [(set (match_operand:SVE_F 0 "register_operand" "=w, w")
        (unspec:SVE_F
          [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
           (fma:SVE_F (neg:SVE_F
                        (match_operand:SVE_F 3 "register_operand" "%0, w"))
                      (match_operand:SVE_F 4 "register_operand" "w, w")
                      (neg:SVE_F
                        (match_operand:SVE_F 2 "register_operand" "w, 0")))]
          UNSPEC_MERGE_PTRUE))]
  "TARGET_SVE"
  "@
   fnmad\t%0.<Vetype>, %1/m, %4.<Vetype>, %2.<Vetype>
   fnmla\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>"
)
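
;; Illustration only (not part of the original sources): the four
;; fused-multiply patterns above correspond to the sign combinations
;; of C fma calls:
;;
;;   r = fma (a, b, c);    /* fma<mode>4:  FMAD/FMLA    */
;;   r = fma (-a, b, c);   /* fnma<mode>4: FMSB/FMLS    */
;;   r = fma (a, b, -c);   /* fms<mode>4:  FNMSB/FNMLS  */
;;   r = fma (-a, b, -c);  /* fnms<mode>4: FNMAD/FNMLA  */
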
;; Unpredicated floating-point division.
(define_expand "div<mode>3"
  [(set (match_operand:SVE_F 0 "register_operand")
        (unspec:SVE_F
          [(match_dup 3)
           (div:SVE_F (match_operand:SVE_F 1 "register_operand")
                      (match_operand:SVE_F 2 "register_operand"))]
          UNSPEC_MERGE_PTRUE))]
  "TARGET_SVE"
  {
    operands[3] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode));
  }
)

;; Floating-point division predicated with a PTRUE.
(define_insn "*div<mode>3"
  [(set (match_operand:SVE_F 0 "register_operand" "=w, w")
        (unspec:SVE_F
          [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
           (div:SVE_F (match_operand:SVE_F 2 "register_operand" "0, w")
                      (match_operand:SVE_F 3 "register_operand" "w, 0"))]
          UNSPEC_MERGE_PTRUE))]
  "TARGET_SVE"
  "@
   fdiv\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
   fdivr\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>"
)

;; Unpredicated FNEG, FABS and FSQRT.
(define_expand "<optab><mode>2"
  [(set (match_operand:SVE_F 0 "register_operand")
        (unspec:SVE_F
          [(match_dup 2)
           (SVE_FP_UNARY:SVE_F (match_operand:SVE_F 1 "register_operand"))]
          UNSPEC_MERGE_PTRUE))]
  "TARGET_SVE"
  {
    operands[2] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode));
  }
)

;; FNEG, FABS and FSQRT predicated with a PTRUE.
(define_insn "*<optab><mode>2"
  [(set (match_operand:SVE_F 0 "register_operand" "=w")
        (unspec:SVE_F
          [(match_operand:<VPRED> 1 "register_operand" "Upl")
           (SVE_FP_UNARY:SVE_F (match_operand:SVE_F 2 "register_operand" "w"))]
          UNSPEC_MERGE_PTRUE))]
  "TARGET_SVE"
  "<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vetype>"
)

;; Unpredicated FRINTy.
(define_expand "<frint_pattern><mode>2"
  [(set (match_operand:SVE_F 0 "register_operand")
        (unspec:SVE_F
          [(match_dup 2)
           (unspec:SVE_F [(match_operand:SVE_F 1 "register_operand")]
                         FRINT)]
          UNSPEC_MERGE_PTRUE))]
  "TARGET_SVE"
  {
    operands[2] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode));
  }
)

;; FRINTy predicated with a PTRUE.
(define_insn "*<frint_pattern><mode>2"
  [(set (match_operand:SVE_F 0 "register_operand" "=w")
        (unspec:SVE_F
          [(match_operand:<VPRED> 1 "register_operand" "Upl")
           (unspec:SVE_F [(match_operand:SVE_F 2 "register_operand" "w")]
                         FRINT)]
          UNSPEC_MERGE_PTRUE))]
  "TARGET_SVE"
  "frint<frint_suffix>\t%0.<Vetype>, %1/m, %2.<Vetype>"
)

;; Unpredicated conversion of floats to integers of the same size (HF to HI,
;; SF to SI or DF to DI).
(define_expand "<fix_trunc_optab><mode><v_int_equiv>2"
  [(set (match_operand:<V_INT_EQUIV> 0 "register_operand")
        (unspec:<V_INT_EQUIV>
          [(match_dup 2)
           (FIXUORS:<V_INT_EQUIV>
             (match_operand:SVE_F 1 "register_operand"))]
          UNSPEC_MERGE_PTRUE))]
  "TARGET_SVE"
  {
    operands[2] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode));
  }
)

;; Conversion of HF to DI, SI or HI, predicated with a PTRUE.
(define_insn "*<fix_trunc_optab>vnx8hf<mode>2"
  [(set (match_operand:SVE_HSDI 0 "register_operand" "=w")
        (unspec:SVE_HSDI
          [(match_operand:<VPRED> 1 "register_operand" "Upl")
           (FIXUORS:SVE_HSDI
             (match_operand:VNx8HF 2 "register_operand" "w"))]
          UNSPEC_MERGE_PTRUE))]
  "TARGET_SVE"
  "fcvtz<su>\t%0.<Vetype>, %1/m, %2.h"
)
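
;; Illustration only (not part of the original sources): the
;; float-to-integer patterns here and below implement vectorized C
;; casts, e.g.:
;;
;;   void
;;   to_int (int *restrict d, float *restrict s, int n)
;;   {
;;     for (int i = 0; i < n; ++i)
;;       d[i] = (int) s[i];  /* FCVTZS on .S elements  */
;;   }
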
;; Conversion of SF to DI or SI, predicated with a PTRUE.
(define_insn "*<fix_trunc_optab>vnx4sf<mode>2"
  [(set (match_operand:SVE_SDI 0 "register_operand" "=w")
        (unspec:SVE_SDI
          [(match_operand:<VPRED> 1 "register_operand" "Upl")
           (FIXUORS:SVE_SDI
             (match_operand:VNx4SF 2 "register_operand" "w"))]
          UNSPEC_MERGE_PTRUE))]
  "TARGET_SVE"
  "fcvtz<su>\t%0.<Vetype>, %1/m, %2.s"
)

;; Conversion of DF to DI or SI, predicated with a PTRUE.
(define_insn "*<fix_trunc_optab>vnx2df<mode>2"
  [(set (match_operand:SVE_SDI 0 "register_operand" "=w")
        (unspec:SVE_SDI
          [(match_operand:VNx2BI 1 "register_operand" "Upl")
           (FIXUORS:SVE_SDI
             (match_operand:VNx2DF 2 "register_operand" "w"))]
          UNSPEC_MERGE_PTRUE))]
  "TARGET_SVE"
  "fcvtz<su>\t%0.<Vetype>, %1/m, %2.d"
)

;; Unpredicated conversion of integers to floats of the same size
;; (HI to HF, SI to SF or DI to DF).
(define_expand "<optab><v_int_equiv><mode>2"
  [(set (match_operand:SVE_F 0 "register_operand")
        (unspec:SVE_F
          [(match_dup 2)
           (FLOATUORS:SVE_F
             (match_operand:<V_INT_EQUIV> 1 "register_operand"))]
          UNSPEC_MERGE_PTRUE))]
  "TARGET_SVE"
  {
    operands[2] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode));
  }
)

;; Conversion of DI, SI or HI to the same number of HFs, predicated
;; with a PTRUE.
(define_insn "*<optab><mode>vnx8hf2"
  [(set (match_operand:VNx8HF 0 "register_operand" "=w")
        (unspec:VNx8HF
          [(match_operand:<VPRED> 1 "register_operand" "Upl")
           (FLOATUORS:VNx8HF
             (match_operand:SVE_HSDI 2 "register_operand" "w"))]
          UNSPEC_MERGE_PTRUE))]
  "TARGET_SVE"
  "<su_optab>cvtf\t%0.h, %1/m, %2.<Vetype>"
)

;; Conversion of DI or SI to the same number of SFs, predicated with a PTRUE.
(define_insn "*<optab><mode>vnx4sf2"
  [(set (match_operand:VNx4SF 0 "register_operand" "=w")
        (unspec:VNx4SF
          [(match_operand:<VPRED> 1 "register_operand" "Upl")
           (FLOATUORS:VNx4SF
             (match_operand:SVE_SDI 2 "register_operand" "w"))]
          UNSPEC_MERGE_PTRUE))]
  "TARGET_SVE"
  "<su_optab>cvtf\t%0.s, %1/m, %2.<Vetype>"
)

;; Conversion of DI or SI to DF, predicated with a PTRUE.
(define_insn "aarch64_sve_<optab><mode>vnx2df2"
  [(set (match_operand:VNx2DF 0 "register_operand" "=w")
        (unspec:VNx2DF
          [(match_operand:VNx2BI 1 "register_operand" "Upl")
           (FLOATUORS:VNx2DF
             (match_operand:SVE_SDI 2 "register_operand" "w"))]
          UNSPEC_MERGE_PTRUE))]
  "TARGET_SVE"
  "<su_optab>cvtf\t%0.d, %1/m, %2.<Vetype>"
)

;; Conversion of DFs to the same number of SFs, or SFs to the same number
;; of HFs.
(define_insn "*trunc<Vwide><mode>2"
  [(set (match_operand:SVE_HSF 0 "register_operand" "=w")
        (unspec:SVE_HSF
          [(match_operand:<VWIDE_PRED> 1 "register_operand" "Upl")
           (unspec:SVE_HSF
             [(match_operand:<VWIDE> 2 "register_operand" "w")]
             UNSPEC_FLOAT_CONVERT)]
          UNSPEC_MERGE_PTRUE))]
  "TARGET_SVE"
  "fcvt\t%0.<Vetype>, %1/m, %2.<Vewtype>"
)
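
;; Illustration only (not part of the original sources): the CVTF
;; patterns above handle integer-to-float casts of equal element
;; width, e.g. (assuming an LP64 long):
;;
;;   void
;;   to_double (double *restrict d, long *restrict s, int n)
;;   {
;;     for (int i = 0; i < n; ++i)
;;       d[i] = (double) s[i];  /* SCVTF on .D elements  */
;;   }
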
;; Conversion of SFs to the same number of DFs, or HFs to the same number
;; of SFs.
(define_insn "aarch64_sve_extend<mode><Vwide>2"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (unspec:<VWIDE>
          [(match_operand:<VWIDE_PRED> 1 "register_operand" "Upl")
           (unspec:<VWIDE>
             [(match_operand:SVE_HSF 2 "register_operand" "w")]
             UNSPEC_FLOAT_CONVERT)]
          UNSPEC_MERGE_PTRUE))]
  "TARGET_SVE"
  "fcvt\t%0.<Vewtype>, %1/m, %2.<Vetype>"
)

;; Unpack the low or high half of a predicate, where "high" refers to
;; the low-numbered lanes for big-endian and the high-numbered lanes
;; for little-endian.
(define_expand "vec_unpack<su>_<perm_hilo>_<mode>"
  [(match_operand:<VWIDE> 0 "register_operand")
   (unspec:<VWIDE> [(match_operand:PRED_BHS 1 "register_operand")]
                   UNPACK)]
  "TARGET_SVE"
  {
    emit_insn ((<hi_lanes_optab>
                ? gen_aarch64_sve_punpkhi_<PRED_BHS:mode>
                : gen_aarch64_sve_punpklo_<PRED_BHS:mode>)
               (operands[0], operands[1]));
    DONE;
  }
)

;; PUNPKHI and PUNPKLO.
(define_insn "aarch64_sve_punpk<perm_hilo>_<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=Upa")
        (unspec:<VWIDE> [(match_operand:PRED_BHS 1 "register_operand" "Upa")]
                        UNPACK_UNSIGNED))]
  "TARGET_SVE"
  "punpk<perm_hilo>\t%0.h, %1.b"
)

;; Unpack the low or high half of a vector, where "high" refers to
;; the low-numbered lanes for big-endian and the high-numbered lanes
;; for little-endian.
(define_expand "vec_unpack<su>_<perm_hilo>_<SVE_BHSI:mode>"
  [(match_operand:<VWIDE> 0 "register_operand")
   (unspec:<VWIDE> [(match_operand:SVE_BHSI 1 "register_operand")] UNPACK)]
  "TARGET_SVE"
  {
    emit_insn ((<hi_lanes_optab>
                ? gen_aarch64_sve_<su>unpkhi_<SVE_BHSI:mode>
                : gen_aarch64_sve_<su>unpklo_<SVE_BHSI:mode>)
               (operands[0], operands[1]));
    DONE;
  }
)

;; SUNPKHI, UUNPKHI, SUNPKLO and UUNPKLO.
(define_insn "aarch64_sve_<su>unpk<perm_hilo>_<SVE_BHSI:mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (unspec:<VWIDE> [(match_operand:SVE_BHSI 1 "register_operand" "w")]
                        UNPACK))]
  "TARGET_SVE"
  "<su>unpk<perm_hilo>\t%0.<Vewtype>, %1.<Vetype>"
)

;; Unpack one half of a VNx4SF to VNx2DF, or one half of a VNx8HF to VNx4SF.
;; First unpack the source without conversion, then float-convert the
;; unpacked source.
(define_expand "vec_unpacks_<perm_hilo>_<mode>"
  [(match_operand:<VWIDE> 0 "register_operand")
   (unspec:SVE_HSF [(match_operand:SVE_HSF 1 "register_operand")]
                   UNPACK_UNSIGNED)]
  "TARGET_SVE"
  {
    /* Use ZIP to do the unpack, since we don't care about the upper halves
       and since it has the nice property of not needing any subregs.
       If using UUNPK* turns out to be preferable, we could model it as
       a ZIP whose first operand is zero.  */
    rtx temp = gen_reg_rtx (<MODE>mode);
    emit_insn ((<hi_lanes_optab>
                ? gen_aarch64_sve_zip2<mode>
                : gen_aarch64_sve_zip1<mode>)
               (temp, operands[1], operands[1]));
    rtx ptrue = force_reg (<VWIDE_PRED>mode, CONSTM1_RTX (<VWIDE_PRED>mode));
    emit_insn (gen_aarch64_sve_extend<mode><Vwide>2 (operands[0],
                                                     ptrue, temp));
    DONE;
  }
)
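
;; Illustration only (not part of the original sources): vec_unpacks
;; above serves widening loops such as
;;
;;   void
;;   widen (double *restrict d, float *restrict s, int n)
;;   {
;;     for (int i = 0; i < n; ++i)
;;       d[i] = (double) s[i];
;;   }
;;
;; where ZIP1/ZIP2 places each selected .S lane within a .D element
;; and FCVT then converts in place under a PTRUE.
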
;; Unpack one half of a VNx4SI to VNx2DF.  First unpack from VNx4SI
;; to VNx2DI, reinterpret the VNx2DI as a VNx4SI, then convert the
;; unpacked VNx4SI to VNx2DF.
(define_expand "vec_unpack<su_optab>_float_<perm_hilo>_vnx4si"
  [(match_operand:VNx2DF 0 "register_operand")
   (FLOATUORS:VNx2DF
     (unspec:VNx2DI [(match_operand:VNx4SI 1 "register_operand")]
                    UNPACK_UNSIGNED))]
  "TARGET_SVE"
  {
    /* Use ZIP to do the unpack, since we don't care about the upper halves
       and since it has the nice property of not needing any subregs.
       If using UUNPK* turns out to be preferable, we could model it as
       a ZIP whose first operand is zero.  */
    rtx temp = gen_reg_rtx (VNx4SImode);
    emit_insn ((<hi_lanes_optab>
                ? gen_aarch64_sve_zip2vnx4si
                : gen_aarch64_sve_zip1vnx4si)
               (temp, operands[1], operands[1]));
    rtx ptrue = force_reg (VNx2BImode, CONSTM1_RTX (VNx2BImode));
    emit_insn (gen_aarch64_sve_<FLOATUORS:optab>vnx4sivnx2df2 (operands[0],
                                                               ptrue, temp));
    DONE;
  }
)

;; Predicate pack.  Use UZP1 on the narrower type, which discards
;; the high part of each wide element.
(define_insn "vec_pack_trunc_<Vwide>"
  [(set (match_operand:PRED_BHS 0 "register_operand" "=Upa")
        (unspec:PRED_BHS
          [(match_operand:<VWIDE> 1 "register_operand" "Upa")
           (match_operand:<VWIDE> 2 "register_operand" "Upa")]
          UNSPEC_PACK))]
  "TARGET_SVE"
  "uzp1\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>"
)

;; Integer pack.  Use UZP1 on the narrower type, which discards
;; the high part of each wide element.
(define_insn "vec_pack_trunc_<Vwide>"
  [(set (match_operand:SVE_BHSI 0 "register_operand" "=w")
        (unspec:SVE_BHSI
          [(match_operand:<VWIDE> 1 "register_operand" "w")
           (match_operand:<VWIDE> 2 "register_operand" "w")]
          UNSPEC_PACK))]
  "TARGET_SVE"
  "uzp1\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>"
)

;; Convert two vectors of DF to SF, or two vectors of SF to HF, and pack
;; the results into a single vector.
(define_expand "vec_pack_trunc_<Vwide>"
  [(set (match_dup 4)
        (unspec:SVE_HSF
          [(match_dup 3)
           (unspec:SVE_HSF [(match_operand:<VWIDE> 1 "register_operand")]
                           UNSPEC_FLOAT_CONVERT)]
          UNSPEC_MERGE_PTRUE))
   (set (match_dup 5)
        (unspec:SVE_HSF
          [(match_dup 3)
           (unspec:SVE_HSF [(match_operand:<VWIDE> 2 "register_operand")]
                           UNSPEC_FLOAT_CONVERT)]
          UNSPEC_MERGE_PTRUE))
   (set (match_operand:SVE_HSF 0 "register_operand")
        (unspec:SVE_HSF [(match_dup 4) (match_dup 5)] UNSPEC_UZP1))]
  "TARGET_SVE"
  {
    operands[3] = force_reg (<VWIDE_PRED>mode, CONSTM1_RTX (<VWIDE_PRED>mode));
    operands[4] = gen_reg_rtx (<MODE>mode);
    operands[5] = gen_reg_rtx (<MODE>mode);
  }
)

;; Convert two vectors of DF to SI and pack the results into a single vector.
(define_expand "vec_pack_<su>fix_trunc_vnx2df"
  [(set (match_dup 4)
        (unspec:VNx4SI
          [(match_dup 3)
           (FIXUORS:VNx4SI (match_operand:VNx2DF 1 "register_operand"))]
          UNSPEC_MERGE_PTRUE))
   (set (match_dup 5)
        (unspec:VNx4SI
          [(match_dup 3)
           (FIXUORS:VNx4SI (match_operand:VNx2DF 2 "register_operand"))]
          UNSPEC_MERGE_PTRUE))
   (set (match_operand:VNx4SI 0 "register_operand")
        (unspec:VNx4SI [(match_dup 4) (match_dup 5)] UNSPEC_UZP1))]
  "TARGET_SVE"
  {
    operands[3] = force_reg (VNx2BImode, CONSTM1_RTX (VNx2BImode));
    operands[4] = gen_reg_rtx (VNx4SImode);
    operands[5] = gen_reg_rtx (VNx4SImode);
  }
)
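
;; Illustration only (not part of the original sources): the packing
;; expanders above serve narrowing loops such as
;;
;;   void
;;   narrow (float *restrict d, double *restrict s, int n)
;;   {
;;     for (int i = 0; i < n; ++i)
;;       d[i] = (float) s[i];  /* two FCVTs, then UZP1  */
;;   }
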
;; Predicated floating-point operations.
(define_insn "cond_<optab><mode>"
  [(set (match_operand:SVE_F 0 "register_operand" "=w")
        (unspec:SVE_F
          [(match_operand:<VPRED> 1 "register_operand" "Upl")
           (match_operand:SVE_F 2 "register_operand" "0")
           (match_operand:SVE_F 3 "register_operand" "w")]
          SVE_COND_FP_OP))]
  "TARGET_SVE"
  "<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
)

;; Shift an SVE vector left and insert a scalar into element 0.
(define_insn "vec_shl_insert_<mode>"
  [(set (match_operand:SVE_ALL 0 "register_operand" "=w, w")
        (unspec:SVE_ALL
          [(match_operand:SVE_ALL 1 "register_operand" "0, 0")
           (match_operand:<VEL> 2 "register_operand" "rZ, w")]
          UNSPEC_INSR))]
  "TARGET_SVE"
  "@
   insr\t%0.<Vetype>, %<vwcore>2
   insr\t%0.<Vetype>, %<Vetype>2"
)
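
;; Illustration only (not part of the original sources): INSR shifts
;; the vector up by one element and writes a scalar into element 0.
;; One use is seeding the initial vector of a reduction, e.g.
;; inserting the scalar start value into a vector of zeros:
;;
;;   /* Pseudo-C for the lane behavior of INSR:  */
;;   for (int i = NUNITS - 1; i > 0; --i)
;;     v[i] = v[i - 1];
;;   v[0] = s;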