;; Machine description for AArch64 AdvSIMD architecture.
;; Copyright (C) 2011-2021 Free Software Foundation, Inc.
;; Contributed by ARM Ltd.
;;
;; This file is part of GCC.
;;
;; GCC is free software; you can redistribute it and/or modify it
;; under the terms of the GNU General Public License as published by
;; the Free Software Foundation; either version 3, or (at your option)
;; any later version.
;;
;; GCC is distributed in the hope that it will be useful, but
;; WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
;; General Public License for more details.
;;
;; You should have received a copy of the GNU General Public License
;; along with GCC; see the file COPYING3.  If not see
;; <http://www.gnu.org/licenses/>.

(define_expand "mov<mode>"
  [(set (match_operand:VALL_F16MOV 0 "nonimmediate_operand")
	(match_operand:VALL_F16MOV 1 "general_operand"))]
  "TARGET_SIMD"
  "
  /* Force the operand into a register if it is not an
     immediate whose use can be replaced with xzr.
     If the mode is 16 bytes wide, then we will be doing
     a stp in DI mode, so we check the validity of that.
     If the mode is 8 bytes wide, then we will be doing a
     normal str, so the check need not apply.  */
  if (GET_CODE (operands[0]) == MEM
      && !(aarch64_simd_imm_zero (operands[1], <MODE>mode)
	   && ((known_eq (GET_MODE_SIZE (<MODE>mode), 16)
		&& aarch64_mem_pair_operand (operands[0], DImode))
	       || known_eq (GET_MODE_SIZE (<MODE>mode), 8))))
    operands[1] = force_reg (<MODE>mode, operands[1]);

  /* If a constant is too complex to force to memory (e.g. because it
     contains CONST_POLY_INTs), build it up from individual elements instead.
     We should only need to do this before RA; aarch64_legitimate_constant_p
     should ensure that we don't try to rematerialize the constant later.  */
  if (GET_CODE (operands[1]) == CONST_VECTOR
      && targetm.cannot_force_const_mem (<MODE>mode, operands[1]))
    {
      aarch64_expand_vector_init (operands[0], operands[1]);
      DONE;
    }
  "
)
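;; As an illustration of the zero-store special case above (a sketch,
;; assuming <arm_neon.h>; the exact output depends on context and tuning):
;;
;;   void store_zero (int32x4_t *p) { *p = vdupq_n_s32 (0); }
;;
;; can store the zero directly from the general register file as
;;
;;   stp xzr, xzr, [x0]
;;
;; via the Dz alternatives of the move patterns below, instead of first
;; materializing a zero vector register.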
(define_expand "movmisalign<mode>"
  [(set (match_operand:VALL 0 "nonimmediate_operand")
	(match_operand:VALL 1 "general_operand"))]
  "TARGET_SIMD && !STRICT_ALIGNMENT"
{
  /* This pattern is not permitted to fail during expansion: if both arguments
     are non-registers (e.g. memory := constant, which can be created by the
     auto-vectorizer), force operand 1 into a register.  */
  if (!register_operand (operands[0], <MODE>mode)
      && !register_operand (operands[1], <MODE>mode))
    operands[1] = force_reg (<MODE>mode, operands[1]);
})

(define_insn "aarch64_simd_dup<mode>"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w, w")
	(vec_duplicate:VDQ_I
	  (match_operand:<VEL> 1 "register_operand" "w,?r")))]
  "TARGET_SIMD"
  "@
   dup\\t%0.<Vtype>, %1.<Vetype>[0]
   dup\\t%0.<Vtype>, %<vw>1"
  [(set_attr "type" "neon_dup<q>, neon_from_gp<q>")]
)

(define_insn "aarch64_simd_dup<mode>"
  [(set (match_operand:VDQF_F16 0 "register_operand" "=w")
	(vec_duplicate:VDQF_F16
	  (match_operand:<VEL> 1 "register_operand" "w")))]
  "TARGET_SIMD"
  "dup\\t%0.<Vtype>, %1.<Vetype>[0]"
  [(set_attr "type" "neon_dup<q>")]
)

(define_insn "aarch64_dup_lane<mode>"
  [(set (match_operand:VALL_F16 0 "register_operand" "=w")
	(vec_duplicate:VALL_F16
	  (vec_select:<VEL>
	    (match_operand:VALL_F16 1 "register_operand" "w")
	    (parallel [(match_operand:SI 2 "immediate_operand" "i")])
	  )))]
  "TARGET_SIMD"
  {
    operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
    return "dup\\t%0.<Vtype>, %1.<Vetype>[%2]";
  }
  [(set_attr "type" "neon_dup<q>")]
)

(define_insn "aarch64_dup_lane_<vswap_width_name><mode>"
  [(set (match_operand:VALL_F16_NO_V2Q 0 "register_operand" "=w")
	(vec_duplicate:VALL_F16_NO_V2Q
	  (vec_select:<VEL>
	    (match_operand:<VSWAP_WIDTH> 1 "register_operand" "w")
	    (parallel [(match_operand:SI 2 "immediate_operand" "i")])
	  )))]
  "TARGET_SIMD"
  {
    operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
    return "dup\\t%0.<Vtype>, %1.<Vetype>[%2]";
  }
  [(set_attr "type" "neon_dup<q>")]
)

(define_insn "*aarch64_simd_mov<VDMOV:mode>"
  [(set (match_operand:VDMOV 0 "nonimmediate_operand"
		"=w, m,  m,  w, ?r, ?w, ?r, w")
	(match_operand:VDMOV 1 "general_operand"
		"m,  Dz, w,  w,  w,  r,  r, Dn"))]
  "TARGET_SIMD
   && (register_operand (operands[0], <MODE>mode)
       || aarch64_simd_reg_or_zero (operands[1], <MODE>mode))"
{
   switch (which_alternative)
     {
     case 0: return "ldr\t%d0, %1";
     case 1: return "str\txzr, %0";
     case 2: return "str\t%d1, %0";
     case 3: return "mov\t%0.<Vbtype>, %1.<Vbtype>";
     case 4: return "umov\t%0, %1.d[0]";
     case 5: return "fmov\t%d0, %1";
     case 6: return "mov\t%0, %1";
     case 7:
	return aarch64_output_simd_mov_immediate (operands[1], 64);
     default: gcc_unreachable ();
     }
}
  [(set_attr "type" "neon_load1_1reg<q>, store_8, neon_store1_1reg<q>,\
		     neon_logic<q>, neon_to_gp<q>, f_mcr,\
		     mov_reg, neon_move<q>")]
)
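;; 128-bit vector moves that involve general registers cannot be done in
;; a single instruction: the "multiple" alternatives below emit "#" and
;; are split after reload into two 64-bit moves (see the define_splits
;; further down).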
(define_insn "*aarch64_simd_mov<VQMOV:mode>"
  [(set (match_operand:VQMOV 0 "nonimmediate_operand"
		"=w, Umn,  m,  w, ?r, ?w, ?r, w")
	(match_operand:VQMOV 1 "general_operand"
		"m,  Dz, w,  w,  w,  r,  r, Dn"))]
  "TARGET_SIMD
   && (register_operand (operands[0], <MODE>mode)
       || aarch64_simd_reg_or_zero (operands[1], <MODE>mode))"
{
  switch (which_alternative)
    {
    case 0:
	return "ldr\t%q0, %1";
    case 1:
	return "stp\txzr, xzr, %0";
    case 2:
	return "str\t%q1, %0";
    case 3:
	return "mov\t%0.<Vbtype>, %1.<Vbtype>";
    case 4:
    case 5:
    case 6:
	return "#";
    case 7:
	return aarch64_output_simd_mov_immediate (operands[1], 128);
    default:
	gcc_unreachable ();
    }
}
  [(set_attr "type" "neon_load1_1reg<q>, store_16, neon_store1_1reg<q>,\
		     neon_logic<q>, multiple, multiple,\
		     multiple, neon_move<q>")
   (set_attr "length" "4,4,4,4,8,8,8,4")]
)

;; When storing lane zero we can use the normal STR and its more permissive
;; addressing modes.

(define_insn "aarch64_store_lane0<mode>"
  [(set (match_operand:<VEL> 0 "memory_operand" "=m")
	(vec_select:<VEL> (match_operand:VALL_F16 1 "register_operand" "w")
			  (parallel [(match_operand 2 "const_int_operand" "n")])))]
  "TARGET_SIMD
   && ENDIAN_LANE_N (<nunits>, INTVAL (operands[2])) == 0"
  "str\\t%<Vetype>1, %0"
  [(set_attr "type" "neon_store1_1reg<q>")]
)

(define_insn "load_pair<DREG:mode><DREG2:mode>"
  [(set (match_operand:DREG 0 "register_operand" "=w")
	(match_operand:DREG 1 "aarch64_mem_pair_operand" "Ump"))
   (set (match_operand:DREG2 2 "register_operand" "=w")
	(match_operand:DREG2 3 "memory_operand" "m"))]
  "TARGET_SIMD
   && rtx_equal_p (XEXP (operands[3], 0),
		   plus_constant (Pmode,
				  XEXP (operands[1], 0),
				  GET_MODE_SIZE (<DREG:MODE>mode)))"
  "ldp\\t%d0, %d2, %z1"
  [(set_attr "type" "neon_ldp")]
)

(define_insn "vec_store_pair<DREG:mode><DREG2:mode>"
  [(set (match_operand:DREG 0 "aarch64_mem_pair_operand" "=Ump")
	(match_operand:DREG 1 "register_operand" "w"))
   (set (match_operand:DREG2 2 "memory_operand" "=m")
	(match_operand:DREG2 3 "register_operand" "w"))]
  "TARGET_SIMD
   && rtx_equal_p (XEXP (operands[2], 0),
		   plus_constant (Pmode,
				  XEXP (operands[0], 0),
				  GET_MODE_SIZE (<DREG:MODE>mode)))"
  "stp\\t%d1, %d3, %z0"
  [(set_attr "type" "neon_stp")]
)

(define_insn "load_pair<VQ:mode><VQ2:mode>"
  [(set (match_operand:VQ 0 "register_operand" "=w")
	(match_operand:VQ 1 "aarch64_mem_pair_operand" "Ump"))
   (set (match_operand:VQ2 2 "register_operand" "=w")
	(match_operand:VQ2 3 "memory_operand" "m"))]
  "TARGET_SIMD
   && rtx_equal_p (XEXP (operands[3], 0),
		   plus_constant (Pmode,
				  XEXP (operands[1], 0),
				  GET_MODE_SIZE (<VQ:MODE>mode)))"
  "ldp\\t%q0, %q2, %z1"
  [(set_attr "type" "neon_ldp_q")]
)

(define_insn "vec_store_pair<VQ:mode><VQ2:mode>"
  [(set (match_operand:VQ 0 "aarch64_mem_pair_operand" "=Ump")
	(match_operand:VQ 1 "register_operand" "w"))
   (set (match_operand:VQ2 2 "memory_operand" "=m")
	(match_operand:VQ2 3 "register_operand" "w"))]
  "TARGET_SIMD && rtx_equal_p (XEXP (operands[2], 0),
			       plus_constant (Pmode,
					      XEXP (operands[0], 0),
					      GET_MODE_SIZE (<VQ:MODE>mode)))"
  "stp\\t%q1, %q3, %z0"
  [(set_attr "type" "neon_stp_q")]
)


(define_split
  [(set (match_operand:VQMOV 0 "register_operand" "")
	(match_operand:VQMOV 1 "register_operand" ""))]
  "TARGET_SIMD && reload_completed
   && GP_REGNUM_P (REGNO (operands[0]))
   && GP_REGNUM_P (REGNO (operands[1]))"
  [(const_int 0)]
{
  aarch64_simd_emit_reg_reg_move (operands, DImode, 2);
  DONE;
})

(define_split
  [(set (match_operand:VQMOV 0 "register_operand" "")
	(match_operand:VQMOV 1 "register_operand" ""))]
  "TARGET_SIMD && reload_completed
   && ((FP_REGNUM_P (REGNO (operands[0])) && GP_REGNUM_P (REGNO (operands[1])))
       || (GP_REGNUM_P (REGNO (operands[0])) && FP_REGNUM_P (REGNO (operands[1]))))"
  [(const_int 0)]
{
  aarch64_split_simd_move (operands[0], operands[1]);
  DONE;
})
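;; A likely sequence for the FP-to-GP direction of the split above (a
;; sketch; register allocation picks the actual registers):
;;
;;   umov x0, v0.d[0]
;;   umov x1, v0.d[1]
;;
;; while the GP-to-FP direction builds the result from the two 64-bit
;; halves using the move_lo_quad/move_hi_quad patterns, as the expander
;; below does.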
(define_expand "@aarch64_split_simd_mov<mode>"
  [(set (match_operand:VQMOV 0)
	(match_operand:VQMOV 1))]
  "TARGET_SIMD"
  {
    rtx dst = operands[0];
    rtx src = operands[1];

    if (GP_REGNUM_P (REGNO (src)))
      {
	rtx src_low_part = gen_lowpart (<VHALF>mode, src);
	rtx src_high_part = gen_highpart (<VHALF>mode, src);

	emit_insn
	  (gen_move_lo_quad_<mode> (dst, src_low_part));
	emit_insn
	  (gen_move_hi_quad_<mode> (dst, src_high_part));
      }

    else
      {
	rtx dst_low_part = gen_lowpart (<VHALF>mode, dst);
	rtx dst_high_part = gen_highpart (<VHALF>mode, dst);
	rtx lo = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
	rtx hi = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
	emit_insn (gen_aarch64_get_half<mode> (dst_low_part, src, lo));
	emit_insn (gen_aarch64_get_half<mode> (dst_high_part, src, hi));
      }
    DONE;
  }
)

(define_expand "aarch64_get_half<mode>"
  [(set (match_operand:<VHALF> 0 "register_operand")
	(vec_select:<VHALF>
	  (match_operand:VQMOV 1 "register_operand")
	  (match_operand 2 "ascending_int_parallel")))]
  "TARGET_SIMD"
)

(define_expand "aarch64_get_low<mode>"
  [(match_operand:<VHALF> 0 "register_operand")
   (match_operand:VQMOV 1 "register_operand")]
  "TARGET_SIMD"
  {
    rtx lo = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
    emit_insn (gen_aarch64_get_half<mode> (operands[0], operands[1], lo));
    DONE;
  }
)

(define_expand "aarch64_get_high<mode>"
  [(match_operand:<VHALF> 0 "register_operand")
   (match_operand:VQMOV 1 "register_operand")]
  "TARGET_SIMD"
  {
    rtx hi = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
    emit_insn (gen_aarch64_get_half<mode> (operands[0], operands[1], hi));
    DONE;
  }
)

(define_insn_and_split "aarch64_simd_mov_from_<mode>low"
  [(set (match_operand:<VHALF> 0 "register_operand" "=w,?r")
	(vec_select:<VHALF>
	  (match_operand:VQMOV_NO2E 1 "register_operand" "w,w")
	  (match_operand:VQMOV_NO2E 2 "vect_par_cnst_lo_half" "")))]
  "TARGET_SIMD"
  "@
   #
   umov\t%0, %1.d[0]"
  "&& reload_completed && aarch64_simd_register (operands[0], <VHALF>mode)"
  [(set (match_dup 0) (match_dup 1))]
  {
    operands[1] = aarch64_replace_reg_mode (operands[1], <VHALF>mode);
  }
  [(set_attr "type" "mov_reg,neon_to_gp<q>")
   (set_attr "length" "4")]
)

(define_insn "aarch64_simd_mov_from_<mode>high"
  [(set (match_operand:<VHALF> 0 "register_operand" "=w,?r")
	(vec_select:<VHALF>
	  (match_operand:VQMOV_NO2E 1 "register_operand" "w,w")
	  (match_operand:VQMOV_NO2E 2 "vect_par_cnst_hi_half" "")))]
  "TARGET_SIMD"
  "@
   dup\\t%d0, %1.d[1]
   umov\t%0, %1.d[1]"
  [(set_attr "type" "neon_dup<q>,neon_to_gp<q>")
   (set_attr "length" "4")]
)

(define_insn "orn<mode>3"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
	(ior:VDQ_I (not:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w"))
		   (match_operand:VDQ_I 2 "register_operand" "w")))]
  "TARGET_SIMD"
  "orn\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>"
  [(set_attr "type" "neon_logic<q>")]
)

(define_insn "bic<mode>3"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
	(and:VDQ_I (not:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w"))
		   (match_operand:VDQ_I 2 "register_operand" "w")))]
  "TARGET_SIMD"
  "bic\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>"
  [(set_attr "type" "neon_logic<q>")]
)
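;; Note the operand order in the ORN/BIC templates above: the RTL
;; canonical form puts the inverted operand first, while the assembly
;; syntax names the non-inverted register first, hence %2 before %1 in
;; the output strings.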
"TARGET_SIMD" 387 "add\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>" 388 [(set_attr "type" "neon_add<q>")] 389) 390 391(define_insn "sub<mode>3" 392 [(set (match_operand:VDQ_I 0 "register_operand" "=w") 393 (minus:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w") 394 (match_operand:VDQ_I 2 "register_operand" "w")))] 395 "TARGET_SIMD" 396 "sub\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>" 397 [(set_attr "type" "neon_sub<q>")] 398) 399 400(define_insn "mul<mode>3" 401 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w") 402 (mult:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w") 403 (match_operand:VDQ_BHSI 2 "register_operand" "w")))] 404 "TARGET_SIMD" 405 "mul\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>" 406 [(set_attr "type" "neon_mul_<Vetype><q>")] 407) 408 409(define_insn "bswap<mode>2" 410 [(set (match_operand:VDQHSD 0 "register_operand" "=w") 411 (bswap:VDQHSD (match_operand:VDQHSD 1 "register_operand" "w")))] 412 "TARGET_SIMD" 413 "rev<Vrevsuff>\\t%0.<Vbtype>, %1.<Vbtype>" 414 [(set_attr "type" "neon_rev<q>")] 415) 416 417(define_insn "aarch64_rbit<mode>" 418 [(set (match_operand:VB 0 "register_operand" "=w") 419 (unspec:VB [(match_operand:VB 1 "register_operand" "w")] 420 UNSPEC_RBIT))] 421 "TARGET_SIMD" 422 "rbit\\t%0.<Vbtype>, %1.<Vbtype>" 423 [(set_attr "type" "neon_rbit")] 424) 425 426(define_expand "ctz<mode>2" 427 [(set (match_operand:VS 0 "register_operand") 428 (ctz:VS (match_operand:VS 1 "register_operand")))] 429 "TARGET_SIMD" 430 { 431 emit_insn (gen_bswap<mode>2 (operands[0], operands[1])); 432 rtx op0_castsi2qi = simplify_gen_subreg(<VS:VSI2QI>mode, operands[0], 433 <MODE>mode, 0); 434 emit_insn (gen_aarch64_rbit<VS:vsi2qi> (op0_castsi2qi, op0_castsi2qi)); 435 emit_insn (gen_clz<mode>2 (operands[0], operands[0])); 436 DONE; 437 } 438) 439 440(define_expand "xorsign<mode>3" 441 [(match_operand:VHSDF 0 "register_operand") 442 (match_operand:VHSDF 1 "register_operand") 443 (match_operand:VHSDF 2 "register_operand")] 444 "TARGET_SIMD" 445{ 446 447 machine_mode imode = <V_INT_EQUIV>mode; 448 rtx v_bitmask = gen_reg_rtx (imode); 449 rtx op1x = gen_reg_rtx (imode); 450 rtx op2x = gen_reg_rtx (imode); 451 452 rtx arg1 = lowpart_subreg (imode, operands[1], <MODE>mode); 453 rtx arg2 = lowpart_subreg (imode, operands[2], <MODE>mode); 454 455 int bits = GET_MODE_UNIT_BITSIZE (<MODE>mode) - 1; 456 457 emit_move_insn (v_bitmask, 458 aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode, 459 HOST_WIDE_INT_M1U << bits)); 460 461 emit_insn (gen_and<v_int_equiv>3 (op2x, v_bitmask, arg2)); 462 emit_insn (gen_xor<v_int_equiv>3 (op1x, arg1, op2x)); 463 emit_move_insn (operands[0], 464 lowpart_subreg (<MODE>mode, op1x, imode)); 465 DONE; 466} 467) 468 469;; The fcadd and fcmla patterns are made UNSPEC for the explicitly due to the 470;; fact that their usage need to guarantee that the source vectors are 471;; contiguous. It would be wrong to describe the operation without being able 472;; to describe the permute that is also required, but even if that is done 473;; the permute would have been created as a LOAD_LANES which means the values 474;; in the registers are in the wrong order. 
(define_insn "aarch64_fcadd<rot><mode>"
  [(set (match_operand:VHSDF 0 "register_operand" "=w")
	(unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
		       (match_operand:VHSDF 2 "register_operand" "w")]
		       FCADD))]
  "TARGET_COMPLEX"
  "fcadd\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>, #<rot>"
  [(set_attr "type" "neon_fcadd")]
)

(define_expand "cadd<rot><mode>3"
  [(set (match_operand:VHSDF 0 "register_operand")
	(unspec:VHSDF [(match_operand:VHSDF 1 "register_operand")
		       (match_operand:VHSDF 2 "register_operand")]
		       FCADD))]
  "TARGET_COMPLEX && !BYTES_BIG_ENDIAN"
)

(define_insn "aarch64_fcmla<rot><mode>"
  [(set (match_operand:VHSDF 0 "register_operand" "=w")
	(plus:VHSDF (match_operand:VHSDF 1 "register_operand" "0")
		    (unspec:VHSDF [(match_operand:VHSDF 2 "register_operand" "w")
				   (match_operand:VHSDF 3 "register_operand" "w")]
				   FCMLA)))]
  "TARGET_COMPLEX"
  "fcmla\t%0.<Vtype>, %2.<Vtype>, %3.<Vtype>, #<rot>"
  [(set_attr "type" "neon_fcmla")]
)


(define_insn "aarch64_fcmla_lane<rot><mode>"
  [(set (match_operand:VHSDF 0 "register_operand" "=w")
	(plus:VHSDF (match_operand:VHSDF 1 "register_operand" "0")
		    (unspec:VHSDF [(match_operand:VHSDF 2 "register_operand" "w")
				   (match_operand:VHSDF 3 "register_operand" "w")
				   (match_operand:SI 4 "const_int_operand" "n")]
				   FCMLA)))]
  "TARGET_COMPLEX"
{
  operands[4] = aarch64_endian_lane_rtx (<VHALF>mode, INTVAL (operands[4]));
  return "fcmla\t%0.<Vtype>, %2.<Vtype>, %3.<FCMLA_maybe_lane>, #<rot>";
}
  [(set_attr "type" "neon_fcmla")]
)

(define_insn "aarch64_fcmla_laneq<rot>v4hf"
  [(set (match_operand:V4HF 0 "register_operand" "=w")
	(plus:V4HF (match_operand:V4HF 1 "register_operand" "0")
		   (unspec:V4HF [(match_operand:V4HF 2 "register_operand" "w")
				 (match_operand:V8HF 3 "register_operand" "w")
				 (match_operand:SI 4 "const_int_operand" "n")]
				 FCMLA)))]
  "TARGET_COMPLEX"
{
  operands[4] = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[4]));
  return "fcmla\t%0.4h, %2.4h, %3.h[%4], #<rot>";
}
  [(set_attr "type" "neon_fcmla")]
)

(define_insn "aarch64_fcmlaq_lane<rot><mode>"
  [(set (match_operand:VQ_HSF 0 "register_operand" "=w")
	(plus:VQ_HSF (match_operand:VQ_HSF 1 "register_operand" "0")
		     (unspec:VQ_HSF [(match_operand:VQ_HSF 2 "register_operand" "w")
				     (match_operand:<VHALF> 3 "register_operand" "w")
				     (match_operand:SI 4 "const_int_operand" "n")]
				     FCMLA)))]
  "TARGET_COMPLEX"
{
  int nunits = GET_MODE_NUNITS (<VHALF>mode).to_constant ();
  operands[4]
    = gen_int_mode (ENDIAN_LANE_N (nunits / 2, INTVAL (operands[4])), SImode);
  return "fcmla\t%0.<Vtype>, %2.<Vtype>, %3.<FCMLA_maybe_lane>, #<rot>";
}
  [(set_attr "type" "neon_fcmla")]
)

;; The complex mla/mls operations always need to expand to two instructions.
;; The first operation does half the computation and the second does the
;; remainder.  Because of this, expand early.
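;; For the non-conjugate case the two halves are the #0 and #90
;; rotations: the first FCMLA accumulates the products involving the
;; real parts of the multiplier lanes and the second those involving
;; the imaginary parts, which together give the full complex multiply.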
(define_expand "cml<fcmac1><conj_op><mode>4"
  [(set (match_operand:VHSDF 0 "register_operand")
	(plus:VHSDF (match_operand:VHSDF 1 "register_operand")
		    (unspec:VHSDF [(match_operand:VHSDF 2 "register_operand")
				   (match_operand:VHSDF 3 "register_operand")]
				   FCMLA_OP)))]
  "TARGET_COMPLEX && !BYTES_BIG_ENDIAN"
{
  rtx tmp = gen_reg_rtx (<MODE>mode);
  emit_insn (gen_aarch64_fcmla<rotsplit1><mode> (tmp, operands[1],
						 operands[3], operands[2]));
  emit_insn (gen_aarch64_fcmla<rotsplit2><mode> (operands[0], tmp,
						 operands[3], operands[2]));
  DONE;
})

;; The complex mul operations always need to expand to two instructions.
;; The first operation does half the computation and the second does the
;; remainder.  Because of this, expand early.
(define_expand "cmul<conj_op><mode>3"
  [(set (match_operand:VHSDF 0 "register_operand")
	(unspec:VHSDF [(match_operand:VHSDF 1 "register_operand")
		       (match_operand:VHSDF 2 "register_operand")]
		       FCMUL_OP))]
  "TARGET_COMPLEX && !BYTES_BIG_ENDIAN"
{
  rtx tmp = force_reg (<MODE>mode, CONST0_RTX (<MODE>mode));
  rtx res1 = gen_reg_rtx (<MODE>mode);
  emit_insn (gen_aarch64_fcmla<rotsplit1><mode> (res1, tmp,
						 operands[2], operands[1]));
  emit_insn (gen_aarch64_fcmla<rotsplit2><mode> (operands[0], res1,
						 operands[2], operands[1]));
  DONE;
})

;; These instructions map to the __builtins for the Dot Product operations.
(define_insn "aarch64_<sur>dot<vsi2qi>"
  [(set (match_operand:VS 0 "register_operand" "=w")
	(plus:VS (match_operand:VS 1 "register_operand" "0")
		 (unspec:VS [(match_operand:<VSI2QI> 2 "register_operand" "w")
			     (match_operand:<VSI2QI> 3 "register_operand" "w")]
			     DOTPROD)))]
  "TARGET_DOTPROD"
  "<sur>dot\\t%0.<Vtype>, %2.<Vdottype>, %3.<Vdottype>"
  [(set_attr "type" "neon_dot<q>")]
)

;; These instructions map to the __builtins for the armv8.6a I8MM usdot
;; (vector) Dot Product operation.
(define_insn "aarch64_usdot<vsi2qi>"
  [(set (match_operand:VS 0 "register_operand" "=w")
	(plus:VS
	  (unspec:VS [(match_operand:<VSI2QI> 2 "register_operand" "w")
		      (match_operand:<VSI2QI> 3 "register_operand" "w")]
		     UNSPEC_USDOT)
	  (match_operand:VS 1 "register_operand" "0")))]
  "TARGET_I8MM"
  "usdot\\t%0.<Vtype>, %2.<Vdottype>, %3.<Vdottype>"
  [(set_attr "type" "neon_dot<q>")]
)

;; These expands map to the Dot Product optab the vectorizer checks for.
;; The auto-vectorizer expects a dot product builtin that also does an
;; accumulation into the provided register.
;; Given the following pattern
;;
;; for (i=0; i<len; i++) {
;;     c = a[i] * b[i];
;;     r += c;
;;   }
;; return r;
;;
;; This can be auto-vectorized to
;; r = a[0]*b[0] + a[1]*b[1] + a[2]*b[2] + a[3]*b[3];
;;
;; given enough iterations.  However the vectorizer can keep unrolling the loop
;; r += a[4]*b[4] + a[5]*b[5] + a[6]*b[6] + a[7]*b[7];
;; r += a[8]*b[8] + a[9]*b[9] + a[10]*b[10] + a[11]*b[11];
;; ...
;;
;; and so the vectorizer provides r, in which the result has to be accumulated.
(define_expand "<sur>dot_prod<vsi2qi>"
  [(set (match_operand:VS 0 "register_operand")
	(plus:VS (unspec:VS [(match_operand:<VSI2QI> 1 "register_operand")
			     (match_operand:<VSI2QI> 2 "register_operand")]
			     DOTPROD)
		 (match_operand:VS 3 "register_operand")))]
  "TARGET_DOTPROD"
{
  emit_insn (
    gen_aarch64_<sur>dot<vsi2qi> (operands[3], operands[3], operands[1],
				  operands[2]));
  emit_insn (gen_rtx_SET (operands[0], operands[3]));
  DONE;
})

;; These instructions map to the __builtins for the Dot Product
;; indexed operations.
(define_insn "aarch64_<sur>dot_lane<vsi2qi>"
  [(set (match_operand:VS 0 "register_operand" "=w")
	(plus:VS (match_operand:VS 1 "register_operand" "0")
		 (unspec:VS [(match_operand:<VSI2QI> 2 "register_operand" "w")
			     (match_operand:V8QI 3 "register_operand" "<h_con>")
			     (match_operand:SI 4 "immediate_operand" "i")]
			     DOTPROD)))]
  "TARGET_DOTPROD"
  {
    operands[4] = aarch64_endian_lane_rtx (V8QImode, INTVAL (operands[4]));
    return "<sur>dot\\t%0.<Vtype>, %2.<Vdottype>, %3.4b[%4]";
  }
  [(set_attr "type" "neon_dot<q>")]
)

(define_insn "aarch64_<sur>dot_laneq<vsi2qi>"
  [(set (match_operand:VS 0 "register_operand" "=w")
	(plus:VS (match_operand:VS 1 "register_operand" "0")
		 (unspec:VS [(match_operand:<VSI2QI> 2 "register_operand" "w")
			     (match_operand:V16QI 3 "register_operand" "<h_con>")
			     (match_operand:SI 4 "immediate_operand" "i")]
			     DOTPROD)))]
  "TARGET_DOTPROD"
  {
    operands[4] = aarch64_endian_lane_rtx (V16QImode, INTVAL (operands[4]));
    return "<sur>dot\\t%0.<Vtype>, %2.<Vdottype>, %3.4b[%4]";
  }
  [(set_attr "type" "neon_dot<q>")]
)
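;; In the dot-product patterns each 32-bit lane of the result
;; accumulates four byte products, i.e. for lane i:
;;   acc[i] += a[4*i+0]*b[4*i+0] + a[4*i+1]*b[4*i+1]
;;	     + a[4*i+2]*b[4*i+2] + a[4*i+3]*b[4*i+3]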
;; These instructions map to the __builtins for the armv8.6a I8MM usdot, sudot
;; (by element) Dot Product operations.
(define_insn "aarch64_<DOTPROD_I8MM:sur>dot_lane<VB:isquadop><VS:vsi2qi>"
  [(set (match_operand:VS 0 "register_operand" "=w")
	(plus:VS
	  (unspec:VS [(match_operand:<VS:VSI2QI> 2 "register_operand" "w")
		      (match_operand:VB 3 "register_operand" "w")
		      (match_operand:SI 4 "immediate_operand" "i")]
		     DOTPROD_I8MM)
	  (match_operand:VS 1 "register_operand" "0")))]
  "TARGET_I8MM"
  {
    int nunits = GET_MODE_NUNITS (<VB:MODE>mode).to_constant ();
    int lane = INTVAL (operands[4]);
    operands[4] = gen_int_mode (ENDIAN_LANE_N (nunits / 4, lane), SImode);
    return "<DOTPROD_I8MM:sur>dot\\t%0.<VS:Vtype>, %2.<VS:Vdottype>, %3.4b[%4]";
  }
  [(set_attr "type" "neon_dot<VS:q>")]
)

(define_expand "copysign<mode>3"
  [(match_operand:VHSDF 0 "register_operand")
   (match_operand:VHSDF 1 "register_operand")
   (match_operand:VHSDF 2 "register_operand")]
  "TARGET_FLOAT && TARGET_SIMD"
{
  rtx v_bitmask = gen_reg_rtx (<V_INT_EQUIV>mode);
  int bits = GET_MODE_UNIT_BITSIZE (<MODE>mode) - 1;

  emit_move_insn (v_bitmask,
		  aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
						     HOST_WIDE_INT_M1U << bits));
  emit_insn (gen_aarch64_simd_bsl<mode> (operands[0], v_bitmask,
					 operands[2], operands[1]));
  DONE;
}
)

(define_insn "*aarch64_mul3_elt<mode>"
  [(set (match_operand:VMUL 0 "register_operand" "=w")
	(mult:VMUL
	  (vec_duplicate:VMUL
	    (vec_select:<VEL>
	      (match_operand:VMUL 1 "register_operand" "<h_con>")
	      (parallel [(match_operand:SI 2 "immediate_operand")])))
	  (match_operand:VMUL 3 "register_operand" "w")))]
  "TARGET_SIMD"
  {
    operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
    return "<f>mul\\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
  }
  [(set_attr "type" "neon<fp>_mul_<stype>_scalar<q>")]
)

(define_insn "*aarch64_mul3_elt_<vswap_width_name><mode>"
  [(set (match_operand:VMUL_CHANGE_NLANES 0 "register_operand" "=w")
	(mult:VMUL_CHANGE_NLANES
	  (vec_duplicate:VMUL_CHANGE_NLANES
	    (vec_select:<VEL>
	      (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
	      (parallel [(match_operand:SI 2 "immediate_operand")])))
	  (match_operand:VMUL_CHANGE_NLANES 3 "register_operand" "w")))]
  "TARGET_SIMD"
  {
    operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
    return "<f>mul\\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
  }
  [(set_attr "type" "neon<fp>_mul_<Vetype>_scalar<q>")]
)

(define_insn "*aarch64_mul3_elt_from_dup<mode>"
  [(set (match_operand:VMUL 0 "register_operand" "=w")
	(mult:VMUL
	  (vec_duplicate:VMUL
	    (match_operand:<VEL> 1 "register_operand" "<h_con>"))
	  (match_operand:VMUL 2 "register_operand" "w")))]
  "TARGET_SIMD"
  "<f>mul\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]";
  [(set_attr "type" "neon<fp>_mul_<stype>_scalar<q>")]
)

(define_insn "@aarch64_rsqrte<mode>"
  [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
	(unspec:VHSDF_HSDF [(match_operand:VHSDF_HSDF 1 "register_operand" "w")]
			   UNSPEC_RSQRTE))]
  "TARGET_SIMD"
  "frsqrte\\t%<v>0<Vmtype>, %<v>1<Vmtype>"
  [(set_attr "type" "neon_fp_rsqrte_<stype><q>")])
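;; FRSQRTE only provides an initial reciprocal-square-root estimate;
;; the rsqrt<mode>2 expander below refines it with FRSQRTS
;; Newton-Raphson steps inside aarch64_emit_approx_sqrt.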
%<v>2<Vmtype>" 779 [(set_attr "type" "neon_fp_rsqrts_<stype><q>")]) 780 781(define_expand "rsqrt<mode>2" 782 [(set (match_operand:VALLF 0 "register_operand") 783 (unspec:VALLF [(match_operand:VALLF 1 "register_operand")] 784 UNSPEC_RSQRT))] 785 "TARGET_SIMD" 786{ 787 aarch64_emit_approx_sqrt (operands[0], operands[1], true); 788 DONE; 789}) 790 791(define_insn "aarch64_ursqrte<mode>" 792[(set (match_operand:VDQ_SI 0 "register_operand" "=w") 793 (unspec:VDQ_SI [(match_operand:VDQ_SI 1 "register_operand" "w")] 794 UNSPEC_RSQRTE))] 795"TARGET_SIMD" 796"ursqrte\\t%<v>0<Vmtype>, %<v>1<Vmtype>" 797[(set_attr "type" "neon_fp_rsqrte_<stype><q>")]) 798 799(define_insn "*aarch64_mul3_elt_to_64v2df" 800 [(set (match_operand:DF 0 "register_operand" "=w") 801 (mult:DF 802 (vec_select:DF 803 (match_operand:V2DF 1 "register_operand" "w") 804 (parallel [(match_operand:SI 2 "immediate_operand")])) 805 (match_operand:DF 3 "register_operand" "w")))] 806 "TARGET_SIMD" 807 { 808 operands[2] = aarch64_endian_lane_rtx (V2DFmode, INTVAL (operands[2])); 809 return "fmul\\t%0.2d, %3.2d, %1.d[%2]"; 810 } 811 [(set_attr "type" "neon_fp_mul_d_scalar_q")] 812) 813 814(define_insn "neg<mode>2" 815 [(set (match_operand:VDQ_I 0 "register_operand" "=w") 816 (neg:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")))] 817 "TARGET_SIMD" 818 "neg\t%0.<Vtype>, %1.<Vtype>" 819 [(set_attr "type" "neon_neg<q>")] 820) 821 822(define_insn "abs<mode>2" 823 [(set (match_operand:VDQ_I 0 "register_operand" "=w") 824 (abs:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")))] 825 "TARGET_SIMD" 826 "abs\t%0.<Vtype>, %1.<Vtype>" 827 [(set_attr "type" "neon_abs<q>")] 828) 829 830;; The intrinsic version of integer ABS must not be allowed to 831;; combine with any operation with an integerated ABS step, such 832;; as SABD. 833(define_insn "aarch64_abs<mode>" 834 [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w") 835 (unspec:VSDQ_I_DI 836 [(match_operand:VSDQ_I_DI 1 "register_operand" "w")] 837 UNSPEC_ABS))] 838 "TARGET_SIMD" 839 "abs\t%<v>0<Vmtype>, %<v>1<Vmtype>" 840 [(set_attr "type" "neon_abs<q>")] 841) 842 843;; It's tempting to represent SABD as ABS (MINUS op1 op2). 844;; This isn't accurate as ABS treats always its input as a signed value. 845;; So (ABS:QI (minus:QI 64 -128)) == (ABS:QI (192 or -64 signed)) == 64. 846;; Whereas SABD would return 192 (-64 signed) on the above example. 847;; Use MINUS ([us]max (op1, op2), [us]min (op1, op2)) instead. 
(define_insn "aarch64_<su>abd<mode>"
  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
	(minus:VDQ_BHSI
	  (USMAX:VDQ_BHSI
	    (match_operand:VDQ_BHSI 1 "register_operand" "w")
	    (match_operand:VDQ_BHSI 2 "register_operand" "w"))
	  (<max_opp>:VDQ_BHSI
	    (match_dup 1)
	    (match_dup 2))))]
  "TARGET_SIMD"
  "<su>abd\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_abd<q>")]
)


(define_insn "aarch64_<sur>abdl<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(unspec:<VWIDE> [(match_operand:VD_BHSI 1 "register_operand" "w")
			 (match_operand:VD_BHSI 2 "register_operand" "w")]
			ABDL))]
  "TARGET_SIMD"
  "<sur>abdl\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_abd<q>")]
)

(define_insn "aarch64_<sur>abdl2<mode>"
  [(set (match_operand:<VDBLW> 0 "register_operand" "=w")
	(unspec:<VDBLW> [(match_operand:VQW 1 "register_operand" "w")
			 (match_operand:VQW 2 "register_operand" "w")]
			ABDL2))]
  "TARGET_SIMD"
  "<sur>abdl2\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_abd<q>")]
)

(define_insn "aarch64_<sur>abal<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(unspec:<VWIDE> [(match_operand:VD_BHSI 2 "register_operand" "w")
			 (match_operand:VD_BHSI 3 "register_operand" "w")
			 (match_operand:<VWIDE> 1 "register_operand" "0")]
			ABAL))]
  "TARGET_SIMD"
  "<sur>abal\t%0.<Vwtype>, %2.<Vtype>, %3.<Vtype>"
  [(set_attr "type" "neon_arith_acc<q>")]
)

(define_insn "aarch64_<sur>abal2<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(unspec:<VWIDE> [(match_operand:VQW 2 "register_operand" "w")
			 (match_operand:VQW 3 "register_operand" "w")
			 (match_operand:<VWIDE> 1 "register_operand" "0")]
			ABAL2))]
  "TARGET_SIMD"
  "<sur>abal2\t%0.<Vwtype>, %2.<Vtype>, %3.<Vtype>"
  [(set_attr "type" "neon_arith_acc<q>")]
)

(define_insn "aarch64_<sur>adalp<mode>"
  [(set (match_operand:<VDBLW> 0 "register_operand" "=w")
	(unspec:<VDBLW> [(match_operand:VDQV_S 2 "register_operand" "w")
			 (match_operand:<VDBLW> 1 "register_operand" "0")]
			ADALP))]
  "TARGET_SIMD"
  "<sur>adalp\t%0.<Vwhalf>, %2.<Vtype>"
  [(set_attr "type" "neon_reduc_add<q>")]
)

;; Emit a sequence to produce a sum-of-absolute-differences of the V16QI
;; inputs in operands 1 and 2.  The sequence also has to perform a widening
;; reduction of the difference into a V4SI vector and accumulate that into
;; operand 3 before copying that into the result operand 0.
;; Perform that with a sequence of:
;; UABDL2	tmp.8h, op1.16b, op2.16b
;; UABAL	tmp.8h, op1.8b, op2.8b
;; UADALP	op3.4s, tmp.8h
;; MOV		op0, op3 // should be eliminated in later passes.
;;
;; For TARGET_DOTPROD we do:
;; MOV	tmp1.16b, #1 // Can be CSE'd and hoisted out of loops.
;; UABD	tmp2.16b, op1.16b, op2.16b
;; UDOT	op3.4s, tmp2.16b, tmp1.16b
;; MOV	op0, op3 // RA will tie the operands of UDOT appropriately.
;;
;; The signed version just uses the signed variants of the above instructions
;; but for TARGET_DOTPROD still emits a UDOT as the absolute difference is
;; unsigned.
(define_expand "<sur>sadv16qi"
  [(use (match_operand:V4SI 0 "register_operand"))
   (unspec:V16QI [(use (match_operand:V16QI 1 "register_operand"))
		  (use (match_operand:V16QI 2 "register_operand"))] ABAL)
   (use (match_operand:V4SI 3 "register_operand"))]
  "TARGET_SIMD"
  {
    if (TARGET_DOTPROD)
      {
	rtx ones = force_reg (V16QImode, CONST1_RTX (V16QImode));
	rtx abd = gen_reg_rtx (V16QImode);
	emit_insn (gen_aarch64_<sur>abdv16qi (abd, operands[1], operands[2]));
	emit_insn (gen_aarch64_udotv16qi (operands[0], operands[3],
					  abd, ones));
	DONE;
      }
    rtx reduc = gen_reg_rtx (V8HImode);
    emit_insn (gen_aarch64_<sur>abdl2v16qi (reduc, operands[1],
					    operands[2]));
    emit_insn (gen_aarch64_<sur>abalv8qi (reduc, reduc,
					  gen_lowpart (V8QImode, operands[1]),
					  gen_lowpart (V8QImode,
						       operands[2])));
    emit_insn (gen_aarch64_<sur>adalpv8hi (operands[3], operands[3], reduc));
    emit_move_insn (operands[0], operands[3]);
    DONE;
  }
)

(define_insn "aarch64_<su>aba<mode>"
  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
	(plus:VDQ_BHSI (minus:VDQ_BHSI
			 (USMAX:VDQ_BHSI
			   (match_operand:VDQ_BHSI 2 "register_operand" "w")
			   (match_operand:VDQ_BHSI 3 "register_operand" "w"))
			 (<max_opp>:VDQ_BHSI
			   (match_dup 2)
			   (match_dup 3)))
		       (match_operand:VDQ_BHSI 1 "register_operand" "0")))]
  "TARGET_SIMD"
  "<su>aba\t%0.<Vtype>, %2.<Vtype>, %3.<Vtype>"
  [(set_attr "type" "neon_arith_acc<q>")]
)

(define_insn "fabd<mode>3"
  [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
	(abs:VHSDF_HSDF
	  (minus:VHSDF_HSDF
	    (match_operand:VHSDF_HSDF 1 "register_operand" "w")
	    (match_operand:VHSDF_HSDF 2 "register_operand" "w"))))]
  "TARGET_SIMD"
  "fabd\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
  [(set_attr "type" "neon_fp_abd_<stype><q>")]
)

;; For AND (vector, register) and BIC (vector, immediate)
(define_insn "and<mode>3"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w,w")
	(and:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w,0")
		   (match_operand:VDQ_I 2 "aarch64_reg_or_bic_imm" "w,Db")))]
  "TARGET_SIMD"
  {
    switch (which_alternative)
      {
      case 0:
	return "and\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>";
      case 1:
	return aarch64_output_simd_mov_immediate (operands[2], <bitsize>,
						  AARCH64_CHECK_BIC);
      default:
	gcc_unreachable ();
      }
  }
  [(set_attr "type" "neon_logic<q>")]
)

;; For ORR (vector, register) and ORR (vector, immediate)
(define_insn "ior<mode>3"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w,w")
	(ior:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w,0")
		   (match_operand:VDQ_I 2 "aarch64_reg_or_orr_imm" "w,Do")))]
  "TARGET_SIMD"
  {
    switch (which_alternative)
      {
      case 0:
	return "orr\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>";
      case 1:
	return aarch64_output_simd_mov_immediate (operands[2], <bitsize>,
						  AARCH64_CHECK_ORR);
      default:
	gcc_unreachable ();
      }
  }
  [(set_attr "type" "neon_logic<q>")]
)

(define_insn "xor<mode>3"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
	(xor:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
		   (match_operand:VDQ_I 2 "register_operand" "w")))]
  "TARGET_SIMD"
  "eor\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>"
  [(set_attr "type" "neon_logic<q>")]
)
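;; In the AND/ORR patterns above, the Db and Do constraints only accept
;; immediates encodable as BIC (vector, immediate) and ORR (vector,
;; immediate) respectively, and aarch64_output_simd_mov_immediate prints
;; the matching instruction for the second alternative.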
"register_operand" "=w") 1043 (not:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")))] 1044 "TARGET_SIMD" 1045 "not\t%0.<Vbtype>, %1.<Vbtype>" 1046 [(set_attr "type" "neon_logic<q>")] 1047) 1048 1049(define_insn "aarch64_simd_vec_set<mode>" 1050 [(set (match_operand:VALL_F16 0 "register_operand" "=w,w,w") 1051 (vec_merge:VALL_F16 1052 (vec_duplicate:VALL_F16 1053 (match_operand:<VEL> 1 "aarch64_simd_general_operand" "w,?r,Utv")) 1054 (match_operand:VALL_F16 3 "register_operand" "0,0,0") 1055 (match_operand:SI 2 "immediate_operand" "i,i,i")))] 1056 "TARGET_SIMD" 1057 { 1058 int elt = ENDIAN_LANE_N (<nunits>, exact_log2 (INTVAL (operands[2]))); 1059 operands[2] = GEN_INT ((HOST_WIDE_INT) 1 << elt); 1060 switch (which_alternative) 1061 { 1062 case 0: 1063 return "ins\\t%0.<Vetype>[%p2], %1.<Vetype>[0]"; 1064 case 1: 1065 return "ins\\t%0.<Vetype>[%p2], %<vwcore>1"; 1066 case 2: 1067 return "ld1\\t{%0.<Vetype>}[%p2], %1"; 1068 default: 1069 gcc_unreachable (); 1070 } 1071 } 1072 [(set_attr "type" "neon_ins<q>, neon_from_gp<q>, neon_load1_one_lane<q>")] 1073) 1074 1075(define_insn "@aarch64_simd_vec_copy_lane<mode>" 1076 [(set (match_operand:VALL_F16 0 "register_operand" "=w") 1077 (vec_merge:VALL_F16 1078 (vec_duplicate:VALL_F16 1079 (vec_select:<VEL> 1080 (match_operand:VALL_F16 3 "register_operand" "w") 1081 (parallel 1082 [(match_operand:SI 4 "immediate_operand" "i")]))) 1083 (match_operand:VALL_F16 1 "register_operand" "0") 1084 (match_operand:SI 2 "immediate_operand" "i")))] 1085 "TARGET_SIMD" 1086 { 1087 int elt = ENDIAN_LANE_N (<nunits>, exact_log2 (INTVAL (operands[2]))); 1088 operands[2] = GEN_INT (HOST_WIDE_INT_1 << elt); 1089 operands[4] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[4])); 1090 1091 return "ins\t%0.<Vetype>[%p2], %3.<Vetype>[%4]"; 1092 } 1093 [(set_attr "type" "neon_ins<q>")] 1094) 1095 1096(define_insn "*aarch64_simd_vec_copy_lane_<vswap_width_name><mode>" 1097 [(set (match_operand:VALL_F16_NO_V2Q 0 "register_operand" "=w") 1098 (vec_merge:VALL_F16_NO_V2Q 1099 (vec_duplicate:VALL_F16_NO_V2Q 1100 (vec_select:<VEL> 1101 (match_operand:<VSWAP_WIDTH> 3 "register_operand" "w") 1102 (parallel 1103 [(match_operand:SI 4 "immediate_operand" "i")]))) 1104 (match_operand:VALL_F16_NO_V2Q 1 "register_operand" "0") 1105 (match_operand:SI 2 "immediate_operand" "i")))] 1106 "TARGET_SIMD" 1107 { 1108 int elt = ENDIAN_LANE_N (<nunits>, exact_log2 (INTVAL (operands[2]))); 1109 operands[2] = GEN_INT (HOST_WIDE_INT_1 << elt); 1110 operands[4] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, 1111 INTVAL (operands[4])); 1112 1113 return "ins\t%0.<Vetype>[%p2], %3.<Vetype>[%4]"; 1114 } 1115 [(set_attr "type" "neon_ins<q>")] 1116) 1117 1118(define_expand "signbit<mode>2" 1119 [(use (match_operand:<V_INT_EQUIV> 0 "register_operand")) 1120 (use (match_operand:VDQSF 1 "register_operand"))] 1121 "TARGET_SIMD" 1122{ 1123 int shift_amount = GET_MODE_UNIT_BITSIZE (<V_INT_EQUIV>mode) - 1; 1124 rtx shift_vector = aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode, 1125 shift_amount); 1126 operands[1] = lowpart_subreg (<V_INT_EQUIV>mode, operands[1], <MODE>mode); 1127 1128 emit_insn (gen_aarch64_simd_lshr<v_int_equiv> (operands[0], operands[1], 1129 shift_vector)); 1130 DONE; 1131}) 1132 1133(define_insn "aarch64_simd_lshr<mode>" 1134 [(set (match_operand:VDQ_I 0 "register_operand" "=w") 1135 (lshiftrt:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w") 1136 (match_operand:VDQ_I 2 "aarch64_simd_rshift_imm" "Dr")))] 1137 "TARGET_SIMD" 1138 "ushr\t%0.<Vtype>, %1.<Vtype>, %2" 1139 
[(set_attr "type" "neon_shift_imm<q>")] 1140) 1141 1142(define_insn "aarch64_simd_ashr<mode>" 1143 [(set (match_operand:VDQ_I 0 "register_operand" "=w") 1144 (ashiftrt:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w") 1145 (match_operand:VDQ_I 2 "aarch64_simd_rshift_imm" "Dr")))] 1146 "TARGET_SIMD" 1147 "sshr\t%0.<Vtype>, %1.<Vtype>, %2" 1148 [(set_attr "type" "neon_shift_imm<q>")] 1149) 1150 1151(define_insn "*aarch64_simd_sra<mode>" 1152 [(set (match_operand:VDQ_I 0 "register_operand" "=w") 1153 (plus:VDQ_I 1154 (SHIFTRT:VDQ_I 1155 (match_operand:VDQ_I 1 "register_operand" "w") 1156 (match_operand:VDQ_I 2 "aarch64_simd_rshift_imm" "Dr")) 1157 (match_operand:VDQ_I 3 "register_operand" "0")))] 1158 "TARGET_SIMD" 1159 "<sra_op>sra\t%0.<Vtype>, %1.<Vtype>, %2" 1160 [(set_attr "type" "neon_shift_acc<q>")] 1161) 1162 1163(define_insn "aarch64_simd_imm_shl<mode>" 1164 [(set (match_operand:VDQ_I 0 "register_operand" "=w") 1165 (ashift:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w") 1166 (match_operand:VDQ_I 2 "aarch64_simd_lshift_imm" "Dl")))] 1167 "TARGET_SIMD" 1168 "shl\t%0.<Vtype>, %1.<Vtype>, %2" 1169 [(set_attr "type" "neon_shift_imm<q>")] 1170) 1171 1172(define_insn "aarch64_simd_reg_sshl<mode>" 1173 [(set (match_operand:VDQ_I 0 "register_operand" "=w") 1174 (ashift:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w") 1175 (match_operand:VDQ_I 2 "register_operand" "w")))] 1176 "TARGET_SIMD" 1177 "sshl\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>" 1178 [(set_attr "type" "neon_shift_reg<q>")] 1179) 1180 1181(define_insn "aarch64_simd_reg_shl<mode>_unsigned" 1182 [(set (match_operand:VDQ_I 0 "register_operand" "=w") 1183 (unspec:VDQ_I [(match_operand:VDQ_I 1 "register_operand" "w") 1184 (match_operand:VDQ_I 2 "register_operand" "w")] 1185 UNSPEC_ASHIFT_UNSIGNED))] 1186 "TARGET_SIMD" 1187 "ushl\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>" 1188 [(set_attr "type" "neon_shift_reg<q>")] 1189) 1190 1191(define_insn "aarch64_simd_reg_shl<mode>_signed" 1192 [(set (match_operand:VDQ_I 0 "register_operand" "=w") 1193 (unspec:VDQ_I [(match_operand:VDQ_I 1 "register_operand" "w") 1194 (match_operand:VDQ_I 2 "register_operand" "w")] 1195 UNSPEC_ASHIFT_SIGNED))] 1196 "TARGET_SIMD" 1197 "sshl\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>" 1198 [(set_attr "type" "neon_shift_reg<q>")] 1199) 1200 1201(define_expand "ashl<mode>3" 1202 [(match_operand:VDQ_I 0 "register_operand") 1203 (match_operand:VDQ_I 1 "register_operand") 1204 (match_operand:SI 2 "general_operand")] 1205 "TARGET_SIMD" 1206{ 1207 int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT; 1208 int shift_amount; 1209 1210 if (CONST_INT_P (operands[2])) 1211 { 1212 shift_amount = INTVAL (operands[2]); 1213 if (shift_amount >= 0 && shift_amount < bit_width) 1214 { 1215 rtx tmp = aarch64_simd_gen_const_vector_dup (<MODE>mode, 1216 shift_amount); 1217 emit_insn (gen_aarch64_simd_imm_shl<mode> (operands[0], 1218 operands[1], 1219 tmp)); 1220 DONE; 1221 } 1222 } 1223 1224 operands[2] = force_reg (SImode, operands[2]); 1225 1226 rtx tmp = gen_reg_rtx (<MODE>mode); 1227 emit_insn (gen_aarch64_simd_dup<mode> (tmp, convert_to_mode (<VEL>mode, 1228 operands[2], 1229 0))); 1230 emit_insn (gen_aarch64_simd_reg_sshl<mode> (operands[0], operands[1], tmp)); 1231 DONE; 1232}) 1233 1234(define_expand "lshr<mode>3" 1235 [(match_operand:VDQ_I 0 "register_operand") 1236 (match_operand:VDQ_I 1 "register_operand") 1237 (match_operand:SI 2 "general_operand")] 1238 "TARGET_SIMD" 1239{ 1240 int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT; 1241 int shift_amount; 1242 
(define_expand "lshr<mode>3"
  [(match_operand:VDQ_I 0 "register_operand")
   (match_operand:VDQ_I 1 "register_operand")
   (match_operand:SI 2 "general_operand")]
  "TARGET_SIMD"
{
  int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
  int shift_amount;

  if (CONST_INT_P (operands[2]))
    {
      shift_amount = INTVAL (operands[2]);
      if (shift_amount > 0 && shift_amount <= bit_width)
	{
	  rtx tmp = aarch64_simd_gen_const_vector_dup (<MODE>mode,
						       shift_amount);
	  emit_insn (gen_aarch64_simd_lshr<mode> (operands[0],
						  operands[1],
						  tmp));
	  DONE;
	}
    }

  operands[2] = force_reg (SImode, operands[2]);

  rtx tmp = gen_reg_rtx (SImode);
  rtx tmp1 = gen_reg_rtx (<MODE>mode);
  emit_insn (gen_negsi2 (tmp, operands[2]));
  emit_insn (gen_aarch64_simd_dup<mode> (tmp1,
					 convert_to_mode (<VEL>mode, tmp, 0)));
  emit_insn (gen_aarch64_simd_reg_shl<mode>_unsigned (operands[0], operands[1],
						      tmp1));
  DONE;
})

(define_expand "ashr<mode>3"
  [(match_operand:VDQ_I 0 "register_operand")
   (match_operand:VDQ_I 1 "register_operand")
   (match_operand:SI 2 "general_operand")]
  "TARGET_SIMD"
{
  int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
  int shift_amount;

  if (CONST_INT_P (operands[2]))
    {
      shift_amount = INTVAL (operands[2]);
      if (shift_amount > 0 && shift_amount <= bit_width)
	{
	  rtx tmp = aarch64_simd_gen_const_vector_dup (<MODE>mode,
						       shift_amount);
	  emit_insn (gen_aarch64_simd_ashr<mode> (operands[0],
						  operands[1],
						  tmp));
	  DONE;
	}
    }

  operands[2] = force_reg (SImode, operands[2]);

  rtx tmp = gen_reg_rtx (SImode);
  rtx tmp1 = gen_reg_rtx (<MODE>mode);
  emit_insn (gen_negsi2 (tmp, operands[2]));
  emit_insn (gen_aarch64_simd_dup<mode> (tmp1, convert_to_mode (<VEL>mode,
								tmp, 0)));
  emit_insn (gen_aarch64_simd_reg_shl<mode>_signed (operands[0], operands[1],
						    tmp1));
  DONE;
})

(define_expand "vashl<mode>3"
  [(match_operand:VDQ_I 0 "register_operand")
   (match_operand:VDQ_I 1 "register_operand")
   (match_operand:VDQ_I 2 "register_operand")]
  "TARGET_SIMD"
{
  emit_insn (gen_aarch64_simd_reg_sshl<mode> (operands[0], operands[1],
					      operands[2]));
  DONE;
})

;; Using mode VDQ_BHSI as there is no V2DImode neg!
;; Negating individual lanes most certainly offsets the
;; gain from vectorization.
(define_expand "vashr<mode>3"
  [(match_operand:VDQ_BHSI 0 "register_operand")
   (match_operand:VDQ_BHSI 1 "register_operand")
   (match_operand:VDQ_BHSI 2 "register_operand")]
  "TARGET_SIMD"
{
  rtx neg = gen_reg_rtx (<MODE>mode);
  emit (gen_neg<mode>2 (neg, operands[2]));
  emit_insn (gen_aarch64_simd_reg_shl<mode>_signed (operands[0], operands[1],
						    neg));
  DONE;
})
;; DI vector shift
(define_expand "aarch64_ashr_simddi"
  [(match_operand:DI 0 "register_operand")
   (match_operand:DI 1 "register_operand")
   (match_operand:SI 2 "aarch64_shift_imm64_di")]
  "TARGET_SIMD"
  {
    /* An arithmetic shift right by 64 fills the result with copies of the sign
       bit, just like asr by 63 - however the standard pattern does not handle
       a shift by 64.  */
    if (INTVAL (operands[2]) == 64)
      operands[2] = GEN_INT (63);
    emit_insn (gen_ashrdi3 (operands[0], operands[1], operands[2]));
    DONE;
  }
)

(define_expand "vlshr<mode>3"
  [(match_operand:VDQ_BHSI 0 "register_operand")
   (match_operand:VDQ_BHSI 1 "register_operand")
   (match_operand:VDQ_BHSI 2 "register_operand")]
  "TARGET_SIMD"
{
  rtx neg = gen_reg_rtx (<MODE>mode);
  emit (gen_neg<mode>2 (neg, operands[2]));
  emit_insn (gen_aarch64_simd_reg_shl<mode>_unsigned (operands[0], operands[1],
						      neg));
  DONE;
})

(define_expand "aarch64_lshr_simddi"
  [(match_operand:DI 0 "register_operand")
   (match_operand:DI 1 "register_operand")
   (match_operand:SI 2 "aarch64_shift_imm64_di")]
  "TARGET_SIMD"
  {
    if (INTVAL (operands[2]) == 64)
      emit_move_insn (operands[0], const0_rtx);
    else
      emit_insn (gen_lshrdi3 (operands[0], operands[1], operands[2]));
    DONE;
  }
)

;; For 64-bit modes we use ushl/r, as this does not require a SIMD zero.
(define_insn "vec_shr_<mode>"
  [(set (match_operand:VD 0 "register_operand" "=w")
	(unspec:VD [(match_operand:VD 1 "register_operand" "w")
		    (match_operand:SI 2 "immediate_operand" "i")]
		   UNSPEC_VEC_SHR))]
  "TARGET_SIMD"
  {
    if (BYTES_BIG_ENDIAN)
      return "shl %d0, %d1, %2";
    else
      return "ushr %d0, %d1, %2";
  }
  [(set_attr "type" "neon_shift_imm")]
)

(define_expand "vec_set<mode>"
  [(match_operand:VALL_F16 0 "register_operand")
   (match_operand:<VEL> 1 "register_operand")
   (match_operand:SI 2 "immediate_operand")]
  "TARGET_SIMD"
  {
    HOST_WIDE_INT elem = (HOST_WIDE_INT) 1 << INTVAL (operands[2]);
    emit_insn (gen_aarch64_simd_vec_set<mode> (operands[0], operands[1],
					       GEN_INT (elem), operands[0]));
    DONE;
  }
)


(define_insn "aarch64_mla<mode>"
  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
	(plus:VDQ_BHSI (mult:VDQ_BHSI
			 (match_operand:VDQ_BHSI 2 "register_operand" "w")
			 (match_operand:VDQ_BHSI 3 "register_operand" "w"))
		       (match_operand:VDQ_BHSI 1 "register_operand" "0")))]
  "TARGET_SIMD"
  "mla\t%0.<Vtype>, %2.<Vtype>, %3.<Vtype>"
  [(set_attr "type" "neon_mla_<Vetype><q>")]
)

(define_insn "*aarch64_mla_elt<mode>"
  [(set (match_operand:VDQHS 0 "register_operand" "=w")
	(plus:VDQHS
	  (mult:VDQHS
	    (vec_duplicate:VDQHS
	      (vec_select:<VEL>
		(match_operand:VDQHS 1 "register_operand" "<h_con>")
		(parallel [(match_operand:SI 2 "immediate_operand")])))
	    (match_operand:VDQHS 3 "register_operand" "w"))
	  (match_operand:VDQHS 4 "register_operand" "0")))]
  "TARGET_SIMD"
  {
    operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
    return "mla\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
  }
  [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
)
"mla\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]"; 1448 } 1449 [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")] 1450) 1451 1452(define_insn "aarch64_mla_n<mode>" 1453 [(set (match_operand:VDQHS 0 "register_operand" "=w") 1454 (plus:VDQHS 1455 (mult:VDQHS 1456 (vec_duplicate:VDQHS 1457 (match_operand:<VEL> 3 "register_operand" "<h_con>")) 1458 (match_operand:VDQHS 2 "register_operand" "w")) 1459 (match_operand:VDQHS 1 "register_operand" "0")))] 1460 "TARGET_SIMD" 1461 "mla\t%0.<Vtype>, %2.<Vtype>, %3.<Vetype>[0]" 1462 [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")] 1463) 1464 1465(define_insn "aarch64_mls<mode>" 1466 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w") 1467 (minus:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "0") 1468 (mult:VDQ_BHSI (match_operand:VDQ_BHSI 2 "register_operand" "w") 1469 (match_operand:VDQ_BHSI 3 "register_operand" "w"))))] 1470 "TARGET_SIMD" 1471 "mls\t%0.<Vtype>, %2.<Vtype>, %3.<Vtype>" 1472 [(set_attr "type" "neon_mla_<Vetype><q>")] 1473) 1474 1475(define_insn "*aarch64_mls_elt<mode>" 1476 [(set (match_operand:VDQHS 0 "register_operand" "=w") 1477 (minus:VDQHS 1478 (match_operand:VDQHS 4 "register_operand" "0") 1479 (mult:VDQHS 1480 (vec_duplicate:VDQHS 1481 (vec_select:<VEL> 1482 (match_operand:VDQHS 1 "register_operand" "<h_con>") 1483 (parallel [(match_operand:SI 2 "immediate_operand")]))) 1484 (match_operand:VDQHS 3 "register_operand" "w"))))] 1485 "TARGET_SIMD" 1486 { 1487 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2])); 1488 return "mls\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]"; 1489 } 1490 [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")] 1491) 1492 1493(define_insn "*aarch64_mls_elt_<vswap_width_name><mode>" 1494 [(set (match_operand:VDQHS 0 "register_operand" "=w") 1495 (minus:VDQHS 1496 (match_operand:VDQHS 4 "register_operand" "0") 1497 (mult:VDQHS 1498 (vec_duplicate:VDQHS 1499 (vec_select:<VEL> 1500 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>") 1501 (parallel [(match_operand:SI 2 "immediate_operand")]))) 1502 (match_operand:VDQHS 3 "register_operand" "w"))))] 1503 "TARGET_SIMD" 1504 { 1505 operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2])); 1506 return "mls\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]"; 1507 } 1508 [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")] 1509) 1510 1511(define_insn "aarch64_mls_n<mode>" 1512 [(set (match_operand:VDQHS 0 "register_operand" "=w") 1513 (minus:VDQHS 1514 (match_operand:VDQHS 1 "register_operand" "0") 1515 (mult:VDQHS 1516 (vec_duplicate:VDQHS 1517 (match_operand:<VEL> 3 "register_operand" "<h_con>")) 1518 (match_operand:VDQHS 2 "register_operand" "w"))))] 1519 "TARGET_SIMD" 1520 "mls\t%0.<Vtype>, %2.<Vtype>, %3.<Vetype>[0]" 1521 [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")] 1522) 1523 1524;; Max/Min operations. 
(define_insn "<su><maxmin><mode>3"
  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
	(MAXMIN:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")
			 (match_operand:VDQ_BHSI 2 "register_operand" "w")))]
  "TARGET_SIMD"
  "<su><maxmin>\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_minmax<q>")]
)

(define_expand "<su><maxmin>v2di3"
  [(set (match_operand:V2DI 0 "register_operand")
	(MAXMIN:V2DI (match_operand:V2DI 1 "register_operand")
		     (match_operand:V2DI 2 "register_operand")))]
  "TARGET_SIMD"
{
  enum rtx_code cmp_operator;
  rtx cmp_fmt;

  switch (<CODE>)
    {
    case UMIN:
      cmp_operator = LTU;
      break;
    case SMIN:
      cmp_operator = LT;
      break;
    case UMAX:
      cmp_operator = GTU;
      break;
    case SMAX:
      cmp_operator = GT;
      break;
    default:
      gcc_unreachable ();
    }

  cmp_fmt = gen_rtx_fmt_ee (cmp_operator, V2DImode, operands[1], operands[2]);
  emit_insn (gen_vcondv2div2di (operands[0], operands[1],
				operands[2], cmp_fmt, operands[1], operands[2]));
  DONE;
})

;; Pairwise Integer Max/Min operations.
(define_insn "aarch64_<maxmin_uns>p<mode>"
  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
	(unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand" "w")
			  (match_operand:VDQ_BHSI 2 "register_operand" "w")]
			 MAXMINV))]
  "TARGET_SIMD"
  "<maxmin_uns_op>p\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_minmax<q>")]
)

;; Pairwise FP Max/Min operations.
(define_insn "aarch64_<maxmin_uns>p<mode>"
  [(set (match_operand:VHSDF 0 "register_operand" "=w")
	(unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
		       (match_operand:VHSDF 2 "register_operand" "w")]
		      FMAXMINV))]
  "TARGET_SIMD"
  "<maxmin_uns_op>p\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_minmax<q>")]
)

;; vec_concat gives a new vector with the low elements from operand 1, and
;; the high elements from operand 2.  That is to say, given op1 = { a, b }
;; op2 = { c, d }, vec_concat (op1, op2) = { a, b, c, d }.
;; What that means, is that the RTL descriptions of the below patterns
;; need to change depending on endianness.
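;; For example, with op1 = { a, b } the little-endian form of
;; move_lo_quad below is (vec_concat op1 zeroes) = { a, b, 0, 0 },
;; whereas on big-endian the low architectural bits hold the last
;; elements and the pattern becomes (vec_concat zeroes op1).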
;; Move to the low architectural bits of the register.
;; On little-endian this is { operand, zeroes }
;; On big-endian this is { zeroes, operand }

(define_insn "move_lo_quad_internal_<mode>"
  [(set (match_operand:VQMOV 0 "register_operand" "=w,w,w")
	(vec_concat:VQMOV
	  (match_operand:<VHALF> 1 "register_operand" "w,r,r")
	  (match_operand:<VHALF> 2 "aarch64_simd_or_scalar_imm_zero")))]
  "TARGET_SIMD && !BYTES_BIG_ENDIAN"
  "@
   dup\\t%d0, %1.d[0]
   fmov\\t%d0, %1
   dup\\t%d0, %1"
  [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
   (set_attr "length" "4")
   (set_attr "arch" "simd,fp,simd")]
)

(define_insn "move_lo_quad_internal_be_<mode>"
  [(set (match_operand:VQMOV 0 "register_operand" "=w,w,w")
	(vec_concat:VQMOV
	  (match_operand:<VHALF> 2 "aarch64_simd_or_scalar_imm_zero")
	  (match_operand:<VHALF> 1 "register_operand" "w,r,r")))]
  "TARGET_SIMD && BYTES_BIG_ENDIAN"
  "@
   dup\\t%d0, %1.d[0]
   fmov\\t%d0, %1
   dup\\t%d0, %1"
  [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
   (set_attr "length" "4")
   (set_attr "arch" "simd,fp,simd")]
)

(define_expand "move_lo_quad_<mode>"
  [(match_operand:VQMOV 0 "register_operand")
   (match_operand:<VHALF> 1 "register_operand")]
  "TARGET_SIMD"
{
  rtx zs = CONST0_RTX (<VHALF>mode);
  if (BYTES_BIG_ENDIAN)
    emit_insn (gen_move_lo_quad_internal_be_<mode> (operands[0], operands[1], zs));
  else
    emit_insn (gen_move_lo_quad_internal_<mode> (operands[0], operands[1], zs));
  DONE;
}
)

;; Move operand1 to the high architectural bits of the register, keeping
;; the low architectural bits of operand2.
;; For little-endian this is { operand2, operand1 }
;; For big-endian this is { operand1, operand2 }

(define_insn "aarch64_simd_move_hi_quad_<mode>"
  [(set (match_operand:VQMOV 0 "register_operand" "+w,w")
	(vec_concat:VQMOV
	  (vec_select:<VHALF>
	    (match_dup 0)
	    (match_operand:VQMOV 2 "vect_par_cnst_lo_half" ""))
	  (match_operand:<VHALF> 1 "register_operand" "w,r")))]
  "TARGET_SIMD && !BYTES_BIG_ENDIAN"
  "@
   ins\\t%0.d[1], %1.d[0]
   ins\\t%0.d[1], %1"
  [(set_attr "type" "neon_ins")]
)

(define_insn "aarch64_simd_move_hi_quad_be_<mode>"
  [(set (match_operand:VQMOV 0 "register_operand" "+w,w")
	(vec_concat:VQMOV
	  (match_operand:<VHALF> 1 "register_operand" "w,r")
	  (vec_select:<VHALF>
	    (match_dup 0)
	    (match_operand:VQMOV 2 "vect_par_cnst_lo_half" ""))))]
  "TARGET_SIMD && BYTES_BIG_ENDIAN"
  "@
   ins\\t%0.d[1], %1.d[0]
   ins\\t%0.d[1], %1"
  [(set_attr "type" "neon_ins")]
)

(define_expand "move_hi_quad_<mode>"
  [(match_operand:VQMOV 0 "register_operand")
   (match_operand:<VHALF> 1 "register_operand")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
  if (BYTES_BIG_ENDIAN)
    emit_insn (gen_aarch64_simd_move_hi_quad_be_<mode> (operands[0],
							operands[1], p));
  else
    emit_insn (gen_aarch64_simd_move_hi_quad_<mode> (operands[0],
						     operands[1], p));
  DONE;
})
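;; Taken together, move_lo_quad and move_hi_quad assemble a 128-bit
;; vector from two 64-bit halves; the vec_pack_trunc expander below uses
;; exactly that pair before narrowing the combined vector with a single
;; XTN.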
;; Narrowing operations.

;; For doubles.

(define_insn "aarch64_simd_vec_pack_trunc_<mode>"
  [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
        (truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand" "w")))]
  "TARGET_SIMD"
  "xtn\\t%0.<Vntype>, %1.<Vtype>"
  [(set_attr "type" "neon_shift_imm_narrow_q")]
)

(define_expand "vec_pack_trunc_<mode>"
  [(match_operand:<VNARROWD> 0 "register_operand")
   (match_operand:VDN 1 "register_operand")
   (match_operand:VDN 2 "register_operand")]
  "TARGET_SIMD"
{
  rtx tempreg = gen_reg_rtx (<VDBL>mode);
  int lo = BYTES_BIG_ENDIAN ? 2 : 1;
  int hi = BYTES_BIG_ENDIAN ? 1 : 2;

  emit_insn (gen_move_lo_quad_<Vdbl> (tempreg, operands[lo]));
  emit_insn (gen_move_hi_quad_<Vdbl> (tempreg, operands[hi]));
  emit_insn (gen_aarch64_simd_vec_pack_trunc_<Vdbl> (operands[0], tempreg));
  DONE;
})

(define_insn "aarch64_shrn<mode>_insn_le"
  [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
        (vec_concat:<VNARROWQ2>
          (truncate:<VNARROWQ>
            (lshiftrt:VQN (match_operand:VQN 1 "register_operand" "w")
              (match_operand:VQN 2 "aarch64_simd_shift_imm_vec_<vn_mode>")))
          (match_operand:<VNARROWQ> 3 "aarch64_simd_or_scalar_imm_zero")))]
  "TARGET_SIMD && !BYTES_BIG_ENDIAN"
  "shrn\\t%0.<Vntype>, %1.<Vtype>, %2"
  [(set_attr "type" "neon_shift_imm_narrow_q")]
)

(define_insn "aarch64_shrn<mode>_insn_be"
  [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
        (vec_concat:<VNARROWQ2>
          (match_operand:<VNARROWQ> 3 "aarch64_simd_or_scalar_imm_zero")
          (truncate:<VNARROWQ>
            (lshiftrt:VQN (match_operand:VQN 1 "register_operand" "w")
              (match_operand:VQN 2 "aarch64_simd_shift_imm_vec_<vn_mode>")))))]
  "TARGET_SIMD && BYTES_BIG_ENDIAN"
  "shrn\\t%0.<Vntype>, %1.<Vtype>, %2"
  [(set_attr "type" "neon_shift_imm_narrow_q")]
)
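;; The shrn patterns above implement the vshrn_n family of intrinsics:
;; shift each wide element right by an immediate, then keep the low half
;; of each result.  Illustrative C (function name made up):
;;
;;   #include <arm_neon.h>
;;
;;   uint8x8_t
;;   narrow_shift (uint16x8_t x)
;;   {
;;     return vshrn_n_u16 (x, 8);  /* A single SHRN.  */
;;   }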
(define_expand "aarch64_shrn<mode>"
  [(set (match_operand:<VNARROWQ> 0 "register_operand")
        (truncate:<VNARROWQ>
          (lshiftrt:VQN (match_operand:VQN 1 "register_operand")
            (match_operand:SI 2 "aarch64_simd_shift_imm_offset_<vn_mode>"))))]
  "TARGET_SIMD"
  {
    operands[2] = aarch64_simd_gen_const_vector_dup (<MODE>mode,
                                                     INTVAL (operands[2]));
    rtx tmp = gen_reg_rtx (<VNARROWQ2>mode);
    if (BYTES_BIG_ENDIAN)
      emit_insn (gen_aarch64_shrn<mode>_insn_be (tmp, operands[1],
                                operands[2], CONST0_RTX (<VNARROWQ>mode)));
    else
      emit_insn (gen_aarch64_shrn<mode>_insn_le (tmp, operands[1],
                                operands[2], CONST0_RTX (<VNARROWQ>mode)));

    /* The intrinsic expects a narrow result, so emit a subreg that will get
       optimized away as appropriate.  */
    emit_move_insn (operands[0], lowpart_subreg (<VNARROWQ>mode, tmp,
                                                 <VNARROWQ2>mode));
    DONE;
  }
)

(define_insn "aarch64_rshrn<mode>_insn_le"
  [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
        (vec_concat:<VNARROWQ2>
          (unspec:<VNARROWQ> [(match_operand:VQN 1 "register_operand" "w")
            (match_operand:VQN 2 "aarch64_simd_shift_imm_vec_<vn_mode>")]
            UNSPEC_RSHRN)
          (match_operand:<VNARROWQ> 3 "aarch64_simd_or_scalar_imm_zero")))]
  "TARGET_SIMD && !BYTES_BIG_ENDIAN"
  "rshrn\\t%0.<Vntype>, %1.<Vtype>, %2"
  [(set_attr "type" "neon_shift_imm_narrow_q")]
)

(define_insn "aarch64_rshrn<mode>_insn_be"
  [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
        (vec_concat:<VNARROWQ2>
          (match_operand:<VNARROWQ> 3 "aarch64_simd_or_scalar_imm_zero")
          (unspec:<VNARROWQ> [(match_operand:VQN 1 "register_operand" "w")
            (match_operand:VQN 2 "aarch64_simd_shift_imm_vec_<vn_mode>")]
            UNSPEC_RSHRN)))]
  "TARGET_SIMD && BYTES_BIG_ENDIAN"
  "rshrn\\t%0.<Vntype>, %1.<Vtype>, %2"
  [(set_attr "type" "neon_shift_imm_narrow_q")]
)
(define_expand "aarch64_rshrn<mode>"
  [(match_operand:<VNARROWQ> 0 "register_operand")
   (match_operand:VQN 1 "register_operand")
   (match_operand:SI 2 "aarch64_simd_shift_imm_offset_<vn_mode>")]
  "TARGET_SIMD"
  {
    operands[2] = aarch64_simd_gen_const_vector_dup (<MODE>mode,
                                                     INTVAL (operands[2]));
    rtx tmp = gen_reg_rtx (<VNARROWQ2>mode);
    if (BYTES_BIG_ENDIAN)
      emit_insn (gen_aarch64_rshrn<mode>_insn_be (tmp, operands[1],
                                operands[2], CONST0_RTX (<VNARROWQ>mode)));
    else
      emit_insn (gen_aarch64_rshrn<mode>_insn_le (tmp, operands[1],
                                operands[2], CONST0_RTX (<VNARROWQ>mode)));

    /* The intrinsic expects a narrow result, so emit a subreg that will get
       optimized away as appropriate.  */
    emit_move_insn (operands[0], lowpart_subreg (<VNARROWQ>mode, tmp,
                                                 <VNARROWQ2>mode));
    DONE;
  }
)

(define_insn "aarch64_shrn2<mode>_insn_le"
  [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
        (vec_concat:<VNARROWQ2>
          (match_operand:<VNARROWQ> 1 "register_operand" "0")
          (truncate:<VNARROWQ>
            (lshiftrt:VQN (match_operand:VQN 2 "register_operand" "w")
              (match_operand:VQN 3 "aarch64_simd_shift_imm_vec_<vn_mode>")))))]
  "TARGET_SIMD && !BYTES_BIG_ENDIAN"
  "shrn2\\t%0.<V2ntype>, %2.<Vtype>, %3"
  [(set_attr "type" "neon_shift_imm_narrow_q")]
)

(define_insn "aarch64_shrn2<mode>_insn_be"
  [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
        (vec_concat:<VNARROWQ2>
          (truncate:<VNARROWQ>
            (lshiftrt:VQN (match_operand:VQN 2 "register_operand" "w")
              (match_operand:VQN 3 "aarch64_simd_shift_imm_vec_<vn_mode>")))
          (match_operand:<VNARROWQ> 1 "register_operand" "0")))]
  "TARGET_SIMD && BYTES_BIG_ENDIAN"
  "shrn2\\t%0.<V2ntype>, %2.<Vtype>, %3"
  [(set_attr "type" "neon_shift_imm_narrow_q")]
)

(define_expand "aarch64_shrn2<mode>"
  [(match_operand:<VNARROWQ2> 0 "register_operand")
   (match_operand:<VNARROWQ> 1 "register_operand")
   (match_operand:VQN 2 "register_operand")
   (match_operand:SI 3 "aarch64_simd_shift_imm_offset_<vn_mode>")]
  "TARGET_SIMD"
  {
    operands[3] = aarch64_simd_gen_const_vector_dup (<MODE>mode,
                                                     INTVAL (operands[3]));
    if (BYTES_BIG_ENDIAN)
      emit_insn (gen_aarch64_shrn2<mode>_insn_be (operands[0], operands[1],
                                                  operands[2], operands[3]));
    else
      emit_insn (gen_aarch64_shrn2<mode>_insn_le (operands[0], operands[1],
                                                  operands[2], operands[3]));
    DONE;
  }
)

(define_insn "aarch64_rshrn2<mode>_insn_le"
  [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
        (vec_concat:<VNARROWQ2>
          (match_operand:<VNARROWQ> 1 "register_operand" "0")
          (unspec:<VNARROWQ> [(match_operand:VQN 2 "register_operand" "w")
            (match_operand:VQN 3 "aarch64_simd_shift_imm_vec_<vn_mode>")]
            UNSPEC_RSHRN)))]
  "TARGET_SIMD && !BYTES_BIG_ENDIAN"
  "rshrn2\\t%0.<V2ntype>, %2.<Vtype>, %3"
  [(set_attr "type" "neon_shift_imm_narrow_q")]
)

(define_insn "aarch64_rshrn2<mode>_insn_be"
  [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
        (vec_concat:<VNARROWQ2>
          (unspec:<VNARROWQ> [(match_operand:VQN 2 "register_operand" "w")
            (match_operand:VQN 3 "aarch64_simd_shift_imm_vec_<vn_mode>")]
            UNSPEC_RSHRN)
          (match_operand:<VNARROWQ> 1 "register_operand" "0")))]
  "TARGET_SIMD && BYTES_BIG_ENDIAN"
  "rshrn2\\t%0.<V2ntype>, %2.<Vtype>, %3"
  [(set_attr "type" "neon_shift_imm_narrow_q")]
)

(define_expand "aarch64_rshrn2<mode>"
  [(match_operand:<VNARROWQ2> 0 "register_operand")
   (match_operand:<VNARROWQ> 1 "register_operand")
   (match_operand:VQN 2 "register_operand")
   (match_operand:SI 3 "aarch64_simd_shift_imm_offset_<vn_mode>")]
  "TARGET_SIMD"
  {
    operands[3] = aarch64_simd_gen_const_vector_dup (<MODE>mode,
                                                     INTVAL (operands[3]));
    if (BYTES_BIG_ENDIAN)
      emit_insn (gen_aarch64_rshrn2<mode>_insn_be (operands[0], operands[1],
                                                   operands[2], operands[3]));
    else
      emit_insn (gen_aarch64_rshrn2<mode>_insn_le (operands[0], operands[1],
                                                   operands[2], operands[3]));
    DONE;
  }
)
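;; The shrn2/rshrn2 patterns fill in the high half of an already
;; half-populated narrow vector, as the vshrn_high_n intrinsics do.
;; Illustrative C (function name made up):
;;
;;   #include <arm_neon.h>
;;
;;   uint8x16_t
;;   narrow_both_halves (uint8x8_t lo, uint16x8_t x)
;;   {
;;     return vshrn_high_n_u16 (lo, x, 8);  /* A single SHRN2.  */
;;   }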
;; For quads.

(define_insn "vec_pack_trunc_<mode>"
  [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=&w")
        (vec_concat:<VNARROWQ2>
          (truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand" "w"))
          (truncate:<VNARROWQ> (match_operand:VQN 2 "register_operand" "w"))))]
  "TARGET_SIMD"
  {
    if (BYTES_BIG_ENDIAN)
      return "xtn\\t%0.<Vntype>, %2.<Vtype>\;xtn2\\t%0.<V2ntype>, %1.<Vtype>";
    else
      return "xtn\\t%0.<Vntype>, %1.<Vtype>\;xtn2\\t%0.<V2ntype>, %2.<Vtype>";
  }
  [(set_attr "type" "multiple")
   (set_attr "length" "8")]
)

;; Widening operations.

(define_insn "aarch64_simd_vec_unpack<su>_lo_<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
                              (match_operand:VQW 1 "register_operand" "w")
                              (match_operand:VQW 2 "vect_par_cnst_lo_half" "")
                            )))]
  "TARGET_SIMD"
  "<su>xtl\t%0.<Vwtype>, %1.<Vhalftype>"
  [(set_attr "type" "neon_shift_imm_long")]
)

(define_insn "aarch64_simd_vec_unpack<su>_hi_<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
                              (match_operand:VQW 1 "register_operand" "w")
                              (match_operand:VQW 2 "vect_par_cnst_hi_half" "")
                            )))]
  "TARGET_SIMD"
  "<su>xtl2\t%0.<Vwtype>, %1.<Vtype>"
  [(set_attr "type" "neon_shift_imm_long")]
)

(define_expand "vec_unpack<su>_hi_<mode>"
  [(match_operand:<VWIDE> 0 "register_operand")
   (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand"))]
  "TARGET_SIMD"
  {
    rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
    emit_insn (gen_aarch64_simd_vec_unpack<su>_hi_<mode> (operands[0],
                                                          operands[1], p));
    DONE;
  }
)

(define_expand "vec_unpack<su>_lo_<mode>"
  [(match_operand:<VWIDE> 0 "register_operand")
   (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand"))]
  "TARGET_SIMD"
  {
    rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
    emit_insn (gen_aarch64_simd_vec_unpack<su>_lo_<mode> (operands[0],
                                                          operands[1], p));
    DONE;
  }
)
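;; The vec_unpack patterns correspond to the vmovl intrinsics: sign- or
;; zero-extend one half of a vector to double-width elements.
;; Illustrative C (function names made up):
;;
;;   #include <arm_neon.h>
;;
;;   int16x8_t
;;   widen_low (int8x16_t x)
;;   {
;;     return vmovl_s8 (vget_low_s8 (x));  /* SXTL  */
;;   }
;;
;;   int16x8_t
;;   widen_high (int8x16_t x)
;;   {
;;     return vmovl_high_s8 (x);           /* SXTL2 */
;;   }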
;; Widening arithmetic.

(define_insn "*aarch64_<su>mlal_lo<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (plus:<VWIDE>
          (mult:<VWIDE>
            (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
              (match_operand:VQW 2 "register_operand" "w")
              (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
            (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
              (match_operand:VQW 4 "register_operand" "w")
              (match_dup 3))))
          (match_operand:<VWIDE> 1 "register_operand" "0")))]
  "TARGET_SIMD"
  "<su>mlal\t%0.<Vwtype>, %2.<Vhalftype>, %4.<Vhalftype>"
  [(set_attr "type" "neon_mla_<Vetype>_long")]
)

(define_insn "aarch64_<su>mlal_hi<mode>_insn"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (plus:<VWIDE>
          (mult:<VWIDE>
            (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
              (match_operand:VQW 2 "register_operand" "w")
              (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
            (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
              (match_operand:VQW 4 "register_operand" "w")
              (match_dup 3))))
          (match_operand:<VWIDE> 1 "register_operand" "0")))]
  "TARGET_SIMD"
  "<su>mlal2\t%0.<Vwtype>, %2.<Vtype>, %4.<Vtype>"
  [(set_attr "type" "neon_mla_<Vetype>_long")]
)

(define_expand "aarch64_<su>mlal_hi<mode>"
  [(match_operand:<VWIDE> 0 "register_operand")
   (match_operand:<VWIDE> 1 "register_operand")
   (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand"))
   (match_operand:VQW 3 "register_operand")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
  emit_insn (gen_aarch64_<su>mlal_hi<mode>_insn (operands[0], operands[1],
                                                 operands[2], p, operands[3]));
  DONE;
}
)

(define_insn "aarch64_<su>mlal_hi_n<mode>_insn"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (plus:<VWIDE>
          (mult:<VWIDE>
            (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
              (match_operand:VQ_HSI 2 "register_operand" "w")
              (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
            (ANY_EXTEND:<VWIDE> (vec_duplicate:<VCOND>
              (match_operand:<VEL> 4 "register_operand" "<h_con>"))))
          (match_operand:<VWIDE> 1 "register_operand" "0")))]
  "TARGET_SIMD"
  "<su>mlal2\t%0.<Vwtype>, %2.<Vtype>, %4.<Vetype>[0]"
  [(set_attr "type" "neon_mla_<Vetype>_long")]
)

(define_expand "aarch64_<su>mlal_hi_n<mode>"
  [(match_operand:<VWIDE> 0 "register_operand")
   (match_operand:<VWIDE> 1 "register_operand")
   (ANY_EXTEND:<VWIDE> (match_operand:VQ_HSI 2 "register_operand"))
   (match_operand:<VEL> 3 "register_operand")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
  emit_insn (gen_aarch64_<su>mlal_hi_n<mode>_insn (operands[0],
             operands[1], operands[2], p, operands[3]));
  DONE;
}
)

(define_insn "*aarch64_<su>mlsl_lo<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (minus:<VWIDE>
          (match_operand:<VWIDE> 1 "register_operand" "0")
          (mult:<VWIDE>
            (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
              (match_operand:VQW 2 "register_operand" "w")
              (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
            (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
              (match_operand:VQW 4 "register_operand" "w")
              (match_dup 3))))))]
  "TARGET_SIMD"
  "<su>mlsl\t%0.<Vwtype>, %2.<Vhalftype>, %4.<Vhalftype>"
  [(set_attr "type" "neon_mla_<Vetype>_long")]
)
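;; The mlal/mlsl patterns are widening multiply-accumulate operations,
;; as used by the vmlal/vmlsl intrinsics.  Illustrative C (function name
;; made up):
;;
;;   #include <arm_neon.h>
;;
;;   int32x4_t
;;   widening_mul_add (int32x4_t acc, int16x4_t a, int16x4_t b)
;;   {
;;     return vmlal_s16 (acc, a, b);  /* A single SMLAL.  */
;;   }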
"register_operand" "=w") 2063 (minus:<VWIDE> 2064 (match_operand:<VWIDE> 1 "register_operand" "0") 2065 (mult:<VWIDE> 2066 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF> 2067 (match_operand:VQW 2 "register_operand" "w") 2068 (match_operand:VQW 3 "vect_par_cnst_hi_half" ""))) 2069 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF> 2070 (match_operand:VQW 4 "register_operand" "w") 2071 (match_dup 3))))))] 2072 "TARGET_SIMD" 2073 "<su>mlsl2\t%0.<Vwtype>, %2.<Vtype>, %4.<Vtype>" 2074 [(set_attr "type" "neon_mla_<Vetype>_long")] 2075) 2076 2077(define_expand "aarch64_<su>mlsl_hi<mode>" 2078 [(match_operand:<VWIDE> 0 "register_operand") 2079 (match_operand:<VWIDE> 1 "register_operand") 2080 (ANY_EXTEND:<VWIDE>(match_operand:VQW 2 "register_operand")) 2081 (match_operand:VQW 3 "register_operand")] 2082 "TARGET_SIMD" 2083{ 2084 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true); 2085 emit_insn (gen_aarch64_<su>mlsl_hi<mode>_insn (operands[0], operands[1], 2086 operands[2], p, operands[3])); 2087 DONE; 2088} 2089) 2090 2091(define_insn "aarch64_<su>mlsl_hi_n<mode>_insn" 2092 [(set (match_operand:<VWIDE> 0 "register_operand" "=w") 2093 (minus:<VWIDE> 2094 (match_operand:<VWIDE> 1 "register_operand" "0") 2095 (mult:<VWIDE> 2096 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF> 2097 (match_operand:VQ_HSI 2 "register_operand" "w") 2098 (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" ""))) 2099 (ANY_EXTEND:<VWIDE> (vec_duplicate:<VCOND> 2100 (match_operand:<VEL> 4 "register_operand" "<h_con>"))))))] 2101 "TARGET_SIMD" 2102 "<su>mlsl2\t%0.<Vwtype>, %2.<Vtype>, %4.<Vetype>[0]" 2103 [(set_attr "type" "neon_mla_<Vetype>_long")] 2104) 2105 2106(define_expand "aarch64_<su>mlsl_hi_n<mode>" 2107 [(match_operand:<VWIDE> 0 "register_operand") 2108 (match_operand:<VWIDE> 1 "register_operand") 2109 (ANY_EXTEND:<VWIDE>(match_operand:VQ_HSI 2 "register_operand")) 2110 (match_operand:<VEL> 3 "register_operand")] 2111 "TARGET_SIMD" 2112{ 2113 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true); 2114 emit_insn (gen_aarch64_<su>mlsl_hi_n<mode>_insn (operands[0], 2115 operands[1], operands[2], p, operands[3])); 2116 DONE; 2117} 2118) 2119 2120(define_insn "aarch64_<su>mlal<mode>" 2121 [(set (match_operand:<VWIDE> 0 "register_operand" "=w") 2122 (plus:<VWIDE> 2123 (mult:<VWIDE> 2124 (ANY_EXTEND:<VWIDE> 2125 (match_operand:VD_BHSI 2 "register_operand" "w")) 2126 (ANY_EXTEND:<VWIDE> 2127 (match_operand:VD_BHSI 3 "register_operand" "w"))) 2128 (match_operand:<VWIDE> 1 "register_operand" "0")))] 2129 "TARGET_SIMD" 2130 "<su>mlal\t%0.<Vwtype>, %2.<Vtype>, %3.<Vtype>" 2131 [(set_attr "type" "neon_mla_<Vetype>_long")] 2132) 2133 2134(define_insn "aarch64_<su>mlal_n<mode>" 2135 [(set (match_operand:<VWIDE> 0 "register_operand" "=w") 2136 (plus:<VWIDE> 2137 (mult:<VWIDE> 2138 (ANY_EXTEND:<VWIDE> 2139 (match_operand:VD_HSI 2 "register_operand" "w")) 2140 (ANY_EXTEND:<VWIDE> 2141 (vec_duplicate:VD_HSI 2142 (match_operand:<VEL> 3 "register_operand" "<h_con>")))) 2143 (match_operand:<VWIDE> 1 "register_operand" "0")))] 2144 "TARGET_SIMD" 2145 "<su>mlal\t%0.<Vwtype>, %2.<Vtype>, %3.<Vetype>[0]" 2146 [(set_attr "type" "neon_mla_<Vetype>_long")] 2147) 2148 2149(define_insn "aarch64_<su>mlsl<mode>" 2150 [(set (match_operand:<VWIDE> 0 "register_operand" "=w") 2151 (minus:<VWIDE> 2152 (match_operand:<VWIDE> 1 "register_operand" "0") 2153 (mult:<VWIDE> 2154 (ANY_EXTEND:<VWIDE> 2155 (match_operand:VD_BHSI 2 "register_operand" "w")) 2156 (ANY_EXTEND:<VWIDE> 2157 (match_operand:VD_BHSI 3 "register_operand" "w")))))] 2158 "TARGET_SIMD" 
2159 "<su>mlsl\t%0.<Vwtype>, %2.<Vtype>, %3.<Vtype>" 2160 [(set_attr "type" "neon_mla_<Vetype>_long")] 2161) 2162 2163(define_insn "aarch64_<su>mlsl_n<mode>" 2164 [(set (match_operand:<VWIDE> 0 "register_operand" "=w") 2165 (minus:<VWIDE> 2166 (match_operand:<VWIDE> 1 "register_operand" "0") 2167 (mult:<VWIDE> 2168 (ANY_EXTEND:<VWIDE> 2169 (match_operand:VD_HSI 2 "register_operand" "w")) 2170 (ANY_EXTEND:<VWIDE> 2171 (vec_duplicate:VD_HSI 2172 (match_operand:<VEL> 3 "register_operand" "<h_con>"))))))] 2173 "TARGET_SIMD" 2174 "<su>mlsl\t%0.<Vwtype>, %2.<Vtype>, %3.<Vetype>[0]" 2175 [(set_attr "type" "neon_mla_<Vetype>_long")] 2176) 2177 2178(define_insn "aarch64_simd_vec_<su>mult_lo_<mode>" 2179 [(set (match_operand:<VWIDE> 0 "register_operand" "=w") 2180 (mult:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF> 2181 (match_operand:VQW 1 "register_operand" "w") 2182 (match_operand:VQW 3 "vect_par_cnst_lo_half" ""))) 2183 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF> 2184 (match_operand:VQW 2 "register_operand" "w") 2185 (match_dup 3)))))] 2186 "TARGET_SIMD" 2187 "<su>mull\\t%0.<Vwtype>, %1.<Vhalftype>, %2.<Vhalftype>" 2188 [(set_attr "type" "neon_mul_<Vetype>_long")] 2189) 2190 2191(define_insn "aarch64_intrinsic_vec_<su>mult_lo_<mode>" 2192 [(set (match_operand:<VWIDE> 0 "register_operand" "=w") 2193 (mult:<VWIDE> (ANY_EXTEND:<VWIDE> 2194 (match_operand:VD_BHSI 1 "register_operand" "w")) 2195 (ANY_EXTEND:<VWIDE> 2196 (match_operand:VD_BHSI 2 "register_operand" "w"))))] 2197 "TARGET_SIMD" 2198 "<su>mull\\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>" 2199 [(set_attr "type" "neon_mul_<Vetype>_long")] 2200) 2201 2202(define_expand "vec_widen_<su>mult_lo_<mode>" 2203 [(match_operand:<VWIDE> 0 "register_operand") 2204 (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand")) 2205 (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand"))] 2206 "TARGET_SIMD" 2207 { 2208 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false); 2209 emit_insn (gen_aarch64_simd_vec_<su>mult_lo_<mode> (operands[0], 2210 operands[1], 2211 operands[2], p)); 2212 DONE; 2213 } 2214) 2215 2216(define_insn "aarch64_simd_vec_<su>mult_hi_<mode>" 2217 [(set (match_operand:<VWIDE> 0 "register_operand" "=w") 2218 (mult:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF> 2219 (match_operand:VQW 1 "register_operand" "w") 2220 (match_operand:VQW 3 "vect_par_cnst_hi_half" ""))) 2221 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF> 2222 (match_operand:VQW 2 "register_operand" "w") 2223 (match_dup 3)))))] 2224 "TARGET_SIMD" 2225 "<su>mull2\\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>" 2226 [(set_attr "type" "neon_mul_<Vetype>_long")] 2227) 2228 2229(define_expand "vec_widen_<su>mult_hi_<mode>" 2230 [(match_operand:<VWIDE> 0 "register_operand") 2231 (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand")) 2232 (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand"))] 2233 "TARGET_SIMD" 2234 { 2235 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true); 2236 emit_insn (gen_aarch64_simd_vec_<su>mult_hi_<mode> (operands[0], 2237 operands[1], 2238 operands[2], p)); 2239 DONE; 2240 2241 } 2242) 2243 2244;; vmull_lane_s16 intrinsics 2245(define_insn "aarch64_vec_<su>mult_lane<Qlane>" 2246 [(set (match_operand:<VWIDE> 0 "register_operand" "=w") 2247 (mult:<VWIDE> 2248 (ANY_EXTEND:<VWIDE> 2249 (match_operand:<VCOND> 1 "register_operand" "w")) 2250 (ANY_EXTEND:<VWIDE> 2251 (vec_duplicate:<VCOND> 2252 (vec_select:<VEL> 2253 (match_operand:VDQHS 2 "register_operand" "<vwx>") 2254 (parallel [(match_operand:SI 3 "immediate_operand" 
"i")]))))))] 2255 "TARGET_SIMD" 2256 { 2257 operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3])); 2258 return "<su>mull\\t%0.<Vwtype>, %1.<Vcondtype>, %2.<Vetype>[%3]"; 2259 } 2260 [(set_attr "type" "neon_mul_<Vetype>_scalar_long")] 2261) 2262 2263(define_insn "aarch64_<su>mull_hi_lane<mode>_insn" 2264 [(set (match_operand:<VWIDE> 0 "register_operand" "=w") 2265 (mult:<VWIDE> 2266 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF> 2267 (match_operand:VQ_HSI 1 "register_operand" "w") 2268 (match_operand:VQ_HSI 2 "vect_par_cnst_hi_half" ""))) 2269 (ANY_EXTEND:<VWIDE> (vec_duplicate:<VHALF> 2270 (vec_select:<VEL> 2271 (match_operand:<VCOND> 3 "register_operand" "<vwx>") 2272 (parallel [(match_operand:SI 4 "immediate_operand" "i")]))))))] 2273 "TARGET_SIMD" 2274 { 2275 operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4])); 2276 return "<su>mull2\\t%0.<Vwtype>, %1.<Vtype>, %3.<Vetype>[%4]"; 2277 } 2278 [(set_attr "type" "neon_mul_<Vetype>_scalar_long")] 2279) 2280 2281(define_expand "aarch64_<su>mull_hi_lane<mode>" 2282 [(match_operand:<VWIDE> 0 "register_operand") 2283 (ANY_EXTEND:<VWIDE>(match_operand:VQ_HSI 1 "register_operand")) 2284 (match_operand:<VCOND> 2 "register_operand") 2285 (match_operand:SI 3 "immediate_operand")] 2286 "TARGET_SIMD" 2287{ 2288 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true); 2289 emit_insn (gen_aarch64_<su>mull_hi_lane<mode>_insn (operands[0], 2290 operands[1], p, operands[2], operands[3])); 2291 DONE; 2292} 2293) 2294 2295(define_insn "aarch64_<su>mull_hi_laneq<mode>_insn" 2296 [(set (match_operand:<VWIDE> 0 "register_operand" "=w") 2297 (mult:<VWIDE> 2298 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF> 2299 (match_operand:VQ_HSI 1 "register_operand" "w") 2300 (match_operand:VQ_HSI 2 "vect_par_cnst_hi_half" ""))) 2301 (ANY_EXTEND:<VWIDE> (vec_duplicate:<VHALF> 2302 (vec_select:<VEL> 2303 (match_operand:<VCONQ> 3 "register_operand" "<vwx>") 2304 (parallel [(match_operand:SI 4 "immediate_operand" "i")]))))))] 2305 "TARGET_SIMD" 2306 { 2307 operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4])); 2308 return "<su>mull2\\t%0.<Vwtype>, %1.<Vtype>, %3.<Vetype>[%4]"; 2309 } 2310 [(set_attr "type" "neon_mul_<Vetype>_scalar_long")] 2311) 2312 2313(define_expand "aarch64_<su>mull_hi_laneq<mode>" 2314 [(match_operand:<VWIDE> 0 "register_operand") 2315 (ANY_EXTEND:<VWIDE>(match_operand:VQ_HSI 1 "register_operand")) 2316 (match_operand:<VCONQ> 2 "register_operand") 2317 (match_operand:SI 3 "immediate_operand")] 2318 "TARGET_SIMD" 2319{ 2320 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true); 2321 emit_insn (gen_aarch64_<su>mull_hi_laneq<mode>_insn (operands[0], 2322 operands[1], p, operands[2], operands[3])); 2323 DONE; 2324} 2325) 2326 2327(define_insn "aarch64_<su>mull_n<mode>" 2328 [(set (match_operand:<VWIDE> 0 "register_operand" "=w") 2329 (mult:<VWIDE> 2330 (ANY_EXTEND:<VWIDE> 2331 (match_operand:VD_HSI 1 "register_operand" "w")) 2332 (ANY_EXTEND:<VWIDE> 2333 (vec_duplicate:<VCOND> 2334 (match_operand:<VEL> 2 "register_operand" "<h_con>")))))] 2335 "TARGET_SIMD" 2336 "<su>mull\t%0.<Vwtype>, %1.<Vtype>, %2.<Vetype>[0]" 2337 [(set_attr "type" "neon_mul_<Vetype>_scalar_long")] 2338) 2339 2340(define_insn "aarch64_<su>mull_hi_n<mode>_insn" 2341 [(set (match_operand:<VWIDE> 0 "register_operand" "=w") 2342 (mult:<VWIDE> 2343 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF> 2344 (match_operand:VQ_HSI 1 "register_operand" "w") 2345 (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" ""))) 2346 (ANY_EXTEND:<VWIDE> 
(define_insn "aarch64_<su>mull_hi_n<mode>_insn"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (mult:<VWIDE>
          (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
            (match_operand:VQ_HSI 1 "register_operand" "w")
            (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
          (ANY_EXTEND:<VWIDE>
            (vec_duplicate:<VCOND>
              (match_operand:<VEL> 2 "register_operand" "<h_con>")))))]
  "TARGET_SIMD"
  "<su>mull2\\t%0.<Vwtype>, %1.<Vtype>, %2.<Vetype>[0]"
  [(set_attr "type" "neon_mul_<Vetype>_scalar_long")]
)

(define_expand "aarch64_<su>mull_hi_n<mode>"
  [(match_operand:<VWIDE> 0 "register_operand")
   (ANY_EXTEND:<VWIDE> (match_operand:VQ_HSI 1 "register_operand"))
   (match_operand:<VEL> 2 "register_operand")]
  "TARGET_SIMD"
  {
    rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
    emit_insn (gen_aarch64_<su>mull_hi_n<mode>_insn (operands[0], operands[1],
                                                     operands[2], p));
    DONE;
  }
)

;; vmlal_lane_s16 intrinsics
(define_insn "aarch64_vec_<su>mlal_lane<Qlane>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (plus:<VWIDE>
          (mult:<VWIDE>
            (ANY_EXTEND:<VWIDE>
              (match_operand:<VCOND> 2 "register_operand" "w"))
            (ANY_EXTEND:<VWIDE>
              (vec_duplicate:<VCOND>
                (vec_select:<VEL>
                  (match_operand:VDQHS 3 "register_operand" "<vwx>")
                  (parallel [(match_operand:SI 4 "immediate_operand" "i")])))))
          (match_operand:<VWIDE> 1 "register_operand" "0")))]
  "TARGET_SIMD"
  {
    operands[4] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[4]));
    return "<su>mlal\\t%0.<Vwtype>, %2.<Vcondtype>, %3.<Vetype>[%4]";
  }
  [(set_attr "type" "neon_mla_<Vetype>_scalar_long")]
)

(define_insn "aarch64_<su>mlal_hi_lane<mode>_insn"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (plus:<VWIDE>
          (mult:<VWIDE>
            (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
              (match_operand:VQ_HSI 2 "register_operand" "w")
              (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
            (ANY_EXTEND:<VWIDE> (vec_duplicate:<VHALF>
              (vec_select:<VEL>
                (match_operand:<VCOND> 4 "register_operand" "<vwx>")
                (parallel [(match_operand:SI 5 "immediate_operand" "i")])))))
          (match_operand:<VWIDE> 1 "register_operand" "0")))]
  "TARGET_SIMD"
  {
    operands[5] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[5]));
    return "<su>mlal2\\t%0.<Vwtype>, %2.<Vtype>, %4.<Vetype>[%5]";
  }
  [(set_attr "type" "neon_mla_<Vetype>_scalar_long")]
)

(define_expand "aarch64_<su>mlal_hi_lane<mode>"
  [(match_operand:<VWIDE> 0 "register_operand")
   (match_operand:<VWIDE> 1 "register_operand")
   (ANY_EXTEND:<VWIDE> (match_operand:VQ_HSI 2 "register_operand"))
   (match_operand:<VCOND> 3 "register_operand")
   (match_operand:SI 4 "immediate_operand")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
  emit_insn (gen_aarch64_<su>mlal_hi_lane<mode>_insn (operands[0],
             operands[1], operands[2], p, operands[3], operands[4]));
  DONE;
}
)
"<su>mlal2\\t%0.<Vwtype>, %2.<Vtype>, %4.<Vetype>[%5]"; 2439 } 2440 [(set_attr "type" "neon_mla_<Vetype>_scalar_long")] 2441) 2442 2443(define_expand "aarch64_<su>mlal_hi_laneq<mode>" 2444 [(match_operand:<VWIDE> 0 "register_operand") 2445 (match_operand:<VWIDE> 1 "register_operand") 2446 (ANY_EXTEND:<VWIDE>(match_operand:VQ_HSI 2 "register_operand")) 2447 (match_operand:<VCONQ> 3 "register_operand") 2448 (match_operand:SI 4 "immediate_operand")] 2449 "TARGET_SIMD" 2450{ 2451 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true); 2452 emit_insn (gen_aarch64_<su>mlal_hi_laneq<mode>_insn (operands[0], 2453 operands[1], operands[2], p, operands[3], operands[4])); 2454 DONE; 2455} 2456) 2457 2458(define_insn "aarch64_vec_<su>mlsl_lane<Qlane>" 2459 [(set (match_operand:<VWIDE> 0 "register_operand" "=w") 2460 (minus:<VWIDE> 2461 (match_operand:<VWIDE> 1 "register_operand" "0") 2462 (mult:<VWIDE> 2463 (ANY_EXTEND:<VWIDE> 2464 (match_operand:<VCOND> 2 "register_operand" "w")) 2465 (ANY_EXTEND:<VWIDE> 2466 (vec_duplicate:<VCOND> 2467 (vec_select:<VEL> 2468 (match_operand:VDQHS 3 "register_operand" "<vwx>") 2469 (parallel [(match_operand:SI 4 "immediate_operand" "i")])))))))] 2470 "TARGET_SIMD" 2471 { 2472 operands[4] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[4])); 2473 return "<su>mlsl\\t%0.<Vwtype>, %2.<Vcondtype>, %3.<Vetype>[%4]"; 2474 } 2475 [(set_attr "type" "neon_mla_<Vetype>_scalar_long")] 2476) 2477 2478(define_insn "aarch64_<su>mlsl_hi_lane<mode>_insn" 2479 [(set (match_operand:<VWIDE> 0 "register_operand" "=w") 2480 (minus:<VWIDE> 2481 (match_operand:<VWIDE> 1 "register_operand" "0") 2482 (mult:<VWIDE> 2483 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF> 2484 (match_operand:VQ_HSI 2 "register_operand" "w") 2485 (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" ""))) 2486 (ANY_EXTEND:<VWIDE> (vec_duplicate:<VHALF> 2487 (vec_select:<VEL> 2488 (match_operand:<VCOND> 4 "register_operand" "<vwx>") 2489 (parallel [(match_operand:SI 5 "immediate_operand" "i")])))) 2490 )))] 2491 "TARGET_SIMD" 2492 { 2493 operands[5] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[5])); 2494 return "<su>mlsl2\\t%0.<Vwtype>, %2.<Vtype>, %4.<Vetype>[%5]"; 2495 } 2496 [(set_attr "type" "neon_mla_<Vetype>_scalar_long")] 2497) 2498 2499(define_expand "aarch64_<su>mlsl_hi_lane<mode>" 2500 [(match_operand:<VWIDE> 0 "register_operand") 2501 (match_operand:<VWIDE> 1 "register_operand") 2502 (ANY_EXTEND:<VWIDE>(match_operand:VQ_HSI 2 "register_operand")) 2503 (match_operand:<VCOND> 3 "register_operand") 2504 (match_operand:SI 4 "immediate_operand")] 2505 "TARGET_SIMD" 2506{ 2507 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true); 2508 emit_insn (gen_aarch64_<su>mlsl_hi_lane<mode>_insn (operands[0], 2509 operands[1], operands[2], p, operands[3], operands[4])); 2510 DONE; 2511} 2512) 2513 2514(define_insn "aarch64_<su>mlsl_hi_laneq<mode>_insn" 2515 [(set (match_operand:<VWIDE> 0 "register_operand" "=w") 2516 (minus:<VWIDE> 2517 (match_operand:<VWIDE> 1 "register_operand" "0") 2518 (mult:<VWIDE> 2519 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF> 2520 (match_operand:VQ_HSI 2 "register_operand" "w") 2521 (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" ""))) 2522 (ANY_EXTEND:<VWIDE> (vec_duplicate:<VHALF> 2523 (vec_select:<VEL> 2524 (match_operand:<VCONQ> 4 "register_operand" "<vwx>") 2525 (parallel [(match_operand:SI 5 "immediate_operand" "i")])))) 2526 )))] 2527 "TARGET_SIMD" 2528 { 2529 operands[5] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[5])); 2530 return 
"<su>mlsl2\\t%0.<Vwtype>, %2.<Vtype>, %4.<Vetype>[%5]"; 2531 } 2532 [(set_attr "type" "neon_mla_<Vetype>_scalar_long")] 2533) 2534 2535(define_expand "aarch64_<su>mlsl_hi_laneq<mode>" 2536 [(match_operand:<VWIDE> 0 "register_operand") 2537 (match_operand:<VWIDE> 1 "register_operand") 2538 (ANY_EXTEND:<VWIDE>(match_operand:VQ_HSI 2 "register_operand")) 2539 (match_operand:<VCONQ> 3 "register_operand") 2540 (match_operand:SI 4 "immediate_operand")] 2541 "TARGET_SIMD" 2542{ 2543 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true); 2544 emit_insn (gen_aarch64_<su>mlsl_hi_laneq<mode>_insn (operands[0], 2545 operands[1], operands[2], p, operands[3], operands[4])); 2546 DONE; 2547} 2548) 2549 2550;; FP vector operations. 2551;; AArch64 AdvSIMD supports single-precision (32-bit) and 2552;; double-precision (64-bit) floating-point data types and arithmetic as 2553;; defined by the IEEE 754-2008 standard. This makes them vectorizable 2554;; without the need for -ffast-math or -funsafe-math-optimizations. 2555;; 2556;; Floating-point operations can raise an exception. Vectorizing such 2557;; operations are safe because of reasons explained below. 2558;; 2559;; ARMv8 permits an extension to enable trapped floating-point 2560;; exception handling, however this is an optional feature. In the 2561;; event of a floating-point exception being raised by vectorised 2562;; code then: 2563;; 1. If trapped floating-point exceptions are available, then a trap 2564;; will be taken when any lane raises an enabled exception. A trap 2565;; handler may determine which lane raised the exception. 2566;; 2. Alternatively a sticky exception flag is set in the 2567;; floating-point status register (FPSR). Software may explicitly 2568;; test the exception flags, in which case the tests will either 2569;; prevent vectorisation, allowing precise identification of the 2570;; failing operation, or if tested outside of vectorisable regions 2571;; then the specific operation and lane are not of interest. 2572 2573;; FP arithmetic operations. 
;; FP arithmetic operations.

(define_insn "add<mode>3"
  [(set (match_operand:VHSDF 0 "register_operand" "=w")
        (plus:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
                    (match_operand:VHSDF 2 "register_operand" "w")))]
  "TARGET_SIMD"
  "fadd\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_fp_addsub_<stype><q>")]
)

(define_insn "sub<mode>3"
  [(set (match_operand:VHSDF 0 "register_operand" "=w")
        (minus:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
                     (match_operand:VHSDF 2 "register_operand" "w")))]
  "TARGET_SIMD"
  "fsub\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_fp_addsub_<stype><q>")]
)

(define_insn "mul<mode>3"
  [(set (match_operand:VHSDF 0 "register_operand" "=w")
        (mult:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
                    (match_operand:VHSDF 2 "register_operand" "w")))]
  "TARGET_SIMD"
  "fmul\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_fp_mul_<stype><q>")]
)

(define_expand "div<mode>3"
  [(set (match_operand:VHSDF 0 "register_operand")
        (div:VHSDF (match_operand:VHSDF 1 "register_operand")
                   (match_operand:VHSDF 2 "register_operand")))]
  "TARGET_SIMD"
{
  if (aarch64_emit_approx_div (operands[0], operands[1], operands[2]))
    DONE;

  operands[1] = force_reg (<MODE>mode, operands[1]);
})

(define_insn "*div<mode>3"
  [(set (match_operand:VHSDF 0 "register_operand" "=w")
        (div:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
                   (match_operand:VHSDF 2 "register_operand" "w")))]
  "TARGET_SIMD"
  "fdiv\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_fp_div_<stype><q>")]
)

(define_insn "neg<mode>2"
  [(set (match_operand:VHSDF 0 "register_operand" "=w")
        (neg:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
  "TARGET_SIMD"
  "fneg\\t%0.<Vtype>, %1.<Vtype>"
  [(set_attr "type" "neon_fp_neg_<stype><q>")]
)

(define_insn "abs<mode>2"
  [(set (match_operand:VHSDF 0 "register_operand" "=w")
        (abs:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
  "TARGET_SIMD"
  "fabs\\t%0.<Vtype>, %1.<Vtype>"
  [(set_attr "type" "neon_fp_abs_<stype><q>")]
)

(define_insn "fma<mode>4"
  [(set (match_operand:VHSDF 0 "register_operand" "=w")
        (fma:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
                   (match_operand:VHSDF 2 "register_operand" "w")
                   (match_operand:VHSDF 3 "register_operand" "0")))]
  "TARGET_SIMD"
  "fmla\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_fp_mla_<stype><q>")]
)
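;; fma<mode>4 is the standard fused multiply-add name, reached e.g. via
;; the vfma intrinsics or __builtin_fma.  Illustrative C (function name
;; made up):
;;
;;   #include <arm_neon.h>
;;
;;   float32x4_t
;;   fused_mul_add (float32x4_t acc, float32x4_t a, float32x4_t b)
;;   {
;;     return vfmaq_f32 (acc, a, b);  /* A single FMLA.  */
;;   }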
(define_insn "*aarch64_fma4_elt<mode>"
  [(set (match_operand:VDQF 0 "register_operand" "=w")
        (fma:VDQF
          (vec_duplicate:VDQF
            (vec_select:<VEL>
              (match_operand:VDQF 1 "register_operand" "<h_con>")
              (parallel [(match_operand:SI 2 "immediate_operand")])))
          (match_operand:VDQF 3 "register_operand" "w")
          (match_operand:VDQF 4 "register_operand" "0")))]
  "TARGET_SIMD"
  {
    operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
    return "fmla\\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
  }
  [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
)

(define_insn "*aarch64_fma4_elt_<vswap_width_name><mode>"
  [(set (match_operand:VDQSF 0 "register_operand" "=w")
        (fma:VDQSF
          (vec_duplicate:VDQSF
            (vec_select:<VEL>
              (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
              (parallel [(match_operand:SI 2 "immediate_operand")])))
          (match_operand:VDQSF 3 "register_operand" "w")
          (match_operand:VDQSF 4 "register_operand" "0")))]
  "TARGET_SIMD"
  {
    operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
    return "fmla\\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
  }
  [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
)

(define_insn "*aarch64_fma4_elt_from_dup<mode>"
  [(set (match_operand:VMUL 0 "register_operand" "=w")
        (fma:VMUL
          (vec_duplicate:VMUL
            (match_operand:<VEL> 1 "register_operand" "<h_con>"))
          (match_operand:VMUL 2 "register_operand" "w")
          (match_operand:VMUL 3 "register_operand" "0")))]
  "TARGET_SIMD"
  "fmla\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]"
  [(set_attr "type" "neon<fp>_mla_<stype>_scalar<q>")]
)

(define_insn "*aarch64_fma4_elt_to_64v2df"
  [(set (match_operand:DF 0 "register_operand" "=w")
        (fma:DF
          (vec_select:DF
            (match_operand:V2DF 1 "register_operand" "w")
            (parallel [(match_operand:SI 2 "immediate_operand")]))
          (match_operand:DF 3 "register_operand" "w")
          (match_operand:DF 4 "register_operand" "0")))]
  "TARGET_SIMD"
  {
    operands[2] = aarch64_endian_lane_rtx (V2DFmode, INTVAL (operands[2]));
    return "fmla\\t%0.2d, %3.2d, %1.d[%2]";
  }
  [(set_attr "type" "neon_fp_mla_d_scalar_q")]
)

(define_insn "fnma<mode>4"
  [(set (match_operand:VHSDF 0 "register_operand" "=w")
        (fma:VHSDF
          (neg:VHSDF (match_operand:VHSDF 1 "register_operand" "w"))
          (match_operand:VHSDF 2 "register_operand" "w")
          (match_operand:VHSDF 3 "register_operand" "0")))]
  "TARGET_SIMD"
  "fmls\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_fp_mla_<stype><q>")]
)

(define_insn "*aarch64_fnma4_elt<mode>"
  [(set (match_operand:VDQF 0 "register_operand" "=w")
        (fma:VDQF
          (neg:VDQF
            (match_operand:VDQF 3 "register_operand" "w"))
          (vec_duplicate:VDQF
            (vec_select:<VEL>
              (match_operand:VDQF 1 "register_operand" "<h_con>")
              (parallel [(match_operand:SI 2 "immediate_operand")])))
          (match_operand:VDQF 4 "register_operand" "0")))]
  "TARGET_SIMD"
  {
    operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
    return "fmls\\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
  }
  [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
)

(define_insn "*aarch64_fnma4_elt_<vswap_width_name><mode>"
  [(set (match_operand:VDQSF 0 "register_operand" "=w")
        (fma:VDQSF
          (neg:VDQSF
            (match_operand:VDQSF 3 "register_operand" "w"))
          (vec_duplicate:VDQSF
            (vec_select:<VEL>
              (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
              (parallel [(match_operand:SI 2 "immediate_operand")])))
          (match_operand:VDQSF 4 "register_operand" "0")))]
  "TARGET_SIMD"
  {
    operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
    return "fmls\\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
  }
  [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
)
%2.<Vtype>, %1.<Vetype>[0]" 2768 [(set_attr "type" "neon<fp>_mla_<stype>_scalar<q>")] 2769) 2770 2771(define_insn "*aarch64_fnma4_elt_to_64v2df" 2772 [(set (match_operand:DF 0 "register_operand" "=w") 2773 (fma:DF 2774 (vec_select:DF 2775 (match_operand:V2DF 1 "register_operand" "w") 2776 (parallel [(match_operand:SI 2 "immediate_operand")])) 2777 (neg:DF 2778 (match_operand:DF 3 "register_operand" "w")) 2779 (match_operand:DF 4 "register_operand" "0")))] 2780 "TARGET_SIMD" 2781 { 2782 operands[2] = aarch64_endian_lane_rtx (V2DFmode, INTVAL (operands[2])); 2783 return "fmls\\t%0.2d, %3.2d, %1.d[%2]"; 2784 } 2785 [(set_attr "type" "neon_fp_mla_d_scalar_q")] 2786) 2787 2788;; Vector versions of the floating-point frint patterns. 2789;; Expands to btrunc, ceil, floor, nearbyint, rint, round, frintn. 2790(define_insn "<frint_pattern><mode>2" 2791 [(set (match_operand:VHSDF 0 "register_operand" "=w") 2792 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")] 2793 FRINT))] 2794 "TARGET_SIMD" 2795 "frint<frint_suffix>\\t%0.<Vtype>, %1.<Vtype>" 2796 [(set_attr "type" "neon_fp_round_<stype><q>")] 2797) 2798 2799;; Vector versions of the fcvt standard patterns. 2800;; Expands to lbtrunc, lround, lceil, lfloor 2801(define_insn "l<fcvt_pattern><su_optab><VHSDF:mode><fcvt_target>2" 2802 [(set (match_operand:<FCVT_TARGET> 0 "register_operand" "=w") 2803 (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET> 2804 [(match_operand:VHSDF 1 "register_operand" "w")] 2805 FCVT)))] 2806 "TARGET_SIMD" 2807 "fcvt<frint_suffix><su>\\t%0.<Vtype>, %1.<Vtype>" 2808 [(set_attr "type" "neon_fp_to_int_<stype><q>")] 2809) 2810 2811;; HF Scalar variants of related SIMD instructions. 2812(define_insn "l<fcvt_pattern><su_optab>hfhi2" 2813 [(set (match_operand:HI 0 "register_operand" "=w") 2814 (FIXUORS:HI (unspec:HF [(match_operand:HF 1 "register_operand" "w")] 2815 FCVT)))] 2816 "TARGET_SIMD_F16INST" 2817 "fcvt<frint_suffix><su>\t%h0, %h1" 2818 [(set_attr "type" "neon_fp_to_int_s")] 2819) 2820 2821(define_insn "<optab>_trunchfhi2" 2822 [(set (match_operand:HI 0 "register_operand" "=w") 2823 (FIXUORS:HI (match_operand:HF 1 "register_operand" "w")))] 2824 "TARGET_SIMD_F16INST" 2825 "fcvtz<su>\t%h0, %h1" 2826 [(set_attr "type" "neon_fp_to_int_s")] 2827) 2828 2829(define_insn "<optab>hihf2" 2830 [(set (match_operand:HF 0 "register_operand" "=w") 2831 (FLOATUORS:HF (match_operand:HI 1 "register_operand" "w")))] 2832 "TARGET_SIMD_F16INST" 2833 "<su_optab>cvtf\t%h0, %h1" 2834 [(set_attr "type" "neon_int_to_fp_s")] 2835) 2836 2837(define_insn "*aarch64_fcvt<su_optab><VDQF:mode><fcvt_target>2_mult" 2838 [(set (match_operand:<FCVT_TARGET> 0 "register_operand" "=w") 2839 (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET> 2840 [(mult:VDQF 2841 (match_operand:VDQF 1 "register_operand" "w") 2842 (match_operand:VDQF 2 "aarch64_fp_vec_pow2" ""))] 2843 UNSPEC_FRINTZ)))] 2844 "TARGET_SIMD 2845 && IN_RANGE (aarch64_vec_fpconst_pow_of_2 (operands[2]), 1, 2846 GET_MODE_BITSIZE (GET_MODE_INNER (<VDQF:MODE>mode)))" 2847 { 2848 int fbits = aarch64_vec_fpconst_pow_of_2 (operands[2]); 2849 char buf[64]; 2850 snprintf (buf, 64, "fcvtz<su>\\t%%0.<Vtype>, %%1.<Vtype>, #%d", fbits); 2851 output_asm_insn (buf, operands); 2852 return ""; 2853 } 2854 [(set_attr "type" "neon_fp_to_int_<Vetype><q>")] 2855) 2856 2857(define_expand "<optab><VHSDF:mode><fcvt_target>2" 2858 [(set (match_operand:<FCVT_TARGET> 0 "register_operand") 2859 (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET> 2860 [(match_operand:VHSDF 1 "register_operand")] 2861 UNSPEC_FRINTZ)))] 2862 
"TARGET_SIMD" 2863 {}) 2864 2865(define_expand "<fix_trunc_optab><VHSDF:mode><fcvt_target>2" 2866 [(set (match_operand:<FCVT_TARGET> 0 "register_operand") 2867 (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET> 2868 [(match_operand:VHSDF 1 "register_operand")] 2869 UNSPEC_FRINTZ)))] 2870 "TARGET_SIMD" 2871 {}) 2872 2873(define_expand "ftrunc<VHSDF:mode>2" 2874 [(set (match_operand:VHSDF 0 "register_operand") 2875 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand")] 2876 UNSPEC_FRINTZ))] 2877 "TARGET_SIMD" 2878 {}) 2879 2880(define_insn "<optab><fcvt_target><VHSDF:mode>2" 2881 [(set (match_operand:VHSDF 0 "register_operand" "=w") 2882 (FLOATUORS:VHSDF 2883 (match_operand:<FCVT_TARGET> 1 "register_operand" "w")))] 2884 "TARGET_SIMD" 2885 "<su_optab>cvtf\\t%0.<Vtype>, %1.<Vtype>" 2886 [(set_attr "type" "neon_int_to_fp_<stype><q>")] 2887) 2888 2889;; Conversions between vectors of floats and doubles. 2890;; Contains a mix of patterns to match standard pattern names 2891;; and those for intrinsics. 2892 2893;; Float widening operations. 2894 2895(define_insn "aarch64_simd_vec_unpacks_lo_<mode>" 2896 [(set (match_operand:<VWIDE> 0 "register_operand" "=w") 2897 (float_extend:<VWIDE> (vec_select:<VHALF> 2898 (match_operand:VQ_HSF 1 "register_operand" "w") 2899 (match_operand:VQ_HSF 2 "vect_par_cnst_lo_half" "") 2900 )))] 2901 "TARGET_SIMD" 2902 "fcvtl\\t%0.<Vwtype>, %1.<Vhalftype>" 2903 [(set_attr "type" "neon_fp_cvt_widen_s")] 2904) 2905 2906;; Convert between fixed-point and floating-point (vector modes) 2907 2908(define_insn "<FCVT_F2FIXED:fcvt_fixed_insn><VHSDF:mode>3" 2909 [(set (match_operand:<VHSDF:FCVT_TARGET> 0 "register_operand" "=w") 2910 (unspec:<VHSDF:FCVT_TARGET> 2911 [(match_operand:VHSDF 1 "register_operand" "w") 2912 (match_operand:SI 2 "immediate_operand" "i")] 2913 FCVT_F2FIXED))] 2914 "TARGET_SIMD" 2915 "<FCVT_F2FIXED:fcvt_fixed_insn>\t%<v>0<Vmtype>, %<v>1<Vmtype>, #%2" 2916 [(set_attr "type" "neon_fp_to_int_<VHSDF:stype><q>")] 2917) 2918 2919(define_insn "<FCVT_FIXED2F:fcvt_fixed_insn><VDQ_HSDI:mode>3" 2920 [(set (match_operand:<VDQ_HSDI:FCVT_TARGET> 0 "register_operand" "=w") 2921 (unspec:<VDQ_HSDI:FCVT_TARGET> 2922 [(match_operand:VDQ_HSDI 1 "register_operand" "w") 2923 (match_operand:SI 2 "immediate_operand" "i")] 2924 FCVT_FIXED2F))] 2925 "TARGET_SIMD" 2926 "<FCVT_FIXED2F:fcvt_fixed_insn>\t%<v>0<Vmtype>, %<v>1<Vmtype>, #%2" 2927 [(set_attr "type" "neon_int_to_fp_<VDQ_HSDI:stype><q>")] 2928) 2929 2930;; ??? Note that the vectorizer usage of the vec_unpacks_[lo/hi] patterns 2931;; is inconsistent with vector ordering elsewhere in the compiler, in that 2932;; the meaning of HI and LO changes depending on the target endianness. 2933;; While elsewhere we map the higher numbered elements of a vector to 2934;; the lower architectural lanes of the vector, for these patterns we want 2935;; to always treat "hi" as referring to the higher architectural lanes. 2936;; Consequently, while the patterns below look inconsistent with our 2937;; other big-endian patterns their behavior is as required. 
(define_expand "vec_unpacks_lo_<mode>"
  [(match_operand:<VWIDE> 0 "register_operand")
   (match_operand:VQ_HSF 1 "register_operand")]
  "TARGET_SIMD"
  {
    rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
    emit_insn (gen_aarch64_simd_vec_unpacks_lo_<mode> (operands[0],
                                                       operands[1], p));
    DONE;
  }
)

(define_insn "aarch64_simd_vec_unpacks_hi_<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (float_extend:<VWIDE> (vec_select:<VHALF>
                                (match_operand:VQ_HSF 1 "register_operand" "w")
                                (match_operand:VQ_HSF 2 "vect_par_cnst_hi_half" "")
                              )))]
  "TARGET_SIMD"
  "fcvtl2\\t%0.<Vwtype>, %1.<Vtype>"
  [(set_attr "type" "neon_fp_cvt_widen_s")]
)

(define_expand "vec_unpacks_hi_<mode>"
  [(match_operand:<VWIDE> 0 "register_operand")
   (match_operand:VQ_HSF 1 "register_operand")]
  "TARGET_SIMD"
  {
    rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
    emit_insn (gen_aarch64_simd_vec_unpacks_hi_<mode> (operands[0],
                                                       operands[1], p));
    DONE;
  }
)

(define_insn "aarch64_float_extend_lo_<Vwide>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (float_extend:<VWIDE>
          (match_operand:VDF 1 "register_operand" "w")))]
  "TARGET_SIMD"
  "fcvtl\\t%0<Vmwtype>, %1<Vmtype>"
  [(set_attr "type" "neon_fp_cvt_widen_s")]
)

;; Float narrowing operations.

(define_insn "aarch64_float_truncate_lo_<mode>"
  [(set (match_operand:VDF 0 "register_operand" "=w")
        (float_truncate:VDF
          (match_operand:<VWIDE> 1 "register_operand" "w")))]
  "TARGET_SIMD"
  "fcvtn\\t%0.<Vtype>, %1<Vmwtype>"
  [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
)

(define_insn "aarch64_float_truncate_hi_<Vdbl>_le"
  [(set (match_operand:<VDBL> 0 "register_operand" "=w")
        (vec_concat:<VDBL>
          (match_operand:VDF 1 "register_operand" "0")
          (float_truncate:VDF
            (match_operand:<VWIDE> 2 "register_operand" "w"))))]
  "TARGET_SIMD && !BYTES_BIG_ENDIAN"
  "fcvtn2\\t%0.<Vdtype>, %2<Vmwtype>"
  [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
)

(define_insn "aarch64_float_truncate_hi_<Vdbl>_be"
  [(set (match_operand:<VDBL> 0 "register_operand" "=w")
        (vec_concat:<VDBL>
          (float_truncate:VDF
            (match_operand:<VWIDE> 2 "register_operand" "w"))
          (match_operand:VDF 1 "register_operand" "0")))]
  "TARGET_SIMD && BYTES_BIG_ENDIAN"
  "fcvtn2\\t%0.<Vdtype>, %2<Vmwtype>"
  [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
)

(define_expand "aarch64_float_truncate_hi_<Vdbl>"
  [(match_operand:<VDBL> 0 "register_operand")
   (match_operand:VDF 1 "register_operand")
   (match_operand:<VWIDE> 2 "register_operand")]
  "TARGET_SIMD"
{
  rtx (*gen) (rtx, rtx, rtx) = BYTES_BIG_ENDIAN
                               ? gen_aarch64_float_truncate_hi_<Vdbl>_be
                               : gen_aarch64_float_truncate_hi_<Vdbl>_le;
  emit_insn (gen (operands[0], operands[1], operands[2]));
  DONE;
}
)
(define_expand "vec_pack_trunc_v2df"
  [(set (match_operand:V4SF 0 "register_operand")
        (vec_concat:V4SF
          (float_truncate:V2SF
            (match_operand:V2DF 1 "register_operand"))
          (float_truncate:V2SF
            (match_operand:V2DF 2 "register_operand"))
        ))]
  "TARGET_SIMD"
  {
    rtx tmp = gen_reg_rtx (V2SFmode);
    int lo = BYTES_BIG_ENDIAN ? 2 : 1;
    int hi = BYTES_BIG_ENDIAN ? 1 : 2;

    emit_insn (gen_aarch64_float_truncate_lo_v2sf (tmp, operands[lo]));
    emit_insn (gen_aarch64_float_truncate_hi_v4sf (operands[0],
                                                   tmp, operands[hi]));
    DONE;
  }
)

(define_expand "vec_pack_trunc_df"
  [(set (match_operand:V2SF 0 "register_operand")
        (vec_concat:V2SF
          (float_truncate:SF
            (match_operand:DF 1 "register_operand"))
          (float_truncate:SF
            (match_operand:DF 2 "register_operand"))
        ))]
  "TARGET_SIMD"
  {
    rtx tmp = gen_reg_rtx (V2SFmode);
    int lo = BYTES_BIG_ENDIAN ? 2 : 1;
    int hi = BYTES_BIG_ENDIAN ? 1 : 2;

    emit_insn (gen_move_lo_quad_v2df (tmp, operands[lo]));
    emit_insn (gen_move_hi_quad_v2df (tmp, operands[hi]));
    emit_insn (gen_aarch64_float_truncate_lo_v2sf (operands[0], tmp));
    DONE;
  }
)

;; FP Max/Min
;; Max/Min are introduced by idiom recognition by GCC's mid-end.  An
;; expression like:
;;      a = (b < c) ? b : c;
;; is idiom-matched as MIN_EXPR<b,c> only if -ffinite-math-only and
;; -fno-signed-zeros are enabled, either explicitly or indirectly via
;; -ffast-math.
;;
;; MIN_EXPR and MAX_EXPR eventually map to 'smin' and 'smax' in RTL.
;; The 'smax' and 'smin' RTL standard pattern names do not specify which
;; operand will be returned when both operands are zero (i.e. they may not
;; honour signed zeroes), or when either operand is NaN.  Therefore GCC
;; only introduces MIN_EXPR/MAX_EXPR in fast math mode or when not honouring
;; NaNs.

(define_insn "<su><maxmin><mode>3"
  [(set (match_operand:VHSDF 0 "register_operand" "=w")
        (FMAXMIN:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
                       (match_operand:VHSDF 2 "register_operand" "w")))]
  "TARGET_SIMD"
  "f<maxmin>nm\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_fp_minmax_<stype><q>")]
)

;; Vector forms for fmax, fmin, fmaxnm, fminnm.
;; fmaxnm and fminnm are used for the fmax<mode>3 standard pattern names,
;; which implement the IEEE fmax ()/fmin () functions.
(define_insn "<maxmin_uns><mode>3"
  [(set (match_operand:VHSDF 0 "register_operand" "=w")
        (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
                       (match_operand:VHSDF 2 "register_operand" "w")]
                      FMAXMIN_UNS))]
  "TARGET_SIMD"
  "<maxmin_uns_op>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_fp_minmax_<stype><q>")]
)
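;; The unspec-based pattern above provides, among others, the fmax<mode>3
;; and fmin<mode>3 standard names, so a loop over fmaxf should vectorize
;; to FMAXNM (illustrative sketch):
;;
;;   #include <math.h>
;;
;;   void
;;   vmax (float *restrict a, float *restrict b, float *restrict c, int n)
;;   {
;;     for (int i = 0; i < n; i++)
;;       c[i] = fmaxf (a[i], b[i]);
;;   }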
;; 'across lanes' add.

(define_expand "reduc_plus_scal_<mode>"
  [(match_operand:<VEL> 0 "register_operand")
   (unspec:VDQ_I [(match_operand:VDQ_I 1 "register_operand")]
                 UNSPEC_ADDV)]
  "TARGET_SIMD"
  {
    rtx elt = aarch64_endian_lane_rtx (<MODE>mode, 0);
    rtx scratch = gen_reg_rtx (<MODE>mode);
    emit_insn (gen_aarch64_reduc_plus_internal<mode> (scratch, operands[1]));
    emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt));
    DONE;
  }
)

(define_insn "aarch64_faddp<mode>"
  [(set (match_operand:VHSDF 0 "register_operand" "=w")
        (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
                       (match_operand:VHSDF 2 "register_operand" "w")]
                      UNSPEC_FADDV))]
  "TARGET_SIMD"
  "faddp\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_fp_reduc_add_<stype><q>")]
)

(define_insn "aarch64_reduc_plus_internal<mode>"
  [(set (match_operand:VDQV 0 "register_operand" "=w")
        (unspec:VDQV [(match_operand:VDQV 1 "register_operand" "w")]
                     UNSPEC_ADDV))]
  "TARGET_SIMD"
  "add<VDQV:vp>\\t%<Vetype>0, %1.<Vtype>"
  [(set_attr "type" "neon_reduc_add<q>")]
)

(define_insn "aarch64_<su>addlv<mode>"
  [(set (match_operand:<VWIDE_S> 0 "register_operand" "=w")
        (unspec:<VWIDE_S> [(match_operand:VDQV_L 1 "register_operand" "w")]
                          USADDLV))]
  "TARGET_SIMD"
  "<su>addl<vp>\\t%<Vwstype>0<Vwsuf>, %1.<Vtype>"
  [(set_attr "type" "neon_reduc_add<q>")]
)

;; ADDV with result zero-extended to SI/DImode (for popcount).
(define_insn "aarch64_zero_extend<GPI:mode>_reduc_plus_<VDQV_E:mode>"
  [(set (match_operand:GPI 0 "register_operand" "=w")
        (zero_extend:GPI
          (unspec:<VDQV_E:VEL> [(match_operand:VDQV_E 1 "register_operand" "w")]
                               UNSPEC_ADDV)))]
  "TARGET_SIMD"
  "add<VDQV_E:vp>\\t%<VDQV_E:Vetype>0, %1.<VDQV_E:Vtype>"
  [(set_attr "type" "neon_reduc_add<VDQV_E:q>")]
)

(define_insn "aarch64_reduc_plus_internalv2si"
  [(set (match_operand:V2SI 0 "register_operand" "=w")
        (unspec:V2SI [(match_operand:V2SI 1 "register_operand" "w")]
                     UNSPEC_ADDV))]
  "TARGET_SIMD"
  "addp\\t%0.2s, %1.2s, %1.2s"
  [(set_attr "type" "neon_reduc_add")]
)

(define_insn "reduc_plus_scal_<mode>"
  [(set (match_operand:<VEL> 0 "register_operand" "=w")
        (unspec:<VEL> [(match_operand:V2F 1 "register_operand" "w")]
                      UNSPEC_FADDV))]
  "TARGET_SIMD"
  "faddp\\t%<Vetype>0, %1.<Vtype>"
  [(set_attr "type" "neon_fp_reduc_add_<Vetype><q>")]
)

(define_expand "reduc_plus_scal_v4sf"
  [(set (match_operand:SF 0 "register_operand")
        (unspec:V4SF [(match_operand:V4SF 1 "register_operand")]
                     UNSPEC_FADDV))]
  "TARGET_SIMD"
{
  rtx elt = aarch64_endian_lane_rtx (V4SFmode, 0);
  rtx scratch = gen_reg_rtx (V4SFmode);
  emit_insn (gen_aarch64_faddpv4sf (scratch, operands[1], operands[1]));
  emit_insn (gen_aarch64_faddpv4sf (scratch, scratch, scratch));
  emit_insn (gen_aarch64_get_lanev4sf (operands[0], scratch, elt));
  DONE;
})
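;; An integer sum reduction reaches reduc_plus_scal_<mode> and ends in a
;; single ADDV (illustrative sketch):
;;
;;   int
;;   sum (int *restrict a, int n)
;;   {
;;     int s = 0;
;;     for (int i = 0; i < n; i++)
;;       s += a[i];
;;     return s;  /* Final reduction: ADDV s0, v0.4s.  */
;;   }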
"TARGET_SIMD" 3207 "clz\\t%0.<Vtype>, %1.<Vtype>" 3208 [(set_attr "type" "neon_cls<q>")] 3209) 3210 3211(define_insn "popcount<mode>2" 3212 [(set (match_operand:VB 0 "register_operand" "=w") 3213 (popcount:VB (match_operand:VB 1 "register_operand" "w")))] 3214 "TARGET_SIMD" 3215 "cnt\\t%0.<Vbtype>, %1.<Vbtype>" 3216 [(set_attr "type" "neon_cnt<q>")] 3217) 3218 3219;; 'across lanes' max and min ops. 3220 3221;; Template for outputting a scalar, so we can create __builtins which can be 3222;; gimple_fold'd to the IFN_REDUC_(MAX|MIN) function. (This is FP smax/smin). 3223(define_expand "reduc_<maxmin_uns>_scal_<mode>" 3224 [(match_operand:<VEL> 0 "register_operand") 3225 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand")] 3226 FMAXMINV)] 3227 "TARGET_SIMD" 3228 { 3229 rtx elt = aarch64_endian_lane_rtx (<MODE>mode, 0); 3230 rtx scratch = gen_reg_rtx (<MODE>mode); 3231 emit_insn (gen_aarch64_reduc_<maxmin_uns>_internal<mode> (scratch, 3232 operands[1])); 3233 emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt)); 3234 DONE; 3235 } 3236) 3237 3238;; Likewise for integer cases, signed and unsigned. 3239(define_expand "reduc_<maxmin_uns>_scal_<mode>" 3240 [(match_operand:<VEL> 0 "register_operand") 3241 (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand")] 3242 MAXMINV)] 3243 "TARGET_SIMD" 3244 { 3245 rtx elt = aarch64_endian_lane_rtx (<MODE>mode, 0); 3246 rtx scratch = gen_reg_rtx (<MODE>mode); 3247 emit_insn (gen_aarch64_reduc_<maxmin_uns>_internal<mode> (scratch, 3248 operands[1])); 3249 emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt)); 3250 DONE; 3251 } 3252) 3253 3254(define_insn "aarch64_reduc_<maxmin_uns>_internal<mode>" 3255 [(set (match_operand:VDQV_S 0 "register_operand" "=w") 3256 (unspec:VDQV_S [(match_operand:VDQV_S 1 "register_operand" "w")] 3257 MAXMINV))] 3258 "TARGET_SIMD" 3259 "<maxmin_uns_op>v\\t%<Vetype>0, %1.<Vtype>" 3260 [(set_attr "type" "neon_reduc_minmax<q>")] 3261) 3262 3263(define_insn "aarch64_reduc_<maxmin_uns>_internalv2si" 3264 [(set (match_operand:V2SI 0 "register_operand" "=w") 3265 (unspec:V2SI [(match_operand:V2SI 1 "register_operand" "w")] 3266 MAXMINV))] 3267 "TARGET_SIMD" 3268 "<maxmin_uns_op>p\\t%0.2s, %1.2s, %1.2s" 3269 [(set_attr "type" "neon_reduc_minmax")] 3270) 3271 3272(define_insn "aarch64_reduc_<maxmin_uns>_internal<mode>" 3273 [(set (match_operand:VHSDF 0 "register_operand" "=w") 3274 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")] 3275 FMAXMINV))] 3276 "TARGET_SIMD" 3277 "<maxmin_uns_op><vp>\\t%<Vetype>0, %1.<Vtype>" 3278 [(set_attr "type" "neon_fp_reduc_minmax_<stype><q>")] 3279) 3280 3281;; aarch64_simd_bsl may compile to any of bsl/bif/bit depending on register 3282;; allocation. 3283;; Operand 1 is the mask, operands 2 and 3 are the bitfields from which 3284;; to select. 3285;; 3286;; Thus our BSL is of the form: 3287;; op0 = bsl (mask, op2, op3) 3288;; We can use any of: 3289;; 3290;; if (op0 = mask) 3291;; bsl mask, op1, op2 3292;; if (op0 = op1) (so 1-bits in mask choose bits from op2, else op0) 3293;; bit op0, op2, mask 3294;; if (op0 = op2) (so 0-bits in mask choose bits from op1, else op0) 3295;; bif op0, op1, mask 3296;; 3297;; This pattern is expanded to by the aarch64_simd_bsl<mode> expander. 3298;; Some forms of straight-line code may generate the equivalent form 3299;; in *aarch64_simd_bsl<mode>_alt. 
(define_insn "aarch64_simd_bsl<mode>_internal"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w,w,w")
	(xor:VDQ_I
	   (and:VDQ_I
	     (xor:VDQ_I
	       (match_operand:<V_INT_EQUIV> 3 "register_operand" "w,0,w")
	       (match_operand:VDQ_I 2 "register_operand" "w,w,0"))
	     (match_operand:VDQ_I 1 "register_operand" "0,w,w"))
	  (match_dup:<V_INT_EQUIV> 3)
	))]
  "TARGET_SIMD"
  "@
  bsl\\t%0.<Vbtype>, %2.<Vbtype>, %3.<Vbtype>
  bit\\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>
  bif\\t%0.<Vbtype>, %3.<Vbtype>, %1.<Vbtype>"
  [(set_attr "type" "neon_bsl<q>")]
)

;; We need this form in addition to the above pattern to match the case
;; when combine tries merging three insns such that the second operand of
;; the outer XOR matches the second operand of the inner XOR rather than
;; the first.  The two are equivalent but since recog doesn't try all
;; permutations of commutative operations, we have to have a separate pattern.

(define_insn "*aarch64_simd_bsl<mode>_alt"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w,w,w")
	(xor:VDQ_I
	   (and:VDQ_I
	     (xor:VDQ_I
	       (match_operand:VDQ_I 3 "register_operand" "w,w,0")
	       (match_operand:<V_INT_EQUIV> 2 "register_operand" "w,0,w"))
	     (match_operand:VDQ_I 1 "register_operand" "0,w,w"))
	  (match_dup:<V_INT_EQUIV> 2)))]
  "TARGET_SIMD"
  "@
  bsl\\t%0.<Vbtype>, %3.<Vbtype>, %2.<Vbtype>
  bit\\t%0.<Vbtype>, %3.<Vbtype>, %1.<Vbtype>
  bif\\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>"
  [(set_attr "type" "neon_bsl<q>")]
)

;; DImode is special: we want to avoid computing operations which are
;; more naturally computed in general purpose registers in the vector
;; registers.  If we do that, we need to move all three operands from general
;; purpose registers to vector registers, then back again.  However, we
;; don't want to make this pattern an UNSPEC as we'd lose scope for
;; optimizations based on the component operations of a BSL.
;;
;; That means we need a splitter back to the individual operations, if they
;; would be better calculated on the integer side.
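;; Illustrative note: for the general-purpose-register alternative the
;; splitters below emit the plain three-instruction sequence
;;   eor  scratch, op2, op3
;;   and  scratch, scratch, op1
;;   eor  op0, scratch, op3
;; which computes (op1 & op2) | (~op1 & op3) without crossing over to
;; the vector register file.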
(define_insn_and_split "aarch64_simd_bsldi_internal"
  [(set (match_operand:DI 0 "register_operand" "=w,w,w,&r")
	(xor:DI
	   (and:DI
	     (xor:DI
	       (match_operand:DI 3 "register_operand" "w,0,w,r")
	       (match_operand:DI 2 "register_operand" "w,w,0,r"))
	     (match_operand:DI 1 "register_operand" "0,w,w,r"))
	  (match_dup:DI 3)
	))]
  "TARGET_SIMD"
  "@
  bsl\\t%0.8b, %2.8b, %3.8b
  bit\\t%0.8b, %2.8b, %1.8b
  bif\\t%0.8b, %3.8b, %1.8b
  #"
  "&& REG_P (operands[0]) && GP_REGNUM_P (REGNO (operands[0]))"
  [(match_dup 1) (match_dup 1) (match_dup 2) (match_dup 3)]
{
  /* Split back to individual operations.  If we're before reload, and
     able to create a temporary register, do so.  If we're after reload,
     we've got an early-clobber destination register, so use that.
     Otherwise, we can't create pseudos and we can't yet guarantee that
     operands[0] is safe to write, so FAIL to split.  */

  rtx scratch;
  if (reload_completed)
    scratch = operands[0];
  else if (can_create_pseudo_p ())
    scratch = gen_reg_rtx (DImode);
  else
    FAIL;

  emit_insn (gen_xordi3 (scratch, operands[2], operands[3]));
  emit_insn (gen_anddi3 (scratch, scratch, operands[1]));
  emit_insn (gen_xordi3 (operands[0], scratch, operands[3]));
  DONE;
}
  [(set_attr "type" "neon_bsl,neon_bsl,neon_bsl,multiple")
   (set_attr "length" "4,4,4,12")]
)

(define_insn_and_split "aarch64_simd_bsldi_alt"
  [(set (match_operand:DI 0 "register_operand" "=w,w,w,&r")
	(xor:DI
	   (and:DI
	     (xor:DI
	       (match_operand:DI 3 "register_operand" "w,w,0,r")
	       (match_operand:DI 2 "register_operand" "w,0,w,r"))
	     (match_operand:DI 1 "register_operand" "0,w,w,r"))
	  (match_dup:DI 2)
	))]
  "TARGET_SIMD"
  "@
  bsl\\t%0.8b, %3.8b, %2.8b
  bit\\t%0.8b, %3.8b, %1.8b
  bif\\t%0.8b, %2.8b, %1.8b
  #"
  "&& REG_P (operands[0]) && GP_REGNUM_P (REGNO (operands[0]))"
  [(match_dup 0) (match_dup 1) (match_dup 2) (match_dup 3)]
{
  /* Split back to individual operations.  If we're before reload, and
     able to create a temporary register, do so.  If we're after reload,
     we've got an early-clobber destination register, so use that.
     Otherwise, we can't create pseudos and we can't yet guarantee that
     operands[0] is safe to write, so FAIL to split.  */

  rtx scratch;
  if (reload_completed)
    scratch = operands[0];
  else if (can_create_pseudo_p ())
    scratch = gen_reg_rtx (DImode);
  else
    FAIL;

  emit_insn (gen_xordi3 (scratch, operands[2], operands[3]));
  emit_insn (gen_anddi3 (scratch, scratch, operands[1]));
  emit_insn (gen_xordi3 (operands[0], scratch, operands[2]));
  DONE;
}
  [(set_attr "type" "neon_bsl,neon_bsl,neon_bsl,multiple")
   (set_attr "length" "4,4,4,12")]
)

(define_expand "aarch64_simd_bsl<mode>"
  [(match_operand:VALLDIF 0 "register_operand")
   (match_operand:<V_INT_EQUIV> 1 "register_operand")
   (match_operand:VALLDIF 2 "register_operand")
   (match_operand:VALLDIF 3 "register_operand")]
  "TARGET_SIMD"
{
  /* We can't alias operands together if they have different modes.  */
  rtx tmp = operands[0];
  if (FLOAT_MODE_P (<MODE>mode))
    {
      operands[2] = gen_lowpart (<V_INT_EQUIV>mode, operands[2]);
      operands[3] = gen_lowpart (<V_INT_EQUIV>mode, operands[3]);
      tmp = gen_reg_rtx (<V_INT_EQUIV>mode);
    }
  operands[1] = gen_lowpart (<V_INT_EQUIV>mode, operands[1]);
  emit_insn (gen_aarch64_simd_bsl<v_int_equiv>_internal (tmp,
							 operands[1],
							 operands[2],
							 operands[3]));
  if (tmp != operands[0])
    emit_move_insn (operands[0], gen_lowpart (<MODE>mode, tmp));

  DONE;
})
(define_expand "vcond_mask_<mode><v_int_equiv>"
  [(match_operand:VALLDI 0 "register_operand")
   (match_operand:VALLDI 1 "nonmemory_operand")
   (match_operand:VALLDI 2 "nonmemory_operand")
   (match_operand:<V_INT_EQUIV> 3 "register_operand")]
  "TARGET_SIMD"
{
  /* If we have (a = (P) ? -1 : 0), then we can simply move the generated
     mask (the result must be an integer vector).  */
  if (operands[1] == CONSTM1_RTX (<MODE>mode)
      && operands[2] == CONST0_RTX (<MODE>mode))
    emit_move_insn (operands[0], operands[3]);
  /* Similarly, (a = (P) ? 0 : -1) is just inverting the generated mask.  */
  else if (operands[1] == CONST0_RTX (<MODE>mode)
	   && operands[2] == CONSTM1_RTX (<MODE>mode))
    emit_insn (gen_one_cmpl<v_int_equiv>2 (operands[0], operands[3]));
  else
    {
      if (!REG_P (operands[1]))
	operands[1] = force_reg (<MODE>mode, operands[1]);
      if (!REG_P (operands[2]))
	operands[2] = force_reg (<MODE>mode, operands[2]);
      emit_insn (gen_aarch64_simd_bsl<mode> (operands[0], operands[3],
					     operands[1], operands[2]));
    }

  DONE;
})

;; Patterns comparing two vectors to produce a mask.

(define_expand "vec_cmp<mode><mode>"
  [(set (match_operand:VSDQ_I_DI 0 "register_operand")
	(match_operator 1 "comparison_operator"
	  [(match_operand:VSDQ_I_DI 2 "register_operand")
	   (match_operand:VSDQ_I_DI 3 "nonmemory_operand")]))]
  "TARGET_SIMD"
{
  rtx mask = operands[0];
  enum rtx_code code = GET_CODE (operands[1]);

  switch (code)
    {
    case NE:
    case LE:
    case LT:
    case GE:
    case GT:
    case EQ:
      if (operands[3] == CONST0_RTX (<MODE>mode))
	break;

      /* Fall through.  */
    default:
      if (!REG_P (operands[3]))
	operands[3] = force_reg (<MODE>mode, operands[3]);

      break;
    }

  switch (code)
    {
    case LT:
      emit_insn (gen_aarch64_cmlt<mode> (mask, operands[2], operands[3]));
      break;

    case GE:
      emit_insn (gen_aarch64_cmge<mode> (mask, operands[2], operands[3]));
      break;

    case LE:
      emit_insn (gen_aarch64_cmle<mode> (mask, operands[2], operands[3]));
      break;

    case GT:
      emit_insn (gen_aarch64_cmgt<mode> (mask, operands[2], operands[3]));
      break;

    case LTU:
      emit_insn (gen_aarch64_cmgtu<mode> (mask, operands[3], operands[2]));
      break;

    case GEU:
      emit_insn (gen_aarch64_cmgeu<mode> (mask, operands[2], operands[3]));
      break;

    case LEU:
      emit_insn (gen_aarch64_cmgeu<mode> (mask, operands[3], operands[2]));
      break;

    case GTU:
      emit_insn (gen_aarch64_cmgtu<mode> (mask, operands[2], operands[3]));
      break;

    case NE:
      /* Handle NE as !EQ.  */
      emit_insn (gen_aarch64_cmeq<mode> (mask, operands[2], operands[3]));
      emit_insn (gen_one_cmpl<v_int_equiv>2 (mask, mask));
      break;

    case EQ:
      emit_insn (gen_aarch64_cmeq<mode> (mask, operands[2], operands[3]));
      break;

    default:
      gcc_unreachable ();
    }

  DONE;
})
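;; Illustrative note: AdvSIMD only provides the "greater" forms of the
;; unsigned compares (CMHI/CMHS), so the expansion above synthesizes the
;; remaining ones by swapping operands, e.g.
;;   a LTU b  ->  cmhi mask, b, a
;;   a LEU b  ->  cmhs mask, b, a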
(define_expand "vec_cmp<mode><v_int_equiv>"
  [(set (match_operand:<V_INT_EQUIV> 0 "register_operand")
	(match_operator 1 "comparison_operator"
	  [(match_operand:VDQF 2 "register_operand")
	   (match_operand:VDQF 3 "nonmemory_operand")]))]
  "TARGET_SIMD"
{
  int use_zero_form = 0;
  enum rtx_code code = GET_CODE (operands[1]);
  rtx tmp = gen_reg_rtx (<V_INT_EQUIV>mode);

  rtx (*comparison) (rtx, rtx, rtx) = NULL;

  switch (code)
    {
    case LE:
    case LT:
    case GE:
    case GT:
    case EQ:
      if (operands[3] == CONST0_RTX (<MODE>mode))
	{
	  use_zero_form = 1;
	  break;
	}
      /* Fall through.  */
    default:
      if (!REG_P (operands[3]))
	operands[3] = force_reg (<MODE>mode, operands[3]);

      break;
    }

  switch (code)
    {
    case LT:
      if (use_zero_form)
	{
	  comparison = gen_aarch64_cmlt<mode>;
	  break;
	}
      /* Fall through.  */
    case UNLT:
      std::swap (operands[2], operands[3]);
      /* Fall through.  */
    case UNGT:
    case GT:
      comparison = gen_aarch64_cmgt<mode>;
      break;
    case LE:
      if (use_zero_form)
	{
	  comparison = gen_aarch64_cmle<mode>;
	  break;
	}
      /* Fall through.  */
    case UNLE:
      std::swap (operands[2], operands[3]);
      /* Fall through.  */
    case UNGE:
    case GE:
      comparison = gen_aarch64_cmge<mode>;
      break;
    case NE:
    case EQ:
      comparison = gen_aarch64_cmeq<mode>;
      break;
    case UNEQ:
    case ORDERED:
    case UNORDERED:
    case LTGT:
      break;
    default:
      gcc_unreachable ();
    }

  switch (code)
    {
    case UNGE:
    case UNGT:
    case UNLE:
    case UNLT:
      {
	/* All of the above must not raise any FP exceptions.  Thus we first
	   check each operand for NaNs and force any elements containing NaN to
	   zero before using them in the compare.
	   Example: UN<cc> (a, b) -> UNORDERED (a, b) |
				     (cm<cc> (isnan (a) ? 0.0 : a,
					      isnan (b) ? 0.0 : b))
	   We use the following transformations for doing the comparisons:
	   a UNGE b -> a GE b
	   a UNGT b -> a GT b
	   a UNLE b -> b GE a
	   a UNLT b -> b GT a.  */

	rtx tmp0 = gen_reg_rtx (<V_INT_EQUIV>mode);
	rtx tmp1 = gen_reg_rtx (<V_INT_EQUIV>mode);
	rtx tmp2 = gen_reg_rtx (<V_INT_EQUIV>mode);
	emit_insn (gen_aarch64_cmeq<mode> (tmp0, operands[2], operands[2]));
	emit_insn (gen_aarch64_cmeq<mode> (tmp1, operands[3], operands[3]));
	emit_insn (gen_and<v_int_equiv>3 (tmp2, tmp0, tmp1));
	emit_insn (gen_and<v_int_equiv>3 (tmp0, tmp0,
					  lowpart_subreg (<V_INT_EQUIV>mode,
							  operands[2],
							  <MODE>mode)));
	emit_insn (gen_and<v_int_equiv>3 (tmp1, tmp1,
					  lowpart_subreg (<V_INT_EQUIV>mode,
							  operands[3],
							  <MODE>mode)));
	gcc_assert (comparison != NULL);
	emit_insn (comparison (operands[0],
			       lowpart_subreg (<MODE>mode,
					       tmp0, <V_INT_EQUIV>mode),
			       lowpart_subreg (<MODE>mode,
					       tmp1, <V_INT_EQUIV>mode)));
	emit_insn (gen_orn<v_int_equiv>3 (operands[0], tmp2, operands[0]));
      }
      break;

    case LT:
    case LE:
    case GT:
    case GE:
    case EQ:
    case NE:
      /* The easy case.  Here we emit one of FCMGE, FCMGT or FCMEQ.
	 As a LT b <=> b GT a and a LE b <=> b GE a, our transformations are:
	 a GE b -> a GE b
	 a GT b -> a GT b
	 a LE b -> b GE a
	 a LT b -> b GT a
	 a EQ b -> a EQ b
	 a NE b -> ~(a EQ b)  */
      gcc_assert (comparison != NULL);
      emit_insn (comparison (operands[0], operands[2], operands[3]));
      if (code == NE)
	emit_insn (gen_one_cmpl<v_int_equiv>2 (operands[0], operands[0]));
      break;

    case LTGT:
      /* LTGT is not guaranteed not to raise an FP exception, so go the
	 faster way: ((a > b) || (b > a)).  */
      emit_insn (gen_aarch64_cmgt<mode> (operands[0],
					 operands[2], operands[3]));
      emit_insn (gen_aarch64_cmgt<mode> (tmp, operands[3], operands[2]));
      emit_insn (gen_ior<v_int_equiv>3 (operands[0], operands[0], tmp));
      break;

    case ORDERED:
    case UNORDERED:
    case UNEQ:
      /* cmeq (a, a) & cmeq (b, b).  */
      emit_insn (gen_aarch64_cmeq<mode> (operands[0],
					 operands[2], operands[2]));
      emit_insn (gen_aarch64_cmeq<mode> (tmp, operands[3], operands[3]));
      emit_insn (gen_and<v_int_equiv>3 (operands[0], operands[0], tmp));

      if (code == UNORDERED)
	emit_insn (gen_one_cmpl<v_int_equiv>2 (operands[0], operands[0]));
      else if (code == UNEQ)
	{
	  emit_insn (gen_aarch64_cmeq<mode> (tmp, operands[2], operands[3]));
	  emit_insn (gen_orn<v_int_equiv>3 (operands[0], operands[0], tmp));
	}
      break;

    default:
      gcc_unreachable ();
    }

  DONE;
})
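;; Illustrative expansion of the quiet UN<cc> handling above, for
;; a UNGE b on V4SF (register names arbitrary; a in v0, b in v1):
;;   fcmeq  v4.4s, v0.4s, v0.4s     ; lanes of a that are not NaN
;;   fcmeq  v5.4s, v1.4s, v1.4s     ; lanes of b that are not NaN
;;   and    v6.16b, v4.16b, v5.16b  ; both operands ordered
;;   and    v4.16b, v4.16b, v0.16b  ; force NaN lanes of a to 0.0
;;   and    v5.16b, v5.16b, v1.16b  ; force NaN lanes of b to 0.0
;;   fcmge  v2.4s, v4.4s, v5.4s
;;   orn    v2.16b, v2.16b, v6.16b  ; OR in the unordered lanes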
(define_expand "vec_cmpu<mode><mode>"
  [(set (match_operand:VSDQ_I_DI 0 "register_operand")
	(match_operator 1 "comparison_operator"
	  [(match_operand:VSDQ_I_DI 2 "register_operand")
	   (match_operand:VSDQ_I_DI 3 "nonmemory_operand")]))]
  "TARGET_SIMD"
{
  emit_insn (gen_vec_cmp<mode><mode> (operands[0], operands[1],
				      operands[2], operands[3]));
  DONE;
})

(define_expand "vcond<mode><mode>"
  [(set (match_operand:VALLDI 0 "register_operand")
	(if_then_else:VALLDI
	  (match_operator 3 "comparison_operator"
	    [(match_operand:VALLDI 4 "register_operand")
	     (match_operand:VALLDI 5 "nonmemory_operand")])
	  (match_operand:VALLDI 1 "nonmemory_operand")
	  (match_operand:VALLDI 2 "nonmemory_operand")))]
  "TARGET_SIMD"
{
  rtx mask = gen_reg_rtx (<V_INT_EQUIV>mode);
  enum rtx_code code = GET_CODE (operands[3]);

  /* NE is handled as !EQ in vec_cmp patterns; we can explicitly invert
     it and swap operands 1/2 to avoid the additional NOT instruction.  */
  if (code == NE)
    {
      operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]),
				    operands[4], operands[5]);
      std::swap (operands[1], operands[2]);
    }
  emit_insn (gen_vec_cmp<mode><v_int_equiv> (mask, operands[3],
					     operands[4], operands[5]));
  emit_insn (gen_vcond_mask_<mode><v_int_equiv> (operands[0], operands[1],
						 operands[2], mask));

  DONE;
})
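;; For example (illustrative), selecting x/y under a NE b is rewritten
;; above as selecting y/x under a EQ b, which saves the NOT that would
;; otherwise be needed to invert the mask.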
(define_expand "vcond<v_cmp_mixed><mode>"
  [(set (match_operand:<V_cmp_mixed> 0 "register_operand")
	(if_then_else:<V_cmp_mixed>
	  (match_operator 3 "comparison_operator"
	    [(match_operand:VDQF_COND 4 "register_operand")
	     (match_operand:VDQF_COND 5 "nonmemory_operand")])
	  (match_operand:<V_cmp_mixed> 1 "nonmemory_operand")
	  (match_operand:<V_cmp_mixed> 2 "nonmemory_operand")))]
  "TARGET_SIMD"
{
  rtx mask = gen_reg_rtx (<V_INT_EQUIV>mode);
  enum rtx_code code = GET_CODE (operands[3]);

  /* NE is handled as !EQ in vec_cmp patterns; we can explicitly invert
     it and swap operands 1/2 to avoid the additional NOT instruction.  */
  if (code == NE)
    {
      operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]),
				    operands[4], operands[5]);
      std::swap (operands[1], operands[2]);
    }
  emit_insn (gen_vec_cmp<mode><v_int_equiv> (mask, operands[3],
					     operands[4], operands[5]));
  emit_insn (gen_vcond_mask_<v_cmp_mixed><v_int_equiv> (
						operands[0], operands[1],
						operands[2], mask));

  DONE;
})

(define_expand "vcondu<mode><mode>"
  [(set (match_operand:VSDQ_I_DI 0 "register_operand")
	(if_then_else:VSDQ_I_DI
	  (match_operator 3 "comparison_operator"
	    [(match_operand:VSDQ_I_DI 4 "register_operand")
	     (match_operand:VSDQ_I_DI 5 "nonmemory_operand")])
	  (match_operand:VSDQ_I_DI 1 "nonmemory_operand")
	  (match_operand:VSDQ_I_DI 2 "nonmemory_operand")))]
  "TARGET_SIMD"
{
  rtx mask = gen_reg_rtx (<MODE>mode);
  enum rtx_code code = GET_CODE (operands[3]);

  /* NE is handled as !EQ in vec_cmp patterns; we can explicitly invert
     it and swap operands 1/2 to avoid the additional NOT instruction.  */
  if (code == NE)
    {
      operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]),
				    operands[4], operands[5]);
      std::swap (operands[1], operands[2]);
    }
  emit_insn (gen_vec_cmp<mode><mode> (mask, operands[3],
				      operands[4], operands[5]));
  emit_insn (gen_vcond_mask_<mode><v_int_equiv> (operands[0], operands[1],
						 operands[2], mask));
  DONE;
})

(define_expand "vcondu<mode><v_cmp_mixed>"
  [(set (match_operand:VDQF 0 "register_operand")
	(if_then_else:VDQF
	  (match_operator 3 "comparison_operator"
	    [(match_operand:<V_cmp_mixed> 4 "register_operand")
	     (match_operand:<V_cmp_mixed> 5 "nonmemory_operand")])
	  (match_operand:VDQF 1 "nonmemory_operand")
	  (match_operand:VDQF 2 "nonmemory_operand")))]
  "TARGET_SIMD"
{
  rtx mask = gen_reg_rtx (<V_INT_EQUIV>mode);
  enum rtx_code code = GET_CODE (operands[3]);

  /* NE is handled as !EQ in vec_cmp patterns; we can explicitly invert
     it and swap operands 1/2 to avoid the additional NOT instruction.  */
  if (code == NE)
    {
      operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]),
				    operands[4], operands[5]);
      std::swap (operands[1], operands[2]);
    }
  emit_insn (gen_vec_cmp<v_cmp_mixed><v_cmp_mixed> (
						  mask, operands[3],
						  operands[4], operands[5]));
  emit_insn (gen_vcond_mask_<mode><v_int_equiv> (operands[0], operands[1],
						 operands[2], mask));
  DONE;
})

;; Patterns for AArch64 SIMD Intrinsics.

;; Lane extraction with sign extension to general purpose register.
(define_insn "*aarch64_get_lane_extend<GPI:mode><VDQQH:mode>"
  [(set (match_operand:GPI 0 "register_operand" "=r")
	(sign_extend:GPI
	  (vec_select:<VDQQH:VEL>
	    (match_operand:VDQQH 1 "register_operand" "w")
	    (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
  "TARGET_SIMD"
  {
    operands[2] = aarch64_endian_lane_rtx (<VDQQH:MODE>mode,
					   INTVAL (operands[2]));
    return "smov\\t%<GPI:w>0, %1.<VDQQH:Vetype>[%2]";
  }
  [(set_attr "type" "neon_to_gp<VDQQH:q>")]
)

(define_insn "*aarch64_get_lane_zero_extend<GPI:mode><VDQQH:mode>"
  [(set (match_operand:GPI 0 "register_operand" "=r")
	(zero_extend:GPI
	  (vec_select:<VDQQH:VEL>
	    (match_operand:VDQQH 1 "register_operand" "w")
	    (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
  "TARGET_SIMD"
  {
    operands[2] = aarch64_endian_lane_rtx (<VDQQH:MODE>mode,
					   INTVAL (operands[2]));
    return "umov\\t%w0, %1.<VDQQH:Vetype>[%2]";
  }
  [(set_attr "type" "neon_to_gp<VDQQH:q>")]
)

;; Lane extraction of a value: neither sign nor zero extension
;; is guaranteed, so the upper bits should be considered undefined.
;; RTL uses GCC vector extension indices throughout, so flip only for assembly.
;; Extracting lane zero is split into a simple move when it is between SIMD
;; registers or a store.
(define_insn_and_split "aarch64_get_lane<mode>"
  [(set (match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand" "=?r, w, Utv")
	(vec_select:<VEL>
	  (match_operand:VALL_F16 1 "register_operand" "w, w, w")
	  (parallel [(match_operand:SI 2 "immediate_operand" "i, i, i")])))]
  "TARGET_SIMD"
  {
    operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
    switch (which_alternative)
      {
	case 0:
	  return "umov\\t%<vwcore>0, %1.<Vetype>[%2]";
	case 1:
	  return "dup\\t%<Vetype>0, %1.<Vetype>[%2]";
	case 2:
	  return "st1\\t{%1.<Vetype>}[%2], %0";
	default:
	  gcc_unreachable ();
      }
  }
  "&& reload_completed
   && ENDIAN_LANE_N (<nunits>, INTVAL (operands[2])) == 0"
  [(set (match_dup 0) (match_dup 1))]
  {
    operands[1] = aarch64_replace_reg_mode (operands[1], <VEL>mode);
  }
  [(set_attr "type" "neon_to_gp<q>, neon_dup<q>, neon_store1_one_lane<q>")]
)

(define_insn "load_pair_lanes<mode>"
  [(set (match_operand:<VDBL> 0 "register_operand" "=w")
	(vec_concat:<VDBL>
	   (match_operand:VDC 1 "memory_operand" "Utq")
	   (match_operand:VDC 2 "memory_operand" "m")))]
  "TARGET_SIMD && !STRICT_ALIGNMENT
   && rtx_equal_p (XEXP (operands[2], 0),
		   plus_constant (Pmode,
				  XEXP (operands[1], 0),
				  GET_MODE_SIZE (<MODE>mode)))"
  "ldr\\t%q0, %1"
  [(set_attr "type" "neon_load1_1reg_q")]
)

(define_insn "store_pair_lanes<mode>"
  [(set (match_operand:<VDBL> 0 "aarch64_mem_pair_lanes_operand" "=Umn, Umn")
	(vec_concat:<VDBL>
	   (match_operand:VDC 1 "register_operand" "w, r")
	   (match_operand:VDC 2 "register_operand" "w, r")))]
  "TARGET_SIMD"
  "@
   stp\\t%d1, %d2, %y0
   stp\\t%x1, %x2, %y0"
  [(set_attr "type" "neon_stp, store_16")]
)
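;; Illustrative note: the two patterns above let a vec_concat of two
;; adjacent 64-bit halves become a single 128-bit access, e.g.
;;   ldr q0, [x0]        ; one load instead of two 64-bit loads + combine
;;   stp d0, d1, [x0]    ; both halves stored with one store-pair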
;; In this insn, operand 1 should be the low part and operand 2 the high
;; part of the destination vector.

(define_insn "@aarch64_combinez<mode>"
  [(set (match_operand:<VDBL> 0 "register_operand" "=w,w,w")
	(vec_concat:<VDBL>
	  (match_operand:VDC 1 "general_operand" "w,?r,m")
	  (match_operand:VDC 2 "aarch64_simd_or_scalar_imm_zero")))]
  "TARGET_SIMD && !BYTES_BIG_ENDIAN"
  "@
   mov\\t%0.8b, %1.8b
   fmov\t%d0, %1
   ldr\\t%d0, %1"
  [(set_attr "type" "neon_move<q>, neon_from_gp, neon_load1_1reg")
   (set_attr "arch" "simd,fp,simd")]
)

(define_insn "@aarch64_combinez_be<mode>"
  [(set (match_operand:<VDBL> 0 "register_operand" "=w,w,w")
	(vec_concat:<VDBL>
	  (match_operand:VDC 2 "aarch64_simd_or_scalar_imm_zero")
	  (match_operand:VDC 1 "general_operand" "w,?r,m")))]
  "TARGET_SIMD && BYTES_BIG_ENDIAN"
  "@
   mov\\t%0.8b, %1.8b
   fmov\t%d0, %1
   ldr\\t%d0, %1"
  [(set_attr "type" "neon_move<q>, neon_from_gp, neon_load1_1reg")
   (set_attr "arch" "simd,fp,simd")]
)

(define_expand "aarch64_combine<mode>"
  [(match_operand:<VDBL> 0 "register_operand")
   (match_operand:VDC 1 "register_operand")
   (match_operand:VDC 2 "aarch64_simd_reg_or_zero")]
  "TARGET_SIMD"
{
  if (operands[2] == CONST0_RTX (<MODE>mode))
    {
      if (BYTES_BIG_ENDIAN)
	emit_insn (gen_aarch64_combinez_be<mode> (operands[0], operands[1],
						  operands[2]));
      else
	emit_insn (gen_aarch64_combinez<mode> (operands[0], operands[1],
					       operands[2]));
    }
  else
    aarch64_split_simd_combine (operands[0], operands[1], operands[2]);
  DONE;
}
)

(define_expand "@aarch64_simd_combine<mode>"
  [(match_operand:<VDBL> 0 "register_operand")
   (match_operand:VDC 1 "register_operand")
   (match_operand:VDC 2 "register_operand")]
  "TARGET_SIMD"
  {
    emit_insn (gen_move_lo_quad_<Vdbl> (operands[0], operands[1]));
    emit_insn (gen_move_hi_quad_<Vdbl> (operands[0], operands[2]));
    DONE;
  }
[(set_attr "type" "multiple")]
)

;; <su><addsub>l<q>.
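;; Illustrative note: the widening forms below extend each element before
;; operating, so no intermediate result is truncated, e.g. for V8QI inputs
;;   saddl v0.8h, v1.8b, v2.8b   ; v0.h[i] = sext (v1.b[i]) + sext (v2.b[i])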
(define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>_hi_internal"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(ADDSUB:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
			   (match_operand:VQW 1 "register_operand" "w")
			   (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
		       (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
			   (match_operand:VQW 2 "register_operand" "w")
			   (match_dup 3)))))]
  "TARGET_SIMD"
  "<ANY_EXTEND:su><ADDSUB:optab>l2\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_<ADDSUB:optab>_long")]
)

(define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>_lo_internal"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(ADDSUB:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
			   (match_operand:VQW 1 "register_operand" "w")
			   (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
		       (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
			   (match_operand:VQW 2 "register_operand" "w")
			   (match_dup 3)))))]
  "TARGET_SIMD"
  "<ANY_EXTEND:su><ADDSUB:optab>l\t%0.<Vwtype>, %1.<Vhalftype>, %2.<Vhalftype>"
  [(set_attr "type" "neon_<ADDSUB:optab>_long")]
)

(define_expand "vec_widen_<su>addl_lo_<mode>"
  [(match_operand:<VWIDE> 0 "register_operand")
   (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand"))
   (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand"))]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
  emit_insn (gen_aarch64_<su>addl<mode>_lo_internal (operands[0], operands[1],
						     operands[2], p));
  DONE;
})

(define_expand "vec_widen_<su>addl_hi_<mode>"
  [(match_operand:<VWIDE> 0 "register_operand")
   (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand"))
   (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand"))]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
  emit_insn (gen_aarch64_<su>addl<mode>_hi_internal (operands[0], operands[1],
						     operands[2], p));
  DONE;
})

(define_expand "vec_widen_<su>subl_lo_<mode>"
  [(match_operand:<VWIDE> 0 "register_operand")
   (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand"))
   (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand"))]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
  emit_insn (gen_aarch64_<su>subl<mode>_lo_internal (operands[0], operands[1],
						     operands[2], p));
  DONE;
})

(define_expand "vec_widen_<su>subl_hi_<mode>"
  [(match_operand:<VWIDE> 0 "register_operand")
   (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand"))
   (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand"))]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
  emit_insn (gen_aarch64_<su>subl<mode>_hi_internal (operands[0], operands[1],
						     operands[2], p));
  DONE;
})

(define_expand "aarch64_saddl2<mode>"
  [(match_operand:<VWIDE> 0 "register_operand")
   (match_operand:VQW 1 "register_operand")
   (match_operand:VQW 2 "register_operand")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
  emit_insn (gen_aarch64_saddl<mode>_hi_internal (operands[0], operands[1],
						  operands[2], p));
  DONE;
})
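;; Illustrative note: the _hi_internal forms select the upper half of each
;; 128-bit input via vect_par_cnst_hi_half, e.g.
;;   saddl2 v0.8h, v1.16b, v2.16b   ; widen and add byte lanes 8..15
;; while the _lo_internal forms use saddl on lanes 0..7.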
(define_expand "aarch64_uaddl2<mode>"
  [(match_operand:<VWIDE> 0 "register_operand")
   (match_operand:VQW 1 "register_operand")
   (match_operand:VQW 2 "register_operand")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
  emit_insn (gen_aarch64_uaddl<mode>_hi_internal (operands[0], operands[1],
						  operands[2], p));
  DONE;
})

(define_expand "aarch64_ssubl2<mode>"
  [(match_operand:<VWIDE> 0 "register_operand")
   (match_operand:VQW 1 "register_operand")
   (match_operand:VQW 2 "register_operand")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
  emit_insn (gen_aarch64_ssubl<mode>_hi_internal (operands[0], operands[1],
						  operands[2], p));
  DONE;
})

(define_expand "aarch64_usubl2<mode>"
  [(match_operand:<VWIDE> 0 "register_operand")
   (match_operand:VQW 1 "register_operand")
   (match_operand:VQW 2 "register_operand")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
  emit_insn (gen_aarch64_usubl<mode>_hi_internal (operands[0], operands[1],
						  operands[2], p));
  DONE;
})

(define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(ADDSUB:<VWIDE> (ANY_EXTEND:<VWIDE>
			   (match_operand:VD_BHSI 1 "register_operand" "w"))
		       (ANY_EXTEND:<VWIDE>
			   (match_operand:VD_BHSI 2 "register_operand" "w"))))]
  "TARGET_SIMD"
  "<ANY_EXTEND:su><ADDSUB:optab>l\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_<ADDSUB:optab>_long")]
)

;; <su><addsub>w<q>.

(define_expand "widen_ssum<mode>3"
  [(set (match_operand:<VDBLW> 0 "register_operand")
	(plus:<VDBLW> (sign_extend:<VDBLW>
		        (match_operand:VQW 1 "register_operand"))
		      (match_operand:<VDBLW> 2 "register_operand")))]
  "TARGET_SIMD"
  {
    rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
    rtx temp = gen_reg_rtx (GET_MODE (operands[0]));

    emit_insn (gen_aarch64_saddw<mode>_internal (temp, operands[2],
						 operands[1], p));
    emit_insn (gen_aarch64_saddw2<mode> (operands[0], temp, operands[1]));
    DONE;
  }
)

(define_expand "widen_ssum<mode>3"
  [(set (match_operand:<VWIDE> 0 "register_operand")
	(plus:<VWIDE> (sign_extend:<VWIDE>
		        (match_operand:VD_BHSI 1 "register_operand"))
		      (match_operand:<VWIDE> 2 "register_operand")))]
  "TARGET_SIMD"
{
  emit_insn (gen_aarch64_saddw<mode> (operands[0], operands[2], operands[1]));
  DONE;
})

(define_expand "widen_usum<mode>3"
  [(set (match_operand:<VDBLW> 0 "register_operand")
	(plus:<VDBLW> (zero_extend:<VDBLW>
		        (match_operand:VQW 1 "register_operand"))
		      (match_operand:<VDBLW> 2 "register_operand")))]
  "TARGET_SIMD"
  {
    rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
    rtx temp = gen_reg_rtx (GET_MODE (operands[0]));

    emit_insn (gen_aarch64_uaddw<mode>_internal (temp, operands[2],
						 operands[1], p));
    emit_insn (gen_aarch64_uaddw2<mode> (operands[0], temp, operands[1]));
    DONE;
  }
)

(define_expand "widen_usum<mode>3"
  [(set (match_operand:<VWIDE> 0 "register_operand")
	(plus:<VWIDE> (zero_extend:<VWIDE>
		        (match_operand:VD_BHSI 1 "register_operand"))
		      (match_operand:<VWIDE> 2 "register_operand")))]
  "TARGET_SIMD"
{
  emit_insn (gen_aarch64_uaddw<mode> (operands[0], operands[2], operands[1]));
  DONE;
})
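;; Illustrative note: for a 128-bit input the widen_ssum expansion above
;; accumulates in two steps, e.g. for V16QI (register names arbitrary):
;;   saddw  t.8h, acc.8h, v.8b    ; add the sign-extended low half
;;   saddw2 d.8h, t.8h, v.16b    ; then the sign-extended high half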
(define_insn "aarch64_<ANY_EXTEND:su>subw<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(minus:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
		       (ANY_EXTEND:<VWIDE>
			 (match_operand:VD_BHSI 2 "register_operand" "w"))))]
  "TARGET_SIMD"
  "<ANY_EXTEND:su>subw\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>"
  [(set_attr "type" "neon_sub_widen")]
)

(define_insn "aarch64_<ANY_EXTEND:su>subw<mode>_internal"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(minus:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
		       (ANY_EXTEND:<VWIDE>
			 (vec_select:<VHALF>
			   (match_operand:VQW 2 "register_operand" "w")
			   (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))))]
  "TARGET_SIMD"
  "<ANY_EXTEND:su>subw\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vhalftype>"
  [(set_attr "type" "neon_sub_widen")]
)

(define_insn "aarch64_<ANY_EXTEND:su>subw2<mode>_internal"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(minus:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
		       (ANY_EXTEND:<VWIDE>
			 (vec_select:<VHALF>
			   (match_operand:VQW 2 "register_operand" "w")
			   (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))))]
  "TARGET_SIMD"
  "<ANY_EXTEND:su>subw2\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>"
  [(set_attr "type" "neon_sub_widen")]
)

(define_insn "aarch64_<ANY_EXTEND:su>addw<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(plus:<VWIDE>
	  (ANY_EXTEND:<VWIDE> (match_operand:VD_BHSI 2 "register_operand" "w"))
	  (match_operand:<VWIDE> 1 "register_operand" "w")))]
  "TARGET_SIMD"
  "<ANY_EXTEND:su>addw\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>"
  [(set_attr "type" "neon_add_widen")]
)

(define_insn "aarch64_<ANY_EXTEND:su>addw<mode>_internal"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(plus:<VWIDE>
	  (ANY_EXTEND:<VWIDE>
	    (vec_select:<VHALF>
	      (match_operand:VQW 2 "register_operand" "w")
	      (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
	  (match_operand:<VWIDE> 1 "register_operand" "w")))]
  "TARGET_SIMD"
  "<ANY_EXTEND:su>addw\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vhalftype>"
  [(set_attr "type" "neon_add_widen")]
)

(define_insn "aarch64_<ANY_EXTEND:su>addw2<mode>_internal"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(plus:<VWIDE>
	  (ANY_EXTEND:<VWIDE>
	    (vec_select:<VHALF>
	      (match_operand:VQW 2 "register_operand" "w")
	      (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
	  (match_operand:<VWIDE> 1 "register_operand" "w")))]
  "TARGET_SIMD"
  "<ANY_EXTEND:su>addw2\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>"
  [(set_attr "type" "neon_add_widen")]
)

(define_expand "aarch64_saddw2<mode>"
  [(match_operand:<VWIDE> 0 "register_operand")
   (match_operand:<VWIDE> 1 "register_operand")
   (match_operand:VQW 2 "register_operand")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
  emit_insn (gen_aarch64_saddw2<mode>_internal (operands[0], operands[1],
						operands[2], p));
  DONE;
})
(define_expand "aarch64_uaddw2<mode>"
  [(match_operand:<VWIDE> 0 "register_operand")
   (match_operand:<VWIDE> 1 "register_operand")
   (match_operand:VQW 2 "register_operand")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
  emit_insn (gen_aarch64_uaddw2<mode>_internal (operands[0], operands[1],
						operands[2], p));
  DONE;
})


(define_expand "aarch64_ssubw2<mode>"
  [(match_operand:<VWIDE> 0 "register_operand")
   (match_operand:<VWIDE> 1 "register_operand")
   (match_operand:VQW 2 "register_operand")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
  emit_insn (gen_aarch64_ssubw2<mode>_internal (operands[0], operands[1],
						operands[2], p));
  DONE;
})

(define_expand "aarch64_usubw2<mode>"
  [(match_operand:<VWIDE> 0 "register_operand")
   (match_operand:<VWIDE> 1 "register_operand")
   (match_operand:VQW 2 "register_operand")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
  emit_insn (gen_aarch64_usubw2<mode>_internal (operands[0], operands[1],
						operands[2], p));
  DONE;
})

;; <su><r>h<addsub>.

(define_expand "<u>avg<mode>3_floor"
  [(set (match_operand:VDQ_BHSI 0 "register_operand")
	(unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand")
			  (match_operand:VDQ_BHSI 2 "register_operand")]
			 HADD))]
  "TARGET_SIMD"
)

(define_expand "<u>avg<mode>3_ceil"
  [(set (match_operand:VDQ_BHSI 0 "register_operand")
	(unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand")
			  (match_operand:VDQ_BHSI 2 "register_operand")]
			 RHADD))]
  "TARGET_SIMD"
)

(define_insn "aarch64_<sur>h<addsub><mode>"
  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
	(unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand" "w")
			  (match_operand:VDQ_BHSI 2 "register_operand" "w")]
			 HADDSUB))]
  "TARGET_SIMD"
  "<sur>h<addsub>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_<addsub>_halve<q>")]
)
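;; Illustrative note: the avg expanders above map to the halving adds:
;; [su]hadd computes (a + b) >> 1 element-wise and [su]rhadd computes
;; (a + b + 1) >> 1, with the addition done without overflow, so no
;; widening is needed.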
;; <r><addsub>hn<q>.

(define_insn "aarch64_<sur><addsub>hn<mode>"
  [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
	(unspec:<VNARROWQ> [(match_operand:VQN 1 "register_operand" "w")
			    (match_operand:VQN 2 "register_operand" "w")]
			   ADDSUBHN))]
  "TARGET_SIMD"
  "<sur><addsub>hn\\t%0.<Vntype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_<addsub>_halve_narrow_q")]
)

(define_insn "aarch64_<sur><addsub>hn2<mode>"
  [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
	(unspec:<VNARROWQ2> [(match_operand:<VNARROWQ> 1 "register_operand" "0")
			     (match_operand:VQN 2 "register_operand" "w")
			     (match_operand:VQN 3 "register_operand" "w")]
			    ADDSUBHN2))]
  "TARGET_SIMD"
  "<sur><addsub>hn2\\t%0.<V2ntype>, %2.<Vtype>, %3.<Vtype>"
  [(set_attr "type" "neon_<addsub>_halve_narrow_q")]
)

;; pmul.

(define_insn "aarch64_pmul<mode>"
  [(set (match_operand:VB 0 "register_operand" "=w")
	(unspec:VB [(match_operand:VB 1 "register_operand" "w")
		    (match_operand:VB 2 "register_operand" "w")]
		   UNSPEC_PMUL))]
  "TARGET_SIMD"
  "pmul\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_mul_<Vetype><q>")]
)

;; fmulx.

(define_insn "aarch64_fmulx<mode>"
  [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
	(unspec:VHSDF_HSDF
	  [(match_operand:VHSDF_HSDF 1 "register_operand" "w")
	   (match_operand:VHSDF_HSDF 2 "register_operand" "w")]
	   UNSPEC_FMULX))]
 "TARGET_SIMD"
 "fmulx\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
 [(set_attr "type" "neon_fp_mul_<stype>")]
)

;; vmulxq_lane_f32, and vmulx_laneq_f32

(define_insn "*aarch64_mulx_elt_<vswap_width_name><mode>"
  [(set (match_operand:VDQSF 0 "register_operand" "=w")
	(unspec:VDQSF
	 [(match_operand:VDQSF 1 "register_operand" "w")
	  (vec_duplicate:VDQSF
	   (vec_select:<VEL>
	    (match_operand:<VSWAP_WIDTH> 2 "register_operand" "w")
	    (parallel [(match_operand:SI 3 "immediate_operand" "i")])))]
	 UNSPEC_FMULX))]
  "TARGET_SIMD"
  {
    operands[3] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[3]));
    return "fmulx\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
  }
  [(set_attr "type" "neon_fp_mul_<Vetype>_scalar<q>")]
)

;; vmulxq_laneq_f32, vmulxq_laneq_f64, vmulx_lane_f32

(define_insn "*aarch64_mulx_elt<mode>"
  [(set (match_operand:VDQF 0 "register_operand" "=w")
	(unspec:VDQF
	 [(match_operand:VDQF 1 "register_operand" "w")
	  (vec_duplicate:VDQF
	   (vec_select:<VEL>
	    (match_operand:VDQF 2 "register_operand" "w")
	    (parallel [(match_operand:SI 3 "immediate_operand" "i")])))]
	 UNSPEC_FMULX))]
  "TARGET_SIMD"
  {
    operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
    return "fmulx\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
  }
  [(set_attr "type" "neon_fp_mul_<Vetype><q>")]
)

;; vmulxq_lane

(define_insn "*aarch64_mulx_elt_from_dup<mode>"
  [(set (match_operand:VHSDF 0 "register_operand" "=w")
	(unspec:VHSDF
	 [(match_operand:VHSDF 1 "register_operand" "w")
	  (vec_duplicate:VHSDF
	    (match_operand:<VEL> 2 "register_operand" "<h_con>"))]
	 UNSPEC_FMULX))]
  "TARGET_SIMD"
  "fmulx\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[0]"
  [(set_attr "type" "neon<fp>_mul_<stype>_scalar<q>")]
)

;; vmulxs_lane_f32, vmulxs_laneq_f32
;; vmulxd_lane_f64 == vmulx_lane_f64
;; vmulxd_laneq_f64 == vmulx_laneq_f64

(define_insn "*aarch64_vgetfmulx<mode>"
  [(set (match_operand:<VEL> 0 "register_operand" "=w")
	(unspec:<VEL>
	 [(match_operand:<VEL> 1 "register_operand" "w")
	  (vec_select:<VEL>
	   (match_operand:VDQF 2 "register_operand" "w")
	   (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
	 UNSPEC_FMULX))]
  "TARGET_SIMD"
  {
    operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
    return "fmulx\t%<Vetype>0, %<Vetype>1, %2.<Vetype>[%3]";
  }
  [(set_attr "type" "fmul<Vetype>")]
)
;; <su>q<addsub>

(define_insn "aarch64_<su_optab>q<addsub><mode>"
  [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
	(BINQOPS:VSDQ_I (match_operand:VSDQ_I 1 "register_operand" "w")
			(match_operand:VSDQ_I 2 "register_operand" "w")))]
  "TARGET_SIMD"
  "<su_optab>q<addsub>\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
  [(set_attr "type" "neon_q<addsub><q>")]
)
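;; Illustrative note: the saturating forms clamp instead of wrapping,
;; e.g. for byte elements
;;   sqadd: 0x7f + 0x01 -> 0x7f (INT8_MAX)
;;   uqadd: 0xff + 0x01 -> 0xff (UINT8_MAX)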
"register_operand" "w")] 4512 USSUQADD))] 4513 "TARGET_SIMD" 4514 "<sur>qadd\\t%<v>0<Vmtype>, %<v>2<Vmtype>" 4515 [(set_attr "type" "neon_qadd<q>")] 4516) 4517 4518;; sqmovun 4519 4520(define_insn "aarch64_sqmovun<mode>" 4521 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w") 4522 (unspec:<VNARROWQ> [(match_operand:VSQN_HSDI 1 "register_operand" "w")] 4523 UNSPEC_SQXTUN))] 4524 "TARGET_SIMD" 4525 "sqxtun\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>" 4526 [(set_attr "type" "neon_sat_shift_imm_narrow_q")] 4527) 4528 4529;; sqmovn and uqmovn 4530 4531(define_insn "aarch64_<sur>qmovn<mode>" 4532 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w") 4533 (unspec:<VNARROWQ> [(match_operand:VSQN_HSDI 1 "register_operand" "w")] 4534 SUQMOVN))] 4535 "TARGET_SIMD" 4536 "<sur>qxtn\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>" 4537 [(set_attr "type" "neon_sat_shift_imm_narrow_q")] 4538) 4539 4540(define_insn "aarch64_<su>qxtn2<mode>_le" 4541 [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w") 4542 (vec_concat:<VNARROWQ2> 4543 (match_operand:<VNARROWQ> 1 "register_operand" "0") 4544 (SAT_TRUNC:<VNARROWQ> 4545 (match_operand:VQN 2 "register_operand" "w"))))] 4546 "TARGET_SIMD && !BYTES_BIG_ENDIAN" 4547 "<su>qxtn2\\t%0.<V2ntype>, %2.<Vtype>" 4548 [(set_attr "type" "neon_sat_shift_imm_narrow_q")] 4549) 4550 4551(define_insn "aarch64_<su>qxtn2<mode>_be" 4552 [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w") 4553 (vec_concat:<VNARROWQ2> 4554 (SAT_TRUNC:<VNARROWQ> 4555 (match_operand:VQN 2 "register_operand" "w")) 4556 (match_operand:<VNARROWQ> 1 "register_operand" "0")))] 4557 "TARGET_SIMD && BYTES_BIG_ENDIAN" 4558 "<su>qxtn2\\t%0.<V2ntype>, %2.<Vtype>" 4559 [(set_attr "type" "neon_sat_shift_imm_narrow_q")] 4560) 4561 4562(define_expand "aarch64_<su>qxtn2<mode>" 4563 [(match_operand:<VNARROWQ2> 0 "register_operand") 4564 (match_operand:<VNARROWQ> 1 "register_operand") 4565 (SAT_TRUNC:<VNARROWQ> 4566 (match_operand:VQN 2 "register_operand"))] 4567 "TARGET_SIMD" 4568 { 4569 if (BYTES_BIG_ENDIAN) 4570 emit_insn (gen_aarch64_<su>qxtn2<mode>_be (operands[0], operands[1], 4571 operands[2])); 4572 else 4573 emit_insn (gen_aarch64_<su>qxtn2<mode>_le (operands[0], operands[1], 4574 operands[2])); 4575 DONE; 4576 } 4577) 4578 4579(define_insn "aarch64_sqxtun2<mode>_le" 4580 [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w") 4581 (vec_concat:<VNARROWQ2> 4582 (match_operand:<VNARROWQ> 1 "register_operand" "0") 4583 (unspec:<VNARROWQ> 4584 [(match_operand:VQN 2 "register_operand" "w")] UNSPEC_SQXTUN2)))] 4585 "TARGET_SIMD && !BYTES_BIG_ENDIAN" 4586 "sqxtun2\\t%0.<V2ntype>, %2.<Vtype>" 4587 [(set_attr "type" "neon_sat_shift_imm_narrow_q")] 4588) 4589 4590(define_insn "aarch64_sqxtun2<mode>_be" 4591 [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w") 4592 (vec_concat:<VNARROWQ2> 4593 (unspec:<VNARROWQ> 4594 [(match_operand:VQN 2 "register_operand" "w")] UNSPEC_SQXTUN2) 4595 (match_operand:<VNARROWQ> 1 "register_operand" "0")))] 4596 "TARGET_SIMD && BYTES_BIG_ENDIAN" 4597 "sqxtun2\\t%0.<V2ntype>, %2.<Vtype>" 4598 [(set_attr "type" "neon_sat_shift_imm_narrow_q")] 4599) 4600 4601(define_expand "aarch64_sqxtun2<mode>" 4602 [(match_operand:<VNARROWQ2> 0 "register_operand") 4603 (match_operand:<VNARROWQ> 1 "register_operand") 4604 (unspec:<VNARROWQ> 4605 [(match_operand:VQN 2 "register_operand")] UNSPEC_SQXTUN2)] 4606 "TARGET_SIMD" 4607 { 4608 if (BYTES_BIG_ENDIAN) 4609 emit_insn (gen_aarch64_sqxtun2<mode>_be (operands[0], operands[1], 4610 operands[2])); 4611 else 4612 emit_insn 
(define_expand "aarch64_sqxtun2<mode>"
  [(match_operand:<VNARROWQ2> 0 "register_operand")
   (match_operand:<VNARROWQ> 1 "register_operand")
   (unspec:<VNARROWQ>
     [(match_operand:VQN 2 "register_operand")] UNSPEC_SQXTUN2)]
  "TARGET_SIMD"
  {
    if (BYTES_BIG_ENDIAN)
      emit_insn (gen_aarch64_sqxtun2<mode>_be (operands[0], operands[1],
					       operands[2]));
    else
      emit_insn (gen_aarch64_sqxtun2<mode>_le (operands[0], operands[1],
					       operands[2]));
    DONE;
  }
)

;; <su>q<absneg>

(define_insn "aarch64_s<optab><mode>"
  [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
	(UNQOPS:VSDQ_I
	  (match_operand:VSDQ_I 1 "register_operand" "w")))]
  "TARGET_SIMD"
  "s<optab>\\t%<v>0<Vmtype>, %<v>1<Vmtype>"
  [(set_attr "type" "neon_<optab><q>")]
)

;; sq<r>dmulh.

(define_insn "aarch64_sq<r>dmulh<mode>"
  [(set (match_operand:VSDQ_HSI 0 "register_operand" "=w")
	(unspec:VSDQ_HSI
	  [(match_operand:VSDQ_HSI 1 "register_operand" "w")
	   (match_operand:VSDQ_HSI 2 "register_operand" "w")]
	 VQDMULH))]
  "TARGET_SIMD"
  "sq<r>dmulh\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
  [(set_attr "type" "neon_sat_mul_<Vetype><q>")]
)

;; sq<r>dmulh_lane

(define_insn "aarch64_sq<r>dmulh_lane<mode>"
  [(set (match_operand:VDQHS 0 "register_operand" "=w")
	(unspec:VDQHS
	  [(match_operand:VDQHS 1 "register_operand" "w")
	   (vec_select:<VEL>
	     (match_operand:<VCOND> 2 "register_operand" "<vwx>")
	     (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
	 VQDMULH))]
  "TARGET_SIMD"
  "*
   operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
   return \"sq<r>dmulh\\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[%3]\";"
  [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
)

(define_insn "aarch64_sq<r>dmulh_laneq<mode>"
  [(set (match_operand:VDQHS 0 "register_operand" "=w")
	(unspec:VDQHS
	  [(match_operand:VDQHS 1 "register_operand" "w")
	   (vec_select:<VEL>
	     (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
	     (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
	 VQDMULH))]
  "TARGET_SIMD"
  "*
   operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
   return \"sq<r>dmulh\\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[%3]\";"
  [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
)

(define_insn "aarch64_sq<r>dmulh_lane<mode>"
  [(set (match_operand:SD_HSI 0 "register_operand" "=w")
	(unspec:SD_HSI
	  [(match_operand:SD_HSI 1 "register_operand" "w")
	   (vec_select:<VEL>
	     (match_operand:<VCOND> 2 "register_operand" "<vwx>")
	     (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
	 VQDMULH))]
  "TARGET_SIMD"
  "*
   operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
   return \"sq<r>dmulh\\t%<v>0, %<v>1, %2.<v>[%3]\";"
  [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
)

(define_insn "aarch64_sq<r>dmulh_laneq<mode>"
  [(set (match_operand:SD_HSI 0 "register_operand" "=w")
	(unspec:SD_HSI
	  [(match_operand:SD_HSI 1 "register_operand" "w")
	   (vec_select:<VEL>
	     (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
	     (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
	 VQDMULH))]
  "TARGET_SIMD"
  "*
   operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
   return \"sq<r>dmulh\\t%<v>0, %<v>1, %2.<v>[%3]\";"
  [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
)
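;; Illustrative note: SQDMULH returns the high half of the doubled
;; product, i.e. for int16 lanes sat ((2 * a * b) >> 16); the SQRDMULH
;; forms add 1 << 15 before the shift so the result is rounded.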
;; sqrdml[as]h.

(define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h<mode>"
  [(set (match_operand:VSDQ_HSI 0 "register_operand" "=w")
	(unspec:VSDQ_HSI
	  [(match_operand:VSDQ_HSI 1 "register_operand" "0")
	   (match_operand:VSDQ_HSI 2 "register_operand" "w")
	   (match_operand:VSDQ_HSI 3 "register_operand" "w")]
	  SQRDMLH_AS))]
  "TARGET_SIMD_RDMA"
  "sqrdml<SQRDMLH_AS:rdma_as>h\\t%<v>0<Vmtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
  [(set_attr "type" "neon_sat_mla_<Vetype>_long")]
)

;; sqrdml[as]h_lane.

(define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_lane<mode>"
  [(set (match_operand:VDQHS 0 "register_operand" "=w")
	(unspec:VDQHS
	  [(match_operand:VDQHS 1 "register_operand" "0")
	   (match_operand:VDQHS 2 "register_operand" "w")
	   (vec_select:<VEL>
	     (match_operand:<VCOND> 3 "register_operand" "<vwx>")
	     (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
	  SQRDMLH_AS))]
  "TARGET_SIMD_RDMA"
  {
    operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
    return
     "sqrdml<SQRDMLH_AS:rdma_as>h\\t%0.<Vtype>, %2.<Vtype>, %3.<Vetype>[%4]";
  }
  [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
)

(define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_lane<mode>"
  [(set (match_operand:SD_HSI 0 "register_operand" "=w")
	(unspec:SD_HSI
	  [(match_operand:SD_HSI 1 "register_operand" "0")
	   (match_operand:SD_HSI 2 "register_operand" "w")
	   (vec_select:<VEL>
	     (match_operand:<VCOND> 3 "register_operand" "<vwx>")
	     (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
	  SQRDMLH_AS))]
  "TARGET_SIMD_RDMA"
  {
    operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
    return
     "sqrdml<SQRDMLH_AS:rdma_as>h\\t%<v>0, %<v>2, %3.<Vetype>[%4]";
  }
  [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
)
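;; Note: the sqrdml[as]h patterns above and below require the Armv8.1-A
;; rounding doubling multiply-accumulate extension, hence the
;; TARGET_SIMD_RDMA condition rather than plain TARGET_SIMD.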
;; sqrdml[as]h_laneq.

(define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_laneq<mode>"
  [(set (match_operand:VDQHS 0 "register_operand" "=w")
	(unspec:VDQHS
	  [(match_operand:VDQHS 1 "register_operand" "0")
	   (match_operand:VDQHS 2 "register_operand" "w")
	   (vec_select:<VEL>
	     (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
	     (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
	  SQRDMLH_AS))]
  "TARGET_SIMD_RDMA"
  {
    operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
    return
     "sqrdml<SQRDMLH_AS:rdma_as>h\\t%0.<Vtype>, %2.<Vtype>, %3.<Vetype>[%4]";
  }
  [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
)

(define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_laneq<mode>"
  [(set (match_operand:SD_HSI 0 "register_operand" "=w")
	(unspec:SD_HSI
	  [(match_operand:SD_HSI 1 "register_operand" "0")
	   (match_operand:SD_HSI 2 "register_operand" "w")
	   (vec_select:<VEL>
	     (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
	     (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
	  SQRDMLH_AS))]
  "TARGET_SIMD_RDMA"
  {
    operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
    return
     "sqrdml<SQRDMLH_AS:rdma_as>h\\t%<v>0, %<v>2, %3.<v>[%4]";
  }
  [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
)

;; vqdml[sa]l

(define_insn "aarch64_sqdmlal<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(ss_plus:<VWIDE>
	  (ss_ashift:<VWIDE>
	      (mult:<VWIDE>
		(sign_extend:<VWIDE>
		      (match_operand:VSD_HSI 2 "register_operand" "w"))
		(sign_extend:<VWIDE>
		      (match_operand:VSD_HSI 3 "register_operand" "w")))
	      (const_int 1))
	  (match_operand:<VWIDE> 1 "register_operand" "0")))]
  "TARGET_SIMD"
  "sqdmlal\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
  [(set_attr "type" "neon_sat_mla_<Vetype>_long")]
)

(define_insn "aarch64_sqdmlsl<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(ss_minus:<VWIDE>
	  (match_operand:<VWIDE> 1 "register_operand" "0")
	  (ss_ashift:<VWIDE>
	      (mult:<VWIDE>
		(sign_extend:<VWIDE>
		      (match_operand:VSD_HSI 2 "register_operand" "w"))
		(sign_extend:<VWIDE>
		      (match_operand:VSD_HSI 3 "register_operand" "w")))
	      (const_int 1))))]
  "TARGET_SIMD"
  "sqdmlsl\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
  [(set_attr "type" "neon_sat_mla_<Vetype>_long")]
)
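;; Illustrative note: SQDMLAL/SQDMLSL widen before accumulating, i.e.
;; acc +/- sat (2 * sext (a) * sext (b)), computed entirely in the wider
;; element type with saturation on both the doubling and the
;; accumulation.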
4859 (mult:<VWIDE> 4860 (sign_extend:<VWIDE> 4861 (match_operand:VD_HSI 2 "register_operand" "w")) 4862 (sign_extend:<VWIDE> 4863 (vec_duplicate:VD_HSI 4864 (vec_select:<VEL> 4865 (match_operand:<VCOND> 3 "register_operand" "<vwx>") 4866 (parallel [(match_operand:SI 4 "immediate_operand" "i")]))) 4867 )) 4868 (const_int 1))))] 4869 "TARGET_SIMD" 4870 { 4871 operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4])); 4872 return 4873 "sqdmlsl\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]"; 4874 } 4875 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")] 4876) 4877 4878 4879(define_insn "aarch64_sqdmlsl_laneq<mode>" 4880 [(set (match_operand:<VWIDE> 0 "register_operand" "=w") 4881 (ss_minus:<VWIDE> 4882 (match_operand:<VWIDE> 1 "register_operand" "0") 4883 (ss_ashift:<VWIDE> 4884 (mult:<VWIDE> 4885 (sign_extend:<VWIDE> 4886 (match_operand:VD_HSI 2 "register_operand" "w")) 4887 (sign_extend:<VWIDE> 4888 (vec_duplicate:VD_HSI 4889 (vec_select:<VEL> 4890 (match_operand:<VCONQ> 3 "register_operand" "<vwx>") 4891 (parallel [(match_operand:SI 4 "immediate_operand" "i")]))) 4892 )) 4893 (const_int 1))))] 4894 "TARGET_SIMD" 4895 { 4896 operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4])); 4897 return 4898 "sqdmlsl\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]"; 4899 } 4900 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")] 4901) 4902 4903(define_insn "aarch64_sqdmlal_laneq<mode>" 4904 [(set (match_operand:<VWIDE> 0 "register_operand" "=w") 4905 (ss_plus:<VWIDE> 4906 (ss_ashift:<VWIDE> 4907 (mult:<VWIDE> 4908 (sign_extend:<VWIDE> 4909 (match_operand:VD_HSI 2 "register_operand" "w")) 4910 (sign_extend:<VWIDE> 4911 (vec_duplicate:VD_HSI 4912 (vec_select:<VEL> 4913 (match_operand:<VCONQ> 3 "register_operand" "<vwx>") 4914 (parallel [(match_operand:SI 4 "immediate_operand" "i")]))) 4915 )) 4916 (const_int 1)) 4917 (match_operand:<VWIDE> 1 "register_operand" "0")))] 4918 "TARGET_SIMD" 4919 { 4920 operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4])); 4921 return 4922 "sqdmlal\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]"; 4923 } 4924 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")] 4925) 4926 4927 4928(define_insn "aarch64_sqdmlal_lane<mode>" 4929 [(set (match_operand:<VWIDE> 0 "register_operand" "=w") 4930 (ss_plus:<VWIDE> 4931 (ss_ashift:<VWIDE> 4932 (mult:<VWIDE> 4933 (sign_extend:<VWIDE> 4934 (match_operand:SD_HSI 2 "register_operand" "w")) 4935 (sign_extend:<VWIDE> 4936 (vec_select:<VEL> 4937 (match_operand:<VCOND> 3 "register_operand" "<vwx>") 4938 (parallel [(match_operand:SI 4 "immediate_operand" "i")]))) 4939 ) 4940 (const_int 1)) 4941 (match_operand:<VWIDE> 1 "register_operand" "0")))] 4942 "TARGET_SIMD" 4943 { 4944 operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4])); 4945 return 4946 "sqdmlal\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]"; 4947 } 4948 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")] 4949) 4950 4951(define_insn "aarch64_sqdmlsl_lane<mode>" 4952 [(set (match_operand:<VWIDE> 0 "register_operand" "=w") 4953 (ss_minus:<VWIDE> 4954 (match_operand:<VWIDE> 1 "register_operand" "0") 4955 (ss_ashift:<VWIDE> 4956 (mult:<VWIDE> 4957 (sign_extend:<VWIDE> 4958 (match_operand:SD_HSI 2 "register_operand" "w")) 4959 (sign_extend:<VWIDE> 4960 (vec_select:<VEL> 4961 (match_operand:<VCOND> 3 "register_operand" "<vwx>") 4962 (parallel [(match_operand:SI 4 "immediate_operand" "i")]))) 4963 ) 4964 (const_int 1))))] 4965 "TARGET_SIMD" 4966 { 4967 operands[4] = 
aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4])); 4968 return 4969 "sqdmlsl\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]"; 4970 } 4971 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")] 4972) 4973 4974 4975(define_insn "aarch64_sqdmlal_laneq<mode>" 4976 [(set (match_operand:<VWIDE> 0 "register_operand" "=w") 4977 (ss_plus:<VWIDE> 4978 (ss_ashift:<VWIDE> 4979 (mult:<VWIDE> 4980 (sign_extend:<VWIDE> 4981 (match_operand:SD_HSI 2 "register_operand" "w")) 4982 (sign_extend:<VWIDE> 4983 (vec_select:<VEL> 4984 (match_operand:<VCONQ> 3 "register_operand" "<vwx>") 4985 (parallel [(match_operand:SI 4 "immediate_operand" "i")]))) 4986 ) 4987 (const_int 1)) 4988 (match_operand:<VWIDE> 1 "register_operand" "0")))] 4989 "TARGET_SIMD" 4990 { 4991 operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4])); 4992 return 4993 "sqdmlal\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]"; 4994 } 4995 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")] 4996) 4997 4998(define_insn "aarch64_sqdmlsl_laneq<mode>" 4999 [(set (match_operand:<VWIDE> 0 "register_operand" "=w") 5000 (ss_minus:<VWIDE> 5001 (match_operand:<VWIDE> 1 "register_operand" "0") 5002 (ss_ashift:<VWIDE> 5003 (mult:<VWIDE> 5004 (sign_extend:<VWIDE> 5005 (match_operand:SD_HSI 2 "register_operand" "w")) 5006 (sign_extend:<VWIDE> 5007 (vec_select:<VEL> 5008 (match_operand:<VCONQ> 3 "register_operand" "<vwx>") 5009 (parallel [(match_operand:SI 4 "immediate_operand" "i")]))) 5010 ) 5011 (const_int 1))))] 5012 "TARGET_SIMD" 5013 { 5014 operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4])); 5015 return 5016 "sqdmlsl\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]"; 5017 } 5018 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")] 5019) 5020 5021;; vqdml[sa]l_n 5022 5023(define_insn "aarch64_sqdmlsl_n<mode>" 5024 [(set (match_operand:<VWIDE> 0 "register_operand" "=w") 5025 (ss_minus:<VWIDE> 5026 (match_operand:<VWIDE> 1 "register_operand" "0") 5027 (ss_ashift:<VWIDE> 5028 (mult:<VWIDE> 5029 (sign_extend:<VWIDE> 5030 (match_operand:VD_HSI 2 "register_operand" "w")) 5031 (sign_extend:<VWIDE> 5032 (vec_duplicate:VD_HSI 5033 (match_operand:<VEL> 3 "register_operand" "<vwx>")))) 5034 (const_int 1))))] 5035 "TARGET_SIMD" 5036 "sqdmlsl\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[0]" 5037 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")] 5038) 5039 5040(define_insn "aarch64_sqdmlal_n<mode>" 5041 [(set (match_operand:<VWIDE> 0 "register_operand" "=w") 5042 (ss_plus:<VWIDE> 5043 (ss_ashift:<VWIDE> 5044 (mult:<VWIDE> 5045 (sign_extend:<VWIDE> 5046 (match_operand:VD_HSI 2 "register_operand" "w")) 5047 (sign_extend:<VWIDE> 5048 (vec_duplicate:VD_HSI 5049 (match_operand:<VEL> 3 "register_operand" "<vwx>")))) 5050 (const_int 1)) 5051 (match_operand:<VWIDE> 1 "register_operand" "0")))] 5052 "TARGET_SIMD" 5053 "sqdmlal\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[0]" 5054 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")] 5055) 5056 5057 5058;; sqdml[as]l2 5059 5060(define_insn "aarch64_sqdmlal2<mode>_internal" 5061 [(set (match_operand:<VWIDE> 0 "register_operand" "=w") 5062 (ss_plus:<VWIDE> 5063 (ss_ashift:<VWIDE> 5064 (mult:<VWIDE> 5065 (sign_extend:<VWIDE> 5066 (vec_select:<VHALF> 5067 (match_operand:VQ_HSI 2 "register_operand" "w") 5068 (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" ""))) 5069 (sign_extend:<VWIDE> 5070 (vec_select:<VHALF> 5071 (match_operand:VQ_HSI 3 "register_operand" "w") 5072 (match_dup 4)))) 5073 (const_int 1)) 5074 (match_operand:<VWIDE> 1 "register_operand" 
"0")))] 5075 "TARGET_SIMD" 5076 "sqdmlal2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %<v>3<Vmtype>" 5077 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")] 5078) 5079 5080(define_insn "aarch64_sqdmlsl2<mode>_internal" 5081 [(set (match_operand:<VWIDE> 0 "register_operand" "=w") 5082 (ss_minus:<VWIDE> 5083 (match_operand:<VWIDE> 1 "register_operand" "0") 5084 (ss_ashift:<VWIDE> 5085 (mult:<VWIDE> 5086 (sign_extend:<VWIDE> 5087 (vec_select:<VHALF> 5088 (match_operand:VQ_HSI 2 "register_operand" "w") 5089 (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" ""))) 5090 (sign_extend:<VWIDE> 5091 (vec_select:<VHALF> 5092 (match_operand:VQ_HSI 3 "register_operand" "w") 5093 (match_dup 4)))) 5094 (const_int 1))))] 5095 "TARGET_SIMD" 5096 "sqdmlsl2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %<v>3<Vmtype>" 5097 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")] 5098) 5099 5100(define_expand "aarch64_sqdmlal2<mode>" 5101 [(match_operand:<VWIDE> 0 "register_operand") 5102 (match_operand:<VWIDE> 1 "register_operand") 5103 (match_operand:VQ_HSI 2 "register_operand") 5104 (match_operand:VQ_HSI 3 "register_operand")] 5105 "TARGET_SIMD" 5106{ 5107 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true); 5108 emit_insn (gen_aarch64_sqdmlal2<mode>_internal (operands[0], operands[1], 5109 operands[2], operands[3], p)); 5110 DONE; 5111}) 5112 5113(define_expand "aarch64_sqdmlsl2<mode>" 5114 [(match_operand:<VWIDE> 0 "register_operand") 5115 (match_operand:<VWIDE> 1 "register_operand") 5116 (match_operand:VQ_HSI 2 "register_operand") 5117 (match_operand:VQ_HSI 3 "register_operand")] 5118 "TARGET_SIMD" 5119{ 5120 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true); 5121 emit_insn (gen_aarch64_sqdmlsl2<mode>_internal (operands[0], operands[1], 5122 operands[2], operands[3], p)); 5123 DONE; 5124}) 5125 5126;; vqdml[sa]l2_lane 5127 5128(define_insn "aarch64_sqdml<SBINQOPS:as>l2_lane<mode>_internal" 5129 [(set (match_operand:<VWIDE> 0 "register_operand" "=w") 5130 (SBINQOPS:<VWIDE> 5131 (match_operand:<VWIDE> 1 "register_operand" "0") 5132 (ss_ashift:<VWIDE> 5133 (mult:<VWIDE> 5134 (sign_extend:<VWIDE> 5135 (vec_select:<VHALF> 5136 (match_operand:VQ_HSI 2 "register_operand" "w") 5137 (match_operand:VQ_HSI 5 "vect_par_cnst_hi_half" ""))) 5138 (sign_extend:<VWIDE> 5139 (vec_duplicate:<VHALF> 5140 (vec_select:<VEL> 5141 (match_operand:<VCOND> 3 "register_operand" "<vwx>") 5142 (parallel [(match_operand:SI 4 "immediate_operand" "i")]) 5143 )))) 5144 (const_int 1))))] 5145 "TARGET_SIMD" 5146 { 5147 operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4])); 5148 return 5149 "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]"; 5150 } 5151 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")] 5152) 5153 5154(define_insn "aarch64_sqdml<SBINQOPS:as>l2_laneq<mode>_internal" 5155 [(set (match_operand:<VWIDE> 0 "register_operand" "=w") 5156 (SBINQOPS:<VWIDE> 5157 (match_operand:<VWIDE> 1 "register_operand" "0") 5158 (ss_ashift:<VWIDE> 5159 (mult:<VWIDE> 5160 (sign_extend:<VWIDE> 5161 (vec_select:<VHALF> 5162 (match_operand:VQ_HSI 2 "register_operand" "w") 5163 (match_operand:VQ_HSI 5 "vect_par_cnst_hi_half" ""))) 5164 (sign_extend:<VWIDE> 5165 (vec_duplicate:<VHALF> 5166 (vec_select:<VEL> 5167 (match_operand:<VCONQ> 3 "register_operand" "<vwx>") 5168 (parallel [(match_operand:SI 4 "immediate_operand" "i")]) 5169 )))) 5170 (const_int 1))))] 5171 "TARGET_SIMD" 5172 { 5173 operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4])); 5174 return 5175 
"sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]"; 5176 } 5177 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")] 5178) 5179 5180(define_expand "aarch64_sqdmlal2_lane<mode>" 5181 [(match_operand:<VWIDE> 0 "register_operand") 5182 (match_operand:<VWIDE> 1 "register_operand") 5183 (match_operand:VQ_HSI 2 "register_operand") 5184 (match_operand:<VCOND> 3 "register_operand") 5185 (match_operand:SI 4 "immediate_operand")] 5186 "TARGET_SIMD" 5187{ 5188 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true); 5189 emit_insn (gen_aarch64_sqdmlal2_lane<mode>_internal (operands[0], operands[1], 5190 operands[2], operands[3], 5191 operands[4], p)); 5192 DONE; 5193}) 5194 5195(define_expand "aarch64_sqdmlal2_laneq<mode>" 5196 [(match_operand:<VWIDE> 0 "register_operand") 5197 (match_operand:<VWIDE> 1 "register_operand") 5198 (match_operand:VQ_HSI 2 "register_operand") 5199 (match_operand:<VCONQ> 3 "register_operand") 5200 (match_operand:SI 4 "immediate_operand")] 5201 "TARGET_SIMD" 5202{ 5203 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true); 5204 emit_insn (gen_aarch64_sqdmlal2_laneq<mode>_internal (operands[0], operands[1], 5205 operands[2], operands[3], 5206 operands[4], p)); 5207 DONE; 5208}) 5209 5210(define_expand "aarch64_sqdmlsl2_lane<mode>" 5211 [(match_operand:<VWIDE> 0 "register_operand") 5212 (match_operand:<VWIDE> 1 "register_operand") 5213 (match_operand:VQ_HSI 2 "register_operand") 5214 (match_operand:<VCOND> 3 "register_operand") 5215 (match_operand:SI 4 "immediate_operand")] 5216 "TARGET_SIMD" 5217{ 5218 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true); 5219 emit_insn (gen_aarch64_sqdmlsl2_lane<mode>_internal (operands[0], operands[1], 5220 operands[2], operands[3], 5221 operands[4], p)); 5222 DONE; 5223}) 5224 5225(define_expand "aarch64_sqdmlsl2_laneq<mode>" 5226 [(match_operand:<VWIDE> 0 "register_operand") 5227 (match_operand:<VWIDE> 1 "register_operand") 5228 (match_operand:VQ_HSI 2 "register_operand") 5229 (match_operand:<VCONQ> 3 "register_operand") 5230 (match_operand:SI 4 "immediate_operand")] 5231 "TARGET_SIMD" 5232{ 5233 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true); 5234 emit_insn (gen_aarch64_sqdmlsl2_laneq<mode>_internal (operands[0], operands[1], 5235 operands[2], operands[3], 5236 operands[4], p)); 5237 DONE; 5238}) 5239 5240(define_insn "aarch64_sqdml<SBINQOPS:as>l2_n<mode>_internal" 5241 [(set (match_operand:<VWIDE> 0 "register_operand" "=w") 5242 (SBINQOPS:<VWIDE> 5243 (match_operand:<VWIDE> 1 "register_operand" "0") 5244 (ss_ashift:<VWIDE> 5245 (mult:<VWIDE> 5246 (sign_extend:<VWIDE> 5247 (vec_select:<VHALF> 5248 (match_operand:VQ_HSI 2 "register_operand" "w") 5249 (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" ""))) 5250 (sign_extend:<VWIDE> 5251 (vec_duplicate:<VHALF> 5252 (match_operand:<VEL> 3 "register_operand" "<vwx>")))) 5253 (const_int 1))))] 5254 "TARGET_SIMD" 5255 "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[0]" 5256 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")] 5257) 5258 5259(define_expand "aarch64_sqdmlal2_n<mode>" 5260 [(match_operand:<VWIDE> 0 "register_operand") 5261 (match_operand:<VWIDE> 1 "register_operand") 5262 (match_operand:VQ_HSI 2 "register_operand") 5263 (match_operand:<VEL> 3 "register_operand")] 5264 "TARGET_SIMD" 5265{ 5266 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true); 5267 emit_insn (gen_aarch64_sqdmlal2_n<mode>_internal (operands[0], operands[1], 5268 operands[2], 
operands[3], 5269 p)); 5270 DONE; 5271}) 5272 5273(define_expand "aarch64_sqdmlsl2_n<mode>" 5274 [(match_operand:<VWIDE> 0 "register_operand") 5275 (match_operand:<VWIDE> 1 "register_operand") 5276 (match_operand:VQ_HSI 2 "register_operand") 5277 (match_operand:<VEL> 3 "register_operand")] 5278 "TARGET_SIMD" 5279{ 5280 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true); 5281 emit_insn (gen_aarch64_sqdmlsl2_n<mode>_internal (operands[0], operands[1], 5282 operands[2], operands[3], 5283 p)); 5284 DONE; 5285}) 5286 5287;; vqdmull 5288 5289(define_insn "aarch64_sqdmull<mode>" 5290 [(set (match_operand:<VWIDE> 0 "register_operand" "=w") 5291 (ss_ashift:<VWIDE> 5292 (mult:<VWIDE> 5293 (sign_extend:<VWIDE> 5294 (match_operand:VSD_HSI 1 "register_operand" "w")) 5295 (sign_extend:<VWIDE> 5296 (match_operand:VSD_HSI 2 "register_operand" "w"))) 5297 (const_int 1)))] 5298 "TARGET_SIMD" 5299 "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %<v>2<Vmtype>" 5300 [(set_attr "type" "neon_sat_mul_<Vetype>_long")] 5301) 5302 5303;; vqdmull_lane 5304 5305(define_insn "aarch64_sqdmull_lane<mode>" 5306 [(set (match_operand:<VWIDE> 0 "register_operand" "=w") 5307 (ss_ashift:<VWIDE> 5308 (mult:<VWIDE> 5309 (sign_extend:<VWIDE> 5310 (match_operand:VD_HSI 1 "register_operand" "w")) 5311 (sign_extend:<VWIDE> 5312 (vec_duplicate:VD_HSI 5313 (vec_select:<VEL> 5314 (match_operand:<VCOND> 2 "register_operand" "<vwx>") 5315 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))) 5316 )) 5317 (const_int 1)))] 5318 "TARGET_SIMD" 5319 { 5320 operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3])); 5321 return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]"; 5322 } 5323 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")] 5324) 5325 5326(define_insn "aarch64_sqdmull_laneq<mode>" 5327 [(set (match_operand:<VWIDE> 0 "register_operand" "=w") 5328 (ss_ashift:<VWIDE> 5329 (mult:<VWIDE> 5330 (sign_extend:<VWIDE> 5331 (match_operand:VD_HSI 1 "register_operand" "w")) 5332 (sign_extend:<VWIDE> 5333 (vec_duplicate:VD_HSI 5334 (vec_select:<VEL> 5335 (match_operand:<VCONQ> 2 "register_operand" "<vwx>") 5336 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))) 5337 )) 5338 (const_int 1)))] 5339 "TARGET_SIMD" 5340 { 5341 operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3])); 5342 return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]"; 5343 } 5344 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")] 5345) 5346 5347(define_insn "aarch64_sqdmull_lane<mode>" 5348 [(set (match_operand:<VWIDE> 0 "register_operand" "=w") 5349 (ss_ashift:<VWIDE> 5350 (mult:<VWIDE> 5351 (sign_extend:<VWIDE> 5352 (match_operand:SD_HSI 1 "register_operand" "w")) 5353 (sign_extend:<VWIDE> 5354 (vec_select:<VEL> 5355 (match_operand:<VCOND> 2 "register_operand" "<vwx>") 5356 (parallel [(match_operand:SI 3 "immediate_operand" "i")])) 5357 )) 5358 (const_int 1)))] 5359 "TARGET_SIMD" 5360 { 5361 operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3])); 5362 return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]"; 5363 } 5364 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")] 5365) 5366 5367(define_insn "aarch64_sqdmull_laneq<mode>" 5368 [(set (match_operand:<VWIDE> 0 "register_operand" "=w") 5369 (ss_ashift:<VWIDE> 5370 (mult:<VWIDE> 5371 (sign_extend:<VWIDE> 5372 (match_operand:SD_HSI 1 "register_operand" "w")) 5373 (sign_extend:<VWIDE> 5374 (vec_select:<VEL> 5375 (match_operand:<VCONQ> 2 "register_operand" "<vwx>") 5376 (parallel 
[(match_operand:SI 3 "immediate_operand" "i")])) 5377 )) 5378 (const_int 1)))] 5379 "TARGET_SIMD" 5380 { 5381 operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3])); 5382 return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]"; 5383 } 5384 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")] 5385) 5386 5387;; vqdmull_n 5388 5389(define_insn "aarch64_sqdmull_n<mode>" 5390 [(set (match_operand:<VWIDE> 0 "register_operand" "=w") 5391 (ss_ashift:<VWIDE> 5392 (mult:<VWIDE> 5393 (sign_extend:<VWIDE> 5394 (match_operand:VD_HSI 1 "register_operand" "w")) 5395 (sign_extend:<VWIDE> 5396 (vec_duplicate:VD_HSI 5397 (match_operand:<VEL> 2 "register_operand" "<vwx>"))) 5398 ) 5399 (const_int 1)))] 5400 "TARGET_SIMD" 5401 "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[0]" 5402 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")] 5403) 5404 5405;; vqdmull2 5406 5407 5408 5409(define_insn "aarch64_sqdmull2<mode>_internal" 5410 [(set (match_operand:<VWIDE> 0 "register_operand" "=w") 5411 (ss_ashift:<VWIDE> 5412 (mult:<VWIDE> 5413 (sign_extend:<VWIDE> 5414 (vec_select:<VHALF> 5415 (match_operand:VQ_HSI 1 "register_operand" "w") 5416 (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" ""))) 5417 (sign_extend:<VWIDE> 5418 (vec_select:<VHALF> 5419 (match_operand:VQ_HSI 2 "register_operand" "w") 5420 (match_dup 3))) 5421 ) 5422 (const_int 1)))] 5423 "TARGET_SIMD" 5424 "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %<v>2<Vmtype>" 5425 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")] 5426) 5427 5428(define_expand "aarch64_sqdmull2<mode>" 5429 [(match_operand:<VWIDE> 0 "register_operand") 5430 (match_operand:VQ_HSI 1 "register_operand") 5431 (match_operand:VQ_HSI 2 "register_operand")] 5432 "TARGET_SIMD" 5433{ 5434 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true); 5435 emit_insn (gen_aarch64_sqdmull2<mode>_internal (operands[0], operands[1], 5436 operands[2], p)); 5437 DONE; 5438}) 5439 5440;; vqdmull2_lane 5441 5442(define_insn "aarch64_sqdmull2_lane<mode>_internal" 5443 [(set (match_operand:<VWIDE> 0 "register_operand" "=w") 5444 (ss_ashift:<VWIDE> 5445 (mult:<VWIDE> 5446 (sign_extend:<VWIDE> 5447 (vec_select:<VHALF> 5448 (match_operand:VQ_HSI 1 "register_operand" "w") 5449 (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" ""))) 5450 (sign_extend:<VWIDE> 5451 (vec_duplicate:<VHALF> 5452 (vec_select:<VEL> 5453 (match_operand:<VCOND> 2 "register_operand" "<vwx>") 5454 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))) 5455 )) 5456 (const_int 1)))] 5457 "TARGET_SIMD" 5458 { 5459 operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3])); 5460 return "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]"; 5461 } 5462 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")] 5463) 5464 5465(define_insn "aarch64_sqdmull2_laneq<mode>_internal" 5466 [(set (match_operand:<VWIDE> 0 "register_operand" "=w") 5467 (ss_ashift:<VWIDE> 5468 (mult:<VWIDE> 5469 (sign_extend:<VWIDE> 5470 (vec_select:<VHALF> 5471 (match_operand:VQ_HSI 1 "register_operand" "w") 5472 (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" ""))) 5473 (sign_extend:<VWIDE> 5474 (vec_duplicate:<VHALF> 5475 (vec_select:<VEL> 5476 (match_operand:<VCONQ> 2 "register_operand" "<vwx>") 5477 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))) 5478 )) 5479 (const_int 1)))] 5480 "TARGET_SIMD" 5481 { 5482 operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3])); 5483 return "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]"; 5484 } 5485 
[(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")] 5486) 5487 5488(define_expand "aarch64_sqdmull2_lane<mode>" 5489 [(match_operand:<VWIDE> 0 "register_operand") 5490 (match_operand:VQ_HSI 1 "register_operand") 5491 (match_operand:<VCOND> 2 "register_operand") 5492 (match_operand:SI 3 "immediate_operand")] 5493 "TARGET_SIMD" 5494{ 5495 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true); 5496 emit_insn (gen_aarch64_sqdmull2_lane<mode>_internal (operands[0], operands[1], 5497 operands[2], operands[3], 5498 p)); 5499 DONE; 5500}) 5501 5502(define_expand "aarch64_sqdmull2_laneq<mode>" 5503 [(match_operand:<VWIDE> 0 "register_operand") 5504 (match_operand:VQ_HSI 1 "register_operand") 5505 (match_operand:<VCONQ> 2 "register_operand") 5506 (match_operand:SI 3 "immediate_operand")] 5507 "TARGET_SIMD" 5508{ 5509 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true); 5510 emit_insn (gen_aarch64_sqdmull2_laneq<mode>_internal (operands[0], operands[1], 5511 operands[2], operands[3], 5512 p)); 5513 DONE; 5514}) 5515 5516;; vqdmull2_n 5517 5518(define_insn "aarch64_sqdmull2_n<mode>_internal" 5519 [(set (match_operand:<VWIDE> 0 "register_operand" "=w") 5520 (ss_ashift:<VWIDE> 5521 (mult:<VWIDE> 5522 (sign_extend:<VWIDE> 5523 (vec_select:<VHALF> 5524 (match_operand:VQ_HSI 1 "register_operand" "w") 5525 (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" ""))) 5526 (sign_extend:<VWIDE> 5527 (vec_duplicate:<VHALF> 5528 (match_operand:<VEL> 2 "register_operand" "<vwx>"))) 5529 ) 5530 (const_int 1)))] 5531 "TARGET_SIMD" 5532 "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[0]" 5533 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")] 5534) 5535 5536(define_expand "aarch64_sqdmull2_n<mode>" 5537 [(match_operand:<VWIDE> 0 "register_operand") 5538 (match_operand:VQ_HSI 1 "register_operand") 5539 (match_operand:<VEL> 2 "register_operand")] 5540 "TARGET_SIMD" 5541{ 5542 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true); 5543 emit_insn (gen_aarch64_sqdmull2_n<mode>_internal (operands[0], operands[1], 5544 operands[2], p)); 5545 DONE; 5546}) 5547 5548;; vshl 5549 5550(define_insn "aarch64_<sur>shl<mode>" 5551 [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w") 5552 (unspec:VSDQ_I_DI 5553 [(match_operand:VSDQ_I_DI 1 "register_operand" "w") 5554 (match_operand:VSDQ_I_DI 2 "register_operand" "w")] 5555 VSHL))] 5556 "TARGET_SIMD" 5557 "<sur>shl\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>" 5558 [(set_attr "type" "neon_shift_reg<q>")] 5559) 5560 5561 5562;; vqshl 5563 5564(define_insn "aarch64_<sur>q<r>shl<mode>" 5565 [(set (match_operand:VSDQ_I 0 "register_operand" "=w") 5566 (unspec:VSDQ_I 5567 [(match_operand:VSDQ_I 1 "register_operand" "w") 5568 (match_operand:VSDQ_I 2 "register_operand" "w")] 5569 VQSHL))] 5570 "TARGET_SIMD" 5571 "<sur>q<r>shl\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>" 5572 [(set_attr "type" "neon_sat_shift_reg<q>")] 5573) 5574 5575(define_expand "vec_widen_<sur>shiftl_lo_<mode>" 5576 [(set (match_operand:<VWIDE> 0 "register_operand" "=w") 5577 (unspec:<VWIDE> [(match_operand:VQW 1 "register_operand" "w") 5578 (match_operand:SI 2 5579 "aarch64_simd_shift_imm_bitsize_<ve_mode>" "i")] 5580 VSHLL))] 5581 "TARGET_SIMD" 5582 { 5583 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false); 5584 emit_insn (gen_aarch64_<sur>shll<mode>_internal (operands[0], operands[1], 5585 p, operands[2])); 5586 DONE; 5587 } 5588) 5589 5590(define_expand "vec_widen_<sur>shiftl_hi_<mode>" 5591 [(set (match_operand:<VWIDE> 0
"register_operand") 5592 (unspec:<VWIDE> [(match_operand:VQW 1 "register_operand" "w") 5593 (match_operand:SI 2 5594 "immediate_operand" "i")] 5595 VSHLL))] 5596 "TARGET_SIMD" 5597 { 5598 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true); 5599 emit_insn (gen_aarch64_<sur>shll2<mode>_internal (operands[0], operands[1], 5600 p, operands[2])); 5601 DONE; 5602 } 5603) 5604 5605;; vshll_n 5606 5607(define_insn "aarch64_<sur>shll<mode>_internal" 5608 [(set (match_operand:<VWIDE> 0 "register_operand" "=w") 5609 (unspec:<VWIDE> [(vec_select:<VHALF> 5610 (match_operand:VQW 1 "register_operand" "w") 5611 (match_operand:VQW 2 "vect_par_cnst_lo_half" "")) 5612 (match_operand:SI 3 5613 "aarch64_simd_shift_imm_bitsize_<ve_mode>" "i")] 5614 VSHLL))] 5615 "TARGET_SIMD" 5616 { 5617 if (INTVAL (operands[3]) == GET_MODE_UNIT_BITSIZE (<MODE>mode)) 5618 return "shll\\t%0.<Vwtype>, %1.<Vhalftype>, %3"; 5619 else 5620 return "<sur>shll\\t%0.<Vwtype>, %1.<Vhalftype>, %3"; 5621 } 5622 [(set_attr "type" "neon_shift_imm_long")] 5623) 5624 5625(define_insn "aarch64_<sur>shll2<mode>_internal" 5626 [(set (match_operand:<VWIDE> 0 "register_operand" "=w") 5627 (unspec:<VWIDE> [(vec_select:<VHALF> 5628 (match_operand:VQW 1 "register_operand" "w") 5629 (match_operand:VQW 2 "vect_par_cnst_hi_half" "")) 5630 (match_operand:SI 3 5631 "aarch64_simd_shift_imm_bitsize_<ve_mode>" "i")] 5632 VSHLL))] 5633 "TARGET_SIMD" 5634 { 5635 if (INTVAL (operands[3]) == GET_MODE_UNIT_BITSIZE (<MODE>mode)) 5636 return "shll2\\t%0.<Vwtype>, %1.<Vtype>, %3"; 5637 else 5638 return "<sur>shll2\\t%0.<Vwtype>, %1.<Vtype>, %3"; 5639 } 5640 [(set_attr "type" "neon_shift_imm_long")] 5641) 5642 5643(define_insn "aarch64_<sur>shll_n<mode>" 5644 [(set (match_operand:<VWIDE> 0 "register_operand" "=w") 5645 (unspec:<VWIDE> [(match_operand:VD_BHSI 1 "register_operand" "w") 5646 (match_operand:SI 2 5647 "aarch64_simd_shift_imm_bitsize_<ve_mode>" "i")] 5648 VSHLL))] 5649 "TARGET_SIMD" 5650 { 5651 if (INTVAL (operands[2]) == GET_MODE_UNIT_BITSIZE (<MODE>mode)) 5652 return "shll\\t%0.<Vwtype>, %1.<Vtype>, %2"; 5653 else 5654 return "<sur>shll\\t%0.<Vwtype>, %1.<Vtype>, %2"; 5655 } 5656 [(set_attr "type" "neon_shift_imm_long")] 5657) 5658 5659;; vshll_high_n 5660 5661(define_insn "aarch64_<sur>shll2_n<mode>" 5662 [(set (match_operand:<VWIDE> 0 "register_operand" "=w") 5663 (unspec:<VWIDE> [(match_operand:VQW 1 "register_operand" "w") 5664 (match_operand:SI 2 "immediate_operand" "i")] 5665 VSHLL))] 5666 "TARGET_SIMD" 5667 { 5668 if (INTVAL (operands[2]) == GET_MODE_UNIT_BITSIZE (<MODE>mode)) 5669 return "shll2\\t%0.<Vwtype>, %1.<Vtype>, %2"; 5670 else 5671 return "<sur>shll2\\t%0.<Vwtype>, %1.<Vtype>, %2"; 5672 } 5673 [(set_attr "type" "neon_shift_imm_long")] 5674) 5675 5676;; vrshr_n 5677 5678(define_insn "aarch64_<sur>shr_n<mode>" 5679 [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w") 5680 (unspec:VSDQ_I_DI [(match_operand:VSDQ_I_DI 1 "register_operand" "w") 5681 (match_operand:SI 2 5682 "aarch64_simd_shift_imm_offset_<ve_mode>" "i")] 5683 VRSHR_N))] 5684 "TARGET_SIMD" 5685 "<sur>shr\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2" 5686 [(set_attr "type" "neon_sat_shift_imm<q>")] 5687) 5688 5689;; v(r)sra_n 5690 5691(define_insn "aarch64_<sur>sra_n<mode>" 5692 [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w") 5693 (unspec:VSDQ_I_DI [(match_operand:VSDQ_I_DI 1 "register_operand" "0") 5694 (match_operand:VSDQ_I_DI 2 "register_operand" "w") 5695 (match_operand:SI 3 5696 "aarch64_simd_shift_imm_offset_<ve_mode>" "i")] 5697 VSRA))] 5698 
"TARGET_SIMD" 5699 "<sur>sra\\t%<v>0<Vmtype>, %<v>2<Vmtype>, %3" 5700 [(set_attr "type" "neon_shift_acc<q>")] 5701) 5702 5703;; vs<lr>i_n 5704 5705(define_insn "aarch64_<sur>s<lr>i_n<mode>" 5706 [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w") 5707 (unspec:VSDQ_I_DI [(match_operand:VSDQ_I_DI 1 "register_operand" "0") 5708 (match_operand:VSDQ_I_DI 2 "register_operand" "w") 5709 (match_operand:SI 3 5710 "aarch64_simd_shift_imm_<offsetlr><ve_mode>" "i")] 5711 VSLRI))] 5712 "TARGET_SIMD" 5713 "s<lr>i\\t%<v>0<Vmtype>, %<v>2<Vmtype>, %3" 5714 [(set_attr "type" "neon_shift_imm<q>")] 5715) 5716 5717;; vqshl(u) 5718 5719(define_insn "aarch64_<sur>qshl<u>_n<mode>" 5720 [(set (match_operand:VSDQ_I 0 "register_operand" "=w") 5721 (unspec:VSDQ_I [(match_operand:VSDQ_I 1 "register_operand" "w") 5722 (match_operand:SI 2 5723 "aarch64_simd_shift_imm_<ve_mode>" "i")] 5724 VQSHL_N))] 5725 "TARGET_SIMD" 5726 "<sur>qshl<u>\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2" 5727 [(set_attr "type" "neon_sat_shift_imm<q>")] 5728) 5729 5730 5731;; vq(r)shr(u)n_n 5732 5733(define_insn "aarch64_<sur>q<r>shr<u>n_n<mode>" 5734 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w") 5735 (unspec:<VNARROWQ> [(match_operand:VSQN_HSDI 1 "register_operand" "w") 5736 (match_operand:SI 2 5737 "aarch64_simd_shift_imm_offset_<ve_mode>" "i")] 5738 VQSHRN_N))] 5739 "TARGET_SIMD" 5740 "<sur>q<r>shr<u>n\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>, %2" 5741 [(set_attr "type" "neon_sat_shift_imm_narrow_q")] 5742) 5743 5744(define_insn "aarch64_<sur>q<r>shr<u>n2_n<mode>" 5745 [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w") 5746 (unspec:<VNARROWQ2> [(match_operand:<VNARROWQ> 1 "register_operand" "0") 5747 (match_operand:VQN 2 "register_operand" "w") 5748 (match_operand:SI 3 "aarch64_simd_shift_imm_offset_<ve_mode>" "i")] 5749 VQSHRN_N))] 5750 "TARGET_SIMD" 5751 "<sur>q<r>shr<u>n2\\t%<vn2>0.<V2ntype>, %<v>2.<Vtype>, %3" 5752 [(set_attr "type" "neon_sat_shift_imm_narrow_q")] 5753) 5754 5755 5756;; cm(eq|ge|gt|lt|le) 5757;; Note, we have constraints for Dz and Z as different expanders 5758;; have different ideas of what should be passed to this pattern. 5759 5760(define_insn "aarch64_cm<optab><mode>" 5761 [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w,w") 5762 (neg:<V_INT_EQUIV> 5763 (COMPARISONS:<V_INT_EQUIV> 5764 (match_operand:VDQ_I 1 "register_operand" "w,w") 5765 (match_operand:VDQ_I 2 "aarch64_simd_reg_or_zero" "w,ZDz") 5766 )))] 5767 "TARGET_SIMD" 5768 "@ 5769 cm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype> 5770 cm<optab>\t%<v>0<Vmtype>, %<v>1<Vmtype>, #0" 5771 [(set_attr "type" "neon_compare<q>, neon_compare_zero<q>")] 5772) 5773 5774(define_insn_and_split "aarch64_cm<optab>di" 5775 [(set (match_operand:DI 0 "register_operand" "=w,w,r") 5776 (neg:DI 5777 (COMPARISONS:DI 5778 (match_operand:DI 1 "register_operand" "w,w,r") 5779 (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,ZDz,r") 5780 ))) 5781 (clobber (reg:CC CC_REGNUM))] 5782 "TARGET_SIMD" 5783 "#" 5784 "&& reload_completed" 5785 [(set (match_operand:DI 0 "register_operand") 5786 (neg:DI 5787 (COMPARISONS:DI 5788 (match_operand:DI 1 "register_operand") 5789 (match_operand:DI 2 "aarch64_simd_reg_or_zero") 5790 )))] 5791 { 5792 /* If we are in the general purpose register file, 5793 we split to a sequence of comparison and store. 
*/ 5794 if (GP_REGNUM_P (REGNO (operands[0])) 5795 && GP_REGNUM_P (REGNO (operands[1]))) 5796 { 5797 machine_mode mode = SELECT_CC_MODE (<CMP>, operands[1], operands[2]); 5798 rtx cc_reg = aarch64_gen_compare_reg (<CMP>, operands[1], operands[2]); 5799 rtx comparison = gen_rtx_<CMP> (mode, operands[1], operands[2]); 5800 emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg)); 5801 DONE; 5802 } 5803 /* Otherwise, we expand to a similar pattern which does not 5804 clobber CC_REGNUM. */ 5805 } 5806 [(set_attr "type" "neon_compare, neon_compare_zero, multiple")] 5807) 5808 5809(define_insn "*aarch64_cm<optab>di" 5810 [(set (match_operand:DI 0 "register_operand" "=w,w") 5811 (neg:DI 5812 (COMPARISONS:DI 5813 (match_operand:DI 1 "register_operand" "w,w") 5814 (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,ZDz") 5815 )))] 5816 "TARGET_SIMD && reload_completed" 5817 "@ 5818 cm<n_optab>\t%d0, %d<cmp_1>, %d<cmp_2> 5819 cm<optab>\t%d0, %d1, #0" 5820 [(set_attr "type" "neon_compare, neon_compare_zero")] 5821) 5822 5823;; cm(hs|hi) 5824 5825(define_insn "aarch64_cm<optab><mode>" 5826 [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w") 5827 (neg:<V_INT_EQUIV> 5828 (UCOMPARISONS:<V_INT_EQUIV> 5829 (match_operand:VDQ_I 1 "register_operand" "w") 5830 (match_operand:VDQ_I 2 "register_operand" "w") 5831 )))] 5832 "TARGET_SIMD" 5833 "cm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>" 5834 [(set_attr "type" "neon_compare<q>")] 5835) 5836 5837(define_insn_and_split "aarch64_cm<optab>di" 5838 [(set (match_operand:DI 0 "register_operand" "=w,r") 5839 (neg:DI 5840 (UCOMPARISONS:DI 5841 (match_operand:DI 1 "register_operand" "w,r") 5842 (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,r") 5843 ))) 5844 (clobber (reg:CC CC_REGNUM))] 5845 "TARGET_SIMD" 5846 "#" 5847 "&& reload_completed" 5848 [(set (match_operand:DI 0 "register_operand") 5849 (neg:DI 5850 (UCOMPARISONS:DI 5851 (match_operand:DI 1 "register_operand") 5852 (match_operand:DI 2 "aarch64_simd_reg_or_zero") 5853 )))] 5854 { 5855 /* If we are in the general purpose register file, 5856 we split to a sequence of comparison and store. */ 5857 if (GP_REGNUM_P (REGNO (operands[0])) 5858 && GP_REGNUM_P (REGNO (operands[1]))) 5859 { 5860 machine_mode mode = CCmode; 5861 rtx cc_reg = aarch64_gen_compare_reg (<CMP>, operands[1], operands[2]); 5862 rtx comparison = gen_rtx_<CMP> (mode, operands[1], operands[2]); 5863 emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg)); 5864 DONE; 5865 } 5866 /* Otherwise, we expand to a similar pattern which does not 5867 clobber CC_REGNUM. */ 5868 } 5869 [(set_attr "type" "neon_compare,multiple")] 5870) 5871 5872(define_insn "*aarch64_cm<optab>di" 5873 [(set (match_operand:DI 0 "register_operand" "=w") 5874 (neg:DI 5875 (UCOMPARISONS:DI 5876 (match_operand:DI 1 "register_operand" "w") 5877 (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w") 5878 )))] 5879 "TARGET_SIMD && reload_completed" 5880 "cm<n_optab>\t%d0, %d<cmp_1>, %d<cmp_2>" 5881 [(set_attr "type" "neon_compare")] 5882) 5883 5884;; cmtst 5885 5886;; Although neg (ne (and x y) 0) is the natural way of expressing a cmtst, 5887;; we don't have any insns using ne, and aarch64_vcond outputs 5888;; not (neg (eq (and x y) 0)) 5889;; which is rewritten by simplify_rtx as 5890;; plus (eq (and x y) 0) -1. 
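;; A worked check of that rewrite (editorial note, not from the original
;; sources): since ~a == -a - 1 in two's complement, not (neg e) == e - 1,
;; i.e. plus e -1, for any lane value e.  With e == eq ((and x y), 0)
;; taking the value 1 when the AND is zero and 0 otherwise:
;;   (x & y) == 0:  e == 1  ->  e - 1 ==  0
;;   (x & y) != 0:  e == 0  ->  e - 1 == -1 (all ones)
;; which is exactly the all-zeros/all-ones lane mask that cmtst writes.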
5891 5892(define_insn "aarch64_cmtst<mode>" 5893 [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w") 5894 (plus:<V_INT_EQUIV> 5895 (eq:<V_INT_EQUIV> 5896 (and:VDQ_I 5897 (match_operand:VDQ_I 1 "register_operand" "w") 5898 (match_operand:VDQ_I 2 "register_operand" "w")) 5899 (match_operand:VDQ_I 3 "aarch64_simd_imm_zero")) 5900 (match_operand:<V_INT_EQUIV> 4 "aarch64_simd_imm_minus_one"))) 5901 ] 5902 "TARGET_SIMD" 5903 "cmtst\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>" 5904 [(set_attr "type" "neon_tst<q>")] 5905) 5906 5907(define_insn_and_split "aarch64_cmtstdi" 5908 [(set (match_operand:DI 0 "register_operand" "=w,r") 5909 (neg:DI 5910 (ne:DI 5911 (and:DI 5912 (match_operand:DI 1 "register_operand" "w,r") 5913 (match_operand:DI 2 "register_operand" "w,r")) 5914 (const_int 0)))) 5915 (clobber (reg:CC CC_REGNUM))] 5916 "TARGET_SIMD" 5917 "#" 5918 "&& reload_completed" 5919 [(set (match_operand:DI 0 "register_operand") 5920 (neg:DI 5921 (ne:DI 5922 (and:DI 5923 (match_operand:DI 1 "register_operand") 5924 (match_operand:DI 2 "register_operand")) 5925 (const_int 0))))] 5926 { 5927 /* If we are in the general purpose register file, 5928 we split to a sequence of comparison and store. */ 5929 if (GP_REGNUM_P (REGNO (operands[0])) 5930 && GP_REGNUM_P (REGNO (operands[1]))) 5931 { 5932 rtx and_tree = gen_rtx_AND (DImode, operands[1], operands[2]); 5933 machine_mode mode = SELECT_CC_MODE (NE, and_tree, const0_rtx); 5934 rtx cc_reg = aarch64_gen_compare_reg (NE, and_tree, const0_rtx); 5935 rtx comparison = gen_rtx_NE (mode, and_tree, const0_rtx); 5936 emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg)); 5937 DONE; 5938 } 5939 /* Otherwise, we expand to a similar pattern which does not 5940 clobber CC_REGNUM. */ 5941 } 5942 [(set_attr "type" "neon_tst,multiple")] 5943) 5944 5945(define_insn "*aarch64_cmtstdi" 5946 [(set (match_operand:DI 0 "register_operand" "=w") 5947 (neg:DI 5948 (ne:DI 5949 (and:DI 5950 (match_operand:DI 1 "register_operand" "w") 5951 (match_operand:DI 2 "register_operand" "w")) 5952 (const_int 0))))] 5953 "TARGET_SIMD" 5954 "cmtst\t%d0, %d1, %d2" 5955 [(set_attr "type" "neon_tst")] 5956) 5957 5958;; fcm(eq|ge|gt|le|lt) 5959 5960(define_insn "aarch64_cm<optab><mode>" 5961 [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w,w") 5962 (neg:<V_INT_EQUIV> 5963 (COMPARISONS:<V_INT_EQUIV> 5964 (match_operand:VHSDF_HSDF 1 "register_operand" "w,w") 5965 (match_operand:VHSDF_HSDF 2 "aarch64_simd_reg_or_zero" "w,YDz") 5966 )))] 5967 "TARGET_SIMD" 5968 "@ 5969 fcm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype> 5970 fcm<optab>\t%<v>0<Vmtype>, %<v>1<Vmtype>, 0" 5971 [(set_attr "type" "neon_fp_compare_<stype><q>")] 5972) 5973 5974;; fac(ge|gt) 5975;; Note we can also handle what would be fac(le|lt) by 5976;; generating fac(ge|gt). 
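;; For example (an editorial illustration): there is no facle instruction,
;; but |a| <= |b| iff |b| >= |a|, so a le comparison is emitted as facge
;; with the operands swapped; the <cmp_1>/<cmp_2> attributes in the
;; template below select that operand ordering.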
5977 5978(define_insn "aarch64_fac<optab><mode>" 5979 [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w") 5980 (neg:<V_INT_EQUIV> 5981 (FAC_COMPARISONS:<V_INT_EQUIV> 5982 (abs:VHSDF_HSDF 5983 (match_operand:VHSDF_HSDF 1 "register_operand" "w")) 5984 (abs:VHSDF_HSDF 5985 (match_operand:VHSDF_HSDF 2 "register_operand" "w")) 5986 )))] 5987 "TARGET_SIMD" 5988 "fac<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>" 5989 [(set_attr "type" "neon_fp_compare_<stype><q>")] 5990) 5991 5992;; addp 5993 5994(define_insn "aarch64_addp<mode>" 5995 [(set (match_operand:VD_BHSI 0 "register_operand" "=w") 5996 (unspec:VD_BHSI 5997 [(match_operand:VD_BHSI 1 "register_operand" "w") 5998 (match_operand:VD_BHSI 2 "register_operand" "w")] 5999 UNSPEC_ADDP))] 6000 "TARGET_SIMD" 6001 "addp\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>" 6002 [(set_attr "type" "neon_reduc_add<q>")] 6003) 6004 6005(define_insn "aarch64_addpdi" 6006 [(set (match_operand:DI 0 "register_operand" "=w") 6007 (unspec:DI 6008 [(match_operand:V2DI 1 "register_operand" "w")] 6009 UNSPEC_ADDP))] 6010 "TARGET_SIMD" 6011 "addp\t%d0, %1.2d" 6012 [(set_attr "type" "neon_reduc_add")] 6013) 6014 6015;; sqrt 6016 6017(define_expand "sqrt<mode>2" 6018 [(set (match_operand:VHSDF 0 "register_operand") 6019 (sqrt:VHSDF (match_operand:VHSDF 1 "register_operand")))] 6020 "TARGET_SIMD" 6021{ 6022 if (aarch64_emit_approx_sqrt (operands[0], operands[1], false)) 6023 DONE; 6024}) 6025 6026(define_insn "*sqrt<mode>2" 6027 [(set (match_operand:VHSDF 0 "register_operand" "=w") 6028 (sqrt:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))] 6029 "TARGET_SIMD" 6030 "fsqrt\\t%0.<Vtype>, %1.<Vtype>" 6031 [(set_attr "type" "neon_fp_sqrt_<stype><q>")] 6032) 6033 6034;; Patterns for vector struct loads and stores. 
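;; At the user level these correspond to the structure load/store
;; intrinsics.  A minimal sketch of the mapping (hypothetical example
;; code, not part of this file):
;;
;;   #include <arm_neon.h>
;;   /* De-interleaving load of two vectors of eight halfwords each;
;;      expected to expand through vec_load_lanesoi to a single LD2.  */
;;   int16x8x2_t
;;   load_pairs (const int16_t *p)
;;   {
;;     return vld2q_s16 (p);
;;   }
;;
;; The opaque integer modes used below stand for lists of consecutive
;; vector registers: OI for two, CI for three and XI for four.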
6035 6036(define_insn "aarch64_simd_ld2<mode>" 6037 [(set (match_operand:OI 0 "register_operand" "=w") 6038 (unspec:OI [(match_operand:OI 1 "aarch64_simd_struct_operand" "Utv") 6039 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] 6040 UNSPEC_LD2))] 6041 "TARGET_SIMD" 6042 "ld2\\t{%S0.<Vtype> - %T0.<Vtype>}, %1" 6043 [(set_attr "type" "neon_load2_2reg<q>")] 6044) 6045 6046(define_insn "aarch64_simd_ld2r<mode>" 6047 [(set (match_operand:OI 0 "register_operand" "=w") 6048 (unspec:OI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv") 6049 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ] 6050 UNSPEC_LD2_DUP))] 6051 "TARGET_SIMD" 6052 "ld2r\\t{%S0.<Vtype> - %T0.<Vtype>}, %1" 6053 [(set_attr "type" "neon_load2_all_lanes<q>")] 6054) 6055 6056(define_insn "aarch64_vec_load_lanesoi_lane<mode>" 6057 [(set (match_operand:OI 0 "register_operand" "=w") 6058 (unspec:OI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv") 6059 (match_operand:OI 2 "register_operand" "0") 6060 (match_operand:SI 3 "immediate_operand" "i") 6061 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ] 6062 UNSPEC_LD2_LANE))] 6063 "TARGET_SIMD" 6064 { 6065 operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3])); 6066 return "ld2\\t{%S0.<Vetype> - %T0.<Vetype>}[%3], %1"; 6067 } 6068 [(set_attr "type" "neon_load2_one_lane")] 6069) 6070 6071(define_expand "vec_load_lanesoi<mode>" 6072 [(set (match_operand:OI 0 "register_operand") 6073 (unspec:OI [(match_operand:OI 1 "aarch64_simd_struct_operand") 6074 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] 6075 UNSPEC_LD2))] 6076 "TARGET_SIMD" 6077{ 6078 if (BYTES_BIG_ENDIAN) 6079 { 6080 rtx tmp = gen_reg_rtx (OImode); 6081 rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>); 6082 emit_insn (gen_aarch64_simd_ld2<mode> (tmp, operands[1])); 6083 emit_insn (gen_aarch64_rev_reglistoi (operands[0], tmp, mask)); 6084 } 6085 else 6086 emit_insn (gen_aarch64_simd_ld2<mode> (operands[0], operands[1])); 6087 DONE; 6088}) 6089 6090(define_insn "aarch64_simd_st2<mode>" 6091 [(set (match_operand:OI 0 "aarch64_simd_struct_operand" "=Utv") 6092 (unspec:OI [(match_operand:OI 1 "register_operand" "w") 6093 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] 6094 UNSPEC_ST2))] 6095 "TARGET_SIMD" 6096 "st2\\t{%S1.<Vtype> - %T1.<Vtype>}, %0" 6097 [(set_attr "type" "neon_store2_2reg<q>")] 6098) 6099 6100;; RTL uses GCC vector extension indices, so flip only for assembly. 
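;; For instance (editorial note), on a big-endian target GCC's lane 0 of
;; a V4SI value is architectural lane 3: aarch64_endian_lane_rtx maps the
;; index to nunits - 1 - n there before it is printed into the template
;; below.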
6101(define_insn "aarch64_vec_store_lanesoi_lane<mode>" 6102 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv") 6103 (unspec:BLK [(match_operand:OI 1 "register_operand" "w") 6104 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) 6105 (match_operand:SI 2 "immediate_operand" "i")] 6106 UNSPEC_ST2_LANE))] 6107 "TARGET_SIMD" 6108 { 6109 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2])); 6110 return "st2\\t{%S1.<Vetype> - %T1.<Vetype>}[%2], %0"; 6111 } 6112 [(set_attr "type" "neon_store2_one_lane<q>")] 6113) 6114 6115(define_expand "vec_store_lanesoi<mode>" 6116 [(set (match_operand:OI 0 "aarch64_simd_struct_operand") 6117 (unspec:OI [(match_operand:OI 1 "register_operand") 6118 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] 6119 UNSPEC_ST2))] 6120 "TARGET_SIMD" 6121{ 6122 if (BYTES_BIG_ENDIAN) 6123 { 6124 rtx tmp = gen_reg_rtx (OImode); 6125 rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>); 6126 emit_insn (gen_aarch64_rev_reglistoi (tmp, operands[1], mask)); 6127 emit_insn (gen_aarch64_simd_st2<mode> (operands[0], tmp)); 6128 } 6129 else 6130 emit_insn (gen_aarch64_simd_st2<mode> (operands[0], operands[1])); 6131 DONE; 6132}) 6133 6134(define_insn "aarch64_simd_ld3<mode>" 6135 [(set (match_operand:CI 0 "register_operand" "=w") 6136 (unspec:CI [(match_operand:CI 1 "aarch64_simd_struct_operand" "Utv") 6137 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] 6138 UNSPEC_LD3))] 6139 "TARGET_SIMD" 6140 "ld3\\t{%S0.<Vtype> - %U0.<Vtype>}, %1" 6141 [(set_attr "type" "neon_load3_3reg<q>")] 6142) 6143 6144(define_insn "aarch64_simd_ld3r<mode>" 6145 [(set (match_operand:CI 0 "register_operand" "=w") 6146 (unspec:CI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv") 6147 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ] 6148 UNSPEC_LD3_DUP))] 6149 "TARGET_SIMD" 6150 "ld3r\\t{%S0.<Vtype> - %U0.<Vtype>}, %1" 6151 [(set_attr "type" "neon_load3_all_lanes<q>")] 6152) 6153 6154(define_insn "aarch64_vec_load_lanesci_lane<mode>" 6155 [(set (match_operand:CI 0 "register_operand" "=w") 6156 (unspec:CI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv") 6157 (match_operand:CI 2 "register_operand" "0") 6158 (match_operand:SI 3 "immediate_operand" "i") 6159 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] 6160 UNSPEC_LD3_LANE))] 6161 "TARGET_SIMD" 6162{ 6163 operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3])); 6164 return "ld3\\t{%S0.<Vetype> - %U0.<Vetype>}[%3], %1"; 6165} 6166 [(set_attr "type" "neon_load3_one_lane")] 6167) 6168 6169(define_expand "vec_load_lanesci<mode>" 6170 [(set (match_operand:CI 0 "register_operand") 6171 (unspec:CI [(match_operand:CI 1 "aarch64_simd_struct_operand") 6172 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] 6173 UNSPEC_LD3))] 6174 "TARGET_SIMD" 6175{ 6176 if (BYTES_BIG_ENDIAN) 6177 { 6178 rtx tmp = gen_reg_rtx (CImode); 6179 rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>); 6180 emit_insn (gen_aarch64_simd_ld3<mode> (tmp, operands[1])); 6181 emit_insn (gen_aarch64_rev_reglistci (operands[0], tmp, mask)); 6182 } 6183 else 6184 emit_insn (gen_aarch64_simd_ld3<mode> (operands[0], operands[1])); 6185 DONE; 6186}) 6187 6188(define_insn "aarch64_simd_st3<mode>" 6189 [(set (match_operand:CI 0 "aarch64_simd_struct_operand" "=Utv") 6190 (unspec:CI [(match_operand:CI 1 "register_operand" "w") 6191 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] 6192 UNSPEC_ST3))] 6193 "TARGET_SIMD" 6194 "st3\\t{%S1.<Vtype> - %U1.<Vtype>}, %0" 6195 [(set_attr "type" "neon_store3_3reg<q>")] 6196) 6197 6198;; RTL 
uses GCC vector extension indices, so flip only for assembly. 6199(define_insn "aarch64_vec_store_lanesci_lane<mode>" 6200 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv") 6201 (unspec:BLK [(match_operand:CI 1 "register_operand" "w") 6202 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) 6203 (match_operand:SI 2 "immediate_operand" "i")] 6204 UNSPEC_ST3_LANE))] 6205 "TARGET_SIMD" 6206 { 6207 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2])); 6208 return "st3\\t{%S1.<Vetype> - %U1.<Vetype>}[%2], %0"; 6209 } 6210 [(set_attr "type" "neon_store3_one_lane<q>")] 6211) 6212 6213(define_expand "vec_store_lanesci<mode>" 6214 [(set (match_operand:CI 0 "aarch64_simd_struct_operand") 6215 (unspec:CI [(match_operand:CI 1 "register_operand") 6216 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] 6217 UNSPEC_ST3))] 6218 "TARGET_SIMD" 6219{ 6220 if (BYTES_BIG_ENDIAN) 6221 { 6222 rtx tmp = gen_reg_rtx (CImode); 6223 rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>); 6224 emit_insn (gen_aarch64_rev_reglistci (tmp, operands[1], mask)); 6225 emit_insn (gen_aarch64_simd_st3<mode> (operands[0], tmp)); 6226 } 6227 else 6228 emit_insn (gen_aarch64_simd_st3<mode> (operands[0], operands[1])); 6229 DONE; 6230}) 6231 6232(define_insn "aarch64_simd_ld4<mode>" 6233 [(set (match_operand:XI 0 "register_operand" "=w") 6234 (unspec:XI [(match_operand:XI 1 "aarch64_simd_struct_operand" "Utv") 6235 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] 6236 UNSPEC_LD4))] 6237 "TARGET_SIMD" 6238 "ld4\\t{%S0.<Vtype> - %V0.<Vtype>}, %1" 6239 [(set_attr "type" "neon_load4_4reg<q>")] 6240) 6241 6242(define_insn "aarch64_simd_ld4r<mode>" 6243 [(set (match_operand:XI 0 "register_operand" "=w") 6244 (unspec:XI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv") 6245 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ] 6246 UNSPEC_LD4_DUP))] 6247 "TARGET_SIMD" 6248 "ld4r\\t{%S0.<Vtype> - %V0.<Vtype>}, %1" 6249 [(set_attr "type" "neon_load4_all_lanes<q>")] 6250) 6251 6252(define_insn "aarch64_vec_load_lanesxi_lane<mode>" 6253 [(set (match_operand:XI 0 "register_operand" "=w") 6254 (unspec:XI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv") 6255 (match_operand:XI 2 "register_operand" "0") 6256 (match_operand:SI 3 "immediate_operand" "i") 6257 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] 6258 UNSPEC_LD4_LANE))] 6259 "TARGET_SIMD" 6260{ 6261 operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3])); 6262 return "ld4\\t{%S0.<Vetype> - %V0.<Vetype>}[%3], %1"; 6263} 6264 [(set_attr "type" "neon_load4_one_lane")] 6265) 6266 6267(define_expand "vec_load_lanesxi<mode>" 6268 [(set (match_operand:XI 0 "register_operand") 6269 (unspec:XI [(match_operand:XI 1 "aarch64_simd_struct_operand") 6270 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] 6271 UNSPEC_LD4))] 6272 "TARGET_SIMD" 6273{ 6274 if (BYTES_BIG_ENDIAN) 6275 { 6276 rtx tmp = gen_reg_rtx (XImode); 6277 rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>); 6278 emit_insn (gen_aarch64_simd_ld4<mode> (tmp, operands[1])); 6279 emit_insn (gen_aarch64_rev_reglistxi (operands[0], tmp, mask)); 6280 } 6281 else 6282 emit_insn (gen_aarch64_simd_ld4<mode> (operands[0], operands[1])); 6283 DONE; 6284}) 6285 6286(define_insn "aarch64_simd_st4<mode>" 6287 [(set (match_operand:XI 0 "aarch64_simd_struct_operand" "=Utv") 6288 (unspec:XI [(match_operand:XI 1 "register_operand" "w") 6289 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] 6290 UNSPEC_ST4))] 6291 "TARGET_SIMD" 6292 "st4\\t{%S1.<Vtype> - %V1.<Vtype>}, %0" 6293 
[(set_attr "type" "neon_store4_4reg<q>")] 6294) 6295 6296;; RTL uses GCC vector extension indices, so flip only for assembly. 6297(define_insn "aarch64_vec_store_lanesxi_lane<mode>" 6298 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv") 6299 (unspec:BLK [(match_operand:XI 1 "register_operand" "w") 6300 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) 6301 (match_operand:SI 2 "immediate_operand" "i")] 6302 UNSPEC_ST4_LANE))] 6303 "TARGET_SIMD" 6304 { 6305 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2])); 6306 return "st4\\t{%S1.<Vetype> - %V1.<Vetype>}[%2], %0"; 6307 } 6308 [(set_attr "type" "neon_store4_one_lane<q>")] 6309) 6310 6311(define_expand "vec_store_lanesxi<mode>" 6312 [(set (match_operand:XI 0 "aarch64_simd_struct_operand") 6313 (unspec:XI [(match_operand:XI 1 "register_operand") 6314 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] 6315 UNSPEC_ST4))] 6316 "TARGET_SIMD" 6317{ 6318 if (BYTES_BIG_ENDIAN) 6319 { 6320 rtx tmp = gen_reg_rtx (XImode); 6321 rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>); 6322 emit_insn (gen_aarch64_rev_reglistxi (tmp, operands[1], mask)); 6323 emit_insn (gen_aarch64_simd_st4<mode> (operands[0], tmp)); 6324 } 6325 else 6326 emit_insn (gen_aarch64_simd_st4<mode> (operands[0], operands[1])); 6327 DONE; 6328}) 6329 6330(define_insn_and_split "aarch64_rev_reglist<mode>" 6331[(set (match_operand:VSTRUCT 0 "register_operand" "=&w") 6332 (unspec:VSTRUCT 6333 [(match_operand:VSTRUCT 1 "register_operand" "w") 6334 (match_operand:V16QI 2 "register_operand" "w")] 6335 UNSPEC_REV_REGLIST))] 6336 "TARGET_SIMD" 6337 "#" 6338 "&& reload_completed" 6339 [(const_int 0)] 6340{ 6341 int i; 6342 int nregs = GET_MODE_SIZE (<MODE>mode) / UNITS_PER_VREG; 6343 for (i = 0; i < nregs; i++) 6344 { 6345 rtx op0 = gen_rtx_REG (V16QImode, REGNO (operands[0]) + i); 6346 rtx op1 = gen_rtx_REG (V16QImode, REGNO (operands[1]) + i); 6347 emit_insn (gen_aarch64_tbl1v16qi (op0, op1, operands[2])); 6348 } 6349 DONE; 6350} 6351 [(set_attr "type" "neon_tbl1_q") 6352 (set_attr "length" "<insn_count>")] 6353) 6354 6355;; Reload patterns for AdvSIMD register list operands. 
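;; As a hedged illustration, spilling a three-register list to memory is
;; expected to use the multiple-structure form of st1, e.g.
;;
;;   st1 {v0.16b - v2.16b}, [x0]
;;
;; matching the Utv alternatives of *aarch64_mov<mode> further down,
;; while register-to-register moves split into one move per Q register.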
6356 6357(define_expand "mov<mode>" 6358 [(set (match_operand:VSTRUCT 0 "nonimmediate_operand") 6359 (match_operand:VSTRUCT 1 "general_operand"))] 6360 "TARGET_SIMD" 6361{ 6362 if (can_create_pseudo_p ()) 6363 { 6364 if (GET_CODE (operands[0]) != REG) 6365 operands[1] = force_reg (<MODE>mode, operands[1]); 6366 } 6367}) 6368 6369 6370(define_expand "aarch64_ld1x3<VALLDIF:mode>" 6371 [(match_operand:CI 0 "register_operand") 6372 (match_operand:DI 1 "register_operand") 6373 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] 6374 "TARGET_SIMD" 6375{ 6376 rtx mem = gen_rtx_MEM (CImode, operands[1]); 6377 emit_insn (gen_aarch64_ld1_x3_<VALLDIF:mode> (operands[0], mem)); 6378 DONE; 6379}) 6380 6381(define_insn "aarch64_ld1_x3_<mode>" 6382 [(set (match_operand:CI 0 "register_operand" "=w") 6383 (unspec:CI 6384 [(match_operand:CI 1 "aarch64_simd_struct_operand" "Utv") 6385 (unspec:VALLDIF [(const_int 3)] UNSPEC_VSTRUCTDUMMY)] UNSPEC_LD1))] 6386 "TARGET_SIMD" 6387 "ld1\\t{%S0.<Vtype> - %U0.<Vtype>}, %1" 6388 [(set_attr "type" "neon_load1_3reg<q>")] 6389) 6390 6391(define_expand "aarch64_ld1x4<VALLDIF:mode>" 6392 [(match_operand:XI 0 "register_operand" "=w") 6393 (match_operand:DI 1 "register_operand" "r") 6394 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] 6395 "TARGET_SIMD" 6396{ 6397 rtx mem = gen_rtx_MEM (XImode, operands[1]); 6398 emit_insn (gen_aarch64_ld1_x4_<VALLDIF:mode> (operands[0], mem)); 6399 DONE; 6400}) 6401 6402(define_insn "aarch64_ld1_x4_<mode>" 6403 [(set (match_operand:XI 0 "register_operand" "=w") 6404 (unspec:XI 6405 [(match_operand:XI 1 "aarch64_simd_struct_operand" "Utv") 6406 (unspec:VALLDIF [(const_int 4)] UNSPEC_VSTRUCTDUMMY)] 6407 UNSPEC_LD1))] 6408 "TARGET_SIMD" 6409 "ld1\\t{%S0.<Vtype> - %V0.<Vtype>}, %1" 6410 [(set_attr "type" "neon_load1_4reg<q>")] 6411) 6412 6413(define_expand "aarch64_st1x2<VALLDIF:mode>" 6414 [(match_operand:DI 0 "register_operand") 6415 (match_operand:OI 1 "register_operand") 6416 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] 6417 "TARGET_SIMD" 6418{ 6419 rtx mem = gen_rtx_MEM (OImode, operands[0]); 6420 emit_insn (gen_aarch64_st1_x2_<VALLDIF:mode> (mem, operands[1])); 6421 DONE; 6422}) 6423 6424(define_insn "aarch64_st1_x2_<mode>" 6425 [(set (match_operand:OI 0 "aarch64_simd_struct_operand" "=Utv") 6426 (unspec:OI 6427 [(match_operand:OI 1 "register_operand" "w") 6428 (unspec:VALLDIF [(const_int 2)] UNSPEC_VSTRUCTDUMMY)] UNSPEC_ST1))] 6429 "TARGET_SIMD" 6430 "st1\\t{%S1.<Vtype> - %T1.<Vtype>}, %0" 6431 [(set_attr "type" "neon_store1_2reg<q>")] 6432) 6433 6434(define_expand "aarch64_st1x3<VALLDIF:mode>" 6435 [(match_operand:DI 0 "register_operand") 6436 (match_operand:CI 1 "register_operand") 6437 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] 6438 "TARGET_SIMD" 6439{ 6440 rtx mem = gen_rtx_MEM (CImode, operands[0]); 6441 emit_insn (gen_aarch64_st1_x3_<VALLDIF:mode> (mem, operands[1])); 6442 DONE; 6443}) 6444 6445(define_insn "aarch64_st1_x3_<mode>" 6446 [(set (match_operand:CI 0 "aarch64_simd_struct_operand" "=Utv") 6447 (unspec:CI 6448 [(match_operand:CI 1 "register_operand" "w") 6449 (unspec:VALLDIF [(const_int 3)] UNSPEC_VSTRUCTDUMMY)] UNSPEC_ST1))] 6450 "TARGET_SIMD" 6451 "st1\\t{%S1.<Vtype> - %U1.<Vtype>}, %0" 6452 [(set_attr "type" "neon_store1_3reg<q>")] 6453) 6454 6455(define_expand "aarch64_st1x4<VALLDIF:mode>" 6456 [(match_operand:DI 0 "register_operand" "") 6457 (match_operand:XI 1 "register_operand" "") 6458 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] 6459 "TARGET_SIMD" 6460{ 6461 rtx mem = 
gen_rtx_MEM (XImode, operands[0]); 6462 emit_insn (gen_aarch64_st1_x4_<VALLDIF:mode> (mem, operands[1])); 6463 DONE; 6464}) 6465 6466(define_insn "aarch64_st1_x4_<mode>" 6467 [(set (match_operand:XI 0 "aarch64_simd_struct_operand" "=Utv") 6468 (unspec:XI 6469 [(match_operand:XI 1 "register_operand" "w") 6470 (unspec:VALLDIF [(const_int 4)] UNSPEC_VSTRUCTDUMMY)] 6471 UNSPEC_ST1))] 6472 "TARGET_SIMD" 6473 "st1\\t{%S1.<Vtype> - %V1.<Vtype>}, %0" 6474 [(set_attr "type" "neon_store1_4reg<q>")] 6475) 6476 6477(define_insn "*aarch64_mov<mode>" 6478 [(set (match_operand:VSTRUCT 0 "aarch64_simd_nonimmediate_operand" "=w,Utv,w") 6479 (match_operand:VSTRUCT 1 "aarch64_simd_general_operand" " w,w,Utv"))] 6480 "TARGET_SIMD && !BYTES_BIG_ENDIAN 6481 && (register_operand (operands[0], <MODE>mode) 6482 || register_operand (operands[1], <MODE>mode))" 6483 "@ 6484 # 6485 st1\\t{%S1.16b - %<Vendreg>1.16b}, %0 6486 ld1\\t{%S0.16b - %<Vendreg>0.16b}, %1" 6487 [(set_attr "type" "multiple,neon_store<nregs>_<nregs>reg_q,\ 6488 neon_load<nregs>_<nregs>reg_q") 6489 (set_attr "length" "<insn_count>,4,4")] 6490) 6491 6492(define_insn "aarch64_be_ld1<mode>" 6493 [(set (match_operand:VALLDI_F16 0 "register_operand" "=w") 6494 (unspec:VALLDI_F16 [(match_operand:VALLDI_F16 1 6495 "aarch64_simd_struct_operand" "Utv")] 6496 UNSPEC_LD1))] 6497 "TARGET_SIMD" 6498 "ld1\\t{%0<Vmtype>}, %1" 6499 [(set_attr "type" "neon_load1_1reg<q>")] 6500) 6501 6502(define_insn "aarch64_be_st1<mode>" 6503 [(set (match_operand:VALLDI_F16 0 "aarch64_simd_struct_operand" "=Utv") 6504 (unspec:VALLDI_F16 [(match_operand:VALLDI_F16 1 "register_operand" "w")] 6505 UNSPEC_ST1))] 6506 "TARGET_SIMD" 6507 "st1\\t{%1<Vmtype>}, %0" 6508 [(set_attr "type" "neon_store1_1reg<q>")] 6509) 6510 6511(define_insn "*aarch64_be_movoi" 6512 [(set (match_operand:OI 0 "nonimmediate_operand" "=w,m,w") 6513 (match_operand:OI 1 "general_operand" " w,w,m"))] 6514 "TARGET_SIMD && BYTES_BIG_ENDIAN 6515 && (register_operand (operands[0], OImode) 6516 || register_operand (operands[1], OImode))" 6517 "@ 6518 # 6519 stp\\t%q1, %R1, %0 6520 ldp\\t%q0, %R0, %1" 6521 [(set_attr "type" "multiple,neon_stp_q,neon_ldp_q") 6522 (set_attr "length" "8,4,4")] 6523) 6524 6525(define_insn "*aarch64_be_movci" 6526 [(set (match_operand:CI 0 "nonimmediate_operand" "=w,o,w") 6527 (match_operand:CI 1 "general_operand" " w,w,o"))] 6528 "TARGET_SIMD && BYTES_BIG_ENDIAN 6529 && (register_operand (operands[0], CImode) 6530 || register_operand (operands[1], CImode))" 6531 "#" 6532 [(set_attr "type" "multiple") 6533 (set_attr "length" "12,4,4")] 6534) 6535 6536(define_insn "*aarch64_be_movxi" 6537 [(set (match_operand:XI 0 "nonimmediate_operand" "=w,o,w") 6538 (match_operand:XI 1 "general_operand" " w,w,o"))] 6539 "TARGET_SIMD && BYTES_BIG_ENDIAN 6540 && (register_operand (operands[0], XImode) 6541 || register_operand (operands[1], XImode))" 6542 "#" 6543 [(set_attr "type" "multiple") 6544 (set_attr "length" "16,4,4")] 6545) 6546 6547(define_split 6548 [(set (match_operand:OI 0 "register_operand") 6549 (match_operand:OI 1 "register_operand"))] 6550 "TARGET_SIMD && reload_completed" 6551 [(const_int 0)] 6552{ 6553 aarch64_simd_emit_reg_reg_move (operands, TImode, 2); 6554 DONE; 6555}) 6556 6557(define_split 6558 [(set (match_operand:CI 0 "nonimmediate_operand") 6559 (match_operand:CI 1 "general_operand"))] 6560 "TARGET_SIMD && reload_completed" 6561 [(const_int 0)] 6562{ 6563 if (register_operand (operands[0], CImode) 6564 && register_operand (operands[1], CImode)) 6565 { 6566 
(define_expand "aarch64_ld<VSTRUCT:nregs>r<VALLDIF:mode>"
  [(match_operand:VSTRUCT 0 "register_operand")
   (match_operand:DI 1 "register_operand")
   (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
  "TARGET_SIMD"
{
  rtx mem = gen_rtx_MEM (BLKmode, operands[1]);
  set_mem_size (mem, GET_MODE_SIZE (GET_MODE_INNER (<VALLDIF:MODE>mode))
                     * <VSTRUCT:nregs>);

  emit_insn (gen_aarch64_simd_ld<VSTRUCT:nregs>r<VALLDIF:mode> (operands[0],
                                                                mem));
  DONE;
})

(define_insn "aarch64_ld2<mode>_dreg"
  [(set (match_operand:OI 0 "register_operand" "=w")
        (unspec:OI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
                    (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_LD2_DREG))]
  "TARGET_SIMD"
  "ld2\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
  [(set_attr "type" "neon_load2_2reg<q>")]
)

(define_insn "aarch64_ld2<mode>_dreg"
  [(set (match_operand:OI 0 "register_operand" "=w")
        (unspec:OI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
                    (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_LD2_DREG))]
  "TARGET_SIMD"
  "ld1\\t{%S0.1d - %T0.1d}, %1"
  [(set_attr "type" "neon_load1_2reg<q>")]
)

(define_insn "aarch64_ld3<mode>_dreg"
  [(set (match_operand:CI 0 "register_operand" "=w")
        (unspec:CI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
                    (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_LD3_DREG))]
  "TARGET_SIMD"
  "ld3\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
  [(set_attr "type" "neon_load3_3reg<q>")]
)

(define_insn "aarch64_ld3<mode>_dreg"
  [(set (match_operand:CI 0 "register_operand" "=w")
        (unspec:CI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
                    (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_LD3_DREG))]
  "TARGET_SIMD"
  "ld1\\t{%S0.1d - %U0.1d}, %1"
  [(set_attr "type" "neon_load1_3reg<q>")]
)

(define_insn "aarch64_ld4<mode>_dreg"
  [(set (match_operand:XI 0 "register_operand" "=w")
        (unspec:XI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
                    (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_LD4_DREG))]
  "TARGET_SIMD"
  "ld4\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
  [(set_attr "type" "neon_load4_4reg<q>")]
)

(define_insn "aarch64_ld4<mode>_dreg"
  [(set (match_operand:XI 0 "register_operand" "=w")
        (unspec:XI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
                    (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_LD4_DREG))]
  "TARGET_SIMD"
  "ld1\\t{%S0.1d - %V0.1d}, %1"
  [(set_attr "type" "neon_load1_4reg<q>")]
)
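;; For example (illustrative only), vld2_s32 from arm_neon.h goes
;; through aarch64_ld2v2si_dreg:
;;
;;   int32x2x2_t f (const int32_t *p)
;;   {
;;     return vld2_s32 (p);       /* ld2 {v0.2s, v1.2s}, [x0]  */
;;   }
;;
;; The 64-bit-element forms (the DX variants above) have no ld2/ld3/ld4
;; encoding with .1d lanes, so they fall back to ld1 on a register list.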
"neon_load4_4reg<q>")] 6672) 6673 6674(define_insn "aarch64_ld4<mode>_dreg" 6675 [(set (match_operand:XI 0 "register_operand" "=w") 6676 (unspec:XI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv") 6677 (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] 6678 UNSPEC_LD4_DREG))] 6679 "TARGET_SIMD" 6680 "ld1\\t{%S0.1d - %V0.1d}, %1" 6681 [(set_attr "type" "neon_load1_4reg<q>")] 6682) 6683 6684(define_expand "aarch64_ld<VSTRUCT:nregs><VDC:mode>" 6685 [(match_operand:VSTRUCT 0 "register_operand") 6686 (match_operand:DI 1 "register_operand") 6687 (unspec:VDC [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] 6688 "TARGET_SIMD" 6689{ 6690 rtx mem = gen_rtx_MEM (BLKmode, operands[1]); 6691 set_mem_size (mem, <VSTRUCT:nregs> * 8); 6692 6693 emit_insn (gen_aarch64_ld<VSTRUCT:nregs><VDC:mode>_dreg (operands[0], mem)); 6694 DONE; 6695}) 6696 6697(define_expand "aarch64_ld1<VALL_F16:mode>" 6698 [(match_operand:VALL_F16 0 "register_operand") 6699 (match_operand:DI 1 "register_operand")] 6700 "TARGET_SIMD" 6701{ 6702 machine_mode mode = <VALL_F16:MODE>mode; 6703 rtx mem = gen_rtx_MEM (mode, operands[1]); 6704 6705 if (BYTES_BIG_ENDIAN) 6706 emit_insn (gen_aarch64_be_ld1<VALL_F16:mode> (operands[0], mem)); 6707 else 6708 emit_move_insn (operands[0], mem); 6709 DONE; 6710}) 6711 6712(define_expand "aarch64_ld<VSTRUCT:nregs><VQ:mode>" 6713 [(match_operand:VSTRUCT 0 "register_operand") 6714 (match_operand:DI 1 "register_operand") 6715 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] 6716 "TARGET_SIMD" 6717{ 6718 machine_mode mode = <VSTRUCT:MODE>mode; 6719 rtx mem = gen_rtx_MEM (mode, operands[1]); 6720 6721 emit_insn (gen_aarch64_simd_ld<VSTRUCT:nregs><VQ:mode> (operands[0], mem)); 6722 DONE; 6723}) 6724 6725(define_expand "aarch64_ld1x2<VQ:mode>" 6726 [(match_operand:OI 0 "register_operand") 6727 (match_operand:DI 1 "register_operand") 6728 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] 6729 "TARGET_SIMD" 6730{ 6731 machine_mode mode = OImode; 6732 rtx mem = gen_rtx_MEM (mode, operands[1]); 6733 6734 emit_insn (gen_aarch64_simd_ld1<VQ:mode>_x2 (operands[0], mem)); 6735 DONE; 6736}) 6737 6738(define_expand "aarch64_ld1x2<VDC:mode>" 6739 [(match_operand:OI 0 "register_operand") 6740 (match_operand:DI 1 "register_operand") 6741 (unspec:VDC [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] 6742 "TARGET_SIMD" 6743{ 6744 machine_mode mode = OImode; 6745 rtx mem = gen_rtx_MEM (mode, operands[1]); 6746 6747 emit_insn (gen_aarch64_simd_ld1<VDC:mode>_x2 (operands[0], mem)); 6748 DONE; 6749}) 6750 6751 6752(define_expand "aarch64_ld<VSTRUCT:nregs>_lane<VALLDIF:mode>" 6753 [(match_operand:VSTRUCT 0 "register_operand") 6754 (match_operand:DI 1 "register_operand") 6755 (match_operand:VSTRUCT 2 "register_operand") 6756 (match_operand:SI 3 "immediate_operand") 6757 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] 6758 "TARGET_SIMD" 6759{ 6760 rtx mem = gen_rtx_MEM (BLKmode, operands[1]); 6761 set_mem_size (mem, GET_MODE_SIZE (GET_MODE_INNER (<VALLDIF:MODE>mode)) 6762 * <VSTRUCT:nregs>); 6763 6764 aarch64_simd_lane_bounds (operands[3], 0, <VALLDIF:nunits>, NULL); 6765 emit_insn (gen_aarch64_vec_load_lanes<VSTRUCT:mode>_lane<VALLDIF:mode> ( 6766 operands[0], mem, operands[2], operands[3])); 6767 DONE; 6768}) 6769 6770;; Expanders for builtins to extract vector registers from large 6771;; opaque integer modes. 6772 6773;; D-register list. 
6774 6775(define_expand "aarch64_get_dreg<VSTRUCT:mode><VDC:mode>" 6776 [(match_operand:VDC 0 "register_operand") 6777 (match_operand:VSTRUCT 1 "register_operand") 6778 (match_operand:SI 2 "immediate_operand")] 6779 "TARGET_SIMD" 6780{ 6781 int part = INTVAL (operands[2]); 6782 rtx temp = gen_reg_rtx (<VDC:VDBL>mode); 6783 int offset = part * 16; 6784 6785 emit_move_insn (temp, gen_rtx_SUBREG (<VDC:VDBL>mode, operands[1], offset)); 6786 emit_move_insn (operands[0], gen_lowpart (<VDC:MODE>mode, temp)); 6787 DONE; 6788}) 6789 6790;; Q-register list. 6791 6792(define_expand "aarch64_get_qreg<VSTRUCT:mode><VQ:mode>" 6793 [(match_operand:VQ 0 "register_operand") 6794 (match_operand:VSTRUCT 1 "register_operand") 6795 (match_operand:SI 2 "immediate_operand")] 6796 "TARGET_SIMD" 6797{ 6798 int part = INTVAL (operands[2]); 6799 int offset = part * 16; 6800 6801 emit_move_insn (operands[0], 6802 gen_rtx_SUBREG (<VQ:MODE>mode, operands[1], offset)); 6803 DONE; 6804}) 6805 6806;; Permuted-store expanders for neon intrinsics. 6807 6808;; Permute instructions 6809 6810;; vec_perm support 6811 6812(define_expand "vec_perm<mode>" 6813 [(match_operand:VB 0 "register_operand") 6814 (match_operand:VB 1 "register_operand") 6815 (match_operand:VB 2 "register_operand") 6816 (match_operand:VB 3 "register_operand")] 6817 "TARGET_SIMD" 6818{ 6819 aarch64_expand_vec_perm (operands[0], operands[1], 6820 operands[2], operands[3], <nunits>); 6821 DONE; 6822}) 6823 6824(define_insn "aarch64_tbl1<mode>" 6825 [(set (match_operand:VB 0 "register_operand" "=w") 6826 (unspec:VB [(match_operand:V16QI 1 "register_operand" "w") 6827 (match_operand:VB 2 "register_operand" "w")] 6828 UNSPEC_TBL))] 6829 "TARGET_SIMD" 6830 "tbl\\t%0.<Vtype>, {%1.16b}, %2.<Vtype>" 6831 [(set_attr "type" "neon_tbl1<q>")] 6832) 6833 6834;; Two source registers. 6835 6836(define_insn "aarch64_tbl2v16qi" 6837 [(set (match_operand:V16QI 0 "register_operand" "=w") 6838 (unspec:V16QI [(match_operand:OI 1 "register_operand" "w") 6839 (match_operand:V16QI 2 "register_operand" "w")] 6840 UNSPEC_TBL))] 6841 "TARGET_SIMD" 6842 "tbl\\t%0.16b, {%S1.16b - %T1.16b}, %2.16b" 6843 [(set_attr "type" "neon_tbl2_q")] 6844) 6845 6846(define_insn "aarch64_tbl3<mode>" 6847 [(set (match_operand:VB 0 "register_operand" "=w") 6848 (unspec:VB [(match_operand:OI 1 "register_operand" "w") 6849 (match_operand:VB 2 "register_operand" "w")] 6850 UNSPEC_TBL))] 6851 "TARGET_SIMD" 6852 "tbl\\t%S0.<Vbtype>, {%S1.16b - %T1.16b}, %S2.<Vbtype>" 6853 [(set_attr "type" "neon_tbl3")] 6854) 6855 6856(define_insn "aarch64_tbx4<mode>" 6857 [(set (match_operand:VB 0 "register_operand" "=w") 6858 (unspec:VB [(match_operand:VB 1 "register_operand" "0") 6859 (match_operand:OI 2 "register_operand" "w") 6860 (match_operand:VB 3 "register_operand" "w")] 6861 UNSPEC_TBX))] 6862 "TARGET_SIMD" 6863 "tbx\\t%S0.<Vbtype>, {%S2.16b - %T2.16b}, %S3.<Vbtype>" 6864 [(set_attr "type" "neon_tbl4")] 6865) 6866 6867;; Three source registers. 
6868 6869(define_insn "aarch64_qtbl3<mode>" 6870 [(set (match_operand:VB 0 "register_operand" "=w") 6871 (unspec:VB [(match_operand:CI 1 "register_operand" "w") 6872 (match_operand:VB 2 "register_operand" "w")] 6873 UNSPEC_TBL))] 6874 "TARGET_SIMD" 6875 "tbl\\t%S0.<Vbtype>, {%S1.16b - %U1.16b}, %S2.<Vbtype>" 6876 [(set_attr "type" "neon_tbl3")] 6877) 6878 6879(define_insn "aarch64_qtbx3<mode>" 6880 [(set (match_operand:VB 0 "register_operand" "=w") 6881 (unspec:VB [(match_operand:VB 1 "register_operand" "0") 6882 (match_operand:CI 2 "register_operand" "w") 6883 (match_operand:VB 3 "register_operand" "w")] 6884 UNSPEC_TBX))] 6885 "TARGET_SIMD" 6886 "tbx\\t%S0.<Vbtype>, {%S2.16b - %U2.16b}, %S3.<Vbtype>" 6887 [(set_attr "type" "neon_tbl3")] 6888) 6889 6890;; Four source registers. 6891 6892(define_insn "aarch64_qtbl4<mode>" 6893 [(set (match_operand:VB 0 "register_operand" "=w") 6894 (unspec:VB [(match_operand:XI 1 "register_operand" "w") 6895 (match_operand:VB 2 "register_operand" "w")] 6896 UNSPEC_TBL))] 6897 "TARGET_SIMD" 6898 "tbl\\t%S0.<Vbtype>, {%S1.16b - %V1.16b}, %S2.<Vbtype>" 6899 [(set_attr "type" "neon_tbl4")] 6900) 6901 6902(define_insn "aarch64_qtbx4<mode>" 6903 [(set (match_operand:VB 0 "register_operand" "=w") 6904 (unspec:VB [(match_operand:VB 1 "register_operand" "0") 6905 (match_operand:XI 2 "register_operand" "w") 6906 (match_operand:VB 3 "register_operand" "w")] 6907 UNSPEC_TBX))] 6908 "TARGET_SIMD" 6909 "tbx\\t%S0.<Vbtype>, {%S2.16b - %V2.16b}, %S3.<Vbtype>" 6910 [(set_attr "type" "neon_tbl4")] 6911) 6912 6913(define_insn_and_split "aarch64_combinev16qi" 6914 [(set (match_operand:OI 0 "register_operand" "=w") 6915 (unspec:OI [(match_operand:V16QI 1 "register_operand" "w") 6916 (match_operand:V16QI 2 "register_operand" "w")] 6917 UNSPEC_CONCAT))] 6918 "TARGET_SIMD" 6919 "#" 6920 "&& reload_completed" 6921 [(const_int 0)] 6922{ 6923 aarch64_split_combinev16qi (operands); 6924 DONE; 6925} 6926[(set_attr "type" "multiple")] 6927) 6928 6929;; This instruction's pattern is generated directly by 6930;; aarch64_expand_vec_perm_const, so any changes to the pattern would 6931;; need corresponding changes there. 6932(define_insn "aarch64_<PERMUTE:perm_insn><mode>" 6933 [(set (match_operand:VALL_F16 0 "register_operand" "=w") 6934 (unspec:VALL_F16 [(match_operand:VALL_F16 1 "register_operand" "w") 6935 (match_operand:VALL_F16 2 "register_operand" "w")] 6936 PERMUTE))] 6937 "TARGET_SIMD" 6938 "<PERMUTE:perm_insn>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>" 6939 [(set_attr "type" "neon_permute<q>")] 6940) 6941 6942;; This instruction's pattern is generated directly by 6943;; aarch64_expand_vec_perm_const, so any changes to the pattern would 6944;; need corresponding changes there. Note that the immediate (third) 6945;; operand is a lane index not a byte index. 6946(define_insn "aarch64_ext<mode>" 6947 [(set (match_operand:VALL_F16 0 "register_operand" "=w") 6948 (unspec:VALL_F16 [(match_operand:VALL_F16 1 "register_operand" "w") 6949 (match_operand:VALL_F16 2 "register_operand" "w") 6950 (match_operand:SI 3 "immediate_operand" "i")] 6951 UNSPEC_EXT))] 6952 "TARGET_SIMD" 6953{ 6954 operands[3] = GEN_INT (INTVAL (operands[3]) 6955 * GET_MODE_UNIT_SIZE (<MODE>mode)); 6956 return "ext\\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>, #%3"; 6957} 6958 [(set_attr "type" "neon_ext<q>")] 6959) 6960 6961;; This instruction's pattern is generated directly by 6962;; aarch64_expand_vec_perm_const, so any changes to the pattern would 6963;; need corresponding changes there. 
6964(define_insn "aarch64_rev<REVERSE:rev_op><mode>" 6965 [(set (match_operand:VALL_F16 0 "register_operand" "=w") 6966 (unspec:VALL_F16 [(match_operand:VALL_F16 1 "register_operand" "w")] 6967 REVERSE))] 6968 "TARGET_SIMD" 6969 "rev<REVERSE:rev_op>\\t%0.<Vtype>, %1.<Vtype>" 6970 [(set_attr "type" "neon_rev<q>")] 6971) 6972 6973(define_insn "aarch64_st2<mode>_dreg" 6974 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv") 6975 (unspec:BLK [(match_operand:OI 1 "register_operand" "w") 6976 (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] 6977 UNSPEC_ST2))] 6978 "TARGET_SIMD" 6979 "st2\\t{%S1.<Vtype> - %T1.<Vtype>}, %0" 6980 [(set_attr "type" "neon_store2_2reg")] 6981) 6982 6983(define_insn "aarch64_st2<mode>_dreg" 6984 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv") 6985 (unspec:BLK [(match_operand:OI 1 "register_operand" "w") 6986 (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] 6987 UNSPEC_ST2))] 6988 "TARGET_SIMD" 6989 "st1\\t{%S1.1d - %T1.1d}, %0" 6990 [(set_attr "type" "neon_store1_2reg")] 6991) 6992 6993(define_insn "aarch64_st3<mode>_dreg" 6994 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv") 6995 (unspec:BLK [(match_operand:CI 1 "register_operand" "w") 6996 (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] 6997 UNSPEC_ST3))] 6998 "TARGET_SIMD" 6999 "st3\\t{%S1.<Vtype> - %U1.<Vtype>}, %0" 7000 [(set_attr "type" "neon_store3_3reg")] 7001) 7002 7003(define_insn "aarch64_st3<mode>_dreg" 7004 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv") 7005 (unspec:BLK [(match_operand:CI 1 "register_operand" "w") 7006 (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] 7007 UNSPEC_ST3))] 7008 "TARGET_SIMD" 7009 "st1\\t{%S1.1d - %U1.1d}, %0" 7010 [(set_attr "type" "neon_store1_3reg")] 7011) 7012 7013(define_insn "aarch64_st4<mode>_dreg" 7014 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv") 7015 (unspec:BLK [(match_operand:XI 1 "register_operand" "w") 7016 (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] 7017 UNSPEC_ST4))] 7018 "TARGET_SIMD" 7019 "st4\\t{%S1.<Vtype> - %V1.<Vtype>}, %0" 7020 [(set_attr "type" "neon_store4_4reg")] 7021) 7022 7023(define_insn "aarch64_st4<mode>_dreg" 7024 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv") 7025 (unspec:BLK [(match_operand:XI 1 "register_operand" "w") 7026 (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] 7027 UNSPEC_ST4))] 7028 "TARGET_SIMD" 7029 "st1\\t{%S1.1d - %V1.1d}, %0" 7030 [(set_attr "type" "neon_store1_4reg")] 7031) 7032 7033(define_expand "aarch64_st<VSTRUCT:nregs><VDC:mode>" 7034 [(match_operand:DI 0 "register_operand") 7035 (match_operand:VSTRUCT 1 "register_operand") 7036 (unspec:VDC [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] 7037 "TARGET_SIMD" 7038{ 7039 rtx mem = gen_rtx_MEM (BLKmode, operands[0]); 7040 set_mem_size (mem, <VSTRUCT:nregs> * 8); 7041 7042 emit_insn (gen_aarch64_st<VSTRUCT:nregs><VDC:mode>_dreg (mem, operands[1])); 7043 DONE; 7044}) 7045 7046(define_expand "aarch64_st<VSTRUCT:nregs><VQ:mode>" 7047 [(match_operand:DI 0 "register_operand") 7048 (match_operand:VSTRUCT 1 "register_operand") 7049 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] 7050 "TARGET_SIMD" 7051{ 7052 machine_mode mode = <VSTRUCT:MODE>mode; 7053 rtx mem = gen_rtx_MEM (mode, operands[0]); 7054 7055 emit_insn (gen_aarch64_simd_st<VSTRUCT:nregs><VQ:mode> (mem, operands[1])); 7056 DONE; 7057}) 7058 7059(define_expand "aarch64_st<VSTRUCT:nregs>_lane<VALLDIF:mode>" 7060 [(match_operand:DI 0 "register_operand") 7061 (match_operand:VSTRUCT 1 "register_operand") 7062 (unspec:VALLDIF 
(define_expand "aarch64_st1<VALL_F16:mode>"
  [(match_operand:DI 0 "register_operand")
   (match_operand:VALL_F16 1 "register_operand")]
  "TARGET_SIMD"
{
  machine_mode mode = <VALL_F16:MODE>mode;
  rtx mem = gen_rtx_MEM (mode, operands[0]);

  if (BYTES_BIG_ENDIAN)
    emit_insn (gen_aarch64_be_st1<VALL_F16:mode> (mem, operands[1]));
  else
    emit_move_insn (mem, operands[1]);
  DONE;
})

;; Expander for builtins to insert vector registers into large
;; opaque integer modes.

;; Q-register list.  We don't need a D-reg inserter as we zero
;; extend them in arm_neon.h and insert the resulting Q-regs.

(define_expand "aarch64_set_qreg<VSTRUCT:mode><VQ:mode>"
  [(match_operand:VSTRUCT 0 "register_operand")
   (match_operand:VSTRUCT 1 "register_operand")
   (match_operand:VQ 2 "register_operand")
   (match_operand:SI 3 "immediate_operand")]
  "TARGET_SIMD"
{
  int part = INTVAL (operands[3]);
  int offset = part * 16;

  emit_move_insn (operands[0], operands[1]);
  emit_move_insn (gen_rtx_SUBREG (<VQ:MODE>mode, operands[0], offset),
                  operands[2]);
  DONE;
})

;; Standard pattern name vec_init<mode><Vel>.

(define_expand "vec_init<mode><Vel>"
  [(match_operand:VALL_F16 0 "register_operand")
   (match_operand 1 "" "")]
  "TARGET_SIMD"
{
  aarch64_expand_vector_init (operands[0], operands[1]);
  DONE;
})

(define_expand "vec_init<mode><Vhalf>"
  [(match_operand:VQ_NO2E 0 "register_operand")
   (match_operand 1 "" "")]
  "TARGET_SIMD"
{
  aarch64_expand_vector_init (operands[0], operands[1]);
  DONE;
})

(define_insn "*aarch64_simd_ld1r<mode>"
  [(set (match_operand:VALL_F16 0 "register_operand" "=w")
        (vec_duplicate:VALL_F16
          (match_operand:<VEL> 1 "aarch64_simd_struct_operand" "Utv")))]
  "TARGET_SIMD"
  "ld1r\\t{%0.<Vtype>}, %1"
  [(set_attr "type" "neon_load1_all_lanes")]
)
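;; Illustrative only: combine matches a load feeding a vec_duplicate,
;; so a load-and-splat folds into the single instruction above:
;;
;;   int32x4_t f (const int32_t *p)
;;   {
;;     return vdupq_n_s32 (*p);   /* ld1r {v0.4s}, [x0]  */
;;   }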
(define_insn "aarch64_simd_ld1<mode>_x2"
  [(set (match_operand:OI 0 "register_operand" "=w")
        (unspec:OI [(match_operand:OI 1 "aarch64_simd_struct_operand" "Utv")
                    (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_LD1))]
  "TARGET_SIMD"
  "ld1\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
  [(set_attr "type" "neon_load1_2reg<q>")]
)

(define_insn "aarch64_simd_ld1<mode>_x2"
  [(set (match_operand:OI 0 "register_operand" "=w")
        (unspec:OI [(match_operand:OI 1 "aarch64_simd_struct_operand" "Utv")
                    (unspec:VDC [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_LD1))]
  "TARGET_SIMD"
  "ld1\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
  [(set_attr "type" "neon_load1_2reg<q>")]
)

(define_insn "@aarch64_frecpe<mode>"
  [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
        (unspec:VHSDF_HSDF
          [(match_operand:VHSDF_HSDF 1 "register_operand" "w")]
          UNSPEC_FRECPE))]
  "TARGET_SIMD"
  "frecpe\t%<v>0<Vmtype>, %<v>1<Vmtype>"
  [(set_attr "type" "neon_fp_recpe_<stype><q>")]
)

(define_insn "aarch64_frecpx<mode>"
  [(set (match_operand:GPF_F16 0 "register_operand" "=w")
        (unspec:GPF_F16 [(match_operand:GPF_F16 1 "register_operand" "w")]
                        UNSPEC_FRECPX))]
  "TARGET_SIMD"
  "frecpx\t%<s>0, %<s>1"
  [(set_attr "type" "neon_fp_recpx_<GPF_F16:stype>")]
)

(define_insn "@aarch64_frecps<mode>"
  [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
        (unspec:VHSDF_HSDF
          [(match_operand:VHSDF_HSDF 1 "register_operand" "w")
           (match_operand:VHSDF_HSDF 2 "register_operand" "w")]
          UNSPEC_FRECPS))]
  "TARGET_SIMD"
  "frecps\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
  [(set_attr "type" "neon_fp_recps_<stype><q>")]
)

(define_insn "aarch64_urecpe<mode>"
  [(set (match_operand:VDQ_SI 0 "register_operand" "=w")
        (unspec:VDQ_SI [(match_operand:VDQ_SI 1 "register_operand" "w")]
                       UNSPEC_URECPE))]
  "TARGET_SIMD"
  "urecpe\\t%0.<Vtype>, %1.<Vtype>"
  [(set_attr "type" "neon_fp_recpe_<Vetype><q>")])

;; Standard pattern name vec_extract<mode><Vel>.

(define_expand "vec_extract<mode><Vel>"
  [(match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand")
   (match_operand:VALL_F16 1 "register_operand")
   (match_operand:SI 2 "immediate_operand")]
  "TARGET_SIMD"
{
  emit_insn
    (gen_aarch64_get_lane<mode> (operands[0], operands[1], operands[2]));
  DONE;
})

;; Extract a 64-bit vector from one half of a 128-bit vector.
(define_expand "vec_extract<mode><Vhalf>"
  [(match_operand:<VHALF> 0 "register_operand")
   (match_operand:VQMOV_NO2E 1 "register_operand")
   (match_operand 2 "immediate_operand")]
  "TARGET_SIMD"
{
  int start = INTVAL (operands[2]);
  if (start != 0 && start != <nunits> / 2)
    FAIL;
  rtx sel = aarch64_gen_stepped_int_parallel (<nunits> / 2, start, 1);
  emit_insn (gen_aarch64_get_half<mode> (operands[0], operands[1], sel));
  DONE;
})

;; Extract a single-element 64-bit vector from one half of a 128-bit vector.
(define_expand "vec_extractv2dfv1df"
  [(match_operand:V1DF 0 "register_operand")
   (match_operand:V2DF 1 "register_operand")
   (match_operand 2 "immediate_operand")]
  "TARGET_SIMD"
{
  /* V1DF is rarely used by other patterns, so it is better to hide
     it in a subreg destination of a normal DF op.  */
  rtx scalar0 = gen_lowpart (DFmode, operands[0]);
  emit_insn (gen_vec_extractv2dfdf (scalar0, operands[1], operands[2]));
  DONE;
})

;; aes

(define_insn "aarch64_crypto_aes<aes_op>v16qi"
  [(set (match_operand:V16QI 0 "register_operand" "=w")
        (unspec:V16QI
          [(xor:V16QI
             (match_operand:V16QI 1 "register_operand" "%0")
             (match_operand:V16QI 2 "register_operand" "w"))]
          CRYPTO_AES))]
  "TARGET_SIMD && TARGET_AES"
  "aes<aes_op>\\t%0.16b, %2.16b"
  [(set_attr "type" "crypto_aese")]
)

(define_insn "aarch64_crypto_aes<aesmc_op>v16qi"
  [(set (match_operand:V16QI 0 "register_operand" "=w")
        (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "w")]
                      CRYPTO_AESMC))]
  "TARGET_SIMD && TARGET_AES"
  "aes<aesmc_op>\\t%0.16b, %1.16b"
  [(set_attr "type" "crypto_aesmc")]
)
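;; For example (a sketch): on cores that enable AARCH64_FUSE_AES_AESMC,
;; a round written as
;;
;;   uint8x16_t round (uint8x16_t d, uint8x16_t k)
;;   {
;;     return vaesmcq_u8 (vaeseq_u8 (d, k));  /* aese + aesmc, fused */
;;   }
;;
;; is kept back-to-back by the combined patterns that follow.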
7269 7270(define_insn "*aarch64_crypto_aese_fused" 7271 [(set (match_operand:V16QI 0 "register_operand" "=w") 7272 (unspec:V16QI 7273 [(unspec:V16QI 7274 [(xor:V16QI 7275 (match_operand:V16QI 1 "register_operand" "%0") 7276 (match_operand:V16QI 2 "register_operand" "w"))] 7277 UNSPEC_AESE)] 7278 UNSPEC_AESMC))] 7279 "TARGET_SIMD && TARGET_AES 7280 && aarch64_fusion_enabled_p (AARCH64_FUSE_AES_AESMC)" 7281 "aese\\t%0.16b, %2.16b\;aesmc\\t%0.16b, %0.16b" 7282 [(set_attr "type" "crypto_aese") 7283 (set_attr "length" "8")] 7284) 7285 7286;; When AESD/AESIMC fusion is enabled we really want to keep the two together 7287;; and enforce the register dependency without scheduling or register 7288;; allocation messing up the order or introducing moves inbetween. 7289;; Mash the two together during combine. 7290 7291(define_insn "*aarch64_crypto_aesd_fused" 7292 [(set (match_operand:V16QI 0 "register_operand" "=w") 7293 (unspec:V16QI 7294 [(unspec:V16QI 7295 [(xor:V16QI 7296 (match_operand:V16QI 1 "register_operand" "%0") 7297 (match_operand:V16QI 2 "register_operand" "w"))] 7298 UNSPEC_AESD)] 7299 UNSPEC_AESIMC))] 7300 "TARGET_SIMD && TARGET_AES 7301 && aarch64_fusion_enabled_p (AARCH64_FUSE_AES_AESMC)" 7302 "aesd\\t%0.16b, %2.16b\;aesimc\\t%0.16b, %0.16b" 7303 [(set_attr "type" "crypto_aese") 7304 (set_attr "length" "8")] 7305) 7306 7307;; sha1 7308 7309(define_insn "aarch64_crypto_sha1hsi" 7310 [(set (match_operand:SI 0 "register_operand" "=w") 7311 (unspec:SI [(match_operand:SI 1 7312 "register_operand" "w")] 7313 UNSPEC_SHA1H))] 7314 "TARGET_SIMD && TARGET_SHA2" 7315 "sha1h\\t%s0, %s1" 7316 [(set_attr "type" "crypto_sha1_fast")] 7317) 7318 7319(define_insn "aarch64_crypto_sha1hv4si" 7320 [(set (match_operand:SI 0 "register_operand" "=w") 7321 (unspec:SI [(vec_select:SI (match_operand:V4SI 1 "register_operand" "w") 7322 (parallel [(const_int 0)]))] 7323 UNSPEC_SHA1H))] 7324 "TARGET_SIMD && TARGET_SHA2 && !BYTES_BIG_ENDIAN" 7325 "sha1h\\t%s0, %s1" 7326 [(set_attr "type" "crypto_sha1_fast")] 7327) 7328 7329(define_insn "aarch64_be_crypto_sha1hv4si" 7330 [(set (match_operand:SI 0 "register_operand" "=w") 7331 (unspec:SI [(vec_select:SI (match_operand:V4SI 1 "register_operand" "w") 7332 (parallel [(const_int 3)]))] 7333 UNSPEC_SHA1H))] 7334 "TARGET_SIMD && TARGET_SHA2 && BYTES_BIG_ENDIAN" 7335 "sha1h\\t%s0, %s1" 7336 [(set_attr "type" "crypto_sha1_fast")] 7337) 7338 7339(define_insn "aarch64_crypto_sha1su1v4si" 7340 [(set (match_operand:V4SI 0 "register_operand" "=w") 7341 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0") 7342 (match_operand:V4SI 2 "register_operand" "w")] 7343 UNSPEC_SHA1SU1))] 7344 "TARGET_SIMD && TARGET_SHA2" 7345 "sha1su1\\t%0.4s, %2.4s" 7346 [(set_attr "type" "crypto_sha1_fast")] 7347) 7348 7349(define_insn "aarch64_crypto_sha1<sha1_op>v4si" 7350 [(set (match_operand:V4SI 0 "register_operand" "=w") 7351 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0") 7352 (match_operand:SI 2 "register_operand" "w") 7353 (match_operand:V4SI 3 "register_operand" "w")] 7354 CRYPTO_SHA1))] 7355 "TARGET_SIMD && TARGET_SHA2" 7356 "sha1<sha1_op>\\t%q0, %s2, %3.4s" 7357 [(set_attr "type" "crypto_sha1_slow")] 7358) 7359 7360(define_insn "aarch64_crypto_sha1su0v4si" 7361 [(set (match_operand:V4SI 0 "register_operand" "=w") 7362 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0") 7363 (match_operand:V4SI 2 "register_operand" "w") 7364 (match_operand:V4SI 3 "register_operand" "w")] 7365 UNSPEC_SHA1SU0))] 7366 "TARGET_SIMD && TARGET_SHA2" 7367 "sha1su0\\t%0.4s, %2.4s, %3.4s" 
;; sha256

(define_insn "aarch64_crypto_sha256h<sha256_op>v4si"
  [(set (match_operand:V4SI 0 "register_operand" "=w")
        (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
                      (match_operand:V4SI 2 "register_operand" "w")
                      (match_operand:V4SI 3 "register_operand" "w")]
                     CRYPTO_SHA256))]
  "TARGET_SIMD && TARGET_SHA2"
  "sha256h<sha256_op>\\t%q0, %q2, %3.4s"
  [(set_attr "type" "crypto_sha256_slow")]
)

(define_insn "aarch64_crypto_sha256su0v4si"
  [(set (match_operand:V4SI 0 "register_operand" "=w")
        (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
                      (match_operand:V4SI 2 "register_operand" "w")]
                     UNSPEC_SHA256SU0))]
  "TARGET_SIMD && TARGET_SHA2"
  "sha256su0\\t%0.4s, %2.4s"
  [(set_attr "type" "crypto_sha256_fast")]
)

(define_insn "aarch64_crypto_sha256su1v4si"
  [(set (match_operand:V4SI 0 "register_operand" "=w")
        (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
                      (match_operand:V4SI 2 "register_operand" "w")
                      (match_operand:V4SI 3 "register_operand" "w")]
                     UNSPEC_SHA256SU1))]
  "TARGET_SIMD && TARGET_SHA2"
  "sha256su1\\t%0.4s, %2.4s, %3.4s"
  [(set_attr "type" "crypto_sha256_slow")]
)

;; sha512

(define_insn "aarch64_crypto_sha512h<sha512_op>qv2di"
  [(set (match_operand:V2DI 0 "register_operand" "=w")
        (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
                      (match_operand:V2DI 2 "register_operand" "w")
                      (match_operand:V2DI 3 "register_operand" "w")]
                     CRYPTO_SHA512))]
  "TARGET_SIMD && TARGET_SHA3"
  "sha512h<sha512_op>\\t%q0, %q2, %3.2d"
  [(set_attr "type" "crypto_sha512")]
)

(define_insn "aarch64_crypto_sha512su0qv2di"
  [(set (match_operand:V2DI 0 "register_operand" "=w")
        (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
                      (match_operand:V2DI 2 "register_operand" "w")]
                     UNSPEC_SHA512SU0))]
  "TARGET_SIMD && TARGET_SHA3"
  "sha512su0\\t%0.2d, %2.2d"
  [(set_attr "type" "crypto_sha512")]
)

(define_insn "aarch64_crypto_sha512su1qv2di"
  [(set (match_operand:V2DI 0 "register_operand" "=w")
        (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
                      (match_operand:V2DI 2 "register_operand" "w")
                      (match_operand:V2DI 3 "register_operand" "w")]
                     UNSPEC_SHA512SU1))]
  "TARGET_SIMD && TARGET_SHA3"
  "sha512su1\\t%0.2d, %2.2d, %3.2d"
  [(set_attr "type" "crypto_sha512")]
)

;; sha3

(define_insn "eor3q<mode>4"
  [(set (match_operand:VQ_I 0 "register_operand" "=w")
        (xor:VQ_I
          (xor:VQ_I
            (match_operand:VQ_I 2 "register_operand" "w")
            (match_operand:VQ_I 3 "register_operand" "w"))
          (match_operand:VQ_I 1 "register_operand" "w")))]
  "TARGET_SIMD && TARGET_SHA3"
  "eor3\\t%0.16b, %1.16b, %2.16b, %3.16b"
  [(set_attr "type" "crypto_sha3")]
)

(define_insn "aarch64_rax1qv2di"
  [(set (match_operand:V2DI 0 "register_operand" "=w")
        (xor:V2DI
          (rotate:V2DI
            (match_operand:V2DI 2 "register_operand" "w")
            (const_int 1))
          (match_operand:V2DI 1 "register_operand" "w")))]
  "TARGET_SIMD && TARGET_SHA3"
  "rax1\\t%0.2d, %1.2d, %2.2d"
  [(set_attr "type" "crypto_sha3")]
)

(define_insn "aarch64_xarqv2di"
  [(set (match_operand:V2DI 0 "register_operand" "=w")
        (rotatert:V2DI
          (xor:V2DI
            (match_operand:V2DI 1 "register_operand" "%w")
            (match_operand:V2DI 2 "register_operand" "w"))
          (match_operand:SI 3 "aarch64_simd_shift_imm_di" "Usd")))]
  "TARGET_SIMD && TARGET_SHA3"
  "xar\\t%0.2d, %1.2d, %2.2d, %3"
  [(set_attr "type" "crypto_sha3")]
)
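;; A scalar sketch (C notation, per 64-bit lane; rol64/ror64 are
;; hypothetical rotate helpers) of the SHA3 bit-manipulation patterns
;; above, plus the bcax pattern just below:
;;
;;   eor3: r = a ^ b ^ c;
;;   rax1: r = a ^ rol64 (b, 1);
;;   xar:  r = ror64 (a ^ b, imm);
;;   bcax: r = a ^ (b & ~c);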
(define_insn "bcaxq<mode>4"
  [(set (match_operand:VQ_I 0 "register_operand" "=w")
        (xor:VQ_I
          (and:VQ_I
            (not:VQ_I (match_operand:VQ_I 3 "register_operand" "w"))
            (match_operand:VQ_I 2 "register_operand" "w"))
          (match_operand:VQ_I 1 "register_operand" "w")))]
  "TARGET_SIMD && TARGET_SHA3"
  "bcax\\t%0.16b, %1.16b, %2.16b, %3.16b"
  [(set_attr "type" "crypto_sha3")]
)

;; SM3

(define_insn "aarch64_sm3ss1qv4si"
  [(set (match_operand:V4SI 0 "register_operand" "=w")
        (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "w")
                      (match_operand:V4SI 2 "register_operand" "w")
                      (match_operand:V4SI 3 "register_operand" "w")]
                     UNSPEC_SM3SS1))]
  "TARGET_SIMD && TARGET_SM4"
  "sm3ss1\\t%0.4s, %1.4s, %2.4s, %3.4s"
  [(set_attr "type" "crypto_sm3")]
)

(define_insn "aarch64_sm3tt<sm3tt_op>qv4si"
  [(set (match_operand:V4SI 0 "register_operand" "=w")
        (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
                      (match_operand:V4SI 2 "register_operand" "w")
                      (match_operand:V4SI 3 "register_operand" "w")
                      (match_operand:SI 4 "aarch64_imm2" "Ui2")]
                     CRYPTO_SM3TT))]
  "TARGET_SIMD && TARGET_SM4"
  "sm3tt<sm3tt_op>\\t%0.4s, %2.4s, %3.4s[%4]"
  [(set_attr "type" "crypto_sm3")]
)

(define_insn "aarch64_sm3partw<sm3part_op>qv4si"
  [(set (match_operand:V4SI 0 "register_operand" "=w")
        (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
                      (match_operand:V4SI 2 "register_operand" "w")
                      (match_operand:V4SI 3 "register_operand" "w")]
                     CRYPTO_SM3PART))]
  "TARGET_SIMD && TARGET_SM4"
  "sm3partw<sm3part_op>\\t%0.4s, %2.4s, %3.4s"
  [(set_attr "type" "crypto_sm3")]
)

;; SM4

(define_insn "aarch64_sm4eqv4si"
  [(set (match_operand:V4SI 0 "register_operand" "=w")
        (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
                      (match_operand:V4SI 2 "register_operand" "w")]
                     UNSPEC_SM4E))]
  "TARGET_SIMD && TARGET_SM4"
  "sm4e\\t%0.4s, %2.4s"
  [(set_attr "type" "crypto_sm4")]
)

(define_insn "aarch64_sm4ekeyqv4si"
  [(set (match_operand:V4SI 0 "register_operand" "=w")
        (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "w")
                      (match_operand:V4SI 2 "register_operand" "w")]
                     UNSPEC_SM4EKEY))]
  "TARGET_SIMD && TARGET_SM4"
  "sm4ekey\\t%0.4s, %1.4s, %2.4s"
  [(set_attr "type" "crypto_sm4")]
)

;; fp16fml

(define_expand "aarch64_fml<f16mac1>l<f16quad>_low<mode>"
  [(set (match_operand:VDQSF 0 "register_operand")
        (unspec:VDQSF
          [(match_operand:VDQSF 1 "register_operand")
           (match_operand:<VFMLA_W> 2 "register_operand")
           (match_operand:<VFMLA_W> 3 "register_operand")]
          VFMLA16_LOW))]
  "TARGET_F16FML"
{
  rtx p1 = aarch64_simd_vect_par_cnst_half (<VFMLA_W>mode,
                                            <nunits> * 2, false);
  rtx p2 = aarch64_simd_vect_par_cnst_half (<VFMLA_W>mode,
                                            <nunits> * 2, false);

  emit_insn (gen_aarch64_simd_fml<f16mac1>l<f16quad>_low<mode> (operands[0],
                                                                operands[1],
                                                                operands[2],
                                                                operands[3],
                                                                p1, p2));
  DONE;
})

(define_expand "aarch64_fml<f16mac1>l<f16quad>_high<mode>"
  [(set (match_operand:VDQSF 0 "register_operand")
        (unspec:VDQSF
          [(match_operand:VDQSF 1 "register_operand")
           (match_operand:<VFMLA_W> 2 "register_operand")
           (match_operand:<VFMLA_W> 3 "register_operand")]
          VFMLA16_HIGH))]
  "TARGET_F16FML"
{
  rtx p1 = aarch64_simd_vect_par_cnst_half (<VFMLA_W>mode, <nunits> * 2, true);
  rtx p2 = aarch64_simd_vect_par_cnst_half (<VFMLA_W>mode, <nunits> * 2, true);

  emit_insn (gen_aarch64_simd_fml<f16mac1>l<f16quad>_high<mode> (operands[0],
                                                                 operands[1],
                                                                 operands[2],
                                                                 operands[3],
                                                                 p1, p2));
  DONE;
})
(define_insn "aarch64_simd_fmlal<f16quad>_low<mode>"
  [(set (match_operand:VDQSF 0 "register_operand" "=w")
        (fma:VDQSF
          (float_extend:VDQSF
            (vec_select:<VFMLA_SEL_W>
              (match_operand:<VFMLA_W> 2 "register_operand" "w")
              (match_operand:<VFMLA_W> 4 "vect_par_cnst_lo_half" "")))
          (float_extend:VDQSF
            (vec_select:<VFMLA_SEL_W>
              (match_operand:<VFMLA_W> 3 "register_operand" "w")
              (match_operand:<VFMLA_W> 5 "vect_par_cnst_lo_half" "")))
          (match_operand:VDQSF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlal\\t%0.<nunits>s, %2.<nunits>h, %3.<nunits>h"
  [(set_attr "type" "neon_fp_mul_s")]
)

(define_insn "aarch64_simd_fmlsl<f16quad>_low<mode>"
  [(set (match_operand:VDQSF 0 "register_operand" "=w")
        (fma:VDQSF
          (float_extend:VDQSF
            (neg:<VFMLA_SEL_W>
              (vec_select:<VFMLA_SEL_W>
                (match_operand:<VFMLA_W> 2 "register_operand" "w")
                (match_operand:<VFMLA_W> 4 "vect_par_cnst_lo_half" ""))))
          (float_extend:VDQSF
            (vec_select:<VFMLA_SEL_W>
              (match_operand:<VFMLA_W> 3 "register_operand" "w")
              (match_operand:<VFMLA_W> 5 "vect_par_cnst_lo_half" "")))
          (match_operand:VDQSF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlsl\\t%0.<nunits>s, %2.<nunits>h, %3.<nunits>h"
  [(set_attr "type" "neon_fp_mul_s")]
)

(define_insn "aarch64_simd_fmlal<f16quad>_high<mode>"
  [(set (match_operand:VDQSF 0 "register_operand" "=w")
        (fma:VDQSF
          (float_extend:VDQSF
            (vec_select:<VFMLA_SEL_W>
              (match_operand:<VFMLA_W> 2 "register_operand" "w")
              (match_operand:<VFMLA_W> 4 "vect_par_cnst_hi_half" "")))
          (float_extend:VDQSF
            (vec_select:<VFMLA_SEL_W>
              (match_operand:<VFMLA_W> 3 "register_operand" "w")
              (match_operand:<VFMLA_W> 5 "vect_par_cnst_hi_half" "")))
          (match_operand:VDQSF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlal2\\t%0.<nunits>s, %2.<nunits>h, %3.<nunits>h"
  [(set_attr "type" "neon_fp_mul_s")]
)

(define_insn "aarch64_simd_fmlsl<f16quad>_high<mode>"
  [(set (match_operand:VDQSF 0 "register_operand" "=w")
        (fma:VDQSF
          (float_extend:VDQSF
            (neg:<VFMLA_SEL_W>
              (vec_select:<VFMLA_SEL_W>
                (match_operand:<VFMLA_W> 2 "register_operand" "w")
                (match_operand:<VFMLA_W> 4 "vect_par_cnst_hi_half" ""))))
          (float_extend:VDQSF
            (vec_select:<VFMLA_SEL_W>
              (match_operand:<VFMLA_W> 3 "register_operand" "w")
              (match_operand:<VFMLA_W> 5 "vect_par_cnst_hi_half" "")))
          (match_operand:VDQSF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlsl2\\t%0.<nunits>s, %2.<nunits>h, %3.<nunits>h"
  [(set_attr "type" "neon_fp_mul_s")]
)
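;; A scalar sketch of what the four patterns above compute, for the
;; 64-bit (2s) forms; the 128-bit (4s) forms are analogous:
;;
;;   /* fmlal  (low):  r[i] += (float) a[i]     * (float) b[i];      */
;;   /* fmlal2 (high): r[i] += (float) a[i + 2] * (float) b[i + 2];  */
;;   /* fmlsl/fmlsl2 negate the selected half of operand a first.    */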
"aarch64_imm2")] 7669 VFMLA16_LOW))] 7670 "TARGET_F16FML" 7671{ 7672 rtx p1 = aarch64_simd_vect_par_cnst_half (V4HFmode, 4, false); 7673 rtx lane = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[4])); 7674 7675 emit_insn (gen_aarch64_simd_fml<f16mac1>l_lane_lowv2sf (operands[0], 7676 operands[1], 7677 operands[2], 7678 operands[3], 7679 p1, lane)); 7680 DONE; 7681} 7682) 7683 7684(define_expand "aarch64_fml<f16mac1>l_lane_highv2sf" 7685 [(set (match_operand:V2SF 0 "register_operand") 7686 (unspec:V2SF [(match_operand:V2SF 1 "register_operand") 7687 (match_operand:V4HF 2 "register_operand") 7688 (match_operand:V4HF 3 "register_operand") 7689 (match_operand:SI 4 "aarch64_imm2")] 7690 VFMLA16_HIGH))] 7691 "TARGET_F16FML" 7692{ 7693 rtx p1 = aarch64_simd_vect_par_cnst_half (V4HFmode, 4, true); 7694 rtx lane = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[4])); 7695 7696 emit_insn (gen_aarch64_simd_fml<f16mac1>l_lane_highv2sf (operands[0], 7697 operands[1], 7698 operands[2], 7699 operands[3], 7700 p1, lane)); 7701 DONE; 7702}) 7703 7704(define_insn "aarch64_simd_fmlal_lane_lowv2sf" 7705 [(set (match_operand:V2SF 0 "register_operand" "=w") 7706 (fma:V2SF 7707 (float_extend:V2SF 7708 (vec_select:V2HF 7709 (match_operand:V4HF 2 "register_operand" "w") 7710 (match_operand:V4HF 4 "vect_par_cnst_lo_half" ""))) 7711 (float_extend:V2SF 7712 (vec_duplicate:V2HF 7713 (vec_select:HF 7714 (match_operand:V4HF 3 "register_operand" "x") 7715 (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")])))) 7716 (match_operand:V2SF 1 "register_operand" "0")))] 7717 "TARGET_F16FML" 7718 "fmlal\\t%0.2s, %2.2h, %3.h[%5]" 7719 [(set_attr "type" "neon_fp_mul_s")] 7720) 7721 7722(define_insn "aarch64_simd_fmlsl_lane_lowv2sf" 7723 [(set (match_operand:V2SF 0 "register_operand" "=w") 7724 (fma:V2SF 7725 (float_extend:V2SF 7726 (neg:V2HF 7727 (vec_select:V2HF 7728 (match_operand:V4HF 2 "register_operand" "w") 7729 (match_operand:V4HF 4 "vect_par_cnst_lo_half" "")))) 7730 (float_extend:V2SF 7731 (vec_duplicate:V2HF 7732 (vec_select:HF 7733 (match_operand:V4HF 3 "register_operand" "x") 7734 (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")])))) 7735 (match_operand:V2SF 1 "register_operand" "0")))] 7736 "TARGET_F16FML" 7737 "fmlsl\\t%0.2s, %2.2h, %3.h[%5]" 7738 [(set_attr "type" "neon_fp_mul_s")] 7739) 7740 7741(define_insn "aarch64_simd_fmlal_lane_highv2sf" 7742 [(set (match_operand:V2SF 0 "register_operand" "=w") 7743 (fma:V2SF 7744 (float_extend:V2SF 7745 (vec_select:V2HF 7746 (match_operand:V4HF 2 "register_operand" "w") 7747 (match_operand:V4HF 4 "vect_par_cnst_hi_half" ""))) 7748 (float_extend:V2SF 7749 (vec_duplicate:V2HF 7750 (vec_select:HF 7751 (match_operand:V4HF 3 "register_operand" "x") 7752 (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")])))) 7753 (match_operand:V2SF 1 "register_operand" "0")))] 7754 "TARGET_F16FML" 7755 "fmlal2\\t%0.2s, %2.2h, %3.h[%5]" 7756 [(set_attr "type" "neon_fp_mul_s")] 7757) 7758 7759(define_insn "aarch64_simd_fmlsl_lane_highv2sf" 7760 [(set (match_operand:V2SF 0 "register_operand" "=w") 7761 (fma:V2SF 7762 (float_extend:V2SF 7763 (neg:V2HF 7764 (vec_select:V2HF 7765 (match_operand:V4HF 2 "register_operand" "w") 7766 (match_operand:V4HF 4 "vect_par_cnst_hi_half" "")))) 7767 (float_extend:V2SF 7768 (vec_duplicate:V2HF 7769 (vec_select:HF 7770 (match_operand:V4HF 3 "register_operand" "x") 7771 (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")])))) 7772 (match_operand:V2SF 1 "register_operand" "0")))] 7773 "TARGET_F16FML" 7774 "fmlsl2\\t%0.2s, %2.2h, %3.h[%5]" 7775 
[(set_attr "type" "neon_fp_mul_s")] 7776) 7777 7778(define_expand "aarch64_fml<f16mac1>lq_laneq_lowv4sf" 7779 [(set (match_operand:V4SF 0 "register_operand") 7780 (unspec:V4SF [(match_operand:V4SF 1 "register_operand") 7781 (match_operand:V8HF 2 "register_operand") 7782 (match_operand:V8HF 3 "register_operand") 7783 (match_operand:SI 4 "aarch64_lane_imm3")] 7784 VFMLA16_LOW))] 7785 "TARGET_F16FML" 7786{ 7787 rtx p1 = aarch64_simd_vect_par_cnst_half (V8HFmode, 8, false); 7788 rtx lane = aarch64_endian_lane_rtx (V8HFmode, INTVAL (operands[4])); 7789 7790 emit_insn (gen_aarch64_simd_fml<f16mac1>lq_laneq_lowv4sf (operands[0], 7791 operands[1], 7792 operands[2], 7793 operands[3], 7794 p1, lane)); 7795 DONE; 7796}) 7797 7798(define_expand "aarch64_fml<f16mac1>lq_laneq_highv4sf" 7799 [(set (match_operand:V4SF 0 "register_operand") 7800 (unspec:V4SF [(match_operand:V4SF 1 "register_operand") 7801 (match_operand:V8HF 2 "register_operand") 7802 (match_operand:V8HF 3 "register_operand") 7803 (match_operand:SI 4 "aarch64_lane_imm3")] 7804 VFMLA16_HIGH))] 7805 "TARGET_F16FML" 7806{ 7807 rtx p1 = aarch64_simd_vect_par_cnst_half (V8HFmode, 8, true); 7808 rtx lane = aarch64_endian_lane_rtx (V8HFmode, INTVAL (operands[4])); 7809 7810 emit_insn (gen_aarch64_simd_fml<f16mac1>lq_laneq_highv4sf (operands[0], 7811 operands[1], 7812 operands[2], 7813 operands[3], 7814 p1, lane)); 7815 DONE; 7816}) 7817 7818(define_insn "aarch64_simd_fmlalq_laneq_lowv4sf" 7819 [(set (match_operand:V4SF 0 "register_operand" "=w") 7820 (fma:V4SF 7821 (float_extend:V4SF 7822 (vec_select:V4HF 7823 (match_operand:V8HF 2 "register_operand" "w") 7824 (match_operand:V8HF 4 "vect_par_cnst_lo_half" ""))) 7825 (float_extend:V4SF 7826 (vec_duplicate:V4HF 7827 (vec_select:HF 7828 (match_operand:V8HF 3 "register_operand" "x") 7829 (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")])))) 7830 (match_operand:V4SF 1 "register_operand" "0")))] 7831 "TARGET_F16FML" 7832 "fmlal\\t%0.4s, %2.4h, %3.h[%5]" 7833 [(set_attr "type" "neon_fp_mul_s")] 7834) 7835 7836(define_insn "aarch64_simd_fmlslq_laneq_lowv4sf" 7837 [(set (match_operand:V4SF 0 "register_operand" "=w") 7838 (fma:V4SF 7839 (float_extend:V4SF 7840 (neg:V4HF 7841 (vec_select:V4HF 7842 (match_operand:V8HF 2 "register_operand" "w") 7843 (match_operand:V8HF 4 "vect_par_cnst_lo_half" "")))) 7844 (float_extend:V4SF 7845 (vec_duplicate:V4HF 7846 (vec_select:HF 7847 (match_operand:V8HF 3 "register_operand" "x") 7848 (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")])))) 7849 (match_operand:V4SF 1 "register_operand" "0")))] 7850 "TARGET_F16FML" 7851 "fmlsl\\t%0.4s, %2.4h, %3.h[%5]" 7852 [(set_attr "type" "neon_fp_mul_s")] 7853) 7854 7855(define_insn "aarch64_simd_fmlalq_laneq_highv4sf" 7856 [(set (match_operand:V4SF 0 "register_operand" "=w") 7857 (fma:V4SF 7858 (float_extend:V4SF 7859 (vec_select:V4HF 7860 (match_operand:V8HF 2 "register_operand" "w") 7861 (match_operand:V8HF 4 "vect_par_cnst_hi_half" ""))) 7862 (float_extend:V4SF 7863 (vec_duplicate:V4HF 7864 (vec_select:HF 7865 (match_operand:V8HF 3 "register_operand" "x") 7866 (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")])))) 7867 (match_operand:V4SF 1 "register_operand" "0")))] 7868 "TARGET_F16FML" 7869 "fmlal2\\t%0.4s, %2.4h, %3.h[%5]" 7870 [(set_attr "type" "neon_fp_mul_s")] 7871) 7872 7873(define_insn "aarch64_simd_fmlslq_laneq_highv4sf" 7874 [(set (match_operand:V4SF 0 "register_operand" "=w") 7875 (fma:V4SF 7876 (float_extend:V4SF 7877 (neg:V4HF 7878 (vec_select:V4HF 7879 (match_operand:V8HF 2 "register_operand" 
"w") 7880 (match_operand:V8HF 4 "vect_par_cnst_hi_half" "")))) 7881 (float_extend:V4SF 7882 (vec_duplicate:V4HF 7883 (vec_select:HF 7884 (match_operand:V8HF 3 "register_operand" "x") 7885 (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")])))) 7886 (match_operand:V4SF 1 "register_operand" "0")))] 7887 "TARGET_F16FML" 7888 "fmlsl2\\t%0.4s, %2.4h, %3.h[%5]" 7889 [(set_attr "type" "neon_fp_mul_s")] 7890) 7891 7892(define_expand "aarch64_fml<f16mac1>l_laneq_lowv2sf" 7893 [(set (match_operand:V2SF 0 "register_operand") 7894 (unspec:V2SF [(match_operand:V2SF 1 "register_operand") 7895 (match_operand:V4HF 2 "register_operand") 7896 (match_operand:V8HF 3 "register_operand") 7897 (match_operand:SI 4 "aarch64_lane_imm3")] 7898 VFMLA16_LOW))] 7899 "TARGET_F16FML" 7900{ 7901 rtx p1 = aarch64_simd_vect_par_cnst_half (V4HFmode, 4, false); 7902 rtx lane = aarch64_endian_lane_rtx (V8HFmode, INTVAL (operands[4])); 7903 7904 emit_insn (gen_aarch64_simd_fml<f16mac1>l_laneq_lowv2sf (operands[0], 7905 operands[1], 7906 operands[2], 7907 operands[3], 7908 p1, lane)); 7909 DONE; 7910 7911}) 7912 7913(define_expand "aarch64_fml<f16mac1>l_laneq_highv2sf" 7914 [(set (match_operand:V2SF 0 "register_operand") 7915 (unspec:V2SF [(match_operand:V2SF 1 "register_operand") 7916 (match_operand:V4HF 2 "register_operand") 7917 (match_operand:V8HF 3 "register_operand") 7918 (match_operand:SI 4 "aarch64_lane_imm3")] 7919 VFMLA16_HIGH))] 7920 "TARGET_F16FML" 7921{ 7922 rtx p1 = aarch64_simd_vect_par_cnst_half (V4HFmode, 4, true); 7923 rtx lane = aarch64_endian_lane_rtx (V8HFmode, INTVAL (operands[4])); 7924 7925 emit_insn (gen_aarch64_simd_fml<f16mac1>l_laneq_highv2sf (operands[0], 7926 operands[1], 7927 operands[2], 7928 operands[3], 7929 p1, lane)); 7930 DONE; 7931 7932}) 7933 7934(define_insn "aarch64_simd_fmlal_laneq_lowv2sf" 7935 [(set (match_operand:V2SF 0 "register_operand" "=w") 7936 (fma:V2SF 7937 (float_extend:V2SF 7938 (vec_select:V2HF 7939 (match_operand:V4HF 2 "register_operand" "w") 7940 (match_operand:V4HF 4 "vect_par_cnst_lo_half" ""))) 7941 (float_extend:V2SF 7942 (vec_duplicate:V2HF 7943 (vec_select:HF 7944 (match_operand:V8HF 3 "register_operand" "x") 7945 (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")])))) 7946 (match_operand:V2SF 1 "register_operand" "0")))] 7947 "TARGET_F16FML" 7948 "fmlal\\t%0.2s, %2.2h, %3.h[%5]" 7949 [(set_attr "type" "neon_fp_mul_s")] 7950) 7951 7952(define_insn "aarch64_simd_fmlsl_laneq_lowv2sf" 7953 [(set (match_operand:V2SF 0 "register_operand" "=w") 7954 (fma:V2SF 7955 (float_extend:V2SF 7956 (neg:V2HF 7957 (vec_select:V2HF 7958 (match_operand:V4HF 2 "register_operand" "w") 7959 (match_operand:V4HF 4 "vect_par_cnst_lo_half" "")))) 7960 (float_extend:V2SF 7961 (vec_duplicate:V2HF 7962 (vec_select:HF 7963 (match_operand:V8HF 3 "register_operand" "x") 7964 (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")])))) 7965 (match_operand:V2SF 1 "register_operand" "0")))] 7966 "TARGET_F16FML" 7967 "fmlsl\\t%0.2s, %2.2h, %3.h[%5]" 7968 [(set_attr "type" "neon_fp_mul_s")] 7969) 7970 7971(define_insn "aarch64_simd_fmlal_laneq_highv2sf" 7972 [(set (match_operand:V2SF 0 "register_operand" "=w") 7973 (fma:V2SF 7974 (float_extend:V2SF 7975 (vec_select:V2HF 7976 (match_operand:V4HF 2 "register_operand" "w") 7977 (match_operand:V4HF 4 "vect_par_cnst_hi_half" ""))) 7978 (float_extend:V2SF 7979 (vec_duplicate:V2HF 7980 (vec_select:HF 7981 (match_operand:V8HF 3 "register_operand" "x") 7982 (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")])))) 7983 (match_operand:V2SF 1 
"register_operand" "0")))] 7984 "TARGET_F16FML" 7985 "fmlal2\\t%0.2s, %2.2h, %3.h[%5]" 7986 [(set_attr "type" "neon_fp_mul_s")] 7987) 7988 7989(define_insn "aarch64_simd_fmlsl_laneq_highv2sf" 7990 [(set (match_operand:V2SF 0 "register_operand" "=w") 7991 (fma:V2SF 7992 (float_extend:V2SF 7993 (neg:V2HF 7994 (vec_select:V2HF 7995 (match_operand:V4HF 2 "register_operand" "w") 7996 (match_operand:V4HF 4 "vect_par_cnst_hi_half" "")))) 7997 (float_extend:V2SF 7998 (vec_duplicate:V2HF 7999 (vec_select:HF 8000 (match_operand:V8HF 3 "register_operand" "x") 8001 (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")])))) 8002 (match_operand:V2SF 1 "register_operand" "0")))] 8003 "TARGET_F16FML" 8004 "fmlsl2\\t%0.2s, %2.2h, %3.h[%5]" 8005 [(set_attr "type" "neon_fp_mul_s")] 8006) 8007 8008(define_expand "aarch64_fml<f16mac1>lq_lane_lowv4sf" 8009 [(set (match_operand:V4SF 0 "register_operand") 8010 (unspec:V4SF [(match_operand:V4SF 1 "register_operand") 8011 (match_operand:V8HF 2 "register_operand") 8012 (match_operand:V4HF 3 "register_operand") 8013 (match_operand:SI 4 "aarch64_imm2")] 8014 VFMLA16_LOW))] 8015 "TARGET_F16FML" 8016{ 8017 rtx p1 = aarch64_simd_vect_par_cnst_half (V8HFmode, 8, false); 8018 rtx lane = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[4])); 8019 8020 emit_insn (gen_aarch64_simd_fml<f16mac1>lq_lane_lowv4sf (operands[0], 8021 operands[1], 8022 operands[2], 8023 operands[3], 8024 p1, lane)); 8025 DONE; 8026}) 8027 8028(define_expand "aarch64_fml<f16mac1>lq_lane_highv4sf" 8029 [(set (match_operand:V4SF 0 "register_operand") 8030 (unspec:V4SF [(match_operand:V4SF 1 "register_operand") 8031 (match_operand:V8HF 2 "register_operand") 8032 (match_operand:V4HF 3 "register_operand") 8033 (match_operand:SI 4 "aarch64_imm2")] 8034 VFMLA16_HIGH))] 8035 "TARGET_F16FML" 8036{ 8037 rtx p1 = aarch64_simd_vect_par_cnst_half (V8HFmode, 8, true); 8038 rtx lane = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[4])); 8039 8040 emit_insn (gen_aarch64_simd_fml<f16mac1>lq_lane_highv4sf (operands[0], 8041 operands[1], 8042 operands[2], 8043 operands[3], 8044 p1, lane)); 8045 DONE; 8046}) 8047 8048(define_insn "aarch64_simd_fmlalq_lane_lowv4sf" 8049 [(set (match_operand:V4SF 0 "register_operand" "=w") 8050 (fma:V4SF 8051 (float_extend:V4SF 8052 (vec_select:V4HF 8053 (match_operand:V8HF 2 "register_operand" "w") 8054 (match_operand:V8HF 4 "vect_par_cnst_lo_half" ""))) 8055 (float_extend:V4SF 8056 (vec_duplicate:V4HF 8057 (vec_select:HF 8058 (match_operand:V4HF 3 "register_operand" "x") 8059 (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")])))) 8060 (match_operand:V4SF 1 "register_operand" "0")))] 8061 "TARGET_F16FML" 8062 "fmlal\\t%0.4s, %2.4h, %3.h[%5]" 8063 [(set_attr "type" "neon_fp_mul_s")] 8064) 8065 8066(define_insn "aarch64_simd_fmlslq_lane_lowv4sf" 8067 [(set (match_operand:V4SF 0 "register_operand" "=w") 8068 (fma:V4SF 8069 (float_extend:V4SF 8070 (neg:V4HF 8071 (vec_select:V4HF 8072 (match_operand:V8HF 2 "register_operand" "w") 8073 (match_operand:V8HF 4 "vect_par_cnst_lo_half" "")))) 8074 (float_extend:V4SF 8075 (vec_duplicate:V4HF 8076 (vec_select:HF 8077 (match_operand:V4HF 3 "register_operand" "x") 8078 (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")])))) 8079 (match_operand:V4SF 1 "register_operand" "0")))] 8080 "TARGET_F16FML" 8081 "fmlsl\\t%0.4s, %2.4h, %3.h[%5]" 8082 [(set_attr "type" "neon_fp_mul_s")] 8083) 8084 8085(define_insn "aarch64_simd_fmlalq_lane_highv4sf" 8086 [(set (match_operand:V4SF 0 "register_operand" "=w") 8087 (fma:V4SF 8088 (float_extend:V4SF 
(define_insn "aarch64_simd_fmlalq_lane_highv4sf"
  [(set (match_operand:V4SF 0 "register_operand" "=w")
        (fma:V4SF
          (float_extend:V4SF
            (vec_select:V4HF
              (match_operand:V8HF 2 "register_operand" "w")
              (match_operand:V8HF 4 "vect_par_cnst_hi_half" "")))
          (float_extend:V4SF
            (vec_duplicate:V4HF
              (vec_select:HF
                (match_operand:V4HF 3 "register_operand" "x")
                (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
          (match_operand:V4SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlal2\\t%0.4s, %2.4h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)

(define_insn "aarch64_simd_fmlslq_lane_highv4sf"
  [(set (match_operand:V4SF 0 "register_operand" "=w")
        (fma:V4SF
          (float_extend:V4SF
            (neg:V4HF
              (vec_select:V4HF
                (match_operand:V8HF 2 "register_operand" "w")
                (match_operand:V8HF 4 "vect_par_cnst_hi_half" ""))))
          (float_extend:V4SF
            (vec_duplicate:V4HF
              (vec_select:HF
                (match_operand:V4HF 3 "register_operand" "x")
                (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
          (match_operand:V4SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlsl2\\t%0.4s, %2.4h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)

;; pmull

(define_insn "aarch64_crypto_pmulldi"
  [(set (match_operand:TI 0 "register_operand" "=w")
        (unspec:TI [(match_operand:DI 1 "register_operand" "w")
                    (match_operand:DI 2 "register_operand" "w")]
                   UNSPEC_PMULL))]
  "TARGET_SIMD && TARGET_AES"
  "pmull\\t%0.1q, %1.1d, %2.1d"
  [(set_attr "type" "crypto_pmull")]
)

(define_insn "aarch64_crypto_pmullv2di"
  [(set (match_operand:TI 0 "register_operand" "=w")
        (unspec:TI [(match_operand:V2DI 1 "register_operand" "w")
                    (match_operand:V2DI 2 "register_operand" "w")]
                   UNSPEC_PMULL2))]
  "TARGET_SIMD && TARGET_AES"
  "pmull2\\t%0.1q, %1.2d, %2.2d"
  [(set_attr "type" "crypto_pmull")]
)

;; Sign- or zero-extend a 64-bit integer vector to a 128-bit vector.
(define_insn "<optab><Vnarrowq><mode>2"
  [(set (match_operand:VQN 0 "register_operand" "=w")
        (ANY_EXTEND:VQN (match_operand:<VNARROWQ> 1 "register_operand" "w")))]
  "TARGET_SIMD"
  "<su>xtl\t%0.<Vtype>, %1.<Vntype>"
  [(set_attr "type" "neon_shift_imm_long")]
)

(define_expand "aarch64_<su>xtl<mode>"
  [(set (match_operand:VQN 0 "register_operand" "=w")
        (ANY_EXTEND:VQN (match_operand:<VNARROWQ> 1 "register_operand" "w")))]
  "TARGET_SIMD"
  ""
)

(define_expand "aarch64_xtn<mode>"
  [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
        (truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand" "w")))]
  "TARGET_SIMD"
  ""
)
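;; For example (illustrative):
;;
;;   int32x4_t widen (int16x4_t a) { return vmovl_s16 (a); }
;;   /* sxtl v0.4s, v0.4h  */
;;   int16x4_t narrow (int32x4_t a) { return vmovn_s32 (a); }
;;   /* xtn  v0.4h, v0.4s  */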
8168(define_insn "trunc<mode><Vnarrowq>2" 8169 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w") 8170 (truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand" "w")))] 8171 "TARGET_SIMD" 8172 "xtn\t%0.<Vntype>, %1.<Vtype>" 8173 [(set_attr "type" "neon_shift_imm_narrow_q")] 8174) 8175 8176(define_insn "aarch64_xtn2<mode>_le" 8177 [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w") 8178 (vec_concat:<VNARROWQ2> 8179 (match_operand:<VNARROWQ> 1 "register_operand" "0") 8180 (truncate:<VNARROWQ> (match_operand:VQN 2 "register_operand" "w"))))] 8181 "TARGET_SIMD && !BYTES_BIG_ENDIAN" 8182 "xtn2\t%0.<V2ntype>, %2.<Vtype>" 8183 [(set_attr "type" "neon_shift_imm_narrow_q")] 8184) 8185 8186(define_insn "aarch64_xtn2<mode>_be" 8187 [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w") 8188 (vec_concat:<VNARROWQ2> 8189 (truncate:<VNARROWQ> (match_operand:VQN 2 "register_operand" "w")) 8190 (match_operand:<VNARROWQ> 1 "register_operand" "0")))] 8191 "TARGET_SIMD && BYTES_BIG_ENDIAN" 8192 "xtn2\t%0.<V2ntype>, %2.<Vtype>" 8193 [(set_attr "type" "neon_shift_imm_narrow_q")] 8194) 8195 8196(define_expand "aarch64_xtn2<mode>" 8197 [(match_operand:<VNARROWQ2> 0 "register_operand") 8198 (match_operand:<VNARROWQ> 1 "register_operand") 8199 (truncate:<VNARROWQ> (match_operand:VQN 2 "register_operand"))] 8200 "TARGET_SIMD" 8201 { 8202 if (BYTES_BIG_ENDIAN) 8203 emit_insn (gen_aarch64_xtn2<mode>_be (operands[0], operands[1], 8204 operands[2])); 8205 else 8206 emit_insn (gen_aarch64_xtn2<mode>_le (operands[0], operands[1], 8207 operands[2])); 8208 DONE; 8209 } 8210) 8211 8212(define_insn "aarch64_bfdot<mode>" 8213 [(set (match_operand:VDQSF 0 "register_operand" "=w") 8214 (plus:VDQSF 8215 (unspec:VDQSF 8216 [(match_operand:<VBFMLA_W> 2 "register_operand" "w") 8217 (match_operand:<VBFMLA_W> 3 "register_operand" "w")] 8218 UNSPEC_BFDOT) 8219 (match_operand:VDQSF 1 "register_operand" "0")))] 8220 "TARGET_BF16_SIMD" 8221 "bfdot\t%0.<Vtype>, %2.<Vbfdottype>, %3.<Vbfdottype>" 8222 [(set_attr "type" "neon_dot<q>")] 8223) 8224 8225(define_insn "aarch64_bfdot_lane<VBF:isquadop><VDQSF:mode>" 8226 [(set (match_operand:VDQSF 0 "register_operand" "=w") 8227 (plus:VDQSF 8228 (unspec:VDQSF 8229 [(match_operand:<VDQSF:VBFMLA_W> 2 "register_operand" "w") 8230 (match_operand:VBF 3 "register_operand" "w") 8231 (match_operand:SI 4 "const_int_operand" "n")] 8232 UNSPEC_BFDOT) 8233 (match_operand:VDQSF 1 "register_operand" "0")))] 8234 "TARGET_BF16_SIMD" 8235{ 8236 int nunits = GET_MODE_NUNITS (<VBF:MODE>mode).to_constant (); 8237 int lane = INTVAL (operands[4]); 8238 operands[4] = gen_int_mode (ENDIAN_LANE_N (nunits / 2, lane), SImode); 8239 return "bfdot\t%0.<VDQSF:Vtype>, %2.<VDQSF:Vbfdottype>, %3.2h[%4]"; 8240} 8241 [(set_attr "type" "neon_dot<VDQSF:q>")] 8242) 8243 8244;; vget_low/high_bf16 8245(define_expand "aarch64_vget_lo_halfv8bf" 8246 [(match_operand:V4BF 0 "register_operand") 8247 (match_operand:V8BF 1 "register_operand")] 8248 "TARGET_BF16_SIMD" 8249{ 8250 rtx p = aarch64_simd_vect_par_cnst_half (V8BFmode, 8, false); 8251 emit_insn (gen_aarch64_get_halfv8bf (operands[0], operands[1], p)); 8252 DONE; 8253}) 8254 8255(define_expand "aarch64_vget_hi_halfv8bf" 8256 [(match_operand:V4BF 0 "register_operand") 8257 (match_operand:V8BF 1 "register_operand")] 8258 "TARGET_BF16_SIMD" 8259{ 8260 rtx p = aarch64_simd_vect_par_cnst_half (V8BFmode, 8, true); 8261 emit_insn (gen_aarch64_get_halfv8bf (operands[0], operands[1], p)); 8262 DONE; 8263}) 8264 8265;; bfmmla 8266(define_insn "aarch64_bfmmlaqv4sf" 8267 

;; bfmmla
(define_insn "aarch64_bfmmlaqv4sf"
  [(set (match_operand:V4SF 0 "register_operand" "=w")
	(plus:V4SF (match_operand:V4SF 1 "register_operand" "0")
		   (unspec:V4SF [(match_operand:V8BF 2 "register_operand" "w")
				 (match_operand:V8BF 3 "register_operand" "w")]
				UNSPEC_BFMMLA)))]
  "TARGET_BF16_SIMD"
  "bfmmla\\t%0.4s, %2.8h, %3.8h"
  [(set_attr "type" "neon_fp_mla_s_q")]
)

;; bfmlal<bt>
(define_insn "aarch64_bfmlal<bt>v4sf"
  [(set (match_operand:V4SF 0 "register_operand" "=w")
	(plus:V4SF (match_operand:V4SF 1 "register_operand" "0")
		   (unspec:V4SF [(match_operand:V8BF 2 "register_operand" "w")
				 (match_operand:V8BF 3 "register_operand" "w")]
				BF_MLA)))]
  "TARGET_BF16_SIMD"
  "bfmlal<bt>\\t%0.4s, %2.8h, %3.8h"
  [(set_attr "type" "neon_fp_mla_s_q")]
)

(define_insn "aarch64_bfmlal<bt>_lane<q>v4sf"
  [(set (match_operand:V4SF 0 "register_operand" "=w")
	(plus:V4SF (match_operand:V4SF 1 "register_operand" "0")
		   (unspec:V4SF [(match_operand:V8BF 2 "register_operand" "w")
				 (match_operand:VBF 3 "register_operand" "w")
				 (match_operand:SI 4 "const_int_operand" "n")]
				BF_MLA)))]
  "TARGET_BF16_SIMD"
{
  operands[4] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[4]));
  return "bfmlal<bt>\\t%0.4s, %2.8h, %3.h[%4]";
}
  [(set_attr "type" "neon_fp_mla_s_scalar_q")]
)

;; 8-bit integer matrix multiply-accumulate
(define_insn "aarch64_simd_<sur>mmlav16qi"
  [(set (match_operand:V4SI 0 "register_operand" "=w")
	(plus:V4SI
	  (unspec:V4SI [(match_operand:V16QI 2 "register_operand" "w")
			(match_operand:V16QI 3 "register_operand" "w")]
		       MATMUL)
	  (match_operand:V4SI 1 "register_operand" "0")))]
  "TARGET_I8MM"
  "<sur>mmla\\t%0.4s, %2.16b, %3.16b"
  [(set_attr "type" "neon_mla_s_q")]
)

;; bfcvtn
(define_insn "aarch64_bfcvtn<q><mode>"
  [(set (match_operand:V4SF_TO_BF 0 "register_operand" "=w")
	(unspec:V4SF_TO_BF [(match_operand:V4SF 1 "register_operand" "w")]
			   UNSPEC_BFCVTN))]
  "TARGET_BF16_SIMD"
  "bfcvtn\\t%0.4h, %1.4s"
  [(set_attr "type" "neon_fp_cvt_narrow_s_q")]
)

(define_insn "aarch64_bfcvtn2v8bf"
  [(set (match_operand:V8BF 0 "register_operand" "=w")
	(unspec:V8BF [(match_operand:V8BF 1 "register_operand" "0")
		      (match_operand:V4SF 2 "register_operand" "w")]
		     UNSPEC_BFCVTN2))]
  "TARGET_BF16_SIMD"
  "bfcvtn2\\t%0.8h, %2.4s"
  [(set_attr "type" "neon_fp_cvt_narrow_s_q")]
)

(define_insn "aarch64_bfcvtbf"
  [(set (match_operand:BF 0 "register_operand" "=w")
	(unspec:BF [(match_operand:SF 1 "register_operand" "w")]
		   UNSPEC_BFCVT))]
  "TARGET_BF16_FP"
  "bfcvt\\t%h0, %s1"
  [(set_attr "type" "f_cvt")]
)
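
;; For orientation (a sketch, assuming the standard ACLE spelling),
;; aarch64_bfcvtbf is the pattern behind the scalar narrowing intrinsic
;;
;;   bfloat16_t narrow (float32_t x) { return vcvth_bf16_f32 (x); }
;;
;; which rounds one single-precision value to bfloat16 with a lone "bfcvt".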

;; Use shl/shll/shll2 to convert BF scalar/vector modes to SF modes.
(define_insn "aarch64_vbfcvt<mode>"
  [(set (match_operand:V4SF 0 "register_operand" "=w")
	(unspec:V4SF [(match_operand:VBF 1 "register_operand" "w")]
		     UNSPEC_BFCVTN))]
  "TARGET_BF16_SIMD"
  "shll\\t%0.4s, %1.4h, #16"
  [(set_attr "type" "neon_shift_imm_long")]
)

(define_insn "aarch64_vbfcvt_highv8bf"
  [(set (match_operand:V4SF 0 "register_operand" "=w")
	(unspec:V4SF [(match_operand:V8BF 1 "register_operand" "w")]
		     UNSPEC_BFCVTN2))]
  "TARGET_BF16_SIMD"
  "shll2\\t%0.4s, %1.8h, #16"
  [(set_attr "type" "neon_shift_imm_long")]
)

(define_insn "aarch64_bfcvtsf"
  [(set (match_operand:SF 0 "register_operand" "=w")
	(unspec:SF [(match_operand:BF 1 "register_operand" "w")]
		   UNSPEC_BFCVT))]
  "TARGET_BF16_FP"
  "shl\\t%d0, %d1, #16"
  [(set_attr "type" "neon_shift_imm")]
)
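
;; The shift forms above are exact because bfloat16 is the high 16 bits of
;; an IEEE binary32 value: shifting a BF bit pattern left by 16 recreates
;; the corresponding SF bit pattern, e.g. 0x3f80 << 16 == 0x3f800000, which
;; is 1.0f.  The widening direction is therefore lossless and needs no
;; floating-point arithmetic at all.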