1;; Machine description for AArch64 AdvSIMD architecture. 2;; Copyright (C) 2011-2018 Free Software Foundation, Inc. 3;; Contributed by ARM Ltd. 4;; 5;; This file is part of GCC. 6;; 7;; GCC is free software; you can redistribute it and/or modify it 8;; under the terms of the GNU General Public License as published by 9;; the Free Software Foundation; either version 3, or (at your option) 10;; any later version. 11;; 12;; GCC is distributed in the hope that it will be useful, but 13;; WITHOUT ANY WARRANTY; without even the implied warranty of 14;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 15;; General Public License for more details. 16;; 17;; You should have received a copy of the GNU General Public License 18;; along with GCC; see the file COPYING3. If not see 19;; <http://www.gnu.org/licenses/>. 20 21(define_expand "mov<mode>" 22 [(set (match_operand:VALL_F16 0 "nonimmediate_operand" "") 23 (match_operand:VALL_F16 1 "general_operand" ""))] 24 "TARGET_SIMD" 25 " 26 /* Force the operand into a register if it is not an 27 immediate whose use can be replaced with xzr. 28 If the mode is 16 bytes wide, then we will be doing 29 a stp in DI mode, so we check the validity of that. 30 If the mode is 8 bytes wide, then we will do doing a 31 normal str, so the check need not apply. */ 32 if (GET_CODE (operands[0]) == MEM 33 && !(aarch64_simd_imm_zero (operands[1], <MODE>mode) 34 && ((known_eq (GET_MODE_SIZE (<MODE>mode), 16) 35 && aarch64_mem_pair_operand (operands[0], DImode)) 36 || known_eq (GET_MODE_SIZE (<MODE>mode), 8)))) 37 operands[1] = force_reg (<MODE>mode, operands[1]); 38 " 39) 40 41(define_expand "movmisalign<mode>" 42 [(set (match_operand:VALL 0 "nonimmediate_operand" "") 43 (match_operand:VALL 1 "general_operand" ""))] 44 "TARGET_SIMD" 45{ 46 /* This pattern is not permitted to fail during expansion: if both arguments 47 are non-registers (e.g. 
memory := constant, which can be created by the 48 auto-vectorizer), force operand 1 into a register. */ 49 if (!register_operand (operands[0], <MODE>mode) 50 && !register_operand (operands[1], <MODE>mode)) 51 operands[1] = force_reg (<MODE>mode, operands[1]); 52}) 53 54(define_insn "aarch64_simd_dup<mode>" 55 [(set (match_operand:VDQ_I 0 "register_operand" "=w, w") 56 (vec_duplicate:VDQ_I 57 (match_operand:<VEL> 1 "register_operand" "w,?r")))] 58 "TARGET_SIMD" 59 "@ 60 dup\\t%0.<Vtype>, %1.<Vetype>[0] 61 dup\\t%0.<Vtype>, %<vw>1" 62 [(set_attr "type" "neon_dup<q>, neon_from_gp<q>")] 63) 64 65(define_insn "aarch64_simd_dup<mode>" 66 [(set (match_operand:VDQF_F16 0 "register_operand" "=w") 67 (vec_duplicate:VDQF_F16 68 (match_operand:<VEL> 1 "register_operand" "w")))] 69 "TARGET_SIMD" 70 "dup\\t%0.<Vtype>, %1.<Vetype>[0]" 71 [(set_attr "type" "neon_dup<q>")] 72) 73 74(define_insn "aarch64_dup_lane<mode>" 75 [(set (match_operand:VALL_F16 0 "register_operand" "=w") 76 (vec_duplicate:VALL_F16 77 (vec_select:<VEL> 78 (match_operand:VALL_F16 1 "register_operand" "w") 79 (parallel [(match_operand:SI 2 "immediate_operand" "i")]) 80 )))] 81 "TARGET_SIMD" 82 { 83 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2])); 84 return "dup\\t%0.<Vtype>, %1.<Vetype>[%2]"; 85 } 86 [(set_attr "type" "neon_dup<q>")] 87) 88 89(define_insn "aarch64_dup_lane_<vswap_width_name><mode>" 90 [(set (match_operand:VALL_F16_NO_V2Q 0 "register_operand" "=w") 91 (vec_duplicate:VALL_F16_NO_V2Q 92 (vec_select:<VEL> 93 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "w") 94 (parallel [(match_operand:SI 2 "immediate_operand" "i")]) 95 )))] 96 "TARGET_SIMD" 97 { 98 operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2])); 99 return "dup\\t%0.<Vtype>, %1.<Vetype>[%2]"; 100 } 101 [(set_attr "type" "neon_dup<q>")] 102) 103 104(define_insn "*aarch64_simd_mov<VD:mode>" 105 [(set (match_operand:VD 0 "nonimmediate_operand" 106 "=w, m, m, w, ?r, ?w, ?r, w") 107 
(match_operand:VD 1 "general_operand" 108 "m, Dz, w, w, w, r, r, Dn"))] 109 "TARGET_SIMD 110 && (register_operand (operands[0], <MODE>mode) 111 || aarch64_simd_reg_or_zero (operands[1], <MODE>mode))" 112{ 113 switch (which_alternative) 114 { 115 case 0: return "ldr\t%d0, %1"; 116 case 1: return "str\txzr, %0"; 117 case 2: return "str\t%d1, %0"; 118 case 3: return "mov\t%0.<Vbtype>, %1.<Vbtype>"; 119 case 4: return "umov\t%0, %1.d[0]"; 120 case 5: return "fmov\t%d0, %1"; 121 case 6: return "mov\t%0, %1"; 122 case 7: 123 return aarch64_output_simd_mov_immediate (operands[1], 64); 124 default: gcc_unreachable (); 125 } 126} 127 [(set_attr "type" "neon_load1_1reg<q>, store_8, neon_store1_1reg<q>,\ 128 neon_logic<q>, neon_to_gp<q>, f_mcr,\ 129 mov_reg, neon_move<q>")] 130) 131 132(define_insn "*aarch64_simd_mov<VQ:mode>" 133 [(set (match_operand:VQ 0 "nonimmediate_operand" 134 "=w, Umq, m, w, ?r, ?w, ?r, w") 135 (match_operand:VQ 1 "general_operand" 136 "m, Dz, w, w, w, r, r, Dn"))] 137 "TARGET_SIMD 138 && (register_operand (operands[0], <MODE>mode) 139 || aarch64_simd_reg_or_zero (operands[1], <MODE>mode))" 140{ 141 switch (which_alternative) 142 { 143 case 0: 144 return "ldr\t%q0, %1"; 145 case 1: 146 return "stp\txzr, xzr, %0"; 147 case 2: 148 return "str\t%q1, %0"; 149 case 3: 150 return "mov\t%0.<Vbtype>, %1.<Vbtype>"; 151 case 4: 152 case 5: 153 case 6: 154 return "#"; 155 case 7: 156 return aarch64_output_simd_mov_immediate (operands[1], 128); 157 default: 158 gcc_unreachable (); 159 } 160} 161 [(set_attr "type" "neon_load1_1reg<q>, store_16, neon_store1_1reg<q>,\ 162 neon_logic<q>, multiple, multiple,\ 163 multiple, neon_move<q>") 164 (set_attr "length" "4,4,4,4,8,8,8,4")] 165) 166 167;; When storing lane zero we can use the normal STR and its more permissive 168;; addressing modes. 
169 170(define_insn "aarch64_store_lane0<mode>" 171 [(set (match_operand:<VEL> 0 "memory_operand" "=m") 172 (vec_select:<VEL> (match_operand:VALL_F16 1 "register_operand" "w") 173 (parallel [(match_operand 2 "const_int_operand" "n")])))] 174 "TARGET_SIMD 175 && ENDIAN_LANE_N (<nunits>, INTVAL (operands[2])) == 0" 176 "str\\t%<Vetype>1, %0" 177 [(set_attr "type" "neon_store1_1reg<q>")] 178) 179 180(define_insn "load_pair<mode>" 181 [(set (match_operand:VD 0 "register_operand" "=w") 182 (match_operand:VD 1 "aarch64_mem_pair_operand" "Ump")) 183 (set (match_operand:VD 2 "register_operand" "=w") 184 (match_operand:VD 3 "memory_operand" "m"))] 185 "TARGET_SIMD 186 && rtx_equal_p (XEXP (operands[3], 0), 187 plus_constant (Pmode, 188 XEXP (operands[1], 0), 189 GET_MODE_SIZE (<MODE>mode)))" 190 "ldp\\t%d0, %d2, %1" 191 [(set_attr "type" "neon_ldp")] 192) 193 194(define_insn "store_pair<mode>" 195 [(set (match_operand:VD 0 "aarch64_mem_pair_operand" "=Ump") 196 (match_operand:VD 1 "register_operand" "w")) 197 (set (match_operand:VD 2 "memory_operand" "=m") 198 (match_operand:VD 3 "register_operand" "w"))] 199 "TARGET_SIMD 200 && rtx_equal_p (XEXP (operands[2], 0), 201 plus_constant (Pmode, 202 XEXP (operands[0], 0), 203 GET_MODE_SIZE (<MODE>mode)))" 204 "stp\\t%d1, %d3, %0" 205 [(set_attr "type" "neon_stp")] 206) 207 208(define_split 209 [(set (match_operand:VQ 0 "register_operand" "") 210 (match_operand:VQ 1 "register_operand" ""))] 211 "TARGET_SIMD && reload_completed 212 && GP_REGNUM_P (REGNO (operands[0])) 213 && GP_REGNUM_P (REGNO (operands[1]))" 214 [(const_int 0)] 215{ 216 aarch64_simd_emit_reg_reg_move (operands, DImode, 2); 217 DONE; 218}) 219 220(define_split 221 [(set (match_operand:VQ 0 "register_operand" "") 222 (match_operand:VQ 1 "register_operand" ""))] 223 "TARGET_SIMD && reload_completed 224 && ((FP_REGNUM_P (REGNO (operands[0])) && GP_REGNUM_P (REGNO (operands[1]))) 225 || (GP_REGNUM_P (REGNO (operands[0])) && FP_REGNUM_P (REGNO (operands[1]))))" 226 
[(const_int 0)] 227{ 228 aarch64_split_simd_move (operands[0], operands[1]); 229 DONE; 230}) 231 232(define_expand "aarch64_split_simd_mov<mode>" 233 [(set (match_operand:VQ 0) 234 (match_operand:VQ 1))] 235 "TARGET_SIMD" 236 { 237 rtx dst = operands[0]; 238 rtx src = operands[1]; 239 240 if (GP_REGNUM_P (REGNO (src))) 241 { 242 rtx src_low_part = gen_lowpart (<VHALF>mode, src); 243 rtx src_high_part = gen_highpart (<VHALF>mode, src); 244 245 emit_insn 246 (gen_move_lo_quad_<mode> (dst, src_low_part)); 247 emit_insn 248 (gen_move_hi_quad_<mode> (dst, src_high_part)); 249 } 250 251 else 252 { 253 rtx dst_low_part = gen_lowpart (<VHALF>mode, dst); 254 rtx dst_high_part = gen_highpart (<VHALF>mode, dst); 255 rtx lo = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false); 256 rtx hi = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true); 257 258 emit_insn 259 (gen_aarch64_simd_mov_from_<mode>low (dst_low_part, src, lo)); 260 emit_insn 261 (gen_aarch64_simd_mov_from_<mode>high (dst_high_part, src, hi)); 262 } 263 DONE; 264 } 265) 266 267(define_insn "aarch64_simd_mov_from_<mode>low" 268 [(set (match_operand:<VHALF> 0 "register_operand" "=r") 269 (vec_select:<VHALF> 270 (match_operand:VQ 1 "register_operand" "w") 271 (match_operand:VQ 2 "vect_par_cnst_lo_half" "")))] 272 "TARGET_SIMD && reload_completed" 273 "umov\t%0, %1.d[0]" 274 [(set_attr "type" "neon_to_gp<q>") 275 (set_attr "length" "4") 276 ]) 277 278(define_insn "aarch64_simd_mov_from_<mode>high" 279 [(set (match_operand:<VHALF> 0 "register_operand" "=r") 280 (vec_select:<VHALF> 281 (match_operand:VQ 1 "register_operand" "w") 282 (match_operand:VQ 2 "vect_par_cnst_hi_half" "")))] 283 "TARGET_SIMD && reload_completed" 284 "umov\t%0, %1.d[1]" 285 [(set_attr "type" "neon_to_gp<q>") 286 (set_attr "length" "4") 287 ]) 288 289(define_insn "orn<mode>3" 290 [(set (match_operand:VDQ_I 0 "register_operand" "=w") 291 (ior:VDQ_I (not:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")) 292 
(match_operand:VDQ_I 2 "register_operand" "w")))] 293 "TARGET_SIMD" 294 "orn\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>" 295 [(set_attr "type" "neon_logic<q>")] 296) 297 298(define_insn "bic<mode>3" 299 [(set (match_operand:VDQ_I 0 "register_operand" "=w") 300 (and:VDQ_I (not:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")) 301 (match_operand:VDQ_I 2 "register_operand" "w")))] 302 "TARGET_SIMD" 303 "bic\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>" 304 [(set_attr "type" "neon_logic<q>")] 305) 306 307(define_insn "add<mode>3" 308 [(set (match_operand:VDQ_I 0 "register_operand" "=w") 309 (plus:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w") 310 (match_operand:VDQ_I 2 "register_operand" "w")))] 311 "TARGET_SIMD" 312 "add\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>" 313 [(set_attr "type" "neon_add<q>")] 314) 315 316(define_insn "sub<mode>3" 317 [(set (match_operand:VDQ_I 0 "register_operand" "=w") 318 (minus:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w") 319 (match_operand:VDQ_I 2 "register_operand" "w")))] 320 "TARGET_SIMD" 321 "sub\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>" 322 [(set_attr "type" "neon_sub<q>")] 323) 324 325(define_insn "mul<mode>3" 326 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w") 327 (mult:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w") 328 (match_operand:VDQ_BHSI 2 "register_operand" "w")))] 329 "TARGET_SIMD" 330 "mul\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>" 331 [(set_attr "type" "neon_mul_<Vetype><q>")] 332) 333 334(define_insn "bswap<mode>2" 335 [(set (match_operand:VDQHSD 0 "register_operand" "=w") 336 (bswap:VDQHSD (match_operand:VDQHSD 1 "register_operand" "w")))] 337 "TARGET_SIMD" 338 "rev<Vrevsuff>\\t%0.<Vbtype>, %1.<Vbtype>" 339 [(set_attr "type" "neon_rev<q>")] 340) 341 342(define_insn "aarch64_rbit<mode>" 343 [(set (match_operand:VB 0 "register_operand" "=w") 344 (unspec:VB [(match_operand:VB 1 "register_operand" "w")] 345 UNSPEC_RBIT))] 346 "TARGET_SIMD" 347 "rbit\\t%0.<Vbtype>, %1.<Vbtype>" 348 [(set_attr "type" "neon_rbit")] 
349) 350 351(define_expand "ctz<mode>2" 352 [(set (match_operand:VS 0 "register_operand") 353 (ctz:VS (match_operand:VS 1 "register_operand")))] 354 "TARGET_SIMD" 355 { 356 emit_insn (gen_bswap<mode>2 (operands[0], operands[1])); 357 rtx op0_castsi2qi = simplify_gen_subreg(<VS:VSI2QI>mode, operands[0], 358 <MODE>mode, 0); 359 emit_insn (gen_aarch64_rbit<VS:vsi2qi> (op0_castsi2qi, op0_castsi2qi)); 360 emit_insn (gen_clz<mode>2 (operands[0], operands[0])); 361 DONE; 362 } 363) 364 365(define_expand "xorsign<mode>3" 366 [(match_operand:VHSDF 0 "register_operand") 367 (match_operand:VHSDF 1 "register_operand") 368 (match_operand:VHSDF 2 "register_operand")] 369 "TARGET_SIMD" 370{ 371 372 machine_mode imode = <V_INT_EQUIV>mode; 373 rtx v_bitmask = gen_reg_rtx (imode); 374 rtx op1x = gen_reg_rtx (imode); 375 rtx op2x = gen_reg_rtx (imode); 376 377 rtx arg1 = lowpart_subreg (imode, operands[1], <MODE>mode); 378 rtx arg2 = lowpart_subreg (imode, operands[2], <MODE>mode); 379 380 int bits = GET_MODE_UNIT_BITSIZE (<MODE>mode) - 1; 381 382 emit_move_insn (v_bitmask, 383 aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode, 384 HOST_WIDE_INT_M1U << bits)); 385 386 emit_insn (gen_and<v_int_equiv>3 (op2x, v_bitmask, arg2)); 387 emit_insn (gen_xor<v_int_equiv>3 (op1x, arg1, op2x)); 388 emit_move_insn (operands[0], 389 lowpart_subreg (<MODE>mode, op1x, imode)); 390 DONE; 391} 392) 393 394;; These instructions map to the __builtins for the Dot Product operations. 395(define_insn "aarch64_<sur>dot<vsi2qi>" 396 [(set (match_operand:VS 0 "register_operand" "=w") 397 (plus:VS (match_operand:VS 1 "register_operand" "0") 398 (unspec:VS [(match_operand:<VSI2QI> 2 "register_operand" "w") 399 (match_operand:<VSI2QI> 3 "register_operand" "w")] 400 DOTPROD)))] 401 "TARGET_DOTPROD" 402 "<sur>dot\\t%0.<Vtype>, %2.<Vdottype>, %3.<Vdottype>" 403 [(set_attr "type" "neon_dot")] 404) 405 406;; These expands map to the Dot Product optab the vectorizer checks for. 
407;; The auto-vectorizer expects a dot product builtin that also does an 408;; accumulation into the provided register. 409;; Given the following pattern 410;; 411;; for (i=0; i<len; i++) { 412;; c = a[i] * b[i]; 413;; r += c; 414;; } 415;; return result; 416;; 417;; This can be auto-vectorized to 418;; r = a[0]*b[0] + a[1]*b[1] + a[2]*b[2] + a[3]*b[3]; 419;; 420;; given enough iterations. However the vectorizer can keep unrolling the loop 421;; r += a[4]*b[4] + a[5]*b[5] + a[6]*b[6] + a[7]*b[7]; 422;; r += a[8]*b[8] + a[9]*b[9] + a[10]*b[10] + a[11]*b[11]; 423;; ... 424;; 425;; and so the vectorizer provides r, in which the result has to be accumulated. 426(define_expand "<sur>dot_prod<vsi2qi>" 427 [(set (match_operand:VS 0 "register_operand") 428 (plus:VS (unspec:VS [(match_operand:<VSI2QI> 1 "register_operand") 429 (match_operand:<VSI2QI> 2 "register_operand")] 430 DOTPROD) 431 (match_operand:VS 3 "register_operand")))] 432 "TARGET_DOTPROD" 433{ 434 emit_insn ( 435 gen_aarch64_<sur>dot<vsi2qi> (operands[3], operands[3], operands[1], 436 operands[2])); 437 emit_insn (gen_rtx_SET (operands[0], operands[3])); 438 DONE; 439}) 440 441;; These instructions map to the __builtins for the Dot Product 442;; indexed operations. 
443(define_insn "aarch64_<sur>dot_lane<vsi2qi>" 444 [(set (match_operand:VS 0 "register_operand" "=w") 445 (plus:VS (match_operand:VS 1 "register_operand" "0") 446 (unspec:VS [(match_operand:<VSI2QI> 2 "register_operand" "w") 447 (match_operand:V8QI 3 "register_operand" "<h_con>") 448 (match_operand:SI 4 "immediate_operand" "i")] 449 DOTPROD)))] 450 "TARGET_DOTPROD" 451 { 452 operands[4] = aarch64_endian_lane_rtx (V8QImode, INTVAL (operands[4])); 453 return "<sur>dot\\t%0.<Vtype>, %2.<Vdottype>, %3.4b[%4]"; 454 } 455 [(set_attr "type" "neon_dot")] 456) 457 458(define_insn "aarch64_<sur>dot_laneq<vsi2qi>" 459 [(set (match_operand:VS 0 "register_operand" "=w") 460 (plus:VS (match_operand:VS 1 "register_operand" "0") 461 (unspec:VS [(match_operand:<VSI2QI> 2 "register_operand" "w") 462 (match_operand:V16QI 3 "register_operand" "<h_con>") 463 (match_operand:SI 4 "immediate_operand" "i")] 464 DOTPROD)))] 465 "TARGET_DOTPROD" 466 { 467 operands[4] = aarch64_endian_lane_rtx (V16QImode, INTVAL (operands[4])); 468 return "<sur>dot\\t%0.<Vtype>, %2.<Vdottype>, %3.4b[%4]"; 469 } 470 [(set_attr "type" "neon_dot")] 471) 472 473(define_expand "copysign<mode>3" 474 [(match_operand:VHSDF 0 "register_operand") 475 (match_operand:VHSDF 1 "register_operand") 476 (match_operand:VHSDF 2 "register_operand")] 477 "TARGET_FLOAT && TARGET_SIMD" 478{ 479 rtx v_bitmask = gen_reg_rtx (<V_INT_EQUIV>mode); 480 int bits = GET_MODE_UNIT_BITSIZE (<MODE>mode) - 1; 481 482 emit_move_insn (v_bitmask, 483 aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode, 484 HOST_WIDE_INT_M1U << bits)); 485 emit_insn (gen_aarch64_simd_bsl<mode> (operands[0], v_bitmask, 486 operands[2], operands[1])); 487 DONE; 488} 489) 490 491(define_insn "*aarch64_mul3_elt<mode>" 492 [(set (match_operand:VMUL 0 "register_operand" "=w") 493 (mult:VMUL 494 (vec_duplicate:VMUL 495 (vec_select:<VEL> 496 (match_operand:VMUL 1 "register_operand" "<h_con>") 497 (parallel [(match_operand:SI 2 "immediate_operand")]))) 498 
(match_operand:VMUL 3 "register_operand" "w")))] 499 "TARGET_SIMD" 500 { 501 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2])); 502 return "<f>mul\\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]"; 503 } 504 [(set_attr "type" "neon<fp>_mul_<stype>_scalar<q>")] 505) 506 507(define_insn "*aarch64_mul3_elt_<vswap_width_name><mode>" 508 [(set (match_operand:VMUL_CHANGE_NLANES 0 "register_operand" "=w") 509 (mult:VMUL_CHANGE_NLANES 510 (vec_duplicate:VMUL_CHANGE_NLANES 511 (vec_select:<VEL> 512 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>") 513 (parallel [(match_operand:SI 2 "immediate_operand")]))) 514 (match_operand:VMUL_CHANGE_NLANES 3 "register_operand" "w")))] 515 "TARGET_SIMD" 516 { 517 operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2])); 518 return "<f>mul\\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]"; 519 } 520 [(set_attr "type" "neon<fp>_mul_<Vetype>_scalar<q>")] 521) 522 523(define_insn "*aarch64_mul3_elt_from_dup<mode>" 524 [(set (match_operand:VMUL 0 "register_operand" "=w") 525 (mult:VMUL 526 (vec_duplicate:VMUL 527 (match_operand:<VEL> 1 "register_operand" "<h_con>")) 528 (match_operand:VMUL 2 "register_operand" "w")))] 529 "TARGET_SIMD" 530 "<f>mul\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]"; 531 [(set_attr "type" "neon<fp>_mul_<stype>_scalar<q>")] 532) 533 534(define_insn "aarch64_rsqrte<mode>" 535 [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w") 536 (unspec:VHSDF_HSDF [(match_operand:VHSDF_HSDF 1 "register_operand" "w")] 537 UNSPEC_RSQRTE))] 538 "TARGET_SIMD" 539 "frsqrte\\t%<v>0<Vmtype>, %<v>1<Vmtype>" 540 [(set_attr "type" "neon_fp_rsqrte_<stype><q>")]) 541 542(define_insn "aarch64_rsqrts<mode>" 543 [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w") 544 (unspec:VHSDF_HSDF [(match_operand:VHSDF_HSDF 1 "register_operand" "w") 545 (match_operand:VHSDF_HSDF 2 "register_operand" "w")] 546 UNSPEC_RSQRTS))] 547 "TARGET_SIMD" 548 "frsqrts\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>" 549 
[(set_attr "type" "neon_fp_rsqrts_<stype><q>")]) 550 551(define_expand "rsqrt<mode>2" 552 [(set (match_operand:VALLF 0 "register_operand" "=w") 553 (unspec:VALLF [(match_operand:VALLF 1 "register_operand" "w")] 554 UNSPEC_RSQRT))] 555 "TARGET_SIMD" 556{ 557 aarch64_emit_approx_sqrt (operands[0], operands[1], true); 558 DONE; 559}) 560 561(define_insn "*aarch64_mul3_elt_to_64v2df" 562 [(set (match_operand:DF 0 "register_operand" "=w") 563 (mult:DF 564 (vec_select:DF 565 (match_operand:V2DF 1 "register_operand" "w") 566 (parallel [(match_operand:SI 2 "immediate_operand")])) 567 (match_operand:DF 3 "register_operand" "w")))] 568 "TARGET_SIMD" 569 { 570 operands[2] = aarch64_endian_lane_rtx (V2DFmode, INTVAL (operands[2])); 571 return "fmul\\t%0.2d, %3.2d, %1.d[%2]"; 572 } 573 [(set_attr "type" "neon_fp_mul_d_scalar_q")] 574) 575 576(define_insn "neg<mode>2" 577 [(set (match_operand:VDQ_I 0 "register_operand" "=w") 578 (neg:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")))] 579 "TARGET_SIMD" 580 "neg\t%0.<Vtype>, %1.<Vtype>" 581 [(set_attr "type" "neon_neg<q>")] 582) 583 584(define_insn "abs<mode>2" 585 [(set (match_operand:VDQ_I 0 "register_operand" "=w") 586 (abs:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")))] 587 "TARGET_SIMD" 588 "abs\t%0.<Vtype>, %1.<Vtype>" 589 [(set_attr "type" "neon_abs<q>")] 590) 591 592;; The intrinsic version of integer ABS must not be allowed to 593;; combine with any operation with an integerated ABS step, such 594;; as SABD. 
595(define_insn "aarch64_abs<mode>" 596 [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w") 597 (unspec:VSDQ_I_DI 598 [(match_operand:VSDQ_I_DI 1 "register_operand" "w")] 599 UNSPEC_ABS))] 600 "TARGET_SIMD" 601 "abs\t%<v>0<Vmtype>, %<v>1<Vmtype>" 602 [(set_attr "type" "neon_abs<q>")] 603) 604 605(define_insn "abd<mode>_3" 606 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w") 607 (abs:VDQ_BHSI (minus:VDQ_BHSI 608 (match_operand:VDQ_BHSI 1 "register_operand" "w") 609 (match_operand:VDQ_BHSI 2 "register_operand" "w"))))] 610 "TARGET_SIMD" 611 "sabd\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>" 612 [(set_attr "type" "neon_abd<q>")] 613) 614 615(define_insn "aba<mode>_3" 616 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w") 617 (plus:VDQ_BHSI (abs:VDQ_BHSI (minus:VDQ_BHSI 618 (match_operand:VDQ_BHSI 1 "register_operand" "w") 619 (match_operand:VDQ_BHSI 2 "register_operand" "w"))) 620 (match_operand:VDQ_BHSI 3 "register_operand" "0")))] 621 "TARGET_SIMD" 622 "saba\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>" 623 [(set_attr "type" "neon_arith_acc<q>")] 624) 625 626(define_insn "fabd<mode>3" 627 [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w") 628 (abs:VHSDF_HSDF 629 (minus:VHSDF_HSDF 630 (match_operand:VHSDF_HSDF 1 "register_operand" "w") 631 (match_operand:VHSDF_HSDF 2 "register_operand" "w"))))] 632 "TARGET_SIMD" 633 "fabd\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>" 634 [(set_attr "type" "neon_fp_abd_<stype><q>")] 635) 636 637;; For AND (vector, register) and BIC (vector, immediate) 638(define_insn "and<mode>3" 639 [(set (match_operand:VDQ_I 0 "register_operand" "=w,w") 640 (and:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w,0") 641 (match_operand:VDQ_I 2 "aarch64_reg_or_bic_imm" "w,Db")))] 642 "TARGET_SIMD" 643 { 644 switch (which_alternative) 645 { 646 case 0: 647 return "and\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>"; 648 case 1: 649 return aarch64_output_simd_mov_immediate (operands[2], <bitsize>, 650 AARCH64_CHECK_BIC); 651 default: 652 
gcc_unreachable (); 653 } 654 } 655 [(set_attr "type" "neon_logic<q>")] 656) 657 658;; For ORR (vector, register) and ORR (vector, immediate) 659(define_insn "ior<mode>3" 660 [(set (match_operand:VDQ_I 0 "register_operand" "=w,w") 661 (ior:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w,0") 662 (match_operand:VDQ_I 2 "aarch64_reg_or_orr_imm" "w,Do")))] 663 "TARGET_SIMD" 664 { 665 switch (which_alternative) 666 { 667 case 0: 668 return "orr\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>"; 669 case 1: 670 return aarch64_output_simd_mov_immediate (operands[2], <bitsize>, 671 AARCH64_CHECK_ORR); 672 default: 673 gcc_unreachable (); 674 } 675 } 676 [(set_attr "type" "neon_logic<q>")] 677) 678 679(define_insn "xor<mode>3" 680 [(set (match_operand:VDQ_I 0 "register_operand" "=w") 681 (xor:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w") 682 (match_operand:VDQ_I 2 "register_operand" "w")))] 683 "TARGET_SIMD" 684 "eor\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>" 685 [(set_attr "type" "neon_logic<q>")] 686) 687 688(define_insn "one_cmpl<mode>2" 689 [(set (match_operand:VDQ_I 0 "register_operand" "=w") 690 (not:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")))] 691 "TARGET_SIMD" 692 "not\t%0.<Vbtype>, %1.<Vbtype>" 693 [(set_attr "type" "neon_logic<q>")] 694) 695 696(define_insn "aarch64_simd_vec_set<mode>" 697 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w,w,w") 698 (vec_merge:VDQ_BHSI 699 (vec_duplicate:VDQ_BHSI 700 (match_operand:<VEL> 1 "aarch64_simd_general_operand" "r,w,Utv")) 701 (match_operand:VDQ_BHSI 3 "register_operand" "0,0,0") 702 (match_operand:SI 2 "immediate_operand" "i,i,i")))] 703 "TARGET_SIMD" 704 { 705 int elt = ENDIAN_LANE_N (<nunits>, exact_log2 (INTVAL (operands[2]))); 706 operands[2] = GEN_INT ((HOST_WIDE_INT) 1 << elt); 707 switch (which_alternative) 708 { 709 case 0: 710 return "ins\\t%0.<Vetype>[%p2], %w1"; 711 case 1: 712 return "ins\\t%0.<Vetype>[%p2], %1.<Vetype>[0]"; 713 case 2: 714 return "ld1\\t{%0.<Vetype>}[%p2], %1"; 715 default: 716 
gcc_unreachable (); 717 } 718 } 719 [(set_attr "type" "neon_from_gp<q>, neon_ins<q>, neon_load1_one_lane<q>")] 720) 721 722(define_insn "*aarch64_simd_vec_copy_lane<mode>" 723 [(set (match_operand:VALL_F16 0 "register_operand" "=w") 724 (vec_merge:VALL_F16 725 (vec_duplicate:VALL_F16 726 (vec_select:<VEL> 727 (match_operand:VALL_F16 3 "register_operand" "w") 728 (parallel 729 [(match_operand:SI 4 "immediate_operand" "i")]))) 730 (match_operand:VALL_F16 1 "register_operand" "0") 731 (match_operand:SI 2 "immediate_operand" "i")))] 732 "TARGET_SIMD" 733 { 734 int elt = ENDIAN_LANE_N (<nunits>, exact_log2 (INTVAL (operands[2]))); 735 operands[2] = GEN_INT (HOST_WIDE_INT_1 << elt); 736 operands[4] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[4])); 737 738 return "ins\t%0.<Vetype>[%p2], %3.<Vetype>[%4]"; 739 } 740 [(set_attr "type" "neon_ins<q>")] 741) 742 743(define_insn "*aarch64_simd_vec_copy_lane_<vswap_width_name><mode>" 744 [(set (match_operand:VALL_F16_NO_V2Q 0 "register_operand" "=w") 745 (vec_merge:VALL_F16_NO_V2Q 746 (vec_duplicate:VALL_F16_NO_V2Q 747 (vec_select:<VEL> 748 (match_operand:<VSWAP_WIDTH> 3 "register_operand" "w") 749 (parallel 750 [(match_operand:SI 4 "immediate_operand" "i")]))) 751 (match_operand:VALL_F16_NO_V2Q 1 "register_operand" "0") 752 (match_operand:SI 2 "immediate_operand" "i")))] 753 "TARGET_SIMD" 754 { 755 int elt = ENDIAN_LANE_N (<nunits>, exact_log2 (INTVAL (operands[2]))); 756 operands[2] = GEN_INT (HOST_WIDE_INT_1 << elt); 757 operands[4] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, 758 INTVAL (operands[4])); 759 760 return "ins\t%0.<Vetype>[%p2], %3.<Vetype>[%4]"; 761 } 762 [(set_attr "type" "neon_ins<q>")] 763) 764 765(define_insn "aarch64_simd_lshr<mode>" 766 [(set (match_operand:VDQ_I 0 "register_operand" "=w") 767 (lshiftrt:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w") 768 (match_operand:VDQ_I 2 "aarch64_simd_rshift_imm" "Dr")))] 769 "TARGET_SIMD" 770 "ushr\t%0.<Vtype>, %1.<Vtype>, %2" 771 [(set_attr "type" 
"neon_shift_imm<q>")] 772) 773 774(define_insn "aarch64_simd_ashr<mode>" 775 [(set (match_operand:VDQ_I 0 "register_operand" "=w") 776 (ashiftrt:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w") 777 (match_operand:VDQ_I 2 "aarch64_simd_rshift_imm" "Dr")))] 778 "TARGET_SIMD" 779 "sshr\t%0.<Vtype>, %1.<Vtype>, %2" 780 [(set_attr "type" "neon_shift_imm<q>")] 781) 782 783(define_insn "aarch64_simd_imm_shl<mode>" 784 [(set (match_operand:VDQ_I 0 "register_operand" "=w") 785 (ashift:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w") 786 (match_operand:VDQ_I 2 "aarch64_simd_lshift_imm" "Dl")))] 787 "TARGET_SIMD" 788 "shl\t%0.<Vtype>, %1.<Vtype>, %2" 789 [(set_attr "type" "neon_shift_imm<q>")] 790) 791 792(define_insn "aarch64_simd_reg_sshl<mode>" 793 [(set (match_operand:VDQ_I 0 "register_operand" "=w") 794 (ashift:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w") 795 (match_operand:VDQ_I 2 "register_operand" "w")))] 796 "TARGET_SIMD" 797 "sshl\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>" 798 [(set_attr "type" "neon_shift_reg<q>")] 799) 800 801(define_insn "aarch64_simd_reg_shl<mode>_unsigned" 802 [(set (match_operand:VDQ_I 0 "register_operand" "=w") 803 (unspec:VDQ_I [(match_operand:VDQ_I 1 "register_operand" "w") 804 (match_operand:VDQ_I 2 "register_operand" "w")] 805 UNSPEC_ASHIFT_UNSIGNED))] 806 "TARGET_SIMD" 807 "ushl\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>" 808 [(set_attr "type" "neon_shift_reg<q>")] 809) 810 811(define_insn "aarch64_simd_reg_shl<mode>_signed" 812 [(set (match_operand:VDQ_I 0 "register_operand" "=w") 813 (unspec:VDQ_I [(match_operand:VDQ_I 1 "register_operand" "w") 814 (match_operand:VDQ_I 2 "register_operand" "w")] 815 UNSPEC_ASHIFT_SIGNED))] 816 "TARGET_SIMD" 817 "sshl\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>" 818 [(set_attr "type" "neon_shift_reg<q>")] 819) 820 821(define_expand "ashl<mode>3" 822 [(match_operand:VDQ_I 0 "register_operand" "") 823 (match_operand:VDQ_I 1 "register_operand" "") 824 (match_operand:SI 2 "general_operand" "")] 825 "TARGET_SIMD" 
826{ 827 int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT; 828 int shift_amount; 829 830 if (CONST_INT_P (operands[2])) 831 { 832 shift_amount = INTVAL (operands[2]); 833 if (shift_amount >= 0 && shift_amount < bit_width) 834 { 835 rtx tmp = aarch64_simd_gen_const_vector_dup (<MODE>mode, 836 shift_amount); 837 emit_insn (gen_aarch64_simd_imm_shl<mode> (operands[0], 838 operands[1], 839 tmp)); 840 DONE; 841 } 842 else 843 { 844 operands[2] = force_reg (SImode, operands[2]); 845 } 846 } 847 else if (MEM_P (operands[2])) 848 { 849 operands[2] = force_reg (SImode, operands[2]); 850 } 851 852 if (REG_P (operands[2])) 853 { 854 rtx tmp = gen_reg_rtx (<MODE>mode); 855 emit_insn (gen_aarch64_simd_dup<mode> (tmp, 856 convert_to_mode (<VEL>mode, 857 operands[2], 858 0))); 859 emit_insn (gen_aarch64_simd_reg_sshl<mode> (operands[0], operands[1], 860 tmp)); 861 DONE; 862 } 863 else 864 FAIL; 865} 866) 867 868(define_expand "lshr<mode>3" 869 [(match_operand:VDQ_I 0 "register_operand" "") 870 (match_operand:VDQ_I 1 "register_operand" "") 871 (match_operand:SI 2 "general_operand" "")] 872 "TARGET_SIMD" 873{ 874 int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT; 875 int shift_amount; 876 877 if (CONST_INT_P (operands[2])) 878 { 879 shift_amount = INTVAL (operands[2]); 880 if (shift_amount > 0 && shift_amount <= bit_width) 881 { 882 rtx tmp = aarch64_simd_gen_const_vector_dup (<MODE>mode, 883 shift_amount); 884 emit_insn (gen_aarch64_simd_lshr<mode> (operands[0], 885 operands[1], 886 tmp)); 887 DONE; 888 } 889 else 890 operands[2] = force_reg (SImode, operands[2]); 891 } 892 else if (MEM_P (operands[2])) 893 { 894 operands[2] = force_reg (SImode, operands[2]); 895 } 896 897 if (REG_P (operands[2])) 898 { 899 rtx tmp = gen_reg_rtx (SImode); 900 rtx tmp1 = gen_reg_rtx (<MODE>mode); 901 emit_insn (gen_negsi2 (tmp, operands[2])); 902 emit_insn (gen_aarch64_simd_dup<mode> (tmp1, 903 convert_to_mode (<VEL>mode, 904 tmp, 0))); 905 emit_insn 
(gen_aarch64_simd_reg_shl<mode>_unsigned (operands[0], 906 operands[1], 907 tmp1)); 908 DONE; 909 } 910 else 911 FAIL; 912} 913) 914 915(define_expand "ashr<mode>3" 916 [(match_operand:VDQ_I 0 "register_operand" "") 917 (match_operand:VDQ_I 1 "register_operand" "") 918 (match_operand:SI 2 "general_operand" "")] 919 "TARGET_SIMD" 920{ 921 int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT; 922 int shift_amount; 923 924 if (CONST_INT_P (operands[2])) 925 { 926 shift_amount = INTVAL (operands[2]); 927 if (shift_amount > 0 && shift_amount <= bit_width) 928 { 929 rtx tmp = aarch64_simd_gen_const_vector_dup (<MODE>mode, 930 shift_amount); 931 emit_insn (gen_aarch64_simd_ashr<mode> (operands[0], 932 operands[1], 933 tmp)); 934 DONE; 935 } 936 else 937 operands[2] = force_reg (SImode, operands[2]); 938 } 939 else if (MEM_P (operands[2])) 940 { 941 operands[2] = force_reg (SImode, operands[2]); 942 } 943 944 if (REG_P (operands[2])) 945 { 946 rtx tmp = gen_reg_rtx (SImode); 947 rtx tmp1 = gen_reg_rtx (<MODE>mode); 948 emit_insn (gen_negsi2 (tmp, operands[2])); 949 emit_insn (gen_aarch64_simd_dup<mode> (tmp1, 950 convert_to_mode (<VEL>mode, 951 tmp, 0))); 952 emit_insn (gen_aarch64_simd_reg_shl<mode>_signed (operands[0], 953 operands[1], 954 tmp1)); 955 DONE; 956 } 957 else 958 FAIL; 959} 960) 961 962(define_expand "vashl<mode>3" 963 [(match_operand:VDQ_I 0 "register_operand" "") 964 (match_operand:VDQ_I 1 "register_operand" "") 965 (match_operand:VDQ_I 2 "register_operand" "")] 966 "TARGET_SIMD" 967{ 968 emit_insn (gen_aarch64_simd_reg_sshl<mode> (operands[0], operands[1], 969 operands[2])); 970 DONE; 971}) 972 973;; Using mode VDQ_BHSI as there is no V2DImode neg! 974;; Negating individual lanes most certainly offsets the 975;; gain from vectorization. 
;; Vector-by-vector arithmetic shift right: negate the shift counts and
;; use the signed register-shift (sshl) pattern.
(define_expand "vashr<mode>3"
 [(match_operand:VDQ_BHSI 0 "register_operand" "")
  (match_operand:VDQ_BHSI 1 "register_operand" "")
  (match_operand:VDQ_BHSI 2 "register_operand" "")]
 "TARGET_SIMD"
{
  rtx neg = gen_reg_rtx (<MODE>mode);
  /* Use emit_insn, for consistency with the other expanders in this
     file; bare emit () dispatched at run time for no benefit.  */
  emit_insn (gen_neg<mode>2 (neg, operands[2]));
  emit_insn (gen_aarch64_simd_reg_shl<mode>_signed (operands[0], operands[1],
						    neg));
  DONE;
})

;; DI vector shift
(define_expand "aarch64_ashr_simddi"
  [(match_operand:DI 0 "register_operand" "=w")
   (match_operand:DI 1 "register_operand" "w")
   (match_operand:SI 2 "aarch64_shift_imm64_di" "")]
  "TARGET_SIMD"
  {
    /* An arithmetic shift right by 64 fills the result with copies of the sign
       bit, just like asr by 63 - however the standard pattern does not handle
       a shift by 64.  */
    if (INTVAL (operands[2]) == 64)
      operands[2] = GEN_INT (63);
    emit_insn (gen_ashrdi3 (operands[0], operands[1], operands[2]));
    DONE;
  }
)

;; Vector-by-vector logical shift right: as vashr<mode>3, but using the
;; unsigned register-shift (ushl) pattern.
(define_expand "vlshr<mode>3"
 [(match_operand:VDQ_BHSI 0 "register_operand" "")
  (match_operand:VDQ_BHSI 1 "register_operand" "")
  (match_operand:VDQ_BHSI 2 "register_operand" "")]
 "TARGET_SIMD"
{
  rtx neg = gen_reg_rtx (<MODE>mode);
  /* Use emit_insn, for consistency with the other expanders in this
     file; bare emit () dispatched at run time for no benefit.  */
  emit_insn (gen_neg<mode>2 (neg, operands[2]));
  emit_insn (gen_aarch64_simd_reg_shl<mode>_unsigned (operands[0], operands[1],
						      neg));
  DONE;
})

(define_expand "aarch64_lshr_simddi"
  [(match_operand:DI 0 "register_operand" "=w")
   (match_operand:DI 1 "register_operand" "w")
   (match_operand:SI 2 "aarch64_shift_imm64_di" "")]
  "TARGET_SIMD"
  {
    /* A logical shift right by 64 yields zero.  */
    if (INTVAL (operands[2]) == 64)
      emit_move_insn (operands[0], const0_rtx);
    else
      emit_insn (gen_lshrdi3 (operands[0], operands[1], operands[2]));
    DONE;
  }
)

;; Insert a scalar into lane operands[2]: the insn pattern takes the lane
;; mask as a one-hot immediate.
(define_expand "vec_set<mode>"
  [(match_operand:VDQ_BHSI 0 "register_operand")
   (match_operand:<VEL> 1 "register_operand")
   (match_operand:SI 2 "immediate_operand")]
  "TARGET_SIMD"
  {
    HOST_WIDE_INT elem = (HOST_WIDE_INT) 1 << INTVAL (operands[2]);
    emit_insn (gen_aarch64_simd_vec_set<mode> (operands[0], operands[1],
					       GEN_INT (elem), operands[0]));
    DONE;
  }
)

;; For 64-bit modes we use ushl/r, as this does not require a SIMD zero.
(define_insn "vec_shr_<mode>"
  [(set (match_operand:VD 0 "register_operand" "=w")
	(unspec:VD [(match_operand:VD 1 "register_operand" "w")
		    (match_operand:SI 2 "immediate_operand" "i")]
		   UNSPEC_VEC_SHR))]
  "TARGET_SIMD"
  {
    if (BYTES_BIG_ENDIAN)
      return "shl %d0, %d1, %2";
    else
      return "ushr %d0, %d1, %2";
  }
  [(set_attr "type" "neon_shift_imm")]
)

(define_insn "aarch64_simd_vec_setv2di"
  [(set (match_operand:V2DI 0 "register_operand" "=w,w")
	(vec_merge:V2DI
	    (vec_duplicate:V2DI
		(match_operand:DI 1 "register_operand" "r,w"))
	    (match_operand:V2DI 3 "register_operand" "0,0")
	    (match_operand:SI 2 "immediate_operand" "i,i")))]
  "TARGET_SIMD"
  {
    int elt = ENDIAN_LANE_N (2, exact_log2 (INTVAL (operands[2])));
    operands[2] = GEN_INT ((HOST_WIDE_INT) 1 << elt);
    switch (which_alternative)
      {
      case 0:
	return "ins\\t%0.d[%p2], %1";
      case 1:
	return "ins\\t%0.d[%p2], %1.d[0]";
      default:
	gcc_unreachable ();
      }
  }
  [(set_attr "type" "neon_from_gp, neon_ins_q")]
)

(define_expand "vec_setv2di"
  [(match_operand:V2DI 0 "register_operand")
   (match_operand:DI 1 "register_operand")
   (match_operand:SI 2 "immediate_operand")]
  "TARGET_SIMD"
  {
    HOST_WIDE_INT elem = (HOST_WIDE_INT) 1 << INTVAL (operands[2]);
    emit_insn (gen_aarch64_simd_vec_setv2di (operands[0], operands[1],
					     GEN_INT (elem), operands[0]));
    DONE;
  }
)

(define_insn "aarch64_simd_vec_set<mode>"
  [(set (match_operand:VDQF_F16 0 "register_operand" "=w")
	(vec_merge:VDQF_F16
	    (vec_duplicate:VDQF_F16
		(match_operand:<VEL> 1 "register_operand" "w"))
	    (match_operand:VDQF_F16 3 "register_operand" "0")
	    (match_operand:SI 2 "immediate_operand" "i")))]
  "TARGET_SIMD"
  {
    int elt = ENDIAN_LANE_N (<nunits>, exact_log2 (INTVAL (operands[2])));

    operands[2] = GEN_INT ((HOST_WIDE_INT)1 << elt);
    return "ins\t%0.<Vetype>[%p2], %1.<Vetype>[0]";
  }
  [(set_attr "type" "neon_ins<q>")]
)

(define_expand "vec_set<mode>"
  [(match_operand:VDQF_F16 0 "register_operand" "+w")
   (match_operand:<VEL> 1 "register_operand" "w")
   (match_operand:SI 2 "immediate_operand" "")]
  "TARGET_SIMD"
  {
    HOST_WIDE_INT elem = (HOST_WIDE_INT) 1 << INTVAL (operands[2]);
    emit_insn (gen_aarch64_simd_vec_set<mode> (operands[0], operands[1],
					       GEN_INT (elem), operands[0]));
    DONE;
  }
)


(define_insn "aarch64_mla<mode>"
 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
       (plus:VDQ_BHSI (mult:VDQ_BHSI
			(match_operand:VDQ_BHSI 2 "register_operand" "w")
			(match_operand:VDQ_BHSI 3 "register_operand" "w"))
		      (match_operand:VDQ_BHSI 1 "register_operand" "0")))]
 "TARGET_SIMD"
 "mla\t%0.<Vtype>, %2.<Vtype>, %3.<Vtype>"
  [(set_attr "type" "neon_mla_<Vetype><q>")]
)

(define_insn "*aarch64_mla_elt<mode>"
 [(set (match_operand:VDQHS 0 "register_operand" "=w")
       (plus:VDQHS
	 (mult:VDQHS
	   (vec_duplicate:VDQHS
	      (vec_select:<VEL>
		(match_operand:VDQHS 1 "register_operand" "<h_con>")
		  (parallel [(match_operand:SI 2 "immediate_operand")])))
	   (match_operand:VDQHS 3 "register_operand" "w"))
	 (match_operand:VDQHS 4 "register_operand" "0")))]
 "TARGET_SIMD"
  {
    operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
    return "mla\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
  }
  [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
)
;; MLA with the multiplicand duplicated from a lane of a vector of the
;; other (swapped) width.
(define_insn "*aarch64_mla_elt_<vswap_width_name><mode>"
 [(set (match_operand:VDQHS 0 "register_operand" "=w")
       (plus:VDQHS
	 (mult:VDQHS
	   (vec_duplicate:VDQHS
	      (vec_select:<VEL>
		(match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
		  (parallel [(match_operand:SI 2 "immediate_operand")])))
	   (match_operand:VDQHS 3 "register_operand" "w"))
	 (match_operand:VDQHS 4 "register_operand" "0")))]
 "TARGET_SIMD"
  {
    operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
    return "mla\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
  }
  [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
)

;; MLA where one multiplicand is a scalar duplicated to all lanes.
(define_insn "*aarch64_mla_elt_merge<mode>"
  [(set (match_operand:VDQHS 0 "register_operand" "=w")
	(plus:VDQHS
	  (mult:VDQHS (vec_duplicate:VDQHS
		  (match_operand:<VEL> 1 "register_operand" "<h_con>"))
		(match_operand:VDQHS 2 "register_operand" "w"))
	  (match_operand:VDQHS 3 "register_operand" "0")))]
 "TARGET_SIMD"
 "mla\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]"
  [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
)

(define_insn "aarch64_mls<mode>"
 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
       (minus:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "0")
		   (mult:VDQ_BHSI (match_operand:VDQ_BHSI 2 "register_operand" "w")
			      (match_operand:VDQ_BHSI 3 "register_operand" "w"))))]
 "TARGET_SIMD"
 "mls\t%0.<Vtype>, %2.<Vtype>, %3.<Vtype>"
  [(set_attr "type" "neon_mla_<Vetype><q>")]
)

;; MLS variants mirroring the MLA lane/duplicate patterns above.
(define_insn "*aarch64_mls_elt<mode>"
 [(set (match_operand:VDQHS 0 "register_operand" "=w")
       (minus:VDQHS
	 (match_operand:VDQHS 4 "register_operand" "0")
	 (mult:VDQHS
	   (vec_duplicate:VDQHS
	      (vec_select:<VEL>
		(match_operand:VDQHS 1 "register_operand" "<h_con>")
		  (parallel [(match_operand:SI 2 "immediate_operand")])))
	   (match_operand:VDQHS 3 "register_operand" "w"))))]
 "TARGET_SIMD"
  {
    operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
    return "mls\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
  }
  [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
)

(define_insn "*aarch64_mls_elt_<vswap_width_name><mode>"
 [(set (match_operand:VDQHS 0 "register_operand" "=w")
       (minus:VDQHS
	 (match_operand:VDQHS 4 "register_operand" "0")
	 (mult:VDQHS
	   (vec_duplicate:VDQHS
	      (vec_select:<VEL>
		(match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
		  (parallel [(match_operand:SI 2 "immediate_operand")])))
	   (match_operand:VDQHS 3 "register_operand" "w"))))]
 "TARGET_SIMD"
  {
    operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
    return "mls\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
  }
  [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
)

(define_insn "*aarch64_mls_elt_merge<mode>"
  [(set (match_operand:VDQHS 0 "register_operand" "=w")
	(minus:VDQHS
	  (match_operand:VDQHS 1 "register_operand" "0")
	  (mult:VDQHS (vec_duplicate:VDQHS
		  (match_operand:<VEL> 2 "register_operand" "<h_con>"))
		(match_operand:VDQHS 3 "register_operand" "w"))))]
  "TARGET_SIMD"
  "mls\t%0.<Vtype>, %3.<Vtype>, %2.<Vetype>[0]"
  [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
)

;; Max/Min operations.
(define_insn "<su><maxmin><mode>3"
 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
       (MAXMIN:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")
		    (match_operand:VDQ_BHSI 2 "register_operand" "w")))]
 "TARGET_SIMD"
 "<su><maxmin>\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_minmax<q>")]
)

;; There is no V2DI integer min/max instruction; synthesize it with a
;; compare-and-select (vcond).
(define_expand "<su><maxmin>v2di3"
 [(set (match_operand:V2DI 0 "register_operand" "")
       (MAXMIN:V2DI (match_operand:V2DI 1 "register_operand" "")
		    (match_operand:V2DI 2 "register_operand" "")))]
 "TARGET_SIMD"
{
  enum rtx_code cmp_operator;
  rtx cmp_fmt;

  switch (<CODE>)
    {
    case UMIN:
      cmp_operator = LTU;
      break;
    case SMIN:
      cmp_operator = LT;
      break;
    case UMAX:
      cmp_operator = GTU;
      break;
    case SMAX:
      cmp_operator = GT;
      break;
    default:
      gcc_unreachable ();
    }

  cmp_fmt = gen_rtx_fmt_ee (cmp_operator, V2DImode, operands[1], operands[2]);
  emit_insn (gen_vcondv2div2di (operands[0], operands[1],
				operands[2], cmp_fmt, operands[1], operands[2]));
  DONE;
})

;; Pairwise Integer Max/Min operations.
(define_insn "aarch64_<maxmin_uns>p<mode>"
 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
       (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand" "w")
			 (match_operand:VDQ_BHSI 2 "register_operand" "w")]
			MAXMINV))]
 "TARGET_SIMD"
 "<maxmin_uns_op>p\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_minmax<q>")]
)

;; Pairwise FP Max/Min operations.
(define_insn "aarch64_<maxmin_uns>p<mode>"
 [(set (match_operand:VHSDF 0 "register_operand" "=w")
       (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
		      (match_operand:VHSDF 2 "register_operand" "w")]
		      FMAXMINV))]
 "TARGET_SIMD"
 "<maxmin_uns_op>p\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_minmax<q>")]
)

;; vec_concat gives a new vector with the low elements from operand 1, and
;; the high elements from operand 2.  That is to say, given op1 = { a, b }
;; op2 = { c, d }, vec_concat (op1, op2) = { a, b, c, d }.
;; What that means, is that the RTL descriptions of the below patterns
;; need to change depending on endianness.

;; Move to the low architectural bits of the register.
;; On little-endian this is { operand, zeroes }
;; On big-endian this is { zeroes, operand }

(define_insn "move_lo_quad_internal_<mode>"
  [(set (match_operand:VQ_NO2E 0 "register_operand" "=w,w,w")
	(vec_concat:VQ_NO2E
	  (match_operand:<VHALF> 1 "register_operand" "w,r,r")
	  (vec_duplicate:<VHALF> (const_int 0))))]
  "TARGET_SIMD && !BYTES_BIG_ENDIAN"
  "@
   dup\\t%d0, %1.d[0]
   fmov\\t%d0, %1
   dup\\t%d0, %1"
  [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
   (set_attr "simd" "yes,*,yes")
   (set_attr "fp" "*,yes,*")
   (set_attr "length" "4")]
)

;; As above, but for the two-element modes the zero half is a plain
;; (const_int 0) rather than a duplicate.
(define_insn "move_lo_quad_internal_<mode>"
  [(set (match_operand:VQ_2E 0 "register_operand" "=w,w,w")
	(vec_concat:VQ_2E
	  (match_operand:<VHALF> 1 "register_operand" "w,r,r")
	  (const_int 0)))]
  "TARGET_SIMD && !BYTES_BIG_ENDIAN"
  "@
   dup\\t%d0, %1.d[0]
   fmov\\t%d0, %1
   dup\\t%d0, %1"
  [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
   (set_attr "simd" "yes,*,yes")
   (set_attr "fp" "*,yes,*")
   (set_attr "length" "4")]
)

(define_insn "move_lo_quad_internal_be_<mode>"
  [(set (match_operand:VQ_NO2E 0 "register_operand" "=w,w,w")
	(vec_concat:VQ_NO2E
	  (vec_duplicate:<VHALF> (const_int 0))
	  (match_operand:<VHALF> 1 "register_operand" "w,r,r")))]
  "TARGET_SIMD && BYTES_BIG_ENDIAN"
  "@
   dup\\t%d0, %1.d[0]
   fmov\\t%d0, %1
   dup\\t%d0, %1"
  [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
   (set_attr "simd" "yes,*,yes")
   (set_attr "fp" "*,yes,*")
   (set_attr "length" "4")]
)

(define_insn "move_lo_quad_internal_be_<mode>"
  [(set (match_operand:VQ_2E 0 "register_operand" "=w,w,w")
	(vec_concat:VQ_2E
	  (const_int 0)
	  (match_operand:<VHALF> 1 "register_operand" "w,r,r")))]
  "TARGET_SIMD && BYTES_BIG_ENDIAN"
  "@
   dup\\t%d0, %1.d[0]
   fmov\\t%d0, %1
   dup\\t%d0, %1"
  [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
   (set_attr "simd" "yes,*,yes")
   (set_attr "fp" "*,yes,*")
   (set_attr "length" "4")]
)

;; Select the endian-correct internal pattern.
(define_expand "move_lo_quad_<mode>"
  [(match_operand:VQ 0 "register_operand")
   (match_operand:VQ 1 "register_operand")]
  "TARGET_SIMD"
{
  if (BYTES_BIG_ENDIAN)
    emit_insn (gen_move_lo_quad_internal_be_<mode> (operands[0], operands[1]));
  else
    emit_insn (gen_move_lo_quad_internal_<mode> (operands[0], operands[1]));
  DONE;
}
)

;; Move operand1 to the high architectural bits of the register, keeping
;; the low architectural bits of operand2.
;; For little-endian this is { operand2, operand1 }
;; For big-endian this is { operand1, operand2 }

(define_insn "aarch64_simd_move_hi_quad_<mode>"
  [(set (match_operand:VQ 0 "register_operand" "+w,w")
	(vec_concat:VQ
	  (vec_select:<VHALF>
	    (match_dup 0)
	    (match_operand:VQ 2 "vect_par_cnst_lo_half" ""))
	  (match_operand:<VHALF> 1 "register_operand" "w,r")))]
  "TARGET_SIMD && !BYTES_BIG_ENDIAN"
  "@
   ins\\t%0.d[1], %1.d[0]
   ins\\t%0.d[1], %1"
  [(set_attr "type" "neon_ins")]
)

(define_insn "aarch64_simd_move_hi_quad_be_<mode>"
  [(set (match_operand:VQ 0 "register_operand" "+w,w")
	(vec_concat:VQ
	  (match_operand:<VHALF> 1 "register_operand" "w,r")
	  (vec_select:<VHALF>
	    (match_dup 0)
	    (match_operand:VQ 2 "vect_par_cnst_lo_half" ""))))]
  "TARGET_SIMD && BYTES_BIG_ENDIAN"
  "@
   ins\\t%0.d[1], %1.d[0]
   ins\\t%0.d[1], %1"
  [(set_attr "type" "neon_ins")]
)

(define_expand "move_hi_quad_<mode>"
 [(match_operand:VQ 0 "register_operand" "")
  (match_operand:<VHALF> 1 "register_operand" "")]
 "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
  if (BYTES_BIG_ENDIAN)
    emit_insn (gen_aarch64_simd_move_hi_quad_be_<mode> (operands[0],
							operands[1], p));
  else
    emit_insn (gen_aarch64_simd_move_hi_quad_<mode> (operands[0],
						     operands[1], p));
  DONE;
})

;; Narrowing operations.

;; For doubles.
(define_insn "aarch64_simd_vec_pack_trunc_<mode>"
 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
       (truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand" "w")))]
 "TARGET_SIMD"
 "xtn\\t%0.<Vntype>, %1.<Vtype>"
  [(set_attr "type" "neon_shift_imm_narrow_q")]
)

;; Concatenate the two 64-bit inputs into one 128-bit register (honouring
;; endianness) and narrow it with a single xtn.
(define_expand "vec_pack_trunc_<mode>"
 [(match_operand:<VNARROWD> 0 "register_operand" "")
  (match_operand:VDN 1 "register_operand" "")
  (match_operand:VDN 2 "register_operand" "")]
 "TARGET_SIMD"
{
  rtx tempreg = gen_reg_rtx (<VDBL>mode);
  int lo = BYTES_BIG_ENDIAN ? 2 : 1;
  int hi = BYTES_BIG_ENDIAN ? 1 : 2;

  emit_insn (gen_move_lo_quad_<Vdbl> (tempreg, operands[lo]));
  emit_insn (gen_move_hi_quad_<Vdbl> (tempreg, operands[hi]));
  emit_insn (gen_aarch64_simd_vec_pack_trunc_<Vdbl> (operands[0], tempreg));
  DONE;
})

;; For quads.

(define_insn "vec_pack_trunc_<mode>"
 [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=&w")
       (vec_concat:<VNARROWQ2>
	 (truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand" "w"))
	 (truncate:<VNARROWQ> (match_operand:VQN 2 "register_operand" "w"))))]
 "TARGET_SIMD"
 {
   if (BYTES_BIG_ENDIAN)
     return "xtn\\t%0.<Vntype>, %2.<Vtype>\;xtn2\\t%0.<V2ntype>, %1.<Vtype>";
   else
     return "xtn\\t%0.<Vntype>, %1.<Vtype>\;xtn2\\t%0.<V2ntype>, %2.<Vtype>";
 }
  [(set_attr "type" "multiple")
   (set_attr "length" "8")]
)

;; Widening operations.

;; Extend the low half of a vector via a shift-left-long by zero.
(define_insn "aarch64_simd_vec_unpack<su>_lo_<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
			       (match_operand:VQW 1 "register_operand" "w")
			       (match_operand:VQW 2 "vect_par_cnst_lo_half" "")
			    )))]
  "TARGET_SIMD"
  "<su>shll\t%0.<Vwtype>, %1.<Vhalftype>, 0"
  [(set_attr "type" "neon_shift_imm_long")]
)

(define_insn "aarch64_simd_vec_unpack<su>_hi_<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
			       (match_operand:VQW 1 "register_operand" "w")
			       (match_operand:VQW 2 "vect_par_cnst_hi_half" "")
			    )))]
  "TARGET_SIMD"
  "<su>shll2\t%0.<Vwtype>, %1.<Vtype>, 0"
  [(set_attr "type" "neon_shift_imm_long")]
)

(define_expand "vec_unpack<su>_hi_<mode>"
  [(match_operand:<VWIDE> 0 "register_operand" "")
   (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand"))]
  "TARGET_SIMD"
  {
    rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
    emit_insn (gen_aarch64_simd_vec_unpack<su>_hi_<mode> (operands[0],
							  operands[1], p));
    DONE;
  }
)

(define_expand "vec_unpack<su>_lo_<mode>"
  [(match_operand:<VWIDE> 0 "register_operand" "")
   (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand" ""))]
  "TARGET_SIMD"
  {
    rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
    emit_insn (gen_aarch64_simd_vec_unpack<su>_lo_<mode> (operands[0],
							  operands[1], p));
    DONE;
  }
)

;; Widening arithmetic.

;; Widening multiply-accumulate, low and high halves.
(define_insn "*aarch64_<su>mlal_lo<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(plus:<VWIDE>
	  (mult:<VWIDE>
	      (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
		 (match_operand:VQW 2 "register_operand" "w")
		 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
	      (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
		 (match_operand:VQW 4 "register_operand" "w")
		 (match_dup 3))))
	  (match_operand:<VWIDE> 1 "register_operand" "0")))]
  "TARGET_SIMD"
  "<su>mlal\t%0.<Vwtype>, %2.<Vhalftype>, %4.<Vhalftype>"
  [(set_attr "type" "neon_mla_<Vetype>_long")]
)

(define_insn "*aarch64_<su>mlal_hi<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(plus:<VWIDE>
	  (mult:<VWIDE>
	      (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
		 (match_operand:VQW 2 "register_operand" "w")
		 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
	      (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
		 (match_operand:VQW 4 "register_operand" "w")
		 (match_dup 3))))
	  (match_operand:<VWIDE> 1 "register_operand" "0")))]
  "TARGET_SIMD"
  "<su>mlal2\t%0.<Vwtype>, %2.<Vtype>, %4.<Vtype>"
  [(set_attr "type" "neon_mla_<Vetype>_long")]
)

;; Widening multiply-subtract, low and high halves.
(define_insn "*aarch64_<su>mlsl_lo<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(minus:<VWIDE>
	  (match_operand:<VWIDE> 1 "register_operand" "0")
	  (mult:<VWIDE>
	      (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
		 (match_operand:VQW 2 "register_operand" "w")
		 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
	      (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
		 (match_operand:VQW 4 "register_operand" "w")
		 (match_dup 3))))))]
  "TARGET_SIMD"
  "<su>mlsl\t%0.<Vwtype>, %2.<Vhalftype>, %4.<Vhalftype>"
  [(set_attr "type" "neon_mla_<Vetype>_long")]
)

(define_insn "*aarch64_<su>mlsl_hi<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(minus:<VWIDE>
	  (match_operand:<VWIDE> 1 "register_operand" "0")
	  (mult:<VWIDE>
	      (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
		 (match_operand:VQW 2 "register_operand" "w")
		 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
	      (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
		 (match_operand:VQW 4 "register_operand" "w")
		 (match_dup 3))))))]
  "TARGET_SIMD"
  "<su>mlsl2\t%0.<Vwtype>, %2.<Vtype>, %4.<Vtype>"
  [(set_attr "type" "neon_mla_<Vetype>_long")]
)

;; 64-bit (full-vector) widening multiply-accumulate/subtract.
(define_insn "*aarch64_<su>mlal<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(plus:<VWIDE>
	  (mult:<VWIDE>
	    (ANY_EXTEND:<VWIDE>
	      (match_operand:VD_BHSI 1 "register_operand" "w"))
	    (ANY_EXTEND:<VWIDE>
	      (match_operand:VD_BHSI 2 "register_operand" "w")))
	  (match_operand:<VWIDE> 3 "register_operand" "0")))]
  "TARGET_SIMD"
  "<su>mlal\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_mla_<Vetype>_long")]
)

(define_insn "*aarch64_<su>mlsl<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(minus:<VWIDE>
	  (match_operand:<VWIDE> 1 "register_operand" "0")
	  (mult:<VWIDE>
	    (ANY_EXTEND:<VWIDE>
	      (match_operand:VD_BHSI 2 "register_operand" "w"))
	    (ANY_EXTEND:<VWIDE>
	      (match_operand:VD_BHSI 3 "register_operand" "w")))))]
  "TARGET_SIMD"
  "<su>mlsl\t%0.<Vwtype>, %2.<Vtype>, %3.<Vtype>"
  [(set_attr "type" "neon_mla_<Vetype>_long")]
)

;; Widening multiply, low and high halves, with the standard expanders
;; that supply the half-selection parallels.
(define_insn "aarch64_simd_vec_<su>mult_lo_<mode>"
 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
       (mult:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
			   (match_operand:VQW 1 "register_operand" "w")
			   (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
		     (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
			   (match_operand:VQW 2 "register_operand" "w")
			   (match_dup 3)))))]
  "TARGET_SIMD"
  "<su>mull\\t%0.<Vwtype>, %1.<Vhalftype>, %2.<Vhalftype>"
  [(set_attr "type" "neon_mul_<Vetype>_long")]
)

(define_expand "vec_widen_<su>mult_lo_<mode>"
  [(match_operand:<VWIDE> 0 "register_operand" "")
   (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand" ""))
   (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand" ""))]
 "TARGET_SIMD"
 {
   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
   emit_insn (gen_aarch64_simd_vec_<su>mult_lo_<mode> (operands[0],
						       operands[1],
						       operands[2], p));
   DONE;
 }
)

(define_insn "aarch64_simd_vec_<su>mult_hi_<mode>"
 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
      (mult:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
			    (match_operand:VQW 1 "register_operand" "w")
			    (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
		    (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
			    (match_operand:VQW 2 "register_operand" "w")
			    (match_dup 3)))))]
  "TARGET_SIMD"
  "<su>mull2\\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_mul_<Vetype>_long")]
)

(define_expand "vec_widen_<su>mult_hi_<mode>"
  [(match_operand:<VWIDE> 0 "register_operand" "")
   (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand" ""))
   (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand" ""))]
 "TARGET_SIMD"
 {
   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
   emit_insn (gen_aarch64_simd_vec_<su>mult_hi_<mode> (operands[0],
						       operands[1],
						       operands[2], p));
   DONE;
 }
)

;; FP vector operations.
;; AArch64 AdvSIMD supports single-precision (32-bit) and
;; double-precision (64-bit) floating-point data types and arithmetic as
;; defined by the IEEE 754-2008 standard.  This makes them vectorizable
;; without the need for -ffast-math or -funsafe-math-optimizations.
;;
;; Floating-point operations can raise an exception.  Vectorizing such
;; operations are safe because of reasons explained below.
;;
;; ARMv8 permits an extension to enable trapped floating-point
;; exception handling, however this is an optional feature.  In the
;; event of a floating-point exception being raised by vectorised
;; code then:
;; 1.  If trapped floating-point exceptions are available, then a trap
;;     will be taken when any lane raises an enabled exception.  A trap
;;     handler may determine which lane raised the exception.
;; 2.  Alternatively a sticky exception flag is set in the
;;     floating-point status register (FPSR).  Software may explicitly
;;     test the exception flags, in which case the tests will either
;;     prevent vectorisation, allowing precise identification of the
;;     failing operation, or if tested outside of vectorisable regions
;;     then the specific operation and lane are not of interest.

;; FP arithmetic operations.

(define_insn "add<mode>3"
 [(set (match_operand:VHSDF 0 "register_operand" "=w")
       (plus:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
		   (match_operand:VHSDF 2 "register_operand" "w")))]
 "TARGET_SIMD"
 "fadd\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_fp_addsub_<stype><q>")]
)

(define_insn "sub<mode>3"
 [(set (match_operand:VHSDF 0 "register_operand" "=w")
       (minus:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
		    (match_operand:VHSDF 2 "register_operand" "w")))]
 "TARGET_SIMD"
 "fsub\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_fp_addsub_<stype><q>")]
)

(define_insn "mul<mode>3"
 [(set (match_operand:VHSDF 0 "register_operand" "=w")
       (mult:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
		   (match_operand:VHSDF 2 "register_operand" "w")))]
 "TARGET_SIMD"
 "fmul\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_fp_mul_<stype><q>")]
)

;; Try an approximate reciprocal-based division first; fall through to
;; the fdiv insn pattern otherwise.
(define_expand "div<mode>3"
 [(set (match_operand:VHSDF 0 "register_operand" "=w")
       (div:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
		  (match_operand:VHSDF 2 "register_operand" "w")))]
 "TARGET_SIMD"
{
  if (aarch64_emit_approx_div (operands[0], operands[1], operands[2]))
    DONE;

  operands[1] = force_reg (<MODE>mode, operands[1]);
})

(define_insn "*div<mode>3"
 [(set (match_operand:VHSDF 0 "register_operand" "=w")
       (div:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
		  (match_operand:VHSDF 2 "register_operand" "w")))]
 "TARGET_SIMD"
 "fdiv\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_fp_div_<stype><q>")]
)

(define_insn "neg<mode>2"
 [(set (match_operand:VHSDF 0 "register_operand" "=w")
       (neg:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
 "TARGET_SIMD"
 "fneg\\t%0.<Vtype>, %1.<Vtype>"
  [(set_attr "type" "neon_fp_neg_<stype><q>")]
)

(define_insn "abs<mode>2"
 [(set (match_operand:VHSDF 0 "register_operand" "=w")
       (abs:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
 "TARGET_SIMD"
 "fabs\\t%0.<Vtype>, %1.<Vtype>"
  [(set_attr "type" "neon_fp_abs_<stype><q>")]
)

(define_insn "fma<mode>4"
 [(set (match_operand:VHSDF 0 "register_operand" "=w")
       (fma:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
		  (match_operand:VHSDF 2 "register_operand" "w")
		  (match_operand:VHSDF 3 "register_operand" "0")))]
 "TARGET_SIMD"
 "fmla\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_fp_mla_<stype><q>")]
)

;; FMLA with one multiplicand taken from a vector lane.
(define_insn "*aarch64_fma4_elt<mode>"
 [(set (match_operand:VDQF 0 "register_operand" "=w")
       (fma:VDQF
	 (vec_duplicate:VDQF
	   (vec_select:<VEL>
	     (match_operand:VDQF 1 "register_operand" "<h_con>")
	     (parallel [(match_operand:SI 2 "immediate_operand")])))
	 (match_operand:VDQF 3 "register_operand" "w")
	 (match_operand:VDQF 4 "register_operand" "0")))]
 "TARGET_SIMD"
  {
    operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
    return "fmla\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
  }
  [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
)

(define_insn "*aarch64_fma4_elt_<vswap_width_name><mode>"
 [(set (match_operand:VDQSF 0 "register_operand" "=w")
       (fma:VDQSF
	 (vec_duplicate:VDQSF
	   (vec_select:<VEL>
	     (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
	     (parallel [(match_operand:SI 2 "immediate_operand")])))
	 (match_operand:VDQSF 3 "register_operand" "w")
	 (match_operand:VDQSF 4 "register_operand" "0")))]
 "TARGET_SIMD"
  {
    operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
    return "fmla\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
  }
  [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
)

(define_insn "*aarch64_fma4_elt_from_dup<mode>"
 [(set (match_operand:VMUL 0 "register_operand" "=w")
       (fma:VMUL
	 (vec_duplicate:VMUL
	   (match_operand:<VEL> 1 "register_operand" "<h_con>"))
	 (match_operand:VMUL 2 "register_operand" "w")
	 (match_operand:VMUL 3 "register_operand" "0")))]
  "TARGET_SIMD"
  "fmla\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]"
  [(set_attr "type" "neon<fp>_mla_<stype>_scalar<q>")]
)

(define_insn "*aarch64_fma4_elt_to_64v2df"
  [(set (match_operand:DF 0 "register_operand" "=w")
	(fma:DF
	  (vec_select:DF
	    (match_operand:V2DF 1 "register_operand" "w")
	    (parallel [(match_operand:SI 2 "immediate_operand")]))
	  (match_operand:DF 3 "register_operand" "w")
	  (match_operand:DF 4 "register_operand" "0")))]
  "TARGET_SIMD"
  {
    operands[2] = aarch64_endian_lane_rtx (V2DFmode, INTVAL (operands[2]));
    return "fmla\\t%0.2d, %3.2d, %1.2d[%2]";
  }
  [(set_attr "type" "neon_fp_mla_d_scalar_q")]
)

;; Fused negate-multiply-add (fmls) and its lane/duplicate variants,
;; mirroring the fma patterns above.
(define_insn "fnma<mode>4"
  [(set (match_operand:VHSDF 0 "register_operand" "=w")
	(fma:VHSDF
	  (neg:VHSDF (match_operand:VHSDF 1 "register_operand" "w"))
	  (match_operand:VHSDF 2 "register_operand" "w")
	  (match_operand:VHSDF 3 "register_operand" "0")))]
  "TARGET_SIMD"
  "fmls\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_fp_mla_<stype><q>")]
)

(define_insn "*aarch64_fnma4_elt<mode>"
 [(set (match_operand:VDQF 0 "register_operand" "=w")
       (fma:VDQF
	 (neg:VDQF
	   (match_operand:VDQF 3 "register_operand" "w"))
	 (vec_duplicate:VDQF
	   (vec_select:<VEL>
	     (match_operand:VDQF 1 "register_operand" "<h_con>")
	     (parallel [(match_operand:SI 2 "immediate_operand")])))
	 (match_operand:VDQF 4 "register_operand" "0")))]
 "TARGET_SIMD"
  {
    operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
    return "fmls\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
  }
  [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
)

(define_insn "*aarch64_fnma4_elt_<vswap_width_name><mode>"
 [(set (match_operand:VDQSF 0 "register_operand" "=w")
       (fma:VDQSF
	 (neg:VDQSF
	   (match_operand:VDQSF 3 "register_operand" "w"))
	 (vec_duplicate:VDQSF
	   (vec_select:<VEL>
	     (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
	     (parallel [(match_operand:SI 2 "immediate_operand")])))
	 (match_operand:VDQSF 4 "register_operand" "0")))]
 "TARGET_SIMD"
  {
    operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
    return "fmls\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
  }
  [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
)

(define_insn "*aarch64_fnma4_elt_from_dup<mode>"
 [(set (match_operand:VMUL 0 "register_operand" "=w")
       (fma:VMUL
	 (neg:VMUL
	   (match_operand:VMUL 2 "register_operand" "w"))
	 (vec_duplicate:VMUL
	   (match_operand:<VEL> 1 "register_operand" "<h_con>"))
	 (match_operand:VMUL 3 "register_operand" "0")))]
  "TARGET_SIMD"
  "fmls\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]"
  [(set_attr "type" "neon<fp>_mla_<stype>_scalar<q>")]
)

(define_insn "*aarch64_fnma4_elt_to_64v2df"
  [(set (match_operand:DF 0 "register_operand" "=w")
	(fma:DF
	  (vec_select:DF
	    (match_operand:V2DF 1 "register_operand" "w")
	    (parallel [(match_operand:SI 2 "immediate_operand")]))
	  (neg:DF
	    (match_operand:DF 3 "register_operand" "w"))
	  (match_operand:DF 4 "register_operand" "0")))]
  "TARGET_SIMD"
  {
    operands[2] = aarch64_endian_lane_rtx (V2DFmode, INTVAL (operands[2]));
    return "fmls\\t%0.2d, %3.2d, %1.2d[%2]";
  }
  [(set_attr "type" "neon_fp_mla_d_scalar_q")]
)

;; Vector versions of the floating-point frint patterns.
;; Expands to btrunc, ceil, floor, nearbyint, rint, round, frintn.
(define_insn "<frint_pattern><mode>2"
  [(set (match_operand:VHSDF 0 "register_operand" "=w")
	(unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")]
		       FRINT))]
  "TARGET_SIMD"
  "frint<frint_suffix>\\t%0.<Vtype>, %1.<Vtype>"
  [(set_attr "type" "neon_fp_round_<stype><q>")]
)

;; Vector versions of the fcvt standard patterns.
;; Expands to lbtrunc, lround, lceil, lfloor
(define_insn "l<fcvt_pattern><su_optab><VHSDF:mode><fcvt_target>2"
  [(set (match_operand:<FCVT_TARGET> 0 "register_operand" "=w")
	(FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
			       [(match_operand:VHSDF 1 "register_operand" "w")]
				FCVT)))]
  "TARGET_SIMD"
  "fcvt<frint_suffix><su>\\t%0.<Vtype>, %1.<Vtype>"
  [(set_attr "type" "neon_fp_to_int_<stype><q>")]
)

;; HF Scalar variants of related SIMD instructions.
1951(define_insn "l<fcvt_pattern><su_optab>hfhi2" 1952 [(set (match_operand:HI 0 "register_operand" "=w") 1953 (FIXUORS:HI (unspec:HF [(match_operand:HF 1 "register_operand" "w")] 1954 FCVT)))] 1955 "TARGET_SIMD_F16INST" 1956 "fcvt<frint_suffix><su>\t%h0, %h1" 1957 [(set_attr "type" "neon_fp_to_int_s")] 1958) 1959 1960(define_insn "<optab>_trunchfhi2" 1961 [(set (match_operand:HI 0 "register_operand" "=w") 1962 (FIXUORS:HI (match_operand:HF 1 "register_operand" "w")))] 1963 "TARGET_SIMD_F16INST" 1964 "fcvtz<su>\t%h0, %h1" 1965 [(set_attr "type" "neon_fp_to_int_s")] 1966) 1967 1968(define_insn "<optab>hihf2" 1969 [(set (match_operand:HF 0 "register_operand" "=w") 1970 (FLOATUORS:HF (match_operand:HI 1 "register_operand" "w")))] 1971 "TARGET_SIMD_F16INST" 1972 "<su_optab>cvtf\t%h0, %h1" 1973 [(set_attr "type" "neon_int_to_fp_s")] 1974) 1975 1976(define_insn "*aarch64_fcvt<su_optab><VDQF:mode><fcvt_target>2_mult" 1977 [(set (match_operand:<FCVT_TARGET> 0 "register_operand" "=w") 1978 (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET> 1979 [(mult:VDQF 1980 (match_operand:VDQF 1 "register_operand" "w") 1981 (match_operand:VDQF 2 "aarch64_fp_vec_pow2" ""))] 1982 UNSPEC_FRINTZ)))] 1983 "TARGET_SIMD 1984 && IN_RANGE (aarch64_vec_fpconst_pow_of_2 (operands[2]), 1, 1985 GET_MODE_BITSIZE (GET_MODE_INNER (<VDQF:MODE>mode)))" 1986 { 1987 int fbits = aarch64_vec_fpconst_pow_of_2 (operands[2]); 1988 char buf[64]; 1989 snprintf (buf, 64, "fcvtz<su>\\t%%0.<Vtype>, %%1.<Vtype>, #%d", fbits); 1990 output_asm_insn (buf, operands); 1991 return ""; 1992 } 1993 [(set_attr "type" "neon_fp_to_int_<Vetype><q>")] 1994) 1995 1996(define_expand "<optab><VHSDF:mode><fcvt_target>2" 1997 [(set (match_operand:<FCVT_TARGET> 0 "register_operand") 1998 (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET> 1999 [(match_operand:VHSDF 1 "register_operand")] 2000 UNSPEC_FRINTZ)))] 2001 "TARGET_SIMD" 2002 {}) 2003 2004(define_expand "<fix_trunc_optab><VHSDF:mode><fcvt_target>2" 2005 [(set (match_operand:<FCVT_TARGET> 0 
"register_operand") 2006 (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET> 2007 [(match_operand:VHSDF 1 "register_operand")] 2008 UNSPEC_FRINTZ)))] 2009 "TARGET_SIMD" 2010 {}) 2011 2012(define_expand "ftrunc<VHSDF:mode>2" 2013 [(set (match_operand:VHSDF 0 "register_operand") 2014 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand")] 2015 UNSPEC_FRINTZ))] 2016 "TARGET_SIMD" 2017 {}) 2018 2019(define_insn "<optab><fcvt_target><VHSDF:mode>2" 2020 [(set (match_operand:VHSDF 0 "register_operand" "=w") 2021 (FLOATUORS:VHSDF 2022 (match_operand:<FCVT_TARGET> 1 "register_operand" "w")))] 2023 "TARGET_SIMD" 2024 "<su_optab>cvtf\\t%0.<Vtype>, %1.<Vtype>" 2025 [(set_attr "type" "neon_int_to_fp_<stype><q>")] 2026) 2027 2028;; Conversions between vectors of floats and doubles. 2029;; Contains a mix of patterns to match standard pattern names 2030;; and those for intrinsics. 2031 2032;; Float widening operations. 2033 2034(define_insn "aarch64_simd_vec_unpacks_lo_<mode>" 2035 [(set (match_operand:<VWIDE> 0 "register_operand" "=w") 2036 (float_extend:<VWIDE> (vec_select:<VHALF> 2037 (match_operand:VQ_HSF 1 "register_operand" "w") 2038 (match_operand:VQ_HSF 2 "vect_par_cnst_lo_half" "") 2039 )))] 2040 "TARGET_SIMD" 2041 "fcvtl\\t%0.<Vwtype>, %1.<Vhalftype>" 2042 [(set_attr "type" "neon_fp_cvt_widen_s")] 2043) 2044 2045;; Convert between fixed-point and floating-point (vector modes) 2046 2047(define_insn "<FCVT_F2FIXED:fcvt_fixed_insn><VHSDF:mode>3" 2048 [(set (match_operand:<VHSDF:FCVT_TARGET> 0 "register_operand" "=w") 2049 (unspec:<VHSDF:FCVT_TARGET> 2050 [(match_operand:VHSDF 1 "register_operand" "w") 2051 (match_operand:SI 2 "immediate_operand" "i")] 2052 FCVT_F2FIXED))] 2053 "TARGET_SIMD" 2054 "<FCVT_F2FIXED:fcvt_fixed_insn>\t%<v>0<Vmtype>, %<v>1<Vmtype>, #%2" 2055 [(set_attr "type" "neon_fp_to_int_<VHSDF:stype><q>")] 2056) 2057 2058(define_insn "<FCVT_FIXED2F:fcvt_fixed_insn><VDQ_HSDI:mode>3" 2059 [(set (match_operand:<VDQ_HSDI:FCVT_TARGET> 0 "register_operand" "=w") 2060 
(unspec:<VDQ_HSDI:FCVT_TARGET> 2061 [(match_operand:VDQ_HSDI 1 "register_operand" "w") 2062 (match_operand:SI 2 "immediate_operand" "i")] 2063 FCVT_FIXED2F))] 2064 "TARGET_SIMD" 2065 "<FCVT_FIXED2F:fcvt_fixed_insn>\t%<v>0<Vmtype>, %<v>1<Vmtype>, #%2" 2066 [(set_attr "type" "neon_int_to_fp_<VDQ_HSDI:stype><q>")] 2067) 2068 2069;; ??? Note that the vectorizer usage of the vec_unpacks_[lo/hi] patterns 2070;; is inconsistent with vector ordering elsewhere in the compiler, in that 2071;; the meaning of HI and LO changes depending on the target endianness. 2072;; While elsewhere we map the higher numbered elements of a vector to 2073;; the lower architectural lanes of the vector, for these patterns we want 2074;; to always treat "hi" as referring to the higher architectural lanes. 2075;; Consequently, while the patterns below look inconsistent with our 2076;; other big-endian patterns their behavior is as required. 2077 2078(define_expand "vec_unpacks_lo_<mode>" 2079 [(match_operand:<VWIDE> 0 "register_operand" "") 2080 (match_operand:VQ_HSF 1 "register_operand" "")] 2081 "TARGET_SIMD" 2082 { 2083 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false); 2084 emit_insn (gen_aarch64_simd_vec_unpacks_lo_<mode> (operands[0], 2085 operands[1], p)); 2086 DONE; 2087 } 2088) 2089 2090(define_insn "aarch64_simd_vec_unpacks_hi_<mode>" 2091 [(set (match_operand:<VWIDE> 0 "register_operand" "=w") 2092 (float_extend:<VWIDE> (vec_select:<VHALF> 2093 (match_operand:VQ_HSF 1 "register_operand" "w") 2094 (match_operand:VQ_HSF 2 "vect_par_cnst_hi_half" "") 2095 )))] 2096 "TARGET_SIMD" 2097 "fcvtl2\\t%0.<Vwtype>, %1.<Vtype>" 2098 [(set_attr "type" "neon_fp_cvt_widen_s")] 2099) 2100 2101(define_expand "vec_unpacks_hi_<mode>" 2102 [(match_operand:<VWIDE> 0 "register_operand" "") 2103 (match_operand:VQ_HSF 1 "register_operand" "")] 2104 "TARGET_SIMD" 2105 { 2106 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true); 2107 emit_insn 
(gen_aarch64_simd_vec_unpacks_lo_<mode> (operands[0], 2108 operands[1], p)); 2109 DONE; 2110 } 2111) 2112(define_insn "aarch64_float_extend_lo_<Vwide>" 2113 [(set (match_operand:<VWIDE> 0 "register_operand" "=w") 2114 (float_extend:<VWIDE> 2115 (match_operand:VDF 1 "register_operand" "w")))] 2116 "TARGET_SIMD" 2117 "fcvtl\\t%0<Vmwtype>, %1<Vmtype>" 2118 [(set_attr "type" "neon_fp_cvt_widen_s")] 2119) 2120 2121;; Float narrowing operations. 2122 2123(define_insn "aarch64_float_truncate_lo_<mode>" 2124 [(set (match_operand:VDF 0 "register_operand" "=w") 2125 (float_truncate:VDF 2126 (match_operand:<VWIDE> 1 "register_operand" "w")))] 2127 "TARGET_SIMD" 2128 "fcvtn\\t%0.<Vtype>, %1<Vmwtype>" 2129 [(set_attr "type" "neon_fp_cvt_narrow_d_q")] 2130) 2131 2132(define_insn "aarch64_float_truncate_hi_<Vdbl>_le" 2133 [(set (match_operand:<VDBL> 0 "register_operand" "=w") 2134 (vec_concat:<VDBL> 2135 (match_operand:VDF 1 "register_operand" "0") 2136 (float_truncate:VDF 2137 (match_operand:<VWIDE> 2 "register_operand" "w"))))] 2138 "TARGET_SIMD && !BYTES_BIG_ENDIAN" 2139 "fcvtn2\\t%0.<Vdtype>, %2<Vmwtype>" 2140 [(set_attr "type" "neon_fp_cvt_narrow_d_q")] 2141) 2142 2143(define_insn "aarch64_float_truncate_hi_<Vdbl>_be" 2144 [(set (match_operand:<VDBL> 0 "register_operand" "=w") 2145 (vec_concat:<VDBL> 2146 (float_truncate:VDF 2147 (match_operand:<VWIDE> 2 "register_operand" "w")) 2148 (match_operand:VDF 1 "register_operand" "0")))] 2149 "TARGET_SIMD && BYTES_BIG_ENDIAN" 2150 "fcvtn2\\t%0.<Vdtype>, %2<Vmwtype>" 2151 [(set_attr "type" "neon_fp_cvt_narrow_d_q")] 2152) 2153 2154(define_expand "aarch64_float_truncate_hi_<Vdbl>" 2155 [(match_operand:<VDBL> 0 "register_operand" "=w") 2156 (match_operand:VDF 1 "register_operand" "0") 2157 (match_operand:<VWIDE> 2 "register_operand" "w")] 2158 "TARGET_SIMD" 2159{ 2160 rtx (*gen) (rtx, rtx, rtx) = BYTES_BIG_ENDIAN 2161 ? 
gen_aarch64_float_truncate_hi_<Vdbl>_be
			     : gen_aarch64_float_truncate_hi_<Vdbl>_le;
  emit_insn (gen (operands[0], operands[1], operands[2]));
  DONE;
}
)

(define_expand "vec_pack_trunc_v2df"
  [(set (match_operand:V4SF 0 "register_operand")
	(vec_concat:V4SF
	  (float_truncate:V2SF
	    (match_operand:V2DF 1 "register_operand"))
	  (float_truncate:V2SF
	    (match_operand:V2DF 2 "register_operand"))
	  ))]
  "TARGET_SIMD"
  {
    /* Narrow the BYTES_BIG_ENDIAN-adjusted "lo" input into the low half
       of the result, then the "hi" input into the high half.  */
    rtx tmp = gen_reg_rtx (V2SFmode);
    int lo = BYTES_BIG_ENDIAN ? 2 : 1;
    int hi = BYTES_BIG_ENDIAN ? 1 : 2;

    emit_insn (gen_aarch64_float_truncate_lo_v2sf (tmp, operands[lo]));
    emit_insn (gen_aarch64_float_truncate_hi_v4sf (operands[0],
						   tmp, operands[hi]));
    DONE;
  }
)

(define_expand "vec_pack_trunc_df"
  [(set (match_operand:V2SF 0 "register_operand")
	(vec_concat:V2SF
	  (float_truncate:SF
	    (match_operand:DF 1 "register_operand"))
	  (float_truncate:SF
	    (match_operand:DF 2 "register_operand"))
	  ))]
  "TARGET_SIMD"
  {
    /* The scratch must be V2DF: the two scalar DF inputs are packed into
       its low and high quads and only then is the whole vector narrowed
       to V2SF.  (A V2SF scratch would hand gen_move_lo/hi_quad_v2df a
       destination of the wrong mode.)  */
    rtx tmp = gen_reg_rtx (V2DFmode);
    int lo = BYTES_BIG_ENDIAN ? 2 : 1;
    int hi = BYTES_BIG_ENDIAN ? 1 : 2;

    emit_insn (gen_move_lo_quad_v2df (tmp, operands[lo]));
    emit_insn (gen_move_hi_quad_v2df (tmp, operands[hi]));
    emit_insn (gen_aarch64_float_truncate_lo_v2sf (operands[0], tmp));
    DONE;
  }
)

;; FP Max/Min
;; Max/Min are introduced by idiom recognition by GCC's mid-end. An
;; expression like:
;;      a = (b < c) ? b : c;
;; is idiom-matched as MIN_EXPR<b,c> only if -ffinite-math-only is enabled
;; either explicitly or indirectly via -ffast-math.
;;
;; MIN_EXPR and MAX_EXPR eventually map to 'smin' and 'smax' in RTL.
;; The 'smax' and 'smin' RTL standard pattern names do not specify which
;; operand will be returned when both operands are zero (i.e.
they may not 2220;; honour signed zeroes), or when either operand is NaN. Therefore GCC 2221;; only introduces MIN_EXPR/MAX_EXPR in fast math mode or when not honouring 2222;; NaNs. 2223 2224(define_insn "<su><maxmin><mode>3" 2225 [(set (match_operand:VHSDF 0 "register_operand" "=w") 2226 (FMAXMIN:VHSDF (match_operand:VHSDF 1 "register_operand" "w") 2227 (match_operand:VHSDF 2 "register_operand" "w")))] 2228 "TARGET_SIMD" 2229 "f<maxmin>nm\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>" 2230 [(set_attr "type" "neon_fp_minmax_<stype><q>")] 2231) 2232 2233;; Vector forms for fmax, fmin, fmaxnm, fminnm. 2234;; fmaxnm and fminnm are used for the fmax<mode>3 standard pattern names, 2235;; which implement the IEEE fmax ()/fmin () functions. 2236(define_insn "<maxmin_uns><mode>3" 2237 [(set (match_operand:VHSDF 0 "register_operand" "=w") 2238 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w") 2239 (match_operand:VHSDF 2 "register_operand" "w")] 2240 FMAXMIN_UNS))] 2241 "TARGET_SIMD" 2242 "<maxmin_uns_op>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>" 2243 [(set_attr "type" "neon_fp_minmax_<stype><q>")] 2244) 2245 2246;; 'across lanes' add. 
2247 2248(define_expand "reduc_plus_scal_<mode>" 2249 [(match_operand:<VEL> 0 "register_operand" "=w") 2250 (unspec:VDQ_I [(match_operand:VDQ_I 1 "register_operand" "w")] 2251 UNSPEC_ADDV)] 2252 "TARGET_SIMD" 2253 { 2254 rtx elt = aarch64_endian_lane_rtx (<MODE>mode, 0); 2255 rtx scratch = gen_reg_rtx (<MODE>mode); 2256 emit_insn (gen_aarch64_reduc_plus_internal<mode> (scratch, operands[1])); 2257 emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt)); 2258 DONE; 2259 } 2260) 2261 2262(define_insn "aarch64_faddp<mode>" 2263 [(set (match_operand:VHSDF 0 "register_operand" "=w") 2264 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w") 2265 (match_operand:VHSDF 2 "register_operand" "w")] 2266 UNSPEC_FADDV))] 2267 "TARGET_SIMD" 2268 "faddp\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>" 2269 [(set_attr "type" "neon_fp_reduc_add_<stype><q>")] 2270) 2271 2272(define_insn "aarch64_reduc_plus_internal<mode>" 2273 [(set (match_operand:VDQV 0 "register_operand" "=w") 2274 (unspec:VDQV [(match_operand:VDQV 1 "register_operand" "w")] 2275 UNSPEC_ADDV))] 2276 "TARGET_SIMD" 2277 "add<VDQV:vp>\\t%<Vetype>0, %1.<Vtype>" 2278 [(set_attr "type" "neon_reduc_add<q>")] 2279) 2280 2281(define_insn "aarch64_reduc_plus_internalv2si" 2282 [(set (match_operand:V2SI 0 "register_operand" "=w") 2283 (unspec:V2SI [(match_operand:V2SI 1 "register_operand" "w")] 2284 UNSPEC_ADDV))] 2285 "TARGET_SIMD" 2286 "addp\\t%0.2s, %1.2s, %1.2s" 2287 [(set_attr "type" "neon_reduc_add")] 2288) 2289 2290(define_insn "reduc_plus_scal_<mode>" 2291 [(set (match_operand:<VEL> 0 "register_operand" "=w") 2292 (unspec:<VEL> [(match_operand:V2F 1 "register_operand" "w")] 2293 UNSPEC_FADDV))] 2294 "TARGET_SIMD" 2295 "faddp\\t%<Vetype>0, %1.<Vtype>" 2296 [(set_attr "type" "neon_fp_reduc_add_<Vetype><q>")] 2297) 2298 2299(define_expand "reduc_plus_scal_v4sf" 2300 [(set (match_operand:SF 0 "register_operand") 2301 (unspec:V4SF [(match_operand:V4SF 1 "register_operand")] 2302 UNSPEC_FADDV))] 2303 "TARGET_SIMD" 
2304{ 2305 rtx elt = aarch64_endian_lane_rtx (V4SFmode, 0); 2306 rtx scratch = gen_reg_rtx (V4SFmode); 2307 emit_insn (gen_aarch64_faddpv4sf (scratch, operands[1], operands[1])); 2308 emit_insn (gen_aarch64_faddpv4sf (scratch, scratch, scratch)); 2309 emit_insn (gen_aarch64_get_lanev4sf (operands[0], scratch, elt)); 2310 DONE; 2311}) 2312 2313(define_insn "clrsb<mode>2" 2314 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w") 2315 (clrsb:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")))] 2316 "TARGET_SIMD" 2317 "cls\\t%0.<Vtype>, %1.<Vtype>" 2318 [(set_attr "type" "neon_cls<q>")] 2319) 2320 2321(define_insn "clz<mode>2" 2322 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w") 2323 (clz:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")))] 2324 "TARGET_SIMD" 2325 "clz\\t%0.<Vtype>, %1.<Vtype>" 2326 [(set_attr "type" "neon_cls<q>")] 2327) 2328 2329(define_insn "popcount<mode>2" 2330 [(set (match_operand:VB 0 "register_operand" "=w") 2331 (popcount:VB (match_operand:VB 1 "register_operand" "w")))] 2332 "TARGET_SIMD" 2333 "cnt\\t%0.<Vbtype>, %1.<Vbtype>" 2334 [(set_attr "type" "neon_cnt<q>")] 2335) 2336 2337;; 'across lanes' max and min ops. 2338 2339;; Template for outputting a scalar, so we can create __builtins which can be 2340;; gimple_fold'd to the IFN_REDUC_(MAX|MIN) function. (This is FP smax/smin). 2341(define_expand "reduc_<maxmin_uns>_scal_<mode>" 2342 [(match_operand:<VEL> 0 "register_operand") 2343 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand")] 2344 FMAXMINV)] 2345 "TARGET_SIMD" 2346 { 2347 rtx elt = aarch64_endian_lane_rtx (<MODE>mode, 0); 2348 rtx scratch = gen_reg_rtx (<MODE>mode); 2349 emit_insn (gen_aarch64_reduc_<maxmin_uns>_internal<mode> (scratch, 2350 operands[1])); 2351 emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt)); 2352 DONE; 2353 } 2354) 2355 2356;; Likewise for integer cases, signed and unsigned. 
2357(define_expand "reduc_<maxmin_uns>_scal_<mode>" 2358 [(match_operand:<VEL> 0 "register_operand") 2359 (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand")] 2360 MAXMINV)] 2361 "TARGET_SIMD" 2362 { 2363 rtx elt = aarch64_endian_lane_rtx (<MODE>mode, 0); 2364 rtx scratch = gen_reg_rtx (<MODE>mode); 2365 emit_insn (gen_aarch64_reduc_<maxmin_uns>_internal<mode> (scratch, 2366 operands[1])); 2367 emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt)); 2368 DONE; 2369 } 2370) 2371 2372(define_insn "aarch64_reduc_<maxmin_uns>_internal<mode>" 2373 [(set (match_operand:VDQV_S 0 "register_operand" "=w") 2374 (unspec:VDQV_S [(match_operand:VDQV_S 1 "register_operand" "w")] 2375 MAXMINV))] 2376 "TARGET_SIMD" 2377 "<maxmin_uns_op>v\\t%<Vetype>0, %1.<Vtype>" 2378 [(set_attr "type" "neon_reduc_minmax<q>")] 2379) 2380 2381(define_insn "aarch64_reduc_<maxmin_uns>_internalv2si" 2382 [(set (match_operand:V2SI 0 "register_operand" "=w") 2383 (unspec:V2SI [(match_operand:V2SI 1 "register_operand" "w")] 2384 MAXMINV))] 2385 "TARGET_SIMD" 2386 "<maxmin_uns_op>p\\t%0.2s, %1.2s, %1.2s" 2387 [(set_attr "type" "neon_reduc_minmax")] 2388) 2389 2390(define_insn "aarch64_reduc_<maxmin_uns>_internal<mode>" 2391 [(set (match_operand:VHSDF 0 "register_operand" "=w") 2392 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")] 2393 FMAXMINV))] 2394 "TARGET_SIMD" 2395 "<maxmin_uns_op><vp>\\t%<Vetype>0, %1.<Vtype>" 2396 [(set_attr "type" "neon_fp_reduc_minmax_<stype><q>")] 2397) 2398 2399;; aarch64_simd_bsl may compile to any of bsl/bif/bit depending on register 2400;; allocation. 2401;; Operand 1 is the mask, operands 2 and 3 are the bitfields from which 2402;; to select. 
2403;; 2404;; Thus our BSL is of the form: 2405;; op0 = bsl (mask, op2, op3) 2406;; We can use any of: 2407;; 2408;; if (op0 = mask) 2409;; bsl mask, op1, op2 2410;; if (op0 = op1) (so 1-bits in mask choose bits from op2, else op0) 2411;; bit op0, op2, mask 2412;; if (op0 = op2) (so 0-bits in mask choose bits from op1, else op0) 2413;; bif op0, op1, mask 2414;; 2415;; This pattern is expanded to by the aarch64_simd_bsl<mode> expander. 2416;; Some forms of straight-line code may generate the equivalent form 2417;; in *aarch64_simd_bsl<mode>_alt. 2418 2419(define_insn "aarch64_simd_bsl<mode>_internal" 2420 [(set (match_operand:VDQ_I 0 "register_operand" "=w,w,w") 2421 (xor:VDQ_I 2422 (and:VDQ_I 2423 (xor:VDQ_I 2424 (match_operand:<V_INT_EQUIV> 3 "register_operand" "w,0,w") 2425 (match_operand:VDQ_I 2 "register_operand" "w,w,0")) 2426 (match_operand:VDQ_I 1 "register_operand" "0,w,w")) 2427 (match_dup:<V_INT_EQUIV> 3) 2428 ))] 2429 "TARGET_SIMD" 2430 "@ 2431 bsl\\t%0.<Vbtype>, %2.<Vbtype>, %3.<Vbtype> 2432 bit\\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype> 2433 bif\\t%0.<Vbtype>, %3.<Vbtype>, %1.<Vbtype>" 2434 [(set_attr "type" "neon_bsl<q>")] 2435) 2436 2437;; We need this form in addition to the above pattern to match the case 2438;; when combine tries merging three insns such that the second operand of 2439;; the outer XOR matches the second operand of the inner XOR rather than 2440;; the first. The two are equivalent but since recog doesn't try all 2441;; permutations of commutative operations, we have to have a separate pattern. 
2442 2443(define_insn "*aarch64_simd_bsl<mode>_alt" 2444 [(set (match_operand:VDQ_I 0 "register_operand" "=w,w,w") 2445 (xor:VDQ_I 2446 (and:VDQ_I 2447 (xor:VDQ_I 2448 (match_operand:VDQ_I 3 "register_operand" "w,w,0") 2449 (match_operand:<V_INT_EQUIV> 2 "register_operand" "w,0,w")) 2450 (match_operand:VDQ_I 1 "register_operand" "0,w,w")) 2451 (match_dup:<V_INT_EQUIV> 2)))] 2452 "TARGET_SIMD" 2453 "@ 2454 bsl\\t%0.<Vbtype>, %3.<Vbtype>, %2.<Vbtype> 2455 bit\\t%0.<Vbtype>, %3.<Vbtype>, %1.<Vbtype> 2456 bif\\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>" 2457 [(set_attr "type" "neon_bsl<q>")] 2458) 2459 2460;; DImode is special, we want to avoid computing operations which are 2461;; more naturally computed in general purpose registers in the vector 2462;; registers. If we do that, we need to move all three operands from general 2463;; purpose registers to vector registers, then back again. However, we 2464;; don't want to make this pattern an UNSPEC as we'd lose scope for 2465;; optimizations based on the component operations of a BSL. 2466;; 2467;; That means we need a splitter back to the individual operations, if they 2468;; would be better calculated on the integer side. 2469 2470(define_insn_and_split "aarch64_simd_bsldi_internal" 2471 [(set (match_operand:DI 0 "register_operand" "=w,w,w,&r") 2472 (xor:DI 2473 (and:DI 2474 (xor:DI 2475 (match_operand:DI 3 "register_operand" "w,0,w,r") 2476 (match_operand:DI 2 "register_operand" "w,w,0,r")) 2477 (match_operand:DI 1 "register_operand" "0,w,w,r")) 2478 (match_dup:DI 3) 2479 ))] 2480 "TARGET_SIMD" 2481 "@ 2482 bsl\\t%0.8b, %2.8b, %3.8b 2483 bit\\t%0.8b, %2.8b, %1.8b 2484 bif\\t%0.8b, %3.8b, %1.8b 2485 #" 2486 "&& REG_P (operands[0]) && GP_REGNUM_P (REGNO (operands[0]))" 2487 [(match_dup 1) (match_dup 1) (match_dup 2) (match_dup 3)] 2488{ 2489 /* Split back to individual operations. If we're before reload, and 2490 able to create a temporary register, do so. 
If we're after reload, 2491 we've got an early-clobber destination register, so use that. 2492 Otherwise, we can't create pseudos and we can't yet guarantee that 2493 operands[0] is safe to write, so FAIL to split. */ 2494 2495 rtx scratch; 2496 if (reload_completed) 2497 scratch = operands[0]; 2498 else if (can_create_pseudo_p ()) 2499 scratch = gen_reg_rtx (DImode); 2500 else 2501 FAIL; 2502 2503 emit_insn (gen_xordi3 (scratch, operands[2], operands[3])); 2504 emit_insn (gen_anddi3 (scratch, scratch, operands[1])); 2505 emit_insn (gen_xordi3 (operands[0], scratch, operands[3])); 2506 DONE; 2507} 2508 [(set_attr "type" "neon_bsl,neon_bsl,neon_bsl,multiple") 2509 (set_attr "length" "4,4,4,12")] 2510) 2511 2512(define_insn_and_split "aarch64_simd_bsldi_alt" 2513 [(set (match_operand:DI 0 "register_operand" "=w,w,w,&r") 2514 (xor:DI 2515 (and:DI 2516 (xor:DI 2517 (match_operand:DI 3 "register_operand" "w,w,0,r") 2518 (match_operand:DI 2 "register_operand" "w,0,w,r")) 2519 (match_operand:DI 1 "register_operand" "0,w,w,r")) 2520 (match_dup:DI 2) 2521 ))] 2522 "TARGET_SIMD" 2523 "@ 2524 bsl\\t%0.8b, %3.8b, %2.8b 2525 bit\\t%0.8b, %3.8b, %1.8b 2526 bif\\t%0.8b, %2.8b, %1.8b 2527 #" 2528 "&& REG_P (operands[0]) && GP_REGNUM_P (REGNO (operands[0]))" 2529 [(match_dup 0) (match_dup 1) (match_dup 2) (match_dup 3)] 2530{ 2531 /* Split back to individual operations. If we're before reload, and 2532 able to create a temporary register, do so. If we're after reload, 2533 we've got an early-clobber destination register, so use that. 2534 Otherwise, we can't create pseudos and we can't yet guarantee that 2535 operands[0] is safe to write, so FAIL to split. 
*/ 2536 2537 rtx scratch; 2538 if (reload_completed) 2539 scratch = operands[0]; 2540 else if (can_create_pseudo_p ()) 2541 scratch = gen_reg_rtx (DImode); 2542 else 2543 FAIL; 2544 2545 emit_insn (gen_xordi3 (scratch, operands[2], operands[3])); 2546 emit_insn (gen_anddi3 (scratch, scratch, operands[1])); 2547 emit_insn (gen_xordi3 (operands[0], scratch, operands[2])); 2548 DONE; 2549} 2550 [(set_attr "type" "neon_bsl,neon_bsl,neon_bsl,multiple") 2551 (set_attr "length" "4,4,4,12")] 2552) 2553 2554(define_expand "aarch64_simd_bsl<mode>" 2555 [(match_operand:VALLDIF 0 "register_operand") 2556 (match_operand:<V_INT_EQUIV> 1 "register_operand") 2557 (match_operand:VALLDIF 2 "register_operand") 2558 (match_operand:VALLDIF 3 "register_operand")] 2559 "TARGET_SIMD" 2560{ 2561 /* We can't alias operands together if they have different modes. */ 2562 rtx tmp = operands[0]; 2563 if (FLOAT_MODE_P (<MODE>mode)) 2564 { 2565 operands[2] = gen_lowpart (<V_INT_EQUIV>mode, operands[2]); 2566 operands[3] = gen_lowpart (<V_INT_EQUIV>mode, operands[3]); 2567 tmp = gen_reg_rtx (<V_INT_EQUIV>mode); 2568 } 2569 operands[1] = gen_lowpart (<V_INT_EQUIV>mode, operands[1]); 2570 emit_insn (gen_aarch64_simd_bsl<v_int_equiv>_internal (tmp, 2571 operands[1], 2572 operands[2], 2573 operands[3])); 2574 if (tmp != operands[0]) 2575 emit_move_insn (operands[0], gen_lowpart (<MODE>mode, tmp)); 2576 2577 DONE; 2578}) 2579 2580(define_expand "vcond_mask_<mode><v_int_equiv>" 2581 [(match_operand:VALLDI 0 "register_operand") 2582 (match_operand:VALLDI 1 "nonmemory_operand") 2583 (match_operand:VALLDI 2 "nonmemory_operand") 2584 (match_operand:<V_INT_EQUIV> 3 "register_operand")] 2585 "TARGET_SIMD" 2586{ 2587 /* If we have (a = (P) ? -1 : 0); 2588 Then we can simply move the generated mask (result must be int). */ 2589 if (operands[1] == CONSTM1_RTX (<MODE>mode) 2590 && operands[2] == CONST0_RTX (<MODE>mode)) 2591 emit_move_insn (operands[0], operands[3]); 2592 /* Similarly, (a = (P) ? 
0 : -1) is just inverting the generated mask. */ 2593 else if (operands[1] == CONST0_RTX (<MODE>mode) 2594 && operands[2] == CONSTM1_RTX (<MODE>mode)) 2595 emit_insn (gen_one_cmpl<v_int_equiv>2 (operands[0], operands[3])); 2596 else 2597 { 2598 if (!REG_P (operands[1])) 2599 operands[1] = force_reg (<MODE>mode, operands[1]); 2600 if (!REG_P (operands[2])) 2601 operands[2] = force_reg (<MODE>mode, operands[2]); 2602 emit_insn (gen_aarch64_simd_bsl<mode> (operands[0], operands[3], 2603 operands[1], operands[2])); 2604 } 2605 2606 DONE; 2607}) 2608 2609;; Patterns comparing two vectors to produce a mask. 2610 2611(define_expand "vec_cmp<mode><mode>" 2612 [(set (match_operand:VSDQ_I_DI 0 "register_operand") 2613 (match_operator 1 "comparison_operator" 2614 [(match_operand:VSDQ_I_DI 2 "register_operand") 2615 (match_operand:VSDQ_I_DI 3 "nonmemory_operand")]))] 2616 "TARGET_SIMD" 2617{ 2618 rtx mask = operands[0]; 2619 enum rtx_code code = GET_CODE (operands[1]); 2620 2621 switch (code) 2622 { 2623 case NE: 2624 case LE: 2625 case LT: 2626 case GE: 2627 case GT: 2628 case EQ: 2629 if (operands[3] == CONST0_RTX (<MODE>mode)) 2630 break; 2631 2632 /* Fall through. 
*/ 2633 default: 2634 if (!REG_P (operands[3])) 2635 operands[3] = force_reg (<MODE>mode, operands[3]); 2636 2637 break; 2638 } 2639 2640 switch (code) 2641 { 2642 case LT: 2643 emit_insn (gen_aarch64_cmlt<mode> (mask, operands[2], operands[3])); 2644 break; 2645 2646 case GE: 2647 emit_insn (gen_aarch64_cmge<mode> (mask, operands[2], operands[3])); 2648 break; 2649 2650 case LE: 2651 emit_insn (gen_aarch64_cmle<mode> (mask, operands[2], operands[3])); 2652 break; 2653 2654 case GT: 2655 emit_insn (gen_aarch64_cmgt<mode> (mask, operands[2], operands[3])); 2656 break; 2657 2658 case LTU: 2659 emit_insn (gen_aarch64_cmgtu<mode> (mask, operands[3], operands[2])); 2660 break; 2661 2662 case GEU: 2663 emit_insn (gen_aarch64_cmgeu<mode> (mask, operands[2], operands[3])); 2664 break; 2665 2666 case LEU: 2667 emit_insn (gen_aarch64_cmgeu<mode> (mask, operands[3], operands[2])); 2668 break; 2669 2670 case GTU: 2671 emit_insn (gen_aarch64_cmgtu<mode> (mask, operands[2], operands[3])); 2672 break; 2673 2674 case NE: 2675 /* Handle NE as !EQ. 
*/ 2676 emit_insn (gen_aarch64_cmeq<mode> (mask, operands[2], operands[3])); 2677 emit_insn (gen_one_cmpl<v_int_equiv>2 (mask, mask)); 2678 break; 2679 2680 case EQ: 2681 emit_insn (gen_aarch64_cmeq<mode> (mask, operands[2], operands[3])); 2682 break; 2683 2684 default: 2685 gcc_unreachable (); 2686 } 2687 2688 DONE; 2689}) 2690 2691(define_expand "vec_cmp<mode><v_int_equiv>" 2692 [(set (match_operand:<V_INT_EQUIV> 0 "register_operand") 2693 (match_operator 1 "comparison_operator" 2694 [(match_operand:VDQF 2 "register_operand") 2695 (match_operand:VDQF 3 "nonmemory_operand")]))] 2696 "TARGET_SIMD" 2697{ 2698 int use_zero_form = 0; 2699 enum rtx_code code = GET_CODE (operands[1]); 2700 rtx tmp = gen_reg_rtx (<V_INT_EQUIV>mode); 2701 2702 rtx (*comparison) (rtx, rtx, rtx) = NULL; 2703 2704 switch (code) 2705 { 2706 case LE: 2707 case LT: 2708 case GE: 2709 case GT: 2710 case EQ: 2711 if (operands[3] == CONST0_RTX (<MODE>mode)) 2712 { 2713 use_zero_form = 1; 2714 break; 2715 } 2716 /* Fall through. */ 2717 default: 2718 if (!REG_P (operands[3])) 2719 operands[3] = force_reg (<MODE>mode, operands[3]); 2720 2721 break; 2722 } 2723 2724 switch (code) 2725 { 2726 case LT: 2727 if (use_zero_form) 2728 { 2729 comparison = gen_aarch64_cmlt<mode>; 2730 break; 2731 } 2732 /* Fall through. */ 2733 case UNLT: 2734 std::swap (operands[2], operands[3]); 2735 /* Fall through. */ 2736 case UNGT: 2737 case GT: 2738 comparison = gen_aarch64_cmgt<mode>; 2739 break; 2740 case LE: 2741 if (use_zero_form) 2742 { 2743 comparison = gen_aarch64_cmle<mode>; 2744 break; 2745 } 2746 /* Fall through. */ 2747 case UNLE: 2748 std::swap (operands[2], operands[3]); 2749 /* Fall through. 
*/ 2750 case UNGE: 2751 case GE: 2752 comparison = gen_aarch64_cmge<mode>; 2753 break; 2754 case NE: 2755 case EQ: 2756 comparison = gen_aarch64_cmeq<mode>; 2757 break; 2758 case UNEQ: 2759 case ORDERED: 2760 case UNORDERED: 2761 case LTGT: 2762 break; 2763 default: 2764 gcc_unreachable (); 2765 } 2766 2767 switch (code) 2768 { 2769 case UNGE: 2770 case UNGT: 2771 case UNLE: 2772 case UNLT: 2773 { 2774 /* All of the above must not raise any FP exceptions. Thus we first 2775 check each operand for NaNs and force any elements containing NaN to 2776 zero before using them in the compare. 2777 Example: UN<cc> (a, b) -> UNORDERED (a, b) | 2778 (cm<cc> (isnan (a) ? 0.0 : a, 2779 isnan (b) ? 0.0 : b)) 2780 We use the following transformations for doing the comparisions: 2781 a UNGE b -> a GE b 2782 a UNGT b -> a GT b 2783 a UNLE b -> b GE a 2784 a UNLT b -> b GT a. */ 2785 2786 rtx tmp0 = gen_reg_rtx (<V_INT_EQUIV>mode); 2787 rtx tmp1 = gen_reg_rtx (<V_INT_EQUIV>mode); 2788 rtx tmp2 = gen_reg_rtx (<V_INT_EQUIV>mode); 2789 emit_insn (gen_aarch64_cmeq<mode> (tmp0, operands[2], operands[2])); 2790 emit_insn (gen_aarch64_cmeq<mode> (tmp1, operands[3], operands[3])); 2791 emit_insn (gen_and<v_int_equiv>3 (tmp2, tmp0, tmp1)); 2792 emit_insn (gen_and<v_int_equiv>3 (tmp0, tmp0, 2793 lowpart_subreg (<V_INT_EQUIV>mode, 2794 operands[2], 2795 <MODE>mode))); 2796 emit_insn (gen_and<v_int_equiv>3 (tmp1, tmp1, 2797 lowpart_subreg (<V_INT_EQUIV>mode, 2798 operands[3], 2799 <MODE>mode))); 2800 gcc_assert (comparison != NULL); 2801 emit_insn (comparison (operands[0], 2802 lowpart_subreg (<MODE>mode, 2803 tmp0, <V_INT_EQUIV>mode), 2804 lowpart_subreg (<MODE>mode, 2805 tmp1, <V_INT_EQUIV>mode))); 2806 emit_insn (gen_orn<v_int_equiv>3 (operands[0], tmp2, operands[0])); 2807 } 2808 break; 2809 2810 case LT: 2811 case LE: 2812 case GT: 2813 case GE: 2814 case EQ: 2815 case NE: 2816 /* The easy case. Here we emit one of FCMGE, FCMGT or FCMEQ. 2817 As a LT b <=> b GE a && a LE b <=> b GT a. 
Our transformations are: 2818 a GE b -> a GE b 2819 a GT b -> a GT b 2820 a LE b -> b GE a 2821 a LT b -> b GT a 2822 a EQ b -> a EQ b 2823 a NE b -> ~(a EQ b) */ 2824 gcc_assert (comparison != NULL); 2825 emit_insn (comparison (operands[0], operands[2], operands[3])); 2826 if (code == NE) 2827 emit_insn (gen_one_cmpl<v_int_equiv>2 (operands[0], operands[0])); 2828 break; 2829 2830 case LTGT: 2831 /* LTGT is not guranteed to not generate a FP exception. So let's 2832 go the faster way : ((a > b) || (b > a)). */ 2833 emit_insn (gen_aarch64_cmgt<mode> (operands[0], 2834 operands[2], operands[3])); 2835 emit_insn (gen_aarch64_cmgt<mode> (tmp, operands[3], operands[2])); 2836 emit_insn (gen_ior<v_int_equiv>3 (operands[0], operands[0], tmp)); 2837 break; 2838 2839 case ORDERED: 2840 case UNORDERED: 2841 case UNEQ: 2842 /* cmeq (a, a) & cmeq (b, b). */ 2843 emit_insn (gen_aarch64_cmeq<mode> (operands[0], 2844 operands[2], operands[2])); 2845 emit_insn (gen_aarch64_cmeq<mode> (tmp, operands[3], operands[3])); 2846 emit_insn (gen_and<v_int_equiv>3 (operands[0], operands[0], tmp)); 2847 2848 if (code == UNORDERED) 2849 emit_insn (gen_one_cmpl<v_int_equiv>2 (operands[0], operands[0])); 2850 else if (code == UNEQ) 2851 { 2852 emit_insn (gen_aarch64_cmeq<mode> (tmp, operands[2], operands[3])); 2853 emit_insn (gen_orn<v_int_equiv>3 (operands[0], operands[0], tmp)); 2854 } 2855 break; 2856 2857 default: 2858 gcc_unreachable (); 2859 } 2860 2861 DONE; 2862}) 2863 2864(define_expand "vec_cmpu<mode><mode>" 2865 [(set (match_operand:VSDQ_I_DI 0 "register_operand") 2866 (match_operator 1 "comparison_operator" 2867 [(match_operand:VSDQ_I_DI 2 "register_operand") 2868 (match_operand:VSDQ_I_DI 3 "nonmemory_operand")]))] 2869 "TARGET_SIMD" 2870{ 2871 emit_insn (gen_vec_cmp<mode><mode> (operands[0], operands[1], 2872 operands[2], operands[3])); 2873 DONE; 2874}) 2875 2876(define_expand "vcond<mode><mode>" 2877 [(set (match_operand:VALLDI 0 "register_operand") 2878 (if_then_else:VALLDI 
2879 (match_operator 3 "comparison_operator" 2880 [(match_operand:VALLDI 4 "register_operand") 2881 (match_operand:VALLDI 5 "nonmemory_operand")]) 2882 (match_operand:VALLDI 1 "nonmemory_operand") 2883 (match_operand:VALLDI 2 "nonmemory_operand")))] 2884 "TARGET_SIMD" 2885{ 2886 rtx mask = gen_reg_rtx (<V_INT_EQUIV>mode); 2887 enum rtx_code code = GET_CODE (operands[3]); 2888 2889 /* NE is handled as !EQ in vec_cmp patterns, we can explicitly invert 2890 it as well as switch operands 1/2 in order to avoid the additional 2891 NOT instruction. */ 2892 if (code == NE) 2893 { 2894 operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]), 2895 operands[4], operands[5]); 2896 std::swap (operands[1], operands[2]); 2897 } 2898 emit_insn (gen_vec_cmp<mode><v_int_equiv> (mask, operands[3], 2899 operands[4], operands[5])); 2900 emit_insn (gen_vcond_mask_<mode><v_int_equiv> (operands[0], operands[1], 2901 operands[2], mask)); 2902 2903 DONE; 2904}) 2905 2906(define_expand "vcond<v_cmp_mixed><mode>" 2907 [(set (match_operand:<V_cmp_mixed> 0 "register_operand") 2908 (if_then_else:<V_cmp_mixed> 2909 (match_operator 3 "comparison_operator" 2910 [(match_operand:VDQF_COND 4 "register_operand") 2911 (match_operand:VDQF_COND 5 "nonmemory_operand")]) 2912 (match_operand:<V_cmp_mixed> 1 "nonmemory_operand") 2913 (match_operand:<V_cmp_mixed> 2 "nonmemory_operand")))] 2914 "TARGET_SIMD" 2915{ 2916 rtx mask = gen_reg_rtx (<V_INT_EQUIV>mode); 2917 enum rtx_code code = GET_CODE (operands[3]); 2918 2919 /* NE is handled as !EQ in vec_cmp patterns, we can explicitly invert 2920 it as well as switch operands 1/2 in order to avoid the additional 2921 NOT instruction. 
*/ 2922 if (code == NE) 2923 { 2924 operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]), 2925 operands[4], operands[5]); 2926 std::swap (operands[1], operands[2]); 2927 } 2928 emit_insn (gen_vec_cmp<mode><v_int_equiv> (mask, operands[3], 2929 operands[4], operands[5])); 2930 emit_insn (gen_vcond_mask_<v_cmp_mixed><v_int_equiv> ( 2931 operands[0], operands[1], 2932 operands[2], mask)); 2933 2934 DONE; 2935}) 2936 2937(define_expand "vcondu<mode><mode>" 2938 [(set (match_operand:VSDQ_I_DI 0 "register_operand") 2939 (if_then_else:VSDQ_I_DI 2940 (match_operator 3 "comparison_operator" 2941 [(match_operand:VSDQ_I_DI 4 "register_operand") 2942 (match_operand:VSDQ_I_DI 5 "nonmemory_operand")]) 2943 (match_operand:VSDQ_I_DI 1 "nonmemory_operand") 2944 (match_operand:VSDQ_I_DI 2 "nonmemory_operand")))] 2945 "TARGET_SIMD" 2946{ 2947 rtx mask = gen_reg_rtx (<MODE>mode); 2948 enum rtx_code code = GET_CODE (operands[3]); 2949 2950 /* NE is handled as !EQ in vec_cmp patterns, we can explicitly invert 2951 it as well as switch operands 1/2 in order to avoid the additional 2952 NOT instruction. 
*/ 2953 if (code == NE) 2954 { 2955 operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]), 2956 operands[4], operands[5]); 2957 std::swap (operands[1], operands[2]); 2958 } 2959 emit_insn (gen_vec_cmp<mode><mode> (mask, operands[3], 2960 operands[4], operands[5])); 2961 emit_insn (gen_vcond_mask_<mode><v_int_equiv> (operands[0], operands[1], 2962 operands[2], mask)); 2963 DONE; 2964}) 2965 2966(define_expand "vcondu<mode><v_cmp_mixed>" 2967 [(set (match_operand:VDQF 0 "register_operand") 2968 (if_then_else:VDQF 2969 (match_operator 3 "comparison_operator" 2970 [(match_operand:<V_cmp_mixed> 4 "register_operand") 2971 (match_operand:<V_cmp_mixed> 5 "nonmemory_operand")]) 2972 (match_operand:VDQF 1 "nonmemory_operand") 2973 (match_operand:VDQF 2 "nonmemory_operand")))] 2974 "TARGET_SIMD" 2975{ 2976 rtx mask = gen_reg_rtx (<V_INT_EQUIV>mode); 2977 enum rtx_code code = GET_CODE (operands[3]); 2978 2979 /* NE is handled as !EQ in vec_cmp patterns, we can explicitly invert 2980 it as well as switch operands 1/2 in order to avoid the additional 2981 NOT instruction. */ 2982 if (code == NE) 2983 { 2984 operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]), 2985 operands[4], operands[5]); 2986 std::swap (operands[1], operands[2]); 2987 } 2988 emit_insn (gen_vec_cmp<v_cmp_mixed><v_cmp_mixed> ( 2989 mask, operands[3], 2990 operands[4], operands[5])); 2991 emit_insn (gen_vcond_mask_<mode><v_int_equiv> (operands[0], operands[1], 2992 operands[2], mask)); 2993 DONE; 2994}) 2995 2996;; Patterns for AArch64 SIMD Intrinsics. 2997 2998;; Lane extraction with sign extension to general purpose register. 
;; Lane extraction with sign extension to a general-purpose register.
;; Operand 2 is a GCC-vector-extension (array-order) lane index; it is
;; flipped for big-endian only when the assembly is emitted.
(define_insn "*aarch64_get_lane_extend<GPI:mode><VDQQH:mode>"
  [(set (match_operand:GPI 0 "register_operand" "=r")
	(sign_extend:GPI
	  (vec_select:<VEL>
	    (match_operand:VDQQH 1 "register_operand" "w")
	    (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
  "TARGET_SIMD"
  {
    /* Two mode iterators (GPI, VDQQH) are active here, so the vector
       mode must be named explicitly; the lane flip is relative to the
       vector operand's lane count.  */
    operands[2] = aarch64_endian_lane_rtx (<VDQQH:MODE>mode,
					   INTVAL (operands[2]));
    return "smov\\t%<GPI:w>0, %1.<VDQQH:Vetype>[%2]";
  }
  [(set_attr "type" "neon_to_gp<q>")]
)

;; Lane extraction with zero extension to a 32-bit general-purpose register.
(define_insn "*aarch64_get_lane_zero_extendsi<mode>"
  [(set (match_operand:SI 0 "register_operand" "=r")
	(zero_extend:SI
	  (vec_select:<VEL>
	    (match_operand:VDQQH 1 "register_operand" "w")
	    (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
  "TARGET_SIMD"
  {
    operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
    return "umov\\t%w0, %1.<Vetype>[%2]";
  }
  [(set_attr "type" "neon_to_gp<q>")]
)

;; Lane extraction of a value, neither sign nor zero extension
;; is guaranteed so upper bits should be considered undefined.
;; RTL uses GCC vector extension indices throughout so flip only for assembly.
(define_insn "aarch64_get_lane<mode>"
  [(set (match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand" "=r, w, Utv")
	(vec_select:<VEL>
	  (match_operand:VALL_F16 1 "register_operand" "w, w, w")
	  (parallel [(match_operand:SI 2 "immediate_operand" "i, i, i")])))]
  "TARGET_SIMD"
  {
    operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
    switch (which_alternative)
      {
	case 0:
	  /* Lane to general-purpose register.  */
	  return "umov\\t%<vwcore>0, %1.<Vetype>[%2]";
	case 1:
	  /* Lane to SIMD scalar register.  */
	  return "dup\\t%<Vetype>0, %1.<Vetype>[%2]";
	case 2:
	  /* Lane stored directly to memory.  */
	  return "st1\\t{%1.<Vetype>}[%2], %0";
	default:
	  gcc_unreachable ();
      }
  }
  [(set_attr "type" "neon_to_gp<q>, neon_dup<q>, neon_store1_one_lane<q>")]
)

;; Load a pair of 64-bit values from consecutive memory locations as a single
;; Q-register load.  The condition requires operand 2's address to be exactly
;; operand 1's address plus the size of one element mode.
(define_insn "load_pair_lanes<mode>"
  [(set (match_operand:<VDBL> 0 "register_operand" "=w")
	(vec_concat:<VDBL>
	  (match_operand:VDC 1 "memory_operand" "Utq")
	  (match_operand:VDC 2 "memory_operand" "m")))]
  "TARGET_SIMD && !STRICT_ALIGNMENT
   && rtx_equal_p (XEXP (operands[2], 0),
		   plus_constant (Pmode,
				  XEXP (operands[1], 0),
				  GET_MODE_SIZE (<MODE>mode)))"
  "ldr\\t%q0, %1"
  [(set_attr "type" "neon_load1_1reg_q")]
)

;; Store both halves of a 128-bit concatenation with a single STP, from either
;; SIMD or general-purpose registers.
(define_insn "store_pair_lanes<mode>"
  [(set (match_operand:<VDBL> 0 "aarch64_mem_pair_lanes_operand" "=Uml, Uml")
	(vec_concat:<VDBL>
	  (match_operand:VDC 1 "register_operand" "w, r")
	  (match_operand:VDC 2 "register_operand" "w, r")))]
  "TARGET_SIMD"
  "@
   stp\\t%d1, %d2, %y0
   stp\\t%x1, %x2, %y0"
  [(set_attr "type" "neon_stp, store_16")]
)

;; In this insn, operand 1 should be low, and operand 2 the high part of the
;; dest vector.
;; Concatenate a 64-bit value with a zero upper half (little-endian layout).
;; The zero half needs no instruction: a 64-bit move/load already clears the
;; upper bits of the Q register.
(define_insn "*aarch64_combinez<mode>"
  [(set (match_operand:<VDBL> 0 "register_operand" "=w,w,w")
	(vec_concat:<VDBL>
	  (match_operand:VDC 1 "general_operand" "w,?r,m")
	  (match_operand:VDC 2 "aarch64_simd_or_scalar_imm_zero")))]
  "TARGET_SIMD && !BYTES_BIG_ENDIAN"
  "@
   mov\\t%0.8b, %1.8b
   fmov\t%d0, %1
   ldr\\t%d0, %1"
  [(set_attr "type" "neon_move<q>, neon_from_gp, neon_load1_1reg")
   (set_attr "simd" "yes,*,yes")
   (set_attr "fp" "*,yes,*")]
)

;; Big-endian counterpart of *aarch64_combinez: the zero half comes first in
;; the vec_concat, but the emitted code is identical.
(define_insn "*aarch64_combinez_be<mode>"
  [(set (match_operand:<VDBL> 0 "register_operand" "=w,w,w")
	(vec_concat:<VDBL>
	  (match_operand:VDC 2 "aarch64_simd_or_scalar_imm_zero")
	  (match_operand:VDC 1 "general_operand" "w,?r,m")))]
  "TARGET_SIMD && BYTES_BIG_ENDIAN"
  "@
   mov\\t%0.8b, %1.8b
   fmov\t%d0, %1
   ldr\\t%d0, %1"
  [(set_attr "type" "neon_move<q>, neon_from_gp, neon_load1_1reg")
   (set_attr "simd" "yes,*,yes")
   (set_attr "fp" "*,yes,*")]
)

;; Combine two 64-bit vectors into one 128-bit vector; the endian-dependent
;; details are handled by aarch64_split_simd_combine.
(define_expand "aarch64_combine<mode>"
  [(match_operand:<VDBL> 0 "register_operand")
   (match_operand:VDC 1 "register_operand")
   (match_operand:VDC 2 "register_operand")]
  "TARGET_SIMD"
{
  aarch64_split_simd_combine (operands[0], operands[1], operands[2]);

  DONE;
}
)

;; Combine via explicit moves into the low and high quadword halves.
(define_expand "aarch64_simd_combine<mode>"
  [(match_operand:<VDBL> 0 "register_operand")
   (match_operand:VDC 1 "register_operand")
   (match_operand:VDC 2 "register_operand")]
  "TARGET_SIMD"
  {
    emit_insn (gen_move_lo_quad_<Vdbl> (operands[0], operands[1]));
    emit_insn (gen_move_hi_quad_<Vdbl> (operands[0], operands[2]));
    DONE;
  }
[(set_attr "type" "multiple")]
)

;; <su><addsub>l<q>.
;; Widening add/subtract of the high halves ([US]ADDL2 / [US]SUBL2).
;; Operand 3 pins both vec_selects to the same (high) half.
(define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>_hi_internal"
 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
       (ADDSUB:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
			   (match_operand:VQW 1 "register_operand" "w")
			   (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
		       (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
			   (match_operand:VQW 2 "register_operand" "w")
			   (match_dup 3)))))]
  "TARGET_SIMD"
  "<ANY_EXTEND:su><ADDSUB:optab>l2\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_<ADDSUB:optab>_long")]
)

;; Widening add/subtract of the low halves ([US]ADDL / [US]SUBL).
(define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>_lo_internal"
 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
       (ADDSUB:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
			   (match_operand:VQW 1 "register_operand" "w")
			   (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
		       (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
			   (match_operand:VQW 2 "register_operand" "w")
			   (match_dup 3)))))]
  "TARGET_SIMD"
  "<ANY_EXTEND:su><ADDSUB:optab>l\t%0.<Vwtype>, %1.<Vhalftype>, %2.<Vhalftype>"
  [(set_attr "type" "neon_<ADDSUB:optab>_long")]
)


;; Expanders that build the high-half parallel and defer to the _hi_internal
;; patterns above.
(define_expand "aarch64_saddl2<mode>"
  [(match_operand:<VWIDE> 0 "register_operand" "=w")
   (match_operand:VQW 1 "register_operand" "w")
   (match_operand:VQW 2 "register_operand" "w")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
  emit_insn (gen_aarch64_saddl<mode>_hi_internal (operands[0], operands[1],
						  operands[2], p));
  DONE;
})

(define_expand "aarch64_uaddl2<mode>"
  [(match_operand:<VWIDE> 0 "register_operand" "=w")
   (match_operand:VQW 1 "register_operand" "w")
   (match_operand:VQW 2 "register_operand" "w")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
  emit_insn (gen_aarch64_uaddl<mode>_hi_internal (operands[0], operands[1],
						  operands[2], p));
  DONE;
})

(define_expand "aarch64_ssubl2<mode>"
  [(match_operand:<VWIDE> 0 "register_operand" "=w")
   (match_operand:VQW 1 "register_operand" "w")
   (match_operand:VQW 2 "register_operand" "w")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
  emit_insn (gen_aarch64_ssubl<mode>_hi_internal (operands[0], operands[1],
						  operands[2], p));
  DONE;
})

(define_expand "aarch64_usubl2<mode>"
  [(match_operand:<VWIDE> 0 "register_operand" "=w")
   (match_operand:VQW 1 "register_operand" "w")
   (match_operand:VQW 2 "register_operand" "w")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
  emit_insn (gen_aarch64_usubl<mode>_hi_internal (operands[0], operands[1],
						  operands[2], p));
  DONE;
})

;; Widening add/subtract of whole 64-bit vectors ([US]ADDL / [US]SUBL).
(define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>"
 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
       (ADDSUB:<VWIDE> (ANY_EXTEND:<VWIDE>
			   (match_operand:VD_BHSI 1 "register_operand" "w"))
		       (ANY_EXTEND:<VWIDE>
			   (match_operand:VD_BHSI 2 "register_operand" "w"))))]
  "TARGET_SIMD"
  "<ANY_EXTEND:su><ADDSUB:optab>l\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_<ADDSUB:optab>_long")]
)

;; <su><addsub>w<q>.
;; Widening sum of a full 128-bit vector: low half via [US]ADDW, high half via
;; [US]ADDW2.
(define_expand "widen_ssum<mode>3"
  [(set (match_operand:<VDBLW> 0 "register_operand" "")
	(plus:<VDBLW> (sign_extend:<VDBLW>
		        (match_operand:VQW 1 "register_operand" ""))
		      (match_operand:<VDBLW> 2 "register_operand" "")))]
  "TARGET_SIMD"
  {
    rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
    rtx temp = gen_reg_rtx (GET_MODE (operands[0]));

    emit_insn (gen_aarch64_saddw<mode>_internal (temp, operands[2],
						 operands[1], p));
    emit_insn (gen_aarch64_saddw2<mode> (operands[0], temp, operands[1]));
    DONE;
  }
)

;; Widening sum of a 64-bit vector: a single SADDW suffices.
(define_expand "widen_ssum<mode>3"
  [(set (match_operand:<VWIDE> 0 "register_operand" "")
	(plus:<VWIDE> (sign_extend:<VWIDE>
		        (match_operand:VD_BHSI 1 "register_operand" ""))
		      (match_operand:<VWIDE> 2 "register_operand" "")))]
  "TARGET_SIMD"
{
  emit_insn (gen_aarch64_saddw<mode> (operands[0], operands[2], operands[1]));
  DONE;
})

;; Unsigned counterpart for 128-bit vectors.
(define_expand "widen_usum<mode>3"
  [(set (match_operand:<VDBLW> 0 "register_operand" "")
	(plus:<VDBLW> (zero_extend:<VDBLW>
		        (match_operand:VQW 1 "register_operand" ""))
		      (match_operand:<VDBLW> 2 "register_operand" "")))]
  "TARGET_SIMD"
  {
    rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
    rtx temp = gen_reg_rtx (GET_MODE (operands[0]));

    emit_insn (gen_aarch64_uaddw<mode>_internal (temp, operands[2],
						 operands[1], p));
    emit_insn (gen_aarch64_uaddw2<mode> (operands[0], temp, operands[1]));
    DONE;
  }
)

;; Unsigned counterpart for 64-bit vectors.
(define_expand "widen_usum<mode>3"
  [(set (match_operand:<VWIDE> 0 "register_operand" "")
	(plus:<VWIDE> (zero_extend:<VWIDE>
		        (match_operand:VD_BHSI 1 "register_operand" ""))
		      (match_operand:<VWIDE> 2 "register_operand" "")))]
  "TARGET_SIMD"
{
  emit_insn (gen_aarch64_uaddw<mode> (operands[0], operands[2], operands[1]));
  DONE;
})

;; Wide + extended-narrow add/subtract ([US]ADDW / [US]SUBW).
(define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>w<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(ADDSUB:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
			(ANY_EXTEND:<VWIDE>
			  (match_operand:VD_BHSI 2 "register_operand" "w"))))]
  "TARGET_SIMD"
  "<ANY_EXTEND:su><ADDSUB:optab>w\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>"
  [(set_attr "type" "neon_<ADDSUB:optab>_widen")]
)

;; As above but extending only the low half of a 128-bit operand.
(define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>w<mode>_internal"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(ADDSUB:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
			(ANY_EXTEND:<VWIDE>
			  (vec_select:<VHALF>
			    (match_operand:VQW 2 "register_operand" "w")
			    (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))))]
  "TARGET_SIMD"
  "<ANY_EXTEND:su><ADDSUB:optab>w\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vhalftype>"
  [(set_attr "type" "neon_<ADDSUB:optab>_widen")]
)

;; As above but extending the high half ([US]ADDW2 / [US]SUBW2).
(define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>w2<mode>_internal"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(ADDSUB:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
			(ANY_EXTEND:<VWIDE>
			  (vec_select:<VHALF>
			    (match_operand:VQW 2 "register_operand" "w")
			    (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))))]
  "TARGET_SIMD"
  "<ANY_EXTEND:su><ADDSUB:optab>w2\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>"
  [(set_attr "type" "neon_<ADDSUB:optab>_widen")]
)

;; Expanders that build the high-half parallel and defer to the w2 internals.
(define_expand "aarch64_saddw2<mode>"
  [(match_operand:<VWIDE> 0 "register_operand" "=w")
   (match_operand:<VWIDE> 1 "register_operand" "w")
   (match_operand:VQW 2 "register_operand" "w")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
  emit_insn (gen_aarch64_saddw2<mode>_internal (operands[0], operands[1],
						operands[2], p));
  DONE;
})

(define_expand "aarch64_uaddw2<mode>"
  [(match_operand:<VWIDE> 0 "register_operand" "=w")
   (match_operand:<VWIDE> 1 "register_operand" "w")
   (match_operand:VQW 2 "register_operand" "w")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
  emit_insn (gen_aarch64_uaddw2<mode>_internal (operands[0], operands[1],
						operands[2], p));
  DONE;
})


(define_expand "aarch64_ssubw2<mode>"
  [(match_operand:<VWIDE> 0 "register_operand" "=w")
   (match_operand:<VWIDE> 1 "register_operand" "w")
   (match_operand:VQW 2 "register_operand" "w")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
  emit_insn (gen_aarch64_ssubw2<mode>_internal (operands[0], operands[1],
						operands[2], p));
  DONE;
})

(define_expand "aarch64_usubw2<mode>"
  [(match_operand:<VWIDE> 0 "register_operand" "=w")
   (match_operand:<VWIDE> 1 "register_operand" "w")
   (match_operand:VQW 2 "register_operand" "w")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
  emit_insn (gen_aarch64_usubw2<mode>_internal (operands[0], operands[1],
						operands[2], p));
  DONE;
})

;; <su><r>h<addsub>.

;; Halving add/subtract, optionally rounding ([US][R]HADD / [US]HSUB).
(define_insn "aarch64_<sur>h<addsub><mode>"
  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
	(unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand" "w")
			  (match_operand:VDQ_BHSI 2 "register_operand" "w")]
			 HADDSUB))]
  "TARGET_SIMD"
  "<sur>h<addsub>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_<addsub>_halve<q>")]
)

;; <r><addsub>hn<q>.
;; Narrowing high-half add/subtract ([R]ADDHN / [R]SUBHN).
(define_insn "aarch64_<sur><addsub>hn<mode>"
  [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
	(unspec:<VNARROWQ> [(match_operand:VQN 1 "register_operand" "w")
			    (match_operand:VQN 2 "register_operand" "w")]
			   ADDSUBHN))]
  "TARGET_SIMD"
  "<sur><addsub>hn\\t%0.<Vntype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_<addsub>_halve_narrow_q")]
)

;; Second-part variant ([R]ADDHN2 / [R]SUBHN2): operand 1 is tied to the
;; destination so its existing low half is preserved.
(define_insn "aarch64_<sur><addsub>hn2<mode>"
  [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
	(unspec:<VNARROWQ2> [(match_operand:<VNARROWQ> 1 "register_operand" "0")
			     (match_operand:VQN 2 "register_operand" "w")
			     (match_operand:VQN 3 "register_operand" "w")]
			    ADDSUBHN2))]
  "TARGET_SIMD"
  "<sur><addsub>hn2\\t%0.<V2ntype>, %2.<Vtype>, %3.<Vtype>"
  [(set_attr "type" "neon_<addsub>_halve_narrow_q")]
)

;; pmul.

(define_insn "aarch64_pmul<mode>"
  [(set (match_operand:VB 0 "register_operand" "=w")
	(unspec:VB [(match_operand:VB 1 "register_operand" "w")
		    (match_operand:VB 2 "register_operand" "w")]
		   UNSPEC_PMUL))]
  "TARGET_SIMD"
  "pmul\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_mul_<Vetype><q>")]
)

;; fmulx.
(define_insn "aarch64_fmulx<mode>"
  [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
	(unspec:VHSDF_HSDF
	  [(match_operand:VHSDF_HSDF 1 "register_operand" "w")
	   (match_operand:VHSDF_HSDF 2 "register_operand" "w")]
	   UNSPEC_FMULX))]
 "TARGET_SIMD"
 "fmulx\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
 [(set_attr "type" "neon_fp_mul_<stype>")]
)

;; vmulxq_lane_f32, and vmulx_laneq_f32

(define_insn "*aarch64_mulx_elt_<vswap_width_name><mode>"
  [(set (match_operand:VDQSF 0 "register_operand" "=w")
	(unspec:VDQSF
	 [(match_operand:VDQSF 1 "register_operand" "w")
	  (vec_duplicate:VDQSF
	   (vec_select:<VEL>
	    (match_operand:<VSWAP_WIDTH> 2 "register_operand" "w")
	    (parallel [(match_operand:SI 3 "immediate_operand" "i")])))]
	 UNSPEC_FMULX))]
  "TARGET_SIMD"
  {
    operands[3] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode,
					   INTVAL (operands[3]));
    return "fmulx\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
  }
  [(set_attr "type" "neon_fp_mul_<Vetype>_scalar<q>")]
)

;; vmulxq_laneq_f32, vmulxq_laneq_f64, vmulx_lane_f32

(define_insn "*aarch64_mulx_elt<mode>"
  [(set (match_operand:VDQF 0 "register_operand" "=w")
	(unspec:VDQF
	 [(match_operand:VDQF 1 "register_operand" "w")
	  (vec_duplicate:VDQF
	   (vec_select:<VEL>
	    (match_operand:VDQF 2 "register_operand" "w")
	    (parallel [(match_operand:SI 3 "immediate_operand" "i")])))]
	 UNSPEC_FMULX))]
  "TARGET_SIMD"
  {
    operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
    return "fmulx\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
  }
  [(set_attr "type" "neon_fp_mul_<Vetype><q>")]
)

;; vmulxq_lane

(define_insn "*aarch64_mulx_elt_from_dup<mode>"
  [(set (match_operand:VHSDF 0 "register_operand" "=w")
	(unspec:VHSDF
	 [(match_operand:VHSDF 1 "register_operand" "w")
	  (vec_duplicate:VHSDF
	    (match_operand:<VEL> 2 "register_operand" "<h_con>"))]
	 UNSPEC_FMULX))]
  "TARGET_SIMD"
  "fmulx\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[0]"
  [(set_attr "type" "neon<fp>_mul_<stype>_scalar<q>")]
)

;; vmulxs_lane_f32, vmulxs_laneq_f32
;; vmulxd_lane_f64 ==  vmulx_lane_f64
;; vmulxd_laneq_f64 == vmulx_laneq_f64

(define_insn "*aarch64_vgetfmulx<mode>"
  [(set (match_operand:<VEL> 0 "register_operand" "=w")
	(unspec:<VEL>
	 [(match_operand:<VEL> 1 "register_operand" "w")
	  (vec_select:<VEL>
	   (match_operand:VDQF 2 "register_operand" "w")
	   (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
	 UNSPEC_FMULX))]
  "TARGET_SIMD"
  {
    operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
    return "fmulx\t%<Vetype>0, %<Vetype>1, %2.<Vetype>[%3]";
  }
  [(set_attr "type" "fmul<Vetype>")]
)
;; <su>q<addsub>

(define_insn "aarch64_<su_optab><optab><mode>"
  [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
	(BINQOPS:VSDQ_I (match_operand:VSDQ_I 1 "register_operand" "w")
			(match_operand:VSDQ_I 2 "register_operand" "w")))]
  "TARGET_SIMD"
  "<su_optab><optab>\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
  [(set_attr "type" "neon_<optab><q>")]
)

;; suqadd and usqadd

;; Operand 1 is tied to the destination: these accumulate into it.
(define_insn "aarch64_<sur>qadd<mode>"
  [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
	(unspec:VSDQ_I [(match_operand:VSDQ_I 1 "register_operand" "0")
			(match_operand:VSDQ_I 2 "register_operand" "w")]
		       USSUQADD))]
  "TARGET_SIMD"
  "<sur>qadd\\t%<v>0<Vmtype>, %<v>2<Vmtype>"
  [(set_attr "type" "neon_qadd<q>")]
)

;; sqmovun

(define_insn "aarch64_sqmovun<mode>"
  [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
	(unspec:<VNARROWQ> [(match_operand:VSQN_HSDI 1 "register_operand" "w")]
			   UNSPEC_SQXTUN))]
  "TARGET_SIMD"
  "sqxtun\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>"
  [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
)

;; sqmovn and uqmovn

(define_insn "aarch64_<sur>qmovn<mode>"
  [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
	(unspec:<VNARROWQ> [(match_operand:VSQN_HSDI 1 "register_operand" "w")]
			   SUQMOVN))]
  "TARGET_SIMD"
  "<sur>qxtn\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>"
  [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
)

;; <su>q<absneg>

(define_insn "aarch64_s<optab><mode>"
  [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
	(UNQOPS:VSDQ_I
	  (match_operand:VSDQ_I 1 "register_operand" "w")))]
  "TARGET_SIMD"
  "s<optab>\\t%<v>0<Vmtype>, %<v>1<Vmtype>"
  [(set_attr "type" "neon_<optab><q>")]
)

;; sq<r>dmulh.

(define_insn "aarch64_sq<r>dmulh<mode>"
  [(set (match_operand:VSDQ_HSI 0 "register_operand" "=w")
	(unspec:VSDQ_HSI
	  [(match_operand:VSDQ_HSI 1 "register_operand" "w")
	   (match_operand:VSDQ_HSI 2 "register_operand" "w")]
	  VQDMULH))]
  "TARGET_SIMD"
  "sq<r>dmulh\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
  [(set_attr "type" "neon_sat_mul_<Vetype><q>")]
)

;; sq<r>dmulh_lane

(define_insn "aarch64_sq<r>dmulh_lane<mode>"
  [(set (match_operand:VDQHS 0 "register_operand" "=w")
	(unspec:VDQHS
	  [(match_operand:VDQHS 1 "register_operand" "w")
	   (vec_select:<VEL>
	     (match_operand:<VCOND> 2 "register_operand" "<vwx>")
	     (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
	  VQDMULH))]
  "TARGET_SIMD"
  "*
   operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
   return \"sq<r>dmulh\\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[%3]\";"
  [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
)

(define_insn "aarch64_sq<r>dmulh_laneq<mode>"
  [(set (match_operand:VDQHS 0 "register_operand" "=w")
	(unspec:VDQHS
	  [(match_operand:VDQHS 1 "register_operand" "w")
	   (vec_select:<VEL>
	     (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
	     (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
	  VQDMULH))]
  "TARGET_SIMD"
  "*
   operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
   return \"sq<r>dmulh\\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[%3]\";"
  [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
)

(define_insn "aarch64_sq<r>dmulh_lane<mode>"
  [(set (match_operand:SD_HSI 0 "register_operand" "=w")
	(unspec:SD_HSI
	  [(match_operand:SD_HSI 1 "register_operand" "w")
	   (vec_select:<VEL>
	     (match_operand:<VCOND> 2 "register_operand" "<vwx>")
	     (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
	  VQDMULH))]
  "TARGET_SIMD"
  "*
   operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
   return \"sq<r>dmulh\\t%<v>0, %<v>1, %2.<v>[%3]\";"
  [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
)

(define_insn "aarch64_sq<r>dmulh_laneq<mode>"
  [(set (match_operand:SD_HSI 0 "register_operand" "=w")
	(unspec:SD_HSI
	  [(match_operand:SD_HSI 1 "register_operand" "w")
	   (vec_select:<VEL>
	     (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
	     (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
	  VQDMULH))]
  "TARGET_SIMD"
  "*
   operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
   return \"sq<r>dmulh\\t%<v>0, %<v>1, %2.<v>[%3]\";"
  [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
)

;; sqrdml[as]h.
;; Operand 1 is tied to the destination: SQRDML[AS]H accumulates into it.
(define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h<mode>"
  [(set (match_operand:VSDQ_HSI 0 "register_operand" "=w")
	(unspec:VSDQ_HSI
	  [(match_operand:VSDQ_HSI 1 "register_operand" "0")
	   (match_operand:VSDQ_HSI 2 "register_operand" "w")
	   (match_operand:VSDQ_HSI 3 "register_operand" "w")]
	  SQRDMLH_AS))]
   "TARGET_SIMD_RDMA"
   "sqrdml<SQRDMLH_AS:rdma_as>h\\t%<v>0<Vmtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
   [(set_attr "type" "neon_sat_mla_<Vetype>_long")]
)

;; sqrdml[as]h_lane.

(define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_lane<mode>"
  [(set (match_operand:VDQHS 0 "register_operand" "=w")
	(unspec:VDQHS
	  [(match_operand:VDQHS 1 "register_operand" "0")
	   (match_operand:VDQHS 2 "register_operand" "w")
	   (vec_select:<VEL>
	     (match_operand:<VCOND> 3 "register_operand" "<vwx>")
	     (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
	  SQRDMLH_AS))]
   "TARGET_SIMD_RDMA"
   {
     operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
     return
      "sqrdml<SQRDMLH_AS:rdma_as>h\\t%0.<Vtype>, %2.<Vtype>, %3.<Vetype>[%4]";
   }
   [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
)

(define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_lane<mode>"
  [(set (match_operand:SD_HSI 0 "register_operand" "=w")
	(unspec:SD_HSI
	  [(match_operand:SD_HSI 1 "register_operand" "0")
	   (match_operand:SD_HSI 2 "register_operand" "w")
	   (vec_select:<VEL>
	     (match_operand:<VCOND> 3 "register_operand" "<vwx>")
	     (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
	  SQRDMLH_AS))]
   "TARGET_SIMD_RDMA"
   {
     operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
     return
      "sqrdml<SQRDMLH_AS:rdma_as>h\\t%<v>0, %<v>2, %3.<Vetype>[%4]";
   }
   [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
)

;; sqrdml[as]h_laneq.
(define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_laneq<mode>"
  [(set (match_operand:VDQHS 0 "register_operand" "=w")
	(unspec:VDQHS
	  [(match_operand:VDQHS 1 "register_operand" "0")
	   (match_operand:VDQHS 2 "register_operand" "w")
	   (vec_select:<VEL>
	     (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
	     (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
	  SQRDMLH_AS))]
   "TARGET_SIMD_RDMA"
   {
     operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
     return
      "sqrdml<SQRDMLH_AS:rdma_as>h\\t%0.<Vtype>, %2.<Vtype>, %3.<Vetype>[%4]";
   }
   [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
)

(define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_laneq<mode>"
  [(set (match_operand:SD_HSI 0 "register_operand" "=w")
	(unspec:SD_HSI
	  [(match_operand:SD_HSI 1 "register_operand" "0")
	   (match_operand:SD_HSI 2 "register_operand" "w")
	   (vec_select:<VEL>
	     (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
	     (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
	  SQRDMLH_AS))]
   "TARGET_SIMD_RDMA"
   {
     operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
     return
      "sqrdml<SQRDMLH_AS:rdma_as>h\\t%<v>0, %<v>2, %3.<v>[%4]";
   }
   [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
)

;; vqdml[sa]l

;; Saturating doubling multiply-accumulate/subtract long: operand 1 is tied
;; to the destination accumulator.
(define_insn "aarch64_sqdml<SBINQOPS:as>l<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(SBINQOPS:<VWIDE>
	  (match_operand:<VWIDE> 1 "register_operand" "0")
	  (ss_ashift:<VWIDE>
	      (mult:<VWIDE>
		(sign_extend:<VWIDE>
		      (match_operand:VSD_HSI 2 "register_operand" "w"))
		(sign_extend:<VWIDE>
		      (match_operand:VSD_HSI 3 "register_operand" "w")))
	      (const_int 1))))]
  "TARGET_SIMD"
  "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
  [(set_attr "type" "neon_sat_mla_<Vetype>_long")]
)

;;
;; vqdml[sa]l_lane

(define_insn "aarch64_sqdml<SBINQOPS:as>l_lane<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(SBINQOPS:<VWIDE>
	  (match_operand:<VWIDE> 1 "register_operand" "0")
	  (ss_ashift:<VWIDE>
	    (mult:<VWIDE>
	      (sign_extend:<VWIDE>
		(match_operand:VD_HSI 2 "register_operand" "w"))
	      (sign_extend:<VWIDE>
		(vec_duplicate:VD_HSI
		  (vec_select:<VEL>
		    (match_operand:<VCOND> 3 "register_operand" "<vwx>")
		    (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
	      ))
	    (const_int 1))))]
  "TARGET_SIMD"
  {
    operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
    return
      "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
  }
  [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
)

(define_insn "aarch64_sqdml<SBINQOPS:as>l_laneq<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(SBINQOPS:<VWIDE>
	  (match_operand:<VWIDE> 1 "register_operand" "0")
	  (ss_ashift:<VWIDE>
	    (mult:<VWIDE>
	      (sign_extend:<VWIDE>
		(match_operand:VD_HSI 2 "register_operand" "w"))
	      (sign_extend:<VWIDE>
		(vec_duplicate:VD_HSI
		  (vec_select:<VEL>
		    (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
		    (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
	      ))
	    (const_int 1))))]
  "TARGET_SIMD"
  {
    operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
    return
      "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
  }
  [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
)

;; Scalar forms: no vec_duplicate is needed since operand 2 is a scalar.
(define_insn "aarch64_sqdml<SBINQOPS:as>l_lane<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(SBINQOPS:<VWIDE>
	  (match_operand:<VWIDE> 1 "register_operand" "0")
	  (ss_ashift:<VWIDE>
	    (mult:<VWIDE>
	      (sign_extend:<VWIDE>
		(match_operand:SD_HSI 2 "register_operand" "w"))
	      (sign_extend:<VWIDE>
		(vec_select:<VEL>
		  (match_operand:<VCOND> 3 "register_operand" "<vwx>")
		  (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
	    )
	    (const_int 1))))]
  "TARGET_SIMD"
  {
    operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
    return
      "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
  }
  [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
)

(define_insn "aarch64_sqdml<SBINQOPS:as>l_laneq<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(SBINQOPS:<VWIDE>
	  (match_operand:<VWIDE> 1 "register_operand" "0")
	  (ss_ashift:<VWIDE>
	    (mult:<VWIDE>
	      (sign_extend:<VWIDE>
		(match_operand:SD_HSI 2 "register_operand" "w"))
	      (sign_extend:<VWIDE>
		(vec_select:<VEL>
		  (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
		  (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
	    )
	    (const_int 1))))]
  "TARGET_SIMD"
  {
    operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
    return
      "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
  }
  [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
)

;; vqdml[sa]l_n

;; Lane 0 of a duplicated scalar operand 3.
(define_insn "aarch64_sqdml<SBINQOPS:as>l_n<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(SBINQOPS:<VWIDE>
	  (match_operand:<VWIDE> 1 "register_operand" "0")
	  (ss_ashift:<VWIDE>
	    (mult:<VWIDE>
	      (sign_extend:<VWIDE>
		(match_operand:VD_HSI 2 "register_operand" "w"))
	      (sign_extend:<VWIDE>
		(vec_duplicate:VD_HSI
		  (match_operand:<VEL> 3 "register_operand" "<vwx>"))))
	    (const_int 1))))]
  "TARGET_SIMD"
  "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[0]"
  [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
)

;; sqdml[as]l2

(define_insn
"aarch64_sqdml<SBINQOPS:as>l2<mode>_internal" 3854 [(set (match_operand:<VWIDE> 0 "register_operand" "=w") 3855 (SBINQOPS:<VWIDE> 3856 (match_operand:<VWIDE> 1 "register_operand" "0") 3857 (ss_ashift:<VWIDE> 3858 (mult:<VWIDE> 3859 (sign_extend:<VWIDE> 3860 (vec_select:<VHALF> 3861 (match_operand:VQ_HSI 2 "register_operand" "w") 3862 (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" ""))) 3863 (sign_extend:<VWIDE> 3864 (vec_select:<VHALF> 3865 (match_operand:VQ_HSI 3 "register_operand" "w") 3866 (match_dup 4)))) 3867 (const_int 1))))] 3868 "TARGET_SIMD" 3869 "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %<v>3<Vmtype>" 3870 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")] 3871) 3872 3873(define_expand "aarch64_sqdmlal2<mode>" 3874 [(match_operand:<VWIDE> 0 "register_operand" "=w") 3875 (match_operand:<VWIDE> 1 "register_operand" "w") 3876 (match_operand:VQ_HSI 2 "register_operand" "w") 3877 (match_operand:VQ_HSI 3 "register_operand" "w")] 3878 "TARGET_SIMD" 3879{ 3880 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true); 3881 emit_insn (gen_aarch64_sqdmlal2<mode>_internal (operands[0], operands[1], 3882 operands[2], operands[3], p)); 3883 DONE; 3884}) 3885 3886(define_expand "aarch64_sqdmlsl2<mode>" 3887 [(match_operand:<VWIDE> 0 "register_operand" "=w") 3888 (match_operand:<VWIDE> 1 "register_operand" "w") 3889 (match_operand:VQ_HSI 2 "register_operand" "w") 3890 (match_operand:VQ_HSI 3 "register_operand" "w")] 3891 "TARGET_SIMD" 3892{ 3893 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true); 3894 emit_insn (gen_aarch64_sqdmlsl2<mode>_internal (operands[0], operands[1], 3895 operands[2], operands[3], p)); 3896 DONE; 3897}) 3898 3899;; vqdml[sa]l2_lane 3900 3901(define_insn "aarch64_sqdml<SBINQOPS:as>l2_lane<mode>_internal" 3902 [(set (match_operand:<VWIDE> 0 "register_operand" "=w") 3903 (SBINQOPS:<VWIDE> 3904 (match_operand:<VWIDE> 1 "register_operand" "0") 3905 (ss_ashift:<VWIDE> 3906 (mult:<VWIDE> 3907 
(sign_extend:<VWIDE> 3908 (vec_select:<VHALF> 3909 (match_operand:VQ_HSI 2 "register_operand" "w") 3910 (match_operand:VQ_HSI 5 "vect_par_cnst_hi_half" ""))) 3911 (sign_extend:<VWIDE> 3912 (vec_duplicate:<VHALF> 3913 (vec_select:<VEL> 3914 (match_operand:<VCOND> 3 "register_operand" "<vwx>") 3915 (parallel [(match_operand:SI 4 "immediate_operand" "i")]) 3916 )))) 3917 (const_int 1))))] 3918 "TARGET_SIMD" 3919 { 3920 operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4])); 3921 return 3922 "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]"; 3923 } 3924 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")] 3925) 3926 3927(define_insn "aarch64_sqdml<SBINQOPS:as>l2_laneq<mode>_internal" 3928 [(set (match_operand:<VWIDE> 0 "register_operand" "=w") 3929 (SBINQOPS:<VWIDE> 3930 (match_operand:<VWIDE> 1 "register_operand" "0") 3931 (ss_ashift:<VWIDE> 3932 (mult:<VWIDE> 3933 (sign_extend:<VWIDE> 3934 (vec_select:<VHALF> 3935 (match_operand:VQ_HSI 2 "register_operand" "w") 3936 (match_operand:VQ_HSI 5 "vect_par_cnst_hi_half" ""))) 3937 (sign_extend:<VWIDE> 3938 (vec_duplicate:<VHALF> 3939 (vec_select:<VEL> 3940 (match_operand:<VCONQ> 3 "register_operand" "<vwx>") 3941 (parallel [(match_operand:SI 4 "immediate_operand" "i")]) 3942 )))) 3943 (const_int 1))))] 3944 "TARGET_SIMD" 3945 { 3946 operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4])); 3947 return 3948 "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]"; 3949 } 3950 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")] 3951) 3952 3953(define_expand "aarch64_sqdmlal2_lane<mode>" 3954 [(match_operand:<VWIDE> 0 "register_operand" "=w") 3955 (match_operand:<VWIDE> 1 "register_operand" "w") 3956 (match_operand:VQ_HSI 2 "register_operand" "w") 3957 (match_operand:<VCOND> 3 "register_operand" "<vwx>") 3958 (match_operand:SI 4 "immediate_operand" "i")] 3959 "TARGET_SIMD" 3960{ 3961 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, 
<nunits>, true); 3962 emit_insn (gen_aarch64_sqdmlal2_lane<mode>_internal (operands[0], operands[1], 3963 operands[2], operands[3], 3964 operands[4], p)); 3965 DONE; 3966}) 3967 3968(define_expand "aarch64_sqdmlal2_laneq<mode>" 3969 [(match_operand:<VWIDE> 0 "register_operand" "=w") 3970 (match_operand:<VWIDE> 1 "register_operand" "w") 3971 (match_operand:VQ_HSI 2 "register_operand" "w") 3972 (match_operand:<VCONQ> 3 "register_operand" "<vwx>") 3973 (match_operand:SI 4 "immediate_operand" "i")] 3974 "TARGET_SIMD" 3975{ 3976 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true); 3977 emit_insn (gen_aarch64_sqdmlal2_laneq<mode>_internal (operands[0], operands[1], 3978 operands[2], operands[3], 3979 operands[4], p)); 3980 DONE; 3981}) 3982 3983(define_expand "aarch64_sqdmlsl2_lane<mode>" 3984 [(match_operand:<VWIDE> 0 "register_operand" "=w") 3985 (match_operand:<VWIDE> 1 "register_operand" "w") 3986 (match_operand:VQ_HSI 2 "register_operand" "w") 3987 (match_operand:<VCOND> 3 "register_operand" "<vwx>") 3988 (match_operand:SI 4 "immediate_operand" "i")] 3989 "TARGET_SIMD" 3990{ 3991 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true); 3992 emit_insn (gen_aarch64_sqdmlsl2_lane<mode>_internal (operands[0], operands[1], 3993 operands[2], operands[3], 3994 operands[4], p)); 3995 DONE; 3996}) 3997 3998(define_expand "aarch64_sqdmlsl2_laneq<mode>" 3999 [(match_operand:<VWIDE> 0 "register_operand" "=w") 4000 (match_operand:<VWIDE> 1 "register_operand" "w") 4001 (match_operand:VQ_HSI 2 "register_operand" "w") 4002 (match_operand:<VCONQ> 3 "register_operand" "<vwx>") 4003 (match_operand:SI 4 "immediate_operand" "i")] 4004 "TARGET_SIMD" 4005{ 4006 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true); 4007 emit_insn (gen_aarch64_sqdmlsl2_laneq<mode>_internal (operands[0], operands[1], 4008 operands[2], operands[3], 4009 operands[4], p)); 4010 DONE; 4011}) 4012 4013(define_insn "aarch64_sqdml<SBINQOPS:as>l2_n<mode>_internal" 4014 
[(set (match_operand:<VWIDE> 0 "register_operand" "=w") 4015 (SBINQOPS:<VWIDE> 4016 (match_operand:<VWIDE> 1 "register_operand" "0") 4017 (ss_ashift:<VWIDE> 4018 (mult:<VWIDE> 4019 (sign_extend:<VWIDE> 4020 (vec_select:<VHALF> 4021 (match_operand:VQ_HSI 2 "register_operand" "w") 4022 (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" ""))) 4023 (sign_extend:<VWIDE> 4024 (vec_duplicate:<VHALF> 4025 (match_operand:<VEL> 3 "register_operand" "<vwx>")))) 4026 (const_int 1))))] 4027 "TARGET_SIMD" 4028 "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[0]" 4029 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")] 4030) 4031 4032(define_expand "aarch64_sqdmlal2_n<mode>" 4033 [(match_operand:<VWIDE> 0 "register_operand" "=w") 4034 (match_operand:<VWIDE> 1 "register_operand" "w") 4035 (match_operand:VQ_HSI 2 "register_operand" "w") 4036 (match_operand:<VEL> 3 "register_operand" "w")] 4037 "TARGET_SIMD" 4038{ 4039 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true); 4040 emit_insn (gen_aarch64_sqdmlal2_n<mode>_internal (operands[0], operands[1], 4041 operands[2], operands[3], 4042 p)); 4043 DONE; 4044}) 4045 4046(define_expand "aarch64_sqdmlsl2_n<mode>" 4047 [(match_operand:<VWIDE> 0 "register_operand" "=w") 4048 (match_operand:<VWIDE> 1 "register_operand" "w") 4049 (match_operand:VQ_HSI 2 "register_operand" "w") 4050 (match_operand:<VEL> 3 "register_operand" "w")] 4051 "TARGET_SIMD" 4052{ 4053 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true); 4054 emit_insn (gen_aarch64_sqdmlsl2_n<mode>_internal (operands[0], operands[1], 4055 operands[2], operands[3], 4056 p)); 4057 DONE; 4058}) 4059 4060;; vqdmull 4061 4062(define_insn "aarch64_sqdmull<mode>" 4063 [(set (match_operand:<VWIDE> 0 "register_operand" "=w") 4064 (ss_ashift:<VWIDE> 4065 (mult:<VWIDE> 4066 (sign_extend:<VWIDE> 4067 (match_operand:VSD_HSI 1 "register_operand" "w")) 4068 (sign_extend:<VWIDE> 4069 (match_operand:VSD_HSI 2 "register_operand" "w"))) 4070 
(const_int 1)))] 4071 "TARGET_SIMD" 4072 "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %<v>2<Vmtype>" 4073 [(set_attr "type" "neon_sat_mul_<Vetype>_long")] 4074) 4075 4076;; vqdmull_lane 4077 4078(define_insn "aarch64_sqdmull_lane<mode>" 4079 [(set (match_operand:<VWIDE> 0 "register_operand" "=w") 4080 (ss_ashift:<VWIDE> 4081 (mult:<VWIDE> 4082 (sign_extend:<VWIDE> 4083 (match_operand:VD_HSI 1 "register_operand" "w")) 4084 (sign_extend:<VWIDE> 4085 (vec_duplicate:VD_HSI 4086 (vec_select:<VEL> 4087 (match_operand:<VCOND> 2 "register_operand" "<vwx>") 4088 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))) 4089 )) 4090 (const_int 1)))] 4091 "TARGET_SIMD" 4092 { 4093 operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3])); 4094 return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]"; 4095 } 4096 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")] 4097) 4098 4099(define_insn "aarch64_sqdmull_laneq<mode>" 4100 [(set (match_operand:<VWIDE> 0 "register_operand" "=w") 4101 (ss_ashift:<VWIDE> 4102 (mult:<VWIDE> 4103 (sign_extend:<VWIDE> 4104 (match_operand:VD_HSI 1 "register_operand" "w")) 4105 (sign_extend:<VWIDE> 4106 (vec_duplicate:VD_HSI 4107 (vec_select:<VEL> 4108 (match_operand:<VCONQ> 2 "register_operand" "<vwx>") 4109 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))) 4110 )) 4111 (const_int 1)))] 4112 "TARGET_SIMD" 4113 { 4114 operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3])); 4115 return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]"; 4116 } 4117 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")] 4118) 4119 4120(define_insn "aarch64_sqdmull_lane<mode>" 4121 [(set (match_operand:<VWIDE> 0 "register_operand" "=w") 4122 (ss_ashift:<VWIDE> 4123 (mult:<VWIDE> 4124 (sign_extend:<VWIDE> 4125 (match_operand:SD_HSI 1 "register_operand" "w")) 4126 (sign_extend:<VWIDE> 4127 (vec_select:<VEL> 4128 (match_operand:<VCOND> 2 "register_operand" "<vwx>") 4129 (parallel [(match_operand:SI 3 
"immediate_operand" "i")])) 4130 )) 4131 (const_int 1)))] 4132 "TARGET_SIMD" 4133 { 4134 operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3])); 4135 return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]"; 4136 } 4137 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")] 4138) 4139 4140(define_insn "aarch64_sqdmull_laneq<mode>" 4141 [(set (match_operand:<VWIDE> 0 "register_operand" "=w") 4142 (ss_ashift:<VWIDE> 4143 (mult:<VWIDE> 4144 (sign_extend:<VWIDE> 4145 (match_operand:SD_HSI 1 "register_operand" "w")) 4146 (sign_extend:<VWIDE> 4147 (vec_select:<VEL> 4148 (match_operand:<VCONQ> 2 "register_operand" "<vwx>") 4149 (parallel [(match_operand:SI 3 "immediate_operand" "i")])) 4150 )) 4151 (const_int 1)))] 4152 "TARGET_SIMD" 4153 { 4154 operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3])); 4155 return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]"; 4156 } 4157 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")] 4158) 4159 4160;; vqdmull_n 4161 4162(define_insn "aarch64_sqdmull_n<mode>" 4163 [(set (match_operand:<VWIDE> 0 "register_operand" "=w") 4164 (ss_ashift:<VWIDE> 4165 (mult:<VWIDE> 4166 (sign_extend:<VWIDE> 4167 (match_operand:VD_HSI 1 "register_operand" "w")) 4168 (sign_extend:<VWIDE> 4169 (vec_duplicate:VD_HSI 4170 (match_operand:<VEL> 2 "register_operand" "<vwx>"))) 4171 ) 4172 (const_int 1)))] 4173 "TARGET_SIMD" 4174 "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[0]" 4175 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")] 4176) 4177 4178;; vqdmull2 4179 4180 4181 4182(define_insn "aarch64_sqdmull2<mode>_internal" 4183 [(set (match_operand:<VWIDE> 0 "register_operand" "=w") 4184 (ss_ashift:<VWIDE> 4185 (mult:<VWIDE> 4186 (sign_extend:<VWIDE> 4187 (vec_select:<VHALF> 4188 (match_operand:VQ_HSI 1 "register_operand" "w") 4189 (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" ""))) 4190 (sign_extend:<VWIDE> 4191 (vec_select:<VHALF> 4192 (match_operand:VQ_HSI 2 
"register_operand" "w") 4193 (match_dup 3))) 4194 ) 4195 (const_int 1)))] 4196 "TARGET_SIMD" 4197 "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %<v>2<Vmtype>" 4198 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")] 4199) 4200 4201(define_expand "aarch64_sqdmull2<mode>" 4202 [(match_operand:<VWIDE> 0 "register_operand" "=w") 4203 (match_operand:VQ_HSI 1 "register_operand" "w") 4204 (match_operand:VQ_HSI 2 "register_operand" "w")] 4205 "TARGET_SIMD" 4206{ 4207 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true); 4208 emit_insn (gen_aarch64_sqdmull2<mode>_internal (operands[0], operands[1], 4209 operands[2], p)); 4210 DONE; 4211}) 4212 4213;; vqdmull2_lane 4214 4215(define_insn "aarch64_sqdmull2_lane<mode>_internal" 4216 [(set (match_operand:<VWIDE> 0 "register_operand" "=w") 4217 (ss_ashift:<VWIDE> 4218 (mult:<VWIDE> 4219 (sign_extend:<VWIDE> 4220 (vec_select:<VHALF> 4221 (match_operand:VQ_HSI 1 "register_operand" "w") 4222 (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" ""))) 4223 (sign_extend:<VWIDE> 4224 (vec_duplicate:<VHALF> 4225 (vec_select:<VEL> 4226 (match_operand:<VCOND> 2 "register_operand" "<vwx>") 4227 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))) 4228 )) 4229 (const_int 1)))] 4230 "TARGET_SIMD" 4231 { 4232 operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3])); 4233 return "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]"; 4234 } 4235 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")] 4236) 4237 4238(define_insn "aarch64_sqdmull2_laneq<mode>_internal" 4239 [(set (match_operand:<VWIDE> 0 "register_operand" "=w") 4240 (ss_ashift:<VWIDE> 4241 (mult:<VWIDE> 4242 (sign_extend:<VWIDE> 4243 (vec_select:<VHALF> 4244 (match_operand:VQ_HSI 1 "register_operand" "w") 4245 (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" ""))) 4246 (sign_extend:<VWIDE> 4247 (vec_duplicate:<VHALF> 4248 (vec_select:<VEL> 4249 (match_operand:<VCONQ> 2 "register_operand" "<vwx>") 4250 (parallel [(match_operand:SI 3 
"immediate_operand" "i")]))) 4251 )) 4252 (const_int 1)))] 4253 "TARGET_SIMD" 4254 { 4255 operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3])); 4256 return "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]"; 4257 } 4258 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")] 4259) 4260 4261(define_expand "aarch64_sqdmull2_lane<mode>" 4262 [(match_operand:<VWIDE> 0 "register_operand" "=w") 4263 (match_operand:VQ_HSI 1 "register_operand" "w") 4264 (match_operand:<VCOND> 2 "register_operand" "<vwx>") 4265 (match_operand:SI 3 "immediate_operand" "i")] 4266 "TARGET_SIMD" 4267{ 4268 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true); 4269 emit_insn (gen_aarch64_sqdmull2_lane<mode>_internal (operands[0], operands[1], 4270 operands[2], operands[3], 4271 p)); 4272 DONE; 4273}) 4274 4275(define_expand "aarch64_sqdmull2_laneq<mode>" 4276 [(match_operand:<VWIDE> 0 "register_operand" "=w") 4277 (match_operand:VQ_HSI 1 "register_operand" "w") 4278 (match_operand:<VCONQ> 2 "register_operand" "<vwx>") 4279 (match_operand:SI 3 "immediate_operand" "i")] 4280 "TARGET_SIMD" 4281{ 4282 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true); 4283 emit_insn (gen_aarch64_sqdmull2_laneq<mode>_internal (operands[0], operands[1], 4284 operands[2], operands[3], 4285 p)); 4286 DONE; 4287}) 4288 4289;; vqdmull2_n 4290 4291(define_insn "aarch64_sqdmull2_n<mode>_internal" 4292 [(set (match_operand:<VWIDE> 0 "register_operand" "=w") 4293 (ss_ashift:<VWIDE> 4294 (mult:<VWIDE> 4295 (sign_extend:<VWIDE> 4296 (vec_select:<VHALF> 4297 (match_operand:VQ_HSI 1 "register_operand" "w") 4298 (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" ""))) 4299 (sign_extend:<VWIDE> 4300 (vec_duplicate:<VHALF> 4301 (match_operand:<VEL> 2 "register_operand" "<vwx>"))) 4302 ) 4303 (const_int 1)))] 4304 "TARGET_SIMD" 4305 "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[0]" 4306 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")] 4307) 4308 
(define_expand "aarch64_sqdmull2_n<mode>"
  [(match_operand:<VWIDE> 0 "register_operand" "=w")
   (match_operand:VQ_HSI 1 "register_operand" "w")
   (match_operand:<VEL> 2 "register_operand" "w")]
  "TARGET_SIMD"
{
  /* Select the high half of operand 1 and defer to the _internal insn,
     which broadcasts the scalar operand 2 across a half-width vector.  */
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
  emit_insn (gen_aarch64_sqdmull2_n<mode>_internal (operands[0], operands[1],
						    operands[2], p));
  DONE;
})

;; vshl
;; Register-controlled (possibly rounding) shift; the shift direction is
;; encoded in the sign of each element of operand 2, hence the unspec.

(define_insn "aarch64_<sur>shl<mode>"
  [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
        (unspec:VSDQ_I_DI
	  [(match_operand:VSDQ_I_DI 1 "register_operand" "w")
           (match_operand:VSDQ_I_DI 2 "register_operand" "w")]
         VSHL))]
  "TARGET_SIMD"
  "<sur>shl\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
  [(set_attr "type" "neon_shift_reg<q>")]
)


;; vqshl
;; Saturating register-controlled shift.

(define_insn "aarch64_<sur>q<r>shl<mode>"
  [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
        (unspec:VSDQ_I
	  [(match_operand:VSDQ_I 1 "register_operand" "w")
           (match_operand:VSDQ_I 2 "register_operand" "w")]
         VQSHL))]
  "TARGET_SIMD"
  "<sur>q<r>shl\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
  [(set_attr "type" "neon_sat_shift_reg<q>")]
)

;; vshll_n

(define_insn "aarch64_<sur>shll_n<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(unspec:<VWIDE> [(match_operand:VD_BHSI 1 "register_operand" "w")
			 (match_operand:SI 2
			   "aarch64_simd_shift_imm_bitsize_<ve_mode>" "i")]
			VSHLL))]
  "TARGET_SIMD"
  {
    /* A shift by exactly the element width must use the plain SHLL
       mnemonic; otherwise emit the iterator-selected variant.  */
    if (INTVAL (operands[2]) == GET_MODE_UNIT_BITSIZE (<MODE>mode))
      return "shll\\t%0.<Vwtype>, %1.<Vtype>, %2";
    else
      return "<sur>shll\\t%0.<Vwtype>, %1.<Vtype>, %2";
  }
  [(set_attr "type" "neon_shift_imm_long")]
)

;; vshll_high_n

(define_insn "aarch64_<sur>shll2_n<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(unspec:<VWIDE> [(match_operand:VQW
1 "register_operand" "w") 4371 (match_operand:SI 2 "immediate_operand" "i")] 4372 VSHLL))] 4373 "TARGET_SIMD" 4374 { 4375 if (INTVAL (operands[2]) == GET_MODE_UNIT_BITSIZE (<MODE>mode)) 4376 return "shll2\\t%0.<Vwtype>, %1.<Vtype>, %2"; 4377 else 4378 return "<sur>shll2\\t%0.<Vwtype>, %1.<Vtype>, %2"; 4379 } 4380 [(set_attr "type" "neon_shift_imm_long")] 4381) 4382 4383;; vrshr_n 4384 4385(define_insn "aarch64_<sur>shr_n<mode>" 4386 [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w") 4387 (unspec:VSDQ_I_DI [(match_operand:VSDQ_I_DI 1 "register_operand" "w") 4388 (match_operand:SI 2 4389 "aarch64_simd_shift_imm_offset_<ve_mode>" "i")] 4390 VRSHR_N))] 4391 "TARGET_SIMD" 4392 "<sur>shr\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2" 4393 [(set_attr "type" "neon_sat_shift_imm<q>")] 4394) 4395 4396;; v(r)sra_n 4397 4398(define_insn "aarch64_<sur>sra_n<mode>" 4399 [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w") 4400 (unspec:VSDQ_I_DI [(match_operand:VSDQ_I_DI 1 "register_operand" "0") 4401 (match_operand:VSDQ_I_DI 2 "register_operand" "w") 4402 (match_operand:SI 3 4403 "aarch64_simd_shift_imm_offset_<ve_mode>" "i")] 4404 VSRA))] 4405 "TARGET_SIMD" 4406 "<sur>sra\\t%<v>0<Vmtype>, %<v>2<Vmtype>, %3" 4407 [(set_attr "type" "neon_shift_acc<q>")] 4408) 4409 4410;; vs<lr>i_n 4411 4412(define_insn "aarch64_<sur>s<lr>i_n<mode>" 4413 [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w") 4414 (unspec:VSDQ_I_DI [(match_operand:VSDQ_I_DI 1 "register_operand" "0") 4415 (match_operand:VSDQ_I_DI 2 "register_operand" "w") 4416 (match_operand:SI 3 4417 "aarch64_simd_shift_imm_<offsetlr><ve_mode>" "i")] 4418 VSLRI))] 4419 "TARGET_SIMD" 4420 "s<lr>i\\t%<v>0<Vmtype>, %<v>2<Vmtype>, %3" 4421 [(set_attr "type" "neon_shift_imm<q>")] 4422) 4423 4424;; vqshl(u) 4425 4426(define_insn "aarch64_<sur>qshl<u>_n<mode>" 4427 [(set (match_operand:VSDQ_I 0 "register_operand" "=w") 4428 (unspec:VSDQ_I [(match_operand:VSDQ_I 1 "register_operand" "w") 4429 (match_operand:SI 2 4430 
"aarch64_simd_shift_imm_<ve_mode>" "i")] 4431 VQSHL_N))] 4432 "TARGET_SIMD" 4433 "<sur>qshl<u>\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2" 4434 [(set_attr "type" "neon_sat_shift_imm<q>")] 4435) 4436 4437 4438;; vq(r)shr(u)n_n 4439 4440(define_insn "aarch64_<sur>q<r>shr<u>n_n<mode>" 4441 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w") 4442 (unspec:<VNARROWQ> [(match_operand:VSQN_HSDI 1 "register_operand" "w") 4443 (match_operand:SI 2 4444 "aarch64_simd_shift_imm_offset_<ve_mode>" "i")] 4445 VQSHRN_N))] 4446 "TARGET_SIMD" 4447 "<sur>q<r>shr<u>n\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>, %2" 4448 [(set_attr "type" "neon_sat_shift_imm_narrow_q")] 4449) 4450 4451 4452;; cm(eq|ge|gt|lt|le) 4453;; Note, we have constraints for Dz and Z as different expanders 4454;; have different ideas of what should be passed to this pattern. 4455 4456(define_insn "aarch64_cm<optab><mode>" 4457 [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w,w") 4458 (neg:<V_INT_EQUIV> 4459 (COMPARISONS:<V_INT_EQUIV> 4460 (match_operand:VDQ_I 1 "register_operand" "w,w") 4461 (match_operand:VDQ_I 2 "aarch64_simd_reg_or_zero" "w,ZDz") 4462 )))] 4463 "TARGET_SIMD" 4464 "@ 4465 cm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype> 4466 cm<optab>\t%<v>0<Vmtype>, %<v>1<Vmtype>, #0" 4467 [(set_attr "type" "neon_compare<q>, neon_compare_zero<q>")] 4468) 4469 4470(define_insn_and_split "aarch64_cm<optab>di" 4471 [(set (match_operand:DI 0 "register_operand" "=w,w,r") 4472 (neg:DI 4473 (COMPARISONS:DI 4474 (match_operand:DI 1 "register_operand" "w,w,r") 4475 (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,ZDz,r") 4476 ))) 4477 (clobber (reg:CC CC_REGNUM))] 4478 "TARGET_SIMD" 4479 "#" 4480 "&& reload_completed" 4481 [(set (match_operand:DI 0 "register_operand") 4482 (neg:DI 4483 (COMPARISONS:DI 4484 (match_operand:DI 1 "register_operand") 4485 (match_operand:DI 2 "aarch64_simd_reg_or_zero") 4486 )))] 4487 { 4488 /* If we are in the general purpose register file, 4489 we split to a sequence of 
comparison and store. */ 4490 if (GP_REGNUM_P (REGNO (operands[0])) 4491 && GP_REGNUM_P (REGNO (operands[1]))) 4492 { 4493 machine_mode mode = SELECT_CC_MODE (<CMP>, operands[1], operands[2]); 4494 rtx cc_reg = aarch64_gen_compare_reg (<CMP>, operands[1], operands[2]); 4495 rtx comparison = gen_rtx_<CMP> (mode, operands[1], operands[2]); 4496 emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg)); 4497 DONE; 4498 } 4499 /* Otherwise, we expand to a similar pattern which does not 4500 clobber CC_REGNUM. */ 4501 } 4502 [(set_attr "type" "neon_compare, neon_compare_zero, multiple")] 4503) 4504 4505(define_insn "*aarch64_cm<optab>di" 4506 [(set (match_operand:DI 0 "register_operand" "=w,w") 4507 (neg:DI 4508 (COMPARISONS:DI 4509 (match_operand:DI 1 "register_operand" "w,w") 4510 (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,ZDz") 4511 )))] 4512 "TARGET_SIMD && reload_completed" 4513 "@ 4514 cm<n_optab>\t%d0, %d<cmp_1>, %d<cmp_2> 4515 cm<optab>\t%d0, %d1, #0" 4516 [(set_attr "type" "neon_compare, neon_compare_zero")] 4517) 4518 4519;; cm(hs|hi) 4520 4521(define_insn "aarch64_cm<optab><mode>" 4522 [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w") 4523 (neg:<V_INT_EQUIV> 4524 (UCOMPARISONS:<V_INT_EQUIV> 4525 (match_operand:VDQ_I 1 "register_operand" "w") 4526 (match_operand:VDQ_I 2 "register_operand" "w") 4527 )))] 4528 "TARGET_SIMD" 4529 "cm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>" 4530 [(set_attr "type" "neon_compare<q>")] 4531) 4532 4533(define_insn_and_split "aarch64_cm<optab>di" 4534 [(set (match_operand:DI 0 "register_operand" "=w,r") 4535 (neg:DI 4536 (UCOMPARISONS:DI 4537 (match_operand:DI 1 "register_operand" "w,r") 4538 (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,r") 4539 ))) 4540 (clobber (reg:CC CC_REGNUM))] 4541 "TARGET_SIMD" 4542 "#" 4543 "&& reload_completed" 4544 [(set (match_operand:DI 0 "register_operand") 4545 (neg:DI 4546 (UCOMPARISONS:DI 4547 (match_operand:DI 1 "register_operand") 4548 
(match_operand:DI 2 "aarch64_simd_reg_or_zero") 4549 )))] 4550 { 4551 /* If we are in the general purpose register file, 4552 we split to a sequence of comparison and store. */ 4553 if (GP_REGNUM_P (REGNO (operands[0])) 4554 && GP_REGNUM_P (REGNO (operands[1]))) 4555 { 4556 machine_mode mode = CCmode; 4557 rtx cc_reg = aarch64_gen_compare_reg (<CMP>, operands[1], operands[2]); 4558 rtx comparison = gen_rtx_<CMP> (mode, operands[1], operands[2]); 4559 emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg)); 4560 DONE; 4561 } 4562 /* Otherwise, we expand to a similar pattern which does not 4563 clobber CC_REGNUM. */ 4564 } 4565 [(set_attr "type" "neon_compare,multiple")] 4566) 4567 4568(define_insn "*aarch64_cm<optab>di" 4569 [(set (match_operand:DI 0 "register_operand" "=w") 4570 (neg:DI 4571 (UCOMPARISONS:DI 4572 (match_operand:DI 1 "register_operand" "w") 4573 (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w") 4574 )))] 4575 "TARGET_SIMD && reload_completed" 4576 "cm<n_optab>\t%d0, %d<cmp_1>, %d<cmp_2>" 4577 [(set_attr "type" "neon_compare")] 4578) 4579 4580;; cmtst 4581 4582;; Although neg (ne (and x y) 0) is the natural way of expressing a cmtst, 4583;; we don't have any insns using ne, and aarch64_vcond outputs 4584;; not (neg (eq (and x y) 0)) 4585;; which is rewritten by simplify_rtx as 4586;; plus (eq (and x y) 0) -1. 
4587 4588(define_insn "aarch64_cmtst<mode>" 4589 [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w") 4590 (plus:<V_INT_EQUIV> 4591 (eq:<V_INT_EQUIV> 4592 (and:VDQ_I 4593 (match_operand:VDQ_I 1 "register_operand" "w") 4594 (match_operand:VDQ_I 2 "register_operand" "w")) 4595 (match_operand:VDQ_I 3 "aarch64_simd_imm_zero")) 4596 (match_operand:<V_INT_EQUIV> 4 "aarch64_simd_imm_minus_one"))) 4597 ] 4598 "TARGET_SIMD" 4599 "cmtst\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>" 4600 [(set_attr "type" "neon_tst<q>")] 4601) 4602 4603(define_insn_and_split "aarch64_cmtstdi" 4604 [(set (match_operand:DI 0 "register_operand" "=w,r") 4605 (neg:DI 4606 (ne:DI 4607 (and:DI 4608 (match_operand:DI 1 "register_operand" "w,r") 4609 (match_operand:DI 2 "register_operand" "w,r")) 4610 (const_int 0)))) 4611 (clobber (reg:CC CC_REGNUM))] 4612 "TARGET_SIMD" 4613 "#" 4614 "&& reload_completed" 4615 [(set (match_operand:DI 0 "register_operand") 4616 (neg:DI 4617 (ne:DI 4618 (and:DI 4619 (match_operand:DI 1 "register_operand") 4620 (match_operand:DI 2 "register_operand")) 4621 (const_int 0))))] 4622 { 4623 /* If we are in the general purpose register file, 4624 we split to a sequence of comparison and store. */ 4625 if (GP_REGNUM_P (REGNO (operands[0])) 4626 && GP_REGNUM_P (REGNO (operands[1]))) 4627 { 4628 rtx and_tree = gen_rtx_AND (DImode, operands[1], operands[2]); 4629 machine_mode mode = SELECT_CC_MODE (NE, and_tree, const0_rtx); 4630 rtx cc_reg = aarch64_gen_compare_reg (NE, and_tree, const0_rtx); 4631 rtx comparison = gen_rtx_NE (mode, and_tree, const0_rtx); 4632 emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg)); 4633 DONE; 4634 } 4635 /* Otherwise, we expand to a similar pattern which does not 4636 clobber CC_REGNUM. 
*/ 4637 } 4638 [(set_attr "type" "neon_tst,multiple")] 4639) 4640 4641(define_insn "*aarch64_cmtstdi" 4642 [(set (match_operand:DI 0 "register_operand" "=w") 4643 (neg:DI 4644 (ne:DI 4645 (and:DI 4646 (match_operand:DI 1 "register_operand" "w") 4647 (match_operand:DI 2 "register_operand" "w")) 4648 (const_int 0))))] 4649 "TARGET_SIMD" 4650 "cmtst\t%d0, %d1, %d2" 4651 [(set_attr "type" "neon_tst")] 4652) 4653 4654;; fcm(eq|ge|gt|le|lt) 4655 4656(define_insn "aarch64_cm<optab><mode>" 4657 [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w,w") 4658 (neg:<V_INT_EQUIV> 4659 (COMPARISONS:<V_INT_EQUIV> 4660 (match_operand:VHSDF_HSDF 1 "register_operand" "w,w") 4661 (match_operand:VHSDF_HSDF 2 "aarch64_simd_reg_or_zero" "w,YDz") 4662 )))] 4663 "TARGET_SIMD" 4664 "@ 4665 fcm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype> 4666 fcm<optab>\t%<v>0<Vmtype>, %<v>1<Vmtype>, 0" 4667 [(set_attr "type" "neon_fp_compare_<stype><q>")] 4668) 4669 4670;; fac(ge|gt) 4671;; Note we can also handle what would be fac(le|lt) by 4672;; generating fac(ge|gt). 
;; Absolute compare (facge/facgt): negated comparison of the absolute
;; values of both operands.
(define_insn "aarch64_fac<optab><mode>"
  [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w")
	(neg:<V_INT_EQUIV>
	  (FAC_COMPARISONS:<V_INT_EQUIV>
	    (abs:VHSDF_HSDF
	      (match_operand:VHSDF_HSDF 1 "register_operand" "w"))
	    (abs:VHSDF_HSDF
	      (match_operand:VHSDF_HSDF 2 "register_operand" "w"))
  )))]
  "TARGET_SIMD"
  "fac<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>"
  [(set_attr "type" "neon_fp_compare_<stype><q>")]
)

;; addp

;; Pairwise add of two 64-bit vectors.
(define_insn "aarch64_addp<mode>"
  [(set (match_operand:VD_BHSI 0 "register_operand" "=w")
	(unspec:VD_BHSI
	  [(match_operand:VD_BHSI 1 "register_operand" "w")
	   (match_operand:VD_BHSI 2 "register_operand" "w")]
	  UNSPEC_ADDP))]
  "TARGET_SIMD"
  "addp\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
  [(set_attr "type" "neon_reduc_add<q>")]
)

;; Scalar form: add the two DI halves of a V2DI into a D register.
(define_insn "aarch64_addpdi"
  [(set (match_operand:DI 0 "register_operand" "=w")
	(unspec:DI
	  [(match_operand:V2DI 1 "register_operand" "w")]
	  UNSPEC_ADDP))]
  "TARGET_SIMD"
  "addp\t%d0, %1.2d"
  [(set_attr "type" "neon_reduc_add")]
)

;; sqrt

;; Standard-name expander: try the approximate-sqrt expansion first; if
;; it was emitted we are done, otherwise fall through to the plain
;; fsqrt insn below.
(define_expand "sqrt<mode>2"
  [(set (match_operand:VHSDF 0 "register_operand" "=w")
	(sqrt:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
  "TARGET_SIMD"
{
  if (aarch64_emit_approx_sqrt (operands[0], operands[1], false))
    DONE;
})

(define_insn "*sqrt<mode>2"
  [(set (match_operand:VHSDF 0 "register_operand" "=w")
	(sqrt:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
  "TARGET_SIMD"
  "fsqrt\\t%0.<Vtype>, %1.<Vtype>"
  [(set_attr "type" "neon_fp_sqrt_<stype><q>")]
)

;; Patterns for vector struct loads and stores.
;; Full-structure LD2: operand 0 is an OI (two-register) tuple.
(define_insn "aarch64_simd_ld2<mode>"
  [(set (match_operand:OI 0 "register_operand" "=w")
	(unspec:OI [(match_operand:OI 1 "aarch64_simd_struct_operand" "Utv")
		    (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
		   UNSPEC_LD2))]
  "TARGET_SIMD"
  "ld2\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
  [(set_attr "type" "neon_load2_2reg<q>")]
)

;; LD2R: load one 2-element structure and replicate across all lanes
;; (type attr: neon_load2_all_lanes).  The memory operand is BLK since
;; its size is that of the structure, not of the register tuple.
(define_insn "aarch64_simd_ld2r<mode>"
  [(set (match_operand:OI 0 "register_operand" "=w")
	(unspec:OI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
		    (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
		   UNSPEC_LD2_DUP))]
  "TARGET_SIMD"
  "ld2r\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
  [(set_attr "type" "neon_load2_all_lanes<q>")]
)

;; LD2 into a single lane.  Operand 2 is the previous tuple value (tied
;; to the output); operand 3 is the GCC lane number, converted for
;; endianness at output time.
(define_insn "aarch64_vec_load_lanesoi_lane<mode>"
  [(set (match_operand:OI 0 "register_operand" "=w")
	(unspec:OI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
		    (match_operand:OI 2 "register_operand" "0")
		    (match_operand:SI 3 "immediate_operand" "i")
		    (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
		   UNSPEC_LD2_LANE))]
  "TARGET_SIMD"
  {
    operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
    return "ld2\\t{%S0.<Vetype> - %T0.<Vetype>}[%3], %1";
  }
  [(set_attr "type" "neon_load2_one_lane")]
)

;; Standard-name expander.  On big-endian, load into a temporary and
;; reverse the register list into GCC's element order afterwards.
(define_expand "vec_load_lanesoi<mode>"
  [(set (match_operand:OI 0 "register_operand" "=w")
	(unspec:OI [(match_operand:OI 1 "aarch64_simd_struct_operand" "Utv")
		    (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
		   UNSPEC_LD2))]
  "TARGET_SIMD"
{
  if (BYTES_BIG_ENDIAN)
    {
      rtx tmp = gen_reg_rtx (OImode);
      rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
      emit_insn (gen_aarch64_simd_ld2<mode> (tmp, operands[1]));
      emit_insn (gen_aarch64_rev_reglistoi (operands[0], tmp, mask));
    }
  else
    emit_insn (gen_aarch64_simd_ld2<mode> (operands[0], operands[1]));
  DONE;
})
;; Two-register interleaving store.
(define_insn "aarch64_simd_st2<mode>"
  [(set (match_operand:OI 0 "aarch64_simd_struct_operand" "=Utv")
	(unspec:OI [(match_operand:OI 1 "register_operand" "w")
		    (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
		   UNSPEC_ST2))]
  "TARGET_SIMD"
  "st2\\t{%S1.<Vtype> - %T1.<Vtype>}, %0"
  [(set_attr "type" "neon_store2_2reg<q>")]
)

;; RTL uses GCC vector extension indices, so flip only for assembly.
(define_insn "aarch64_vec_store_lanesoi_lane<mode>"
  [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
	(unspec:BLK [(match_operand:OI 1 "register_operand" "w")
		     (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
		     (match_operand:SI 2 "immediate_operand" "i")]
		    UNSPEC_ST2_LANE))]
  "TARGET_SIMD"
  {
    operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
    return "st2\\t{%S1.<Vetype> - %T1.<Vetype>}[%2], %0";
  }
  [(set_attr "type" "neon_store2_one_lane<q>")]
)

;; Standard-name expander; on big-endian, reverse the register-list
;; elements before the interleaving store (mirror of vec_load_lanesoi).
(define_expand "vec_store_lanesoi<mode>"
  [(set (match_operand:OI 0 "aarch64_simd_struct_operand" "=Utv")
	(unspec:OI [(match_operand:OI 1 "register_operand" "w")
		    (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
		   UNSPEC_ST2))]
  "TARGET_SIMD"
{
  if (BYTES_BIG_ENDIAN)
    {
      rtx tmp = gen_reg_rtx (OImode);
      rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
      emit_insn (gen_aarch64_rev_reglistoi (tmp, operands[1], mask));
      emit_insn (gen_aarch64_simd_st2<mode> (operands[0], tmp));
    }
  else
    emit_insn (gen_aarch64_simd_st2<mode> (operands[0], operands[1]));
  DONE;
})

;; Three-register interleaved load (CI = 3x128-bit register tuple).
(define_insn "aarch64_simd_ld3<mode>"
  [(set (match_operand:CI 0 "register_operand" "=w")
	(unspec:CI [(match_operand:CI 1 "aarch64_simd_struct_operand" "Utv")
		    (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
		   UNSPEC_LD3))]
  "TARGET_SIMD"
  "ld3\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
  [(set_attr "type" "neon_load3_3reg<q>")]
)

;; LD3R: load one 3-element structure and replicate across all lanes.
(define_insn "aarch64_simd_ld3r<mode>"
  [(set (match_operand:CI 0 "register_operand" "=w")
	(unspec:CI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
		    (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
		   UNSPEC_LD3_DUP))]
  "TARGET_SIMD"
  "ld3r\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
  [(set_attr "type" "neon_load3_all_lanes<q>")]
)

;; LD3 to a single lane; remaining lanes come from tied operand 2.
(define_insn "aarch64_vec_load_lanesci_lane<mode>"
  [(set (match_operand:CI 0 "register_operand" "=w")
	(unspec:CI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
		    (match_operand:CI 2 "register_operand" "0")
		    (match_operand:SI 3 "immediate_operand" "i")
		    (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
		   UNSPEC_LD3_LANE))]
  "TARGET_SIMD"
{
  operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
  return "ld3\\t{%S0.<Vetype> - %U0.<Vetype>}[%3], %1";
}
  [(set_attr "type" "neon_load3_one_lane")]
)

;; Standard-name expander; big-endian needs an element reversal pass.
(define_expand "vec_load_lanesci<mode>"
  [(set (match_operand:CI 0 "register_operand" "=w")
	(unspec:CI [(match_operand:CI 1 "aarch64_simd_struct_operand" "Utv")
		    (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
		   UNSPEC_LD3))]
  "TARGET_SIMD"
{
  if (BYTES_BIG_ENDIAN)
    {
      rtx tmp = gen_reg_rtx (CImode);
      rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
      emit_insn (gen_aarch64_simd_ld3<mode> (tmp, operands[1]));
      emit_insn (gen_aarch64_rev_reglistci (operands[0], tmp, mask));
    }
  else
    emit_insn (gen_aarch64_simd_ld3<mode> (operands[0], operands[1]));
  DONE;
})

;; Three-register interleaving store.
(define_insn "aarch64_simd_st3<mode>"
  [(set (match_operand:CI 0 "aarch64_simd_struct_operand" "=Utv")
	(unspec:CI [(match_operand:CI 1 "register_operand" "w")
		    (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
		   UNSPEC_ST3))]
  "TARGET_SIMD"
  "st3\\t{%S1.<Vtype> - %U1.<Vtype>}, %0"
  [(set_attr "type" "neon_store3_3reg<q>")]
)

;; RTL uses GCC vector extension indices, so flip only for assembly.
(define_insn "aarch64_vec_store_lanesci_lane<mode>"
  [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
	(unspec:BLK [(match_operand:CI 1 "register_operand" "w")
		     (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
		     (match_operand:SI 2 "immediate_operand" "i")]
		    UNSPEC_ST3_LANE))]
  "TARGET_SIMD"
  {
    operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
    return "st3\\t{%S1.<Vetype> - %U1.<Vetype>}[%2], %0";
  }
  [(set_attr "type" "neon_store3_one_lane<q>")]
)

;; Standard-name expander; reverse register-list elements on big-endian
;; before the interleaving store.
(define_expand "vec_store_lanesci<mode>"
  [(set (match_operand:CI 0 "aarch64_simd_struct_operand" "=Utv")
	(unspec:CI [(match_operand:CI 1 "register_operand" "w")
		    (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
		   UNSPEC_ST3))]
  "TARGET_SIMD"
{
  if (BYTES_BIG_ENDIAN)
    {
      rtx tmp = gen_reg_rtx (CImode);
      rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
      emit_insn (gen_aarch64_rev_reglistci (tmp, operands[1], mask));
      emit_insn (gen_aarch64_simd_st3<mode> (operands[0], tmp));
    }
  else
    emit_insn (gen_aarch64_simd_st3<mode> (operands[0], operands[1]));
  DONE;
})

;; Four-register interleaved load (XI = 4x128-bit register tuple).
(define_insn "aarch64_simd_ld4<mode>"
  [(set (match_operand:XI 0 "register_operand" "=w")
	(unspec:XI [(match_operand:XI 1 "aarch64_simd_struct_operand" "Utv")
		    (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
		   UNSPEC_LD4))]
  "TARGET_SIMD"
  "ld4\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
  [(set_attr "type" "neon_load4_4reg<q>")]
)

;; LD4R: load one 4-element structure and replicate across all lanes.
(define_insn "aarch64_simd_ld4r<mode>"
  [(set (match_operand:XI 0 "register_operand" "=w")
	(unspec:XI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
		    (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
		   UNSPEC_LD4_DUP))]
  "TARGET_SIMD"
  "ld4r\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
  [(set_attr "type" "neon_load4_all_lanes<q>")]
)

;; LD4 to a single lane; remaining lanes come from tied operand 2.
(define_insn "aarch64_vec_load_lanesxi_lane<mode>"
  [(set (match_operand:XI 0 "register_operand" "=w")
	(unspec:XI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
		    (match_operand:XI 2 "register_operand" "0")
		    (match_operand:SI 3 "immediate_operand" "i")
		    (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
		   UNSPEC_LD4_LANE))]
  "TARGET_SIMD"
{
  operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
  return "ld4\\t{%S0.<Vetype> - %V0.<Vetype>}[%3], %1";
}
  [(set_attr "type" "neon_load4_one_lane")]
)

;; Standard-name expander; big-endian needs an element reversal pass.
(define_expand "vec_load_lanesxi<mode>"
  [(set (match_operand:XI 0 "register_operand" "=w")
	(unspec:XI [(match_operand:XI 1 "aarch64_simd_struct_operand" "Utv")
		    (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
		   UNSPEC_LD4))]
  "TARGET_SIMD"
{
  if (BYTES_BIG_ENDIAN)
    {
      rtx tmp = gen_reg_rtx (XImode);
      rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
      emit_insn (gen_aarch64_simd_ld4<mode> (tmp, operands[1]));
      emit_insn (gen_aarch64_rev_reglistxi (operands[0], tmp, mask));
    }
  else
    emit_insn (gen_aarch64_simd_ld4<mode> (operands[0], operands[1]));
  DONE;
})

;; Four-register interleaving store.
(define_insn "aarch64_simd_st4<mode>"
  [(set (match_operand:XI 0 "aarch64_simd_struct_operand" "=Utv")
	(unspec:XI [(match_operand:XI 1 "register_operand" "w")
		    (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
		   UNSPEC_ST4))]
  "TARGET_SIMD"
  "st4\\t{%S1.<Vtype> - %V1.<Vtype>}, %0"
  [(set_attr "type" "neon_store4_4reg<q>")]
)

;; RTL uses GCC vector extension indices, so flip only for assembly.
;; ST4 from a single lane of each register in the tuple.
(define_insn "aarch64_vec_store_lanesxi_lane<mode>"
  [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
	(unspec:BLK [(match_operand:XI 1 "register_operand" "w")
		     (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
		     (match_operand:SI 2 "immediate_operand" "i")]
		    UNSPEC_ST4_LANE))]
  "TARGET_SIMD"
  {
    ;; Flip the lane number for big-endian register layout.
    operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
    return "st4\\t{%S1.<Vetype> - %V1.<Vetype>}[%2], %0";
  }
  [(set_attr "type" "neon_store4_one_lane<q>")]
)

;; Standard-name expander; on big-endian, reverse the register-list
;; elements before the interleaving store.
(define_expand "vec_store_lanesxi<mode>"
  [(set (match_operand:XI 0 "aarch64_simd_struct_operand" "=Utv")
	(unspec:XI [(match_operand:XI 1 "register_operand" "w")
		    (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
		   UNSPEC_ST4))]
  "TARGET_SIMD"
{
  if (BYTES_BIG_ENDIAN)
    {
      rtx tmp = gen_reg_rtx (XImode);
      rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
      emit_insn (gen_aarch64_rev_reglistxi (tmp, operands[1], mask));
      emit_insn (gen_aarch64_simd_st4<mode> (operands[0], tmp));
    }
  else
    emit_insn (gen_aarch64_simd_st4<mode> (operands[0], operands[1]));
  DONE;
})

;; Reverse the elements of each vector in a register-list tuple, one
;; TBL per 128-bit register after reload.  Operand 2 is the byte
;; permutation mask.  Earlyclobber ("=&w") because the loop reads
;; operand 1's registers while writing operand 0's.
(define_insn_and_split "aarch64_rev_reglist<mode>"
[(set (match_operand:VSTRUCT 0 "register_operand" "=&w")
	(unspec:VSTRUCT
		   [(match_operand:VSTRUCT 1 "register_operand" "w")
		    (match_operand:V16QI 2 "register_operand" "w")]
                   UNSPEC_REV_REGLIST))]
  "TARGET_SIMD"
  "#"
  "&& reload_completed"
  [(const_int 0)]
{
  int i;
  int nregs = GET_MODE_SIZE (<MODE>mode) / UNITS_PER_VREG;
  for (i = 0; i < nregs; i++)
    {
      rtx op0 = gen_rtx_REG (V16QImode, REGNO (operands[0]) + i);
      rtx op1 = gen_rtx_REG (V16QImode, REGNO (operands[1]) + i);
      emit_insn (gen_aarch64_tbl1v16qi (op0, op1, operands[2]));
    }
  DONE;
}
  [(set_attr "type" "neon_tbl1_q")
   (set_attr "length" "<insn_count>")]
)

;; Reload patterns for AdvSIMD register list operands.

;; Move expander for register-list (struct) modes: force operand 1 into
;; a register unless the destination is already a register.
(define_expand "mov<mode>"
  [(set (match_operand:VSTRUCT 0 "nonimmediate_operand" "")
	(match_operand:VSTRUCT 1 "general_operand" ""))]
  "TARGET_SIMD"
{
  if (can_create_pseudo_p ())
    {
      if (GET_CODE (operands[0]) != REG)
	operands[1] = force_reg (<MODE>mode, operands[1]);
    }
})

;; Little-endian register-list move: reg-reg moves are split later
;; ("#"); memory transfers use multi-register LD1/ST1.
(define_insn "*aarch64_mov<mode>"
  [(set (match_operand:VSTRUCT 0 "aarch64_simd_nonimmediate_operand" "=w,Utv,w")
	(match_operand:VSTRUCT 1 "aarch64_simd_general_operand" " w,w,Utv"))]
  "TARGET_SIMD && !BYTES_BIG_ENDIAN
   && (register_operand (operands[0], <MODE>mode)
       || register_operand (operands[1], <MODE>mode))"
  "@
   #
   st1\\t{%S1.16b - %<Vendreg>1.16b}, %0
   ld1\\t{%S0.16b - %<Vendreg>0.16b}, %1"
  [(set_attr "type" "multiple,neon_store<nregs>_<nregs>reg_q,\
		     neon_load<nregs>_<nregs>reg_q")
   (set_attr "length" "<insn_count>,4,4")]
)

;; Big-endian single-vector load via LD1 (element-ordered, unlike LDR).
(define_insn "aarch64_be_ld1<mode>"
  [(set (match_operand:VALLDI_F16 0	"register_operand" "=w")
	(unspec:VALLDI_F16 [(match_operand:VALLDI_F16 1
			     "aarch64_simd_struct_operand" "Utv")]
	UNSPEC_LD1))]
  "TARGET_SIMD"
  "ld1\\t{%0<Vmtype>}, %1"
  [(set_attr "type" "neon_load1_1reg<q>")]
)

;; Big-endian single-vector store via ST1.
(define_insn "aarch64_be_st1<mode>"
  [(set (match_operand:VALLDI_F16 0 "aarch64_simd_struct_operand" "=Utv")
	(unspec:VALLDI_F16 [(match_operand:VALLDI_F16 1 "register_operand" "w")]
	UNSPEC_ST1))]
  "TARGET_SIMD"
  "st1\\t{%1<Vmtype>}, %0"
  [(set_attr "type" "neon_store1_1reg<q>")]
)

;; Big-endian OI (2-register tuple) move: LDP/STP of Q registers for
;; memory, split for reg-reg.
(define_insn "*aarch64_be_movoi"
  [(set (match_operand:OI 0 "nonimmediate_operand" "=w,m,w")
	(match_operand:OI 1 "general_operand"      " w,w,m"))]
  "TARGET_SIMD && BYTES_BIG_ENDIAN
   && (register_operand (operands[0], OImode)
       || register_operand (operands[1], OImode))"
  "@
   #
   stp\\t%q1, %R1, %0
   ldp\\t%q0, %R0, %1"
  [(set_attr "type" "multiple,neon_stp_q,neon_ldp_q")
   (set_attr "length" "8,4,4")]
)

;; Big-endian CI (3-register tuple) move; always split.
(define_insn "*aarch64_be_movci"
  [(set (match_operand:CI 0 "nonimmediate_operand" "=w,o,w")
	(match_operand:CI 1 "general_operand"      " w,w,o"))]
  "TARGET_SIMD && BYTES_BIG_ENDIAN
   && (register_operand (operands[0], CImode)
       || register_operand (operands[1], CImode))"
  "#"
  [(set_attr "type" "multiple")
   (set_attr "length" "12,4,4")]
)

;; Big-endian XI (4-register tuple) move; always split.
(define_insn "*aarch64_be_movxi"
  [(set (match_operand:XI 0 "nonimmediate_operand" "=w,o,w")
	(match_operand:XI 1 "general_operand"      " w,w,o"))]
  "TARGET_SIMD && BYTES_BIG_ENDIAN
   && (register_operand (operands[0], XImode)
       || register_operand (operands[1], XImode))"
  "#"
  [(set_attr "type" "multiple")
   (set_attr "length" "16,4,4")]
)

;; Split OI reg-reg moves into two TImode register moves.
(define_split
  [(set (match_operand:OI 0 "register_operand")
	(match_operand:OI 1 "register_operand"))]
  "TARGET_SIMD && reload_completed"
  [(const_int 0)]
{
  aarch64_simd_emit_reg_reg_move (operands, TImode, 2);
  DONE;
})

;; Split CI moves: reg-reg as three TImode moves; big-endian memory
;; moves as an OImode piece plus a trailing 16-byte piece.
(define_split
  [(set (match_operand:CI 0 "nonimmediate_operand")
	(match_operand:CI 1 "general_operand"))]
  "TARGET_SIMD && reload_completed"
  [(const_int 0)]
{
  if (register_operand (operands[0], CImode)
      && register_operand (operands[1], CImode))
    {
      aarch64_simd_emit_reg_reg_move (operands, TImode, 3);
      DONE;
    }
  else if (BYTES_BIG_ENDIAN)
    {
      emit_move_insn (simplify_gen_subreg (OImode, operands[0], CImode, 0),
		      simplify_gen_subreg (OImode, operands[1], CImode, 0));
      emit_move_insn (gen_lowpart (V16QImode,
				   simplify_gen_subreg (TImode, operands[0],
							CImode, 32)),
		      gen_lowpart (V16QImode,
				   simplify_gen_subreg (TImode, operands[1],
							CImode, 32)));
      DONE;
    }
  else
    FAIL;
})

;; Split XI moves: reg-reg as four TImode moves; big-endian memory
;; moves as two OImode halves.
(define_split
  [(set (match_operand:XI 0 "nonimmediate_operand")
	(match_operand:XI 1 "general_operand"))]
  "TARGET_SIMD && reload_completed"
  [(const_int 0)]
{
  if (register_operand (operands[0], XImode)
      && register_operand (operands[1], XImode))
    {
      aarch64_simd_emit_reg_reg_move (operands, TImode, 4);
      DONE;
    }
  else if (BYTES_BIG_ENDIAN)
    {
      emit_move_insn (simplify_gen_subreg (OImode, operands[0], XImode, 0),
		      simplify_gen_subreg (OImode, operands[1], XImode, 0));
      emit_move_insn (simplify_gen_subreg (OImode, operands[0], XImode, 32),
		      simplify_gen_subreg (OImode, operands[1], XImode, 32));
      DONE;
    }
  else
    FAIL;
})

;; Builtin expander for LDnR: wrap the address in a BLK MEM sized to the
;; nregs structure elements actually read.
(define_expand "aarch64_ld<VSTRUCT:nregs>r<VALLDIF:mode>"
  [(match_operand:VSTRUCT 0 "register_operand" "=w")
   (match_operand:DI 1 "register_operand" "w")
   (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
  "TARGET_SIMD"
{
  rtx mem = gen_rtx_MEM (BLKmode, operands[1]);
  set_mem_size (mem, GET_MODE_SIZE (GET_MODE_INNER (<VALLDIF:MODE>mode))
		     * <VSTRUCT:nregs>);

  emit_insn (gen_aarch64_simd_ld<VSTRUCT:nregs>r<VALLDIF:mode> (operands[0],
								mem));
  DONE;
})

;; LD2 of D-register vectors; result occupies the low halves of an OI.
(define_insn "aarch64_ld2<mode>_dreg"
  [(set (match_operand:OI 0 "register_operand" "=w")
	(unspec:OI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
		    (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
		   UNSPEC_LD2_DREG))]
  "TARGET_SIMD"
  "ld2\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
  [(set_attr "type" "neon_load2_2reg<q>")]
)

;; For 64-bit scalar-in-vector modes (DX) use LD1 with .1d layout.
(define_insn "aarch64_ld2<mode>_dreg"
  [(set (match_operand:OI 0 "register_operand" "=w")
	(unspec:OI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
		    (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
		   UNSPEC_LD2_DREG))]
  "TARGET_SIMD"
  "ld1\\t{%S0.1d - %T0.1d}, %1"
  [(set_attr "type" "neon_load1_2reg<q>")]
)

;; LD3 of D-register vectors into a CI tuple.
(define_insn "aarch64_ld3<mode>_dreg"
  [(set (match_operand:CI 0 "register_operand" "=w")
	(unspec:CI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
		    (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
		   UNSPEC_LD3_DREG))]
  "TARGET_SIMD"
  "ld3\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
  [(set_attr "type" "neon_load3_3reg<q>")]
)

;; DX variant via LD1 .1d.
(define_insn "aarch64_ld3<mode>_dreg"
  [(set (match_operand:CI 0 "register_operand" "=w")
	(unspec:CI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
		    (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
		   UNSPEC_LD3_DREG))]
  "TARGET_SIMD"
  "ld1\\t{%S0.1d - %U0.1d}, %1"
  [(set_attr "type" "neon_load1_3reg<q>")]
)

;; LD4 of D-register vectors into an XI tuple.
(define_insn "aarch64_ld4<mode>_dreg"
  [(set (match_operand:XI 0 "register_operand" "=w")
	(unspec:XI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
		    (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
		   UNSPEC_LD4_DREG))]
  "TARGET_SIMD"
  "ld4\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
  [(set_attr "type" "neon_load4_4reg<q>")]
)

;; DX variant via LD1 .1d.
(define_insn "aarch64_ld4<mode>_dreg"
  [(set (match_operand:XI 0 "register_operand" "=w")
	(unspec:XI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
		    (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
		   UNSPEC_LD4_DREG))]
  "TARGET_SIMD"
  "ld1\\t{%S0.1d - %V0.1d}, %1"
  [(set_attr "type" "neon_load1_4reg<q>")]
)

;; Builtin expander for D-register LDn: each element is 8 bytes.
(define_expand "aarch64_ld<VSTRUCT:nregs><VDC:mode>"
 [(match_operand:VSTRUCT 0 "register_operand" "=w")
  (match_operand:DI 1 "register_operand" "r")
  (unspec:VDC [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
  "TARGET_SIMD"
{
  rtx mem = gen_rtx_MEM (BLKmode, operands[1]);
  set_mem_size (mem, <VSTRUCT:nregs> * 8);

  emit_insn (gen_aarch64_ld<VSTRUCT:nregs><VDC:mode>_dreg (operands[0], mem));
  DONE;
})

;; Builtin expander for LD1: plain move on little-endian, LD1 pattern
;; on big-endian (element order differs from LDR).
(define_expand "aarch64_ld1<VALL_F16:mode>"
 [(match_operand:VALL_F16 0 "register_operand")
  (match_operand:DI 1 "register_operand")]
  "TARGET_SIMD"
{
  machine_mode mode = <VALL_F16:MODE>mode;
  rtx mem = gen_rtx_MEM (mode, operands[1]);

  if (BYTES_BIG_ENDIAN)
    emit_insn (gen_aarch64_be_ld1<VALL_F16:mode> (operands[0], mem));
  else
    emit_move_insn (operands[0], mem);
  DONE;
})

;; Builtin expander for Q-register LDn.
(define_expand "aarch64_ld<VSTRUCT:nregs><VQ:mode>"
 [(match_operand:VSTRUCT 0 "register_operand" "=w")
  (match_operand:DI 1 "register_operand" "r")
  (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
  "TARGET_SIMD"
{
  machine_mode mode = <VSTRUCT:MODE>mode;
  rtx mem = gen_rtx_MEM (mode, operands[1]);

  emit_insn (gen_aarch64_simd_ld<VSTRUCT:nregs><VQ:mode> (operands[0], mem));
  DONE;
})

;; LD1 x2 (two consecutive vectors, no interleaving), Q-register form.
(define_expand "aarch64_ld1x2<VQ:mode>"
 [(match_operand:OI 0 "register_operand" "=w")
  (match_operand:DI 1 "register_operand" "r")
  (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
  "TARGET_SIMD"
{
  machine_mode mode = OImode;
  rtx mem = gen_rtx_MEM (mode, operands[1]);

  emit_insn (gen_aarch64_simd_ld1<VQ:mode>_x2 (operands[0], mem));
  DONE;
})

;; LD1 x2, D-register form.
(define_expand "aarch64_ld1x2<VDC:mode>"
 [(match_operand:OI 0 "register_operand" "=w")
  (match_operand:DI 1 "register_operand" "r")
  (unspec:VDC [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
  "TARGET_SIMD"
{
  machine_mode mode = OImode;
  rtx mem = gen_rtx_MEM (mode, operands[1]);

  emit_insn (gen_aarch64_simd_ld1<VDC:mode>_x2 (operands[0], mem));
  DONE;
})


;; Builtin expander for lane LDn: checks the lane bound, then emits the
;; matching vec_load_lanes*_lane pattern.
(define_expand "aarch64_ld<VSTRUCT:nregs>_lane<VALLDIF:mode>"
  [(match_operand:VSTRUCT 0 "register_operand" "=w")
	(match_operand:DI 1 "register_operand" "w")
	(match_operand:VSTRUCT 2 "register_operand" "0")
	(match_operand:SI 3 "immediate_operand" "i")
	(unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
  "TARGET_SIMD"
{
  rtx mem = gen_rtx_MEM (BLKmode, operands[1]);
  set_mem_size (mem, GET_MODE_SIZE (GET_MODE_INNER (<VALLDIF:MODE>mode))
		     * <VSTRUCT:nregs>);

  aarch64_simd_lane_bounds (operands[3], 0, <VALLDIF:nunits>, NULL);
  emit_insn (gen_aarch64_vec_load_lanes<VSTRUCT:mode>_lane<VALLDIF:mode> (
	operands[0], mem, operands[2], operands[3]));
  DONE;
})

;; Expanders for builtins to extract vector registers from large
;; opaque integer modes.

;; D-register list.

;; Extract D-register number PART from a register-list tuple.  Goes via
;; a double-width (VDBL) temporary; the tuple is laid out in 16-byte
;; slots, hence offset = part * 16.
(define_expand "aarch64_get_dreg<VSTRUCT:mode><VDC:mode>"
 [(match_operand:VDC 0 "register_operand" "=w")
  (match_operand:VSTRUCT 1 "register_operand" "w")
  (match_operand:SI 2 "immediate_operand" "i")]
  "TARGET_SIMD"
{
  int part = INTVAL (operands[2]);
  rtx temp = gen_reg_rtx (<VDC:VDBL>mode);
  int offset = part * 16;

  emit_move_insn (temp, gen_rtx_SUBREG (<VDC:VDBL>mode, operands[1], offset));
  emit_move_insn (operands[0], gen_lowpart (<VDC:MODE>mode, temp));
  DONE;
})

;; Q-register list.

;; Extract Q-register number PART from a register-list tuple.
(define_expand "aarch64_get_qreg<VSTRUCT:mode><VQ:mode>"
 [(match_operand:VQ 0 "register_operand" "=w")
  (match_operand:VSTRUCT 1 "register_operand" "w")
  (match_operand:SI 2 "immediate_operand" "i")]
  "TARGET_SIMD"
{
  int part = INTVAL (operands[2]);
  int offset = part * 16;

  emit_move_insn (operands[0],
		  gen_rtx_SUBREG (<VQ:MODE>mode, operands[1], offset));
  DONE;
})

;; Permuted-store expanders for neon intrinsics.
;; Permute instructions

;; vec_perm support

;; Standard-name expander for variable permutes on byte vectors; the
;; backend helper picks TBL or TBL+TBX sequences.
(define_expand "vec_perm<mode>"
  [(match_operand:VB 0 "register_operand")
   (match_operand:VB 1 "register_operand")
   (match_operand:VB 2 "register_operand")
   (match_operand:VB 3 "register_operand")]
  "TARGET_SIMD"
{
  aarch64_expand_vec_perm (operands[0], operands[1],
			   operands[2], operands[3], <nunits>);
  DONE;
})

;; One-source-register table lookup.
(define_insn "aarch64_tbl1<mode>"
  [(set (match_operand:VB 0 "register_operand" "=w")
	(unspec:VB [(match_operand:V16QI 1 "register_operand" "w")
		    (match_operand:VB 2 "register_operand" "w")]
		   UNSPEC_TBL))]
  "TARGET_SIMD"
  "tbl\\t%0.<Vtype>, {%1.16b}, %2.<Vtype>"
  [(set_attr "type" "neon_tbl1<q>")]
)

;; Two source registers.

;; Table lookup over a pair of registers (OI tuple), 128-bit result.
(define_insn "aarch64_tbl2v16qi"
  [(set (match_operand:V16QI 0 "register_operand" "=w")
	(unspec:V16QI [(match_operand:OI 1 "register_operand" "w")
		       (match_operand:V16QI 2 "register_operand" "w")]
		      UNSPEC_TBL))]
  "TARGET_SIMD"
  "tbl\\t%0.16b, {%S1.16b - %T1.16b}, %2.16b"
  [(set_attr "type" "neon_tbl2_q")]
)

;; Table lookup over a pair of registers, 64- or 128-bit result.
(define_insn "aarch64_tbl3<mode>"
  [(set (match_operand:VB 0 "register_operand" "=w")
	(unspec:VB [(match_operand:OI 1 "register_operand" "w")
		    (match_operand:VB 2 "register_operand" "w")]
		   UNSPEC_TBL))]
  "TARGET_SIMD"
  "tbl\\t%S0.<Vbtype>, {%S1.16b - %T1.16b}, %S2.<Vbtype>"
  [(set_attr "type" "neon_tbl3")]
)

;; Table lookup extension (TBX) over a pair of registers; out-of-range
;; indices keep the corresponding byte of tied operand 1.
(define_insn "aarch64_tbx4<mode>"
  [(set (match_operand:VB 0 "register_operand" "=w")
	(unspec:VB [(match_operand:VB 1 "register_operand" "0")
		    (match_operand:OI 2 "register_operand" "w")
		    (match_operand:VB 3 "register_operand" "w")]
		   UNSPEC_TBX))]
  "TARGET_SIMD"
  "tbx\\t%S0.<Vbtype>, {%S2.16b - %T2.16b}, %S3.<Vbtype>"
  [(set_attr "type" "neon_tbl4")]
)

;; Three source registers.
;; Table lookup over three registers (CI tuple).
(define_insn "aarch64_qtbl3<mode>"
  [(set (match_operand:VB 0 "register_operand" "=w")
	(unspec:VB [(match_operand:CI 1 "register_operand" "w")
		    (match_operand:VB 2 "register_operand" "w")]
		   UNSPEC_TBL))]
  "TARGET_SIMD"
  "tbl\\t%S0.<Vbtype>, {%S1.16b - %U1.16b}, %S2.<Vbtype>"
  [(set_attr "type" "neon_tbl3")]
)

;; TBX over three registers; tied operand 1 supplies fallback bytes.
(define_insn "aarch64_qtbx3<mode>"
  [(set (match_operand:VB 0 "register_operand" "=w")
	(unspec:VB [(match_operand:VB 1 "register_operand" "0")
		    (match_operand:CI 2 "register_operand" "w")
		    (match_operand:VB 3 "register_operand" "w")]
		   UNSPEC_TBX))]
  "TARGET_SIMD"
  "tbx\\t%S0.<Vbtype>, {%S2.16b - %U2.16b}, %S3.<Vbtype>"
  [(set_attr "type" "neon_tbl3")]
)

;; Four source registers.

;; Table lookup over four registers (XI tuple).
(define_insn "aarch64_qtbl4<mode>"
  [(set (match_operand:VB 0 "register_operand" "=w")
	(unspec:VB [(match_operand:XI 1 "register_operand" "w")
		    (match_operand:VB 2 "register_operand" "w")]
		   UNSPEC_TBL))]
  "TARGET_SIMD"
  "tbl\\t%S0.<Vbtype>, {%S1.16b - %V1.16b}, %S2.<Vbtype>"
  [(set_attr "type" "neon_tbl4")]
)

;; TBX over four registers; tied operand 1 supplies fallback bytes.
(define_insn "aarch64_qtbx4<mode>"
  [(set (match_operand:VB 0 "register_operand" "=w")
	(unspec:VB [(match_operand:VB 1 "register_operand" "0")
		    (match_operand:XI 2 "register_operand" "w")
		    (match_operand:VB 3 "register_operand" "w")]
		   UNSPEC_TBX))]
  "TARGET_SIMD"
  "tbx\\t%S0.<Vbtype>, {%S2.16b - %V2.16b}, %S3.<Vbtype>"
  [(set_attr "type" "neon_tbl4")]
)

;; Concatenate two V16QI registers into an OI tuple; split into plain
;; register moves once hard registers are known.
(define_insn_and_split "aarch64_combinev16qi"
  [(set (match_operand:OI 0 "register_operand" "=w")
	(unspec:OI [(match_operand:V16QI 1 "register_operand" "w")
		    (match_operand:V16QI 2 "register_operand" "w")]
		   UNSPEC_CONCAT))]
  "TARGET_SIMD"
  "#"
  "&& reload_completed"
  [(const_int 0)]
{
  aarch64_split_combinev16qi (operands);
  DONE;
}
[(set_attr "type" "multiple")]
)

;; This instruction's pattern is generated directly by
;; aarch64_expand_vec_perm_const, so any changes to the pattern would
;; need corresponding changes there.
(define_insn "aarch64_<PERMUTE:perm_insn><PERMUTE:perm_hilo><mode>"
  [(set (match_operand:VALL_F16 0 "register_operand" "=w")
	(unspec:VALL_F16 [(match_operand:VALL_F16 1 "register_operand" "w")
			  (match_operand:VALL_F16 2 "register_operand" "w")]
	 PERMUTE))]
  "TARGET_SIMD"
  "<PERMUTE:perm_insn><PERMUTE:perm_hilo>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_permute<q>")]
)

;; This instruction's pattern is generated directly by
;; aarch64_expand_vec_perm_const, so any changes to the pattern would
;; need corresponding changes there.  Note that the immediate (third)
;; operand is a lane index not a byte index.
(define_insn "aarch64_ext<mode>"
  [(set (match_operand:VALL_F16 0 "register_operand" "=w")
	(unspec:VALL_F16 [(match_operand:VALL_F16 1 "register_operand" "w")
			  (match_operand:VALL_F16 2 "register_operand" "w")
			  (match_operand:SI 3 "immediate_operand" "i")]
	 UNSPEC_EXT))]
  "TARGET_SIMD"
{
  ;; EXT takes a byte offset; convert the lane index here.
  operands[3] = GEN_INT (INTVAL (operands[3])
      * GET_MODE_UNIT_SIZE (<MODE>mode));
  return "ext\\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>, #%3";
}
  [(set_attr "type" "neon_ext<q>")]
)

;; This instruction's pattern is generated directly by
;; aarch64_expand_vec_perm_const, so any changes to the pattern would
;; need corresponding changes there.
;; Element reversal within 64-, 32- or 16-bit containers (REV64/32/16).
(define_insn "aarch64_rev<REVERSE:rev_op><mode>"
  [(set (match_operand:VALL_F16 0 "register_operand" "=w")
	(unspec:VALL_F16 [(match_operand:VALL_F16 1 "register_operand" "w")]
	REVERSE))]
  "TARGET_SIMD"
  "rev<REVERSE:rev_op>\\t%0.<Vtype>, %1.<Vtype>"
  [(set_attr "type" "neon_rev<q>")]
)

;; ST2 of D-register vectors held in the low halves of an OI tuple.
(define_insn "aarch64_st2<mode>_dreg"
  [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
	(unspec:BLK [(match_operand:OI 1 "register_operand" "w")
		     (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
		    UNSPEC_ST2))]
  "TARGET_SIMD"
  "st2\\t{%S1.<Vtype> - %T1.<Vtype>}, %0"
  [(set_attr "type" "neon_store2_2reg")]
)

;; 64-bit scalar (DX) variant via ST1 .1d.
(define_insn "aarch64_st2<mode>_dreg"
  [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
	(unspec:BLK [(match_operand:OI 1 "register_operand" "w")
		     (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
		    UNSPEC_ST2))]
  "TARGET_SIMD"
  "st1\\t{%S1.1d - %T1.1d}, %0"
  [(set_attr "type" "neon_store1_2reg")]
)

;; ST3 of D-register vectors (CI tuple).
(define_insn "aarch64_st3<mode>_dreg"
  [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
	(unspec:BLK [(match_operand:CI 1 "register_operand" "w")
		     (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
		    UNSPEC_ST3))]
  "TARGET_SIMD"
  "st3\\t{%S1.<Vtype> - %U1.<Vtype>}, %0"
  [(set_attr "type" "neon_store3_3reg")]
)

;; DX variant via ST1 .1d.
(define_insn "aarch64_st3<mode>_dreg"
  [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
	(unspec:BLK [(match_operand:CI 1 "register_operand" "w")
		     (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
		    UNSPEC_ST3))]
  "TARGET_SIMD"
  "st1\\t{%S1.1d - %U1.1d}, %0"
  [(set_attr "type" "neon_store1_3reg")]
)

;; ST4 of D-register vectors (XI tuple).
(define_insn "aarch64_st4<mode>_dreg"
  [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
	(unspec:BLK [(match_operand:XI 1 "register_operand" "w")
		     (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
		    UNSPEC_ST4))]
  "TARGET_SIMD"
  "st4\\t{%S1.<Vtype> - %V1.<Vtype>}, %0"
  [(set_attr "type" "neon_store4_4reg")]
)

;; DX variant via ST1 .1d.
(define_insn "aarch64_st4<mode>_dreg"
  [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
	(unspec:BLK [(match_operand:XI 1 "register_operand" "w")
		     (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
		    UNSPEC_ST4))]
  "TARGET_SIMD"
  "st1\\t{%S1.1d - %V1.1d}, %0"
  [(set_attr "type" "neon_store1_4reg")]
)

;; Builtin expander for D-register STn: each element is 8 bytes.
(define_expand "aarch64_st<VSTRUCT:nregs><VDC:mode>"
 [(match_operand:DI 0 "register_operand" "r")
  (match_operand:VSTRUCT 1 "register_operand" "w")
  (unspec:VDC [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
  "TARGET_SIMD"
{
  rtx mem = gen_rtx_MEM (BLKmode, operands[0]);
  set_mem_size (mem, <VSTRUCT:nregs> * 8);

  emit_insn (gen_aarch64_st<VSTRUCT:nregs><VDC:mode>_dreg (mem, operands[1]));
  DONE;
})

;; Builtin expander for Q-register STn.
(define_expand "aarch64_st<VSTRUCT:nregs><VQ:mode>"
 [(match_operand:DI 0 "register_operand" "r")
  (match_operand:VSTRUCT 1 "register_operand" "w")
  (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
  "TARGET_SIMD"
{
  machine_mode mode = <VSTRUCT:MODE>mode;
  rtx mem = gen_rtx_MEM (mode, operands[0]);

  emit_insn (gen_aarch64_simd_st<VSTRUCT:nregs><VQ:mode> (mem, operands[1]));
  DONE;
})

;; Builtin expander for lane STn.
(define_expand "aarch64_st<VSTRUCT:nregs>_lane<VALLDIF:mode>"
 [(match_operand:DI 0 "register_operand" "r")
  (match_operand:VSTRUCT 1 "register_operand" "w")
  (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
  (match_operand:SI 2 "immediate_operand")]
  "TARGET_SIMD"
{
  rtx mem = gen_rtx_MEM (BLKmode, operands[0]);
  set_mem_size (mem, GET_MODE_SIZE (GET_MODE_INNER (<VALLDIF:MODE>mode))
		     * <VSTRUCT:nregs>);

  emit_insn (gen_aarch64_vec_store_lanes<VSTRUCT:mode>_lane<VALLDIF:mode> (
		mem, operands[1], operands[2]));
  DONE;
})

;; Builtin expander for ST1: plain move on little-endian, ST1 pattern
;; on big-endian (element order differs from STR).
(define_expand "aarch64_st1<VALL_F16:mode>"
 [(match_operand:DI 0 "register_operand")
  (match_operand:VALL_F16 1 "register_operand")]
  "TARGET_SIMD"
{
  machine_mode mode = <VALL_F16:MODE>mode;
  rtx mem = gen_rtx_MEM (mode, operands[0]);

  if (BYTES_BIG_ENDIAN)
    emit_insn (gen_aarch64_be_st1<VALL_F16:mode> (mem, operands[1]));
  else
    emit_move_insn (mem, operands[1]);
  DONE;
})

;; Expander for builtins to insert vector registers into large
;; opaque integer modes.

;; Q-register list.  We don't need a D-reg inserter as we zero
;; extend them in arm_neon.h and insert the resulting Q-regs.

;; Copy the whole tuple, then overwrite Q-register slot PART
;; (each slot is 16 bytes).
(define_expand "aarch64_set_qreg<VSTRUCT:mode><VQ:mode>"
 [(match_operand:VSTRUCT 0 "register_operand" "+w")
  (match_operand:VSTRUCT 1 "register_operand" "0")
  (match_operand:VQ 2 "register_operand" "w")
  (match_operand:SI 3 "immediate_operand" "i")]
  "TARGET_SIMD"
{
  int part = INTVAL (operands[3]);
  int offset = part * 16;

  emit_move_insn (operands[0], operands[1]);
  emit_move_insn (gen_rtx_SUBREG (<VQ:MODE>mode, operands[0], offset),
		  operands[2]);
  DONE;
})

;; Standard pattern name vec_init<mode><Vel>.
;; Build a vector from scalar elements; the backend helper chooses the
;; best instruction sequence.
(define_expand "vec_init<mode><Vel>"
  [(match_operand:VALL_F16 0 "register_operand" "")
   (match_operand 1 "" "")]
  "TARGET_SIMD"
{
  aarch64_expand_vector_init (operands[0], operands[1]);
  DONE;
})

;; Load one element from memory and replicate to all lanes.
(define_insn "*aarch64_simd_ld1r<mode>"
  [(set (match_operand:VALL_F16 0 "register_operand" "=w")
	(vec_duplicate:VALL_F16
	  (match_operand:<VEL> 1 "aarch64_simd_struct_operand" "Utv")))]
  "TARGET_SIMD"
  "ld1r\\t{%0.<Vtype>}, %1"
  [(set_attr "type" "neon_load1_all_lanes")]
)

;; LD1 of two consecutive vectors (no interleave), Q-register form.
(define_insn "aarch64_simd_ld1<mode>_x2"
  [(set (match_operand:OI 0 "register_operand" "=w")
	(unspec:OI [(match_operand:OI 1 "aarch64_simd_struct_operand" "Utv")
		    (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
		   UNSPEC_LD1))]
  "TARGET_SIMD"
  "ld1\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
  [(set_attr "type" "neon_load1_2reg<q>")]
)

;; LD1 x2, D-register form.
(define_insn "aarch64_simd_ld1<mode>_x2"
  [(set (match_operand:OI 0 "register_operand" "=w")
	(unspec:OI [(match_operand:OI 1 "aarch64_simd_struct_operand" "Utv")
		    (unspec:VDC [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
		   UNSPEC_LD1))]
  "TARGET_SIMD"
  "ld1\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
  [(set_attr "type" "neon_load1_2reg<q>")]
)


;; Vector reciprocal estimate.
(define_insn "aarch64_frecpe<mode>"
  [(set (match_operand:VHSDF 0 "register_operand" "=w")
	(unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")]
	 UNSPEC_FRECPE))]
  "TARGET_SIMD"
  "frecpe\\t%0.<Vtype>, %1.<Vtype>"
  [(set_attr "type" "neon_fp_recpe_<stype><q>")]
)

;; Scalar reciprocal estimate/exponent (FRECPE/FRECPX via FRECP iterator).
(define_insn "aarch64_frecp<FRECP:frecp_suffix><mode>"
  [(set (match_operand:GPF_F16 0 "register_operand" "=w")
	(unspec:GPF_F16 [(match_operand:GPF_F16 1 "register_operand" "w")]
	 FRECP))]
  "TARGET_SIMD"
  "frecp<FRECP:frecp_suffix>\\t%<s>0, %<s>1"
  [(set_attr "type" "neon_fp_recp<FRECP:frecp_suffix>_<GPF_F16:stype>")]
)

;; Reciprocal step (Newton-Raphson iteration building block).
(define_insn "aarch64_frecps<mode>"
  [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
	(unspec:VHSDF_HSDF
	  [(match_operand:VHSDF_HSDF 1 "register_operand" "w")
	  (match_operand:VHSDF_HSDF 2 "register_operand" "w")]
	  UNSPEC_FRECPS))]
  "TARGET_SIMD"
  "frecps\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
  [(set_attr "type" "neon_fp_recps_<stype><q>")]
)

;; Unsigned integer reciprocal estimate.
(define_insn "aarch64_urecpe<mode>"
  [(set (match_operand:VDQ_SI 0 "register_operand" "=w")
	(unspec:VDQ_SI [(match_operand:VDQ_SI 1 "register_operand" "w")]
		UNSPEC_URECPE))]
 "TARGET_SIMD"
 "urecpe\\t%0.<Vtype>, %1.<Vtype>"
  [(set_attr "type" "neon_fp_recpe_<Vetype><q>")])

;; Standard pattern name vec_extract<mode><Vel>.

;; Extract one element; defers to the get_lane pattern.
(define_expand "vec_extract<mode><Vel>"
  [(match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand" "")
   (match_operand:VALL_F16 1 "register_operand" "")
   (match_operand:SI 2 "immediate_operand" "")]
  "TARGET_SIMD"
{
    emit_insn
      (gen_aarch64_get_lane<mode> (operands[0], operands[1], operands[2]));
    DONE;
})

;; aes

;; AES single round (AESE/AESD); note the instruction XORs the key
;; into the tied state operand, so only operand 2 appears in assembly.
(define_insn "aarch64_crypto_aes<aes_op>v16qi"
  [(set (match_operand:V16QI 0 "register_operand" "=w")
        (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "0")
		       (match_operand:V16QI 2 "register_operand" "w")]
         CRYPTO_AES))]
  "TARGET_SIMD && TARGET_AES"
  "aes<aes_op>\\t%0.16b, %2.16b"
  [(set_attr "type" "crypto_aese")]
)

;; When AES/AESMC fusion is enabled we want the register allocation to
;; look like:
;;    AESE Vn, _
;;    AESMC Vn, Vn
;; So prefer to tie operand 1 to operand 0 when fusing.
;; AESMC/AESIMC.  The first (tied) alternative is only enabled when
;; AESE/AESMC fusion is active, steering the allocator towards the
;; fusible register pattern described above.
(define_insn "aarch64_crypto_aes<aesmc_op>v16qi"
  [(set (match_operand:V16QI 0 "register_operand" "=w,w")
	(unspec:V16QI [(match_operand:V16QI 1 "register_operand" "0,w")]
		      CRYPTO_AESMC))]
  "TARGET_SIMD && TARGET_AES"
  "aes<aesmc_op>\\t%0.16b, %1.16b"
  [(set_attr "type" "crypto_aesmc")
   (set_attr_alternative "enabled"
     [(if_then_else (match_test
		      "aarch64_fusion_enabled_p (AARCH64_FUSE_AES_AESMC)")
		    (const_string "yes")
		    (const_string "no"))
      (const_string "yes")])]
)

;; sha1

;; SHA1H on a scalar SI value.
(define_insn "aarch64_crypto_sha1hsi"
  [(set (match_operand:SI 0 "register_operand" "=w")
	(unspec:SI [(match_operand:SI 1 "register_operand" "w")]
		   UNSPEC_SHA1H))]
  "TARGET_SIMD && TARGET_SHA2"
  "sha1h\\t%s0, %s1"
  [(set_attr "type" "crypto_sha1_fast")]
)

;; SHA1H on element 0 of a V4SI vector (little-endian lane numbering).
(define_insn "aarch64_crypto_sha1hv4si"
  [(set (match_operand:SI 0 "register_operand" "=w")
	(unspec:SI [(vec_select:SI (match_operand:V4SI 1 "register_operand" "w")
		     (parallel [(const_int 0)]))]
		   UNSPEC_SHA1H))]
  "TARGET_SIMD && TARGET_SHA2 && !BYTES_BIG_ENDIAN"
  "sha1h\\t%s0, %s1"
  [(set_attr "type" "crypto_sha1_fast")]
)

;; Big-endian counterpart: architectural lane 0 is RTL lane 3.
(define_insn "aarch64_be_crypto_sha1hv4si"
  [(set (match_operand:SI 0 "register_operand" "=w")
	(unspec:SI [(vec_select:SI (match_operand:V4SI 1 "register_operand" "w")
		     (parallel [(const_int 3)]))]
		   UNSPEC_SHA1H))]
  "TARGET_SIMD && TARGET_SHA2 && BYTES_BIG_ENDIAN"
  "sha1h\\t%s0, %s1"
  [(set_attr "type" "crypto_sha1_fast")]
)

;; SHA1SU1 schedule update; operand 1 is tied to the destination.
(define_insn "aarch64_crypto_sha1su1v4si"
  [(set (match_operand:V4SI 0 "register_operand" "=w")
	(unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
		      (match_operand:V4SI 2 "register_operand" "w")]
		     UNSPEC_SHA1SU1))]
  "TARGET_SIMD && TARGET_SHA2"
  "sha1su1\\t%0.4s, %2.4s"
  [(set_attr "type" "crypto_sha1_fast")]
)

;; SHA1C/SHA1M/SHA1P hash update, selected by the CRYPTO_SHA1 iterator.
(define_insn "aarch64_crypto_sha1<sha1_op>v4si"
  [(set (match_operand:V4SI 0 "register_operand" "=w")
	(unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
		      (match_operand:SI 2 "register_operand" "w")
		      (match_operand:V4SI 3 "register_operand" "w")]
		     CRYPTO_SHA1))]
  "TARGET_SIMD && TARGET_SHA2"
  "sha1<sha1_op>\\t%q0, %s2, %3.4s"
  [(set_attr "type" "crypto_sha1_slow")]
)

;; SHA1SU0 schedule update.
(define_insn "aarch64_crypto_sha1su0v4si"
  [(set (match_operand:V4SI 0 "register_operand" "=w")
	(unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
		      (match_operand:V4SI 2 "register_operand" "w")
		      (match_operand:V4SI 3 "register_operand" "w")]
		     UNSPEC_SHA1SU0))]
  "TARGET_SIMD && TARGET_SHA2"
  "sha1su0\\t%0.4s, %2.4s, %3.4s"
  [(set_attr "type" "crypto_sha1_xor")]
)

;; sha256

;; SHA256H/SHA256H2 hash update, selected by CRYPTO_SHA256.
(define_insn "aarch64_crypto_sha256h<sha256_op>v4si"
  [(set (match_operand:V4SI 0 "register_operand" "=w")
	(unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
		      (match_operand:V4SI 2 "register_operand" "w")
		      (match_operand:V4SI 3 "register_operand" "w")]
		     CRYPTO_SHA256))]
  "TARGET_SIMD && TARGET_SHA2"
  "sha256h<sha256_op>\\t%q0, %q2, %3.4s"
  [(set_attr "type" "crypto_sha256_slow")]
)

;; SHA256SU0 schedule update.
(define_insn "aarch64_crypto_sha256su0v4si"
  [(set (match_operand:V4SI 0 "register_operand" "=w")
	(unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
		      (match_operand:V4SI 2 "register_operand" "w")]
		     UNSPEC_SHA256SU0))]
  "TARGET_SIMD && TARGET_SHA2"
  "sha256su0\\t%0.4s, %2.4s"
  [(set_attr "type" "crypto_sha256_fast")]
)

;; SHA256SU1 schedule update.
(define_insn "aarch64_crypto_sha256su1v4si"
  [(set (match_operand:V4SI 0 "register_operand" "=w")
	(unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
		      (match_operand:V4SI 2 "register_operand" "w")
		      (match_operand:V4SI 3 "register_operand" "w")]
		     UNSPEC_SHA256SU1))]
  "TARGET_SIMD && TARGET_SHA2"
"sha256su1\\t%0.4s, %2.4s, %3.4s" 5919 [(set_attr "type" "crypto_sha256_slow")] 5920) 5921 5922;; sha512 5923 5924(define_insn "aarch64_crypto_sha512h<sha512_op>qv2di" 5925 [(set (match_operand:V2DI 0 "register_operand" "=w") 5926 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0") 5927 (match_operand:V2DI 2 "register_operand" "w") 5928 (match_operand:V2DI 3 "register_operand" "w")] 5929 CRYPTO_SHA512))] 5930 "TARGET_SIMD && TARGET_SHA3" 5931 "sha512h<sha512_op>\\t%q0, %q2, %3.2d" 5932 [(set_attr "type" "crypto_sha512")] 5933) 5934 5935(define_insn "aarch64_crypto_sha512su0qv2di" 5936 [(set (match_operand:V2DI 0 "register_operand" "=w") 5937 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0") 5938 (match_operand:V2DI 2 "register_operand" "w")] 5939 UNSPEC_SHA512SU0))] 5940 "TARGET_SIMD && TARGET_SHA3" 5941 "sha512su0\\t%0.2d, %2.2d" 5942 [(set_attr "type" "crypto_sha512")] 5943) 5944 5945(define_insn "aarch64_crypto_sha512su1qv2di" 5946 [(set (match_operand:V2DI 0 "register_operand" "=w") 5947 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0") 5948 (match_operand:V2DI 2 "register_operand" "w") 5949 (match_operand:V2DI 3 "register_operand" "w")] 5950 UNSPEC_SHA512SU1))] 5951 "TARGET_SIMD && TARGET_SHA3" 5952 "sha512su1\\t%0.2d, %2.2d, %3.2d" 5953 [(set_attr "type" "crypto_sha512")] 5954) 5955 5956;; sha3 5957 5958(define_insn "aarch64_eor3qv8hi" 5959 [(set (match_operand:V8HI 0 "register_operand" "=w") 5960 (xor:V8HI 5961 (xor:V8HI 5962 (match_operand:V8HI 2 "register_operand" "%w") 5963 (match_operand:V8HI 3 "register_operand" "w")) 5964 (match_operand:V8HI 1 "register_operand" "w")))] 5965 "TARGET_SIMD && TARGET_SHA3" 5966 "eor3\\t%0.16b, %1.16b, %2.16b, %3.16b" 5967 [(set_attr "type" "crypto_sha3")] 5968) 5969 5970(define_insn "aarch64_rax1qv2di" 5971 [(set (match_operand:V2DI 0 "register_operand" "=w") 5972 (xor:V2DI 5973 (rotate:V2DI 5974 (match_operand:V2DI 2 "register_operand" "w") 5975 (const_int 1)) 5976 (match_operand:V2DI 1 
"register_operand" "w")))] 5977 "TARGET_SIMD && TARGET_SHA3" 5978 "rax1\\t%0.2d, %1.2d, %2.2d" 5979 [(set_attr "type" "crypto_sha3")] 5980) 5981 5982(define_insn "aarch64_xarqv2di" 5983 [(set (match_operand:V2DI 0 "register_operand" "=w") 5984 (rotatert:V2DI 5985 (xor:V2DI 5986 (match_operand:V2DI 1 "register_operand" "%w") 5987 (match_operand:V2DI 2 "register_operand" "w")) 5988 (match_operand:SI 3 "aarch64_simd_shift_imm_di" "Usd")))] 5989 "TARGET_SIMD && TARGET_SHA3" 5990 "xar\\t%0.2d, %1.2d, %2.2d, %3" 5991 [(set_attr "type" "crypto_sha3")] 5992) 5993 5994(define_insn "aarch64_bcaxqv8hi" 5995 [(set (match_operand:V8HI 0 "register_operand" "=w") 5996 (xor:V8HI 5997 (and:V8HI 5998 (not:V8HI (match_operand:V8HI 3 "register_operand" "w")) 5999 (match_operand:V8HI 2 "register_operand" "w")) 6000 (match_operand:V8HI 1 "register_operand" "w")))] 6001 "TARGET_SIMD && TARGET_SHA3" 6002 "bcax\\t%0.16b, %1.16b, %2.16b, %3.16b" 6003 [(set_attr "type" "crypto_sha3")] 6004) 6005 6006;; SM3 6007 6008(define_insn "aarch64_sm3ss1qv4si" 6009 [(set (match_operand:V4SI 0 "register_operand" "=w") 6010 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "w") 6011 (match_operand:V4SI 2 "register_operand" "w") 6012 (match_operand:V4SI 3 "register_operand" "w")] 6013 UNSPEC_SM3SS1))] 6014 "TARGET_SIMD && TARGET_SM4" 6015 "sm3ss1\\t%0.4s, %1.4s, %2.4s, %3.4s" 6016 [(set_attr "type" "crypto_sm3")] 6017) 6018 6019 6020(define_insn "aarch64_sm3tt<sm3tt_op>qv4si" 6021 [(set (match_operand:V4SI 0 "register_operand" "=w") 6022 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0") 6023 (match_operand:V4SI 2 "register_operand" "w") 6024 (match_operand:V4SI 3 "register_operand" "w") 6025 (match_operand:SI 4 "aarch64_imm2" "Ui2")] 6026 CRYPTO_SM3TT))] 6027 "TARGET_SIMD && TARGET_SM4" 6028 "sm3tt<sm3tt_op>\\t%0.4s, %2.4s, %3.4s[%4]" 6029 [(set_attr "type" "crypto_sm3")] 6030) 6031 6032(define_insn "aarch64_sm3partw<sm3part_op>qv4si" 6033 [(set (match_operand:V4SI 0 "register_operand" 
"=w") 6034 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0") 6035 (match_operand:V4SI 2 "register_operand" "w") 6036 (match_operand:V4SI 3 "register_operand" "w")] 6037 CRYPTO_SM3PART))] 6038 "TARGET_SIMD && TARGET_SM4" 6039 "sm3partw<sm3part_op>\\t%0.4s, %2.4s, %3.4s" 6040 [(set_attr "type" "crypto_sm3")] 6041) 6042 6043;; SM4 6044 6045(define_insn "aarch64_sm4eqv4si" 6046 [(set (match_operand:V4SI 0 "register_operand" "=w") 6047 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0") 6048 (match_operand:V4SI 2 "register_operand" "w")] 6049 UNSPEC_SM4E))] 6050 "TARGET_SIMD && TARGET_SM4" 6051 "sm4e\\t%0.4s, %2.4s" 6052 [(set_attr "type" "crypto_sm4")] 6053) 6054 6055(define_insn "aarch64_sm4ekeyqv4si" 6056 [(set (match_operand:V4SI 0 "register_operand" "=w") 6057 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "w") 6058 (match_operand:V4SI 2 "register_operand" "w")] 6059 UNSPEC_SM4EKEY))] 6060 "TARGET_SIMD && TARGET_SM4" 6061 "sm4ekey\\t%0.4s, %1.4s, %2.4s" 6062 [(set_attr "type" "crypto_sm4")] 6063) 6064 6065;; fp16fml 6066 6067(define_expand "aarch64_fml<f16mac1>l<f16quad>_low<mode>" 6068 [(set (match_operand:VDQSF 0 "register_operand" "=w") 6069 (unspec:VDQSF 6070 [(match_operand:VDQSF 1 "register_operand" "0") 6071 (match_operand:<VFMLA_W> 2 "register_operand" "w") 6072 (match_operand:<VFMLA_W> 3 "register_operand" "w")] 6073 VFMLA16_LOW))] 6074 "TARGET_F16FML" 6075{ 6076 rtx p1 = aarch64_simd_vect_par_cnst_half (<VFMLA_W>mode, 6077 <nunits> * 2, false); 6078 rtx p2 = aarch64_simd_vect_par_cnst_half (<VFMLA_W>mode, 6079 <nunits> * 2, false); 6080 6081 emit_insn (gen_aarch64_simd_fml<f16mac1>l<f16quad>_low<mode> (operands[0], 6082 operands[1], 6083 operands[2], 6084 operands[3], 6085 p1, p2)); 6086 DONE; 6087 6088}) 6089 6090(define_expand "aarch64_fml<f16mac1>l<f16quad>_high<mode>" 6091 [(set (match_operand:VDQSF 0 "register_operand" "=w") 6092 (unspec:VDQSF 6093 [(match_operand:VDQSF 1 "register_operand" "0") 6094 (match_operand:<VFMLA_W> 
2 "register_operand" "w") 6095 (match_operand:<VFMLA_W> 3 "register_operand" "w")] 6096 VFMLA16_HIGH))] 6097 "TARGET_F16FML" 6098{ 6099 rtx p1 = aarch64_simd_vect_par_cnst_half (<VFMLA_W>mode, <nunits> * 2, true); 6100 rtx p2 = aarch64_simd_vect_par_cnst_half (<VFMLA_W>mode, <nunits> * 2, true); 6101 6102 emit_insn (gen_aarch64_simd_fml<f16mac1>l<f16quad>_high<mode> (operands[0], 6103 operands[1], 6104 operands[2], 6105 operands[3], 6106 p1, p2)); 6107 DONE; 6108}) 6109 6110(define_insn "aarch64_simd_fmlal<f16quad>_low<mode>" 6111 [(set (match_operand:VDQSF 0 "register_operand" "=w") 6112 (fma:VDQSF 6113 (float_extend:VDQSF 6114 (vec_select:<VFMLA_SEL_W> 6115 (match_operand:<VFMLA_W> 2 "register_operand" "w") 6116 (match_operand:<VFMLA_W> 4 "vect_par_cnst_lo_half" ""))) 6117 (float_extend:VDQSF 6118 (vec_select:<VFMLA_SEL_W> 6119 (match_operand:<VFMLA_W> 3 "register_operand" "w") 6120 (match_operand:<VFMLA_W> 5 "vect_par_cnst_lo_half" ""))) 6121 (match_operand:VDQSF 1 "register_operand" "0")))] 6122 "TARGET_F16FML" 6123 "fmlal\\t%0.<nunits>s, %2.<nunits>h, %3.<nunits>h" 6124 [(set_attr "type" "neon_fp_mul_s")] 6125) 6126 6127(define_insn "aarch64_simd_fmlsl<f16quad>_low<mode>" 6128 [(set (match_operand:VDQSF 0 "register_operand" "=w") 6129 (fma:VDQSF 6130 (float_extend:VDQSF 6131 (neg:<VFMLA_SEL_W> 6132 (vec_select:<VFMLA_SEL_W> 6133 (match_operand:<VFMLA_W> 2 "register_operand" "w") 6134 (match_operand:<VFMLA_W> 4 "vect_par_cnst_lo_half" "")))) 6135 (float_extend:VDQSF 6136 (vec_select:<VFMLA_SEL_W> 6137 (match_operand:<VFMLA_W> 3 "register_operand" "w") 6138 (match_operand:<VFMLA_W> 5 "vect_par_cnst_lo_half" ""))) 6139 (match_operand:VDQSF 1 "register_operand" "0")))] 6140 "TARGET_F16FML" 6141 "fmlsl\\t%0.<nunits>s, %2.<nunits>h, %3.<nunits>h" 6142 [(set_attr "type" "neon_fp_mul_s")] 6143) 6144 6145(define_insn "aarch64_simd_fmlal<f16quad>_high<mode>" 6146 [(set (match_operand:VDQSF 0 "register_operand" "=w") 6147 (fma:VDQSF 6148 (float_extend:VDQSF 6149 
	  (vec_select:<VFMLA_SEL_W>
	   (match_operand:<VFMLA_W> 2 "register_operand" "w")
	   (match_operand:<VFMLA_W> 4 "vect_par_cnst_hi_half" "")))
	 (float_extend:VDQSF
	  (vec_select:<VFMLA_SEL_W>
	   (match_operand:<VFMLA_W> 3 "register_operand" "w")
	   (match_operand:<VFMLA_W> 5 "vect_par_cnst_hi_half" "")))
	 (match_operand:VDQSF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlal2\\t%0.<nunits>s, %2.<nunits>h, %3.<nunits>h"
  [(set_attr "type" "neon_fp_mul_s")]
)

;; FMLSL2 high: negated first multiplicand, high halves.
(define_insn "aarch64_simd_fmlsl<f16quad>_high<mode>"
  [(set (match_operand:VDQSF 0 "register_operand" "=w")
	(fma:VDQSF
	 (float_extend:VDQSF
	  (neg:<VFMLA_SEL_W>
	   (vec_select:<VFMLA_SEL_W>
	    (match_operand:<VFMLA_W> 2 "register_operand" "w")
	    (match_operand:<VFMLA_W> 4 "vect_par_cnst_hi_half" ""))))
	 (float_extend:VDQSF
	  (vec_select:<VFMLA_SEL_W>
	   (match_operand:<VFMLA_W> 3 "register_operand" "w")
	   (match_operand:<VFMLA_W> 5 "vect_par_cnst_hi_half" "")))
	 (match_operand:VDQSF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlsl2\\t%0.<nunits>s, %2.<nunits>h, %3.<nunits>h"
  [(set_attr "type" "neon_fp_mul_s")]
)

;; By-lane FMLAL/FMLSL, V2SF result, V4HF lane operand (low half).
;; Builds the lo-half selector and an endian-adjusted lane index.
(define_expand "aarch64_fml<f16mac1>l_lane_lowv2sf"
  [(set (match_operand:V2SF 0 "register_operand" "")
	(unspec:V2SF [(match_operand:V2SF 1 "register_operand" "")
		      (match_operand:V4HF 2 "register_operand" "")
		      (match_operand:V4HF 3 "register_operand" "")
		      (match_operand:SI 4 "aarch64_imm2" "")]
		     VFMLA16_LOW))]
  "TARGET_F16FML"
{
  rtx p1 = aarch64_simd_vect_par_cnst_half (V4HFmode, 4, false);
  rtx lane = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[4]));

  emit_insn (gen_aarch64_simd_fml<f16mac1>l_lane_lowv2sf (operands[0],
							  operands[1],
							  operands[2],
							  operands[3],
							  p1, lane));
  DONE;
}
)

;; As above for the high half (FMLAL2/FMLSL2 by lane).
(define_expand "aarch64_fml<f16mac1>l_lane_highv2sf"
  [(set (match_operand:V2SF 0 "register_operand" "")
	(unspec:V2SF [(match_operand:V2SF 1 "register_operand" "")
		      (match_operand:V4HF 2 "register_operand" "")
		      (match_operand:V4HF 3 "register_operand" "")
		      (match_operand:SI 4 "aarch64_imm2" "")]
		     VFMLA16_HIGH))]
  "TARGET_F16FML"
{
  rtx p1 = aarch64_simd_vect_par_cnst_half (V4HFmode, 4, true);
  rtx lane = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[4]));

  emit_insn (gen_aarch64_simd_fml<f16mac1>l_lane_highv2sf (operands[0],
							   operands[1],
							   operands[2],
							   operands[3],
							   p1, lane));
  DONE;
})

;; FMLAL by lane, low half.  The lane register uses constraint "x"
;; (restricted register range for the indexed operand).
(define_insn "aarch64_simd_fmlal_lane_lowv2sf"
  [(set (match_operand:V2SF 0 "register_operand" "=w")
	(fma:V2SF
	 (float_extend:V2SF
	  (vec_select:V2HF
	   (match_operand:V4HF 2 "register_operand" "w")
	   (match_operand:V4HF 4 "vect_par_cnst_lo_half" "")))
	 (float_extend:V2SF
	  (vec_duplicate:V2HF
	   (vec_select:HF
	    (match_operand:V4HF 3 "register_operand" "x")
	    (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
	 (match_operand:V2SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlal\\t%0.2s, %2.2h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)

;; FMLSL by lane, low half (negated first multiplicand).
(define_insn "aarch64_simd_fmlsl_lane_lowv2sf"
  [(set (match_operand:V2SF 0 "register_operand" "=w")
	(fma:V2SF
	 (float_extend:V2SF
	  (neg:V2HF
	   (vec_select:V2HF
	    (match_operand:V4HF 2 "register_operand" "w")
	    (match_operand:V4HF 4 "vect_par_cnst_lo_half" ""))))
	 (float_extend:V2SF
	  (vec_duplicate:V2HF
	   (vec_select:HF
	    (match_operand:V4HF 3 "register_operand" "x")
	    (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
	 (match_operand:V2SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlsl\\t%0.2s, %2.2h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)

;; FMLAL2 by lane, high half.
(define_insn "aarch64_simd_fmlal_lane_highv2sf"
  [(set (match_operand:V2SF 0 "register_operand" "=w")
	(fma:V2SF
	 (float_extend:V2SF
	  (vec_select:V2HF
	   (match_operand:V4HF 2 "register_operand" "w")
	   (match_operand:V4HF 4 "vect_par_cnst_hi_half" "")))
	 (float_extend:V2SF
	  (vec_duplicate:V2HF
	   (vec_select:HF
	    (match_operand:V4HF 3 "register_operand" "x")
	    (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
	 (match_operand:V2SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlal2\\t%0.2s, %2.2h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)

;; FMLSL2 by lane, high half (negated first multiplicand).
(define_insn "aarch64_simd_fmlsl_lane_highv2sf"
  [(set (match_operand:V2SF 0 "register_operand" "=w")
	(fma:V2SF
	 (float_extend:V2SF
	  (neg:V2HF
	   (vec_select:V2HF
	    (match_operand:V4HF 2 "register_operand" "w")
	    (match_operand:V4HF 4 "vect_par_cnst_hi_half" ""))))
	 (float_extend:V2SF
	  (vec_duplicate:V2HF
	   (vec_select:HF
	    (match_operand:V4HF 3 "register_operand" "x")
	    (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
	 (match_operand:V2SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlsl2\\t%0.2s, %2.2h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)

;; Q-form by-laneq FMLAL/FMLSL, V4SF result, V8HF lane operand (low).
(define_expand "aarch64_fml<f16mac1>lq_laneq_lowv4sf"
  [(set (match_operand:V4SF 0 "register_operand" "")
	(unspec:V4SF [(match_operand:V4SF 1 "register_operand" "")
		      (match_operand:V8HF 2 "register_operand" "")
		      (match_operand:V8HF 3 "register_operand" "")
		      (match_operand:SI 4 "aarch64_lane_imm3" "")]
		     VFMLA16_LOW))]
  "TARGET_F16FML"
{
  rtx p1 = aarch64_simd_vect_par_cnst_half (V8HFmode, 8, false);
  rtx lane = aarch64_endian_lane_rtx (V8HFmode, INTVAL (operands[4]));

  emit_insn (gen_aarch64_simd_fml<f16mac1>lq_laneq_lowv4sf (operands[0],
							    operands[1],
							    operands[2],
							    operands[3],
							    p1, lane));
  DONE;
})

;; As above for the high half.
(define_expand "aarch64_fml<f16mac1>lq_laneq_highv4sf"
  [(set (match_operand:V4SF 0 "register_operand" "")
	(unspec:V4SF [(match_operand:V4SF 1 "register_operand" "")
		      (match_operand:V8HF 2 "register_operand" "")
		      (match_operand:V8HF 3 "register_operand" "")
		      (match_operand:SI 4 "aarch64_lane_imm3" "")]
		     VFMLA16_HIGH))]
  "TARGET_F16FML"
{
  rtx p1 = aarch64_simd_vect_par_cnst_half (V8HFmode, 8, true);
  rtx lane = aarch64_endian_lane_rtx (V8HFmode, INTVAL (operands[4]));

  emit_insn (gen_aarch64_simd_fml<f16mac1>lq_laneq_highv4sf (operands[0],
							     operands[1],
							     operands[2],
							     operands[3],
							     p1, lane));
  DONE;
})

;; FMLAL by laneq (3-bit index), low half, Q form.
(define_insn "aarch64_simd_fmlalq_laneq_lowv4sf"
  [(set (match_operand:V4SF 0 "register_operand" "=w")
	(fma:V4SF
	 (float_extend:V4SF
	  (vec_select:V4HF
	   (match_operand:V8HF 2 "register_operand" "w")
	   (match_operand:V8HF 4 "vect_par_cnst_lo_half" "")))
	 (float_extend:V4SF
	  (vec_duplicate:V4HF
	   (vec_select:HF
	    (match_operand:V8HF 3 "register_operand" "x")
	    (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
	 (match_operand:V4SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlal\\t%0.4s, %2.4h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)

;; FMLSL by laneq, low half (negated first multiplicand).
(define_insn "aarch64_simd_fmlslq_laneq_lowv4sf"
  [(set (match_operand:V4SF 0 "register_operand" "=w")
	(fma:V4SF
	 (float_extend:V4SF
	  (neg:V4HF
	   (vec_select:V4HF
	    (match_operand:V8HF 2 "register_operand" "w")
	    (match_operand:V8HF 4 "vect_par_cnst_lo_half" ""))))
	 (float_extend:V4SF
	  (vec_duplicate:V4HF
	   (vec_select:HF
	    (match_operand:V8HF 3 "register_operand" "x")
	    (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
	 (match_operand:V4SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlsl\\t%0.4s, %2.4h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)

;; FMLAL2 by laneq, high half.
(define_insn "aarch64_simd_fmlalq_laneq_highv4sf"
  [(set (match_operand:V4SF 0 "register_operand" "=w")
	(fma:V4SF
	 (float_extend:V4SF
	  (vec_select:V4HF
	   (match_operand:V8HF 2
"register_operand" "w") 6378 (match_operand:V8HF 4 "vect_par_cnst_hi_half" ""))) 6379 (float_extend:V4SF 6380 (vec_duplicate:V4HF 6381 (vec_select:HF 6382 (match_operand:V8HF 3 "register_operand" "x") 6383 (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")])))) 6384 (match_operand:V4SF 1 "register_operand" "0")))] 6385 "TARGET_F16FML" 6386 "fmlal2\\t%0.4s, %2.4h, %3.h[%5]" 6387 [(set_attr "type" "neon_fp_mul_s")] 6388) 6389 6390(define_insn "aarch64_simd_fmlslq_laneq_highv4sf" 6391 [(set (match_operand:V4SF 0 "register_operand" "=w") 6392 (fma:V4SF 6393 (float_extend:V4SF 6394 (neg:V4HF 6395 (vec_select:V4HF 6396 (match_operand:V8HF 2 "register_operand" "w") 6397 (match_operand:V8HF 4 "vect_par_cnst_hi_half" "")))) 6398 (float_extend:V4SF 6399 (vec_duplicate:V4HF 6400 (vec_select:HF 6401 (match_operand:V8HF 3 "register_operand" "x") 6402 (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")])))) 6403 (match_operand:V4SF 1 "register_operand" "0")))] 6404 "TARGET_F16FML" 6405 "fmlsl2\\t%0.4s, %2.4h, %3.h[%5]" 6406 [(set_attr "type" "neon_fp_mul_s")] 6407) 6408 6409(define_expand "aarch64_fml<f16mac1>l_laneq_lowv2sf" 6410 [(set (match_operand:V2SF 0 "register_operand" "") 6411 (unspec:V2SF [(match_operand:V2SF 1 "register_operand" "") 6412 (match_operand:V4HF 2 "register_operand" "") 6413 (match_operand:V8HF 3 "register_operand" "") 6414 (match_operand:SI 4 "aarch64_lane_imm3" "")] 6415 VFMLA16_LOW))] 6416 "TARGET_F16FML" 6417{ 6418 rtx p1 = aarch64_simd_vect_par_cnst_half (V4HFmode, 4, false); 6419 rtx lane = aarch64_endian_lane_rtx (V8HFmode, INTVAL (operands[4])); 6420 6421 emit_insn (gen_aarch64_simd_fml<f16mac1>l_laneq_lowv2sf (operands[0], 6422 operands[1], 6423 operands[2], 6424 operands[3], 6425 p1, lane)); 6426 DONE; 6427 6428}) 6429 6430(define_expand "aarch64_fml<f16mac1>l_laneq_highv2sf" 6431 [(set (match_operand:V2SF 0 "register_operand" "") 6432 (unspec:V2SF [(match_operand:V2SF 1 "register_operand" "") 6433 (match_operand:V4HF 2 
"register_operand" "") 6434 (match_operand:V8HF 3 "register_operand" "") 6435 (match_operand:SI 4 "aarch64_lane_imm3" "")] 6436 VFMLA16_HIGH))] 6437 "TARGET_F16FML" 6438{ 6439 rtx p1 = aarch64_simd_vect_par_cnst_half (V4HFmode, 4, true); 6440 rtx lane = aarch64_endian_lane_rtx (V8HFmode, INTVAL (operands[4])); 6441 6442 emit_insn (gen_aarch64_simd_fml<f16mac1>l_laneq_highv2sf (operands[0], 6443 operands[1], 6444 operands[2], 6445 operands[3], 6446 p1, lane)); 6447 DONE; 6448 6449}) 6450 6451(define_insn "aarch64_simd_fmlal_laneq_lowv2sf" 6452 [(set (match_operand:V2SF 0 "register_operand" "=w") 6453 (fma:V2SF 6454 (float_extend:V2SF 6455 (vec_select:V2HF 6456 (match_operand:V4HF 2 "register_operand" "w") 6457 (match_operand:V4HF 4 "vect_par_cnst_lo_half" ""))) 6458 (float_extend:V2SF 6459 (vec_duplicate:V2HF 6460 (vec_select:HF 6461 (match_operand:V8HF 3 "register_operand" "x") 6462 (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")])))) 6463 (match_operand:V2SF 1 "register_operand" "0")))] 6464 "TARGET_F16FML" 6465 "fmlal\\t%0.2s, %2.2h, %3.h[%5]" 6466 [(set_attr "type" "neon_fp_mul_s")] 6467) 6468 6469(define_insn "aarch64_simd_fmlsl_laneq_lowv2sf" 6470 [(set (match_operand:V2SF 0 "register_operand" "=w") 6471 (fma:V2SF 6472 (float_extend:V2SF 6473 (neg:V2HF 6474 (vec_select:V2HF 6475 (match_operand:V4HF 2 "register_operand" "w") 6476 (match_operand:V4HF 4 "vect_par_cnst_lo_half" "")))) 6477 (float_extend:V2SF 6478 (vec_duplicate:V2HF 6479 (vec_select:HF 6480 (match_operand:V8HF 3 "register_operand" "x") 6481 (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")])))) 6482 (match_operand:V2SF 1 "register_operand" "0")))] 6483 "TARGET_F16FML" 6484 "fmlsl\\t%0.2s, %2.2h, %3.h[%5]" 6485 [(set_attr "type" "neon_fp_mul_s")] 6486) 6487 6488(define_insn "aarch64_simd_fmlal_laneq_highv2sf" 6489 [(set (match_operand:V2SF 0 "register_operand" "=w") 6490 (fma:V2SF 6491 (float_extend:V2SF 6492 (vec_select:V2HF 6493 (match_operand:V4HF 2 "register_operand" "w") 6494 
	   (match_operand:V4HF 4 "vect_par_cnst_hi_half" "")))
	 (float_extend:V2SF
	  (vec_duplicate:V2HF
	   (vec_select:HF
	    (match_operand:V8HF 3 "register_operand" "x")
	    (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
	 (match_operand:V2SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlal2\\t%0.2s, %2.2h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)

;; FMLSL2 by laneq, D form, high half (negated first multiplicand).
(define_insn "aarch64_simd_fmlsl_laneq_highv2sf"
  [(set (match_operand:V2SF 0 "register_operand" "=w")
	(fma:V2SF
	 (float_extend:V2SF
	  (neg:V2HF
	   (vec_select:V2HF
	    (match_operand:V4HF 2 "register_operand" "w")
	    (match_operand:V4HF 4 "vect_par_cnst_hi_half" ""))))
	 (float_extend:V2SF
	  (vec_duplicate:V2HF
	   (vec_select:HF
	    (match_operand:V8HF 3 "register_operand" "x")
	    (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
	 (match_operand:V2SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlsl2\\t%0.2s, %2.2h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)

;; Q-form result with a D-register (V4HF, 2-bit index) multiplier, low.
(define_expand "aarch64_fml<f16mac1>lq_lane_lowv4sf"
  [(set (match_operand:V4SF 0 "register_operand" "")
	(unspec:V4SF [(match_operand:V4SF 1 "register_operand" "")
		      (match_operand:V8HF 2 "register_operand" "")
		      (match_operand:V4HF 3 "register_operand" "")
		      (match_operand:SI 4 "aarch64_imm2" "")]
		     VFMLA16_LOW))]
  "TARGET_F16FML"
{
  rtx p1 = aarch64_simd_vect_par_cnst_half (V8HFmode, 8, false);
  rtx lane = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[4]));

  emit_insn (gen_aarch64_simd_fml<f16mac1>lq_lane_lowv4sf (operands[0],
							   operands[1],
							   operands[2],
							   operands[3],
							   p1, lane));
  DONE;
})

;; As above for the high half.
(define_expand "aarch64_fml<f16mac1>lq_lane_highv4sf"
  [(set (match_operand:V4SF 0 "register_operand" "")
	(unspec:V4SF [(match_operand:V4SF 1 "register_operand" "")
		      (match_operand:V8HF 2 "register_operand" "")
		      (match_operand:V4HF 3 "register_operand" "")
		      (match_operand:SI 4 "aarch64_imm2" "")]
		     VFMLA16_HIGH))]
  "TARGET_F16FML"
{
  rtx p1 = aarch64_simd_vect_par_cnst_half (V8HFmode, 8, true);
  rtx lane = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[4]));

  emit_insn (gen_aarch64_simd_fml<f16mac1>lq_lane_highv4sf (operands[0],
							    operands[1],
							    operands[2],
							    operands[3],
							    p1, lane));
  DONE;
})

;; FMLAL by lane, Q form, low half.
(define_insn "aarch64_simd_fmlalq_lane_lowv4sf"
  [(set (match_operand:V4SF 0 "register_operand" "=w")
	(fma:V4SF
	 (float_extend:V4SF
	  (vec_select:V4HF
	   (match_operand:V8HF 2 "register_operand" "w")
	   (match_operand:V8HF 4 "vect_par_cnst_lo_half" "")))
	 (float_extend:V4SF
	  (vec_duplicate:V4HF
	   (vec_select:HF
	    (match_operand:V4HF 3 "register_operand" "x")
	    (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
	 (match_operand:V4SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlal\\t%0.4s, %2.4h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)

;; FMLSL by lane, Q form, low half (negated first multiplicand).
(define_insn "aarch64_simd_fmlslq_lane_lowv4sf"
  [(set (match_operand:V4SF 0 "register_operand" "=w")
	(fma:V4SF
	 (float_extend:V4SF
	  (neg:V4HF
	   (vec_select:V4HF
	    (match_operand:V8HF 2 "register_operand" "w")
	    (match_operand:V8HF 4 "vect_par_cnst_lo_half" ""))))
	 (float_extend:V4SF
	  (vec_duplicate:V4HF
	   (vec_select:HF
	    (match_operand:V4HF 3 "register_operand" "x")
	    (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
	 (match_operand:V4SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlsl\\t%0.4s, %2.4h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)

;; FMLAL2 by lane, Q form, high half.
(define_insn "aarch64_simd_fmlalq_lane_highv4sf"
  [(set (match_operand:V4SF 0 "register_operand" "=w")
	(fma:V4SF
	 (float_extend:V4SF
	  (vec_select:V4HF
	   (match_operand:V8HF 2 "register_operand" "w")
	   (match_operand:V8HF 4 "vect_par_cnst_hi_half" "")))
	 (float_extend:V4SF
	  (vec_duplicate:V4HF
	   (vec_select:HF
	    (match_operand:V4HF 3 "register_operand" "x")
	    (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
	 (match_operand:V4SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlal2\\t%0.4s, %2.4h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)

;; FMLSL2 by lane, Q form, high half (negated first multiplicand).
(define_insn "aarch64_simd_fmlslq_lane_highv4sf"
  [(set (match_operand:V4SF 0 "register_operand" "=w")
	(fma:V4SF
	 (float_extend:V4SF
	  (neg:V4HF
	   (vec_select:V4HF
	    (match_operand:V8HF 2 "register_operand" "w")
	    (match_operand:V8HF 4 "vect_par_cnst_hi_half" ""))))
	 (float_extend:V4SF
	  (vec_duplicate:V4HF
	   (vec_select:HF
	    (match_operand:V4HF 3 "register_operand" "x")
	    (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
	 (match_operand:V4SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlsl2\\t%0.4s, %2.4h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)

;; pmull

;; 64x64 -> 128-bit polynomial multiply (opaque TI result).
(define_insn "aarch64_crypto_pmulldi"
  [(set (match_operand:TI 0 "register_operand" "=w")
	(unspec:TI [(match_operand:DI 1 "register_operand" "w")
		    (match_operand:DI 2 "register_operand" "w")]
		   UNSPEC_PMULL))]
  "TARGET_SIMD && TARGET_AES"
  "pmull\\t%0.1q, %1.1d, %2.1d"
  [(set_attr "type" "crypto_pmull")]
)

;; Polynomial multiply of the high 64-bit halves of two V2DI vectors.
(define_insn "aarch64_crypto_pmullv2di"
  [(set (match_operand:TI 0 "register_operand" "=w")
	(unspec:TI [(match_operand:V2DI 1 "register_operand" "w")
		    (match_operand:V2DI 2 "register_operand" "w")]
		   UNSPEC_PMULL2))]
  "TARGET_SIMD && TARGET_AES"
  "pmull2\\t%0.1q, %1.2d, %2.2d"
  [(set_attr "type" "crypto_pmull")]
)