;; ARM NEON coprocessor Machine Description
;; Copyright (C) 2006-2018 Free Software Foundation, Inc.
;; Written by CodeSourcery.
;;
;; This file is part of GCC.
;;
;; GCC is free software; you can redistribute it and/or modify it
;; under the terms of the GNU General Public License as published by
;; the Free Software Foundation; either version 3, or (at your option)
;; any later version.
;;
;; GCC is distributed in the hope that it will be useful, but
;; WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
;; General Public License for more details.
;;
;; You should have received a copy of the GNU General Public License
;; along with GCC; see the file COPYING3. If not see
;; <http://www.gnu.org/licenses/>.


;; Attribute used to permit string comparisons against <VQH_mnem> in
;; type attribute definitions.
(define_attr "vqh_mnem" "vadd,vmin,vmax" (const_string "vadd"))

;; Move of a VDX-mode value.  At least one operand must be a register.
;; Alternatives 2 and 3 load an immediate: the immediate is validated by
;; neon_immediate_valid_for_move and a vmov.f32 or vmov.i<width> template is
;; chosen from the width it reports.  Alternatives 7 and above fall through
;; to output_move_double.
(define_insn "*neon_mov<mode>"
  [(set (match_operand:VDX 0 "nonimmediate_operand"
          "=w,Un,w, w, w, ?r,?w,?r,?r, ?Us")
        (match_operand:VDX 1 "general_operand"
          " w,w, Dm,Dn,Uni, w, r, r, Usi,r"))]
  "TARGET_NEON
   && (register_operand (operands[0], <MODE>mode)
       || register_operand (operands[1], <MODE>mode))"
{
  if (which_alternative == 2 || which_alternative == 3)
    {
      int width, is_valid;
      /* Static because the formatted template is returned to the caller.  */
      static char templ[40];

      is_valid = neon_immediate_valid_for_move (operands[1], <MODE>mode,
        &operands[1], &width);

      gcc_assert (is_valid != 0);

      if (width == 0)
        return "vmov.f32\t%P0, %1 @ <mode>";
      else
        sprintf (templ, "vmov.i%d\t%%P0, %%x1 @ <mode>", width);

      return templ;
    }

  switch (which_alternative)
    {
    case 0: return "vmov\t%P0, %P1 @ <mode>";
    case 1: case 4: return output_move_neon (operands);
    /* Immediate alternatives were handled above.  */
    case 2: case 3: gcc_unreachable ();
    case 5: return "vmov\t%Q0, %R0, %P1 @ <mode>";
    case 6: return "vmov\t%P0, %Q1, %R1 @ <mode>";
    default: return output_move_double (operands, true, NULL);
    }
}
 [(set_attr "type" "neon_move<q>,neon_store1_1reg,neon_move<q>,\
                    neon_move<q>,neon_load1_1reg, neon_to_gp<q>,\
                    neon_from_gp<q>,mov_reg,neon_load1_2reg,\
                    neon_store1_2reg")
  (set_attr "length" "4,4,4,4,4,4,4,8,8,8")
  (set_attr "arm_pool_range" "*,*,*,*,1020,*,*,*,1020,*")
  (set_attr "thumb2_pool_range" "*,*,*,*,1018,*,*,*,1018,*")
  (set_attr "neg_pool_range" "*,*,*,*,1004,*,*,*,1004,*")])

;; Move of a VQXMOV-mode value.  Same structure as the VDX pattern above, but
;; transfers to and from core registers (alternatives 5 and 6) take two vmov
;; instructions, and alternatives 7 and above use output_move_quad.
(define_insn "*neon_mov<mode>"
  [(set (match_operand:VQXMOV 0 "nonimmediate_operand"
          "=w,Un,w, w, w, ?r,?w,?r,?r, ?Us")
        (match_operand:VQXMOV 1 "general_operand"
          " w,w, Dm,DN,Uni, w, r, r, Usi, r"))]
  "TARGET_NEON
   && (register_operand (operands[0], <MODE>mode)
       || register_operand (operands[1], <MODE>mode))"
{
  if (which_alternative == 2 || which_alternative == 3)
    {
      int width, is_valid;
      /* Static because the formatted template is returned to the caller.  */
      static char templ[40];

      is_valid = neon_immediate_valid_for_move (operands[1], <MODE>mode,
        &operands[1], &width);

      gcc_assert (is_valid != 0);

      if (width == 0)
        return "vmov.f32\t%q0, %1 @ <mode>";
      else
        sprintf (templ, "vmov.i%d\t%%q0, %%1 @ <mode>", width);

      return templ;
    }

  switch (which_alternative)
    {
    case 0: return "vmov\t%q0, %q1 @ <mode>";
    case 1: case 4: return output_move_neon (operands);
    /* Immediate alternatives were handled above.  */
    case 2: case 3: gcc_unreachable ();
    case 5: return "vmov\t%Q0, %R0, %e1 @ <mode>\;vmov\t%J0, %K0, %f1";
    case 6: return "vmov\t%e0, %Q1, %R1 @ <mode>\;vmov\t%f0, %J1, %K1";
    default: return output_move_quad (operands);
    }
}
  [(set_attr "type" "neon_move_q,neon_store2_2reg_q,neon_move_q,\
                     neon_move_q,neon_load2_2reg_q,neon_to_gp_q,\
                     neon_from_gp_q,mov_reg,neon_load1_4reg,neon_store1_4reg")
   (set_attr "length" "4,8,4,4,8,8,8,16,8,16")
   (set_attr "arm_pool_range" "*,*,*,*,1020,*,*,*,1020,*")
   (set_attr "thumb2_pool_range" "*,*,*,*,1018,*,*,*,1018,*")
   (set_attr "neg_pool_range" "*,*,*,*,996,*,*,*,996,*")])

/* We define these mov expanders to match the standard mov$a optab to prevent
   the mid-end from trying to do a subreg for these modes which is the most
   inefficient way to expand the move. Also big-endian subreg's aren't
   allowed for a subset of modes, See TARGET_CAN_CHANGE_MODE_CLASS.
   Without these RTL generation patterns the mid-end would attempt to take a
   sub-reg and may ICE if it can't. */

;; TImode move: while pseudos can still be created, a non-register destination
;; gets its source forced into a register first.
(define_expand "movti"
  [(set (match_operand:TI 0 "nonimmediate_operand" "")
        (match_operand:TI 1 "general_operand" ""))]
  "TARGET_NEON"
{
  if (can_create_pseudo_p ())
    {
      if (!REG_P (operands[0]))
        operands[1] = force_reg (TImode, operands[1]);
    }
})

;; Same treatment for the VSTRUCT modes.
(define_expand "mov<mode>"
  [(set (match_operand:VSTRUCT 0 "nonimmediate_operand" "")
        (match_operand:VSTRUCT 1 "general_operand" ""))]
  "TARGET_NEON"
{
  if (can_create_pseudo_p ())
    {
      if (!REG_P (operands[0]))
        operands[1] = force_reg (<MODE>mode, operands[1]);
    }
})

;; Same treatment for the VH modes; both operands use the
;; s_register_operand predicate here.
(define_expand "mov<mode>"
  [(set (match_operand:VH 0 "s_register_operand")
        (match_operand:VH 1 "s_register_operand"))]
  "TARGET_NEON"
{
  if (can_create_pseudo_p ())
    {
      if (!REG_P (operands[0]))
        operands[1] = force_reg (<MODE>mode, operands[1]);
    }
})

;; VSTRUCT move.  The register-to-register alternative emits "#", so it relies
;; on being split (presumably by the post-reload define_splits that follow);
;; the memory alternatives go through output_move_neon.
(define_insn "*neon_mov<mode>"
  [(set (match_operand:VSTRUCT 0 "nonimmediate_operand" "=w,Ut,w")
        (match_operand:VSTRUCT 1 "general_operand" " w,w, Ut"))]
  "TARGET_NEON
   && (register_operand (operands[0], <MODE>mode)
       || register_operand (operands[1], <MODE>mode))"
{
  switch (which_alternative)
    {
    case 0: return "#";
    case 1: case 2: return output_move_neon (operands);
    default: gcc_unreachable ();
    }
}
  [(set_attr "type" "neon_move_q,neon_store2_2reg_q,neon_load2_2reg_q")
   (set (attr "length") (symbol_ref "arm_attr_length_move_neon (insn)"))])

;; After reload, split an EI register copy into a TImode piece and a DImode
;; piece (register offset +4).  neon_disambiguate_copy fills in the operands;
;; NOTE(review): presumably it orders the pieces so that overlapping source
;; and destination are copied safely -- confirm against its definition.
(define_split
  [(set (match_operand:EI 0 "s_register_operand" "")
        (match_operand:EI 1 "s_register_operand" ""))]
  "TARGET_NEON && reload_completed"
  [(set (match_dup 0) (match_dup 1))
   (set (match_dup 2) (match_dup 3))]
{
  int rdest = REGNO (operands[0]);
  int rsrc = REGNO (operands[1]);
  rtx dest[2], src[2];

  dest[0] = gen_rtx_REG (TImode, rdest);
  src[0] = gen_rtx_REG (TImode, rsrc);
  dest[1] = gen_rtx_REG (DImode, rdest + 4);
  src[1] = gen_rtx_REG (DImode, rsrc + 4);

  neon_disambiguate_copy (operands, dest, src, 2);
})

;; After reload, split an OI register copy into two TImode pieces.
(define_split
  [(set (match_operand:OI 0 "s_register_operand" "")
        (match_operand:OI 1 "s_register_operand" ""))]
  "TARGET_NEON && reload_completed"
  [(set (match_dup 0) (match_dup 1))
   (set (match_dup 2) (match_dup 3))]
{
  int rdest = REGNO (operands[0]);
  int rsrc = REGNO (operands[1]);
  rtx dest[2], src[2];

  dest[0] = gen_rtx_REG (TImode, rdest);
  src[0] = gen_rtx_REG (TImode, rsrc);
  dest[1] = gen_rtx_REG (TImode, rdest + 4);
  src[1] = gen_rtx_REG (TImode, rsrc + 4);

  neon_disambiguate_copy (operands, dest, src, 2);
})

;; After reload, split a CI register copy into three TImode pieces.
(define_split
  [(set (match_operand:CI 0 "s_register_operand" "")
        (match_operand:CI 1 "s_register_operand" ""))]
  "TARGET_NEON && reload_completed"
  [(set (match_dup 0) (match_dup 1))
   (set (match_dup 2) (match_dup 3))
   (set (match_dup 4) (match_dup 5))]
{
  int rdest = REGNO (operands[0]);
  int rsrc = REGNO (operands[1]);
  rtx dest[3], src[3];

  dest[0] = gen_rtx_REG (TImode, rdest);
  src[0] = gen_rtx_REG (TImode, rsrc);
  dest[1] = gen_rtx_REG (TImode, rdest + 4);
  src[1] = gen_rtx_REG (TImode, rsrc + 4);
  dest[2] = gen_rtx_REG (TImode, rdest + 8);
  src[2] = gen_rtx_REG (TImode, rsrc + 8);

  neon_disambiguate_copy (operands, dest, src, 3);
})

;; After reload, split an XI register copy into four TImode pieces.
(define_split
  [(set (match_operand:XI 0 "s_register_operand" "")
        (match_operand:XI 1 "s_register_operand" ""))]
  "TARGET_NEON && reload_completed"
  [(set (match_dup 0) (match_dup 1))
   (set (match_dup 2) (match_dup 3))
   (set (match_dup 4) (match_dup 5))
   (set (match_dup 6) (match_dup 7))]
{
  int rdest = REGNO (operands[0]);
  int rsrc = REGNO (operands[1]);
  rtx dest[4], src[4];

  dest[0] = gen_rtx_REG (TImode, rdest);
  src[0] = gen_rtx_REG (TImode, rsrc);
  dest[1] = gen_rtx_REG (TImode, rdest + 4);
  src[1] = gen_rtx_REG (TImode, rsrc + 4);
  dest[2] = gen_rtx_REG (TImode, rdest + 8);
  src[2] = gen_rtx_REG (TImode, rsrc + 8);
  dest[3] = gen_rtx_REG (TImode, rdest + 12);
  src[3] = gen_rtx_REG (TImode, rsrc + 12);

  neon_disambiguate_copy (operands, dest, src, 4);
})

;; Misaligned vector move.  Only enabled for little-endian with
;; unaligned_access.  The non-register operand's address is forced into a
;; register when neon_vector_mem_operand rejects it.
(define_expand "movmisalign<mode>"
  [(set (match_operand:VDQX 0 "neon_perm_struct_or_reg_operand")
        (unspec:VDQX [(match_operand:VDQX 1 "neon_perm_struct_or_reg_operand")]
                     UNSPEC_MISALIGNED_ACCESS))]
  "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access"
{
  rtx adjust_mem;
  /* This pattern is not permitted to fail during expansion: if both arguments
     are non-registers (e.g. memory := constant, which can be created by the
     auto-vectorizer), force operand 1 into a register. */
  if (!s_register_operand (operands[0], <MODE>mode)
      && !s_register_operand (operands[1], <MODE>mode))
    operands[1] = force_reg (<MODE>mode, operands[1]);

  /* Pick whichever operand is not the register destination.  */
  if (s_register_operand (operands[0], <MODE>mode))
    adjust_mem = operands[1];
  else
    adjust_mem = operands[0];

  /* Legitimize address. */
  if (!neon_vector_mem_operand (adjust_mem, 2, true))
    XEXP (adjust_mem, 0) = force_reg (Pmode, XEXP (adjust_mem, 0));

})

;; Misaligned store of a VDX register with vst1.
(define_insn "*movmisalign<mode>_neon_store"
  [(set (match_operand:VDX 0 "neon_permissive_struct_operand" "=Um")
        (unspec:VDX [(match_operand:VDX 1 "s_register_operand" " w")]
                    UNSPEC_MISALIGNED_ACCESS))]
  "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access"
  "vst1.<V_sz_elem>\t{%P1}, %A0"
  [(set_attr "type" "neon_store1_1reg<q>")])

;; Misaligned load of a VDX register with vld1.
(define_insn "*movmisalign<mode>_neon_load"
  [(set (match_operand:VDX 0 "s_register_operand" "=w")
        (unspec:VDX [(match_operand:VDX 1 "neon_permissive_struct_operand"
                                        " Um")]
                    UNSPEC_MISALIGNED_ACCESS))]
  "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access"
  "vld1.<V_sz_elem>\t{%P0}, %A1"
  [(set_attr "type" "neon_load1_1reg<q>")])

;; Misaligned store of a VQX register with vst1.
(define_insn "*movmisalign<mode>_neon_store"
  [(set (match_operand:VQX 0 "neon_permissive_struct_operand" "=Um")
        (unspec:VQX [(match_operand:VQX 1 "s_register_operand" " w")]
                    UNSPEC_MISALIGNED_ACCESS))]
  "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access"
  "vst1.<V_sz_elem>\t{%q1}, %A0"
  [(set_attr "type" "neon_store1_1reg<q>")])

;; Misaligned load of a VQX register with vld1.
(define_insn "*movmisalign<mode>_neon_load"
  [(set (match_operand:VQX 0 "s_register_operand" "=w")
        (unspec:VQX [(match_operand:VQX 1 "neon_permissive_struct_operand"
                                        " Um")]
                    UNSPEC_MISALIGNED_ACCESS))]
  "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access"
  "vld1.<V_sz_elem>\t{%q0}, %A1"
  [(set_attr "type" "neon_load1_1reg<q>")])

;; Set one lane of a VD_LANE vector.  Operand 2 is a lane mask (the vec_set
;; expander in this file passes 1 << lane); ffs recovers the lane index,
;; which is mirrored for big-endian.
(define_insn "vec_set<mode>_internal"
  [(set (match_operand:VD_LANE 0 "s_register_operand" "=w,w")
        (vec_merge:VD_LANE
          (vec_duplicate:VD_LANE
            (match_operand:<V_elem> 1 "nonimmediate_operand" "Um,r"))
          (match_operand:VD_LANE 3 "s_register_operand" "0,0")
          (match_operand:SI 2 "immediate_operand" "i,i")))]
  "TARGET_NEON"
{
  int elt = ffs ((int) INTVAL (operands[2])) - 1;
  if (BYTES_BIG_ENDIAN)
    elt = GET_MODE_NUNITS (<MODE>mode) - 1 - elt;
  operands[2] = GEN_INT (elt);

  if (which_alternative == 0)
    return "vld1.<V_sz_elem>\t{%P0[%c2]}, %A1";
  else
    return "vmov.<V_sz_elem>\t%P0[%c2], %1";
}
  [(set_attr "type" "neon_load1_all_lanes<q>,neon_from_gp<q>")])

;; Set one lane of a VQ2 vector.  The lane index is split into a half
;; selector (register number + 0 or + 2) and a lane within that half, and
;; operand 0 is rewritten to the selected <V_HALF> register.
(define_insn "vec_set<mode>_internal"
  [(set (match_operand:VQ2 0 "s_register_operand" "=w,w")
        (vec_merge:VQ2
          (vec_duplicate:VQ2
            (match_operand:<V_elem> 1 "nonimmediate_operand" "Um,r"))
          (match_operand:VQ2 3 "s_register_operand" "0,0")
          (match_operand:SI 2 "immediate_operand" "i,i")))]
  "TARGET_NEON"
{
  HOST_WIDE_INT elem = ffs ((int) INTVAL (operands[2])) - 1;
  int half_elts = GET_MODE_NUNITS (<MODE>mode) / 2;
  int elt = elem % half_elts;
  int hi = (elem / half_elts) * 2;
  int regno = REGNO (operands[0]);

  if (BYTES_BIG_ENDIAN)
    elt = half_elts - 1 - elt;

  operands[0] = gen_rtx_REG (<V_HALF>mode, regno + hi);
  operands[2] = GEN_INT (elt);

  if (which_alternative == 0)
    return "vld1.<V_sz_elem>\t{%P0[%c2]}, %A1";
  else
    return "vmov.<V_sz_elem>\t%P0[%c2], %1";
}
  [(set_attr "type" "neon_load1_all_lanes<q>,neon_from_gp<q>")]
)

;; Set one DImode element of a V2DI vector: operand 0 is rewritten to the
;; DImode register at regno + 2 * element.
(define_insn "vec_setv2di_internal"
  [(set (match_operand:V2DI 0 "s_register_operand" "=w,w")
        (vec_merge:V2DI
          (vec_duplicate:V2DI
            (match_operand:DI 1 "nonimmediate_operand" "Um,r"))
          (match_operand:V2DI 3 "s_register_operand" "0,0")
          (match_operand:SI 2 "immediate_operand" "i,i")))]
  "TARGET_NEON"
{
  HOST_WIDE_INT elem = ffs ((int) INTVAL (operands[2])) - 1;
  int regno = REGNO (operands[0]) + 2 * elem;

  operands[0] = gen_rtx_REG (DImode, regno);

  if (which_alternative == 0)
    return "vld1.64\t%P0, %A1";
  else
    return "vmov\t%P0, %Q1, %R1";
}
  [(set_attr "type" "neon_load1_all_lanes_q,neon_from_gp_q")]
)

;; Standard-name lane insert: converts the lane index in operand 2 into the
;; one-bit mask expected by the *_internal patterns.
(define_expand "vec_set<mode>"
  [(match_operand:VDQ 0 "s_register_operand" "")
   (match_operand:<V_elem> 1 "s_register_operand" "")
   (match_operand:SI 2 "immediate_operand" "")]
  "TARGET_NEON"
{
  HOST_WIDE_INT elem = HOST_WIDE_INT_1 << INTVAL (operands[2]);
  emit_insn (gen_vec_set<mode>_internal (operands[0], operands[1],
                                         GEN_INT (elem), operands[0]));
  DONE;
})

;; Extract one lane of a VD_LANE vector to memory (vst1) or to a core
;; register (vmov).  The lane index is mirrored for big-endian.
(define_insn "vec_extract<mode><V_elem_l>"
  [(set (match_operand:<V_elem> 0 "nonimmediate_operand" "=Um,r")
        (vec_select:<V_elem>
          (match_operand:VD_LANE 1 "s_register_operand" "w,w")
          (parallel [(match_operand:SI 2 "immediate_operand" "i,i")])))]
  "TARGET_NEON"
{
  if (BYTES_BIG_ENDIAN)
    {
      int elt = INTVAL (operands[2]);
      elt = GET_MODE_NUNITS (<MODE>mode) - 1 - elt;
      operands[2] = GEN_INT (elt);
    }

  if (which_alternative == 0)
    return "vst1.<V_sz_elem>\t{%P1[%c2]}, %A0";
  else
    return "vmov.<V_uf_sclr>\t%0, %P1[%c2]";
}
  [(set_attr "type" "neon_store1_one_lane<q>,neon_to_gp<q>")]
)

;; Extract one lane of a VQ2 vector.  As for the VQ2 lane insert above, the
;; source is narrowed to the <V_HALF> register holding the lane.
(define_insn "vec_extract<mode><V_elem_l>"
  [(set (match_operand:<V_elem> 0 "nonimmediate_operand" "=Um,r")
        (vec_select:<V_elem>
          (match_operand:VQ2 1 "s_register_operand" "w,w")
          (parallel [(match_operand:SI 2 "immediate_operand" "i,i")])))]
  "TARGET_NEON"
{
  int half_elts = GET_MODE_NUNITS (<MODE>mode) / 2;
  int elt = INTVAL (operands[2]) % half_elts;
  int hi = (INTVAL (operands[2]) / half_elts) * 2;
  int regno = REGNO (operands[1]);

  if (BYTES_BIG_ENDIAN)
    elt = half_elts - 1 - elt;

  operands[1] = gen_rtx_REG (<V_HALF>mode, regno + hi);
  operands[2] = GEN_INT (elt);

  if (which_alternative == 0)
    return "vst1.<V_sz_elem>\t{%P1[%c2]}, %A0";
  else
    return "vmov.<V_uf_sclr>\t%0, %P1[%c2]";
}
  [(set_attr "type" "neon_store1_one_lane<q>,neon_to_gp<q>")]
)

;; Extract one DImode element of a V2DI vector: operand 1 is rewritten to the
;; DImode register at regno + 2 * index.
(define_insn "vec_extractv2didi"
  [(set (match_operand:DI 0 "nonimmediate_operand" "=Um,r")
        (vec_select:DI
          (match_operand:V2DI 1 "s_register_operand" "w,w")
          (parallel [(match_operand:SI 2 "immediate_operand" "i,i")])))]
  "TARGET_NEON"
{
  int regno = REGNO (operands[1]) + 2 * INTVAL (operands[2]);

  operands[1] = gen_rtx_REG (DImode, regno);

  if (which_alternative == 0)
    return "vst1.64\t{%P1}, %A0 @ v2di";
  else
    return "vmov\t%Q0, %R0, %P1 @ v2di";
}
  [(set_attr "type" "neon_store1_one_lane_q,neon_to_gp_q")]
)

;; Vector initialisation is delegated entirely to neon_expand_vector_init.
(define_expand "vec_init<mode><V_elem_l>"
  [(match_operand:VDQ 0 "s_register_operand" "")
   (match_operand 1 "" "")]
  "TARGET_NEON"
{
  neon_expand_vector_init (operands[0], operands[1]);
  DONE;
})

;; Doubleword and quadword arithmetic.

;; NOTE: some other instructions also support 64-bit integer
;; element size, which we could potentially use for "long long" operations.

;; Vector add.  Float modes are only matched under
;; flag_unsafe_math_optimizations.
(define_insn "*add<mode>3_neon"
  [(set (match_operand:VDQ 0 "s_register_operand" "=w")
        (plus:VDQ (match_operand:VDQ 1 "s_register_operand" "w")
                  (match_operand:VDQ 2 "s_register_operand" "w")))]
  "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
  "vadd.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set (attr "type")
      (if_then_else (match_test "<Is_float_mode>")
                    (const_string "neon_fp_addsub_s<q>")
                    (const_string "neon_add<q>")))]
)

;; As with SFmode, full support for HFmode vector arithmetic is only available
;; when flag-unsafe-math-optimizations is enabled.

;; VH vector add, standard name; requires flag_unsafe_math_optimizations.
(define_insn "add<mode>3"
  [(set
    (match_operand:VH 0 "s_register_operand" "=w")
    (plus:VH
     (match_operand:VH 1 "s_register_operand" "w")
     (match_operand:VH 2 "s_register_operand" "w")))]
 "TARGET_NEON_FP16INST && flag_unsafe_math_optimizations"
 "vadd.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
 [(set (attr "type")
   (if_then_else (match_test "<Is_float_mode>")
    (const_string "neon_fp_addsub_s<q>")
    (const_string "neon_add<q>")))]
)

;; Same instruction without the flag_unsafe_math_optimizations requirement.
(define_insn "add<mode>3_fp16"
  [(set
    (match_operand:VH 0 "s_register_operand" "=w")
    (plus:VH
     (match_operand:VH 1 "s_register_operand" "w")
     (match_operand:VH 2 "s_register_operand" "w")))]
 "TARGET_NEON_FP16INST"
 "vadd.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
 [(set (attr "type")
   (if_then_else (match_test "<Is_float_mode>")
    (const_string "neon_fp_addsub_s<q>")
    (const_string "neon_add<q>")))]
)

;; DImode add.  Alternatives 0 and 3 emit vadd.i64; every other alternative
;; emits "#" and so relies on being split elsewhere.
(define_insn "adddi3_neon"
  [(set (match_operand:DI 0 "s_register_operand" "=w,?&r,?&r,?w,?&r,?&r,?&r")
        (plus:DI (match_operand:DI 1 "s_register_operand" "%w,0,0,w,r,0,r")
                 (match_operand:DI 2 "arm_adddi_operand" "w,r,0,w,r,Dd,Dd")))
   (clobber (reg:CC CC_REGNUM))]
  "TARGET_NEON"
{
  switch (which_alternative)
    {
    case 0: /* fall through */
    case 3: return "vadd.i64\t%P0, %P1, %P2";
    case 1: return "#";
    case 2: return "#";
    case 4: return "#";
    case 5: return "#";
    case 6: return "#";
    default: gcc_unreachable ();
    }
}
  [(set_attr "type" "neon_add,multiple,multiple,neon_add,\
                     multiple,multiple,multiple")
   (set_attr "conds" "*,clob,clob,*,clob,clob,clob")
   (set_attr "length" "*,8,8,*,8,8,8")
   (set_attr "arch" "neon_for_64bits,*,*,avoid_neon_for_64bits,*,*,*")]
)

;; Vector subtract.  Float modes are only matched under
;; flag_unsafe_math_optimizations.
(define_insn "*sub<mode>3_neon"
  [(set (match_operand:VDQ 0 "s_register_operand" "=w")
        (minus:VDQ (match_operand:VDQ 1 "s_register_operand" "w")
                   (match_operand:VDQ 2 "s_register_operand" "w")))]
  "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
  "vsub.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set (attr "type")
      (if_then_else (match_test "<Is_float_mode>")
                    (const_string "neon_fp_addsub_s<q>")
                    (const_string "neon_sub<q>")))]
)

;; VH vector subtract, standard name; requires flag_unsafe_math_optimizations.
;; The "type" attribute is selected the same way as in the VH add<mode>3
;; pattern, instead of unconditionally using the integer neon_sub<q> type.
(define_insn "sub<mode>3"
 [(set
   (match_operand:VH 0 "s_register_operand" "=w")
   (minus:VH
    (match_operand:VH 1 "s_register_operand" "w")
    (match_operand:VH 2 "s_register_operand" "w")))]
 "TARGET_NEON_FP16INST && flag_unsafe_math_optimizations"
 "vsub.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
 [(set (attr "type")
   (if_then_else (match_test "<Is_float_mode>")
    (const_string "neon_fp_addsub_s<q>")
    (const_string "neon_sub<q>")))]
)

;; Same instruction without the flag_unsafe_math_optimizations requirement.
;; The "type" attribute is selected as in add<mode>3_fp16.
(define_insn "sub<mode>3_fp16"
 [(set
   (match_operand:VH 0 "s_register_operand" "=w")
   (minus:VH
    (match_operand:VH 1 "s_register_operand" "w")
    (match_operand:VH 2 "s_register_operand" "w")))]
 "TARGET_NEON_FP16INST"
 "vsub.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
 [(set (attr "type")
   (if_then_else (match_test "<Is_float_mode>")
    (const_string "neon_fp_addsub_s<q>")
    (const_string "neon_sub<q>")))]
)

;; DImode subtract.  Alternatives 0 and 4 emit vsub.i64; alternatives 1-3
;; emit a subs/sbc pair on the low and high words.
(define_insn "subdi3_neon"
  [(set (match_operand:DI 0 "s_register_operand" "=w,?&r,?&r,?&r,?w")
        (minus:DI (match_operand:DI 1 "s_register_operand" "w,0,r,0,w")
                  (match_operand:DI 2 "s_register_operand" "w,r,0,0,w")))
   (clobber (reg:CC CC_REGNUM))]
  "TARGET_NEON"
{
  switch (which_alternative)
    {
    case 0: /* fall through */
    case 4: return "vsub.i64\t%P0, %P1, %P2";
    case 1: /* fall through */
    case 2: /* fall through */
    case 3: return "subs\\t%Q0, %Q1, %Q2\;sbc\\t%R0, %R1, %R2";
    default: gcc_unreachable ();
    }
}
  [(set_attr "type" "neon_sub,multiple,multiple,multiple,neon_sub")
   (set_attr "conds" "*,clob,clob,clob,*")
   (set_attr "length" "*,8,8,8,*")
   (set_attr "arch" "neon_for_64bits,*,*,*,avoid_neon_for_64bits")]
)

;; Vector multiply.
(define_insn "*mul<mode>3_neon"
  [(set (match_operand:VDQW 0 "s_register_operand" "=w")
        (mult:VDQW (match_operand:VDQW 1 "s_register_operand" "w")
                   (match_operand:VDQW 2 "s_register_operand" "w")))]
  "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
  "vmul.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set (attr "type")
      (if_then_else (match_test "<Is_float_mode>")
                    (const_string "neon_fp_mul_s<q>")
                    (const_string "neon_mul_<V_elem_ch><q>")))]
)

;; Multiply-accumulate (vmla): operand 0 = operand 1 + operand 2 * operand 3,
;; with the accumulator tied to the destination.
(define_insn "mul<mode>3add<mode>_neon"
  [(set (match_operand:VDQW 0 "s_register_operand" "=w")
        (plus:VDQW (mult:VDQW (match_operand:VDQW 2 "s_register_operand" "w")
                              (match_operand:VDQW 3 "s_register_operand" "w"))
                   (match_operand:VDQW 1 "s_register_operand" "0")))]
  "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
  "vmla.<V_if_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
  [(set (attr "type")
      (if_then_else (match_test "<Is_float_mode>")
                    (const_string "neon_fp_mla_s<q>")
                    (const_string "neon_mla_<V_elem_ch><q>")))]
)

;; VH variant of the multiply-accumulate above (vmla.f16).
(define_insn "mul<mode>3add<mode>_neon"
  [(set (match_operand:VH 0 "s_register_operand" "=w")
        (plus:VH (mult:VH (match_operand:VH 2 "s_register_operand" "w")
                          (match_operand:VH 3 "s_register_operand" "w"))
                 (match_operand:VH 1 "s_register_operand" "0")))]
  "TARGET_NEON_FP16INST && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
  "vmla.f16\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
  [(set_attr "type" "neon_fp_mla_s<q>")]
)

;; Multiply-subtract (vmls): operand 0 = operand 1 - operand 2 * operand 3,
;; with the accumulator tied to the destination.
(define_insn "mul<mode>3neg<mode>add<mode>_neon"
  [(set (match_operand:VDQW 0 "s_register_operand" "=w")
        (minus:VDQW (match_operand:VDQW 1 "s_register_operand" "0")
                    (mult:VDQW (match_operand:VDQW 2 "s_register_operand" "w")
                               (match_operand:VDQW 3 "s_register_operand" "w"))))]
  "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
  "vmls.<V_if_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
  [(set (attr "type")
      (if_then_else (match_test "<Is_float_mode>")
                    (const_string "neon_fp_mla_s<q>")
                    (const_string "neon_mla_<V_elem_ch><q>")))]
)

;; Fused multiply-accumulate
;; We define each insn twice here:
;; 1: with flag_unsafe_math_optimizations for the widening multiply phase
;; to be able to use when converting to FMA.
;; 2: without flag_unsafe_math_optimizations for the intrinsics to use.
(define_insn "fma<VCVTF:mode>4"
  [(set (match_operand:VCVTF 0 "register_operand" "=w")
        (fma:VCVTF (match_operand:VCVTF 1 "register_operand" "w")
                   (match_operand:VCVTF 2 "register_operand" "w")
                   (match_operand:VCVTF 3 "register_operand" "0")))]
  "TARGET_NEON && TARGET_FMA && flag_unsafe_math_optimizations"
  "vfma.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_fp_mla_s<q>")]
)

(define_insn "fma<VCVTF:mode>4_intrinsic"
  [(set (match_operand:VCVTF 0 "register_operand" "=w")
        (fma:VCVTF (match_operand:VCVTF 1 "register_operand" "w")
                   (match_operand:VCVTF 2 "register_operand" "w")
                   (match_operand:VCVTF 3 "register_operand" "0")))]
  "TARGET_NEON && TARGET_FMA"
  "vfma.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_fp_mla_s<q>")]
)

(define_insn "fma<VH:mode>4"
 [(set (match_operand:VH 0 "register_operand" "=w")
   (fma:VH
    (match_operand:VH 1 "register_operand" "w")
    (match_operand:VH 2 "register_operand" "w")
    (match_operand:VH 3 "register_operand" "0")))]
 "TARGET_NEON_FP16INST && flag_unsafe_math_optimizations"
 "vfma.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
 [(set_attr "type" "neon_fp_mla_s<q>")]
)

(define_insn "fma<VH:mode>4_intrinsic"
 [(set (match_operand:VH 0 "register_operand" "=w")
   (fma:VH
    (match_operand:VH 1 "register_operand" "w")
    (match_operand:VH 2 "register_operand" "w")
    (match_operand:VH 3 "register_operand" "0")))]
 "TARGET_NEON_FP16INST"
 "vfma.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
 [(set_attr "type" "neon_fp_mla_s<q>")]
)

;; Fused multiply-subtract (vfms): operand 1 is negated inside the fma.
(define_insn "*fmsub<VCVTF:mode>4"
  [(set (match_operand:VCVTF 0 "register_operand" "=w")
        (fma:VCVTF (neg:VCVTF (match_operand:VCVTF 1 "register_operand" "w"))
                   (match_operand:VCVTF 2 "register_operand" "w")
                   (match_operand:VCVTF 3 "register_operand" "0")))]
  "TARGET_NEON && TARGET_FMA && flag_unsafe_math_optimizations"
  "vfms.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_fp_mla_s<q>")]
)

(define_insn "fmsub<VCVTF:mode>4_intrinsic"
 [(set (match_operand:VCVTF 0 "register_operand" "=w")
   (fma:VCVTF
    (neg:VCVTF (match_operand:VCVTF 1 "register_operand" "w"))
    (match_operand:VCVTF 2 "register_operand" "w")
    (match_operand:VCVTF 3 "register_operand" "0")))]
 "TARGET_NEON && TARGET_FMA"
 "vfms.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
 [(set_attr "type" "neon_fp_mla_s<q>")]
)

(define_insn "fmsub<VH:mode>4_intrinsic"
 [(set (match_operand:VH 0 "register_operand" "=w")
   (fma:VH
    (neg:VH (match_operand:VH 1 "register_operand" "w"))
    (match_operand:VH 2 "register_operand" "w")
    (match_operand:VH 3 "register_operand" "0")))]
 "TARGET_NEON_FP16INST"
 "vfms.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
 [(set_attr "type" "neon_fp_mla_s<q>")]
)

;; vrint<variant>.f32 rounding, one pattern per NEON_VRINT unspec.
(define_insn "neon_vrint<NEON_VRINT:nvrint_variant><VCVTF:mode>"
  [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
        (unspec:VCVTF [(match_operand:VCVTF 1
                         "s_register_operand" "w")]
                NEON_VRINT))]
  "TARGET_NEON && TARGET_VFP5"
  "vrint<nvrint_variant>.f32\\t%<V_reg>0, %<V_reg>1"
  [(set_attr "type" "neon_fp_round_<V_elem_ch><q>")]
)

;; vcvt<variant>.<su>32.f32 float-to-integer conversion; marked as not
;; predicable.
(define_insn "neon_vcvt<NEON_VCVT:nvrint_variant><su_optab><VCVTF:mode><v_cmp_result>"
  [(set (match_operand:<V_cmp_result> 0 "register_operand" "=w")
        (FIXUORS:<V_cmp_result> (unspec:VCVTF
                               [(match_operand:VCVTF 1 "register_operand" "w")]
                               NEON_VCVT)))]
  "TARGET_NEON && TARGET_VFP5"
  "vcvt<nvrint_variant>.<su>32.f32\\t%<V_reg>0, %<V_reg>1"
  [(set_attr "type" "neon_fp_to_int_<V_elem_ch><q>")
   (set_attr "predicable" "no")]
)

;; Vector inclusive OR.  Alternative 1 takes an immediate (constraint Dl) and
;; builds its template with neon_output_logic_immediate.
(define_insn "ior<mode>3"
  [(set (match_operand:VDQ 0 "s_register_operand" "=w,w")
        (ior:VDQ (match_operand:VDQ 1 "s_register_operand" "w,0")
                 (match_operand:VDQ 2 "neon_logic_op2" "w,Dl")))]
  "TARGET_NEON"
{
  switch (which_alternative)
    {
    case 0: return "vorr\t%<V_reg>0, %<V_reg>1, %<V_reg>2";
    case 1: return neon_output_logic_immediate ("vorr", &operands[2],
                     <MODE>mode, 0, VALID_NEON_QREG_MODE (<MODE>mode));
    default: gcc_unreachable ();
    }
}
  [(set_attr "type" "neon_logic<q>")]
)

;; The concrete forms of the Neon immediate-logic instructions are vbic and
;; vorr. We support the pseudo-instruction vand instead, because that
;; corresponds to the canonical form the middle-end expects to use for
;; immediate bitwise-ANDs.

(define_insn "and<mode>3"
  [(set (match_operand:VDQ 0 "s_register_operand" "=w,w")
        (and:VDQ (match_operand:VDQ 1 "s_register_operand" "w,0")
                 (match_operand:VDQ 2 "neon_inv_logic_op2" "w,DL")))]
  "TARGET_NEON"
{
  switch (which_alternative)
    {
    case 0: return "vand\t%<V_reg>0, %<V_reg>1, %<V_reg>2";
    case 1: return neon_output_logic_immediate ("vand", &operands[2],
                     <MODE>mode, 1, VALID_NEON_QREG_MODE (<MODE>mode));
    default: gcc_unreachable ();
    }
}
  [(set_attr "type" "neon_logic<q>")]
)

;; OR with complement (vorn): operand 0 = operand 1 | ~operand 2.
(define_insn "orn<mode>3_neon"
  [(set (match_operand:VDQ 0 "s_register_operand" "=w")
        (ior:VDQ (not:VDQ (match_operand:VDQ 2 "s_register_operand" "w"))
                 (match_operand:VDQ 1 "s_register_operand" "w")))]
  "TARGET_NEON"
  "vorn\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_logic<q>")]
)

;; TODO: investigate whether we should disable
;; this and bicdi3_neon for the A8 in line with the other
;; changes above.
;; DImode OR with complement.  Alternative 0 is a single vorn; the other
;; alternatives emit "#" and are split after reload when operand 0 is not a
;; VFP register.  For Thumb-2 the split produces two SImode orn operations on
;; the low and high halves; otherwise it emits one_cmpldi2 followed by iordi3.
(define_insn_and_split "orndi3_neon"
  [(set (match_operand:DI 0 "s_register_operand" "=w,?&r,?&r,?&r")
        (ior:DI (not:DI (match_operand:DI 2 "s_register_operand" "w,0,0,r"))
                (match_operand:DI 1 "s_register_operand" "w,r,r,0")))]
  "TARGET_NEON"
  "@
   vorn\t%P0, %P1, %P2
   #
   #
   #"
  "reload_completed &&
   (TARGET_NEON && !(IS_VFP_REGNUM (REGNO (operands[0]))))"
  [(set (match_dup 0) (ior:SI (not:SI (match_dup 2)) (match_dup 1)))
   (set (match_dup 3) (ior:SI (not:SI (match_dup 4)) (match_dup 5)))]
  "
  {
    if (TARGET_THUMB2)
      {
        operands[3] = gen_highpart (SImode, operands[0]);
        operands[0] = gen_lowpart (SImode, operands[0]);
        operands[4] = gen_highpart (SImode, operands[2]);
        operands[2] = gen_lowpart (SImode, operands[2]);
        operands[5] = gen_highpart (SImode, operands[1]);
        operands[1] = gen_lowpart (SImode, operands[1]);
      }
    else
      {
        emit_insn (gen_one_cmpldi2 (operands[0], operands[2]));
        emit_insn (gen_iordi3 (operands[0], operands[1], operands[0]));
        DONE;
      }
  }"
  [(set_attr "type" "neon_logic,multiple,multiple,multiple")
   (set_attr "length" "*,16,8,8")
   (set_attr "arch" "any,a,t2,t2")]
)

;; Bit clear (vbic): operand 0 = operand 1 & ~operand 2.
(define_insn "bic<mode>3_neon"
  [(set (match_operand:VDQ 0 "s_register_operand" "=w")
        (and:VDQ (not:VDQ (match_operand:VDQ 2 "s_register_operand" "w"))
                 (match_operand:VDQ 1 "s_register_operand" "w")))]
  "TARGET_NEON"
  "vbic\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_logic<q>")]
)

;; Compare to *anddi_notdi_di.
;; DImode AND-NOT: operand 0 = (~operand 2) & operand 1.  Alternative 0
;; is a single vbic on NEON registers; the core-register alternatives
;; output only a split marker, and this pattern carries no splitter of
;; its own.
(define_insn "bicdi3_neon"
  [(set (match_operand:DI 0 "s_register_operand" "=w,?&r,?&r")
        (and:DI (not:DI (match_operand:DI 2 "s_register_operand" "w,r,0"))
                (match_operand:DI 1 "s_register_operand" "w,0,r")))]
  "TARGET_NEON"
  "@
   vbic\t%P0, %P1, %P2
   #
   #"
  [(set_attr "type" "neon_logic,multiple,multiple")
   (set_attr "length" "*,8,8")]
)

;; Vector exclusive-or, emitted as veor.
(define_insn "xor<mode>3"
  [(set (match_operand:VDQ 0 "s_register_operand" "=w")
        (xor:VDQ (match_operand:VDQ 1 "s_register_operand" "w")
                 (match_operand:VDQ 2 "s_register_operand" "w")))]
  "TARGET_NEON"
  "veor\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_logic<q>")]
)

;; Vector bitwise complement, emitted as vmvn.
(define_insn "one_cmpl<mode>2"
  [(set (match_operand:VDQ 0 "s_register_operand" "=w")
        (not:VDQ (match_operand:VDQ 1 "s_register_operand" "w")))]
  "TARGET_NEON"
  "vmvn\t%<V_reg>0, %<V_reg>1"
  [(set_attr "type" "neon_move<q>")]
)

;; Vector absolute value.  The "type" attribute is chosen by
;; <Is_float_mode>: the FP type for float modes, the integer one otherwise.
(define_insn "abs<mode>2"
  [(set (match_operand:VDQW 0 "s_register_operand" "=w")
        (abs:VDQW (match_operand:VDQW 1 "s_register_operand" "w")))]
  "TARGET_NEON"
  "vabs.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
  [(set (attr "type")
        (if_then_else (match_test "<Is_float_mode>")
                      (const_string "neon_fp_abs_s<q>")
                      (const_string "neon_abs<q>")))]
)

;; Vector negation.  The "type" attribute is chosen the same way as for
;; abs<mode>2.
(define_insn "neg<mode>2"
  [(set (match_operand:VDQW 0 "s_register_operand" "=w")
        (neg:VDQW (match_operand:VDQW 1 "s_register_operand" "w")))]
  "TARGET_NEON"
  "vneg.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
  [(set (attr "type")
        (if_then_else (match_test "<Is_float_mode>")
                      (const_string "neon_fp_neg_s<q>")
                      (const_string "neon_neg<q>")))]
)

;; DImode negation.  Always output as a split marker; the two
;; define_splits below handle the VFP-register and core-register cases.
;; Operand 2 is a DImode scratch that is a real register only in the
;; second alternative; the condition codes are clobbered.
(define_insn "negdi2_neon"
  [(set (match_operand:DI 0 "s_register_operand" "=&w, w,r,&r")
        (neg:DI (match_operand:DI 1 "s_register_operand" " w, w,0, r")))
   (clobber (match_scratch:DI 2 "= X,&w,X, X"))
   (clobber (reg:CC CC_REGNUM))]
  "TARGET_NEON"
  "#"
  [(set_attr "length" "8")
   (set_attr "type" "multiple")]
)

; Split negdi2_neon for vfp registers
; The result is computed as (0 - operand 1): the zero is loaded into the
; scratch, or into operand 0 itself when the scratch is not a register.
(define_split
  [(set (match_operand:DI 0 "s_register_operand" "")
        (neg:DI (match_operand:DI 1 "s_register_operand" "")))
   (clobber (match_scratch:DI 2 ""))
   (clobber (reg:CC CC_REGNUM))]
  "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
  [(set (match_dup 2) (const_int 0))
   (parallel [(set (match_dup 0) (minus:DI (match_dup 2) (match_dup 1)))
              (clobber (reg:CC CC_REGNUM))])]
  {
    if (!REG_P (operands[2]))
      operands[2] = operands[0];
  }
)

; Split negdi2_neon for core registers
; The scratch clobber is dropped, leaving a plain DImode negation with a
; CC clobber.
(define_split
  [(set (match_operand:DI 0 "s_register_operand" "")
        (neg:DI (match_operand:DI 1 "s_register_operand" "")))
   (clobber (match_scratch:DI 2 ""))
   (clobber (reg:CC CC_REGNUM))]
  "TARGET_32BIT && reload_completed
   && arm_general_register_operand (operands[0], DImode)"
  [(parallel [(set (match_dup 0) (neg:DI (match_dup 1)))
              (clobber (reg:CC CC_REGNUM))])]
  ""
)

;; Half-precision vector abs/neg (iterated over ABSNEG), available under
;; TARGET_NEON_FP16INST.
(define_insn "<absneg_str><mode>2"
  [(set (match_operand:VH 0 "s_register_operand" "=w")
        (ABSNEG:VH (match_operand:VH 1 "s_register_operand" "w")))]
  "TARGET_NEON_FP16INST"
  "v<absneg_str>.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
  [(set_attr "type" "neon_abs<q>")]
)

;; Named expander that simply forwards to <absneg_str><mode>2 above.
(define_expand "neon_v<absneg_str><mode>"
  [(set
     (match_operand:VH 0 "s_register_operand")
     (ABSNEG:VH (match_operand:VH 1 "s_register_operand")))]
  "TARGET_NEON_FP16INST"
{
  emit_insn (gen_<absneg_str><mode>2 (operands[0], operands[1]));
  DONE;
})

;; Half-precision vector rounding operations, one per FP16_RND unspec;
;; the mnemonic comes from <fp16_rnd_insn>.
(define_insn "neon_v<fp16_rnd_str><mode>"
  [(set (match_operand:VH 0 "s_register_operand" "=w")
        (unspec:VH
          [(match_operand:VH 1 "s_register_operand" "w")]
          FP16_RND))]
  "TARGET_NEON_FP16INST"
  "<fp16_rnd_insn>.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
  [(set_attr "type" "neon_fp_round_s<q>")]
)

;; Half-precision vrsqrte, represented as UNSPEC_VRSQRTE.
(define_insn "neon_vrsqrte<mode>"
  [(set (match_operand:VH 0 "s_register_operand" "=w")
        (unspec:VH
          [(match_operand:VH 1 "s_register_operand" "w")]
          UNSPEC_VRSQRTE))]
  "TARGET_NEON_FP16INST"
  "vrsqrte.f16\t%<V_reg>0, %<V_reg>1"
  [(set_attr "type" "neon_fp_rsqrte_s<q>")]
)

;; Element-wise unsigned minimum (vmin with the unsigned element suffix).
(define_insn "*umin<mode>3_neon"
  [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
        (umin:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")
                    (match_operand:VDQIW 2 "s_register_operand" "w")))]
  "TARGET_NEON"
  "vmin.<V_u_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_minmax<q>")]
)

;; Element-wise unsigned maximum (vmax with the unsigned element suffix).
(define_insn "*umax<mode>3_neon"
  [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
        (umax:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")
                    (match_operand:VDQIW 2 "s_register_operand" "w")))]
  "TARGET_NEON"
  "vmax.<V_u_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_minmax<q>")]
)

;; Element-wise signed minimum over VDQW; the "type" attribute depends on
;; <Is_float_mode>.
(define_insn "*smin<mode>3_neon"
  [(set (match_operand:VDQW 0 "s_register_operand" "=w")
        (smin:VDQW (match_operand:VDQW 1 "s_register_operand" "w")
                   (match_operand:VDQW 2 "s_register_operand" "w")))]
  "TARGET_NEON"
  "vmin.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set (attr "type")
        (if_then_else (match_test "<Is_float_mode>")
                      (const_string "neon_fp_minmax_s<q>")
                      (const_string "neon_minmax<q>")))]
)

;; Element-wise signed maximum over VDQW; the "type" attribute depends on
;; <Is_float_mode>.
(define_insn "*smax<mode>3_neon"
  [(set (match_operand:VDQW 0 "s_register_operand" "=w")
        (smax:VDQW (match_operand:VDQW 1 "s_register_operand" "w")
                   (match_operand:VDQW 2 "s_register_operand" "w")))]
  "TARGET_NEON"
  "vmax.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set (attr "type")
        (if_then_else (match_test "<Is_float_mode>")
                      (const_string "neon_fp_minmax_s<q>")
                      (const_string "neon_minmax<q>")))]
)

; TODO: V2DI shifts are currently disabled because there are bugs in the
; generic vectorizer code.  It ends up creating a V2DI constructor with
; SImode elements.

;; Vector shift left.  Alternative 0 shifts by a register vector;
;; alternative 1 shifts by an immediate and gets its template from
;; neon_output_shift_immediate.
(define_insn "vashl<mode>3"
  [(set (match_operand:VDQIW 0 "s_register_operand" "=w,w")
        (ashift:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w,w")
                      (match_operand:VDQIW 2 "imm_lshift_or_reg_neon" "w,Dm")))]
  "TARGET_NEON"
  {
    switch (which_alternative)
      {
        case 0: return "vshl.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2";
        case 1: return neon_output_shift_immediate ("vshl", 'i', &operands[2],
                                                    <MODE>mode,
                                                    VALID_NEON_QREG_MODE (<MODE>mode),
                                                    true);
        default: gcc_unreachable ();
      }
  }
  [(set_attr "type" "neon_shift_reg<q>, neon_shift_imm<q>")]
)

;; Vector arithmetic shift right by an immediate (vshr with sign 's').
(define_insn "vashr<mode>3_imm"
  [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
        (ashiftrt:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")
                        (match_operand:VDQIW 2 "imm_for_neon_rshift_operand" "Dm")))]
  "TARGET_NEON"
  {
    return neon_output_shift_immediate ("vshr", 's', &operands[2],
                                        <MODE>mode, VALID_NEON_QREG_MODE (<MODE>mode),
                                        false);
  }
  [(set_attr "type" "neon_shift_imm<q>")]
)

;; Vector logical shift right by an immediate (vshr with sign 'u').
(define_insn "vlshr<mode>3_imm"
  [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
        (lshiftrt:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")
                        (match_operand:VDQIW 2 "imm_for_neon_rshift_operand" "Dm")))]
  "TARGET_NEON"
  {
    return neon_output_shift_immediate ("vshr", 'u', &operands[2],
                                        <MODE>mode, VALID_NEON_QREG_MODE (<MODE>mode),
                                        false);
  }
  [(set_attr "type" "neon_shift_imm<q>")]
)

; Used for implementing arithmetic shift-right (see vashr<mode>3), which is a
; left-shift by a negative amount, with signed operands.  This is essentially
; the same as vashl<mode>3 above, but using an unspec in case GCC tries
; anything tricky with negative shift amounts.
;; Register-controlled vshl with the signed element suffix, wrapped in
;; UNSPEC_ASHIFT_SIGNED.  vashr<mode>3 below feeds it a negated count.
(define_insn "ashl<mode>3_signed"
  [(set (match_operand:VDQI 0 "s_register_operand" "=w")
        (unspec:VDQI [(match_operand:VDQI 1 "s_register_operand" "w")
                      (match_operand:VDQI 2 "s_register_operand" "w")]
                     UNSPEC_ASHIFT_SIGNED))]
  "TARGET_NEON"
  "vshl.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_shift_reg<q>")]
)

; Used for implementing logical shift-right, which is a left-shift by a negative
; amount, with unsigned operands.

(define_insn "ashl<mode>3_unsigned"
  [(set (match_operand:VDQI 0 "s_register_operand" "=w")
        (unspec:VDQI [(match_operand:VDQI 1 "s_register_operand" "w")
                      (match_operand:VDQI 2 "s_register_operand" "w")]
                     UNSPEC_ASHIFT_UNSIGNED))]
  "TARGET_NEON"
  "vshl.<V_u_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_shift_reg<q>")]
)

;; Vector arithmetic shift right.  A register count is negated into a
;; fresh pseudo and used with ashl<mode>3_signed; anything else goes to
;; the immediate pattern vashr<mode>3_imm.
(define_expand "vashr<mode>3"
  [(set (match_operand:VDQIW 0 "s_register_operand" "")
        (ashiftrt:VDQIW (match_operand:VDQIW 1 "s_register_operand" "")
                        (match_operand:VDQIW 2 "imm_rshift_or_reg_neon" "")))]
  "TARGET_NEON"
{
  if (s_register_operand (operands[2], <MODE>mode))
    {
      rtx neg = gen_reg_rtx (<MODE>mode);
      emit_insn (gen_neg<mode>2 (neg, operands[2]));
      emit_insn (gen_ashl<mode>3_signed (operands[0], operands[1], neg));
    }
  else
    emit_insn (gen_vashr<mode>3_imm (operands[0], operands[1], operands[2]));
  DONE;
})

;; Vector logical shift right.  Same scheme as vashr<mode>3, but using
;; ashl<mode>3_unsigned and vlshr<mode>3_imm.
(define_expand "vlshr<mode>3"
  [(set (match_operand:VDQIW 0 "s_register_operand" "")
        (lshiftrt:VDQIW (match_operand:VDQIW 1 "s_register_operand" "")
                        (match_operand:VDQIW 2 "imm_rshift_or_reg_neon" "")))]
  "TARGET_NEON"
{
  if (s_register_operand (operands[2], <MODE>mode))
    {
      rtx neg = gen_reg_rtx (<MODE>mode);
      emit_insn (gen_neg<mode>2 (neg, operands[2]));
      emit_insn (gen_ashl<mode>3_unsigned (operands[0], operands[1], neg));
    }
  else
    emit_insn (gen_vlshr<mode>3_imm (operands[0], operands[1], operands[2]));
  DONE;
})

;; 64-bit shifts

;; This pattern loads a 32-bit shift count into a 64-bit NEON register,
;; leaving the upper half uninitialized.  This is OK since the shift
;; instruction only looks at the low 8 bits anyway.  To avoid confusing
;; data flow analysis however, we pretend the full register is set
;; using an unspec.
(define_insn "neon_load_count"
  [(set (match_operand:DI 0 "s_register_operand" "=w,w")
        (unspec:DI [(match_operand:SI 1 "nonimmediate_operand" "Um,r")]
                   UNSPEC_LOAD_COUNT))]
  "TARGET_NEON"
  "@
   vld1.32\t{%P0[0]}, %A1
   vmov.32\t%P0[0], %1"
  [(set_attr "type" "neon_load1_1reg,neon_from_gp")]
)

;; DImode shift left on a NEON register without any clobbers.  Only
;; matches after reload; a constant count must lie in the range 0..63.
(define_insn "ashldi3_neon_noclobber"
  [(set (match_operand:DI 0 "s_register_operand" "=w,w")
        (ashift:DI (match_operand:DI 1 "s_register_operand" " w,w")
                   (match_operand:DI 2 "reg_or_int_operand" " i,w")))]
  "TARGET_NEON && reload_completed
   && (!CONST_INT_P (operands[2])
       || (INTVAL (operands[2]) >= 0 && INTVAL (operands[2]) < 64))"
  "@
   vshl.u64\t%P0, %P1, %2
   vshl.u64\t%P0, %P1, %P2"
  [(set_attr "type" "neon_shift_imm, neon_shift_reg")]
)

;; DImode shift left, always split after reload.
;; When operand 0 is a VFP register:
;;  - a constant count below 1 becomes a plain DImode move;
;;  - a constant count above 63 is replaced by 63;
;;  - a non-constant count is first loaded into scratch operand 5 with
;;    neon_load_count;
;;  and the shift is then emitted through ashldi3_neon_noclobber.
;; Otherwise the work is delegated to arm_emit_coreregs_64bit_shift,
;; passing scratch operands 3 and 4.
(define_insn_and_split "ashldi3_neon"
  [(set (match_operand:DI 0 "s_register_operand" "= w, w, &r, r, &r, ?w,?w")
        (ashift:DI (match_operand:DI 1 "s_register_operand" " 0w, w, 0r, 0, r, 0w, w")
                   (match_operand:SI 2 "general_operand" "rUm, i, r, i, i,rUm, i")))
   (clobber (match_scratch:SI 3 "= X, X, &r, X, X, X, X"))
   (clobber (match_scratch:SI 4 "= X, X, &r, X, X, X, X"))
   (clobber (match_scratch:DI 5 "=&w, X, X, X, X, &w, X"))
   (clobber (reg:CC_C CC_REGNUM))]
  "TARGET_NEON"
  "#"
  "TARGET_NEON && reload_completed"
  [(const_int 0)]
  "
  {
    if (IS_VFP_REGNUM (REGNO (operands[0])))
      {
        if (CONST_INT_P (operands[2]))
          {
            if (INTVAL (operands[2]) < 1)
              {
                emit_insn (gen_movdi (operands[0], operands[1]));
                DONE;
              }
            else if (INTVAL (operands[2]) > 63)
              operands[2] = gen_rtx_CONST_INT (VOIDmode, 63);
          }
        else
          {
            emit_insn (gen_neon_load_count (operands[5], operands[2]));
            operands[2] = operands[5];
          }

        /* Ditch the unnecessary clobbers.  */
        emit_insn (gen_ashldi3_neon_noclobber (operands[0], operands[1],
                                               operands[2]));
      }
    else
      {
        /* The shift expanders support either full overlap or no overlap.  */
        gcc_assert (!reg_overlap_mentioned_p (operands[0], operands[1])
                    || REGNO (operands[0]) == REGNO (operands[1]));

        arm_emit_coreregs_64bit_shift (ASHIFT, operands[0], operands[1],
                                       operands[2], operands[3], operands[4]);
      }
    DONE;
  }"
  [(set_attr "arch" "neon_for_64bits,neon_for_64bits,*,*,*,avoid_neon_for_64bits,avoid_neon_for_64bits")
   (set_attr "opt" "*,*,speed,speed,speed,*,*")
   (set_attr "type" "multiple")]
)

; The shift amount needs to be negated for right-shifts
(define_insn "signed_shift_di3_neon"
  [(set (match_operand:DI 0 "s_register_operand" "=w")
        (unspec:DI [(match_operand:DI 1 "s_register_operand" " w")
                    (match_operand:DI 2 "s_register_operand" " w")]
                   UNSPEC_ASHIFT_SIGNED))]
  "TARGET_NEON && reload_completed"
  "vshl.s64\t%P0, %P1, %P2"
  [(set_attr "type" "neon_shift_reg")]
)

; The shift amount needs to be negated for right-shifts
(define_insn "unsigned_shift_di3_neon"
  [(set (match_operand:DI 0 "s_register_operand" "=w")
        (unspec:DI [(match_operand:DI 1 "s_register_operand" " w")
                    (match_operand:DI 2 "s_register_operand" " w")]
                   UNSPEC_ASHIFT_UNSIGNED))]
  "TARGET_NEON && reload_completed"
  "vshl.u64\t%P0, %P1, %P2"
  [(set_attr "type" "neon_shift_reg")]
)

;; DImode arithmetic shift right by a constant in the range 1..64, on a
;; NEON register, without clobbers.  Only matches after reload.
(define_insn "ashrdi3_neon_imm_noclobber"
  [(set (match_operand:DI 0 "s_register_operand" "=w")
        (ashiftrt:DI (match_operand:DI 1 "s_register_operand" " w")
                     (match_operand:DI 2 "const_int_operand" " i")))]
  "TARGET_NEON && reload_completed
   && INTVAL (operands[2]) > 0 && INTVAL (operands[2]) <= 64"
  "vshr.s64\t%P0, %P1, %2"
  [(set_attr "type" "neon_shift_imm")]
)

;; DImode logical shift right by a constant in the range 1..64, on a
;; NEON register, without clobbers.  Only matches after reload.
(define_insn "lshrdi3_neon_imm_noclobber"
  [(set (match_operand:DI 0 "s_register_operand" "=w")
        (lshiftrt:DI (match_operand:DI 1 "s_register_operand" " w")
                     (match_operand:DI 2 "const_int_operand" " i")))]
  "TARGET_NEON && reload_completed
   && INTVAL (operands[2]) > 0 && INTVAL (operands[2]) <= 64"
  "vshr.u64\t%P0, %P1, %2"
  [(set_attr "type" "neon_shift_imm")]
)

;; ashrdi3_neon
;; lshrdi3_neon
;; DImode right shifts, always split after reload.
;; When operand 0 is a VFP register:
;;  - a constant count below 1 becomes a plain DImode move;
;;  - a constant count above 64 is replaced by 64, and the shift goes
;;    through the matching *_imm_noclobber pattern;
;;  - a register count is negated into scratch operand 3, loaded into
;;    scratch operand 5 with neon_load_count, and used as a left-shift
;;    count by the <shifttype>_shift_di3_neon pattern.
;; Otherwise the work is delegated to arm_emit_coreregs_64bit_shift.
(define_insn_and_split "<shift>di3_neon"
  [(set (match_operand:DI 0 "s_register_operand" "= w, w, &r, r, &r,?w,?w")
        (RSHIFTS:DI (match_operand:DI 1 "s_register_operand" " 0w, w, 0r, 0, r,0w, w")
                    (match_operand:SI 2 "reg_or_int_operand" " r, i, r, i, i, r, i")))
   (clobber (match_scratch:SI 3 "=2r, X, &r, X, X,2r, X"))
   (clobber (match_scratch:SI 4 "= X, X, &r, X, X, X, X"))
   (clobber (match_scratch:DI 5 "=&w, X, X, X, X,&w, X"))
   (clobber (reg:CC CC_REGNUM))]
  "TARGET_NEON"
  "#"
  "TARGET_NEON && reload_completed"
  [(const_int 0)]
  "
  {
    if (IS_VFP_REGNUM (REGNO (operands[0])))
      {
        if (CONST_INT_P (operands[2]))
          {
            if (INTVAL (operands[2]) < 1)
              {
                emit_insn (gen_movdi (operands[0], operands[1]));
                DONE;
              }
            else if (INTVAL (operands[2]) > 64)
              operands[2] = gen_rtx_CONST_INT (VOIDmode, 64);

            /* Ditch the unnecessary clobbers.  */
            emit_insn (gen_<shift>di3_neon_imm_noclobber (operands[0],
                                                          operands[1],
                                                          operands[2]));
          }
        else
          {
            /* We must use a negative left-shift.  */
            emit_insn (gen_negsi2 (operands[3], operands[2]));
            emit_insn (gen_neon_load_count (operands[5], operands[3]));
            emit_insn (gen_<shifttype>_shift_di3_neon (operands[0], operands[1],
                                                       operands[5]));
          }
      }
    else
      {
        /* The shift expanders support either full overlap or no overlap.  */
        gcc_assert (!reg_overlap_mentioned_p (operands[0], operands[1])
                    || REGNO (operands[0]) == REGNO (operands[1]));

        /* This clobbers CC (ASHIFTRT by register only).  */
        arm_emit_coreregs_64bit_shift (<CODE>, operands[0], operands[1],
                                       operands[2], operands[3], operands[4]);
      }

    DONE;
  }"
  [(set_attr "arch" "neon_for_64bits,neon_for_64bits,*,*,*,avoid_neon_for_64bits,avoid_neon_for_64bits")
   (set_attr "opt" "*,*,speed,speed,speed,*,*")
   (set_attr "type" "multiple")]
)

;; Widening operations

;; Signed widening sum of a quad-word vector into a double-width
;; accumulator.  Operand 2 is copied to operand 0 when they differ, then
;; the two halves of operand 1 are added in with the lo and hi patterns
;; below.
(define_expand "widen_ssum<mode>3"
  [(set (match_operand:<V_double_width> 0 "s_register_operand" "")
        (plus:<V_double_width>
          (sign_extend:<V_double_width>
            (match_operand:VQI 1 "s_register_operand" ""))
          (match_operand:<V_double_width> 2 "s_register_operand" "")))]
  "TARGET_NEON"
  {
    machine_mode mode = GET_MODE (operands[1]);
    rtx p1, p2;

    p1 = arm_simd_vect_par_cnst_half (mode, false);
    p2 = arm_simd_vect_par_cnst_half (mode, true);

    if (operands[0] != operands[2])
      emit_move_insn (operands[0], operands[2]);

    emit_insn (gen_vec_sel_widen_ssum_lo<mode><V_half>3 (operands[0],
                                                         operands[1],
                                                         p1,
                                                         operands[0]));
    emit_insn (gen_vec_sel_widen_ssum_hi<mode><V_half>3 (operands[0],
                                                         operands[1],
                                                         p2,
                                                         operands[0]));
    DONE;
  }
)

;; Signed vaddw of the low-selected half of operand 1 into accumulator
;; operand 3 (tied to operand 0).  The D-register half printed depends on
;; BYTES_BIG_ENDIAN.
;; Operand 1 carries no '%' modifier: the operand that follows it is the
;; lane-selector parallel, so the two are not interchangeable.
(define_insn "vec_sel_widen_ssum_lo<mode><V_half>3"
  [(set (match_operand:<V_double_width> 0 "s_register_operand" "=w")
        (plus:<V_double_width>
          (sign_extend:<V_double_width>
            (vec_select:<V_HALF>
              (match_operand:VQI 1 "s_register_operand" "w")
              (match_operand:VQI 2 "vect_par_constant_low" "")))
          (match_operand:<V_double_width> 3 "s_register_operand" "0")))]
  "TARGET_NEON"
{
  return BYTES_BIG_ENDIAN ? "vaddw.<V_s_elem>\t%q0, %q3, %f1" :
    "vaddw.<V_s_elem>\t%q0, %q3, %e1";
}
  [(set_attr "type" "neon_add_widen")])

;; As above for the high-selected half; the endian choice is mirrored.
(define_insn "vec_sel_widen_ssum_hi<mode><V_half>3"
  [(set (match_operand:<V_double_width> 0 "s_register_operand" "=w")
        (plus:<V_double_width>
          (sign_extend:<V_double_width>
            (vec_select:<V_HALF>
              (match_operand:VQI 1 "s_register_operand" "w")
              (match_operand:VQI 2 "vect_par_constant_high" "")))
          (match_operand:<V_double_width> 3 "s_register_operand" "0")))]
  "TARGET_NEON"
{
  return BYTES_BIG_ENDIAN ? "vaddw.<V_s_elem>\t%q0, %q3, %e1" :
    "vaddw.<V_s_elem>\t%q0, %q3, %f1";
}
  [(set_attr "type" "neon_add_widen")])

;; Signed widening add of a narrow vector (operand 1) to a wide vector
;; (operand 2).  The operands have different modes, so operand 1 is not
;; marked commutative.
(define_insn "widen_ssum<mode>3"
  [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
        (plus:<V_widen>
          (sign_extend:<V_widen>
            (match_operand:VW 1 "s_register_operand" "w"))
          (match_operand:<V_widen> 2 "s_register_operand" "w")))]
  "TARGET_NEON"
  "vaddw.<V_s_elem>\t%q0, %q2, %P1"
  [(set_attr "type" "neon_add_widen")]
)

;; Unsigned counterpart of the quad-word widen_ssum<mode>3 expander.
(define_expand "widen_usum<mode>3"
  [(set (match_operand:<V_double_width> 0 "s_register_operand" "")
        (plus:<V_double_width>
          (zero_extend:<V_double_width>
            (match_operand:VQI 1 "s_register_operand" ""))
          (match_operand:<V_double_width> 2 "s_register_operand" "")))]
  "TARGET_NEON"
  {
    machine_mode mode = GET_MODE (operands[1]);
    rtx p1, p2;

    p1 = arm_simd_vect_par_cnst_half (mode, false);
    p2 = arm_simd_vect_par_cnst_half (mode, true);

    if (operands[0] != operands[2])
      emit_move_insn (operands[0], operands[2]);

    emit_insn (gen_vec_sel_widen_usum_lo<mode><V_half>3 (operands[0],
                                                         operands[1],
                                                         p1,
                                                         operands[0]));
    emit_insn (gen_vec_sel_widen_usum_hi<mode><V_half>3 (operands[0],
                                                         operands[1],
                                                         p2,
                                                         operands[0]));
    DONE;
  }
)

;; Unsigned vaddw of the low-selected half of operand 1 into accumulator
;; operand 3 (tied to operand 0).  Operand 1 carries no '%' modifier
;; because the following operand is the lane-selector parallel.
(define_insn "vec_sel_widen_usum_lo<mode><V_half>3"
  [(set (match_operand:<V_double_width> 0 "s_register_operand" "=w")
        (plus:<V_double_width>
          (zero_extend:<V_double_width>
            (vec_select:<V_HALF>
              (match_operand:VQI 1 "s_register_operand" "w")
              (match_operand:VQI 2 "vect_par_constant_low" "")))
          (match_operand:<V_double_width> 3 "s_register_operand" "0")))]
  "TARGET_NEON"
{
  return BYTES_BIG_ENDIAN ? "vaddw.<V_u_elem>\t%q0, %q3, %f1" :
    "vaddw.<V_u_elem>\t%q0, %q3, %e1";
}
  [(set_attr "type" "neon_add_widen")])

;; As above for the high-selected half; the endian choice is mirrored.
(define_insn "vec_sel_widen_usum_hi<mode><V_half>3"
  [(set (match_operand:<V_double_width> 0 "s_register_operand" "=w")
        (plus:<V_double_width>
          (zero_extend:<V_double_width>
            (vec_select:<V_HALF>
              (match_operand:VQI 1 "s_register_operand" "w")
              (match_operand:VQI 2 "vect_par_constant_high" "")))
          (match_operand:<V_double_width> 3 "s_register_operand" "0")))]
  "TARGET_NEON"
{
  return BYTES_BIG_ENDIAN ? "vaddw.<V_u_elem>\t%q0, %q3, %e1" :
    "vaddw.<V_u_elem>\t%q0, %q3, %f1";
}
  [(set_attr "type" "neon_add_widen")])

;; Unsigned widening add of a narrow vector (operand 1) to a wide vector
;; (operand 2).  The operands have different modes, so operand 1 is not
;; marked commutative.
(define_insn "widen_usum<mode>3"
  [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
        (plus:<V_widen> (zero_extend:<V_widen>
                          (match_operand:VW 1 "s_register_operand" "w"))
                        (match_operand:<V_widen> 2 "s_register_operand" "w")))]
  "TARGET_NEON"
  "vaddw.<V_u_elem>\t%q0, %q2, %P1"
  [(set_attr "type" "neon_add_widen")]
)

;; Helpers for quad-word reduction operations

; Add (or smin, smax...) the low N/2 elements of the N-element vector
; operand[1] to the high N/2 elements of same.  Put the result in operand[0], an
; N/2-element vector.

;; Combine lanes 0-1 of the V4SI operand 1 with lanes 2-3, using the
;; VQH_OPS operation, into the V2SI operand 0.
(define_insn "quad_halves_<code>v4si"
  [(set (match_operand:V2SI 0 "s_register_operand" "=w")
        (VQH_OPS:V2SI
          (vec_select:V2SI (match_operand:V4SI 1 "s_register_operand" "w")
                           (parallel [(const_int 0) (const_int 1)]))
          (vec_select:V2SI (match_dup 1)
                           (parallel [(const_int 2) (const_int 3)]))))]
  "TARGET_NEON"
  "<VQH_mnem>.<VQH_sign>32\t%P0, %e1, %f1"
  [(set_attr "vqh_mnem" "<VQH_mnem>")
   (set_attr "type" "neon_reduc_<VQH_type>_q")]
)

;; V4SF variant (VQHS_OPS); additionally requires
;; flag_unsafe_math_optimizations.
(define_insn "quad_halves_<code>v4sf"
  [(set (match_operand:V2SF 0 "s_register_operand" "=w")
        (VQHS_OPS:V2SF
          (vec_select:V2SF (match_operand:V4SF 1 "s_register_operand" "w")
                           (parallel [(const_int 0) (const_int 1)]))
          (vec_select:V2SF (match_dup 1)
                           (parallel [(const_int 2) (const_int 3)]))))]
  "TARGET_NEON && flag_unsafe_math_optimizations"
  "<VQH_mnem>.f32\t%P0, %e1, %f1"
  [(set_attr "vqh_mnem" "<VQH_mnem>")
   (set_attr "type" "neon_fp_reduc_<VQH_type>_s_q")]
)

;; V8HI variant.  Operand 0 is only written, so it uses the '=' modifier
;; like the V4SI and V4SF variants.
(define_insn "quad_halves_<code>v8hi"
  [(set (match_operand:V4HI 0 "s_register_operand" "=w")
        (VQH_OPS:V4HI
          (vec_select:V4HI (match_operand:V8HI 1 "s_register_operand" "w")
                           (parallel [(const_int 0) (const_int 1)
                                      (const_int 2) (const_int 3)]))
          (vec_select:V4HI (match_dup 1)
                           (parallel [(const_int 4) (const_int 5)
                                      (const_int 6) (const_int 7)]))))]
  "TARGET_NEON"
  "<VQH_mnem>.<VQH_sign>16\t%P0, %e1, %f1"
  [(set_attr "vqh_mnem" "<VQH_mnem>")
   (set_attr "type" "neon_reduc_<VQH_type>_q")]
)

;; V16QI variant.  Operand 0 is only written, so it uses the '=' modifier
;; like the V4SI and V4SF variants.
(define_insn "quad_halves_<code>v16qi"
  [(set (match_operand:V8QI 0 "s_register_operand" "=w")
        (VQH_OPS:V8QI
          (vec_select:V8QI (match_operand:V16QI 1 "s_register_operand" "w")
                           (parallel [(const_int 0) (const_int 1)
                                      (const_int 2) (const_int 3)
                                      (const_int 4) (const_int 5)
                                      (const_int 6) (const_int 7)]))
          (vec_select:V8QI (match_dup 1)
                           (parallel [(const_int 8) (const_int 9)
                                      (const_int 10) (const_int 11)
                                      (const_int 12) (const_int 13)
                                      (const_int 14) (const_int 15)]))))]
  "TARGET_NEON"
  "<VQH_mnem>.<VQH_sign>8\t%P0, %e1, %f1"
  [(set_attr "vqh_mnem" "<VQH_mnem>")
   (set_attr "type" "neon_reduc_<VQH_type>_q")]
)

;; Store the half-width operand 1 into the subreg of operand 0 at byte
;; offset GET_MODE_SIZE (<V_HALF>mode).
(define_expand "move_hi_quad_<mode>"
 [(match_operand:ANY128 0 "s_register_operand" "")
  (match_operand:<V_HALF> 1 "s_register_operand" "")]
 "TARGET_NEON"
{
  emit_move_insn (simplify_gen_subreg (<V_HALF>mode, operands[0], <MODE>mode,
                                       GET_MODE_SIZE (<V_HALF>mode)),
                  operands[1]);
  DONE;
})

;; Store the half-width operand 1 into the subreg of operand 0 at byte
;; offset 0.
(define_expand "move_lo_quad_<mode>"
 [(match_operand:ANY128 0 "s_register_operand" "")
  (match_operand:<V_HALF> 1 "s_register_operand" "")]
 "TARGET_NEON"
{
  emit_move_insn (simplify_gen_subreg (<V_HALF>mode, operands[0],
                                       <MODE>mode, 0),
                  operands[1]);
  DONE;
})

;; Reduction operations

;; Sum reduction of a double-word vector to a scalar: calls
;; neon_pairwise_reduce with the vpadd generator, then extracts element 0.
;; Float modes require flag_unsafe_math_optimizations.
(define_expand "reduc_plus_scal_<mode>"
  [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
   (match_operand:VD 1 "s_register_operand" "")]
  "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
{
  rtx vec = gen_reg_rtx (<MODE>mode);
  neon_pairwise_reduce (vec, operands[1], <MODE>mode,
                        &gen_neon_vpadd_internal<mode>);
  /* The same result is actually computed into every element.  */
  emit_insn (gen_vec_extract<mode><V_elem_l> (operands[0], vec, const0_rtx));
  DONE;
})

;; Sum reduction of a quad-word vector: add the two halves with
;; quad_halves_plus, then reduce the half-width result.  Disabled for
;; BYTES_BIG_ENDIAN.
(define_expand "reduc_plus_scal_<mode>"
  [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
   (match_operand:VQ 1 "s_register_operand" "")]
  "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)
   && !BYTES_BIG_ENDIAN"
{
  rtx step1 = gen_reg_rtx (<V_HALF>mode);

  emit_insn (gen_quad_halves_plus<mode> (step1, operands[1]));
  emit_insn (gen_reduc_plus_scal_<V_half> (operands[0], step1));

  DONE;
})

;; Sum reduction of V2DI: run arm_reduc_plus_internal_v2di into a fresh
;; V2DI pseudo and extract element 0.
(define_expand "reduc_plus_scal_v2di"
  [(match_operand:DI 0 "nonimmediate_operand" "=w")
   (match_operand:V2DI 1 "s_register_operand" "")]
  "TARGET_NEON && !BYTES_BIG_ENDIAN"
{
  rtx vec = gen_reg_rtx (V2DImode);

  emit_insn (gen_arm_reduc_plus_internal_v2di (vec, operands[1]));
  emit_insn (gen_vec_extractv2didi (operands[0], vec, const0_rtx));

  DONE;
})

;; Adds the two D halves of operand 1 into the low D half of operand 0
;; with vadd.i64; represented as UNSPEC_VPADD.
(define_insn "arm_reduc_plus_internal_v2di"
  [(set (match_operand:V2DI 0 "s_register_operand" "=w")
        (unspec:V2DI [(match_operand:V2DI 1 "s_register_operand" "w")]
                     UNSPEC_VPADD))]
  "TARGET_NEON && !BYTES_BIG_ENDIAN"
  "vadd.i64\t%e0, %e1, %f1"
  [(set_attr "type" "neon_add_q")]
)

;; Signed-minimum reduction of a double-word vector, via
;; neon_pairwise_reduce with the vpsmin generator.
(define_expand "reduc_smin_scal_<mode>"
  [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
   (match_operand:VD 1 "s_register_operand" "")]
  "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
{
  rtx vec = gen_reg_rtx (<MODE>mode);

  neon_pairwise_reduce (vec, operands[1], <MODE>mode,
                        &gen_neon_vpsmin<mode>);
  /* The result is computed into every element of the vector.  */
  emit_insn (gen_vec_extract<mode><V_elem_l> (operands[0], vec, const0_rtx));
  DONE;
})

;; Signed-minimum reduction of a quad-word vector: combine the halves
;; with quad_halves_smin, then reduce the half-width result.
(define_expand "reduc_smin_scal_<mode>"
  [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
   (match_operand:VQ 1 "s_register_operand" "")]
  "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)
   && !BYTES_BIG_ENDIAN"
{
  rtx step1 = gen_reg_rtx (<V_HALF>mode);

  emit_insn (gen_quad_halves_smin<mode> (step1, operands[1]));
  emit_insn (gen_reduc_smin_scal_<V_half> (operands[0], step1));

  DONE;
})

;; Signed-maximum reduction of a double-word vector, via
;; neon_pairwise_reduce with the vpsmax generator.
(define_expand "reduc_smax_scal_<mode>"
  [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
   (match_operand:VD 1 "s_register_operand" "")]
  "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
{
  rtx vec = gen_reg_rtx (<MODE>mode);
  neon_pairwise_reduce (vec, operands[1], <MODE>mode,
                        &gen_neon_vpsmax<mode>);
  /* The result is computed into every element of the vector.  */
  emit_insn (gen_vec_extract<mode><V_elem_l> (operands[0], vec, const0_rtx));
  DONE;
})

;; Signed-maximum reduction of a quad-word vector: combine the halves
;; with quad_halves_smax, then reduce the half-width result.
(define_expand "reduc_smax_scal_<mode>"
  [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
   (match_operand:VQ 1 "s_register_operand" "")]
  "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)
   && !BYTES_BIG_ENDIAN"
{
  rtx step1 = gen_reg_rtx (<V_HALF>mode);

  emit_insn (gen_quad_halves_smax<mode> (step1, operands[1]));
  emit_insn (gen_reduc_smax_scal_<V_half> (operands[0], step1));

  DONE;
})

;; Unsigned-minimum reduction of a double-word integer vector, via
;; neon_pairwise_reduce with the vpumin generator.
(define_expand "reduc_umin_scal_<mode>"
  [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
   (match_operand:VDI 1 "s_register_operand" "")]
  "TARGET_NEON"
{
  rtx vec = gen_reg_rtx (<MODE>mode);
  neon_pairwise_reduce (vec, operands[1], <MODE>mode,
                        &gen_neon_vpumin<mode>);
  /* The result is computed into every element of the vector.  */
  emit_insn (gen_vec_extract<mode><V_elem_l> (operands[0], vec, const0_rtx));
  DONE;
})

;; Unsigned-minimum reduction of a quad-word integer vector: combine the
;; halves with quad_halves_umin, then reduce the half-width result.
(define_expand "reduc_umin_scal_<mode>"
  [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
   (match_operand:VQI 1 "s_register_operand" "")]
  "TARGET_NEON && !BYTES_BIG_ENDIAN"
{
  rtx step1 = gen_reg_rtx (<V_HALF>mode);

  emit_insn (gen_quad_halves_umin<mode> (step1, operands[1]));
  emit_insn (gen_reduc_umin_scal_<V_half> (operands[0], step1));

  DONE;
})

;; Unsigned-maximum reduction of a double-word integer vector, via
;; neon_pairwise_reduce with the vpumax generator.
(define_expand "reduc_umax_scal_<mode>"
  [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
   (match_operand:VDI 1 "s_register_operand" "")]
  "TARGET_NEON"
{
  rtx vec = gen_reg_rtx (<MODE>mode);
  neon_pairwise_reduce (vec, operands[1], <MODE>mode,
                        &gen_neon_vpumax<mode>);
  /* The result is computed into every element of the vector.  */
  emit_insn (gen_vec_extract<mode><V_elem_l> (operands[0], vec, const0_rtx));
  DONE;
})

;; Unsigned-maximum reduction of a quad-word integer vector: combine the
;; halves with quad_halves_umax, then reduce the half-width result.
(define_expand "reduc_umax_scal_<mode>"
  [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
   (match_operand:VQI 1 "s_register_operand" "")]
  "TARGET_NEON && !BYTES_BIG_ENDIAN"
{
  rtx step1 = gen_reg_rtx (<V_HALF>mode);

  emit_insn (gen_quad_halves_umax<mode> (step1, operands[1]));
  emit_insn (gen_reduc_umax_scal_<V_half> (operands[0], step1));

  DONE;
})

;; Pairwise add of two double-word vectors (vpadd), as UNSPEC_VPADD.
(define_insn "neon_vpadd_internal<mode>"
  [(set (match_operand:VD 0 "s_register_operand" "=w")
        (unspec:VD [(match_operand:VD 1 "s_register_operand" "w")
                    (match_operand:VD 2 "s_register_operand" "w")]
                   UNSPEC_VPADD))]
  "TARGET_NEON"
  "vpadd.<V_if_elem>\t%P0, %P1, %P2"
  ;; Assume this schedules like vadd.
  [(set (attr "type")
        (if_then_else (match_test "<Is_float_mode>")
                      (const_string "neon_fp_reduc_add_s<q>")
                      (const_string "neon_reduc_add<q>")))]
)

;; Half-precision pairwise add, available under TARGET_NEON_FP16INST.
(define_insn "neon_vpaddv4hf"
  [(set
     (match_operand:V4HF 0 "s_register_operand" "=w")
     (unspec:V4HF [(match_operand:V4HF 1 "s_register_operand" "w")
                   (match_operand:V4HF 2 "s_register_operand" "w")]
                  UNSPEC_VPADD))]
  "TARGET_NEON_FP16INST"
  "vpadd.f16\t%P0, %P1, %P2"
  [(set_attr "type" "neon_reduc_add")]
)

;; Pairwise signed minimum (vpmin), as UNSPEC_VPSMIN.
(define_insn "neon_vpsmin<mode>"
  [(set (match_operand:VD 0 "s_register_operand" "=w")
        (unspec:VD [(match_operand:VD 1 "s_register_operand" "w")
                    (match_operand:VD 2 "s_register_operand" "w")]
                   UNSPEC_VPSMIN))]
  "TARGET_NEON"
  "vpmin.<V_s_elem>\t%P0, %P1, %P2"
  [(set (attr "type")
        (if_then_else (match_test "<Is_float_mode>")
                      (const_string "neon_fp_reduc_minmax_s<q>")
                      (const_string "neon_reduc_minmax<q>")))]
)

;; Pairwise signed maximum (vpmax), as UNSPEC_VPSMAX.
(define_insn "neon_vpsmax<mode>"
  [(set (match_operand:VD 0 "s_register_operand" "=w")
        (unspec:VD [(match_operand:VD 1 "s_register_operand" "w")
                    (match_operand:VD 2 "s_register_operand" "w")]
                   UNSPEC_VPSMAX))]
  "TARGET_NEON"
  "vpmax.<V_s_elem>\t%P0, %P1, %P2"
  [(set (attr "type")
        (if_then_else (match_test "<Is_float_mode>")
                      (const_string "neon_fp_reduc_minmax_s<q>")
                      (const_string "neon_reduc_minmax<q>")))]
)

;; Pairwise unsigned minimum (vpmin), as UNSPEC_VPUMIN.
(define_insn "neon_vpumin<mode>"
  [(set (match_operand:VDI 0 "s_register_operand" "=w")
        (unspec:VDI [(match_operand:VDI 1 "s_register_operand" "w")
                     (match_operand:VDI 2 "s_register_operand" "w")]
                    UNSPEC_VPUMIN))]
  "TARGET_NEON"
  "vpmin.<V_u_elem>\t%P0, %P1, %P2"
  [(set_attr "type" "neon_reduc_minmax<q>")]
)

;; Pairwise unsigned maximum (vpmax), as UNSPEC_VPUMAX.
(define_insn "neon_vpumax<mode>"
  [(set (match_operand:VDI 0 "s_register_operand" "=w")
        (unspec:VDI [(match_operand:VDI 1 "s_register_operand" "w")
                     (match_operand:VDI 2 "s_register_operand" "w")]
                    UNSPEC_VPUMAX))]
  "TARGET_NEON"
  "vpmax.<V_u_elem>\t%P0, %P1, %P2"
  [(set_attr "type" "neon_reduc_minmax<q>")]
)

;; Saturating arithmetic

; NOTE: Neon supports many more saturating variants of instructions than the
; following, but these are all GCC currently understands.
; FIXME: Actually, GCC doesn't know how to create saturating add/sub by itself
; yet either, although these patterns may be used by intrinsics when they're
; added.

;; Signed saturating add (vqadd).
(define_insn "*ss_add<mode>_neon"
  [(set (match_operand:VD 0 "s_register_operand" "=w")
        (ss_plus:VD (match_operand:VD 1 "s_register_operand" "w")
                    (match_operand:VD 2 "s_register_operand" "w")))]
  "TARGET_NEON"
  "vqadd.<V_s_elem>\t%P0, %P1, %P2"
  [(set_attr "type" "neon_qadd<q>")]
)

;; Unsigned saturating add (vqadd).
(define_insn "*us_add<mode>_neon"
  [(set (match_operand:VD 0 "s_register_operand" "=w")
        (us_plus:VD (match_operand:VD 1 "s_register_operand" "w")
                    (match_operand:VD 2 "s_register_operand" "w")))]
  "TARGET_NEON"
  "vqadd.<V_u_elem>\t%P0, %P1, %P2"
  [(set_attr "type" "neon_qadd<q>")]
)

;; Signed saturating subtract (vqsub).
(define_insn "*ss_sub<mode>_neon"
  [(set (match_operand:VD 0 "s_register_operand" "=w")
        (ss_minus:VD (match_operand:VD 1 "s_register_operand" "w")
                     (match_operand:VD 2 "s_register_operand" "w")))]
  "TARGET_NEON"
  "vqsub.<V_s_elem>\t%P0, %P1, %P2"
  [(set_attr "type" "neon_qsub<q>")]
)

;; Unsigned saturating subtract (vqsub).
(define_insn "*us_sub<mode>_neon"
  [(set (match_operand:VD 0 "s_register_operand" "=w")
        (us_minus:VD (match_operand:VD 1 "s_register_operand" "w")
                     (match_operand:VD 2 "s_register_operand" "w")))]
  "TARGET_NEON"
  "vqsub.<V_u_elem>\t%P0, %P1, %P2"
  [(set_attr "type" "neon_qsub<q>")]
)

;; Conditional instructions.  These are comparisons with conditional moves for
;; vectors.
;; They perform the assignment:
;;
;;   Vop0 = (Vop4 <op3> Vop5) ? Vop1 : Vop2;
;;
;; where op3 is <, <=, ==, !=, >= or >.  Operations are performed
;; element-wise.

;; Vector conditional select for signed and floating-point comparisons.
;; The expander builds a per-lane mask in a fresh <V_cmp_result> register
;; from operands 4 and 5 using vcge/vcgt/vceq (or the compare-against-zero
;; forms vclt/vcle), then emits a vbsl that picks operand 1 or operand 2.
;; Comparisons that cannot be emitted directly are handled by emitting the
;; opposite comparison and swapping the two data operands of the vbsl.

(define_expand "vcond<mode><mode>"
  [(set (match_operand:VDQW 0 "s_register_operand" "")
        (if_then_else:VDQW
          (match_operator 3 "comparison_operator"
            [(match_operand:VDQW 4 "s_register_operand" "")
             (match_operand:VDQW 5 "nonmemory_operand" "")])
          (match_operand:VDQW 1 "s_register_operand" "")
          (match_operand:VDQW 2 "s_register_operand" "")))]
  "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
{
  int inverse = 0;
  int use_zero_form = 0;
  int swap_bsl_operands = 0;
  rtx mask = gen_reg_rtx (<V_cmp_result>mode);
  rtx tmp = gen_reg_rtx (<V_cmp_result>mode);

  /* base_comparison is applied to (a, b); complementary_comparison is the
     generator to use when the operands are exchanged to (b, a).  */
  rtx (*base_comparison) (rtx, rtx, rtx);
  rtx (*complementary_comparison) (rtx, rtx, rtx);

  /* Step 1: decide whether operand 5 may stay a literal zero.  Only the
     codes listed here have a compare-against-zero form; for everything
     else operand 5 has to live in a register.  */
  switch (GET_CODE (operands[3]))
    {
    case GE:
    case GT:
    case LE:
    case LT:
    case EQ:
      if (operands[5] == CONST0_RTX (<MODE>mode))
        {
          use_zero_form = 1;
          break;
        }
      /* Fall through.  */
    default:
      if (!REG_P (operands[5]))
        operands[5] = force_reg (<MODE>mode, operands[5]);
    }

  /* Step 2: pick the pair of generators and note whether the requested
     code is the operand-swapped variant (LT/LE and their UN* forms).  */
  switch (GET_CODE (operands[3]))
    {
    case LT:
    case UNLT:
      inverse = 1;
      /* Fall through.  */
    case GE:
    case UNGE:
    case ORDERED:
    case UNORDERED:
      base_comparison = gen_neon_vcge<mode>;
      complementary_comparison = gen_neon_vcgt<mode>;
      break;
    case LE:
    case UNLE:
      inverse = 1;
      /* Fall through.  */
    case GT:
    case UNGT:
      base_comparison = gen_neon_vcgt<mode>;
      complementary_comparison = gen_neon_vcge<mode>;
      break;
    case EQ:
    case NE:
    case UNEQ:
      base_comparison = gen_neon_vceq<mode>;
      complementary_comparison = gen_neon_vceq<mode>;
      break;
    default:
      gcc_unreachable ();
    }

  /* Step 3: emit the comparison(s) that compute MASK.  */
  switch (GET_CODE (operands[3]))
    {
    case LT:
    case LE:
    case GT:
    case GE:
    case EQ:
      /* The easy case.  Here we emit one of vcge, vcgt or vceq.
         As a LT b <=> b GT a && a LE b <=> b GE a.  Our transformations are:
         a GE b -> a GE b
         a GT b -> a GT b
         a LE b -> b GE a
         a LT b -> b GT a
         a EQ b -> a EQ b
         Note that there also exist direct comparison against 0 forms,
         so catch those as a special case.  */
      if (use_zero_form)
        {
          /* The zero forms take the operands in source order, so no
             operand exchange is wanted here.  */
          inverse = 0;
          switch (GET_CODE (operands[3]))
            {
            case LT:
              base_comparison = gen_neon_vclt<mode>;
              break;
            case LE:
              base_comparison = gen_neon_vcle<mode>;
              break;
            default:
              /* Do nothing, other zero form cases already have the correct
                 base_comparison.  */
              break;
            }
        }

      if (!inverse)
        emit_insn (base_comparison (mask, operands[4], operands[5]));
      else
        emit_insn (complementary_comparison (mask, operands[5], operands[4]));
      break;
    case UNLT:
    case UNLE:
    case UNGT:
    case UNGE:
    case NE:
      /* Vector compare returns false for lanes which are unordered, so if we
         use the inverse of the comparison we actually want to emit, then
         swap the operands to BSL, we will end up with the correct result.
         Note that a NE NaN and NaN NE b are true for all a, b.

         Our transformations are:
         a UNGE b -> !(b GT a)
         a UNGT b -> !(b GE a)
         a UNLE b -> !(a GT b)
         a UNLT b -> !(a GE b)
         a NE b   -> !(a EQ b)  */

      if (inverse)
        emit_insn (base_comparison (mask, operands[4], operands[5]));
      else
        emit_insn (complementary_comparison (mask, operands[5], operands[4]));

      swap_bsl_operands = 1;
      break;
    case UNEQ:
      /* We compute (a > b || b > a), which is true iff
         (a != b && a ORDERED b).  Swapping the operands to BSL selects on
         the negation of that mask, i.e. (a == b || a UNORDERED b), as
         intended.  */

      emit_insn (gen_neon_vcgt<mode> (mask, operands[4], operands[5]));
      emit_insn (gen_neon_vcgt<mode> (tmp, operands[5], operands[4]));
      emit_insn (gen_ior<v_cmp_result>3 (mask, mask, tmp));
      swap_bsl_operands = 1;
      break;
    case UNORDERED:
      /* Operands are ORDERED iff (a > b || b >= a).
         Swapping the operands to BSL will give the UNORDERED case.  */
      swap_bsl_operands = 1;
      /* Fall through.  */
    case ORDERED:
      emit_insn (gen_neon_vcgt<mode> (tmp, operands[4], operands[5]));
      emit_insn (gen_neon_vcge<mode> (mask, operands[5], operands[4]));
      emit_insn (gen_ior<v_cmp_result>3 (mask, mask, tmp));
      break;
    default:
      gcc_unreachable ();
    }

  /* Step 4: select.  With swap_bsl_operands set, MASK holds the negation of
     the requested condition, so the two data operands are exchanged.  */
  if (swap_bsl_operands)
    emit_insn (gen_neon_vbsl<mode> (operands[0], mask, operands[2],
                                    operands[1]));
  else
    emit_insn (gen_neon_vbsl<mode> (operands[0], mask, operands[1],
                                    operands[2]));
  DONE;
})

;; Vector conditional select for unsigned (and equality) comparisons.
;; GEU/GTU map directly onto vcgeu/vcgtu, LEU/LTU use the same generators
;; with the operands exchanged, EQ uses vceq, and NE uses vceq with the two
;; data operands of the final vbsl exchanged.
;;
;; The compare-against-zero generators gen_neon_vcle/gen_neon_vclt are
;; deliberately not used here: vcond above uses them for the signed LE/LT
;; codes, so they would give the wrong mask for LEU/LTU on lanes whose top
;; bit is set.  A literal zero in operand 5 is therefore kept only for
;; EQ/NE (vcond hands gen_neon_vceq a zero in the same way); every other
;; code gets operand 5 in a register.

(define_expand "vcondu<mode><mode>"
  [(set (match_operand:VDQIW 0 "s_register_operand" "")
        (if_then_else:VDQIW
          (match_operator 3 "arm_comparison_operator"
            [(match_operand:VDQIW 4 "s_register_operand" "")
             (match_operand:VDQIW 5 "s_register_operand" "")])
          (match_operand:VDQIW 1 "s_register_operand" "")
          (match_operand:VDQIW 2 "s_register_operand" "")))]
  "TARGET_NEON"
{
  rtx mask;
  int inverse = 0;
  int keep_zero;

  mask = gen_reg_rtx (<V_cmp_result>mode);

  /* Equality does not depend on signedness, so the zero form of vceq is
     safe; the ordering comparisons below are all register-register.  */
  keep_zero = (operands[5] == CONST0_RTX (<MODE>mode)
               && (GET_CODE (operands[3]) == EQ
                   || GET_CODE (operands[3]) == NE));

  if (!keep_zero && !REG_P (operands[5]))
    operands[5] = force_reg (<MODE>mode, operands[5]);

  switch (GET_CODE (operands[3]))
    {
    case GEU:
      emit_insn (gen_neon_vcgeu<mode> (mask, operands[4], operands[5]));
      break;

    case GTU:
      emit_insn (gen_neon_vcgtu<mode> (mask, operands[4], operands[5]));
      break;

    case EQ:
      emit_insn (gen_neon_vceq<mode> (mask, operands[4], operands[5]));
      break;

    case LEU:
      /* a LEU b <=> b GEU a.  */
      emit_insn (gen_neon_vcgeu<mode> (mask, operands[5], operands[4]));
      break;

    case LTU:
      /* a LTU b <=> b GTU a.  */
      emit_insn (gen_neon_vcgtu<mode> (mask, operands[5], operands[4]));
      break;

    case NE:
      /* Compute EQ and exchange the data operands of the select.  */
      emit_insn (gen_neon_vceq<mode> (mask, operands[4], operands[5]));
      inverse = 1;
      break;

    default:
      gcc_unreachable ();
    }

  if (inverse)
    emit_insn (gen_neon_vbsl<mode> (operands[0], mask, operands[2],
                                    operands[1]));
  else
    emit_insn (gen_neon_vbsl<mode> (operands[0], mask, operands[1],
                                    operands[2]));

  DONE;
})

;; Patterns for builtins.

; good for plain vadd, vaddq.

;; Builtin expander for vadd on the VCVTF modes.  Emits the generic
;; add<mode>3 pattern when the mode is not a float mode or when
;; -funsafe-math-optimizations is in effect, and the unspec form otherwise.

(define_expand "neon_vadd<mode>"
  [(match_operand:VCVTF 0 "s_register_operand" "=w")
   (match_operand:VCVTF 1 "s_register_operand" "w")
   (match_operand:VCVTF 2 "s_register_operand" "w")]
  "TARGET_NEON"
{
  if (!<Is_float_mode> || flag_unsafe_math_optimizations)
    emit_insn (gen_add<mode>3 (operands[0], operands[1], operands[2]));
  else
    emit_insn (gen_neon_vadd<mode>_unspec (operands[0], operands[1],
                                           operands[2]));
  DONE;
})

;; Builtin expander for vadd on the VH modes; always forwards to
;; add<mode>3_fp16.

(define_expand "neon_vadd<mode>"
  [(match_operand:VH 0 "s_register_operand")
   (match_operand:VH 1 "s_register_operand")
   (match_operand:VH 2 "s_register_operand")]
  "TARGET_NEON_FP16INST"
{
  emit_insn (gen_add<mode>3_fp16 (operands[0], operands[1], operands[2]));
  DONE;
})

;; Builtin expander for vsub on the VH modes; always forwards to
;; sub<mode>3_fp16.

(define_expand "neon_vsub<mode>"
  [(match_operand:VH 0 "s_register_operand")
   (match_operand:VH 1 "s_register_operand")
   (match_operand:VH 2 "s_register_operand")]
  "TARGET_NEON_FP16INST"
{
  emit_insn (gen_sub<mode>3_fp16 (operands[0], operands[1], operands[2]));
  DONE;
})

; Note that NEON operations don't support the full IEEE 754 standard: in
; particular, denormal values are flushed to zero.  This means that GCC cannot
; use those instructions for autovectorization, etc. unless
; -funsafe-math-optimizations is in effect (in which case flush-to-zero
; behavior is permissible).
; Intrinsic operations (provided by the arm_neon.h
; header) must work in either case: if -funsafe-math-optimizations is given,
; intrinsics expand to "canonical" RTL where possible, otherwise intrinsics
; expand to unspecs (which may potentially limit the extent to which they might
; be optimized by generic code).

; Used for intrinsics when flag_unsafe_math_optimizations is false.

;; vadd kept opaque as UNSPEC_VADD.  The "type" attribute is chosen per
;; mode: the floating-point add/sub type when <Is_float_mode> holds, the
;; integer add type otherwise.

(define_insn "neon_vadd<mode>_unspec"
  [(set
     (match_operand:VCVTF 0 "s_register_operand" "=w")
     (unspec:VCVTF
       [(match_operand:VCVTF 1 "s_register_operand" "w")
        (match_operand:VCVTF 2 "s_register_operand" "w")]
       UNSPEC_VADD))]
  "TARGET_NEON"
  "vadd.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set (attr "type")
        (if_then_else
          (match_test "<Is_float_mode>")
          (const_string "neon_fp_addsub_s<q>")
          (const_string "neon_add<q>")))]
)

;; vaddl: both sources have the VDI mode (printed with %P), the result has
;; the corresponding <V_widen> mode (printed with %q).

(define_insn "neon_vaddl<sup><mode>"
  [(set
     (match_operand:<V_widen> 0 "s_register_operand" "=w")
     (unspec:<V_widen>
       [(match_operand:VDI 1 "s_register_operand" "w")
        (match_operand:VDI 2 "s_register_operand" "w")]
       VADDL))]
  "TARGET_NEON"
  "vaddl.<sup>%#<V_sz_elem>\t%q0, %P1, %P2"
  [(set_attr "type" "neon_add_long")]
)

;; vaddw: the first source already has the <V_widen> mode of the result;
;; only the second source has the VDI mode.

(define_insn "neon_vaddw<sup><mode>"
  [(set
     (match_operand:<V_widen> 0 "s_register_operand" "=w")
     (unspec:<V_widen>
       [(match_operand:<V_widen> 1 "s_register_operand" "w")
        (match_operand:VDI 2 "s_register_operand" "w")]
       VADDW))]
  "TARGET_NEON"
  "vaddw.<sup>%#<V_sz_elem>\t%q0, %q1, %P2"
  [(set_attr "type" "neon_add_widen")]
)

; vhadd and vrhadd.
;; Halving add, optionally rounding (<r>), signed or unsigned (<sup>).
;; The "type" attribute is built with <q>, the same way neon_vqadd below
;; builds its type, so that the type follows the mode of the pattern
;; instead of being fixed to the "_q" variant for every mode.

(define_insn "neon_v<r>hadd<sup><mode>"
  [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
        (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")
                       (match_operand:VDQIW 2 "s_register_operand" "w")]
                      VHADD))]
  "TARGET_NEON"
  "v<r>hadd.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_add_halve<q>")]
)

;; Saturating add as an unspec (VQADD), signed or unsigned (<sup>).

(define_insn "neon_vqadd<sup><mode>"
  [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
        (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
                       (match_operand:VDQIX 2 "s_register_operand" "w")]
                      VQADD))]
  "TARGET_NEON"
  "vqadd.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_qadd<q>")]
)

;; Add and narrow, optionally rounding (<r>).  Both sources have the VN
;; mode and are printed with %q; the result has the <V_narrow> mode and is
;; printed with %P.

(define_insn "neon_v<r>addhn<mode>"
  [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
        (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")
                            (match_operand:VN 2 "s_register_operand" "w")]
                           VADDHN))]
  "TARGET_NEON"
  "v<r>addhn.<V_if_elem>\t%P0, %q1, %q2"
  [(set_attr "type" "neon_add_halve_narrow_q")]
)

;; Polynomial and Float multiplication.
;; vmul kept opaque as UNSPEC_VMUL over the VPF modes.  The "type"
;; attribute is the floating-point multiply type when <Is_float_mode>
;; holds and the integer multiply type otherwise.

(define_insn "neon_vmul<pf><mode>"
  [(set
     (match_operand:VPF 0 "s_register_operand" "=w")
     (unspec:VPF
       [(match_operand:VPF 1 "s_register_operand" "w")
        (match_operand:VPF 2 "s_register_operand" "w")]
       UNSPEC_VMUL))]
  "TARGET_NEON"
  "vmul.<pf>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set (attr "type")
        (if_then_else
          (match_test "<Is_float_mode>")
          (const_string "neon_fp_mul_s<q>")
          (const_string "neon_mul_<V_elem_ch><q>")))]
)

;; Canonical (mult) vmul.f16 for the VH modes; only enabled together with
;; flag_unsafe_math_optimizations.

(define_insn "mul<mode>3"
  [(set
     (match_operand:VH 0 "s_register_operand" "=w")
     (mult:VH
       (match_operand:VH 1 "s_register_operand" "w")
       (match_operand:VH 2 "s_register_operand" "w")))]
  "TARGET_NEON_FP16INST && flag_unsafe_math_optimizations"
  "vmul.f16\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_mul_<VH_elem_ch><q>")]
)

;; Same RTL and assembler output as mul<mode>3 above, but enabled whenever
;; TARGET_NEON_FP16INST holds.

(define_insn "neon_vmulf<mode>"
  [(set
     (match_operand:VH 0 "s_register_operand" "=w")
     (mult:VH
       (match_operand:VH 1 "s_register_operand" "w")
       (match_operand:VH 2 "s_register_operand" "w")))]
  "TARGET_NEON_FP16INST"
  "vmul.f16\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_mul_<VH_elem_ch><q>")]
)

;; Builtin expander for vmla.  A float mode without
;; -funsafe-math-optimizations goes to the unspec pattern; every other
;; case goes to the canonical mul<mode>3add<mode>_neon pattern.

(define_expand "neon_vmla<mode>"
  [(match_operand:VDQW 0 "s_register_operand" "=w")
   (match_operand:VDQW 1 "s_register_operand" "0")
   (match_operand:VDQW 2 "s_register_operand" "w")
   (match_operand:VDQW 3 "s_register_operand" "w")]
  "TARGET_NEON"
{
  if (<Is_float_mode> && !flag_unsafe_math_optimizations)
    emit_insn (gen_neon_vmla<mode>_unspec (operands[0], operands[1],
                                           operands[2], operands[3]));
  else
    emit_insn (gen_mul<mode>3add<mode>_neon (operands[0], operands[1],
                                             operands[2], operands[3]));
  DONE;
})

;; Builtin expander for vfma on the VCVTF modes.  The builtin's operand 1
;; is passed as the last argument of fma<mode>4_intrinsic, after operands
;; 2 and 3.

(define_expand "neon_vfma<VCVTF:mode>"
  [(match_operand:VCVTF 0 "s_register_operand")
   (match_operand:VCVTF 1 "s_register_operand")
   (match_operand:VCVTF 2 "s_register_operand")
   (match_operand:VCVTF 3 "s_register_operand")]
  "TARGET_NEON && TARGET_FMA"
{
  rtx fma_pat = gen_fma<mode>4_intrinsic (operands[0], operands[2],
                                          operands[3], operands[1]);
  emit_insn (fma_pat);
  DONE;
})

;; As above, for the VH modes.

(define_expand "neon_vfma<VH:mode>"
  [(match_operand:VH 0 "s_register_operand")
   (match_operand:VH 1 "s_register_operand")
   (match_operand:VH 2 "s_register_operand")
   (match_operand:VH 3 "s_register_operand")]
  "TARGET_NEON_FP16INST"
{
  rtx fma_pat = gen_fma<mode>4_intrinsic (operands[0], operands[2],
                                          operands[3], operands[1]);
  emit_insn (fma_pat);
  DONE;
})

;; Builtin expander for vfms on the VCVTF modes; same operand reordering
;; as neon_vfma, forwarding to fmsub<mode>4_intrinsic.

(define_expand "neon_vfms<VCVTF:mode>"
  [(match_operand:VCVTF 0 "s_register_operand")
   (match_operand:VCVTF 1 "s_register_operand")
   (match_operand:VCVTF 2 "s_register_operand")
   (match_operand:VCVTF 3 "s_register_operand")]
  "TARGET_NEON && TARGET_FMA"
{
  rtx fms_pat = gen_fmsub<mode>4_intrinsic (operands[0], operands[2],
                                            operands[3], operands[1]);
  emit_insn (fms_pat);
  DONE;
})

;; As above, for the VH modes.

(define_expand "neon_vfms<VH:mode>"
  [(match_operand:VH 0 "s_register_operand")
   (match_operand:VH 1 "s_register_operand")
   (match_operand:VH 2 "s_register_operand")
   (match_operand:VH 3 "s_register_operand")]
  "TARGET_NEON_FP16INST"
{
  rtx fms_pat = gen_fmsub<mode>4_intrinsic (operands[0], operands[2],
                                            operands[3], operands[1]);
  emit_insn (fms_pat);
  DONE;
})

;; The expand RTL structure here is not important.
;; We use the gen_* functions anyway.
;; We just need something to wrap the iterators around.
;; Builtin expander for the non-lane vfmal/vfmsl forms.  It builds the
;; half-selection parallel once and passes it as both operand 4 and
;; operand 5 of the matching *_intrinsic insn.

(define_expand "neon_vfm<vfml_op>l_<vfml_half><mode>"
  [(set (match_operand:VCVTF 0 "s_register_operand")
        (unspec:VCVTF
          [(match_operand:VCVTF 1 "s_register_operand")
           (PLUSMINUS:<VFML>
             (match_operand:<VFML> 2 "s_register_operand")
             (match_operand:<VFML> 3 "s_register_operand"))] VFMLHALVES))]
  "TARGET_FP16FML"
{
  rtx half = arm_simd_vect_par_cnst_half (<VFML>mode, <vfml_half_selector>);
  rtx pat = gen_vfm<vfml_op>l_<vfml_half><mode>_intrinsic (operands[0],
                                                           operands[1],
                                                           operands[2],
                                                           operands[3],
                                                           half, half);
  emit_insn (pat);
  DONE;
})

;; vfmal on the low halves: fma of the float-extended low-half selections
;; of operands 2 and 3, accumulated into operand 1 (tied to operand 0).

(define_insn "vfmal_low<mode>_intrinsic"
  [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
        (fma:VCVTF
          (float_extend:VCVTF
            (vec_select:<VFMLSEL>
              (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
              (match_operand:<VFML> 4 "vect_par_constant_low" "")))
          (float_extend:VCVTF
            (vec_select:<VFMLSEL>
              (match_operand:<VFML> 3 "s_register_operand" "<VF_constraint>")
              (match_operand:<VFML> 5 "vect_par_constant_low" "")))
          (match_operand:VCVTF 1 "s_register_operand" "0")))]
  "TARGET_FP16FML"
  "vfmal.f16\\t%<V_reg>0, %<V_lo>2, %<V_lo>3"
  [(set_attr "type" "neon_fp_mla_s<q>")]
)

;; vfmsl on the high halves: as vfmal, with the selection taken from
;; operand 2 negated before it is extended.

(define_insn "vfmsl_high<mode>_intrinsic"
  [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
        (fma:VCVTF
          (float_extend:VCVTF
            (neg:<VFMLSEL>
              (vec_select:<VFMLSEL>
                (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
                (match_operand:<VFML> 4 "vect_par_constant_high" ""))))
          (float_extend:VCVTF
            (vec_select:<VFMLSEL>
              (match_operand:<VFML> 3 "s_register_operand" "<VF_constraint>")
              (match_operand:<VFML> 5 "vect_par_constant_high" "")))
          (match_operand:VCVTF 1 "s_register_operand" "0")))]
  "TARGET_FP16FML"
  "vfmsl.f16\\t%<V_reg>0, %<V_hi>2, %<V_hi>3"
  [(set_attr "type" "neon_fp_mla_s<q>")]
)

;; vfmal on the high halves.

(define_insn "vfmal_high<mode>_intrinsic"
  [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
        (fma:VCVTF
          (float_extend:VCVTF
            (vec_select:<VFMLSEL>
              (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
              (match_operand:<VFML> 4 "vect_par_constant_high" "")))
          (float_extend:VCVTF
            (vec_select:<VFMLSEL>
              (match_operand:<VFML> 3 "s_register_operand" "<VF_constraint>")
              (match_operand:<VFML> 5 "vect_par_constant_high" "")))
          (match_operand:VCVTF 1 "s_register_operand" "0")))]
  "TARGET_FP16FML"
  "vfmal.f16\\t%<V_reg>0, %<V_hi>2, %<V_hi>3"
  [(set_attr "type" "neon_fp_mla_s<q>")]
)

;; vfmsl on the low halves.

(define_insn "vfmsl_low<mode>_intrinsic"
  [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
        (fma:VCVTF
          (float_extend:VCVTF
            (neg:<VFMLSEL>
              (vec_select:<VFMLSEL>
                (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
                (match_operand:<VFML> 4 "vect_par_constant_low" ""))))
          (float_extend:VCVTF
            (vec_select:<VFMLSEL>
              (match_operand:<VFML> 3 "s_register_operand" "<VF_constraint>")
              (match_operand:<VFML> 5 "vect_par_constant_low" "")))
          (match_operand:VCVTF 1 "s_register_operand" "0")))]
  "TARGET_FP16FML"
  "vfmsl.f16\\t%<V_reg>0, %<V_lo>2, %<V_lo>3"
  [(set_attr "type" "neon_fp_mla_s<q>")]
)

;; Builtin expander for the lane forms in which operands 2 and 3 share the
;; <VFML> mode.  The lane number in operand 4 is passed through
;; NEON_ENDIAN_LANE_N before it becomes operand 5 of the insn.

(define_expand "neon_vfm<vfml_op>l_lane_<vfml_half><VCVTF:mode>"
  [(set:VCVTF (match_operand:VCVTF 0 "s_register_operand")
        (unspec:VCVTF
          [(match_operand:VCVTF 1 "s_register_operand")
           (PLUSMINUS:<VFML>
             (match_operand:<VFML> 2 "s_register_operand")
             (match_operand:<VFML> 3 "s_register_operand"))
           (match_operand:SI 4 "const_int_operand")] VFMLHALVES))]
  "TARGET_FP16FML"
{
  rtx lane = GEN_INT (NEON_ENDIAN_LANE_N (<VFML>mode, INTVAL (operands[4])));
  rtx half = arm_simd_vect_par_cnst_half (<VFML>mode, <vfml_half_selector>);
  rtx pat = gen_vfm<vfml_op>l_lane_<vfml_half><mode>_intrinsic
              (operands[0], operands[1], operands[2], operands[3],
               half, lane);
  emit_insn (pat);
  DONE;
})

;; vfmal, low half of operand 2 times one lane of operand 3.  The output
;; code picks the %<V_hi> or %<V_lo> view of operand 3 depending on which
;; half the lane falls in, and rewrites operand 5 relative to that half.

(define_insn "vfmal_lane_low<mode>_intrinsic"
  [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
        (fma:VCVTF
          (float_extend:VCVTF
            (vec_select:<VFMLSEL>
              (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
              (match_operand:<VFML> 4 "vect_par_constant_low" "")))
          (float_extend:VCVTF
            (vec_duplicate:<VFMLSEL>
              (vec_select:HF
                (match_operand:<VFML> 3 "s_register_operand" "x")
                (parallel [(match_operand:SI 5 "const_int_operand" "n")]))))
          (match_operand:VCVTF 1 "s_register_operand" "0")))]
  "TARGET_FP16FML"
  {
    const int half_nunits = GET_MODE_NUNITS (<VFMLSEL>mode);
    const int lane = NEON_ENDIAN_LANE_N (<VFML>mode, INTVAL (operands[5]));

    if (lane >= half_nunits)
      {
        operands[5] = GEN_INT (lane - half_nunits);
        return "vfmal.f16\\t%<V_reg>0, %<V_lo>2, %<V_hi>3[%c5]";
      }

    operands[5] = GEN_INT (lane);
    return "vfmal.f16\\t%<V_reg>0, %<V_lo>2, %<V_lo>3[%c5]";
  }
  [(set_attr "type" "neon_fp_mla_s<q>")]
)

;; Builtin expander for the lane forms in which operand 3 has the
;; <VFMLSEL2> mode instead of the <VFML> mode of operand 2.

(define_expand "neon_vfm<vfml_op>l_lane_<vfml_half><vfmlsel2><mode>"
  [(set:VCVTF (match_operand:VCVTF 0 "s_register_operand")
        (unspec:VCVTF
          [(match_operand:VCVTF 1 "s_register_operand")
           (PLUSMINUS:<VFML>
             (match_operand:<VFML> 2 "s_register_operand")
             (match_operand:<VFMLSEL2> 3 "s_register_operand"))
           (match_operand:SI 4 "const_int_operand")] VFMLHALVES))]
  "TARGET_FP16FML"
{
  rtx lane
    = GEN_INT (NEON_ENDIAN_LANE_N (<VFMLSEL2>mode, INTVAL (operands[4])));
  rtx half = arm_simd_vect_par_cnst_half (<VFML>mode, <vfml_half_selector>);
  rtx pat = gen_vfm<vfml_op>l_lane_<vfml_half><vfmlsel2><mode>_intrinsic
              (operands[0], operands[1], operands[2], operands[3],
               half, lane);
  emit_insn (pat);
  DONE;
})

;; Used to implement the intrinsics:
;; float32x4_t vfmlalq_lane_low_u32 (float32x4_t r, float16x8_t a, float16x4_t b, const int lane)
;; float32x2_t vfmlal_laneq_low_u32 (float32x2_t r, float16x4_t a, float16x8_t b, const int lane)
;; Needs a bit of care to get the modes of the different sub-expressions right
;; due to 'a' and 'b' having different sizes and make sure we use the right
;; S or D subregister to select the appropriate lane from.

(define_insn "vfmal_lane_low<vfmlsel2><mode>_intrinsic"
  [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
        (fma:VCVTF
          (float_extend:VCVTF
            (vec_select:<VFMLSEL>
              (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
              (match_operand:<VFML> 4 "vect_par_constant_low" "")))
          (float_extend:VCVTF
            (vec_duplicate:<VFMLSEL>
              (vec_select:HF
                (match_operand:<VFMLSEL2> 3 "s_register_operand" "x")
                (parallel [(match_operand:SI 5 "const_int_operand" "n")]))))
          (match_operand:VCVTF 1 "s_register_operand" "0")))]
  "TARGET_FP16FML"
  {
    const int nunits = GET_MODE_NUNITS (<VFMLSEL>mode);
    const int lane = NEON_ENDIAN_LANE_N (<VFMLSEL2>mode, INTVAL (operands[5]));

    /* The lane is split into a register offset (lane / nunits) and a lane
       within that register (lane % nunits).  */
    operands[5] = GEN_INT (lane % nunits);
    /* Operands 2 and 3 are rebuilt in the halved VFMLSEL mode so that
       print_operand emits the appropriate S or D register prefix.  */
    operands[3] = gen_rtx_REG (<VFMLSEL>mode,
                               REGNO (operands[3]) + lane / nunits);
    operands[2] = gen_rtx_REG (<VFMLSEL>mode, REGNO (operands[2]));
    return "vfmal.f16\\t%<V_reg>0, %<V_lane_reg>2, %<V_lane_reg>3[%c5]";
  }
  [(set_attr "type" "neon_fp_mla_s<q>")]
)

;; Used to implement the intrinsics:
;; float32x4_t vfmlalq_lane_high_u32 (float32x4_t r, float16x8_t a, float16x4_t b, const int lane)
;; float32x2_t vfmlal_laneq_high_u32 (float32x2_t r, float16x4_t a, float16x8_t b, const int lane)
;; Needs a bit of care to get the modes of the different sub-expressions right
;; due to 'a' and 'b' having different sizes and make sure we use the right
;; S or D subregister to select the appropriate lane from.

(define_insn "vfmal_lane_high<vfmlsel2><mode>_intrinsic"
  [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
        (fma:VCVTF
          (float_extend:VCVTF
            (vec_select:<VFMLSEL>
              (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
              (match_operand:<VFML> 4 "vect_par_constant_high" "")))
          (float_extend:VCVTF
            (vec_duplicate:<VFMLSEL>
              (vec_select:HF
                (match_operand:<VFMLSEL2> 3 "s_register_operand" "x")
                (parallel [(match_operand:SI 5 "const_int_operand" "n")]))))
          (match_operand:VCVTF 1 "s_register_operand" "0")))]
  "TARGET_FP16FML"
  {
    const int nunits = GET_MODE_NUNITS (<VFMLSEL>mode);
    const int lane = NEON_ENDIAN_LANE_N (<VFMLSEL2>mode, INTVAL (operands[5]));

    operands[5] = GEN_INT (lane % nunits);
    /* Only operand 3 is rebuilt, in the halved VFMLSEL mode: the
       half-width subreg holding the lane has been computed and it is
       *that* subreg that must be printed.  */
    operands[3] = gen_rtx_REG (<VFMLSEL>mode,
                               REGNO (operands[3]) + lane / nunits);
    return "vfmal.f16\\t%<V_reg>0, %<V_hi>2, %<V_lane_reg>3[%c5]";
  }
  [(set_attr "type" "neon_fp_mla_s<q>")]
)

;; vfmal, high half of operand 2 times one lane of operand 3 (operands 2
;; and 3 share the <VFML> mode).

(define_insn "vfmal_lane_high<mode>_intrinsic"
  [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
        (fma:VCVTF
          (float_extend:VCVTF
            (vec_select:<VFMLSEL>
              (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
              (match_operand:<VFML> 4 "vect_par_constant_high" "")))
          (float_extend:VCVTF
            (vec_duplicate:<VFMLSEL>
              (vec_select:HF
                (match_operand:<VFML> 3 "s_register_operand" "x")
                (parallel [(match_operand:SI 5 "const_int_operand" "n")]))))
          (match_operand:VCVTF 1 "s_register_operand" "0")))]
  "TARGET_FP16FML"
  {
    const int half_nunits = GET_MODE_NUNITS (<VFMLSEL>mode);
    const int lane = NEON_ENDIAN_LANE_N (<VFML>mode, INTVAL (operands[5]));

    if (lane >= half_nunits)
      {
        operands[5] = GEN_INT (lane - half_nunits);
        return "vfmal.f16\\t%<V_reg>0, %<V_hi>2, %<V_hi>3[%c5]";
      }

    operands[5] = GEN_INT (lane);
    return "vfmal.f16\\t%<V_reg>0, %<V_hi>2, %<V_lo>3[%c5]";
  }
  [(set_attr "type" "neon_fp_mla_s<q>")]
)

;; vfmsl, low half of operand 2 (negated) times one lane of operand 3
;; (operands 2 and 3 share the <VFML> mode).

(define_insn "vfmsl_lane_low<mode>_intrinsic"
  [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
        (fma:VCVTF
          (float_extend:VCVTF
            (neg:<VFMLSEL>
              (vec_select:<VFMLSEL>
                (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
                (match_operand:<VFML> 4 "vect_par_constant_low" ""))))
          (float_extend:VCVTF
            (vec_duplicate:<VFMLSEL>
              (vec_select:HF
                (match_operand:<VFML> 3 "s_register_operand" "x")
                (parallel [(match_operand:SI 5 "const_int_operand" "n")]))))
          (match_operand:VCVTF 1 "s_register_operand" "0")))]
  "TARGET_FP16FML"
  {
    const int half_nunits = GET_MODE_NUNITS (<VFMLSEL>mode);
    const int lane = NEON_ENDIAN_LANE_N (<VFML>mode, INTVAL (operands[5]));

    if (lane >= half_nunits)
      {
        operands[5] = GEN_INT (lane - half_nunits);
        return "vfmsl.f16\\t%<V_reg>0, %<V_lo>2, %<V_hi>3[%c5]";
      }

    operands[5] = GEN_INT (lane);
    return "vfmsl.f16\\t%<V_reg>0, %<V_lo>2, %<V_lo>3[%c5]";
  }
  [(set_attr "type" "neon_fp_mla_s<q>")]
)

;; Used to implement the intrinsics:
;; float32x4_t vfmlslq_lane_low_u32 (float32x4_t r, float16x8_t a, float16x4_t b, const int lane)
;; float32x2_t vfmlsl_laneq_low_u32 (float32x2_t r, float16x4_t a, float16x8_t b, const int lane)
;; Needs a bit of care to get the modes of the different sub-expressions right
;; due to 'a' and 'b' having different sizes and make sure we use the right
;; S or D subregister to select the appropriate lane from.

(define_insn "vfmsl_lane_low<vfmlsel2><mode>_intrinsic"
  [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
        (fma:VCVTF
          (float_extend:VCVTF
            (neg:<VFMLSEL>
              (vec_select:<VFMLSEL>
                (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
                (match_operand:<VFML> 4 "vect_par_constant_low" ""))))
          (float_extend:VCVTF
            (vec_duplicate:<VFMLSEL>
              (vec_select:HF
                (match_operand:<VFMLSEL2> 3 "s_register_operand" "x")
                (parallel [(match_operand:SI 5 "const_int_operand" "n")]))))
          (match_operand:VCVTF 1 "s_register_operand" "0")))]
  "TARGET_FP16FML"
  {
    const int nunits = GET_MODE_NUNITS (<VFMLSEL>mode);
    const int lane = NEON_ENDIAN_LANE_N (<VFMLSEL2>mode, INTVAL (operands[5]));

    /* The lane is split into a register offset (lane / nunits) and a lane
       within that register (lane % nunits).  */
    operands[5] = GEN_INT (lane % nunits);
    /* Operands 2 and 3 are rebuilt in the halved VFMLSEL mode so that
       print_operand emits the appropriate S or D register prefix.  */
    operands[3] = gen_rtx_REG (<VFMLSEL>mode,
                               REGNO (operands[3]) + lane / nunits);
    operands[2] = gen_rtx_REG (<VFMLSEL>mode, REGNO (operands[2]));
    return "vfmsl.f16\\t%<V_reg>0, %<V_lane_reg>2, %<V_lane_reg>3[%c5]";
  }
  [(set_attr "type" "neon_fp_mla_s<q>")]
)

;; Used to implement the intrinsics:
;; float32x4_t vfmlslq_lane_high_u32 (float32x4_t r, float16x8_t a, float16x4_t b, const int lane)
;; float32x2_t vfmlsl_laneq_high_u32 (float32x2_t r, float16x4_t a, float16x8_t b, const int lane)
;; Needs a bit of care to get the modes of the different sub-expressions right
;; due to 'a' and 'b' having different sizes and make sure we use the right
;; S or D subregister to select the appropriate lane from.

(define_insn "vfmsl_lane_high<vfmlsel2><mode>_intrinsic"
  [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
        (fma:VCVTF
          (float_extend:VCVTF
            (neg:<VFMLSEL>
              (vec_select:<VFMLSEL>
                (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
                (match_operand:<VFML> 4 "vect_par_constant_high" ""))))
          (float_extend:VCVTF
            (vec_duplicate:<VFMLSEL>
              (vec_select:HF
                (match_operand:<VFMLSEL2> 3 "s_register_operand" "x")
                (parallel [(match_operand:SI 5 "const_int_operand" "n")]))))
          (match_operand:VCVTF 1 "s_register_operand" "0")))]
  "TARGET_FP16FML"
  {
    const int nunits = GET_MODE_NUNITS (<VFMLSEL>mode);
    const int lane = NEON_ENDIAN_LANE_N (<VFMLSEL2>mode, INTVAL (operands[5]));

    operands[5] = GEN_INT (lane % nunits);
    /* Only operand 3 is rebuilt, in the halved VFMLSEL mode: the
       half-width subreg holding the lane has been computed and it is
       *that* subreg that must be printed.  */
    operands[3] = gen_rtx_REG (<VFMLSEL>mode,
                               REGNO (operands[3]) + lane / nunits);
    return "vfmsl.f16\\t%<V_reg>0, %<V_hi>2, %<V_lane_reg>3[%c5]";
  }
  [(set_attr "type" "neon_fp_mla_s<q>")]
)

;; vfmsl, high half of operand 2 (negated) times one lane of operand 3
;; (operands 2 and 3 share the <VFML> mode).

(define_insn "vfmsl_lane_high<mode>_intrinsic"
  [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
        (fma:VCVTF
          (float_extend:VCVTF
            (neg:<VFMLSEL>
              (vec_select:<VFMLSEL>
                (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
                (match_operand:<VFML> 4 "vect_par_constant_high" ""))))
          (float_extend:VCVTF
            (vec_duplicate:<VFMLSEL>
              (vec_select:HF
                (match_operand:<VFML> 3 "s_register_operand" "x")
                (parallel [(match_operand:SI 5 "const_int_operand" "n")]))))
          (match_operand:VCVTF 1 "s_register_operand" "0")))]
  "TARGET_FP16FML"
  {
    const int half_nunits = GET_MODE_NUNITS (<VFMLSEL>mode);
    const int lane = NEON_ENDIAN_LANE_N (<VFML>mode, INTVAL (operands[5]));

    if (lane >= half_nunits)
      {
        operands[5] = GEN_INT (lane - half_nunits);
        return "vfmsl.f16\\t%<V_reg>0, %<V_hi>2, %<V_hi>3[%c5]";
      }

    operands[5] = GEN_INT (lane);
    return "vfmsl.f16\\t%<V_reg>0, %<V_hi>2, %<V_lo>3[%c5]";
  }
  [(set_attr "type" "neon_fp_mla_s<q>")]
)

; Used for intrinsics when flag_unsafe_math_optimizations is false.
;; vmla kept opaque as UNSPEC_VMLA.  Operand 1 is the accumulator and is
;; tied to the destination by its "0" constraint, so only operands 0, 2 and
;; 3 appear in the assembler template.

(define_insn "neon_vmla<mode>_unspec"
  [(set
     (match_operand:VDQW 0 "s_register_operand" "=w")
     (unspec:VDQW
       [(match_operand:VDQW 1 "s_register_operand" "0")
        (match_operand:VDQW 2 "s_register_operand" "w")
        (match_operand:VDQW 3 "s_register_operand" "w")]
       UNSPEC_VMLA))]
  "TARGET_NEON"
  "vmla.<V_if_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
  [(set (attr "type")
        (if_then_else
          (match_test "<Is_float_mode>")
          (const_string "neon_fp_mla_s<q>")
          (const_string "neon_mla_<V_elem_ch><q>")))]
)

;; vmlal: the accumulator and result have the <V_widen> mode (printed with
;; %q); the two multiplicands have the VW mode (printed with %P).

(define_insn "neon_vmlal<sup><mode>"
  [(set
     (match_operand:<V_widen> 0 "s_register_operand" "=w")
     (unspec:<V_widen>
       [(match_operand:<V_widen> 1 "s_register_operand" "0")
        (match_operand:VW 2 "s_register_operand" "w")
        (match_operand:VW 3 "s_register_operand" "w")]
       VMLAL))]
  "TARGET_NEON"
  "vmlal.<sup>%#<V_sz_elem>\t%q0, %P2, %P3"
  [(set_attr "type" "neon_mla_<V_elem_ch>_long")]
)

;; Builtin expander for vmls.  A float mode without
;; -funsafe-math-optimizations goes to the unspec pattern; every other
;; case goes to the canonical mul<mode>3neg<mode>add<mode>_neon pattern.

(define_expand "neon_vmls<mode>"
  [(match_operand:VDQW 0 "s_register_operand" "=w")
   (match_operand:VDQW 1 "s_register_operand" "0")
   (match_operand:VDQW 2 "s_register_operand" "w")
   (match_operand:VDQW 3 "s_register_operand" "w")]
  "TARGET_NEON"
{
  if (<Is_float_mode> && !flag_unsafe_math_optimizations)
    emit_insn (gen_neon_vmls<mode>_unspec (operands[0], operands[1],
                                           operands[2], operands[3]));
  else
    emit_insn (gen_mul<mode>3neg<mode>add<mode>_neon (operands[0],
                 operands[1], operands[2], operands[3]));
  DONE;
})

; Used for intrinsics when flag_unsafe_math_optimizations is false.

;; vmls kept opaque as UNSPEC_VMLS.  Operand 1 (the accumulator) is tied to
;; the output; the "type" attribute is chosen by <Is_float_mode>.
(define_insn "neon_vmls<mode>_unspec"
  [(set (match_operand:VDQW 0 "s_register_operand" "=w")
        (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0")
                      (match_operand:VDQW 2 "s_register_operand" "w")
                      (match_operand:VDQW 3 "s_register_operand" "w")]
                    UNSPEC_VMLS))]
  "TARGET_NEON"
  "vmls.<V_if_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
  [(set (attr "type")
      (if_then_else (match_test "<Is_float_mode>")
                    (const_string "neon_fp_mla_s<q>")
                    (const_string "neon_mla_<V_elem_ch><q>")))]
)

;; vmlsl: <V_widen> result with tied <V_widen> accumulator (operand 1) and
;; two VW sources.
(define_insn "neon_vmlsl<sup><mode>"
  [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
        (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
                           (match_operand:VW 2 "s_register_operand" "w")
                           (match_operand:VW 3 "s_register_operand" "w")]
                          VMLSL))]
  "TARGET_NEON"
  "vmlsl.<sup>%#<V_sz_elem>\t%q0, %P2, %P3"
  [(set_attr "type" "neon_mla_<V_elem_ch>_long")]
)

;; vqdmulh, vqrdmulh
(define_insn "neon_vq<r>dmulh<mode>"
  [(set (match_operand:VMDQI 0 "s_register_operand" "=w")
        (unspec:VMDQI [(match_operand:VMDQI 1 "s_register_operand" "w")
                       (match_operand:VMDQI 2 "s_register_operand" "w")]
                      VQDMULH))]
  "TARGET_NEON"
  "vq<r>dmulh.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_sat_mul_<V_elem_ch><q>")]
)

;; vqrdmlah, vqrdmlsh
;; Operand 1 is the accumulator, tied to the output.
(define_insn "neon_vqrdml<VQRDMLH_AS:neon_rdma_as>h<mode>"
  [(set (match_operand:VMDQI 0 "s_register_operand" "=w")
        (unspec:VMDQI [(match_operand:VMDQI 1 "s_register_operand" "0")
                       (match_operand:VMDQI 2 "s_register_operand" "w")
                       (match_operand:VMDQI 3 "s_register_operand" "w")]
                      VQRDMLH_AS))]
  "TARGET_NEON_RDMA"
  "vqrdml<VQRDMLH_AS:neon_rdma_as>h.<V_s_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
  [(set_attr "type" "neon_sat_mla_<V_elem_ch>_long")]
)

;; vqdmlal: <V_widen> result with tied accumulator and two VMDI sources.
(define_insn "neon_vqdmlal<mode>"
  [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
        (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
                           (match_operand:VMDI 2 "s_register_operand" "w")
                           (match_operand:VMDI 3 "s_register_operand" "w")]
                          UNSPEC_VQDMLAL))]
  "TARGET_NEON"
  "vqdmlal.<V_s_elem>\t%q0, %P2, %P3"
  [(set_attr "type" "neon_sat_mla_<V_elem_ch>_long")]
)

;; vqdmlsl: <V_widen> result with tied accumulator and two VMDI sources.
(define_insn "neon_vqdmlsl<mode>"
  [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
        (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
                           (match_operand:VMDI 2 "s_register_operand" "w")
                           (match_operand:VMDI 3 "s_register_operand" "w")]
                          UNSPEC_VQDMLSL))]
  "TARGET_NEON"
  "vqdmlsl.<V_s_elem>\t%q0, %P2, %P3"
  [(set_attr "type" "neon_sat_mla_<V_elem_ch>_long")]
)

;; vmull: <V_widen> result from two VW sources.
(define_insn "neon_vmull<sup><mode>"
  [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
        (unspec:<V_widen> [(match_operand:VW 1 "s_register_operand" "w")
                           (match_operand:VW 2 "s_register_operand" "w")]
                          VMULL))]
  "TARGET_NEON"
  "vmull.<sup>%#<V_sz_elem>\t%q0, %P1, %P2"
  [(set_attr "type" "neon_mul_<V_elem_ch>_long")]
)

;; vqdmull: <V_widen> result from two VMDI sources.
(define_insn "neon_vqdmull<mode>"
  [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
        (unspec:<V_widen> [(match_operand:VMDI 1 "s_register_operand" "w")
                           (match_operand:VMDI 2 "s_register_operand" "w")]
                          UNSPEC_VQDMULL))]
  "TARGET_NEON"
  "vqdmull.<V_s_elem>\t%q0, %P1, %P2"
  [(set_attr "type" "neon_sat_mul_<V_elem_ch>_long")]
)

;; Expander for vsub on VCVTF modes.  Uses the generic sub<mode>3 pattern
;; when the mode is not float or -funsafe-math-optimizations is on, and the
;; neon_vsub<mode>_unspec pattern otherwise.
(define_expand "neon_vsub<mode>"
  [(match_operand:VCVTF 0 "s_register_operand" "=w")
   (match_operand:VCVTF 1 "s_register_operand" "w")
   (match_operand:VCVTF 2 "s_register_operand" "w")]
  "TARGET_NEON"
{
  if (!<Is_float_mode> || flag_unsafe_math_optimizations)
    emit_insn (gen_sub<mode>3 (operands[0], operands[1], operands[2]));
  else
    emit_insn (gen_neon_vsub<mode>_unspec (operands[0], operands[1],
                                           operands[2]));
  DONE;
})

; Used for intrinsics when flag_unsafe_math_optimizations is false.

(define_insn "neon_vsub<mode>_unspec"
  [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
        (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
                       (match_operand:VCVTF 2 "s_register_operand" "w")]
                     UNSPEC_VSUB))]
  "TARGET_NEON"
  "vsub.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set (attr "type")
      (if_then_else (match_test "<Is_float_mode>")
                    (const_string "neon_fp_addsub_s<q>")
                    (const_string "neon_sub<q>")))]
)

;; vsubl: <V_widen> result from two VDI sources.
(define_insn "neon_vsubl<sup><mode>"
  [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
        (unspec:<V_widen> [(match_operand:VDI 1 "s_register_operand" "w")
                           (match_operand:VDI 2 "s_register_operand" "w")]
                          VSUBL))]
  "TARGET_NEON"
  "vsubl.<sup>%#<V_sz_elem>\t%q0, %P1, %P2"
  [(set_attr "type" "neon_sub_long")]
)

;; vsubw: operand 1 already has mode <V_widen>; operand 2 is VDI.
(define_insn "neon_vsubw<sup><mode>"
  [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
        (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "w")
                           (match_operand:VDI 2 "s_register_operand" "w")]
                          VSUBW))]
  "TARGET_NEON"
  "vsubw.<sup>%#<V_sz_elem>\t%q0, %q1, %P2"
  [(set_attr "type" "neon_sub_widen")]
)

(define_insn "neon_vqsub<sup><mode>"
  [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
        (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
                       (match_operand:VDQIX 2 "s_register_operand" "w")]
                      VQSUB))]
  "TARGET_NEON"
  "vqsub.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_qsub<q>")]
)

(define_insn "neon_vhsub<sup><mode>"
  [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
        (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")
                       (match_operand:VDQIW 2 "s_register_operand" "w")]
                      VHSUB))]
  "TARGET_NEON"
  "vhsub.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_sub_halve<q>")]
)

;; v<r>subhn: <V_narrow> result (printed with %P) from two VN sources
;; (printed with %q).
(define_insn "neon_v<r>subhn<mode>"
  [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
        (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")
                            (match_operand:VN 2 "s_register_operand" "w")]
                           VSUBHN))]
  "TARGET_NEON"
  "v<r>subhn.<V_if_elem>\t%P0, %q1, %q2"
  [(set_attr "type" "neon_sub_halve_narrow_q")]
)

;; These may expand to an UNSPEC pattern when a floating point mode is used
;; without unsafe math optimizations.
(define_expand "neon_vc<cmp_op><mode>"
  [(match_operand:<V_cmp_result> 0 "s_register_operand" "=w,w")
     (neg:<V_cmp_result>
       (COMPARISONS:VDQW (match_operand:VDQW 1 "s_register_operand" "w,w")
                         (match_operand:VDQW 2 "reg_or_zero_operand" "w,Dz")))]
  "TARGET_NEON"
  {
    /* For FP comparisons use UNSPECS unless -funsafe-math-optimizations
       are enabled.  */
    if (GET_MODE_CLASS (<MODE>mode) == MODE_VECTOR_FLOAT
        && !flag_unsafe_math_optimizations)
      {
        /* We don't just emit a gen_neon_vc<cmp_op><mode>_insn_unspec because
           we define gen_neon_vceq<mode>_insn_unspec only for float modes
           whereas this expander iterates over the integer modes as well,
           but we will never expand to UNSPECs for the integer comparisons.  */
        switch (<MODE>mode)
          {
            case E_V2SFmode:
              emit_insn (gen_neon_vc<cmp_op>v2sf_insn_unspec (operands[0],
                                                              operands[1],
                                                              operands[2]));
              break;
            case E_V4SFmode:
              emit_insn (gen_neon_vc<cmp_op>v4sf_insn_unspec (operands[0],
                                                              operands[1],
                                                              operands[2]));
              break;
            default:
              gcc_unreachable ();
          }
      }
    else
      emit_insn (gen_neon_vc<cmp_op><mode>_insn (operands[0],
                                                 operands[1],
                                                 operands[2]));
    DONE;
  }
)

;; RTL-visible compare.  Not available for float vector modes unless
;; -funsafe-math-optimizations is on (see the insn condition).  Alternative 0
;; compares against a register, alternative 1 against the literal #0.
(define_insn "neon_vc<cmp_op><mode>_insn"
  [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w,w")
        (neg:<V_cmp_result>
          (COMPARISONS:<V_cmp_result>
            (match_operand:VDQW 1 "s_register_operand" "w,w")
            (match_operand:VDQW 2 "reg_or_zero_operand" "w,Dz"))))]
  "TARGET_NEON && !(GET_MODE_CLASS (<MODE>mode) == MODE_VECTOR_FLOAT
                    && !flag_unsafe_math_optimizations)"
  {
    /* Build the template at output time: the element-type letter depends on
       the mode class and the last operand on the matched alternative.  */
    char pattern[100];
    sprintf (pattern, "vc<cmp_op>.%s%%#<V_sz_elem>\t%%<V_reg>0,"
                      " %%<V_reg>1, %s",
                       GET_MODE_CLASS (<MODE>mode) == MODE_VECTOR_FLOAT
                         ? "f" : "<cmp_type>",
                       which_alternative == 0
                         ? "%<V_reg>2" : "#0");
    output_asm_insn (pattern, operands);
    return "";
  }
  [(set (attr "type")
        (if_then_else (match_operand 2 "zero_operand")
                      (const_string "neon_compare_zero<q>")
                      (const_string "neon_compare<q>")))]
)

;; UNSPEC form of the VCVTF compare (NEON_VCMP).  Alternative 0 compares
;; against a register, alternative 1 against the literal #0.
(define_insn "neon_vc<cmp_op_unsp><mode>_insn_unspec"
  [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w,w")
        (unspec:<V_cmp_result>
          [(match_operand:VCVTF 1 "s_register_operand" "w,w")
           (match_operand:VCVTF 2 "reg_or_zero_operand" "w,Dz")]
          NEON_VCMP))]
  "TARGET_NEON"
  {
    char pattern[100];
    sprintf (pattern, "vc<cmp_op_unsp>.f%%#<V_sz_elem>\t%%<V_reg>0,"
                       " %%<V_reg>1, %s",
                       which_alternative == 0
                         ? "%<V_reg>2" : "#0");
    output_asm_insn (pattern, operands);
    return "";
}
  [(set_attr "type" "neon_fp_compare_s<q>")]
)

;; VH (half-precision vector) counterpart of the compare expander.
(define_expand "neon_vc<cmp_op><mode>"
 [(match_operand:<V_cmp_result> 0 "s_register_operand")
  (neg:<V_cmp_result>
   (COMPARISONS:VH
    (match_operand:VH 1 "s_register_operand")
    (match_operand:VH 2 "reg_or_zero_operand")))]
 "TARGET_NEON_FP16INST"
{
  /* For FP comparisons use UNSPECS unless -funsafe-math-optimizations
     are enabled.  */
  if (GET_MODE_CLASS (<MODE>mode) == MODE_VECTOR_FLOAT
      && !flag_unsafe_math_optimizations)
    emit_insn
      (gen_neon_vc<cmp_op><mode>_fp16insn_unspec
       (operands[0], operands[1], operands[2]));
  else
    emit_insn
      (gen_neon_vc<cmp_op><mode>_fp16insn
       (operands[0], operands[1], operands[2]));
  DONE;
})

;; RTL-visible VH compare; the insn condition excludes float vector modes
;; unless -funsafe-math-optimizations is on.
(define_insn "neon_vc<cmp_op><mode>_fp16insn"
 [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w,w")
   (neg:<V_cmp_result>
    (COMPARISONS:<V_cmp_result>
     (match_operand:VH 1 "s_register_operand" "w,w")
     (match_operand:VH 2 "reg_or_zero_operand" "w,Dz"))))]
 "TARGET_NEON_FP16INST
  && !(GET_MODE_CLASS (<MODE>mode) == MODE_VECTOR_FLOAT
  && !flag_unsafe_math_optimizations)"
{
  char pattern[100];
  sprintf (pattern, "vc<cmp_op>.%s%%#<V_sz_elem>\t%%<V_reg>0,"
           " %%<V_reg>1, %s",
           GET_MODE_CLASS (<MODE>mode) == MODE_VECTOR_FLOAT
           ? "f" : "<cmp_type>",
           which_alternative == 0
           ? "%<V_reg>2" : "#0");
  output_asm_insn (pattern, operands);
  return "";
}
 [(set (attr "type")
   (if_then_else (match_operand 2 "zero_operand")
    (const_string "neon_compare_zero<q>")
    (const_string "neon_compare<q>")))])

;; UNSPEC form of the VH compare (NEON_VCMP).
(define_insn "neon_vc<cmp_op_unsp><mode>_fp16insn_unspec"
 [(set
   (match_operand:<V_cmp_result> 0 "s_register_operand" "=w,w")
   (unspec:<V_cmp_result>
    [(match_operand:VH 1 "s_register_operand" "w,w")
     (match_operand:VH 2 "reg_or_zero_operand" "w,Dz")]
    NEON_VCMP))]
 "TARGET_NEON_FP16INST"
{
  char pattern[100];
  sprintf (pattern, "vc<cmp_op_unsp>.f%%#<V_sz_elem>\t%%<V_reg>0,"
           " %%<V_reg>1, %s",
           which_alternative == 0
           ? "%<V_reg>2" : "#0");
  output_asm_insn (pattern, operands);
  return "";
}
 [(set_attr "type" "neon_fp_compare_s<q>")])

;; Unsigned integer compares (GTUGEU) on VDQIW modes; register operands only.
(define_insn "neon_vc<cmp_op>u<mode>"
  [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
        (neg:<V_cmp_result>
          (GTUGEU:<V_cmp_result>
            (match_operand:VDQIW 1 "s_register_operand" "w")
            (match_operand:VDQIW 2 "s_register_operand" "w"))))]
  "TARGET_NEON"
  "vc<cmp_op>.u%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_compare<q>")]
)

;; Absolute-value compare expander for VCVTF modes: RTL-visible insn under
;; -funsafe-math-optimizations, UNSPEC insn otherwise.
(define_expand "neon_vca<cmp_op><mode>"
  [(set (match_operand:<V_cmp_result> 0 "s_register_operand")
        (neg:<V_cmp_result>
          (GTGE:<V_cmp_result>
            (abs:VCVTF (match_operand:VCVTF 1 "s_register_operand"))
            (abs:VCVTF (match_operand:VCVTF 2 "s_register_operand")))))]
  "TARGET_NEON"
  {
    if (flag_unsafe_math_optimizations)
      emit_insn (gen_neon_vca<cmp_op><mode>_insn (operands[0], operands[1],
                                                  operands[2]));
    else
      emit_insn (gen_neon_vca<cmp_op><mode>_insn_unspec (operands[0],
                                                         operands[1],
                                                         operands[2]));
    DONE;
  }
)

(define_insn "neon_vca<cmp_op><mode>_insn"
  [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
        (neg:<V_cmp_result>
          (GTGE:<V_cmp_result>
            (abs:VCVTF (match_operand:VCVTF 1 "s_register_operand" "w"))
            (abs:VCVTF (match_operand:VCVTF 2 "s_register_operand" "w")))))]
  "TARGET_NEON && flag_unsafe_math_optimizations"
  "vac<cmp_op>.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_fp_compare_s<q>")]
)

(define_insn "neon_vca<cmp_op_unsp><mode>_insn_unspec"
  [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
        (unspec:<V_cmp_result> [(match_operand:VCVTF 1 "s_register_operand" "w")
                                (match_operand:VCVTF 2 "s_register_operand" "w")]
                               NEON_VACMP))]
  "TARGET_NEON"
  "vac<cmp_op_unsp>.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_fp_compare_s<q>")]
)

;; VH counterpart of the absolute-value compare expander (GLTE iterator).
(define_expand "neon_vca<cmp_op><mode>"
  [(set
    (match_operand:<V_cmp_result> 0 "s_register_operand")
    (neg:<V_cmp_result>
     (GLTE:<V_cmp_result>
      (abs:VH (match_operand:VH 1 "s_register_operand"))
      (abs:VH (match_operand:VH 2 "s_register_operand")))))]
 "TARGET_NEON_FP16INST"
{
  if (flag_unsafe_math_optimizations)
    emit_insn (gen_neon_vca<cmp_op><mode>_fp16insn
               (operands[0], operands[1], operands[2]));
  else
    emit_insn (gen_neon_vca<cmp_op><mode>_fp16insn_unspec
               (operands[0], operands[1], operands[2]));
  DONE;
})

(define_insn "neon_vca<cmp_op><mode>_fp16insn"
  [(set
    (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
    (neg:<V_cmp_result>
     (GLTE:<V_cmp_result>
      (abs:VH (match_operand:VH 1 "s_register_operand" "w"))
      (abs:VH (match_operand:VH 2 "s_register_operand" "w")))))]
 "TARGET_NEON_FP16INST && flag_unsafe_math_optimizations"
 "vac<cmp_op>.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
 [(set_attr "type" "neon_fp_compare_s<q>")]
)

;; UNSPEC form of the VH absolute-value compare (NEON_VAGLTE).  It operates
;; on VH modes, so it is gated on TARGET_NEON_FP16INST like the expander that
;; emits it and like the RTL-visible fp16 insn above, rather than on plain
;; TARGET_NEON.
(define_insn "neon_vca<cmp_op_unsp><mode>_fp16insn_unspec"
 [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
   (unspec:<V_cmp_result>
    [(match_operand:VH 1 "s_register_operand" "w")
     (match_operand:VH 2 "s_register_operand" "w")]
    NEON_VAGLTE))]
 "TARGET_NEON_FP16INST"
 "vac<cmp_op_unsp>.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
 [(set_attr "type" "neon_fp_compare_s<q>")]
)

;; Compare-against-zero expander for VH modes: forwards to the
;; neon_vc<cmp_op><mode> expander with CONST0_RTX as the second source.
(define_expand "neon_vc<cmp_op>z<mode>"
 [(set
   (match_operand:<V_cmp_result> 0 "s_register_operand")
   (COMPARISONS:<V_cmp_result>
    (match_operand:VH 1 "s_register_operand")
    (const_int 0)))]
 "TARGET_NEON_FP16INST"
 {
  emit_insn (gen_neon_vc<cmp_op><mode> (operands[0], operands[1],
                                        CONST0_RTX (<MODE>mode)));
  DONE;
})

(define_insn "neon_vtst<mode>"
  [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
        (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")
                       (match_operand:VDQIW 2 "s_register_operand" "w")]
                      UNSPEC_VTST))]
  "TARGET_NEON"
  "vtst.<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_tst<q>")]
)

(define_insn "neon_vabd<sup><mode>"
  [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
        (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")
                       (match_operand:VDQIW 2 "s_register_operand" "w")]
                      VABD))]
  "TARGET_NEON"
  "vabd.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_abd<q>")]
)

;; VH form of vabd (UNSPEC_VABD_F).
;; NOTE(review): the "type" here is the integer neon_abd<q>, whereas
;; neon_vabdf<mode> below uses neon_fp_abd_s<q> for the same unspec --
;; confirm whether that is intended.
(define_insn "neon_vabd<mode>"
  [(set (match_operand:VH 0 "s_register_operand" "=w")
    (unspec:VH [(match_operand:VH 1 "s_register_operand" "w")
                (match_operand:VH 2 "s_register_operand" "w")]
     UNSPEC_VABD_F))]
 "TARGET_NEON_FP16INST"
 "vabd.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_abd<q>")]
)

(define_insn "neon_vabdf<mode>"
  [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
        (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
                       (match_operand:VCVTF 2 "s_register_operand" "w")]
                      UNSPEC_VABD_F))]
  "TARGET_NEON"
  "vabd.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_fp_abd_s<q>")]
)

;; vabdl: <V_widen> result from two VW sources.
(define_insn "neon_vabdl<sup><mode>"
  [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
        (unspec:<V_widen> [(match_operand:VW 1 "s_register_operand" "w")
                           (match_operand:VW 2 "s_register_operand" "w")]
                          VABDL))]
  "TARGET_NEON"
  "vabdl.<sup>%#<V_sz_elem>\t%q0, %P1, %P2"
  [(set_attr "type" "neon_abd_long")]
)

;; vaba: the VABD unspec of operands 2 and 3, added to operand 1, which is
;; tied to the output.
(define_insn "neon_vaba<sup><mode>"
  [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
        (plus:VDQIW (unspec:VDQIW [(match_operand:VDQIW 2 "s_register_operand" "w")
                                   (match_operand:VDQIW 3 "s_register_operand" "w")]
                                  VABD)
                    (match_operand:VDQIW 1 "s_register_operand" "0")))]
  "TARGET_NEON"
  "vaba.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
  [(set_attr "type" "neon_arith_acc<q>")]
)

;; vabal: the VABDL unspec of the VW operands 2 and 3, added to the
;; <V_widen> operand 1, which is tied to the output.
(define_insn "neon_vabal<sup><mode>"
  [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
        (plus:<V_widen> (unspec:<V_widen> [(match_operand:VW 2 "s_register_operand" "w")
                                           (match_operand:VW 3 "s_register_operand" "w")]
                                          VABDL)
                        (match_operand:<V_widen> 1 "s_register_operand" "0")))]
  "TARGET_NEON"
  "vabal.<sup>%#<V_sz_elem>\t%q0, %P2, %P3"
  [(set_attr "type" "neon_arith_acc<q>")]
)

(define_insn "neon_v<maxmin><sup><mode>"
  [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
        (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")
                       (match_operand:VDQIW 2 "s_register_operand" "w")]
                     VMAXMIN))]
  "TARGET_NEON"
  "v<maxmin>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_minmax<q>")]
)

(define_insn "neon_v<maxmin>f<mode>"
  [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
        (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
                       (match_operand:VCVTF 2 "s_register_operand" "w")]
                     VMAXMINF))]
  "TARGET_NEON"
  "v<maxmin>.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_fp_minmax_s<q>")]
)

;; VH form of the float max/min pattern.
(define_insn "neon_v<maxmin>f<mode>"
 [(set (match_operand:VH 0 "s_register_operand" "=w")
   (unspec:VH
    [(match_operand:VH 1 "s_register_operand" "w")
     (match_operand:VH 2 "s_register_operand" "w")]
    VMAXMINF))]
 "TARGET_NEON_FP16INST"
 "v<maxmin>.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
 [(set_attr "type" "neon_fp_minmax_s<q>")]
)

;; V4HF-only pairwise max/min (VPMAXMINF); all operands printed with %P.
(define_insn "neon_vp<maxmin>fv4hf"
 [(set (match_operand:V4HF 0 "s_register_operand" "=w")
   (unspec:V4HF
    [(match_operand:V4HF 1 "s_register_operand" "w")
     (match_operand:V4HF 2 "s_register_operand" "w")]
    VPMAXMINF))]
 "TARGET_NEON_FP16INST"
 "vp<maxmin>.f16\t%P0, %P1, %P2"
  [(set_attr "type" "neon_reduc_minmax")]
)

;; VH form of the VMAXMINFNM pattern.
(define_insn "neon_<fmaxmin_op><mode>"
 [(set
   (match_operand:VH 0 "s_register_operand" "=w")
   (unspec:VH
    [(match_operand:VH 1 "s_register_operand" "w")
     (match_operand:VH 2 "s_register_operand" "w")]
    VMAXMINFNM))]
 "TARGET_NEON_FP16INST"
 "<fmaxmin_op>.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
 [(set_attr "type" "neon_fp_minmax_s<q>")]
)

;; v<maxmin>nm intrinsics.
;; VCVTF form of the VMAXMINFNM pattern; requires TARGET_VFP5 in addition to
;; TARGET_NEON.
(define_insn "neon_<fmaxmin_op><mode>"
  [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
        (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
                       (match_operand:VCVTF 2 "s_register_operand" "w")]
                       VMAXMINFNM))]
  "TARGET_NEON && TARGET_VFP5"
  "<fmaxmin_op>.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_fp_minmax_s<q>")]
)

;; Vector forms for the IEEE-754 fmax()/fmin() functions
(define_insn "<fmaxmin><mode>3"
  [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
        (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
                       (match_operand:VCVTF 2 "s_register_operand" "w")]
                       VMAXMINFNM))]
  "TARGET_NEON && TARGET_VFP5"
  "<fmaxmin_op>.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_fp_minmax_s<q>")]
)

;; vpadd expander for VD modes: forwards to neon_vpadd_internal<mode>.
(define_expand "neon_vpadd<mode>"
  [(match_operand:VD 0 "s_register_operand" "=w")
   (match_operand:VD 1 "s_register_operand" "w")
   (match_operand:VD 2 "s_register_operand" "w")]
  "TARGET_NEON"
{
  emit_insn (gen_neon_vpadd_internal<mode> (operands[0], operands[1],
                                            operands[2]));
  DONE;
})

;; vpaddl: single VDQIW source, <V_double_width> result.
(define_insn "neon_vpaddl<sup><mode>"
  [(set (match_operand:<V_double_width> 0 "s_register_operand" "=w")
        (unspec:<V_double_width> [(match_operand:VDQIW 1 "s_register_operand" "w")]
                                 VPADDL))]
  "TARGET_NEON"
  "vpaddl.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1"
  [(set_attr "type" "neon_reduc_add_long")]
)

;; vpadal: <V_double_width> accumulator (operand 1, tied to the output) and a
;; VDQIW source (operand 2).
(define_insn "neon_vpadal<sup><mode>"
  [(set (match_operand:<V_double_width> 0 "s_register_operand" "=w")
        (unspec:<V_double_width> [(match_operand:<V_double_width> 1 "s_register_operand" "0")
                                  (match_operand:VDQIW 2 "s_register_operand" "w")]
                                 VPADAL))]
  "TARGET_NEON"
  "vpadal.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>2"
  [(set_attr "type" "neon_reduc_add_acc")]
)

(define_insn "neon_vp<maxmin><sup><mode>"
  [(set (match_operand:VDI 0 "s_register_operand" "=w")
        (unspec:VDI [(match_operand:VDI 1 "s_register_operand" "w")
                     (match_operand:VDI 2 "s_register_operand" "w")]
                   VPMAXMIN))]
  "TARGET_NEON"
  "vp<maxmin>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_reduc_minmax<q>")]
)

(define_insn "neon_vp<maxmin>f<mode>"
  [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
        (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
                       (match_operand:VCVTF 2 "s_register_operand" "w")]
                     VPMAXMINF))]
  "TARGET_NEON"
  "vp<maxmin>.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_fp_reduc_minmax_s<q>")]
)

;; vrecps, VCVTF form.
(define_insn "neon_vrecps<mode>"
  [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
        (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
                       (match_operand:VCVTF 2 "s_register_operand" "w")]
                      UNSPEC_VRECPS))]
  "TARGET_NEON"
  "vrecps.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_fp_recps_s<q>")]
)

;; vrecps, VH form.
(define_insn "neon_vrecps<mode>"
  [(set
    (match_operand:VH 0 "s_register_operand" "=w")
    (unspec:VH [(match_operand:VH 1 "s_register_operand" "w")
                (match_operand:VH 2 "s_register_operand" "w")]
     UNSPEC_VRECPS))]
  "TARGET_NEON_FP16INST"
  "vrecps.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_fp_recps_s<q>")]
)

;; vrsqrts, VCVTF form.
(define_insn "neon_vrsqrts<mode>"
  [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
        (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
                       (match_operand:VCVTF 2 "s_register_operand" "w")]
                      UNSPEC_VRSQRTS))]
  "TARGET_NEON"
  "vrsqrts.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_fp_rsqrts_s<q>")]
)

;; vrsqrts, VH form.
(define_insn "neon_vrsqrts<mode>"
  [(set
    (match_operand:VH 0 "s_register_operand" "=w")
    (unspec:VH [(match_operand:VH 1 "s_register_operand" "w")
                (match_operand:VH 2 "s_register_operand" "w")]
     UNSPEC_VRSQRTS))]
 "TARGET_NEON_FP16INST"
 "vrsqrts.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
 [(set_attr "type" "neon_fp_rsqrts_s<q>")]
)

;; vabs expander: forwards to the generic abs<mode>2 pattern.
(define_expand "neon_vabs<mode>"
  [(match_operand:VDQW 0 "s_register_operand" "")
   (match_operand:VDQW 1 "s_register_operand" "")]
  "TARGET_NEON"
{
  emit_insn (gen_abs<mode>2 (operands[0], operands[1]));
  DONE;
})

(define_insn "neon_vqabs<mode>"
  [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
        (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")]
                      UNSPEC_VQABS))]
  "TARGET_NEON"
  "vqabs.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
  [(set_attr "type" "neon_qabs<q>")]
)

;; bswap on VDQHSD modes, output as vrev<V_sz_elem>.8.
(define_insn "neon_bswap<mode>"
  [(set (match_operand:VDQHSD 0 "register_operand" "=w")
        (bswap:VDQHSD (match_operand:VDQHSD 1 "register_operand" "w")))]
  "TARGET_NEON"
  "vrev<V_sz_elem>.8\\t%<V_reg>0, %<V_reg>1"
  [(set_attr "type" "neon_rev<q>")]
)

;; vneg expander: forwards to the generic neg<mode>2 pattern.
(define_expand "neon_vneg<mode>"
  [(match_operand:VDQW 0 "s_register_operand" "")
   (match_operand:VDQW 1 "s_register_operand" "")]
  "TARGET_NEON"
{
  emit_insn (gen_neg<mode>2 (operands[0], operands[1]));
  DONE;
})

;; These instructions map to the __builtins for the Dot Product operations.
;; Operand 1 is the accumulator, tied to the output; operands 2 and 3 are the
;; <VSI2QI> sources.
(define_insn "neon_<sup>dot<vsi2qi>"
  [(set (match_operand:VCVTI 0 "register_operand" "=w")
        (plus:VCVTI (match_operand:VCVTI 1 "register_operand" "0")
                    (unspec:VCVTI [(match_operand:<VSI2QI> 2
                                                        "register_operand" "w")
                                   (match_operand:<VSI2QI> 3
                                                        "register_operand" "w")]
                DOTPROD)))]
  "TARGET_DOTPROD"
  "v<sup>dot.<opsuffix>\\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
  [(set_attr "type" "neon_dot")]
)

;; These instructions map to the __builtins for the Dot Product
;; indexed operations.
;; Operand 1 is the accumulator (tied to the output), operand 3 is always a
;; V8QI register with constraint "t", and operand 4 is the immediate lane
;; index printed inside the brackets.
(define_insn "neon_<sup>dot_lane<vsi2qi>"
  [(set (match_operand:VCVTI 0 "register_operand" "=w")
        (plus:VCVTI (match_operand:VCVTI 1 "register_operand" "0")
                    (unspec:VCVTI [(match_operand:<VSI2QI> 2
                                                        "register_operand" "w")
                                   (match_operand:V8QI 3 "register_operand" "t")
                                   (match_operand:SI 4 "immediate_operand" "i")]
                DOTPROD)))]
  "TARGET_DOTPROD"
  {
    /* Endian-adjust the lane index before printing it.
       NOTE(review): the adjustment uses V8QImode's lane count; confirm this
       matches the lane numbering the instruction expects on big-endian.  */
    operands[4]
      = GEN_INT (NEON_ENDIAN_LANE_N (V8QImode, INTVAL (operands[4])));
    return "v<sup>dot.<opsuffix>\\t%<V_reg>0, %<V_reg>2, %P3[%c4]";
  }
  [(set_attr "type" "neon_dot")]
)

;; These expands map to the Dot Product optab the vectorizer checks for.
;; The auto-vectorizer expects a dot product builtin that also does an
;; accumulation into the provided register.
;; Given the following pattern
;;
;; for (i=0; i<len; i++) {
;;     c = a[i] * b[i];
;;     r += c;
;; }
;; return r;
;;
;; This can be auto-vectorized to
;; r  = a[0]*b[0] + a[1]*b[1] + a[2]*b[2] + a[3]*b[3];
;;
;; given enough iterations.  However the vectorizer can keep unrolling the loop
;; r += a[4]*b[4] + a[5]*b[5] + a[6]*b[6] + a[7]*b[7];
;; r += a[8]*b[8] + a[9]*b[9] + a[10]*b[10] + a[11]*b[11];
;; ...
;;
;; and so the vectorizer provides r, in which the result has to be accumulated.
;; Operand 3 is the accumulator supplied by the caller.  The dot-product insn
;; accumulates into operand 3 in place, and the result is then copied to
;; operand 0.
(define_expand "<sup>dot_prod<vsi2qi>"
  [(set (match_operand:VCVTI 0 "register_operand")
        (plus:VCVTI (unspec:VCVTI [(match_operand:<VSI2QI> 1
                                                        "register_operand")
                                   (match_operand:<VSI2QI> 2
                                                        "register_operand")]
                     DOTPROD)
                    (match_operand:VCVTI 3 "register_operand")))]
  "TARGET_DOTPROD"
{
  emit_insn (
    gen_neon_<sup>dot<vsi2qi> (operands[3], operands[3], operands[1],
                                 operands[2]));
  emit_insn (gen_rtx_SET (operands[0], operands[3]));
  DONE;
})

;; copysign for VCVTF modes.  A vector with 0x80000000 in every element is
;; built in the integer mode <V_cmp_result>, reinterpreted in the float mode
;; via a subreg, and used as the selector of a vbsl between operands 2 and 1.
(define_expand "neon_copysignf<mode>"
  [(match_operand:VCVTF 0 "register_operand")
   (match_operand:VCVTF 1 "register_operand")
   (match_operand:VCVTF 2 "register_operand")]
  "TARGET_NEON"
  "{
     rtx v_bitmask_cast;
     rtx v_bitmask = gen_reg_rtx (<VCVTF:V_cmp_result>mode);
     /* Use gen_int_mode so the sign-bit constant is sign-extended to the
        canonical CONST_INT form for SImode; a plain GEN_INT (0x80000000)
        is not canonical when HOST_WIDE_INT is wider than 32 bits.  */
     rtx c = gen_int_mode (0x80000000, SImode);

     emit_move_insn (v_bitmask,
                     gen_const_vec_duplicate (<VCVTF:V_cmp_result>mode, c));
     emit_move_insn (operands[0], operands[2]);
     v_bitmask_cast = simplify_gen_subreg (<MODE>mode, v_bitmask,
                                           <VCVTF:V_cmp_result>mode, 0);
     emit_insn (gen_neon_vbsl<mode> (operands[0], v_bitmask_cast, operands[0],
                                     operands[1]));

     DONE;
  }"
)

(define_insn "neon_vqneg<mode>"
  [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
        (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")]
                      UNSPEC_VQNEG))]
  "TARGET_NEON"
  "vqneg.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
  [(set_attr "type" "neon_qneg<q>")]
)

(define_insn "neon_vcls<mode>"
  [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
        (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")]
                      UNSPEC_VCLS))]
  "TARGET_NEON"
  "vcls.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
  [(set_attr "type" "neon_cls<q>")]
)

;; Standard-named clz pattern for VDQIW modes, output as vclz.
(define_insn "clz<mode>2"
  [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
        (clz:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")))]
  "TARGET_NEON"
  "vclz.<V_if_elem>\t%<V_reg>0, %<V_reg>1"
  [(set_attr "type" "neon_cnt<q>")]
)

;; vclz expander: forwards to clz<mode>2.
(define_expand "neon_vclz<mode>"
  [(match_operand:VDQIW 0 "s_register_operand" "")
   (match_operand:VDQIW 1 "s_register_operand" "")]
  "TARGET_NEON"
{
  emit_insn (gen_clz<mode>2 (operands[0], operands[1]));
  DONE;
})

;; Standard-named popcount pattern for VE modes, output as vcnt.
(define_insn "popcount<mode>2"
  [(set (match_operand:VE 0 "s_register_operand" "=w")
        (popcount:VE (match_operand:VE 1 "s_register_operand" "w")))]
  "TARGET_NEON"
  "vcnt.<V_sz_elem>\t%<V_reg>0, %<V_reg>1"
  [(set_attr "type" "neon_cnt<q>")]
)

;; vcnt expander: forwards to popcount<mode>2.
(define_expand "neon_vcnt<mode>"
  [(match_operand:VE 0 "s_register_operand" "=w")
   (match_operand:VE 1 "s_register_operand" "w")]
  "TARGET_NEON"
{
  emit_insn (gen_popcount<mode>2 (operands[0], operands[1]));
  DONE;
})

;; vrecpe, VH form.
(define_insn "neon_vrecpe<mode>"
  [(set (match_operand:VH 0 "s_register_operand" "=w")
        (unspec:VH [(match_operand:VH 1 "s_register_operand" "w")]
                   UNSPEC_VRECPE))]
  "TARGET_NEON_FP16INST"
  "vrecpe.f16\t%<V_reg>0, %<V_reg>1"
  [(set_attr "type" "neon_fp_recpe_s<q>")]
)

;; vrecpe, V32 form.
(define_insn "neon_vrecpe<mode>"
  [(set (match_operand:V32 0 "s_register_operand" "=w")
        (unspec:V32 [(match_operand:V32 1 "s_register_operand" "w")]
                    UNSPEC_VRECPE))]
  "TARGET_NEON"
  "vrecpe.<V_u_elem>\t%<V_reg>0, %<V_reg>1"
  [(set_attr "type" "neon_fp_recpe_s<q>")]
)

(define_insn "neon_vrsqrte<mode>"
  [(set (match_operand:V32 0 "s_register_operand" "=w")
        (unspec:V32 [(match_operand:V32 1 "s_register_operand" "w")]
                    UNSPEC_VRSQRTE))]
  "TARGET_NEON"
  "vrsqrte.<V_u_elem>\t%<V_reg>0, %<V_reg>1"
  [(set_attr "type" "neon_fp_rsqrte_s<q>")]
)

;; vmvn expander: forwards to the generic one_cmpl<mode>2 pattern.
(define_expand "neon_vmvn<mode>"
  [(match_operand:VDQIW 0 "s_register_operand" "")
   (match_operand:VDQIW 1 "s_register_operand" "")]
  "TARGET_NEON"
{
  emit_insn (gen_one_cmpl<mode>2 (operands[0], operands[1]));
  DONE;
})

;; Sign-extending lane extract from a VD-mode vector into an SImode core
;; register.  On big-endian the lane index is mirrored within the vector
;; before printing.
(define_insn "neon_vget_lane<mode>_sext_internal"
  [(set (match_operand:SI 0 "s_register_operand" "=r")
        (sign_extend:SI
          (vec_select:<V_elem>
            (match_operand:VD 1 "s_register_operand" "w")
            (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
  "TARGET_NEON"
{
  if (BYTES_BIG_ENDIAN)
    {
      int elt = INTVAL (operands[2]);
      elt = GET_MODE_NUNITS (<MODE>mode) - 1 - elt;
      operands[2] = GEN_INT (elt);
    }
  return "vmov.s<V_sz_elem>\t%0, %P1[%c2]";
}
  [(set_attr "type" "neon_to_gp")]
)

;; Zero-extending counterpart of the VD-mode lane extract above.
(define_insn "neon_vget_lane<mode>_zext_internal"
  [(set (match_operand:SI 0 "s_register_operand" "=r")
        (zero_extend:SI
          (vec_select:<V_elem>
            (match_operand:VD 1 "s_register_operand" "w")
            (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
  "TARGET_NEON"
{
  if (BYTES_BIG_ENDIAN)
    {
      int elt = INTVAL (operands[2]);
      elt = GET_MODE_NUNITS (<MODE>mode) - 1 - elt;
      operands[2] = GEN_INT (elt);
    }
  return "vmov.u<V_sz_elem>\t%0, %P1[%c2]";
}
  [(set_attr "type" "neon_to_gp")]
)

;; Sign-extending lane extract from a VQ2-mode vector.  The lane is split
;; into a half selector (elt / halfelts, which picks the <V_HALF> register at
;; regno + 0 or regno + 2) and an index within that half, which is mirrored
;; on big-endian.
(define_insn "neon_vget_lane<mode>_sext_internal"
  [(set (match_operand:SI 0 "s_register_operand" "=r")
        (sign_extend:SI
          (vec_select:<V_elem>
            (match_operand:VQ2 1 "s_register_operand" "w")
            (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
  "TARGET_NEON"
{
  rtx ops[3];
  int regno = REGNO (operands[1]);
  unsigned int halfelts = GET_MODE_NUNITS (<MODE>mode) / 2;
  unsigned int elt = INTVAL (operands[2]);
  unsigned int elt_adj = elt % halfelts;

  if (BYTES_BIG_ENDIAN)
    elt_adj = halfelts - 1 - elt_adj;

  ops[0] = operands[0];
  ops[1] = gen_rtx_REG (<V_HALF>mode, regno + 2 * (elt / halfelts));
  ops[2] = GEN_INT (elt_adj);
  output_asm_insn ("vmov.s<V_sz_elem>\t%0, %P1[%c2]", ops);

  return "";
}
  [(set_attr "type" "neon_to_gp_q")]
)

;; Zero-extending counterpart of the VQ2-mode lane extract above.
(define_insn "neon_vget_lane<mode>_zext_internal"
  [(set (match_operand:SI 0 "s_register_operand" "=r")
        (zero_extend:SI
          (vec_select:<V_elem>
            (match_operand:VQ2 1 "s_register_operand" "w")
            (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
  "TARGET_NEON"
{
  rtx ops[3];
  int regno = REGNO (operands[1]);
  unsigned int halfelts = GET_MODE_NUNITS (<MODE>mode) / 2;
  unsigned int elt = INTVAL (operands[2]);
  unsigned int elt_adj = elt % halfelts;

  if (BYTES_BIG_ENDIAN)
    elt_adj = halfelts - 1 - elt_adj;

  ops[0] = operands[0];
  ops[1] = gen_rtx_REG (<V_HALF>mode, regno + 2 * (elt / halfelts));
  ops[2] = GEN_INT (elt_adj);
  output_asm_insn ("vmov.u<V_sz_elem>\t%0, %P1[%c2]", ops);

  return "";
}
  [(set_attr "type" "neon_to_gp_q")]
)

;; vget_lane expander.  32-bit elements go through vec_extract; all other
;; element sizes use the sign-extending internal pattern.
(define_expand "neon_vget_lane<mode>"
  [(match_operand:<V_ext> 0 "s_register_operand" "")
   (match_operand:VDQW 1 "s_register_operand" "")
   (match_operand:SI 2 "immediate_operand" "")]
  "TARGET_NEON"
{
  if (BYTES_BIG_ENDIAN)
    {
      /* The intrinsics are defined in terms of a model where the
         element ordering in memory is vldm order, whereas the generic
         RTL is defined in terms of a model where the element ordering
         in memory is array order.  Convert the lane number to conform
         to this model.  */
      unsigned int elt = INTVAL (operands[2]);
      unsigned int reg_nelts
        = 64 / GET_MODE_UNIT_BITSIZE (<MODE>mode);
      elt ^= reg_nelts - 1;
      operands[2] = GEN_INT (elt);
    }

  if (GET_MODE_UNIT_BITSIZE (<MODE>mode) == 32)
    emit_insn (gen_vec_extract<mode><V_elem_l> (operands[0], operands[1],
                                                operands[2]));
  else
    emit_insn (gen_neon_vget_lane<mode>_sext_internal (operands[0],
                                                       operands[1],
                                                       operands[2]));
  DONE;
})

(define_expand "neon_vget_laneu<mode>"
  [(match_operand:<V_ext> 0 "s_register_operand" "")
   (match_operand:VDQIW 1 "s_register_operand" "")
   (match_operand:SI 2 "immediate_operand" "")]
  "TARGET_NEON"
{
  if (BYTES_BIG_ENDIAN)
    {
      /* The intrinsics are defined in terms of a model where the
         element ordering in memory is vldm order, whereas the generic
         RTL is defined in terms of a model where the element ordering
         in memory is array order.  Convert the lane number to conform
         to this model.
*/ 3744 unsigned int elt = INTVAL (operands[2]); 3745 unsigned int reg_nelts 3746 = 64 / GET_MODE_UNIT_BITSIZE (<MODE>mode); 3747 elt ^= reg_nelts - 1; 3748 operands[2] = GEN_INT (elt); 3749 } 3750 3751 if (GET_MODE_UNIT_BITSIZE (<MODE>mode) == 32) 3752 emit_insn (gen_vec_extract<mode><V_elem_l> (operands[0], operands[1], 3753 operands[2])); 3754 else 3755 emit_insn (gen_neon_vget_lane<mode>_zext_internal (operands[0], 3756 operands[1], 3757 operands[2])); 3758 DONE; 3759}) 3760 3761(define_expand "neon_vget_lanedi" 3762 [(match_operand:DI 0 "s_register_operand" "=r") 3763 (match_operand:DI 1 "s_register_operand" "w") 3764 (match_operand:SI 2 "immediate_operand" "")] 3765 "TARGET_NEON" 3766{ 3767 emit_move_insn (operands[0], operands[1]); 3768 DONE; 3769}) 3770 3771(define_expand "neon_vget_lanev2di" 3772 [(match_operand:DI 0 "s_register_operand" "") 3773 (match_operand:V2DI 1 "s_register_operand" "") 3774 (match_operand:SI 2 "immediate_operand" "")] 3775 "TARGET_NEON" 3776{ 3777 int lane; 3778 3779if (BYTES_BIG_ENDIAN) 3780 { 3781 /* The intrinsics are defined in terms of a model where the 3782 element ordering in memory is vldm order, whereas the generic 3783 RTL is defined in terms of a model where the element ordering 3784 in memory is array order. Convert the lane number to conform 3785 to this model. */ 3786 unsigned int elt = INTVAL (operands[2]); 3787 unsigned int reg_nelts = 2; 3788 elt ^= reg_nelts - 1; 3789 operands[2] = GEN_INT (elt); 3790 } 3791 3792 lane = INTVAL (operands[2]); 3793 gcc_assert ((lane ==0) || (lane == 1)); 3794 emit_move_insn (operands[0], lane == 0 3795 ? 
gen_lowpart (DImode, operands[1]) 3796 : gen_highpart (DImode, operands[1])); 3797 DONE; 3798}) 3799 3800(define_expand "neon_vset_lane<mode>" 3801 [(match_operand:VDQ 0 "s_register_operand" "=w") 3802 (match_operand:<V_elem> 1 "s_register_operand" "r") 3803 (match_operand:VDQ 2 "s_register_operand" "0") 3804 (match_operand:SI 3 "immediate_operand" "i")] 3805 "TARGET_NEON" 3806{ 3807 unsigned int elt = INTVAL (operands[3]); 3808 3809 if (BYTES_BIG_ENDIAN) 3810 { 3811 unsigned int reg_nelts 3812 = 64 / GET_MODE_UNIT_BITSIZE (<MODE>mode); 3813 elt ^= reg_nelts - 1; 3814 } 3815 3816 emit_insn (gen_vec_set<mode>_internal (operands[0], operands[1], 3817 GEN_INT (1 << elt), operands[2])); 3818 DONE; 3819}) 3820 3821; See neon_vget_lanedi comment for reasons operands 2 & 3 are ignored. 3822 3823(define_expand "neon_vset_lanedi" 3824 [(match_operand:DI 0 "s_register_operand" "=w") 3825 (match_operand:DI 1 "s_register_operand" "r") 3826 (match_operand:DI 2 "s_register_operand" "0") 3827 (match_operand:SI 3 "immediate_operand" "i")] 3828 "TARGET_NEON" 3829{ 3830 emit_move_insn (operands[0], operands[1]); 3831 DONE; 3832}) 3833 3834(define_expand "neon_vcreate<mode>" 3835 [(match_operand:VD_RE 0 "s_register_operand" "") 3836 (match_operand:DI 1 "general_operand" "")] 3837 "TARGET_NEON" 3838{ 3839 rtx src = gen_lowpart (<MODE>mode, operands[1]); 3840 emit_move_insn (operands[0], src); 3841 DONE; 3842}) 3843 3844(define_insn "neon_vdup_n<mode>" 3845 [(set (match_operand:VX 0 "s_register_operand" "=w") 3846 (vec_duplicate:VX (match_operand:<V_elem> 1 "s_register_operand" "r")))] 3847 "TARGET_NEON" 3848 "vdup.<V_sz_elem>\t%<V_reg>0, %1" 3849 [(set_attr "type" "neon_from_gp<q>")] 3850) 3851 3852(define_insn "neon_vdup_nv4hf" 3853 [(set (match_operand:V4HF 0 "s_register_operand" "=w") 3854 (vec_duplicate:V4HF (match_operand:HF 1 "s_register_operand" "r")))] 3855 "TARGET_NEON" 3856 "vdup.16\t%P0, %1" 3857 [(set_attr "type" "neon_from_gp")] 3858) 3859 3860(define_insn 
"neon_vdup_nv8hf" 3861 [(set (match_operand:V8HF 0 "s_register_operand" "=w") 3862 (vec_duplicate:V8HF (match_operand:HF 1 "s_register_operand" "r")))] 3863 "TARGET_NEON" 3864 "vdup.16\t%q0, %1" 3865 [(set_attr "type" "neon_from_gp_q")] 3866) 3867 3868(define_insn "neon_vdup_n<mode>" 3869 [(set (match_operand:V32 0 "s_register_operand" "=w,w") 3870 (vec_duplicate:V32 (match_operand:<V_elem> 1 "s_register_operand" "r,t")))] 3871 "TARGET_NEON" 3872 "@ 3873 vdup.<V_sz_elem>\t%<V_reg>0, %1 3874 vdup.<V_sz_elem>\t%<V_reg>0, %y1" 3875 [(set_attr "type" "neon_from_gp<q>,neon_dup<q>")] 3876) 3877 3878(define_expand "neon_vdup_ndi" 3879 [(match_operand:DI 0 "s_register_operand" "=w") 3880 (match_operand:DI 1 "s_register_operand" "r")] 3881 "TARGET_NEON" 3882{ 3883 emit_move_insn (operands[0], operands[1]); 3884 DONE; 3885} 3886) 3887 3888(define_insn "neon_vdup_nv2di" 3889 [(set (match_operand:V2DI 0 "s_register_operand" "=w,w") 3890 (vec_duplicate:V2DI (match_operand:DI 1 "s_register_operand" "r,w")))] 3891 "TARGET_NEON" 3892 "@ 3893 vmov\t%e0, %Q1, %R1\;vmov\t%f0, %Q1, %R1 3894 vmov\t%e0, %P1\;vmov\t%f0, %P1" 3895 [(set_attr "length" "8") 3896 (set_attr "type" "multiple")] 3897) 3898 3899(define_insn "neon_vdup_lane<mode>_internal" 3900 [(set (match_operand:VDQW 0 "s_register_operand" "=w") 3901 (vec_duplicate:VDQW 3902 (vec_select:<V_elem> 3903 (match_operand:<V_double_vector_mode> 1 "s_register_operand" "w") 3904 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))] 3905 "TARGET_NEON" 3906{ 3907 if (BYTES_BIG_ENDIAN) 3908 { 3909 int elt = INTVAL (operands[2]); 3910 elt = GET_MODE_NUNITS (<V_double_vector_mode>mode) - 1 - elt; 3911 operands[2] = GEN_INT (elt); 3912 } 3913 if (<Is_d_reg>) 3914 return "vdup.<V_sz_elem>\t%P0, %P1[%c2]"; 3915 else 3916 return "vdup.<V_sz_elem>\t%q0, %P1[%c2]"; 3917} 3918 [(set_attr "type" "neon_dup<q>")] 3919) 3920 3921(define_insn "neon_vdup_lane<mode>_internal" 3922 [(set (match_operand:VH 0 "s_register_operand" "=w") 3923 
(vec_duplicate:VH 3924 (vec_select:<V_elem> 3925 (match_operand:<V_double_vector_mode> 1 "s_register_operand" "w") 3926 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))] 3927 "TARGET_NEON && TARGET_FP16" 3928{ 3929 if (BYTES_BIG_ENDIAN) 3930 { 3931 int elt = INTVAL (operands[2]); 3932 elt = GET_MODE_NUNITS (<V_double_vector_mode>mode) - 1 - elt; 3933 operands[2] = GEN_INT (elt); 3934 } 3935 if (<Is_d_reg>) 3936 return "vdup.<V_sz_elem>\t%P0, %P1[%c2]"; 3937 else 3938 return "vdup.<V_sz_elem>\t%q0, %P1[%c2]"; 3939} 3940 [(set_attr "type" "neon_dup<q>")] 3941) 3942 3943(define_expand "neon_vdup_lane<mode>" 3944 [(match_operand:VDQW 0 "s_register_operand" "=w") 3945 (match_operand:<V_double_vector_mode> 1 "s_register_operand" "w") 3946 (match_operand:SI 2 "immediate_operand" "i")] 3947 "TARGET_NEON" 3948{ 3949 if (BYTES_BIG_ENDIAN) 3950 { 3951 unsigned int elt = INTVAL (operands[2]); 3952 unsigned int reg_nelts 3953 = 64 / GET_MODE_UNIT_BITSIZE (<V_double_vector_mode>mode); 3954 elt ^= reg_nelts - 1; 3955 operands[2] = GEN_INT (elt); 3956 } 3957 emit_insn (gen_neon_vdup_lane<mode>_internal (operands[0], operands[1], 3958 operands[2])); 3959 DONE; 3960}) 3961 3962(define_expand "neon_vdup_lane<mode>" 3963 [(match_operand:VH 0 "s_register_operand") 3964 (match_operand:<V_double_vector_mode> 1 "s_register_operand") 3965 (match_operand:SI 2 "immediate_operand")] 3966 "TARGET_NEON && TARGET_FP16" 3967{ 3968 if (BYTES_BIG_ENDIAN) 3969 { 3970 unsigned int elt = INTVAL (operands[2]); 3971 unsigned int reg_nelts 3972 = 64 / GET_MODE_UNIT_BITSIZE (<V_double_vector_mode>mode); 3973 elt ^= reg_nelts - 1; 3974 operands[2] = GEN_INT (elt); 3975 } 3976 emit_insn (gen_neon_vdup_lane<mode>_internal (operands[0], operands[1], 3977 operands[2])); 3978 DONE; 3979}) 3980 3981; Scalar index is ignored, since only zero is valid here. 
;; vdup_lane builtin for a DImode source and a DImode destination.
;; Operand 0 is the destination, operand 1 the source, and operand 2 the
;; lane number.  The expansion never looks at operand 2: it is simply a move
;; of operand 1 into operand 0.
(define_expand "neon_vdup_lanedi"
  [(match_operand:DI 0 "s_register_operand" "=w")
   (match_operand:DI 1 "s_register_operand" "w")
   (match_operand:SI 2 "immediate_operand" "i")]
  "TARGET_NEON"
{
  /* The lane index in operands[2] is not used by this expansion.  */
  emit_move_insn (operands[0], operands[1]);
  DONE;
})

; Likewise for v2di, as the DImode second operand has only a single element.
;; Operand 2 (the lane number) is again unused; the expansion forwards
;; operands 0 and 1 to neon_vdup_nv2di, whose pattern is a vec_duplicate of
;; the DImode value into the V2DI destination.
(define_expand "neon_vdup_lanev2di"
  [(match_operand:V2DI 0 "s_register_operand" "=w")
   (match_operand:DI 1 "s_register_operand" "w")
   (match_operand:SI 2 "immediate_operand" "i")]
  "TARGET_NEON"
{
  /* Only the destination and the scalar source are passed on.  */
  emit_insn (gen_neon_vdup_nv2di (operands[0], operands[1]));
  DONE;
})

; Disabled before reload because we don't want combine doing something silly,
; but used by the post-reload expansion of neon_vcombine.
;; The pattern is two sets that exchange operand 0 and operand 1, so both
;; operands carry the read-write "+w" constraint.  The insn condition
;; requires reload_completed, and the output is a single vswp.
(define_insn "*neon_vswp<mode>"
  [(set (match_operand:VDQX 0 "s_register_operand" "+w")
        (match_operand:VDQX 1 "s_register_operand" "+w"))
   (set (match_dup 1) (match_dup 0))]
  "TARGET_NEON && reload_completed"
  "vswp\t%<V_reg>0, %<V_reg>1"
  [(set_attr "type" "neon_permute<q>")]
)

;; In this insn, operand 1 should be low, and operand 2 the high part of the
;; dest vector.
;; FIXME: A different implementation of this builtin could make it much
;; more likely that we wouldn't actually need to output anything (we could make
;; it so that the reg allocator puts things in the right places magically
;; instead).  Lack of subregs for vectors makes that tricky though, I think.
4020 4021(define_insn_and_split "neon_vcombine<mode>" 4022 [(set (match_operand:<V_DOUBLE> 0 "s_register_operand" "=w") 4023 (vec_concat:<V_DOUBLE> 4024 (match_operand:VDX 1 "s_register_operand" "w") 4025 (match_operand:VDX 2 "s_register_operand" "w")))] 4026 "TARGET_NEON" 4027 "#" 4028 "&& reload_completed" 4029 [(const_int 0)] 4030{ 4031 neon_split_vcombine (operands); 4032 DONE; 4033} 4034[(set_attr "type" "multiple")] 4035) 4036 4037(define_expand "neon_vget_high<mode>" 4038 [(match_operand:<V_HALF> 0 "s_register_operand") 4039 (match_operand:VQX 1 "s_register_operand")] 4040 "TARGET_NEON" 4041{ 4042 emit_move_insn (operands[0], 4043 simplify_gen_subreg (<V_HALF>mode, operands[1], <MODE>mode, 4044 GET_MODE_SIZE (<V_HALF>mode))); 4045 DONE; 4046}) 4047 4048(define_expand "neon_vget_low<mode>" 4049 [(match_operand:<V_HALF> 0 "s_register_operand") 4050 (match_operand:VQX 1 "s_register_operand")] 4051 "TARGET_NEON" 4052{ 4053 emit_move_insn (operands[0], 4054 simplify_gen_subreg (<V_HALF>mode, operands[1], 4055 <MODE>mode, 0)); 4056 DONE; 4057}) 4058 4059(define_insn "float<mode><V_cvtto>2" 4060 [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w") 4061 (float:<V_CVTTO> (match_operand:VCVTI 1 "s_register_operand" "w")))] 4062 "TARGET_NEON && !flag_rounding_math" 4063 "vcvt.f32.s32\t%<V_reg>0, %<V_reg>1" 4064 [(set_attr "type" "neon_int_to_fp_<V_elem_ch><q>")] 4065) 4066 4067(define_insn "floatuns<mode><V_cvtto>2" 4068 [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w") 4069 (unsigned_float:<V_CVTTO> (match_operand:VCVTI 1 "s_register_operand" "w")))] 4070 "TARGET_NEON && !flag_rounding_math" 4071 "vcvt.f32.u32\t%<V_reg>0, %<V_reg>1" 4072 [(set_attr "type" "neon_int_to_fp_<V_elem_ch><q>")] 4073) 4074 4075(define_insn "fix_trunc<mode><V_cvtto>2" 4076 [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w") 4077 (fix:<V_CVTTO> (match_operand:VCVTF 1 "s_register_operand" "w")))] 4078 "TARGET_NEON" 4079 "vcvt.s32.f32\t%<V_reg>0, %<V_reg>1" 4080 
[(set_attr "type" "neon_fp_to_int_<V_elem_ch><q>")] 4081) 4082 4083(define_insn "fixuns_trunc<mode><V_cvtto>2" 4084 [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w") 4085 (unsigned_fix:<V_CVTTO> (match_operand:VCVTF 1 "s_register_operand" "w")))] 4086 "TARGET_NEON" 4087 "vcvt.u32.f32\t%<V_reg>0, %<V_reg>1" 4088 [(set_attr "type" "neon_fp_to_int_<V_elem_ch><q>")] 4089) 4090 4091(define_insn "neon_vcvt<sup><mode>" 4092 [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w") 4093 (unspec:<V_CVTTO> [(match_operand:VCVTF 1 "s_register_operand" "w")] 4094 VCVT_US))] 4095 "TARGET_NEON" 4096 "vcvt.<sup>%#32.f32\t%<V_reg>0, %<V_reg>1" 4097 [(set_attr "type" "neon_fp_to_int_<V_elem_ch><q>")] 4098) 4099 4100(define_insn "neon_vcvt<sup><mode>" 4101 [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w") 4102 (unspec:<V_CVTTO> [(match_operand:VCVTI 1 "s_register_operand" "w")] 4103 VCVT_US))] 4104 "TARGET_NEON" 4105 "vcvt.f32.<sup>%#32\t%<V_reg>0, %<V_reg>1" 4106 [(set_attr "type" "neon_int_to_fp_<V_elem_ch><q>")] 4107) 4108 4109(define_insn "neon_vcvtv4sfv4hf" 4110 [(set (match_operand:V4SF 0 "s_register_operand" "=w") 4111 (unspec:V4SF [(match_operand:V4HF 1 "s_register_operand" "w")] 4112 UNSPEC_VCVT))] 4113 "TARGET_NEON && TARGET_FP16" 4114 "vcvt.f32.f16\t%q0, %P1" 4115 [(set_attr "type" "neon_fp_cvt_widen_h")] 4116) 4117 4118(define_insn "neon_vcvtv4hfv4sf" 4119 [(set (match_operand:V4HF 0 "s_register_operand" "=w") 4120 (unspec:V4HF [(match_operand:V4SF 1 "s_register_operand" "w")] 4121 UNSPEC_VCVT))] 4122 "TARGET_NEON && TARGET_FP16" 4123 "vcvt.f16.f32\t%P0, %q1" 4124 [(set_attr "type" "neon_fp_cvt_narrow_s_q")] 4125) 4126 4127(define_insn "neon_vcvt<sup><mode>" 4128 [(set 4129 (match_operand:<VH_CVTTO> 0 "s_register_operand" "=w") 4130 (unspec:<VH_CVTTO> 4131 [(match_operand:VCVTHI 1 "s_register_operand" "w")] 4132 VCVT_US))] 4133 "TARGET_NEON_FP16INST" 4134 "vcvt.f16.<sup>%#16\t%<V_reg>0, %<V_reg>1" 4135 [(set_attr "type" 
"neon_int_to_fp_<VH_elem_ch><q>")] 4136) 4137 4138(define_insn "neon_vcvt<sup><mode>" 4139 [(set 4140 (match_operand:<VH_CVTTO> 0 "s_register_operand" "=w") 4141 (unspec:<VH_CVTTO> 4142 [(match_operand:VH 1 "s_register_operand" "w")] 4143 VCVT_US))] 4144 "TARGET_NEON_FP16INST" 4145 "vcvt.<sup>%#16.f16\t%<V_reg>0, %<V_reg>1" 4146 [(set_attr "type" "neon_fp_to_int_<VH_elem_ch><q>")] 4147) 4148 4149(define_insn "neon_vcvt<sup>_n<mode>" 4150 [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w") 4151 (unspec:<V_CVTTO> [(match_operand:VCVTF 1 "s_register_operand" "w") 4152 (match_operand:SI 2 "immediate_operand" "i")] 4153 VCVT_US_N))] 4154 "TARGET_NEON" 4155{ 4156 arm_const_bounds (operands[2], 1, 33); 4157 return "vcvt.<sup>%#32.f32\t%<V_reg>0, %<V_reg>1, %2"; 4158} 4159 [(set_attr "type" "neon_fp_to_int_<V_elem_ch><q>")] 4160) 4161 4162(define_insn "neon_vcvt<sup>_n<mode>" 4163 [(set (match_operand:<VH_CVTTO> 0 "s_register_operand" "=w") 4164 (unspec:<VH_CVTTO> 4165 [(match_operand:VH 1 "s_register_operand" "w") 4166 (match_operand:SI 2 "immediate_operand" "i")] 4167 VCVT_US_N))] 4168 "TARGET_NEON_FP16INST" 4169{ 4170 arm_const_bounds (operands[2], 0, 17); 4171 return "vcvt.<sup>%#16.f16\t%<V_reg>0, %<V_reg>1, %2"; 4172} 4173 [(set_attr "type" "neon_fp_to_int_<VH_elem_ch><q>")] 4174) 4175 4176(define_insn "neon_vcvt<sup>_n<mode>" 4177 [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w") 4178 (unspec:<V_CVTTO> [(match_operand:VCVTI 1 "s_register_operand" "w") 4179 (match_operand:SI 2 "immediate_operand" "i")] 4180 VCVT_US_N))] 4181 "TARGET_NEON" 4182{ 4183 arm_const_bounds (operands[2], 1, 33); 4184 return "vcvt.f32.<sup>%#32\t%<V_reg>0, %<V_reg>1, %2"; 4185} 4186 [(set_attr "type" "neon_int_to_fp_<V_elem_ch><q>")] 4187) 4188 4189(define_insn "neon_vcvt<sup>_n<mode>" 4190 [(set (match_operand:<VH_CVTTO> 0 "s_register_operand" "=w") 4191 (unspec:<VH_CVTTO> 4192 [(match_operand:VCVTHI 1 "s_register_operand" "w") 4193 (match_operand:SI 2 "immediate_operand" 
"i")] 4194 VCVT_US_N))] 4195 "TARGET_NEON_FP16INST" 4196{ 4197 arm_const_bounds (operands[2], 0, 17); 4198 return "vcvt.f16.<sup>%#16\t%<V_reg>0, %<V_reg>1, %2"; 4199} 4200 [(set_attr "type" "neon_int_to_fp_<VH_elem_ch><q>")] 4201) 4202 4203(define_insn "neon_vcvt<vcvth_op><sup><mode>" 4204 [(set 4205 (match_operand:<VH_CVTTO> 0 "s_register_operand" "=w") 4206 (unspec:<VH_CVTTO> 4207 [(match_operand:VH 1 "s_register_operand" "w")] 4208 VCVT_HF_US))] 4209 "TARGET_NEON_FP16INST" 4210 "vcvt<vcvth_op>.<sup>%#16.f16\t%<V_reg>0, %<V_reg>1" 4211 [(set_attr "type" "neon_fp_to_int_<VH_elem_ch><q>")] 4212) 4213 4214(define_insn "neon_vmovn<mode>" 4215 [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w") 4216 (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")] 4217 UNSPEC_VMOVN))] 4218 "TARGET_NEON" 4219 "vmovn.<V_if_elem>\t%P0, %q1" 4220 [(set_attr "type" "neon_shift_imm_narrow_q")] 4221) 4222 4223(define_insn "neon_vqmovn<sup><mode>" 4224 [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w") 4225 (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")] 4226 VQMOVN))] 4227 "TARGET_NEON" 4228 "vqmovn.<sup>%#<V_sz_elem>\t%P0, %q1" 4229 [(set_attr "type" "neon_sat_shift_imm_narrow_q")] 4230) 4231 4232(define_insn "neon_vqmovun<mode>" 4233 [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w") 4234 (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")] 4235 UNSPEC_VQMOVUN))] 4236 "TARGET_NEON" 4237 "vqmovun.<V_s_elem>\t%P0, %q1" 4238 [(set_attr "type" "neon_sat_shift_imm_narrow_q")] 4239) 4240 4241(define_insn "neon_vmovl<sup><mode>" 4242 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w") 4243 (unspec:<V_widen> [(match_operand:VW 1 "s_register_operand" "w")] 4244 VMOVL))] 4245 "TARGET_NEON" 4246 "vmovl.<sup>%#<V_sz_elem>\t%q0, %P1" 4247 [(set_attr "type" "neon_shift_imm_long")] 4248) 4249 4250(define_insn "neon_vmul_lane<mode>" 4251 [(set (match_operand:VMD 0 "s_register_operand" "=w") 4252 (unspec:VMD 
[(match_operand:VMD 1 "s_register_operand" "w") 4253 (match_operand:VMD 2 "s_register_operand" 4254 "<scalar_mul_constraint>") 4255 (match_operand:SI 3 "immediate_operand" "i")] 4256 UNSPEC_VMUL_LANE))] 4257 "TARGET_NEON" 4258{ 4259 return "vmul.<V_if_elem>\t%P0, %P1, %P2[%c3]"; 4260} 4261 [(set (attr "type") 4262 (if_then_else (match_test "<Is_float_mode>") 4263 (const_string "neon_fp_mul_s_scalar<q>") 4264 (const_string "neon_mul_<V_elem_ch>_scalar<q>")))] 4265) 4266 4267(define_insn "neon_vmul_lane<mode>" 4268 [(set (match_operand:VMQ 0 "s_register_operand" "=w") 4269 (unspec:VMQ [(match_operand:VMQ 1 "s_register_operand" "w") 4270 (match_operand:<V_HALF> 2 "s_register_operand" 4271 "<scalar_mul_constraint>") 4272 (match_operand:SI 3 "immediate_operand" "i")] 4273 UNSPEC_VMUL_LANE))] 4274 "TARGET_NEON" 4275{ 4276 return "vmul.<V_if_elem>\t%q0, %q1, %P2[%c3]"; 4277} 4278 [(set (attr "type") 4279 (if_then_else (match_test "<Is_float_mode>") 4280 (const_string "neon_fp_mul_s_scalar<q>") 4281 (const_string "neon_mul_<V_elem_ch>_scalar<q>")))] 4282) 4283 4284(define_insn "neon_vmul_lane<mode>" 4285 [(set (match_operand:VH 0 "s_register_operand" "=w") 4286 (unspec:VH [(match_operand:VH 1 "s_register_operand" "w") 4287 (match_operand:V4HF 2 "s_register_operand" 4288 "<scalar_mul_constraint>") 4289 (match_operand:SI 3 "immediate_operand" "i")] 4290 UNSPEC_VMUL_LANE))] 4291 "TARGET_NEON_FP16INST" 4292 "vmul.f16\t%<V_reg>0, %<V_reg>1, %P2[%c3]" 4293 [(set_attr "type" "neon_fp_mul_s_scalar<q>")] 4294) 4295 4296(define_insn "neon_vmull<sup>_lane<mode>" 4297 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w") 4298 (unspec:<V_widen> [(match_operand:VMDI 1 "s_register_operand" "w") 4299 (match_operand:VMDI 2 "s_register_operand" 4300 "<scalar_mul_constraint>") 4301 (match_operand:SI 3 "immediate_operand" "i")] 4302 VMULL_LANE))] 4303 "TARGET_NEON" 4304{ 4305 return "vmull.<sup>%#<V_sz_elem>\t%q0, %P1, %P2[%c3]"; 4306} 4307 [(set_attr "type" 
"neon_mul_<V_elem_ch>_scalar_long")] 4308) 4309 4310(define_insn "neon_vqdmull_lane<mode>" 4311 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w") 4312 (unspec:<V_widen> [(match_operand:VMDI 1 "s_register_operand" "w") 4313 (match_operand:VMDI 2 "s_register_operand" 4314 "<scalar_mul_constraint>") 4315 (match_operand:SI 3 "immediate_operand" "i")] 4316 UNSPEC_VQDMULL_LANE))] 4317 "TARGET_NEON" 4318{ 4319 return "vqdmull.<V_s_elem>\t%q0, %P1, %P2[%c3]"; 4320} 4321 [(set_attr "type" "neon_sat_mul_<V_elem_ch>_scalar_long")] 4322) 4323 4324(define_insn "neon_vq<r>dmulh_lane<mode>" 4325 [(set (match_operand:VMQI 0 "s_register_operand" "=w") 4326 (unspec:VMQI [(match_operand:VMQI 1 "s_register_operand" "w") 4327 (match_operand:<V_HALF> 2 "s_register_operand" 4328 "<scalar_mul_constraint>") 4329 (match_operand:SI 3 "immediate_operand" "i")] 4330 VQDMULH_LANE))] 4331 "TARGET_NEON" 4332{ 4333 return "vq<r>dmulh.<V_s_elem>\t%q0, %q1, %P2[%c3]"; 4334} 4335 [(set_attr "type" "neon_sat_mul_<V_elem_ch>_scalar_q")] 4336) 4337 4338(define_insn "neon_vq<r>dmulh_lane<mode>" 4339 [(set (match_operand:VMDI 0 "s_register_operand" "=w") 4340 (unspec:VMDI [(match_operand:VMDI 1 "s_register_operand" "w") 4341 (match_operand:VMDI 2 "s_register_operand" 4342 "<scalar_mul_constraint>") 4343 (match_operand:SI 3 "immediate_operand" "i")] 4344 VQDMULH_LANE))] 4345 "TARGET_NEON" 4346{ 4347 return "vq<r>dmulh.<V_s_elem>\t%P0, %P1, %P2[%c3]"; 4348} 4349 [(set_attr "type" "neon_sat_mul_<V_elem_ch>_scalar_q")] 4350) 4351 4352;; vqrdmlah_lane, vqrdmlsh_lane 4353(define_insn "neon_vqrdml<VQRDMLH_AS:neon_rdma_as>h_lane<mode>" 4354 [(set (match_operand:VMQI 0 "s_register_operand" "=w") 4355 (unspec:VMQI [(match_operand:VMQI 1 "s_register_operand" "0") 4356 (match_operand:VMQI 2 "s_register_operand" "w") 4357 (match_operand:<V_HALF> 3 "s_register_operand" 4358 "<scalar_mul_constraint>") 4359 (match_operand:SI 4 "immediate_operand" "i")] 4360 VQRDMLH_AS))] 4361 "TARGET_NEON_RDMA" 4362{ 4363 
return 4364 "vqrdml<VQRDMLH_AS:neon_rdma_as>h.<V_s_elem>\t%q0, %q2, %P3[%c4]"; 4365} 4366 [(set_attr "type" "neon_mla_<V_elem_ch>_scalar<q>")] 4367) 4368 4369(define_insn "neon_vqrdml<VQRDMLH_AS:neon_rdma_as>h_lane<mode>" 4370 [(set (match_operand:VMDI 0 "s_register_operand" "=w") 4371 (unspec:VMDI [(match_operand:VMDI 1 "s_register_operand" "0") 4372 (match_operand:VMDI 2 "s_register_operand" "w") 4373 (match_operand:VMDI 3 "s_register_operand" 4374 "<scalar_mul_constraint>") 4375 (match_operand:SI 4 "immediate_operand" "i")] 4376 VQRDMLH_AS))] 4377 "TARGET_NEON_RDMA" 4378{ 4379 return 4380 "vqrdml<VQRDMLH_AS:neon_rdma_as>h.<V_s_elem>\t%P0, %P2, %P3[%c4]"; 4381} 4382 [(set_attr "type" "neon_mla_<V_elem_ch>_scalar")] 4383) 4384 4385(define_insn "neon_vmla_lane<mode>" 4386 [(set (match_operand:VMD 0 "s_register_operand" "=w") 4387 (unspec:VMD [(match_operand:VMD 1 "s_register_operand" "0") 4388 (match_operand:VMD 2 "s_register_operand" "w") 4389 (match_operand:VMD 3 "s_register_operand" 4390 "<scalar_mul_constraint>") 4391 (match_operand:SI 4 "immediate_operand" "i")] 4392 UNSPEC_VMLA_LANE))] 4393 "TARGET_NEON" 4394{ 4395 return "vmla.<V_if_elem>\t%P0, %P2, %P3[%c4]"; 4396} 4397 [(set (attr "type") 4398 (if_then_else (match_test "<Is_float_mode>") 4399 (const_string "neon_fp_mla_s_scalar<q>") 4400 (const_string "neon_mla_<V_elem_ch>_scalar<q>")))] 4401) 4402 4403(define_insn "neon_vmla_lane<mode>" 4404 [(set (match_operand:VMQ 0 "s_register_operand" "=w") 4405 (unspec:VMQ [(match_operand:VMQ 1 "s_register_operand" "0") 4406 (match_operand:VMQ 2 "s_register_operand" "w") 4407 (match_operand:<V_HALF> 3 "s_register_operand" 4408 "<scalar_mul_constraint>") 4409 (match_operand:SI 4 "immediate_operand" "i")] 4410 UNSPEC_VMLA_LANE))] 4411 "TARGET_NEON" 4412{ 4413 return "vmla.<V_if_elem>\t%q0, %q2, %P3[%c4]"; 4414} 4415 [(set (attr "type") 4416 (if_then_else (match_test "<Is_float_mode>") 4417 (const_string "neon_fp_mla_s_scalar<q>") 4418 (const_string 
"neon_mla_<V_elem_ch>_scalar<q>")))] 4419) 4420 4421(define_insn "neon_vmlal<sup>_lane<mode>" 4422 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w") 4423 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0") 4424 (match_operand:VMDI 2 "s_register_operand" "w") 4425 (match_operand:VMDI 3 "s_register_operand" 4426 "<scalar_mul_constraint>") 4427 (match_operand:SI 4 "immediate_operand" "i")] 4428 VMLAL_LANE))] 4429 "TARGET_NEON" 4430{ 4431 return "vmlal.<sup>%#<V_sz_elem>\t%q0, %P2, %P3[%c4]"; 4432} 4433 [(set_attr "type" "neon_mla_<V_elem_ch>_scalar_long")] 4434) 4435 4436(define_insn "neon_vqdmlal_lane<mode>" 4437 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w") 4438 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0") 4439 (match_operand:VMDI 2 "s_register_operand" "w") 4440 (match_operand:VMDI 3 "s_register_operand" 4441 "<scalar_mul_constraint>") 4442 (match_operand:SI 4 "immediate_operand" "i")] 4443 UNSPEC_VQDMLAL_LANE))] 4444 "TARGET_NEON" 4445{ 4446 return "vqdmlal.<V_s_elem>\t%q0, %P2, %P3[%c4]"; 4447} 4448 [(set_attr "type" "neon_sat_mla_<V_elem_ch>_scalar_long")] 4449) 4450 4451(define_insn "neon_vmls_lane<mode>" 4452 [(set (match_operand:VMD 0 "s_register_operand" "=w") 4453 (unspec:VMD [(match_operand:VMD 1 "s_register_operand" "0") 4454 (match_operand:VMD 2 "s_register_operand" "w") 4455 (match_operand:VMD 3 "s_register_operand" 4456 "<scalar_mul_constraint>") 4457 (match_operand:SI 4 "immediate_operand" "i")] 4458 UNSPEC_VMLS_LANE))] 4459 "TARGET_NEON" 4460{ 4461 return "vmls.<V_if_elem>\t%P0, %P2, %P3[%c4]"; 4462} 4463 [(set (attr "type") 4464 (if_then_else (match_test "<Is_float_mode>") 4465 (const_string "neon_fp_mla_s_scalar<q>") 4466 (const_string "neon_mla_<V_elem_ch>_scalar<q>")))] 4467) 4468 4469(define_insn "neon_vmls_lane<mode>" 4470 [(set (match_operand:VMQ 0 "s_register_operand" "=w") 4471 (unspec:VMQ [(match_operand:VMQ 1 "s_register_operand" "0") 4472 (match_operand:VMQ 2 
"s_register_operand" "w") 4473 (match_operand:<V_HALF> 3 "s_register_operand" 4474 "<scalar_mul_constraint>") 4475 (match_operand:SI 4 "immediate_operand" "i")] 4476 UNSPEC_VMLS_LANE))] 4477 "TARGET_NEON" 4478{ 4479 return "vmls.<V_if_elem>\t%q0, %q2, %P3[%c4]"; 4480} 4481 [(set (attr "type") 4482 (if_then_else (match_test "<Is_float_mode>") 4483 (const_string "neon_fp_mla_s_scalar<q>") 4484 (const_string "neon_mla_<V_elem_ch>_scalar<q>")))] 4485) 4486 4487(define_insn "neon_vmlsl<sup>_lane<mode>" 4488 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w") 4489 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0") 4490 (match_operand:VMDI 2 "s_register_operand" "w") 4491 (match_operand:VMDI 3 "s_register_operand" 4492 "<scalar_mul_constraint>") 4493 (match_operand:SI 4 "immediate_operand" "i")] 4494 VMLSL_LANE))] 4495 "TARGET_NEON" 4496{ 4497 return "vmlsl.<sup>%#<V_sz_elem>\t%q0, %P2, %P3[%c4]"; 4498} 4499 [(set_attr "type" "neon_mla_<V_elem_ch>_scalar_long")] 4500) 4501 4502(define_insn "neon_vqdmlsl_lane<mode>" 4503 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w") 4504 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0") 4505 (match_operand:VMDI 2 "s_register_operand" "w") 4506 (match_operand:VMDI 3 "s_register_operand" 4507 "<scalar_mul_constraint>") 4508 (match_operand:SI 4 "immediate_operand" "i")] 4509 UNSPEC_VQDMLSL_LANE))] 4510 "TARGET_NEON" 4511{ 4512 return "vqdmlsl.<V_s_elem>\t%q0, %P2, %P3[%c4]"; 4513} 4514 [(set_attr "type" "neon_sat_mla_<V_elem_ch>_scalar_long")] 4515) 4516 4517; FIXME: For the "_n" multiply/multiply-accumulate insns, we copy a value in a 4518; core register into a temp register, then use a scalar taken from that. This 4519; isn't an optimal solution if e.g. the scalar has just been read from memory 4520; or extracted from another vector. 
; The latter case it's currently better to
; use the "_lane" variant, and the former case can probably be implemented
; using vld1_lane, but that hasn't been done yet.

;; All "_n" expanders below share one shape: they allocate a new pseudo,
;; call the vset_lane generator with the scalar operand and lane index 0,
;; and then emit the corresponding "_lane" pattern, again with lane index 0.

;; vmul_n for the VMD modes.
(define_expand "neon_vmul_n<mode>"
  [(match_operand:VMD 0 "s_register_operand" "")
   (match_operand:VMD 1 "s_register_operand" "")
   (match_operand:<V_elem> 2 "s_register_operand" "")]
  "TARGET_NEON"
{
  rtx scalar_vec = gen_reg_rtx (<MODE>mode);

  emit_insn (gen_neon_vset_lane<mode> (scalar_vec, operands[2], scalar_vec,
                                       const0_rtx));
  emit_insn (gen_neon_vmul_lane<mode> (operands[0], operands[1], scalar_vec,
                                       const0_rtx));
  DONE;
})

;; vmul_n for the VMQ modes.  The scalar is held in a vector of the
;; half-width mode.
(define_expand "neon_vmul_n<mode>"
  [(match_operand:VMQ 0 "s_register_operand" "")
   (match_operand:VMQ 1 "s_register_operand" "")
   (match_operand:<V_elem> 2 "s_register_operand" "")]
  "TARGET_NEON"
{
  rtx scalar_vec = gen_reg_rtx (<V_HALF>mode);

  emit_insn (gen_neon_vset_lane<V_half> (scalar_vec, operands[2], scalar_vec,
                                         const0_rtx));
  emit_insn (gen_neon_vmul_lane<mode> (operands[0], operands[1], scalar_vec,
                                       const0_rtx));
  DONE;
})

;; vmul_n for the VH modes; guarded by TARGET_NEON_FP16INST.  The scalar
;; always goes through a V4HF temporary.
(define_expand "neon_vmul_n<mode>"
  [(match_operand:VH 0 "s_register_operand")
   (match_operand:VH 1 "s_register_operand")
   (match_operand:<V_elem> 2 "s_register_operand")]
  "TARGET_NEON_FP16INST"
{
  rtx scalar_vec = gen_reg_rtx (V4HFmode);

  emit_insn (gen_neon_vset_lanev4hf (scalar_vec, operands[2], scalar_vec,
                                     const0_rtx));
  emit_insn (gen_neon_vmul_lane<mode> (operands[0], operands[1], scalar_vec,
                                       const0_rtx));
  DONE;
})

;; vmulls_n: result is in the widened mode, input vector in a VMDI mode.
(define_expand "neon_vmulls_n<mode>"
  [(match_operand:<V_widen> 0 "s_register_operand" "")
   (match_operand:VMDI 1 "s_register_operand" "")
   (match_operand:<V_elem> 2 "s_register_operand" "")]
  "TARGET_NEON"
{
  rtx scalar_vec = gen_reg_rtx (<MODE>mode);

  emit_insn (gen_neon_vset_lane<mode> (scalar_vec, operands[2], scalar_vec,
                                       const0_rtx));
  emit_insn (gen_neon_vmulls_lane<mode> (operands[0], operands[1], scalar_vec,
                                         const0_rtx));
  DONE;
})

;; vmullu_n: same shape as vmulls_n, forwarding to the vmullu_lane pattern.
(define_expand "neon_vmullu_n<mode>"
  [(match_operand:<V_widen> 0 "s_register_operand" "")
   (match_operand:VMDI 1 "s_register_operand" "")
   (match_operand:<V_elem> 2 "s_register_operand" "")]
  "TARGET_NEON"
{
  rtx scalar_vec = gen_reg_rtx (<MODE>mode);

  emit_insn (gen_neon_vset_lane<mode> (scalar_vec, operands[2], scalar_vec,
                                       const0_rtx));
  emit_insn (gen_neon_vmullu_lane<mode> (operands[0], operands[1], scalar_vec,
                                         const0_rtx));
  DONE;
})

;; vqdmull_n: widened result, forwarding to the vqdmull_lane pattern.
(define_expand "neon_vqdmull_n<mode>"
  [(match_operand:<V_widen> 0 "s_register_operand" "")
   (match_operand:VMDI 1 "s_register_operand" "")
   (match_operand:<V_elem> 2 "s_register_operand" "")]
  "TARGET_NEON"
{
  rtx scalar_vec = gen_reg_rtx (<MODE>mode);

  emit_insn (gen_neon_vset_lane<mode> (scalar_vec, operands[2], scalar_vec,
                                       const0_rtx));
  emit_insn (gen_neon_vqdmull_lane<mode> (operands[0], operands[1], scalar_vec,
                                          const0_rtx));
  DONE;
})

;; vqdmulh_n for the VMDI modes.
(define_expand "neon_vqdmulh_n<mode>"
  [(match_operand:VMDI 0 "s_register_operand" "")
   (match_operand:VMDI 1 "s_register_operand" "")
   (match_operand:<V_elem> 2 "s_register_operand" "")]
  "TARGET_NEON"
{
  rtx scalar_vec = gen_reg_rtx (<MODE>mode);

  emit_insn (gen_neon_vset_lane<mode> (scalar_vec, operands[2], scalar_vec,
                                       const0_rtx));
  emit_insn (gen_neon_vqdmulh_lane<mode> (operands[0], operands[1], scalar_vec,
                                          const0_rtx));
  DONE;
})

;; vqrdmulh_n for the VMDI modes.
(define_expand "neon_vqrdmulh_n<mode>"
  [(match_operand:VMDI 0 "s_register_operand" "")
   (match_operand:VMDI 1 "s_register_operand" "")
   (match_operand:<V_elem> 2 "s_register_operand" "")]
  "TARGET_NEON"
{
  rtx scalar_vec = gen_reg_rtx (<MODE>mode);

  emit_insn (gen_neon_vset_lane<mode> (scalar_vec, operands[2], scalar_vec,
                                       const0_rtx));
  emit_insn (gen_neon_vqrdmulh_lane<mode> (operands[0], operands[1],
                                           scalar_vec, const0_rtx));
  DONE;
})

;; vqdmulh_n for the VMQI modes; the scalar lives in a half-width vector.
(define_expand "neon_vqdmulh_n<mode>"
  [(match_operand:VMQI 0 "s_register_operand" "")
   (match_operand:VMQI 1 "s_register_operand" "")
   (match_operand:<V_elem> 2 "s_register_operand" "")]
  "TARGET_NEON"
{
  rtx scalar_vec = gen_reg_rtx (<V_HALF>mode);

  emit_insn (gen_neon_vset_lane<V_half> (scalar_vec, operands[2], scalar_vec,
                                         const0_rtx));
  emit_insn (gen_neon_vqdmulh_lane<mode> (operands[0], operands[1], scalar_vec,
                                          const0_rtx));
  DONE;
})

;; vqrdmulh_n for the VMQI modes; the scalar lives in a half-width vector.
(define_expand "neon_vqrdmulh_n<mode>"
  [(match_operand:VMQI 0 "s_register_operand" "")
   (match_operand:VMQI 1 "s_register_operand" "")
   (match_operand:<V_elem> 2 "s_register_operand" "")]
  "TARGET_NEON"
{
  rtx scalar_vec = gen_reg_rtx (<V_HALF>mode);

  emit_insn (gen_neon_vset_lane<V_half> (scalar_vec, operands[2], scalar_vec,
                                         const0_rtx));
  emit_insn (gen_neon_vqrdmulh_lane<mode> (operands[0], operands[1],
                                           scalar_vec, const0_rtx));
  DONE;
})

;; vmla_n for the VMD modes.  Operand 3 is the scalar; operands 1 and 2 are
;; passed through unchanged to the vmla_lane pattern.
(define_expand "neon_vmla_n<mode>"
  [(match_operand:VMD 0 "s_register_operand" "")
   (match_operand:VMD 1 "s_register_operand" "")
   (match_operand:VMD 2 "s_register_operand" "")
   (match_operand:<V_elem> 3 "s_register_operand" "")]
  "TARGET_NEON"
{
  rtx scalar_vec = gen_reg_rtx (<MODE>mode);

  emit_insn (gen_neon_vset_lane<mode> (scalar_vec, operands[3], scalar_vec,
                                       const0_rtx));
  emit_insn (gen_neon_vmla_lane<mode> (operands[0], operands[1], operands[2],
                                       scalar_vec, const0_rtx));
  DONE;
})

;; vmla_n for the VMQ modes; the scalar lives in a half-width vector.
(define_expand "neon_vmla_n<mode>"
  [(match_operand:VMQ 0 "s_register_operand" "")
   (match_operand:VMQ 1 "s_register_operand" "")
   (match_operand:VMQ 2 "s_register_operand" "")
   (match_operand:<V_elem> 3 "s_register_operand" "")]
  "TARGET_NEON"
{
  rtx scalar_vec = gen_reg_rtx (<V_HALF>mode);

  emit_insn (gen_neon_vset_lane<V_half> (scalar_vec, operands[3], scalar_vec,
                                         const0_rtx));
  emit_insn (gen_neon_vmla_lane<mode> (operands[0], operands[1], operands[2],
                                       scalar_vec, const0_rtx));
  DONE;
})

;; vmlals_n: operands 0 and 1 are in the widened mode.
(define_expand "neon_vmlals_n<mode>"
  [(match_operand:<V_widen> 0 "s_register_operand" "")
   (match_operand:<V_widen> 1 "s_register_operand" "")
   (match_operand:VMDI 2 "s_register_operand" "")
   (match_operand:<V_elem> 3 "s_register_operand" "")]
  "TARGET_NEON"
{
  rtx scalar_vec = gen_reg_rtx (<MODE>mode);

  emit_insn (gen_neon_vset_lane<mode> (scalar_vec, operands[3], scalar_vec,
                                       const0_rtx));
  emit_insn (gen_neon_vmlals_lane<mode> (operands[0], operands[1], operands[2],
                                         scalar_vec, const0_rtx));
  DONE;
})

;; vmlalu_n: same shape as vmlals_n, forwarding to vmlalu_lane.
(define_expand "neon_vmlalu_n<mode>"
  [(match_operand:<V_widen> 0 "s_register_operand" "")
   (match_operand:<V_widen> 1 "s_register_operand" "")
   (match_operand:VMDI 2 "s_register_operand" "")
   (match_operand:<V_elem> 3 "s_register_operand" "")]
  "TARGET_NEON"
{
  rtx scalar_vec = gen_reg_rtx (<MODE>mode);

  emit_insn (gen_neon_vset_lane<mode> (scalar_vec, operands[3], scalar_vec,
                                       const0_rtx));
  emit_insn (gen_neon_vmlalu_lane<mode> (operands[0], operands[1], operands[2],
                                         scalar_vec, const0_rtx));
  DONE;
})

;; vqdmlal_n: same shape again, forwarding to vqdmlal_lane.
(define_expand "neon_vqdmlal_n<mode>"
  [(match_operand:<V_widen> 0 "s_register_operand" "")
   (match_operand:<V_widen> 1 "s_register_operand" "")
   (match_operand:VMDI 2 "s_register_operand" "")
   (match_operand:<V_elem> 3 "s_register_operand" "")]
  "TARGET_NEON"
{
  rtx scalar_vec = gen_reg_rtx (<MODE>mode);

  emit_insn (gen_neon_vset_lane<mode> (scalar_vec, operands[3], scalar_vec,
                                       const0_rtx));
  emit_insn (gen_neon_vqdmlal_lane<mode> (operands[0], operands[1],
                                          operands[2], scalar_vec,
                                          const0_rtx));
  DONE;
})

;; vmls_n for the VMD modes.
(define_expand "neon_vmls_n<mode>"
  [(match_operand:VMD 0 "s_register_operand" "")
   (match_operand:VMD 1 "s_register_operand" "")
   (match_operand:VMD 2 "s_register_operand" "")
   (match_operand:<V_elem> 3 "s_register_operand" "")]
  "TARGET_NEON"
{
  rtx scalar_vec = gen_reg_rtx (<MODE>mode);

  emit_insn (gen_neon_vset_lane<mode> (scalar_vec, operands[3], scalar_vec,
                                       const0_rtx));
  emit_insn (gen_neon_vmls_lane<mode> (operands[0], operands[1], operands[2],
                                       scalar_vec, const0_rtx));
  DONE;
})

;; vmls_n for the VMQ modes; the scalar lives in a half-width vector.
(define_expand "neon_vmls_n<mode>"
  [(match_operand:VMQ 0 "s_register_operand" "")
   (match_operand:VMQ 1 "s_register_operand" "")
   (match_operand:VMQ 2 "s_register_operand" "")
   (match_operand:<V_elem> 3 "s_register_operand" "")]
  "TARGET_NEON"
{
  rtx scalar_vec = gen_reg_rtx (<V_HALF>mode);

  emit_insn (gen_neon_vset_lane<V_half> (scalar_vec, operands[3], scalar_vec,
                                         const0_rtx));
  emit_insn (gen_neon_vmls_lane<mode> (operands[0], operands[1], operands[2],
                                       scalar_vec, const0_rtx));
  DONE;
})

;; vmlsls_n: operands 0 and 1 are in the widened mode.
(define_expand "neon_vmlsls_n<mode>"
  [(match_operand:<V_widen> 0 "s_register_operand" "")
   (match_operand:<V_widen> 1 "s_register_operand" "")
   (match_operand:VMDI 2 "s_register_operand" "")
   (match_operand:<V_elem> 3 "s_register_operand" "")]
  "TARGET_NEON"
{
  rtx scalar_vec = gen_reg_rtx (<MODE>mode);

  emit_insn (gen_neon_vset_lane<mode> (scalar_vec, operands[3], scalar_vec,
                                       const0_rtx));
  emit_insn (gen_neon_vmlsls_lane<mode> (operands[0], operands[1], operands[2],
                                         scalar_vec, const0_rtx));
  DONE;
})

;; vmlslu_n: same shape as vmlsls_n, forwarding to vmlslu_lane.
(define_expand "neon_vmlslu_n<mode>"
  [(match_operand:<V_widen> 0 "s_register_operand" "")
   (match_operand:<V_widen> 1 "s_register_operand" "")
   (match_operand:VMDI 2 "s_register_operand" "")
   (match_operand:<V_elem> 3 "s_register_operand" "")]
  "TARGET_NEON"
{
  rtx scalar_vec = gen_reg_rtx (<MODE>mode);

  emit_insn (gen_neon_vset_lane<mode> (scalar_vec, operands[3], scalar_vec,
                                       const0_rtx));
  emit_insn (gen_neon_vmlslu_lane<mode> (operands[0], operands[1], operands[2],
                                         scalar_vec, const0_rtx));
  DONE;
})

;; vqdmlsl_n: same shape again, forwarding to vqdmlsl_lane.
(define_expand "neon_vqdmlsl_n<mode>"
  [(match_operand:<V_widen> 0 "s_register_operand" "")
   (match_operand:<V_widen> 1 "s_register_operand" "")
   (match_operand:VMDI 2 "s_register_operand" "")
   (match_operand:<V_elem> 3 "s_register_operand" "")]
  "TARGET_NEON"
{
  rtx scalar_vec = gen_reg_rtx (<MODE>mode);

  emit_insn (gen_neon_vset_lane<mode> (scalar_vec, operands[3], scalar_vec,
                                       const0_rtx));
  emit_insn (gen_neon_vqdmlsl_lane<mode> (operands[0], operands[1],
                                          operands[2], scalar_vec,
                                          const0_rtx));
  DONE;
})

;; vext: operand 3 is an immediate that is range-checked against the number
;; of units in the vector mode before the instruction text is returned.
(define_insn "neon_vext<mode>"
  [(set (match_operand:VDQX 0 "s_register_operand" "=w")
        (unspec:VDQX [(match_operand:VDQX 1 "s_register_operand" "w")
                      (match_operand:VDQX 2 "s_register_operand" "w")
                      (match_operand:SI 3 "immediate_operand" "i")]
                     UNSPEC_VEXT))]
  "TARGET_NEON"
{
  arm_const_bounds (operands[3], 0, GET_MODE_NUNITS (<MODE>mode));
  return "vext.<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2, %3";
}
  [(set_attr "type" "neon_ext<q>")]
)

;; vrev64 over the VDQ modes.
(define_insn "neon_vrev64<mode>"
  [(set (match_operand:VDQ 0 "s_register_operand" "=w")
        (unspec:VDQ [(match_operand:VDQ 1 "s_register_operand" "w")]
                    UNSPEC_VREV64))]
  "TARGET_NEON"
  "vrev64.<V_sz_elem>\t%<V_reg>0, %<V_reg>1"
  [(set_attr "type" "neon_rev<q>")]
)

;; vrev32 over the VX modes.
(define_insn "neon_vrev32<mode>"
  [(set (match_operand:VX 0 "s_register_operand" "=w")
        (unspec:VX [(match_operand:VX 1 "s_register_operand" "w")]
                   UNSPEC_VREV32))]
  "TARGET_NEON"
  "vrev32.<V_sz_elem>\t%<V_reg>0, %<V_reg>1"
  [(set_attr "type" "neon_rev<q>")]
)

;; vrev16 over the VE modes.
(define_insn "neon_vrev16<mode>"
  [(set (match_operand:VE 0 "s_register_operand" "=w")
        (unspec:VE [(match_operand:VE 1 "s_register_operand" "w")]
                   UNSPEC_VREV16))]
  "TARGET_NEON"
  "vrev16.<V_sz_elem>\t%<V_reg>0, %<V_reg>1"
  [(set_attr "type" "neon_rev<q>")]
)

; vbsl_* intrinsics may compile to any of vbsl/vbif/vbit depending on register
; allocation.
; For an intrinsic of form:
;   rD = vbsl_* (rS, rN, rM)
; We can use any of:
;   vbsl rS, rN, rM  (if D = S)
;   vbit rD, rN, rS  (if D = M, so 1-bits in rS choose bits from rN, else rM)
;   vbif rD, rM, rS  (if D = N, so 0-bits in rS choose bits from rM, else rN)

;; Three alternatives, one per input operand that is tied to the output;
;; each alternative emits a different mnemonic (vbsl, vbit, vbif).
(define_insn "neon_vbsl<mode>_internal"
  [(set (match_operand:VDQX 0 "s_register_operand" "=w,w,w")
        (unspec:VDQX [(match_operand:VDQX 1 "s_register_operand" " 0,w,w")
                      (match_operand:VDQX 2 "s_register_operand" " w,w,0")
                      (match_operand:VDQX 3 "s_register_operand" " w,0,w")]
                     UNSPEC_VBSL))]
  "TARGET_NEON"
  "@
  vbsl\t%<V_reg>0, %<V_reg>2, %<V_reg>3
  vbit\t%<V_reg>0, %<V_reg>2, %<V_reg>1
  vbif\t%<V_reg>0, %<V_reg>3, %<V_reg>1"
  [(set_attr "type" "neon_bsl<q>")]
)

;; Public vbsl expander.  Operand 1 arrives in the comparison-result mode
;; and is replaced by its lowpart in the data mode before the pattern is
;; generated.
(define_expand "neon_vbsl<mode>"
  [(set (match_operand:VDQX 0 "s_register_operand" "")
        (unspec:VDQX [(match_operand:<V_cmp_result> 1 "s_register_operand" "")
                      (match_operand:VDQX 2 "s_register_operand" "")
                      (match_operand:VDQX 3 "s_register_operand" "")]
                     UNSPEC_VBSL))]
  "TARGET_NEON"
{
  /* Operands with different modes cannot be aliased together, so view
     the selector in the same mode as the data operands.  */
  operands[1] = gen_lowpart (<MODE>mode, operands[1]);
})

;; vshl, vrshl
;; NOTE(review): both shift amounts here are register operands, yet the
;; "type" attribute names an immediate shift; confirm this is intended.
(define_insn "neon_v<shift_op><sup><mode>"
  [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
        (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
                       (match_operand:VDQIX 2 "s_register_operand" "w")]
                      VSHL))]
  "TARGET_NEON"
  "v<shift_op>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_shift_imm<q>")]
)

;; vqshl, vqrshl
(define_insn "neon_v<shift_op><sup><mode>"
  [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
        (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
                       (match_operand:VDQIX 2 "s_register_operand" "w")]
                      VQSHL))]
  "TARGET_NEON"
  "v<shift_op>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_sat_shift_imm<q>")]
)

;; vshr_n, vrshr_n
;; The immediate is checked with bounds 1 and element-bits + 1.
(define_insn "neon_v<shift_op><sup>_n<mode>"
  [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
        (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
                       (match_operand:SI 2 "immediate_operand" "i")]
                      VSHR_N))]
  "TARGET_NEON"
{
  arm_const_bounds (operands[2],
                    1, neon_element_bits (<MODE>mode) + 1);
  return "v<shift_op>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %2";
}
  [(set_attr "type" "neon_shift_imm<q>")]
)

;; vshrn_n, vrshrn_n
;; Narrowing shift: the bounds are 1 and half the source element bits + 1.
(define_insn "neon_v<shift_op>_n<mode>"
  [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
        (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")
                            (match_operand:SI 2 "immediate_operand" "i")]
                           VSHRN_N))]
  "TARGET_NEON"
{
  arm_const_bounds (operands[2],
                    1, neon_element_bits (<MODE>mode) / 2 + 1);
  return "v<shift_op>.<V_if_elem>\t%P0, %q1, %2";
}
  [(set_attr "type" "neon_shift_imm_narrow_q")]
)

;; vqshrn_n, vqrshrn_n
(define_insn "neon_v<shift_op><sup>_n<mode>"
  [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
        (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")
                            (match_operand:SI 2 "immediate_operand" "i")]
                           VQSHRN_N))]
  "TARGET_NEON"
{
  arm_const_bounds (operands[2],
                    1, neon_element_bits (<MODE>mode) / 2 + 1);
  return "v<shift_op>.<sup>%#<V_sz_elem>\t%P0, %q1, %2";
}
  [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
)

;; vqshrun_n, vqrshrun_n
(define_insn "neon_v<shift_op>_n<mode>"
  [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
        (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")
                            (match_operand:SI 2 "immediate_operand" "i")]
                           VQSHRUN_N))]
  "TARGET_NEON"
{
  arm_const_bounds (operands[2],
                    1, neon_element_bits (<MODE>mode) / 2 + 1);
  return "v<shift_op>.<V_s_elem>\t%P0, %q1, %2";
}
  [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
)

;; vshl_n: immediate left shift, bounds 0 and element bits.
(define_insn "neon_vshl_n<mode>"
  [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
        (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
                       (match_operand:SI 2 "immediate_operand" "i")]
                      UNSPEC_VSHL_N))]
  "TARGET_NEON"
{
  arm_const_bounds (operands[2],
                    0, neon_element_bits (<MODE>mode));
  return "vshl.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %2";
}
  [(set_attr "type" "neon_shift_imm<q>")]
)

;; vqshl_n: immediate form, bounds 0 and element bits.
(define_insn "neon_vqshl_<sup>_n<mode>"
  [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
        (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
                       (match_operand:SI 2 "immediate_operand" "i")]
                      VQSHL_N))]
  "TARGET_NEON"
{
  arm_const_bounds (operands[2],
                    0, neon_element_bits (<MODE>mode));
  return "vqshl.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %2";
}
  [(set_attr "type" "neon_sat_shift_imm<q>")]
)

;; vqshlu_n: immediate form, bounds 0 and element bits.
(define_insn "neon_vqshlu_n<mode>"
  [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
        (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
                       (match_operand:SI 2 "immediate_operand" "i")]
                      UNSPEC_VQSHLU_N))]
  "TARGET_NEON"
{
  arm_const_bounds (operands[2],
                    0, neon_element_bits (<MODE>mode));
  return "vqshlu.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %2";
}
  [(set_attr "type" "neon_sat_shift_imm<q>")]
)

;; vshll_n: widening left shift by immediate.
(define_insn "neon_vshll<sup>_n<mode>"
  [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
        (unspec:<V_widen> [(match_operand:VW 1 "s_register_operand" "w")
                           (match_operand:SI 2 "immediate_operand" "i")]
                          VSHLL_N))]
  "TARGET_NEON"
{
  /* The bounds passed are 0 and element bits + 1.
     NOTE(review): the comment previously here read "0 < imm <= size",
     but the lower bound handed to arm_const_bounds is 0, as in the
     vshl_n pattern; confirm which of the two is intended.  */
  arm_const_bounds (operands[2],
                    0, neon_element_bits (<MODE>mode) + 1);
  return "vshll.<sup>%#<V_sz_elem>\t%q0, %P1, %2";
}
  [(set_attr "type" "neon_shift_imm_long")]
)

;; vsra_n, vrsra_n
;; Operand 1 is tied to the output; the template prints operands 0, 2, 3.
(define_insn "neon_v<shift_op><sup>_n<mode>"
  [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
        (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "0")
                       (match_operand:VDQIX 2 "s_register_operand" "w")
                       (match_operand:SI 3 "immediate_operand" "i")]
                      VSRA_N))]
  "TARGET_NEON"
{
  arm_const_bounds (operands[3],
                    1, neon_element_bits (<MODE>mode) + 1);
  return "v<shift_op>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>2, %3";
}
  [(set_attr "type" "neon_shift_acc<q>")]
)

;; vsri_n: operand 1 is tied to the output; bounds 1 and element bits + 1.
(define_insn "neon_vsri_n<mode>"
  [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
        (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "0")
                       (match_operand:VDQIX 2 "s_register_operand" "w")
                       (match_operand:SI 3 "immediate_operand" "i")]
                      UNSPEC_VSRI))]
  "TARGET_NEON"
{
  arm_const_bounds (operands[3],
                    1, neon_element_bits (<MODE>mode) + 1);
  return "vsri.<V_sz_elem>\t%<V_reg>0, %<V_reg>2, %3";
}
  [(set_attr "type" "neon_shift_reg<q>")]
)

;; vsli_n: operand 1 is tied to the output; bounds 0 and element bits.
(define_insn "neon_vsli_n<mode>"
  [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
        (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "0")
                       (match_operand:VDQIX 2 "s_register_operand" "w")
                       (match_operand:SI 3 "immediate_operand" "i")]
                      UNSPEC_VSLI))]
  "TARGET_NEON"
{
  arm_const_bounds (operands[3],
                    0, neon_element_bits (<MODE>mode));
  return "vsli.<V_sz_elem>\t%<V_reg>0, %<V_reg>2, %3";
}
  [(set_attr "type" "neon_shift_reg<q>")]
)

;; vtbl with a one-register table.
(define_insn "neon_vtbl1v8qi"
  [(set (match_operand:V8QI 0 "s_register_operand" "=w")
        (unspec:V8QI [(match_operand:V8QI 1 "s_register_operand" "w")
                      (match_operand:V8QI 2 "s_register_operand" "w")]
                     UNSPEC_VTBL))]
  "TARGET_NEON"
  "vtbl.8\t%P0, {%P1}, %P2"
  [(set_attr "type" "neon_tbl1")]
)

;; vtbl with a two-register table held in a TImode operand.  The table
;; registers are rebuilt as V8QI registers at hard register numbers
;; base and base + 2 and printed as a register list.
(define_insn "neon_vtbl2v8qi"
  [(set (match_operand:V8QI 0 "s_register_operand" "=w")
        (unspec:V8QI [(match_operand:TI 1 "s_register_operand" "w")
                      (match_operand:V8QI 2 "s_register_operand" "w")]
                     UNSPEC_VTBL))]
  "TARGET_NEON"
{
  int base_regno = REGNO (operands[1]);
  rtx asm_ops[4];

  asm_ops[0] = operands[0];
  asm_ops[1] = gen_rtx_REG (V8QImode, base_regno);
  asm_ops[2] = gen_rtx_REG (V8QImode, base_regno + 2);
  asm_ops[3] = operands[2];
  output_asm_insn ("vtbl.8\t%P0, {%P1, %P2}, %P3", asm_ops);

  /* The instruction text has already been written out above.  */
  return "";
}
  [(set_attr "type" "neon_tbl2")]
)

;; vtbl with a three-register table held in an EImode operand.
(define_insn "neon_vtbl3v8qi"
  [(set (match_operand:V8QI 0 "s_register_operand" "=w")
        (unspec:V8QI [(match_operand:EI 1 "s_register_operand" "w")
                      (match_operand:V8QI 2 "s_register_operand" "w")]
                     UNSPEC_VTBL))]
  "TARGET_NEON"
{
  int base_regno = REGNO (operands[1]);
  rtx asm_ops[5];

  asm_ops[0] = operands[0];
  asm_ops[1] = gen_rtx_REG (V8QImode, base_regno);
  asm_ops[2] = gen_rtx_REG (V8QImode, base_regno + 2);
  asm_ops[3] = gen_rtx_REG (V8QImode, base_regno + 4);
  asm_ops[4] = operands[2];
  output_asm_insn ("vtbl.8\t%P0, {%P1, %P2, %P3}, %P4", asm_ops);

  /* The instruction text has already been written out above.  */
  return "";
}
  [(set_attr "type" "neon_tbl3")]
)

;; vtbl with a four-register table held in an OImode operand.
(define_insn "neon_vtbl4v8qi"
  [(set (match_operand:V8QI 0 "s_register_operand" "=w")
        (unspec:V8QI [(match_operand:OI 1 "s_register_operand" "w")
                      (match_operand:V8QI 2 "s_register_operand" "w")]
                     UNSPEC_VTBL))]
  "TARGET_NEON"
{
  int base_regno = REGNO (operands[1]);
  rtx asm_ops[6];

  asm_ops[0] = operands[0];
  asm_ops[1] = gen_rtx_REG (V8QImode, base_regno);
  asm_ops[2] = gen_rtx_REG (V8QImode, base_regno + 2);
  asm_ops[3] = gen_rtx_REG (V8QImode, base_regno + 4);
  asm_ops[4] = gen_rtx_REG (V8QImode, base_regno + 6);
  asm_ops[5] = operands[2];
  output_asm_insn ("vtbl.8\t%P0, {%P1, %P2, %P3, %P4}, %P5", asm_ops);

  /* The instruction text has already been written out above.  */
  return "";
}
  [(set_attr "type" "neon_tbl4")]
)

;; These three are used by the vec_perm infrastructure for V16QImode.

;; V16QI lookup in a V16QI table.  Split after reload into two
;; neon_vtbl2v8qi insns, one for the low half and one for the high half of
;; the destination and selector; the table is passed as its TImode lowpart.
(define_insn_and_split "neon_vtbl1v16qi"
  [(set (match_operand:V16QI 0 "s_register_operand" "=&w")
        (unspec:V16QI [(match_operand:V16QI 1 "s_register_operand" "w")
                       (match_operand:V16QI 2 "s_register_operand" "w")]
                      UNSPEC_VTBL))]
  "TARGET_NEON"
  "#"
  "&& reload_completed"
  [(const_int 0)]
{
  rtx dest, table, selector, dest_half, selector_half;
  unsigned half_ofs;

  dest = operands[0];
  table = gen_lowpart (TImode, operands[1]);
  selector = operands[2];

  /* Low halves first.  */
  half_ofs = subreg_lowpart_offset (V8QImode, V16QImode);
  dest_half = simplify_subreg (V8QImode, dest, V16QImode, half_ofs);
  selector_half = simplify_subreg (V8QImode, selector, V16QImode, half_ofs);
  emit_insn (gen_neon_vtbl2v8qi (dest_half, table, selector_half));

  /* Then the high halves.  */
  half_ofs = subreg_highpart_offset (V8QImode, V16QImode);
  dest_half = simplify_subreg (V8QImode, dest, V16QImode, half_ofs);
  selector_half = simplify_subreg (V8QImode, selector, V16QImode, half_ofs);
  emit_insn (gen_neon_vtbl2v8qi (dest_half, table, selector_half));
  DONE;
}
  [(set_attr "type" "multiple")]
)

;; V16QI lookup in an OImode table.  Split after reload in the same way as
;; neon_vtbl1v16qi, except that the table operand is passed on unchanged.
(define_insn_and_split "neon_vtbl2v16qi"
  [(set (match_operand:V16QI 0 "s_register_operand" "=&w")
        (unspec:V16QI [(match_operand:OI 1 "s_register_operand" "w")
                       (match_operand:V16QI 2 "s_register_operand" "w")]
                      UNSPEC_VTBL))]
  "TARGET_NEON"
  "#"
  "&& reload_completed"
  [(const_int 0)]
{
  rtx dest, table, selector, dest_half, selector_half;
  unsigned half_ofs;

  dest = operands[0];
  table = operands[1];
  selector = operands[2];

  /* Low halves first.  */
  half_ofs = subreg_lowpart_offset (V8QImode, V16QImode);
  dest_half = simplify_subreg (V8QImode, dest, V16QImode, half_ofs);
  selector_half = simplify_subreg (V8QImode, selector, V16QImode, half_ofs);
  emit_insn (gen_neon_vtbl2v8qi (dest_half, table, selector_half));

  /* Then the high halves.  */
  half_ofs = subreg_highpart_offset (V8QImode, V16QImode);
  dest_half = simplify_subreg (V8QImode, dest, V16QImode, half_ofs);
  selector_half = simplify_subreg (V8QImode, selector, V16QImode, half_ofs);
  emit_insn (gen_neon_vtbl2v8qi (dest_half, table, selector_half));
  DONE;
}
  [(set_attr "type" "multiple")]
)

;; ??? Logically we should extend the regular neon_vcombine pattern to
;; handle quad-word input modes, producing octa-word output modes.  But
;; that requires us to add support for octa-word vector modes in moves.
;; That seems overkill for this one use in vec_perm.
;; Combine two V16QI registers into one OImode value.  Emitted as "#" and
;; split after reload by neon_split_vcombine.
(define_insn_and_split "neon_vcombinev16qi"
  [(set (match_operand:OI 0 "s_register_operand" "=w")
        (unspec:OI [(match_operand:V16QI 1 "s_register_operand" "w")
                    (match_operand:V16QI 2 "s_register_operand" "w")]
                   UNSPEC_VCONCAT))]
  "TARGET_NEON"
  "#"
  "&& reload_completed"
  [(const_int 0)]
{
  neon_split_vcombine (operands);
  DONE;
}
  [(set_attr "type" "multiple")]
)

;; vtbx with a one-register table.  Operand 1 is tied to the output.
(define_insn "neon_vtbx1v8qi"
  [(set (match_operand:V8QI 0 "s_register_operand" "=w")
        (unspec:V8QI [(match_operand:V8QI 1 "s_register_operand" "0")
                      (match_operand:V8QI 2 "s_register_operand" "w")
                      (match_operand:V8QI 3 "s_register_operand" "w")]
                     UNSPEC_VTBX))]
  "TARGET_NEON"
  "vtbx.8\t%P0, {%P2}, %P3"
  [(set_attr "type" "neon_tbl1")]
)

;; vtbx with a two-register table held in a TImode operand.  The table
;; registers are rebuilt as V8QI registers at hard register numbers
;; base and base + 2 and printed as a register list.
(define_insn "neon_vtbx2v8qi"
  [(set (match_operand:V8QI 0 "s_register_operand" "=w")
        (unspec:V8QI [(match_operand:V8QI 1 "s_register_operand" "0")
                      (match_operand:TI 2 "s_register_operand" "w")
                      (match_operand:V8QI 3 "s_register_operand" "w")]
                     UNSPEC_VTBX))]
  "TARGET_NEON"
{
  int base_regno = REGNO (operands[2]);
  rtx asm_ops[4];

  asm_ops[0] = operands[0];
  asm_ops[1] = gen_rtx_REG (V8QImode, base_regno);
  asm_ops[2] = gen_rtx_REG (V8QImode, base_regno + 2);
  asm_ops[3] = operands[3];
  output_asm_insn ("vtbx.8\t%P0, {%P1, %P2}, %P3", asm_ops);

  /* The instruction text has already been written out above.  */
  return "";
}
  [(set_attr "type" "neon_tbl2")]
)

;; vtbx with a three-register table held in an EImode operand.
(define_insn "neon_vtbx3v8qi"
  [(set (match_operand:V8QI 0 "s_register_operand" "=w")
        (unspec:V8QI [(match_operand:V8QI 1 "s_register_operand" "0")
                      (match_operand:EI 2 "s_register_operand" "w")
                      (match_operand:V8QI 3 "s_register_operand" "w")]
                     UNSPEC_VTBX))]
  "TARGET_NEON"
{
  int base_regno = REGNO (operands[2]);
  rtx asm_ops[5];

  asm_ops[0] = operands[0];
  asm_ops[1] = gen_rtx_REG (V8QImode, base_regno);
  asm_ops[2] = gen_rtx_REG (V8QImode, base_regno + 2);
  asm_ops[3] = gen_rtx_REG (V8QImode, base_regno + 4);
  asm_ops[4] = operands[3];
  output_asm_insn ("vtbx.8\t%P0, {%P1, %P2, %P3}, %P4", asm_ops);

  /* The instruction text has already been written out above.  */
  return "";
}
  [(set_attr "type" "neon_tbl3")]
)

;; vtbx with a four-register table held in an OImode operand.
(define_insn "neon_vtbx4v8qi"
  [(set (match_operand:V8QI 0 "s_register_operand" "=w")
        (unspec:V8QI [(match_operand:V8QI 1 "s_register_operand" "0")
                      (match_operand:OI 2 "s_register_operand" "w")
                      (match_operand:V8QI 3 "s_register_operand" "w")]
                     UNSPEC_VTBX))]
  "TARGET_NEON"
{
  int base_regno = REGNO (operands[2]);
  rtx asm_ops[6];

  asm_ops[0] = operands[0];
  asm_ops[1] = gen_rtx_REG (V8QImode, base_regno);
  asm_ops[2] = gen_rtx_REG (V8QImode, base_regno + 2);
  asm_ops[3] = gen_rtx_REG (V8QImode, base_regno + 4);
  asm_ops[4] = gen_rtx_REG (V8QImode, base_regno + 6);
  asm_ops[5] = operands[3];
  output_asm_insn ("vtbx.8\t%P0, {%P1, %P2, %P3, %P4}, %P5", asm_ops);

  /* The instruction text has already been written out above.  */
  return "";
}
  [(set_attr "type" "neon_tbl4")]
)

;; Expander for vtrn: one parallel with two sets, the first using
;; UNSPEC_VTRN1 and the second UNSPEC_VTRN2 on the same two inputs.
(define_expand "neon_vtrn<mode>_internal"
  [(parallel
    [(set (match_operand:VDQWH 0 "s_register_operand")
          (unspec:VDQWH [(match_operand:VDQWH 1 "s_register_operand")
                         (match_operand:VDQWH 2 "s_register_operand")]
           UNSPEC_VTRN1))
     (set (match_operand:VDQWH 3 "s_register_operand")
          (unspec:VDQWH [(match_dup 1) (match_dup 2)] UNSPEC_VTRN2))])]
  "TARGET_NEON"
  ""
)

;; Note: Different operand numbering to handle tied registers correctly.
;; Matching insn for vtrn.  Outputs are operands 0 and 2; input operand 1
;; is tied to output 0 and input operand 3 is tied to output 2, so the
;; template only needs to print operands 0 and 2.
(define_insn "*neon_vtrn<mode>_insn"
  [(set (match_operand:VDQWH 0 "s_register_operand" "=&w")
        (unspec:VDQWH [(match_operand:VDQWH 1 "s_register_operand" "0")
                       (match_operand:VDQWH 3 "s_register_operand" "2")]
         UNSPEC_VTRN1))
   (set (match_operand:VDQWH 2 "s_register_operand" "=&w")
        (unspec:VDQWH [(match_dup 1) (match_dup 3)]
         UNSPEC_VTRN2))]
  "TARGET_NEON"
  "vtrn.<V_sz_elem>\t%<V_reg>0, %<V_reg>2"
  [(set_attr "type" "neon_permute<q>")]
)

;; Expander for vzip: one parallel with two sets, the first using
;; UNSPEC_VZIP1 and the second UNSPEC_VZIP2 on the same two inputs.
(define_expand "neon_vzip<mode>_internal"
  [(parallel
    [(set (match_operand:VDQWH 0 "s_register_operand")
          (unspec:VDQWH [(match_operand:VDQWH 1 "s_register_operand")
                         (match_operand:VDQWH 2 "s_register_operand")]
           UNSPEC_VZIP1))
     (set (match_operand:VDQWH 3 "s_register_operand")
          (unspec:VDQWH [(match_dup 1) (match_dup 2)] UNSPEC_VZIP2))])]
  "TARGET_NEON"
  ""
)

;; Note: Different operand numbering to handle tied registers correctly.
;; Matching insn for vuzp.  Outputs are operands 0 and 2; input operand 1
;; is tied to output 0 and input operand 3 is tied to output 2.
(define_insn "*neon_vuzp<mode>_insn"
  [(set (match_operand:VDQWH 0 "s_register_operand" "=&w")
        (unspec:VDQWH [(match_operand:VDQWH 1 "s_register_operand" "0")
                       (match_operand:VDQWH 3 "s_register_operand" "2")]
         UNSPEC_VUZP1))
   (set (match_operand:VDQWH 2 "s_register_operand" "=&w")
        (unspec:VDQWH [(match_dup 1) (match_dup 3)]
         UNSPEC_VUZP2))]
  "TARGET_NEON"
  "vuzp.<V_sz_elem>\t%<V_reg>0, %<V_reg>2"
  [(set_attr "type" "neon_zip<q>")]
)

;; Standard-name expander for a whole-vector load; it produces the same
;; UNSPEC_VLD1 form that neon_vld1 matches.
(define_expand "vec_load_lanes<mode><mode>"
  [(set (match_operand:VDQX 0 "s_register_operand")
        (unspec:VDQX [(match_operand:VDQX 1 "neon_struct_operand")]
                     UNSPEC_VLD1))]
  "TARGET_NEON")

;; vld1 of one full vector from a structure-memory operand.
(define_insn "neon_vld1<mode>"
  [(set (match_operand:VDQX 0 "s_register_operand" "=w")
        (unspec:VDQX [(match_operand:VDQX 1 "neon_struct_operand" "Um")]
                     UNSPEC_VLD1))]
  "TARGET_NEON"
  "vld1.<V_sz_elem>\t%h0, %A1"
  [(set_attr "type" "neon_load1_1reg<q>")]
)

;; The lane numbers in the RTL are in GCC lane order, having been flipped
;; in arm_expand_neon_args. The lane numbers are restored to architectural
;; lane order here.
(define_insn "neon_vld1_lane<mode>"
  [(set (match_operand:VDX 0 "s_register_operand" "=w")
        (unspec:VDX [(match_operand:<V_elem> 1 "neon_struct_operand" "Um")
                     (match_operand:VDX 2 "s_register_operand" "0")
                     (match_operand:SI 3 "immediate_operand" "i")]
                    UNSPEC_VLD1_LANE))]
  "TARGET_NEON"
{
  HOST_WIDE_INT nunits = GET_MODE_NUNITS (<MODE>mode);
  HOST_WIDE_INT arch_lane
    = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[3]));

  operands[3] = GEN_INT (arch_lane);

  /* A single-element vector is loaded as a whole register; otherwise
     the selected lane is named explicitly.  */
  return (nunits == 1
          ? "vld1.<V_sz_elem>\t%P0, %A1"
          : "vld1.<V_sz_elem>\t{%P0[%c3]}, %A1");
}
  [(set_attr "type" "neon_load1_one_lane<q>")]
)

;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
;; here on big endian targets.
;; Quad-register form of the single-lane load.  The lane index is folded
;; into the lower or upper half of the vector: for lanes in the upper half
;; the index is reduced by half the unit count and the register number is
;; advanced by 2 before operand 0 is rewritten as a half-width register.
;; NOTE(review): the +2 step presumably reflects each D register spanning
;; two hard register numbers; confirm against the register layout.
(define_insn "neon_vld1_lane<mode>"
  [(set (match_operand:VQX 0 "s_register_operand" "=w")
        (unspec:VQX [(match_operand:<V_elem> 1 "neon_struct_operand" "Um")
                     (match_operand:VQX 2 "s_register_operand" "0")
                     (match_operand:SI 3 "immediate_operand" "i")]
                    UNSPEC_VLD1_LANE))]
  "TARGET_NEON"
{
  HOST_WIDE_INT arch_lane
    = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[3]));
  HOST_WIDE_INT nunits = GET_MODE_NUNITS (<MODE>mode);
  int half_regno = REGNO (operands[0]);

  if (arch_lane >= nunits / 2)
    {
      arch_lane -= nunits / 2;
      half_regno += 2;
    }

  operands[3] = GEN_INT (arch_lane);
  operands[0] = gen_rtx_REG (<V_HALF>mode, half_regno);

  /* With two units each half holds one element, so the whole
     half-register is loaded.  */
  return (nunits == 2
          ? "vld1.<V_sz_elem>\t%P0, %A1"
          : "vld1.<V_sz_elem>\t{%P0[%c3]}, %A1");
}
  [(set_attr "type" "neon_load1_one_lane<q>")]
)

;; Load one element and duplicate it across a VD_LANE vector.
(define_insn "neon_vld1_dup<mode>"
  [(set (match_operand:VD_LANE 0 "s_register_operand" "=w")
        (vec_duplicate:VD_LANE (match_operand:<V_elem> 1 "neon_struct_operand" "Um")))]
  "TARGET_NEON"
  "vld1.<V_sz_elem>\t{%P0[]}, %A1"
  [(set_attr "type" "neon_load1_all_lanes<q>")]
)

;; Special case for DImode.  Treat it exactly like a simple load.
;; DImode "dup" load: expands to the plain UNSPEC_VLD1 form.
(define_expand "neon_vld1_dupdi"
  [(set (match_operand:DI 0 "s_register_operand" "")
        (unspec:DI [(match_operand:DI 1 "neon_struct_operand" "")]
                   UNSPEC_VLD1))]
  "TARGET_NEON"
  ""
)

;; Load one element and duplicate it across a VQ2 vector; both halves of
;; the destination are named in the register list.
(define_insn "neon_vld1_dup<mode>"
  [(set (match_operand:VQ2 0 "s_register_operand" "=w")
        (vec_duplicate:VQ2 (match_operand:<V_elem> 1 "neon_struct_operand" "Um")))]
  "TARGET_NEON"
{
  return "vld1.<V_sz_elem>\t{%e0[], %f0[]}, %A1";
}
  [(set_attr "type" "neon_load1_all_lanes<q>")]
)

;; V2DI duplicate load.  Split after reload into a DImode load of the low
;; half followed by a move of that value into the high half.
(define_insn_and_split "neon_vld1_dupv2di"
   [(set (match_operand:V2DI 0 "s_register_operand" "=w")
         (vec_duplicate:V2DI (match_operand:DI 1 "neon_struct_operand" "Um")))]
  "TARGET_NEON"
  "#"
  "&& reload_completed"
  [(const_int 0)]
  {
    rtx low_half = gen_lowpart (DImode, operands[0]);

    emit_insn (gen_neon_vld1_dupdi (low_half, operands[1]));
    emit_move_insn (gen_highpart (DImode, operands[0]), low_half);
    DONE;
  }
  [(set_attr "length" "8")
   (set_attr "type" "neon_load1_all_lanes_q")]
)

;; Standard-name expander for a whole-vector store; it produces the same
;; UNSPEC_VST1 form that neon_vst1 matches.
(define_expand "vec_store_lanes<mode><mode>"
  [(set (match_operand:VDQX 0 "neon_struct_operand")
        (unspec:VDQX [(match_operand:VDQX 1 "s_register_operand")]
                     UNSPEC_VST1))]
  "TARGET_NEON")

;; vst1 of one full vector to a structure-memory operand.
(define_insn "neon_vst1<mode>"
  [(set (match_operand:VDQX 0 "neon_struct_operand" "=Um")
        (unspec:VDQX [(match_operand:VDQX 1 "s_register_operand" "w")]
                     UNSPEC_VST1))]
  "TARGET_NEON"
  "vst1.<V_sz_elem>\t%h1, %A0"
  [(set_attr "type" "neon_store1_1reg<q>")])

;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
;; here on big endian targets.
;; Single-lane store from a VDX register.  The lane index is converted with
;; NEON_ENDIAN_LANE_N before printing.
(define_insn "neon_vst1_lane<mode>"
  [(set (match_operand:<V_elem> 0 "neon_struct_operand" "=Um")
        (unspec:<V_elem>
          [(match_operand:VDX 1 "s_register_operand" "w")
           (match_operand:SI 2 "immediate_operand" "i")]
          UNSPEC_VST1_LANE))]
  "TARGET_NEON"
{
  HOST_WIDE_INT nunits = GET_MODE_NUNITS (<MODE>mode);
  HOST_WIDE_INT arch_lane
    = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2]));

  operands[2] = GEN_INT (arch_lane);

  /* A single-element vector is stored as a whole register; otherwise
     the selected lane is named explicitly.  */
  return (nunits == 1
          ? "vst1.<V_sz_elem>\t{%P1}, %A0"
          : "vst1.<V_sz_elem>\t{%P1[%c2]}, %A0");
}
  [(set_attr "type" "neon_store1_one_lane<q>")]
)

;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
;; here on big endian targets.
;; Quad-register form: lanes in the upper half are renumbered relative to
;; that half and the source register number is advanced by 2 before operand
;; 1 is rewritten as a half-width register.
(define_insn "neon_vst1_lane<mode>"
  [(set (match_operand:<V_elem> 0 "neon_struct_operand" "=Um")
        (unspec:<V_elem>
          [(match_operand:VQX 1 "s_register_operand" "w")
           (match_operand:SI 2 "immediate_operand" "i")]
          UNSPEC_VST1_LANE))]
  "TARGET_NEON"
{
  HOST_WIDE_INT arch_lane
    = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2]));
  HOST_WIDE_INT nunits = GET_MODE_NUNITS (<MODE>mode);
  int half_regno = REGNO (operands[1]);

  if (arch_lane >= nunits / 2)
    {
      arch_lane -= nunits / 2;
      half_regno += 2;
    }

  operands[2] = GEN_INT (arch_lane);
  operands[1] = gen_rtx_REG (<V_HALF>mode, half_regno);

  /* With two units each half holds one element, so the whole
     half-register is stored.  */
  return (nunits == 2
          ? "vst1.<V_sz_elem>\t{%P1}, %A0"
          : "vst1.<V_sz_elem>\t{%P1[%c2]}, %A0");
}
  [(set_attr "type" "neon_store1_one_lane<q>")]
)

;; Standard-name expander for a two-vector structure load into TImode.  The
;; dummy unspec only carries the element vector mode.
(define_expand "vec_load_lanesti<mode>"
  [(set (match_operand:TI 0 "s_register_operand")
        (unspec:TI [(match_operand:TI 1 "neon_struct_operand")
                    (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VLD2))]
  "TARGET_NEON")

;; vld2 into a TImode register pair.  When the element size is 64 the
;; template printed is vld1.64 instead of vld2, and the "type" attribute
;; switches to the load1 variant accordingly.
(define_insn "neon_vld2<mode>"
  [(set (match_operand:TI 0 "s_register_operand" "=w")
        (unspec:TI [(match_operand:TI 1 "neon_struct_operand" "Um")
                    (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VLD2))]
  "TARGET_NEON"
{
  return (<V_sz_elem> == 64
          ? "vld1.64\t%h0, %A1"
          : "vld2.<V_sz_elem>\t%h0, %A1");
}
  [(set (attr "type")
      (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
                    (const_string "neon_load1_2reg<q>")
                    (const_string "neon_load2_2reg<q>")))]
)

;; Standard-name expander for a two-vector structure load into OImode.
(define_expand "vec_load_lanesoi<mode>"
  [(set (match_operand:OI 0 "s_register_operand")
        (unspec:OI [(match_operand:OI 1 "neon_struct_operand")
                    (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VLD2))]
  "TARGET_NEON")

;; vld2 into an OImode register group.
(define_insn "neon_vld2<mode>"
  [(set (match_operand:OI 0 "s_register_operand" "=w")
        (unspec:OI [(match_operand:OI 1 "neon_struct_operand" "Um")
                    (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VLD2))]
  "TARGET_NEON"
  "vld2.<V_sz_elem>\t%h0, %A1"
  [(set_attr "type" "neon_load2_2reg_q")])

;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
;; here on big endian targets.
;; Load one lane of a two-element structure into a TImode register.
;; Operand 2 is tied to the output ("0"); operand 3 is the lane number.
(define_insn "neon_vld2_lane<mode>"
  [(set (match_operand:TI 0 "s_register_operand" "=w")
        (unspec:TI [(match_operand:<V_two_elem> 1 "neon_struct_operand" "Um")
                    (match_operand:TI 2 "s_register_operand" "0")
                    (match_operand:SI 3 "immediate_operand" "i")
                    (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VLD2_LANE))]
  "TARGET_NEON"
{
  HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[3]));
  int regno = REGNO (operands[0]);
  rtx ops[4];
  /* The two DImode registers printed are regno and regno + 2.  */
  ops[0] = gen_rtx_REG (DImode, regno);
  ops[1] = gen_rtx_REG (DImode, regno + 2);
  ops[2] = operands[1];
  ops[3] = GEN_INT (lane);
  output_asm_insn ("vld2.<V_sz_elem>\t{%P0[%c3], %P1[%c3]}, %A2", ops);
  return "";
}
  [(set_attr "type" "neon_load2_one_lane<q>")]
)

;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
;; here on big endian targets.
;; OImode variant of the two-element single-lane load (VQ_HS vector modes).
(define_insn "neon_vld2_lane<mode>"
  [(set (match_operand:OI 0 "s_register_operand" "=w")
        (unspec:OI [(match_operand:<V_two_elem> 1 "neon_struct_operand" "Um")
                    (match_operand:OI 2 "s_register_operand" "0")
                    (match_operand:SI 3 "immediate_operand" "i")
                    (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VLD2_LANE))]
  "TARGET_NEON"
{
  HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[3]));
  HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
  int regno = REGNO (operands[0]);
  rtx ops[4];
  /* Upper-half lanes are renumbered from zero and use the registers two
     hard register numbers higher.  */
  if (lane >= max / 2)
    {
      lane -= max / 2;
      regno += 2;
    }
  ops[0] = gen_rtx_REG (DImode, regno);
  ops[1] = gen_rtx_REG (DImode, regno + 4);
  ops[2] = operands[1];
  ops[3] = GEN_INT (lane);
  output_asm_insn ("vld2.<V_sz_elem>\t{%P0[%c3], %P1[%c3]}, %A2", ops);
  return "";
}
  [(set_attr "type" "neon_load2_one_lane<q>")]
)

;; Load a two-element structure and duplicate it to all lanes.  Modes with
;; a single element use a plain vld1 instead.
(define_insn "neon_vld2_dup<mode>"
  [(set (match_operand:TI 0 "s_register_operand" "=w")
        (unspec:TI [(match_operand:<V_two_elem> 1 "neon_struct_operand" "Um")
                    (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VLD2_DUP))]
  "TARGET_NEON"
{
  if (GET_MODE_NUNITS (<MODE>mode) > 1)
    return "vld2.<V_sz_elem>\t{%e0[], %f0[]}, %A1";
  else
    return "vld1.<V_sz_elem>\t%h0, %A1";
}
  [(set (attr "type")
      (if_then_else (gt (const_string "<V_mode_nunits>") (const_string "1"))
                    (const_string "neon_load2_all_lanes<q>")
                    (const_string "neon_load1_1reg<q>")))]
)

;; Expander for a two-vector structure store from a TImode register; emits
;; the UNSPEC_VST2 set matched by neon_vst2<mode> below.
(define_expand "vec_store_lanesti<mode>"
  [(set (match_operand:TI 0 "neon_struct_operand")
        (unspec:TI [(match_operand:TI 1 "s_register_operand")
                    (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VST2))]
  "TARGET_NEON")

;; Two-vector structure store from a TImode register.  For 64-bit elements
;; a plain vst1.64 is emitted instead of vst2.  The vst2 form stores two
;; whole registers, so it is typed neon_store2_2reg<q>, mirroring
;; neon_load2_2reg<q> on the corresponding neon_vld2<mode> pattern.
(define_insn "neon_vst2<mode>"
  [(set (match_operand:TI 0 "neon_struct_operand" "=Um")
        (unspec:TI [(match_operand:TI 1 "s_register_operand" "w")
                    (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VST2))]
  "TARGET_NEON"
{
  if (<V_sz_elem> == 64)
    return "vst1.64\t%h1, %A0";
  else
    return "vst2.<V_sz_elem>\t%h1, %A0";
}
  [(set (attr "type")
      (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
                    (const_string "neon_store1_2reg<q>")
                    (const_string "neon_store2_2reg<q>")))]
)

;; Expander for a two-vector structure store from an OImode register (VQ2
;; vector modes); emits the UNSPEC_VST2 set matched by the insn below.
(define_expand "vec_store_lanesoi<mode>"
  [(set (match_operand:OI 0 "neon_struct_operand")
        (unspec:OI [(match_operand:OI 1 "s_register_operand")
                    (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VST2))]
  "TARGET_NEON")

;; Two-vector structure store from an OImode register.
(define_insn "neon_vst2<mode>"
  [(set (match_operand:OI 0 "neon_struct_operand" "=Um")
        (unspec:OI [(match_operand:OI 1 "s_register_operand" "w")
                    (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VST2))]
  "TARGET_NEON"
  "vst2.<V_sz_elem>\t%h1, %A0"
  [(set_attr "type" "neon_store2_4reg<q>")]
)

;; see comment on
;; neon_vld1_lane for reason why the lane numbers are reversed
;; here on big endian targets.
;; Store one lane of a two-element structure held in a TImode register.
;; Operand 2 is the lane number.
(define_insn "neon_vst2_lane<mode>"
  [(set (match_operand:<V_two_elem> 0 "neon_struct_operand" "=Um")
        (unspec:<V_two_elem>
          [(match_operand:TI 1 "s_register_operand" "w")
           (match_operand:SI 2 "immediate_operand" "i")
           (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
          UNSPEC_VST2_LANE))]
  "TARGET_NEON"
{
  HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2]));
  int regno = REGNO (operands[1]);
  rtx ops[4];
  /* The two DImode registers printed are regno and regno + 2.  */
  ops[0] = operands[0];
  ops[1] = gen_rtx_REG (DImode, regno);
  ops[2] = gen_rtx_REG (DImode, regno + 2);
  ops[3] = GEN_INT (lane);
  output_asm_insn ("vst2.<V_sz_elem>\t{%P1[%c3], %P2[%c3]}, %A0", ops);
  return "";
}
  [(set_attr "type" "neon_store2_one_lane<q>")]
)

;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
;; here on big endian targets.
;; OImode variant of the two-element single-lane store (VQ_HS vector modes).
(define_insn "neon_vst2_lane<mode>"
  [(set (match_operand:<V_two_elem> 0 "neon_struct_operand" "=Um")
        (unspec:<V_two_elem>
          [(match_operand:OI 1 "s_register_operand" "w")
           (match_operand:SI 2 "immediate_operand" "i")
           (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
          UNSPEC_VST2_LANE))]
  "TARGET_NEON"
{
  HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2]));
  HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
  int regno = REGNO (operands[1]);
  rtx ops[4];
  /* Upper-half lanes are renumbered from zero and use the registers two
     hard register numbers higher.  */
  if (lane >= max / 2)
    {
      lane -= max / 2;
      regno += 2;
    }
  ops[0] = operands[0];
  ops[1] = gen_rtx_REG (DImode, regno);
  ops[2] = gen_rtx_REG (DImode, regno + 4);
  ops[3] = GEN_INT (lane);
  output_asm_insn ("vst2.<V_sz_elem>\t{%P1[%c3], %P2[%c3]}, %A0", ops);
  return "";
}
  [(set_attr "type" "neon_store2_one_lane<q>")]
)

;; Expander for a three-vector structure load into an EImode register;
;; emits the UNSPEC_VLD3 set matched by neon_vld3<mode> below.
(define_expand "vec_load_lanesei<mode>"
  [(set (match_operand:EI 0 "s_register_operand")
        (unspec:EI [(match_operand:EI 1 "neon_struct_operand")
                    (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VLD3))]
  "TARGET_NEON")

;; Three-vector structure load into an EImode register.  For 64-bit
;; elements a plain vld1.64 is emitted instead of vld3.
(define_insn "neon_vld3<mode>"
  [(set (match_operand:EI 0 "s_register_operand" "=w")
        (unspec:EI [(match_operand:EI 1 "neon_struct_operand" "Um")
                    (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VLD3))]
  "TARGET_NEON"
{
  if (<V_sz_elem> == 64)
    return "vld1.64\t%h0, %A1";
  else
    return "vld3.<V_sz_elem>\t%h0, %A1";
}
  [(set (attr "type")
      (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
                    (const_string "neon_load1_3reg<q>")
                    (const_string "neon_load3_3reg<q>")))]
)

;; Expander for a three-vector structure load into a CImode register (VQ2
;; vector modes); it simply forwards to the neon_vld3<mode> expander.
(define_expand "vec_load_lanesci<mode>"
  [(match_operand:CI 0 "s_register_operand")
   (match_operand:CI 1 "neon_struct_operand")
   (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
  "TARGET_NEON"
{
  emit_insn (gen_neon_vld3<mode> (operands[0], operands[1]));
  DONE;
})

;; CImode three-vector load, emitted as two instructions: neon_vld3qa on the
;; first EImode-sized part of the memory operand, then neon_vld3qb on the
;; following EImode-sized part.
(define_expand "neon_vld3<mode>"
  [(match_operand:CI 0 "s_register_operand")
   (match_operand:CI 1 "neon_struct_operand")
   (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
  "TARGET_NEON"
{
  rtx mem;

  mem = adjust_address (operands[1], EImode, 0);
  emit_insn (gen_neon_vld3qa<mode> (operands[0], mem));
  /* Second part: advance by the size of the part already loaded.  The
     previous result is passed in as the tied input of vld3qb.  */
  mem = adjust_address (mem, EImode, GET_MODE_SIZE (EImode));
  emit_insn (gen_neon_vld3qb<mode> (operands[0], mem, operands[0]));
  DONE;
})

;; First half of the CImode vld3: loads into the DImode registers at
;; regno, regno + 4 and regno + 8.
(define_insn "neon_vld3qa<mode>"
  [(set (match_operand:CI 0 "s_register_operand" "=w")
        (unspec:CI [(match_operand:EI 1 "neon_struct_operand" "Um")
                    (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VLD3A))]
  "TARGET_NEON"
{
  int regno = REGNO (operands[0]);
  rtx ops[4];
  ops[0] = gen_rtx_REG (DImode, regno);
  ops[1] = gen_rtx_REG (DImode, regno + 4);
  ops[2] = gen_rtx_REG (DImode, regno + 8);
  ops[3] = operands[1];
  output_asm_insn ("vld3.<V_sz_elem>\t{%P0, %P1, %P2}, %A3", ops);
  return "";
}
  [(set_attr "type" "neon_load3_3reg<q>")]
)

;; Second half of the CImode vld3: loads into the DImode registers at
;; regno + 2, regno + 6 and regno + 10.  Operand 2 is tied to the output.
(define_insn "neon_vld3qb<mode>"
  [(set (match_operand:CI 0 "s_register_operand" "=w")
        (unspec:CI [(match_operand:EI 1 "neon_struct_operand" "Um")
                    (match_operand:CI 2 "s_register_operand" "0")
                    (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VLD3B))]
  "TARGET_NEON"
{
  int regno = REGNO (operands[0]);
  rtx ops[4];
  ops[0] = gen_rtx_REG (DImode, regno + 2);
  ops[1] = gen_rtx_REG (DImode, regno + 6);
  ops[2] = gen_rtx_REG (DImode, regno + 10);
  ops[3] = operands[1];
  output_asm_insn ("vld3.<V_sz_elem>\t{%P0, %P1, %P2}, %A3", ops);
  return "";
}
  [(set_attr "type" "neon_load3_3reg<q>")]
)

;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
;; here on big endian targets.
;; Load one lane of a three-element structure into an EImode register.
;; Operand 2 is tied to the output; operand 3 is the lane number.
(define_insn "neon_vld3_lane<mode>"
  [(set (match_operand:EI 0 "s_register_operand" "=w")
        (unspec:EI [(match_operand:<V_three_elem> 1 "neon_struct_operand" "Um")
                    (match_operand:EI 2 "s_register_operand" "0")
                    (match_operand:SI 3 "immediate_operand" "i")
                    (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VLD3_LANE))]
  "TARGET_NEON"
{
  HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[3]));
  int regno = REGNO (operands[0]);
  rtx ops[5];
  ops[0] = gen_rtx_REG (DImode, regno);
  ops[1] = gen_rtx_REG (DImode, regno + 2);
  ops[2] = gen_rtx_REG (DImode, regno + 4);
  ops[3] = operands[1];
  ops[4] = GEN_INT (lane);
  /* NOTE(review): the address is printed with plain %3, not %A3 as in the
     vld2/vld4 lane patterns; presumably deliberate -- confirm before
     changing.  */
  output_asm_insn ("vld3.<V_sz_elem>\t{%P0[%c4], %P1[%c4], %P2[%c4]}, %3",
                   ops);
  return "";
}
  [(set_attr "type" "neon_load3_one_lane<q>")]
)

;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
;; here on big endian
;; targets.
;; CImode variant of the three-element single-lane load (VQ_HS vector
;; modes).  Operand 2 is tied to the output; operand 3 is the lane number.
(define_insn "neon_vld3_lane<mode>"
  [(set (match_operand:CI 0 "s_register_operand" "=w")
        (unspec:CI [(match_operand:<V_three_elem> 1 "neon_struct_operand" "Um")
                    (match_operand:CI 2 "s_register_operand" "0")
                    (match_operand:SI 3 "immediate_operand" "i")
                    (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VLD3_LANE))]
  "TARGET_NEON"
{
  HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[3]));
  HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
  int regno = REGNO (operands[0]);
  rtx ops[5];
  /* Upper-half lanes are renumbered from zero and use the registers two
     hard register numbers higher.  */
  if (lane >= max / 2)
    {
      lane -= max / 2;
      regno += 2;
    }
  ops[0] = gen_rtx_REG (DImode, regno);
  ops[1] = gen_rtx_REG (DImode, regno + 4);
  ops[2] = gen_rtx_REG (DImode, regno + 8);
  ops[3] = operands[1];
  ops[4] = GEN_INT (lane);
  /* NOTE(review): the address is printed with plain %3, not %A3;
     presumably deliberate -- confirm before changing.  */
  output_asm_insn ("vld3.<V_sz_elem>\t{%P0[%c4], %P1[%c4], %P2[%c4]}, %3",
                   ops);
  return "";
}
  [(set_attr "type" "neon_load3_one_lane<q>")]
)

;; Load a three-element structure and duplicate it to all lanes.  Modes
;; with a single element use a plain vld1 instead.
(define_insn "neon_vld3_dup<mode>"
  [(set (match_operand:EI 0 "s_register_operand" "=w")
        (unspec:EI [(match_operand:<V_three_elem> 1 "neon_struct_operand" "Um")
                    (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VLD3_DUP))]
  "TARGET_NEON"
{
  if (GET_MODE_NUNITS (<MODE>mode) > 1)
    {
      int regno = REGNO (operands[0]);
      rtx ops[4];
      ops[0] = gen_rtx_REG (DImode, regno);
      ops[1] = gen_rtx_REG (DImode, regno + 2);
      ops[2] = gen_rtx_REG (DImode, regno + 4);
      ops[3] = operands[1];
      output_asm_insn ("vld3.<V_sz_elem>\t{%P0[], %P1[], %P2[]}, %3", ops);
      return "";
    }
  else
    return "vld1.<V_sz_elem>\t%h0, %A1";
}
  [(set (attr "type")
      (if_then_else (gt (const_string "<V_mode_nunits>") (const_string "1"))
                    (const_string "neon_load3_all_lanes<q>")
                    (const_string "neon_load1_1reg<q>")))])

;; Expander for a three-vector structure store from an EImode register;
;; emits the UNSPEC_VST3 set matched by neon_vst3<mode> below.
(define_expand "vec_store_lanesei<mode>"
  [(set (match_operand:EI 0 "neon_struct_operand")
        (unspec:EI [(match_operand:EI 1 "s_register_operand")
                    (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VST3))]
  "TARGET_NEON")

;; Three-vector structure store from an EImode register.  For 64-bit
;; elements a plain vst1.64 is emitted instead of vst3.  The vst3 form
;; stores three whole registers, so it is typed neon_store3_3reg<q>, the
;; same class used by neon_vst3qa/neon_vst3qb and the store counterpart of
;; neon_load3_3reg<q>.
(define_insn "neon_vst3<mode>"
  [(set (match_operand:EI 0 "neon_struct_operand" "=Um")
        (unspec:EI [(match_operand:EI 1 "s_register_operand" "w")
                    (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VST3))]
  "TARGET_NEON"
{
  if (<V_sz_elem> == 64)
    return "vst1.64\t%h1, %A0";
  else
    return "vst3.<V_sz_elem>\t%h1, %A0";
}
  [(set (attr "type")
      (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
                    (const_string "neon_store1_3reg<q>")
                    (const_string "neon_store3_3reg<q>")))])

;; Expander for a three-vector structure store from a CImode register (VQ2
;; vector modes); it simply forwards to the neon_vst3<mode> expander.
(define_expand "vec_store_lanesci<mode>"
  [(match_operand:CI 0 "neon_struct_operand")
   (match_operand:CI 1 "s_register_operand")
   (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
  "TARGET_NEON"
{
  emit_insn (gen_neon_vst3<mode> (operands[0], operands[1]));
  DONE;
})

;; CImode three-vector store, emitted as two instructions: neon_vst3qa to
;; the first EImode-sized part of the memory operand, then neon_vst3qb to
;; the following EImode-sized part.
(define_expand "neon_vst3<mode>"
  [(match_operand:CI 0 "neon_struct_operand")
   (match_operand:CI 1 "s_register_operand")
   (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
  "TARGET_NEON"
{
  rtx mem;

  mem = adjust_address (operands[0], EImode, 0);
  emit_insn (gen_neon_vst3qa<mode> (mem, operands[1]));
  /* Second part: advance by the size of the part already stored.  */
  mem = adjust_address (mem, EImode, GET_MODE_SIZE (EImode));
  emit_insn (gen_neon_vst3qb<mode> (mem, operands[1]));
  DONE;
})

;; First half of the CImode vst3: stores the DImode registers at regno,
;; regno + 4 and regno + 8.
(define_insn "neon_vst3qa<mode>"
  [(set (match_operand:EI 0 "neon_struct_operand" "=Um")
        (unspec:EI [(match_operand:CI 1 "s_register_operand" "w")
                    (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VST3A))]
  "TARGET_NEON"
{
  int regno = REGNO (operands[1]);
  rtx ops[4];
  ops[0] = operands[0];
  ops[1] = gen_rtx_REG (DImode, regno);
  ops[2] = gen_rtx_REG (DImode, regno + 4);
  ops[3] = gen_rtx_REG (DImode, regno + 8);
  output_asm_insn ("vst3.<V_sz_elem>\t{%P1, %P2, %P3}, %A0", ops);
  return "";
}
  [(set_attr "type" "neon_store3_3reg<q>")]
)

;; Second half of the CImode vst3: stores the DImode registers at
;; regno + 2, regno + 6 and regno + 10.
(define_insn "neon_vst3qb<mode>"
  [(set (match_operand:EI 0 "neon_struct_operand" "=Um")
        (unspec:EI [(match_operand:CI 1 "s_register_operand" "w")
                    (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VST3B))]
  "TARGET_NEON"
{
  int regno = REGNO (operands[1]);
  rtx ops[4];
  ops[0] = operands[0];
  ops[1] = gen_rtx_REG (DImode, regno + 2);
  ops[2] = gen_rtx_REG (DImode, regno + 6);
  ops[3] = gen_rtx_REG (DImode, regno + 10);
  output_asm_insn ("vst3.<V_sz_elem>\t{%P1, %P2, %P3}, %A0", ops);
  return "";
}
  [(set_attr "type" "neon_store3_3reg<q>")]
)

;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
;; here on big endian targets.
;; Store one lane of a three-element structure held in an EImode register.
;; Operand 2 is the lane number.
(define_insn "neon_vst3_lane<mode>"
  [(set (match_operand:<V_three_elem> 0 "neon_struct_operand" "=Um")
        (unspec:<V_three_elem>
           [(match_operand:EI 1 "s_register_operand" "w")
            (match_operand:SI 2 "immediate_operand" "i")
            (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
           UNSPEC_VST3_LANE))]
  "TARGET_NEON"
{
  HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2]));
  int regno = REGNO (operands[1]);
  rtx ops[5];
  ops[0] = operands[0];
  ops[1] = gen_rtx_REG (DImode, regno);
  ops[2] = gen_rtx_REG (DImode, regno + 2);
  ops[3] = gen_rtx_REG (DImode, regno + 4);
  ops[4] = GEN_INT (lane);
  /* NOTE(review): the address is printed with plain %0, not %A0;
     presumably deliberate -- confirm before changing.  */
  output_asm_insn ("vst3.<V_sz_elem>\t{%P1[%c4], %P2[%c4], %P3[%c4]}, %0",
                   ops);
  return "";
}
  [(set_attr "type" "neon_store3_one_lane<q>")]
)

;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
;; here on big endian targets.
;; CImode variant of the three-element single-lane store (VQ_HS vector
;; modes).  Operand 2 is the lane number.
(define_insn "neon_vst3_lane<mode>"
  [(set (match_operand:<V_three_elem> 0 "neon_struct_operand" "=Um")
        (unspec:<V_three_elem>
           [(match_operand:CI 1 "s_register_operand" "w")
            (match_operand:SI 2 "immediate_operand" "i")
            (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
           UNSPEC_VST3_LANE))]
  "TARGET_NEON"
{
  HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2]));
  HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
  int regno = REGNO (operands[1]);
  rtx ops[5];
  /* Upper-half lanes are renumbered from zero and use the registers two
     hard register numbers higher.  */
  if (lane >= max / 2)
    {
      lane -= max / 2;
      regno += 2;
    }
  ops[0] = operands[0];
  ops[1] = gen_rtx_REG (DImode, regno);
  ops[2] = gen_rtx_REG (DImode, regno + 4);
  ops[3] = gen_rtx_REG (DImode, regno + 8);
  ops[4] = GEN_INT (lane);
  /* NOTE(review): the address is printed with plain %0, not %A0;
     presumably deliberate -- confirm before changing.  */
  output_asm_insn ("vst3.<V_sz_elem>\t{%P1[%c4], %P2[%c4], %P3[%c4]}, %0",
                   ops);
  return "";
}
  [(set_attr "type" "neon_store3_one_lane<q>")]
)

;; Expander for a four-vector structure load into an OImode register (VDX
;; vector modes); emits the UNSPEC_VLD4 set matched by neon_vld4<mode>.
(define_expand "vec_load_lanesoi<mode>"
  [(set (match_operand:OI 0 "s_register_operand")
        (unspec:OI [(match_operand:OI 1 "neon_struct_operand")
                    (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VLD4))]
  "TARGET_NEON")

;; Four-vector structure load into an OImode register.  For 64-bit
;; elements a plain vld1.64 is emitted instead of vld4.
(define_insn "neon_vld4<mode>"
  [(set (match_operand:OI 0 "s_register_operand" "=w")
        (unspec:OI [(match_operand:OI 1 "neon_struct_operand" "Um")
                    (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VLD4))]
  "TARGET_NEON"
{
  if (<V_sz_elem> == 64)
    return "vld1.64\t%h0, %A1";
  else
    return "vld4.<V_sz_elem>\t%h0, %A1";
}
  [(set (attr "type")
      (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
                    (const_string "neon_load1_4reg<q>")
                    (const_string "neon_load4_4reg<q>")))]
)

;; Expander for a four-vector structure load into an XImode register (VQ2
;; vector modes); it simply forwards to the neon_vld4<mode> expander.
(define_expand "vec_load_lanesxi<mode>"
  [(match_operand:XI 0 "s_register_operand")
   (match_operand:XI 1 "neon_struct_operand")
   (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
  "TARGET_NEON"
{
  emit_insn (gen_neon_vld4<mode> (operands[0], operands[1]));
  DONE;
})

;; XImode four-vector load, emitted as two instructions: neon_vld4qa on the
;; first OImode-sized part of the memory operand, then neon_vld4qb on the
;; following OImode-sized part.
(define_expand "neon_vld4<mode>"
  [(match_operand:XI 0 "s_register_operand")
   (match_operand:XI 1 "neon_struct_operand")
   (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
  "TARGET_NEON"
{
  rtx mem;

  mem = adjust_address (operands[1], OImode, 0);
  emit_insn (gen_neon_vld4qa<mode> (operands[0], mem));
  /* Second part: advance by the size of the part already loaded.  The
     previous result is passed in as the tied input of vld4qb.  */
  mem = adjust_address (mem, OImode, GET_MODE_SIZE (OImode));
  emit_insn (gen_neon_vld4qb<mode> (operands[0], mem, operands[0]));
  DONE;
})

;; First half of the XImode vld4: loads into the DImode registers at
;; regno, regno + 4, regno + 8 and regno + 12.
(define_insn "neon_vld4qa<mode>"
  [(set (match_operand:XI 0 "s_register_operand" "=w")
        (unspec:XI [(match_operand:OI 1 "neon_struct_operand" "Um")
                    (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VLD4A))]
  "TARGET_NEON"
{
  int regno = REGNO (operands[0]);
  rtx ops[5];
  ops[0] = gen_rtx_REG (DImode, regno);
  ops[1] = gen_rtx_REG (DImode, regno + 4);
  ops[2] = gen_rtx_REG (DImode, regno + 8);
  ops[3] = gen_rtx_REG (DImode, regno + 12);
  ops[4] = operands[1];
  output_asm_insn ("vld4.<V_sz_elem>\t{%P0, %P1, %P2, %P3}, %A4", ops);
  return "";
}
  [(set_attr "type" "neon_load4_4reg<q>")]
)

;; Second half of the XImode vld4: loads into the DImode registers at
;; regno + 2, regno + 6, regno + 10 and regno + 14.  Operand 2 is tied to
;; the output.
(define_insn "neon_vld4qb<mode>"
  [(set (match_operand:XI 0 "s_register_operand" "=w")
        (unspec:XI [(match_operand:OI 1 "neon_struct_operand" "Um")
                    (match_operand:XI 2 "s_register_operand" "0")
                    (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VLD4B))]
  "TARGET_NEON"
{
  int regno = REGNO (operands[0]);
  rtx ops[5];
  ops[0] = gen_rtx_REG (DImode, regno + 2);
  ops[1] = gen_rtx_REG (DImode, regno + 6);
  ops[2] = gen_rtx_REG (DImode, regno + 10);
  ops[3] = gen_rtx_REG (DImode, regno + 14);
  ops[4] = operands[1];
  output_asm_insn ("vld4.<V_sz_elem>\t{%P0, %P1, %P2, %P3}, %A4", ops);
  return "";
}
  [(set_attr "type" "neon_load4_4reg<q>")]
)

;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
;; here on big endian targets.
;; Load one lane of a four-element structure into an OImode register.
;; Operand 2 is tied to the output; operand 3 is the lane number.
(define_insn "neon_vld4_lane<mode>"
  [(set (match_operand:OI 0 "s_register_operand" "=w")
        (unspec:OI [(match_operand:<V_four_elem> 1 "neon_struct_operand" "Um")
                    (match_operand:OI 2 "s_register_operand" "0")
                    (match_operand:SI 3 "immediate_operand" "i")
                    (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VLD4_LANE))]
  "TARGET_NEON"
{
  HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[3]));
  int regno = REGNO (operands[0]);
  rtx ops[6];
  ops[0] = gen_rtx_REG (DImode, regno);
  ops[1] = gen_rtx_REG (DImode, regno + 2);
  ops[2] = gen_rtx_REG (DImode, regno + 4);
  ops[3] = gen_rtx_REG (DImode, regno + 6);
  ops[4] = operands[1];
  ops[5] = GEN_INT (lane);
  output_asm_insn ("vld4.<V_sz_elem>\t{%P0[%c5], %P1[%c5], %P2[%c5], %P3[%c5]}, %A4",
                   ops);
  return "";
}
  [(set_attr "type" "neon_load4_one_lane<q>")]
)

;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
;; here on big endian targets.
;; XImode variant of the four-element single-lane load (VQ_HS vector
;; modes).  Operand 2 is tied to the output; operand 3 is the lane number.
(define_insn "neon_vld4_lane<mode>"
  [(set (match_operand:XI 0 "s_register_operand" "=w")
        (unspec:XI [(match_operand:<V_four_elem> 1 "neon_struct_operand" "Um")
                    (match_operand:XI 2 "s_register_operand" "0")
                    (match_operand:SI 3 "immediate_operand" "i")
                    (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VLD4_LANE))]
  "TARGET_NEON"
{
  HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[3]));
  HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
  int regno = REGNO (operands[0]);
  rtx ops[6];
  /* Upper-half lanes are renumbered from zero and use the registers two
     hard register numbers higher.  */
  if (lane >= max / 2)
    {
      lane -= max / 2;
      regno += 2;
    }
  ops[0] = gen_rtx_REG (DImode, regno);
  ops[1] = gen_rtx_REG (DImode, regno + 4);
  ops[2] = gen_rtx_REG (DImode, regno + 8);
  ops[3] = gen_rtx_REG (DImode, regno + 12);
  ops[4] = operands[1];
  ops[5] = GEN_INT (lane);
  output_asm_insn ("vld4.<V_sz_elem>\t{%P0[%c5], %P1[%c5], %P2[%c5], %P3[%c5]}, %A4",
                   ops);
  return "";
}
  [(set_attr "type" "neon_load4_one_lane<q>")]
)

;; Load a four-element structure and duplicate it to all lanes.  Modes
;; with a single element use a plain vld1 instead.
(define_insn "neon_vld4_dup<mode>"
  [(set (match_operand:OI 0 "s_register_operand" "=w")
        (unspec:OI [(match_operand:<V_four_elem> 1 "neon_struct_operand" "Um")
                    (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VLD4_DUP))]
  "TARGET_NEON"
{
  if (GET_MODE_NUNITS (<MODE>mode) > 1)
    {
      int regno = REGNO (operands[0]);
      rtx ops[5];
      ops[0] = gen_rtx_REG (DImode, regno);
      ops[1] = gen_rtx_REG (DImode, regno + 2);
      ops[2] = gen_rtx_REG (DImode, regno + 4);
      ops[3] = gen_rtx_REG (DImode, regno + 6);
      ops[4] = operands[1];
      output_asm_insn ("vld4.<V_sz_elem>\t{%P0[], %P1[], %P2[], %P3[]}, %A4",
                       ops);
      return "";
    }
  else
    return "vld1.<V_sz_elem>\t%h0, %A1";
}
  [(set (attr "type")
      (if_then_else (gt (const_string "<V_mode_nunits>") (const_string "1"))
                    (const_string "neon_load4_all_lanes<q>")
                    (const_string "neon_load1_1reg<q>")))]
)

;; Expander for a four-vector structure store from an OImode register (VDX
;; vector modes); emits the UNSPEC_VST4 set matched by neon_vst4<mode>.
(define_expand "vec_store_lanesoi<mode>"
  [(set (match_operand:OI 0 "neon_struct_operand")
        (unspec:OI [(match_operand:OI 1 "s_register_operand")
                    (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VST4))]
  "TARGET_NEON")

;; Four-vector structure store from an OImode register.  For 64-bit
;; elements a plain vst1.64 is emitted instead of vst4.
(define_insn "neon_vst4<mode>"
  [(set (match_operand:OI 0 "neon_struct_operand" "=Um")
        (unspec:OI [(match_operand:OI 1 "s_register_operand" "w")
                    (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VST4))]
  "TARGET_NEON"
{
  if (<V_sz_elem> == 64)
    return "vst1.64\t%h1, %A0";
  else
    return "vst4.<V_sz_elem>\t%h1, %A0";
}
  [(set (attr "type")
      (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
                    (const_string "neon_store1_4reg<q>")
                    (const_string "neon_store4_4reg<q>")))]
)

;; Expander for a four-vector structure store from an XImode register (VQ2
;; vector modes); it simply forwards to the neon_vst4<mode> expander.
(define_expand "vec_store_lanesxi<mode>"
  [(match_operand:XI 0 "neon_struct_operand")
   (match_operand:XI 1 "s_register_operand")
   (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
  "TARGET_NEON"
{
  emit_insn (gen_neon_vst4<mode> (operands[0], operands[1]));
  DONE;
})

;; XImode four-vector store, emitted as two instructions: neon_vst4qa to
;; the first OImode-sized part of the memory operand, then neon_vst4qb to
;; the following OImode-sized part.
(define_expand "neon_vst4<mode>"
  [(match_operand:XI 0 "neon_struct_operand")
   (match_operand:XI 1 "s_register_operand")
   (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
  "TARGET_NEON"
{
  rtx mem;

  mem = adjust_address (operands[0], OImode, 0);
  emit_insn (gen_neon_vst4qa<mode> (mem, operands[1]));
  /* Second part: advance by the size of the part already stored.  */
  mem = adjust_address (mem, OImode, GET_MODE_SIZE (OImode));
  emit_insn (gen_neon_vst4qb<mode> (mem, operands[1]));
  DONE;
})

;; First half of the XImode vst4: stores the DImode registers at regno,
;; regno + 4, regno + 8 and regno + 12.
(define_insn "neon_vst4qa<mode>"
  [(set (match_operand:OI 0 "neon_struct_operand" "=Um")
        (unspec:OI [(match_operand:XI 1 "s_register_operand" "w")
                    (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VST4A))]
  "TARGET_NEON"
{
  int regno = REGNO (operands[1]);
  rtx ops[5];
  ops[0] = operands[0];
  ops[1] = gen_rtx_REG (DImode, regno);
  ops[2] = gen_rtx_REG (DImode, regno + 4);
  ops[3] = gen_rtx_REG (DImode, regno + 8);
  ops[4] = gen_rtx_REG (DImode, regno + 12);
  output_asm_insn ("vst4.<V_sz_elem>\t{%P1, %P2, %P3, %P4}, %A0", ops);
  return "";
}
  [(set_attr "type" "neon_store4_4reg<q>")]
)

;; Second half of the XImode vst4: stores the DImode registers at
;; regno + 2, regno + 6, regno + 10 and regno + 14.
(define_insn "neon_vst4qb<mode>"
  [(set (match_operand:OI 0 "neon_struct_operand" "=Um")
        (unspec:OI [(match_operand:XI 1 "s_register_operand" "w")
                    (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VST4B))]
  "TARGET_NEON"
{
  int regno = REGNO (operands[1]);
  rtx ops[5];
  ops[0] = operands[0];
  ops[1] = gen_rtx_REG (DImode, regno + 2);
  ops[2] = gen_rtx_REG (DImode, regno + 6);
  ops[3] = gen_rtx_REG (DImode, regno + 10);
  ops[4] = gen_rtx_REG (DImode, regno + 14);
  output_asm_insn ("vst4.<V_sz_elem>\t{%P1, %P2, %P3, %P4}, %A0", ops);
  return "";
}
  [(set_attr "type" "neon_store4_4reg<q>")]
)

;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
;; here on big endian targets.
;; Store one lane of a four-element structure held in an OImode register.
;; Operand 2 is the lane number.
(define_insn "neon_vst4_lane<mode>"
  [(set (match_operand:<V_four_elem> 0 "neon_struct_operand" "=Um")
        (unspec:<V_four_elem>
           [(match_operand:OI 1 "s_register_operand" "w")
            (match_operand:SI 2 "immediate_operand" "i")
            (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
           UNSPEC_VST4_LANE))]
  "TARGET_NEON"
{
  HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2]));
  int regno = REGNO (operands[1]);
  rtx ops[6];
  /* The four DImode registers printed are regno, regno + 2, regno + 4 and
     regno + 6.  */
  ops[0] = operands[0];
  ops[1] = gen_rtx_REG (DImode, regno);
  ops[2] = gen_rtx_REG (DImode, regno + 2);
  ops[3] = gen_rtx_REG (DImode, regno + 4);
  ops[4] = gen_rtx_REG (DImode, regno + 6);
  ops[5] = GEN_INT (lane);
  output_asm_insn ("vst4.<V_sz_elem>\t{%P1[%c5], %P2[%c5], %P3[%c5], %P4[%c5]}, %A0",
                   ops);
  return "";
}
  [(set_attr "type" "neon_store4_one_lane<q>")]
)

;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
;; here on big endian targets.
;; XImode variant of the four-element single-lane store (VQ_HS vector
;; modes).  Operand 2 is the lane number.  This is a one-lane store, so it
;; is typed neon_store4_one_lane<q> like the OImode variant of this pattern.
(define_insn "neon_vst4_lane<mode>"
  [(set (match_operand:<V_four_elem> 0 "neon_struct_operand" "=Um")
        (unspec:<V_four_elem>
           [(match_operand:XI 1 "s_register_operand" "w")
            (match_operand:SI 2 "immediate_operand" "i")
            (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
           UNSPEC_VST4_LANE))]
  "TARGET_NEON"
{
  HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2]));
  HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
  int regno = REGNO (operands[1]);
  rtx ops[6];
  /* Upper-half lanes are renumbered from zero and use the registers two
     hard register numbers higher.  */
  if (lane >= max / 2)
    {
      lane -= max / 2;
      regno += 2;
    }
  ops[0] = operands[0];
  ops[1] = gen_rtx_REG (DImode, regno);
  ops[2] = gen_rtx_REG (DImode, regno + 4);
  ops[3] = gen_rtx_REG (DImode, regno + 8);
  ops[4] = gen_rtx_REG (DImode, regno + 12);
  ops[5] = GEN_INT (lane);
  output_asm_insn ("vst4.<V_sz_elem>\t{%P1[%c5], %P2[%c5], %P3[%c5], %P4[%c5]}, %A0",
                   ops);
  return "";
}
  [(set_attr "type" "neon_store4_one_lane<q>")]
)

;; Sign- or zero-extend (SE) the elements selected by operand 2, which must
;; satisfy vect_par_constant_low, using vmovl on %e1.  Little-endian only.
(define_insn "neon_vec_unpack<US>_lo_<mode>"
  [(set (match_operand:<V_unpack> 0 "register_operand" "=w")
        (SE:<V_unpack> (vec_select:<V_HALF>
                          (match_operand:VU 1 "register_operand" "w")
                          (match_operand:VU 2 "vect_par_constant_low" ""))))]
  "TARGET_NEON && !BYTES_BIG_ENDIAN"
  "vmovl.<US><V_sz_elem> %q0, %e1"
  [(set_attr "type" "neon_shift_imm_long")]
)

;; As above for the elements selected by a vect_par_constant_high operand,
;; using vmovl on %f1.
(define_insn "neon_vec_unpack<US>_hi_<mode>"
  [(set (match_operand:<V_unpack> 0 "register_operand" "=w")
        (SE:<V_unpack> (vec_select:<V_HALF>
                          (match_operand:VU 1 "register_operand" "w")
                          (match_operand:VU 2 "vect_par_constant_high" ""))))]
  "TARGET_NEON && !BYTES_BIG_ENDIAN"
  "vmovl.<US><V_sz_elem> %q0, %f1"
  [(set_attr "type" "neon_shift_imm_long")]
)

;; Expander that builds the PARALLEL of element indices nunits/2 ..
;; nunits-1 and hands it to neon_vec_unpack<US>_hi_<mode>.
(define_expand "vec_unpack<US>_hi_<mode>"
  [(match_operand:<V_unpack> 0 "register_operand" "")
   (SE:<V_unpack> (match_operand:VU 1 "register_operand"))]
 "TARGET_NEON && !BYTES_BIG_ENDIAN"
  {
   rtvec v = rtvec_alloc (<V_mode_nunits>/2)  ;
   rtx t1;
   int i;
   for (i = 0; i < (<V_mode_nunits>/2); i++)
     RTVEC_ELT (v, i) = GEN_INT ((<V_mode_nunits>/2) + i);

   t1 = gen_rtx_PARALLEL (<MODE>mode, v);
   emit_insn (gen_neon_vec_unpack<US>_hi_<mode> (operands[0],
                                                 operands[1],
                                                 t1));
   DONE;
  }
)

;; Expander that builds the PARALLEL of element indices 0 .. nunits/2-1
;; and hands it to neon_vec_unpack<US>_lo_<mode>.
(define_expand "vec_unpack<US>_lo_<mode>"
  [(match_operand:<V_unpack> 0 "register_operand" "")
   (SE:<V_unpack> (match_operand:VU 1 "register_operand" ""))]
 "TARGET_NEON && !BYTES_BIG_ENDIAN"
  {
   rtvec v = rtvec_alloc (<V_mode_nunits>/2)  ;
   rtx t1;
   int i;
   for (i = 0; i < (<V_mode_nunits>/2) ; i++)
     RTVEC_ELT (v, i) = GEN_INT (i);
   t1 = gen_rtx_PARALLEL (<MODE>mode, v);
   emit_insn (gen_neon_vec_unpack<US>_lo_<mode> (operands[0],
                                                 operands[1],
                                                 t1));
   DONE;
  }
)

;; Widening multiply of the elements of operands 1 and 3 selected by
;; operand 2 (a vect_par_constant_low PARALLEL, shared via match_dup),
;; emitted as vmull on %e1 and %e3.
(define_insn "neon_vec_<US>mult_lo_<mode>"
 [(set (match_operand:<V_unpack> 0 "register_operand" "=w")
       (mult:<V_unpack> (SE:<V_unpack> (vec_select:<V_HALF>
                           (match_operand:VU 1 "register_operand" "w")
                           (match_operand:VU 2 "vect_par_constant_low" "")))
                        (SE:<V_unpack> (vec_select:<V_HALF>
                           (match_operand:VU 3 "register_operand" "w")
                           (match_dup 2)))))]
  "TARGET_NEON && !BYTES_BIG_ENDIAN"
  "vmull.<US><V_sz_elem> %q0, %e1, %e3"
  [(set_attr "type" "neon_mul_<V_elem_ch>_long")]
)

;; Expander that builds the index PARALLEL 0 .. nunits/2-1 and emits
;; neon_vec_<US>mult_lo_<mode>.
(define_expand "vec_widen_<US>mult_lo_<mode>"
  [(match_operand:<V_unpack> 0 "register_operand" "")
   (SE:<V_unpack> (match_operand:VU 1 "register_operand" ""))
   (SE:<V_unpack> (match_operand:VU 2 "register_operand" ""))]
 "TARGET_NEON && !BYTES_BIG_ENDIAN"
 {
   rtvec v = rtvec_alloc (<V_mode_nunits>/2)  ;
   rtx t1;
   int i;
   for (i = 0; i < (<V_mode_nunits>/2) ; i++)
     RTVEC_ELT (v, i) = GEN_INT (i);
   t1 = gen_rtx_PARALLEL (<MODE>mode, v);

   emit_insn (gen_neon_vec_<US>mult_lo_<mode> (operands[0],
                                               operands[1],
                                               t1,
                                               operands[2]));
   DONE;
 }
)

;; Widening multiply of the elements selected by a vect_par_constant_high
;; PARALLEL, emitted as vmull on %f1 and %f3.
(define_insn "neon_vec_<US>mult_hi_<mode>"
 [(set (match_operand:<V_unpack> 0 "register_operand" "=w")
      (mult:<V_unpack> (SE:<V_unpack> (vec_select:<V_HALF>
                            (match_operand:VU 1 "register_operand" "w")
                            (match_operand:VU 2 "vect_par_constant_high" "")))
                       (SE:<V_unpack> (vec_select:<V_HALF>
                            (match_operand:VU 3 "register_operand" "w")
                            (match_dup 2)))))]
  "TARGET_NEON && !BYTES_BIG_ENDIAN"
  "vmull.<US><V_sz_elem> %q0, %f1, %f3"
  [(set_attr "type" "neon_mul_<V_elem_ch>_long")]
)

;; Expander that builds the index PARALLEL nunits/2 .. nunits-1 and emits
;; neon_vec_<US>mult_hi_<mode>.
(define_expand "vec_widen_<US>mult_hi_<mode>"
  [(match_operand:<V_unpack> 0 "register_operand" "")
   (SE:<V_unpack> (match_operand:VU 1 "register_operand" ""))
   (SE:<V_unpack> (match_operand:VU 2 "register_operand" ""))]
 "TARGET_NEON && !BYTES_BIG_ENDIAN"
 {
   rtvec v = rtvec_alloc (<V_mode_nunits>/2)  ;
   rtx t1;
   int i;
   for (i = 0; i < (<V_mode_nunits>/2) ; i++)
     RTVEC_ELT (v, i) = GEN_INT (<V_mode_nunits>/2 + i);
   t1 = gen_rtx_PARALLEL (<MODE>mode, v);

   emit_insn (gen_neon_vec_<US>mult_hi_<mode> (operands[0],
                                               operands[1],
                                               t1,
                                               operands[2]));
   DONE;

 }
)

;; Extend the result of shifting a VW-mode register left by a constant
;; amount, emitted as vshll.
(define_insn "neon_vec_<US>shiftl_<mode>"
 [(set (match_operand:<V_widen> 0 "register_operand" "=w")
       (SE:<V_widen> (ashift:VW (match_operand:VW 1 "register_operand" "w")
       (match_operand:<V_innermode> 2 "const_neon_scalar_shift_amount_operand" ""))))]
  "TARGET_NEON"
{
  return "vshll.<US><V_sz_elem> %q0, %P1, %2";
}
  [(set_attr "type" "neon_shift_imm_long")]
)

;; Widening shift of one half of operand 1: takes the <V_HALF>mode subreg
;; at byte offset 0 and emits neon_vec_<US>shiftl_<V_half> on it.
(define_expand "vec_widen_<US>shiftl_lo_<mode>"
  [(match_operand:<V_unpack> 0 "register_operand" "")
   (SE:<V_unpack> (match_operand:VU 1 "register_operand" ""))
   (match_operand:SI 2 "immediate_operand" "i")]
 "TARGET_NEON && !BYTES_BIG_ENDIAN"
 {
  emit_insn (gen_neon_vec_<US>shiftl_<V_half> (operands[0],
                simplify_gen_subreg (<V_HALF>mode, operands[1], <MODE>mode, 0),
                operands[2]));
   DONE;
 }
)

;; Widening shift of the other half of operand 1: takes the <V_HALF>mode
;; subreg at byte offset GET_MODE_SIZE (<V_HALF>mode).
(define_expand "vec_widen_<US>shiftl_hi_<mode>"
  [(match_operand:<V_unpack> 0 "register_operand" "")
   (SE:<V_unpack> (match_operand:VU 1 "register_operand" ""))
   (match_operand:SI 2 "immediate_operand" "i")]
 "TARGET_NEON && !BYTES_BIG_ENDIAN"
 {
  emit_insn (gen_neon_vec_<US>shiftl_<V_half> (operands[0],
                simplify_gen_subreg (<V_HALF>mode, operands[1], <MODE>mode,
                                     GET_MODE_SIZE (<V_HALF>mode)),
                operands[2]));
   DONE;
 }
)

;; Vectorize for non-neon-quad case
;; Extend a whole VDI-mode register into its <V_widen> mode with vmovl.
(define_insn "neon_unpack<US>_<mode>"
 [(set (match_operand:<V_widen> 0 "register_operand" "=w")
       (SE:<V_widen> (match_operand:VDI 1 "register_operand" "w")))]
 "TARGET_NEON"
 "vmovl.<US><V_sz_elem> %q0, %P1"
  [(set_attr "type" "neon_move")]
)

;; Unpack by extending the whole input into a temporary <V_widen> register
;; and then extracting part of it with neon_vget_low<V_widen_l>.
(define_expand "vec_unpack<US>_lo_<mode>"
 [(match_operand:<V_double_width> 0 "register_operand" "")
  (SE:<V_double_width>(match_operand:VDI 1 "register_operand"))]
 "TARGET_NEON"
{
  rtx tmpreg = gen_reg_rtx (<V_widen>mode);
  emit_insn (gen_neon_unpack<US>_<mode> (tmpreg, operands[1]));
  emit_insn (gen_neon_vget_low<V_widen_l> (operands[0], tmpreg));

  DONE;
}
)

;; As above, extracting with neon_vget_high<V_widen_l> instead.
(define_expand "vec_unpack<US>_hi_<mode>"
 [(match_operand:<V_double_width> 0 "register_operand" "")
  (SE:<V_double_width>(match_operand:VDI 1 "register_operand"))]
 "TARGET_NEON"
{
  rtx tmpreg = gen_reg_rtx (<V_widen>mode);
  emit_insn (gen_neon_unpack<US>_<mode> (tmpreg, operands[1]));
  emit_insn (gen_neon_vget_high<V_widen_l> (operands[0], tmpreg));

  DONE;
}
)

(define_insn "neon_vec_<US>mult_<mode>"
 [(set (match_operand:<V_widen> 0 "register_operand" "=w")
       (mult:<V_widen> (SE:<V_widen>
                           (match_operand:VDI 1
"register_operand" "w")) 6593 (SE:<V_widen> 6594 (match_operand:VDI 2 "register_operand" "w"))))] 6595 "TARGET_NEON" 6596 "vmull.<US><V_sz_elem> %q0, %P1, %P2" 6597 [(set_attr "type" "neon_mul_<V_elem_ch>_long")] 6598) 6599 6600(define_expand "vec_widen_<US>mult_hi_<mode>" 6601 [(match_operand:<V_double_width> 0 "register_operand" "") 6602 (SE:<V_double_width> (match_operand:VDI 1 "register_operand" "")) 6603 (SE:<V_double_width> (match_operand:VDI 2 "register_operand" ""))] 6604 "TARGET_NEON" 6605 { 6606 rtx tmpreg = gen_reg_rtx (<V_widen>mode); 6607 emit_insn (gen_neon_vec_<US>mult_<mode> (tmpreg, operands[1], operands[2])); 6608 emit_insn (gen_neon_vget_high<V_widen_l> (operands[0], tmpreg)); 6609 6610 DONE; 6611 6612 } 6613) 6614 6615(define_expand "vec_widen_<US>mult_lo_<mode>" 6616 [(match_operand:<V_double_width> 0 "register_operand" "") 6617 (SE:<V_double_width> (match_operand:VDI 1 "register_operand" "")) 6618 (SE:<V_double_width> (match_operand:VDI 2 "register_operand" ""))] 6619 "TARGET_NEON" 6620 { 6621 rtx tmpreg = gen_reg_rtx (<V_widen>mode); 6622 emit_insn (gen_neon_vec_<US>mult_<mode> (tmpreg, operands[1], operands[2])); 6623 emit_insn (gen_neon_vget_low<V_widen_l> (operands[0], tmpreg)); 6624 6625 DONE; 6626 6627 } 6628) 6629 6630(define_expand "vec_widen_<US>shiftl_hi_<mode>" 6631 [(match_operand:<V_double_width> 0 "register_operand" "") 6632 (SE:<V_double_width> (match_operand:VDI 1 "register_operand" "")) 6633 (match_operand:SI 2 "immediate_operand" "i")] 6634 "TARGET_NEON" 6635 { 6636 rtx tmpreg = gen_reg_rtx (<V_widen>mode); 6637 emit_insn (gen_neon_vec_<US>shiftl_<mode> (tmpreg, operands[1], operands[2])); 6638 emit_insn (gen_neon_vget_high<V_widen_l> (operands[0], tmpreg)); 6639 6640 DONE; 6641 } 6642) 6643 6644(define_expand "vec_widen_<US>shiftl_lo_<mode>" 6645 [(match_operand:<V_double_width> 0 "register_operand" "") 6646 (SE:<V_double_width> (match_operand:VDI 1 "register_operand" "")) 6647 (match_operand:SI 2 "immediate_operand" "i")] 
6648 "TARGET_NEON" 6649 { 6650 rtx tmpreg = gen_reg_rtx (<V_widen>mode); 6651 emit_insn (gen_neon_vec_<US>shiftl_<mode> (tmpreg, operands[1], operands[2])); 6652 emit_insn (gen_neon_vget_low<V_widen_l> (operands[0], tmpreg)); 6653 6654 DONE; 6655 } 6656) 6657 6658; FIXME: These instruction patterns can't be used safely in big-endian mode 6659; because the ordering of vector elements in Q registers is different from what 6660; the semantics of the instructions require. 6661 6662(define_insn "vec_pack_trunc_<mode>" 6663 [(set (match_operand:<V_narrow_pack> 0 "register_operand" "=&w") 6664 (vec_concat:<V_narrow_pack> 6665 (truncate:<V_narrow> 6666 (match_operand:VN 1 "register_operand" "w")) 6667 (truncate:<V_narrow> 6668 (match_operand:VN 2 "register_operand" "w"))))] 6669 "TARGET_NEON && !BYTES_BIG_ENDIAN" 6670 "vmovn.i<V_sz_elem>\t%e0, %q1\;vmovn.i<V_sz_elem>\t%f0, %q2" 6671 [(set_attr "type" "multiple") 6672 (set_attr "length" "8")] 6673) 6674 6675;; For the non-quad case. 6676(define_insn "neon_vec_pack_trunc_<mode>" 6677 [(set (match_operand:<V_narrow> 0 "register_operand" "=w") 6678 (truncate:<V_narrow> (match_operand:VN 1 "register_operand" "w")))] 6679 "TARGET_NEON && !BYTES_BIG_ENDIAN" 6680 "vmovn.i<V_sz_elem>\t%P0, %q1" 6681 [(set_attr "type" "neon_move_narrow_q")] 6682) 6683 6684(define_expand "vec_pack_trunc_<mode>" 6685 [(match_operand:<V_narrow_pack> 0 "register_operand" "") 6686 (match_operand:VSHFT 1 "register_operand" "") 6687 (match_operand:VSHFT 2 "register_operand")] 6688 "TARGET_NEON && !BYTES_BIG_ENDIAN" 6689{ 6690 rtx tempreg = gen_reg_rtx (<V_DOUBLE>mode); 6691 6692 emit_insn (gen_move_lo_quad_<V_double> (tempreg, operands[1])); 6693 emit_insn (gen_move_hi_quad_<V_double> (tempreg, operands[2])); 6694 emit_insn (gen_neon_vec_pack_trunc_<V_double> (operands[0], tempreg)); 6695 DONE; 6696}) 6697 6698(define_insn "neon_vabd<mode>_2" 6699 [(set (match_operand:VF 0 "s_register_operand" "=w") 6700 (abs:VF (minus:VF (match_operand:VF 1 
"s_register_operand" "w") 6701 (match_operand:VF 2 "s_register_operand" "w"))))] 6702 "TARGET_NEON && flag_unsafe_math_optimizations" 6703 "vabd.<V_s_elem> %<V_reg>0, %<V_reg>1, %<V_reg>2" 6704 [(set_attr "type" "neon_fp_abd_s<q>")] 6705) 6706 6707(define_insn "neon_vabd<mode>_3" 6708 [(set (match_operand:VF 0 "s_register_operand" "=w") 6709 (abs:VF (unspec:VF [(match_operand:VF 1 "s_register_operand" "w") 6710 (match_operand:VF 2 "s_register_operand" "w")] 6711 UNSPEC_VSUB)))] 6712 "TARGET_NEON && flag_unsafe_math_optimizations" 6713 "vabd.<V_if_elem> %<V_reg>0, %<V_reg>1, %<V_reg>2" 6714 [(set_attr "type" "neon_fp_abd_s<q>")] 6715) 6716 6717;; Copy from core-to-neon regs, then extend, not vice-versa 6718 6719(define_split 6720 [(set (match_operand:DI 0 "s_register_operand" "") 6721 (sign_extend:DI (match_operand:SI 1 "s_register_operand" "")))] 6722 "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))" 6723 [(set (match_dup 2) (vec_duplicate:V2SI (match_dup 1))) 6724 (set (match_dup 0) (ashiftrt:DI (match_dup 0) (const_int 32)))] 6725 { 6726 operands[2] = gen_rtx_REG (V2SImode, REGNO (operands[0])); 6727 }) 6728 6729(define_split 6730 [(set (match_operand:DI 0 "s_register_operand" "") 6731 (sign_extend:DI (match_operand:HI 1 "s_register_operand" "")))] 6732 "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))" 6733 [(set (match_dup 2) (vec_duplicate:V4HI (match_dup 1))) 6734 (set (match_dup 0) (ashiftrt:DI (match_dup 0) (const_int 48)))] 6735 { 6736 operands[2] = gen_rtx_REG (V4HImode, REGNO (operands[0])); 6737 }) 6738 6739(define_split 6740 [(set (match_operand:DI 0 "s_register_operand" "") 6741 (sign_extend:DI (match_operand:QI 1 "s_register_operand" "")))] 6742 "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))" 6743 [(set (match_dup 2) (vec_duplicate:V8QI (match_dup 1))) 6744 (set (match_dup 0) (ashiftrt:DI (match_dup 0) (const_int 56)))] 6745 { 6746 operands[2] = gen_rtx_REG (V8QImode, REGNO 
(operands[0])); 6747 }) 6748 6749(define_split 6750 [(set (match_operand:DI 0 "s_register_operand" "") 6751 (zero_extend:DI (match_operand:SI 1 "s_register_operand" "")))] 6752 "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))" 6753 [(set (match_dup 2) (vec_duplicate:V2SI (match_dup 1))) 6754 (set (match_dup 0) (lshiftrt:DI (match_dup 0) (const_int 32)))] 6755 { 6756 operands[2] = gen_rtx_REG (V2SImode, REGNO (operands[0])); 6757 }) 6758 6759(define_split 6760 [(set (match_operand:DI 0 "s_register_operand" "") 6761 (zero_extend:DI (match_operand:HI 1 "s_register_operand" "")))] 6762 "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))" 6763 [(set (match_dup 2) (vec_duplicate:V4HI (match_dup 1))) 6764 (set (match_dup 0) (lshiftrt:DI (match_dup 0) (const_int 48)))] 6765 { 6766 operands[2] = gen_rtx_REG (V4HImode, REGNO (operands[0])); 6767 }) 6768 6769(define_split 6770 [(set (match_operand:DI 0 "s_register_operand" "") 6771 (zero_extend:DI (match_operand:QI 1 "s_register_operand" "")))] 6772 "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))" 6773 [(set (match_dup 2) (vec_duplicate:V8QI (match_dup 1))) 6774 (set (match_dup 0) (lshiftrt:DI (match_dup 0) (const_int 56)))] 6775 { 6776 operands[2] = gen_rtx_REG (V8QImode, REGNO (operands[0])); 6777 }) 6778