;; ARM NEON coprocessor Machine Description
;; Copyright (C) 2006-2016 Free Software Foundation, Inc.
;; Written by CodeSourcery.
;;
;; This file is part of GCC.
;;
;; GCC is free software; you can redistribute it and/or modify it
;; under the terms of the GNU General Public License as published by
;; the Free Software Foundation; either version 3, or (at your option)
;; any later version.
;;
;; GCC is distributed in the hope that it will be useful, but
;; WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
;; General Public License for more details.
;;
;; You should have received a copy of the GNU General Public License
;; along with GCC; see the file COPYING3. If not see
;; <http://www.gnu.org/licenses/>.


;; Attribute used to permit string comparisons against <VQH_mnem> in
;; type attribute definitions.
(define_attr "vqh_mnem" "vadd,vmin,vmax" (const_string "vadd"))

;; Move pattern for the VDX modes.  The condition only lets the insn match
;; when at least one of the two operands is a register.
;;   alternative 0      vmov between two %P registers
;;   alternatives 1, 3  handled by output_move_neon
;;   alternative 2      immediate, validated by neon_immediate_valid_for_move
;;   alternatives 4, 5  vmov between a %Q/%R register pair and a %P register
;;   alternatives 6-8   handled by output_move_double
(define_insn "*neon_mov<mode>"
  [(set (match_operand:VDX 0 "nonimmediate_operand"
          "=w,Un,w, w, ?r,?w,?r,?r, ?Us")
        (match_operand:VDX 1 "general_operand"
          " w,w, Dn,Uni, w, r, r, Usi,r"))]
  "TARGET_NEON
   && (register_operand (operands[0], <MODE>mode)
       || register_operand (operands[1], <MODE>mode))"
{
  if (which_alternative == 2)
    {
      int width, is_valid;
      static char templ[40];

      /* The call rewrites operands[1] and reports the element width.  */
      is_valid = neon_immediate_valid_for_move (operands[1], <MODE>mode,
                                                &operands[1], &width);

      gcc_assert (is_valid != 0);

      if (width == 0)
        return "vmov.f32\t%P0, %1 @ <mode>";
      else
        /* Bounded write: the text length depends on the mode name.  */
        snprintf (templ, sizeof (templ),
                  "vmov.i%d\t%%P0, %%x1 @ <mode>", width);

      return templ;
    }

  switch (which_alternative)
    {
    case 0: return "vmov\t%P0, %P1 @ <mode>";
    case 1: case 3: return output_move_neon (operands);
    case 2: gcc_unreachable ();
    case 4: return "vmov\t%Q0, %R0, %P1 @ <mode>";
    case 5: return "vmov\t%P0, %Q1, %R1 @ <mode>";
    default: return output_move_double (operands, true, NULL);
    }
}
  [(set_attr "type" "neon_move<q>,neon_store1_1reg,neon_move<q>,\
                     neon_load1_1reg, neon_to_gp<q>,neon_from_gp<q>,mov_reg,\
                     neon_load1_2reg, neon_store1_2reg")
   (set_attr "length" "4,4,4,4,4,4,8,8,8")
   (set_attr "arm_pool_range" "*,*,*,1020,*,*,*,1020,*")
   (set_attr "thumb2_pool_range" "*,*,*,1018,*,*,*,1018,*")
   (set_attr "neg_pool_range" "*,*,*,1004,*,*,*,1004,*")])

;; Move pattern for the VQXMOV modes.  Same alternative layout as the VDX
;; pattern above, but alternatives 4 and 5 need two vmov instructions and
;; alternatives 6-8 are handled by output_move_quad.
(define_insn "*neon_mov<mode>"
  [(set (match_operand:VQXMOV 0 "nonimmediate_operand"
          "=w,Un,w, w, ?r,?w,?r,?r, ?Us")
        (match_operand:VQXMOV 1 "general_operand"
          " w,w, Dn,Uni, w, r, r, Usi, r"))]
  "TARGET_NEON
   && (register_operand (operands[0], <MODE>mode)
       || register_operand (operands[1], <MODE>mode))"
{
  if (which_alternative == 2)
    {
      int width, is_valid;
      static char templ[40];

      /* The call rewrites operands[1] and reports the element width.  */
      is_valid = neon_immediate_valid_for_move (operands[1], <MODE>mode,
                                                &operands[1], &width);

      gcc_assert (is_valid != 0);

      if (width == 0)
        return "vmov.f32\t%q0, %1 @ <mode>";
      else
        /* Bounded write: the text length depends on the mode name.  */
        snprintf (templ, sizeof (templ),
                  "vmov.i%d\t%%q0, %%1 @ <mode>", width);

      return templ;
    }

  switch (which_alternative)
    {
    case 0: return "vmov\t%q0, %q1 @ <mode>";
    case 1: case 3: return output_move_neon (operands);
    case 2: gcc_unreachable ();
    case 4: return "vmov\t%Q0, %R0, %e1 @ <mode>\;vmov\t%J0, %K0, %f1";
    case 5: return "vmov\t%e0, %Q1, %R1 @ <mode>\;vmov\t%f0, %J1, %K1";
    default: return output_move_quad (operands);
    }
}
  [(set_attr "type" "neon_move_q,neon_store2_2reg_q,neon_move_q,\
                     neon_load2_2reg_q,neon_to_gp_q,neon_from_gp_q,\
                     mov_reg,neon_load1_4reg,neon_store1_4reg")
   (set_attr "length" "4,8,4,8,8,8,16,8,16")
   (set_attr "arm_pool_range" "*,*,*,1020,*,*,*,1020,*")
   (set_attr "thumb2_pool_range" "*,*,*,1018,*,*,*,1018,*")
   (set_attr "neg_pool_range" "*,*,*,996,*,*,*,996,*")])

;; TImode move expander.  While pseudos can still be created, a source
;; that is to be stored into a non-register destination is first forced
;; into a register.
(define_expand "movti"
  [(set (match_operand:TI 0 "nonimmediate_operand" "")
        (match_operand:TI 1 "general_operand" ""))]
  "TARGET_NEON"
{
  if (can_create_pseudo_p ())
    {
      if (!REG_P (operands[0]))
        operands[1] = force_reg (TImode, operands[1]);
    }
})

;; Move expander for the VSTRUCT modes; same policy as movti above.
(define_expand "mov<mode>"
  [(set (match_operand:VSTRUCT 0 "nonimmediate_operand" "")
        (match_operand:VSTRUCT 1 "general_operand" ""))]
  "TARGET_NEON"
{
  if (can_create_pseudo_p ())
    {
      if (!REG_P (operands[0]))
        operands[1] = force_reg (<MODE>mode, operands[1]);
    }
})

;; V4HF move expander, available only with TARGET_FP16.
(define_expand "movv4hf"
  [(set (match_operand:V4HF 0 "s_register_operand")
        (match_operand:V4HF 1 "s_register_operand"))]
  "TARGET_NEON && TARGET_FP16"
{
  /* We need to use force_reg to avoid CANNOT_CHANGE_MODE_CLASS
     causing an ICE on big-endian because it cannot extract subregs in
     this case.  */
  if (can_create_pseudo_p ())
    {
      if (!REG_P (operands[0]))
        operands[1] = force_reg (V4HFmode, operands[1]);
    }
})

;; V8HF move expander, available only with TARGET_FP16.  Unlike movv4hf,
;; the operands carry no predicate.
(define_expand "movv8hf"
  [(set (match_operand:V8HF 0 "")
        (match_operand:V8HF 1 ""))]
  "TARGET_NEON && TARGET_FP16"
{
  /* We need to use force_reg to avoid CANNOT_CHANGE_MODE_CLASS
     causing an ICE on big-endian because it cannot extract subregs in
     this case.  */
  if (can_create_pseudo_p ())
    {
      if (!REG_P (operands[0]))
        operands[1] = force_reg (V8HFmode, operands[1]);
    }
})

;; Move pattern for the VSTRUCT modes.  The register-to-register
;; alternative returns "#", so it relies on the define_splits below;
;; the two memory alternatives are printed by output_move_neon.  The
;; length is computed by arm_attr_length_move_neon.
(define_insn "*neon_mov<mode>"
  [(set (match_operand:VSTRUCT 0 "nonimmediate_operand" "=w,Ut,w")
        (match_operand:VSTRUCT 1 "general_operand" " w,w, Ut"))]
  "TARGET_NEON
   && (register_operand (operands[0], <MODE>mode)
       || register_operand (operands[1], <MODE>mode))"
{
  switch (which_alternative)
    {
    case 0: return "#";
    case 1: case 2: return output_move_neon (operands);
    default: gcc_unreachable ();
    }
}
  [(set_attr "type" "neon_move_q,neon_store2_2reg_q,neon_load2_2reg_q")
   (set (attr "length") (symbol_ref "arm_attr_length_move_neon (insn)"))])

;; After reload, split an EI register copy into a TImode copy of the
;; first four registers plus a DImode copy starting at register + 4.
;; NOTE(review): neon_disambiguate_copy presumably orders the sub-copies
;; so overlapping source and destination ranges stay correct - confirm
;; against its definition.
(define_split
  [(set (match_operand:EI 0 "s_register_operand" "")
        (match_operand:EI 1 "s_register_operand" ""))]
  "TARGET_NEON && reload_completed"
  [(set (match_dup 0) (match_dup 1))
   (set (match_dup 2) (match_dup 3))]
{
  int rdest = REGNO (operands[0]);
  int rsrc = REGNO (operands[1]);
  rtx dest[2], src[2];

  dest[0] = gen_rtx_REG (TImode, rdest);
  src[0] = gen_rtx_REG (TImode, rsrc);
  dest[1] = gen_rtx_REG (DImode, rdest + 4);
  src[1] = gen_rtx_REG (DImode, rsrc + 4);

  neon_disambiguate_copy (operands, dest, src, 2);
})

;; After reload, split an OI register copy into two TImode copies at
;; register offsets 0 and 4.
(define_split
  [(set (match_operand:OI 0 "s_register_operand" "")
        (match_operand:OI 1 "s_register_operand" ""))]
  "TARGET_NEON && reload_completed"
  [(set (match_dup 0) (match_dup 1))
   (set (match_dup 2) (match_dup 3))]
{
  int rdest = REGNO (operands[0]);
  int rsrc = REGNO (operands[1]);
  rtx dest[2], src[2];

  dest[0] = gen_rtx_REG (TImode, rdest);
  src[0] = gen_rtx_REG (TImode, rsrc);
  dest[1] = gen_rtx_REG (TImode, rdest + 4);
  src[1] = gen_rtx_REG (TImode, rsrc + 4);

  neon_disambiguate_copy (operands, dest, src, 2);
})

;; After reload, split a CI register copy into three TImode copies at
;; register offsets 0, 4 and 8.
(define_split
  [(set (match_operand:CI 0 "s_register_operand" "")
        (match_operand:CI 1 "s_register_operand" ""))]
  "TARGET_NEON && reload_completed"
  [(set (match_dup 0) (match_dup 1))
   (set (match_dup 2) (match_dup 3))
   (set (match_dup 4) (match_dup 5))]
{
  int rdest = REGNO (operands[0]);
  int rsrc = REGNO (operands[1]);
  rtx dest[3], src[3];

  dest[0] = gen_rtx_REG (TImode, rdest);
  src[0] = gen_rtx_REG (TImode, rsrc);
  dest[1] = gen_rtx_REG (TImode, rdest + 4);
  src[1] = gen_rtx_REG (TImode, rsrc + 4);
  dest[2] = gen_rtx_REG (TImode, rdest + 8);
  src[2] = gen_rtx_REG (TImode, rsrc + 8);

  neon_disambiguate_copy (operands, dest, src, 3);
})

;; After reload, split an XI register copy into four TImode copies at
;; register offsets 0, 4, 8 and 12.
(define_split
  [(set (match_operand:XI 0 "s_register_operand" "")
        (match_operand:XI 1 "s_register_operand" ""))]
  "TARGET_NEON && reload_completed"
  [(set (match_dup 0) (match_dup 1))
   (set (match_dup 2) (match_dup 3))
   (set (match_dup 4) (match_dup 5))
   (set (match_dup 6) (match_dup 7))]
{
  int rdest = REGNO (operands[0]);
  int rsrc = REGNO (operands[1]);
  rtx dest[4], src[4];

  dest[0] = gen_rtx_REG (TImode, rdest);
  src[0] = gen_rtx_REG (TImode, rsrc);
  dest[1] = gen_rtx_REG (TImode, rdest + 4);
  src[1] = gen_rtx_REG (TImode, rsrc + 4);
  dest[2] = gen_rtx_REG (TImode, rdest + 8);
  src[2] = gen_rtx_REG (TImode, rsrc + 8);
  dest[3] = gen_rtx_REG (TImode, rdest + 12);
  src[3] = gen_rtx_REG (TImode, rsrc + 12);

  neon_disambiguate_copy (operands, dest, src, 4);
})

;; Misaligned move expander for the VDQX modes.  Only enabled for
;; little-endian targets with unaligned_access.
(define_expand "movmisalign<mode>"
  [(set (match_operand:VDQX 0 "neon_perm_struct_or_reg_operand")
        (unspec:VDQX [(match_operand:VDQX 1 "neon_perm_struct_or_reg_operand")]
                     UNSPEC_MISALIGNED_ACCESS))]
  "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access"
{
  rtx adjust_mem;
  /* This pattern is not permitted to fail during expansion: if both arguments
     are non-registers (e.g. memory := constant, which can be created by the
     auto-vectorizer), force operand 1 into a register.  */
  if (!s_register_operand (operands[0], <MODE>mode)
      && !s_register_operand (operands[1], <MODE>mode))
    operands[1] = force_reg (<MODE>mode, operands[1]);

  /* Whichever operand is not the register is the memory reference.  */
  if (s_register_operand (operands[0], <MODE>mode))
    adjust_mem = operands[1];
  else
    adjust_mem = operands[0];

  /* Legitimize address.  */
  if (!neon_vector_mem_operand (adjust_mem, 2, true))
    XEXP (adjust_mem, 0) = force_reg (Pmode, XEXP (adjust_mem, 0));

})

;; Misaligned store of a VDX register, emitted as vst1.
(define_insn "*movmisalign<mode>_neon_store"
  [(set (match_operand:VDX 0 "neon_permissive_struct_operand" "=Um")
        (unspec:VDX [(match_operand:VDX 1 "s_register_operand" " w")]
                    UNSPEC_MISALIGNED_ACCESS))]
  "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access"
  "vst1.<V_sz_elem>\t{%P1}, %A0"
  [(set_attr "type" "neon_store1_1reg<q>")])

;; Misaligned load into a VDX register, emitted as vld1.
(define_insn "*movmisalign<mode>_neon_load"
  [(set (match_operand:VDX 0 "s_register_operand" "=w")
        (unspec:VDX [(match_operand:VDX 1 "neon_permissive_struct_operand"
                       " Um")]
                    UNSPEC_MISALIGNED_ACCESS))]
  "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access"
  "vld1.<V_sz_elem>\t{%P0}, %A1"
  [(set_attr "type" "neon_load1_1reg<q>")])

;; Misaligned store of a VQX register, emitted as vst1.
(define_insn "*movmisalign<mode>_neon_store"
  [(set (match_operand:VQX 0 "neon_permissive_struct_operand" "=Um")
        (unspec:VQX [(match_operand:VQX 1 "s_register_operand" " w")]
                    UNSPEC_MISALIGNED_ACCESS))]
  "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access"
  "vst1.<V_sz_elem>\t{%q1}, %A0"
  [(set_attr "type" "neon_store1_1reg<q>")])

;; Misaligned load into a VQX register, emitted as vld1.
(define_insn "*movmisalign<mode>_neon_load"
  [(set (match_operand:VQX 0 "s_register_operand" "=w")
        (unspec:VQX [(match_operand:VQX 1 "neon_permissive_struct_operand"
                       " Um")]
                    UNSPEC_MISALIGNED_ACCESS))]
  "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access"
  "vld1.<V_sz_elem>\t{%q0}, %A1"
  [(set_attr "type" "neon_load1_1reg<q>")])

;; Insert scalar operand 1 into one lane of a VD_LANE vector.  Operand 2
;; is a mask with a single bit set (the vec_set<mode> expander builds it
;; as 1 << lane); ffs recovers the lane index, which is mirrored on
;; big-endian targets.
;; NOTE(review): the memory alternative loads a single lane but is typed
;; neon_load1_all_lanes<q>; confirm that scheduling class is intended.
(define_insn "vec_set<mode>_internal"
  [(set (match_operand:VD_LANE 0 "s_register_operand" "=w,w")
        (vec_merge:VD_LANE
          (vec_duplicate:VD_LANE
            (match_operand:<V_elem> 1 "nonimmediate_operand" "Um,r"))
          (match_operand:VD_LANE 3 "s_register_operand" "0,0")
          (match_operand:SI 2 "immediate_operand" "i,i")))]
  "TARGET_NEON"
{
  int elt = ffs ((int) INTVAL (operands[2])) - 1;
  if (BYTES_BIG_ENDIAN)
    elt = GET_MODE_NUNITS (<MODE>mode) - 1 - elt;
  operands[2] = GEN_INT (elt);

  if (which_alternative == 0)
    return "vld1.<V_sz_elem>\t{%P0[%c2]}, %A1";
  else
    return "vmov.<V_sz_elem>\t%P0[%c2], %1";
}
  [(set_attr "type" "neon_load1_all_lanes<q>,neon_from_gp<q>")])

;; Insert scalar operand 1 into one lane of a VQ2 vector.  The lane index
;; is reduced to an index within one half of the vector, and operand 0 is
;; rewritten to the <V_HALF> register at REGNO + 0 or REGNO + 2 that
;; holds that half.
(define_insn "vec_set<mode>_internal"
  [(set (match_operand:VQ2 0 "s_register_operand" "=w,w")
        (vec_merge:VQ2
          (vec_duplicate:VQ2
            (match_operand:<V_elem> 1 "nonimmediate_operand" "Um,r"))
          (match_operand:VQ2 3 "s_register_operand" "0,0")
          (match_operand:SI 2 "immediate_operand" "i,i")))]
  "TARGET_NEON"
{
  HOST_WIDE_INT elem = ffs ((int) INTVAL (operands[2])) - 1;
  int half_elts = GET_MODE_NUNITS (<MODE>mode) / 2;
  int elt = elem % half_elts;
  int hi = (elem / half_elts) * 2;
  int regno = REGNO (operands[0]);

  if (BYTES_BIG_ENDIAN)
    elt = half_elts - 1 - elt;

  operands[0] = gen_rtx_REG (<V_HALF>mode, regno + hi);
  operands[2] = GEN_INT (elt);

  if (which_alternative == 0)
    return "vld1.<V_sz_elem>\t{%P0[%c2]}, %A1";
  else
    return "vmov.<V_sz_elem>\t%P0[%c2], %1";
}
  [(set_attr "type" "neon_load1_all_lanes<q>,neon_from_gp<q>")]
)

;; Insert a DImode value into one element of a V2DI vector.  Operand 0 is
;; rewritten to the DImode register at REGNO + 2 * element, which is then
;; loaded or moved as a whole.
(define_insn "vec_setv2di_internal"
  [(set (match_operand:V2DI 0 "s_register_operand" "=w,w")
        (vec_merge:V2DI
          (vec_duplicate:V2DI
            (match_operand:DI 1 "nonimmediate_operand" "Um,r"))
          (match_operand:V2DI 3 "s_register_operand" "0,0")
          (match_operand:SI 2 "immediate_operand" "i,i")))]
  "TARGET_NEON"
{
  HOST_WIDE_INT elem = ffs ((int) INTVAL (operands[2])) - 1;
  int regno = REGNO (operands[0]) + 2 * elem;

  operands[0] = gen_rtx_REG (DImode, regno);

  if (which_alternative == 0)
    /* Register list written in braces, as vec_extractv2di does for vst1.  */
    return "vld1.64\t{%P0}, %A1";
  else
    return "vmov\t%P0, %Q1, %R1";
}
  [(set_attr "type" "neon_load1_all_lanes_q,neon_from_gp_q")]
)

;; Standard-name expander: converts the lane number in operand 2 into the
;; single-bit mask expected by vec_set<mode>_internal, and passes operand 0
;; as both the destination and the vector being merged into.
(define_expand "vec_set<mode>"
  [(match_operand:VDQ 0 "s_register_operand" "")
   (match_operand:<V_elem> 1 "s_register_operand" "")
   (match_operand:SI 2 "immediate_operand" "")]
  "TARGET_NEON"
{
  HOST_WIDE_INT elem = (HOST_WIDE_INT) 1 << INTVAL (operands[2]);
  emit_insn (gen_vec_set<mode>_internal (operands[0], operands[1],
                                         GEN_INT (elem), operands[0]));
  DONE;
})

;; Extract lane operand 2 of a VD_LANE vector to memory (vst1) or to a
;; core register (vmov).  The lane index is mirrored on big-endian.
(define_insn "vec_extract<mode>"
  [(set (match_operand:<V_elem> 0 "nonimmediate_operand" "=Um,r")
        (vec_select:<V_elem>
          (match_operand:VD_LANE 1 "s_register_operand" "w,w")
          (parallel [(match_operand:SI 2 "immediate_operand" "i,i")])))]
  "TARGET_NEON"
{
  if (BYTES_BIG_ENDIAN)
    {
      int elt = INTVAL (operands[2]);
      elt = GET_MODE_NUNITS (<MODE>mode) - 1 - elt;
      operands[2] = GEN_INT (elt);
    }

  if (which_alternative == 0)
    return "vst1.<V_sz_elem>\t{%P1[%c2]}, %A0";
  else
    return "vmov.<V_uf_sclr>\t%0, %P1[%c2]";
}
  [(set_attr "type" "neon_store1_one_lane<q>,neon_to_gp<q>")]
)

;; Extract lane operand 2 of a VQ2 vector.  Operand 1 is rewritten to the
;; <V_HALF> register holding the lane and the index is reduced to an index
;; within that half.
(define_insn "vec_extract<mode>"
  [(set (match_operand:<V_elem> 0 "nonimmediate_operand" "=Um,r")
        (vec_select:<V_elem>
          (match_operand:VQ2 1 "s_register_operand" "w,w")
          (parallel [(match_operand:SI 2 "immediate_operand" "i,i")])))]
  "TARGET_NEON"
{
  int half_elts = GET_MODE_NUNITS (<MODE>mode) / 2;
  int elt = INTVAL (operands[2]) % half_elts;
  int hi = (INTVAL (operands[2]) / half_elts) * 2;
  int regno = REGNO (operands[1]);

  if (BYTES_BIG_ENDIAN)
    elt = half_elts - 1 - elt;

  operands[1] = gen_rtx_REG (<V_HALF>mode, regno + hi);
  operands[2] = GEN_INT (elt);

  if (which_alternative == 0)
    return "vst1.<V_sz_elem>\t{%P1[%c2]}, %A0";
  else
    return "vmov.<V_uf_sclr>\t%0, %P1[%c2]";
}
  [(set_attr "type" "neon_store1_one_lane<q>,neon_to_gp<q>")]
)

;; Extract one DImode element of a V2DI vector.  Operand 1 is rewritten to
;; the DImode register at REGNO + 2 * element.
(define_insn "vec_extractv2di"
  [(set (match_operand:DI 0 "nonimmediate_operand" "=Um,r")
        (vec_select:DI
          (match_operand:V2DI 1 "s_register_operand" "w,w")
          (parallel [(match_operand:SI 2 "immediate_operand" "i,i")])))]
  "TARGET_NEON"
{
  int regno = REGNO (operands[1]) + 2 * INTVAL (operands[2]);

  operands[1] = gen_rtx_REG (DImode, regno);

  if (which_alternative == 0)
    return "vst1.64\t{%P1}, %A0 @ v2di";
  else
    return "vmov\t%Q0, %R0, %P1 @ v2di";
}
  [(set_attr "type" "neon_store1_one_lane_q,neon_to_gp_q")]
)

;; Vector initialisation is delegated entirely to neon_expand_vector_init.
(define_expand "vec_init<mode>"
  [(match_operand:VDQ 0 "s_register_operand" "")
   (match_operand 1 "" "")]
  "TARGET_NEON"
{
  neon_expand_vector_init (operands[0], operands[1]);
  DONE;
})

;; Doubleword and quadword arithmetic.

;; NOTE: some other instructions also support 64-bit integer
;; element size, which we could potentially use for "long long" operations.

;; Vector addition for the VDQ modes.  For float modes the pattern is only
;; enabled under flag_unsafe_math_optimizations; the type attribute picks
;; the FP or the integer scheduling class accordingly.
(define_insn "*add<mode>3_neon"
  [(set (match_operand:VDQ 0 "s_register_operand" "=w")
        (plus:VDQ (match_operand:VDQ 1 "s_register_operand" "w")
                  (match_operand:VDQ 2 "s_register_operand" "w")))]
  "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
  "vadd.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set (attr "type")
      (if_then_else (match_test "<Is_float_mode>")
                    (const_string "neon_fp_addsub_s<q>")
                    (const_string "neon_add<q>")))]
)

;; DImode addition.  Alternatives 0 and 3 emit a single vadd.i64 and are
;; selected by the "arch" attribute (neon_for_64bits versus
;; avoid_neon_for_64bits).  All other alternatives return "#" and so are
;; left to a splitter that is not part of this pattern.  The pattern
;; clobbers the condition-code register.
(define_insn "adddi3_neon"
  [(set (match_operand:DI 0 "s_register_operand" "=w,?&r,?&r,?w,?&r,?&r,?&r")
        (plus:DI (match_operand:DI 1 "s_register_operand" "%w,0,0,w,r,0,r")
                 (match_operand:DI 2 "arm_adddi_operand" "w,r,0,w,r,Dd,Dd")))
   (clobber (reg:CC CC_REGNUM))]
  "TARGET_NEON"
{
  switch (which_alternative)
    {
    case 0: /* fall through */
    case 3: return "vadd.i64\t%P0, %P1, %P2";
    case 1: return "#";
    case 2: return "#";
    case 4: return "#";
    case 5: return "#";
    case 6: return "#";
    default: gcc_unreachable ();
    }
}
  [(set_attr "type" "neon_add,multiple,multiple,neon_add,\
                     multiple,multiple,multiple")
   (set_attr "conds" "*,clob,clob,*,clob,clob,clob")
   (set_attr "length" "*,8,8,*,8,8,8")
   (set_attr "arch" "neon_for_64bits,*,*,avoid_neon_for_64bits,*,*,*")]
)

;; Vector subtraction for the VDQ modes; same float-mode gating as
;; *add<mode>3_neon.
(define_insn "*sub<mode>3_neon"
  [(set (match_operand:VDQ 0 "s_register_operand" "=w")
        (minus:VDQ (match_operand:VDQ 1 "s_register_operand" "w")
                   (match_operand:VDQ 2 "s_register_operand" "w")))]
  "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
  "vsub.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set (attr "type")
      (if_then_else (match_test "<Is_float_mode>")
                    (const_string "neon_fp_addsub_s<q>")
                    (const_string "neon_sub<q>")))]
)

;; DImode subtraction.  Alternatives 0 and 4 emit vsub.i64; alternatives
;; 1-3 emit a subs/sbc pair on the %Q and %R parts of the operands, which
;; is why they are marked as clobbering the condition codes and have
;; length 8.
(define_insn "subdi3_neon"
  [(set (match_operand:DI 0 "s_register_operand" "=w,?&r,?&r,?&r,?w")
        (minus:DI (match_operand:DI 1 "s_register_operand" "w,0,r,0,w")
                  (match_operand:DI 2 "s_register_operand" "w,r,0,0,w")))
   (clobber (reg:CC CC_REGNUM))]
  "TARGET_NEON"
{
  switch (which_alternative)
    {
    case 0: /* fall through */
    case 4: return "vsub.i64\t%P0, %P1, %P2";
    case 1: /* fall through */
    case 2: /* fall through */
    case 3: return "subs\\t%Q0, %Q1, %Q2\;sbc\\t%R0, %R1, %R2";
    default: gcc_unreachable ();
    }
}
  [(set_attr "type" "neon_sub,multiple,multiple,multiple,neon_sub")
   (set_attr "conds" "*,clob,clob,clob,*")
   (set_attr "length" "*,8,8,8,*")
   (set_attr "arch" "neon_for_64bits,*,*,*,avoid_neon_for_64bits")]
)

;; Vector multiplication for the VDQW modes; float modes require
;; flag_unsafe_math_optimizations.
(define_insn "*mul<mode>3_neon"
  [(set (match_operand:VDQW 0 "s_register_operand" "=w")
        (mult:VDQW (match_operand:VDQW 1 "s_register_operand" "w")
                   (match_operand:VDQW 2 "s_register_operand" "w")))]
  "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
  "vmul.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set (attr "type")
      (if_then_else (match_test "<Is_float_mode>")
                    (const_string "neon_fp_mul_s<q>")
                    (const_string "neon_mul_<V_elem_ch><q>")))]
)

;; Multiply-accumulate: operand 0 = operand 1 + operand 2 * operand 3,
;; with the accumulator (operand 1) tied to the destination.
(define_insn "mul<mode>3add<mode>_neon"
  [(set (match_operand:VDQW 0 "s_register_operand" "=w")
        (plus:VDQW (mult:VDQW (match_operand:VDQW 2 "s_register_operand" "w")
                              (match_operand:VDQW 3 "s_register_operand" "w"))
                   (match_operand:VDQW 1 "s_register_operand" "0")))]
  "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
  "vmla.<V_if_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
  [(set (attr "type")
      (if_then_else (match_test "<Is_float_mode>")
                    (const_string "neon_fp_mla_s<q>")
                    (const_string "neon_mla_<V_elem_ch><q>")))]
)

;; Multiply-subtract: operand 0 = operand 1 - operand 2 * operand 3,
;; with the accumulator (operand 1) tied to the destination.
(define_insn "mul<mode>3neg<mode>add<mode>_neon"
  [(set (match_operand:VDQW 0 "s_register_operand" "=w")
        (minus:VDQW (match_operand:VDQW 1 "s_register_operand" "0")
                    (mult:VDQW (match_operand:VDQW 2 "s_register_operand" "w")
                               (match_operand:VDQW 3 "s_register_operand" "w"))))]
  "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
  "vmls.<V_if_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
  [(set (attr "type")
      (if_then_else (match_test "<Is_float_mode>")
                    (const_string "neon_fp_mla_s<q>")
                    (const_string "neon_mla_<V_elem_ch><q>")))]
)

;; Fused multiply-accumulate
;; We define each insn twice here:
;; 1: with flag_unsafe_math_optimizations for the widening multiply phase
;; to be able to use when converting to FMA.
;; 2: without flag_unsafe_math_optimizations for the intrinsics to use.

;; Variant 1 of the fused multiply-add (requires
;; flag_unsafe_math_optimizations).  Operand 3 is the accumulator and is
;; tied to the destination.
(define_insn "fma<VCVTF:mode>4"
  [(set (match_operand:VCVTF 0 "register_operand" "=w")
        (fma:VCVTF (match_operand:VCVTF 1 "register_operand" "w")
                   (match_operand:VCVTF 2 "register_operand" "w")
                   (match_operand:VCVTF 3 "register_operand" "0")))]
  "TARGET_NEON && TARGET_FMA && flag_unsafe_math_optimizations"
  "vfma%?.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_fp_mla_s<q>")]
)

;; Variant 2 of the fused multiply-add: same RTL and template, but not
;; gated on flag_unsafe_math_optimizations.
(define_insn "fma<VCVTF:mode>4_intrinsic"
  [(set (match_operand:VCVTF 0 "register_operand" "=w")
        (fma:VCVTF (match_operand:VCVTF 1 "register_operand" "w")
                   (match_operand:VCVTF 2 "register_operand" "w")
                   (match_operand:VCVTF 3 "register_operand" "0")))]
  "TARGET_NEON && TARGET_FMA"
  "vfma%?.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_fp_mla_s<q>")]
)

;; Variant 1 of the fused multiply-subtract: operand 1 is negated inside
;; the fma.  Requires flag_unsafe_math_optimizations.
(define_insn "*fmsub<VCVTF:mode>4"
  [(set (match_operand:VCVTF 0 "register_operand" "=w")
        (fma:VCVTF (neg:VCVTF (match_operand:VCVTF 1 "register_operand" "w"))
                   (match_operand:VCVTF 2 "register_operand" "w")
                   (match_operand:VCVTF 3 "register_operand" "0")))]
  "TARGET_NEON && TARGET_FMA && flag_unsafe_math_optimizations"
  "vfms%?.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_fp_mla_s<q>")]
)

;; Variant 2 of the fused multiply-subtract, not gated on
;; flag_unsafe_math_optimizations.
(define_insn "fmsub<VCVTF:mode>4_intrinsic"
  [(set (match_operand:VCVTF 0 "register_operand" "=w")
        (fma:VCVTF (neg:VCVTF (match_operand:VCVTF 1 "register_operand" "w"))
                   (match_operand:VCVTF 2 "register_operand" "w")
                   (match_operand:VCVTF 3 "register_operand" "0")))]
  "TARGET_NEON && TARGET_FMA"
  "vfms%?.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_fp_mla_s<q>")]
)

;; Rounding instructions, one pattern per NEON_VRINT unspec and VCVTF
;; mode.  Requires TARGET_FPU_ARMV8; the element size in the template is
;; fixed at f32.
(define_insn "neon_vrint<NEON_VRINT:nvrint_variant><VCVTF:mode>"
  [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
        (unspec:VCVTF [(match_operand:VCVTF 1
                         "s_register_operand" "w")]
                      NEON_VRINT))]
  "TARGET_NEON && TARGET_FPU_ARMV8"
  "vrint<nvrint_variant>%?.f32\\t%<V_reg>0, %<V_reg>1"
  [(set_attr "type" "neon_fp_round_<V_elem_ch><q>")]
)

;; Float-to-integer conversion, one pattern per NEON_VCVT unspec and
;; FIXUORS code.  The insn is explicitly marked as not predicable.
(define_insn "neon_vcvt<NEON_VCVT:nvrint_variant><su_optab><VCVTF:mode><v_cmp_result>"
  [(set (match_operand:<V_cmp_result> 0 "register_operand" "=w")
        (FIXUORS:<V_cmp_result> (unspec:VCVTF
                                  [(match_operand:VCVTF 1 "register_operand" "w")]
                                  NEON_VCVT)))]
  "TARGET_NEON && TARGET_FPU_ARMV8"
  "vcvt<nvrint_variant>.<su>32.f32\\t%<V_reg>0, %<V_reg>1"
  [(set_attr "type" "neon_fp_to_int_<V_elem_ch><q>")
   (set_attr "predicable" "no")]
)

;; Bitwise OR for the VDQ modes.  Alternative 0 is the register form;
;; alternative 1 takes an immediate, ties operand 1 to the destination and
;; lets neon_output_logic_immediate build the instruction text.
(define_insn "ior<mode>3"
  [(set (match_operand:VDQ 0 "s_register_operand" "=w,w")
        (ior:VDQ (match_operand:VDQ 1 "s_register_operand" "w,0")
                 (match_operand:VDQ 2 "neon_logic_op2" "w,Dl")))]
  "TARGET_NEON"
{
  switch (which_alternative)
    {
    case 0: return "vorr\t%<V_reg>0, %<V_reg>1, %<V_reg>2";
    case 1: return neon_output_logic_immediate ("vorr", &operands[2],
                     <MODE>mode, 0, VALID_NEON_QREG_MODE (<MODE>mode));
    default: gcc_unreachable ();
    }
}
  [(set_attr "type" "neon_logic<q>")]
)

;; The concrete forms of the Neon immediate-logic instructions are vbic and
;; vorr. We support the pseudo-instruction vand instead, because that
;; corresponds to the canonical form the middle-end expects to use for
;; immediate bitwise-ANDs.

;; Bitwise AND for the VDQ modes.  The register form is printed directly;
;; the immediate form (operand 1 tied to the destination) is printed by
;; neon_output_logic_immediate, called with "vand" and a fourth argument
;; of 1.
(define_insn "and<mode>3"
  [(set (match_operand:VDQ 0 "s_register_operand" "=w,w")
        (and:VDQ (match_operand:VDQ 1 "s_register_operand" "w,0")
                 (match_operand:VDQ 2 "neon_inv_logic_op2" "w,DL")))]
  "TARGET_NEON"
{
  /* Register operand.  */
  if (which_alternative == 0)
    return "vand\t%<V_reg>0, %<V_reg>1, %<V_reg>2";

  /* Immediate operand.  */
  if (which_alternative == 1)
    return neon_output_logic_immediate ("vand", &operands[2], <MODE>mode, 1,
                                        VALID_NEON_QREG_MODE (<MODE>mode));

  /* The pattern has exactly two alternatives.  */
  gcc_unreachable ();
}
  [(set_attr "type" "neon_logic<q>")]
)

;; OR-NOT for the VDQ modes: operand 0 = operand 1 | ~operand 2.
(define_insn "orn<mode>3_neon"
  [(set (match_operand:VDQ 0 "s_register_operand" "=w")
        (ior:VDQ (not:VDQ (match_operand:VDQ 2 "s_register_operand" "w"))
                 (match_operand:VDQ 1 "s_register_operand" "w")))]
  "TARGET_NEON"
  "vorn\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_logic<q>")]
)

;; TODO: investigate whether we should disable
;; this and bicdi3_neon for the A8 in line with the other
;; changes above.
;; DImode OR-NOT: operand 0 = operand 1 | ~operand 2.  Alternative 0 emits
;; a single vorn.  The remaining alternatives are split after reload when
;; operand 0 is not a VFP register:
;;   - for Thumb-2, into two SImode or-not operations on the low and high
;;     parts of the operands;
;;   - otherwise, into a DImode one's complement of operand 2 placed in
;;     operand 0, followed by a DImode OR with operand 1.
(define_insn_and_split "orndi3_neon"
  [(set (match_operand:DI 0 "s_register_operand" "=w,?&r,?&r,?&r")
        (ior:DI (not:DI (match_operand:DI 2 "s_register_operand" "w,0,0,r"))
                (match_operand:DI 1 "s_register_operand" "w,r,r,0")))]
  "TARGET_NEON"
  "@
   vorn\t%P0, %P1, %P2
   #
   #
   #"
  "reload_completed &&
   (TARGET_NEON && !(IS_VFP_REGNUM (REGNO (operands[0]))))"
  [(set (match_dup 0) (ior:SI (not:SI (match_dup 2)) (match_dup 1)))
   (set (match_dup 3) (ior:SI (not:SI (match_dup 4)) (match_dup 5)))]
  "
  {
    if (TARGET_THUMB2)
      {
        operands[3] = gen_highpart (SImode, operands[0]);
        operands[0] = gen_lowpart (SImode, operands[0]);
        operands[4] = gen_highpart (SImode, operands[2]);
        operands[2] = gen_lowpart (SImode, operands[2]);
        operands[5] = gen_highpart (SImode, operands[1]);
        operands[1] = gen_lowpart (SImode, operands[1]);
      }
    else
      {
        emit_insn (gen_one_cmpldi2 (operands[0], operands[2]));
        emit_insn (gen_iordi3 (operands[0], operands[1], operands[0]));
        DONE;
      }
  }"
  [(set_attr "type" "neon_logic,multiple,multiple,multiple")
   (set_attr "length" "*,16,8,8")
   (set_attr "arch" "any,a,t2,t2")]
)

;; Bit clear for the VDQ modes: operand 0 = operand 1 & ~operand 2.
(define_insn "bic<mode>3_neon"
  [(set (match_operand:VDQ 0 "s_register_operand" "=w")
        (and:VDQ (not:VDQ (match_operand:VDQ 2 "s_register_operand" "w"))
                 (match_operand:VDQ 1 "s_register_operand" "w")))]
  "TARGET_NEON"
  "vbic\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_logic<q>")]
)

;; Compare to *anddi_notdi_di.
;; DImode bit clear: operand 0 = operand 1 & ~operand 2.  Alternative 0
;; emits vbic; the other two alternatives return "#" and are left to a
;; splitter that is not part of this pattern.
(define_insn "bicdi3_neon"
  [(set (match_operand:DI 0 "s_register_operand" "=w,?&r,?&r")
        (and:DI (not:DI (match_operand:DI 2 "s_register_operand" "w,r,0"))
                (match_operand:DI 1 "s_register_operand" "w,0,r")))]
  "TARGET_NEON"
  "@
   vbic\t%P0, %P1, %P2
   #
   #"
  [(set_attr "type" "neon_logic,multiple,multiple")
   (set_attr "length" "*,8,8")]
)

;; Bitwise exclusive OR for the VDQ modes.
(define_insn "xor<mode>3"
  [(set (match_operand:VDQ 0 "s_register_operand" "=w")
        (xor:VDQ (match_operand:VDQ 1 "s_register_operand" "w")
                 (match_operand:VDQ 2 "s_register_operand" "w")))]
  "TARGET_NEON"
  "veor\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_logic<q>")]
)

;; Bitwise NOT for the VDQ modes.
(define_insn "one_cmpl<mode>2"
  [(set (match_operand:VDQ 0 "s_register_operand" "=w")
        (not:VDQ (match_operand:VDQ 1 "s_register_operand" "w")))]
  "TARGET_NEON"
  "vmvn\t%<V_reg>0, %<V_reg>1"
  [(set_attr "type" "neon_move<q>")]
)

;; Absolute value for the VDQW modes.
(define_insn "abs<mode>2"
  [(set (match_operand:VDQW 0 "s_register_operand" "=w")
        (abs:VDQW (match_operand:VDQW 1 "s_register_operand" "w")))]
  "TARGET_NEON"
  "vabs.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
  [(set (attr "type")
      (if_then_else (match_test "<Is_float_mode>")
                    (const_string "neon_fp_abs_s<q>")
                    (const_string "neon_abs<q>")))]
)

;; Negation for the VDQW modes.
(define_insn "neg<mode>2"
  [(set (match_operand:VDQW 0 "s_register_operand" "=w")
        (neg:VDQW (match_operand:VDQW 1 "s_register_operand" "w")))]
  "TARGET_NEON"
  "vneg.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
  [(set (attr "type")
      (if_then_else (match_test "<Is_float_mode>")
                    (const_string "neon_fp_neg_s<q>")
                    (const_string "neon_neg<q>")))]
)

;; DImode negation.  The insn itself always outputs "#"; the two
;; define_splits that follow rewrite it after reload depending on whether
;; operand 0 is a VFP register or a core register.  Operand 2 is a scratch
;; that is only a real register in the second alternative.
(define_insn "negdi2_neon"
  [(set (match_operand:DI 0 "s_register_operand" "=&w, w,r,&r")
        (neg:DI (match_operand:DI 1 "s_register_operand" " w, w,0, r")))
   (clobber (match_scratch:DI 2 "= X,&w,X, X"))
   (clobber (reg:CC CC_REGNUM))]
  "TARGET_NEON"
  "#"
  [(set_attr "length" "8")
   (set_attr "type" "multiple")]
)

; Split negdi2_neon for vfp registers
;; The scratch is set to zero and the result is computed as
;; scratch - operand 1.  When no scratch register was allocated (operand 2
;; is not a REG), operand 0 is used as the scratch instead.
(define_split
  [(set (match_operand:DI 0 "s_register_operand" "")
        (neg:DI (match_operand:DI 1 "s_register_operand" "")))
   (clobber (match_scratch:DI 2 ""))
   (clobber (reg:CC CC_REGNUM))]
  "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
  [(set (match_dup 2) (const_int 0))
   (parallel [(set (match_dup 0) (minus:DI (match_dup 2) (match_dup 1)))
              (clobber (reg:CC CC_REGNUM))])]
  {
    if (!REG_P (operands[2]))
      operands[2] = operands[0];
  }
)

; Split negdi2_neon for core registers
;; The scratch clobber is dropped, leaving a plain DImode negation with a
;; condition-code clobber.
(define_split
  [(set (match_operand:DI 0 "s_register_operand" "")
        (neg:DI (match_operand:DI 1 "s_register_operand" "")))
   (clobber (match_scratch:DI 2 ""))
   (clobber (reg:CC CC_REGNUM))]
  "TARGET_32BIT && reload_completed
   && arm_general_register_operand (operands[0], DImode)"
  [(parallel [(set (match_dup 0) (neg:DI (match_dup 1)))
              (clobber (reg:CC CC_REGNUM))])]
  ""
)

;; Unsigned element-wise minimum for the VDQIW modes.
(define_insn "*umin<mode>3_neon"
  [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
        (umin:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")
                    (match_operand:VDQIW 2 "s_register_operand" "w")))]
  "TARGET_NEON"
  "vmin.<V_u_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_minmax<q>")]
)

;; Unsigned element-wise maximum for the VDQIW modes.
(define_insn "*umax<mode>3_neon"
  [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
        (umax:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")
                    (match_operand:VDQIW 2 "s_register_operand" "w")))]
  "TARGET_NEON"
  "vmax.<V_u_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_minmax<q>")]
)

;; Signed element-wise minimum for the VDQW modes.
(define_insn "*smin<mode>3_neon"
  [(set (match_operand:VDQW 0 "s_register_operand" "=w")
        (smin:VDQW (match_operand:VDQW 1 "s_register_operand" "w")
                   (match_operand:VDQW 2 "s_register_operand" "w")))]
  "TARGET_NEON"
  "vmin.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set (attr "type")
      (if_then_else (match_test "<Is_float_mode>")
                    (const_string "neon_fp_minmax_s<q>")
                    (const_string "neon_minmax<q>")))]
)

;; Signed element-wise maximum for the VDQW modes.
(define_insn "*smax<mode>3_neon"
  [(set (match_operand:VDQW 0 "s_register_operand" "=w")
        (smax:VDQW (match_operand:VDQW 1 "s_register_operand" "w")
                   (match_operand:VDQW 2 "s_register_operand" "w")))]
  "TARGET_NEON"
  "vmax.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set (attr "type")
      (if_then_else (match_test "<Is_float_mode>")
                    (const_string "neon_fp_minmax_s<q>")
                    (const_string "neon_minmax<q>")))]
)

; TODO: V2DI shifts are currently disabled because there are bugs in the
; generic vectorizer code. It ends up creating a V2DI constructor with
; SImode elements.

;; Vector shift left for the VDQIW modes.  Alternative 0 shifts by a
;; register; alternative 1 shifts by an immediate, printed by
;; neon_output_shift_immediate.
(define_insn "vashl<mode>3"
  [(set (match_operand:VDQIW 0 "s_register_operand" "=w,w")
        (ashift:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w,w")
                      (match_operand:VDQIW 2 "imm_lshift_or_reg_neon" "w,Dn")))]
  "TARGET_NEON"
  {
    switch (which_alternative)
      {
        case 0: return "vshl.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2";
        case 1: return neon_output_shift_immediate ("vshl", 'i', &operands[2],
                                                    <MODE>mode,
                                                    VALID_NEON_QREG_MODE (<MODE>mode),
                                                    true);
        default: gcc_unreachable ();
      }
  }
  [(set_attr "type" "neon_shift_reg<q>, neon_shift_imm<q>")]
)

;; Arithmetic shift right by an immediate ("vshr" with sign character 's').
(define_insn "vashr<mode>3_imm"
  [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
        (ashiftrt:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")
                        (match_operand:VDQIW 2 "imm_for_neon_rshift_operand" "Dn")))]
  "TARGET_NEON"
  {
    return neon_output_shift_immediate ("vshr", 's', &operands[2],
                                        <MODE>mode, VALID_NEON_QREG_MODE (<MODE>mode),
                                        false);
  }
  [(set_attr "type" "neon_shift_imm<q>")]
)

;; Logical shift right by an immediate ("vshr" with sign character 'u').
(define_insn "vlshr<mode>3_imm"
  [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
        (lshiftrt:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")
                        (match_operand:VDQIW 2 "imm_for_neon_rshift_operand" "Dn")))]
  "TARGET_NEON"
  {
    return neon_output_shift_immediate ("vshr", 'u', &operands[2],
                                        <MODE>mode, VALID_NEON_QREG_MODE (<MODE>mode),
                                        false);
  }
  [(set_attr "type" "neon_shift_imm<q>")]
)

; Used for implementing arithmetic shift-right, which is a left-shift by a
; negative amount, with signed operands. This is essentially the same as
; vashl<mode>3 above, but using an unspec in case GCC tries anything tricky
; with negative shift amounts.

(define_insn "ashl<mode>3_signed"
  [(set (match_operand:VDQI 0 "s_register_operand" "=w")
        (unspec:VDQI [(match_operand:VDQI 1 "s_register_operand" "w")
                      (match_operand:VDQI 2 "s_register_operand" "w")]
                     UNSPEC_ASHIFT_SIGNED))]
  "TARGET_NEON"
  "vshl.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_shift_reg<q>")]
)

; Used for implementing logical shift-right, which is a left-shift by a negative
; amount, with unsigned operands.
(define_insn "ashl<mode>3_unsigned"
  [(set (match_operand:VDQI 0 "s_register_operand" "=w")
        (unspec:VDQI [(match_operand:VDQI 1 "s_register_operand" "w")
                      (match_operand:VDQI 2 "s_register_operand" "w")]
                     UNSPEC_ASHIFT_UNSIGNED))]
  "TARGET_NEON"
  "vshl.<V_u_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_shift_reg<q>")]
)

;; Vector arithmetic shift right.  A register shift count is negated into
;; a fresh pseudo and handed to ashl<mode>3_signed (a left shift by the
;; negated count); any other count goes to the immediate pattern
;; vashr<mode>3_imm.
(define_expand "vashr<mode>3"
  [(set (match_operand:VDQIW 0 "s_register_operand" "")
        (ashiftrt:VDQIW (match_operand:VDQIW 1 "s_register_operand" "")
                        (match_operand:VDQIW 2 "imm_rshift_or_reg_neon" "")))]
  "TARGET_NEON"
{
  if (s_register_operand (operands[2], <MODE>mode))
    {
      rtx neg = gen_reg_rtx (<MODE>mode);
      emit_insn (gen_neg<mode>2 (neg, operands[2]));
      emit_insn (gen_ashl<mode>3_signed (operands[0], operands[1], neg));
    }
  else
    emit_insn (gen_vashr<mode>3_imm (operands[0], operands[1], operands[2]));
  DONE;
})

;; Vector logical shift right.  Same scheme as vashr<mode>3, but the
;; register-count case uses ashl<mode>3_unsigned and the other case uses
;; vlshr<mode>3_imm.
(define_expand "vlshr<mode>3"
  [(set (match_operand:VDQIW 0 "s_register_operand" "")
        (lshiftrt:VDQIW (match_operand:VDQIW 1 "s_register_operand" "")
                        (match_operand:VDQIW 2 "imm_rshift_or_reg_neon" "")))]
  "TARGET_NEON"
{
  if (s_register_operand (operands[2], <MODE>mode))
    {
      rtx neg = gen_reg_rtx (<MODE>mode);
      emit_insn (gen_neg<mode>2 (neg, operands[2]));
      emit_insn (gen_ashl<mode>3_unsigned (operands[0], operands[1], neg));
    }
  else
    emit_insn (gen_vlshr<mode>3_imm (operands[0], operands[1], operands[2]));
  DONE;
})

;; 64-bit shifts

;; This pattern loads a 32-bit shift count into a 64-bit NEON register,
;; leaving the upper half uninitialized.  This is OK since the shift
;; instruction only looks at the low 8 bits anyway.  To avoid confusing
;; data flow analysis however, we pretend the full register is set
;; using an unspec.
(define_insn "neon_load_count"
  [(set (match_operand:DI 0 "s_register_operand" "=w,w")
        (unspec:DI [(match_operand:SI 1 "nonimmediate_operand" "Um,r")]
                   UNSPEC_LOAD_COUNT))]
  "TARGET_NEON"
  "@
   vld1.32\t{%P0[0]}, %A1
   vmov.32\t%P0[0], %1"
  [(set_attr "type" "neon_load1_1reg,neon_from_gp")]
)

;; 64-bit left shift in a NEON register, without any clobbers.  Only
;; valid after reload; an immediate count must be in the range 0..63.
(define_insn "ashldi3_neon_noclobber"
  [(set (match_operand:DI 0 "s_register_operand" "=w,w")
        (ashift:DI (match_operand:DI 1 "s_register_operand" " w,w")
                   (match_operand:DI 2 "reg_or_int_operand" " i,w")))]
  "TARGET_NEON && reload_completed
   && (!CONST_INT_P (operands[2])
       || (INTVAL (operands[2]) >= 0 && INTVAL (operands[2]) < 64))"
  "@
   vshl.u64\t%P0, %P1, %2
   vshl.u64\t%P0, %P1, %P2"
  [(set_attr "type" "neon_shift_imm, neon_shift_reg")]
)

;; 64-bit left shift that may be allocated to either NEON or core
;; registers.  It is always split after reload:
;;  - NEON destination: a constant count below 1 becomes a plain move, a
;;    constant above 63 is clamped to 63, and a non-constant count is first
;;    loaded into scratch operand 5 with neon_load_count; the shift is then
;;    emitted through ashldi3_neon_noclobber.
;;  - core destination: a count of 1 uses arm_ashldi3_1bit, anything else
;;    goes through arm_emit_coreregs_64bit_shift with scratches 3 and 4.
(define_insn_and_split "ashldi3_neon"
  [(set (match_operand:DI 0 "s_register_operand" "= w, w, &r, r, &r, ?w,?w")
        (ashift:DI (match_operand:DI 1 "s_register_operand" " 0w, w, 0r, 0, r, 0w, w")
                   (match_operand:SI 2 "general_operand" "rUm, i, r, i, i,rUm, i")))
   (clobber (match_scratch:SI 3 "= X, X, &r, X, X, X, X"))
   (clobber (match_scratch:SI 4 "= X, X, &r, X, X, X, X"))
   (clobber (match_scratch:DI 5 "=&w, X, X, X, X, &w, X"))
   (clobber (reg:CC_C CC_REGNUM))]
  "TARGET_NEON"
  "#"
  "TARGET_NEON && reload_completed"
  [(const_int 0)]
  "
  {
    if (IS_VFP_REGNUM (REGNO (operands[0])))
      {
        if (CONST_INT_P (operands[2]))
          {
            if (INTVAL (operands[2]) < 1)
              {
                emit_insn (gen_movdi (operands[0], operands[1]));
                DONE;
              }
            else if (INTVAL (operands[2]) > 63)
              operands[2] = gen_rtx_CONST_INT (VOIDmode, 63);
          }
        else
          {
            emit_insn (gen_neon_load_count (operands[5], operands[2]));
            operands[2] = operands[5];
          }

        /* Ditch the unnecessary clobbers.  */
        emit_insn (gen_ashldi3_neon_noclobber (operands[0], operands[1],
                                               operands[2]));
      }
    else
      {
        /* The shift expanders support either full overlap or no overlap.  */
        gcc_assert (!reg_overlap_mentioned_p (operands[0], operands[1])
                    || REGNO (operands[0]) == REGNO (operands[1]));

        if (operands[2] == CONST1_RTX (SImode))
          /* This clobbers CC.  */
          emit_insn (gen_arm_ashldi3_1bit (operands[0], operands[1]));
        else
          arm_emit_coreregs_64bit_shift (ASHIFT, operands[0], operands[1],
                                         operands[2], operands[3], operands[4]);
      }
    DONE;
  }"
  [(set_attr "arch" "neon_for_64bits,neon_for_64bits,*,*,*,avoid_neon_for_64bits,avoid_neon_for_64bits")
   (set_attr "opt" "*,*,speed,speed,speed,*,*")
   (set_attr "type" "multiple")]
)

; The shift amount needs to be negated for right-shifts
(define_insn "signed_shift_di3_neon"
  [(set (match_operand:DI 0 "s_register_operand" "=w")
        (unspec:DI [(match_operand:DI 1 "s_register_operand" " w")
                    (match_operand:DI 2 "s_register_operand" " w")]
                   UNSPEC_ASHIFT_SIGNED))]
  "TARGET_NEON && reload_completed"
  "vshl.s64\t%P0, %P1, %P2"
  [(set_attr "type" "neon_shift_reg")]
)

; The shift amount needs to be negated for right-shifts
(define_insn "unsigned_shift_di3_neon"
  [(set (match_operand:DI 0 "s_register_operand" "=w")
        (unspec:DI [(match_operand:DI 1 "s_register_operand" " w")
                    (match_operand:DI 2 "s_register_operand" " w")]
                   UNSPEC_ASHIFT_UNSIGNED))]
  "TARGET_NEON && reload_completed"
  "vshl.u64\t%P0, %P1, %P2"
  [(set_attr "type" "neon_shift_reg")]
)

;; 64-bit arithmetic right shift by an immediate in the range 1..64,
;; without clobbers; only valid after reload.
(define_insn "ashrdi3_neon_imm_noclobber"
  [(set (match_operand:DI 0 "s_register_operand" "=w")
        (ashiftrt:DI (match_operand:DI 1 "s_register_operand" " w")
                     (match_operand:DI 2 "const_int_operand" " i")))]
  "TARGET_NEON && reload_completed
   && INTVAL (operands[2]) > 0 && INTVAL (operands[2]) <= 64"
  "vshr.s64\t%P0, %P1, %2"
  [(set_attr "type" "neon_shift_imm")]
)

;; 64-bit logical right shift by an immediate in the range 1..64,
;; without clobbers; only valid after reload.
(define_insn "lshrdi3_neon_imm_noclobber"
  [(set (match_operand:DI 0 "s_register_operand" "=w")
        (lshiftrt:DI (match_operand:DI 1 "s_register_operand" " w")
                     (match_operand:DI 2 "const_int_operand" " i")))]
  "TARGET_NEON && reload_completed
   && INTVAL (operands[2]) > 0 && INTVAL (operands[2]) <= 64"
  "vshr.u64\t%P0, %P1, %2"
  [(set_attr "type" "neon_shift_imm")]
)

;; ashrdi3_neon
;; lshrdi3_neon
;; 64-bit right shifts that may be allocated to either NEON or core
;; registers; always split after reload:
;;  - NEON destination, constant count: below 1 becomes a plain move,
;;    above 64 is clamped to 64, then the matching *_imm_noclobber pattern
;;    is emitted.
;;  - NEON destination, register count: the count is negated into scratch
;;    operand 3, loaded into scratch operand 5 with neon_load_count, and a
;;    left shift by that negative count is emitted.
;;  - core destination: a count of 1 uses the 1-bit shift pattern,
;;    anything else goes through arm_emit_coreregs_64bit_shift.
(define_insn_and_split "<shift>di3_neon"
  [(set (match_operand:DI 0 "s_register_operand" "= w, w, &r, r, &r,?w,?w")
        (RSHIFTS:DI (match_operand:DI 1 "s_register_operand" " 0w, w, 0r, 0, r,0w, w")
                    (match_operand:SI 2 "reg_or_int_operand" " r, i, r, i, i, r, i")))
   (clobber (match_scratch:SI 3 "=2r, X, &r, X, X,2r, X"))
   (clobber (match_scratch:SI 4 "= X, X, &r, X, X, X, X"))
   (clobber (match_scratch:DI 5 "=&w, X, X, X, X,&w, X"))
   (clobber (reg:CC CC_REGNUM))]
  "TARGET_NEON"
  "#"
  "TARGET_NEON && reload_completed"
  [(const_int 0)]
  "
  {
    if (IS_VFP_REGNUM (REGNO (operands[0])))
      {
        if (CONST_INT_P (operands[2]))
          {
            if (INTVAL (operands[2]) < 1)
              {
                emit_insn (gen_movdi (operands[0], operands[1]));
                DONE;
              }
            else if (INTVAL (operands[2]) > 64)
              operands[2] = gen_rtx_CONST_INT (VOIDmode, 64);

            /* Ditch the unnecessary clobbers.  */
            emit_insn (gen_<shift>di3_neon_imm_noclobber (operands[0],
                                                          operands[1],
                                                          operands[2]));
          }
        else
          {
            /* We must use a negative left-shift.  */
            emit_insn (gen_negsi2 (operands[3], operands[2]));
            emit_insn (gen_neon_load_count (operands[5], operands[3]));
            emit_insn (gen_<shifttype>_shift_di3_neon (operands[0], operands[1],
                                                       operands[5]));
          }
      }
    else
      {
        /* The shift expanders support either full overlap or no overlap.  */
        gcc_assert (!reg_overlap_mentioned_p (operands[0], operands[1])
                    || REGNO (operands[0]) == REGNO (operands[1]));

        if (operands[2] == CONST1_RTX (SImode))
          /* This clobbers CC.  */
          emit_insn (gen_arm_<shift>di3_1bit (operands[0], operands[1]));
        else
          /* This clobbers CC (ASHIFTRT by register only).  */
          arm_emit_coreregs_64bit_shift (<CODE>, operands[0], operands[1],
                                         operands[2], operands[3], operands[4]);
      }

    DONE;
  }"
  [(set_attr "arch" "neon_for_64bits,neon_for_64bits,*,*,*,avoid_neon_for_64bits,avoid_neon_for_64bits")
   (set_attr "opt" "*,*,speed,speed,speed,*,*")
   (set_attr "type" "multiple")]
)

;; Widening operations

;; Widening signed sum: sign-extend the narrow vector (operand 1) and add
;; it to the wide accumulator (operand 2).
;; Operand 1 carries no '%' modifier: it sits inside the sign_extend and
;; has a different mode from operand 2, so the two operands must never be
;; interchanged.
(define_insn "widen_ssum<mode>3"
  [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
        (plus:<V_widen> (sign_extend:<V_widen>
                          (match_operand:VW 1 "s_register_operand" "w"))
                        (match_operand:<V_widen> 2 "s_register_operand" "w")))]
  "TARGET_NEON"
  "vaddw.<V_s_elem>\t%q0, %q2, %P1"
  [(set_attr "type" "neon_add_widen")]
)

;; Widening unsigned sum: zero-extend the narrow vector (operand 1) and add
;; it to the wide accumulator (operand 2).  As above, operand 1 is not
;; commutative with operand 2, so it carries no '%' modifier.
(define_insn "widen_usum<mode>3"
  [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
        (plus:<V_widen> (zero_extend:<V_widen>
                          (match_operand:VW 1 "s_register_operand" "w"))
                        (match_operand:<V_widen> 2 "s_register_operand" "w")))]
  "TARGET_NEON"
  "vaddw.<V_u_elem>\t%q0, %q2, %P1"
  [(set_attr "type" "neon_add_widen")]
)

;; Helpers for quad-word reduction operations

; Add (or smin, smax...) the low N/2 elements of the N-element vector
; operand[1] to the high N/2 elements of same. Put the result in operand[0], an
; N/2-element vector.
;; V4SI -> V2SI: combine the low and high halves of operand 1.
(define_insn "quad_halves_<code>v4si"
  [(set (match_operand:V2SI 0 "s_register_operand" "=w")
        (VQH_OPS:V2SI
          (vec_select:V2SI (match_operand:V4SI 1 "s_register_operand" "w")
                           (parallel [(const_int 0) (const_int 1)]))
          (vec_select:V2SI (match_dup 1)
                           (parallel [(const_int 2) (const_int 3)]))))]
  "TARGET_NEON"
  "<VQH_mnem>.<VQH_sign>32\t%P0, %e1, %f1"
  [(set_attr "vqh_mnem" "<VQH_mnem>")
   (set_attr "type" "neon_reduc_<VQH_type>_q")]
)

;; V4SF -> V2SF: floating-point variant, only enabled under
;; -funsafe-math-optimizations.
(define_insn "quad_halves_<code>v4sf"
  [(set (match_operand:V2SF 0 "s_register_operand" "=w")
        (VQHS_OPS:V2SF
          (vec_select:V2SF (match_operand:V4SF 1 "s_register_operand" "w")
                           (parallel [(const_int 0) (const_int 1)]))
          (vec_select:V2SF (match_dup 1)
                           (parallel [(const_int 2) (const_int 3)]))))]
  "TARGET_NEON && flag_unsafe_math_optimizations"
  "<VQH_mnem>.f32\t%P0, %e1, %f1"
  [(set_attr "vqh_mnem" "<VQH_mnem>")
   (set_attr "type" "neon_fp_reduc_<VQH_type>_s_q")]
)

;; V8HI -> V4HI.  Operand 0 is only ever written by this pattern, so it
;; uses "=w" like the v4si/v4sf variants rather than a read-write "+w".
(define_insn "quad_halves_<code>v8hi"
  [(set (match_operand:V4HI 0 "s_register_operand" "=w")
        (VQH_OPS:V4HI
          (vec_select:V4HI (match_operand:V8HI 1 "s_register_operand" "w")
                           (parallel [(const_int 0) (const_int 1)
                                      (const_int 2) (const_int 3)]))
          (vec_select:V4HI (match_dup 1)
                           (parallel [(const_int 4) (const_int 5)
                                      (const_int 6) (const_int 7)]))))]
  "TARGET_NEON"
  "<VQH_mnem>.<VQH_sign>16\t%P0, %e1, %f1"
  [(set_attr "vqh_mnem" "<VQH_mnem>")
   (set_attr "type" "neon_reduc_<VQH_type>_q")]
)

;; V16QI -> V8QI.  Operand 0 is write-only here as well, hence "=w".
(define_insn "quad_halves_<code>v16qi"
  [(set (match_operand:V8QI 0 "s_register_operand" "=w")
        (VQH_OPS:V8QI
          (vec_select:V8QI (match_operand:V16QI 1 "s_register_operand" "w")
                           (parallel [(const_int 0) (const_int 1)
                                      (const_int 2) (const_int 3)
                                      (const_int 4) (const_int 5)
                                      (const_int 6) (const_int 7)]))
          (vec_select:V8QI (match_dup 1)
                           (parallel [(const_int 8) (const_int 9)
                                      (const_int 10) (const_int 11)
                                      (const_int 12) (const_int 13)
                                      (const_int 14) (const_int 15)]))))]
  "TARGET_NEON"
  "<VQH_mnem>.<VQH_sign>8\t%P0, %e1, %f1"
  [(set_attr "vqh_mnem" "<VQH_mnem>")
   (set_attr "type" "neon_reduc_<VQH_type>_q")]
)

;; Store operand 1 into the half of operand 0 that lives at byte offset
;; GET_MODE_SIZE (<V_HALF>mode), via a subreg move.
(define_expand "move_hi_quad_<mode>"
 [(match_operand:ANY128 0 "s_register_operand" "")
  (match_operand:<V_HALF> 1 "s_register_operand" "")]
 "TARGET_NEON"
{
  emit_move_insn (simplify_gen_subreg (<V_HALF>mode, operands[0], <MODE>mode,
                                       GET_MODE_SIZE (<V_HALF>mode)),
                  operands[1]);
  DONE;
})

;; Store operand 1 into the half of operand 0 that lives at byte offset 0,
;; via a subreg move.
(define_expand "move_lo_quad_<mode>"
 [(match_operand:ANY128 0 "s_register_operand" "")
  (match_operand:<V_HALF> 1 "s_register_operand" "")]
 "TARGET_NEON"
{
  emit_move_insn (simplify_gen_subreg (<V_HALF>mode, operands[0],
                                       <MODE>mode, 0),
                  operands[1]);
  DONE;
})

;; Reduction operations

;; Sum reduction of a doubleword vector to a scalar: pairwise-reduce with
;; vpadd, then extract element 0.
(define_expand "reduc_plus_scal_<mode>"
  [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
   (match_operand:VD 1 "s_register_operand" "")]
  "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
{
  rtx vec = gen_reg_rtx (<MODE>mode);
  neon_pairwise_reduce (vec, operands[1], <MODE>mode,
                        &gen_neon_vpadd_internal<mode>);
  /* The same result is actually computed into every element.  */
  emit_insn (gen_vec_extract<mode> (operands[0], vec, const0_rtx));
  DONE;
})

;; Sum reduction of a quadword vector: fold the two halves together, then
;; reuse the doubleword reduction.  Little-endian only.
(define_expand "reduc_plus_scal_<mode>"
  [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
   (match_operand:VQ 1 "s_register_operand" "")]
  "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)
   && !BYTES_BIG_ENDIAN"
{
  rtx step1 = gen_reg_rtx (<V_HALF>mode);

  emit_insn (gen_quad_halves_plus<mode> (step1, operands[1]));
  emit_insn (gen_reduc_plus_scal_<V_half> (operands[0], step1));

  DONE;
})

;; Sum reduction of V2DI: add the two halves with
;; arm_reduc_plus_internal_v2di and extract element 0.
(define_expand "reduc_plus_scal_v2di"
  [(match_operand:DI 0 "nonimmediate_operand" "=w")
   (match_operand:V2DI 1 "s_register_operand" "")]
  "TARGET_NEON && !BYTES_BIG_ENDIAN"
{
  rtx vec = gen_reg_rtx (V2DImode);

  emit_insn (gen_arm_reduc_plus_internal_v2di (vec, operands[1]));
  emit_insn (gen_vec_extractv2di (operands[0], vec, const0_rtx));

  DONE;
})

;; Adds the two 64-bit halves of operand 1 into the low half of operand 0.
(define_insn "arm_reduc_plus_internal_v2di"
  [(set (match_operand:V2DI 0 "s_register_operand" "=w")
        (unspec:V2DI [(match_operand:V2DI 1 "s_register_operand" "w")]
                     UNSPEC_VPADD))]
  "TARGET_NEON && !BYTES_BIG_ENDIAN"
  "vadd.i64\t%e0, %e1, %f1"
  [(set_attr "type" "neon_add_q")]
)

;; Signed-minimum reduction of a doubleword vector (pairwise vpmin).
(define_expand "reduc_smin_scal_<mode>"
  [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
   (match_operand:VD 1 "s_register_operand" "")]
  "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
{
  rtx vec = gen_reg_rtx (<MODE>mode);

  neon_pairwise_reduce (vec, operands[1], <MODE>mode,
                        &gen_neon_vpsmin<mode>);
  /* The result is computed into every element of the vector.  */
  emit_insn (gen_vec_extract<mode> (operands[0], vec, const0_rtx));
  DONE;
})

;; Signed-minimum reduction of a quadword vector: fold halves, then reduce
;; the doubleword result.  Little-endian only.
(define_expand "reduc_smin_scal_<mode>"
  [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
   (match_operand:VQ 1 "s_register_operand" "")]
  "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)
   && !BYTES_BIG_ENDIAN"
{
  rtx step1 = gen_reg_rtx (<V_HALF>mode);

  emit_insn (gen_quad_halves_smin<mode> (step1, operands[1]));
  emit_insn (gen_reduc_smin_scal_<V_half> (operands[0], step1));

  DONE;
})

;; Signed-maximum reduction of a doubleword vector (pairwise vpmax).
(define_expand "reduc_smax_scal_<mode>"
  [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
   (match_operand:VD 1 "s_register_operand" "")]
  "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
{
  rtx vec = gen_reg_rtx (<MODE>mode);
  neon_pairwise_reduce (vec, operands[1], <MODE>mode,
                        &gen_neon_vpsmax<mode>);
  /* The result is computed into every element of the vector.  */
  emit_insn (gen_vec_extract<mode> (operands[0], vec, const0_rtx));
  DONE;
})

;; Signed-maximum reduction of a quadword vector.  Little-endian only.
(define_expand "reduc_smax_scal_<mode>"
  [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
   (match_operand:VQ 1 "s_register_operand" "")]
  "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)
   && !BYTES_BIG_ENDIAN"
{
  rtx step1 = gen_reg_rtx (<V_HALF>mode);

  emit_insn (gen_quad_halves_smax<mode> (step1, operands[1]));
  emit_insn (gen_reduc_smax_scal_<V_half> (operands[0], step1));

  DONE;
})

;; Unsigned-minimum reduction of a doubleword integer vector.
(define_expand "reduc_umin_scal_<mode>"
  [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
   (match_operand:VDI 1 "s_register_operand" "")]
  "TARGET_NEON"
{
  rtx vec = gen_reg_rtx (<MODE>mode);
  neon_pairwise_reduce (vec, operands[1], <MODE>mode,
                        &gen_neon_vpumin<mode>);
  /* The result is computed into every element of the vector.  */
  emit_insn (gen_vec_extract<mode> (operands[0], vec, const0_rtx));
  DONE;
})

;; Unsigned-minimum reduction of a quadword integer vector.
;; Little-endian only.
(define_expand "reduc_umin_scal_<mode>"
  [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
   (match_operand:VQI 1 "s_register_operand" "")]
  "TARGET_NEON && !BYTES_BIG_ENDIAN"
{
  rtx step1 = gen_reg_rtx (<V_HALF>mode);

  emit_insn (gen_quad_halves_umin<mode> (step1, operands[1]));
  emit_insn (gen_reduc_umin_scal_<V_half> (operands[0], step1));

  DONE;
})

;; Unsigned-maximum reduction of a doubleword integer vector.
(define_expand "reduc_umax_scal_<mode>"
  [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
   (match_operand:VDI 1 "s_register_operand" "")]
  "TARGET_NEON"
{
  rtx vec = gen_reg_rtx (<MODE>mode);
  neon_pairwise_reduce (vec, operands[1], <MODE>mode,
                        &gen_neon_vpumax<mode>);
  /* The result is computed into every element of the vector.  */
  emit_insn (gen_vec_extract<mode> (operands[0], vec, const0_rtx));
  DONE;
})

;; Unsigned-maximum reduction of a quadword integer vector.
;; Little-endian only.
(define_expand "reduc_umax_scal_<mode>"
  [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
   (match_operand:VQI 1 "s_register_operand" "")]
  "TARGET_NEON && !BYTES_BIG_ENDIAN"
{
  rtx step1 = gen_reg_rtx (<V_HALF>mode);

  emit_insn (gen_quad_halves_umax<mode> (step1, operands[1]));
  emit_insn (gen_reduc_umax_scal_<V_half> (operands[0], step1));

  DONE;
})

;; Pairwise add of two doubleword vectors.
(define_insn "neon_vpadd_internal<mode>"
  [(set (match_operand:VD 0 "s_register_operand" "=w")
        (unspec:VD [(match_operand:VD 1 "s_register_operand" "w")
                    (match_operand:VD 2 "s_register_operand" "w")]
                   UNSPEC_VPADD))]
  "TARGET_NEON"
  "vpadd.<V_if_elem>\t%P0, %P1, %P2"
  ;; Assume this schedules like vadd.
  [(set (attr "type")
        (if_then_else (match_test "<Is_float_mode>")
                      (const_string "neon_fp_reduc_add_s<q>")
                      (const_string "neon_reduc_add<q>")))]
)

;; Pairwise signed minimum of two doubleword vectors.
(define_insn "neon_vpsmin<mode>"
  [(set (match_operand:VD 0 "s_register_operand" "=w")
        (unspec:VD [(match_operand:VD 1 "s_register_operand" "w")
                    (match_operand:VD 2 "s_register_operand" "w")]
                   UNSPEC_VPSMIN))]
  "TARGET_NEON"
  "vpmin.<V_s_elem>\t%P0, %P1, %P2"
  [(set (attr "type")
        (if_then_else (match_test "<Is_float_mode>")
                      (const_string "neon_fp_reduc_minmax_s<q>")
                      (const_string "neon_reduc_minmax<q>")))]
)

;; Pairwise signed maximum of two doubleword vectors.
(define_insn "neon_vpsmax<mode>"
  [(set (match_operand:VD 0 "s_register_operand" "=w")
        (unspec:VD [(match_operand:VD 1 "s_register_operand" "w")
                    (match_operand:VD 2 "s_register_operand" "w")]
                   UNSPEC_VPSMAX))]
  "TARGET_NEON"
  "vpmax.<V_s_elem>\t%P0, %P1, %P2"
  [(set (attr "type")
        (if_then_else (match_test "<Is_float_mode>")
                      (const_string "neon_fp_reduc_minmax_s<q>")
                      (const_string "neon_reduc_minmax<q>")))]
)

;; Pairwise unsigned minimum of two doubleword integer vectors.
(define_insn "neon_vpumin<mode>"
  [(set (match_operand:VDI 0 "s_register_operand" "=w")
        (unspec:VDI [(match_operand:VDI 1 "s_register_operand" "w")
                     (match_operand:VDI 2 "s_register_operand" "w")]
                    UNSPEC_VPUMIN))]
  "TARGET_NEON"
  "vpmin.<V_u_elem>\t%P0, %P1, %P2"
  [(set_attr "type" "neon_reduc_minmax<q>")]
)

;; Pairwise unsigned maximum of two doubleword integer vectors.
(define_insn "neon_vpumax<mode>"
  [(set (match_operand:VDI 0 "s_register_operand" "=w")
        (unspec:VDI [(match_operand:VDI 1 "s_register_operand" "w")
                     (match_operand:VDI 2 "s_register_operand" "w")]
                    UNSPEC_VPUMAX))]
  "TARGET_NEON"
  "vpmax.<V_u_elem>\t%P0, %P1, %P2"
  [(set_attr "type" "neon_reduc_minmax<q>")]
)

;; Saturating arithmetic

; NOTE: Neon supports many more saturating variants of instructions than the
; following, but these are all GCC currently understands.
; FIXME: Actually, GCC doesn't know how to create saturating add/sub by itself
; yet either, although these patterns may be used by intrinsics when they're
; added.

;; Signed saturating add of two doubleword vectors (vqadd.s<size>).
(define_insn "*ss_add<mode>_neon"
  [(set (match_operand:VD 0 "s_register_operand" "=w")
        (ss_plus:VD (match_operand:VD 1 "s_register_operand" "w")
                    (match_operand:VD 2 "s_register_operand" "w")))]
  "TARGET_NEON"
  "vqadd.<V_s_elem>\t%P0, %P1, %P2"
  [(set_attr "type" "neon_qadd<q>")]
)

;; Unsigned saturating add of two doubleword vectors (vqadd.u<size>).
(define_insn "*us_add<mode>_neon"
  [(set (match_operand:VD 0 "s_register_operand" "=w")
        (us_plus:VD (match_operand:VD 1 "s_register_operand" "w")
                    (match_operand:VD 2 "s_register_operand" "w")))]
  "TARGET_NEON"
  "vqadd.<V_u_elem>\t%P0, %P1, %P2"
  [(set_attr "type" "neon_qadd<q>")]
)

;; Signed saturating subtract of two doubleword vectors (vqsub.s<size>).
(define_insn "*ss_sub<mode>_neon"
  [(set (match_operand:VD 0 "s_register_operand" "=w")
        (ss_minus:VD (match_operand:VD 1 "s_register_operand" "w")
                     (match_operand:VD 2 "s_register_operand" "w")))]
  "TARGET_NEON"
  "vqsub.<V_s_elem>\t%P0, %P1, %P2"
  [(set_attr "type" "neon_qsub<q>")]
)

;; Unsigned saturating subtract of two doubleword vectors (vqsub.u<size>).
(define_insn "*us_sub<mode>_neon"
  [(set (match_operand:VD 0 "s_register_operand" "=w")
        (us_minus:VD (match_operand:VD 1 "s_register_operand" "w")
                     (match_operand:VD 2 "s_register_operand" "w")))]
  "TARGET_NEON"
  "vqsub.<V_u_elem>\t%P0, %P1, %P2"
  [(set_attr "type" "neon_qsub<q>")]
)

;; Conditional instructions.  These are comparisons with conditional moves for
;; vectors.  They perform the assignment:
;;
;;     Vop0 = (Vop4 <op3> Vop5) ? Vop1 : Vop2;
;;
;; where op3 is <, <=, ==, !=, >= or >.  Operations are performed
;; element-wise.
;; Signed / floating-point vector conditional select.  The expander builds
;; a per-element mask from the comparison (operand 3 applied to operands 4
;; and 5) and then emits a vbsl that picks operand 1 or operand 2 through
;; that mask.  Comparisons with no direct instruction are obtained by
;; emitting the opposite comparison and swapping the two vbsl data inputs.
(define_expand "vcond<mode><mode>"
  [(set (match_operand:VDQW 0 "s_register_operand" "")
        (if_then_else:VDQW
          (match_operator 3 "comparison_operator"
            [(match_operand:VDQW 4 "s_register_operand" "")
             (match_operand:VDQW 5 "nonmemory_operand" "")])
          (match_operand:VDQW 1 "s_register_operand" "")
          (match_operand:VDQW 2 "s_register_operand" "")))]
  "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
{
  int inverse = 0;
  int use_zero_form = 0;
  int swap_bsl_operands = 0;
  rtx mask = gen_reg_rtx (<V_cmp_result>mode);
  rtx tmp = gen_reg_rtx (<V_cmp_result>mode);

  rtx (*base_comparison) (rtx, rtx, rtx);
  rtx (*complimentary_comparison) (rtx, rtx, rtx);

  /* Step 1: only GE/GT/LE/LT/EQ against an all-zero operand 5 keep the
     constant (use_zero_form); in every other case operand 5 is forced
     into a register.  */
  switch (GET_CODE (operands[3]))
    {
    case GE:
    case GT:
    case LE:
    case LT:
    case EQ:
      if (operands[5] == CONST0_RTX (<MODE>mode))
        {
          use_zero_form = 1;
          break;
        }
      /* Fall through.  */
    default:
      if (!REG_P (operands[5]))
        operands[5] = force_reg (<MODE>mode, operands[5]);
    }

  /* Step 2: choose the comparison generators.  INVERSE is set for the
     less-than flavours, which are emitted with the operands exchanged.  */
  switch (GET_CODE (operands[3]))
    {
    case LT:
    case UNLT:
      inverse = 1;
      /* Fall through.  */
    case GE:
    case UNGE:
    case ORDERED:
    case UNORDERED:
      base_comparison = gen_neon_vcge<mode>;
      complimentary_comparison = gen_neon_vcgt<mode>;
      break;
    case LE:
    case UNLE:
      inverse = 1;
      /* Fall through.  */
    case GT:
    case UNGT:
      base_comparison = gen_neon_vcgt<mode>;
      complimentary_comparison = gen_neon_vcge<mode>;
      break;
    case EQ:
    case NE:
    case UNEQ:
      base_comparison = gen_neon_vceq<mode>;
      complimentary_comparison = gen_neon_vceq<mode>;
      break;
    default:
      gcc_unreachable ();
    }

  /* Step 3: emit the comparison(s) that compute MASK.  */
  switch (GET_CODE (operands[3]))
    {
    case LT:
    case LE:
    case GT:
    case GE:
    case EQ:
      /* The easy case.  Here we emit one of vcge, vcgt or vceq.
         As a LT b <=> b GT a && a LE b <=> b GE a.  Our transformations are:
         a GE b -> a GE b
         a GT b -> a GT b
         a LE b -> b GE a
         a LT b -> b GT a
         a EQ b -> a EQ b
         Note that there also exist direct comparison against 0 forms,
         so catch those as a special case.  */
      if (use_zero_form)
        {
          inverse = 0;
          switch (GET_CODE (operands[3]))
            {
            case LT:
              base_comparison = gen_neon_vclt<mode>;
              break;
            case LE:
              base_comparison = gen_neon_vcle<mode>;
              break;
            default:
              /* Do nothing, other zero form cases already have the correct
                 base_comparison.  */
              break;
            }
        }

      if (!inverse)
        emit_insn (base_comparison (mask, operands[4], operands[5]));
      else
        emit_insn (complimentary_comparison (mask, operands[5], operands[4]));
      break;
    case UNLT:
    case UNLE:
    case UNGT:
    case UNGE:
    case NE:
      /* Vector compare returns false for lanes which are unordered, so if we use
         the inverse of the comparison we actually want to emit, then
         swap the operands to BSL, we will end up with the correct result.
         Note that a NE NaN and NaN NE b are true for all a, b.

         Our transformations are:
         a UNGE b -> !(b GT a)
         a UNGT b -> !(b GE a)
         a UNLE b -> !(a GT b)
         a UNLT b -> !(a GE b)
         a NE b -> !(a EQ b)  */

      if (inverse)
        emit_insn (base_comparison (mask, operands[4], operands[5]));
      else
        emit_insn (complimentary_comparison (mask, operands[5], operands[4]));

      swap_bsl_operands = 1;
      break;
    case UNEQ:
      /* We check (a > b || b > a).  Combining these comparisons gives us
         true iff (a != b && a ORDERED b); swapping the operands to BSL
         will then give us (a == b || a UNORDERED b) as intended.  */

      emit_insn (gen_neon_vcgt<mode> (mask, operands[4], operands[5]));
      emit_insn (gen_neon_vcgt<mode> (tmp, operands[5], operands[4]));
      emit_insn (gen_ior<v_cmp_result>3 (mask, mask, tmp));
      swap_bsl_operands = 1;
      break;
    case UNORDERED:
      /* Operands are ORDERED iff (a > b || b >= a).
         Swapping the operands to BSL will give the UNORDERED case.  */
      swap_bsl_operands = 1;
      /* Fall through.  */
    case ORDERED:
      emit_insn (gen_neon_vcgt<mode> (tmp, operands[4], operands[5]));
      emit_insn (gen_neon_vcge<mode> (mask, operands[5], operands[4]));
      emit_insn (gen_ior<v_cmp_result>3 (mask, mask, tmp));
      break;
    default:
      gcc_unreachable ();
    }

  /* Step 4: select through the mask, with the data inputs exchanged when
     the mask holds the opposite of the requested condition.  */
  if (swap_bsl_operands)
    emit_insn (gen_neon_vbsl<mode> (operands[0], mask, operands[2],
                                    operands[1]));
  else
    emit_insn (gen_neon_vbsl<mode> (operands[0], mask, operands[1],
                                    operands[2]));
  DONE;
})

;; Unsigned integer vector conditional select.  LEU and LTU are emitted as
;; GEU and GTU with the comparison operands exchanged (or as the
;; compare-with-zero forms when operand 5 is the zero constant); NE is
;; emitted as EQ with the two vbsl data inputs exchanged.
(define_expand "vcondu<mode><mode>"
  [(set (match_operand:VDQIW 0 "s_register_operand" "")
        (if_then_else:VDQIW
          (match_operator 3 "arm_comparison_operator"
            [(match_operand:VDQIW 4 "s_register_operand" "")
             (match_operand:VDQIW 5 "s_register_operand" "")])
          (match_operand:VDQIW 1 "s_register_operand" "")
          (match_operand:VDQIW 2 "s_register_operand" "")))]
  "TARGET_NEON"
{
  rtx mask;
  int inverse = 0, immediate_zero = 0;

  mask = gen_reg_rtx (<V_cmp_result>mode);

  if (operands[5] == CONST0_RTX (<MODE>mode))
    immediate_zero = 1;
  else if (!REG_P (operands[5]))
    operands[5] = force_reg (<MODE>mode, operands[5]);

  switch (GET_CODE (operands[3]))
    {
    case GEU:
      emit_insn (gen_neon_vcgeu<mode> (mask, operands[4], operands[5]));
      break;

    case GTU:
      emit_insn (gen_neon_vcgtu<mode> (mask, operands[4], operands[5]));
      break;

    case EQ:
      emit_insn (gen_neon_vceq<mode> (mask, operands[4], operands[5]));
      break;

    case LEU:
      if (immediate_zero)
        emit_insn (gen_neon_vcle<mode> (mask, operands[4], operands[5]));
      else
        emit_insn (gen_neon_vcgeu<mode> (mask, operands[5], operands[4]));
      break;

    case LTU:
      if (immediate_zero)
        emit_insn (gen_neon_vclt<mode> (mask, operands[4], operands[5]));
      else
        emit_insn (gen_neon_vcgtu<mode> (mask, operands[5], operands[4]));
      break;

    case NE:
      emit_insn (gen_neon_vceq<mode> (mask, operands[4], operands[5]));
      inverse = 1;
      break;

    default:
      gcc_unreachable ();
    }

  if (inverse)
    emit_insn (gen_neon_vbsl<mode> (operands[0], mask, operands[2],
                                    operands[1]));
  else
    emit_insn (gen_neon_vbsl<mode> (operands[0], mask, operands[1],
                                    operands[2]));

  DONE;
})

;; Patterns for builtins.

; good for plain vadd, vaddq.

;; Expander behind the vadd intrinsic.  Integer modes, and float modes
;; under -funsafe-math-optimizations, use the canonical add<mode>3;
;; otherwise the unspec form neon_vadd<mode>_unspec is emitted.
(define_expand "neon_vadd<mode>"
  [(match_operand:VCVTF 0 "s_register_operand" "=w")
   (match_operand:VCVTF 1 "s_register_operand" "w")
   (match_operand:VCVTF 2 "s_register_operand" "w")]
  "TARGET_NEON"
{
  if (!<Is_float_mode> || flag_unsafe_math_optimizations)
    emit_insn (gen_add<mode>3 (operands[0], operands[1], operands[2]));
  else
    emit_insn (gen_neon_vadd<mode>_unspec (operands[0], operands[1],
                                           operands[2]));
  DONE;
})

; Note that NEON operations don't support the full IEEE 754 standard: in
; particular, denormal values are flushed to zero.  This means that GCC cannot
; use those instructions for autovectorization, etc. unless
; -funsafe-math-optimizations is in effect (in which case flush-to-zero
; behavior is permissible).
; Intrinsic operations (provided by the arm_neon.h
; header) must work in either case: if -funsafe-math-optimizations is given,
; intrinsics expand to "canonical" RTL where possible, otherwise intrinsics
; expand to unspecs (which may potentially limit the extent to which they might
; be optimized by generic code).

; Used for intrinsics when flag_unsafe_math_optimizations is false.

(define_insn "neon_vadd<mode>_unspec"
  [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
        (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
                       (match_operand:VCVTF 2 "s_register_operand" "w")]
                      UNSPEC_VADD))]
  "TARGET_NEON"
  "vadd.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set (attr "type")
        (if_then_else (match_test "<Is_float_mode>")
                      (const_string "neon_fp_addsub_s<q>")
                      (const_string "neon_add<q>")))]
)

;; Long add (vaddl): two doubleword integer vectors are added into a
;; quadword result of mode <V_widen>.
(define_insn "neon_vaddl<sup><mode>"
  [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
        (unspec:<V_widen> [(match_operand:VDI 1 "s_register_operand" "w")
                           (match_operand:VDI 2 "s_register_operand" "w")]
                          VADDL))]
  "TARGET_NEON"
  "vaddl.<sup>%#<V_sz_elem>\t%q0, %P1, %P2"
  [(set_attr "type" "neon_add_long")]
)

;; Wide add (vaddw): a doubleword integer vector (operand 2) is added to
;; a quadword vector of mode <V_widen> (operand 1).
(define_insn "neon_vaddw<sup><mode>"
  [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
        (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "w")
                           (match_operand:VDI 2 "s_register_operand" "w")]
                          VADDW))]
  "TARGET_NEON"
  "vaddw.<sup>%#<V_sz_elem>\t%q0, %q1, %P2"
  [(set_attr "type" "neon_add_widen")]
)

; vhadd and vrhadd.

; Halving add, with the rounding variant selected by <r>.  The pattern
; iterates over both D- and Q-register modes (VDQIW), so the scheduling type
; takes the <q> suffix, in the same way as the "neon_sub_halve<q>" type used
; by neon_vhsub<sup><mode>.
(define_insn "neon_v<r>hadd<sup><mode>"
  [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
        (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")
                       (match_operand:VDQIW 2 "s_register_operand" "w")]
                      VHADD))]
  "TARGET_NEON"
  "v<r>hadd.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_add_halve<q>")]
)

; Saturating add over the VDQIX modes.
(define_insn "neon_vqadd<sup><mode>"
  [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
        (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
                       (match_operand:VDQIX 2 "s_register_operand" "w")]
                      VQADD))]
  "TARGET_NEON"
  "vqadd.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_qadd<q>")]
)

; Add returning the high half, narrowing: both inputs are printed as Q
; registers (%q1, %q2) and the <V_narrow> result as a D register (%P0).
(define_insn "neon_v<r>addhn<mode>"
  [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
        (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")
                            (match_operand:VN 2 "s_register_operand" "w")]
                           VADDHN))]
  "TARGET_NEON"
  "v<r>addhn.<V_if_elem>\t%P0, %q1, %q2"
  [(set_attr "type" "neon_add_halve_narrow_q")]
)

;; Polynomial and Float multiplication.
(define_insn "neon_vmul<pf><mode>"
  [(set (match_operand:VPF 0 "s_register_operand" "=w")
        (unspec:VPF [(match_operand:VPF 1 "s_register_operand" "w")
                     (match_operand:VPF 2 "s_register_operand" "w")]
                    UNSPEC_VMUL))]
  "TARGET_NEON"
  "vmul.<pf>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set (attr "type")
        (if_then_else (match_test "<Is_float_mode>")
                      (const_string "neon_fp_mul_s<q>")
                      (const_string "neon_mul_<V_elem_ch><q>")))]
)

; Multiply-accumulate builtin.  Operand 1 is the accumulator; operands 2 and
; 3 are the multiplicands.
(define_expand "neon_vmla<mode>"
  [(match_operand:VDQW 0 "s_register_operand" "=w")
   (match_operand:VDQW 1 "s_register_operand" "0")
   (match_operand:VDQW 2 "s_register_operand" "w")
   (match_operand:VDQW 3 "s_register_operand" "w")]
  "TARGET_NEON"
{
  rtx pat;

  /* Only a float mode without -funsafe-math-optimizations needs the unspec
     form; every other case goes through mul<mode>3add<mode>_neon.  */
  if (<Is_float_mode> && !flag_unsafe_math_optimizations)
    pat = gen_neon_vmla<mode>_unspec (operands[0], operands[1],
                                      operands[2], operands[3]);
  else
    pat = gen_mul<mode>3add<mode>_neon (operands[0], operands[1],
                                        operands[2], operands[3]);
  emit_insn (pat);
  DONE;
})

; Fused multiply-add builtin.  The builtin's operand 1 is handed to
; fma<mode>4_intrinsic as its last argument, after operands 2 and 3.
(define_expand "neon_vfma<VCVTF:mode>"
  [(match_operand:VCVTF 0 "s_register_operand")
   (match_operand:VCVTF 1 "s_register_operand")
   (match_operand:VCVTF 2 "s_register_operand")
   (match_operand:VCVTF 3 "s_register_operand")]
  "TARGET_NEON && TARGET_FMA"
{
  rtx pat = gen_fma<mode>4_intrinsic (operands[0], operands[2], operands[3],
                                      operands[1]);
  emit_insn (pat);
  DONE;
})

; Fused multiply-subtract builtin; same operand reordering as neon_vfma, but
; forwarded to fmsub<mode>4_intrinsic.
(define_expand "neon_vfms<VCVTF:mode>"
  [(match_operand:VCVTF 0 "s_register_operand")
   (match_operand:VCVTF 1 "s_register_operand")
   (match_operand:VCVTF 2 "s_register_operand")
   (match_operand:VCVTF 3 "s_register_operand")]
  "TARGET_NEON && TARGET_FMA"
{
  rtx pat = gen_fmsub<mode>4_intrinsic (operands[0], operands[2], operands[3],
                                        operands[1]);
  emit_insn (pat);
  DONE;
})

; Used for intrinsics when flag_unsafe_math_optimizations is false.

; Unspec form of multiply-accumulate.  Operand 1 is tied to the destination
; ("0" constraint), so only operands 2 and 3 appear as sources in the
; assembler template.
(define_insn "neon_vmla<mode>_unspec"
  [(set (match_operand:VDQW 0 "s_register_operand" "=w")
        (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0")
                      (match_operand:VDQW 2 "s_register_operand" "w")
                      (match_operand:VDQW 3 "s_register_operand" "w")]
                     UNSPEC_VMLA))]
  "TARGET_NEON"
  "vmla.<V_if_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
  [(set (attr "type")
        (if_then_else (match_test "<Is_float_mode>")
                      (const_string "neon_fp_mla_s<q>")
                      (const_string "neon_mla_<V_elem_ch><q>")))]
)

; Long multiply-accumulate: the multiplicands are printed as D registers
; (%P2, %P3); the accumulator/result has the <V_widen> mode (%q0).
(define_insn "neon_vmlal<sup><mode>"
  [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
        (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
                           (match_operand:VW 2 "s_register_operand" "w")
                           (match_operand:VW 3 "s_register_operand" "w")]
                          VMLAL))]
  "TARGET_NEON"
  "vmlal.<sup>%#<V_sz_elem>\t%q0, %P2, %P3"
  [(set_attr "type" "neon_mla_<V_elem_ch>_long")]
)

; Multiply-subtract builtin.  Operand 1 is the accumulator; operands 2 and 3
; are the multiplicands.
(define_expand "neon_vmls<mode>"
  [(match_operand:VDQW 0 "s_register_operand" "=w")
   (match_operand:VDQW 1 "s_register_operand" "0")
   (match_operand:VDQW 2 "s_register_operand" "w")
   (match_operand:VDQW 3 "s_register_operand" "w")]
  "TARGET_NEON"
{
  rtx pat;

  /* Only a float mode without -funsafe-math-optimizations needs the unspec
     form; every other case goes through mul<mode>3neg<mode>add<mode>_neon.  */
  if (<Is_float_mode> && !flag_unsafe_math_optimizations)
    pat = gen_neon_vmls<mode>_unspec (operands[0], operands[1],
                                      operands[2], operands[3]);
  else
    pat = gen_mul<mode>3neg<mode>add<mode>_neon (operands[0],
                                                 operands[1], operands[2],
                                                 operands[3]);
  emit_insn (pat);
  DONE;
})

; Used for intrinsics when flag_unsafe_math_optimizations is false.

; Unspec form of multiply-subtract.  Operand 1 is tied to the destination
; ("0" constraint), so only operands 2 and 3 appear as sources in the
; assembler template.
(define_insn "neon_vmls<mode>_unspec"
  [(set (match_operand:VDQW 0 "s_register_operand" "=w")
        (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0")
                      (match_operand:VDQW 2 "s_register_operand" "w")
                      (match_operand:VDQW 3 "s_register_operand" "w")]
                     UNSPEC_VMLS))]
  "TARGET_NEON"
  "vmls.<V_if_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
  [(set (attr "type")
        (if_then_else (match_test "<Is_float_mode>")
                      (const_string "neon_fp_mla_s<q>")
                      (const_string "neon_mla_<V_elem_ch><q>")))]
)

; Long multiply-subtract: D-register multiplicands (%P2, %P3), <V_widen>
; accumulator/result (%q0).
(define_insn "neon_vmlsl<sup><mode>"
  [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
        (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
                           (match_operand:VW 2 "s_register_operand" "w")
                           (match_operand:VW 3 "s_register_operand" "w")]
                          VMLSL))]
  "TARGET_NEON"
  "vmlsl.<sup>%#<V_sz_elem>\t%q0, %P2, %P3"
  [(set_attr "type" "neon_mla_<V_elem_ch>_long")]
)

;; vqdmulh, vqrdmulh
(define_insn "neon_vq<r>dmulh<mode>"
  [(set (match_operand:VMDQI 0 "s_register_operand" "=w")
        (unspec:VMDQI [(match_operand:VMDQI 1 "s_register_operand" "w")
                       (match_operand:VMDQI 2 "s_register_operand" "w")]
                      VQDMULH))]
  "TARGET_NEON"
  "vq<r>dmulh.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_sat_mul_<V_elem_ch><q>")]
)

;; vqrdmlah, vqrdmlsh
;; Enabled by TARGET_NEON_RDMA rather than plain TARGET_NEON.
(define_insn "neon_vqrdml<VQRDMLH_AS:neon_rdma_as>h<mode>"
  [(set (match_operand:VMDQI 0 "s_register_operand" "=w")
        (unspec:VMDQI [(match_operand:VMDQI 1 "s_register_operand" "0")
                       (match_operand:VMDQI 2 "s_register_operand" "w")
                       (match_operand:VMDQI 3 "s_register_operand" "w")]
                      VQRDMLH_AS))]
  "TARGET_NEON_RDMA"
  "vqrdml<VQRDMLH_AS:neon_rdma_as>h.<V_s_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
  [(set_attr "type" "neon_sat_mla_<V_elem_ch>_long")]
)

; vqdmlal: operand 1 is tied to the <V_widen> destination; the VMDI
; multiplicands are printed as D registers.
(define_insn "neon_vqdmlal<mode>"
  [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
        (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
                           (match_operand:VMDI 2 "s_register_operand" "w")
                           (match_operand:VMDI 3 "s_register_operand" "w")]
                          UNSPEC_VQDMLAL))]
  "TARGET_NEON"
  "vqdmlal.<V_s_elem>\t%q0, %P2, %P3"
  [(set_attr "type" "neon_sat_mla_<V_elem_ch>_long")]
)

; vqdmlsl: same operand layout as neon_vqdmlal<mode>.
(define_insn "neon_vqdmlsl<mode>"
  [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
        (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
                           (match_operand:VMDI 2 "s_register_operand" "w")
                           (match_operand:VMDI 3 "s_register_operand" "w")]
                          UNSPEC_VQDMLSL))]
  "TARGET_NEON"
  "vqdmlsl.<V_s_elem>\t%q0, %P2, %P3"
  [(set_attr "type" "neon_sat_mla_<V_elem_ch>_long")]
)

; Long multiply: D-register inputs, <V_widen> result.
(define_insn "neon_vmull<sup><mode>"
  [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
        (unspec:<V_widen> [(match_operand:VW 1 "s_register_operand" "w")
                           (match_operand:VW 2 "s_register_operand" "w")]
                          VMULL))]
  "TARGET_NEON"
  "vmull.<sup>%#<V_sz_elem>\t%q0, %P1, %P2"
  [(set_attr "type" "neon_mul_<V_elem_ch>_long")]
)

; vqdmull: D-register inputs, <V_widen> result.
(define_insn "neon_vqdmull<mode>"
  [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
        (unspec:<V_widen> [(match_operand:VMDI 1 "s_register_operand" "w")
                           (match_operand:VMDI 2 "s_register_operand" "w")]
                          UNSPEC_VQDMULL))]
  "TARGET_NEON"
  "vqdmull.<V_s_elem>\t%q0, %P1, %P2"
  [(set_attr "type" "neon_sat_mul_<V_elem_ch>_long")]
)

; Subtract builtin over the VCVTF modes.
(define_expand "neon_vsub<mode>"
  [(match_operand:VCVTF 0 "s_register_operand" "=w")
   (match_operand:VCVTF 1 "s_register_operand" "w")
   (match_operand:VCVTF 2 "s_register_operand" "w")]
  "TARGET_NEON"
{
  rtx pat;

  /* Only a float mode without -funsafe-math-optimizations needs the unspec
     form; every other case uses the standard sub<mode>3 pattern.  */
  if (<Is_float_mode> && !flag_unsafe_math_optimizations)
    pat = gen_neon_vsub<mode>_unspec (operands[0], operands[1],
                                      operands[2]);
  else
    pat = gen_sub<mode>3 (operands[0], operands[1], operands[2]);
  emit_insn (pat);
  DONE;
})

; Used for intrinsics when flag_unsafe_math_optimizations is false.

(define_insn "neon_vsub<mode>_unspec"
  [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
        (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
                       (match_operand:VCVTF 2 "s_register_operand" "w")]
                      UNSPEC_VSUB))]
  "TARGET_NEON"
  "vsub.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set (attr "type")
        (if_then_else (match_test "<Is_float_mode>")
                      (const_string "neon_fp_addsub_s<q>")
                      (const_string "neon_sub<q>")))]
)

; Long subtract: D-register inputs, <V_widen> result.
(define_insn "neon_vsubl<sup><mode>"
  [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
        (unspec:<V_widen> [(match_operand:VDI 1 "s_register_operand" "w")
                           (match_operand:VDI 2 "s_register_operand" "w")]
                          VSUBL))]
  "TARGET_NEON"
  "vsubl.<sup>%#<V_sz_elem>\t%q0, %P1, %P2"
  [(set_attr "type" "neon_sub_long")]
)

; Wide subtract: operand 1 already has the <V_widen> mode; operand 2 is
; printed as a D register.
(define_insn "neon_vsubw<sup><mode>"
  [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
        (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "w")
                           (match_operand:VDI 2 "s_register_operand" "w")]
                          VSUBW))]
  "TARGET_NEON"
  "vsubw.<sup>%#<V_sz_elem>\t%q0, %q1, %P2"
  [(set_attr "type" "neon_sub_widen")]
)

; Saturating subtract.
(define_insn "neon_vqsub<sup><mode>"
  [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
        (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
                       (match_operand:VDQIX 2 "s_register_operand" "w")]
                      VQSUB))]
  "TARGET_NEON"
  "vqsub.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_qsub<q>")]
)

; Halving subtract.
(define_insn "neon_vhsub<sup><mode>"
  [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
        (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")
                       (match_operand:VDQIW 2 "s_register_operand" "w")]
                      VHSUB))]
  "TARGET_NEON"
  "vhsub.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_sub_halve<q>")]
)

; Subtract returning the high half, narrowing: Q-register inputs,
; <V_narrow> result printed as a D register.
(define_insn "neon_v<r>subhn<mode>"
  [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
        (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")
                            (match_operand:VN 2 "s_register_operand" "w")]
                           VSUBHN))]
  "TARGET_NEON"
  "v<r>subhn.<V_if_elem>\t%P0, %q1, %q2"
  [(set_attr "type" "neon_sub_halve_narrow_q")]
)

;; These may expand to an UNSPEC pattern when a floating point mode is used
;; without unsafe math optimizations.
(define_expand "neon_vc<cmp_op><mode>"
  [(match_operand:<V_cmp_result> 0 "s_register_operand" "=w,w")
     (neg:<V_cmp_result>
       (COMPARISONS:VDQW (match_operand:VDQW 1 "s_register_operand" "w,w")
                         (match_operand:VDQW 2 "reg_or_zero_operand" "w,Dz")))]
  "TARGET_NEON"
  {
    /* For FP comparisons use UNSPECS unless -funsafe-math-optimizations
       are enabled.  */
    bool use_unspec = (GET_MODE_CLASS (<MODE>mode) == MODE_VECTOR_FLOAT
                       && !flag_unsafe_math_optimizations);

    if (!use_unspec)
      emit_insn (gen_neon_vc<cmp_op><mode>_insn (operands[0],
                                                 operands[1],
                                                 operands[2]));
    /* We don't just emit a gen_neon_vc<cmp_op><mode>_insn_unspec because
       we define gen_neon_vceq<mode>_insn_unspec only for float modes
       whereas this expander iterates over the integer modes as well,
       but we will never expand to UNSPECs for the integer comparisons.  */
    else if (<MODE>mode == V2SFmode)
      emit_insn (gen_neon_vc<cmp_op>v2sf_insn_unspec (operands[0],
                                                      operands[1],
                                                      operands[2]));
    else if (<MODE>mode == V4SFmode)
      emit_insn (gen_neon_vc<cmp_op>v4sf_insn_unspec (operands[0],
                                                      operands[1],
                                                      operands[2]));
    else
      gcc_unreachable ();
    DONE;
  }
)

; Canonical-RTL comparison.  The insn condition excludes float modes unless
; -funsafe-math-optimizations is in effect.  Alternative 0 compares against
; a register, alternative 1 against the immediate #0.
(define_insn "neon_vc<cmp_op><mode>_insn"
  [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w,w")
        (neg:<V_cmp_result>
          (COMPARISONS:<V_cmp_result>
            (match_operand:VDQW 1 "s_register_operand" "w,w")
            (match_operand:VDQW 2 "reg_or_zero_operand" "w,Dz"))))]
  "TARGET_NEON && !(GET_MODE_CLASS (<MODE>mode) == MODE_VECTOR_FLOAT
                    && !flag_unsafe_math_optimizations)"
  {
    /* Element-type prefix: "f" for float vectors, otherwise the prefix
       supplied by <cmp_type>.  */
    const char *elem_kind
      = (GET_MODE_CLASS (<MODE>mode) == MODE_VECTOR_FLOAT
         ? "f" : "<cmp_type>");
    /* Second source operand: a register, or the literal zero.  */
    const char *rhs = which_alternative == 0 ? "%<V_reg>2" : "#0";
    char templ[100];

    sprintf (templ, "vc<cmp_op>.%s%%#<V_sz_elem>\t%%<V_reg>0,"
                    " %%<V_reg>1, %s",
             elem_kind, rhs);
    output_asm_insn (templ, operands);
    return "";
  }
  [(set (attr "type")
        (if_then_else (match_operand 2 "zero_operand")
                      (const_string "neon_compare_zero<q>")
                      (const_string "neon_compare<q>")))]
)

; Unspec form of the float comparisons; same two alternatives as above.
(define_insn "neon_vc<cmp_op_unsp><mode>_insn_unspec"
  [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w,w")
        (unspec:<V_cmp_result>
          [(match_operand:VCVTF 1 "s_register_operand" "w,w")
           (match_operand:VCVTF 2 "reg_or_zero_operand" "w,Dz")]
          NEON_VCMP))]
  "TARGET_NEON"
  {
    const char *rhs = which_alternative == 0 ? "%<V_reg>2" : "#0";
    char templ[100];

    sprintf (templ, "vc<cmp_op_unsp>.f%%#<V_sz_elem>\t%%<V_reg>0,"
                    " %%<V_reg>1, %s",
             rhs);
    output_asm_insn (templ, operands);
    return "";
}
  [(set_attr "type" "neon_fp_compare_s<q>")]
)

; Unsigned integer comparisons (GTUGEU codes), register operands only.
(define_insn "neon_vc<cmp_op>u<mode>"
  [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
        (neg:<V_cmp_result>
          (GTUGEU:<V_cmp_result>
            (match_operand:VDQIW 1 "s_register_operand" "w")
            (match_operand:VDQIW 2 "s_register_operand" "w"))))]
  "TARGET_NEON"
  "vc<cmp_op>.u%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_compare<q>")]
)

; Absolute-value comparison builtin: canonical RTL when
; -funsafe-math-optimizations is given, the unspec form otherwise.
(define_expand "neon_vca<cmp_op><mode>"
  [(set (match_operand:<V_cmp_result> 0 "s_register_operand")
        (neg:<V_cmp_result>
          (GTGE:<V_cmp_result>
            (abs:VCVTF (match_operand:VCVTF 1 "s_register_operand"))
            (abs:VCVTF (match_operand:VCVTF 2 "s_register_operand")))))]
  "TARGET_NEON"
  {
    rtx pat;

    if (!flag_unsafe_math_optimizations)
      pat = gen_neon_vca<cmp_op><mode>_insn_unspec (operands[0],
                                                    operands[1],
                                                    operands[2]);
    else
      pat = gen_neon_vca<cmp_op><mode>_insn (operands[0], operands[1],
                                             operands[2]);
    emit_insn (pat);
    DONE;
  }
)

(define_insn "neon_vca<cmp_op><mode>_insn"
  [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
        (neg:<V_cmp_result>
          (GTGE:<V_cmp_result>
            (abs:VCVTF (match_operand:VCVTF 1 "s_register_operand" "w"))
            (abs:VCVTF (match_operand:VCVTF 2 "s_register_operand" "w")))))]
  "TARGET_NEON && flag_unsafe_math_optimizations"
  "vac<cmp_op>.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_fp_compare_s<q>")]
)

(define_insn "neon_vca<cmp_op_unsp><mode>_insn_unspec"
  [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
        (unspec:<V_cmp_result> [(match_operand:VCVTF 1 "s_register_operand" "w")
                                (match_operand:VCVTF 2 "s_register_operand" "w")]
                               NEON_VACMP))]
  "TARGET_NEON"
  "vac<cmp_op_unsp>.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_fp_compare_s<q>")]
)

; vtst.
(define_insn "neon_vtst<mode>"
  [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
        (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")
                       (match_operand:VDQIW 2 "s_register_operand" "w")]
                      UNSPEC_VTST))]
  "TARGET_NEON"
  "vtst.<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_tst<q>")]
)

; Integer vabd.
(define_insn "neon_vabd<sup><mode>"
  [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
        (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")
                       (match_operand:VDQIW 2 "s_register_operand" "w")]
                      VABD))]
  "TARGET_NEON"
  "vabd.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_abd<q>")]
)

; Floating-point vabd.
(define_insn "neon_vabdf<mode>"
  [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
        (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
                       (match_operand:VCVTF 2 "s_register_operand" "w")]
                      UNSPEC_VABD_F))]
  "TARGET_NEON"
  "vabd.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_fp_abd_s<q>")]
)

; Long vabd: D-register inputs, <V_widen> result.
(define_insn "neon_vabdl<sup><mode>"
  [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
        (unspec:<V_widen> [(match_operand:VW 1 "s_register_operand" "w")
                           (match_operand:VW 2 "s_register_operand" "w")]
                          VABDL))]
  "TARGET_NEON"
  "vabdl.<sup>%#<V_sz_elem>\t%q0, %P1, %P2"
  [(set_attr "type" "neon_abd_long")]
)

; vaba: the VABD unspec of operands 2 and 3 is added to operand 1, which is
; tied to the destination.
(define_insn "neon_vaba<sup><mode>"
  [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
        (plus:VDQIW (unspec:VDQIW [(match_operand:VDQIW 2 "s_register_operand" "w")
                                   (match_operand:VDQIW 3 "s_register_operand" "w")]
                                  VABD)
                    (match_operand:VDQIW 1 "s_register_operand" "0")))]
  "TARGET_NEON"
  "vaba.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
  [(set_attr "type" "neon_arith_acc<q>")]
)

; vabal: the VABDL unspec of D-register operands 2 and 3 is added to
; operand 1, which has the <V_widen> mode and is tied to the destination.
(define_insn "neon_vabal<sup><mode>"
  [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
        (plus:<V_widen> (unspec:<V_widen> [(match_operand:VW 2 "s_register_operand" "w")
                                           (match_operand:VW 3 "s_register_operand" "w")]
                                          VABDL)
                        (match_operand:<V_widen> 1 "s_register_operand" "0")))]
  "TARGET_NEON"
  "vabal.<sup>%#<V_sz_elem>\t%q0, %P2, %P3"
  [(set_attr "type" "neon_arith_acc<q>")]
)

; Integer vmax/vmin.
(define_insn "neon_v<maxmin><sup><mode>"
  [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
        (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")
                       (match_operand:VDQIW 2 "s_register_operand" "w")]
                      VMAXMIN))]
  "TARGET_NEON"
  "v<maxmin>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_minmax<q>")]
)

; Floating-point vmax/vmin.
(define_insn "neon_v<maxmin>f<mode>"
  [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
        (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
                       (match_operand:VCVTF 2 "s_register_operand" "w")]
                      VMAXMINF))]
  "TARGET_NEON"
  "v<maxmin>.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_fp_minmax_s<q>")]
)

;; Vector forms for the IEEE-754 fmax()/fmin() functions
;; (additionally requires TARGET_FPU_ARMV8).
(define_insn "<fmaxmin><mode>3"
  [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
        (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
                       (match_operand:VCVTF 2 "s_register_operand" "w")]
                      VMAXMINFNM))]
  "TARGET_NEON && TARGET_FPU_ARMV8"
  "<fmaxmin_op>.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_fp_minmax_s<q>")]
)

; vpadd builtin: forwards all three operands unchanged to
; neon_vpadd_internal<mode>.
(define_expand "neon_vpadd<mode>"
  [(match_operand:VD 0 "s_register_operand" "=w")
   (match_operand:VD 1 "s_register_operand" "w")
   (match_operand:VD 2 "s_register_operand" "w")]
  "TARGET_NEON"
{
  rtx pat = gen_neon_vpadd_internal<mode> (operands[0], operands[1],
                                           operands[2]);
  emit_insn (pat);
  DONE;
})

; vpaddl: the result has the <V_double_width> mode of the input.
(define_insn "neon_vpaddl<sup><mode>"
  [(set (match_operand:<V_double_width> 0 "s_register_operand" "=w")
        (unspec:<V_double_width> [(match_operand:VDQIW 1 "s_register_operand" "w")]
                                 VPADDL))]
  "TARGET_NEON"
  "vpaddl.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1"
  [(set_attr "type" "neon_reduc_add_long")]
)

; vpadal: operand 1 is the accumulator, tied to the destination.
(define_insn "neon_vpadal<sup><mode>"
  [(set (match_operand:<V_double_width> 0 "s_register_operand" "=w")
        (unspec:<V_double_width> [(match_operand:<V_double_width> 1 "s_register_operand" "0")
                                  (match_operand:VDQIW 2 "s_register_operand" "w")]
                                 VPADAL))]
  "TARGET_NEON"
  "vpadal.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>2"
  [(set_attr "type" "neon_reduc_add_acc")]
)

; Integer vpmax/vpmin.
(define_insn "neon_vp<maxmin><sup><mode>"
  [(set (match_operand:VDI 0 "s_register_operand" "=w")
        (unspec:VDI [(match_operand:VDI 1 "s_register_operand" "w")
                     (match_operand:VDI 2 "s_register_operand" "w")]
                    VPMAXMIN))]
  "TARGET_NEON"
  "vp<maxmin>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_reduc_minmax<q>")]
)

; Floating-point vpmax/vpmin.
(define_insn "neon_vp<maxmin>f<mode>"
  [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
        (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
                       (match_operand:VCVTF 2 "s_register_operand" "w")]
                      VPMAXMINF))]
  "TARGET_NEON"
  "vp<maxmin>.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_fp_reduc_minmax_s<q>")]
)

(define_insn "neon_vrecps<mode>"
  [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
        (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
                       (match_operand:VCVTF 2 "s_register_operand" "w")]
                      UNSPEC_VRECPS))]
  "TARGET_NEON"
  "vrecps.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_fp_recps_s<q>")]
)

(define_insn "neon_vrsqrts<mode>"
  [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
        (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
                       (match_operand:VCVTF 2 "s_register_operand" "w")]
                      UNSPEC_VRSQRTS))]
  "TARGET_NEON"
  "vrsqrts.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_fp_rsqrts_s<q>")]
)

; vabs builtin: forwards to the standard abs<mode>2 pattern.
(define_expand "neon_vabs<mode>"
  [(match_operand:VDQW 0 "s_register_operand" "")
   (match_operand:VDQW 1 "s_register_operand" "")]
  "TARGET_NEON"
{
  emit_insn (gen_abs<mode>2 (operands[0], operands[1]));
  DONE;
})

(define_insn "neon_vqabs<mode>"
  [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
        (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")]
                      UNSPEC_VQABS))]
  "TARGET_NEON"
  "vqabs.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
  [(set_attr "type" "neon_qabs<q>")]
)

; Byte swap of each element, emitted as vrev<element size>.8.
(define_insn "neon_bswap<mode>"
  [(set (match_operand:VDQHSD 0 "register_operand" "=w")
        (bswap:VDQHSD (match_operand:VDQHSD 1 "register_operand" "w")))]
  "TARGET_NEON"
  "vrev<V_sz_elem>.8\\t%<V_reg>0, %<V_reg>1"
  [(set_attr "type" "neon_rev<q>")]
)

; vneg builtin: forwards to the standard neg<mode>2 pattern.
(define_expand "neon_vneg<mode>"
  [(match_operand:VDQW 0 "s_register_operand" "")
   (match_operand:VDQW 1 "s_register_operand" "")]
  "TARGET_NEON"
{
  emit_insn (gen_neg<mode>2 (operands[0], operands[1]));
  DONE;
})

; copysign: operand 0 is first loaded with operand 2, then a vbsl under a
; mask that has only the top bit of every 32-bit element set combines it
; with operand 1.
(define_expand "neon_copysignf<mode>"
  [(match_operand:VCVTF 0 "register_operand")
   (match_operand:VCVTF 1 "register_operand")
   (match_operand:VCVTF 2 "register_operand")]
  "TARGET_NEON"
{
  rtx v_bitmask_cast;
  rtx v_bitmask = gen_reg_rtx (<VCVTF:V_cmp_result>mode);
  int i, n_elt = GET_MODE_NUNITS (<MODE>mode);
  rtvec v = rtvec_alloc (n_elt);
  /* CONST_INTs must be sign-extended for the mode they are used in, so
     build the sign-bit constant with gen_int_mode rather than GEN_INT.  */
  rtx sign_bit = gen_int_mode (0x80000000,
                               GET_MODE_INNER (<VCVTF:V_cmp_result>mode));

  /* Create bitmask for vector select.  */
  for (i = 0; i < n_elt; ++i)
    RTVEC_ELT (v, i) = sign_bit;

  emit_move_insn (v_bitmask,
                  gen_rtx_CONST_VECTOR (<VCVTF:V_cmp_result>mode, v));
  emit_move_insn (operands[0], operands[2]);
  v_bitmask_cast = simplify_gen_subreg (<MODE>mode, v_bitmask,
                                        <VCVTF:V_cmp_result>mode, 0);
  emit_insn (gen_neon_vbsl<mode> (operands[0], v_bitmask_cast, operands[0],
                                  operands[1]));

  DONE;
})

(define_insn "neon_vqneg<mode>"
  [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
        (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")]
                      UNSPEC_VQNEG))]
  "TARGET_NEON"
  "vqneg.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
  [(set_attr "type" "neon_qneg<q>")]
)

(define_insn "neon_vcls<mode>"
  [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
        (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")]
                      UNSPEC_VCLS))]
  "TARGET_NEON"
  "vcls.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
  [(set_attr "type" "neon_cls<q>")]
)

; Standard-named count-leading-zeros pattern.
(define_insn "clz<mode>2"
  [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
        (clz:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")))]
  "TARGET_NEON"
  "vclz.<V_if_elem>\t%<V_reg>0, %<V_reg>1"
  [(set_attr "type" "neon_cnt<q>")]
)

; vclz builtin: forwards to clz<mode>2.
(define_expand "neon_vclz<mode>"
  [(match_operand:VDQIW 0 "s_register_operand" "")
   (match_operand:VDQIW 1 "s_register_operand" "")]
  "TARGET_NEON"
{
  emit_insn (gen_clz<mode>2 (operands[0], operands[1]));
  DONE;
})

; Standard-named population-count pattern.
(define_insn "popcount<mode>2"
  [(set (match_operand:VE 0 "s_register_operand" "=w")
        (popcount:VE (match_operand:VE 1 "s_register_operand" "w")))]
  "TARGET_NEON"
  "vcnt.<V_sz_elem>\t%<V_reg>0, %<V_reg>1"
  [(set_attr "type" "neon_cnt<q>")]
)

; vcnt builtin: forwards to popcount<mode>2.
(define_expand "neon_vcnt<mode>"
  [(match_operand:VE 0 "s_register_operand" "=w")
   (match_operand:VE 1 "s_register_operand" "w")]
  "TARGET_NEON"
{
  emit_insn (gen_popcount<mode>2 (operands[0], operands[1]));
  DONE;
})

(define_insn "neon_vrecpe<mode>"
  [(set (match_operand:V32 0 "s_register_operand" "=w")
        (unspec:V32 [(match_operand:V32 1 "s_register_operand" "w")]
                    UNSPEC_VRECPE))]
  "TARGET_NEON"
  "vrecpe.<V_u_elem>\t%<V_reg>0, %<V_reg>1"
  [(set_attr "type" "neon_fp_recpe_s<q>")]
)

(define_insn "neon_vrsqrte<mode>"
  [(set (match_operand:V32 0 "s_register_operand" "=w")
        (unspec:V32 [(match_operand:V32 1 "s_register_operand" "w")]
                    UNSPEC_VRSQRTE))]
  "TARGET_NEON"
  "vrsqrte.<V_u_elem>\t%<V_reg>0, %<V_reg>1"
  [(set_attr "type" "neon_fp_rsqrte_s<q>")]
)

; vmvn builtin: forwards to the standard one_cmpl<mode>2 pattern.
(define_expand "neon_vmvn<mode>"
  [(match_operand:VDQIW 0 "s_register_operand" "")
   (match_operand:VDQIW 1 "s_register_operand" "")]
  "TARGET_NEON"
{
  emit_insn (gen_one_cmpl<mode>2 (operands[0], operands[1]));
  DONE;
})

; Sign-extending lane extract from a D register into a core register.
(define_insn "neon_vget_lane<mode>_sext_internal"
  [(set (match_operand:SI 0 "s_register_operand" "=r")
        (sign_extend:SI
          (vec_select:<V_elem>
            (match_operand:VD 1 "s_register_operand" "w")
            (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
  "TARGET_NEON"
{
  /* On big-endian the lane index is mirrored within the vector before it
     is printed.  */
  if (BYTES_BIG_ENDIAN)
    {
      int elt = INTVAL (operands[2]);
      elt = GET_MODE_NUNITS (<MODE>mode) - 1 - elt;
      operands[2] = GEN_INT (elt);
    }
  return "vmov.s<V_sz_elem>\t%0, %P1[%c2]";
}
  [(set_attr "type" "neon_to_gp")]
)

; Zero-extending lane extract from a D register into a core register.
(define_insn "neon_vget_lane<mode>_zext_internal"
  [(set (match_operand:SI 0 "s_register_operand" "=r")
        (zero_extend:SI
          (vec_select:<V_elem>
            (match_operand:VD 1 "s_register_operand" "w")
            (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
  "TARGET_NEON"
{
  /* On big-endian the lane index is mirrored within the vector before it
     is printed.  */
  if (BYTES_BIG_ENDIAN)
    {
      int elt = INTVAL (operands[2]);
      elt = GET_MODE_NUNITS (<MODE>mode) - 1 - elt;
      operands[2] = GEN_INT (elt);
    }
  return "vmov.u<V_sz_elem>\t%0, %P1[%c2]";
}
  [(set_attr "type" "neon_to_gp")]
)

; Sign-extending lane extract from a Q register.  The lane is addressed
; through the <V_HALF>-mode half that contains it: the register number is
; advanced by 2 for the upper half, and the lane index is reduced modulo
; the number of elements per half.
(define_insn "neon_vget_lane<mode>_sext_internal"
  [(set (match_operand:SI 0 "s_register_operand" "=r")
        (sign_extend:SI
          (vec_select:<V_elem>
            (match_operand:VQ2 1 "s_register_operand" "w")
            (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
  "TARGET_NEON"
{
  rtx ops[3];
  int regno = REGNO (operands[1]);
  unsigned int halfelts = GET_MODE_NUNITS (<MODE>mode) / 2;
  unsigned int elt = INTVAL (operands[2]);
  unsigned int elt_adj = elt % halfelts;

  /* On big-endian the index is mirrored within the half.  */
  if (BYTES_BIG_ENDIAN)
    elt_adj = halfelts - 1 - elt_adj;

  ops[0] = operands[0];
  ops[1] = gen_rtx_REG (<V_HALF>mode, regno + 2 * (elt / halfelts));
  ops[2] = GEN_INT (elt_adj);
  output_asm_insn ("vmov.s<V_sz_elem>\t%0, %P1[%c2]", ops);

  return "";
}
  [(set_attr "type" "neon_to_gp_q")]
)

; Zero-extending lane extract from a Q register; same addressing scheme as
; the sign-extending variant above.
(define_insn "neon_vget_lane<mode>_zext_internal"
  [(set (match_operand:SI 0 "s_register_operand" "=r")
        (zero_extend:SI
          (vec_select:<V_elem>
            (match_operand:VQ2 1 "s_register_operand" "w")
            (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
  "TARGET_NEON"
{
  rtx ops[3];
  int regno = REGNO (operands[1]);
  unsigned int halfelts = GET_MODE_NUNITS (<MODE>mode) / 2;
  unsigned int elt = INTVAL (operands[2]);
  unsigned int elt_adj = elt % halfelts;

  /* On big-endian the index is mirrored within the half.  */
  if (BYTES_BIG_ENDIAN)
    elt_adj = halfelts - 1 - elt_adj;

  ops[0] = operands[0];
  ops[1] = gen_rtx_REG (<V_HALF>mode, regno + 2 * (elt / halfelts));
  ops[2] = GEN_INT (elt_adj);
  output_asm_insn ("vmov.u<V_sz_elem>\t%0, %P1[%c2]", ops);

  return "";
}
  [(set_attr "type" "neon_to_gp_q")]
)

; vget_lane builtin: 32-bit elements go through vec_extract<mode>, narrower
; elements through the sign-extending internal pattern.
(define_expand "neon_vget_lane<mode>"
  [(match_operand:<V_ext> 0 "s_register_operand" "")
   (match_operand:VDQW 1 "s_register_operand" "")
   (match_operand:SI 2 "immediate_operand" "")]
  "TARGET_NEON"
{
  if (BYTES_BIG_ENDIAN)
    {
      /* The intrinsics are defined in terms of a model where the
         element ordering in memory is vldm order, whereas the generic
         RTL is defined in terms of a model where the element ordering
         in memory is array order.  Convert the lane number to conform
         to this model.  */
      unsigned int elt = INTVAL (operands[2]);
      unsigned int reg_nelts
        = 64 / GET_MODE_UNIT_BITSIZE (<MODE>mode);
      elt ^= reg_nelts - 1;
      operands[2] = GEN_INT (elt);
    }

  if (GET_MODE_UNIT_BITSIZE (<MODE>mode) == 32)
    emit_insn (gen_vec_extract<mode> (operands[0], operands[1], operands[2]));
  else
    emit_insn (gen_neon_vget_lane<mode>_sext_internal (operands[0],
                                                       operands[1],
                                                       operands[2]));
  DONE;
})

; Unsigned vget_lane builtin: as neon_vget_lane<mode>, but narrower elements
; use the zero-extending internal pattern.
(define_expand "neon_vget_laneu<mode>"
  [(match_operand:<V_ext> 0 "s_register_operand" "")
   (match_operand:VDQIW 1 "s_register_operand" "")
   (match_operand:SI 2 "immediate_operand" "")]
  "TARGET_NEON"
{
  if (BYTES_BIG_ENDIAN)
    {
      /* The intrinsics are defined in terms of a model where the
         element ordering in memory is vldm order, whereas the generic
         RTL is defined in terms of a model where the element ordering
         in memory is array order.  Convert the lane number to conform
         to this model.  */
      unsigned int elt = INTVAL (operands[2]);
      unsigned int reg_nelts
        = 64 / GET_MODE_UNIT_BITSIZE (<MODE>mode);
      elt ^= reg_nelts - 1;
      operands[2] = GEN_INT (elt);
    }

  if (GET_MODE_UNIT_BITSIZE (<MODE>mode) == 32)
    emit_insn (gen_vec_extract<mode> (operands[0], operands[1], operands[2]));
  else
    emit_insn (gen_neon_vget_lane<mode>_zext_internal (operands[0],
                                                       operands[1],
                                                       operands[2]));
  DONE;
})

; DImode "lane" extract: a plain move; the lane operand (2) is not used.
(define_expand "neon_vget_lanedi"
  [(match_operand:DI 0 "s_register_operand" "=r")
   (match_operand:DI 1 "s_register_operand" "w")
   (match_operand:SI 2 "immediate_operand" "")]
  "TARGET_NEON"
{
  emit_move_insn (operands[0], operands[1]);
  DONE;
})

; V2DI lane extract: lane 0 is the low DImode part of the vector, lane 1 the
; high part.
(define_expand "neon_vget_lanev2di"
  [(match_operand:DI 0 "s_register_operand" "")
   (match_operand:V2DI 1 "s_register_operand" "")
   (match_operand:SI 2 "immediate_operand" "")]
  "TARGET_NEON"
{
  int lane;

  if (BYTES_BIG_ENDIAN)
    {
      /* The intrinsics are defined in terms of a model where the
         element ordering in memory is vldm order, whereas the generic
         RTL is defined in terms of a model where the element ordering
         in memory is array order.  Convert the lane number to conform
         to this model.  */
      unsigned int elt = INTVAL (operands[2]);
      unsigned int reg_nelts = 2;
      elt ^= reg_nelts - 1;
      operands[2] = GEN_INT (elt);
    }

  lane = INTVAL (operands[2]);
  gcc_assert ((lane == 0) || (lane == 1));
  emit_move_insn (operands[0], lane == 0
                               ? gen_lowpart (DImode, operands[1])
                               : gen_highpart (DImode, operands[1]));
  DONE;
})

; vset_lane builtin.  The lane number is passed on to vec_set<mode>_internal
; as a one-hot mask (1 << lane); on big-endian it is first XORed with the
; number of elements per 64 bits minus one.
(define_expand "neon_vset_lane<mode>"
  [(match_operand:VDQ 0 "s_register_operand" "=w")
   (match_operand:<V_elem> 1 "s_register_operand" "r")
   (match_operand:VDQ 2 "s_register_operand" "0")
   (match_operand:SI 3 "immediate_operand" "i")]
  "TARGET_NEON"
{
  unsigned int elt = INTVAL (operands[3]);

  if (BYTES_BIG_ENDIAN)
    {
      unsigned int reg_nelts
        = 64 / GET_MODE_UNIT_BITSIZE (<MODE>mode);
      elt ^= reg_nelts - 1;
    }

  emit_insn (gen_vec_set<mode>_internal (operands[0], operands[1],
                                         GEN_INT (1 << elt), operands[2]));
  DONE;
})

; The whole DImode destination is overwritten by operand 1, so the previous
; vector value (operand 2) and the lane number (operand 3) are not used.

(define_expand "neon_vset_lanedi"
  [(match_operand:DI 0 "s_register_operand" "=w")
   (match_operand:DI 1 "s_register_operand" "r")
   (match_operand:DI 2 "s_register_operand" "0")
   (match_operand:SI 3 "immediate_operand" "i")]
  "TARGET_NEON"
{
  emit_move_insn (operands[0], operands[1]);
  DONE;
})

; vcreate builtin: reinterprets the DImode operand in the vector mode via
; gen_lowpart and moves it into the destination.
(define_expand "neon_vcreate<mode>"
  [(match_operand:VD_RE 0 "s_register_operand" "")
   (match_operand:DI 1 "general_operand" "")]
  "TARGET_NEON"
{
  rtx src = gen_lowpart (<MODE>mode, operands[1]);
  emit_move_insn (operands[0], src);
  DONE;
})

; Duplicate a core-register scalar into every lane (VX modes).
(define_insn "neon_vdup_n<mode>"
  [(set (match_operand:VX 0 "s_register_operand" "=w")
        (vec_duplicate:VX (match_operand:<V_elem> 1 "s_register_operand" "r")))]
  "TARGET_NEON"
  "vdup.<V_sz_elem>\t%<V_reg>0, %1"
  [(set_attr "type" "neon_from_gp<q>")]
)

(define_insn "neon_vdup_nv4hf"
  [(set (match_operand:V4HF 0 "s_register_operand" "=w")
        (vec_duplicate:V4HF (match_operand:HF 1 "s_register_operand" "r")))]
  "TARGET_NEON"
  "vdup.16\t%P0, %1"
  [(set_attr "type" "neon_from_gp")]
)

(define_insn "neon_vdup_nv8hf"
  [(set (match_operand:V8HF 0 "s_register_operand" "=w")
        (vec_duplicate:V8HF (match_operand:HF 1 "s_register_operand" "r")))]
  "TARGET_NEON"
  "vdup.16\t%q0, %1"
  [(set_attr "type" "neon_from_gp_q")]
)

; V32 modes: the scalar may come from a core register (alternative 0) or
; from a register matching the "t" constraint (alternative 1, printed
; with %y1).
(define_insn "neon_vdup_n<mode>"
  [(set (match_operand:V32 0 "s_register_operand" "=w,w")
        (vec_duplicate:V32 (match_operand:<V_elem> 1 "s_register_operand" "r,t")))]
  "TARGET_NEON"
  "@
  vdup.<V_sz_elem>\t%<V_reg>0, %1
  vdup.<V_sz_elem>\t%<V_reg>0, %y1"
  [(set_attr "type" "neon_from_gp<q>,neon_dup<q>")]
)

; DImode "duplicate": a plain move.
(define_expand "neon_vdup_ndi"
  [(match_operand:DI 0 "s_register_operand" "=w")
   (match_operand:DI 1 "s_register_operand" "r")]
  "TARGET_NEON"
{
  emit_move_insn (operands[0], operands[1]);
  DONE;
}
)

; V2DI duplicate: two vmov instructions (hence length 8), one for each half
; (%e0 and %f0) of the destination.
(define_insn "neon_vdup_nv2di"
  [(set (match_operand:V2DI 0 "s_register_operand" "=w,w")
        (vec_duplicate:V2DI (match_operand:DI 1 "s_register_operand" "r,w")))]
  "TARGET_NEON"
  "@
  vmov\t%e0, %Q1, %R1\;vmov\t%f0, %Q1, %R1
  vmov\t%e0, %P1\;vmov\t%f0, %P1"
  [(set_attr "length" "8")
   (set_attr "type" "multiple")]
)

; Duplicate one lane of a <V_double_vector_mode> source (printed as a D
; register) into every lane of the destination.
(define_insn "neon_vdup_lane<mode>_internal"
  [(set (match_operand:VDQW 0 "s_register_operand" "=w")
        (vec_duplicate:VDQW
          (vec_select:<V_elem>
            (match_operand:<V_double_vector_mode> 1 "s_register_operand" "w")
            (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
  "TARGET_NEON"
{
  /* On big-endian the lane index is mirrored within the source vector
     before it is printed.  */
  if (BYTES_BIG_ENDIAN)
    {
      int elt = INTVAL (operands[2]);
      elt = GET_MODE_NUNITS (<V_double_vector_mode>mode) - 1 - elt;
      operands[2] = GEN_INT (elt);
    }
  if (<Is_d_reg>)
    return "vdup.<V_sz_elem>\t%P0, %P1[%c2]";
  else
    return "vdup.<V_sz_elem>\t%q0, %P1[%c2]";
}
  [(set_attr "type" "neon_dup<q>")]
)

(define_expand "neon_vdup_lane<mode>"
  [(match_operand:VDQW 0 "s_register_operand" "=w")
(match_operand:<V_double_vector_mode> 1 "s_register_operand" "w") 2938 (match_operand:SI 2 "immediate_operand" "i")] 2939 "TARGET_NEON" 2940{ 2941 if (BYTES_BIG_ENDIAN) 2942 { 2943 unsigned int elt = INTVAL (operands[2]); 2944 unsigned int reg_nelts 2945 = 64 / GET_MODE_UNIT_BITSIZE (<V_double_vector_mode>mode); 2946 elt ^= reg_nelts - 1; 2947 operands[2] = GEN_INT (elt); 2948 } 2949 emit_insn (gen_neon_vdup_lane<mode>_internal (operands[0], operands[1], 2950 operands[2])); 2951 DONE; 2952}) 2953 2954; Scalar index is ignored, since only zero is valid here. 2955(define_expand "neon_vdup_lanedi" 2956 [(match_operand:DI 0 "s_register_operand" "=w") 2957 (match_operand:DI 1 "s_register_operand" "w") 2958 (match_operand:SI 2 "immediate_operand" "i")] 2959 "TARGET_NEON" 2960{ 2961 emit_move_insn (operands[0], operands[1]); 2962 DONE; 2963}) 2964 2965; Likewise for v2di, as the DImode second operand has only a single element. 2966(define_expand "neon_vdup_lanev2di" 2967 [(match_operand:V2DI 0 "s_register_operand" "=w") 2968 (match_operand:DI 1 "s_register_operand" "w") 2969 (match_operand:SI 2 "immediate_operand" "i")] 2970 "TARGET_NEON" 2971{ 2972 emit_insn (gen_neon_vdup_nv2di (operands[0], operands[1])); 2973 DONE; 2974}) 2975 2976; Disabled before reload because we don't want combine doing something silly, 2977; but used by the post-reload expansion of neon_vcombine. 2978(define_insn "*neon_vswp<mode>" 2979 [(set (match_operand:VDQX 0 "s_register_operand" "+w") 2980 (match_operand:VDQX 1 "s_register_operand" "+w")) 2981 (set (match_dup 1) (match_dup 0))] 2982 "TARGET_NEON && reload_completed" 2983 "vswp\t%<V_reg>0, %<V_reg>1" 2984 [(set_attr "type" "neon_permute<q>")] 2985) 2986 2987;; In this insn, operand 1 should be low, and operand 2 the high part of the 2988;; dest vector. 
;; FIXME: A different implementation of this builtin could make it much
;; more likely that we wouldn't actually need to output anything (we could make
;; it so that the reg allocator puts things in the right places magically
;; instead). Lack of subregs for vectors makes that tricky though, I think.

;; Emitted as "#" and split after reload by neon_split_vcombine.
(define_insn_and_split "neon_vcombine<mode>"
  [(set (match_operand:<V_DOUBLE> 0 "s_register_operand" "=w")
        (vec_concat:<V_DOUBLE>
          (match_operand:VDX 1 "s_register_operand" "w")
          (match_operand:VDX 2 "s_register_operand" "w")))]
  "TARGET_NEON"
  "#"
  "&& reload_completed"
  [(const_int 0)]
{
  neon_split_vcombine (operands);
  DONE;
}
[(set_attr "type" "multiple")]
)

;; Take the half of operand 1 found at byte offset
;; GET_MODE_SIZE (<V_HALF>mode), expressed as a subreg move.
(define_expand "neon_vget_high<mode>"
  [(match_operand:<V_HALF> 0 "s_register_operand")
   (match_operand:VQX 1 "s_register_operand")]
  "TARGET_NEON"
{
  emit_move_insn (operands[0],
                  simplify_gen_subreg (<V_HALF>mode, operands[1], <MODE>mode,
                                       GET_MODE_SIZE (<V_HALF>mode)));
  DONE;
})

;; Take the half of operand 1 found at byte offset 0.
(define_expand "neon_vget_low<mode>"
  [(match_operand:<V_HALF> 0 "s_register_operand")
   (match_operand:VQX 1 "s_register_operand")]
  "TARGET_NEON"
{
  emit_move_insn (operands[0],
                  simplify_gen_subreg (<V_HALF>mode, operands[1],
                                       <MODE>mode, 0));
  DONE;
})

;; Standard-named integer -> float conversions.  Both are only enabled
;; when flag_rounding_math is clear.
(define_insn "float<mode><V_cvtto>2"
  [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
        (float:<V_CVTTO> (match_operand:VCVTI 1 "s_register_operand" "w")))]
  "TARGET_NEON && !flag_rounding_math"
  "vcvt.f32.s32\t%<V_reg>0, %<V_reg>1"
  [(set_attr "type" "neon_int_to_fp_<V_elem_ch><q>")]
)

(define_insn "floatuns<mode><V_cvtto>2"
  [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
        (unsigned_float:<V_CVTTO> (match_operand:VCVTI 1 "s_register_operand" "w")))]
  "TARGET_NEON && !flag_rounding_math"
  "vcvt.f32.u32\t%<V_reg>0, %<V_reg>1"
  [(set_attr "type" "neon_int_to_fp_<V_elem_ch><q>")]
)

;; Standard-named float -> integer conversions.
(define_insn "fix_trunc<mode><V_cvtto>2"
  [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
        (fix:<V_CVTTO> (match_operand:VCVTF 1 "s_register_operand" "w")))]
  "TARGET_NEON"
  "vcvt.s32.f32\t%<V_reg>0, %<V_reg>1"
  [(set_attr "type" "neon_fp_to_int_<V_elem_ch><q>")]
)

(define_insn "fixuns_trunc<mode><V_cvtto>2"
  [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
        (unsigned_fix:<V_CVTTO> (match_operand:VCVTF 1 "s_register_operand" "w")))]
  "TARGET_NEON"
  "vcvt.u32.f32\t%<V_reg>0, %<V_reg>1"
  [(set_attr "type" "neon_fp_to_int_<V_elem_ch><q>")]
)

;; Builtin conversions expressed as unspecs; <sup> supplies the
;; signedness letter.  Float source first, then integer source.
(define_insn "neon_vcvt<sup><mode>"
  [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
        (unspec:<V_CVTTO> [(match_operand:VCVTF 1 "s_register_operand" "w")]
                          VCVT_US))]
  "TARGET_NEON"
  "vcvt.<sup>%#32.f32\t%<V_reg>0, %<V_reg>1"
  [(set_attr "type" "neon_fp_to_int_<V_elem_ch><q>")]
)

(define_insn "neon_vcvt<sup><mode>"
  [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
        (unspec:<V_CVTTO> [(match_operand:VCVTI 1 "s_register_operand" "w")]
                          VCVT_US))]
  "TARGET_NEON"
  "vcvt.f32.<sup>%#32\t%<V_reg>0, %<V_reg>1"
  [(set_attr "type" "neon_int_to_fp_<V_elem_ch><q>")]
)

;; Half-precision <-> single-precision conversions; require TARGET_FP16.
(define_insn "neon_vcvtv4sfv4hf"
  [(set (match_operand:V4SF 0 "s_register_operand" "=w")
        (unspec:V4SF [(match_operand:V4HF 1 "s_register_operand" "w")]
                     UNSPEC_VCVT))]
  "TARGET_NEON && TARGET_FP16"
  "vcvt.f32.f16\t%q0, %P1"
  [(set_attr "type" "neon_fp_cvt_widen_h")]
)

(define_insn "neon_vcvtv4hfv4sf"
  [(set (match_operand:V4HF 0 "s_register_operand" "=w")
        (unspec:V4HF [(match_operand:V4SF 1 "s_register_operand" "w")]
                     UNSPEC_VCVT))]
  "TARGET_NEON && TARGET_FP16"
  "vcvt.f16.f32\t%P0, %q1"
  [(set_attr "type" "neon_fp_cvt_narrow_s_q")]
)

;; Conversions with an immediate operand 2, which is checked with
;; neon_const_bounds (operands[2], 1, 33) before the template is returned.
(define_insn "neon_vcvt<sup>_n<mode>"
  [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
        (unspec:<V_CVTTO> [(match_operand:VCVTF 1 "s_register_operand" "w")
                           (match_operand:SI 2 "immediate_operand" "i")]
                          VCVT_US_N))]
  "TARGET_NEON"
{
  neon_const_bounds (operands[2], 1, 33);
  return "vcvt.<sup>%#32.f32\t%<V_reg>0, %<V_reg>1, %2";
}
  [(set_attr "type" "neon_fp_to_int_<V_elem_ch><q>")]
)

(define_insn "neon_vcvt<sup>_n<mode>"
  [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
        (unspec:<V_CVTTO> [(match_operand:VCVTI 1 "s_register_operand" "w")
                           (match_operand:SI 2 "immediate_operand" "i")]
                          VCVT_US_N))]
  "TARGET_NEON"
{
  neon_const_bounds (operands[2], 1, 33);
  return "vcvt.f32.<sup>%#32\t%<V_reg>0, %<V_reg>1, %2";
}
  [(set_attr "type" "neon_int_to_fp_<V_elem_ch><q>")]
)

;; Narrowing moves: Q-register source, D-register destination.
(define_insn "neon_vmovn<mode>"
  [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
        (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")]
                           UNSPEC_VMOVN))]
  "TARGET_NEON"
  "vmovn.<V_if_elem>\t%P0, %q1"
  [(set_attr "type" "neon_shift_imm_narrow_q")]
)

(define_insn "neon_vqmovn<sup><mode>"
  [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
        (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")]
                           VQMOVN))]
  "TARGET_NEON"
  "vqmovn.<sup>%#<V_sz_elem>\t%P0, %q1"
  [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
)

(define_insn "neon_vqmovun<mode>"
  [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
        (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")]
                           UNSPEC_VQMOVUN))]
  "TARGET_NEON"
  "vqmovun.<V_s_elem>\t%P0, %q1"
  [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
)

;; Widening move: D-register source, Q-register destination.
(define_insn "neon_vmovl<sup><mode>"
  [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
        (unspec:<V_widen> [(match_operand:VW 1 "s_register_operand" "w")]
                          VMOVL))]
  "TARGET_NEON"
  "vmovl.<sup>%#<V_sz_elem>\t%q0, %P1"
  [(set_attr "type" "neon_shift_imm_long")]
)

;; Multiply by a scalar taken from lane operand 3 of D-register operand 2.
;; D-register form first, then the Q-register form.
(define_insn "neon_vmul_lane<mode>"
  [(set (match_operand:VMD 0 "s_register_operand" "=w")
        (unspec:VMD [(match_operand:VMD 1 "s_register_operand" "w")
                     (match_operand:VMD 2 "s_register_operand"
                                          "<scalar_mul_constraint>")
                     (match_operand:SI 3 "immediate_operand" "i")]
                    UNSPEC_VMUL_LANE))]
  "TARGET_NEON"
{
  return "vmul.<V_if_elem>\t%P0, %P1, %P2[%c3]";
}
  [(set (attr "type")
     (if_then_else (match_test "<Is_float_mode>")
                   (const_string "neon_fp_mul_s_scalar<q>")
                   (const_string "neon_mul_<V_elem_ch>_scalar<q>")))]
)

(define_insn "neon_vmul_lane<mode>"
  [(set (match_operand:VMQ 0 "s_register_operand" "=w")
        (unspec:VMQ [(match_operand:VMQ 1 "s_register_operand" "w")
                     (match_operand:<V_HALF> 2 "s_register_operand"
                                             "<scalar_mul_constraint>")
                     (match_operand:SI 3 "immediate_operand" "i")]
                    UNSPEC_VMUL_LANE))]
  "TARGET_NEON"
{
  return "vmul.<V_if_elem>\t%q0, %q1, %P2[%c3]";
}
  [(set (attr "type")
     (if_then_else (match_test "<Is_float_mode>")
                   (const_string "neon_fp_mul_s_scalar<q>")
                   (const_string "neon_mul_<V_elem_ch>_scalar<q>")))]
)

(define_insn "neon_vmull<sup>_lane<mode>"
  [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
        (unspec:<V_widen> [(match_operand:VMDI 1 "s_register_operand" "w")
                           (match_operand:VMDI 2 "s_register_operand"
                                                 "<scalar_mul_constraint>")
                           (match_operand:SI 3 "immediate_operand" "i")]
                          VMULL_LANE))]
  "TARGET_NEON"
{
  return "vmull.<sup>%#<V_sz_elem>\t%q0, %P1, %P2[%c3]";
}
  [(set_attr "type" "neon_mul_<V_elem_ch>_scalar_long")]
)

(define_insn "neon_vqdmull_lane<mode>"
  [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
        (unspec:<V_widen> [(match_operand:VMDI 1 "s_register_operand" "w")
                           (match_operand:VMDI 2 "s_register_operand"
                                                 "<scalar_mul_constraint>")
                           (match_operand:SI 3 "immediate_operand" "i")]
                          UNSPEC_VQDMULL_LANE))]
  "TARGET_NEON"
{
  return "vqdmull.<V_s_elem>\t%q0, %P1, %P2[%c3]";
}
  [(set_attr "type" "neon_sat_mul_<V_elem_ch>_scalar_long")]
)

;; Q-register form.
(define_insn "neon_vq<r>dmulh_lane<mode>"
  [(set (match_operand:VMQI 0 "s_register_operand" "=w")
        (unspec:VMQI [(match_operand:VMQI 1 "s_register_operand" "w")
                      (match_operand:<V_HALF> 2 "s_register_operand"
                                              "<scalar_mul_constraint>")
                      (match_operand:SI 3 "immediate_operand" "i")]
                     VQDMULH_LANE))]
  "TARGET_NEON"
{
  return "vq<r>dmulh.<V_s_elem>\t%q0, %q1, %P2[%c3]";
}
  [(set_attr "type" "neon_sat_mul_<V_elem_ch>_scalar_q")]
)

;; D-register form.  The scheduling type carries no "_q" suffix, matching
;; the D-register vqrdml[as]h_lane pattern further down.
(define_insn "neon_vq<r>dmulh_lane<mode>"
  [(set (match_operand:VMDI 0 "s_register_operand" "=w")
        (unspec:VMDI [(match_operand:VMDI 1 "s_register_operand" "w")
                      (match_operand:VMDI 2 "s_register_operand"
                                            "<scalar_mul_constraint>")
                      (match_operand:SI 3 "immediate_operand" "i")]
                     VQDMULH_LANE))]
  "TARGET_NEON"
{
  return "vq<r>dmulh.<V_s_elem>\t%P0, %P1, %P2[%c3]";
}
  [(set_attr "type" "neon_sat_mul_<V_elem_ch>_scalar")]
)

;; vqrdmlah_lane, vqrdmlsh_lane
(define_insn "neon_vqrdml<VQRDMLH_AS:neon_rdma_as>h_lane<mode>"
  [(set (match_operand:VMQI 0 "s_register_operand" "=w")
        (unspec:VMQI [(match_operand:VMQI 1 "s_register_operand" "0")
                      (match_operand:VMQI 2 "s_register_operand" "w")
                      (match_operand:<V_HALF> 3 "s_register_operand"
                                              "<scalar_mul_constraint>")
                      (match_operand:SI 4 "immediate_operand" "i")]
                     VQRDMLH_AS))]
  "TARGET_NEON_RDMA"
{
  return
   "vqrdml<VQRDMLH_AS:neon_rdma_as>h.<V_s_elem>\t%q0, %q2, %P3[%c4]";
}
  [(set_attr "type" "neon_mla_<V_elem_ch>_scalar<q>")]
)

(define_insn "neon_vqrdml<VQRDMLH_AS:neon_rdma_as>h_lane<mode>"
  [(set (match_operand:VMDI 0 "s_register_operand" "=w")
        (unspec:VMDI [(match_operand:VMDI 1 "s_register_operand" "0")
                      (match_operand:VMDI 2 "s_register_operand" "w")
                      (match_operand:VMDI 3 "s_register_operand"
                                            "<scalar_mul_constraint>")
                      (match_operand:SI 4 "immediate_operand" "i")]
                     VQRDMLH_AS))]
  "TARGET_NEON_RDMA"
{
  return
   "vqrdml<VQRDMLH_AS:neon_rdma_as>h.<V_s_elem>\t%P0, %P2, %P3[%c4]";
}
  [(set_attr "type" "neon_mla_<V_elem_ch>_scalar")]
)

;; Multiply-accumulate by lane.  Operand 1 is the accumulator and is tied
;; to the destination ("0"); the scalar is lane operand 4 of operand 3.
(define_insn "neon_vmla_lane<mode>"
  [(set (match_operand:VMD 0 "s_register_operand" "=w")
        (unspec:VMD [(match_operand:VMD 1 "s_register_operand" "0")
                     (match_operand:VMD 2 "s_register_operand" "w")
                     (match_operand:VMD 3 "s_register_operand"
                                          "<scalar_mul_constraint>")
                     (match_operand:SI 4 "immediate_operand" "i")]
                    UNSPEC_VMLA_LANE))]
  "TARGET_NEON"
{
  return "vmla.<V_if_elem>\t%P0, %P2, %P3[%c4]";
}
  [(set (attr "type")
     (if_then_else (match_test "<Is_float_mode>")
                   (const_string "neon_fp_mla_s_scalar<q>")
                   (const_string "neon_mla_<V_elem_ch>_scalar<q>")))]
)

(define_insn "neon_vmla_lane<mode>"
  [(set (match_operand:VMQ 0 "s_register_operand" "=w")
        (unspec:VMQ [(match_operand:VMQ 1 "s_register_operand" "0")
                     (match_operand:VMQ 2 "s_register_operand" "w")
                     (match_operand:<V_HALF> 3 "s_register_operand"
                                             "<scalar_mul_constraint>")
                     (match_operand:SI 4 "immediate_operand" "i")]
                    UNSPEC_VMLA_LANE))]
  "TARGET_NEON"
{
  return "vmla.<V_if_elem>\t%q0, %q2, %P3[%c4]";
}
  [(set (attr "type")
     (if_then_else (match_test "<Is_float_mode>")
                   (const_string "neon_fp_mla_s_scalar<q>")
                   (const_string "neon_mla_<V_elem_ch>_scalar<q>")))]
)

(define_insn "neon_vmlal<sup>_lane<mode>"
  [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
        (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
                           (match_operand:VMDI 2 "s_register_operand" "w")
                           (match_operand:VMDI 3 "s_register_operand"
                                                 "<scalar_mul_constraint>")
                           (match_operand:SI 4 "immediate_operand" "i")]
                          VMLAL_LANE))]
  "TARGET_NEON"
{
  return "vmlal.<sup>%#<V_sz_elem>\t%q0, %P2, %P3[%c4]";
}
  [(set_attr "type" "neon_mla_<V_elem_ch>_scalar_long")]
)

(define_insn "neon_vqdmlal_lane<mode>"
  [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
        (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
                           (match_operand:VMDI 2 "s_register_operand" "w")
                           (match_operand:VMDI 3 "s_register_operand"
                                                 "<scalar_mul_constraint>")
                           (match_operand:SI 4 "immediate_operand" "i")]
                          UNSPEC_VQDMLAL_LANE))]
  "TARGET_NEON"
{
  return "vqdmlal.<V_s_elem>\t%q0, %P2, %P3[%c4]";
}
  [(set_attr "type" "neon_sat_mla_<V_elem_ch>_scalar_long")]
)

;; Multiply-subtract by lane; operand layout as for the vmla_lane patterns.
(define_insn "neon_vmls_lane<mode>"
  [(set (match_operand:VMD 0 "s_register_operand" "=w")
        (unspec:VMD [(match_operand:VMD 1 "s_register_operand" "0")
                     (match_operand:VMD 2 "s_register_operand" "w")
                     (match_operand:VMD 3 "s_register_operand"
                                          "<scalar_mul_constraint>")
                     (match_operand:SI 4 "immediate_operand" "i")]
                    UNSPEC_VMLS_LANE))]
  "TARGET_NEON"
{
  return "vmls.<V_if_elem>\t%P0, %P2, %P3[%c4]";
}
  [(set (attr "type")
     (if_then_else (match_test "<Is_float_mode>")
                   (const_string "neon_fp_mla_s_scalar<q>")
                   (const_string "neon_mla_<V_elem_ch>_scalar<q>")))]
)

(define_insn "neon_vmls_lane<mode>"
  [(set (match_operand:VMQ 0 "s_register_operand" "=w")
        (unspec:VMQ [(match_operand:VMQ 1 "s_register_operand" "0")
                     (match_operand:VMQ 2 "s_register_operand" "w")
                     (match_operand:<V_HALF> 3 "s_register_operand"
                                             "<scalar_mul_constraint>")
                     (match_operand:SI 4 "immediate_operand" "i")]
                    UNSPEC_VMLS_LANE))]
  "TARGET_NEON"
{
  return "vmls.<V_if_elem>\t%q0, %q2, %P3[%c4]";
}
  [(set (attr "type")
     (if_then_else (match_test "<Is_float_mode>")
                   (const_string "neon_fp_mla_s_scalar<q>")
                   (const_string "neon_mla_<V_elem_ch>_scalar<q>")))]
)

(define_insn "neon_vmlsl<sup>_lane<mode>"
  [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
        (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
                           (match_operand:VMDI 2 "s_register_operand" "w")
                           (match_operand:VMDI 3 "s_register_operand"
                                                 "<scalar_mul_constraint>")
                           (match_operand:SI 4 "immediate_operand" "i")]
                          VMLSL_LANE))]
  "TARGET_NEON"
{
  return "vmlsl.<sup>%#<V_sz_elem>\t%q0, %P2, %P3[%c4]";
}
  [(set_attr "type" "neon_mla_<V_elem_ch>_scalar_long")]
)

(define_insn "neon_vqdmlsl_lane<mode>"
  [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
        (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
                           (match_operand:VMDI 2 "s_register_operand" "w")
                           (match_operand:VMDI 3 "s_register_operand"
                                                 "<scalar_mul_constraint>")
                           (match_operand:SI 4 "immediate_operand" "i")]
                          UNSPEC_VQDMLSL_LANE))]
  "TARGET_NEON"
{
  return "vqdmlsl.<V_s_elem>\t%q0, %P2, %P3[%c4]";
}
  [(set_attr "type" "neon_sat_mla_<V_elem_ch>_scalar_long")]
)

; FIXME: For the "_n" multiply/multiply-accumulate insns, we copy a value in a
; core register into a temp register, then use a scalar taken from that. This
; isn't an optimal solution if e.g. the scalar has just been read from memory
; or extracted from another vector. The latter case it's currently better to
; use the "_lane" variant, and the former case can probably be implemented
; using vld1_lane, but that hasn't been done yet.

;; Every "_n" expander below follows the same recipe: allocate a fresh
;; D-register temporary, insert the scalar operand into its lane 0 with
;; neon_vset_lane, then emit the matching "_lane" pattern selecting lane 0.
;; Q-register forms use a <V_HALF>mode temporary.

(define_expand "neon_vmul_n<mode>"
  [(match_operand:VMD 0 "s_register_operand" "")
   (match_operand:VMD 1 "s_register_operand" "")
   (match_operand:<V_elem> 2 "s_register_operand" "")]
  "TARGET_NEON"
{
  rtx tmp = gen_reg_rtx (<MODE>mode);
  emit_insn (gen_neon_vset_lane<mode> (tmp, operands[2], tmp, const0_rtx));
  emit_insn (gen_neon_vmul_lane<mode> (operands[0], operands[1], tmp,
                                       const0_rtx));
  DONE;
})

(define_expand "neon_vmul_n<mode>"
  [(match_operand:VMQ 0 "s_register_operand" "")
   (match_operand:VMQ 1 "s_register_operand" "")
   (match_operand:<V_elem> 2 "s_register_operand" "")]
  "TARGET_NEON"
{
  rtx tmp = gen_reg_rtx (<V_HALF>mode);
  emit_insn (gen_neon_vset_lane<V_half> (tmp, operands[2], tmp, const0_rtx));
  emit_insn (gen_neon_vmul_lane<mode> (operands[0], operands[1], tmp,
                                       const0_rtx));
  DONE;
})

(define_expand "neon_vmulls_n<mode>"
  [(match_operand:<V_widen> 0 "s_register_operand" "")
   (match_operand:VMDI 1 "s_register_operand" "")
   (match_operand:<V_elem> 2 "s_register_operand" "")]
  "TARGET_NEON"
{
  rtx tmp = gen_reg_rtx (<MODE>mode);
  emit_insn (gen_neon_vset_lane<mode> (tmp, operands[2], tmp, const0_rtx));
  emit_insn (gen_neon_vmulls_lane<mode> (operands[0], operands[1], tmp,
                                         const0_rtx));
  DONE;
})

(define_expand "neon_vmullu_n<mode>"
  [(match_operand:<V_widen> 0 "s_register_operand" "")
   (match_operand:VMDI 1 "s_register_operand" "")
   (match_operand:<V_elem> 2 "s_register_operand" "")]
  "TARGET_NEON"
{
  rtx tmp = gen_reg_rtx (<MODE>mode);
  emit_insn (gen_neon_vset_lane<mode> (tmp, operands[2], tmp, const0_rtx));
  emit_insn (gen_neon_vmullu_lane<mode> (operands[0], operands[1], tmp,
                                         const0_rtx));
  DONE;
})

(define_expand "neon_vqdmull_n<mode>"
  [(match_operand:<V_widen> 0 "s_register_operand" "")
   (match_operand:VMDI 1 "s_register_operand" "")
   (match_operand:<V_elem> 2 "s_register_operand" "")]
  "TARGET_NEON"
{
  rtx tmp = gen_reg_rtx (<MODE>mode);
  emit_insn (gen_neon_vset_lane<mode> (tmp, operands[2], tmp, const0_rtx));
  emit_insn (gen_neon_vqdmull_lane<mode> (operands[0], operands[1], tmp,
                                          const0_rtx));
  DONE;
})

(define_expand "neon_vqdmulh_n<mode>"
  [(match_operand:VMDI 0 "s_register_operand" "")
   (match_operand:VMDI 1 "s_register_operand" "")
   (match_operand:<V_elem> 2 "s_register_operand" "")]
  "TARGET_NEON"
{
  rtx tmp = gen_reg_rtx (<MODE>mode);
  emit_insn (gen_neon_vset_lane<mode> (tmp, operands[2], tmp, const0_rtx));
  emit_insn (gen_neon_vqdmulh_lane<mode> (operands[0], operands[1], tmp,
                                          const0_rtx));
  DONE;
})

(define_expand "neon_vqrdmulh_n<mode>"
  [(match_operand:VMDI 0 "s_register_operand" "")
   (match_operand:VMDI 1 "s_register_operand" "")
   (match_operand:<V_elem> 2 "s_register_operand" "")]
  "TARGET_NEON"
{
  rtx tmp = gen_reg_rtx (<MODE>mode);
  emit_insn (gen_neon_vset_lane<mode> (tmp, operands[2], tmp, const0_rtx));
  emit_insn (gen_neon_vqrdmulh_lane<mode> (operands[0], operands[1], tmp,
                                           const0_rtx));
  DONE;
})

(define_expand "neon_vqdmulh_n<mode>"
  [(match_operand:VMQI 0 "s_register_operand" "")
   (match_operand:VMQI 1 "s_register_operand" "")
   (match_operand:<V_elem> 2 "s_register_operand" "")]
  "TARGET_NEON"
{
  rtx tmp = gen_reg_rtx (<V_HALF>mode);
  emit_insn (gen_neon_vset_lane<V_half> (tmp, operands[2], tmp, const0_rtx));
  emit_insn (gen_neon_vqdmulh_lane<mode> (operands[0], operands[1], tmp,
                                          const0_rtx));
  DONE;
})

(define_expand "neon_vqrdmulh_n<mode>"
  [(match_operand:VMQI 0 "s_register_operand" "")
   (match_operand:VMQI 1 "s_register_operand" "")
   (match_operand:<V_elem> 2 "s_register_operand" "")]
  "TARGET_NEON"
{
  rtx tmp = gen_reg_rtx (<V_HALF>mode);
  emit_insn (gen_neon_vset_lane<V_half> (tmp, operands[2], tmp, const0_rtx));
  emit_insn (gen_neon_vqrdmulh_lane<mode> (operands[0], operands[1], tmp,
                                           const0_rtx));
  DONE;
})

(define_expand "neon_vmla_n<mode>"
  [(match_operand:VMD 0 "s_register_operand" "")
   (match_operand:VMD 1 "s_register_operand" "")
   (match_operand:VMD 2 "s_register_operand" "")
   (match_operand:<V_elem> 3 "s_register_operand" "")]
  "TARGET_NEON"
{
  rtx tmp = gen_reg_rtx (<MODE>mode);
  emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
  emit_insn (gen_neon_vmla_lane<mode> (operands[0], operands[1], operands[2],
                                       tmp, const0_rtx));
  DONE;
})

(define_expand "neon_vmla_n<mode>"
  [(match_operand:VMQ 0 "s_register_operand" "")
   (match_operand:VMQ 1 "s_register_operand" "")
   (match_operand:VMQ 2 "s_register_operand" "")
   (match_operand:<V_elem> 3 "s_register_operand" "")]
  "TARGET_NEON"
{
  rtx tmp = gen_reg_rtx (<V_HALF>mode);
  emit_insn (gen_neon_vset_lane<V_half> (tmp, operands[3], tmp, const0_rtx));
  emit_insn (gen_neon_vmla_lane<mode> (operands[0], operands[1], operands[2],
                                       tmp, const0_rtx));
  DONE;
})

(define_expand "neon_vmlals_n<mode>"
  [(match_operand:<V_widen> 0 "s_register_operand" "")
   (match_operand:<V_widen> 1 "s_register_operand" "")
   (match_operand:VMDI 2 "s_register_operand" "")
   (match_operand:<V_elem> 3 "s_register_operand" "")]
  "TARGET_NEON"
{
  rtx tmp = gen_reg_rtx (<MODE>mode);
  emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
  emit_insn (gen_neon_vmlals_lane<mode> (operands[0], operands[1], operands[2],
                                         tmp, const0_rtx));
  DONE;
})

(define_expand "neon_vmlalu_n<mode>"
  [(match_operand:<V_widen> 0 "s_register_operand" "")
   (match_operand:<V_widen> 1 "s_register_operand" "")
   (match_operand:VMDI 2 "s_register_operand" "")
   (match_operand:<V_elem> 3 "s_register_operand" "")]
  "TARGET_NEON"
{
  rtx tmp = gen_reg_rtx (<MODE>mode);
  emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
  emit_insn (gen_neon_vmlalu_lane<mode> (operands[0], operands[1], operands[2],
                                         tmp, const0_rtx));
  DONE;
})

(define_expand "neon_vqdmlal_n<mode>"
  [(match_operand:<V_widen> 0 "s_register_operand" "")
   (match_operand:<V_widen> 1 "s_register_operand" "")
   (match_operand:VMDI 2 "s_register_operand" "")
   (match_operand:<V_elem> 3 "s_register_operand" "")]
  "TARGET_NEON"
{
  rtx tmp = gen_reg_rtx (<MODE>mode);
  emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
  emit_insn (gen_neon_vqdmlal_lane<mode> (operands[0], operands[1], operands[2],
                                          tmp, const0_rtx));
  DONE;
})

(define_expand "neon_vmls_n<mode>"
  [(match_operand:VMD 0 "s_register_operand" "")
   (match_operand:VMD 1 "s_register_operand" "")
   (match_operand:VMD 2 "s_register_operand" "")
   (match_operand:<V_elem> 3 "s_register_operand" "")]
  "TARGET_NEON"
{
  rtx tmp = gen_reg_rtx (<MODE>mode);
  emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
  emit_insn (gen_neon_vmls_lane<mode> (operands[0], operands[1], operands[2],
                                       tmp, const0_rtx));
  DONE;
})

(define_expand "neon_vmls_n<mode>"
  [(match_operand:VMQ 0 "s_register_operand" "")
   (match_operand:VMQ 1 "s_register_operand" "")
   (match_operand:VMQ 2 "s_register_operand" "")
   (match_operand:<V_elem> 3 "s_register_operand" "")]
  "TARGET_NEON"
{
  rtx tmp = gen_reg_rtx (<V_HALF>mode);
  emit_insn (gen_neon_vset_lane<V_half> (tmp, operands[3], tmp, const0_rtx));
  emit_insn (gen_neon_vmls_lane<mode> (operands[0], operands[1], operands[2],
                                       tmp, const0_rtx));
  DONE;
})

(define_expand "neon_vmlsls_n<mode>"
  [(match_operand:<V_widen> 0 "s_register_operand" "")
   (match_operand:<V_widen> 1 "s_register_operand" "")
   (match_operand:VMDI 2 "s_register_operand" "")
   (match_operand:<V_elem> 3 "s_register_operand" "")]
  "TARGET_NEON"
{
  rtx tmp = gen_reg_rtx (<MODE>mode);
  emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
  emit_insn (gen_neon_vmlsls_lane<mode> (operands[0], operands[1], operands[2],
                                         tmp, const0_rtx));
  DONE;
})

(define_expand "neon_vmlslu_n<mode>"
  [(match_operand:<V_widen> 0 "s_register_operand" "")
   (match_operand:<V_widen> 1 "s_register_operand" "")
   (match_operand:VMDI 2 "s_register_operand" "")
   (match_operand:<V_elem> 3 "s_register_operand" "")]
  "TARGET_NEON"
{
  rtx tmp = gen_reg_rtx (<MODE>mode);
  emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
  emit_insn (gen_neon_vmlslu_lane<mode> (operands[0], operands[1], operands[2],
                                         tmp, const0_rtx));
  DONE;
})

(define_expand "neon_vqdmlsl_n<mode>"
  [(match_operand:<V_widen> 0 "s_register_operand" "")
   (match_operand:<V_widen> 1 "s_register_operand" "")
   (match_operand:VMDI 2 "s_register_operand" "")
   (match_operand:<V_elem> 3 "s_register_operand" "")]
  "TARGET_NEON"
{
  rtx tmp = gen_reg_rtx (<MODE>mode);
  emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
  emit_insn (gen_neon_vqdmlsl_lane<mode> (operands[0], operands[1], operands[2],
                                          tmp, const0_rtx));
  DONE;
})

;; vext: immediate operand 3 is checked with
;; neon_const_bounds (operands[3], 0, GET_MODE_NUNITS (<MODE>mode)).
(define_insn "neon_vext<mode>"
  [(set (match_operand:VDQX 0 "s_register_operand" "=w")
        (unspec:VDQX [(match_operand:VDQX 1 "s_register_operand" "w")
                      (match_operand:VDQX 2 "s_register_operand" "w")
                      (match_operand:SI 3 "immediate_operand" "i")]
                     UNSPEC_VEXT))]
  "TARGET_NEON"
{
  neon_const_bounds (operands[3], 0, GET_MODE_NUNITS (<MODE>mode));
  return "vext.<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2, %3";
}
  [(set_attr "type" "neon_ext<q>")]
)

;; vrev64 / vrev32 / vrev16.
(define_insn "neon_vrev64<mode>"
  [(set (match_operand:VDQ 0 "s_register_operand" "=w")
        (unspec:VDQ [(match_operand:VDQ 1 "s_register_operand" "w")]
                    UNSPEC_VREV64))]
  "TARGET_NEON"
  "vrev64.<V_sz_elem>\t%<V_reg>0, %<V_reg>1"
  [(set_attr "type" "neon_rev<q>")]
)

(define_insn "neon_vrev32<mode>"
  [(set (match_operand:VX 0 "s_register_operand" "=w")
        (unspec:VX [(match_operand:VX 1 "s_register_operand" "w")]
                   UNSPEC_VREV32))]
  "TARGET_NEON"
  "vrev32.<V_sz_elem>\t%<V_reg>0, %<V_reg>1"
  [(set_attr "type" "neon_rev<q>")]
)

(define_insn "neon_vrev16<mode>"
  [(set (match_operand:VE 0 "s_register_operand" "=w")
        (unspec:VE [(match_operand:VE 1 "s_register_operand" "w")]
                   UNSPEC_VREV16))]
  "TARGET_NEON"
  "vrev16.<V_sz_elem>\t%<V_reg>0, %<V_reg>1"
  [(set_attr "type" "neon_rev<q>")]
)

; vbsl_* intrinsics may compile to any of vbsl/vbif/vbit depending on register
; allocation.
For an intrinsic of form: 3724; rD = vbsl_* (rS, rN, rM) 3725; We can use any of: 3726; vbsl rS, rN, rM (if D = S) 3727; vbit rD, rN, rS (if D = M, so 1-bits in rS choose bits from rN, else rM) 3728; vbif rD, rM, rS (if D = N, so 0-bits in rS choose bits from rM, else rN) 3729 3730(define_insn "neon_vbsl<mode>_internal" 3731 [(set (match_operand:VDQX 0 "s_register_operand" "=w,w,w") 3732 (unspec:VDQX [(match_operand:VDQX 1 "s_register_operand" " 0,w,w") 3733 (match_operand:VDQX 2 "s_register_operand" " w,w,0") 3734 (match_operand:VDQX 3 "s_register_operand" " w,0,w")] 3735 UNSPEC_VBSL))] 3736 "TARGET_NEON" 3737 "@ 3738 vbsl\t%<V_reg>0, %<V_reg>2, %<V_reg>3 3739 vbit\t%<V_reg>0, %<V_reg>2, %<V_reg>1 3740 vbif\t%<V_reg>0, %<V_reg>3, %<V_reg>1" 3741 [(set_attr "type" "neon_bsl<q>")] 3742) 3743 3744(define_expand "neon_vbsl<mode>" 3745 [(set (match_operand:VDQX 0 "s_register_operand" "") 3746 (unspec:VDQX [(match_operand:<V_cmp_result> 1 "s_register_operand" "") 3747 (match_operand:VDQX 2 "s_register_operand" "") 3748 (match_operand:VDQX 3 "s_register_operand" "")] 3749 UNSPEC_VBSL))] 3750 "TARGET_NEON" 3751{ 3752 /* We can't alias operands together if they have different modes. 
*/ 3753 operands[1] = gen_lowpart (<MODE>mode, operands[1]); 3754}) 3755 3756;; vshl, vrshl 3757(define_insn "neon_v<shift_op><sup><mode>" 3758 [(set (match_operand:VDQIX 0 "s_register_operand" "=w") 3759 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w") 3760 (match_operand:VDQIX 2 "s_register_operand" "w")] 3761 VSHL))] 3762 "TARGET_NEON" 3763 "v<shift_op>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2" 3764 [(set_attr "type" "neon_shift_imm<q>")] 3765) 3766 3767;; vqshl, vqrshl 3768(define_insn "neon_v<shift_op><sup><mode>" 3769 [(set (match_operand:VDQIX 0 "s_register_operand" "=w") 3770 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w") 3771 (match_operand:VDQIX 2 "s_register_operand" "w")] 3772 VQSHL))] 3773 "TARGET_NEON" 3774 "v<shift_op>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2" 3775 [(set_attr "type" "neon_sat_shift_imm<q>")] 3776) 3777 3778;; vshr_n, vrshr_n 3779(define_insn "neon_v<shift_op><sup>_n<mode>" 3780 [(set (match_operand:VDQIX 0 "s_register_operand" "=w") 3781 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w") 3782 (match_operand:SI 2 "immediate_operand" "i")] 3783 VSHR_N))] 3784 "TARGET_NEON" 3785{ 3786 neon_const_bounds (operands[2], 1, neon_element_bits (<MODE>mode) + 1); 3787 return "v<shift_op>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %2"; 3788} 3789 [(set_attr "type" "neon_shift_imm<q>")] 3790) 3791 3792;; vshrn_n, vrshrn_n 3793(define_insn "neon_v<shift_op>_n<mode>" 3794 [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w") 3795 (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w") 3796 (match_operand:SI 2 "immediate_operand" "i")] 3797 VSHRN_N))] 3798 "TARGET_NEON" 3799{ 3800 neon_const_bounds (operands[2], 1, neon_element_bits (<MODE>mode) / 2 + 1); 3801 return "v<shift_op>.<V_if_elem>\t%P0, %q1, %2"; 3802} 3803 [(set_attr "type" "neon_shift_imm_narrow_q")] 3804) 3805 3806;; vqshrn_n, vqrshrn_n 3807(define_insn "neon_v<shift_op><sup>_n<mode>" 3808 [(set 
(match_operand:<V_narrow> 0 "s_register_operand" "=w") 3809 (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w") 3810 (match_operand:SI 2 "immediate_operand" "i")] 3811 VQSHRN_N))] 3812 "TARGET_NEON" 3813{ 3814 neon_const_bounds (operands[2], 1, neon_element_bits (<MODE>mode) / 2 + 1); 3815 return "v<shift_op>.<sup>%#<V_sz_elem>\t%P0, %q1, %2"; 3816} 3817 [(set_attr "type" "neon_sat_shift_imm_narrow_q")] 3818) 3819 3820;; vqshrun_n, vqrshrun_n 3821(define_insn "neon_v<shift_op>_n<mode>" 3822 [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w") 3823 (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w") 3824 (match_operand:SI 2 "immediate_operand" "i")] 3825 VQSHRUN_N))] 3826 "TARGET_NEON" 3827{ 3828 neon_const_bounds (operands[2], 1, neon_element_bits (<MODE>mode) / 2 + 1); 3829 return "v<shift_op>.<V_s_elem>\t%P0, %q1, %2"; 3830} 3831 [(set_attr "type" "neon_sat_shift_imm_narrow_q")] 3832) 3833 3834(define_insn "neon_vshl_n<mode>" 3835 [(set (match_operand:VDQIX 0 "s_register_operand" "=w") 3836 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w") 3837 (match_operand:SI 2 "immediate_operand" "i")] 3838 UNSPEC_VSHL_N))] 3839 "TARGET_NEON" 3840{ 3841 neon_const_bounds (operands[2], 0, neon_element_bits (<MODE>mode)); 3842 return "vshl.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %2"; 3843} 3844 [(set_attr "type" "neon_shift_imm<q>")] 3845) 3846 3847(define_insn "neon_vqshl_<sup>_n<mode>" 3848 [(set (match_operand:VDQIX 0 "s_register_operand" "=w") 3849 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w") 3850 (match_operand:SI 2 "immediate_operand" "i")] 3851 VQSHL_N))] 3852 "TARGET_NEON" 3853{ 3854 neon_const_bounds (operands[2], 0, neon_element_bits (<MODE>mode)); 3855 return "vqshl.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %2"; 3856} 3857 [(set_attr "type" "neon_sat_shift_imm<q>")] 3858) 3859 3860(define_insn "neon_vqshlu_n<mode>" 3861 [(set (match_operand:VDQIX 0 "s_register_operand" "=w") 3862 (unspec:VDQIX 
[(match_operand:VDQIX 1 "s_register_operand" "w") 3863 (match_operand:SI 2 "immediate_operand" "i")] 3864 UNSPEC_VQSHLU_N))] 3865 "TARGET_NEON" 3866{ 3867 neon_const_bounds (operands[2], 0, neon_element_bits (<MODE>mode)); 3868 return "vqshlu.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %2"; 3869} 3870 [(set_attr "type" "neon_sat_shift_imm<q>")] 3871) 3872 3873(define_insn "neon_vshll<sup>_n<mode>" 3874 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w") 3875 (unspec:<V_widen> [(match_operand:VW 1 "s_register_operand" "w") 3876 (match_operand:SI 2 "immediate_operand" "i")] 3877 VSHLL_N))] 3878 "TARGET_NEON" 3879{ 3880 /* The boundaries are: 0 < imm <= size. */ 3881 neon_const_bounds (operands[2], 0, neon_element_bits (<MODE>mode) + 1); 3882 return "vshll.<sup>%#<V_sz_elem>\t%q0, %P1, %2"; 3883} 3884 [(set_attr "type" "neon_shift_imm_long")] 3885) 3886 3887;; vsra_n, vrsra_n 3888(define_insn "neon_v<shift_op><sup>_n<mode>" 3889 [(set (match_operand:VDQIX 0 "s_register_operand" "=w") 3890 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "0") 3891 (match_operand:VDQIX 2 "s_register_operand" "w") 3892 (match_operand:SI 3 "immediate_operand" "i")] 3893 VSRA_N))] 3894 "TARGET_NEON" 3895{ 3896 neon_const_bounds (operands[3], 1, neon_element_bits (<MODE>mode) + 1); 3897 return "v<shift_op>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>2, %3"; 3898} 3899 [(set_attr "type" "neon_shift_acc<q>")] 3900) 3901 3902(define_insn "neon_vsri_n<mode>" 3903 [(set (match_operand:VDQIX 0 "s_register_operand" "=w") 3904 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "0") 3905 (match_operand:VDQIX 2 "s_register_operand" "w") 3906 (match_operand:SI 3 "immediate_operand" "i")] 3907 UNSPEC_VSRI))] 3908 "TARGET_NEON" 3909{ 3910 neon_const_bounds (operands[3], 1, neon_element_bits (<MODE>mode) + 1); 3911 return "vsri.<V_sz_elem>\t%<V_reg>0, %<V_reg>2, %3"; 3912} 3913 [(set_attr "type" "neon_shift_reg<q>")] 3914) 3915 3916(define_insn "neon_vsli_n<mode>" 3917 [(set 
(match_operand:VDQIX 0 "s_register_operand" "=w") 3918 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "0") 3919 (match_operand:VDQIX 2 "s_register_operand" "w") 3920 (match_operand:SI 3 "immediate_operand" "i")] 3921 UNSPEC_VSLI))] 3922 "TARGET_NEON" 3923{ 3924 neon_const_bounds (operands[3], 0, neon_element_bits (<MODE>mode)); 3925 return "vsli.<V_sz_elem>\t%<V_reg>0, %<V_reg>2, %3"; 3926} 3927 [(set_attr "type" "neon_shift_reg<q>")] 3928) 3929 3930(define_insn "neon_vtbl1v8qi" 3931 [(set (match_operand:V8QI 0 "s_register_operand" "=w") 3932 (unspec:V8QI [(match_operand:V8QI 1 "s_register_operand" "w") 3933 (match_operand:V8QI 2 "s_register_operand" "w")] 3934 UNSPEC_VTBL))] 3935 "TARGET_NEON" 3936 "vtbl.8\t%P0, {%P1}, %P2" 3937 [(set_attr "type" "neon_tbl1")] 3938) 3939 3940(define_insn "neon_vtbl2v8qi" 3941 [(set (match_operand:V8QI 0 "s_register_operand" "=w") 3942 (unspec:V8QI [(match_operand:TI 1 "s_register_operand" "w") 3943 (match_operand:V8QI 2 "s_register_operand" "w")] 3944 UNSPEC_VTBL))] 3945 "TARGET_NEON" 3946{ 3947 rtx ops[4]; 3948 int tabbase = REGNO (operands[1]); 3949 3950 ops[0] = operands[0]; 3951 ops[1] = gen_rtx_REG (V8QImode, tabbase); 3952 ops[2] = gen_rtx_REG (V8QImode, tabbase + 2); 3953 ops[3] = operands[2]; 3954 output_asm_insn ("vtbl.8\t%P0, {%P1, %P2}, %P3", ops); 3955 3956 return ""; 3957} 3958 [(set_attr "type" "neon_tbl2")] 3959) 3960 3961(define_insn "neon_vtbl3v8qi" 3962 [(set (match_operand:V8QI 0 "s_register_operand" "=w") 3963 (unspec:V8QI [(match_operand:EI 1 "s_register_operand" "w") 3964 (match_operand:V8QI 2 "s_register_operand" "w")] 3965 UNSPEC_VTBL))] 3966 "TARGET_NEON" 3967{ 3968 rtx ops[5]; 3969 int tabbase = REGNO (operands[1]); 3970 3971 ops[0] = operands[0]; 3972 ops[1] = gen_rtx_REG (V8QImode, tabbase); 3973 ops[2] = gen_rtx_REG (V8QImode, tabbase + 2); 3974 ops[3] = gen_rtx_REG (V8QImode, tabbase + 4); 3975 ops[4] = operands[2]; 3976 output_asm_insn ("vtbl.8\t%P0, {%P1, %P2, %P3}, %P4", ops); 3977 3978 
return ""; 3979} 3980 [(set_attr "type" "neon_tbl3")] 3981) 3982 3983(define_insn "neon_vtbl4v8qi" 3984 [(set (match_operand:V8QI 0 "s_register_operand" "=w") 3985 (unspec:V8QI [(match_operand:OI 1 "s_register_operand" "w") 3986 (match_operand:V8QI 2 "s_register_operand" "w")] 3987 UNSPEC_VTBL))] 3988 "TARGET_NEON" 3989{ 3990 rtx ops[6]; 3991 int tabbase = REGNO (operands[1]); 3992 3993 ops[0] = operands[0]; 3994 ops[1] = gen_rtx_REG (V8QImode, tabbase); 3995 ops[2] = gen_rtx_REG (V8QImode, tabbase + 2); 3996 ops[3] = gen_rtx_REG (V8QImode, tabbase + 4); 3997 ops[4] = gen_rtx_REG (V8QImode, tabbase + 6); 3998 ops[5] = operands[2]; 3999 output_asm_insn ("vtbl.8\t%P0, {%P1, %P2, %P3, %P4}, %P5", ops); 4000 4001 return ""; 4002} 4003 [(set_attr "type" "neon_tbl4")] 4004) 4005 4006;; These three are used by the vec_perm infrastructure for V16QImode. 4007(define_insn_and_split "neon_vtbl1v16qi" 4008 [(set (match_operand:V16QI 0 "s_register_operand" "=&w") 4009 (unspec:V16QI [(match_operand:V16QI 1 "s_register_operand" "w") 4010 (match_operand:V16QI 2 "s_register_operand" "w")] 4011 UNSPEC_VTBL))] 4012 "TARGET_NEON" 4013 "#" 4014 "&& reload_completed" 4015 [(const_int 0)] 4016{ 4017 rtx op0, op1, op2, part0, part2; 4018 unsigned ofs; 4019 4020 op0 = operands[0]; 4021 op1 = gen_lowpart (TImode, operands[1]); 4022 op2 = operands[2]; 4023 4024 ofs = subreg_lowpart_offset (V8QImode, V16QImode); 4025 part0 = simplify_subreg (V8QImode, op0, V16QImode, ofs); 4026 part2 = simplify_subreg (V8QImode, op2, V16QImode, ofs); 4027 emit_insn (gen_neon_vtbl2v8qi (part0, op1, part2)); 4028 4029 ofs = subreg_highpart_offset (V8QImode, V16QImode); 4030 part0 = simplify_subreg (V8QImode, op0, V16QImode, ofs); 4031 part2 = simplify_subreg (V8QImode, op2, V16QImode, ofs); 4032 emit_insn (gen_neon_vtbl2v8qi (part0, op1, part2)); 4033 DONE; 4034} 4035 [(set_attr "type" "multiple")] 4036) 4037 4038(define_insn_and_split "neon_vtbl2v16qi" 4039 [(set (match_operand:V16QI 0 "s_register_operand" 
"=&w") 4040 (unspec:V16QI [(match_operand:OI 1 "s_register_operand" "w") 4041 (match_operand:V16QI 2 "s_register_operand" "w")] 4042 UNSPEC_VTBL))] 4043 "TARGET_NEON" 4044 "#" 4045 "&& reload_completed" 4046 [(const_int 0)] 4047{ 4048 rtx op0, op1, op2, part0, part2; 4049 unsigned ofs; 4050 4051 op0 = operands[0]; 4052 op1 = operands[1]; 4053 op2 = operands[2]; 4054 4055 ofs = subreg_lowpart_offset (V8QImode, V16QImode); 4056 part0 = simplify_subreg (V8QImode, op0, V16QImode, ofs); 4057 part2 = simplify_subreg (V8QImode, op2, V16QImode, ofs); 4058 emit_insn (gen_neon_vtbl2v8qi (part0, op1, part2)); 4059 4060 ofs = subreg_highpart_offset (V8QImode, V16QImode); 4061 part0 = simplify_subreg (V8QImode, op0, V16QImode, ofs); 4062 part2 = simplify_subreg (V8QImode, op2, V16QImode, ofs); 4063 emit_insn (gen_neon_vtbl2v8qi (part0, op1, part2)); 4064 DONE; 4065} 4066 [(set_attr "type" "multiple")] 4067) 4068 4069;; ??? Logically we should extend the regular neon_vcombine pattern to 4070;; handle quad-word input modes, producing octa-word output modes. But 4071;; that requires us to add support for octa-word vector modes in moves. 4072;; That seems overkill for this one use in vec_perm. 
;; This pattern is never output directly (its template is "#"); once
;; reload has completed it is split by neon_split_vcombine.
(define_insn_and_split "neon_vcombinev16qi"
  [(set (match_operand:OI 0 "s_register_operand" "=w")
        (unspec:OI [(match_operand:V16QI 1 "s_register_operand" "w")
                    (match_operand:V16QI 2 "s_register_operand" "w")]
                   UNSPEC_VCONCAT))]
  "TARGET_NEON"
  "#"
  "&& reload_completed"
  [(const_int 0)]
{
  neon_split_vcombine (operands);
  DONE;
}
[(set_attr "type" "multiple")]
)

;; vtbx with a single-register list.  Operand 1 is tied to the output
;; (constraint "0"); operand 2 is printed as the register list and
;; operand 3 as the last assembly operand.
(define_insn "neon_vtbx1v8qi"
  [(set (match_operand:V8QI 0 "s_register_operand" "=w")
        (unspec:V8QI [(match_operand:V8QI 1 "s_register_operand" "0")
                      (match_operand:V8QI 2 "s_register_operand" "w")
                      (match_operand:V8QI 3 "s_register_operand" "w")]
                     UNSPEC_VTBX))]
  "TARGET_NEON"
  "vtbx.8\t%P0, {%P2}, %P3"
  [(set_attr "type" "neon_tbl1")]
)

;; vtbx with the TImode operand 2 printed as a two-register list.
(define_insn "neon_vtbx2v8qi"
  [(set (match_operand:V8QI 0 "s_register_operand" "=w")
        (unspec:V8QI [(match_operand:V8QI 1 "s_register_operand" "0")
                      (match_operand:TI 2 "s_register_operand" "w")
                      (match_operand:V8QI 3 "s_register_operand" "w")]
                     UNSPEC_VTBX))]
  "TARGET_NEON"
{
  /* asm_ops[0] is the destination, asm_ops[1..2] are V8QImode views of
     operand 2 at hard register numbers base and base + 2, and the last
     entry is operand 3.  */
  const int list_len = 2;
  const int first_regno = REGNO (operands[2]);
  rtx asm_ops[4];

  asm_ops[0] = operands[0];
  for (int i = 0; i < list_len; i++)
    asm_ops[i + 1] = gen_rtx_REG (V8QImode, first_regno + 2 * i);
  asm_ops[list_len + 1] = operands[3];
  output_asm_insn ("vtbx.8\t%P0, {%P1, %P2}, %P3", asm_ops);

  /* Everything was already written by output_asm_insn.  */
  return "";
}
  [(set_attr "type" "neon_tbl2")]
)

;; vtbx with the EImode operand 2 printed as a three-register list.
(define_insn "neon_vtbx3v8qi"
  [(set (match_operand:V8QI 0 "s_register_operand" "=w")
        (unspec:V8QI [(match_operand:V8QI 1 "s_register_operand" "0")
                      (match_operand:EI 2 "s_register_operand" "w")
                      (match_operand:V8QI 3 "s_register_operand" "w")]
                     UNSPEC_VTBX))]
  "TARGET_NEON"
{
  /* Same layout as the two-register form, with registers at
     base, base + 2 and base + 4.  */
  const int list_len = 3;
  const int first_regno = REGNO (operands[2]);
  rtx asm_ops[5];

  asm_ops[0] = operands[0];
  for (int i = 0; i < list_len; i++)
    asm_ops[i + 1] = gen_rtx_REG (V8QImode, first_regno + 2 * i);
  asm_ops[list_len + 1] = operands[3];
  output_asm_insn ("vtbx.8\t%P0, {%P1, %P2, %P3}, %P4", asm_ops);

  /* Everything was already written by output_asm_insn.  */
  return "";
}
  [(set_attr "type" "neon_tbl3")]
)

;; vtbx with the OImode operand 2 printed as a four-register list.
(define_insn "neon_vtbx4v8qi"
  [(set (match_operand:V8QI 0 "s_register_operand" "=w")
        (unspec:V8QI [(match_operand:V8QI 1 "s_register_operand" "0")
                      (match_operand:OI 2 "s_register_operand" "w")
                      (match_operand:V8QI 3 "s_register_operand" "w")]
                     UNSPEC_VTBX))]
  "TARGET_NEON"
{
  /* Same layout as the two-register form, with registers at
     base, base + 2, base + 4 and base + 6.  */
  const int list_len = 4;
  const int first_regno = REGNO (operands[2]);
  rtx asm_ops[6];

  asm_ops[0] = operands[0];
  for (int i = 0; i < list_len; i++)
    asm_ops[i + 1] = gen_rtx_REG (V8QImode, first_regno + 2 * i);
  asm_ops[list_len + 1] = operands[3];
  output_asm_insn ("vtbx.8\t%P0, {%P1, %P2, %P3, %P4}, %P5", asm_ops);

  /* Everything was already written by output_asm_insn.  */
  return "";
}
  [(set_attr "type" "neon_tbl4")]
)

;; Expander producing a two-set parallel: operand 0 receives the
;; UNSPEC_VTRN1 result and operand 3 the UNSPEC_VTRN2 result of the same
;; two inputs (operands 1 and 2).
(define_expand "neon_vtrn<mode>_internal"
  [(parallel
    [(set (match_operand:VDQW 0 "s_register_operand" "")
          (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "")
                        (match_operand:VDQW 2 "s_register_operand" "")]
           UNSPEC_VTRN1))
     (set (match_operand:VDQW 3 "s_register_operand" "")
          (unspec:VDQW [(match_dup 1) (match_dup 2)] UNSPEC_VTRN2))])]
  "TARGET_NEON"
  ""
)

;; Note: Different operand numbering to handle tied registers correctly.
;; Matching insn for the vtrn parallel.  Operands 0 and 2 are the two
;; outputs (both earlyclobber, "=&w"); input operand 1 is tied to output 0
;; and input operand 3 is tied to output 2 through the matching
;; constraints "0" and "2", so the template only names operands 0 and 2.
(define_insn "*neon_vtrn<mode>_insn"
  [(set (match_operand:VDQW 0 "s_register_operand" "=&w")
        (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0")
                      (match_operand:VDQW 3 "s_register_operand" "2")]
                     UNSPEC_VTRN1))
   (set (match_operand:VDQW 2 "s_register_operand" "=&w")
         (unspec:VDQW [(match_dup 1) (match_dup 3)]
                     UNSPEC_VTRN2))]
  "TARGET_NEON"
  "vtrn.<V_sz_elem>\t%<V_reg>0, %<V_reg>2"
  [(set_attr "type" "neon_permute<q>")]
)

;; Expander producing a two-set parallel: operand 0 receives the
;; UNSPEC_VZIP1 result and operand 3 the UNSPEC_VZIP2 result of the same
;; two inputs (operands 1 and 2).
(define_expand "neon_vzip<mode>_internal"
  [(parallel
    [(set (match_operand:VDQW 0 "s_register_operand" "")
          (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "")
                        (match_operand:VDQW 2 "s_register_operand" "")]
           UNSPEC_VZIP1))
    (set (match_operand:VDQW 3 "s_register_operand" "")
         (unspec:VDQW [(match_dup 1) (match_dup 2)] UNSPEC_VZIP2))])]
  "TARGET_NEON"
  ""
)

;; Note: Different operand numbering to handle tied registers correctly.
;; Outputs are operands 0 and 2; inputs 1 and 3 are tied to them via the
;; matching constraints "0" and "2".
(define_insn "*neon_vzip<mode>_insn"
  [(set (match_operand:VDQW 0 "s_register_operand" "=&w")
        (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0")
                      (match_operand:VDQW 3 "s_register_operand" "2")]
                     UNSPEC_VZIP1))
   (set (match_operand:VDQW 2 "s_register_operand" "=&w")
        (unspec:VDQW [(match_dup 1) (match_dup 3)]
                     UNSPEC_VZIP2))]
  "TARGET_NEON"
  "vzip.<V_sz_elem>\t%<V_reg>0, %<V_reg>2"
  [(set_attr "type" "neon_zip<q>")]
)

;; Expander producing a two-set parallel: operand 0 receives the
;; UNSPEC_VUZP1 result and operand 3 the UNSPEC_VUZP2 result of the same
;; two inputs (operands 1 and 2).
(define_expand "neon_vuzp<mode>_internal"
  [(parallel
    [(set (match_operand:VDQW 0 "s_register_operand" "")
          (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "")
                        (match_operand:VDQW 2 "s_register_operand" "")]
           UNSPEC_VUZP1))
     (set (match_operand:VDQW 3 "s_register_operand" "")
          (unspec:VDQW [(match_dup 1) (match_dup 2)] UNSPEC_VUZP2))])]
  "TARGET_NEON"
  ""
)

;; Note: Different operand numbering to handle tied registers correctly.
;; Outputs are operands 0 and 2 (both earlyclobber); inputs 1 and 3 are
;; tied to them via the matching constraints "0" and "2".
(define_insn "*neon_vuzp<mode>_insn"
  [(set (match_operand:VDQW 0 "s_register_operand" "=&w")
        (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0")
                      (match_operand:VDQW 3 "s_register_operand" "2")]
                     UNSPEC_VUZP1))
   (set (match_operand:VDQW 2 "s_register_operand" "=&w")
        (unspec:VDQW [(match_dup 1) (match_dup 3)]
                     UNSPEC_VUZP2))]
  "TARGET_NEON"
  "vuzp.<V_sz_elem>\t%<V_reg>0, %<V_reg>2"
  [(set_attr "type" "neon_zip<q>")]
)

;; Expander with no preparation code: emits the UNSPEC_VLD1 set as written.
(define_expand "vec_load_lanes<mode><mode>"
  [(set (match_operand:VDQX 0 "s_register_operand")
        (unspec:VDQX [(match_operand:VDQX 1 "neon_struct_operand")]
                     UNSPEC_VLD1))]
  "TARGET_NEON")

;; Whole-vector vld1 from the memory operand 1 into register operand 0.
(define_insn "neon_vld1<mode>"
  [(set (match_operand:VDQX 0 "s_register_operand" "=w")
        (unspec:VDQX [(match_operand:VDQX 1 "neon_struct_operand" "Um")]
                    UNSPEC_VLD1))]
  "TARGET_NEON"
  "vld1.<V_sz_elem>\t%h0, %A1"
  [(set_attr "type" "neon_load1_1reg<q>")]
)

;; The lane numbers in the RTL are in GCC lane order, having been flipped
;; in arm_expand_neon_args. The lane numbers are restored to architectural
;; lane order here.
(define_insn "neon_vld1_lane<mode>"
  [(set (match_operand:VDX 0 "s_register_operand" "=w")
        (unspec:VDX [(match_operand:<V_elem> 1 "neon_struct_operand" "Um")
                     (match_operand:VDX 2 "s_register_operand" "0")
                     (match_operand:SI 3 "immediate_operand" "i")]
                    UNSPEC_VLD1_LANE))]
  "TARGET_NEON"
{
  /* Replace the lane operand with its NEON_ENDIAN_LANE_N mapping so that
     %c3 prints the converted index.  */
  operands[3]
    = GEN_INT (NEON_ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[3])));

  /* A one-element mode has no lane to select: load the whole register.  */
  if (GET_MODE_NUNITS (<MODE>mode) != 1)
    return "vld1.<V_sz_elem>\t{%P0[%c3]}, %A1";
  return "vld1.<V_sz_elem>\t%P0, %A1";
}
  [(set_attr "type" "neon_load1_one_lane<q>")]
)

;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
;; here on big endian targets.
(define_insn "neon_vld1_lane<mode>"
  [(set (match_operand:VQX 0 "s_register_operand" "=w")
        (unspec:VQX [(match_operand:<V_elem> 1 "neon_struct_operand" "Um")
                     (match_operand:VQX 2 "s_register_operand" "0")
                     (match_operand:SI 3 "immediate_operand" "i")]
                    UNSPEC_VLD1_LANE))]
  "TARGET_NEON"
{
  /* Lane index after the NEON_ENDIAN_LANE_N mapping.  */
  HOST_WIDE_INT arch_lane
    = NEON_ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[3]));
  const HOST_WIDE_INT nunits = GET_MODE_NUNITS (<MODE>mode);
  int half_regno = REGNO (operands[0]);

  /* A lane in the upper half is addressed through the <V_HALF>mode register
     two hard register numbers further on, with the lane index rebased to
     that half.  */
  if (arch_lane >= nunits / 2)
    {
      arch_lane -= nunits / 2;
      half_regno += 2;
    }

  /* Rewrite the operands so that %P0 and %c3 print the half register and
     the lane within it.  */
  operands[3] = GEN_INT (arch_lane);
  operands[0] = gen_rtx_REG (<V_HALF>mode, half_regno);

  /* With two elements each half holds exactly one, so no lane suffix.  */
  if (nunits != 2)
    return "vld1.<V_sz_elem>\t{%P0[%c3]}, %A1";
  return "vld1.<V_sz_elem>\t%P0, %A1";
}
  [(set_attr "type" "neon_load1_one_lane<q>")]
)

;; vec_duplicate of the memory element (operand 1) into every lane of
;; operand 0, printed with the all-lanes "[]" syntax.
(define_insn "neon_vld1_dup<mode>"
  [(set (match_operand:VD_LANE 0 "s_register_operand" "=w")
        (vec_duplicate:VD_LANE (match_operand:<V_elem> 1 "neon_struct_operand" "Um")))]
  "TARGET_NEON"
  "vld1.<V_sz_elem>\t{%P0[]}, %A1"
  [(set_attr "type" "neon_load1_all_lanes<q>")]
)

;; Special case for DImode.  Treat it exactly like a simple load.
;; DImode "dup" expander: has no preparation code and simply emits an
;; UNSPEC_VLD1 set of operand 0 from the memory operand 1.
(define_expand "neon_vld1_dupdi"
  [(set (match_operand:DI 0 "s_register_operand" "")
        (unspec:DI [(match_operand:DI 1 "neon_struct_operand" "")]
                   UNSPEC_VLD1))]
  "TARGET_NEON"
  ""
)

;; vec_duplicate of the memory element into a VQ2-mode register; the
;; template names both %e0 and %f0 with the all-lanes "[]" syntax.
(define_insn "neon_vld1_dup<mode>"
  [(set (match_operand:VQ2 0 "s_register_operand" "=w")
        (vec_duplicate:VQ2 (match_operand:<V_elem> 1 "neon_struct_operand" "Um")))]
  "TARGET_NEON"
{
  return "vld1.<V_sz_elem>\t{%e0[], %f0[]}, %A1";
}
  [(set_attr "type" "neon_load1_all_lanes<q>")]
)

;; V2DI duplicate.  Never output directly ("#"); after reload it is split
;; into a DImode load of the low part followed by a move of that value
;; into the high part (hence "length" 8).
(define_insn_and_split "neon_vld1_dupv2di"
   [(set (match_operand:V2DI 0 "s_register_operand" "=w")
    (vec_duplicate:V2DI (match_operand:DI 1 "neon_struct_operand" "Um")))]
   "TARGET_NEON"
   "#"
   "&& reload_completed"
   [(const_int 0)]
   {
    /* Load into the low DImode part of the destination...  */
    rtx tmprtx = gen_lowpart (DImode, operands[0]);
    emit_insn (gen_neon_vld1_dupdi (tmprtx, operands[1]));
    /* ...then copy the low part into the high part.  */
    emit_move_insn (gen_highpart (DImode, operands[0]), tmprtx );
    DONE;
    }
  [(set_attr "length" "8")
   (set_attr "type" "neon_load1_all_lanes_q")]
)

;; Expander with no preparation code: emits the UNSPEC_VST1 set as written.
(define_expand "vec_store_lanes<mode><mode>"
  [(set (match_operand:VDQX 0 "neon_struct_operand")
        (unspec:VDQX [(match_operand:VDQX 1 "s_register_operand")]
                     UNSPEC_VST1))]
  "TARGET_NEON")

;; Whole-vector vst1 of register operand 1 to the memory operand 0.
(define_insn "neon_vst1<mode>"
  [(set (match_operand:VDQX 0 "neon_struct_operand" "=Um")
        (unspec:VDQX [(match_operand:VDQX 1 "s_register_operand" "w")]
                     UNSPEC_VST1))]
  "TARGET_NEON"
  "vst1.<V_sz_elem>\t%h1, %A0"
  [(set_attr "type" "neon_store1_1reg<q>")])

;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
;; here on big endian targets.
(define_insn "neon_vst1_lane<mode>"
  [(set (match_operand:<V_elem> 0 "neon_struct_operand" "=Um")
        (unspec:<V_elem>
          [(match_operand:VDX 1 "s_register_operand" "w")
           (match_operand:SI 2 "immediate_operand" "i")]
          UNSPEC_VST1_LANE))]
  "TARGET_NEON"
{
  /* Replace the lane operand with its NEON_ENDIAN_LANE_N mapping so that
     %c2 prints the converted index.  */
  operands[2]
    = GEN_INT (NEON_ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));

  /* A one-element mode has no lane to select: store the whole register.  */
  if (GET_MODE_NUNITS (<MODE>mode) != 1)
    return "vst1.<V_sz_elem>\t{%P1[%c2]}, %A0";
  return "vst1.<V_sz_elem>\t{%P1}, %A0";
}
  [(set_attr "type" "neon_store1_one_lane<q>")]
)

;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
;; here on big endian targets.
(define_insn "neon_vst1_lane<mode>"
  [(set (match_operand:<V_elem> 0 "neon_struct_operand" "=Um")
        (unspec:<V_elem>
          [(match_operand:VQX 1 "s_register_operand" "w")
           (match_operand:SI 2 "immediate_operand" "i")]
          UNSPEC_VST1_LANE))]
  "TARGET_NEON"
{
  /* Lane index after the NEON_ENDIAN_LANE_N mapping.  */
  HOST_WIDE_INT arch_lane
    = NEON_ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2]));
  const HOST_WIDE_INT nunits = GET_MODE_NUNITS (<MODE>mode);
  int half_regno = REGNO (operands[1]);

  /* A lane in the upper half is addressed through the <V_HALF>mode register
     two hard register numbers further on, with the lane index rebased to
     that half.  */
  if (arch_lane >= nunits / 2)
    {
      arch_lane -= nunits / 2;
      half_regno += 2;
    }

  /* Rewrite the operands so that %c2 and %P1 print the lane and the half
     register that holds it.  */
  operands[2] = GEN_INT (arch_lane);
  operands[1] = gen_rtx_REG (<V_HALF>mode, half_regno);

  /* With two elements each half holds exactly one, so no lane suffix.  */
  if (nunits != 2)
    return "vst1.<V_sz_elem>\t{%P1[%c2]}, %A0";
  return "vst1.<V_sz_elem>\t{%P1}, %A0";
}
  [(set_attr "type" "neon_store1_one_lane<q>")]
)

;; Expander with no preparation code: emits the UNSPEC_VLD2 set as written.
;; The inner VDX unspec is a dummy that only carries the vector mode.
(define_expand "vec_load_lanesti<mode>"
  [(set (match_operand:TI 0 "s_register_operand")
        (unspec:TI [(match_operand:TI 1 "neon_struct_operand")
                    (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VLD2))]
  "TARGET_NEON")

;; Two-vector structure load into a TImode register.  For 64-bit elements
;; a vld1.64 is emitted instead of vld2, and the "type" attribute follows
;; the same distinction.
(define_insn "neon_vld2<mode>"
  [(set (match_operand:TI 0 "s_register_operand" "=w")
        (unspec:TI [(match_operand:TI 1 "neon_struct_operand" "Um")
                    (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VLD2))]
  "TARGET_NEON"
{
  return (<V_sz_elem> == 64
          ? "vld1.64\t%h0, %A1"
          : "vld2.<V_sz_elem>\t%h0, %A1");
}
  [(set (attr "type")
      (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
                    (const_string "neon_load1_2reg<q>")
                    (const_string "neon_load2_2reg<q>")))]
)

;; Expander with no preparation code: emits the UNSPEC_VLD2 set as written.
;; The inner VQ2 unspec is a dummy that only carries the vector mode.
(define_expand "vec_load_lanesoi<mode>"
  [(set (match_operand:OI 0 "s_register_operand")
        (unspec:OI [(match_operand:OI 1 "neon_struct_operand")
                    (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VLD2))]
  "TARGET_NEON")

;; Two-vector structure load into an OImode register.
(define_insn "neon_vld2<mode>"
  [(set (match_operand:OI 0 "s_register_operand" "=w")
        (unspec:OI [(match_operand:OI 1 "neon_struct_operand" "Um")
                    (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VLD2))]
  "TARGET_NEON"
  "vld2.<V_sz_elem>\t%h0, %A1"
  [(set_attr "type" "neon_load2_2reg_q")])

;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
;; here on big endian targets.
(define_insn "neon_vld2_lane<mode>"
  [(set (match_operand:TI 0 "s_register_operand" "=w")
        (unspec:TI [(match_operand:<V_two_elem> 1 "neon_struct_operand" "Um")
                    (match_operand:TI 2 "s_register_operand" "0")
                    (match_operand:SI 3 "immediate_operand" "i")
                    (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VLD2_LANE))]
  "TARGET_NEON"
{
  HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[3]));
  int regno = REGNO (operands[0]);
  rtx ops[4];
  /* ops[0..1]: the two DImode registers of the TImode destination (hard
     register numbers regno and regno + 2); ops[2]: memory; ops[3]: lane.  */
  ops[0] = gen_rtx_REG (DImode, regno);
  ops[1] = gen_rtx_REG (DImode, regno + 2);
  ops[2] = operands[1];
  ops[3] = GEN_INT (lane);
  output_asm_insn ("vld2.<V_sz_elem>\t{%P0[%c3], %P1[%c3]}, %A2", ops);
  return "";
}
  [(set_attr "type" "neon_load2_one_lane<q>")]
)

;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
;; here on big endian targets.
(define_insn "neon_vld2_lane<mode>"
  [(set (match_operand:OI 0 "s_register_operand" "=w")
        (unspec:OI [(match_operand:<V_two_elem> 1 "neon_struct_operand" "Um")
                    (match_operand:OI 2 "s_register_operand" "0")
                    (match_operand:SI 3 "immediate_operand" "i")
                    (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VLD2_LANE))]
  "TARGET_NEON"
{
  HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[3]));
  HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
  int regno = REGNO (operands[0]);
  rtx ops[4];
  /* A lane in the upper half selects the registers two hard register
     numbers further on, with the lane index rebased to that half.  */
  if (lane >= max / 2)
    {
      lane -= max / 2;
      regno += 2;
    }
  /* The two registers named in the list are four hard register numbers
     apart.  */
  ops[0] = gen_rtx_REG (DImode, regno);
  ops[1] = gen_rtx_REG (DImode, regno + 4);
  ops[2] = operands[1];
  ops[3] = GEN_INT (lane);
  output_asm_insn ("vld2.<V_sz_elem>\t{%P0[%c3], %P1[%c3]}, %A2", ops);
  return "";
}
  [(set_attr "type" "neon_load2_one_lane<q>")]
)

;; All-lanes vld2 into a TImode register.  Modes with a single element
;; are emitted as a plain vld1 instead, and the "type" attribute follows
;; the same distinction.
(define_insn "neon_vld2_dup<mode>"
  [(set (match_operand:TI 0 "s_register_operand" "=w")
        (unspec:TI [(match_operand:<V_two_elem> 1 "neon_struct_operand" "Um")
                    (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VLD2_DUP))]
  "TARGET_NEON"
{
  if (GET_MODE_NUNITS (<MODE>mode) > 1)
    return "vld2.<V_sz_elem>\t{%e0[], %f0[]}, %A1";
  else
    return "vld1.<V_sz_elem>\t%h0, %A1";
}
  [(set (attr "type")
      (if_then_else (gt (const_string "<V_mode_nunits>") (const_string "1"))
                    (const_string "neon_load2_all_lanes<q>")
                    (const_string "neon_load1_1reg<q>")))]
)

;; Expander with no preparation code: emits the UNSPEC_VST2 set as written.
;; The inner VDX unspec is a dummy that only carries the vector mode.
(define_expand "vec_store_lanesti<mode>"
  [(set (match_operand:TI 0 "neon_struct_operand")
        (unspec:TI [(match_operand:TI 1 "s_register_operand")
                    (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VST2))]
  "TARGET_NEON")

;; Two-vector structure store from a TImode register.  For 64-bit elements
;; a vst1.64 is emitted instead of vst2.  The non-64-bit case is a
;; whole-structure two-register store, so it is typed neon_store2_2reg<q>
;; (the store counterpart of the neon_load2_2reg<q> used by the TImode
;; neon_vld2<mode> pattern), not as a single-lane store.
(define_insn "neon_vst2<mode>"
  [(set (match_operand:TI 0 "neon_struct_operand" "=Um")
        (unspec:TI [(match_operand:TI 1 "s_register_operand" "w")
                    (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VST2))]
  "TARGET_NEON"
{
  if (<V_sz_elem> == 64)
    return "vst1.64\t%h1, %A0";
  else
    return "vst2.<V_sz_elem>\t%h1, %A0";
}
  [(set (attr "type")
      (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
                    (const_string "neon_store1_2reg<q>")
                    (const_string "neon_store2_2reg<q>")))]
)

;; Expander with no preparation code: emits the UNSPEC_VST2 set as written.
;; The inner VQ2 unspec is a dummy that only carries the vector mode.
(define_expand "vec_store_lanesoi<mode>"
  [(set (match_operand:OI 0 "neon_struct_operand")
        (unspec:OI [(match_operand:OI 1 "s_register_operand")
                    (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VST2))]
  "TARGET_NEON")

;; Two-vector structure store from an OImode register.
(define_insn "neon_vst2<mode>"
  [(set (match_operand:OI 0 "neon_struct_operand" "=Um")
        (unspec:OI [(match_operand:OI 1 "s_register_operand" "w")
                    (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VST2))]
  "TARGET_NEON"
  "vst2.<V_sz_elem>\t%h1, %A0"
  [(set_attr "type" "neon_store2_4reg<q>")]
)

;; see comment on
;; neon_vld1_lane for reason why the lane numbers are reversed
;; here on big endian targets.
;; Single-lane VST2, D-register form: operand 1 is a TI register group,
;; operand 2 the lane index, operand 0 the destination structure in memory.
(define_insn "neon_vst2_lane<mode>"
  [(set (match_operand:<V_two_elem> 0 "neon_struct_operand" "=Um")
        (unspec:<V_two_elem>
          [(match_operand:TI 1 "s_register_operand" "w")
           (match_operand:SI 2 "immediate_operand" "i")
           (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
          UNSPEC_VST2_LANE))]
  "TARGET_NEON"
{
  /* Lane index after the endian adjustment done by NEON_ENDIAN_LANE_N.  */
  HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2]));
  int regno = REGNO (operands[1]);
  rtx ops[4];
  ops[0] = operands[0];
  /* The two DImode registers of the list sit at regno and regno + 2.  */
  ops[1] = gen_rtx_REG (DImode, regno);
  ops[2] = gen_rtx_REG (DImode, regno + 2);
  ops[3] = GEN_INT (lane);
  output_asm_insn ("vst2.<V_sz_elem>\t{%P1[%c3], %P2[%c3]}, %A0", ops);
  return "";
}
  [(set_attr "type" "neon_store2_one_lane<q>")]
)

;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
;; here on big endian targets.
;; Single-lane VST2, Q-register form: operand 1 is an OI register group.
(define_insn "neon_vst2_lane<mode>"
  [(set (match_operand:<V_two_elem> 0 "neon_struct_operand" "=Um")
        (unspec:<V_two_elem>
          [(match_operand:OI 1 "s_register_operand" "w")
           (match_operand:SI 2 "immediate_operand" "i")
           (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
          UNSPEC_VST2_LANE))]
  "TARGET_NEON"
{
  HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2]));
  HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
  int regno = REGNO (operands[1]);
  rtx ops[4];
  /* A lane in the upper half of the vector is addressed through the
     registers two numbers further on, with the lane index rebased.  */
  if (lane >= max / 2)
    {
      lane -= max / 2;
      regno += 2;
    }
  ops[0] = operands[0];
  ops[1] = gen_rtx_REG (DImode, regno);
  ops[2] = gen_rtx_REG (DImode, regno + 4);
  ops[3] = GEN_INT (lane);
  output_asm_insn ("vst2.<V_sz_elem>\t{%P1[%c3], %P2[%c3]}, %A0", ops);
  return "";
}
  [(set_attr "type" "neon_store2_one_lane<q>")]
)

;; Standard-named expander for a three-vector structure load (EI group);
;; it has no preparation code and shares its RTL with neon_vld3<mode> below.
(define_expand "vec_load_lanesei<mode>"
  [(set (match_operand:EI 0 "s_register_operand")
        (unspec:EI [(match_operand:EI 1 "neon_struct_operand")
                    (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VLD3))]
  "TARGET_NEON")

;; Whole-structure VLD3 into an EI register group.  For 64-bit elements
;; the template falls back to vld1.64 on the same register list.
(define_insn "neon_vld3<mode>"
  [(set (match_operand:EI 0 "s_register_operand" "=w")
        (unspec:EI [(match_operand:EI 1 "neon_struct_operand" "Um")
                    (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VLD3))]
  "TARGET_NEON"
{
  if (<V_sz_elem> == 64)
    return "vld1.64\t%h0, %A1";
  else
    return "vld3.<V_sz_elem>\t%h0, %A1";
}
  [(set (attr "type")
      (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
                    (const_string "neon_load1_3reg<q>")
                    (const_string "neon_load3_3reg<q>")))]
)

;; Standard-named expander for the CI (Q-register) case; forwards to the
;; neon_vld3<mode> expander.
(define_expand "vec_load_lanesci<mode>"
  [(match_operand:CI 0 "s_register_operand")
   (match_operand:CI 1 "neon_struct_operand")
   (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
  "TARGET_NEON"
{
  emit_insn (gen_neon_vld3<mode> (operands[0], operands[1]));
  DONE;
})

;; CI-sized VLD3 is emitted as two EI-sized loads: neon_vld3qa from the
;; first EImode chunk of memory and neon_vld3qb from the chunk after it.
(define_expand "neon_vld3<mode>"
  [(match_operand:CI 0 "s_register_operand")
   (match_operand:CI 1 "neon_struct_operand")
   (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
  "TARGET_NEON"
{
  rtx mem;

  mem = adjust_address (operands[1], EImode, 0);
  emit_insn (gen_neon_vld3qa<mode> (operands[0], mem));
  mem = adjust_address (mem, EImode, GET_MODE_SIZE (EImode));
  /* The second half also takes the destination as an input (tied by the
     "0" constraint in neon_vld3qb).  */
  emit_insn (gen_neon_vld3qb<mode> (operands[0], mem, operands[0]));
  DONE;
})

;; First half of a CI VLD3: loads the registers at regno, regno + 4 and
;; regno + 8 of the destination group.
(define_insn "neon_vld3qa<mode>"
  [(set (match_operand:CI 0 "s_register_operand" "=w")
        (unspec:CI [(match_operand:EI 1 "neon_struct_operand" "Um")
                    (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VLD3A))]
  "TARGET_NEON"
{
  int regno = REGNO (operands[0]);
  rtx ops[4];
  ops[0] = gen_rtx_REG (DImode, regno);
  ops[1] = gen_rtx_REG (DImode, regno + 4);
  ops[2] = gen_rtx_REG (DImode, regno + 8);
  ops[3] = operands[1];
  output_asm_insn ("vld3.<V_sz_elem>\t{%P0, %P1, %P2}, %A3", ops);
  return "";
}
  [(set_attr "type" "neon_load3_3reg<q>")]
)

;; Second half of a CI VLD3: loads the registers at regno + 2, regno + 6
;; and regno + 10.  Operand 2 is tied to operand 0.
(define_insn "neon_vld3qb<mode>"
  [(set (match_operand:CI 0 "s_register_operand" "=w")
        (unspec:CI [(match_operand:EI 1 "neon_struct_operand" "Um")
                    (match_operand:CI 2 "s_register_operand" "0")
                    (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VLD3B))]
  "TARGET_NEON"
{
  int regno = REGNO (operands[0]);
  rtx ops[4];
  ops[0] = gen_rtx_REG (DImode, regno + 2);
  ops[1] = gen_rtx_REG (DImode, regno + 6);
  ops[2] = gen_rtx_REG (DImode, regno + 10);
  ops[3] = operands[1];
  output_asm_insn ("vld3.<V_sz_elem>\t{%P0, %P1, %P2}, %A3", ops);
  return "";
}
  [(set_attr "type" "neon_load3_3reg<q>")]
)

;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
;; here on big endian targets.
;; Single-lane VLD3, D-register form.  Operand 2 (tied to operand 0) is the
;; incoming register group, operand 3 the lane index.
(define_insn "neon_vld3_lane<mode>"
  [(set (match_operand:EI 0 "s_register_operand" "=w")
        (unspec:EI [(match_operand:<V_three_elem> 1 "neon_struct_operand" "Um")
                    (match_operand:EI 2 "s_register_operand" "0")
                    (match_operand:SI 3 "immediate_operand" "i")
                    (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VLD3_LANE))]
  "TARGET_NEON"
{
  HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[3]));
  int regno = REGNO (operands[0]);
  rtx ops[5];
  ops[0] = gen_rtx_REG (DImode, regno);
  ops[1] = gen_rtx_REG (DImode, regno + 2);
  ops[2] = gen_rtx_REG (DImode, regno + 4);
  ops[3] = operands[1];
  ops[4] = GEN_INT (lane);
  /* NOTE(review): the address is printed with plain %3 here, not %A3 as in
     the whole-structure patterns; presumably deliberate, confirm before
     changing.  */
  output_asm_insn ("vld3.<V_sz_elem>\t{%P0[%c4], %P1[%c4], %P2[%c4]}, %3",
                   ops);
  return "";
}
  [(set_attr "type" "neon_load3_one_lane<q>")]
)

;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
;; here on big endian targets.
;; Single-lane VLD3, Q-register form (CI group).
(define_insn "neon_vld3_lane<mode>"
  [(set (match_operand:CI 0 "s_register_operand" "=w")
        (unspec:CI [(match_operand:<V_three_elem> 1 "neon_struct_operand" "Um")
                    (match_operand:CI 2 "s_register_operand" "0")
                    (match_operand:SI 3 "immediate_operand" "i")
                    (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VLD3_LANE))]
  "TARGET_NEON"
{
  HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[3]));
  HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
  int regno = REGNO (operands[0]);
  rtx ops[5];
  /* Upper-half lanes use the registers two numbers further on, with the
     lane index rebased to that half.  */
  if (lane >= max / 2)
    {
      lane -= max / 2;
      regno += 2;
    }
  ops[0] = gen_rtx_REG (DImode, regno);
  ops[1] = gen_rtx_REG (DImode, regno + 4);
  ops[2] = gen_rtx_REG (DImode, regno + 8);
  ops[3] = operands[1];
  ops[4] = GEN_INT (lane);
  output_asm_insn ("vld3.<V_sz_elem>\t{%P0[%c4], %P1[%c4], %P2[%c4]}, %3",
                   ops);
  return "";
}
  [(set_attr "type" "neon_load3_one_lane<q>")]
)

;; VLD3 to all lanes.  Modes with a single element use vld1 on the whole
;; register list instead.
(define_insn "neon_vld3_dup<mode>"
  [(set (match_operand:EI 0 "s_register_operand" "=w")
        (unspec:EI [(match_operand:<V_three_elem> 1 "neon_struct_operand" "Um")
                    (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VLD3_DUP))]
  "TARGET_NEON"
{
  if (GET_MODE_NUNITS (<MODE>mode) > 1)
    {
      int regno = REGNO (operands[0]);
      rtx ops[4];
      ops[0] = gen_rtx_REG (DImode, regno);
      ops[1] = gen_rtx_REG (DImode, regno + 2);
      ops[2] = gen_rtx_REG (DImode, regno + 4);
      ops[3] = operands[1];
      output_asm_insn ("vld3.<V_sz_elem>\t{%P0[], %P1[], %P2[]}, %3", ops);
      return "";
    }
  else
    return "vld1.<V_sz_elem>\t%h0, %A1";
}
  [(set (attr "type")
      (if_then_else (gt (const_string "<V_mode_nunits>") (const_string "1"))
                    (const_string "neon_load3_all_lanes<q>")
                    (const_string "neon_load1_1reg<q>")))])

;; Standard-named expander for a three-vector structure store (EI group);
;; it has no preparation code and shares its RTL with neon_vst3<mode> below.
(define_expand "vec_store_lanesei<mode>"
  [(set (match_operand:EI 0 "neon_struct_operand")
        (unspec:EI [(match_operand:EI 1 "s_register_operand")
                    (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VST3))]
  "TARGET_NEON")

;; Whole-structure VST3 from an EI register group.  For 64-bit elements
;; the template falls back to vst1.64 on the same register list.
;; The non-64-bit branch stores the full three-register list, so it is
;; typed neon_store3_3reg, matching neon_load3_3reg on neon_vld3<mode>
;; and the type used by neon_vst3qa/neon_vst3qb.
(define_insn "neon_vst3<mode>"
  [(set (match_operand:EI 0 "neon_struct_operand" "=Um")
        (unspec:EI [(match_operand:EI 1 "s_register_operand" "w")
                    (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VST3))]
  "TARGET_NEON"
{
  if (<V_sz_elem> == 64)
    return "vst1.64\t%h1, %A0";
  else
    return "vst3.<V_sz_elem>\t%h1, %A0";
}
  [(set (attr "type")
      (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
                    (const_string "neon_store1_3reg<q>")
                    (const_string "neon_store3_3reg<q>")))])

;; Standard-named expander for the CI (Q-register) case; forwards to the
;; neon_vst3<mode> expander.
(define_expand "vec_store_lanesci<mode>"
  [(match_operand:CI 0 "neon_struct_operand")
   (match_operand:CI 1 "s_register_operand")
   (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
  "TARGET_NEON"
{
  emit_insn (gen_neon_vst3<mode> (operands[0], operands[1]));
  DONE;
})

;; CI-sized VST3 is emitted as two EI-sized stores: neon_vst3qa to the
;; first EImode chunk of memory and neon_vst3qb to the chunk after it.
(define_expand "neon_vst3<mode>"
  [(match_operand:CI 0 "neon_struct_operand")
   (match_operand:CI 1 "s_register_operand")
   (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
  "TARGET_NEON"
{
  rtx mem;

  mem = adjust_address (operands[0], EImode, 0);
  emit_insn (gen_neon_vst3qa<mode> (mem, operands[1]));
  mem = adjust_address (mem, EImode, GET_MODE_SIZE (EImode));
  emit_insn (gen_neon_vst3qb<mode> (mem, operands[1]));
  DONE;
})

;; First half of a CI VST3: stores the registers at regno, regno + 4 and
;; regno + 8 of the source group.
(define_insn "neon_vst3qa<mode>"
  [(set (match_operand:EI 0 "neon_struct_operand" "=Um")
        (unspec:EI [(match_operand:CI 1 "s_register_operand" "w")
                    (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VST3A))]
  "TARGET_NEON"
{
  int regno = REGNO (operands[1]);
  rtx ops[4];
  ops[0] = operands[0];
  ops[1] = gen_rtx_REG (DImode, regno);
  ops[2] = gen_rtx_REG (DImode, regno + 4);
  ops[3] = gen_rtx_REG (DImode, regno + 8);
  output_asm_insn ("vst3.<V_sz_elem>\t{%P1, %P2, %P3}, %A0", ops);
  return "";
}
  [(set_attr "type" "neon_store3_3reg<q>")]
)

;; Second half of a CI VST3: stores the registers at regno + 2, regno + 6
;; and regno + 10.
(define_insn "neon_vst3qb<mode>"
  [(set (match_operand:EI 0 "neon_struct_operand" "=Um")
        (unspec:EI [(match_operand:CI 1 "s_register_operand" "w")
                    (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VST3B))]
  "TARGET_NEON"
{
  int regno = REGNO (operands[1]);
  rtx ops[4];
  ops[0] = operands[0];
  ops[1] = gen_rtx_REG (DImode, regno + 2);
  ops[2] = gen_rtx_REG (DImode, regno + 6);
  ops[3] = gen_rtx_REG (DImode, regno + 10);
  output_asm_insn ("vst3.<V_sz_elem>\t{%P1, %P2, %P3}, %A0", ops);
  return "";
}
  [(set_attr "type" "neon_store3_3reg<q>")]
)

;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
;; here on big endian targets.
;; Single-lane VST3, D-register form (EI group).
(define_insn "neon_vst3_lane<mode>"
  [(set (match_operand:<V_three_elem> 0 "neon_struct_operand" "=Um")
        (unspec:<V_three_elem>
           [(match_operand:EI 1 "s_register_operand" "w")
            (match_operand:SI 2 "immediate_operand" "i")
            (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
           UNSPEC_VST3_LANE))]
  "TARGET_NEON"
{
  HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2]));
  int regno = REGNO (operands[1]);
  rtx ops[5];
  ops[0] = operands[0];
  ops[1] = gen_rtx_REG (DImode, regno);
  ops[2] = gen_rtx_REG (DImode, regno + 2);
  ops[3] = gen_rtx_REG (DImode, regno + 4);
  ops[4] = GEN_INT (lane);
  output_asm_insn ("vst3.<V_sz_elem>\t{%P1[%c4], %P2[%c4], %P3[%c4]}, %0",
                   ops);
  return "";
}
  [(set_attr "type" "neon_store3_one_lane<q>")]
)

;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
;; here on big endian targets.
;; Single-lane VST3, Q-register form: operand 1 is a CI register group,
;; operand 2 the lane index, operand 0 the destination structure in memory.
(define_insn "neon_vst3_lane<mode>"
  [(set (match_operand:<V_three_elem> 0 "neon_struct_operand" "=Um")
        (unspec:<V_three_elem>
           [(match_operand:CI 1 "s_register_operand" "w")
            (match_operand:SI 2 "immediate_operand" "i")
            (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
           UNSPEC_VST3_LANE))]
  "TARGET_NEON"
{
  const HOST_WIDE_INT half = GET_MODE_NUNITS (<MODE>mode) / 2;
  HOST_WIDE_INT lane_no
    = NEON_ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2]));
  int base = REGNO (operands[1]);
  rtx xops[5];

  /* Upper-half lanes are reached through the registers two numbers on.  */
  if (lane_no >= half)
    {
      base += 2;
      lane_no -= half;
    }

  xops[0] = operands[0];
  for (int i = 0; i < 3; i++)
    xops[i + 1] = gen_rtx_REG (DImode, base + 4 * i);
  xops[4] = GEN_INT (lane_no);
  output_asm_insn ("vst3.<V_sz_elem>\t{%P1[%c4], %P2[%c4], %P3[%c4]}, %0",
                   xops);
  return "";
}
  [(set_attr "type" "neon_store3_one_lane<q>")]
)

;; Standard-named expander for a four-vector structure load (OI group);
;; no preparation code, same RTL as the neon_vld4<mode> insn.
(define_expand "vec_load_lanesoi<mode>"
  [(set (match_operand:OI 0 "s_register_operand")
        (unspec:OI [(match_operand:OI 1 "neon_struct_operand")
                    (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VLD4))]
  "TARGET_NEON")

;; Whole-structure VLD4 into an OI register group; 64-bit elements are
;; loaded with vld1.64 on the same register list.
(define_insn "neon_vld4<mode>"
  [(set (match_operand:OI 0 "s_register_operand" "=w")
        (unspec:OI [(match_operand:OI 1 "neon_struct_operand" "Um")
                    (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VLD4))]
  "TARGET_NEON"
{
  return (<V_sz_elem> == 64
          ? "vld1.64\t%h0, %A1"
          : "vld4.<V_sz_elem>\t%h0, %A1");
}
  [(set (attr "type")
      (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
                    (const_string "neon_load1_4reg<q>")
                    (const_string "neon_load4_4reg<q>")))]
)

;; Standard-named expander for the XI (Q-register) case; hands over to the
;; neon_vld4<mode> expander.
(define_expand "vec_load_lanesxi<mode>"
  [(match_operand:XI 0 "s_register_operand")
   (match_operand:XI 1 "neon_struct_operand")
   (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
  "TARGET_NEON"
{
  rtx pat = gen_neon_vld4<mode> (operands[0], operands[1]);
  emit_insn (pat);
  DONE;
})

;; XI-sized VLD4 is split into two OI-sized loads, one per OImode chunk
;; of the source memory.
(define_expand "neon_vld4<mode>"
  [(match_operand:XI 0 "s_register_operand")
   (match_operand:XI 1 "neon_struct_operand")
   (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
  "TARGET_NEON"
{
  /* First chunk, then the chunk immediately after it; each address is
     formed just before the insn that uses it.  */
  rtx first_chunk = adjust_address (operands[1], OImode, 0);
  emit_insn (gen_neon_vld4qa<mode> (operands[0], first_chunk));

  rtx second_chunk
    = adjust_address (first_chunk, OImode, GET_MODE_SIZE (OImode));
  emit_insn (gen_neon_vld4qb<mode> (operands[0], second_chunk, operands[0]));
  DONE;
})

;; First half of an XI VLD4: registers at regno, +4, +8 and +12.
(define_insn "neon_vld4qa<mode>"
  [(set (match_operand:XI 0 "s_register_operand" "=w")
        (unspec:XI [(match_operand:OI 1 "neon_struct_operand" "Um")
                    (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VLD4A))]
  "TARGET_NEON"
{
  const int base = REGNO (operands[0]);
  rtx xops[5];

  for (int i = 0; i < 4; i++)
    xops[i] = gen_rtx_REG (DImode, base + 4 * i);
  xops[4] = operands[1];
  output_asm_insn ("vld4.<V_sz_elem>\t{%P0, %P1, %P2, %P3}, %A4", xops);
  return "";
}
  [(set_attr "type" "neon_load4_4reg<q>")]
)

;; Second half of an XI VLD4: registers at regno + 2, +6, +10 and +14.
;; Operand 2 is tied to operand 0.
(define_insn "neon_vld4qb<mode>"
  [(set (match_operand:XI 0 "s_register_operand" "=w")
        (unspec:XI [(match_operand:OI 1 "neon_struct_operand" "Um")
                    (match_operand:XI 2 "s_register_operand" "0")
                    (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VLD4B))]
  "TARGET_NEON"
{
  const int base = REGNO (operands[0]) + 2;
  rtx xops[5];

  for (int i = 0; i < 4; i++)
    xops[i] = gen_rtx_REG (DImode, base + 4 * i);
  xops[4] = operands[1];
  output_asm_insn ("vld4.<V_sz_elem>\t{%P0, %P1, %P2, %P3}, %A4", xops);
  return "";
}
  [(set_attr "type" "neon_load4_4reg<q>")]
)

;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
;; here on big endian targets.
;; Single-lane VLD4, D-register form.  Operand 2 (tied to operand 0) is the
;; incoming OI group, operand 3 the lane index.
(define_insn "neon_vld4_lane<mode>"
  [(set (match_operand:OI 0 "s_register_operand" "=w")
        (unspec:OI [(match_operand:<V_four_elem> 1 "neon_struct_operand" "Um")
                    (match_operand:OI 2 "s_register_operand" "0")
                    (match_operand:SI 3 "immediate_operand" "i")
                    (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VLD4_LANE))]
  "TARGET_NEON"
{
  const int base = REGNO (operands[0]);
  const HOST_WIDE_INT lane_no
    = NEON_ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[3]));
  rtx xops[6];

  for (int i = 0; i < 4; i++)
    xops[i] = gen_rtx_REG (DImode, base + 2 * i);
  xops[4] = operands[1];
  xops[5] = GEN_INT (lane_no);
  output_asm_insn ("vld4.<V_sz_elem>\t{%P0[%c5], %P1[%c5], %P2[%c5], %P3[%c5]}, %A4",
                   xops);
  return "";
}
  [(set_attr "type" "neon_load4_one_lane<q>")]
)

;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
;; here on big endian targets.
;; Single-lane VLD4, Q-register form.  Operand 2 (tied to operand 0) is the
;; incoming XI group, operand 3 the lane index.
(define_insn "neon_vld4_lane<mode>"
  [(set (match_operand:XI 0 "s_register_operand" "=w")
        (unspec:XI [(match_operand:<V_four_elem> 1 "neon_struct_operand" "Um")
                    (match_operand:XI 2 "s_register_operand" "0")
                    (match_operand:SI 3 "immediate_operand" "i")
                    (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VLD4_LANE))]
  "TARGET_NEON"
{
  const HOST_WIDE_INT half = GET_MODE_NUNITS (<MODE>mode) / 2;
  HOST_WIDE_INT lane_no
    = NEON_ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[3]));
  int base = REGNO (operands[0]);
  rtx xops[6];

  /* Upper-half lanes are reached through the registers two numbers on.  */
  if (lane_no >= half)
    {
      base += 2;
      lane_no -= half;
    }

  for (int i = 0; i < 4; i++)
    xops[i] = gen_rtx_REG (DImode, base + 4 * i);
  xops[4] = operands[1];
  xops[5] = GEN_INT (lane_no);
  output_asm_insn ("vld4.<V_sz_elem>\t{%P0[%c5], %P1[%c5], %P2[%c5], %P3[%c5]}, %A4",
                   xops);
  return "";
}
  [(set_attr "type" "neon_load4_one_lane<q>")]
)

;; VLD4 to all lanes.  Single-element modes use vld1 on the whole register
;; list instead.
(define_insn "neon_vld4_dup<mode>"
  [(set (match_operand:OI 0 "s_register_operand" "=w")
        (unspec:OI [(match_operand:<V_four_elem> 1 "neon_struct_operand" "Um")
                    (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VLD4_DUP))]
  "TARGET_NEON"
{
  /* Handle the one-element case first.  */
  if (GET_MODE_NUNITS (<MODE>mode) <= 1)
    return "vld1.<V_sz_elem>\t%h0, %A1";

  const int base = REGNO (operands[0]);
  rtx xops[5];

  for (int i = 0; i < 4; i++)
    xops[i] = gen_rtx_REG (DImode, base + 2 * i);
  xops[4] = operands[1];
  output_asm_insn ("vld4.<V_sz_elem>\t{%P0[], %P1[], %P2[], %P3[]}, %A4",
                   xops);
  return "";
}
  [(set (attr "type")
      (if_then_else (gt (const_string "<V_mode_nunits>") (const_string "1"))
                    (const_string "neon_load4_all_lanes<q>")
                    (const_string "neon_load1_1reg<q>")))]
)

;; Standard-named expander for a four-vector structure store (OI group);
;; no preparation code, same RTL as the neon_vst4<mode> insn.
(define_expand "vec_store_lanesoi<mode>"
  [(set (match_operand:OI 0 "neon_struct_operand")
        (unspec:OI [(match_operand:OI 1 "s_register_operand")
                    (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VST4))]
  "TARGET_NEON")

;; Whole-structure VST4 from an OI register group; 64-bit elements are
;; stored with vst1.64 on the same register list.
(define_insn "neon_vst4<mode>"
  [(set (match_operand:OI 0 "neon_struct_operand" "=Um")
        (unspec:OI [(match_operand:OI 1 "s_register_operand" "w")
                    (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VST4))]
  "TARGET_NEON"
{
  return (<V_sz_elem> == 64
          ? "vst1.64\t%h1, %A0"
          : "vst4.<V_sz_elem>\t%h1, %A0");
}
  [(set (attr "type")
      (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
                    (const_string "neon_store1_4reg<q>")
                    (const_string "neon_store4_4reg<q>")))]
)

;; Standard-named expander for the XI (Q-register) case; hands over to the
;; neon_vst4<mode> expander.
(define_expand "vec_store_lanesxi<mode>"
  [(match_operand:XI 0 "neon_struct_operand")
   (match_operand:XI 1 "s_register_operand")
   (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
  "TARGET_NEON"
{
  rtx pat = gen_neon_vst4<mode> (operands[0], operands[1]);
  emit_insn (pat);
  DONE;
})

;; XI-sized VST4 is split into two OI-sized stores, one per OImode chunk
;; of the destination memory.
(define_expand "neon_vst4<mode>"
  [(match_operand:XI 0 "neon_struct_operand")
   (match_operand:XI 1 "s_register_operand")
   (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
  "TARGET_NEON"
{
  /* First chunk, then the chunk immediately after it; each address is
     formed just before the insn that uses it.  */
  rtx first_chunk = adjust_address (operands[0], OImode, 0);
  emit_insn (gen_neon_vst4qa<mode> (first_chunk, operands[1]));

  rtx second_chunk
    = adjust_address (first_chunk, OImode, GET_MODE_SIZE (OImode));
  emit_insn (gen_neon_vst4qb<mode> (second_chunk, operands[1]));
  DONE;
})

;; First half of an XI VST4: registers at regno, +4, +8 and +12.
(define_insn "neon_vst4qa<mode>"
  [(set (match_operand:OI 0 "neon_struct_operand" "=Um")
        (unspec:OI [(match_operand:XI 1 "s_register_operand" "w")
                    (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VST4A))]
  "TARGET_NEON"
{
  const int base = REGNO (operands[1]);
  rtx xops[5];

  xops[0] = operands[0];
  for (int i = 0; i < 4; i++)
    xops[i + 1] = gen_rtx_REG (DImode, base + 4 * i);
  output_asm_insn ("vst4.<V_sz_elem>\t{%P1, %P2, %P3, %P4}, %A0", xops);
  return "";
}
  [(set_attr "type" "neon_store4_4reg<q>")]
)

;; Second half of an XI VST4: registers at regno + 2, +6, +10 and +14.
(define_insn "neon_vst4qb<mode>"
  [(set (match_operand:OI 0 "neon_struct_operand" "=Um")
        (unspec:OI [(match_operand:XI 1 "s_register_operand" "w")
                    (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VST4B))]
  "TARGET_NEON"
{
  const int base = REGNO (operands[1]) + 2;
  rtx xops[5];

  xops[0] = operands[0];
  for (int i = 0; i < 4; i++)
    xops[i + 1] = gen_rtx_REG (DImode, base + 4 * i);
  output_asm_insn ("vst4.<V_sz_elem>\t{%P1, %P2, %P3, %P4}, %A0", xops);
  return "";
}
  [(set_attr "type" "neon_store4_4reg<q>")]
)

;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
;; here on big endian targets.
;; Single-lane VST4, D-register form: operand 1 is an OI register group,
;; operand 2 the lane index, operand 0 the destination structure in memory.
(define_insn "neon_vst4_lane<mode>"
  [(set (match_operand:<V_four_elem> 0 "neon_struct_operand" "=Um")
        (unspec:<V_four_elem>
           [(match_operand:OI 1 "s_register_operand" "w")
            (match_operand:SI 2 "immediate_operand" "i")
            (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
           UNSPEC_VST4_LANE))]
  "TARGET_NEON"
{
  const int base = REGNO (operands[1]);
  const HOST_WIDE_INT lane_no
    = NEON_ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2]));
  rtx xops[6];

  xops[0] = operands[0];
  /* Four DImode registers of the list, two register numbers apart.  */
  for (int i = 0; i < 4; i++)
    xops[i + 1] = gen_rtx_REG (DImode, base + 2 * i);
  xops[5] = GEN_INT (lane_no);
  output_asm_insn ("vst4.<V_sz_elem>\t{%P1[%c5], %P2[%c5], %P3[%c5], %P4[%c5]}, %A0",
                   xops);
  return "";
}
  [(set_attr "type" "neon_store4_one_lane<q>")]
)

;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
;; here on big endian targets.
;; Single-lane VST4, Q-register form: operand 1 is an XI register group.
;; This stores one lane, so it is typed neon_store4_one_lane like the
;; D-register form and the other single-lane store patterns.
(define_insn "neon_vst4_lane<mode>"
  [(set (match_operand:<V_four_elem> 0 "neon_struct_operand" "=Um")
        (unspec:<V_four_elem>
           [(match_operand:XI 1 "s_register_operand" "w")
            (match_operand:SI 2 "immediate_operand" "i")
            (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
           UNSPEC_VST4_LANE))]
  "TARGET_NEON"
{
  HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2]));
  HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
  int regno = REGNO (operands[1]);
  rtx ops[6];
  /* Upper-half lanes use the registers two numbers further on, with the
     lane index rebased to that half.  */
  if (lane >= max / 2)
    {
      lane -= max / 2;
      regno += 2;
    }
  ops[0] = operands[0];
  ops[1] = gen_rtx_REG (DImode, regno);
  ops[2] = gen_rtx_REG (DImode, regno + 4);
  ops[3] = gen_rtx_REG (DImode, regno + 8);
  ops[4] = gen_rtx_REG (DImode, regno + 12);
  ops[5] = GEN_INT (lane);
  output_asm_insn ("vst4.<V_sz_elem>\t{%P1[%c5], %P2[%c5], %P3[%c5], %P4[%c5]}, %A0",
                   ops);
  return "";
}
  [(set_attr "type" "neon_store4_one_lane<q>")]
)

;; Widen the half of operand 1 selected by the low-lane parallel in
;; operand 2 (vmovl on %e1).  Little-endian only.
(define_insn "neon_vec_unpack<US>_lo_<mode>"
  [(set (match_operand:<V_unpack> 0 "register_operand" "=w")
        (SE:<V_unpack> (vec_select:<V_HALF>
                          (match_operand:VU 1 "register_operand" "w")
                          (match_operand:VU 2 "vect_par_constant_low" ""))))]
  "TARGET_NEON && !BYTES_BIG_ENDIAN"
  "vmovl.<US><V_sz_elem> %q0, %e1"
  [(set_attr "type" "neon_shift_imm_long")]
)

;; As above for the half selected by the high-lane parallel (vmovl on %f1).
(define_insn "neon_vec_unpack<US>_hi_<mode>"
  [(set (match_operand:<V_unpack> 0 "register_operand" "=w")
        (SE:<V_unpack> (vec_select:<V_HALF>
                          (match_operand:VU 1 "register_operand" "w")
                          (match_operand:VU 2 "vect_par_constant_high" ""))))]
  "TARGET_NEON && !BYTES_BIG_ENDIAN"
  "vmovl.<US><V_sz_elem> %q0, %f1"
  [(set_attr "type" "neon_shift_imm_long")]
)

;; Standard-named expander: builds the parallel of lane indices
;; nunits/2 .. nunits-1 and emits neon_vec_unpack<US>_hi_<mode>.
(define_expand "vec_unpack<US>_hi_<mode>"
  [(match_operand:<V_unpack> 0 "register_operand" "")
   (SE:<V_unpack> (match_operand:VU 1 "register_operand"))]
 "TARGET_NEON && !BYTES_BIG_ENDIAN"
  {
   rtvec v = rtvec_alloc (<V_mode_nunits>/2)  ;
   rtx t1;
   int i;
   for (i = 0; i < (<V_mode_nunits>/2); i++)
     RTVEC_ELT (v, i) = GEN_INT ((<V_mode_nunits>/2) + i);

   t1 = gen_rtx_PARALLEL (<MODE>mode, v);
   emit_insn (gen_neon_vec_unpack<US>_hi_<mode> (operands[0],
                                                 operands[1],
                                                 t1));
   DONE;
  }
)

;; Standard-named expander: builds the parallel of lane indices
;; 0 .. nunits/2-1 and emits neon_vec_unpack<US>_lo_<mode>.
(define_expand "vec_unpack<US>_lo_<mode>"
  [(match_operand:<V_unpack> 0 "register_operand" "")
   (SE:<V_unpack> (match_operand:VU 1 "register_operand" ""))]
 "TARGET_NEON && !BYTES_BIG_ENDIAN"
  {
   rtvec v = rtvec_alloc (<V_mode_nunits>/2)  ;
   rtx t1;
   int i;
   for (i = 0; i < (<V_mode_nunits>/2) ; i++)
     RTVEC_ELT (v, i) = GEN_INT (i);
   t1 = gen_rtx_PARALLEL (<MODE>mode, v);
   emit_insn (gen_neon_vec_unpack<US>_lo_<mode> (operands[0],
                                                 operands[1],
                                                 t1));
   DONE;
  }
)

;; Widening multiply of the low-lane halves of operands 1 and 3 (vmull on
;; %e1, %e3); operand 2 is the lane-selection parallel shared by both.
(define_insn "neon_vec_<US>mult_lo_<mode>"
  [(set (match_operand:<V_unpack> 0 "register_operand" "=w")
        (mult:<V_unpack> (SE:<V_unpack> (vec_select:<V_HALF>
                           (match_operand:VU 1 "register_operand" "w")
                           (match_operand:VU 2 "vect_par_constant_low" "")))
                         (SE:<V_unpack> (vec_select:<V_HALF>
                           (match_operand:VU 3 "register_operand" "w")
                           (match_dup 2)))))]
  "TARGET_NEON && !BYTES_BIG_ENDIAN"
  "vmull.<US><V_sz_elem> %q0, %e1, %e3"
  [(set_attr "type" "neon_mul_<V_elem_ch>_long")]
)

;; Standard-named expander: builds the low-lane parallel and emits
;; neon_vec_<US>mult_lo_<mode>.
(define_expand "vec_widen_<US>mult_lo_<mode>"
  [(match_operand:<V_unpack> 0 "register_operand" "")
   (SE:<V_unpack> (match_operand:VU 1 "register_operand" ""))
   (SE:<V_unpack> (match_operand:VU 2 "register_operand" ""))]
 "TARGET_NEON && !BYTES_BIG_ENDIAN"
 {
   rtvec v = rtvec_alloc (<V_mode_nunits>/2)  ;
   rtx t1;
   int i;
   for (i = 0; i < (<V_mode_nunits>/2) ; i++)
     RTVEC_ELT (v, i) = GEN_INT (i);
   t1 = gen_rtx_PARALLEL (<MODE>mode, v);

   emit_insn (gen_neon_vec_<US>mult_lo_<mode> (operands[0],
                                               operands[1],
                                               t1,
                                               operands[2]));
   DONE;
 }
)

;; Widening multiply of the high-lane halves of operands 1 and 3 (vmull on
;; %f1, %f3).
(define_insn "neon_vec_<US>mult_hi_<mode>"
  [(set (match_operand:<V_unpack> 0 "register_operand" "=w")
        (mult:<V_unpack> (SE:<V_unpack> (vec_select:<V_HALF>
                            (match_operand:VU 1 "register_operand" "w")
                            (match_operand:VU 2 "vect_par_constant_high" "")))
                         (SE:<V_unpack> (vec_select:<V_HALF>
                            (match_operand:VU 3 "register_operand" "w")
                            (match_dup 2)))))]
  "TARGET_NEON && !BYTES_BIG_ENDIAN"
  "vmull.<US><V_sz_elem> %q0, %f1, %f3"
  [(set_attr "type" "neon_mul_<V_elem_ch>_long")]
)

;; Standard-named expander: builds the high-lane parallel and emits
;; neon_vec_<US>mult_hi_<mode>.
(define_expand "vec_widen_<US>mult_hi_<mode>"
  [(match_operand:<V_unpack> 0 "register_operand" "")
   (SE:<V_unpack> (match_operand:VU 1 "register_operand" ""))
   (SE:<V_unpack> (match_operand:VU 2 "register_operand" ""))]
 "TARGET_NEON && !BYTES_BIG_ENDIAN"
 {
   rtvec v = rtvec_alloc (<V_mode_nunits>/2)  ;
   rtx t1;
   int i;
   for (i = 0; i < (<V_mode_nunits>/2) ; i++)
     RTVEC_ELT (v, i) = GEN_INT (<V_mode_nunits>/2 + i);
   t1 = gen_rtx_PARALLEL (<MODE>mode, v);

   emit_insn (gen_neon_vec_<US>mult_hi_<mode> (operands[0],
                                               operands[1],
                                               t1,
                                               operands[2]));
   DONE;

 }
)

;; Widening shift left by a constant amount (vshll) of a D-register source.
(define_insn "neon_vec_<US>shiftl_<mode>"
 [(set (match_operand:<V_widen> 0 "register_operand" "=w")
       (SE:<V_widen> (ashift:VW (match_operand:VW 1 "register_operand" "w")
       (match_operand:<V_innermode> 2 "const_neon_scalar_shift_amount_operand" ""))))]
  "TARGET_NEON"
{
  return "vshll.<US><V_sz_elem> %q0, %P1, %2";
}
  [(set_attr "type" "neon_shift_imm_long")]
)

;; Standard-named expander: takes the half-mode subreg of operand 1 at byte
;; offset 0 and emits the widening shift on it.
(define_expand "vec_widen_<US>shiftl_lo_<mode>"
  [(match_operand:<V_unpack> 0 "register_operand" "")
   (SE:<V_unpack> (match_operand:VU 1 "register_operand" ""))
   (match_operand:SI 2 "immediate_operand" "i")]
 "TARGET_NEON && !BYTES_BIG_ENDIAN"
 {
  emit_insn (gen_neon_vec_<US>shiftl_<V_half> (operands[0],
                simplify_gen_subreg (<V_HALF>mode, operands[1], <MODE>mode, 0),
                operands[2]));
   DONE;
 }
)

;; As above, using the half-mode subreg at byte offset
;; GET_MODE_SIZE (half mode).
(define_expand "vec_widen_<US>shiftl_hi_<mode>"
  [(match_operand:<V_unpack> 0 "register_operand" "")
   (SE:<V_unpack> (match_operand:VU 1 "register_operand" ""))
   (match_operand:SI 2 "immediate_operand" "i")]
 "TARGET_NEON && !BYTES_BIG_ENDIAN"
 {
  emit_insn (gen_neon_vec_<US>shiftl_<V_half> (operands[0],
                simplify_gen_subreg (<V_HALF>mode, operands[1], <MODE>mode,
                                     GET_MODE_SIZE (<V_HALF>mode)),
                operands[2]));
   DONE;
 }
)

;; Vectorize for non-neon-quad case
(define_insn "neon_unpack<US>_<mode>"
 [(set (match_operand:<V_widen> 0 "register_operand" "=w")
       (SE:<V_widen> (match_operand:VDI 1 "register_operand" "w")))]
 "TARGET_NEON"
 "vmovl.<US><V_sz_elem> %q0, %P1"
  [(set_attr "type" "neon_move")]
)

;; D-register unpack: widen the whole source into a temporary, then take
;; the low half of the temporary with neon_vget_low.
(define_expand "vec_unpack<US>_lo_<mode>"
 [(match_operand:<V_double_width> 0 "register_operand" "")
  (SE:<V_double_width>(match_operand:VDI 1 "register_operand"))]
 "TARGET_NEON"
{
  rtx tmpreg = gen_reg_rtx (<V_widen>mode);
  emit_insn (gen_neon_unpack<US>_<mode> (tmpreg, operands[1]));
  emit_insn (gen_neon_vget_low<V_widen_l> (operands[0], tmpreg));

  DONE;
}
)

;; D-register unpack: widen the whole source into a temporary, then take
;; the high half of the temporary with neon_vget_high.
(define_expand "vec_unpack<US>_hi_<mode>"
 [(match_operand:<V_double_width> 0 "register_operand" "")
  (SE:<V_double_width>(match_operand:VDI 1 "register_operand"))]
 "TARGET_NEON"
{
  rtx tmpreg = gen_reg_rtx (<V_widen>mode);
  emit_insn (gen_neon_unpack<US>_<mode> (tmpreg, operands[1]));
  emit_insn (gen_neon_vget_high<V_widen_l> (operands[0], tmpreg));

  DONE;
}
)

;; Widening multiply of two D-register operands (vmull).
(define_insn "neon_vec_<US>mult_<mode>"
 [(set (match_operand:<V_widen> 0 "register_operand" "=w")
       (mult:<V_widen> (SE:<V_widen>
                           (match_operand:VDI 1 "register_operand" "w"))
                       (SE:<V_widen>
                           (match_operand:VDI 2 "register_operand" "w"))))]
  "TARGET_NEON"
  "vmull.<US><V_sz_elem> %q0, %P1, %P2"
  [(set_attr "type" "neon_mul_<V_elem_ch>_long")]
)

;; D-register widening multiply, high half: multiply into a temporary and
;; extract its high half.
(define_expand "vec_widen_<US>mult_hi_<mode>"
  [(match_operand:<V_double_width> 0 "register_operand" "")
   (SE:<V_double_width> (match_operand:VDI 1 "register_operand" ""))
   (SE:<V_double_width> (match_operand:VDI 2 "register_operand" ""))]
 "TARGET_NEON"
 {
   rtx tmpreg = gen_reg_rtx (<V_widen>mode);
   emit_insn (gen_neon_vec_<US>mult_<mode> (tmpreg, operands[1], operands[2]));
   emit_insn (gen_neon_vget_high<V_widen_l> (operands[0], tmpreg));

   DONE;

 }
)

;; D-register widening multiply, low half: multiply into a temporary and
;; extract its low half.
(define_expand "vec_widen_<US>mult_lo_<mode>"
  [(match_operand:<V_double_width> 0 "register_operand" "")
   (SE:<V_double_width> (match_operand:VDI 1 "register_operand" ""))
   (SE:<V_double_width> (match_operand:VDI 2 "register_operand" ""))]
 "TARGET_NEON"
 {
   rtx tmpreg = gen_reg_rtx (<V_widen>mode);
   emit_insn (gen_neon_vec_<US>mult_<mode> (tmpreg, operands[1], operands[2]));
   emit_insn (gen_neon_vget_low<V_widen_l> (operands[0], tmpreg));

   DONE;

 }
)

;; D-register widening shift left, high half: shift into a temporary and
;; extract its high half.
(define_expand "vec_widen_<US>shiftl_hi_<mode>"
 [(match_operand:<V_double_width> 0 "register_operand" "")
   (SE:<V_double_width> (match_operand:VDI 1 "register_operand" ""))
   (match_operand:SI 2 "immediate_operand" "i")]
 "TARGET_NEON"
 {
   rtx tmpreg = gen_reg_rtx (<V_widen>mode);
   emit_insn (gen_neon_vec_<US>shiftl_<mode> (tmpreg, operands[1], operands[2]));
   emit_insn (gen_neon_vget_high<V_widen_l> (operands[0], tmpreg));

   DONE;
 }
)

;; D-register widening shift left, low half: shift into a temporary and
;; extract its low half.
(define_expand "vec_widen_<US>shiftl_lo_<mode>"
  [(match_operand:<V_double_width> 0 "register_operand" "")
   (SE:<V_double_width> (match_operand:VDI 1 "register_operand" ""))
   (match_operand:SI 2 "immediate_operand" "i")]
 "TARGET_NEON"
 {
   rtx tmpreg = gen_reg_rtx (<V_widen>mode);
   emit_insn (gen_neon_vec_<US>shiftl_<mode> (tmpreg, operands[1], operands[2]));
   emit_insn (gen_neon_vget_low<V_widen_l> (operands[0], tmpreg));

   DONE;
 }
)

; FIXME: These instruction patterns can't be used safely in big-endian mode
; because the ordering of vector elements in Q registers is different from what
; the semantics of the instructions require.

;; Narrow two Q-register operands and concatenate the results: two vmovn
;; instructions writing the %e0 and %f0 halves of the destination.  The
;; output is earlyclobber ("=&w") and the length attribute is 8.
(define_insn "vec_pack_trunc_<mode>"
 [(set (match_operand:<V_narrow_pack> 0 "register_operand" "=&w")
       (vec_concat:<V_narrow_pack>
                (truncate:<V_narrow>
                        (match_operand:VN 1 "register_operand" "w"))
                (truncate:<V_narrow>
                        (match_operand:VN 2 "register_operand" "w"))))]
 "TARGET_NEON && !BYTES_BIG_ENDIAN"
 "vmovn.i<V_sz_elem>\t%e0, %q1\;vmovn.i<V_sz_elem>\t%f0, %q2"
 [(set_attr "type" "multiple")
  (set_attr "length" "8")]
)

;; For the non-quad case.
(define_insn "neon_vec_pack_trunc_<mode>"
 [(set (match_operand:<V_narrow> 0 "register_operand" "=w")
       (truncate:<V_narrow> (match_operand:VN 1 "register_operand" "w")))]
 "TARGET_NEON && !BYTES_BIG_ENDIAN"
 "vmovn.i<V_sz_elem>\t%P0, %q1"
 [(set_attr "type" "neon_move_narrow_q")]
)

;; Pack two D-register operands: move them into the low and high halves of
;; a double-width temporary, then narrow the temporary with a single vmovn.
(define_expand "vec_pack_trunc_<mode>"
 [(match_operand:<V_narrow_pack> 0 "register_operand" "")
  (match_operand:VSHFT 1 "register_operand" "")
  (match_operand:VSHFT 2 "register_operand")]
 "TARGET_NEON && !BYTES_BIG_ENDIAN"
{
  rtx tempreg = gen_reg_rtx (<V_DOUBLE>mode);

  emit_insn (gen_move_lo_quad_<V_double> (tempreg, operands[1]));
  emit_insn (gen_move_hi_quad_<V_double> (tempreg, operands[2]));
  emit_insn (gen_neon_vec_pack_trunc_<V_double> (operands[0], tempreg));
  DONE;
})

;; Floating-point absolute difference written as abs (minus ...); only
;; enabled under flag_unsafe_math_optimizations.
(define_insn "neon_vabd<mode>_2"
 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
       (abs:VCVTF (minus:VCVTF (match_operand:VCVTF 1 "s_register_operand" "w")
                               (match_operand:VCVTF 2 "s_register_operand" "w"))))]
 "TARGET_NEON && flag_unsafe_math_optimizations"
 "vabd.<V_s_elem> %<V_reg>0, %<V_reg>1, %<V_reg>2"
 [(set_attr "type" "neon_fp_abd_s<q>")]
)

;; Same as above with the subtraction expressed as UNSPEC_VSUB.
(define_insn "neon_vabd<mode>_3"
 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
       (abs:VCVTF (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
                                 (match_operand:VCVTF 2 "s_register_operand" "w")]
                  UNSPEC_VSUB)))]
 "TARGET_NEON && flag_unsafe_math_optimizations"
 "vabd.<V_if_elem> %<V_reg>0, %<V_reg>1, %<V_reg>2"
 [(set_attr "type" "neon_fp_abd_s<q>")]
)

;; Copy from core-to-neon regs, then extend, not vice-versa

;; Each split below applies after reload when the DI destination is a VFP
;; register.  The source is duplicated across a vector view of the
;; destination, then the destination is shifted right as a DI value by
;; 64 minus the source width (arithmetic for sign_extend, logical for
;; zero_extend).

;; sign_extend SI -> DI.
(define_split
  [(set (match_operand:DI 0 "s_register_operand" "")
        (sign_extend:DI (match_operand:SI 1 "s_register_operand" "")))]
  "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
  [(set (match_dup 2) (vec_duplicate:V2SI (match_dup 1)))
   (set (match_dup 0) (ashiftrt:DI (match_dup 0) (const_int 32)))]
  {
    operands[2] = gen_rtx_REG (V2SImode, REGNO (operands[0]));
  })

;; sign_extend HI -> DI.
(define_split
  [(set (match_operand:DI 0 "s_register_operand" "")
        (sign_extend:DI (match_operand:HI 1 "s_register_operand" "")))]
  "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
  [(set (match_dup 2) (vec_duplicate:V4HI (match_dup 1)))
   (set (match_dup 0) (ashiftrt:DI (match_dup 0) (const_int 48)))]
  {
    operands[2] = gen_rtx_REG (V4HImode, REGNO (operands[0]));
  })

;; sign_extend QI -> DI.
(define_split
  [(set (match_operand:DI 0 "s_register_operand" "")
        (sign_extend:DI (match_operand:QI 1 "s_register_operand" "")))]
  "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
  [(set (match_dup 2) (vec_duplicate:V8QI (match_dup 1)))
   (set (match_dup 0) (ashiftrt:DI (match_dup 0) (const_int 56)))]
  {
    operands[2] = gen_rtx_REG (V8QImode, REGNO (operands[0]));
  })

;; zero_extend SI -> DI.
(define_split
  [(set (match_operand:DI 0 "s_register_operand" "")
        (zero_extend:DI (match_operand:SI 1 "s_register_operand" "")))]
  "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
  [(set (match_dup 2) (vec_duplicate:V2SI (match_dup 1)))
   (set (match_dup 0) (lshiftrt:DI (match_dup 0) (const_int 32)))]
  {
    operands[2] = gen_rtx_REG (V2SImode, REGNO (operands[0]));
  })

;; zero_extend HI -> DI.
(define_split
  [(set (match_operand:DI 0 "s_register_operand" "")
        (zero_extend:DI (match_operand:HI 1 "s_register_operand" "")))]
  "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
  [(set (match_dup 2) (vec_duplicate:V4HI (match_dup 1)))
   (set (match_dup 0) (lshiftrt:DI (match_dup 0) (const_int 48)))]
  {
    operands[2] = gen_rtx_REG (V4HImode, REGNO (operands[0]));
  })

;; zero_extend QI -> DI.
(define_split
  [(set (match_operand:DI 0 "s_register_operand" "")
        (zero_extend:DI (match_operand:QI 1 "s_register_operand" "")))]
  "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
  [(set (match_dup 2) (vec_duplicate:V8QI (match_dup 1)))
   (set (match_dup 0) (lshiftrt:DI (match_dup 0) (const_int 56)))]
  {
    operands[2] = gen_rtx_REG (V8QImode, REGNO (operands[0]));
  })