;; Machine description for AArch64 AdvSIMD architecture.
;; Copyright (C) 2011-2021 Free Software Foundation, Inc.
;; Contributed by ARM Ltd.
;;
;; This file is part of GCC.
;;
;; GCC is free software; you can redistribute it and/or modify it
;; under the terms of the GNU General Public License as published by
;; the Free Software Foundation; either version 3, or (at your option)
;; any later version.
;;
;; GCC is distributed in the hope that it will be useful, but
;; WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
;; General Public License for more details.
;;
;; You should have received a copy of the GNU General Public License
;; along with GCC; see the file COPYING3.  If not see
;; <http://www.gnu.org/licenses/>.

;; General move expander for all AdvSIMD vector modes (including the
;; FP16 variants).  Legitimizes the operands: stores of zero may keep
;; the immediate (so xzr stores can be used), other stored values are
;; forced into registers, and constants too complex for the constant
;; pool are built up element by element.
(define_expand "mov<mode>"
  [(set (match_operand:VALL_F16MOV 0 "nonimmediate_operand")
	(match_operand:VALL_F16MOV 1 "general_operand"))]
  "TARGET_SIMD"
  "
  /* Force the operand into a register if it is not an
     immediate whose use can be replaced with xzr.
     If the mode is 16 bytes wide, then we will be doing
     a stp in DI mode, so we check the validity of that.
     If the mode is 8 bytes wide, then we will be doing a
     normal str, so the check need not apply.  */
  if (GET_CODE (operands[0]) == MEM
      && !(aarch64_simd_imm_zero (operands[1], <MODE>mode)
	   && ((known_eq (GET_MODE_SIZE (<MODE>mode), 16)
		&& aarch64_mem_pair_operand (operands[0], DImode))
	       || known_eq (GET_MODE_SIZE (<MODE>mode), 8))))
      operands[1] = force_reg (<MODE>mode, operands[1]);

  /* If a constant is too complex to force to memory (e.g. because it
     contains CONST_POLY_INTs), build it up from individual elements instead.
     We should only need to do this before RA; aarch64_legitimate_constant_p
     should ensure that we don't try to rematerialize the constant later.  */
  if (GET_CODE (operands[1]) == CONST_VECTOR
      && targetm.cannot_force_const_mem (<MODE>mode, operands[1]))
    {
      aarch64_expand_vector_init (operands[0], operands[1]);
      DONE;
    }
  "
)

;; Misaligned vector move expander; only available when unaligned
;; accesses are permitted (!STRICT_ALIGNMENT).
(define_expand "movmisalign<mode>"
  [(set (match_operand:VALL 0 "nonimmediate_operand")
        (match_operand:VALL 1 "general_operand"))]
  "TARGET_SIMD && !STRICT_ALIGNMENT"
{
  /* This pattern is not permitted to fail during expansion: if both arguments
     are non-registers (e.g. memory := constant, which can be created by the
     auto-vectorizer), force operand 1 into a register.  */
  if (!register_operand (operands[0], <MODE>mode)
      && !register_operand (operands[1], <MODE>mode))
    operands[1] = force_reg (<MODE>mode, operands[1]);
})

;; Duplicate a scalar into every lane of an integer vector.  The source
;; may be either a SIMD register (DUP from element 0) or, at a cost
;; ('?'), a general-purpose register (DUP from GP reg).
(define_insn "aarch64_simd_dup<mode>"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w, w")
	(vec_duplicate:VDQ_I
	  (match_operand:<VEL> 1 "register_operand" "w,?r")))]
  "TARGET_SIMD"
  "@
   dup\\t%0.<Vtype>, %1.<Vetype>[0]
   dup\\t%0.<Vtype>, %<vw>1"
  [(set_attr "type" "neon_dup<q>, neon_from_gp<q>")]
)

;; Duplicate a floating-point scalar into every lane of a vector.
;; FP values only live in SIMD registers, so there is no GP alternative.
(define_insn "aarch64_simd_dup<mode>"
  [(set (match_operand:VDQF_F16 0 "register_operand" "=w")
	(vec_duplicate:VDQF_F16
	  (match_operand:<VEL> 1 "register_operand" "w")))]
  "TARGET_SIMD"
  "dup\\t%0.<Vtype>, %1.<Vetype>[0]"
  [(set_attr "type" "neon_dup<q>")]
)

;; Broadcast one lane of a vector to all lanes of a same-width vector.
;; The lane number is rewritten for big-endian lane numbering before
;; the template is emitted.
(define_insn "aarch64_dup_lane<mode>"
  [(set (match_operand:VALL_F16 0 "register_operand" "=w")
	(vec_duplicate:VALL_F16
	  (vec_select:<VEL>
	    (match_operand:VALL_F16 1 "register_operand" "w")
	    (parallel [(match_operand:SI 2 "immediate_operand" "i")])
          )))]
  "TARGET_SIMD"
  {
    operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
    return "dup\\t%0.<Vtype>, %1.<Vetype>[%2]";
  }
  [(set_attr "type" "neon_dup<q>")]
)

;; As aarch64_dup_lane, but the source vector has the opposite width
;; (64-bit <-> 128-bit) from the destination; the lane index is
;; endian-adjusted in the source's mode.
(define_insn "aarch64_dup_lane_<vswap_width_name><mode>"
  [(set (match_operand:VALL_F16_NO_V2Q 0 "register_operand" "=w")
	(vec_duplicate:VALL_F16_NO_V2Q
	  (vec_select:<VEL>
	    (match_operand:<VSWAP_WIDTH> 1 "register_operand" "w")
	    (parallel [(match_operand:SI 2 "immediate_operand" "i")])
          )))]
  "TARGET_SIMD"
  {
    operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
    return "dup\\t%0.<Vtype>, %1.<Vetype>[%2]";
  }
  [(set_attr "type" "neon_dup<q>")]
)

;; 64-bit vector moves.  Alternatives: load, store of zero (xzr),
;; store, SIMD reg-reg, SIMD->GP, GP->SIMD, GP-GP, and immediate.
;; The condition mirrors the legitimization done in the mov expander.
(define_insn "*aarch64_simd_mov<VDMOV:mode>"
  [(set (match_operand:VDMOV 0 "nonimmediate_operand"
		"=w, m,  m,  w, ?r, ?w, ?r, w")
	(match_operand:VDMOV 1 "general_operand"
		"m,  Dz, w,  w,  w,  r,  r, Dn"))]
  "TARGET_SIMD
   && (register_operand (operands[0], <MODE>mode)
       || aarch64_simd_reg_or_zero (operands[1], <MODE>mode))"
{
   switch (which_alternative)
     {
     case 0: return "ldr\t%d0, %1";
     case 1: return "str\txzr, %0";
     case 2: return "str\t%d1, %0";
     case 3: return "mov\t%0.<Vbtype>, %1.<Vbtype>";
     case 4: return "umov\t%0, %1.d[0]";
     case 5: return "fmov\t%d0, %1";
     case 6: return "mov\t%0, %1";
     case 7:
	return aarch64_output_simd_mov_immediate (operands[1], 64);
     default: gcc_unreachable ();
     }
}
  [(set_attr "type" "neon_load1_1reg<q>, store_8, neon_store1_1reg<q>,\
		     neon_logic<q>, neon_to_gp<q>, f_mcr,\
		     mov_reg, neon_move<q>")]
)

;; 128-bit vector moves.  Zero stores use STP of two xzr (hence the
;; Umn pair-address constraint).  Alternatives 4-6 involve GP register
;; halves and emit "#": they are split into two DImode moves by the
;; define_splits below (length 8).
(define_insn "*aarch64_simd_mov<VQMOV:mode>"
  [(set (match_operand:VQMOV 0 "nonimmediate_operand"
		"=w, Umn,  m,  w, ?r, ?w, ?r, w")
	(match_operand:VQMOV 1 "general_operand"
		"m,  Dz, w,  w,  w,  r,  r, Dn"))]
  "TARGET_SIMD
   && (register_operand (operands[0], <MODE>mode)
       || aarch64_simd_reg_or_zero (operands[1], <MODE>mode))"
{
  switch (which_alternative)
    {
    case 0:
	return "ldr\t%q0, %1";
    case 1:
	return "stp\txzr, xzr, %0";
    case 2:
	return "str\t%q1, %0";
    case 3:
	return "mov\t%0.<Vbtype>, %1.<Vbtype>";
    case 4:
    case 5:
    case 6:
	return "#";
    case 7:
	return aarch64_output_simd_mov_immediate (operands[1], 128);
    default:
	gcc_unreachable ();
    }
}
  [(set_attr "type" "neon_load1_1reg<q>, store_16, neon_store1_1reg<q>,\
		     neon_logic<q>, multiple, multiple,\
		     multiple, neon_move<q>")
   (set_attr "length" "4,4,4,4,8,8,8,4")]
)

;; When storing lane zero we can use the normal STR and its more permissive
;; addressing modes.

;; Store element 0 of a vector as a scalar.  Only matches when the
;; selected lane is lane zero under the target's endian lane numbering.
(define_insn "aarch64_store_lane0<mode>"
  [(set (match_operand:<VEL> 0 "memory_operand" "=m")
	(vec_select:<VEL> (match_operand:VALL_F16 1 "register_operand" "w")
			(parallel [(match_operand 2 "const_int_operand" "n")])))]
  "TARGET_SIMD
   && ENDIAN_LANE_N (<nunits>, INTVAL (operands[2])) == 0"
  "str\\t%<Vetype>1, %0"
  [(set_attr "type" "neon_store1_1reg<q>")]
)

;; Load a pair of 64-bit vector registers with one LDP.  The condition
;; requires the second address to be exactly the first plus the size of
;; the first mode, i.e. the two loads are adjacent in memory.
(define_insn "load_pair<DREG:mode><DREG2:mode>"
  [(set (match_operand:DREG 0 "register_operand" "=w")
	(match_operand:DREG 1 "aarch64_mem_pair_operand" "Ump"))
   (set (match_operand:DREG2 2 "register_operand" "=w")
	(match_operand:DREG2 3 "memory_operand" "m"))]
  "TARGET_SIMD
   && rtx_equal_p (XEXP (operands[3], 0),
		   plus_constant (Pmode,
				  XEXP (operands[1], 0),
				  GET_MODE_SIZE (<DREG:MODE>mode)))"
  "ldp\\t%d0, %d2, %z1"
  [(set_attr "type" "neon_ldp")]
)

;; Store a pair of 64-bit vector registers with one STP; the two
;; destination addresses must be adjacent (see load_pair above).
(define_insn "vec_store_pair<DREG:mode><DREG2:mode>"
  [(set (match_operand:DREG 0 "aarch64_mem_pair_operand" "=Ump")
	(match_operand:DREG 1 "register_operand" "w"))
   (set (match_operand:DREG2 2 "memory_operand" "=m")
	(match_operand:DREG2 3 "register_operand" "w"))]
  "TARGET_SIMD
   && rtx_equal_p (XEXP (operands[2], 0),
		   plus_constant (Pmode,
				  XEXP (operands[0], 0),
				  GET_MODE_SIZE (<DREG:MODE>mode)))"
  "stp\\t%d1, %d3, %z0"
  [(set_attr "type" "neon_stp")]
)

;; Load a pair of 128-bit vector registers with one LDP (Q-form).
(define_insn "load_pair<VQ:mode><VQ2:mode>"
  [(set (match_operand:VQ 0 "register_operand" "=w")
	(match_operand:VQ 1 "aarch64_mem_pair_operand" "Ump"))
   (set (match_operand:VQ2 2 "register_operand" "=w")
	(match_operand:VQ2 3 "memory_operand" "m"))]
  "TARGET_SIMD
    && rtx_equal_p (XEXP (operands[3], 0),
		    plus_constant (Pmode,
			       XEXP (operands[1], 0),
			       GET_MODE_SIZE (<VQ:MODE>mode)))"
  "ldp\\t%q0, %q2, %z1"
  [(set_attr "type" "neon_ldp_q")]
)

;; Store a pair of 128-bit vector registers with one STP (Q-form); the
;; two destination addresses must be adjacent, as for load_pair above.
;; (Condition reformatted to match the other three pair patterns.)
(define_insn "vec_store_pair<VQ:mode><VQ2:mode>"
  [(set (match_operand:VQ 0 "aarch64_mem_pair_operand" "=Ump")
	(match_operand:VQ 1 "register_operand" "w"))
   (set (match_operand:VQ2 2 "memory_operand" "=m")
	(match_operand:VQ2 3 "register_operand" "w"))]
  "TARGET_SIMD
   && rtx_equal_p (XEXP (operands[2], 0),
		   plus_constant (Pmode,
				  XEXP (operands[0], 0),
				  GET_MODE_SIZE (<VQ:MODE>mode)))"
  "stp\\t%q1, %q3, %z0"
  [(set_attr "type" "neon_stp_q")]
)


;; After reload, split a 128-bit vector move whose source and
;; destination are both general-purpose register pairs into two DImode
;; register moves.
(define_split
  [(set (match_operand:VQMOV 0 "register_operand" "")
      (match_operand:VQMOV 1 "register_operand" ""))]
  "TARGET_SIMD && reload_completed
   && GP_REGNUM_P (REGNO (operands[0]))
   && GP_REGNUM_P (REGNO (operands[1]))"
  [(const_int 0)]
{
  aarch64_simd_emit_reg_reg_move (operands, DImode, 2);
  DONE;
})

;; After reload, split a 128-bit vector move that crosses between the
;; FP/SIMD and general-purpose register files (either direction).
(define_split
  [(set (match_operand:VQMOV 0 "register_operand" "")
        (match_operand:VQMOV 1 "register_operand" ""))]
  "TARGET_SIMD && reload_completed
   && ((FP_REGNUM_P (REGNO (operands[0])) && GP_REGNUM_P (REGNO (operands[1])))
       || (GP_REGNUM_P (REGNO (operands[0])) && FP_REGNUM_P (REGNO (operands[1]))))"
  [(const_int 0)]
{
  aarch64_split_simd_move (operands[0], operands[1]);
  DONE;
})

;; Expand a cross-register-file 128-bit move as two half-width moves.
;; GP -> SIMD inserts the low then high 64-bit halves; SIMD -> GP
;; extracts the low and high halves with vec_selects built from
;; endian-aware lane parallels.
(define_expand "@aarch64_split_simd_mov<mode>"
  [(set (match_operand:VQMOV 0)
        (match_operand:VQMOV 1))]
  "TARGET_SIMD"
  {
    rtx dst = operands[0];
    rtx src = operands[1];

    if (GP_REGNUM_P (REGNO (src)))
      {
        rtx src_low_part = gen_lowpart (<VHALF>mode, src);
        rtx src_high_part = gen_highpart (<VHALF>mode, src);

        emit_insn
          (gen_move_lo_quad_<mode> (dst, src_low_part));
        emit_insn
          (gen_move_hi_quad_<mode> (dst, src_high_part));
      }

    else
      {
        rtx dst_low_part = gen_lowpart (<VHALF>mode, dst);
        rtx dst_high_part = gen_highpart (<VHALF>mode, dst);
	rtx lo = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
	rtx hi = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
        emit_insn (gen_aarch64_get_half<mode> (dst_low_part, src, lo));
        emit_insn (gen_aarch64_get_half<mode> (dst_high_part, src, hi));
      }
    DONE;
  }
)

;; Extract a contiguous half of a 128-bit vector; operand 2 is a
;; parallel of ascending lane indices selecting which half.
(define_expand "aarch64_get_half<mode>"
  [(set (match_operand:<VHALF> 0 "register_operand")
        (vec_select:<VHALF>
          (match_operand:VQMOV 1 "register_operand")
          (match_operand 2 "ascending_int_parallel")))]
  "TARGET_SIMD"
)

;; Convenience expander: extract the low half of a 128-bit vector.
(define_expand "aarch64_get_low<mode>"
  [(match_operand:<VHALF> 0 "register_operand")
   (match_operand:VQMOV 1 "register_operand")]
  "TARGET_SIMD"
  {
    rtx lo = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
    emit_insn (gen_aarch64_get_half<mode> (operands[0], operands[1], lo));
    DONE;
  }
)

;; Convenience expander: extract the high half of a 128-bit vector.
(define_expand "aarch64_get_high<mode>"
  [(match_operand:<VHALF> 0 "register_operand")
   (match_operand:VQMOV 1 "register_operand")]
  "TARGET_SIMD"
  {
    rtx hi = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
    emit_insn (gen_aarch64_get_half<mode> (operands[0], operands[1], hi));
    DONE;
  }
)

;; Move the low half of a 128-bit vector to a 64-bit destination.
;; To a SIMD register this is just a subreg move, so alternative 0
;; emits "#" and is split after reload into a plain register move;
;; to a GP register it is a single UMOV.
(define_insn_and_split "aarch64_simd_mov_from_<mode>low"
  [(set (match_operand:<VHALF> 0 "register_operand" "=w,?r")
        (vec_select:<VHALF>
          (match_operand:VQMOV_NO2E 1 "register_operand" "w,w")
          (match_operand:VQMOV_NO2E 2 "vect_par_cnst_lo_half" "")))]
  "TARGET_SIMD"
  "@
   #
   umov\t%0, %1.d[0]"
  "&& reload_completed && aarch64_simd_register (operands[0], <VHALF>mode)"
  [(set (match_dup 0) (match_dup 1))]
  {
    operands[1] = aarch64_replace_reg_mode (operands[1], <VHALF>mode);
  }
  [(set_attr "type" "mov_reg,neon_to_gp<q>")
   (set_attr "length" "4")]
)

;; Move the high half of a 128-bit vector to a 64-bit destination:
;; DUP of element d[1] for a SIMD destination, UMOV for a GP one.
(define_insn "aarch64_simd_mov_from_<mode>high"
  [(set (match_operand:<VHALF> 0 "register_operand" "=w,?r")
        (vec_select:<VHALF>
          (match_operand:VQMOV_NO2E 1 "register_operand" "w,w")
          (match_operand:VQMOV_NO2E 2 "vect_par_cnst_hi_half" "")))]
  "TARGET_SIMD"
  "@
   dup\\t%d0, %1.d[1]
   umov\t%0, %1.d[1]"
  [(set_attr "type" "neon_dup<q>,neon_to_gp<q>")
   (set_attr "length" "4")]
)

;; Vector OR-NOT.  Note the operand swap in the output template: RTL
;; has (ior (not op1) op2) while ORN computes op2 | ~op1.
(define_insn "orn<mode>3"
 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
       (ior:VDQ_I (not:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w"))
		(match_operand:VDQ_I 2 "register_operand" "w")))]
 "TARGET_SIMD"
 "orn\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>"
  [(set_attr "type" "neon_logic<q>")]
)

;; Vector AND-NOT (bit clear).  As for ORN, BIC's second source is the
;; inverted operand, hence the swapped template.
(define_insn "bic<mode>3"
 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
       (and:VDQ_I (not:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w"))
		(match_operand:VDQ_I 2 "register_operand" "w")))]
 "TARGET_SIMD"
 "bic\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>"
  [(set_attr "type" "neon_logic<q>")]
)

;; Vector integer addition.
(define_insn "add<mode>3"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
        (plus:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
		  (match_operand:VDQ_I 2 "register_operand" "w")))]
  "TARGET_SIMD"
  "add\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_add<q>")]
)

;; Vector integer subtraction.
(define_insn "sub<mode>3"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
        (minus:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
		   (match_operand:VDQ_I 2 "register_operand" "w")))]
  "TARGET_SIMD"
  "sub\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_sub<q>")]
)

;; Vector integer multiplication (byte/half/word elements only; there
;; is no AdvSIMD MUL for 64-bit elements).
(define_insn "mul<mode>3"
  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
        (mult:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")
		   (match_operand:VDQ_BHSI 2 "register_operand" "w")))]
  "TARGET_SIMD"
  "mul\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_mul_<Vetype><q>")]
)

;; Byte-swap each vector element using the REV16/REV32/REV64 family
;; (suffix chosen by <Vrevsuff> from the element width).
(define_insn "bswap<mode>2"
  [(set (match_operand:VDQHSD 0 "register_operand" "=w")
        (bswap:VDQHSD (match_operand:VDQHSD 1 "register_operand" "w")))]
  "TARGET_SIMD"
  "rev<Vrevsuff>\\t%0.<Vbtype>, %1.<Vbtype>"
  [(set_attr "type" "neon_rev<q>")]
)

;; Reverse the bits within each byte of a byte vector (RBIT).
(define_insn "aarch64_rbit<mode>"
  [(set (match_operand:VB 0 "register_operand" "=w")
	(unspec:VB [(match_operand:VB 1 "register_operand" "w")]
		   UNSPEC_RBIT))]
  "TARGET_SIMD"
  "rbit\\t%0.<Vbtype>, %1.<Vbtype>"
  [(set_attr "type" "neon_rbit")]
)

;; Count trailing zeros.  There is no vector CTZ instruction, so compute
;; clz (bit_reverse (x)): byte-swap each element (REV), reverse the bits
;; within each byte on the QI-element view of the same register (RBIT),
;; then count leading zeros.
(define_expand "ctz<mode>2"
  [(set (match_operand:VS 0 "register_operand")
        (ctz:VS (match_operand:VS 1 "register_operand")))]
  "TARGET_SIMD"
  {
     emit_insn (gen_bswap<mode>2 (operands[0], operands[1]));
     /* View the intermediate result as a vector of bytes for RBIT.  */
     rtx op0_castsi2qi = simplify_gen_subreg (<VS:VSI2QI>mode, operands[0],
					      <MODE>mode, 0);
     emit_insn (gen_aarch64_rbit<VS:vsi2qi> (op0_castsi2qi, op0_castsi2qi));
     emit_insn (gen_clz<mode>2 (operands[0], operands[0]));
     DONE;
  }
)

;; xorsign (x, y) = x with its sign bit replaced by XOR with y's sign:
;; isolate the sign bits of operand 2 with an AND mask of just the top
;; bit of each element, then XOR them into operand 1.  All arithmetic
;; is done in the equal-width integer mode via subregs.
(define_expand "xorsign<mode>3"
  [(match_operand:VHSDF 0 "register_operand")
   (match_operand:VHSDF 1 "register_operand")
   (match_operand:VHSDF 2 "register_operand")]
  "TARGET_SIMD"
{

  machine_mode imode = <V_INT_EQUIV>mode;
  rtx v_bitmask = gen_reg_rtx (imode);
  rtx op1x = gen_reg_rtx (imode);
  rtx op2x = gen_reg_rtx (imode);

  rtx arg1 = lowpart_subreg (imode, operands[1], <MODE>mode);
  rtx arg2 = lowpart_subreg (imode, operands[2], <MODE>mode);

  /* Mask of just the sign bit: all-ones shifted up to the top bit of
     each element.  */
  int bits = GET_MODE_UNIT_BITSIZE (<MODE>mode) - 1;

  emit_move_insn (v_bitmask,
		  aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
						     HOST_WIDE_INT_M1U << bits));

  emit_insn (gen_and<v_int_equiv>3 (op2x, v_bitmask, arg2));
  emit_insn (gen_xor<v_int_equiv>3 (op1x, arg1, op2x));
  emit_move_insn (operands[0],
		  lowpart_subreg (<MODE>mode, op1x, imode));
  DONE;
}
)

;; The fcadd and fcmla patterns are kept as UNSPECs because their use
;; must guarantee that the source vectors are contiguous.  It would be
;; wrong to describe the operation without being able to describe the
;; permute that is also required, but even if that were done the
;; permute would have been created as a LOAD_LANES which means the
;; values in the registers are in the wrong order.

;; Floating-point complex add with rotation #<rot> (Armv8.3-A FCADD).
(define_insn "aarch64_fcadd<rot><mode>"
  [(set (match_operand:VHSDF 0 "register_operand" "=w")
	(unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
		       (match_operand:VHSDF 2 "register_operand" "w")]
		       FCADD))]
  "TARGET_COMPLEX"
  "fcadd\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>, #<rot>"
  [(set_attr "type" "neon_fcadd")]
)

;; Standard-name expander for complex add; little-endian only, since
;; the lane pairing assumed by FCADD only holds there.
(define_expand "cadd<rot><mode>3"
  [(set (match_operand:VHSDF 0 "register_operand")
	(unspec:VHSDF [(match_operand:VHSDF 1 "register_operand")
		       (match_operand:VHSDF 2 "register_operand")]
		       FCADD))]
  "TARGET_COMPLEX && !BYTES_BIG_ENDIAN"
)

;; Complex fused multiply-accumulate with rotation (FCMLA).  The
;; accumulator is operand 1 and must match the destination ("0").
(define_insn "aarch64_fcmla<rot><mode>"
  [(set (match_operand:VHSDF 0 "register_operand" "=w")
	(plus:VHSDF (match_operand:VHSDF 1 "register_operand" "0")
		    (unspec:VHSDF [(match_operand:VHSDF 2 "register_operand" "w")
				   (match_operand:VHSDF 3 "register_operand" "w")]
				   FCMLA)))]
  "TARGET_COMPLEX"
  "fcmla\t%0.<Vtype>, %2.<Vtype>, %3.<Vtype>, #<rot>"
  [(set_attr "type" "neon_fcmla")]
)


;; FCMLA by indexed element.  The index counts complex (element-pair)
;; lanes, so it is endian-adjusted in the half-width mode.
(define_insn "aarch64_fcmla_lane<rot><mode>"
  [(set (match_operand:VHSDF 0 "register_operand" "=w")
	(plus:VHSDF (match_operand:VHSDF 1 "register_operand" "0")
		    (unspec:VHSDF [(match_operand:VHSDF 2 "register_operand" "w")
				   (match_operand:VHSDF 3 "register_operand" "w")
				   (match_operand:SI 4 "const_int_operand" "n")]
				   FCMLA)))]
  "TARGET_COMPLEX"
{
  operands[4] = aarch64_endian_lane_rtx (<VHALF>mode, INTVAL (operands[4]));
  return "fcmla\t%0.<Vtype>, %2.<Vtype>, %3.<FCMLA_maybe_lane>, #<rot>";
}
  [(set_attr "type" "neon_fcmla")]
)

;; FCMLA with a 64-bit FP16 accumulator and a lane taken from a
;; 128-bit (q-register) FP16 vector.
(define_insn "aarch64_fcmla_laneq<rot>v4hf"
  [(set (match_operand:V4HF 0 "register_operand" "=w")
	(plus:V4HF (match_operand:V4HF 1 "register_operand" "0")
		   (unspec:V4HF [(match_operand:V4HF 2 "register_operand" "w")
				 (match_operand:V8HF 3 "register_operand" "w")
				 (match_operand:SI 4 "const_int_operand" "n")]
				 FCMLA)))]
  "TARGET_COMPLEX"
{
  operands[4] = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[4]));
  return "fcmla\t%0.4h, %2.4h, %3.h[%4], #<rot>";
}
  [(set_attr "type" "neon_fcmla")]
)

;; 128-bit FCMLA with the indexed operand taken from a 64-bit vector.
;; The lane index counts complex pairs, hence the division by 2 when
;; applying endian lane adjustment.
(define_insn "aarch64_fcmlaq_lane<rot><mode>"
  [(set (match_operand:VQ_HSF 0 "register_operand" "=w")
	(plus:VQ_HSF (match_operand:VQ_HSF 1 "register_operand" "0")
		     (unspec:VQ_HSF [(match_operand:VQ_HSF 2 "register_operand" "w")
				     (match_operand:<VHALF> 3 "register_operand" "w")
				     (match_operand:SI 4 "const_int_operand" "n")]
				     FCMLA)))]
  "TARGET_COMPLEX"
{
  int nunits = GET_MODE_NUNITS (<VHALF>mode).to_constant ();
  operands[4]
    = gen_int_mode (ENDIAN_LANE_N (nunits / 2, INTVAL (operands[4])), SImode);
  return "fcmla\t%0.<Vtype>, %2.<Vtype>, %3.<FCMLA_maybe_lane>, #<rot>";
}
  [(set_attr "type" "neon_fcmla")]
)

;; The complex mla/mls operations always need to expand to two instructions.
;; The first operation does half the computation and the second does the
;; remainder.  Because of this, expand early.
(define_expand "cml<fcmac1><conj_op><mode>4"
  [(set (match_operand:VHSDF 0 "register_operand")
	(plus:VHSDF (match_operand:VHSDF 1 "register_operand")
		    (unspec:VHSDF [(match_operand:VHSDF 2 "register_operand")
				   (match_operand:VHSDF 3 "register_operand")]
				   FCMLA_OP)))]
  "TARGET_COMPLEX && !BYTES_BIG_ENDIAN"
{
  /* Two FCMLAs with complementary rotations; the first accumulates
     into a temporary so operand 1 is read before operand 0 is set.  */
  rtx tmp = gen_reg_rtx (<MODE>mode);
  emit_insn (gen_aarch64_fcmla<rotsplit1><mode> (tmp, operands[1],
						 operands[3], operands[2]));
  emit_insn (gen_aarch64_fcmla<rotsplit2><mode> (operands[0], tmp,
						 operands[3], operands[2]));
  DONE;
})

;; The complex mul operations always need to expand to two instructions.
;; The first operation does half the computation and the second does the
;; remainder.  Because of this, expand early.
(define_expand "cmul<conj_op><mode>3"
  [(set (match_operand:VHSDF 0 "register_operand")
	(unspec:VHSDF [(match_operand:VHSDF 1 "register_operand")
		       (match_operand:VHSDF 2 "register_operand")]
		       FCMUL_OP))]
  "TARGET_COMPLEX && !BYTES_BIG_ENDIAN"
{
  /* A complex multiply is an FCMLA pair accumulating onto zero.  */
  rtx tmp = force_reg (<MODE>mode, CONST0_RTX (<MODE>mode));
  rtx res1 = gen_reg_rtx (<MODE>mode);
  emit_insn (gen_aarch64_fcmla<rotsplit1><mode> (res1, tmp,
						 operands[2], operands[1]));
  emit_insn (gen_aarch64_fcmla<rotsplit2><mode> (operands[0], res1,
						 operands[2], operands[1]));
  DONE;
})

;; These instructions map to the __builtins for the Dot Product operations.
;; SDOT/UDOT: each 32-bit result lane accumulates the dot product of a
;; group of four 8-bit elements; operand 1 is the accumulator.
(define_insn "aarch64_<sur>dot<vsi2qi>"
  [(set (match_operand:VS 0 "register_operand" "=w")
	(plus:VS (match_operand:VS 1 "register_operand" "0")
		(unspec:VS [(match_operand:<VSI2QI> 2 "register_operand" "w")
			    (match_operand:<VSI2QI> 3 "register_operand" "w")]
		DOTPROD)))]
  "TARGET_DOTPROD"
  "<sur>dot\\t%0.<Vtype>, %2.<Vdottype>, %3.<Vdottype>"
  [(set_attr "type" "neon_dot<q>")]
)

;; These instructions map to the __builtins for the armv8.6a I8MM usdot
;; (vector) Dot Product operation (unsigned by signed).
(define_insn "aarch64_usdot<vsi2qi>"
  [(set (match_operand:VS 0 "register_operand" "=w")
	(plus:VS
	  (unspec:VS [(match_operand:<VSI2QI> 2 "register_operand" "w")
		      (match_operand:<VSI2QI> 3 "register_operand" "w")]
	  UNSPEC_USDOT)
	  (match_operand:VS 1 "register_operand" "0")))]
  "TARGET_I8MM"
  "usdot\\t%0.<Vtype>, %2.<Vdottype>, %3.<Vdottype>"
  [(set_attr "type" "neon_dot<q>")]
)

;; These expands map to the Dot Product optab the vectorizer checks for.
;; The auto-vectorizer expects a dot product builtin that also does an
;; accumulation into the provided register.
;; Given the following pattern
;;
;; for (i=0; i<len; i++) {
;;     c = a[i] * b[i];
;;     r += c;
;; }
;; return result;
;;
;; This can be auto-vectorized to
;; r  = a[0]*b[0] + a[1]*b[1] + a[2]*b[2] + a[3]*b[3];
;;
;; given enough iterations.  However the vectorizer can keep unrolling the loop
;; r += a[4]*b[4] + a[5]*b[5] + a[6]*b[6] + a[7]*b[7];
;; r += a[8]*b[8] + a[9]*b[9] + a[10]*b[10] + a[11]*b[11];
;; ...
;;
;; and so the vectorizer provides r, in which the result has to be accumulated.
(define_expand "<sur>dot_prod<vsi2qi>"
  [(set (match_operand:VS 0 "register_operand")
	(plus:VS (unspec:VS [(match_operand:<VSI2QI> 1 "register_operand")
			    (match_operand:<VSI2QI> 2 "register_operand")]
		 DOTPROD)
		(match_operand:VS 3 "register_operand")))]
  "TARGET_DOTPROD"
{
  /* Accumulate into operand 3 in place, then copy to the result.  */
  emit_insn (
    gen_aarch64_<sur>dot<vsi2qi> (operands[3], operands[3], operands[1],
				    operands[2]));
  emit_insn (gen_rtx_SET (operands[0], operands[3]));
  DONE;
})

;; These instructions map to the __builtins for the Dot Product
;; indexed operations (lane taken from a 64-bit vector, viewed as
;; pairs of four 8-bit elements, hence the V8QI lane adjustment).
(define_insn "aarch64_<sur>dot_lane<vsi2qi>"
  [(set (match_operand:VS 0 "register_operand" "=w")
	(plus:VS (match_operand:VS 1 "register_operand" "0")
		(unspec:VS [(match_operand:<VSI2QI> 2 "register_operand" "w")
			    (match_operand:V8QI 3 "register_operand" "<h_con>")
			    (match_operand:SI 4 "immediate_operand" "i")]
		DOTPROD)))]
  "TARGET_DOTPROD"
  {
    operands[4] = aarch64_endian_lane_rtx (V8QImode, INTVAL (operands[4]));
    return "<sur>dot\\t%0.<Vtype>, %2.<Vdottype>, %3.4b[%4]";
  }
  [(set_attr "type" "neon_dot<q>")]
)

;; As aarch64_<sur>dot_lane, but the indexed operand is a 128-bit
;; vector (V16QI), giving access to lanes 0-3 of the q register.
(define_insn "aarch64_<sur>dot_laneq<vsi2qi>"
  [(set (match_operand:VS 0 "register_operand" "=w")
	(plus:VS (match_operand:VS 1 "register_operand" "0")
		(unspec:VS [(match_operand:<VSI2QI> 2 "register_operand" "w")
			    (match_operand:V16QI 3 "register_operand" "<h_con>")
			    (match_operand:SI 4 "immediate_operand" "i")]
		DOTPROD)))]
  "TARGET_DOTPROD"
  {
    operands[4] = aarch64_endian_lane_rtx (V16QImode, INTVAL (operands[4]));
    return "<sur>dot\\t%0.<Vtype>, %2.<Vdottype>, %3.4b[%4]";
  }
  [(set_attr "type" "neon_dot<q>")]
)

;; These instructions map to the __builtins for the armv8.6a I8MM usdot, sudot
;; (by element) Dot Product operations.  The lane counts groups of four
;; bytes, hence the nunits / 4 when endian-adjusting the index.
(define_insn "aarch64_<DOTPROD_I8MM:sur>dot_lane<VB:isquadop><VS:vsi2qi>"
  [(set (match_operand:VS 0 "register_operand" "=w")
	(plus:VS
	  (unspec:VS [(match_operand:<VS:VSI2QI> 2 "register_operand" "w")
		      (match_operand:VB 3 "register_operand" "w")
		      (match_operand:SI 4 "immediate_operand" "i")]
	  DOTPROD_I8MM)
	  (match_operand:VS 1 "register_operand" "0")))]
  "TARGET_I8MM"
  {
    int nunits = GET_MODE_NUNITS (<VB:MODE>mode).to_constant ();
    int lane = INTVAL (operands[4]);
    operands[4] = gen_int_mode (ENDIAN_LANE_N (nunits / 4, lane), SImode);
    return "<DOTPROD_I8MM:sur>dot\\t%0.<VS:Vtype>, %2.<VS:Vdottype>, %3.4b[%4]";
  }
  [(set_attr "type" "neon_dot<VS:q>")]
)

;; copysign (x, y): select the sign bit from operand 2 and all other
;; bits from operand 1 with a single BSL, using a mask that has only
;; the top bit of each element set.
(define_expand "copysign<mode>3"
  [(match_operand:VHSDF 0 "register_operand")
   (match_operand:VHSDF 1 "register_operand")
   (match_operand:VHSDF 2 "register_operand")]
  "TARGET_FLOAT && TARGET_SIMD"
{
  rtx v_bitmask = gen_reg_rtx (<V_INT_EQUIV>mode);
  int bits = GET_MODE_UNIT_BITSIZE (<MODE>mode) - 1;

  emit_move_insn (v_bitmask,
		  aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
						     HOST_WIDE_INT_M1U << bits));
  emit_insn (gen_aarch64_simd_bsl<mode> (operands[0], v_bitmask,
					 operands[2], operands[1]));
  DONE;
}
)

;; Multiply a vector by a broadcast lane of a same-width vector
;; (MUL/FMUL by element); combiner-only pattern (leading '*').
(define_insn "*aarch64_mul3_elt<mode>"
 [(set (match_operand:VMUL 0 "register_operand" "=w")
    (mult:VMUL
      (vec_duplicate:VMUL
	  (vec_select:<VEL>
	    (match_operand:VMUL 1 "register_operand" "<h_con>")
	    (parallel [(match_operand:SI 2 "immediate_operand")])))
      (match_operand:VMUL 3 "register_operand" "w")))]
  "TARGET_SIMD"
  {
    operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
    return "<f>mul\\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
  }
  [(set_attr "type" "neon<fp>_mul_<stype>_scalar<q>")]
)

;; As *aarch64_mul3_elt, but the lane comes from a vector of the
;; opposite width (64-bit <-> 128-bit).
(define_insn "*aarch64_mul3_elt_<vswap_width_name><mode>"
  [(set (match_operand:VMUL_CHANGE_NLANES 0 "register_operand" "=w")
     (mult:VMUL_CHANGE_NLANES
       (vec_duplicate:VMUL_CHANGE_NLANES
	  (vec_select:<VEL>
	    (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
	    (parallel [(match_operand:SI 2 "immediate_operand")])))
      (match_operand:VMUL_CHANGE_NLANES 3 "register_operand" "w")))]
  "TARGET_SIMD"
  {
    operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
    return "<f>mul\\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
  }
  [(set_attr "type" "neon<fp>_mul_<Vetype>_scalar<q>")]
)

;; Multiply a vector by a duplicated scalar; emitted as a by-element
;; multiply from lane 0.
(define_insn "*aarch64_mul3_elt_from_dup<mode>"
 [(set (match_operand:VMUL 0 "register_operand" "=w")
    (mult:VMUL
      (vec_duplicate:VMUL
	    (match_operand:<VEL> 1 "register_operand" "<h_con>"))
      (match_operand:VMUL 2 "register_operand" "w")))]
  "TARGET_SIMD"
  "<f>mul\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]";
  [(set_attr "type" "neon<fp>_mul_<stype>_scalar<q>")]
)

;; Reciprocal square-root estimate (FRSQRTE), vector and scalar forms.
(define_insn "@aarch64_rsqrte<mode>"
  [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
	(unspec:VHSDF_HSDF [(match_operand:VHSDF_HSDF 1 "register_operand" "w")]
		     UNSPEC_RSQRTE))]
  "TARGET_SIMD"
  "frsqrte\\t%<v>0<Vmtype>, %<v>1<Vmtype>"
  [(set_attr "type" "neon_fp_rsqrte_<stype><q>")])

;; Reciprocal square-root Newton-Raphson step (FRSQRTS).
(define_insn "@aarch64_rsqrts<mode>"
  [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
	(unspec:VHSDF_HSDF [(match_operand:VHSDF_HSDF 1 "register_operand" "w")
			    (match_operand:VHSDF_HSDF 2 "register_operand" "w")]
	 UNSPEC_RSQRTS))]
  "TARGET_SIMD"
  "frsqrts\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
  [(set_attr "type" "neon_fp_rsqrts_<stype><q>")])

;; Expand 1/sqrt(x) via the estimate + Newton-Raphson approximation
;; sequence (second argument 'true' selects the reciprocal variant).
(define_expand "rsqrt<mode>2"
  [(set (match_operand:VALLF 0 "register_operand")
	(unspec:VALLF [(match_operand:VALLF 1 "register_operand")]
		     UNSPEC_RSQRT))]
  "TARGET_SIMD"
{
  aarch64_emit_approx_sqrt (operands[0], operands[1], true);
  DONE;
})

;; Unsigned reciprocal square-root estimate on 32-bit elements (URSQRTE).
(define_insn "aarch64_ursqrte<mode>"
[(set (match_operand:VDQ_SI 0 "register_operand" "=w")
      (unspec:VDQ_SI [(match_operand:VDQ_SI 1 "register_operand" "w")]
		   UNSPEC_RSQRTE))]
"TARGET_SIMD"
"ursqrte\\t%<v>0<Vmtype>, %<v>1<Vmtype>"
[(set_attr "type" "neon_fp_rsqrte_<stype><q>")])

;; Scalar DF multiply by one lane of a V2DF vector (FMUL by element).
(define_insn "*aarch64_mul3_elt_to_64v2df"
  [(set (match_operand:DF 0 "register_operand" "=w")
     (mult:DF
       (vec_select:DF
	 (match_operand:V2DF 1 "register_operand" "w")
	 (parallel [(match_operand:SI 2 "immediate_operand")]))
       (match_operand:DF 3 "register_operand" "w")))]
  "TARGET_SIMD"
  {
    operands[2] = aarch64_endian_lane_rtx (V2DFmode, INTVAL (operands[2]));
    return "fmul\\t%0.2d, %3.2d, %1.d[%2]";
  }
  [(set_attr "type" "neon_fp_mul_d_scalar_q")]
)

;; Vector integer negation.
(define_insn "neg<mode>2"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
	(neg:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")))]
  "TARGET_SIMD"
  "neg\t%0.<Vtype>, %1.<Vtype>"
  [(set_attr "type" "neon_neg<q>")]
)

;; Vector integer absolute value (standard-name pattern; the combiner
;; may fuse this into other operations, unlike aarch64_abs below).
(define_insn "abs<mode>2"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
        (abs:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")))]
  "TARGET_SIMD"
  "abs\t%0.<Vtype>, %1.<Vtype>"
  [(set_attr "type" "neon_abs<q>")]
)

;; The intrinsic version of integer ABS must not be allowed to
;; combine with any operation with an integrated ABS step, such
;; as SABD.  Hence the UNSPEC instead of an abs rtx.
(define_insn "aarch64_abs<mode>"
  [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
	  (unspec:VSDQ_I_DI
	    [(match_operand:VSDQ_I_DI 1 "register_operand" "w")]
	   UNSPEC_ABS))]
  "TARGET_SIMD"
  "abs\t%<v>0<Vmtype>, %<v>1<Vmtype>"
  [(set_attr "type" "neon_abs<q>")]
)

;; It's tempting to represent SABD as ABS (MINUS op1 op2).
;; This isn't accurate as ABS always treats its input as a signed value.
;; So (ABS:QI (minus:QI 64 -128)) == (ABS:QI (192 or -64 signed)) == 64.
;; Whereas SABD would return 192 (-64 signed) on the above example.
;; Use MINUS ([us]max (op1, op2), [us]min (op1, op2)) instead.
(define_insn "aarch64_<su>abd<mode>"
  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
	(minus:VDQ_BHSI
	  (USMAX:VDQ_BHSI
	    (match_operand:VDQ_BHSI 1 "register_operand" "w")
	    (match_operand:VDQ_BHSI 2 "register_operand" "w"))
	  (<max_opp>:VDQ_BHSI
	    (match_dup 1)
	    (match_dup 2))))]
  "TARGET_SIMD"
  "<su>abd\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_abd<q>")]
)


;; [SU]ABDL: widening absolute difference; result is double-width (<VWIDE>).
(define_insn "aarch64_<sur>abdl<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(unspec:<VWIDE> [(match_operand:VD_BHSI 1 "register_operand" "w")
			 (match_operand:VD_BHSI 2 "register_operand" "w")]
	ABDL))]
  "TARGET_SIMD"
  "<sur>abdl\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_abd<q>")]
)
872
;; [SU]ABDL2: as ABDL but operating on the high halves of the Q-reg inputs.
(define_insn "aarch64_<sur>abdl2<mode>"
  [(set (match_operand:<VDBLW> 0 "register_operand" "=w")
	(unspec:<VDBLW> [(match_operand:VQW 1 "register_operand" "w")
			 (match_operand:VQW 2 "register_operand" "w")]
	ABDL2))]
  "TARGET_SIMD"
  "<sur>abdl2\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_abd<q>")]
)
882
;; [SU]ABAL: widening absolute difference and accumulate.  Operand 1 is the
;; accumulator, tied to the output via the "0" constraint.
(define_insn "aarch64_<sur>abal<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(unspec:<VWIDE> [(match_operand:VD_BHSI 2 "register_operand" "w")
			  (match_operand:VD_BHSI 3 "register_operand" "w")
			 (match_operand:<VWIDE> 1 "register_operand" "0")]
	ABAL))]
  "TARGET_SIMD"
  "<sur>abal\t%0.<Vwtype>, %2.<Vtype>, %3.<Vtype>"
  [(set_attr "type" "neon_arith_acc<q>")]
)
893
;; [SU]ABAL2: as ABAL but on the high halves of the Q-reg inputs.
;; Operand 1 is the accumulator, tied to the output.
(define_insn "aarch64_<sur>abal2<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(unspec:<VWIDE> [(match_operand:VQW 2 "register_operand" "w")
			  (match_operand:VQW 3 "register_operand" "w")
			 (match_operand:<VWIDE> 1 "register_operand" "0")]
	ABAL2))]
  "TARGET_SIMD"
  "<sur>abal2\t%0.<Vwtype>, %2.<Vtype>, %3.<Vtype>"
  [(set_attr "type" "neon_arith_acc<q>")]
)
904
;; [SU]ADALP: pairwise add-long and accumulate.  Operand 1 is the
;; accumulator (tied to the output), operand 2 the vector being reduced.
(define_insn "aarch64_<sur>adalp<mode>"
  [(set (match_operand:<VDBLW> 0 "register_operand" "=w")
	(unspec:<VDBLW> [(match_operand:VDQV_S 2 "register_operand" "w")
			  (match_operand:<VDBLW> 1 "register_operand" "0")]
	ADALP))]
  "TARGET_SIMD"
  "<sur>adalp\t%0.<Vwhalf>, %2.<Vtype>"
  [(set_attr "type" "neon_reduc_add<q>")]
)
914
915;; Emit a sequence to produce a sum-of-absolute-differences of the V16QI
916;; inputs in operands 1 and 2.  The sequence also has to perform a widening
917;; reduction of the difference into a V4SI vector and accumulate that into
918;; operand 3 before copying that into the result operand 0.
919;; Perform that with a sequence of:
920;; UABDL2	tmp.8h, op1.16b, op2.16b
921;; UABAL	tmp.8h, op1.8b, op2.8b
922;; UADALP	op3.4s, tmp.8h
923;; MOV		op0, op3 // should be eliminated in later passes.
924;;
925;; For TARGET_DOTPROD we do:
926;; MOV	tmp1.16b, #1 // Can be CSE'd and hoisted out of loops.
927;; UABD	tmp2.16b, op1.16b, op2.16b
928;; UDOT	op3.4s, tmp2.16b, tmp1.16b
929;; MOV	op0, op3 // RA will tie the operands of UDOT appropriately.
930;;
931;; The signed version just uses the signed variants of the above instructions
932;; but for TARGET_DOTPROD still emits a UDOT as the absolute difference is
933;; unsigned.
934
;; Sum-of-absolute-differences expander; the instruction sequences emitted
;; are described in the comment block above.
(define_expand "<sur>sadv16qi"
  [(use (match_operand:V4SI 0 "register_operand"))
   (unspec:V16QI [(use (match_operand:V16QI 1 "register_operand"))
		  (use (match_operand:V16QI 2 "register_operand"))] ABAL)
   (use (match_operand:V4SI 3 "register_operand"))]
  "TARGET_SIMD"
  {
    if (TARGET_DOTPROD)
      {
	/* Dot-product of the absolute difference with an all-ones vector
	   performs the widening reduction in one step.  */
	rtx ones = force_reg (V16QImode, CONST1_RTX (V16QImode));
	rtx abd = gen_reg_rtx (V16QImode);
	emit_insn (gen_aarch64_<sur>abdv16qi (abd, operands[1], operands[2]));
	/* UDOT in both the signed and unsigned cases: the absolute
	   difference is always unsigned.  */
	emit_insn (gen_aarch64_udotv16qi (operands[0], operands[3],
					  abd, ones));
	DONE;
      }
    /* Fallback: ABDL2 on the high halves, ABAL on the low halves, then
       pairwise-accumulate into the V4SI accumulator.  */
    rtx reduc = gen_reg_rtx (V8HImode);
    emit_insn (gen_aarch64_<sur>abdl2v16qi (reduc, operands[1],
					    operands[2]));
    emit_insn (gen_aarch64_<sur>abalv8qi (reduc, reduc,
					  gen_lowpart (V8QImode, operands[1]),
					  gen_lowpart (V8QImode,
						       operands[2])));
    emit_insn (gen_aarch64_<sur>adalpv8hi (operands[3], operands[3], reduc));
    emit_move_insn (operands[0], operands[3]);
    DONE;
  }
)
963
;; [SU]ABA: absolute difference (same max-minus-min formulation as
;; aarch64_<su>abd<mode>) accumulated into operand 1 (tied to the output).
(define_insn "aarch64_<su>aba<mode>"
  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
	(plus:VDQ_BHSI (minus:VDQ_BHSI
			 (USMAX:VDQ_BHSI
			   (match_operand:VDQ_BHSI 2 "register_operand" "w")
			   (match_operand:VDQ_BHSI 3 "register_operand" "w"))
			 (<max_opp>:VDQ_BHSI
			   (match_dup 2)
			   (match_dup 3)))
		       (match_operand:VDQ_BHSI 1 "register_operand" "0")))]
  "TARGET_SIMD"
  "<su>aba\t%0.<Vtype>, %2.<Vtype>, %3.<Vtype>"
  [(set_attr "type" "neon_arith_acc<q>")]
)
978
;; FABD: floating-point absolute difference, abs (op1 - op2).
(define_insn "fabd<mode>3"
  [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
	(abs:VHSDF_HSDF
	  (minus:VHSDF_HSDF
	    (match_operand:VHSDF_HSDF 1 "register_operand" "w")
	    (match_operand:VHSDF_HSDF 2 "register_operand" "w"))))]
  "TARGET_SIMD"
  "fabd\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
  [(set_attr "type" "neon_fp_abd_<stype><q>")]
)
989
990;; For AND (vector, register) and BIC (vector, immediate)
;; Bitwise AND.  Alternative 0: AND of two registers.  Alternative 1:
;; BIC with an inverted immediate (Db constraint), with op1 tied to op0.
(define_insn "and<mode>3"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w,w")
	(and:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w,0")
		   (match_operand:VDQ_I 2 "aarch64_reg_or_bic_imm" "w,Db")))]
  "TARGET_SIMD"
  {
    switch (which_alternative)
      {
      case 0:
	return "and\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>";
      case 1:
	/* Emit BIC with the immediate rewritten appropriately.  */
	return aarch64_output_simd_mov_immediate (operands[2], <bitsize>,
						  AARCH64_CHECK_BIC);
      default:
	gcc_unreachable ();
      }
  }
  [(set_attr "type" "neon_logic<q>")]
)
1010
1011;; For ORR (vector, register) and ORR (vector, immediate)
;; Bitwise OR.  Alternative 0: ORR of two registers.  Alternative 1:
;; ORR with an immediate (Do constraint), with op1 tied to op0.
(define_insn "ior<mode>3"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w,w")
	(ior:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w,0")
		   (match_operand:VDQ_I 2 "aarch64_reg_or_orr_imm" "w,Do")))]
  "TARGET_SIMD"
  {
    switch (which_alternative)
      {
      case 0:
	return "orr\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>";
      case 1:
	return aarch64_output_simd_mov_immediate (operands[2], <bitsize>,
						  AARCH64_CHECK_ORR);
      default:
	gcc_unreachable ();
      }
  }
  [(set_attr "type" "neon_logic<q>")]
)
1031
;; Bitwise exclusive OR: EOR Vd.<T>, Vn.<T>, Vm.<T>.
(define_insn "xor<mode>3"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
        (xor:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
		 (match_operand:VDQ_I 2 "register_operand" "w")))]
  "TARGET_SIMD"
  "eor\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>"
  [(set_attr "type" "neon_logic<q>")]
)
1040
;; Bitwise complement: NOT Vd.<T>, Vn.<T>.
(define_insn "one_cmpl<mode>2"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
        (not:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")))]
  "TARGET_SIMD"
  "not\t%0.<Vbtype>, %1.<Vbtype>"
  [(set_attr "type" "neon_logic<q>")]
)
1048
;; Insert a single element into a vector lane.  Operand 2 is a one-hot
;; mask selecting the lane; operand 1 is the new value (SIMD reg, GP reg
;; or memory), operand 3 the vector whose other lanes are kept.
(define_insn "aarch64_simd_vec_set<mode>"
  [(set (match_operand:VALL_F16 0 "register_operand" "=w,w,w")
	(vec_merge:VALL_F16
	    (vec_duplicate:VALL_F16
		(match_operand:<VEL> 1 "aarch64_simd_general_operand" "w,?r,Utv"))
	    (match_operand:VALL_F16 3 "register_operand" "0,0,0")
	    (match_operand:SI 2 "immediate_operand" "i,i,i")))]
  "TARGET_SIMD"
  {
   /* Convert the one-hot lane mask to an architectural lane number,
      accounting for endianness.  */
   int elt = ENDIAN_LANE_N (<nunits>, exact_log2 (INTVAL (operands[2])));
   operands[2] = GEN_INT (HOST_WIDE_INT_1 << elt);
   switch (which_alternative)
     {
     case 0:
	/* Element-to-element insert.  */
	return "ins\\t%0.<Vetype>[%p2], %1.<Vetype>[0]";
     case 1:
	/* Insert from a general-purpose register.  */
	return "ins\\t%0.<Vetype>[%p2], %<vwcore>1";
     case 2:
	/* Single-lane load from memory.  */
        return "ld1\\t{%0.<Vetype>}[%p2], %1";
     default:
	gcc_unreachable ();
     }
  }
  [(set_attr "type" "neon_ins<q>, neon_from_gp<q>, neon_load1_one_lane<q>")]
)
1074
;; Copy one lane of operand 3 into the lane of operand 1 selected by the
;; one-hot mask in operand 2 (INS element form).
(define_insn "@aarch64_simd_vec_copy_lane<mode>"
  [(set (match_operand:VALL_F16 0 "register_operand" "=w")
	(vec_merge:VALL_F16
	    (vec_duplicate:VALL_F16
	      (vec_select:<VEL>
		(match_operand:VALL_F16 3 "register_operand" "w")
		(parallel
		  [(match_operand:SI 4 "immediate_operand" "i")])))
	    (match_operand:VALL_F16 1 "register_operand" "0")
	    (match_operand:SI 2 "immediate_operand" "i")))]
  "TARGET_SIMD"
  {
    /* Map the destination mask and the source lane index to architectural
       lane numbers for the current endianness.  */
    int elt = ENDIAN_LANE_N (<nunits>, exact_log2 (INTVAL (operands[2])));
    operands[2] = GEN_INT (HOST_WIDE_INT_1 << elt);
    operands[4] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[4]));

    return "ins\t%0.<Vetype>[%p2], %3.<Vetype>[%4]";
  }
  [(set_attr "type" "neon_ins<q>")]
)
1095
;; As above, but the source vector has the opposite width (<VSWAP_WIDTH>),
;; letting INS copy between D- and Q-reg views of the same element type.
(define_insn "*aarch64_simd_vec_copy_lane_<vswap_width_name><mode>"
  [(set (match_operand:VALL_F16_NO_V2Q 0 "register_operand" "=w")
	(vec_merge:VALL_F16_NO_V2Q
	    (vec_duplicate:VALL_F16_NO_V2Q
	      (vec_select:<VEL>
		(match_operand:<VSWAP_WIDTH> 3 "register_operand" "w")
		(parallel
		  [(match_operand:SI 4 "immediate_operand" "i")])))
	    (match_operand:VALL_F16_NO_V2Q 1 "register_operand" "0")
	    (match_operand:SI 2 "immediate_operand" "i")))]
  "TARGET_SIMD"
  {
    int elt = ENDIAN_LANE_N (<nunits>, exact_log2 (INTVAL (operands[2])));
    operands[2] = GEN_INT (HOST_WIDE_INT_1 << elt);
    /* The source lane is numbered in the swapped-width mode.  */
    operands[4] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode,
					   INTVAL (operands[4]));

    return "ins\t%0.<Vetype>[%p2], %3.<Vetype>[%4]";
  }
  [(set_attr "type" "neon_ins<q>")]
)
1117
;; Extract the sign bit of each float element as an integer 0/1, by
;; reinterpreting the vector as integers and logically shifting right by
;; the element width minus one.
(define_expand "signbit<mode>2"
  [(use (match_operand:<V_INT_EQUIV> 0 "register_operand"))
   (use (match_operand:VDQSF 1 "register_operand"))]
  "TARGET_SIMD"
{
  int shift_amount = GET_MODE_UNIT_BITSIZE (<V_INT_EQUIV>mode) - 1;
  rtx shift_vector = aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
                                                        shift_amount);
  operands[1] = lowpart_subreg (<V_INT_EQUIV>mode, operands[1], <MODE>mode);

  emit_insn (gen_aarch64_simd_lshr<v_int_equiv> (operands[0], operands[1],
                                                 shift_vector));
  DONE;
})
1132
;; Vector logical shift right by an immediate (duplicated into a vector).
(define_insn "aarch64_simd_lshr<mode>"
 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
       (lshiftrt:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
		     (match_operand:VDQ_I  2 "aarch64_simd_rshift_imm" "Dr")))]
 "TARGET_SIMD"
 "ushr\t%0.<Vtype>, %1.<Vtype>, %2"
  [(set_attr "type" "neon_shift_imm<q>")]
)
1141
;; Vector arithmetic shift right by an immediate (duplicated into a vector).
(define_insn "aarch64_simd_ashr<mode>"
 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
       (ashiftrt:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
		     (match_operand:VDQ_I  2 "aarch64_simd_rshift_imm" "Dr")))]
 "TARGET_SIMD"
 "sshr\t%0.<Vtype>, %1.<Vtype>, %2"
  [(set_attr "type" "neon_shift_imm<q>")]
)
1150
;; Shift-right-and-accumulate: [US]SRA.  SHIFTRT covers both logical and
;; arithmetic right shifts; <sra_op> selects the u/s mnemonic prefix.
;; Operand 3 is the accumulator, tied to the output.
(define_insn "*aarch64_simd_sra<mode>"
 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
	(plus:VDQ_I
	   (SHIFTRT:VDQ_I
		(match_operand:VDQ_I 1 "register_operand" "w")
		(match_operand:VDQ_I 2 "aarch64_simd_rshift_imm" "Dr"))
	   (match_operand:VDQ_I 3 "register_operand" "0")))]
  "TARGET_SIMD"
  "<sra_op>sra\t%0.<Vtype>, %1.<Vtype>, %2"
  [(set_attr "type" "neon_shift_acc<q>")]
)
1162
;; Vector left shift by an immediate (duplicated into a vector).
(define_insn "aarch64_simd_imm_shl<mode>"
 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
       (ashift:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
		   (match_operand:VDQ_I  2 "aarch64_simd_lshift_imm" "Dl")))]
 "TARGET_SIMD"
  "shl\t%0.<Vtype>, %1.<Vtype>, %2"
  [(set_attr "type" "neon_shift_imm<q>")]
)
1171
;; Vector left shift by a per-element register shift amount (SSHL).
(define_insn "aarch64_simd_reg_sshl<mode>"
 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
       (ashift:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
		   (match_operand:VDQ_I 2 "register_operand" "w")))]
 "TARGET_SIMD"
 "sshl\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_shift_reg<q>")]
)
1180
;; USHL: register-controlled shift; negative shift amounts shift right
;; (logically), hence the UNSPEC rather than a plain shift rtx.
(define_insn "aarch64_simd_reg_shl<mode>_unsigned"
 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
       (unspec:VDQ_I [(match_operand:VDQ_I 1 "register_operand" "w")
		    (match_operand:VDQ_I 2 "register_operand" "w")]
		   UNSPEC_ASHIFT_UNSIGNED))]
 "TARGET_SIMD"
 "ushl\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_shift_reg<q>")]
)
1190
;; SSHL: register-controlled shift; negative shift amounts shift right
;; (arithmetically), hence the UNSPEC rather than a plain shift rtx.
(define_insn "aarch64_simd_reg_shl<mode>_signed"
 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
       (unspec:VDQ_I [(match_operand:VDQ_I 1 "register_operand" "w")
		    (match_operand:VDQ_I 2 "register_operand" "w")]
		   UNSPEC_ASHIFT_SIGNED))]
 "TARGET_SIMD"
 "sshl\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_shift_reg<q>")]
)
1200
;; Vector shift left by a scalar amount.  An in-range constant amount uses
;; the immediate SHL form; otherwise the amount is duplicated into a vector
;; and the register SSHL form is used.
(define_expand "ashl<mode>3"
  [(match_operand:VDQ_I 0 "register_operand")
   (match_operand:VDQ_I 1 "register_operand")
   (match_operand:SI  2 "general_operand")]
 "TARGET_SIMD"
{
  int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
  int shift_amount;

  if (CONST_INT_P (operands[2]))
    {
      shift_amount = INTVAL (operands[2]);
      /* SHL accepts immediates 0 .. bit_width - 1.  */
      if (shift_amount >= 0 && shift_amount < bit_width)
        {
	  rtx tmp = aarch64_simd_gen_const_vector_dup (<MODE>mode,
						       shift_amount);
	  emit_insn (gen_aarch64_simd_imm_shl<mode> (operands[0],
						     operands[1],
						     tmp));
          DONE;
        }
    }

  operands[2] = force_reg (SImode, operands[2]);

  /* Broadcast the (possibly variable) shift amount and use SSHL.  */
  rtx tmp = gen_reg_rtx (<MODE>mode);
  emit_insn (gen_aarch64_simd_dup<mode> (tmp, convert_to_mode (<VEL>mode,
							       operands[2],
							       0)));
  emit_insn (gen_aarch64_simd_reg_sshl<mode> (operands[0], operands[1], tmp));
  DONE;
})
1233
;; Vector logical shift right by a scalar amount.  An in-range constant
;; uses the immediate USHR form; otherwise the amount is negated,
;; broadcast, and the register USHL form is used (negative USHL amounts
;; shift right).
(define_expand "lshr<mode>3"
  [(match_operand:VDQ_I 0 "register_operand")
   (match_operand:VDQ_I 1 "register_operand")
   (match_operand:SI  2 "general_operand")]
 "TARGET_SIMD"
{
  int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
  int shift_amount;

  if (CONST_INT_P (operands[2]))
    {
      shift_amount = INTVAL (operands[2]);
      /* USHR accepts immediates 1 .. bit_width.  */
      if (shift_amount > 0 && shift_amount <= bit_width)
        {
	  rtx tmp = aarch64_simd_gen_const_vector_dup (<MODE>mode,
						       shift_amount);
          emit_insn (gen_aarch64_simd_lshr<mode> (operands[0],
						  operands[1],
						  tmp));
	  DONE;
	}
    }

  operands[2] = force_reg (SImode, operands[2]);

  rtx tmp = gen_reg_rtx (SImode);
  rtx tmp1 = gen_reg_rtx (<MODE>mode);
  emit_insn (gen_negsi2 (tmp, operands[2]));
  emit_insn (gen_aarch64_simd_dup<mode> (tmp1,
					 convert_to_mode (<VEL>mode, tmp, 0)));
  emit_insn (gen_aarch64_simd_reg_shl<mode>_unsigned (operands[0], operands[1],
						      tmp1));
  DONE;
})
1268
;; Vector arithmetic shift right by a scalar amount.  An in-range constant
;; uses the immediate SSHR form; otherwise the amount is negated,
;; broadcast, and the register SSHL form is used (negative SSHL amounts
;; shift right).
(define_expand "ashr<mode>3"
  [(match_operand:VDQ_I 0 "register_operand")
   (match_operand:VDQ_I 1 "register_operand")
   (match_operand:SI  2 "general_operand")]
 "TARGET_SIMD"
{
  int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
  int shift_amount;

  if (CONST_INT_P (operands[2]))
    {
      shift_amount = INTVAL (operands[2]);
      /* SSHR accepts immediates 1 .. bit_width.  */
      if (shift_amount > 0 && shift_amount <= bit_width)
        {
	  rtx tmp = aarch64_simd_gen_const_vector_dup (<MODE>mode,
						       shift_amount);
          emit_insn (gen_aarch64_simd_ashr<mode> (operands[0],
						  operands[1],
						  tmp));
          DONE;
	}
    }

  operands[2] = force_reg (SImode, operands[2]);

  rtx tmp = gen_reg_rtx (SImode);
  rtx tmp1 = gen_reg_rtx (<MODE>mode);
  emit_insn (gen_negsi2 (tmp, operands[2]));
  emit_insn (gen_aarch64_simd_dup<mode> (tmp1, convert_to_mode (<VEL>mode,
								tmp, 0)));
  emit_insn (gen_aarch64_simd_reg_shl<mode>_signed (operands[0], operands[1],
						    tmp1));
  DONE;
})
1303
;; Vector-by-vector left shift: expand directly to the register SSHL form.
(define_expand "vashl<mode>3"
 [(match_operand:VDQ_I 0 "register_operand")
  (match_operand:VDQ_I 1 "register_operand")
  (match_operand:VDQ_I 2 "register_operand")]
 "TARGET_SIMD"
{
  emit_insn (gen_aarch64_simd_reg_sshl<mode> (operands[0], operands[1],
					      operands[2]));
  DONE;
})
1314
1315;; Using mode VDQ_BHSI as there is no V2DImode neg!
1316;; Negating individual lanes most certainly offsets the
1317;; gain from vectorization.
;; Vector-by-vector arithmetic right shift: negate the shift amounts and
;; use SSHL (negative amounts shift right arithmetically).
(define_expand "vashr<mode>3"
 [(match_operand:VDQ_BHSI 0 "register_operand")
  (match_operand:VDQ_BHSI 1 "register_operand")
  (match_operand:VDQ_BHSI 2 "register_operand")]
 "TARGET_SIMD"
{
  rtx neg = gen_reg_rtx (<MODE>mode);
  /* Use emit_insn, consistently with the other expanders in this file.  */
  emit_insn (gen_neg<mode>2 (neg, operands[2]));
  emit_insn (gen_aarch64_simd_reg_shl<mode>_signed (operands[0], operands[1],
						    neg));
  DONE;
})
1330
1331;; DI vector shift
;; DI vector shift
(define_expand "aarch64_ashr_simddi"
  [(match_operand:DI 0 "register_operand")
   (match_operand:DI 1 "register_operand")
   (match_operand:SI 2 "aarch64_shift_imm64_di")]
  "TARGET_SIMD"
  {
    /* An arithmetic shift right by 64 fills the result with copies of the sign
       bit, just like asr by 63 - however the standard pattern does not handle
       a shift by 64.  */
    if (INTVAL (operands[2]) == 64)
      operands[2] = GEN_INT (63);
    emit_insn (gen_ashrdi3 (operands[0], operands[1], operands[2]));
    DONE;
  }
)
1347
;; Vector-by-vector logical right shift: negate the shift amounts and use
;; USHL (negative amounts shift right logically).
(define_expand "vlshr<mode>3"
 [(match_operand:VDQ_BHSI 0 "register_operand")
  (match_operand:VDQ_BHSI 1 "register_operand")
  (match_operand:VDQ_BHSI 2 "register_operand")]
 "TARGET_SIMD"
{
  rtx neg = gen_reg_rtx (<MODE>mode);
  /* Use emit_insn, consistently with the other expanders in this file.  */
  emit_insn (gen_neg<mode>2 (neg, operands[2]));
  emit_insn (gen_aarch64_simd_reg_shl<mode>_unsigned (operands[0], operands[1],
						      neg));
  DONE;
})
1360
;; DI logical shift right: a shift by 64 yields zero (the standard pattern
;; cannot express it), otherwise defer to the standard lshrdi3 pattern.
(define_expand "aarch64_lshr_simddi"
  [(match_operand:DI 0 "register_operand")
   (match_operand:DI 1 "register_operand")
   (match_operand:SI 2 "aarch64_shift_imm64_di")]
  "TARGET_SIMD"
  {
    if (INTVAL (operands[2]) == 64)
      emit_move_insn (operands[0], const0_rtx);
    else
      emit_insn (gen_lshrdi3 (operands[0], operands[1], operands[2]));
    DONE;
  }
)
1374
1375;; For 64-bit modes we use ushl/r, as this does not require a SIMD zero.
;; Whole-vector shift towards element 0 by an immediate number of bits.
;; On big-endian the element order in the register is reversed, so a left
;; shift (SHL) is required instead of USHR.
(define_insn "vec_shr_<mode>"
  [(set (match_operand:VD 0 "register_operand" "=w")
        (unspec:VD [(match_operand:VD 1 "register_operand" "w")
		    (match_operand:SI 2 "immediate_operand" "i")]
		   UNSPEC_VEC_SHR))]
  "TARGET_SIMD"
  {
    if (BYTES_BIG_ENDIAN)
      return "shl %d0, %d1, %2";
    else
      return "ushr %d0, %d1, %2";
  }
  [(set_attr "type" "neon_shift_imm")]
)
1390
;; Standard vec_set pattern: convert the lane index in operand 2 to the
;; one-hot vec_merge mask expected by aarch64_simd_vec_set<mode>.
(define_expand "vec_set<mode>"
  [(match_operand:VALL_F16 0 "register_operand")
   (match_operand:<VEL> 1 "register_operand")
   (match_operand:SI 2 "immediate_operand")]
  "TARGET_SIMD"
  {
    /* Use HOST_WIDE_INT_1, consistently with the insn patterns above.  */
    HOST_WIDE_INT elem = HOST_WIDE_INT_1 << INTVAL (operands[2]);
    emit_insn (gen_aarch64_simd_vec_set<mode> (operands[0], operands[1],
					  GEN_INT (elem), operands[0]));
    DONE;
  }
)
1403
1404
;; MLA: multiply-add; operand 1 is the accumulator, tied to the output.
(define_insn "aarch64_mla<mode>"
 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
       (plus:VDQ_BHSI (mult:VDQ_BHSI
			(match_operand:VDQ_BHSI 2 "register_operand" "w")
			(match_operand:VDQ_BHSI 3 "register_operand" "w"))
		      (match_operand:VDQ_BHSI 1 "register_operand" "0")))]
 "TARGET_SIMD"
 "mla\t%0.<Vtype>, %2.<Vtype>, %3.<Vtype>"
  [(set_attr "type" "neon_mla_<Vetype><q>")]
)
1415
;; MLA by a duplicated lane of operand 1 (combiner pattern).
(define_insn "*aarch64_mla_elt<mode>"
 [(set (match_operand:VDQHS 0 "register_operand" "=w")
       (plus:VDQHS
	 (mult:VDQHS
	   (vec_duplicate:VDQHS
	      (vec_select:<VEL>
		(match_operand:VDQHS 1 "register_operand" "<h_con>")
		  (parallel [(match_operand:SI 2 "immediate_operand")])))
	   (match_operand:VDQHS 3 "register_operand" "w"))
	 (match_operand:VDQHS 4 "register_operand" "0")))]
 "TARGET_SIMD"
  {
    /* Adjust the lane number for endianness.  */
    operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
    return "mla\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
  }
  [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
)
1433
;; As above, with the lane taken from a vector of the opposite width.
(define_insn "*aarch64_mla_elt_<vswap_width_name><mode>"
 [(set (match_operand:VDQHS 0 "register_operand" "=w")
       (plus:VDQHS
	 (mult:VDQHS
	   (vec_duplicate:VDQHS
	      (vec_select:<VEL>
		(match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
		  (parallel [(match_operand:SI 2 "immediate_operand")])))
	   (match_operand:VDQHS 3 "register_operand" "w"))
	 (match_operand:VDQHS 4 "register_operand" "0")))]
 "TARGET_SIMD"
  {
    operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
    return "mla\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
  }
  [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
)
1451
;; MLA by a scalar broadcast from element 0 of operand 3.
(define_insn "aarch64_mla_n<mode>"
 [(set (match_operand:VDQHS 0 "register_operand" "=w")
	(plus:VDQHS
	  (mult:VDQHS
	    (vec_duplicate:VDQHS
	      (match_operand:<VEL> 3 "register_operand" "<h_con>"))
	    (match_operand:VDQHS 2 "register_operand" "w"))
	  (match_operand:VDQHS 1 "register_operand" "0")))]
 "TARGET_SIMD"
 "mla\t%0.<Vtype>, %2.<Vtype>, %3.<Vetype>[0]"
  [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
)
1464
;; MLS: multiply-subtract; operand 1 is the accumulator, tied to the output.
(define_insn "aarch64_mls<mode>"
 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
       (minus:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "0")
		   (mult:VDQ_BHSI (match_operand:VDQ_BHSI 2 "register_operand" "w")
			      (match_operand:VDQ_BHSI 3 "register_operand" "w"))))]
 "TARGET_SIMD"
 "mls\t%0.<Vtype>, %2.<Vtype>, %3.<Vtype>"
  [(set_attr "type" "neon_mla_<Vetype><q>")]
)
1474
;; MLS by a duplicated lane of operand 1 (combiner pattern).
(define_insn "*aarch64_mls_elt<mode>"
 [(set (match_operand:VDQHS 0 "register_operand" "=w")
       (minus:VDQHS
	 (match_operand:VDQHS 4 "register_operand" "0")
	 (mult:VDQHS
	   (vec_duplicate:VDQHS
	      (vec_select:<VEL>
		(match_operand:VDQHS 1 "register_operand" "<h_con>")
		  (parallel [(match_operand:SI 2 "immediate_operand")])))
	   (match_operand:VDQHS 3 "register_operand" "w"))))]
 "TARGET_SIMD"
  {
    /* Adjust the lane number for endianness.  */
    operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
    return "mls\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
  }
  [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
)
1492
;; As above, with the lane taken from a vector of the opposite width.
(define_insn "*aarch64_mls_elt_<vswap_width_name><mode>"
 [(set (match_operand:VDQHS 0 "register_operand" "=w")
       (minus:VDQHS
	 (match_operand:VDQHS 4 "register_operand" "0")
	 (mult:VDQHS
	   (vec_duplicate:VDQHS
	      (vec_select:<VEL>
		(match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
		  (parallel [(match_operand:SI 2 "immediate_operand")])))
	   (match_operand:VDQHS 3 "register_operand" "w"))))]
 "TARGET_SIMD"
  {
    operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
    return "mls\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
  }
  [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
)
1510
;; MLS by a scalar broadcast from element 0 of operand 3.
(define_insn "aarch64_mls_n<mode>"
  [(set (match_operand:VDQHS 0 "register_operand" "=w")
	(minus:VDQHS
	  (match_operand:VDQHS 1 "register_operand" "0")
	  (mult:VDQHS
	    (vec_duplicate:VDQHS
	      (match_operand:<VEL> 3 "register_operand" "<h_con>"))
	    (match_operand:VDQHS 2 "register_operand" "w"))))]
  "TARGET_SIMD"
  "mls\t%0.<Vtype>, %2.<Vtype>, %3.<Vetype>[0]"
  [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
)
1523
1524;; Max/Min operations.
;; Element-wise [SU]MAX/[SU]MIN for byte/half/word element vectors.
(define_insn "<su><maxmin><mode>3"
 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
       (MAXMIN:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")
		    (match_operand:VDQ_BHSI 2 "register_operand" "w")))]
 "TARGET_SIMD"
 "<su><maxmin>\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_minmax<q>")]
)
1533
;; V2DI has no direct max/min instruction; synthesize it as a compare
;; followed by a vcond (bitwise select) between the two inputs.
(define_expand "<su><maxmin>v2di3"
 [(set (match_operand:V2DI 0 "register_operand")
       (MAXMIN:V2DI (match_operand:V2DI 1 "register_operand")
                    (match_operand:V2DI 2 "register_operand")))]
 "TARGET_SIMD"
{
  enum rtx_code cmp_operator;
  rtx cmp_fmt;

  /* Pick the comparison that selects operand 1 when it is the wanted
     extremum.  */
  switch (<CODE>)
    {
    case UMIN:
      cmp_operator = LTU;
      break;
    case SMIN:
      cmp_operator = LT;
      break;
    case UMAX:
      cmp_operator = GTU;
      break;
    case SMAX:
      cmp_operator = GT;
      break;
    default:
      gcc_unreachable ();
    }

  cmp_fmt = gen_rtx_fmt_ee (cmp_operator, V2DImode, operands[1], operands[2]);
  emit_insn (gen_vcondv2div2di (operands[0], operands[1],
              operands[2], cmp_fmt, operands[1], operands[2]));
  DONE;
})
1566
1567;; Pairwise Integer Max/Min operations.
;; Pairwise integer max/min: [SU]MAXP/[SU]MINP.
(define_insn "aarch64_<maxmin_uns>p<mode>"
 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
       (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand" "w")
			 (match_operand:VDQ_BHSI 2 "register_operand" "w")]
			MAXMINV))]
 "TARGET_SIMD"
 "<maxmin_uns_op>p\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_minmax<q>")]
)
1577
1578;; Pairwise FP Max/Min operations.
;; Pairwise FP max/min: FMAXP/FMINP/FMAXNMP/FMINNMP.
(define_insn "aarch64_<maxmin_uns>p<mode>"
 [(set (match_operand:VHSDF 0 "register_operand" "=w")
       (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
		      (match_operand:VHSDF 2 "register_operand" "w")]
		      FMAXMINV))]
 "TARGET_SIMD"
 "<maxmin_uns_op>p\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_minmax<q>")]
)
1588
1589;; vec_concat gives a new vector with the low elements from operand 1, and
1590;; the high elements from operand 2.  That is to say, given op1 = { a, b }
1591;; op2 = { c, d }, vec_concat (op1, op2) = { a, b, c, d }.
1592;; What that means, is that the RTL descriptions of the below patterns
1593;; need to change depending on endianness.
1594
1595;; Move to the low architectural bits of the register.
1596;; On little-endian this is { operand, zeroes }
1597;; On big-endian this is { zeroes, operand }
1598
;; Little-endian form: { operand 1, zeroes } (see the comment block above).
;; Writing the D-reg implicitly zeroes the upper half of the Q-reg.
(define_insn "move_lo_quad_internal_<mode>"
  [(set (match_operand:VQMOV 0 "register_operand" "=w,w,w")
	(vec_concat:VQMOV
	  (match_operand:<VHALF> 1 "register_operand" "w,r,r")
	  (match_operand:<VHALF> 2 "aarch64_simd_or_scalar_imm_zero")))]
  "TARGET_SIMD && !BYTES_BIG_ENDIAN"
  "@
   dup\\t%d0, %1.d[0]
   fmov\\t%d0, %1
   dup\\t%d0, %1"
  [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
   (set_attr "length" "4")
   (set_attr "arch" "simd,fp,simd")]
)
1613
;; Big-endian form: { zeroes, operand 1 }.  Same instructions as the
;; little-endian pattern; only the RTL description differs.
(define_insn "move_lo_quad_internal_be_<mode>"
  [(set (match_operand:VQMOV 0 "register_operand" "=w,w,w")
	(vec_concat:VQMOV
	  (match_operand:<VHALF> 2 "aarch64_simd_or_scalar_imm_zero")
	  (match_operand:<VHALF> 1 "register_operand" "w,r,r")))]
  "TARGET_SIMD && BYTES_BIG_ENDIAN"
  "@
   dup\\t%d0, %1.d[0]
   fmov\\t%d0, %1
   dup\\t%d0, %1"
  [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
   (set_attr "length" "4")
   (set_attr "arch" "simd,fp,simd")]
)
1628
;; Dispatch to the endian-specific move_lo_quad pattern, zero-filling the
;; other half of the register.
(define_expand "move_lo_quad_<mode>"
  [(match_operand:VQMOV 0 "register_operand")
   (match_operand:<VHALF> 1 "register_operand")]
  "TARGET_SIMD"
{
  rtx zs = CONST0_RTX (<VHALF>mode);
  if (BYTES_BIG_ENDIAN)
    emit_insn (gen_move_lo_quad_internal_be_<mode> (operands[0], operands[1], zs));
  else
    emit_insn (gen_move_lo_quad_internal_<mode> (operands[0], operands[1], zs));
  DONE;
}
)
1642
1643;; Move operand1 to the high architectural bits of the register, keeping
1644;; the low architectural bits of operand2.
1645;; For little-endian this is { operand2, operand1 }
1646;; For big-endian this is { operand1, operand2 }
1647
;; Little-endian form: keep the low half of operand 0 and insert operand 1
;; (SIMD or GP register) into the high half via INS.
(define_insn "aarch64_simd_move_hi_quad_<mode>"
  [(set (match_operand:VQMOV 0 "register_operand" "+w,w")
        (vec_concat:VQMOV
          (vec_select:<VHALF>
                (match_dup 0)
                (match_operand:VQMOV 2 "vect_par_cnst_lo_half" ""))
	  (match_operand:<VHALF> 1 "register_operand" "w,r")))]
  "TARGET_SIMD && !BYTES_BIG_ENDIAN"
  "@
   ins\\t%0.d[1], %1.d[0]
   ins\\t%0.d[1], %1"
  [(set_attr "type" "neon_ins")]
)
1661
;; Big-endian form of the above; the vec_concat operand order is swapped,
;; the emitted instructions are identical.
(define_insn "aarch64_simd_move_hi_quad_be_<mode>"
  [(set (match_operand:VQMOV 0 "register_operand" "+w,w")
        (vec_concat:VQMOV
	  (match_operand:<VHALF> 1 "register_operand" "w,r")
          (vec_select:<VHALF>
                (match_dup 0)
                (match_operand:VQMOV 2 "vect_par_cnst_lo_half" ""))))]
  "TARGET_SIMD && BYTES_BIG_ENDIAN"
  "@
   ins\\t%0.d[1], %1.d[0]
   ins\\t%0.d[1], %1"
  [(set_attr "type" "neon_ins")]
)
1675
;; Dispatch to the endian-specific move_hi_quad pattern, passing the
;; parallel that selects the low half of operand 0.
(define_expand "move_hi_quad_<mode>"
 [(match_operand:VQMOV 0 "register_operand")
  (match_operand:<VHALF> 1 "register_operand")]
 "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
  if (BYTES_BIG_ENDIAN)
    emit_insn (gen_aarch64_simd_move_hi_quad_be_<mode> (operands[0],
		    operands[1], p));
  else
    emit_insn (gen_aarch64_simd_move_hi_quad_<mode> (operands[0],
		    operands[1], p));
  DONE;
})
1690
1691;; Narrowing operations.
1692
1693;; For doubles.
;; XTN: truncate each double-width element to its narrow half.
(define_insn "aarch64_simd_vec_pack_trunc_<mode>"
 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
       (truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand" "w")))]
 "TARGET_SIMD"
 "xtn\\t%0.<Vntype>, %1.<Vtype>"
  [(set_attr "type" "neon_shift_imm_narrow_q")]
)
1701
;; Pack two D-reg vectors into one Q-reg, then truncate-narrow with XTN.
;; Which input ends up in the low half depends on endianness.
(define_expand "vec_pack_trunc_<mode>"
 [(match_operand:<VNARROWD> 0 "register_operand")
  (match_operand:VDN 1 "register_operand")
  (match_operand:VDN 2 "register_operand")]
 "TARGET_SIMD"
{
  rtx tempreg = gen_reg_rtx (<VDBL>mode);
  int lo = BYTES_BIG_ENDIAN ? 2 : 1;
  int hi = BYTES_BIG_ENDIAN ? 1 : 2;

  emit_insn (gen_move_lo_quad_<Vdbl> (tempreg, operands[lo]));
  emit_insn (gen_move_hi_quad_<Vdbl> (tempreg, operands[hi]));
  emit_insn (gen_aarch64_simd_vec_pack_trunc_<Vdbl> (operands[0], tempreg));
  DONE;
})
1717
;; SHRN, little-endian: the narrowed result lands in the low half of the
;; destination; the high half is zero.
(define_insn "aarch64_shrn<mode>_insn_le"
  [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
	(vec_concat:<VNARROWQ2>
	  (truncate:<VNARROWQ>
	    (lshiftrt:VQN (match_operand:VQN 1 "register_operand" "w")
	      (match_operand:VQN 2 "aarch64_simd_shift_imm_vec_<vn_mode>")))
	  (match_operand:<VNARROWQ> 3 "aarch64_simd_or_scalar_imm_zero")))]
  "TARGET_SIMD && !BYTES_BIG_ENDIAN"
  "shrn\\t%0.<Vntype>, %1.<Vtype>, %2"
  [(set_attr "type" "neon_shift_imm_narrow_q")]
)
1729
;; SHRN, big-endian: same instruction, vec_concat operands swapped to
;; reflect the reversed lane order.
(define_insn "aarch64_shrn<mode>_insn_be"
  [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
	(vec_concat:<VNARROWQ2>
	  (match_operand:<VNARROWQ> 3 "aarch64_simd_or_scalar_imm_zero")
	  (truncate:<VNARROWQ>
	    (lshiftrt:VQN (match_operand:VQN 1 "register_operand" "w")
	      (match_operand:VQN 2 "aarch64_simd_shift_imm_vec_<vn_mode>")))))]
  "TARGET_SIMD && BYTES_BIG_ENDIAN"
  "shrn\\t%0.<Vntype>, %1.<Vtype>, %2"
  [(set_attr "type" "neon_shift_imm_narrow_q")]
)
1741
;; Expand SHRN with a scalar immediate shift count: duplicate the count
;; into a vector, pick the endian-appropriate _insn_le/_insn_be pattern
;; (whose result is wide with a zero high half), then extract the narrow
;; low part for the intrinsic.
(define_expand "aarch64_shrn<mode>"
  [(set (match_operand:<VNARROWQ> 0 "register_operand")
	(truncate:<VNARROWQ>
	  (lshiftrt:VQN (match_operand:VQN 1 "register_operand")
	    (match_operand:SI 2 "aarch64_simd_shift_imm_offset_<vn_mode>"))))]
  "TARGET_SIMD"
  {
    operands[2] = aarch64_simd_gen_const_vector_dup (<MODE>mode,
						 INTVAL (operands[2]));
    rtx tmp = gen_reg_rtx (<VNARROWQ2>mode);
    if (BYTES_BIG_ENDIAN)
      emit_insn (gen_aarch64_shrn<mode>_insn_be (tmp, operands[1],
				operands[2], CONST0_RTX (<VNARROWQ>mode)));
    else
      emit_insn (gen_aarch64_shrn<mode>_insn_le (tmp, operands[1],
				operands[2], CONST0_RTX (<VNARROWQ>mode)));

    /* The intrinsic expects a narrow result, so emit a subreg that will get
       optimized away as appropriate.  */
    emit_move_insn (operands[0], lowpart_subreg (<VNARROWQ>mode, tmp,
						 <VNARROWQ2>mode));
    DONE;
  }
)
1766
;; RSHRN (rounding variant of SHRN, modelled as UNSPEC_RSHRN),
;; little-endian form: narrow result in the low half of the vec_concat.
(define_insn "aarch64_rshrn<mode>_insn_le"
  [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
	(vec_concat:<VNARROWQ2>
	  (unspec:<VNARROWQ> [(match_operand:VQN 1 "register_operand" "w")
		(match_operand:VQN 2
		  "aarch64_simd_shift_imm_vec_<vn_mode>")] UNSPEC_RSHRN)
	  (match_operand:<VNARROWQ> 3 "aarch64_simd_or_scalar_imm_zero")))]
  "TARGET_SIMD && !BYTES_BIG_ENDIAN"
  "rshrn\\t%0.<Vntype>, %1.<Vtype>, %2"
  [(set_attr "type" "neon_shift_imm_narrow_q")]
)
1778
;; RSHRN, big-endian form: vec_concat operand order reversed relative to
;; the _le pattern.
(define_insn "aarch64_rshrn<mode>_insn_be"
  [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
	(vec_concat:<VNARROWQ2>
	  (match_operand:<VNARROWQ> 3 "aarch64_simd_or_scalar_imm_zero")
	  (unspec:<VNARROWQ> [(match_operand:VQN 1 "register_operand" "w")
		(match_operand:VQN 2 "aarch64_simd_shift_imm_vec_<vn_mode>")]
		  UNSPEC_RSHRN)))]
  "TARGET_SIMD && BYTES_BIG_ENDIAN"
  "rshrn\\t%0.<Vntype>, %1.<Vtype>, %2"
  [(set_attr "type" "neon_shift_imm_narrow_q")]
)
1790
;; Expand RSHRN with a scalar immediate shift count; same scheme as the
;; aarch64_shrn<mode> expander: dup the count, emit the endian-correct
;; insn into a wide temporary, then take the narrow low subreg.
(define_expand "aarch64_rshrn<mode>"
  [(match_operand:<VNARROWQ> 0 "register_operand")
   (match_operand:VQN 1 "register_operand")
   (match_operand:SI 2 "aarch64_simd_shift_imm_offset_<vn_mode>")]
  "TARGET_SIMD"
  {
    operands[2] = aarch64_simd_gen_const_vector_dup (<MODE>mode,
						 INTVAL (operands[2]));
    rtx tmp = gen_reg_rtx (<VNARROWQ2>mode);
    if (BYTES_BIG_ENDIAN)
      emit_insn (gen_aarch64_rshrn<mode>_insn_be (tmp, operands[1],
				operands[2], CONST0_RTX (<VNARROWQ>mode)));
    else
      emit_insn (gen_aarch64_rshrn<mode>_insn_le (tmp, operands[1],
				operands[2], CONST0_RTX (<VNARROWQ>mode)));

    /* The intrinsic expects a narrow result, so emit a subreg that will get
       optimized away as appropriate.  */
    emit_move_insn (operands[0], lowpart_subreg (<VNARROWQ>mode, tmp,
						 <VNARROWQ2>mode));
    DONE;
  }
)
1814
;; SHRN2, little-endian form: shift-and-narrow into the high half of the
;; destination; operand 1 (tied to the output, constraint "0") supplies
;; the preserved low half.
(define_insn "aarch64_shrn2<mode>_insn_le"
  [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
	(vec_concat:<VNARROWQ2>
	  (match_operand:<VNARROWQ> 1 "register_operand" "0")
	  (truncate:<VNARROWQ>
	    (lshiftrt:VQN (match_operand:VQN 2 "register_operand" "w")
	      (match_operand:VQN 3 "aarch64_simd_shift_imm_vec_<vn_mode>")))))]
  "TARGET_SIMD && !BYTES_BIG_ENDIAN"
  "shrn2\\t%0.<V2ntype>, %2.<Vtype>, %3"
  [(set_attr "type" "neon_shift_imm_narrow_q")]
)
1826
;; SHRN2, big-endian form: vec_concat operand order reversed relative to
;; the _le pattern; operand 1 still supplies the preserved half.
(define_insn "aarch64_shrn2<mode>_insn_be"
  [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
	(vec_concat:<VNARROWQ2>
	  (truncate:<VNARROWQ>
	    (lshiftrt:VQN (match_operand:VQN 2 "register_operand" "w")
	      (match_operand:VQN 3
		"aarch64_simd_shift_imm_vec_<vn_mode>")))
	  (match_operand:<VNARROWQ> 1 "register_operand" "0")))]
  "TARGET_SIMD && BYTES_BIG_ENDIAN"
  "shrn2\\t%0.<V2ntype>, %2.<Vtype>, %3"
  [(set_attr "type" "neon_shift_imm_narrow_q")]
)
1839
;; Expand SHRN2: duplicate the scalar shift count into a vector and emit
;; the endian-appropriate _insn_le/_insn_be pattern directly (no subreg
;; needed -- the result is already the full wide vector).
(define_expand "aarch64_shrn2<mode>"
  [(match_operand:<VNARROWQ2> 0 "register_operand")
   (match_operand:<VNARROWQ> 1 "register_operand")
   (match_operand:VQN 2 "register_operand")
   (match_operand:SI 3 "aarch64_simd_shift_imm_offset_<vn_mode>")]
  "TARGET_SIMD"
  {
    operands[3] = aarch64_simd_gen_const_vector_dup (<MODE>mode,
						 INTVAL (operands[3]));
    if (BYTES_BIG_ENDIAN)
      emit_insn (gen_aarch64_shrn2<mode>_insn_be (operands[0], operands[1],
						  operands[2], operands[3]));
    else
      emit_insn (gen_aarch64_shrn2<mode>_insn_le (operands[0], operands[1],
						  operands[2], operands[3]));
    DONE;
  }
)
1858
;; RSHRN2 (rounding narrow into the high half), little-endian form;
;; operand 1 (tied "0") supplies the preserved low half.
(define_insn "aarch64_rshrn2<mode>_insn_le"
  [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
	(vec_concat:<VNARROWQ2>
	  (match_operand:<VNARROWQ> 1 "register_operand" "0")
	  (unspec:<VNARROWQ> [(match_operand:VQN 2 "register_operand" "w")
	    (match_operand:VQN 3 "aarch64_simd_shift_imm_vec_<vn_mode>")]
		UNSPEC_RSHRN)))]
  "TARGET_SIMD && !BYTES_BIG_ENDIAN"
  "rshrn2\\t%0.<V2ntype>, %2.<Vtype>, %3"
  [(set_attr "type" "neon_shift_imm_narrow_q")]
)
1870
;; RSHRN2, big-endian form: vec_concat operand order reversed relative to
;; the _le pattern.
(define_insn "aarch64_rshrn2<mode>_insn_be"
  [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
	(vec_concat:<VNARROWQ2>
	  (unspec:<VNARROWQ> [(match_operand:VQN 2 "register_operand" "w")
		(match_operand:VQN 3 "aarch64_simd_shift_imm_vec_<vn_mode>")]
		  UNSPEC_RSHRN)
	  (match_operand:<VNARROWQ> 1 "register_operand" "0")))]
  "TARGET_SIMD && BYTES_BIG_ENDIAN"
  "rshrn2\\t%0.<V2ntype>, %2.<Vtype>, %3"
  [(set_attr "type" "neon_shift_imm_narrow_q")]
)
1882
;; Expand RSHRN2: same scheme as aarch64_shrn2<mode> -- dup the scalar
;; shift count, then dispatch to the endian-correct insn pattern.
(define_expand "aarch64_rshrn2<mode>"
  [(match_operand:<VNARROWQ2> 0 "register_operand")
   (match_operand:<VNARROWQ> 1 "register_operand")
   (match_operand:VQN 2 "register_operand")
   (match_operand:SI 3 "aarch64_simd_shift_imm_offset_<vn_mode>")]
  "TARGET_SIMD"
  {
    operands[3] = aarch64_simd_gen_const_vector_dup (<MODE>mode,
						 INTVAL (operands[3]));
    if (BYTES_BIG_ENDIAN)
      emit_insn (gen_aarch64_rshrn2<mode>_insn_be (operands[0], operands[1],
						  operands[2], operands[3]));
    else
      emit_insn (gen_aarch64_rshrn2<mode>_insn_le (operands[0], operands[1],
						  operands[2], operands[3]));
    DONE;
  }
)
1901
1902;; For quads.
1903
;; Pack-truncate two 128-bit vectors with an XTN/XTN2 pair.  The output
;; is earlyclobber ("=&w") because the first XTN writes %0 while %1 and
;; %2 are still live; the input order swaps on big-endian.
(define_insn "vec_pack_trunc_<mode>"
 [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=&w")
       (vec_concat:<VNARROWQ2>
	 (truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand" "w"))
	 (truncate:<VNARROWQ> (match_operand:VQN 2 "register_operand" "w"))))]
 "TARGET_SIMD"
 {
   if (BYTES_BIG_ENDIAN)
     return "xtn\\t%0.<Vntype>, %2.<Vtype>\;xtn2\\t%0.<V2ntype>, %1.<Vtype>";
   else
     return "xtn\\t%0.<Vntype>, %1.<Vtype>\;xtn2\\t%0.<V2ntype>, %2.<Vtype>";
 }
  [(set_attr "type" "multiple")
   (set_attr "length" "8")]
)
1919
1920;; Widening operations.
1921
;; Widen the low half of a 128-bit vector: select the low-half lanes and
;; sign/zero extend them (SXTL/UXTL).
(define_insn "aarch64_simd_vec_unpack<su>_lo_<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
			       (match_operand:VQW 1 "register_operand" "w")
			       (match_operand:VQW 2 "vect_par_cnst_lo_half" "")
			    )))]
  "TARGET_SIMD"
  "<su>xtl\t%0.<Vwtype>, %1.<Vhalftype>"
  [(set_attr "type" "neon_shift_imm_long")]
)
1932
;; Widen the high half of a 128-bit vector: select the high-half lanes
;; and sign/zero extend them (SXTL2/UXTL2).
(define_insn "aarch64_simd_vec_unpack<su>_hi_<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
			       (match_operand:VQW 1 "register_operand" "w")
			       (match_operand:VQW 2 "vect_par_cnst_hi_half" "")
			    )))]
  "TARGET_SIMD"
  "<su>xtl2\t%0.<Vwtype>, %1.<Vtype>"
  [(set_attr "type" "neon_shift_imm_long")]
)
1943
;; Vectorizer entry point for highpart unpack: build the "hi half"
;; PARALLEL and forward to the insn pattern above.
(define_expand "vec_unpack<su>_hi_<mode>"
  [(match_operand:<VWIDE> 0 "register_operand")
   (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand"))]
  "TARGET_SIMD"
  {
    rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
    emit_insn (gen_aarch64_simd_vec_unpack<su>_hi_<mode> (operands[0],
							  operands[1], p));
    DONE;
  }
)
1955
;; Vectorizer entry point for lowpart unpack: build the "lo half"
;; PARALLEL and forward to the insn pattern above.
(define_expand "vec_unpack<su>_lo_<mode>"
  [(match_operand:<VWIDE> 0 "register_operand")
   (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand"))]
  "TARGET_SIMD"
  {
    rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
    emit_insn (gen_aarch64_simd_vec_unpack<su>_lo_<mode> (operands[0],
							  operands[1], p));
    DONE;
  }
)
1967
1968;; Widening arithmetic.
1969
;; Widening multiply-accumulate on the low halves (SMLAL/UMLAL):
;; acc += extend(lo(op2)) * extend(lo(op4)); acc is tied to the output.
(define_insn "*aarch64_<su>mlal_lo<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (plus:<VWIDE>
          (mult:<VWIDE>
              (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
                 (match_operand:VQW 2 "register_operand" "w")
                 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
              (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
                 (match_operand:VQW 4 "register_operand" "w")
                 (match_dup 3))))
          (match_operand:<VWIDE> 1 "register_operand" "0")))]
  "TARGET_SIMD"
  "<su>mlal\t%0.<Vwtype>, %2.<Vhalftype>, %4.<Vhalftype>"
  [(set_attr "type" "neon_mla_<Vetype>_long")]
)
1985
;; Widening multiply-accumulate on the high halves (SMLAL2/UMLAL2):
;; acc += extend(hi(op2)) * extend(hi(op4)); acc is tied to the output.
(define_insn "aarch64_<su>mlal_hi<mode>_insn"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (plus:<VWIDE>
          (mult:<VWIDE>
              (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
                 (match_operand:VQW 2 "register_operand" "w")
                 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
              (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
                 (match_operand:VQW 4 "register_operand" "w")
                 (match_dup 3))))
          (match_operand:<VWIDE> 1 "register_operand" "0")))]
  "TARGET_SIMD"
  "<su>mlal2\t%0.<Vwtype>, %2.<Vtype>, %4.<Vtype>"
  [(set_attr "type" "neon_mla_<Vetype>_long")]
)
2001
;; Expand the highpart MLAL intrinsic: supply the "hi half" PARALLEL to
;; the _insn pattern above.
(define_expand "aarch64_<su>mlal_hi<mode>"
  [(match_operand:<VWIDE> 0 "register_operand")
   (match_operand:<VWIDE> 1 "register_operand")
   (ANY_EXTEND:<VWIDE>(match_operand:VQW 2 "register_operand"))
   (match_operand:VQW 3 "register_operand")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
  emit_insn (gen_aarch64_<su>mlal_hi<mode>_insn (operands[0], operands[1],
						 operands[2], p, operands[3]));
  DONE;
}
)
2015
;; Highpart widening multiply-accumulate by a duplicated scalar
;; (SMLAL2/UMLAL2 with an element operand, lane 0).
(define_insn "aarch64_<su>mlal_hi_n<mode>_insn"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (plus:<VWIDE>
          (mult:<VWIDE>
              (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
                 (match_operand:VQ_HSI 2 "register_operand" "w")
                 (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
              (ANY_EXTEND:<VWIDE> (vec_duplicate:<VCOND>
	               (match_operand:<VEL> 4 "register_operand" "<h_con>"))))
          (match_operand:<VWIDE> 1 "register_operand" "0")))]
  "TARGET_SIMD"
  "<su>mlal2\t%0.<Vwtype>, %2.<Vtype>, %4.<Vetype>[0]"
  [(set_attr "type" "neon_mla_<Vetype>_long")]
)
2030
;; Expand the highpart MLAL-by-scalar intrinsic: supply the "hi half"
;; PARALLEL to the _insn pattern above.
(define_expand "aarch64_<su>mlal_hi_n<mode>"
  [(match_operand:<VWIDE> 0 "register_operand")
   (match_operand:<VWIDE> 1 "register_operand")
   (ANY_EXTEND:<VWIDE>(match_operand:VQ_HSI 2 "register_operand"))
   (match_operand:<VEL> 3 "register_operand")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
  emit_insn (gen_aarch64_<su>mlal_hi_n<mode>_insn (operands[0],
             operands[1], operands[2], p, operands[3]));
  DONE;
}
)
2044
;; Widening multiply-subtract on the low halves (SMLSL/UMLSL):
;; acc -= extend(lo(op2)) * extend(lo(op4)); acc is tied to the output.
(define_insn "*aarch64_<su>mlsl_lo<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (minus:<VWIDE>
          (match_operand:<VWIDE> 1 "register_operand" "0")
          (mult:<VWIDE>
              (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
                 (match_operand:VQW 2 "register_operand" "w")
                 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
              (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
                 (match_operand:VQW 4 "register_operand" "w")
                 (match_dup 3))))))]
  "TARGET_SIMD"
  "<su>mlsl\t%0.<Vwtype>, %2.<Vhalftype>, %4.<Vhalftype>"
  [(set_attr "type" "neon_mla_<Vetype>_long")]
)
2060
;; Widening multiply-subtract on the high halves (SMLSL2/UMLSL2):
;; acc -= extend(hi(op2)) * extend(hi(op4)); acc is tied to the output.
(define_insn "aarch64_<su>mlsl_hi<mode>_insn"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (minus:<VWIDE>
          (match_operand:<VWIDE> 1 "register_operand" "0")
          (mult:<VWIDE>
              (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
                 (match_operand:VQW 2 "register_operand" "w")
                 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
              (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
                 (match_operand:VQW 4 "register_operand" "w")
                 (match_dup 3))))))]
  "TARGET_SIMD"
  "<su>mlsl2\t%0.<Vwtype>, %2.<Vtype>, %4.<Vtype>"
  [(set_attr "type" "neon_mla_<Vetype>_long")]
)
2076
;; Expand the highpart MLSL intrinsic: supply the "hi half" PARALLEL to
;; the _insn pattern above.
(define_expand "aarch64_<su>mlsl_hi<mode>"
  [(match_operand:<VWIDE> 0 "register_operand")
   (match_operand:<VWIDE> 1 "register_operand")
   (ANY_EXTEND:<VWIDE>(match_operand:VQW 2 "register_operand"))
   (match_operand:VQW 3 "register_operand")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
  emit_insn (gen_aarch64_<su>mlsl_hi<mode>_insn (operands[0], operands[1],
						 operands[2], p, operands[3]));
  DONE;
}
)
2090
;; Highpart widening multiply-subtract by a duplicated scalar
;; (SMLSL2/UMLSL2 with an element operand, lane 0).
(define_insn "aarch64_<su>mlsl_hi_n<mode>_insn"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (minus:<VWIDE>
          (match_operand:<VWIDE> 1 "register_operand" "0")
          (mult:<VWIDE>
            (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
              (match_operand:VQ_HSI 2 "register_operand" "w")
              (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
            (ANY_EXTEND:<VWIDE> (vec_duplicate:<VCOND>
	            (match_operand:<VEL> 4 "register_operand" "<h_con>"))))))]
  "TARGET_SIMD"
  "<su>mlsl2\t%0.<Vwtype>, %2.<Vtype>, %4.<Vetype>[0]"
  [(set_attr "type" "neon_mla_<Vetype>_long")]
)
2105
;; Expand the highpart MLSL-by-scalar intrinsic: supply the "hi half"
;; PARALLEL to the _insn pattern above.
(define_expand "aarch64_<su>mlsl_hi_n<mode>"
  [(match_operand:<VWIDE> 0 "register_operand")
   (match_operand:<VWIDE> 1 "register_operand")
   (ANY_EXTEND:<VWIDE>(match_operand:VQ_HSI 2 "register_operand"))
   (match_operand:<VEL> 3 "register_operand")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
  emit_insn (gen_aarch64_<su>mlsl_hi_n<mode>_insn (operands[0],
             operands[1], operands[2], p, operands[3]));
  DONE;
}
)
2119
;; Widening multiply-accumulate of two 64-bit vectors (SMLAL/UMLAL):
;; acc += extend(op2) * extend(op3); acc is tied to the output.
(define_insn "aarch64_<su>mlal<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (plus:<VWIDE>
          (mult:<VWIDE>
            (ANY_EXTEND:<VWIDE>
              (match_operand:VD_BHSI 2 "register_operand" "w"))
            (ANY_EXTEND:<VWIDE>
              (match_operand:VD_BHSI 3 "register_operand" "w")))
          (match_operand:<VWIDE> 1 "register_operand" "0")))]
  "TARGET_SIMD"
  "<su>mlal\t%0.<Vwtype>, %2.<Vtype>, %3.<Vtype>"
  [(set_attr "type" "neon_mla_<Vetype>_long")]
)
2133
;; Widening multiply-accumulate by a duplicated scalar (SMLAL/UMLAL with
;; an element operand, lane 0).
(define_insn "aarch64_<su>mlal_n<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (plus:<VWIDE>
          (mult:<VWIDE>
            (ANY_EXTEND:<VWIDE>
              (match_operand:VD_HSI 2 "register_operand" "w"))
            (ANY_EXTEND:<VWIDE>
              (vec_duplicate:VD_HSI
	              (match_operand:<VEL> 3 "register_operand" "<h_con>"))))
          (match_operand:<VWIDE> 1 "register_operand" "0")))]
  "TARGET_SIMD"
  "<su>mlal\t%0.<Vwtype>, %2.<Vtype>, %3.<Vetype>[0]"
  [(set_attr "type" "neon_mla_<Vetype>_long")]
)
2148
;; Widening multiply-subtract of two 64-bit vectors (SMLSL/UMLSL):
;; acc -= extend(op2) * extend(op3); acc is tied to the output.
(define_insn "aarch64_<su>mlsl<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (minus:<VWIDE>
          (match_operand:<VWIDE> 1 "register_operand" "0")
          (mult:<VWIDE>
            (ANY_EXTEND:<VWIDE>
              (match_operand:VD_BHSI 2 "register_operand" "w"))
            (ANY_EXTEND:<VWIDE>
              (match_operand:VD_BHSI 3 "register_operand" "w")))))]
  "TARGET_SIMD"
  "<su>mlsl\t%0.<Vwtype>, %2.<Vtype>, %3.<Vtype>"
  [(set_attr "type" "neon_mla_<Vetype>_long")]
)
2162
;; Widening multiply-subtract by a duplicated scalar (SMLSL/UMLSL with
;; an element operand, lane 0).
(define_insn "aarch64_<su>mlsl_n<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (minus:<VWIDE>
          (match_operand:<VWIDE> 1 "register_operand" "0")
          (mult:<VWIDE>
            (ANY_EXTEND:<VWIDE>
              (match_operand:VD_HSI 2 "register_operand" "w"))
            (ANY_EXTEND:<VWIDE>
              (vec_duplicate:VD_HSI
	              (match_operand:<VEL> 3 "register_operand" "<h_con>"))))))]
  "TARGET_SIMD"
  "<su>mlsl\t%0.<Vwtype>, %2.<Vtype>, %3.<Vetype>[0]"
  [(set_attr "type" "neon_mla_<Vetype>_long")]
)
2177
;; Widening multiply of the low halves of two 128-bit vectors
;; (SMULL/UMULL on the low-half lanes).
(define_insn "aarch64_simd_vec_<su>mult_lo_<mode>"
 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
       (mult:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
			   (match_operand:VQW 1 "register_operand" "w")
                           (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
		     (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
                           (match_operand:VQW 2 "register_operand" "w")
                           (match_dup 3)))))]
  "TARGET_SIMD"
  "<su>mull\\t%0.<Vwtype>, %1.<Vhalftype>, %2.<Vhalftype>"
  [(set_attr "type" "neon_mul_<Vetype>_long")]
)
2190
;; Widening multiply of two full 64-bit vectors (SMULL/UMULL), used by
;; the intrinsics: no half selection, both operands are D-registers.
(define_insn "aarch64_intrinsic_vec_<su>mult_lo_<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(mult:<VWIDE> (ANY_EXTEND:<VWIDE>
			 (match_operand:VD_BHSI 1 "register_operand" "w"))
		      (ANY_EXTEND:<VWIDE>
			 (match_operand:VD_BHSI 2 "register_operand" "w"))))]
  "TARGET_SIMD"
  "<su>mull\\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_mul_<Vetype>_long")]
)
2201
;; Vectorizer entry point for lowpart widening multiply: build the
;; "lo half" PARALLEL and forward to the insn pattern above.
(define_expand "vec_widen_<su>mult_lo_<mode>"
  [(match_operand:<VWIDE> 0 "register_operand")
   (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand"))
   (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand"))]
 "TARGET_SIMD"
 {
   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
   emit_insn (gen_aarch64_simd_vec_<su>mult_lo_<mode> (operands[0],
						       operands[1],
						       operands[2], p));
   DONE;
 }
)
2215
;; Widening multiply of the high halves of two 128-bit vectors
;; (SMULL2/UMULL2 on the high-half lanes).
(define_insn "aarch64_simd_vec_<su>mult_hi_<mode>"
 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
      (mult:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
			    (match_operand:VQW 1 "register_operand" "w")
			    (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
		    (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
			    (match_operand:VQW 2 "register_operand" "w")
			    (match_dup 3)))))]
  "TARGET_SIMD"
  "<su>mull2\\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_mul_<Vetype>_long")]
)
2228
;; Vectorizer entry point for highpart widening multiply: build the
;; "hi half" PARALLEL and forward to the insn pattern above.
(define_expand "vec_widen_<su>mult_hi_<mode>"
  [(match_operand:<VWIDE> 0 "register_operand")
   (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand"))
   (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand"))]
 "TARGET_SIMD"
 {
   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
   emit_insn (gen_aarch64_simd_vec_<su>mult_hi_<mode> (operands[0],
						       operands[1],
						       operands[2], p));
   DONE;
 }
)
2243
2244;; vmull_lane_s16 intrinsics
;; Widening multiply by one selected lane of operand 2 (SMULL/UMULL
;; element form).  The lane index is remapped for endianness at output
;; time via aarch64_endian_lane_rtx.
(define_insn "aarch64_vec_<su>mult_lane<Qlane>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(mult:<VWIDE>
	  (ANY_EXTEND:<VWIDE>
	    (match_operand:<VCOND> 1 "register_operand" "w"))
	  (ANY_EXTEND:<VWIDE>
	    (vec_duplicate:<VCOND>
	      (vec_select:<VEL>
		(match_operand:VDQHS 2 "register_operand" "<vwx>")
		(parallel [(match_operand:SI 3 "immediate_operand" "i")]))))))]
  "TARGET_SIMD"
  {
    operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
    return "<su>mull\\t%0.<Vwtype>, %1.<Vcondtype>, %2.<Vetype>[%3]";
  }
  [(set_attr "type" "neon_mul_<Vetype>_scalar_long")]
)
2262
;; Highpart widening multiply by a selected lane of a 64-bit vector
;; (SMULL2/UMULL2 element form).
(define_insn "aarch64_<su>mull_hi_lane<mode>_insn"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(mult:<VWIDE>
	  (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
	    (match_operand:VQ_HSI 1 "register_operand" "w")
	    (match_operand:VQ_HSI 2 "vect_par_cnst_hi_half" "")))
	  (ANY_EXTEND:<VWIDE> (vec_duplicate:<VHALF>
	    (vec_select:<VEL>
	      (match_operand:<VCOND> 3 "register_operand" "<vwx>")
	      (parallel [(match_operand:SI 4 "immediate_operand" "i")]))))))]
  "TARGET_SIMD"
  {
    /* Remap the lane index for endianness before printing.  */
    operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
    return "<su>mull2\\t%0.<Vwtype>, %1.<Vtype>, %3.<Vetype>[%4]";
  }
  [(set_attr "type" "neon_mul_<Vetype>_scalar_long")]
)
2280
;; Expand the highpart mull-by-lane intrinsic: supply the "hi half"
;; PARALLEL to the _insn pattern above.
(define_expand "aarch64_<su>mull_hi_lane<mode>"
  [(match_operand:<VWIDE> 0 "register_operand")
   (ANY_EXTEND:<VWIDE>(match_operand:VQ_HSI 1 "register_operand"))
   (match_operand:<VCOND> 2 "register_operand")
   (match_operand:SI 3 "immediate_operand")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
  emit_insn (gen_aarch64_<su>mull_hi_lane<mode>_insn (operands[0],
	     operands[1], p, operands[2], operands[3]));
  DONE;
}
)
2294
;; As aarch64_<su>mull_hi_lane<mode>_insn but the lane comes from a
;; 128-bit (laneq) vector, mode <VCONQ>.
(define_insn "aarch64_<su>mull_hi_laneq<mode>_insn"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(mult:<VWIDE>
	  (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
	    (match_operand:VQ_HSI 1 "register_operand" "w")
	    (match_operand:VQ_HSI 2 "vect_par_cnst_hi_half" "")))
	  (ANY_EXTEND:<VWIDE> (vec_duplicate:<VHALF>
	    (vec_select:<VEL>
	      (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
	      (parallel [(match_operand:SI 4 "immediate_operand" "i")]))))))]
  "TARGET_SIMD"
  {
    /* Remap the lane index for endianness before printing.  */
    operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
    return "<su>mull2\\t%0.<Vwtype>, %1.<Vtype>, %3.<Vetype>[%4]";
  }
  [(set_attr "type" "neon_mul_<Vetype>_scalar_long")]
)
2312
;; Expand the highpart mull-by-laneq intrinsic: supply the "hi half"
;; PARALLEL to the _insn pattern above.
(define_expand "aarch64_<su>mull_hi_laneq<mode>"
  [(match_operand:<VWIDE> 0 "register_operand")
   (ANY_EXTEND:<VWIDE>(match_operand:VQ_HSI 1 "register_operand"))
   (match_operand:<VCONQ> 2 "register_operand")
   (match_operand:SI 3 "immediate_operand")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
  emit_insn (gen_aarch64_<su>mull_hi_laneq<mode>_insn (operands[0],
	     operands[1], p, operands[2], operands[3]));
  DONE;
}
)
2326
;; Widening multiply by a duplicated scalar (SMULL/UMULL element form,
;; lane 0).
(define_insn "aarch64_<su>mull_n<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (mult:<VWIDE>
          (ANY_EXTEND:<VWIDE>
            (match_operand:VD_HSI 1 "register_operand" "w"))
          (ANY_EXTEND:<VWIDE>
            (vec_duplicate:<VCOND>
	      (match_operand:<VEL> 2 "register_operand" "<h_con>")))))]
  "TARGET_SIMD"
  "<su>mull\t%0.<Vwtype>, %1.<Vtype>, %2.<Vetype>[0]"
  [(set_attr "type" "neon_mul_<Vetype>_scalar_long")]
)
2339
;; Highpart widening multiply by a duplicated scalar (SMULL2/UMULL2
;; element form, lane 0).
(define_insn "aarch64_<su>mull_hi_n<mode>_insn"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(mult:<VWIDE>
	  (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
	    (match_operand:VQ_HSI 1 "register_operand" "w")
	    (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
	  (ANY_EXTEND:<VWIDE>
	    (vec_duplicate:<VCOND>
	      (match_operand:<VEL> 2 "register_operand" "<h_con>")))))]
  "TARGET_SIMD"
  "<su>mull2\\t%0.<Vwtype>, %1.<Vtype>, %2.<Vetype>[0]"
  [(set_attr "type" "neon_mul_<Vetype>_scalar_long")]
)
2353
;; Expand the highpart mull-by-scalar intrinsic: supply the "hi half"
;; PARALLEL to the _insn pattern above.
(define_expand "aarch64_<su>mull_hi_n<mode>"
  [(match_operand:<VWIDE> 0 "register_operand")
   (ANY_EXTEND:<VWIDE> (match_operand:VQ_HSI 1 "register_operand"))
   (match_operand:<VEL> 2 "register_operand")]
 "TARGET_SIMD"
 {
   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
   emit_insn (gen_aarch64_<su>mull_hi_n<mode>_insn (operands[0], operands[1],
						    operands[2], p));
   DONE;
 }
)
2366
2367;; vmlal_lane_s16 intrinsics
;; Widening multiply-accumulate by a selected lane (SMLAL/UMLAL element
;; form); acc is tied to the output.
(define_insn "aarch64_vec_<su>mlal_lane<Qlane>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(plus:<VWIDE>
	  (mult:<VWIDE>
	    (ANY_EXTEND:<VWIDE>
	      (match_operand:<VCOND> 2 "register_operand" "w"))
	    (ANY_EXTEND:<VWIDE>
	      (vec_duplicate:<VCOND>
		(vec_select:<VEL>
		  (match_operand:VDQHS 3 "register_operand" "<vwx>")
		  (parallel [(match_operand:SI 4 "immediate_operand" "i")])))))
	  (match_operand:<VWIDE> 1 "register_operand" "0")))]
  "TARGET_SIMD"
  {
    /* Remap the lane index for endianness before printing.  */
    operands[4] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[4]));
    return "<su>mlal\\t%0.<Vwtype>, %2.<Vcondtype>, %3.<Vetype>[%4]";
  }
  [(set_attr "type" "neon_mla_<Vetype>_scalar_long")]
)
2387
;; Highpart widening multiply-accumulate by a selected lane
;; (SMLAL2/UMLAL2 element form); acc is tied to the output.
(define_insn "aarch64_<su>mlal_hi_lane<mode>_insn"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(plus:<VWIDE>
	  (mult:<VWIDE>
	    (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
	      (match_operand:VQ_HSI 2 "register_operand" "w")
	      (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
	    (ANY_EXTEND:<VWIDE> (vec_duplicate:<VHALF>
	      (vec_select:<VEL>
		(match_operand:<VCOND> 4 "register_operand" "<vwx>")
		(parallel [(match_operand:SI 5 "immediate_operand" "i")])))))
	  (match_operand:<VWIDE> 1 "register_operand" "0")))]
  "TARGET_SIMD"
  {
    /* Remap the lane index for endianness before printing.  */
    operands[5] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[5]));
    return "<su>mlal2\\t%0.<Vwtype>, %2.<Vtype>, %4.<Vetype>[%5]";
  }
  [(set_attr "type" "neon_mla_<Vetype>_scalar_long")]
)
2407
;; Expand the highpart MLAL-by-lane intrinsic: supply the "hi half"
;; PARALLEL to the _insn pattern above.
(define_expand "aarch64_<su>mlal_hi_lane<mode>"
  [(match_operand:<VWIDE> 0 "register_operand")
   (match_operand:<VWIDE> 1 "register_operand")
   (ANY_EXTEND:<VWIDE>(match_operand:VQ_HSI 2 "register_operand"))
   (match_operand:<VCOND> 3 "register_operand")
   (match_operand:SI 4 "immediate_operand")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
  emit_insn (gen_aarch64_<su>mlal_hi_lane<mode>_insn (operands[0],
	     operands[1], operands[2], p, operands[3], operands[4]));
  DONE;
}
)
2422
;; As aarch64_<su>mlal_hi_lane<mode>_insn but the lane comes from a
;; 128-bit (laneq) vector, mode <VCONQ>.
(define_insn "aarch64_<su>mlal_hi_laneq<mode>_insn"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(plus:<VWIDE>
	  (mult:<VWIDE>
	    (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
	      (match_operand:VQ_HSI 2 "register_operand" "w")
	      (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
	    (ANY_EXTEND:<VWIDE> (vec_duplicate:<VHALF>
	      (vec_select:<VEL>
		(match_operand:<VCONQ> 4 "register_operand" "<vwx>")
		(parallel [(match_operand:SI 5 "immediate_operand" "i")])))))
	  (match_operand:<VWIDE> 1 "register_operand" "0")))]
  "TARGET_SIMD"
  {
    /* Remap the lane index for endianness before printing.  */
    operands[5] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[5]));
    return "<su>mlal2\\t%0.<Vwtype>, %2.<Vtype>, %4.<Vetype>[%5]";
  }
  [(set_attr "type" "neon_mla_<Vetype>_scalar_long")]
)
2442
;; Expand the highpart MLAL-by-laneq intrinsic: supply the "hi half"
;; PARALLEL to the _insn pattern above.
(define_expand "aarch64_<su>mlal_hi_laneq<mode>"
  [(match_operand:<VWIDE> 0 "register_operand")
   (match_operand:<VWIDE> 1 "register_operand")
   (ANY_EXTEND:<VWIDE>(match_operand:VQ_HSI 2 "register_operand"))
   (match_operand:<VCONQ> 3 "register_operand")
   (match_operand:SI 4 "immediate_operand")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
  emit_insn (gen_aarch64_<su>mlal_hi_laneq<mode>_insn (operands[0],
	     operands[1], operands[2], p, operands[3], operands[4]));
  DONE;
}
)
2457
;; Widening multiply-subtract by a selected lane (SMLSL/UMLSL element
;; form); acc is tied to the output.
(define_insn "aarch64_vec_<su>mlsl_lane<Qlane>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
   (minus:<VWIDE>
     (match_operand:<VWIDE> 1 "register_operand" "0")
     (mult:<VWIDE>
       (ANY_EXTEND:<VWIDE>
	 (match_operand:<VCOND> 2 "register_operand" "w"))
       (ANY_EXTEND:<VWIDE>
	 (vec_duplicate:<VCOND>
	   (vec_select:<VEL>
	     (match_operand:VDQHS 3 "register_operand" "<vwx>")
	     (parallel [(match_operand:SI 4 "immediate_operand" "i")])))))))]
  "TARGET_SIMD"
  {
    /* Remap the lane index for endianness before printing.  */
    operands[4] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[4]));
    return "<su>mlsl\\t%0.<Vwtype>, %2.<Vcondtype>, %3.<Vetype>[%4]";
  }
  [(set_attr "type" "neon_mla_<Vetype>_scalar_long")]
)
2477
;; Highpart widening multiply-subtract by a selected lane
;; (SMLSL2/UMLSL2 element form); acc is tied to the output.
(define_insn "aarch64_<su>mlsl_hi_lane<mode>_insn"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(minus:<VWIDE>
	  (match_operand:<VWIDE> 1 "register_operand" "0")
	  (mult:<VWIDE>
	    (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
	      (match_operand:VQ_HSI 2 "register_operand" "w")
	      (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
	    (ANY_EXTEND:<VWIDE> (vec_duplicate:<VHALF>
	      (vec_select:<VEL>
		(match_operand:<VCOND> 4 "register_operand" "<vwx>")
		(parallel [(match_operand:SI 5 "immediate_operand" "i")]))))
	  )))]
  "TARGET_SIMD"
  {
    /* Remap the lane index for endianness before printing.  */
    operands[5] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[5]));
    return "<su>mlsl2\\t%0.<Vwtype>, %2.<Vtype>, %4.<Vetype>[%5]";
  }
  [(set_attr "type" "neon_mla_<Vetype>_scalar_long")]
)
2498
;; Expand the highpart MLSL-by-lane intrinsic: supply the "hi half"
;; PARALLEL to the _insn pattern above.
(define_expand "aarch64_<su>mlsl_hi_lane<mode>"
  [(match_operand:<VWIDE> 0 "register_operand")
   (match_operand:<VWIDE> 1 "register_operand")
   (ANY_EXTEND:<VWIDE>(match_operand:VQ_HSI 2 "register_operand"))
   (match_operand:<VCOND> 3 "register_operand")
   (match_operand:SI 4 "immediate_operand")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
  emit_insn (gen_aarch64_<su>mlsl_hi_lane<mode>_insn (operands[0],
	     operands[1], operands[2], p, operands[3], operands[4]));
  DONE;
}
)
2513
;; As aarch64_<su>mlsl_hi_lane<mode>_insn but the lane comes from a
;; 128-bit (laneq) vector, mode <VCONQ>.
(define_insn "aarch64_<su>mlsl_hi_laneq<mode>_insn"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(minus:<VWIDE>
	  (match_operand:<VWIDE> 1 "register_operand" "0")
	  (mult:<VWIDE>
	    (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
	      (match_operand:VQ_HSI 2 "register_operand" "w")
	      (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
	    (ANY_EXTEND:<VWIDE> (vec_duplicate:<VHALF>
	      (vec_select:<VEL>
		(match_operand:<VCONQ> 4 "register_operand" "<vwx>")
		(parallel [(match_operand:SI 5 "immediate_operand" "i")]))))
	  )))]
  "TARGET_SIMD"
  {
    /* Remap the lane index for endianness before printing.  */
    operands[5] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[5]));
    return "<su>mlsl2\\t%0.<Vwtype>, %2.<Vtype>, %4.<Vetype>[%5]";
  }
  [(set_attr "type" "neon_mla_<Vetype>_scalar_long")]
)
2534
;; Expander for the laneq insn above: build the hi-half selection PARALLEL
;; and forward all operands to the *_insn pattern.
(define_expand "aarch64_<su>mlsl_hi_laneq<mode>"
  [(match_operand:<VWIDE> 0 "register_operand")
   (match_operand:<VWIDE> 1 "register_operand")
   (ANY_EXTEND:<VWIDE>(match_operand:VQ_HSI 2 "register_operand"))
   (match_operand:<VCONQ> 3 "register_operand")
   (match_operand:SI 4 "immediate_operand")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
  emit_insn (gen_aarch64_<su>mlsl_hi_laneq<mode>_insn (operands[0],
	     operands[1], operands[2], p, operands[3], operands[4]));
  DONE;
}
)
2549
2550;; FP vector operations.
2551;; AArch64 AdvSIMD supports single-precision (32-bit) and
2552;; double-precision (64-bit) floating-point data types and arithmetic as
2553;; defined by the IEEE 754-2008 standard.  This makes them vectorizable
2554;; without the need for -ffast-math or -funsafe-math-optimizations.
2555;;
;; Floating-point operations can raise an exception.  Vectorizing such
;; operations is safe for the reasons explained below.
2558;;
2559;; ARMv8 permits an extension to enable trapped floating-point
2560;; exception handling, however this is an optional feature.  In the
2561;; event of a floating-point exception being raised by vectorised
2562;; code then:
2563;; 1.  If trapped floating-point exceptions are available, then a trap
2564;;     will be taken when any lane raises an enabled exception.  A trap
2565;;     handler may determine which lane raised the exception.
2566;; 2.  Alternatively a sticky exception flag is set in the
2567;;     floating-point status register (FPSR).  Software may explicitly
2568;;     test the exception flags, in which case the tests will either
2569;;     prevent vectorisation, allowing precise identification of the
2570;;     failing operation, or if tested outside of vectorisable regions
2571;;     then the specific operation and lane are not of interest.
2572
2573;; FP arithmetic operations.
2574
;; Vector FP addition (standard pattern name).
(define_insn "add<mode>3"
 [(set (match_operand:VHSDF 0 "register_operand" "=w")
       (plus:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
		   (match_operand:VHSDF 2 "register_operand" "w")))]
 "TARGET_SIMD"
 "fadd\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_fp_addsub_<stype><q>")]
)
2583
;; Vector FP subtraction (standard pattern name).
(define_insn "sub<mode>3"
 [(set (match_operand:VHSDF 0 "register_operand" "=w")
       (minus:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
		    (match_operand:VHSDF 2 "register_operand" "w")))]
 "TARGET_SIMD"
 "fsub\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_fp_addsub_<stype><q>")]
)
2592
;; Vector FP multiplication (standard pattern name).
(define_insn "mul<mode>3"
 [(set (match_operand:VHSDF 0 "register_operand" "=w")
       (mult:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
		   (match_operand:VHSDF 2 "register_operand" "w")))]
 "TARGET_SIMD"
 "fmul\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_fp_mul_<stype><q>")]
)
2601
;; Vector FP division.  Try the Newton-Raphson reciprocal approximation
;; first (subject to -mlow-precision-div etc.); otherwise fall through to
;; the *div<mode>3 insn below.
(define_expand "div<mode>3"
 [(set (match_operand:VHSDF 0 "register_operand")
       (div:VHSDF (match_operand:VHSDF 1 "register_operand")
		  (match_operand:VHSDF 2 "register_operand")))]
 "TARGET_SIMD"
{
  if (aarch64_emit_approx_div (operands[0], operands[1], operands[2]))
    DONE;

  /* The approximation path may accept forms the insn does not; make sure
     the numerator is in a register before matching.  */
  operands[1] = force_reg (<MODE>mode, operands[1]);
})
2613
;; Full-precision vector FP division.
(define_insn "*div<mode>3"
 [(set (match_operand:VHSDF 0 "register_operand" "=w")
       (div:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
		 (match_operand:VHSDF 2 "register_operand" "w")))]
 "TARGET_SIMD"
 "fdiv\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_fp_div_<stype><q>")]
)
2622
;; Vector FP negation.
(define_insn "neg<mode>2"
 [(set (match_operand:VHSDF 0 "register_operand" "=w")
       (neg:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
 "TARGET_SIMD"
 "fneg\\t%0.<Vtype>, %1.<Vtype>"
  [(set_attr "type" "neon_fp_neg_<stype><q>")]
)
2630
;; Vector FP absolute value.
(define_insn "abs<mode>2"
 [(set (match_operand:VHSDF 0 "register_operand" "=w")
       (abs:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
 "TARGET_SIMD"
 "fabs\\t%0.<Vtype>, %1.<Vtype>"
  [(set_attr "type" "neon_fp_abs_<stype><q>")]
)
2638
;; Vector fused multiply-add (standard fma pattern).  The accumulator
;; (operand 3) is tied to the destination, matching fmla's semantics.
(define_insn "fma<mode>4"
  [(set (match_operand:VHSDF 0 "register_operand" "=w")
       (fma:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
		  (match_operand:VHSDF 2 "register_operand" "w")
		  (match_operand:VHSDF 3 "register_operand" "0")))]
  "TARGET_SIMD"
 "fmla\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_fp_mla_<stype><q>")]
)
2648
;; FMA with one multiplicand broadcast from a lane of a same-width vector
;; (fmla by element); the lane index is endian-adjusted at output time.
(define_insn "*aarch64_fma4_elt<mode>"
  [(set (match_operand:VDQF 0 "register_operand" "=w")
    (fma:VDQF
      (vec_duplicate:VDQF
	(vec_select:<VEL>
	  (match_operand:VDQF 1 "register_operand" "<h_con>")
	  (parallel [(match_operand:SI 2 "immediate_operand")])))
      (match_operand:VDQF 3 "register_operand" "w")
      (match_operand:VDQF 4 "register_operand" "0")))]
  "TARGET_SIMD"
  {
    operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
    return "fmla\\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
  }
  [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
)
2665
;; As above, but the lane comes from the opposite-width vector mode
;; (e.g. a V4SF lane feeding a V2SF operation or vice versa).
(define_insn "*aarch64_fma4_elt_<vswap_width_name><mode>"
  [(set (match_operand:VDQSF 0 "register_operand" "=w")
    (fma:VDQSF
      (vec_duplicate:VDQSF
	(vec_select:<VEL>
	  (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
	  (parallel [(match_operand:SI 2 "immediate_operand")])))
      (match_operand:VDQSF 3 "register_operand" "w")
      (match_operand:VDQSF 4 "register_operand" "0")))]
  "TARGET_SIMD"
  {
    operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
    return "fmla\\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
  }
  [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
)
2682
;; FMA with one multiplicand duplicated from a scalar register; emitted
;; as fmla by element using lane 0.
(define_insn "*aarch64_fma4_elt_from_dup<mode>"
  [(set (match_operand:VMUL 0 "register_operand" "=w")
    (fma:VMUL
      (vec_duplicate:VMUL
	  (match_operand:<VEL> 1 "register_operand" "<h_con>"))
      (match_operand:VMUL 2 "register_operand" "w")
      (match_operand:VMUL 3 "register_operand" "0")))]
  "TARGET_SIMD"
  "fmla\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]"
  [(set_attr "type" "neon<fp>_mla_<stype>_scalar<q>")]
)
2694
;; Scalar DF fma where one multiplicand is a V2DF lane; implemented with
;; the vector fmla-by-element form (only the relevant lane of the result
;; is live, so writing the full vector register is fine).
(define_insn "*aarch64_fma4_elt_to_64v2df"
  [(set (match_operand:DF 0 "register_operand" "=w")
    (fma:DF
	(vec_select:DF
	  (match_operand:V2DF 1 "register_operand" "w")
	  (parallel [(match_operand:SI 2 "immediate_operand")]))
      (match_operand:DF 3 "register_operand" "w")
      (match_operand:DF 4 "register_operand" "0")))]
  "TARGET_SIMD"
  {
    operands[2] = aarch64_endian_lane_rtx (V2DFmode, INTVAL (operands[2]));
    return "fmla\\t%0.2d, %3.2d, %1.d[%2]";
  }
  [(set_attr "type" "neon_fp_mla_d_scalar_q")]
)
2710
;; Fused multiply-subtract: op0 = op3 - op1 * op2, expressed as
;; fma(-op1, op2, op3) and emitted as fmls.
(define_insn "fnma<mode>4"
  [(set (match_operand:VHSDF 0 "register_operand" "=w")
	(fma:VHSDF
	  (neg:VHSDF (match_operand:VHSDF 1 "register_operand" "w"))
	  (match_operand:VHSDF 2 "register_operand" "w")
	  (match_operand:VHSDF 3 "register_operand" "0")))]
  "TARGET_SIMD"
  "fmls\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_fp_mla_<stype><q>")]
)
2721
;; Fused multiply-subtract with the multiplier broadcast from a lane
;; (fmls by element); lane index endian-adjusted at output time.
(define_insn "*aarch64_fnma4_elt<mode>"
  [(set (match_operand:VDQF 0 "register_operand" "=w")
    (fma:VDQF
      (neg:VDQF
        (match_operand:VDQF 3 "register_operand" "w"))
      (vec_duplicate:VDQF
	(vec_select:<VEL>
	  (match_operand:VDQF 1 "register_operand" "<h_con>")
	  (parallel [(match_operand:SI 2 "immediate_operand")])))
      (match_operand:VDQF 4 "register_operand" "0")))]
  "TARGET_SIMD"
  {
    operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
    return "fmls\\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
  }
  [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
)
2739
;; As above, but the lane comes from the opposite-width vector mode.
(define_insn "*aarch64_fnma4_elt_<vswap_width_name><mode>"
  [(set (match_operand:VDQSF 0 "register_operand" "=w")
    (fma:VDQSF
      (neg:VDQSF
        (match_operand:VDQSF 3 "register_operand" "w"))
      (vec_duplicate:VDQSF
	(vec_select:<VEL>
	  (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
	  (parallel [(match_operand:SI 2 "immediate_operand")])))
      (match_operand:VDQSF 4 "register_operand" "0")))]
  "TARGET_SIMD"
  {
    operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
    return "fmls\\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
  }
  [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
)
2757
;; Fused multiply-subtract with one multiplicand duplicated from a scalar
;; register; emitted as fmls by element using lane 0.
(define_insn "*aarch64_fnma4_elt_from_dup<mode>"
  [(set (match_operand:VMUL 0 "register_operand" "=w")
    (fma:VMUL
      (neg:VMUL
        (match_operand:VMUL 2 "register_operand" "w"))
      (vec_duplicate:VMUL
	(match_operand:<VEL> 1 "register_operand" "<h_con>"))
      (match_operand:VMUL 3 "register_operand" "0")))]
  "TARGET_SIMD"
  "fmls\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]"
  [(set_attr "type" "neon<fp>_mla_<stype>_scalar<q>")]
)
2770
;; Scalar DF fused multiply-subtract where one multiplicand is a V2DF
;; lane; implemented with the vector fmls-by-element form.
(define_insn "*aarch64_fnma4_elt_to_64v2df"
  [(set (match_operand:DF 0 "register_operand" "=w")
    (fma:DF
      (vec_select:DF
	(match_operand:V2DF 1 "register_operand" "w")
	(parallel [(match_operand:SI 2 "immediate_operand")]))
      (neg:DF
        (match_operand:DF 3 "register_operand" "w"))
      (match_operand:DF 4 "register_operand" "0")))]
  "TARGET_SIMD"
  {
    operands[2] = aarch64_endian_lane_rtx (V2DFmode, INTVAL (operands[2]));
    return "fmls\\t%0.2d, %3.2d, %1.d[%2]";
  }
  [(set_attr "type" "neon_fp_mla_d_scalar_q")]
)
2787
2788;; Vector versions of the floating-point frint patterns.
2789;; Expands to btrunc, ceil, floor, nearbyint, rint, round, frintn.
;; Vector round-to-integral-FP; FRINT selects which rounding variant
;; (frintz/frinta/frintm/... ) via the frint_suffix attribute.
(define_insn "<frint_pattern><mode>2"
  [(set (match_operand:VHSDF 0 "register_operand" "=w")
	(unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")]
		       FRINT))]
  "TARGET_SIMD"
  "frint<frint_suffix>\\t%0.<Vtype>, %1.<Vtype>"
  [(set_attr "type" "neon_fp_round_<stype><q>")]
)
2798
2799;; Vector versions of the fcvt standard patterns.
2800;; Expands to lbtrunc, lround, lceil, lfloor
;; Round-then-convert to signed/unsigned integer in a single fcvt<rm><su>
;; instruction (covers lbtrunc, lround, lceil, lfloor).
(define_insn "l<fcvt_pattern><su_optab><VHSDF:mode><fcvt_target>2"
  [(set (match_operand:<FCVT_TARGET> 0 "register_operand" "=w")
	(FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
			       [(match_operand:VHSDF 1 "register_operand" "w")]
			       FCVT)))]
  "TARGET_SIMD"
  "fcvt<frint_suffix><su>\\t%0.<Vtype>, %1.<Vtype>"
  [(set_attr "type" "neon_fp_to_int_<stype><q>")]
)
2810
2811;; HF Scalar variants of related SIMD instructions.
;; Scalar HF -> HI round-and-convert, using the FP16 scalar form.
(define_insn "l<fcvt_pattern><su_optab>hfhi2"
  [(set (match_operand:HI 0 "register_operand" "=w")
	(FIXUORS:HI (unspec:HF [(match_operand:HF 1 "register_operand" "w")]
		      FCVT)))]
  "TARGET_SIMD_F16INST"
  "fcvt<frint_suffix><su>\t%h0, %h1"
  [(set_attr "type" "neon_fp_to_int_s")]
)
2820
;; Scalar HF -> HI conversion with truncation toward zero (fcvtz[su]).
(define_insn "<optab>_trunchfhi2"
  [(set (match_operand:HI 0 "register_operand" "=w")
	(FIXUORS:HI (match_operand:HF 1 "register_operand" "w")))]
  "TARGET_SIMD_F16INST"
  "fcvtz<su>\t%h0, %h1"
  [(set_attr "type" "neon_fp_to_int_s")]
)
2828
;; Scalar HI -> HF signed/unsigned integer-to-float conversion.
(define_insn "<optab>hihf2"
  [(set (match_operand:HF 0 "register_operand" "=w")
	(FLOATUORS:HF (match_operand:HI 1 "register_operand" "w")))]
  "TARGET_SIMD_F16INST"
  "<su_optab>cvtf\t%h0, %h1"
  [(set_attr "type" "neon_int_to_fp_s")]
)
2836
;; Combine a multiply by a power of two with a truncating float->int
;; conversion, using the #fbits immediate form of fcvtz[su].  The range
;; check ensures the scale fits the instruction's fbits field.
(define_insn "*aarch64_fcvt<su_optab><VDQF:mode><fcvt_target>2_mult"
  [(set (match_operand:<FCVT_TARGET> 0 "register_operand" "=w")
	(FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
			       [(mult:VDQF
	 (match_operand:VDQF 1 "register_operand" "w")
	 (match_operand:VDQF 2 "aarch64_fp_vec_pow2" ""))]
			       UNSPEC_FRINTZ)))]
  "TARGET_SIMD
   && IN_RANGE (aarch64_vec_fpconst_pow_of_2 (operands[2]), 1,
		GET_MODE_BITSIZE (GET_MODE_INNER (<VDQF:MODE>mode)))"
  {
    int fbits = aarch64_vec_fpconst_pow_of_2 (operands[2]);
    char buf[64];
    snprintf (buf, 64, "fcvtz<su>\\t%%0.<Vtype>, %%1.<Vtype>, #%d", fbits);
    output_asm_insn (buf, operands);
    return "";
  }
  [(set_attr "type" "neon_fp_to_int_<Vetype><q>")]
)
2856
;; Standard fix/fixuns pattern: float -> integer with truncation
;; (round toward zero via UNSPEC_FRINTZ).
(define_expand "<optab><VHSDF:mode><fcvt_target>2"
  [(set (match_operand:<FCVT_TARGET> 0 "register_operand")
	(FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
			       [(match_operand:VHSDF 1 "register_operand")]
				UNSPEC_FRINTZ)))]
  "TARGET_SIMD"
  {})
2864
;; fix_trunc/fixuns_trunc standard pattern; same RTL as the expander
;; above, exposed under the truncating optab names.
(define_expand "<fix_trunc_optab><VHSDF:mode><fcvt_target>2"
  [(set (match_operand:<FCVT_TARGET> 0 "register_operand")
	(FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
			       [(match_operand:VHSDF 1 "register_operand")]
				UNSPEC_FRINTZ)))]
  "TARGET_SIMD"
  {})
2872
;; ftrunc standard pattern: round to integral value toward zero, result
;; stays in floating point (frintz).
(define_expand "ftrunc<VHSDF:mode>2"
  [(set (match_operand:VHSDF 0 "register_operand")
	(unspec:VHSDF [(match_operand:VHSDF 1 "register_operand")]
		       UNSPEC_FRINTZ))]
  "TARGET_SIMD"
  {})
2879
;; Vector signed/unsigned integer -> float conversion ([su]cvtf).
(define_insn "<optab><fcvt_target><VHSDF:mode>2"
  [(set (match_operand:VHSDF 0 "register_operand" "=w")
	(FLOATUORS:VHSDF
	  (match_operand:<FCVT_TARGET> 1 "register_operand" "w")))]
  "TARGET_SIMD"
  "<su_optab>cvtf\\t%0.<Vtype>, %1.<Vtype>"
  [(set_attr "type" "neon_int_to_fp_<stype><q>")]
)
2888
2889;; Conversions between vectors of floats and doubles.
2890;; Contains a mix of patterns to match standard pattern names
2891;; and those for intrinsics.
2892
2893;; Float widening operations.
2894
;; Widen the low half of a float vector (fcvtl); operand 2 must be a
;; PARALLEL selecting the low-half lanes.
(define_insn "aarch64_simd_vec_unpacks_lo_<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (float_extend:<VWIDE> (vec_select:<VHALF>
			       (match_operand:VQ_HSF 1 "register_operand" "w")
			       (match_operand:VQ_HSF 2 "vect_par_cnst_lo_half" "")
			    )))]
  "TARGET_SIMD"
  "fcvtl\\t%0.<Vwtype>, %1.<Vhalftype>"
  [(set_attr "type" "neon_fp_cvt_widen_s")]
)
2905
2906;; Convert between fixed-point and floating-point (vector modes)
2907
;; Float -> fixed-point conversion with an immediate fbits count
;; (fcvtz[su] #imm).
(define_insn "<FCVT_F2FIXED:fcvt_fixed_insn><VHSDF:mode>3"
  [(set (match_operand:<VHSDF:FCVT_TARGET> 0 "register_operand" "=w")
	(unspec:<VHSDF:FCVT_TARGET>
	  [(match_operand:VHSDF 1 "register_operand" "w")
	   (match_operand:SI 2 "immediate_operand" "i")]
	 FCVT_F2FIXED))]
  "TARGET_SIMD"
  "<FCVT_F2FIXED:fcvt_fixed_insn>\t%<v>0<Vmtype>, %<v>1<Vmtype>, #%2"
  [(set_attr "type" "neon_fp_to_int_<VHSDF:stype><q>")]
)
2918
;; Fixed-point -> float conversion with an immediate fbits count
;; ([su]cvtf #imm).
(define_insn "<FCVT_FIXED2F:fcvt_fixed_insn><VDQ_HSDI:mode>3"
  [(set (match_operand:<VDQ_HSDI:FCVT_TARGET> 0 "register_operand" "=w")
	(unspec:<VDQ_HSDI:FCVT_TARGET>
	  [(match_operand:VDQ_HSDI 1 "register_operand" "w")
	   (match_operand:SI 2 "immediate_operand" "i")]
	 FCVT_FIXED2F))]
  "TARGET_SIMD"
  "<FCVT_FIXED2F:fcvt_fixed_insn>\t%<v>0<Vmtype>, %<v>1<Vmtype>, #%2"
  [(set_attr "type" "neon_int_to_fp_<VDQ_HSDI:stype><q>")]
)
2929
2930;; ??? Note that the vectorizer usage of the vec_unpacks_[lo/hi] patterns
2931;; is inconsistent with vector ordering elsewhere in the compiler, in that
2932;; the meaning of HI and LO changes depending on the target endianness.
2933;; While elsewhere we map the higher numbered elements of a vector to
2934;; the lower architectural lanes of the vector, for these patterns we want
2935;; to always treat "hi" as referring to the higher architectural lanes.
2936;; Consequently, while the patterns below look inconsistent with our
2937;; other big-endian patterns their behavior is as required.
2938
;; Standard vec_unpacks_lo pattern: widen the (architecturally) low half.
;; See the endianness note above — "lo" here means low architectural lanes.
(define_expand "vec_unpacks_lo_<mode>"
  [(match_operand:<VWIDE> 0 "register_operand")
   (match_operand:VQ_HSF 1 "register_operand")]
  "TARGET_SIMD"
  {
    rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
    emit_insn (gen_aarch64_simd_vec_unpacks_lo_<mode> (operands[0],
						       operands[1], p));
    DONE;
  }
)
2950
;; Widen the high half of a float vector (fcvtl2); operand 2 must be a
;; PARALLEL selecting the high-half lanes.
(define_insn "aarch64_simd_vec_unpacks_hi_<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (float_extend:<VWIDE> (vec_select:<VHALF>
			       (match_operand:VQ_HSF 1 "register_operand" "w")
			       (match_operand:VQ_HSF 2 "vect_par_cnst_hi_half" "")
			    )))]
  "TARGET_SIMD"
  "fcvtl2\\t%0.<Vwtype>, %1.<Vtype>"
  [(set_attr "type" "neon_fp_cvt_widen_s")]
)
2961
;; Standard vec_unpacks_hi pattern: widen the (architecturally) high half.
;; The hi-half PARALLEL must be paired with the _hi_ insn, whose
;; vect_par_cnst_hi_half predicate accepts it; emitting the _lo_ insn
;; here would fail to match and ICE at expand time.
(define_expand "vec_unpacks_hi_<mode>"
  [(match_operand:<VWIDE> 0 "register_operand")
   (match_operand:VQ_HSF 1 "register_operand")]
  "TARGET_SIMD"
  {
    rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
    emit_insn (gen_aarch64_simd_vec_unpacks_hi_<mode> (operands[0],
						       operands[1], p));
    DONE;
  }
)
;; Widen a 64-bit float vector to its 128-bit wide counterpart (fcvtl).
(define_insn "aarch64_float_extend_lo_<Vwide>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(float_extend:<VWIDE>
	  (match_operand:VDF 1 "register_operand" "w")))]
  "TARGET_SIMD"
  "fcvtl\\t%0<Vmwtype>, %1<Vmtype>"
  [(set_attr "type" "neon_fp_cvt_widen_s")]
)
2981
2982;; Float narrowing operations.
2983
;; Narrow a wide float vector into a 64-bit result (fcvtn).
(define_insn "aarch64_float_truncate_lo_<mode>"
  [(set (match_operand:VDF 0 "register_operand" "=w")
      (float_truncate:VDF
	(match_operand:<VWIDE> 1 "register_operand" "w")))]
  "TARGET_SIMD"
  "fcvtn\\t%0.<Vtype>, %1<Vmwtype>"
  [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
)
2992
;; Narrow into the high half of the destination (fcvtn2), little-endian
;; lane order: existing low half first in the vec_concat.
(define_insn "aarch64_float_truncate_hi_<Vdbl>_le"
  [(set (match_operand:<VDBL> 0 "register_operand" "=w")
    (vec_concat:<VDBL>
      (match_operand:VDF 1 "register_operand" "0")
      (float_truncate:VDF
	(match_operand:<VWIDE> 2 "register_operand" "w"))))]
  "TARGET_SIMD && !BYTES_BIG_ENDIAN"
  "fcvtn2\\t%0.<Vdtype>, %2<Vmwtype>"
  [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
)
3003
;; Big-endian variant of the above: vec_concat operand order is swapped
;; to reflect big-endian lane numbering; same fcvtn2 output.
(define_insn "aarch64_float_truncate_hi_<Vdbl>_be"
  [(set (match_operand:<VDBL> 0 "register_operand" "=w")
    (vec_concat:<VDBL>
      (float_truncate:VDF
	(match_operand:<VWIDE> 2 "register_operand" "w"))
      (match_operand:VDF 1 "register_operand" "0")))]
  "TARGET_SIMD && BYTES_BIG_ENDIAN"
  "fcvtn2\\t%0.<Vdtype>, %2<Vmwtype>"
  [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
)
3014
;; Endianness-dispatching expander for fcvtn2: choose the _le or _be
;; insn form above.
(define_expand "aarch64_float_truncate_hi_<Vdbl>"
  [(match_operand:<VDBL> 0 "register_operand")
   (match_operand:VDF 1 "register_operand")
   (match_operand:<VWIDE> 2 "register_operand")]
  "TARGET_SIMD"
{
  rtx (*gen) (rtx, rtx, rtx) = BYTES_BIG_ENDIAN
			     ? gen_aarch64_float_truncate_hi_<Vdbl>_be
			     : gen_aarch64_float_truncate_hi_<Vdbl>_le;
  emit_insn (gen (operands[0], operands[1], operands[2]));
  DONE;
}
)
3028
;; Pack two V2DF vectors into one V4SF: fcvtn for the low half, fcvtn2
;; for the high half.  lo/hi operand selection is swapped on big-endian
;; (see the vec_unpacks endianness note above).
(define_expand "vec_pack_trunc_v2df"
  [(set (match_operand:V4SF 0 "register_operand")
      (vec_concat:V4SF
	(float_truncate:V2SF
	    (match_operand:V2DF 1 "register_operand"))
	(float_truncate:V2SF
	    (match_operand:V2DF 2 "register_operand"))
	  ))]
  "TARGET_SIMD"
  {
    rtx tmp = gen_reg_rtx (V2SFmode);
    int lo = BYTES_BIG_ENDIAN ? 2 : 1;
    int hi = BYTES_BIG_ENDIAN ? 1 : 2;

    emit_insn (gen_aarch64_float_truncate_lo_v2sf (tmp, operands[lo]));
    emit_insn (gen_aarch64_float_truncate_hi_v4sf (operands[0],
						   tmp, operands[hi]));
    DONE;
  }
)
3049
;; Pack two scalar DFs into a V2SF: assemble both scalars into a V2DF
;; temporary (move_lo_quad/move_hi_quad), then narrow with fcvtn.
;; lo/hi operand selection is swapped on big-endian.
(define_expand "vec_pack_trunc_df"
  [(set (match_operand:V2SF 0 "register_operand")
      (vec_concat:V2SF
	(float_truncate:SF
	    (match_operand:DF 1 "register_operand"))
	(float_truncate:SF
	    (match_operand:DF 2 "register_operand"))
	  ))]
  "TARGET_SIMD"
  {
    /* The temporary is the V2DF input of the narrowing conversion (and
       the destination of the quad moves), so it must be V2DF, not the
       V2SF result mode.  */
    rtx tmp = gen_reg_rtx (V2DFmode);
    int lo = BYTES_BIG_ENDIAN ? 2 : 1;
    int hi = BYTES_BIG_ENDIAN ? 1 : 2;

    emit_insn (gen_move_lo_quad_v2df (tmp, operands[lo]));
    emit_insn (gen_move_hi_quad_v2df (tmp, operands[hi]));
    emit_insn (gen_aarch64_float_truncate_lo_v2sf (operands[0], tmp));
    DONE;
  }
)
3070
3071;; FP Max/Min
3072;; Max/Min are introduced by idiom recognition by GCC's mid-end.  An
3073;; expression like:
3074;;      a = (b < c) ? b : c;
3075;; is idiom-matched as MIN_EXPR<b,c> only if -ffinite-math-only and
3076;; -fno-signed-zeros are enabled either explicitly or indirectly via
3077;; -ffast-math.
3078;;
3079;; MIN_EXPR and MAX_EXPR eventually map to 'smin' and 'smax' in RTL.
3080;; The 'smax' and 'smin' RTL standard pattern names do not specify which
3081;; operand will be returned when both operands are zero (i.e. they may not
3082;; honour signed zeroes), or when either operand is NaN.  Therefore GCC
3083;; only introduces MIN_EXPR/MAX_EXPR in fast math mode or when not honouring
3084;; NaNs.
3085
;; smax/smin standard patterns, emitted as the NaN-propagation-defined
;; fmaxnm/fminnm forms (only generated under the conditions described
;; in the comment above).
(define_insn "<su><maxmin><mode>3"
  [(set (match_operand:VHSDF 0 "register_operand" "=w")
	(FMAXMIN:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
		       (match_operand:VHSDF 2 "register_operand" "w")))]
  "TARGET_SIMD"
  "f<maxmin>nm\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_fp_minmax_<stype><q>")]
)
3094
3095;; Vector forms for fmax, fmin, fmaxnm, fminnm.
3096;; fmaxnm and fminnm are used for the fmax<mode>3 standard pattern names,
3097;; which implement the IEEE fmax ()/fmin () functions.
;; Intrinsic/IEEE max-min forms (fmax, fmin, fmaxnm, fminnm), kept as an
;; unspec so their exact NaN/zero semantics are preserved.
(define_insn "<maxmin_uns><mode>3"
  [(set (match_operand:VHSDF 0 "register_operand" "=w")
       (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
		      (match_operand:VHSDF 2 "register_operand" "w")]
		      FMAXMIN_UNS))]
  "TARGET_SIMD"
  "<maxmin_uns_op>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_fp_minmax_<stype><q>")]
)
3107
3108;; 'across lanes' add.
3109
;; Integer add-across-lanes producing a scalar: do the reduction into a
;; vector scratch, then extract (endian-adjusted) lane 0.
(define_expand "reduc_plus_scal_<mode>"
  [(match_operand:<VEL> 0 "register_operand")
   (unspec:VDQ_I [(match_operand:VDQ_I 1 "register_operand")]
	       UNSPEC_ADDV)]
  "TARGET_SIMD"
  {
    rtx elt = aarch64_endian_lane_rtx (<MODE>mode, 0);
    rtx scratch = gen_reg_rtx (<MODE>mode);
    emit_insn (gen_aarch64_reduc_plus_internal<mode> (scratch, operands[1]));
    emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt));
    DONE;
  }
)
3123
;; Pairwise FP addition of two vectors (faddp, vector form).
(define_insn "aarch64_faddp<mode>"
 [(set (match_operand:VHSDF 0 "register_operand" "=w")
       (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
		      (match_operand:VHSDF 2 "register_operand" "w")]
	UNSPEC_FADDV))]
 "TARGET_SIMD"
 "faddp\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_fp_reduc_add_<stype><q>")]
)
3133
;; Integer add-across-lanes (addv/addp); the scalar result lands in
;; lane 0 of the destination vector register.
(define_insn "aarch64_reduc_plus_internal<mode>"
 [(set (match_operand:VDQV 0 "register_operand" "=w")
       (unspec:VDQV [(match_operand:VDQV 1 "register_operand" "w")]
		    UNSPEC_ADDV))]
 "TARGET_SIMD"
 "add<VDQV:vp>\\t%<Vetype>0, %1.<Vtype>"
  [(set_attr "type" "neon_reduc_add<q>")]
)
3142
;; Widening add-across-lanes ([su]addlv): result is the element-widened
;; scalar sum.
(define_insn "aarch64_<su>addlv<mode>"
 [(set (match_operand:<VWIDE_S> 0 "register_operand" "=w")
       (unspec:<VWIDE_S> [(match_operand:VDQV_L 1 "register_operand" "w")]
		    USADDLV))]
 "TARGET_SIMD"
 "<su>addl<vp>\\t%<Vwstype>0<Vwsuf>, %1.<Vtype>"
  [(set_attr "type" "neon_reduc_add<q>")]
)
3151
3152;; ADDV with result zero-extended to SI/DImode (for popcount).
;; ADDV whose scalar result is consumed zero-extended to SI/DImode
;; (e.g. the popcount expansion); the addv itself already zeroes the
;; rest of the register, so the extension is free.
(define_insn "aarch64_zero_extend<GPI:mode>_reduc_plus_<VDQV_E:mode>"
 [(set (match_operand:GPI 0 "register_operand" "=w")
       (zero_extend:GPI
	(unspec:<VDQV_E:VEL> [(match_operand:VDQV_E 1 "register_operand" "w")]
			     UNSPEC_ADDV)))]
 "TARGET_SIMD"
 "add<VDQV_E:vp>\\t%<VDQV_E:Vetype>0, %1.<VDQV_E:Vtype>"
  [(set_attr "type" "neon_reduc_add<VDQV_E:q>")]
)
3162
;; V2SI has no addv form; a single pairwise addp computes the two-lane sum.
(define_insn "aarch64_reduc_plus_internalv2si"
 [(set (match_operand:V2SI 0 "register_operand" "=w")
       (unspec:V2SI [(match_operand:V2SI 1 "register_operand" "w")]
		    UNSPEC_ADDV))]
 "TARGET_SIMD"
 "addp\\t%0.2s, %1.2s, %1.2s"
  [(set_attr "type" "neon_reduc_add")]
)
3171
;; FP add-across-lanes for two-element vectors: a single scalar-output
;; faddp reduces both lanes.
(define_insn "reduc_plus_scal_<mode>"
 [(set (match_operand:<VEL> 0 "register_operand" "=w")
       (unspec:<VEL> [(match_operand:V2F 1 "register_operand" "w")]
		   UNSPEC_FADDV))]
 "TARGET_SIMD"
 "faddp\\t%<Vetype>0, %1.<Vtype>"
  [(set_attr "type" "neon_fp_reduc_add_<Vetype><q>")]
)
3180
;; V4SF add-across-lanes: two pairwise faddp steps reduce four lanes to
;; one, then the (endian-adjusted) lane 0 is extracted.
(define_expand "reduc_plus_scal_v4sf"
 [(set (match_operand:SF 0 "register_operand")
       (unspec:V4SF [(match_operand:V4SF 1 "register_operand")]
		    UNSPEC_FADDV))]
 "TARGET_SIMD"
{
  rtx elt = aarch64_endian_lane_rtx (V4SFmode, 0);
  rtx scratch = gen_reg_rtx (V4SFmode);
  emit_insn (gen_aarch64_faddpv4sf (scratch, operands[1], operands[1]));
  emit_insn (gen_aarch64_faddpv4sf (scratch, scratch, scratch));
  emit_insn (gen_aarch64_get_lanev4sf (operands[0], scratch, elt));
  DONE;
})
3194
;; Count leading redundant sign bits (cls), per element.
(define_insn "clrsb<mode>2"
  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
        (clrsb:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")))]
  "TARGET_SIMD"
  "cls\\t%0.<Vtype>, %1.<Vtype>"
  [(set_attr "type" "neon_cls<q>")]
)
3202
;; Count leading zeros (clz), per element.
(define_insn "clz<mode>2"
 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
       (clz:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")))]
 "TARGET_SIMD"
 "clz\\t%0.<Vtype>, %1.<Vtype>"
  [(set_attr "type" "neon_cls<q>")]
)
3210
;; Population count (cnt), byte elements only.
(define_insn "popcount<mode>2"
  [(set (match_operand:VB 0 "register_operand" "=w")
        (popcount:VB (match_operand:VB 1 "register_operand" "w")))]
  "TARGET_SIMD"
  "cnt\\t%0.<Vbtype>, %1.<Vbtype>"
  [(set_attr "type" "neon_cnt<q>")]
)
3218
3219;; 'across lanes' max and min ops.
3220
3221;; Template for outputting a scalar, so we can create __builtins which can be
3222;; gimple_fold'd to the IFN_REDUC_(MAX|MIN) function.  (This is FP smax/smin).
;; FP max/min across lanes producing a scalar: reduce into a vector
;; scratch, then extract (endian-adjusted) lane 0.
(define_expand "reduc_<maxmin_uns>_scal_<mode>"
  [(match_operand:<VEL> 0 "register_operand")
   (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand")]
		  FMAXMINV)]
  "TARGET_SIMD"
  {
    rtx elt = aarch64_endian_lane_rtx (<MODE>mode, 0);
    rtx scratch = gen_reg_rtx (<MODE>mode);
    emit_insn (gen_aarch64_reduc_<maxmin_uns>_internal<mode> (scratch,
							      operands[1]));
    emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt));
    DONE;
  }
)
3237
3238;; Likewise for integer cases, signed and unsigned.
;; Integer max/min across lanes (signed and unsigned): same shape as the
;; FP expander above.
(define_expand "reduc_<maxmin_uns>_scal_<mode>"
  [(match_operand:<VEL> 0 "register_operand")
   (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand")]
		    MAXMINV)]
  "TARGET_SIMD"
  {
    rtx elt = aarch64_endian_lane_rtx (<MODE>mode, 0);
    rtx scratch = gen_reg_rtx (<MODE>mode);
    emit_insn (gen_aarch64_reduc_<maxmin_uns>_internal<mode> (scratch,
							      operands[1]));
    emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt));
    DONE;
  }
)
3253
;; Integer max/min across lanes ([su]maxv/[su]minv); result in lane 0.
(define_insn "aarch64_reduc_<maxmin_uns>_internal<mode>"
 [(set (match_operand:VDQV_S 0 "register_operand" "=w")
       (unspec:VDQV_S [(match_operand:VDQV_S 1 "register_operand" "w")]
		    MAXMINV))]
 "TARGET_SIMD"
 "<maxmin_uns_op>v\\t%<Vetype>0, %1.<Vtype>"
  [(set_attr "type" "neon_reduc_minmax<q>")]
)
3262
;; V2SI has no across-lanes form; one pairwise [su]max/minp reduces both
;; lanes.
(define_insn "aarch64_reduc_<maxmin_uns>_internalv2si"
 [(set (match_operand:V2SI 0 "register_operand" "=w")
       (unspec:V2SI [(match_operand:V2SI 1 "register_operand" "w")]
		    MAXMINV))]
 "TARGET_SIMD"
 "<maxmin_uns_op>p\\t%0.2s, %1.2s, %1.2s"
  [(set_attr "type" "neon_reduc_minmax")]
)
3271
;; FP max/min across lanes (fmax[nm]v/fmin[nm]v, or the pairwise form
;; where the vector has only two elements); result in lane 0.
(define_insn "aarch64_reduc_<maxmin_uns>_internal<mode>"
 [(set (match_operand:VHSDF 0 "register_operand" "=w")
       (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")]
		      FMAXMINV))]
 "TARGET_SIMD"
 "<maxmin_uns_op><vp>\\t%<Vetype>0, %1.<Vtype>"
  [(set_attr "type" "neon_fp_reduc_minmax_<stype><q>")]
)
3280
3281;; aarch64_simd_bsl may compile to any of bsl/bif/bit depending on register
3282;; allocation.
3283;; Operand 1 is the mask, operands 2 and 3 are the bitfields from which
3284;; to select.
3285;;
3286;; Thus our BSL is of the form:
3287;;   op0 = bsl (mask, op2, op3)
3288;; We can use any of:
3289;;
;;   if (op0 = mask)
;;     bsl mask, op2, op3
;;   if (op0 = op3) (so 1-bits in mask choose bits from op2, else op0)
;;     bit op0, op2, mask
;;   if (op0 = op2) (so 0-bits in mask choose bits from op3, else op0)
;;     bif op0, op3, mask
3296;;
3297;; This pattern is expanded to by the aarch64_simd_bsl<mode> expander.
3298;; Some forms of straight-line code may generate the equivalent form
3299;; in *aarch64_simd_bsl<mode>_alt.
3300
;; Bitwise select expressed canonically as ((op2 ^ op3) & mask) ^ op3.
;; The three constraint alternatives tie the destination to the mask,
;; op3 or op2 respectively, selecting bsl, bit or bif.
(define_insn "aarch64_simd_bsl<mode>_internal"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w,w,w")
	(xor:VDQ_I
	   (and:VDQ_I
	     (xor:VDQ_I
	       (match_operand:<V_INT_EQUIV> 3 "register_operand" "w,0,w")
	       (match_operand:VDQ_I 2 "register_operand" "w,w,0"))
	     (match_operand:VDQ_I 1 "register_operand" "0,w,w"))
	  (match_dup:<V_INT_EQUIV> 3)
	))]
  "TARGET_SIMD"
  "@
  bsl\\t%0.<Vbtype>, %2.<Vbtype>, %3.<Vbtype>
  bit\\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>
  bif\\t%0.<Vbtype>, %3.<Vbtype>, %1.<Vbtype>"
  [(set_attr "type" "neon_bsl<q>")]
)
3318
3319;; We need this form in addition to the above pattern to match the case
3320;; when combine tries merging three insns such that the second operand of
3321;; the outer XOR matches the second operand of the inner XOR rather than
3322;; the first.  The two are equivalent but since recog doesn't try all
3323;; permutations of commutative operations, we have to have a separate pattern.
3324
;; Commuted form of the pattern above: the inner XOR's second operand is
;; the one shared with the outer XOR.  Needed because recog does not try
;; all commutative permutations (see the comment above).
(define_insn "*aarch64_simd_bsl<mode>_alt"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w,w,w")
	(xor:VDQ_I
	   (and:VDQ_I
	     (xor:VDQ_I
	       (match_operand:VDQ_I 3 "register_operand" "w,w,0")
	       (match_operand:<V_INT_EQUIV> 2 "register_operand" "w,0,w"))
	      (match_operand:VDQ_I 1 "register_operand" "0,w,w"))
	  (match_dup:<V_INT_EQUIV> 2)))]
  "TARGET_SIMD"
  "@
  bsl\\t%0.<Vbtype>, %3.<Vbtype>, %2.<Vbtype>
  bit\\t%0.<Vbtype>, %3.<Vbtype>, %1.<Vbtype>
  bif\\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>"
  [(set_attr "type" "neon_bsl<q>")]
)
3341
3342;; DImode is special, we want to avoid computing operations which are
3343;; more naturally computed in general purpose registers in the vector
3344;; registers.  If we do that, we need to move all three operands from general
3345;; purpose registers to vector registers, then back again.  However, we
3346;; don't want to make this pattern an UNSPEC as we'd lose scope for
3347;; optimizations based on the component operations of a BSL.
3348;;
3349;; That means we need a splitter back to the individual operations, if they
3350;; would be better calculated on the integer side.
3351
;; DImode bitwise select.  Alternatives 0-2 use the vector BSL/BIT/BIF
;; forms; alternative 3 keeps all operands in general-purpose registers
;; and is split back into EOR/AND/EOR after reload (the "#" template).
(define_insn_and_split "aarch64_simd_bsldi_internal"
  [(set (match_operand:DI 0 "register_operand" "=w,w,w,&r")
	(xor:DI
	   (and:DI
	     (xor:DI
	       (match_operand:DI 3 "register_operand" "w,0,w,r")
	       (match_operand:DI 2 "register_operand" "w,w,0,r"))
	     (match_operand:DI 1 "register_operand" "0,w,w,r"))
	  (match_dup:DI 3)
	))]
  "TARGET_SIMD"
  "@
  bsl\\t%0.8b, %2.8b, %3.8b
  bit\\t%0.8b, %2.8b, %1.8b
  bif\\t%0.8b, %3.8b, %1.8b
  #"
  "&& REG_P (operands[0]) && GP_REGNUM_P (REGNO (operands[0]))"
  [(match_dup 1) (match_dup 1) (match_dup 2) (match_dup 3)]
{
  /* Split back to individual operations.  If we're before reload, and
     able to create a temporary register, do so.  If we're after reload,
     we've got an early-clobber destination register, so use that.
     Otherwise, we can't create pseudos and we can't yet guarantee that
     operands[0] is safe to write, so FAIL to split.  */

  rtx scratch;
  if (reload_completed)
    scratch = operands[0];
  else if (can_create_pseudo_p ())
    scratch = gen_reg_rtx (DImode);
  else
    FAIL;

  /* op0 = ((op2 ^ op3) & op1) ^ op3, computed on the integer side.  */
  emit_insn (gen_xordi3 (scratch, operands[2], operands[3]));
  emit_insn (gen_anddi3 (scratch, scratch, operands[1]));
  emit_insn (gen_xordi3 (operands[0], scratch, operands[3]));
  DONE;
}
  [(set_attr "type" "neon_bsl,neon_bsl,neon_bsl,multiple")
   (set_attr "length" "4,4,4,12")]
)
3393
;; As aarch64_simd_bsldi_internal, but matching the alternative XOR
;; operand ordering ((op3 ^ op2) & op1) ^ op2 (cf. *aarch64_simd_bsl_alt).
(define_insn_and_split "aarch64_simd_bsldi_alt"
  [(set (match_operand:DI 0 "register_operand" "=w,w,w,&r")
	(xor:DI
	   (and:DI
	     (xor:DI
	       (match_operand:DI 3 "register_operand" "w,w,0,r")
	       (match_operand:DI 2 "register_operand" "w,0,w,r"))
	     (match_operand:DI 1 "register_operand" "0,w,w,r"))
	  (match_dup:DI 2)
	))]
  "TARGET_SIMD"
  "@
  bsl\\t%0.8b, %3.8b, %2.8b
  bit\\t%0.8b, %3.8b, %1.8b
  bif\\t%0.8b, %2.8b, %1.8b
  #"
  "&& REG_P (operands[0]) && GP_REGNUM_P (REGNO (operands[0]))"
  [(match_dup 0) (match_dup 1) (match_dup 2) (match_dup 3)]
{
  /* Split back to individual operations.  If we're before reload, and
     able to create a temporary register, do so.  If we're after reload,
     we've got an early-clobber destination register, so use that.
     Otherwise, we can't create pseudos and we can't yet guarantee that
     operands[0] is safe to write, so FAIL to split.  */

  rtx scratch;
  if (reload_completed)
    scratch = operands[0];
  else if (can_create_pseudo_p ())
    scratch = gen_reg_rtx (DImode);
  else
    FAIL;

  /* op0 = ((op2 ^ op3) & op1) ^ op2, computed on the integer side.  */
  emit_insn (gen_xordi3 (scratch, operands[2], operands[3]));
  emit_insn (gen_anddi3 (scratch, scratch, operands[1]));
  emit_insn (gen_xordi3 (operands[0], scratch, operands[2]));
  DONE;
}
  [(set_attr "type" "neon_bsl,neon_bsl,neon_bsl,multiple")
   (set_attr "length" "4,4,4,12")]
)
3435
;; Expander for bitwise select on any vector mode (including FP modes):
;; op0 = (op1 & op2) | (~op1 & op3).  FP operands are punned to the
;; equivalent integer vector mode, since the internal pattern is
;; integer-only; the result is punned back afterwards.
(define_expand "aarch64_simd_bsl<mode>"
  [(match_operand:VALLDIF 0 "register_operand")
   (match_operand:<V_INT_EQUIV> 1 "register_operand")
   (match_operand:VALLDIF 2 "register_operand")
   (match_operand:VALLDIF 3 "register_operand")]
 "TARGET_SIMD"
{
  /* We can't alias operands together if they have different modes.  */
  rtx tmp = operands[0];
  if (FLOAT_MODE_P (<MODE>mode))
    {
      operands[2] = gen_lowpart (<V_INT_EQUIV>mode, operands[2]);
      operands[3] = gen_lowpart (<V_INT_EQUIV>mode, operands[3]);
      /* Use an integer-mode temporary for the result.  */
      tmp = gen_reg_rtx (<V_INT_EQUIV>mode);
    }
  operands[1] = gen_lowpart (<V_INT_EQUIV>mode, operands[1]);
  emit_insn (gen_aarch64_simd_bsl<v_int_equiv>_internal (tmp,
							 operands[1],
							 operands[2],
							 operands[3]));
  /* Copy back to the FP-mode destination if we used a temporary.  */
  if (tmp != operands[0])
    emit_move_insn (operands[0], gen_lowpart (<MODE>mode, tmp));

  DONE;
})
3461
;; op0 = op3 ? op1 : op2, where op3 is an element-wise all-ones/all-zeros
;; mask.  Special-case constant arms (-1/0 selects the mask itself,
;; 0/-1 its complement); otherwise emit a bitwise select.
(define_expand "vcond_mask_<mode><v_int_equiv>"
  [(match_operand:VALLDI 0 "register_operand")
   (match_operand:VALLDI 1 "nonmemory_operand")
   (match_operand:VALLDI 2 "nonmemory_operand")
   (match_operand:<V_INT_EQUIV> 3 "register_operand")]
  "TARGET_SIMD"
{
  /* If we have (a = (P) ? -1 : 0);
     Then we can simply move the generated mask (result must be int).  */
  if (operands[1] == CONSTM1_RTX (<MODE>mode)
      && operands[2] == CONST0_RTX (<MODE>mode))
    emit_move_insn (operands[0], operands[3]);
  /* Similarly, (a = (P) ? 0 : -1) is just inverting the generated mask.  */
  else if (operands[1] == CONST0_RTX (<MODE>mode)
	   && operands[2] == CONSTM1_RTX (<MODE>mode))
    emit_insn (gen_one_cmpl<v_int_equiv>2 (operands[0], operands[3]));
  else
    {
      /* BSL requires all three operands in registers.  */
      if (!REG_P (operands[1]))
	operands[1] = force_reg (<MODE>mode, operands[1]);
      if (!REG_P (operands[2]))
	operands[2] = force_reg (<MODE>mode, operands[2]);
      emit_insn (gen_aarch64_simd_bsl<mode> (operands[0], operands[3],
					     operands[1], operands[2]));
    }

  DONE;
})
3490
3491;; Patterns comparing two vectors to produce a mask.
3492
;; Integer vector compare producing an all-ones/all-zeros mask.
;; Comparisons with zero keep the immediate so the CM<cc>-with-zero
;; forms can be used; LTU/LEU are implemented by swapping the operands
;; of GTU/GEU, and NE as the complement of EQ.
(define_expand "vec_cmp<mode><mode>"
  [(set (match_operand:VSDQ_I_DI 0 "register_operand")
	  (match_operator 1 "comparison_operator"
	    [(match_operand:VSDQ_I_DI 2 "register_operand")
	     (match_operand:VSDQ_I_DI 3 "nonmemory_operand")]))]
  "TARGET_SIMD"
{
  rtx mask = operands[0];
  enum rtx_code code = GET_CODE (operands[1]);

  /* First legitimize operand 3: only the listed codes have a
     compare-with-zero instruction form.  */
  switch (code)
    {
    case NE:
    case LE:
    case LT:
    case GE:
    case GT:
    case EQ:
      if (operands[3] == CONST0_RTX (<MODE>mode))
	break;

      /* Fall through.  */
    default:
      if (!REG_P (operands[3]))
	operands[3] = force_reg (<MODE>mode, operands[3]);

      break;
    }

  /* Now emit the comparison itself.  */
  switch (code)
    {
    case LT:
      emit_insn (gen_aarch64_cmlt<mode> (mask, operands[2], operands[3]));
      break;

    case GE:
      emit_insn (gen_aarch64_cmge<mode> (mask, operands[2], operands[3]));
      break;

    case LE:
      emit_insn (gen_aarch64_cmle<mode> (mask, operands[2], operands[3]));
      break;

    case GT:
      emit_insn (gen_aarch64_cmgt<mode> (mask, operands[2], operands[3]));
      break;

    case LTU:
      /* a LTU b  ==>  b GTU a.  */
      emit_insn (gen_aarch64_cmgtu<mode> (mask, operands[3], operands[2]));
      break;

    case GEU:
      emit_insn (gen_aarch64_cmgeu<mode> (mask, operands[2], operands[3]));
      break;

    case LEU:
      /* a LEU b  ==>  b GEU a.  */
      emit_insn (gen_aarch64_cmgeu<mode> (mask, operands[3], operands[2]));
      break;

    case GTU:
      emit_insn (gen_aarch64_cmgtu<mode> (mask, operands[2], operands[3]));
      break;

    case NE:
      /* Handle NE as !EQ.  */
      emit_insn (gen_aarch64_cmeq<mode> (mask, operands[2], operands[3]));
      emit_insn (gen_one_cmpl<v_int_equiv>2 (mask, mask));
      break;

    case EQ:
      emit_insn (gen_aarch64_cmeq<mode> (mask, operands[2], operands[3]));
      break;

    default:
      gcc_unreachable ();
    }

  DONE;
})
3572
;; Floating-point vector compare producing an integer mask.  FCMGE/FCMGT/
;; FCMEQ (plus the compare-with-zero forms) are the only primitives, so
;; every other comparison code is built from them; the unordered codes
;; additionally mask out NaN lanes first so no FP exception is raised.
(define_expand "vec_cmp<mode><v_int_equiv>"
  [(set (match_operand:<V_INT_EQUIV> 0 "register_operand")
	(match_operator 1 "comparison_operator"
	    [(match_operand:VDQF 2 "register_operand")
	     (match_operand:VDQF 3 "nonmemory_operand")]))]
  "TARGET_SIMD"
{
  int use_zero_form = 0;
  enum rtx_code code = GET_CODE (operands[1]);
  rtx tmp = gen_reg_rtx (<V_INT_EQUIV>mode);

  rtx (*comparison) (rtx, rtx, rtx) = NULL;

  /* Decide whether the compare-with-zero instruction forms apply;
     otherwise force operand 3 into a register.  */
  switch (code)
    {
    case LE:
    case LT:
    case GE:
    case GT:
    case EQ:
      if (operands[3] == CONST0_RTX (<MODE>mode))
	{
	  use_zero_form = 1;
	  break;
	}
      /* Fall through.  */
    default:
      if (!REG_P (operands[3]))
	operands[3] = force_reg (<MODE>mode, operands[3]);

      break;
    }

  /* Pick the generator function, swapping operands where the hardware
     only provides the mirrored comparison.  */
  switch (code)
    {
    case LT:
      if (use_zero_form)
	{
	  comparison = gen_aarch64_cmlt<mode>;
	  break;
	}
      /* Fall through.  */
    case UNLT:
      std::swap (operands[2], operands[3]);
      /* Fall through.  */
    case UNGT:
    case GT:
      comparison = gen_aarch64_cmgt<mode>;
      break;
    case LE:
      if (use_zero_form)
	{
	  comparison = gen_aarch64_cmle<mode>;
	  break;
	}
      /* Fall through.  */
    case UNLE:
      std::swap (operands[2], operands[3]);
      /* Fall through.  */
    case UNGE:
    case GE:
      comparison = gen_aarch64_cmge<mode>;
      break;
    case NE:
    case EQ:
      comparison = gen_aarch64_cmeq<mode>;
      break;
    case UNEQ:
    case ORDERED:
    case UNORDERED:
    case LTGT:
      /* These are synthesized below without a single base comparison.  */
      break;
    default:
      gcc_unreachable ();
    }

  switch (code)
    {
    case UNGE:
    case UNGT:
    case UNLE:
    case UNLT:
      {
	/* All of the above must not raise any FP exceptions.  Thus we first
	   check each operand for NaNs and force any elements containing NaN to
	   zero before using them in the compare.
	   Example: UN<cc> (a, b) -> UNORDERED (a, b) |
				     (cm<cc> (isnan (a) ? 0.0 : a,
					      isnan (b) ? 0.0 : b))
	   We use the following transformations for doing the comparisons:
	   a UNGE b -> a GE b
	   a UNGT b -> a GT b
	   a UNLE b -> b GE a
	   a UNLT b -> b GT a.  */

	rtx tmp0 = gen_reg_rtx (<V_INT_EQUIV>mode);
	rtx tmp1 = gen_reg_rtx (<V_INT_EQUIV>mode);
	rtx tmp2 = gen_reg_rtx (<V_INT_EQUIV>mode);
	/* tmp0/tmp1 = not-NaN masks; tmp2 = ORDERED (a, b) mask.  */
	emit_insn (gen_aarch64_cmeq<mode> (tmp0, operands[2], operands[2]));
	emit_insn (gen_aarch64_cmeq<mode> (tmp1, operands[3], operands[3]));
	emit_insn (gen_and<v_int_equiv>3 (tmp2, tmp0, tmp1));
	/* Zero out NaN lanes of each operand (bitwise AND with the mask
	   on the integer view of the vector).  */
	emit_insn (gen_and<v_int_equiv>3 (tmp0, tmp0,
					  lowpart_subreg (<V_INT_EQUIV>mode,
							  operands[2],
							  <MODE>mode)));
	emit_insn (gen_and<v_int_equiv>3 (tmp1, tmp1,
					  lowpart_subreg (<V_INT_EQUIV>mode,
							  operands[3],
							  <MODE>mode)));
	gcc_assert (comparison != NULL);
	emit_insn (comparison (operands[0],
			       lowpart_subreg (<MODE>mode,
					       tmp0, <V_INT_EQUIV>mode),
			       lowpart_subreg (<MODE>mode,
					       tmp1, <V_INT_EQUIV>mode)));
	/* Result |= UNORDERED (a, b), i.e. ORN with the ordered mask.  */
	emit_insn (gen_orn<v_int_equiv>3 (operands[0], tmp2, operands[0]));
      }
      break;

    case LT:
    case LE:
    case GT:
    case GE:
    case EQ:
    case NE:
      /* The easy case.  Here we emit one of FCMGE, FCMGT or FCMEQ.
	 As a LT b <=> b GE a && a LE b <=> b GT a.  Our transformations are:
	 a GE b -> a GE b
	 a GT b -> a GT b
	 a LE b -> b GE a
	 a LT b -> b GT a
	 a EQ b -> a EQ b
	 a NE b -> ~(a EQ b)  */
      gcc_assert (comparison != NULL);
      emit_insn (comparison (operands[0], operands[2], operands[3]));
      if (code == NE)
	emit_insn (gen_one_cmpl<v_int_equiv>2 (operands[0], operands[0]));
      break;

    case LTGT:
      /* LTGT is not guaranteed not to generate a FP exception.  So let's
	 go the faster way : ((a > b) || (b > a)).  */
      emit_insn (gen_aarch64_cmgt<mode> (operands[0],
					 operands[2], operands[3]));
      emit_insn (gen_aarch64_cmgt<mode> (tmp, operands[3], operands[2]));
      emit_insn (gen_ior<v_int_equiv>3 (operands[0], operands[0], tmp));
      break;

    case ORDERED:
    case UNORDERED:
    case UNEQ:
      /* cmeq (a, a) & cmeq (b, b).  */
      emit_insn (gen_aarch64_cmeq<mode> (operands[0],
					 operands[2], operands[2]));
      emit_insn (gen_aarch64_cmeq<mode> (tmp, operands[3], operands[3]));
      emit_insn (gen_and<v_int_equiv>3 (operands[0], operands[0], tmp));

      if (code == UNORDERED)
	emit_insn (gen_one_cmpl<v_int_equiv>2 (operands[0], operands[0]));
      else if (code == UNEQ)
	{
	  /* UNEQ = ORDERED-mask ORN (a EQ b).  */
	  emit_insn (gen_aarch64_cmeq<mode> (tmp, operands[2], operands[3]));
	  emit_insn (gen_orn<v_int_equiv>3 (operands[0], operands[0], tmp));
	}
      break;

    default:
      gcc_unreachable ();
    }

  DONE;
})
3745
;; Unsigned integer vector compare.  The signed/unsigned distinction is
;; carried by the comparison code itself (GTU, GEU, ...), so this simply
;; defers to the vec_cmp expander.
(define_expand "vec_cmpu<mode><mode>"
  [(set (match_operand:VSDQ_I_DI 0 "register_operand")
	  (match_operator 1 "comparison_operator"
	    [(match_operand:VSDQ_I_DI 2 "register_operand")
	     (match_operand:VSDQ_I_DI 3 "nonmemory_operand")]))]
  "TARGET_SIMD"
{
  emit_insn (gen_vec_cmp<mode><mode> (operands[0], operands[1],
				      operands[2], operands[3]));
  DONE;
})
3757
;; op0 = (op4 <op3> op5) ? op1 : op2.  Expanded as a vector compare
;; followed by a mask-based select.
(define_expand "vcond<mode><mode>"
  [(set (match_operand:VALLDI 0 "register_operand")
	(if_then_else:VALLDI
	  (match_operator 3 "comparison_operator"
	    [(match_operand:VALLDI 4 "register_operand")
	     (match_operand:VALLDI 5 "nonmemory_operand")])
	  (match_operand:VALLDI 1 "nonmemory_operand")
	  (match_operand:VALLDI 2 "nonmemory_operand")))]
  "TARGET_SIMD"
{
  rtx mask = gen_reg_rtx (<V_INT_EQUIV>mode);
  enum rtx_code code = GET_CODE (operands[3]);

  /* NE is handled as !EQ in vec_cmp patterns, we can explicitly invert
     it as well as switch operands 1/2 in order to avoid the additional
     NOT instruction.  */
  if (code == NE)
    {
      operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]),
				    operands[4], operands[5]);
      std::swap (operands[1], operands[2]);
    }
  emit_insn (gen_vec_cmp<mode><v_int_equiv> (mask, operands[3],
					     operands[4], operands[5]));
  emit_insn (gen_vcond_mask_<mode><v_int_equiv> (operands[0], operands[1],
						 operands[2], mask));

  DONE;
})
3787
;; Mixed-mode vcond: the selected values are in <V_cmp_mixed> while the
;; compared values are floating-point VDQF_COND vectors of the same
;; element count.  Same compare-then-select expansion as vcond above.
(define_expand "vcond<v_cmp_mixed><mode>"
  [(set (match_operand:<V_cmp_mixed> 0 "register_operand")
	(if_then_else:<V_cmp_mixed>
	  (match_operator 3 "comparison_operator"
	    [(match_operand:VDQF_COND 4 "register_operand")
	     (match_operand:VDQF_COND 5 "nonmemory_operand")])
	  (match_operand:<V_cmp_mixed> 1 "nonmemory_operand")
	  (match_operand:<V_cmp_mixed> 2 "nonmemory_operand")))]
  "TARGET_SIMD"
{
  rtx mask = gen_reg_rtx (<V_INT_EQUIV>mode);
  enum rtx_code code = GET_CODE (operands[3]);

  /* NE is handled as !EQ in vec_cmp patterns, we can explicitly invert
     it as well as switch operands 1/2 in order to avoid the additional
     NOT instruction.  */
  if (code == NE)
    {
      operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]),
				    operands[4], operands[5]);
      std::swap (operands[1], operands[2]);
    }
  emit_insn (gen_vec_cmp<mode><v_int_equiv> (mask, operands[3],
					     operands[4], operands[5]));
  emit_insn (gen_vcond_mask_<v_cmp_mixed><v_int_equiv> (
						operands[0], operands[1],
						operands[2], mask));

  DONE;
})
3818
;; Unsigned vcond on integer vectors.  For VSDQ_I_DI the mask mode equals
;; <MODE>mode (integer elements), so the mask is allocated in <MODE>mode.
(define_expand "vcondu<mode><mode>"
  [(set (match_operand:VSDQ_I_DI 0 "register_operand")
	(if_then_else:VSDQ_I_DI
	  (match_operator 3 "comparison_operator"
	    [(match_operand:VSDQ_I_DI 4 "register_operand")
	     (match_operand:VSDQ_I_DI 5 "nonmemory_operand")])
	  (match_operand:VSDQ_I_DI 1 "nonmemory_operand")
	  (match_operand:VSDQ_I_DI 2 "nonmemory_operand")))]
  "TARGET_SIMD"
{
  rtx mask = gen_reg_rtx (<MODE>mode);
  enum rtx_code code = GET_CODE (operands[3]);

  /* NE is handled as !EQ in vec_cmp patterns, we can explicitly invert
     it as well as switch operands 1/2 in order to avoid the additional
     NOT instruction.  */
  if (code == NE)
    {
      operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]),
				    operands[4], operands[5]);
      std::swap (operands[1], operands[2]);
    }
  emit_insn (gen_vec_cmp<mode><mode> (mask, operands[3],
				      operands[4], operands[5]));
  emit_insn (gen_vcond_mask_<mode><v_int_equiv> (operands[0], operands[1],
						 operands[2], mask));
  DONE;
})
3847
;; Mixed-mode unsigned vcond: select between FP vectors based on an
;; unsigned comparison of same-width integer vectors.
(define_expand "vcondu<mode><v_cmp_mixed>"
  [(set (match_operand:VDQF 0 "register_operand")
	(if_then_else:VDQF
	  (match_operator 3 "comparison_operator"
	    [(match_operand:<V_cmp_mixed> 4 "register_operand")
	     (match_operand:<V_cmp_mixed> 5 "nonmemory_operand")])
	  (match_operand:VDQF 1 "nonmemory_operand")
	  (match_operand:VDQF 2 "nonmemory_operand")))]
  "TARGET_SIMD"
{
  rtx mask = gen_reg_rtx (<V_INT_EQUIV>mode);
  enum rtx_code code = GET_CODE (operands[3]);

  /* NE is handled as !EQ in vec_cmp patterns, we can explicitly invert
     it as well as switch operands 1/2 in order to avoid the additional
     NOT instruction.  */
  if (code == NE)
    {
      operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]),
				    operands[4], operands[5]);
      std::swap (operands[1], operands[2]);
    }
  emit_insn (gen_vec_cmp<v_cmp_mixed><v_cmp_mixed> (
						  mask, operands[3],
						  operands[4], operands[5]));
  emit_insn (gen_vcond_mask_<mode><v_int_equiv> (operands[0], operands[1],
						 operands[2], mask));
  DONE;
})
3877
3878;; Patterns for AArch64 SIMD Intrinsics.
3879
3880;; Lane extraction with sign extension to general purpose register.
;; Lane extraction with sign extension to general purpose register.
;; The lane index is remapped for big-endian (RTL uses GCC vector
;; extension numbering; the assembly lane number is architectural).
(define_insn "*aarch64_get_lane_extend<GPI:mode><VDQQH:mode>"
  [(set (match_operand:GPI 0 "register_operand" "=r")
	(sign_extend:GPI
	  (vec_select:<VDQQH:VEL>
	    (match_operand:VDQQH 1 "register_operand" "w")
	    (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
  "TARGET_SIMD"
  {
    operands[2] = aarch64_endian_lane_rtx (<VDQQH:MODE>mode,
					   INTVAL (operands[2]));
    return "smov\\t%<GPI:w>0, %1.<VDQQH:Vetype>[%2]";
  }
  [(set_attr "type" "neon_to_gp<VDQQH:q>")]
)
3895
;; Lane extraction with zero extension to general purpose register.
;; UMOV to a W register suffices even for a DImode result, since writing
;; the W view zeroes the upper 32 bits of the X register.
(define_insn "*aarch64_get_lane_zero_extend<GPI:mode><VDQQH:mode>"
  [(set (match_operand:GPI 0 "register_operand" "=r")
	(zero_extend:GPI
	  (vec_select:<VDQQH:VEL>
	    (match_operand:VDQQH 1 "register_operand" "w")
	    (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
  "TARGET_SIMD"
  {
    operands[2] = aarch64_endian_lane_rtx (<VDQQH:MODE>mode,
					   INTVAL (operands[2]));
    return "umov\\t%w0, %1.<VDQQH:Vetype>[%2]";
  }
  [(set_attr "type" "neon_to_gp<VDQQH:q>")]
)
3910
3911;; Lane extraction of a value, neither sign nor zero extension
3912;; is guaranteed so upper bits should be considered undefined.
3913;; RTL uses GCC vector extension indices throughout so flip only for assembly.
3914;; Extracting lane zero is split into a simple move when it is between SIMD
3915;; registers or a store.
;; Extract one lane, destination may be a GP register (UMOV), another
;; SIMD register (DUP) or memory (ST1).  After reload, extracting
;; architectural lane zero degenerates to a plain subreg move/store.
(define_insn_and_split "aarch64_get_lane<mode>"
  [(set (match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand" "=?r, w, Utv")
	(vec_select:<VEL>
	  (match_operand:VALL_F16 1 "register_operand" "w, w, w")
	  (parallel [(match_operand:SI 2 "immediate_operand" "i, i, i")])))]
  "TARGET_SIMD"
  {
    operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
    switch (which_alternative)
      {
	case 0:
	  return "umov\\t%<vwcore>0, %1.<Vetype>[%2]";
	case 1:
	  return "dup\\t%<Vetype>0, %1.<Vetype>[%2]";
	case 2:
	  return "st1\\t{%1.<Vetype>}[%2], %0";
	default:
	  gcc_unreachable ();
      }
  }
 "&& reload_completed
  && ENDIAN_LANE_N (<nunits>, INTVAL (operands[2])) == 0"
 [(set (match_dup 0) (match_dup 1))]
 {
   /* Re-view the source register in the element mode for the move.  */
   operands[1] = aarch64_replace_reg_mode (operands[1], <VEL>mode);
 }
  [(set_attr "type" "neon_to_gp<q>, neon_dup<q>, neon_store1_one_lane<q>")]
)
3944
;; Load two adjacent 64-bit values into the low and high halves of a
;; 128-bit register with a single LDR, valid only when the second
;; address is exactly the first plus the mode size (checked below) and
;; unaligned accesses are permitted.
(define_insn "load_pair_lanes<mode>"
  [(set (match_operand:<VDBL> 0 "register_operand" "=w")
	(vec_concat:<VDBL>
	   (match_operand:VDC 1 "memory_operand" "Utq")
	   (match_operand:VDC 2 "memory_operand" "m")))]
  "TARGET_SIMD && !STRICT_ALIGNMENT
   && rtx_equal_p (XEXP (operands[2], 0),
		   plus_constant (Pmode,
				  XEXP (operands[1], 0),
				  GET_MODE_SIZE (<MODE>mode)))"
  "ldr\\t%q0, %1"
  [(set_attr "type" "neon_load1_1reg_q")]
)
3958
;; Store the two 64-bit halves of a vec_concat with a single STP, from
;; either SIMD (d) or general-purpose (x) register pairs.
(define_insn "store_pair_lanes<mode>"
  [(set (match_operand:<VDBL> 0 "aarch64_mem_pair_lanes_operand" "=Umn, Umn")
	(vec_concat:<VDBL>
	   (match_operand:VDC 1 "register_operand" "w, r")
	   (match_operand:VDC 2 "register_operand" "w, r")))]
  "TARGET_SIMD"
  "@
   stp\\t%d1, %d2, %y0
   stp\\t%x1, %x2, %y0"
  [(set_attr "type" "neon_stp, store_16")]
)
3970
3971;; In this insn, operand 1 should be low, and operand 2 the high part of the
3972;; dest vector.
3973
;; Combine a 64-bit value (register, GP register or memory) with zero in
;; the high half, little-endian layout.  All three alternatives zero the
;; upper 64 bits implicitly by writing only the low D register.
(define_insn "@aarch64_combinez<mode>"
  [(set (match_operand:<VDBL> 0 "register_operand" "=w,w,w")
	(vec_concat:<VDBL>
	  (match_operand:VDC 1 "general_operand" "w,?r,m")
	  (match_operand:VDC 2 "aarch64_simd_or_scalar_imm_zero")))]
  "TARGET_SIMD && !BYTES_BIG_ENDIAN"
  "@
   mov\\t%0.8b, %1.8b
   fmov\t%d0, %1
   ldr\\t%d0, %1"
  [(set_attr "type" "neon_move<q>, neon_from_gp, neon_load1_1reg")
   (set_attr "arch" "simd,fp,simd")]
)
3987
;; Big-endian counterpart of @aarch64_combinez: the zero half appears
;; first in the vec_concat, but the generated instructions are the same.
(define_insn "@aarch64_combinez_be<mode>"
  [(set (match_operand:<VDBL> 0 "register_operand" "=w,w,w")
        (vec_concat:<VDBL>
	  (match_operand:VDC 2 "aarch64_simd_or_scalar_imm_zero")
	  (match_operand:VDC 1 "general_operand" "w,?r,m")))]
  "TARGET_SIMD && BYTES_BIG_ENDIAN"
  "@
   mov\\t%0.8b, %1.8b
   fmov\t%d0, %1
   ldr\\t%d0, %1"
  [(set_attr "type" "neon_move<q>, neon_from_gp, neon_load1_1reg")
   (set_attr "arch" "simd,fp,simd")]
)
4001
;; Combine two 64-bit vectors into one 128-bit vector.  A zero high part
;; uses the single-instruction combinez patterns (endian-selected);
;; otherwise the combination is split into lo/hi half moves.
(define_expand "aarch64_combine<mode>"
  [(match_operand:<VDBL> 0 "register_operand")
   (match_operand:VDC 1 "register_operand")
   (match_operand:VDC 2 "aarch64_simd_reg_or_zero")]
  "TARGET_SIMD"
{
  if (operands[2] == CONST0_RTX (<MODE>mode))
    {
      if (BYTES_BIG_ENDIAN)
	emit_insn (gen_aarch64_combinez_be<mode> (operands[0], operands[1],
						  operands[2]));
      else
	emit_insn (gen_aarch64_combinez<mode> (operands[0], operands[1],
					       operands[2]));
    }
  else
    aarch64_split_simd_combine (operands[0], operands[1], operands[2]);
  DONE;
}
)
4022
;; Combine two 64-bit register operands into a 128-bit vector by moving
;; them into the low and high quadword halves in turn.
(define_expand "@aarch64_simd_combine<mode>"
  [(match_operand:<VDBL> 0 "register_operand")
   (match_operand:VDC 1 "register_operand")
   (match_operand:VDC 2 "register_operand")]
  "TARGET_SIMD"
  {
    emit_insn (gen_move_lo_quad_<Vdbl> (operands[0], operands[1]));
    emit_insn (gen_move_hi_quad_<Vdbl> (operands[0], operands[2]));
    DONE;
  }
[(set_attr "type" "multiple")]
)
4035
4036;; <su><addsub>l<q>.
4037
;; Widening add/subtract of the high halves of two quad vectors:
;; SADDL2/UADDL2/SSUBL2/USUBL2.  Operand 3 selects the high-half lanes.
(define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>_hi_internal"
 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
       (ADDSUB:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
			   (match_operand:VQW 1 "register_operand" "w")
			   (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
		       (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
			   (match_operand:VQW 2 "register_operand" "w")
			   (match_dup 3)))))]
  "TARGET_SIMD"
  "<ANY_EXTEND:su><ADDSUB:optab>l2\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_<ADDSUB:optab>_long")]
)
4050
;; Widening add/subtract of the low halves of two quad vectors:
;; SADDL/UADDL/SSUBL/USUBL.  Operand 3 selects the low-half lanes.
(define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>_lo_internal"
 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
       (ADDSUB:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
                           (match_operand:VQW 1 "register_operand" "w")
                           (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
                       (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
                           (match_operand:VQW 2 "register_operand" "w")
                           (match_dup 3)))))]
  "TARGET_SIMD"
  "<ANY_EXTEND:su><ADDSUB:optab>l\t%0.<Vwtype>, %1.<Vhalftype>, %2.<Vhalftype>"
  [(set_attr "type" "neon_<ADDSUB:optab>_long")]
)
4063
;; Vectorizer entry point: widening add of the low halves.
(define_expand "vec_widen_<su>addl_lo_<mode>"
  [(match_operand:<VWIDE> 0 "register_operand")
   (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand"))
   (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand"))]
  "TARGET_SIMD"
{
  /* Parallel selecting the low-half lanes (endian-aware).  */
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
  emit_insn (gen_aarch64_<su>addl<mode>_lo_internal (operands[0], operands[1],
						     operands[2], p));
  DONE;
})
4075
;; Vectorizer entry point: widening add of the high halves.
(define_expand "vec_widen_<su>addl_hi_<mode>"
  [(match_operand:<VWIDE> 0 "register_operand")
   (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand"))
   (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand"))]
  "TARGET_SIMD"
{
  /* Parallel selecting the high-half lanes (endian-aware).  */
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
  emit_insn (gen_aarch64_<su>addl<mode>_hi_internal (operands[0], operands[1],
						     operands[2], p));
  DONE;
})
4087
;; Vectorizer entry point: widening subtract of the low halves.
(define_expand "vec_widen_<su>subl_lo_<mode>"
  [(match_operand:<VWIDE> 0 "register_operand")
   (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand"))
   (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand"))]
  "TARGET_SIMD"
{
  /* Parallel selecting the low-half lanes (endian-aware).  */
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
  emit_insn (gen_aarch64_<su>subl<mode>_lo_internal (operands[0], operands[1],
						     operands[2], p));
  DONE;
})
4099
;; Vectorizer entry point: widening subtract of the high halves.
(define_expand "vec_widen_<su>subl_hi_<mode>"
  [(match_operand:<VWIDE> 0 "register_operand")
   (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand"))
   (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand"))]
  "TARGET_SIMD"
{
  /* Parallel selecting the high-half lanes (endian-aware).  */
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
  emit_insn (gen_aarch64_<su>subl<mode>_hi_internal (operands[0], operands[1],
						     operands[2], p));
  DONE;
})
4111
;; Intrinsic expander for SADDL2 (signed widening add, high halves).
(define_expand "aarch64_saddl2<mode>"
  [(match_operand:<VWIDE> 0 "register_operand")
   (match_operand:VQW 1 "register_operand")
   (match_operand:VQW 2 "register_operand")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
  emit_insn (gen_aarch64_saddl<mode>_hi_internal (operands[0], operands[1],
                                                  operands[2], p));
  DONE;
})
4123
;; Intrinsic expander for UADDL2 (unsigned widening add, high halves).
(define_expand "aarch64_uaddl2<mode>"
  [(match_operand:<VWIDE> 0 "register_operand")
   (match_operand:VQW 1 "register_operand")
   (match_operand:VQW 2 "register_operand")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
  emit_insn (gen_aarch64_uaddl<mode>_hi_internal (operands[0], operands[1],
                                                  operands[2], p));
  DONE;
})
4135
;; Intrinsic expander for SSUBL2 (signed widening subtract, high halves).
(define_expand "aarch64_ssubl2<mode>"
  [(match_operand:<VWIDE> 0 "register_operand")
   (match_operand:VQW 1 "register_operand")
   (match_operand:VQW 2 "register_operand")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
  emit_insn (gen_aarch64_ssubl<mode>_hi_internal (operands[0], operands[1],
						operands[2], p));
  DONE;
})
4147
;; Intrinsic expander for USUBL2 (unsigned widening subtract, high halves).
(define_expand "aarch64_usubl2<mode>"
  [(match_operand:<VWIDE> 0 "register_operand")
   (match_operand:VQW 1 "register_operand")
   (match_operand:VQW 2 "register_operand")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
  emit_insn (gen_aarch64_usubl<mode>_hi_internal (operands[0], operands[1],
						operands[2], p));
  DONE;
})
4159
;; Widening add/subtract of whole 64-bit vectors:
;; SADDL/UADDL/SSUBL/USUBL on D-register inputs.
(define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>"
 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
       (ADDSUB:<VWIDE> (ANY_EXTEND:<VWIDE>
			   (match_operand:VD_BHSI 1 "register_operand" "w"))
		       (ANY_EXTEND:<VWIDE>
			   (match_operand:VD_BHSI 2 "register_operand" "w"))))]
  "TARGET_SIMD"
  "<ANY_EXTEND:su><ADDSUB:optab>l\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_<ADDSUB:optab>_long")]
)
4170
4171;; <su><addsub>w<q>.
4172
;; widen_ssum for quad vectors: accumulate the sign-extended elements of
;; operand 1 into operand 2, via SADDW on the low half then SADDW2 on
;; the high half.
(define_expand "widen_ssum<mode>3"
  [(set (match_operand:<VDBLW> 0 "register_operand")
	(plus:<VDBLW> (sign_extend:<VDBLW>
		        (match_operand:VQW 1 "register_operand"))
		      (match_operand:<VDBLW> 2 "register_operand")))]
  "TARGET_SIMD"
  {
    rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
    rtx temp = gen_reg_rtx (GET_MODE (operands[0]));

    emit_insn (gen_aarch64_saddw<mode>_internal (temp, operands[2],
						operands[1], p));
    emit_insn (gen_aarch64_saddw2<mode> (operands[0], temp, operands[1]));
    DONE;
  }
)
4189
;; widen_ssum for 64-bit vectors: a single SADDW suffices.
(define_expand "widen_ssum<mode>3"
  [(set (match_operand:<VWIDE> 0 "register_operand")
	(plus:<VWIDE> (sign_extend:<VWIDE>
		        (match_operand:VD_BHSI 1 "register_operand"))
		      (match_operand:<VWIDE> 2 "register_operand")))]
  "TARGET_SIMD"
{
  emit_insn (gen_aarch64_saddw<mode> (operands[0], operands[2], operands[1]));
  DONE;
})
4200
;; widen_usum for quad vectors: accumulate the zero-extended elements of
;; operand 1 into operand 2, via UADDW on the low half then UADDW2 on
;; the high half.
(define_expand "widen_usum<mode>3"
  [(set (match_operand:<VDBLW> 0 "register_operand")
	(plus:<VDBLW> (zero_extend:<VDBLW>
		        (match_operand:VQW 1 "register_operand"))
		      (match_operand:<VDBLW> 2 "register_operand")))]
  "TARGET_SIMD"
  {
    rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
    rtx temp = gen_reg_rtx (GET_MODE (operands[0]));

    emit_insn (gen_aarch64_uaddw<mode>_internal (temp, operands[2],
						 operands[1], p));
    emit_insn (gen_aarch64_uaddw2<mode> (operands[0], temp, operands[1]));
    DONE;
  }
)
4217
;; widen_usum for 64-bit vectors: a single UADDW suffices.
(define_expand "widen_usum<mode>3"
  [(set (match_operand:<VWIDE> 0 "register_operand")
	(plus:<VWIDE> (zero_extend:<VWIDE>
		        (match_operand:VD_BHSI 1 "register_operand"))
		      (match_operand:<VWIDE> 2 "register_operand")))]
  "TARGET_SIMD"
{
  emit_insn (gen_aarch64_uaddw<mode> (operands[0], operands[2], operands[1]));
  DONE;
})
4228
;; SSUBW/USUBW: wide vector minus extended 64-bit narrow vector.
(define_insn "aarch64_<ANY_EXTEND:su>subw<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(minus:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
	  (ANY_EXTEND:<VWIDE>
	    (match_operand:VD_BHSI 2 "register_operand" "w"))))]
  "TARGET_SIMD"
  "<ANY_EXTEND:su>subw\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>"
  [(set_attr "type" "neon_sub_widen")]
)
4238
4239(define_insn "aarch64_<ANY_EXTEND:su>subw<mode>_internal"
4240  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4241	(minus:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
4242	  (ANY_EXTEND:<VWIDE>
4243	    (vec_select:<VHALF>
4244	      (match_operand:VQW 2 "register_operand" "w")
4245	      (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))))]
4246  "TARGET_SIMD"
4247  "<ANY_EXTEND:su>subw\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vhalftype>"
4248  [(set_attr "type" "neon_sub_widen")]
4249)
4250
4251(define_insn "aarch64_<ANY_EXTEND:su>subw2<mode>_internal"
4252  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4253	(minus:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
4254	  (ANY_EXTEND:<VWIDE>
4255	    (vec_select:<VHALF>
4256	      (match_operand:VQW 2 "register_operand" "w")
4257	      (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))))]
4258  "TARGET_SIMD"
4259  "<ANY_EXTEND:su>subw2\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>"
4260  [(set_attr "type" "neon_sub_widen")]
4261)
4262
4263(define_insn "aarch64_<ANY_EXTEND:su>addw<mode>"
4264  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4265	(plus:<VWIDE>
4266	  (ANY_EXTEND:<VWIDE> (match_operand:VD_BHSI 2 "register_operand" "w"))
4267	  (match_operand:<VWIDE> 1 "register_operand" "w")))]
4268  "TARGET_SIMD"
4269  "<ANY_EXTEND:su>addw\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>"
4270  [(set_attr "type" "neon_add_widen")]
4271)
4272
4273(define_insn "aarch64_<ANY_EXTEND:su>addw<mode>_internal"
4274  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4275	(plus:<VWIDE>
4276	  (ANY_EXTEND:<VWIDE>
4277	    (vec_select:<VHALF>
4278	      (match_operand:VQW 2 "register_operand" "w")
4279	      (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
4280	  (match_operand:<VWIDE> 1 "register_operand" "w")))]
4281  "TARGET_SIMD"
4282  "<ANY_EXTEND:su>addw\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vhalftype>"
4283  [(set_attr "type" "neon_add_widen")]
4284)
4285
4286(define_insn "aarch64_<ANY_EXTEND:su>addw2<mode>_internal"
4287  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4288	(plus:<VWIDE>
4289	  (ANY_EXTEND:<VWIDE>
4290	    (vec_select:<VHALF>
4291	      (match_operand:VQW 2 "register_operand" "w")
4292	      (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
4293	  (match_operand:<VWIDE> 1 "register_operand" "w")))]
4294  "TARGET_SIMD"
4295  "<ANY_EXTEND:su>addw2\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>"
4296  [(set_attr "type" "neon_add_widen")]
4297)
4298
;; Public expanders for the high-half widening add/sub: build the PARALLEL
;; selecting the high half (third argument 'true') and defer to the
;; corresponding *_internal insn above.

(define_expand "aarch64_saddw2<mode>"
  [(match_operand:<VWIDE> 0 "register_operand")
   (match_operand:<VWIDE> 1 "register_operand")
   (match_operand:VQW 2 "register_operand")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
  emit_insn (gen_aarch64_saddw2<mode>_internal (operands[0], operands[1],
						operands[2], p));
  DONE;
})

(define_expand "aarch64_uaddw2<mode>"
  [(match_operand:<VWIDE> 0 "register_operand")
   (match_operand:<VWIDE> 1 "register_operand")
   (match_operand:VQW 2 "register_operand")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
  emit_insn (gen_aarch64_uaddw2<mode>_internal (operands[0], operands[1],
						operands[2], p));
  DONE;
})


(define_expand "aarch64_ssubw2<mode>"
  [(match_operand:<VWIDE> 0 "register_operand")
   (match_operand:<VWIDE> 1 "register_operand")
   (match_operand:VQW 2 "register_operand")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
  emit_insn (gen_aarch64_ssubw2<mode>_internal (operands[0], operands[1],
						operands[2], p));
  DONE;
})

(define_expand "aarch64_usubw2<mode>"
  [(match_operand:<VWIDE> 0 "register_operand")
   (match_operand:<VWIDE> 1 "register_operand")
   (match_operand:VQW 2 "register_operand")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
  emit_insn (gen_aarch64_usubw2<mode>_internal (operands[0], operands[1],
						operands[2], p));
  DONE;
})
4347
;; <su><r>h<addsub>.
;; Halving add/subtract.  The avg3_floor/avg3_ceil expanders map the
;; middle-end average optabs onto the unspec forms matched by the insn
;; below ([SU]HADD for floor, [SU]RHADD for rounding/ceil).

(define_expand "<u>avg<mode>3_floor"
  [(set (match_operand:VDQ_BHSI 0 "register_operand")
	(unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand")
			  (match_operand:VDQ_BHSI 2 "register_operand")]
			 HADD))]
  "TARGET_SIMD"
)

(define_expand "<u>avg<mode>3_ceil"
  [(set (match_operand:VDQ_BHSI 0 "register_operand")
	(unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand")
			  (match_operand:VDQ_BHSI 2 "register_operand")]
			 RHADD))]
  "TARGET_SIMD"
)

;; Single insn covering all of [su][r]hadd and [su][r]hsub.
(define_insn "aarch64_<sur>h<addsub><mode>"
  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
        (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand" "w")
		      (match_operand:VDQ_BHSI 2 "register_operand" "w")]
		     HADDSUB))]
  "TARGET_SIMD"
  "<sur>h<addsub>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_<addsub>_halve<q>")]
)
4375
;; <r><addsub>hn<q>.
;; Add/subtract then narrow, keeping the high half of each result element.

(define_insn "aarch64_<sur><addsub>hn<mode>"
  [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
        (unspec:<VNARROWQ> [(match_operand:VQN 1 "register_operand" "w")
			    (match_operand:VQN 2 "register_operand" "w")]
                           ADDSUBHN))]
  "TARGET_SIMD"
  "<sur><addsub>hn\\t%0.<Vntype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_<addsub>_halve_narrow_q")]
)

;; High-half variant: operand 1 (tied to the destination) supplies the low
;; half of the result; the narrowed values land in the high half.
(define_insn "aarch64_<sur><addsub>hn2<mode>"
  [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
        (unspec:<VNARROWQ2> [(match_operand:<VNARROWQ> 1 "register_operand" "0")
			     (match_operand:VQN 2 "register_operand" "w")
			     (match_operand:VQN 3 "register_operand" "w")]
                            ADDSUBHN2))]
  "TARGET_SIMD"
  "<sur><addsub>hn2\\t%0.<V2ntype>, %2.<Vtype>, %3.<Vtype>"
  [(set_attr "type" "neon_<addsub>_halve_narrow_q")]
)
4398
;; pmul.
;; Polynomial (carry-less) multiply on byte vectors.

(define_insn "aarch64_pmul<mode>"
  [(set (match_operand:VB 0 "register_operand" "=w")
        (unspec:VB [(match_operand:VB 1 "register_operand" "w")
		    (match_operand:VB 2 "register_operand" "w")]
		   UNSPEC_PMUL))]
 "TARGET_SIMD"
 "pmul\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_mul_<Vetype><q>")]
)

;; fmulx.
;; Floating-point multiply-extended, vector and scalar forms.

(define_insn "aarch64_fmulx<mode>"
  [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
	(unspec:VHSDF_HSDF
	  [(match_operand:VHSDF_HSDF 1 "register_operand" "w")
	   (match_operand:VHSDF_HSDF 2 "register_operand" "w")]
	   UNSPEC_FMULX))]
 "TARGET_SIMD"
 "fmulx\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
 [(set_attr "type" "neon_fp_mul_<stype>")]
)
4423
;; FMULX by-lane variants.  Each pattern rewrites the lane number for
;; big-endian via aarch64_endian_lane_rtx before printing the template.

;; vmulxq_lane_f32, and vmulx_laneq_f32
;; Lane taken from a vector of the opposite width (<VSWAP_WIDTH>).

(define_insn "*aarch64_mulx_elt_<vswap_width_name><mode>"
  [(set (match_operand:VDQSF 0 "register_operand" "=w")
	(unspec:VDQSF
	 [(match_operand:VDQSF 1 "register_operand" "w")
	  (vec_duplicate:VDQSF
	   (vec_select:<VEL>
	    (match_operand:<VSWAP_WIDTH> 2 "register_operand" "w")
	    (parallel [(match_operand:SI 3 "immediate_operand" "i")])))]
	 UNSPEC_FMULX))]
  "TARGET_SIMD"
  {
    operands[3] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[3]));
    return "fmulx\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
  }
  [(set_attr "type" "neon_fp_mul_<Vetype>_scalar<q>")]
)

;; vmulxq_laneq_f32, vmulxq_laneq_f64, vmulx_lane_f32
;; Lane taken from a vector of the same mode as the destination.

(define_insn "*aarch64_mulx_elt<mode>"
  [(set (match_operand:VDQF 0 "register_operand" "=w")
	(unspec:VDQF
	 [(match_operand:VDQF 1 "register_operand" "w")
	  (vec_duplicate:VDQF
	   (vec_select:<VEL>
	    (match_operand:VDQF 2 "register_operand" "w")
	    (parallel [(match_operand:SI 3 "immediate_operand" "i")])))]
	 UNSPEC_FMULX))]
  "TARGET_SIMD"
  {
    operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
    return "fmulx\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
  }
  [(set_attr "type" "neon_fp_mul_<Vetype><q>")]
)

;; vmulxq_lane
;; Multiplier broadcast from a scalar register; printed as lane 0.

(define_insn "*aarch64_mulx_elt_from_dup<mode>"
  [(set (match_operand:VHSDF 0 "register_operand" "=w")
	(unspec:VHSDF
	 [(match_operand:VHSDF 1 "register_operand" "w")
	  (vec_duplicate:VHSDF
	    (match_operand:<VEL> 2 "register_operand" "<h_con>"))]
	 UNSPEC_FMULX))]
  "TARGET_SIMD"
  "fmulx\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[0]";
  [(set_attr "type" "neon<fp>_mul_<stype>_scalar<q>")]
)

;; vmulxs_lane_f32, vmulxs_laneq_f32
;; vmulxd_lane_f64 == vmulx_lane_f64
;; vmulxd_laneq_f64 == vmulx_laneq_f64
;; Scalar result: scalar operand 1 times one selected lane of operand 2.

(define_insn "*aarch64_vgetfmulx<mode>"
  [(set (match_operand:<VEL> 0 "register_operand" "=w")
	(unspec:<VEL>
	 [(match_operand:<VEL> 1 "register_operand" "w")
	  (vec_select:<VEL>
	   (match_operand:VDQF 2 "register_operand" "w")
	    (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
	 UNSPEC_FMULX))]
  "TARGET_SIMD"
  {
    operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
    return "fmulx\t%<Vetype>0, %<Vetype>1, %2.<Vetype>[%3]";
  }
  [(set_attr "type" "fmul<Vetype>")]
)
;; <su>q<addsub>
;; Saturating add/subtract, expressed with RTL ss_plus/us_plus etc.
;; (BINQOPS) rather than unspecs, so the optimizers can reason about them.

(define_insn "aarch64_<su_optab>q<addsub><mode>"
  [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
	(BINQOPS:VSDQ_I (match_operand:VSDQ_I 1 "register_operand" "w")
			(match_operand:VSDQ_I 2 "register_operand" "w")))]
  "TARGET_SIMD"
  "<su_optab>q<addsub>\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
  [(set_attr "type" "neon_q<addsub><q>")]
)

;; suqadd and usqadd
;; Signed+unsigned saturating accumulate; destination is tied to the
;; accumulator (operand 1), so only operands 0 and 2 are printed.

(define_insn "aarch64_<sur>qadd<mode>"
  [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
	(unspec:VSDQ_I [(match_operand:VSDQ_I 1 "register_operand" "0")
			(match_operand:VSDQ_I 2 "register_operand" "w")]
		       USSUQADD))]
  "TARGET_SIMD"
  "<sur>qadd\\t%<v>0<Vmtype>, %<v>2<Vmtype>"
  [(set_attr "type" "neon_qadd<q>")]
)
4517
;; sqmovun
;; Saturating extract-narrow, signed input to unsigned result (SQXTUN).

(define_insn "aarch64_sqmovun<mode>"
  [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
	(unspec:<VNARROWQ> [(match_operand:VSQN_HSDI 1 "register_operand" "w")]
                            UNSPEC_SQXTUN))]
   "TARGET_SIMD"
   "sqxtun\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>"
   [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
)

;; sqmovn and uqmovn
;; Saturating extract-narrow keeping the input's signedness ([SU]QXTN).

(define_insn "aarch64_<sur>qmovn<mode>"
  [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
	(unspec:<VNARROWQ> [(match_operand:VSQN_HSDI 1 "register_operand" "w")]
                            SUQMOVN))]
  "TARGET_SIMD"
  "<sur>qxtn\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>"
   [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
)
4539
;; [SU]QXTN2: saturate-narrow operand 2 into the high half of the result
;; while keeping operand 1 (tied to the destination) as the low half.
;; The vec_concat order depends on endianness, hence _le/_be twins.

(define_insn "aarch64_<su>qxtn2<mode>_le"
  [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
	(vec_concat:<VNARROWQ2>
	  (match_operand:<VNARROWQ> 1 "register_operand" "0")
	  (SAT_TRUNC:<VNARROWQ>
	    (match_operand:VQN 2 "register_operand" "w"))))]
  "TARGET_SIMD && !BYTES_BIG_ENDIAN"
  "<su>qxtn2\\t%0.<V2ntype>, %2.<Vtype>"
   [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
)

(define_insn "aarch64_<su>qxtn2<mode>_be"
  [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
	(vec_concat:<VNARROWQ2>
	  (SAT_TRUNC:<VNARROWQ>
	    (match_operand:VQN 2 "register_operand" "w"))
	  (match_operand:<VNARROWQ> 1 "register_operand" "0")))]
  "TARGET_SIMD && BYTES_BIG_ENDIAN"
  "<su>qxtn2\\t%0.<V2ntype>, %2.<Vtype>"
   [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
)

;; Dispatch to the _le/_be insn matching the target's byte order.
(define_expand "aarch64_<su>qxtn2<mode>"
  [(match_operand:<VNARROWQ2> 0 "register_operand")
   (match_operand:<VNARROWQ> 1 "register_operand")
   (SAT_TRUNC:<VNARROWQ>
     (match_operand:VQN 2 "register_operand"))]
  "TARGET_SIMD"
  {
    if (BYTES_BIG_ENDIAN)
      emit_insn (gen_aarch64_<su>qxtn2<mode>_be (operands[0], operands[1],
						 operands[2]));
    else
      emit_insn (gen_aarch64_<su>qxtn2<mode>_le (operands[0], operands[1],
						 operands[2]));
    DONE;
  }
)
4578
;; SQXTUN2: as qxtn2 above but signed-to-unsigned saturating narrow,
;; kept as an unspec since RTL has no us_truncate of a signed source.

(define_insn "aarch64_sqxtun2<mode>_le"
  [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
	(vec_concat:<VNARROWQ2>
	  (match_operand:<VNARROWQ> 1 "register_operand" "0")
	  (unspec:<VNARROWQ>
	    [(match_operand:VQN 2 "register_operand" "w")] UNSPEC_SQXTUN2)))]
  "TARGET_SIMD && !BYTES_BIG_ENDIAN"
  "sqxtun2\\t%0.<V2ntype>, %2.<Vtype>"
   [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
)

(define_insn "aarch64_sqxtun2<mode>_be"
  [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
	(vec_concat:<VNARROWQ2>
	  (unspec:<VNARROWQ>
	    [(match_operand:VQN 2 "register_operand" "w")] UNSPEC_SQXTUN2)
	  (match_operand:<VNARROWQ> 1 "register_operand" "0")))]
  "TARGET_SIMD && BYTES_BIG_ENDIAN"
  "sqxtun2\\t%0.<V2ntype>, %2.<Vtype>"
   [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
)

;; Dispatch to the _le/_be insn matching the target's byte order.
(define_expand "aarch64_sqxtun2<mode>"
  [(match_operand:<VNARROWQ2> 0 "register_operand")
   (match_operand:<VNARROWQ> 1 "register_operand")
   (unspec:<VNARROWQ>
     [(match_operand:VQN 2 "register_operand")] UNSPEC_SQXTUN2)]
  "TARGET_SIMD"
  {
    if (BYTES_BIG_ENDIAN)
      emit_insn (gen_aarch64_sqxtun2<mode>_be (operands[0], operands[1],
					      operands[2]));
    else
      emit_insn (gen_aarch64_sqxtun2<mode>_le (operands[0], operands[1],
					       operands[2]));
    DONE;
  }
)
4617
;; <su>q<absneg>
;; Saturating absolute value / negate (SQABS, SQNEG) via the UNQOPS
;; unary-saturating code iterator.

(define_insn "aarch64_s<optab><mode>"
  [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
	(UNQOPS:VSDQ_I
	  (match_operand:VSDQ_I 1 "register_operand" "w")))]
  "TARGET_SIMD"
  "s<optab>\\t%<v>0<Vmtype>, %<v>1<Vmtype>"
  [(set_attr "type" "neon_<optab><q>")]
)

;; sq<r>dmulh.
;; Saturating doubling multiply returning the high half, with optional
;; rounding (<r> = "" or "r").

(define_insn "aarch64_sq<r>dmulh<mode>"
  [(set (match_operand:VSDQ_HSI 0 "register_operand" "=w")
	(unspec:VSDQ_HSI
	  [(match_operand:VSDQ_HSI 1 "register_operand" "w")
	   (match_operand:VSDQ_HSI 2 "register_operand" "w")]
	 VQDMULH))]
  "TARGET_SIMD"
  "sq<r>dmulh\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
  [(set_attr "type" "neon_sat_mul_<Vetype><q>")]
)
4641
;; sq<r>dmulh_lane
;; By-lane forms of SQ[R]DMULH.  _lane indexes a 64-bit vector (<VCOND>),
;; _laneq a 128-bit vector (<VCONQ>); each name is defined twice, once for
;; vector results (VDQHS) and once for scalar results (SD_HSI) — the mode
;; suffixes keep the generated names distinct.  All of them canonicalize
;; the lane number for big-endian before printing.

(define_insn "aarch64_sq<r>dmulh_lane<mode>"
  [(set (match_operand:VDQHS 0 "register_operand" "=w")
        (unspec:VDQHS
	  [(match_operand:VDQHS 1 "register_operand" "w")
           (vec_select:<VEL>
             (match_operand:<VCOND> 2 "register_operand" "<vwx>")
             (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
	 VQDMULH))]
  "TARGET_SIMD"
  "*
   operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
   return \"sq<r>dmulh\\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[%3]\";"
  [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
)

(define_insn "aarch64_sq<r>dmulh_laneq<mode>"
  [(set (match_operand:VDQHS 0 "register_operand" "=w")
        (unspec:VDQHS
	  [(match_operand:VDQHS 1 "register_operand" "w")
           (vec_select:<VEL>
             (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
             (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
	 VQDMULH))]
  "TARGET_SIMD"
  "*
   operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
   return \"sq<r>dmulh\\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[%3]\";"
  [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
)

;; Scalar-result variant of the _lane pattern above.
(define_insn "aarch64_sq<r>dmulh_lane<mode>"
  [(set (match_operand:SD_HSI 0 "register_operand" "=w")
        (unspec:SD_HSI
	  [(match_operand:SD_HSI 1 "register_operand" "w")
           (vec_select:<VEL>
             (match_operand:<VCOND> 2 "register_operand" "<vwx>")
             (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
	 VQDMULH))]
  "TARGET_SIMD"
  "*
   operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
   return \"sq<r>dmulh\\t%<v>0, %<v>1, %2.<v>[%3]\";"
  [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
)

;; Scalar-result variant of the _laneq pattern above.
(define_insn "aarch64_sq<r>dmulh_laneq<mode>"
  [(set (match_operand:SD_HSI 0 "register_operand" "=w")
        (unspec:SD_HSI
	  [(match_operand:SD_HSI 1 "register_operand" "w")
           (vec_select:<VEL>
             (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
             (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
	 VQDMULH))]
  "TARGET_SIMD"
  "*
   operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
   return \"sq<r>dmulh\\t%<v>0, %<v>1, %2.<v>[%3]\";"
  [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
)
4703
;; sqrdml[as]h.
;; Rounding saturating doubling multiply-accumulate/-subtract high half
;; (SQRDMLAH/SQRDMLSH); requires the RDMA (ARMv8.1) extension.  The
;; accumulator (operand 1) is tied to the destination.

(define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h<mode>"
  [(set (match_operand:VSDQ_HSI 0 "register_operand" "=w")
	(unspec:VSDQ_HSI
	  [(match_operand:VSDQ_HSI 1 "register_operand" "0")
	   (match_operand:VSDQ_HSI 2 "register_operand" "w")
	   (match_operand:VSDQ_HSI 3 "register_operand" "w")]
	  SQRDMLH_AS))]
   "TARGET_SIMD_RDMA"
   "sqrdml<SQRDMLH_AS:rdma_as>h\\t%<v>0<Vmtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
   [(set_attr "type" "neon_sat_mla_<Vetype>_long")]
)

;; sqrdml[as]h_lane.
;; By-lane forms indexing a 64-bit vector (<VCOND>); lane number is
;; endian-adjusted before printing.

(define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_lane<mode>"
  [(set (match_operand:VDQHS 0 "register_operand" "=w")
	(unspec:VDQHS
	  [(match_operand:VDQHS 1 "register_operand" "0")
	   (match_operand:VDQHS 2 "register_operand" "w")
	   (vec_select:<VEL>
	     (match_operand:<VCOND> 3 "register_operand" "<vwx>")
	     (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
	  SQRDMLH_AS))]
   "TARGET_SIMD_RDMA"
   {
     operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
     return
      "sqrdml<SQRDMLH_AS:rdma_as>h\\t%0.<Vtype>, %2.<Vtype>, %3.<Vetype>[%4]";
   }
   [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
)

;; Scalar-result variant of the _lane pattern above.
(define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_lane<mode>"
  [(set (match_operand:SD_HSI 0 "register_operand" "=w")
	(unspec:SD_HSI
	  [(match_operand:SD_HSI 1 "register_operand" "0")
	   (match_operand:SD_HSI 2 "register_operand" "w")
	   (vec_select:<VEL>
	     (match_operand:<VCOND> 3 "register_operand" "<vwx>")
	     (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
	  SQRDMLH_AS))]
   "TARGET_SIMD_RDMA"
   {
     operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
     return
      "sqrdml<SQRDMLH_AS:rdma_as>h\\t%<v>0, %<v>2, %3.<Vetype>[%4]";
   }
   [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
)
4755
;; sqrdml[as]h_laneq.
;; As the _lane forms above but indexing a 128-bit vector (<VCONQ>).

(define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_laneq<mode>"
  [(set (match_operand:VDQHS 0 "register_operand" "=w")
	(unspec:VDQHS
	  [(match_operand:VDQHS 1 "register_operand" "0")
	   (match_operand:VDQHS 2 "register_operand" "w")
	   (vec_select:<VEL>
	     (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
	     (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
	  SQRDMLH_AS))]
   "TARGET_SIMD_RDMA"
   {
     operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
     return
      "sqrdml<SQRDMLH_AS:rdma_as>h\\t%0.<Vtype>, %2.<Vtype>, %3.<Vetype>[%4]";
   }
   [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
)

;; Scalar-result variant of the _laneq pattern above.
(define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_laneq<mode>"
  [(set (match_operand:SD_HSI 0 "register_operand" "=w")
	(unspec:SD_HSI
	  [(match_operand:SD_HSI 1 "register_operand" "0")
	   (match_operand:SD_HSI 2 "register_operand" "w")
	   (vec_select:<VEL>
	     (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
	     (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
	  SQRDMLH_AS))]
   "TARGET_SIMD_RDMA"
   {
     operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
     return
      "sqrdml<SQRDMLH_AS:rdma_as>h\\t%<v>0, %<v>2, %3.<v>[%4]";
   }
   [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
)
4793
;; vqdml[sa]l
;; Saturating doubling multiply-add/-subtract long.  Modelled explicitly
;; in RTL as widen, multiply, saturating shift-left by 1 (the doubling),
;; then saturating add/sub into the wide accumulator (operand 1, tied
;; to the destination).

(define_insn "aarch64_sqdmlal<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (ss_plus:<VWIDE>
	  (ss_ashift:<VWIDE>
	      (mult:<VWIDE>
		(sign_extend:<VWIDE>
		      (match_operand:VSD_HSI 2 "register_operand" "w"))
		(sign_extend:<VWIDE>
		      (match_operand:VSD_HSI 3 "register_operand" "w")))
	      (const_int 1))
	  (match_operand:<VWIDE> 1 "register_operand" "0")))]
  "TARGET_SIMD"
  "sqdmlal\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
  [(set_attr "type" "neon_sat_mla_<Vetype>_long")]
)

(define_insn "aarch64_sqdmlsl<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (ss_minus:<VWIDE>
	  (match_operand:<VWIDE> 1 "register_operand" "0")
	  (ss_ashift:<VWIDE>
	      (mult:<VWIDE>
		(sign_extend:<VWIDE>
		      (match_operand:VSD_HSI 2 "register_operand" "w"))
		(sign_extend:<VWIDE>
		      (match_operand:VSD_HSI 3 "register_operand" "w")))
	      (const_int 1))))]
  "TARGET_SIMD"
  "sqdmlsl\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
  [(set_attr "type" "neon_sat_mla_<Vetype>_long")]
)
4827
;; vqdml[sa]l_lane
;; By-lane forms for 64-bit vector inputs: the second multiplicand is one
;; lane of a 64-bit (_lane, <VCOND>) or 128-bit (_laneq, <VCONQ>) vector,
;; duplicated across all elements.  Lane numbers are endian-adjusted.

(define_insn "aarch64_sqdmlal_lane<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (ss_plus:<VWIDE>
	  (ss_ashift:<VWIDE>
	    (mult:<VWIDE>
	      (sign_extend:<VWIDE>
		(match_operand:VD_HSI 2 "register_operand" "w"))
	      (sign_extend:<VWIDE>
		(vec_duplicate:VD_HSI
		  (vec_select:<VEL>
		    (match_operand:<VCOND> 3 "register_operand" "<vwx>")
		    (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
              ))
	    (const_int 1))
	  (match_operand:<VWIDE> 1 "register_operand" "0")))]
  "TARGET_SIMD"
  {
    operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
    return
      "sqdmlal\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
  }
  [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
)

(define_insn "aarch64_sqdmlsl_lane<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (ss_minus:<VWIDE>
	  (match_operand:<VWIDE> 1 "register_operand" "0")
	  (ss_ashift:<VWIDE>
	    (mult:<VWIDE>
	      (sign_extend:<VWIDE>
		(match_operand:VD_HSI 2 "register_operand" "w"))
	      (sign_extend:<VWIDE>
		(vec_duplicate:VD_HSI
		  (vec_select:<VEL>
		    (match_operand:<VCOND> 3 "register_operand" "<vwx>")
		    (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
              ))
	    (const_int 1))))]
  "TARGET_SIMD"
  {
    operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
    return
      "sqdmlsl\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
  }
  [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
)


;; _laneq: lane taken from a 128-bit index vector.
(define_insn "aarch64_sqdmlsl_laneq<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (ss_minus:<VWIDE>
	  (match_operand:<VWIDE> 1 "register_operand" "0")
	  (ss_ashift:<VWIDE>
	    (mult:<VWIDE>
	      (sign_extend:<VWIDE>
		(match_operand:VD_HSI 2 "register_operand" "w"))
	      (sign_extend:<VWIDE>
		(vec_duplicate:VD_HSI
		  (vec_select:<VEL>
		    (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
		    (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
              ))
	    (const_int 1))))]
  "TARGET_SIMD"
  {
    operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
    return
      "sqdmlsl\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
  }
  [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
)

(define_insn "aarch64_sqdmlal_laneq<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (ss_plus:<VWIDE>
	  (ss_ashift:<VWIDE>
	    (mult:<VWIDE>
	      (sign_extend:<VWIDE>
		(match_operand:VD_HSI 2 "register_operand" "w"))
	      (sign_extend:<VWIDE>
		(vec_duplicate:VD_HSI
		  (vec_select:<VEL>
		    (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
		    (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
              ))
	    (const_int 1))
	  (match_operand:<VWIDE> 1 "register_operand" "0")))]
  "TARGET_SIMD"
  {
    operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
    return
      "sqdmlal\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
  }
  [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
)
4926
4927
;; Scalar-input (SD_HSI) by-lane forms: same names as the vector forms
;; above but with scalar multiplicands, so no vec_duplicate is needed.

(define_insn "aarch64_sqdmlal_lane<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (ss_plus:<VWIDE>
	  (ss_ashift:<VWIDE>
	    (mult:<VWIDE>
	      (sign_extend:<VWIDE>
		(match_operand:SD_HSI 2 "register_operand" "w"))
	      (sign_extend:<VWIDE>
		(vec_select:<VEL>
		  (match_operand:<VCOND> 3 "register_operand" "<vwx>")
		  (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
              )
	    (const_int 1))
	  (match_operand:<VWIDE> 1 "register_operand" "0")))]
  "TARGET_SIMD"
  {
    operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
    return
      "sqdmlal\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
  }
  [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
)

(define_insn "aarch64_sqdmlsl_lane<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (ss_minus:<VWIDE>
	  (match_operand:<VWIDE> 1 "register_operand" "0")
	  (ss_ashift:<VWIDE>
	    (mult:<VWIDE>
	      (sign_extend:<VWIDE>
		(match_operand:SD_HSI 2 "register_operand" "w"))
	      (sign_extend:<VWIDE>
		(vec_select:<VEL>
		  (match_operand:<VCOND> 3 "register_operand" "<vwx>")
		  (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
              )
	    (const_int 1))))]
  "TARGET_SIMD"
  {
    operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
    return
      "sqdmlsl\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
  }
  [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
)


;; _laneq: lane taken from a 128-bit index vector.
(define_insn "aarch64_sqdmlal_laneq<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (ss_plus:<VWIDE>
	  (ss_ashift:<VWIDE>
	    (mult:<VWIDE>
	      (sign_extend:<VWIDE>
		(match_operand:SD_HSI 2 "register_operand" "w"))
	      (sign_extend:<VWIDE>
		(vec_select:<VEL>
		  (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
		  (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
              )
	    (const_int 1))
	  (match_operand:<VWIDE> 1 "register_operand" "0")))]
  "TARGET_SIMD"
  {
    operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
    return
      "sqdmlal\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
  }
  [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
)

(define_insn "aarch64_sqdmlsl_laneq<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (ss_minus:<VWIDE>
	  (match_operand:<VWIDE> 1 "register_operand" "0")
	  (ss_ashift:<VWIDE>
	    (mult:<VWIDE>
	      (sign_extend:<VWIDE>
		(match_operand:SD_HSI 2 "register_operand" "w"))
	      (sign_extend:<VWIDE>
		(vec_select:<VEL>
		  (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
		  (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
              )
	    (const_int 1))))]
  "TARGET_SIMD"
  {
    operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
    return
      "sqdmlsl\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
  }
  [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
)
5020
;; vqdml[sa]l_n
;; Multiply by a scalar broadcast to every lane; printed as lane 0 of the
;; scalar register.

(define_insn "aarch64_sqdmlsl_n<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (ss_minus:<VWIDE>
	  (match_operand:<VWIDE> 1 "register_operand" "0")
	  (ss_ashift:<VWIDE>
	      (mult:<VWIDE>
		(sign_extend:<VWIDE>
		      (match_operand:VD_HSI 2 "register_operand" "w"))
		(sign_extend:<VWIDE>
		  (vec_duplicate:VD_HSI
		    (match_operand:<VEL> 3 "register_operand" "<vwx>"))))
	      (const_int 1))))]
  "TARGET_SIMD"
  "sqdmlsl\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[0]"
  [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
)

(define_insn "aarch64_sqdmlal_n<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (ss_plus:<VWIDE>
	  (ss_ashift:<VWIDE>
	      (mult:<VWIDE>
		(sign_extend:<VWIDE>
		      (match_operand:VD_HSI 2 "register_operand" "w"))
		(sign_extend:<VWIDE>
		  (vec_duplicate:VD_HSI
		    (match_operand:<VEL> 3 "register_operand" "<vwx>"))))
	      (const_int 1))
	  (match_operand:<VWIDE> 1 "register_operand" "0")))]
  "TARGET_SIMD"
  "sqdmlal\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[0]"
  [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
)
5056
5057
5058;; sqdml[as]l2
5059
5060(define_insn "aarch64_sqdmlal2<mode>_internal"
5061  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
5062        (ss_plus:<VWIDE>
5063         (ss_ashift:<VWIDE>
5064             (mult:<VWIDE>
5065               (sign_extend:<VWIDE>
5066                 (vec_select:<VHALF>
5067                     (match_operand:VQ_HSI 2 "register_operand" "w")
5068                     (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
5069               (sign_extend:<VWIDE>
5070                 (vec_select:<VHALF>
5071                     (match_operand:VQ_HSI 3 "register_operand" "w")
5072                     (match_dup 4))))
5073             (const_int 1))
5074	  (match_operand:<VWIDE> 1 "register_operand" "0")))]
5075  "TARGET_SIMD"
5076  "sqdmlal2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
5077  [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
5078)
5079
;; Signed saturating doubling multiply-subtract long, high halves:
;; acc (operand 1) -= saturate ((high(op2) * high(op3)) << 1).  Mirrors
;; the sqdmlal2 pattern above but with ss_minus, so the accumulator is the
;; first arm of the minus rather than the second arm of the plus.
(define_insn "aarch64_sqdmlsl2<mode>_internal"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (ss_minus:<VWIDE>
         (match_operand:<VWIDE> 1 "register_operand" "0")
         (ss_ashift:<VWIDE>
             (mult:<VWIDE>
               (sign_extend:<VWIDE>
                 (vec_select:<VHALF>
                     (match_operand:VQ_HSI 2 "register_operand" "w")
                     (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
               (sign_extend:<VWIDE>
                 (vec_select:<VHALF>
                     (match_operand:VQ_HSI 3 "register_operand" "w")
                     (match_dup 4))))
             (const_int 1))))]
  "TARGET_SIMD"
  "sqdmlsl2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
  [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
)
5099
;; Expanders for the sqdmlal2/sqdmlsl2 builtins: build the high-half
;; selector parallel and hand off to the matching *_internal insn above.
(define_expand "aarch64_sqdmlal2<mode>"
  [(match_operand:<VWIDE> 0 "register_operand")
   (match_operand:<VWIDE> 1 "register_operand")
   (match_operand:VQ_HSI 2 "register_operand")
   (match_operand:VQ_HSI 3 "register_operand")]
  "TARGET_SIMD"
{
  /* Parallel selecting the high half of the vector.  */
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
  emit_insn (gen_aarch64_sqdmlal2<mode>_internal (operands[0], operands[1],
						  operands[2], operands[3], p));
  DONE;
})

(define_expand "aarch64_sqdmlsl2<mode>"
  [(match_operand:<VWIDE> 0 "register_operand")
   (match_operand:<VWIDE> 1 "register_operand")
   (match_operand:VQ_HSI 2 "register_operand")
   (match_operand:VQ_HSI 3 "register_operand")]
  "TARGET_SIMD"
{
  /* Parallel selecting the high half of the vector.  */
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
  emit_insn (gen_aarch64_sqdmlsl2<mode>_internal (operands[0], operands[1],
						  operands[2], operands[3], p));
  DONE;
})
5125
5126;; vqdml[sa]l2_lane
5127
;; sqdmlal2/sqdmlsl2 by lane: accumulate/subtract the saturating doubled
;; product of the high half of operand 2 with lane <operand 4> of operand 3
;; (a 64-bit vector, <VCOND>).  SBINQOPS iterates over ss_plus/ss_minus.
;; The lane number is converted for big-endian before printing.
(define_insn "aarch64_sqdml<SBINQOPS:as>l2_lane<mode>_internal"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (SBINQOPS:<VWIDE>
	  (match_operand:<VWIDE> 1 "register_operand" "0")
	  (ss_ashift:<VWIDE>
	      (mult:<VWIDE>
		(sign_extend:<VWIDE>
                  (vec_select:<VHALF>
                    (match_operand:VQ_HSI 2 "register_operand" "w")
                    (match_operand:VQ_HSI 5 "vect_par_cnst_hi_half" "")))
		(sign_extend:<VWIDE>
                  (vec_duplicate:<VHALF>
		    (vec_select:<VEL>
		      (match_operand:<VCOND> 3 "register_operand" "<vwx>")
		      (parallel [(match_operand:SI 4 "immediate_operand" "i")])
		    ))))
	      (const_int 1))))]
  "TARGET_SIMD"
  {
    operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
    return
     "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
  }
  [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
)
5153
;; As the _lane pattern above, but the lane comes from a 128-bit vector
;; (<VCONQ>) instead of a 64-bit one, giving the wider lane range.
(define_insn "aarch64_sqdml<SBINQOPS:as>l2_laneq<mode>_internal"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (SBINQOPS:<VWIDE>
	  (match_operand:<VWIDE> 1 "register_operand" "0")
	  (ss_ashift:<VWIDE>
	      (mult:<VWIDE>
		(sign_extend:<VWIDE>
                  (vec_select:<VHALF>
                    (match_operand:VQ_HSI 2 "register_operand" "w")
                    (match_operand:VQ_HSI 5 "vect_par_cnst_hi_half" "")))
		(sign_extend:<VWIDE>
                  (vec_duplicate:<VHALF>
		    (vec_select:<VEL>
		      (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
		      (parallel [(match_operand:SI 4 "immediate_operand" "i")])
		    ))))
	      (const_int 1))))]
  "TARGET_SIMD"
  {
    operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
    return
     "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
  }
  [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
)
5179
;; Expanders for the lane/laneq forms of sqdmlal2/sqdmlsl2: build the
;; high-half selector and forward to the corresponding *_internal insn.
;; The _lane forms take a 64-bit lane vector (<VCOND>), the _laneq forms a
;; 128-bit one (<VCONQ>).
(define_expand "aarch64_sqdmlal2_lane<mode>"
  [(match_operand:<VWIDE> 0 "register_operand")
   (match_operand:<VWIDE> 1 "register_operand")
   (match_operand:VQ_HSI 2 "register_operand")
   (match_operand:<VCOND> 3 "register_operand")
   (match_operand:SI 4 "immediate_operand")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
  emit_insn (gen_aarch64_sqdmlal2_lane<mode>_internal (operands[0], operands[1],
						       operands[2], operands[3],
						       operands[4], p));
  DONE;
})

(define_expand "aarch64_sqdmlal2_laneq<mode>"
  [(match_operand:<VWIDE> 0 "register_operand")
   (match_operand:<VWIDE> 1 "register_operand")
   (match_operand:VQ_HSI 2 "register_operand")
   (match_operand:<VCONQ> 3 "register_operand")
   (match_operand:SI 4 "immediate_operand")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
  emit_insn (gen_aarch64_sqdmlal2_laneq<mode>_internal (operands[0], operands[1],
						       operands[2], operands[3],
						       operands[4], p));
  DONE;
})

(define_expand "aarch64_sqdmlsl2_lane<mode>"
  [(match_operand:<VWIDE> 0 "register_operand")
   (match_operand:<VWIDE> 1 "register_operand")
   (match_operand:VQ_HSI 2 "register_operand")
   (match_operand:<VCOND> 3 "register_operand")
   (match_operand:SI 4 "immediate_operand")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
  emit_insn (gen_aarch64_sqdmlsl2_lane<mode>_internal (operands[0], operands[1],
						       operands[2], operands[3],
						       operands[4], p));
  DONE;
})

(define_expand "aarch64_sqdmlsl2_laneq<mode>"
  [(match_operand:<VWIDE> 0 "register_operand")
   (match_operand:<VWIDE> 1 "register_operand")
   (match_operand:VQ_HSI 2 "register_operand")
   (match_operand:<VCONQ> 3 "register_operand")
   (match_operand:SI 4 "immediate_operand")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
  emit_insn (gen_aarch64_sqdmlsl2_laneq<mode>_internal (operands[0], operands[1],
						       operands[2], operands[3],
						       operands[4], p));
  DONE;
})
5239
;; sqdmlal2/sqdmlsl2 with a scalar multiplier (the _n intrinsic forms):
;; operand 3 is a scalar element duplicated across the half vector; the
;; instruction encodes it as lane 0 of the register.
(define_insn "aarch64_sqdml<SBINQOPS:as>l2_n<mode>_internal"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (SBINQOPS:<VWIDE>
	  (match_operand:<VWIDE> 1 "register_operand" "0")
	  (ss_ashift:<VWIDE>
	    (mult:<VWIDE>
	      (sign_extend:<VWIDE>
                (vec_select:<VHALF>
                  (match_operand:VQ_HSI 2 "register_operand" "w")
                  (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
	      (sign_extend:<VWIDE>
                (vec_duplicate:<VHALF>
		  (match_operand:<VEL> 3 "register_operand" "<vwx>"))))
	    (const_int 1))))]
  "TARGET_SIMD"
  "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[0]"
  [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
)
5258
;; Expanders for the _n (scalar multiplier) forms: build the high-half
;; selector and forward to the *_n_internal insn above.
(define_expand "aarch64_sqdmlal2_n<mode>"
  [(match_operand:<VWIDE> 0 "register_operand")
   (match_operand:<VWIDE> 1 "register_operand")
   (match_operand:VQ_HSI 2 "register_operand")
   (match_operand:<VEL> 3 "register_operand")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
  emit_insn (gen_aarch64_sqdmlal2_n<mode>_internal (operands[0], operands[1],
						    operands[2], operands[3],
						    p));
  DONE;
})

(define_expand "aarch64_sqdmlsl2_n<mode>"
  [(match_operand:<VWIDE> 0 "register_operand")
   (match_operand:<VWIDE> 1 "register_operand")
   (match_operand:VQ_HSI 2 "register_operand")
   (match_operand:<VEL> 3 "register_operand")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
  emit_insn (gen_aarch64_sqdmlsl2_n<mode>_internal (operands[0], operands[1],
						    operands[2], operands[3],
						    p));
  DONE;
})
5286
5287;; vqdmull
5288
;; Signed saturating doubling multiply long:
;; result = saturate ((sign_extend (op1) * sign_extend (op2)) << 1).
;; Covers both vector (VD_HSI) and scalar (SD_HSI) narrow modes.
(define_insn "aarch64_sqdmull<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (ss_ashift:<VWIDE>
	     (mult:<VWIDE>
	       (sign_extend:<VWIDE>
		     (match_operand:VSD_HSI 1 "register_operand" "w"))
	       (sign_extend:<VWIDE>
		     (match_operand:VSD_HSI 2 "register_operand" "w")))
	     (const_int 1)))]
  "TARGET_SIMD"
  "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
  [(set_attr "type" "neon_sat_mul_<Vetype>_long")]
)
5302
5303;; vqdmull_lane
5304
;; sqdmull by lane, vector form: multiply operand 1 by lane <operand 3> of
;; the 64-bit vector operand 2 (duplicated across the vector).  The lane
;; index is adjusted for big-endian before printing.
(define_insn "aarch64_sqdmull_lane<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (ss_ashift:<VWIDE>
	     (mult:<VWIDE>
	       (sign_extend:<VWIDE>
		 (match_operand:VD_HSI 1 "register_operand" "w"))
	       (sign_extend:<VWIDE>
                 (vec_duplicate:VD_HSI
                   (vec_select:<VEL>
		     (match_operand:<VCOND> 2 "register_operand" "<vwx>")
		     (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
	       ))
	     (const_int 1)))]
  "TARGET_SIMD"
  {
    operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
    return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
  }
  [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
)
5325
;; As aarch64_sqdmull_lane above, but the lane is taken from a 128-bit
;; vector (<VCONQ>).
(define_insn "aarch64_sqdmull_laneq<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (ss_ashift:<VWIDE>
	     (mult:<VWIDE>
	       (sign_extend:<VWIDE>
		 (match_operand:VD_HSI 1 "register_operand" "w"))
	       (sign_extend:<VWIDE>
                 (vec_duplicate:VD_HSI
                   (vec_select:<VEL>
		     (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
		     (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
	       ))
	     (const_int 1)))]
  "TARGET_SIMD"
  {
    operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
    return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
  }
  [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
)
5346
;; Scalar (SD_HSI) variant of sqdmull by lane: no vec_duplicate since the
;; product is a single element.  The SD_HSI iterator expands to different
;; pattern names from the VD_HSI version above, so the names do not clash.
(define_insn "aarch64_sqdmull_lane<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (ss_ashift:<VWIDE>
	     (mult:<VWIDE>
	       (sign_extend:<VWIDE>
		 (match_operand:SD_HSI 1 "register_operand" "w"))
	       (sign_extend:<VWIDE>
                 (vec_select:<VEL>
		   (match_operand:<VCOND> 2 "register_operand" "<vwx>")
		   (parallel [(match_operand:SI 3 "immediate_operand" "i")]))
	       ))
	     (const_int 1)))]
  "TARGET_SIMD"
  {
    operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
    return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
  }
  [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
)
5366
;; Scalar (SD_HSI) variant of sqdmull by lane from a 128-bit vector.
(define_insn "aarch64_sqdmull_laneq<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (ss_ashift:<VWIDE>
	     (mult:<VWIDE>
	       (sign_extend:<VWIDE>
		 (match_operand:SD_HSI 1 "register_operand" "w"))
	       (sign_extend:<VWIDE>
                 (vec_select:<VEL>
		   (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
		   (parallel [(match_operand:SI 3 "immediate_operand" "i")]))
	       ))
	     (const_int 1)))]
  "TARGET_SIMD"
  {
    operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
    return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
  }
  [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
)
5386
5387;; vqdmull_n
5388
;; sqdmull with a scalar multiplier duplicated across the vector; emitted
;; as a lane-0 multiply.
(define_insn "aarch64_sqdmull_n<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (ss_ashift:<VWIDE>
	     (mult:<VWIDE>
	       (sign_extend:<VWIDE>
		 (match_operand:VD_HSI 1 "register_operand" "w"))
	       (sign_extend:<VWIDE>
                 (vec_duplicate:VD_HSI
                   (match_operand:<VEL> 2 "register_operand" "<vwx>")))
	       )
	     (const_int 1)))]
  "TARGET_SIMD"
  "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[0]"
  [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
)
5404
5405;; vqdmull2
5406
5407
5408
;; sqdmull2: saturating doubling multiply long of the high halves of two
;; 128-bit vectors.  Operand 3 is the high-half selector parallel supplied
;; by the aarch64_sqdmull2<mode> expander below.
(define_insn "aarch64_sqdmull2<mode>_internal"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (ss_ashift:<VWIDE>
	     (mult:<VWIDE>
	       (sign_extend:<VWIDE>
		 (vec_select:<VHALF>
                   (match_operand:VQ_HSI 1 "register_operand" "w")
                   (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
	       (sign_extend:<VWIDE>
		 (vec_select:<VHALF>
                   (match_operand:VQ_HSI 2 "register_operand" "w")
                   (match_dup 3)))
	       )
	     (const_int 1)))]
  "TARGET_SIMD"
  "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
  [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
)
5427
;; Expander for sqdmull2: build the high-half selector parallel and forward
;; to aarch64_sqdmull2<mode>_internal.
(define_expand "aarch64_sqdmull2<mode>"
  [(match_operand:<VWIDE> 0 "register_operand")
   (match_operand:VQ_HSI 1 "register_operand")
   (match_operand:VQ_HSI 2 "register_operand")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
  emit_insn (gen_aarch64_sqdmull2<mode>_internal (operands[0], operands[1],
						  operands[2], p));
  DONE;
})
5439
5440;; vqdmull2_lane
5441
;; sqdmull2 by lane: high half of operand 1 multiplied by lane <operand 3>
;; of the 64-bit vector operand 2.  Lane adjusted for big-endian.
(define_insn "aarch64_sqdmull2_lane<mode>_internal"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (ss_ashift:<VWIDE>
	     (mult:<VWIDE>
	       (sign_extend:<VWIDE>
		 (vec_select:<VHALF>
                   (match_operand:VQ_HSI 1 "register_operand" "w")
                   (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
	       (sign_extend:<VWIDE>
                 (vec_duplicate:<VHALF>
                   (vec_select:<VEL>
		     (match_operand:<VCOND> 2 "register_operand" "<vwx>")
		     (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
	       ))
	     (const_int 1)))]
  "TARGET_SIMD"
  {
    operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
    return "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
  }
  [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
)
5464
;; As aarch64_sqdmull2_lane_internal, with the lane taken from a 128-bit
;; vector (<VCONQ>).
(define_insn "aarch64_sqdmull2_laneq<mode>_internal"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (ss_ashift:<VWIDE>
	     (mult:<VWIDE>
	       (sign_extend:<VWIDE>
		 (vec_select:<VHALF>
                   (match_operand:VQ_HSI 1 "register_operand" "w")
                   (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
	       (sign_extend:<VWIDE>
                 (vec_duplicate:<VHALF>
                   (vec_select:<VEL>
		     (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
		     (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
	       ))
	     (const_int 1)))]
  "TARGET_SIMD"
  {
    operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
    return "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
  }
  [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
)
5487
;; Expanders for sqdmull2 lane/laneq: build the high-half selector and
;; forward to the matching *_internal insn.
(define_expand "aarch64_sqdmull2_lane<mode>"
  [(match_operand:<VWIDE> 0 "register_operand")
   (match_operand:VQ_HSI 1 "register_operand")
   (match_operand:<VCOND> 2 "register_operand")
   (match_operand:SI 3 "immediate_operand")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
  emit_insn (gen_aarch64_sqdmull2_lane<mode>_internal (operands[0], operands[1],
						       operands[2], operands[3],
						       p));
  DONE;
})

(define_expand "aarch64_sqdmull2_laneq<mode>"
  [(match_operand:<VWIDE> 0 "register_operand")
   (match_operand:VQ_HSI 1 "register_operand")
   (match_operand:<VCONQ> 2 "register_operand")
   (match_operand:SI 3 "immediate_operand")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
  emit_insn (gen_aarch64_sqdmull2_laneq<mode>_internal (operands[0], operands[1],
						       operands[2], operands[3],
						       p));
  DONE;
})
5515
5516;; vqdmull2_n
5517
;; sqdmull2 with a scalar multiplier duplicated across the half vector;
;; emitted as a lane-0 multiply.
(define_insn "aarch64_sqdmull2_n<mode>_internal"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (ss_ashift:<VWIDE>
	     (mult:<VWIDE>
	       (sign_extend:<VWIDE>
		 (vec_select:<VHALF>
                   (match_operand:VQ_HSI 1 "register_operand" "w")
                   (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
	       (sign_extend:<VWIDE>
                 (vec_duplicate:<VHALF>
                   (match_operand:<VEL> 2 "register_operand" "<vwx>")))
	       )
	     (const_int 1)))]
  "TARGET_SIMD"
  "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[0]"
  [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
)
5535
;; Expander for the sqdmull2_n builtin: build the high-half selector and
;; forward to aarch64_sqdmull2_n<mode>_internal.
(define_expand "aarch64_sqdmull2_n<mode>"
  [(match_operand:<VWIDE> 0 "register_operand")
   (match_operand:VQ_HSI 1 "register_operand")
   (match_operand:<VEL> 2 "register_operand")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
  emit_insn (gen_aarch64_sqdmull2_n<mode>_internal (operands[0], operands[1],
						    operands[2], p));
  DONE;
})
5547
5548;; vshl
5549
;; Vector shift left by (signed) vector of shift amounts: sshl/ushl and
;; their rounding variants, via the VSHL unspec iterator.
(define_insn "aarch64_<sur>shl<mode>"
  [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
        (unspec:VSDQ_I_DI
	  [(match_operand:VSDQ_I_DI 1 "register_operand" "w")
           (match_operand:VSDQ_I_DI 2 "register_operand" "w")]
         VSHL))]
  "TARGET_SIMD"
  "<sur>shl\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
  [(set_attr "type" "neon_shift_reg<q>")]
)
5560
5561
5562;; vqshl
5563
;; Saturating (optionally rounding) shift left by vector of shift amounts:
;; sqshl/uqshl/sqrshl/uqrshl via the VQSHL unspec iterator.
(define_insn "aarch64_<sur>q<r>shl<mode>"
  [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
        (unspec:VSDQ_I
	  [(match_operand:VSDQ_I 1 "register_operand" "w")
           (match_operand:VSDQ_I 2 "register_operand" "w")]
         VQSHL))]
  "TARGET_SIMD"
  "<sur>q<r>shl\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
  [(set_attr "type" "neon_sat_shift_reg<q>")]
)
5574
;; Vectorizer entry point: widening shift-left of the low half of operand 1
;; by immediate operand 2, lowered to the *shll internal pattern.
;; Constraints are dropped: they are meaningless in a define_expand, and
;; every other expander in this file omits them.
(define_expand "vec_widen_<sur>shiftl_lo_<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand")
	(unspec:<VWIDE> [(match_operand:VQW 1 "register_operand")
			 (match_operand:SI 2
			   "aarch64_simd_shift_imm_bitsize_<ve_mode>")]
			 VSHLL))]
  "TARGET_SIMD"
  {
    rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
    emit_insn (gen_aarch64_<sur>shll<mode>_internal (operands[0], operands[1],
						     p, operands[2]));
    DONE;
  }
)
5589
;; Vectorizer entry point: widening shift-left of the high half of operand 1
;; by immediate operand 2, lowered to the *shll2 internal pattern.
;; Operand 2's predicate is tightened from "immediate_operand" to the same
;; bitsize predicate used by the _lo expander and required by the generated
;; aarch64_<sur>shll2<mode>_internal insn, so the emitted insn always
;; satisfies its own operand predicate.  Expand-time constraints dropped
;; (meaningless in a define_expand).
(define_expand "vec_widen_<sur>shiftl_hi_<mode>"
   [(set (match_operand:<VWIDE> 0 "register_operand")
	(unspec:<VWIDE> [(match_operand:VQW 1 "register_operand")
			 (match_operand:SI 2
			   "aarch64_simd_shift_imm_bitsize_<ve_mode>")]
			  VSHLL))]
   "TARGET_SIMD"
   {
    rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
    emit_insn (gen_aarch64_<sur>shll2<mode>_internal (operands[0], operands[1],
						      p, operands[2]));
    DONE;
   }
)
5604
5605;; vshll_n
5606
;; Widening shift-left of the low half of a 128-bit vector.  When the shift
;; amount equals the element bit-size the dedicated SHLL form is emitted;
;; otherwise the iterator's sshll/ushll form is used.
(define_insn "aarch64_<sur>shll<mode>_internal"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(unspec:<VWIDE> [(vec_select:<VHALF>
			    (match_operand:VQW 1 "register_operand" "w")
			    (match_operand:VQW 2 "vect_par_cnst_lo_half" ""))
			 (match_operand:SI 3
			   "aarch64_simd_shift_imm_bitsize_<ve_mode>" "i")]
			 VSHLL))]
  "TARGET_SIMD"
  {
    if (INTVAL (operands[3]) == GET_MODE_UNIT_BITSIZE (<MODE>mode))
      return "shll\\t%0.<Vwtype>, %1.<Vhalftype>, %3";
    else
      return "<sur>shll\\t%0.<Vwtype>, %1.<Vhalftype>, %3";
  }
  [(set_attr "type" "neon_shift_imm_long")]
)
5624
;; Widening shift-left of the high half of a 128-bit vector (the "2" forms);
;; same SHLL-vs-[su]shll selection as the low-half pattern above.
(define_insn "aarch64_<sur>shll2<mode>_internal"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(unspec:<VWIDE> [(vec_select:<VHALF>
			    (match_operand:VQW 1 "register_operand" "w")
			    (match_operand:VQW 2 "vect_par_cnst_hi_half" ""))
			 (match_operand:SI 3
			   "aarch64_simd_shift_imm_bitsize_<ve_mode>" "i")]
			 VSHLL))]
  "TARGET_SIMD"
  {
    if (INTVAL (operands[3]) == GET_MODE_UNIT_BITSIZE (<MODE>mode))
      return "shll2\\t%0.<Vwtype>, %1.<Vtype>, %3";
    else
      return "<sur>shll2\\t%0.<Vwtype>, %1.<Vtype>, %3";
  }
  [(set_attr "type" "neon_shift_imm_long")]
)
5642
;; vshll_n intrinsic: widening shift-left of a 64-bit vector by an
;; immediate, with the SHLL special case for shift == element bit-size.
(define_insn "aarch64_<sur>shll_n<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(unspec:<VWIDE> [(match_operand:VD_BHSI 1 "register_operand" "w")
			 (match_operand:SI 2
			   "aarch64_simd_shift_imm_bitsize_<ve_mode>" "i")]
                         VSHLL))]
  "TARGET_SIMD"
  {
    if (INTVAL (operands[2]) == GET_MODE_UNIT_BITSIZE (<MODE>mode))
      return "shll\\t%0.<Vwtype>, %1.<Vtype>, %2";
    else
      return "<sur>shll\\t%0.<Vwtype>, %1.<Vtype>, %2";
  }
  [(set_attr "type" "neon_shift_imm_long")]
)
5658
5659;; vshll_high_n
5660
;; vshll_high_n intrinsic: widening shift-left of the high half by an
;; immediate.
;; NOTE(review): operand 2 uses the loose "immediate_operand" predicate
;; while the related shll patterns use
;; aarch64_simd_shift_imm_bitsize_<ve_mode>; confirm whether out-of-range
;; shift amounts can reach this pattern.
(define_insn "aarch64_<sur>shll2_n<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(unspec:<VWIDE> [(match_operand:VQW 1 "register_operand" "w")
			 (match_operand:SI 2 "immediate_operand" "i")]
                         VSHLL))]
  "TARGET_SIMD"
  {
    if (INTVAL (operands[2]) == GET_MODE_UNIT_BITSIZE (<MODE>mode))
      return "shll2\\t%0.<Vwtype>, %1.<Vtype>, %2";
    else
      return "<sur>shll2\\t%0.<Vwtype>, %1.<Vtype>, %2";
  }
  [(set_attr "type" "neon_shift_imm_long")]
)
5675
5676;; vrshr_n
5677
;; Rounding shift right by immediate (VRSHR_N unspec variants); shift
;; amount must be in [1, bitsize] per the offset predicate.
(define_insn "aarch64_<sur>shr_n<mode>"
  [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
        (unspec:VSDQ_I_DI [(match_operand:VSDQ_I_DI 1 "register_operand" "w")
			   (match_operand:SI 2
			     "aarch64_simd_shift_imm_offset_<ve_mode>" "i")]
			  VRSHR_N))]
  "TARGET_SIMD"
  "<sur>shr\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2"
  [(set_attr "type" "neon_sat_shift_imm<q>")]
)
5688
5689;; v(r)sra_n
5690
;; Shift right by immediate and accumulate (ssra/usra/srsra/ursra):
;; operand 0 reuses accumulator operand 1 ("0" constraint) and adds
;; operand 2 shifted right by operand 3.
(define_insn "aarch64_<sur>sra_n<mode>"
  [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
	(unspec:VSDQ_I_DI [(match_operand:VSDQ_I_DI 1 "register_operand" "0")
		       (match_operand:VSDQ_I_DI 2 "register_operand" "w")
                       (match_operand:SI 3
			 "aarch64_simd_shift_imm_offset_<ve_mode>" "i")]
                      VSRA))]
  "TARGET_SIMD"
  "<sur>sra\\t%<v>0<Vmtype>, %<v>2<Vmtype>, %3"
  [(set_attr "type" "neon_shift_acc<q>")]
)
5702
5703;; vs<lr>i_n
5704
;; Shift-and-insert by immediate (sli/sri): inserts shifted bits of
;; operand 2 into operand 0, which is tied to operand 1.
(define_insn "aarch64_<sur>s<lr>i_n<mode>"
  [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
	(unspec:VSDQ_I_DI [(match_operand:VSDQ_I_DI 1 "register_operand" "0")
		       (match_operand:VSDQ_I_DI 2 "register_operand" "w")
                       (match_operand:SI 3
			 "aarch64_simd_shift_imm_<offsetlr><ve_mode>" "i")]
                      VSLRI))]
  "TARGET_SIMD"
  "s<lr>i\\t%<v>0<Vmtype>, %<v>2<Vmtype>, %3"
  [(set_attr "type" "neon_shift_imm<q>")]
)
5716
5717;; vqshl(u)
5718
;; Saturating shift left by immediate (sqshl/uqshl/sqshlu) via VQSHL_N.
(define_insn "aarch64_<sur>qshl<u>_n<mode>"
  [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
	(unspec:VSDQ_I [(match_operand:VSDQ_I 1 "register_operand" "w")
		       (match_operand:SI 2
			 "aarch64_simd_shift_imm_<ve_mode>" "i")]
                      VQSHL_N))]
  "TARGET_SIMD"
  "<sur>qshl<u>\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2"
  [(set_attr "type" "neon_sat_shift_imm<q>")]
)
5729
5730
5731;; vq(r)shr(u)n_n
5732
;; Saturating (rounding) shift right narrow by immediate:
;; sq(r)shr(u)n, producing the narrow mode <VNARROWQ>.
(define_insn "aarch64_<sur>q<r>shr<u>n_n<mode>"
  [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
        (unspec:<VNARROWQ> [(match_operand:VSQN_HSDI 1 "register_operand" "w")
			    (match_operand:SI 2
			      "aarch64_simd_shift_imm_offset_<ve_mode>" "i")]
			   VQSHRN_N))]
  "TARGET_SIMD"
  "<sur>q<r>shr<u>n\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>, %2"
  [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
)
5743
;; High-half variant: narrow into the top half of the destination while
;; keeping the low half from operand 1 (tied with "0").
(define_insn "aarch64_<sur>q<r>shr<u>n2_n<mode>"
  [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
        (unspec:<VNARROWQ2> [(match_operand:<VNARROWQ> 1 "register_operand" "0")
			     (match_operand:VQN 2 "register_operand" "w")
			     (match_operand:SI 3 "aarch64_simd_shift_imm_offset_<ve_mode>" "i")]
                            VQSHRN_N))]
  "TARGET_SIMD"
  "<sur>q<r>shr<u>n2\\t%<vn2>0.<V2ntype>, %<v>2.<Vtype>, %3"
  [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
)
5754
5755
5756;; cm(eq|ge|gt|lt|le)
5757;; Note, we have constraints for Dz and Z as different expanders
5758;; have different ideas of what should be passed to this pattern.
5759
;; Signed vector compares producing all-ones/all-zero masks (neg of the
;; boolean compare).  Alternative 2 matches a zero second operand (ZDz)
;; and emits the compare-against-#0 form.
(define_insn "aarch64_cm<optab><mode>"
  [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w,w")
	(neg:<V_INT_EQUIV>
	  (COMPARISONS:<V_INT_EQUIV>
	    (match_operand:VDQ_I 1 "register_operand" "w,w")
	    (match_operand:VDQ_I 2 "aarch64_simd_reg_or_zero" "w,ZDz")
	  )))]
  "TARGET_SIMD"
  "@
  cm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>
  cm<optab>\t%<v>0<Vmtype>, %<v>1<Vmtype>, #0"
  [(set_attr "type" "neon_compare<q>, neon_compare_zero<q>")]
)
5773
;; DImode signed compare that may live in either register file.  Before
;; reload it is kept as "#"; after reload, if the operands ended up in
;; general registers, split into a CC compare plus cstoredi_neg; otherwise
;; re-emit as the *aarch64_cm<optab>di SIMD pattern below (which does not
;; clobber CC).
(define_insn_and_split "aarch64_cm<optab>di"
  [(set (match_operand:DI 0 "register_operand" "=w,w,r")
	(neg:DI
	  (COMPARISONS:DI
	    (match_operand:DI 1 "register_operand" "w,w,r")
	    (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,ZDz,r")
	  )))
     (clobber (reg:CC CC_REGNUM))]
  "TARGET_SIMD"
  "#"
  "&& reload_completed"
  [(set (match_operand:DI 0 "register_operand")
	(neg:DI
	  (COMPARISONS:DI
	    (match_operand:DI 1 "register_operand")
	    (match_operand:DI 2 "aarch64_simd_reg_or_zero")
	  )))]
  {
    /* If we are in the general purpose register file,
       we split to a sequence of comparison and store.  */
    if (GP_REGNUM_P (REGNO (operands[0]))
	&& GP_REGNUM_P (REGNO (operands[1])))
      {
	machine_mode mode = SELECT_CC_MODE (<CMP>, operands[1], operands[2]);
	rtx cc_reg = aarch64_gen_compare_reg (<CMP>, operands[1], operands[2]);
	rtx comparison = gen_rtx_<CMP> (mode, operands[1], operands[2]);
	emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
	DONE;
      }
    /* Otherwise, we expand to a similar pattern which does not
       clobber CC_REGNUM.  */
  }
  [(set_attr "type" "neon_compare, neon_compare_zero, multiple")]
)
5808
;; Post-reload SIMD-register form of the DImode signed compare: no CC
;; clobber, so it is only valid once register allocation has settled.
(define_insn "*aarch64_cm<optab>di"
  [(set (match_operand:DI 0 "register_operand" "=w,w")
	(neg:DI
	  (COMPARISONS:DI
	    (match_operand:DI 1 "register_operand" "w,w")
	    (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,ZDz")
	  )))]
  "TARGET_SIMD && reload_completed"
  "@
  cm<n_optab>\t%d0, %d<cmp_1>, %d<cmp_2>
  cm<optab>\t%d0, %d1, #0"
  [(set_attr "type" "neon_compare, neon_compare_zero")]
)
5822
5823;; cm(hs|hi)
5824
;; Unsigned vector compares (cmhs/cmhi); no compare-against-zero
;; alternative since unsigned > 0 / >= 0 degenerate.
(define_insn "aarch64_cm<optab><mode>"
  [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w")
	(neg:<V_INT_EQUIV>
	  (UCOMPARISONS:<V_INT_EQUIV>
	    (match_operand:VDQ_I 1 "register_operand" "w")
	    (match_operand:VDQ_I 2 "register_operand" "w")
	  )))]
  "TARGET_SIMD"
  "cm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>"
  [(set_attr "type" "neon_compare<q>")]
)
5836
;; DImode unsigned compare, split after reload exactly like the signed
;; version above.  Unsigned compares always use CCmode (no need for
;; SELECT_CC_MODE).
(define_insn_and_split "aarch64_cm<optab>di"
  [(set (match_operand:DI 0 "register_operand" "=w,r")
	(neg:DI
	  (UCOMPARISONS:DI
	    (match_operand:DI 1 "register_operand" "w,r")
	    (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,r")
	  )))
    (clobber (reg:CC CC_REGNUM))]
  "TARGET_SIMD"
  "#"
  "&& reload_completed"
  [(set (match_operand:DI 0 "register_operand")
	(neg:DI
	  (UCOMPARISONS:DI
	    (match_operand:DI 1 "register_operand")
	    (match_operand:DI 2 "aarch64_simd_reg_or_zero")
	  )))]
  {
    /* If we are in the general purpose register file,
       we split to a sequence of comparison and store.  */
    if (GP_REGNUM_P (REGNO (operands[0]))
	&& GP_REGNUM_P (REGNO (operands[1])))
      {
	machine_mode mode = CCmode;
	rtx cc_reg = aarch64_gen_compare_reg (<CMP>, operands[1], operands[2]);
	rtx comparison = gen_rtx_<CMP> (mode, operands[1], operands[2]);
	emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
	DONE;
      }
    /* Otherwise, we expand to a similar pattern which does not
       clobber CC_REGNUM.  */
  }
  [(set_attr "type" "neon_compare,multiple")]
)
5871
;; Post-reload SIMD-register form of the DImode unsigned compare.
(define_insn "*aarch64_cm<optab>di"
  [(set (match_operand:DI 0 "register_operand" "=w")
	(neg:DI
	  (UCOMPARISONS:DI
	    (match_operand:DI 1 "register_operand" "w")
	    (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w")
	  )))]
  "TARGET_SIMD && reload_completed"
  "cm<n_optab>\t%d0, %d<cmp_1>, %d<cmp_2>"
  [(set_attr "type" "neon_compare")]
)
5883
5884;; cmtst
5885
5886;; Although neg (ne (and x y) 0) is the natural way of expressing a cmtst,
5887;; we don't have any insns using ne, and aarch64_vcond outputs
5888;; not (neg (eq (and x y) 0))
5889;; which is rewritten by simplify_rtx as
5890;; plus (eq (and x y) 0) -1.
5891
;; cmtst recognized in the canonicalized form plus (eq (and x y) 0) -1
;; (see the comment block above): operand 3 is a zero vector, operand 4 an
;; all-ones vector.
(define_insn "aarch64_cmtst<mode>"
  [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w")
	(plus:<V_INT_EQUIV>
	  (eq:<V_INT_EQUIV>
	    (and:VDQ_I
	      (match_operand:VDQ_I 1 "register_operand" "w")
	      (match_operand:VDQ_I 2 "register_operand" "w"))
	    (match_operand:VDQ_I 3 "aarch64_simd_imm_zero"))
	  (match_operand:<V_INT_EQUIV> 4 "aarch64_simd_imm_minus_one")))
  ]
  "TARGET_SIMD"
  "cmtst\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
  [(set_attr "type" "neon_tst<q>")]
)
5906
;; DImode cmtst (neg (ne (and x y) 0)); like the DImode compares above,
;; split after reload into a tst + cstoredi_neg when the operands landed in
;; general registers, otherwise re-emitted as the SIMD pattern below.
(define_insn_and_split "aarch64_cmtstdi"
  [(set (match_operand:DI 0 "register_operand" "=w,r")
	(neg:DI
	  (ne:DI
	    (and:DI
	      (match_operand:DI 1 "register_operand" "w,r")
	      (match_operand:DI 2 "register_operand" "w,r"))
	    (const_int 0))))
    (clobber (reg:CC CC_REGNUM))]
  "TARGET_SIMD"
  "#"
  "&& reload_completed"
  [(set (match_operand:DI 0 "register_operand")
	(neg:DI
	  (ne:DI
	    (and:DI
	      (match_operand:DI 1 "register_operand")
	      (match_operand:DI 2 "register_operand"))
	    (const_int 0))))]
  {
    /* If we are in the general purpose register file,
       we split to a sequence of comparison and store.  */
    if (GP_REGNUM_P (REGNO (operands[0]))
	&& GP_REGNUM_P (REGNO (operands[1])))
      {
	rtx and_tree = gen_rtx_AND (DImode, operands[1], operands[2]);
	machine_mode mode = SELECT_CC_MODE (NE, and_tree, const0_rtx);
	rtx cc_reg = aarch64_gen_compare_reg (NE, and_tree, const0_rtx);
	rtx comparison = gen_rtx_NE (mode, and_tree, const0_rtx);
	emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
	DONE;
      }
    /* Otherwise, we expand to a similar pattern which does not
       clobber CC_REGNUM.  */
  }
  [(set_attr "type" "neon_tst,multiple")]
)
5944
;; SIMD-register form of the DImode cmtst (no CC clobber).
(define_insn "*aarch64_cmtstdi"
  [(set (match_operand:DI 0 "register_operand" "=w")
	(neg:DI
	  (ne:DI
	    (and:DI
	      (match_operand:DI 1 "register_operand" "w")
	      (match_operand:DI 2 "register_operand" "w"))
	    (const_int 0))))]
  "TARGET_SIMD"
  "cmtst\t%d0, %d1, %d2"
  [(set_attr "type" "neon_tst")]
)
5957
5958;; fcm(eq|ge|gt|le|lt)
5959
;; Floating-point compares producing an integer mask: dest = -(op1 <cmp> op2).
;; Second alternative compares against the zero immediate (YDz) and uses the
;; zero-operand form of FCM; the register-register alternative may swap the
;; operands (<cmp_1>/<cmp_2>) to use the canonical condition <n_optab>.
(define_insn "aarch64_cm<optab><mode>"
  [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w,w")
	(neg:<V_INT_EQUIV>
	  (COMPARISONS:<V_INT_EQUIV>
	    (match_operand:VHSDF_HSDF 1 "register_operand" "w,w")
	    (match_operand:VHSDF_HSDF 2 "aarch64_simd_reg_or_zero" "w,YDz")
	  )))]
  "TARGET_SIMD"
  "@
  fcm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>
  fcm<optab>\t%<v>0<Vmtype>, %<v>1<Vmtype>, 0"
  [(set_attr "type" "neon_fp_compare_<stype><q>")]
)
5973
5974;; fac(ge|gt)
5975;; Note we can also handle what would be fac(le|lt) by
5976;; generating fac(ge|gt).
5977
;; Absolute floating-point compares: dest = -(|op1| <cmp> |op2|), mapped to
;; FACGE/FACGT.  Operand order (<cmp_1>/<cmp_2>) is swapped as needed so the
;; le/lt variants are emitted as facge/facgt (see comment above).
(define_insn "aarch64_fac<optab><mode>"
  [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w")
	(neg:<V_INT_EQUIV>
	  (FAC_COMPARISONS:<V_INT_EQUIV>
	    (abs:VHSDF_HSDF
	      (match_operand:VHSDF_HSDF 1 "register_operand" "w"))
	    (abs:VHSDF_HSDF
	      (match_operand:VHSDF_HSDF 2 "register_operand" "w"))
  )))]
  "TARGET_SIMD"
  "fac<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>"
  [(set_attr "type" "neon_fp_compare_<stype><q>")]
)
5991
5992;; addp
5993
;; Pairwise integer add on 64-bit vectors (ADDP), modelled as an unspec.
(define_insn "aarch64_addp<mode>"
  [(set (match_operand:VD_BHSI 0 "register_operand" "=w")
        (unspec:VD_BHSI
          [(match_operand:VD_BHSI 1 "register_operand" "w")
	   (match_operand:VD_BHSI 2 "register_operand" "w")]
          UNSPEC_ADDP))]
  "TARGET_SIMD"
  "addp\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
  [(set_attr "type" "neon_reduc_add<q>")]
)
6004
;; Scalar pairwise add: sum the two DI elements of a V2DI into a D register.
(define_insn "aarch64_addpdi"
  [(set (match_operand:DI 0 "register_operand" "=w")
        (unspec:DI
          [(match_operand:V2DI 1 "register_operand" "w")]
          UNSPEC_ADDP))]
  "TARGET_SIMD"
  "addp\t%d0, %1.2d"
  [(set_attr "type" "neon_reduc_add")]
)
6014
6015;; sqrt
6016
;; Vector square root.  Try the approximate-sqrt expansion first (enabled by
;; tuning/-ffast-math heuristics inside aarch64_emit_approx_sqrt); otherwise
;; fall through to the FSQRT pattern below.
(define_expand "sqrt<mode>2"
  [(set (match_operand:VHSDF 0 "register_operand")
	(sqrt:VHSDF (match_operand:VHSDF 1 "register_operand")))]
  "TARGET_SIMD"
{
  if (aarch64_emit_approx_sqrt (operands[0], operands[1], false))
    DONE;
})
6025
;; Exact vector square root instruction.
(define_insn "*sqrt<mode>2"
  [(set (match_operand:VHSDF 0 "register_operand" "=w")
	(sqrt:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
  "TARGET_SIMD"
  "fsqrt\\t%0.<Vtype>, %1.<Vtype>"
  [(set_attr "type" "neon_fp_sqrt_<stype><q>")]
)
6033
6034;; Patterns for vector struct loads and stores.
6035
;; Two-structure load: LD2 of a register pair (OImode) from memory, with a
;; dummy unspec carrying the element mode VQ.
(define_insn "aarch64_simd_ld2<mode>"
  [(set (match_operand:OI 0 "register_operand" "=w")
	(unspec:OI [(match_operand:OI 1 "aarch64_simd_struct_operand" "Utv")
		    (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
		   UNSPEC_LD2))]
  "TARGET_SIMD"
  "ld2\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
  [(set_attr "type" "neon_load2_2reg<q>")]
)
6045
;; LD2R: load one 2-element structure and replicate it to all lanes of the
;; destination register pair.  Memory operand is BLKmode (size set by the
;; expander).
(define_insn "aarch64_simd_ld2r<mode>"
  [(set (match_operand:OI 0 "register_operand" "=w")
       (unspec:OI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
                   (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
                  UNSPEC_LD2_DUP))]
  "TARGET_SIMD"
  "ld2r\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
  [(set_attr "type" "neon_load2_all_lanes<q>")]
)
6055
;; LD2 to a single lane: load one 2-element structure into lane %3 of the
;; register pair, other lanes taken from operand 2 (tied to the output).
;; The lane number is flipped for big-endian via aarch64_endian_lane_rtx.
(define_insn "aarch64_vec_load_lanesoi_lane<mode>"
  [(set (match_operand:OI 0 "register_operand" "=w")
	(unspec:OI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
		    (match_operand:OI 2 "register_operand" "0")
		    (match_operand:SI 3 "immediate_operand" "i")
		    (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
		   UNSPEC_LD2_LANE))]
  "TARGET_SIMD"
  {
    operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
    return "ld2\\t{%S0.<Vetype> - %T0.<Vetype>}[%3], %1";
  }
  [(set_attr "type" "neon_load2_one_lane")]
)
6070
;; Standard-name expander for 2-structure loads.  On big-endian, LD2's
;; register-lane order does not match GCC's element numbering, so load into
;; a temporary and permute the register list with TBL (aarch64_rev_reglist).
(define_expand "vec_load_lanesoi<mode>"
  [(set (match_operand:OI 0 "register_operand")
	(unspec:OI [(match_operand:OI 1 "aarch64_simd_struct_operand")
		    (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
		   UNSPEC_LD2))]
  "TARGET_SIMD"
{
  if (BYTES_BIG_ENDIAN)
    {
      rtx tmp = gen_reg_rtx (OImode);
      rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
      emit_insn (gen_aarch64_simd_ld2<mode> (tmp, operands[1]));
      emit_insn (gen_aarch64_rev_reglistoi (operands[0], tmp, mask));
    }
  else
    emit_insn (gen_aarch64_simd_ld2<mode> (operands[0], operands[1]));
  DONE;
})
6089
;; Two-structure store: ST2 of a register pair (OImode) to memory.
(define_insn "aarch64_simd_st2<mode>"
  [(set (match_operand:OI 0 "aarch64_simd_struct_operand" "=Utv")
	(unspec:OI [(match_operand:OI 1 "register_operand" "w")
                    (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_ST2))]
  "TARGET_SIMD"
  "st2\\t{%S1.<Vtype> - %T1.<Vtype>}, %0"
  [(set_attr "type" "neon_store2_2reg<q>")]
)
6099
6100;; RTL uses GCC vector extension indices, so flip only for assembly.
;; ST2 from a single lane: store the 2-element structure held in lane %2 of
;; the register pair.  Lane number is endian-adjusted at output time only.
(define_insn "aarch64_vec_store_lanesoi_lane<mode>"
  [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
	(unspec:BLK [(match_operand:OI 1 "register_operand" "w")
		    (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
		    (match_operand:SI 2 "immediate_operand" "i")]
		   UNSPEC_ST2_LANE))]
  "TARGET_SIMD"
  {
    operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
    return "st2\\t{%S1.<Vetype> - %T1.<Vetype>}[%2], %0";
  }
  [(set_attr "type" "neon_store2_one_lane<q>")]
)
6114
;; Standard-name expander for 2-structure stores.  Mirror of
;; vec_load_lanesoi: on big-endian, permute the register list into ST2's
;; expected order before storing.
(define_expand "vec_store_lanesoi<mode>"
  [(set (match_operand:OI 0 "aarch64_simd_struct_operand")
	(unspec:OI [(match_operand:OI 1 "register_operand")
                    (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_ST2))]
  "TARGET_SIMD"
{
  if (BYTES_BIG_ENDIAN)
    {
      rtx tmp = gen_reg_rtx (OImode);
      rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
      emit_insn (gen_aarch64_rev_reglistoi (tmp, operands[1], mask));
      emit_insn (gen_aarch64_simd_st2<mode> (operands[0], tmp));
    }
  else
    emit_insn (gen_aarch64_simd_st2<mode> (operands[0], operands[1]));
  DONE;
})
6133
;; Three-structure load: LD3 of a register triple (CImode) from memory.
(define_insn "aarch64_simd_ld3<mode>"
  [(set (match_operand:CI 0 "register_operand" "=w")
	(unspec:CI [(match_operand:CI 1 "aarch64_simd_struct_operand" "Utv")
		    (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
		   UNSPEC_LD3))]
  "TARGET_SIMD"
  "ld3\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
  [(set_attr "type" "neon_load3_3reg<q>")]
)
6143
;; LD3R: load one 3-element structure and replicate to all lanes of the
;; register triple.
(define_insn "aarch64_simd_ld3r<mode>"
  [(set (match_operand:CI 0 "register_operand" "=w")
       (unspec:CI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
                   (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
                  UNSPEC_LD3_DUP))]
  "TARGET_SIMD"
  "ld3r\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
  [(set_attr "type" "neon_load3_all_lanes<q>")]
)
6153
;; LD3 to a single lane of the register triple; other lanes come from the
;; tied input operand 2.  Lane index is endian-adjusted at output time.
(define_insn "aarch64_vec_load_lanesci_lane<mode>"
  [(set (match_operand:CI 0 "register_operand" "=w")
	(unspec:CI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
		    (match_operand:CI 2 "register_operand" "0")
		    (match_operand:SI 3 "immediate_operand" "i")
		    (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
		   UNSPEC_LD3_LANE))]
  "TARGET_SIMD"
{
    operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
    return "ld3\\t{%S0.<Vetype> - %U0.<Vetype>}[%3], %1";
}
  [(set_attr "type" "neon_load3_one_lane")]
)
6168
;; Standard-name expander for 3-structure loads; big-endian needs a register
;; list permute after the LD3 (same scheme as vec_load_lanesoi).
(define_expand "vec_load_lanesci<mode>"
  [(set (match_operand:CI 0 "register_operand")
	(unspec:CI [(match_operand:CI 1 "aarch64_simd_struct_operand")
		    (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
		   UNSPEC_LD3))]
  "TARGET_SIMD"
{
  if (BYTES_BIG_ENDIAN)
    {
      rtx tmp = gen_reg_rtx (CImode);
      rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
      emit_insn (gen_aarch64_simd_ld3<mode> (tmp, operands[1]));
      emit_insn (gen_aarch64_rev_reglistci (operands[0], tmp, mask));
    }
  else
    emit_insn (gen_aarch64_simd_ld3<mode> (operands[0], operands[1]));
  DONE;
})
6187
;; Three-structure store: ST3 of a register triple (CImode) to memory.
(define_insn "aarch64_simd_st3<mode>"
  [(set (match_operand:CI 0 "aarch64_simd_struct_operand" "=Utv")
	(unspec:CI [(match_operand:CI 1 "register_operand" "w")
                    (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_ST3))]
  "TARGET_SIMD"
  "st3\\t{%S1.<Vtype> - %U1.<Vtype>}, %0"
  [(set_attr "type" "neon_store3_3reg<q>")]
)
6197
6198;; RTL uses GCC vector extension indices, so flip only for assembly.
;; ST3 from a single lane of the register triple; lane index is
;; endian-adjusted at output time only.
(define_insn "aarch64_vec_store_lanesci_lane<mode>"
  [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
	(unspec:BLK [(match_operand:CI 1 "register_operand" "w")
		     (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
		     (match_operand:SI 2 "immediate_operand" "i")]
		    UNSPEC_ST3_LANE))]
  "TARGET_SIMD"
  {
    operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
    return "st3\\t{%S1.<Vetype> - %U1.<Vetype>}[%2], %0";
  }
  [(set_attr "type" "neon_store3_one_lane<q>")]
)
6212
;; Standard-name expander for 3-structure stores; permute the register list
;; first on big-endian (mirror of vec_load_lanesci).
(define_expand "vec_store_lanesci<mode>"
  [(set (match_operand:CI 0 "aarch64_simd_struct_operand")
	(unspec:CI [(match_operand:CI 1 "register_operand")
                    (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_ST3))]
  "TARGET_SIMD"
{
  if (BYTES_BIG_ENDIAN)
    {
      rtx tmp = gen_reg_rtx (CImode);
      rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
      emit_insn (gen_aarch64_rev_reglistci (tmp, operands[1], mask));
      emit_insn (gen_aarch64_simd_st3<mode> (operands[0], tmp));
    }
  else
    emit_insn (gen_aarch64_simd_st3<mode> (operands[0], operands[1]));
  DONE;
})
6231
;; Four-structure load: LD4 of a register quad (XImode) from memory.
(define_insn "aarch64_simd_ld4<mode>"
  [(set (match_operand:XI 0 "register_operand" "=w")
	(unspec:XI [(match_operand:XI 1 "aarch64_simd_struct_operand" "Utv")
		    (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
		   UNSPEC_LD4))]
  "TARGET_SIMD"
  "ld4\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
  [(set_attr "type" "neon_load4_4reg<q>")]
)
6241
;; LD4R: load one 4-element structure and replicate to all lanes of the
;; register quad.
(define_insn "aarch64_simd_ld4r<mode>"
  [(set (match_operand:XI 0 "register_operand" "=w")
       (unspec:XI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
                   (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
                  UNSPEC_LD4_DUP))]
  "TARGET_SIMD"
  "ld4r\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
  [(set_attr "type" "neon_load4_all_lanes<q>")]
)
6251
;; LD4 to a single lane of the register quad; other lanes come from the tied
;; input operand 2.  Lane index is endian-adjusted at output time.
(define_insn "aarch64_vec_load_lanesxi_lane<mode>"
  [(set (match_operand:XI 0 "register_operand" "=w")
	(unspec:XI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
		    (match_operand:XI 2 "register_operand" "0")
		    (match_operand:SI 3 "immediate_operand" "i")
		    (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
		   UNSPEC_LD4_LANE))]
  "TARGET_SIMD"
{
    operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
    return "ld4\\t{%S0.<Vetype> - %V0.<Vetype>}[%3], %1";
}
  [(set_attr "type" "neon_load4_one_lane")]
)
6266
;; Standard-name expander for 4-structure loads; big-endian needs a register
;; list permute after the LD4 (same scheme as vec_load_lanesoi).
(define_expand "vec_load_lanesxi<mode>"
  [(set (match_operand:XI 0 "register_operand")
	(unspec:XI [(match_operand:XI 1 "aarch64_simd_struct_operand")
		    (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
		   UNSPEC_LD4))]
  "TARGET_SIMD"
{
  if (BYTES_BIG_ENDIAN)
    {
      rtx tmp = gen_reg_rtx (XImode);
      rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
      emit_insn (gen_aarch64_simd_ld4<mode> (tmp, operands[1]));
      emit_insn (gen_aarch64_rev_reglistxi (operands[0], tmp, mask));
    }
  else
    emit_insn (gen_aarch64_simd_ld4<mode> (operands[0], operands[1]));
  DONE;
})
6285
;; Four-structure store: ST4 of a register quad (XImode) to memory.
(define_insn "aarch64_simd_st4<mode>"
  [(set (match_operand:XI 0 "aarch64_simd_struct_operand" "=Utv")
	(unspec:XI [(match_operand:XI 1 "register_operand" "w")
                    (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_ST4))]
  "TARGET_SIMD"
  "st4\\t{%S1.<Vtype> - %V1.<Vtype>}, %0"
  [(set_attr "type" "neon_store4_4reg<q>")]
)
6295
6296;; RTL uses GCC vector extension indices, so flip only for assembly.
;; ST4 from a single lane of the register quad; lane index is
;; endian-adjusted at output time only.
(define_insn "aarch64_vec_store_lanesxi_lane<mode>"
  [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
	(unspec:BLK [(match_operand:XI 1 "register_operand" "w")
		     (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
		     (match_operand:SI 2 "immediate_operand" "i")]
		    UNSPEC_ST4_LANE))]
  "TARGET_SIMD"
  {
    operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
    return "st4\\t{%S1.<Vetype> - %V1.<Vetype>}[%2], %0";
  }
  [(set_attr "type" "neon_store4_one_lane<q>")]
)
6310
;; Standard-name expander for 4-structure stores; permute the register list
;; first on big-endian (mirror of vec_load_lanesxi).
(define_expand "vec_store_lanesxi<mode>"
  [(set (match_operand:XI 0 "aarch64_simd_struct_operand")
	(unspec:XI [(match_operand:XI 1 "register_operand")
                    (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_ST4))]
  "TARGET_SIMD"
{
  if (BYTES_BIG_ENDIAN)
    {
      rtx tmp = gen_reg_rtx (XImode);
      rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
      emit_insn (gen_aarch64_rev_reglistxi (tmp, operands[1], mask));
      emit_insn (gen_aarch64_simd_st4<mode> (operands[0], tmp));
    }
  else
    emit_insn (gen_aarch64_simd_st4<mode> (operands[0], operands[1]));
  DONE;
})
6329
;; Permute every 128-bit register of a structure register list through the
;; same TBL mask (used to fix up LD2/3/4 and ST2/3/4 lane order on
;; big-endian).  Split after reload into one TBL per constituent V16QI
;; register; "=&w" keeps the destination from overlapping the inputs since
;; the registers are rewritten one at a time.
(define_insn_and_split "aarch64_rev_reglist<mode>"
[(set (match_operand:VSTRUCT 0 "register_operand" "=&w")
	(unspec:VSTRUCT
	           [(match_operand:VSTRUCT 1 "register_operand" "w")
		    (match_operand:V16QI 2 "register_operand" "w")]
                   UNSPEC_REV_REGLIST))]
  "TARGET_SIMD"
  "#"
  "&& reload_completed"
  [(const_int 0)]
{
  int i;
  /* One TBL per 128-bit vector register in the list.  */
  int nregs = GET_MODE_SIZE (<MODE>mode) / UNITS_PER_VREG;
  for (i = 0; i < nregs; i++)
    {
      rtx op0 = gen_rtx_REG (V16QImode, REGNO (operands[0]) + i);
      rtx op1 = gen_rtx_REG (V16QImode, REGNO (operands[1]) + i);
      emit_insn (gen_aarch64_tbl1v16qi (op0, op1, operands[2]));
    }
  DONE;
}
  [(set_attr "type" "neon_tbl1_q")
   (set_attr "length" "<insn_count>")]
)
6354
6355;; Reload patterns for AdvSIMD register list operands.
6356
;; Move expander for structure register-list modes (OI/CI/XI).  Before
;; register allocation, force the source into a register unless the
;; destination is already a register, so at most one operand is memory.
(define_expand "mov<mode>"
  [(set (match_operand:VSTRUCT 0 "nonimmediate_operand")
	(match_operand:VSTRUCT 1 "general_operand"))]
  "TARGET_SIMD"
{
  if (can_create_pseudo_p ())
    {
      if (GET_CODE (operands[0]) != REG)
	operands[1] = force_reg (<MODE>mode, operands[1]);
    }
})
6368
6369
;; Builtin expander for ld1 of three consecutive vectors: wrap the address
;; register in a CImode MEM and emit the LD1 x3 pattern below.
(define_expand "aarch64_ld1x3<VALLDIF:mode>"
  [(match_operand:CI 0 "register_operand")
   (match_operand:DI 1 "register_operand")
   (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
  "TARGET_SIMD"
{
  rtx mem = gen_rtx_MEM (CImode, operands[1]);
  emit_insn (gen_aarch64_ld1_x3_<VALLDIF:mode> (operands[0], mem));
  DONE;
})
6380
;; LD1 of three registers (non-interleaving multiple-structure load).
(define_insn "aarch64_ld1_x3_<mode>"
  [(set (match_operand:CI 0 "register_operand" "=w")
        (unspec:CI
	  [(match_operand:CI 1 "aarch64_simd_struct_operand" "Utv")
	   (unspec:VALLDIF [(const_int 3)] UNSPEC_VSTRUCTDUMMY)] UNSPEC_LD1))]
  "TARGET_SIMD"
  "ld1\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
  [(set_attr "type" "neon_load1_3reg<q>")]
)
6390
;; Builtin expander for ld1 of four consecutive vectors: wrap the address
;; register in an XImode MEM and emit the LD1 x4 pattern below.
;; Note: operand constraints are meaningless in a define_expand and are
;; omitted, matching the aarch64_ld1x3/st1x2/st1x3 expanders above.
(define_expand "aarch64_ld1x4<VALLDIF:mode>"
  [(match_operand:XI 0 "register_operand")
   (match_operand:DI 1 "register_operand")
   (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
  "TARGET_SIMD"
{
  rtx mem = gen_rtx_MEM (XImode, operands[1]);
  emit_insn (gen_aarch64_ld1_x4_<VALLDIF:mode> (operands[0], mem));
  DONE;
})
6401
;; LD1 of four registers (non-interleaving multiple-structure load).
(define_insn "aarch64_ld1_x4_<mode>"
  [(set (match_operand:XI 0 "register_operand" "=w")
	(unspec:XI
	  [(match_operand:XI 1 "aarch64_simd_struct_operand" "Utv")
	   (unspec:VALLDIF [(const_int 4)] UNSPEC_VSTRUCTDUMMY)]
	UNSPEC_LD1))]
  "TARGET_SIMD"
  "ld1\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
  [(set_attr "type" "neon_load1_4reg<q>")]
)
6412
;; Builtin expander for st1 of two consecutive vectors: wrap the address
;; register in an OImode MEM and emit the ST1 x2 pattern below.
(define_expand "aarch64_st1x2<VALLDIF:mode>"
  [(match_operand:DI 0 "register_operand")
   (match_operand:OI 1 "register_operand")
   (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
  "TARGET_SIMD"
{
  rtx mem = gen_rtx_MEM (OImode, operands[0]);
  emit_insn (gen_aarch64_st1_x2_<VALLDIF:mode> (mem, operands[1]));
  DONE;
})
6423
;; ST1 of two registers (non-interleaving multiple-structure store).
(define_insn "aarch64_st1_x2_<mode>"
   [(set (match_operand:OI 0 "aarch64_simd_struct_operand" "=Utv")
	 (unspec:OI
	  [(match_operand:OI 1 "register_operand" "w")
          (unspec:VALLDIF [(const_int 2)] UNSPEC_VSTRUCTDUMMY)] UNSPEC_ST1))]
  "TARGET_SIMD"
  "st1\\t{%S1.<Vtype> - %T1.<Vtype>}, %0"
  [(set_attr "type" "neon_store1_2reg<q>")]
)
6433
;; Builtin expander for st1 of three consecutive vectors: wrap the address
;; register in a CImode MEM and emit the ST1 x3 pattern below.
(define_expand "aarch64_st1x3<VALLDIF:mode>"
  [(match_operand:DI 0 "register_operand")
   (match_operand:CI 1 "register_operand")
   (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
  "TARGET_SIMD"
{
  rtx mem = gen_rtx_MEM (CImode, operands[0]);
  emit_insn (gen_aarch64_st1_x3_<VALLDIF:mode> (mem, operands[1]));
  DONE;
})
6444
;; ST1 of three registers (non-interleaving multiple-structure store).
(define_insn "aarch64_st1_x3_<mode>"
   [(set (match_operand:CI 0 "aarch64_simd_struct_operand" "=Utv")
	(unspec:CI
         [(match_operand:CI 1 "register_operand" "w")
	  (unspec:VALLDIF [(const_int 3)] UNSPEC_VSTRUCTDUMMY)] UNSPEC_ST1))]
  "TARGET_SIMD"
  "st1\\t{%S1.<Vtype> - %U1.<Vtype>}, %0"
  [(set_attr "type" "neon_store1_3reg<q>")]
)
6454
;; Builtin expander for st1 of four consecutive vectors: wrap the address
;; register in an XImode MEM and emit the ST1 x4 pattern below.
;; Note: the vestigial empty constraint strings are dropped — constraints
;; are ignored in a define_expand and the sibling st1x2/st1x3 expanders
;; above omit them.
(define_expand "aarch64_st1x4<VALLDIF:mode>"
  [(match_operand:DI 0 "register_operand")
   (match_operand:XI 1 "register_operand")
   (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
  "TARGET_SIMD"
{
  rtx mem = gen_rtx_MEM (XImode, operands[0]);
  emit_insn (gen_aarch64_st1_x4_<VALLDIF:mode> (mem, operands[1]));
  DONE;
})
6465
;; ST1 of four registers (non-interleaving multiple-structure store).
(define_insn "aarch64_st1_x4_<mode>"
  [(set (match_operand:XI 0 "aarch64_simd_struct_operand" "=Utv")
	(unspec:XI
	   [(match_operand:XI 1 "register_operand" "w")
	   (unspec:VALLDIF [(const_int 4)] UNSPEC_VSTRUCTDUMMY)]
	UNSPEC_ST1))]
  "TARGET_SIMD"
  "st1\\t{%S1.<Vtype> - %V1.<Vtype>}, %0"
  [(set_attr "type" "neon_store1_4reg<q>")]
)
6476
;; Little-endian moves of structure register lists: reg-reg (split later
;; into individual moves, hence "#"/multiple), store via ST1, load via LD1.
(define_insn "*aarch64_mov<mode>"
  [(set (match_operand:VSTRUCT 0 "aarch64_simd_nonimmediate_operand" "=w,Utv,w")
	(match_operand:VSTRUCT 1 "aarch64_simd_general_operand" " w,w,Utv"))]
  "TARGET_SIMD && !BYTES_BIG_ENDIAN
   && (register_operand (operands[0], <MODE>mode)
       || register_operand (operands[1], <MODE>mode))"
  "@
   #
   st1\\t{%S1.16b - %<Vendreg>1.16b}, %0
   ld1\\t{%S0.16b - %<Vendreg>0.16b}, %1"
  [(set_attr "type" "multiple,neon_store<nregs>_<nregs>reg_q,\
		     neon_load<nregs>_<nregs>reg_q")
   (set_attr "length" "<insn_count>,4,4")]
)
6491
;; Element-ordered single-vector load (LD1), used on big-endian where a
;; plain memory move would have the wrong lane numbering.
(define_insn "aarch64_be_ld1<mode>"
  [(set (match_operand:VALLDI_F16 0	"register_operand" "=w")
	(unspec:VALLDI_F16 [(match_operand:VALLDI_F16 1
			     "aarch64_simd_struct_operand" "Utv")]
	UNSPEC_LD1))]
  "TARGET_SIMD"
  "ld1\\t{%0<Vmtype>}, %1"
  [(set_attr "type" "neon_load1_1reg<q>")]
)
6501
;; Element-ordered single-vector store (ST1); counterpart of aarch64_be_ld1.
(define_insn "aarch64_be_st1<mode>"
  [(set (match_operand:VALLDI_F16 0 "aarch64_simd_struct_operand" "=Utv")
	(unspec:VALLDI_F16 [(match_operand:VALLDI_F16 1 "register_operand" "w")]
	UNSPEC_ST1))]
  "TARGET_SIMD"
  "st1\\t{%1<Vmtype>}, %0"
  [(set_attr "type" "neon_store1_1reg<q>")]
)
6510
;; Big-endian OImode (2-register) moves: reg-reg is split later ("#"),
;; memory transfers use STP/LDP of Q registers.
(define_insn "*aarch64_be_movoi"
  [(set (match_operand:OI 0 "nonimmediate_operand" "=w,m,w")
	(match_operand:OI 1 "general_operand"      " w,w,m"))]
  "TARGET_SIMD && BYTES_BIG_ENDIAN
   && (register_operand (operands[0], OImode)
       || register_operand (operands[1], OImode))"
  "@
   #
   stp\\t%q1, %R1, %0
   ldp\\t%q0, %R0, %1"
  [(set_attr "type" "multiple,neon_stp_q,neon_ldp_q")
   (set_attr "length" "8,4,4")]
)
6524
;; Big-endian CImode (3-register) moves; always emitted as "#" and handled
;; by the CI split below.  Memory alternatives use offsettable addresses
;; ("o") so the split can address each part.
(define_insn "*aarch64_be_movci"
  [(set (match_operand:CI 0 "nonimmediate_operand" "=w,o,w")
	(match_operand:CI 1 "general_operand"      " w,w,o"))]
  "TARGET_SIMD && BYTES_BIG_ENDIAN
   && (register_operand (operands[0], CImode)
       || register_operand (operands[1], CImode))"
  "#"
  [(set_attr "type" "multiple")
   (set_attr "length" "12,4,4")]
)
6535
;; Big-endian XImode (4-register) moves; always emitted as "#" and handled
;; by the XI split below.
(define_insn "*aarch64_be_movxi"
  [(set (match_operand:XI 0 "nonimmediate_operand" "=w,o,w")
	(match_operand:XI 1 "general_operand"      " w,w,o"))]
  "TARGET_SIMD && BYTES_BIG_ENDIAN
   && (register_operand (operands[0], XImode)
       || register_operand (operands[1], XImode))"
  "#"
  [(set_attr "type" "multiple")
   (set_attr "length" "16,4,4")]
)
6546
;; Split an OImode register-register move into two TImode register moves
;; after reload.
(define_split
  [(set (match_operand:OI 0 "register_operand")
	(match_operand:OI 1 "register_operand"))]
  "TARGET_SIMD && reload_completed"
  [(const_int 0)]
{
  aarch64_simd_emit_reg_reg_move (operands, TImode, 2);
  DONE;
})
6556
;; Split CImode moves after reload: reg-reg becomes three TImode register
;; moves; on big-endian a reg-mem move becomes an OImode move for the first
;; 32 bytes plus a V16QI move for the trailing 16 bytes (via a TImode
;; subreg, since there is no 48-byte move).  Little-endian reg-mem moves
;; are handled elsewhere, so FAIL.
(define_split
  [(set (match_operand:CI 0 "nonimmediate_operand")
	(match_operand:CI 1 "general_operand"))]
  "TARGET_SIMD && reload_completed"
  [(const_int 0)]
{
  if (register_operand (operands[0], CImode)
      && register_operand (operands[1], CImode))
    {
      aarch64_simd_emit_reg_reg_move (operands, TImode, 3);
      DONE;
    }
  else if (BYTES_BIG_ENDIAN)
    {
      /* First 32 bytes as one OImode move...  */
      emit_move_insn (simplify_gen_subreg (OImode, operands[0], CImode, 0),
		      simplify_gen_subreg (OImode, operands[1], CImode, 0));
      /* ...then the final 16 bytes at offset 32 as a V16QI move.  */
      emit_move_insn (gen_lowpart (V16QImode,
				   simplify_gen_subreg (TImode, operands[0],
							CImode, 32)),
		      gen_lowpart (V16QImode,
				   simplify_gen_subreg (TImode, operands[1],
							CImode, 32)));
      DONE;
    }
  else
    FAIL;
})
6584
;; Split XImode moves after reload: reg-reg becomes four TImode register
;; moves; on big-endian a reg-mem move becomes two OImode moves (offsets 0
;; and 32).  Little-endian reg-mem moves are handled elsewhere, so FAIL.
(define_split
  [(set (match_operand:XI 0 "nonimmediate_operand")
	(match_operand:XI 1 "general_operand"))]
  "TARGET_SIMD && reload_completed"
  [(const_int 0)]
{
  if (register_operand (operands[0], XImode)
      && register_operand (operands[1], XImode))
    {
      aarch64_simd_emit_reg_reg_move (operands, TImode, 4);
      DONE;
    }
  else if (BYTES_BIG_ENDIAN)
    {
      emit_move_insn (simplify_gen_subreg (OImode, operands[0], XImode, 0),
		      simplify_gen_subreg (OImode, operands[1], XImode, 0));
      emit_move_insn (simplify_gen_subreg (OImode, operands[0], XImode, 32),
		      simplify_gen_subreg (OImode, operands[1], XImode, 32));
      DONE;
    }
  else
    FAIL;
})
6608
;; Builtin expander for LD2R/LD3R/LD4R: build a BLKmode MEM over the address
;; register, sized to one structure (element size times register count), and
;; emit the matching replicating-load pattern.
(define_expand "aarch64_ld<VSTRUCT:nregs>r<VALLDIF:mode>"
  [(match_operand:VSTRUCT 0 "register_operand")
   (match_operand:DI 1 "register_operand")
   (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
  "TARGET_SIMD"
{
  rtx mem = gen_rtx_MEM (BLKmode, operands[1]);
  set_mem_size (mem, GET_MODE_SIZE (GET_MODE_INNER (<VALLDIF:MODE>mode))
		     * <VSTRUCT:nregs>);

  emit_insn (gen_aarch64_simd_ld<VSTRUCT:nregs>r<VALLDIF:mode> (operands[0],
								mem));
  DONE;
})
6623
;; LD2 into D registers for 64-bit vector modes; the pair still occupies an
;; OImode register list.
(define_insn "aarch64_ld2<mode>_dreg"
  [(set (match_operand:OI 0 "register_operand" "=w")
	(unspec:OI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
		    (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
		   UNSPEC_LD2_DREG))]
  "TARGET_SIMD"
  "ld2\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
  [(set_attr "type" "neon_load2_2reg<q>")]
)
6633
;; D-register "ld2" for single-element 64-bit modes (DX): with one element
;; per vector there is nothing to de-interleave, so plain LD1 of two .1d
;; registers suffices.
(define_insn "aarch64_ld2<mode>_dreg"
  [(set (match_operand:OI 0 "register_operand" "=w")
	(unspec:OI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
		    (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
		   UNSPEC_LD2_DREG))]
  "TARGET_SIMD"
  "ld1\\t{%S0.1d - %T0.1d}, %1"
  [(set_attr "type" "neon_load1_2reg<q>")]
)
6643
;; LD3 into D registers for 64-bit vector modes (CImode register list).
(define_insn "aarch64_ld3<mode>_dreg"
  [(set (match_operand:CI 0 "register_operand" "=w")
	(unspec:CI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
		    (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
		   UNSPEC_LD3_DREG))]
  "TARGET_SIMD"
  "ld3\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
  [(set_attr "type" "neon_load3_3reg<q>")]
)
6653
;; D-register "ld3" for single-element 64-bit modes (DX): no interleaving
;; needed, so LD1 of three .1d registers.
(define_insn "aarch64_ld3<mode>_dreg"
  [(set (match_operand:CI 0 "register_operand" "=w")
	(unspec:CI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
		    (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
		   UNSPEC_LD3_DREG))]
  "TARGET_SIMD"
  "ld1\\t{%S0.1d - %U0.1d}, %1"
  [(set_attr "type" "neon_load1_3reg<q>")]
)
6663
;; LD4 into D registers for 64-bit vector modes (XImode register list).
(define_insn "aarch64_ld4<mode>_dreg"
  [(set (match_operand:XI 0 "register_operand" "=w")
	(unspec:XI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
		    (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
		   UNSPEC_LD4_DREG))]
  "TARGET_SIMD"
  "ld4\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
  [(set_attr "type" "neon_load4_4reg<q>")]
)
6673
;; D-register "ld4" for single-element 64-bit modes (DX): no interleaving
;; needed, so LD1 of four .1d registers.
(define_insn "aarch64_ld4<mode>_dreg"
  [(set (match_operand:XI 0 "register_operand" "=w")
	(unspec:XI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
		    (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
		   UNSPEC_LD4_DREG))]
  "TARGET_SIMD"
  "ld1\\t{%S0.1d - %V0.1d}, %1"
  [(set_attr "type" "neon_load1_4reg<q>")]
)
6683
;; Builtin expander for LD2/LD3/LD4 of 64-bit vectors: wrap the address in a
;; BLKmode MEM of nregs * 8 bytes and emit the matching _dreg pattern.
(define_expand "aarch64_ld<VSTRUCT:nregs><VDC:mode>"
 [(match_operand:VSTRUCT 0 "register_operand")
  (match_operand:DI 1 "register_operand")
  (unspec:VDC [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
  "TARGET_SIMD"
{
  rtx mem = gen_rtx_MEM (BLKmode, operands[1]);
  set_mem_size (mem, <VSTRUCT:nregs> * 8);

  emit_insn (gen_aarch64_ld<VSTRUCT:nregs><VDC:mode>_dreg (operands[0], mem));
  DONE;
})
6696
;; Builtin expander for a single-vector ld1: on big-endian use the LD1
;; pattern (element-ordered); on little-endian a plain move has identical
;; semantics.
(define_expand "aarch64_ld1<VALL_F16:mode>"
 [(match_operand:VALL_F16 0 "register_operand")
  (match_operand:DI 1 "register_operand")]
  "TARGET_SIMD"
{
  machine_mode mode = <VALL_F16:MODE>mode;
  rtx mem = gen_rtx_MEM (mode, operands[1]);

  if (BYTES_BIG_ENDIAN)
    emit_insn (gen_aarch64_be_ld1<VALL_F16:mode> (operands[0], mem));
  else
    emit_move_insn (operands[0], mem);
  DONE;
})
6711
;; Builtin expander for LD2/LD3/LD4 of 128-bit vectors: wrap the address in
;; a MEM of the full structure mode and emit the matching simd_ld pattern.
(define_expand "aarch64_ld<VSTRUCT:nregs><VQ:mode>"
 [(match_operand:VSTRUCT 0 "register_operand")
  (match_operand:DI 1 "register_operand")
  (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
  "TARGET_SIMD"
{
  machine_mode mode = <VSTRUCT:MODE>mode;
  rtx mem = gen_rtx_MEM (mode, operands[1]);

  emit_insn (gen_aarch64_simd_ld<VSTRUCT:nregs><VQ:mode> (operands[0], mem));
  DONE;
})
6724
;; Builtin expander for ld1 of two consecutive 128-bit vectors (OImode MEM).
(define_expand "aarch64_ld1x2<VQ:mode>"
 [(match_operand:OI 0 "register_operand")
  (match_operand:DI 1 "register_operand")
  (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
  "TARGET_SIMD"
{
  machine_mode mode = OImode;
  rtx mem = gen_rtx_MEM (mode, operands[1]);

  emit_insn (gen_aarch64_simd_ld1<VQ:mode>_x2 (operands[0], mem));
  DONE;
})
6737
;; Builtin expander for ld1 of two consecutive 64-bit vectors (OImode MEM).
(define_expand "aarch64_ld1x2<VDC:mode>"
 [(match_operand:OI 0 "register_operand")
  (match_operand:DI 1 "register_operand")
  (unspec:VDC [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
  "TARGET_SIMD"
{
  machine_mode mode = OImode;
  rtx mem = gen_rtx_MEM (mode, operands[1]);

  emit_insn (gen_aarch64_simd_ld1<VDC:mode>_x2 (operands[0], mem));
  DONE;
})
6750
6751
;; Builtin expander for single-lane LD2/LD3/LD4: build a BLKmode MEM sized
;; to one structure, range-check the lane index, and emit the matching
;; vec_load_lanes*_lane pattern (operand 2 supplies the untouched lanes).
(define_expand "aarch64_ld<VSTRUCT:nregs>_lane<VALLDIF:mode>"
  [(match_operand:VSTRUCT 0 "register_operand")
	(match_operand:DI 1 "register_operand")
	(match_operand:VSTRUCT 2 "register_operand")
	(match_operand:SI 3 "immediate_operand")
	(unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
  "TARGET_SIMD"
{
  rtx mem = gen_rtx_MEM (BLKmode, operands[1]);
  set_mem_size (mem, GET_MODE_SIZE (GET_MODE_INNER (<VALLDIF:MODE>mode))
		     * <VSTRUCT:nregs>);

  aarch64_simd_lane_bounds (operands[3], 0, <VALLDIF:nunits>, NULL);
  emit_insn (gen_aarch64_vec_load_lanes<VSTRUCT:mode>_lane<VALLDIF:mode> (
	operands[0], mem, operands[2], operands[3]));
  DONE;
})
6769
6770;; Expanders for builtins to extract vector registers from large
6771;; opaque integer modes.
6772
6773;; D-register list.
6774
;; Extract D-register number <part> from a structure register list: take the
;; 128-bit subreg at byte offset part*16 into a temporary, then move its low
;; 64 bits into the destination.
(define_expand "aarch64_get_dreg<VSTRUCT:mode><VDC:mode>"
 [(match_operand:VDC 0 "register_operand")
  (match_operand:VSTRUCT 1 "register_operand")
  (match_operand:SI 2 "immediate_operand")]
  "TARGET_SIMD"
{
  int part = INTVAL (operands[2]);
  rtx temp = gen_reg_rtx (<VDC:VDBL>mode);
  /* Each constituent vector register occupies 16 bytes of the list.  */
  int offset = part * 16;

  emit_move_insn (temp, gen_rtx_SUBREG (<VDC:VDBL>mode, operands[1], offset));
  emit_move_insn (operands[0], gen_lowpart (<VDC:MODE>mode, temp));
  DONE;
})
6789
6790;; Q-register list.
6791
;; Extract Q-register number <part> from a structure register list as the
;; subreg at byte offset part*16.
(define_expand "aarch64_get_qreg<VSTRUCT:mode><VQ:mode>"
 [(match_operand:VQ 0 "register_operand")
  (match_operand:VSTRUCT 1 "register_operand")
  (match_operand:SI 2 "immediate_operand")]
  "TARGET_SIMD"
{
  int part = INTVAL (operands[2]);
  int offset = part * 16;

  emit_move_insn (operands[0],
		  gen_rtx_SUBREG (<VQ:MODE>mode, operands[1], offset));
  DONE;
})
6805
6806;; Permuted-store expanders for neon intrinsics.
6807
6808;; Permute instructions
6809
6810;; vec_perm support
6811
;; Standard-name variable permute for byte vectors; lowered by
;; aarch64_expand_vec_perm (which emits TBL-based sequences).
(define_expand "vec_perm<mode>"
  [(match_operand:VB 0 "register_operand")
   (match_operand:VB 1 "register_operand")
   (match_operand:VB 2 "register_operand")
   (match_operand:VB 3 "register_operand")]
  "TARGET_SIMD"
{
  aarch64_expand_vec_perm (operands[0], operands[1],
			   operands[2], operands[3], <nunits>);
  DONE;
})
6823
;; Single-register table lookup (TBL with a one-register table).
(define_insn "aarch64_tbl1<mode>"
  [(set (match_operand:VB 0 "register_operand" "=w")
	(unspec:VB [(match_operand:V16QI 1 "register_operand" "w")
		    (match_operand:VB 2 "register_operand" "w")]
		   UNSPEC_TBL))]
  "TARGET_SIMD"
  "tbl\\t%0.<Vtype>, {%1.16b}, %2.<Vtype>"
  [(set_attr "type" "neon_tbl1<q>")]
)
6833
6834;; Two source registers.
6835
;; Table lookup with a two-register table (OImode register pair).
(define_insn "aarch64_tbl2v16qi"
  [(set (match_operand:V16QI 0 "register_operand" "=w")
	(unspec:V16QI [(match_operand:OI 1 "register_operand" "w")
		       (match_operand:V16QI 2 "register_operand" "w")]
		      UNSPEC_TBL))]
  "TARGET_SIMD"
  "tbl\\t%0.16b, {%S1.16b - %T1.16b}, %2.16b"
  [(set_attr "type" "neon_tbl2_q")]
)
6845
6846(define_insn "aarch64_tbl3<mode>"
6847  [(set (match_operand:VB 0 "register_operand" "=w")
6848	(unspec:VB [(match_operand:OI 1 "register_operand" "w")
6849		      (match_operand:VB 2 "register_operand" "w")]
6850		      UNSPEC_TBL))]
6851  "TARGET_SIMD"
6852  "tbl\\t%S0.<Vbtype>, {%S1.16b - %T1.16b}, %S2.<Vbtype>"
6853  [(set_attr "type" "neon_tbl3")]
6854)
6855
6856(define_insn "aarch64_tbx4<mode>"
6857  [(set (match_operand:VB 0 "register_operand" "=w")
6858	(unspec:VB [(match_operand:VB 1 "register_operand" "0")
6859		      (match_operand:OI 2 "register_operand" "w")
6860		      (match_operand:VB 3 "register_operand" "w")]
6861		      UNSPEC_TBX))]
6862  "TARGET_SIMD"
6863  "tbx\\t%S0.<Vbtype>, {%S2.16b - %T2.16b}, %S3.<Vbtype>"
6864  [(set_attr "type" "neon_tbl4")]
6865)
6866
;; Three source registers.

;; Three-register table lookup; the table lives in a CImode (3 x 128-bit)
;; tuple, spanning %S1 .. %U1.
(define_insn "aarch64_qtbl3<mode>"
  [(set (match_operand:VB 0 "register_operand" "=w")
	(unspec:VB [(match_operand:CI 1 "register_operand" "w")
		      (match_operand:VB 2 "register_operand" "w")]
		      UNSPEC_TBL))]
  "TARGET_SIMD"
  "tbl\\t%S0.<Vbtype>, {%S1.16b - %U1.16b}, %S2.<Vbtype>"
  [(set_attr "type" "neon_tbl3")]
)

;; Three-register table lookup extension; operand 1 is tied to the
;; output because TBX preserves bytes for out-of-range indices.
(define_insn "aarch64_qtbx3<mode>"
  [(set (match_operand:VB 0 "register_operand" "=w")
	(unspec:VB [(match_operand:VB 1 "register_operand" "0")
		      (match_operand:CI 2 "register_operand" "w")
		      (match_operand:VB 3 "register_operand" "w")]
		      UNSPEC_TBX))]
  "TARGET_SIMD"
  "tbx\\t%S0.<Vbtype>, {%S2.16b - %U2.16b}, %S3.<Vbtype>"
  [(set_attr "type" "neon_tbl3")]
)

;; Four source registers.

;; Four-register table lookup; the table is an XImode (4 x 128-bit)
;; tuple, spanning %S1 .. %V1.
(define_insn "aarch64_qtbl4<mode>"
  [(set (match_operand:VB 0 "register_operand" "=w")
	(unspec:VB [(match_operand:XI 1 "register_operand" "w")
		      (match_operand:VB 2 "register_operand" "w")]
		      UNSPEC_TBL))]
  "TARGET_SIMD"
  "tbl\\t%S0.<Vbtype>, {%S1.16b - %V1.16b}, %S2.<Vbtype>"
  [(set_attr "type" "neon_tbl4")]
)

;; Four-register table lookup extension (TBX) with the destination tied
;; to operand 1.
(define_insn "aarch64_qtbx4<mode>"
  [(set (match_operand:VB 0 "register_operand" "=w")
	(unspec:VB [(match_operand:VB 1 "register_operand" "0")
		      (match_operand:XI 2 "register_operand" "w")
		      (match_operand:VB 3 "register_operand" "w")]
		      UNSPEC_TBX))]
  "TARGET_SIMD"
  "tbx\\t%S0.<Vbtype>, {%S2.16b - %V2.16b}, %S3.<Vbtype>"
  [(set_attr "type" "neon_tbl4")]
)
6912
;; Concatenate two V16QI registers into one OImode tuple.  Emitted as a
;; placeholder ("#") and split after register allocation, when the hard
;; registers are known, into the moves produced by
;; aarch64_split_combinev16qi.
(define_insn_and_split "aarch64_combinev16qi"
  [(set (match_operand:OI 0 "register_operand" "=w")
	(unspec:OI [(match_operand:V16QI 1 "register_operand" "w")
		    (match_operand:V16QI 2 "register_operand" "w")]
		   UNSPEC_CONCAT))]
  "TARGET_SIMD"
  "#"
  "&& reload_completed"
  [(const_int 0)]
{
  aarch64_split_combinev16qi (operands);
  DONE;
}
[(set_attr "type" "multiple")]
)

;; This instruction's pattern is generated directly by
;; aarch64_expand_vec_perm_const, so any changes to the pattern would
;; need corresponding changes there.
(define_insn "aarch64_<PERMUTE:perm_insn><mode>"
  [(set (match_operand:VALL_F16 0 "register_operand" "=w")
	(unspec:VALL_F16 [(match_operand:VALL_F16 1 "register_operand" "w")
			  (match_operand:VALL_F16 2 "register_operand" "w")]
	 PERMUTE))]
  "TARGET_SIMD"
  "<PERMUTE:perm_insn>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_permute<q>")]
)

;; This instruction's pattern is generated directly by
;; aarch64_expand_vec_perm_const, so any changes to the pattern would
;; need corresponding changes there.  Note that the immediate (third)
;; operand is a lane index not a byte index.
(define_insn "aarch64_ext<mode>"
  [(set (match_operand:VALL_F16 0 "register_operand" "=w")
        (unspec:VALL_F16 [(match_operand:VALL_F16 1 "register_operand" "w")
			  (match_operand:VALL_F16 2 "register_operand" "w")
			  (match_operand:SI 3 "immediate_operand" "i")]
	 UNSPEC_EXT))]
  "TARGET_SIMD"
{
  ;; EXT takes a byte offset; scale the lane index by the element size
  ;; before printing the immediate.
  operands[3] = GEN_INT (INTVAL (operands[3])
      * GET_MODE_UNIT_SIZE (<MODE>mode));
  return "ext\\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>, #%3";
}
  [(set_attr "type" "neon_ext<q>")]
)

;; This instruction's pattern is generated directly by
;; aarch64_expand_vec_perm_const, so any changes to the pattern would
;; need corresponding changes there.
(define_insn "aarch64_rev<REVERSE:rev_op><mode>"
  [(set (match_operand:VALL_F16 0 "register_operand" "=w")
	(unspec:VALL_F16 [(match_operand:VALL_F16 1 "register_operand" "w")]
                    REVERSE))]
  "TARGET_SIMD"
  "rev<REVERSE:rev_op>\\t%0.<Vtype>, %1.<Vtype>"
  [(set_attr "type" "neon_rev<q>")]
)
6972
;; D-register structure stores.  For each interleave factor there are
;; two patterns with the same name but disjoint mode iterators: the VD
;; variant emits the interleaving stN, while the DX variant (64-bit
;; scalar modes) emits st1 on .1d views, since no interleave is needed
;; for one-element vectors.  The inner VSTRUCTDUMMY unspec only carries
;; the element mode; the BLK destination covers the whole store.

(define_insn "aarch64_st2<mode>_dreg"
  [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
	(unspec:BLK [(match_operand:OI 1 "register_operand" "w")
                    (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_ST2))]
  "TARGET_SIMD"
  "st2\\t{%S1.<Vtype> - %T1.<Vtype>}, %0"
  [(set_attr "type" "neon_store2_2reg")]
)

(define_insn "aarch64_st2<mode>_dreg"
  [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
	(unspec:BLK [(match_operand:OI 1 "register_operand" "w")
                    (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_ST2))]
  "TARGET_SIMD"
  "st1\\t{%S1.1d - %T1.1d}, %0"
  [(set_attr "type" "neon_store1_2reg")]
)

(define_insn "aarch64_st3<mode>_dreg"
  [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
	(unspec:BLK [(match_operand:CI 1 "register_operand" "w")
                    (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_ST3))]
  "TARGET_SIMD"
  "st3\\t{%S1.<Vtype> - %U1.<Vtype>}, %0"
  [(set_attr "type" "neon_store3_3reg")]
)

(define_insn "aarch64_st3<mode>_dreg"
  [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
	(unspec:BLK [(match_operand:CI 1 "register_operand" "w")
                    (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_ST3))]
  "TARGET_SIMD"
  "st1\\t{%S1.1d - %U1.1d}, %0"
  [(set_attr "type" "neon_store1_3reg")]
)

(define_insn "aarch64_st4<mode>_dreg"
  [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
	(unspec:BLK [(match_operand:XI 1 "register_operand" "w")
                    (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_ST4))]
  "TARGET_SIMD"
  "st4\\t{%S1.<Vtype> - %V1.<Vtype>}, %0"
  [(set_attr "type" "neon_store4_4reg")]
)

(define_insn "aarch64_st4<mode>_dreg"
  [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
	(unspec:BLK [(match_operand:XI 1 "register_operand" "w")
                    (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_ST4))]
  "TARGET_SIMD"
  "st1\\t{%S1.1d - %V1.1d}, %0"
  [(set_attr "type" "neon_store1_4reg")]
)
7032
;; Builtin expander for D-register structure stores.  Operand 0 is the
;; base address (DImode); the BLK mem is sized to nregs 8-byte
;; D registers before handing off to the _dreg insn above.
(define_expand "aarch64_st<VSTRUCT:nregs><VDC:mode>"
 [(match_operand:DI 0 "register_operand")
  (match_operand:VSTRUCT 1 "register_operand")
  (unspec:VDC [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
  "TARGET_SIMD"
{
  rtx mem = gen_rtx_MEM (BLKmode, operands[0]);
  set_mem_size (mem, <VSTRUCT:nregs> * 8);

  emit_insn (gen_aarch64_st<VSTRUCT:nregs><VDC:mode>_dreg (mem, operands[1]));
  DONE;
})

;; Builtin expander for Q-register structure stores; forwards to the
;; generic aarch64_simd_stN pattern with a struct-mode MEM.
(define_expand "aarch64_st<VSTRUCT:nregs><VQ:mode>"
 [(match_operand:DI 0 "register_operand")
  (match_operand:VSTRUCT 1 "register_operand")
  (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
  "TARGET_SIMD"
{
  machine_mode mode = <VSTRUCT:MODE>mode;
  rtx mem = gen_rtx_MEM (mode, operands[0]);

  emit_insn (gen_aarch64_simd_st<VSTRUCT:nregs><VQ:mode> (mem, operands[1]));
  DONE;
})

;; Builtin expander for single-lane structure stores (stN lane forms).
;; The store covers one element per register, hence the mem size of
;; element-size * nregs bytes.
(define_expand "aarch64_st<VSTRUCT:nregs>_lane<VALLDIF:mode>"
 [(match_operand:DI 0 "register_operand")
  (match_operand:VSTRUCT 1 "register_operand")
  (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
  (match_operand:SI 2 "immediate_operand")]
  "TARGET_SIMD"
{
  rtx mem = gen_rtx_MEM (BLKmode, operands[0]);
  set_mem_size (mem, GET_MODE_SIZE (GET_MODE_INNER (<VALLDIF:MODE>mode))
		     * <VSTRUCT:nregs>);

  emit_insn (gen_aarch64_vec_store_lanes<VSTRUCT:mode>_lane<VALLDIF:mode> (
		mem, operands[1], operands[2]));
  DONE;
})

;; st1 of a single vector.  On big-endian a dedicated pattern is used to
;; get the lane ordering right; little-endian is just a plain move to
;; memory.
(define_expand "aarch64_st1<VALL_F16:mode>"
 [(match_operand:DI 0 "register_operand")
  (match_operand:VALL_F16 1 "register_operand")]
  "TARGET_SIMD"
{
  machine_mode mode = <VALL_F16:MODE>mode;
  rtx mem = gen_rtx_MEM (mode, operands[0]);

  if (BYTES_BIG_ENDIAN)
    emit_insn (gen_aarch64_be_st1<VALL_F16:mode> (mem, operands[1]));
  else
    emit_move_insn (mem, operands[1]);
  DONE;
})
7089
;; Expander for builtins to insert vector registers into large
;; opaque integer modes.

;; Q-register list.  We don't need a D-reg inserter as we zero
;; extend them in arm_neon.h and insert the resulting Q-regs.

;; Copy the whole struct (operand 1) into the result, then overwrite
;; Q-register number <part> (operand 3) via a subreg at byte offset
;; part * 16.
(define_expand "aarch64_set_qreg<VSTRUCT:mode><VQ:mode>"
 [(match_operand:VSTRUCT 0 "register_operand")
  (match_operand:VSTRUCT 1 "register_operand")
  (match_operand:VQ 2 "register_operand")
  (match_operand:SI 3 "immediate_operand")]
  "TARGET_SIMD"
{
  int part = INTVAL (operands[3]);
  int offset = part * 16;

  emit_move_insn (operands[0], operands[1]);
  emit_move_insn (gen_rtx_SUBREG (<VQ:MODE>mode, operands[0], offset),
		  operands[2]);
  DONE;
})

;; Standard pattern name vec_init<mode><Vel>.

;; Build a vector from scalar elements (operand 1 is a PARALLEL of
;; element values); all the work is done in aarch64_expand_vector_init.
(define_expand "vec_init<mode><Vel>"
  [(match_operand:VALL_F16 0 "register_operand")
   (match_operand 1 "" "")]
  "TARGET_SIMD"
{
  aarch64_expand_vector_init (operands[0], operands[1]);
  DONE;
})

;; As above, but initializing a 128-bit vector from two 64-bit halves.
(define_expand "vec_init<mode><Vhalf>"
  [(match_operand:VQ_NO2E 0 "register_operand")
   (match_operand 1 "" "")]
  "TARGET_SIMD"
{
  aarch64_expand_vector_init (operands[0], operands[1]);
  DONE;
})
7131
;; Load one scalar from memory and replicate it to every lane (LD1R).
(define_insn "*aarch64_simd_ld1r<mode>"
  [(set (match_operand:VALL_F16 0 "register_operand" "=w")
	(vec_duplicate:VALL_F16
	  (match_operand:<VEL> 1 "aarch64_simd_struct_operand" "Utv")))]
  "TARGET_SIMD"
  "ld1r\\t{%0.<Vtype>}, %1"
  [(set_attr "type" "neon_load1_all_lanes")]
)

;; LD1 of a two-register (OImode) tuple, Q-register element modes.
(define_insn "aarch64_simd_ld1<mode>_x2"
  [(set (match_operand:OI 0 "register_operand" "=w")
	(unspec:OI [(match_operand:OI 1 "aarch64_simd_struct_operand" "Utv")
		    (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
		   UNSPEC_LD1))]
  "TARGET_SIMD"
  "ld1\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
  [(set_attr "type" "neon_load1_2reg<q>")]
)

;; LD1 of a two-register tuple, D-register element modes (same name as
;; above; the VQ/VDC iterators keep the concrete names disjoint).
(define_insn "aarch64_simd_ld1<mode>_x2"
  [(set (match_operand:OI 0 "register_operand" "=w")
	(unspec:OI [(match_operand:OI 1 "aarch64_simd_struct_operand" "Utv")
		    (unspec:VDC [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
		   UNSPEC_LD1))]
  "TARGET_SIMD"
  "ld1\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
  [(set_attr "type" "neon_load1_2reg<q>")]
)
7160
7161
7162(define_insn "@aarch64_frecpe<mode>"
7163  [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
7164	(unspec:VHSDF_HSDF
7165	 [(match_operand:VHSDF_HSDF 1 "register_operand" "w")]
7166	 UNSPEC_FRECPE))]
7167  "TARGET_SIMD"
7168  "frecpe\t%<v>0<Vmtype>, %<v>1<Vmtype>"
7169  [(set_attr "type" "neon_fp_recpe_<stype><q>")]
7170)
7171
7172(define_insn "aarch64_frecpx<mode>"
7173  [(set (match_operand:GPF_F16 0 "register_operand" "=w")
7174	(unspec:GPF_F16 [(match_operand:GPF_F16 1 "register_operand" "w")]
7175	 UNSPEC_FRECPX))]
7176  "TARGET_SIMD"
7177  "frecpx\t%<s>0, %<s>1"
7178  [(set_attr "type" "neon_fp_recpx_<GPF_F16:stype>")]
7179)
7180
7181(define_insn "@aarch64_frecps<mode>"
7182  [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
7183	(unspec:VHSDF_HSDF
7184	  [(match_operand:VHSDF_HSDF 1 "register_operand" "w")
7185	  (match_operand:VHSDF_HSDF 2 "register_operand" "w")]
7186	  UNSPEC_FRECPS))]
7187  "TARGET_SIMD"
7188  "frecps\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
7189  [(set_attr "type" "neon_fp_recps_<stype><q>")]
7190)
7191
7192(define_insn "aarch64_urecpe<mode>"
7193  [(set (match_operand:VDQ_SI 0 "register_operand" "=w")
7194        (unspec:VDQ_SI [(match_operand:VDQ_SI 1 "register_operand" "w")]
7195                UNSPEC_URECPE))]
7196 "TARGET_SIMD"
7197 "urecpe\\t%0.<Vtype>, %1.<Vtype>"
7198  [(set_attr "type" "neon_fp_recpe_<Vetype><q>")])
7199
;; Standard pattern name vec_extract<mode><Vel>.

;; Extract the scalar element at lane <operand 2> from a vector.
(define_expand "vec_extract<mode><Vel>"
  [(match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand")
   (match_operand:VALL_F16 1 "register_operand")
   (match_operand:SI 2 "immediate_operand")]
  "TARGET_SIMD"
{
    emit_insn
      (gen_aarch64_get_lane<mode> (operands[0], operands[1], operands[2]));
    DONE;
})

;; Extract a 64-bit vector from one half of a 128-bit vector.
(define_expand "vec_extract<mode><Vhalf>"
  [(match_operand:<VHALF> 0 "register_operand")
   (match_operand:VQMOV_NO2E 1 "register_operand")
   (match_operand 2 "immediate_operand")]
  "TARGET_SIMD"
{
  ;; Only the low (start == 0) or high (start == nunits/2) half is
  ;; representable; anything else fails the expansion.
  int start = INTVAL (operands[2]);
  if (start != 0 && start != <nunits> / 2)
    FAIL;
  rtx sel = aarch64_gen_stepped_int_parallel (<nunits> / 2, start, 1);
  emit_insn (gen_aarch64_get_half<mode> (operands[0], operands[1], sel));
  DONE;
})

;; Extract a single-element 64-bit vector from one half of a 128-bit vector.
(define_expand "vec_extractv2dfv1df"
  [(match_operand:V1DF 0 "register_operand")
   (match_operand:V2DF 1 "register_operand")
   (match_operand 2 "immediate_operand")]
  "TARGET_SIMD"
{
  /* V1DF is rarely used by other patterns, so it should be better to hide
     it in a subreg destination of a normal DF op.  */
  rtx scalar0 = gen_lowpart (DFmode, operands[0]);
  emit_insn (gen_vec_extractv2dfdf (scalar0, operands[1], operands[2]));
  DONE;
})
7241
;; aes

;; AES single round (AESE/AESD).  The architectural operation XORs the
;; state with the round key first, which is modelled explicitly so that
;; combine can merge a preceding EOR; "%0" makes the xor commutative
;; inputs tieable to the destination.
(define_insn "aarch64_crypto_aes<aes_op>v16qi"
  [(set (match_operand:V16QI 0 "register_operand" "=w")
	(unspec:V16QI
		[(xor:V16QI
		 (match_operand:V16QI 1 "register_operand" "%0")
		 (match_operand:V16QI 2 "register_operand" "w"))]
         CRYPTO_AES))]
  "TARGET_SIMD && TARGET_AES"
  "aes<aes_op>\\t%0.16b, %2.16b"
  [(set_attr "type" "crypto_aese")]
)

;; AES mix-columns / inverse mix-columns (AESMC/AESIMC).
(define_insn "aarch64_crypto_aes<aesmc_op>v16qi"
  [(set (match_operand:V16QI 0 "register_operand" "=w")
	(unspec:V16QI [(match_operand:V16QI 1 "register_operand" "w")]
	 CRYPTO_AESMC))]
  "TARGET_SIMD && TARGET_AES"
  "aes<aesmc_op>\\t%0.16b, %1.16b"
  [(set_attr "type" "crypto_aesmc")]
)

;; When AESE/AESMC fusion is enabled we really want to keep the two together
;; and enforce the register dependency without scheduling or register
;; allocation messing up the order or introducing moves in between.
;;  Mash the two together during combine.

(define_insn "*aarch64_crypto_aese_fused"
  [(set (match_operand:V16QI 0 "register_operand" "=w")
	(unspec:V16QI
	  [(unspec:V16QI
	   [(xor:V16QI
		(match_operand:V16QI 1 "register_operand" "%0")
		(match_operand:V16QI 2 "register_operand" "w"))]
	     UNSPEC_AESE)]
	UNSPEC_AESMC))]
  "TARGET_SIMD && TARGET_AES
   && aarch64_fusion_enabled_p (AARCH64_FUSE_AES_AESMC)"
  "aese\\t%0.16b, %2.16b\;aesmc\\t%0.16b, %0.16b"
  [(set_attr "type" "crypto_aese")
   (set_attr "length" "8")]
)

;; When AESD/AESIMC fusion is enabled we really want to keep the two together
;; and enforce the register dependency without scheduling or register
;; allocation messing up the order or introducing moves in between.
;;  Mash the two together during combine.

(define_insn "*aarch64_crypto_aesd_fused"
  [(set (match_operand:V16QI 0 "register_operand" "=w")
	(unspec:V16QI
	  [(unspec:V16QI
		    [(xor:V16QI
			(match_operand:V16QI 1 "register_operand" "%0")
			(match_operand:V16QI 2 "register_operand" "w"))]
		UNSPEC_AESD)]
	  UNSPEC_AESIMC))]
  "TARGET_SIMD && TARGET_AES
   && aarch64_fusion_enabled_p (AARCH64_FUSE_AES_AESMC)"
  "aesd\\t%0.16b, %2.16b\;aesimc\\t%0.16b, %0.16b"
  [(set_attr "type" "crypto_aese")
   (set_attr "length" "8")]
)
7306
;; sha1

;; SHA1H on a plain SImode value.
(define_insn "aarch64_crypto_sha1hsi"
  [(set (match_operand:SI 0 "register_operand" "=w")
        (unspec:SI [(match_operand:SI 1
                       "register_operand" "w")]
         UNSPEC_SHA1H))]
  "TARGET_SIMD && TARGET_SHA2"
  "sha1h\\t%s0, %s1"
  [(set_attr "type" "crypto_sha1_fast")]
)

;; SHA1H reading lane 0 of a V4SI; lane 0 is the low element only on
;; little-endian, hence the !BYTES_BIG_ENDIAN condition.
(define_insn "aarch64_crypto_sha1hv4si"
  [(set (match_operand:SI 0 "register_operand" "=w")
	(unspec:SI [(vec_select:SI (match_operand:V4SI 1 "register_operand" "w")
		     (parallel [(const_int 0)]))]
	 UNSPEC_SHA1H))]
  "TARGET_SIMD && TARGET_SHA2 && !BYTES_BIG_ENDIAN"
  "sha1h\\t%s0, %s1"
  [(set_attr "type" "crypto_sha1_fast")]
)

;; Big-endian counterpart: the low element is lane 3.
(define_insn "aarch64_be_crypto_sha1hv4si"
  [(set (match_operand:SI 0 "register_operand" "=w")
	(unspec:SI [(vec_select:SI (match_operand:V4SI 1 "register_operand" "w")
		     (parallel [(const_int 3)]))]
	 UNSPEC_SHA1H))]
  "TARGET_SIMD && TARGET_SHA2 && BYTES_BIG_ENDIAN"
  "sha1h\\t%s0, %s1"
  [(set_attr "type" "crypto_sha1_fast")]
)

;; SHA1 schedule update 1; operand 1 is tied to the accumulating output.
(define_insn "aarch64_crypto_sha1su1v4si"
  [(set (match_operand:V4SI 0 "register_operand" "=w")
        (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
                      (match_operand:V4SI 2 "register_operand" "w")]
         UNSPEC_SHA1SU1))]
  "TARGET_SIMD && TARGET_SHA2"
  "sha1su1\\t%0.4s, %2.4s"
  [(set_attr "type" "crypto_sha1_fast")]
)

;; SHA1 hash update (choose/parity/majority, selected by sha1_op).
(define_insn "aarch64_crypto_sha1<sha1_op>v4si"
  [(set (match_operand:V4SI 0 "register_operand" "=w")
        (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
                      (match_operand:SI 2 "register_operand" "w")
                      (match_operand:V4SI 3 "register_operand" "w")]
         CRYPTO_SHA1))]
  "TARGET_SIMD && TARGET_SHA2"
  "sha1<sha1_op>\\t%q0, %s2, %3.4s"
  [(set_attr "type" "crypto_sha1_slow")]
)

;; SHA1 schedule update 0.
(define_insn "aarch64_crypto_sha1su0v4si"
  [(set (match_operand:V4SI 0 "register_operand" "=w")
        (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
                      (match_operand:V4SI 2 "register_operand" "w")
                      (match_operand:V4SI 3 "register_operand" "w")]
         UNSPEC_SHA1SU0))]
  "TARGET_SIMD && TARGET_SHA2"
  "sha1su0\\t%0.4s, %2.4s, %3.4s"
  [(set_attr "type" "crypto_sha1_xor")]
)
7370
;; sha256

;; SHA256 hash update (SHA256H/SHA256H2, selected by sha256_op);
;; operand 1 is tied to the output as the running hash state.
(define_insn "aarch64_crypto_sha256h<sha256_op>v4si"
  [(set (match_operand:V4SI 0 "register_operand" "=w")
        (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
                      (match_operand:V4SI 2 "register_operand" "w")
                      (match_operand:V4SI 3 "register_operand" "w")]
         CRYPTO_SHA256))]
  "TARGET_SIMD && TARGET_SHA2"
  "sha256h<sha256_op>\\t%q0, %q2, %3.4s"
  [(set_attr "type" "crypto_sha256_slow")]
)

;; SHA256 schedule update 0.
(define_insn "aarch64_crypto_sha256su0v4si"
  [(set (match_operand:V4SI 0 "register_operand" "=w")
        (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
                      (match_operand:V4SI 2 "register_operand" "w")]
         UNSPEC_SHA256SU0))]
  "TARGET_SIMD && TARGET_SHA2"
  "sha256su0\\t%0.4s, %2.4s"
  [(set_attr "type" "crypto_sha256_fast")]
)

;; SHA256 schedule update 1.
(define_insn "aarch64_crypto_sha256su1v4si"
  [(set (match_operand:V4SI 0 "register_operand" "=w")
        (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
                      (match_operand:V4SI 2 "register_operand" "w")
                      (match_operand:V4SI 3 "register_operand" "w")]
         UNSPEC_SHA256SU1))]
  "TARGET_SIMD && TARGET_SHA2"
  "sha256su1\\t%0.4s, %2.4s, %3.4s"
  [(set_attr "type" "crypto_sha256_slow")]
)
7404
;; sha512

;; SHA512 hash update (SHA512H/SHA512H2); gated on TARGET_SHA3, which
;; is the feature bit that carries the SHA512 instructions.
(define_insn "aarch64_crypto_sha512h<sha512_op>qv2di"
  [(set (match_operand:V2DI 0 "register_operand" "=w")
        (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
                      (match_operand:V2DI 2 "register_operand" "w")
                      (match_operand:V2DI 3 "register_operand" "w")]
         CRYPTO_SHA512))]
  "TARGET_SIMD && TARGET_SHA3"
  "sha512h<sha512_op>\\t%q0, %q2, %3.2d"
  [(set_attr "type" "crypto_sha512")]
)

;; SHA512 schedule update 0.
(define_insn "aarch64_crypto_sha512su0qv2di"
  [(set (match_operand:V2DI 0 "register_operand" "=w")
        (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
                      (match_operand:V2DI 2 "register_operand" "w")]
         UNSPEC_SHA512SU0))]
  "TARGET_SIMD && TARGET_SHA3"
  "sha512su0\\t%0.2d, %2.2d"
  [(set_attr "type" "crypto_sha512")]
)

;; SHA512 schedule update 1.
(define_insn "aarch64_crypto_sha512su1qv2di"
  [(set (match_operand:V2DI 0 "register_operand" "=w")
        (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
                      (match_operand:V2DI 2 "register_operand" "w")
                      (match_operand:V2DI 3 "register_operand" "w")]
         UNSPEC_SHA512SU1))]
  "TARGET_SIMD && TARGET_SHA3"
  "sha512su1\\t%0.2d, %2.2d, %3.2d"
  [(set_attr "type" "crypto_sha512")]
)
7438
;; sha3

;; Three-way exclusive OR, expressed with genuine XOR rtl so the
;; combiner can form it from separate EORs.
(define_insn "eor3q<mode>4"
  [(set (match_operand:VQ_I 0 "register_operand" "=w")
	(xor:VQ_I
	 (xor:VQ_I
	  (match_operand:VQ_I 2 "register_operand" "w")
	  (match_operand:VQ_I 3 "register_operand" "w"))
	 (match_operand:VQ_I 1 "register_operand" "w")))]
  "TARGET_SIMD && TARGET_SHA3"
  "eor3\\t%0.16b, %1.16b, %2.16b, %3.16b"
  [(set_attr "type" "crypto_sha3")]
)

;; Rotate-left-by-one and XOR (RAX1), modelled as rotate + xor rtl.
(define_insn "aarch64_rax1qv2di"
  [(set (match_operand:V2DI 0 "register_operand" "=w")
	(xor:V2DI
	 (rotate:V2DI
	  (match_operand:V2DI 2 "register_operand" "w")
	  (const_int 1))
	 (match_operand:V2DI 1 "register_operand" "w")))]
  "TARGET_SIMD && TARGET_SHA3"
  "rax1\\t%0.2d, %1.2d, %2.2d"
  [(set_attr "type" "crypto_sha3")]
)

;; XOR then rotate right by an immediate (XAR); the xor is marked
;; commutative ("%w") so either input may be tied.
(define_insn "aarch64_xarqv2di"
  [(set (match_operand:V2DI 0 "register_operand" "=w")
	(rotatert:V2DI
	 (xor:V2DI
	  (match_operand:V2DI 1 "register_operand" "%w")
	  (match_operand:V2DI 2 "register_operand" "w"))
	 (match_operand:SI 3 "aarch64_simd_shift_imm_di" "Usd")))]
  "TARGET_SIMD && TARGET_SHA3"
  "xar\\t%0.2d, %1.2d, %2.2d, %3"
  [(set_attr "type" "crypto_sha3")]
)

;; Bit-clear and XOR: op0 = op1 ^ (op2 & ~op3).
(define_insn "bcaxq<mode>4"
  [(set (match_operand:VQ_I 0 "register_operand" "=w")
	(xor:VQ_I
	 (and:VQ_I
	  (not:VQ_I (match_operand:VQ_I 3 "register_operand" "w"))
	  (match_operand:VQ_I 2 "register_operand" "w"))
	 (match_operand:VQ_I 1 "register_operand" "w")))]
  "TARGET_SIMD && TARGET_SHA3"
  "bcax\\t%0.16b, %1.16b, %2.16b, %3.16b"
  [(set_attr "type" "crypto_sha3")]
)
7488
;; SM3

;; SM3 rotating xor (SM3SS1); SM3/SM4 instructions share the
;; TARGET_SM4 feature bit.
(define_insn "aarch64_sm3ss1qv4si"
  [(set (match_operand:V4SI 0 "register_operand" "=w")
	(unspec:V4SI [(match_operand:V4SI 1 "register_operand" "w")
		      (match_operand:V4SI 2 "register_operand" "w")
		      (match_operand:V4SI 3 "register_operand" "w")]
	 UNSPEC_SM3SS1))]
  "TARGET_SIMD && TARGET_SM4"
  "sm3ss1\\t%0.4s, %1.4s, %2.4s, %3.4s"
  [(set_attr "type" "crypto_sm3")]
)


;; SM3TT1A/1B/2A/2B (selected by sm3tt_op); operand 4 is a 2-bit lane
;; immediate and operand 1 is tied to the accumulating output.
(define_insn "aarch64_sm3tt<sm3tt_op>qv4si"
  [(set (match_operand:V4SI 0 "register_operand" "=w")
	(unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
		      (match_operand:V4SI 2 "register_operand" "w")
		      (match_operand:V4SI 3 "register_operand" "w")
		      (match_operand:SI 4 "aarch64_imm2" "Ui2")]
	 CRYPTO_SM3TT))]
  "TARGET_SIMD && TARGET_SM4"
  "sm3tt<sm3tt_op>\\t%0.4s, %2.4s, %3.4s[%4]"
  [(set_attr "type" "crypto_sm3")]
)

;; SM3PARTW1/SM3PARTW2 message-schedule helpers.
(define_insn "aarch64_sm3partw<sm3part_op>qv4si"
  [(set (match_operand:V4SI 0 "register_operand" "=w")
	(unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
		      (match_operand:V4SI 2 "register_operand" "w")
		      (match_operand:V4SI 3 "register_operand" "w")]
	 CRYPTO_SM3PART))]
  "TARGET_SIMD && TARGET_SM4"
  "sm3partw<sm3part_op>\\t%0.4s, %2.4s, %3.4s"
  [(set_attr "type" "crypto_sm3")]
)
7525
;; SM4

;; SM4 encrypt/decrypt round; operand 1 carries the running state and
;; is tied to the output.
(define_insn "aarch64_sm4eqv4si"
  [(set (match_operand:V4SI 0 "register_operand" "=w")
	(unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
		      (match_operand:V4SI 2 "register_operand" "w")]
	 UNSPEC_SM4E))]
  "TARGET_SIMD && TARGET_SM4"
  "sm4e\\t%0.4s, %2.4s"
  [(set_attr "type" "crypto_sm4")]
)

;; SM4 key schedule.
(define_insn "aarch64_sm4ekeyqv4si"
  [(set (match_operand:V4SI 0 "register_operand" "=w")
	(unspec:V4SI [(match_operand:V4SI 1 "register_operand" "w")
		      (match_operand:V4SI 2 "register_operand" "w")]
	 UNSPEC_SM4EKEY))]
  "TARGET_SIMD && TARGET_SM4"
  "sm4ekey\\t%0.4s, %1.4s, %2.4s"
  [(set_attr "type" "crypto_sm4")]
)
7547
;; fp16fml

;; Widening fp16 multiply-accumulate (FMLAL/FMLSL), low halves.  The
;; expander materializes the PARALLELs that select the low half of each
;; fp16 input so that the insn patterns below can match them
;; structurally.
(define_expand "aarch64_fml<f16mac1>l<f16quad>_low<mode>"
  [(set (match_operand:VDQSF 0 "register_operand")
	(unspec:VDQSF
	 [(match_operand:VDQSF 1 "register_operand")
	  (match_operand:<VFMLA_W> 2 "register_operand")
	  (match_operand:<VFMLA_W> 3 "register_operand")]
	 VFMLA16_LOW))]
  "TARGET_F16FML"
{
  ;; Two identical low-half selectors, one per fp16 source operand.
  rtx p1 = aarch64_simd_vect_par_cnst_half (<VFMLA_W>mode,
					    <nunits> * 2, false);
  rtx p2 = aarch64_simd_vect_par_cnst_half (<VFMLA_W>mode,
					    <nunits> * 2, false);

  emit_insn (gen_aarch64_simd_fml<f16mac1>l<f16quad>_low<mode> (operands[0],
								operands[1],
								operands[2],
								operands[3],
								p1, p2));
  DONE;

})

;; As the _low expander above, but selecting the high halves (third
;; argument of aarch64_simd_vect_par_cnst_half is true).
(define_expand "aarch64_fml<f16mac1>l<f16quad>_high<mode>"
  [(set (match_operand:VDQSF 0 "register_operand")
	(unspec:VDQSF
	 [(match_operand:VDQSF 1 "register_operand")
	  (match_operand:<VFMLA_W> 2 "register_operand")
	  (match_operand:<VFMLA_W> 3 "register_operand")]
	 VFMLA16_HIGH))]
  "TARGET_F16FML"
{
  rtx p1 = aarch64_simd_vect_par_cnst_half (<VFMLA_W>mode, <nunits> * 2, true);
  rtx p2 = aarch64_simd_vect_par_cnst_half (<VFMLA_W>mode, <nunits> * 2, true);

  emit_insn (gen_aarch64_simd_fml<f16mac1>l<f16quad>_high<mode> (operands[0],
								 operands[1],
								 operands[2],
								 operands[3],
								 p1, p2));
  DONE;
})

;; FMLAL (low): op0 = op1 + extend(lo(op2)) * extend(lo(op3)), modelled
;; as an fma over float_extended low-half vec_selects.
(define_insn "aarch64_simd_fmlal<f16quad>_low<mode>"
  [(set (match_operand:VDQSF 0 "register_operand" "=w")
	(fma:VDQSF
	 (float_extend:VDQSF
	  (vec_select:<VFMLA_SEL_W>
	   (match_operand:<VFMLA_W> 2 "register_operand" "w")
	   (match_operand:<VFMLA_W> 4 "vect_par_cnst_lo_half" "")))
	 (float_extend:VDQSF
	  (vec_select:<VFMLA_SEL_W>
	   (match_operand:<VFMLA_W> 3 "register_operand" "w")
	   (match_operand:<VFMLA_W> 5 "vect_par_cnst_lo_half" "")))
	 (match_operand:VDQSF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlal\\t%0.<nunits>s, %2.<nunits>h, %3.<nunits>h"
  [(set_attr "type" "neon_fp_mul_s")]
)

;; FMLSL (low): as FMLAL but the first multiplicand is negated.
(define_insn "aarch64_simd_fmlsl<f16quad>_low<mode>"
  [(set (match_operand:VDQSF 0 "register_operand" "=w")
	(fma:VDQSF
	 (float_extend:VDQSF
	  (neg:<VFMLA_SEL_W>
	   (vec_select:<VFMLA_SEL_W>
	    (match_operand:<VFMLA_W> 2 "register_operand" "w")
	    (match_operand:<VFMLA_W> 4 "vect_par_cnst_lo_half" ""))))
	 (float_extend:VDQSF
	  (vec_select:<VFMLA_SEL_W>
	   (match_operand:<VFMLA_W> 3 "register_operand" "w")
	   (match_operand:<VFMLA_W> 5 "vect_par_cnst_lo_half" "")))
	 (match_operand:VDQSF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlsl\\t%0.<nunits>s, %2.<nunits>h, %3.<nunits>h"
  [(set_attr "type" "neon_fp_mul_s")]
)

;; FMLAL2: same operation on the high halves.
(define_insn "aarch64_simd_fmlal<f16quad>_high<mode>"
  [(set (match_operand:VDQSF 0 "register_operand" "=w")
	(fma:VDQSF
	 (float_extend:VDQSF
	  (vec_select:<VFMLA_SEL_W>
	   (match_operand:<VFMLA_W> 2 "register_operand" "w")
	   (match_operand:<VFMLA_W> 4 "vect_par_cnst_hi_half" "")))
	 (float_extend:VDQSF
	  (vec_select:<VFMLA_SEL_W>
	   (match_operand:<VFMLA_W> 3 "register_operand" "w")
	   (match_operand:<VFMLA_W> 5 "vect_par_cnst_hi_half" "")))
	 (match_operand:VDQSF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlal2\\t%0.<nunits>s, %2.<nunits>h, %3.<nunits>h"
  [(set_attr "type" "neon_fp_mul_s")]
)

;; FMLSL2: negated multiplicand, high halves.
(define_insn "aarch64_simd_fmlsl<f16quad>_high<mode>"
  [(set (match_operand:VDQSF 0 "register_operand" "=w")
	(fma:VDQSF
	 (float_extend:VDQSF
	  (neg:<VFMLA_SEL_W>
	   (vec_select:<VFMLA_SEL_W>
	    (match_operand:<VFMLA_W> 2 "register_operand" "w")
	    (match_operand:<VFMLA_W> 4 "vect_par_cnst_hi_half" ""))))
	 (float_extend:VDQSF
	  (vec_select:<VFMLA_SEL_W>
	   (match_operand:<VFMLA_W> 3 "register_operand" "w")
	   (match_operand:<VFMLA_W> 5 "vect_par_cnst_hi_half" "")))
	 (match_operand:VDQSF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlsl2\\t%0.<nunits>s, %2.<nunits>h, %3.<nunits>h"
  [(set_attr "type" "neon_fp_mul_s")]
)
7662
7663(define_expand "aarch64_fml<f16mac1>l_lane_lowv2sf"
7664  [(set (match_operand:V2SF 0 "register_operand")
7665	(unspec:V2SF [(match_operand:V2SF 1 "register_operand")
7666			   (match_operand:V4HF 2 "register_operand")
7667			   (match_operand:V4HF 3 "register_operand")
7668			   (match_operand:SI 4 "aarch64_imm2")]
7669	 VFMLA16_LOW))]
7670  "TARGET_F16FML"
7671{
7672    rtx p1 = aarch64_simd_vect_par_cnst_half (V4HFmode, 4, false);
7673    rtx lane = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[4]));
7674
7675    emit_insn (gen_aarch64_simd_fml<f16mac1>l_lane_lowv2sf (operands[0],
7676							    operands[1],
7677							    operands[2],
7678							    operands[3],
7679							    p1, lane));
7680    DONE;
7681}
7682)
7683
7684(define_expand "aarch64_fml<f16mac1>l_lane_highv2sf"
7685  [(set (match_operand:V2SF 0 "register_operand")
7686	(unspec:V2SF [(match_operand:V2SF 1 "register_operand")
7687			   (match_operand:V4HF 2 "register_operand")
7688			   (match_operand:V4HF 3 "register_operand")
7689			   (match_operand:SI 4 "aarch64_imm2")]
7690	 VFMLA16_HIGH))]
7691  "TARGET_F16FML"
7692{
7693    rtx p1 = aarch64_simd_vect_par_cnst_half (V4HFmode, 4, true);
7694    rtx lane = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[4]));
7695
7696    emit_insn (gen_aarch64_simd_fml<f16mac1>l_lane_highv2sf (operands[0],
7697							     operands[1],
7698							     operands[2],
7699							     operands[3],
7700							     p1, lane));
7701    DONE;
7702})
7703
;; FMLAL (by element), low half: fma of the widened low half of operand 2
;; with a widened duplicated lane of operand 3, accumulating into operand 1
;; (tied to the destination).  The lane register uses the "x" constraint
;; since the by-element encoding can only address a restricted register set.
(define_insn "aarch64_simd_fmlal_lane_lowv2sf"
  [(set (match_operand:V2SF 0 "register_operand" "=w")
	(fma:V2SF
	 (float_extend:V2SF
	   (vec_select:V2HF
	    (match_operand:V4HF 2 "register_operand" "w")
	    (match_operand:V4HF 4 "vect_par_cnst_lo_half" "")))
	 (float_extend:V2SF
	   (vec_duplicate:V2HF
	    (vec_select:HF
	     (match_operand:V4HF 3 "register_operand" "x")
	     (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
	 (match_operand:V2SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlal\\t%0.2s, %2.2h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)

;; FMLSL (by element), low half: as FMLAL above, but the selected half of
;; operand 2 is negated before widening, giving a multiply-subtract.
(define_insn "aarch64_simd_fmlsl_lane_lowv2sf"
  [(set (match_operand:V2SF 0 "register_operand" "=w")
	(fma:V2SF
	 (float_extend:V2SF
	  (neg:V2HF
	   (vec_select:V2HF
	    (match_operand:V4HF 2 "register_operand" "w")
	    (match_operand:V4HF 4 "vect_par_cnst_lo_half" ""))))
	 (float_extend:V2SF
	  (vec_duplicate:V2HF
	   (vec_select:HF
	    (match_operand:V4HF 3 "register_operand" "x")
	    (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
	 (match_operand:V2SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlsl\\t%0.2s, %2.2h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)

;; FMLAL2 (by element): as FMLAL above, but using the high half of
;; operand 2.
(define_insn "aarch64_simd_fmlal_lane_highv2sf"
  [(set (match_operand:V2SF 0 "register_operand" "=w")
	(fma:V2SF
	 (float_extend:V2SF
	   (vec_select:V2HF
	    (match_operand:V4HF 2 "register_operand" "w")
	    (match_operand:V4HF 4 "vect_par_cnst_hi_half" "")))
	 (float_extend:V2SF
	   (vec_duplicate:V2HF
	    (vec_select:HF
	     (match_operand:V4HF 3 "register_operand" "x")
	     (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
	 (match_operand:V2SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlal2\\t%0.2s, %2.2h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)

;; FMLSL2 (by element): multiply-subtract using the high half of operand 2.
(define_insn "aarch64_simd_fmlsl_lane_highv2sf"
  [(set (match_operand:V2SF 0 "register_operand" "=w")
	(fma:V2SF
	 (float_extend:V2SF
	   (neg:V2HF
	    (vec_select:V2HF
	     (match_operand:V4HF 2 "register_operand" "w")
	     (match_operand:V4HF 4 "vect_par_cnst_hi_half" ""))))
	 (float_extend:V2SF
	   (vec_duplicate:V2HF
	    (vec_select:HF
	     (match_operand:V4HF 3 "register_operand" "x")
	     (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
	 (match_operand:V2SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlsl2\\t%0.2s, %2.2h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)
7777
;; Expand the vfmlalq_laneq_low_f16 / vfmlslq_laneq_low_f16 intrinsics:
;; Q-register form, with both the multiplicand (operand 2) and the lane
;; vector (operand 3) in V8HF; operand 4 is the lane index (0..7).
(define_expand "aarch64_fml<f16mac1>lq_laneq_lowv4sf"
  [(set (match_operand:V4SF 0 "register_operand")
	(unspec:V4SF [(match_operand:V4SF 1 "register_operand")
			   (match_operand:V8HF 2 "register_operand")
			   (match_operand:V8HF 3 "register_operand")
			   (match_operand:SI 4 "aarch64_lane_imm3")]
	 VFMLA16_LOW))]
  "TARGET_F16FML"
{
    /* Parallel selecting the memory-order low half of operand 2.  */
    rtx p1 = aarch64_simd_vect_par_cnst_half (V8HFmode, 8, false);
    /* Adjust the lane index for big-endian lane numbering.  */
    rtx lane = aarch64_endian_lane_rtx (V8HFmode, INTVAL (operands[4]));

    emit_insn (gen_aarch64_simd_fml<f16mac1>lq_laneq_lowv4sf (operands[0],
							      operands[1],
							      operands[2],
							      operands[3],
							      p1, lane));
    DONE;
})

;; As above, but operate on the high half of operand 2 (FMLAL2/FMLSL2).
(define_expand "aarch64_fml<f16mac1>lq_laneq_highv4sf"
  [(set (match_operand:V4SF 0 "register_operand")
	(unspec:V4SF [(match_operand:V4SF 1 "register_operand")
			   (match_operand:V8HF 2 "register_operand")
			   (match_operand:V8HF 3 "register_operand")
			   (match_operand:SI 4 "aarch64_lane_imm3")]
	 VFMLA16_HIGH))]
  "TARGET_F16FML"
{
    /* Parallel selecting the memory-order high half of operand 2.  */
    rtx p1 = aarch64_simd_vect_par_cnst_half (V8HFmode, 8, true);
    /* Adjust the lane index for big-endian lane numbering.  */
    rtx lane = aarch64_endian_lane_rtx (V8HFmode, INTVAL (operands[4]));

    emit_insn (gen_aarch64_simd_fml<f16mac1>lq_laneq_highv4sf (operands[0],
							       operands[1],
							       operands[2],
							       operands[3],
							       p1, lane));
    DONE;
})
7817
;; FMLAL (by element), Q form, low half: fma of the widened low half of the
;; V8HF operand 2 with a widened duplicated lane (0..7) of the V8HF
;; operand 3, accumulating into operand 1 (tied to the destination).
(define_insn "aarch64_simd_fmlalq_laneq_lowv4sf"
  [(set (match_operand:V4SF 0 "register_operand" "=w")
	(fma:V4SF
	 (float_extend:V4SF
	  (vec_select:V4HF
	    (match_operand:V8HF 2 "register_operand" "w")
	    (match_operand:V8HF 4 "vect_par_cnst_lo_half" "")))
	 (float_extend:V4SF
	  (vec_duplicate:V4HF
	   (vec_select:HF
	    (match_operand:V8HF 3 "register_operand" "x")
	    (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
	 (match_operand:V4SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlal\\t%0.4s, %2.4h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)

;; FMLSL (by element), Q form, low half: as FMLAL above, but the selected
;; half of operand 2 is negated before widening (multiply-subtract).
(define_insn "aarch64_simd_fmlslq_laneq_lowv4sf"
  [(set (match_operand:V4SF 0 "register_operand" "=w")
	(fma:V4SF
	  (float_extend:V4SF
	   (neg:V4HF
	    (vec_select:V4HF
	     (match_operand:V8HF 2 "register_operand" "w")
	     (match_operand:V8HF 4 "vect_par_cnst_lo_half" ""))))
	 (float_extend:V4SF
	  (vec_duplicate:V4HF
	   (vec_select:HF
	    (match_operand:V8HF 3 "register_operand" "x")
	    (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
	 (match_operand:V4SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlsl\\t%0.4s, %2.4h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)

;; FMLAL2 (by element), Q form: as FMLAL above, but using the high half of
;; operand 2.
(define_insn "aarch64_simd_fmlalq_laneq_highv4sf"
  [(set (match_operand:V4SF 0 "register_operand" "=w")
	(fma:V4SF
	 (float_extend:V4SF
	  (vec_select:V4HF
	    (match_operand:V8HF 2 "register_operand" "w")
	    (match_operand:V8HF 4 "vect_par_cnst_hi_half" "")))
	 (float_extend:V4SF
	  (vec_duplicate:V4HF
	   (vec_select:HF
	    (match_operand:V8HF 3 "register_operand" "x")
	    (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
	 (match_operand:V4SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlal2\\t%0.4s, %2.4h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)

;; FMLSL2 (by element), Q form: multiply-subtract using the high half of
;; operand 2.
(define_insn "aarch64_simd_fmlslq_laneq_highv4sf"
  [(set (match_operand:V4SF 0 "register_operand" "=w")
	(fma:V4SF
	 (float_extend:V4SF
	  (neg:V4HF
	   (vec_select:V4HF
	    (match_operand:V8HF 2 "register_operand" "w")
	    (match_operand:V8HF 4 "vect_par_cnst_hi_half" ""))))
	 (float_extend:V4SF
	  (vec_duplicate:V4HF
	   (vec_select:HF
	    (match_operand:V8HF 3 "register_operand" "x")
	    (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
	 (match_operand:V4SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlsl2\\t%0.4s, %2.4h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)
7891
;; Expand the vfmlal_laneq_low_f16 / vfmlsl_laneq_low_f16 intrinsics:
;; D-register multiplicand (V4HF operand 2) with the lane taken from a
;; Q-register vector (V8HF operand 3); operand 4 is the lane index (0..7).
(define_expand "aarch64_fml<f16mac1>l_laneq_lowv2sf"
  [(set (match_operand:V2SF 0 "register_operand")
	(unspec:V2SF [(match_operand:V2SF 1 "register_operand")
		      (match_operand:V4HF 2 "register_operand")
		      (match_operand:V8HF 3 "register_operand")
		      (match_operand:SI 4 "aarch64_lane_imm3")]
	 VFMLA16_LOW))]
  "TARGET_F16FML"
{
    /* Half-selection parallel is in V4HF (operand 2's mode), while the
       lane index is adjusted in V8HF (operand 3's mode).  */
    rtx p1 = aarch64_simd_vect_par_cnst_half (V4HFmode, 4, false);
    rtx lane = aarch64_endian_lane_rtx (V8HFmode, INTVAL (operands[4]));

    emit_insn (gen_aarch64_simd_fml<f16mac1>l_laneq_lowv2sf (operands[0],
							     operands[1],
							     operands[2],
							     operands[3],
							     p1, lane));
    DONE;

})

;; As above, but operate on the high half of operand 2 (FMLAL2/FMLSL2).
(define_expand "aarch64_fml<f16mac1>l_laneq_highv2sf"
  [(set (match_operand:V2SF 0 "register_operand")
	(unspec:V2SF [(match_operand:V2SF 1 "register_operand")
		      (match_operand:V4HF 2 "register_operand")
		      (match_operand:V8HF 3 "register_operand")
		      (match_operand:SI 4 "aarch64_lane_imm3")]
	 VFMLA16_HIGH))]
  "TARGET_F16FML"
{
    /* Select the memory-order high half of operand 2; adjust the lane
       index in operand 3's V8HF mode.  */
    rtx p1 = aarch64_simd_vect_par_cnst_half (V4HFmode, 4, true);
    rtx lane = aarch64_endian_lane_rtx (V8HFmode, INTVAL (operands[4]));

    emit_insn (gen_aarch64_simd_fml<f16mac1>l_laneq_highv2sf (operands[0],
							      operands[1],
							      operands[2],
							      operands[3],
							      p1, lane));
    DONE;

})
7933
;; FMLAL (by element), laneq form: the low half of the V4HF operand 2 is
;; widened and multiplied by a widened duplicated lane (0..7) of the V8HF
;; operand 3, accumulating into operand 1 (tied to the destination).
(define_insn "aarch64_simd_fmlal_laneq_lowv2sf"
  [(set (match_operand:V2SF 0 "register_operand" "=w")
	(fma:V2SF
	 (float_extend:V2SF
	   (vec_select:V2HF
	    (match_operand:V4HF 2 "register_operand" "w")
	    (match_operand:V4HF 4 "vect_par_cnst_lo_half" "")))
	 (float_extend:V2SF
	  (vec_duplicate:V2HF
	   (vec_select:HF
	    (match_operand:V8HF 3 "register_operand" "x")
	    (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
	 (match_operand:V2SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlal\\t%0.2s, %2.2h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)

;; FMLSL (by element), laneq form: as FMLAL above, but the selected half of
;; operand 2 is negated before widening (multiply-subtract).
(define_insn "aarch64_simd_fmlsl_laneq_lowv2sf"
  [(set (match_operand:V2SF 0 "register_operand" "=w")
	(fma:V2SF
	 (float_extend:V2SF
	  (neg:V2HF
	   (vec_select:V2HF
	    (match_operand:V4HF 2 "register_operand" "w")
	    (match_operand:V4HF 4 "vect_par_cnst_lo_half" ""))))
	 (float_extend:V2SF
	  (vec_duplicate:V2HF
	   (vec_select:HF
	    (match_operand:V8HF 3 "register_operand" "x")
	    (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
	 (match_operand:V2SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlsl\\t%0.2s, %2.2h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)

;; FMLAL2 (by element), laneq form: as FMLAL above, but using the high half
;; of operand 2.
(define_insn "aarch64_simd_fmlal_laneq_highv2sf"
  [(set (match_operand:V2SF 0 "register_operand" "=w")
	(fma:V2SF
	 (float_extend:V2SF
	   (vec_select:V2HF
	    (match_operand:V4HF 2 "register_operand" "w")
	    (match_operand:V4HF 4 "vect_par_cnst_hi_half" "")))
	 (float_extend:V2SF
	  (vec_duplicate:V2HF
	   (vec_select:HF
	    (match_operand:V8HF 3 "register_operand" "x")
	    (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
	 (match_operand:V2SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlal2\\t%0.2s, %2.2h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)

;; FMLSL2 (by element), laneq form: multiply-subtract using the high half
;; of operand 2.
(define_insn "aarch64_simd_fmlsl_laneq_highv2sf"
  [(set (match_operand:V2SF 0 "register_operand" "=w")
	(fma:V2SF
	 (float_extend:V2SF
	  (neg:V2HF
	   (vec_select:V2HF
	    (match_operand:V4HF 2 "register_operand" "w")
	    (match_operand:V4HF 4 "vect_par_cnst_hi_half" ""))))
	 (float_extend:V2SF
	  (vec_duplicate:V2HF
	   (vec_select:HF
	    (match_operand:V8HF 3 "register_operand" "x")
	    (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
	 (match_operand:V2SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlsl2\\t%0.2s, %2.2h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)
8007
;; Expand the vfmlalq_lane_low_f16 / vfmlslq_lane_low_f16 intrinsics:
;; Q-register multiplicand (V8HF operand 2) with the lane taken from a
;; D-register vector (V4HF operand 3); operand 4 is the lane index (0..3).
(define_expand "aarch64_fml<f16mac1>lq_lane_lowv4sf"
  [(set (match_operand:V4SF 0 "register_operand")
	(unspec:V4SF [(match_operand:V4SF 1 "register_operand")
		      (match_operand:V8HF 2 "register_operand")
		      (match_operand:V4HF 3 "register_operand")
		      (match_operand:SI 4 "aarch64_imm2")]
	 VFMLA16_LOW))]
  "TARGET_F16FML"
{
    /* Half-selection parallel is in V8HF (operand 2's mode), while the
       lane index is adjusted in V4HF (operand 3's mode).  */
    rtx p1 = aarch64_simd_vect_par_cnst_half (V8HFmode, 8, false);
    rtx lane = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[4]));

    emit_insn (gen_aarch64_simd_fml<f16mac1>lq_lane_lowv4sf (operands[0],
							     operands[1],
							     operands[2],
							     operands[3],
							     p1, lane));
    DONE;
})

;; As above, but operate on the high half of operand 2 (FMLAL2/FMLSL2).
(define_expand "aarch64_fml<f16mac1>lq_lane_highv4sf"
  [(set (match_operand:V4SF 0 "register_operand")
	(unspec:V4SF [(match_operand:V4SF 1 "register_operand")
		      (match_operand:V8HF 2 "register_operand")
		      (match_operand:V4HF 3 "register_operand")
		      (match_operand:SI 4 "aarch64_imm2")]
	 VFMLA16_HIGH))]
  "TARGET_F16FML"
{
    /* Select the memory-order high half of operand 2; adjust the lane
       index in operand 3's V4HF mode.  */
    rtx p1 = aarch64_simd_vect_par_cnst_half (V8HFmode, 8, true);
    rtx lane = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[4]));

    emit_insn (gen_aarch64_simd_fml<f16mac1>lq_lane_highv4sf (operands[0],
							      operands[1],
							      operands[2],
							      operands[3],
							      p1, lane));
    DONE;
})
8047
;; FMLAL (by element), Q multiplicand / D lane vector: fma of the widened
;; low half of the V8HF operand 2 with a widened duplicated lane (0..3) of
;; the V4HF operand 3, accumulating into operand 1 (tied to the
;; destination).
(define_insn "aarch64_simd_fmlalq_lane_lowv4sf"
  [(set (match_operand:V4SF 0 "register_operand" "=w")
	(fma:V4SF
	 (float_extend:V4SF
	  (vec_select:V4HF
	   (match_operand:V8HF 2 "register_operand" "w")
	   (match_operand:V8HF 4 "vect_par_cnst_lo_half" "")))
	 (float_extend:V4SF
	  (vec_duplicate:V4HF
	   (vec_select:HF
	    (match_operand:V4HF 3 "register_operand" "x")
	    (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
	 (match_operand:V4SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlal\\t%0.4s, %2.4h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)

;; FMLSL (by element), Q multiplicand / D lane vector: as FMLAL above, but
;; the selected half of operand 2 is negated before widening.
(define_insn "aarch64_simd_fmlslq_lane_lowv4sf"
  [(set (match_operand:V4SF 0 "register_operand" "=w")
	(fma:V4SF
	 (float_extend:V4SF
	  (neg:V4HF
	   (vec_select:V4HF
	    (match_operand:V8HF 2 "register_operand" "w")
	    (match_operand:V8HF 4 "vect_par_cnst_lo_half" ""))))
	 (float_extend:V4SF
	  (vec_duplicate:V4HF
	   (vec_select:HF
	    (match_operand:V4HF 3 "register_operand" "x")
	    (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
	 (match_operand:V4SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlsl\\t%0.4s, %2.4h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)

;; FMLAL2 (by element): as FMLAL above, but using the high half of
;; operand 2.
(define_insn "aarch64_simd_fmlalq_lane_highv4sf"
  [(set (match_operand:V4SF 0 "register_operand" "=w")
	(fma:V4SF
	 (float_extend:V4SF
	  (vec_select:V4HF
	   (match_operand:V8HF 2 "register_operand" "w")
	   (match_operand:V8HF 4 "vect_par_cnst_hi_half" "")))
	 (float_extend:V4SF
	  (vec_duplicate:V4HF
	   (vec_select:HF
	    (match_operand:V4HF 3 "register_operand" "x")
	    (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
	 (match_operand:V4SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlal2\\t%0.4s, %2.4h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)

;; FMLSL2 (by element): multiply-subtract using the high half of operand 2.
(define_insn "aarch64_simd_fmlslq_lane_highv4sf"
  [(set (match_operand:V4SF 0 "register_operand" "=w")
	(fma:V4SF
	 (float_extend:V4SF
	  (neg:V4HF
	   (vec_select:V4HF
	    (match_operand:V8HF 2 "register_operand" "w")
	    (match_operand:V8HF 4 "vect_par_cnst_hi_half" ""))))
	 (float_extend:V4SF
	  (vec_duplicate:V4HF
	   (vec_select:HF
	    (match_operand:V4HF 3 "register_operand" "x")
	    (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
	 (match_operand:V4SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlsl2\\t%0.4s, %2.4h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)
8121
;; pmull

;; PMULL: 64x64 -> 128-bit carry-less (polynomial) multiply of the low
;; D registers; requires the AES crypto extension.
(define_insn "aarch64_crypto_pmulldi"
  [(set (match_operand:TI 0 "register_operand" "=w")
        (unspec:TI  [(match_operand:DI 1 "register_operand" "w")
		     (match_operand:DI 2 "register_operand" "w")]
		    UNSPEC_PMULL))]
 "TARGET_SIMD && TARGET_AES"
 "pmull\\t%0.1q, %1.1d, %2.1d"
  [(set_attr "type" "crypto_pmull")]
)

;; PMULL2: as above, but multiplying the high 64-bit halves of the two
;; V2DI source registers.
(define_insn "aarch64_crypto_pmullv2di"
 [(set (match_operand:TI 0 "register_operand" "=w")
       (unspec:TI [(match_operand:V2DI 1 "register_operand" "w")
		   (match_operand:V2DI 2 "register_operand" "w")]
		  UNSPEC_PMULL2))]
  "TARGET_SIMD && TARGET_AES"
  "pmull2\\t%0.1q, %1.2d, %2.2d"
  [(set_attr "type" "crypto_pmull")]
)
8143
;; Sign- or zero-extend a 64-bit integer vector to a 128-bit vector.
;; Standard-named extend pattern; emits SXTL or UXTL depending on the
;; ANY_EXTEND iterator.
(define_insn "<optab><Vnarrowq><mode>2"
  [(set (match_operand:VQN 0 "register_operand" "=w")
	(ANY_EXTEND:VQN (match_operand:<VNARROWQ> 1 "register_operand" "w")))]
  "TARGET_SIMD"
  "<su>xtl\t%0.<Vtype>, %1.<Vntype>"
  [(set_attr "type" "neon_shift_imm_long")]
)

;; Named expander for the vmovl intrinsics; the RTL is identical to the
;; extend insn above, so no preparation code is needed.
(define_expand "aarch64_<su>xtl<mode>"
  [(set (match_operand:VQN 0 "register_operand" "=w")
	(ANY_EXTEND:VQN (match_operand:<VNARROWQ> 1 "register_operand" "w")))]
  "TARGET_SIMD"
  ""
)

;; Named expander for the vmovn intrinsics; matches the truncate insn
;; below.
(define_expand "aarch64_xtn<mode>"
  [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
	(truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand" "w")))]
  "TARGET_SIMD"
  ""
)

;; Truncate a 128-bit integer vector to a 64-bit vector.
(define_insn "trunc<mode><Vnarrowq>2"
  [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
	(truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand" "w")))]
  "TARGET_SIMD"
  "xtn\t%0.<Vntype>, %1.<Vtype>"
  [(set_attr "type" "neon_shift_imm_narrow_q")]
)
8175
;; XTN2, little-endian: narrow operand 2 and place it in the upper half of
;; the destination, keeping operand 1 (tied to the destination) as the
;; lower half.  In LE the kept half is the first vec_concat element.
(define_insn "aarch64_xtn2<mode>_le"
  [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
	(vec_concat:<VNARROWQ2>
	  (match_operand:<VNARROWQ> 1 "register_operand" "0")
	  (truncate:<VNARROWQ> (match_operand:VQN 2 "register_operand" "w"))))]
  "TARGET_SIMD && !BYTES_BIG_ENDIAN"
  "xtn2\t%0.<V2ntype>, %2.<Vtype>"
  [(set_attr "type" "neon_shift_imm_narrow_q")]
)

;; XTN2, big-endian: same instruction, but in BE the narrowed half is the
;; first vec_concat element.
(define_insn "aarch64_xtn2<mode>_be"
  [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
	(vec_concat:<VNARROWQ2>
	  (truncate:<VNARROWQ> (match_operand:VQN 2 "register_operand" "w"))
	  (match_operand:<VNARROWQ> 1 "register_operand" "0")))]
  "TARGET_SIMD && BYTES_BIG_ENDIAN"
  "xtn2\t%0.<V2ntype>, %2.<Vtype>"
  [(set_attr "type" "neon_shift_imm_narrow_q")]
)

;; Expander for the vmovn_high intrinsics: dispatch to the endian-specific
;; XTN2 pattern above.
(define_expand "aarch64_xtn2<mode>"
  [(match_operand:<VNARROWQ2> 0 "register_operand")
   (match_operand:<VNARROWQ> 1 "register_operand")
   (truncate:<VNARROWQ> (match_operand:VQN 2 "register_operand"))]
  "TARGET_SIMD"
  {
    if (BYTES_BIG_ENDIAN)
      emit_insn (gen_aarch64_xtn2<mode>_be (operands[0], operands[1],
					     operands[2]));
    else
      emit_insn (gen_aarch64_xtn2<mode>_le (operands[0], operands[1],
					     operands[2]));
    DONE;
  }
)
8211
;; BFDOT: BFloat16 2-way dot product of operands 2 and 3, accumulating
;; into the SF vector accumulator (operand 1, tied to the destination).
(define_insn "aarch64_bfdot<mode>"
  [(set (match_operand:VDQSF 0 "register_operand" "=w")
	(plus:VDQSF
	  (unspec:VDQSF
	   [(match_operand:<VBFMLA_W> 2 "register_operand" "w")
	    (match_operand:<VBFMLA_W> 3 "register_operand" "w")]
	    UNSPEC_BFDOT)
	  (match_operand:VDQSF 1 "register_operand" "0")))]
  "TARGET_BF16_SIMD"
  "bfdot\t%0.<Vtype>, %2.<Vbfdottype>, %3.<Vbfdottype>"
  [(set_attr "type" "neon_dot<q>")]
)

;; BFDOT (by element): operand 4 indexes a *pair* of BF16 elements of
;; operand 3, hence the lane count is halved (nunits / 2) when correcting
;; for endianness, and the lane is printed with the 2h arrangement.
(define_insn "aarch64_bfdot_lane<VBF:isquadop><VDQSF:mode>"
  [(set (match_operand:VDQSF 0 "register_operand" "=w")
	(plus:VDQSF
	  (unspec:VDQSF
	   [(match_operand:<VDQSF:VBFMLA_W> 2 "register_operand" "w")
	    (match_operand:VBF 3 "register_operand" "w")
	    (match_operand:SI 4 "const_int_operand" "n")]
	    UNSPEC_BFDOT)
	  (match_operand:VDQSF 1 "register_operand" "0")))]
  "TARGET_BF16_SIMD"
{
  int nunits = GET_MODE_NUNITS (<VBF:MODE>mode).to_constant ();
  int lane = INTVAL (operands[4]);
  /* Lanes address BF16 pairs, so there are nunits / 2 of them.  */
  operands[4] = gen_int_mode (ENDIAN_LANE_N (nunits / 2, lane), SImode);
  return "bfdot\t%0.<VDQSF:Vtype>, %2.<VDQSF:Vbfdottype>, %3.2h[%4]";
}
  [(set_attr "type" "neon_dot<VDQSF:q>")]
)
8243
;; vget_low/high_bf16

;; Extract the memory-order low V4BF half of a V8BF vector via the generic
;; get_half pattern.
(define_expand "aarch64_vget_lo_halfv8bf"
  [(match_operand:V4BF 0 "register_operand")
   (match_operand:V8BF 1 "register_operand")]
  "TARGET_BF16_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (V8BFmode, 8, false);
  emit_insn (gen_aarch64_get_halfv8bf (operands[0], operands[1], p));
  DONE;
})

;; As above, but extract the memory-order high half.
(define_expand "aarch64_vget_hi_halfv8bf"
  [(match_operand:V4BF 0 "register_operand")
   (match_operand:V8BF 1 "register_operand")]
  "TARGET_BF16_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (V8BFmode, 8, true);
  emit_insn (gen_aarch64_get_halfv8bf (operands[0], operands[1], p));
  DONE;
})
8264
;; bfmmla
;; BFMMLA: BFloat16 matrix multiply-accumulate of operands 2 and 3 into
;; the V4SF accumulator (operand 1, tied to the destination).
(define_insn "aarch64_bfmmlaqv4sf"
  [(set (match_operand:V4SF 0 "register_operand" "=w")
        (plus:V4SF (match_operand:V4SF 1 "register_operand" "0")
                   (unspec:V4SF [(match_operand:V8BF 2 "register_operand" "w")
                                 (match_operand:V8BF 3 "register_operand" "w")]
                    UNSPEC_BFMMLA)))]
  "TARGET_BF16_SIMD"
  "bfmmla\\t%0.4s, %2.8h, %3.8h"
  [(set_attr "type" "neon_fp_mla_s_q")]
)

;; bfmlal<bt>
;; BFMLALB/BFMLALT: widening multiply-accumulate of the even (bottom) or
;; odd (top) BF16 elements, selected by the BF_MLA iterator's <bt> suffix.
(define_insn "aarch64_bfmlal<bt>v4sf"
  [(set (match_operand:V4SF 0 "register_operand" "=w")
        (plus: V4SF (match_operand:V4SF 1 "register_operand" "0")
                    (unspec:V4SF [(match_operand:V8BF 2 "register_operand" "w")
                                  (match_operand:V8BF 3 "register_operand" "w")]
                     BF_MLA)))]
  "TARGET_BF16_SIMD"
  "bfmlal<bt>\\t%0.4s, %2.8h, %3.8h"
  [(set_attr "type" "neon_fp_mla_s_q")]
)

;; BFMLALB/BFMLALT (by element): as above, but multiplying by the BF16
;; lane of operand 3 selected by operand 4 (endian-adjusted at output
;; time).
(define_insn "aarch64_bfmlal<bt>_lane<q>v4sf"
  [(set (match_operand:V4SF 0 "register_operand" "=w")
        (plus: V4SF (match_operand:V4SF 1 "register_operand" "0")
                    (unspec:V4SF [(match_operand:V8BF 2 "register_operand" "w")
                                  (match_operand:VBF 3 "register_operand" "w")
                                  (match_operand:SI 4 "const_int_operand" "n")]
                     BF_MLA)))]
  "TARGET_BF16_SIMD"
{
  /* Correct the lane number for big-endian lane numbering.  */
  operands[4] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[4]));
  return "bfmlal<bt>\\t%0.4s, %2.8h, %3.h[%4]";
}
  [(set_attr "type" "neon_fp_mla_s_scalar_q")]
)
8303
;; 8-bit integer matrix multiply-accumulate
;; SMMLA/UMMLA/USMMLA (per the MATMUL iterator): multiply-accumulate the
;; V16QI operands 2 and 3 into the V4SI accumulator (operand 1, tied to
;; the destination).
(define_insn "aarch64_simd_<sur>mmlav16qi"
  [(set (match_operand:V4SI 0 "register_operand" "=w")
	(plus:V4SI
	 (unspec:V4SI [(match_operand:V16QI 2 "register_operand" "w")
		       (match_operand:V16QI 3 "register_operand" "w")] MATMUL)
	 (match_operand:V4SI 1 "register_operand" "0")))]
  "TARGET_I8MM"
  "<sur>mmla\\t%0.4s, %2.16b, %3.16b"
  [(set_attr "type" "neon_mla_s_q")]
)
8315
;; bfcvtn
;; BFCVTN: convert four SF elements to BF16.  The V4SF_TO_BF iterator
;; covers both V4BF and V8BF destinations; the same .4h form is printed
;; for each.
(define_insn "aarch64_bfcvtn<q><mode>"
  [(set (match_operand:V4SF_TO_BF 0 "register_operand" "=w")
        (unspec:V4SF_TO_BF [(match_operand:V4SF 1 "register_operand" "w")]
                            UNSPEC_BFCVTN))]
  "TARGET_BF16_SIMD"
  "bfcvtn\\t%0.4h, %1.4s"
  [(set_attr "type" "neon_fp_cvt_narrow_s_q")]
)

;; BFCVTN2: convert four SF elements to BF16 into the upper half of the
;; destination, preserving the lower half (operand 1, tied to the
;; destination).
(define_insn "aarch64_bfcvtn2v8bf"
  [(set (match_operand:V8BF 0 "register_operand" "=w")
        (unspec:V8BF [(match_operand:V8BF 1 "register_operand" "0")
                      (match_operand:V4SF 2 "register_operand" "w")]
                      UNSPEC_BFCVTN2))]
  "TARGET_BF16_SIMD"
  "bfcvtn2\\t%0.8h, %2.4s"
  [(set_attr "type" "neon_fp_cvt_narrow_s_q")]
)

;; BFCVT: scalar SF -> BF16 conversion.
(define_insn "aarch64_bfcvtbf"
  [(set (match_operand:BF 0 "register_operand" "=w")
        (unspec:BF [(match_operand:SF 1 "register_operand" "w")]
                    UNSPEC_BFCVT))]
  "TARGET_BF16_FP"
  "bfcvt\\t%h0, %s1"
  [(set_attr "type" "f_cvt")]
)
8344
;; Use shl/shll/shll2 to convert BF scalar/vector modes to SF modes.
;; A BF16 value is the high 16 bits of the corresponding SF bit pattern,
;; so a left shift by 16 widens it exactly.

;; Widen the low four BF16 elements of operand 1 to V4SF.
(define_insn "aarch64_vbfcvt<mode>"
  [(set (match_operand:V4SF 0 "register_operand" "=w")
	(unspec:V4SF [(match_operand:VBF 1 "register_operand" "w")]
		      UNSPEC_BFCVTN))]
  "TARGET_BF16_SIMD"
  "shll\\t%0.4s, %1.4h, #16"
  [(set_attr "type" "neon_shift_imm_long")]
)

;; Widen the high four BF16 elements of a V8BF vector to V4SF.
(define_insn "aarch64_vbfcvt_highv8bf"
  [(set (match_operand:V4SF 0 "register_operand" "=w")
	(unspec:V4SF [(match_operand:V8BF 1 "register_operand" "w")]
		      UNSPEC_BFCVTN2))]
  "TARGET_BF16_SIMD"
  "shll2\\t%0.4s, %1.8h, #16"
  [(set_attr "type" "neon_shift_imm_long")]
)

;; Widen a scalar BF16 value to SF via a 64-bit register shift.
(define_insn "aarch64_bfcvtsf"
  [(set (match_operand:SF 0 "register_operand" "=w")
	(unspec:SF [(match_operand:BF 1 "register_operand" "w")]
		    UNSPEC_BFCVT))]
  "TARGET_BF16_FP"
  "shl\\t%d0, %d1, #16"
  [(set_attr "type" "neon_shift_imm")]
)
8372