1;; Machine description for AArch64 AdvSIMD architecture.
2;; Copyright (C) 2011-2018 Free Software Foundation, Inc.
3;; Contributed by ARM Ltd.
4;;
5;; This file is part of GCC.
6;;
7;; GCC is free software; you can redistribute it and/or modify it
8;; under the terms of the GNU General Public License as published by
9;; the Free Software Foundation; either version 3, or (at your option)
10;; any later version.
11;;
12;; GCC is distributed in the hope that it will be useful, but
13;; WITHOUT ANY WARRANTY; without even the implied warranty of
14;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15;; General Public License for more details.
16;;
17;; You should have received a copy of the GNU General Public License
18;; along with GCC; see the file COPYING3.  If not see
19;; <http://www.gnu.org/licenses/>.
20
;; Move expander for all 64-bit and 128-bit AdvSIMD vector modes,
;; including the FP16 variants.  Forces illegal (mem := non-trivial-imm)
;; combinations into a register so the move insn patterns below match.
(define_expand "mov<mode>"
  [(set (match_operand:VALL_F16 0 "nonimmediate_operand" "")
	(match_operand:VALL_F16 1 "general_operand" ""))]
  "TARGET_SIMD"
  "
  /* Force the operand into a register if it is not an
     immediate whose use can be replaced with xzr.
     If the mode is 16 bytes wide, then we will be doing
     a stp in DI mode, so we check the validity of that.
     If the mode is 8 bytes wide, then we will be doing a
     normal str, so the check need not apply.  */
  if (GET_CODE (operands[0]) == MEM
      && !(aarch64_simd_imm_zero (operands[1], <MODE>mode)
	   && ((known_eq (GET_MODE_SIZE (<MODE>mode), 16)
		&& aarch64_mem_pair_operand (operands[0], DImode))
	       || known_eq (GET_MODE_SIZE (<MODE>mode), 8))))
      operands[1] = force_reg (<MODE>mode, operands[1]);
  "
)
40
;; Misaligned vector move expander.  AdvSIMD loads/stores tolerate
;; misalignment, so no special code is needed beyond register coercion.
(define_expand "movmisalign<mode>"
  [(set (match_operand:VALL 0 "nonimmediate_operand" "")
        (match_operand:VALL 1 "general_operand" ""))]
  "TARGET_SIMD"
{
  /* This pattern is not permitted to fail during expansion: if both arguments
     are non-registers (e.g. memory := constant, which can be created by the
     auto-vectorizer), force operand 1 into a register.  */
  if (!register_operand (operands[0], <MODE>mode)
      && !register_operand (operands[1], <MODE>mode))
    operands[1] = force_reg (<MODE>mode, operands[1]);
})
53
;; Duplicate a scalar into every lane of an integer vector.  Alternative 0
;; duplicates lane 0 of a SIMD register; alternative 1 (discouraged, "?r")
;; transfers from a general-purpose register.
(define_insn "aarch64_simd_dup<mode>"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w, w")
	(vec_duplicate:VDQ_I
	  (match_operand:<VEL> 1 "register_operand" "w,?r")))]
  "TARGET_SIMD"
  "@
   dup\\t%0.<Vtype>, %1.<Vetype>[0]
   dup\\t%0.<Vtype>, %<vw>1"
  [(set_attr "type" "neon_dup<q>, neon_from_gp<q>")]
)
64
;; Floating-point variant of the duplicate above; FP scalars already live
;; in SIMD registers, so only the lane-0 DUP form is needed.
(define_insn "aarch64_simd_dup<mode>"
  [(set (match_operand:VDQF_F16 0 "register_operand" "=w")
	(vec_duplicate:VDQF_F16
	  (match_operand:<VEL> 1 "register_operand" "w")))]
  "TARGET_SIMD"
  "dup\\t%0.<Vtype>, %1.<Vetype>[0]"
  [(set_attr "type" "neon_dup<q>")]
)
73
;; Duplicate one selected lane of a vector across a whole vector of the
;; same mode.  The lane number is remapped for big-endian layouts.
(define_insn "aarch64_dup_lane<mode>"
  [(set (match_operand:VALL_F16 0 "register_operand" "=w")
	(vec_duplicate:VALL_F16
	  (vec_select:<VEL>
	    (match_operand:VALL_F16 1 "register_operand" "w")
	    (parallel [(match_operand:SI 2 "immediate_operand" "i")])
          )))]
  "TARGET_SIMD"
  {
    /* Convert the GCC lane number to the architectural lane number.  */
    operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
    return "dup\\t%0.<Vtype>, %1.<Vetype>[%2]";
  }
  [(set_attr "type" "neon_dup<q>")]
)
88
;; As aarch64_dup_lane, but the source vector has the opposite width
;; (64-bit vs 128-bit) to the destination; the lane index is interpreted
;; in the source's mode.
(define_insn "aarch64_dup_lane_<vswap_width_name><mode>"
  [(set (match_operand:VALL_F16_NO_V2Q 0 "register_operand" "=w")
	(vec_duplicate:VALL_F16_NO_V2Q
	  (vec_select:<VEL>
	    (match_operand:<VSWAP_WIDTH> 1 "register_operand" "w")
	    (parallel [(match_operand:SI 2 "immediate_operand" "i")])
          )))]
  "TARGET_SIMD"
  {
    /* Lane remap must use the *source* vector mode, not the destination.  */
    operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
    return "dup\\t%0.<Vtype>, %1.<Vetype>[%2]";
  }
  [(set_attr "type" "neon_dup<q>")]
)
103
;; Move insn for 64-bit vector modes.  Alternatives cover: load, store of
;; zero via xzr, store, SIMD reg-reg move, SIMD->GP, GP->SIMD, GP-GP move
;; and vector immediate.
(define_insn "*aarch64_simd_mov<VD:mode>"
  [(set (match_operand:VD 0 "nonimmediate_operand"
		"=w, m,  m,  w, ?r, ?w, ?r, w")
	(match_operand:VD 1 "general_operand"
		"m,  Dz, w,  w,  w,  r,  r, Dn"))]
  "TARGET_SIMD
   && (register_operand (operands[0], <MODE>mode)
       || aarch64_simd_reg_or_zero (operands[1], <MODE>mode))"
{
   switch (which_alternative)
     {
     case 0: return "ldr\t%d0, %1";
     case 1: return "str\txzr, %0";	/* Store of vector zero.  */
     case 2: return "str\t%d1, %0";
     case 3: return "mov\t%0.<Vbtype>, %1.<Vbtype>";
     case 4: return "umov\t%0, %1.d[0]";
     case 5: return "fmov\t%d0, %1";
     case 6: return "mov\t%0, %1";
     case 7:
	return aarch64_output_simd_mov_immediate (operands[1], 64);
     default: gcc_unreachable ();
     }
}
  [(set_attr "type" "neon_load1_1reg<q>, store_8, neon_store1_1reg<q>,\
		     neon_logic<q>, neon_to_gp<q>, f_mcr,\
		     mov_reg, neon_move<q>")]
)
131
;; Move insn for 128-bit vector modes.  Moves that involve GP registers
;; (cases 4-6) emit "#" and are split after reload into two DImode moves;
;; hence the length of 8 for those alternatives.
(define_insn "*aarch64_simd_mov<VQ:mode>"
  [(set (match_operand:VQ 0 "nonimmediate_operand"
		"=w, Umq,  m,  w, ?r, ?w, ?r, w")
	(match_operand:VQ 1 "general_operand"
		"m,  Dz, w,  w,  w,  r,  r, Dn"))]
  "TARGET_SIMD
   && (register_operand (operands[0], <MODE>mode)
       || aarch64_simd_reg_or_zero (operands[1], <MODE>mode))"
{
  switch (which_alternative)
    {
    case 0:
	return "ldr\t%q0, %1";
    case 1:
	/* 128-bit store of zero: pair of xzr stores.  */
	return "stp\txzr, xzr, %0";
    case 2:
	return "str\t%q1, %0";
    case 3:
	return "mov\t%0.<Vbtype>, %1.<Vbtype>";
    case 4:
    case 5:
    case 6:
	/* Split after reload (see the define_splits below).  */
	return "#";
    case 7:
	return aarch64_output_simd_mov_immediate (operands[1], 128);
    default:
	gcc_unreachable ();
    }
}
  [(set_attr "type" "neon_load1_1reg<q>, store_16, neon_store1_1reg<q>,\
		     neon_logic<q>, multiple, multiple,\
		     multiple, neon_move<q>")
   (set_attr "length" "4,4,4,4,8,8,8,4")]
)
166
167;; When storing lane zero we can use the normal STR and its more permissive
168;; addressing modes.
169
;; Store of (architectural) lane 0 as a plain scalar STR; the condition
;; remaps the GCC lane number for endianness before comparing with 0.
(define_insn "aarch64_store_lane0<mode>"
  [(set (match_operand:<VEL> 0 "memory_operand" "=m")
	(vec_select:<VEL> (match_operand:VALL_F16 1 "register_operand" "w")
			(parallel [(match_operand 2 "const_int_operand" "n")])))]
  "TARGET_SIMD
   && ENDIAN_LANE_N (<nunits>, INTVAL (operands[2])) == 0"
  "str\\t%<Vetype>1, %0"
  [(set_attr "type" "neon_store1_1reg<q>")]
)
179
;; LDP of two 64-bit vectors.  The condition checks that the second
;; memory address is exactly one vector beyond the first.
(define_insn "load_pair<mode>"
  [(set (match_operand:VD 0 "register_operand" "=w")
	(match_operand:VD 1 "aarch64_mem_pair_operand" "Ump"))
   (set (match_operand:VD 2 "register_operand" "=w")
	(match_operand:VD 3 "memory_operand" "m"))]
  "TARGET_SIMD
   && rtx_equal_p (XEXP (operands[3], 0),
		   plus_constant (Pmode,
				  XEXP (operands[1], 0),
				  GET_MODE_SIZE (<MODE>mode)))"
  "ldp\\t%d0, %d2, %1"
  [(set_attr "type" "neon_ldp")]
)
193
;; STP of two 64-bit vectors; mirror image of load_pair above.
(define_insn "store_pair<mode>"
  [(set (match_operand:VD 0 "aarch64_mem_pair_operand" "=Ump")
	(match_operand:VD 1 "register_operand" "w"))
   (set (match_operand:VD 2 "memory_operand" "=m")
	(match_operand:VD 3 "register_operand" "w"))]
  "TARGET_SIMD
   && rtx_equal_p (XEXP (operands[2], 0),
		   plus_constant (Pmode,
				  XEXP (operands[0], 0),
				  GET_MODE_SIZE (<MODE>mode)))"
  "stp\\t%d1, %d3, %0"
  [(set_attr "type" "neon_stp")]
)
207
;; Split a 128-bit vector move between two general-purpose register pairs
;; into two DImode register moves after reload.
(define_split
  [(set (match_operand:VQ 0 "register_operand" "")
      (match_operand:VQ 1 "register_operand" ""))]
  "TARGET_SIMD && reload_completed
   && GP_REGNUM_P (REGNO (operands[0]))
   && GP_REGNUM_P (REGNO (operands[1]))"
  [(const_int 0)]
{
  aarch64_simd_emit_reg_reg_move (operands, DImode, 2);
  DONE;
})
219
;; Split a 128-bit vector move that crosses between the FP/SIMD and
;; general-purpose register files after reload.
(define_split
  [(set (match_operand:VQ 0 "register_operand" "")
        (match_operand:VQ 1 "register_operand" ""))]
  "TARGET_SIMD && reload_completed
   && ((FP_REGNUM_P (REGNO (operands[0])) && GP_REGNUM_P (REGNO (operands[1])))
       || (GP_REGNUM_P (REGNO (operands[0])) && FP_REGNUM_P (REGNO (operands[1]))))"
  [(const_int 0)]
{
  aarch64_split_simd_move (operands[0], operands[1]);
  DONE;
})
231
;; Expand a 128-bit vector move via two 64-bit halves.  GP -> SIMD uses
;; move_lo_quad/move_hi_quad; SIMD -> GP uses the mov_from_<mode>low/high
;; insns below.
(define_expand "aarch64_split_simd_mov<mode>"
  [(set (match_operand:VQ 0)
        (match_operand:VQ 1))]
  "TARGET_SIMD"
  {
    rtx dst = operands[0];
    rtx src = operands[1];

    if (GP_REGNUM_P (REGNO (src)))
      {
        /* Source is a GP register pair: insert each half into dst.  */
        rtx src_low_part = gen_lowpart (<VHALF>mode, src);
        rtx src_high_part = gen_highpart (<VHALF>mode, src);

        emit_insn
          (gen_move_lo_quad_<mode> (dst, src_low_part));
        emit_insn
          (gen_move_hi_quad_<mode> (dst, src_high_part));
      }

    else
      {
        /* Source is a SIMD register: extract each half into dst.  */
        rtx dst_low_part = gen_lowpart (<VHALF>mode, dst);
        rtx dst_high_part = gen_highpart (<VHALF>mode, dst);
	rtx lo = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
	rtx hi = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);

        emit_insn
          (gen_aarch64_simd_mov_from_<mode>low (dst_low_part, src, lo));
        emit_insn
          (gen_aarch64_simd_mov_from_<mode>high (dst_high_part, src, hi));
      }
    DONE;
  }
)
266
;; Move the low 64-bit half of a 128-bit vector to a GP register (UMOV of
;; doubleword lane 0).  Only valid after reload.
(define_insn "aarch64_simd_mov_from_<mode>low"
  [(set (match_operand:<VHALF> 0 "register_operand" "=r")
        (vec_select:<VHALF>
          (match_operand:VQ 1 "register_operand" "w")
          (match_operand:VQ 2 "vect_par_cnst_lo_half" "")))]
  "TARGET_SIMD && reload_completed"
  "umov\t%0, %1.d[0]"
  [(set_attr "type" "neon_to_gp<q>")
   (set_attr "length" "4")
  ])
277
;; Move the high 64-bit half of a 128-bit vector to a GP register (UMOV of
;; doubleword lane 1).  Only valid after reload.
(define_insn "aarch64_simd_mov_from_<mode>high"
  [(set (match_operand:<VHALF> 0 "register_operand" "=r")
        (vec_select:<VHALF>
          (match_operand:VQ 1 "register_operand" "w")
          (match_operand:VQ 2 "vect_par_cnst_hi_half" "")))]
  "TARGET_SIMD && reload_completed"
  "umov\t%0, %1.d[1]"
  [(set_attr "type" "neon_to_gp<q>")
   (set_attr "length" "4")
  ])
288
;; OR-NOT.  RTL is (~op1 | op2); AArch64 ORN negates its *second* source,
;; hence the swapped operand order in the template.
(define_insn "orn<mode>3"
 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
       (ior:VDQ_I (not:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w"))
		(match_operand:VDQ_I 2 "register_operand" "w")))]
 "TARGET_SIMD"
 "orn\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>"
  [(set_attr "type" "neon_logic<q>")]
)
297
;; Bit-clear.  RTL is (~op1 & op2); AArch64 BIC negates its *second*
;; source, hence the swapped operand order in the template.
(define_insn "bic<mode>3"
 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
       (and:VDQ_I (not:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w"))
		(match_operand:VDQ_I 2 "register_operand" "w")))]
 "TARGET_SIMD"
 "bic\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>"
  [(set_attr "type" "neon_logic<q>")]
)
306
;; Vector integer addition.
(define_insn "add<mode>3"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
        (plus:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
		  (match_operand:VDQ_I 2 "register_operand" "w")))]
  "TARGET_SIMD"
  "add\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_add<q>")]
)
315
;; Vector integer subtraction.
(define_insn "sub<mode>3"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
        (minus:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
		   (match_operand:VDQ_I 2 "register_operand" "w")))]
  "TARGET_SIMD"
  "sub\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_sub<q>")]
)
324
;; Vector integer multiply.  VDQ_BHSI only: there is no 64-bit-element MUL
;; in AdvSIMD.
(define_insn "mul<mode>3"
  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
        (mult:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")
		   (match_operand:VDQ_BHSI 2 "register_operand" "w")))]
  "TARGET_SIMD"
  "mul\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_mul_<Vetype><q>")]
)
333
;; Byte-swap each element via the appropriately sized REV instruction.
(define_insn "bswap<mode>2"
  [(set (match_operand:VDQHSD 0 "register_operand" "=w")
        (bswap:VDQHSD (match_operand:VDQHSD 1 "register_operand" "w")))]
  "TARGET_SIMD"
  "rev<Vrevsuff>\\t%0.<Vbtype>, %1.<Vbtype>"
  [(set_attr "type" "neon_rev<q>")]
)
341
;; Reverse the bits within each byte element.
(define_insn "aarch64_rbit<mode>"
  [(set (match_operand:VB 0 "register_operand" "=w")
	(unspec:VB [(match_operand:VB 1 "register_operand" "w")]
		   UNSPEC_RBIT))]
  "TARGET_SIMD"
  "rbit\\t%0.<Vbtype>, %1.<Vbtype>"
  [(set_attr "type" "neon_rbit")]
)
350
;; Count trailing zeros: byte-swap, then bit-reverse each byte (together a
;; full bit reversal of each element), then count leading zeros.
(define_expand "ctz<mode>2"
  [(set (match_operand:VS 0 "register_operand")
        (ctz:VS (match_operand:VS 1 "register_operand")))]
  "TARGET_SIMD"
  {
     emit_insn (gen_bswap<mode>2 (operands[0], operands[1]));
     /* RBIT works on byte elements, so view the vector as bytes.  */
     rtx op0_castsi2qi = simplify_gen_subreg(<VS:VSI2QI>mode, operands[0],
					     <MODE>mode, 0);
     emit_insn (gen_aarch64_rbit<VS:vsi2qi> (op0_castsi2qi, op0_castsi2qi));
     emit_insn (gen_clz<mode>2 (operands[0], operands[0]));
     DONE;
  }
)
364
;; xorsign (op1, op2) = op1 with its sign flipped by op2's sign bit:
;; extract op2's sign bits with AND, then XOR them into op1.  Done in the
;; equivalent integer vector mode.
(define_expand "xorsign<mode>3"
  [(match_operand:VHSDF 0 "register_operand")
   (match_operand:VHSDF 1 "register_operand")
   (match_operand:VHSDF 2 "register_operand")]
  "TARGET_SIMD"
{

  machine_mode imode = <V_INT_EQUIV>mode;
  rtx v_bitmask = gen_reg_rtx (imode);
  rtx op1x = gen_reg_rtx (imode);
  rtx op2x = gen_reg_rtx (imode);

  rtx arg1 = lowpart_subreg (imode, operands[1], <MODE>mode);
  rtx arg2 = lowpart_subreg (imode, operands[2], <MODE>mode);

  /* Mask with only the sign bit of each element set.  */
  int bits = GET_MODE_UNIT_BITSIZE (<MODE>mode) - 1;

  emit_move_insn (v_bitmask,
		  aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
						     HOST_WIDE_INT_M1U << bits));

  emit_insn (gen_and<v_int_equiv>3 (op2x, v_bitmask, arg2));
  emit_insn (gen_xor<v_int_equiv>3 (op1x, arg1, op2x));
  emit_move_insn (operands[0],
		  lowpart_subreg (<MODE>mode, op1x, imode));
  DONE;
}
)
393
394;; These instructions map to the __builtins for the Dot Product operations.
;; These instructions map to the __builtins for the Dot Product operations.
;; Accumulating SDOT/UDOT: operand 1 is tied to the destination ("0").
(define_insn "aarch64_<sur>dot<vsi2qi>"
  [(set (match_operand:VS 0 "register_operand" "=w")
	(plus:VS (match_operand:VS 1 "register_operand" "0")
		(unspec:VS [(match_operand:<VSI2QI> 2 "register_operand" "w")
			    (match_operand:<VSI2QI> 3 "register_operand" "w")]
		DOTPROD)))]
  "TARGET_DOTPROD"
  "<sur>dot\\t%0.<Vtype>, %2.<Vdottype>, %3.<Vdottype>"
  [(set_attr "type" "neon_dot")]
)
405
406;; These expands map to the Dot Product optab the vectorizer checks for.
407;; The auto-vectorizer expects a dot product builtin that also does an
408;; accumulation into the provided register.
409;; Given the following pattern
410;;
411;; for (i=0; i<len; i++) {
412;;     c = a[i] * b[i];
413;;     r += c;
414;; }
;; return r;
416;;
417;; This can be auto-vectorized to
418;; r  = a[0]*b[0] + a[1]*b[1] + a[2]*b[2] + a[3]*b[3];
419;;
420;; given enough iterations.  However the vectorizer can keep unrolling the loop
421;; r += a[4]*b[4] + a[5]*b[5] + a[6]*b[6] + a[7]*b[7];
422;; r += a[8]*b[8] + a[9]*b[9] + a[10]*b[10] + a[11]*b[11];
423;; ...
424;;
425;; and so the vectorizer provides r, in which the result has to be accumulated.
;; Dot-product optab expander: accumulate into operand 3 in place, then
;; copy the accumulator to the result register.
(define_expand "<sur>dot_prod<vsi2qi>"
  [(set (match_operand:VS 0 "register_operand")
	(plus:VS (unspec:VS [(match_operand:<VSI2QI> 1 "register_operand")
			    (match_operand:<VSI2QI> 2 "register_operand")]
		 DOTPROD)
		(match_operand:VS 3 "register_operand")))]
  "TARGET_DOTPROD"
{
  emit_insn (
    gen_aarch64_<sur>dot<vsi2qi> (operands[3], operands[3], operands[1],
				    operands[2]));
  emit_insn (gen_rtx_SET (operands[0], operands[3]));
  DONE;
})
440
441;; These instructions map to the __builtins for the Dot Product
442;; indexed operations.
;; These instructions map to the __builtins for the Dot Product
;; indexed operations (lane from a 64-bit vector, indexed in 4-byte
;; groups, hence ".4b[%4]").
(define_insn "aarch64_<sur>dot_lane<vsi2qi>"
  [(set (match_operand:VS 0 "register_operand" "=w")
	(plus:VS (match_operand:VS 1 "register_operand" "0")
		(unspec:VS [(match_operand:<VSI2QI> 2 "register_operand" "w")
			    (match_operand:V8QI 3 "register_operand" "<h_con>")
			    (match_operand:SI 4 "immediate_operand" "i")]
		DOTPROD)))]
  "TARGET_DOTPROD"
  {
    operands[4] = aarch64_endian_lane_rtx (V8QImode, INTVAL (operands[4]));
    return "<sur>dot\\t%0.<Vtype>, %2.<Vdottype>, %3.4b[%4]";
  }
  [(set_attr "type" "neon_dot")]
)
457
;; As aarch64_<sur>dot_lane, but the lane comes from a 128-bit vector.
(define_insn "aarch64_<sur>dot_laneq<vsi2qi>"
  [(set (match_operand:VS 0 "register_operand" "=w")
	(plus:VS (match_operand:VS 1 "register_operand" "0")
		(unspec:VS [(match_operand:<VSI2QI> 2 "register_operand" "w")
			    (match_operand:V16QI 3 "register_operand" "<h_con>")
			    (match_operand:SI 4 "immediate_operand" "i")]
		DOTPROD)))]
  "TARGET_DOTPROD"
  {
    operands[4] = aarch64_endian_lane_rtx (V16QImode, INTVAL (operands[4]));
    return "<sur>dot\\t%0.<Vtype>, %2.<Vdottype>, %3.4b[%4]";
  }
  [(set_attr "type" "neon_dot")]
)
472
;; copysign (op1, op2): bitwise-select the sign bit from op2 and the rest
;; from op1 using BSL with a sign-bit mask.
(define_expand "copysign<mode>3"
  [(match_operand:VHSDF 0 "register_operand")
   (match_operand:VHSDF 1 "register_operand")
   (match_operand:VHSDF 2 "register_operand")]
  "TARGET_FLOAT && TARGET_SIMD"
{
  rtx v_bitmask = gen_reg_rtx (<V_INT_EQUIV>mode);
  int bits = GET_MODE_UNIT_BITSIZE (<MODE>mode) - 1;

  emit_move_insn (v_bitmask,
		  aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
						     HOST_WIDE_INT_M1U << bits));
  /* Where the mask is set (sign bit) take operand 2, else operand 1.  */
  emit_insn (gen_aarch64_simd_bsl<mode> (operands[0], v_bitmask,
					 operands[2], operands[1]));
  DONE;
}
)
490
;; Multiply a vector by one broadcast lane of another vector of the same
;; mode (combiner pattern for MUL/FMUL by element).
(define_insn "*aarch64_mul3_elt<mode>"
 [(set (match_operand:VMUL 0 "register_operand" "=w")
    (mult:VMUL
      (vec_duplicate:VMUL
	  (vec_select:<VEL>
	    (match_operand:VMUL 1 "register_operand" "<h_con>")
	    (parallel [(match_operand:SI 2 "immediate_operand")])))
      (match_operand:VMUL 3 "register_operand" "w")))]
  "TARGET_SIMD"
  {
    operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
    return "<f>mul\\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
  }
  [(set_attr "type" "neon<fp>_mul_<stype>_scalar<q>")]
)
506
;; As *aarch64_mul3_elt, but the lane is taken from a vector of the
;; opposite (64/128-bit) width; the lane index uses the source's mode.
(define_insn "*aarch64_mul3_elt_<vswap_width_name><mode>"
  [(set (match_operand:VMUL_CHANGE_NLANES 0 "register_operand" "=w")
     (mult:VMUL_CHANGE_NLANES
       (vec_duplicate:VMUL_CHANGE_NLANES
	  (vec_select:<VEL>
	    (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
	    (parallel [(match_operand:SI 2 "immediate_operand")])))
      (match_operand:VMUL_CHANGE_NLANES 3 "register_operand" "w")))]
  "TARGET_SIMD"
  {
    operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
    return "<f>mul\\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
  }
  [(set_attr "type" "neon<fp>_mul_<Vetype>_scalar<q>")]
)
522
;; Multiply a vector by a duplicated scalar, using the by-element form of
;; MUL/FMUL with lane 0 of the scalar's register.
;; Fix: removed the stray trailing semicolon after the output template —
;; in md syntax ';' begins a comment, so it was inert, but it was a typo.
(define_insn "*aarch64_mul3_elt_from_dup<mode>"
 [(set (match_operand:VMUL 0 "register_operand" "=w")
    (mult:VMUL
      (vec_duplicate:VMUL
	    (match_operand:<VEL> 1 "register_operand" "<h_con>"))
      (match_operand:VMUL 2 "register_operand" "w")))]
  "TARGET_SIMD"
  "<f>mul\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]"
  [(set_attr "type" "neon<fp>_mul_<stype>_scalar<q>")]
)
533
;; Reciprocal square-root estimate (vector and scalar FP modes).
(define_insn "aarch64_rsqrte<mode>"
  [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
	(unspec:VHSDF_HSDF [(match_operand:VHSDF_HSDF 1 "register_operand" "w")]
		     UNSPEC_RSQRTE))]
  "TARGET_SIMD"
  "frsqrte\\t%<v>0<Vmtype>, %<v>1<Vmtype>"
  [(set_attr "type" "neon_fp_rsqrte_<stype><q>")])
541
;; Reciprocal square-root step (Newton-Raphson refinement for FRSQRTE).
(define_insn "aarch64_rsqrts<mode>"
  [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
	(unspec:VHSDF_HSDF [(match_operand:VHSDF_HSDF 1 "register_operand" "w")
			    (match_operand:VHSDF_HSDF 2 "register_operand" "w")]
	 UNSPEC_RSQRTS))]
  "TARGET_SIMD"
  "frsqrts\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
  [(set_attr "type" "neon_fp_rsqrts_<stype><q>")])
550
;; Approximate reciprocal square root, expanded via the estimate/step
;; sequence in aarch64_emit_approx_sqrt (recip form).
(define_expand "rsqrt<mode>2"
  [(set (match_operand:VALLF 0 "register_operand" "=w")
	(unspec:VALLF [(match_operand:VALLF 1 "register_operand" "w")]
		     UNSPEC_RSQRT))]
  "TARGET_SIMD"
{
  aarch64_emit_approx_sqrt (operands[0], operands[1], true);
  DONE;
})
560
;; Scalar DF multiply by a selected lane of a V2DF vector, using the
;; vector by-element FMUL.
(define_insn "*aarch64_mul3_elt_to_64v2df"
  [(set (match_operand:DF 0 "register_operand" "=w")
     (mult:DF
       (vec_select:DF
	 (match_operand:V2DF 1 "register_operand" "w")
	 (parallel [(match_operand:SI 2 "immediate_operand")]))
       (match_operand:DF 3 "register_operand" "w")))]
  "TARGET_SIMD"
  {
    operands[2] = aarch64_endian_lane_rtx (V2DFmode, INTVAL (operands[2]));
    return "fmul\\t%0.2d, %3.2d, %1.d[%2]";
  }
  [(set_attr "type" "neon_fp_mul_d_scalar_q")]
)
575
;; Vector integer negate.
(define_insn "neg<mode>2"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
	(neg:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")))]
  "TARGET_SIMD"
  "neg\t%0.<Vtype>, %1.<Vtype>"
  [(set_attr "type" "neon_neg<q>")]
)
583
;; Vector integer absolute value.
(define_insn "abs<mode>2"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
        (abs:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")))]
  "TARGET_SIMD"
  "abs\t%0.<Vtype>, %1.<Vtype>"
  [(set_attr "type" "neon_abs<q>")]
)
591
592;; The intrinsic version of integer ABS must not be allowed to
;; combine with any operation with an integrated ABS step, such
594;; as SABD.
;; Intrinsic ABS kept as an unspec so combine cannot merge it with
;; surrounding operations (see comment above).
(define_insn "aarch64_abs<mode>"
  [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
	  (unspec:VSDQ_I_DI
	    [(match_operand:VSDQ_I_DI 1 "register_operand" "w")]
	   UNSPEC_ABS))]
  "TARGET_SIMD"
  "abs\t%<v>0<Vmtype>, %<v>1<Vmtype>"
  [(set_attr "type" "neon_abs<q>")]
)
604
;; Signed absolute difference: |op1 - op2| as a single SABD.
(define_insn "abd<mode>_3"
  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
	(abs:VDQ_BHSI (minus:VDQ_BHSI
		       (match_operand:VDQ_BHSI 1 "register_operand" "w")
		       (match_operand:VDQ_BHSI 2 "register_operand" "w"))))]
  "TARGET_SIMD"
  "sabd\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_abd<q>")]
)
614
;; Signed absolute difference and accumulate: op3 += |op1 - op2|
;; (operand 3 is tied to the destination).
(define_insn "aba<mode>_3"
  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
	(plus:VDQ_BHSI (abs:VDQ_BHSI (minus:VDQ_BHSI
			 (match_operand:VDQ_BHSI 1 "register_operand" "w")
			 (match_operand:VDQ_BHSI 2 "register_operand" "w")))
		       (match_operand:VDQ_BHSI 3 "register_operand" "0")))]
  "TARGET_SIMD"
  "saba\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_arith_acc<q>")]
)
625
;; Floating-point absolute difference: |op1 - op2| as a single FABD.
(define_insn "fabd<mode>3"
  [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
	(abs:VHSDF_HSDF
	  (minus:VHSDF_HSDF
	    (match_operand:VHSDF_HSDF 1 "register_operand" "w")
	    (match_operand:VHSDF_HSDF 2 "register_operand" "w"))))]
  "TARGET_SIMD"
  "fabd\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
  [(set_attr "type" "neon_fp_abd_<stype><q>")]
)
636
637;; For AND (vector, register) and BIC (vector, immediate)
;; Vector AND.  Alternative 0 is register AND; alternative 1 matches an
;; inverted-immediate mask ("Db") and emits BIC (vector, immediate) with
;; operand 1 tied to the destination.
(define_insn "and<mode>3"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w,w")
	(and:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w,0")
		   (match_operand:VDQ_I 2 "aarch64_reg_or_bic_imm" "w,Db")))]
  "TARGET_SIMD"
  {
    switch (which_alternative)
      {
      case 0:
	return "and\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>";
      case 1:
	return aarch64_output_simd_mov_immediate (operands[2], <bitsize>,
						  AARCH64_CHECK_BIC);
      default:
	gcc_unreachable ();
      }
  }
  [(set_attr "type" "neon_logic<q>")]
)
657
658;; For ORR (vector, register) and ORR (vector, immediate)
659(define_insn "ior<mode>3"
660  [(set (match_operand:VDQ_I 0 "register_operand" "=w,w")
661	(ior:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w,0")
662		   (match_operand:VDQ_I 2 "aarch64_reg_or_orr_imm" "w,Do")))]
663  "TARGET_SIMD"
664  {
665    switch (which_alternative)
666      {
667      case 0:
668	return "orr\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>";
669      case 1:
670	return aarch64_output_simd_mov_immediate (operands[2], <bitsize>,
671						  AARCH64_CHECK_ORR);
672      default:
673	gcc_unreachable ();
674      }
675  }
676  [(set_attr "type" "neon_logic<q>")]
677)
678
;; Vector exclusive-OR.
(define_insn "xor<mode>3"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
        (xor:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
		 (match_operand:VDQ_I 2 "register_operand" "w")))]
  "TARGET_SIMD"
  "eor\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>"
  [(set_attr "type" "neon_logic<q>")]
)
687
;; Vector bitwise NOT.
(define_insn "one_cmpl<mode>2"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
        (not:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")))]
  "TARGET_SIMD"
  "not\t%0.<Vbtype>, %1.<Vbtype>"
  [(set_attr "type" "neon_logic<q>")]
)
695
;; Insert a scalar into one lane of a vector.  Operand 2 arrives as a
;; one-hot vec_merge mask; it is converted to a lane number (endian
;; corrected) and back to a mask for the %p2 print.  Sources: GP register
;; (INS from GP), SIMD lane 0 (INS element), or memory (LD1 to one lane).
(define_insn "aarch64_simd_vec_set<mode>"
  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w,w,w")
        (vec_merge:VDQ_BHSI
	    (vec_duplicate:VDQ_BHSI
		(match_operand:<VEL> 1 "aarch64_simd_general_operand" "r,w,Utv"))
	    (match_operand:VDQ_BHSI 3 "register_operand" "0,0,0")
	    (match_operand:SI 2 "immediate_operand" "i,i,i")))]
  "TARGET_SIMD"
  {
   int elt = ENDIAN_LANE_N (<nunits>, exact_log2 (INTVAL (operands[2])));
   operands[2] = GEN_INT ((HOST_WIDE_INT) 1 << elt);
   switch (which_alternative)
     {
     case 0:
	return "ins\\t%0.<Vetype>[%p2], %w1";
     case 1:
	return "ins\\t%0.<Vetype>[%p2], %1.<Vetype>[0]";
     case 2:
        return "ld1\\t{%0.<Vetype>}[%p2], %1";
     default:
	gcc_unreachable ();
     }
  }
  [(set_attr "type" "neon_from_gp<q>, neon_ins<q>, neon_load1_one_lane<q>")]
)
721
;; Copy one lane of a vector into a lane of another vector of the same
;; mode (INS element-to-element).  Operand 2 is the one-hot destination
;; lane mask, operand 4 the source lane number.
(define_insn "*aarch64_simd_vec_copy_lane<mode>"
  [(set (match_operand:VALL_F16 0 "register_operand" "=w")
	(vec_merge:VALL_F16
	    (vec_duplicate:VALL_F16
	      (vec_select:<VEL>
		(match_operand:VALL_F16 3 "register_operand" "w")
		(parallel
		  [(match_operand:SI 4 "immediate_operand" "i")])))
	    (match_operand:VALL_F16 1 "register_operand" "0")
	    (match_operand:SI 2 "immediate_operand" "i")))]
  "TARGET_SIMD"
  {
    int elt = ENDIAN_LANE_N (<nunits>, exact_log2 (INTVAL (operands[2])));
    operands[2] = GEN_INT (HOST_WIDE_INT_1 << elt);
    operands[4] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[4]));

    return "ins\t%0.<Vetype>[%p2], %3.<Vetype>[%4]";
  }
  [(set_attr "type" "neon_ins<q>")]
)
742
;; As above, but the source lane comes from a vector of the opposite
;; (64/128-bit) width; its lane index is remapped in the source's mode.
(define_insn "*aarch64_simd_vec_copy_lane_<vswap_width_name><mode>"
  [(set (match_operand:VALL_F16_NO_V2Q 0 "register_operand" "=w")
	(vec_merge:VALL_F16_NO_V2Q
	    (vec_duplicate:VALL_F16_NO_V2Q
	      (vec_select:<VEL>
		(match_operand:<VSWAP_WIDTH> 3 "register_operand" "w")
		(parallel
		  [(match_operand:SI 4 "immediate_operand" "i")])))
	    (match_operand:VALL_F16_NO_V2Q 1 "register_operand" "0")
	    (match_operand:SI 2 "immediate_operand" "i")))]
  "TARGET_SIMD"
  {
    int elt = ENDIAN_LANE_N (<nunits>, exact_log2 (INTVAL (operands[2])));
    operands[2] = GEN_INT (HOST_WIDE_INT_1 << elt);
    operands[4] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode,
					   INTVAL (operands[4]));

    return "ins\t%0.<Vetype>[%p2], %3.<Vetype>[%4]";
  }
  [(set_attr "type" "neon_ins<q>")]
)
764
;; Logical right shift by an immediate vector ("Dr": all lanes equal,
;; 1..element-width).
(define_insn "aarch64_simd_lshr<mode>"
 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
       (lshiftrt:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
		     (match_operand:VDQ_I  2 "aarch64_simd_rshift_imm" "Dr")))]
 "TARGET_SIMD"
 "ushr\t%0.<Vtype>, %1.<Vtype>, %2"
  [(set_attr "type" "neon_shift_imm<q>")]
)
773
;; Arithmetic right shift by an immediate vector.
(define_insn "aarch64_simd_ashr<mode>"
 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
       (ashiftrt:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
		     (match_operand:VDQ_I  2 "aarch64_simd_rshift_imm" "Dr")))]
 "TARGET_SIMD"
 "sshr\t%0.<Vtype>, %1.<Vtype>, %2"
  [(set_attr "type" "neon_shift_imm<q>")]
)
782
;; Left shift by an immediate vector ("Dl": all lanes equal,
;; 0..element-width-1).
(define_insn "aarch64_simd_imm_shl<mode>"
 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
       (ashift:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
		   (match_operand:VDQ_I  2 "aarch64_simd_lshift_imm" "Dl")))]
 "TARGET_SIMD"
  "shl\t%0.<Vtype>, %1.<Vtype>, %2"
  [(set_attr "type" "neon_shift_imm<q>")]
)
791
;; Left shift by a per-lane register amount (SSHL with non-negative
;; counts behaves as a left shift).
(define_insn "aarch64_simd_reg_sshl<mode>"
 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
       (ashift:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
		   (match_operand:VDQ_I 2 "register_operand" "w")))]
 "TARGET_SIMD"
 "sshl\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_shift_reg<q>")]
)
800
;; USHL by a per-lane register amount; kept as an unspec because negative
;; counts shift right, which plain ashift RTL cannot express.
(define_insn "aarch64_simd_reg_shl<mode>_unsigned"
 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
       (unspec:VDQ_I [(match_operand:VDQ_I 1 "register_operand" "w")
		    (match_operand:VDQ_I 2 "register_operand" "w")]
		   UNSPEC_ASHIFT_UNSIGNED))]
 "TARGET_SIMD"
 "ushl\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_shift_reg<q>")]
)
810
;; SSHL by a per-lane register amount; unspec for the same reason as the
;; unsigned variant above.
(define_insn "aarch64_simd_reg_shl<mode>_signed"
 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
       (unspec:VDQ_I [(match_operand:VDQ_I 1 "register_operand" "w")
		    (match_operand:VDQ_I 2 "register_operand" "w")]
		   UNSPEC_ASHIFT_SIGNED))]
 "TARGET_SIMD"
 "sshl\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_shift_reg<q>")]
)
820
;; Vector left shift by a scalar amount.  In-range constants use the
;; immediate SHL; otherwise the amount is duplicated into a vector and
;; SSHL is used.  Valid immediate range is 0..bit_width-1.
(define_expand "ashl<mode>3"
  [(match_operand:VDQ_I 0 "register_operand" "")
   (match_operand:VDQ_I 1 "register_operand" "")
   (match_operand:SI  2 "general_operand" "")]
 "TARGET_SIMD"
{
  int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
  int shift_amount;

  if (CONST_INT_P (operands[2]))
    {
      shift_amount = INTVAL (operands[2]);
      if (shift_amount >= 0 && shift_amount < bit_width)
        {
	  rtx tmp = aarch64_simd_gen_const_vector_dup (<MODE>mode,
						       shift_amount);
	  emit_insn (gen_aarch64_simd_imm_shl<mode> (operands[0],
						     operands[1],
						     tmp));
          DONE;
        }
      else
        {
          /* Out-of-range constant: fall through to the register path.  */
          operands[2] = force_reg (SImode, operands[2]);
        }
    }
  else if (MEM_P (operands[2]))
    {
      operands[2] = force_reg (SImode, operands[2]);
    }

  if (REG_P (operands[2]))
    {
      /* Broadcast the shift amount and use the register-shift insn.  */
      rtx tmp = gen_reg_rtx (<MODE>mode);
      emit_insn (gen_aarch64_simd_dup<mode> (tmp,
					     convert_to_mode (<VEL>mode,
							      operands[2],
							      0)));
      emit_insn (gen_aarch64_simd_reg_sshl<mode> (operands[0], operands[1],
						  tmp));
      DONE;
    }
  else
    FAIL;
}
)
867
;; Vector logical right shift by a scalar amount.  In-range constants
;; (1..bit_width for right shifts) use the immediate USHR; otherwise the
;; amount is negated, broadcast, and USHL performs the right shift.
(define_expand "lshr<mode>3"
  [(match_operand:VDQ_I 0 "register_operand" "")
   (match_operand:VDQ_I 1 "register_operand" "")
   (match_operand:SI  2 "general_operand" "")]
 "TARGET_SIMD"
{
  int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
  int shift_amount;

  if (CONST_INT_P (operands[2]))
    {
      shift_amount = INTVAL (operands[2]);
      if (shift_amount > 0 && shift_amount <= bit_width)
        {
	  rtx tmp = aarch64_simd_gen_const_vector_dup (<MODE>mode,
						       shift_amount);
          emit_insn (gen_aarch64_simd_lshr<mode> (operands[0],
						  operands[1],
						  tmp));
	  DONE;
	}
      else
        operands[2] = force_reg (SImode, operands[2]);
    }
  else if (MEM_P (operands[2]))
    {
      operands[2] = force_reg (SImode, operands[2]);
    }

  if (REG_P (operands[2]))
    {
      /* USHL shifts right for negative counts, so negate the amount.  */
      rtx tmp = gen_reg_rtx (SImode);
      rtx tmp1 = gen_reg_rtx (<MODE>mode);
      emit_insn (gen_negsi2 (tmp, operands[2]));
      emit_insn (gen_aarch64_simd_dup<mode> (tmp1,
					     convert_to_mode (<VEL>mode,
							      tmp, 0)));
      emit_insn (gen_aarch64_simd_reg_shl<mode>_unsigned (operands[0],
							  operands[1],
							  tmp1));
      DONE;
    }
  else
    FAIL;
}
)
914
;; Arithmetic (sign-fill) right shift of integer vectors.  Mirrors
;; lshr<mode>3: a constant amount in [1, element width] uses the immediate
;; form SSHR, otherwise the negated amount drives the register form of
;; SSHL (negative counts shift right).
(define_expand "ashr<mode>3"
  [(match_operand:VDQ_I 0 "register_operand" "")
   (match_operand:VDQ_I 1 "register_operand" "")
   (match_operand:SI  2 "general_operand" "")]
 "TARGET_SIMD"
{
  int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
  int shift_amount;

  if (CONST_INT_P (operands[2]))
    {
      shift_amount = INTVAL (operands[2]);
      /* SSHR takes immediate shifts of 1..bit_width inclusive.  */
      if (shift_amount > 0 && shift_amount <= bit_width)
        {
	  rtx tmp = aarch64_simd_gen_const_vector_dup (<MODE>mode,
						       shift_amount);
          emit_insn (gen_aarch64_simd_ashr<mode> (operands[0],
						  operands[1],
						  tmp));
          DONE;
	}
      else
        operands[2] = force_reg (SImode, operands[2]);
    }
  else if (MEM_P (operands[2]))
    {
      operands[2] = force_reg (SImode, operands[2]);
    }

  if (REG_P (operands[2]))
    {
      rtx tmp = gen_reg_rtx (SImode);
      rtx tmp1 = gen_reg_rtx (<MODE>mode);
      /* Shift right by N == shift left by -N with the register form
	 of SSHL.  */
      emit_insn (gen_negsi2 (tmp, operands[2]));
      emit_insn (gen_aarch64_simd_dup<mode> (tmp1,
					     convert_to_mode (<VEL>mode,
							      tmp, 0)));
      emit_insn (gen_aarch64_simd_reg_shl<mode>_signed (operands[0],
							operands[1],
							tmp1));
      DONE;
    }
  else
    FAIL;
}
)
961
;; Vector-by-vector left shift: each lane of operand 1 is shifted left by
;; the count in the corresponding lane of operand 2, via the register
;; form of SSHL.
(define_expand "vashl<mode>3"
 [(match_operand:VDQ_I 0 "register_operand" "")
  (match_operand:VDQ_I 1 "register_operand" "")
  (match_operand:VDQ_I 2 "register_operand" "")]
 "TARGET_SIMD"
{
  emit_insn (gen_aarch64_simd_reg_sshl<mode> (operands[0], operands[1],
					      operands[2]));
  DONE;
})
972
973;; Using mode VDQ_BHSI as there is no V2DImode neg!
974;; Negating individual lanes most certainly offsets the
975;; gain from vectorization.
;; Vector-by-vector arithmetic right shift: negate the per-lane counts and
;; use the register form of SSHL, which shifts right for negative counts.
(define_expand "vashr<mode>3"
 [(match_operand:VDQ_BHSI 0 "register_operand" "")
  (match_operand:VDQ_BHSI 1 "register_operand" "")
  (match_operand:VDQ_BHSI 2 "register_operand" "")]
 "TARGET_SIMD"
{
  rtx neg = gen_reg_rtx (<MODE>mode);
  emit_insn (gen_neg<mode>2 (neg, operands[2]));
  emit_insn (gen_aarch64_simd_reg_shl<mode>_signed (operands[0], operands[1],
						    neg));
  DONE;
})
988
989;; DI vector shift
;; Scalar DI arithmetic right shift for the vector unit, accepting shift
;; amounts up to 64 (the aarch64_shift_imm64_di predicate).
(define_expand "aarch64_ashr_simddi"
  [(match_operand:DI 0 "register_operand" "=w")
   (match_operand:DI 1 "register_operand" "w")
   (match_operand:SI 2 "aarch64_shift_imm64_di" "")]
  "TARGET_SIMD"
  {
    /* An arithmetic shift right by 64 fills the result with copies of the sign
       bit, just like asr by 63 - however the standard pattern does not handle
       a shift by 64.  */
    if (INTVAL (operands[2]) == 64)
      operands[2] = GEN_INT (63);
    emit_insn (gen_ashrdi3 (operands[0], operands[1], operands[2]));
    DONE;
  }
)
1005
;; Vector-by-vector logical right shift: negate the per-lane counts and
;; use the register form of USHL, which shifts right for negative counts.
(define_expand "vlshr<mode>3"
 [(match_operand:VDQ_BHSI 0 "register_operand" "")
  (match_operand:VDQ_BHSI 1 "register_operand" "")
  (match_operand:VDQ_BHSI 2 "register_operand" "")]
 "TARGET_SIMD"
{
  rtx neg = gen_reg_rtx (<MODE>mode);
  emit_insn (gen_neg<mode>2 (neg, operands[2]));
  emit_insn (gen_aarch64_simd_reg_shl<mode>_unsigned (operands[0], operands[1],
						      neg));
  DONE;
})
1018
;; Scalar DI logical right shift for the vector unit.  A shift by 64
;; (allowed by the predicate, but not by the standard pattern) simply
;; yields zero.
(define_expand "aarch64_lshr_simddi"
  [(match_operand:DI 0 "register_operand" "=w")
   (match_operand:DI 1 "register_operand" "w")
   (match_operand:SI 2 "aarch64_shift_imm64_di" "")]
  "TARGET_SIMD"
  {
    if (INTVAL (operands[2]) == 64)
      emit_move_insn (operands[0], const0_rtx);
    else
      emit_insn (gen_lshrdi3 (operands[0], operands[1], operands[2]));
    DONE;
  }
)
1032
;; Set one lane of an integer vector.  The lane index in operand 2 is
;; converted to the one-hot bitmask form that the vec_merge-based
;; aarch64_simd_vec_set pattern expects.
(define_expand "vec_set<mode>"
  [(match_operand:VDQ_BHSI 0 "register_operand")
   (match_operand:<VEL> 1 "register_operand")
   (match_operand:SI 2 "immediate_operand")]
  "TARGET_SIMD"
  {
    HOST_WIDE_INT elem = (HOST_WIDE_INT) 1 << INTVAL (operands[2]);
    emit_insn (gen_aarch64_simd_vec_set<mode> (operands[0], operands[1],
					    GEN_INT (elem), operands[0]));
    DONE;
  }
)
1045
1046;; For 64-bit modes we use ushl/r, as this does not require a SIMD zero.
;; Whole-register shift of a 64-bit vector by an immediate number of bits
;; (operand 2), shifting elements towards the low end and filling with
;; zeroes.  On big-endian the lane order within the register is reversed,
;; so the same element movement is achieved with SHL instead of USHR.
(define_insn "vec_shr_<mode>"
  [(set (match_operand:VD 0 "register_operand" "=w")
        (unspec:VD [(match_operand:VD 1 "register_operand" "w")
		    (match_operand:SI 2 "immediate_operand" "i")]
		   UNSPEC_VEC_SHR))]
  "TARGET_SIMD"
  {
    if (BYTES_BIG_ENDIAN)
      return "shl %d0, %d1, %2";
    else
      return "ushr %d0, %d1, %2";
  }
  [(set_attr "type" "neon_shift_imm")]
)
1061
;; Insert a DI value into one lane of a V2DI register, from either a
;; general register (INS from Xn) or the low lane of a vector register.
;; Operand 2 arrives as a one-hot lane mask; it is rewritten here to the
;; endian-corrected mask that the %p output modifier decodes.
(define_insn "aarch64_simd_vec_setv2di"
  [(set (match_operand:V2DI 0 "register_operand" "=w,w")
        (vec_merge:V2DI
	    (vec_duplicate:V2DI
		(match_operand:DI 1 "register_operand" "r,w"))
	    (match_operand:V2DI 3 "register_operand" "0,0")
	    (match_operand:SI 2 "immediate_operand" "i,i")))]
  "TARGET_SIMD"
  {
    int elt = ENDIAN_LANE_N (2, exact_log2 (INTVAL (operands[2])));
    operands[2] = GEN_INT ((HOST_WIDE_INT) 1 << elt);
    switch (which_alternative)
      {
      case 0:
	return "ins\\t%0.d[%p2], %1";
      case 1:
        return "ins\\t%0.d[%p2], %1.d[0]";
      default:
	gcc_unreachable ();
      }
  }
  [(set_attr "type" "neon_from_gp, neon_ins_q")]
)
1085
;; Set one lane of a V2DI vector; converts the lane index to the one-hot
;; mask form used by aarch64_simd_vec_setv2di.
(define_expand "vec_setv2di"
  [(match_operand:V2DI 0 "register_operand")
   (match_operand:DI 1 "register_operand")
   (match_operand:SI 2 "immediate_operand")]
  "TARGET_SIMD"
  {
    HOST_WIDE_INT elem = (HOST_WIDE_INT) 1 << INTVAL (operands[2]);
    emit_insn (gen_aarch64_simd_vec_setv2di (operands[0], operands[1],
					  GEN_INT (elem), operands[0]));
    DONE;
  }
)
1098
;; Insert a floating-point scalar (taken from lane 0 of a vector register)
;; into one lane of an FP/FP16 vector.  Operand 2 is a one-hot lane mask,
;; rewritten to its endian-corrected form for the %p output modifier.
(define_insn "aarch64_simd_vec_set<mode>"
  [(set (match_operand:VDQF_F16 0 "register_operand" "=w")
	(vec_merge:VDQF_F16
	    (vec_duplicate:VDQF_F16
		(match_operand:<VEL> 1 "register_operand" "w"))
	    (match_operand:VDQF_F16 3 "register_operand" "0")
	    (match_operand:SI 2 "immediate_operand" "i")))]
  "TARGET_SIMD"
  {
    int elt = ENDIAN_LANE_N (<nunits>, exact_log2 (INTVAL (operands[2])));

    operands[2] = GEN_INT ((HOST_WIDE_INT)1 << elt);
    return "ins\t%0.<Vetype>[%p2], %1.<Vetype>[0]";
  }
  [(set_attr "type" "neon_ins<q>")]
)
1115
;; Set one lane of an FP/FP16 vector; converts the lane index to the
;; one-hot mask form used by the insn above.  (The constraint strings on
;; a define_expand are ignored by genemit.)
(define_expand "vec_set<mode>"
  [(match_operand:VDQF_F16 0 "register_operand" "+w")
   (match_operand:<VEL> 1 "register_operand" "w")
   (match_operand:SI 2 "immediate_operand" "")]
  "TARGET_SIMD"
  {
    HOST_WIDE_INT elem = (HOST_WIDE_INT) 1 << INTVAL (operands[2]);
    emit_insn (gen_aarch64_simd_vec_set<mode> (operands[0], operands[1],
					  GEN_INT (elem), operands[0]));
    DONE;
  }
)
1128
1129
;; Integer multiply-accumulate: operand 0 = operand 1 + operand 2 * operand 3.
;; The accumulator (operand 1) is tied to the destination.
(define_insn "aarch64_mla<mode>"
 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
       (plus:VDQ_BHSI (mult:VDQ_BHSI
			(match_operand:VDQ_BHSI 2 "register_operand" "w")
			(match_operand:VDQ_BHSI 3 "register_operand" "w"))
		      (match_operand:VDQ_BHSI 1 "register_operand" "0")))]
 "TARGET_SIMD"
 "mla\t%0.<Vtype>, %2.<Vtype>, %3.<Vtype>"
  [(set_attr "type" "neon_mla_<Vetype><q>")]
)
1140
;; MLA with the multiplier broadcast from a selected lane of a same-width
;; vector; the lane number is corrected for endianness at output time.
(define_insn "*aarch64_mla_elt<mode>"
 [(set (match_operand:VDQHS 0 "register_operand" "=w")
       (plus:VDQHS
	 (mult:VDQHS
	   (vec_duplicate:VDQHS
	      (vec_select:<VEL>
		(match_operand:VDQHS 1 "register_operand" "<h_con>")
		  (parallel [(match_operand:SI 2 "immediate_operand")])))
	   (match_operand:VDQHS 3 "register_operand" "w"))
	 (match_operand:VDQHS 4 "register_operand" "0")))]
 "TARGET_SIMD"
  {
    operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
    return "mla\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
  }
  [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
)
1158
;; As *aarch64_mla_elt<mode>, but the lane is taken from a vector of the
;; opposite width (VSWAP_WIDTH), e.g. a D-reg lane used by a Q-reg MLA.
(define_insn "*aarch64_mla_elt_<vswap_width_name><mode>"
 [(set (match_operand:VDQHS 0 "register_operand" "=w")
       (plus:VDQHS
	 (mult:VDQHS
	   (vec_duplicate:VDQHS
	      (vec_select:<VEL>
		(match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
		  (parallel [(match_operand:SI 2 "immediate_operand")])))
	   (match_operand:VDQHS 3 "register_operand" "w"))
	 (match_operand:VDQHS 4 "register_operand" "0")))]
 "TARGET_SIMD"
  {
    operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
    return "mla\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
  }
  [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
)
1176
;; MLA with the multiplier duplicated from a scalar register; emitted as
;; the lane form using element 0.
(define_insn "*aarch64_mla_elt_merge<mode>"
  [(set (match_operand:VDQHS 0 "register_operand" "=w")
	(plus:VDQHS
	  (mult:VDQHS (vec_duplicate:VDQHS
		  (match_operand:<VEL> 1 "register_operand" "<h_con>"))
		(match_operand:VDQHS 2 "register_operand" "w"))
	  (match_operand:VDQHS 3 "register_operand" "0")))]
 "TARGET_SIMD"
 "mla\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]"
  [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
)
1188
;; Integer multiply-subtract: operand 0 = operand 1 - operand 2 * operand 3.
;; The accumulator (operand 1) is tied to the destination.
(define_insn "aarch64_mls<mode>"
 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
       (minus:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "0")
		   (mult:VDQ_BHSI (match_operand:VDQ_BHSI 2 "register_operand" "w")
			      (match_operand:VDQ_BHSI 3 "register_operand" "w"))))]
 "TARGET_SIMD"
 "mls\t%0.<Vtype>, %2.<Vtype>, %3.<Vtype>"
  [(set_attr "type" "neon_mla_<Vetype><q>")]
)
1198
;; MLS with the multiplier broadcast from a selected lane of a same-width
;; vector; the lane number is corrected for endianness at output time.
(define_insn "*aarch64_mls_elt<mode>"
 [(set (match_operand:VDQHS 0 "register_operand" "=w")
       (minus:VDQHS
	 (match_operand:VDQHS 4 "register_operand" "0")
	 (mult:VDQHS
	   (vec_duplicate:VDQHS
	      (vec_select:<VEL>
		(match_operand:VDQHS 1 "register_operand" "<h_con>")
		  (parallel [(match_operand:SI 2 "immediate_operand")])))
	   (match_operand:VDQHS 3 "register_operand" "w"))))]
 "TARGET_SIMD"
  {
    operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
    return "mls\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
  }
  [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
)
1216
;; As *aarch64_mls_elt<mode>, but the lane is taken from a vector of the
;; opposite width (VSWAP_WIDTH).
(define_insn "*aarch64_mls_elt_<vswap_width_name><mode>"
 [(set (match_operand:VDQHS 0 "register_operand" "=w")
       (minus:VDQHS
	 (match_operand:VDQHS 4 "register_operand" "0")
	 (mult:VDQHS
	   (vec_duplicate:VDQHS
	      (vec_select:<VEL>
		(match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
		  (parallel [(match_operand:SI 2 "immediate_operand")])))
	   (match_operand:VDQHS 3 "register_operand" "w"))))]
 "TARGET_SIMD"
  {
    operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
    return "mls\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
  }
  [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
)
1234
;; MLS with the multiplier duplicated from a scalar register; emitted as
;; the lane form using element 0.
(define_insn "*aarch64_mls_elt_merge<mode>"
  [(set (match_operand:VDQHS 0 "register_operand" "=w")
	(minus:VDQHS
	  (match_operand:VDQHS 1 "register_operand" "0")
	  (mult:VDQHS (vec_duplicate:VDQHS
		  (match_operand:<VEL> 2 "register_operand" "<h_con>"))
		(match_operand:VDQHS 3 "register_operand" "w"))))]
  "TARGET_SIMD"
  "mls\t%0.<Vtype>, %3.<Vtype>, %2.<Vetype>[0]"
  [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
)
1246
1247;; Max/Min operations.
;; Signed/unsigned element-wise max/min (SMAX/SMIN/UMAX/UMIN) for
;; byte/half/word integer vectors.
(define_insn "<su><maxmin><mode>3"
 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
       (MAXMIN:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")
		    (match_operand:VDQ_BHSI 2 "register_operand" "w")))]
 "TARGET_SIMD"
 "<su><maxmin>\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_minmax<q>")]
)
1256
;; V2DI has no direct max/min instruction, so synthesize it as a
;; comparison followed by a vector conditional select (vcond).
(define_expand "<su><maxmin>v2di3"
 [(set (match_operand:V2DI 0 "register_operand" "")
       (MAXMIN:V2DI (match_operand:V2DI 1 "register_operand" "")
                    (match_operand:V2DI 2 "register_operand" "")))]
 "TARGET_SIMD"
{
  enum rtx_code cmp_operator;
  rtx cmp_fmt;

  /* Choose the comparison that selects operand 1 when it is the
     desired extremum.  */
  switch (<CODE>)
    {
    case UMIN:
      cmp_operator = LTU;
      break;
    case SMIN:
      cmp_operator = LT;
      break;
    case UMAX:
      cmp_operator = GTU;
      break;
    case SMAX:
      cmp_operator = GT;
      break;
    default:
      gcc_unreachable ();
    }

  cmp_fmt = gen_rtx_fmt_ee (cmp_operator, V2DImode, operands[1], operands[2]);
  emit_insn (gen_vcondv2div2di (operands[0], operands[1],
              operands[2], cmp_fmt, operands[1], operands[2]));
  DONE;
})
1289
1290;; Pairwise Integer Max/Min operations.
;; Pairwise integer max/min (SMAXP/SMINP/UMAXP/UMINP): reduces adjacent
;; element pairs of the two inputs.
(define_insn "aarch64_<maxmin_uns>p<mode>"
 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
       (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand" "w")
			 (match_operand:VDQ_BHSI 2 "register_operand" "w")]
			MAXMINV))]
 "TARGET_SIMD"
 "<maxmin_uns_op>p\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_minmax<q>")]
)
1300
1301;; Pairwise FP Max/Min operations.
;; Pairwise FP max/min (FMAXP/FMINP/FMAXNMP/FMINNMP) for half, single and
;; double precision vectors.
(define_insn "aarch64_<maxmin_uns>p<mode>"
 [(set (match_operand:VHSDF 0 "register_operand" "=w")
       (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
		      (match_operand:VHSDF 2 "register_operand" "w")]
		      FMAXMINV))]
 "TARGET_SIMD"
 "<maxmin_uns_op>p\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_minmax<q>")]
)
1311
1312;; vec_concat gives a new vector with the low elements from operand 1, and
1313;; the high elements from operand 2.  That is to say, given op1 = { a, b }
1314;; op2 = { c, d }, vec_concat (op1, op2) = { a, b, c, d }.
1315;; What that means, is that the RTL descriptions of the below patterns
1316;; need to change depending on endianness.
1317
1318;; Move to the low architectural bits of the register.
1319;; On little-endian this is { operand, zeroes }
1320;; On big-endian this is { zeroes, operand }
1321
;; Little-endian move-to-low-half for quad modes with more than two
;; elements: result is { operand 1, zeroes }.  A 64-bit write to the
;; D-register implicitly zeroes the upper half.
(define_insn "move_lo_quad_internal_<mode>"
  [(set (match_operand:VQ_NO2E 0 "register_operand" "=w,w,w")
	(vec_concat:VQ_NO2E
	  (match_operand:<VHALF> 1 "register_operand" "w,r,r")
	  (vec_duplicate:<VHALF> (const_int 0))))]
  "TARGET_SIMD && !BYTES_BIG_ENDIAN"
  "@
   dup\\t%d0, %1.d[0]
   fmov\\t%d0, %1
   dup\\t%d0, %1"
  [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
   (set_attr "simd" "yes,*,yes")
   (set_attr "fp" "*,yes,*")
   (set_attr "length" "4")]
)
1337
;; As above, but for two-element quad modes, where each half is a scalar
;; and the zero half is a plain (const_int 0) rather than a vec_duplicate.
(define_insn "move_lo_quad_internal_<mode>"
  [(set (match_operand:VQ_2E 0 "register_operand" "=w,w,w")
	(vec_concat:VQ_2E
	  (match_operand:<VHALF> 1 "register_operand" "w,r,r")
	  (const_int 0)))]
  "TARGET_SIMD && !BYTES_BIG_ENDIAN"
  "@
   dup\\t%d0, %1.d[0]
   fmov\\t%d0, %1
   dup\\t%d0, %1"
  [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
   (set_attr "simd" "yes,*,yes")
   (set_attr "fp" "*,yes,*")
   (set_attr "length" "4")]
)
1353
;; Big-endian counterpart: the RTL is { zeroes, operand 1 } but the
;; emitted instructions are identical, since the architectural low half
;; is written either way.
(define_insn "move_lo_quad_internal_be_<mode>"
  [(set (match_operand:VQ_NO2E 0 "register_operand" "=w,w,w")
	(vec_concat:VQ_NO2E
	  (vec_duplicate:<VHALF> (const_int 0))
	  (match_operand:<VHALF> 1 "register_operand" "w,r,r")))]
  "TARGET_SIMD && BYTES_BIG_ENDIAN"
  "@
   dup\\t%d0, %1.d[0]
   fmov\\t%d0, %1
   dup\\t%d0, %1"
  [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
   (set_attr "simd" "yes,*,yes")
   (set_attr "fp" "*,yes,*")
   (set_attr "length" "4")]
)
1369
;; Big-endian variant for two-element quad modes (scalar halves; zero
;; half is a plain const_int 0).
(define_insn "move_lo_quad_internal_be_<mode>"
  [(set (match_operand:VQ_2E 0 "register_operand" "=w,w,w")
	(vec_concat:VQ_2E
	  (const_int 0)
	  (match_operand:<VHALF> 1 "register_operand" "w,r,r")))]
  "TARGET_SIMD && BYTES_BIG_ENDIAN"
  "@
   dup\\t%d0, %1.d[0]
   fmov\\t%d0, %1
   dup\\t%d0, %1"
  [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
   (set_attr "simd" "yes,*,yes")
   (set_attr "fp" "*,yes,*")
   (set_attr "length" "4")]
)
1385
;; Dispatch to the endianness-appropriate move_lo_quad insn.
(define_expand "move_lo_quad_<mode>"
  [(match_operand:VQ 0 "register_operand")
   (match_operand:VQ 1 "register_operand")]
  "TARGET_SIMD"
{
  if (BYTES_BIG_ENDIAN)
    emit_insn (gen_move_lo_quad_internal_be_<mode> (operands[0], operands[1]));
  else
    emit_insn (gen_move_lo_quad_internal_<mode> (operands[0], operands[1]));
  DONE;
}
)
1398
1399;; Move operand1 to the high architectural bits of the register, keeping
1400;; the low architectural bits of operand2.
1401;; For little-endian this is { operand2, operand1 }
1402;; For big-endian this is { operand1, operand2 }
1403
;; Little-endian: write operand 1 into the high half of operand 0 while
;; keeping operand 0's low half (operand 0 is read-modify-write, "+w").
(define_insn "aarch64_simd_move_hi_quad_<mode>"
  [(set (match_operand:VQ 0 "register_operand" "+w,w")
        (vec_concat:VQ
          (vec_select:<VHALF>
                (match_dup 0)
                (match_operand:VQ 2 "vect_par_cnst_lo_half" ""))
	  (match_operand:<VHALF> 1 "register_operand" "w,r")))]
  "TARGET_SIMD && !BYTES_BIG_ENDIAN"
  "@
   ins\\t%0.d[1], %1.d[0]
   ins\\t%0.d[1], %1"
  [(set_attr "type" "neon_ins")]
)
1417
;; Big-endian counterpart: the vec_concat operand order is reversed, but
;; the emitted INS into d[1] is the same.
(define_insn "aarch64_simd_move_hi_quad_be_<mode>"
  [(set (match_operand:VQ 0 "register_operand" "+w,w")
        (vec_concat:VQ
	  (match_operand:<VHALF> 1 "register_operand" "w,r")
          (vec_select:<VHALF>
                (match_dup 0)
                (match_operand:VQ 2 "vect_par_cnst_lo_half" ""))))]
  "TARGET_SIMD && BYTES_BIG_ENDIAN"
  "@
   ins\\t%0.d[1], %1.d[0]
   ins\\t%0.d[1], %1"
  [(set_attr "type" "neon_ins")]
)
1431
;; Dispatch to the endianness-appropriate move_hi_quad insn, building the
;; lo-half lane-selection parallel here.
(define_expand "move_hi_quad_<mode>"
 [(match_operand:VQ 0 "register_operand" "")
  (match_operand:<VHALF> 1 "register_operand" "")]
 "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
  if (BYTES_BIG_ENDIAN)
    emit_insn (gen_aarch64_simd_move_hi_quad_be_<mode> (operands[0],
		    operands[1], p));
  else
    emit_insn (gen_aarch64_simd_move_hi_quad_<mode> (operands[0],
		    operands[1], p));
  DONE;
})
1446
1447;; Narrowing operations.
1448
1449;; For doubles.
;; Narrow each element of a quad vector to half width (XTN).
(define_insn "aarch64_simd_vec_pack_trunc_<mode>"
 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
       (truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand" "w")))]
 "TARGET_SIMD"
 "xtn\\t%0.<Vntype>, %1.<Vtype>"
  [(set_attr "type" "neon_shift_imm_narrow_q")]
)
1457
;; Pack-truncate for 64-bit inputs: concatenate the two D-reg operands
;; into a quad register (operand order swapped on big-endian so element
;; numbering stays correct), then narrow with XTN.
(define_expand "vec_pack_trunc_<mode>"
 [(match_operand:<VNARROWD> 0 "register_operand" "")
  (match_operand:VDN 1 "register_operand" "")
  (match_operand:VDN 2 "register_operand" "")]
 "TARGET_SIMD"
{
  rtx tempreg = gen_reg_rtx (<VDBL>mode);
  int lo = BYTES_BIG_ENDIAN ? 2 : 1;
  int hi = BYTES_BIG_ENDIAN ? 1 : 2;

  emit_insn (gen_move_lo_quad_<Vdbl> (tempreg, operands[lo]));
  emit_insn (gen_move_hi_quad_<Vdbl> (tempreg, operands[hi]));
  emit_insn (gen_aarch64_simd_vec_pack_trunc_<Vdbl> (operands[0], tempreg));
  DONE;
})
1473
1474;; For quads.
1475
;; Pack-truncate for quad inputs: XTN into the low half, XTN2 into the
;; high half.  The output is earlyclobbered ("=&w") because the first
;; XTN writes %0 while an input is still needed by the second insn.
(define_insn "vec_pack_trunc_<mode>"
 [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=&w")
       (vec_concat:<VNARROWQ2>
	 (truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand" "w"))
	 (truncate:<VNARROWQ> (match_operand:VQN 2 "register_operand" "w"))))]
 "TARGET_SIMD"
 {
   if (BYTES_BIG_ENDIAN)
     return "xtn\\t%0.<Vntype>, %2.<Vtype>\;xtn2\\t%0.<V2ntype>, %1.<Vtype>";
   else
     return "xtn\\t%0.<Vntype>, %1.<Vtype>\;xtn2\\t%0.<V2ntype>, %2.<Vtype>";
 }
  [(set_attr "type" "multiple")
   (set_attr "length" "8")]
)
1491
1492;; Widening operations.
1493
;; Widen the low half of a quad vector by sign/zero extension, using a
;; shift-left-long by zero (SSHLL/USHLL #0).
(define_insn "aarch64_simd_vec_unpack<su>_lo_<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
			       (match_operand:VQW 1 "register_operand" "w")
			       (match_operand:VQW 2 "vect_par_cnst_lo_half" "")
			    )))]
  "TARGET_SIMD"
  "<su>shll\t%0.<Vwtype>, %1.<Vhalftype>, 0"
  [(set_attr "type" "neon_shift_imm_long")]
)
1504
;; Widen the high half of a quad vector by sign/zero extension
;; (SSHLL2/USHLL2 #0).
(define_insn "aarch64_simd_vec_unpack<su>_hi_<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
			       (match_operand:VQW 1 "register_operand" "w")
			       (match_operand:VQW 2 "vect_par_cnst_hi_half" "")
			    )))]
  "TARGET_SIMD"
  "<su>shll2\t%0.<Vwtype>, %1.<Vtype>, 0"
  [(set_attr "type" "neon_shift_imm_long")]
)
1515
;; Standard-name expander: build the hi-half lane-selection parallel and
;; emit the widening insn above.
(define_expand "vec_unpack<su>_hi_<mode>"
  [(match_operand:<VWIDE> 0 "register_operand" "")
   (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand"))]
  "TARGET_SIMD"
  {
    rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
    emit_insn (gen_aarch64_simd_vec_unpack<su>_hi_<mode> (operands[0],
							  operands[1], p));
    DONE;
  }
)
1527
;; Standard-name expander: as above, but selecting the low half.
(define_expand "vec_unpack<su>_lo_<mode>"
  [(match_operand:<VWIDE> 0 "register_operand" "")
   (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand" ""))]
  "TARGET_SIMD"
  {
    rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
    emit_insn (gen_aarch64_simd_vec_unpack<su>_lo_<mode> (operands[0],
							  operands[1], p));
    DONE;
  }
)
1539
1540;; Widening arithmetic.
1541
;; Widening multiply-accumulate of the low halves (SMLAL/UMLAL):
;; operand 0 = operand 1 + extend(lo(op2)) * extend(lo(op4)).
(define_insn "*aarch64_<su>mlal_lo<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (plus:<VWIDE>
          (mult:<VWIDE>
              (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
                 (match_operand:VQW 2 "register_operand" "w")
                 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
              (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
                 (match_operand:VQW 4 "register_operand" "w")
                 (match_dup 3))))
          (match_operand:<VWIDE> 1 "register_operand" "0")))]
  "TARGET_SIMD"
  "<su>mlal\t%0.<Vwtype>, %2.<Vhalftype>, %4.<Vhalftype>"
  [(set_attr "type" "neon_mla_<Vetype>_long")]
)
1557
;; Widening multiply-accumulate of the high halves (SMLAL2/UMLAL2).
(define_insn "*aarch64_<su>mlal_hi<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (plus:<VWIDE>
          (mult:<VWIDE>
              (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
                 (match_operand:VQW 2 "register_operand" "w")
                 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
              (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
                 (match_operand:VQW 4 "register_operand" "w")
                 (match_dup 3))))
          (match_operand:<VWIDE> 1 "register_operand" "0")))]
  "TARGET_SIMD"
  "<su>mlal2\t%0.<Vwtype>, %2.<Vtype>, %4.<Vtype>"
  [(set_attr "type" "neon_mla_<Vetype>_long")]
)
1573
;; Widening multiply-subtract of the low halves (SMLSL/UMLSL):
;; operand 0 = operand 1 - extend(lo(op2)) * extend(lo(op4)).
(define_insn "*aarch64_<su>mlsl_lo<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (minus:<VWIDE>
          (match_operand:<VWIDE> 1 "register_operand" "0")
          (mult:<VWIDE>
              (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
                 (match_operand:VQW 2 "register_operand" "w")
                 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
              (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
                 (match_operand:VQW 4 "register_operand" "w")
                 (match_dup 3))))))]
  "TARGET_SIMD"
  "<su>mlsl\t%0.<Vwtype>, %2.<Vhalftype>, %4.<Vhalftype>"
  [(set_attr "type" "neon_mla_<Vetype>_long")]
)
1589
;; Widening multiply-subtract of the high halves (SMLSL2/UMLSL2).
(define_insn "*aarch64_<su>mlsl_hi<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (minus:<VWIDE>
          (match_operand:<VWIDE> 1 "register_operand" "0")
          (mult:<VWIDE>
              (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
                 (match_operand:VQW 2 "register_operand" "w")
                 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
              (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
                 (match_operand:VQW 4 "register_operand" "w")
                 (match_dup 3))))))]
  "TARGET_SIMD"
  "<su>mlsl2\t%0.<Vwtype>, %2.<Vtype>, %4.<Vtype>"
  [(set_attr "type" "neon_mla_<Vetype>_long")]
)
1605
;; Widening multiply-accumulate of full 64-bit vectors (SMLAL/UMLAL).
(define_insn "*aarch64_<su>mlal<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (plus:<VWIDE>
          (mult:<VWIDE>
            (ANY_EXTEND:<VWIDE>
              (match_operand:VD_BHSI 1 "register_operand" "w"))
            (ANY_EXTEND:<VWIDE>
              (match_operand:VD_BHSI 2 "register_operand" "w")))
          (match_operand:<VWIDE> 3 "register_operand" "0")))]
  "TARGET_SIMD"
  "<su>mlal\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_mla_<Vetype>_long")]
)
1619
;; Widening multiply-subtract of full 64-bit vectors (SMLSL/UMLSL).
(define_insn "*aarch64_<su>mlsl<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (minus:<VWIDE>
          (match_operand:<VWIDE> 1 "register_operand" "0")
          (mult:<VWIDE>
            (ANY_EXTEND:<VWIDE>
              (match_operand:VD_BHSI 2 "register_operand" "w"))
            (ANY_EXTEND:<VWIDE>
              (match_operand:VD_BHSI 3 "register_operand" "w")))))]
  "TARGET_SIMD"
  "<su>mlsl\t%0.<Vwtype>, %2.<Vtype>, %3.<Vtype>"
  [(set_attr "type" "neon_mla_<Vetype>_long")]
)
1633
;; Widening multiply of the low halves (SMULL/UMULL).
(define_insn "aarch64_simd_vec_<su>mult_lo_<mode>"
 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
       (mult:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
			   (match_operand:VQW 1 "register_operand" "w")
                           (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
		     (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
                           (match_operand:VQW 2 "register_operand" "w")
                           (match_dup 3)))))]
  "TARGET_SIMD"
  "<su>mull\\t%0.<Vwtype>, %1.<Vhalftype>, %2.<Vhalftype>"
  [(set_attr "type" "neon_mul_<Vetype>_long")]
)
1646
;; Standard-name expander for widening multiply of the low halves.
(define_expand "vec_widen_<su>mult_lo_<mode>"
  [(match_operand:<VWIDE> 0 "register_operand" "")
   (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand" ""))
   (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand" ""))]
 "TARGET_SIMD"
 {
   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
   emit_insn (gen_aarch64_simd_vec_<su>mult_lo_<mode> (operands[0],
						       operands[1],
						       operands[2], p));
   DONE;
 }
)
1660
;; Widening multiply of the high halves (SMULL2/UMULL2).
(define_insn "aarch64_simd_vec_<su>mult_hi_<mode>"
 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
      (mult:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
			    (match_operand:VQW 1 "register_operand" "w")
			    (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
		    (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
			    (match_operand:VQW 2 "register_operand" "w")
			    (match_dup 3)))))]
  "TARGET_SIMD"
  "<su>mull2\\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_mul_<Vetype>_long")]
)
1673
;; Standard-name expander for widening multiply of the high halves.
(define_expand "vec_widen_<su>mult_hi_<mode>"
  [(match_operand:<VWIDE> 0 "register_operand" "")
   (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand" ""))
   (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand" ""))]
 "TARGET_SIMD"
 {
   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
   emit_insn (gen_aarch64_simd_vec_<su>mult_hi_<mode> (operands[0],
						       operands[1],
						       operands[2], p));
   DONE;

 }
)
1688
1689;; FP vector operations.
1690;; AArch64 AdvSIMD supports single-precision (32-bit) and
1691;; double-precision (64-bit) floating-point data types and arithmetic as
1692;; defined by the IEEE 754-2008 standard.  This makes them vectorizable
1693;; without the need for -ffast-math or -funsafe-math-optimizations.
1694;;
1695;; Floating-point operations can raise an exception.  Vectorizing such
;; operations is safe for reasons explained below.
1697;;
1698;; ARMv8 permits an extension to enable trapped floating-point
1699;; exception handling, however this is an optional feature.  In the
1700;; event of a floating-point exception being raised by vectorised
1701;; code then:
1702;; 1.  If trapped floating-point exceptions are available, then a trap
1703;;     will be taken when any lane raises an enabled exception.  A trap
1704;;     handler may determine which lane raised the exception.
1705;; 2.  Alternatively a sticky exception flag is set in the
1706;;     floating-point status register (FPSR).  Software may explicitly
1707;;     test the exception flags, in which case the tests will either
1708;;     prevent vectorisation, allowing precise identification of the
1709;;     failing operation, or if tested outside of vectorisable regions
1710;;     then the specific operation and lane are not of interest.
1711
1712;; FP arithmetic operations.
1713
;; Vector FP addition (FADD) for half, single and double precision.
(define_insn "add<mode>3"
 [(set (match_operand:VHSDF 0 "register_operand" "=w")
       (plus:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
		   (match_operand:VHSDF 2 "register_operand" "w")))]
 "TARGET_SIMD"
 "fadd\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_fp_addsub_<stype><q>")]
)
1722
;; Vector FP subtraction (FSUB).
(define_insn "sub<mode>3"
 [(set (match_operand:VHSDF 0 "register_operand" "=w")
       (minus:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
		    (match_operand:VHSDF 2 "register_operand" "w")))]
 "TARGET_SIMD"
 "fsub\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_fp_addsub_<stype><q>")]
)
1731
;; Vector FP multiplication (FMUL).
(define_insn "mul<mode>3"
 [(set (match_operand:VHSDF 0 "register_operand" "=w")
       (mult:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
		   (match_operand:VHSDF 2 "register_operand" "w")))]
 "TARGET_SIMD"
 "fmul\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_fp_mul_<stype><q>")]
)
1740
;; Vector FP division.  First try the reciprocal-approximation sequence
;; (used under the relevant -mlow-precision/fast-math options); otherwise
;; fall through to the *div<mode>3 FDIV insn.
(define_expand "div<mode>3"
 [(set (match_operand:VHSDF 0 "register_operand" "=w")
       (div:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
		  (match_operand:VHSDF 2 "register_operand" "w")))]
 "TARGET_SIMD"
{
  if (aarch64_emit_approx_div (operands[0], operands[1], operands[2]))
    DONE;

  operands[1] = force_reg (<MODE>mode, operands[1]);
})
1752
;; Vector FP division instruction proper (FDIV).
(define_insn "*div<mode>3"
 [(set (match_operand:VHSDF 0 "register_operand" "=w")
       (div:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
		 (match_operand:VHSDF 2 "register_operand" "w")))]
 "TARGET_SIMD"
 "fdiv\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_fp_div_<stype><q>")]
)
1761
;; Vector FP negation (FNEG).
(define_insn "neg<mode>2"
 [(set (match_operand:VHSDF 0 "register_operand" "=w")
       (neg:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
 "TARGET_SIMD"
 "fneg\\t%0.<Vtype>, %1.<Vtype>"
  [(set_attr "type" "neon_fp_neg_<stype><q>")]
)
1769
;; Vector FP absolute value (FABS).
(define_insn "abs<mode>2"
 [(set (match_operand:VHSDF 0 "register_operand" "=w")
       (abs:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
 "TARGET_SIMD"
 "fabs\\t%0.<Vtype>, %1.<Vtype>"
  [(set_attr "type" "neon_fp_abs_<stype><q>")]
)
1777
;; Fused multiply-add: op0 = op1 * op2 + op3 (FMLA).  FMLA accumulates
;; into its destination, so operand 3 is tied to operand 0 ("0").
(define_insn "fma<mode>4"
  [(set (match_operand:VHSDF 0 "register_operand" "=w")
       (fma:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
		  (match_operand:VHSDF 2 "register_operand" "w")
		  (match_operand:VHSDF 3 "register_operand" "0")))]
  "TARGET_SIMD"
 "fmla\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_fp_mla_<stype><q>")]
)
1787
;; FMLA where one multiplicand is a single lane of operand 1 broadcast
;; to all lanes (by-element form).  Operand 2 is the lane number; it is
;; remapped to the architectural lane numbering for big-endian.
(define_insn "*aarch64_fma4_elt<mode>"
  [(set (match_operand:VDQF 0 "register_operand" "=w")
    (fma:VDQF
      (vec_duplicate:VDQF
	(vec_select:<VEL>
	  (match_operand:VDQF 1 "register_operand" "<h_con>")
	  (parallel [(match_operand:SI 2 "immediate_operand")])))
      (match_operand:VDQF 3 "register_operand" "w")
      (match_operand:VDQF 4 "register_operand" "0")))]
  "TARGET_SIMD"
  {
    operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
    return "fmla\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
  }
  [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
)
1804
;; As *aarch64_fma4_elt, but the lane is taken from a vector of the
;; opposite width (e.g. a V2SF lane feeding a V4SF FMLA).  The lane
;; remap therefore uses the lane-source mode <VSWAP_WIDTH>.
(define_insn "*aarch64_fma4_elt_<vswap_width_name><mode>"
  [(set (match_operand:VDQSF 0 "register_operand" "=w")
    (fma:VDQSF
      (vec_duplicate:VDQSF
	(vec_select:<VEL>
	  (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
	  (parallel [(match_operand:SI 2 "immediate_operand")])))
      (match_operand:VDQSF 3 "register_operand" "w")
      (match_operand:VDQSF 4 "register_operand" "0")))]
  "TARGET_SIMD"
  {
    operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
    return "fmla\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
  }
  [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
)
1821
;; FMLA with a scalar operand duplicated across all lanes; emitted as the
;; by-element form using lane 0 of the scalar register.
(define_insn "*aarch64_fma4_elt_from_dup<mode>"
  [(set (match_operand:VMUL 0 "register_operand" "=w")
    (fma:VMUL
      (vec_duplicate:VMUL
	  (match_operand:<VEL> 1 "register_operand" "<h_con>"))
      (match_operand:VMUL 2 "register_operand" "w")
      (match_operand:VMUL 3 "register_operand" "0")))]
  "TARGET_SIMD"
  "fmla\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]"
  [(set_attr "type" "neon<fp>_mla_<stype>_scalar<q>")]
)
1833
;; Scalar DF fused multiply-add whose first multiplicand is one lane of a
;; V2DF register (scalar by-element FMLA).
(define_insn "*aarch64_fma4_elt_to_64v2df"
  [(set (match_operand:DF 0 "register_operand" "=w")
    (fma:DF
	(vec_select:DF
	  (match_operand:V2DF 1 "register_operand" "w")
	  (parallel [(match_operand:SI 2 "immediate_operand")]))
      (match_operand:DF 3 "register_operand" "w")
      (match_operand:DF 4 "register_operand" "0")))]
  "TARGET_SIMD"
  {
    operands[2] = aarch64_endian_lane_rtx (V2DFmode, INTVAL (operands[2]));
    return "fmla\\t%0.2d, %3.2d, %1.2d[%2]";
  }
  [(set_attr "type" "neon_fp_mla_d_scalar_q")]
)
1849
;; Fused multiply-subtract: op0 = -op1 * op2 + op3 (FMLS).  As with FMLA,
;; the accumulator (operand 3) is tied to the destination.
(define_insn "fnma<mode>4"
  [(set (match_operand:VHSDF 0 "register_operand" "=w")
	(fma:VHSDF
	  (neg:VHSDF (match_operand:VHSDF 1 "register_operand" "w"))
	  (match_operand:VHSDF 2 "register_operand" "w")
	  (match_operand:VHSDF 3 "register_operand" "0")))]
  "TARGET_SIMD"
  "fmls\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_fp_mla_<stype><q>")]
)
1860
;; By-element FMLS: the negated multiplicand times a broadcast lane of
;; operand 1, accumulated into operand 4 (tied to the destination).
(define_insn "*aarch64_fnma4_elt<mode>"
  [(set (match_operand:VDQF 0 "register_operand" "=w")
    (fma:VDQF
      (neg:VDQF
        (match_operand:VDQF 3 "register_operand" "w"))
      (vec_duplicate:VDQF
	(vec_select:<VEL>
	  (match_operand:VDQF 1 "register_operand" "<h_con>")
	  (parallel [(match_operand:SI 2 "immediate_operand")])))
      (match_operand:VDQF 4 "register_operand" "0")))]
  "TARGET_SIMD"
  {
    operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
    return "fmls\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
  }
  [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
)
1878
;; By-element FMLS with the lane taken from a vector of the opposite
;; width; lane remapping uses the lane-source mode <VSWAP_WIDTH>.
(define_insn "*aarch64_fnma4_elt_<vswap_width_name><mode>"
  [(set (match_operand:VDQSF 0 "register_operand" "=w")
    (fma:VDQSF
      (neg:VDQSF
        (match_operand:VDQSF 3 "register_operand" "w"))
      (vec_duplicate:VDQSF
	(vec_select:<VEL>
	  (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
	  (parallel [(match_operand:SI 2 "immediate_operand")])))
      (match_operand:VDQSF 4 "register_operand" "0")))]
  "TARGET_SIMD"
  {
    operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
    return "fmls\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
  }
  [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
)
1896
;; FMLS with a scalar operand duplicated across all lanes; emitted as the
;; by-element form using lane 0 of the scalar register.
(define_insn "*aarch64_fnma4_elt_from_dup<mode>"
  [(set (match_operand:VMUL 0 "register_operand" "=w")
    (fma:VMUL
      (neg:VMUL
        (match_operand:VMUL 2 "register_operand" "w"))
      (vec_duplicate:VMUL
	(match_operand:<VEL> 1 "register_operand" "<h_con>"))
      (match_operand:VMUL 3 "register_operand" "0")))]
  "TARGET_SIMD"
  "fmls\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]"
  [(set_attr "type" "neon<fp>_mla_<stype>_scalar<q>")]
)
1909
;; Scalar DF FMLS where the lane operand appears as the first fma operand
;; and the negation on the second (fma is commutative in its first two
;; operands, so combine can present it this way).
(define_insn "*aarch64_fnma4_elt_to_64v2df"
  [(set (match_operand:DF 0 "register_operand" "=w")
    (fma:DF
      (vec_select:DF
	(match_operand:V2DF 1 "register_operand" "w")
	(parallel [(match_operand:SI 2 "immediate_operand")]))
      (neg:DF
        (match_operand:DF 3 "register_operand" "w"))
      (match_operand:DF 4 "register_operand" "0")))]
  "TARGET_SIMD"
  {
    operands[2] = aarch64_endian_lane_rtx (V2DFmode, INTVAL (operands[2]));
    return "fmls\\t%0.2d, %3.2d, %1.2d[%2]";
  }
  [(set_attr "type" "neon_fp_mla_d_scalar_q")]
)
1926
1927;; Vector versions of the floating-point frint patterns.
1928;; Expands to btrunc, ceil, floor, nearbyint, rint, round, frintn.
;; One insn per rounding mode in the FRINT iterator; the unspec selects
;; the FRINT* variant via <frint_suffix>.
(define_insn "<frint_pattern><mode>2"
  [(set (match_operand:VHSDF 0 "register_operand" "=w")
	(unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")]
		       FRINT))]
  "TARGET_SIMD"
  "frint<frint_suffix>\\t%0.<Vtype>, %1.<Vtype>"
  [(set_attr "type" "neon_fp_round_<stype><q>")]
)
1937
1938;; Vector versions of the fcvt standard patterns.
1939;; Expands to lbtrunc, lround, lceil, lfloor
;; Round-then-convert to signed/unsigned integer in one FCVT* insn:
;; the inner unspec (FCVT iterator) picks the rounding, FIXUORS the
;; signedness of the conversion.
(define_insn "l<fcvt_pattern><su_optab><VHSDF:mode><fcvt_target>2"
  [(set (match_operand:<FCVT_TARGET> 0 "register_operand" "=w")
	(FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
			       [(match_operand:VHSDF 1 "register_operand" "w")]
			       FCVT)))]
  "TARGET_SIMD"
  "fcvt<frint_suffix><su>\\t%0.<Vtype>, %1.<Vtype>"
  [(set_attr "type" "neon_fp_to_int_<stype><q>")]
)
1949
1950;; HF Scalar variants of related SIMD instructions.
;; Scalar HF -> HI round-and-convert, using the SIMD F16 scalar form of
;; the same FCVT* instructions (requires the F16 extension).
(define_insn "l<fcvt_pattern><su_optab>hfhi2"
  [(set (match_operand:HI 0 "register_operand" "=w")
	(FIXUORS:HI (unspec:HF [(match_operand:HF 1 "register_operand" "w")]
		      FCVT)))]
  "TARGET_SIMD_F16INST"
  "fcvt<frint_suffix><su>\t%h0, %h1"
  [(set_attr "type" "neon_fp_to_int_s")]
)
1959
;; Scalar HF -> HI conversion with truncation towards zero (FCVTZS/FCVTZU).
(define_insn "<optab>_trunchfhi2"
  [(set (match_operand:HI 0 "register_operand" "=w")
	(FIXUORS:HI (match_operand:HF 1 "register_operand" "w")))]
  "TARGET_SIMD_F16INST"
  "fcvtz<su>\t%h0, %h1"
  [(set_attr "type" "neon_fp_to_int_s")]
)
1967
;; Scalar HI -> HF signed/unsigned integer-to-float (SCVTF/UCVTF).
(define_insn "<optab>hihf2"
  [(set (match_operand:HF 0 "register_operand" "=w")
	(FLOATUORS:HF (match_operand:HI 1 "register_operand" "w")))]
  "TARGET_SIMD_F16INST"
  "<su_optab>cvtf\t%h0, %h1"
  [(set_attr "type" "neon_int_to_fp_s")]
)
1975
;; Combine a multiply by a power-of-two constant with a truncating
;; float-to-int conversion into a single fixed-point FCVTZ[SU] with an
;; immediate #fbits.  The condition limits fbits to the element width.
(define_insn "*aarch64_fcvt<su_optab><VDQF:mode><fcvt_target>2_mult"
  [(set (match_operand:<FCVT_TARGET> 0 "register_operand" "=w")
	(FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
			       [(mult:VDQF
	 (match_operand:VDQF 1 "register_operand" "w")
	 (match_operand:VDQF 2 "aarch64_fp_vec_pow2" ""))]
			       UNSPEC_FRINTZ)))]
  "TARGET_SIMD
   && IN_RANGE (aarch64_vec_fpconst_pow_of_2 (operands[2]), 1,
		GET_MODE_BITSIZE (GET_MODE_INNER (<VDQF:MODE>mode)))"
  {
    /* Materialize the immediate fraction-bit count into the template.  */
    int fbits = aarch64_vec_fpconst_pow_of_2 (operands[2]);
    char buf[64];
    snprintf (buf, 64, "fcvtz<su>\\t%%0.<Vtype>, %%1.<Vtype>, #%d", fbits);
    output_asm_insn (buf, operands);
    return "";
  }
  [(set_attr "type" "neon_fp_to_int_<Vetype><q>")]
)
1995
;; Standard fix/fixuns patterns, expressed as truncation (FRINTZ) plus
;; conversion so they match the l<fcvt>* insn above.
(define_expand "<optab><VHSDF:mode><fcvt_target>2"
  [(set (match_operand:<FCVT_TARGET> 0 "register_operand")
	(FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
			       [(match_operand:VHSDF 1 "register_operand")]
				UNSPEC_FRINTZ)))]
  "TARGET_SIMD"
  {})
2003
;; fix_trunc/fixuns_trunc standard names; same RTL shape as the expand
;; above so both resolve to the round-towards-zero convert insn.
(define_expand "<fix_trunc_optab><VHSDF:mode><fcvt_target>2"
  [(set (match_operand:<FCVT_TARGET> 0 "register_operand")
	(FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
			       [(match_operand:VHSDF 1 "register_operand")]
				UNSPEC_FRINTZ)))]
  "TARGET_SIMD"
  {})
2011
;; ftrunc standard name: round towards zero, result stays floating-point
;; (matches the FRINTZ variant of the frint insn).
(define_expand "ftrunc<VHSDF:mode>2"
  [(set (match_operand:VHSDF 0 "register_operand")
	(unspec:VHSDF [(match_operand:VHSDF 1 "register_operand")]
		       UNSPEC_FRINTZ))]
  "TARGET_SIMD"
  {})
2018
;; Vector signed/unsigned integer to floating-point (SCVTF/UCVTF).
(define_insn "<optab><fcvt_target><VHSDF:mode>2"
  [(set (match_operand:VHSDF 0 "register_operand" "=w")
	(FLOATUORS:VHSDF
	  (match_operand:<FCVT_TARGET> 1 "register_operand" "w")))]
  "TARGET_SIMD"
  "<su_optab>cvtf\\t%0.<Vtype>, %1.<Vtype>"
  [(set_attr "type" "neon_int_to_fp_<stype><q>")]
)
2027
2028;; Conversions between vectors of floats and doubles.
2029;; Contains a mix of patterns to match standard pattern names
2030;; and those for intrinsics.
2031
2032;; Float widening operations.
2033
;; Widen the low half of a 128-bit FP vector (FCVTL).  Operand 2 is a
;; parallel selecting the low-half lanes.
(define_insn "aarch64_simd_vec_unpacks_lo_<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (float_extend:<VWIDE> (vec_select:<VHALF>
			       (match_operand:VQ_HSF 1 "register_operand" "w")
			       (match_operand:VQ_HSF 2 "vect_par_cnst_lo_half" "")
			    )))]
  "TARGET_SIMD"
  "fcvtl\\t%0.<Vwtype>, %1.<Vhalftype>"
  [(set_attr "type" "neon_fp_cvt_widen_s")]
)
2044
2045;; Convert between fixed-point and floating-point (vector modes)
2046
;; Float to fixed-point conversion; operand 2 is the immediate number of
;; fraction bits passed through to the instruction.
(define_insn "<FCVT_F2FIXED:fcvt_fixed_insn><VHSDF:mode>3"
  [(set (match_operand:<VHSDF:FCVT_TARGET> 0 "register_operand" "=w")
	(unspec:<VHSDF:FCVT_TARGET>
	  [(match_operand:VHSDF 1 "register_operand" "w")
	   (match_operand:SI 2 "immediate_operand" "i")]
	 FCVT_F2FIXED))]
  "TARGET_SIMD"
  "<FCVT_F2FIXED:fcvt_fixed_insn>\t%<v>0<Vmtype>, %<v>1<Vmtype>, #%2"
  [(set_attr "type" "neon_fp_to_int_<VHSDF:stype><q>")]
)
2057
;; Fixed-point to float conversion; operand 2 is the immediate number of
;; fraction bits passed through to the instruction.
(define_insn "<FCVT_FIXED2F:fcvt_fixed_insn><VDQ_HSDI:mode>3"
  [(set (match_operand:<VDQ_HSDI:FCVT_TARGET> 0 "register_operand" "=w")
	(unspec:<VDQ_HSDI:FCVT_TARGET>
	  [(match_operand:VDQ_HSDI 1 "register_operand" "w")
	   (match_operand:SI 2 "immediate_operand" "i")]
	 FCVT_FIXED2F))]
  "TARGET_SIMD"
  "<FCVT_FIXED2F:fcvt_fixed_insn>\t%<v>0<Vmtype>, %<v>1<Vmtype>, #%2"
  [(set_attr "type" "neon_int_to_fp_<VDQ_HSDI:stype><q>")]
)
2068
2069;; ??? Note that the vectorizer usage of the vec_unpacks_[lo/hi] patterns
2070;; is inconsistent with vector ordering elsewhere in the compiler, in that
2071;; the meaning of HI and LO changes depending on the target endianness.
2072;; While elsewhere we map the higher numbered elements of a vector to
2073;; the lower architectural lanes of the vector, for these patterns we want
2074;; to always treat "hi" as referring to the higher architectural lanes.
2075;; Consequently, while the patterns below look inconsistent with our
2076;; other big-endian patterns their behavior is as required.
2077
;; Standard vec_unpacks_lo: build a low-half lane selector (note the
;; endianness caveat in the comment above) and emit the FCVTL insn.
(define_expand "vec_unpacks_lo_<mode>"
  [(match_operand:<VWIDE> 0 "register_operand" "")
   (match_operand:VQ_HSF 1 "register_operand" "")]
  "TARGET_SIMD"
  {
    rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
    emit_insn (gen_aarch64_simd_vec_unpacks_lo_<mode> (operands[0],
						       operands[1], p));
    DONE;
  }
)
2089
;; Widen the high half of a 128-bit FP vector (FCVTL2).  Operand 2 is a
;; parallel selecting the high-half lanes.
(define_insn "aarch64_simd_vec_unpacks_hi_<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (float_extend:<VWIDE> (vec_select:<VHALF>
			       (match_operand:VQ_HSF 1 "register_operand" "w")
			       (match_operand:VQ_HSF 2 "vect_par_cnst_hi_half" "")
			    )))]
  "TARGET_SIMD"
  "fcvtl2\\t%0.<Vwtype>, %1.<Vtype>"
  [(set_attr "type" "neon_fp_cvt_widen_s")]
)
2100
;; Standard vec_unpacks_hi: build a high-half lane selector and emit the
;; FCVTL2 insn.  (Bug fix: this previously called the *_lo_* generator,
;; whose pattern requires a vect_par_cnst_lo_half parallel — the hi-half
;; selector built here could never match it, and the wrong half would be
;; widened.  Use the *_hi_* generator, mirroring vec_unpacks_lo above.)
(define_expand "vec_unpacks_hi_<mode>"
  [(match_operand:<VWIDE> 0 "register_operand" "")
   (match_operand:VQ_HSF 1 "register_operand" "")]
  "TARGET_SIMD"
  {
    rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
    emit_insn (gen_aarch64_simd_vec_unpacks_hi_<mode> (operands[0],
						       operands[1], p));
    DONE;
  }
)
;; Widen a whole 64-bit FP vector to its double-width mode (FCVTL).
(define_insn "aarch64_float_extend_lo_<Vwide>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(float_extend:<VWIDE>
	  (match_operand:VDF 1 "register_operand" "w")))]
  "TARGET_SIMD"
  "fcvtl\\t%0<Vmwtype>, %1<Vmtype>"
  [(set_attr "type" "neon_fp_cvt_widen_s")]
)
2120
2121;; Float narrowing operations.
2122
;; Narrow a double-width FP vector into a 64-bit result (FCVTN).
(define_insn "aarch64_float_truncate_lo_<mode>"
  [(set (match_operand:VDF 0 "register_operand" "=w")
      (float_truncate:VDF
	(match_operand:<VWIDE> 1 "register_operand" "w")))]
  "TARGET_SIMD"
  "fcvtn\\t%0.<Vtype>, %1<Vmwtype>"
  [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
)
2131
;; FCVTN2, little-endian form: narrow operand 2 into the high half of the
;; destination while keeping operand 1 (tied to the destination) in the
;; low half.
(define_insn "aarch64_float_truncate_hi_<Vdbl>_le"
  [(set (match_operand:<VDBL> 0 "register_operand" "=w")
    (vec_concat:<VDBL>
      (match_operand:VDF 1 "register_operand" "0")
      (float_truncate:VDF
	(match_operand:<VWIDE> 2 "register_operand" "w"))))]
  "TARGET_SIMD && !BYTES_BIG_ENDIAN"
  "fcvtn2\\t%0.<Vdtype>, %2<Vmwtype>"
  [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
)
2142
;; FCVTN2, big-endian form: same instruction, but the vec_concat operand
;; order is swapped to reflect big-endian lane numbering in RTL.
(define_insn "aarch64_float_truncate_hi_<Vdbl>_be"
  [(set (match_operand:<VDBL> 0 "register_operand" "=w")
    (vec_concat:<VDBL>
      (float_truncate:VDF
	(match_operand:<VWIDE> 2 "register_operand" "w"))
      (match_operand:VDF 1 "register_operand" "0")))]
  "TARGET_SIMD && BYTES_BIG_ENDIAN"
  "fcvtn2\\t%0.<Vdtype>, %2<Vmwtype>"
  [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
)
2153
;; Endianness-dispatching wrapper for FCVTN2: pick the _le or _be insn.
(define_expand "aarch64_float_truncate_hi_<Vdbl>"
  [(match_operand:<VDBL> 0 "register_operand" "=w")
   (match_operand:VDF 1 "register_operand" "0")
   (match_operand:<VWIDE> 2 "register_operand" "w")]
  "TARGET_SIMD"
{
  rtx (*gen) (rtx, rtx, rtx) = BYTES_BIG_ENDIAN
			     ? gen_aarch64_float_truncate_hi_<Vdbl>_be
			     : gen_aarch64_float_truncate_hi_<Vdbl>_le;
  emit_insn (gen (operands[0], operands[1], operands[2]));
  DONE;
}
)
2167
;; Pack two V2DF vectors into one V4SF: FCVTN into the low half, then
;; FCVTN2 into the high half.  Which source feeds which half is swapped
;; on big-endian to match RTL lane numbering.
(define_expand "vec_pack_trunc_v2df"
  [(set (match_operand:V4SF 0 "register_operand")
      (vec_concat:V4SF
	(float_truncate:V2SF
	    (match_operand:V2DF 1 "register_operand"))
	(float_truncate:V2SF
	    (match_operand:V2DF 2 "register_operand"))
	  ))]
  "TARGET_SIMD"
  {
    rtx tmp = gen_reg_rtx (V2SFmode);
    int lo = BYTES_BIG_ENDIAN ? 2 : 1;
    int hi = BYTES_BIG_ENDIAN ? 1 : 2;

    emit_insn (gen_aarch64_float_truncate_lo_v2sf (tmp, operands[lo]));
    emit_insn (gen_aarch64_float_truncate_hi_v4sf (operands[0],
						   tmp, operands[hi]));
    DONE;
  }
)
2188
;; Pack two scalar DFs into one V2SF: assemble the two scalars into a
;; V2DF via the move_lo/hi_quad helpers, then narrow with FCVTN.
(define_expand "vec_pack_trunc_df"
  [(set (match_operand:V2SF 0 "register_operand")
      (vec_concat:V2SF
	(float_truncate:SF
	    (match_operand:DF 1 "register_operand"))
	(float_truncate:SF
	    (match_operand:DF 2 "register_operand"))
	  ))]
  "TARGET_SIMD"
  {
    rtx tmp = gen_reg_rtx (V2SFmode);
    int lo = BYTES_BIG_ENDIAN ? 2 : 1;
    int hi = BYTES_BIG_ENDIAN ? 1 : 2;

    emit_insn (gen_move_lo_quad_v2df (tmp, operands[lo]));
    emit_insn (gen_move_hi_quad_v2df (tmp, operands[hi]));
    emit_insn (gen_aarch64_float_truncate_lo_v2sf (operands[0], tmp));
    DONE;
  }
)
2209
2210;; FP Max/Min
2211;; Max/Min are introduced by idiom recognition by GCC's mid-end.  An
2212;; expression like:
2213;;      a = (b < c) ? b : c;
2214;; is idiom-matched as MIN_EXPR<b,c> only if -ffinite-math-only is enabled
2215;; either explicitly or indirectly via -ffast-math.
2216;;
2217;; MIN_EXPR and MAX_EXPR eventually map to 'smin' and 'smax' in RTL.
2218;; The 'smax' and 'smin' RTL standard pattern names do not specify which
2219;; operand will be returned when both operands are zero (i.e. they may not
2220;; honour signed zeroes), or when either operand is NaN.  Therefore GCC
2221;; only introduces MIN_EXPR/MAX_EXPR in fast math mode or when not honouring
2222;; NaNs.
2223
;; smax/smin standard patterns, implemented with the NaN-propagation-
;; friendly FMAXNM/FMINNM (valid because GCC only generates these under
;; no-NaNs assumptions; see the comment above).
(define_insn "<su><maxmin><mode>3"
  [(set (match_operand:VHSDF 0 "register_operand" "=w")
	(FMAXMIN:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
		       (match_operand:VHSDF 2 "register_operand" "w")))]
  "TARGET_SIMD"
  "f<maxmin>nm\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_fp_minmax_<stype><q>")]
)
2232
2233;; Vector forms for fmax, fmin, fmaxnm, fminnm.
2234;; fmaxnm and fminnm are used for the fmax<mode>3 standard pattern names,
2235;; which implement the IEEE fmax ()/fmin () functions.
;; Unspec-based FMAX/FMIN/FMAXNM/FMINNM (IEEE-semantics fmax/fmin and the
;; intrinsic forms), kept as unspecs so no RTL simplification assumes
;; smax/smin semantics.
(define_insn "<maxmin_uns><mode>3"
  [(set (match_operand:VHSDF 0 "register_operand" "=w")
       (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
		      (match_operand:VHSDF 2 "register_operand" "w")]
		      FMAXMIN_UNS))]
  "TARGET_SIMD"
  "<maxmin_uns_op>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_fp_minmax_<stype><q>")]
)
2245
2246;; 'across lanes' add.
2247
;; Integer add-across-lanes producing a scalar: do the ADDV-style
;; reduction into a vector scratch, then extract lane 0 (endian-adjusted).
(define_expand "reduc_plus_scal_<mode>"
  [(match_operand:<VEL> 0 "register_operand" "=w")
   (unspec:VDQ_I [(match_operand:VDQ_I 1 "register_operand" "w")]
	       UNSPEC_ADDV)]
  "TARGET_SIMD"
  {
    rtx elt = aarch64_endian_lane_rtx (<MODE>mode, 0);
    rtx scratch = gen_reg_rtx (<MODE>mode);
    emit_insn (gen_aarch64_reduc_plus_internal<mode> (scratch, operands[1]));
    emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt));
    DONE;
  }
)
2261
;; Pairwise floating-point add of two vectors (FADDP, vector form).
(define_insn "aarch64_faddp<mode>"
 [(set (match_operand:VHSDF 0 "register_operand" "=w")
       (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
		      (match_operand:VHSDF 2 "register_operand" "w")]
	UNSPEC_FADDV))]
 "TARGET_SIMD"
 "faddp\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_fp_reduc_add_<stype><q>")]
)
2271
;; Integer add-across-lanes (ADDV / ADDP depending on <vp>); the scalar
;; sum lands in element 0 of the destination vector register.
(define_insn "aarch64_reduc_plus_internal<mode>"
 [(set (match_operand:VDQV 0 "register_operand" "=w")
       (unspec:VDQV [(match_operand:VDQV 1 "register_operand" "w")]
		    UNSPEC_ADDV))]
 "TARGET_SIMD"
 "add<VDQV:vp>\\t%<Vetype>0, %1.<Vtype>"
  [(set_attr "type" "neon_reduc_add<q>")]
)
2280
;; V2SI has no ADDV form; a single pairwise ADDP of the vector with
;; itself performs the two-lane reduction.
(define_insn "aarch64_reduc_plus_internalv2si"
 [(set (match_operand:V2SI 0 "register_operand" "=w")
       (unspec:V2SI [(match_operand:V2SI 1 "register_operand" "w")]
		    UNSPEC_ADDV))]
 "TARGET_SIMD"
 "addp\\t%0.2s, %1.2s, %1.2s"
  [(set_attr "type" "neon_reduc_add")]
)
2289
;; Two-lane FP reduction direct to a scalar: FADDP (scalar form) adds the
;; pair of lanes into a scalar register.
(define_insn "reduc_plus_scal_<mode>"
 [(set (match_operand:<VEL> 0 "register_operand" "=w")
       (unspec:<VEL> [(match_operand:V2F 1 "register_operand" "w")]
		   UNSPEC_FADDV))]
 "TARGET_SIMD"
 "faddp\\t%<Vetype>0, %1.<Vtype>"
  [(set_attr "type" "neon_fp_reduc_add_<Vetype><q>")]
)
2298
;; Four-lane SF reduction: two rounds of pairwise FADDP leave the total
;; in every lane; extract lane 0 (endian-adjusted).
(define_expand "reduc_plus_scal_v4sf"
 [(set (match_operand:SF 0 "register_operand")
       (unspec:V4SF [(match_operand:V4SF 1 "register_operand")]
		    UNSPEC_FADDV))]
 "TARGET_SIMD"
{
  rtx elt = aarch64_endian_lane_rtx (V4SFmode, 0);
  rtx scratch = gen_reg_rtx (V4SFmode);
  emit_insn (gen_aarch64_faddpv4sf (scratch, operands[1], operands[1]));
  emit_insn (gen_aarch64_faddpv4sf (scratch, scratch, scratch));
  emit_insn (gen_aarch64_get_lanev4sf (operands[0], scratch, elt));
  DONE;
})
2312
;; Count leading redundant sign bits per lane (CLS).
(define_insn "clrsb<mode>2"
  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
        (clrsb:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")))]
  "TARGET_SIMD"
  "cls\\t%0.<Vtype>, %1.<Vtype>"
  [(set_attr "type" "neon_cls<q>")]
)
2320
;; Count leading zeros per lane (CLZ).
(define_insn "clz<mode>2"
 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
       (clz:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")))]
 "TARGET_SIMD"
 "clz\\t%0.<Vtype>, %1.<Vtype>"
  [(set_attr "type" "neon_cls<q>")]
)
2328
;; Population count per byte lane (CNT; byte vectors only).
(define_insn "popcount<mode>2"
  [(set (match_operand:VB 0 "register_operand" "=w")
        (popcount:VB (match_operand:VB 1 "register_operand" "w")))]
  "TARGET_SIMD"
  "cnt\\t%0.<Vbtype>, %1.<Vbtype>"
  [(set_attr "type" "neon_cnt<q>")]
)
2336
2337;; 'across lanes' max and min ops.
2338
2339;; Template for outputting a scalar, so we can create __builtins which can be
2340;; gimple_fold'd to the IFN_REDUC_(MAX|MIN) function.  (This is FP smax/smin).
;; FP max/min across lanes to a scalar: reduce into a vector scratch,
;; then extract lane 0 (endian-adjusted).
(define_expand "reduc_<maxmin_uns>_scal_<mode>"
  [(match_operand:<VEL> 0 "register_operand")
   (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand")]
		  FMAXMINV)]
  "TARGET_SIMD"
  {
    rtx elt = aarch64_endian_lane_rtx (<MODE>mode, 0);
    rtx scratch = gen_reg_rtx (<MODE>mode);
    emit_insn (gen_aarch64_reduc_<maxmin_uns>_internal<mode> (scratch,
							      operands[1]));
    emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt));
    DONE;
  }
)
2355
2356;; Likewise for integer cases, signed and unsigned.
;; Integer (signed and unsigned) max/min across lanes to a scalar; same
;; scratch-then-extract scheme as the FP variant above.
(define_expand "reduc_<maxmin_uns>_scal_<mode>"
  [(match_operand:<VEL> 0 "register_operand")
   (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand")]
		    MAXMINV)]
  "TARGET_SIMD"
  {
    rtx elt = aarch64_endian_lane_rtx (<MODE>mode, 0);
    rtx scratch = gen_reg_rtx (<MODE>mode);
    emit_insn (gen_aarch64_reduc_<maxmin_uns>_internal<mode> (scratch,
							      operands[1]));
    emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt));
    DONE;
  }
)
2371
;; Integer max/min across lanes ([SU]MAXV/[SU]MINV); result in element 0.
(define_insn "aarch64_reduc_<maxmin_uns>_internal<mode>"
 [(set (match_operand:VDQV_S 0 "register_operand" "=w")
       (unspec:VDQV_S [(match_operand:VDQV_S 1 "register_operand" "w")]
		    MAXMINV))]
 "TARGET_SIMD"
 "<maxmin_uns_op>v\\t%<Vetype>0, %1.<Vtype>"
  [(set_attr "type" "neon_reduc_minmax<q>")]
)
2380
;; V2SI has no across-lanes form; one pairwise op of the vector with
;; itself reduces the two lanes.
(define_insn "aarch64_reduc_<maxmin_uns>_internalv2si"
 [(set (match_operand:V2SI 0 "register_operand" "=w")
       (unspec:V2SI [(match_operand:V2SI 1 "register_operand" "w")]
		    MAXMINV))]
 "TARGET_SIMD"
 "<maxmin_uns_op>p\\t%0.2s, %1.2s, %1.2s"
  [(set_attr "type" "neon_reduc_minmax")]
)
2389
;; FP max/min across lanes (FMAX*V/FMIN*V or the pairwise <vp> form).
(define_insn "aarch64_reduc_<maxmin_uns>_internal<mode>"
 [(set (match_operand:VHSDF 0 "register_operand" "=w")
       (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")]
		      FMAXMINV))]
 "TARGET_SIMD"
 "<maxmin_uns_op><vp>\\t%<Vetype>0, %1.<Vtype>"
  [(set_attr "type" "neon_fp_reduc_minmax_<stype><q>")]
)
2398
2399;; aarch64_simd_bsl may compile to any of bsl/bif/bit depending on register
2400;; allocation.
2401;; Operand 1 is the mask, operands 2 and 3 are the bitfields from which
2402;; to select.
2403;;
2404;; Thus our BSL is of the form:
2405;;   op0 = bsl (mask, op2, op3)
2406;; We can use any of:
2407;;
2408;;   if (op0 = mask)
2409;;     bsl mask, op1, op2
2410;;   if (op0 = op1) (so 1-bits in mask choose bits from op2, else op0)
2411;;     bit op0, op2, mask
2412;;   if (op0 = op2) (so 0-bits in mask choose bits from op1, else op0)
2413;;     bif op0, op1, mask
2414;;
2415;; This pattern is expanded to by the aarch64_simd_bsl<mode> expander.
2416;; Some forms of straight-line code may generate the equivalent form
2417;; in *aarch64_simd_bsl<mode>_alt.
2418
;; Canonical bit-select RTL: op0 = ((op2 ^ op3) & op1) ^ op3.  The three
;; alternatives tie a different input to the destination and so choose
;; between BSL (dest = mask), BIT (dest = op3-like) and BIF (dest = op2).
(define_insn "aarch64_simd_bsl<mode>_internal"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w,w,w")
	(xor:VDQ_I
	   (and:VDQ_I
	     (xor:VDQ_I
	       (match_operand:<V_INT_EQUIV> 3 "register_operand" "w,0,w")
	       (match_operand:VDQ_I 2 "register_operand" "w,w,0"))
	     (match_operand:VDQ_I 1 "register_operand" "0,w,w"))
	  (match_dup:<V_INT_EQUIV> 3)
	))]
  "TARGET_SIMD"
  "@
  bsl\\t%0.<Vbtype>, %2.<Vbtype>, %3.<Vbtype>
  bit\\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>
  bif\\t%0.<Vbtype>, %3.<Vbtype>, %1.<Vbtype>"
  [(set_attr "type" "neon_bsl<q>")]
)
2436
2437;; We need this form in addition to the above pattern to match the case
2438;; when combine tries merging three insns such that the second operand of
2439;; the outer XOR matches the second operand of the inner XOR rather than
2440;; the first.  The two are equivalent but since recog doesn't try all
2441;; permutations of commutative operations, we have to have a separate pattern.
2442
;; Same bit-select computation with the inner XOR operands commuted
;; (op0 = ((op3 ^ op2) & op1) ^ op2); needed because recog does not try
;; commuted forms — see the comment above.
(define_insn "*aarch64_simd_bsl<mode>_alt"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w,w,w")
	(xor:VDQ_I
	   (and:VDQ_I
	     (xor:VDQ_I
	       (match_operand:VDQ_I 3 "register_operand" "w,w,0")
	       (match_operand:<V_INT_EQUIV> 2 "register_operand" "w,0,w"))
	      (match_operand:VDQ_I 1 "register_operand" "0,w,w"))
	  (match_dup:<V_INT_EQUIV> 2)))]
  "TARGET_SIMD"
  "@
  bsl\\t%0.<Vbtype>, %3.<Vbtype>, %2.<Vbtype>
  bit\\t%0.<Vbtype>, %3.<Vbtype>, %1.<Vbtype>
  bif\\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>"
  [(set_attr "type" "neon_bsl<q>")]
)
2459
2460;; DImode is special, we want to avoid computing operations which are
2461;; more naturally computed in general purpose registers in the vector
2462;; registers.  If we do that, we need to move all three operands from general
2463;; purpose registers to vector registers, then back again.  However, we
2464;; don't want to make this pattern an UNSPEC as we'd lose scope for
2465;; optimizations based on the component operations of a BSL.
2466;;
2467;; That means we need a splitter back to the individual operations, if they
2468;; would be better calculated on the integer side.
2469
;; DImode bit-select.  Alternatives 0-2 are the vector BSL/BIT/BIF forms;
;; alternative 3 keeps everything in general-purpose registers and is
;; split after reload into EOR/AND/EOR (see the comment above).
(define_insn_and_split "aarch64_simd_bsldi_internal"
  [(set (match_operand:DI 0 "register_operand" "=w,w,w,&r")
	(xor:DI
	   (and:DI
	     (xor:DI
	       (match_operand:DI 3 "register_operand" "w,0,w,r")
	       (match_operand:DI 2 "register_operand" "w,w,0,r"))
	     (match_operand:DI 1 "register_operand" "0,w,w,r"))
	  (match_dup:DI 3)
	))]
  "TARGET_SIMD"
  "@
  bsl\\t%0.8b, %2.8b, %3.8b
  bit\\t%0.8b, %2.8b, %1.8b
  bif\\t%0.8b, %3.8b, %1.8b
  #"
  "&& REG_P (operands[0]) && GP_REGNUM_P (REGNO (operands[0]))"
  [(match_dup 1) (match_dup 1) (match_dup 2) (match_dup 3)]
{
  /* Split back to individual operations.  If we're before reload, and
     able to create a temporary register, do so.  If we're after reload,
     we've got an early-clobber destination register, so use that.
     Otherwise, we can't create pseudos and we can't yet guarantee that
     operands[0] is safe to write, so FAIL to split.  */

  rtx scratch;
  if (reload_completed)
    scratch = operands[0];
  else if (can_create_pseudo_p ())
    scratch = gen_reg_rtx (DImode);
  else
    FAIL;

  /* op0 = ((op2 ^ op3) & op1) ^ op3, computed on the integer side.  */
  emit_insn (gen_xordi3 (scratch, operands[2], operands[3]));
  emit_insn (gen_anddi3 (scratch, scratch, operands[1]));
  emit_insn (gen_xordi3 (operands[0], scratch, operands[3]));
  DONE;
}
  [(set_attr "type" "neon_bsl,neon_bsl,neon_bsl,multiple")
   (set_attr "length" "4,4,4,12")]
)
2511
;; Commuted-XOR variant of the DImode bit-select (final XOR against
;; operand 2 instead of operand 3), with the same GP-register split.
(define_insn_and_split "aarch64_simd_bsldi_alt"
  [(set (match_operand:DI 0 "register_operand" "=w,w,w,&r")
	(xor:DI
	   (and:DI
	     (xor:DI
	       (match_operand:DI 3 "register_operand" "w,w,0,r")
	       (match_operand:DI 2 "register_operand" "w,0,w,r"))
	     (match_operand:DI 1 "register_operand" "0,w,w,r"))
	  (match_dup:DI 2)
	))]
  "TARGET_SIMD"
  "@
  bsl\\t%0.8b, %3.8b, %2.8b
  bit\\t%0.8b, %3.8b, %1.8b
  bif\\t%0.8b, %2.8b, %1.8b
  #"
  "&& REG_P (operands[0]) && GP_REGNUM_P (REGNO (operands[0]))"
  [(match_dup 0) (match_dup 1) (match_dup 2) (match_dup 3)]
{
  /* Split back to individual operations.  If we're before reload, and
     able to create a temporary register, do so.  If we're after reload,
     we've got an early-clobber destination register, so use that.
     Otherwise, we can't create pseudos and we can't yet guarantee that
     operands[0] is safe to write, so FAIL to split.  */

  rtx scratch;
  if (reload_completed)
    scratch = operands[0];
  else if (can_create_pseudo_p ())
    scratch = gen_reg_rtx (DImode);
  else
    FAIL;

  /* op0 = ((op3 ^ op2) & op1) ^ op2, computed on the integer side.  */
  emit_insn (gen_xordi3 (scratch, operands[2], operands[3]));
  emit_insn (gen_anddi3 (scratch, scratch, operands[1]));
  emit_insn (gen_xordi3 (operands[0], scratch, operands[2]));
  DONE;
}
  [(set_attr "type" "neon_bsl,neon_bsl,neon_bsl,multiple")
   (set_attr "length" "4,4,4,12")]
)
2553
;; Public bit-select expander: op0 = bsl (mask op1, op2, op3).  FP modes
;; are punned to the equivalent integer mode for the _internal pattern
;; and the result punned back.
(define_expand "aarch64_simd_bsl<mode>"
  [(match_operand:VALLDIF 0 "register_operand")
   (match_operand:<V_INT_EQUIV> 1 "register_operand")
   (match_operand:VALLDIF 2 "register_operand")
   (match_operand:VALLDIF 3 "register_operand")]
 "TARGET_SIMD"
{
  /* We can't alias operands together if they have different modes.  */
  rtx tmp = operands[0];
  if (FLOAT_MODE_P (<MODE>mode))
    {
      operands[2] = gen_lowpart (<V_INT_EQUIV>mode, operands[2]);
      operands[3] = gen_lowpart (<V_INT_EQUIV>mode, operands[3]);
      tmp = gen_reg_rtx (<V_INT_EQUIV>mode);
    }
  operands[1] = gen_lowpart (<V_INT_EQUIV>mode, operands[1]);
  emit_insn (gen_aarch64_simd_bsl<v_int_equiv>_internal (tmp,
							 operands[1],
							 operands[2],
							 operands[3]));
  if (tmp != operands[0])
    emit_move_insn (operands[0], gen_lowpart (<MODE>mode, tmp));

  DONE;
})
2579
;; vcond_mask: op0 = op3 ? op1 : op2, with op3 an integer mask vector.
;; The -1/0 and 0/-1 constant cases collapse to a move or a bitwise NOT
;; of the mask; everything else goes through the bit-select expander.
(define_expand "vcond_mask_<mode><v_int_equiv>"
  [(match_operand:VALLDI 0 "register_operand")
   (match_operand:VALLDI 1 "nonmemory_operand")
   (match_operand:VALLDI 2 "nonmemory_operand")
   (match_operand:<V_INT_EQUIV> 3 "register_operand")]
  "TARGET_SIMD"
{
  /* If we have (a = (P) ? -1 : 0);
     Then we can simply move the generated mask (result must be int).  */
  if (operands[1] == CONSTM1_RTX (<MODE>mode)
      && operands[2] == CONST0_RTX (<MODE>mode))
    emit_move_insn (operands[0], operands[3]);
  /* Similarly, (a = (P) ? 0 : -1) is just inverting the generated mask.  */
  else if (operands[1] == CONST0_RTX (<MODE>mode)
	   && operands[2] == CONSTM1_RTX (<MODE>mode))
    emit_insn (gen_one_cmpl<v_int_equiv>2 (operands[0], operands[3]));
  else
    {
      if (!REG_P (operands[1]))
	operands[1] = force_reg (<MODE>mode, operands[1]);
      if (!REG_P (operands[2]))
	operands[2] = force_reg (<MODE>mode, operands[2]);
      emit_insn (gen_aarch64_simd_bsl<mode> (operands[0], operands[3],
					     operands[1], operands[2]));
    }

  DONE;
})
2608
2609;; Patterns comparing two vectors to produce a mask.
2610
;; Integer vector comparison: set each lane of operand 0 to all-ones
;; when the comparison (operator 1) of operands 2 and 3 holds for that
;; lane, otherwise to all-zeros.  Maps onto the CM<cc> instructions,
;; swapping operands for the unsigned LTU/LEU forms and implementing
;; NE as NOT (EQ).
(define_expand "vec_cmp<mode><mode>"
  [(set (match_operand:VSDQ_I_DI 0 "register_operand")
	  (match_operator 1 "comparison_operator"
	    [(match_operand:VSDQ_I_DI 2 "register_operand")
	     (match_operand:VSDQ_I_DI 3 "nonmemory_operand")]))]
  "TARGET_SIMD"
{
  rtx mask = operands[0];
  enum rtx_code code = GET_CODE (operands[1]);

  /* The signed comparisons (and EQ/NE) can keep an immediate zero as
     operand 3; any other operand must live in a register.  */
  switch (code)
    {
    case NE:
    case LE:
    case LT:
    case GE:
    case GT:
    case EQ:
      if (operands[3] == CONST0_RTX (<MODE>mode))
	break;

      /* Fall through.  */
    default:
      if (!REG_P (operands[3]))
	operands[3] = force_reg (<MODE>mode, operands[3]);

      break;
    }

  switch (code)
    {
    case LT:
      emit_insn (gen_aarch64_cmlt<mode> (mask, operands[2], operands[3]));
      break;

    case GE:
      emit_insn (gen_aarch64_cmge<mode> (mask, operands[2], operands[3]));
      break;

    case LE:
      emit_insn (gen_aarch64_cmle<mode> (mask, operands[2], operands[3]));
      break;

    case GT:
      emit_insn (gen_aarch64_cmgt<mode> (mask, operands[2], operands[3]));
      break;

    case LTU:
      /* a LTU b  <=>  b GTU a; there is no unsigned "less than" insn.  */
      emit_insn (gen_aarch64_cmgtu<mode> (mask, operands[3], operands[2]));
      break;

    case GEU:
      emit_insn (gen_aarch64_cmgeu<mode> (mask, operands[2], operands[3]));
      break;

    case LEU:
      /* a LEU b  <=>  b GEU a.  */
      emit_insn (gen_aarch64_cmgeu<mode> (mask, operands[3], operands[2]));
      break;

    case GTU:
      emit_insn (gen_aarch64_cmgtu<mode> (mask, operands[2], operands[3]));
      break;

    case NE:
      /* Handle NE as !EQ.  */
      emit_insn (gen_aarch64_cmeq<mode> (mask, operands[2], operands[3]));
      emit_insn (gen_one_cmpl<v_int_equiv>2 (mask, mask));
      break;

    case EQ:
      emit_insn (gen_aarch64_cmeq<mode> (mask, operands[2], operands[3]));
      break;

    default:
      gcc_unreachable ();
    }

  DONE;
})
2690
;; Floating-point vector comparison producing an integer mask in
;; operand 0.  The ordered comparisons map onto FCMEQ/FCMGE/FCMGT
;; (using their comparison-with-zero forms where possible); the
;; unordered and LTGT comparisons are expanded in terms of them.
(define_expand "vec_cmp<mode><v_int_equiv>"
  [(set (match_operand:<V_INT_EQUIV> 0 "register_operand")
	(match_operator 1 "comparison_operator"
	    [(match_operand:VDQF 2 "register_operand")
	     (match_operand:VDQF 3 "nonmemory_operand")]))]
  "TARGET_SIMD"
{
  int use_zero_form = 0;
  enum rtx_code code = GET_CODE (operands[1]);
  rtx tmp = gen_reg_rtx (<V_INT_EQUIV>mode);

  rtx (*comparison) (rtx, rtx, rtx) = NULL;

  /* Only the ordered comparisons have a compare-against-zero form;
     for anything else operand 3 must be a register.  */
  switch (code)
    {
    case LE:
    case LT:
    case GE:
    case GT:
    case EQ:
      if (operands[3] == CONST0_RTX (<MODE>mode))
	{
	  use_zero_form = 1;
	  break;
	}
      /* Fall through.  */
    default:
      if (!REG_P (operands[3]))
	operands[3] = force_reg (<MODE>mode, operands[3]);

      break;
    }

  /* Pick the base FCM<cc> generator (and swap operands where the
     comparison must be rewritten as its mirror image).  */
  switch (code)
    {
    case LT:
      if (use_zero_form)
	{
	  comparison = gen_aarch64_cmlt<mode>;
	  break;
	}
      /* Fall through.  */
    case UNLT:
      std::swap (operands[2], operands[3]);
      /* Fall through.  */
    case UNGT:
    case GT:
      comparison = gen_aarch64_cmgt<mode>;
      break;
    case LE:
      if (use_zero_form)
	{
	  comparison = gen_aarch64_cmle<mode>;
	  break;
	}
      /* Fall through.  */
    case UNLE:
      std::swap (operands[2], operands[3]);
      /* Fall through.  */
    case UNGE:
    case GE:
      comparison = gen_aarch64_cmge<mode>;
      break;
    case NE:
    case EQ:
      comparison = gen_aarch64_cmeq<mode>;
      break;
    case UNEQ:
    case ORDERED:
    case UNORDERED:
    case LTGT:
      /* Handled below without a single base comparison.  */
      break;
    default:
      gcc_unreachable ();
    }

  switch (code)
    {
    case UNGE:
    case UNGT:
    case UNLE:
    case UNLT:
      {
	/* All of the above must not raise any FP exceptions.  Thus we first
	   check each operand for NaNs and force any elements containing NaN to
	   zero before using them in the compare.
	   Example: UN<cc> (a, b) -> UNORDERED (a, b) |
				     (cm<cc> (isnan (a) ? 0.0 : a,
					      isnan (b) ? 0.0 : b))
	   We use the following transformations for doing the comparisons:
	   a UNGE b -> a GE b
	   a UNGT b -> a GT b
	   a UNLE b -> b GE a
	   a UNLT b -> b GT a.  */

	rtx tmp0 = gen_reg_rtx (<V_INT_EQUIV>mode);
	rtx tmp1 = gen_reg_rtx (<V_INT_EQUIV>mode);
	rtx tmp2 = gen_reg_rtx (<V_INT_EQUIV>mode);
	/* tmp0/tmp1: per-operand "is not NaN" masks (x == x);
	   tmp2: the combined "both ordered" mask.  */
	emit_insn (gen_aarch64_cmeq<mode> (tmp0, operands[2], operands[2]));
	emit_insn (gen_aarch64_cmeq<mode> (tmp1, operands[3], operands[3]));
	emit_insn (gen_and<v_int_equiv>3 (tmp2, tmp0, tmp1));
	/* Zero out any NaN lanes in each operand before comparing.  */
	emit_insn (gen_and<v_int_equiv>3 (tmp0, tmp0,
					  lowpart_subreg (<V_INT_EQUIV>mode,
							  operands[2],
							  <MODE>mode)));
	emit_insn (gen_and<v_int_equiv>3 (tmp1, tmp1,
					  lowpart_subreg (<V_INT_EQUIV>mode,
							  operands[3],
							  <MODE>mode)));
	gcc_assert (comparison != NULL);
	emit_insn (comparison (operands[0],
			       lowpart_subreg (<MODE>mode,
					       tmp0, <V_INT_EQUIV>mode),
			       lowpart_subreg (<MODE>mode,
					       tmp1, <V_INT_EQUIV>mode)));
	/* Result = UNORDERED (a, b) | ordered-compare result.  */
	emit_insn (gen_orn<v_int_equiv>3 (operands[0], tmp2, operands[0]));
      }
      break;

    case LT:
    case LE:
    case GT:
    case GE:
    case EQ:
    case NE:
      /* The easy case.  Here we emit one of FCMGE, FCMGT or FCMEQ.
	 As a LT b <=> b GE a && a LE b <=> b GT a.  Our transformations are:
	 a GE b -> a GE b
	 a GT b -> a GT b
	 a LE b -> b GE a
	 a LT b -> b GT a
	 a EQ b -> a EQ b
	 a NE b -> ~(a EQ b)  */
      gcc_assert (comparison != NULL);
      emit_insn (comparison (operands[0], operands[2], operands[3]));
      if (code == NE)
	emit_insn (gen_one_cmpl<v_int_equiv>2 (operands[0], operands[0]));
      break;

    case LTGT:
      /* LTGT is not guaranteed to not generate a FP exception.  So let's
	 go the faster way : ((a > b) || (b > a)).  */
      emit_insn (gen_aarch64_cmgt<mode> (operands[0],
					 operands[2], operands[3]));
      emit_insn (gen_aarch64_cmgt<mode> (tmp, operands[3], operands[2]));
      emit_insn (gen_ior<v_int_equiv>3 (operands[0], operands[0], tmp));
      break;

    case ORDERED:
    case UNORDERED:
    case UNEQ:
      /* cmeq (a, a) & cmeq (b, b).  */
      emit_insn (gen_aarch64_cmeq<mode> (operands[0],
					 operands[2], operands[2]));
      emit_insn (gen_aarch64_cmeq<mode> (tmp, operands[3], operands[3]));
      emit_insn (gen_and<v_int_equiv>3 (operands[0], operands[0], tmp));

      if (code == UNORDERED)
	emit_insn (gen_one_cmpl<v_int_equiv>2 (operands[0], operands[0]));
      else if (code == UNEQ)
	{
	  /* UNEQ = UNORDERED | EQ.  */
	  emit_insn (gen_aarch64_cmeq<mode> (tmp, operands[2], operands[3]));
	  emit_insn (gen_orn<v_int_equiv>3 (operands[0], operands[0], tmp));
	}
      break;

    default:
      gcc_unreachable ();
    }

  DONE;
})
2863
;; Unsigned integer vector comparison.  The signed expander already
;; dispatches on the (signed or unsigned) rtx code in operand 1, so
;; simply forward to it.
(define_expand "vec_cmpu<mode><mode>"
  [(set (match_operand:VSDQ_I_DI 0 "register_operand")
	  (match_operator 1 "comparison_operator"
	    [(match_operand:VSDQ_I_DI 2 "register_operand")
	     (match_operand:VSDQ_I_DI 3 "nonmemory_operand")]))]
  "TARGET_SIMD"
{
  emit_insn (gen_vec_cmp<mode><mode> (operands[0], operands[1],
				      operands[2], operands[3]));
  DONE;
})
2875
;; vcond optab: operands[0] = (operands[4] <op3> operands[5])
;;				? operands[1] : operands[2].
;; Expands to a vec_cmp producing a mask followed by a vcond_mask
;; select.
(define_expand "vcond<mode><mode>"
  [(set (match_operand:VALLDI 0 "register_operand")
	(if_then_else:VALLDI
	  (match_operator 3 "comparison_operator"
	    [(match_operand:VALLDI 4 "register_operand")
	     (match_operand:VALLDI 5 "nonmemory_operand")])
	  (match_operand:VALLDI 1 "nonmemory_operand")
	  (match_operand:VALLDI 2 "nonmemory_operand")))]
  "TARGET_SIMD"
{
  rtx mask = gen_reg_rtx (<V_INT_EQUIV>mode);
  enum rtx_code code = GET_CODE (operands[3]);

  /* NE is handled as !EQ in vec_cmp patterns, we can explicitly invert
     it as well as switch operands 1/2 in order to avoid the additional
     NOT instruction.  */
  if (code == NE)
    {
      operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]),
				    operands[4], operands[5]);
      std::swap (operands[1], operands[2]);
    }
  emit_insn (gen_vec_cmp<mode><v_int_equiv> (mask, operands[3],
					     operands[4], operands[5]));
  emit_insn (gen_vcond_mask_<mode><v_int_equiv> (operands[0], operands[1],
						 operands[2], mask));

  DONE;
})
2905
;; Mixed-mode vcond: the comparison is done in a float mode while the
;; selected values are in the equal-width integer mode (<V_cmp_mixed>).
(define_expand "vcond<v_cmp_mixed><mode>"
  [(set (match_operand:<V_cmp_mixed> 0 "register_operand")
	(if_then_else:<V_cmp_mixed>
	  (match_operator 3 "comparison_operator"
	    [(match_operand:VDQF_COND 4 "register_operand")
	     (match_operand:VDQF_COND 5 "nonmemory_operand")])
	  (match_operand:<V_cmp_mixed> 1 "nonmemory_operand")
	  (match_operand:<V_cmp_mixed> 2 "nonmemory_operand")))]
  "TARGET_SIMD"
{
  rtx mask = gen_reg_rtx (<V_INT_EQUIV>mode);
  enum rtx_code code = GET_CODE (operands[3]);

  /* NE is handled as !EQ in vec_cmp patterns, we can explicitly invert
     it as well as switch operands 1/2 in order to avoid the additional
     NOT instruction.  */
  if (code == NE)
    {
      operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]),
				    operands[4], operands[5]);
      std::swap (operands[1], operands[2]);
    }
  emit_insn (gen_vec_cmp<mode><v_int_equiv> (mask, operands[3],
					     operands[4], operands[5]));
  emit_insn (gen_vcond_mask_<v_cmp_mixed><v_int_equiv> (
						operands[0], operands[1],
						operands[2], mask));

  DONE;
})
2936
;; Unsigned-comparison vcond for integer modes; structure mirrors
;; vcond<mode><mode> above.
(define_expand "vcondu<mode><mode>"
  [(set (match_operand:VSDQ_I_DI 0 "register_operand")
	(if_then_else:VSDQ_I_DI
	  (match_operator 3 "comparison_operator"
	    [(match_operand:VSDQ_I_DI 4 "register_operand")
	     (match_operand:VSDQ_I_DI 5 "nonmemory_operand")])
	  (match_operand:VSDQ_I_DI 1 "nonmemory_operand")
	  (match_operand:VSDQ_I_DI 2 "nonmemory_operand")))]
  "TARGET_SIMD"
{
  rtx mask = gen_reg_rtx (<MODE>mode);
  enum rtx_code code = GET_CODE (operands[3]);

  /* NE is handled as !EQ in vec_cmp patterns, we can explicitly invert
     it as well as switch operands 1/2 in order to avoid the additional
     NOT instruction.  */
  if (code == NE)
    {
      operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]),
				    operands[4], operands[5]);
      std::swap (operands[1], operands[2]);
    }
  emit_insn (gen_vec_cmp<mode><mode> (mask, operands[3],
				      operands[4], operands[5]));
  emit_insn (gen_vcond_mask_<mode><v_int_equiv> (operands[0], operands[1],
						 operands[2], mask));
  DONE;
})
2965
;; Mixed-mode vcondu: an unsigned integer comparison (in <V_cmp_mixed>)
;; selecting between float-mode values.
(define_expand "vcondu<mode><v_cmp_mixed>"
  [(set (match_operand:VDQF 0 "register_operand")
	(if_then_else:VDQF
	  (match_operator 3 "comparison_operator"
	    [(match_operand:<V_cmp_mixed> 4 "register_operand")
	     (match_operand:<V_cmp_mixed> 5 "nonmemory_operand")])
	  (match_operand:VDQF 1 "nonmemory_operand")
	  (match_operand:VDQF 2 "nonmemory_operand")))]
  "TARGET_SIMD"
{
  rtx mask = gen_reg_rtx (<V_INT_EQUIV>mode);
  enum rtx_code code = GET_CODE (operands[3]);

  /* NE is handled as !EQ in vec_cmp patterns, we can explicitly invert
     it as well as switch operands 1/2 in order to avoid the additional
     NOT instruction.  */
  if (code == NE)
    {
      operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]),
				    operands[4], operands[5]);
      std::swap (operands[1], operands[2]);
    }
  emit_insn (gen_vec_cmp<v_cmp_mixed><v_cmp_mixed> (
						  mask, operands[3],
						  operands[4], operands[5]));
  emit_insn (gen_vcond_mask_<mode><v_int_equiv> (operands[0], operands[1],
						 operands[2], mask));
  DONE;
})
2995
2996;; Patterns for AArch64 SIMD Intrinsics.
2997
2998;; Lane extraction with sign extension to general purpose register.
(define_insn "*aarch64_get_lane_extend<GPI:mode><VDQQH:mode>"
  [(set (match_operand:GPI 0 "register_operand" "=r")
	(sign_extend:GPI
	  (vec_select:<VEL>
	    (match_operand:VDQQH 1 "register_operand" "w")
	    (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
  "TARGET_SIMD"
  {
    /* RTL lane numbers use GCC vector-extension order; flip to the
       architectural numbering for the assembly output.  */
    operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
    return "smov\\t%<GPI:w>0, %1.<VDQQH:Vetype>[%2]";
  }
  [(set_attr "type" "neon_to_gp<q>")]
)
3012
;; Lane extraction with zero extension to a 32-bit general purpose
;; register (UMOV).
(define_insn "*aarch64_get_lane_zero_extendsi<mode>"
  [(set (match_operand:SI 0 "register_operand" "=r")
	(zero_extend:SI
	  (vec_select:<VEL>
	    (match_operand:VDQQH 1 "register_operand" "w")
	    (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
  "TARGET_SIMD"
  {
    /* Flip the RTL lane number to architectural numbering for assembly.  */
    operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
    return "umov\\t%w0, %1.<Vetype>[%2]";
  }
  [(set_attr "type" "neon_to_gp<q>")]
)
3026
3027;; Lane extraction of a value, neither sign nor zero extension
3028;; is guaranteed so upper bits should be considered undefined.
3029;; RTL uses GCC vector extension indices throughout so flip only for assembly.
;; Alternatives: 0 = extract to general register (UMOV),
;; 1 = extract to SIMD scalar register (DUP), 2 = store lane to memory
;; (ST1 single structure).
(define_insn "aarch64_get_lane<mode>"
  [(set (match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand" "=r, w, Utv")
	(vec_select:<VEL>
	  (match_operand:VALL_F16 1 "register_operand" "w, w, w")
	  (parallel [(match_operand:SI 2 "immediate_operand" "i, i, i")])))]
  "TARGET_SIMD"
  {
    /* Flip the RTL lane number to architectural numbering for assembly.  */
    operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
    switch (which_alternative)
      {
	case 0:
	  return "umov\\t%<vwcore>0, %1.<Vetype>[%2]";
	case 1:
	  return "dup\\t%<Vetype>0, %1.<Vetype>[%2]";
	case 2:
	  return "st1\\t{%1.<Vetype>}[%2], %0";
	default:
	  gcc_unreachable ();
      }
  }
  [(set_attr "type" "neon_to_gp<q>, neon_dup<q>, neon_store1_one_lane<q>")]
)
3052
;; Concatenation of two 64-bit loads from adjacent addresses: the
;; condition requires operand 2's address to be exactly operand 1's
;; address plus the mode size, so the pair can be loaded with a single
;; 128-bit LDR of the first operand's location.  Disabled under
;; STRICT_ALIGNMENT because the combined access is only 64-bit aligned.
(define_insn "load_pair_lanes<mode>"
  [(set (match_operand:<VDBL> 0 "register_operand" "=w")
	(vec_concat:<VDBL>
	   (match_operand:VDC 1 "memory_operand" "Utq")
	   (match_operand:VDC 2 "memory_operand" "m")))]
  "TARGET_SIMD && !STRICT_ALIGNMENT
   && rtx_equal_p (XEXP (operands[2], 0),
		   plus_constant (Pmode,
				  XEXP (operands[1], 0),
				  GET_MODE_SIZE (<MODE>mode)))"
  "ldr\\t%q0, %1"
  [(set_attr "type" "neon_load1_1reg_q")]
)
3066
;; Store of a 128-bit vec_concat of two 64-bit values as a single STP,
;; from either SIMD (D) or general (X) registers.
(define_insn "store_pair_lanes<mode>"
  [(set (match_operand:<VDBL> 0 "aarch64_mem_pair_lanes_operand" "=Uml, Uml")
	(vec_concat:<VDBL>
	   (match_operand:VDC 1 "register_operand" "w, r")
	   (match_operand:VDC 2 "register_operand" "w, r")))]
  "TARGET_SIMD"
  "@
   stp\\t%d1, %d2, %y0
   stp\\t%x1, %x2, %y0"
  [(set_attr "type" "neon_stp, store_16")]
)
3078
3079;; In this insn, operand 1 should be low, and operand 2 the high part of the
3080;; dest vector.
3081
;; Combine a 64-bit value with a zero high half (little-endian lane
;; order).  All three alternatives write only the low 64 bits, which
;; leaves the upper half of the Q register zeroed.
(define_insn "*aarch64_combinez<mode>"
  [(set (match_operand:<VDBL> 0 "register_operand" "=w,w,w")
	(vec_concat:<VDBL>
	  (match_operand:VDC 1 "general_operand" "w,?r,m")
	  (match_operand:VDC 2 "aarch64_simd_or_scalar_imm_zero")))]
  "TARGET_SIMD && !BYTES_BIG_ENDIAN"
  "@
   mov\\t%0.8b, %1.8b
   fmov\t%d0, %1
   ldr\\t%d0, %1"
  [(set_attr "type" "neon_move<q>, neon_from_gp, neon_load1_1reg")
   (set_attr "simd" "yes,*,yes")
   (set_attr "fp" "*,yes,*")]
)
3096
;; Big-endian counterpart of *aarch64_combinez: the zero half appears
;; first in the vec_concat, but the emitted code is identical.
(define_insn "*aarch64_combinez_be<mode>"
  [(set (match_operand:<VDBL> 0 "register_operand" "=w,w,w")
        (vec_concat:<VDBL>
	  (match_operand:VDC 2 "aarch64_simd_or_scalar_imm_zero")
	  (match_operand:VDC 1 "general_operand" "w,?r,m")))]
  "TARGET_SIMD && BYTES_BIG_ENDIAN"
  "@
   mov\\t%0.8b, %1.8b
   fmov\t%d0, %1
   ldr\\t%d0, %1"
  [(set_attr "type" "neon_move<q>, neon_from_gp, neon_load1_1reg")
   (set_attr "simd" "yes,*,yes")
   (set_attr "fp" "*,yes,*")]
)
3111
;; vcombine builtin: concatenate two 64-bit vectors into a 128-bit one.
;; The endian-dependent lane ordering is handled by
;; aarch64_split_simd_combine.
(define_expand "aarch64_combine<mode>"
  [(match_operand:<VDBL> 0 "register_operand")
   (match_operand:VDC 1 "register_operand")
   (match_operand:VDC 2 "register_operand")]
  "TARGET_SIMD"
{
  aarch64_split_simd_combine (operands[0], operands[1], operands[2]);

  DONE;
}
)
3123
;; Combine by writing the low and high quad halves of the destination
;; separately.
(define_expand "aarch64_simd_combine<mode>"
  [(match_operand:<VDBL> 0 "register_operand")
   (match_operand:VDC 1 "register_operand")
   (match_operand:VDC 2 "register_operand")]
  "TARGET_SIMD"
  {
    emit_insn (gen_move_lo_quad_<Vdbl> (operands[0], operands[1]));
    emit_insn (gen_move_hi_quad_<Vdbl> (operands[0], operands[2]));
    DONE;
  }
[(set_attr "type" "multiple")]
)
3136
3137;; <su><addsub>l<q>.
3138
;; Widening add/subtract of the high halves of two Q registers
;; (SADDL2/UADDL2/SSUBL2/USUBL2).
(define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>_hi_internal"
 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
       (ADDSUB:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
			   (match_operand:VQW 1 "register_operand" "w")
			   (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
		       (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
			   (match_operand:VQW 2 "register_operand" "w")
			   (match_dup 3)))))]
  "TARGET_SIMD"
  "<ANY_EXTEND:su><ADDSUB:optab>l2\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_<ADDSUB:optab>_long")]
)

;; Widening add/subtract of the low halves of two Q registers
;; (SADDL/UADDL/SSUBL/USUBL).
(define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>_lo_internal"
 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
       (ADDSUB:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
                           (match_operand:VQW 1 "register_operand" "w")
                           (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
                       (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
                           (match_operand:VQW 2 "register_operand" "w")
                           (match_dup 3)))))]
  "TARGET_SIMD"
  "<ANY_EXTEND:su><ADDSUB:optab>l\t%0.<Vwtype>, %1.<Vhalftype>, %2.<Vhalftype>"
  [(set_attr "type" "neon_<ADDSUB:optab>_long")]
)
3164
3165
;; Builtin expanders for the "2" (high-half) widening add/subtract
;; forms.  Each constructs the high-half lane-selection parallel and
;; forwards to the corresponding _hi_internal insn.
(define_expand "aarch64_saddl2<mode>"
  [(match_operand:<VWIDE> 0 "register_operand" "=w")
   (match_operand:VQW 1 "register_operand" "w")
   (match_operand:VQW 2 "register_operand" "w")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
  emit_insn (gen_aarch64_saddl<mode>_hi_internal (operands[0], operands[1],
                                                  operands[2], p));
  DONE;
})

(define_expand "aarch64_uaddl2<mode>"
  [(match_operand:<VWIDE> 0 "register_operand" "=w")
   (match_operand:VQW 1 "register_operand" "w")
   (match_operand:VQW 2 "register_operand" "w")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
  emit_insn (gen_aarch64_uaddl<mode>_hi_internal (operands[0], operands[1],
                                                  operands[2], p));
  DONE;
})

(define_expand "aarch64_ssubl2<mode>"
  [(match_operand:<VWIDE> 0 "register_operand" "=w")
   (match_operand:VQW 1 "register_operand" "w")
   (match_operand:VQW 2 "register_operand" "w")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
  emit_insn (gen_aarch64_ssubl<mode>_hi_internal (operands[0], operands[1],
						operands[2], p));
  DONE;
})

(define_expand "aarch64_usubl2<mode>"
  [(match_operand:<VWIDE> 0 "register_operand" "=w")
   (match_operand:VQW 1 "register_operand" "w")
   (match_operand:VQW 2 "register_operand" "w")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
  emit_insn (gen_aarch64_usubl<mode>_hi_internal (operands[0], operands[1],
						operands[2], p));
  DONE;
})
3213
;; Widening add/subtract of two 64-bit vectors
;; (SADDL/UADDL/SSUBL/USUBL).
(define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>"
 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
       (ADDSUB:<VWIDE> (ANY_EXTEND:<VWIDE>
			   (match_operand:VD_BHSI 1 "register_operand" "w"))
		       (ANY_EXTEND:<VWIDE>
			   (match_operand:VD_BHSI 2 "register_operand" "w"))))]
  "TARGET_SIMD"
  "<ANY_EXTEND:su><ADDSUB:optab>l\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_<ADDSUB:optab>_long")]
)
3224
3225;; <su><addsub>w<q>.
3226
;; widen_ssum/widen_usum optabs: operands[0] = operands[2]
;;   + sign/zero-extended operands[1].
;; The VQW (128-bit input) variants split the work into a low-half
;; widening add followed by a high-half one; the VD_BHSI (64-bit input)
;; variants map directly onto a single SADDW/UADDW.  The two expanders
;; with the same base name are distinguished by their mode iterators.
(define_expand "widen_ssum<mode>3"
  [(set (match_operand:<VDBLW> 0 "register_operand" "")
	(plus:<VDBLW> (sign_extend:<VDBLW>
		        (match_operand:VQW 1 "register_operand" ""))
		      (match_operand:<VDBLW> 2 "register_operand" "")))]
  "TARGET_SIMD"
  {
    rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
    rtx temp = gen_reg_rtx (GET_MODE (operands[0]));

    emit_insn (gen_aarch64_saddw<mode>_internal (temp, operands[2],
						operands[1], p));
    emit_insn (gen_aarch64_saddw2<mode> (operands[0], temp, operands[1]));
    DONE;
  }
)

(define_expand "widen_ssum<mode>3"
  [(set (match_operand:<VWIDE> 0 "register_operand" "")
	(plus:<VWIDE> (sign_extend:<VWIDE>
		        (match_operand:VD_BHSI 1 "register_operand" ""))
		      (match_operand:<VWIDE> 2 "register_operand" "")))]
  "TARGET_SIMD"
{
  emit_insn (gen_aarch64_saddw<mode> (operands[0], operands[2], operands[1]));
  DONE;
})

(define_expand "widen_usum<mode>3"
  [(set (match_operand:<VDBLW> 0 "register_operand" "")
	(plus:<VDBLW> (zero_extend:<VDBLW>
		        (match_operand:VQW 1 "register_operand" ""))
		      (match_operand:<VDBLW> 2 "register_operand" "")))]
  "TARGET_SIMD"
  {
    rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
    rtx temp = gen_reg_rtx (GET_MODE (operands[0]));

    emit_insn (gen_aarch64_uaddw<mode>_internal (temp, operands[2],
						 operands[1], p));
    emit_insn (gen_aarch64_uaddw2<mode> (operands[0], temp, operands[1]));
    DONE;
  }
)

(define_expand "widen_usum<mode>3"
  [(set (match_operand:<VWIDE> 0 "register_operand" "")
	(plus:<VWIDE> (zero_extend:<VWIDE>
		        (match_operand:VD_BHSI 1 "register_operand" ""))
		      (match_operand:<VWIDE> 2 "register_operand" "")))]
  "TARGET_SIMD"
{
  emit_insn (gen_aarch64_uaddw<mode> (operands[0], operands[2], operands[1]));
  DONE;
})
3282
;; Widening add/subtract of a wide operand and an extended 64-bit
;; vector (SADDW/UADDW/SSUBW/USUBW).
(define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>w<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (ADDSUB:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
			(ANY_EXTEND:<VWIDE>
			  (match_operand:VD_BHSI 2 "register_operand" "w"))))]
  "TARGET_SIMD"
  "<ANY_EXTEND:su><ADDSUB:optab>w\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>"
  [(set_attr "type" "neon_<ADDSUB:optab>_widen")]
)

;; As above, but the narrow operand is the low half of a Q register.
(define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>w<mode>_internal"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (ADDSUB:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
			(ANY_EXTEND:<VWIDE>
			  (vec_select:<VHALF>
			   (match_operand:VQW 2 "register_operand" "w")
			   (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))))]
  "TARGET_SIMD"
  "<ANY_EXTEND:su><ADDSUB:optab>w\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vhalftype>"
  [(set_attr "type" "neon_<ADDSUB:optab>_widen")]
)

;; As above, but using the high half of the Q register
;; (SADDW2/UADDW2/SSUBW2/USUBW2).
(define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>w2<mode>_internal"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (ADDSUB:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
			(ANY_EXTEND:<VWIDE>
			  (vec_select:<VHALF>
			   (match_operand:VQW 2 "register_operand" "w")
			   (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))))]
  "TARGET_SIMD"
  "<ANY_EXTEND:su><ADDSUB:optab>w2\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>"
  [(set_attr "type" "neon_<ADDSUB:optab>_widen")]
)
3316
;; Builtin expanders for the "2" (high-half) widening add/subtract-wide
;; forms; each builds the high-half selection parallel and forwards to
;; the matching w2 internal insn.
(define_expand "aarch64_saddw2<mode>"
  [(match_operand:<VWIDE> 0 "register_operand" "=w")
   (match_operand:<VWIDE> 1 "register_operand" "w")
   (match_operand:VQW 2 "register_operand" "w")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
  emit_insn (gen_aarch64_saddw2<mode>_internal (operands[0], operands[1],
						operands[2], p));
  DONE;
})

(define_expand "aarch64_uaddw2<mode>"
  [(match_operand:<VWIDE> 0 "register_operand" "=w")
   (match_operand:<VWIDE> 1 "register_operand" "w")
   (match_operand:VQW 2 "register_operand" "w")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
  emit_insn (gen_aarch64_uaddw2<mode>_internal (operands[0], operands[1],
						operands[2], p));
  DONE;
})


(define_expand "aarch64_ssubw2<mode>"
  [(match_operand:<VWIDE> 0 "register_operand" "=w")
   (match_operand:<VWIDE> 1 "register_operand" "w")
   (match_operand:VQW 2 "register_operand" "w")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
  emit_insn (gen_aarch64_ssubw2<mode>_internal (operands[0], operands[1],
						operands[2], p));
  DONE;
})

(define_expand "aarch64_usubw2<mode>"
  [(match_operand:<VWIDE> 0 "register_operand" "=w")
   (match_operand:<VWIDE> 1 "register_operand" "w")
   (match_operand:VQW 2 "register_operand" "w")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
  emit_insn (gen_aarch64_usubw2<mode>_internal (operands[0], operands[1],
						operands[2], p));
  DONE;
})
3365
3366;; <su><r>h<addsub>.
3367
;; Halving add/subtract, plain and rounding
;; (SHADD/UHADD/SRHADD/URHADD/SHSUB/UHSUB), modelled as an unspec.
(define_insn "aarch64_<sur>h<addsub><mode>"
  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
        (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand" "w")
		      (match_operand:VDQ_BHSI 2 "register_operand" "w")]
		     HADDSUB))]
  "TARGET_SIMD"
  "<sur>h<addsub>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_<addsub>_halve<q>")]
)
3377
3378;; <r><addsub>hn<q>.
3379
;; Narrowing high-half add/subtract (ADDHN/SUBHN/RADDHN/RSUBHN):
;; produce the high half of each element sum/difference in a narrow
;; destination.
(define_insn "aarch64_<sur><addsub>hn<mode>"
  [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
        (unspec:<VNARROWQ> [(match_operand:VQN 1 "register_operand" "w")
			    (match_operand:VQN 2 "register_operand" "w")]
                           ADDSUBHN))]
  "TARGET_SIMD"
  "<sur><addsub>hn\\t%0.<Vntype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_<addsub>_halve_narrow_q")]
)

;; "2" variant: writes the upper half of the destination; operand 1 is
;; tied to the destination to keep its existing low half.
(define_insn "aarch64_<sur><addsub>hn2<mode>"
  [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
        (unspec:<VNARROWQ2> [(match_operand:<VNARROWQ> 1 "register_operand" "0")
			     (match_operand:VQN 2 "register_operand" "w")
			     (match_operand:VQN 3 "register_operand" "w")]
                            ADDSUBHN2))]
  "TARGET_SIMD"
  "<sur><addsub>hn2\\t%0.<V2ntype>, %2.<Vtype>, %3.<Vtype>"
  [(set_attr "type" "neon_<addsub>_halve_narrow_q")]
)
3400
3401;; pmul.
3402
;; Polynomial multiply (PMUL) on byte vectors.
(define_insn "aarch64_pmul<mode>"
  [(set (match_operand:VB 0 "register_operand" "=w")
        (unspec:VB [(match_operand:VB 1 "register_operand" "w")
		    (match_operand:VB 2 "register_operand" "w")]
		   UNSPEC_PMUL))]
 "TARGET_SIMD"
 "pmul\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_mul_<Vetype><q>")]
)
3412
3413;; fmulx.
3414
;; FMULX for vector and scalar FP modes, modelled as an unspec.
(define_insn "aarch64_fmulx<mode>"
  [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
	(unspec:VHSDF_HSDF
	  [(match_operand:VHSDF_HSDF 1 "register_operand" "w")
	   (match_operand:VHSDF_HSDF 2 "register_operand" "w")]
	   UNSPEC_FMULX))]
 "TARGET_SIMD"
 "fmulx\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
 [(set_attr "type" "neon_fp_mul_<stype>")]
)
3425
3426;; vmulxq_lane_f32, and vmulx_laneq_f32
3427
;; FMULX by a single lane taken from a vector of the opposite width
;; (e.g. vmulxq_lane_f32 / vmulx_laneq_f32).
(define_insn "*aarch64_mulx_elt_<vswap_width_name><mode>"
  [(set (match_operand:VDQSF 0 "register_operand" "=w")
	(unspec:VDQSF
	 [(match_operand:VDQSF 1 "register_operand" "w")
	  (vec_duplicate:VDQSF
	   (vec_select:<VEL>
	    (match_operand:<VSWAP_WIDTH> 2 "register_operand" "w")
	    (parallel [(match_operand:SI 3 "immediate_operand" "i")])))]
	 UNSPEC_FMULX))]
  "TARGET_SIMD"
  {
    /* Flip the RTL lane number to architectural numbering for assembly.  */
    operands[3] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[3]));
    return "fmulx\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
  }
  [(set_attr "type" "neon_fp_mul_<Vetype>_scalar<q>")]
)

;; FMULX by a single lane taken from a vector of the same mode
;; (e.g. vmulxq_laneq_f32, vmulxq_laneq_f64, vmulx_lane_f32).
(define_insn "*aarch64_mulx_elt<mode>"
  [(set (match_operand:VDQF 0 "register_operand" "=w")
	(unspec:VDQF
	 [(match_operand:VDQF 1 "register_operand" "w")
	  (vec_duplicate:VDQF
	   (vec_select:<VEL>
	    (match_operand:VDQF 2 "register_operand" "w")
	    (parallel [(match_operand:SI 3 "immediate_operand" "i")])))]
	 UNSPEC_FMULX))]
  "TARGET_SIMD"
  {
    /* Flip the RTL lane number to architectural numbering for assembly.  */
    operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
    return "fmulx\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
  }
  [(set_attr "type" "neon_fp_mul_<Vetype><q>")]
)
3463
3464;; vmulxq_lane
3465
;; FMULX by a duplicated scalar: emitted as an indexed FMULX on lane 0
;; of the scalar operand's register.
(define_insn "*aarch64_mulx_elt_from_dup<mode>"
  [(set (match_operand:VHSDF 0 "register_operand" "=w")
	(unspec:VHSDF
	 [(match_operand:VHSDF 1 "register_operand" "w")
	  (vec_duplicate:VHSDF
	    (match_operand:<VEL> 2 "register_operand" "<h_con>"))]
	 UNSPEC_FMULX))]
  "TARGET_SIMD"
  "fmulx\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[0]";
  [(set_attr "type" "neon<fp>_mul_<stype>_scalar<q>")]
)
3477
3478;; vmulxs_lane_f32, vmulxs_laneq_f32
3479;; vmulxd_lane_f64 ==  vmulx_lane_f64
3480;; vmulxd_laneq_f64 == vmulx_laneq_f64
3481
;; Scalar FMULX of a register by one lane of a vector
;; (vmulxs_lane_f32 etc.).
(define_insn "*aarch64_vgetfmulx<mode>"
  [(set (match_operand:<VEL> 0 "register_operand" "=w")
	(unspec:<VEL>
	 [(match_operand:<VEL> 1 "register_operand" "w")
	  (vec_select:<VEL>
	   (match_operand:VDQF 2 "register_operand" "w")
	    (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
	 UNSPEC_FMULX))]
  "TARGET_SIMD"
  {
    /* Flip the RTL lane number to architectural numbering for assembly.  */
    operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
    return "fmulx\t%<Vetype>0, %<Vetype>1, %2.<Vetype>[%3]";
  }
  [(set_attr "type" "fmul<Vetype>")]
)
3497;; <su>q<addsub>
3498
;; Saturating add/subtract: BINQOPS expands to the signed and unsigned
;; saturating plus/minus codes, giving sqadd/uqadd/sqsub/uqsub.
(define_insn "aarch64_<su_optab><optab><mode>"
  [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
	(BINQOPS:VSDQ_I (match_operand:VSDQ_I 1 "register_operand" "w")
			  (match_operand:VSDQ_I 2 "register_operand" "w")))]
  "TARGET_SIMD"
  "<su_optab><optab>\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
  [(set_attr "type" "neon_<optab><q>")]
)
3507
3508;; suqadd and usqadd
3509
;; SUQADD/USQADD: saturating accumulate of mixed-signedness operands.
;; These are two-operand accumulating instructions, so operand 1 is tied
;; to the destination (constraint "0") and only operands 0 and 2 are
;; printed in the assembly template.
(define_insn "aarch64_<sur>qadd<mode>"
  [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
	(unspec:VSDQ_I [(match_operand:VSDQ_I 1 "register_operand" "0")
			(match_operand:VSDQ_I 2 "register_operand" "w")]
		       USSUQADD))]
  "TARGET_SIMD"
  "<sur>qadd\\t%<v>0<Vmtype>, %<v>2<Vmtype>"
  [(set_attr "type" "neon_qadd<q>")]
)
3519
3520;; sqmovun
3521
;; SQXTUN: signed saturating extract unsigned narrow — narrows each
;; element to the half-width unsigned type with saturation.
(define_insn "aarch64_sqmovun<mode>"
  [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
	(unspec:<VNARROWQ> [(match_operand:VSQN_HSDI 1 "register_operand" "w")]
                            UNSPEC_SQXTUN))]
   "TARGET_SIMD"
   "sqxtun\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>"
   [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
)
3530
3531;; sqmovn and uqmovn
3532
;; SQXTN/UQXTN: saturating extract narrow, signed or unsigned according
;; to the SUQMOVN iterator's <sur> prefix.
(define_insn "aarch64_<sur>qmovn<mode>"
  [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
	(unspec:<VNARROWQ> [(match_operand:VSQN_HSDI 1 "register_operand" "w")]
                            SUQMOVN))]
  "TARGET_SIMD"
  "<sur>qxtn\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>"
   [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
)
3541
3542;; <su>q<absneg>
3543
;; Saturating unary ops: UNQOPS covers saturating absolute value and
;; negation, producing sqabs/sqneg.
(define_insn "aarch64_s<optab><mode>"
  [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
	(UNQOPS:VSDQ_I
	  (match_operand:VSDQ_I 1 "register_operand" "w")))]
  "TARGET_SIMD"
  "s<optab>\\t%<v>0<Vmtype>, %<v>1<Vmtype>"
  [(set_attr "type" "neon_<optab><q>")]
)
3552
3553;; sq<r>dmulh.
3554
;; SQDMULH/SQRDMULH: signed saturating doubling multiply returning high
;; half, optionally rounding (<r> selects the rounding form).
(define_insn "aarch64_sq<r>dmulh<mode>"
  [(set (match_operand:VSDQ_HSI 0 "register_operand" "=w")
	(unspec:VSDQ_HSI
	  [(match_operand:VSDQ_HSI 1 "register_operand" "w")
	   (match_operand:VSDQ_HSI 2 "register_operand" "w")]
	 VQDMULH))]
  "TARGET_SIMD"
  "sq<r>dmulh\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
  [(set_attr "type" "neon_sat_mul_<Vetype><q>")]
)
3565
3566;; sq<r>dmulh_lane
3567
;; By-lane forms of sq<r>dmulh.  Four parallel patterns:
;;   - vector (VDQHS) x lane of a 64-bit vector (_lane, <VCOND>)
;;   - vector (VDQHS) x lane of a 128-bit vector (_laneq, <VCONQ>)
;;   - scalar (SD_HSI) x lane of a 64-bit vector (_lane)
;;   - scalar (SD_HSI) x lane of a 128-bit vector (_laneq)
;; All four rewrite operand 3 via aarch64_endian_lane_rtx before
;; printing, so the lane index is architecturally correct regardless of
;; endianness.

;; Vector x 64-bit-vector lane.
(define_insn "aarch64_sq<r>dmulh_lane<mode>"
  [(set (match_operand:VDQHS 0 "register_operand" "=w")
        (unspec:VDQHS
	  [(match_operand:VDQHS 1 "register_operand" "w")
           (vec_select:<VEL>
             (match_operand:<VCOND> 2 "register_operand" "<vwx>")
             (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
	 VQDMULH))]
  "TARGET_SIMD"
  "*
   operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
   return \"sq<r>dmulh\\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[%3]\";"
  [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
)

;; Vector x 128-bit-vector lane.
(define_insn "aarch64_sq<r>dmulh_laneq<mode>"
  [(set (match_operand:VDQHS 0 "register_operand" "=w")
        (unspec:VDQHS
	  [(match_operand:VDQHS 1 "register_operand" "w")
           (vec_select:<VEL>
             (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
             (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
	 VQDMULH))]
  "TARGET_SIMD"
  "*
   operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
   return \"sq<r>dmulh\\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[%3]\";"
  [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
)

;; Scalar x 64-bit-vector lane.
(define_insn "aarch64_sq<r>dmulh_lane<mode>"
  [(set (match_operand:SD_HSI 0 "register_operand" "=w")
        (unspec:SD_HSI
	  [(match_operand:SD_HSI 1 "register_operand" "w")
           (vec_select:<VEL>
             (match_operand:<VCOND> 2 "register_operand" "<vwx>")
             (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
	 VQDMULH))]
  "TARGET_SIMD"
  "*
   operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
   return \"sq<r>dmulh\\t%<v>0, %<v>1, %2.<v>[%3]\";"
  [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
)

;; Scalar x 128-bit-vector lane.
(define_insn "aarch64_sq<r>dmulh_laneq<mode>"
  [(set (match_operand:SD_HSI 0 "register_operand" "=w")
        (unspec:SD_HSI
	  [(match_operand:SD_HSI 1 "register_operand" "w")
           (vec_select:<VEL>
             (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
             (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
	 VQDMULH))]
  "TARGET_SIMD"
  "*
   operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
   return \"sq<r>dmulh\\t%<v>0, %<v>1, %2.<v>[%3]\";"
  [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
)
3627
3628;; sqrdml[as]h.
3629
;; SQRDMLAH/SQRDMLSH: rounding doubling multiply accumulate/subtract
;; (ARMv8.1 QRDMLAH extension, hence TARGET_SIMD_RDMA).  Operand 1 is the
;; accumulator and is tied to the destination.
(define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h<mode>"
  [(set (match_operand:VSDQ_HSI 0 "register_operand" "=w")
	(unspec:VSDQ_HSI
	  [(match_operand:VSDQ_HSI 1 "register_operand" "0")
	   (match_operand:VSDQ_HSI 2 "register_operand" "w")
	   (match_operand:VSDQ_HSI 3 "register_operand" "w")]
	  SQRDMLH_AS))]
   "TARGET_SIMD_RDMA"
   "sqrdml<SQRDMLH_AS:rdma_as>h\\t%<v>0<Vmtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
   [(set_attr "type" "neon_sat_mla_<Vetype>_long")]
)
3641
3642;; sqrdml[as]h_lane.
3643
;; By-lane SQRDML[AS]H against a 64-bit vector (<VCOND>), for vector
;; (VDQHS) and scalar (SD_HSI) destinations.  Operand 1 is the tied
;; accumulator; the lane index in operand 4 is endian-adjusted.

;; Vector destination.
(define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_lane<mode>"
  [(set (match_operand:VDQHS 0 "register_operand" "=w")
	(unspec:VDQHS
	  [(match_operand:VDQHS 1 "register_operand" "0")
	   (match_operand:VDQHS 2 "register_operand" "w")
	   (vec_select:<VEL>
	     (match_operand:<VCOND> 3 "register_operand" "<vwx>")
	     (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
	  SQRDMLH_AS))]
   "TARGET_SIMD_RDMA"
   {
     operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
     return
      "sqrdml<SQRDMLH_AS:rdma_as>h\\t%0.<Vtype>, %2.<Vtype>, %3.<Vetype>[%4]";
   }
   [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
)

;; Scalar destination.
(define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_lane<mode>"
  [(set (match_operand:SD_HSI 0 "register_operand" "=w")
	(unspec:SD_HSI
	  [(match_operand:SD_HSI 1 "register_operand" "0")
	   (match_operand:SD_HSI 2 "register_operand" "w")
	   (vec_select:<VEL>
	     (match_operand:<VCOND> 3 "register_operand" "<vwx>")
	     (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
	  SQRDMLH_AS))]
   "TARGET_SIMD_RDMA"
   {
     operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
     return
      "sqrdml<SQRDMLH_AS:rdma_as>h\\t%<v>0, %<v>2, %3.<Vetype>[%4]";
   }
   [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
)
3679
3680;; sqrdml[as]h_laneq.
3681
;; By-lane SQRDML[AS]H against a 128-bit vector (<VCONQ>), for vector
;; (VDQHS) and scalar (SD_HSI) destinations.  Same shape as the _lane
;; patterns but with the full-width lane container.

;; Vector destination.
(define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_laneq<mode>"
  [(set (match_operand:VDQHS 0 "register_operand" "=w")
	(unspec:VDQHS
	  [(match_operand:VDQHS 1 "register_operand" "0")
	   (match_operand:VDQHS 2 "register_operand" "w")
	   (vec_select:<VEL>
	     (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
	     (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
	  SQRDMLH_AS))]
   "TARGET_SIMD_RDMA"
   {
     operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
     return
      "sqrdml<SQRDMLH_AS:rdma_as>h\\t%0.<Vtype>, %2.<Vtype>, %3.<Vetype>[%4]";
   }
   [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
)

;; Scalar destination.
(define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_laneq<mode>"
  [(set (match_operand:SD_HSI 0 "register_operand" "=w")
	(unspec:SD_HSI
	  [(match_operand:SD_HSI 1 "register_operand" "0")
	   (match_operand:SD_HSI 2 "register_operand" "w")
	   (vec_select:<VEL>
	     (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
	     (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
	  SQRDMLH_AS))]
   "TARGET_SIMD_RDMA"
   {
     operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
     return
      "sqrdml<SQRDMLH_AS:rdma_as>h\\t%<v>0, %<v>2, %3.<v>[%4]";
   }
   [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
)
3717
3718;; vqdml[sa]l
3719
;; SQDMLAL/SQDMLSL: widening saturating doubling multiply
;; accumulate/subtract.  Modelled as the accumulator (operand 1, tied to
;; the destination) plus/minus a saturating left shift by 1 of the
;; widened product — i.e. the "doubling" in the RTL.
(define_insn "aarch64_sqdml<SBINQOPS:as>l<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (SBINQOPS:<VWIDE>
	  (match_operand:<VWIDE> 1 "register_operand" "0")
	  (ss_ashift:<VWIDE>
	      (mult:<VWIDE>
		(sign_extend:<VWIDE>
		      (match_operand:VSD_HSI 2 "register_operand" "w"))
		(sign_extend:<VWIDE>
		      (match_operand:VSD_HSI 3 "register_operand" "w")))
	      (const_int 1))))]
  "TARGET_SIMD"
  "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
  [(set_attr "type" "neon_sat_mla_<Vetype>_long")]
)
3735
3736;; vqdml[sa]l_lane
3737
;; By-lane SQDML[AS]L.  Four patterns: vector (VD_HSI, lane broadcast via
;; vec_duplicate) and scalar (SD_HSI, plain vec_select) multiplicands,
;; each against a 64-bit (<VCOND>, _lane) or 128-bit (<VCONQ>, _laneq)
;; lane container.  Operand 1 is the tied accumulator; the lane index in
;; operand 4 is endian-adjusted before printing.

;; Vector x 64-bit-vector lane.
(define_insn "aarch64_sqdml<SBINQOPS:as>l_lane<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (SBINQOPS:<VWIDE>
	  (match_operand:<VWIDE> 1 "register_operand" "0")
	  (ss_ashift:<VWIDE>
	    (mult:<VWIDE>
	      (sign_extend:<VWIDE>
		(match_operand:VD_HSI 2 "register_operand" "w"))
	      (sign_extend:<VWIDE>
		(vec_duplicate:VD_HSI
		  (vec_select:<VEL>
		    (match_operand:<VCOND> 3 "register_operand" "<vwx>")
		    (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
              ))
	    (const_int 1))))]
  "TARGET_SIMD"
  {
    operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
    return
      "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
  }
  [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
)

;; Vector x 128-bit-vector lane.
(define_insn "aarch64_sqdml<SBINQOPS:as>l_laneq<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (SBINQOPS:<VWIDE>
	  (match_operand:<VWIDE> 1 "register_operand" "0")
	  (ss_ashift:<VWIDE>
	    (mult:<VWIDE>
	      (sign_extend:<VWIDE>
		(match_operand:VD_HSI 2 "register_operand" "w"))
	      (sign_extend:<VWIDE>
		(vec_duplicate:VD_HSI
		  (vec_select:<VEL>
		    (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
		    (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
              ))
	    (const_int 1))))]
  "TARGET_SIMD"
  {
    operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
    return
      "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
  }
  [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
)

;; Scalar x 64-bit-vector lane.
(define_insn "aarch64_sqdml<SBINQOPS:as>l_lane<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (SBINQOPS:<VWIDE>
	  (match_operand:<VWIDE> 1 "register_operand" "0")
	  (ss_ashift:<VWIDE>
	    (mult:<VWIDE>
	      (sign_extend:<VWIDE>
		(match_operand:SD_HSI 2 "register_operand" "w"))
	      (sign_extend:<VWIDE>
		(vec_select:<VEL>
		  (match_operand:<VCOND> 3 "register_operand" "<vwx>")
		  (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
              )
	    (const_int 1))))]
  "TARGET_SIMD"
  {
    operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
    return
      "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
  }
  [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
)

;; Scalar x 128-bit-vector lane.
(define_insn "aarch64_sqdml<SBINQOPS:as>l_laneq<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (SBINQOPS:<VWIDE>
	  (match_operand:<VWIDE> 1 "register_operand" "0")
	  (ss_ashift:<VWIDE>
	    (mult:<VWIDE>
	      (sign_extend:<VWIDE>
		(match_operand:SD_HSI 2 "register_operand" "w"))
	      (sign_extend:<VWIDE>
		(vec_select:<VEL>
		  (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
		  (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
              )
	    (const_int 1))))]
  "TARGET_SIMD"
  {
    operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
    return
      "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
  }
  [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
)
3831
3832;; vqdml[sa]l_n
3833
;; SQDML[AS]L by a scalar register broadcast to all lanes; printed as a
;; by-element form using lane 0 of the scalar's register.
(define_insn "aarch64_sqdml<SBINQOPS:as>l_n<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (SBINQOPS:<VWIDE>
	  (match_operand:<VWIDE> 1 "register_operand" "0")
	  (ss_ashift:<VWIDE>
	      (mult:<VWIDE>
		(sign_extend:<VWIDE>
		      (match_operand:VD_HSI 2 "register_operand" "w"))
		(sign_extend:<VWIDE>
		  (vec_duplicate:VD_HSI
		    (match_operand:<VEL> 3 "register_operand" "<vwx>"))))
	      (const_int 1))))]
  "TARGET_SIMD"
  "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[0]"
  [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
)
3850
3851;; sqdml[as]l2
3852
;; SQDML[AS]L2: widening multiply-accumulate on the HIGH halves of the
;; 128-bit source vectors.  The internal insn takes the high-half lane
;; selector as operand 4; the expanders build that selector with
;; aarch64_simd_vect_par_cnst_half and emit the internal pattern.
(define_insn "aarch64_sqdml<SBINQOPS:as>l2<mode>_internal"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (SBINQOPS:<VWIDE>
         (match_operand:<VWIDE> 1 "register_operand" "0")
         (ss_ashift:<VWIDE>
             (mult:<VWIDE>
               (sign_extend:<VWIDE>
                 (vec_select:<VHALF>
                     (match_operand:VQ_HSI 2 "register_operand" "w")
                     (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
               (sign_extend:<VWIDE>
                 (vec_select:<VHALF>
                     (match_operand:VQ_HSI 3 "register_operand" "w")
                     (match_dup 4))))
             (const_int 1))))]
  "TARGET_SIMD"
  "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
  [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
)

;; Expander: accumulate variant (sqdmlal2).
(define_expand "aarch64_sqdmlal2<mode>"
  [(match_operand:<VWIDE> 0 "register_operand" "=w")
   (match_operand:<VWIDE> 1 "register_operand" "w")
   (match_operand:VQ_HSI 2 "register_operand" "w")
   (match_operand:VQ_HSI 3 "register_operand" "w")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
  emit_insn (gen_aarch64_sqdmlal2<mode>_internal (operands[0], operands[1],
						  operands[2], operands[3], p));
  DONE;
})

;; Expander: subtract variant (sqdmlsl2).
(define_expand "aarch64_sqdmlsl2<mode>"
  [(match_operand:<VWIDE> 0 "register_operand" "=w")
   (match_operand:<VWIDE> 1 "register_operand" "w")
   (match_operand:VQ_HSI 2 "register_operand" "w")
   (match_operand:VQ_HSI 3 "register_operand" "w")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
  emit_insn (gen_aarch64_sqdmlsl2<mode>_internal (operands[0], operands[1],
						  operands[2], operands[3], p));
  DONE;
})
3898
3899;; vqdml[sa]l2_lane
3900
;; By-lane SQDML[AS]L2 internals: high half of operand 2 multiplied by a
;; broadcast lane of operand 3, taken from a 64-bit (_lane, <VCOND>) or
;; 128-bit (_laneq, <VCONQ>) container.  Operand 5 is the high-half
;; selector supplied by the expanders; the lane index (operand 4) is
;; endian-adjusted before printing.

;; Lane from a 64-bit vector.
(define_insn "aarch64_sqdml<SBINQOPS:as>l2_lane<mode>_internal"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (SBINQOPS:<VWIDE>
	  (match_operand:<VWIDE> 1 "register_operand" "0")
	  (ss_ashift:<VWIDE>
	      (mult:<VWIDE>
		(sign_extend:<VWIDE>
                  (vec_select:<VHALF>
                    (match_operand:VQ_HSI 2 "register_operand" "w")
                    (match_operand:VQ_HSI 5 "vect_par_cnst_hi_half" "")))
		(sign_extend:<VWIDE>
                  (vec_duplicate:<VHALF>
		    (vec_select:<VEL>
		      (match_operand:<VCOND> 3 "register_operand" "<vwx>")
		      (parallel [(match_operand:SI 4 "immediate_operand" "i")])
		    ))))
	      (const_int 1))))]
  "TARGET_SIMD"
  {
    operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
    return
     "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
  }
  [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
)

;; Lane from a 128-bit vector.
(define_insn "aarch64_sqdml<SBINQOPS:as>l2_laneq<mode>_internal"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (SBINQOPS:<VWIDE>
	  (match_operand:<VWIDE> 1 "register_operand" "0")
	  (ss_ashift:<VWIDE>
	      (mult:<VWIDE>
		(sign_extend:<VWIDE>
                  (vec_select:<VHALF>
                    (match_operand:VQ_HSI 2 "register_operand" "w")
                    (match_operand:VQ_HSI 5 "vect_par_cnst_hi_half" "")))
		(sign_extend:<VWIDE>
                  (vec_duplicate:<VHALF>
		    (vec_select:<VEL>
		      (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
		      (parallel [(match_operand:SI 4 "immediate_operand" "i")])
		    ))))
	      (const_int 1))))]
  "TARGET_SIMD"
  {
    operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
    return
     "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
  }
  [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
)
3952
;; Expanders for the by-lane SQDML[AS]L2 patterns: build the high-half
;; selector with aarch64_simd_vect_par_cnst_half and forward to the
;; matching _internal insn.

;; sqdmlal2 by lane of a 64-bit vector.
(define_expand "aarch64_sqdmlal2_lane<mode>"
  [(match_operand:<VWIDE> 0 "register_operand" "=w")
   (match_operand:<VWIDE> 1 "register_operand" "w")
   (match_operand:VQ_HSI 2 "register_operand" "w")
   (match_operand:<VCOND> 3 "register_operand" "<vwx>")
   (match_operand:SI 4 "immediate_operand" "i")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
  emit_insn (gen_aarch64_sqdmlal2_lane<mode>_internal (operands[0], operands[1],
						       operands[2], operands[3],
						       operands[4], p));
  DONE;
})

;; sqdmlal2 by lane of a 128-bit vector.
(define_expand "aarch64_sqdmlal2_laneq<mode>"
  [(match_operand:<VWIDE> 0 "register_operand" "=w")
   (match_operand:<VWIDE> 1 "register_operand" "w")
   (match_operand:VQ_HSI 2 "register_operand" "w")
   (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
   (match_operand:SI 4 "immediate_operand" "i")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
  emit_insn (gen_aarch64_sqdmlal2_laneq<mode>_internal (operands[0], operands[1],
						       operands[2], operands[3],
						       operands[4], p));
  DONE;
})

;; sqdmlsl2 by lane of a 64-bit vector.
(define_expand "aarch64_sqdmlsl2_lane<mode>"
  [(match_operand:<VWIDE> 0 "register_operand" "=w")
   (match_operand:<VWIDE> 1 "register_operand" "w")
   (match_operand:VQ_HSI 2 "register_operand" "w")
   (match_operand:<VCOND> 3 "register_operand" "<vwx>")
   (match_operand:SI 4 "immediate_operand" "i")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
  emit_insn (gen_aarch64_sqdmlsl2_lane<mode>_internal (operands[0], operands[1],
						       operands[2], operands[3],
						       operands[4], p));
  DONE;
})

;; sqdmlsl2 by lane of a 128-bit vector.
(define_expand "aarch64_sqdmlsl2_laneq<mode>"
  [(match_operand:<VWIDE> 0 "register_operand" "=w")
   (match_operand:<VWIDE> 1 "register_operand" "w")
   (match_operand:VQ_HSI 2 "register_operand" "w")
   (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
   (match_operand:SI 4 "immediate_operand" "i")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
  emit_insn (gen_aarch64_sqdmlsl2_laneq<mode>_internal (operands[0], operands[1],
						       operands[2], operands[3],
						       operands[4], p));
  DONE;
})
4012
;; SQDML[AS]L2 by broadcast scalar (_n): high half of operand 2 times a
;; scalar register replicated to all lanes, printed using lane 0.
;; Operand 4 is the high-half selector built by the expanders below.
(define_insn "aarch64_sqdml<SBINQOPS:as>l2_n<mode>_internal"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (SBINQOPS:<VWIDE>
	  (match_operand:<VWIDE> 1 "register_operand" "0")
	  (ss_ashift:<VWIDE>
	    (mult:<VWIDE>
	      (sign_extend:<VWIDE>
                (vec_select:<VHALF>
                  (match_operand:VQ_HSI 2 "register_operand" "w")
                  (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
	      (sign_extend:<VWIDE>
                (vec_duplicate:<VHALF>
		  (match_operand:<VEL> 3 "register_operand" "<vwx>"))))
	    (const_int 1))))]
  "TARGET_SIMD"
  "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[0]"
  [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
)

;; Expander: accumulate variant.
(define_expand "aarch64_sqdmlal2_n<mode>"
  [(match_operand:<VWIDE> 0 "register_operand" "=w")
   (match_operand:<VWIDE> 1 "register_operand" "w")
   (match_operand:VQ_HSI 2 "register_operand" "w")
   (match_operand:<VEL> 3 "register_operand" "w")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
  emit_insn (gen_aarch64_sqdmlal2_n<mode>_internal (operands[0], operands[1],
						    operands[2], operands[3],
						    p));
  DONE;
})

;; Expander: subtract variant.
(define_expand "aarch64_sqdmlsl2_n<mode>"
  [(match_operand:<VWIDE> 0 "register_operand" "=w")
   (match_operand:<VWIDE> 1 "register_operand" "w")
   (match_operand:VQ_HSI 2 "register_operand" "w")
   (match_operand:<VEL> 3 "register_operand" "w")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
  emit_insn (gen_aarch64_sqdmlsl2_n<mode>_internal (operands[0], operands[1],
						    operands[2], operands[3],
						    p));
  DONE;
})
4059
4060;; vqdmull
4061
;; SQDMULL: widening saturating doubling multiply — modelled as a
;; saturating left shift by 1 of the sign-extended product.
(define_insn "aarch64_sqdmull<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (ss_ashift:<VWIDE>
	     (mult:<VWIDE>
	       (sign_extend:<VWIDE>
		     (match_operand:VSD_HSI 1 "register_operand" "w"))
	       (sign_extend:<VWIDE>
		     (match_operand:VSD_HSI 2 "register_operand" "w")))
	     (const_int 1)))]
  "TARGET_SIMD"
  "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
  [(set_attr "type" "neon_sat_mul_<Vetype>_long")]
)
4075
4076;; vqdmull_lane
4077
;; By-lane SQDMULL.  Four patterns: vector (VD_HSI, lane broadcast) and
;; scalar (SD_HSI, plain lane select) multiplicands, each against a
;; 64-bit (_lane, <VCOND>) or 128-bit (_laneq, <VCONQ>) lane container.
;; Operand 3's lane index is endian-adjusted before printing.

;; Vector x 64-bit-vector lane.
(define_insn "aarch64_sqdmull_lane<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (ss_ashift:<VWIDE>
	     (mult:<VWIDE>
	       (sign_extend:<VWIDE>
		 (match_operand:VD_HSI 1 "register_operand" "w"))
	       (sign_extend:<VWIDE>
                 (vec_duplicate:VD_HSI
                   (vec_select:<VEL>
		     (match_operand:<VCOND> 2 "register_operand" "<vwx>")
		     (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
	       ))
	     (const_int 1)))]
  "TARGET_SIMD"
  {
    operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
    return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
  }
  [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
)

;; Vector x 128-bit-vector lane.
(define_insn "aarch64_sqdmull_laneq<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (ss_ashift:<VWIDE>
	     (mult:<VWIDE>
	       (sign_extend:<VWIDE>
		 (match_operand:VD_HSI 1 "register_operand" "w"))
	       (sign_extend:<VWIDE>
                 (vec_duplicate:VD_HSI
                   (vec_select:<VEL>
		     (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
		     (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
	       ))
	     (const_int 1)))]
  "TARGET_SIMD"
  {
    operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
    return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
  }
  [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
)

;; Scalar x 64-bit-vector lane.
(define_insn "aarch64_sqdmull_lane<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (ss_ashift:<VWIDE>
	     (mult:<VWIDE>
	       (sign_extend:<VWIDE>
		 (match_operand:SD_HSI 1 "register_operand" "w"))
	       (sign_extend:<VWIDE>
                 (vec_select:<VEL>
		   (match_operand:<VCOND> 2 "register_operand" "<vwx>")
		   (parallel [(match_operand:SI 3 "immediate_operand" "i")]))
	       ))
	     (const_int 1)))]
  "TARGET_SIMD"
  {
    operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
    return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
  }
  [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
)

;; Scalar x 128-bit-vector lane.
(define_insn "aarch64_sqdmull_laneq<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (ss_ashift:<VWIDE>
	     (mult:<VWIDE>
	       (sign_extend:<VWIDE>
		 (match_operand:SD_HSI 1 "register_operand" "w"))
	       (sign_extend:<VWIDE>
                 (vec_select:<VEL>
		   (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
		   (parallel [(match_operand:SI 3 "immediate_operand" "i")]))
	       ))
	     (const_int 1)))]
  "TARGET_SIMD"
  {
    operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
    return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
  }
  [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
)
4159
4160;; vqdmull_n
4161
;; SQDMULL by broadcast scalar (_n); printed as a by-element form using
;; lane 0 of the scalar's register.
(define_insn "aarch64_sqdmull_n<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (ss_ashift:<VWIDE>
	     (mult:<VWIDE>
	       (sign_extend:<VWIDE>
		 (match_operand:VD_HSI 1 "register_operand" "w"))
	       (sign_extend:<VWIDE>
                 (vec_duplicate:VD_HSI
                   (match_operand:<VEL> 2 "register_operand" "<vwx>")))
	       )
	     (const_int 1)))]
  "TARGET_SIMD"
  "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[0]"
  [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
)
4177
4178;; vqdmull2
4179
4180
4181
;; SQDMULL2: widening saturating doubling multiply of the HIGH halves of
;; both 128-bit sources.  Operand 3 is the high-half selector; the
;; expander below constructs it and emits this internal pattern.
(define_insn "aarch64_sqdmull2<mode>_internal"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (ss_ashift:<VWIDE>
	     (mult:<VWIDE>
	       (sign_extend:<VWIDE>
		 (vec_select:<VHALF>
                   (match_operand:VQ_HSI 1 "register_operand" "w")
                   (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
	       (sign_extend:<VWIDE>
		 (vec_select:<VHALF>
                   (match_operand:VQ_HSI 2 "register_operand" "w")
                   (match_dup 3)))
	       )
	     (const_int 1)))]
  "TARGET_SIMD"
  "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
  [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
)

;; Expander: build the high-half selector and forward to the internal insn.
(define_expand "aarch64_sqdmull2<mode>"
  [(match_operand:<VWIDE> 0 "register_operand" "=w")
   (match_operand:VQ_HSI 1 "register_operand" "w")
   (match_operand:VQ_HSI 2 "register_operand" "w")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
  emit_insn (gen_aarch64_sqdmull2<mode>_internal (operands[0], operands[1],
						  operands[2], p));
  DONE;
})
4212
4213;; vqdmull2_lane
4214
;; By-lane SQDMULL2: high half of operand 1 multiplied by a broadcast
;; lane of operand 2, from a 64-bit (_lane, <VCOND>) or 128-bit (_laneq,
;; <VCONQ>) container.  Operand 4 is the high-half selector supplied by
;; the expanders; the lane index (operand 3) is endian-adjusted.

;; Lane from a 64-bit vector.
(define_insn "aarch64_sqdmull2_lane<mode>_internal"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (ss_ashift:<VWIDE>
	     (mult:<VWIDE>
	       (sign_extend:<VWIDE>
		 (vec_select:<VHALF>
                   (match_operand:VQ_HSI 1 "register_operand" "w")
                   (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
	       (sign_extend:<VWIDE>
                 (vec_duplicate:<VHALF>
                   (vec_select:<VEL>
		     (match_operand:<VCOND> 2 "register_operand" "<vwx>")
		     (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
	       ))
	     (const_int 1)))]
  "TARGET_SIMD"
  {
    operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
    return "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
  }
  [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
)

;; Lane from a 128-bit vector.
(define_insn "aarch64_sqdmull2_laneq<mode>_internal"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (ss_ashift:<VWIDE>
	     (mult:<VWIDE>
	       (sign_extend:<VWIDE>
		 (vec_select:<VHALF>
                   (match_operand:VQ_HSI 1 "register_operand" "w")
                   (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
	       (sign_extend:<VWIDE>
                 (vec_duplicate:<VHALF>
                   (vec_select:<VEL>
		     (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
		     (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
	       ))
	     (const_int 1)))]
  "TARGET_SIMD"
  {
    operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
    return "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
  }
  [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
)

;; Expander: by lane of a 64-bit vector.
(define_expand "aarch64_sqdmull2_lane<mode>"
  [(match_operand:<VWIDE> 0 "register_operand" "=w")
   (match_operand:VQ_HSI 1 "register_operand" "w")
   (match_operand:<VCOND> 2 "register_operand" "<vwx>")
   (match_operand:SI 3 "immediate_operand" "i")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
  emit_insn (gen_aarch64_sqdmull2_lane<mode>_internal (operands[0], operands[1],
						       operands[2], operands[3],
						       p));
  DONE;
})

;; Expander: by lane of a 128-bit vector.
(define_expand "aarch64_sqdmull2_laneq<mode>"
  [(match_operand:<VWIDE> 0 "register_operand" "=w")
   (match_operand:VQ_HSI 1 "register_operand" "w")
   (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
   (match_operand:SI 3 "immediate_operand" "i")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
  emit_insn (gen_aarch64_sqdmull2_laneq<mode>_internal (operands[0], operands[1],
						       operands[2], operands[3],
						       p));
  DONE;
})
4288
4289;; vqdmull2_n
4290
;; SQDMULL2 by broadcast scalar (_n): high half of operand 1 times a
;; scalar register replicated to all lanes, printed using lane 0.
;; Operand 3 is the high-half selector built by the expander below.
(define_insn "aarch64_sqdmull2_n<mode>_internal"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (ss_ashift:<VWIDE>
	     (mult:<VWIDE>
	       (sign_extend:<VWIDE>
		 (vec_select:<VHALF>
                   (match_operand:VQ_HSI 1 "register_operand" "w")
                   (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
	       (sign_extend:<VWIDE>
                 (vec_duplicate:<VHALF>
                   (match_operand:<VEL> 2 "register_operand" "<vwx>")))
	       )
	     (const_int 1)))]
  "TARGET_SIMD"
  "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[0]"
  [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
)

;; Expander: build the high-half selector and forward to the internal insn.
(define_expand "aarch64_sqdmull2_n<mode>"
  [(match_operand:<VWIDE> 0 "register_operand" "=w")
   (match_operand:VQ_HSI 1 "register_operand" "w")
   (match_operand:<VEL> 2 "register_operand" "w")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
  emit_insn (gen_aarch64_sqdmull2_n<mode>_internal (operands[0], operands[1],
						    operands[2], p));
  DONE;
})
4320
4321;; vshl
4322
;; Register-controlled shifts: the VSHL iterator's <sur> prefix selects
;; among sshl/ushl/srshl/urshl (signed/unsigned, optionally rounding).
(define_insn "aarch64_<sur>shl<mode>"
  [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
        (unspec:VSDQ_I_DI
	  [(match_operand:VSDQ_I_DI 1 "register_operand" "w")
           (match_operand:VSDQ_I_DI 2 "register_operand" "w")]
         VSHL))]
  "TARGET_SIMD"
  "<sur>shl\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>";
  [(set_attr "type" "neon_shift_reg<q>")]
)
4333
4334
4335;; vqshl
4336
;; Vector saturating (optionally rounding) shift by register:
;; [su]qshl / [su]qrshl, selected by the VQSHL unspec iterator.
;; Note: the trailing ";" that used to follow the output template was
;; dead text (";" starts a comment in .md files) and has been removed.
(define_insn "aarch64_<sur>q<r>shl<mode>"
  [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
        (unspec:VSDQ_I
	  [(match_operand:VSDQ_I 1 "register_operand" "w")
           (match_operand:VSDQ_I 2 "register_operand" "w")]
         VQSHL))]
  "TARGET_SIMD"
  "<sur>q<r>shl\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
  [(set_attr "type" "neon_sat_shift_reg<q>")]
)
4347
4348;; vshll_n
4349
;; Widening shift-left by immediate on the low 64-bit half.  A shift
;; amount equal to the element bit-size must use the SHLL alias;
;; smaller amounts use [su]shll.
(define_insn "aarch64_<sur>shll_n<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(unspec:<VWIDE> [(match_operand:VD_BHSI 1 "register_operand" "w")
			 (match_operand:SI 2
			   "aarch64_simd_shift_imm_bitsize_<ve_mode>" "i")]
                         VSHLL))]
  "TARGET_SIMD"
  {
    if (INTVAL (operands[2]) == GET_MODE_UNIT_BITSIZE (<MODE>mode))
      return "shll\\t%0.<Vwtype>, %1.<Vtype>, %2";
    else
      return "<sur>shll\\t%0.<Vwtype>, %1.<Vtype>, %2";
  }
  [(set_attr "type" "neon_shift_imm_long")]
)
4365
4366;; vshll_high_n
4367
;; As aarch64_<sur>shll_n but operating on the high half of the input.
;; Use the same shift-amount predicate as the non-high pattern instead
;; of a bare immediate_operand, so an out-of-range shift amount is
;; rejected at match time rather than reaching the assembler.
(define_insn "aarch64_<sur>shll2_n<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(unspec:<VWIDE> [(match_operand:VQW 1 "register_operand" "w")
			 (match_operand:SI 2
			   "aarch64_simd_shift_imm_bitsize_<ve_mode>" "i")]
                         VSHLL))]
  "TARGET_SIMD"
  {
    if (INTVAL (operands[2]) == GET_MODE_UNIT_BITSIZE (<MODE>mode))
      return "shll2\\t%0.<Vwtype>, %1.<Vtype>, %2";
    else
      return "<sur>shll2\\t%0.<Vwtype>, %1.<Vtype>, %2";
  }
  [(set_attr "type" "neon_shift_imm_long")]
)
4382
4383;; vrshr_n
4384
;; Vector shift-right by immediate (optionally rounding), selected by
;; the VRSHR_N unspec iterator; shift amount is 1..element-size.
(define_insn "aarch64_<sur>shr_n<mode>"
  [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
        (unspec:VSDQ_I_DI [(match_operand:VSDQ_I_DI 1 "register_operand" "w")
			   (match_operand:SI 2
			     "aarch64_simd_shift_imm_offset_<ve_mode>" "i")]
			  VRSHR_N))]
  "TARGET_SIMD"
  "<sur>shr\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2"
  [(set_attr "type" "neon_sat_shift_imm<q>")]
)
4395
4396;; v(r)sra_n
4397
;; Shift-right (optionally rounding) and accumulate: operand 0 is tied
;; to operand 1 (constraint "0") since the instruction accumulates into
;; the destination; operand 2 is shifted by immediate operand 3.
(define_insn "aarch64_<sur>sra_n<mode>"
  [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
	(unspec:VSDQ_I_DI [(match_operand:VSDQ_I_DI 1 "register_operand" "0")
		       (match_operand:VSDQ_I_DI 2 "register_operand" "w")
                       (match_operand:SI 3
			 "aarch64_simd_shift_imm_offset_<ve_mode>" "i")]
                      VSRA))]
  "TARGET_SIMD"
  "<sur>sra\\t%<v>0<Vmtype>, %<v>2<Vmtype>, %3"
  [(set_attr "type" "neon_shift_acc<q>")]
)
4409
4410;; vs<lr>i_n
4411
;; Shift and insert (SLI/SRI): destination is tied to operand 1 as the
;; non-shifted bits of the destination are preserved.
(define_insn "aarch64_<sur>s<lr>i_n<mode>"
  [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
	(unspec:VSDQ_I_DI [(match_operand:VSDQ_I_DI 1 "register_operand" "0")
		       (match_operand:VSDQ_I_DI 2 "register_operand" "w")
                       (match_operand:SI 3
			 "aarch64_simd_shift_imm_<offsetlr><ve_mode>" "i")]
                      VSLRI))]
  "TARGET_SIMD"
  "s<lr>i\\t%<v>0<Vmtype>, %<v>2<Vmtype>, %3"
  [(set_attr "type" "neon_shift_imm<q>")]
)
4423
4424;; vqshl(u)
4425
;; Saturating shift-left by immediate: [su]qshl and sqshlu, selected by
;; the VQSHL_N unspec iterator.
(define_insn "aarch64_<sur>qshl<u>_n<mode>"
  [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
	(unspec:VSDQ_I [(match_operand:VSDQ_I 1 "register_operand" "w")
		       (match_operand:SI 2
			 "aarch64_simd_shift_imm_<ve_mode>" "i")]
                      VQSHL_N))]
  "TARGET_SIMD"
  "<sur>qshl<u>\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2"
  [(set_attr "type" "neon_sat_shift_imm<q>")]
)
4436
4437
4438;; vq(r)shr(u)n_n
4439
;; Saturating (optionally rounding, optionally unsigned-result)
;; shift-right-narrow by immediate, producing the narrow mode.
(define_insn "aarch64_<sur>q<r>shr<u>n_n<mode>"
  [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
        (unspec:<VNARROWQ> [(match_operand:VSQN_HSDI 1 "register_operand" "w")
			    (match_operand:SI 2
			      "aarch64_simd_shift_imm_offset_<ve_mode>" "i")]
			   VQSHRN_N))]
  "TARGET_SIMD"
  "<sur>q<r>shr<u>n\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>, %2"
  [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
)
4450
4451
4452;; cm(eq|ge|gt|lt|le)
4453;; Note, we have constraints for Dz and Z as different expanders
4454;; have different ideas of what should be passed to this pattern.
4455
;; Integer vector compare, negated so that a "true" lane is all-ones.
;; Alternative 0 compares two registers; alternative 1 compares against
;; zero (constraint ZDz) and uses the #0 immediate form.
(define_insn "aarch64_cm<optab><mode>"
  [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w,w")
	(neg:<V_INT_EQUIV>
	  (COMPARISONS:<V_INT_EQUIV>
	    (match_operand:VDQ_I 1 "register_operand" "w,w")
	    (match_operand:VDQ_I 2 "aarch64_simd_reg_or_zero" "w,ZDz")
	  )))]
  "TARGET_SIMD"
  "@
  cm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>
  cm<optab>\t%<v>0<Vmtype>, %<v>1<Vmtype>, #0"
  [(set_attr "type" "neon_compare<q>, neon_compare_zero<q>")]
)
4469
;; DImode compare allocated to either register file.  Always emitted as
;; "#" and split after reload: if operands landed in general-purpose
;; registers we emit a compare + conditional store (using the clobbered
;; CC register); otherwise the split RTL matches the CC-free
;; *aarch64_cm<optab>di pattern below.
(define_insn_and_split "aarch64_cm<optab>di"
  [(set (match_operand:DI 0 "register_operand" "=w,w,r")
	(neg:DI
	  (COMPARISONS:DI
	    (match_operand:DI 1 "register_operand" "w,w,r")
	    (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,ZDz,r")
	  )))
     (clobber (reg:CC CC_REGNUM))]
  "TARGET_SIMD"
  "#"
  "&& reload_completed"
  [(set (match_operand:DI 0 "register_operand")
	(neg:DI
	  (COMPARISONS:DI
	    (match_operand:DI 1 "register_operand")
	    (match_operand:DI 2 "aarch64_simd_reg_or_zero")
	  )))]
  {
    /* If we are in the general purpose register file,
       we split to a sequence of comparison and store.  */
    if (GP_REGNUM_P (REGNO (operands[0]))
	&& GP_REGNUM_P (REGNO (operands[1])))
      {
	machine_mode mode = SELECT_CC_MODE (<CMP>, operands[1], operands[2]);
	rtx cc_reg = aarch64_gen_compare_reg (<CMP>, operands[1], operands[2]);
	rtx comparison = gen_rtx_<CMP> (mode, operands[1], operands[2]);
	emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
	DONE;
      }
    /* Otherwise, we expand to a similar pattern which does not
       clobber CC_REGNUM.  */
  }
  [(set_attr "type" "neon_compare, neon_compare_zero, multiple")]
)
4504
;; Post-reload DImode compare in the FP/SIMD register file; target of
;; the split above when the operands are not in GP registers.
(define_insn "*aarch64_cm<optab>di"
  [(set (match_operand:DI 0 "register_operand" "=w,w")
	(neg:DI
	  (COMPARISONS:DI
	    (match_operand:DI 1 "register_operand" "w,w")
	    (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,ZDz")
	  )))]
  "TARGET_SIMD && reload_completed"
  "@
  cm<n_optab>\t%d0, %d<cmp_1>, %d<cmp_2>
  cm<optab>\t%d0, %d1, #0"
  [(set_attr "type" "neon_compare, neon_compare_zero")]
)
4518
4519;; cm(hs|hi)
4520
;; Unsigned integer vector compare (cmhs/cmhi); no compare-against-zero
;; alternative exists for the unsigned forms.
(define_insn "aarch64_cm<optab><mode>"
  [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w")
	(neg:<V_INT_EQUIV>
	  (UCOMPARISONS:<V_INT_EQUIV>
	    (match_operand:VDQ_I 1 "register_operand" "w")
	    (match_operand:VDQ_I 2 "register_operand" "w")
	  )))]
  "TARGET_SIMD"
  "cm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>"
  [(set_attr "type" "neon_compare<q>")]
)
4532
;; Unsigned DImode compare for either register file; same split scheme
;; as the signed variant above, but plain CCmode suffices for the
;; unsigned conditions.
(define_insn_and_split "aarch64_cm<optab>di"
  [(set (match_operand:DI 0 "register_operand" "=w,r")
	(neg:DI
	  (UCOMPARISONS:DI
	    (match_operand:DI 1 "register_operand" "w,r")
	    (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,r")
	  )))
    (clobber (reg:CC CC_REGNUM))]
  "TARGET_SIMD"
  "#"
  "&& reload_completed"
  [(set (match_operand:DI 0 "register_operand")
	(neg:DI
	  (UCOMPARISONS:DI
	    (match_operand:DI 1 "register_operand")
	    (match_operand:DI 2 "aarch64_simd_reg_or_zero")
	  )))]
  {
    /* If we are in the general purpose register file,
       we split to a sequence of comparison and store.  */
    if (GP_REGNUM_P (REGNO (operands[0]))
	&& GP_REGNUM_P (REGNO (operands[1])))
      {
	machine_mode mode = CCmode;
	rtx cc_reg = aarch64_gen_compare_reg (<CMP>, operands[1], operands[2]);
	rtx comparison = gen_rtx_<CMP> (mode, operands[1], operands[2]);
	emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
	DONE;
      }
    /* Otherwise, we expand to a similar pattern which does not
       clobber CC_REGNUM.  */
  }
  [(set_attr "type" "neon_compare,multiple")]
)
4567
;; Post-reload unsigned DImode compare in the FP/SIMD register file.
(define_insn "*aarch64_cm<optab>di"
  [(set (match_operand:DI 0 "register_operand" "=w")
	(neg:DI
	  (UCOMPARISONS:DI
	    (match_operand:DI 1 "register_operand" "w")
	    (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w")
	  )))]
  "TARGET_SIMD && reload_completed"
  "cm<n_optab>\t%d0, %d<cmp_1>, %d<cmp_2>"
  [(set_attr "type" "neon_compare")]
)
4579
4580;; cmtst
4581
4582;; Although neg (ne (and x y) 0) is the natural way of expressing a cmtst,
4583;; we don't have any insns using ne, and aarch64_vcond outputs
4584;; not (neg (eq (and x y) 0))
4585;; which is rewritten by simplify_rtx as
4586;; plus (eq (and x y) 0) -1.
4587
;; CMTST, written as (eq (and x y) 0) + -1, which is the simplify_rtx
;; canonical form of not (neg (eq (and x y) 0)) -- see comment above.
(define_insn "aarch64_cmtst<mode>"
  [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w")
	(plus:<V_INT_EQUIV>
	  (eq:<V_INT_EQUIV>
	    (and:VDQ_I
	      (match_operand:VDQ_I 1 "register_operand" "w")
	      (match_operand:VDQ_I 2 "register_operand" "w"))
	    (match_operand:VDQ_I 3 "aarch64_simd_imm_zero"))
	  (match_operand:<V_INT_EQUIV> 4 "aarch64_simd_imm_minus_one")))
  ]
  "TARGET_SIMD"
  "cmtst\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
  [(set_attr "type" "neon_tst<q>")]
)
4602
;; DImode CMTST for either register file: split after reload to either
;; tst + conditional store (GP registers, clobbering CC) or to the
;; CC-free *aarch64_cmtstdi pattern below.
(define_insn_and_split "aarch64_cmtstdi"
  [(set (match_operand:DI 0 "register_operand" "=w,r")
	(neg:DI
	  (ne:DI
	    (and:DI
	      (match_operand:DI 1 "register_operand" "w,r")
	      (match_operand:DI 2 "register_operand" "w,r"))
	    (const_int 0))))
    (clobber (reg:CC CC_REGNUM))]
  "TARGET_SIMD"
  "#"
  "&& reload_completed"
  [(set (match_operand:DI 0 "register_operand")
	(neg:DI
	  (ne:DI
	    (and:DI
	      (match_operand:DI 1 "register_operand")
	      (match_operand:DI 2 "register_operand"))
	    (const_int 0))))]
  {
    /* If we are in the general purpose register file,
       we split to a sequence of comparison and store.  */
    if (GP_REGNUM_P (REGNO (operands[0]))
	&& GP_REGNUM_P (REGNO (operands[1])))
      {
	rtx and_tree = gen_rtx_AND (DImode, operands[1], operands[2]);
	machine_mode mode = SELECT_CC_MODE (NE, and_tree, const0_rtx);
	rtx cc_reg = aarch64_gen_compare_reg (NE, and_tree, const0_rtx);
	rtx comparison = gen_rtx_NE (mode, and_tree, const0_rtx);
	emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
	DONE;
      }
    /* Otherwise, we expand to a similar pattern which does not
       clobber CC_REGNUM.  */
  }
  [(set_attr "type" "neon_tst,multiple")]
)
4640
;; DImode CMTST in the FP/SIMD register file (no CC clobber).
(define_insn "*aarch64_cmtstdi"
  [(set (match_operand:DI 0 "register_operand" "=w")
	(neg:DI
	  (ne:DI
	    (and:DI
	      (match_operand:DI 1 "register_operand" "w")
	      (match_operand:DI 2 "register_operand" "w"))
	    (const_int 0))))]
  "TARGET_SIMD"
  "cmtst\t%d0, %d1, %d2"
  [(set_attr "type" "neon_tst")]
)
4653
4654;; fcm(eq|ge|gt|le|lt)
4655
;; Floating-point vector compare; alternative 1 compares against
;; floating-point zero (constraint YDz) with the immediate form.
(define_insn "aarch64_cm<optab><mode>"
  [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w,w")
	(neg:<V_INT_EQUIV>
	  (COMPARISONS:<V_INT_EQUIV>
	    (match_operand:VHSDF_HSDF 1 "register_operand" "w,w")
	    (match_operand:VHSDF_HSDF 2 "aarch64_simd_reg_or_zero" "w,YDz")
	  )))]
  "TARGET_SIMD"
  "@
  fcm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>
  fcm<optab>\t%<v>0<Vmtype>, %<v>1<Vmtype>, 0"
  [(set_attr "type" "neon_fp_compare_<stype><q>")]
)
4669
4670;; fac(ge|gt)
4671;; Note we can also handle what would be fac(le|lt) by
4672;; generating fac(ge|gt).
4673
;; Absolute compare (facge/facgt): compare the absolute values of the
;; two inputs; cmp_1/cmp_2 let the le/lt forms swap operands.
(define_insn "aarch64_fac<optab><mode>"
  [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w")
	(neg:<V_INT_EQUIV>
	  (FAC_COMPARISONS:<V_INT_EQUIV>
	    (abs:VHSDF_HSDF
	      (match_operand:VHSDF_HSDF 1 "register_operand" "w"))
	    (abs:VHSDF_HSDF
	      (match_operand:VHSDF_HSDF 2 "register_operand" "w"))
  )))]
  "TARGET_SIMD"
  "fac<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>"
  [(set_attr "type" "neon_fp_compare_<stype><q>")]
)
4687
4688;; addp
4689
;; Pairwise add of two 64-bit integer vectors.
(define_insn "aarch64_addp<mode>"
  [(set (match_operand:VD_BHSI 0 "register_operand" "=w")
        (unspec:VD_BHSI
          [(match_operand:VD_BHSI 1 "register_operand" "w")
	   (match_operand:VD_BHSI 2 "register_operand" "w")]
          UNSPEC_ADDP))]
  "TARGET_SIMD"
  "addp\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
  [(set_attr "type" "neon_reduc_add<q>")]
)
4700
;; Scalar form: add the two DImode elements of a V2DI into a D register.
(define_insn "aarch64_addpdi"
  [(set (match_operand:DI 0 "register_operand" "=w")
        (unspec:DI
          [(match_operand:V2DI 1 "register_operand" "w")]
          UNSPEC_ADDP))]
  "TARGET_SIMD"
  "addp\t%d0, %1.2d"
  [(set_attr "type" "neon_reduc_add")]
)
4710
4711;; sqrt
4712
;; Vector square root.  The expander first tries the Newton-series
;; approximation (aarch64_emit_approx_sqrt); if that declines, the RTL
;; falls through to the fsqrt insn below.
(define_expand "sqrt<mode>2"
  [(set (match_operand:VHSDF 0 "register_operand" "=w")
	(sqrt:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
  "TARGET_SIMD"
{
  if (aarch64_emit_approx_sqrt (operands[0], operands[1], false))
    DONE;
})

;; Exact vector square root instruction.
(define_insn "*sqrt<mode>2"
  [(set (match_operand:VHSDF 0 "register_operand" "=w")
	(sqrt:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
  "TARGET_SIMD"
  "fsqrt\\t%0.<Vtype>, %1.<Vtype>"
  [(set_attr "type" "neon_fp_sqrt_<stype><q>")]
)
4729
4730;; Patterns for vector struct loads and stores.
4731
;; Two-register structure load (interleaving) into an OImode reg-pair.
(define_insn "aarch64_simd_ld2<mode>"
  [(set (match_operand:OI 0 "register_operand" "=w")
	(unspec:OI [(match_operand:OI 1 "aarch64_simd_struct_operand" "Utv")
		    (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
		   UNSPEC_LD2))]
  "TARGET_SIMD"
  "ld2\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
  [(set_attr "type" "neon_load2_2reg<q>")]
)

;; Load a 2-element structure and replicate it to all lanes.
(define_insn "aarch64_simd_ld2r<mode>"
  [(set (match_operand:OI 0 "register_operand" "=w")
       (unspec:OI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
                   (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
                  UNSPEC_LD2_DUP))]
  "TARGET_SIMD"
  "ld2r\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
  [(set_attr "type" "neon_load2_all_lanes<q>")]
)

;; Load a 2-element structure into one lane; other lanes come from
;; operand 2 (tied to the destination).  Lane number is flipped for
;; big-endian before printing.
(define_insn "aarch64_vec_load_lanesoi_lane<mode>"
  [(set (match_operand:OI 0 "register_operand" "=w")
	(unspec:OI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
		    (match_operand:OI 2 "register_operand" "0")
		    (match_operand:SI 3 "immediate_operand" "i")
		    (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
		   UNSPEC_LD2_LANE))]
  "TARGET_SIMD"
  {
    operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
    return "ld2\\t{%S0.<Vetype> - %T0.<Vetype>}[%3], %1";
  }
  [(set_attr "type" "neon_load2_one_lane")]
)

;; vec_load_lanes expander: on big-endian, load then permute the
;; register list back to GCC's lane numbering via a TBL mask.
(define_expand "vec_load_lanesoi<mode>"
  [(set (match_operand:OI 0 "register_operand" "=w")
	(unspec:OI [(match_operand:OI 1 "aarch64_simd_struct_operand" "Utv")
		    (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
		   UNSPEC_LD2))]
  "TARGET_SIMD"
{
  if (BYTES_BIG_ENDIAN)
    {
      rtx tmp = gen_reg_rtx (OImode);
      rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
      emit_insn (gen_aarch64_simd_ld2<mode> (tmp, operands[1]));
      emit_insn (gen_aarch64_rev_reglistoi (operands[0], tmp, mask));
    }
  else
    emit_insn (gen_aarch64_simd_ld2<mode> (operands[0], operands[1]));
  DONE;
})

;; Two-register structure store (interleaving) from an OImode reg-pair.
(define_insn "aarch64_simd_st2<mode>"
  [(set (match_operand:OI 0 "aarch64_simd_struct_operand" "=Utv")
	(unspec:OI [(match_operand:OI 1 "register_operand" "w")
                    (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_ST2))]
  "TARGET_SIMD"
  "st2\\t{%S1.<Vtype> - %T1.<Vtype>}, %0"
  [(set_attr "type" "neon_store2_2reg<q>")]
)

;; RTL uses GCC vector extension indices, so flip only for assembly.
(define_insn "aarch64_vec_store_lanesoi_lane<mode>"
  [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
	(unspec:BLK [(match_operand:OI 1 "register_operand" "w")
		    (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
		    (match_operand:SI 2 "immediate_operand" "i")]
		   UNSPEC_ST2_LANE))]
  "TARGET_SIMD"
  {
    operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
    return "st2\\t{%S1.<Vetype> - %T1.<Vetype>}[%2], %0";
  }
  [(set_attr "type" "neon_store2_one_lane<q>")]
)

;; vec_store_lanes expander: on big-endian, permute the register list
;; first, then store (mirror image of vec_load_lanesoi).
(define_expand "vec_store_lanesoi<mode>"
  [(set (match_operand:OI 0 "aarch64_simd_struct_operand" "=Utv")
	(unspec:OI [(match_operand:OI 1 "register_operand" "w")
                    (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_ST2))]
  "TARGET_SIMD"
{
  if (BYTES_BIG_ENDIAN)
    {
      rtx tmp = gen_reg_rtx (OImode);
      rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
      emit_insn (gen_aarch64_rev_reglistoi (tmp, operands[1], mask));
      emit_insn (gen_aarch64_simd_st2<mode> (operands[0], tmp));
    }
  else
    emit_insn (gen_aarch64_simd_st2<mode> (operands[0], operands[1]));
  DONE;
})
4829
;; Three-register structure load into a CImode register triple.
(define_insn "aarch64_simd_ld3<mode>"
  [(set (match_operand:CI 0 "register_operand" "=w")
	(unspec:CI [(match_operand:CI 1 "aarch64_simd_struct_operand" "Utv")
		    (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
		   UNSPEC_LD3))]
  "TARGET_SIMD"
  "ld3\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
  [(set_attr "type" "neon_load3_3reg<q>")]
)

;; Load a 3-element structure and replicate it to all lanes.
(define_insn "aarch64_simd_ld3r<mode>"
  [(set (match_operand:CI 0 "register_operand" "=w")
       (unspec:CI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
                   (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
                  UNSPEC_LD3_DUP))]
  "TARGET_SIMD"
  "ld3r\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
  [(set_attr "type" "neon_load3_all_lanes<q>")]
)

;; Load a 3-element structure into one lane; remaining lanes come from
;; operand 2 (tied).  Lane number flipped for big-endian at print time.
(define_insn "aarch64_vec_load_lanesci_lane<mode>"
  [(set (match_operand:CI 0 "register_operand" "=w")
	(unspec:CI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
		    (match_operand:CI 2 "register_operand" "0")
		    (match_operand:SI 3 "immediate_operand" "i")
		    (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
		   UNSPEC_LD3_LANE))]
  "TARGET_SIMD"
{
    operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
    return "ld3\\t{%S0.<Vetype> - %U0.<Vetype>}[%3], %1";
}
  [(set_attr "type" "neon_load3_one_lane")]
)

;; vec_load_lanes expander: big-endian loads then permutes the register
;; list back to GCC lane order.
(define_expand "vec_load_lanesci<mode>"
  [(set (match_operand:CI 0 "register_operand" "=w")
	(unspec:CI [(match_operand:CI 1 "aarch64_simd_struct_operand" "Utv")
		    (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
		   UNSPEC_LD3))]
  "TARGET_SIMD"
{
  if (BYTES_BIG_ENDIAN)
    {
      rtx tmp = gen_reg_rtx (CImode);
      rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
      emit_insn (gen_aarch64_simd_ld3<mode> (tmp, operands[1]));
      emit_insn (gen_aarch64_rev_reglistci (operands[0], tmp, mask));
    }
  else
    emit_insn (gen_aarch64_simd_ld3<mode> (operands[0], operands[1]));
  DONE;
})

;; Three-register structure store from a CImode register triple.
(define_insn "aarch64_simd_st3<mode>"
  [(set (match_operand:CI 0 "aarch64_simd_struct_operand" "=Utv")
	(unspec:CI [(match_operand:CI 1 "register_operand" "w")
                    (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_ST3))]
  "TARGET_SIMD"
  "st3\\t{%S1.<Vtype> - %U1.<Vtype>}, %0"
  [(set_attr "type" "neon_store3_3reg<q>")]
)

;; RTL uses GCC vector extension indices, so flip only for assembly.
(define_insn "aarch64_vec_store_lanesci_lane<mode>"
  [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
	(unspec:BLK [(match_operand:CI 1 "register_operand" "w")
		     (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
		     (match_operand:SI 2 "immediate_operand" "i")]
		    UNSPEC_ST3_LANE))]
  "TARGET_SIMD"
  {
    operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
    return "st3\\t{%S1.<Vetype> - %U1.<Vetype>}[%2], %0";
  }
  [(set_attr "type" "neon_store3_one_lane<q>")]
)

;; vec_store_lanes expander: big-endian permutes the register list
;; before storing.
(define_expand "vec_store_lanesci<mode>"
  [(set (match_operand:CI 0 "aarch64_simd_struct_operand" "=Utv")
	(unspec:CI [(match_operand:CI 1 "register_operand" "w")
                    (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_ST3))]
  "TARGET_SIMD"
{
  if (BYTES_BIG_ENDIAN)
    {
      rtx tmp = gen_reg_rtx (CImode);
      rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
      emit_insn (gen_aarch64_rev_reglistci (tmp, operands[1], mask));
      emit_insn (gen_aarch64_simd_st3<mode> (operands[0], tmp));
    }
  else
    emit_insn (gen_aarch64_simd_st3<mode> (operands[0], operands[1]));
  DONE;
})
4927
;; Four-register structure load into an XImode register quad.
(define_insn "aarch64_simd_ld4<mode>"
  [(set (match_operand:XI 0 "register_operand" "=w")
	(unspec:XI [(match_operand:XI 1 "aarch64_simd_struct_operand" "Utv")
		    (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
		   UNSPEC_LD4))]
  "TARGET_SIMD"
  "ld4\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
  [(set_attr "type" "neon_load4_4reg<q>")]
)

;; Load a 4-element structure and replicate it to all lanes.
(define_insn "aarch64_simd_ld4r<mode>"
  [(set (match_operand:XI 0 "register_operand" "=w")
       (unspec:XI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
                   (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
                  UNSPEC_LD4_DUP))]
  "TARGET_SIMD"
  "ld4r\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
  [(set_attr "type" "neon_load4_all_lanes<q>")]
)

;; Load a 4-element structure into one lane; remaining lanes come from
;; operand 2 (tied).  Lane number flipped for big-endian at print time.
(define_insn "aarch64_vec_load_lanesxi_lane<mode>"
  [(set (match_operand:XI 0 "register_operand" "=w")
	(unspec:XI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
		    (match_operand:XI 2 "register_operand" "0")
		    (match_operand:SI 3 "immediate_operand" "i")
		    (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
		   UNSPEC_LD4_LANE))]
  "TARGET_SIMD"
{
    operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
    return "ld4\\t{%S0.<Vetype> - %V0.<Vetype>}[%3], %1";
}
  [(set_attr "type" "neon_load4_one_lane")]
)

;; vec_load_lanes expander: big-endian loads then permutes the register
;; list back to GCC lane order.
(define_expand "vec_load_lanesxi<mode>"
  [(set (match_operand:XI 0 "register_operand" "=w")
	(unspec:XI [(match_operand:XI 1 "aarch64_simd_struct_operand" "Utv")
		    (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
		   UNSPEC_LD4))]
  "TARGET_SIMD"
{
  if (BYTES_BIG_ENDIAN)
    {
      rtx tmp = gen_reg_rtx (XImode);
      rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
      emit_insn (gen_aarch64_simd_ld4<mode> (tmp, operands[1]));
      emit_insn (gen_aarch64_rev_reglistxi (operands[0], tmp, mask));
    }
  else
    emit_insn (gen_aarch64_simd_ld4<mode> (operands[0], operands[1]));
  DONE;
})

;; Four-register structure store from an XImode register quad.
(define_insn "aarch64_simd_st4<mode>"
  [(set (match_operand:XI 0 "aarch64_simd_struct_operand" "=Utv")
	(unspec:XI [(match_operand:XI 1 "register_operand" "w")
                    (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_ST4))]
  "TARGET_SIMD"
  "st4\\t{%S1.<Vtype> - %V1.<Vtype>}, %0"
  [(set_attr "type" "neon_store4_4reg<q>")]
)

;; RTL uses GCC vector extension indices, so flip only for assembly.
(define_insn "aarch64_vec_store_lanesxi_lane<mode>"
  [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
	(unspec:BLK [(match_operand:XI 1 "register_operand" "w")
		     (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
		     (match_operand:SI 2 "immediate_operand" "i")]
		    UNSPEC_ST4_LANE))]
  "TARGET_SIMD"
  {
    operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
    return "st4\\t{%S1.<Vetype> - %V1.<Vetype>}[%2], %0";
  }
  [(set_attr "type" "neon_store4_one_lane<q>")]
)

;; vec_store_lanes expander: big-endian permutes the register list
;; before storing.
(define_expand "vec_store_lanesxi<mode>"
  [(set (match_operand:XI 0 "aarch64_simd_struct_operand" "=Utv")
	(unspec:XI [(match_operand:XI 1 "register_operand" "w")
                    (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_ST4))]
  "TARGET_SIMD"
{
  if (BYTES_BIG_ENDIAN)
    {
      rtx tmp = gen_reg_rtx (XImode);
      rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
      emit_insn (gen_aarch64_rev_reglistxi (tmp, operands[1], mask));
      emit_insn (gen_aarch64_simd_st4<mode> (operands[0], tmp));
    }
  else
    emit_insn (gen_aarch64_simd_st4<mode> (operands[0], operands[1]));
  DONE;
})
5025
;; Permute each vector of a register list with the TBL mask in
;; operand 2; split after reload into one TBL per constituent
;; V16QI register.  Destination is earlyclobber ("=&w") since it is
;; written before all inputs are consumed.
(define_insn_and_split "aarch64_rev_reglist<mode>"
[(set (match_operand:VSTRUCT 0 "register_operand" "=&w")
	(unspec:VSTRUCT
	           [(match_operand:VSTRUCT 1 "register_operand" "w")
		    (match_operand:V16QI 2 "register_operand" "w")]
                   UNSPEC_REV_REGLIST))]
  "TARGET_SIMD"
  "#"
  "&& reload_completed"
  [(const_int 0)]
{
  int i;
  int nregs = GET_MODE_SIZE (<MODE>mode) / UNITS_PER_VREG;
  for (i = 0; i < nregs; i++)
    {
      rtx op0 = gen_rtx_REG (V16QImode, REGNO (operands[0]) + i);
      rtx op1 = gen_rtx_REG (V16QImode, REGNO (operands[1]) + i);
      emit_insn (gen_aarch64_tbl1v16qi (op0, op1, operands[2]));
    }
  DONE;
}
  [(set_attr "type" "neon_tbl1_q")
   (set_attr "length" "<insn_count>")]
)
5050
5051;; Reload patterns for AdvSIMD register list operands.
5052
;; Move expander for struct modes (OI/CI/XI): before reload, force the
;; source into a register whenever the destination is not one, so the
;; move insns below only ever see at most one memory operand.
(define_expand "mov<mode>"
  [(set (match_operand:VSTRUCT 0 "nonimmediate_operand" "")
	(match_operand:VSTRUCT 1 "general_operand" ""))]
  "TARGET_SIMD"
{
  if (can_create_pseudo_p ())
    {
      if (GET_CODE (operands[0]) != REG)
	operands[1] = force_reg (<MODE>mode, operands[1]);
    }
})
5064
;; Little-endian struct-mode moves: reg-reg moves are split later
;; (alternative 0, "#"); memory transfers use one st1/ld1 of the
;; whole register list.
(define_insn "*aarch64_mov<mode>"
  [(set (match_operand:VSTRUCT 0 "aarch64_simd_nonimmediate_operand" "=w,Utv,w")
	(match_operand:VSTRUCT 1 "aarch64_simd_general_operand" " w,w,Utv"))]
  "TARGET_SIMD && !BYTES_BIG_ENDIAN
   && (register_operand (operands[0], <MODE>mode)
       || register_operand (operands[1], <MODE>mode))"
  "@
   #
   st1\\t{%S1.16b - %<Vendreg>1.16b}, %0
   ld1\\t{%S0.16b - %<Vendreg>0.16b}, %1"
  [(set_attr "type" "multiple,neon_store<nregs>_<nregs>reg_q,\
		     neon_load<nregs>_<nregs>reg_q")
   (set_attr "length" "<insn_count>,4,4")]
)
5079
;; Single-register LD1, kept as an unspec so the element ordering is
;; preserved on big-endian targets.
(define_insn "aarch64_be_ld1<mode>"
  [(set (match_operand:VALLDI_F16 0	"register_operand" "=w")
	(unspec:VALLDI_F16 [(match_operand:VALLDI_F16 1
			     "aarch64_simd_struct_operand" "Utv")]
	UNSPEC_LD1))]
  "TARGET_SIMD"
  "ld1\\t{%0<Vmtype>}, %1"
  [(set_attr "type" "neon_load1_1reg<q>")]
)

;; Single-register ST1 counterpart of aarch64_be_ld1.
(define_insn "aarch64_be_st1<mode>"
  [(set (match_operand:VALLDI_F16 0 "aarch64_simd_struct_operand" "=Utv")
	(unspec:VALLDI_F16 [(match_operand:VALLDI_F16 1 "register_operand" "w")]
	UNSPEC_ST1))]
  "TARGET_SIMD"
  "st1\\t{%1<Vmtype>}, %0"
  [(set_attr "type" "neon_store1_1reg<q>")]
)
5098
;; Big-endian OImode move: memory transfers fit in one stp/ldp of Q
;; registers; reg-reg moves are split later ("#", length 8).
(define_insn "*aarch64_be_movoi"
  [(set (match_operand:OI 0 "nonimmediate_operand" "=w,m,w")
	(match_operand:OI 1 "general_operand"      " w,w,m"))]
  "TARGET_SIMD && BYTES_BIG_ENDIAN
   && (register_operand (operands[0], OImode)
       || register_operand (operands[1], OImode))"
  "@
   #
   stp\\t%q1, %R1, %0
   ldp\\t%q0, %R0, %1"
  [(set_attr "type" "multiple,neon_stp_q,neon_ldp_q")
   (set_attr "length" "8,4,4")]
)

;; Big-endian CImode move: always emitted as "#" and handled by the
;; CImode define_split below.
(define_insn "*aarch64_be_movci"
  [(set (match_operand:CI 0 "nonimmediate_operand" "=w,o,w")
	(match_operand:CI 1 "general_operand"      " w,w,o"))]
  "TARGET_SIMD && BYTES_BIG_ENDIAN
   && (register_operand (operands[0], CImode)
       || register_operand (operands[1], CImode))"
  "#"
  [(set_attr "type" "multiple")
   (set_attr "length" "12,4,4")]
)

;; Big-endian XImode move: always emitted as "#" and handled by the
;; XImode define_split below.
(define_insn "*aarch64_be_movxi"
  [(set (match_operand:XI 0 "nonimmediate_operand" "=w,o,w")
	(match_operand:XI 1 "general_operand"      " w,w,o"))]
  "TARGET_SIMD && BYTES_BIG_ENDIAN
   && (register_operand (operands[0], XImode)
       || register_operand (operands[1], XImode))"
  "#"
  [(set_attr "type" "multiple")
   (set_attr "length" "16,4,4")]
)
5134
;; Split an OImode reg-reg move into two TImode register moves.
(define_split
  [(set (match_operand:OI 0 "register_operand")
	(match_operand:OI 1 "register_operand"))]
  "TARGET_SIMD && reload_completed"
  [(const_int 0)]
{
  aarch64_simd_emit_reg_reg_move (operands, TImode, 2);
  DONE;
})

;; Split CImode moves: reg-reg becomes three TImode moves; on
;; big-endian a memory move becomes an OImode move of the first 32
;; bytes plus a TImode move (via V16QI lowparts) of the remainder.
(define_split
  [(set (match_operand:CI 0 "nonimmediate_operand")
	(match_operand:CI 1 "general_operand"))]
  "TARGET_SIMD && reload_completed"
  [(const_int 0)]
{
  if (register_operand (operands[0], CImode)
      && register_operand (operands[1], CImode))
    {
      aarch64_simd_emit_reg_reg_move (operands, TImode, 3);
      DONE;
    }
  else if (BYTES_BIG_ENDIAN)
    {
      emit_move_insn (simplify_gen_subreg (OImode, operands[0], CImode, 0),
		      simplify_gen_subreg (OImode, operands[1], CImode, 0));
      emit_move_insn (gen_lowpart (V16QImode,
				   simplify_gen_subreg (TImode, operands[0],
							CImode, 32)),
		      gen_lowpart (V16QImode,
				   simplify_gen_subreg (TImode, operands[1],
							CImode, 32)));
      DONE;
    }
  else
    FAIL;
})

;; Split XImode moves: reg-reg becomes four TImode moves; on
;; big-endian a memory move becomes two OImode moves.
(define_split
  [(set (match_operand:XI 0 "nonimmediate_operand")
	(match_operand:XI 1 "general_operand"))]
  "TARGET_SIMD && reload_completed"
  [(const_int 0)]
{
  if (register_operand (operands[0], XImode)
      && register_operand (operands[1], XImode))
    {
      aarch64_simd_emit_reg_reg_move (operands, TImode, 4);
      DONE;
    }
  else if (BYTES_BIG_ENDIAN)
    {
      emit_move_insn (simplify_gen_subreg (OImode, operands[0], XImode, 0),
		      simplify_gen_subreg (OImode, operands[1], XImode, 0));
      emit_move_insn (simplify_gen_subreg (OImode, operands[0], XImode, 32),
		      simplify_gen_subreg (OImode, operands[1], XImode, 32));
      DONE;
    }
  else
    FAIL;
})
5196
;; Expander for the vldN_dup intrinsics: wrap the pointer in a BLKmode
;; MEM sized to <nregs> structure elements, then emit the ldNr insn.
(define_expand "aarch64_ld<VSTRUCT:nregs>r<VALLDIF:mode>"
  [(match_operand:VSTRUCT 0 "register_operand" "=w")
   (match_operand:DI 1 "register_operand" "w")
   (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
  "TARGET_SIMD"
{
  rtx mem = gen_rtx_MEM (BLKmode, operands[1]);
  set_mem_size (mem, GET_MODE_SIZE (GET_MODE_INNER (<VALLDIF:MODE>mode))
		     * <VSTRUCT:nregs>);

  emit_insn (gen_aarch64_simd_ld<VSTRUCT:nregs>r<VALLDIF:mode> (operands[0],
								mem));
  DONE;
})
5211
;; Two-register structure load into D registers.  For the 64-bit vector
;; modes (VD) a de-interleaving ld2 is used; for the single-element
;; DI/DF modes (DX) an ld1 on two .1d registers suffices.  The two
;; patterns share a name and are disambiguated by their mode iterator.
(define_insn "aarch64_ld2<mode>_dreg"
  [(set (match_operand:OI 0 "register_operand" "=w")
	(unspec:OI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
		    (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
		   UNSPEC_LD2_DREG))]
  "TARGET_SIMD"
  "ld2\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
  [(set_attr "type" "neon_load2_2reg<q>")]
)

;; DX (DI/DF) variant: no lane interleaving, so plain ld1 is used.
(define_insn "aarch64_ld2<mode>_dreg"
  [(set (match_operand:OI 0 "register_operand" "=w")
	(unspec:OI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
		    (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
		   UNSPEC_LD2_DREG))]
  "TARGET_SIMD"
  "ld1\\t{%S0.1d - %T0.1d}, %1"
  [(set_attr "type" "neon_load1_2reg<q>")]
)
5231
;; Three-register structure load into D registers (CI holds 3 regs).
;; VD modes use a de-interleaving ld3; DX (DI/DF) modes use ld1 on
;; three .1d registers since single-element vectors need no interleave.
(define_insn "aarch64_ld3<mode>_dreg"
  [(set (match_operand:CI 0 "register_operand" "=w")
	(unspec:CI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
		    (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
		   UNSPEC_LD3_DREG))]
  "TARGET_SIMD"
  "ld3\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
  [(set_attr "type" "neon_load3_3reg<q>")]
)

;; DX (DI/DF) variant of the above.
(define_insn "aarch64_ld3<mode>_dreg"
  [(set (match_operand:CI 0 "register_operand" "=w")
	(unspec:CI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
		    (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
		   UNSPEC_LD3_DREG))]
  "TARGET_SIMD"
  "ld1\\t{%S0.1d - %U0.1d}, %1"
  [(set_attr "type" "neon_load1_3reg<q>")]
)
5251
;; Four-register structure load into D registers (XI holds 4 regs).
;; VD modes use a de-interleaving ld4; DX (DI/DF) modes use ld1 on
;; four .1d registers.
(define_insn "aarch64_ld4<mode>_dreg"
  [(set (match_operand:XI 0 "register_operand" "=w")
	(unspec:XI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
		    (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
		   UNSPEC_LD4_DREG))]
  "TARGET_SIMD"
  "ld4\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
  [(set_attr "type" "neon_load4_4reg<q>")]
)

;; DX (DI/DF) variant of the above.
(define_insn "aarch64_ld4<mode>_dreg"
  [(set (match_operand:XI 0 "register_operand" "=w")
	(unspec:XI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
		    (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
		   UNSPEC_LD4_DREG))]
  "TARGET_SIMD"
  "ld1\\t{%S0.1d - %V0.1d}, %1"
  [(set_attr "type" "neon_load1_4reg<q>")]
)
5271
;; Expander for the D-register vldN builtins: wrap the address in a
;; BLKmode MEM of <nregs> * 8 bytes (each D register is 8 bytes) and
;; emit the matching *_dreg define_insn.
(define_expand "aarch64_ld<VSTRUCT:nregs><VDC:mode>"
 [(match_operand:VSTRUCT 0 "register_operand" "=w")
  (match_operand:DI 1 "register_operand" "r")
  (unspec:VDC [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
  "TARGET_SIMD"
{
  rtx mem = gen_rtx_MEM (BLKmode, operands[1]);
  set_mem_size (mem, <VSTRUCT:nregs> * 8);

  emit_insn (gen_aarch64_ld<VSTRUCT:nregs><VDC:mode>_dreg (operands[0], mem));
  DONE;
})
5284
;; Expander for vld1: a single-vector load from the address in
;; operand 1.  Little-endian uses an ordinary move; big-endian needs the
;; dedicated be_ld1 pattern to get architecturally-defined lane order.
(define_expand "aarch64_ld1<VALL_F16:mode>"
 [(match_operand:VALL_F16 0 "register_operand")
  (match_operand:DI 1 "register_operand")]
  "TARGET_SIMD"
{
  machine_mode mode = <VALL_F16:MODE>mode;
  rtx mem = gen_rtx_MEM (mode, operands[1]);

  if (BYTES_BIG_ENDIAN)
    emit_insn (gen_aarch64_be_ld1<VALL_F16:mode> (operands[0], mem));
  else
    emit_move_insn (operands[0], mem);
  DONE;
})
5299
;; Expander for the Q-register vldN builtins: build a struct-mode MEM at
;; the given address and emit the generic simd_ldN pattern.
(define_expand "aarch64_ld<VSTRUCT:nregs><VQ:mode>"
 [(match_operand:VSTRUCT 0 "register_operand" "=w")
  (match_operand:DI 1 "register_operand" "r")
  (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
  "TARGET_SIMD"
{
  machine_mode mode = <VSTRUCT:MODE>mode;
  rtx mem = gen_rtx_MEM (mode, operands[1]);

  emit_insn (gen_aarch64_simd_ld<VSTRUCT:nregs><VQ:mode> (operands[0], mem));
  DONE;
})
5312
;; Expanders for vld1_x2: load two consecutive vectors (no
;; de-interleaving) into an OImode register pair.  One expander each for
;; Q-register (VQ) and D-register (VDC) element modes.
(define_expand "aarch64_ld1x2<VQ:mode>"
 [(match_operand:OI 0 "register_operand" "=w")
  (match_operand:DI 1 "register_operand" "r")
  (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
  "TARGET_SIMD"
{
  machine_mode mode = OImode;
  rtx mem = gen_rtx_MEM (mode, operands[1]);

  emit_insn (gen_aarch64_simd_ld1<VQ:mode>_x2 (operands[0], mem));
  DONE;
})

;; D-register variant of the above.
(define_expand "aarch64_ld1x2<VDC:mode>"
 [(match_operand:OI 0 "register_operand" "=w")
  (match_operand:DI 1 "register_operand" "r")
  (unspec:VDC [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
  "TARGET_SIMD"
{
  machine_mode mode = OImode;
  rtx mem = gen_rtx_MEM (mode, operands[1]);

  emit_insn (gen_aarch64_simd_ld1<VDC:mode>_x2 (operands[0], mem));
  DONE;
})
5338
5339
;; Expander for the vldN_lane builtins: load one element into lane
;; operand 3 of each of the <nregs> registers, the other lanes coming
;; from operand 2.  The lane index is range-checked at expand time.
(define_expand "aarch64_ld<VSTRUCT:nregs>_lane<VALLDIF:mode>"
  [(match_operand:VSTRUCT 0 "register_operand" "=w")
	(match_operand:DI 1 "register_operand" "w")
	(match_operand:VSTRUCT 2 "register_operand" "0")
	(match_operand:SI 3 "immediate_operand" "i")
	(unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
  "TARGET_SIMD"
{
  rtx mem = gen_rtx_MEM (BLKmode, operands[1]);
  /* One element loaded per structure register.  */
  set_mem_size (mem, GET_MODE_SIZE (GET_MODE_INNER (<VALLDIF:MODE>mode))
		     * <VSTRUCT:nregs>);

  aarch64_simd_lane_bounds (operands[3], 0, <VALLDIF:nunits>, NULL);
  emit_insn (gen_aarch64_vec_load_lanes<VSTRUCT:mode>_lane<VALLDIF:mode> (
	operands[0], mem, operands[2], operands[3]));
  DONE;
})
5357
5358;; Expanders for builtins to extract vector registers from large
5359;; opaque integer modes.
5360
5361;; D-register list.
5362
;; Extract D-register number <part> from a structure register list.
;; Each slot of the list occupies 16 bytes, so the subreg byte offset is
;; part * 16; the doubled (VDBL) mode is read and then narrowed to the
;; requested D-register mode via lowpart.
(define_expand "aarch64_get_dreg<VSTRUCT:mode><VDC:mode>"
 [(match_operand:VDC 0 "register_operand" "=w")
  (match_operand:VSTRUCT 1 "register_operand" "w")
  (match_operand:SI 2 "immediate_operand" "i")]
  "TARGET_SIMD"
{
  int part = INTVAL (operands[2]);
  rtx temp = gen_reg_rtx (<VDC:VDBL>mode);
  int offset = part * 16;	/* 16 bytes per list slot.  */

  emit_move_insn (temp, gen_rtx_SUBREG (<VDC:VDBL>mode, operands[1], offset));
  emit_move_insn (operands[0], gen_lowpart (<VDC:MODE>mode, temp));
  DONE;
})
5377
5378;; Q-register list.
5379
;; Extract Q-register number <part> from a structure register list:
;; a direct 16-byte-aligned subreg read at byte offset part * 16.
(define_expand "aarch64_get_qreg<VSTRUCT:mode><VQ:mode>"
 [(match_operand:VQ 0 "register_operand" "=w")
  (match_operand:VSTRUCT 1 "register_operand" "w")
  (match_operand:SI 2 "immediate_operand" "i")]
  "TARGET_SIMD"
{
  int part = INTVAL (operands[2]);
  int offset = part * 16;	/* Each Q register is 16 bytes.  */

  emit_move_insn (operands[0],
		  gen_rtx_SUBREG (<VQ:MODE>mode, operands[1], offset));
  DONE;
})
5393
5394;; Permuted-store expanders for neon intrinsics.
5395
5396;; Permute instructions
5397
5398;; vec_perm support
5399
;; Standard vec_perm pattern with a variable selector (operand 3);
;; restricted to byte vectors (VB) and lowered by
;; aarch64_expand_vec_perm, which emits tbl-based sequences.
(define_expand "vec_perm<mode>"
  [(match_operand:VB 0 "register_operand")
   (match_operand:VB 1 "register_operand")
   (match_operand:VB 2 "register_operand")
   (match_operand:VB 3 "register_operand")]
  "TARGET_SIMD"
{
  aarch64_expand_vec_perm (operands[0], operands[1],
			   operands[2], operands[3], <nunits>);
  DONE;
})
5411
;; Single-table tbl: index into one 16-byte table (operand 1) with the
;; byte indices in operand 2; out-of-range indices yield zero per the
;; instruction's semantics.
(define_insn "aarch64_tbl1<mode>"
  [(set (match_operand:VB 0 "register_operand" "=w")
	(unspec:VB [(match_operand:V16QI 1 "register_operand" "w")
		    (match_operand:VB 2 "register_operand" "w")]
		   UNSPEC_TBL))]
  "TARGET_SIMD"
  "tbl\\t%0.<Vtype>, {%1.16b}, %2.<Vtype>"
  [(set_attr "type" "neon_tbl1<q>")]
)
5421
5422;; Two source registers.
5423
;; Two-table tbl: operand 1 is an OImode pair of consecutive table
;; registers (%S1/%T1 print the first/second register of the pair).
(define_insn "aarch64_tbl2v16qi"
  [(set (match_operand:V16QI 0 "register_operand" "=w")
	(unspec:V16QI [(match_operand:OI 1 "register_operand" "w")
		       (match_operand:V16QI 2 "register_operand" "w")]
		      UNSPEC_TBL))]
  "TARGET_SIMD"
  "tbl\\t%0.16b, {%S1.16b - %T1.16b}, %2.16b"
  [(set_attr "type" "neon_tbl2_q")]
)
5433
;; tbl with a two-register table list (OI) for any byte-vector result
;; width; used by the qtbl2 intrinsics.
(define_insn "aarch64_tbl3<mode>"
  [(set (match_operand:VB 0 "register_operand" "=w")
	(unspec:VB [(match_operand:OI 1 "register_operand" "w")
		      (match_operand:VB 2 "register_operand" "w")]
		      UNSPEC_TBL))]
  "TARGET_SIMD"
  "tbl\\t%S0.<Vbtype>, {%S1.16b - %T1.16b}, %S2.<Vbtype>"
  [(set_attr "type" "neon_tbl3")]
)
5443
;; tbx with a two-register table list: like tbl but out-of-range
;; indices keep the corresponding byte of operand 1 (tied to the
;; destination via the "0" constraint).
(define_insn "aarch64_tbx4<mode>"
  [(set (match_operand:VB 0 "register_operand" "=w")
	(unspec:VB [(match_operand:VB 1 "register_operand" "0")
		      (match_operand:OI 2 "register_operand" "w")
		      (match_operand:VB 3 "register_operand" "w")]
		      UNSPEC_TBX))]
  "TARGET_SIMD"
  "tbx\\t%S0.<Vbtype>, {%S2.16b - %T2.16b}, %S3.<Vbtype>"
  [(set_attr "type" "neon_tbl4")]
)
5454
5455;; Three source registers.
5456
;; tbl with a three-register table list (CI); %S1..%U1 print the
;; consecutive registers of the list.
(define_insn "aarch64_qtbl3<mode>"
  [(set (match_operand:VB 0 "register_operand" "=w")
	(unspec:VB [(match_operand:CI 1 "register_operand" "w")
		      (match_operand:VB 2 "register_operand" "w")]
		      UNSPEC_TBL))]
  "TARGET_SIMD"
  "tbl\\t%S0.<Vbtype>, {%S1.16b - %U1.16b}, %S2.<Vbtype>"
  [(set_attr "type" "neon_tbl3")]
)
5466
;; tbx with a three-register table list; operand 1 supplies the bytes
;; preserved for out-of-range indices and is tied to the destination.
(define_insn "aarch64_qtbx3<mode>"
  [(set (match_operand:VB 0 "register_operand" "=w")
	(unspec:VB [(match_operand:VB 1 "register_operand" "0")
		      (match_operand:CI 2 "register_operand" "w")
		      (match_operand:VB 3 "register_operand" "w")]
		      UNSPEC_TBX))]
  "TARGET_SIMD"
  "tbx\\t%S0.<Vbtype>, {%S2.16b - %U2.16b}, %S3.<Vbtype>"
  [(set_attr "type" "neon_tbl3")]
)
5477
5478;; Four source registers.
5479
;; tbl with a four-register table list (XI); %S1..%V1 print the
;; consecutive registers of the list.
(define_insn "aarch64_qtbl4<mode>"
  [(set (match_operand:VB 0 "register_operand" "=w")
	(unspec:VB [(match_operand:XI 1 "register_operand" "w")
		      (match_operand:VB 2 "register_operand" "w")]
		      UNSPEC_TBL))]
  "TARGET_SIMD"
  "tbl\\t%S0.<Vbtype>, {%S1.16b - %V1.16b}, %S2.<Vbtype>"
  [(set_attr "type" "neon_tbl4")]
)
5489
;; tbx with a four-register table list; operand 1 (tied to the
;; destination) is preserved where indices are out of range.
(define_insn "aarch64_qtbx4<mode>"
  [(set (match_operand:VB 0 "register_operand" "=w")
	(unspec:VB [(match_operand:VB 1 "register_operand" "0")
		      (match_operand:XI 2 "register_operand" "w")
		      (match_operand:VB 3 "register_operand" "w")]
		      UNSPEC_TBX))]
  "TARGET_SIMD"
  "tbx\\t%S0.<Vbtype>, {%S2.16b - %V2.16b}, %S3.<Vbtype>"
  [(set_attr "type" "neon_tbl4")]
)
5500
;; Concatenate two V16QI registers into an OImode register pair, used to
;; feed the two-table tbl patterns.  Kept as a single insn until after
;; reload so the allocator can place the pair, then split into moves by
;; aarch64_split_combinev16qi.
(define_insn_and_split "aarch64_combinev16qi"
  [(set (match_operand:OI 0 "register_operand" "=w")
	(unspec:OI [(match_operand:V16QI 1 "register_operand" "w")
		    (match_operand:V16QI 2 "register_operand" "w")]
		   UNSPEC_CONCAT))]
  "TARGET_SIMD"
  "#"
  "&& reload_completed"
  [(const_int 0)]
{
  aarch64_split_combinev16qi (operands);
  DONE;
}
[(set_attr "type" "multiple")]
)
5516
5517;; This instruction's pattern is generated directly by
5518;; aarch64_expand_vec_perm_const, so any changes to the pattern would
5519;; need corresponding changes there.
;; Two-input permutes: trn1/trn2, zip1/zip2, uzp1/uzp2, selected by the
;; PERMUTE iterator (perm_insn = trn/zip/uzp, perm_hilo = 1/2).
(define_insn "aarch64_<PERMUTE:perm_insn><PERMUTE:perm_hilo><mode>"
  [(set (match_operand:VALL_F16 0 "register_operand" "=w")
	(unspec:VALL_F16 [(match_operand:VALL_F16 1 "register_operand" "w")
			  (match_operand:VALL_F16 2 "register_operand" "w")]
	 PERMUTE))]
  "TARGET_SIMD"
  "<PERMUTE:perm_insn><PERMUTE:perm_hilo>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_permute<q>")]
)
5529
5530;; This instruction's pattern is generated directly by
5531;; aarch64_expand_vec_perm_const, so any changes to the pattern would
5532;; need corresponding changes there.  Note that the immediate (third)
5533;; operand is a lane index not a byte index.
;; ext: extract a vector from the concatenation of operands 1 and 2
;; starting at lane operand 3.  The RTL carries a lane index; the
;; output routine scales it to the byte offset ext actually takes.
(define_insn "aarch64_ext<mode>"
  [(set (match_operand:VALL_F16 0 "register_operand" "=w")
        (unspec:VALL_F16 [(match_operand:VALL_F16 1 "register_operand" "w")
			  (match_operand:VALL_F16 2 "register_operand" "w")
			  (match_operand:SI 3 "immediate_operand" "i")]
	 UNSPEC_EXT))]
  "TARGET_SIMD"
{
  /* Convert the lane index to a byte offset for the instruction.  */
  operands[3] = GEN_INT (INTVAL (operands[3])
      * GET_MODE_UNIT_SIZE (<MODE>mode));
  return "ext\\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>, #%3";
}
  [(set_attr "type" "neon_ext<q>")]
)
5548
5549;; This instruction's pattern is generated directly by
5550;; aarch64_expand_vec_perm_const, so any changes to the pattern would
5551;; need corresponding changes there.
;; rev16/rev32/rev64: reverse the elements within each 16/32/64-bit
;; container (selected by the REVERSE iterator's rev_op).
(define_insn "aarch64_rev<REVERSE:rev_op><mode>"
  [(set (match_operand:VALL_F16 0 "register_operand" "=w")
	(unspec:VALL_F16 [(match_operand:VALL_F16 1 "register_operand" "w")]
                    REVERSE))]
  "TARGET_SIMD"
  "rev<REVERSE:rev_op>\\t%0.<Vtype>, %1.<Vtype>"
  [(set_attr "type" "neon_rev<q>")]
)
5560
;; Two-register structure store from D registers (mirror of the
;; ld2_dreg patterns): interleaving st2 for VD modes, plain st1 on two
;; .1d registers for the single-element DX (DI/DF) modes.
(define_insn "aarch64_st2<mode>_dreg"
  [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
	(unspec:BLK [(match_operand:OI 1 "register_operand" "w")
                    (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_ST2))]
  "TARGET_SIMD"
  "st2\\t{%S1.<Vtype> - %T1.<Vtype>}, %0"
  [(set_attr "type" "neon_store2_2reg")]
)

;; DX (DI/DF) variant of the above.
(define_insn "aarch64_st2<mode>_dreg"
  [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
	(unspec:BLK [(match_operand:OI 1 "register_operand" "w")
                    (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_ST2))]
  "TARGET_SIMD"
  "st1\\t{%S1.1d - %T1.1d}, %0"
  [(set_attr "type" "neon_store1_2reg")]
)
5580
;; Three-register structure store from D registers: interleaving st3
;; for VD modes, st1 on three .1d registers for DX (DI/DF).
(define_insn "aarch64_st3<mode>_dreg"
  [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
	(unspec:BLK [(match_operand:CI 1 "register_operand" "w")
                    (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_ST3))]
  "TARGET_SIMD"
  "st3\\t{%S1.<Vtype> - %U1.<Vtype>}, %0"
  [(set_attr "type" "neon_store3_3reg")]
)

;; DX (DI/DF) variant of the above.
(define_insn "aarch64_st3<mode>_dreg"
  [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
	(unspec:BLK [(match_operand:CI 1 "register_operand" "w")
                    (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_ST3))]
  "TARGET_SIMD"
  "st1\\t{%S1.1d - %U1.1d}, %0"
  [(set_attr "type" "neon_store1_3reg")]
)
5600
;; Four-register structure store from D registers: interleaving st4
;; for VD modes, st1 on four .1d registers for DX (DI/DF).
(define_insn "aarch64_st4<mode>_dreg"
  [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
	(unspec:BLK [(match_operand:XI 1 "register_operand" "w")
                    (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_ST4))]
  "TARGET_SIMD"
  "st4\\t{%S1.<Vtype> - %V1.<Vtype>}, %0"
  [(set_attr "type" "neon_store4_4reg")]
)

;; DX (DI/DF) variant of the above.
(define_insn "aarch64_st4<mode>_dreg"
  [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
	(unspec:BLK [(match_operand:XI 1 "register_operand" "w")
                    (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_ST4))]
  "TARGET_SIMD"
  "st1\\t{%S1.1d - %V1.1d}, %0"
  [(set_attr "type" "neon_store1_4reg")]
)
5620
;; Expander for the D-register vstN builtins: mirror of the ld
;; expander — BLKmode MEM of <nregs> * 8 bytes, then the *_dreg store.
(define_expand "aarch64_st<VSTRUCT:nregs><VDC:mode>"
 [(match_operand:DI 0 "register_operand" "r")
  (match_operand:VSTRUCT 1 "register_operand" "w")
  (unspec:VDC [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
  "TARGET_SIMD"
{
  rtx mem = gen_rtx_MEM (BLKmode, operands[0]);
  set_mem_size (mem, <VSTRUCT:nregs> * 8);

  emit_insn (gen_aarch64_st<VSTRUCT:nregs><VDC:mode>_dreg (mem, operands[1]));
  DONE;
})
5633
;; Expander for the Q-register vstN builtins: build a struct-mode MEM
;; at the address in operand 0 and emit the generic simd_stN pattern.
(define_expand "aarch64_st<VSTRUCT:nregs><VQ:mode>"
 [(match_operand:DI 0 "register_operand" "r")
  (match_operand:VSTRUCT 1 "register_operand" "w")
  (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
  "TARGET_SIMD"
{
  machine_mode mode = <VSTRUCT:MODE>mode;
  rtx mem = gen_rtx_MEM (mode, operands[0]);

  emit_insn (gen_aarch64_simd_st<VSTRUCT:nregs><VQ:mode> (mem, operands[1]));
  DONE;
})
5646
;; Expander for the vstN_lane builtins: store lane operand 2 of each of
;; the <nregs> structure registers to memory (one element per register).
(define_expand "aarch64_st<VSTRUCT:nregs>_lane<VALLDIF:mode>"
 [(match_operand:DI 0 "register_operand" "r")
  (match_operand:VSTRUCT 1 "register_operand" "w")
  (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
  (match_operand:SI 2 "immediate_operand")]
  "TARGET_SIMD"
{
  rtx mem = gen_rtx_MEM (BLKmode, operands[0]);
  /* One element stored per structure register.  */
  set_mem_size (mem, GET_MODE_SIZE (GET_MODE_INNER (<VALLDIF:MODE>mode))
		     * <VSTRUCT:nregs>);

  emit_insn (gen_aarch64_vec_store_lanes<VSTRUCT:mode>_lane<VALLDIF:mode> (
		mem, operands[1], operands[2]));
  DONE;
})
5662
;; Expander for vst1: a single-vector store to the address in
;; operand 0.  Little-endian uses an ordinary move; big-endian needs the
;; dedicated be_st1 pattern for architecturally-defined lane order.
(define_expand "aarch64_st1<VALL_F16:mode>"
 [(match_operand:DI 0 "register_operand")
  (match_operand:VALL_F16 1 "register_operand")]
  "TARGET_SIMD"
{
  machine_mode mode = <VALL_F16:MODE>mode;
  rtx mem = gen_rtx_MEM (mode, operands[0]);

  if (BYTES_BIG_ENDIAN)
    emit_insn (gen_aarch64_be_st1<VALL_F16:mode> (mem, operands[1]));
  else
    emit_move_insn (mem, operands[1]);
  DONE;
})
5677
5678;; Expander for builtins to insert vector registers into large
5679;; opaque integer modes.
5680
5681;; Q-register list.  We don't need a D-reg inserter as we zero
5682;; extend them in arm_neon.h and insert the resulting Q-regs.
5683
;; Insert Q-register operand 2 at slot operand 3 of a structure register
;; list: copy the whole list (operand 1) to the result, then overwrite
;; the 16-byte slot at offset part * 16 via a subreg store.
(define_expand "aarch64_set_qreg<VSTRUCT:mode><VQ:mode>"
 [(match_operand:VSTRUCT 0 "register_operand" "+w")
  (match_operand:VSTRUCT 1 "register_operand" "0")
  (match_operand:VQ 2 "register_operand" "w")
  (match_operand:SI 3 "immediate_operand" "i")]
  "TARGET_SIMD"
{
  int part = INTVAL (operands[3]);
  int offset = part * 16;	/* Each Q register is 16 bytes.  */

  emit_move_insn (operands[0], operands[1]);
  emit_move_insn (gen_rtx_SUBREG (<VQ:MODE>mode, operands[0], offset),
		  operands[2]);
  DONE;
})
5699
5700;; Standard pattern name vec_init<mode><Vel>.
5701
;; Build a vector from the element values in operand 1 (a PARALLEL);
;; all the work is done in aarch64_expand_vector_init.
(define_expand "vec_init<mode><Vel>"
  [(match_operand:VALL_F16 0 "register_operand" "")
   (match_operand 1 "" "")]
  "TARGET_SIMD"
{
  aarch64_expand_vector_init (operands[0], operands[1]);
  DONE;
})
5710
;; Load one scalar element from memory and duplicate it into every lane
;; of the destination (matched from vec_duplicate of a memory operand).
(define_insn "*aarch64_simd_ld1r<mode>"
  [(set (match_operand:VALL_F16 0 "register_operand" "=w")
	(vec_duplicate:VALL_F16
	  (match_operand:<VEL> 1 "aarch64_simd_struct_operand" "Utv")))]
  "TARGET_SIMD"
  "ld1r\\t{%0.<Vtype>}, %1"
  [(set_attr "type" "neon_load1_all_lanes")]
)
5719
;; ld1 of two consecutive vectors into an OImode register pair (no
;; de-interleaving); one pattern each for Q-register (VQ) and
;; D-register (VDC) element modes.
(define_insn "aarch64_simd_ld1<mode>_x2"
  [(set (match_operand:OI 0 "register_operand" "=w")
	(unspec:OI [(match_operand:OI 1 "aarch64_simd_struct_operand" "Utv")
		    (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
		   UNSPEC_LD1))]
  "TARGET_SIMD"
  "ld1\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
  [(set_attr "type" "neon_load1_2reg<q>")]
)

;; D-register variant of the above.
(define_insn "aarch64_simd_ld1<mode>_x2"
  [(set (match_operand:OI 0 "register_operand" "=w")
	(unspec:OI [(match_operand:OI 1 "aarch64_simd_struct_operand" "Utv")
		    (unspec:VDC [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
		   UNSPEC_LD1))]
  "TARGET_SIMD"
  "ld1\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
  [(set_attr "type" "neon_load1_2reg<q>")]
)
5739
5740
;; frecpe: floating-point reciprocal estimate, vector forms.
(define_insn "aarch64_frecpe<mode>"
  [(set (match_operand:VHSDF 0 "register_operand" "=w")
	(unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")]
	 UNSPEC_FRECPE))]
  "TARGET_SIMD"
  "frecpe\\t%0.<Vtype>, %1.<Vtype>"
  [(set_attr "type" "neon_fp_recpe_<stype><q>")]
)
5749
;; frecpe/frecpx scalar forms, selected by the FRECP iterator suffix.
(define_insn "aarch64_frecp<FRECP:frecp_suffix><mode>"
  [(set (match_operand:GPF_F16 0 "register_operand" "=w")
	(unspec:GPF_F16 [(match_operand:GPF_F16 1 "register_operand" "w")]
	 FRECP))]
  "TARGET_SIMD"
  "frecp<FRECP:frecp_suffix>\\t%<s>0, %<s>1"
  [(set_attr "type" "neon_fp_recp<FRECP:frecp_suffix>_<GPF_F16:stype>")]
)
5758
;; frecps: reciprocal Newton-Raphson step, vector and scalar forms.
(define_insn "aarch64_frecps<mode>"
  [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
	(unspec:VHSDF_HSDF
	  [(match_operand:VHSDF_HSDF 1 "register_operand" "w")
	  (match_operand:VHSDF_HSDF 2 "register_operand" "w")]
	  UNSPEC_FRECPS))]
  "TARGET_SIMD"
  "frecps\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
  [(set_attr "type" "neon_fp_recps_<stype><q>")]
)
5769
;; urecpe: unsigned integer reciprocal estimate.
(define_insn "aarch64_urecpe<mode>"
  [(set (match_operand:VDQ_SI 0 "register_operand" "=w")
        (unspec:VDQ_SI [(match_operand:VDQ_SI 1 "register_operand" "w")]
                UNSPEC_URECPE))]
 "TARGET_SIMD"
 "urecpe\\t%0.<Vtype>, %1.<Vtype>"
  [(set_attr "type" "neon_fp_recpe_<Vetype><q>")])
5777
5778;; Standard pattern name vec_extract<mode><Vel>.
5779
;; Standard vec_extract pattern: extract lane operand 2 of vector
;; operand 1; delegated to the get_lane pattern.
(define_expand "vec_extract<mode><Vel>"
  [(match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand" "")
   (match_operand:VALL_F16 1 "register_operand" "")
   (match_operand:SI 2 "immediate_operand" "")]
  "TARGET_SIMD"
{
    emit_insn
      (gen_aarch64_get_lane<mode> (operands[0], operands[1], operands[2]));
    DONE;
})
5790
5791;; aes
5792
;; aese/aesd: AES single-round (en|de)cryption.  Operand 1 (the state)
;; is tied to the destination; operand 2 supplies the round key.
(define_insn "aarch64_crypto_aes<aes_op>v16qi"
  [(set (match_operand:V16QI 0 "register_operand" "=w")
        (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "0")
		       (match_operand:V16QI 2 "register_operand" "w")]
         CRYPTO_AES))]
  "TARGET_SIMD && TARGET_AES"
  "aes<aes_op>\\t%0.16b, %2.16b"
  [(set_attr "type" "crypto_aese")]
)
5802
5803;; When AES/AESMC fusion is enabled we want the register allocation to
5804;; look like:
5805;;    AESE Vn, _
5806;;    AESMC Vn, Vn
5807;; So prefer to tie operand 1 to operand 0 when fusing.
5808
;; aesmc/aesimc: AES MixColumns.  Two alternatives: the tied "0"
;; alternative is only enabled when AESE/AESMC fusion is on, steering
;; the allocator toward the fusible AESMC Vn, Vn form (see comment
;; above); the free "w" alternative is always available.
(define_insn "aarch64_crypto_aes<aesmc_op>v16qi"
  [(set (match_operand:V16QI 0 "register_operand" "=w,w")
	(unspec:V16QI [(match_operand:V16QI 1 "register_operand" "0,w")]
	 CRYPTO_AESMC))]
  "TARGET_SIMD && TARGET_AES"
  "aes<aesmc_op>\\t%0.16b, %1.16b"
  [(set_attr "type" "crypto_aesmc")
   (set_attr_alternative "enabled"
     [(if_then_else (match_test
		       "aarch64_fusion_enabled_p (AARCH64_FUSE_AES_AESMC)")
		     (const_string "yes" )
		     (const_string "no"))
      (const_string "yes")])]
)
5823
5824;; sha1
5825
;; sha1h: SHA1 fixed rotate, scalar SImode form.
(define_insn "aarch64_crypto_sha1hsi"
  [(set (match_operand:SI 0 "register_operand" "=w")
        (unspec:SI [(match_operand:SI 1
                       "register_operand" "w")]
         UNSPEC_SHA1H))]
  "TARGET_SIMD && TARGET_SHA2"
  "sha1h\\t%s0, %s1"
  [(set_attr "type" "crypto_sha1_fast")]
)
5835
;; sha1h applied to element 0 of a V4SI — little-endian form (the
;; low-addressed element is lane 0).
(define_insn "aarch64_crypto_sha1hv4si"
  [(set (match_operand:SI 0 "register_operand" "=w")
	(unspec:SI [(vec_select:SI (match_operand:V4SI 1 "register_operand" "w")
		     (parallel [(const_int 0)]))]
	 UNSPEC_SHA1H))]
  "TARGET_SIMD && TARGET_SHA2 && !BYTES_BIG_ENDIAN"
  "sha1h\\t%s0, %s1"
  [(set_attr "type" "crypto_sha1_fast")]
)
5845
;; Big-endian counterpart of the above: GCC lane 3 corresponds to the
;; architectural element 0, hence the const_int 3 in the vec_select.
(define_insn "aarch64_be_crypto_sha1hv4si"
  [(set (match_operand:SI 0 "register_operand" "=w")
	(unspec:SI [(vec_select:SI (match_operand:V4SI 1 "register_operand" "w")
		     (parallel [(const_int 3)]))]
	 UNSPEC_SHA1H))]
  "TARGET_SIMD && TARGET_SHA2 && BYTES_BIG_ENDIAN"
  "sha1h\\t%s0, %s1"
  [(set_attr "type" "crypto_sha1_fast")]
)
5855
;; sha1su1: SHA1 schedule update 1.  Operand 1 is tied to the
;; destination (read-modify-write by the instruction).
(define_insn "aarch64_crypto_sha1su1v4si"
  [(set (match_operand:V4SI 0 "register_operand" "=w")
        (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
                      (match_operand:V4SI 2 "register_operand" "w")]
         UNSPEC_SHA1SU1))]
  "TARGET_SIMD && TARGET_SHA2"
  "sha1su1\\t%0.4s, %2.4s"
  [(set_attr "type" "crypto_sha1_fast")]
)
5865
;; sha1c/sha1m/sha1p: SHA1 hash update (choose/majority/parity),
;; selected by the CRYPTO_SHA1 iterator.  Operand 1 (hash state) is
;; tied to the destination; operand 2 is the scalar e value.
(define_insn "aarch64_crypto_sha1<sha1_op>v4si"
  [(set (match_operand:V4SI 0 "register_operand" "=w")
        (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
                      (match_operand:SI 2 "register_operand" "w")
                      (match_operand:V4SI 3 "register_operand" "w")]
         CRYPTO_SHA1))]
  "TARGET_SIMD && TARGET_SHA2"
  "sha1<sha1_op>\\t%q0, %s2, %3.4s"
  [(set_attr "type" "crypto_sha1_slow")]
)
5876
;; sha1su0: SHA1 schedule update 0 (operand 1 tied to destination).
(define_insn "aarch64_crypto_sha1su0v4si"
  [(set (match_operand:V4SI 0 "register_operand" "=w")
        (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
                      (match_operand:V4SI 2 "register_operand" "w")
                      (match_operand:V4SI 3 "register_operand" "w")]
         UNSPEC_SHA1SU0))]
  "TARGET_SIMD && TARGET_SHA2"
  "sha1su0\\t%0.4s, %2.4s, %3.4s"
  [(set_attr "type" "crypto_sha1_xor")]
)
5887
5888;; sha256
5889
;; sha256h/sha256h2: SHA256 hash update, selected by the CRYPTO_SHA256
;; iterator (operand 1 tied to destination).
(define_insn "aarch64_crypto_sha256h<sha256_op>v4si"
  [(set (match_operand:V4SI 0 "register_operand" "=w")
        (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
                      (match_operand:V4SI 2 "register_operand" "w")
                      (match_operand:V4SI 3 "register_operand" "w")]
         CRYPTO_SHA256))]
  "TARGET_SIMD && TARGET_SHA2"
  "sha256h<sha256_op>\\t%q0, %q2, %3.4s"
  [(set_attr "type" "crypto_sha256_slow")]
)
5900
;; sha256su0: SHA256 schedule update 0 (operand 1 tied to destination).
(define_insn "aarch64_crypto_sha256su0v4si"
  [(set (match_operand:V4SI 0 "register_operand" "=w")
        (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
                      (match_operand:V4SI 2 "register_operand" "w")]
         UNSPEC_SHA256SU0))]
  "TARGET_SIMD && TARGET_SHA2"
  "sha256su0\\t%0.4s, %2.4s"
  [(set_attr "type" "crypto_sha256_fast")]
)
5910
;; sha256su1: SHA256 schedule update 1 (operand 1 tied to destination).
(define_insn "aarch64_crypto_sha256su1v4si"
  [(set (match_operand:V4SI 0 "register_operand" "=w")
        (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
                      (match_operand:V4SI 2 "register_operand" "w")
                      (match_operand:V4SI 3 "register_operand" "w")]
         UNSPEC_SHA256SU1))]
  "TARGET_SIMD && TARGET_SHA2"
  "sha256su1\\t%0.4s, %2.4s, %3.4s"
  [(set_attr "type" "crypto_sha256_slow")]
)
5921
5922;; sha512
5923
;; sha512h/sha512h2: SHA512 hash update, selected by the CRYPTO_SHA512
;; iterator (operand 1 tied to destination).  Requires the SHA3 feature.
(define_insn "aarch64_crypto_sha512h<sha512_op>qv2di"
  [(set (match_operand:V2DI 0 "register_operand" "=w")
        (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
                      (match_operand:V2DI 2 "register_operand" "w")
                      (match_operand:V2DI 3 "register_operand" "w")]
         CRYPTO_SHA512))]
  "TARGET_SIMD && TARGET_SHA3"
  "sha512h<sha512_op>\\t%q0, %q2, %3.2d"
  [(set_attr "type" "crypto_sha512")]
)
5934
;; sha512su0: SHA512 schedule update 0 (operand 1 tied to destination).
(define_insn "aarch64_crypto_sha512su0qv2di"
  [(set (match_operand:V2DI 0 "register_operand" "=w")
        (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
                      (match_operand:V2DI 2 "register_operand" "w")]
         UNSPEC_SHA512SU0))]
  "TARGET_SIMD && TARGET_SHA3"
  "sha512su0\\t%0.2d, %2.2d"
  [(set_attr "type" "crypto_sha512")]
)
5944
;; sha512su1: SHA512 schedule update 1 (operand 1 tied to destination).
(define_insn "aarch64_crypto_sha512su1qv2di"
  [(set (match_operand:V2DI 0 "register_operand" "=w")
        (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
                      (match_operand:V2DI 2 "register_operand" "w")
                      (match_operand:V2DI 3 "register_operand" "w")]
         UNSPEC_SHA512SU1))]
  "TARGET_SIMD && TARGET_SHA3"
  "sha512su1\\t%0.2d, %2.2d, %3.2d"
  [(set_attr "type" "crypto_sha512")]
)
5955
5956;; sha3
5957
;; eor3: three-way exclusive OR, written as native xor RTL so the
;; optimizers can reason about it ("%w" marks operands 2/3 commutative).
(define_insn "aarch64_eor3qv8hi"
  [(set (match_operand:V8HI 0 "register_operand" "=w")
	(xor:V8HI
	 (xor:V8HI
	  (match_operand:V8HI 2 "register_operand" "%w")
	  (match_operand:V8HI 3 "register_operand" "w"))
	 (match_operand:V8HI 1 "register_operand" "w")))]
  "TARGET_SIMD && TARGET_SHA3"
  "eor3\\t%0.16b, %1.16b, %2.16b, %3.16b"
  [(set_attr "type" "crypto_sha3")]
)
5969
;; rax1: xor of operand 1 with operand 2 rotated left by one, expressed
;; as native rotate/xor RTL.
(define_insn "aarch64_rax1qv2di"
  [(set (match_operand:V2DI 0 "register_operand" "=w")
	(xor:V2DI
	 (rotate:V2DI
	  (match_operand:V2DI 2 "register_operand" "w")
	  (const_int 1))
	 (match_operand:V2DI 1 "register_operand" "w")))]
  "TARGET_SIMD && TARGET_SHA3"
  "rax1\\t%0.2d, %1.2d, %2.2d"
  [(set_attr "type" "crypto_sha3")]
)
5981
;; xar: xor of operands 1 and 2 ("%w" = commutative), rotated right by
;; the immediate in operand 3.
(define_insn "aarch64_xarqv2di"
  [(set (match_operand:V2DI 0 "register_operand" "=w")
	(rotatert:V2DI
	 (xor:V2DI
	  (match_operand:V2DI 1 "register_operand" "%w")
	  (match_operand:V2DI 2 "register_operand" "w"))
	 (match_operand:SI 3 "aarch64_simd_shift_imm_di" "Usd")))]
  "TARGET_SIMD && TARGET_SHA3"
  "xar\\t%0.2d, %1.2d, %2.2d, %3"
  [(set_attr "type" "crypto_sha3")]
)
5993
;; bcax: bit clear and xor — operand 1 ^ (operand 2 & ~operand 3),
;; expressed with native xor/and/not RTL.
(define_insn "aarch64_bcaxqv8hi"
  [(set (match_operand:V8HI 0 "register_operand" "=w")
	(xor:V8HI
	 (and:V8HI
	  (not:V8HI (match_operand:V8HI 3 "register_operand" "w"))
	  (match_operand:V8HI 2 "register_operand" "w"))
	 (match_operand:V8HI 1 "register_operand" "w")))]
  "TARGET_SIMD && TARGET_SHA3"
  "bcax\\t%0.16b, %1.16b, %2.16b, %3.16b"
  [(set_attr "type" "crypto_sha3")]
)
6005
6006;; SM3
6007
;; sm3ss1: SM3 hash primitive, three independent source operands.
(define_insn "aarch64_sm3ss1qv4si"
  [(set (match_operand:V4SI 0 "register_operand" "=w")
	(unspec:V4SI [(match_operand:V4SI 1 "register_operand" "w")
		      (match_operand:V4SI 2 "register_operand" "w")
		      (match_operand:V4SI 3 "register_operand" "w")]
	 UNSPEC_SM3SS1))]
  "TARGET_SIMD && TARGET_SM4"
  "sm3ss1\\t%0.4s, %1.4s, %2.4s, %3.4s"
  [(set_attr "type" "crypto_sm3")]
)
6018
6019
;; sm3tt1a/sm3tt1b/sm3tt2a/sm3tt2b (CRYPTO_SM3TT iterator): operand 1
;; is tied to the destination; operand 4 is the 2-bit lane immediate.
(define_insn "aarch64_sm3tt<sm3tt_op>qv4si"
  [(set (match_operand:V4SI 0 "register_operand" "=w")
	(unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
		      (match_operand:V4SI 2 "register_operand" "w")
		      (match_operand:V4SI 3 "register_operand" "w")
		      (match_operand:SI 4 "aarch64_imm2" "Ui2")]
	 CRYPTO_SM3TT))]
  "TARGET_SIMD && TARGET_SM4"
  "sm3tt<sm3tt_op>\\t%0.4s, %2.4s, %3.4s[%4]"
  [(set_attr "type" "crypto_sm3")]
)
6031
;; sm3partw1/sm3partw2 (CRYPTO_SM3PART iterator): SM3 message schedule;
;; operand 1 tied to the destination.
(define_insn "aarch64_sm3partw<sm3part_op>qv4si"
  [(set (match_operand:V4SI 0 "register_operand" "=w")
	(unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
		      (match_operand:V4SI 2 "register_operand" "w")
		      (match_operand:V4SI 3 "register_operand" "w")]
	 CRYPTO_SM3PART))]
  "TARGET_SIMD && TARGET_SM4"
  "sm3partw<sm3part_op>\\t%0.4s, %2.4s, %3.4s"
  [(set_attr "type" "crypto_sm3")]
)
6042
6043;; SM4
6044
;; sm4e: SM4 round (operand 1 tied to destination).
(define_insn "aarch64_sm4eqv4si"
  [(set (match_operand:V4SI 0 "register_operand" "=w")
	(unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
		      (match_operand:V4SI 2 "register_operand" "w")]
	 UNSPEC_SM4E))]
  "TARGET_SIMD && TARGET_SM4"
  "sm4e\\t%0.4s, %2.4s"
  [(set_attr "type" "crypto_sm4")]
)
6054
;; sm4ekey: SM4 key schedule (no tied operand).
(define_insn "aarch64_sm4ekeyqv4si"
  [(set (match_operand:V4SI 0 "register_operand" "=w")
	(unspec:V4SI [(match_operand:V4SI 1 "register_operand" "w")
		      (match_operand:V4SI 2 "register_operand" "w")]
	 UNSPEC_SM4EKEY))]
  "TARGET_SIMD && TARGET_SM4"
  "sm4ekey\\t%0.4s, %1.4s, %2.4s"
  [(set_attr "type" "crypto_sm4")]
)
6064
6065;; fp16fml
6066
;; Expander for the fmlal/fmlsl "_low" intrinsics: widening half-float
;; multiply-accumulate using the LOW halves of the HF inputs.  Builds
;; two lo-half lane-selection parallels (one per multiplicand) and
;; emits the matching simd_fml*_low define_insn.
(define_expand "aarch64_fml<f16mac1>l<f16quad>_low<mode>"
  [(set (match_operand:VDQSF 0 "register_operand" "=w")
	(unspec:VDQSF
	 [(match_operand:VDQSF 1 "register_operand" "0")
	  (match_operand:<VFMLA_W> 2 "register_operand" "w")
	  (match_operand:<VFMLA_W> 3 "register_operand" "w")]
	 VFMLA16_LOW))]
  "TARGET_F16FML"
{
  /* Lane selectors for the low half of each multiplicand.  */
  rtx p1 = aarch64_simd_vect_par_cnst_half (<VFMLA_W>mode,
					    <nunits> * 2, false);
  rtx p2 = aarch64_simd_vect_par_cnst_half (<VFMLA_W>mode,
					    <nunits> * 2, false);

  emit_insn (gen_aarch64_simd_fml<f16mac1>l<f16quad>_low<mode> (operands[0],
								operands[1],
								operands[2],
								operands[3],
								p1, p2));
  DONE;

})
6089
;; Expander for the fmlal/fmlsl "_high" intrinsics: as the _low
;; expander above, but selecting the HIGH halves of the HF inputs
;; (third argument of aarch64_simd_vect_par_cnst_half is true).
(define_expand "aarch64_fml<f16mac1>l<f16quad>_high<mode>"
  [(set (match_operand:VDQSF 0 "register_operand" "=w")
	(unspec:VDQSF
	 [(match_operand:VDQSF 1 "register_operand" "0")
	  (match_operand:<VFMLA_W> 2 "register_operand" "w")
	  (match_operand:<VFMLA_W> 3 "register_operand" "w")]
	 VFMLA16_HIGH))]
  "TARGET_F16FML"
{
  rtx p1 = aarch64_simd_vect_par_cnst_half (<VFMLA_W>mode, <nunits> * 2, true);
  rtx p2 = aarch64_simd_vect_par_cnst_half (<VFMLA_W>mode, <nunits> * 2, true);

  emit_insn (gen_aarch64_simd_fml<f16mac1>l<f16quad>_high<mode> (operands[0],
								 operands[1],
								 operands[2],
								 operands[3],
								 p1, p2));
  DONE;
})
6109
;; FMLAL (low half): fma of the float-extended low HF halves of operands
;; 2 and 3 into the SF accumulator (operand 1, tied to the destination).
;; Operands 4 and 5 are the lo-half parallels built by the expander.
(define_insn "aarch64_simd_fmlal<f16quad>_low<mode>"
  [(set (match_operand:VDQSF 0 "register_operand" "=w")
	(fma:VDQSF
	 (float_extend:VDQSF
	  (vec_select:<VFMLA_SEL_W>
	   (match_operand:<VFMLA_W> 2 "register_operand" "w")
	   (match_operand:<VFMLA_W> 4 "vect_par_cnst_lo_half" "")))
	 (float_extend:VDQSF
	  (vec_select:<VFMLA_SEL_W>
	   (match_operand:<VFMLA_W> 3 "register_operand" "w")
	   (match_operand:<VFMLA_W> 5 "vect_par_cnst_lo_half" "")))
	 (match_operand:VDQSF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlal\\t%0.<nunits>s, %2.<nunits>h, %3.<nunits>h"
  [(set_attr "type" "neon_fp_mul_s")]
)
6126
;; FMLSL (low half): as fmlal_low, but the first multiplicand is negated
;; (neg inside the float_extend), giving a multiply-subtract.
(define_insn "aarch64_simd_fmlsl<f16quad>_low<mode>"
  [(set (match_operand:VDQSF 0 "register_operand" "=w")
	(fma:VDQSF
	 (float_extend:VDQSF
	  (neg:<VFMLA_SEL_W>
	   (vec_select:<VFMLA_SEL_W>
	    (match_operand:<VFMLA_W> 2 "register_operand" "w")
	    (match_operand:<VFMLA_W> 4 "vect_par_cnst_lo_half" ""))))
	 (float_extend:VDQSF
	  (vec_select:<VFMLA_SEL_W>
	   (match_operand:<VFMLA_W> 3 "register_operand" "w")
	   (match_operand:<VFMLA_W> 5 "vect_par_cnst_lo_half" "")))
	 (match_operand:VDQSF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlsl\\t%0.<nunits>s, %2.<nunits>h, %3.<nunits>h"
  [(set_attr "type" "neon_fp_mul_s")]
)
6144
;; FMLAL2 (high half): as fmlal_low but matching hi-half parallels, so the
;; upper HF elements of operands 2 and 3 feed the widening multiply-add.
(define_insn "aarch64_simd_fmlal<f16quad>_high<mode>"
  [(set (match_operand:VDQSF 0 "register_operand" "=w")
	(fma:VDQSF
	 (float_extend:VDQSF
	  (vec_select:<VFMLA_SEL_W>
	   (match_operand:<VFMLA_W> 2 "register_operand" "w")
	   (match_operand:<VFMLA_W> 4 "vect_par_cnst_hi_half" "")))
	 (float_extend:VDQSF
	  (vec_select:<VFMLA_SEL_W>
	   (match_operand:<VFMLA_W> 3 "register_operand" "w")
	   (match_operand:<VFMLA_W> 5 "vect_par_cnst_hi_half" "")))
	 (match_operand:VDQSF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlal2\\t%0.<nunits>s, %2.<nunits>h, %3.<nunits>h"
  [(set_attr "type" "neon_fp_mul_s")]
)
6161
;; FMLSL2 (high half): negated first multiplicand + hi-half selection.
(define_insn "aarch64_simd_fmlsl<f16quad>_high<mode>"
  [(set (match_operand:VDQSF 0 "register_operand" "=w")
	(fma:VDQSF
	 (float_extend:VDQSF
	  (neg:<VFMLA_SEL_W>
	   (vec_select:<VFMLA_SEL_W>
	    (match_operand:<VFMLA_W> 2 "register_operand" "w")
	    (match_operand:<VFMLA_W> 4 "vect_par_cnst_hi_half" ""))))
	 (float_extend:VDQSF
	  (vec_select:<VFMLA_SEL_W>
	   (match_operand:<VFMLA_W> 3 "register_operand" "w")
	   (match_operand:<VFMLA_W> 5 "vect_par_cnst_hi_half" "")))
	 (match_operand:VDQSF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlsl2\\t%0.<nunits>s, %2.<nunits>h, %3.<nunits>h"
  [(set_attr "type" "neon_fp_mul_s")]
)
6179
;; Lane form, 64-bit vector, 64-bit (V4HF) index vector: V2SF += V4HF-low
;; * V4HF[lane].  aarch64_imm2 limits the lane index to [0,3]; the index
;; is adjusted for endianness before emitting the insn.
(define_expand "aarch64_fml<f16mac1>l_lane_lowv2sf"
  [(set (match_operand:V2SF 0 "register_operand" "")
	(unspec:V2SF [(match_operand:V2SF 1 "register_operand" "")
			   (match_operand:V4HF 2 "register_operand" "")
			   (match_operand:V4HF 3 "register_operand" "")
			   (match_operand:SI 4 "aarch64_imm2" "")]
	 VFMLA16_LOW))]
  "TARGET_F16FML"
{
    rtx p1 = aarch64_simd_vect_par_cnst_half (V4HFmode, 4, false);
    rtx lane = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[4]));

    emit_insn (gen_aarch64_simd_fml<f16mac1>l_lane_lowv2sf (operands[0],
							    operands[1],
							    operands[2],
							    operands[3],
							    p1, lane));
    DONE;
}
)
6200
;; High-half counterpart of the lane_lowv2sf expander above (hi-half
;; parallel, emits the FMLAL2/FMLSL2 lane insn).
(define_expand "aarch64_fml<f16mac1>l_lane_highv2sf"
  [(set (match_operand:V2SF 0 "register_operand" "")
	(unspec:V2SF [(match_operand:V2SF 1 "register_operand" "")
			   (match_operand:V4HF 2 "register_operand" "")
			   (match_operand:V4HF 3 "register_operand" "")
			   (match_operand:SI 4 "aarch64_imm2" "")]
	 VFMLA16_HIGH))]
  "TARGET_F16FML"
{
    rtx p1 = aarch64_simd_vect_par_cnst_half (V4HFmode, 4, true);
    rtx lane = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[4]));

    emit_insn (gen_aarch64_simd_fml<f16mac1>l_lane_highv2sf (operands[0],
							     operands[1],
							     operands[2],
							     operands[3],
							     p1, lane));
    DONE;
})
6220
;; FMLAL (by element, low half): the selected HF lane of operand 3 is
;; duplicated and multiplied with the low half of operand 2.  The "x"
;; constraint restricts the indexed register to the lower FP registers
;; (V0-V15), as required by the by-element encoding of 16-bit elements.
(define_insn "aarch64_simd_fmlal_lane_lowv2sf"
  [(set (match_operand:V2SF 0 "register_operand" "=w")
	(fma:V2SF
	 (float_extend:V2SF
	   (vec_select:V2HF
	    (match_operand:V4HF 2 "register_operand" "w")
	    (match_operand:V4HF 4 "vect_par_cnst_lo_half" "")))
	 (float_extend:V2SF
	   (vec_duplicate:V2HF
	    (vec_select:HF
	     (match_operand:V4HF 3 "register_operand" "x")
	     (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
	 (match_operand:V2SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlal\\t%0.2s, %2.2h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)
6238
;; FMLSL (by element, low half): as fmlal_lane_lowv2sf with the first
;; multiplicand negated.
(define_insn "aarch64_simd_fmlsl_lane_lowv2sf"
  [(set (match_operand:V2SF 0 "register_operand" "=w")
	(fma:V2SF
	 (float_extend:V2SF
	  (neg:V2HF
	   (vec_select:V2HF
	    (match_operand:V4HF 2 "register_operand" "w")
	    (match_operand:V4HF 4 "vect_par_cnst_lo_half" ""))))
	 (float_extend:V2SF
	  (vec_duplicate:V2HF
	   (vec_select:HF
	    (match_operand:V4HF 3 "register_operand" "x")
	    (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
	 (match_operand:V2SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlsl\\t%0.2s, %2.2h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)
6257
;; FMLAL2 (by element, high half): hi-half selection of operand 2,
;; broadcast HF lane of operand 3 (restricted to V0-V15 by "x").
(define_insn "aarch64_simd_fmlal_lane_highv2sf"
  [(set (match_operand:V2SF 0 "register_operand" "=w")
	(fma:V2SF
	 (float_extend:V2SF
	   (vec_select:V2HF
	    (match_operand:V4HF 2 "register_operand" "w")
	    (match_operand:V4HF 4 "vect_par_cnst_hi_half" "")))
	 (float_extend:V2SF
	   (vec_duplicate:V2HF
	    (vec_select:HF
	     (match_operand:V4HF 3 "register_operand" "x")
	     (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
	 (match_operand:V2SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlal2\\t%0.2s, %2.2h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)
6275
;; FMLSL2 (by element, high half): negated hi-half multiplicand times the
;; broadcast HF lane of operand 3.
(define_insn "aarch64_simd_fmlsl_lane_highv2sf"
  [(set (match_operand:V2SF 0 "register_operand" "=w")
	(fma:V2SF
	 (float_extend:V2SF
	   (neg:V2HF
	    (vec_select:V2HF
	     (match_operand:V4HF 2 "register_operand" "w")
	     (match_operand:V4HF 4 "vect_par_cnst_hi_half" ""))))
	 (float_extend:V2SF
	   (vec_duplicate:V2HF
	    (vec_select:HF
	     (match_operand:V4HF 3 "register_operand" "x")
	     (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
	 (match_operand:V2SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlsl2\\t%0.2s, %2.2h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)
6294
;; Laneq form, 128-bit vector, 128-bit (V8HF) index vector: V4SF +=
;; V8HF-low * V8HF[lane].  aarch64_lane_imm3 limits the index to [0,7].
(define_expand "aarch64_fml<f16mac1>lq_laneq_lowv4sf"
  [(set (match_operand:V4SF 0 "register_operand" "")
	(unspec:V4SF [(match_operand:V4SF 1 "register_operand" "")
			   (match_operand:V8HF 2 "register_operand" "")
			   (match_operand:V8HF 3 "register_operand" "")
			   (match_operand:SI 4 "aarch64_lane_imm3" "")]
	 VFMLA16_LOW))]
  "TARGET_F16FML"
{
    rtx p1 = aarch64_simd_vect_par_cnst_half (V8HFmode, 8, false);
    rtx lane = aarch64_endian_lane_rtx (V8HFmode, INTVAL (operands[4]));

    emit_insn (gen_aarch64_simd_fml<f16mac1>lq_laneq_lowv4sf (operands[0],
							      operands[1],
							      operands[2],
							      operands[3],
							      p1, lane));
    DONE;
})
6314
;; High-half counterpart of the laneq_lowv4sf expander (FMLAL2/FMLSL2).
(define_expand "aarch64_fml<f16mac1>lq_laneq_highv4sf"
  [(set (match_operand:V4SF 0 "register_operand" "")
	(unspec:V4SF [(match_operand:V4SF 1 "register_operand" "")
			   (match_operand:V8HF 2 "register_operand" "")
			   (match_operand:V8HF 3 "register_operand" "")
			   (match_operand:SI 4 "aarch64_lane_imm3" "")]
	 VFMLA16_HIGH))]
  "TARGET_F16FML"
{
    rtx p1 = aarch64_simd_vect_par_cnst_half (V8HFmode, 8, true);
    rtx lane = aarch64_endian_lane_rtx (V8HFmode, INTVAL (operands[4]));

    emit_insn (gen_aarch64_simd_fml<f16mac1>lq_laneq_highv4sf (operands[0],
							       operands[1],
							       operands[2],
							       operands[3],
							       p1, lane));
    DONE;
})
6334
;; FMLAL (by element, Q form, low half): low V4HF half of operand 2 times
;; a broadcast HF lane of the V8HF operand 3 ("x": V0-V15, "Ui7": 0-7).
(define_insn "aarch64_simd_fmlalq_laneq_lowv4sf"
  [(set (match_operand:V4SF 0 "register_operand" "=w")
	(fma:V4SF
	 (float_extend:V4SF
	  (vec_select:V4HF
	    (match_operand:V8HF 2 "register_operand" "w")
	    (match_operand:V8HF 4 "vect_par_cnst_lo_half" "")))
	 (float_extend:V4SF
	  (vec_duplicate:V4HF
	   (vec_select:HF
	    (match_operand:V8HF 3 "register_operand" "x")
	    (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
	 (match_operand:V4SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlal\\t%0.4s, %2.4h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)
6352
;; FMLSL (by element, Q form, low half): as fmlalq_laneq_lowv4sf with the
;; first multiplicand negated.
(define_insn "aarch64_simd_fmlslq_laneq_lowv4sf"
  [(set (match_operand:V4SF 0 "register_operand" "=w")
	(fma:V4SF
	  (float_extend:V4SF
	   (neg:V4HF
	    (vec_select:V4HF
	     (match_operand:V8HF 2 "register_operand" "w")
	     (match_operand:V8HF 4 "vect_par_cnst_lo_half" ""))))
	 (float_extend:V4SF
	  (vec_duplicate:V4HF
	   (vec_select:HF
	    (match_operand:V8HF 3 "register_operand" "x")
	    (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
	 (match_operand:V4SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlsl\\t%0.4s, %2.4h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)
6371
;; FMLAL2 (by element, Q form, high half): high V4HF half of operand 2
;; times a broadcast HF lane of operand 3.
(define_insn "aarch64_simd_fmlalq_laneq_highv4sf"
  [(set (match_operand:V4SF 0 "register_operand" "=w")
	(fma:V4SF
	 (float_extend:V4SF
	  (vec_select:V4HF
	    (match_operand:V8HF 2 "register_operand" "w")
	    (match_operand:V8HF 4 "vect_par_cnst_hi_half" "")))
	 (float_extend:V4SF
	  (vec_duplicate:V4HF
	   (vec_select:HF
	    (match_operand:V8HF 3 "register_operand" "x")
	    (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
	 (match_operand:V4SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlal2\\t%0.4s, %2.4h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)
6389
;; FMLSL2 (by element, Q form, high half): negated high-half multiplicand.
(define_insn "aarch64_simd_fmlslq_laneq_highv4sf"
  [(set (match_operand:V4SF 0 "register_operand" "=w")
	(fma:V4SF
	 (float_extend:V4SF
	  (neg:V4HF
	   (vec_select:V4HF
	    (match_operand:V8HF 2 "register_operand" "w")
	    (match_operand:V8HF 4 "vect_par_cnst_hi_half" ""))))
	 (float_extend:V4SF
	  (vec_duplicate:V4HF
	   (vec_select:HF
	    (match_operand:V8HF 3 "register_operand" "x")
	    (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
	 (match_operand:V4SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlsl2\\t%0.4s, %2.4h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)
6408
;; Mixed form: 64-bit data vector (V4HF low half) indexed by a lane of a
;; 128-bit V8HF vector, hence the lo-half parallel is built in V4HFmode
;; while the lane index is endian-adjusted in V8HFmode.
(define_expand "aarch64_fml<f16mac1>l_laneq_lowv2sf"
  [(set (match_operand:V2SF 0 "register_operand" "")
	(unspec:V2SF [(match_operand:V2SF 1 "register_operand" "")
		      (match_operand:V4HF 2 "register_operand" "")
		      (match_operand:V8HF 3 "register_operand" "")
		      (match_operand:SI 4 "aarch64_lane_imm3" "")]
	 VFMLA16_LOW))]
  "TARGET_F16FML"
{
    rtx p1 = aarch64_simd_vect_par_cnst_half (V4HFmode, 4, false);
    rtx lane = aarch64_endian_lane_rtx (V8HFmode, INTVAL (operands[4]));

    emit_insn (gen_aarch64_simd_fml<f16mac1>l_laneq_lowv2sf (operands[0],
							     operands[1],
							     operands[2],
							     operands[3],
							     p1, lane));
    DONE;

})
6429
;; High-half counterpart of the laneq_lowv2sf expander above.
(define_expand "aarch64_fml<f16mac1>l_laneq_highv2sf"
  [(set (match_operand:V2SF 0 "register_operand" "")
	(unspec:V2SF [(match_operand:V2SF 1 "register_operand" "")
		      (match_operand:V4HF 2 "register_operand" "")
		      (match_operand:V8HF 3 "register_operand" "")
		      (match_operand:SI 4 "aarch64_lane_imm3" "")]
	 VFMLA16_HIGH))]
  "TARGET_F16FML"
{
    rtx p1 = aarch64_simd_vect_par_cnst_half (V4HFmode, 4, true);
    rtx lane = aarch64_endian_lane_rtx (V8HFmode, INTVAL (operands[4]));

    emit_insn (gen_aarch64_simd_fml<f16mac1>l_laneq_highv2sf (operands[0],
							      operands[1],
							      operands[2],
							      operands[3],
							      p1, lane));
    DONE;

})
6450
;; FMLAL (by element, D-form data, Q-form index): low V2HF half of the
;; V4HF operand 2 times a broadcast lane [0-7] of the V8HF operand 3.
(define_insn "aarch64_simd_fmlal_laneq_lowv2sf"
  [(set (match_operand:V2SF 0 "register_operand" "=w")
	(fma:V2SF
	 (float_extend:V2SF
	   (vec_select:V2HF
	    (match_operand:V4HF 2 "register_operand" "w")
	    (match_operand:V4HF 4 "vect_par_cnst_lo_half" "")))
	 (float_extend:V2SF
	  (vec_duplicate:V2HF
	   (vec_select:HF
	    (match_operand:V8HF 3 "register_operand" "x")
	    (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
	 (match_operand:V2SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlal\\t%0.2s, %2.2h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)
6468
;; FMLSL variant of fmlal_laneq_lowv2sf (negated first multiplicand).
(define_insn "aarch64_simd_fmlsl_laneq_lowv2sf"
  [(set (match_operand:V2SF 0 "register_operand" "=w")
	(fma:V2SF
	 (float_extend:V2SF
	  (neg:V2HF
	   (vec_select:V2HF
	    (match_operand:V4HF 2 "register_operand" "w")
	    (match_operand:V4HF 4 "vect_par_cnst_lo_half" ""))))
	 (float_extend:V2SF
	  (vec_duplicate:V2HF
	   (vec_select:HF
	    (match_operand:V8HF 3 "register_operand" "x")
	    (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
	 (match_operand:V2SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlsl\\t%0.2s, %2.2h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)
6487
;; FMLAL2 (by element, D-form data, Q-form index): high half of operand 2.
(define_insn "aarch64_simd_fmlal_laneq_highv2sf"
  [(set (match_operand:V2SF 0 "register_operand" "=w")
	(fma:V2SF
	 (float_extend:V2SF
	   (vec_select:V2HF
	    (match_operand:V4HF 2 "register_operand" "w")
	    (match_operand:V4HF 4 "vect_par_cnst_hi_half" "")))
	 (float_extend:V2SF
	  (vec_duplicate:V2HF
	   (vec_select:HF
	    (match_operand:V8HF 3 "register_operand" "x")
	    (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
	 (match_operand:V2SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlal2\\t%0.2s, %2.2h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)
6505
;; FMLSL2 variant of fmlal_laneq_highv2sf (negated first multiplicand).
(define_insn "aarch64_simd_fmlsl_laneq_highv2sf"
  [(set (match_operand:V2SF 0 "register_operand" "=w")
	(fma:V2SF
	 (float_extend:V2SF
	  (neg:V2HF
	   (vec_select:V2HF
	    (match_operand:V4HF 2 "register_operand" "w")
	    (match_operand:V4HF 4 "vect_par_cnst_hi_half" ""))))
	 (float_extend:V2SF
	  (vec_duplicate:V2HF
	   (vec_select:HF
	    (match_operand:V8HF 3 "register_operand" "x")
	    (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
	 (match_operand:V2SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlsl2\\t%0.2s, %2.2h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)
6524
;; Mixed form: 128-bit data vector (V8HF low half) indexed by a lane of a
;; 64-bit V4HF vector; lo-half parallel in V8HFmode, lane adjusted in
;; V4HFmode, index limited to [0,3] by aarch64_imm2.
(define_expand "aarch64_fml<f16mac1>lq_lane_lowv4sf"
  [(set (match_operand:V4SF 0 "register_operand" "")
	(unspec:V4SF [(match_operand:V4SF 1 "register_operand" "")
		      (match_operand:V8HF 2 "register_operand" "")
		      (match_operand:V4HF 3 "register_operand" "")
		      (match_operand:SI 4 "aarch64_imm2" "")]
	 VFMLA16_LOW))]
  "TARGET_F16FML"
{
    rtx p1 = aarch64_simd_vect_par_cnst_half (V8HFmode, 8, false);
    rtx lane = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[4]));

    emit_insn (gen_aarch64_simd_fml<f16mac1>lq_lane_lowv4sf (operands[0],
							     operands[1],
							     operands[2],
							     operands[3],
							     p1, lane));
    DONE;
})
6544
;; High-half counterpart of the lq_lane_lowv4sf expander above.
(define_expand "aarch64_fml<f16mac1>lq_lane_highv4sf"
  [(set (match_operand:V4SF 0 "register_operand" "")
	(unspec:V4SF [(match_operand:V4SF 1 "register_operand" "")
		      (match_operand:V8HF 2 "register_operand" "")
		      (match_operand:V4HF 3 "register_operand" "")
		      (match_operand:SI 4 "aarch64_imm2" "")]
	 VFMLA16_HIGH))]
  "TARGET_F16FML"
{
    rtx p1 = aarch64_simd_vect_par_cnst_half (V8HFmode, 8, true);
    rtx lane = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[4]));

    emit_insn (gen_aarch64_simd_fml<f16mac1>lq_lane_highv4sf (operands[0],
							      operands[1],
							      operands[2],
							      operands[3],
							      p1, lane));
    DONE;
})
6564
;; FMLAL (by element, Q-form data, D-form index): low V4HF half of the
;; V8HF operand 2 times a broadcast lane [0-3] of the V4HF operand 3.
(define_insn "aarch64_simd_fmlalq_lane_lowv4sf"
  [(set (match_operand:V4SF 0 "register_operand" "=w")
	(fma:V4SF
	 (float_extend:V4SF
	  (vec_select:V4HF
	   (match_operand:V8HF 2 "register_operand" "w")
	   (match_operand:V8HF 4 "vect_par_cnst_lo_half" "")))
	 (float_extend:V4SF
	  (vec_duplicate:V4HF
	   (vec_select:HF
	    (match_operand:V4HF 3 "register_operand" "x")
	    (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
	 (match_operand:V4SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlal\\t%0.4s, %2.4h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)
6582
;; FMLSL variant of fmlalq_lane_lowv4sf (negated first multiplicand).
(define_insn "aarch64_simd_fmlslq_lane_lowv4sf"
  [(set (match_operand:V4SF 0 "register_operand" "=w")
	(fma:V4SF
	 (float_extend:V4SF
	  (neg:V4HF
	   (vec_select:V4HF
	    (match_operand:V8HF 2 "register_operand" "w")
	    (match_operand:V8HF 4 "vect_par_cnst_lo_half" ""))))
	 (float_extend:V4SF
	  (vec_duplicate:V4HF
	   (vec_select:HF
	    (match_operand:V4HF 3 "register_operand" "x")
	    (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
	 (match_operand:V4SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlsl\\t%0.4s, %2.4h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)
6601
;; FMLAL2 (by element, Q-form data, D-form index): high half of operand 2.
(define_insn "aarch64_simd_fmlalq_lane_highv4sf"
  [(set (match_operand:V4SF 0 "register_operand" "=w")
	(fma:V4SF
	 (float_extend:V4SF
	  (vec_select:V4HF
	   (match_operand:V8HF 2 "register_operand" "w")
	   (match_operand:V8HF 4 "vect_par_cnst_hi_half" "")))
	 (float_extend:V4SF
	  (vec_duplicate:V4HF
	   (vec_select:HF
	    (match_operand:V4HF 3 "register_operand" "x")
	    (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
	 (match_operand:V4SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlal2\\t%0.4s, %2.4h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)
6619
;; FMLSL2 variant of fmlalq_lane_highv4sf (negated first multiplicand).
(define_insn "aarch64_simd_fmlslq_lane_highv4sf"
  [(set (match_operand:V4SF 0 "register_operand" "=w")
	(fma:V4SF
	 (float_extend:V4SF
	  (neg:V4HF
	   (vec_select:V4HF
	    (match_operand:V8HF 2 "register_operand" "w")
	    (match_operand:V8HF 4 "vect_par_cnst_hi_half" ""))))
	 (float_extend:V4SF
	  (vec_duplicate:V4HF
	   (vec_select:HF
	    (match_operand:V4HF 3 "register_operand" "x")
	    (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
	 (match_operand:V4SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlsl2\\t%0.4s, %2.4h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)
6638
6639;; pmull
6640
;; PMULL: 64x64 -> 128-bit carry-less (polynomial) multiply of two DI
;; scalars, producing a TI result.  Gated on the AES crypto extension.
(define_insn "aarch64_crypto_pmulldi"
  [(set (match_operand:TI 0 "register_operand" "=w")
        (unspec:TI  [(match_operand:DI 1 "register_operand" "w")
		     (match_operand:DI 2 "register_operand" "w")]
		    UNSPEC_PMULL))]
 "TARGET_SIMD && TARGET_AES"
 "pmull\\t%0.1q, %1.1d, %2.1d"
  [(set_attr "type" "crypto_pmull")]
)
6650
;; PMULL2: carry-less multiply of the upper DI elements of two V2DI
;; vectors, producing a 128-bit TI result.
(define_insn "aarch64_crypto_pmullv2di"
 [(set (match_operand:TI 0 "register_operand" "=w")
       (unspec:TI [(match_operand:V2DI 1 "register_operand" "w")
		   (match_operand:V2DI 2 "register_operand" "w")]
		  UNSPEC_PMULL2))]
  "TARGET_SIMD && TARGET_AES"
  "pmull2\\t%0.1q, %1.2d, %2.2d"
  [(set_attr "type" "crypto_pmull")]
)
6660