;; Machine description for AArch64 SVE.
;; Copyright (C) 2009-2016 Free Software Foundation, Inc.
;; Contributed by ARM Ltd.
;;
;; This file is part of GCC.
;;
;; GCC is free software; you can redistribute it and/or modify it
;; under the terms of the GNU General Public License as published by
;; the Free Software Foundation; either version 3, or (at your option)
;; any later version.
;;
;; GCC is distributed in the hope that it will be useful, but
;; WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
;; General Public License for more details.
;;
;; You should have received a copy of the GNU General Public License
;; along with GCC; see the file COPYING3.  If not see
;; <http://www.gnu.org/licenses/>.

;; Note on the handling of big-endian SVE
;; --------------------------------------
;;
;; On big-endian systems, Advanced SIMD mov<mode> patterns act in the
;; same way as movdi or movti would: the first byte of memory goes
;; into the most significant byte of the register and the last byte
;; of memory goes into the least significant byte of the register.
;; This is the most natural ordering for Advanced SIMD and matches
;; the ABI layout for 64-bit and 128-bit vector types.
;;
;; As a result, the order of bytes within the register is what GCC
;; expects for a big-endian target, and subreg offsets therefore work
;; as expected, with the first element in memory having subreg offset 0
;; and the last element in memory having the subreg offset associated
;; with a big-endian lowpart.  However, this ordering also means that
;; GCC's lane numbering does not match the architecture's numbering:
;; GCC always treats the element at the lowest address in memory
;; (subreg offset 0) as element 0, while the architecture treats
;; the least significant end of the register as element 0.
;;
;; The situation for SVE is different.  We want the layout of the
;; SVE register to be the same for mov<mode> as it is for maskload<mode>:
;; logically, a mov<mode> load must be indistinguishable from a
;; maskload<mode> whose mask is all true.  We therefore need the
;; register layout to match LD1 rather than LDR.  The ABI layout of
;; SVE types also matches LD1 byte ordering rather than LDR byte ordering.
;;
;; As a result, the architecture lane numbering matches GCC's lane
;; numbering, with element 0 always being the first in memory.
;; However:
;;
;; - Applying a subreg offset to a register does not give the element
;;   that GCC expects: the first element in memory has the subreg offset
;;   associated with a big-endian lowpart while the last element in memory
;;   has subreg offset 0.  We handle this via TARGET_CAN_CHANGE_MODE_CLASS.
;;
;; - We cannot use LDR and STR for spill slots that might be accessed
;;   via subregs, since although the elements have the order GCC expects,
;;   the order of the bytes within the elements is different.  We instead
;;   access spill slots via LD1 and ST1, using secondary reloads to
;;   reserve a predicate register.
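;;
;; For example (values purely illustrative): with 16-bit elements and
;; the bytes { 0, 1, 2, 3, ... } in memory, a big-endian LD1H sets
;; lane 0 to 0x0001, whereas a big-endian LDR would put byte 0 of
;; memory into byte 0 of the register and so leave 0x0100 in lane 0.
;; The two transfers agree on which memory bytes belong to which lane
;; but disagree about the significance of the bytes within each lane.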


;; SVE data moves.
(define_expand "mov<mode>"
  [(set (match_operand:SVE_ALL 0 "nonimmediate_operand")
	(match_operand:SVE_ALL 1 "general_operand"))]
  "TARGET_SVE"
  {
    /* Use the predicated load and store patterns where possible.
       This is required for big-endian targets (see the comment at the
       head of the file) and increases the addressing choices for
       little-endian.  */
    if ((MEM_P (operands[0]) || MEM_P (operands[1]))
        && can_create_pseudo_p ())
      {
	aarch64_expand_sve_mem_move (operands[0], operands[1], <VPRED>mode);
	DONE;
      }

    if (CONSTANT_P (operands[1]))
      {
	aarch64_expand_mov_immediate (operands[0], operands[1],
				      gen_vec_duplicate<mode>);
	DONE;
      }

    /* Optimize subregs on big-endian targets: we can use REV[BHW]
       instead of going through memory.  */
    if (BYTES_BIG_ENDIAN
        && aarch64_maybe_expand_sve_subreg_move (operands[0], operands[1]))
      DONE;
  }
)

;; A pattern for optimizing SUBREGs that have a reinterpreting effect
;; on big-endian targets; see aarch64_maybe_expand_sve_subreg_move
;; for details.  We use a special predicate for operand 2 to reduce
;; the number of patterns.
(define_insn_and_split "*aarch64_sve_mov<mode>_subreg_be"
  [(set (match_operand:SVE_ALL 0 "aarch64_sve_nonimmediate_operand" "=w")
	(unspec:SVE_ALL
          [(match_operand:VNx16BI 1 "register_operand" "Upl")
	   (match_operand 2 "aarch64_any_register_operand" "w")]
	  UNSPEC_REV_SUBREG))]
  "TARGET_SVE && BYTES_BIG_ENDIAN"
  "#"
  "&& reload_completed"
  [(const_int 0)]
  {
    aarch64_split_sve_subreg_move (operands[0], operands[1], operands[2]);
    DONE;
  }
)
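
;; For example (illustrative), a big-endian bitcast between VNx8HI and
;; VNx4SI would become a predicated REVH on .s containers, which swaps
;; the two halfwords within each word and so converts between the two
;; LD1-style lane layouts without going through memory.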

;; Unpredicated moves (little-endian).  Only allow memory operations
;; during and after RA; before RA we want the predicated load and
;; store patterns to be used instead.
(define_insn "*aarch64_sve_mov<mode>_le"
  [(set (match_operand:SVE_ALL 0 "aarch64_sve_nonimmediate_operand" "=w, Utr, w, w")
	(match_operand:SVE_ALL 1 "aarch64_sve_general_operand" "Utr, w, w, Dn"))]
  "TARGET_SVE
   && !BYTES_BIG_ENDIAN
   && ((lra_in_progress || reload_completed)
       || (register_operand (operands[0], <MODE>mode)
	   && nonmemory_operand (operands[1], <MODE>mode)))"
  "@
   ldr\t%0, %1
   str\t%1, %0
   mov\t%0.d, %1.d
   * return aarch64_output_sve_mov_immediate (operands[1]);"
)

;; Unpredicated moves (big-endian).  Memory accesses require secondary
;; reloads.
(define_insn "*aarch64_sve_mov<mode>_be"
  [(set (match_operand:SVE_ALL 0 "register_operand" "=w, w")
	(match_operand:SVE_ALL 1 "aarch64_nonmemory_operand" "w, Dn"))]
  "TARGET_SVE && BYTES_BIG_ENDIAN"
  "@
   mov\t%0.d, %1.d
   * return aarch64_output_sve_mov_immediate (operands[1]);"
)

;; Handle big-endian memory reloads.  We use byte PTRUE for all modes
;; to try to encourage reuse.
(define_expand "aarch64_sve_reload_be"
  [(parallel
     [(set (match_operand 0)
           (match_operand 1))
      (clobber (match_operand:VNx16BI 2 "register_operand" "=Upl"))])]
  "TARGET_SVE && BYTES_BIG_ENDIAN"
  {
    /* Create a PTRUE.  */
    emit_move_insn (operands[2], CONSTM1_RTX (VNx16BImode));

    /* Refer to the PTRUE in the appropriate mode for this move.  */
    machine_mode mode = GET_MODE (operands[0]);
    machine_mode pred_mode
      = aarch64_sve_pred_mode (GET_MODE_UNIT_SIZE (mode)).require ();
    rtx pred = gen_lowpart (pred_mode, operands[2]);

    /* Emit a predicated load or store.  */
    aarch64_emit_sve_pred_move (operands[0], pred, operands[1]);
    DONE;
  }
)
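
;; For example, a VNx4SI reload has a 4-byte element size, so the
;; VNx16BI PTRUE above would be reused as a VNx4BI predicate and the
;; reload would become an LD1W or ST1W.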

;; A predicated load or store for which the predicate is known to be
;; all-true.  Note that this pattern is generated directly by
;; aarch64_emit_sve_pred_move, so changes to this pattern will
;; need changes there as well.
(define_insn "*pred_mov<mode>"
  [(set (match_operand:SVE_ALL 0 "nonimmediate_operand" "=w, m")
	(unspec:SVE_ALL
	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
	   (match_operand:SVE_ALL 2 "nonimmediate_operand" "m, w")]
	  UNSPEC_MERGE_PTRUE))]
  "TARGET_SVE
   && (register_operand (operands[0], <MODE>mode)
       || register_operand (operands[2], <MODE>mode))"
  "@
   ld1<Vesize>\t%0.<Vetype>, %1/z, %2
   st1<Vesize>\t%2.<Vetype>, %1, %0"
)

(define_expand "movmisalign<mode>"
  [(set (match_operand:SVE_ALL 0 "nonimmediate_operand")
	(match_operand:SVE_ALL 1 "general_operand"))]
  "TARGET_SVE"
  {
    /* Equivalent to a normal move for our purposes.  */
    emit_move_insn (operands[0], operands[1]);
    DONE;
  }
)

(define_insn "maskload<mode><vpred>"
  [(set (match_operand:SVE_ALL 0 "register_operand" "=w")
	(unspec:SVE_ALL
	  [(match_operand:<VPRED> 2 "register_operand" "Upl")
	   (match_operand:SVE_ALL 1 "memory_operand" "m")]
	  UNSPEC_LD1_SVE))]
  "TARGET_SVE"
  "ld1<Vesize>\t%0.<Vetype>, %2/z, %1"
)

(define_insn "maskstore<mode><vpred>"
  [(set (match_operand:SVE_ALL 0 "memory_operand" "+m")
	(unspec:SVE_ALL [(match_operand:<VPRED> 2 "register_operand" "Upl")
			 (match_operand:SVE_ALL 1 "register_operand" "w")
			 (match_dup 0)]
			UNSPEC_ST1_SVE))]
  "TARGET_SVE"
  "st1<Vesize>\t%1.<Vetype>, %2, %0"
)

;; Unpredicated gather loads.
(define_expand "gather_load<mode>"
  [(set (match_operand:SVE_SD 0 "register_operand")
	(unspec:SVE_SD
	  [(match_dup 5)
	   (match_operand:DI 1 "aarch64_reg_or_zero")
	   (match_operand:<V_INT_EQUIV> 2 "register_operand")
	   (match_operand:DI 3 "const_int_operand")
	   (match_operand:DI 4 "aarch64_gather_scale_operand_<Vesize>")
	   (mem:BLK (scratch))]
	  UNSPEC_LD1_GATHER))]
  "TARGET_SVE"
  {
    operands[5] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode));
  }
)

;; Predicated gather loads for 32-bit elements.  Operand 3 is true for
;; unsigned extension and false for signed extension.
(define_insn "mask_gather_load<mode>"
  [(set (match_operand:SVE_S 0 "register_operand" "=w, w, w, w, w")
	(unspec:SVE_S
	  [(match_operand:<VPRED> 5 "register_operand" "Upl, Upl, Upl, Upl, Upl")
	   (match_operand:DI 1 "aarch64_reg_or_zero" "Z, rk, rk, rk, rk")
	   (match_operand:<V_INT_EQUIV> 2 "register_operand" "w, w, w, w, w")
	   (match_operand:DI 3 "const_int_operand" "i, Z, Ui1, Z, Ui1")
	   (match_operand:DI 4 "aarch64_gather_scale_operand_w" "Ui1, Ui1, Ui1, i, i")
	   (mem:BLK (scratch))]
	  UNSPEC_LD1_GATHER))]
  "TARGET_SVE"
  "@
   ld1w\t%0.s, %5/z, [%2.s]
   ld1w\t%0.s, %5/z, [%1, %2.s, sxtw]
   ld1w\t%0.s, %5/z, [%1, %2.s, uxtw]
   ld1w\t%0.s, %5/z, [%1, %2.s, sxtw %p4]
   ld1w\t%0.s, %5/z, [%1, %2.s, uxtw %p4]"
)
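
;; For example (illustrative), a gather of 32-bit elements from base x0
;; plus sign-extended 32-bit indices in z1.s, scaled by the element
;; size, would match the fourth alternative and produce:
;;
;;	ld1w	z0.s, p0/z, [x0, z1.s, sxtw 2]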

;; Predicated gather loads for 64-bit elements.  The value of operand 3
;; doesn't matter in this case.
(define_insn "mask_gather_load<mode>"
  [(set (match_operand:SVE_D 0 "register_operand" "=w, w, w")
	(unspec:SVE_D
	  [(match_operand:<VPRED> 5 "register_operand" "Upl, Upl, Upl")
	   (match_operand:DI 1 "aarch64_reg_or_zero" "Z, rk, rk")
	   (match_operand:<V_INT_EQUIV> 2 "register_operand" "w, w, w")
	   (match_operand:DI 3 "const_int_operand")
	   (match_operand:DI 4 "aarch64_gather_scale_operand_d" "Ui1, Ui1, i")
	   (mem:BLK (scratch))]
	  UNSPEC_LD1_GATHER))]
  "TARGET_SVE"
  "@
   ld1d\t%0.d, %5/z, [%2.d]
   ld1d\t%0.d, %5/z, [%1, %2.d]
   ld1d\t%0.d, %5/z, [%1, %2.d, lsl %p4]"
)

;; Unpredicated scatter store.
(define_expand "scatter_store<mode>"
  [(set (mem:BLK (scratch))
	(unspec:BLK
	  [(match_dup 5)
	   (match_operand:DI 0 "aarch64_reg_or_zero")
	   (match_operand:<V_INT_EQUIV> 1 "register_operand")
	   (match_operand:DI 2 "const_int_operand")
	   (match_operand:DI 3 "aarch64_gather_scale_operand_<Vesize>")
	   (match_operand:SVE_SD 4 "register_operand")]
	  UNSPEC_ST1_SCATTER))]
  "TARGET_SVE"
  {
    operands[5] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode));
  }
)

;; Predicated scatter stores for 32-bit elements.  Operand 2 is true for
;; unsigned extension and false for signed extension.
(define_insn "mask_scatter_store<mode>"
  [(set (mem:BLK (scratch))
	(unspec:BLK
	  [(match_operand:<VPRED> 5 "register_operand" "Upl, Upl, Upl, Upl, Upl")
	   (match_operand:DI 0 "aarch64_reg_or_zero" "Z, rk, rk, rk, rk")
	   (match_operand:<V_INT_EQUIV> 1 "register_operand" "w, w, w, w, w")
	   (match_operand:DI 2 "const_int_operand" "i, Z, Ui1, Z, Ui1")
	   (match_operand:DI 3 "aarch64_gather_scale_operand_w" "Ui1, Ui1, Ui1, i, i")
	   (match_operand:SVE_S 4 "register_operand" "w, w, w, w, w")]
	  UNSPEC_ST1_SCATTER))]
  "TARGET_SVE"
  "@
   st1w\t%4.s, %5, [%1.s]
   st1w\t%4.s, %5, [%0, %1.s, sxtw]
   st1w\t%4.s, %5, [%0, %1.s, uxtw]
   st1w\t%4.s, %5, [%0, %1.s, sxtw %p3]
   st1w\t%4.s, %5, [%0, %1.s, uxtw %p3]"
)

;; Predicated scatter stores for 64-bit elements.  The value of operand 2
;; doesn't matter in this case.
(define_insn "mask_scatter_store<mode>"
  [(set (mem:BLK (scratch))
	(unspec:BLK
	  [(match_operand:<VPRED> 5 "register_operand" "Upl, Upl, Upl")
	   (match_operand:DI 0 "aarch64_reg_or_zero" "Z, rk, rk")
	   (match_operand:<V_INT_EQUIV> 1 "register_operand" "w, w, w")
	   (match_operand:DI 2 "const_int_operand")
	   (match_operand:DI 3 "aarch64_gather_scale_operand_d" "Ui1, Ui1, i")
	   (match_operand:SVE_D 4 "register_operand" "w, w, w")]
	  UNSPEC_ST1_SCATTER))]
  "TARGET_SVE"
  "@
   st1d\t%4.d, %5, [%1.d]
   st1d\t%4.d, %5, [%0, %1.d]
   st1d\t%4.d, %5, [%0, %1.d, lsl %p3]"
)

;; SVE structure moves.
(define_expand "mov<mode>"
  [(set (match_operand:SVE_STRUCT 0 "nonimmediate_operand")
	(match_operand:SVE_STRUCT 1 "general_operand"))]
  "TARGET_SVE"
  {
    /* Big-endian loads and stores need to be done via LD1 and ST1;
       see the comment at the head of the file for details.  */
    if ((MEM_P (operands[0]) || MEM_P (operands[1]))
	&& BYTES_BIG_ENDIAN)
      {
	gcc_assert (can_create_pseudo_p ());
	aarch64_expand_sve_mem_move (operands[0], operands[1], <VPRED>mode);
	DONE;
      }

    if (CONSTANT_P (operands[1]))
      {
	aarch64_expand_mov_immediate (operands[0], operands[1]);
	DONE;
      }
  }
)

;; Unpredicated structure moves (little-endian).
(define_insn "*aarch64_sve_mov<mode>_le"
  [(set (match_operand:SVE_STRUCT 0 "aarch64_sve_nonimmediate_operand" "=w, Utr, w, w")
	(match_operand:SVE_STRUCT 1 "aarch64_sve_general_operand" "Utr, w, w, Dn"))]
  "TARGET_SVE && !BYTES_BIG_ENDIAN"
  "#"
  [(set_attr "length" "<insn_length>")]
)

;; Unpredicated structure moves (big-endian).  Memory accesses require
;; secondary reloads.
(define_insn "*aarch64_sve_mov<mode>_be"
  [(set (match_operand:SVE_STRUCT 0 "register_operand" "=w, w")
	(match_operand:SVE_STRUCT 1 "aarch64_nonmemory_operand" "w, Dn"))]
  "TARGET_SVE && BYTES_BIG_ENDIAN"
  "#"
  [(set_attr "length" "<insn_length>")]
)

;; Split unpredicated structure moves into pieces.  This is the same
;; for both big-endian and little-endian code, although it only needs
;; to handle memory operands for little-endian code.
(define_split
  [(set (match_operand:SVE_STRUCT 0 "aarch64_sve_nonimmediate_operand")
	(match_operand:SVE_STRUCT 1 "aarch64_sve_general_operand"))]
  "TARGET_SVE && reload_completed"
  [(const_int 0)]
  {
    rtx dest = operands[0];
    rtx src = operands[1];
    if (REG_P (dest) && REG_P (src))
      aarch64_simd_emit_reg_reg_move (operands, <VSINGLE>mode, <vector_count>);
    else
      for (unsigned int i = 0; i < <vector_count>; ++i)
	{
	  rtx subdest = simplify_gen_subreg (<VSINGLE>mode, dest, <MODE>mode,
					     i * BYTES_PER_SVE_VECTOR);
	  rtx subsrc = simplify_gen_subreg (<VSINGLE>mode, src, <MODE>mode,
					    i * BYTES_PER_SVE_VECTOR);
	  emit_insn (gen_rtx_SET (subdest, subsrc));
	}
    DONE;
  }
)
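
;; For example, a VNx8HIx2 move that involves memory splits into two
;; VNx8HI moves, at byte offsets 0 and BYTES_PER_SVE_VECTOR into the
;; structure.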

;; Predicated structure moves.  This works for both endiannesses but in
;; practice is only useful for big-endian.
(define_insn_and_split "pred_mov<mode>"
  [(set (match_operand:SVE_STRUCT 0 "aarch64_sve_struct_nonimmediate_operand" "=w, Utx")
	(unspec:SVE_STRUCT
	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
	   (match_operand:SVE_STRUCT 2 "aarch64_sve_struct_nonimmediate_operand" "Utx, w")]
	  UNSPEC_MERGE_PTRUE))]
  "TARGET_SVE
   && (register_operand (operands[0], <MODE>mode)
       || register_operand (operands[2], <MODE>mode))"
  "#"
  "&& reload_completed"
  [(const_int 0)]
  {
    for (unsigned int i = 0; i < <vector_count>; ++i)
      {
	rtx subdest = simplify_gen_subreg (<VSINGLE>mode, operands[0],
					   <MODE>mode,
					   i * BYTES_PER_SVE_VECTOR);
	rtx subsrc = simplify_gen_subreg (<VSINGLE>mode, operands[2],
					  <MODE>mode,
					  i * BYTES_PER_SVE_VECTOR);
	aarch64_emit_sve_pred_move (subdest, operands[1], subsrc);
      }
    DONE;
  }
  [(set_attr "length" "<insn_length>")]
)

(define_expand "mov<mode>"
  [(set (match_operand:PRED_ALL 0 "nonimmediate_operand")
	(match_operand:PRED_ALL 1 "general_operand"))]
  "TARGET_SVE"
  {
    if (GET_CODE (operands[0]) == MEM)
      operands[1] = force_reg (<MODE>mode, operands[1]);
  }
)

(define_insn "*aarch64_sve_mov<mode>"
  [(set (match_operand:PRED_ALL 0 "nonimmediate_operand" "=Upa, m, Upa, Upa, Upa")
	(match_operand:PRED_ALL 1 "general_operand" "Upa, Upa, m, Dz, Dm"))]
  "TARGET_SVE
   && (register_operand (operands[0], <MODE>mode)
       || register_operand (operands[1], <MODE>mode))"
  "@
   mov\t%0.b, %1.b
   str\t%1, %0
   ldr\t%0, %1
   pfalse\t%0.b
   * return aarch64_output_ptrue (<MODE>mode, '<Vetype>');"
)

;; Handle extractions from a predicate by converting to an integer vector
;; and extracting from there.
(define_expand "vec_extract<vpred><Vel>"
  [(match_operand:<VEL> 0 "register_operand")
   (match_operand:<VPRED> 1 "register_operand")
   (match_operand:SI 2 "nonmemory_operand")
   ;; Dummy operand to which we can attach the iterator.
   (reg:SVE_I V0_REGNUM)]
  "TARGET_SVE"
  {
    rtx tmp = gen_reg_rtx (<MODE>mode);
    emit_insn (gen_aarch64_sve_dup<mode>_const (tmp, operands[1],
						CONST1_RTX (<MODE>mode),
						CONST0_RTX (<MODE>mode)));
    emit_insn (gen_vec_extract<mode><Vel> (operands[0], tmp, operands[2]));
    DONE;
  }
)

(define_expand "vec_extract<mode><Vel>"
  [(set (match_operand:<VEL> 0 "register_operand")
	(vec_select:<VEL>
	  (match_operand:SVE_ALL 1 "register_operand")
	  (parallel [(match_operand:SI 2 "nonmemory_operand")])))]
  "TARGET_SVE"
  {
    poly_int64 val;
    if (poly_int_rtx_p (operands[2], &val)
	&& known_eq (val, GET_MODE_NUNITS (<MODE>mode) - 1))
      {
	/* The last element can be extracted with a LASTB and a false
	   predicate.  */
	rtx sel = force_reg (<VPRED>mode, CONST0_RTX (<VPRED>mode));
	emit_insn (gen_extract_last_<mode> (operands[0], sel, operands[1]));
	DONE;
      }
    if (!CONST_INT_P (operands[2]))
      {
	/* Create an index with operand[2] as the base and -1 as the step.
	   It will then be zero for the element we care about.  */
	rtx index = gen_lowpart (<VEL_INT>mode, operands[2]);
	index = force_reg (<VEL_INT>mode, index);
	rtx series = gen_reg_rtx (<V_INT_EQUIV>mode);
	emit_insn (gen_vec_series<v_int_equiv> (series, index, constm1_rtx));

	/* Get a predicate that is true for only that element.  */
	rtx zero = CONST0_RTX (<V_INT_EQUIV>mode);
	rtx cmp = gen_rtx_EQ (<V_INT_EQUIV>mode, series, zero);
	rtx sel = gen_reg_rtx (<VPRED>mode);
	emit_insn (gen_vec_cmp<v_int_equiv><vpred> (sel, cmp, series, zero));

	/* Select the element using LASTB.  */
	emit_insn (gen_extract_last_<mode> (operands[0], sel, operands[1]));
	DONE;
      }
  }
)
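
;; For example (illustrative), extracting variable element 2 of a
;; VNx4SI vector builds the series { 2, 1, 0, -1, ... }, whose
;; comparison with zero yields a predicate that is true only for
;; element 2; LASTB then selects that element.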

;; Extract element zero.  This is a special case because we want to force
;; the registers to be the same for the second alternative, and then
;; split the instruction into nothing after RA.
(define_insn_and_split "*vec_extract<mode><Vel>_0"
  [(set (match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand" "=r, w, Utv")
	(vec_select:<VEL>
	  (match_operand:SVE_ALL 1 "register_operand" "w, 0, w")
	  (parallel [(const_int 0)])))]
  "TARGET_SVE"
  {
    operands[1] = gen_rtx_REG (<V128>mode, REGNO (operands[1]));
    switch (which_alternative)
      {
	case 0:
	  return "umov\\t%<vwcore>0, %1.<Vetype>[0]";
	case 1:
	  return "#";
	case 2:
	  return "st1\\t{%1.<Vetype>}[0], %0";
	default:
	  gcc_unreachable ();
      }
  }
  "&& reload_completed
   && REG_P (operands[0])
   && REGNO (operands[0]) == REGNO (operands[1])"
  [(const_int 0)]
  {
    emit_note (NOTE_INSN_DELETED);
    DONE;
  }
  [(set_attr "type" "neon_to_gp_q, untyped, neon_store1_one_lane_q")]
)

;; Extract an element from the Advanced SIMD portion of the register.
;; We don't just reuse the aarch64-simd.md pattern because we don't
;; want any change in lane number on big-endian targets.
(define_insn "*vec_extract<mode><Vel>_v128"
  [(set (match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand" "=r, w, Utv")
	(vec_select:<VEL>
	  (match_operand:SVE_ALL 1 "register_operand" "w, w, w")
	  (parallel [(match_operand:SI 2 "const_int_operand")])))]
  "TARGET_SVE
   && IN_RANGE (INTVAL (operands[2]) * GET_MODE_SIZE (<VEL>mode), 1, 15)"
  {
    operands[1] = gen_rtx_REG (<V128>mode, REGNO (operands[1]));
    switch (which_alternative)
      {
	case 0:
	  return "umov\\t%<vwcore>0, %1.<Vetype>[%2]";
	case 1:
	  return "dup\\t%<Vetype>0, %1.<Vetype>[%2]";
	case 2:
	  return "st1\\t{%1.<Vetype>}[%2], %0";
	default:
	  gcc_unreachable ();
      }
  }
  [(set_attr "type" "neon_to_gp_q, neon_dup_q, neon_store1_one_lane_q")]
)

;; Extract an element in the range of DUP.  This pattern allows the
;; source and destination to be different.
(define_insn "*vec_extract<mode><Vel>_dup"
  [(set (match_operand:<VEL> 0 "register_operand" "=w")
	(vec_select:<VEL>
	  (match_operand:SVE_ALL 1 "register_operand" "w")
	  (parallel [(match_operand:SI 2 "const_int_operand")])))]
  "TARGET_SVE
   && IN_RANGE (INTVAL (operands[2]) * GET_MODE_SIZE (<VEL>mode), 16, 63)"
  {
    operands[0] = gen_rtx_REG (<MODE>mode, REGNO (operands[0]));
    return "dup\t%0.<Vetype>, %1.<Vetype>[%2]";
  }
)

;; Extract an element outside the range of DUP.  This pattern requires the
;; source and destination to be the same.
(define_insn "*vec_extract<mode><Vel>_ext"
  [(set (match_operand:<VEL> 0 "register_operand" "=w")
	(vec_select:<VEL>
	  (match_operand:SVE_ALL 1 "register_operand" "0")
	  (parallel [(match_operand:SI 2 "const_int_operand")])))]
  "TARGET_SVE && INTVAL (operands[2]) * GET_MODE_SIZE (<VEL>mode) >= 64"
  {
    operands[0] = gen_rtx_REG (<MODE>mode, REGNO (operands[0]));
    operands[2] = GEN_INT (INTVAL (operands[2]) * GET_MODE_SIZE (<VEL>mode));
    return "ext\t%0.b, %0.b, %0.b, #%2";
  }
)
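
;; For example (illustrative), extracting .s element 20 gives the byte
;; offset 80 and produces:
;;
;;	ext	z0.b, z0.b, z0.b, #80
;;
;; which rotates the selected element down to lane 0 of the destination.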

;; Extract the last active element of operand 1 into operand 0.
;; If no elements are active, extract the last inactive element instead.
(define_insn "extract_last_<mode>"
  [(set (match_operand:<VEL> 0 "register_operand" "=r, w")
	(unspec:<VEL>
	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
	   (match_operand:SVE_ALL 2 "register_operand" "w, w")]
	  UNSPEC_LASTB))]
  "TARGET_SVE"
  "@
   lastb\t%<vwcore>0, %1, %2.<Vetype>
   lastb\t%<Vetype>0, %1, %2.<Vetype>"
)

(define_expand "vec_duplicate<mode>"
  [(parallel
    [(set (match_operand:SVE_ALL 0 "register_operand")
	  (vec_duplicate:SVE_ALL
	    (match_operand:<VEL> 1 "aarch64_sve_dup_operand")))
     (clobber (scratch:<VPRED>))])]
  "TARGET_SVE"
  {
    if (MEM_P (operands[1]))
      {
	rtx ptrue = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode));
	emit_insn (gen_sve_ld1r<mode> (operands[0], ptrue, operands[1],
				       CONST0_RTX (<MODE>mode)));
	DONE;
      }
  }
)

;; Accept memory operands for the benefit of combine, and also in case
;; the scalar input gets spilled to memory during RA.  We want to split
;; the load at the first opportunity in order to allow the PTRUE to be
;; optimized with surrounding code.
(define_insn_and_split "*vec_duplicate<mode>_reg"
  [(set (match_operand:SVE_ALL 0 "register_operand" "=w, w, w")
	(vec_duplicate:SVE_ALL
	  (match_operand:<VEL> 1 "aarch64_sve_dup_operand" "r, w, Uty")))
   (clobber (match_scratch:<VPRED> 2 "=X, X, Upl"))]
  "TARGET_SVE"
  "@
   mov\t%0.<Vetype>, %<vwcore>1
   mov\t%0.<Vetype>, %<Vetype>1
   #"
  "&& MEM_P (operands[1])"
  [(const_int 0)]
  {
    if (GET_CODE (operands[2]) == SCRATCH)
      operands[2] = gen_reg_rtx (<VPRED>mode);
    emit_move_insn (operands[2], CONSTM1_RTX (<VPRED>mode));
    emit_insn (gen_sve_ld1r<mode> (operands[0], operands[2], operands[1],
				   CONST0_RTX (<MODE>mode)));
    DONE;
  }
  [(set_attr "length" "4,4,8")]
)

;; This is used for vec_duplicate<mode>s from memory, but can also
;; be used by combine to optimize selects of a vec_duplicate<mode>
;; with zero.
(define_insn "sve_ld1r<mode>"
  [(set (match_operand:SVE_ALL 0 "register_operand" "=w")
	(unspec:SVE_ALL
	  [(match_operand:<VPRED> 1 "register_operand" "Upl")
	   (vec_duplicate:SVE_ALL
	     (match_operand:<VEL> 2 "aarch64_sve_ld1r_operand" "Uty"))
	   (match_operand:SVE_ALL 3 "aarch64_simd_imm_zero")]
	  UNSPEC_SEL))]
  "TARGET_SVE"
  "ld1r<Vesize>\t%0.<Vetype>, %1/z, %2"
)

;; Load 128 bits from memory and duplicate to fill a vector.  Since there
;; are so few operations on 128-bit "elements", we don't define a VNx1TI
;; and simply use vectors of bytes instead.
(define_insn "*sve_ld1rq<Vesize>"
  [(set (match_operand:SVE_ALL 0 "register_operand" "=w")
	(unspec:SVE_ALL
	  [(match_operand:<VPRED> 1 "register_operand" "Upl")
	   (match_operand:TI 2 "aarch64_sve_ld1r_operand" "Uty")]
	  UNSPEC_LD1RQ))]
  "TARGET_SVE"
  "ld1rq<Vesize>\t%0.<Vetype>, %1/z, %2"
)

;; Implement a predicate broadcast by shifting the low bit of the scalar
;; input into the top bit and using a WHILELO.  An alternative would be to
;; duplicate the input and do a compare with zero.
(define_expand "vec_duplicate<mode>"
  [(set (match_operand:PRED_ALL 0 "register_operand")
	(vec_duplicate:PRED_ALL (match_operand 1 "register_operand")))]
  "TARGET_SVE"
  {
    rtx tmp = gen_reg_rtx (DImode);
    rtx op1 = gen_lowpart (DImode, operands[1]);
    emit_insn (gen_ashldi3 (tmp, op1, gen_int_mode (63, DImode)));
    emit_insn (gen_while_ultdi<mode> (operands[0], const0_rtx, tmp));
    DONE;
  }
)
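
;; For example, broadcasting the scalar 1 shifts it to give 1 << 63,
;; so WHILELO (0, 1 << 63) makes every element active; broadcasting 0
;; gives WHILELO (0, 0), which makes every element inactive.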

(define_insn "vec_series<mode>"
  [(set (match_operand:SVE_I 0 "register_operand" "=w, w, w")
	(vec_series:SVE_I
	  (match_operand:<VEL> 1 "aarch64_sve_index_operand" "Usi, r, r")
	  (match_operand:<VEL> 2 "aarch64_sve_index_operand" "r, Usi, r")))]
  "TARGET_SVE"
  "@
   index\t%0.<Vetype>, #%1, %<vw>2
   index\t%0.<Vetype>, %<vw>1, #%2
   index\t%0.<Vetype>, %<vw>1, %<vw>2"
)

;; Optimize {x, x, x, x, ...} + {0, n, 2*n, 3*n, ...} if n is in range
;; of an INDEX instruction.
(define_insn "*vec_series<mode>_plus"
  [(set (match_operand:SVE_I 0 "register_operand" "=w")
	(plus:SVE_I
	  (vec_duplicate:SVE_I
	    (match_operand:<VEL> 1 "register_operand" "r"))
	  (match_operand:SVE_I 2 "immediate_operand")))]
  "TARGET_SVE && aarch64_check_zero_based_sve_index_immediate (operands[2])"
  {
    operands[2] = aarch64_check_zero_based_sve_index_immediate (operands[2]);
    return "index\t%0.<Vetype>, %<vw>1, #%2";
  }
)
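
;; For example (illustrative), adding { 0, 2, 4, ... } to a duplicated
;; GPR value would produce:
;;
;;	index	z0.s, w0, #2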

;; Unpredicated LD[234].
(define_expand "vec_load_lanes<mode><vsingle>"
  [(set (match_operand:SVE_STRUCT 0 "register_operand")
	(unspec:SVE_STRUCT
	  [(match_dup 2)
	   (match_operand:SVE_STRUCT 1 "memory_operand")]
	  UNSPEC_LDN))]
  "TARGET_SVE"
  {
    operands[2] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode));
  }
)

;; Predicated LD[234].
(define_insn "vec_mask_load_lanes<mode><vsingle>"
  [(set (match_operand:SVE_STRUCT 0 "register_operand" "=w")
	(unspec:SVE_STRUCT
	  [(match_operand:<VPRED> 2 "register_operand" "Upl")
	   (match_operand:SVE_STRUCT 1 "memory_operand" "m")]
	  UNSPEC_LDN))]
  "TARGET_SVE"
  "ld<vector_count><Vesize>\t%0, %2/z, %1"
)

;; Unpredicated ST[234].  This is always a full update, so the dependence
;; on the old value of the memory location (via (match_dup 0)) is redundant.
;; There doesn't seem to be any obvious benefit to treating the all-true
;; case differently though.  In particular, it's very unlikely that we'll
;; only find out during RTL that a store_lanes is dead.
(define_expand "vec_store_lanes<mode><vsingle>"
  [(set (match_operand:SVE_STRUCT 0 "memory_operand")
	(unspec:SVE_STRUCT
	  [(match_dup 2)
	   (match_operand:SVE_STRUCT 1 "register_operand")
	   (match_dup 0)]
	  UNSPEC_STN))]
  "TARGET_SVE"
  {
    operands[2] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode));
  }
)

;; Predicated ST[234].
(define_insn "vec_mask_store_lanes<mode><vsingle>"
  [(set (match_operand:SVE_STRUCT 0 "memory_operand" "+m")
	(unspec:SVE_STRUCT
	  [(match_operand:<VPRED> 2 "register_operand" "Upl")
	   (match_operand:SVE_STRUCT 1 "register_operand" "w")
	   (match_dup 0)]
	  UNSPEC_STN))]
  "TARGET_SVE"
  "st<vector_count><Vesize>\t%1, %2, %0"
)

(define_expand "vec_perm<mode>"
  [(match_operand:SVE_ALL 0 "register_operand")
   (match_operand:SVE_ALL 1 "register_operand")
   (match_operand:SVE_ALL 2 "register_operand")
   (match_operand:<V_INT_EQUIV> 3 "aarch64_sve_vec_perm_operand")]
  "TARGET_SVE && GET_MODE_NUNITS (<MODE>mode).is_constant ()"
  {
    aarch64_expand_sve_vec_perm (operands[0], operands[1],
				 operands[2], operands[3]);
    DONE;
  }
)

(define_insn "*aarch64_sve_tbl<mode>"
  [(set (match_operand:SVE_ALL 0 "register_operand" "=w")
	(unspec:SVE_ALL
	  [(match_operand:SVE_ALL 1 "register_operand" "w")
	   (match_operand:<V_INT_EQUIV> 2 "register_operand" "w")]
	  UNSPEC_TBL))]
  "TARGET_SVE"
  "tbl\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>"
)

(define_insn "*aarch64_sve_<perm_insn><perm_hilo><mode>"
  [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
	(unspec:PRED_ALL [(match_operand:PRED_ALL 1 "register_operand" "Upa")
			  (match_operand:PRED_ALL 2 "register_operand" "Upa")]
			 PERMUTE))]
  "TARGET_SVE"
  "<perm_insn><perm_hilo>\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>"
)

(define_insn "aarch64_sve_<perm_insn><perm_hilo><mode>"
  [(set (match_operand:SVE_ALL 0 "register_operand" "=w")
	(unspec:SVE_ALL [(match_operand:SVE_ALL 1 "register_operand" "w")
			 (match_operand:SVE_ALL 2 "register_operand" "w")]
			PERMUTE))]
  "TARGET_SVE"
  "<perm_insn><perm_hilo>\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>"
)

(define_insn "*aarch64_sve_rev64<mode>"
  [(set (match_operand:SVE_BHS 0 "register_operand" "=w")
	(unspec:SVE_BHS
	  [(match_operand:VNx2BI 1 "register_operand" "Upl")
	   (unspec:SVE_BHS [(match_operand:SVE_BHS 2 "register_operand" "w")]
			   UNSPEC_REV64)]
	  UNSPEC_MERGE_PTRUE))]
  "TARGET_SVE"
  "rev<Vesize>\t%0.d, %1/m, %2.d"
)

(define_insn "*aarch64_sve_rev32<mode>"
  [(set (match_operand:SVE_BH 0 "register_operand" "=w")
	(unspec:SVE_BH
	  [(match_operand:VNx4BI 1 "register_operand" "Upl")
	   (unspec:SVE_BH [(match_operand:SVE_BH 2 "register_operand" "w")]
			  UNSPEC_REV32)]
	  UNSPEC_MERGE_PTRUE))]
  "TARGET_SVE"
  "rev<Vesize>\t%0.s, %1/m, %2.s"
)

(define_insn "*aarch64_sve_rev16vnx16qi"
  [(set (match_operand:VNx16QI 0 "register_operand" "=w")
	(unspec:VNx16QI
	  [(match_operand:VNx8BI 1 "register_operand" "Upl")
	   (unspec:VNx16QI [(match_operand:VNx16QI 2 "register_operand" "w")]
			   UNSPEC_REV16)]
	  UNSPEC_MERGE_PTRUE))]
  "TARGET_SVE"
  "revb\t%0.h, %1/m, %2.h"
)

(define_insn "*aarch64_sve_rev<mode>"
  [(set (match_operand:SVE_ALL 0 "register_operand" "=w")
	(unspec:SVE_ALL [(match_operand:SVE_ALL 1 "register_operand" "w")]
			UNSPEC_REV))]
  "TARGET_SVE"
  "rev\t%0.<Vetype>, %1.<Vetype>")

(define_insn "*aarch64_sve_dup_lane<mode>"
  [(set (match_operand:SVE_ALL 0 "register_operand" "=w")
	(vec_duplicate:SVE_ALL
	  (vec_select:<VEL>
	    (match_operand:SVE_ALL 1 "register_operand" "w")
	    (parallel [(match_operand:SI 2 "const_int_operand")]))))]
  "TARGET_SVE
   && IN_RANGE (INTVAL (operands[2]) * GET_MODE_SIZE (<VEL>mode), 0, 63)"
  "dup\t%0.<Vetype>, %1.<Vetype>[%2]"
)

;; Note that the immediate (third) operand is the lane index, not
;; the byte index.
(define_insn "*aarch64_sve_ext<mode>"
  [(set (match_operand:SVE_ALL 0 "register_operand" "=w")
	(unspec:SVE_ALL [(match_operand:SVE_ALL 1 "register_operand" "0")
			 (match_operand:SVE_ALL 2 "register_operand" "w")
			 (match_operand:SI 3 "const_int_operand")]
			UNSPEC_EXT))]
  "TARGET_SVE
   && IN_RANGE (INTVAL (operands[3]) * GET_MODE_SIZE (<VEL>mode), 0, 255)"
  {
    operands[3] = GEN_INT (INTVAL (operands[3]) * GET_MODE_SIZE (<VEL>mode));
    return "ext\\t%0.b, %0.b, %2.b, #%3";
  }
)

(define_insn "add<mode>3"
  [(set (match_operand:SVE_I 0 "register_operand" "=w, w, w, w")
	(plus:SVE_I
	  (match_operand:SVE_I 1 "register_operand" "%0, 0, 0, w")
	  (match_operand:SVE_I 2 "aarch64_sve_add_operand" "vsa, vsn, vsi, w")))]
  "TARGET_SVE"
  "@
   add\t%0.<Vetype>, %0.<Vetype>, #%D2
   sub\t%0.<Vetype>, %0.<Vetype>, #%N2
   * return aarch64_output_sve_inc_dec_immediate (\"%0.<Vetype>\", operands[2]);
   add\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>"
)
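
;; For example (illustrative), adding the immediate 1 to every byte
;; element matches the first alternative:
;;
;;	add	z0.b, z0.b, #1
;;
;; while adding -1 matches the second and becomes "sub\tz0.b, z0.b, #1".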

(define_insn "sub<mode>3"
  [(set (match_operand:SVE_I 0 "register_operand" "=w, w")
	(minus:SVE_I
	  (match_operand:SVE_I 1 "aarch64_sve_arith_operand" "w, vsa")
	  (match_operand:SVE_I 2 "register_operand" "w, 0")))]
  "TARGET_SVE"
  "@
   sub\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>
   subr\t%0.<Vetype>, %0.<Vetype>, #%D1"
)

;; Unpredicated multiplication.
(define_expand "mul<mode>3"
  [(set (match_operand:SVE_I 0 "register_operand")
	(unspec:SVE_I
	  [(match_dup 3)
	   (mult:SVE_I
	     (match_operand:SVE_I 1 "register_operand")
	     (match_operand:SVE_I 2 "aarch64_sve_mul_operand"))]
	  UNSPEC_MERGE_PTRUE))]
  "TARGET_SVE"
  {
    operands[3] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode));
  }
)

;; Multiplication predicated with a PTRUE.  We don't actually need the
;; predicate for the first alternative, but using Upa or X isn't likely
;; to gain much and would make the instruction seem less uniform to the
;; register allocator.
(define_insn "*mul<mode>3"
  [(set (match_operand:SVE_I 0 "register_operand" "=w, w")
	(unspec:SVE_I
	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
	   (mult:SVE_I
	     (match_operand:SVE_I 2 "register_operand" "%0, 0")
	     (match_operand:SVE_I 3 "aarch64_sve_mul_operand" "vsm, w"))]
	  UNSPEC_MERGE_PTRUE))]
  "TARGET_SVE"
  "@
   mul\t%0.<Vetype>, %0.<Vetype>, #%3
   mul\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
)

(define_insn "*madd<mode>"
  [(set (match_operand:SVE_I 0 "register_operand" "=w, w")
	(plus:SVE_I
	  (unspec:SVE_I
	    [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
	     (mult:SVE_I (match_operand:SVE_I 2 "register_operand" "%0, w")
			 (match_operand:SVE_I 3 "register_operand" "w, w"))]
	    UNSPEC_MERGE_PTRUE)
	  (match_operand:SVE_I 4 "register_operand" "w, 0")))]
  "TARGET_SVE"
  "@
   mad\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
   mla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>"
)

(define_insn "*msub<mode>3"
  [(set (match_operand:SVE_I 0 "register_operand" "=w, w")
	(minus:SVE_I
	  (match_operand:SVE_I 4 "register_operand" "w, 0")
	  (unspec:SVE_I
	    [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
	     (mult:SVE_I (match_operand:SVE_I 2 "register_operand" "%0, w")
			 (match_operand:SVE_I 3 "register_operand" "w, w"))]
	    UNSPEC_MERGE_PTRUE)))]
  "TARGET_SVE"
  "@
   msb\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
   mls\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>"
)

;; Unpredicated highpart multiplication.
(define_expand "<su>mul<mode>3_highpart"
  [(set (match_operand:SVE_I 0 "register_operand")
	(unspec:SVE_I
	  [(match_dup 3)
	   (unspec:SVE_I [(match_operand:SVE_I 1 "register_operand")
			  (match_operand:SVE_I 2 "register_operand")]
			 MUL_HIGHPART)]
	  UNSPEC_MERGE_PTRUE))]
  "TARGET_SVE"
  {
    operands[3] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode));
  }
)

;; Predicated highpart multiplication.
(define_insn "*<su>mul<mode>3_highpart"
  [(set (match_operand:SVE_I 0 "register_operand" "=w")
	(unspec:SVE_I
	  [(match_operand:<VPRED> 1 "register_operand" "Upl")
	   (unspec:SVE_I [(match_operand:SVE_I 2 "register_operand" "%0")
			  (match_operand:SVE_I 3 "register_operand" "w")]
			 MUL_HIGHPART)]
	  UNSPEC_MERGE_PTRUE))]
  "TARGET_SVE"
  "<su>mulh\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
)

;; Unpredicated NEG, NOT and POPCOUNT.
(define_expand "<optab><mode>2"
  [(set (match_operand:SVE_I 0 "register_operand")
	(unspec:SVE_I
	  [(match_dup 2)
	   (SVE_INT_UNARY:SVE_I (match_operand:SVE_I 1 "register_operand"))]
	  UNSPEC_MERGE_PTRUE))]
  "TARGET_SVE"
  {
    operands[2] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode));
  }
)

;; NEG, NOT and POPCOUNT predicated with a PTRUE.
(define_insn "*<optab><mode>2"
  [(set (match_operand:SVE_I 0 "register_operand" "=w")
	(unspec:SVE_I
	  [(match_operand:<VPRED> 1 "register_operand" "Upl")
	   (SVE_INT_UNARY:SVE_I
	     (match_operand:SVE_I 2 "register_operand" "w"))]
	  UNSPEC_MERGE_PTRUE))]
  "TARGET_SVE"
  "<sve_int_op>\t%0.<Vetype>, %1/m, %2.<Vetype>"
)

;; Vector AND, ORR and XOR.
(define_insn "<optab><mode>3"
  [(set (match_operand:SVE_I 0 "register_operand" "=w, w")
	(LOGICAL:SVE_I
	  (match_operand:SVE_I 1 "register_operand" "%0, w")
	  (match_operand:SVE_I 2 "aarch64_sve_logical_operand" "vsl, w")))]
  "TARGET_SVE"
  "@
   <logical>\t%0.<Vetype>, %0.<Vetype>, #%C2
   <logical>\t%0.d, %1.d, %2.d"
)

;; Vector AND, ORR and XOR on floating-point modes.  We avoid subregs
;; by providing this, but we need to use UNSPECs since rtx logical ops
;; aren't defined for floating-point modes.
(define_insn "*<optab><mode>3"
  [(set (match_operand:SVE_F 0 "register_operand" "=w")
	(unspec:SVE_F [(match_operand:SVE_F 1 "register_operand" "w")
		       (match_operand:SVE_F 2 "register_operand" "w")]
		      LOGICALF))]
  "TARGET_SVE"
  "<logicalf_op>\t%0.d, %1.d, %2.d"
)

;; REG_EQUAL notes on "not<mode>3" should ensure that we can generate
;; this pattern even though the NOT instruction itself is predicated.
(define_insn "bic<mode>3"
  [(set (match_operand:SVE_I 0 "register_operand" "=w")
	(and:SVE_I
	  (not:SVE_I (match_operand:SVE_I 1 "register_operand" "w"))
	  (match_operand:SVE_I 2 "register_operand" "w")))]
  "TARGET_SVE"
  "bic\t%0.d, %2.d, %1.d"
)

;; Predicate AND.  We can reuse one of the inputs as the GP.
(define_insn "and<mode>3"
  [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
	(and:PRED_ALL (match_operand:PRED_ALL 1 "register_operand" "Upa")
		      (match_operand:PRED_ALL 2 "register_operand" "Upa")))]
  "TARGET_SVE"
  "and\t%0.b, %1/z, %1.b, %2.b"
)

;; Unpredicated predicate ORR and XOR.
(define_expand "<optab><mode>3"
  [(set (match_operand:PRED_ALL 0 "register_operand")
	(and:PRED_ALL
	  (LOGICAL_OR:PRED_ALL
	    (match_operand:PRED_ALL 1 "register_operand")
	    (match_operand:PRED_ALL 2 "register_operand"))
	  (match_dup 3)))]
  "TARGET_SVE"
  {
    operands[3] = force_reg (<MODE>mode, CONSTM1_RTX (<MODE>mode));
  }
)

;; Predicated predicate ORR and XOR.
(define_insn "pred_<optab><mode>3"
  [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
	(and:PRED_ALL
	  (LOGICAL:PRED_ALL
	    (match_operand:PRED_ALL 2 "register_operand" "Upa")
	    (match_operand:PRED_ALL 3 "register_operand" "Upa"))
	  (match_operand:PRED_ALL 1 "register_operand" "Upa")))]
  "TARGET_SVE"
  "<logical>\t%0.b, %1/z, %2.b, %3.b"
)

;; Perform a logical operation on operands 2 and 3, using operand 1 as
;; the GP (which is known to be a PTRUE).  Store the result in operand 0
;; and set the flags in the same way as for PTEST.  The (and ...) in the
;; UNSPEC_PTEST_PTRUE is logically redundant, but means that the tested
;; value is structurally equivalent to the rhs of the second set.
(define_insn "*<optab><mode>3_cc"
  [(set (reg:CC CC_REGNUM)
	(compare:CC
	  (unspec:SI [(match_operand:PRED_ALL 1 "register_operand" "Upa")
		      (and:PRED_ALL
			(LOGICAL:PRED_ALL
			  (match_operand:PRED_ALL 2 "register_operand" "Upa")
			  (match_operand:PRED_ALL 3 "register_operand" "Upa"))
			(match_dup 1))]
		     UNSPEC_PTEST_PTRUE)
	  (const_int 0)))
   (set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
	(and:PRED_ALL (LOGICAL:PRED_ALL (match_dup 2) (match_dup 3))
		      (match_dup 1)))]
  "TARGET_SVE"
  "<logical>s\t%0.b, %1/z, %2.b, %3.b"
)

;; Unpredicated predicate inverse.
(define_expand "one_cmpl<mode>2"
  [(set (match_operand:PRED_ALL 0 "register_operand")
	(and:PRED_ALL
	  (not:PRED_ALL (match_operand:PRED_ALL 1 "register_operand"))
	  (match_dup 2)))]
  "TARGET_SVE"
  {
    operands[2] = force_reg (<MODE>mode, CONSTM1_RTX (<MODE>mode));
  }
)

;; Predicated predicate inverse.
(define_insn "*one_cmpl<mode>3"
  [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
	(and:PRED_ALL
	  (not:PRED_ALL (match_operand:PRED_ALL 2 "register_operand" "Upa"))
	  (match_operand:PRED_ALL 1 "register_operand" "Upa")))]
  "TARGET_SVE"
  "not\t%0.b, %1/z, %2.b"
)

;; Predicated predicate BIC and ORN.
(define_insn "*<nlogical><mode>3"
  [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
	(and:PRED_ALL
	  (NLOGICAL:PRED_ALL
	    (not:PRED_ALL (match_operand:PRED_ALL 2 "register_operand" "Upa"))
	    (match_operand:PRED_ALL 3 "register_operand" "Upa"))
	  (match_operand:PRED_ALL 1 "register_operand" "Upa")))]
  "TARGET_SVE"
  "<nlogical>\t%0.b, %1/z, %3.b, %2.b"
)

;; Predicated predicate NAND and NOR.
(define_insn "*<logical_nn><mode>3"
  [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
	(and:PRED_ALL
	  (NLOGICAL:PRED_ALL
	    (not:PRED_ALL (match_operand:PRED_ALL 2 "register_operand" "Upa"))
	    (not:PRED_ALL (match_operand:PRED_ALL 3 "register_operand" "Upa")))
	  (match_operand:PRED_ALL 1 "register_operand" "Upa")))]
  "TARGET_SVE"
  "<logical_nn>\t%0.b, %1/z, %2.b, %3.b"
)

;; Unpredicated LSL, LSR and ASR by a vector.
(define_expand "v<optab><mode>3"
  [(set (match_operand:SVE_I 0 "register_operand")
	(unspec:SVE_I
	  [(match_dup 3)
	   (ASHIFT:SVE_I
	     (match_operand:SVE_I 1 "register_operand")
	     (match_operand:SVE_I 2 "aarch64_sve_<lr>shift_operand"))]
	  UNSPEC_MERGE_PTRUE))]
  "TARGET_SVE"
  {
    operands[3] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode));
  }
)

;; LSL, LSR and ASR by a vector, predicated with a PTRUE.  We don't
;; actually need the predicate for the first alternative, but using Upa
;; or X isn't likely to gain much and would make the instruction seem
;; less uniform to the register allocator.
(define_insn "*v<optab><mode>3"
  [(set (match_operand:SVE_I 0 "register_operand" "=w, w")
	(unspec:SVE_I
	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
	   (ASHIFT:SVE_I
	     (match_operand:SVE_I 2 "register_operand" "w, 0")
	     (match_operand:SVE_I 3 "aarch64_sve_<lr>shift_operand" "D<lr>, w"))]
	  UNSPEC_MERGE_PTRUE))]
  "TARGET_SVE"
  "@
   <shift>\t%0.<Vetype>, %2.<Vetype>, #%3
   <shift>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
)

;; LSL, LSR and ASR by a scalar, which expands into one of the vector
;; shifts above.
(define_expand "<ASHIFT:optab><mode>3"
  [(set (match_operand:SVE_I 0 "register_operand")
	(ASHIFT:SVE_I (match_operand:SVE_I 1 "register_operand")
		      (match_operand:<VEL> 2 "general_operand")))]
  "TARGET_SVE"
  {
    rtx amount;
    if (CONST_INT_P (operands[2]))
      {
	amount = gen_const_vec_duplicate (<MODE>mode, operands[2]);
	if (!aarch64_sve_<lr>shift_operand (operands[2], <MODE>mode))
	  amount = force_reg (<MODE>mode, amount);
      }
    else
      {
	amount = gen_reg_rtx (<MODE>mode);
	emit_insn (gen_vec_duplicate<mode> (amount,
					    convert_to_mode (<VEL>mode,
							     operands[2], 0)));
      }
    emit_insn (gen_v<optab><mode>3 (operands[0], operands[1], amount));
    DONE;
  }
)
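
;; For example (illustrative), a left shift of a VNx8HI vector by the
;; scalar constant 3 duplicates the constant and matches the immediate
;; alternative of the vector pattern above:
;;
;;	lsl	z0.h, z1.h, #3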

;; Test all bits of operand 1.  Operand 0 is a GP that is known to hold PTRUE.
;;
;; Using UNSPEC_PTEST_PTRUE allows combine patterns to assume that the GP
;; is a PTRUE even if the optimizers haven't yet been able to propagate
;; the constant.  We would use a separate unspec code for PTESTs involving
;; GPs that might not be PTRUEs.
(define_insn "ptest_ptrue<mode>"
  [(set (reg:CC CC_REGNUM)
	(compare:CC
	  (unspec:SI [(match_operand:PRED_ALL 0 "register_operand" "Upa")
		      (match_operand:PRED_ALL 1 "register_operand" "Upa")]
		     UNSPEC_PTEST_PTRUE)
	  (const_int 0)))]
  "TARGET_SVE"
  "ptest\t%0, %1.b"
)

;; Set element I of the result if operand1 + J < operand2 for all J in
;; [0, I], with the comparison being unsigned.
(define_insn "while_ult<GPI:mode><PRED_ALL:mode>"
  [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
	(unspec:PRED_ALL [(match_operand:GPI 1 "aarch64_reg_or_zero" "rZ")
			  (match_operand:GPI 2 "aarch64_reg_or_zero" "rZ")]
			 UNSPEC_WHILE_LO))
   (clobber (reg:CC CC_REGNUM))]
  "TARGET_SVE"
  "whilelo\t%0.<PRED_ALL:Vetype>, %<w>1, %<w>2"
)

;; WHILELO sets the flags in the same way as a PTEST with a PTRUE GP.
;; Handle the case in which both results are useful.  The GP operand
;; to the PTEST isn't needed, so we allow it to be anything.
(define_insn_and_split "while_ult<GPI:mode><PRED_ALL:mode>_cc"
  [(set (reg:CC CC_REGNUM)
	(compare:CC
	  (unspec:SI [(match_operand:PRED_ALL 1)
		      (unspec:PRED_ALL
			[(match_operand:GPI 2 "aarch64_reg_or_zero" "rZ")
			 (match_operand:GPI 3 "aarch64_reg_or_zero" "rZ")]
			UNSPEC_WHILE_LO)]
		     UNSPEC_PTEST_PTRUE)
	  (const_int 0)))
   (set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
	(unspec:PRED_ALL [(match_dup 2)
			  (match_dup 3)]
			 UNSPEC_WHILE_LO))]
  "TARGET_SVE"
  "whilelo\t%0.<PRED_ALL:Vetype>, %<w>2, %<w>3"
  ;; Force the compiler to drop the unused predicate operand, so that we
  ;; don't have an unnecessary PTRUE.
  "&& !CONSTANT_P (operands[1])"
  [(const_int 0)]
  {
    emit_insn (gen_while_ult<GPI:mode><PRED_ALL:mode>_cc
	       (operands[0], CONSTM1_RTX (<PRED_ALL:MODE>mode),
1290		operands[2], operands[3]));
1291    DONE;
1292  }
1293)
1294
1295;; Predicated integer comparison.
1296(define_insn "*vec_cmp<cmp_op>_<mode>"
1297  [(set (match_operand:<VPRED> 0 "register_operand" "=Upa, Upa")
1298	(unspec:<VPRED>
1299	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
1300	   (match_operand:SVE_I 2 "register_operand" "w, w")
1301	   (match_operand:SVE_I 3 "aarch64_sve_cmp_<imm_con>_operand" "<imm_con>, w")]
1302	  SVE_COND_INT_CMP))
1303   (clobber (reg:CC CC_REGNUM))]
1304  "TARGET_SVE"
1305  "@
1306   cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, #%3
1307   cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>"
1308)
1309
1310;; Predicated integer comparison in which only the flags result is interesting.
1311(define_insn "*vec_cmp<cmp_op>_<mode>_ptest"
1312  [(set (reg:CC CC_REGNUM)
1313	(compare:CC
1314	  (unspec:SI
1315	    [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
1316	     (unspec:<VPRED>
1317	       [(match_dup 1)
1318	        (match_operand:SVE_I 2 "register_operand" "w, w")
1319		(match_operand:SVE_I 3 "aarch64_sve_cmp_<imm_con>_operand" "<imm_con>, w")]
1320	       SVE_COND_INT_CMP)]
1321	    UNSPEC_PTEST_PTRUE)
1322	  (const_int 0)))
1323   (clobber (match_scratch:<VPRED> 0 "=Upa, Upa"))]
1324  "TARGET_SVE"
1325  "@
1326   cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, #%3
1327   cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>"
1328)
1329
1330;; Predicated comparison in which both the flag and predicate results
1331;; are interesting.
1332(define_insn "*vec_cmp<cmp_op>_<mode>_cc"
1333  [(set (reg:CC CC_REGNUM)
1334	(compare:CC
1335	  (unspec:SI
1336	    [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
1337	     (unspec:<VPRED>
1338	       [(match_dup 1)
1339		(match_operand:SVE_I 2 "register_operand" "w, w")
1340		(match_operand:SVE_I 3 "aarch64_sve_cmp_<imm_con>_operand" "<imm_con>, w")]
1341	       SVE_COND_INT_CMP)]
1342	    UNSPEC_PTEST_PTRUE)
1343	  (const_int 0)))
1344   (set (match_operand:<VPRED> 0 "register_operand" "=Upa, Upa")
1345	(unspec:<VPRED>
1346	  [(match_dup 1)
1347	   (match_dup 2)
1348	   (match_dup 3)]
1349	  SVE_COND_INT_CMP))]
1350  "TARGET_SVE"
1351  "@
1352   cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, #%3
1353   cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>"
1354)
1355
1356;; Predicated floating-point comparison (excluding FCMUO, which doesn't
1357;; allow #0.0 as an operand).
1358(define_insn "*vec_fcm<cmp_op><mode>"
1359  [(set (match_operand:<VPRED> 0 "register_operand" "=Upa, Upa")
1360	(unspec:<VPRED>
1361	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
1362	   (match_operand:SVE_F 2 "register_operand" "w, w")
1363	   (match_operand:SVE_F 3 "aarch64_simd_reg_or_zero" "Dz, w")]
1364	  SVE_COND_FP_CMP))]
1365  "TARGET_SVE"
1366  "@
1367   fcm<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, #0.0
1368   fcm<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>"
1369)
1370
1371;; Predicated FCMUO.
1372(define_insn "*vec_fcmuo<mode>"
1373  [(set (match_operand:<VPRED> 0 "register_operand" "=Upa")
1374	(unspec:<VPRED>
1375	  [(match_operand:<VPRED> 1 "register_operand" "Upl")
1376	   (match_operand:SVE_F 2 "register_operand" "w")
1377	   (match_operand:SVE_F 3 "register_operand" "w")]
1378	  UNSPEC_COND_UO))]
1379  "TARGET_SVE"
1380  "fcmuo\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>"
1381)
1382
1383;; vcond_mask operand order: true, false, mask
1384;; UNSPEC_SEL operand order: mask, true, false (as for VEC_COND_EXPR)
1385;; SEL operand order:        mask, true, false
1386(define_insn "vcond_mask_<mode><vpred>"
1387  [(set (match_operand:SVE_ALL 0 "register_operand" "=w")
1388	(unspec:SVE_ALL
1389	  [(match_operand:<VPRED> 3 "register_operand" "Upa")
1390	   (match_operand:SVE_ALL 1 "register_operand" "w")
1391	   (match_operand:SVE_ALL 2 "register_operand" "w")]
1392	  UNSPEC_SEL))]
1393  "TARGET_SVE"
1394  "sel\t%0.<Vetype>, %3, %1.<Vetype>, %2.<Vetype>"
1395)
1396
1397;; Selects between a duplicated immediate and zero.
1398(define_insn "aarch64_sve_dup<mode>_const"
1399  [(set (match_operand:SVE_I 0 "register_operand" "=w")
1400	(unspec:SVE_I
1401	  [(match_operand:<VPRED> 1 "register_operand" "Upl")
1402	   (match_operand:SVE_I 2 "aarch64_sve_dup_immediate")
1403	   (match_operand:SVE_I 3 "aarch64_simd_imm_zero")]
1404	  UNSPEC_SEL))]
1405  "TARGET_SVE"
1406  "mov\t%0.<Vetype>, %1/z, #%2"
1407)
1408
1409;; Integer (signed) vcond.  Don't enforce an immediate range here, since it
1410;; depends on the comparison; leave it to aarch64_expand_sve_vcond instead.
1411(define_expand "vcond<mode><v_int_equiv>"
1412  [(set (match_operand:SVE_ALL 0 "register_operand")
1413	(if_then_else:SVE_ALL
1414	  (match_operator 3 "comparison_operator"
1415	    [(match_operand:<V_INT_EQUIV> 4 "register_operand")
1416	     (match_operand:<V_INT_EQUIV> 5 "nonmemory_operand")])
1417	  (match_operand:SVE_ALL 1 "register_operand")
1418	  (match_operand:SVE_ALL 2 "register_operand")))]
1419  "TARGET_SVE"
1420  {
1421    aarch64_expand_sve_vcond (<MODE>mode, <V_INT_EQUIV>mode, operands);
1422    DONE;
1423  }
1424)
1425
1426;; Integer vcondu.  Don't enforce an immediate range here, since it
1427;; depends on the comparison; leave it to aarch64_expand_sve_vcond instead.
1428(define_expand "vcondu<mode><v_int_equiv>"
1429  [(set (match_operand:SVE_ALL 0 "register_operand")
1430	(if_then_else:SVE_ALL
1431	  (match_operator 3 "comparison_operator"
1432	    [(match_operand:<V_INT_EQUIV> 4 "register_operand")
1433	     (match_operand:<V_INT_EQUIV> 5 "nonmemory_operand")])
1434	  (match_operand:SVE_ALL 1 "register_operand")
1435	  (match_operand:SVE_ALL 2 "register_operand")))]
1436  "TARGET_SVE"
1437  {
1438    aarch64_expand_sve_vcond (<MODE>mode, <V_INT_EQUIV>mode, operands);
1439    DONE;
1440  }
1441)
1442
1443;; Floating-point vcond.  All comparisons except FCMUO allow a zero
1444;; operand; aarch64_expand_sve_vcond handles the case of an FCMUO
1445;; with zero.
1446(define_expand "vcond<mode><v_fp_equiv>"
1447  [(set (match_operand:SVE_SD 0 "register_operand")
1448	(if_then_else:SVE_SD
1449	  (match_operator 3 "comparison_operator"
1450	    [(match_operand:<V_FP_EQUIV> 4 "register_operand")
1451	     (match_operand:<V_FP_EQUIV> 5 "aarch64_simd_reg_or_zero")])
1452	  (match_operand:SVE_SD 1 "register_operand")
1453	  (match_operand:SVE_SD 2 "register_operand")))]
1454  "TARGET_SVE"
1455  {
1456    aarch64_expand_sve_vcond (<MODE>mode, <V_FP_EQUIV>mode, operands);
1457    DONE;
1458  }
1459)
1460
1461;; Signed integer comparisons.  Don't enforce an immediate range here, since
1462;; it depends on the comparison; leave it to aarch64_expand_sve_vec_cmp_int
1463;; instead.
1464(define_expand "vec_cmp<mode><vpred>"
1465  [(parallel
1466    [(set (match_operand:<VPRED> 0 "register_operand")
1467	  (match_operator:<VPRED> 1 "comparison_operator"
1468	    [(match_operand:SVE_I 2 "register_operand")
1469	     (match_operand:SVE_I 3 "nonmemory_operand")]))
1470     (clobber (reg:CC CC_REGNUM))])]
1471  "TARGET_SVE"
1472  {
1473    aarch64_expand_sve_vec_cmp_int (operands[0], GET_CODE (operands[1]),
1474				    operands[2], operands[3]);
1475    DONE;
1476  }
1477)

;; Unsigned integer comparisons.  Don't enforce an immediate range here, since
;; it depends on the comparison; leave it to aarch64_expand_sve_vec_cmp_int
;; instead.
(define_expand "vec_cmpu<mode><vpred>"
  [(parallel
    [(set (match_operand:<VPRED> 0 "register_operand")
	  (match_operator:<VPRED> 1 "comparison_operator"
	    [(match_operand:SVE_I 2 "register_operand")
	     (match_operand:SVE_I 3 "nonmemory_operand")]))
     (clobber (reg:CC CC_REGNUM))])]
  "TARGET_SVE"
  {
    aarch64_expand_sve_vec_cmp_int (operands[0], GET_CODE (operands[1]),
				    operands[2], operands[3]);
    DONE;
  }
)

;; Floating-point comparisons.  All comparisons except FCMUO allow a zero
;; operand; aarch64_expand_sve_vec_cmp_float handles the case of an FCMUO
;; with zero.
(define_expand "vec_cmp<mode><vpred>"
  [(set (match_operand:<VPRED> 0 "register_operand")
	(match_operator:<VPRED> 1 "comparison_operator"
	  [(match_operand:SVE_F 2 "register_operand")
	   (match_operand:SVE_F 3 "aarch64_simd_reg_or_zero")]))]
  "TARGET_SVE"
  {
    aarch64_expand_sve_vec_cmp_float (operands[0], GET_CODE (operands[1]),
				      operands[2], operands[3], false);
    DONE;
  }
)

;; Branch based on predicate equality or inequality.
(define_expand "cbranch<mode>4"
  [(set (pc)
	(if_then_else
	  (match_operator 0 "aarch64_equality_operator"
	    [(match_operand:PRED_ALL 1 "register_operand")
	     (match_operand:PRED_ALL 2 "aarch64_simd_reg_or_zero")])
	  (label_ref (match_operand 3 ""))
	  (pc)))]
  ""
  {
    rtx ptrue = force_reg (<MODE>mode, CONSTM1_RTX (<MODE>mode));
    rtx pred;
    if (operands[2] == CONST0_RTX (<MODE>mode))
      pred = operands[1];
    else
      {
	pred = gen_reg_rtx (<MODE>mode);
	emit_insn (gen_pred_xor<mode>3 (pred, ptrue, operands[1],
					operands[2]));
      }
    emit_insn (gen_ptest_ptrue<mode> (ptrue, pred));
    operands[1] = gen_rtx_REG (CCmode, CC_REGNUM);
    operands[2] = const0_rtx;
  }
)
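
;; A sketch of what the expansion might produce for an inequality test
;; against a nonzero predicate (registers are illustrative; the XOR is
;; zero iff the two predicates are equal):
;;
;;	ptrue	p3.b
;;	eor	p0.b, p3/z, p1.b, p2.b
;;	ptest	p3, p0.b
;;	b.ne	.Ltarget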

;; Unpredicated integer MIN/MAX.
(define_expand "<su><maxmin><mode>3"
  [(set (match_operand:SVE_I 0 "register_operand")
	(unspec:SVE_I
	  [(match_dup 3)
	   (MAXMIN:SVE_I (match_operand:SVE_I 1 "register_operand")
			 (match_operand:SVE_I 2 "register_operand"))]
	  UNSPEC_MERGE_PTRUE))]
  "TARGET_SVE"
  {
    operands[3] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode));
  }
)

;; Integer MIN/MAX predicated with a PTRUE.
(define_insn "*<su><maxmin><mode>3"
  [(set (match_operand:SVE_I 0 "register_operand" "=w")
	(unspec:SVE_I
	  [(match_operand:<VPRED> 1 "register_operand" "Upl")
	   (MAXMIN:SVE_I (match_operand:SVE_I 2 "register_operand" "%0")
			 (match_operand:SVE_I 3 "register_operand" "w"))]
	  UNSPEC_MERGE_PTRUE))]
  "TARGET_SVE"
  "<su><maxmin>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
)
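
;; For example, with <su><maxmin> = smax on VNx4SI this assembles as
;; something like:
;;
;;	smax	z0.s, p1/m, z0.s, z3.s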

;; Unpredicated floating-point MIN/MAX.
(define_expand "<su><maxmin><mode>3"
  [(set (match_operand:SVE_F 0 "register_operand")
	(unspec:SVE_F
	  [(match_dup 3)
	   (FMAXMIN:SVE_F (match_operand:SVE_F 1 "register_operand")
			  (match_operand:SVE_F 2 "register_operand"))]
	  UNSPEC_MERGE_PTRUE))]
  "TARGET_SVE"
  {
    operands[3] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode));
  }
)

;; Floating-point MIN/MAX predicated with a PTRUE.
(define_insn "*<su><maxmin><mode>3"
  [(set (match_operand:SVE_F 0 "register_operand" "=w")
	(unspec:SVE_F
	  [(match_operand:<VPRED> 1 "register_operand" "Upl")
	   (FMAXMIN:SVE_F (match_operand:SVE_F 2 "register_operand" "%0")
			  (match_operand:SVE_F 3 "register_operand" "w"))]
	  UNSPEC_MERGE_PTRUE))]
  "TARGET_SVE"
  "f<maxmin>nm\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
)
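
;; Note that this emits the FMAXNM/FMINNM forms rather than FMAX/FMIN.
;; For example, smax on VNx4SF might assemble as:
;;
;;	fmaxnm	z0.s, p1/m, z0.s, z3.s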

;; Unpredicated fmin/fmax.
(define_expand "<maxmin_uns><mode>3"
  [(set (match_operand:SVE_F 0 "register_operand")
	(unspec:SVE_F
	  [(match_dup 3)
	   (unspec:SVE_F [(match_operand:SVE_F 1 "register_operand")
			  (match_operand:SVE_F 2 "register_operand")]
			 FMAXMIN_UNS)]
	  UNSPEC_MERGE_PTRUE))]
  "TARGET_SVE"
  {
    operands[3] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode));
  }
)

;; fmin/fmax predicated with a PTRUE.
(define_insn "*<maxmin_uns><mode>3"
  [(set (match_operand:SVE_F 0 "register_operand" "=w")
	(unspec:SVE_F
	  [(match_operand:<VPRED> 1 "register_operand" "Upl")
	   (unspec:SVE_F [(match_operand:SVE_F 2 "register_operand" "%0")
			  (match_operand:SVE_F 3 "register_operand" "w")]
			 FMAXMIN_UNS)]
	  UNSPEC_MERGE_PTRUE))]
  "TARGET_SVE"
  "<maxmin_uns_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
)

;; Predicated integer operations.
(define_insn "cond_<optab><mode>"
  [(set (match_operand:SVE_I 0 "register_operand" "=w")
	(unspec:SVE_I
	  [(match_operand:<VPRED> 1 "register_operand" "Upl")
	   (match_operand:SVE_I 2 "register_operand" "0")
	   (match_operand:SVE_I 3 "register_operand" "w")]
	  SVE_COND_INT_OP))]
  "TARGET_SVE"
  "<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
)

;; Set operand 0 to the last active element in operand 3, or to tied
;; operand 1 if no elements are active.
(define_insn "fold_extract_last_<mode>"
  [(set (match_operand:<VEL> 0 "register_operand" "=r, w")
	(unspec:<VEL>
	  [(match_operand:<VEL> 1 "register_operand" "0, 0")
	   (match_operand:<VPRED> 2 "register_operand" "Upl, Upl")
	   (match_operand:SVE_ALL 3 "register_operand" "w, w")]
	  UNSPEC_CLASTB))]
  "TARGET_SVE"
  "@
   clastb\t%<vwcore>0, %2, %<vwcore>0, %3.<Vetype>
   clastb\t%<vw>0, %2, %<vw>0, %3.<Vetype>"
)
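
;; Illustrative forms of the two alternatives for VNx4SI (registers are
;; arbitrary): the first keeps the result in a general register, the
;; second in a SIMD&FP register:
;;
;;	clastb	w0, p2, w0, z3.s
;;	clastb	s0, p2, s0, z3.s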

;; Unpredicated integer add reduction.
(define_expand "reduc_plus_scal_<mode>"
  [(set (match_operand:<VEL> 0 "register_operand")
	(unspec:<VEL> [(match_dup 2)
		       (match_operand:SVE_I 1 "register_operand")]
		      UNSPEC_ADDV))]
  "TARGET_SVE"
  {
    operands[2] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode));
  }
)

;; Predicated integer add reduction.  The result is always 64 bits.
(define_insn "*reduc_plus_scal_<mode>"
  [(set (match_operand:<VEL> 0 "register_operand" "=w")
	(unspec:<VEL> [(match_operand:<VPRED> 1 "register_operand" "Upl")
		       (match_operand:SVE_I 2 "register_operand" "w")]
		      UNSPEC_ADDV))]
  "TARGET_SVE"
  "uaddv\t%d0, %1, %2.<Vetype>"
)
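
;; For example, reducing a VNx16QI vector under an all-true predicate:
;;
;;	ptrue	p1.b
;;	uaddv	d0, p1, z2.b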

;; Unpredicated floating-point add reduction.
(define_expand "reduc_plus_scal_<mode>"
  [(set (match_operand:<VEL> 0 "register_operand")
	(unspec:<VEL> [(match_dup 2)
		       (match_operand:SVE_F 1 "register_operand")]
		      UNSPEC_FADDV))]
  "TARGET_SVE"
  {
    operands[2] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode));
  }
)

;; Predicated floating-point add reduction.
(define_insn "*reduc_plus_scal_<mode>"
  [(set (match_operand:<VEL> 0 "register_operand" "=w")
	(unspec:<VEL> [(match_operand:<VPRED> 1 "register_operand" "Upl")
		       (match_operand:SVE_F 2 "register_operand" "w")]
		      UNSPEC_FADDV))]
  "TARGET_SVE"
  "faddv\t%<Vetype>0, %1, %2.<Vetype>"
)

;; Unpredicated integer MIN/MAX reduction.
(define_expand "reduc_<maxmin_uns>_scal_<mode>"
  [(set (match_operand:<VEL> 0 "register_operand")
	(unspec:<VEL> [(match_dup 2)
		       (match_operand:SVE_I 1 "register_operand")]
		      MAXMINV))]
  "TARGET_SVE"
  {
    operands[2] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode));
  }
)

;; Predicated integer MIN/MAX reduction.
(define_insn "*reduc_<maxmin_uns>_scal_<mode>"
  [(set (match_operand:<VEL> 0 "register_operand" "=w")
	(unspec:<VEL> [(match_operand:<VPRED> 1 "register_operand" "Upl")
		       (match_operand:SVE_I 2 "register_operand" "w")]
		      MAXMINV))]
  "TARGET_SVE"
  "<maxmin_uns_op>v\t%<Vetype>0, %1, %2.<Vetype>"
)

;; Unpredicated floating-point MIN/MAX reduction.
(define_expand "reduc_<maxmin_uns>_scal_<mode>"
  [(set (match_operand:<VEL> 0 "register_operand")
	(unspec:<VEL> [(match_dup 2)
		       (match_operand:SVE_F 1 "register_operand")]
		      FMAXMINV))]
  "TARGET_SVE"
  {
    operands[2] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode));
  }
)

;; Predicated floating-point MIN/MAX reduction.
(define_insn "*reduc_<maxmin_uns>_scal_<mode>"
  [(set (match_operand:<VEL> 0 "register_operand" "=w")
	(unspec:<VEL> [(match_operand:<VPRED> 1 "register_operand" "Upl")
		       (match_operand:SVE_F 2 "register_operand" "w")]
		      FMAXMINV))]
  "TARGET_SVE"
  "<maxmin_uns_op>v\t%<Vetype>0, %1, %2.<Vetype>"
)

;; Unpredicated bitwise integer reductions (AND, IOR and EOR).
(define_expand "reduc_<optab>_scal_<mode>"
  [(set (match_operand:<VEL> 0 "register_operand")
	(unspec:<VEL> [(match_dup 2)
		       (match_operand:SVE_I 1 "register_operand")]
		      BITWISEV))]
  "TARGET_SVE"
  {
    operands[2] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode));
  }
)

;; Bitwise integer reductions predicated with a PTRUE.
(define_insn "*reduc_<optab>_scal_<mode>"
  [(set (match_operand:<VEL> 0 "register_operand" "=w")
	(unspec:<VEL> [(match_operand:<VPRED> 1 "register_operand" "Upl")
		       (match_operand:SVE_I 2 "register_operand" "w")]
		      BITWISEV))]
  "TARGET_SVE"
  "<bit_reduc_op>\t%<Vetype>0, %1, %2.<Vetype>"
)

;; Unpredicated in-order FP reductions.
(define_expand "fold_left_plus_<mode>"
  [(set (match_operand:<VEL> 0 "register_operand")
	(unspec:<VEL> [(match_dup 3)
		       (match_operand:<VEL> 1 "register_operand")
		       (match_operand:SVE_F 2 "register_operand")]
		      UNSPEC_FADDA))]
  "TARGET_SVE"
  {
    operands[3] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode));
  }
)

;; In-order FP reductions predicated with PTRUE.
(define_insn "*fold_left_plus_<mode>"
  [(set (match_operand:<VEL> 0 "register_operand" "=w")
	(unspec:<VEL> [(match_operand:<VPRED> 1 "register_operand" "Upl")
		       (match_operand:<VEL> 2 "register_operand" "0")
		       (match_operand:SVE_F 3 "register_operand" "w")]
		      UNSPEC_FADDA))]
  "TARGET_SVE"
  "fadda\t%<Vetype>0, %1, %<Vetype>0, %3.<Vetype>"
)
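
;; For example, a strict (in-order) float reduction on VNx4SF might be:
;;
;;	fadda	s0, p1, s0, z3.s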

;; Predicated form of the above in-order reduction.
(define_insn "*pred_fold_left_plus_<mode>"
  [(set (match_operand:<VEL> 0 "register_operand" "=w")
	(unspec:<VEL>
	  [(match_operand:<VEL> 1 "register_operand" "0")
	   (unspec:SVE_F
	     [(match_operand:<VPRED> 2 "register_operand" "Upl")
	      (match_operand:SVE_F 3 "register_operand" "w")
	      (match_operand:SVE_F 4 "aarch64_simd_imm_zero")]
	     UNSPEC_SEL)]
	  UNSPEC_FADDA))]
  "TARGET_SVE"
  "fadda\t%<Vetype>0, %2, %<Vetype>0, %3.<Vetype>"
)

;; Unpredicated floating-point addition.
(define_expand "add<mode>3"
  [(set (match_operand:SVE_F 0 "register_operand")
	(unspec:SVE_F
	  [(match_dup 3)
	   (plus:SVE_F
	     (match_operand:SVE_F 1 "register_operand")
	     (match_operand:SVE_F 2 "aarch64_sve_float_arith_with_sub_operand"))]
	  UNSPEC_MERGE_PTRUE))]
  "TARGET_SVE"
  {
    operands[3] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode));
  }
)

;; Floating-point addition predicated with a PTRUE.
(define_insn "*add<mode>3"
  [(set (match_operand:SVE_F 0 "register_operand" "=w, w, w")
	(unspec:SVE_F
	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
	   (plus:SVE_F
	      (match_operand:SVE_F 2 "register_operand" "%0, 0, w")
	      (match_operand:SVE_F 3 "aarch64_sve_float_arith_with_sub_operand" "vsA, vsN, w"))]
	  UNSPEC_MERGE_PTRUE))]
  "TARGET_SVE"
  "@
   fadd\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
   fsub\t%0.<Vetype>, %1/m, %0.<Vetype>, #%N3
   fadd\t%0.<Vetype>, %2.<Vetype>, %3.<Vetype>"
)
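
;; Sketches of the three alternatives on VNx4SF: an immediate add, an
;; add of a negative immediate rewritten as FSUB, and the unpredicated
;; register-register form (the immediates are limited to the values the
;; vsA/vsN constraints accept):
;;
;;	fadd	z0.s, p1/m, z0.s, #1.0
;;	fsub	z0.s, p1/m, z0.s, #0.5
;;	fadd	z0.s, z2.s, z3.s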

;; Unpredicated floating-point subtraction.
(define_expand "sub<mode>3"
  [(set (match_operand:SVE_F 0 "register_operand")
	(unspec:SVE_F
	  [(match_dup 3)
	   (minus:SVE_F
	     (match_operand:SVE_F 1 "aarch64_sve_float_arith_operand")
	     (match_operand:SVE_F 2 "register_operand"))]
	  UNSPEC_MERGE_PTRUE))]
  "TARGET_SVE"
  {
    operands[3] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode));
  }
)

;; Floating-point subtraction predicated with a PTRUE.
(define_insn "*sub<mode>3"
  [(set (match_operand:SVE_F 0 "register_operand" "=w, w, w, w")
	(unspec:SVE_F
	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl")
	   (minus:SVE_F
	     (match_operand:SVE_F 2 "aarch64_sve_float_arith_operand" "0, 0, vsA, w")
	     (match_operand:SVE_F 3 "aarch64_sve_float_arith_with_sub_operand" "vsA, vsN, 0, w"))]
	  UNSPEC_MERGE_PTRUE))]
  "TARGET_SVE
   && (register_operand (operands[2], <MODE>mode)
       || register_operand (operands[3], <MODE>mode))"
  "@
   fsub\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
   fadd\t%0.<Vetype>, %1/m, %0.<Vetype>, #%N3
   fsubr\t%0.<Vetype>, %1/m, %0.<Vetype>, #%2
   fsub\t%0.<Vetype>, %2.<Vetype>, %3.<Vetype>"
)

;; Unpredicated floating-point multiplication.
(define_expand "mul<mode>3"
  [(set (match_operand:SVE_F 0 "register_operand")
	(unspec:SVE_F
	  [(match_dup 3)
	   (mult:SVE_F
	     (match_operand:SVE_F 1 "register_operand")
	     (match_operand:SVE_F 2 "aarch64_sve_float_mul_operand"))]
	  UNSPEC_MERGE_PTRUE))]
  "TARGET_SVE"
  {
    operands[3] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode));
  }
)

;; Floating-point multiplication predicated with a PTRUE.
(define_insn "*mul<mode>3"
  [(set (match_operand:SVE_F 0 "register_operand" "=w, w")
	(unspec:SVE_F
	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
	   (mult:SVE_F
	     (match_operand:SVE_F 2 "register_operand" "%0, w")
	     (match_operand:SVE_F 3 "aarch64_sve_float_mul_operand" "vsM, w"))]
	  UNSPEC_MERGE_PTRUE))]
  "TARGET_SVE"
  "@
   fmul\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
   fmul\t%0.<Vetype>, %2.<Vetype>, %3.<Vetype>"
)
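
;; For example, on VNx4SF (FMUL's immediate form only accepts 0.5 and
;; 2.0, per the vsM constraint):
;;
;;	fmul	z0.s, p1/m, z0.s, #2.0
;;	fmul	z0.s, z2.s, z3.s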

;; Unpredicated fma (%0 = (%1 * %2) + %3).
(define_expand "fma<mode>4"
  [(set (match_operand:SVE_F 0 "register_operand")
	(unspec:SVE_F
	  [(match_dup 4)
	   (fma:SVE_F (match_operand:SVE_F 1 "register_operand")
		      (match_operand:SVE_F 2 "register_operand")
		      (match_operand:SVE_F 3 "register_operand"))]
	  UNSPEC_MERGE_PTRUE))]
  "TARGET_SVE"
  {
    operands[4] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode));
  }
)

;; fma predicated with a PTRUE.
(define_insn "*fma<mode>4"
  [(set (match_operand:SVE_F 0 "register_operand" "=w, w")
	(unspec:SVE_F
	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
	   (fma:SVE_F (match_operand:SVE_F 3 "register_operand" "%0, w")
		      (match_operand:SVE_F 4 "register_operand" "w, w")
		      (match_operand:SVE_F 2 "register_operand" "w, 0"))]
	  UNSPEC_MERGE_PTRUE))]
  "TARGET_SVE"
  "@
   fmad\t%0.<Vetype>, %1/m, %4.<Vetype>, %2.<Vetype>
   fmla\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>"
)
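
;; Which form is chosen depends on which input is tied to the output:
;; FMAD overwrites a multiplicand while FMLA overwrites the addend.
;; Illustrative encodings on VNx4SF:
;;
;;	fmad	z0.s, p1/m, z4.s, z2.s
;;	fmla	z0.s, p1/m, z3.s, z4.s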

;; Unpredicated fnma (%0 = (-%1 * %2) + %3).
(define_expand "fnma<mode>4"
  [(set (match_operand:SVE_F 0 "register_operand")
	(unspec:SVE_F
	  [(match_dup 4)
	   (fma:SVE_F (neg:SVE_F
			(match_operand:SVE_F 1 "register_operand"))
		      (match_operand:SVE_F 2 "register_operand")
		      (match_operand:SVE_F 3 "register_operand"))]
	  UNSPEC_MERGE_PTRUE))]
  "TARGET_SVE"
  {
    operands[4] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode));
  }
)

;; fnma predicated with a PTRUE.
(define_insn "*fnma<mode>4"
  [(set (match_operand:SVE_F 0 "register_operand" "=w, w")
	(unspec:SVE_F
	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
	   (fma:SVE_F (neg:SVE_F
			(match_operand:SVE_F 3 "register_operand" "%0, w"))
		      (match_operand:SVE_F 4 "register_operand" "w, w")
		      (match_operand:SVE_F 2 "register_operand" "w, 0"))]
	  UNSPEC_MERGE_PTRUE))]
  "TARGET_SVE"
  "@
   fmsb\t%0.<Vetype>, %1/m, %4.<Vetype>, %2.<Vetype>
   fmls\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>"
)

;; Unpredicated fms (%0 = (%1 * %2) - %3).
(define_expand "fms<mode>4"
  [(set (match_operand:SVE_F 0 "register_operand")
	(unspec:SVE_F
	  [(match_dup 4)
	   (fma:SVE_F (match_operand:SVE_F 1 "register_operand")
		      (match_operand:SVE_F 2 "register_operand")
		      (neg:SVE_F
			(match_operand:SVE_F 3 "register_operand")))]
	  UNSPEC_MERGE_PTRUE))]
  "TARGET_SVE"
  {
    operands[4] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode));
  }
)

;; fms predicated with a PTRUE.
(define_insn "*fms<mode>4"
  [(set (match_operand:SVE_F 0 "register_operand" "=w, w")
	(unspec:SVE_F
	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
	   (fma:SVE_F (match_operand:SVE_F 3 "register_operand" "%0, w")
		      (match_operand:SVE_F 4 "register_operand" "w, w")
		      (neg:SVE_F
			(match_operand:SVE_F 2 "register_operand" "w, 0")))]
	  UNSPEC_MERGE_PTRUE))]
  "TARGET_SVE"
  "@
   fnmsb\t%0.<Vetype>, %1/m, %4.<Vetype>, %2.<Vetype>
   fnmls\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>"
)

;; Unpredicated fnms (%0 = (-%1 * %2) - %3).
(define_expand "fnms<mode>4"
  [(set (match_operand:SVE_F 0 "register_operand")
	(unspec:SVE_F
	  [(match_dup 4)
	   (fma:SVE_F (neg:SVE_F
			(match_operand:SVE_F 1 "register_operand"))
		      (match_operand:SVE_F 2 "register_operand")
		      (neg:SVE_F
			(match_operand:SVE_F 3 "register_operand")))]
	  UNSPEC_MERGE_PTRUE))]
  "TARGET_SVE"
  {
    operands[4] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode));
  }
)

;; fnms predicated with a PTRUE.
(define_insn "*fnms<mode>4"
  [(set (match_operand:SVE_F 0 "register_operand" "=w, w")
	(unspec:SVE_F
	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
	   (fma:SVE_F (neg:SVE_F
			(match_operand:SVE_F 3 "register_operand" "%0, w"))
		      (match_operand:SVE_F 4 "register_operand" "w, w")
		      (neg:SVE_F
			(match_operand:SVE_F 2 "register_operand" "w, 0")))]
	  UNSPEC_MERGE_PTRUE))]
  "TARGET_SVE"
  "@
   fnmad\t%0.<Vetype>, %1/m, %4.<Vetype>, %2.<Vetype>
   fnmla\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>"
)

;; Unpredicated floating-point division.
(define_expand "div<mode>3"
  [(set (match_operand:SVE_F 0 "register_operand")
	(unspec:SVE_F
	  [(match_dup 3)
	   (div:SVE_F (match_operand:SVE_F 1 "register_operand")
		      (match_operand:SVE_F 2 "register_operand"))]
	  UNSPEC_MERGE_PTRUE))]
  "TARGET_SVE"
  {
    operands[3] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode));
  }
)

;; Floating-point division predicated with a PTRUE.
(define_insn "*div<mode>3"
  [(set (match_operand:SVE_F 0 "register_operand" "=w, w")
	(unspec:SVE_F
	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
	   (div:SVE_F (match_operand:SVE_F 2 "register_operand" "0, w")
		      (match_operand:SVE_F 3 "register_operand" "w, 0"))]
	  UNSPEC_MERGE_PTRUE))]
  "TARGET_SVE"
  "@
   fdiv\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
   fdivr\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>"
)
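
;; FDIVR computes the reversed division (its tied input becomes the
;; divisor), which lets either source be tied to the destination.
;; Illustrative forms on VNx4SF:
;;
;;	fdiv	z0.s, p1/m, z0.s, z3.s
;;	fdivr	z0.s, p1/m, z0.s, z2.s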

;; Unpredicated FNEG, FABS and FSQRT.
(define_expand "<optab><mode>2"
  [(set (match_operand:SVE_F 0 "register_operand")
	(unspec:SVE_F
	  [(match_dup 2)
	   (SVE_FP_UNARY:SVE_F (match_operand:SVE_F 1 "register_operand"))]
	  UNSPEC_MERGE_PTRUE))]
  "TARGET_SVE"
  {
    operands[2] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode));
  }
)

;; FNEG, FABS and FSQRT predicated with a PTRUE.
(define_insn "*<optab><mode>2"
  [(set (match_operand:SVE_F 0 "register_operand" "=w")
	(unspec:SVE_F
	  [(match_operand:<VPRED> 1 "register_operand" "Upl")
	   (SVE_FP_UNARY:SVE_F (match_operand:SVE_F 2 "register_operand" "w"))]
	  UNSPEC_MERGE_PTRUE))]
  "TARGET_SVE"
  "<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vetype>"
)

;; Unpredicated FRINTy.
(define_expand "<frint_pattern><mode>2"
  [(set (match_operand:SVE_F 0 "register_operand")
	(unspec:SVE_F
	  [(match_dup 2)
	   (unspec:SVE_F [(match_operand:SVE_F 1 "register_operand")]
			 FRINT)]
	  UNSPEC_MERGE_PTRUE))]
  "TARGET_SVE"
  {
    operands[2] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode));
  }
)

;; FRINTy predicated with a PTRUE.
(define_insn "*<frint_pattern><mode>2"
  [(set (match_operand:SVE_F 0 "register_operand" "=w")
	(unspec:SVE_F
	  [(match_operand:<VPRED> 1 "register_operand" "Upl")
	   (unspec:SVE_F [(match_operand:SVE_F 2 "register_operand" "w")]
			 FRINT)]
	  UNSPEC_MERGE_PTRUE))]
  "TARGET_SVE"
  "frint<frint_suffix>\t%0.<Vetype>, %1/m, %2.<Vetype>"
)

;; Unpredicated conversion of floats to integers of the same size (HF to HI,
;; SF to SI or DF to DI).
(define_expand "<fix_trunc_optab><mode><v_int_equiv>2"
  [(set (match_operand:<V_INT_EQUIV> 0 "register_operand")
	(unspec:<V_INT_EQUIV>
	  [(match_dup 2)
	   (FIXUORS:<V_INT_EQUIV>
	     (match_operand:SVE_F 1 "register_operand"))]
	  UNSPEC_MERGE_PTRUE))]
  "TARGET_SVE"
  {
    operands[2] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode));
  }
)

;; Conversion of HF to DI, SI or HI, predicated with a PTRUE.
(define_insn "*<fix_trunc_optab>vnx8hf<mode>2"
  [(set (match_operand:SVE_HSDI 0 "register_operand" "=w")
	(unspec:SVE_HSDI
	  [(match_operand:<VPRED> 1 "register_operand" "Upl")
	   (FIXUORS:SVE_HSDI
	     (match_operand:VNx8HF 2 "register_operand" "w"))]
	  UNSPEC_MERGE_PTRUE))]
  "TARGET_SVE"
  "fcvtz<su>\t%0.<Vetype>, %1/m, %2.h"
)

;; Conversion of SF to DI or SI, predicated with a PTRUE.
(define_insn "*<fix_trunc_optab>vnx4sf<mode>2"
  [(set (match_operand:SVE_SDI 0 "register_operand" "=w")
	(unspec:SVE_SDI
	  [(match_operand:<VPRED> 1 "register_operand" "Upl")
	   (FIXUORS:SVE_SDI
	     (match_operand:VNx4SF 2 "register_operand" "w"))]
	  UNSPEC_MERGE_PTRUE))]
  "TARGET_SVE"
  "fcvtz<su>\t%0.<Vetype>, %1/m, %2.s"
)

;; Conversion of DF to DI or SI, predicated with a PTRUE.
(define_insn "*<fix_trunc_optab>vnx2df<mode>2"
  [(set (match_operand:SVE_SDI 0 "register_operand" "=w")
	(unspec:SVE_SDI
	  [(match_operand:VNx2BI 1 "register_operand" "Upl")
	   (FIXUORS:SVE_SDI
	     (match_operand:VNx2DF 2 "register_operand" "w"))]
	  UNSPEC_MERGE_PTRUE))]
  "TARGET_SVE"
  "fcvtz<su>\t%0.<Vetype>, %1/m, %2.d"
)
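
;; For example, a signed truncating conversion from VNx2DF might
;; assemble as:
;;
;;	fcvtzs	z0.s, p1/m, z2.d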

;; Unpredicated conversion of integers to floats of the same size
;; (HI to HF, SI to SF or DI to DF).
(define_expand "<optab><v_int_equiv><mode>2"
  [(set (match_operand:SVE_F 0 "register_operand")
	(unspec:SVE_F
	  [(match_dup 2)
	   (FLOATUORS:SVE_F
	     (match_operand:<V_INT_EQUIV> 1 "register_operand"))]
	  UNSPEC_MERGE_PTRUE))]
  "TARGET_SVE"
  {
    operands[2] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode));
  }
)

;; Conversion of DI, SI or HI to the same number of HFs, predicated
;; with a PTRUE.
(define_insn "*<optab><mode>vnx8hf2"
  [(set (match_operand:VNx8HF 0 "register_operand" "=w")
	(unspec:VNx8HF
	  [(match_operand:<VPRED> 1 "register_operand" "Upl")
	   (FLOATUORS:VNx8HF
	     (match_operand:SVE_HSDI 2 "register_operand" "w"))]
	  UNSPEC_MERGE_PTRUE))]
  "TARGET_SVE"
  "<su_optab>cvtf\t%0.h, %1/m, %2.<Vetype>"
)

;; Conversion of DI or SI to the same number of SFs, predicated with a PTRUE.
(define_insn "*<optab><mode>vnx4sf2"
  [(set (match_operand:VNx4SF 0 "register_operand" "=w")
	(unspec:VNx4SF
	  [(match_operand:<VPRED> 1 "register_operand" "Upl")
	   (FLOATUORS:VNx4SF
	     (match_operand:SVE_SDI 2 "register_operand" "w"))]
	  UNSPEC_MERGE_PTRUE))]
  "TARGET_SVE"
  "<su_optab>cvtf\t%0.s, %1/m, %2.<Vetype>"
)

;; Conversion of DI or SI to DF, predicated with a PTRUE.
(define_insn "aarch64_sve_<optab><mode>vnx2df2"
  [(set (match_operand:VNx2DF 0 "register_operand" "=w")
	(unspec:VNx2DF
	  [(match_operand:VNx2BI 1 "register_operand" "Upl")
	   (FLOATUORS:VNx2DF
	     (match_operand:SVE_SDI 2 "register_operand" "w"))]
	  UNSPEC_MERGE_PTRUE))]
  "TARGET_SVE"
  "<su_optab>cvtf\t%0.d, %1/m, %2.<Vetype>"
)

;; Conversion of DFs to the same number of SFs, or SFs to the same number
;; of HFs.
(define_insn "*trunc<Vwide><mode>2"
  [(set (match_operand:SVE_HSF 0 "register_operand" "=w")
	(unspec:SVE_HSF
	  [(match_operand:<VWIDE_PRED> 1 "register_operand" "Upl")
	   (unspec:SVE_HSF
	     [(match_operand:<VWIDE> 2 "register_operand" "w")]
	     UNSPEC_FLOAT_CONVERT)]
	  UNSPEC_MERGE_PTRUE))]
  "TARGET_SVE"
  "fcvt\t%0.<Vetype>, %1/m, %2.<Vewtype>"
)

;; Conversion of SFs to the same number of DFs, or HFs to the same number
;; of SFs.
(define_insn "aarch64_sve_extend<mode><Vwide>2"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(unspec:<VWIDE>
	  [(match_operand:<VWIDE_PRED> 1 "register_operand" "Upl")
	   (unspec:<VWIDE>
	     [(match_operand:SVE_HSF 2 "register_operand" "w")]
	     UNSPEC_FLOAT_CONVERT)]
	  UNSPEC_MERGE_PTRUE))]
  "TARGET_SVE"
  "fcvt\t%0.<Vewtype>, %1/m, %2.<Vetype>"
)

;; Unpack the low or high half of a predicate, where "high" refers to
;; the low-numbered lanes for big-endian and the high-numbered lanes
;; for little-endian.
(define_expand "vec_unpack<su>_<perm_hilo>_<mode>"
  [(match_operand:<VWIDE> 0 "register_operand")
   (unspec:<VWIDE> [(match_operand:PRED_BHS 1 "register_operand")]
		   UNPACK)]
  "TARGET_SVE"
  {
    emit_insn ((<hi_lanes_optab>
		? gen_aarch64_sve_punpkhi_<PRED_BHS:mode>
		: gen_aarch64_sve_punpklo_<PRED_BHS:mode>)
	       (operands[0], operands[1]));
    DONE;
  }
)

;; PUNPKHI and PUNPKLO.
(define_insn "aarch64_sve_punpk<perm_hilo>_<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=Upa")
	(unspec:<VWIDE> [(match_operand:PRED_BHS 1 "register_operand" "Upa")]
			UNPACK_UNSIGNED))]
  "TARGET_SVE"
  "punpk<perm_hilo>\t%0.h, %1.b"
)

;; Unpack the low or high half of a vector, where "high" refers to
;; the low-numbered lanes for big-endian and the high-numbered lanes
;; for little-endian.
(define_expand "vec_unpack<su>_<perm_hilo>_<SVE_BHSI:mode>"
  [(match_operand:<VWIDE> 0 "register_operand")
   (unspec:<VWIDE> [(match_operand:SVE_BHSI 1 "register_operand")] UNPACK)]
  "TARGET_SVE"
  {
    emit_insn ((<hi_lanes_optab>
		? gen_aarch64_sve_<su>unpkhi_<SVE_BHSI:mode>
		: gen_aarch64_sve_<su>unpklo_<SVE_BHSI:mode>)
	       (operands[0], operands[1]));
    DONE;
  }
)

;; SUNPKHI, UUNPKHI, SUNPKLO and UUNPKLO.
(define_insn "aarch64_sve_<su>unpk<perm_hilo>_<SVE_BHSI:mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(unspec:<VWIDE> [(match_operand:SVE_BHSI 1 "register_operand" "w")]
			UNPACK))]
  "TARGET_SVE"
  "<su>unpk<perm_hilo>\t%0.<Vewtype>, %1.<Vetype>"
)

;; Unpack one half of a VNx4SF to VNx2DF, or one half of a VNx8HF to VNx4SF.
;; First unpack the source without conversion, then float-convert the
;; unpacked source.
(define_expand "vec_unpacks_<perm_hilo>_<mode>"
  [(match_operand:<VWIDE> 0 "register_operand")
   (unspec:SVE_HSF [(match_operand:SVE_HSF 1 "register_operand")]
		   UNPACK_UNSIGNED)]
  "TARGET_SVE"
  {
    /* Use ZIP to do the unpack, since we don't care about the upper halves
       and since it has the nice property of not needing any subregs.
       If using UUNPK* turns out to be preferable, we could model it as
       a ZIP whose first operand is zero.  */
    rtx temp = gen_reg_rtx (<MODE>mode);
    emit_insn ((<hi_lanes_optab>
		? gen_aarch64_sve_zip2<mode>
		: gen_aarch64_sve_zip1<mode>)
		(temp, operands[1], operands[1]));
    rtx ptrue = force_reg (<VWIDE_PRED>mode, CONSTM1_RTX (<VWIDE_PRED>mode));
    emit_insn (gen_aarch64_sve_extend<mode><Vwide>2 (operands[0],
						     ptrue, temp));
    DONE;
  }
)
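
;; A sketch of the resulting sequence for the low half of a VNx4SF
;; (register choice is illustrative):
;;
;;	zip1	z2.s, z1.s, z1.s
;;	ptrue	p3.d
;;	fcvt	z0.d, p3/m, z2.s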

;; Unpack one half of a VNx4SI to VNx2DF.  First unpack from VNx4SI
;; to VNx2DI, reinterpret the VNx2DI as a VNx4SI, then convert the
;; unpacked VNx4SI to VNx2DF.
(define_expand "vec_unpack<su_optab>_float_<perm_hilo>_vnx4si"
  [(match_operand:VNx2DF 0 "register_operand")
   (FLOATUORS:VNx2DF
     (unspec:VNx2DI [(match_operand:VNx4SI 1 "register_operand")]
		    UNPACK_UNSIGNED))]
  "TARGET_SVE"
  {
    /* Use ZIP to do the unpack, since we don't care about the upper halves
       and since it has the nice property of not needing any subregs.
       If using UUNPK* turns out to be preferable, we could model it as
       a ZIP whose first operand is zero.  */
    rtx temp = gen_reg_rtx (VNx4SImode);
    emit_insn ((<hi_lanes_optab>
		? gen_aarch64_sve_zip2vnx4si
		: gen_aarch64_sve_zip1vnx4si)
	       (temp, operands[1], operands[1]));
    rtx ptrue = force_reg (VNx2BImode, CONSTM1_RTX (VNx2BImode));
    emit_insn (gen_aarch64_sve_<FLOATUORS:optab>vnx4sivnx2df2 (operands[0],
							       ptrue, temp));
    DONE;
  }
)

;; Predicate pack.  Use UZP1 on the narrower type, which discards
;; the high part of each wide element.
(define_insn "vec_pack_trunc_<Vwide>"
  [(set (match_operand:PRED_BHS 0 "register_operand" "=Upa")
	(unspec:PRED_BHS
	  [(match_operand:<VWIDE> 1 "register_operand" "Upa")
	   (match_operand:<VWIDE> 2 "register_operand" "Upa")]
	  UNSPEC_PACK))]
  "TARGET_SVE"
  "uzp1\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>"
)

;; Integer pack.  Use UZP1 on the narrower type, which discards
;; the high part of each wide element.
(define_insn "vec_pack_trunc_<Vwide>"
  [(set (match_operand:SVE_BHSI 0 "register_operand" "=w")
	(unspec:SVE_BHSI
	  [(match_operand:<VWIDE> 1 "register_operand" "w")
	   (match_operand:<VWIDE> 2 "register_operand" "w")]
	  UNSPEC_PACK))]
  "TARGET_SVE"
  "uzp1\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>"
)

;; Convert two vectors of DF to SF, or two vectors of SF to HF, and pack
;; the results into a single vector.
(define_expand "vec_pack_trunc_<Vwide>"
  [(set (match_dup 4)
	(unspec:SVE_HSF
	  [(match_dup 3)
	   (unspec:SVE_HSF [(match_operand:<VWIDE> 1 "register_operand")]
			   UNSPEC_FLOAT_CONVERT)]
	  UNSPEC_MERGE_PTRUE))
   (set (match_dup 5)
	(unspec:SVE_HSF
	  [(match_dup 3)
	   (unspec:SVE_HSF [(match_operand:<VWIDE> 2 "register_operand")]
			   UNSPEC_FLOAT_CONVERT)]
	  UNSPEC_MERGE_PTRUE))
   (set (match_operand:SVE_HSF 0 "register_operand")
	(unspec:SVE_HSF [(match_dup 4) (match_dup 5)] UNSPEC_UZP1))]
  "TARGET_SVE"
  {
    operands[3] = force_reg (<VWIDE_PRED>mode, CONSTM1_RTX (<VWIDE_PRED>mode));
    operands[4] = gen_reg_rtx (<MODE>mode);
    operands[5] = gen_reg_rtx (<MODE>mode);
  }
)

;; Convert two vectors of DF to SI and pack the results into a single vector.
(define_expand "vec_pack_<su>fix_trunc_vnx2df"
  [(set (match_dup 4)
	(unspec:VNx4SI
	  [(match_dup 3)
	   (FIXUORS:VNx4SI (match_operand:VNx2DF 1 "register_operand"))]
	  UNSPEC_MERGE_PTRUE))
   (set (match_dup 5)
	(unspec:VNx4SI
	  [(match_dup 3)
	   (FIXUORS:VNx4SI (match_operand:VNx2DF 2 "register_operand"))]
	  UNSPEC_MERGE_PTRUE))
   (set (match_operand:VNx4SI 0 "register_operand")
	(unspec:VNx4SI [(match_dup 4) (match_dup 5)] UNSPEC_UZP1))]
  "TARGET_SVE"
  {
    operands[3] = force_reg (VNx2BImode, CONSTM1_RTX (VNx2BImode));
    operands[4] = gen_reg_rtx (VNx4SImode);
    operands[5] = gen_reg_rtx (VNx4SImode);
  }
)
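
;; An illustrative expansion for the signed case (registers arbitrary):
;;
;;	ptrue	p3.d
;;	fcvtzs	z4.s, p3/m, z1.d
;;	fcvtzs	z5.s, p3/m, z2.d
;;	uzp1	z0.s, z4.s, z5.s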

;; Predicated floating-point operations.
(define_insn "cond_<optab><mode>"
  [(set (match_operand:SVE_F 0 "register_operand" "=w")
	(unspec:SVE_F
	  [(match_operand:<VPRED> 1 "register_operand" "Upl")
	   (match_operand:SVE_F 2 "register_operand" "0")
	   (match_operand:SVE_F 3 "register_operand" "w")]
	  SVE_COND_FP_OP))]
  "TARGET_SVE"
  "<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
)

;; Shift an SVE vector left and insert a scalar into element 0.
(define_insn "vec_shl_insert_<mode>"
  [(set (match_operand:SVE_ALL 0 "register_operand" "=w, w")
	(unspec:SVE_ALL
	  [(match_operand:SVE_ALL 1 "register_operand" "0, 0")
	   (match_operand:<VEL> 2 "register_operand" "rZ, w")]
	  UNSPEC_INSR))]
  "TARGET_SVE"
  "@
   insr\t%0.<Vetype>, %<vwcore>2
   insr\t%0.<Vetype>, %<Vetype>2"
)
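
;; Illustrative forms of the two alternatives for VNx4SI, taking the
;; scalar from a general register or from a SIMD&FP register:
;;
;;	insr	z0.s, w1
;;	insr	z0.s, s1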