;; Machine description for AArch64 SVE2.
;; Copyright (C) 2019-2022 Free Software Foundation, Inc.
;; Contributed by ARM Ltd.
;;
;; This file is part of GCC.
;;
;; GCC is free software; you can redistribute it and/or modify it
;; under the terms of the GNU General Public License as published by
;; the Free Software Foundation; either version 3, or (at your option)
;; any later version.
;;
;; GCC is distributed in the hope that it will be useful, but
;; WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
;; General Public License for more details.
;;
;; You should have received a copy of the GNU General Public License
;; along with GCC; see the file COPYING3.  If not see
;; <http://www.gnu.org/licenses/>.

;; The file is organised into the following sections (search for the full
;; line):
;;
;; == Moves
;; ---- Non-temporal gather loads
;; ---- Non-temporal scatter stores
;;
;; == Uniform binary arithmetic
;; ---- [INT] Multiplication
;; ---- [INT] Scaled high-part multiplication
;; ---- [INT] General binary arithmetic that maps to unspecs
;; ---- [INT] Saturating binary arithmetic
;; ---- [INT] Saturating left shifts
;;
;; == Uniform ternary arithmetic
;; ---- [INT] General ternary arithmetic that maps to unspecs
;; ---- [INT] Multiply-and-accumulate operations
;; ---- [INT] Binary logic operations with rotation
;; ---- [INT] Ternary logic operations
;; ---- [INT] Shift-and-accumulate operations
;; ---- [INT] Shift-and-insert operations
;; ---- [INT] Sum of absolute differences
;;
;; == Extending arithmetic
;; ---- [INT] Wide binary arithmetic
;; ---- [INT] Long binary arithmetic
;; ---- [INT] Long left shifts
;; ---- [INT] Long binary arithmetic with accumulation
;; ---- [FP] Long multiplication with accumulation
;;
;; == Narrowing arithmetic
;; ---- [INT] Narrowing unary arithmetic
;; ---- [INT] Narrowing binary arithmetic
;; ---- [INT] Narrowing right shifts
;;
;; == Pairwise arithmetic
;; ---- [INT] Pairwise arithmetic
;; ---- [FP] Pairwise arithmetic
;; ---- [INT] Pairwise arithmetic with accumulation
;;
;; == Complex arithmetic
;; ---- [INT] Complex binary operations
;; ---- [INT] Complex ternary operations
;; ---- [INT] Complex dot product
;;
;; == Conversions
;; ---- [FP<-FP] Widening conversions
;; ---- [FP<-FP] Narrowing conversions
;;
;; == Other arithmetic
;; ---- [INT] Reciprocal approximation
;; ---- [INT<-FP] Base-2 logarithm
;; ---- [INT] Polynomial multiplication
;;
;; == Permutation
;; ---- [INT,FP] General permutes
;; ---- [INT] Optional bit-permute extensions
;;
;; == General
;; ---- Check for aliases between pointers
;; ---- Histogram processing
;; ---- String matching
;;
;; == Cryptographic extensions
;; ---- Optional AES extensions
;; ---- Optional SHA-3 extensions
;; ---- Optional SM4 extensions

;; =========================================================================
;; == Moves
;; =========================================================================

;; -------------------------------------------------------------------------
;; ---- Non-temporal gather loads
;; -------------------------------------------------------------------------
;; Includes gather forms of:
;; - LDNT1B
;; - LDNT1D
;; - LDNT1H
;; - LDNT1W
;; -------------------------------------------------------------------------

;; Non-extending loads.
(define_insn "@aarch64_gather_ldnt<mode>"
  [(set (match_operand:SVE_FULL_SD 0 "register_operand" "=w, w")
	(unspec:SVE_FULL_SD
	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
	   (match_operand:DI 2 "aarch64_reg_or_zero" "Z, r")
	   (match_operand:<V_INT_EQUIV> 3 "register_operand" "w, w")
	   (mem:BLK (scratch))]
	  UNSPEC_LDNT1_GATHER))]
  "TARGET_SVE2"
  "@
   ldnt1<Vesize>\t%0.<Vetype>, %1/z, [%3.<Vetype>]
   ldnt1<Vesize>\t%0.<Vetype>, %1/z, [%3.<Vetype>, %2]"
)

;; Extending loads.
(define_insn_and_rewrite "@aarch64_gather_ldnt_<ANY_EXTEND:optab><SVE_FULL_SDI:mode><SVE_PARTIAL_I:mode>"
  [(set (match_operand:SVE_FULL_SDI 0 "register_operand" "=w, w")
	(unspec:SVE_FULL_SDI
	  [(match_operand:<SVE_FULL_SDI:VPRED> 4 "general_operand" "UplDnm, UplDnm")
	   (ANY_EXTEND:SVE_FULL_SDI
	     (unspec:SVE_PARTIAL_I
	       [(match_operand:<SVE_FULL_SDI:VPRED> 1 "register_operand" "Upl, Upl")
		(match_operand:DI 2 "aarch64_reg_or_zero" "Z, r")
		(match_operand:<SVE_FULL_SDI:V_INT_EQUIV> 3 "register_operand" "w, w")
		(mem:BLK (scratch))]
	       UNSPEC_LDNT1_GATHER))]
	  UNSPEC_PRED_X))]
  "TARGET_SVE2
   && (~<SVE_FULL_SDI:narrower_mask> & <SVE_PARTIAL_I:self_mask>) == 0"
  "@
   ldnt1<ANY_EXTEND:s><SVE_PARTIAL_I:Vesize>\t%0.<SVE_FULL_SDI:Vetype>, %1/z, [%3.<SVE_FULL_SDI:Vetype>]
   ldnt1<ANY_EXTEND:s><SVE_PARTIAL_I:Vesize>\t%0.<SVE_FULL_SDI:Vetype>, %1/z, [%3.<SVE_FULL_SDI:Vetype>, %2]"
  "&& !CONSTANT_P (operands[4])"
  {
    operands[4] = CONSTM1_RTX (<SVE_FULL_SDI:VPRED>mode);
  }
)
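
;; A note on the define_insn_and_rewrite idiom above, which recurs
;; throughout this file: operand 4 is the predicate of the UNSPEC_PRED_X
;; that wraps the extension.  Inactive lanes of an UNSPEC_PRED_X take
;; undefined values, so its exact value never changes the instruction
;; that is emitted; once matching is done, the "&&" rewrite step
;; therefore replaces any non-constant predicate with a constant PTRUE,
;; giving the RTL a single canonical form.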

;; -------------------------------------------------------------------------
;; ---- Non-temporal scatter stores
;; -------------------------------------------------------------------------
;; Includes scatter forms of:
;; - STNT1B
;; - STNT1D
;; - STNT1H
;; - STNT1W
;; -------------------------------------------------------------------------

;; Non-truncating stores.
(define_insn "@aarch64_scatter_stnt<mode>"
  [(set (mem:BLK (scratch))
	(unspec:BLK
	  [(match_operand:<VPRED> 0 "register_operand" "Upl, Upl")
	   (match_operand:DI 1 "aarch64_reg_or_zero" "Z, r")
	   (match_operand:<V_INT_EQUIV> 2 "register_operand" "w, w")
	   (match_operand:SVE_FULL_SD 3 "register_operand" "w, w")]
	  UNSPEC_STNT1_SCATTER))]
  "TARGET_SVE2"
  "@
   stnt1<Vesize>\t%3.<Vetype>, %0, [%2.<Vetype>]
   stnt1<Vesize>\t%3.<Vetype>, %0, [%2.<Vetype>, %1]"
)

;; Truncating stores.
(define_insn "@aarch64_scatter_stnt_<SVE_FULL_SDI:mode><SVE_PARTIAL_I:mode>"
  [(set (mem:BLK (scratch))
	(unspec:BLK
	  [(match_operand:<SVE_FULL_SDI:VPRED> 0 "register_operand" "Upl, Upl")
	   (match_operand:DI 1 "aarch64_reg_or_zero" "Z, r")
	   (match_operand:<SVE_FULL_SDI:V_INT_EQUIV> 2 "register_operand" "w, w")
	   (truncate:SVE_PARTIAL_I
	     (match_operand:SVE_FULL_SDI 3 "register_operand" "w, w"))]
	  UNSPEC_STNT1_SCATTER))]
  "TARGET_SVE2
   && (~<SVE_FULL_SDI:narrower_mask> & <SVE_PARTIAL_I:self_mask>) == 0"
  "@
   stnt1<SVE_PARTIAL_I:Vesize>\t%3.<SVE_FULL_SDI:Vetype>, %0, [%2.<SVE_FULL_SDI:Vetype>]
   stnt1<SVE_PARTIAL_I:Vesize>\t%3.<SVE_FULL_SDI:Vetype>, %0, [%2.<SVE_FULL_SDI:Vetype>, %1]"
)

;; =========================================================================
;; == Uniform binary arithmetic
;; =========================================================================

;; -------------------------------------------------------------------------
;; ---- [INT] Multiplication
;; -------------------------------------------------------------------------
;; Includes the lane forms of:
;; - MUL
;; -------------------------------------------------------------------------

(define_insn "@aarch64_mul_lane_<mode>"
  [(set (match_operand:SVE_FULL_HSDI 0 "register_operand" "=w")
	(mult:SVE_FULL_HSDI
	  (unspec:SVE_FULL_HSDI
	    [(match_operand:SVE_FULL_HSDI 2 "register_operand" "<sve_lane_con>")
	     (match_operand:SI 3 "const_int_operand")]
	    UNSPEC_SVE_LANE_SELECT)
	  (match_operand:SVE_FULL_HSDI 1 "register_operand" "w")))]
  "TARGET_SVE2"
  "mul\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>[%3]"
)

;; -------------------------------------------------------------------------
;; ---- [INT] Scaled high-part multiplication
;; -------------------------------------------------------------------------
;; The patterns in this section are synthetic.
;; -------------------------------------------------------------------------

;; Unpredicated integer multiply-high-with-(round-and-)scale.
(define_expand "<su>mulh<r>s<mode>3"
  [(set (match_operand:SVE_FULL_BHSI 0 "register_operand")
	(unspec:SVE_FULL_BHSI
	  [(match_dup 3)
	   (unspec:SVE_FULL_BHSI
	     [(match_operand:SVE_FULL_BHSI 1 "register_operand")
	      (match_operand:SVE_FULL_BHSI 2 "register_operand")]
	     MULHRS)]
	  UNSPEC_PRED_X))]
  "TARGET_SVE2"
  {
    operands[3] = aarch64_ptrue_reg (<VPRED>mode);

    rtx prod_b = gen_reg_rtx (<VWIDE>mode);
    rtx prod_t = gen_reg_rtx (<VWIDE>mode);
    emit_insn (gen_aarch64_sve_<su>mullb<Vwide> (prod_b, operands[1],
						 operands[2]));
    emit_insn (gen_aarch64_sve_<su>mullt<Vwide> (prod_t, operands[1],
						 operands[2]));

    rtx shift = GEN_INT (GET_MODE_UNIT_BITSIZE (<MODE>mode) - 1);
    emit_insn (gen_aarch64_sve_<r>shrnb<Vwide> (operands[0], prod_b, shift));
    emit_insn (gen_aarch64_sve_<r>shrnt<Vwide> (operands[0], operands[0],
						prod_t, shift));

    DONE;
  }
)
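
;; For example, for .H inputs the "smulhrs" form of the expansion above
;; produces a sequence of the form (register allocation is purely
;; illustrative):
;;
;;	SMULLB	Z3.S, Z1.H, Z2.H
;;	SMULLT	Z4.S, Z1.H, Z2.H
;;	RSHRNB	Z0.H, Z3.S, #15
;;	RSHRNT	Z0.H, Z4.S, #15
;;
;; i.e. widening multiplies of the even and odd elements, followed by
;; narrowing (rounding) right shifts by one less than the element width.
;; The non-rounding "smulhs" form uses SHRNB/SHRNT instead.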

;; -------------------------------------------------------------------------
;; ---- [INT] General binary arithmetic that maps to unspecs
;; -------------------------------------------------------------------------
;; Includes:
;; - SHADD
;; - SHSUB
;; - SHSUBR
;; - SQRSHL
;; - SQRSHLR
;; - SRHADD
;; - SRSHL
;; - SRSHLR
;; - SUQADD
;; - UHADD
;; - UHSUB
;; - UHSUBR
;; - UQRSHL
;; - UQRSHLR
;; - URHADD
;; - URSHL
;; - URSHLR
;; - USQADD
;; -------------------------------------------------------------------------

;; Integer average (floor).
(define_expand "<u>avg<mode>3_floor"
  [(set (match_operand:SVE_FULL_I 0 "register_operand")
	(unspec:SVE_FULL_I
	  [(match_dup 3)
	   (unspec:SVE_FULL_I
	     [(match_operand:SVE_FULL_I 1 "register_operand")
	      (match_operand:SVE_FULL_I 2 "register_operand")]
	     HADD)]
	  UNSPEC_PRED_X))]
  "TARGET_SVE2"
  {
    operands[3] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode));
  }
)

;; Integer average (rounding).
(define_expand "<u>avg<mode>3_ceil"
  [(set (match_operand:SVE_FULL_I 0 "register_operand")
	(unspec:SVE_FULL_I
	  [(match_dup 3)
	   (unspec:SVE_FULL_I
	     [(match_operand:SVE_FULL_I 1 "register_operand")
	      (match_operand:SVE_FULL_I 2 "register_operand")]
	     RHADD)]
	  UNSPEC_PRED_X))]
  "TARGET_SVE2"
  {
    operands[3] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode));
  }
)
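
;; For example, for unsigned .B inputs the two expanders above end up as
;; halving adds under an all-true predicate (illustrative registers):
;;
;;	UHADD	Z0.B, P0/M, Z0.B, Z1.B		// <u>avg<mode>3_floor
;;	URHADD	Z0.B, P0/M, Z0.B, Z1.B		// <u>avg<mode>3_ceil
;;
;; The signed forms use SHADD and SRHADD instead.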

;; The immediate form of SQADD acts as an immediate form of SUQADD
;; over its full range.  In contrast to the ss_plus pattern, we do
;; not need to treat byte immediates specially.  E.g.:
;;
;;	SQADD	Z0.B, Z0.B, #128
;;
;; is equivalent to:
;;
;;	MOV	Z1.B, #128
;;	SUQADD	Z0.B, P0/M, Z0.B, Z1.B
;;
;; even though it's not equivalent to:
;;
;;	MOV	Z1.B, #128
;;	SQADD	Z0.B, P0/M, Z0.B, Z1.B	// Saturating subtraction of 128
(define_insn "@aarch64_sve_suqadd<mode>_const"
  [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
	(unspec:SVE_FULL_I
	  [(match_operand:SVE_FULL_I 1 "register_operand" "0, w")
	   (match_operand:SVE_FULL_I 2 "aarch64_sve_arith_immediate")]
	  UNSPEC_SUQADD))]
  "TARGET_SVE2"
  "@
   sqadd\t%0.<Vetype>, %0.<Vetype>, #%D2
   movprfx\t%0, %1\;sqadd\t%0.<Vetype>, %0.<Vetype>, #%D2"
  [(set_attr "movprfx" "*,yes")]
)

;; General predicated binary arithmetic.  All operations handled here
;; are commutative or have a reversed form.
(define_insn "@aarch64_pred_<sve_int_op><mode>"
  [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, w, ?&w")
	(unspec:SVE_FULL_I
	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
	   (unspec:SVE_FULL_I
	     [(match_operand:SVE_FULL_I 2 "register_operand" "0, w, w")
	      (match_operand:SVE_FULL_I 3 "register_operand" "w, 0, w")]
	     SVE2_COND_INT_BINARY_REV)]
	  UNSPEC_PRED_X))]
  "TARGET_SVE2"
  "@
   <sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
   <sve_int_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
   movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
  [(set_attr "movprfx" "*,*,yes")]
)

;; Predicated binary arithmetic with merging.
(define_expand "@cond_<sve_int_op><mode>"
  [(set (match_operand:SVE_FULL_I 0 "register_operand")
	(unspec:SVE_FULL_I
	  [(match_operand:<VPRED> 1 "register_operand")
	   (unspec:SVE_FULL_I
	     [(match_dup 5)
	      (unspec:SVE_FULL_I
		[(match_operand:SVE_FULL_I 2 "register_operand")
		 (match_operand:SVE_FULL_I 3 "register_operand")]
		SVE2_COND_INT_BINARY)]
	     UNSPEC_PRED_X)
	   (match_operand:SVE_FULL_I 4 "aarch64_simd_reg_or_zero")]
	  UNSPEC_SEL))]
  "TARGET_SVE2"
  {
    operands[5] = CONSTM1_RTX (<VPRED>mode);
  }
)

;; Predicated binary arithmetic, merging with the first input.
(define_insn_and_rewrite "*cond_<sve_int_op><mode>_2"
  [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
	(unspec:SVE_FULL_I
	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
	   (unspec:SVE_FULL_I
	     [(match_operand 4)
	      (unspec:SVE_FULL_I
		[(match_operand:SVE_FULL_I 2 "register_operand" "0, w")
		 (match_operand:SVE_FULL_I 3 "register_operand" "w, w")]
		SVE2_COND_INT_BINARY)]
	     UNSPEC_PRED_X)
	   (match_dup 2)]
	  UNSPEC_SEL))]
  "TARGET_SVE2"
  "@
   <sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
   movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
  "&& !CONSTANT_P (operands[4])"
  {
    operands[4] = CONSTM1_RTX (<VPRED>mode);
  }
  [(set_attr "movprfx" "*,yes")]
)

;; Predicated binary arithmetic, merging with the second input.
(define_insn_and_rewrite "*cond_<sve_int_op><mode>_3"
  [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
	(unspec:SVE_FULL_I
	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
	   (unspec:SVE_FULL_I
	     [(match_operand 4)
	      (unspec:SVE_FULL_I
		[(match_operand:SVE_FULL_I 2 "register_operand" "w, w")
		 (match_operand:SVE_FULL_I 3 "register_operand" "0, w")]
		SVE2_COND_INT_BINARY_REV)]
	     UNSPEC_PRED_X)
	   (match_dup 3)]
	  UNSPEC_SEL))]
  "TARGET_SVE2"
  "@
   <sve_int_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
   movprfx\t%0, %3\;<sve_int_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>"
  "&& !CONSTANT_P (operands[4])"
  {
    operands[4] = CONSTM1_RTX (<VPRED>mode);
  }
  [(set_attr "movprfx" "*,yes")]
)

;; Predicated binary operations, merging with an independent value.
(define_insn_and_rewrite "*cond_<sve_int_op><mode>_any"
  [(set (match_operand:SVE_FULL_I 0 "register_operand" "=&w, &w, &w, &w, ?&w")
	(unspec:SVE_FULL_I
	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl, Upl")
	   (unspec:SVE_FULL_I
	     [(match_operand 5)
	      (unspec:SVE_FULL_I
		[(match_operand:SVE_FULL_I 2 "register_operand" "0, w, w, w, w")
		 (match_operand:SVE_FULL_I 3 "register_operand" "w, 0, w, w, w")]
		SVE2_COND_INT_BINARY_REV)]
	     UNSPEC_PRED_X)
	   (match_operand:SVE_FULL_I 4 "aarch64_simd_reg_or_zero" "Dz, Dz, Dz, 0, w")]
	  UNSPEC_SEL))]
  "TARGET_SVE2
   && !rtx_equal_p (operands[2], operands[4])
   && !rtx_equal_p (operands[3], operands[4])"
  "@
   movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
   movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_int_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
   movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
   movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
   #"
  "&& 1"
  {
    if (reload_completed
        && register_operand (operands[4], <MODE>mode)
        && !rtx_equal_p (operands[0], operands[4]))
      {
	emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2],
						 operands[4], operands[1]));
	operands[4] = operands[2] = operands[0];
      }
    else if (!CONSTANT_P (operands[5]))
      operands[5] = CONSTM1_RTX (<VPRED>mode);
    else
      FAIL;
  }
  [(set_attr "movprfx" "yes")]
)
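
;; The final alternative above (in which operand 4 is a register distinct
;; from all the other operands) deliberately matches as "#".  After
;; reload, the rewrite step first emits a SEL that merges operand 2 with
;; operand 4 under the predicate, then performs the operation with the
;; destination tied, e.g. for SHADD (illustrative registers):
;;
;;	SEL	Z0.B, P0, Z2.B, Z4.B
;;	SHADD	Z0.B, P0/M, Z0.B, Z3.B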

;; Predicated binary operations with no reverse form, merging with zero.
;; At present we don't generate these patterns via a cond_* optab,
;; so there's no correctness requirement to handle merging with an
;; independent value.
(define_insn_and_rewrite "*cond_<sve_int_op><mode>_z"
  [(set (match_operand:SVE_FULL_I 0 "register_operand" "=&w, &w")
	(unspec:SVE_FULL_I
	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
	   (unspec:SVE_FULL_I
	     [(match_operand 5)
	      (unspec:SVE_FULL_I
		[(match_operand:SVE_FULL_I 2 "register_operand" "0, w")
		 (match_operand:SVE_FULL_I 3 "register_operand" "w, w")]
		SVE2_COND_INT_BINARY_NOREV)]
	     UNSPEC_PRED_X)
	   (match_operand:SVE_FULL_I 4 "aarch64_simd_imm_zero")]
	  UNSPEC_SEL))]
  "TARGET_SVE2"
  "@
   movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
   movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
  "&& !CONSTANT_P (operands[5])"
  {
    operands[5] = CONSTM1_RTX (<VPRED>mode);
  }
  [(set_attr "movprfx" "yes")]
)

;; -------------------------------------------------------------------------
;; ---- [INT] Saturating binary arithmetic
;; -------------------------------------------------------------------------
;; Includes:
;; - SQDMULH
;; - SQRDMULH
;; -------------------------------------------------------------------------

(define_insn "@aarch64_sve_<sve_int_op><mode>"
  [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w")
	(unspec:SVE_FULL_I
	  [(match_operand:SVE_FULL_I 1 "register_operand" "w")
	   (match_operand:SVE_FULL_I 2 "register_operand" "w")]
	  SVE2_INT_BINARY))]
  "TARGET_SVE2"
  "<sve_int_op>\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>"
)

(define_insn "@aarch64_sve_<sve_int_op>_lane_<mode>"
  [(set (match_operand:SVE_FULL_HSDI 0 "register_operand" "=w")
	(unspec:SVE_FULL_HSDI
	  [(match_operand:SVE_FULL_HSDI 1 "register_operand" "w")
	   (unspec:SVE_FULL_HSDI
	     [(match_operand:SVE_FULL_HSDI 2 "register_operand" "<sve_lane_con>")
	      (match_operand:SI 3 "const_int_operand")]
	     UNSPEC_SVE_LANE_SELECT)]
	  SVE2_INT_BINARY_LANE))]
  "TARGET_SVE2"
  "<sve_int_op>\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>[%3]"
)

;; -------------------------------------------------------------------------
;; ---- [INT] Saturating left shifts
;; -------------------------------------------------------------------------
;; Includes:
;; - SQSHL
;; - SQSHLR
;; - UQSHL
;; - UQSHLR
;; -------------------------------------------------------------------------

;; Predicated left shifts.
(define_insn "@aarch64_pred_<sve_int_op><mode>"
  [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, w, w, ?&w, ?&w")
	(unspec:SVE_FULL_I
	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl, Upl")
	   (unspec:SVE_FULL_I
	     [(match_operand:SVE_FULL_I 2 "register_operand" "0, 0, w, w, w")
	      (match_operand:SVE_FULL_I 3 "aarch64_sve_<lr>shift_operand" "D<lr>, w, 0, D<lr>, w")]
	     SVE2_COND_INT_SHIFT)]
	  UNSPEC_PRED_X))]
  "TARGET_SVE2"
  "@
   <sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
   <sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
   <sve_int_op>r\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
   movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
   movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
  [(set_attr "movprfx" "*,*,*,yes,yes")]
)

;; Predicated left shifts with merging.
(define_expand "@cond_<sve_int_op><mode>"
  [(set (match_operand:SVE_FULL_I 0 "register_operand")
	(unspec:SVE_FULL_I
	  [(match_operand:<VPRED> 1 "register_operand")
	   (unspec:SVE_FULL_I
	     [(match_dup 5)
	      (unspec:SVE_FULL_I
		[(match_operand:SVE_FULL_I 2 "register_operand")
		 (match_operand:SVE_FULL_I 3 "aarch64_sve_<lr>shift_operand")]
		SVE2_COND_INT_SHIFT)]
	     UNSPEC_PRED_X)
	   (match_operand:SVE_FULL_I 4 "register_operand")]
	  UNSPEC_SEL))]
  "TARGET_SVE2"
  {
    operands[5] = CONSTM1_RTX (<VPRED>mode);
  }
)

;; Predicated left shifts, merging with the first input.
(define_insn_and_rewrite "*cond_<sve_int_op><mode>_2"
  [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, w, ?&w, ?&w")
	(unspec:SVE_FULL_I
	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl")
	   (unspec:SVE_FULL_I
	     [(match_operand 4)
	      (unspec:SVE_FULL_I
		[(match_operand:SVE_FULL_I 2 "register_operand" "0, 0, w, w")
		 (match_operand:SVE_FULL_I 3 "aarch64_sve_<lr>shift_operand" "D<lr>, w, D<lr>, w")]
		SVE2_COND_INT_SHIFT)]
	     UNSPEC_PRED_X)
	   (match_dup 2)]
	  UNSPEC_SEL))]
  "TARGET_SVE2"
  "@
   <sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
   <sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
   movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
   movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
  "&& !CONSTANT_P (operands[4])"
  {
    operands[4] = CONSTM1_RTX (<VPRED>mode);
  }
  [(set_attr "movprfx" "*,*,yes,yes")]
)

;; Predicated left shifts, merging with the second input.
(define_insn_and_rewrite "*cond_<sve_int_op><mode>_3"
  [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
	(unspec:SVE_FULL_I
	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
	   (unspec:SVE_FULL_I
	     [(match_operand 4)
	      (unspec:SVE_FULL_I
		[(match_operand:SVE_FULL_I 2 "register_operand" "w, w")
		 (match_operand:SVE_FULL_I 3 "register_operand" "0, w")]
		SVE2_COND_INT_SHIFT)]
	     UNSPEC_PRED_X)
	   (match_dup 3)]
	  UNSPEC_SEL))]
  "TARGET_SVE2"
  "@
   <sve_int_op>r\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
   movprfx\t%0, %3\;<sve_int_op>r\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>"
  "&& !CONSTANT_P (operands[4])"
  {
    operands[4] = CONSTM1_RTX (<VPRED>mode);
  }
  [(set_attr "movprfx" "*,yes")]
)

;; Predicated left shifts, merging with an independent value.
(define_insn_and_rewrite "*cond_<sve_int_op><mode>_any"
  [(set (match_operand:SVE_FULL_I 0 "register_operand" "=&w, &w, &w, &w, &w, &w, &w, ?&w, ?&w")
	(unspec:SVE_FULL_I
	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl, Upl, Upl, Upl")
	   (unspec:SVE_FULL_I
	     [(match_operand 5)
	      (unspec:SVE_FULL_I
		[(match_operand:SVE_FULL_I 2 "register_operand" "0, 0, w, w, w, w, w, w, w")
		 (match_operand:SVE_FULL_I 3 "aarch64_sve_<lr>shift_operand" "D<lr>, w, 0, D<lr>, w, D<lr>, w, D<lr>, w")]
		SVE2_COND_INT_SHIFT)]
	     UNSPEC_PRED_X)
	   (match_operand:SVE_FULL_I 4 "aarch64_simd_reg_or_zero" "Dz, Dz, Dz, Dz, Dz, 0, 0, w, w")]
	  UNSPEC_SEL))]
  "TARGET_SVE2
   && !rtx_equal_p (operands[2], operands[4])
   && (CONSTANT_P (operands[4]) || !rtx_equal_p (operands[3], operands[4]))"
  "@
   movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
   movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
   movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_int_op>r\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
   movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
   movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
   movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
   movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
   #
   #"
  "&& 1"
  {
    if (reload_completed
        && register_operand (operands[4], <MODE>mode)
        && !rtx_equal_p (operands[0], operands[4]))
      {
	emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2],
						 operands[4], operands[1]));
	operands[4] = operands[2] = operands[0];
      }
    else if (!CONSTANT_P (operands[5]))
      operands[5] = CONSTM1_RTX (<VPRED>mode);
    else
      FAIL;
  }
  [(set_attr "movprfx" "yes")]
)

;; =========================================================================
;; == Uniform ternary arithmetic
;; =========================================================================

;; -------------------------------------------------------------------------
;; ---- [INT] General ternary arithmetic that maps to unspecs
;; -------------------------------------------------------------------------
;; Includes:
;; - ADCLB
;; - ADCLT
;; - EORBT
;; - EORTB
;; - SBCLB
;; - SBCLT
;; - SQRDMLAH
;; - SQRDMLSH
;; -------------------------------------------------------------------------

(define_insn "@aarch64_sve_<sve_int_op><mode>"
  [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
	(unspec:SVE_FULL_I
	  [(match_operand:SVE_FULL_I 2 "register_operand" "w, w")
	   (match_operand:SVE_FULL_I 3 "register_operand" "w, w")
	   (match_operand:SVE_FULL_I 1 "register_operand" "0, w")]
	  SVE2_INT_TERNARY))]
  "TARGET_SVE2"
  "@
   <sve_int_op>\t%0.<Vetype>, %2.<Vetype>, %3.<Vetype>
   movprfx\t%0, %1\;<sve_int_op>\t%0.<Vetype>, %2.<Vetype>, %3.<Vetype>"
  [(set_attr "movprfx" "*,yes")]
)

(define_insn "@aarch64_sve_<sve_int_op>_lane_<mode>"
  [(set (match_operand:SVE_FULL_HSDI 0 "register_operand" "=w, ?&w")
	(unspec:SVE_FULL_HSDI
	  [(match_operand:SVE_FULL_HSDI 2 "register_operand" "w, w")
	   (unspec:SVE_FULL_HSDI
	     [(match_operand:SVE_FULL_HSDI 3 "register_operand" "<sve_lane_con>, <sve_lane_con>")
	      (match_operand:SI 4 "const_int_operand")]
	     UNSPEC_SVE_LANE_SELECT)
	   (match_operand:SVE_FULL_HSDI 1 "register_operand" "0, w")]
	  SVE2_INT_TERNARY_LANE))]
  "TARGET_SVE2"
  "@
   <sve_int_op>\t%0.<Vetype>, %2.<Vetype>, %3.<Vetype>[%4]
   movprfx\t%0, %1\;<sve_int_op>\t%0.<Vetype>, %2.<Vetype>, %3.<Vetype>[%4]"
  [(set_attr "movprfx" "*,yes")]
)

;; -------------------------------------------------------------------------
;; ---- [INT] Multiply-and-accumulate operations
;; -------------------------------------------------------------------------
;; Includes the lane forms of:
;; - MLA
;; - MLS
;; -------------------------------------------------------------------------

(define_insn "@aarch64_sve_add_mul_lane_<mode>"
  [(set (match_operand:SVE_FULL_HSDI 0 "register_operand" "=w, ?&w")
	(plus:SVE_FULL_HSDI
	  (mult:SVE_FULL_HSDI
	    (unspec:SVE_FULL_HSDI
	      [(match_operand:SVE_FULL_HSDI 3 "register_operand" "<sve_lane_con>, <sve_lane_con>")
	       (match_operand:SI 4 "const_int_operand")]
	      UNSPEC_SVE_LANE_SELECT)
	    (match_operand:SVE_FULL_HSDI 2 "register_operand" "w, w"))
	  (match_operand:SVE_FULL_HSDI 1 "register_operand" "0, w")))]
  "TARGET_SVE2"
  "@
   mla\t%0.<Vetype>, %2.<Vetype>, %3.<Vetype>[%4]
   movprfx\t%0, %1\;mla\t%0.<Vetype>, %2.<Vetype>, %3.<Vetype>[%4]"
  [(set_attr "movprfx" "*,yes")]
)

(define_insn "@aarch64_sve_sub_mul_lane_<mode>"
  [(set (match_operand:SVE_FULL_HSDI 0 "register_operand" "=w, ?&w")
	(minus:SVE_FULL_HSDI
	  (match_operand:SVE_FULL_HSDI 1 "register_operand" "0, w")
	  (mult:SVE_FULL_HSDI
	    (unspec:SVE_FULL_HSDI
	      [(match_operand:SVE_FULL_HSDI 3 "register_operand" "<sve_lane_con>, <sve_lane_con>")
	       (match_operand:SI 4 "const_int_operand")]
	      UNSPEC_SVE_LANE_SELECT)
	    (match_operand:SVE_FULL_HSDI 2 "register_operand" "w, w"))))]
  "TARGET_SVE2"
  "@
   mls\t%0.<Vetype>, %2.<Vetype>, %3.<Vetype>[%4]
   movprfx\t%0, %1\;mls\t%0.<Vetype>, %2.<Vetype>, %3.<Vetype>[%4]"
  [(set_attr "movprfx" "*,yes")]
)

;; -------------------------------------------------------------------------
;; ---- [INT] Binary logic operations with rotation
;; -------------------------------------------------------------------------
;; Includes:
;; - XAR
;; -------------------------------------------------------------------------

(define_insn "@aarch64_sve2_xar<mode>"
  [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
	(rotatert:SVE_FULL_I
	  (xor:SVE_FULL_I
	    (match_operand:SVE_FULL_I 1 "register_operand" "%0, w")
	    (match_operand:SVE_FULL_I 2 "register_operand" "w, w"))
	  (match_operand:SVE_FULL_I 3 "aarch64_simd_rshift_imm")))]
  "TARGET_SVE2"
  "@
  xar\t%0.<Vetype>, %0.<Vetype>, %2.<Vetype>, #%3
  movprfx\t%0, %1\;xar\t%0.<Vetype>, %0.<Vetype>, %2.<Vetype>, #%3"
  [(set_attr "movprfx" "*,yes")]
)
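
;; As the RTL above shows, XAR is an exclusive OR followed by a rotate
;; right: each element of the result is ROR (%1 ^ %2, #%3).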

;; -------------------------------------------------------------------------
;; ---- [INT] Ternary logic operations
;; -------------------------------------------------------------------------
;; Includes:
;; - BCAX
;; - BSL
;; - BSL1N
;; - BSL2N
;; - EOR3
;; - NBSL
;; -------------------------------------------------------------------------

;; Unpredicated exclusive OR of AND.
(define_expand "@aarch64_sve2_bcax<mode>"
  [(set (match_operand:SVE_FULL_I 0 "register_operand")
	(xor:SVE_FULL_I
	  (and:SVE_FULL_I
	    (unspec:SVE_FULL_I
	      [(match_dup 4)
	       (not:SVE_FULL_I
		 (match_operand:SVE_FULL_I 3 "register_operand"))]
	      UNSPEC_PRED_X)
	    (match_operand:SVE_FULL_I 2 "register_operand"))
	  (match_operand:SVE_FULL_I 1 "register_operand")))]
  "TARGET_SVE2"
  {
    operands[4] = CONSTM1_RTX (<VPRED>mode);
  }
)

(define_insn_and_rewrite "*aarch64_sve2_bcax<mode>"
  [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
	(xor:SVE_FULL_I
	  (and:SVE_FULL_I
	    (unspec:SVE_FULL_I
	      [(match_operand 4)
	       (not:SVE_FULL_I
		 (match_operand:SVE_FULL_I 3 "register_operand" "w, w"))]
	      UNSPEC_PRED_X)
	    (match_operand:SVE_FULL_I 2 "register_operand" "w, w"))
	  (match_operand:SVE_FULL_I 1 "register_operand" "0, w")))]
  "TARGET_SVE2"
  "@
  bcax\t%0.d, %0.d, %2.d, %3.d
  movprfx\t%0, %1\;bcax\t%0.d, %0.d, %2.d, %3.d"
  "&& !CONSTANT_P (operands[4])"
  {
    operands[4] = CONSTM1_RTX (<VPRED>mode);
  }
  [(set_attr "movprfx" "*,yes")]
)

;; Unpredicated 3-way exclusive OR.
(define_insn "@aarch64_sve2_eor3<mode>"
  [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, w, w, ?&w")
	(xor:SVE_FULL_I
	  (xor:SVE_FULL_I
	    (match_operand:SVE_FULL_I 1 "register_operand" "0, w, w, w")
	    (match_operand:SVE_FULL_I 2 "register_operand" "w, 0, w, w"))
	  (match_operand:SVE_FULL_I 3 "register_operand" "w, w, 0, w")))]
  "TARGET_SVE2"
  "@
  eor3\t%0.d, %0.d, %2.d, %3.d
  eor3\t%0.d, %0.d, %1.d, %3.d
  eor3\t%0.d, %0.d, %1.d, %2.d
  movprfx\t%0, %1\;eor3\t%0.d, %0.d, %2.d, %3.d"
  [(set_attr "movprfx" "*,*,*,yes")]
)

;; Use NBSL for vector NOR.
(define_insn_and_rewrite "*aarch64_sve2_nor<mode>"
  [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
	(unspec:SVE_FULL_I
	  [(match_operand 3)
	   (and:SVE_FULL_I
	     (not:SVE_FULL_I
	       (match_operand:SVE_FULL_I 1 "register_operand" "%0, w"))
	     (not:SVE_FULL_I
	       (match_operand:SVE_FULL_I 2 "register_operand" "w, w")))]
	  UNSPEC_PRED_X))]
  "TARGET_SVE2"
  "@
  nbsl\t%0.d, %0.d, %2.d, %0.d
  movprfx\t%0, %1\;nbsl\t%0.d, %0.d, %2.d, %0.d"
  "&& !CONSTANT_P (operands[3])"
  {
    operands[3] = CONSTM1_RTX (<VPRED>mode);
  }
  [(set_attr "movprfx" "*,yes")]
)

;; Use NBSL for vector NAND.
(define_insn_and_rewrite "*aarch64_sve2_nand<mode>"
  [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
	(unspec:SVE_FULL_I
	  [(match_operand 3)
	   (ior:SVE_FULL_I
	     (not:SVE_FULL_I
	       (match_operand:SVE_FULL_I 1 "register_operand" "%0, w"))
	     (not:SVE_FULL_I
	       (match_operand:SVE_FULL_I 2 "register_operand" "w, w")))]
	  UNSPEC_PRED_X))]
  "TARGET_SVE2"
  "@
  nbsl\t%0.d, %0.d, %2.d, %2.d
  movprfx\t%0, %1\;nbsl\t%0.d, %0.d, %2.d, %2.d"
  "&& !CONSTANT_P (operands[3])"
  {
    operands[3] = CONSTM1_RTX (<VPRED>mode);
  }
  [(set_attr "movprfx" "*,yes")]
)

;; Unpredicated bitwise select.
;; (op3 ? bsl_mov : bsl_dup) == (((bsl_mov ^ bsl_dup) & op3) ^ bsl_dup)
(define_expand "@aarch64_sve2_bsl<mode>"
  [(set (match_operand:SVE_FULL_I 0 "register_operand")
	(xor:SVE_FULL_I
	  (and:SVE_FULL_I
	    (xor:SVE_FULL_I
	      (match_operand:SVE_FULL_I 1 "register_operand")
	      (match_operand:SVE_FULL_I 2 "register_operand"))
	    (match_operand:SVE_FULL_I 3 "register_operand"))
	  (match_dup 2)))]
  "TARGET_SVE2"
)

(define_insn "*aarch64_sve2_bsl<mode>"
  [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
	(xor:SVE_FULL_I
	  (and:SVE_FULL_I
	    (xor:SVE_FULL_I
	      (match_operand:SVE_FULL_I 1 "register_operand" "<bsl_1st>, w")
	      (match_operand:SVE_FULL_I 2 "register_operand" "<bsl_2nd>, w"))
	    (match_operand:SVE_FULL_I 3 "register_operand" "w, w"))
	  (match_dup BSL_DUP)))]
  "TARGET_SVE2"
  "@
  bsl\t%0.d, %0.d, %<bsl_dup>.d, %3.d
  movprfx\t%0, %<bsl_mov>\;bsl\t%0.d, %0.d, %<bsl_dup>.d, %3.d"
  [(set_attr "movprfx" "*,yes")]
)

;; Unpredicated bitwise inverted select.
;; (~(op3 ? bsl_mov : bsl_dup)) == (~(((bsl_mov ^ bsl_dup) & op3) ^ bsl_dup))
(define_expand "@aarch64_sve2_nbsl<mode>"
  [(set (match_operand:SVE_FULL_I 0 "register_operand")
	(unspec:SVE_FULL_I
	  [(match_dup 4)
	   (not:SVE_FULL_I
	     (xor:SVE_FULL_I
	       (and:SVE_FULL_I
		 (xor:SVE_FULL_I
		   (match_operand:SVE_FULL_I 1 "register_operand")
		   (match_operand:SVE_FULL_I 2 "register_operand"))
		 (match_operand:SVE_FULL_I 3 "register_operand"))
	       (match_dup 2)))]
	  UNSPEC_PRED_X))]
  "TARGET_SVE2"
  {
    operands[4] = CONSTM1_RTX (<VPRED>mode);
  }
)

(define_insn_and_rewrite "*aarch64_sve2_nbsl<mode>"
  [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
	(unspec:SVE_FULL_I
	  [(match_operand 4)
	   (not:SVE_FULL_I
	     (xor:SVE_FULL_I
	       (and:SVE_FULL_I
		 (xor:SVE_FULL_I
		   (match_operand:SVE_FULL_I 1 "register_operand" "<bsl_1st>, w")
		   (match_operand:SVE_FULL_I 2 "register_operand" "<bsl_2nd>, w"))
		 (match_operand:SVE_FULL_I 3 "register_operand" "w, w"))
	       (match_dup BSL_DUP)))]
	  UNSPEC_PRED_X))]
  "TARGET_SVE2"
  "@
  nbsl\t%0.d, %0.d, %<bsl_dup>.d, %3.d
  movprfx\t%0, %<bsl_mov>\;nbsl\t%0.d, %0.d, %<bsl_dup>.d, %3.d"
  "&& !CONSTANT_P (operands[4])"
  {
    operands[4] = CONSTM1_RTX (<VPRED>mode);
  }
  [(set_attr "movprfx" "*,yes")]
)

;; Unpredicated bitwise select with inverted first operand.
;; (op3 ? ~bsl_mov : bsl_dup) == ((~(bsl_mov ^ bsl_dup) & op3) ^ bsl_dup)
(define_expand "@aarch64_sve2_bsl1n<mode>"
  [(set (match_operand:SVE_FULL_I 0 "register_operand")
	(xor:SVE_FULL_I
	  (and:SVE_FULL_I
	    (unspec:SVE_FULL_I
	      [(match_dup 4)
	       (not:SVE_FULL_I
		 (xor:SVE_FULL_I
		   (match_operand:SVE_FULL_I 1 "register_operand")
		   (match_operand:SVE_FULL_I 2 "register_operand")))]
	      UNSPEC_PRED_X)
	    (match_operand:SVE_FULL_I 3 "register_operand"))
	  (match_dup 2)))]
  "TARGET_SVE2"
  {
    operands[4] = CONSTM1_RTX (<VPRED>mode);
  }
)

(define_insn_and_rewrite "*aarch64_sve2_bsl1n<mode>"
  [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
	(xor:SVE_FULL_I
	  (and:SVE_FULL_I
	    (unspec:SVE_FULL_I
	      [(match_operand 4)
	       (not:SVE_FULL_I
		 (xor:SVE_FULL_I
		   (match_operand:SVE_FULL_I 1 "register_operand" "<bsl_1st>, w")
		   (match_operand:SVE_FULL_I 2 "register_operand" "<bsl_2nd>, w")))]
	      UNSPEC_PRED_X)
	    (match_operand:SVE_FULL_I 3 "register_operand" "w, w"))
	  (match_dup BSL_DUP)))]
  "TARGET_SVE2"
  "@
  bsl1n\t%0.d, %0.d, %<bsl_dup>.d, %3.d
  movprfx\t%0, %<bsl_mov>\;bsl1n\t%0.d, %0.d, %<bsl_dup>.d, %3.d"
  "&& !CONSTANT_P (operands[4])"
  {
    operands[4] = CONSTM1_RTX (<VPRED>mode);
  }
  [(set_attr "movprfx" "*,yes")]
)

;; Unpredicated bitwise select with inverted second operand.
;; (bsl_dup ? bsl_mov : ~op3) == ((bsl_dup & bsl_mov) | (~op3 & ~bsl_dup))
(define_expand "@aarch64_sve2_bsl2n<mode>"
  [(set (match_operand:SVE_FULL_I 0 "register_operand")
	(ior:SVE_FULL_I
	  (and:SVE_FULL_I
	    (match_operand:SVE_FULL_I 1 "register_operand")
	    (match_operand:SVE_FULL_I 3 "register_operand"))
	  (unspec:SVE_FULL_I
	    [(match_dup 4)
	     (and:SVE_FULL_I
	       (not:SVE_FULL_I
		 (match_operand:SVE_FULL_I 2 "register_operand"))
	       (not:SVE_FULL_I
		 (match_dup 3)))]
	    UNSPEC_PRED_X)))]
  "TARGET_SVE2"
  {
    operands[4] = CONSTM1_RTX (<VPRED>mode);
  }
)

(define_insn_and_rewrite "*aarch64_sve2_bsl2n<mode>"
  [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
	(ior:SVE_FULL_I
	  (and:SVE_FULL_I
	    (match_operand:SVE_FULL_I 1 "register_operand" "<bsl_1st>, w")
	    (match_operand:SVE_FULL_I 2 "register_operand" "<bsl_2nd>, w"))
	  (unspec:SVE_FULL_I
	    [(match_operand 4)
	     (and:SVE_FULL_I
	       (not:SVE_FULL_I
		 (match_operand:SVE_FULL_I 3 "register_operand" "w, w"))
	       (not:SVE_FULL_I
		 (match_dup BSL_DUP)))]
	    UNSPEC_PRED_X)))]
  "TARGET_SVE2"
  "@
  bsl2n\t%0.d, %0.d, %3.d, %<bsl_dup>.d
  movprfx\t%0, %<bsl_mov>\;bsl2n\t%0.d, %0.d, %3.d, %<bsl_dup>.d"
  "&& !CONSTANT_P (operands[4])"
  {
    operands[4] = CONSTM1_RTX (<VPRED>mode);
  }
  [(set_attr "movprfx" "*,yes")]
)

;; Unpredicated bitwise select with inverted second operand, alternative form.
;; (bsl_dup ? bsl_mov : ~op3) == ((bsl_dup & bsl_mov) | (~bsl_dup & ~op3))
(define_insn_and_rewrite "*aarch64_sve2_bsl2n<mode>"
  [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
	(ior:SVE_FULL_I
	  (and:SVE_FULL_I
	    (match_operand:SVE_FULL_I 1 "register_operand" "<bsl_1st>, w")
	    (match_operand:SVE_FULL_I 2 "register_operand" "<bsl_2nd>, w"))
	  (unspec:SVE_FULL_I
	    [(match_operand 4)
	     (and:SVE_FULL_I
	       (not:SVE_FULL_I
		 (match_dup BSL_DUP))
	       (not:SVE_FULL_I
		 (match_operand:SVE_FULL_I 3 "register_operand" "w, w")))]
	    UNSPEC_PRED_X)))]
  "TARGET_SVE2"
  "@
  bsl2n\t%0.d, %0.d, %3.d, %<bsl_dup>.d
  movprfx\t%0, %<bsl_mov>\;bsl2n\t%0.d, %0.d, %3.d, %<bsl_dup>.d"
  "&& !CONSTANT_P (operands[4])"
  {
    operands[4] = CONSTM1_RTX (<VPRED>mode);
  }
  [(set_attr "movprfx" "*,yes")]
)

;; -------------------------------------------------------------------------
;; ---- [INT] Shift-and-accumulate operations
;; -------------------------------------------------------------------------
;; Includes:
;; - SRSRA
;; - SSRA
;; - URSRA
;; - USRA
;; -------------------------------------------------------------------------

;; Provide the natural unpredicated interface for SSRA and USRA.
(define_expand "@aarch64_sve_add_<sve_int_op><mode>"
  [(set (match_operand:SVE_FULL_I 0 "register_operand")
	(plus:SVE_FULL_I
	  (unspec:SVE_FULL_I
	    [(match_dup 4)
	     (SHIFTRT:SVE_FULL_I
	       (match_operand:SVE_FULL_I 2 "register_operand")
	       (match_operand:SVE_FULL_I 3 "aarch64_simd_rshift_imm"))]
	    UNSPEC_PRED_X)
	 (match_operand:SVE_FULL_I 1 "register_operand")))]
  "TARGET_SVE2"
  {
    operands[4] = CONSTM1_RTX (<VPRED>mode);
  }
)

;; Pattern-match SSRA and USRA as a predicated operation whose predicate
;; isn't needed.
(define_insn_and_rewrite "*aarch64_sve2_sra<mode>"
  [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
	(plus:SVE_FULL_I
	  (unspec:SVE_FULL_I
	    [(match_operand 4)
	     (SHIFTRT:SVE_FULL_I
	       (match_operand:SVE_FULL_I 2 "register_operand" "w, w")
	       (match_operand:SVE_FULL_I 3 "aarch64_simd_rshift_imm"))]
	    UNSPEC_PRED_X)
	 (match_operand:SVE_FULL_I 1 "register_operand" "0, w")))]
  "TARGET_SVE2"
  "@
   <sra_op>sra\t%0.<Vetype>, %2.<Vetype>, #%3
   movprfx\t%0, %1\;<sra_op>sra\t%0.<Vetype>, %2.<Vetype>, #%3"
  "&& !CONSTANT_P (operands[4])"
  {
    operands[4] = CONSTM1_RTX (<VPRED>mode);
  }
  [(set_attr "movprfx" "*,yes")]
)

;; SRSRA and URSRA.
(define_insn "@aarch64_sve_add_<sve_int_op><mode>"
  [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
	(plus:SVE_FULL_I
	  (unspec:SVE_FULL_I
	    [(match_operand:SVE_FULL_I 2 "register_operand" "w, w")
	     (match_operand:SVE_FULL_I 3 "aarch64_simd_rshift_imm")]
	    VRSHR_N)
	 (match_operand:SVE_FULL_I 1 "register_operand" "0, w")))]
  "TARGET_SVE2"
  "@
   <sur>sra\t%0.<Vetype>, %2.<Vetype>, #%3
   movprfx\t%0, %1\;<sur>sra\t%0.<Vetype>, %2.<Vetype>, #%3"
  [(set_attr "movprfx" "*,yes")]
)
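
;; Both patterns implement an accumulating right shift: for example
;;
;;	USRA	Z0.B, Z1.B, #2
;;
;; adds the logically right-shifted elements of Z1.B to Z0.B, while the
;; rounding forms (SRSRA and URSRA) round the shifted values to the
;; nearest integer before accumulating.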

;; -------------------------------------------------------------------------
;; ---- [INT] Shift-and-insert operations
;; -------------------------------------------------------------------------
;; Includes:
;; - SLI
;; - SRI
;; -------------------------------------------------------------------------

;; These instructions do not take MOVPRFX.
(define_insn "@aarch64_sve_<sve_int_op><mode>"
  [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w")
	(unspec:SVE_FULL_I
	  [(match_operand:SVE_FULL_I 1 "register_operand" "0")
	   (match_operand:SVE_FULL_I 2 "register_operand" "w")
	   (match_operand:SVE_FULL_I 3 "aarch64_simd_<lr>shift_imm")]
	  SVE2_INT_SHIFT_INSERT))]
  "TARGET_SVE2"
  "<sve_int_op>\t%0.<Vetype>, %2.<Vetype>, #%3"
)
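
;; For example, for .B elements:
;;
;;	SRI	Z0.B, Z1.B, #3
;;
;; shifts each element of Z1.B right by three bits and inserts the
;; result into the low five bits of the corresponding element of Z0.B,
;; leaving the top three bits unchanged; SLI shifts left and preserves
;; the low bits instead.  Because part of the destination survives,
;; operand 1 must be tied to operand 0.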

;; -------------------------------------------------------------------------
;; ---- [INT] Sum of absolute differences
;; -------------------------------------------------------------------------
;; Includes:
;; - SABA
;; - UABA
;; -------------------------------------------------------------------------

;; Provide the natural unpredicated interface for SABA and UABA.
(define_expand "@aarch64_sve2_<su>aba<mode>"
  [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
	(plus:SVE_FULL_I
	  (minus:SVE_FULL_I
	    (unspec:SVE_FULL_I
	      [(match_dup 4)
	       (USMAX:SVE_FULL_I
		 (match_operand:SVE_FULL_I 2 "register_operand" "w, w")
		 (match_operand:SVE_FULL_I 3 "register_operand" "w, w"))]
	      UNSPEC_PRED_X)
	    (unspec:SVE_FULL_I
	      [(match_dup 4)
	       (<max_opp>:SVE_FULL_I
		 (match_dup 2)
		 (match_dup 3))]
	      UNSPEC_PRED_X))
	  (match_operand:SVE_FULL_I 1 "register_operand" "0, w")))]
  "TARGET_SVE2"
  {
    operands[4] = CONSTM1_RTX (<VPRED>mode);
  }
)

;; Pattern-match SABA and UABA as an absolute-difference-and-accumulate
;; operation whose predicates aren't needed.
(define_insn "*aarch64_sve2_<su>aba<mode>"
  [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
	(plus:SVE_FULL_I
	  (minus:SVE_FULL_I
	    (unspec:SVE_FULL_I
	      [(match_operand 4)
	       (USMAX:SVE_FULL_I
		 (match_operand:SVE_FULL_I 2 "register_operand" "w, w")
		 (match_operand:SVE_FULL_I 3 "register_operand" "w, w"))]
	      UNSPEC_PRED_X)
	    (unspec:SVE_FULL_I
	      [(match_operand 5)
	       (<max_opp>:SVE_FULL_I
		 (match_dup 2)
		 (match_dup 3))]
	      UNSPEC_PRED_X))
	  (match_operand:SVE_FULL_I 1 "register_operand" "0, w")))]
  "TARGET_SVE2"
  "@
   <su>aba\t%0.<Vetype>, %2.<Vetype>, %3.<Vetype>
   movprfx\t%0, %1\;<su>aba\t%0.<Vetype>, %2.<Vetype>, %3.<Vetype>"
  [(set_attr "movprfx" "*,yes")]
)
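
;; In both patterns the operation is an absolute difference followed by
;; an accumulation, e.g.
;;
;;	SABA	Z0.B, Z1.B, Z2.B
;;
;; computes Z0.B += ABS (Z1.B - Z2.B) element by element; the max/min
;; form of the RTL above is simply the canonical way of expressing
;; the absolute difference (|a - b| == max (a, b) - min (a, b)).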

;; =========================================================================
;; == Extending arithmetic
;; =========================================================================

;; -------------------------------------------------------------------------
;; ---- [INT] Wide binary arithmetic
;; -------------------------------------------------------------------------
;; Includes:
;; - SADDWB
;; - SADDWT
;; - SSUBWB
;; - SSUBWT
;; - UADDWB
;; - UADDWT
;; - USUBWB
;; - USUBWT
;; -------------------------------------------------------------------------

(define_insn "@aarch64_sve_<sve_int_op><mode>"
  [(set (match_operand:SVE_FULL_HSDI 0 "register_operand" "=w")
	(unspec:SVE_FULL_HSDI
	  [(match_operand:SVE_FULL_HSDI 1 "register_operand" "w")
	   (match_operand:<VNARROW> 2 "register_operand" "w")]
	  SVE2_INT_BINARY_WIDE))]
  "TARGET_SVE2"
  "<sve_int_op>\t%0.<Vetype>, %1.<Vetype>, %2.<Ventype>"
)

;; -------------------------------------------------------------------------
;; ---- [INT] Long binary arithmetic
;; -------------------------------------------------------------------------
;; Includes:
;; - SABDLB
;; - SABDLT
;; - SADDLB
;; - SADDLBT
;; - SADDLT
;; - SMULLB
;; - SMULLT
;; - SQDMULLB
;; - SQDMULLT
;; - SSUBLB
;; - SSUBLBT
;; - SSUBLT
;; - SSUBLTB
;; - UABDLB
;; - UABDLT
;; - UADDLB
;; - UADDLT
;; - UMULLB
;; - UMULLT
;; - USUBLB
;; - USUBLT
;; -------------------------------------------------------------------------

(define_insn "@aarch64_sve_<sve_int_op><mode>"
  [(set (match_operand:SVE_FULL_HSDI 0 "register_operand" "=w")
	(unspec:SVE_FULL_HSDI
	  [(match_operand:<VNARROW> 1 "register_operand" "w")
	   (match_operand:<VNARROW> 2 "register_operand" "w")]
	  SVE2_INT_BINARY_LONG))]
  "TARGET_SVE2"
  "<sve_int_op>\t%0.<Vetype>, %1.<Ventype>, %2.<Ventype>"
)

(define_insn "@aarch64_sve_<sve_int_op>_lane_<mode>"
  [(set (match_operand:SVE_FULL_SDI 0 "register_operand" "=w")
	(unspec:SVE_FULL_SDI
	  [(match_operand:<VNARROW> 1 "register_operand" "w")
	   (unspec:<VNARROW>
	     [(match_operand:<VNARROW> 2 "register_operand" "<sve_lane_con>")
	      (match_operand:SI 3 "const_int_operand")]
	     UNSPEC_SVE_LANE_SELECT)]
	  SVE2_INT_BINARY_LONG_LANE))]
  "TARGET_SVE2"
  "<sve_int_op>\t%0.<Vetype>, %1.<Ventype>, %2.<Ventype>[%3]"
)

;; -------------------------------------------------------------------------
;; ---- [INT] Long left shifts
;; -------------------------------------------------------------------------
;; Includes:
;; - SSHLLB
;; - SSHLLT
;; - USHLLB
;; - USHLLT
;; -------------------------------------------------------------------------

;; The immediate range is enforced before generating the instruction.
(define_insn "@aarch64_sve_<sve_int_op><mode>"
  [(set (match_operand:SVE_FULL_HSDI 0 "register_operand" "=w")
	(unspec:SVE_FULL_HSDI
	  [(match_operand:<VNARROW> 1 "register_operand" "w")
	   (match_operand:DI 2 "const_int_operand")]
	  SVE2_INT_SHIFT_IMM_LONG))]
  "TARGET_SVE2"
  "<sve_int_op>\t%0.<Vetype>, %1.<Ventype>, #%2"
)

;; -------------------------------------------------------------------------
;; ---- [INT] Long binary arithmetic with accumulation
;; -------------------------------------------------------------------------
;; Includes:
;; - SABALB
;; - SABALT
;; - SMLALB
;; - SMLALT
;; - SMLSLB
;; - SMLSLT
;; - SQDMLALB
;; - SQDMLALBT
;; - SQDMLALT
;; - SQDMLSLB
;; - SQDMLSLBT
;; - SQDMLSLT
;; - UABALB
;; - UABALT
;; - UMLALB
;; - UMLALT
;; - UMLSLB
;; - UMLSLT
;; -------------------------------------------------------------------------

;; Non-saturating MLA operations.
(define_insn "@aarch64_sve_add_<sve_int_op><mode>"
  [(set (match_operand:SVE_FULL_HSDI 0 "register_operand" "=w, ?&w")
	(plus:SVE_FULL_HSDI
	  (unspec:SVE_FULL_HSDI
	    [(match_operand:<VNARROW> 2 "register_operand" "w, w")
	     (match_operand:<VNARROW> 3 "register_operand" "w, w")]
	    SVE2_INT_ADD_BINARY_LONG)
	  (match_operand:SVE_FULL_HSDI 1 "register_operand" "0, w")))]
  "TARGET_SVE2"
  "@
   <sve_int_add_op>\t%0.<Vetype>, %2.<Ventype>, %3.<Ventype>
   movprfx\t%0, %1\;<sve_int_add_op>\t%0.<Vetype>, %2.<Ventype>, %3.<Ventype>"
  [(set_attr "movprfx" "*,yes")]
)
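
;; The B and T suffixes select the even- and odd-indexed elements of the
;; narrow inputs respectively, so for example
;;
;;	SMLALB	Z0.S, Z1.H, Z2.H
;;
;; sign-extends the even-indexed halfword elements of Z1 and Z2,
;; multiplies them, and accumulates the products into the word elements
;; of Z0.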

;; Non-saturating MLA operations with lane select.
(define_insn "@aarch64_sve_add_<sve_int_op>_lane_<mode>"
  [(set (match_operand:SVE_FULL_SDI 0 "register_operand" "=w, ?&w")
	(plus:SVE_FULL_SDI
	  (unspec:SVE_FULL_SDI
	    [(match_operand:<VNARROW> 2 "register_operand" "w, w")
	     (unspec:<VNARROW>
	       [(match_operand:<VNARROW> 3 "register_operand" "<sve_lane_con>, <sve_lane_con>")
		(match_operand:SI 4 "const_int_operand")]
	       UNSPEC_SVE_LANE_SELECT)]
	    SVE2_INT_ADD_BINARY_LONG_LANE)
	  (match_operand:SVE_FULL_SDI 1 "register_operand" "0, w")))]
  "TARGET_SVE2"
  "@
   <sve_int_add_op>\t%0.<Vetype>, %2.<Ventype>, %3.<Ventype>[%4]
   movprfx\t%0, %1\;<sve_int_add_op>\t%0.<Vetype>, %2.<Ventype>, %3.<Ventype>[%4]"
  [(set_attr "movprfx" "*,yes")]
)

;; Saturating MLA operations.
(define_insn "@aarch64_sve_qadd_<sve_int_op><mode>"
  [(set (match_operand:SVE_FULL_HSDI 0 "register_operand" "=w, ?&w")
	(ss_plus:SVE_FULL_HSDI
	  (unspec:SVE_FULL_HSDI
	    [(match_operand:<VNARROW> 2 "register_operand" "w, w")
	     (match_operand:<VNARROW> 3 "register_operand" "w, w")]
	    SVE2_INT_QADD_BINARY_LONG)
	  (match_operand:SVE_FULL_HSDI 1 "register_operand" "0, w")))]
  "TARGET_SVE2"
  "@
   <sve_int_qadd_op>\t%0.<Vetype>, %2.<Ventype>, %3.<Ventype>
   movprfx\t%0, %1\;<sve_int_qadd_op>\t%0.<Vetype>, %2.<Ventype>, %3.<Ventype>"
  [(set_attr "movprfx" "*,yes")]
)

;; Saturating MLA operations with lane select.
(define_insn "@aarch64_sve_qadd_<sve_int_op>_lane_<mode>"
  [(set (match_operand:SVE_FULL_SDI 0 "register_operand" "=w, ?&w")
	(ss_plus:SVE_FULL_SDI
	  (unspec:SVE_FULL_SDI
	    [(match_operand:<VNARROW> 2 "register_operand" "w, w")
	     (unspec:<VNARROW>
	       [(match_operand:<VNARROW> 3 "register_operand" "<sve_lane_con>, <sve_lane_con>")
		(match_operand:SI 4 "const_int_operand")]
	       UNSPEC_SVE_LANE_SELECT)]
	    SVE2_INT_QADD_BINARY_LONG_LANE)
	  (match_operand:SVE_FULL_SDI 1 "register_operand" "0, w")))]
  "TARGET_SVE2"
  "@
   <sve_int_qadd_op>\t%0.<Vetype>, %2.<Ventype>, %3.<Ventype>[%4]
   movprfx\t%0, %1\;<sve_int_qadd_op>\t%0.<Vetype>, %2.<Ventype>, %3.<Ventype>[%4]"
  [(set_attr "movprfx" "*,yes")]
)

;; Non-saturating MLS operations.
(define_insn "@aarch64_sve_sub_<sve_int_op><mode>"
  [(set (match_operand:SVE_FULL_HSDI 0 "register_operand" "=w, ?&w")
	(minus:SVE_FULL_HSDI
	  (match_operand:SVE_FULL_HSDI 1 "register_operand" "0, w")
	  (unspec:SVE_FULL_HSDI
	    [(match_operand:<VNARROW> 2 "register_operand" "w, w")
	     (match_operand:<VNARROW> 3 "register_operand" "w, w")]
	    SVE2_INT_SUB_BINARY_LONG)))]
  "TARGET_SVE2"
  "@
   <sve_int_sub_op>\t%0.<Vetype>, %2.<Ventype>, %3.<Ventype>
   movprfx\t%0, %1\;<sve_int_sub_op>\t%0.<Vetype>, %2.<Ventype>, %3.<Ventype>"
  [(set_attr "movprfx" "*,yes")]
)

;; Non-saturating MLS operations with lane select.
(define_insn "@aarch64_sve_sub_<sve_int_op>_lane_<mode>"
  [(set (match_operand:SVE_FULL_SDI 0 "register_operand" "=w, ?&w")
	(minus:SVE_FULL_SDI
	  (match_operand:SVE_FULL_SDI 1 "register_operand" "0, w")
	  (unspec:SVE_FULL_SDI
	    [(match_operand:<VNARROW> 2 "register_operand" "w, w")
	     (unspec:<VNARROW>
	       [(match_operand:<VNARROW> 3 "register_operand" "<sve_lane_con>, <sve_lane_con>")
		(match_operand:SI 4 "const_int_operand")]
	       UNSPEC_SVE_LANE_SELECT)]
	    SVE2_INT_SUB_BINARY_LONG_LANE)))]
  "TARGET_SVE2"
  "@
   <sve_int_sub_op>\t%0.<Vetype>, %2.<Ventype>, %3.<Ventype>[%4]
   movprfx\t%0, %1\;<sve_int_sub_op>\t%0.<Vetype>, %2.<Ventype>, %3.<Ventype>[%4]"
  [(set_attr "movprfx" "*,yes")]
)

;; Saturating MLS operations.
(define_insn "@aarch64_sve_qsub_<sve_int_op><mode>"
  [(set (match_operand:SVE_FULL_HSDI 0 "register_operand" "=w, ?&w")
	(ss_minus:SVE_FULL_HSDI
	  (match_operand:SVE_FULL_HSDI 1 "register_operand" "0, w")
	  (unspec:SVE_FULL_HSDI
	    [(match_operand:<VNARROW> 2 "register_operand" "w, w")
	     (match_operand:<VNARROW> 3 "register_operand" "w, w")]
	    SVE2_INT_QSUB_BINARY_LONG)))]
  "TARGET_SVE2"
  "@
   <sve_int_qsub_op>\t%0.<Vetype>, %2.<Ventype>, %3.<Ventype>
   movprfx\t%0, %1\;<sve_int_qsub_op>\t%0.<Vetype>, %2.<Ventype>, %3.<Ventype>"
  [(set_attr "movprfx" "*,yes")]
)

;; Saturating MLS operations with lane select.
(define_insn "@aarch64_sve_qsub_<sve_int_op>_lane_<mode>"
  [(set (match_operand:SVE_FULL_SDI 0 "register_operand" "=w, ?&w")
	(ss_minus:SVE_FULL_SDI
	  (match_operand:SVE_FULL_SDI 1 "register_operand" "0, w")
	  (unspec:SVE_FULL_SDI
	    [(match_operand:<VNARROW> 2 "register_operand" "w, w")
	     (unspec:<VNARROW>
	       [(match_operand:<VNARROW> 3 "register_operand" "<sve_lane_con>, <sve_lane_con>")
		(match_operand:SI 4 "const_int_operand")]
	       UNSPEC_SVE_LANE_SELECT)]
	    SVE2_INT_QSUB_BINARY_LONG_LANE)))]
  "TARGET_SVE2"
  "@
   <sve_int_qsub_op>\t%0.<Vetype>, %2.<Ventype>, %3.<Ventype>[%4]
   movprfx\t%0, %1\;<sve_int_qsub_op>\t%0.<Vetype>, %2.<Ventype>, %3.<Ventype>[%4]"
  [(set_attr "movprfx" "*,yes")]
)

1489;; -------------------------------------------------------------------------
;; ---- [FP] Long multiplication with accumulation
;; -------------------------------------------------------------------------
;; Includes:
;; - FMLALB
;; - FMLALT
;; - FMLSLB
;; - FMLSLT
;; -------------------------------------------------------------------------

(define_insn "@aarch64_sve_<sve_fp_op><mode>"
  [(set (match_operand:VNx4SF_ONLY 0 "register_operand" "=w, ?&w")
	(unspec:VNx4SF_ONLY
	  [(match_operand:<VNARROW> 1 "register_operand" "w, w")
	   (match_operand:<VNARROW> 2 "register_operand" "w, w")
	   (match_operand:VNx4SF_ONLY 3 "register_operand" "0, w")]
	  SVE2_FP_TERNARY_LONG))]
  "TARGET_SVE2"
  "@
   <sve_fp_op>\t%0.<Vetype>, %1.<Ventype>, %2.<Ventype>
   movprfx\t%0, %3\;<sve_fp_op>\t%0.<Vetype>, %1.<Ventype>, %2.<Ventype>"
  [(set_attr "movprfx" "*,yes")]
)

(define_insn "@aarch64_<sve_fp_op>_lane_<mode>"
  [(set (match_operand:VNx4SF_ONLY 0 "register_operand" "=w, ?&w")
	(unspec:VNx4SF_ONLY
	  [(match_operand:<VNARROW> 1 "register_operand" "w, w")
	   (unspec:<VNARROW>
	     [(match_operand:<VNARROW> 2 "register_operand" "<sve_lane_con>, <sve_lane_con>")
	      (match_operand:SI 3 "const_int_operand")]
	     UNSPEC_SVE_LANE_SELECT)
	   (match_operand:VNx4SF_ONLY 4 "register_operand" "0, w")]
	  SVE2_FP_TERNARY_LONG_LANE))]
  "TARGET_SVE2"
  "@
   <sve_fp_op>\t%0.<Vetype>, %1.<Ventype>, %2.<Ventype>[%3]
   movprfx\t%0, %4\;<sve_fp_op>\t%0.<Vetype>, %1.<Ventype>, %2.<Ventype>[%3]"
  [(set_attr "movprfx" "*,yes")]
)
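
;; As a usage illustration (hypothetical registers, not taken from the
;; sources): because each instruction reads only the even or odd .h lanes,
;; summing the products of all lanes of two half-precision inputs into a
;; single-precision accumulator takes a bottom/top pair such as:
;;
;;   fmlalb  z0.s, z1.h, z2.h
;;   fmlalt  z0.s, z1.h, z2.h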

;; =========================================================================
;; == Narrowing arithmetic
;; =========================================================================

;; -------------------------------------------------------------------------
;; ---- [INT] Narrowing unary arithmetic
;; -------------------------------------------------------------------------
;; Includes:
;; - SQXTNB
;; - SQXTNT
;; - SQXTUNB
;; - SQXTUNT
;; - UQXTNB
;; - UQXTNT
;; -------------------------------------------------------------------------

(define_insn "@aarch64_sve_<sve_int_op><mode>"
  [(set (match_operand:<VNARROW> 0 "register_operand" "=w")
	(unspec:<VNARROW>
	  [(match_operand:SVE_FULL_HSDI 1 "register_operand" "w")]
	  SVE2_INT_UNARY_NARROWB))]
  "TARGET_SVE2"
  "<sve_int_op>\t%0.<Ventype>, %1.<Vetype>"
)

;; These instructions do not take MOVPRFX.
(define_insn "@aarch64_sve_<sve_int_op><mode>"
  [(set (match_operand:<VNARROW> 0 "register_operand" "=w")
	(unspec:<VNARROW>
	  [(match_operand:<VNARROW> 1 "register_operand" "0")
	   (match_operand:SVE_FULL_HSDI 2 "register_operand" "w")]
	  SVE2_INT_UNARY_NARROWT))]
  "TARGET_SVE2"
  "<sve_int_op>\t%0.<Ventype>, %2.<Vetype>"
)
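
;; As an illustration (hypothetical registers), narrowing two full input
;; vectors into one result uses a bottom/top pair, with the T form filling
;; the odd elements while preserving the even ones:
;;
;;   sqxtnb  z0.b, z1.h
;;   sqxtnt  z0.b, z2.h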

;; -------------------------------------------------------------------------
;; ---- [INT] Narrowing binary arithmetic
;; -------------------------------------------------------------------------
;; Includes:
;; - ADDHNB
;; - ADDHNT
;; - RADDHNB
;; - RADDHNT
;; - RSUBHNB
;; - RSUBHNT
;; - SUBHNB
;; - SUBHNT
;; -------------------------------------------------------------------------

(define_insn "@aarch64_sve_<sve_int_op><mode>"
  [(set (match_operand:<VNARROW> 0 "register_operand" "=w")
	(unspec:<VNARROW>
	  [(match_operand:SVE_FULL_HSDI 1 "register_operand" "w")
	   (match_operand:SVE_FULL_HSDI 2 "register_operand" "w")]
	  SVE2_INT_BINARY_NARROWB))]
  "TARGET_SVE2"
  "<sve_int_op>\t%0.<Ventype>, %1.<Vetype>, %2.<Vetype>"
)

;; These instructions do not take MOVPRFX.
(define_insn "@aarch64_sve_<sve_int_op><mode>"
  [(set (match_operand:<VNARROW> 0 "register_operand" "=w")
	(unspec:<VNARROW>
	  [(match_operand:<VNARROW> 1 "register_operand" "0")
	   (match_operand:SVE_FULL_HSDI 2 "register_operand" "w")
	   (match_operand:SVE_FULL_HSDI 3 "register_operand" "w")]
	  SVE2_INT_BINARY_NARROWT))]
  "TARGET_SVE2"
  "<sve_int_op>\t%0.<Ventype>, %2.<Vetype>, %3.<Vetype>"
)

;; -------------------------------------------------------------------------
;; ---- [INT] Narrowing right shifts
;; -------------------------------------------------------------------------
;; Includes:
;; - RSHRNB
;; - RSHRNT
;; - SHRNB
;; - SHRNT
;; - SQRSHRNB
;; - SQRSHRNT
;; - SQRSHRUNB
;; - SQRSHRUNT
;; - SQSHRNB
;; - SQSHRNT
;; - SQSHRUNB
;; - SQSHRUNT
;; - UQRSHRNB
;; - UQRSHRNT
;; - UQSHRNB
;; - UQSHRNT
;; -------------------------------------------------------------------------

;; The immediate range is enforced before generating the instruction.
(define_insn "@aarch64_sve_<sve_int_op><mode>"
  [(set (match_operand:<VNARROW> 0 "register_operand" "=w")
	(unspec:<VNARROW>
	  [(match_operand:SVE_FULL_HSDI 1 "register_operand" "w")
	   (match_operand:DI 2 "const_int_operand")]
	  SVE2_INT_SHIFT_IMM_NARROWB))]
  "TARGET_SVE2"
  "<sve_int_op>\t%0.<Ventype>, %1.<Vetype>, #%2"
)

;; The immediate range is enforced before generating the instruction.
;; These instructions do not take MOVPRFX.
(define_insn "@aarch64_sve_<sve_int_op><mode>"
  [(set (match_operand:<VNARROW> 0 "register_operand" "=w")
	(unspec:<VNARROW>
	  [(match_operand:<VNARROW> 1 "register_operand" "0")
	   (match_operand:SVE_FULL_HSDI 2 "register_operand" "w")
	   (match_operand:DI 3 "const_int_operand")]
	  SVE2_INT_SHIFT_IMM_NARROWT))]
  "TARGET_SVE2"
  "<sve_int_op>\t%0.<Ventype>, %2.<Vetype>, #%3"
)
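
;; The same bottom/top idiom applies here; e.g. (hypothetical registers)
;; halving the element size of two .h vectors with an 8-bit right shift:
;;
;;   shrnb   z0.b, z1.h, #8
;;   shrnt   z0.b, z2.h, #8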

;; =========================================================================
;; == Pairwise arithmetic
;; =========================================================================

;; -------------------------------------------------------------------------
;; ---- [INT] Pairwise arithmetic
;; -------------------------------------------------------------------------
;; Includes:
;; - ADDP
;; - SMAXP
;; - SMINP
;; - UMAXP
;; - UMINP
;; -------------------------------------------------------------------------

(define_insn "@aarch64_pred_<sve_int_op><mode>"
  [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
	(unspec:SVE_FULL_I
	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
	   (match_operand:SVE_FULL_I 2 "register_operand" "0, w")
	   (match_operand:SVE_FULL_I 3 "register_operand" "w, w")]
	  SVE2_INT_BINARY_PAIR))]
  "TARGET_SVE2"
  "@
   <sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
   movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
  [(set_attr "movprfx" "*,yes")]
)

;; -------------------------------------------------------------------------
;; ---- [FP] Pairwise arithmetic
;; -------------------------------------------------------------------------
;; Includes:
;; - FADDP
;; - FMAXP
;; - FMAXNMP
;; - FMINP
;; - FMINNMP
;; -------------------------------------------------------------------------

(define_insn "@aarch64_pred_<sve_fp_op><mode>"
  [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w")
	(unspec:SVE_FULL_F
	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
	   (match_operand:SVE_FULL_F 2 "register_operand" "0, w")
	   (match_operand:SVE_FULL_F 3 "register_operand" "w, w")]
	  SVE2_FP_BINARY_PAIR))]
  "TARGET_SVE2"
  "@
   <sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
   movprfx\t%0, %2\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
  [(set_attr "movprfx" "*,yes")]
)

;; -------------------------------------------------------------------------
;; ---- [INT] Pairwise arithmetic with accumulation
;; -------------------------------------------------------------------------
;; Includes:
;; - SADALP
;; - UADALP
;; -------------------------------------------------------------------------

;; Predicated pairwise add and accumulate long with merging.
(define_expand "@cond_<sve_int_op><mode>"
  [(set (match_operand:SVE_FULL_HSDI 0 "register_operand")
	(unspec:SVE_FULL_HSDI
	  [(match_operand:<VPRED> 1 "register_operand")
	   (unspec:SVE_FULL_HSDI
	     [(match_dup 1)
	      (match_operand:SVE_FULL_HSDI 2 "register_operand")
	      (match_operand:<VNARROW> 3 "register_operand")]
	     SVE2_INT_BINARY_PAIR_LONG)
	   (match_operand:SVE_FULL_HSDI 4 "aarch64_simd_reg_or_zero")]
	  UNSPEC_SEL))]
  "TARGET_SVE2"
{
  /* Only target code is aware of these operations, so we don't need
     to handle the fully-general case.  */
  gcc_assert (rtx_equal_p (operands[2], operands[4])
	      || CONSTANT_P (operands[4]));
})

;; Predicated pairwise add and accumulate long, merging with the first
;; input.
(define_insn_and_rewrite "*cond_<sve_int_op><mode>_2"
  [(set (match_operand:SVE_FULL_HSDI 0 "register_operand" "=w, ?&w")
	(unspec:SVE_FULL_HSDI
	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
	   (unspec:SVE_FULL_HSDI
	     [(match_operand 4)
	      (match_operand:SVE_FULL_HSDI 2 "register_operand" "0, w")
	      (match_operand:<VNARROW> 3 "register_operand" "w, w")]
	     SVE2_INT_BINARY_PAIR_LONG)
	   (match_dup 2)]
	  UNSPEC_SEL))]
  "TARGET_SVE2"
  "@
   <sve_int_op>\t%0.<Vetype>, %1/m, %3.<Ventype>
   movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %3.<Ventype>"
  "&& !CONSTANT_P (operands[4])"
  {
    operands[4] = CONSTM1_RTX (<VPRED>mode);
  }
  [(set_attr "movprfx" "*,yes")]
)

;; Predicated pairwise add and accumulate long, merging with zero.
(define_insn_and_rewrite "*cond_<sve_int_op><mode>_z"
  [(set (match_operand:SVE_FULL_HSDI 0 "register_operand" "=&w, &w")
	(unspec:SVE_FULL_HSDI
	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
	   (unspec:SVE_FULL_HSDI
	     [(match_operand 5)
	      (match_operand:SVE_FULL_HSDI 2 "register_operand" "0, w")
	      (match_operand:<VNARROW> 3 "register_operand" "w, w")]
	     SVE2_INT_BINARY_PAIR_LONG)
	   (match_operand:SVE_FULL_HSDI 4 "aarch64_simd_imm_zero")]
	  UNSPEC_SEL))]
  "TARGET_SVE2"
  "@
   movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %3.<Ventype>
   movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %3.<Ventype>"
  "&& !CONSTANT_P (operands[5])"
  {
    operands[5] = CONSTM1_RTX (<VPRED>mode);
  }
  [(set_attr "movprfx" "yes")]
)

;; =========================================================================
;; == Complex arithmetic
;; =========================================================================

;; -------------------------------------------------------------------------
;; ---- [INT] Complex binary operations
;; -------------------------------------------------------------------------
;; Includes:
;; - CADD
;; - SQCADD
;; -------------------------------------------------------------------------

(define_insn "@aarch64_sve_<optab><mode>"
  [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
	(unspec:SVE_FULL_I
	  [(match_operand:SVE_FULL_I 1 "register_operand" "0, w")
	   (match_operand:SVE_FULL_I 2 "register_operand" "w, w")]
	  SVE2_INT_CADD))]
  "TARGET_SVE2"
  "@
   <sve_int_op>\t%0.<Vetype>, %0.<Vetype>, %2.<Vetype>, #<rot>
   movprfx\t%0, %1\;<sve_int_op>\t%0.<Vetype>, %0.<Vetype>, %2.<Vetype>, #<rot>"
  [(set_attr "movprfx" "*,yes")]
)

;; Unpredicated optab pattern for the auto-vectorizer.
(define_expand "cadd<rot><mode>3"
  [(set (match_operand:SVE_FULL_I 0 "register_operand")
	(unspec:SVE_FULL_I
	  [(match_operand:SVE_FULL_I 1 "register_operand")
	   (match_operand:SVE_FULL_I 2 "register_operand")]
	  SVE2_INT_CADD_OP))]
  "TARGET_SVE2"
)
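
;; As a rough C-level illustration (hypothetical code, not taken from the
;; sources), a loop over interleaved real/imaginary data such as:
;;
;;   void f (int *restrict a, int *restrict b, int *restrict c, int n)
;;   {
;;     for (int i = 0; i < n; i += 2)
;;       {
;;         c[i] = a[i] - b[i + 1];      /* real -= imaginary */
;;         c[i + 1] = a[i + 1] + b[i];  /* imaginary += real */
;;       }
;;   }
;;
;; is the kind of input that the vectorizer can match to the cadd90 optab
;; and compile to a CADD with a #90 rotation.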

;; -------------------------------------------------------------------------
;; ---- [INT] Complex ternary operations
;; -------------------------------------------------------------------------
;; Includes:
;; - CMLA
;; - SQRDCMLA
;; -------------------------------------------------------------------------

(define_insn "@aarch64_sve_<optab><mode>"
  [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
	(unspec:SVE_FULL_I
	  [(match_operand:SVE_FULL_I 1 "register_operand" "0, w")
	   (match_operand:SVE_FULL_I 2 "register_operand" "w, w")
	   (match_operand:SVE_FULL_I 3 "register_operand" "w, w")]
	  SVE2_INT_CMLA))]
  "TARGET_SVE2"
  "@
   <sve_int_op>\t%0.<Vetype>, %2.<Vetype>, %3.<Vetype>, #<rot>
   movprfx\t%0, %1\;<sve_int_op>\t%0.<Vetype>, %2.<Vetype>, %3.<Vetype>, #<rot>"
  [(set_attr "movprfx" "*,yes")]
)

(define_insn "@aarch64_<optab>_lane_<mode>"
  [(set (match_operand:SVE_FULL_HSI 0 "register_operand" "=w, ?&w")
	(unspec:SVE_FULL_HSI
	  [(match_operand:SVE_FULL_HSI 1 "register_operand" "0, w")
	   (match_operand:SVE_FULL_HSI 2 "register_operand" "w, w")
	   (unspec:SVE_FULL_HSI
	     [(match_operand:SVE_FULL_HSI 3 "register_operand" "<sve_lane_con>, <sve_lane_con>")
	      (match_operand:SI 4 "const_int_operand")]
	     UNSPEC_SVE_LANE_SELECT)]
	  SVE2_INT_CMLA))]
  "TARGET_SVE2"
  "@
   <sve_int_op>\t%0.<Vetype>, %2.<Vetype>, %3.<Vetype>[%4], #<rot>
   movprfx\t%0, %1\;<sve_int_op>\t%0.<Vetype>, %2.<Vetype>, %3.<Vetype>[%4], #<rot>"
  [(set_attr "movprfx" "*,yes")]
)

;; Unpredicated optab pattern for the auto-vectorizer.
;; The complex mla/mls operations always need to expand to two instructions:
;; the first does half of the computation and the second does the remainder.
;; Because of this, expand early.
(define_expand "cml<fcmac1><conj_op><mode>4"
  [(set (match_operand:SVE_FULL_I 0 "register_operand")
	(plus:SVE_FULL_I (match_operand:SVE_FULL_I 1 "register_operand")
	  (unspec:SVE_FULL_I
	    [(match_operand:SVE_FULL_I 2 "register_operand")
	     (match_operand:SVE_FULL_I 3 "register_operand")]
	    SVE2_INT_CMLA_OP)))]
  "TARGET_SVE2"
{
  rtx tmp = gen_reg_rtx (<MODE>mode);
  emit_insn (gen_aarch64_sve_cmla<sve_rot1><mode> (tmp, operands[1],
						   operands[3], operands[2]));
  emit_insn (gen_aarch64_sve_cmla<sve_rot2><mode> (operands[0], tmp,
						   operands[3], operands[2]));
  DONE;
})
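
;; As a rough illustration (hypothetical registers; the exact rotations
;; depend on <conj_op>), the expansion above turns a non-conjugating
;; multiply-accumulate on .s elements into a sequence such as:
;;
;;   cmla    z0.s, z2.s, z1.s, #0
;;   cmla    z0.s, z2.s, z1.s, #90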

;; Unpredicated optab pattern for the auto-vectorizer.
;; The complex mul operations always need to expand to two instructions:
;; the first does half of the computation and the second does the remainder.
;; Because of this, expand early.
(define_expand "cmul<conj_op><mode>3"
  [(set (match_operand:SVE_FULL_I 0 "register_operand")
	(unspec:SVE_FULL_I
	  [(match_operand:SVE_FULL_I 1 "register_operand")
	   (match_operand:SVE_FULL_I 2 "register_operand")]
	  SVE2_INT_CMUL_OP))]
  "TARGET_SVE2"
{
  rtx accum = force_reg (<MODE>mode, CONST0_RTX (<MODE>mode));
  rtx tmp = gen_reg_rtx (<MODE>mode);
  emit_insn (gen_aarch64_sve_cmla<sve_rot1><mode> (tmp, accum,
						   operands[2], operands[1]));
  emit_insn (gen_aarch64_sve_cmla<sve_rot2><mode> (operands[0], tmp,
						   operands[2], operands[1]));
  DONE;
})
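
;; A cmul therefore typically costs three instructions: materialise a zero
;; accumulator, then apply the same two-step CMLA sequence, e.g.
;; (hypothetical registers):
;;
;;   mov     z0.s, #0
;;   cmla    z0.s, z2.s, z1.s, #0
;;   cmla    z0.s, z2.s, z1.s, #90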

;; -------------------------------------------------------------------------
;; ---- [INT] Complex dot product
;; -------------------------------------------------------------------------
;; Includes:
;; - CDOT
;; -------------------------------------------------------------------------

(define_insn "@aarch64_sve_<optab><mode>"
  [(set (match_operand:SVE_FULL_SDI 0 "register_operand" "=w, ?&w")
	(unspec:SVE_FULL_SDI
	  [(match_operand:SVE_FULL_SDI 1 "register_operand" "0, w")
	   (match_operand:<VSI2QI> 2 "register_operand" "w, w")
	   (match_operand:<VSI2QI> 3 "register_operand" "w, w")]
	  SVE2_INT_CDOT))]
  "TARGET_SVE2"
  "@
   <sve_int_op>\t%0.<Vetype>, %2.<Vetype_fourth>, %3.<Vetype_fourth>, #<rot>
   movprfx\t%0, %1\;<sve_int_op>\t%0.<Vetype>, %2.<Vetype_fourth>, %3.<Vetype_fourth>, #<rot>"
  [(set_attr "movprfx" "*,yes")]
)

(define_insn "@aarch64_<optab>_lane_<mode>"
  [(set (match_operand:SVE_FULL_SDI 0 "register_operand" "=w, ?&w")
	(unspec:SVE_FULL_SDI
	  [(match_operand:SVE_FULL_SDI 1 "register_operand" "0, w")
	   (match_operand:<VSI2QI> 2 "register_operand" "w, w")
	   (unspec:<VSI2QI>
	     [(match_operand:<VSI2QI> 3 "register_operand" "<sve_lane_con>, <sve_lane_con>")
	      (match_operand:SI 4 "const_int_operand")]
	     UNSPEC_SVE_LANE_SELECT)]
	  SVE2_INT_CDOT))]
  "TARGET_SVE2"
  "@
   <sve_int_op>\t%0.<Vetype>, %2.<Vetype_fourth>, %3.<Vetype_fourth>[%4], #<rot>
   movprfx\t%0, %1\;<sve_int_op>\t%0.<Vetype>, %2.<Vetype_fourth>, %3.<Vetype_fourth>[%4], #<rot>"
  [(set_attr "movprfx" "*,yes")]
)

;; =========================================================================
;; == Conversions
;; =========================================================================

;; -------------------------------------------------------------------------
;; ---- [FP<-FP] Widening conversions
;; -------------------------------------------------------------------------
;; Includes:
;; - FCVTLT
;; -------------------------------------------------------------------------

;; Predicated convert long top.
(define_insn "@aarch64_pred_<sve_fp_op><mode>"
  [(set (match_operand:SVE_FULL_SDF 0 "register_operand" "=w")
	(unspec:SVE_FULL_SDF
	  [(match_operand:<VPRED> 1 "register_operand" "Upl")
	   (match_operand:SI 3 "aarch64_sve_gp_strictness")
	   (match_operand:<VNARROW> 2 "register_operand" "0")]
	  SVE2_COND_FP_UNARY_LONG))]
  "TARGET_SVE2"
  "<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Ventype>"
)

;; Predicated convert long top with merging.
(define_expand "@cond_<sve_fp_op><mode>"
  [(set (match_operand:SVE_FULL_SDF 0 "register_operand")
	(unspec:SVE_FULL_SDF
	  [(match_operand:<VPRED> 1 "register_operand")
	   (unspec:SVE_FULL_SDF
	     [(match_dup 1)
	      (const_int SVE_STRICT_GP)
	      (match_operand:<VNARROW> 2 "register_operand")]
	     SVE2_COND_FP_UNARY_LONG)
	   (match_operand:SVE_FULL_SDF 3 "register_operand")]
	  UNSPEC_SEL))]
  "TARGET_SVE2"
)

;; These instructions do not take MOVPRFX.
(define_insn_and_rewrite "*cond_<sve_fp_op><mode>_relaxed"
  [(set (match_operand:SVE_FULL_SDF 0 "register_operand" "=w")
	(unspec:SVE_FULL_SDF
	  [(match_operand:<VPRED> 1 "register_operand" "Upl")
	   (unspec:SVE_FULL_SDF
	     [(match_operand 4)
	      (const_int SVE_RELAXED_GP)
	      (match_operand:<VNARROW> 2 "register_operand" "w")]
	     SVE2_COND_FP_UNARY_LONG)
	   (match_operand:SVE_FULL_SDF 3 "register_operand" "0")]
	  UNSPEC_SEL))]
  "TARGET_SVE2"
  "<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Ventype>"
  "&& !rtx_equal_p (operands[1], operands[4])"
  {
    operands[4] = copy_rtx (operands[1]);
  }
)

(define_insn "*cond_<sve_fp_op><mode>_strict"
  [(set (match_operand:SVE_FULL_SDF 0 "register_operand" "=w")
	(unspec:SVE_FULL_SDF
	  [(match_operand:<VPRED> 1 "register_operand" "Upl")
	   (unspec:SVE_FULL_SDF
	     [(match_dup 1)
	      (const_int SVE_STRICT_GP)
	      (match_operand:<VNARROW> 2 "register_operand" "w")]
	     SVE2_COND_FP_UNARY_LONG)
	   (match_operand:SVE_FULL_SDF 3 "register_operand" "0")]
	  UNSPEC_SEL))]
  "TARGET_SVE2"
  "<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Ventype>"
)

;; -------------------------------------------------------------------------
;; ---- [FP<-FP] Narrowing conversions
;; -------------------------------------------------------------------------
;; Includes:
;; - FCVTNT
;; - FCVTX
;; - FCVTXNT
;; -------------------------------------------------------------------------

;; Predicated FCVTNT.  This doesn't give a natural aarch64_pred_*/cond_*
;; pair because the even elements always have to be supplied for active
;; elements, even if the inactive elements don't matter.
;;
;; These instructions do not take MOVPRFX.
(define_insn "@aarch64_sve_cvtnt<mode>"
  [(set (match_operand:SVE_FULL_HSF 0 "register_operand" "=w")
	(unspec:SVE_FULL_HSF
	  [(match_operand:<VWIDE_PRED> 2 "register_operand" "Upl")
	   (const_int SVE_STRICT_GP)
	   (match_operand:SVE_FULL_HSF 1 "register_operand" "0")
	   (match_operand:<VWIDE> 3 "register_operand" "w")]
	  UNSPEC_COND_FCVTNT))]
  "TARGET_SVE2"
  "fcvtnt\t%0.<Vetype>, %2/m, %3.<Vewtype>"
)
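
;; As a usage illustration (hypothetical registers), narrowing two .d
;; vectors into one .s vector pairs a bottom-half FCVT with FCVTNT:
;;
;;   fcvt    z0.s, p0/m, z1.d
;;   fcvtnt  z0.s, p0/m, z2.d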

;; Predicated FCVTX (equivalent to what would be FCVTXNB, except that
;; it supports MOVPRFX).
(define_insn "@aarch64_pred_<sve_fp_op><mode>"
  [(set (match_operand:VNx4SF_ONLY 0 "register_operand" "=w, ?&w")
	(unspec:VNx4SF_ONLY
	  [(match_operand:<VWIDE_PRED> 1 "register_operand" "Upl, Upl")
	   (match_operand:SI 3 "aarch64_sve_gp_strictness")
	   (match_operand:<VWIDE> 2 "register_operand" "0, w")]
	  SVE2_COND_FP_UNARY_NARROWB))]
  "TARGET_SVE2"
  "@
   <sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vewtype>
   movprfx\t%0, %2\;<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vewtype>"
  [(set_attr "movprfx" "*,yes")]
)

;; Predicated FCVTX with merging.
(define_expand "@cond_<sve_fp_op><mode>"
  [(set (match_operand:VNx4SF_ONLY 0 "register_operand")
	(unspec:VNx4SF_ONLY
	  [(match_operand:<VWIDE_PRED> 1 "register_operand")
	   (unspec:VNx4SF_ONLY
	     [(match_dup 1)
	      (const_int SVE_STRICT_GP)
	      (match_operand:<VWIDE> 2 "register_operand")]
	     SVE2_COND_FP_UNARY_NARROWB)
	   (match_operand:VNx4SF_ONLY 3 "aarch64_simd_reg_or_zero")]
	  UNSPEC_SEL))]
  "TARGET_SVE2"
)

(define_insn_and_rewrite "*cond_<sve_fp_op><mode>_any_relaxed"
  [(set (match_operand:VNx4SF_ONLY 0 "register_operand" "=&w, &w, &w")
	(unspec:VNx4SF_ONLY
	  [(match_operand:<VWIDE_PRED> 1 "register_operand" "Upl, Upl, Upl")
	   (unspec:VNx4SF_ONLY
	     [(match_operand 4)
	      (const_int SVE_RELAXED_GP)
	      (match_operand:<VWIDE> 2 "register_operand" "w, w, w")]
	     SVE2_COND_FP_UNARY_NARROWB)
	   (match_operand:VNx4SF_ONLY 3 "aarch64_simd_reg_or_zero" "0, Dz, w")]
	  UNSPEC_SEL))]
  "TARGET_SVE2 && !rtx_equal_p (operands[2], operands[3])"
  "@
   <sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vewtype>
   movprfx\t%0.<Vewtype>, %1/z, %2.<Vewtype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vewtype>
   movprfx\t%0, %3\;<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vewtype>"
  "&& !rtx_equal_p (operands[1], operands[4])"
  {
    operands[4] = copy_rtx (operands[1]);
  }
  [(set_attr "movprfx" "*,yes,yes")]
)

(define_insn "*cond_<sve_fp_op><mode>_any_strict"
  [(set (match_operand:VNx4SF_ONLY 0 "register_operand" "=&w, &w, &w")
	(unspec:VNx4SF_ONLY
	  [(match_operand:<VWIDE_PRED> 1 "register_operand" "Upl, Upl, Upl")
	   (unspec:VNx4SF_ONLY
	     [(match_dup 1)
	      (const_int SVE_STRICT_GP)
	      (match_operand:<VWIDE> 2 "register_operand" "w, w, w")]
	     SVE2_COND_FP_UNARY_NARROWB)
	   (match_operand:VNx4SF_ONLY 3 "aarch64_simd_reg_or_zero" "0, Dz, w")]
	  UNSPEC_SEL))]
  "TARGET_SVE2 && !rtx_equal_p (operands[2], operands[3])"
  "@
   <sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vewtype>
   movprfx\t%0.<Vewtype>, %1/z, %2.<Vewtype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vewtype>
   movprfx\t%0, %3\;<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vewtype>"
  [(set_attr "movprfx" "*,yes,yes")]
)

;; Predicated FCVTXNT.  This doesn't give a natural aarch64_pred_*/cond_*
;; pair because the even elements always have to be supplied for active
;; elements, even if the inactive elements don't matter.
;;
;; These instructions do not take MOVPRFX.
(define_insn "@aarch64_sve2_cvtxnt<mode>"
  [(set (match_operand:<VNARROW> 0 "register_operand" "=w")
	(unspec:<VNARROW>
	  [(match_operand:<VPRED> 2 "register_operand" "Upl")
	   (const_int SVE_STRICT_GP)
	   (match_operand:<VNARROW> 1 "register_operand" "0")
	   (match_operand:VNx2DF_ONLY 3 "register_operand" "w")]
	  UNSPEC_COND_FCVTXNT))]
  "TARGET_SVE2"
  "fcvtxnt\t%0.<Ventype>, %2/m, %3.<Vetype>"
)
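
;; As with FCVTNT above, a full round-to-odd narrowing of two .d vectors
;; pairs FCVTX for the even elements with FCVTXNT for the odd ones
;; (hypothetical registers):
;;
;;   fcvtx   z0.s, p0/m, z1.d
;;   fcvtxnt z0.s, p0/m, z2.d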

;; =========================================================================
;; == Other arithmetic
;; =========================================================================

;; -------------------------------------------------------------------------
;; ---- [INT] Reciprocal approximation
;; -------------------------------------------------------------------------
;; Includes:
;; - URECPE
;; - URSQRTE
;; -------------------------------------------------------------------------

;; Predicated integer unary operations.
(define_insn "@aarch64_pred_<sve_int_op><mode>"
  [(set (match_operand:VNx4SI_ONLY 0 "register_operand" "=w, ?&w")
	(unspec:VNx4SI_ONLY
	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
	   (unspec:VNx4SI_ONLY
	     [(match_operand:VNx4SI_ONLY 2 "register_operand" "0, w")]
	     SVE2_U32_UNARY)]
	  UNSPEC_PRED_X))]
  "TARGET_SVE2"
  "@
   <sve_int_op>\t%0.<Vetype>, %1/m, %2.<Vetype>
   movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %2.<Vetype>"
  [(set_attr "movprfx" "*,yes")]
)

;; Predicated integer unary operations with merging.
(define_expand "@cond_<sve_int_op><mode>"
  [(set (match_operand:VNx4SI_ONLY 0 "register_operand")
	(unspec:VNx4SI_ONLY
	  [(match_operand:<VPRED> 1 "register_operand")
	   (unspec:VNx4SI_ONLY
	     [(match_dup 4)
	      (unspec:VNx4SI_ONLY
		[(match_operand:VNx4SI_ONLY 2 "register_operand")]
		SVE2_U32_UNARY)]
	     UNSPEC_PRED_X)
	   (match_operand:VNx4SI_ONLY 3 "aarch64_simd_reg_or_zero")]
	  UNSPEC_SEL))]
  "TARGET_SVE2"
  {
    operands[4] = CONSTM1_RTX (<MODE>mode);
  }
)

(define_insn_and_rewrite "*cond_<sve_int_op><mode>"
  [(set (match_operand:VNx4SI_ONLY 0 "register_operand" "=w, ?&w, ?&w")
	(unspec:VNx4SI_ONLY
	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
	   (unspec:VNx4SI_ONLY
	     [(match_operand 4)
	      (unspec:VNx4SI_ONLY
		[(match_operand:VNx4SI_ONLY 2 "register_operand" "w, w, w")]
		SVE2_U32_UNARY)]
	     UNSPEC_PRED_X)
	   (match_operand:VNx4SI_ONLY 3 "aarch64_simd_reg_or_zero" "0, Dz, w")]
	  UNSPEC_SEL))]
  "TARGET_SVE2"
  "@
   <sve_int_op>\t%0.<Vetype>, %1/m, %2.<Vetype>
   movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %2.<Vetype>
   movprfx\t%0, %3\;<sve_int_op>\t%0.<Vetype>, %1/m, %2.<Vetype>"
  "&& !CONSTANT_P (operands[4])"
  {
    operands[4] = CONSTM1_RTX (<VPRED>mode);
  }
  [(set_attr "movprfx" "*,yes,yes")]
)

;; -------------------------------------------------------------------------
;; ---- [INT<-FP] Base-2 logarithm
;; -------------------------------------------------------------------------
;; Includes:
;; - FLOGB
;; -------------------------------------------------------------------------

;; Predicated FLOGB.
(define_insn "@aarch64_pred_<sve_fp_op><mode>"
  [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w, ?&w")
	(unspec:<V_INT_EQUIV>
	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
	   (match_operand:SI 3 "aarch64_sve_gp_strictness")
	   (match_operand:SVE_FULL_F 2 "register_operand" "0, w")]
	  SVE2_COND_INT_UNARY_FP))]
  "TARGET_SVE2"
  "@
   <sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vetype>
   movprfx\t%0, %2\;<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vetype>"
  [(set_attr "movprfx" "*,yes")]
)

;; Predicated FLOGB with merging.
(define_expand "@cond_<sve_fp_op><mode>"
  [(set (match_operand:<V_INT_EQUIV> 0 "register_operand")
	(unspec:<V_INT_EQUIV>
	  [(match_operand:<VPRED> 1 "register_operand")
	   (unspec:<V_INT_EQUIV>
	     [(match_dup 1)
	      (const_int SVE_STRICT_GP)
	      (match_operand:SVE_FULL_F 2 "register_operand")]
	     SVE2_COND_INT_UNARY_FP)
	   (match_operand:<V_INT_EQUIV> 3 "aarch64_simd_reg_or_zero")]
	  UNSPEC_SEL))]
  "TARGET_SVE2"
)

(define_insn_and_rewrite "*cond_<sve_fp_op><mode>"
  [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=&w, ?&w, ?&w")
	(unspec:<V_INT_EQUIV>
	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
	   (unspec:<V_INT_EQUIV>
	     [(match_operand 4)
	      (const_int SVE_RELAXED_GP)
	      (match_operand:SVE_FULL_F 2 "register_operand" "w, w, w")]
	     SVE2_COND_INT_UNARY_FP)
	   (match_operand:<V_INT_EQUIV> 3 "aarch64_simd_reg_or_zero" "0, Dz, w")]
	  UNSPEC_SEL))]
  "TARGET_SVE2 && !rtx_equal_p (operands[2], operands[3])"
  "@
   <sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vetype>
   movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vetype>
   movprfx\t%0, %3\;<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vetype>"
  "&& !rtx_equal_p (operands[1], operands[4])"
  {
    operands[4] = copy_rtx (operands[1]);
  }
  [(set_attr "movprfx" "*,yes,yes")]
)

(define_insn "*cond_<sve_fp_op><mode>_strict"
  [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=&w, ?&w, ?&w")
	(unspec:<V_INT_EQUIV>
	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
	   (unspec:<V_INT_EQUIV>
	     [(match_dup 1)
	      (const_int SVE_STRICT_GP)
	      (match_operand:SVE_FULL_F 2 "register_operand" "w, w, w")]
	     SVE2_COND_INT_UNARY_FP)
	   (match_operand:<V_INT_EQUIV> 3 "aarch64_simd_reg_or_zero" "0, Dz, w")]
	  UNSPEC_SEL))]
  "TARGET_SVE2 && !rtx_equal_p (operands[2], operands[3])"
  "@
   <sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vetype>
   movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vetype>
   movprfx\t%0, %3\;<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vetype>"
  [(set_attr "movprfx" "*,yes,yes")]
)

;; -------------------------------------------------------------------------
;; ---- [INT] Polynomial multiplication
;; -------------------------------------------------------------------------
;; Includes:
;; - PMUL
;; - PMULLB
;; - PMULLT
;; -------------------------------------------------------------------------

;; Uniform PMUL.
(define_insn "@aarch64_sve2_pmul<mode>"
  [(set (match_operand:VNx16QI_ONLY 0 "register_operand" "=w")
	(unspec:VNx16QI_ONLY
	  [(match_operand:VNx16QI_ONLY 1 "register_operand" "w")
	   (match_operand:VNx16QI_ONLY 2 "register_operand" "w")]
	  UNSPEC_PMUL))]
  "TARGET_SVE2"
  "pmul\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>"
)

;; Extending PMUL, with the results modeled as wider vectors.
;; This representation is only possible for .H and .D, not .Q.
(define_insn "@aarch64_sve_<optab><mode>"
  [(set (match_operand:SVE_FULL_HDI 0 "register_operand" "=w")
	(unspec:SVE_FULL_HDI
	  [(match_operand:<VNARROW> 1 "register_operand" "w")
	   (match_operand:<VNARROW> 2 "register_operand" "w")]
	  SVE2_PMULL))]
  "TARGET_SVE2"
  "<sve_int_op>\t%0.<Vetype>, %1.<Ventype>, %2.<Ventype>"
)

;; Extending PMUL, with the results modeled as pairs of values.
;; This representation works for .H, .D and .Q, with .Q requiring
;; the AES extension.  (This is enforced by the mode iterator.)
(define_insn "@aarch64_sve_<optab><mode>"
  [(set (match_operand:SVE2_PMULL_PAIR_I 0 "register_operand" "=w")
	(unspec:SVE2_PMULL_PAIR_I
	  [(match_operand:SVE2_PMULL_PAIR_I 1 "register_operand" "w")
	   (match_operand:SVE2_PMULL_PAIR_I 2 "register_operand" "w")]
	  SVE2_PMULL_PAIR))]
  "TARGET_SVE2"
  "<sve_int_op>\t%0.<Vewtype>, %1.<Vetype>, %2.<Vetype>"
)
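
;; For example (illustrative registers), the 64x64->128-bit form used for
;; carry-less multiplication takes .d inputs and writes .q results, which
;; is why it depends on the AES extension:
;;
;;   pmullb  z0.q, z1.d, z2.d
;;   pmullt  z0.q, z1.d, z2.d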

;; =========================================================================
;; == Permutation
;; =========================================================================

;; -------------------------------------------------------------------------
;; ---- [INT,FP] General permutes
;; -------------------------------------------------------------------------
;; Includes:
;; - TBL (vector pair form)
;; - TBX
;; -------------------------------------------------------------------------

;; TBL on a pair of data vectors.
(define_insn "@aarch64_sve2_tbl2<mode>"
  [(set (match_operand:SVE_FULL 0 "register_operand" "=w")
	(unspec:SVE_FULL
	  [(match_operand:<VDOUBLE> 1 "register_operand" "w")
	   (match_operand:<V_INT_EQUIV> 2 "register_operand" "w")]
	  UNSPEC_TBL2))]
  "TARGET_SVE2"
  "tbl\t%0.<Vetype>, %1, %2.<Vetype>"
)

;; TBX.  These instructions do not take MOVPRFX.
(define_insn "@aarch64_sve2_tbx<mode>"
  [(set (match_operand:SVE_FULL 0 "register_operand" "=w")
	(unspec:SVE_FULL
	  [(match_operand:SVE_FULL 1 "register_operand" "0")
	   (match_operand:SVE_FULL 2 "register_operand" "w")
	   (match_operand:<V_INT_EQUIV> 3 "register_operand" "w")]
	  UNSPEC_TBX))]
  "TARGET_SVE2"
  "tbx\t%0.<Vetype>, %2.<Vetype>, %3.<Vetype>"
)

;; -------------------------------------------------------------------------
;; ---- [INT] Optional bit-permute extensions
;; -------------------------------------------------------------------------
;; Includes:
;; - BDEP
;; - BEXT
;; - BGRP
;; -------------------------------------------------------------------------

(define_insn "@aarch64_sve_<sve_int_op><mode>"
  [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w")
	(unspec:SVE_FULL_I
	  [(match_operand:SVE_FULL_I 1 "register_operand" "w")
	   (match_operand:SVE_FULL_I 2 "register_operand" "w")]
	  SVE2_INT_BITPERM))]
  "TARGET_SVE2_BITPERM"
  "<sve_int_op>\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>"
)

;; =========================================================================
;; == General
;; =========================================================================

;; -------------------------------------------------------------------------
;; ---- Check for aliases between pointers
;; -------------------------------------------------------------------------
;; The patterns in this section are synthetic: WHILERW and WHILEWR are
;; defined in aarch64-sve.md instead.
;; -------------------------------------------------------------------------

;; Use WHILERW and WHILEWR to accelerate alias checks.  This is only
;; possible if the accesses we're checking are exactly the same size
;; as an SVE vector.
(define_expand "check_<raw_war>_ptrs<mode>"
  [(match_operand:GPI 0 "register_operand")
   (unspec:VNx16BI
     [(match_operand:GPI 1 "register_operand")
      (match_operand:GPI 2 "register_operand")
      (match_operand:GPI 3 "aarch64_bytes_per_sve_vector_operand")
      (match_operand:GPI 4 "const_int_operand")]
     SVE2_WHILE_PTR)]
  "TARGET_SVE2"
{
  /* Use the widest predicate mode we can: the alignment (in bytes) caps
     the predicate element size, and the largest element size is 8 bytes.  */
  unsigned int align = INTVAL (operands[4]);
  if (align > 8)
    align = 8;
  machine_mode pred_mode = aarch64_sve_pred_mode (align).require ();

  /* Emit a WHILERW or WHILEWR, setting the condition codes based on
     the result.  */
  emit_insn (gen_while_ptest
	     (<SVE2_WHILE_PTR:unspec>, <MODE>mode, pred_mode,
	      gen_rtx_SCRATCH (pred_mode), operands[1], operands[2],
	      CONSTM1_RTX (VNx16BImode), CONSTM1_RTX (pred_mode)));

  /* Set operand 0 to true if the last bit of the predicate result is set,
     i.e. if all elements are free of dependencies.  */
  rtx cc_reg = gen_rtx_REG (CC_NZCmode, CC_REGNUM);
  rtx cmp = gen_rtx_LTU (<MODE>mode, cc_reg, const0_rtx);
  emit_insn (gen_aarch64_cstore<mode> (operands[0], cmp, cc_reg));
  DONE;
})
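
;; As a rough C-level illustration (hypothetical code, not taken from the
;; sources), a loop such as:
;;
;;   void f (int *a, int *b, int n)
;;   {
;;     for (int i = 0; i < n; i++)
;;       a[i] = b[i] + 1;
;;   }
;;
;; can be vectorized without a scalar fallback by emitting a WHILEWR at
;; run time to check that the stores to A do not overlap later loads
;; from B.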

;; -------------------------------------------------------------------------
;; ---- Histogram processing
;; -------------------------------------------------------------------------
;; Includes:
;; - HISTCNT
;; - HISTSEG
;; -------------------------------------------------------------------------

(define_insn "@aarch64_sve2_histcnt<mode>"
  [(set (match_operand:SVE_FULL_SDI 0 "register_operand" "=w")
	(unspec:SVE_FULL_SDI
	  [(match_operand:<VPRED> 1 "register_operand" "Upl")
	   (match_operand:SVE_FULL_SDI 2 "register_operand" "w")
	   (match_operand:SVE_FULL_SDI 3 "register_operand" "w")]
	  UNSPEC_HISTCNT))]
  "TARGET_SVE2"
  "histcnt\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>"
)

(define_insn "@aarch64_sve2_histseg<mode>"
  [(set (match_operand:VNx16QI_ONLY 0 "register_operand" "=w")
	(unspec:VNx16QI_ONLY
	  [(match_operand:VNx16QI_ONLY 1 "register_operand" "w")
	   (match_operand:VNx16QI_ONLY 2 "register_operand" "w")]
	  UNSPEC_HISTSEG))]
  "TARGET_SVE2"
  "histseg\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>"
)

;; -------------------------------------------------------------------------
;; ---- String matching
;; -------------------------------------------------------------------------
;; Includes:
;; - MATCH
;; - NMATCH
;; -------------------------------------------------------------------------

;; Predicated string matching.
(define_insn "@aarch64_pred_<sve_int_op><mode>"
  [(set (match_operand:<VPRED> 0 "register_operand" "=Upa")
	(unspec:<VPRED>
	  [(match_operand:<VPRED> 1 "register_operand" "Upl")
	   (match_operand:SI 2 "aarch64_sve_ptrue_flag")
	   (unspec:<VPRED>
	     [(match_operand:SVE_FULL_BHI 3 "register_operand" "w")
	      (match_operand:SVE_FULL_BHI 4 "register_operand" "w")]
	     SVE2_MATCH)]
	  UNSPEC_PRED_Z))
   (clobber (reg:CC_NZC CC_REGNUM))]
  "TARGET_SVE2"
  "<sve_int_op>\t%0.<Vetype>, %1/z, %3.<Vetype>, %4.<Vetype>"
)

;; Predicated string matching in which both the flag and predicate results
;; are interesting.
(define_insn_and_rewrite "*aarch64_pred_<sve_int_op><mode>_cc"
  [(set (reg:CC_NZC CC_REGNUM)
	(unspec:CC_NZC
	  [(match_operand:VNx16BI 1 "register_operand" "Upl")
	   (match_operand 4)
	   (match_operand:SI 5 "aarch64_sve_ptrue_flag")
	   (unspec:<VPRED>
	     [(match_operand 6)
	      (match_operand:SI 7 "aarch64_sve_ptrue_flag")
	      (unspec:<VPRED>
		[(match_operand:SVE_FULL_BHI 2 "register_operand" "w")
		 (match_operand:SVE_FULL_BHI 3 "register_operand" "w")]
		SVE2_MATCH)]
	     UNSPEC_PRED_Z)]
	  UNSPEC_PTEST))
   (set (match_operand:<VPRED> 0 "register_operand" "=Upa")
	(unspec:<VPRED>
	  [(match_dup 6)
	   (match_dup 7)
	   (unspec:<VPRED>
	     [(match_dup 2)
	      (match_dup 3)]
	     SVE2_MATCH)]
	  UNSPEC_PRED_Z))]
  "TARGET_SVE2
   && aarch64_sve_same_pred_for_ptest_p (&operands[4], &operands[6])"
  "<sve_int_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>"
  "&& !rtx_equal_p (operands[4], operands[6])"
  {
    operands[6] = copy_rtx (operands[4]);
    operands[7] = operands[5];
  }
)

;; Predicated string matching in which only the flags result is interesting.
(define_insn_and_rewrite "*aarch64_pred_<sve_int_op><mode>_ptest"
  [(set (reg:CC_NZC CC_REGNUM)
	(unspec:CC_NZC
	  [(match_operand:VNx16BI 1 "register_operand" "Upl")
	   (match_operand 4)
	   (match_operand:SI 5 "aarch64_sve_ptrue_flag")
	   (unspec:<VPRED>
	     [(match_operand 6)
	      (match_operand:SI 7 "aarch64_sve_ptrue_flag")
	      (unspec:<VPRED>
		[(match_operand:SVE_FULL_BHI 2 "register_operand" "w")
		 (match_operand:SVE_FULL_BHI 3 "register_operand" "w")]
		SVE2_MATCH)]
	     UNSPEC_PRED_Z)]
	  UNSPEC_PTEST))
   (clobber (match_scratch:<VPRED> 0 "=Upa"))]
  "TARGET_SVE2
   && aarch64_sve_same_pred_for_ptest_p (&operands[4], &operands[6])"
  "<sve_int_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>"
  "&& !rtx_equal_p (operands[4], operands[6])"
  {
    operands[6] = copy_rtx (operands[4]);
    operands[7] = operands[5];
  }
)

;; =========================================================================
;; == Cryptographic extensions
;; =========================================================================

;; -------------------------------------------------------------------------
;; ---- Optional AES extensions
;; -------------------------------------------------------------------------
;; Includes:
;; - AESD
;; - AESE
;; - AESIMC
;; - AESMC
;; -------------------------------------------------------------------------

;; AESD and AESE.
(define_insn "aarch64_sve2_aes<aes_op>"
  [(set (match_operand:VNx16QI 0 "register_operand" "=w")
	(unspec:VNx16QI
	  [(xor:VNx16QI
	     (match_operand:VNx16QI 1 "register_operand" "%0")
	     (match_operand:VNx16QI 2 "register_operand" "w"))]
	  CRYPTO_AES))]
  "TARGET_SVE2_AES"
  "aes<aes_op>\t%0.b, %0.b, %2.b"
  [(set_attr "type" "crypto_aese")]
)

;; AESMC and AESIMC.  These instructions do not take MOVPRFX.
(define_insn "aarch64_sve2_aes<aesmc_op>"
  [(set (match_operand:VNx16QI 0 "register_operand" "=w")
	(unspec:VNx16QI
	  [(match_operand:VNx16QI 1 "register_operand" "0")]
	  CRYPTO_AESMC))]
  "TARGET_SVE2_AES"
  "aes<aesmc_op>\t%0.b, %0.b"
  [(set_attr "type" "crypto_aesmc")]
)

;; When AESE/AESMC and AESD/AESIMC fusion is enabled, we really want
;; to keep the two together and enforce the register dependency without
;; scheduling or register allocation messing up the order or introducing
;; moves in between.  Mash the two together during combine.

(define_insn "*aarch64_sve2_aese_fused"
  [(set (match_operand:VNx16QI 0 "register_operand" "=w")
	(unspec:VNx16QI
	  [(unspec:VNx16QI
	     [(xor:VNx16QI
		(match_operand:VNx16QI 1 "register_operand" "%0")
		(match_operand:VNx16QI 2 "register_operand" "w"))]
	     UNSPEC_AESE)]
	  UNSPEC_AESMC))]
  "TARGET_SVE2_AES && aarch64_fusion_enabled_p (AARCH64_FUSE_AES_AESMC)"
  "aese\t%0.b, %0.b, %2.b\;aesmc\t%0.b, %0.b"
  [(set_attr "type" "crypto_aese")
   (set_attr "length" "8")]
)

(define_insn "*aarch64_sve2_aesd_fused"
  [(set (match_operand:VNx16QI 0 "register_operand" "=w")
	(unspec:VNx16QI
	  [(unspec:VNx16QI
	     [(xor:VNx16QI
		(match_operand:VNx16QI 1 "register_operand" "%0")
		(match_operand:VNx16QI 2 "register_operand" "w"))]
	     UNSPEC_AESD)]
	  UNSPEC_AESIMC))]
  "TARGET_SVE2_AES && aarch64_fusion_enabled_p (AARCH64_FUSE_AES_AESMC)"
  "aesd\t%0.b, %0.b, %2.b\;aesimc\t%0.b, %0.b"
  [(set_attr "type" "crypto_aese")
   (set_attr "length" "8")]
)

;; -------------------------------------------------------------------------
;; ---- Optional SHA-3 extensions
;; -------------------------------------------------------------------------
;; Includes:
;; - RAX1
;; -------------------------------------------------------------------------

(define_insn "aarch64_sve2_rax1"
  [(set (match_operand:VNx2DI 0 "register_operand" "=w")
	(xor:VNx2DI
	  (rotate:VNx2DI
	    (match_operand:VNx2DI 2 "register_operand" "w")
	    (const_int 1))
	  (match_operand:VNx2DI 1 "register_operand" "w")))]
  "TARGET_SVE2_SHA3"
  "rax1\t%0.d, %1.d, %2.d"
  [(set_attr "type" "crypto_sha3")]
)

;; -------------------------------------------------------------------------
;; ---- Optional SM4 extensions
;; -------------------------------------------------------------------------
;; Includes:
;; - SM4E
;; - SM4EKEY
;; -------------------------------------------------------------------------

;; These instructions do not take MOVPRFX.
(define_insn "aarch64_sve2_sm4e"
  [(set (match_operand:VNx4SI 0 "register_operand" "=w")
	(unspec:VNx4SI
	  [(match_operand:VNx4SI 1 "register_operand" "0")
	   (match_operand:VNx4SI 2 "register_operand" "w")]
	  UNSPEC_SM4E))]
  "TARGET_SVE2_SM4"
  "sm4e\t%0.s, %0.s, %2.s"
  [(set_attr "type" "crypto_sm4")]
)

(define_insn "aarch64_sve2_sm4ekey"
  [(set (match_operand:VNx4SI 0 "register_operand" "=w")
	(unspec:VNx4SI
	  [(match_operand:VNx4SI 1 "register_operand" "w")
	   (match_operand:VNx4SI 2 "register_operand" "w")]
	  UNSPEC_SM4EKEY))]
  "TARGET_SVE2_SM4"
  "sm4ekey\t%0.s, %1.s, %2.s"
  [(set_attr "type" "crypto_sm4")]
)
