1 /* ACLE support for AArch64 SVE (__ARM_FEATURE_SVE intrinsics)
2    Copyright (C) 2018-2020 Free Software Foundation, Inc.
3 
4    This file is part of GCC.
5 
6    GCC is free software; you can redistribute it and/or modify it
7    under the terms of the GNU General Public License as published by
8    the Free Software Foundation; either version 3, or (at your option)
9    any later version.
10 
11    GCC is distributed in the hope that it will be useful, but
12    WITHOUT ANY WARRANTY; without even the implied warranty of
13    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14    General Public License for more details.
15 
16    You should have received a copy of the GNU General Public License
17    along with GCC; see the file COPYING3.  If not see
18    <http://www.gnu.org/licenses/>.  */
19 
20 #include "config.h"
21 #include "system.h"
22 #include "coretypes.h"
23 #include "tm.h"
24 #include "tree.h"
25 #include "rtl.h"
26 #include "tm_p.h"
27 #include "memmodel.h"
28 #include "insn-codes.h"
29 #include "optabs.h"
30 #include "recog.h"
31 #include "expr.h"
32 #include "basic-block.h"
33 #include "function.h"
34 #include "fold-const.h"
35 #include "gimple.h"
36 #include "gimple-iterator.h"
37 #include "gimplify.h"
38 #include "explow.h"
39 #include "emit-rtl.h"
40 #include "tree-vector-builder.h"
41 #include "rtx-vector-builder.h"
42 #include "vec-perm-indices.h"
43 #include "aarch64-sve-builtins.h"
44 #include "aarch64-sve-builtins-shapes.h"
45 #include "aarch64-sve-builtins-base.h"
46 #include "aarch64-sve-builtins-functions.h"
47 
48 using namespace aarch64_sve;
49 
50 namespace {
51 
52 /* Return the UNSPEC_CMLA* unspec for rotation amount ROT.  */
53 static int
54 unspec_cmla (int rot)
55 {
56   switch (rot)
57     {
58     case 0: return UNSPEC_CMLA;
59     case 90: return UNSPEC_CMLA90;
60     case 180: return UNSPEC_CMLA180;
61     case 270: return UNSPEC_CMLA270;
62     default: gcc_unreachable ();
63     }
64 }
65 
66 /* Return the UNSPEC_FCMLA* unspec for rotation amount ROT.  */
67 static int
68 unspec_fcmla (int rot)
69 {
70   switch (rot)
71     {
72     case 0: return UNSPEC_FCMLA;
73     case 90: return UNSPEC_FCMLA90;
74     case 180: return UNSPEC_FCMLA180;
75     case 270: return UNSPEC_FCMLA270;
76     default: gcc_unreachable ();
77     }
78 }
79 
80 /* Return the UNSPEC_COND_FCMLA* unspec for rotation amount ROT.  */
81 static int
82 unspec_cond_fcmla (int rot)
83 {
84   switch (rot)
85     {
86     case 0: return UNSPEC_COND_FCMLA;
87     case 90: return UNSPEC_COND_FCMLA90;
88     case 180: return UNSPEC_COND_FCMLA180;
89     case 270: return UNSPEC_COND_FCMLA270;
90     default: gcc_unreachable ();
91     }
92 }
93 
94 /* Expand a call to svmad, or svmla after reordering its operands.
95    Make _m forms merge with argument MERGE_ARGNO.  */
96 static rtx
97 expand_mad (function_expander &e,
98 	    unsigned int merge_argno = DEFAULT_MERGE_ARGNO)
99 {
100   if (e.pred == PRED_x)
101     {
102       insn_code icode;
103       if (e.type_suffix (0).integer_p)
104 	icode = code_for_aarch64_pred_fma (e.vector_mode (0));
105       else
106 	icode = code_for_aarch64_pred (UNSPEC_COND_FMLA, e.vector_mode (0));
107       return e.use_pred_x_insn (icode);
108     }
109 
110   insn_code icode = e.direct_optab_handler (cond_fma_optab);
111   return e.use_cond_insn (icode, merge_argno);
112 }
113 
114 /* Expand a call to svmla_lane or svmls_lane using floating-point unspec
115    UNSPEC.  */
116 static rtx
117 expand_mla_mls_lane (function_expander &e, int unspec)
118 {
119   /* Put the operands in the normal (fma ...) order, with the accumulator
120      last.  This fits naturally since that's also the unprinted operand
121      in the asm output.  */
122   e.rotate_inputs_left (0, 4);
123   insn_code icode = code_for_aarch64_lane (unspec, e.vector_mode (0));
124   return e.use_exact_insn (icode);
125 }
126 
127 /* Expand a call to svmsb, or svmls after reordering its operands.
128    Make _m forms merge with argument MERGE_ARGNO.  */
129 static rtx
130 expand_msb (function_expander &e,
131 	    unsigned int merge_argno = DEFAULT_MERGE_ARGNO)
132 {
133   if (e.pred == PRED_x)
134     {
135       insn_code icode;
136       if (e.type_suffix (0).integer_p)
137 	icode = code_for_aarch64_pred_fnma (e.vector_mode (0));
138       else
139 	icode = code_for_aarch64_pred (UNSPEC_COND_FMLS, e.vector_mode (0));
140       return e.use_pred_x_insn (icode);
141     }
142 
143   insn_code icode = e.direct_optab_handler (cond_fnma_optab);
144   return e.use_cond_insn (icode, merge_argno);
145 }
146 
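/* Implements svabd.  */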
147 class svabd_impl : public function_base
148 {
149 public:
150   rtx
151   expand (function_expander &e) const OVERRIDE
152   {
153     /* The integer operations are represented as the subtraction of the
154        minimum from the maximum, with the signedness of the instruction
155        keyed off the signedness of the maximum operation.  */
156     rtx_code max_code = e.type_suffix (0).unsigned_p ? UMAX : SMAX;
157     insn_code icode;
158     if (e.pred == PRED_x)
159       {
160 	if (e.type_suffix (0).integer_p)
161 	  icode = code_for_aarch64_pred_abd (max_code, e.vector_mode (0));
162 	else
163 	  icode = code_for_aarch64_pred_abd (e.vector_mode (0));
164 	return e.use_pred_x_insn (icode);
165       }
166 
167     if (e.type_suffix (0).integer_p)
168       icode = code_for_aarch64_cond_abd (max_code, e.vector_mode (0));
169     else
170       icode = code_for_aarch64_cond_abd (e.vector_mode (0));
171     return e.use_cond_insn (icode);
172   }
173 };
174 
175 /* Implements svacge, svacgt, svacle and svaclt.  */
176 class svac_impl : public function_base
177 {
178 public:
179   CONSTEXPR svac_impl (int unspec) : m_unspec (unspec) {}
180 
181   rtx
182   expand (function_expander &e) const OVERRIDE
183   {
184     e.add_ptrue_hint (0, e.gp_mode (0));
185     insn_code icode = code_for_aarch64_pred_fac (m_unspec, e.vector_mode (0));
186     return e.use_exact_insn (icode);
187   }
188 
189   /* The unspec code for the underlying comparison.  */
190   int m_unspec;
191 };
192 
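/* Implements svadda.  */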
193 class svadda_impl : public function_base
194 {
195 public:
196   rtx
197   expand (function_expander &e) const OVERRIDE
198   {
199     /* Put the predicate last, as required by mask_fold_left_plus_optab.  */
200     e.rotate_inputs_left (0, 3);
201     machine_mode mode = e.vector_mode (0);
202     insn_code icode = direct_optab_handler (mask_fold_left_plus_optab, mode);
203     return e.use_exact_insn (icode);
204   }
205 };
206 
207 /* Implements svadr[bhwd].  */
208 class svadr_bhwd_impl : public function_base
209 {
210 public:
211   CONSTEXPR svadr_bhwd_impl (unsigned int shift) : m_shift (shift) {}
212 
213   rtx
214   expand (function_expander &e) const OVERRIDE
215   {
216     machine_mode mode = GET_MODE (e.args[0]);
217     if (m_shift == 0)
218       return e.use_exact_insn (code_for_aarch64_adr (mode));
219 
220     /* Turn the access size into an extra shift argument.  */
221     rtx shift = gen_int_mode (m_shift, GET_MODE_INNER (mode));
222     e.args.quick_push (expand_vector_broadcast (mode, shift));
223     return e.use_exact_insn (code_for_aarch64_adr_shift (mode));
224   }
225 
226   /* How many bits left to shift the vector displacement.  */
227   unsigned int m_shift;
228 };
229 
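/* Implements svbic.  */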
230 class svbic_impl : public function_base
231 {
232 public:
233   rtx
234   expand (function_expander &e) const OVERRIDE
235   {
236     /* Convert svbic of a constant into svand of its inverse.  */
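    /* For example, svbic_n_u32_x (pg, a, 3) is expanded in the same way
       as svand_n_u32_x (pg, a, ~3) would be.  */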
237     if (CONST_INT_P (e.args[2]))
238       {
239 	machine_mode mode = GET_MODE_INNER (e.vector_mode (0));
240 	e.args[2] = simplify_unary_operation (NOT, mode, e.args[2], mode);
241 	return e.map_to_rtx_codes (AND, AND, -1);
242       }
243 
244     if (e.type_suffix_ids[0] == TYPE_SUFFIX_b)
245       {
246 	gcc_assert (e.pred == PRED_z);
247 	return e.use_exact_insn (CODE_FOR_aarch64_pred_bicvnx16bi_z);
248       }
249 
250     if (e.pred == PRED_x)
251       return e.use_unpred_insn (code_for_aarch64_bic (e.vector_mode (0)));
252 
253     return e.use_cond_insn (code_for_cond_bic (e.vector_mode (0)));
254   }
255 };
256 
257 /* Implements svbrkn, svbrkpa and svbrkpb.  */
258 class svbrk_binary_impl : public function_base
259 {
260 public:
261   CONSTEXPR svbrk_binary_impl (int unspec) : m_unspec (unspec) {}
262 
263   rtx
264   expand (function_expander &e) const OVERRIDE
265   {
266     return e.use_exact_insn (code_for_aarch64_brk (m_unspec));
267   }
268 
269   /* The unspec code associated with the operation.  */
270   int m_unspec;
271 };
272 
273 /* Implements svbrka and svbrkb.  */
274 class svbrk_unary_impl : public function_base
275 {
276 public:
277   CONSTEXPR svbrk_unary_impl (int unspec) : m_unspec (unspec) {}
278 
279   rtx
280   expand (function_expander &e) const OVERRIDE
281   {
282     return e.use_cond_insn (code_for_aarch64_brk (m_unspec));
283   }
284 
285   /* The unspec code associated with the operation.  */
286   int m_unspec;
287 };
288 
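/* Implements svcadd.  */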
289 class svcadd_impl : public function_base
290 {
291 public:
292   rtx
293   expand (function_expander &e) const OVERRIDE
294   {
295     /* Convert the rotation amount into a specific unspec.  */
296     int rot = INTVAL (e.args.pop ());
297     if (rot == 90)
298       return e.map_to_unspecs (UNSPEC_CADD90, UNSPEC_CADD90,
299 			       UNSPEC_COND_FCADD90);
300     if (rot == 270)
301       return e.map_to_unspecs (UNSPEC_CADD270, UNSPEC_CADD270,
302 			       UNSPEC_COND_FCADD270);
303     gcc_unreachable ();
304   }
305 };
306 
307 /* Implements svclasta and svclastb.  */
308 class svclast_impl : public quiet<function_base>
309 {
310 public:
311   CONSTEXPR svclast_impl (int unspec) : m_unspec (unspec) {}
312 
313   rtx
314   expand (function_expander &e) const OVERRIDE
315   {
316     /* Match the fold_extract_optab order.  */
317     std::swap (e.args[0], e.args[1]);
318     machine_mode mode = e.vector_mode (0);
319     insn_code icode;
320     if (e.mode_suffix_id == MODE_n)
321       icode = code_for_fold_extract (m_unspec, mode);
322     else
323       icode = code_for_aarch64_fold_extract_vector (m_unspec, mode);
324     return e.use_exact_insn (icode);
325   }
326 
327   /* The unspec code associated with the operation.  */
328   int m_unspec;
329 };
330 
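/* Implements svcmla.  */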
331 class svcmla_impl : public function_base
332 {
333 public:
334   rtx
335   expand (function_expander &e) const OVERRIDE
336   {
337     /* Convert the rotation amount into a specific unspec.  */
338     int rot = INTVAL (e.args.pop ());
339     if (e.type_suffix (0).float_p)
340       {
341 	/* Make the operand order the same as the one used by the fma optabs,
342 	   with the accumulator last.  */
343 	e.rotate_inputs_left (1, 4);
344 	return e.map_to_unspecs (-1, -1, unspec_cond_fcmla (rot), 3);
345       }
346     else
347       {
348 	int cmla = unspec_cmla (rot);
349 	return e.map_to_unspecs (cmla, cmla, -1);
350       }
351   }
352 };
353 
354 class svcmla_lane_impl : public function_base
355 {
356 public:
357   rtx
358   expand (function_expander &e) const OVERRIDE
359   {
360     /* Convert the rotation amount into a specific unspec.  */
361     int rot = INTVAL (e.args.pop ());
362     machine_mode mode = e.vector_mode (0);
363     if (e.type_suffix (0).float_p)
364       {
365 	/* Make the operand order the same as the one used by the fma optabs,
366 	   with the accumulator last.  */
367 	e.rotate_inputs_left (0, 4);
368 	insn_code icode = code_for_aarch64_lane (unspec_fcmla (rot), mode);
369 	return e.use_exact_insn (icode);
370       }
371     else
372       {
373 	insn_code icode = code_for_aarch64_lane (unspec_cmla (rot), mode);
374 	return e.use_exact_insn (icode);
375       }
376   }
377 };
378 
379 /* Implements svcmp<cc> (except svcmpuo, which is handled separately).  */
380 class svcmp_impl : public function_base
381 {
382 public:
383   CONSTEXPR svcmp_impl (tree_code code, int unspec_for_fp)
384     : m_code (code), m_unspec_for_fp (unspec_for_fp) {}
385 
386   gimple *
387   fold (gimple_folder &f) const OVERRIDE
388   {
389     tree pg = gimple_call_arg (f.call, 0);
390     tree rhs1 = gimple_call_arg (f.call, 1);
391     tree rhs2 = gimple_call_arg (f.call, 2);
392 
393     /* Convert a ptrue-predicated integer comparison into the corresponding
394        gimple-level operation.  */
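    /* For example, svcmpeq_s8 (svptrue_b8 (), x, y) can be folded to a
       single EQ_EXPR comparison between the two data vectors.  */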
395     if (integer_all_onesp (pg)
396 	&& f.type_suffix (0).element_bytes == 1
397 	&& f.type_suffix (0).integer_p)
398       {
399 	gimple_seq stmts = NULL;
400 	rhs2 = f.force_vector (stmts, TREE_TYPE (rhs1), rhs2);
401 	gsi_insert_seq_before (f.gsi, stmts, GSI_SAME_STMT);
402 	return gimple_build_assign (f.lhs, m_code, rhs1, rhs2);
403       }
404 
405     return NULL;
406   }
407 
408   rtx
409   expand (function_expander &e) const OVERRIDE
410   {
411     machine_mode mode = e.vector_mode (0);
412 
413     /* Comparisons are UNSPEC_PRED_Z operations and so need a hint
414        operand.  */
415     e.add_ptrue_hint (0, e.gp_mode (0));
416 
417     if (e.type_suffix (0).integer_p)
418       {
419 	bool unsigned_p = e.type_suffix (0).unsigned_p;
420 	rtx_code code = get_rtx_code (m_code, unsigned_p);
421 	return e.use_exact_insn (code_for_aarch64_pred_cmp (code, mode));
422       }
423 
424     insn_code icode = code_for_aarch64_pred_fcm (m_unspec_for_fp, mode);
425     return e.use_exact_insn (icode);
426   }
427 
428   /* The tree code associated with the comparison.  */
429   tree_code m_code;
430 
431   /* The unspec code to use for floating-point comparisons.  */
432   int m_unspec_for_fp;
433 };
434 
435 /* Implements svcmp<cc>_wide.  */
436 class svcmp_wide_impl : public function_base
437 {
438 public:
439   CONSTEXPR svcmp_wide_impl (tree_code code, int unspec_for_sint,
440 			     int unspec_for_uint)
441     : m_code (code), m_unspec_for_sint (unspec_for_sint),
442       m_unspec_for_uint (unspec_for_uint) {}
443 
444   rtx
445   expand (function_expander &e) const OVERRIDE
446   {
447     machine_mode mode = e.vector_mode (0);
448     bool unsigned_p = e.type_suffix (0).unsigned_p;
449     rtx_code code = get_rtx_code (m_code, unsigned_p);
450 
451     /* Comparisons are UNSPEC_PRED_Z operations and so need a hint
452        operand.  */
453     e.add_ptrue_hint (0, e.gp_mode (0));
454 
455     /* If the argument is a constant that the unwidened comparisons
456        can handle directly, use them instead.  */
457     insn_code icode = code_for_aarch64_pred_cmp (code, mode);
458     rtx op2 = unwrap_const_vec_duplicate (e.args[3]);
459     if (CONSTANT_P (op2)
460 	&& insn_data[icode].operand[4].predicate (op2, DImode))
461       {
462 	e.args[3] = op2;
463 	return e.use_exact_insn (icode);
464       }
465 
466     int unspec = (unsigned_p ? m_unspec_for_uint : m_unspec_for_sint);
467     return e.use_exact_insn (code_for_aarch64_pred_cmp_wide (unspec, mode));
468   }
469 
470   /* The tree code associated with the comparison.  */
471   tree_code m_code;
472 
473   /* The unspec codes for signed and unsigned wide comparisons
474      respectively.  */
475   int m_unspec_for_sint;
476   int m_unspec_for_uint;
477 };
478 
479 class svcmpuo_impl : public quiet<function_base>
480 {
481 public:
482   rtx
483   expand (function_expander &e) const OVERRIDE
484   {
485     e.add_ptrue_hint (0, e.gp_mode (0));
486     return e.use_exact_insn (code_for_aarch64_pred_fcmuo (e.vector_mode (0)));
487   }
488 };
489 
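/* Implements svcnot.  */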
490 class svcnot_impl : public function_base
491 {
492 public:
493   rtx
494   expand (function_expander &e) const OVERRIDE
495   {
496     machine_mode mode = e.vector_mode (0);
497     if (e.pred == PRED_x)
498       {
499 	/* The pattern for CNOT includes an UNSPEC_PRED_Z, so needs
500 	   a ptrue hint.  */
501 	e.add_ptrue_hint (0, e.gp_mode (0));
502 	return e.use_pred_x_insn (code_for_aarch64_pred_cnot (mode));
503       }
504 
505     return e.use_cond_insn (code_for_cond_cnot (mode), 0);
506   }
507 };
508 
509 /* Implements svcnt[bhwd], which count the number of elements
510    in a particular vector mode.  */
511 class svcnt_bhwd_impl : public function_base
512 {
513 public:
514   CONSTEXPR svcnt_bhwd_impl (machine_mode ref_mode) : m_ref_mode (ref_mode) {}
515 
516   gimple *
517   fold (gimple_folder &f) const OVERRIDE
518   {
519     tree count = build_int_cstu (TREE_TYPE (f.lhs),
520 				 GET_MODE_NUNITS (m_ref_mode));
521     return gimple_build_assign (f.lhs, count);
522   }
523 
524   rtx
525   expand (function_expander &) const OVERRIDE
526   {
527     return gen_int_mode (GET_MODE_NUNITS (m_ref_mode), DImode);
528   }
529 
530   /* The mode of the vector associated with the [bhwd] suffix.  */
531   machine_mode m_ref_mode;
532 };
533 
534 /* Implements svcnt[bhwd]_pat.  */
535 class svcnt_bhwd_pat_impl : public svcnt_bhwd_impl
536 {
537 public:
538   CONSTEXPR svcnt_bhwd_pat_impl (machine_mode ref_mode)
539     : svcnt_bhwd_impl (ref_mode) {}
540 
541   gimple *
542   fold (gimple_folder &f) const OVERRIDE
543   {
544     tree pattern_arg = gimple_call_arg (f.call, 0);
545     aarch64_svpattern pattern = (aarch64_svpattern) tree_to_shwi (pattern_arg);
546 
547     if (pattern == AARCH64_SV_ALL)
548       /* svcnt[bhwd]_pat (SV_ALL) == svcnt[bhwd] ().  */
549       return svcnt_bhwd_impl::fold (f);
550 
551     /* See whether we can count the number of elements in the pattern
552        at compile time.  */
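    /* For example, svcntw_pat (SV_VL4) can be folded to the constant 4,
       since every SVE vector has at least four 32-bit elements.  */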
553     unsigned int elements_per_vq = 128 / GET_MODE_UNIT_BITSIZE (m_ref_mode);
554     HOST_WIDE_INT value = aarch64_fold_sve_cnt_pat (pattern, elements_per_vq);
555     if (value >= 0)
556       {
557 	tree count = build_int_cstu (TREE_TYPE (f.lhs), value);
558 	return gimple_build_assign (f.lhs, count);
559       }
560 
561     return NULL;
562   }
563 
564   rtx
565   expand (function_expander &e) const OVERRIDE
566   {
567     unsigned int elements_per_vq = 128 / GET_MODE_UNIT_BITSIZE (m_ref_mode);
568     e.args.quick_push (gen_int_mode (elements_per_vq, DImode));
569     e.args.quick_push (const1_rtx);
570     return e.use_exact_insn (CODE_FOR_aarch64_sve_cnt_pat);
571   }
572 };
573 
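/* Implements svcntp.  */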
574 class svcntp_impl : public function_base
575 {
576 public:
577   rtx
578   expand (function_expander &e) const OVERRIDE
579   {
580     machine_mode mode = e.vector_mode (0);
581     e.add_ptrue_hint (0, mode);
582     return e.use_exact_insn (code_for_aarch64_pred_cntp (mode));
583   }
584 };
585 
586 /* Implements svcreate2, svcreate3 and svcreate4.  */
587 class svcreate_impl : public quiet<multi_vector_function>
588 {
589 public:
590   CONSTEXPR svcreate_impl (unsigned int vectors_per_tuple)
591     : quiet<multi_vector_function> (vectors_per_tuple) {}
592 
593   gimple *
594   fold (gimple_folder &f) const OVERRIDE
595   {
596     unsigned int nargs = gimple_call_num_args (f.call);
597     tree lhs_type = TREE_TYPE (f.lhs);
598 
599     /* Replace the call with a clobber of the result (to prevent it from
600        becoming upwards exposed) followed by stores into each individual
601        vector of the tuple.
602 
603        The fold routines expect the replacement statement to have the
604        same lhs as the original call, so return the clobber statement
605        rather than the final vector store.  */
606     gassign *clobber = gimple_build_assign (f.lhs, build_clobber (lhs_type));
607 
608     for (unsigned int i = nargs; i-- > 0; )
609       {
610 	tree rhs_vector = gimple_call_arg (f.call, i);
611 	tree field = tuple_type_field (TREE_TYPE (f.lhs));
612 	tree lhs_array = build3 (COMPONENT_REF, TREE_TYPE (field),
613 				 unshare_expr (f.lhs), field, NULL_TREE);
614 	tree lhs_vector = build4 (ARRAY_REF, TREE_TYPE (rhs_vector),
615 				  lhs_array, size_int (i),
616 				  NULL_TREE, NULL_TREE);
617 	gassign *assign = gimple_build_assign (lhs_vector, rhs_vector);
618 	gsi_insert_after (f.gsi, assign, GSI_SAME_STMT);
619       }
620     return clobber;
621   }
622 
623   rtx
624   expand (function_expander &e) const OVERRIDE
625   {
626     rtx lhs_tuple = e.get_nonoverlapping_reg_target ();
627 
628     /* Record that LHS_TUPLE is dead before the first store.  */
629     emit_clobber (lhs_tuple);
630     for (unsigned int i = 0; i < e.args.length (); ++i)
631       {
632 	/* Use an lvalue subreg to refer to vector I in LHS_TUPLE.  */
633 	rtx lhs_vector = simplify_gen_subreg (GET_MODE (e.args[i]),
634 					      lhs_tuple, GET_MODE (lhs_tuple),
635 					      i * BYTES_PER_SVE_VECTOR);
636 	emit_move_insn (lhs_vector, e.args[i]);
637       }
638     return lhs_tuple;
639   }
640 };
641 
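/* Implements svcvt.  */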
642 class svcvt_impl : public function_base
643 {
644 public:
645   rtx
646   expand (function_expander &e) const OVERRIDE
647   {
648     machine_mode mode0 = e.vector_mode (0);
649     machine_mode mode1 = e.vector_mode (1);
650     insn_code icode;
651     /* All this complication comes from the need to select four things
652        simultaneously:
653 
654        (1) the kind of conversion (int<-float, float<-int, float<-float)
655        (2) signed vs. unsigned integers, where relevant
656        (3) the predication mode, which must be the wider of the predication
657 	   modes for MODE0 and MODE1
658        (4) the predication type (m, x or z)
659 
660        The only supported int<->float conversions for which the integer is
661        narrower than the float are SI<->DF.  It's therefore more convenient
662        to handle (3) by defining two patterns for int<->float conversions:
663        one in which the integer is at least as wide as the float and so
664        determines the predication mode, and another single SI<->DF pattern
665        in which the float's mode determines the predication mode (which is
666        always VNx2BI in that case).
667 
668        The names of the patterns follow the optab convention of giving
669        the source mode before the destination mode.  */
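    /* For example, svcvt_f64_s32_x uses the SI->DF "extend" pattern,
       for which the predication mode is VNx2BI.  */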
670     if (e.type_suffix (1).integer_p)
671       {
672 	int unspec = (e.type_suffix (1).unsigned_p
673 		      ? UNSPEC_COND_UCVTF
674 		      : UNSPEC_COND_SCVTF);
675 	if (e.type_suffix (0).element_bytes <= e.type_suffix (1).element_bytes)
676 	  icode = (e.pred == PRED_x
677 		   ? code_for_aarch64_sve_nonextend (unspec, mode1, mode0)
678 		   : code_for_cond_nonextend (unspec, mode1, mode0));
679 	else
680 	  icode = (e.pred == PRED_x
681 		   ? code_for_aarch64_sve_extend (unspec, mode1, mode0)
682 		   : code_for_cond_extend (unspec, mode1, mode0));
683       }
684     else
685       {
686 	int unspec = (!e.type_suffix (0).integer_p ? UNSPEC_COND_FCVT
687 		      : e.type_suffix (0).unsigned_p ? UNSPEC_COND_FCVTZU
688 		      : UNSPEC_COND_FCVTZS);
689 	if (e.type_suffix (0).element_bytes >= e.type_suffix (1).element_bytes)
690 	  icode = (e.pred == PRED_x
691 		   ? code_for_aarch64_sve_nontrunc (unspec, mode1, mode0)
692 		   : code_for_cond_nontrunc (unspec, mode1, mode0));
693 	else
694 	  icode = (e.pred == PRED_x
695 		   ? code_for_aarch64_sve_trunc (unspec, mode1, mode0)
696 		   : code_for_cond_trunc (unspec, mode1, mode0));
697       }
698 
699     if (e.pred == PRED_x)
700       return e.use_pred_x_insn (icode);
701     return e.use_cond_insn (icode);
702   }
703 };
704 
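/* Implements svdot.  */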
705 class svdot_impl : public function_base
706 {
707 public:
708   rtx
709   expand (function_expander &e) const OVERRIDE
710   {
711     /* In the optab, the multiplication operands come before the accumulator
712        operand.  The optab is keyed off the multiplication mode.  */
713     e.rotate_inputs_left (0, 3);
714     insn_code icode
715       = e.direct_optab_handler_for_sign (sdot_prod_optab, udot_prod_optab,
716 					 0, GET_MODE (e.args[0]));
717     return e.use_unpred_insn (icode);
718   }
719 };
720 
721 class svdotprod_lane_impl : public unspec_based_function_base
722 {
723 public:
724   CONSTEXPR svdotprod_lane_impl (int unspec_for_sint,
725 				 int unspec_for_uint,
726 				 int unspec_for_float)
727     : unspec_based_function_base (unspec_for_sint,
728 				  unspec_for_uint,
729 				  unspec_for_float) {}
730 
731   rtx
732   expand (function_expander &e) const OVERRIDE
733   {
734     /* Use the same ordering as the dot_prod_optab, with the
735        accumulator last.  */
736     e.rotate_inputs_left (0, 4);
737     int unspec = unspec_for (e);
738     machine_mode mode = e.vector_mode (0);
739     return e.use_exact_insn (code_for_aarch64_dot_prod_lane (unspec, mode));
740   }
741 };
742 
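/* Implements svdup.  */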
743 class svdup_impl : public quiet<function_base>
744 {
745 public:
746   gimple *
747   fold (gimple_folder &f) const OVERRIDE
748   {
749     tree vec_type = TREE_TYPE (f.lhs);
750     tree rhs = gimple_call_arg (f.call, f.pred == PRED_none ? 0 : 1);
751 
752     if (f.pred == PRED_none || f.pred == PRED_x)
753       {
754 	if (CONSTANT_CLASS_P (rhs))
755 	  {
756 	    if (f.type_suffix (0).bool_p)
757 	      return (tree_to_shwi (rhs)
758 		      ? f.fold_to_ptrue ()
759 		      : f.fold_to_pfalse ());
760 
761 	    tree rhs_vector = build_vector_from_val (vec_type, rhs);
762 	    return gimple_build_assign (f.lhs, rhs_vector);
763 	  }
764 
765 	/* Avoid folding _b to a VEC_DUPLICATE_EXPR, since to do that we
766 	   would need to introduce an extra and unwanted conversion to
767 	   the truth vector element type.  */
768 	if (!f.type_suffix (0).bool_p)
769 	  return gimple_build_assign (f.lhs, VEC_DUPLICATE_EXPR, rhs);
770       }
771 
772     /* svdup_z (pg, x) == VEC_COND_EXPR <pg, VEC_DUPLICATE_EXPR <x>, 0>.  */
773     if (f.pred == PRED_z)
774       {
775 	gimple_seq stmts = NULL;
776 	tree pred = f.convert_pred (stmts, vec_type, 0);
777 	rhs = f.force_vector (stmts, vec_type, rhs);
778 	gsi_insert_seq_before (f.gsi, stmts, GSI_SAME_STMT);
779 	return gimple_build_assign (f.lhs, VEC_COND_EXPR, pred, rhs,
780 				    build_zero_cst (vec_type));
781       }
782 
783     return NULL;
784   }
785 
786   rtx
787   expand (function_expander &e) const OVERRIDE
788   {
789     if (e.pred == PRED_none || e.pred == PRED_x)
790       /* There's no benefit to using predicated instructions for _x here.  */
791       return e.use_unpred_insn (e.direct_optab_handler (vec_duplicate_optab));
792 
793     /* Model predicated svdups as a SEL in which the "true" value is
794        the duplicate of the function argument and the "false" value
795        is the value of inactive lanes.  */
796     insn_code icode;
797     machine_mode mode = e.vector_mode (0);
798     if (valid_for_const_vector_p (GET_MODE_INNER (mode), e.args.last ()))
799       /* Duplicate the constant to fill a vector.  The pattern optimizes
800 	 various cases involving constant operands, falling back to SEL
801 	 if necessary.  */
802       icode = code_for_vcond_mask (mode, mode);
803     else
804       /* Use the pattern for selecting between a duplicated scalar
805 	 variable and a vector fallback.  */
806       icode = code_for_aarch64_sel_dup (mode);
807     return e.use_vcond_mask_insn (icode);
808   }
809 };
810 
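/* Implements svdup_lane.  */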
811 class svdup_lane_impl : public quiet<function_base>
812 {
813 public:
814   rtx
815   expand (function_expander &e) const OVERRIDE
816   {
817     /* The native DUP lane has an index range of 64 bytes.  */
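    /* For example, with 32-bit elements only indices 0-15 are in range;
       larger indices use the TBL expansion below.  */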
818     machine_mode mode = e.vector_mode (0);
819     if (CONST_INT_P (e.args[1])
820 	&& IN_RANGE (INTVAL (e.args[1]) * GET_MODE_UNIT_SIZE (mode), 0, 63))
821       return e.use_exact_insn (code_for_aarch64_sve_dup_lane (mode));
822 
823     /* Treat svdup_lane as if it were svtbl_n.  */
824     return e.use_exact_insn (code_for_aarch64_sve_tbl (e.vector_mode (0)));
825   }
826 };
827 
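/* Implements svdupq.  */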
828 class svdupq_impl : public quiet<function_base>
829 {
830 public:
831   gimple *
832   fold (gimple_folder &f) const OVERRIDE
833   {
834     tree vec_type = TREE_TYPE (f.lhs);
835     unsigned int nargs = gimple_call_num_args (f.call);
836     /* For predicates, pad out each argument so that we have one element
837        per bit.  */
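    /* For example, for svdupq_n_b32 each of the four boolean arguments is
       followed by three zero elements.  */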
838     unsigned int factor = (f.type_suffix (0).bool_p
839 			   ? f.type_suffix (0).element_bytes : 1);
840     tree_vector_builder builder (vec_type, nargs * factor, 1);
841     for (unsigned int i = 0; i < nargs; ++i)
842       {
843 	tree elt = gimple_call_arg (f.call, i);
844 	if (!CONSTANT_CLASS_P (elt))
845 	  return NULL;
846 	builder.quick_push (elt);
847 	for (unsigned int j = 1; j < factor; ++j)
848 	  builder.quick_push (build_zero_cst (TREE_TYPE (vec_type)));
849       }
850     return gimple_build_assign (f.lhs, builder.build ());
851   }
852 
853   rtx
854   expand (function_expander &e) const OVERRIDE
855   {
856     machine_mode mode = e.vector_mode (0);
857     unsigned int elements_per_vq = e.args.length ();
858     if (GET_MODE_CLASS (mode) == MODE_VECTOR_BOOL)
859       {
860 	/* Construct a vector of integers so that we can compare them against
861 	   zero below.  Zero vs. nonzero is the only distinction that
862 	   matters.  */
863 	mode = aarch64_sve_int_mode (mode);
864 	for (unsigned int i = 0; i < elements_per_vq; ++i)
865 	  e.args[i] = simplify_gen_unary (ZERO_EXTEND, GET_MODE_INNER (mode),
866 					  e.args[i], QImode);
867       }
868 
869     /* Get the 128-bit Advanced SIMD vector for this data size.  */
870     scalar_mode element_mode = GET_MODE_INNER (mode);
871     machine_mode vq_mode = aarch64_vq_mode (element_mode).require ();
872     gcc_assert (known_eq (elements_per_vq, GET_MODE_NUNITS (vq_mode)));
873 
874     /* Put the arguments into a 128-bit Advanced SIMD vector.  We want
875        argument N to go into architectural lane N, whereas Advanced SIMD
876        vectors are loaded memory lsb to register lsb.  We therefore need
877        to reverse the elements for big-endian targets.  */
878     rtx vq_reg = gen_reg_rtx (vq_mode);
879     rtvec vec = rtvec_alloc (elements_per_vq);
880     for (unsigned int i = 0; i < elements_per_vq; ++i)
881       {
882 	unsigned int argno = BYTES_BIG_ENDIAN ? elements_per_vq - i - 1 : i;
883 	RTVEC_ELT (vec, i) = e.args[argno];
884       }
885     aarch64_expand_vector_init (vq_reg, gen_rtx_PARALLEL (vq_mode, vec));
886 
887     /* If the result is a boolean, compare the data vector against zero.  */
888     if (mode != e.vector_mode (0))
889       {
890 	rtx data_dupq = aarch64_expand_sve_dupq (NULL, mode, vq_reg);
891 	return aarch64_convert_sve_data_to_pred (e.possible_target,
892 						 e.vector_mode (0), data_dupq);
893       }
894 
895     return aarch64_expand_sve_dupq (e.possible_target, mode, vq_reg);
896   }
897 };
898 
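/* Implements svdupq_lane.  */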
899 class svdupq_lane_impl : public quiet<function_base>
900 {
901 public:
902   rtx
903   expand (function_expander &e) const OVERRIDE
904   {
905     machine_mode mode = e.vector_mode (0);
906     rtx index = e.args[1];
907     if (CONST_INT_P (index) && IN_RANGE (INTVAL (index), 0, 3))
908       {
909 	/* Use the .Q form of DUP, which is the native instruction for
910 	   this function.  */
911 	insn_code icode = code_for_aarch64_sve_dupq_lane (mode);
912 	unsigned int num_indices = e.elements_per_vq (0);
913 	rtx indices = aarch64_gen_stepped_int_parallel
914 	  (num_indices, INTVAL (index) * num_indices, 1);
915 
916 	e.add_output_operand (icode);
917 	e.add_input_operand (icode, e.args[0]);
918 	e.add_fixed_operand (indices);
919 	return e.generate_insn (icode);
920       }
921 
922     /* Build a .D TBL index for the pairs of doublewords that we want to
923        duplicate.  */
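    /* For example, a constant index of 1 selects doublewords 2 and 3
       throughout the vector.  */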
924     if (CONST_INT_P (index))
925       {
926 	/* The index vector is a constant.  */
927 	rtx_vector_builder builder (VNx2DImode, 2, 1);
928 	builder.quick_push (gen_int_mode (INTVAL (index) * 2, DImode));
929 	builder.quick_push (gen_int_mode (INTVAL (index) * 2 + 1, DImode));
930 	index = builder.build ();
931       }
932     else
933       {
934 	/* Duplicate INDEX * 2 to fill a DImode vector.  The ACLE spec
935 	   explicitly allows the top of the index to be dropped.  */
936 	index = force_reg (DImode, simplify_gen_binary (ASHIFT, DImode,
937 							index, const1_rtx));
938 	index = expand_vector_broadcast (VNx2DImode, index);
939 
940 	/* Get an alternating 0, 1 predicate.  */
941 	rtx_vector_builder builder (VNx2BImode, 2, 1);
942 	builder.quick_push (const0_rtx);
943 	builder.quick_push (constm1_rtx);
944 	rtx pg = force_reg (VNx2BImode, builder.build ());
945 
946 	/* Add one to the odd elements of the index.  */
947 	rtx one = force_reg (VNx2DImode, CONST1_RTX (VNx2DImode));
948 	rtx target = gen_reg_rtx (VNx2DImode);
949 	emit_insn (gen_cond_addvnx2di (target, pg, index, one, index));
950 	index = target;
951       }
952 
953     e.args[0] = gen_lowpart (VNx2DImode, e.args[0]);
954     e.args[1] = index;
955     return e.use_exact_insn (CODE_FOR_aarch64_sve_tblvnx2di);
956   }
957 };
958 
959 /* Implements svextb, svexth and svextw.  */
960 class svext_bhw_impl : public function_base
961 {
962 public:
963   CONSTEXPR svext_bhw_impl (scalar_int_mode from_mode)
964     : m_from_mode (from_mode) {}
965 
966   rtx
967   expand (function_expander &e) const OVERRIDE
968   {
969     if (e.type_suffix (0).unsigned_p)
970       {
971 	/* Convert to an AND.  The widest we go is 0xffffffff, which fits
972 	   in a CONST_INT.  */
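        /* For example, svextb_x on unsigned elements simply becomes an
           AND with 0xff.  */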
973 	e.args.quick_push (GEN_INT (GET_MODE_MASK (m_from_mode)));
974 	if (e.pred == PRED_m)
975 	  /* We now have arguments "(inactive, pg, op, mask)".  Convert this
976 	     to "(pg, op, mask, inactive)" so that the order matches svand_m
977 	     with an extra argument on the end.  Take the inactive elements
978 	     from this extra argument.  */
979 	  e.rotate_inputs_left (0, 4);
980 	return e.map_to_rtx_codes (AND, AND, -1, 3);
981       }
982 
983     machine_mode wide_mode = e.vector_mode (0);
984     poly_uint64 nunits = GET_MODE_NUNITS (wide_mode);
985     machine_mode narrow_mode
986       = aarch64_sve_data_mode (m_from_mode, nunits).require ();
987     if (e.pred == PRED_x)
988       {
989 	insn_code icode = code_for_aarch64_pred_sxt (wide_mode, narrow_mode);
990 	return e.use_pred_x_insn (icode);
991       }
992 
993     insn_code icode = code_for_aarch64_cond_sxt (wide_mode, narrow_mode);
994     return e.use_cond_insn (icode);
995   }
996 
997   /* The element mode that we're extending from.  */
998   scalar_int_mode m_from_mode;
999 };
1000 
1001 /* Implements svget2, svget3 and svget4.  */
1002 class svget_impl : public quiet<multi_vector_function>
1003 {
1004 public:
1005   CONSTEXPR svget_impl (unsigned int vectors_per_tuple)
1006     : quiet<multi_vector_function> (vectors_per_tuple) {}
1007 
1008   gimple *
1009   fold (gimple_folder &f) const OVERRIDE
1010   {
1011     /* Fold into a normal gimple component access.  */
1012     tree rhs_tuple = gimple_call_arg (f.call, 0);
1013     tree index = gimple_call_arg (f.call, 1);
1014     tree field = tuple_type_field (TREE_TYPE (rhs_tuple));
1015     tree rhs_array = build3 (COMPONENT_REF, TREE_TYPE (field),
1016 			     rhs_tuple, field, NULL_TREE);
1017     tree rhs_vector = build4 (ARRAY_REF, TREE_TYPE (f.lhs),
1018 			      rhs_array, index, NULL_TREE, NULL_TREE);
1019     return gimple_build_assign (f.lhs, rhs_vector);
1020   }
1021 
1022   rtx
1023   expand (function_expander &e) const OVERRIDE
1024   {
1025     /* Fold the access into a subreg rvalue.  */
1026     return simplify_gen_subreg (e.vector_mode (0), e.args[0],
1027 				GET_MODE (e.args[0]),
1028 				INTVAL (e.args[1]) * BYTES_PER_SVE_VECTOR);
1029   }
1030 };
1031 
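/* Implements svindex.  */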
1032 class svindex_impl : public function_base
1033 {
1034 public:
1035   rtx
1036   expand (function_expander &e) const OVERRIDE
1037   {
1038     return e.use_exact_insn (e.direct_optab_handler (vec_series_optab));
1039   }
1040 };
1041 
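/* Implements svinsr.  */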
1042 class svinsr_impl : public quiet<function_base>
1043 {
1044 public:
1045   gimple *
1046   fold (gimple_folder &f) const OVERRIDE
1047   {
1048     gcall *new_call = gimple_build_call_internal (IFN_VEC_SHL_INSERT, 2,
1049 						  gimple_call_arg (f.call, 0),
1050 						  gimple_call_arg (f.call, 1));
1051     gimple_call_set_lhs (new_call, f.lhs);
1052     return new_call;
1053   }
1054 
1055   rtx
1056   expand (function_expander &e) const OVERRIDE
1057   {
1058     insn_code icode = direct_optab_handler (vec_shl_insert_optab,
1059 					    e.vector_mode (0));
1060     return e.use_exact_insn (icode);
1061   }
1062 };
1063 
1064 /* Implements svlasta and svlastb.  */
1065 class svlast_impl : public quiet<function_base>
1066 {
1067 public:
1068   CONSTEXPR svlast_impl (int unspec) : m_unspec (unspec) {}
1069 
1070   rtx
1071   expand (function_expander &e) const OVERRIDE
1072   {
1073     return e.use_exact_insn (code_for_extract (m_unspec, e.vector_mode (0)));
1074   }
1075 
1076   /* The unspec code associated with the operation.  */
1077   int m_unspec;
1078 };
1079 
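/* Implements svld1.  */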
1080 class svld1_impl : public full_width_access
1081 {
1082 public:
1083   unsigned int
1084   call_properties (const function_instance &) const OVERRIDE
1085   {
1086     return CP_READ_MEMORY;
1087   }
1088 
1089   gimple *
1090   fold (gimple_folder &f) const OVERRIDE
1091   {
1092     tree vectype = f.vector_type (0);
1093 
1094     /* Get the predicate and base pointer.  */
1095     gimple_seq stmts = NULL;
1096     tree pred = f.convert_pred (stmts, vectype, 0);
1097     tree base = f.fold_contiguous_base (stmts, vectype);
1098     gsi_insert_seq_before (f.gsi, stmts, GSI_SAME_STMT);
1099 
1100     tree cookie = f.load_store_cookie (TREE_TYPE (vectype));
1101     gcall *new_call = gimple_build_call_internal (IFN_MASK_LOAD, 3,
1102 						  base, cookie, pred);
1103     gimple_call_set_lhs (new_call, f.lhs);
1104     return new_call;
1105   }
1106 
1107   rtx
1108   expand (function_expander &e) const OVERRIDE
1109   {
1110     insn_code icode = convert_optab_handler (maskload_optab,
1111 					     e.vector_mode (0), e.gp_mode (0));
1112     return e.use_contiguous_load_insn (icode);
1113   }
1114 };
1115 
1116 /* Implements extending contiguous forms of svld1.  */
1117 class svld1_extend_impl : public extending_load
1118 {
1119 public:
1120   CONSTEXPR svld1_extend_impl (type_suffix_index memory_type)
1121     : extending_load (memory_type) {}
1122 
1123   rtx
1124   expand (function_expander &e) const OVERRIDE
1125   {
1126     insn_code icode = code_for_aarch64_load (extend_rtx_code (),
1127 					     e.vector_mode (0),
1128 					     e.memory_vector_mode ());
1129     return e.use_contiguous_load_insn (icode);
1130   }
1131 };
1132 
1133 class svld1_gather_impl : public full_width_access
1134 {
1135 public:
1136   unsigned int
1137   call_properties (const function_instance &) const OVERRIDE
1138   {
1139     return CP_READ_MEMORY;
1140   }
1141 
1142   rtx
1143   expand (function_expander &e) const OVERRIDE
1144   {
1145     e.prepare_gather_address_operands (1);
1146     /* Put the predicate last, as required by mask_gather_load_optab.  */
1147     e.rotate_inputs_left (0, 5);
1148     machine_mode mem_mode = e.memory_vector_mode ();
1149     machine_mode int_mode = aarch64_sve_int_mode (mem_mode);
1150     insn_code icode = convert_optab_handler (mask_gather_load_optab,
1151 					     mem_mode, int_mode);
1152     return e.use_exact_insn (icode);
1153   }
1154 };
1155 
1156 /* Implements extending forms of svld1_gather.  */
1157 class svld1_gather_extend_impl : public extending_load
1158 {
1159 public:
1160   CONSTEXPR svld1_gather_extend_impl (type_suffix_index memory_type)
1161     : extending_load (memory_type) {}
1162 
1163   rtx
1164   expand (function_expander &e) const OVERRIDE
1165   {
1166     e.prepare_gather_address_operands (1);
1167     /* Put the predicate last, since the extending gathers use the same
1168        operand order as mask_gather_load_optab.  */
1169     e.rotate_inputs_left (0, 5);
1170     /* Add a constant predicate for the extension rtx.  */
1171     e.args.quick_push (CONSTM1_RTX (VNx16BImode));
1172     insn_code icode = code_for_aarch64_gather_load (extend_rtx_code (),
1173 						    e.vector_mode (0),
1174 						    e.memory_vector_mode ());
1175     return e.use_exact_insn (icode);
1176   }
1177 };
1178 
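/* Common base class for load-and-replicate functions (svld1rq, svld1ro).  */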
1179 class load_replicate : public function_base
1180 {
1181 public:
1182   unsigned int
1183   call_properties (const function_instance &) const OVERRIDE
1184   {
1185     return CP_READ_MEMORY;
1186   }
1187 
1188   tree
1189   memory_scalar_type (const function_instance &fi) const OVERRIDE
1190   {
1191     return fi.scalar_type (0);
1192   }
1193 };
1194 
1195 class svld1rq_impl : public load_replicate
1196 {
1197 public:
1198   machine_mode
1199   memory_vector_mode (const function_instance &fi) const OVERRIDE
1200   {
1201     return aarch64_vq_mode (GET_MODE_INNER (fi.vector_mode (0))).require ();
1202   }
1203 
1204   rtx
1205   expand (function_expander &e) const OVERRIDE
1206   {
1207     insn_code icode = code_for_aarch64_sve_ld1rq (e.vector_mode (0));
1208     return e.use_contiguous_load_insn (icode);
1209   }
1210 };
1211 
1212 class svld1ro_impl : public load_replicate
1213 {
1214 public:
1215   machine_mode
1216   memory_vector_mode (const function_instance &) const OVERRIDE
1217   {
1218     return OImode;
1219   }
1220 
1221   rtx
1222   expand (function_expander &e) const OVERRIDE
1223   {
1224     insn_code icode = code_for_aarch64_sve_ld1ro (e.vector_mode (0));
1225     return e.use_contiguous_load_insn (icode);
1226   }
1227 };
1228 
1229 /* Implements svld2, svld3 and svld4.  */
1230 class svld234_impl : public full_width_access
1231 {
1232 public:
1233   CONSTEXPR svld234_impl (unsigned int vectors_per_tuple)
1234     : full_width_access (vectors_per_tuple) {}
1235 
1236   unsigned int
1237   call_properties (const function_instance &) const OVERRIDE
1238   {
1239     return CP_READ_MEMORY;
1240   }
1241 
1242   gimple *
1243   fold (gimple_folder &f) const OVERRIDE
1244   {
1245     tree tuple_type = TREE_TYPE (f.lhs);
1246     tree vectype = f.vector_type (0);
1247 
1248     /* Get the predicate and base pointer.  */
1249     gimple_seq stmts = NULL;
1250     tree pred = f.convert_pred (stmts, vectype, 0);
1251     tree base = f.fold_contiguous_base (stmts, vectype);
1252     gsi_insert_seq_before (f.gsi, stmts, GSI_SAME_STMT);
1253 
1254     /* Emit two statements: a clobber of the lhs, so that it isn't
1255        upwards exposed, and then the load itself.
1256 
1257        The fold routines expect the replacement statement to have the
1258        same lhs as the original call, so return the clobber statement
1259        rather than the load.  */
1260     gimple *clobber = gimple_build_assign (f.lhs, build_clobber (tuple_type));
1261 
1262     /* View the loaded data as an array of vectors.  */
1263     tree field = tuple_type_field (tuple_type);
1264     tree lhs_array = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (field),
1265 			     unshare_expr (f.lhs));
1266 
1267     /* Emit the load itself.  */
1268     tree cookie = f.load_store_cookie (TREE_TYPE (vectype));
1269     gcall *new_call = gimple_build_call_internal (IFN_MASK_LOAD_LANES, 3,
1270 						  base, cookie, pred);
1271     gimple_call_set_lhs (new_call, lhs_array);
1272     gsi_insert_after (f.gsi, new_call, GSI_SAME_STMT);
1273 
1274     return clobber;
1275   }
1276 
1277   rtx
1278   expand (function_expander &e) const OVERRIDE
1279   {
1280     machine_mode tuple_mode = TYPE_MODE (TREE_TYPE (e.call_expr));
1281     insn_code icode = convert_optab_handler (vec_mask_load_lanes_optab,
1282 					     tuple_mode, e.vector_mode (0));
1283     return e.use_contiguous_load_insn (icode);
1284   }
1285 };
1286 
1287 class svldff1_gather_impl : public full_width_access
1288 {
1289 public:
1290   unsigned int
1291   call_properties (const function_instance &) const OVERRIDE
1292   {
1293     return CP_READ_MEMORY | CP_READ_FFR | CP_WRITE_FFR;
1294   }
1295 
1296   rtx
1297   expand (function_expander &e) const OVERRIDE
1298   {
1299     /* See the block comment in aarch64-sve.md for details about the
1300        FFR handling.  */
1301     emit_insn (gen_aarch64_update_ffr_for_load ());
1302 
1303     e.prepare_gather_address_operands (1);
1304     /* Put the predicate last, since ldff1_gather uses the same operand
1305        order as mask_gather_load_optab.  */
1306     e.rotate_inputs_left (0, 5);
1307     machine_mode mem_mode = e.memory_vector_mode ();
1308     return e.use_exact_insn (code_for_aarch64_ldff1_gather (mem_mode));
1309   }
1310 };
1311 
1312 /* Implements extending forms of svldff1_gather.  */
1313 class svldff1_gather_extend : public extending_load
1314 {
1315 public:
1316   CONSTEXPR svldff1_gather_extend (type_suffix_index memory_type)
1317     : extending_load (memory_type) {}
1318 
1319   rtx
1320   expand (function_expander &e) const OVERRIDE
1321   {
1322     /* See the block comment in aarch64-sve.md for details about the
1323        FFR handling.  */
1324     emit_insn (gen_aarch64_update_ffr_for_load ());
1325 
1326     e.prepare_gather_address_operands (1);
1327     /* Put the predicate last, since ldff1_gather uses the same operand
1328        order as mask_gather_load_optab.  */
1329     e.rotate_inputs_left (0, 5);
1330     /* Add a constant predicate for the extension rtx.  */
1331     e.args.quick_push (CONSTM1_RTX (VNx16BImode));
1332     insn_code icode = code_for_aarch64_ldff1_gather (extend_rtx_code (),
1333 						     e.vector_mode (0),
1334 						     e.memory_vector_mode ());
1335     return e.use_exact_insn (icode);
1336   }
1337 };
1338 
1339 class svldnt1_impl : public full_width_access
1340 {
1341 public:
1342   unsigned int
1343   call_properties (const function_instance &) const OVERRIDE
1344   {
1345     return CP_READ_MEMORY;
1346   }
1347 
1348   rtx
1349   expand (function_expander &e) const OVERRIDE
1350   {
1351     insn_code icode = code_for_aarch64_ldnt1 (e.vector_mode (0));
1352     return e.use_contiguous_load_insn (icode);
1353   }
1354 };
1355 
1356 /* Implements svldff1 and svldnf1.  */
1357 class svldxf1_impl : public full_width_access
1358 {
1359 public:
1360   CONSTEXPR svldxf1_impl (int unspec) : m_unspec (unspec) {}
1361 
1362   unsigned int
1363   call_properties (const function_instance &) const OVERRIDE
1364   {
1365     return CP_READ_MEMORY | CP_READ_FFR | CP_WRITE_FFR;
1366   }
1367 
1368   rtx
1369   expand (function_expander &e) const OVERRIDE
1370   {
1371     /* See the block comment in aarch64-sve.md for details about the
1372        FFR handling.  */
1373     emit_insn (gen_aarch64_update_ffr_for_load ());
1374 
1375     machine_mode mode = e.vector_mode (0);
1376     return e.use_contiguous_load_insn (code_for_aarch64_ldf1 (m_unspec, mode));
1377   }
1378 
1379   /* The unspec associated with the load.  */
1380   int m_unspec;
1381 };
1382 
1383 /* Implements extending contiguous forms of svldff1 and svldnf1.  */
1384 class svldxf1_extend_impl : public extending_load
1385 {
1386 public:
1387   CONSTEXPR svldxf1_extend_impl (type_suffix_index memory_type, int unspec)
1388     : extending_load (memory_type), m_unspec (unspec) {}
1389 
1390   unsigned int
1391   call_properties (const function_instance &) const OVERRIDE
1392   {
1393     return CP_READ_MEMORY | CP_READ_FFR | CP_WRITE_FFR;
1394   }
1395 
1396   rtx
1397   expand (function_expander &e) const OVERRIDE
1398   {
1399     /* See the block comment in aarch64-sve.md for details about the
1400        FFR handling.  */
1401     emit_insn (gen_aarch64_update_ffr_for_load ());
1402 
1403     insn_code icode = code_for_aarch64_ldf1 (m_unspec, extend_rtx_code (),
1404 					     e.vector_mode (0),
1405 					     e.memory_vector_mode ());
1406     return e.use_contiguous_load_insn (icode);
1407   }
1408 
1409   /* The unspec associated with the load.  */
1410   int m_unspec;
1411 };
1412 
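/* Implements svlen.  */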
1413 class svlen_impl : public quiet<function_base>
1414 {
1415 public:
1416   gimple *
1417   fold (gimple_folder &f) const OVERRIDE
1418   {
1419     /* The argument only exists for its type.  */
1420     tree rhs_type = TREE_TYPE (gimple_call_arg (f.call, 0));
1421     tree count = build_int_cstu (TREE_TYPE (f.lhs),
1422 				 TYPE_VECTOR_SUBPARTS (rhs_type));
1423     return gimple_build_assign (f.lhs, count);
1424   }
1425 
1426   rtx
1427   expand (function_expander &e) const OVERRIDE
1428   {
1429     /* The argument only exists for its type.  */
1430     return gen_int_mode (GET_MODE_NUNITS (e.vector_mode (0)), DImode);
1431   }
1432 };
1433 
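/* Implements svmad.  */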
1434 class svmad_impl : public function_base
1435 {
1436 public:
1437   rtx
1438   expand (function_expander &e) const OVERRIDE
1439   {
1440     return expand_mad (e);
1441   }
1442 };
1443 
1444 class svmla_impl : public function_base
1445 {
1446 public:
1447   rtx
1448   expand (function_expander &e) const OVERRIDE
1449   {
1450     /* Put the accumulator at the end (argument 3), but keep it as the
1451        merge input for _m functions.  */
1452     e.rotate_inputs_left (1, 4);
1453     return expand_mad (e, 3);
1454   }
1455 };
1456 
1457 class svmla_lane_impl : public function_base
1458 {
1459 public:
1460   rtx
1461   expand (function_expander &e) const OVERRIDE
1462   {
1463     if (e.type_suffix (0).integer_p)
1464       {
1465 	machine_mode mode = e.vector_mode (0);
1466 	return e.use_exact_insn (code_for_aarch64_sve_add_mul_lane (mode));
1467       }
1468     return expand_mla_mls_lane (e, UNSPEC_FMLA);
1469   }
1470 };
1471 
1472 class svmls_impl : public function_base
1473 {
1474 public:
1475   rtx
1476   expand (function_expander &e) const OVERRIDE
1477   {
1478     /* Put the accumulator at the end (argument 3), but keep it as the
1479        merge input for _m functions.  */
1480     e.rotate_inputs_left (1, 4);
1481     return expand_msb (e, 3);
1482   }
1483 };
1484 
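/* Implements svmov.  */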
1485 class svmov_impl : public function_base
1486 {
1487 public:
1488   gimple *
1489   fold (gimple_folder &f) const OVERRIDE
1490   {
1491     return gimple_build_assign (f.lhs, BIT_AND_EXPR,
1492 				gimple_call_arg (f.call, 0),
1493 				gimple_call_arg (f.call, 1));
1494   }
1495 
1496   rtx
1497   expand (function_expander &e) const OVERRIDE
1498   {
1499     /* The canonical form for the assembler alias "MOV Pa.B, Pb/Z, Pc.B"
1500        is "AND Pa.B, Pb/Z, Pc.B, Pc.B".  */
1501     gcc_assert (e.pred == PRED_z);
1502     e.args.quick_push (e.args[1]);
1503     return e.use_exact_insn (CODE_FOR_aarch64_pred_andvnx16bi_z);
1504   }
1505 };
1506 
1507 class svmls_lane_impl : public function_base
1508 {
1509 public:
1510   rtx
1511   expand (function_expander &e) const OVERRIDE
1512   {
1513     if (e.type_suffix (0).integer_p)
1514       {
1515 	machine_mode mode = e.vector_mode (0);
1516 	return e.use_exact_insn (code_for_aarch64_sve_sub_mul_lane (mode));
1517       }
1518     return expand_mla_mls_lane (e, UNSPEC_FMLS);
1519   }
1520 };
1521 
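/* Implements svmmla.  */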
1522 class svmmla_impl : public function_base
1523 {
1524 public:
1525   rtx
1526   expand (function_expander &e) const OVERRIDE
1527   {
1528     insn_code icode;
1529     if (e.type_suffix (0).integer_p)
1530       {
1531 	if (e.type_suffix (0).unsigned_p)
1532 	  icode = code_for_aarch64_sve_add (UNSPEC_UMATMUL, e.vector_mode (0));
1533 	else
1534 	  icode = code_for_aarch64_sve_add (UNSPEC_SMATMUL, e.vector_mode (0));
1535       }
1536     else
1537       icode = code_for_aarch64_sve (UNSPEC_FMMLA, e.vector_mode (0));
1538     return e.use_exact_insn (icode);
1539   }
1540 };
1541 
1542 class svmsb_impl : public function_base
1543 {
1544 public:
1545   rtx
1546   expand (function_expander &e) const OVERRIDE
1547   {
1548     return expand_msb (e);
1549   }
1550 };
1551 
1552 class svnand_impl : public function_base
1553 {
1554 public:
1555   rtx
1556   expand (function_expander &e) const OVERRIDE
1557   {
1558     gcc_assert (e.pred == PRED_z);
1559     return e.use_exact_insn (CODE_FOR_aarch64_pred_nandvnx16bi_z);
1560   }
1561 };
1562 
1563 class svnor_impl : public function_base
1564 {
1565 public:
1566   rtx
1567   expand (function_expander &e) const OVERRIDE
1568   {
1569     gcc_assert (e.pred == PRED_z);
1570     return e.use_exact_insn (CODE_FOR_aarch64_pred_norvnx16bi_z);
1571   }
1572 };
1573 
1574 class svnot_impl : public rtx_code_function
1575 {
1576 public:
1577   CONSTEXPR svnot_impl () : rtx_code_function (NOT, NOT, -1) {}
1578 
1579   rtx
1580   expand (function_expander &e) const OVERRIDE
1581   {
1582     if (e.type_suffix_ids[0] == TYPE_SUFFIX_b)
1583       {
1584 	/* The canonical form for the assembler alias "NOT Pa.B, Pb/Z, Pc.B"
1585 	   is "EOR Pa.B, Pb/Z, Pb.B, Pc.B".  */
1586 	gcc_assert (e.pred == PRED_z);
1587 	e.args.quick_insert (1, e.args[0]);
1588 	return e.use_exact_insn (CODE_FOR_aarch64_pred_xorvnx16bi_z);
1589       }
1590     return rtx_code_function::expand (e);
1591   }
1592 };
1593 
1594 class svorn_impl : public function_base
1595 {
1596 public:
1597   rtx
1598   expand (function_expander &e) const OVERRIDE
1599   {
1600     gcc_assert (e.pred == PRED_z);
1601     return e.use_exact_insn (CODE_FOR_aarch64_pred_ornvnx16bi_z);
1602   }
1603 };
1604 
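/* Implements svpfalse.  */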
1605 class svpfalse_impl : public function_base
1606 {
1607 public:
1608   gimple *
1609   fold (gimple_folder &f) const OVERRIDE
1610   {
1611     return f.fold_to_pfalse ();
1612   }
1613 
1614   rtx
1615   expand (function_expander &) const OVERRIDE
1616   {
1617     return CONST0_RTX (VNx16BImode);
1618   }
1619 };
1620 
1621 /* Implements svpfirst and svpnext, which share the same .md patterns.  */
1622 class svpfirst_svpnext_impl : public function_base
1623 {
1624 public:
1625   CONSTEXPR svpfirst_svpnext_impl (int unspec) : m_unspec (unspec) {}
1626 
1627   rtx
1628   expand (function_expander &e) const OVERRIDE
1629   {
1630     machine_mode mode = e.vector_mode (0);
1631     e.add_ptrue_hint (0, mode);
1632     return e.use_exact_insn (code_for_aarch64_sve (m_unspec, mode));
1633   }
1634 
1635   /* The unspec associated with the operation.  */
1636   int m_unspec;
1637 };
1638 
1639 /* Implements contiguous forms of svprf[bhwd].  */
1640 class svprf_bhwd_impl : public function_base
1641 {
1642 public:
1643   CONSTEXPR svprf_bhwd_impl (machine_mode mode) : m_mode (mode) {}
1644 
1645   unsigned int
1646   call_properties (const function_instance &) const OVERRIDE
1647   {
1648     return CP_PREFETCH_MEMORY;
1649   }
1650 
1651   rtx
1652   expand (function_expander &e) const OVERRIDE
1653   {
1654     e.prepare_prefetch_operands ();
1655     insn_code icode = code_for_aarch64_sve_prefetch (m_mode);
1656     return e.use_contiguous_prefetch_insn (icode);
1657   }
1658 
1659   /* The mode that we'd use to hold one vector of prefetched data.  */
1660   machine_mode m_mode;
1661 };
1662 
1663 /* Implements svprf[bhwd]_gather.  */
1664 class svprf_bhwd_gather_impl : public function_base
1665 {
1666 public:
1667   CONSTEXPR svprf_bhwd_gather_impl (machine_mode mode) : m_mode (mode) {}
1668 
1669   unsigned int
1670   call_properties (const function_instance &) const OVERRIDE
1671   {
1672     return CP_PREFETCH_MEMORY;
1673   }
1674 
1675   machine_mode
1676   memory_vector_mode (const function_instance &) const OVERRIDE
1677   {
1678     return m_mode;
1679   }
1680 
1681   rtx
1682   expand (function_expander &e) const OVERRIDE
1683   {
1684     e.prepare_prefetch_operands ();
1685     e.prepare_gather_address_operands (1);
1686 
1687     /* Insert a zero operand to identify the mode of the memory being
1688        accessed.  This goes between the gather operands and prefetch
1689        operands created above.  */
1690     e.args.quick_insert (5, CONST0_RTX (m_mode));
1691 
1692     machine_mode reg_mode = GET_MODE (e.args[2]);
1693     insn_code icode = code_for_aarch64_sve_gather_prefetch (m_mode, reg_mode);
1694     return e.use_exact_insn (icode);
1695   }
1696 
1697   /* The mode that we'd use to hold one vector of prefetched data.  */
1698   machine_mode m_mode;
1699 };
1700 
1701 /* Implements svptest_any, svptest_first and svptest_last.  */
1702 class svptest_impl : public function_base
1703 {
1704 public:
1705   CONSTEXPR svptest_impl (rtx_code compare) : m_compare (compare) {}
1706 
1707   rtx
1708   expand (function_expander &e) const OVERRIDE
1709   {
1710     /* See whether GP is an exact ptrue for some predicate mode;
1711        i.e. whether converting the GP to that mode will not drop
1712        set bits and will leave all significant bits set.  */
1713     machine_mode wide_mode;
1714     int hint;
1715     if (aarch64_ptrue_all_mode (e.args[0]).exists (&wide_mode))
1716       hint = SVE_KNOWN_PTRUE;
1717     else
1718       {
1719 	hint = SVE_MAYBE_NOT_PTRUE;
1720 	wide_mode = VNx16BImode;
1721       }
1722 
1723     /* Generate the PTEST itself.  */
1724     rtx pg = force_reg (VNx16BImode, e.args[0]);
1725     rtx wide_pg = gen_lowpart (wide_mode, pg);
1726     rtx hint_rtx = gen_int_mode (hint, DImode);
1727     rtx op = force_reg (wide_mode, gen_lowpart (wide_mode, e.args[1]));
1728     emit_insn (gen_aarch64_ptestvnx16bi (pg, wide_pg, hint_rtx, op));
1729 
1730     /* Get the location of the boolean result.  We can provide SImode and
1731        DImode values directly; rely on generic code to convert others.  */
1732     rtx target = e.possible_target;
1733     if (!target
1734 	|| !REG_P (target)
1735 	|| (GET_MODE (target) != SImode && GET_MODE (target) != DImode))
1736       target = gen_reg_rtx (DImode);
1737 
1738     /* Generate a CSET to convert the CC result of the PTEST to a boolean.  */
1739     rtx cc_reg = gen_rtx_REG (CC_NZCmode, CC_REGNUM);
1740     rtx compare = gen_rtx_fmt_ee (m_compare, GET_MODE (target),
1741 				  cc_reg, const0_rtx);
1742     emit_insn (gen_rtx_SET (target, compare));
1743     return target;
1744   }
1745 
1746   /* The comparison code associated with the ptest condition.  */
1747   rtx_code m_compare;
1748 };
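/* Editorial illustration: for
     bool any = svptest_any (pg, x);
   the expander above emits a PTEST of X governed by PG and then sets the
   result from the CC_NZC flags, using the comparison code supplied in the
   FUNCTION table below (NE for svptest_any, LT for svptest_first and
   LTU for svptest_last).  */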
1749 
1750 class svptrue_impl : public function_base
1751 {
1752 public:
1753   gimple *
1754   fold (gimple_folder &f) const OVERRIDE
1755   {
1756     return f.fold_to_ptrue ();
1757   }
1758 
1759   rtx
1760   expand (function_expander &e) const OVERRIDE
1761   {
1762     return aarch64_ptrue_all (e.type_suffix (0).element_bytes);
1763   }
1764 };
1765 
1766 class svptrue_pat_impl : public function_base
1767 {
1768 public:
1769   gimple *
1770   fold (gimple_folder &f) const OVERRIDE
1771   {
1772     tree pattern_arg = gimple_call_arg (f.call, 0);
1773     aarch64_svpattern pattern = (aarch64_svpattern) tree_to_shwi (pattern_arg);
1774 
1775     if (pattern == AARCH64_SV_ALL)
1776       /* svptrue_pat_bN (SV_ALL) == svptrue_bN ().  */
1777       return f.fold_to_ptrue ();
1778 
1779     /* See whether we can count the number of elements in the pattern
1780        at compile time.  If so, construct a predicate with that number
1781        of 1s followed by all 0s.  */
1782     int nelts_per_vq = f.elements_per_vq (0);
1783     HOST_WIDE_INT value = aarch64_fold_sve_cnt_pat (pattern, nelts_per_vq);
1784     if (value >= 0)
1785       return f.fold_to_vl_pred (value);
1786 
1787     return NULL;
1788   }
1789 
1790   rtx
1791   expand (function_expander &e) const OVERRIDE
1792   {
1793     /* In rtl, the predicate is represented as the constant:
1794 
1795          (const:V16BI (unspec:V16BI [(const_int PATTERN)
1796 				     (const_vector:VnnBI [zeros])]
1797 				    UNSPEC_PTRUE))
1798 
1799        where nn determines the element size.  */
1800     rtvec vec = gen_rtvec (2, e.args[0], CONST0_RTX (e.vector_mode (0)));
1801     return gen_rtx_CONST (VNx16BImode,
1802 			  gen_rtx_UNSPEC (VNx16BImode, vec, UNSPEC_PTRUE));
1803   }
1804 };
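/* Editorial illustration: svptrue_pat_b8 (SV_ALL) folds to the same gimple
   as svptrue_b8 (), while svptrue_pat_b8 (SV_VL3) folds to a constant
   predicate whose first three elements are set, since
   aarch64_fold_sve_cnt_pat can evaluate that pattern at compile time.  */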
1805 
1806 /* Implements svqdec[bhwd]{,_pat} and svqinc[bhwd]{,_pat}.  */
1807 class svqdec_svqinc_bhwd_impl : public function_base
1808 {
1809 public:
1810   CONSTEXPR svqdec_svqinc_bhwd_impl (rtx_code code_for_sint,
1811 				     rtx_code code_for_uint,
1812 				     scalar_int_mode elem_mode)
1813     : m_code_for_sint (code_for_sint),
1814       m_code_for_uint (code_for_uint),
1815       m_elem_mode (elem_mode)
1816   {}
1817 
1818   rtx
1819   expand (function_expander &e) const OVERRIDE
1820   {
1821     /* Treat non-_pat functions in the same way as _pat functions with
1822        an SV_ALL argument.  */
1823     if (e.args.length () == 2)
1824       e.args.quick_insert (1, gen_int_mode (AARCH64_SV_ALL, DImode));
1825 
1826     /* Insert the number of elements per 128-bit block as a fake argument,
1827        between the pattern and the multiplier.  Arguments 1, 2 and 3 then
1828        correspond exactly with the 3 UNSPEC_SVE_CNT_PAT operands; see
1829        aarch64_sve_cnt_pat for details.  */
1830     unsigned int elements_per_vq = 128 / GET_MODE_BITSIZE (m_elem_mode);
1831     e.args.quick_insert (2, gen_int_mode (elements_per_vq, DImode));
1832 
1833     rtx_code code = (e.type_suffix (0).unsigned_p
1834 		     ? m_code_for_uint
1835 		     : m_code_for_sint);
1836 
1837     /* Choose between operating on integer scalars or integer vectors.  */
1838     machine_mode mode = e.vector_mode (0);
1839     if (e.mode_suffix_id == MODE_n)
1840       mode = GET_MODE_INNER (mode);
1841     return e.use_exact_insn (code_for_aarch64_sve_pat (code, mode));
1842   }
1843 
1844   /* The saturating addition or subtraction codes to use for signed and
1845      unsigned values respectively.  */
1846   rtx_code m_code_for_sint;
1847   rtx_code m_code_for_uint;
1848 
1849   /* The integer mode associated with the [bhwd] suffix.  */
1850   scalar_int_mode m_elem_mode;
1851 };
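/* Editorial illustration (assumed ACLE spellings, for exposition only):
   the scalar call svqincw_n_s32 (x, 2) is treated like
   svqincw_pat_n_s32 (x, SV_ALL, 2); after the rewrites above the operands
   are (x, SV_ALL, 4, 2), where 4 is the number of 32-bit elements in a
   128-bit block, matching the UNSPEC_SVE_CNT_PAT operand order.  */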
1852 
1853 /* Implements svqdec[bhwd]{,_pat}.  */
1854 class svqdec_bhwd_impl : public svqdec_svqinc_bhwd_impl
1855 {
1856 public:
1857   CONSTEXPR svqdec_bhwd_impl (scalar_int_mode elem_mode)
1858     : svqdec_svqinc_bhwd_impl (SS_MINUS, US_MINUS, elem_mode) {}
1859 };
1860 
1861 /* Implements svqinc[bhwd]{,_pat}.  */
1862 class svqinc_bhwd_impl : public svqdec_svqinc_bhwd_impl
1863 {
1864 public:
1865   CONSTEXPR svqinc_bhwd_impl (scalar_int_mode elem_mode)
1866     : svqdec_svqinc_bhwd_impl (SS_PLUS, US_PLUS, elem_mode) {}
1867 };
1868 
1869 /* Implements svqdecp and svqincp.  */
1870 class svqdecp_svqincp_impl : public function_base
1871 {
1872 public:
1873   CONSTEXPR svqdecp_svqincp_impl (rtx_code code_for_sint,
1874 				  rtx_code code_for_uint)
1875     : m_code_for_sint (code_for_sint),
1876       m_code_for_uint (code_for_uint)
1877   {}
1878 
1879   rtx
1880   expand (function_expander &e) const OVERRIDE
1881   {
1882     rtx_code code = (e.type_suffix (0).unsigned_p
1883 		     ? m_code_for_uint
1884 		     : m_code_for_sint);
1885     insn_code icode;
1886     if (e.mode_suffix_id == MODE_n)
1887       {
1888 	/* Increment or decrement a scalar (whose mode is given by the first
1889 	   type suffix) by the number of active elements in a predicate
1890 	   (whose mode is given by the second type suffix).  */
1891 	machine_mode mode = GET_MODE_INNER (e.vector_mode (0));
1892 	icode = code_for_aarch64_sve_cntp (code, mode, e.vector_mode (1));
1893       }
1894     else
1895       /* Increment a vector by the number of active elements in a predicate,
1896 	 with the vector mode determining the predicate mode.  */
1897       icode = code_for_aarch64_sve_cntp (code, e.vector_mode (0));
1898     return e.use_exact_insn (icode);
1899   }
1900 
1901   /* The saturating addition or subtraction codes to use for signed and
1902      unsigned values respectively.  */
1903   rtx_code m_code_for_sint;
1904   rtx_code m_code_for_uint;
1905 };
1906 
1907 class svrdffr_impl : public function_base
1908 {
1909 public:
1910   unsigned int
1911   call_properties (const function_instance &) const OVERRIDE
1912   {
1913     return CP_READ_FFR;
1914   }
1915 
1916   rtx
1917   expand (function_expander &e) const OVERRIDE
1918   {
1919     /* See the block comment in aarch64-sve.md for details about the
1920        FFR handling.  */
1921     emit_insn (gen_aarch64_copy_ffr_to_ffrt ());
1922     rtx result = e.use_exact_insn (e.pred == PRED_z
1923 				   ? CODE_FOR_aarch64_rdffr_z
1924 				   : CODE_FOR_aarch64_rdffr);
1925     emit_insn (gen_aarch64_update_ffrt ());
1926     return result;
1927   }
1928 };
1929 
1930 class svreinterpret_impl : public quiet<function_base>
1931 {
1932 public:
1933   gimple *
1934   fold (gimple_folder &f) const OVERRIDE
1935   {
1936     /* Punt to rtl if the effect of the reinterpret on registers does not
1937        conform to GCC's endianness model.  */
1938     if (!targetm.can_change_mode_class (f.vector_mode (0),
1939 					f.vector_mode (1), FP_REGS))
1940       return NULL;
1941 
1942     /* Otherwise svreinterpret corresponds directly to a VIEW_CONVERT_EXPR
1943        reinterpretation.  */
1944     tree rhs = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (f.lhs),
1945 		       gimple_call_arg (f.call, 0));
1946     return gimple_build_assign (f.lhs, VIEW_CONVERT_EXPR, rhs);
1947   }
1948 
1949   rtx
1950   expand (function_expander &e) const OVERRIDE
1951   {
1952     machine_mode mode = e.vector_mode (0);
1953     return e.use_exact_insn (code_for_aarch64_sve_reinterpret (mode));
1954   }
1955 };
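/* Editorial illustration: on a little-endian target a call such as
     svint32_t y = svreinterpret_s32_f32 (x);
   folds to VIEW_CONVERT_EXPR <svint32_t> (x); when can_change_mode_class
   says the register layout would change, the fold punts and the
   aarch64_sve_reinterpret pattern handles it instead.  */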
1956 
1957 class svrev_impl : public permute
1958 {
1959 public:
1960   gimple *
1961   fold (gimple_folder &f) const OVERRIDE
1962   {
1963     /* Punt for now on _b16 and wider; we'd need more complex evpc logic
1964        to rerecognize the result.  */
1965     if (f.type_suffix (0).bool_p && f.type_suffix (0).element_bits > 8)
1966       return NULL;
1967 
1968     /* Permute as { nelts - 1, nelts - 2, nelts - 3, ... }.  */
1969     poly_int64 nelts = TYPE_VECTOR_SUBPARTS (TREE_TYPE (f.lhs));
1970     vec_perm_builder builder (nelts, 1, 3);
1971     for (int i = 0; i < 3; ++i)
1972       builder.quick_push (nelts - i - 1);
1973     return fold_permute (f, builder);
1974   }
1975 
1976   rtx
1977   expand (function_expander &e) const OVERRIDE
1978   {
1979     return e.use_exact_insn (code_for_aarch64_sve_rev (e.vector_mode (0)));
1980   }
1981 };
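/* Editorial illustration: with 8 elements per vector the fold above
   produces the VEC_PERM_EXPR selector { 7, 6, 5, 4, 3, 2, 1, 0 }; only
   three indices are pushed explicitly because the selector is encoded as
   a single stepped series.  */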
1982 
1983 class svsel_impl : public quiet<function_base>
1984 {
1985 public:
1986   gimple *
1987   fold (gimple_folder &f) const OVERRIDE
1988   {
1989     /* svsel corresponds exactly to VEC_COND_EXPR.  */
1990     gimple_seq stmts = NULL;
1991     tree pred = f.convert_pred (stmts, f.vector_type (0), 0);
1992     gsi_insert_seq_before (f.gsi, stmts, GSI_SAME_STMT);
1993     return gimple_build_assign (f.lhs, VEC_COND_EXPR, pred,
1994 				gimple_call_arg (f.call, 1),
1995 				gimple_call_arg (f.call, 2));
1996   }
1997 
1998   rtx
1999   expand (function_expander &e) const OVERRIDE
2000   {
2001     /* svsel (cond, truev, falsev) is vcond_mask (truev, falsev, cond).  */
2002     e.rotate_inputs_left (0, 3);
2003     insn_code icode = convert_optab_handler (vcond_mask_optab,
2004 					     e.vector_mode (0),
2005 					     e.gp_mode (0));
2006     return e.use_exact_insn (icode);
2007   }
2008 };
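/* Editorial illustration: svsel_f64 (pg, a, b) folds to
   VEC_COND_EXPR <pg', a, b>, where PG' is PG converted to the predicate
   type for 64-bit elements, and expands through vcond_mask with the
   operands rotated to (a, b, cond).  */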
2009 
2010 /* Implements svset2, svset3 and svset4.  */
2011 class svset_impl : public quiet<multi_vector_function>
2012 {
2013 public:
2014   CONSTEXPR svset_impl (unsigned int vectors_per_tuple)
2015     : quiet<multi_vector_function> (vectors_per_tuple) {}
2016 
2017   gimple *
2018   fold (gimple_folder &f) const OVERRIDE
2019   {
2020     tree rhs_tuple = gimple_call_arg (f.call, 0);
2021     tree index = gimple_call_arg (f.call, 1);
2022     tree rhs_vector = gimple_call_arg (f.call, 2);
2023 
2024     /* Replace the call with two statements: a copy of the full tuple
2025        to the call result, followed by an update of the individual vector.
2026 
2027        The fold routines expect the replacement statement to have the
2028        same lhs as the original call, so return the copy statement
2029        rather than the field update.  */
2030     gassign *copy = gimple_build_assign (unshare_expr (f.lhs), rhs_tuple);
2031 
2032     /* Get a reference to the individual vector.  */
2033     tree field = tuple_type_field (TREE_TYPE (f.lhs));
2034     tree lhs_array = build3 (COMPONENT_REF, TREE_TYPE (field),
2035 			     f.lhs, field, NULL_TREE);
2036     tree lhs_vector = build4 (ARRAY_REF, TREE_TYPE (rhs_vector),
2037 			      lhs_array, index, NULL_TREE, NULL_TREE);
2038     gassign *update = gimple_build_assign (lhs_vector, rhs_vector);
2039     gsi_insert_after (f.gsi, update, GSI_SAME_STMT);
2040 
2041     return copy;
2042   }
2043 
2044   rtx
2045   expand (function_expander &e) const OVERRIDE
2046   {
2047     rtx rhs_tuple = e.args[0];
2048     unsigned int index = INTVAL (e.args[1]);
2049     rtx rhs_vector = e.args[2];
2050 
2051     /* First copy the full tuple to the target register.  */
2052     rtx lhs_tuple = e.get_nonoverlapping_reg_target ();
2053     emit_move_insn (lhs_tuple, rhs_tuple);
2054 
2055     /* ...then update the individual vector.  */
2056     rtx lhs_vector = simplify_gen_subreg (GET_MODE (rhs_vector),
2057 					  lhs_tuple, GET_MODE (lhs_tuple),
2058 					  index * BYTES_PER_SVE_VECTOR);
2059     emit_move_insn (lhs_vector, rhs_vector);
2060     return lhs_vector;
2061   }
2062 };
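/* Editorial illustration: for
     svint32x2_t res = svset2_s32 (tuple, 1, vec);
   the expansion first copies TUPLE into the result registers and then
   overwrites the second subvector, i.e. the subreg at byte offset
   1 * BYTES_PER_SVE_VECTOR.  */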
2063 
2064 class svsetffr_impl : public function_base
2065 {
2066 public:
2067   unsigned int
2068   call_properties (const function_instance &) const OVERRIDE
2069   {
2070     return CP_WRITE_FFR;
2071   }
2072 
2073   rtx
2074   expand (function_expander &e) const OVERRIDE
2075   {
2076     e.args.quick_push (CONSTM1_RTX (VNx16BImode));
2077     return e.use_exact_insn (CODE_FOR_aarch64_wrffr);
2078   }
2079 };
2080 
2081 class svst1_impl : public full_width_access
2082 {
2083 public:
2084   unsigned int
2085   call_properties (const function_instance &) const OVERRIDE
2086   {
2087     return CP_WRITE_MEMORY;
2088   }
2089 
2090   gimple *
2091   fold (gimple_folder &f) const OVERRIDE
2092   {
2093     tree vectype = f.vector_type (0);
2094 
2095     /* Get the predicate and base pointer.  */
2096     gimple_seq stmts = NULL;
2097     tree pred = f.convert_pred (stmts, vectype, 0);
2098     tree base = f.fold_contiguous_base (stmts, vectype);
2099     gsi_insert_seq_before (f.gsi, stmts, GSI_SAME_STMT);
2100 
2101     tree cookie = f.load_store_cookie (TREE_TYPE (vectype));
2102     tree rhs = gimple_call_arg (f.call, gimple_call_num_args (f.call) - 1);
2103     return gimple_build_call_internal (IFN_MASK_STORE, 4,
2104 				       base, cookie, pred, rhs);
2105   }
2106 
2107   rtx
2108   expand (function_expander &e) const OVERRIDE
2109   {
2110     insn_code icode = convert_optab_handler (maskstore_optab,
2111 					     e.vector_mode (0), e.gp_mode (0));
2112     return e.use_contiguous_store_insn (icode);
2113   }
2114 };
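/* Editorial illustration: svst1_s32 (pg, base, data) folds to the internal
   function call
     .MASK_STORE (base, cookie, pg', data)
   where COOKIE is the type/alignment cookie built above and PG' is the
   converted predicate; otherwise it expands through maskstore_optab.  */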
2115 
2116 class svst1_scatter_impl : public full_width_access
2117 {
2118 public:
2119   unsigned int
2120   call_properties (const function_instance &) const OVERRIDE
2121   {
2122     return CP_WRITE_MEMORY;
2123   }
2124 
2125   rtx
2126   expand (function_expander &e) const OVERRIDE
2127   {
2128     e.prepare_gather_address_operands (1);
2129     /* Put the predicate last, as required by mask_scatter_store_optab.  */
2130     e.rotate_inputs_left (0, 6);
2131     machine_mode mem_mode = e.memory_vector_mode ();
2132     machine_mode int_mode = aarch64_sve_int_mode (mem_mode);
2133     insn_code icode = convert_optab_handler (mask_scatter_store_optab,
2134 					     mem_mode, int_mode);
2135     return e.use_exact_insn (icode);
2136   }
2137 };
2138 
2139 /* Implements truncating forms of svst1_scatter.  */
2140 class svst1_scatter_truncate_impl : public truncating_store
2141 {
2142 public:
2143   CONSTEXPR svst1_scatter_truncate_impl (scalar_int_mode to_mode)
2144     : truncating_store (to_mode) {}
2145 
2146   rtx
2147   expand (function_expander &e) const OVERRIDE
2148   {
2149     e.prepare_gather_address_operands (1);
2150     /* Put the predicate last, since the truncating scatters use the same
2151        operand order as mask_scatter_store_optab.  */
2152     e.rotate_inputs_left (0, 6);
2153     insn_code icode = code_for_aarch64_scatter_store_trunc
2154       (e.memory_vector_mode (), e.vector_mode (0));
2155     return e.use_exact_insn (icode);
2156   }
2157 };
2158 
2159 /* Implements truncating contiguous forms of svst1.  */
2160 class svst1_truncate_impl : public truncating_store
2161 {
2162 public:
2163   CONSTEXPR svst1_truncate_impl (scalar_int_mode to_mode)
2164     : truncating_store (to_mode) {}
2165 
2166   rtx
2167   expand (function_expander &e) const OVERRIDE
2168   {
2169     insn_code icode = code_for_aarch64_store_trunc (e.memory_vector_mode (),
2170 						    e.vector_mode (0));
2171     return e.use_contiguous_store_insn (icode);
2172   }
2173 };
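/* Editorial illustration: svst1b_s32 (pg, base, data) stores only the low
   8 bits of each active 32-bit element of DATA, so the memory vector mode
   passed to code_for_aarch64_store_trunc has byte-sized elements while the
   register operand keeps the full 32-bit element mode.  */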
2174 
2175 /* Implements svst2, svst3 and svst4.  */
2176 class svst234_impl : public full_width_access
2177 {
2178 public:
2179   CONSTEXPR svst234_impl (unsigned int vectors_per_tuple)
2180     : full_width_access (vectors_per_tuple) {}
2181 
2182   unsigned int
2183   call_properties (const function_instance &) const OVERRIDE
2184   {
2185     return CP_WRITE_MEMORY;
2186   }
2187 
2188   gimple *
2189   fold (gimple_folder &f) const OVERRIDE
2190   {
2191     tree vectype = f.vector_type (0);
2192 
2193     /* Get the predicate and base pointer.  */
2194     gimple_seq stmts = NULL;
2195     tree pred = f.convert_pred (stmts, vectype, 0);
2196     tree base = f.fold_contiguous_base (stmts, vectype);
2197     gsi_insert_seq_before (f.gsi, stmts, GSI_SAME_STMT);
2198 
2199     /* View the stored data as an array of vectors.  */
2200     unsigned int num_args = gimple_call_num_args (f.call);
2201     tree rhs_tuple = gimple_call_arg (f.call, num_args - 1);
2202     tree field = tuple_type_field (TREE_TYPE (rhs_tuple));
2203     tree rhs_array = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (field), rhs_tuple);
2204 
2205     tree cookie = f.load_store_cookie (TREE_TYPE (vectype));
2206     return gimple_build_call_internal (IFN_MASK_STORE_LANES, 4,
2207 				       base, cookie, pred, rhs_array);
2208   }
2209 
2210   rtx
2211   expand (function_expander &e) const OVERRIDE
2212   {
2213     machine_mode tuple_mode = GET_MODE (e.args.last ());
2214     insn_code icode = convert_optab_handler (vec_mask_store_lanes_optab,
2215 					     tuple_mode, e.vector_mode (0));
2216     return e.use_contiguous_store_insn (icode);
2217   }
2218 };
2219 
2220 class svstnt1_impl : public full_width_access
2221 {
2222 public:
2223   unsigned int
2224   call_properties (const function_instance &) const OVERRIDE
2225   {
2226     return CP_WRITE_MEMORY;
2227   }
2228 
2229   rtx
2230   expand (function_expander &e) const OVERRIDE
2231   {
2232     insn_code icode = code_for_aarch64_stnt1 (e.vector_mode (0));
2233     return e.use_contiguous_store_insn (icode);
2234   }
2235 };
2236 
2237 class svsub_impl : public rtx_code_function
2238 {
2239 public:
2240   CONSTEXPR svsub_impl ()
2241     : rtx_code_function (MINUS, MINUS, UNSPEC_COND_FSUB) {}
2242 
2243   rtx
2244   expand (function_expander &e) const OVERRIDE
2245   {
2246     /* Canonicalize subtractions of constants to additions.  */
2247     machine_mode mode = e.vector_mode (0);
2248     if (e.try_negating_argument (2, mode))
2249       return e.map_to_rtx_codes (PLUS, PLUS, UNSPEC_COND_FADD);
2250 
2251     return rtx_code_function::expand (e);
2252   }
2253 };
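/* Editorial illustration: svsub_n_s32_x (pg, x, 1) is rewritten by the
   expander above as an addition of -1, so it reaches the same patterns as
   svadd_n_s32_x (pg, x, -1) rather than needing a separate pattern for
   subtracting a constant.  */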
2254 
2255 class svtbl_impl : public permute
2256 {
2257 public:
2258   rtx
2259   expand (function_expander &e) const OVERRIDE
2260   {
2261     return e.use_exact_insn (code_for_aarch64_sve_tbl (e.vector_mode (0)));
2262   }
2263 };
2264 
2265 /* Implements svtrn1 and svtrn2.  */
2266 class svtrn_impl : public binary_permute
2267 {
2268 public:
2269   CONSTEXPR svtrn_impl (int base)
2270     : binary_permute (base ? UNSPEC_TRN2 : UNSPEC_TRN1), m_base (base) {}
2271 
2272   gimple *
2273   fold (gimple_folder &f) const OVERRIDE
2274   {
2275     /* svtrn1: { 0, nelts, 2, nelts + 2, 4, nelts + 4, ... }
2276        svtrn2: as for svtrn1, but with 1 added to each index.  */
2277     poly_uint64 nelts = TYPE_VECTOR_SUBPARTS (TREE_TYPE (f.lhs));
2278     vec_perm_builder builder (nelts, 2, 3);
2279     for (unsigned int i = 0; i < 3; ++i)
2280       {
2281 	builder.quick_push (m_base + i * 2);
2282 	builder.quick_push (m_base + i * 2 + nelts);
2283       }
2284     return fold_permute (f, builder);
2285   }
2286 
2287   /* 0 for svtrn1, 1 for svtrn2.  */
2288   unsigned int m_base;
2289 };
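/* Editorial illustration: with 4 elements per vector, svtrn1 folds to the
   VEC_PERM_EXPR selector { 0, 4, 2, 6 } and svtrn2 to { 1, 5, 3, 7 }.  */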
2290 
2291 /* Base class for svundef{,2,3,4}.  */
2292 class svundef_impl : public quiet<multi_vector_function>
2293 {
2294 public:
2295   CONSTEXPR svundef_impl (unsigned int vectors_per_tuple)
2296     : quiet<multi_vector_function> (vectors_per_tuple) {}
2297 
2298   gimple *
2299   fold (gimple_folder &f) const OVERRIDE
2300   {
2301     /* Don't fold svundef at the gimple level.  There's no exact
2302        correspondence for SSA_NAMEs, and we explicitly don't want
2303        to generate a specific value (like an all-zeros vector).  */
2304     if (vectors_per_tuple () == 1)
2305       return NULL;
2306     return gimple_build_assign (f.lhs, build_clobber (TREE_TYPE (f.lhs)));
2307   }
2308 
2309   rtx
2310   expand (function_expander &e) const OVERRIDE
2311   {
2312     rtx target = e.get_reg_target ();
2313     emit_clobber (copy_rtx (target));
2314     return target;
2315   }
2316 };
2317 
2318 /* Implements svunpklo and svunpkhi.  */
2319 class svunpk_impl : public quiet<function_base>
2320 {
2321 public:
2322   CONSTEXPR svunpk_impl (bool high_p) : m_high_p (high_p) {}
2323 
2324   gimple *
2325   fold (gimple_folder &f) const OVERRIDE
2326   {
2327     /* Don't fold the predicate ops, since every bit of the svbool_t
2328        result is significant.  */
2329     if (f.type_suffix_ids[0] == TYPE_SUFFIX_b)
2330       return NULL;
2331 
2332     /* The first half in memory is VEC_UNPACK_LO_EXPR for little-endian
2333        and VEC_UNPACK_HI_EXPR for big-endian.  */
2334     bool high_p = BYTES_BIG_ENDIAN ? !m_high_p : m_high_p;
2335     tree_code code = high_p ? VEC_UNPACK_HI_EXPR : VEC_UNPACK_LO_EXPR;
2336     return gimple_build_assign (f.lhs, code, gimple_call_arg (f.call, 0));
2337   }
2338 
2339   rtx
2340   expand (function_expander &e) const OVERRIDE
2341   {
2342     machine_mode mode = GET_MODE (e.args[0]);
2343     unsigned int unpacku = m_high_p ? UNSPEC_UNPACKUHI : UNSPEC_UNPACKULO;
2344     unsigned int unpacks = m_high_p ? UNSPEC_UNPACKSHI : UNSPEC_UNPACKSLO;
2345     insn_code icode;
2346     if (GET_MODE_CLASS (mode) == MODE_VECTOR_BOOL)
2347       icode = code_for_aarch64_sve_punpk (unpacku, mode);
2348     else
2349       {
2350 	int unspec = e.type_suffix (0).unsigned_p ? unpacku : unpacks;
2351 	icode = code_for_aarch64_sve_unpk (unspec, unspec, mode);
2352       }
2353     return e.use_exact_insn (icode);
2354   }
2355 
2356   /* True for svunpkhi, false for svunpklo.  */
2357   bool m_high_p;
2358 };
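/* Editorial illustration: svunpklo_s32 (x) sign-extends the low half of an
   svint16_t X to svint32_t; the fold uses VEC_UNPACK_LO_EXPR on
   little-endian targets and VEC_UNPACK_HI_EXPR on big-endian ones, since
   those tree codes are defined in terms of memory element order.  */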
2359 
2360 /* Also implements svsudot.  */
2361 class svusdot_impl : public function_base
2362 {
2363 public:
2364   CONSTEXPR svusdot_impl (bool su) : m_su (su) {}
2365 
2366   rtx
2367   expand (function_expander &e) const OVERRIDE
2368   {
2369     /* The implementation of the ACLE function svsudot (for the non-lane
2370        version) is through the USDOT instruction but with the second and third
2371        inputs swapped.  */
2372     if (m_su)
2373       e.rotate_inputs_left (1, 2);
2374     /* The ACLE function has the same order requirements as for svdot.
2375        While there's no requirement for the RTL pattern to have the same sort
2376        of order as that for <sur>dot_prod, it's easier to read.
2377        Hence we do the same rotation on arguments as svdot_impl does.  */
2378     e.rotate_inputs_left (0, 3);
2379     machine_mode mode = e.vector_mode (0);
2380     insn_code icode = code_for_aarch64_dot_prod (UNSPEC_USDOT, mode);
2381     return e.use_exact_insn (icode);
2382   }
2383 
2384 private:
2385   bool m_su;
2386 };
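/* Editorial illustration: svsudot_s32 (acc, a, b), with signed A and
   unsigned B, is expanded exactly like svusdot_s32 (acc, b, a); swapping
   the two data inputs of USDOT yields the required signed-by-unsigned
   products.  */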
2387 
2388 /* Implements svuzp1 and svuzp2.  */
2389 class svuzp_impl : public binary_permute
2390 {
2391 public:
2392   CONSTEXPR svuzp_impl (unsigned int base)
2393     : binary_permute (base ? UNSPEC_UZP2 : UNSPEC_UZP1), m_base (base) {}
2394 
2395   gimple *
2396   fold (gimple_folder &f) const OVERRIDE
2397   {
2398     /* svuzp1: { 0, 2, 4, 6, ... }
2399        svuzp2: { 1, 3, 5, 7, ... }.  */
2400     poly_uint64 nelts = TYPE_VECTOR_SUBPARTS (TREE_TYPE (f.lhs));
2401     vec_perm_builder builder (nelts, 1, 3);
2402     for (unsigned int i = 0; i < 3; ++i)
2403       builder.quick_push (m_base + i * 2);
2404     return fold_permute (f, builder);
2405   }
2406 
2407   /* 0 for svuzp1, 1 for svuzp2.  */
2408   unsigned int m_base;
2409 };
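/* Editorial illustration: with 4 elements per vector, svuzp1 folds to the
   VEC_PERM_EXPR selector { 0, 2, 4, 6 } and svuzp2 to { 1, 3, 5, 7 }.  */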
2410 
2411 /* A function_base for svwhilele and svwhilelt functions.  */
2412 class svwhilelx_impl : public while_comparison
2413 {
2414 public:
2415   CONSTEXPR svwhilelx_impl (int unspec_for_sint, int unspec_for_uint, bool eq_p)
2416     : while_comparison (unspec_for_sint, unspec_for_uint), m_eq_p (eq_p)
2417   {}
2418 
2419   /* Try to fold a call by treating its arguments as constants of type T.  */
2420   template<typename T>
2421   gimple *
2422   fold_type (gimple_folder &f) const
2423   {
2424     /* Only handle cases in which both operands are constant.  */
2425     T arg0, arg1;
2426     if (!poly_int_tree_p (gimple_call_arg (f.call, 0), &arg0)
2427 	|| !poly_int_tree_p (gimple_call_arg (f.call, 1), &arg1))
2428       return NULL;
2429 
2430     /* Check whether the result is known to be all-false.  */
2431     if (m_eq_p ? known_gt (arg0, arg1) : known_ge (arg0, arg1))
2432       return f.fold_to_pfalse ();
2433 
2434     /* Punt if we can't tell at compile time whether the result
2435        is all-false.  */
2436     if (m_eq_p ? maybe_gt (arg0, arg1) : maybe_ge (arg0, arg1))
2437       return NULL;
2438 
2439     /* At this point we know the result has at least one set element.  */
2440     poly_uint64 diff = arg1 - arg0;
2441     poly_uint64 nelts = GET_MODE_NUNITS (f.vector_mode (0));
2442 
2443     /* Canonicalize the svwhilele form to the svwhilelt form.  Subtract
2444        from NELTS rather than adding to DIFF, to prevent overflow.  */
2445     if (m_eq_p)
2446       nelts -= 1;
2447 
2448     /* Check whether the result is known to be all-true.  */
2449     if (known_ge (diff, nelts))
2450       return f.fold_to_ptrue ();
2451 
2452     /* Punt if DIFF might not be the actual number of set elements
2453        in the result.  Conditional equality is fine.  */
2454     if (maybe_gt (diff, nelts))
2455       return NULL;
2456 
2457     /* At this point we know that the predicate will have DIFF set elements
2458        for svwhilelt and DIFF + 1 set elements for svwhilele (which stops
2459        after rather than before ARG1 is reached).  See if we can create
2460        the predicate at compile time.  */
2461     unsigned HOST_WIDE_INT vl;
2462     if (diff.is_constant (&vl))
2463       /* Overflow is no longer possible after the checks above.  */
2464       return f.fold_to_vl_pred (m_eq_p ? vl + 1 : vl);
2465 
2466     return NULL;
2467   }
2468 
2469   gimple *
2470   fold (gimple_folder &f) const OVERRIDE
2471   {
2472     if (f.type_suffix (1).unsigned_p)
2473       return fold_type<poly_uint64> (f);
2474     else
2475       return fold_type<poly_int64> (f);
2476   }
2477 
2478   /* True for svwhilele, false for svwhilelt.  */
2479   bool m_eq_p;
2480 };
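/* Editorial worked examples for the folds above: svwhilelt_b32 (0, 4)
   folds to a constant predicate with the first four elements set, because
   every vector length provides at least four 32-bit elements;
   svwhilelt_b8 (4, 4) folds to an all-false predicate; and calls whose
   bounds are not compile-time constants are left for the WHILELT/WHILELE
   instructions at expand time.  */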
2481 
2482 class svwrffr_impl : public function_base
2483 {
2484 public:
2485   unsigned int
2486   call_properties (const function_instance &) const OVERRIDE
2487   {
2488     return CP_WRITE_FFR;
2489   }
2490 
2491   rtx
2492   expand (function_expander &e) const OVERRIDE
2493   {
2494     return e.use_exact_insn (CODE_FOR_aarch64_wrffr);
2495   }
2496 };
2497 
2498 /* Implements svzip1 and svzip2.  */
2499 class svzip_impl : public binary_permute
2500 {
2501 public:
2502   CONSTEXPR svzip_impl (unsigned int base)
2503     : binary_permute (base ? UNSPEC_ZIP2 : UNSPEC_ZIP1), m_base (base) {}
2504 
2505   gimple *
2506   fold (gimple_folder &f) const OVERRIDE
2507   {
2508     /* svzip1: { 0, nelts, 1, nelts + 1, 2, nelts + 2, ... }
2509        svzip2: as for svzip1, but with nelts / 2 added to each index.  */
2510     poly_uint64 nelts = TYPE_VECTOR_SUBPARTS (TREE_TYPE (f.lhs));
2511     poly_uint64 base = m_base * exact_div (nelts, 2);
2512     vec_perm_builder builder (nelts, 2, 3);
2513     for (unsigned int i = 0; i < 3; ++i)
2514       {
2515 	builder.quick_push (base + i);
2516 	builder.quick_push (base + i + nelts);
2517       }
2518     return fold_permute (f, builder);
2519   }
2520 
2521   /* 0 for svzip1, 1 for svzip2.  */
2522   unsigned int m_base;
2523 };
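/* Editorial illustration: with 4 elements per vector, svzip1 folds to the
   VEC_PERM_EXPR selector { 0, 4, 1, 5 } and svzip2 to { 2, 6, 3, 7 }.  */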
2524 
2525 } /* end anonymous namespace */
2526 
2527 namespace aarch64_sve {
2528 
2529 FUNCTION (svabd, svabd_impl,)
2530 FUNCTION (svabs, quiet<rtx_code_function>, (ABS, ABS, UNSPEC_COND_FABS))
2531 FUNCTION (svacge, svac_impl, (UNSPEC_COND_FCMGE))
2532 FUNCTION (svacgt, svac_impl, (UNSPEC_COND_FCMGT))
2533 FUNCTION (svacle, svac_impl, (UNSPEC_COND_FCMLE))
2534 FUNCTION (svaclt, svac_impl, (UNSPEC_COND_FCMLT))
2535 FUNCTION (svadd, rtx_code_function, (PLUS, PLUS, UNSPEC_COND_FADD))
2536 FUNCTION (svadda, svadda_impl,)
2537 FUNCTION (svaddv, reduction, (UNSPEC_SADDV, UNSPEC_UADDV, UNSPEC_FADDV))
2538 FUNCTION (svadrb, svadr_bhwd_impl, (0))
2539 FUNCTION (svadrd, svadr_bhwd_impl, (3))
2540 FUNCTION (svadrh, svadr_bhwd_impl, (1))
2541 FUNCTION (svadrw, svadr_bhwd_impl, (2))
2542 FUNCTION (svand, rtx_code_function, (AND, AND))
2543 FUNCTION (svandv, reduction, (UNSPEC_ANDV))
2544 FUNCTION (svasr, rtx_code_function, (ASHIFTRT, ASHIFTRT))
2545 FUNCTION (svasr_wide, shift_wide, (ASHIFTRT, UNSPEC_ASHIFTRT_WIDE))
2546 FUNCTION (svasrd, unspec_based_function, (UNSPEC_ASRD, -1, -1))
2547 FUNCTION (svbfdot, fixed_insn_function, (CODE_FOR_aarch64_sve_bfdotvnx4sf))
2548 FUNCTION (svbfdot_lane, fixed_insn_function,
2549 	  (CODE_FOR_aarch64_sve_bfdot_lanevnx4sf))
2550 FUNCTION (svbfmlalb, fixed_insn_function, (CODE_FOR_aarch64_sve_bfmlalbvnx4sf))
2551 FUNCTION (svbfmlalb_lane, fixed_insn_function,
2552 	  (CODE_FOR_aarch64_sve_bfmlalb_lanevnx4sf))
2553 FUNCTION (svbfmlalt, fixed_insn_function, (CODE_FOR_aarch64_sve_bfmlaltvnx4sf))
2554 FUNCTION (svbfmlalt_lane, fixed_insn_function,
2555 	  (CODE_FOR_aarch64_sve_bfmlalt_lanevnx4sf))
2556 FUNCTION (svbfmmla, fixed_insn_function, (CODE_FOR_aarch64_sve_bfmmlavnx4sf))
2557 FUNCTION (svbic, svbic_impl,)
2558 FUNCTION (svbrka, svbrk_unary_impl, (UNSPEC_BRKA))
2559 FUNCTION (svbrkb, svbrk_unary_impl, (UNSPEC_BRKB))
2560 FUNCTION (svbrkn, svbrk_binary_impl, (UNSPEC_BRKN))
2561 FUNCTION (svbrkpa, svbrk_binary_impl, (UNSPEC_BRKPA))
2562 FUNCTION (svbrkpb, svbrk_binary_impl, (UNSPEC_BRKPB))
2563 FUNCTION (svcadd, svcadd_impl,)
2564 FUNCTION (svclasta, svclast_impl, (UNSPEC_CLASTA))
2565 FUNCTION (svclastb, svclast_impl, (UNSPEC_CLASTB))
2566 FUNCTION (svcls, unary_count, (CLRSB))
2567 FUNCTION (svclz, unary_count, (CLZ))
2568 FUNCTION (svcmla, svcmla_impl,)
2569 FUNCTION (svcmla_lane, svcmla_lane_impl,)
2570 FUNCTION (svcmpeq, svcmp_impl, (EQ_EXPR, UNSPEC_COND_FCMEQ))
2571 FUNCTION (svcmpeq_wide, svcmp_wide_impl, (EQ_EXPR, UNSPEC_COND_CMPEQ_WIDE,
2572 					  UNSPEC_COND_CMPEQ_WIDE))
2573 FUNCTION (svcmpge, svcmp_impl, (GE_EXPR, UNSPEC_COND_FCMGE))
2574 FUNCTION (svcmpge_wide, svcmp_wide_impl, (GE_EXPR, UNSPEC_COND_CMPGE_WIDE,
2575 					  UNSPEC_COND_CMPHS_WIDE))
2576 FUNCTION (svcmpgt, svcmp_impl, (GT_EXPR, UNSPEC_COND_FCMGT))
2577 FUNCTION (svcmpgt_wide, svcmp_wide_impl, (GT_EXPR, UNSPEC_COND_CMPGT_WIDE,
2578 					  UNSPEC_COND_CMPHI_WIDE))
2579 FUNCTION (svcmple, svcmp_impl, (LE_EXPR, UNSPEC_COND_FCMLE))
2580 FUNCTION (svcmple_wide, svcmp_wide_impl, (LE_EXPR, UNSPEC_COND_CMPLE_WIDE,
2581 					  UNSPEC_COND_CMPLS_WIDE))
2582 FUNCTION (svcmplt, svcmp_impl, (LT_EXPR, UNSPEC_COND_FCMLT))
2583 FUNCTION (svcmplt_wide, svcmp_wide_impl, (LT_EXPR, UNSPEC_COND_CMPLT_WIDE,
2584 					  UNSPEC_COND_CMPLO_WIDE))
2585 FUNCTION (svcmpne, svcmp_impl, (NE_EXPR, UNSPEC_COND_FCMNE))
2586 FUNCTION (svcmpne_wide, svcmp_wide_impl, (NE_EXPR, UNSPEC_COND_CMPNE_WIDE,
2587 					  UNSPEC_COND_CMPNE_WIDE))
2588 FUNCTION (svcmpuo, svcmpuo_impl,)
2589 FUNCTION (svcnot, svcnot_impl,)
2590 FUNCTION (svcnt, unary_count, (POPCOUNT))
2591 FUNCTION (svcntb, svcnt_bhwd_impl, (VNx16QImode))
2592 FUNCTION (svcntb_pat, svcnt_bhwd_pat_impl, (VNx16QImode))
2593 FUNCTION (svcntd, svcnt_bhwd_impl, (VNx2DImode))
2594 FUNCTION (svcntd_pat, svcnt_bhwd_pat_impl, (VNx2DImode))
2595 FUNCTION (svcnth, svcnt_bhwd_impl, (VNx8HImode))
2596 FUNCTION (svcnth_pat, svcnt_bhwd_pat_impl, (VNx8HImode))
2597 FUNCTION (svcntp, svcntp_impl,)
2598 FUNCTION (svcntw, svcnt_bhwd_impl, (VNx4SImode))
2599 FUNCTION (svcntw_pat, svcnt_bhwd_pat_impl, (VNx4SImode))
2600 FUNCTION (svcompact, QUIET_CODE_FOR_MODE0 (aarch64_sve_compact),)
2601 FUNCTION (svcreate2, svcreate_impl, (2))
2602 FUNCTION (svcreate3, svcreate_impl, (3))
2603 FUNCTION (svcreate4, svcreate_impl, (4))
2604 FUNCTION (svcvt, svcvt_impl,)
2605 FUNCTION (svcvtnt, CODE_FOR_MODE0 (aarch64_sve_cvtnt),)
2606 FUNCTION (svdiv, rtx_code_function, (DIV, UDIV, UNSPEC_COND_FDIV))
2607 FUNCTION (svdivr, rtx_code_function_rotated, (DIV, UDIV, UNSPEC_COND_FDIV))
2608 FUNCTION (svdot, svdot_impl,)
2609 FUNCTION (svdot_lane, svdotprod_lane_impl, (UNSPEC_SDOT, UNSPEC_UDOT, -1))
2610 FUNCTION (svdup, svdup_impl,)
2611 FUNCTION (svdup_lane, svdup_lane_impl,)
2612 FUNCTION (svdupq, svdupq_impl,)
2613 FUNCTION (svdupq_lane, svdupq_lane_impl,)
2614 FUNCTION (sveor, rtx_code_function, (XOR, XOR, -1))
2615 FUNCTION (sveorv, reduction, (UNSPEC_XORV))
2616 FUNCTION (svexpa, unspec_based_function, (-1, -1, UNSPEC_FEXPA))
2617 FUNCTION (svext, QUIET_CODE_FOR_MODE0 (aarch64_sve_ext),)
2618 FUNCTION (svextb, svext_bhw_impl, (QImode))
2619 FUNCTION (svexth, svext_bhw_impl, (HImode))
2620 FUNCTION (svextw, svext_bhw_impl, (SImode))
2621 FUNCTION (svget2, svget_impl, (2))
2622 FUNCTION (svget3, svget_impl, (3))
2623 FUNCTION (svget4, svget_impl, (4))
2624 FUNCTION (svindex, svindex_impl,)
2625 FUNCTION (svinsr, svinsr_impl,)
2626 FUNCTION (svlasta, svlast_impl, (UNSPEC_LASTA))
2627 FUNCTION (svlastb, svlast_impl, (UNSPEC_LASTB))
2628 FUNCTION (svld1, svld1_impl,)
2629 FUNCTION (svld1_gather, svld1_gather_impl,)
2630 FUNCTION (svld1ro, svld1ro_impl,)
2631 FUNCTION (svld1rq, svld1rq_impl,)
2632 FUNCTION (svld1sb, svld1_extend_impl, (TYPE_SUFFIX_s8))
2633 FUNCTION (svld1sb_gather, svld1_gather_extend_impl, (TYPE_SUFFIX_s8))
2634 FUNCTION (svld1sh, svld1_extend_impl, (TYPE_SUFFIX_s16))
2635 FUNCTION (svld1sh_gather, svld1_gather_extend_impl, (TYPE_SUFFIX_s16))
2636 FUNCTION (svld1sw, svld1_extend_impl, (TYPE_SUFFIX_s32))
2637 FUNCTION (svld1sw_gather, svld1_gather_extend_impl, (TYPE_SUFFIX_s32))
2638 FUNCTION (svld1ub, svld1_extend_impl, (TYPE_SUFFIX_u8))
2639 FUNCTION (svld1ub_gather, svld1_gather_extend_impl, (TYPE_SUFFIX_u8))
2640 FUNCTION (svld1uh, svld1_extend_impl, (TYPE_SUFFIX_u16))
2641 FUNCTION (svld1uh_gather, svld1_gather_extend_impl, (TYPE_SUFFIX_u16))
2642 FUNCTION (svld1uw, svld1_extend_impl, (TYPE_SUFFIX_u32))
2643 FUNCTION (svld1uw_gather, svld1_gather_extend_impl, (TYPE_SUFFIX_u32))
2644 FUNCTION (svld2, svld234_impl, (2))
2645 FUNCTION (svld3, svld234_impl, (3))
2646 FUNCTION (svld4, svld234_impl, (4))
2647 FUNCTION (svldff1, svldxf1_impl, (UNSPEC_LDFF1))
2648 FUNCTION (svldff1_gather, svldff1_gather_impl,)
2649 FUNCTION (svldff1sb, svldxf1_extend_impl, (TYPE_SUFFIX_s8, UNSPEC_LDFF1))
2650 FUNCTION (svldff1sb_gather, svldff1_gather_extend, (TYPE_SUFFIX_s8))
2651 FUNCTION (svldff1sh, svldxf1_extend_impl, (TYPE_SUFFIX_s16, UNSPEC_LDFF1))
2652 FUNCTION (svldff1sh_gather, svldff1_gather_extend, (TYPE_SUFFIX_s16))
2653 FUNCTION (svldff1sw, svldxf1_extend_impl, (TYPE_SUFFIX_s32, UNSPEC_LDFF1))
2654 FUNCTION (svldff1sw_gather, svldff1_gather_extend, (TYPE_SUFFIX_s32))
2655 FUNCTION (svldff1ub, svldxf1_extend_impl, (TYPE_SUFFIX_u8, UNSPEC_LDFF1))
2656 FUNCTION (svldff1ub_gather, svldff1_gather_extend, (TYPE_SUFFIX_u8))
2657 FUNCTION (svldff1uh, svldxf1_extend_impl, (TYPE_SUFFIX_u16, UNSPEC_LDFF1))
2658 FUNCTION (svldff1uh_gather, svldff1_gather_extend, (TYPE_SUFFIX_u16))
2659 FUNCTION (svldff1uw, svldxf1_extend_impl, (TYPE_SUFFIX_u32, UNSPEC_LDFF1))
2660 FUNCTION (svldff1uw_gather, svldff1_gather_extend, (TYPE_SUFFIX_u32))
2661 FUNCTION (svldnf1, svldxf1_impl, (UNSPEC_LDNF1))
2662 FUNCTION (svldnf1sb, svldxf1_extend_impl, (TYPE_SUFFIX_s8, UNSPEC_LDNF1))
2663 FUNCTION (svldnf1sh, svldxf1_extend_impl, (TYPE_SUFFIX_s16, UNSPEC_LDNF1))
2664 FUNCTION (svldnf1sw, svldxf1_extend_impl, (TYPE_SUFFIX_s32, UNSPEC_LDNF1))
2665 FUNCTION (svldnf1ub, svldxf1_extend_impl, (TYPE_SUFFIX_u8, UNSPEC_LDNF1))
2666 FUNCTION (svldnf1uh, svldxf1_extend_impl, (TYPE_SUFFIX_u16, UNSPEC_LDNF1))
2667 FUNCTION (svldnf1uw, svldxf1_extend_impl, (TYPE_SUFFIX_u32, UNSPEC_LDNF1))
2668 FUNCTION (svldnt1, svldnt1_impl,)
2669 FUNCTION (svlen, svlen_impl,)
2670 FUNCTION (svlsl, rtx_code_function, (ASHIFT, ASHIFT))
2671 FUNCTION (svlsl_wide, shift_wide, (ASHIFT, UNSPEC_ASHIFT_WIDE))
2672 FUNCTION (svlsr, rtx_code_function, (LSHIFTRT, LSHIFTRT))
2673 FUNCTION (svlsr_wide, shift_wide, (LSHIFTRT, UNSPEC_LSHIFTRT_WIDE))
2674 FUNCTION (svmad, svmad_impl,)
2675 FUNCTION (svmax, rtx_code_function, (SMAX, UMAX, UNSPEC_COND_FMAX))
2676 FUNCTION (svmaxnm, unspec_based_function, (-1, -1, UNSPEC_COND_FMAXNM))
2677 FUNCTION (svmaxnmv, reduction, (UNSPEC_FMAXNMV))
2678 FUNCTION (svmaxv, reduction, (UNSPEC_SMAXV, UNSPEC_UMAXV, UNSPEC_FMAXV))
2679 FUNCTION (svmin, rtx_code_function, (SMIN, UMIN, UNSPEC_COND_FMIN))
2680 FUNCTION (svminnm, unspec_based_function, (-1, -1, UNSPEC_COND_FMINNM))
2681 FUNCTION (svminnmv, reduction, (UNSPEC_FMINNMV))
2682 FUNCTION (svminv, reduction, (UNSPEC_SMINV, UNSPEC_UMINV, UNSPEC_FMINV))
2683 FUNCTION (svmla, svmla_impl,)
2684 FUNCTION (svmla_lane, svmla_lane_impl,)
2685 FUNCTION (svmls, svmls_impl,)
2686 FUNCTION (svmls_lane, svmls_lane_impl,)
2687 FUNCTION (svmmla, svmmla_impl,)
2688 FUNCTION (svmov, svmov_impl,)
2689 FUNCTION (svmsb, svmsb_impl,)
2690 FUNCTION (svmul, rtx_code_function, (MULT, MULT, UNSPEC_COND_FMUL))
2691 FUNCTION (svmul_lane, CODE_FOR_MODE0 (aarch64_mul_lane),)
2692 FUNCTION (svmulh, unspec_based_function, (UNSPEC_SMUL_HIGHPART,
2693 					  UNSPEC_UMUL_HIGHPART, -1))
2694 FUNCTION (svmulx, unspec_based_function, (-1, -1, UNSPEC_COND_FMULX))
2695 FUNCTION (svnand, svnand_impl,)
2696 FUNCTION (svneg, quiet<rtx_code_function>, (NEG, NEG, UNSPEC_COND_FNEG))
2697 FUNCTION (svnmad, unspec_based_function, (-1, -1, UNSPEC_COND_FNMLA))
2698 FUNCTION (svnmla, unspec_based_function_rotated, (-1, -1, UNSPEC_COND_FNMLA))
2699 FUNCTION (svnmls, unspec_based_function_rotated, (-1, -1, UNSPEC_COND_FNMLS))
2700 FUNCTION (svnmsb, unspec_based_function, (-1, -1, UNSPEC_COND_FNMLS))
2701 FUNCTION (svnor, svnor_impl,)
2702 FUNCTION (svnot, svnot_impl,)
2703 FUNCTION (svorn, svorn_impl,)
2704 FUNCTION (svorr, rtx_code_function, (IOR, IOR))
2705 FUNCTION (svorv, reduction, (UNSPEC_IORV))
2706 FUNCTION (svpfalse, svpfalse_impl,)
2707 FUNCTION (svpfirst, svpfirst_svpnext_impl, (UNSPEC_PFIRST))
2708 FUNCTION (svpnext, svpfirst_svpnext_impl, (UNSPEC_PNEXT))
2709 FUNCTION (svprfb, svprf_bhwd_impl, (VNx16QImode))
2710 FUNCTION (svprfb_gather, svprf_bhwd_gather_impl, (VNx16QImode))
2711 FUNCTION (svprfd, svprf_bhwd_impl, (VNx2DImode))
2712 FUNCTION (svprfd_gather, svprf_bhwd_gather_impl, (VNx2DImode))
2713 FUNCTION (svprfh, svprf_bhwd_impl, (VNx8HImode))
2714 FUNCTION (svprfh_gather, svprf_bhwd_gather_impl, (VNx8HImode))
2715 FUNCTION (svprfw, svprf_bhwd_impl, (VNx4SImode))
2716 FUNCTION (svprfw_gather, svprf_bhwd_gather_impl, (VNx4SImode))
2717 FUNCTION (svptest_any, svptest_impl, (NE))
2718 FUNCTION (svptest_first, svptest_impl, (LT))
2719 FUNCTION (svptest_last, svptest_impl, (LTU))
2720 FUNCTION (svptrue, svptrue_impl,)
2721 FUNCTION (svptrue_pat, svptrue_pat_impl,)
2722 FUNCTION (svqadd, rtx_code_function, (SS_PLUS, US_PLUS, -1))
2723 FUNCTION (svqdecb, svqdec_bhwd_impl, (QImode))
2724 FUNCTION (svqdecb_pat, svqdec_bhwd_impl, (QImode))
2725 FUNCTION (svqdecd, svqdec_bhwd_impl, (DImode))
2726 FUNCTION (svqdecd_pat, svqdec_bhwd_impl, (DImode))
2727 FUNCTION (svqdech, svqdec_bhwd_impl, (HImode))
2728 FUNCTION (svqdech_pat, svqdec_bhwd_impl, (HImode))
2729 FUNCTION (svqdecp, svqdecp_svqincp_impl, (SS_MINUS, US_MINUS))
2730 FUNCTION (svqdecw, svqdec_bhwd_impl, (SImode))
2731 FUNCTION (svqdecw_pat, svqdec_bhwd_impl, (SImode))
2732 FUNCTION (svqincb, svqinc_bhwd_impl, (QImode))
2733 FUNCTION (svqincb_pat, svqinc_bhwd_impl, (QImode))
2734 FUNCTION (svqincd, svqinc_bhwd_impl, (DImode))
2735 FUNCTION (svqincd_pat, svqinc_bhwd_impl, (DImode))
2736 FUNCTION (svqinch, svqinc_bhwd_impl, (HImode))
2737 FUNCTION (svqinch_pat, svqinc_bhwd_impl, (HImode))
2738 FUNCTION (svqincp, svqdecp_svqincp_impl, (SS_PLUS, US_PLUS))
2739 FUNCTION (svqincw, svqinc_bhwd_impl, (SImode))
2740 FUNCTION (svqincw_pat, svqinc_bhwd_impl, (SImode))
2741 FUNCTION (svqsub, rtx_code_function, (SS_MINUS, US_MINUS, -1))
2742 FUNCTION (svrbit, unspec_based_function, (UNSPEC_RBIT, UNSPEC_RBIT, -1))
2743 FUNCTION (svrdffr, svrdffr_impl,)
2744 FUNCTION (svrecpe, unspec_based_function, (-1, UNSPEC_URECPE, UNSPEC_FRECPE))
2745 FUNCTION (svrecps, unspec_based_function, (-1, -1, UNSPEC_FRECPS))
2746 FUNCTION (svrecpx, unspec_based_function, (-1, -1, UNSPEC_COND_FRECPX))
2747 FUNCTION (svreinterpret, svreinterpret_impl,)
2748 FUNCTION (svrev, svrev_impl,)
2749 FUNCTION (svrevb, unspec_based_function, (UNSPEC_REVB, UNSPEC_REVB, -1))
2750 FUNCTION (svrevh, unspec_based_function, (UNSPEC_REVH, UNSPEC_REVH, -1))
2751 FUNCTION (svrevw, unspec_based_function, (UNSPEC_REVW, UNSPEC_REVW, -1))
2752 FUNCTION (svrinta, unspec_based_function, (-1, -1, UNSPEC_COND_FRINTA))
2753 FUNCTION (svrinti, unspec_based_function, (-1, -1, UNSPEC_COND_FRINTI))
2754 FUNCTION (svrintm, unspec_based_function, (-1, -1, UNSPEC_COND_FRINTM))
2755 FUNCTION (svrintn, unspec_based_function, (-1, -1, UNSPEC_COND_FRINTN))
2756 FUNCTION (svrintp, unspec_based_function, (-1, -1, UNSPEC_COND_FRINTP))
2757 FUNCTION (svrintx, unspec_based_function, (-1, -1, UNSPEC_COND_FRINTX))
2758 FUNCTION (svrintz, unspec_based_function, (-1, -1, UNSPEC_COND_FRINTZ))
2759 FUNCTION (svrsqrte, unspec_based_function, (-1, UNSPEC_RSQRTE, UNSPEC_RSQRTE))
2760 FUNCTION (svrsqrts, unspec_based_function, (-1, -1, UNSPEC_RSQRTS))
2761 FUNCTION (svscale, unspec_based_function, (-1, -1, UNSPEC_COND_FSCALE))
2762 FUNCTION (svsel, svsel_impl,)
2763 FUNCTION (svset2, svset_impl, (2))
2764 FUNCTION (svset3, svset_impl, (3))
2765 FUNCTION (svset4, svset_impl, (4))
2766 FUNCTION (svsetffr, svsetffr_impl,)
2767 FUNCTION (svsplice, QUIET_CODE_FOR_MODE0 (aarch64_sve_splice),)
2768 FUNCTION (svsqrt, rtx_code_function, (SQRT, SQRT, UNSPEC_COND_FSQRT))
2769 FUNCTION (svst1, svst1_impl,)
2770 FUNCTION (svst1_scatter, svst1_scatter_impl,)
2771 FUNCTION (svst1b, svst1_truncate_impl, (QImode))
2772 FUNCTION (svst1b_scatter, svst1_scatter_truncate_impl, (QImode))
2773 FUNCTION (svst1h, svst1_truncate_impl, (HImode))
2774 FUNCTION (svst1h_scatter, svst1_scatter_truncate_impl, (HImode))
2775 FUNCTION (svst1w, svst1_truncate_impl, (SImode))
2776 FUNCTION (svst1w_scatter, svst1_scatter_truncate_impl, (SImode))
2777 FUNCTION (svst2, svst234_impl, (2))
2778 FUNCTION (svst3, svst234_impl, (3))
2779 FUNCTION (svst4, svst234_impl, (4))
2780 FUNCTION (svstnt1, svstnt1_impl,)
2781 FUNCTION (svsub, svsub_impl,)
2782 FUNCTION (svsubr, rtx_code_function_rotated, (MINUS, MINUS, UNSPEC_COND_FSUB))
2783 FUNCTION (svsudot, svusdot_impl, (true))
2784 FUNCTION (svsudot_lane, svdotprod_lane_impl, (UNSPEC_SUDOT, -1, -1))
2785 FUNCTION (svtbl, svtbl_impl,)
2786 FUNCTION (svtmad, CODE_FOR_MODE0 (aarch64_sve_tmad),)
2787 FUNCTION (svtrn1, svtrn_impl, (0))
2788 FUNCTION (svtrn1q, unspec_based_function, (UNSPEC_TRN1Q, UNSPEC_TRN1Q,
2789 					   UNSPEC_TRN1Q))
2790 FUNCTION (svtrn2, svtrn_impl, (1))
2791 FUNCTION (svtrn2q, unspec_based_function, (UNSPEC_TRN2Q, UNSPEC_TRN2Q,
2792 					   UNSPEC_TRN2Q))
2793 FUNCTION (svtsmul, unspec_based_function, (-1, -1, UNSPEC_FTSMUL))
2794 FUNCTION (svtssel, unspec_based_function, (-1, -1, UNSPEC_FTSSEL))
2795 FUNCTION (svundef, svundef_impl, (1))
2796 FUNCTION (svundef2, svundef_impl, (2))
2797 FUNCTION (svundef3, svundef_impl, (3))
2798 FUNCTION (svundef4, svundef_impl, (4))
2799 FUNCTION (svunpkhi, svunpk_impl, (true))
2800 FUNCTION (svunpklo, svunpk_impl, (false))
2801 FUNCTION (svusdot, svusdot_impl, (false))
2802 FUNCTION (svusdot_lane, svdotprod_lane_impl, (UNSPEC_USDOT, -1, -1))
2803 FUNCTION (svusmmla, unspec_based_add_function, (UNSPEC_USMATMUL, -1, -1))
2804 FUNCTION (svuzp1, svuzp_impl, (0))
2805 FUNCTION (svuzp1q, unspec_based_function, (UNSPEC_UZP1Q, UNSPEC_UZP1Q,
2806 					   UNSPEC_UZP1Q))
2807 FUNCTION (svuzp2, svuzp_impl, (1))
2808 FUNCTION (svuzp2q, unspec_based_function, (UNSPEC_UZP2Q, UNSPEC_UZP2Q,
2809 					   UNSPEC_UZP2Q))
2810 FUNCTION (svwhilele, svwhilelx_impl, (UNSPEC_WHILELE, UNSPEC_WHILELS, true))
2811 FUNCTION (svwhilelt, svwhilelx_impl, (UNSPEC_WHILELT, UNSPEC_WHILELO, false))
2812 FUNCTION (svwrffr, svwrffr_impl,)
2813 FUNCTION (svzip1, svzip_impl, (0))
2814 FUNCTION (svzip1q, unspec_based_function, (UNSPEC_ZIP1Q, UNSPEC_ZIP1Q,
2815 					   UNSPEC_ZIP1Q))
2816 FUNCTION (svzip2, svzip_impl, (1))
2817 FUNCTION (svzip2q, unspec_based_function, (UNSPEC_ZIP2Q, UNSPEC_ZIP2Q,
2818 					   UNSPEC_ZIP2Q))
2819 
2820 } /* end namespace aarch64_sve */
2821