1 /* Builtins' description for AArch64 SIMD architecture.
2    Copyright (C) 2011-2019 Free Software Foundation, Inc.
3    Contributed by ARM Ltd.
4 
5    This file is part of GCC.
6 
7    GCC is free software; you can redistribute it and/or modify it
8    under the terms of the GNU General Public License as published by
9    the Free Software Foundation; either version 3, or (at your option)
10    any later version.
11 
12    GCC is distributed in the hope that it will be useful, but
13    WITHOUT ANY WARRANTY; without even the implied warranty of
14    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15    General Public License for more details.
16 
17    You should have received a copy of the GNU General Public License
18    along with GCC; see the file COPYING3.  If not see
19    <http://www.gnu.org/licenses/>.  */
20 
21 #define IN_TARGET_CODE 1
22 
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "tm.h"
27 #include "function.h"
28 #include "basic-block.h"
29 #include "rtl.h"
30 #include "tree.h"
31 #include "gimple.h"
32 #include "memmodel.h"
33 #include "tm_p.h"
34 #include "expmed.h"
35 #include "optabs.h"
36 #include "recog.h"
37 #include "diagnostic-core.h"
38 #include "fold-const.h"
39 #include "stor-layout.h"
40 #include "explow.h"
41 #include "expr.h"
42 #include "langhooks.h"
43 #include "gimple-iterator.h"
44 #include "case-cfn-macros.h"
45 #include "emit-rtl.h"
46 
47 #define v8qi_UP  E_V8QImode
48 #define v4hi_UP  E_V4HImode
49 #define v4hf_UP  E_V4HFmode
50 #define v2si_UP  E_V2SImode
51 #define v2sf_UP  E_V2SFmode
52 #define v1df_UP  E_V1DFmode
53 #define di_UP    E_DImode
54 #define df_UP    E_DFmode
55 #define v16qi_UP E_V16QImode
56 #define v8hi_UP  E_V8HImode
57 #define v8hf_UP  E_V8HFmode
58 #define v4si_UP  E_V4SImode
59 #define v4sf_UP  E_V4SFmode
60 #define v2di_UP  E_V2DImode
61 #define v2df_UP  E_V2DFmode
62 #define ti_UP	 E_TImode
63 #define oi_UP	 E_OImode
64 #define ci_UP	 E_CImode
65 #define xi_UP	 E_XImode
66 #define si_UP    E_SImode
67 #define sf_UP    E_SFmode
68 #define hi_UP    E_HImode
69 #define hf_UP    E_HFmode
70 #define qi_UP    E_QImode
71 #define UP(X) X##_UP
72 
73 #define SIMD_MAX_BUILTIN_ARGS 5
74 
75 enum aarch64_type_qualifiers
76 {
77   /* T foo.  */
78   qualifier_none = 0x0,
79   /* unsigned T foo.  */
80   qualifier_unsigned = 0x1, /* 1 << 0  */
81   /* const T foo.  */
82   qualifier_const = 0x2, /* 1 << 1  */
83   /* T *foo.  */
84   qualifier_pointer = 0x4, /* 1 << 2  */
85   /* Used when expanding arguments if an operand could
86      be an immediate.  */
87   qualifier_immediate = 0x8, /* 1 << 3  */
88   qualifier_maybe_immediate = 0x10, /* 1 << 4  */
89   /* void foo (...).  */
90   qualifier_void = 0x20, /* 1 << 5  */
  /* Some patterns may have internal operands; this qualifier tells the
     initialisation code to skip that operand.  */
93   qualifier_internal = 0x40, /* 1 << 6  */
94   /* Some builtins should use the T_*mode* encoded in a simd_builtin_datum
95      rather than using the type of the operand.  */
96   qualifier_map_mode = 0x80, /* 1 << 7  */
97   /* qualifier_pointer | qualifier_map_mode  */
98   qualifier_pointer_map_mode = 0x84,
99   /* qualifier_const | qualifier_pointer | qualifier_map_mode  */
100   qualifier_const_pointer_map_mode = 0x86,
101   /* Polynomial types.  */
102   qualifier_poly = 0x100,
  /* Lane indices - must be in range, and flipped for big-endian.  */
104   qualifier_lane_index = 0x200,
105   /* Lane indices for single lane structure loads and stores.  */
106   qualifier_struct_load_store_lane_index = 0x400,
  /* Lane indices selected in pairs - must be in range, and flipped for
     big-endian.  */
109   qualifier_lane_pair_index = 0x800,
110 };
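
/* For illustration only: a builtin whose qualifier list is
   { qualifier_unsigned, qualifier_none, qualifier_immediate }
   (see TYPES_SHIFTIMM_USS below) returns an unsigned vector, takes a signed
   vector as its first argument and requires a compile-time integer constant
   as its second; element 0 of a qualifier list always describes the return
   value.  */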
111 
112 typedef struct
113 {
114   const char *name;
115   machine_mode mode;
116   const enum insn_code code;
117   unsigned int fcode;
118   enum aarch64_type_qualifiers *qualifiers;
119 } aarch64_simd_builtin_datum;
120 
121 static enum aarch64_type_qualifiers
122 aarch64_types_unop_qualifiers[SIMD_MAX_BUILTIN_ARGS]
123   = { qualifier_none, qualifier_none };
124 #define TYPES_UNOP (aarch64_types_unop_qualifiers)
125 static enum aarch64_type_qualifiers
126 aarch64_types_unopu_qualifiers[SIMD_MAX_BUILTIN_ARGS]
127   = { qualifier_unsigned, qualifier_unsigned };
128 #define TYPES_UNOPU (aarch64_types_unopu_qualifiers)
129 static enum aarch64_type_qualifiers
130 aarch64_types_unopus_qualifiers[SIMD_MAX_BUILTIN_ARGS]
131   = { qualifier_unsigned, qualifier_none };
132 #define TYPES_UNOPUS (aarch64_types_unopus_qualifiers)
133 static enum aarch64_type_qualifiers
134 aarch64_types_binop_qualifiers[SIMD_MAX_BUILTIN_ARGS]
135   = { qualifier_none, qualifier_none, qualifier_maybe_immediate };
136 #define TYPES_BINOP (aarch64_types_binop_qualifiers)
137 static enum aarch64_type_qualifiers
138 aarch64_types_binopu_qualifiers[SIMD_MAX_BUILTIN_ARGS]
139   = { qualifier_unsigned, qualifier_unsigned, qualifier_unsigned };
140 #define TYPES_BINOPU (aarch64_types_binopu_qualifiers)
141 static enum aarch64_type_qualifiers
142 aarch64_types_binop_uus_qualifiers[SIMD_MAX_BUILTIN_ARGS]
143   = { qualifier_unsigned, qualifier_unsigned, qualifier_none };
144 #define TYPES_BINOP_UUS (aarch64_types_binop_uus_qualifiers)
145 static enum aarch64_type_qualifiers
146 aarch64_types_binop_ssu_qualifiers[SIMD_MAX_BUILTIN_ARGS]
147   = { qualifier_none, qualifier_none, qualifier_unsigned };
148 #define TYPES_BINOP_SSU (aarch64_types_binop_ssu_qualifiers)
149 static enum aarch64_type_qualifiers
150 aarch64_types_binop_uss_qualifiers[SIMD_MAX_BUILTIN_ARGS]
151   = { qualifier_unsigned, qualifier_none, qualifier_none };
152 #define TYPES_BINOP_USS (aarch64_types_binop_uss_qualifiers)
153 static enum aarch64_type_qualifiers
154 aarch64_types_binopp_qualifiers[SIMD_MAX_BUILTIN_ARGS]
155   = { qualifier_poly, qualifier_poly, qualifier_poly };
156 #define TYPES_BINOPP (aarch64_types_binopp_qualifiers)
157 
158 static enum aarch64_type_qualifiers
159 aarch64_types_ternop_qualifiers[SIMD_MAX_BUILTIN_ARGS]
160   = { qualifier_none, qualifier_none, qualifier_none, qualifier_none };
161 #define TYPES_TERNOP (aarch64_types_ternop_qualifiers)
162 static enum aarch64_type_qualifiers
163 aarch64_types_ternop_lane_qualifiers[SIMD_MAX_BUILTIN_ARGS]
164   = { qualifier_none, qualifier_none, qualifier_none, qualifier_lane_index };
165 #define TYPES_TERNOP_LANE (aarch64_types_ternop_lane_qualifiers)
166 static enum aarch64_type_qualifiers
167 aarch64_types_ternopu_qualifiers[SIMD_MAX_BUILTIN_ARGS]
168   = { qualifier_unsigned, qualifier_unsigned,
169       qualifier_unsigned, qualifier_unsigned };
170 #define TYPES_TERNOPU (aarch64_types_ternopu_qualifiers)
171 static enum aarch64_type_qualifiers
172 aarch64_types_ternopu_imm_qualifiers[SIMD_MAX_BUILTIN_ARGS]
173   = { qualifier_unsigned, qualifier_unsigned,
174       qualifier_unsigned, qualifier_immediate };
175 #define TYPES_TERNOPUI (aarch64_types_ternopu_imm_qualifiers)
176 
177 
178 static enum aarch64_type_qualifiers
179 aarch64_types_quadop_lane_pair_qualifiers[SIMD_MAX_BUILTIN_ARGS]
180   = { qualifier_none, qualifier_none, qualifier_none,
181       qualifier_none, qualifier_lane_pair_index };
182 #define TYPES_QUADOP_LANE_PAIR (aarch64_types_quadop_lane_pair_qualifiers)
183 static enum aarch64_type_qualifiers
184 aarch64_types_quadop_lane_qualifiers[SIMD_MAX_BUILTIN_ARGS]
185   = { qualifier_none, qualifier_none, qualifier_none,
186       qualifier_none, qualifier_lane_index };
187 #define TYPES_QUADOP_LANE (aarch64_types_quadop_lane_qualifiers)
188 static enum aarch64_type_qualifiers
189 aarch64_types_quadopu_lane_qualifiers[SIMD_MAX_BUILTIN_ARGS]
190   = { qualifier_unsigned, qualifier_unsigned, qualifier_unsigned,
191       qualifier_unsigned, qualifier_lane_index };
192 #define TYPES_QUADOPU_LANE (aarch64_types_quadopu_lane_qualifiers)
193 
194 static enum aarch64_type_qualifiers
195 aarch64_types_quadopu_imm_qualifiers[SIMD_MAX_BUILTIN_ARGS]
196   = { qualifier_unsigned, qualifier_unsigned, qualifier_unsigned,
197       qualifier_unsigned, qualifier_immediate };
198 #define TYPES_QUADOPUI (aarch64_types_quadopu_imm_qualifiers)
199 
200 static enum aarch64_type_qualifiers
201 aarch64_types_binop_imm_p_qualifiers[SIMD_MAX_BUILTIN_ARGS]
202   = { qualifier_poly, qualifier_none, qualifier_immediate };
203 #define TYPES_GETREGP (aarch64_types_binop_imm_p_qualifiers)
204 static enum aarch64_type_qualifiers
205 aarch64_types_binop_imm_qualifiers[SIMD_MAX_BUILTIN_ARGS]
206   = { qualifier_none, qualifier_none, qualifier_immediate };
207 #define TYPES_GETREG (aarch64_types_binop_imm_qualifiers)
208 #define TYPES_SHIFTIMM (aarch64_types_binop_imm_qualifiers)
209 static enum aarch64_type_qualifiers
210 aarch64_types_shift_to_unsigned_qualifiers[SIMD_MAX_BUILTIN_ARGS]
211   = { qualifier_unsigned, qualifier_none, qualifier_immediate };
212 #define TYPES_SHIFTIMM_USS (aarch64_types_shift_to_unsigned_qualifiers)
213 static enum aarch64_type_qualifiers
214 aarch64_types_fcvt_from_unsigned_qualifiers[SIMD_MAX_BUILTIN_ARGS]
215   = { qualifier_none, qualifier_unsigned, qualifier_immediate };
216 #define TYPES_FCVTIMM_SUS (aarch64_types_fcvt_from_unsigned_qualifiers)
217 static enum aarch64_type_qualifiers
218 aarch64_types_unsigned_shift_qualifiers[SIMD_MAX_BUILTIN_ARGS]
219   = { qualifier_unsigned, qualifier_unsigned, qualifier_immediate };
220 #define TYPES_USHIFTIMM (aarch64_types_unsigned_shift_qualifiers)
221 
222 static enum aarch64_type_qualifiers
223 aarch64_types_ternop_s_imm_p_qualifiers[SIMD_MAX_BUILTIN_ARGS]
224   = { qualifier_none, qualifier_none, qualifier_poly, qualifier_immediate};
225 #define TYPES_SETREGP (aarch64_types_ternop_s_imm_p_qualifiers)
226 static enum aarch64_type_qualifiers
227 aarch64_types_ternop_s_imm_qualifiers[SIMD_MAX_BUILTIN_ARGS]
228   = { qualifier_none, qualifier_none, qualifier_none, qualifier_immediate};
229 #define TYPES_SETREG (aarch64_types_ternop_s_imm_qualifiers)
230 #define TYPES_SHIFTINSERT (aarch64_types_ternop_s_imm_qualifiers)
231 #define TYPES_SHIFTACC (aarch64_types_ternop_s_imm_qualifiers)
232 
233 static enum aarch64_type_qualifiers
234 aarch64_types_ternop_p_imm_qualifiers[SIMD_MAX_BUILTIN_ARGS]
235   = { qualifier_poly, qualifier_poly, qualifier_poly, qualifier_immediate};
236 #define TYPES_SHIFTINSERTP (aarch64_types_ternop_p_imm_qualifiers)
237 
238 static enum aarch64_type_qualifiers
239 aarch64_types_unsigned_shiftacc_qualifiers[SIMD_MAX_BUILTIN_ARGS]
240   = { qualifier_unsigned, qualifier_unsigned, qualifier_unsigned,
241       qualifier_immediate };
242 #define TYPES_USHIFTACC (aarch64_types_unsigned_shiftacc_qualifiers)
243 
244 
245 static enum aarch64_type_qualifiers
246 aarch64_types_combine_qualifiers[SIMD_MAX_BUILTIN_ARGS]
247   = { qualifier_none, qualifier_none, qualifier_none };
248 #define TYPES_COMBINE (aarch64_types_combine_qualifiers)
249 
250 static enum aarch64_type_qualifiers
251 aarch64_types_combine_p_qualifiers[SIMD_MAX_BUILTIN_ARGS]
252   = { qualifier_poly, qualifier_poly, qualifier_poly };
253 #define TYPES_COMBINEP (aarch64_types_combine_p_qualifiers)
254 
255 static enum aarch64_type_qualifiers
256 aarch64_types_load1_qualifiers[SIMD_MAX_BUILTIN_ARGS]
257   = { qualifier_none, qualifier_const_pointer_map_mode };
258 #define TYPES_LOAD1 (aarch64_types_load1_qualifiers)
259 #define TYPES_LOADSTRUCT (aarch64_types_load1_qualifiers)
260 static enum aarch64_type_qualifiers
261 aarch64_types_loadstruct_lane_qualifiers[SIMD_MAX_BUILTIN_ARGS]
262   = { qualifier_none, qualifier_const_pointer_map_mode,
263       qualifier_none, qualifier_struct_load_store_lane_index };
264 #define TYPES_LOADSTRUCT_LANE (aarch64_types_loadstruct_lane_qualifiers)
265 
266 static enum aarch64_type_qualifiers
267 aarch64_types_bsl_p_qualifiers[SIMD_MAX_BUILTIN_ARGS]
268   = { qualifier_poly, qualifier_unsigned,
269       qualifier_poly, qualifier_poly };
270 #define TYPES_BSL_P (aarch64_types_bsl_p_qualifiers)
271 static enum aarch64_type_qualifiers
272 aarch64_types_bsl_s_qualifiers[SIMD_MAX_BUILTIN_ARGS]
273   = { qualifier_none, qualifier_unsigned,
274       qualifier_none, qualifier_none };
275 #define TYPES_BSL_S (aarch64_types_bsl_s_qualifiers)
276 static enum aarch64_type_qualifiers
277 aarch64_types_bsl_u_qualifiers[SIMD_MAX_BUILTIN_ARGS]
278   = { qualifier_unsigned, qualifier_unsigned,
279       qualifier_unsigned, qualifier_unsigned };
280 #define TYPES_BSL_U (aarch64_types_bsl_u_qualifiers)
281 
282 /* The first argument (return type) of a store should be void type,
   which we represent with qualifier_void.  Its first operand will be
284    a DImode pointer to the location to store to, so we must use
285    qualifier_map_mode | qualifier_pointer to build a pointer to the
286    element type of the vector.  */
287 static enum aarch64_type_qualifiers
288 aarch64_types_store1_p_qualifiers[SIMD_MAX_BUILTIN_ARGS]
289   = { qualifier_void, qualifier_pointer_map_mode, qualifier_poly };
290 #define TYPES_STORE1P (aarch64_types_store1_p_qualifiers)
291 static enum aarch64_type_qualifiers
292 aarch64_types_store1_qualifiers[SIMD_MAX_BUILTIN_ARGS]
293   = { qualifier_void, qualifier_pointer_map_mode, qualifier_none };
294 #define TYPES_STORE1 (aarch64_types_store1_qualifiers)
295 #define TYPES_STORESTRUCT (aarch64_types_store1_qualifiers)
296 static enum aarch64_type_qualifiers
297 aarch64_types_storestruct_lane_qualifiers[SIMD_MAX_BUILTIN_ARGS]
298   = { qualifier_void, qualifier_pointer_map_mode,
299       qualifier_none, qualifier_struct_load_store_lane_index };
300 #define TYPES_STORESTRUCT_LANE (aarch64_types_storestruct_lane_qualifiers)
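
/* As a rough illustration of the above: an st1 builtin in V4SImode
   (TYPES_STORE1) ends up with a prototype along the lines of
     void __builtin_aarch64_st1v4si (int *, __Int32x4_t);
   the qualifier_pointer_map_mode entry is what turns the pattern's DImode
   address operand into an "int *" parameter.  */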
301 
302 #define CF0(N, X) CODE_FOR_aarch64_##N##X
303 #define CF1(N, X) CODE_FOR_##N##X##1
304 #define CF2(N, X) CODE_FOR_##N##X##2
305 #define CF3(N, X) CODE_FOR_##N##X##3
306 #define CF4(N, X) CODE_FOR_##N##X##4
307 #define CF10(N, X) CODE_FOR_##N##X
308 
309 #define VAR1(T, N, MAP, A) \
310   {#N #A, UP (A), CF##MAP (N, A), 0, TYPES_##T},
311 #define VAR2(T, N, MAP, A, B) \
312   VAR1 (T, N, MAP, A) \
313   VAR1 (T, N, MAP, B)
314 #define VAR3(T, N, MAP, A, B, C) \
315   VAR2 (T, N, MAP, A, B) \
316   VAR1 (T, N, MAP, C)
317 #define VAR4(T, N, MAP, A, B, C, D) \
318   VAR3 (T, N, MAP, A, B, C) \
319   VAR1 (T, N, MAP, D)
320 #define VAR5(T, N, MAP, A, B, C, D, E) \
321   VAR4 (T, N, MAP, A, B, C, D) \
322   VAR1 (T, N, MAP, E)
323 #define VAR6(T, N, MAP, A, B, C, D, E, F) \
324   VAR5 (T, N, MAP, A, B, C, D, E) \
325   VAR1 (T, N, MAP, F)
326 #define VAR7(T, N, MAP, A, B, C, D, E, F, G) \
327   VAR6 (T, N, MAP, A, B, C, D, E, F) \
328   VAR1 (T, N, MAP, G)
329 #define VAR8(T, N, MAP, A, B, C, D, E, F, G, H) \
330   VAR7 (T, N, MAP, A, B, C, D, E, F, G) \
331   VAR1 (T, N, MAP, H)
332 #define VAR9(T, N, MAP, A, B, C, D, E, F, G, H, I) \
333   VAR8 (T, N, MAP, A, B, C, D, E, F, G, H) \
334   VAR1 (T, N, MAP, I)
335 #define VAR10(T, N, MAP, A, B, C, D, E, F, G, H, I, J) \
336   VAR9 (T, N, MAP, A, B, C, D, E, F, G, H, I) \
337   VAR1 (T, N, MAP, J)
338 #define VAR11(T, N, MAP, A, B, C, D, E, F, G, H, I, J, K) \
339   VAR10 (T, N, MAP, A, B, C, D, E, F, G, H, I, J) \
340   VAR1 (T, N, MAP, K)
341 #define VAR12(T, N, MAP, A, B, C, D, E, F, G, H, I, J, K, L) \
342   VAR11 (T, N, MAP, A, B, C, D, E, F, G, H, I, J, K) \
343   VAR1 (T, N, MAP, L)
344 #define VAR13(T, N, MAP, A, B, C, D, E, F, G, H, I, J, K, L, M) \
345   VAR12 (T, N, MAP, A, B, C, D, E, F, G, H, I, J, K, L) \
346   VAR1 (T, N, MAP, M)
347 #define VAR14(T, X, MAP, A, B, C, D, E, F, G, H, I, J, K, L, M, N) \
348   VAR13 (T, X, MAP, A, B, C, D, E, F, G, H, I, J, K, L, M) \
349   VAR1 (T, X, MAP, N)
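
/* As an illustration (hypothetical entry, not necessarily present in
   aarch64-simd-builtins.def): VAR1 (BINOP, add, 3, v4si) would expand to
     {"addv4si", E_V4SImode, CODE_FOR_addv4si3, 0, TYPES_BINOP},
   i.e. one aarch64_simd_builtin_datum per listed mode, with the insn code
   built by the CFn macro selected by MAP.  */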
350 
351 #include "aarch64-builtin-iterators.h"
352 
353 static aarch64_simd_builtin_datum aarch64_simd_builtin_data[] = {
354 #include "aarch64-simd-builtins.def"
355 };
356 
/* There are only 8 CRC32 builtins.  Probably not worth their own .def file.  */
358 #define AARCH64_CRC32_BUILTINS \
359   CRC32_BUILTIN (crc32b, QI) \
360   CRC32_BUILTIN (crc32h, HI) \
361   CRC32_BUILTIN (crc32w, SI) \
362   CRC32_BUILTIN (crc32x, DI) \
363   CRC32_BUILTIN (crc32cb, QI) \
364   CRC32_BUILTIN (crc32ch, HI) \
365   CRC32_BUILTIN (crc32cw, SI) \
366   CRC32_BUILTIN (crc32cx, DI)
367 
/* The next 8 FCMLA intrinsics require some special handling compared to the
   normal simd intrinsics.  */
370 #define AARCH64_SIMD_FCMLA_LANEQ_BUILTINS \
371   FCMLA_LANEQ_BUILTIN (0, v2sf, fcmla, V2SF, false) \
372   FCMLA_LANEQ_BUILTIN (90, v2sf, fcmla, V2SF, false) \
373   FCMLA_LANEQ_BUILTIN (180, v2sf, fcmla, V2SF, false) \
374   FCMLA_LANEQ_BUILTIN (270, v2sf, fcmla, V2SF, false) \
375   FCMLA_LANEQ_BUILTIN (0, v4hf, fcmla_laneq, V4HF, true) \
376   FCMLA_LANEQ_BUILTIN (90, v4hf, fcmla_laneq, V4HF, true) \
377   FCMLA_LANEQ_BUILTIN (180, v4hf, fcmla_laneq, V4HF, true) \
378   FCMLA_LANEQ_BUILTIN (270, v4hf, fcmla_laneq, V4HF, true) \
379 
380 typedef struct
381 {
382   const char *name;
383   machine_mode mode;
384   const enum insn_code icode;
385   unsigned int fcode;
386 } aarch64_crc_builtin_datum;
387 
388 /* Hold information about how to expand the FCMLA_LANEQ builtins.  */
389 typedef struct
390 {
391   const char *name;
392   machine_mode mode;
393   const enum insn_code icode;
394   unsigned int fcode;
395   bool lane;
396 } aarch64_fcmla_laneq_builtin_datum;
397 
398 #define CRC32_BUILTIN(N, M) \
399   AARCH64_BUILTIN_##N,
400 
401 #define FCMLA_LANEQ_BUILTIN(I, N, X, M, T) \
402   AARCH64_SIMD_BUILTIN_FCMLA_LANEQ##I##_##M,
403 
404 #undef VAR1
405 #define VAR1(T, N, MAP, A) \
406   AARCH64_SIMD_BUILTIN_##T##_##N##A,
407 
408 enum aarch64_builtins
409 {
410   AARCH64_BUILTIN_MIN,
411 
412   AARCH64_BUILTIN_GET_FPCR,
413   AARCH64_BUILTIN_SET_FPCR,
414   AARCH64_BUILTIN_GET_FPSR,
415   AARCH64_BUILTIN_SET_FPSR,
416 
417   AARCH64_BUILTIN_RSQRT_DF,
418   AARCH64_BUILTIN_RSQRT_SF,
419   AARCH64_BUILTIN_RSQRT_V2DF,
420   AARCH64_BUILTIN_RSQRT_V2SF,
421   AARCH64_BUILTIN_RSQRT_V4SF,
422   AARCH64_SIMD_BUILTIN_BASE,
423   AARCH64_SIMD_BUILTIN_LANE_CHECK,
424 #include "aarch64-simd-builtins.def"
425   /* The first enum element which is based on an insn_data pattern.  */
426   AARCH64_SIMD_PATTERN_START = AARCH64_SIMD_BUILTIN_LANE_CHECK + 1,
427   AARCH64_SIMD_BUILTIN_MAX = AARCH64_SIMD_PATTERN_START
428 			      + ARRAY_SIZE (aarch64_simd_builtin_data) - 1,
429   AARCH64_CRC32_BUILTIN_BASE,
430   AARCH64_CRC32_BUILTINS
431   AARCH64_CRC32_BUILTIN_MAX,
432   /* ARMv8.3-A Pointer Authentication Builtins.  */
433   AARCH64_PAUTH_BUILTIN_AUTIA1716,
434   AARCH64_PAUTH_BUILTIN_PACIA1716,
435   AARCH64_PAUTH_BUILTIN_XPACLRI,
436   /* Special cased Armv8.3-A Complex FMA by Lane quad Builtins.  */
437   AARCH64_SIMD_FCMLA_LANEQ_BUILTIN_BASE,
438   AARCH64_SIMD_FCMLA_LANEQ_BUILTINS
  /* Builtin for the Armv8.3-A JavaScript conversion instruction.  */
440   AARCH64_JSCVT,
441   /* Armv8.5-a RNG instruction builtins.  */
442   AARCH64_BUILTIN_RNG_RNDR,
443   AARCH64_BUILTIN_RNG_RNDRRS,
444   AARCH64_BUILTIN_MAX
445 };
446 
447 #undef CRC32_BUILTIN
448 #define CRC32_BUILTIN(N, M) \
449   {"__builtin_aarch64_"#N, E_##M##mode, CODE_FOR_aarch64_##N, AARCH64_BUILTIN_##N},
450 
451 static aarch64_crc_builtin_datum aarch64_crc_builtin_data[] = {
452   AARCH64_CRC32_BUILTINS
453 };
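
/* For example, CRC32_BUILTIN (crc32b, QI) above expands to
     {"__builtin_aarch64_crc32b", E_QImode, CODE_FOR_aarch64_crc32b,
      AARCH64_BUILTIN_crc32b},
   and aarch64_init_crc32_builtins below gives it a prototype roughly
   equivalent to uint32_t (uint32_t, uint8_t).  */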
454 
455 
456 #undef FCMLA_LANEQ_BUILTIN
457 #define FCMLA_LANEQ_BUILTIN(I, N, X, M, T) \
458   {"__builtin_aarch64_fcmla_laneq"#I#N, E_##M##mode, CODE_FOR_aarch64_##X##I##N, \
459    AARCH64_SIMD_BUILTIN_FCMLA_LANEQ##I##_##M, T},
460 
/* This structure describes the mapping from the builtin to the instruction
   to generate in the backend and how to invoke that instruction.  */
463 static aarch64_fcmla_laneq_builtin_datum aarch64_fcmla_lane_builtin_data[] = {
464   AARCH64_SIMD_FCMLA_LANEQ_BUILTINS
465 };
466 
467 #undef CRC32_BUILTIN
468 
469 static GTY(()) tree aarch64_builtin_decls[AARCH64_BUILTIN_MAX];
470 
471 #define NUM_DREG_TYPES 6
472 #define NUM_QREG_TYPES 6
473 
474 /* Internal scalar builtin types.  These types are used to support
475    neon intrinsic builtins.  They are _not_ user-visible types.  Therefore
   the mangling for these types is implementation defined.  */
477 const char *aarch64_scalar_builtin_types[] = {
478   "__builtin_aarch64_simd_qi",
479   "__builtin_aarch64_simd_hi",
480   "__builtin_aarch64_simd_si",
481   "__builtin_aarch64_simd_hf",
482   "__builtin_aarch64_simd_sf",
483   "__builtin_aarch64_simd_di",
484   "__builtin_aarch64_simd_df",
485   "__builtin_aarch64_simd_poly8",
486   "__builtin_aarch64_simd_poly16",
487   "__builtin_aarch64_simd_poly64",
488   "__builtin_aarch64_simd_poly128",
489   "__builtin_aarch64_simd_ti",
490   "__builtin_aarch64_simd_uqi",
491   "__builtin_aarch64_simd_uhi",
492   "__builtin_aarch64_simd_usi",
493   "__builtin_aarch64_simd_udi",
494   "__builtin_aarch64_simd_ei",
495   "__builtin_aarch64_simd_oi",
496   "__builtin_aarch64_simd_ci",
497   "__builtin_aarch64_simd_xi",
498   NULL
499 };
500 
501 #define ENTRY(E, M, Q, G) E,
502 enum aarch64_simd_type
503 {
504 #include "aarch64-simd-builtin-types.def"
505   ARM_NEON_H_TYPES_LAST
506 };
507 #undef ENTRY
508 
509 struct aarch64_simd_type_info
510 {
511   enum aarch64_simd_type type;
512 
513   /* Internal type name.  */
514   const char *name;
515 
  /* Internal type name (mangled).  The mangled names conform to the
517      AAPCS64 (see "Procedure Call Standard for the ARM 64-bit Architecture",
518      Appendix A).  To qualify for emission with the mangled names defined in
519      that document, a vector type must not only be of the correct mode but also
520      be of the correct internal AdvSIMD vector type (e.g. __Int8x8_t); these
521      types are registered by aarch64_init_simd_builtin_types ().  In other
522      words, vector types defined in other ways e.g. via vector_size attribute
523      will get default mangled names.  */
524   const char *mangle;
525 
526   /* Internal type.  */
527   tree itype;
528 
529   /* Element type.  */
530   tree eltype;
531 
532   /* Machine mode the internal type maps to.  */
533   enum machine_mode mode;
534 
535   /* Qualifiers.  */
536   enum aarch64_type_qualifiers q;
537 };
538 
539 #define ENTRY(E, M, Q, G)  \
540   {E, "__" #E, #G "__" #E, NULL_TREE, NULL_TREE, E_##M##mode, qualifier_##Q},
541 static struct aarch64_simd_type_info aarch64_simd_types [] = {
542 #include "aarch64-simd-builtin-types.def"
543 };
544 #undef ENTRY
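
/* For instance, an entry along the lines of
     ENTRY (Int8x8_t, V8QI, none, 10)
   in aarch64-simd-builtin-types.def expands to
     {Int8x8_t, "__Int8x8_t", "10__Int8x8_t", NULL_TREE, NULL_TREE,
      E_V8QImode, qualifier_none},
   where the trailing number feeds the AAPCS64 mangled name.  */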
545 
546 static tree aarch64_simd_intOI_type_node = NULL_TREE;
547 static tree aarch64_simd_intCI_type_node = NULL_TREE;
548 static tree aarch64_simd_intXI_type_node = NULL_TREE;
549 
550 /* The user-visible __fp16 type, and a pointer to that type.  Used
551    across the back-end.  */
552 tree aarch64_fp16_type_node = NULL_TREE;
553 tree aarch64_fp16_ptr_type_node = NULL_TREE;
554 
555 static const char *
aarch64_mangle_builtin_scalar_type (const_tree type)
557 {
558   int i = 0;
559 
560   while (aarch64_scalar_builtin_types[i] != NULL)
561     {
562       const char *name = aarch64_scalar_builtin_types[i];
563 
564       if (TREE_CODE (TYPE_NAME (type)) == TYPE_DECL
565 	  && DECL_NAME (TYPE_NAME (type))
566 	  && !strcmp (IDENTIFIER_POINTER (DECL_NAME (TYPE_NAME (type))), name))
567 	return aarch64_scalar_builtin_types[i];
568       i++;
569     }
570   return NULL;
571 }
572 
573 static const char *
aarch64_mangle_builtin_vector_type (const_tree type)
575 {
576   int i;
577   int nelts = sizeof (aarch64_simd_types) / sizeof (aarch64_simd_types[0]);
578 
579   for (i = 0; i < nelts; i++)
580     if (aarch64_simd_types[i].mode ==  TYPE_MODE (type)
581 	&& TYPE_NAME (type)
582 	&& TREE_CODE (TYPE_NAME (type)) == TYPE_DECL
583 	&& DECL_NAME (TYPE_NAME (type))
584 	&& !strcmp
585 	     (IDENTIFIER_POINTER (DECL_NAME (TYPE_NAME (type))),
586 	      aarch64_simd_types[i].name))
587       return aarch64_simd_types[i].mangle;
588 
589   return NULL;
590 }
591 
592 const char *
aarch64_mangle_builtin_type (const_tree type)
594 {
595   const char *mangle;
  /* Walk through all the AArch64 builtin type tables to find a match for
     the incoming type.  */
598   if ((mangle = aarch64_mangle_builtin_vector_type (type))
599       || (mangle = aarch64_mangle_builtin_scalar_type (type)))
600     return mangle;
601 
602   return NULL;
603 }
604 
605 static tree
aarch64_simd_builtin_std_type (machine_mode mode,
607 			       enum aarch64_type_qualifiers q)
608 {
609 #define QUAL_TYPE(M)  \
610   ((q == qualifier_none) ? int##M##_type_node : unsigned_int##M##_type_node);
611   switch (mode)
612     {
613     case E_QImode:
614       return QUAL_TYPE (QI);
615     case E_HImode:
616       return QUAL_TYPE (HI);
617     case E_SImode:
618       return QUAL_TYPE (SI);
619     case E_DImode:
620       return QUAL_TYPE (DI);
621     case E_TImode:
622       return QUAL_TYPE (TI);
623     case E_OImode:
624       return aarch64_simd_intOI_type_node;
625     case E_CImode:
626       return aarch64_simd_intCI_type_node;
627     case E_XImode:
628       return aarch64_simd_intXI_type_node;
629     case E_HFmode:
630       return aarch64_fp16_type_node;
631     case E_SFmode:
632       return float_type_node;
633     case E_DFmode:
634       return double_type_node;
635     default:
636       gcc_unreachable ();
637     }
638 #undef QUAL_TYPE
639 }
640 
641 static tree
aarch64_lookup_simd_builtin_type (machine_mode mode,
643 				  enum aarch64_type_qualifiers q)
644 {
645   int i;
646   int nelts = sizeof (aarch64_simd_types) / sizeof (aarch64_simd_types[0]);
647 
648   /* Non-poly scalar modes map to standard types not in the table.  */
649   if (q != qualifier_poly && !VECTOR_MODE_P (mode))
650     return aarch64_simd_builtin_std_type (mode, q);
651 
652   for (i = 0; i < nelts; i++)
653     if (aarch64_simd_types[i].mode == mode
654 	&& aarch64_simd_types[i].q == q)
655       return aarch64_simd_types[i].itype;
656 
657   return NULL_TREE;
658 }
659 
660 static tree
aarch64_simd_builtin_type (machine_mode mode,
662 			   bool unsigned_p, bool poly_p)
663 {
664   if (poly_p)
665     return aarch64_lookup_simd_builtin_type (mode, qualifier_poly);
666   else if (unsigned_p)
667     return aarch64_lookup_simd_builtin_type (mode, qualifier_unsigned);
668   else
669     return aarch64_lookup_simd_builtin_type (mode, qualifier_none);
670 }
671 
672 static void
aarch64_init_simd_builtin_types (void)
674 {
675   int i;
676   int nelts = sizeof (aarch64_simd_types) / sizeof (aarch64_simd_types[0]);
677   tree tdecl;
678 
679   /* Init all the element types built by the front-end.  */
680   aarch64_simd_types[Int8x8_t].eltype = intQI_type_node;
681   aarch64_simd_types[Int8x16_t].eltype = intQI_type_node;
682   aarch64_simd_types[Int16x4_t].eltype = intHI_type_node;
683   aarch64_simd_types[Int16x8_t].eltype = intHI_type_node;
684   aarch64_simd_types[Int32x2_t].eltype = intSI_type_node;
685   aarch64_simd_types[Int32x4_t].eltype = intSI_type_node;
686   aarch64_simd_types[Int64x1_t].eltype = intDI_type_node;
687   aarch64_simd_types[Int64x2_t].eltype = intDI_type_node;
688   aarch64_simd_types[Uint8x8_t].eltype = unsigned_intQI_type_node;
689   aarch64_simd_types[Uint8x16_t].eltype = unsigned_intQI_type_node;
690   aarch64_simd_types[Uint16x4_t].eltype = unsigned_intHI_type_node;
691   aarch64_simd_types[Uint16x8_t].eltype = unsigned_intHI_type_node;
692   aarch64_simd_types[Uint32x2_t].eltype = unsigned_intSI_type_node;
693   aarch64_simd_types[Uint32x4_t].eltype = unsigned_intSI_type_node;
694   aarch64_simd_types[Uint64x1_t].eltype = unsigned_intDI_type_node;
695   aarch64_simd_types[Uint64x2_t].eltype = unsigned_intDI_type_node;
696 
697   /* Poly types are a world of their own.  */
698   aarch64_simd_types[Poly8_t].eltype = aarch64_simd_types[Poly8_t].itype =
699     build_distinct_type_copy (unsigned_intQI_type_node);
700   /* Prevent front-ends from transforming Poly8_t arrays into string
701      literals.  */
702   TYPE_STRING_FLAG (aarch64_simd_types[Poly8_t].eltype) = false;
703 
704   aarch64_simd_types[Poly16_t].eltype = aarch64_simd_types[Poly16_t].itype =
705     build_distinct_type_copy (unsigned_intHI_type_node);
706   aarch64_simd_types[Poly64_t].eltype = aarch64_simd_types[Poly64_t].itype =
707     build_distinct_type_copy (unsigned_intDI_type_node);
708   aarch64_simd_types[Poly128_t].eltype = aarch64_simd_types[Poly128_t].itype =
709     build_distinct_type_copy (unsigned_intTI_type_node);
710   /* Init poly vector element types with scalar poly types.  */
711   aarch64_simd_types[Poly8x8_t].eltype = aarch64_simd_types[Poly8_t].itype;
712   aarch64_simd_types[Poly8x16_t].eltype = aarch64_simd_types[Poly8_t].itype;
713   aarch64_simd_types[Poly16x4_t].eltype = aarch64_simd_types[Poly16_t].itype;
714   aarch64_simd_types[Poly16x8_t].eltype = aarch64_simd_types[Poly16_t].itype;
715   aarch64_simd_types[Poly64x1_t].eltype = aarch64_simd_types[Poly64_t].itype;
716   aarch64_simd_types[Poly64x2_t].eltype = aarch64_simd_types[Poly64_t].itype;
717 
718   /* Continue with standard types.  */
719   aarch64_simd_types[Float16x4_t].eltype = aarch64_fp16_type_node;
720   aarch64_simd_types[Float16x8_t].eltype = aarch64_fp16_type_node;
721   aarch64_simd_types[Float32x2_t].eltype = float_type_node;
722   aarch64_simd_types[Float32x4_t].eltype = float_type_node;
723   aarch64_simd_types[Float64x1_t].eltype = double_type_node;
724   aarch64_simd_types[Float64x2_t].eltype = double_type_node;
725 
726   for (i = 0; i < nelts; i++)
727     {
728       tree eltype = aarch64_simd_types[i].eltype;
729       machine_mode mode = aarch64_simd_types[i].mode;
730 
731       if (aarch64_simd_types[i].itype == NULL)
732 	{
733 	  aarch64_simd_types[i].itype
734 	    = build_distinct_type_copy
735 	      (build_vector_type (eltype, GET_MODE_NUNITS (mode)));
736 	  SET_TYPE_STRUCTURAL_EQUALITY (aarch64_simd_types[i].itype);
737 	}
738 
739       tdecl = add_builtin_type (aarch64_simd_types[i].name,
740 				aarch64_simd_types[i].itype);
741       TYPE_NAME (aarch64_simd_types[i].itype) = tdecl;
742     }
743 
744 #define AARCH64_BUILD_SIGNED_TYPE(mode)  \
745   make_signed_type (GET_MODE_PRECISION (mode));
746   aarch64_simd_intOI_type_node = AARCH64_BUILD_SIGNED_TYPE (OImode);
747   aarch64_simd_intCI_type_node = AARCH64_BUILD_SIGNED_TYPE (CImode);
748   aarch64_simd_intXI_type_node = AARCH64_BUILD_SIGNED_TYPE (XImode);
749 #undef AARCH64_BUILD_SIGNED_TYPE
750 
751   tdecl = add_builtin_type
752 	    ("__builtin_aarch64_simd_oi" , aarch64_simd_intOI_type_node);
753   TYPE_NAME (aarch64_simd_intOI_type_node) = tdecl;
754   tdecl = add_builtin_type
755 	    ("__builtin_aarch64_simd_ci" , aarch64_simd_intCI_type_node);
756   TYPE_NAME (aarch64_simd_intCI_type_node) = tdecl;
757   tdecl = add_builtin_type
758 	    ("__builtin_aarch64_simd_xi" , aarch64_simd_intXI_type_node);
759   TYPE_NAME (aarch64_simd_intXI_type_node) = tdecl;
760 }
761 
762 static void
aarch64_init_simd_builtin_scalar_types (void)
764 {
765   /* Define typedefs for all the standard scalar types.  */
766   (*lang_hooks.types.register_builtin_type) (intQI_type_node,
767 					     "__builtin_aarch64_simd_qi");
768   (*lang_hooks.types.register_builtin_type) (intHI_type_node,
769 					     "__builtin_aarch64_simd_hi");
770   (*lang_hooks.types.register_builtin_type) (aarch64_fp16_type_node,
771 					     "__builtin_aarch64_simd_hf");
772   (*lang_hooks.types.register_builtin_type) (intSI_type_node,
773 					     "__builtin_aarch64_simd_si");
774   (*lang_hooks.types.register_builtin_type) (float_type_node,
775 					     "__builtin_aarch64_simd_sf");
776   (*lang_hooks.types.register_builtin_type) (intDI_type_node,
777 					     "__builtin_aarch64_simd_di");
778   (*lang_hooks.types.register_builtin_type) (double_type_node,
779 					     "__builtin_aarch64_simd_df");
780   (*lang_hooks.types.register_builtin_type) (unsigned_intQI_type_node,
781 					     "__builtin_aarch64_simd_poly8");
782   (*lang_hooks.types.register_builtin_type) (unsigned_intHI_type_node,
783 					     "__builtin_aarch64_simd_poly16");
784   (*lang_hooks.types.register_builtin_type) (unsigned_intDI_type_node,
785 					     "__builtin_aarch64_simd_poly64");
786   (*lang_hooks.types.register_builtin_type) (unsigned_intTI_type_node,
787 					     "__builtin_aarch64_simd_poly128");
788   (*lang_hooks.types.register_builtin_type) (intTI_type_node,
789 					     "__builtin_aarch64_simd_ti");
790   /* Unsigned integer types for various mode sizes.  */
791   (*lang_hooks.types.register_builtin_type) (unsigned_intQI_type_node,
792 					     "__builtin_aarch64_simd_uqi");
793   (*lang_hooks.types.register_builtin_type) (unsigned_intHI_type_node,
794 					     "__builtin_aarch64_simd_uhi");
795   (*lang_hooks.types.register_builtin_type) (unsigned_intSI_type_node,
796 					     "__builtin_aarch64_simd_usi");
797   (*lang_hooks.types.register_builtin_type) (unsigned_intDI_type_node,
798 					     "__builtin_aarch64_simd_udi");
799 }
800 
801 static bool aarch64_simd_builtins_initialized_p = false;
802 
/* Due to the architecture not providing a lane variant of the lane
   instructions for fcmla, we can't use the standard simd builtin expansion
   code, but we still want the majority of the validation that would normally
   be done.  */
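
/* For the v2sf variants, the builtins registered below end up with
   declarations roughly equivalent to
     __Float32x2_t __builtin_aarch64_fcmla_laneq0v2sf
       (__Float32x2_t, __Float32x2_t, __Float32x4_t, unsigned int);
   i.e. the accumulator and first multiplicand are D-register vectors while
   the lane is selected from a full Q-register vector.  */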
806 
807 void
aarch64_init_fcmla_laneq_builtins (void)
809 {
810   unsigned int i = 0;
811 
812   for (i = 0; i < ARRAY_SIZE (aarch64_fcmla_lane_builtin_data); ++i)
813     {
814       aarch64_fcmla_laneq_builtin_datum* d
815 	= &aarch64_fcmla_lane_builtin_data[i];
816       tree argtype = aarch64_lookup_simd_builtin_type (d->mode, qualifier_none);
817       machine_mode quadmode = GET_MODE_2XWIDER_MODE (d->mode).require ();
818       tree quadtype
819 	= aarch64_lookup_simd_builtin_type (quadmode, qualifier_none);
820       tree lanetype
821 	= aarch64_simd_builtin_std_type (SImode, qualifier_lane_pair_index);
822       tree ftype = build_function_type_list (argtype, argtype, argtype,
823 					     quadtype, lanetype, NULL_TREE);
824       tree fndecl = add_builtin_function (d->name, ftype, d->fcode,
825 					  BUILT_IN_MD, NULL, NULL_TREE);
826 
827       aarch64_builtin_decls[d->fcode] = fndecl;
828     }
829 }
830 
831 void
aarch64_init_simd_builtins (void)
833 {
834   unsigned int i, fcode = AARCH64_SIMD_PATTERN_START;
835 
836   if (aarch64_simd_builtins_initialized_p)
837     return;
838 
839   aarch64_simd_builtins_initialized_p = true;
840 
841   aarch64_init_simd_builtin_types ();
842 
843   /* Strong-typing hasn't been implemented for all AdvSIMD builtin intrinsics.
     Therefore we need to preserve the old __builtin scalar types.  They can be
845      removed once all the intrinsics become strongly typed using the qualifier
846      system.  */
847   aarch64_init_simd_builtin_scalar_types ();
848 
849   tree lane_check_fpr = build_function_type_list (void_type_node,
850 						  size_type_node,
851 						  size_type_node,
852 						  intSI_type_node,
853 						  NULL);
854   aarch64_builtin_decls[AARCH64_SIMD_BUILTIN_LANE_CHECK] =
855       add_builtin_function ("__builtin_aarch64_im_lane_boundsi", lane_check_fpr,
856 			    AARCH64_SIMD_BUILTIN_LANE_CHECK, BUILT_IN_MD,
857 			    NULL, NULL_TREE);
858 
859   for (i = 0; i < ARRAY_SIZE (aarch64_simd_builtin_data); i++, fcode++)
860     {
861       bool print_type_signature_p = false;
862       char type_signature[SIMD_MAX_BUILTIN_ARGS + 1] = { 0 };
863       aarch64_simd_builtin_datum *d = &aarch64_simd_builtin_data[i];
864       char namebuf[60];
865       tree ftype = NULL;
866       tree fndecl = NULL;
867 
868       d->fcode = fcode;
869 
870       /* We must track two variables here.  op_num is
871 	 the operand number as in the RTL pattern.  This is
872 	 required to access the mode (e.g. V4SF mode) of the
873 	 argument, from which the base type can be derived.
	 arg_num is an index into the qualifiers data, which
875 	 gives qualifiers to the type (e.g. const unsigned).
876 	 The reason these two variables may differ by one is the
877 	 void return type.  While all return types take the 0th entry
878 	 in the qualifiers array, there is no operand for them in the
879 	 RTL pattern.  */
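      /* For example (illustrative): a TYPES_BINOP builtin whose pattern has
         three RTL operands starts with op_num = 2 and arg_num = 2, whereas a
         TYPES_STORE1 builtin whose pattern has two RTL operands starts with
         op_num = 1 but arg_num = 2, because qualifier slot 0 is taken by the
         void return type.  */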
880       int op_num = insn_data[d->code].n_operands - 1;
881       int arg_num = d->qualifiers[0] & qualifier_void
882 		      ? op_num + 1
883 		      : op_num;
884       tree return_type = void_type_node, args = void_list_node;
885       tree eltype;
886 
887       /* Build a function type directly from the insn_data for this
888 	 builtin.  The build_function_type () function takes care of
889 	 removing duplicates for us.  */
890       for (; op_num >= 0; arg_num--, op_num--)
891 	{
892 	  machine_mode op_mode = insn_data[d->code].operand[op_num].mode;
893 	  enum aarch64_type_qualifiers qualifiers = d->qualifiers[arg_num];
894 
895 	  if (qualifiers & qualifier_unsigned)
896 	    {
897 	      type_signature[op_num] = 'u';
898 	      print_type_signature_p = true;
899 	    }
900 	  else if (qualifiers & qualifier_poly)
901 	    {
902 	      type_signature[op_num] = 'p';
903 	      print_type_signature_p = true;
904 	    }
905 	  else
906 	    type_signature[op_num] = 's';
907 
908 	  /* Skip an internal operand for vget_{low, high}.  */
909 	  if (qualifiers & qualifier_internal)
910 	    continue;
911 
912 	  /* Some builtins have different user-facing types
913 	     for certain arguments, encoded in d->mode.  */
914 	  if (qualifiers & qualifier_map_mode)
915 	      op_mode = d->mode;
916 
917 	  /* For pointers, we want a pointer to the basic type
918 	     of the vector.  */
919 	  if (qualifiers & qualifier_pointer && VECTOR_MODE_P (op_mode))
920 	    op_mode = GET_MODE_INNER (op_mode);
921 
922 	  eltype = aarch64_simd_builtin_type
923 		     (op_mode,
924 		      (qualifiers & qualifier_unsigned) != 0,
925 		      (qualifiers & qualifier_poly) != 0);
926 	  gcc_assert (eltype != NULL);
927 
928 	  /* Add qualifiers.  */
929 	  if (qualifiers & qualifier_const)
930 	    eltype = build_qualified_type (eltype, TYPE_QUAL_CONST);
931 
932 	  if (qualifiers & qualifier_pointer)
933 	      eltype = build_pointer_type (eltype);
934 
935 	  /* If we have reached arg_num == 0, we are at a non-void
936 	     return type.  Otherwise, we are still processing
937 	     arguments.  */
938 	  if (arg_num == 0)
939 	    return_type = eltype;
940 	  else
941 	    args = tree_cons (NULL_TREE, eltype, args);
942 	}
943 
944       ftype = build_function_type (return_type, args);
945 
946       gcc_assert (ftype != NULL);
947 
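      /* Illustrative only: a TYPES_BINOPU builtin in V16QImode has every
         operand qualified as unsigned, so type_signature is "uuu" and the
         registered name becomes roughly "__builtin_aarch64_<name>v16qi_uuu";
         builtins whose signature is all 's' keep the plain
         "__builtin_aarch64_<name><mode>" form.  */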
948       if (print_type_signature_p)
949 	snprintf (namebuf, sizeof (namebuf), "__builtin_aarch64_%s_%s",
950 		  d->name, type_signature);
951       else
952 	snprintf (namebuf, sizeof (namebuf), "__builtin_aarch64_%s",
953 		  d->name);
954 
955       fndecl = add_builtin_function (namebuf, ftype, fcode, BUILT_IN_MD,
956 				     NULL, NULL_TREE);
957       aarch64_builtin_decls[fcode] = fndecl;
958     }
959 
960    /* Initialize the remaining fcmla_laneq intrinsics.  */
961    aarch64_init_fcmla_laneq_builtins ();
962 }
963 
964 static void
aarch64_init_crc32_builtins ()
966 {
967   tree usi_type = aarch64_simd_builtin_std_type (SImode, qualifier_unsigned);
968   unsigned int i = 0;
969 
970   for (i = 0; i < ARRAY_SIZE (aarch64_crc_builtin_data); ++i)
971     {
972       aarch64_crc_builtin_datum* d = &aarch64_crc_builtin_data[i];
973       tree argtype = aarch64_simd_builtin_std_type (d->mode,
974 						    qualifier_unsigned);
975       tree ftype = build_function_type_list (usi_type, usi_type, argtype, NULL_TREE);
976       tree fndecl = add_builtin_function (d->name, ftype, d->fcode,
977                                           BUILT_IN_MD, NULL, NULL_TREE);
978 
979       aarch64_builtin_decls[d->fcode] = fndecl;
980     }
981 }
982 
983 /* Add builtins for reciprocal square root.  */
984 
985 void
aarch64_init_builtin_rsqrt (void)
987 {
988   tree fndecl = NULL;
989   tree ftype = NULL;
990 
991   tree V2SF_type_node = build_vector_type (float_type_node, 2);
992   tree V2DF_type_node = build_vector_type (double_type_node, 2);
993   tree V4SF_type_node = build_vector_type (float_type_node, 4);
994 
995   struct builtin_decls_data
996   {
997     tree type_node;
998     const char *builtin_name;
999     int function_code;
1000   };
1001 
1002   builtin_decls_data bdda[] =
1003   {
1004     { double_type_node, "__builtin_aarch64_rsqrt_df", AARCH64_BUILTIN_RSQRT_DF },
1005     { float_type_node, "__builtin_aarch64_rsqrt_sf", AARCH64_BUILTIN_RSQRT_SF },
1006     { V2DF_type_node, "__builtin_aarch64_rsqrt_v2df", AARCH64_BUILTIN_RSQRT_V2DF },
1007     { V2SF_type_node, "__builtin_aarch64_rsqrt_v2sf", AARCH64_BUILTIN_RSQRT_V2SF },
1008     { V4SF_type_node, "__builtin_aarch64_rsqrt_v4sf", AARCH64_BUILTIN_RSQRT_V4SF }
1009   };
1010 
1011   builtin_decls_data *bdd = bdda;
1012   builtin_decls_data *bdd_end = bdd + (sizeof (bdda) / sizeof (builtin_decls_data));
1013 
1014   for (; bdd < bdd_end; bdd++)
1015   {
1016     ftype = build_function_type_list (bdd->type_node, bdd->type_node, NULL_TREE);
1017     fndecl = add_builtin_function (bdd->builtin_name,
1018       ftype, bdd->function_code, BUILT_IN_MD, NULL, NULL_TREE);
1019     aarch64_builtin_decls[bdd->function_code] = fndecl;
1020   }
1021 }
1022 
/* Initialize the backend types that support the user-visible __fp16
   type, and also initialize a pointer to that type, to be used when
   forming HFAs.  */
1026 
1027 static void
aarch64_init_fp16_types (void)
1029 {
1030   aarch64_fp16_type_node = make_node (REAL_TYPE);
1031   TYPE_PRECISION (aarch64_fp16_type_node) = 16;
1032   layout_type (aarch64_fp16_type_node);
1033 
1034   (*lang_hooks.types.register_builtin_type) (aarch64_fp16_type_node, "__fp16");
1035   aarch64_fp16_ptr_type_node = build_pointer_type (aarch64_fp16_type_node);
1036 }
1037 
/* Pointer authentication builtins that will become NOPs on legacy platforms.
   Currently, these builtins are for internal use only (libgcc EH unwinder).  */
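
/* A rough usage sketch (illustrative, matching the declarations below):
     void *signed_ra = __builtin_aarch64_pacia1716 (ra, (uint64_t) sp);
     void *plain_ra  = __builtin_aarch64_autia1716 (signed_ra, (uint64_t) sp);
     void *stripped  = __builtin_aarch64_xpaclri (signed_ra);
   where the second argument of the PAC/AUT builtins is the 64-bit
   modifier ("salt") value.  */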
1040 
1041 void
aarch64_init_pauth_hint_builtins (void)
1043 {
1044   /* Pointer Authentication builtins.  */
1045   tree ftype_pointer_auth
1046     = build_function_type_list (ptr_type_node, ptr_type_node,
1047 				unsigned_intDI_type_node, NULL_TREE);
1048   tree ftype_pointer_strip
1049     = build_function_type_list (ptr_type_node, ptr_type_node, NULL_TREE);
1050 
1051   aarch64_builtin_decls[AARCH64_PAUTH_BUILTIN_AUTIA1716]
1052     = add_builtin_function ("__builtin_aarch64_autia1716", ftype_pointer_auth,
1053 			    AARCH64_PAUTH_BUILTIN_AUTIA1716, BUILT_IN_MD, NULL,
1054 			    NULL_TREE);
1055   aarch64_builtin_decls[AARCH64_PAUTH_BUILTIN_PACIA1716]
1056     = add_builtin_function ("__builtin_aarch64_pacia1716", ftype_pointer_auth,
1057 			    AARCH64_PAUTH_BUILTIN_PACIA1716, BUILT_IN_MD, NULL,
1058 			    NULL_TREE);
1059   aarch64_builtin_decls[AARCH64_PAUTH_BUILTIN_XPACLRI]
1060     = add_builtin_function ("__builtin_aarch64_xpaclri", ftype_pointer_strip,
1061 			    AARCH64_PAUTH_BUILTIN_XPACLRI, BUILT_IN_MD, NULL,
1062 			    NULL_TREE);
1063 }
1064 
1065 /* Add builtins for Random Number instructions.  */
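
/* A rough usage sketch (these are the primitives behind the ACLE
   __rndr/__rndrrs intrinsics):
     uint64_t val;
     int failed = __builtin_aarch64_rndr (&val);
   a zero return value is expected to indicate that VAL now holds a valid
   random number.  */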
1066 
1067 static void
aarch64_init_rng_builtins (void)
1069 {
1070   tree unsigned_ptr_type = build_pointer_type (unsigned_intDI_type_node);
1071   tree ftype
1072     = build_function_type_list (integer_type_node, unsigned_ptr_type, NULL);
1073   aarch64_builtin_decls[AARCH64_BUILTIN_RNG_RNDR]
1074     = add_builtin_function ("__builtin_aarch64_rndr", ftype,
1075 			    AARCH64_BUILTIN_RNG_RNDR, BUILT_IN_MD, NULL,
1076 			    NULL_TREE);
1077   aarch64_builtin_decls[AARCH64_BUILTIN_RNG_RNDRRS]
1078     = add_builtin_function ("__builtin_aarch64_rndrrs", ftype,
1079 			    AARCH64_BUILTIN_RNG_RNDRRS, BUILT_IN_MD, NULL,
1080 			    NULL_TREE);
1081 }
1082 
1083 
1084 void
aarch64_init_builtins (void)
1086 {
1087   tree ftype_set_fpr
1088     = build_function_type_list (void_type_node, unsigned_type_node, NULL);
1089   tree ftype_get_fpr
1090     = build_function_type_list (unsigned_type_node, NULL);
1091 
1092   aarch64_builtin_decls[AARCH64_BUILTIN_GET_FPCR]
1093     = add_builtin_function ("__builtin_aarch64_get_fpcr", ftype_get_fpr,
1094 			    AARCH64_BUILTIN_GET_FPCR, BUILT_IN_MD, NULL, NULL_TREE);
1095   aarch64_builtin_decls[AARCH64_BUILTIN_SET_FPCR]
1096     = add_builtin_function ("__builtin_aarch64_set_fpcr", ftype_set_fpr,
1097 			    AARCH64_BUILTIN_SET_FPCR, BUILT_IN_MD, NULL, NULL_TREE);
1098   aarch64_builtin_decls[AARCH64_BUILTIN_GET_FPSR]
1099     = add_builtin_function ("__builtin_aarch64_get_fpsr", ftype_get_fpr,
1100 			    AARCH64_BUILTIN_GET_FPSR, BUILT_IN_MD, NULL, NULL_TREE);
1101   aarch64_builtin_decls[AARCH64_BUILTIN_SET_FPSR]
1102     = add_builtin_function ("__builtin_aarch64_set_fpsr", ftype_set_fpr,
1103 			    AARCH64_BUILTIN_SET_FPSR, BUILT_IN_MD, NULL, NULL_TREE);
1104 
1105   aarch64_init_fp16_types ();
1106 
1107   if (TARGET_SIMD)
1108     aarch64_init_simd_builtins ();
1109 
1110   aarch64_init_crc32_builtins ();
1111   aarch64_init_builtin_rsqrt ();
1112   aarch64_init_rng_builtins ();
1113 
1114   tree ftype_jcvt
1115     = build_function_type_list (intSI_type_node, double_type_node, NULL);
1116   aarch64_builtin_decls[AARCH64_JSCVT]
1117     = add_builtin_function ("__builtin_aarch64_jcvtzs", ftype_jcvt,
1118 			    AARCH64_JSCVT, BUILT_IN_MD, NULL, NULL_TREE);
1119 
1120   /* Initialize pointer authentication builtins which are backed by instructions
1121      in NOP encoding space.
1122 
     NOTE: these builtins are supposed to be used by the libgcc unwinder only.
     As there is no support for return address signing under ILP32, we don't
     register them in that case.  */
1126   if (!TARGET_ILP32)
1127     aarch64_init_pauth_hint_builtins ();
1128 }
1129 
1130 tree
aarch64_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
1132 {
1133   if (code >= AARCH64_BUILTIN_MAX)
1134     return error_mark_node;
1135 
1136   return aarch64_builtin_decls[code];
1137 }
1138 
1139 typedef enum
1140 {
1141   SIMD_ARG_COPY_TO_REG,
1142   SIMD_ARG_CONSTANT,
1143   SIMD_ARG_LANE_INDEX,
1144   SIMD_ARG_STRUCT_LOAD_STORE_LANE_INDEX,
1145   SIMD_ARG_LANE_PAIR_INDEX,
1146   SIMD_ARG_STOP
1147 } builtin_simd_arg;
1148 
1149 
1150 static rtx
aarch64_simd_expand_args (rtx target, int icode, int have_retval,
1152 			  tree exp, builtin_simd_arg *args,
1153 			  machine_mode builtin_mode)
1154 {
1155   rtx pat;
1156   rtx op[SIMD_MAX_BUILTIN_ARGS + 1]; /* First element for result operand.  */
1157   int opc = 0;
1158 
1159   if (have_retval)
1160     {
1161       machine_mode tmode = insn_data[icode].operand[0].mode;
1162       if (!target
1163 	  || GET_MODE (target) != tmode
1164 	  || !(*insn_data[icode].operand[0].predicate) (target, tmode))
1165 	target = gen_reg_rtx (tmode);
1166       op[opc++] = target;
1167     }
1168 
1169   for (;;)
1170     {
1171       builtin_simd_arg thisarg = args[opc - have_retval];
1172 
1173       if (thisarg == SIMD_ARG_STOP)
1174 	break;
1175       else
1176 	{
1177 	  tree arg = CALL_EXPR_ARG (exp, opc - have_retval);
1178 	  machine_mode mode = insn_data[icode].operand[opc].mode;
1179 	  op[opc] = expand_normal (arg);
1180 
1181 	  switch (thisarg)
1182 	    {
1183 	    case SIMD_ARG_COPY_TO_REG:
1184 	      if (POINTER_TYPE_P (TREE_TYPE (arg)))
1185 		op[opc] = convert_memory_address (Pmode, op[opc]);
1186 	      /*gcc_assert (GET_MODE (op[opc]) == mode); */
1187 	      if (!(*insn_data[icode].operand[opc].predicate)
1188 		  (op[opc], mode))
1189 		op[opc] = copy_to_mode_reg (mode, op[opc]);
1190 	      break;
1191 
1192 	    case SIMD_ARG_STRUCT_LOAD_STORE_LANE_INDEX:
1193 	      gcc_assert (opc > 1);
1194 	      if (CONST_INT_P (op[opc]))
1195 		{
1196 		  unsigned int nunits
1197 		    = GET_MODE_NUNITS (builtin_mode).to_constant ();
1198 		  aarch64_simd_lane_bounds (op[opc], 0, nunits, exp);
1199 		  /* Keep to GCC-vector-extension lane indices in the RTL.  */
1200 		  op[opc] = aarch64_endian_lane_rtx (builtin_mode,
1201 						     INTVAL (op[opc]));
1202 		}
1203 	      goto constant_arg;
1204 
1205 	    case SIMD_ARG_LANE_INDEX:
1206 	      /* Must be a previous operand into which this is an index.  */
1207 	      gcc_assert (opc > 0);
1208 	      if (CONST_INT_P (op[opc]))
1209 		{
1210 		  machine_mode vmode = insn_data[icode].operand[opc - 1].mode;
1211 		  unsigned int nunits
1212 		    = GET_MODE_NUNITS (vmode).to_constant ();
1213 		  aarch64_simd_lane_bounds (op[opc], 0, nunits, exp);
1214 		  /* Keep to GCC-vector-extension lane indices in the RTL.  */
1215 		  op[opc] = aarch64_endian_lane_rtx (vmode, INTVAL (op[opc]));
1216 		}
1217 	      /* If the lane index isn't a constant then error out.  */
1218 	      goto constant_arg;
1219 
1220 	    case SIMD_ARG_LANE_PAIR_INDEX:
1221 	      /* Must be a previous operand into which this is an index and
1222 		 index is restricted to nunits / 2.  */
1223 	      gcc_assert (opc > 0);
1224 	      if (CONST_INT_P (op[opc]))
1225 		{
1226 		  machine_mode vmode = insn_data[icode].operand[opc - 1].mode;
1227 		  unsigned int nunits
1228 		    = GET_MODE_NUNITS (vmode).to_constant ();
1229 		  aarch64_simd_lane_bounds (op[opc], 0, nunits / 2, exp);
1230 		  /* Keep to GCC-vector-extension lane indices in the RTL.  */
1231 		  int lane = INTVAL (op[opc]);
1232 		  op[opc] = gen_int_mode (ENDIAN_LANE_N (nunits / 2, lane),
1233 					  SImode);
1234 		}
1235 	      /* Fall through - if the lane index isn't a constant then
1236 		 the next case will error.  */
1237 	      /* FALLTHRU */
1238 	    case SIMD_ARG_CONSTANT:
1239 constant_arg:
1240 	      if (!(*insn_data[icode].operand[opc].predicate)
1241 		  (op[opc], mode))
1242 	      {
1243 		error ("%Kargument %d must be a constant immediate",
1244 		       exp, opc + 1 - have_retval);
1245 		return const0_rtx;
1246 	      }
1247 	      break;
1248 
1249 	    case SIMD_ARG_STOP:
1250 	      gcc_unreachable ();
1251 	    }
1252 
1253 	  opc++;
1254 	}
1255     }
1256 
1257   switch (opc)
1258     {
1259     case 1:
1260       pat = GEN_FCN (icode) (op[0]);
1261       break;
1262 
1263     case 2:
1264       pat = GEN_FCN (icode) (op[0], op[1]);
1265       break;
1266 
1267     case 3:
1268       pat = GEN_FCN (icode) (op[0], op[1], op[2]);
1269       break;
1270 
1271     case 4:
1272       pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
1273       break;
1274 
1275     case 5:
1276       pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3], op[4]);
1277       break;
1278 
1279     case 6:
1280       pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3], op[4], op[5]);
1281       break;
1282 
1283     default:
1284       gcc_unreachable ();
1285     }
1286 
1287   if (!pat)
1288     return NULL_RTX;
1289 
1290   emit_insn (pat);
1291 
1292   return target;
1293 }
1294 
/* Expand an AArch64 AdvSIMD builtin (intrinsic).  */
1296 rtx
aarch64_simd_expand_builtin (int fcode, tree exp, rtx target)
1298 {
1299   if (fcode == AARCH64_SIMD_BUILTIN_LANE_CHECK)
1300     {
1301       rtx totalsize = expand_normal (CALL_EXPR_ARG (exp, 0));
1302       rtx elementsize = expand_normal (CALL_EXPR_ARG (exp, 1));
1303       if (CONST_INT_P (totalsize) && CONST_INT_P (elementsize)
1304 	  && UINTVAL (elementsize) != 0
1305 	  && UINTVAL (totalsize) != 0)
1306 	{
1307 	  rtx lane_idx = expand_normal (CALL_EXPR_ARG (exp, 2));
1308           if (CONST_INT_P (lane_idx))
1309 	    aarch64_simd_lane_bounds (lane_idx, 0,
1310 				      UINTVAL (totalsize)
1311 				       / UINTVAL (elementsize),
1312 				      exp);
1313           else
1314 	    error ("%Klane index must be a constant immediate", exp);
1315 	}
1316       else
1317 	error ("%Ktotal size and element size must be a non-zero constant immediate", exp);
1318       /* Don't generate any RTL.  */
1319       return const0_rtx;
1320     }
1321   aarch64_simd_builtin_datum *d =
1322 		&aarch64_simd_builtin_data[fcode - AARCH64_SIMD_PATTERN_START];
1323   enum insn_code icode = d->code;
1324   builtin_simd_arg args[SIMD_MAX_BUILTIN_ARGS + 1];
1325   int num_args = insn_data[d->code].n_operands;
1326   int is_void = 0;
1327   int k;
1328 
1329   is_void = !!(d->qualifiers[0] & qualifier_void);
1330 
1331   num_args += is_void;
1332 
1333   for (k = 1; k < num_args; k++)
1334     {
1335       /* We have four arrays of data, each indexed in a different fashion.
1336 	 qualifiers - element 0 always describes the function return type.
1337 	 operands - element 0 is either the operand for return value (if
1338 	   the function has a non-void return type) or the operand for the
1339 	   first argument.
1340 	 expr_args - element 0 always holds the first argument.
1341 	 args - element 0 is always used for the return type.  */
1342       int qualifiers_k = k;
1343       int operands_k = k - is_void;
1344       int expr_args_k = k - 1;
1345 
1346       if (d->qualifiers[qualifiers_k] & qualifier_lane_index)
1347 	args[k] = SIMD_ARG_LANE_INDEX;
1348       else if (d->qualifiers[qualifiers_k] & qualifier_lane_pair_index)
1349 	args[k] = SIMD_ARG_LANE_PAIR_INDEX;
1350       else if (d->qualifiers[qualifiers_k] & qualifier_struct_load_store_lane_index)
1351 	args[k] = SIMD_ARG_STRUCT_LOAD_STORE_LANE_INDEX;
1352       else if (d->qualifiers[qualifiers_k] & qualifier_immediate)
1353 	args[k] = SIMD_ARG_CONSTANT;
1354       else if (d->qualifiers[qualifiers_k] & qualifier_maybe_immediate)
1355 	{
1356 	  rtx arg
1357 	    = expand_normal (CALL_EXPR_ARG (exp,
1358 					    (expr_args_k)));
1359 	  /* Handle constants only if the predicate allows it.  */
1360 	  bool op_const_int_p =
1361 	    (CONST_INT_P (arg)
1362 	     && (*insn_data[icode].operand[operands_k].predicate)
1363 		(arg, insn_data[icode].operand[operands_k].mode));
1364 	  args[k] = op_const_int_p ? SIMD_ARG_CONSTANT : SIMD_ARG_COPY_TO_REG;
1365 	}
1366       else
1367 	args[k] = SIMD_ARG_COPY_TO_REG;
1368 
1369     }
1370   args[k] = SIMD_ARG_STOP;
1371 
1372   /* The interface to aarch64_simd_expand_args expects a 0 if
1373      the function is void, and a 1 if it is not.  */
1374   return aarch64_simd_expand_args
1375 	  (target, icode, !is_void, exp, &args[1], d->mode);
1376 }
1377 
1378 rtx
aarch64_crc32_expand_builtin (int fcode, tree exp, rtx target)
1380 {
1381   rtx pat;
1382   aarch64_crc_builtin_datum *d
1383     = &aarch64_crc_builtin_data[fcode - (AARCH64_CRC32_BUILTIN_BASE + 1)];
1384   enum insn_code icode = d->icode;
1385   tree arg0 = CALL_EXPR_ARG (exp, 0);
1386   tree arg1 = CALL_EXPR_ARG (exp, 1);
1387   rtx op0 = expand_normal (arg0);
1388   rtx op1 = expand_normal (arg1);
1389   machine_mode tmode = insn_data[icode].operand[0].mode;
1390   machine_mode mode0 = insn_data[icode].operand[1].mode;
1391   machine_mode mode1 = insn_data[icode].operand[2].mode;
1392 
1393   if (! target
1394       || GET_MODE (target) != tmode
1395       || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
1396     target = gen_reg_rtx (tmode);
1397 
1398   gcc_assert ((GET_MODE (op0) == mode0 || GET_MODE (op0) == VOIDmode)
1399 	      && (GET_MODE (op1) == mode1 || GET_MODE (op1) == VOIDmode));
1400 
1401   if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
1402     op0 = copy_to_mode_reg (mode0, op0);
1403   if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
1404     op1 = copy_to_mode_reg (mode1, op1);
1405 
1406   pat = GEN_FCN (icode) (target, op0, op1);
1407   if (!pat)
1408     return NULL_RTX;
1409 
1410   emit_insn (pat);
1411   return target;
1412 }
1413 
1414 /* Function to expand reciprocal square root builtins.  */
1415 
1416 static rtx
aarch64_expand_builtin_rsqrt (int fcode, tree exp, rtx target)
1418 {
1419   tree arg0 = CALL_EXPR_ARG (exp, 0);
1420   rtx op0 = expand_normal (arg0);
1421 
1422   rtx (*gen) (rtx, rtx);
1423 
1424   switch (fcode)
1425     {
1426       case AARCH64_BUILTIN_RSQRT_DF:
1427 	gen = gen_rsqrtdf2;
1428 	break;
1429       case AARCH64_BUILTIN_RSQRT_SF:
1430 	gen = gen_rsqrtsf2;
1431 	break;
1432       case AARCH64_BUILTIN_RSQRT_V2DF:
1433 	gen = gen_rsqrtv2df2;
1434 	break;
1435       case AARCH64_BUILTIN_RSQRT_V2SF:
1436 	gen = gen_rsqrtv2sf2;
1437 	break;
1438       case AARCH64_BUILTIN_RSQRT_V4SF:
1439 	gen = gen_rsqrtv4sf2;
1440 	break;
1441       default: gcc_unreachable ();
1442     }
1443 
1444   if (!target)
1445     target = gen_reg_rtx (GET_MODE (op0));
1446 
1447   emit_insn (gen (target, op0));
1448 
1449   return target;
1450 }
1451 
1452 /* Expand an FCMLA lane expression EXP with code FCODE and
1453    result going to TARGET if that is convenient.  */
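/* The call carries four arguments: two vectors in the builtin's own mode,
   a quad-width vector from which a single complex-number lane is selected,
   and a constant lane index (reported as argument 4 in diagnostics).  */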
1454 
1455 rtx
1456 aarch64_expand_fcmla_builtin (tree exp, rtx target, int fcode)
1457 {
1458   int bcode = fcode - AARCH64_SIMD_FCMLA_LANEQ_BUILTIN_BASE - 1;
1459   aarch64_fcmla_laneq_builtin_datum* d
1460     = &aarch64_fcmla_lane_builtin_data[bcode];
1461   machine_mode quadmode = GET_MODE_2XWIDER_MODE (d->mode).require ();
1462   rtx op0 = force_reg (d->mode, expand_normal (CALL_EXPR_ARG (exp, 0)));
1463   rtx op1 = force_reg (d->mode, expand_normal (CALL_EXPR_ARG (exp, 1)));
1464   rtx op2 = force_reg (quadmode, expand_normal (CALL_EXPR_ARG (exp, 2)));
1465   tree tmp = CALL_EXPR_ARG (exp, 3);
1466   rtx lane_idx = expand_expr (tmp, NULL_RTX, VOIDmode, EXPAND_INITIALIZER);
1467 
1468   /* Validate that the lane index is a constant.  */
1469   if (!CONST_INT_P (lane_idx))
1470     {
1471       error ("%Kargument %d must be a constant immediate", exp, 4);
1472       return const0_rtx;
1473     }
1474 
1475   /* Validate that the index is within the expected range.  */
1476   int nunits = GET_MODE_NUNITS (quadmode).to_constant ();
1477   aarch64_simd_lane_bounds (lane_idx, 0, nunits / 2, exp);
1478 
1479   /* Generate the correct register and mode.  */
1480   int lane = INTVAL (lane_idx);
1481 
1482   if (lane < nunits / 4)
1483     op2 = simplify_gen_subreg (d->mode, op2, quadmode,
1484 			       subreg_lowpart_offset (d->mode, quadmode));
1485   else
1486     {
1487       /* Select the upper 64 bits, either a V2SF or a V4HF.  This is
1488 	 rather messy: the operation, although simple, does not map onto a
1489 	 single RTL pattern and is hard to describe with one.  The
1490 	 target-independent gen_highpart_mode generates code that isn't
1491 	 optimal.  */
1492       rtx temp1 = gen_reg_rtx (d->mode);
1493       rtx temp2 = gen_reg_rtx (DImode);
1494       temp1 = simplify_gen_subreg (d->mode, op2, quadmode,
1495 				   subreg_lowpart_offset (d->mode, quadmode));
1496       temp1 = simplify_gen_subreg (V2DImode, temp1, d->mode, 0);
1497       if (BYTES_BIG_ENDIAN)
1498 	emit_insn (gen_aarch64_get_lanev2di (temp2, temp1, const0_rtx));
1499       else
1500 	emit_insn (gen_aarch64_get_lanev2di (temp2, temp1, const1_rtx));
1501       op2 = simplify_gen_subreg (d->mode, temp2, GET_MODE (temp2), 0);
1502 
1503       /* And recalculate the index.  */
1504       lane -= nunits / 4;
1505     }
1506 
1507   /* Keep to GCC-vector-extension lane indices in the RTL.  After the
1508      upper-half adjustment above only lanes 0 to nunits / 4 - 1 remain,
1509      i.e. at most 0-1, so we only need to know the order in a V2 mode.  */
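  /* For instance, with the V4HF variants QUADMODE is V8HF: lanes 0-3 pass
     the range check above, and lanes 2 and 3 select the upper half and are
     remapped to 0 and 1 by the code above.  */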
1510   lane_idx = aarch64_endian_lane_rtx (V2DImode, lane);
1511 
1512   if (!target)
1513     target = gen_reg_rtx (d->mode);
1514   else
1515     target = force_reg (d->mode, target);
1516 
1517   rtx pat = NULL_RTX;
1518 
1519   if (d->lane)
1520     pat = GEN_FCN (d->icode) (target, op0, op1, op2, lane_idx);
1521   else
1522     pat = GEN_FCN (d->icode) (target, op0, op1, op2);
1523 
1524   if (!pat)
1525     return NULL_RTX;
1526 
1527   emit_insn (pat);
1528   return target;
1529 }
1530 
1531 /* Expand a random number builtin EXP with code FCODE, putting the result
1532    into TARGET.  If IGNORE is true the return value is ignored.  */
1533 
1534 rtx
1535 aarch64_expand_rng_builtin (tree exp, rtx target, int fcode, int ignore)
1536 {
1537   rtx pat;
1538   enum insn_code icode;
1539   if (fcode == AARCH64_BUILTIN_RNG_RNDR)
1540     icode = CODE_FOR_aarch64_rndr;
1541   else if (fcode == AARCH64_BUILTIN_RNG_RNDRRS)
1542     icode = CODE_FOR_aarch64_rndrrs;
1543   else
1544     gcc_unreachable ();
1545 
1546   rtx rand = gen_reg_rtx (DImode);
1547   pat = GEN_FCN (icode) (rand);
1548   if (!pat)
1549     return NULL_RTX;
1550 
1551   tree arg0 = CALL_EXPR_ARG (exp, 0);
1552   rtx res_addr = expand_normal (arg0);
1553   res_addr = convert_memory_address (Pmode, res_addr);
1554   rtx res_mem = gen_rtx_MEM (DImode, res_addr);
1555   emit_insn (pat);
1556   emit_move_insn (res_mem, rand);
1557   /* If the status result is unused don't generate the CSET code.  */
1558   if (ignore)
1559     return target;
1560 
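  /* The rndr/rndrrs patterns leave a success/failure indication in the
     condition flags; materialise it as the integer status value returned
     by the intrinsic.  */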
1561   rtx cc_reg = gen_rtx_REG (CC_Zmode, CC_REGNUM);
1562   rtx cmp_rtx = gen_rtx_fmt_ee (EQ, SImode, cc_reg, const0_rtx);
1563   emit_insn (gen_aarch64_cstoresi (target, cmp_rtx, cc_reg));
1564   return target;
1565 }
1566 
1567 /* Expand an expression EXP that calls a built-in function,
1568    with result going to TARGET if that's convenient.  */
1569 rtx
1570 aarch64_expand_builtin (tree exp,
1571 		     rtx target,
1572 		     rtx subtarget ATTRIBUTE_UNUSED,
1573 		     machine_mode mode ATTRIBUTE_UNUSED,
1574 		     int ignore ATTRIBUTE_UNUSED)
1575 {
1576   tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
1577   int fcode = DECL_FUNCTION_CODE (fndecl);
1578   int icode;
1579   rtx pat, op0;
1580   tree arg0;
1581 
1582   switch (fcode)
1583     {
1584     case AARCH64_BUILTIN_GET_FPCR:
1585     case AARCH64_BUILTIN_SET_FPCR:
1586     case AARCH64_BUILTIN_GET_FPSR:
1587     case AARCH64_BUILTIN_SET_FPSR:
1588       if ((fcode == AARCH64_BUILTIN_GET_FPCR)
1589 	  || (fcode == AARCH64_BUILTIN_GET_FPSR))
1590 	{
1591 	  icode = (fcode == AARCH64_BUILTIN_GET_FPSR) ?
1592 	    CODE_FOR_get_fpsr : CODE_FOR_get_fpcr;
1593 	  target = gen_reg_rtx (SImode);
1594 	  pat = GEN_FCN (icode) (target);
1595 	}
1596       else
1597 	{
1598 	  target = NULL_RTX;
1599 	  icode = (fcode == AARCH64_BUILTIN_SET_FPSR) ?
1600 	    CODE_FOR_set_fpsr : CODE_FOR_set_fpcr;
1601 	  arg0 = CALL_EXPR_ARG (exp, 0);
1602 	  op0 = force_reg (SImode, expand_normal (arg0));
1603 	  pat = GEN_FCN (icode) (op0);
1604 	}
1605       emit_insn (pat);
1606       return target;
1607 
1608     case AARCH64_PAUTH_BUILTIN_AUTIA1716:
1609     case AARCH64_PAUTH_BUILTIN_PACIA1716:
1610     case AARCH64_PAUTH_BUILTIN_XPACLRI:
1611       arg0 = CALL_EXPR_ARG (exp, 0);
1612       op0 = force_reg (Pmode, expand_normal (arg0));
1613 
1614       if (!target)
1615 	target = gen_reg_rtx (Pmode);
1616       else
1617 	target = force_reg (Pmode, target);
1618 
1619       emit_move_insn (target, op0);
1620 
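      /* XPACLRI operates on the link register, while PACIA1716/AUTIA1716
	 take their pointer in X17 and the modifier in X16, hence the
	 explicit register moves below.  */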
1621       if (fcode == AARCH64_PAUTH_BUILTIN_XPACLRI)
1622 	{
1623 	  rtx lr = gen_rtx_REG (Pmode, R30_REGNUM);
1624 	  icode = CODE_FOR_xpaclri;
1625 	  emit_move_insn (lr, op0);
1626 	  emit_insn (GEN_FCN (icode) ());
1627 	  emit_move_insn (target, lr);
1628 	}
1629       else
1630 	{
1631 	  tree arg1 = CALL_EXPR_ARG (exp, 1);
1632 	  rtx op1 = force_reg (Pmode, expand_normal (arg1));
1633 	  icode = (fcode == AARCH64_PAUTH_BUILTIN_PACIA1716
1634 		   ? CODE_FOR_paci1716 : CODE_FOR_auti1716);
1635 
1636 	  rtx x16_reg = gen_rtx_REG (Pmode, R16_REGNUM);
1637 	  rtx x17_reg = gen_rtx_REG (Pmode, R17_REGNUM);
1638 	  emit_move_insn (x17_reg, op0);
1639 	  emit_move_insn (x16_reg, op1);
1640 	  emit_insn (GEN_FCN (icode) ());
1641 	  emit_move_insn (target, x17_reg);
1642 	}
1643 
1644       return target;
1645 
1646     case AARCH64_JSCVT:
1647       {
1648 	expand_operand ops[2];
1649 	create_output_operand (&ops[0], target, SImode);
1650 	op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
1651 	create_input_operand (&ops[1], op0, DFmode);
1652 	expand_insn (CODE_FOR_aarch64_fjcvtzs, 2, ops);
1653 	return ops[0].value;
1654       }
1655 
1656     case AARCH64_SIMD_BUILTIN_FCMLA_LANEQ0_V2SF:
1657     case AARCH64_SIMD_BUILTIN_FCMLA_LANEQ90_V2SF:
1658     case AARCH64_SIMD_BUILTIN_FCMLA_LANEQ180_V2SF:
1659     case AARCH64_SIMD_BUILTIN_FCMLA_LANEQ270_V2SF:
1660     case AARCH64_SIMD_BUILTIN_FCMLA_LANEQ0_V4HF:
1661     case AARCH64_SIMD_BUILTIN_FCMLA_LANEQ90_V4HF:
1662     case AARCH64_SIMD_BUILTIN_FCMLA_LANEQ180_V4HF:
1663     case AARCH64_SIMD_BUILTIN_FCMLA_LANEQ270_V4HF:
1664       return aarch64_expand_fcmla_builtin (exp, target, fcode);
1665     case AARCH64_BUILTIN_RNG_RNDR:
1666     case AARCH64_BUILTIN_RNG_RNDRRS:
1667       return aarch64_expand_rng_builtin (exp, target, fcode, ignore);
1668     }
1669 
1670   if (fcode >= AARCH64_SIMD_BUILTIN_BASE && fcode <= AARCH64_SIMD_BUILTIN_MAX)
1671     return aarch64_simd_expand_builtin (fcode, exp, target);
1672   else if (fcode >= AARCH64_CRC32_BUILTIN_BASE && fcode <= AARCH64_CRC32_BUILTIN_MAX)
1673     return aarch64_crc32_expand_builtin (fcode, exp, target);
1674 
1675   if (fcode == AARCH64_BUILTIN_RSQRT_DF
1676       || fcode == AARCH64_BUILTIN_RSQRT_SF
1677       || fcode == AARCH64_BUILTIN_RSQRT_V2DF
1678       || fcode == AARCH64_BUILTIN_RSQRT_V2SF
1679       || fcode == AARCH64_BUILTIN_RSQRT_V4SF)
1680     return aarch64_expand_builtin_rsqrt (fcode, exp, target);
1681 
1682   gcc_unreachable ();
1683 }
1684 
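/* Return a target-specific builtin decl that implements the combined
   function FN on vectors of type TYPE_OUT given arguments of type TYPE_IN,
   or NULL_TREE if no suitable builtin is available.  */
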
1685 tree
1686 aarch64_builtin_vectorized_function (unsigned int fn, tree type_out,
1687 				     tree type_in)
1688 {
1689   machine_mode in_mode, out_mode;
1690   unsigned HOST_WIDE_INT in_n, out_n;
1691 
1692   if (TREE_CODE (type_out) != VECTOR_TYPE
1693       || TREE_CODE (type_in) != VECTOR_TYPE)
1694     return NULL_TREE;
1695 
1696   out_mode = TYPE_MODE (TREE_TYPE (type_out));
1697   in_mode = TYPE_MODE (TREE_TYPE (type_in));
1698   if (!TYPE_VECTOR_SUBPARTS (type_out).is_constant (&out_n)
1699       || !TYPE_VECTOR_SUBPARTS (type_in).is_constant (&in_n))
1700     return NULL_TREE;
1701 
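/* AARCH64_CHECK_BUILTIN_MODE is redefined below for each group of cases;
   AARCH64_FIND_FRINT_VARIANT then selects the V2DF, V4SF or V2SF unary
   builtin whose modes satisfy the definition in effect at its point of
   use.  */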
1702 #undef AARCH64_CHECK_BUILTIN_MODE
1703 #define AARCH64_CHECK_BUILTIN_MODE(C, N) 1
1704 #define AARCH64_FIND_FRINT_VARIANT(N) \
1705   (AARCH64_CHECK_BUILTIN_MODE (2, D) \
1706     ? aarch64_builtin_decls[AARCH64_SIMD_BUILTIN_UNOP_##N##v2df] \
1707     : (AARCH64_CHECK_BUILTIN_MODE (4, S) \
1708 	? aarch64_builtin_decls[AARCH64_SIMD_BUILTIN_UNOP_##N##v4sf] \
1709 	: (AARCH64_CHECK_BUILTIN_MODE (2, S) \
1710 	   ? aarch64_builtin_decls[AARCH64_SIMD_BUILTIN_UNOP_##N##v2sf] \
1711 	   : NULL_TREE)))
1712   switch (fn)
1713     {
1714 #undef AARCH64_CHECK_BUILTIN_MODE
1715 #define AARCH64_CHECK_BUILTIN_MODE(C, N) \
1716   (out_mode == N##Fmode && out_n == C \
1717    && in_mode == N##Fmode && in_n == C)
1718     CASE_CFN_FLOOR:
1719       return AARCH64_FIND_FRINT_VARIANT (floor);
1720     CASE_CFN_CEIL:
1721       return AARCH64_FIND_FRINT_VARIANT (ceil);
1722     CASE_CFN_TRUNC:
1723       return AARCH64_FIND_FRINT_VARIANT (btrunc);
1724     CASE_CFN_ROUND:
1725       return AARCH64_FIND_FRINT_VARIANT (round);
1726     CASE_CFN_NEARBYINT:
1727       return AARCH64_FIND_FRINT_VARIANT (nearbyint);
1728     CASE_CFN_SQRT:
1729       return AARCH64_FIND_FRINT_VARIANT (sqrt);
1730 #undef AARCH64_CHECK_BUILTIN_MODE
1731 #define AARCH64_CHECK_BUILTIN_MODE(C, N) \
1732   (out_mode == SImode && out_n == C \
1733    && in_mode == N##Imode && in_n == C)
1734     CASE_CFN_CLZ:
1735       {
1736 	if (AARCH64_CHECK_BUILTIN_MODE (4, S))
1737 	  return aarch64_builtin_decls[AARCH64_SIMD_BUILTIN_UNOP_clzv4si];
1738 	return NULL_TREE;
1739       }
1740     CASE_CFN_CTZ:
1741       {
1742 	if (AARCH64_CHECK_BUILTIN_MODE (2, S))
1743 	  return aarch64_builtin_decls[AARCH64_SIMD_BUILTIN_UNOP_ctzv2si];
1744 	else if (AARCH64_CHECK_BUILTIN_MODE (4, S))
1745 	  return aarch64_builtin_decls[AARCH64_SIMD_BUILTIN_UNOP_ctzv4si];
1746 	return NULL_TREE;
1747       }
1748 #undef AARCH64_CHECK_BUILTIN_MODE
1749 #define AARCH64_CHECK_BUILTIN_MODE(C, N) \
1750   (out_mode == N##Imode && out_n == C \
1751    && in_mode == N##Fmode && in_n == C)
1752     CASE_CFN_IFLOOR:
1753     CASE_CFN_LFLOOR:
1754     CASE_CFN_LLFLOOR:
1755       {
1756 	enum aarch64_builtins builtin;
1757 	if (AARCH64_CHECK_BUILTIN_MODE (2, D))
1758 	  builtin = AARCH64_SIMD_BUILTIN_UNOP_lfloorv2dfv2di;
1759 	else if (AARCH64_CHECK_BUILTIN_MODE (4, S))
1760 	  builtin = AARCH64_SIMD_BUILTIN_UNOP_lfloorv4sfv4si;
1761 	else if (AARCH64_CHECK_BUILTIN_MODE (2, S))
1762 	  builtin = AARCH64_SIMD_BUILTIN_UNOP_lfloorv2sfv2si;
1763 	else
1764 	  return NULL_TREE;
1765 
1766 	return aarch64_builtin_decls[builtin];
1767       }
1768     CASE_CFN_ICEIL:
1769     CASE_CFN_LCEIL:
1770     CASE_CFN_LLCEIL:
1771       {
1772 	enum aarch64_builtins builtin;
1773 	if (AARCH64_CHECK_BUILTIN_MODE (2, D))
1774 	  builtin = AARCH64_SIMD_BUILTIN_UNOP_lceilv2dfv2di;
1775 	else if (AARCH64_CHECK_BUILTIN_MODE (4, S))
1776 	  builtin = AARCH64_SIMD_BUILTIN_UNOP_lceilv4sfv4si;
1777 	else if (AARCH64_CHECK_BUILTIN_MODE (2, S))
1778 	  builtin = AARCH64_SIMD_BUILTIN_UNOP_lceilv2sfv2si;
1779 	else
1780 	  return NULL_TREE;
1781 
1782 	return aarch64_builtin_decls[builtin];
1783       }
1784     CASE_CFN_IROUND:
1785     CASE_CFN_LROUND:
1786     CASE_CFN_LLROUND:
1787       {
1788 	enum aarch64_builtins builtin;
1789 	if (AARCH64_CHECK_BUILTIN_MODE (2, D))
1790 	  builtin = AARCH64_SIMD_BUILTIN_UNOP_lroundv2dfv2di;
1791 	else if (AARCH64_CHECK_BUILTIN_MODE (4, S))
1792 	  builtin = AARCH64_SIMD_BUILTIN_UNOP_lroundv4sfv4si;
1793 	else if (AARCH64_CHECK_BUILTIN_MODE (2, S))
1794 	  builtin = AARCH64_SIMD_BUILTIN_UNOP_lroundv2sfv2si;
1795 	else
1796 	  return NULL_TREE;
1797 
1798 	return aarch64_builtin_decls[builtin];
1799       }
1800     case CFN_BUILT_IN_BSWAP16:
1801 #undef AARCH64_CHECK_BUILTIN_MODE
1802 #define AARCH64_CHECK_BUILTIN_MODE(C, N) \
1803   (out_mode == N##Imode && out_n == C \
1804    && in_mode == N##Imode && in_n == C)
1805       if (AARCH64_CHECK_BUILTIN_MODE (4, H))
1806 	return aarch64_builtin_decls[AARCH64_SIMD_BUILTIN_UNOPU_bswapv4hi];
1807       else if (AARCH64_CHECK_BUILTIN_MODE (8, H))
1808 	return aarch64_builtin_decls[AARCH64_SIMD_BUILTIN_UNOPU_bswapv8hi];
1809       else
1810 	return NULL_TREE;
1811     case CFN_BUILT_IN_BSWAP32:
1812       if (AARCH64_CHECK_BUILTIN_MODE (2, S))
1813 	return aarch64_builtin_decls[AARCH64_SIMD_BUILTIN_UNOPU_bswapv2si];
1814       else if (AARCH64_CHECK_BUILTIN_MODE (4, S))
1815 	return aarch64_builtin_decls[AARCH64_SIMD_BUILTIN_UNOPU_bswapv4si];
1816       else
1817 	return NULL_TREE;
1818     case CFN_BUILT_IN_BSWAP64:
1819       if (AARCH64_CHECK_BUILTIN_MODE (2, D))
1820 	return aarch64_builtin_decls[AARCH64_SIMD_BUILTIN_UNOPU_bswapv2di];
1821       else
1822 	return NULL_TREE;
1823     default:
1824       return NULL_TREE;
1825     }
1826 
1827   return NULL_TREE;
1828 }
1829 
1830 /* Return builtin for reciprocal square root.  */
1831 
1832 tree
1833 aarch64_builtin_rsqrt (unsigned int fn)
1834 {
1835   if (fn == AARCH64_SIMD_BUILTIN_UNOP_sqrtv2df)
1836     return aarch64_builtin_decls[AARCH64_BUILTIN_RSQRT_V2DF];
1837   if (fn == AARCH64_SIMD_BUILTIN_UNOP_sqrtv2sf)
1838     return aarch64_builtin_decls[AARCH64_BUILTIN_RSQRT_V2SF];
1839   if (fn == AARCH64_SIMD_BUILTIN_UNOP_sqrtv4sf)
1840     return aarch64_builtin_decls[AARCH64_BUILTIN_RSQRT_V4SF];
1841   return NULL_TREE;
1842 }
1843 
1844 #undef VAR1
1845 #define VAR1(T, N, MAP, A) \
1846   case AARCH64_SIMD_BUILTIN_##T##_##N##A:
1847 
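/* Try to fold a call to the built-in function with declaration FNDECL and
   arguments ARGS.  Return the folded expression or NULL_TREE if no
   simplification was possible.  */
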
1848 tree
1849 aarch64_fold_builtin (tree fndecl, int n_args ATTRIBUTE_UNUSED, tree *args,
1850 		      bool ignore ATTRIBUTE_UNUSED)
1851 {
1852   int fcode = DECL_FUNCTION_CODE (fndecl);
1853   tree type = TREE_TYPE (TREE_TYPE (fndecl));
1854 
1855   switch (fcode)
1856     {
1857       BUILTIN_VDQF (UNOP, abs, 2)
1858 	return fold_build1 (ABS_EXPR, type, args[0]);
1859       VAR1 (UNOP, floatv2si, 2, v2sf)
1860       VAR1 (UNOP, floatv4si, 2, v4sf)
1861       VAR1 (UNOP, floatv2di, 2, v2df)
1862 	return fold_build1 (FLOAT_EXPR, type, args[0]);
1863       default:
1864 	break;
1865     }
1866 
1867   return NULL_TREE;
1868 }
1869 
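/* Try to fold the call in the statement pointed to by GSI to an AArch64
   built-in function.  Return true if the statement was replaced with a
   simpler equivalent.  */
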
1870 bool
1871 aarch64_gimple_fold_builtin (gimple_stmt_iterator *gsi)
1872 {
1873   bool changed = false;
1874   gimple *stmt = gsi_stmt (*gsi);
1875   tree call = gimple_call_fn (stmt);
1876   tree fndecl;
1877   gimple *new_stmt = NULL;
1878 
1879   if (call)
1880     {
1881       fndecl = gimple_call_fndecl (stmt);
1882       if (fndecl)
1883 	{
1884 	  int fcode = DECL_FUNCTION_CODE (fndecl);
1885 	  unsigned nargs = gimple_call_num_args (stmt);
1886 	  tree *args = (nargs > 0
1887 			? gimple_call_arg_ptr (stmt, 0)
1888 			: &error_mark_node);
1889 
1890 	  /* We use gimple's IFN_REDUC_(PLUS|MIN|MAX) internal functions for
1891 	     float, signed int and unsigned int; the variants are distinguished
1892 	     by the types of the arguments to the __builtin.  */
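	  /* For example, every reduc_plus_scal_ builtin handled below,
	     whatever its vector mode, is rewritten as a single
	     IFN_REDUC_PLUS call on its argument.  */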
1893 	  switch (fcode)
1894 	    {
1895 	      BUILTIN_VALL (UNOP, reduc_plus_scal_, 10)
1896 	        new_stmt = gimple_build_call_internal (IFN_REDUC_PLUS,
1897 						       1, args[0]);
1898 		gimple_call_set_lhs (new_stmt, gimple_call_lhs (stmt));
1899 		break;
1900 	      BUILTIN_VDQIF (UNOP, reduc_smax_scal_, 10)
1901 	      BUILTIN_VDQ_BHSI (UNOPU, reduc_umax_scal_, 10)
1902 	        new_stmt = gimple_build_call_internal (IFN_REDUC_MAX,
1903 						       1, args[0]);
1904 		gimple_call_set_lhs (new_stmt, gimple_call_lhs (stmt));
1905 		break;
1906 	      BUILTIN_VDQIF (UNOP, reduc_smin_scal_, 10)
1907 	      BUILTIN_VDQ_BHSI (UNOPU, reduc_umin_scal_, 10)
1908 	        new_stmt = gimple_build_call_internal (IFN_REDUC_MIN,
1909 						       1, args[0]);
1910 		gimple_call_set_lhs (new_stmt, gimple_call_lhs (stmt));
1911 		break;
1912 	      BUILTIN_GPF (BINOP, fmulx, 0)
1913 		{
1914 		  gcc_assert (nargs == 2);
1915 		  bool a0_cst_p = TREE_CODE (args[0]) == REAL_CST;
1916 		  bool a1_cst_p = TREE_CODE (args[1]) == REAL_CST;
1917 		  if (a0_cst_p || a1_cst_p)
1918 		    {
1919 		      if (a0_cst_p && a1_cst_p)
1920 			{
1921 			  tree t0 = TREE_TYPE (args[0]);
1922 			  real_value a0 = (TREE_REAL_CST (args[0]));
1923 			  real_value a1 = (TREE_REAL_CST (args[1]));
1924 			  if (real_equal (&a1, &dconst0))
1925 			    std::swap (a0, a1);
1926 			  /* According to real_equal (), +0 equals -0.  */
1927 			  if (real_equal (&a0, &dconst0) && real_isinf (&a1))
1928 			    {
1929 			      real_value res = dconst2;
1930 			      res.sign = a0.sign ^ a1.sign;
1931 			      new_stmt =
1932 				gimple_build_assign (gimple_call_lhs (stmt),
1933 						     REAL_CST,
1934 						     build_real (t0, res));
1935 			    }
1936 			  else
1937 			    new_stmt =
1938 			      gimple_build_assign (gimple_call_lhs (stmt),
1939 						   MULT_EXPR,
1940 						   args[0], args[1]);
1941 			}
1942 		      else /* a0_cst_p ^ a1_cst_p.  */
1943 			{
1944 			  real_value const_part = a0_cst_p
1945 			    ? TREE_REAL_CST (args[0]) : TREE_REAL_CST (args[1]);
1946 			  if (!real_equal (&const_part, &dconst0)
1947 			      && !real_isinf (&const_part))
1948 			    new_stmt =
1949 			      gimple_build_assign (gimple_call_lhs (stmt),
1950 						   MULT_EXPR, args[0], args[1]);
1951 			}
1952 		    }
1953 		  if (new_stmt)
1954 		    {
1955 		      gimple_set_vuse (new_stmt, gimple_vuse (stmt));
1956 		      gimple_set_vdef (new_stmt, gimple_vdef (stmt));
1957 		    }
1958 		  break;
1959 		}
1960 	    default:
1961 	      break;
1962 	    }
1963 	}
1964     }
1965 
1966   if (new_stmt)
1967     {
1968       gsi_replace (gsi, new_stmt, true);
1969       changed = true;
1970     }
1971 
1972   return changed;
1973 }
1974 
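/* Generate the statement trees *HOLD, *CLEAR and *UPDATE that save the
   FPCR/FPSR environment, clear the exception flags, and afterwards restore
   the environment and re-raise any floating-point exceptions that occurred;
   see the pseudo-code in the comments below.  */
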
1975 void
1976 aarch64_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
1977 {
1978   const unsigned AARCH64_FE_INVALID = 1;
1979   const unsigned AARCH64_FE_DIVBYZERO = 2;
1980   const unsigned AARCH64_FE_OVERFLOW = 4;
1981   const unsigned AARCH64_FE_UNDERFLOW = 8;
1982   const unsigned AARCH64_FE_INEXACT = 16;
1983   const unsigned HOST_WIDE_INT AARCH64_FE_ALL_EXCEPT = (AARCH64_FE_INVALID
1984 							| AARCH64_FE_DIVBYZERO
1985 							| AARCH64_FE_OVERFLOW
1986 							| AARCH64_FE_UNDERFLOW
1987 							| AARCH64_FE_INEXACT);
1988   const unsigned HOST_WIDE_INT AARCH64_FE_EXCEPT_SHIFT = 8;
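  /* The exception flags occupy the low bits of the FPSR, while the matching
     trap-enable bits in the FPCR sit AARCH64_FE_EXCEPT_SHIFT bits higher,
     hence the shifted mask used for the control register below.  */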
1989   tree fenv_cr, fenv_sr, get_fpcr, set_fpcr, mask_cr, mask_sr;
1990   tree ld_fenv_cr, ld_fenv_sr, masked_fenv_cr, masked_fenv_sr, hold_fnclex_cr;
1991   tree hold_fnclex_sr, new_fenv_var, reload_fenv, restore_fnenv, get_fpsr, set_fpsr;
1992   tree update_call, atomic_feraiseexcept, hold_fnclex, masked_fenv, ld_fenv;
1993 
1994   /* Generate the equivalent of :
1995        unsigned int fenv_cr;
1996        fenv_cr = __builtin_aarch64_get_fpcr ();
1997 
1998        unsigned int fenv_sr;
1999        fenv_sr = __builtin_aarch64_get_fpsr ();
2000 
2001        Now set all exceptions to non-stop
2002        unsigned int mask_cr
2003 		= ~(AARCH64_FE_ALL_EXCEPT << AARCH64_FE_EXCEPT_SHIFT);
2004        unsigned int masked_cr;
2005        masked_cr = fenv_cr & mask_cr;
2006 
2007        And clear all exception flags
2008        unsigned int mask_sr = ~AARCH64_FE_ALL_EXCEPT;
2009        unsigned int masked_sr;
2010        masked_sr = fenv_sr & mask_sr;
2011 
2012        __builtin_aarch64_set_fpcr (masked_cr);
2013        __builtin_aarch64_set_fpsr (masked_sr);  */
2014 
2015   fenv_cr = create_tmp_var_raw (unsigned_type_node);
2016   fenv_sr = create_tmp_var_raw (unsigned_type_node);
2017 
2018   get_fpcr = aarch64_builtin_decls[AARCH64_BUILTIN_GET_FPCR];
2019   set_fpcr = aarch64_builtin_decls[AARCH64_BUILTIN_SET_FPCR];
2020   get_fpsr = aarch64_builtin_decls[AARCH64_BUILTIN_GET_FPSR];
2021   set_fpsr = aarch64_builtin_decls[AARCH64_BUILTIN_SET_FPSR];
2022 
2023   mask_cr = build_int_cst (unsigned_type_node,
2024 			   ~(AARCH64_FE_ALL_EXCEPT << AARCH64_FE_EXCEPT_SHIFT));
2025   mask_sr = build_int_cst (unsigned_type_node,
2026 			   ~(AARCH64_FE_ALL_EXCEPT));
2027 
2028   ld_fenv_cr = build4 (TARGET_EXPR, unsigned_type_node,
2029 		       fenv_cr, build_call_expr (get_fpcr, 0),
2030 		       NULL_TREE, NULL_TREE);
2031   ld_fenv_sr = build4 (TARGET_EXPR, unsigned_type_node,
2032 		       fenv_sr, build_call_expr (get_fpsr, 0),
2033 		       NULL_TREE, NULL_TREE);
2034 
2035   masked_fenv_cr = build2 (BIT_AND_EXPR, unsigned_type_node, fenv_cr, mask_cr);
2036   masked_fenv_sr = build2 (BIT_AND_EXPR, unsigned_type_node, fenv_sr, mask_sr);
2037 
2038   hold_fnclex_cr = build_call_expr (set_fpcr, 1, masked_fenv_cr);
2039   hold_fnclex_sr = build_call_expr (set_fpsr, 1, masked_fenv_sr);
2040 
2041   hold_fnclex = build2 (COMPOUND_EXPR, void_type_node, hold_fnclex_cr,
2042 			hold_fnclex_sr);
2043   masked_fenv = build2 (COMPOUND_EXPR, void_type_node, masked_fenv_cr,
2044 			masked_fenv_sr);
2045   ld_fenv = build2 (COMPOUND_EXPR, void_type_node, ld_fenv_cr, ld_fenv_sr);
2046 
2047   *hold = build2 (COMPOUND_EXPR, void_type_node,
2048 		  build2 (COMPOUND_EXPR, void_type_node, masked_fenv, ld_fenv),
2049 		  hold_fnclex);
2050 
2051   /* Store the value of masked_fenv to clear the exceptions:
2052      __builtin_aarch64_set_fpsr (masked_fenv_sr);  */
2053 
2054   *clear = build_call_expr (set_fpsr, 1, masked_fenv_sr);
2055 
2056   /* Generate the equivalent of :
2057        unsigned int new_fenv_var;
2058        new_fenv_var = __builtin_aarch64_get_fpsr ();
2059 
2060        __builtin_aarch64_set_fpsr (fenv_sr);
2061 
2062        __atomic_feraiseexcept (new_fenv_var);  */
2063 
2064   new_fenv_var = create_tmp_var_raw (unsigned_type_node);
2065   reload_fenv = build4 (TARGET_EXPR, unsigned_type_node,
2066 			new_fenv_var, build_call_expr (get_fpsr, 0),
2067 			NULL_TREE, NULL_TREE);
2068   restore_fnenv = build_call_expr (set_fpsr, 1, fenv_sr);
2069   atomic_feraiseexcept = builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT);
2070   update_call = build_call_expr (atomic_feraiseexcept, 1,
2071 				 fold_convert (integer_type_node, new_fenv_var));
2072   *update = build2 (COMPOUND_EXPR, void_type_node,
2073 		    build2 (COMPOUND_EXPR, void_type_node,
2074 			    reload_fenv, restore_fnenv), update_call);
2075 }
2076 
2077 
2078 #undef AARCH64_CHECK_BUILTIN_MODE
2079 #undef AARCH64_FIND_FRINT_VARIANT
2080 #undef CF0
2081 #undef CF1
2082 #undef CF2
2083 #undef CF3
2084 #undef CF4
2085 #undef CF10
2086 #undef VAR1
2087 #undef VAR2
2088 #undef VAR3
2089 #undef VAR4
2090 #undef VAR5
2091 #undef VAR6
2092 #undef VAR7
2093 #undef VAR8
2094 #undef VAR9
2095 #undef VAR10
2096 #undef VAR11
2097 
2098 #include "gt-aarch64-builtins.h"
2099