/* Builtins' description for AArch64 SIMD architecture.
   Copyright (C) 2011-2019 Free Software Foundation, Inc.
   Contributed by ARM Ltd.

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   GCC is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING3.  If not see
   <http://www.gnu.org/licenses/>.  */

#define IN_TARGET_CODE 1

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "function.h"
#include "basic-block.h"
#include "rtl.h"
#include "tree.h"
#include "gimple.h"
#include "memmodel.h"
#include "tm_p.h"
#include "expmed.h"
#include "optabs.h"
#include "recog.h"
#include "diagnostic-core.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "explow.h"
#include "expr.h"
#include "langhooks.h"
#include "gimple-iterator.h"
#include "case-cfn-macros.h"
#include "emit-rtl.h"

#define v8qi_UP E_V8QImode
#define v4hi_UP E_V4HImode
#define v4hf_UP E_V4HFmode
#define v2si_UP E_V2SImode
#define v2sf_UP E_V2SFmode
#define v1df_UP E_V1DFmode
#define di_UP E_DImode
#define df_UP E_DFmode
#define v16qi_UP E_V16QImode
#define v8hi_UP E_V8HImode
#define v8hf_UP E_V8HFmode
#define v4si_UP E_V4SImode
#define v4sf_UP E_V4SFmode
#define v2di_UP E_V2DImode
#define v2df_UP E_V2DFmode
#define ti_UP E_TImode
#define oi_UP E_OImode
#define ci_UP E_CImode
#define xi_UP E_XImode
#define si_UP E_SImode
#define sf_UP E_SFmode
#define hi_UP E_HImode
#define hf_UP E_HFmode
#define qi_UP E_QImode
#define UP(X) X##_UP

#define SIMD_MAX_BUILTIN_ARGS 5

enum aarch64_type_qualifiers
{
  /* T foo.  */
  qualifier_none = 0x0,
  /* unsigned T foo.  */
  qualifier_unsigned = 0x1, /* 1 << 0  */
  /* const T foo.  */
  qualifier_const = 0x2, /* 1 << 1  */
  /* T *foo.  */
  qualifier_pointer = 0x4, /* 1 << 2  */
  /* Used when expanding arguments if an operand could
     be an immediate.  */
  qualifier_immediate = 0x8, /* 1 << 3  */
  qualifier_maybe_immediate = 0x10, /* 1 << 4  */
  /* void foo (...).  */
  qualifier_void = 0x20, /* 1 << 5  */
  /* Some patterns may have internal operands, this qualifier is an
     instruction to the initialisation code to skip this operand.  */
  qualifier_internal = 0x40, /* 1 << 6  */
  /* Some builtins should use the T_*mode* encoded in a simd_builtin_datum
     rather than using the type of the operand.  */
  qualifier_map_mode = 0x80, /* 1 << 7  */
  /* qualifier_pointer | qualifier_map_mode  */
  qualifier_pointer_map_mode = 0x84,
  /* qualifier_const | qualifier_pointer | qualifier_map_mode  */
  qualifier_const_pointer_map_mode = 0x86,
  /* Polynomial types.  */
  qualifier_poly = 0x100,
  /* Lane indices - must be in range, and flipped for bigendian.  */
  qualifier_lane_index = 0x200,
  /* Lane indices for single lane structure loads and stores.  */
  qualifier_struct_load_store_lane_index = 0x400,
  /* Lane indices selected in pairs - must be in range, and flipped for
     bigendian.  */
  qualifier_lane_pair_index = 0x800,
};
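
/* For illustration (comment added here, not in the original sources):
   a qualifier array is read as <return type, argument types...>, so
   TYPES_BINOP below, { qualifier_none, qualifier_none,
   qualifier_maybe_immediate }, describes builtins of the shape

     int32x4_t __builtin_aarch64_xxxv4si (int32x4_t, int32x4_t);

   where the trailing qualifier lets the second argument be expanded as
   an immediate when the instruction pattern accepts one.  */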

typedef struct
{
  const char *name;
  machine_mode mode;
  const enum insn_code code;
  unsigned int fcode;
  enum aarch64_type_qualifiers *qualifiers;
} aarch64_simd_builtin_datum;

static enum aarch64_type_qualifiers
aarch64_types_unop_qualifiers[SIMD_MAX_BUILTIN_ARGS]
  = { qualifier_none, qualifier_none };
#define TYPES_UNOP (aarch64_types_unop_qualifiers)
static enum aarch64_type_qualifiers
aarch64_types_unopu_qualifiers[SIMD_MAX_BUILTIN_ARGS]
  = { qualifier_unsigned, qualifier_unsigned };
#define TYPES_UNOPU (aarch64_types_unopu_qualifiers)
static enum aarch64_type_qualifiers
aarch64_types_unopus_qualifiers[SIMD_MAX_BUILTIN_ARGS]
  = { qualifier_unsigned, qualifier_none };
#define TYPES_UNOPUS (aarch64_types_unopus_qualifiers)
static enum aarch64_type_qualifiers
aarch64_types_binop_qualifiers[SIMD_MAX_BUILTIN_ARGS]
  = { qualifier_none, qualifier_none, qualifier_maybe_immediate };
#define TYPES_BINOP (aarch64_types_binop_qualifiers)
static enum aarch64_type_qualifiers
aarch64_types_binopu_qualifiers[SIMD_MAX_BUILTIN_ARGS]
  = { qualifier_unsigned, qualifier_unsigned, qualifier_unsigned };
#define TYPES_BINOPU (aarch64_types_binopu_qualifiers)
static enum aarch64_type_qualifiers
aarch64_types_binop_uus_qualifiers[SIMD_MAX_BUILTIN_ARGS]
  = { qualifier_unsigned, qualifier_unsigned, qualifier_none };
#define TYPES_BINOP_UUS (aarch64_types_binop_uus_qualifiers)
static enum aarch64_type_qualifiers
aarch64_types_binop_ssu_qualifiers[SIMD_MAX_BUILTIN_ARGS]
  = { qualifier_none, qualifier_none, qualifier_unsigned };
#define TYPES_BINOP_SSU (aarch64_types_binop_ssu_qualifiers)
static enum aarch64_type_qualifiers
aarch64_types_binop_uss_qualifiers[SIMD_MAX_BUILTIN_ARGS]
  = { qualifier_unsigned, qualifier_none, qualifier_none };
#define TYPES_BINOP_USS (aarch64_types_binop_uss_qualifiers)
static enum aarch64_type_qualifiers
aarch64_types_binopp_qualifiers[SIMD_MAX_BUILTIN_ARGS]
  = { qualifier_poly, qualifier_poly, qualifier_poly };
#define TYPES_BINOPP (aarch64_types_binopp_qualifiers)

static enum aarch64_type_qualifiers
aarch64_types_ternop_qualifiers[SIMD_MAX_BUILTIN_ARGS]
  = { qualifier_none, qualifier_none, qualifier_none, qualifier_none };
#define TYPES_TERNOP (aarch64_types_ternop_qualifiers)
static enum aarch64_type_qualifiers
aarch64_types_ternop_lane_qualifiers[SIMD_MAX_BUILTIN_ARGS]
  = { qualifier_none, qualifier_none, qualifier_none, qualifier_lane_index };
#define TYPES_TERNOP_LANE (aarch64_types_ternop_lane_qualifiers)
static enum aarch64_type_qualifiers
aarch64_types_ternopu_qualifiers[SIMD_MAX_BUILTIN_ARGS]
  = { qualifier_unsigned, qualifier_unsigned,
      qualifier_unsigned, qualifier_unsigned };
#define TYPES_TERNOPU (aarch64_types_ternopu_qualifiers)
static enum aarch64_type_qualifiers
aarch64_types_ternopu_imm_qualifiers[SIMD_MAX_BUILTIN_ARGS]
  = { qualifier_unsigned, qualifier_unsigned,
      qualifier_unsigned, qualifier_immediate };
#define TYPES_TERNOPUI (aarch64_types_ternopu_imm_qualifiers)


static enum aarch64_type_qualifiers
aarch64_types_quadop_lane_pair_qualifiers[SIMD_MAX_BUILTIN_ARGS]
  = { qualifier_none, qualifier_none, qualifier_none,
      qualifier_none, qualifier_lane_pair_index };
#define TYPES_QUADOP_LANE_PAIR (aarch64_types_quadop_lane_pair_qualifiers)
static enum aarch64_type_qualifiers
aarch64_types_quadop_lane_qualifiers[SIMD_MAX_BUILTIN_ARGS]
  = { qualifier_none, qualifier_none, qualifier_none,
      qualifier_none, qualifier_lane_index };
#define TYPES_QUADOP_LANE (aarch64_types_quadop_lane_qualifiers)
static enum aarch64_type_qualifiers
aarch64_types_quadopu_lane_qualifiers[SIMD_MAX_BUILTIN_ARGS]
  = { qualifier_unsigned, qualifier_unsigned, qualifier_unsigned,
      qualifier_unsigned, qualifier_lane_index };
#define TYPES_QUADOPU_LANE (aarch64_types_quadopu_lane_qualifiers)

static enum aarch64_type_qualifiers
aarch64_types_quadopu_imm_qualifiers[SIMD_MAX_BUILTIN_ARGS]
  = { qualifier_unsigned, qualifier_unsigned, qualifier_unsigned,
      qualifier_unsigned, qualifier_immediate };
#define TYPES_QUADOPUI (aarch64_types_quadopu_imm_qualifiers)

static enum aarch64_type_qualifiers
aarch64_types_binop_imm_p_qualifiers[SIMD_MAX_BUILTIN_ARGS]
  = { qualifier_poly, qualifier_none, qualifier_immediate };
#define TYPES_GETREGP (aarch64_types_binop_imm_p_qualifiers)
static enum aarch64_type_qualifiers
aarch64_types_binop_imm_qualifiers[SIMD_MAX_BUILTIN_ARGS]
  = { qualifier_none, qualifier_none, qualifier_immediate };
#define TYPES_GETREG (aarch64_types_binop_imm_qualifiers)
#define TYPES_SHIFTIMM (aarch64_types_binop_imm_qualifiers)
static enum aarch64_type_qualifiers
aarch64_types_shift_to_unsigned_qualifiers[SIMD_MAX_BUILTIN_ARGS]
  = { qualifier_unsigned, qualifier_none, qualifier_immediate };
#define TYPES_SHIFTIMM_USS (aarch64_types_shift_to_unsigned_qualifiers)
static enum aarch64_type_qualifiers
aarch64_types_fcvt_from_unsigned_qualifiers[SIMD_MAX_BUILTIN_ARGS]
  = { qualifier_none, qualifier_unsigned, qualifier_immediate };
#define TYPES_FCVTIMM_SUS (aarch64_types_fcvt_from_unsigned_qualifiers)
static enum aarch64_type_qualifiers
aarch64_types_unsigned_shift_qualifiers[SIMD_MAX_BUILTIN_ARGS]
  = { qualifier_unsigned, qualifier_unsigned, qualifier_immediate };
#define TYPES_USHIFTIMM (aarch64_types_unsigned_shift_qualifiers)

static enum aarch64_type_qualifiers
aarch64_types_ternop_s_imm_p_qualifiers[SIMD_MAX_BUILTIN_ARGS]
  = { qualifier_none, qualifier_none, qualifier_poly, qualifier_immediate };
#define TYPES_SETREGP (aarch64_types_ternop_s_imm_p_qualifiers)
static enum aarch64_type_qualifiers
aarch64_types_ternop_s_imm_qualifiers[SIMD_MAX_BUILTIN_ARGS]
  = { qualifier_none, qualifier_none, qualifier_none, qualifier_immediate };
#define TYPES_SETREG (aarch64_types_ternop_s_imm_qualifiers)
#define TYPES_SHIFTINSERT (aarch64_types_ternop_s_imm_qualifiers)
#define TYPES_SHIFTACC (aarch64_types_ternop_s_imm_qualifiers)

static enum aarch64_type_qualifiers
aarch64_types_ternop_p_imm_qualifiers[SIMD_MAX_BUILTIN_ARGS]
  = { qualifier_poly, qualifier_poly, qualifier_poly, qualifier_immediate };
#define TYPES_SHIFTINSERTP (aarch64_types_ternop_p_imm_qualifiers)

static enum aarch64_type_qualifiers
aarch64_types_unsigned_shiftacc_qualifiers[SIMD_MAX_BUILTIN_ARGS]
  = { qualifier_unsigned, qualifier_unsigned, qualifier_unsigned,
      qualifier_immediate };
#define TYPES_USHIFTACC (aarch64_types_unsigned_shiftacc_qualifiers)


static enum aarch64_type_qualifiers
aarch64_types_combine_qualifiers[SIMD_MAX_BUILTIN_ARGS]
  = { qualifier_none, qualifier_none, qualifier_none };
#define TYPES_COMBINE (aarch64_types_combine_qualifiers)

static enum aarch64_type_qualifiers
aarch64_types_combine_p_qualifiers[SIMD_MAX_BUILTIN_ARGS]
  = { qualifier_poly, qualifier_poly, qualifier_poly };
#define TYPES_COMBINEP (aarch64_types_combine_p_qualifiers)

static enum aarch64_type_qualifiers
aarch64_types_load1_qualifiers[SIMD_MAX_BUILTIN_ARGS]
  = { qualifier_none, qualifier_const_pointer_map_mode };
#define TYPES_LOAD1 (aarch64_types_load1_qualifiers)
#define TYPES_LOADSTRUCT (aarch64_types_load1_qualifiers)
static enum aarch64_type_qualifiers
aarch64_types_loadstruct_lane_qualifiers[SIMD_MAX_BUILTIN_ARGS]
  = { qualifier_none, qualifier_const_pointer_map_mode,
      qualifier_none, qualifier_struct_load_store_lane_index };
#define TYPES_LOADSTRUCT_LANE (aarch64_types_loadstruct_lane_qualifiers)

static enum aarch64_type_qualifiers
aarch64_types_bsl_p_qualifiers[SIMD_MAX_BUILTIN_ARGS]
  = { qualifier_poly, qualifier_unsigned,
      qualifier_poly, qualifier_poly };
#define TYPES_BSL_P (aarch64_types_bsl_p_qualifiers)
static enum aarch64_type_qualifiers
aarch64_types_bsl_s_qualifiers[SIMD_MAX_BUILTIN_ARGS]
  = { qualifier_none, qualifier_unsigned,
      qualifier_none, qualifier_none };
#define TYPES_BSL_S (aarch64_types_bsl_s_qualifiers)
static enum aarch64_type_qualifiers
aarch64_types_bsl_u_qualifiers[SIMD_MAX_BUILTIN_ARGS]
  = { qualifier_unsigned, qualifier_unsigned,
      qualifier_unsigned, qualifier_unsigned };
#define TYPES_BSL_U (aarch64_types_bsl_u_qualifiers)

/* The first argument (return type) of a store should be void type,
   which we represent with qualifier_void.  Their first operand will be
   a DImode pointer to the location to store to, so we must use
   qualifier_map_mode | qualifier_pointer to build a pointer to the
   element type of the vector.  */
static enum aarch64_type_qualifiers
aarch64_types_store1_p_qualifiers[SIMD_MAX_BUILTIN_ARGS]
  = { qualifier_void, qualifier_pointer_map_mode, qualifier_poly };
#define TYPES_STORE1P (aarch64_types_store1_p_qualifiers)
static enum aarch64_type_qualifiers
aarch64_types_store1_qualifiers[SIMD_MAX_BUILTIN_ARGS]
  = { qualifier_void, qualifier_pointer_map_mode, qualifier_none };
#define TYPES_STORE1 (aarch64_types_store1_qualifiers)
#define TYPES_STORESTRUCT (aarch64_types_store1_qualifiers)
static enum aarch64_type_qualifiers
aarch64_types_storestruct_lane_qualifiers[SIMD_MAX_BUILTIN_ARGS]
  = { qualifier_void, qualifier_pointer_map_mode,
      qualifier_none, qualifier_struct_load_store_lane_index };
#define TYPES_STORESTRUCT_LANE (aarch64_types_storestruct_lane_qualifiers)

#define CF0(N, X) CODE_FOR_aarch64_##N##X
#define CF1(N, X) CODE_FOR_##N##X##1
#define CF2(N, X) CODE_FOR_##N##X##2
#define CF3(N, X) CODE_FOR_##N##X##3
#define CF4(N, X) CODE_FOR_##N##X##4
#define CF10(N, X) CODE_FOR_##N##X

#define VAR1(T, N, MAP, A) \
  {#N #A, UP (A), CF##MAP (N, A), 0, TYPES_##T},
#define VAR2(T, N, MAP, A, B) \
  VAR1 (T, N, MAP, A) \
  VAR1 (T, N, MAP, B)
#define VAR3(T, N, MAP, A, B, C) \
  VAR2 (T, N, MAP, A, B) \
  VAR1 (T, N, MAP, C)
#define VAR4(T, N, MAP, A, B, C, D) \
  VAR3 (T, N, MAP, A, B, C) \
  VAR1 (T, N, MAP, D)
#define VAR5(T, N, MAP, A, B, C, D, E) \
  VAR4 (T, N, MAP, A, B, C, D) \
  VAR1 (T, N, MAP, E)
#define VAR6(T, N, MAP, A, B, C, D, E, F) \
  VAR5 (T, N, MAP, A, B, C, D, E) \
  VAR1 (T, N, MAP, F)
#define VAR7(T, N, MAP, A, B, C, D, E, F, G) \
  VAR6 (T, N, MAP, A, B, C, D, E, F) \
  VAR1 (T, N, MAP, G)
#define VAR8(T, N, MAP, A, B, C, D, E, F, G, H) \
  VAR7 (T, N, MAP, A, B, C, D, E, F, G) \
  VAR1 (T, N, MAP, H)
#define VAR9(T, N, MAP, A, B, C, D, E, F, G, H, I) \
  VAR8 (T, N, MAP, A, B, C, D, E, F, G, H) \
  VAR1 (T, N, MAP, I)
#define VAR10(T, N, MAP, A, B, C, D, E, F, G, H, I, J) \
  VAR9 (T, N, MAP, A, B, C, D, E, F, G, H, I) \
  VAR1 (T, N, MAP, J)
#define VAR11(T, N, MAP, A, B, C, D, E, F, G, H, I, J, K) \
  VAR10 (T, N, MAP, A, B, C, D, E, F, G, H, I, J) \
  VAR1 (T, N, MAP, K)
#define VAR12(T, N, MAP, A, B, C, D, E, F, G, H, I, J, K, L) \
  VAR11 (T, N, MAP, A, B, C, D, E, F, G, H, I, J, K) \
  VAR1 (T, N, MAP, L)
#define VAR13(T, N, MAP, A, B, C, D, E, F, G, H, I, J, K, L, M) \
  VAR12 (T, N, MAP, A, B, C, D, E, F, G, H, I, J, K, L) \
  VAR1 (T, N, MAP, M)
#define VAR14(T, X, MAP, A, B, C, D, E, F, G, H, I, J, K, L, M, N) \
  VAR13 (T, X, MAP, A, B, C, D, E, F, G, H, I, J, K, L, M) \
  VAR1 (T, X, MAP, N)
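
/* To illustrate (comment added for exposition; the entries live in
   aarch64-simd-builtins.def): a .def line such as

     VAR2 (BINOP, add, 0, v8qi, v16qi)

   expands via VAR1 and CF0 into two table entries,

     {"addv8qi", E_V8QImode, CODE_FOR_aarch64_addv8qi, 0, TYPES_BINOP},
     {"addv16qi", E_V16QImode, CODE_FOR_aarch64_addv16qi, 0, TYPES_BINOP},

   one aarch64_simd_builtin_datum per mode, with fcode filled in later
   by aarch64_init_simd_builtins.  */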

#include "aarch64-builtin-iterators.h"

static aarch64_simd_builtin_datum aarch64_simd_builtin_data[] = {
#include "aarch64-simd-builtins.def"
};

/* There are only 8 CRC32 builtins.  Probably not worth their own .def file.  */
#define AARCH64_CRC32_BUILTINS \
  CRC32_BUILTIN (crc32b, QI) \
  CRC32_BUILTIN (crc32h, HI) \
  CRC32_BUILTIN (crc32w, SI) \
  CRC32_BUILTIN (crc32x, DI) \
  CRC32_BUILTIN (crc32cb, QI) \
  CRC32_BUILTIN (crc32ch, HI) \
  CRC32_BUILTIN (crc32cw, SI) \
  CRC32_BUILTIN (crc32cx, DI)

/* The next 8 FCMLA intrinsics require some special handling compared to the
   normal simd intrinsics.  */
#define AARCH64_SIMD_FCMLA_LANEQ_BUILTINS \
  FCMLA_LANEQ_BUILTIN (0, v2sf, fcmla, V2SF, false) \
  FCMLA_LANEQ_BUILTIN (90, v2sf, fcmla, V2SF, false) \
  FCMLA_LANEQ_BUILTIN (180, v2sf, fcmla, V2SF, false) \
  FCMLA_LANEQ_BUILTIN (270, v2sf, fcmla, V2SF, false) \
  FCMLA_LANEQ_BUILTIN (0, v4hf, fcmla_laneq, V4HF, true) \
  FCMLA_LANEQ_BUILTIN (90, v4hf, fcmla_laneq, V4HF, true) \
  FCMLA_LANEQ_BUILTIN (180, v4hf, fcmla_laneq, V4HF, true) \
  FCMLA_LANEQ_BUILTIN (270, v4hf, fcmla_laneq, V4HF, true) \

typedef struct
{
  const char *name;
  machine_mode mode;
  const enum insn_code icode;
  unsigned int fcode;
} aarch64_crc_builtin_datum;

/* Hold information about how to expand the FCMLA_LANEQ builtins.  */
typedef struct
{
  const char *name;
  machine_mode mode;
  const enum insn_code icode;
  unsigned int fcode;
  bool lane;
} aarch64_fcmla_laneq_builtin_datum;

#define CRC32_BUILTIN(N, M) \
  AARCH64_BUILTIN_##N,

#define FCMLA_LANEQ_BUILTIN(I, N, X, M, T) \
  AARCH64_SIMD_BUILTIN_FCMLA_LANEQ##I##_##M,

#undef VAR1
#define VAR1(T, N, MAP, A) \
  AARCH64_SIMD_BUILTIN_##T##_##N##A,

enum aarch64_builtins
{
  AARCH64_BUILTIN_MIN,

  AARCH64_BUILTIN_GET_FPCR,
  AARCH64_BUILTIN_SET_FPCR,
  AARCH64_BUILTIN_GET_FPSR,
  AARCH64_BUILTIN_SET_FPSR,

  AARCH64_BUILTIN_RSQRT_DF,
  AARCH64_BUILTIN_RSQRT_SF,
  AARCH64_BUILTIN_RSQRT_V2DF,
  AARCH64_BUILTIN_RSQRT_V2SF,
  AARCH64_BUILTIN_RSQRT_V4SF,
  AARCH64_SIMD_BUILTIN_BASE,
  AARCH64_SIMD_BUILTIN_LANE_CHECK,
#include "aarch64-simd-builtins.def"
  /* The first enum element which is based on an insn_data pattern.  */
  AARCH64_SIMD_PATTERN_START = AARCH64_SIMD_BUILTIN_LANE_CHECK + 1,
  AARCH64_SIMD_BUILTIN_MAX = AARCH64_SIMD_PATTERN_START
			     + ARRAY_SIZE (aarch64_simd_builtin_data) - 1,
  AARCH64_CRC32_BUILTIN_BASE,
  AARCH64_CRC32_BUILTINS
  AARCH64_CRC32_BUILTIN_MAX,
  /* ARMv8.3-A Pointer Authentication Builtins.  */
  AARCH64_PAUTH_BUILTIN_AUTIA1716,
  AARCH64_PAUTH_BUILTIN_PACIA1716,
  AARCH64_PAUTH_BUILTIN_XPACLRI,
  /* Special cased Armv8.3-A Complex FMA by Lane quad Builtins.  */
  AARCH64_SIMD_FCMLA_LANEQ_BUILTIN_BASE,
  AARCH64_SIMD_FCMLA_LANEQ_BUILTINS
  /* Builtin for Armv8.3-A Javascript conversion instruction.  */
  AARCH64_JSCVT,
  /* Armv8.5-A RNG instruction builtins.  */
  AARCH64_BUILTIN_RNG_RNDR,
  AARCH64_BUILTIN_RNG_RNDRRS,
  AARCH64_BUILTIN_MAX
};

#undef CRC32_BUILTIN
#define CRC32_BUILTIN(N, M) \
  {"__builtin_aarch64_"#N, E_##M##mode, CODE_FOR_aarch64_##N, AARCH64_BUILTIN_##N},

static aarch64_crc_builtin_datum aarch64_crc_builtin_data[] = {
  AARCH64_CRC32_BUILTINS
};
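
/* For example (illustrative expansion, not in the original sources):
   with the CRC32_BUILTIN definition just above,
   CRC32_BUILTIN (crc32b, QI) yields the entry

     {"__builtin_aarch64_crc32b", E_QImode, CODE_FOR_aarch64_crc32b,
      AARCH64_BUILTIN_crc32b},

   tying the builtin's name to the insn pattern implementing it and to
   its function code in the aarch64_builtins enum.  */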


#undef FCMLA_LANEQ_BUILTIN
#define FCMLA_LANEQ_BUILTIN(I, N, X, M, T) \
  {"__builtin_aarch64_fcmla_laneq"#I#N, E_##M##mode, CODE_FOR_aarch64_##X##I##N, \
   AARCH64_SIMD_BUILTIN_FCMLA_LANEQ##I##_##M, T},

/* This structure describes how to manage the mapping from the builtin to the
   instruction to generate in the backend and how to invoke the instruction.  */
static aarch64_fcmla_laneq_builtin_datum aarch64_fcmla_lane_builtin_data[] = {
  AARCH64_SIMD_FCMLA_LANEQ_BUILTINS
};

#undef CRC32_BUILTIN

static GTY(()) tree aarch64_builtin_decls[AARCH64_BUILTIN_MAX];

#define NUM_DREG_TYPES 6
#define NUM_QREG_TYPES 6

/* Internal scalar builtin types.  These types are used to support
   neon intrinsic builtins.  They are _not_ user-visible types.  Therefore
   the mangling for these types is implementation defined.  */
const char *aarch64_scalar_builtin_types[] = {
  "__builtin_aarch64_simd_qi",
  "__builtin_aarch64_simd_hi",
  "__builtin_aarch64_simd_si",
  "__builtin_aarch64_simd_hf",
  "__builtin_aarch64_simd_sf",
  "__builtin_aarch64_simd_di",
  "__builtin_aarch64_simd_df",
  "__builtin_aarch64_simd_poly8",
  "__builtin_aarch64_simd_poly16",
  "__builtin_aarch64_simd_poly64",
  "__builtin_aarch64_simd_poly128",
  "__builtin_aarch64_simd_ti",
  "__builtin_aarch64_simd_uqi",
  "__builtin_aarch64_simd_uhi",
  "__builtin_aarch64_simd_usi",
  "__builtin_aarch64_simd_udi",
  "__builtin_aarch64_simd_ei",
  "__builtin_aarch64_simd_oi",
  "__builtin_aarch64_simd_ci",
  "__builtin_aarch64_simd_xi",
  NULL
};

#define ENTRY(E, M, Q, G) E,
enum aarch64_simd_type
{
#include "aarch64-simd-builtin-types.def"
  ARM_NEON_H_TYPES_LAST
};
#undef ENTRY

struct aarch64_simd_type_info
{
  enum aarch64_simd_type type;

  /* Internal type name.  */
  const char *name;

  /* Internal type name (mangled).  The mangled names conform to the
     AAPCS64 (see "Procedure Call Standard for the ARM 64-bit Architecture",
     Appendix A).  To qualify for emission with the mangled names defined in
     that document, a vector type must not only be of the correct mode but also
     be of the correct internal AdvSIMD vector type (e.g. __Int8x8_t); these
     types are registered by aarch64_init_simd_builtin_types ().  In other
     words, vector types defined in other ways e.g. via vector_size attribute
     will get default mangled names.  */
  const char *mangle;

  /* Internal type.  */
  tree itype;

  /* Element type.  */
  tree eltype;

  /* Machine mode the internal type maps to.  */
  enum machine_mode mode;

  /* Qualifiers.  */
  enum aarch64_type_qualifiers q;
};

#define ENTRY(E, M, Q, G) \
  {E, "__" #E, #G "__" #E, NULL_TREE, NULL_TREE, E_##M##mode, qualifier_##Q},
static struct aarch64_simd_type_info aarch64_simd_types [] = {
#include "aarch64-simd-builtin-types.def"
};
#undef ENTRY
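
/* As a worked example (illustrative; the entries live in
   aarch64-simd-builtin-types.def): assuming an entry of the form
   ENTRY (Int8x8_t, V8QI, none, 10), the macro above expands it to

     {Int8x8_t, "__Int8x8_t", "10__Int8x8_t", NULL_TREE, NULL_TREE,
      E_V8QImode, qualifier_none},

   where "10__Int8x8_t" is the length-prefixed AAPCS64 mangled name and
   the two NULL_TREEs (itype and eltype) are filled in later by
   aarch64_init_simd_builtin_types.  */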

static tree aarch64_simd_intOI_type_node = NULL_TREE;
static tree aarch64_simd_intCI_type_node = NULL_TREE;
static tree aarch64_simd_intXI_type_node = NULL_TREE;

/* The user-visible __fp16 type, and a pointer to that type.  Used
   across the back-end.  */
tree aarch64_fp16_type_node = NULL_TREE;
tree aarch64_fp16_ptr_type_node = NULL_TREE;

static const char *
aarch64_mangle_builtin_scalar_type (const_tree type)
{
  int i = 0;

  while (aarch64_scalar_builtin_types[i] != NULL)
    {
      const char *name = aarch64_scalar_builtin_types[i];

      if (TREE_CODE (TYPE_NAME (type)) == TYPE_DECL
	  && DECL_NAME (TYPE_NAME (type))
	  && !strcmp (IDENTIFIER_POINTER (DECL_NAME (TYPE_NAME (type))), name))
	return aarch64_scalar_builtin_types[i];
      i++;
    }
  return NULL;
}

static const char *
aarch64_mangle_builtin_vector_type (const_tree type)
{
  int i;
  int nelts = sizeof (aarch64_simd_types) / sizeof (aarch64_simd_types[0]);

  for (i = 0; i < nelts; i++)
    if (aarch64_simd_types[i].mode == TYPE_MODE (type)
	&& TYPE_NAME (type)
	&& TREE_CODE (TYPE_NAME (type)) == TYPE_DECL
	&& DECL_NAME (TYPE_NAME (type))
	&& !strcmp
	     (IDENTIFIER_POINTER (DECL_NAME (TYPE_NAME (type))),
	      aarch64_simd_types[i].name))
      return aarch64_simd_types[i].mangle;

  return NULL;
}

const char *
aarch64_mangle_builtin_type (const_tree type)
{
  const char *mangle;
  /* Walk through all the AArch64 builtins types tables to filter out the
     incoming type.  */
  if ((mangle = aarch64_mangle_builtin_vector_type (type))
      || (mangle = aarch64_mangle_builtin_scalar_type (type)))
    return mangle;

  return NULL;
}

static tree
aarch64_simd_builtin_std_type (machine_mode mode,
			       enum aarch64_type_qualifiers q)
{
#define QUAL_TYPE(M) \
  ((q == qualifier_none) ? int##M##_type_node : unsigned_int##M##_type_node);
  switch (mode)
    {
    case E_QImode:
      return QUAL_TYPE (QI);
    case E_HImode:
      return QUAL_TYPE (HI);
    case E_SImode:
      return QUAL_TYPE (SI);
    case E_DImode:
      return QUAL_TYPE (DI);
    case E_TImode:
      return QUAL_TYPE (TI);
    case E_OImode:
      return aarch64_simd_intOI_type_node;
    case E_CImode:
      return aarch64_simd_intCI_type_node;
    case E_XImode:
      return aarch64_simd_intXI_type_node;
    case E_HFmode:
      return aarch64_fp16_type_node;
    case E_SFmode:
      return float_type_node;
    case E_DFmode:
      return double_type_node;
    default:
      gcc_unreachable ();
    }
#undef QUAL_TYPE
}

static tree
aarch64_lookup_simd_builtin_type (machine_mode mode,
				  enum aarch64_type_qualifiers q)
{
  int i;
  int nelts = sizeof (aarch64_simd_types) / sizeof (aarch64_simd_types[0]);

  /* Non-poly scalar modes map to standard types not in the table.  */
  if (q != qualifier_poly && !VECTOR_MODE_P (mode))
    return aarch64_simd_builtin_std_type (mode, q);

  for (i = 0; i < nelts; i++)
    if (aarch64_simd_types[i].mode == mode
	&& aarch64_simd_types[i].q == q)
      return aarch64_simd_types[i].itype;

  return NULL_TREE;
}

static tree
aarch64_simd_builtin_type (machine_mode mode,
			   bool unsigned_p, bool poly_p)
{
  if (poly_p)
    return aarch64_lookup_simd_builtin_type (mode, qualifier_poly);
  else if (unsigned_p)
    return aarch64_lookup_simd_builtin_type (mode, qualifier_unsigned);
  else
    return aarch64_lookup_simd_builtin_type (mode, qualifier_none);
}

static void
aarch64_init_simd_builtin_types (void)
{
  int i;
  int nelts = sizeof (aarch64_simd_types) / sizeof (aarch64_simd_types[0]);
  tree tdecl;

  /* Init all the element types built by the front-end.  */
  aarch64_simd_types[Int8x8_t].eltype = intQI_type_node;
  aarch64_simd_types[Int8x16_t].eltype = intQI_type_node;
  aarch64_simd_types[Int16x4_t].eltype = intHI_type_node;
  aarch64_simd_types[Int16x8_t].eltype = intHI_type_node;
  aarch64_simd_types[Int32x2_t].eltype = intSI_type_node;
  aarch64_simd_types[Int32x4_t].eltype = intSI_type_node;
  aarch64_simd_types[Int64x1_t].eltype = intDI_type_node;
  aarch64_simd_types[Int64x2_t].eltype = intDI_type_node;
  aarch64_simd_types[Uint8x8_t].eltype = unsigned_intQI_type_node;
  aarch64_simd_types[Uint8x16_t].eltype = unsigned_intQI_type_node;
  aarch64_simd_types[Uint16x4_t].eltype = unsigned_intHI_type_node;
  aarch64_simd_types[Uint16x8_t].eltype = unsigned_intHI_type_node;
  aarch64_simd_types[Uint32x2_t].eltype = unsigned_intSI_type_node;
  aarch64_simd_types[Uint32x4_t].eltype = unsigned_intSI_type_node;
  aarch64_simd_types[Uint64x1_t].eltype = unsigned_intDI_type_node;
  aarch64_simd_types[Uint64x2_t].eltype = unsigned_intDI_type_node;

  /* Poly types are a world of their own.  */
  aarch64_simd_types[Poly8_t].eltype = aarch64_simd_types[Poly8_t].itype =
    build_distinct_type_copy (unsigned_intQI_type_node);
  /* Prevent front-ends from transforming Poly8_t arrays into string
     literals.  */
  TYPE_STRING_FLAG (aarch64_simd_types[Poly8_t].eltype) = false;

  aarch64_simd_types[Poly16_t].eltype = aarch64_simd_types[Poly16_t].itype =
    build_distinct_type_copy (unsigned_intHI_type_node);
  aarch64_simd_types[Poly64_t].eltype = aarch64_simd_types[Poly64_t].itype =
    build_distinct_type_copy (unsigned_intDI_type_node);
  aarch64_simd_types[Poly128_t].eltype = aarch64_simd_types[Poly128_t].itype =
    build_distinct_type_copy (unsigned_intTI_type_node);
  /* Init poly vector element types with scalar poly types.  */
  aarch64_simd_types[Poly8x8_t].eltype = aarch64_simd_types[Poly8_t].itype;
  aarch64_simd_types[Poly8x16_t].eltype = aarch64_simd_types[Poly8_t].itype;
  aarch64_simd_types[Poly16x4_t].eltype = aarch64_simd_types[Poly16_t].itype;
  aarch64_simd_types[Poly16x8_t].eltype = aarch64_simd_types[Poly16_t].itype;
  aarch64_simd_types[Poly64x1_t].eltype = aarch64_simd_types[Poly64_t].itype;
  aarch64_simd_types[Poly64x2_t].eltype = aarch64_simd_types[Poly64_t].itype;

  /* Continue with standard types.  */
  aarch64_simd_types[Float16x4_t].eltype = aarch64_fp16_type_node;
  aarch64_simd_types[Float16x8_t].eltype = aarch64_fp16_type_node;
  aarch64_simd_types[Float32x2_t].eltype = float_type_node;
  aarch64_simd_types[Float32x4_t].eltype = float_type_node;
  aarch64_simd_types[Float64x1_t].eltype = double_type_node;
  aarch64_simd_types[Float64x2_t].eltype = double_type_node;

  for (i = 0; i < nelts; i++)
    {
      tree eltype = aarch64_simd_types[i].eltype;
      machine_mode mode = aarch64_simd_types[i].mode;

      if (aarch64_simd_types[i].itype == NULL)
	{
	  aarch64_simd_types[i].itype
	    = build_distinct_type_copy
	      (build_vector_type (eltype, GET_MODE_NUNITS (mode)));
	  SET_TYPE_STRUCTURAL_EQUALITY (aarch64_simd_types[i].itype);
	}

      tdecl = add_builtin_type (aarch64_simd_types[i].name,
				aarch64_simd_types[i].itype);
      TYPE_NAME (aarch64_simd_types[i].itype) = tdecl;
    }

#define AARCH64_BUILD_SIGNED_TYPE(mode) \
  make_signed_type (GET_MODE_PRECISION (mode));
  aarch64_simd_intOI_type_node = AARCH64_BUILD_SIGNED_TYPE (OImode);
  aarch64_simd_intCI_type_node = AARCH64_BUILD_SIGNED_TYPE (CImode);
  aarch64_simd_intXI_type_node = AARCH64_BUILD_SIGNED_TYPE (XImode);
#undef AARCH64_BUILD_SIGNED_TYPE

  tdecl = add_builtin_type
	    ("__builtin_aarch64_simd_oi" , aarch64_simd_intOI_type_node);
  TYPE_NAME (aarch64_simd_intOI_type_node) = tdecl;
  tdecl = add_builtin_type
	    ("__builtin_aarch64_simd_ci" , aarch64_simd_intCI_type_node);
  TYPE_NAME (aarch64_simd_intCI_type_node) = tdecl;
  tdecl = add_builtin_type
	    ("__builtin_aarch64_simd_xi" , aarch64_simd_intXI_type_node);
  TYPE_NAME (aarch64_simd_intXI_type_node) = tdecl;
}

static void
aarch64_init_simd_builtin_scalar_types (void)
{
  /* Define typedefs for all the standard scalar types.  */
  (*lang_hooks.types.register_builtin_type) (intQI_type_node,
					     "__builtin_aarch64_simd_qi");
  (*lang_hooks.types.register_builtin_type) (intHI_type_node,
					     "__builtin_aarch64_simd_hi");
  (*lang_hooks.types.register_builtin_type) (aarch64_fp16_type_node,
					     "__builtin_aarch64_simd_hf");
  (*lang_hooks.types.register_builtin_type) (intSI_type_node,
					     "__builtin_aarch64_simd_si");
  (*lang_hooks.types.register_builtin_type) (float_type_node,
					     "__builtin_aarch64_simd_sf");
  (*lang_hooks.types.register_builtin_type) (intDI_type_node,
					     "__builtin_aarch64_simd_di");
  (*lang_hooks.types.register_builtin_type) (double_type_node,
					     "__builtin_aarch64_simd_df");
  (*lang_hooks.types.register_builtin_type) (unsigned_intQI_type_node,
					     "__builtin_aarch64_simd_poly8");
  (*lang_hooks.types.register_builtin_type) (unsigned_intHI_type_node,
					     "__builtin_aarch64_simd_poly16");
  (*lang_hooks.types.register_builtin_type) (unsigned_intDI_type_node,
					     "__builtin_aarch64_simd_poly64");
  (*lang_hooks.types.register_builtin_type) (unsigned_intTI_type_node,
					     "__builtin_aarch64_simd_poly128");
  (*lang_hooks.types.register_builtin_type) (intTI_type_node,
					     "__builtin_aarch64_simd_ti");
  /* Unsigned integer types for various mode sizes.  */
  (*lang_hooks.types.register_builtin_type) (unsigned_intQI_type_node,
					     "__builtin_aarch64_simd_uqi");
  (*lang_hooks.types.register_builtin_type) (unsigned_intHI_type_node,
					     "__builtin_aarch64_simd_uhi");
  (*lang_hooks.types.register_builtin_type) (unsigned_intSI_type_node,
					     "__builtin_aarch64_simd_usi");
  (*lang_hooks.types.register_builtin_type) (unsigned_intDI_type_node,
					     "__builtin_aarch64_simd_udi");
}

static bool aarch64_simd_builtins_initialized_p = false;

/* Due to the architecture not providing a lane variant of the lane
   instructions for fcmla we can't use the standard simd builtin expansion
   code, but we still want the majority of the validation that would
   normally be done.  */

void
aarch64_init_fcmla_laneq_builtins (void)
{
  unsigned int i = 0;

  for (i = 0; i < ARRAY_SIZE (aarch64_fcmla_lane_builtin_data); ++i)
    {
      aarch64_fcmla_laneq_builtin_datum* d
	= &aarch64_fcmla_lane_builtin_data[i];
      tree argtype = aarch64_lookup_simd_builtin_type (d->mode, qualifier_none);
      machine_mode quadmode = GET_MODE_2XWIDER_MODE (d->mode).require ();
      tree quadtype
	= aarch64_lookup_simd_builtin_type (quadmode, qualifier_none);
      tree lanetype
	= aarch64_simd_builtin_std_type (SImode, qualifier_lane_pair_index);
      tree ftype = build_function_type_list (argtype, argtype, argtype,
					     quadtype, lanetype, NULL_TREE);
      tree fndecl = add_builtin_function (d->name, ftype, d->fcode,
					  BUILT_IN_MD, NULL, NULL_TREE);

      aarch64_builtin_decls[d->fcode] = fndecl;
    }
}

void
aarch64_init_simd_builtins (void)
{
  unsigned int i, fcode = AARCH64_SIMD_PATTERN_START;

  if (aarch64_simd_builtins_initialized_p)
    return;

  aarch64_simd_builtins_initialized_p = true;

  aarch64_init_simd_builtin_types ();

  /* Strong-typing hasn't been implemented for all AdvSIMD builtin intrinsics.
     Therefore we need to preserve the old __builtin scalar types.  It can be
     removed once all the intrinsics become strongly typed using the qualifier
     system.  */
  aarch64_init_simd_builtin_scalar_types ();

  tree lane_check_fpr = build_function_type_list (void_type_node,
						  size_type_node,
						  size_type_node,
						  intSI_type_node,
						  NULL);
  aarch64_builtin_decls[AARCH64_SIMD_BUILTIN_LANE_CHECK] =
    add_builtin_function ("__builtin_aarch64_im_lane_boundsi", lane_check_fpr,
			  AARCH64_SIMD_BUILTIN_LANE_CHECK, BUILT_IN_MD,
			  NULL, NULL_TREE);

  for (i = 0; i < ARRAY_SIZE (aarch64_simd_builtin_data); i++, fcode++)
    {
      bool print_type_signature_p = false;
      char type_signature[SIMD_MAX_BUILTIN_ARGS + 1] = { 0 };
      aarch64_simd_builtin_datum *d = &aarch64_simd_builtin_data[i];
      char namebuf[60];
      tree ftype = NULL;
      tree fndecl = NULL;

      d->fcode = fcode;

      /* We must track two variables here.  op_num is
	 the operand number as in the RTL pattern.  This is
	 required to access the mode (e.g. V4SF mode) of the
	 argument, from which the base type can be derived.
	 arg_num is an index in to the qualifiers data, which
	 gives qualifiers to the type (e.g. const unsigned).
	 The reason these two variables may differ by one is the
	 void return type.  While all return types take the 0th entry
	 in the qualifiers array, there is no operand for them in the
	 RTL pattern.  */
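      /* For instance (illustrative only): a binary builtin with a non-void
	 return type has three pattern operands, so op_num starts at 2 and,
	 with a non-void qualifiers[0], arg_num also starts at 2.  A store
	 has two pattern operands and a void return type, so op_num starts
	 at 1 while arg_num starts at op_num + 1 == 2, keeping element 0 of
	 the qualifiers array reserved for the return type.  */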
      int op_num = insn_data[d->code].n_operands - 1;
      int arg_num = d->qualifiers[0] & qualifier_void
		      ? op_num + 1
		      : op_num;
      tree return_type = void_type_node, args = void_list_node;
      tree eltype;

      /* Build a function type directly from the insn_data for this
	 builtin.  The build_function_type () function takes care of
	 removing duplicates for us.  */
      for (; op_num >= 0; arg_num--, op_num--)
	{
	  machine_mode op_mode = insn_data[d->code].operand[op_num].mode;
	  enum aarch64_type_qualifiers qualifiers = d->qualifiers[arg_num];

	  if (qualifiers & qualifier_unsigned)
	    {
	      type_signature[op_num] = 'u';
	      print_type_signature_p = true;
	    }
	  else if (qualifiers & qualifier_poly)
	    {
	      type_signature[op_num] = 'p';
	      print_type_signature_p = true;
	    }
	  else
	    type_signature[op_num] = 's';

	  /* Skip an internal operand for vget_{low, high}.  */
	  if (qualifiers & qualifier_internal)
	    continue;

	  /* Some builtins have different user-facing types
	     for certain arguments, encoded in d->mode.  */
	  if (qualifiers & qualifier_map_mode)
	    op_mode = d->mode;

	  /* For pointers, we want a pointer to the basic type
	     of the vector.  */
	  if (qualifiers & qualifier_pointer && VECTOR_MODE_P (op_mode))
	    op_mode = GET_MODE_INNER (op_mode);

	  eltype = aarch64_simd_builtin_type
		     (op_mode,
		      (qualifiers & qualifier_unsigned) != 0,
		      (qualifiers & qualifier_poly) != 0);
	  gcc_assert (eltype != NULL);

	  /* Add qualifiers.  */
	  if (qualifiers & qualifier_const)
	    eltype = build_qualified_type (eltype, TYPE_QUAL_CONST);

	  if (qualifiers & qualifier_pointer)
	    eltype = build_pointer_type (eltype);

	  /* If we have reached arg_num == 0, we are at a non-void
	     return type.  Otherwise, we are still processing
	     arguments.  */
	  if (arg_num == 0)
	    return_type = eltype;
	  else
	    args = tree_cons (NULL_TREE, eltype, args);
	}

      ftype = build_function_type (return_type, args);

      gcc_assert (ftype != NULL);

      if (print_type_signature_p)
	snprintf (namebuf, sizeof (namebuf), "__builtin_aarch64_%s_%s",
		  d->name, type_signature);
      else
	snprintf (namebuf, sizeof (namebuf), "__builtin_aarch64_%s",
		  d->name);

      fndecl = add_builtin_function (namebuf, ftype, fcode, BUILT_IN_MD,
				     NULL, NULL_TREE);
      aarch64_builtin_decls[fcode] = fndecl;
    }

  /* Initialize the remaining fcmla_laneq intrinsics.  */
  aarch64_init_fcmla_laneq_builtins ();
}

static void
aarch64_init_crc32_builtins ()
{
  tree usi_type = aarch64_simd_builtin_std_type (SImode, qualifier_unsigned);
  unsigned int i = 0;

  for (i = 0; i < ARRAY_SIZE (aarch64_crc_builtin_data); ++i)
    {
      aarch64_crc_builtin_datum* d = &aarch64_crc_builtin_data[i];
      tree argtype = aarch64_simd_builtin_std_type (d->mode,
						    qualifier_unsigned);
      tree ftype = build_function_type_list (usi_type, usi_type, argtype, NULL_TREE);
      tree fndecl = add_builtin_function (d->name, ftype, d->fcode,
					  BUILT_IN_MD, NULL, NULL_TREE);

      aarch64_builtin_decls[d->fcode] = fndecl;
    }
}

/* Add builtins for reciprocal square root.  */

void
aarch64_init_builtin_rsqrt (void)
{
  tree fndecl = NULL;
  tree ftype = NULL;

  tree V2SF_type_node = build_vector_type (float_type_node, 2);
  tree V2DF_type_node = build_vector_type (double_type_node, 2);
  tree V4SF_type_node = build_vector_type (float_type_node, 4);

  struct builtin_decls_data
  {
    tree type_node;
    const char *builtin_name;
    int function_code;
  };

  builtin_decls_data bdda[] =
  {
    { double_type_node, "__builtin_aarch64_rsqrt_df", AARCH64_BUILTIN_RSQRT_DF },
    { float_type_node, "__builtin_aarch64_rsqrt_sf", AARCH64_BUILTIN_RSQRT_SF },
    { V2DF_type_node, "__builtin_aarch64_rsqrt_v2df", AARCH64_BUILTIN_RSQRT_V2DF },
    { V2SF_type_node, "__builtin_aarch64_rsqrt_v2sf", AARCH64_BUILTIN_RSQRT_V2SF },
    { V4SF_type_node, "__builtin_aarch64_rsqrt_v4sf", AARCH64_BUILTIN_RSQRT_V4SF }
  };

  builtin_decls_data *bdd = bdda;
  builtin_decls_data *bdd_end = bdd + (sizeof (bdda) / sizeof (builtin_decls_data));

  for (; bdd < bdd_end; bdd++)
  {
    ftype = build_function_type_list (bdd->type_node, bdd->type_node, NULL_TREE);
    fndecl = add_builtin_function (bdd->builtin_name,
				   ftype, bdd->function_code, BUILT_IN_MD, NULL, NULL_TREE);
    aarch64_builtin_decls[bdd->function_code] = fndecl;
  }
}

/* Initialize the backend types that support the user-visible __fp16
   type, also initialize a pointer to that type, to be used when
   forming HFAs.  */

static void
aarch64_init_fp16_types (void)
{
  aarch64_fp16_type_node = make_node (REAL_TYPE);
  TYPE_PRECISION (aarch64_fp16_type_node) = 16;
  layout_type (aarch64_fp16_type_node);

  (*lang_hooks.types.register_builtin_type) (aarch64_fp16_type_node, "__fp16");
  aarch64_fp16_ptr_type_node = build_pointer_type (aarch64_fp16_type_node);
}

/* Pointer authentication builtins that will become NOPs on legacy platforms.
   Currently, these builtins are for internal use only (libgcc EH unwinder).  */

void
aarch64_init_pauth_hint_builtins (void)
{
  /* Pointer Authentication builtins.  */
  tree ftype_pointer_auth
    = build_function_type_list (ptr_type_node, ptr_type_node,
				unsigned_intDI_type_node, NULL_TREE);
  tree ftype_pointer_strip
    = build_function_type_list (ptr_type_node, ptr_type_node, NULL_TREE);

  aarch64_builtin_decls[AARCH64_PAUTH_BUILTIN_AUTIA1716]
    = add_builtin_function ("__builtin_aarch64_autia1716", ftype_pointer_auth,
			    AARCH64_PAUTH_BUILTIN_AUTIA1716, BUILT_IN_MD, NULL,
			    NULL_TREE);
  aarch64_builtin_decls[AARCH64_PAUTH_BUILTIN_PACIA1716]
    = add_builtin_function ("__builtin_aarch64_pacia1716", ftype_pointer_auth,
			    AARCH64_PAUTH_BUILTIN_PACIA1716, BUILT_IN_MD, NULL,
			    NULL_TREE);
  aarch64_builtin_decls[AARCH64_PAUTH_BUILTIN_XPACLRI]
    = add_builtin_function ("__builtin_aarch64_xpaclri", ftype_pointer_strip,
			    AARCH64_PAUTH_BUILTIN_XPACLRI, BUILT_IN_MD, NULL,
			    NULL_TREE);
}

/* Add builtins for Random Number instructions.  */

static void
aarch64_init_rng_builtins (void)
{
  tree unsigned_ptr_type = build_pointer_type (unsigned_intDI_type_node);
  tree ftype
    = build_function_type_list (integer_type_node, unsigned_ptr_type, NULL);
  aarch64_builtin_decls[AARCH64_BUILTIN_RNG_RNDR]
    = add_builtin_function ("__builtin_aarch64_rndr", ftype,
			    AARCH64_BUILTIN_RNG_RNDR, BUILT_IN_MD, NULL,
			    NULL_TREE);
  aarch64_builtin_decls[AARCH64_BUILTIN_RNG_RNDRRS]
    = add_builtin_function ("__builtin_aarch64_rndrrs", ftype,
			    AARCH64_BUILTIN_RNG_RNDRRS, BUILT_IN_MD, NULL,
			    NULL_TREE);
}
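
/* Illustrative usage (added comment, not from the original sources):

     unsigned long long val;
     int failed = __builtin_aarch64_rndr (&val);

   stores a 64-bit hardware random value through the pointer; judging by
   the CSET sequence emitted in aarch64_expand_rng_builtin below, the
   return value is zero on success and non-zero when the hardware
   reports failure.  */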


void
aarch64_init_builtins (void)
{
  tree ftype_set_fpr
    = build_function_type_list (void_type_node, unsigned_type_node, NULL);
  tree ftype_get_fpr
    = build_function_type_list (unsigned_type_node, NULL);

  aarch64_builtin_decls[AARCH64_BUILTIN_GET_FPCR]
    = add_builtin_function ("__builtin_aarch64_get_fpcr", ftype_get_fpr,
			    AARCH64_BUILTIN_GET_FPCR, BUILT_IN_MD, NULL, NULL_TREE);
  aarch64_builtin_decls[AARCH64_BUILTIN_SET_FPCR]
    = add_builtin_function ("__builtin_aarch64_set_fpcr", ftype_set_fpr,
			    AARCH64_BUILTIN_SET_FPCR, BUILT_IN_MD, NULL, NULL_TREE);
  aarch64_builtin_decls[AARCH64_BUILTIN_GET_FPSR]
    = add_builtin_function ("__builtin_aarch64_get_fpsr", ftype_get_fpr,
			    AARCH64_BUILTIN_GET_FPSR, BUILT_IN_MD, NULL, NULL_TREE);
  aarch64_builtin_decls[AARCH64_BUILTIN_SET_FPSR]
    = add_builtin_function ("__builtin_aarch64_set_fpsr", ftype_set_fpr,
			    AARCH64_BUILTIN_SET_FPSR, BUILT_IN_MD, NULL, NULL_TREE);

  aarch64_init_fp16_types ();

  if (TARGET_SIMD)
    aarch64_init_simd_builtins ();

  aarch64_init_crc32_builtins ();
  aarch64_init_builtin_rsqrt ();
  aarch64_init_rng_builtins ();

  tree ftype_jcvt
    = build_function_type_list (intSI_type_node, double_type_node, NULL);
  aarch64_builtin_decls[AARCH64_JSCVT]
    = add_builtin_function ("__builtin_aarch64_jcvtzs", ftype_jcvt,
			    AARCH64_JSCVT, BUILT_IN_MD, NULL, NULL_TREE);

  /* Initialize pointer authentication builtins which are backed by instructions
     in NOP encoding space.

     NOTE: these builtins are supposed to be used by the libgcc unwinder only;
     since return address signing is not supported under ILP32, we don't
     register them there.  */
  if (!TARGET_ILP32)
    aarch64_init_pauth_hint_builtins ();
}

tree
aarch64_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
{
  if (code >= AARCH64_BUILTIN_MAX)
    return error_mark_node;

  return aarch64_builtin_decls[code];
}

typedef enum
{
  SIMD_ARG_COPY_TO_REG,
  SIMD_ARG_CONSTANT,
  SIMD_ARG_LANE_INDEX,
  SIMD_ARG_STRUCT_LOAD_STORE_LANE_INDEX,
  SIMD_ARG_LANE_PAIR_INDEX,
  SIMD_ARG_STOP
} builtin_simd_arg;


static rtx
aarch64_simd_expand_args (rtx target, int icode, int have_retval,
			  tree exp, builtin_simd_arg *args,
			  machine_mode builtin_mode)
{
  rtx pat;
  rtx op[SIMD_MAX_BUILTIN_ARGS + 1]; /* First element for result operand.  */
  int opc = 0;

  if (have_retval)
    {
      machine_mode tmode = insn_data[icode].operand[0].mode;
      if (!target
	  || GET_MODE (target) != tmode
	  || !(*insn_data[icode].operand[0].predicate) (target, tmode))
	target = gen_reg_rtx (tmode);
      op[opc++] = target;
    }

  for (;;)
    {
      builtin_simd_arg thisarg = args[opc - have_retval];

      if (thisarg == SIMD_ARG_STOP)
	break;
      else
	{
	  tree arg = CALL_EXPR_ARG (exp, opc - have_retval);
	  machine_mode mode = insn_data[icode].operand[opc].mode;
	  op[opc] = expand_normal (arg);

	  switch (thisarg)
	    {
	    case SIMD_ARG_COPY_TO_REG:
	      if (POINTER_TYPE_P (TREE_TYPE (arg)))
		op[opc] = convert_memory_address (Pmode, op[opc]);
	      /*gcc_assert (GET_MODE (op[opc]) == mode); */
	      if (!(*insn_data[icode].operand[opc].predicate)
		  (op[opc], mode))
		op[opc] = copy_to_mode_reg (mode, op[opc]);
	      break;

	    case SIMD_ARG_STRUCT_LOAD_STORE_LANE_INDEX:
	      gcc_assert (opc > 1);
	      if (CONST_INT_P (op[opc]))
		{
		  unsigned int nunits
		    = GET_MODE_NUNITS (builtin_mode).to_constant ();
		  aarch64_simd_lane_bounds (op[opc], 0, nunits, exp);
		  /* Keep to GCC-vector-extension lane indices in the RTL.  */
		  op[opc] = aarch64_endian_lane_rtx (builtin_mode,
						     INTVAL (op[opc]));
		}
	      goto constant_arg;

	    case SIMD_ARG_LANE_INDEX:
	      /* Must be a previous operand into which this is an index.  */
	      gcc_assert (opc > 0);
	      if (CONST_INT_P (op[opc]))
		{
		  machine_mode vmode = insn_data[icode].operand[opc - 1].mode;
		  unsigned int nunits
		    = GET_MODE_NUNITS (vmode).to_constant ();
		  aarch64_simd_lane_bounds (op[opc], 0, nunits, exp);
		  /* Keep to GCC-vector-extension lane indices in the RTL.  */
		  op[opc] = aarch64_endian_lane_rtx (vmode, INTVAL (op[opc]));
		}
	      /* If the lane index isn't a constant then error out.  */
	      goto constant_arg;

	    case SIMD_ARG_LANE_PAIR_INDEX:
	      /* Must be a previous operand into which this is an index and
		 index is restricted to nunits / 2.  */
	      gcc_assert (opc > 0);
	      if (CONST_INT_P (op[opc]))
		{
		  machine_mode vmode = insn_data[icode].operand[opc - 1].mode;
		  unsigned int nunits
		    = GET_MODE_NUNITS (vmode).to_constant ();
		  aarch64_simd_lane_bounds (op[opc], 0, nunits / 2, exp);
		  /* Keep to GCC-vector-extension lane indices in the RTL.  */
		  int lane = INTVAL (op[opc]);
		  op[opc] = gen_int_mode (ENDIAN_LANE_N (nunits / 2, lane),
					  SImode);
		}
	      /* Fall through - if the lane index isn't a constant then
		 the next case will error.  */
	      /* FALLTHRU */
	    case SIMD_ARG_CONSTANT:
constant_arg:
	      if (!(*insn_data[icode].operand[opc].predicate)
		  (op[opc], mode))
		{
		  error ("%Kargument %d must be a constant immediate",
			 exp, opc + 1 - have_retval);
		  return const0_rtx;
		}
	      break;

	    case SIMD_ARG_STOP:
	      gcc_unreachable ();
	    }

	  opc++;
	}
    }

  switch (opc)
    {
    case 1:
      pat = GEN_FCN (icode) (op[0]);
      break;

    case 2:
      pat = GEN_FCN (icode) (op[0], op[1]);
      break;

    case 3:
      pat = GEN_FCN (icode) (op[0], op[1], op[2]);
      break;

    case 4:
      pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
      break;

    case 5:
      pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3], op[4]);
      break;

    case 6:
      pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3], op[4], op[5]);
      break;

    default:
      gcc_unreachable ();
    }

  if (!pat)
    return NULL_RTX;

  emit_insn (pat);

  return target;
}

/* Expand an AArch64 AdvSIMD builtin (intrinsic).  */
rtx
aarch64_simd_expand_builtin (int fcode, tree exp, rtx target)
{
  if (fcode == AARCH64_SIMD_BUILTIN_LANE_CHECK)
    {
      rtx totalsize = expand_normal (CALL_EXPR_ARG (exp, 0));
      rtx elementsize = expand_normal (CALL_EXPR_ARG (exp, 1));
      if (CONST_INT_P (totalsize) && CONST_INT_P (elementsize)
	  && UINTVAL (elementsize) != 0
	  && UINTVAL (totalsize) != 0)
	{
	  rtx lane_idx = expand_normal (CALL_EXPR_ARG (exp, 2));
	  if (CONST_INT_P (lane_idx))
	    aarch64_simd_lane_bounds (lane_idx, 0,
				      UINTVAL (totalsize)
				       / UINTVAL (elementsize),
				      exp);
	  else
	    error ("%Klane index must be a constant immediate", exp);
	}
      else
	error ("%Ktotal size and element size must be a non-zero constant immediate", exp);
      /* Don't generate any RTL.  */
      return const0_rtx;
    }
  aarch64_simd_builtin_datum *d =
		&aarch64_simd_builtin_data[fcode - AARCH64_SIMD_PATTERN_START];
  enum insn_code icode = d->code;
  builtin_simd_arg args[SIMD_MAX_BUILTIN_ARGS + 1];
  int num_args = insn_data[d->code].n_operands;
  int is_void = 0;
  int k;

  is_void = !!(d->qualifiers[0] & qualifier_void);

  num_args += is_void;

  for (k = 1; k < num_args; k++)
    {
      /* We have four arrays of data, each indexed in a different fashion.
	 qualifiers - element 0 always describes the function return type.
	 operands - element 0 is either the operand for return value (if
	   the function has a non-void return type) or the operand for the
	   first argument.
	 expr_args - element 0 always holds the first argument.
	 args - element 0 is always used for the return type.  */
      int qualifiers_k = k;
      int operands_k = k - is_void;
      int expr_args_k = k - 1;

      if (d->qualifiers[qualifiers_k] & qualifier_lane_index)
	args[k] = SIMD_ARG_LANE_INDEX;
      else if (d->qualifiers[qualifiers_k] & qualifier_lane_pair_index)
	args[k] = SIMD_ARG_LANE_PAIR_INDEX;
      else if (d->qualifiers[qualifiers_k] & qualifier_struct_load_store_lane_index)
	args[k] = SIMD_ARG_STRUCT_LOAD_STORE_LANE_INDEX;
      else if (d->qualifiers[qualifiers_k] & qualifier_immediate)
	args[k] = SIMD_ARG_CONSTANT;
      else if (d->qualifiers[qualifiers_k] & qualifier_maybe_immediate)
	{
	  rtx arg
	    = expand_normal (CALL_EXPR_ARG (exp,
					    (expr_args_k)));
	  /* Handle constants only if the predicate allows it.  */
	  bool op_const_int_p =
	    (CONST_INT_P (arg)
	     && (*insn_data[icode].operand[operands_k].predicate)
		(arg, insn_data[icode].operand[operands_k].mode));
	  args[k] = op_const_int_p ? SIMD_ARG_CONSTANT : SIMD_ARG_COPY_TO_REG;
	}
      else
	args[k] = SIMD_ARG_COPY_TO_REG;

    }
  args[k] = SIMD_ARG_STOP;

  /* The interface to aarch64_simd_expand_args expects a 0 if
     the function is void, and a 1 if it is not.  */
  return aarch64_simd_expand_args
	  (target, icode, !is_void, exp, &args[1], d->mode);
}

rtx
aarch64_crc32_expand_builtin (int fcode, tree exp, rtx target)
{
  rtx pat;
  aarch64_crc_builtin_datum *d
    = &aarch64_crc_builtin_data[fcode - (AARCH64_CRC32_BUILTIN_BASE + 1)];
  enum insn_code icode = d->icode;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  tree arg1 = CALL_EXPR_ARG (exp, 1);
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  machine_mode tmode = insn_data[icode].operand[0].mode;
  machine_mode mode0 = insn_data[icode].operand[1].mode;
  machine_mode mode1 = insn_data[icode].operand[2].mode;

  if (! target
      || GET_MODE (target) != tmode
      || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
    target = gen_reg_rtx (tmode);

  gcc_assert ((GET_MODE (op0) == mode0 || GET_MODE (op0) == VOIDmode)
	      && (GET_MODE (op1) == mode1 || GET_MODE (op1) == VOIDmode));

  if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  pat = GEN_FCN (icode) (target, op0, op1);
  if (!pat)
    return NULL_RTX;

  emit_insn (pat);
  return target;
}

/* Function to expand reciprocal square root builtins.  */

static rtx
aarch64_expand_builtin_rsqrt (int fcode, tree exp, rtx target)
{
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  rtx op0 = expand_normal (arg0);

  rtx (*gen) (rtx, rtx);

  switch (fcode)
    {
    case AARCH64_BUILTIN_RSQRT_DF:
      gen = gen_rsqrtdf2;
      break;
    case AARCH64_BUILTIN_RSQRT_SF:
      gen = gen_rsqrtsf2;
      break;
    case AARCH64_BUILTIN_RSQRT_V2DF:
      gen = gen_rsqrtv2df2;
      break;
    case AARCH64_BUILTIN_RSQRT_V2SF:
      gen = gen_rsqrtv2sf2;
      break;
    case AARCH64_BUILTIN_RSQRT_V4SF:
      gen = gen_rsqrtv4sf2;
      break;
    default: gcc_unreachable ();
    }

  if (!target)
    target = gen_reg_rtx (GET_MODE (op0));

  emit_insn (gen (target, op0));

  return target;
}

/* Expand a FCMLA lane expression EXP with code FCODE and
   result going to TARGET if that is convenient.  */

rtx
aarch64_expand_fcmla_builtin (tree exp, rtx target, int fcode)
{
  int bcode = fcode - AARCH64_SIMD_FCMLA_LANEQ_BUILTIN_BASE - 1;
  aarch64_fcmla_laneq_builtin_datum* d
    = &aarch64_fcmla_lane_builtin_data[bcode];
  machine_mode quadmode = GET_MODE_2XWIDER_MODE (d->mode).require ();
  rtx op0 = force_reg (d->mode, expand_normal (CALL_EXPR_ARG (exp, 0)));
  rtx op1 = force_reg (d->mode, expand_normal (CALL_EXPR_ARG (exp, 1)));
  rtx op2 = force_reg (quadmode, expand_normal (CALL_EXPR_ARG (exp, 2)));
  tree tmp = CALL_EXPR_ARG (exp, 3);
  rtx lane_idx = expand_expr (tmp, NULL_RTX, VOIDmode, EXPAND_INITIALIZER);

  /* Validate that the lane index is a constant.  */
  if (!CONST_INT_P (lane_idx))
    {
      error ("%Kargument %d must be a constant immediate", exp, 4);
      return const0_rtx;
    }

  /* Validate that the index is within the expected range.  */
  int nunits = GET_MODE_NUNITS (quadmode).to_constant ();
  aarch64_simd_lane_bounds (lane_idx, 0, nunits / 2, exp);

  /* Generate the correct register and mode.  */
  int lane = INTVAL (lane_idx);

  if (lane < nunits / 4)
    op2 = simplify_gen_subreg (d->mode, op2, quadmode,
			       subreg_lowpart_offset (d->mode, quadmode));
  else
    {
      /* Select the upper 64 bits, either a V2SF or V4HF; this is quite
	 messy, as the operation required, though conceptually simple,
	 doesn't map onto a single RTL pattern and seems quite hard to
	 define using one.  The target-generic gen_highpart_mode
	 generates code that isn't optimal.  */
      rtx temp1 = gen_reg_rtx (d->mode);
      rtx temp2 = gen_reg_rtx (DImode);
      temp1 = simplify_gen_subreg (d->mode, op2, quadmode,
				   subreg_lowpart_offset (d->mode, quadmode));
      temp1 = simplify_gen_subreg (V2DImode, temp1, d->mode, 0);
      if (BYTES_BIG_ENDIAN)
	emit_insn (gen_aarch64_get_lanev2di (temp2, temp1, const0_rtx));
      else
	emit_insn (gen_aarch64_get_lanev2di (temp2, temp1, const1_rtx));
      op2 = simplify_gen_subreg (d->mode, temp2, GET_MODE (temp2), 0);

      /* And recalculate the index.  */
      lane -= nunits / 4;
    }

  /* Keep to GCC-vector-extension lane indices in the RTL, only nunits / 4
     (max nunits in range check) are valid.  Which means only 0-1, so we
     only need to know the order in a V2mode.  */
  lane_idx = aarch64_endian_lane_rtx (V2DImode, lane);

  if (!target)
    target = gen_reg_rtx (d->mode);
  else
    target = force_reg (d->mode, target);

  rtx pat = NULL_RTX;

  if (d->lane)
    pat = GEN_FCN (d->icode) (target, op0, op1, op2, lane_idx);
  else
    pat = GEN_FCN (d->icode) (target, op0, op1, op2);

  if (!pat)
    return NULL_RTX;

  emit_insn (pat);
  return target;
}

/* Expand a random number builtin EXP with code FCODE, putting the result
   in TARGET.  If IGNORE is true the return value is ignored.  */

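/* An illustrative sketch (not from this file): user code is expected to
   reach these builtins through the ACLE __rndr/__rndrrs intrinsics, e.g.

     uint64_t seed;
     if (__rndr (&seed) == 0)
       process (seed);

   which expands to the RNDR read, a store through the pointer argument,
   and a CSET of the status from the Z flag; a zero status signals a
   valid random value.  "process" is a placeholder.  */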
rtx
aarch64_expand_rng_builtin (tree exp, rtx target, int fcode, int ignore)
{
  rtx pat;
  enum insn_code icode;
  if (fcode == AARCH64_BUILTIN_RNG_RNDR)
    icode = CODE_FOR_aarch64_rndr;
  else if (fcode == AARCH64_BUILTIN_RNG_RNDRRS)
    icode = CODE_FOR_aarch64_rndrrs;
  else
    gcc_unreachable ();

  rtx rand = gen_reg_rtx (DImode);
  pat = GEN_FCN (icode) (rand);
  if (!pat)
    return NULL_RTX;

  tree arg0 = CALL_EXPR_ARG (exp, 0);
  rtx res_addr = expand_normal (arg0);
  res_addr = convert_memory_address (Pmode, res_addr);
  rtx res_mem = gen_rtx_MEM (DImode, res_addr);
  emit_insn (pat);
  emit_move_insn (res_mem, rand);
  /* If the status result is unused don't generate the CSET code.  */
  if (ignore)
    return target;

  rtx cc_reg = gen_rtx_REG (CC_Zmode, CC_REGNUM);
  rtx cmp_rtx = gen_rtx_fmt_ee (EQ, SImode, cc_reg, const0_rtx);
  emit_insn (gen_aarch64_cstoresi (target, cmp_rtx, cc_reg));
  return target;
}

/* Expand an expression EXP that calls a built-in function,
   with result going to TARGET if that's convenient.  */
rtx
aarch64_expand_builtin (tree exp,
                        rtx target,
                        rtx subtarget ATTRIBUTE_UNUSED,
                        machine_mode mode ATTRIBUTE_UNUSED,
                        int ignore ATTRIBUTE_UNUSED)
{
  tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
  int fcode = DECL_FUNCTION_CODE (fndecl);
  int icode;
  rtx pat, op0;
  tree arg0;

  switch (fcode)
    {
    case AARCH64_BUILTIN_GET_FPCR:
    case AARCH64_BUILTIN_SET_FPCR:
    case AARCH64_BUILTIN_GET_FPSR:
    case AARCH64_BUILTIN_SET_FPSR:
      if ((fcode == AARCH64_BUILTIN_GET_FPCR)
          || (fcode == AARCH64_BUILTIN_GET_FPSR))
        {
          icode = (fcode == AARCH64_BUILTIN_GET_FPSR) ?
            CODE_FOR_get_fpsr : CODE_FOR_get_fpcr;
          target = gen_reg_rtx (SImode);
          pat = GEN_FCN (icode) (target);
        }
      else
        {
          target = NULL_RTX;
          icode = (fcode == AARCH64_BUILTIN_SET_FPSR) ?
            CODE_FOR_set_fpsr : CODE_FOR_set_fpcr;
          arg0 = CALL_EXPR_ARG (exp, 0);
          op0 = force_reg (SImode, expand_normal (arg0));
          pat = GEN_FCN (icode) (op0);
        }
      emit_insn (pat);
      return target;

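    /* Pointer-authentication builtins.  These expand to the HINT-space
       encodings (PACIA1716 and AUTIA1716 take the pointer in X17 and the
       modifier in X16; XPACLRI operates on LR), so the emitted
       instructions execute as NOPs on cores without the Armv8.3-A
       pointer authentication extension.  */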
    case AARCH64_PAUTH_BUILTIN_AUTIA1716:
    case AARCH64_PAUTH_BUILTIN_PACIA1716:
    case AARCH64_PAUTH_BUILTIN_XPACLRI:
      arg0 = CALL_EXPR_ARG (exp, 0);
      op0 = force_reg (Pmode, expand_normal (arg0));

      if (!target)
        target = gen_reg_rtx (Pmode);
      else
        target = force_reg (Pmode, target);

      emit_move_insn (target, op0);

      if (fcode == AARCH64_PAUTH_BUILTIN_XPACLRI)
        {
          rtx lr = gen_rtx_REG (Pmode, R30_REGNUM);
          icode = CODE_FOR_xpaclri;
          emit_move_insn (lr, op0);
          emit_insn (GEN_FCN (icode) ());
          emit_move_insn (target, lr);
        }
      else
        {
          tree arg1 = CALL_EXPR_ARG (exp, 1);
          rtx op1 = force_reg (Pmode, expand_normal (arg1));
          icode = (fcode == AARCH64_PAUTH_BUILTIN_PACIA1716
                   ? CODE_FOR_paci1716 : CODE_FOR_auti1716);

          rtx x16_reg = gen_rtx_REG (Pmode, R16_REGNUM);
          rtx x17_reg = gen_rtx_REG (Pmode, R17_REGNUM);
          emit_move_insn (x17_reg, op0);
          emit_move_insn (x16_reg, op1);
          emit_insn (GEN_FCN (icode) ());
          emit_move_insn (target, x17_reg);
        }

      return target;

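    /* Expand to the Armv8.3-A FJCVTZS instruction (exposed by ACLE as
       __jcvt): convert a double to a 32-bit signed integer using the
       JavaScript rounding convention.  */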
    case AARCH64_JSCVT:
      {
        expand_operand ops[2];
        create_output_operand (&ops[0], target, SImode);
        op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
        create_input_operand (&ops[1], op0, DFmode);
        expand_insn (CODE_FOR_aarch64_fjcvtzs, 2, ops);
        return ops[0].value;
      }

    case AARCH64_SIMD_BUILTIN_FCMLA_LANEQ0_V2SF:
    case AARCH64_SIMD_BUILTIN_FCMLA_LANEQ90_V2SF:
    case AARCH64_SIMD_BUILTIN_FCMLA_LANEQ180_V2SF:
    case AARCH64_SIMD_BUILTIN_FCMLA_LANEQ270_V2SF:
    case AARCH64_SIMD_BUILTIN_FCMLA_LANEQ0_V4HF:
    case AARCH64_SIMD_BUILTIN_FCMLA_LANEQ90_V4HF:
    case AARCH64_SIMD_BUILTIN_FCMLA_LANEQ180_V4HF:
    case AARCH64_SIMD_BUILTIN_FCMLA_LANEQ270_V4HF:
      return aarch64_expand_fcmla_builtin (exp, target, fcode);
    case AARCH64_BUILTIN_RNG_RNDR:
    case AARCH64_BUILTIN_RNG_RNDRRS:
      return aarch64_expand_rng_builtin (exp, target, fcode, ignore);
    }

  if (fcode >= AARCH64_SIMD_BUILTIN_BASE && fcode <= AARCH64_SIMD_BUILTIN_MAX)
    return aarch64_simd_expand_builtin (fcode, exp, target);
  else if (fcode >= AARCH64_CRC32_BUILTIN_BASE
           && fcode <= AARCH64_CRC32_BUILTIN_MAX)
    return aarch64_crc32_expand_builtin (fcode, exp, target);

  if (fcode == AARCH64_BUILTIN_RSQRT_DF
      || fcode == AARCH64_BUILTIN_RSQRT_SF
      || fcode == AARCH64_BUILTIN_RSQRT_V2DF
      || fcode == AARCH64_BUILTIN_RSQRT_V2SF
      || fcode == AARCH64_BUILTIN_RSQRT_V4SF)
    return aarch64_expand_builtin_rsqrt (fcode, exp, target);

  gcc_unreachable ();
}

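/* If the scalar built-in function FN can be vectorized with output vector
   type TYPE_OUT and input vector type TYPE_IN, return the decl of the
   AArch64 SIMD builtin that implements it; return NULL_TREE otherwise.
   This implements the TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
   hook.  */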
tree
aarch64_builtin_vectorized_function (unsigned int fn, tree type_out,
                                     tree type_in)
{
  machine_mode in_mode, out_mode;
  unsigned HOST_WIDE_INT in_n, out_n;

  if (TREE_CODE (type_out) != VECTOR_TYPE
      || TREE_CODE (type_in) != VECTOR_TYPE)
    return NULL_TREE;

  out_mode = TYPE_MODE (TREE_TYPE (type_out));
  in_mode = TYPE_MODE (TREE_TYPE (type_in));
  if (!TYPE_VECTOR_SUBPARTS (type_out).is_constant (&out_n)
      || !TYPE_VECTOR_SUBPARTS (type_in).is_constant (&in_n))
    return NULL_TREE;

#undef AARCH64_CHECK_BUILTIN_MODE
#define AARCH64_CHECK_BUILTIN_MODE(C, N) 1
#define AARCH64_FIND_FRINT_VARIANT(N) \
  (AARCH64_CHECK_BUILTIN_MODE (2, D) \
    ? aarch64_builtin_decls[AARCH64_SIMD_BUILTIN_UNOP_##N##v2df] \
    : (AARCH64_CHECK_BUILTIN_MODE (4, S) \
        ? aarch64_builtin_decls[AARCH64_SIMD_BUILTIN_UNOP_##N##v4sf] \
        : (AARCH64_CHECK_BUILTIN_MODE (2, S) \
           ? aarch64_builtin_decls[AARCH64_SIMD_BUILTIN_UNOP_##N##v2sf] \
           : NULL_TREE)))
  switch (fn)
    {
#undef AARCH64_CHECK_BUILTIN_MODE
#define AARCH64_CHECK_BUILTIN_MODE(C, N) \
  (out_mode == N##Fmode && out_n == C \
   && in_mode == N##Fmode && in_n == C)
    CASE_CFN_FLOOR:
      return AARCH64_FIND_FRINT_VARIANT (floor);
    CASE_CFN_CEIL:
      return AARCH64_FIND_FRINT_VARIANT (ceil);
    CASE_CFN_TRUNC:
      return AARCH64_FIND_FRINT_VARIANT (btrunc);
    CASE_CFN_ROUND:
      return AARCH64_FIND_FRINT_VARIANT (round);
    CASE_CFN_NEARBYINT:
      return AARCH64_FIND_FRINT_VARIANT (nearbyint);
    CASE_CFN_SQRT:
      return AARCH64_FIND_FRINT_VARIANT (sqrt);
#undef AARCH64_CHECK_BUILTIN_MODE
#define AARCH64_CHECK_BUILTIN_MODE(C, N) \
  (out_mode == SImode && out_n == C \
   && in_mode == N##Imode && in_n == C)
    CASE_CFN_CLZ:
      {
        if (AARCH64_CHECK_BUILTIN_MODE (4, S))
          return aarch64_builtin_decls[AARCH64_SIMD_BUILTIN_UNOP_clzv4si];
        return NULL_TREE;
      }
    CASE_CFN_CTZ:
      {
        if (AARCH64_CHECK_BUILTIN_MODE (2, S))
          return aarch64_builtin_decls[AARCH64_SIMD_BUILTIN_UNOP_ctzv2si];
        else if (AARCH64_CHECK_BUILTIN_MODE (4, S))
          return aarch64_builtin_decls[AARCH64_SIMD_BUILTIN_UNOP_ctzv4si];
        return NULL_TREE;
      }
#undef AARCH64_CHECK_BUILTIN_MODE
#define AARCH64_CHECK_BUILTIN_MODE(C, N) \
  (out_mode == N##Imode && out_n == C \
   && in_mode == N##Fmode && in_n == C)
    CASE_CFN_IFLOOR:
    CASE_CFN_LFLOOR:
    CASE_CFN_LLFLOOR:
      {
        enum aarch64_builtins builtin;
        if (AARCH64_CHECK_BUILTIN_MODE (2, D))
          builtin = AARCH64_SIMD_BUILTIN_UNOP_lfloorv2dfv2di;
        else if (AARCH64_CHECK_BUILTIN_MODE (4, S))
          builtin = AARCH64_SIMD_BUILTIN_UNOP_lfloorv4sfv4si;
        else if (AARCH64_CHECK_BUILTIN_MODE (2, S))
          builtin = AARCH64_SIMD_BUILTIN_UNOP_lfloorv2sfv2si;
        else
          return NULL_TREE;

        return aarch64_builtin_decls[builtin];
      }
    CASE_CFN_ICEIL:
    CASE_CFN_LCEIL:
    CASE_CFN_LLCEIL:
      {
        enum aarch64_builtins builtin;
        if (AARCH64_CHECK_BUILTIN_MODE (2, D))
          builtin = AARCH64_SIMD_BUILTIN_UNOP_lceilv2dfv2di;
        else if (AARCH64_CHECK_BUILTIN_MODE (4, S))
          builtin = AARCH64_SIMD_BUILTIN_UNOP_lceilv4sfv4si;
        else if (AARCH64_CHECK_BUILTIN_MODE (2, S))
          builtin = AARCH64_SIMD_BUILTIN_UNOP_lceilv2sfv2si;
        else
          return NULL_TREE;

        return aarch64_builtin_decls[builtin];
      }
    CASE_CFN_IROUND:
    CASE_CFN_LROUND:
    CASE_CFN_LLROUND:
      {
        enum aarch64_builtins builtin;
        if (AARCH64_CHECK_BUILTIN_MODE (2, D))
          builtin = AARCH64_SIMD_BUILTIN_UNOP_lroundv2dfv2di;
        else if (AARCH64_CHECK_BUILTIN_MODE (4, S))
          builtin = AARCH64_SIMD_BUILTIN_UNOP_lroundv4sfv4si;
        else if (AARCH64_CHECK_BUILTIN_MODE (2, S))
          builtin = AARCH64_SIMD_BUILTIN_UNOP_lroundv2sfv2si;
        else
          return NULL_TREE;

        return aarch64_builtin_decls[builtin];
      }
    case CFN_BUILT_IN_BSWAP16:
#undef AARCH64_CHECK_BUILTIN_MODE
#define AARCH64_CHECK_BUILTIN_MODE(C, N) \
  (out_mode == N##Imode && out_n == C \
   && in_mode == N##Imode && in_n == C)
      if (AARCH64_CHECK_BUILTIN_MODE (4, H))
        return aarch64_builtin_decls[AARCH64_SIMD_BUILTIN_UNOPU_bswapv4hi];
      else if (AARCH64_CHECK_BUILTIN_MODE (8, H))
        return aarch64_builtin_decls[AARCH64_SIMD_BUILTIN_UNOPU_bswapv8hi];
      else
        return NULL_TREE;
    case CFN_BUILT_IN_BSWAP32:
      if (AARCH64_CHECK_BUILTIN_MODE (2, S))
        return aarch64_builtin_decls[AARCH64_SIMD_BUILTIN_UNOPU_bswapv2si];
      else if (AARCH64_CHECK_BUILTIN_MODE (4, S))
        return aarch64_builtin_decls[AARCH64_SIMD_BUILTIN_UNOPU_bswapv4si];
      else
        return NULL_TREE;
    case CFN_BUILT_IN_BSWAP64:
      if (AARCH64_CHECK_BUILTIN_MODE (2, D))
        return aarch64_builtin_decls[AARCH64_SIMD_BUILTIN_UNOPU_bswapv2di];
      else
        return NULL_TREE;
    default:
      return NULL_TREE;
    }

  return NULL_TREE;
}

/* Return builtin for reciprocal square root.  */

tree
aarch64_builtin_rsqrt (unsigned int fn)
{
  if (fn == AARCH64_SIMD_BUILTIN_UNOP_sqrtv2df)
    return aarch64_builtin_decls[AARCH64_BUILTIN_RSQRT_V2DF];
  if (fn == AARCH64_SIMD_BUILTIN_UNOP_sqrtv2sf)
    return aarch64_builtin_decls[AARCH64_BUILTIN_RSQRT_V2SF];
  if (fn == AARCH64_SIMD_BUILTIN_UNOP_sqrtv4sf)
    return aarch64_builtin_decls[AARCH64_BUILTIN_RSQRT_V4SF];
  return NULL_TREE;
}

#undef VAR1
#define VAR1(T, N, MAP, A) \
  case AARCH64_SIMD_BUILTIN_##T##_##N##A:

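/* Try to fold a call to the built-in function with declaration FNDECL and
   arguments ARGS, e.g. the vector abs builtins to ABS_EXPR and the
   int-to-float conversion builtins to FLOAT_EXPR.  Return the folded
   tree, or NULL_TREE if no simplification applies.  */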
tree
aarch64_fold_builtin (tree fndecl, int n_args ATTRIBUTE_UNUSED, tree *args,
                      bool ignore ATTRIBUTE_UNUSED)
{
  int fcode = DECL_FUNCTION_CODE (fndecl);
  tree type = TREE_TYPE (TREE_TYPE (fndecl));

  switch (fcode)
    {
      BUILTIN_VDQF (UNOP, abs, 2)
        return fold_build1 (ABS_EXPR, type, args[0]);
      VAR1 (UNOP, floatv2si, 2, v2sf)
      VAR1 (UNOP, floatv4si, 2, v4sf)
      VAR1 (UNOP, floatv2di, 2, v2df)
        return fold_build1 (FLOAT_EXPR, type, args[0]);
      default:
        break;
    }

  return NULL_TREE;
}

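/* Try to fold the built-in call at GSI at GIMPLE level: reductions become
   IFN_REDUC_* internal calls, and fmulx calls with constant operands are
   constant-folded or lowered to a plain multiplication when the fmulx
   special cases cannot apply.  Return true if the statement was
   replaced.  */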
bool
aarch64_gimple_fold_builtin (gimple_stmt_iterator *gsi)
{
  bool changed = false;
  gimple *stmt = gsi_stmt (*gsi);
  tree call = gimple_call_fn (stmt);
  tree fndecl;
  gimple *new_stmt = NULL;

  if (call)
    {
      fndecl = gimple_call_fndecl (stmt);
      if (fndecl)
        {
          int fcode = DECL_FUNCTION_CODE (fndecl);
          unsigned nargs = gimple_call_num_args (stmt);
          tree *args = (nargs > 0
                        ? gimple_call_arg_ptr (stmt, 0)
                        : &error_mark_node);

          /* We use gimple's IFN_REDUC_(PLUS|MIN|MAX) for float, signed int
             and unsigned int; the internal functions distinguish the
             variants by the types of the arguments to the __builtin.  */
          switch (fcode)
            {
              BUILTIN_VALL (UNOP, reduc_plus_scal_, 10)
                new_stmt = gimple_build_call_internal (IFN_REDUC_PLUS,
                                                       1, args[0]);
                gimple_call_set_lhs (new_stmt, gimple_call_lhs (stmt));
                break;
              BUILTIN_VDQIF (UNOP, reduc_smax_scal_, 10)
              BUILTIN_VDQ_BHSI (UNOPU, reduc_umax_scal_, 10)
                new_stmt = gimple_build_call_internal (IFN_REDUC_MAX,
                                                       1, args[0]);
                gimple_call_set_lhs (new_stmt, gimple_call_lhs (stmt));
                break;
              BUILTIN_VDQIF (UNOP, reduc_smin_scal_, 10)
              BUILTIN_VDQ_BHSI (UNOPU, reduc_umin_scal_, 10)
                new_stmt = gimple_build_call_internal (IFN_REDUC_MIN,
                                                       1, args[0]);
                gimple_call_set_lhs (new_stmt, gimple_call_lhs (stmt));
                break;
              BUILTIN_GPF (BINOP, fmulx, 0)
                {
                  gcc_assert (nargs == 2);
                  bool a0_cst_p = TREE_CODE (args[0]) == REAL_CST;
                  bool a1_cst_p = TREE_CODE (args[1]) == REAL_CST;
                  if (a0_cst_p || a1_cst_p)
                    {
                      if (a0_cst_p && a1_cst_p)
                        {
                          tree t0 = TREE_TYPE (args[0]);
                          real_value a0 = (TREE_REAL_CST (args[0]));
                          real_value a1 = (TREE_REAL_CST (args[1]));
                          if (real_equal (&a1, &dconst0))
                            std::swap (a0, a1);
                          /* According to real_equal (), +0 equals -0.  */
                          if (real_equal (&a0, &dconst0) && real_isinf (&a1))
                            {
                              real_value res = dconst2;
                              res.sign = a0.sign ^ a1.sign;
                              new_stmt =
                                gimple_build_assign (gimple_call_lhs (stmt),
                                                     REAL_CST,
                                                     build_real (t0, res));
                            }
                          else
                            new_stmt =
                              gimple_build_assign (gimple_call_lhs (stmt),
                                                   MULT_EXPR,
                                                   args[0], args[1]);
                        }
                      else /* a0_cst_p ^ a1_cst_p.  */
                        {
                          real_value const_part = a0_cst_p
                            ? TREE_REAL_CST (args[0]) : TREE_REAL_CST (args[1]);
                          if (!real_equal (&const_part, &dconst0)
                              && !real_isinf (&const_part))
                            new_stmt =
                              gimple_build_assign (gimple_call_lhs (stmt),
                                                   MULT_EXPR, args[0], args[1]);
                        }
                    }
                  if (new_stmt)
                    {
                      gimple_set_vuse (new_stmt, gimple_vuse (stmt));
                      gimple_set_vdef (new_stmt, gimple_vdef (stmt));
                    }
                  break;
                }
              default:
                break;
            }
        }
    }

  if (new_stmt)
    {
      gsi_replace (gsi, new_stmt, true);
      changed = true;
    }

  return changed;
}

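/* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV.  Build the trees that save
   and clamp the FP environment (*HOLD), clear the exception flags
   (*CLEAR), and restore the environment while re-raising any exceptions
   that occurred in the meantime (*UPDATE); these wrap atomic
   floating-point compound assignments.  */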
void
aarch64_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
{
  const unsigned AARCH64_FE_INVALID = 1;
  const unsigned AARCH64_FE_DIVBYZERO = 2;
  const unsigned AARCH64_FE_OVERFLOW = 4;
  const unsigned AARCH64_FE_UNDERFLOW = 8;
  const unsigned AARCH64_FE_INEXACT = 16;
  const unsigned HOST_WIDE_INT AARCH64_FE_ALL_EXCEPT = (AARCH64_FE_INVALID
                                                        | AARCH64_FE_DIVBYZERO
                                                        | AARCH64_FE_OVERFLOW
                                                        | AARCH64_FE_UNDERFLOW
                                                        | AARCH64_FE_INEXACT);
  const unsigned HOST_WIDE_INT AARCH64_FE_EXCEPT_SHIFT = 8;
  tree fenv_cr, fenv_sr, get_fpcr, set_fpcr, mask_cr, mask_sr;
  tree ld_fenv_cr, ld_fenv_sr, masked_fenv_cr, masked_fenv_sr, hold_fnclex_cr;
  tree hold_fnclex_sr, new_fenv_var, reload_fenv, restore_fnenv;
  tree get_fpsr, set_fpsr;
  tree update_call, atomic_feraiseexcept, hold_fnclex, masked_fenv, ld_fenv;

  /* Generate the equivalent of:
       unsigned int fenv_cr;
       fenv_cr = __builtin_aarch64_get_fpcr ();

       unsigned int fenv_sr;
       fenv_sr = __builtin_aarch64_get_fpsr ();

     Now set all exceptions to non-stop:
       unsigned int mask_cr
         = ~(AARCH64_FE_ALL_EXCEPT << AARCH64_FE_EXCEPT_SHIFT);
       unsigned int masked_cr;
       masked_cr = fenv_cr & mask_cr;

     And clear all exception flags:
       unsigned int mask_sr = ~AARCH64_FE_ALL_EXCEPT;
       unsigned int masked_sr;
       masked_sr = fenv_sr & mask_sr;

       __builtin_aarch64_set_fpcr (masked_cr);
       __builtin_aarch64_set_fpsr (masked_sr);  */

  fenv_cr = create_tmp_var_raw (unsigned_type_node);
  fenv_sr = create_tmp_var_raw (unsigned_type_node);

  get_fpcr = aarch64_builtin_decls[AARCH64_BUILTIN_GET_FPCR];
  set_fpcr = aarch64_builtin_decls[AARCH64_BUILTIN_SET_FPCR];
  get_fpsr = aarch64_builtin_decls[AARCH64_BUILTIN_GET_FPSR];
  set_fpsr = aarch64_builtin_decls[AARCH64_BUILTIN_SET_FPSR];

  mask_cr = build_int_cst (unsigned_type_node,
                           ~(AARCH64_FE_ALL_EXCEPT << AARCH64_FE_EXCEPT_SHIFT));
  mask_sr = build_int_cst (unsigned_type_node,
                           ~(AARCH64_FE_ALL_EXCEPT));

  ld_fenv_cr = build4 (TARGET_EXPR, unsigned_type_node,
                       fenv_cr, build_call_expr (get_fpcr, 0),
                       NULL_TREE, NULL_TREE);
  ld_fenv_sr = build4 (TARGET_EXPR, unsigned_type_node,
                       fenv_sr, build_call_expr (get_fpsr, 0),
                       NULL_TREE, NULL_TREE);

  masked_fenv_cr = build2 (BIT_AND_EXPR, unsigned_type_node, fenv_cr, mask_cr);
  masked_fenv_sr = build2 (BIT_AND_EXPR, unsigned_type_node, fenv_sr, mask_sr);

  hold_fnclex_cr = build_call_expr (set_fpcr, 1, masked_fenv_cr);
  hold_fnclex_sr = build_call_expr (set_fpsr, 1, masked_fenv_sr);

  hold_fnclex = build2 (COMPOUND_EXPR, void_type_node, hold_fnclex_cr,
                        hold_fnclex_sr);
  masked_fenv = build2 (COMPOUND_EXPR, void_type_node, masked_fenv_cr,
                        masked_fenv_sr);
  ld_fenv = build2 (COMPOUND_EXPR, void_type_node, ld_fenv_cr, ld_fenv_sr);

  *hold = build2 (COMPOUND_EXPR, void_type_node,
                  build2 (COMPOUND_EXPR, void_type_node, masked_fenv, ld_fenv),
                  hold_fnclex);

  /* Store the value of masked_fenv to clear the exceptions:
     __builtin_aarch64_set_fpsr (masked_fenv_sr);  */

  *clear = build_call_expr (set_fpsr, 1, masked_fenv_sr);

  /* Generate the equivalent of:
       unsigned int new_fenv_var;
       new_fenv_var = __builtin_aarch64_get_fpsr ();

       __builtin_aarch64_set_fpsr (fenv_sr);

       __atomic_feraiseexcept (new_fenv_var);  */

  new_fenv_var = create_tmp_var_raw (unsigned_type_node);
  reload_fenv = build4 (TARGET_EXPR, unsigned_type_node,
                        new_fenv_var, build_call_expr (get_fpsr, 0),
                        NULL_TREE, NULL_TREE);
  restore_fnenv = build_call_expr (set_fpsr, 1, fenv_sr);
  atomic_feraiseexcept = builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT);
  update_call = build_call_expr (atomic_feraiseexcept, 1,
                                 fold_convert (integer_type_node,
                                               new_fenv_var));
  *update = build2 (COMPOUND_EXPR, void_type_node,
                    build2 (COMPOUND_EXPR, void_type_node,
                            reload_fenv, restore_fnenv), update_call);
}


#undef AARCH64_CHECK_BUILTIN_MODE
#undef AARCH64_FIND_FRINT_VARIANT
#undef CF0
#undef CF1
#undef CF2
#undef CF3
#undef CF4
#undef CF10
#undef VAR1
#undef VAR2
#undef VAR3
#undef VAR4
#undef VAR5
#undef VAR6
#undef VAR7
#undef VAR8
#undef VAR9
#undef VAR10
#undef VAR11

#include "gt-aarch64-builtins.h"