/* Subroutines used for code generation on IBM S/390 and zSeries
   Copyright (C) 1999-2020 Free Software Foundation, Inc.
   Contributed by Hartmut Penner (hpenner@de.ibm.com) and
                  Ulrich Weigand (uweigand@de.ibm.com) and
                  Andreas Krebbel (Andreas.Krebbel@de.ibm.com).

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#define IN_TARGET_CODE 1

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "target.h"
#include "target-globals.h"
#include "rtl.h"
#include "tree.h"
#include "gimple.h"
#include "cfghooks.h"
#include "cfgloop.h"
#include "df.h"
#include "memmodel.h"
#include "tm_p.h"
#include "stringpool.h"
#include "attribs.h"
#include "expmed.h"
#include "optabs.h"
#include "regs.h"
#include "emit-rtl.h"
#include "recog.h"
#include "cgraph.h"
#include "diagnostic-core.h"
#include "diagnostic.h"
#include "alias.h"
#include "fold-const.h"
#include "print-tree.h"
#include "stor-layout.h"
#include "varasm.h"
#include "calls.h"
#include "conditions.h"
#include "output.h"
#include "insn-attr.h"
#include "flags.h"
#include "except.h"
#include "dojump.h"
#include "explow.h"
#include "stmt.h"
#include "expr.h"
#include "reload.h"
#include "cfgrtl.h"
#include "cfganal.h"
#include "lcm.h"
#include "cfgbuild.h"
#include "cfgcleanup.h"
#include "debug.h"
#include "langhooks.h"
#include "internal-fn.h"
#include "gimple-fold.h"
#include "tree-eh.h"
#include "gimplify.h"
#include "opts.h"
#include "tree-pass.h"
#include "context.h"
#include "builtins.h"
#include "rtl-iter.h"
#include "intl.h"
#include "tm-constrs.h"
#include "tree-vrp.h"
#include "symbol-summary.h"
#include "ipa-prop.h"
#include "ipa-fnsummary.h"
#include "sched-int.h"

/* This file should be included last.  */
#include "target-def.h"

static bool s390_hard_regno_mode_ok (unsigned int, machine_mode);

/* Remember the last target of s390_set_current_function.  */
static GTY(()) tree s390_previous_fndecl;

/* Define the specific costs for a given cpu.  */

struct processor_costs
{
  /* multiplication */
  const int m;        /* cost of an M instruction.  */
  const int mghi;     /* cost of an MGHI instruction.  */
  const int mh;       /* cost of an MH instruction.  */
  const int mhi;      /* cost of an MHI instruction.  */
  const int ml;       /* cost of an ML instruction.  */
  const int mr;       /* cost of an MR instruction.  */
  const int ms;       /* cost of an MS instruction.  */
  const int msg;      /* cost of an MSG instruction.  */
  const int msgf;     /* cost of an MSGF instruction.  */
  const int msgfr;    /* cost of an MSGFR instruction.  */
  const int msgr;     /* cost of an MSGR instruction.  */
  const int msr;      /* cost of an MSR instruction.  */
  const int mult_df;  /* cost of multiplication in DFmode.  */
  const int mxbr;
  /* square root */
  const int sqxbr;    /* cost of square root in TFmode.  */
  const int sqdbr;    /* cost of square root in DFmode.  */
  const int sqebr;    /* cost of square root in SFmode.  */
  /* multiply and add */
  const int madbr;    /* cost of multiply and add in DFmode.  */
  const int maebr;    /* cost of multiply and add in SFmode.  */
  /* division */
  const int dxbr;
  const int ddbr;
  const int debr;
  const int dlgr;
  const int dlr;
  const int dr;
  const int dsgfr;
  const int dsgr;
};

#define s390_cost ((const struct processor_costs *)(s390_cost_pointer))

static const
struct processor_costs z900_cost =
{
  COSTS_N_INSNS (5),    /* M */
  COSTS_N_INSNS (10),   /* MGHI */
  COSTS_N_INSNS (5),    /* MH */
  COSTS_N_INSNS (4),    /* MHI */
  COSTS_N_INSNS (5),    /* ML */
  COSTS_N_INSNS (5),    /* MR */
  COSTS_N_INSNS (4),    /* MS */
  COSTS_N_INSNS (15),   /* MSG */
  COSTS_N_INSNS (7),    /* MSGF */
  COSTS_N_INSNS (7),    /* MSGFR */
  COSTS_N_INSNS (10),   /* MSGR */
  COSTS_N_INSNS (4),    /* MSR */
  COSTS_N_INSNS (7),    /* multiplication in DFmode */
  COSTS_N_INSNS (13),   /* MXBR */
  COSTS_N_INSNS (136),  /* SQXBR */
  COSTS_N_INSNS (44),   /* SQDBR */
  COSTS_N_INSNS (35),   /* SQEBR */
  COSTS_N_INSNS (18),   /* MADBR */
  COSTS_N_INSNS (13),   /* MAEBR */
  COSTS_N_INSNS (134),  /* DXBR */
  COSTS_N_INSNS (30),   /* DDBR */
  COSTS_N_INSNS (27),   /* DEBR */
  COSTS_N_INSNS (220),  /* DLGR */
  COSTS_N_INSNS (34),   /* DLR */
  COSTS_N_INSNS (34),   /* DR */
  COSTS_N_INSNS (32),   /* DSGFR */
  COSTS_N_INSNS (32),   /* DSGR */
};

static const
struct processor_costs z990_cost =
{
  COSTS_N_INSNS (4),    /* M */
  COSTS_N_INSNS (2),    /* MGHI */
  COSTS_N_INSNS (2),    /* MH */
  COSTS_N_INSNS (2),    /* MHI */
  COSTS_N_INSNS (4),    /* ML */
  COSTS_N_INSNS (4),    /* MR */
  COSTS_N_INSNS (5),    /* MS */
  COSTS_N_INSNS (6),    /* MSG */
  COSTS_N_INSNS (4),    /* MSGF */
  COSTS_N_INSNS (4),    /* MSGFR */
  COSTS_N_INSNS (4),    /* MSGR */
  COSTS_N_INSNS (4),    /* MSR */
  COSTS_N_INSNS (1),    /* multiplication in DFmode */
  COSTS_N_INSNS (28),   /* MXBR */
  COSTS_N_INSNS (130),  /* SQXBR */
  COSTS_N_INSNS (66),   /* SQDBR */
  COSTS_N_INSNS (38),   /* SQEBR */
  COSTS_N_INSNS (1),    /* MADBR */
  COSTS_N_INSNS (1),    /* MAEBR */
  COSTS_N_INSNS (60),   /* DXBR */
  COSTS_N_INSNS (40),   /* DDBR */
  COSTS_N_INSNS (26),   /* DEBR */
  COSTS_N_INSNS (176),  /* DLGR */
  COSTS_N_INSNS (31),   /* DLR */
  COSTS_N_INSNS (31),   /* DR */
  COSTS_N_INSNS (31),   /* DSGFR */
  COSTS_N_INSNS (31),   /* DSGR */
};

static const
struct processor_costs z9_109_cost =
{
  COSTS_N_INSNS (4),    /* M */
  COSTS_N_INSNS (2),    /* MGHI */
  COSTS_N_INSNS (2),    /* MH */
  COSTS_N_INSNS (2),    /* MHI */
  COSTS_N_INSNS (4),    /* ML */
  COSTS_N_INSNS (4),    /* MR */
  COSTS_N_INSNS (5),    /* MS */
  COSTS_N_INSNS (6),    /* MSG */
  COSTS_N_INSNS (4),    /* MSGF */
  COSTS_N_INSNS (4),    /* MSGFR */
  COSTS_N_INSNS (4),    /* MSGR */
  COSTS_N_INSNS (4),    /* MSR */
  COSTS_N_INSNS (1),    /* multiplication in DFmode */
  COSTS_N_INSNS (28),   /* MXBR */
  COSTS_N_INSNS (130),  /* SQXBR */
  COSTS_N_INSNS (66),   /* SQDBR */
  COSTS_N_INSNS (38),   /* SQEBR */
  COSTS_N_INSNS (1),    /* MADBR */
  COSTS_N_INSNS (1),    /* MAEBR */
  COSTS_N_INSNS (60),   /* DXBR */
  COSTS_N_INSNS (40),   /* DDBR */
  COSTS_N_INSNS (26),   /* DEBR */
  COSTS_N_INSNS (30),   /* DLGR */
  COSTS_N_INSNS (23),   /* DLR */
  COSTS_N_INSNS (23),   /* DR */
  COSTS_N_INSNS (24),   /* DSGFR */
  COSTS_N_INSNS (24),   /* DSGR */
};

static const
struct processor_costs z10_cost =
{
  COSTS_N_INSNS (10),   /* M */
  COSTS_N_INSNS (10),   /* MGHI */
  COSTS_N_INSNS (10),   /* MH */
  COSTS_N_INSNS (10),   /* MHI */
  COSTS_N_INSNS (10),   /* ML */
  COSTS_N_INSNS (10),   /* MR */
  COSTS_N_INSNS (10),   /* MS */
  COSTS_N_INSNS (10),   /* MSG */
  COSTS_N_INSNS (10),   /* MSGF */
  COSTS_N_INSNS (10),   /* MSGFR */
  COSTS_N_INSNS (10),   /* MSGR */
  COSTS_N_INSNS (10),   /* MSR */
  COSTS_N_INSNS (1),    /* multiplication in DFmode */
  COSTS_N_INSNS (50),   /* MXBR */
  COSTS_N_INSNS (120),  /* SQXBR */
  COSTS_N_INSNS (52),   /* SQDBR */
  COSTS_N_INSNS (38),   /* SQEBR */
  COSTS_N_INSNS (1),    /* MADBR */
  COSTS_N_INSNS (1),    /* MAEBR */
  COSTS_N_INSNS (111),  /* DXBR */
  COSTS_N_INSNS (39),   /* DDBR */
  COSTS_N_INSNS (32),   /* DEBR */
  COSTS_N_INSNS (160),  /* DLGR */
  COSTS_N_INSNS (71),   /* DLR */
  COSTS_N_INSNS (71),   /* DR */
  COSTS_N_INSNS (71),   /* DSGFR */
  COSTS_N_INSNS (71),   /* DSGR */
};

static const
struct processor_costs z196_cost =
{
  COSTS_N_INSNS (7),    /* M */
  COSTS_N_INSNS (5),    /* MGHI */
  COSTS_N_INSNS (5),    /* MH */
  COSTS_N_INSNS (5),    /* MHI */
  COSTS_N_INSNS (7),    /* ML */
  COSTS_N_INSNS (7),    /* MR */
  COSTS_N_INSNS (6),    /* MS */
  COSTS_N_INSNS (8),    /* MSG */
  COSTS_N_INSNS (6),    /* MSGF */
  COSTS_N_INSNS (6),    /* MSGFR */
  COSTS_N_INSNS (8),    /* MSGR */
  COSTS_N_INSNS (6),    /* MSR */
  COSTS_N_INSNS (1),    /* multiplication in DFmode */
  COSTS_N_INSNS (40),   /* MXBR B+40 */
  COSTS_N_INSNS (100),  /* SQXBR B+100 */
  COSTS_N_INSNS (42),   /* SQDBR B+42 */
  COSTS_N_INSNS (28),   /* SQEBR B+28 */
  COSTS_N_INSNS (1),    /* MADBR B */
  COSTS_N_INSNS (1),    /* MAEBR B */
  COSTS_N_INSNS (101),  /* DXBR B+101 */
  COSTS_N_INSNS (29),   /* DDBR */
  COSTS_N_INSNS (22),   /* DEBR */
  COSTS_N_INSNS (160),  /* DLGR cracked */
  COSTS_N_INSNS (160),  /* DLR cracked */
  COSTS_N_INSNS (160),  /* DR expanded */
  COSTS_N_INSNS (160),  /* DSGFR cracked */
  COSTS_N_INSNS (160),  /* DSGR cracked */
};

static const
struct processor_costs zEC12_cost =
{
  COSTS_N_INSNS (7),    /* M */
  COSTS_N_INSNS (5),    /* MGHI */
  COSTS_N_INSNS (5),    /* MH */
  COSTS_N_INSNS (5),    /* MHI */
  COSTS_N_INSNS (7),    /* ML */
  COSTS_N_INSNS (7),    /* MR */
  COSTS_N_INSNS (6),    /* MS */
  COSTS_N_INSNS (8),    /* MSG */
  COSTS_N_INSNS (6),    /* MSGF */
  COSTS_N_INSNS (6),    /* MSGFR */
  COSTS_N_INSNS (8),    /* MSGR */
  COSTS_N_INSNS (6),    /* MSR */
  COSTS_N_INSNS (1),    /* multiplication in DFmode */
  COSTS_N_INSNS (40),   /* MXBR B+40 */
  COSTS_N_INSNS (100),  /* SQXBR B+100 */
  COSTS_N_INSNS (42),   /* SQDBR B+42 */
  COSTS_N_INSNS (28),   /* SQEBR B+28 */
  COSTS_N_INSNS (1),    /* MADBR B */
  COSTS_N_INSNS (1),    /* MAEBR B */
  COSTS_N_INSNS (131),  /* DXBR B+131 */
  COSTS_N_INSNS (29),   /* DDBR */
  COSTS_N_INSNS (22),   /* DEBR */
  COSTS_N_INSNS (160),  /* DLGR cracked */
  COSTS_N_INSNS (160),  /* DLR cracked */
  COSTS_N_INSNS (160),  /* DR expanded */
  COSTS_N_INSNS (160),  /* DSGFR cracked */
  COSTS_N_INSNS (160),  /* DSGR cracked */
};

const struct s390_processor processor_table[] =
{
  { "z900",   "z900",   PROCESSOR_2064_Z900,   &z900_cost,   5 },
  { "z990",   "z990",   PROCESSOR_2084_Z990,   &z990_cost,   6 },
  { "z9-109", "z9-109", PROCESSOR_2094_Z9_109, &z9_109_cost, 7 },
  { "z9-ec",  "z9-ec",  PROCESSOR_2094_Z9_EC,  &z9_109_cost, 7 },
  { "z10",    "z10",    PROCESSOR_2097_Z10,    &z10_cost,    8 },
  { "z196",   "z196",   PROCESSOR_2817_Z196,   &z196_cost,   9 },
  { "zEC12",  "zEC12",  PROCESSOR_2827_ZEC12,  &zEC12_cost,  10 },
  { "z13",    "z13",    PROCESSOR_2964_Z13,    &zEC12_cost,  11 },
  { "z14",    "arch12", PROCESSOR_3906_Z14,    &zEC12_cost,  12 },
  { "z15",    "arch13", PROCESSOR_8561_Z15,    &zEC12_cost,  13 },
  { "native", "",       PROCESSOR_NATIVE,      NULL,         0 }
};
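
/* Note: the second string in each row above is the processor name as it
   is passed on to the assembler; judging from the table, z14 and z15
   are emitted as "arch12" and "arch13" respectively (presumably the
   binutils architecture names), while "native" is resolved to the host
   CPU.  */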

extern int reload_completed;

/* Kept up to date using the SCHED_VARIABLE_ISSUE hook.  */
static rtx_insn *last_scheduled_insn;
#define NUM_SIDES 2

#define MAX_SCHED_UNITS 4
static int last_scheduled_unit_distance[MAX_SCHED_UNITS][NUM_SIDES];

/* Estimate of the number of cycles a long-running insn occupies an
   execution unit.  */
static int fxd_longrunning[NUM_SIDES];
static int fpd_longrunning[NUM_SIDES];

/* The maximum score added for an instruction whose unit hasn't been
   in use for MAX_SCHED_MIX_DISTANCE steps.  Increase this value to
   give instruction mix scheduling more priority over instruction
   grouping.  */
#define MAX_SCHED_MIX_SCORE 2

/* The maximum distance up to which individual scores will be
   calculated.  Everything beyond this gives MAX_SCHED_MIX_SCORE.
   Increase this with the OOO window size of the machine.  */
#define MAX_SCHED_MIX_DISTANCE 70

/* Structure used to hold the components of an S/390 memory
   address.  A legitimate address on S/390 is of the general
   form
	 base + index + displacement
   where any of the components is optional.

   base and index are registers of the class ADDR_REGS,
   displacement is an unsigned 12-bit immediate constant.  */
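
/* For example (an illustration, not from the original source): the
   memory operand of "lg %r1,8(%r2,%r3)" decomposes into displacement 8,
   index %r2 and base %r3, following the usual D(X,B) assembler
   syntax.  */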

/* The max number of insns of backend-generated memset/memcpy/memcmp
   loops.  This value is used in the unroll adjust hook to detect such
   loops.  The current max is 9, coming from the memcmp loop.  */
#define BLOCK_MEM_OPS_LOOP_INSNS 9

struct s390_address
{
  rtx base;
  rtx indx;
  rtx disp;
  bool pointer;
  bool literal_pool;
};

/* A few accessor macros for struct cfun->machine->s390_frame_layout.  */

#define cfun_frame_layout (cfun->machine->frame_layout)
#define cfun_save_high_fprs_p (!!cfun_frame_layout.high_fprs)
#define cfun_save_arg_fprs_p (!!(TARGET_64BIT                           \
				 ? cfun_frame_layout.fpr_bitmap & 0x0f  \
				 : cfun_frame_layout.fpr_bitmap & 0x03))
#define cfun_gprs_save_area_size ((cfun_frame_layout.last_save_gpr_slot - \
				   cfun_frame_layout.first_save_gpr_slot + 1) * UNITS_PER_LONG)
#define cfun_set_fpr_save(REGNO) (cfun->machine->frame_layout.fpr_bitmap |= \
				  (1 << (REGNO - FPR0_REGNUM)))
#define cfun_fpr_save_p(REGNO) (!!(cfun->machine->frame_layout.fpr_bitmap & \
				   (1 << (REGNO - FPR0_REGNUM))))
#define cfun_gpr_save_slot(REGNO) \
  cfun->machine->frame_layout.gpr_save_slots[REGNO]

/* Number of GPRs and FPRs used for argument passing.  */
#define GP_ARG_NUM_REG 5
#define FP_ARG_NUM_REG (TARGET_64BIT ? 4 : 2)
#define VEC_ARG_NUM_REG 8
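
/* These counts are understood to correspond to the s390 ELF ABI register
   conventions: GPRs %r2..%r6 for the first five integer arguments, FPRs
   %f0/%f2 (31 bit) resp. %f0,%f2,%f4,%f6 (64 bit), and VRs %v24..%v31
   for vector arguments (an assumption added here for illustration).  */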

/* A couple of shortcuts.  */
#define CONST_OK_FOR_J(x) \
	CONST_OK_FOR_CONSTRAINT_P((x), 'J', "J")
#define CONST_OK_FOR_K(x) \
	CONST_OK_FOR_CONSTRAINT_P((x), 'K', "K")
#define CONST_OK_FOR_Os(x) \
	CONST_OK_FOR_CONSTRAINT_P((x), 'O', "Os")
#define CONST_OK_FOR_Op(x) \
	CONST_OK_FOR_CONSTRAINT_P((x), 'O', "Op")
#define CONST_OK_FOR_On(x) \
	CONST_OK_FOR_CONSTRAINT_P((x), 'O', "On")

#define REGNO_PAIR_OK(REGNO, MODE)                               \
  (s390_hard_regno_nregs ((REGNO), (MODE)) == 1 || !((REGNO) & 1))
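
/* I.e. a value that needs more than one hard register (e.g. a TImode
   value held in a GPR pair) must start at an even register number,
   while single-register values may live in any register.  */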

/* That's the read-ahead of the dynamic branch prediction unit in
   bytes on a z10 (or higher) CPU.  */
#define PREDICT_DISTANCE (TARGET_Z10 ? 384 : 2048)

/* Masks per jump target register indicating which thunks need to be
   generated.  */
static GTY(()) int indirect_branch_prez10thunk_mask = 0;
static GTY(()) int indirect_branch_z10thunk_mask = 0;

#define INDIRECT_BRANCH_NUM_OPTIONS 4

enum s390_indirect_branch_option
  {
    s390_opt_indirect_branch_jump = 0,
    s390_opt_indirect_branch_call,
    s390_opt_function_return_reg,
    s390_opt_function_return_mem
  };

static GTY(()) int indirect_branch_table_label_no[INDIRECT_BRANCH_NUM_OPTIONS] = { 0 };
const char *indirect_branch_table_label[INDIRECT_BRANCH_NUM_OPTIONS] = \
  { "LJUMP", "LCALL", "LRETREG", "LRETMEM" };
const char *indirect_branch_table_name[INDIRECT_BRANCH_NUM_OPTIONS] = \
  { ".s390_indirect_jump", ".s390_indirect_call",
    ".s390_return_reg", ".s390_return_mem" };

bool
s390_return_addr_from_memory ()
{
  return cfun_gpr_save_slot(RETURN_REGNUM) == SAVE_SLOT_STACK;
}

/* Indicate which ABI has been used for passing vector args.
   0 - no vector type arguments have been passed where the ABI is relevant
   1 - the old ABI has been used
   2 - a vector type argument has been passed either in a vector register
       or on the stack by value  */
static int s390_vector_abi = 0;

/* Set the vector ABI marker if TYPE is subject to the vector ABI
   switch.  The vector ABI affects only vector data types.  There are
   two aspects of the vector ABI relevant here:

   1. vectors >= 16 bytes have an alignment of 8 bytes with the new
      ABI and natural alignment with the old.

   2. vectors <= 16 bytes are passed in VRs or by value on the stack
      with the new ABI but by reference on the stack with the old.

   If ARG_P is true TYPE is used for a function argument or return
   value.  The ABI marker then is set for all vector data types.  If
   ARG_P is false only type 1 vectors are being checked.  */

static void
s390_check_type_for_vector_abi (const_tree type, bool arg_p, bool in_struct_p)
{
  static hash_set<const_tree> visited_types_hash;

  if (s390_vector_abi)
    return;

  if (type == NULL_TREE || TREE_CODE (type) == ERROR_MARK)
    return;

  if (visited_types_hash.contains (type))
    return;

  visited_types_hash.add (type);

  if (VECTOR_TYPE_P (type))
    {
      int type_size = int_size_in_bytes (type);

      /* Outside of arguments only the alignment is changing and this
	 only happens for vector types >= 16 bytes.  */
      if (!arg_p && type_size < 16)
	return;

      /* In arguments vector types > 16 bytes are passed as before (GCC
	 never enforced the bigger alignment for arguments which was
	 required by the old vector ABI).  However, it might still be
	 ABI relevant due to the changed alignment if it is a struct
	 member.  */
      if (arg_p && type_size > 16 && !in_struct_p)
	return;

      s390_vector_abi = TARGET_VX_ABI ? 2 : 1;
    }
  else if (POINTER_TYPE_P (type) || TREE_CODE (type) == ARRAY_TYPE)
    {
      /* ARRAY_TYPE: Since with neither of the ABIs we have more than
	 natural alignment there will never be ABI dependent padding
	 in an array type.  That's why we do not set in_struct_p to
	 true here.  */
      s390_check_type_for_vector_abi (TREE_TYPE (type), arg_p, in_struct_p);
    }
  else if (TREE_CODE (type) == FUNCTION_TYPE || TREE_CODE (type) == METHOD_TYPE)
    {
      tree arg_chain;

      /* Check the return type.  */
      s390_check_type_for_vector_abi (TREE_TYPE (type), true, false);

      for (arg_chain = TYPE_ARG_TYPES (type);
	   arg_chain;
	   arg_chain = TREE_CHAIN (arg_chain))
	s390_check_type_for_vector_abi (TREE_VALUE (arg_chain), true, false);
    }
  else if (RECORD_OR_UNION_TYPE_P (type))
    {
      tree field;

      for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
	{
	  if (TREE_CODE (field) != FIELD_DECL)
	    continue;

	  s390_check_type_for_vector_abi (TREE_TYPE (field), arg_p, true);
	}
    }
}
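
/* Illustration (not from the original source): given

     typedef int v4si __attribute__ ((vector_size (16)));
     void foo (v4si x);

   scanning the type of X hits the VECTOR_TYPE_P case with ARG_P true
   and a type size of 16 bytes, so the ABI marker above gets set: such
   an argument is passed in a VR (or by value on the stack) with the new
   ABI but by reference with the old one.  */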


/* System z builtins.  */

#include "s390-builtins.h"

const unsigned int bflags_builtin[S390_BUILTIN_MAX + 1] =
  {
#undef B_DEF
#undef OB_DEF
#undef OB_DEF_VAR
#define B_DEF(NAME, PATTERN, ATTRS, BFLAGS, ...) BFLAGS,
#define OB_DEF(...)
#define OB_DEF_VAR(...)
#include "s390-builtins.def"
    0
  };
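
/* The flag tables here follow the usual X-macro pattern: each B_DEF
   line in s390-builtins.def expands to one initializer in definition
   order.  E.g. a hypothetical entry (for illustration only)

     B_DEF (foo, foo_pattern, 0, B_VX, O_ELEM, BT_FN_INT)

   would contribute B_VX to the table above and O_ELEM to the
   opflags_builtin table below.  */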

const unsigned int opflags_builtin[S390_BUILTIN_MAX + 1] =
  {
#undef B_DEF
#undef OB_DEF
#undef OB_DEF_VAR
#define B_DEF(NAME, PATTERN, ATTRS, BFLAGS, OPFLAGS, ...) OPFLAGS,
#define OB_DEF(...)
#define OB_DEF_VAR(...)
#include "s390-builtins.def"
    0
  };

const unsigned int bflags_overloaded_builtin[S390_OVERLOADED_BUILTIN_MAX + 1] =
  {
#undef B_DEF
#undef OB_DEF
#undef OB_DEF_VAR
#define B_DEF(...)
#define OB_DEF(NAME, FIRST_VAR_NAME, LAST_VAR_NAME, BFLAGS, ...) BFLAGS,
#define OB_DEF_VAR(...)
#include "s390-builtins.def"
    0
  };

const unsigned int
bflags_overloaded_builtin_var[S390_OVERLOADED_BUILTIN_VAR_MAX + 1] =
  {
#undef B_DEF
#undef OB_DEF
#undef OB_DEF_VAR
#define B_DEF(...)
#define OB_DEF(...)
#define OB_DEF_VAR(NAME, PATTERN, FLAGS, OPFLAGS, FNTYPE) FLAGS,
#include "s390-builtins.def"
    0
  };

const unsigned int
opflags_overloaded_builtin_var[S390_OVERLOADED_BUILTIN_VAR_MAX + 1] =
  {
#undef B_DEF
#undef OB_DEF
#undef OB_DEF_VAR
#define B_DEF(...)
#define OB_DEF(...)
#define OB_DEF_VAR(NAME, PATTERN, FLAGS, OPFLAGS, FNTYPE) OPFLAGS,
#include "s390-builtins.def"
    0
  };

tree s390_builtin_types[BT_MAX];
tree s390_builtin_fn_types[BT_FN_MAX];
tree s390_builtin_decls[S390_BUILTIN_MAX +
			S390_OVERLOADED_BUILTIN_MAX +
			S390_OVERLOADED_BUILTIN_VAR_MAX];

static enum insn_code const code_for_builtin[S390_BUILTIN_MAX + 1] = {
#undef B_DEF
#undef OB_DEF
#undef OB_DEF_VAR
#define B_DEF(NAME, PATTERN, ...) CODE_FOR_##PATTERN,
#define OB_DEF(...)
#define OB_DEF_VAR(...)

#include "s390-builtins.def"
  CODE_FOR_nothing
};

static void
s390_init_builtins (void)
{
  /* These definitions are being used in s390-builtins.def.  */
  tree returns_twice_attr = tree_cons (get_identifier ("returns_twice"),
				       NULL, NULL);
  tree noreturn_attr = tree_cons (get_identifier ("noreturn"), NULL, NULL);
  tree c_uint64_type_node;

  /* The uint64_type_node from tree.c is not compatible with the C99
     uint64_t data type.  What we want is c_uint64_type_node from
     c-common.c.  But since backend code is not supposed to interface
     with the frontend we recreate it here.  */
  if (TARGET_64BIT)
    c_uint64_type_node = long_unsigned_type_node;
  else
    c_uint64_type_node = long_long_unsigned_type_node;

#undef DEF_TYPE
#define DEF_TYPE(INDEX, NODE, CONST_P)          \
  if (s390_builtin_types[INDEX] == NULL)        \
    s390_builtin_types[INDEX] = (!CONST_P) ?    \
      (NODE) : build_type_variant ((NODE), 1, 0);

#undef DEF_POINTER_TYPE
#define DEF_POINTER_TYPE(INDEX, INDEX_BASE)     \
  if (s390_builtin_types[INDEX] == NULL)        \
    s390_builtin_types[INDEX] =                 \
      build_pointer_type (s390_builtin_types[INDEX_BASE]);

#undef DEF_DISTINCT_TYPE
#define DEF_DISTINCT_TYPE(INDEX, INDEX_BASE)    \
  if (s390_builtin_types[INDEX] == NULL)        \
    s390_builtin_types[INDEX] =                 \
      build_distinct_type_copy (s390_builtin_types[INDEX_BASE]);

#undef DEF_VECTOR_TYPE
#define DEF_VECTOR_TYPE(INDEX, INDEX_BASE, ELEMENTS)    \
  if (s390_builtin_types[INDEX] == NULL)                \
    s390_builtin_types[INDEX] =                         \
      build_vector_type (s390_builtin_types[INDEX_BASE], ELEMENTS);

#undef DEF_OPAQUE_VECTOR_TYPE
#define DEF_OPAQUE_VECTOR_TYPE(INDEX, INDEX_BASE, ELEMENTS)     \
  if (s390_builtin_types[INDEX] == NULL)                        \
    s390_builtin_types[INDEX] =                                 \
      build_opaque_vector_type (s390_builtin_types[INDEX_BASE], ELEMENTS);

#undef DEF_FN_TYPE
#define DEF_FN_TYPE(INDEX, args...)             \
  if (s390_builtin_fn_types[INDEX] == NULL)     \
    s390_builtin_fn_types[INDEX] =              \
      build_function_type_list (args, NULL_TREE);
#undef DEF_OV_TYPE
#define DEF_OV_TYPE(...)
#include "s390-builtin-types.def"

#undef B_DEF
#define B_DEF(NAME, PATTERN, ATTRS, BFLAGS, OPFLAGS, FNTYPE)    \
  if (s390_builtin_decls[S390_BUILTIN_##NAME] == NULL)          \
    s390_builtin_decls[S390_BUILTIN_##NAME] =                   \
      add_builtin_function ("__builtin_" #NAME,                 \
			    s390_builtin_fn_types[FNTYPE],      \
			    S390_BUILTIN_##NAME,                \
			    BUILT_IN_MD,                        \
			    NULL,                               \
			    ATTRS);
#undef OB_DEF
#define OB_DEF(NAME, FIRST_VAR_NAME, LAST_VAR_NAME, BFLAGS, FNTYPE)     \
  if (s390_builtin_decls[S390_OVERLOADED_BUILTIN_##NAME + S390_BUILTIN_MAX] \
      == NULL)                                                          \
    s390_builtin_decls[S390_OVERLOADED_BUILTIN_##NAME + S390_BUILTIN_MAX] = \
      add_builtin_function ("__builtin_" #NAME,                         \
			    s390_builtin_fn_types[FNTYPE],              \
			    S390_OVERLOADED_BUILTIN_##NAME + S390_BUILTIN_MAX, \
			    BUILT_IN_MD,                                \
			    NULL,                                       \
			    0);
#undef OB_DEF_VAR
#define OB_DEF_VAR(...)
#include "s390-builtins.def"

}

/* Return true if ARG is appropriate as argument number ARGNUM of
   builtin DECL.  The operand flags from s390-builtins.def have to be
   passed as OP_FLAGS.  */
bool
s390_const_operand_ok (tree arg, int argnum, int op_flags, tree decl)
{
  if (O_UIMM_P (op_flags))
    {
      int bitwidths[] = { 1, 2, 3, 4, 5, 8, 12, 16, 32 };
      int bitwidth = bitwidths[op_flags - O_U1];

      if (!tree_fits_uhwi_p (arg)
	  || tree_to_uhwi (arg) > (HOST_WIDE_INT_1U << bitwidth) - 1)
	{
	  error ("constant argument %d for builtin %qF is out of range "
		 "(0..%wu)", argnum, decl,
		 (HOST_WIDE_INT_1U << bitwidth) - 1);
	  return false;
	}
    }

  if (O_SIMM_P (op_flags))
    {
      int bitwidths[] = { 2, 3, 4, 5, 8, 12, 16, 32 };
      int bitwidth = bitwidths[op_flags - O_S2];

      if (!tree_fits_shwi_p (arg)
	  || tree_to_shwi (arg) < -(HOST_WIDE_INT_1 << (bitwidth - 1))
	  || tree_to_shwi (arg) > ((HOST_WIDE_INT_1 << (bitwidth - 1)) - 1))
	{
	  error ("constant argument %d for builtin %qF is out of range "
		 "(%wd..%wd)", argnum, decl,
		 -(HOST_WIDE_INT_1 << (bitwidth - 1)),
		 (HOST_WIDE_INT_1 << (bitwidth - 1)) - 1);
	  return false;
	}
    }
  return true;
}
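
/* E.g. an operand flagged O_U4 gets bitwidth 4 from the first table
   above and therefore accepts the constants 0..15, while O_S8 gets
   bitwidth 8 from the second table and accepts -128..127.  */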

/* Expand an expression EXP that calls a built-in function,
   with result going to TARGET if that's convenient
   (and in mode MODE if that's convenient).
   SUBTARGET may be used as the target for computing one of EXP's operands.
   IGNORE is nonzero if the value is to be ignored.  */

static rtx
s390_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
		     machine_mode mode ATTRIBUTE_UNUSED,
		     int ignore ATTRIBUTE_UNUSED)
{
#define MAX_ARGS 6

  tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
  unsigned int fcode = DECL_MD_FUNCTION_CODE (fndecl);
  enum insn_code icode;
  rtx op[MAX_ARGS], pat;
  int arity;
  bool nonvoid;
  tree arg;
  call_expr_arg_iterator iter;
  unsigned int all_op_flags = opflags_for_builtin (fcode);
  machine_mode last_vec_mode = VOIDmode;

  if (TARGET_DEBUG_ARG)
    {
      fprintf (stderr,
	       "s390_expand_builtin, code = %4d, %s, bflags = 0x%x\n",
	       (int)fcode, IDENTIFIER_POINTER (DECL_NAME (fndecl)),
	       bflags_for_builtin (fcode));
    }

  if (S390_USE_TARGET_ATTRIBUTE)
    {
      unsigned int bflags;

      bflags = bflags_for_builtin (fcode);
      if ((bflags & B_HTM) && !TARGET_HTM)
	{
	  error ("builtin %qF is not supported without %<-mhtm%> "
		 "(default with %<-march=zEC12%> and higher).", fndecl);
	  return const0_rtx;
	}
      if (((bflags & B_VX) || (bflags & B_VXE)) && !TARGET_VX)
	{
	  error ("builtin %qF requires %<-mvx%> "
		 "(default with %<-march=z13%> and higher).", fndecl);
	  return const0_rtx;
	}

      if ((bflags & B_VXE) && !TARGET_VXE)
	{
	  error ("builtin %qF requires z14 or higher.", fndecl);
	  return const0_rtx;
	}

      if ((bflags & B_VXE2) && !TARGET_VXE2)
	{
	  error ("builtin %qF requires z15 or higher.", fndecl);
	  return const0_rtx;
	}
    }
  if (fcode >= S390_OVERLOADED_BUILTIN_VAR_OFFSET
      && fcode < S390_ALL_BUILTIN_MAX)
    {
      gcc_unreachable ();
    }
  else if (fcode < S390_OVERLOADED_BUILTIN_OFFSET)
    {
      icode = code_for_builtin[fcode];
      /* Set a flag in the machine specific cfun part in order to support
	 saving/restoring of FPRs.  */
      if (fcode == S390_BUILTIN_tbegin || fcode == S390_BUILTIN_tbegin_retry)
	cfun->machine->tbegin_p = true;
    }
  else if (fcode < S390_OVERLOADED_BUILTIN_VAR_OFFSET)
    {
      error ("unresolved overloaded builtin");
      return const0_rtx;
    }
  else
    internal_error ("bad builtin fcode");

  if (icode == 0)
    internal_error ("bad builtin icode");

  nonvoid = TREE_TYPE (TREE_TYPE (fndecl)) != void_type_node;

  if (nonvoid)
    {
      machine_mode tmode = insn_data[icode].operand[0].mode;
      if (!target
	  || GET_MODE (target) != tmode
	  || !(*insn_data[icode].operand[0].predicate) (target, tmode))
	target = gen_reg_rtx (tmode);

      /* There are builtins (e.g. vec_promote) with no vector
	 arguments but an element selector.  So we have to also look
	 at the vector return type when emitting the modulo
	 operation.  */
      if (VECTOR_MODE_P (insn_data[icode].operand[0].mode))
	last_vec_mode = insn_data[icode].operand[0].mode;
    }

  arity = 0;
  FOR_EACH_CALL_EXPR_ARG (arg, iter, exp)
    {
      rtx tmp_rtx;
      const struct insn_operand_data *insn_op;
      unsigned int op_flags = all_op_flags & ((1 << O_SHIFT) - 1);

      all_op_flags = all_op_flags >> O_SHIFT;

      if (arg == error_mark_node)
	return NULL_RTX;
      if (arity >= MAX_ARGS)
	return NULL_RTX;

      if (O_IMM_P (op_flags)
	  && TREE_CODE (arg) != INTEGER_CST)
	{
	  error ("constant value required for builtin %qF argument %d",
		 fndecl, arity + 1);
	  return const0_rtx;
	}

      if (!s390_const_operand_ok (arg, arity + 1, op_flags, fndecl))
	return const0_rtx;

      insn_op = &insn_data[icode].operand[arity + nonvoid];
      op[arity] = expand_expr (arg, NULL_RTX, insn_op->mode, EXPAND_NORMAL);

      /* expand_expr truncates constants to the target mode only if it
	 is "convenient".  However, our checks below rely on this
	 being done.  */
      if (CONST_INT_P (op[arity])
	  && SCALAR_INT_MODE_P (insn_op->mode)
	  && GET_MODE (op[arity]) != insn_op->mode)
	op[arity] = GEN_INT (trunc_int_for_mode (INTVAL (op[arity]),
						 insn_op->mode));

      /* Wrap the expanded RTX for pointer types into a MEM expr with
	 the proper mode.  This allows us to use e.g. (match_operand
	 "memory_operand"..) in the insn patterns instead of (mem
	 (match_operand "address_operand)).  This is helpful for
	 patterns not just accepting MEMs.  */
      if (POINTER_TYPE_P (TREE_TYPE (arg))
	  && insn_op->predicate != address_operand)
	op[arity] = gen_rtx_MEM (insn_op->mode, op[arity]);

      /* Expand the modulo operation required on element selectors.  */
      if (op_flags == O_ELEM)
	{
	  gcc_assert (last_vec_mode != VOIDmode);
	  op[arity] = simplify_expand_binop (SImode, code_to_optab (AND),
					     op[arity],
					     GEN_INT (GET_MODE_NUNITS (last_vec_mode) - 1),
					     NULL_RTX, 1, OPTAB_DIRECT);
	}
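
      /* E.g. for a V4SImode vector GET_MODE_NUNITS is 4, so an element
	 selector of 5 is reduced to 5 & 3 == 1 by the AND above.  */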

      /* Record the vector mode used for an element selector.  This assumes:
	 1. There is no builtin with two different vector modes and an element selector
	 2. The element selector comes after the vector type it is referring to.
	 This is currently true for all the builtins, but FIXME: we
	 should check for that.  */
      if (VECTOR_MODE_P (insn_op->mode))
	last_vec_mode = insn_op->mode;

      if (insn_op->predicate (op[arity], insn_op->mode))
	{
	  arity++;
	  continue;
	}

      /* A memory operand is rejected by the memory_operand predicate.
	 Try making the address legal by copying it into a register.  */
      if (MEM_P (op[arity])
	  && insn_op->predicate == memory_operand
	  && (GET_MODE (XEXP (op[arity], 0)) == Pmode
	      || GET_MODE (XEXP (op[arity], 0)) == VOIDmode))
	{
	  op[arity] = replace_equiv_address (op[arity],
					     copy_to_mode_reg (Pmode,
							       XEXP (op[arity], 0)));
	}
      /* Some of the builtins require different modes/types than the
	 pattern in order to implement a specific API.  Instead of
	 adding many expanders which do the mode change we do it here.
	 E.g. s390_vec_add_u128, which is required to have vector
	 unsigned char arguments, is mapped to addti3.  */
      else if (insn_op->mode != VOIDmode
	       && GET_MODE (op[arity]) != VOIDmode
	       && GET_MODE (op[arity]) != insn_op->mode
	       && ((tmp_rtx = simplify_gen_subreg (insn_op->mode, op[arity],
						   GET_MODE (op[arity]), 0))
		   != NULL_RTX))
	{
	  op[arity] = tmp_rtx;
	}

      /* The predicate rejects the operand although the mode is fine.
	 Copy the operand into a register.  */
      if (!insn_op->predicate (op[arity], insn_op->mode)
	  && (GET_MODE (op[arity]) == insn_op->mode
	      || GET_MODE (op[arity]) == VOIDmode
	      || (insn_op->predicate == address_operand
		  && GET_MODE (op[arity]) == Pmode)))
	{
	  /* An address_operand usually has VOIDmode in the expander
	     so we cannot use this.  */
	  machine_mode target_mode =
	    (insn_op->predicate == address_operand
	     ? (machine_mode) Pmode : insn_op->mode);
	  op[arity] = copy_to_mode_reg (target_mode, op[arity]);
	}

      if (!insn_op->predicate (op[arity], insn_op->mode))
	{
	  error ("invalid argument %d for builtin %qF", arity + 1, fndecl);
	  return const0_rtx;
	}
      arity++;
    }

  switch (arity)
    {
    case 0:
      pat = GEN_FCN (icode) (target);
      break;
    case 1:
      if (nonvoid)
	pat = GEN_FCN (icode) (target, op[0]);
      else
	pat = GEN_FCN (icode) (op[0]);
      break;
    case 2:
      if (nonvoid)
	pat = GEN_FCN (icode) (target, op[0], op[1]);
      else
	pat = GEN_FCN (icode) (op[0], op[1]);
      break;
    case 3:
      if (nonvoid)
	pat = GEN_FCN (icode) (target, op[0], op[1], op[2]);
      else
	pat = GEN_FCN (icode) (op[0], op[1], op[2]);
      break;
    case 4:
      if (nonvoid)
	pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3]);
      else
	pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
      break;
    case 5:
      if (nonvoid)
	pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3], op[4]);
      else
	pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3], op[4]);
      break;
    case 6:
      if (nonvoid)
	pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3], op[4], op[5]);
      else
	pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3], op[4], op[5]);
      break;
    default:
      gcc_unreachable ();
    }
  if (!pat)
    return NULL_RTX;
  emit_insn (pat);

  if (nonvoid)
    return target;
  else
    return const0_rtx;
}


static const int s390_hotpatch_hw_max = 1000000;
static int s390_hotpatch_hw_before_label = 0;
static int s390_hotpatch_hw_after_label = 0;

/* Check whether the hotpatch attribute is applied to a function and, if it has
   an argument, the argument is valid.  */

static tree
s390_handle_hotpatch_attribute (tree *node, tree name, tree args,
				int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
{
  tree expr;
  tree expr2;
  int err;

  if (TREE_CODE (*node) != FUNCTION_DECL)
    {
      warning (OPT_Wattributes, "%qE attribute only applies to functions",
	       name);
      *no_add_attrs = true;
    }
  if (args != NULL && TREE_CHAIN (args) != NULL)
    {
      expr = TREE_VALUE (args);
      expr2 = TREE_VALUE (TREE_CHAIN (args));
    }
  if (args == NULL || TREE_CHAIN (args) == NULL)
    err = 1;
  else if (TREE_CODE (expr) != INTEGER_CST
	   || !INTEGRAL_TYPE_P (TREE_TYPE (expr))
	   || wi::gtu_p (wi::to_wide (expr), s390_hotpatch_hw_max))
    err = 1;
  else if (TREE_CODE (expr2) != INTEGER_CST
	   || !INTEGRAL_TYPE_P (TREE_TYPE (expr2))
	   || wi::gtu_p (wi::to_wide (expr2), s390_hotpatch_hw_max))
    err = 1;
  else
    err = 0;
  if (err)
    {
      error ("requested %qE attribute is not a comma separated pair of"
	     " non-negative integer constants or too large (max. %d)", name,
	     s390_hotpatch_hw_max);
      *no_add_attrs = true;
    }

  return NULL_TREE;
}
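
/* Usage sketch (not from the original source):

     void foo (void) __attribute__ ((hotpatch (1, 2)));

   requests 1 halfword of padding before and 2 halfwords after the
   function label, each value being checked against s390_hotpatch_hw_max
   above.  */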

/* Expand the s390_vector_bool type attribute.  */

static tree
s390_handle_vectorbool_attribute (tree *node, tree name ATTRIBUTE_UNUSED,
				  tree args ATTRIBUTE_UNUSED,
				  int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
{
  tree type = *node, result = NULL_TREE;
  machine_mode mode;

  while (POINTER_TYPE_P (type)
	 || TREE_CODE (type) == FUNCTION_TYPE
	 || TREE_CODE (type) == METHOD_TYPE
	 || TREE_CODE (type) == ARRAY_TYPE)
    type = TREE_TYPE (type);

  mode = TYPE_MODE (type);
  switch (mode)
    {
    case E_DImode: case E_V2DImode:
      result = s390_builtin_types[BT_BV2DI];
      break;
    case E_SImode: case E_V4SImode:
      result = s390_builtin_types[BT_BV4SI];
      break;
    case E_HImode: case E_V8HImode:
      result = s390_builtin_types[BT_BV8HI];
      break;
    case E_QImode: case E_V16QImode:
      result = s390_builtin_types[BT_BV16QI];
      break;
    default:
      break;
    }

  *no_add_attrs = true;  /* No need to hang on to the attribute.  */

  if (result)
    *node = lang_hooks.types.reconstruct_complex_type (*node, result);

  return NULL_TREE;
}
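
/* E.g. a "__attribute__ ((s390_vector_bool)) vector int" is rebuilt
   with the builtin boolean vector type BT_BV4SI via the E_V4SImode case
   above.  */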

/* Check syntax of function decl attributes having a string type value.  */

static tree
s390_handle_string_attribute (tree *node, tree name ATTRIBUTE_UNUSED,
			      tree args ATTRIBUTE_UNUSED,
			      int flags ATTRIBUTE_UNUSED,
			      bool *no_add_attrs)
{
  tree cst;

  if (TREE_CODE (*node) != FUNCTION_DECL)
    {
      warning (OPT_Wattributes, "%qE attribute only applies to functions",
	       name);
      *no_add_attrs = true;
    }

  cst = TREE_VALUE (args);

  if (TREE_CODE (cst) != STRING_CST)
    {
      warning (OPT_Wattributes,
	       "%qE attribute requires a string constant argument",
	       name);
      *no_add_attrs = true;
    }

  if (is_attribute_p ("indirect_branch", name)
      || is_attribute_p ("indirect_branch_call", name)
      || is_attribute_p ("function_return", name)
      || is_attribute_p ("function_return_reg", name)
      || is_attribute_p ("function_return_mem", name))
    {
      if (strcmp (TREE_STRING_POINTER (cst), "keep") != 0
	  && strcmp (TREE_STRING_POINTER (cst), "thunk") != 0
	  && strcmp (TREE_STRING_POINTER (cst), "thunk-extern") != 0)
	{
	  warning (OPT_Wattributes,
		   "argument to %qE attribute is not "
		   "(keep|thunk|thunk-extern)", name);
	  *no_add_attrs = true;
	}
    }

  if (is_attribute_p ("indirect_branch_jump", name)
      && strcmp (TREE_STRING_POINTER (cst), "keep") != 0
      && strcmp (TREE_STRING_POINTER (cst), "thunk") != 0
      && strcmp (TREE_STRING_POINTER (cst), "thunk-inline") != 0
      && strcmp (TREE_STRING_POINTER (cst), "thunk-extern") != 0)
    {
      warning (OPT_Wattributes,
	       "argument to %qE attribute is not "
	       "(keep|thunk|thunk-inline|thunk-extern)", name);
      *no_add_attrs = true;
    }

  return NULL_TREE;
}

static const struct attribute_spec s390_attribute_table[] = {
  { "hotpatch", 2, 2, true, false, false, false,
    s390_handle_hotpatch_attribute, NULL },
  { "s390_vector_bool", 0, 0, false, true, false, true,
    s390_handle_vectorbool_attribute, NULL },
  { "indirect_branch", 1, 1, true, false, false, false,
    s390_handle_string_attribute, NULL },
  { "indirect_branch_jump", 1, 1, true, false, false, false,
    s390_handle_string_attribute, NULL },
  { "indirect_branch_call", 1, 1, true, false, false, false,
    s390_handle_string_attribute, NULL },
  { "function_return", 1, 1, true, false, false, false,
    s390_handle_string_attribute, NULL },
  { "function_return_reg", 1, 1, true, false, false, false,
    s390_handle_string_attribute, NULL },
  { "function_return_mem", 1, 1, true, false, false, false,
    s390_handle_string_attribute, NULL },

  /* End element.  */
  { NULL, 0, 0, false, false, false, false, NULL, NULL }
};

/* Return the alignment for LABEL.  We default to the -falign-labels
   value except for the literal pool base label.  */
int
s390_label_align (rtx_insn *label)
{
  rtx_insn *prev_insn = prev_active_insn (label);
  rtx set, src;

  if (prev_insn == NULL_RTX)
    goto old;

  set = single_set (prev_insn);

  if (set == NULL_RTX)
    goto old;

  src = SET_SRC (set);

  /* Don't align literal pool base labels.  */
  if (GET_CODE (src) == UNSPEC
      && XINT (src, 1) == UNSPEC_MAIN_BASE)
    return 0;

 old:
  return align_labels.levels[0].log;
}

static GTY(()) rtx got_symbol;

/* Return the GOT table symbol.  The symbol will be created when the
   function is invoked for the first time.  */

static rtx
s390_got_symbol (void)
{
  if (!got_symbol)
    {
      got_symbol = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
      SYMBOL_REF_FLAGS (got_symbol) = SYMBOL_FLAG_LOCAL;
    }

  return got_symbol;
}

static scalar_int_mode
s390_libgcc_cmp_return_mode (void)
{
  return TARGET_64BIT ? DImode : SImode;
}

static scalar_int_mode
s390_libgcc_shift_count_mode (void)
{
  return TARGET_64BIT ? DImode : SImode;
}

static scalar_int_mode
s390_unwind_word_mode (void)
{
  return TARGET_64BIT ? DImode : SImode;
}

/* Return true if the back end supports mode MODE.  */
static bool
s390_scalar_mode_supported_p (scalar_mode mode)
{
  /* In contrast to the default implementation, reject TImode constants
     on 31-bit TARGET_ZARCH for ABI compliance.  */
  if (!TARGET_64BIT && TARGET_ZARCH && mode == TImode)
    return false;

  if (DECIMAL_FLOAT_MODE_P (mode))
    return default_decimal_float_supported_p ();

  return default_scalar_mode_supported_p (mode);
}

/* Return true if the back end supports vector mode MODE.  */
static bool
s390_vector_mode_supported_p (machine_mode mode)
{
  machine_mode inner;

  if (!VECTOR_MODE_P (mode)
      || !TARGET_VX
      || GET_MODE_SIZE (mode) > 16)
    return false;

  inner = GET_MODE_INNER (mode);

  switch (inner)
    {
    case E_QImode:
    case E_HImode:
    case E_SImode:
    case E_DImode:
    case E_TImode:
    case E_SFmode:
    case E_DFmode:
    case E_TFmode:
      return true;
    default:
      return false;
    }
}
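
/* E.g. 16-byte modes with a supported inner mode such as V16QImode,
   V4SImode or V2DFmode are accepted when VX is enabled, while a 32-byte
   mode like V4DImode is rejected by the size check above.  */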

/* Set the has_landing_pad_p flag in struct machine_function to VALUE.  */

void
s390_set_has_landing_pad_p (bool value)
{
  cfun->machine->has_landing_pad_p = value;
}

/* If two condition code modes are compatible, return a condition code
   mode which is compatible with both.  Otherwise, return
   VOIDmode.  */

static machine_mode
s390_cc_modes_compatible (machine_mode m1, machine_mode m2)
{
  if (m1 == m2)
    return m1;

  switch (m1)
    {
    case E_CCZmode:
      if (m2 == CCUmode || m2 == CCTmode || m2 == CCZ1mode
	  || m2 == CCSmode || m2 == CCSRmode || m2 == CCURmode)
	return m2;
      return VOIDmode;

    case E_CCSmode:
    case E_CCUmode:
    case E_CCTmode:
    case E_CCSRmode:
    case E_CCURmode:
    case E_CCZ1mode:
      if (m2 == CCZmode)
	return m1;

      return VOIDmode;

    default:
      return VOIDmode;
    }
  return VOIDmode;
}

/* Return true if SET either doesn't set the CC register, or else
   the source and destination have matching CC modes and that
   CC mode is at least as constrained as REQ_MODE.  */

static bool
s390_match_ccmode_set (rtx set, machine_mode req_mode)
{
  machine_mode set_mode;

  gcc_assert (GET_CODE (set) == SET);

  /* These modes are supposed to be used only in CC consumer
     patterns.  */
  gcc_assert (req_mode != CCVIALLmode && req_mode != CCVIANYmode
	      && req_mode != CCVFALLmode && req_mode != CCVFANYmode);

  if (GET_CODE (SET_DEST (set)) != REG || !CC_REGNO_P (REGNO (SET_DEST (set))))
    return 1;

  set_mode = GET_MODE (SET_DEST (set));
  switch (set_mode)
    {
    case E_CCZ1mode:
    case E_CCSmode:
    case E_CCSRmode:
    case E_CCSFPSmode:
    case E_CCUmode:
    case E_CCURmode:
    case E_CCOmode:
    case E_CCLmode:
    case E_CCL1mode:
    case E_CCL2mode:
    case E_CCL3mode:
    case E_CCT1mode:
    case E_CCT2mode:
    case E_CCT3mode:
    case E_CCVEQmode:
    case E_CCVIHmode:
    case E_CCVIHUmode:
    case E_CCVFHmode:
    case E_CCVFHEmode:
      if (req_mode != set_mode)
	return 0;
      break;

    case E_CCZmode:
      if (req_mode != CCSmode && req_mode != CCUmode && req_mode != CCTmode
	  && req_mode != CCSRmode && req_mode != CCURmode
	  && req_mode != CCZ1mode)
	return 0;
      break;

    case E_CCAPmode:
    case E_CCANmode:
      if (req_mode != CCAmode)
	return 0;
      break;

    default:
      gcc_unreachable ();
    }

  return (GET_MODE (SET_SRC (set)) == set_mode);
}

/* Return true if every SET in INSN that sets the CC register
   has source and destination with matching CC modes and that
   CC mode is at least as constrained as REQ_MODE.
   If REQ_MODE is VOIDmode, always return false.  */

bool
s390_match_ccmode (rtx_insn *insn, machine_mode req_mode)
{
  int i;

  /* s390_tm_ccmode returns VOIDmode to indicate failure.  */
  if (req_mode == VOIDmode)
    return false;

  if (GET_CODE (PATTERN (insn)) == SET)
    return s390_match_ccmode_set (PATTERN (insn), req_mode);

  if (GET_CODE (PATTERN (insn)) == PARALLEL)
    for (i = 0; i < XVECLEN (PATTERN (insn), 0); i++)
      {
	rtx set = XVECEXP (PATTERN (insn), 0, i);
	if (GET_CODE (set) == SET)
	  if (!s390_match_ccmode_set (set, req_mode))
	    return false;
      }

  return true;
}

/* If a test-under-mask instruction can be used to implement
   (compare (and ... OP1) OP2), return the CC mode required
   to do that.  Otherwise, return VOIDmode.
   MIXED is true if the instruction can distinguish between
   CC1 and CC2 for mixed selected bits (TMxx), it is false
   if the instruction cannot (TM).  */

machine_mode
s390_tm_ccmode (rtx op1, rtx op2, bool mixed)
{
  int bit0, bit1;

  /* ??? Fixme: should work on CONST_WIDE_INT as well.  */
  if (GET_CODE (op1) != CONST_INT || GET_CODE (op2) != CONST_INT)
    return VOIDmode;

  /* Selected bits all zero: CC0.
     e.g.: int a; if ((a & (16 + 128)) == 0) */
  if (INTVAL (op2) == 0)
    return CCTmode;

  /* Selected bits all one: CC3.
     e.g.: int a; if ((a & (16 + 128)) == 16 + 128) */
  if (INTVAL (op2) == INTVAL (op1))
    return CCT3mode;

  /* Exactly two bits selected, mixed zeroes and ones: CC1 or CC2. e.g.:
     int a;
     if ((a & (16 + 128)) == 16) -> CCT1
     if ((a & (16 + 128)) == 128) -> CCT2  */
  if (mixed)
    {
      bit1 = exact_log2 (INTVAL (op2));
      bit0 = exact_log2 (INTVAL (op1) ^ INTVAL (op2));
      if (bit0 != -1 && bit1 != -1)
	return bit0 > bit1 ? CCT1mode : CCT2mode;
    }

  return VOIDmode;
}

/* Given a comparison code OP (EQ, NE, etc.) and the operands
   OP0 and OP1 of a COMPARE, return the mode to be used for the
   comparison.  */

machine_mode
s390_select_ccmode (enum rtx_code code, rtx op0, rtx op1)
{
  switch (code)
    {
    case EQ:
    case NE:
      if ((GET_CODE (op0) == NEG || GET_CODE (op0) == ABS)
	  && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)
	return CCAPmode;
      if (GET_CODE (op0) == PLUS && GET_CODE (XEXP (op0, 1)) == CONST_INT
	  && CONST_OK_FOR_K (INTVAL (XEXP (op0, 1))))
	return CCAPmode;
      if ((GET_CODE (op0) == PLUS || GET_CODE (op0) == MINUS
	   || GET_CODE (op1) == NEG)
	  && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)
	return CCLmode;

      if (GET_CODE (op0) == AND)
	{
	  /* Check whether we can potentially do it via TM.  */
	  machine_mode ccmode;
	  ccmode = s390_tm_ccmode (XEXP (op0, 1), op1, 1);
	  if (ccmode != VOIDmode)
	    {
	      /* Relax CCTmode to CCZmode to allow fall-back to AND
		 if that turns out to be beneficial.  */
	      return ccmode == CCTmode ? CCZmode : ccmode;
	    }
	}

      if (register_operand (op0, HImode)
	  && GET_CODE (op1) == CONST_INT
	  && (INTVAL (op1) == -1 || INTVAL (op1) == 65535))
	return CCT3mode;
      if (register_operand (op0, QImode)
	  && GET_CODE (op1) == CONST_INT
	  && (INTVAL (op1) == -1 || INTVAL (op1) == 255))
	return CCT3mode;

      return CCZmode;

    case LE:
    case LT:
    case GE:
    case GT:
      /* The only overflow condition of NEG and ABS happens when the
	 operand is the most negative representable value (INT_MIN):
	 the mathematically positive result is not representable and
	 wraps around to a negative value.  Using CCAP mode the
	 resulting cc can be used for comparisons.  */
      if ((GET_CODE (op0) == NEG || GET_CODE (op0) == ABS)
	  && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)
	return CCAPmode;

      /* If constants are involved in an add instruction it is possible to use
	 the resulting cc for comparisons with zero.  Knowing the sign of the
	 constant the overflow behavior gets predictable.  e.g.:
	   int a, b; if ((b = a + c) > 0)
	 with c as a constant value: c < 0 -> CCAN and c >= 0 -> CCAP  */
      if (GET_CODE (op0) == PLUS && GET_CODE (XEXP (op0, 1)) == CONST_INT
	  && (CONST_OK_FOR_K (INTVAL (XEXP (op0, 1)))
	      || (CONST_OK_FOR_CONSTRAINT_P (INTVAL (XEXP (op0, 1)), 'O', "Os")
		  /* Avoid INT32_MIN on 32 bit.  */
		  && (!TARGET_ZARCH || INTVAL (XEXP (op0, 1)) != -0x7fffffff - 1))))
	{
	  if (INTVAL (XEXP (op0, 1)) < 0)
	    return CCANmode;
	  else
	    return CCAPmode;
	}

      /* Fall through.  */
    case LTGT:
      if (HONOR_NANS (op0) || HONOR_NANS (op1))
	return CCSFPSmode;

      /* Fall through.  */
    case UNORDERED:
    case ORDERED:
    case UNEQ:
    case UNLE:
    case UNLT:
    case UNGE:
    case UNGT:
      if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
	  && GET_CODE (op1) != CONST_INT)
	return CCSRmode;
      return CCSmode;

    case LTU:
    case GEU:
      if (GET_CODE (op0) == PLUS
	  && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)
	return CCL1mode;

      if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
	  && GET_CODE (op1) != CONST_INT)
	return CCURmode;
      return CCUmode;

    case LEU:
    case GTU:
      if (GET_CODE (op0) == MINUS
	  && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)
	return CCL2mode;

      if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
	  && GET_CODE (op1) != CONST_INT)
	return CCURmode;
      return CCUmode;

    default:
      gcc_unreachable ();
    }
}

/* Replace the comparison OP0 CODE OP1 by a semantically equivalent one
   that we can implement more efficiently.  */

static void
s390_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
			      bool op0_preserve_value)
{
  if (op0_preserve_value)
    return;

  /* Convert ZERO_EXTRACT back to AND to enable TM patterns.  */
  if ((*code == EQ || *code == NE)
      && *op1 == const0_rtx
      && GET_CODE (*op0) == ZERO_EXTRACT
      && GET_CODE (XEXP (*op0, 1)) == CONST_INT
      && GET_CODE (XEXP (*op0, 2)) == CONST_INT
      && SCALAR_INT_MODE_P (GET_MODE (XEXP (*op0, 0))))
    {
      rtx inner = XEXP (*op0, 0);
      HOST_WIDE_INT modesize = GET_MODE_BITSIZE (GET_MODE (inner));
      HOST_WIDE_INT len = INTVAL (XEXP (*op0, 1));
      HOST_WIDE_INT pos = INTVAL (XEXP (*op0, 2));

      if (len > 0 && len < modesize
	  && pos >= 0 && pos + len <= modesize
	  && modesize <= HOST_BITS_PER_WIDE_INT)
	{
	  unsigned HOST_WIDE_INT block;
	  block = (HOST_WIDE_INT_1U << len) - 1;
	  block <<= modesize - pos - len;

	  *op0 = gen_rtx_AND (GET_MODE (inner), inner,
			      gen_int_mode (block, GET_MODE (inner)));
	}
    }
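
  /* E.g. for modesize 32, len 4 and pos 8 the computation above yields
     block = 0xf << (32 - 8 - 4) = 0x00f00000, i.e. an AND mask covering
     exactly the bits the ZERO_EXTRACT selected.  */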

  /* Narrow AND of memory against immediate to enable TM.  */
  if ((*code == EQ || *code == NE)
      && *op1 == const0_rtx
      && GET_CODE (*op0) == AND
      && GET_CODE (XEXP (*op0, 1)) == CONST_INT
      && SCALAR_INT_MODE_P (GET_MODE (XEXP (*op0, 0))))
    {
      rtx inner = XEXP (*op0, 0);
      rtx mask = XEXP (*op0, 1);

      /* Ignore paradoxical SUBREGs if all extra bits are masked out.  */
      if (GET_CODE (inner) == SUBREG
	  && SCALAR_INT_MODE_P (GET_MODE (SUBREG_REG (inner)))
	  && (GET_MODE_SIZE (GET_MODE (inner))
	      >= GET_MODE_SIZE (GET_MODE (SUBREG_REG (inner))))
	  && ((INTVAL (mask)
	       & GET_MODE_MASK (GET_MODE (inner))
	       & ~GET_MODE_MASK (GET_MODE (SUBREG_REG (inner))))
	      == 0))
	inner = SUBREG_REG (inner);

      /* Do not change volatile MEMs.  */
      if (MEM_P (inner) && !MEM_VOLATILE_P (inner))
	{
	  int part = s390_single_part (XEXP (*op0, 1),
				       GET_MODE (inner), QImode, 0);
	  if (part >= 0)
	    {
	      mask = gen_int_mode (s390_extract_part (mask, QImode, 0), QImode);
	      inner = adjust_address_nv (inner, QImode, part);
	      *op0 = gen_rtx_AND (QImode, inner, mask);
	    }
	}
    }

  /* Narrow comparisons against 0xffff to HImode if possible.  */
  if ((*code == EQ || *code == NE)
      && GET_CODE (*op1) == CONST_INT
      && INTVAL (*op1) == 0xffff
      && SCALAR_INT_MODE_P (GET_MODE (*op0))
      && (nonzero_bits (*op0, GET_MODE (*op0))
	  & ~HOST_WIDE_INT_UC (0xffff)) == 0)
    {
      *op0 = gen_lowpart (HImode, *op0);
      *op1 = constm1_rtx;
    }
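
  /* 0xffff is -1 in HImode, so this turns the comparison into the
     register-against-all-ones case that s390_select_ccmode maps to
     CCT3mode above.  */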

  /* Remove redundant UNSPEC_STRCMPCC_TO_INT conversions if possible.  */
  if (GET_CODE (*op0) == UNSPEC
      && XINT (*op0, 1) == UNSPEC_STRCMPCC_TO_INT
      && XVECLEN (*op0, 0) == 1
      && GET_MODE (XVECEXP (*op0, 0, 0)) == CCUmode
      && GET_CODE (XVECEXP (*op0, 0, 0)) == REG
      && REGNO (XVECEXP (*op0, 0, 0)) == CC_REGNUM
      && *op1 == const0_rtx)
    {
      enum rtx_code new_code = UNKNOWN;
      switch (*code)
	{
	case EQ: new_code = EQ;  break;
	case NE: new_code = NE;  break;
	case LT: new_code = GTU; break;
	case GT: new_code = LTU; break;
	case LE: new_code = GEU; break;
	case GE: new_code = LEU; break;
	default: break;
	}

      if (new_code != UNKNOWN)
	{
	  *op0 = XVECEXP (*op0, 0, 0);
	  *code = new_code;
	}
    }

  /* Remove redundant UNSPEC_CC_TO_INT conversions if possible.  */
  if (GET_CODE (*op0) == UNSPEC
      && XINT (*op0, 1) == UNSPEC_CC_TO_INT
      && XVECLEN (*op0, 0) == 1
      && GET_CODE (XVECEXP (*op0, 0, 0)) == REG
      && REGNO (XVECEXP (*op0, 0, 0)) == CC_REGNUM
      && CONST_INT_P (*op1))
    {
      enum rtx_code new_code = UNKNOWN;
      switch (GET_MODE (XVECEXP (*op0, 0, 0)))
	{
	case E_CCZmode:
	case E_CCRAWmode:
	  switch (*code)
	    {
	    case EQ: new_code = EQ; break;
	    case NE: new_code = NE; break;
	    default: break;
	    }
	  break;
	default: break;
	}

      if (new_code != UNKNOWN)
	{
	  /* For CCRAWmode put the required cc mask into the second
	     operand.  */
	  if (GET_MODE (XVECEXP (*op0, 0, 0)) == CCRAWmode
	      && INTVAL (*op1) >= 0 && INTVAL (*op1) <= 3)
	    *op1 = gen_rtx_CONST_INT (VOIDmode, 1 << (3 - INTVAL (*op1)));
	  *op0 = XVECEXP (*op0, 0, 0);
	  *code = new_code;
	}
    }

  /* Simplify cascaded EQ, NE with const0_rtx.  */
  if ((*code == NE || *code == EQ)
      && (GET_CODE (*op0) == EQ || GET_CODE (*op0) == NE)
      && GET_MODE (*op0) == SImode
      && GET_MODE (XEXP (*op0, 0)) == CCZ1mode
      && REG_P (XEXP (*op0, 0))
      && XEXP (*op0, 1) == const0_rtx
      && *op1 == const0_rtx)
    {
      if ((*code == EQ && GET_CODE (*op0) == NE)
	  || (*code == NE && GET_CODE (*op0) == EQ))
	*code = EQ;
      else
	*code = NE;
      *op0 = XEXP (*op0, 0);
    }

  /* Prefer register over memory as first operand.  */
  if (MEM_P (*op0) && REG_P (*op1))
    {
      rtx tem = *op0; *op0 = *op1; *op1 = tem;
      *code = (int)swap_condition ((enum rtx_code)*code);
    }

  /* A comparison result is compared against zero.  Replace it with
     the (perhaps inverted) original comparison.
     This probably should be done by simplify_relational_operation.  */
  if ((*code == EQ || *code == NE)
      && *op1 == const0_rtx
      && COMPARISON_P (*op0)
      && CC_REG_P (XEXP (*op0, 0)))
    {
      enum rtx_code new_code;

      if (*code == EQ)
	new_code = reversed_comparison_code_parts (GET_CODE (*op0),
						   XEXP (*op0, 0),
						   XEXP (*op0, 1), NULL);
      else
	new_code = GET_CODE (*op0);

      if (new_code != UNKNOWN)
	{
	  *code = new_code;
	  *op1 = XEXP (*op0, 1);
	  *op0 = XEXP (*op0, 0);
	}
    }

  /* ~a==b -> ~(a^b)==0   ~a!=b -> ~(a^b)!=0 */
  if (TARGET_Z15
      && (*code == EQ || *code == NE)
      && (GET_MODE (*op0) == DImode || GET_MODE (*op0) == SImode)
      && GET_CODE (*op0) == NOT)
    {
      machine_mode mode = GET_MODE (*op0);
      *op0 = gen_rtx_XOR (mode, XEXP (*op0, 0), *op1);
      *op0 = gen_rtx_NOT (mode, *op0);
      *op1 = const0_rtx;
    }

  /* a&b == -1 -> ~a|~b == 0    a|b == -1 -> ~a&~b == 0 */
  if (TARGET_Z15
      && (*code == EQ || *code == NE)
      && (GET_CODE (*op0) == AND || GET_CODE (*op0) == IOR)
      && (GET_MODE (*op0) == DImode || GET_MODE (*op0) == SImode)
      && CONST_INT_P (*op1)
      && *op1 == constm1_rtx)
    {
      machine_mode mode = GET_MODE (*op0);
      rtx op00 = gen_rtx_NOT (mode, XEXP (*op0, 0));
      rtx op01 = gen_rtx_NOT (mode, XEXP (*op0, 1));

      if (GET_CODE (*op0) == AND)
	*op0 = gen_rtx_IOR (mode, op00, op01);
      else
	*op0 = gen_rtx_AND (mode, op00, op01);

      *op1 = const0_rtx;
    }
}

1842
1843 /* Emit a compare instruction suitable to implement the comparison
1844 OP0 CODE OP1. Return the correct condition RTL to be placed in
1845 the IF_THEN_ELSE of the conditional branch testing the result. */
1846
1847 rtx
1848 s390_emit_compare (enum rtx_code code, rtx op0, rtx op1)
1849 {
1850 machine_mode mode = s390_select_ccmode (code, op0, op1);
1851 rtx cc;
1852
1853 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC)
1854 {
1855 /* Do not output a redundant compare instruction if a
1856 compare_and_swap pattern already computed the result and the
1857 machine modes are compatible. */
1858 gcc_assert (s390_cc_modes_compatible (GET_MODE (op0), mode)
1859 == GET_MODE (op0));
1860 cc = op0;
1861 }
1862 else
1863 {
1864 cc = gen_rtx_REG (mode, CC_REGNUM);
1865 emit_insn (gen_rtx_SET (cc, gen_rtx_COMPARE (mode, op0, op1)));
1866 }
1867
1868 return gen_rtx_fmt_ee (code, VOIDmode, cc, const0_rtx);
1869 }
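
/* For illustration only -- a sketch of typical use, not code from this
   file (op0, op1 and label stand for placeholder rtxes):

     rtx cond = s390_emit_compare (GT, op0, op1);
     s390_emit_jump (label, cond);

   The condition returned above ends up in the IF_THEN_ELSE emitted by
   s390_emit_jump below.  */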
1870
1871 /* If MEM is not a legitimate compare-and-swap memory operand, return a new
1872 MEM, whose address is a pseudo containing the original MEM's address. */
1873
1874 static rtx
1875 s390_legitimize_cs_operand (rtx mem)
1876 {
1877 rtx tmp;
1878
1879 if (!contains_symbol_ref_p (mem))
1880 return mem;
1881 tmp = gen_reg_rtx (Pmode);
1882 emit_move_insn (tmp, copy_rtx (XEXP (mem, 0)));
1883 return change_address (mem, VOIDmode, tmp);
1884 }
1885
1886 /* Emit a SImode compare and swap instruction setting MEM to NEW_RTX if OLD
1887 matches CMP.
1888 Return the correct condition RTL to be placed in the IF_THEN_ELSE of the
1889 conditional branch testing the result. */
1890
1891 static rtx
1892 s390_emit_compare_and_swap (enum rtx_code code, rtx old, rtx mem,
1893 rtx cmp, rtx new_rtx, machine_mode ccmode)
1894 {
1895 rtx cc;
1896
1897 mem = s390_legitimize_cs_operand (mem);
1898 cc = gen_rtx_REG (ccmode, CC_REGNUM);
1899 switch (GET_MODE (mem))
1900 {
1901 case E_SImode:
1902 emit_insn (gen_atomic_compare_and_swapsi_internal (old, mem, cmp,
1903 new_rtx, cc));
1904 break;
1905 case E_DImode:
1906 emit_insn (gen_atomic_compare_and_swapdi_internal (old, mem, cmp,
1907 new_rtx, cc));
1908 break;
1909 case E_TImode:
1910 emit_insn (gen_atomic_compare_and_swapti_internal (old, mem, cmp,
1911 new_rtx, cc));
1912 break;
1913 case E_QImode:
1914 case E_HImode:
1915 default:
1916 gcc_unreachable ();
1917 }
1918 return s390_emit_compare (code, cc, const0_rtx);
1919 }
1920
1921 /* Emit a jump instruction to TARGET and return it. If COND is
1922 NULL_RTX, emit an unconditional jump, else a conditional jump under
1923 condition COND. */
1924
1925 rtx_insn *
1926 s390_emit_jump (rtx target, rtx cond)
1927 {
1928 rtx insn;
1929
1930 target = gen_rtx_LABEL_REF (VOIDmode, target);
1931 if (cond)
1932 target = gen_rtx_IF_THEN_ELSE (VOIDmode, cond, target, pc_rtx);
1933
1934 insn = gen_rtx_SET (pc_rtx, target);
1935 return emit_jump_insn (insn);
1936 }
1937
1938 /* Return branch condition mask to implement a branch
1939 specified by CODE. Return -1 for invalid comparisons. */
1940
1941 int
1942 s390_branch_condition_mask (rtx code)
1943 {
1944 const int CC0 = 1 << 3;
1945 const int CC1 = 1 << 2;
1946 const int CC2 = 1 << 1;
1947 const int CC3 = 1 << 0;
1948
1949 gcc_assert (GET_CODE (XEXP (code, 0)) == REG);
1950 gcc_assert (REGNO (XEXP (code, 0)) == CC_REGNUM);
1951 gcc_assert (XEXP (code, 1) == const0_rtx
1952 || (GET_MODE (XEXP (code, 0)) == CCRAWmode
1953 && CONST_INT_P (XEXP (code, 1))));
1954
1955
1956 switch (GET_MODE (XEXP (code, 0)))
1957 {
1958 case E_CCZmode:
1959 case E_CCZ1mode:
1960 switch (GET_CODE (code))
1961 {
1962 case EQ: return CC0;
1963 case NE: return CC1 | CC2 | CC3;
1964 default: return -1;
1965 }
1966 break;
1967
1968 case E_CCT1mode:
1969 switch (GET_CODE (code))
1970 {
1971 case EQ: return CC1;
1972 case NE: return CC0 | CC2 | CC3;
1973 default: return -1;
1974 }
1975 break;
1976
1977 case E_CCT2mode:
1978 switch (GET_CODE (code))
1979 {
1980 case EQ: return CC2;
1981 case NE: return CC0 | CC1 | CC3;
1982 default: return -1;
1983 }
1984 break;
1985
1986 case E_CCT3mode:
1987 switch (GET_CODE (code))
1988 {
1989 case EQ: return CC3;
1990 case NE: return CC0 | CC1 | CC2;
1991 default: return -1;
1992 }
1993 break;
1994
1995 case E_CCLmode:
1996 switch (GET_CODE (code))
1997 {
1998 case EQ: return CC0 | CC2;
1999 case NE: return CC1 | CC3;
2000 default: return -1;
2001 }
2002 break;
2003
2004 case E_CCL1mode:
2005 switch (GET_CODE (code))
2006 {
2007 case LTU: return CC2 | CC3; /* carry */
2008 case GEU: return CC0 | CC1; /* no carry */
2009 default: return -1;
2010 }
2011 break;
2012
2013 case E_CCL2mode:
2014 switch (GET_CODE (code))
2015 {
2016 case GTU: return CC0 | CC1; /* borrow */
2017 case LEU: return CC2 | CC3; /* no borrow */
2018 default: return -1;
2019 }
2020 break;
2021
2022 case E_CCL3mode:
2023 switch (GET_CODE (code))
2024 {
2025 case EQ: return CC0 | CC2;
2026 case NE: return CC1 | CC3;
2027 case LTU: return CC1;
2028 case GTU: return CC3;
2029 case LEU: return CC1 | CC2;
2030 case GEU: return CC2 | CC3;
2031 default: return -1;
2032 }
2033
2034 case E_CCUmode:
2035 switch (GET_CODE (code))
2036 {
2037 case EQ: return CC0;
2038 case NE: return CC1 | CC2 | CC3;
2039 case LTU: return CC1;
2040 case GTU: return CC2;
2041 case LEU: return CC0 | CC1;
2042 case GEU: return CC0 | CC2;
2043 default: return -1;
2044 }
2045 break;
2046
2047 case E_CCURmode:
2048 switch (GET_CODE (code))
2049 {
2050 case EQ: return CC0;
2051 case NE: return CC2 | CC1 | CC3;
2052 case LTU: return CC2;
2053 case GTU: return CC1;
2054 case LEU: return CC0 | CC2;
2055 case GEU: return CC0 | CC1;
2056 default: return -1;
2057 }
2058 break;
2059
2060 case E_CCAPmode:
2061 switch (GET_CODE (code))
2062 {
2063 case EQ: return CC0;
2064 case NE: return CC1 | CC2 | CC3;
2065 case LT: return CC1 | CC3;
2066 case GT: return CC2;
2067 case LE: return CC0 | CC1 | CC3;
2068 case GE: return CC0 | CC2;
2069 default: return -1;
2070 }
2071 break;
2072
2073 case E_CCANmode:
2074 switch (GET_CODE (code))
2075 {
2076 case EQ: return CC0;
2077 case NE: return CC1 | CC2 | CC3;
2078 case LT: return CC1;
2079 case GT: return CC2 | CC3;
2080 case LE: return CC0 | CC1;
2081 case GE: return CC0 | CC2 | CC3;
2082 default: return -1;
2083 }
2084 break;
2085
2086 case E_CCOmode:
2087 switch (GET_CODE (code))
2088 {
2089 case EQ: return CC0 | CC1 | CC2;
2090 case NE: return CC3;
2091 default: return -1;
2092 }
2093 break;
2094
2095 case E_CCSmode:
2096 case E_CCSFPSmode:
2097 switch (GET_CODE (code))
2098 {
2099 case EQ: return CC0;
2100 case NE: return CC1 | CC2 | CC3;
2101 case LT: return CC1;
2102 case GT: return CC2;
2103 case LE: return CC0 | CC1;
2104 case GE: return CC0 | CC2;
2105 case UNORDERED: return CC3;
2106 case ORDERED: return CC0 | CC1 | CC2;
2107 case UNEQ: return CC0 | CC3;
2108 case UNLT: return CC1 | CC3;
2109 case UNGT: return CC2 | CC3;
2110 case UNLE: return CC0 | CC1 | CC3;
2111 case UNGE: return CC0 | CC2 | CC3;
2112 case LTGT: return CC1 | CC2;
2113 default: return -1;
2114 }
2115 break;
2116
2117 case E_CCSRmode:
2118 switch (GET_CODE (code))
2119 {
2120 case EQ: return CC0;
2121 case NE: return CC2 | CC1 | CC3;
2122 case LT: return CC2;
2123 case GT: return CC1;
2124 case LE: return CC0 | CC2;
2125 case GE: return CC0 | CC1;
2126 case UNORDERED: return CC3;
2127 case ORDERED: return CC0 | CC2 | CC1;
2128 case UNEQ: return CC0 | CC3;
2129 case UNLT: return CC2 | CC3;
2130 case UNGT: return CC1 | CC3;
2131 case UNLE: return CC0 | CC2 | CC3;
2132 case UNGE: return CC0 | CC1 | CC3;
2133 case LTGT: return CC2 | CC1;
2134 default: return -1;
2135 }
2136 break;
2137
2138 /* Vector comparison modes. */
2139 /* CC2 will never be set. It is, however, part of the negated
2140 masks. */
2141 case E_CCVIALLmode:
2142 switch (GET_CODE (code))
2143 {
2144 case EQ:
2145 case GTU:
2146 case GT:
2147 case GE: return CC0;
2148 /* The inverted modes are in fact *any* modes. */
2149 case NE:
2150 case LEU:
2151 case LE:
2152 case LT: return CC3 | CC1 | CC2;
2153 default: return -1;
2154 }
2155
2156 case E_CCVIANYmode:
2157 switch (GET_CODE (code))
2158 {
2159 case EQ:
2160 case GTU:
2161 case GT:
2162 case GE: return CC0 | CC1;
2163 /* The inverted modes are in fact *all* modes. */
2164 case NE:
2165 case LEU:
2166 case LE:
2167 case LT: return CC3 | CC2;
2168 default: return -1;
2169 }
2170 case E_CCVFALLmode:
2171 switch (GET_CODE (code))
2172 {
2173 case EQ:
2174 case GT:
2175 case GE: return CC0;
2176 /* The inverted modes are in fact *any* modes. */
2177 case NE:
2178 case UNLE:
2179 case UNLT: return CC3 | CC1 | CC2;
2180 default: return -1;
2181 }
2182
2183 case E_CCVFANYmode:
2184 switch (GET_CODE (code))
2185 {
2186 case EQ:
2187 case GT:
2188 case GE: return CC0 | CC1;
2189 /* The inverted modes are in fact *all* modes. */
2190 case NE:
2191 case UNLE:
2192 case UNLT: return CC3 | CC2;
2193 default: return -1;
2194 }
2195
2196 case E_CCRAWmode:
2197 switch (GET_CODE (code))
2198 {
2199 case EQ:
2200 return INTVAL (XEXP (code, 1));
2201 case NE:
2202 return (INTVAL (XEXP (code, 1))) ^ 0xf;
2203 default:
2204 gcc_unreachable ();
2205 }
2206
2207 default:
2208 return -1;
2209 }
2210 }
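
/* Worked example for the function above (illustrative): with CC0..CC3
   being the mask bits 8, 4, 2 and 1, an LEU test in CCUmode yields
   CC0 | CC1 = 12, i.e. "branch if the condition code is 0 (equal)
   or 1 (low)".  */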
2211
2212
2213 /* Return branch condition mask to implement a compare and branch
2214 specified by CODE. Return -1 for invalid comparisons. */
2215
2216 int
2217 s390_compare_and_branch_condition_mask (rtx code)
2218 {
2219 const int CC0 = 1 << 3;
2220 const int CC1 = 1 << 2;
2221 const int CC2 = 1 << 1;
2222
2223 switch (GET_CODE (code))
2224 {
2225 case EQ:
2226 return CC0;
2227 case NE:
2228 return CC1 | CC2;
2229 case LT:
2230 case LTU:
2231 return CC1;
2232 case GT:
2233 case GTU:
2234 return CC2;
2235 case LE:
2236 case LEU:
2237 return CC0 | CC1;
2238 case GE:
2239 case GEU:
2240 return CC0 | CC2;
2241 default:
2242 gcc_unreachable ();
2243 }
2244 return -1;
2245 }
2246
2247 /* If INV is false, return assembler mnemonic string to implement
2248 a branch specified by CODE. If INV is true, return mnemonic
2249 for the corresponding inverted branch. */
2250
2251 static const char *
2252 s390_branch_condition_mnemonic (rtx code, int inv)
2253 {
2254 int mask;
2255
2256 static const char *const mnemonic[16] =
2257 {
2258 NULL, "o", "h", "nle",
2259 "l", "nhe", "lh", "ne",
2260 "e", "nlh", "he", "nl",
2261 "le", "nh", "no", NULL
2262 };
2263
2264 if (GET_CODE (XEXP (code, 0)) == REG
2265 && REGNO (XEXP (code, 0)) == CC_REGNUM
2266 && (XEXP (code, 1) == const0_rtx
2267 || (GET_MODE (XEXP (code, 0)) == CCRAWmode
2268 && CONST_INT_P (XEXP (code, 1)))))
2269 mask = s390_branch_condition_mask (code);
2270 else
2271 mask = s390_compare_and_branch_condition_mask (code);
2272
2273 gcc_assert (mask >= 0);
2274
2275 if (inv)
2276 mask ^= 15;
2277
2278 gcc_assert (mask >= 1 && mask <= 14);
2279
2280 return mnemonic[mask];
2281 }
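
/* E.g. (eq (reg:CCZ 33) (const_int 0)) -- with 33 standing for
   CC_REGNUM -- gives mask CC0 = 8 and thus mnemonic "e"; with INV set
   the mask becomes 8 ^ 15 = 7, i.e. "ne" (illustrative example).  */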
2282
2283 /* Return the part of OP which has a value different from DEF.
2284 The size of the part is determined by MODE.
2285 Use this function only if you already know that OP really
2286 contains such a part. */
2287
2288 unsigned HOST_WIDE_INT
2289 s390_extract_part (rtx op, machine_mode mode, int def)
2290 {
2291 unsigned HOST_WIDE_INT value = 0;
2292 int max_parts = HOST_BITS_PER_WIDE_INT / GET_MODE_BITSIZE (mode);
2293 int part_bits = GET_MODE_BITSIZE (mode);
2294 unsigned HOST_WIDE_INT part_mask = (HOST_WIDE_INT_1U << part_bits) - 1;
2295 int i;
2296
2297 for (i = 0; i < max_parts; i++)
2298 {
2299 if (i == 0)
2300 value = UINTVAL (op);
2301 else
2302 value >>= part_bits;
2303
2304 if ((value & part_mask) != (def & part_mask))
2305 return value & part_mask;
2306 }
2307
2308 gcc_unreachable ();
2309 }
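
/* Worked example (illustrative): for OP = 0x00ff0000, MODE = HImode
   and DEF = 0, the parts are scanned from the least significant end:
   part 0 is 0x0000 (equal to DEF), part 1 is 0x00ff (different), so
   0xff is returned.  */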
2310
2311 /* If OP is an integer constant of mode MODE with exactly one
2312 part of mode PART_MODE unequal to DEF, return the number of that
2313 part. Otherwise, return -1. */
2314
2315 int
2316 s390_single_part (rtx op,
2317 machine_mode mode,
2318 machine_mode part_mode,
2319 int def)
2320 {
2321 unsigned HOST_WIDE_INT value = 0;
2322 int n_parts = GET_MODE_SIZE (mode) / GET_MODE_SIZE (part_mode);
2323 unsigned HOST_WIDE_INT part_mask
2324 = (HOST_WIDE_INT_1U << GET_MODE_BITSIZE (part_mode)) - 1;
2325 int i, part = -1;
2326
2327 if (GET_CODE (op) != CONST_INT)
2328 return -1;
2329
2330 for (i = 0; i < n_parts; i++)
2331 {
2332 if (i == 0)
2333 value = UINTVAL (op);
2334 else
2335 value >>= GET_MODE_BITSIZE (part_mode);
2336
2337 if ((value & part_mask) != (def & part_mask))
2338 {
2339 if (part != -1)
2340 return -1;
2341 else
2342 part = i;
2343 }
2344 }
2345 return part == -1 ? -1 : n_parts - 1 - part;
2346 }
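
/* Worked example (illustrative): for OP = 0x000000000000ff00, MODE =
   DImode, PART_MODE = QImode and DEF = 0, exactly one byte differs
   from DEF; it is byte 6 counting from the most significant byte, so
   6 is returned.  A value with two nonzero bytes would yield -1.  */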
2347
2348 /* Return true if IN contains a contiguous bitfield in the lower SIZE
2349 bits and no other bits are set in (the lower SIZE bits of) IN.
2350
2351 PSTART and PEND can be used to obtain the start and end
2352 position (inclusive) of the bitfield relative to 64
2353 bits. *PSTART / *PEND gives the position of the first/last bit
2354 of the bitfield counting from the highest order bit starting
2355 with zero. */
2356
2357 bool
2358 s390_contiguous_bitmask_nowrap_p (unsigned HOST_WIDE_INT in, int size,
2359 int *pstart, int *pend)
2360 {
2361 int start;
2362 int end = -1;
2363 int lowbit = HOST_BITS_PER_WIDE_INT - 1;
2364 int highbit = HOST_BITS_PER_WIDE_INT - size;
2365 unsigned HOST_WIDE_INT bitmask = HOST_WIDE_INT_1U;
2366
2367 gcc_assert (!!pstart == !!pend);
2368 for (start = lowbit; start >= highbit; bitmask <<= 1, start--)
2369 if (end == -1)
2370 {
2371 /* Look for the rightmost bit of a contiguous range of ones. */
2372 if (bitmask & in)
2373 /* Found it. */
2374 end = start;
2375 }
2376 else
2377 {
2378 /* Look for the first zero bit after the range of ones. */
2379 if (! (bitmask & in))
2380 /* Found it. */
2381 break;
2382 }
2383 /* We're one past the last one-bit. */
2384 start++;
2385
2386 if (end == -1)
2387 /* No one bits found. */
2388 return false;
2389
2390 if (start > highbit)
2391 {
2392 unsigned HOST_WIDE_INT mask;
2393
2394 /* Calculate a mask for all bits beyond the contiguous bits. */
2395 mask = ((~HOST_WIDE_INT_0U >> highbit)
2396 & (~HOST_WIDE_INT_0U << (lowbit - start + 1)));
2397 if (mask & in)
2398 /* There are more bits set beyond the first range of one bits. */
2399 return false;
2400 }
2401
2402 if (pstart)
2403 {
2404 *pstart = start;
2405 *pend = end;
2406 }
2407
2408 return true;
2409 }
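
/* Worked example (illustrative): IN = 0x0ff0 with SIZE = 16 is a
   contiguous range of eight ones; counting from the highest order bit
   of the 64-bit value, *PSTART becomes 52 and *PEND becomes 59.
   IN = 0x0f0f fails because a second range of ones follows the
   first.  */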
2410
2411 /* Same as s390_contiguous_bitmask_nowrap_p but also returns true
2412 if ~IN contains a contiguous bitfield. In that case, *END is <
2413 *START.
2414
2415 If WRAP_P is true, a bitmask that wraps around is also tested.
2416 When a wraparound occurs, *START is greater than *END (in
2417 non-null pointers), and the uppermost (64 - SIZE) bits are thus
2418 part of the range. If WRAP_P is false, no wraparound is
2419 tested. */
2420
2421 bool
2422 s390_contiguous_bitmask_p (unsigned HOST_WIDE_INT in, bool wrap_p,
2423 int size, int *start, int *end)
2424 {
2425 int bs = HOST_BITS_PER_WIDE_INT;
2426 bool b;
2427
2428 gcc_assert (!!start == !!end);
2429 if ((in & ((~HOST_WIDE_INT_0U) >> (bs - size))) == 0)
2430 /* This cannot be expressed as a contiguous bitmask. Exit early because
2431 the second call of s390_contiguous_bitmask_nowrap_p would accept this as
2432 a valid bitmask. */
2433 return false;
2434 b = s390_contiguous_bitmask_nowrap_p (in, size, start, end);
2435 if (b)
2436 return true;
2437 if (! wrap_p)
2438 return false;
2439 b = s390_contiguous_bitmask_nowrap_p (~in, size, start, end);
2440 if (b && start)
2441 {
2442 int s = *start;
2443 int e = *end;
2444
2445 gcc_assert (s >= 1);
2446 *start = ((e + 1) & (bs - 1));
2447 *end = ((s - 1 + bs) & (bs - 1));
2448 }
2449
2450 return b;
2451 }
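
/* Worked example (illustrative): IN = 0xff000000000000ff with
   WRAP_P = true and SIZE = 64 is not contiguous itself, but ~IN is;
   the wraparound fixup then reports *START = 56 and *END = 7, i.e. a
   bitfield wrapping around the bit 63/bit 0 boundary.  */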
2452
2453 /* Return true if OP contains the same contiguous bitfield in *all*
2454 its elements. START and END can be used to obtain the start and
2455 end position of the bitfield.
2456
2457 START/END give the position of the first/last bit of the bitfield
2458 counting from the lowest order bit starting with zero. In order to
2459 use these values for S/390 instructions this has to be converted to
2460 "bits big endian" style. */
2461
2462 bool
2463 s390_contiguous_bitmask_vector_p (rtx op, int *start, int *end)
2464 {
2465 unsigned HOST_WIDE_INT mask;
2466 int size;
2467 rtx elt;
2468 bool b;
2469
2470 gcc_assert (!!start == !!end);
2471 if (!const_vec_duplicate_p (op, &elt)
2472 || !CONST_INT_P (elt))
2473 return false;
2474
2475 size = GET_MODE_UNIT_BITSIZE (GET_MODE (op));
2476
2477 /* We cannot deal with V1TI/V1TF. This would require a vgmq. */
2478 if (size > 64)
2479 return false;
2480
2481 mask = UINTVAL (elt);
2482
2483 b = s390_contiguous_bitmask_p (mask, true, size, start, end);
2484 if (b)
2485 {
2486 if (start)
2487 {
2488 *start -= (HOST_BITS_PER_WIDE_INT - size);
2489 *end -= (HOST_BITS_PER_WIDE_INT - size);
2490 }
2491 return true;
2492 }
2493 else
2494 return false;
2495 }
2496
2497 /* Return true if C consists only of byte chunks being either 0 or
2498 0xff. If MASK is != NULL, a byte mask is generated which is
2499 appropriate for the vector generate byte mask instruction. */
2500
2501 bool
2502 s390_bytemask_vector_p (rtx op, unsigned *mask)
2503 {
2504 int i;
2505 unsigned tmp_mask = 0;
2506 int nunit, unit_size;
2507
2508 if (!VECTOR_MODE_P (GET_MODE (op))
2509 || GET_CODE (op) != CONST_VECTOR
2510 || !CONST_INT_P (XVECEXP (op, 0, 0)))
2511 return false;
2512
2513 nunit = GET_MODE_NUNITS (GET_MODE (op));
2514 unit_size = GET_MODE_UNIT_SIZE (GET_MODE (op));
2515
2516 for (i = 0; i < nunit; i++)
2517 {
2518 unsigned HOST_WIDE_INT c;
2519 int j;
2520
2521 if (!CONST_INT_P (XVECEXP (op, 0, i)))
2522 return false;
2523
2524 c = UINTVAL (XVECEXP (op, 0, i));
2525 for (j = 0; j < unit_size; j++)
2526 {
2527 if ((c & 0xff) != 0 && (c & 0xff) != 0xff)
2528 return false;
2529 tmp_mask |= (c & 1) << ((nunit - 1 - i) * unit_size + j);
2530 c = c >> BITS_PER_UNIT;
2531 }
2532 }
2533
2534 if (mask != NULL)
2535 *mask = tmp_mask;
2536
2537 return true;
2538 }
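
/* Worked example (illustrative): the V2DImode constant
   { 0xffffffff00000000, 0x00000000000000ff } consists only of 0x00
   and 0xff bytes; the resulting mask is 0xf001, where the most
   significant mask bit corresponds to byte 0 of the vector.  */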
2539
2540 /* Check whether a rotate of ROTL followed by an AND of CONTIG is
2541 equivalent to a shift followed by the AND. In particular, CONTIG
2542 should not overlap the (rotated) bit 0/bit 63 gap. Negative values
2543 for ROTL indicate a rotate to the right. */
2544
2545 bool
2546 s390_extzv_shift_ok (int bitsize, int rotl, unsigned HOST_WIDE_INT contig)
2547 {
2548 int start, end;
2549 bool ok;
2550
2551 ok = s390_contiguous_bitmask_nowrap_p (contig, bitsize, &start, &end);
2552 gcc_assert (ok);
2553
2554 if (rotl >= 0)
2555 return (64 - end >= rotl);
2556 else
2557 {
2558 /* Translate "- rotate right" in BITSIZE mode to "rotate left" in
2559 DImode. */
2560 rotl = -rotl + (64 - bitsize);
2561 return (start >= rotl);
2562 }
2563 }
2564
2565 /* Check whether we can (and want to) split a double-word
2566 move in mode MODE from SRC to DST into two single-word
2567 moves, moving the subword FIRST_SUBWORD first. */
2568
2569 bool
2570 s390_split_ok_p (rtx dst, rtx src, machine_mode mode, int first_subword)
2571 {
2572 /* Floating point and vector registers cannot be split. */
2573 if (FP_REG_P (src) || FP_REG_P (dst) || VECTOR_REG_P (src) || VECTOR_REG_P (dst))
2574 return false;
2575
2576 /* Non-offsettable memory references cannot be split. */
2577 if ((GET_CODE (src) == MEM && !offsettable_memref_p (src))
2578 || (GET_CODE (dst) == MEM && !offsettable_memref_p (dst)))
2579 return false;
2580
2581 /* Moving the first subword must not clobber a register
2582 needed to move the second subword. */
2583 if (register_operand (dst, mode))
2584 {
2585 rtx subreg = operand_subword (dst, first_subword, 0, mode);
2586 if (reg_overlap_mentioned_p (subreg, src))
2587 return false;
2588 }
2589
2590 return true;
2591 }
2592
2593 /* Return true if it can be proven that [MEM1, MEM1 + SIZE]
2594 and [MEM2, MEM2 + SIZE] do overlap, and false
2595 otherwise. */
2596
2597 bool
2598 s390_overlap_p (rtx mem1, rtx mem2, HOST_WIDE_INT size)
2599 {
2600 rtx addr1, addr2, addr_delta;
2601 HOST_WIDE_INT delta;
2602
2603 if (GET_CODE (mem1) != MEM || GET_CODE (mem2) != MEM)
2604 return true;
2605
2606 if (size == 0)
2607 return false;
2608
2609 addr1 = XEXP (mem1, 0);
2610 addr2 = XEXP (mem2, 0);
2611
2612 addr_delta = simplify_binary_operation (MINUS, Pmode, addr2, addr1);
2613
2614 /* This overlapping check is used by peepholes merging memory block operations.
2615 Overlapping operations would otherwise be recognized by the S/390 hardware
2616 and would fall back to a slower implementation. Allowing overlapping
2617 operations would lead to slow code but not to wrong code. Therefore we are
2618 somewhat optimistic if we cannot prove that the memory blocks are
2619 overlapping.
2620 That's why we return false here although this may accept operations on
2621 overlapping memory areas. */
2622 if (!addr_delta || GET_CODE (addr_delta) != CONST_INT)
2623 return false;
2624
2625 delta = INTVAL (addr_delta);
2626
2627 if (delta == 0
2628 || (delta > 0 && delta < size)
2629 || (delta < 0 && -delta < size))
2630 return true;
2631
2632 return false;
2633 }
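
/* E.g. two MEMs with addresses (reg 2) and (plus (reg 2) (const_int 8))
   overlap for SIZE = 16 since the delta 8 is smaller than the size;
   for two unrelated base registers no constant delta can be computed
   and the function optimistically returns false (illustrative).  */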
2634
2635 /* Check whether the address of memory reference MEM2 equals exactly
2636 the address of memory reference MEM1 plus DELTA. Return true if
2637 we can prove this to be the case, false otherwise. */
2638
2639 bool
2640 s390_offset_p (rtx mem1, rtx mem2, rtx delta)
2641 {
2642 rtx addr1, addr2, addr_delta;
2643
2644 if (GET_CODE (mem1) != MEM || GET_CODE (mem2) != MEM)
2645 return false;
2646
2647 addr1 = XEXP (mem1, 0);
2648 addr2 = XEXP (mem2, 0);
2649
2650 addr_delta = simplify_binary_operation (MINUS, Pmode, addr2, addr1);
2651 if (!addr_delta || !rtx_equal_p (addr_delta, delta))
2652 return false;
2653
2654 return true;
2655 }
2656
2657 /* Expand logical operator CODE in mode MODE with operands OPERANDS. */
2658
2659 void
2660 s390_expand_logical_operator (enum rtx_code code, machine_mode mode,
2661 rtx *operands)
2662 {
2663 machine_mode wmode = mode;
2664 rtx dst = operands[0];
2665 rtx src1 = operands[1];
2666 rtx src2 = operands[2];
2667 rtx op, clob, tem;
2668
2669 /* If we cannot handle the operation directly, use a temp register. */
2670 if (!s390_logical_operator_ok_p (operands))
2671 dst = gen_reg_rtx (mode);
2672
2673 /* QImode and HImode patterns make sense only if we have a destination
2674 in memory. Otherwise perform the operation in SImode. */
2675 if ((mode == QImode || mode == HImode) && GET_CODE (dst) != MEM)
2676 wmode = SImode;
2677
2678 /* Widen operands if required. */
2679 if (mode != wmode)
2680 {
2681 if (GET_CODE (dst) == SUBREG
2682 && (tem = simplify_subreg (wmode, dst, mode, 0)) != 0)
2683 dst = tem;
2684 else if (REG_P (dst))
2685 dst = gen_rtx_SUBREG (wmode, dst, 0);
2686 else
2687 dst = gen_reg_rtx (wmode);
2688
2689 if (GET_CODE (src1) == SUBREG
2690 && (tem = simplify_subreg (wmode, src1, mode, 0)) != 0)
2691 src1 = tem;
2692 else if (GET_MODE (src1) != VOIDmode)
2693 src1 = gen_rtx_SUBREG (wmode, force_reg (mode, src1), 0);
2694
2695 if (GET_CODE (src2) == SUBREG
2696 && (tem = simplify_subreg (wmode, src2, mode, 0)) != 0)
2697 src2 = tem;
2698 else if (GET_MODE (src2) != VOIDmode)
2699 src2 = gen_rtx_SUBREG (wmode, force_reg (mode, src2), 0);
2700 }
2701
2702 /* Emit the instruction. */
2703 op = gen_rtx_SET (dst, gen_rtx_fmt_ee (code, wmode, src1, src2));
2704 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, CC_REGNUM));
2705 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
2706
2707 /* Fix up the destination if needed. */
2708 if (dst != operands[0])
2709 emit_move_insn (operands[0], gen_lowpart (mode, dst));
2710 }
2711
2712 /* Check whether OPERANDS are OK for a logical operation (AND, IOR, XOR). */
2713
2714 bool
2715 s390_logical_operator_ok_p (rtx *operands)
2716 {
2717 /* If the destination operand is in memory, it needs to coincide
2718 with one of the source operands. After reload, it has to be
2719 the first source operand. */
2720 if (GET_CODE (operands[0]) == MEM)
2721 return rtx_equal_p (operands[0], operands[1])
2722 || (!reload_completed && rtx_equal_p (operands[0], operands[2]));
2723
2724 return true;
2725 }
2726
2727 /* Narrow logical operation CODE of memory operand MEMOP with immediate
2728 operand IMMOP to switch from SS to SI type instructions. */
2729
2730 void
2731 s390_narrow_logical_operator (enum rtx_code code, rtx *memop, rtx *immop)
2732 {
2733 int def = code == AND ? -1 : 0;
2734 HOST_WIDE_INT mask;
2735 int part;
2736
2737 gcc_assert (GET_CODE (*memop) == MEM);
2738 gcc_assert (!MEM_VOLATILE_P (*memop));
2739
2740 mask = s390_extract_part (*immop, QImode, def);
2741 part = s390_single_part (*immop, GET_MODE (*memop), QImode, def);
2742 gcc_assert (part >= 0);
2743
2744 *memop = adjust_address (*memop, QImode, part);
2745 *immop = gen_int_mode (mask, QImode);
2746 }
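
/* Worked example (illustrative): an SImode AND of a MEM with the
   immediate 0xffffff00 differs from the AND default -1 only in byte 3,
   the least significant byte; the operation is narrowed to a QImode
   AND of that byte with 0x00, i.e. a single SI-type NI instruction
   instead of an SS-type NC.  */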
2747
2748
2749 /* How to allocate a 'struct machine_function'. */
2750
2751 static struct machine_function *
2752 s390_init_machine_status (void)
2753 {
2754 return ggc_cleared_alloc<machine_function> ();
2755 }
2756
2757 /* Map for smallest class containing reg regno. */
2758
2759 const enum reg_class regclass_map[FIRST_PSEUDO_REGISTER] =
2760 { GENERAL_REGS, ADDR_REGS, ADDR_REGS, ADDR_REGS, /* 0 */
2761 ADDR_REGS, ADDR_REGS, ADDR_REGS, ADDR_REGS, /* 4 */
2762 ADDR_REGS, ADDR_REGS, ADDR_REGS, ADDR_REGS, /* 8 */
2763 ADDR_REGS, ADDR_REGS, ADDR_REGS, ADDR_REGS, /* 12 */
2764 FP_REGS, FP_REGS, FP_REGS, FP_REGS, /* 16 */
2765 FP_REGS, FP_REGS, FP_REGS, FP_REGS, /* 20 */
2766 FP_REGS, FP_REGS, FP_REGS, FP_REGS, /* 24 */
2767 FP_REGS, FP_REGS, FP_REGS, FP_REGS, /* 28 */
2768 ADDR_REGS, CC_REGS, ADDR_REGS, ADDR_REGS, /* 32 */
2769 ACCESS_REGS, ACCESS_REGS, VEC_REGS, VEC_REGS, /* 36 */
2770 VEC_REGS, VEC_REGS, VEC_REGS, VEC_REGS, /* 40 */
2771 VEC_REGS, VEC_REGS, VEC_REGS, VEC_REGS, /* 44 */
2772 VEC_REGS, VEC_REGS, VEC_REGS, VEC_REGS, /* 48 */
2773 VEC_REGS, VEC_REGS /* 52 */
2774 };
2775
2776 /* Return attribute type of insn. */
2777
2778 static enum attr_type
2779 s390_safe_attr_type (rtx_insn *insn)
2780 {
2781 if (recog_memoized (insn) >= 0)
2782 return get_attr_type (insn);
2783 else
2784 return TYPE_NONE;
2785 }
2786
2787 /* Return attribute relative_long of insn. */
2788
2789 static bool
2790 s390_safe_relative_long_p (rtx_insn *insn)
2791 {
2792 if (recog_memoized (insn) >= 0)
2793 return get_attr_relative_long (insn) == RELATIVE_LONG_YES;
2794 else
2795 return false;
2796 }
2797
2798 /* Return true if DISP is a valid short displacement. */
2799
2800 static bool
2801 s390_short_displacement (rtx disp)
2802 {
2803 /* No displacement is OK. */
2804 if (!disp)
2805 return true;
2806
2807 /* Without the long displacement facility we don't need to
2808 distinguish between long and short displacement. */
2809 if (!TARGET_LONG_DISPLACEMENT)
2810 return true;
2811
2812 /* Integer displacement in range. */
2813 if (GET_CODE (disp) == CONST_INT)
2814 return INTVAL (disp) >= 0 && INTVAL (disp) < 4096;
2815
2816 /* GOT offset is not OK, the GOT can be large. */
2817 if (GET_CODE (disp) == CONST
2818 && GET_CODE (XEXP (disp, 0)) == UNSPEC
2819 && (XINT (XEXP (disp, 0), 1) == UNSPEC_GOT
2820 || XINT (XEXP (disp, 0), 1) == UNSPEC_GOTNTPOFF))
2821 return false;
2822
2823 /* All other symbolic constants are literal pool references,
2824 which are OK as the literal pool must be small. */
2825 if (GET_CODE (disp) == CONST)
2826 return true;
2827
2828 return false;
2829 }
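
/* Background note (for illustration): the displacement field of the
   classic base-displacement instruction formats is 12 bits unsigned,
   hence the [0, 4095] range above; the long displacement facility adds
   formats with a 20-bit signed displacement, which makes the
   distinction necessary in the first place.  */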
2830
2831 /* Attempts to split `ref', which should be UNSPEC_LTREF, into (base + `disp').
2832 If successful, also determines the following characteristics
2833 of `ref': `is_ptr' - whether it can be an
2834 LA argument, `is_base_ptr' - whether the resulting base is a well-known
2835 base register (stack/frame pointer, etc), `is_pool_ptr` - whether it is
2836 considered a literal pool pointer for purposes of avoiding two different
2837 literal pool pointers per insn during or after reload (`B' constraint). */
2838 static bool
2839 s390_decompose_constant_pool_ref (rtx *ref, rtx *disp, bool *is_ptr,
2840 bool *is_base_ptr, bool *is_pool_ptr)
2841 {
2842 if (!*ref)
2843 return true;
2844
2845 if (GET_CODE (*ref) == UNSPEC)
2846 switch (XINT (*ref, 1))
2847 {
2848 case UNSPEC_LTREF:
2849 if (!*disp)
2850 *disp = gen_rtx_UNSPEC (Pmode,
2851 gen_rtvec (1, XVECEXP (*ref, 0, 0)),
2852 UNSPEC_LTREL_OFFSET);
2853 else
2854 return false;
2855
2856 *ref = XVECEXP (*ref, 0, 1);
2857 break;
2858
2859 default:
2860 return false;
2861 }
2862
2863 if (!REG_P (*ref) || GET_MODE (*ref) != Pmode)
2864 return false;
2865
2866 if (REGNO (*ref) == STACK_POINTER_REGNUM
2867 || REGNO (*ref) == FRAME_POINTER_REGNUM
2868 || ((reload_completed || reload_in_progress)
2869 && frame_pointer_needed
2870 && REGNO (*ref) == HARD_FRAME_POINTER_REGNUM)
2871 || REGNO (*ref) == ARG_POINTER_REGNUM
2872 || (flag_pic
2873 && REGNO (*ref) == PIC_OFFSET_TABLE_REGNUM))
2874 *is_ptr = *is_base_ptr = true;
2875
2876 if ((reload_completed || reload_in_progress)
2877 && *ref == cfun->machine->base_reg)
2878 *is_ptr = *is_base_ptr = *is_pool_ptr = true;
2879
2880 return true;
2881 }
2882
2883 /* Decompose a RTL expression ADDR for a memory address into
2884 its components, returned in OUT.
2885
2886 Returns false if ADDR is not a valid memory address, true
2887 otherwise. If OUT is NULL, don't return the components,
2888 but check for validity only.
2889
2890 Note: Only addresses in canonical form are recognized.
2891 LEGITIMIZE_ADDRESS should convert non-canonical forms to the
2892 canonical form so that they will be recognized. */
2893
2894 static int
2895 s390_decompose_address (rtx addr, struct s390_address *out)
2896 {
2897 HOST_WIDE_INT offset = 0;
2898 rtx base = NULL_RTX;
2899 rtx indx = NULL_RTX;
2900 rtx disp = NULL_RTX;
2901 rtx orig_disp;
2902 bool pointer = false;
2903 bool base_ptr = false;
2904 bool indx_ptr = false;
2905 bool literal_pool = false;
2906
2907 /* We may need to substitute the literal pool base register into the address
2908 below. However, at this point we do not know which register is going to
2909 be used as base, so we substitute the arg pointer register. This is going
2910 to be treated as holding a pointer below -- it shouldn't be used for any
2911 other purpose. */
2912 rtx fake_pool_base = gen_rtx_REG (Pmode, ARG_POINTER_REGNUM);
2913
2914 /* Decompose address into base + index + displacement. */
2915
2916 if (GET_CODE (addr) == REG || GET_CODE (addr) == UNSPEC)
2917 base = addr;
2918
2919 else if (GET_CODE (addr) == PLUS)
2920 {
2921 rtx op0 = XEXP (addr, 0);
2922 rtx op1 = XEXP (addr, 1);
2923 enum rtx_code code0 = GET_CODE (op0);
2924 enum rtx_code code1 = GET_CODE (op1);
2925
2926 if (code0 == REG || code0 == UNSPEC)
2927 {
2928 if (code1 == REG || code1 == UNSPEC)
2929 {
2930 indx = op0; /* index + base */
2931 base = op1;
2932 }
2933
2934 else
2935 {
2936 base = op0; /* base + displacement */
2937 disp = op1;
2938 }
2939 }
2940
2941 else if (code0 == PLUS)
2942 {
2943 indx = XEXP (op0, 0); /* index + base + disp */
2944 base = XEXP (op0, 1);
2945 disp = op1;
2946 }
2947
2948 else
2949 {
2950 return false;
2951 }
2952 }
2953
2954 else
2955 disp = addr; /* displacement */
2956
2957 /* Extract integer part of displacement. */
2958 orig_disp = disp;
2959 if (disp)
2960 {
2961 if (GET_CODE (disp) == CONST_INT)
2962 {
2963 offset = INTVAL (disp);
2964 disp = NULL_RTX;
2965 }
2966 else if (GET_CODE (disp) == CONST
2967 && GET_CODE (XEXP (disp, 0)) == PLUS
2968 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
2969 {
2970 offset = INTVAL (XEXP (XEXP (disp, 0), 1));
2971 disp = XEXP (XEXP (disp, 0), 0);
2972 }
2973 }
2974
2975 /* Strip off CONST here to avoid special case tests later. */
2976 if (disp && GET_CODE (disp) == CONST)
2977 disp = XEXP (disp, 0);
2978
2979 /* We can convert literal pool addresses to
2980 displacements by basing them off the base register. */
2981 if (disp && GET_CODE (disp) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (disp))
2982 {
2983 if (base || indx)
2984 return false;
2985
2986 base = fake_pool_base, literal_pool = true;
2987
2988 /* Mark up the displacement. */
2989 disp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, disp),
2990 UNSPEC_LTREL_OFFSET);
2991 }
2992
2993 /* Validate base register. */
2994 if (!s390_decompose_constant_pool_ref (&base, &disp, &pointer, &base_ptr,
2995 &literal_pool))
2996 return false;
2997
2998 /* Validate index register. */
2999 if (!s390_decompose_constant_pool_ref (&indx, &disp, &pointer, &indx_ptr,
3000 &literal_pool))
3001 return false;
3002
3003 /* Prefer to use pointer as base, not index. */
3004 if (base && indx && !base_ptr
3005 && (indx_ptr || (!REG_POINTER (base) && REG_POINTER (indx))))
3006 {
3007 rtx tmp = base;
3008 base = indx;
3009 indx = tmp;
3010 }
3011
3012 /* Validate displacement. */
3013 if (!disp)
3014 {
3015 /* If virtual registers are involved, the displacement will change later
3016 anyway as the virtual registers get eliminated. This could make a
3017 valid displacement invalid, but it is more likely to make an invalid
3018 displacement valid, because we sometimes access the register save area
3019 via negative offsets to one of those registers.
3020 Thus we don't check the displacement for validity here. If after
3021 elimination the displacement turns out to be invalid after all,
3022 this is fixed up by reload in any case. */
3023 /* LRA always keeps displacements up to date, and we need to know
3024 that the displacement is right during all of LRA, not only at
3025 the final elimination. */
3026 if (lra_in_progress
3027 || (base != arg_pointer_rtx
3028 && indx != arg_pointer_rtx
3029 && base != return_address_pointer_rtx
3030 && indx != return_address_pointer_rtx
3031 && base != frame_pointer_rtx
3032 && indx != frame_pointer_rtx
3033 && base != virtual_stack_vars_rtx
3034 && indx != virtual_stack_vars_rtx))
3035 if (!DISP_IN_RANGE (offset))
3036 return false;
3037 }
3038 else
3039 {
3040 /* All the special cases are pointers. */
3041 pointer = true;
3042
3043 /* In the small-PIC case, the linker converts @GOT
3044 and @GOTNTPOFF offsets to possible displacements. */
3045 if (GET_CODE (disp) == UNSPEC
3046 && (XINT (disp, 1) == UNSPEC_GOT
3047 || XINT (disp, 1) == UNSPEC_GOTNTPOFF)
3048 && flag_pic == 1)
3049 {
3050 ;
3051 }
3052
3053 /* Accept pool label offsets. */
3054 else if (GET_CODE (disp) == UNSPEC
3055 && XINT (disp, 1) == UNSPEC_POOL_OFFSET)
3056 ;
3057
3058 /* Accept literal pool references. */
3059 else if (GET_CODE (disp) == UNSPEC
3060 && XINT (disp, 1) == UNSPEC_LTREL_OFFSET)
3061 {
3062 /* In case CSE pulled a non literal pool reference out of
3063 the pool we have to reject the address. This is
3064 especially important when loading the GOT pointer on non
3065 zarch CPUs. In this case the literal pool contains an lt
3066 relative offset to the _GLOBAL_OFFSET_TABLE_ label which
3067 will most likely exceed the displacement. */
3068 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
3069 || !CONSTANT_POOL_ADDRESS_P (XVECEXP (disp, 0, 0)))
3070 return false;
3071
3072 orig_disp = gen_rtx_CONST (Pmode, disp);
3073 if (offset)
3074 {
3075 /* If we have an offset, make sure it does not
3076 exceed the size of the constant pool entry.
3077 Otherwise we might generate an out-of-range
3078 displacement for the base register form. */
3079 rtx sym = XVECEXP (disp, 0, 0);
3080 if (offset >= GET_MODE_SIZE (get_pool_mode (sym)))
3081 return false;
3082
3083 orig_disp = plus_constant (Pmode, orig_disp, offset);
3084 }
3085 }
3086
3087 else
3088 return false;
3089 }
3090
3091 if (!base && !indx)
3092 pointer = true;
3093
3094 if (out)
3095 {
3096 out->base = base;
3097 out->indx = indx;
3098 out->disp = orig_disp;
3099 out->pointer = pointer;
3100 out->literal_pool = literal_pool;
3101 }
3102
3103 return true;
3104 }
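
/* Worked example (illustrative): the canonical address
   (plus (plus (reg 2) (reg 3)) (const_int 16)) decomposes into
   indx = (reg 2), base = (reg 3) and an integer displacement of 16,
   modulo the pointer-preference swap of base and index above.  */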
3105
3106 /* Decompose a RTL expression OP for an address style operand into its
3107 components, and return the base register in BASE and the offset in
3108 OFFSET. While OP looks like an address it is never supposed to be
3109 used as such.
3110
3111 Return true if OP is a valid address operand, false if not. */
3112
3113 bool
3114 s390_decompose_addrstyle_without_index (rtx op, rtx *base,
3115 HOST_WIDE_INT *offset)
3116 {
3117 rtx off = NULL_RTX;
3118
3119 /* We can have an integer constant, an address register,
3120 or a sum of the two. */
3121 if (CONST_SCALAR_INT_P (op))
3122 {
3123 off = op;
3124 op = NULL_RTX;
3125 }
3126 if (op && GET_CODE (op) == PLUS && CONST_SCALAR_INT_P (XEXP (op, 1)))
3127 {
3128 off = XEXP (op, 1);
3129 op = XEXP (op, 0);
3130 }
3131 while (op && GET_CODE (op) == SUBREG)
3132 op = SUBREG_REG (op);
3133
3134 if (op && GET_CODE (op) != REG)
3135 return false;
3136
3137 if (offset)
3138 {
3139 if (off == NULL_RTX)
3140 *offset = 0;
3141 else if (CONST_INT_P (off))
3142 *offset = INTVAL (off);
3143 else if (CONST_WIDE_INT_P (off))
3144 /* The offset will be truncated to 12 bits anyway, so just take
3145 the lowest order chunk of the wide int. */
3146 *offset = CONST_WIDE_INT_ELT (off, 0);
3147 else
3148 gcc_unreachable ();
3149 }
3150 if (base)
3151 *base = op;
3152
3153 return true;
3154 }
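
/* E.g. (plus (reg 1) (const_int 5)) yields *BASE = (reg 1) and
   *OFFSET = 5, while a plain (const_int 5) yields *BASE = NULL_RTX
   and *OFFSET = 5 (illustrative).  */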
3155
3156 /* Check that OP is a valid shift count operand.
3157 It should be of the following structure:
3158 (subreg (and (plus (reg imm_op)) 2^k-1) 7)
3159 where the subreg, and, and plus are optional.
3160
3161 If IMPLICIT_MASK is > 0 and OP contains an
3162 (AND ... immediate)
3163 it is checked whether IMPLICIT_MASK and the immediate match.
3164 Otherwise, no checking is performed.
3165 */
3166 bool
3167 s390_valid_shift_count (rtx op, HOST_WIDE_INT implicit_mask)
3168 {
3169 /* Strip subreg. */
3170 while (GET_CODE (op) == SUBREG && subreg_lowpart_p (op))
3171 op = XEXP (op, 0);
3172
3173 /* Check for an and with proper constant. */
3174 if (GET_CODE (op) == AND)
3175 {
3176 rtx op1 = XEXP (op, 0);
3177 rtx imm = XEXP (op, 1);
3178
3179 if (GET_CODE (op1) == SUBREG && subreg_lowpart_p (op1))
3180 op1 = XEXP (op1, 0);
3181
3182 if (!(register_operand (op1, GET_MODE (op1)) || GET_CODE (op1) == PLUS))
3183 return false;
3184
3185 if (!immediate_operand (imm, GET_MODE (imm)))
3186 return false;
3187
3188 HOST_WIDE_INT val = INTVAL (imm);
3189 if (implicit_mask > 0
3190 && (val & implicit_mask) != implicit_mask)
3191 return false;
3192
3193 op = op1;
3194 }
3195
3196 /* Check the rest. */
3197 return s390_decompose_addrstyle_without_index (op, NULL, NULL);
3198 }
3199
3200 /* Return true if OP is a valid address without index. */
3201
3202 bool
3203 s390_legitimate_address_without_index_p (rtx op)
3204 {
3205 struct s390_address addr;
3206
3207 if (!s390_decompose_address (XEXP (op, 0), &addr))
3208 return false;
3209 if (addr.indx)
3210 return false;
3211
3212 return true;
3213 }
3214
3215
3216 /* Return TRUE if ADDR is an operand valid for a load/store relative
3217 instruction. Be aware that the alignment of the operand needs to
3218 be checked separately.
3219 Valid addresses are single references or a sum of a reference and a
3220 constant integer. Return these parts in SYMREF and ADDEND. You can
3221 pass NULL in SYMREF and/or ADDEND if you are not interested in these
3222 values. */
3223
3224 static bool
3225 s390_loadrelative_operand_p (rtx addr, rtx *symref, HOST_WIDE_INT *addend)
3226 {
3227 HOST_WIDE_INT tmpaddend = 0;
3228
3229 if (GET_CODE (addr) == CONST)
3230 addr = XEXP (addr, 0);
3231
3232 if (GET_CODE (addr) == PLUS)
3233 {
3234 if (!CONST_INT_P (XEXP (addr, 1)))
3235 return false;
3236
3237 tmpaddend = INTVAL (XEXP (addr, 1));
3238 addr = XEXP (addr, 0);
3239 }
3240
3241 if (GET_CODE (addr) == SYMBOL_REF
3242 || (GET_CODE (addr) == UNSPEC
3243 && (XINT (addr, 1) == UNSPEC_GOTENT
3244 || XINT (addr, 1) == UNSPEC_PLT)))
3245 {
3246 if (symref)
3247 *symref = addr;
3248 if (addend)
3249 *addend = tmpaddend;
3250
3251 return true;
3252 }
3253 return false;
3254 }
3255
3256 /* Return true if the address in OP is valid for constraint letter C
3257 if wrapped in a MEM rtx. Set LIT_POOL_OK to true if literal
3258 pool MEMs should be accepted. Only the Q, R, S, T constraint
3259 letters are allowed for C. */
3260
3261 static int
3262 s390_check_qrst_address (char c, rtx op, bool lit_pool_ok)
3263 {
3264 rtx symref;
3265 struct s390_address addr;
3266 bool decomposed = false;
3267
3268 if (!address_operand (op, GET_MODE (op)))
3269 return 0;
3270
3271 /* This check makes sure that no symbolic address (except literal
3272 pool references) is accepted by the R or T constraints. */
3273 if (s390_loadrelative_operand_p (op, &symref, NULL)
3274 && (!lit_pool_ok
3275 || !SYMBOL_REF_P (symref)
3276 || !CONSTANT_POOL_ADDRESS_P (symref)))
3277 return 0;
3278
3279 /* Ensure literal pool references are only accepted if LIT_POOL_OK. */
3280 if (!lit_pool_ok)
3281 {
3282 if (!s390_decompose_address (op, &addr))
3283 return 0;
3284 if (addr.literal_pool)
3285 return 0;
3286 decomposed = true;
3287 }
3288
3289 /* With reload, we sometimes get intermediate address forms that are
3290 actually invalid as-is, but we need to accept them in the most
3291 generic cases below ('R' or 'T'), since reload will in fact fix
3292 them up. LRA behaves differently here; we never see such forms,
3293 but on the other hand, we need to strictly reject every invalid
3294 address form. After both reload and LRA invalid address forms
3295 must be rejected, because nothing will fix them up later. Perform
3296 this check right up front. */
3297 if (lra_in_progress || reload_completed)
3298 {
3299 if (!decomposed && !s390_decompose_address (op, &addr))
3300 return 0;
3301 decomposed = true;
3302 }
3303
3304 switch (c)
3305 {
3306 case 'Q': /* no index short displacement */
3307 if (!decomposed && !s390_decompose_address (op, &addr))
3308 return 0;
3309 if (addr.indx)
3310 return 0;
3311 if (!s390_short_displacement (addr.disp))
3312 return 0;
3313 break;
3314
3315 case 'R': /* with index short displacement */
3316 if (TARGET_LONG_DISPLACEMENT)
3317 {
3318 if (!decomposed && !s390_decompose_address (op, &addr))
3319 return 0;
3320 if (!s390_short_displacement (addr.disp))
3321 return 0;
3322 }
3323 /* Any invalid address here will be fixed up by reload,
3324 so accept it for the most generic constraint. */
3325 break;
3326
3327 case 'S': /* no index long displacement */
3328 if (!decomposed && !s390_decompose_address (op, &addr))
3329 return 0;
3330 if (addr.indx)
3331 return 0;
3332 break;
3333
3334 case 'T': /* with index long displacement */
3335 /* Any invalid address here will be fixed up by reload,
3336 so accept it for the most generic constraint. */
3337 break;
3338
3339 default:
3340 return 0;
3341 }
3342 return 1;
3343 }
3344
3345
3346 /* Evaluates constraint strings described by the regular expression
3347 ([A|B|Z](Q|R|S|T))|Y and returns 1 if OP is a valid operand for
3348 the constraint given in STR, and 0 otherwise. */
3349
3350 int
3351 s390_mem_constraint (const char *str, rtx op)
3352 {
3353 char c = str[0];
3354
3355 switch (c)
3356 {
3357 case 'A':
3358 /* Check for offsettable variants of memory constraints. */
3359 if (!MEM_P (op) || MEM_VOLATILE_P (op))
3360 return 0;
3361 if ((reload_completed || reload_in_progress)
3362 ? !offsettable_memref_p (op) : !offsettable_nonstrict_memref_p (op))
3363 return 0;
3364 return s390_check_qrst_address (str[1], XEXP (op, 0), true);
3365 case 'B':
3366 /* Check for non-literal-pool variants of memory constraints. */
3367 if (!MEM_P (op))
3368 return 0;
3369 return s390_check_qrst_address (str[1], XEXP (op, 0), false);
3370 case 'Q':
3371 case 'R':
3372 case 'S':
3373 case 'T':
3374 if (GET_CODE (op) != MEM)
3375 return 0;
3376 return s390_check_qrst_address (c, XEXP (op, 0), true);
3377 case 'Y':
3378 /* Simply check for the basic form of a shift count. Reload will
3379 take care of making sure we have a proper base register. */
3380 if (!s390_decompose_addrstyle_without_index (op, NULL, NULL))
3381 return 0;
3382 break;
3383 case 'Z':
3384 return s390_check_qrst_address (str[1], op, true);
3385 default:
3386 return 0;
3387 }
3388 return 1;
3389 }
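
/* E.g. the constraint string "AQ" accepts an offsettable MEM whose
   address has no index register and a short displacement, while "BQ"
   accepts the same address shape but rejects literal pool references
   instead of requiring offsettability (illustrative reading of the
   switch above).  */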
3390
3391
3392 /* Evaluates constraint strings starting with letter O. Input
3393 parameter C is the letter following the "O" in the constraint
3394 string. Returns 1 if VALUE meets the respective constraint and 0
3395 otherwise. */
3396
3397 int
3398 s390_O_constraint_str (const char c, HOST_WIDE_INT value)
3399 {
3400 if (!TARGET_EXTIMM)
3401 return 0;
3402
3403 switch (c)
3404 {
3405 case 's':
3406 return trunc_int_for_mode (value, SImode) == value;
3407
3408 case 'p':
3409 return value == 0
3410 || s390_single_part (GEN_INT (value), DImode, SImode, 0) == 1;
3411
3412 case 'n':
3413 return s390_single_part (GEN_INT (value - 1), DImode, SImode, -1) == 1;
3414
3415 default:
3416 gcc_unreachable ();
3417 }
3418 }
3419
3420
3421 /* Evaluates constraint strings starting with letter N. Parameter STR
3422 contains the letters following letter "N" in the constraint string.
3423 Returns true if VALUE matches the constraint. */
3424
3425 int
3426 s390_N_constraint_str (const char *str, HOST_WIDE_INT value)
3427 {
3428 machine_mode mode, part_mode;
3429 int def;
3430 int part, part_goal;
3431
3432
3433 if (str[0] == 'x')
3434 part_goal = -1;
3435 else
3436 part_goal = str[0] - '0';
3437
3438 switch (str[1])
3439 {
3440 case 'Q':
3441 part_mode = QImode;
3442 break;
3443 case 'H':
3444 part_mode = HImode;
3445 break;
3446 case 'S':
3447 part_mode = SImode;
3448 break;
3449 default:
3450 return 0;
3451 }
3452
3453 switch (str[2])
3454 {
3455 case 'H':
3456 mode = HImode;
3457 break;
3458 case 'S':
3459 mode = SImode;
3460 break;
3461 case 'D':
3462 mode = DImode;
3463 break;
3464 default:
3465 return 0;
3466 }
3467
3468 switch (str[3])
3469 {
3470 case '0':
3471 def = 0;
3472 break;
3473 case 'F':
3474 def = -1;
3475 break;
3476 default:
3477 return 0;
3478 }
3479
3480 if (GET_MODE_SIZE (mode) <= GET_MODE_SIZE (part_mode))
3481 return 0;
3482
3483 part = s390_single_part (GEN_INT (value), mode, part_mode, def);
3484 if (part < 0)
3485 return 0;
3486 if (part_goal != -1 && part_goal != part)
3487 return 0;
3488
3489 return 1;
3490 }
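
/* Worked example (illustrative): for STR = "0HD0" -- part 0, HImode
   parts of a DImode value, remaining parts 0 -- the value
   0xffff000000000000 matches, since only the most significant halfword
   (part 0) differs from 0; 0x00000000ffff0000 has part 2 as the
   differing one and therefore fails the part check.  */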
3491
3492
3493 /* Returns true if the input parameter VALUE is a float zero. */
3494
3495 int
3496 s390_float_const_zero_p (rtx value)
3497 {
3498 return (GET_MODE_CLASS (GET_MODE (value)) == MODE_FLOAT
3499 && value == CONST0_RTX (GET_MODE (value)));
3500 }
3501
3502 /* Implement TARGET_REGISTER_MOVE_COST. */
3503
3504 static int
3505 s390_register_move_cost (machine_mode mode,
3506 reg_class_t from, reg_class_t to)
3507 {
3508 /* On s390, copy between fprs and gprs is expensive. */
3509
3510 /* It becomes somewhat faster with ldgr/lgdr available. */
3511 if (TARGET_Z10 && GET_MODE_SIZE (mode) == 8)
3512 {
3513 /* ldgr is single cycle. */
3514 if (reg_classes_intersect_p (from, GENERAL_REGS)
3515 && reg_classes_intersect_p (to, FP_REGS))
3516 return 1;
3517 /* lgdr needs 3 cycles. */
3518 if (reg_classes_intersect_p (to, GENERAL_REGS)
3519 && reg_classes_intersect_p (from, FP_REGS))
3520 return 3;
3521 }
3522
3523 /* Otherwise copying is done via memory. */
3524 if ((reg_classes_intersect_p (from, GENERAL_REGS)
3525 && reg_classes_intersect_p (to, FP_REGS))
3526 || (reg_classes_intersect_p (from, FP_REGS)
3527 && reg_classes_intersect_p (to, GENERAL_REGS)))
3528 return 10;
3529
3530 /* We usually do not want to copy via CC. */
3531 if (reg_classes_intersect_p (from, CC_REGS)
3532 || reg_classes_intersect_p (to, CC_REGS))
3533 return 5;
3534
3535 return 1;
3536 }
3537
3538 /* Implement TARGET_MEMORY_MOVE_COST. */
3539
3540 static int
3541 s390_memory_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
3542 reg_class_t rclass ATTRIBUTE_UNUSED,
3543 bool in ATTRIBUTE_UNUSED)
3544 {
3545 return 2;
3546 }
3547
3548 /* Compute a (partial) cost for rtx X. Return true if the complete
3549 cost has been computed, and false if subexpressions should be
3550 scanned. In either case, *TOTAL contains the cost result. The
3551 initial value of *TOTAL is the default value computed by
3552 rtx_cost. It may be left unmodified. OUTER_CODE contains the
3553 code of the superexpression of x. */
3554
3555 static bool
3556 s390_rtx_costs (rtx x, machine_mode mode, int outer_code,
3557 int opno ATTRIBUTE_UNUSED,
3558 int *total, bool speed ATTRIBUTE_UNUSED)
3559 {
3560 int code = GET_CODE (x);
3561 switch (code)
3562 {
3563 case CONST:
3564 case CONST_INT:
3565 case LABEL_REF:
3566 case SYMBOL_REF:
3567 case CONST_DOUBLE:
3568 case CONST_WIDE_INT:
3569 case MEM:
3570 *total = 0;
3571 return true;
3572
3573 case SET:
3574 {
3575 /* Without this a conditional move instruction would be
3576 accounted as 3 * COSTS_N_INSNS (set, if_then_else,
3577 comparison operator). That's a bit pessimistic. */
3578
3579 if (!TARGET_Z196 || GET_CODE (SET_SRC (x)) != IF_THEN_ELSE)
3580 return false;
3581
3582 rtx cond = XEXP (SET_SRC (x), 0);
3583
3584 if (!CC_REG_P (XEXP (cond, 0)) || !CONST_INT_P (XEXP (cond, 1)))
3585 return false;
3586
3587 /* It is going to be a load/store on condition. Make it
3588 slightly more expensive than a normal load. */
3589 *total = COSTS_N_INSNS (1) + 1;
3590
3591 rtx dst = SET_DEST (x);
3592 rtx then = XEXP (SET_SRC (x), 1);
3593 rtx els = XEXP (SET_SRC (x), 2);
3594
3595 /* It is a real IF-THEN-ELSE. An additional move will be
3596 needed to implement that. */
3597 if (!TARGET_Z15
3598 && reload_completed
3599 && !rtx_equal_p (dst, then)
3600 && !rtx_equal_p (dst, els))
3601 *total += COSTS_N_INSNS (1) / 2;
3602
3603 /* A minor penalty for constants we cannot directly handle. */
3604 if ((CONST_INT_P (then) || CONST_INT_P (els))
3605 && (!TARGET_Z13 || MEM_P (dst)
3606 || (CONST_INT_P (then) && !satisfies_constraint_K (then))
3607 || (CONST_INT_P (els) && !satisfies_constraint_K (els))))
3608 *total += COSTS_N_INSNS (1) / 2;
3609
3610 /* A store on condition can only handle register src operands. */
3611 if (MEM_P (dst) && (!REG_P (then) || !REG_P (els)))
3612 *total += COSTS_N_INSNS (1) / 2;
3613
3614 return true;
3615 }
3616 case IOR:
3617
3618 /* nnrk, nngrk */
3619 if (TARGET_Z15
3620 && (mode == SImode || mode == DImode)
3621 && GET_CODE (XEXP (x, 0)) == NOT
3622 && GET_CODE (XEXP (x, 1)) == NOT)
3623 {
3624 *total = COSTS_N_INSNS (1);
3625 if (!REG_P (XEXP (XEXP (x, 0), 0)))
3626 *total += 1;
3627 if (!REG_P (XEXP (XEXP (x, 1), 0)))
3628 *total += 1;
3629 return true;
3630 }
3631
3632 /* risbg */
3633 if (GET_CODE (XEXP (x, 0)) == AND
3634 && GET_CODE (XEXP (x, 1)) == ASHIFT
3635 && REG_P (XEXP (XEXP (x, 0), 0))
3636 && REG_P (XEXP (XEXP (x, 1), 0))
3637 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
3638 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
3639 && (UINTVAL (XEXP (XEXP (x, 0), 1)) ==
3640 (HOST_WIDE_INT_1U << UINTVAL (XEXP (XEXP (x, 1), 1))) - 1))
3641 {
3642 *total = COSTS_N_INSNS (2);
3643 return true;
3644 }
3645
3646 /* ~AND on a 128 bit mode. This can be done using a vector
3647 instruction. */
3648 if (TARGET_VXE
3649 && GET_CODE (XEXP (x, 0)) == NOT
3650 && GET_CODE (XEXP (x, 1)) == NOT
3651 && REG_P (XEXP (XEXP (x, 0), 0))
3652 && REG_P (XEXP (XEXP (x, 1), 0))
3653 && GET_MODE_SIZE (GET_MODE (XEXP (XEXP (x, 0), 0))) == 16
3654 && s390_hard_regno_mode_ok (VR0_REGNUM,
3655 GET_MODE (XEXP (XEXP (x, 0), 0))))
3656 {
3657 *total = COSTS_N_INSNS (1);
3658 return true;
3659 }
3660
3661 *total = COSTS_N_INSNS (1);
3662 return false;
3663
3664 case AND:
3665 /* nork, nogrk */
3666 if (TARGET_Z15
3667 && (mode == SImode || mode == DImode)
3668 && GET_CODE (XEXP (x, 0)) == NOT
3669 && GET_CODE (XEXP (x, 1)) == NOT)
3670 {
3671 *total = COSTS_N_INSNS (1);
3672 if (!REG_P (XEXP (XEXP (x, 0), 0)))
3673 *total += 1;
3674 if (!REG_P (XEXP (XEXP (x, 1), 0)))
3675 *total += 1;
3676 return true;
3677 }
3678 /* fallthrough */
3679 case ASHIFT:
3680 case ASHIFTRT:
3681 case LSHIFTRT:
3682 case ROTATE:
3683 case ROTATERT:
3684 case XOR:
3685 case NEG:
3686 case NOT:
3687 case PLUS:
3688 case MINUS:
3689 *total = COSTS_N_INSNS (1);
3690 return false;
3691
3692 case MULT:
3693 switch (mode)
3694 {
3695 case E_SImode:
3696 {
3697 rtx left = XEXP (x, 0);
3698 rtx right = XEXP (x, 1);
3699 if (GET_CODE (right) == CONST_INT
3700 && CONST_OK_FOR_K (INTVAL (right)))
3701 *total = s390_cost->mhi;
3702 else if (GET_CODE (left) == SIGN_EXTEND)
3703 *total = s390_cost->mh;
3704 else
3705 *total = s390_cost->ms; /* msr, ms, msy */
3706 break;
3707 }
3708 case E_DImode:
3709 {
3710 rtx left = XEXP (x, 0);
3711 rtx right = XEXP (x, 1);
3712 if (TARGET_ZARCH)
3713 {
3714 if (GET_CODE (right) == CONST_INT
3715 && CONST_OK_FOR_K (INTVAL (right)))
3716 *total = s390_cost->mghi;
3717 else if (GET_CODE (left) == SIGN_EXTEND)
3718 *total = s390_cost->msgf;
3719 else
3720 *total = s390_cost->msg; /* msgr, msg */
3721 }
3722 else /* TARGET_31BIT */
3723 {
3724 if (GET_CODE (left) == SIGN_EXTEND
3725 && GET_CODE (right) == SIGN_EXTEND)
3726 /* mulsidi case: mr, m */
3727 *total = s390_cost->m;
3728 else if (GET_CODE (left) == ZERO_EXTEND
3729 && GET_CODE (right) == ZERO_EXTEND)
3730 /* umulsidi case: ml, mlr */
3731 *total = s390_cost->ml;
3732 else
3733 /* Complex calculation is required. */
3734 *total = COSTS_N_INSNS (40);
3735 }
3736 break;
3737 }
3738 case E_SFmode:
3739 case E_DFmode:
3740 *total = s390_cost->mult_df;
3741 break;
3742 case E_TFmode:
3743 *total = s390_cost->mxbr;
3744 break;
3745 default:
3746 return false;
3747 }
3748 return false;
3749
3750 case FMA:
3751 switch (mode)
3752 {
3753 case E_DFmode:
3754 *total = s390_cost->madbr;
3755 break;
3756 case E_SFmode:
3757 *total = s390_cost->maebr;
3758 break;
3759 default:
3760 return false;
3761 }
3762       /* Negation of the third argument is free: FMSUB.  */
3763 if (GET_CODE (XEXP (x, 2)) == NEG)
3764 {
3765 *total += (rtx_cost (XEXP (x, 0), mode, FMA, 0, speed)
3766 + rtx_cost (XEXP (x, 1), mode, FMA, 1, speed)
3767 + rtx_cost (XEXP (XEXP (x, 2), 0), mode, FMA, 2, speed));
3768 return true;
3769 }
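/* For example, (fma:DF a b (neg:DF c)) computes a * b - c and is
   matched by the fused multiply-subtract pattern, so the NEG itself
   adds no cost; only the three operands are costed above.  */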
3770 return false;
3771
3772 case UDIV:
3773 case UMOD:
3774 if (mode == TImode) /* 128 bit division */
3775 *total = s390_cost->dlgr;
3776 else if (mode == DImode)
3777 {
3778 rtx right = XEXP (x, 1);
3779 if (GET_CODE (right) == ZERO_EXTEND) /* 64 by 32 bit division */
3780 *total = s390_cost->dlr;
3781 else /* 64 by 64 bit division */
3782 *total = s390_cost->dlgr;
3783 }
3784 else if (mode == SImode) /* 32 bit division */
3785 *total = s390_cost->dlr;
3786 return false;
3787
3788 case DIV:
3789 case MOD:
3790 if (mode == DImode)
3791 {
3792 rtx right = XEXP (x, 1);
3793 if (GET_CODE (right) == ZERO_EXTEND) /* 64 by 32 bit division */
3794 if (TARGET_ZARCH)
3795 *total = s390_cost->dsgfr;
3796 else
3797 *total = s390_cost->dr;
3798 else /* 64 by 64 bit division */
3799 *total = s390_cost->dsgr;
3800 }
3801 else if (mode == SImode) /* 32 bit division */
3802 *total = s390_cost->dlr;
3803 else if (mode == SFmode)
3804 {
3805 *total = s390_cost->debr;
3806 }
3807 else if (mode == DFmode)
3808 {
3809 *total = s390_cost->ddbr;
3810 }
3811 else if (mode == TFmode)
3812 {
3813 *total = s390_cost->dxbr;
3814 }
3815 return false;
3816
3817 case SQRT:
3818 if (mode == SFmode)
3819 *total = s390_cost->sqebr;
3820 else if (mode == DFmode)
3821 *total = s390_cost->sqdbr;
3822 else /* TFmode */
3823 *total = s390_cost->sqxbr;
3824 return false;
3825
3826 case SIGN_EXTEND:
3827 case ZERO_EXTEND:
3828 if (outer_code == MULT || outer_code == DIV || outer_code == MOD
3829 || outer_code == PLUS || outer_code == MINUS
3830 || outer_code == COMPARE)
3831 *total = 0;
3832 return false;
3833
3834 case COMPARE:
3835 *total = COSTS_N_INSNS (1);
3836
3837 /* nxrk, nxgrk ~(a^b)==0 */
3838 if (TARGET_Z15
3839 && GET_CODE (XEXP (x, 0)) == NOT
3840 && XEXP (x, 1) == const0_rtx
3841 && GET_CODE (XEXP (XEXP (x, 0), 0)) == XOR
3842 && (GET_MODE (XEXP (x, 0)) == SImode || GET_MODE (XEXP (x, 0)) == DImode)
3843 && mode == CCZmode)
3844 {
3845 if (!REG_P (XEXP (XEXP (XEXP (x, 0), 0), 0)))
3846 *total += 1;
3847 if (!REG_P (XEXP (XEXP (XEXP (x, 0), 0), 1)))
3848 *total += 1;
3849 return true;
3850 }
3851
3852 /* nnrk, nngrk, nork, nogrk */
3853 if (TARGET_Z15
3854 && (GET_CODE (XEXP (x, 0)) == AND || GET_CODE (XEXP (x, 0)) == IOR)
3855 && XEXP (x, 1) == const0_rtx
3856 && (GET_MODE (XEXP (x, 0)) == SImode || GET_MODE (XEXP (x, 0)) == DImode)
3857 && GET_CODE (XEXP (XEXP (x, 0), 0)) == NOT
3858 && GET_CODE (XEXP (XEXP (x, 0), 1)) == NOT
3859 && mode == CCZmode)
3860 {
3861 if (!REG_P (XEXP (XEXP (XEXP (x, 0), 0), 0)))
3862 *total += 1;
3863 if (!REG_P (XEXP (XEXP (XEXP (x, 0), 1), 0)))
3864 *total += 1;
3865 return true;
3866 }
3867
3868 if (GET_CODE (XEXP (x, 0)) == AND
3869 && GET_CODE (XEXP (x, 1)) == CONST_INT
3870 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)
3871 {
3872 rtx op0 = XEXP (XEXP (x, 0), 0);
3873 rtx op1 = XEXP (XEXP (x, 0), 1);
3874 rtx op2 = XEXP (x, 1);
3875
3876 if (memory_operand (op0, GET_MODE (op0))
3877 && s390_tm_ccmode (op1, op2, 0) != VOIDmode)
3878 return true;
3879 if (register_operand (op0, GET_MODE (op0))
3880 && s390_tm_ccmode (op1, op2, 1) != VOIDmode)
3881 return true;
3882 }
3883 return false;
3884
3885 default:
3886 return false;
3887 }
3888 }
3889
3890 /* Return the cost of an address rtx ADDR. */
3891
3892 static int
3893 s390_address_cost (rtx addr, machine_mode mode ATTRIBUTE_UNUSED,
3894 addr_space_t as ATTRIBUTE_UNUSED,
3895 bool speed ATTRIBUTE_UNUSED)
3896 {
3897 struct s390_address ad;
3898 if (!s390_decompose_address (addr, &ad))
3899 return 1000;
3900
3901 return ad.indx? COSTS_N_INSNS (1) + 1 : COSTS_N_INSNS (1);
3902 }
3903
3904 /* Implement targetm.vectorize.builtin_vectorization_cost. */
3905 static int
3906 s390_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
3907 tree vectype,
3908 int misalign ATTRIBUTE_UNUSED)
3909 {
3910 switch (type_of_cost)
3911 {
3912 case scalar_stmt:
3913 case scalar_load:
3914 case scalar_store:
3915 case vector_stmt:
3916 case vector_load:
3917 case vector_store:
3918 case vector_gather_load:
3919 case vector_scatter_store:
3920 case vec_to_scalar:
3921 case scalar_to_vec:
3922 case cond_branch_not_taken:
3923 case vec_perm:
3924 case vec_promote_demote:
3925 case unaligned_load:
3926 case unaligned_store:
3927 return 1;
3928
3929 case cond_branch_taken:
3930 return 3;
3931
3932 case vec_construct:
3933 return TYPE_VECTOR_SUBPARTS (vectype) - 1;
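/* E.g. constructing a V4SI vector from four scalars costs 3:
   one insertion for each element after the first.  */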
3934
3935 default:
3936 gcc_unreachable ();
3937 }
3938 }
3939
3940 /* If OP is a SYMBOL_REF of a thread-local symbol, return its TLS mode,
3941 otherwise return 0. */
3942
3943 int
3944 tls_symbolic_operand (rtx op)
3945 {
3946 if (GET_CODE (op) != SYMBOL_REF)
3947 return 0;
3948 return SYMBOL_REF_TLS_MODEL (op);
3949 }
3950
3951 /* Split DImode access register reference REG (on 64-bit) into its constituent
3952 low and high parts, and store them into LO and HI. Note that gen_lowpart/
3953 gen_highpart cannot be used as they assume all registers are word-sized,
3954 while our access registers have only half that size. */
3955
3956 void
3957 s390_split_access_reg (rtx reg, rtx *lo, rtx *hi)
3958 {
3959 gcc_assert (TARGET_64BIT);
3960 gcc_assert (ACCESS_REG_P (reg));
3961 gcc_assert (GET_MODE (reg) == DImode);
3962 gcc_assert (!(REGNO (reg) & 1));
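/* E.g. for a DImode value in the pair starting at access register %a0
   this yields *hi = (reg:SI %a0) and *lo = (reg:SI %a1); the access
   registers are only 32 bits wide, so the high part comes first.  */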
3963
3964 *lo = gen_rtx_REG (SImode, REGNO (reg) + 1);
3965 *hi = gen_rtx_REG (SImode, REGNO (reg));
3966 }
3967
3968 /* Return true if OP contains a symbol reference.  */
3969
3970 bool
3971 symbolic_reference_mentioned_p (rtx op)
3972 {
3973 const char *fmt;
3974 int i;
3975
3976 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
3977 return 1;
3978
3979 fmt = GET_RTX_FORMAT (GET_CODE (op));
3980 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
3981 {
3982 if (fmt[i] == 'E')
3983 {
3984 int j;
3985
3986 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
3987 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
3988 return 1;
3989 }
3990
3991 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
3992 return 1;
3993 }
3994
3995 return 0;
3996 }
3997
3998 /* Return true if OP contains a reference to a thread-local symbol. */
3999
4000 bool
4001 tls_symbolic_reference_mentioned_p (rtx op)
4002 {
4003 const char *fmt;
4004 int i;
4005
4006 if (GET_CODE (op) == SYMBOL_REF)
4007 return tls_symbolic_operand (op);
4008
4009 fmt = GET_RTX_FORMAT (GET_CODE (op));
4010 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
4011 {
4012 if (fmt[i] == 'E')
4013 {
4014 int j;
4015
4016 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
4017 if (tls_symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
4018 return true;
4019 }
4020
4021 else if (fmt[i] == 'e' && tls_symbolic_reference_mentioned_p (XEXP (op, i)))
4022 return true;
4023 }
4024
4025 return false;
4026 }
4027
4028
4029 /* Return true if OP is a legitimate general operand when
4030 generating PIC code. It is given that flag_pic is on
4031 and that OP satisfies CONSTANT_P. */
4032
4033 int
4034 legitimate_pic_operand_p (rtx op)
4035 {
4036 /* Accept all non-symbolic constants. */
4037 if (!SYMBOLIC_CONST (op))
4038 return 1;
4039
4040 /* Accept addresses that can be expressed relative to (pc). */
4041 if (larl_operand (op, VOIDmode))
4042 return 1;
4043
4044 /* Reject everything else; must be handled
4045 via emit_symbolic_move. */
4046 return 0;
4047 }
4048
4049 /* Returns true if the constant value OP is a legitimate general operand.
4050 It is given that OP satisfies CONSTANT_P. */
4051
4052 static bool
4053 s390_legitimate_constant_p (machine_mode mode, rtx op)
4054 {
4055 if (TARGET_VX && VECTOR_MODE_P (mode) && GET_CODE (op) == CONST_VECTOR)
4056 {
4057 if (GET_MODE_SIZE (mode) != 16)
4058 return 0;
4059
4060 if (!satisfies_constraint_j00 (op)
4061 && !satisfies_constraint_jm1 (op)
4062 && !satisfies_constraint_jKK (op)
4063 && !satisfies_constraint_jxx (op)
4064 && !satisfies_constraint_jyy (op))
4065 return 0;
4066 }
4067
4068 /* Accept all non-symbolic constants. */
4069 if (!SYMBOLIC_CONST (op))
4070 return 1;
4071
4072 /* Accept immediate LARL operands. */
4073 if (larl_operand (op, mode))
4074 return 1;
4075
4076 /* Thread-local symbols are never legal constants. This is
4077 so that emit_call knows that computing such addresses
4078 might require a function call. */
4079 if (TLS_SYMBOLIC_CONST (op))
4080 return 0;
4081
4082 /* In the PIC case, symbolic constants must *not* be
4083 forced into the literal pool. We accept them here,
4084 so that they will be handled by emit_symbolic_move. */
4085 if (flag_pic)
4086 return 1;
4087
4088 /* All remaining non-PIC symbolic constants are
4089 forced into the literal pool. */
4090 return 0;
4091 }
4092
4093 /* Determine if it's legal to put X into the constant pool. This
4094 is not possible if X contains the address of a symbol that is
4095 not constant (TLS) or not known at final link time (PIC). */
4096
4097 static bool
4098 s390_cannot_force_const_mem (machine_mode mode, rtx x)
4099 {
4100 switch (GET_CODE (x))
4101 {
4102 case CONST_INT:
4103 case CONST_DOUBLE:
4104 case CONST_WIDE_INT:
4105 case CONST_VECTOR:
4106 /* Accept all non-symbolic constants. */
4107 return false;
4108
4109 case LABEL_REF:
4110 /* Labels are OK iff we are non-PIC. */
4111 return flag_pic != 0;
4112
4113 case SYMBOL_REF:
4114 /* 'Naked' TLS symbol references are never OK,
4115 non-TLS symbols are OK iff we are non-PIC. */
4116 if (tls_symbolic_operand (x))
4117 return true;
4118 else
4119 return flag_pic != 0;
4120
4121 case CONST:
4122 return s390_cannot_force_const_mem (mode, XEXP (x, 0));
4123 case PLUS:
4124 case MINUS:
4125 return s390_cannot_force_const_mem (mode, XEXP (x, 0))
4126 || s390_cannot_force_const_mem (mode, XEXP (x, 1));
4127
4128 case UNSPEC:
4129 switch (XINT (x, 1))
4130 {
4131 /* Only lt-relative or GOT-relative UNSPECs are OK. */
4132 case UNSPEC_LTREL_OFFSET:
4133 case UNSPEC_GOT:
4134 case UNSPEC_GOTOFF:
4135 case UNSPEC_PLTOFF:
4136 case UNSPEC_TLSGD:
4137 case UNSPEC_TLSLDM:
4138 case UNSPEC_NTPOFF:
4139 case UNSPEC_DTPOFF:
4140 case UNSPEC_GOTNTPOFF:
4141 case UNSPEC_INDNTPOFF:
4142 return false;
4143
4144 /* If the literal pool shares the code section, execute
4145 template placeholders may be put into the pool as well. */
4146 case UNSPEC_INSN:
4147 default:
4148 return true;
4149 }
4150 break;
4151
4152 default:
4153 gcc_unreachable ();
4154 }
4155 }
4156
4157 /* Returns true if the constant value OP is a legitimate general
4158 operand during and after reload. The difference to
4159 legitimate_constant_p is that this function will not accept
4160 a constant that would need to be forced to the literal pool
4161 before it can be used as operand.
4162 This function accepts all constants which can be loaded directly
4163 into a GPR. */
4164
4165 bool
4166 legitimate_reload_constant_p (rtx op)
4167 {
4168 /* Accept la(y) operands. */
4169 if (GET_CODE (op) == CONST_INT
4170 && DISP_IN_RANGE (INTVAL (op)))
4171 return true;
4172
4173 /* Accept l(g)hi/l(g)fi operands. */
4174 if (GET_CODE (op) == CONST_INT
4175 && (CONST_OK_FOR_K (INTVAL (op)) || CONST_OK_FOR_Os (INTVAL (op))))
4176 return true;
4177
4178 /* Accept lliXX operands. */
4179 if (TARGET_ZARCH
4180 && GET_CODE (op) == CONST_INT
4181 && trunc_int_for_mode (INTVAL (op), word_mode) == INTVAL (op)
4182 && s390_single_part (op, word_mode, HImode, 0) >= 0)
4183 return true;
4184
4185 if (TARGET_EXTIMM
4186 && GET_CODE (op) == CONST_INT
4187 && trunc_int_for_mode (INTVAL (op), word_mode) == INTVAL (op)
4188 && s390_single_part (op, word_mode, SImode, 0) >= 0)
4189 return true;
4190
4191 /* Accept larl operands. */
4192 if (larl_operand (op, VOIDmode))
4193 return true;
4194
4195 /* Accept floating-point zero operands that fit into a single GPR. */
4196 if (GET_CODE (op) == CONST_DOUBLE
4197 && s390_float_const_zero_p (op)
4198 && GET_MODE_SIZE (GET_MODE (op)) <= UNITS_PER_WORD)
4199 return true;
4200
4201 /* Accept double-word operands that can be split. */
4202 if (GET_CODE (op) == CONST_WIDE_INT
4203 || (GET_CODE (op) == CONST_INT
4204 && trunc_int_for_mode (INTVAL (op), word_mode) != INTVAL (op)))
4205 {
4206 machine_mode dword_mode = word_mode == SImode ? DImode : TImode;
4207 rtx hi = operand_subword (op, 0, 0, dword_mode);
4208 rtx lo = operand_subword (op, 1, 0, dword_mode);
4209 return legitimate_reload_constant_p (hi)
4210 && legitimate_reload_constant_p (lo);
4211 }
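/* For example, on a 31-bit target the DImode constant 0x100000002
   splits into the SImode words 1 and 2; both can be loaded with lhi,
   so the double-word constant as a whole is accepted.  */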
4212
4213 /* Everything else cannot be handled without reload. */
4214 return false;
4215 }
4216
4217 /* Returns true if the constant value OP is a legitimate fp operand
4218 during and after reload.
4219 This function accepts all constants which can be loaded directly
4220 into an FPR. */
4221
4222 static bool
4223 legitimate_reload_fp_constant_p (rtx op)
4224 {
4225 /* Accept floating-point zero operands if the load zero instruction
4226 can be used. Prior to z196 the load fp zero instruction caused a
4227 performance penalty if the result is used as BFP number. */
4228 if (TARGET_Z196
4229 && GET_CODE (op) == CONST_DOUBLE
4230 && s390_float_const_zero_p (op))
4231 return true;
4232
4233 return false;
4234 }
4235
4236 /* Returns true if the constant value OP is a legitimate vector operand
4237 during and after reload.
4238 This function accepts all constants which can be loaded directly
4239 into a VR. */
4240
4241 static bool
4242 legitimate_reload_vector_constant_p (rtx op)
4243 {
4244 if (TARGET_VX && GET_MODE_SIZE (GET_MODE (op)) == 16
4245 && (satisfies_constraint_j00 (op)
4246 || satisfies_constraint_jm1 (op)
4247 || satisfies_constraint_jKK (op)
4248 || satisfies_constraint_jxx (op)
4249 || satisfies_constraint_jyy (op)))
4250 return true;
4251
4252 return false;
4253 }
4254
4255 /* Given an rtx OP being reloaded into a reg required to be in class RCLASS,
4256 return the class of reg to actually use. */
4257
4258 static reg_class_t
4259 s390_preferred_reload_class (rtx op, reg_class_t rclass)
4260 {
4261 switch (GET_CODE (op))
4262 {
4263 /* Constants we cannot reload into general registers
4264 must be forced into the literal pool. */
4265 case CONST_VECTOR:
4266 case CONST_DOUBLE:
4267 case CONST_INT:
4268 case CONST_WIDE_INT:
4269 if (reg_class_subset_p (GENERAL_REGS, rclass)
4270 && legitimate_reload_constant_p (op))
4271 return GENERAL_REGS;
4272 else if (reg_class_subset_p (ADDR_REGS, rclass)
4273 && legitimate_reload_constant_p (op))
4274 return ADDR_REGS;
4275 else if (reg_class_subset_p (FP_REGS, rclass)
4276 && legitimate_reload_fp_constant_p (op))
4277 return FP_REGS;
4278 else if (reg_class_subset_p (VEC_REGS, rclass)
4279 && legitimate_reload_vector_constant_p (op))
4280 return VEC_REGS;
4281
4282 return NO_REGS;
4283
4284 /* If a symbolic constant or a PLUS is reloaded,
4285 it is most likely being used as an address, so
4286 prefer ADDR_REGS. If 'class' is not a superset
4287 of ADDR_REGS, e.g. FP_REGS, reject this reload. */
4288 case CONST:
4289 /* Symrefs cannot be pushed into the literal pool with -fPIC
4290 so we *MUST NOT* return NO_REGS for these cases
4291 (s390_cannot_force_const_mem will return true).
4292
4293 On the other hand we MUST return NO_REGS for symrefs with
4294 invalid addend which might have been pushed to the literal
4295 pool (no -fPIC). Usually we would expect them to be
4296 handled via secondary reload but this does not happen if
4297 they are used as literal pool slot replacement in reload
4298 inheritance (see emit_input_reload_insns). */
4299 if (GET_CODE (XEXP (op, 0)) == PLUS
4300 && GET_CODE (XEXP (XEXP(op, 0), 0)) == SYMBOL_REF
4301 && GET_CODE (XEXP (XEXP(op, 0), 1)) == CONST_INT)
4302 {
4303 if (flag_pic && reg_class_subset_p (ADDR_REGS, rclass))
4304 return ADDR_REGS;
4305 else
4306 return NO_REGS;
4307 }
4308 /* fallthrough */
4309 case LABEL_REF:
4310 case SYMBOL_REF:
4311 if (!legitimate_reload_constant_p (op))
4312 return NO_REGS;
4313 /* fallthrough */
4314 case PLUS:
4315 /* load address will be used. */
4316 if (reg_class_subset_p (ADDR_REGS, rclass))
4317 return ADDR_REGS;
4318 else
4319 return NO_REGS;
4320
4321 default:
4322 break;
4323 }
4324
4325 return rclass;
4326 }
4327
4328 /* Return true if ADDR is SYMBOL_REF + addend with addend being a
4329 multiple of ALIGNMENT and the SYMBOL_REF being naturally
4330 aligned. */
4331
4332 bool
4333 s390_check_symref_alignment (rtx addr, HOST_WIDE_INT alignment)
4334 {
4335 HOST_WIDE_INT addend;
4336 rtx symref;
4337
4338 /* The "required alignment" might be 0 (e.g. for certain structs
4339 accessed via BLKmode). Early abort in this case, as well as when
4340 an alignment > 8 is required. */
4341 if (alignment < 2 || alignment > 8)
4342 return false;
4343
4344 if (!s390_loadrelative_operand_p (addr, &symref, &addend))
4345 return false;
4346
4347 if (addend & (alignment - 1))
4348 return false;
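/* The test above checks that the addend is a multiple of the
   alignment: e.g. with ALIGNMENT == 4 the mask is 3, so an addend
   of 6 fails (6 & 3 == 2) while an addend of 8 passes.  */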
4349
4350 if (GET_CODE (symref) == SYMBOL_REF)
4351 {
4352 /* s390_encode_section_info is not called for anchors, since they don't
4353 have corresponding VAR_DECLs. Therefore, we cannot rely on
4354 SYMBOL_FLAG_NOTALIGN{2,4,8}_P returning useful information. */
4355 if (SYMBOL_REF_ANCHOR_P (symref))
4356 {
4357 HOST_WIDE_INT block_offset = SYMBOL_REF_BLOCK_OFFSET (symref);
4358 unsigned int block_alignment = (SYMBOL_REF_BLOCK (symref)->alignment
4359 / BITS_PER_UNIT);
4360
4361 gcc_assert (block_offset >= 0);
4362 return ((block_offset & (alignment - 1)) == 0
4363 && block_alignment >= alignment);
4364 }
4365
4366 /* We have load-relative instructions for 2-byte, 4-byte, and
4367 8-byte alignment so allow only these. */
4368 switch (alignment)
4369 {
4370 case 8: return !SYMBOL_FLAG_NOTALIGN8_P (symref);
4371 case 4: return !SYMBOL_FLAG_NOTALIGN4_P (symref);
4372 case 2: return !SYMBOL_FLAG_NOTALIGN2_P (symref);
4373 default: return false;
4374 }
4375 }
4376
4377 if (GET_CODE (symref) == UNSPEC
4378 && alignment <= UNITS_PER_LONG)
4379 return true;
4380
4381 return false;
4382 }
4383
4384 /* ADDR is moved into REG using larl. If ADDR isn't a valid larl
4385 operand, SCRATCH is used to load the even part of the address, and
4386 one is then added. */
4387
4388 void
4389 s390_reload_larl_operand (rtx reg, rtx addr, rtx scratch)
4390 {
4391 HOST_WIDE_INT addend;
4392 rtx symref;
4393
4394 if (!s390_loadrelative_operand_p (addr, &symref, &addend))
4395 gcc_unreachable ();
4396
4397 if (!(addend & 1))
4398 /* Easy case. The addend is even so larl will do fine. */
4399 emit_move_insn (reg, addr);
4400 else
4401 {
4402 /* We can leave the scratch register untouched if the target
4403 register is a valid base register. */
4404 if (REGNO (reg) < FIRST_PSEUDO_REGISTER
4405 && REGNO_REG_CLASS (REGNO (reg)) == ADDR_REGS)
4406 scratch = reg;
4407
4408 gcc_assert (REGNO (scratch) < FIRST_PSEUDO_REGISTER);
4409 gcc_assert (REGNO_REG_CLASS (REGNO (scratch)) == ADDR_REGS);
4410
4411 if (addend != 1)
4412 emit_move_insn (scratch,
4413 gen_rtx_CONST (Pmode,
4414 gen_rtx_PLUS (Pmode, symref,
4415 GEN_INT (addend - 1))));
4416 else
4417 emit_move_insn (scratch, symref);
4418
4419 /* Increment the address using la in order to avoid clobbering cc. */
4420 s390_load_address (reg, gen_rtx_PLUS (Pmode, scratch, const1_rtx));
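/* E.g. for SYMREF + 5 the scratch register receives SYMREF + 4
   (even, hence a valid larl operand) and the la above adds the
   remaining 1.  */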
4421 }
4422 }
4423
4424 /* Generate what is necessary to move between REG and MEM using
4425 SCRATCH. The direction is given by TOMEM. */
4426
4427 void
4428 s390_reload_symref_address (rtx reg, rtx mem, rtx scratch, bool tomem)
4429 {
4430 /* Reload might have pulled a constant out of the literal pool.
4431 Force it back in. */
4432 if (CONST_INT_P (mem) || GET_CODE (mem) == CONST_DOUBLE
4433 || GET_CODE (mem) == CONST_WIDE_INT
4434 || GET_CODE (mem) == CONST_VECTOR
4435 || GET_CODE (mem) == CONST)
4436 mem = force_const_mem (GET_MODE (reg), mem);
4437
4438 gcc_assert (MEM_P (mem));
4439
4440 /* For a load from memory we can leave the scratch register
4441 untouched if the target register is a valid base register. */
4442 if (!tomem
4443 && REGNO (reg) < FIRST_PSEUDO_REGISTER
4444 && REGNO_REG_CLASS (REGNO (reg)) == ADDR_REGS
4445 && GET_MODE (reg) == GET_MODE (scratch))
4446 scratch = reg;
4447
4448 /* Load address into scratch register. Since we can't have a
4449 secondary reload for a secondary reload we have to cover the case
4450 where larl would need a secondary reload here as well. */
4451 s390_reload_larl_operand (scratch, XEXP (mem, 0), scratch);
4452
4453 /* Now we can use a standard load/store to do the move. */
4454 if (tomem)
4455 emit_move_insn (replace_equiv_address (mem, scratch), reg);
4456 else
4457 emit_move_insn (reg, replace_equiv_address (mem, scratch));
4458 }
4459
4460 /* Inform reload about cases where moving X with a mode MODE to a register in
4461 RCLASS requires an extra scratch or immediate register. Return the class
4462 needed for the immediate register. */
4463
4464 static reg_class_t
4465 s390_secondary_reload (bool in_p, rtx x, reg_class_t rclass_i,
4466 machine_mode mode, secondary_reload_info *sri)
4467 {
4468 enum reg_class rclass = (enum reg_class) rclass_i;
4469
4470 /* Intermediate register needed. */
4471 if (reg_classes_intersect_p (CC_REGS, rclass))
4472 return GENERAL_REGS;
4473
4474 if (TARGET_VX)
4475 {
4476 /* The vst/vl vector move instructions allow only for short
4477 displacements. */
4478 if (MEM_P (x)
4479 && GET_CODE (XEXP (x, 0)) == PLUS
4480 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
4481 && !SHORT_DISP_IN_RANGE(INTVAL (XEXP (XEXP (x, 0), 1)))
4482 && reg_class_subset_p (rclass, VEC_REGS)
4483 && (!reg_class_subset_p (rclass, FP_REGS)
4484 || (GET_MODE_SIZE (mode) > 8
4485 && s390_class_max_nregs (FP_REGS, mode) == 1)))
4486 {
4487 if (in_p)
4488 sri->icode = (TARGET_64BIT ?
4489 CODE_FOR_reloaddi_la_in :
4490 CODE_FOR_reloadsi_la_in);
4491 else
4492 sri->icode = (TARGET_64BIT ?
4493 CODE_FOR_reloaddi_la_out :
4494 CODE_FOR_reloadsi_la_out);
4495 }
4496 }
4497
4498 if (TARGET_Z10)
4499 {
4500 HOST_WIDE_INT offset;
4501 rtx symref;
4502
4503 /* On z10 several optimizer steps may generate larl operands with
4504 an odd addend. */
4505 if (in_p
4506 && s390_loadrelative_operand_p (x, &symref, &offset)
4507 && mode == Pmode
4508 && !SYMBOL_FLAG_NOTALIGN2_P (symref)
4509 && (offset & 1) == 1)
4510 sri->icode = ((mode == DImode) ? CODE_FOR_reloaddi_larl_odd_addend_z10
4511 : CODE_FOR_reloadsi_larl_odd_addend_z10);
4512
4513 /* Handle all the (mem (symref)) accesses we cannot use the z10
4514 instructions for. */
4515 if (MEM_P (x)
4516 && s390_loadrelative_operand_p (XEXP (x, 0), NULL, NULL)
4517 && (mode == QImode
4518 || !reg_class_subset_p (rclass, GENERAL_REGS)
4519 || GET_MODE_SIZE (mode) > UNITS_PER_WORD
4520 || !s390_check_symref_alignment (XEXP (x, 0),
4521 GET_MODE_SIZE (mode))))
4522 {
4523 #define __SECONDARY_RELOAD_CASE(M,m) \
4524 case E_##M##mode: \
4525 if (TARGET_64BIT) \
4526 sri->icode = in_p ? CODE_FOR_reload##m##di_toreg_z10 : \
4527 CODE_FOR_reload##m##di_tomem_z10; \
4528 else \
4529 sri->icode = in_p ? CODE_FOR_reload##m##si_toreg_z10 : \
4530 CODE_FOR_reload##m##si_tomem_z10; \
4531 break;
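/* E.g. __SECONDARY_RELOAD_CASE (QI, qi) expands, with TARGET_64BIT,
   to:
     case E_QImode:
       sri->icode = in_p ? CODE_FOR_reloadqidi_toreg_z10
			 : CODE_FOR_reloadqidi_tomem_z10;
       break;  */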
4532
4533 switch (GET_MODE (x))
4534 {
4535 __SECONDARY_RELOAD_CASE (QI, qi);
4536 __SECONDARY_RELOAD_CASE (HI, hi);
4537 __SECONDARY_RELOAD_CASE (SI, si);
4538 __SECONDARY_RELOAD_CASE (DI, di);
4539 __SECONDARY_RELOAD_CASE (TI, ti);
4540 __SECONDARY_RELOAD_CASE (SF, sf);
4541 __SECONDARY_RELOAD_CASE (DF, df);
4542 __SECONDARY_RELOAD_CASE (TF, tf);
4543 __SECONDARY_RELOAD_CASE (SD, sd);
4544 __SECONDARY_RELOAD_CASE (DD, dd);
4545 __SECONDARY_RELOAD_CASE (TD, td);
4546 __SECONDARY_RELOAD_CASE (V1QI, v1qi);
4547 __SECONDARY_RELOAD_CASE (V2QI, v2qi);
4548 __SECONDARY_RELOAD_CASE (V4QI, v4qi);
4549 __SECONDARY_RELOAD_CASE (V8QI, v8qi);
4550 __SECONDARY_RELOAD_CASE (V16QI, v16qi);
4551 __SECONDARY_RELOAD_CASE (V1HI, v1hi);
4552 __SECONDARY_RELOAD_CASE (V2HI, v2hi);
4553 __SECONDARY_RELOAD_CASE (V4HI, v4hi);
4554 __SECONDARY_RELOAD_CASE (V8HI, v8hi);
4555 __SECONDARY_RELOAD_CASE (V1SI, v1si);
4556 __SECONDARY_RELOAD_CASE (V2SI, v2si);
4557 __SECONDARY_RELOAD_CASE (V4SI, v4si);
4558 __SECONDARY_RELOAD_CASE (V1DI, v1di);
4559 __SECONDARY_RELOAD_CASE (V2DI, v2di);
4560 __SECONDARY_RELOAD_CASE (V1TI, v1ti);
4561 __SECONDARY_RELOAD_CASE (V1SF, v1sf);
4562 __SECONDARY_RELOAD_CASE (V2SF, v2sf);
4563 __SECONDARY_RELOAD_CASE (V4SF, v4sf);
4564 __SECONDARY_RELOAD_CASE (V1DF, v1df);
4565 __SECONDARY_RELOAD_CASE (V2DF, v2df);
4566 __SECONDARY_RELOAD_CASE (V1TF, v1tf);
4567 default:
4568 gcc_unreachable ();
4569 }
4570 #undef __SECONDARY_RELOAD_CASE
4571 }
4572 }
4573
4574 /* We need a scratch register when loading a PLUS expression which
4575 is not a legitimate operand of the LOAD ADDRESS instruction. */
4576 /* LRA can deal with the transformation of a PLUS operand very well
4577 itself -- so we don't need to prompt LRA in this case. */
4578 if (! lra_in_progress && in_p && s390_plus_operand (x, mode))
4579 sri->icode = (TARGET_64BIT ?
4580 CODE_FOR_reloaddi_plus : CODE_FOR_reloadsi_plus);
4581
4582 /* When performing a multiword move from or to memory we have to make sure the
4583 second chunk in memory is addressable without causing a displacement
4584 overflow. If that would be the case we calculate the address in
4585 a scratch register. */
4586 if (MEM_P (x)
4587 && GET_CODE (XEXP (x, 0)) == PLUS
4588 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
4589 && !DISP_IN_RANGE (INTVAL (XEXP (XEXP (x, 0), 1))
4590 + GET_MODE_SIZE (mode) - 1))
4591 {
4592 /* For GENERAL_REGS a displacement overflow is no problem if occurring
4593 in an s_operand address since we may fall back to lm/stm. So we only
4594 have to care about overflows in the b+i+d case. */
4595 if ((reg_classes_intersect_p (GENERAL_REGS, rclass)
4596 && s390_class_max_nregs (GENERAL_REGS, mode) > 1
4597 && GET_CODE (XEXP (XEXP (x, 0), 0)) == PLUS)
4598 /* For FP_REGS no lm/stm is available so this check is triggered
4599 for displacement overflows in b+i+d and b+d like addresses. */
4600 || (reg_classes_intersect_p (FP_REGS, rclass)
4601 && s390_class_max_nregs (FP_REGS, mode) > 1))
4602 {
4603 if (in_p)
4604 sri->icode = (TARGET_64BIT ?
4605 CODE_FOR_reloaddi_la_in :
4606 CODE_FOR_reloadsi_la_in);
4607 else
4608 sri->icode = (TARGET_64BIT ?
4609 CODE_FOR_reloaddi_la_out :
4610 CODE_FOR_reloadsi_la_out);
4611 }
4612 }
4613
4614 /* A scratch address register is needed when a symbolic constant is
4615 copied to r0 compiling with -fPIC. In other cases the target
4616 register might be used as temporary (see legitimize_pic_address). */
4617 if (in_p && SYMBOLIC_CONST (x) && flag_pic == 2 && rclass != ADDR_REGS)
4618 sri->icode = (TARGET_64BIT ?
4619 CODE_FOR_reloaddi_PIC_addr :
4620 CODE_FOR_reloadsi_PIC_addr);
4621
4622 /* Either scratch or no register needed. */
4623 return NO_REGS;
4624 }
4625
4626 /* Implement TARGET_SECONDARY_MEMORY_NEEDED.
4627
4628 We need secondary memory to move data between GPRs and FPRs.
4629
4630 - With DFP the ldgr and lgdr instructions are available. Due to the
4631 different alignment we cannot use them for SFmode. For 31 bit a
4632 64 bit value in a GPR would be a register pair, so here we still
4633 need to go via memory.
4634
4635 - With z13 we can do the SF/SImode moves with vlgvf. Due to the
4636 overlapping of FPRs and VRs we still disallow TF/TD modes to be
4637 in full VRs so as before also on z13 we do these moves via
4638 memory.
4639
4640 FIXME: Should we try splitting it into two vlgvg's/vlvg's instead? */
4641
4642 static bool
4643 s390_secondary_memory_needed (machine_mode mode,
4644 reg_class_t class1, reg_class_t class2)
4645 {
4646 return (((reg_classes_intersect_p (class1, VEC_REGS)
4647 && reg_classes_intersect_p (class2, GENERAL_REGS))
4648 || (reg_classes_intersect_p (class1, GENERAL_REGS)
4649 && reg_classes_intersect_p (class2, VEC_REGS)))
4650 && (TARGET_TPF || !TARGET_DFP || !TARGET_64BIT
4651 || GET_MODE_SIZE (mode) != 8)
4652 && (!TARGET_VX || (SCALAR_FLOAT_MODE_P (mode)
4653 && GET_MODE_SIZE (mode) > 8)));
4654 }
4655
4656 /* Implement TARGET_SECONDARY_MEMORY_NEEDED_MODE.
4657
4658 get_secondary_mem widens its argument to BITS_PER_WORD which loses on 64bit
4659 because the movsi and movsf patterns don't handle r/f moves. */
4660
4661 static machine_mode
4662 s390_secondary_memory_needed_mode (machine_mode mode)
4663 {
4664 if (GET_MODE_BITSIZE (mode) < 32)
4665 return mode_for_size (32, GET_MODE_CLASS (mode), 0).require ();
4666 return mode;
4667 }
4668
4669 /* Generate code to load SRC, which is a PLUS that is not a
4670 legitimate operand for the LA instruction, into TARGET.
4671 SCRATCH may be used as scratch register. */
4672
4673 void
4674 s390_expand_plus_operand (rtx target, rtx src,
4675 rtx scratch)
4676 {
4677 rtx sum1, sum2;
4678 struct s390_address ad;
4679
4680 /* src must be a PLUS; get its two operands. */
4681 gcc_assert (GET_CODE (src) == PLUS);
4682 gcc_assert (GET_MODE (src) == Pmode);
4683
4684 /* Check if any of the two operands is already scheduled
4685 for replacement by reload. This can happen e.g. when
4686 float registers occur in an address. */
4687 sum1 = find_replacement (&XEXP (src, 0));
4688 sum2 = find_replacement (&XEXP (src, 1));
4689 src = gen_rtx_PLUS (Pmode, sum1, sum2);
4690
4691 /* If the address is already strictly valid, there's nothing to do. */
4692 if (!s390_decompose_address (src, &ad)
4693 || (ad.base && !REGNO_OK_FOR_BASE_P (REGNO (ad.base)))
4694 || (ad.indx && !REGNO_OK_FOR_INDEX_P (REGNO (ad.indx))))
4695 {
4696 /* Otherwise, one of the operands cannot be an address register;
4697 we reload its value into the scratch register. */
4698 if (true_regnum (sum1) < 1 || true_regnum (sum1) > 15)
4699 {
4700 emit_move_insn (scratch, sum1);
4701 sum1 = scratch;
4702 }
4703 if (true_regnum (sum2) < 1 || true_regnum (sum2) > 15)
4704 {
4705 emit_move_insn (scratch, sum2);
4706 sum2 = scratch;
4707 }
4708
4709 /* According to the way these invalid addresses are generated
4710 in reload.c, it should never happen (at least on s390) that
4711 *neither* of the PLUS components, after find_replacements
4712 was applied, is an address register. */
4713 if (sum1 == scratch && sum2 == scratch)
4714 {
4715 debug_rtx (src);
4716 gcc_unreachable ();
4717 }
4718
4719 src = gen_rtx_PLUS (Pmode, sum1, sum2);
4720 }
4721
4722 /* Emit the LOAD ADDRESS pattern. Note that reload of PLUS
4723 is only ever performed on addresses, so we can mark the
4724 sum as legitimate for LA in any case. */
4725 s390_load_address (target, src);
4726 }
4727
4728
4729 /* Return true if ADDR is a valid memory address.
4730 STRICT specifies whether strict register checking applies. */
4731
4732 static bool
4733 s390_legitimate_address_p (machine_mode mode, rtx addr, bool strict)
4734 {
4735 struct s390_address ad;
4736
4737 if (TARGET_Z10
4738 && larl_operand (addr, VOIDmode)
4739 && (mode == VOIDmode
4740 || s390_check_symref_alignment (addr, GET_MODE_SIZE (mode))))
4741 return true;
4742
4743 if (!s390_decompose_address (addr, &ad))
4744 return false;
4745
4746 /* The vector memory instructions only support short displacements.
4747 Reject invalid displacements early to prevent plenty of lay
4748 instructions from being generated later which then cannot be merged
4749 properly. */
4750 if (TARGET_VX
4751 && VECTOR_MODE_P (mode)
4752 && ad.disp != NULL_RTX
4753 && CONST_INT_P (ad.disp)
4754 && !SHORT_DISP_IN_RANGE (INTVAL (ad.disp)))
4755 return false;
4756
4757 if (strict)
4758 {
4759 if (ad.base && !REGNO_OK_FOR_BASE_P (REGNO (ad.base)))
4760 return false;
4761
4762 if (ad.indx && !REGNO_OK_FOR_INDEX_P (REGNO (ad.indx)))
4763 return false;
4764 }
4765 else
4766 {
4767 if (ad.base
4768 && !(REGNO (ad.base) >= FIRST_PSEUDO_REGISTER
4769 || REGNO_REG_CLASS (REGNO (ad.base)) == ADDR_REGS))
4770 return false;
4771
4772 if (ad.indx
4773 && !(REGNO (ad.indx) >= FIRST_PSEUDO_REGISTER
4774 || REGNO_REG_CLASS (REGNO (ad.indx)) == ADDR_REGS))
4775 return false;
4776 }
4777 return true;
4778 }
4779
4780 /* Return true if OP is a valid operand for the LA instruction.
4781 In 31-bit, we need to prove that the result is used as an
4782 address, as LA performs only a 31-bit addition. */
4783
4784 bool
4785 legitimate_la_operand_p (rtx op)
4786 {
4787 struct s390_address addr;
4788 if (!s390_decompose_address (op, &addr))
4789 return false;
4790
4791 return (TARGET_64BIT || addr.pointer);
4792 }
4793
4794 /* Return true if it is valid *and* preferable to use LA to
4795 compute the sum of OP1 and OP2. */
4796
4797 bool
4798 preferred_la_operand_p (rtx op1, rtx op2)
4799 {
4800 struct s390_address addr;
4801
4802 if (op2 != const0_rtx)
4803 op1 = gen_rtx_PLUS (Pmode, op1, op2);
4804
4805 if (!s390_decompose_address (op1, &addr))
4806 return false;
4807 if (addr.base && !REGNO_OK_FOR_BASE_P (REGNO (addr.base)))
4808 return false;
4809 if (addr.indx && !REGNO_OK_FOR_INDEX_P (REGNO (addr.indx)))
4810 return false;
4811
4812 /* Avoid LA instructions with index (and base) register on z196 or
4813 later; it is preferable to use regular add instructions when
4814 possible. Starting with zEC12 the la with index register is
4815 "uncracked" again but still slower than a regular add. */
4816 if (addr.indx && s390_tune >= PROCESSOR_2817_Z196)
4817 return false;
4818
4819 if (!TARGET_64BIT && !addr.pointer)
4820 return false;
4821
4822 if (addr.pointer)
4823 return true;
4824
4825 if ((addr.base && REG_P (addr.base) && REG_POINTER (addr.base))
4826 || (addr.indx && REG_P (addr.indx) && REG_POINTER (addr.indx)))
4827 return true;
4828
4829 return false;
4830 }
4831
4832 /* Emit a forced load-address operation to load SRC into DST.
4833 This will use the LOAD ADDRESS instruction even in situations
4834 where legitimate_la_operand_p (SRC) returns false. */
4835
4836 void
4837 s390_load_address (rtx dst, rtx src)
4838 {
4839 if (TARGET_64BIT)
4840 emit_move_insn (dst, src);
4841 else
4842 emit_insn (gen_force_la_31 (dst, src));
4843 }
4844
4845 /* Return true if it is OK to use SYMBOL_REF in a relative address. */
4846
4847 bool
4848 s390_rel_address_ok_p (rtx symbol_ref)
4849 {
4850 tree decl;
4851
4852 if (symbol_ref == s390_got_symbol () || CONSTANT_POOL_ADDRESS_P (symbol_ref))
4853 return true;
4854
4855 decl = SYMBOL_REF_DECL (symbol_ref);
4856
4857 if (!flag_pic || SYMBOL_REF_LOCAL_P (symbol_ref))
4858 return (s390_pic_data_is_text_relative
4859 || (decl
4860 && TREE_CODE (decl) == FUNCTION_DECL));
4861
4862 return false;
4863 }
4864
4865 /* Return a legitimate reference for ORIG (an address) using the
4866 register REG. If REG is 0, a new pseudo is generated.
4867
4868 There are two types of references that must be handled:
4869
4870 1. Global data references must load the address from the GOT, via
4871 the PIC reg. An insn is emitted to do this load, and the reg is
4872 returned.
4873
4874 2. Static data references, constant pool addresses, and code labels
4875 compute the address as an offset from the GOT, whose base is in
4876 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
4877 differentiate them from global data objects. The returned
4878 address is the PIC reg + an unspec constant.
4879
4880 TARGET_LEGITIMIZE_ADDRESS_P rejects symbolic references unless the PIC
4881 reg also appears in the address. */
4882
4883 rtx
4884 legitimize_pic_address (rtx orig, rtx reg)
4885 {
4886 rtx addr = orig;
4887 rtx addend = const0_rtx;
4888 rtx new_rtx = orig;
4889
4890 gcc_assert (!TLS_SYMBOLIC_CONST (addr));
4891
4892 if (GET_CODE (addr) == CONST)
4893 addr = XEXP (addr, 0);
4894
4895 if (GET_CODE (addr) == PLUS)
4896 {
4897 addend = XEXP (addr, 1);
4898 addr = XEXP (addr, 0);
4899 }
4900
4901 if ((GET_CODE (addr) == LABEL_REF
4902 || (SYMBOL_REF_P (addr) && s390_rel_address_ok_p (addr))
4903 || (GET_CODE (addr) == UNSPEC &&
4904 (XINT (addr, 1) == UNSPEC_GOTENT
4905 || XINT (addr, 1) == UNSPEC_PLT)))
4906 && GET_CODE (addend) == CONST_INT)
4907 {
4908 /* This can be locally addressed. */
4909
4910 /* larl_operand requires UNSPECs to be wrapped in a const rtx. */
4911 rtx const_addr = (GET_CODE (addr) == UNSPEC ?
4912 gen_rtx_CONST (Pmode, addr) : addr);
4913
4914 if (larl_operand (const_addr, VOIDmode)
4915 && INTVAL (addend) < HOST_WIDE_INT_1 << 31
4916 && INTVAL (addend) >= -(HOST_WIDE_INT_1 << 31))
4917 {
4918 if (INTVAL (addend) & 1)
4919 {
4920 /* LARL can't handle odd offsets, so emit a pair of LARL
4921 and LA. */
4922 rtx temp = reg? reg : gen_reg_rtx (Pmode);
4923
4924 if (!DISP_IN_RANGE (INTVAL (addend)))
4925 {
4926 HOST_WIDE_INT even = INTVAL (addend) - 1;
4927 addr = gen_rtx_PLUS (Pmode, addr, GEN_INT (even));
4928 addr = gen_rtx_CONST (Pmode, addr);
4929 addend = const1_rtx;
4930 }
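/* E.g. SYMBOL + 0x100001 has an odd offset outside the la
   displacement range, so it is rewritten above as
   (SYMBOL + 0x100000) + 1: larl loads the even part and la adds
   the remaining 1.  */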
4931
4932 emit_move_insn (temp, addr);
4933 new_rtx = gen_rtx_PLUS (Pmode, temp, addend);
4934
4935 if (reg != 0)
4936 {
4937 s390_load_address (reg, new_rtx);
4938 new_rtx = reg;
4939 }
4940 }
4941 else
4942 {
4943 /* If the offset is even, we can just use LARL. This
4944 will happen automatically. */
4945 }
4946 }
4947 else
4948 {
4949 /* No larl - Access local symbols relative to the GOT. */
4950
4951 rtx temp = reg? reg : gen_reg_rtx (Pmode);
4952
4953 if (reload_in_progress || reload_completed)
4954 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
4955
4956 addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
4957 if (addend != const0_rtx)
4958 addr = gen_rtx_PLUS (Pmode, addr, addend);
4959 addr = gen_rtx_CONST (Pmode, addr);
4960 addr = force_const_mem (Pmode, addr);
4961 emit_move_insn (temp, addr);
4962
4963 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, temp);
4964 if (reg != 0)
4965 {
4966 s390_load_address (reg, new_rtx);
4967 new_rtx = reg;
4968 }
4969 }
4970 }
4971 else if (GET_CODE (addr) == SYMBOL_REF && addend == const0_rtx)
4972 {
4973 /* A non-local symbol reference without addend.
4974
4975 The symbol ref is wrapped into an UNSPEC to make sure the
4976 proper operand modifier (@GOT or @GOTENT) will be emitted.
4977 This will tell the linker to put the symbol into the GOT.
4978
4979 Additionally the code dereferencing the GOT slot is emitted here.
4980
4981 An addend to the symref needs to be added afterwards.
4982 legitimize_pic_address calls itself recursively to handle
4983 that case. So no need to do it here. */
4984
4985 if (reg == 0)
4986 reg = gen_reg_rtx (Pmode);
4987
4988 if (TARGET_Z10)
4989 {
4990 /* Use load relative if possible.
4991 lgrl <target>, sym@GOTENT */
4992 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTENT);
4993 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
4994 new_rtx = gen_const_mem (GET_MODE (reg), new_rtx);
4995
4996 emit_move_insn (reg, new_rtx);
4997 new_rtx = reg;
4998 }
4999 else if (flag_pic == 1)
5000 {
5001 /* Assume GOT offset is a valid displacement operand (< 4k
5002 or < 512k with z990). This is handled the same way in
5003 both 31- and 64-bit code (@GOT).
5004 lg <target>, sym@GOT(r12) */
5005
5006 if (reload_in_progress || reload_completed)
5007 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
5008
5009 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
5010 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
5011 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
5012 new_rtx = gen_const_mem (Pmode, new_rtx);
5013 emit_move_insn (reg, new_rtx);
5014 new_rtx = reg;
5015 }
5016 else
5017 {
5018 /* If the GOT offset might be >= 4k, we determine the position
5019 of the GOT entry via a PC-relative LARL (@GOTENT).
5020 larl temp, sym@GOTENT
5021 lg <target>, 0(temp) */
5022
5023 rtx temp = reg ? reg : gen_reg_rtx (Pmode);
5024
5025 gcc_assert (REGNO (temp) >= FIRST_PSEUDO_REGISTER
5026 || REGNO_REG_CLASS (REGNO (temp)) == ADDR_REGS);
5027
5028 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTENT);
5029 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
5030 emit_move_insn (temp, new_rtx);
5031 new_rtx = gen_const_mem (Pmode, temp);
5032 emit_move_insn (reg, new_rtx);
5033
5034 new_rtx = reg;
5035 }
5036 }
5037 else if (GET_CODE (addr) == UNSPEC && GET_CODE (addend) == CONST_INT)
5038 {
5039 gcc_assert (XVECLEN (addr, 0) == 1);
5040 switch (XINT (addr, 1))
5041 {
5042 /* These UNSPECs address symbols (or PLT slots) relative to the GOT
5043 (not GOT slots!). In general this will exceed the
5044 displacement range, so these values belong in the literal
5045 pool. */
5046 case UNSPEC_GOTOFF:
5047 case UNSPEC_PLTOFF:
5048 new_rtx = force_const_mem (Pmode, orig);
5049 break;
5050
5051 /* For -fPIC the GOT size might exceed the displacement
5052 range so make sure the value is in the literal pool. */
5053 case UNSPEC_GOT:
5054 if (flag_pic == 2)
5055 new_rtx = force_const_mem (Pmode, orig);
5056 break;
5057
5058 /* For @GOTENT larl is used. This is handled like local
5059 symbol refs. */
5060 case UNSPEC_GOTENT:
5061 gcc_unreachable ();
5062 break;
5063
5064 /* For @PLT larl is used. This is handled like local
5065 symbol refs. */
5066 case UNSPEC_PLT:
5067 gcc_unreachable ();
5068 break;
5069
5070 /* Everything else cannot happen. */
5071 default:
5072 gcc_unreachable ();
5073 }
5074 }
5075 else if (addend != const0_rtx)
5076 {
5077 /* Otherwise, compute the sum. */
5078
5079 rtx base = legitimize_pic_address (addr, reg);
5080 new_rtx = legitimize_pic_address (addend,
5081 base == reg ? NULL_RTX : reg);
5082 if (GET_CODE (new_rtx) == CONST_INT)
5083 new_rtx = plus_constant (Pmode, base, INTVAL (new_rtx));
5084 else
5085 {
5086 if (GET_CODE (new_rtx) == PLUS && CONSTANT_P (XEXP (new_rtx, 1)))
5087 {
5088 base = gen_rtx_PLUS (Pmode, base, XEXP (new_rtx, 0));
5089 new_rtx = XEXP (new_rtx, 1);
5090 }
5091 new_rtx = gen_rtx_PLUS (Pmode, base, new_rtx);
5092 }
5093
5094 if (GET_CODE (new_rtx) == CONST)
5095 new_rtx = XEXP (new_rtx, 0);
5096 new_rtx = force_operand (new_rtx, 0);
5097 }
5098
5099 return new_rtx;
5100 }
5101
5102 /* Load the thread pointer into a register. */
5103
5104 rtx
5105 s390_get_thread_pointer (void)
5106 {
5107 rtx tp = gen_reg_rtx (Pmode);
5108
5109 emit_insn (gen_get_thread_pointer (Pmode, tp));
5110
5111 mark_reg_pointer (tp, BITS_PER_WORD);
5112
5113 return tp;
5114 }
5115
5116 /* Emit a tls call insn. The call target is the SYMBOL_REF stored
5117 in s390_tls_symbol which always refers to __tls_get_offset.
5118 The returned offset is written to RESULT_REG and a USE rtx is
5119 generated for TLS_CALL. */
5120
5121 static GTY(()) rtx s390_tls_symbol;
5122
5123 static void
5124 s390_emit_tls_call_insn (rtx result_reg, rtx tls_call)
5125 {
5126 rtx insn;
5127
5128 if (!flag_pic)
5129 emit_insn (s390_load_got ());
5130
5131 if (!s390_tls_symbol)
5132 s390_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, "__tls_get_offset");
5133
5134 insn = s390_emit_call (s390_tls_symbol, tls_call, result_reg,
5135 gen_rtx_REG (Pmode, RETURN_REGNUM));
5136
5137 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), result_reg);
5138 RTL_CONST_CALL_P (insn) = 1;
5139 }
5140
5141 /* ADDR contains a thread-local SYMBOL_REF. Generate code to compute
5142 this (thread-local) address. REG may be used as temporary. */
5143
5144 static rtx
5145 legitimize_tls_address (rtx addr, rtx reg)
5146 {
5147 rtx new_rtx, tls_call, temp, base, r2;
5148 rtx_insn *insn;
5149
5150 if (GET_CODE (addr) == SYMBOL_REF)
5151 switch (tls_symbolic_operand (addr))
5152 {
5153 case TLS_MODEL_GLOBAL_DYNAMIC:
5154 start_sequence ();
5155 r2 = gen_rtx_REG (Pmode, 2);
5156 tls_call = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_TLSGD);
5157 new_rtx = gen_rtx_CONST (Pmode, tls_call);
5158 new_rtx = force_const_mem (Pmode, new_rtx);
5159 emit_move_insn (r2, new_rtx);
5160 s390_emit_tls_call_insn (r2, tls_call);
5161 insn = get_insns ();
5162 end_sequence ();
5163
5164 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_NTPOFF);
5165 temp = gen_reg_rtx (Pmode);
5166 emit_libcall_block (insn, temp, r2, new_rtx);
5167
5168 new_rtx = gen_rtx_PLUS (Pmode, s390_get_thread_pointer (), temp);
5169 if (reg != 0)
5170 {
5171 s390_load_address (reg, new_rtx);
5172 new_rtx = reg;
5173 }
5174 break;
5175
5176 case TLS_MODEL_LOCAL_DYNAMIC:
5177 start_sequence ();
5178 r2 = gen_rtx_REG (Pmode, 2);
5179 tls_call = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TLSLDM);
5180 new_rtx = gen_rtx_CONST (Pmode, tls_call);
5181 new_rtx = force_const_mem (Pmode, new_rtx);
5182 emit_move_insn (r2, new_rtx);
5183 s390_emit_tls_call_insn (r2, tls_call);
5184 insn = get_insns ();
5185 end_sequence ();
5186
5187 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TLSLDM_NTPOFF);
5188 temp = gen_reg_rtx (Pmode);
5189 emit_libcall_block (insn, temp, r2, new_rtx);
5190
5191 new_rtx = gen_rtx_PLUS (Pmode, s390_get_thread_pointer (), temp);
5192 base = gen_reg_rtx (Pmode);
5193 s390_load_address (base, new_rtx);
5194
5195 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_DTPOFF);
5196 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
5197 new_rtx = force_const_mem (Pmode, new_rtx);
5198 temp = gen_reg_rtx (Pmode);
5199 emit_move_insn (temp, new_rtx);
5200
5201 new_rtx = gen_rtx_PLUS (Pmode, base, temp);
5202 if (reg != 0)
5203 {
5204 s390_load_address (reg, new_rtx);
5205 new_rtx = reg;
5206 }
5207 break;
5208
5209 case TLS_MODEL_INITIAL_EXEC:
5210 if (flag_pic == 1)
5211 {
5212 /* Assume GOT offset < 4k. This is handled the same way
5213 in both 31- and 64-bit code. */
5214
5215 if (reload_in_progress || reload_completed)
5216 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
5217
5218 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTNTPOFF);
5219 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
5220 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
5221 new_rtx = gen_const_mem (Pmode, new_rtx);
5222 temp = gen_reg_rtx (Pmode);
5223 emit_move_insn (temp, new_rtx);
5224 }
5225 else
5226 {
5227 /* If the GOT offset might be >= 4k, we determine the position
5228 of the GOT entry via a PC-relative LARL. */
5229
5230 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_INDNTPOFF);
5231 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
5232 temp = gen_reg_rtx (Pmode);
5233 emit_move_insn (temp, new_rtx);
5234
5235 new_rtx = gen_const_mem (Pmode, temp);
5236 temp = gen_reg_rtx (Pmode);
5237 emit_move_insn (temp, new_rtx);
5238 }
5239
5240 new_rtx = gen_rtx_PLUS (Pmode, s390_get_thread_pointer (), temp);
5241 if (reg != 0)
5242 {
5243 s390_load_address (reg, new_rtx);
5244 new_rtx = reg;
5245 }
5246 break;
5247
5248 case TLS_MODEL_LOCAL_EXEC:
5249 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_NTPOFF);
5250 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
5251 new_rtx = force_const_mem (Pmode, new_rtx);
5252 temp = gen_reg_rtx (Pmode);
5253 emit_move_insn (temp, new_rtx);
5254
5255 new_rtx = gen_rtx_PLUS (Pmode, s390_get_thread_pointer (), temp);
5256 if (reg != 0)
5257 {
5258 s390_load_address (reg, new_rtx);
5259 new_rtx = reg;
5260 }
5261 break;
5262
5263 default:
5264 gcc_unreachable ();
5265 }
5266
5267 else if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == UNSPEC)
5268 {
5269 switch (XINT (XEXP (addr, 0), 1))
5270 {
5271 case UNSPEC_INDNTPOFF:
5272 new_rtx = addr;
5273 break;
5274
5275 default:
5276 gcc_unreachable ();
5277 }
5278 }
5279
5280 else if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS
5281 && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT)
5282 {
5283 new_rtx = XEXP (XEXP (addr, 0), 0);
5284 if (GET_CODE (new_rtx) != SYMBOL_REF)
5285 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
5286
5287 new_rtx = legitimize_tls_address (new_rtx, reg);
5288 new_rtx = plus_constant (Pmode, new_rtx,
5289 INTVAL (XEXP (XEXP (addr, 0), 1)));
5290 new_rtx = force_operand (new_rtx, 0);
5291 }
5292
5293 else
5294 gcc_unreachable (); /* for now ... */
5295
5296 return new_rtx;
5297 }
5298
5299 /* Emit insns making the address in operands[1] valid for a standard
5300 move to operands[0]. operands[1] is replaced by an address which
5301 should be used instead of the former RTX to emit the move
5302 pattern. */
5303
5304 void
5305 emit_symbolic_move (rtx *operands)
5306 {
5307 rtx temp = !can_create_pseudo_p () ? operands[0] : gen_reg_rtx (Pmode);
5308
5309 if (GET_CODE (operands[0]) == MEM)
5310 operands[1] = force_reg (Pmode, operands[1]);
5311 else if (TLS_SYMBOLIC_CONST (operands[1]))
5312 operands[1] = legitimize_tls_address (operands[1], temp);
5313 else if (flag_pic)
5314 operands[1] = legitimize_pic_address (operands[1], temp);
5315 }
5316
5317 /* Try machine-dependent ways of modifying an illegitimate address X
5318 to be legitimate. If we find one, return the new, valid address.
5319
5320 OLDX is the address as it was before break_out_memory_refs was called.
5321 In some cases it is useful to look at this to decide what needs to be done.
5322
5323 MODE is the mode of the operand pointed to by X.
5324
5325 When -fpic is used, special handling is needed for symbolic references.
5326 See comments by legitimize_pic_address for details. */
5327
5328 static rtx
5329 s390_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
5330 machine_mode mode ATTRIBUTE_UNUSED)
5331 {
5332 rtx constant_term = const0_rtx;
5333
5334 if (TLS_SYMBOLIC_CONST (x))
5335 {
5336 x = legitimize_tls_address (x, 0);
5337
5338 if (s390_legitimate_address_p (mode, x, FALSE))
5339 return x;
5340 }
5341 else if (GET_CODE (x) == PLUS
5342 && (TLS_SYMBOLIC_CONST (XEXP (x, 0))
5343 || TLS_SYMBOLIC_CONST (XEXP (x, 1))))
5344 {
5345 return x;
5346 }
5347 else if (flag_pic)
5348 {
5349 if (SYMBOLIC_CONST (x)
5350 || (GET_CODE (x) == PLUS
5351 && (SYMBOLIC_CONST (XEXP (x, 0))
5352 || SYMBOLIC_CONST (XEXP (x, 1)))))
5353 x = legitimize_pic_address (x, 0);
5354
5355 if (s390_legitimate_address_p (mode, x, FALSE))
5356 return x;
5357 }
5358
5359 x = eliminate_constant_term (x, &constant_term);
5360
5361 /* Optimize loading of large displacements by splitting them
5362 into the multiple of 4K and the rest; this allows the
5363 former to be CSE'd if possible.
5364
5365 Don't do this if the displacement is added to a register
5366 pointing into the stack frame, as the offsets will
5367 change later anyway. */
5368
5369 if (GET_CODE (constant_term) == CONST_INT
5370 && !TARGET_LONG_DISPLACEMENT
5371 && !DISP_IN_RANGE (INTVAL (constant_term))
5372 && !(REG_P (x) && REGNO_PTR_FRAME_P (REGNO (x))))
5373 {
5374 HOST_WIDE_INT lower = INTVAL (constant_term) & 0xfff;
5375 HOST_WIDE_INT upper = INTVAL (constant_term) ^ lower;
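/* E.g. a displacement of 0x12345 splits into upper == 0x12000 and
   lower == 0x345: the 4K-aligned upper part is loaded into a register
   (and may be CSE'd) while lower fits the 12-bit displacement field.  */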
5376
5377 rtx temp = gen_reg_rtx (Pmode);
5378 rtx val = force_operand (GEN_INT (upper), temp);
5379 if (val != temp)
5380 emit_move_insn (temp, val);
5381
5382 x = gen_rtx_PLUS (Pmode, x, temp);
5383 constant_term = GEN_INT (lower);
5384 }
5385
5386 if (GET_CODE (x) == PLUS)
5387 {
5388 if (GET_CODE (XEXP (x, 0)) == REG)
5389 {
5390 rtx temp = gen_reg_rtx (Pmode);
5391 rtx val = force_operand (XEXP (x, 1), temp);
5392 if (val != temp)
5393 emit_move_insn (temp, val);
5394
5395 x = gen_rtx_PLUS (Pmode, XEXP (x, 0), temp);
5396 }
5397
5398 else if (GET_CODE (XEXP (x, 1)) == REG)
5399 {
5400 rtx temp = gen_reg_rtx (Pmode);
5401 rtx val = force_operand (XEXP (x, 0), temp);
5402 if (val != temp)
5403 emit_move_insn (temp, val);
5404
5405 x = gen_rtx_PLUS (Pmode, temp, XEXP (x, 1));
5406 }
5407 }
5408
5409 if (constant_term != const0_rtx)
5410 x = gen_rtx_PLUS (Pmode, x, constant_term);
5411
5412 return x;
5413 }
5414
5415 /* Try a machine-dependent way of reloading an illegitimate address AD
5416 operand. If we find one, push the reload and return the new address.
5417
5418 MODE is the mode of the enclosing MEM. OPNUM is the operand number
5419 and TYPE is the reload type of the current reload. */
5420
5421 rtx
5422 legitimize_reload_address (rtx ad, machine_mode mode ATTRIBUTE_UNUSED,
5423 int opnum, int type)
5424 {
5425 if (!optimize || TARGET_LONG_DISPLACEMENT)
5426 return NULL_RTX;
5427
5428 if (GET_CODE (ad) == PLUS)
5429 {
5430 rtx tem = simplify_binary_operation (PLUS, Pmode,
5431 XEXP (ad, 0), XEXP (ad, 1));
5432 if (tem)
5433 ad = tem;
5434 }
5435
5436 if (GET_CODE (ad) == PLUS
5437 && GET_CODE (XEXP (ad, 0)) == REG
5438 && GET_CODE (XEXP (ad, 1)) == CONST_INT
5439 && !DISP_IN_RANGE (INTVAL (XEXP (ad, 1))))
5440 {
5441 HOST_WIDE_INT lower = INTVAL (XEXP (ad, 1)) & 0xfff;
5442 HOST_WIDE_INT upper = INTVAL (XEXP (ad, 1)) ^ lower;
5443 rtx cst, tem, new_rtx;
5444
5445 cst = GEN_INT (upper);
5446 if (!legitimate_reload_constant_p (cst))
5447 cst = force_const_mem (Pmode, cst);
5448
5449 tem = gen_rtx_PLUS (Pmode, XEXP (ad, 0), cst);
5450 new_rtx = gen_rtx_PLUS (Pmode, tem, GEN_INT (lower));
5451
5452 push_reload (XEXP (tem, 1), 0, &XEXP (tem, 1), 0,
5453 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
5454 opnum, (enum reload_type) type);
5455 return new_rtx;
5456 }
5457
5458 return NULL_RTX;
5459 }
5460
5461 /* Emit code to move LEN bytes from SRC to DST. */
5462
5463 bool
5464 s390_expand_cpymem (rtx dst, rtx src, rtx len)
5465 {
5466 /* When tuning for z10 or higher we rely on the Glibc functions to
5467 do the right thing. Only for constant lengths below 64k will we
5468 generate inline code. */
5469 if (s390_tune >= PROCESSOR_2097_Z10
5470 && (GET_CODE (len) != CONST_INT || INTVAL (len) > (1<<16)))
5471 return false;
5472
5473 /* Expand memcpy for constant length operands without a loop if it
5474 is shorter that way.
5475
5476 With a constant length argument a
5477 memcpy loop (without pfd) is 36 bytes -> 6 * mvc */
5478 if (GET_CODE (len) == CONST_INT
5479 && INTVAL (len) >= 0
5480 && INTVAL (len) <= 256 * 6
5481 && (!TARGET_MVCLE || INTVAL (len) <= 256))
5482 {
5483 HOST_WIDE_INT o, l;
5484
5485 for (l = INTVAL (len), o = 0; l > 0; l -= 256, o += 256)
5486 {
5487 rtx newdst = adjust_address (dst, BLKmode, o);
5488 rtx newsrc = adjust_address (src, BLKmode, o);
5489 emit_insn (gen_cpymem_short (newdst, newsrc,
5490 GEN_INT (l > 256 ? 255 : l - 1)));
5491 }
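/* E.g. LEN == 700 emits three cpymem_short blocks of 256, 256 and
   188 bytes; the length operand is encoded as length - 1, i.e.
   GEN_INT (255) for full blocks and GEN_INT (187) for the tail.  */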
5492 }
5493
5494 else if (TARGET_MVCLE)
5495 {
5496 emit_insn (gen_cpymem_long (dst, src, convert_to_mode (Pmode, len, 1)));
5497 }
5498
5499 else
5500 {
5501 rtx dst_addr, src_addr, count, blocks, temp;
5502 rtx_code_label *loop_start_label = gen_label_rtx ();
5503 rtx_code_label *loop_end_label = gen_label_rtx ();
5504 rtx_code_label *end_label = gen_label_rtx ();
5505 machine_mode mode;
5506
5507 mode = GET_MODE (len);
5508 if (mode == VOIDmode)
5509 mode = Pmode;
5510
5511 dst_addr = gen_reg_rtx (Pmode);
5512 src_addr = gen_reg_rtx (Pmode);
5513 count = gen_reg_rtx (mode);
5514 blocks = gen_reg_rtx (mode);
5515
5516 convert_move (count, len, 1);
5517 emit_cmp_and_jump_insns (count, const0_rtx,
5518 EQ, NULL_RTX, mode, 1, end_label);
5519
5520 emit_move_insn (dst_addr, force_operand (XEXP (dst, 0), NULL_RTX));
5521 emit_move_insn (src_addr, force_operand (XEXP (src, 0), NULL_RTX));
5522 dst = change_address (dst, VOIDmode, dst_addr);
5523 src = change_address (src, VOIDmode, src_addr);
5524
5525 temp = expand_binop (mode, add_optab, count, constm1_rtx, count, 1,
5526 OPTAB_DIRECT);
5527 if (temp != count)
5528 emit_move_insn (count, temp);
5529
5530 temp = expand_binop (mode, lshr_optab, count, GEN_INT (8), blocks, 1,
5531 OPTAB_DIRECT);
5532 if (temp != blocks)
5533 emit_move_insn (blocks, temp);
5534
5535 emit_cmp_and_jump_insns (blocks, const0_rtx,
5536 EQ, NULL_RTX, mode, 1, loop_end_label);
5537
5538 emit_label (loop_start_label);
5539
5540 if (TARGET_Z10
5541 && (GET_CODE (len) != CONST_INT || INTVAL (len) > 768))
5542 {
5543 rtx prefetch;
5544
5545 /* Issue a read prefetch for the +3 cache line. */
5546 prefetch = gen_prefetch (gen_rtx_PLUS (Pmode, src_addr, GEN_INT (768)),
5547 const0_rtx, const0_rtx);
5548 PREFETCH_SCHEDULE_BARRIER_P (prefetch) = true;
5549 emit_insn (prefetch);
5550
5551 /* Issue a write prefetch for the +3 cache line. */
5552 prefetch = gen_prefetch (gen_rtx_PLUS (Pmode, dst_addr, GEN_INT (768)),
5553 const1_rtx, const0_rtx);
5554 PREFETCH_SCHEDULE_BARRIER_P (prefetch) = true;
5555 emit_insn (prefetch);
5556 }
5557
5558 emit_insn (gen_cpymem_short (dst, src, GEN_INT (255)));
5559 s390_load_address (dst_addr,
5560 gen_rtx_PLUS (Pmode, dst_addr, GEN_INT (256)));
5561 s390_load_address (src_addr,
5562 gen_rtx_PLUS (Pmode, src_addr, GEN_INT (256)));
5563
5564 temp = expand_binop (mode, add_optab, blocks, constm1_rtx, blocks, 1,
5565 OPTAB_DIRECT);
5566 if (temp != blocks)
5567 emit_move_insn (blocks, temp);
5568
5569 emit_cmp_and_jump_insns (blocks, const0_rtx,
5570 EQ, NULL_RTX, mode, 1, loop_end_label);
5571
5572 emit_jump (loop_start_label);
5573 emit_label (loop_end_label);
5574
5575 emit_insn (gen_cpymem_short (dst, src,
5576 convert_to_mode (Pmode, count, 1)));
5577 emit_label (end_label);
5578 }
5579 return true;
5580 }
5581
5582 /* Emit code to set LEN bytes at DST to VAL.
5583 Make use of clrmem if VAL is zero. */
5584
5585 void
5586 s390_expand_setmem (rtx dst, rtx len, rtx val)
5587 {
5588 if (GET_CODE (len) == CONST_INT && INTVAL (len) <= 0)
5589 return;
5590
5591 gcc_assert (GET_CODE (val) == CONST_INT || GET_MODE (val) == QImode);
5592
5593 /* Expand setmem/clrmem for a constant length operand without a
5594 loop if it will be shorter that way.
5595 clrmem loop (with PFD) is 30 bytes -> 5 * xc
5596 clrmem loop (without PFD) is 24 bytes -> 4 * xc
5597 setmem loop (with PFD) is 38 bytes -> ~4 * (mvi/stc + mvc)
5598 setmem loop (without PFD) is 32 bytes -> ~4 * (mvi/stc + mvc) */
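/* For illustration: with a hypothetical non-zero VAL and a constant
   length of 514 the setmem branch below emits two 257-byte chunks,
   each consisting of an mvi storing VAL into the first byte followed
   by a 256-byte mvc that reads one byte behind its own destination,
   thereby replicating that byte across the chunk. */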
5599 if (GET_CODE (len) == CONST_INT
5600 && ((val == const0_rtx
5601 && (INTVAL (len) <= 256 * 4
5602 || (INTVAL (len) <= 256 * 5 && TARGET_SETMEM_PFD (val, len))))
5603 || (val != const0_rtx && INTVAL (len) <= 257 * 4))
5604 && (!TARGET_MVCLE || INTVAL (len) <= 256))
5605 {
5606 HOST_WIDE_INT o, l;
5607
5608 if (val == const0_rtx)
5609 /* clrmem: emit 256 byte blockwise XCs. */
5610 for (l = INTVAL (len), o = 0; l > 0; l -= 256, o += 256)
5611 {
5612 rtx newdst = adjust_address (dst, BLKmode, o);
5613 emit_insn (gen_clrmem_short (newdst,
5614 GEN_INT (l > 256 ? 255 : l - 1)));
5615 }
5616 else
5617 /* setmem: emit 1(mvi) + 256(mvc) byte blockwise memsets by
5618 setting first byte to val and using a 256 byte mvc with one
5619 byte overlap to propagate the byte. */
5620 for (l = INTVAL (len), o = 0; l > 0; l -= 257, o += 257)
5621 {
5622 rtx newdst = adjust_address (dst, BLKmode, o);
5623 emit_move_insn (adjust_address (dst, QImode, o), val);
5624 if (l > 1)
5625 {
5626 rtx newdstp1 = adjust_address (dst, BLKmode, o + 1);
5627 emit_insn (gen_cpymem_short (newdstp1, newdst,
5628 GEN_INT (l > 257 ? 255 : l - 2)));
5629 }
5630 }
5631 }
5632
5633 else if (TARGET_MVCLE)
5634 {
5635 val = force_not_mem (convert_modes (Pmode, QImode, val, 1));
5636 if (TARGET_64BIT)
5637 emit_insn (gen_setmem_long_di (dst, convert_to_mode (Pmode, len, 1),
5638 val));
5639 else
5640 emit_insn (gen_setmem_long_si (dst, convert_to_mode (Pmode, len, 1),
5641 val));
5642 }
5643
5644 else
5645 {
5646 rtx dst_addr, count, blocks, temp, dstp1 = NULL_RTX;
5647 rtx_code_label *loop_start_label = gen_label_rtx ();
5648 rtx_code_label *onebyte_end_label = gen_label_rtx ();
5649 rtx_code_label *zerobyte_end_label = gen_label_rtx ();
5650 rtx_code_label *restbyte_end_label = gen_label_rtx ();
5651 machine_mode mode;
5652
5653 mode = GET_MODE (len);
5654 if (mode == VOIDmode)
5655 mode = Pmode;
5656
5657 dst_addr = gen_reg_rtx (Pmode);
5658 count = gen_reg_rtx (mode);
5659 blocks = gen_reg_rtx (mode);
5660
5661 convert_move (count, len, 1);
5662 emit_cmp_and_jump_insns (count, const0_rtx,
5663 EQ, NULL_RTX, mode, 1, zerobyte_end_label,
5664 profile_probability::very_unlikely ());
5665
5666 /* We need to make a copy of the target address since memset is
5667 supposed to return it unmodified. We have to make it here
5668 already since the new reg is used at onebyte_end_label. */
5669 emit_move_insn (dst_addr, force_operand (XEXP (dst, 0), NULL_RTX));
5670 dst = change_address (dst, VOIDmode, dst_addr);
5671
5672 if (val != const0_rtx)
5673 {
5674 /* When using the overlapping mvc the original target
5675 address is only accessed as single byte entity (even by
5676 the mvc reading this value). */
5677 set_mem_size (dst, 1);
5678 dstp1 = adjust_address (dst, VOIDmode, 1);
5679 emit_cmp_and_jump_insns (count,
5680 const1_rtx, EQ, NULL_RTX, mode, 1,
5681 onebyte_end_label,
5682 profile_probability::very_unlikely ());
5683 }
5684
5685 /* There is one unconditional (mvi+mvc)/xc after the loop
5686 dealing with the rest of the bytes, subtracting two (mvi+mvc)
5687 or one (xc) here leaves this number of bytes to be handled by
5688 it. */
5689 temp = expand_binop (mode, add_optab, count,
5690 val == const0_rtx ? constm1_rtx : GEN_INT (-2),
5691 count, 1, OPTAB_DIRECT);
5692 if (temp != count)
5693 emit_move_insn (count, temp);
5694
5695 temp = expand_binop (mode, lshr_optab, count, GEN_INT (8), blocks, 1,
5696 OPTAB_DIRECT);
5697 if (temp != blocks)
5698 emit_move_insn (blocks, temp);
5699
5700 emit_cmp_and_jump_insns (blocks, const0_rtx,
5701 EQ, NULL_RTX, mode, 1, restbyte_end_label);
5702
5703 emit_jump (loop_start_label);
5704
5705 if (val != const0_rtx)
5706 {
5707 /* The 1 byte != 0 special case. Not handled efficiently
5708 since we require two jumps for that. However, this
5709 should be very rare. */
5710 emit_label (onebyte_end_label);
5711 emit_move_insn (adjust_address (dst, QImode, 0), val);
5712 emit_jump (zerobyte_end_label);
5713 }
5714
5715 emit_label (loop_start_label);
5716
5717 if (TARGET_SETMEM_PFD (val, len))
5718 {
5719 /* Issue a write prefetch. */
5720 rtx distance = GEN_INT (TARGET_SETMEM_PREFETCH_DISTANCE);
5721 rtx prefetch = gen_prefetch (gen_rtx_PLUS (Pmode, dst_addr, distance),
5722 const1_rtx, const0_rtx);
5723 emit_insn (prefetch);
5724 PREFETCH_SCHEDULE_BARRIER_P (prefetch) = true;
5725 }
5726
5727 if (val == const0_rtx)
5728 emit_insn (gen_clrmem_short (dst, GEN_INT (255)));
5729 else
5730 {
5731 /* Set the first byte in the block to the value and use an
5732 overlapping mvc for the block. */
5733 emit_move_insn (adjust_address (dst, QImode, 0), val);
5734 emit_insn (gen_cpymem_short (dstp1, dst, GEN_INT (254)));
5735 }
5736 s390_load_address (dst_addr,
5737 gen_rtx_PLUS (Pmode, dst_addr, GEN_INT (256)));
5738
5739 temp = expand_binop (mode, add_optab, blocks, constm1_rtx, blocks, 1,
5740 OPTAB_DIRECT);
5741 if (temp != blocks)
5742 emit_move_insn (blocks, temp);
5743
5744 emit_cmp_and_jump_insns (blocks, const0_rtx,
5745 NE, NULL_RTX, mode, 1, loop_start_label);
5746
5747 emit_label (restbyte_end_label);
5748
5749 if (val == const0_rtx)
5750 emit_insn (gen_clrmem_short (dst, convert_to_mode (Pmode, count, 1)));
5751 else
5752 {
5753 /* Set the first byte in the block to the value and use an
5754 overlapping mvc for the block. */
5755 emit_move_insn (adjust_address (dst, QImode, 0), val);
5756 /* The "execute" instruction only uses the lowest 8 bits of count;
5757 that is exactly what we need here. */
5758 emit_insn (gen_cpymem_short (dstp1, dst,
5759 convert_to_mode (Pmode, count, 1)));
5760 }
5761
5762 emit_label (zerobyte_end_label);
5763 }
5764 }
5765
5766 /* Emit code to compare LEN bytes at OP0 with those at OP1,
5767 and return the result in TARGET. */
5768
5769 bool
5770 s390_expand_cmpmem (rtx target, rtx op0, rtx op1, rtx len)
5771 {
5772 rtx ccreg = gen_rtx_REG (CCUmode, CC_REGNUM);
5773 rtx tmp;
5774
5775 /* When tuning for z10 or higher we rely on the Glibc functions to
5776 do the right thing. Only for constant lengths below 64k do we
5777 generate inline code. */
5778 if (s390_tune >= PROCESSOR_2097_Z10
5779 && (GET_CODE (len) != CONST_INT || INTVAL (len) > (1<<16)))
5780 return false;
5781
5782 /* As the result of CMPINT is inverted compared to what we need,
5783 we have to swap the operands. */
5784 tmp = op0; op0 = op1; op1 = tmp;
5785
5786 if (GET_CODE (len) == CONST_INT && INTVAL (len) >= 0 && INTVAL (len) <= 256)
5787 {
5788 if (INTVAL (len) > 0)
5789 {
5790 emit_insn (gen_cmpmem_short (op0, op1, GEN_INT (INTVAL (len) - 1)));
5791 emit_insn (gen_cmpint (target, ccreg));
5792 }
5793 else
5794 emit_move_insn (target, const0_rtx);
5795 }
5796 else if (TARGET_MVCLE)
5797 {
5798 emit_insn (gen_cmpmem_long (op0, op1, convert_to_mode (Pmode, len, 1)));
5799 emit_insn (gen_cmpint (target, ccreg));
5800 }
5801 else
5802 {
5803 rtx addr0, addr1, count, blocks, temp;
5804 rtx_code_label *loop_start_label = gen_label_rtx ();
5805 rtx_code_label *loop_end_label = gen_label_rtx ();
5806 rtx_code_label *end_label = gen_label_rtx ();
5807 machine_mode mode;
5808
5809 mode = GET_MODE (len);
5810 if (mode == VOIDmode)
5811 mode = Pmode;
5812
5813 addr0 = gen_reg_rtx (Pmode);
5814 addr1 = gen_reg_rtx (Pmode);
5815 count = gen_reg_rtx (mode);
5816 blocks = gen_reg_rtx (mode);
5817
5818 convert_move (count, len, 1);
5819 emit_cmp_and_jump_insns (count, const0_rtx,
5820 EQ, NULL_RTX, mode, 1, end_label);
5821
5822 emit_move_insn (addr0, force_operand (XEXP (op0, 0), NULL_RTX));
5823 emit_move_insn (addr1, force_operand (XEXP (op1, 0), NULL_RTX));
5824 op0 = change_address (op0, VOIDmode, addr0);
5825 op1 = change_address (op1, VOIDmode, addr1);
5826
5827 temp = expand_binop (mode, add_optab, count, constm1_rtx, count, 1,
5828 OPTAB_DIRECT);
5829 if (temp != count)
5830 emit_move_insn (count, temp);
5831
5832 temp = expand_binop (mode, lshr_optab, count, GEN_INT (8), blocks, 1,
5833 OPTAB_DIRECT);
5834 if (temp != blocks)
5835 emit_move_insn (blocks, temp);
5836
5837 emit_cmp_and_jump_insns (blocks, const0_rtx,
5838 EQ, NULL_RTX, mode, 1, loop_end_label);
5839
5840 emit_label (loop_start_label);
5841
5842 if (TARGET_Z10
5843 && (GET_CODE (len) != CONST_INT || INTVAL (len) > 512))
5844 {
5845 rtx prefetch;
5846
5847 /* Issue a read prefetch for the +2 cache line of operand 1. */
5848 prefetch = gen_prefetch (gen_rtx_PLUS (Pmode, addr0, GEN_INT (512)),
5849 const0_rtx, const0_rtx);
5850 emit_insn (prefetch);
5851 PREFETCH_SCHEDULE_BARRIER_P (prefetch) = true;
5852
5853 /* Issue a read prefetch for the +2 cache line of operand 2. */
5854 prefetch = gen_prefetch (gen_rtx_PLUS (Pmode, addr1, GEN_INT (512)),
5855 const0_rtx, const0_rtx);
5856 emit_insn (prefetch);
5857 PREFETCH_SCHEDULE_BARRIER_P (prefetch) = true;
5858 }
5859
5860 emit_insn (gen_cmpmem_short (op0, op1, GEN_INT (255)));
5861 temp = gen_rtx_NE (VOIDmode, ccreg, const0_rtx);
5862 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
5863 gen_rtx_LABEL_REF (VOIDmode, end_label), pc_rtx);
5864 temp = gen_rtx_SET (pc_rtx, temp);
5865 emit_jump_insn (temp);
5866
5867 s390_load_address (addr0,
5868 gen_rtx_PLUS (Pmode, addr0, GEN_INT (256)));
5869 s390_load_address (addr1,
5870 gen_rtx_PLUS (Pmode, addr1, GEN_INT (256)));
5871
5872 temp = expand_binop (mode, add_optab, blocks, constm1_rtx, blocks, 1,
5873 OPTAB_DIRECT);
5874 if (temp != blocks)
5875 emit_move_insn (blocks, temp);
5876
5877 emit_cmp_and_jump_insns (blocks, const0_rtx,
5878 EQ, NULL_RTX, mode, 1, loop_end_label);
5879
5880 emit_jump (loop_start_label);
5881 emit_label (loop_end_label);
5882
5883 emit_insn (gen_cmpmem_short (op0, op1,
5884 convert_to_mode (Pmode, count, 1)));
5885 emit_label (end_label);
5886
5887 emit_insn (gen_cmpint (target, ccreg));
5888 }
5889 return true;
5890 }
5891
5892 /* Emit a conditional jump to LABEL for condition code mask MASK using
5893 comparison operator COMPARISON. Return the emitted jump insn. */
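/* In CCRAWmode the MASK operand encodes the four condition codes as
   individual bits in BRC branch-mask order (8 = CC0, 4 = CC1, 2 = CC2,
   1 = CC3): EQ branches if the current CC is selected by MASK, NE if
   it is not. For example, s390_emit_ccraw_jump (8, NE, label) branches
   whenever the CC is anything but CC0. */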
5894
5895 static rtx_insn *
5896 s390_emit_ccraw_jump (HOST_WIDE_INT mask, enum rtx_code comparison, rtx label)
5897 {
5898 rtx temp;
5899
5900 gcc_assert (comparison == EQ || comparison == NE);
5901 gcc_assert (mask > 0 && mask < 15);
5902
5903 temp = gen_rtx_fmt_ee (comparison, VOIDmode,
5904 gen_rtx_REG (CCRAWmode, CC_REGNUM), GEN_INT (mask));
5905 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
5906 gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
5907 temp = gen_rtx_SET (pc_rtx, temp);
5908 return emit_jump_insn (temp);
5909 }
5910
5911 /* Emit the instructions to implement strlen of STRING and store the
5912 result in TARGET. The string has the known ALIGNMENT. This
5913 version uses vector instructions and is therefore not appropriate
5914 for targets prior to z13. */
5915
5916 void
5917 s390_expand_vec_strlen (rtx target, rtx string, rtx alignment)
5918 {
5919 rtx highest_index_to_load_reg = gen_reg_rtx (Pmode);
5920 rtx str_reg = gen_reg_rtx (V16QImode);
5921 rtx str_addr_base_reg = gen_reg_rtx (Pmode);
5922 rtx str_idx_reg = gen_reg_rtx (Pmode);
5923 rtx result_reg = gen_reg_rtx (V16QImode);
5924 rtx is_aligned_label = gen_label_rtx ();
5925 rtx into_loop_label = NULL_RTX;
5926 rtx loop_start_label = gen_label_rtx ();
5927 rtx temp;
5928 rtx len = gen_reg_rtx (QImode);
5929 rtx cond;
5930
5931 s390_load_address (str_addr_base_reg, XEXP (string, 0));
5932 emit_move_insn (str_idx_reg, const0_rtx);
5933
5934 if (INTVAL (alignment) < 16)
5935 {
5936 /* Check whether the address happens to be properly aligned; if so,
5937 jump directly to the aligned loop. */
5938 emit_cmp_and_jump_insns (gen_rtx_AND (Pmode,
5939 str_addr_base_reg, GEN_INT (15)),
5940 const0_rtx, EQ, NULL_RTX,
5941 Pmode, 1, is_aligned_label);
5942
5943 temp = gen_reg_rtx (Pmode);
5944 temp = expand_binop (Pmode, and_optab, str_addr_base_reg,
5945 GEN_INT (15), temp, 1, OPTAB_DIRECT);
5946 gcc_assert (REG_P (temp));
5947 highest_index_to_load_reg =
5948 expand_binop (Pmode, sub_optab, GEN_INT (15), temp,
5949 highest_index_to_load_reg, 1, OPTAB_DIRECT);
5950 gcc_assert (REG_P (highest_index_to_load_reg));
5951 emit_insn (gen_vllv16qi (str_reg,
5952 convert_to_mode (SImode, highest_index_to_load_reg, 1),
5953 gen_rtx_MEM (BLKmode, str_addr_base_reg)));
5954
5955 into_loop_label = gen_label_rtx ();
5956 s390_emit_jump (into_loop_label, NULL_RTX);
5957 emit_barrier ();
5958 }
5959
5960 emit_label (is_aligned_label);
5961 LABEL_NUSES (is_aligned_label) = INTVAL (alignment) < 16 ? 2 : 1;
5962
5963 /* Reaching this point we are only performing 16-byte aligned
5964 loads. */
5965 emit_move_insn (highest_index_to_load_reg, GEN_INT (15));
5966
5967 emit_label (loop_start_label);
5968 LABEL_NUSES (loop_start_label) = 1;
5969
5970 /* Load 16 bytes of the string into VR. */
5971 emit_move_insn (str_reg,
5972 gen_rtx_MEM (V16QImode,
5973 gen_rtx_PLUS (Pmode, str_idx_reg,
5974 str_addr_base_reg)));
5975 if (into_loop_label != NULL_RTX)
5976 {
5977 emit_label (into_loop_label);
5978 LABEL_NUSES (into_loop_label) = 1;
5979 }
5980
5981 /* Increment string index by 16 bytes. */
5982 expand_binop (Pmode, add_optab, str_idx_reg, GEN_INT (16),
5983 str_idx_reg, 1, OPTAB_DIRECT);
5984
5985 emit_insn (gen_vec_vfenesv16qi (result_reg, str_reg, str_reg,
5986 GEN_INT (VSTRING_FLAG_ZS | VSTRING_FLAG_CS)));
5987
5988 add_int_reg_note (s390_emit_ccraw_jump (8, NE, loop_start_label),
5989 REG_BR_PROB,
5990 profile_probability::very_likely ().to_reg_br_prob_note ());
5991 emit_insn (gen_vec_extractv16qiqi (len, result_reg, GEN_INT (7)));
5992
5993 /* If the string pointer wasn't aligned we have loaded less than 16
5994 bytes and the remaining bytes got filled with zeros (by vll).
5995 Now we have to check whether the resulting index lies within the
5996 bytes actually part of the string. */
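/* E.g. for a hypothetical string starting 13 bytes into a 16-byte
   block the prologue loaded 16 - 13 = 3 bytes via vll, leaving
   highest_index_to_load_reg at 2; a vfene result index above 2 then
   refers to one of the zero-filled padding bytes rather than to a
   byte of the actual string. */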
5997
5998 cond = s390_emit_compare (GT, convert_to_mode (Pmode, len, 1),
5999 highest_index_to_load_reg);
6000 s390_load_address (highest_index_to_load_reg,
6001 gen_rtx_PLUS (Pmode, highest_index_to_load_reg,
6002 const1_rtx));
6003 if (TARGET_64BIT)
6004 emit_insn (gen_movdicc (str_idx_reg, cond,
6005 highest_index_to_load_reg, str_idx_reg));
6006 else
6007 emit_insn (gen_movsicc (str_idx_reg, cond,
6008 highest_index_to_load_reg, str_idx_reg));
6009
6010 add_reg_br_prob_note (s390_emit_jump (is_aligned_label, cond),
6011 profile_probability::very_unlikely ());
6012
6013 expand_binop (Pmode, add_optab, str_idx_reg,
6014 GEN_INT (-16), str_idx_reg, 1, OPTAB_DIRECT);
6015 /* FIXME: len is already zero extended - so avoid the llgcr emitted
6016 here. */
6017 temp = expand_binop (Pmode, add_optab, str_idx_reg,
6018 convert_to_mode (Pmode, len, 1),
6019 target, 1, OPTAB_DIRECT);
6020 if (temp != target)
6021 emit_move_insn (target, temp);
6022 }
6023
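/* Emit a vector-instruction implementation of movstr: copy the
   zero-terminated string at SRC to DST and set RESULT to the address
   of the copied terminating zero byte in the destination (stpcpy-style
   return value). Like the strlen expander above this relies on vector
   instructions and is therefore only suitable for z13 and later. */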
6024 void
6025 s390_expand_vec_movstr (rtx result, rtx dst, rtx src)
6026 {
6027 rtx temp = gen_reg_rtx (Pmode);
6028 rtx src_addr = XEXP (src, 0);
6029 rtx dst_addr = XEXP (dst, 0);
6030 rtx src_addr_reg = gen_reg_rtx (Pmode);
6031 rtx dst_addr_reg = gen_reg_rtx (Pmode);
6032 rtx offset = gen_reg_rtx (Pmode);
6033 rtx vsrc = gen_reg_rtx (V16QImode);
6034 rtx vpos = gen_reg_rtx (V16QImode);
6035 rtx loadlen = gen_reg_rtx (SImode);
6036 rtx gpos_qi = gen_reg_rtx (QImode);
6037 rtx gpos = gen_reg_rtx (SImode);
6038 rtx done_label = gen_label_rtx ();
6039 rtx loop_label = gen_label_rtx ();
6040 rtx exit_label = gen_label_rtx ();
6041 rtx full_label = gen_label_rtx ();
6042
6043 /* Perform a quick check for the string ending within the first (up
6044 to) 16 bytes and exit early if successful. */
6045
6046 emit_insn (gen_vlbb (vsrc, src, GEN_INT (6)));
6047 emit_insn (gen_lcbb (loadlen, src_addr, GEN_INT (6)));
6048 emit_insn (gen_vfenezv16qi (vpos, vsrc, vsrc));
6049 emit_insn (gen_vec_extractv16qiqi (gpos_qi, vpos, GEN_INT (7)));
6050 emit_move_insn (gpos, gen_rtx_SUBREG (SImode, gpos_qi, 0));
6051 /* gpos is the byte index if a zero was found and 16 otherwise.
6052 So if it is lower than the loaded bytes we have a hit. */
6053 emit_cmp_and_jump_insns (gpos, loadlen, GE, NULL_RTX, SImode, 1,
6054 full_label);
6055 emit_insn (gen_vstlv16qi (vsrc, gpos, dst));
6056
6057 force_expand_binop (Pmode, add_optab, dst_addr, gpos, result,
6058 1, OPTAB_DIRECT);
6059 emit_jump (exit_label);
6060 emit_barrier ();
6061
6062 emit_label (full_label);
6063 LABEL_NUSES (full_label) = 1;
6064
6065 /* Calculate `offset' so that src + offset points to the last byte
6066 before 16 byte alignment. */
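/* Worked example (hypothetical): for src_addr = 0x1009 we get temp = 9
   and offset = 15 - 9 = 6, so the vstl below stores bytes 0..6 (the 7
   bytes up to 0x100f) and, after the increment, src_addr + offset =
   0x1010 is 16-byte aligned for the main loop. */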
6067
6068 /* temp = src_addr & 0xf */
6069 force_expand_binop (Pmode, and_optab, src_addr, GEN_INT (15), temp,
6070 1, OPTAB_DIRECT);
6071
6072 /* offset = 0xf - temp */
6073 emit_move_insn (offset, GEN_INT (15));
6074 force_expand_binop (Pmode, sub_optab, offset, temp, offset,
6075 1, OPTAB_DIRECT);
6076
6077 /* Store `offset' bytes in the destination string. The quick check
6078 has loaded at least `offset' bytes into vsrc. */
6079
6080 emit_insn (gen_vstlv16qi (vsrc, gen_lowpart (SImode, offset), dst));
6081
6082 /* Advance to the next byte to be loaded. */
6083 force_expand_binop (Pmode, add_optab, offset, const1_rtx, offset,
6084 1, OPTAB_DIRECT);
6085
6086 /* Make sure the addresses are single regs which can be used as a
6087 base. */
6088 emit_move_insn (src_addr_reg, src_addr);
6089 emit_move_insn (dst_addr_reg, dst_addr);
6090
6091 /* MAIN LOOP */
6092
6093 emit_label (loop_label);
6094 LABEL_NUSES (loop_label) = 1;
6095
6096 emit_move_insn (vsrc,
6097 gen_rtx_MEM (V16QImode,
6098 gen_rtx_PLUS (Pmode, src_addr_reg, offset)));
6099
6100 emit_insn (gen_vec_vfenesv16qi (vpos, vsrc, vsrc,
6101 GEN_INT (VSTRING_FLAG_ZS | VSTRING_FLAG_CS)));
6102 add_int_reg_note (s390_emit_ccraw_jump (8, EQ, done_label),
6103 REG_BR_PROB, profile_probability::very_unlikely ()
6104 .to_reg_br_prob_note ());
6105
6106 emit_move_insn (gen_rtx_MEM (V16QImode,
6107 gen_rtx_PLUS (Pmode, dst_addr_reg, offset)),
6108 vsrc);
6109 /* offset += 16 */
6110 force_expand_binop (Pmode, add_optab, offset, GEN_INT (16),
6111 offset, 1, OPTAB_DIRECT);
6112
6113 emit_jump (loop_label);
6114 emit_barrier ();
6115
6116 /* REGULAR EXIT */
6117
6118 /* We are done. Add the offset of the zero character to the dst_addr
6119 pointer to get the result. */
6120
6121 emit_label (done_label);
6122 LABEL_NUSES (done_label) = 1;
6123
6124 force_expand_binop (Pmode, add_optab, dst_addr_reg, offset, dst_addr_reg,
6125 1, OPTAB_DIRECT);
6126
6127 emit_insn (gen_vec_extractv16qiqi (gpos_qi, vpos, GEN_INT (7)));
6128 emit_move_insn (gpos, gen_rtx_SUBREG (SImode, gpos_qi, 0));
6129
6130 emit_insn (gen_vstlv16qi (vsrc, gpos, gen_rtx_MEM (BLKmode, dst_addr_reg)));
6131
6132 force_expand_binop (Pmode, add_optab, dst_addr_reg, gpos, result,
6133 1, OPTAB_DIRECT);
6134
6135 /* EARLY EXIT */
6136
6137 emit_label (exit_label);
6138 LABEL_NUSES (exit_label) = 1;
6139 }
6140
6141
6142 /* Expand conditional increment or decrement using alc/slb instructions.
6143 Should generate code setting DST to either SRC or SRC + INCREMENT,
6144 depending on the result of the comparison CMP_OP0 CMP_CODE CMP_OP1.
6145 Returns true if successful, false otherwise.
6146
6147 That makes it possible to implement some if-constructs without jumps e.g.:
6148 (borrow = CC0 | CC1 and carry = CC2 | CC3)
6149 unsigned int a, b, c;
6150 if (a < b) c++; -> CCU b > a -> CC2; c += carry;
6151 if (a < b) c--; -> CCL3 a - b -> borrow; c -= borrow;
6152 if (a <= b) c++; -> CCL3 b - a -> borrow; c += carry;
6153 if (a <= b) c--; -> CCU a <= b -> borrow; c -= borrow;
6154
6155 Checks for EQ and NE with a nonzero value need an additional xor e.g.:
6156 if (a == b) c++; -> CCL3 a ^= b; 0 - a -> borrow; c += carry;
6157 if (a == b) c--; -> CCU a ^= b; a <= 0 -> CC0 | CC1; c -= borrow;
6158 if (a != b) c++; -> CCU a ^= b; a > 0 -> CC2; c += carry;
6159 if (a != b) c--; -> CCL3 a ^= b; 0 - a -> borrow; c -= borrow; */
6160
6161 bool
6162 s390_expand_addcc (enum rtx_code cmp_code, rtx cmp_op0, rtx cmp_op1,
6163 rtx dst, rtx src, rtx increment)
6164 {
6165 machine_mode cmp_mode;
6166 machine_mode cc_mode;
6167 rtx op_res;
6168 rtx insn;
6169 rtvec p;
6170 int ret;
6171
6172 if ((GET_MODE (cmp_op0) == SImode || GET_MODE (cmp_op0) == VOIDmode)
6173 && (GET_MODE (cmp_op1) == SImode || GET_MODE (cmp_op1) == VOIDmode))
6174 cmp_mode = SImode;
6175 else if ((GET_MODE (cmp_op0) == DImode || GET_MODE (cmp_op0) == VOIDmode)
6176 && (GET_MODE (cmp_op1) == DImode || GET_MODE (cmp_op1) == VOIDmode))
6177 cmp_mode = DImode;
6178 else
6179 return false;
6180
6181 /* Try ADD LOGICAL WITH CARRY. */
6182 if (increment == const1_rtx)
6183 {
6184 /* Determine CC mode to use. */
6185 if (cmp_code == EQ || cmp_code == NE)
6186 {
6187 if (cmp_op1 != const0_rtx)
6188 {
6189 cmp_op0 = expand_simple_binop (cmp_mode, XOR, cmp_op0, cmp_op1,
6190 NULL_RTX, 0, OPTAB_WIDEN);
6191 cmp_op1 = const0_rtx;
6192 }
6193
6194 cmp_code = cmp_code == EQ ? LEU : GTU;
6195 }
6196
6197 if (cmp_code == LTU || cmp_code == LEU)
6198 {
6199 rtx tem = cmp_op0;
6200 cmp_op0 = cmp_op1;
6201 cmp_op1 = tem;
6202 cmp_code = swap_condition (cmp_code);
6203 }
6204
6205 switch (cmp_code)
6206 {
6207 case GTU:
6208 cc_mode = CCUmode;
6209 break;
6210
6211 case GEU:
6212 cc_mode = CCL3mode;
6213 break;
6214
6215 default:
6216 return false;
6217 }
6218
6219 /* Emit comparison instruction pattern. */
6220 if (!register_operand (cmp_op0, cmp_mode))
6221 cmp_op0 = force_reg (cmp_mode, cmp_op0);
6222
6223 insn = gen_rtx_SET (gen_rtx_REG (cc_mode, CC_REGNUM),
6224 gen_rtx_COMPARE (cc_mode, cmp_op0, cmp_op1));
6225 /* We use insn_invalid_p here to add clobbers if required. */
6226 ret = insn_invalid_p (emit_insn (insn), false);
6227 gcc_assert (!ret);
6228
6229 /* Emit ALC instruction pattern. */
6230 op_res = gen_rtx_fmt_ee (cmp_code, GET_MODE (dst),
6231 gen_rtx_REG (cc_mode, CC_REGNUM),
6232 const0_rtx);
6233
6234 if (src != const0_rtx)
6235 {
6236 if (!register_operand (src, GET_MODE (dst)))
6237 src = force_reg (GET_MODE (dst), src);
6238
6239 op_res = gen_rtx_PLUS (GET_MODE (dst), op_res, src);
6240 op_res = gen_rtx_PLUS (GET_MODE (dst), op_res, const0_rtx);
6241 }
6242
6243 p = rtvec_alloc (2);
6244 RTVEC_ELT (p, 0) =
6245 gen_rtx_SET (dst, op_res);
6246 RTVEC_ELT (p, 1) =
6247 gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, CC_REGNUM));
6248 emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
6249
6250 return true;
6251 }
6252
6253 /* Try SUBTRACT LOGICAL WITH BORROW. */
6254 if (increment == constm1_rtx)
6255 {
6256 /* Determine CC mode to use. */
6257 if (cmp_code == EQ || cmp_code == NE)
6258 {
6259 if (cmp_op1 != const0_rtx)
6260 {
6261 cmp_op0 = expand_simple_binop (cmp_mode, XOR, cmp_op0, cmp_op1,
6262 NULL_RTX, 0, OPTAB_WIDEN);
6263 cmp_op1 = const0_rtx;
6264 }
6265
6266 cmp_code = cmp_code == EQ ? LEU : GTU;
6267 }
6268
6269 if (cmp_code == GTU || cmp_code == GEU)
6270 {
6271 rtx tem = cmp_op0;
6272 cmp_op0 = cmp_op1;
6273 cmp_op1 = tem;
6274 cmp_code = swap_condition (cmp_code);
6275 }
6276
6277 switch (cmp_code)
6278 {
6279 case LEU:
6280 cc_mode = CCUmode;
6281 break;
6282
6283 case LTU:
6284 cc_mode = CCL3mode;
6285 break;
6286
6287 default:
6288 return false;
6289 }
6290
6291 /* Emit comparison instruction pattern. */
6292 if (!register_operand (cmp_op0, cmp_mode))
6293 cmp_op0 = force_reg (cmp_mode, cmp_op0);
6294
6295 insn = gen_rtx_SET (gen_rtx_REG (cc_mode, CC_REGNUM),
6296 gen_rtx_COMPARE (cc_mode, cmp_op0, cmp_op1));
6297 /* We use insn_invalid_p here to add clobbers if required. */
6298 ret = insn_invalid_p (emit_insn (insn), false);
6299 gcc_assert (!ret);
6300
6301 /* Emit SLB instruction pattern. */
6302 if (!register_operand (src, GET_MODE (dst)))
6303 src = force_reg (GET_MODE (dst), src);
6304
6305 op_res = gen_rtx_MINUS (GET_MODE (dst),
6306 gen_rtx_MINUS (GET_MODE (dst), src, const0_rtx),
6307 gen_rtx_fmt_ee (cmp_code, GET_MODE (dst),
6308 gen_rtx_REG (cc_mode, CC_REGNUM),
6309 const0_rtx));
6310 p = rtvec_alloc (2);
6311 RTVEC_ELT (p, 0) =
6312 gen_rtx_SET (dst, op_res);
6313 RTVEC_ELT (p, 1) =
6314 gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, CC_REGNUM));
6315 emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
6316
6317 return true;
6318 }
6319
6320 return false;
6321 }
6322
6323 /* Expand code for the insv template. Return true if successful. */
6324
6325 bool
6326 s390_expand_insv (rtx dest, rtx op1, rtx op2, rtx src)
6327 {
6328 int bitsize = INTVAL (op1);
6329 int bitpos = INTVAL (op2);
6330 machine_mode mode = GET_MODE (dest);
6331 machine_mode smode;
6332 int smode_bsize, mode_bsize;
6333 rtx op, clobber;
6334
6335 if (bitsize + bitpos > GET_MODE_BITSIZE (mode))
6336 return false;
6337
6338 /* Generate INSERT IMMEDIATE (IILL et al). */
6339 /* (set (ze (reg)) (const_int)). */
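/* E.g. a hypothetical 48-bit constant insert at bitpos 16 with
   TARGET_EXTIMM is split by the loop below into one 32-bit chunk at
   bit position 32 followed by one 16-bit chunk at bit position 16. */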
6340 if (TARGET_ZARCH
6341 && register_operand (dest, word_mode)
6342 && (bitpos % 16) == 0
6343 && (bitsize % 16) == 0
6344 && const_int_operand (src, VOIDmode))
6345 {
6346 HOST_WIDE_INT val = INTVAL (src);
6347 int regpos = bitpos + bitsize;
6348
6349 while (regpos > bitpos)
6350 {
6351 machine_mode putmode;
6352 int putsize;
6353
6354 if (TARGET_EXTIMM && (regpos % 32 == 0) && (regpos >= bitpos + 32))
6355 putmode = SImode;
6356 else
6357 putmode = HImode;
6358
6359 putsize = GET_MODE_BITSIZE (putmode);
6360 regpos -= putsize;
6361 emit_move_insn (gen_rtx_ZERO_EXTRACT (word_mode, dest,
6362 GEN_INT (putsize),
6363 GEN_INT (regpos)),
6364 gen_int_mode (val, putmode));
6365 val >>= putsize;
6366 }
6367 gcc_assert (regpos == bitpos);
6368 return true;
6369 }
6370
6371 smode = smallest_int_mode_for_size (bitsize);
6372 smode_bsize = GET_MODE_BITSIZE (smode);
6373 mode_bsize = GET_MODE_BITSIZE (mode);
6374
6375 /* Generate STORE CHARACTERS UNDER MASK (STCM et al). */
6376 if (bitpos == 0
6377 && (bitsize % BITS_PER_UNIT) == 0
6378 && MEM_P (dest)
6379 && (register_operand (src, word_mode)
6380 || const_int_operand (src, VOIDmode)))
6381 {
6382 /* Emit standard pattern if possible. */
6383 if (smode_bsize == bitsize)
6384 {
6385 emit_move_insn (adjust_address (dest, smode, 0),
6386 gen_lowpart (smode, src));
6387 return true;
6388 }
6389
6390 /* (set (ze (mem)) (const_int)). */
6391 else if (const_int_operand (src, VOIDmode))
6392 {
6393 int size = bitsize / BITS_PER_UNIT;
6394 rtx src_mem = adjust_address (force_const_mem (word_mode, src),
6395 BLKmode,
6396 UNITS_PER_WORD - size);
6397
6398 dest = adjust_address (dest, BLKmode, 0);
6399 set_mem_size (dest, size);
6400 s390_expand_cpymem (dest, src_mem, GEN_INT (size));
6401 return true;
6402 }
6403
6404 /* (set (ze (mem)) (reg)). */
6405 else if (register_operand (src, word_mode))
6406 {
6407 if (bitsize <= 32)
6408 emit_move_insn (gen_rtx_ZERO_EXTRACT (word_mode, dest, op1,
6409 const0_rtx), src);
6410 else
6411 {
6412 /* Emit st,stcmh sequence. */
6413 int stcmh_width = bitsize - 32;
6414 int size = stcmh_width / BITS_PER_UNIT;
6415
6416 emit_move_insn (adjust_address (dest, SImode, size),
6417 gen_lowpart (SImode, src));
6418 set_mem_size (dest, size);
6419 emit_move_insn (gen_rtx_ZERO_EXTRACT (word_mode, dest,
6420 GEN_INT (stcmh_width),
6421 const0_rtx),
6422 gen_rtx_LSHIFTRT (word_mode, src, GEN_INT (32)));
6423 }
6424 return true;
6425 }
6426 }
6427
6428 /* Generate INSERT CHARACTERS UNDER MASK (IC, ICM et al). */
6429 if ((bitpos % BITS_PER_UNIT) == 0
6430 && (bitsize % BITS_PER_UNIT) == 0
6431 && (bitpos & 32) == ((bitpos + bitsize - 1) & 32)
6432 && MEM_P (src)
6433 && (mode == DImode || mode == SImode)
6434 && register_operand (dest, mode))
6435 {
6436 /* Emit a strict_low_part pattern if possible. */
6437 if (smode_bsize == bitsize && bitpos == mode_bsize - smode_bsize)
6438 {
6439 op = gen_rtx_STRICT_LOW_PART (VOIDmode, gen_lowpart (smode, dest));
6440 op = gen_rtx_SET (op, gen_lowpart (smode, src));
6441 clobber = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, CC_REGNUM));
6442 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clobber)));
6443 return true;
6444 }
6445
6446 /* ??? There are more powerful versions of ICM that are not
6447 completely represented in the md file. */
6448 }
6449
6450 /* For z10, generate ROTATE THEN INSERT SELECTED BITS (RISBG et al). */
6451 if (TARGET_Z10 && (mode == DImode || mode == SImode))
6452 {
6453 machine_mode mode_s = GET_MODE (src);
6454
6455 if (CONSTANT_P (src))
6456 {
6457 /* For constant zero values the representation with AND
6458 appears to be folded in more situations than the (set
6459 (zero_extract) ...).
6460 We only do this when the start and end of the bitfield
6461 remain in the same SImode chunk. That way nihf or nilf
6462 can be used.
6463 The AND patterns might still generate a risbg for this. */
6464 if (src == const0_rtx && bitpos / 32 == (bitpos + bitsize - 1) / 32)
6465 return false;
6466 else
6467 src = force_reg (mode, src);
6468 }
6469 else if (mode_s != mode)
6470 {
6471 gcc_assert (GET_MODE_BITSIZE (mode_s) >= bitsize);
6472 src = force_reg (mode_s, src);
6473 src = gen_lowpart (mode, src);
6474 }
6475
6476 op = gen_rtx_ZERO_EXTRACT (mode, dest, op1, op2);
6477 op = gen_rtx_SET (op, src);
6478
6479 if (!TARGET_ZEC12)
6480 {
6481 clobber = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, CC_REGNUM));
6482 op = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clobber));
6483 }
6484 emit_insn (op);
6485
6486 return true;
6487 }
6488
6489 return false;
6490 }
6491
6492 /* A subroutine of s390_expand_cs_hqi and s390_expand_atomic which returns a
6493 register that holds VAL of mode MODE shifted by COUNT bits. */
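/* For instance, a hypothetical QImode VAL of 0x12345678 with COUNT 16
   is first masked down to 0x78 and then shifted into bits 16..23,
   yielding 0x00780000. */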
6494
6495 static inline rtx
6496 s390_expand_mask_and_shift (rtx val, machine_mode mode, rtx count)
6497 {
6498 val = expand_simple_binop (SImode, AND, val, GEN_INT (GET_MODE_MASK (mode)),
6499 NULL_RTX, 1, OPTAB_DIRECT);
6500 return expand_simple_binop (SImode, ASHIFT, val, count,
6501 NULL_RTX, 1, OPTAB_DIRECT);
6502 }
6503
6504 /* Generate a vector comparison COND of CMP_OP1 and CMP_OP2 and store
6505 the result in TARGET. */
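/* Only EQ, GT and GTU (and for float also GE) have direct machine
   comparisons; the remaining codes are rewritten in terms of those or
   of helper expanders. E.g. a hypothetical integer LE becomes
   NOT (GT) and LT becomes GT with swapped operands, as tracked by
   neg_p and swap_p below. */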
6506
6507 void
6508 s390_expand_vec_compare (rtx target, enum rtx_code cond,
6509 rtx cmp_op1, rtx cmp_op2)
6510 {
6511 machine_mode mode = GET_MODE (target);
6512 bool neg_p = false, swap_p = false;
6513 rtx tmp;
6514
6515 if (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_VECTOR_FLOAT)
6516 {
6517 switch (cond)
6518 {
6519 /* NE a != b -> !(a == b) */
6520 case NE: cond = EQ; neg_p = true; break;
6521 case UNGT:
6522 emit_insn (gen_vec_cmpungt (target, cmp_op1, cmp_op2));
6523 return;
6524 case UNGE:
6525 emit_insn (gen_vec_cmpunge (target, cmp_op1, cmp_op2));
6526 return;
6527 case LE: cond = GE; swap_p = true; break;
6528 /* UNLE: (a u<= b) -> (b u>= a). */
6529 case UNLE:
6530 emit_insn (gen_vec_cmpunge (target, cmp_op2, cmp_op1));
6531 return;
6532 /* LT: a < b -> b > a */
6533 case LT: cond = GT; swap_p = true; break;
6534 /* UNLT: (a u< b) -> (b u> a). */
6535 case UNLT:
6536 emit_insn (gen_vec_cmpungt (target, cmp_op2, cmp_op1));
6537 return;
6538 case UNEQ:
6539 emit_insn (gen_vec_cmpuneq (target, cmp_op1, cmp_op2));
6540 return;
6541 case LTGT:
6542 emit_insn (gen_vec_cmpltgt (target, cmp_op1, cmp_op2));
6543 return;
6544 case ORDERED:
6545 emit_insn (gen_vec_cmpordered (target, cmp_op1, cmp_op2));
6546 return;
6547 case UNORDERED:
6548 emit_insn (gen_vec_cmpunordered (target, cmp_op1, cmp_op2));
6549 return;
6550 default: break;
6551 }
6552 }
6553 else
6554 {
6555 switch (cond)
6556 {
6557 /* NE: a != b -> !(a == b) */
6558 case NE: cond = EQ; neg_p = true; break;
6559 /* GE: a >= b -> !(b > a) */
6560 case GE: cond = GT; neg_p = true; swap_p = true; break;
6561 /* GEU: a >= b -> !(b > a) */
6562 case GEU: cond = GTU; neg_p = true; swap_p = true; break;
6563 /* LE: a <= b -> !(a > b) */
6564 case LE: cond = GT; neg_p = true; break;
6565 /* LEU: a <= b -> !(a > b) */
6566 case LEU: cond = GTU; neg_p = true; break;
6567 /* LT: a < b -> b > a */
6568 case LT: cond = GT; swap_p = true; break;
6569 /* LTU: a < b -> b > a */
6570 case LTU: cond = GTU; swap_p = true; break;
6571 default: break;
6572 }
6573 }
6574
6575 if (swap_p)
6576 {
6577 tmp = cmp_op1; cmp_op1 = cmp_op2; cmp_op2 = tmp;
6578 }
6579
6580 emit_insn (gen_rtx_SET (target, gen_rtx_fmt_ee (cond,
6581 mode,
6582 cmp_op1, cmp_op2)));
6583 if (neg_p)
6584 emit_insn (gen_rtx_SET (target, gen_rtx_NOT (mode, target)));
6585 }
6586
6587 /* Expand the comparison CODE of CMP1 and CMP2 and copy 1 or 0 into
6588 TARGET if either all (ALL_P is true) or any (ALL_P is false) of the
6589 elements in CMP1 and CMP2 fulfill the comparison.
6590 This function is only used to emit patterns for the vx builtins and
6591 therefore only handles comparison codes required by the
6592 builtins. */
6593 void
6594 s390_expand_vec_compare_cc (rtx target, enum rtx_code code,
6595 rtx cmp1, rtx cmp2, bool all_p)
6596 {
6597 machine_mode cc_producer_mode, cc_consumer_mode, scratch_mode;
6598 rtx tmp_reg = gen_reg_rtx (SImode);
6599 bool swap_p = false;
6600
6601 if (GET_MODE_CLASS (GET_MODE (cmp1)) == MODE_VECTOR_INT)
6602 {
6603 switch (code)
6604 {
6605 case EQ:
6606 case NE:
6607 cc_producer_mode = CCVEQmode;
6608 break;
6609 case GE:
6610 case LT:
6611 code = swap_condition (code);
6612 swap_p = true;
6613 /* fallthrough */
6614 case GT:
6615 case LE:
6616 cc_producer_mode = CCVIHmode;
6617 break;
6618 case GEU:
6619 case LTU:
6620 code = swap_condition (code);
6621 swap_p = true;
6622 /* fallthrough */
6623 case GTU:
6624 case LEU:
6625 cc_producer_mode = CCVIHUmode;
6626 break;
6627 default:
6628 gcc_unreachable ();
6629 }
6630
6631 scratch_mode = GET_MODE (cmp1);
6632 /* These codes represent inverted CC interpretations. Inverting
6633 an ALL CC mode results in an ANY CC mode and the other way
6634 around. Invert the all_p flag here to compensate for
6635 that. */
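/* E.g. an "all elements NE" test is carried out as "not (any
   element EQ)": the CC producer still computes EQ while the
   consumer mode is switched from ALL to ANY. */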
6636 if (code == NE || code == LE || code == LEU)
6637 all_p = !all_p;
6638
6639 cc_consumer_mode = all_p ? CCVIALLmode : CCVIANYmode;
6640 }
6641 else if (GET_MODE_CLASS (GET_MODE (cmp1)) == MODE_VECTOR_FLOAT)
6642 {
6643 bool inv_p = false;
6644
6645 switch (code)
6646 {
6647 case EQ: cc_producer_mode = CCVEQmode; break;
6648 case NE: cc_producer_mode = CCVEQmode; inv_p = true; break;
6649 case GT: cc_producer_mode = CCVFHmode; break;
6650 case GE: cc_producer_mode = CCVFHEmode; break;
6651 case UNLE: cc_producer_mode = CCVFHmode; inv_p = true; break;
6652 case UNLT: cc_producer_mode = CCVFHEmode; inv_p = true; break;
6653 case LT: cc_producer_mode = CCVFHmode; code = GT; swap_p = true; break;
6654 case LE: cc_producer_mode = CCVFHEmode; code = GE; swap_p = true; break;
6655 default: gcc_unreachable ();
6656 }
6657 scratch_mode = related_int_vector_mode (GET_MODE (cmp1)).require ();
6658
6659 if (inv_p)
6660 all_p = !all_p;
6661
6662 cc_consumer_mode = all_p ? CCVFALLmode : CCVFANYmode;
6663 }
6664 else
6665 gcc_unreachable ();
6666
6667 if (swap_p)
6668 {
6669 rtx tmp = cmp2;
6670 cmp2 = cmp1;
6671 cmp1 = tmp;
6672 }
6673
6674 emit_insn (gen_rtx_PARALLEL (VOIDmode,
6675 gen_rtvec (2, gen_rtx_SET (
6676 gen_rtx_REG (cc_producer_mode, CC_REGNUM),
6677 gen_rtx_COMPARE (cc_producer_mode, cmp1, cmp2)),
6678 gen_rtx_CLOBBER (VOIDmode,
6679 gen_rtx_SCRATCH (scratch_mode)))));
6680 emit_move_insn (target, const0_rtx);
6681 emit_move_insn (tmp_reg, const1_rtx);
6682
6683 emit_move_insn (target,
6684 gen_rtx_IF_THEN_ELSE (SImode,
6685 gen_rtx_fmt_ee (code, VOIDmode,
6686 gen_rtx_REG (cc_consumer_mode, CC_REGNUM),
6687 const0_rtx),
6688 tmp_reg, target));
6689 }
6690
6691 /* Invert the comparison CODE applied to a CC mode. This is only safe
6692 if we know whether the result was created by a floating point
6693 compare or not. For the CCV modes this is encoded as part of the
6694 mode. */
6695 enum rtx_code
6696 s390_reverse_condition (machine_mode mode, enum rtx_code code)
6697 {
6698 /* Reversal of FP compares takes care -- an ordered compare
6699 becomes an unordered compare and vice versa. */
6700 if (mode == CCVFALLmode || mode == CCVFANYmode || mode == CCSFPSmode)
6701 return reverse_condition_maybe_unordered (code);
6702 else if (mode == CCVIALLmode || mode == CCVIANYmode)
6703 return reverse_condition (code);
6704 else
6705 gcc_unreachable ();
6706 }
6707
6708 /* Generate a vector comparison expression loading either elements of
6709 THEN or ELS into TARGET depending on the comparison COND of CMP_OP1
6710 and CMP_OP2. */
6711
6712 void
6713 s390_expand_vcond (rtx target, rtx then, rtx els,
6714 enum rtx_code cond, rtx cmp_op1, rtx cmp_op2)
6715 {
6716 rtx tmp;
6717 machine_mode result_mode;
6718 rtx result_target;
6719
6720 machine_mode target_mode = GET_MODE (target);
6721 machine_mode cmp_mode = GET_MODE (cmp_op1);
6722 rtx op = (cond == LT) ? els : then;
6723
6724 /* Try to optimize x < 0 ? -1 : 0 into (signed) x >> 31
6725 and x < 0 ? 1 : 0 into (unsigned) x >> 31. Likewise
6726 for short and byte (x >> 15 and x >> 7 respectively). */
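/* This works because an arithmetic right shift by (element size - 1)
   replicates the sign bit across the whole element, while a logical
   right shift leaves just the sign bit, i.e. 0 or 1. */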
6727 if ((cond == LT || cond == GE)
6728 && target_mode == cmp_mode
6729 && cmp_op2 == CONST0_RTX (cmp_mode)
6730 && op == CONST0_RTX (target_mode)
6731 && s390_vector_mode_supported_p (target_mode)
6732 && GET_MODE_CLASS (target_mode) == MODE_VECTOR_INT)
6733 {
6734 rtx negop = (cond == LT) ? then : els;
6735
6736 int shift = GET_MODE_BITSIZE (GET_MODE_INNER (target_mode)) - 1;
6737
6738 /* if x < 0 ? 1 : 0 or if x >= 0 ? 0 : 1 */
6739 if (negop == CONST1_RTX (target_mode))
6740 {
6741 rtx res = expand_simple_binop (cmp_mode, LSHIFTRT, cmp_op1,
6742 GEN_INT (shift), target,
6743 1, OPTAB_DIRECT);
6744 if (res != target)
6745 emit_move_insn (target, res);
6746 return;
6747 }
6748
6749 /* if x < 0 ? -1 : 0 or if x >= 0 ? 0 : -1 */
6750 else if (all_ones_operand (negop, target_mode))
6751 {
6752 rtx res = expand_simple_binop (cmp_mode, ASHIFTRT, cmp_op1,
6753 GEN_INT (shift), target,
6754 0, OPTAB_DIRECT);
6755 if (res != target)
6756 emit_move_insn (target, res);
6757 return;
6758 }
6759 }
6760
6761 /* We always use an integral type vector to hold the comparison
6762 result. */
6763 result_mode = related_int_vector_mode (cmp_mode).require ();
6764 result_target = gen_reg_rtx (result_mode);
6765
6766 /* We allow vector immediates as comparison operands that
6767 can be handled by the optimization above but not by the
6768 following code. Hence, force them into registers here. */
6769 if (!REG_P (cmp_op1))
6770 cmp_op1 = force_reg (GET_MODE (cmp_op1), cmp_op1);
6771
6772 if (!REG_P (cmp_op2))
6773 cmp_op2 = force_reg (GET_MODE (cmp_op2), cmp_op2);
6774
6775 s390_expand_vec_compare (result_target, cond,
6776 cmp_op1, cmp_op2);
6777
6778 /* If the results are supposed to be either -1 or 0 we are done
6779 since this is what our compare instructions generate anyway. */
6780 if (all_ones_operand (then, GET_MODE (then))
6781 && const0_operand (els, GET_MODE (els)))
6782 {
6783 emit_move_insn (target, gen_rtx_SUBREG (target_mode,
6784 result_target, 0));
6785 return;
6786 }
6787
6788 /* Otherwise we will do a vsel afterwards. */
6789 /* This gets triggered e.g.
6790 with gcc.c-torture/compile/pr53410-1.c */
6791 if (!REG_P (then))
6792 then = force_reg (target_mode, then);
6793
6794 if (!REG_P (els))
6795 els = force_reg (target_mode, els);
6796
6797 tmp = gen_rtx_fmt_ee (EQ, VOIDmode,
6798 result_target,
6799 CONST0_RTX (result_mode));
6800
6801 /* We compared the result against zero above so we have to swap then
6802 and els here. */
6803 tmp = gen_rtx_IF_THEN_ELSE (target_mode, tmp, els, then);
6804
6805 gcc_assert (target_mode == GET_MODE (then));
6806 emit_insn (gen_rtx_SET (target, tmp));
6807 }
6808
6809 /* Emit the RTX necessary to initialize the vector TARGET with values
6810 in VALS. */
6811 void
6812 s390_expand_vec_init (rtx target, rtx vals)
6813 {
6814 machine_mode mode = GET_MODE (target);
6815 machine_mode inner_mode = GET_MODE_INNER (mode);
6816 int n_elts = GET_MODE_NUNITS (mode);
6817 bool all_same = true, all_regs = true, all_const_int = true;
6818 rtx x;
6819 int i;
6820
6821 for (i = 0; i < n_elts; ++i)
6822 {
6823 x = XVECEXP (vals, 0, i);
6824
6825 if (!CONST_INT_P (x))
6826 all_const_int = false;
6827
6828 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
6829 all_same = false;
6830
6831 if (!REG_P (x))
6832 all_regs = false;
6833 }
6834
6835 /* Use vector gen mask or vector gen byte mask if possible. */
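/* vgbm handles constants in which every byte is either 0x00 or 0xff,
   vgm handles a single contiguous run of 1-bits per element. E.g. a
   hypothetical splat of 0x000000ff per SImode element is a contiguous
   bitmask and therefore qualifies. */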
6836 if (all_same && all_const_int
6837 && (XVECEXP (vals, 0, 0) == const0_rtx
6838 || s390_contiguous_bitmask_vector_p (XVECEXP (vals, 0, 0),
6839 NULL, NULL)
6840 || s390_bytemask_vector_p (XVECEXP (vals, 0, 0), NULL)))
6841 {
6842 emit_insn (gen_rtx_SET (target,
6843 gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0))));
6844 return;
6845 }
6846
6847 /* Use vector replicate instructions. vlrep/vrepi/vrep */
6848 if (all_same)
6849 {
6850 rtx elem = XVECEXP (vals, 0, 0);
6851
6852 /* vec_splats accepts general_operand as source. */
6853 if (!general_operand (elem, GET_MODE (elem)))
6854 elem = force_reg (inner_mode, elem);
6855
6856 emit_insn (gen_rtx_SET (target, gen_rtx_VEC_DUPLICATE (mode, elem)));
6857 return;
6858 }
6859
6860 if (all_regs
6861 && REG_P (target)
6862 && n_elts == 2
6863 && GET_MODE_SIZE (inner_mode) == 8)
6864 {
6865 /* Use vector load pair. */
6866 emit_insn (gen_rtx_SET (target,
6867 gen_rtx_VEC_CONCAT (mode,
6868 XVECEXP (vals, 0, 0),
6869 XVECEXP (vals, 0, 1))));
6870 return;
6871 }
6872
6873 /* Use vector load logical element and zero. */
6874 if (TARGET_VXE && (mode == V4SImode || mode == V4SFmode))
6875 {
6876 bool found = true;
6877
6878 x = XVECEXP (vals, 0, 0);
6879 if (memory_operand (x, inner_mode))
6880 {
6881 for (i = 1; i < n_elts; ++i)
6882 found = found && XVECEXP (vals, 0, i) == const0_rtx;
6883
6884 if (found)
6885 {
6886 machine_mode half_mode = (inner_mode == SFmode
6887 ? V2SFmode : V2SImode);
6888 emit_insn (gen_rtx_SET (target,
6889 gen_rtx_VEC_CONCAT (mode,
6890 gen_rtx_VEC_CONCAT (half_mode,
6891 x,
6892 const0_rtx),
6893 gen_rtx_VEC_CONCAT (half_mode,
6894 const0_rtx,
6895 const0_rtx))));
6896 return;
6897 }
6898 }
6899 }
6900
6901 /* We are about to set the vector elements one by one. Zero out the
6902 full register first in order to help the data flow framework to
6903 detect it as full VR set. */
6904 emit_insn (gen_rtx_SET (target, CONST0_RTX (mode)));
6905
6906 /* Unfortunately the vec_init expander is not allowed to fail. So
6907 we have to implement the fallback ourselves. */
6908 for (i = 0; i < n_elts; i++)
6909 {
6910 rtx elem = XVECEXP (vals, 0, i);
6911 if (!general_operand (elem, GET_MODE (elem)))
6912 elem = force_reg (inner_mode, elem);
6913
6914 emit_insn (gen_rtx_SET (target,
6915 gen_rtx_UNSPEC (mode,
6916 gen_rtvec (3, elem,
6917 GEN_INT (i), target),
6918 UNSPEC_VEC_SET)));
6919 }
6920 }
6921
6922 /* Structure to hold the initial parameters for a compare_and_swap operation
6923 in HImode and QImode. */
6924
6925 struct alignment_context
6926 {
6927 rtx memsi; /* SI aligned memory location. */
6928 rtx shift; /* Bit offset with regard to lsb. */
6929 rtx modemask; /* Mask of the HQImode shifted by SHIFT bits. */
6930 rtx modemaski; /* ~modemask */
6931 bool aligned; /* True if memory is aligned, false else. */
6932 };
6933
6934 /* A subroutine of s390_expand_cs_hqi and s390_expand_atomic to initialize
6935 structure AC for transparent simplification, if the memory alignment
6936 is known to be at least 32 bits. MEM is the memory location for the
6937 actual operation and MODE its mode. */
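/* A worked example on this big-endian target: a HImode access at byte
   offset 0 within its aligned SImode word gets shift = (2 - 0) * 8 =
   16 bits, i.e. the halfword occupies the most significant half of
   MEMSI; at byte offset 2 the shift is 0 and it occupies the least
   significant half. */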
6938
6939 static void
6940 init_alignment_context (struct alignment_context *ac, rtx mem,
6941 machine_mode mode)
6942 {
6943 ac->shift = GEN_INT (GET_MODE_SIZE (SImode) - GET_MODE_SIZE (mode));
6944 ac->aligned = (MEM_ALIGN (mem) >= GET_MODE_BITSIZE (SImode));
6945
6946 if (ac->aligned)
6947 ac->memsi = adjust_address (mem, SImode, 0); /* Memory is aligned. */
6948 else
6949 {
6950 /* Alignment is unknown. */
6951 rtx byteoffset, addr, align;
6952
6953 /* Force the address into a register. */
6954 addr = force_reg (Pmode, XEXP (mem, 0));
6955
6956 /* Align it to SImode. */
6957 align = expand_simple_binop (Pmode, AND, addr,
6958 GEN_INT (-GET_MODE_SIZE (SImode)),
6959 NULL_RTX, 1, OPTAB_DIRECT);
6960 /* Generate MEM. */
6961 ac->memsi = gen_rtx_MEM (SImode, align);
6962 MEM_VOLATILE_P (ac->memsi) = MEM_VOLATILE_P (mem);
6963 set_mem_alias_set (ac->memsi, ALIAS_SET_MEMORY_BARRIER);
6964 set_mem_align (ac->memsi, GET_MODE_BITSIZE (SImode));
6965
6966 /* Calculate shiftcount. */
6967 byteoffset = expand_simple_binop (Pmode, AND, addr,
6968 GEN_INT (GET_MODE_SIZE (SImode) - 1),
6969 NULL_RTX, 1, OPTAB_DIRECT);
6970 /* As we already have some offset, evaluate the remaining distance. */
6971 ac->shift = expand_simple_binop (SImode, MINUS, ac->shift, byteoffset,
6972 NULL_RTX, 1, OPTAB_DIRECT);
6973 }
6974
6975 /* Shift is the byte count, but we need the bitcount. */
6976 ac->shift = expand_simple_binop (SImode, ASHIFT, ac->shift, GEN_INT (3),
6977 NULL_RTX, 1, OPTAB_DIRECT);
6978
6979 /* Calculate masks. */
6980 ac->modemask = expand_simple_binop (SImode, ASHIFT,
6981 GEN_INT (GET_MODE_MASK (mode)),
6982 ac->shift, NULL_RTX, 1, OPTAB_DIRECT);
6983 ac->modemaski = expand_simple_unop (SImode, NOT, ac->modemask,
6984 NULL_RTX, 1);
6985 }
6986
6987 /* A subroutine of s390_expand_cs_hqi. Insert INS into VAL. If possible,
6988 use a single insv insn into SEQ2. Otherwise, put prep insns in SEQ1 and
6989 perform the merge in SEQ2. */
6990
6991 static rtx
6992 s390_two_part_insv (struct alignment_context *ac, rtx *seq1, rtx *seq2,
6993 machine_mode mode, rtx val, rtx ins)
6994 {
6995 rtx tmp;
6996
6997 if (ac->aligned)
6998 {
6999 start_sequence ();
7000 tmp = copy_to_mode_reg (SImode, val);
7001 if (s390_expand_insv (tmp, GEN_INT (GET_MODE_BITSIZE (mode)),
7002 const0_rtx, ins))
7003 {
7004 *seq1 = NULL;
7005 *seq2 = get_insns ();
7006 end_sequence ();
7007 return tmp;
7008 }
7009 end_sequence ();
7010 }
7011
7012 /* Failed to use insv. Generate a two part shift and mask. */
7013 start_sequence ();
7014 tmp = s390_expand_mask_and_shift (ins, mode, ac->shift);
7015 *seq1 = get_insns ();
7016 end_sequence ();
7017
7018 start_sequence ();
7019 tmp = expand_simple_binop (SImode, IOR, tmp, val, NULL_RTX, 1, OPTAB_DIRECT);
7020 *seq2 = get_insns ();
7021 end_sequence ();
7022
7023 return tmp;
7024 }
7025
7026 /* Expand an atomic compare and swap operation for HImode and QImode. MEM is
7027 the memory location, CMP the old value to compare MEM with and NEW_RTX the
7028 value to set if CMP == MEM. */
7029
7030 static void
7031 s390_expand_cs_hqi (machine_mode mode, rtx btarget, rtx vtarget, rtx mem,
7032 rtx cmp, rtx new_rtx, bool is_weak)
7033 {
7034 struct alignment_context ac;
7035 rtx cmpv, newv, val, cc, seq0, seq1, seq2, seq3;
7036 rtx res = gen_reg_rtx (SImode);
7037 rtx_code_label *csloop = NULL, *csend = NULL;
7038
7039 gcc_assert (MEM_P (mem));
7040
7041 init_alignment_context (&ac, mem, mode);
7042
7043 /* Load full word. Subsequent loads are performed by CS. */
7044 val = expand_simple_binop (SImode, AND, ac.memsi, ac.modemaski,
7045 NULL_RTX, 1, OPTAB_DIRECT);
7046
7047 /* Prepare insertions of cmp and new_rtx into the loaded value. When
7048 possible, we try to use insv to make this happen efficiently. If
7049 that fails we'll generate code both inside and outside the loop. */
7050 cmpv = s390_two_part_insv (&ac, &seq0, &seq2, mode, val, cmp);
7051 newv = s390_two_part_insv (&ac, &seq1, &seq3, mode, val, new_rtx);
7052
7053 if (seq0)
7054 emit_insn (seq0);
7055 if (seq1)
7056 emit_insn (seq1);
7057
7058 /* Start CS loop. */
7059 if (!is_weak)
7060 {
7061 /* Begin assuming success. */
7062 emit_move_insn (btarget, const1_rtx);
7063
7064 csloop = gen_label_rtx ();
7065 csend = gen_label_rtx ();
7066 emit_label (csloop);
7067 }
7068
7069 /* val = "<mem>00..0<mem>"
7070 * cmp = "00..0<cmp>00..0"
7071 * new = "00..0<new>00..0"
7072 */
7073
7074 emit_insn (seq2);
7075 emit_insn (seq3);
7076
7077 cc = s390_emit_compare_and_swap (EQ, res, ac.memsi, cmpv, newv, CCZ1mode);
7078 if (is_weak)
7079 emit_insn (gen_cstorecc4 (btarget, cc, XEXP (cc, 0), XEXP (cc, 1)));
7080 else
7081 {
7082 rtx tmp;
7083
7084 /* Jump to end if we're done (likely?). */
7085 s390_emit_jump (csend, cc);
7086
7087 /* Check for changes outside mode, and loop internal if so.
7088 Arrange the moves so that the compare is adjacent to the
7089 branch so that we can generate CRJ. */
7090 tmp = copy_to_reg (val);
7091 force_expand_binop (SImode, and_optab, res, ac.modemaski, val,
7092 1, OPTAB_DIRECT);
7093 cc = s390_emit_compare (NE, val, tmp);
7094 s390_emit_jump (csloop, cc);
7095
7096 /* Failed. */
7097 emit_move_insn (btarget, const0_rtx);
7098 emit_label (csend);
7099 }
7100
7101 /* Return the correct part of the bitfield. */
7102 convert_move (vtarget, expand_simple_binop (SImode, LSHIFTRT, res, ac.shift,
7103 NULL_RTX, 1, OPTAB_DIRECT), 1);
7104 }
7105
7106 /* Variant of s390_expand_cs for SI, DI and TI modes. */
7107 static void
7108 s390_expand_cs_tdsi (machine_mode mode, rtx btarget, rtx vtarget, rtx mem,
7109 rtx cmp, rtx new_rtx, bool is_weak)
7110 {
7111 rtx output = vtarget;
7112 rtx_code_label *skip_cs_label = NULL;
7113 bool do_const_opt = false;
7114
7115 if (!register_operand (output, mode))
7116 output = gen_reg_rtx (mode);
7117
7118 /* If IS_WEAK is true and the INPUT value is a constant, compare the memory
7119 with the constant first and skip the compare_and_swap because it is very
7120 expensive and likely to fail anyway.
7121 Note 1: This is done only for IS_WEAK. C11 allows optimizations that may
7122 cause spurious failures in that case.
7123 Note 2: It may be useful to do this also for non-constant INPUT.
7124 Note 3: Currently only targets with "load on condition" are supported
7125 (z196 and newer). */
7126
7127 if (TARGET_Z196
7128 && (mode == SImode || mode == DImode))
7129 do_const_opt = (is_weak && CONST_INT_P (cmp));
7130
7131 if (do_const_opt)
7132 {
7133 rtx cc = gen_rtx_REG (CCZmode, CC_REGNUM);
7134
7135 skip_cs_label = gen_label_rtx ();
7136 emit_move_insn (btarget, const0_rtx);
7137 if (CONST_INT_P (cmp) && INTVAL (cmp) == 0)
7138 {
7139 rtvec lt = rtvec_alloc (2);
7140
7141 /* Load-and-test + conditional jump. */
7142 RTVEC_ELT (lt, 0)
7143 = gen_rtx_SET (cc, gen_rtx_COMPARE (CCZmode, mem, cmp));
7144 RTVEC_ELT (lt, 1) = gen_rtx_SET (output, mem);
7145 emit_insn (gen_rtx_PARALLEL (VOIDmode, lt));
7146 }
7147 else
7148 {
7149 emit_move_insn (output, mem);
7150 emit_insn (gen_rtx_SET (cc, gen_rtx_COMPARE (CCZmode, output, cmp)));
7151 }
7152 s390_emit_jump (skip_cs_label, gen_rtx_NE (VOIDmode, cc, const0_rtx));
7153 add_reg_br_prob_note (get_last_insn (),
7154 profile_probability::very_unlikely ());
7155 /* If the jump is not taken, OUTPUT is the expected value. */
7156 cmp = output;
7157 /* Reload newval to a register manually, *after* the compare and jump
7158 above. Otherwise Reload might place it before the jump. */
7159 }
7160 else
7161 cmp = force_reg (mode, cmp);
7162 new_rtx = force_reg (mode, new_rtx);
7163 s390_emit_compare_and_swap (EQ, output, mem, cmp, new_rtx,
7164 (do_const_opt) ? CCZmode : CCZ1mode);
7165 if (skip_cs_label != NULL)
7166 emit_label (skip_cs_label);
7167
7168 /* We deliberately accept non-register operands in the predicate
7169 to ensure the write back to the output operand happens *before*
7170 the store-flags code below. This makes it easier for combine
7171 to merge the store-flags code with a potential test-and-branch
7172 pattern following (immediately!) afterwards. */
7173 if (output != vtarget)
7174 emit_move_insn (vtarget, output);
7175
7176 if (do_const_opt)
7177 {
7178 rtx cc, cond, ite;
7179
7180 /* Do not use gen_cstorecc4 here because it writes either 1 or 0, but
7181 btarget has already been initialized with 0 above. */
7182 cc = gen_rtx_REG (CCZmode, CC_REGNUM);
7183 cond = gen_rtx_EQ (VOIDmode, cc, const0_rtx);
7184 ite = gen_rtx_IF_THEN_ELSE (SImode, cond, const1_rtx, btarget);
7185 emit_insn (gen_rtx_SET (btarget, ite));
7186 }
7187 else
7188 {
7189 rtx cc, cond;
7190
7191 cc = gen_rtx_REG (CCZ1mode, CC_REGNUM);
7192 cond = gen_rtx_EQ (SImode, cc, const0_rtx);
7193 emit_insn (gen_cstorecc4 (btarget, cond, cc, const0_rtx));
7194 }
7195 }
7196
7197 /* Expand an atomic compare and swap operation. MEM is the memory location,
7198 CMP the old value to compare MEM with and NEW_RTX the value to set if
7199 CMP == MEM. */
7200
7201 void
7202 s390_expand_cs (machine_mode mode, rtx btarget, rtx vtarget, rtx mem,
7203 rtx cmp, rtx new_rtx, bool is_weak)
7204 {
7205 switch (mode)
7206 {
7207 case E_TImode:
7208 case E_DImode:
7209 case E_SImode:
7210 s390_expand_cs_tdsi (mode, btarget, vtarget, mem, cmp, new_rtx, is_weak);
7211 break;
7212 case E_HImode:
7213 case E_QImode:
7214 s390_expand_cs_hqi (mode, btarget, vtarget, mem, cmp, new_rtx, is_weak);
7215 break;
7216 default:
7217 gcc_unreachable ();
7218 }
7219 }
7220
7221 /* Expand an atomic_exchange operation simulated with a compare-and-swap loop.
7222 The memory location MEM is set to INPUT. OUTPUT is set to the previous value
7223 of MEM. */
7224
7225 void
7226 s390_expand_atomic_exchange_tdsi (rtx output, rtx mem, rtx input)
7227 {
7228 machine_mode mode = GET_MODE (mem);
7229 rtx_code_label *csloop;
7230
7231 if (TARGET_Z196
7232 && (mode == DImode || mode == SImode)
7233 && CONST_INT_P (input) && INTVAL (input) == 0)
7234 {
7235 emit_move_insn (output, const0_rtx);
7236 if (mode == DImode)
7237 emit_insn (gen_atomic_fetch_anddi (output, mem, const0_rtx, input));
7238 else
7239 emit_insn (gen_atomic_fetch_andsi (output, mem, const0_rtx, input));
7240 return;
7241 }
7242
7243 input = force_reg (mode, input);
7244 emit_move_insn (output, mem);
7245 csloop = gen_label_rtx ();
7246 emit_label (csloop);
7247 s390_emit_jump (csloop, s390_emit_compare_and_swap (NE, output, mem, output,
7248 input, CCZ1mode));
7249 }
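/* Roughly, the loop emitted above behaves like this C-style sketch,
   where CS stands for the compare-and-swap instruction and a failing
   CS refreshes OLD from *MEM:

     old = *mem;
     while (!CS (mem, &old, input))
       continue;
     output = old;

   so OUTPUT ends up with the value MEM held just before INPUT was
   stored. */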
7250
7251 /* Expand an atomic operation CODE of mode MODE. MEM is the memory location
7252 and VAL the value to play with. If AFTER is true then store the value
7253 MEM holds after the operation, if AFTER is false then store the value MEM
7254 holds before the operation. If TARGET is zero then discard that value, else
7255 store it to TARGET. */
7256
7257 void
7258 s390_expand_atomic (machine_mode mode, enum rtx_code code,
7259 rtx target, rtx mem, rtx val, bool after)
7260 {
7261 struct alignment_context ac;
7262 rtx cmp;
7263 rtx new_rtx = gen_reg_rtx (SImode);
7264 rtx orig = gen_reg_rtx (SImode);
7265 rtx_code_label *csloop = gen_label_rtx ();
7266
7267 gcc_assert (!target || register_operand (target, VOIDmode));
7268 gcc_assert (MEM_P (mem));
7269
7270 init_alignment_context (&ac, mem, mode);
7271
7272 /* Shift val to the correct bit positions.
7273 Preserve "icm", but prevent "ex icm". */
7274 if (!(ac.aligned && code == SET && MEM_P (val)))
7275 val = s390_expand_mask_and_shift (val, mode, ac.shift);
7276
7277 /* Further preparation insns. */
7278 if (code == PLUS || code == MINUS)
7279 emit_move_insn (orig, val);
7280 else if (code == MULT || code == AND) /* val = "11..1<val>11..1" */
7281 val = expand_simple_binop (SImode, XOR, val, ac.modemaski,
7282 NULL_RTX, 1, OPTAB_DIRECT);
7283
7284 /* Load full word. Subsequent loads are performed by CS. */
7285 cmp = force_reg (SImode, ac.memsi);
7286
7287 /* Start CS loop. */
7288 emit_label (csloop);
7289 emit_move_insn (new_rtx, cmp);
7290
7291 /* Patch new with val at correct position. */
7292 switch (code)
7293 {
7294 case PLUS:
7295 case MINUS:
7296 val = expand_simple_binop (SImode, code, new_rtx, orig,
7297 NULL_RTX, 1, OPTAB_DIRECT);
7298 val = expand_simple_binop (SImode, AND, val, ac.modemask,
7299 NULL_RTX, 1, OPTAB_DIRECT);
7300 /* FALLTHRU */
7301 case SET:
7302 if (ac.aligned && MEM_P (val))
7303 store_bit_field (new_rtx, GET_MODE_BITSIZE (mode), 0,
7304 0, 0, SImode, val, false);
7305 else
7306 {
7307 new_rtx = expand_simple_binop (SImode, AND, new_rtx, ac.modemaski,
7308 NULL_RTX, 1, OPTAB_DIRECT);
7309 new_rtx = expand_simple_binop (SImode, IOR, new_rtx, val,
7310 NULL_RTX, 1, OPTAB_DIRECT);
7311 }
7312 break;
7313 case AND:
7314 case IOR:
7315 case XOR:
7316 new_rtx = expand_simple_binop (SImode, code, new_rtx, val,
7317 NULL_RTX, 1, OPTAB_DIRECT);
7318 break;
7319 case MULT: /* NAND */
7320 new_rtx = expand_simple_binop (SImode, AND, new_rtx, val,
7321 NULL_RTX, 1, OPTAB_DIRECT);
7322 new_rtx = expand_simple_binop (SImode, XOR, new_rtx, ac.modemask,
7323 NULL_RTX, 1, OPTAB_DIRECT);
7324 break;
7325 default:
7326 gcc_unreachable ();
7327 }
7328
7329 s390_emit_jump (csloop, s390_emit_compare_and_swap (NE, cmp,
7330 ac.memsi, cmp, new_rtx,
7331 CCZ1mode));
7332
7333 /* Return the correct part of the bitfield. */
7334 if (target)
7335 convert_move (target, expand_simple_binop (SImode, LSHIFTRT,
7336 after ? new_rtx : cmp, ac.shift,
7337 NULL_RTX, 1, OPTAB_DIRECT), 1);
7338 }
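/* A note on the MULT (NAND) case above: with modemask selecting the
   accessed bit positions, the two binops compute

     new = (old & val) ^ modemask

   which, within the masked bits, equals ~(old & val) -- exactly the
   result __atomic_fetch_nand expects. */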
7339
7340 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
7341 We need to emit DTP-relative relocations. */
7342
7343 static void s390_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
7344
7345 static void
7346 s390_output_dwarf_dtprel (FILE *file, int size, rtx x)
7347 {
7348 switch (size)
7349 {
7350 case 4:
7351 fputs ("\t.long\t", file);
7352 break;
7353 case 8:
7354 fputs ("\t.quad\t", file);
7355 break;
7356 default:
7357 gcc_unreachable ();
7358 }
7359 output_addr_const (file, x);
7360 fputs ("@DTPOFF", file);
7361 }
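/* For example, an 8-byte DTP-relative reference to a symbol foo is
   emitted as

     .quad foo@DTPOFF
   */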
7362
7363 /* Return the proper mode for REGNO being represented in the dwarf
7364 unwind table. */
7365 machine_mode
7366 s390_dwarf_frame_reg_mode (int regno)
7367 {
7368 machine_mode save_mode = default_dwarf_frame_reg_mode (regno);
7369
7370 /* Make sure not to return DImode for any GPR with -m31 -mzarch. */
7371 if (GENERAL_REGNO_P (regno))
7372 save_mode = Pmode;
7373
7374 /* The rightmost 64 bits of vector registers are call-clobbered. */
7375 if (GET_MODE_SIZE (save_mode) > 8)
7376 save_mode = DImode;
7377
7378 return save_mode;
7379 }
7380
7381 #ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
7382 /* Implement TARGET_MANGLE_TYPE. */
7383
7384 static const char *
7385 s390_mangle_type (const_tree type)
7386 {
7387 type = TYPE_MAIN_VARIANT (type);
7388
7389 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
7390 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
7391 return NULL;
7392
7393 if (type == s390_builtin_types[BT_BV16QI]) return "U6__boolc";
7394 if (type == s390_builtin_types[BT_BV8HI]) return "U6__bools";
7395 if (type == s390_builtin_types[BT_BV4SI]) return "U6__booli";
7396 if (type == s390_builtin_types[BT_BV2DI]) return "U6__booll";
7397
7398 if (TYPE_MAIN_VARIANT (type) == long_double_type_node
7399 && TARGET_LONG_DOUBLE_128)
7400 return "g";
7401
7402 /* For all other types, use normal C++ mangling. */
7403 return NULL;
7404 }
7405 #endif
7406
7407 /* In the name of slightly smaller debug output, and to cater to
7408 general assembler lossage, recognize various UNSPEC sequences
7409 and turn them back into a direct symbol reference. */
7410
7411 static rtx
7412 s390_delegitimize_address (rtx orig_x)
7413 {
7414 rtx x, y;
7415
7416 orig_x = delegitimize_mem_from_attrs (orig_x);
7417 x = orig_x;
7418
7419 /* Extract the symbol ref from:
7420 (plus:SI (reg:SI 12 %r12)
7421 (const:SI (unspec:SI [(symbol_ref/f:SI ("*.LC0"))]
7422 UNSPEC_GOTOFF/PLTOFF)))
7423 and
7424 (plus:SI (reg:SI 12 %r12)
7425 (const:SI (plus:SI (unspec:SI [(symbol_ref:SI ("L"))]
7426 UNSPEC_GOTOFF/PLTOFF)
7427 (const_int 4 [0x4])))) */
7428 if (GET_CODE (x) == PLUS
7429 && REG_P (XEXP (x, 0))
7430 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM
7431 && GET_CODE (XEXP (x, 1)) == CONST)
7432 {
7433 HOST_WIDE_INT offset = 0;
7434
7435 /* The const operand. */
7436 y = XEXP (XEXP (x, 1), 0);
7437
7438 if (GET_CODE (y) == PLUS
7439 && GET_CODE (XEXP (y, 1)) == CONST_INT)
7440 {
7441 offset = INTVAL (XEXP (y, 1));
7442 y = XEXP (y, 0);
7443 }
7444
7445 if (GET_CODE (y) == UNSPEC
7446 && (XINT (y, 1) == UNSPEC_GOTOFF
7447 || XINT (y, 1) == UNSPEC_PLTOFF))
7448 return plus_constant (Pmode, XVECEXP (y, 0, 0), offset);
7449 }
7450
7451 if (GET_CODE (x) != MEM)
7452 return orig_x;
7453
7454 x = XEXP (x, 0);
7455 if (GET_CODE (x) == PLUS
7456 && GET_CODE (XEXP (x, 1)) == CONST
7457 && GET_CODE (XEXP (x, 0)) == REG
7458 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
7459 {
7460 y = XEXP (XEXP (x, 1), 0);
7461 if (GET_CODE (y) == UNSPEC
7462 && XINT (y, 1) == UNSPEC_GOT)
7463 y = XVECEXP (y, 0, 0);
7464 else
7465 return orig_x;
7466 }
7467 else if (GET_CODE (x) == CONST)
7468 {
7469 /* Extract the symbol ref from:
7470 (mem:QI (const:DI (unspec:DI [(symbol_ref:DI ("foo"))]
7471 UNSPEC_PLT/GOTENT))) */
7472
7473 y = XEXP (x, 0);
7474 if (GET_CODE (y) == UNSPEC
7475 && (XINT (y, 1) == UNSPEC_GOTENT
7476 || XINT (y, 1) == UNSPEC_PLT))
7477 y = XVECEXP (y, 0, 0);
7478 else
7479 return orig_x;
7480 }
7481 else
7482 return orig_x;
7483
7484 if (GET_MODE (orig_x) != Pmode)
7485 {
7486 if (GET_MODE (orig_x) == BLKmode)
7487 return orig_x;
7488 y = lowpart_subreg (GET_MODE (orig_x), y, Pmode);
7489 if (y == NULL_RTX)
7490 return orig_x;
7491 }
7492 return y;
7493 }
7494
7495 /* Output operand OP to stdio stream FILE.
7496 OP is an address (register + offset) which is not used to address data;
7497 instead the rightmost bits are interpreted as the value. */
7498
7499 static void
7500 print_addrstyle_operand (FILE *file, rtx op)
7501 {
7502 HOST_WIDE_INT offset;
7503 rtx base;
7504
7505 /* Extract base register and offset. */
7506 if (!s390_decompose_addrstyle_without_index (op, &base, &offset))
7507 gcc_unreachable ();
7508
7509 /* Sanity check. */
7510 if (base)
7511 {
7512 gcc_assert (GET_CODE (base) == REG);
7513 gcc_assert (REGNO (base) < FIRST_PSEUDO_REGISTER);
7514 gcc_assert (REGNO_REG_CLASS (REGNO (base)) == ADDR_REGS);
7515 }
7516
7517 /* Offsets are restricted to twelve bits. */
7518 fprintf (file, HOST_WIDE_INT_PRINT_DEC, offset & ((1 << 12) - 1));
7519 if (base)
7520 fprintf (file, "(%s)", reg_names[REGNO (base)]);
7521 }
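/* Example: OP = (plus (reg %r1) (const_int 4095)) is printed as
   "4095(%r1)"; an offset outside the twelve-bit range would be
   silently truncated by the mask above. */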
7522
7523 /* Print the shift count operand OP to FILE.
7524 OP is an address-style operand in a form which
7525 s390_valid_shift_count permits. Subregs and no-op
7526 and-masking of the operand are stripped. */
7527
7528 static void
7529 print_shift_count_operand (FILE *file, rtx op)
7530 {
7531 /* No checking of the AND mask is required here. */
7532 if (!s390_valid_shift_count (op, 0))
7533 gcc_unreachable ();
7534
7535 while (op && GET_CODE (op) == SUBREG)
7536 op = SUBREG_REG (op);
7537
7538 if (GET_CODE (op) == AND)
7539 op = XEXP (op, 0);
7540
7541 print_addrstyle_operand (file, op);
7542 }
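/* Example: a shift count given schematically as
     (and:SI (subreg:SI (reg:DI %r2)) (const_int 63))
   has the subreg and the no-op mask stripped and is printed as
   "0(%r2)". */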
7543
7544 /* Assigns the number of NOP halfwords to be emitted before and after the
7545 function label to *HW_BEFORE and *HW_AFTER. Both pointers must not be NULL.
7546 If hotpatching is disabled for the function, the values are set to zero.
7547 */
7548
7549 static void
7550 s390_function_num_hotpatch_hw (tree decl,
7551 int *hw_before,
7552 int *hw_after)
7553 {
7554 tree attr;
7555
7556 attr = lookup_attribute ("hotpatch", DECL_ATTRIBUTES (decl));
7557
7558 /* Handle the arguments of the hotpatch attribute. The values
7559 specified via attribute might override the cmdline argument
7560 values. */
7561 if (attr)
7562 {
7563 tree args = TREE_VALUE (attr);
7564
7565 *hw_before = TREE_INT_CST_LOW (TREE_VALUE (args));
7566 *hw_after = TREE_INT_CST_LOW (TREE_VALUE (TREE_CHAIN (args)));
7567 }
7568 else
7569 {
7570 /* Use the values specified by the cmdline arguments. */
7571 *hw_before = s390_hotpatch_hw_before_label;
7572 *hw_after = s390_hotpatch_hw_after_label;
7573 }
7574 }
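/* For instance, a function declared with
   __attribute__ ((hotpatch (1, 2))) yields *hw_before = 1 and
   *hw_after = 2, overriding any -mhotpatch= command line setting. */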
7575
7576 /* Write the current .machine and .machinemode specification to the assembler
7577 file. */
7578
7579 #ifdef HAVE_AS_MACHINE_MACHINEMODE
7580 static void
7581 s390_asm_output_machine_for_arch (FILE *asm_out_file)
7582 {
7583 fprintf (asm_out_file, "\t.machinemode %s\n",
7584 (TARGET_ZARCH) ? "zarch" : "esa");
7585 fprintf (asm_out_file, "\t.machine \"%s",
7586 processor_table[s390_arch].binutils_name);
7587 if (S390_USE_ARCHITECTURE_MODIFIERS)
7588 {
7589 int cpu_flags;
7590
7591 cpu_flags = processor_flags_table[(int) s390_arch];
7592 if (TARGET_HTM && !(cpu_flags & PF_TX))
7593 fprintf (asm_out_file, "+htm");
7594 else if (!TARGET_HTM && (cpu_flags & PF_TX))
7595 fprintf (asm_out_file, "+nohtm");
7596 if (TARGET_VX && !(cpu_flags & PF_VX))
7597 fprintf (asm_out_file, "+vx");
7598 else if (!TARGET_VX && (cpu_flags & PF_VX))
7599 fprintf (asm_out_file, "+novx");
7600 }
7601 fprintf (asm_out_file, "\"\n");
7602 }
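/* Sketch of typical output for -march=z13 -mno-htm (the exact cpu
   string comes from processor_table):

     .machinemode zarch
     .machine "z13+nohtm"
   */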
7603
7604 /* Write an extra function header before the very start of the function. */
7605
7606 void
7607 s390_asm_output_function_prefix (FILE *asm_out_file,
7608 const char *fnname ATTRIBUTE_UNUSED)
7609 {
7610 if (DECL_FUNCTION_SPECIFIC_TARGET (current_function_decl) == NULL)
7611 return;
7612 /* Since only the function specific options are saved but not the indications
7613 which options are set, it's too much work here to figure out which options
7614 have actually changed. Thus, generate .machine and .machinemode whenever a
7615 function has the target attribute or pragma. */
7616 fprintf (asm_out_file, "\t.machinemode push\n");
7617 fprintf (asm_out_file, "\t.machine push\n");
7618 s390_asm_output_machine_for_arch (asm_out_file);
7619 }
7620
7621 /* Write an extra function footer after the very end of the function. */
7622
7623 void
7624 s390_asm_declare_function_size (FILE *asm_out_file,
7625 const char *fnname, tree decl)
7626 {
7627 if (!flag_inhibit_size_directive)
7628 ASM_OUTPUT_MEASURED_SIZE (asm_out_file, fnname);
7629 if (DECL_FUNCTION_SPECIFIC_TARGET (decl) == NULL)
7630 return;
7631 fprintf (asm_out_file, "\t.machine pop\n");
7632 fprintf (asm_out_file, "\t.machinemode pop\n");
7633 }
7634 #endif
7635
7636 /* Write the extra assembler code needed to declare a function properly. */
7637
7638 void
7639 s390_asm_output_function_label (FILE *asm_out_file, const char *fname,
7640 tree decl)
7641 {
7642 int hw_before, hw_after;
7643
7644 s390_function_num_hotpatch_hw (decl, &hw_before, &hw_after);
7645 if (hw_before > 0)
7646 {
7647 unsigned int function_alignment;
7648 int i;
7649
7650 /* Add a trampoline code area before the function label and initialize it
7651 with two-byte nop instructions. This area can be overwritten with code
7652 that jumps to a patched version of the function. */
7653 asm_fprintf (asm_out_file, "\tnopr\t%%r0"
7654 "\t# pre-label NOPs for hotpatch (%d halfwords)\n",
7655 hw_before);
7656 for (i = 1; i < hw_before; i++)
7657 fputs ("\tnopr\t%r0\n", asm_out_file);
7658
7659 /* Note: The function label must be aligned so that (a) the bytes of the
7660 following nop do not cross a cacheline boundary, and (b) a jump address
7661 (eight bytes for 64-bit targets, four bytes for 32-bit targets) can be
7662 stored directly before the label without crossing a cacheline
7663 boundary. All this is necessary to make sure the trampoline code can
7664 be changed atomically.
7665 This alignment is done automatically using FUNCTION_BOUNDARY, but
7666 if there are NOPs before the function label, the alignment is placed
7667 before them. So it is necessary to duplicate the alignment after the
7668 NOPs. */
7669 function_alignment = MAX (8, DECL_ALIGN (decl) / BITS_PER_UNIT);
7670 if (! DECL_USER_ALIGN (decl))
7671 function_alignment
7672 = MAX (function_alignment,
7673 (unsigned int) align_functions.levels[0].get_value ());
7674 fputs ("\t# alignment for hotpatch\n", asm_out_file);
7675 ASM_OUTPUT_ALIGN (asm_out_file, align_functions.levels[0].log);
7676 }
7677
7678 if (S390_USE_TARGET_ATTRIBUTE && TARGET_DEBUG_ARG)
7679 {
7680 asm_fprintf (asm_out_file, "\t# fn:%s ar%d\n", fname, s390_arch);
7681 asm_fprintf (asm_out_file, "\t# fn:%s tu%d\n", fname, s390_tune);
7682 asm_fprintf (asm_out_file, "\t# fn:%s sg%d\n", fname, s390_stack_guard);
7683 asm_fprintf (asm_out_file, "\t# fn:%s ss%d\n", fname, s390_stack_size);
7684 asm_fprintf (asm_out_file, "\t# fn:%s bc%d\n", fname, s390_branch_cost);
7685 asm_fprintf (asm_out_file, "\t# fn:%s wf%d\n", fname,
7686 s390_warn_framesize);
7687 asm_fprintf (asm_out_file, "\t# fn:%s ba%d\n", fname, TARGET_BACKCHAIN);
7688 asm_fprintf (asm_out_file, "\t# fn:%s hd%d\n", fname, TARGET_HARD_DFP);
7689 asm_fprintf (asm_out_file, "\t# fn:%s hf%d\n", fname, !TARGET_SOFT_FLOAT);
7690 asm_fprintf (asm_out_file, "\t# fn:%s ht%d\n", fname, TARGET_OPT_HTM);
7691 asm_fprintf (asm_out_file, "\t# fn:%s vx%d\n", fname, TARGET_OPT_VX);
7692 asm_fprintf (asm_out_file, "\t# fn:%s ps%d\n", fname,
7693 TARGET_PACKED_STACK);
7694 asm_fprintf (asm_out_file, "\t# fn:%s se%d\n", fname, TARGET_SMALL_EXEC);
7695 asm_fprintf (asm_out_file, "\t# fn:%s mv%d\n", fname, TARGET_MVCLE);
7696 asm_fprintf (asm_out_file, "\t# fn:%s zv%d\n", fname, TARGET_ZVECTOR);
7697 asm_fprintf (asm_out_file, "\t# fn:%s wd%d\n", fname,
7698 s390_warn_dynamicstack_p);
7699 }
7700 ASM_OUTPUT_LABEL (asm_out_file, fname);
7701 if (hw_after > 0)
7702 asm_fprintf (asm_out_file,
7703 "\t# post-label NOPs for hotpatch (%d halfwords)\n",
7704 hw_after);
7705 }
7706
7707 /* Output machine-dependent UNSPECs occurring in address constant X
7708 in assembler syntax to stdio stream FILE. Returns true if the
7709 constant X could be recognized, false otherwise. */
7710
7711 static bool
7712 s390_output_addr_const_extra (FILE *file, rtx x)
7713 {
7714 if (GET_CODE (x) == UNSPEC && XVECLEN (x, 0) == 1)
7715 switch (XINT (x, 1))
7716 {
7717 case UNSPEC_GOTENT:
7718 output_addr_const (file, XVECEXP (x, 0, 0));
7719 fprintf (file, "@GOTENT");
7720 return true;
7721 case UNSPEC_GOT:
7722 output_addr_const (file, XVECEXP (x, 0, 0));
7723 fprintf (file, "@GOT");
7724 return true;
7725 case UNSPEC_GOTOFF:
7726 output_addr_const (file, XVECEXP (x, 0, 0));
7727 fprintf (file, "@GOTOFF");
7728 return true;
7729 case UNSPEC_PLT:
7730 output_addr_const (file, XVECEXP (x, 0, 0));
7731 fprintf (file, "@PLT");
7732 return true;
7733 case UNSPEC_PLTOFF:
7734 output_addr_const (file, XVECEXP (x, 0, 0));
7735 fprintf (file, "@PLTOFF");
7736 return true;
7737 case UNSPEC_TLSGD:
7738 output_addr_const (file, XVECEXP (x, 0, 0));
7739 fprintf (file, "@TLSGD");
7740 return true;
7741 case UNSPEC_TLSLDM:
7742 assemble_name (file, get_some_local_dynamic_name ());
7743 fprintf (file, "@TLSLDM");
7744 return true;
7745 case UNSPEC_DTPOFF:
7746 output_addr_const (file, XVECEXP (x, 0, 0));
7747 fprintf (file, "@DTPOFF");
7748 return true;
7749 case UNSPEC_NTPOFF:
7750 output_addr_const (file, XVECEXP (x, 0, 0));
7751 fprintf (file, "@NTPOFF");
7752 return true;
7753 case UNSPEC_GOTNTPOFF:
7754 output_addr_const (file, XVECEXP (x, 0, 0));
7755 fprintf (file, "@GOTNTPOFF");
7756 return true;
7757 case UNSPEC_INDNTPOFF:
7758 output_addr_const (file, XVECEXP (x, 0, 0));
7759 fprintf (file, "@INDNTPOFF");
7760 return true;
7761 }
7762
7763 if (GET_CODE (x) == UNSPEC && XVECLEN (x, 0) == 2)
7764 switch (XINT (x, 1))
7765 {
7766 case UNSPEC_POOL_OFFSET:
7767 x = gen_rtx_MINUS (GET_MODE (x), XVECEXP (x, 0, 0), XVECEXP (x, 0, 1));
7768 output_addr_const (file, x);
7769 return true;
7770 }
7771 return false;
7772 }
7773
7774 /* Output address operand ADDR in assembler syntax to
7775 stdio stream FILE. */
7776
7777 void
7778 print_operand_address (FILE *file, rtx addr)
7779 {
7780 struct s390_address ad;
7781 memset (&ad, 0, sizeof (s390_address));
7782
7783 if (s390_loadrelative_operand_p (addr, NULL, NULL))
7784 {
7785 if (!TARGET_Z10)
7786 {
7787 output_operand_lossage ("symbolic memory references are "
7788 "only supported on z10 or later");
7789 return;
7790 }
7791 output_addr_const (file, addr);
7792 return;
7793 }
7794
7795 if (!s390_decompose_address (addr, &ad)
7796 || (ad.base && !REGNO_OK_FOR_BASE_P (REGNO (ad.base)))
7797 || (ad.indx && !REGNO_OK_FOR_INDEX_P (REGNO (ad.indx))))
7798 output_operand_lossage ("cannot decompose address");
7799
7800 if (ad.disp)
7801 output_addr_const (file, ad.disp);
7802 else
7803 fprintf (file, "0");
7804
7805 if (ad.base && ad.indx)
7806 fprintf (file, "(%s,%s)", reg_names[REGNO (ad.indx)],
7807 reg_names[REGNO (ad.base)]);
7808 else if (ad.base)
7809 fprintf (file, "(%s)", reg_names[REGNO (ad.base)]);
7810 }
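/* Examples of the three layouts produced above:
     displacement, index and base:  "8(%r3,%r2)"
     displacement and base:         "8(%r2)"
     displacement only:             "8"  */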
7811
7812 /* Output operand X in assembler syntax to stdio stream FILE.
7813 CODE specified the format flag. The following format flags
7814 are recognized:
7815
7816 'A': On z14 or higher: If operand is a mem print the alignment
7817 hint usable with vl/vst prefixed by a comma.
7818 'C': print opcode suffix for branch condition.
7819 'D': print opcode suffix for inverse branch condition.
7820 'E': print opcode suffix for branch on index instruction.
7821 'G': print the size of the operand in bytes.
7822 'J': print tls_load/tls_gdcall/tls_ldcall suffix.
7823 'M': print the second word of a TImode operand.
7824 'N': print the second word of a DImode operand.
7825 'O': print only the displacement of a memory reference or address.
7826 'R': print only the base register of a memory reference or address.
7827 'S': print S-type memory reference (base+displacement).
7828 'Y': print address style operand without index (e.g. shift count or setmem
7829 operand).
7830
7831 'b': print integer X as if it's an unsigned byte.
7832 'c': print integer X as if it's a signed byte.
7833 'e': "end" of contiguous bitmask X in either DImode or vector inner mode.
7834 'f': "end" of contiguous bitmask X in SImode.
7835 'h': print integer X as if it's a signed halfword.
7836 'i': print the first nonzero HImode part of X.
7837 'j': print the first HImode part unequal to -1 of X.
7838 'k': print the first nonzero SImode part of X.
7839 'm': print the first SImode part unequal to -1 of X.
7840 'o': print integer X as if it's an unsigned 32-bit word.
7841 's': "start" of contiguous bitmask X in either DImode or vector inner mode.
7842 't': CONST_INT: "start" of contiguous bitmask X in SImode.
7843 CONST_VECTOR: Generate a bitmask for vgbm instruction.
7844 'x': print integer X as if it's an unsigned halfword.
7845 'v': print register number as vector register (v1 instead of f1).
7846 */
7847
7848 void
7849 print_operand (FILE *file, rtx x, int code)
7850 {
7851 HOST_WIDE_INT ival;
7852
7853 switch (code)
7854 {
7855 case 'A':
7856 if (TARGET_VECTOR_LOADSTORE_ALIGNMENT_HINTS && MEM_P (x))
7857 {
7858 if (MEM_ALIGN (x) >= 128)
7859 fprintf (file, ",4");
7860 else if (MEM_ALIGN (x) == 64)
7861 fprintf (file, ",3");
7862 }
7863 return;
7864 case 'C':
7865 fprintf (file, s390_branch_condition_mnemonic (x, FALSE));
7866 return;
7867
7868 case 'D':
7869 fprintf (file, s390_branch_condition_mnemonic (x, TRUE));
7870 return;
7871
7872 case 'E':
7873 if (GET_CODE (x) == LE)
7874 fprintf (file, "l");
7875 else if (GET_CODE (x) == GT)
7876 fprintf (file, "h");
7877 else
7878 output_operand_lossage ("invalid comparison operator "
7879 "for 'E' output modifier");
7880 return;
7881
7882 case 'J':
7883 if (GET_CODE (x) == SYMBOL_REF)
7884 {
7885 fprintf (file, "%s", ":tls_load:");
7886 output_addr_const (file, x);
7887 }
7888 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLSGD)
7889 {
7890 fprintf (file, "%s", ":tls_gdcall:");
7891 output_addr_const (file, XVECEXP (x, 0, 0));
7892 }
7893 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLSLDM)
7894 {
7895 fprintf (file, "%s", ":tls_ldcall:");
7896 const char *name = get_some_local_dynamic_name ();
7897 gcc_assert (name);
7898 assemble_name (file, name);
7899 }
7900 else
7901 output_operand_lossage ("invalid reference for 'J' output modifier");
7902 return;
7903
7904 case 'G':
7905 fprintf (file, "%u", GET_MODE_SIZE (GET_MODE (x)));
7906 return;
7907
7908 case 'O':
7909 {
7910 struct s390_address ad;
7911 int ret;
7912
7913 ret = s390_decompose_address (MEM_P (x) ? XEXP (x, 0) : x, &ad);
7914
7915 if (!ret
7916 || (ad.base && !REGNO_OK_FOR_BASE_P (REGNO (ad.base)))
7917 || ad.indx)
7918 {
7919 output_operand_lossage ("invalid address for 'O' output modifier");
7920 return;
7921 }
7922
7923 if (ad.disp)
7924 output_addr_const (file, ad.disp);
7925 else
7926 fprintf (file, "0");
7927 }
7928 return;
7929
7930 case 'R':
7931 {
7932 struct s390_address ad;
7933 int ret;
7934
7935 ret = s390_decompose_address (MEM_P (x) ? XEXP (x, 0) : x, &ad);
7936
7937 if (!ret
7938 || (ad.base && !REGNO_OK_FOR_BASE_P (REGNO (ad.base)))
7939 || ad.indx)
7940 {
7941 output_operand_lossage ("invalid address for 'R' output modifier");
7942 return;
7943 }
7944
7945 if (ad.base)
7946 fprintf (file, "%s", reg_names[REGNO (ad.base)]);
7947 else
7948 fprintf (file, "0");
7949 }
7950 return;
7951
7952 case 'S':
7953 {
7954 struct s390_address ad;
7955 int ret;
7956
7957 if (!MEM_P (x))
7958 {
7959 output_operand_lossage ("memory reference expected for "
7960 "'S' output modifier");
7961 return;
7962 }
7963 ret = s390_decompose_address (XEXP (x, 0), &ad);
7964
7965 if (!ret
7966 || (ad.base && !REGNO_OK_FOR_BASE_P (REGNO (ad.base)))
7967 || ad.indx)
7968 {
7969 output_operand_lossage ("invalid address for 'S' output modifier");
7970 return;
7971 }
7972
7973 if (ad.disp)
7974 output_addr_const (file, ad.disp);
7975 else
7976 fprintf (file, "0");
7977
7978 if (ad.base)
7979 fprintf (file, "(%s)", reg_names[REGNO (ad.base)]);
7980 }
7981 return;
7982
7983 case 'N':
7984 if (GET_CODE (x) == REG)
7985 x = gen_rtx_REG (GET_MODE (x), REGNO (x) + 1);
7986 else if (GET_CODE (x) == MEM)
7987 x = change_address (x, VOIDmode,
7988 plus_constant (Pmode, XEXP (x, 0), 4));
7989 else
7990 output_operand_lossage ("register or memory expression expected "
7991 "for 'N' output modifier");
7992 break;
7993
7994 case 'M':
7995 if (GET_CODE (x) == REG)
7996 x = gen_rtx_REG (GET_MODE (x), REGNO (x) + 1);
7997 else if (GET_CODE (x) == MEM)
7998 x = change_address (x, VOIDmode,
7999 plus_constant (Pmode, XEXP (x, 0), 8));
8000 else
8001 output_operand_lossage ("register or memory expression expected "
8002 "for 'M' output modifier");
8003 break;
8004
8005 case 'Y':
8006 print_shift_count_operand (file, x);
8007 return;
8008 }
8009
8010 switch (GET_CODE (x))
8011 {
8012 case REG:
8013 /* Print FP regs as fx instead of vx when they are accessed
8014 through non-vector mode. */
8015 if (code == 'v'
8016 || VECTOR_NOFP_REG_P (x)
8017 || (FP_REG_P (x) && VECTOR_MODE_P (GET_MODE (x)))
8018 || (VECTOR_REG_P (x)
8019 && (GET_MODE_SIZE (GET_MODE (x)) /
8020 s390_class_max_nregs (FP_REGS, GET_MODE (x))) > 8))
8021 fprintf (file, "%%v%s", reg_names[REGNO (x)] + 2);
8022 else
8023 fprintf (file, "%s", reg_names[REGNO (x)]);
8024 break;
8025
8026 case MEM:
8027 output_address (GET_MODE (x), XEXP (x, 0));
8028 break;
8029
8030 case CONST:
8031 case CODE_LABEL:
8032 case LABEL_REF:
8033 case SYMBOL_REF:
8034 output_addr_const (file, x);
8035 break;
8036
8037 case CONST_INT:
8038 ival = INTVAL (x);
8039 switch (code)
8040 {
8041 case 0:
8042 break;
8043 case 'b':
8044 ival &= 0xff;
8045 break;
8046 case 'c':
8047 ival = ((ival & 0xff) ^ 0x80) - 0x80;
8048 break;
8049 case 'x':
8050 ival &= 0xffff;
8051 break;
8052 case 'h':
8053 ival = ((ival & 0xffff) ^ 0x8000) - 0x8000;
8054 break;
8055 case 'i':
8056 ival = s390_extract_part (x, HImode, 0);
8057 break;
8058 case 'j':
8059 ival = s390_extract_part (x, HImode, -1);
8060 break;
8061 case 'k':
8062 ival = s390_extract_part (x, SImode, 0);
8063 break;
8064 case 'm':
8065 ival = s390_extract_part (x, SImode, -1);
8066 break;
8067 case 'o':
8068 ival &= 0xffffffff;
8069 break;
8070 case 'e': case 'f':
8071 case 's': case 't':
8072 {
8073 int start, end;
8074 int len;
8075 bool ok;
8076
8077 len = (code == 's' || code == 'e' ? 64 : 32);
8078 ok = s390_contiguous_bitmask_p (ival, true, len, &start, &end);
8079 gcc_assert (ok);
8080 if (code == 's' || code == 't')
8081 ival = start;
8082 else
8083 ival = end;
8084 }
8085 break;
8086 default:
8087 output_operand_lossage ("invalid constant for output modifier '%c'", code);
8088 }
8089 fprintf (file, HOST_WIDE_INT_PRINT_DEC, ival);
8090 break;
8091
8092 case CONST_WIDE_INT:
8093 if (code == 'b')
8094 fprintf (file, HOST_WIDE_INT_PRINT_DEC,
8095 CONST_WIDE_INT_ELT (x, 0) & 0xff);
8096 else if (code == 'x')
8097 fprintf (file, HOST_WIDE_INT_PRINT_DEC,
8098 CONST_WIDE_INT_ELT (x, 0) & 0xffff);
8099 else if (code == 'h')
8100 fprintf (file, HOST_WIDE_INT_PRINT_DEC,
8101 ((CONST_WIDE_INT_ELT (x, 0) & 0xffff) ^ 0x8000) - 0x8000);
8102 else
8103 {
8104 if (code == 0)
8105 output_operand_lossage ("invalid constant - try using "
8106 "an output modifier");
8107 else
8108 output_operand_lossage ("invalid constant for output modifier '%c'",
8109 code);
8110 }
8111 break;
8112 case CONST_VECTOR:
8113 switch (code)
8114 {
8115 case 'h':
8116 gcc_assert (const_vec_duplicate_p (x));
8117 fprintf (file, HOST_WIDE_INT_PRINT_DEC,
8118 ((INTVAL (XVECEXP (x, 0, 0)) & 0xffff) ^ 0x8000) - 0x8000);
8119 break;
8120 case 'e':
8121 case 's':
8122 {
8123 int start, end;
8124 bool ok;
8125
8126 ok = s390_contiguous_bitmask_vector_p (x, &start, &end);
8127 gcc_assert (ok);
8128 ival = (code == 's') ? start : end;
8129 fprintf (file, HOST_WIDE_INT_PRINT_DEC, ival);
8130 }
8131 break;
8132 case 't':
8133 {
8134 unsigned mask;
8135 bool ok = s390_bytemask_vector_p (x, &mask);
8136 gcc_assert (ok);
8137 fprintf (file, "%u", mask);
8138 }
8139 break;
8140
8141 default:
8142 output_operand_lossage ("invalid constant vector for output "
8143 "modifier '%c'", code);
8144 }
8145 break;
8146
8147 default:
8148 if (code == 0)
8149 output_operand_lossage ("invalid expression - try using "
8150 "an output modifier");
8151 else
8152 output_operand_lossage ("invalid expression for output "
8153 "modifier '%c'", code);
8154 break;
8155 }
8156 }
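/* Examples of the CONST_INT modifiers above: with X = -1, 'b' prints
   255, 'x' prints 65535, and 'h' prints -1 (the sign-extended
   halfword). */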
8157
8158 /* Target hook for assembling integer objects. We need to define it
8159 here to work around a bug in some versions of GAS, which couldn't
8160 handle values smaller than INT_MIN when printed in decimal. */
8161
8162 static bool
8163 s390_assemble_integer (rtx x, unsigned int size, int aligned_p)
8164 {
8165 if (size == 8 && aligned_p
8166 && GET_CODE (x) == CONST_INT && INTVAL (x) < INT_MIN)
8167 {
8168 fprintf (asm_out_file, "\t.quad\t" HOST_WIDE_INT_PRINT_HEX "\n",
8169 INTVAL (x));
8170 return true;
8171 }
8172 return default_assemble_integer (x, size, aligned_p);
8173 }
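/* E.g. an aligned 8-byte constant with value LONG_MIN is emitted as

     .quad 0x8000000000000000

   rather than in the decimal form that the affected assemblers
   rejected. */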
8174
8175 /* Returns true if register REGNO is used for forming
8176 a memory address in expression X. */
8177
8178 static bool
8179 reg_used_in_mem_p (int regno, rtx x)
8180 {
8181 enum rtx_code code = GET_CODE (x);
8182 int i, j;
8183 const char *fmt;
8184
8185 if (code == MEM)
8186 {
8187 if (refers_to_regno_p (regno, XEXP (x, 0)))
8188 return true;
8189 }
8190 else if (code == SET
8191 && GET_CODE (SET_DEST (x)) == PC)
8192 {
8193 if (refers_to_regno_p (regno, SET_SRC (x)))
8194 return true;
8195 }
8196
8197 fmt = GET_RTX_FORMAT (code);
8198 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
8199 {
8200 if (fmt[i] == 'e'
8201 && reg_used_in_mem_p (regno, XEXP (x, i)))
8202 return true;
8203
8204 else if (fmt[i] == 'E')
8205 for (j = 0; j < XVECLEN (x, i); j++)
8206 if (reg_used_in_mem_p (regno, XVECEXP (x, i, j)))
8207 return true;
8208 }
8209 return false;
8210 }
8211
8212 /* Returns true if expression DEP_RTX sets an address register
8213 used by instruction INSN to address memory. */
8214
8215 static bool
8216 addr_generation_dependency_p (rtx dep_rtx, rtx_insn *insn)
8217 {
8218 rtx target, pat;
8219
8220 if (NONJUMP_INSN_P (dep_rtx))
8221 dep_rtx = PATTERN (dep_rtx);
8222
8223 if (GET_CODE (dep_rtx) == SET)
8224 {
8225 target = SET_DEST (dep_rtx);
8226 if (GET_CODE (target) == STRICT_LOW_PART)
8227 target = XEXP (target, 0);
8228 while (GET_CODE (target) == SUBREG)
8229 target = SUBREG_REG (target);
8230
8231 if (GET_CODE (target) == REG)
8232 {
8233 int regno = REGNO (target);
8234
8235 if (s390_safe_attr_type (insn) == TYPE_LA)
8236 {
8237 pat = PATTERN (insn);
8238 if (GET_CODE (pat) == PARALLEL)
8239 {
8240 gcc_assert (XVECLEN (pat, 0) == 2);
8241 pat = XVECEXP (pat, 0, 0);
8242 }
8243 gcc_assert (GET_CODE (pat) == SET);
8244 return refers_to_regno_p (regno, SET_SRC (pat));
8245 }
8246 else if (get_attr_atype (insn) == ATYPE_AGEN)
8247 return reg_used_in_mem_p (regno, PATTERN (insn));
8248 }
8249 }
8250 return false;
8251 }
8252
8253 /* Return 1, if dep_insn sets register used in insn in the agen unit. */
8254
8255 int
8256 s390_agen_dep_p (rtx_insn *dep_insn, rtx_insn *insn)
8257 {
8258 rtx dep_rtx = PATTERN (dep_insn);
8259 int i;
8260
8261 if (GET_CODE (dep_rtx) == SET
8262 && addr_generation_dependency_p (dep_rtx, insn))
8263 return 1;
8264 else if (GET_CODE (dep_rtx) == PARALLEL)
8265 {
8266 for (i = 0; i < XVECLEN (dep_rtx, 0); i++)
8267 {
8268 if (addr_generation_dependency_p (XVECEXP (dep_rtx, 0, i), insn))
8269 return 1;
8270 }
8271 }
8272 return 0;
8273 }
8274
8275
8276 /* A C statement (sans semicolon) to update the integer scheduling priority
8277 INSN_PRIORITY (INSN). Increase the priority to execute INSN earlier;
8278 reduce the priority to execute it later. Do not define this macro if
8279 you do not need to adjust the scheduling priorities of insns.
8280
8281 A STD instruction should be scheduled earlier,
8282 in order to use the bypass. */
8283 static int
8284 s390_adjust_priority (rtx_insn *insn, int priority)
8285 {
8286 if (! INSN_P (insn))
8287 return priority;
8288
8289 if (s390_tune <= PROCESSOR_2064_Z900)
8290 return priority;
8291
8292 switch (s390_safe_attr_type (insn))
8293 {
8294 case TYPE_FSTOREDF:
8295 case TYPE_FSTORESF:
8296 priority = priority << 3;
8297 break;
8298 case TYPE_STORE:
8299 case TYPE_STM:
8300 priority = priority << 1;
8301 break;
8302 default:
8303 break;
8304 }
8305 return priority;
8306 }
8307
8308
8309 /* The number of instructions that can be issued per cycle. */
8310
8311 static int
8312 s390_issue_rate (void)
8313 {
8314 switch (s390_tune)
8315 {
8316 case PROCESSOR_2084_Z990:
8317 case PROCESSOR_2094_Z9_109:
8318 case PROCESSOR_2094_Z9_EC:
8319 case PROCESSOR_2817_Z196:
8320 return 3;
8321 case PROCESSOR_2097_Z10:
8322 return 2;
8323 case PROCESSOR_2064_Z900:
8324 /* Starting with EC12 we use the sched_reorder hook to take care
8325 of instruction dispatch constraints. The algorithm only
8326 picks the best instruction and assumes only a single
8327 instruction gets issued per cycle. */
8328 case PROCESSOR_2827_ZEC12:
8329 case PROCESSOR_2964_Z13:
8330 case PROCESSOR_3906_Z14:
8331 default:
8332 return 1;
8333 }
8334 }
8335
8336 static int
8337 s390_first_cycle_multipass_dfa_lookahead (void)
8338 {
8339 return 4;
8340 }
8341
8342 static void
8343 annotate_constant_pool_refs_1 (rtx *x)
8344 {
8345 int i, j;
8346 const char *fmt;
8347
8348 gcc_assert (GET_CODE (*x) != SYMBOL_REF
8349 || !CONSTANT_POOL_ADDRESS_P (*x));
8350
8351 /* Literal pool references can only occur inside a MEM ... */
8352 if (GET_CODE (*x) == MEM)
8353 {
8354 rtx memref = XEXP (*x, 0);
8355
8356 if (GET_CODE (memref) == SYMBOL_REF
8357 && CONSTANT_POOL_ADDRESS_P (memref))
8358 {
8359 rtx base = cfun->machine->base_reg;
8360 rtx addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, memref, base),
8361 UNSPEC_LTREF);
8362
8363 *x = replace_equiv_address (*x, addr);
8364 return;
8365 }
8366
8367 if (GET_CODE (memref) == CONST
8368 && GET_CODE (XEXP (memref, 0)) == PLUS
8369 && GET_CODE (XEXP (XEXP (memref, 0), 1)) == CONST_INT
8370 && GET_CODE (XEXP (XEXP (memref, 0), 0)) == SYMBOL_REF
8371 && CONSTANT_POOL_ADDRESS_P (XEXP (XEXP (memref, 0), 0)))
8372 {
8373 HOST_WIDE_INT off = INTVAL (XEXP (XEXP (memref, 0), 1));
8374 rtx sym = XEXP (XEXP (memref, 0), 0);
8375 rtx base = cfun->machine->base_reg;
8376 rtx addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, sym, base),
8377 UNSPEC_LTREF);
8378
8379 *x = replace_equiv_address (*x, plus_constant (Pmode, addr, off));
8380 return;
8381 }
8382 }
8383
8384 /* ... or a load-address type pattern. */
8385 if (GET_CODE (*x) == SET)
8386 {
8387 rtx addrref = SET_SRC (*x);
8388
8389 if (GET_CODE (addrref) == SYMBOL_REF
8390 && CONSTANT_POOL_ADDRESS_P (addrref))
8391 {
8392 rtx base = cfun->machine->base_reg;
8393 rtx addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, addrref, base),
8394 UNSPEC_LTREF);
8395
8396 SET_SRC (*x) = addr;
8397 return;
8398 }
8399
8400 if (GET_CODE (addrref) == CONST
8401 && GET_CODE (XEXP (addrref, 0)) == PLUS
8402 && GET_CODE (XEXP (XEXP (addrref, 0), 1)) == CONST_INT
8403 && GET_CODE (XEXP (XEXP (addrref, 0), 0)) == SYMBOL_REF
8404 && CONSTANT_POOL_ADDRESS_P (XEXP (XEXP (addrref, 0), 0)))
8405 {
8406 HOST_WIDE_INT off = INTVAL (XEXP (XEXP (addrref, 0), 1));
8407 rtx sym = XEXP (XEXP (addrref, 0), 0);
8408 rtx base = cfun->machine->base_reg;
8409 rtx addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, sym, base),
8410 UNSPEC_LTREF);
8411
8412 SET_SRC (*x) = plus_constant (Pmode, addr, off);
8413 return;
8414 }
8415 }
8416
8417 fmt = GET_RTX_FORMAT (GET_CODE (*x));
8418 for (i = GET_RTX_LENGTH (GET_CODE (*x)) - 1; i >= 0; i--)
8419 {
8420 if (fmt[i] == 'e')
8421 {
8422 annotate_constant_pool_refs_1 (&XEXP (*x, i));
8423 }
8424 else if (fmt[i] == 'E')
8425 {
8426 for (j = 0; j < XVECLEN (*x, i); j++)
8427 annotate_constant_pool_refs_1 (&XVECEXP (*x, i, j));
8428 }
8429 }
8430 }
8431
8432 /* Annotate every literal pool reference in INSN by an UNSPEC_LTREF expression.
8433 Fix up MEMs as required.
8434 Skip insns which support relative addressing, because they do not use a base
8435 register. */
8436
8437 static void
8438 annotate_constant_pool_refs (rtx_insn *insn)
8439 {
8440 if (s390_safe_relative_long_p (insn))
8441 return;
8442 annotate_constant_pool_refs_1 (&PATTERN (insn));
8443 }
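/* After annotation, a literal pool reference that looked like
     (mem (symbol_ref ".LC0"))
   has the schematic form
     (mem (unspec [(symbol_ref ".LC0") (reg base)] UNSPEC_LTREF))
   which makes the implicit use of the base register explicit. */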
8444
8445 static void
8446 find_constant_pool_ref_1 (rtx x, rtx *ref)
8447 {
8448 int i, j;
8449 const char *fmt;
8450
8451 /* Ignore POOL_ENTRY insns. */
8452 if (GET_CODE (x) == UNSPEC_VOLATILE
8453 && XINT (x, 1) == UNSPECV_POOL_ENTRY)
8454 return;
8455
8456 gcc_assert (GET_CODE (x) != SYMBOL_REF
8457 || !CONSTANT_POOL_ADDRESS_P (x));
8458
8459 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_LTREF)
8460 {
8461 rtx sym = XVECEXP (x, 0, 0);
8462 gcc_assert (GET_CODE (sym) == SYMBOL_REF
8463 && CONSTANT_POOL_ADDRESS_P (sym));
8464
8465 if (*ref == NULL_RTX)
8466 *ref = sym;
8467 else
8468 gcc_assert (*ref == sym);
8469
8470 return;
8471 }
8472
8473 fmt = GET_RTX_FORMAT (GET_CODE (x));
8474 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
8475 {
8476 if (fmt[i] == 'e')
8477 {
8478 find_constant_pool_ref_1 (XEXP (x, i), ref);
8479 }
8480 else if (fmt[i] == 'E')
8481 {
8482 for (j = 0; j < XVECLEN (x, i); j++)
8483 find_constant_pool_ref_1 (XVECEXP (x, i, j), ref);
8484 }
8485 }
8486 }
8487
8488 /* Find an annotated literal pool symbol referenced in INSN,
8489 and store it at REF. Will abort if INSN contains references to
8490 more than one such pool symbol; multiple references to the same
8491 symbol are allowed, however.
8492
8493 The rtx pointed to by REF must be initialized to NULL_RTX
8494 by the caller before calling this routine.
8495
8496 Skip insns which support relative addressing, because they do not use a base
8497 register. */
8498
8499 static void
8500 find_constant_pool_ref (rtx_insn *insn, rtx *ref)
8501 {
8502 if (s390_safe_relative_long_p (insn))
8503 return;
8504 find_constant_pool_ref_1 (PATTERN (insn), ref);
8505 }
8506
8507 static void
8508 replace_constant_pool_ref_1 (rtx *x, rtx ref, rtx offset)
8509 {
8510 int i, j;
8511 const char *fmt;
8512
8513 gcc_assert (*x != ref);
8514
8515 if (GET_CODE (*x) == UNSPEC
8516 && XINT (*x, 1) == UNSPEC_LTREF
8517 && XVECEXP (*x, 0, 0) == ref)
8518 {
8519 *x = gen_rtx_PLUS (Pmode, XVECEXP (*x, 0, 1), offset);
8520 return;
8521 }
8522
8523 if (GET_CODE (*x) == PLUS
8524 && GET_CODE (XEXP (*x, 1)) == CONST_INT
8525 && GET_CODE (XEXP (*x, 0)) == UNSPEC
8526 && XINT (XEXP (*x, 0), 1) == UNSPEC_LTREF
8527 && XVECEXP (XEXP (*x, 0), 0, 0) == ref)
8528 {
8529 rtx addr = gen_rtx_PLUS (Pmode, XVECEXP (XEXP (*x, 0), 0, 1), offset);
8530 *x = plus_constant (Pmode, addr, INTVAL (XEXP (*x, 1)));
8531 return;
8532 }
8533
8534 fmt = GET_RTX_FORMAT (GET_CODE (*x));
8535 for (i = GET_RTX_LENGTH (GET_CODE (*x)) - 1; i >= 0; i--)
8536 {
8537 if (fmt[i] == 'e')
8538 {
8539 replace_constant_pool_ref_1 (&XEXP (*x, i), ref, offset);
8540 }
8541 else if (fmt[i] == 'E')
8542 {
8543 for (j = 0; j < XVECLEN (*x, i); j++)
8544 replace_constant_pool_ref_1 (&XVECEXP (*x, i, j), ref, offset);
8545 }
8546 }
8547 }
8548
8549 /* Replace every reference to the annotated literal pool
8550 symbol REF in INSN by its base plus OFFSET.
8551 Skip insns which support relative addressing, because they do not use a base
8552 register. */
8553
8554 static void
8555 replace_constant_pool_ref (rtx_insn *insn, rtx ref, rtx offset)
8556 {
8557 if (s390_safe_relative_long_p (insn))
8558 return;
8559 replace_constant_pool_ref_1 (&PATTERN (insn), ref, offset);
8560 }
8561
8562 /* We keep a list of constants which we have to add to internal
8563 constant tables in the middle of large functions. */
8564
8565 #define NR_C_MODES 32
8566 machine_mode constant_modes[NR_C_MODES] =
8567 {
8568 TFmode, TImode, TDmode,
8569 V16QImode, V8HImode, V4SImode, V2DImode, V1TImode,
8570 V4SFmode, V2DFmode, V1TFmode,
8571 DFmode, DImode, DDmode,
8572 V8QImode, V4HImode, V2SImode, V1DImode, V2SFmode, V1DFmode,
8573 SFmode, SImode, SDmode,
8574 V4QImode, V2HImode, V1SImode, V1SFmode,
8575 HImode,
8576 V2QImode, V1HImode,
8577 QImode,
8578 V1QImode
8579 };
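/* The modes above are ordered by decreasing size, so walking the array
   front to back visits constants in descending alignment requirement
   order -- which is what s390_dump_pool relies on below. */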
8580
8581 struct constant
8582 {
8583 struct constant *next;
8584 rtx value;
8585 rtx_code_label *label;
8586 };
8587
8588 struct constant_pool
8589 {
8590 struct constant_pool *next;
8591 rtx_insn *first_insn;
8592 rtx_insn *pool_insn;
8593 bitmap insns;
8594 rtx_insn *emit_pool_after;
8595
8596 struct constant *constants[NR_C_MODES];
8597 struct constant *execute;
8598 rtx_code_label *label;
8599 int size;
8600 };
8601
8602 /* Allocate new constant_pool structure. */
8603
8604 static struct constant_pool *
8605 s390_alloc_pool (void)
8606 {
8607 struct constant_pool *pool;
8608 int i;
8609
8610 pool = (struct constant_pool *) xmalloc (sizeof *pool);
8611 pool->next = NULL;
8612 for (i = 0; i < NR_C_MODES; i++)
8613 pool->constants[i] = NULL;
8614
8615 pool->execute = NULL;
8616 pool->label = gen_label_rtx ();
8617 pool->first_insn = NULL;
8618 pool->pool_insn = NULL;
8619 pool->insns = BITMAP_ALLOC (NULL);
8620 pool->size = 0;
8621 pool->emit_pool_after = NULL;
8622
8623 return pool;
8624 }
8625
8626 /* Create new constant pool covering instructions starting at INSN
8627 and chain it to the end of POOL_LIST. */
8628
8629 static struct constant_pool *
8630 s390_start_pool (struct constant_pool **pool_list, rtx_insn *insn)
8631 {
8632 struct constant_pool *pool, **prev;
8633
8634 pool = s390_alloc_pool ();
8635 pool->first_insn = insn;
8636
8637 for (prev = pool_list; *prev; prev = &(*prev)->next)
8638 ;
8639 *prev = pool;
8640
8641 return pool;
8642 }
8643
8644 /* End range of instructions covered by POOL at INSN and emit
8645 placeholder insn representing the pool. */
8646
8647 static void
8648 s390_end_pool (struct constant_pool *pool, rtx_insn *insn)
8649 {
8650 rtx pool_size = GEN_INT (pool->size + 8 /* alignment slop */);
8651
8652 if (!insn)
8653 insn = get_last_insn ();
8654
8655 pool->pool_insn = emit_insn_after (gen_pool (pool_size), insn);
8656 INSN_ADDRESSES_NEW (pool->pool_insn, -1);
8657 }
8658
8659 /* Add INSN to the list of insns covered by POOL. */
8660
8661 static void
8662 s390_add_pool_insn (struct constant_pool *pool, rtx insn)
8663 {
8664 bitmap_set_bit (pool->insns, INSN_UID (insn));
8665 }
8666
8667 /* Return pool out of POOL_LIST that covers INSN. */
8668
8669 static struct constant_pool *
8670 s390_find_pool (struct constant_pool *pool_list, rtx insn)
8671 {
8672 struct constant_pool *pool;
8673
8674 for (pool = pool_list; pool; pool = pool->next)
8675 if (bitmap_bit_p (pool->insns, INSN_UID (insn)))
8676 break;
8677
8678 return pool;
8679 }
8680
8681 /* Add constant VAL of mode MODE to the constant pool POOL. */
8682
8683 static void
8684 s390_add_constant (struct constant_pool *pool, rtx val, machine_mode mode)
8685 {
8686 struct constant *c;
8687 int i;
8688
8689 for (i = 0; i < NR_C_MODES; i++)
8690 if (constant_modes[i] == mode)
8691 break;
8692 gcc_assert (i != NR_C_MODES);
8693
8694 for (c = pool->constants[i]; c != NULL; c = c->next)
8695 if (rtx_equal_p (val, c->value))
8696 break;
8697
8698 if (c == NULL)
8699 {
8700 c = (struct constant *) xmalloc (sizeof *c);
8701 c->value = val;
8702 c->label = gen_label_rtx ();
8703 c->next = pool->constants[i];
8704 pool->constants[i] = c;
8705 pool->size += GET_MODE_SIZE (mode);
8706 }
8707 }
8708
8709 /* Return an rtx that represents the offset of X from the start of
8710 pool POOL. */
8711
8712 static rtx
8713 s390_pool_offset (struct constant_pool *pool, rtx x)
8714 {
8715 rtx label;
8716
8717 label = gen_rtx_LABEL_REF (GET_MODE (x), pool->label);
8718 x = gen_rtx_UNSPEC (GET_MODE (x), gen_rtvec (2, x, label),
8719 UNSPEC_POOL_OFFSET);
8720 return gen_rtx_CONST (GET_MODE (x), x);
8721 }
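/* The result has the schematic shape

     (const (unspec [X (label_ref POOL_LABEL)] UNSPEC_POOL_OFFSET))

   which s390_output_addr_const_extra later renders as the difference
   "X - POOL_LABEL" (see the UNSPEC_POOL_OFFSET case above). */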
8722
8723 /* Find constant VAL of mode MODE in the constant pool POOL.
8724 Return an RTX describing the distance from the start of
8725 the pool to the location of the new constant. */
8726
8727 static rtx
8728 s390_find_constant (struct constant_pool *pool, rtx val,
8729 machine_mode mode)
8730 {
8731 struct constant *c;
8732 int i;
8733
8734 for (i = 0; i < NR_C_MODES; i++)
8735 if (constant_modes[i] == mode)
8736 break;
8737 gcc_assert (i != NR_C_MODES);
8738
8739 for (c = pool->constants[i]; c != NULL; c = c->next)
8740 if (rtx_equal_p (val, c->value))
8741 break;
8742
8743 gcc_assert (c);
8744
8745 return s390_pool_offset (pool, gen_rtx_LABEL_REF (Pmode, c->label));
8746 }
8747
8748 /* Check whether INSN is an execute. Return the label_ref to its
8749 execute target template if so, NULL_RTX otherwise. */
8750
8751 static rtx
8752 s390_execute_label (rtx insn)
8753 {
8754 if (INSN_P (insn)
8755 && GET_CODE (PATTERN (insn)) == PARALLEL
8756 && GET_CODE (XVECEXP (PATTERN (insn), 0, 0)) == UNSPEC
8757 && (XINT (XVECEXP (PATTERN (insn), 0, 0), 1) == UNSPEC_EXECUTE
8758 || XINT (XVECEXP (PATTERN (insn), 0, 0), 1) == UNSPEC_EXECUTE_JUMP))
8759 {
8760 if (XINT (XVECEXP (PATTERN (insn), 0, 0), 1) == UNSPEC_EXECUTE)
8761 return XVECEXP (XVECEXP (PATTERN (insn), 0, 0), 0, 2);
8762 else
8763 {
8764 gcc_assert (JUMP_P (insn));
8765 /* For jump insns as execute target:
8766 - There is one operand less in the parallel (the
8767 modification register of the execute is always 0).
8768 - The execute target label is wrapped into an
8769 if_then_else in order to hide it from jump analysis. */
8770 return XEXP (XVECEXP (XVECEXP (PATTERN (insn), 0, 0), 0, 0), 0);
8771 }
8772 }
8773
8774 return NULL_RTX;
8775 }
8776
8777 /* Find execute target for INSN in the constant pool POOL.
8778 Return an RTX describing the distance from the start of
8779 the pool to the location of the execute target. */
8780
8781 static rtx
8782 s390_find_execute (struct constant_pool *pool, rtx insn)
8783 {
8784 struct constant *c;
8785
8786 for (c = pool->execute; c != NULL; c = c->next)
8787 if (INSN_UID (insn) == INSN_UID (c->value))
8788 break;
8789
8790 gcc_assert (c);
8791
8792 return s390_pool_offset (pool, gen_rtx_LABEL_REF (Pmode, c->label));
8793 }
8794
8795 /* For an execute INSN, extract the execute target template. */
8796
8797 static rtx
8798 s390_execute_target (rtx insn)
8799 {
8800 rtx pattern = PATTERN (insn);
8801 gcc_assert (s390_execute_label (insn));
8802
8803 if (XVECLEN (pattern, 0) == 2)
8804 {
8805 pattern = copy_rtx (XVECEXP (pattern, 0, 1));
8806 }
8807 else
8808 {
8809 rtvec vec = rtvec_alloc (XVECLEN (pattern, 0) - 1);
8810 int i;
8811
8812 for (i = 0; i < XVECLEN (pattern, 0) - 1; i++)
8813 RTVEC_ELT (vec, i) = copy_rtx (XVECEXP (pattern, 0, i + 1));
8814
8815 pattern = gen_rtx_PARALLEL (VOIDmode, vec);
8816 }
8817
8818 return pattern;
8819 }
8820
8821 /* Indicate that INSN cannot be duplicated. This is the case for
8822 execute insns that carry a unique label. */
8823
8824 static bool
8825 s390_cannot_copy_insn_p (rtx_insn *insn)
8826 {
8827 rtx label = s390_execute_label (insn);
8828 return label && label != const0_rtx;
8829 }
8830
8831 /* Dump out the constants in POOL. If REMOTE_LABEL is true,
8832 do not emit the pool base label. */
8833
8834 static void
8835 s390_dump_pool (struct constant_pool *pool, bool remote_label)
8836 {
8837 struct constant *c;
8838 rtx_insn *insn = pool->pool_insn;
8839 int i;
8840
8841 /* Switch to rodata section. */
8842 insn = emit_insn_after (gen_pool_section_start (), insn);
8843 INSN_ADDRESSES_NEW (insn, -1);
8844
8845 /* Ensure minimum pool alignment. */
8846 insn = emit_insn_after (gen_pool_align (GEN_INT (8)), insn);
8847 INSN_ADDRESSES_NEW (insn, -1);
8848
8849 /* Emit pool base label. */
8850 if (!remote_label)
8851 {
8852 insn = emit_label_after (pool->label, insn);
8853 INSN_ADDRESSES_NEW (insn, -1);
8854 }
8855
8856 /* Dump constants in descending alignment requirement order,
8857 ensuring proper alignment for every constant. */
8858 for (i = 0; i < NR_C_MODES; i++)
8859 for (c = pool->constants[i]; c; c = c->next)
8860 {
8861 /* Convert UNSPEC_LTREL_OFFSET unspecs to pool-relative references. */
8862 rtx value = copy_rtx (c->value);
8863 if (GET_CODE (value) == CONST
8864 && GET_CODE (XEXP (value, 0)) == UNSPEC
8865 && XINT (XEXP (value, 0), 1) == UNSPEC_LTREL_OFFSET
8866 && XVECLEN (XEXP (value, 0), 0) == 1)
8867 value = s390_pool_offset (pool, XVECEXP (XEXP (value, 0), 0, 0));
8868
8869 insn = emit_label_after (c->label, insn);
8870 INSN_ADDRESSES_NEW (insn, -1);
8871
8872 value = gen_rtx_UNSPEC_VOLATILE (constant_modes[i],
8873 gen_rtvec (1, value),
8874 UNSPECV_POOL_ENTRY);
8875 insn = emit_insn_after (value, insn);
8876 INSN_ADDRESSES_NEW (insn, -1);
8877 }
8878
8879 /* Ensure minimum alignment for instructions. */
8880 insn = emit_insn_after (gen_pool_align (GEN_INT (2)), insn);
8881 INSN_ADDRESSES_NEW (insn, -1);
8882
8883 /* Output in-pool execute template insns. */
8884 for (c = pool->execute; c; c = c->next)
8885 {
8886 insn = emit_label_after (c->label, insn);
8887 INSN_ADDRESSES_NEW (insn, -1);
8888
8889 insn = emit_insn_after (s390_execute_target (c->value), insn);
8890 INSN_ADDRESSES_NEW (insn, -1);
8891 }
8892
8893 /* Switch back to previous section. */
8894 insn = emit_insn_after (gen_pool_section_end (), insn);
8895 INSN_ADDRESSES_NEW (insn, -1);
8896
8897 insn = emit_barrier_after (insn);
8898 INSN_ADDRESSES_NEW (insn, -1);
8899
8900 /* Remove placeholder insn. */
8901 remove_insn (pool->pool_insn);
8902 }
8903
8904 /* Free all memory used by POOL. */
8905
8906 static void
8907 s390_free_pool (struct constant_pool *pool)
8908 {
8909 struct constant *c, *next;
8910 int i;
8911
8912 for (i = 0; i < NR_C_MODES; i++)
8913 for (c = pool->constants[i]; c; c = next)
8914 {
8915 next = c->next;
8916 free (c);
8917 }
8918
8919 for (c = pool->execute; c; c = next)
8920 {
8921 next = c->next;
8922 free (c);
8923 }
8924
8925 BITMAP_FREE (pool->insns);
8926 free (pool);
8927 }
8928
8929
8930 /* Collect main literal pool. Return NULL on overflow. */
8931
8932 static struct constant_pool *
8933 s390_mainpool_start (void)
8934 {
8935 struct constant_pool *pool;
8936 rtx_insn *insn;
8937
8938 pool = s390_alloc_pool ();
8939
8940 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
8941 {
8942 if (NONJUMP_INSN_P (insn)
8943 && GET_CODE (PATTERN (insn)) == SET
8944 && GET_CODE (SET_SRC (PATTERN (insn))) == UNSPEC_VOLATILE
8945 && XINT (SET_SRC (PATTERN (insn)), 1) == UNSPECV_MAIN_POOL)
8946 {
8947 /* There might be two main_pool instructions if base_reg
8948 is call-clobbered; one for shrink-wrapped code and one
8949 for the rest. We want to keep the first. */
8950 if (pool->pool_insn)
8951 {
8952 insn = PREV_INSN (insn);
8953 delete_insn (NEXT_INSN (insn));
8954 continue;
8955 }
8956 pool->pool_insn = insn;
8957 }
8958
8959 if (NONJUMP_INSN_P (insn) || CALL_P (insn))
8960 {
8961 rtx pool_ref = NULL_RTX;
8962 find_constant_pool_ref (insn, &pool_ref);
8963 if (pool_ref)
8964 {
8965 rtx constant = get_pool_constant (pool_ref);
8966 machine_mode mode = get_pool_mode (pool_ref);
8967 s390_add_constant (pool, constant, mode);
8968 }
8969 }
8970
8971 /* If hot/cold partitioning is enabled we have to make sure that
8972 the literal pool is emitted in the same section where the
8973 initialization of the literal pool base pointer takes place.
8974 emit_pool_after is only used in the non-overflow case on
8975 non-Z CPUs where we can emit the literal pool at the end of the
8976 function body within the text section. */
8977 if (NOTE_P (insn)
8978 && NOTE_KIND (insn) == NOTE_INSN_SWITCH_TEXT_SECTIONS
8979 && !pool->emit_pool_after)
8980 pool->emit_pool_after = PREV_INSN (insn);
8981 }
8982
8983 gcc_assert (pool->pool_insn || pool->size == 0);
8984
8985 if (pool->size >= 4096)
8986 {
8987 /* We're going to chunkify the pool, so remove the main
8988 pool placeholder insn. */
8989 remove_insn (pool->pool_insn);
8990
8991 s390_free_pool (pool);
8992 pool = NULL;
8993 }
8994
8995 /* If the function ends with the section where the literal pool
8996 should be emitted, set the marker to its end. */
8997 if (pool && !pool->emit_pool_after)
8998 pool->emit_pool_after = get_last_insn ();
8999
9000 return pool;
9001 }
9002
9003 /* POOL holds the main literal pool as collected by s390_mainpool_start.
9004 Modify the current function to output the pool constants as well as
9005 the pool register setup instruction. */
9006
9007 static void
9008 s390_mainpool_finish (struct constant_pool *pool)
9009 {
9010 rtx base_reg = cfun->machine->base_reg;
9011 rtx set;
9012 rtx_insn *insn;
9013
9014 /* If the pool is empty, we're done. */
9015 if (pool->size == 0)
9016 {
9017 /* We don't actually need a base register after all. */
9018 cfun->machine->base_reg = NULL_RTX;
9019
9020 if (pool->pool_insn)
9021 remove_insn (pool->pool_insn);
9022 s390_free_pool (pool);
9023 return;
9024 }
9025
9026 /* We need correct insn addresses. */
9027 shorten_branches (get_insns ());
9028
9029 /* Use a LARL to load the pool register. The pool is
9030 located in the .rodata section, so we emit it after the function. */
9031 set = gen_main_base_64 (base_reg, pool->label);
9032 insn = emit_insn_after (set, pool->pool_insn);
9033 INSN_ADDRESSES_NEW (insn, -1);
9034 remove_insn (pool->pool_insn);
9035
9036 insn = get_last_insn ();
9037 pool->pool_insn = emit_insn_after (gen_pool (const0_rtx), insn);
9038 INSN_ADDRESSES_NEW (pool->pool_insn, -1);
9039
9040 s390_dump_pool (pool, 0);
9041
9042 /* Replace all literal pool references. */
9043
9044 for (rtx_insn *insn = get_insns (); insn; insn = NEXT_INSN (insn))
9045 {
9046 if (NONJUMP_INSN_P (insn) || CALL_P (insn))
9047 {
9048 rtx addr, pool_ref = NULL_RTX;
9049 find_constant_pool_ref (insn, &pool_ref);
9050 if (pool_ref)
9051 {
9052 if (s390_execute_label (insn))
9053 addr = s390_find_execute (pool, insn);
9054 else
9055 addr = s390_find_constant (pool, get_pool_constant (pool_ref),
9056 get_pool_mode (pool_ref));
9057
9058 replace_constant_pool_ref (insn, pool_ref, addr);
9059 INSN_CODE (insn) = -1;
9060 }
9061 }
9062 }
9063
9064
9065 /* Free the pool. */
9066 s390_free_pool (pool);
9067 }
9068
9069 /* Chunkify the literal pool. */
9070
9071 #define S390_POOL_CHUNK_MIN 0xc00
9072 #define S390_POOL_CHUNK_MAX 0xe00
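/* Both limits lie well below the 4096-byte reach of a 12-bit
   displacement, presumably to leave headroom for alignment padding and
   for constants added after the size check below. */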
9073
9074 static struct constant_pool *
9075 s390_chunkify_start (void)
9076 {
9077 struct constant_pool *curr_pool = NULL, *pool_list = NULL;
9078 bitmap far_labels;
9079 rtx_insn *insn;
9080
9081 /* We need correct insn addresses. */
9082
9083 shorten_branches (get_insns ());
9084
9085 /* Scan all insns and move literals to pool chunks. */
9086
9087 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
9088 {
9089 if (NONJUMP_INSN_P (insn) || CALL_P (insn))
9090 {
9091 rtx pool_ref = NULL_RTX;
9092 find_constant_pool_ref (insn, &pool_ref);
9093 if (pool_ref)
9094 {
9095 rtx constant = get_pool_constant (pool_ref);
9096 machine_mode mode = get_pool_mode (pool_ref);
9097
9098 if (!curr_pool)
9099 curr_pool = s390_start_pool (&pool_list, insn);
9100
9101 s390_add_constant (curr_pool, constant, mode);
9102 s390_add_pool_insn (curr_pool, insn);
9103 }
9104 }
9105
9106 if (JUMP_P (insn) || JUMP_TABLE_DATA_P (insn) || LABEL_P (insn))
9107 {
9108 if (curr_pool)
9109 s390_add_pool_insn (curr_pool, insn);
9110 }
9111
9112 if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_VAR_LOCATION)
9113 continue;
9114
9115 if (!curr_pool
9116 || INSN_ADDRESSES_SIZE () <= (size_t) INSN_UID (insn)
9117 || INSN_ADDRESSES (INSN_UID (insn)) == -1)
9118 continue;
9119
9120 if (curr_pool->size < S390_POOL_CHUNK_MAX)
9121 continue;
9122
9123 s390_end_pool (curr_pool, NULL);
9124 curr_pool = NULL;
9125 }
9126
9127 if (curr_pool)
9128 s390_end_pool (curr_pool, NULL);
9129
9130 /* Find all labels that are branched into
9131 from an insn belonging to a different chunk. */
9132
9133 far_labels = BITMAP_ALLOC (NULL);
9134
9135 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
9136 {
9137 rtx_jump_table_data *table;
9138
9139	  /* Labels marked with LABEL_PRESERVE_P can be the target
9140	     of non-local jumps, so we have to mark them.
9141	     The same holds for named labels.
9142
9143 Don't do that, however, if it is the label before
9144 a jump table. */
9145
9146 if (LABEL_P (insn)
9147 && (LABEL_PRESERVE_P (insn) || LABEL_NAME (insn)))
9148 {
9149 rtx_insn *vec_insn = NEXT_INSN (insn);
9150 if (! vec_insn || ! JUMP_TABLE_DATA_P (vec_insn))
9151 bitmap_set_bit (far_labels, CODE_LABEL_NUMBER (insn));
9152 }
9153 /* Check potential targets in a table jump (casesi_jump). */
9154 else if (tablejump_p (insn, NULL, &table))
9155 {
9156 rtx vec_pat = PATTERN (table);
9157 int i, diff_p = GET_CODE (vec_pat) == ADDR_DIFF_VEC;
9158
9159 for (i = 0; i < XVECLEN (vec_pat, diff_p); i++)
9160 {
9161 rtx label = XEXP (XVECEXP (vec_pat, diff_p, i), 0);
9162
9163 if (s390_find_pool (pool_list, label)
9164 != s390_find_pool (pool_list, insn))
9165 bitmap_set_bit (far_labels, CODE_LABEL_NUMBER (label));
9166 }
9167 }
9168 /* If we have a direct jump (conditional or unconditional),
9169 check all potential targets. */
9170 else if (JUMP_P (insn))
9171 {
9172 rtx pat = PATTERN (insn);
9173
9174 if (GET_CODE (pat) == PARALLEL)
9175 pat = XVECEXP (pat, 0, 0);
9176
9177 if (GET_CODE (pat) == SET)
9178 {
9179 rtx label = JUMP_LABEL (insn);
9180 if (label && !ANY_RETURN_P (label))
9181 {
9182 if (s390_find_pool (pool_list, label)
9183 != s390_find_pool (pool_list, insn))
9184 bitmap_set_bit (far_labels, CODE_LABEL_NUMBER (label));
9185 }
9186 }
9187 }
9188 }
9189
9190 /* Insert base register reload insns before every pool. */
9191
9192 for (curr_pool = pool_list; curr_pool; curr_pool = curr_pool->next)
9193 {
9194 rtx new_insn = gen_reload_base_64 (cfun->machine->base_reg,
9195 curr_pool->label);
9196 rtx_insn *insn = curr_pool->first_insn;
9197 INSN_ADDRESSES_NEW (emit_insn_before (new_insn, insn), -1);
9198 }
9199
9200 /* Insert base register reload insns at every far label. */
9201
9202 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
9203 if (LABEL_P (insn)
9204 && bitmap_bit_p (far_labels, CODE_LABEL_NUMBER (insn)))
9205 {
9206 struct constant_pool *pool = s390_find_pool (pool_list, insn);
9207 if (pool)
9208 {
9209 rtx new_insn = gen_reload_base_64 (cfun->machine->base_reg,
9210 pool->label);
9211 INSN_ADDRESSES_NEW (emit_insn_after (new_insn, insn), -1);
9212 }
9213 }
9214
9215
9216 BITMAP_FREE (far_labels);
9217
9218
9219 /* Recompute insn addresses. */
9220
9221 init_insn_lengths ();
9222 shorten_branches (get_insns ());
9223
9224 return pool_list;
9225 }
9226
9227 /* POOL_LIST is a chunk list as prepared by s390_chunkify_start.
9228 After we have decided to use this list, finish implementing
9229 all changes to the current function as required. */
9230
9231 static void
9232 s390_chunkify_finish (struct constant_pool *pool_list)
9233 {
9234 struct constant_pool *curr_pool = NULL;
9235 rtx_insn *insn;
9236
9237
9238 /* Replace all literal pool references. */
9239
9240 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
9241 {
9242 curr_pool = s390_find_pool (pool_list, insn);
9243 if (!curr_pool)
9244 continue;
9245
9246 if (NONJUMP_INSN_P (insn) || CALL_P (insn))
9247 {
9248 rtx addr, pool_ref = NULL_RTX;
9249 find_constant_pool_ref (insn, &pool_ref);
9250 if (pool_ref)
9251 {
9252 if (s390_execute_label (insn))
9253 addr = s390_find_execute (curr_pool, insn);
9254 else
9255 addr = s390_find_constant (curr_pool,
9256 get_pool_constant (pool_ref),
9257 get_pool_mode (pool_ref));
9258
9259 replace_constant_pool_ref (insn, pool_ref, addr);
9260 INSN_CODE (insn) = -1;
9261 }
9262 }
9263 }
9264
9265 /* Dump out all literal pools. */
9266
9267 for (curr_pool = pool_list; curr_pool; curr_pool = curr_pool->next)
9268 s390_dump_pool (curr_pool, 0);
9269
9270 /* Free pool list. */
9271
9272 while (pool_list)
9273 {
9274 struct constant_pool *next = pool_list->next;
9275 s390_free_pool (pool_list);
9276 pool_list = next;
9277 }
9278 }
9279
9280 /* Output the constant pool entry EXP in mode MODE with alignment ALIGN. */
9281
9282 void
9283 s390_output_pool_entry (rtx exp, machine_mode mode, unsigned int align)
9284 {
9285 switch (GET_MODE_CLASS (mode))
9286 {
9287 case MODE_FLOAT:
9288 case MODE_DECIMAL_FLOAT:
9289 gcc_assert (GET_CODE (exp) == CONST_DOUBLE);
9290
9291 assemble_real (*CONST_DOUBLE_REAL_VALUE (exp),
9292 as_a <scalar_float_mode> (mode), align);
9293 break;
9294
9295 case MODE_INT:
9296 assemble_integer (exp, GET_MODE_SIZE (mode), align, 1);
9297 mark_symbol_refs_as_used (exp);
9298 break;
9299
9300 case MODE_VECTOR_INT:
9301 case MODE_VECTOR_FLOAT:
9302 {
9303 int i;
9304 machine_mode inner_mode;
9305 gcc_assert (GET_CODE (exp) == CONST_VECTOR);
9306
9307 inner_mode = GET_MODE_INNER (GET_MODE (exp));
9308 for (i = 0; i < XVECLEN (exp, 0); i++)
9309 s390_output_pool_entry (XVECEXP (exp, 0, i),
9310 inner_mode,
9311 i == 0
9312 ? align
9313 : GET_MODE_BITSIZE (inner_mode));
9314 }
9315 break;
9316
9317 default:
9318 gcc_unreachable ();
9319 }
9320 }
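/* For example, a V4SImode constant is emitted as four SImode pool
   entries by the recursion above: the first entry keeps the pool
   entry's alignment ALIGN, while the remaining three are aligned to
   the 32-bit element size.  */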
9321
9322
9323 /* Return an RTL expression representing the value of the return address
9324 for the frame COUNT steps up from the current frame. FRAME is the
9325 frame pointer of that frame. */
9326
9327 rtx
9328 s390_return_addr_rtx (int count, rtx frame ATTRIBUTE_UNUSED)
9329 {
9330 int offset;
9331 rtx addr;
9332
9333 /* Without backchain, we fail for all but the current frame. */
9334
9335 if (!TARGET_BACKCHAIN && count > 0)
9336 return NULL_RTX;
9337
9338 /* For the current frame, we need to make sure the initial
9339 value of RETURN_REGNUM is actually saved. */
9340
9341 if (count == 0)
9342 return get_hard_reg_initial_val (Pmode, RETURN_REGNUM);
9343
9344 if (TARGET_PACKED_STACK)
9345 offset = -2 * UNITS_PER_LONG;
9346 else
9347 offset = RETURN_REGNUM * UNITS_PER_LONG;
9348
9349 addr = plus_constant (Pmode, frame, offset);
9350 addr = memory_address (Pmode, addr);
9351 return gen_rtx_MEM (Pmode, addr);
9352 }
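/* Illustration: __builtin_return_address (1) requires -mbackchain and,
   with the default 64-bit layout, reads the caller's r14 slot at
   frame + 14 * UNITS_PER_LONG, i.e. frame + 112; with -mpacked-stack
   the slot sits at frame - 16 instead.  */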
9353
9354 /* Return an RTL expression representing the back chain stored in
9355 the current stack frame. */
9356
9357 rtx
9358 s390_back_chain_rtx (void)
9359 {
9360 rtx chain;
9361
9362 gcc_assert (TARGET_BACKCHAIN);
9363
9364 if (TARGET_PACKED_STACK)
9365 chain = plus_constant (Pmode, stack_pointer_rtx,
9366 STACK_POINTER_OFFSET - UNITS_PER_LONG);
9367 else
9368 chain = stack_pointer_rtx;
9369
9370 chain = gen_rtx_MEM (Pmode, chain);
9371 return chain;
9372 }
9373
9374 /* Find first call clobbered register unused in a function.
9375 This could be used as base register in a leaf function
9376 or for holding the return address before epilogue. */
9377
9378 static int
9379 find_unused_clobbered_reg (void)
9380 {
9381 int i;
9382 for (i = 0; i < 6; i++)
9383 if (!df_regs_ever_live_p (i))
9384 return i;
9385 return 0;
9386 }
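/* For instance, in a leaf function whose body only touches r7..r9 this
   returns r0.  If all of r0..r5 -- the call-clobbered GPRs of the s390
   ABI -- are live, the fallback result is 0; since r0 cannot serve as
   a base register in an address, callers presumably treat that value
   specially.  */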
9387
9388
9389 /* Helper function for s390_regs_ever_clobbered. Sets the fields in DATA for all
9390 clobbered hard regs in SETREG. */
9391
9392 static void
9393 s390_reg_clobbered_rtx (rtx setreg, const_rtx set_insn ATTRIBUTE_UNUSED, void *data)
9394 {
9395 char *regs_ever_clobbered = (char *)data;
9396 unsigned int i, regno;
9397 machine_mode mode = GET_MODE (setreg);
9398
9399 if (GET_CODE (setreg) == SUBREG)
9400 {
9401 rtx inner = SUBREG_REG (setreg);
9402 if (!GENERAL_REG_P (inner) && !FP_REG_P (inner))
9403 return;
9404 regno = subreg_regno (setreg);
9405 }
9406 else if (GENERAL_REG_P (setreg) || FP_REG_P (setreg))
9407 regno = REGNO (setreg);
9408 else
9409 return;
9410
9411 for (i = regno;
9412 i < end_hard_regno (mode, regno);
9413 i++)
9414 regs_ever_clobbered[i] = 1;
9415 }
9416
9417 /* Walks through all basic blocks of the current function looking
9418    for clobbered hard regs using s390_reg_clobbered_rtx.  The fields
9419    of the passed char array REGS_EVER_CLOBBERED are set to one for
9420    each of those regs.  */
9421
9422 static void
9423 s390_regs_ever_clobbered (char regs_ever_clobbered[])
9424 {
9425 basic_block cur_bb;
9426 rtx_insn *cur_insn;
9427 unsigned int i;
9428
9429 memset (regs_ever_clobbered, 0, 32);
9430
9431 /* For non-leaf functions we have to consider all call clobbered regs to be
9432 clobbered. */
9433 if (!crtl->is_leaf)
9434 {
9435 for (i = 0; i < 32; i++)
9436 regs_ever_clobbered[i] = call_used_regs[i];
9437 }
9438
9439 /* Make the "magic" eh_return registers live if necessary. For regs_ever_live
9440 this work is done by liveness analysis (mark_regs_live_at_end).
9441 Special care is needed for functions containing landing pads. Landing pads
9442 may use the eh registers, but the code which sets these registers is not
9443 contained in that function. Hence s390_regs_ever_clobbered is not able to
9444 deal with this automatically. */
9445 if (crtl->calls_eh_return || cfun->machine->has_landing_pad_p)
9446 for (i = 0; EH_RETURN_DATA_REGNO (i) != INVALID_REGNUM ; i++)
9447 if (crtl->calls_eh_return
9448 || (cfun->machine->has_landing_pad_p
9449 && df_regs_ever_live_p (EH_RETURN_DATA_REGNO (i))))
9450 regs_ever_clobbered[EH_RETURN_DATA_REGNO (i)] = 1;
9451
9452 /* For nonlocal gotos all call-saved registers have to be saved.
9453 This flag is also set for the unwinding code in libgcc.
9454 See expand_builtin_unwind_init. For regs_ever_live this is done by
9455 reload. */
9456 if (crtl->saves_all_registers)
9457 for (i = 0; i < 32; i++)
9458 if (!call_used_regs[i])
9459 regs_ever_clobbered[i] = 1;
9460
9461 FOR_EACH_BB_FN (cur_bb, cfun)
9462 {
9463 FOR_BB_INSNS (cur_bb, cur_insn)
9464 {
9465 rtx pat;
9466
9467 if (!INSN_P (cur_insn))
9468 continue;
9469
9470 pat = PATTERN (cur_insn);
9471
9472 /* Ignore GPR restore insns. */
9473 if (epilogue_completed && RTX_FRAME_RELATED_P (cur_insn))
9474 {
9475 if (GET_CODE (pat) == SET
9476 && GENERAL_REG_P (SET_DEST (pat)))
9477 {
9478 /* lgdr */
9479 if (GET_MODE (SET_SRC (pat)) == DImode
9480 && FP_REG_P (SET_SRC (pat)))
9481 continue;
9482
9483 /* l / lg */
9484 if (GET_CODE (SET_SRC (pat)) == MEM)
9485 continue;
9486 }
9487
9488 /* lm / lmg */
9489 if (GET_CODE (pat) == PARALLEL
9490 && load_multiple_operation (pat, VOIDmode))
9491 continue;
9492 }
9493
9494 note_stores (cur_insn,
9495 s390_reg_clobbered_rtx,
9496 regs_ever_clobbered);
9497 }
9498 }
9499 }
9500
9501 /* Determine the frame area which actually has to be accessed
9502 in the function epilogue. The values are stored at the
9503 given pointers AREA_BOTTOM (address of the lowest used stack
9504 address) and AREA_TOP (address of the first item which does
9505 not belong to the stack frame). */
9506
9507 static void
9508 s390_frame_area (int *area_bottom, int *area_top)
9509 {
9510 int b, t;
9511
9512 b = INT_MAX;
9513 t = INT_MIN;
9514
9515 if (cfun_frame_layout.first_restore_gpr != -1)
9516 {
9517 b = (cfun_frame_layout.gprs_offset
9518 + cfun_frame_layout.first_restore_gpr * UNITS_PER_LONG);
9519 t = b + (cfun_frame_layout.last_restore_gpr
9520 - cfun_frame_layout.first_restore_gpr + 1) * UNITS_PER_LONG;
9521 }
9522
9523 if (TARGET_64BIT && cfun_save_high_fprs_p)
9524 {
9525 b = MIN (b, cfun_frame_layout.f8_offset);
9526 t = MAX (t, (cfun_frame_layout.f8_offset
9527 + cfun_frame_layout.high_fprs * 8));
9528 }
9529
9530 if (!TARGET_64BIT)
9531 {
9532 if (cfun_fpr_save_p (FPR4_REGNUM))
9533 {
9534 b = MIN (b, cfun_frame_layout.f4_offset);
9535 t = MAX (t, cfun_frame_layout.f4_offset + 8);
9536 }
9537 if (cfun_fpr_save_p (FPR6_REGNUM))
9538 {
9539 b = MIN (b, cfun_frame_layout.f4_offset + 8);
9540 t = MAX (t, cfun_frame_layout.f4_offset + 16);
9541 }
9542 }
9543 *area_bottom = b;
9544 *area_top = t;
9545 }
9546 /* Update gpr_save_slots in the frame layout trying to make use of
9547 FPRs as GPR save slots.
9548 This is a helper routine of s390_register_info. */
9549
9550 static void
9551 s390_register_info_gprtofpr ()
9552 {
9553 int save_reg_slot = FPR0_REGNUM;
9554 int i, j;
9555
9556 if (TARGET_TPF || !TARGET_Z10 || !TARGET_HARD_FLOAT || !crtl->is_leaf)
9557 return;
9558
9559 /* builtin_eh_return needs to be able to modify the return address
9560 on the stack. It could also adjust the FPR save slot instead but
9561 is it worth the trouble?! */
9562 if (crtl->calls_eh_return)
9563 return;
9564
9565 for (i = 15; i >= 6; i--)
9566 {
9567 if (cfun_gpr_save_slot (i) == SAVE_SLOT_NONE)
9568 continue;
9569
9570 /* Advance to the next FP register which can be used as a
9571 GPR save slot. */
9572 while ((!call_used_regs[save_reg_slot]
9573 || df_regs_ever_live_p (save_reg_slot)
9574 || cfun_fpr_save_p (save_reg_slot))
9575 && FP_REGNO_P (save_reg_slot))
9576 save_reg_slot++;
9577 if (!FP_REGNO_P (save_reg_slot))
9578 {
9579 /* We only want to use ldgr/lgdr if we can get rid of
9580 stm/lm entirely. So undo the gpr slot allocation in
9581 case we ran out of FPR save slots. */
9582 for (j = 6; j <= 15; j++)
9583 if (FP_REGNO_P (cfun_gpr_save_slot (j)))
9584 cfun_gpr_save_slot (j) = SAVE_SLOT_STACK;
9585 break;
9586 }
9587 cfun_gpr_save_slot (i) = save_reg_slot++;
9588 }
9589 }
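/* A possible scenario: on z10 or newer with hard float, a leaf
   function clobbering r12..r15 gets f0..f3 as save slots (filled via
   ldgr/lgdr), skipping every FPR that is call-saved, live, or already
   an FPR save slot.  If the supply of suitable FPRs runs out half-way,
   the loop above reverts all GPR->FPR assignments so that a single
   stm/lm pair can handle the stack saves instead.  */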
9590
9591 /* Set the bits in fpr_bitmap for FPRs which need to be saved due to
9592 stdarg.
9593 This is a helper routine for s390_register_info. */
9594
9595 static void
9596 s390_register_info_stdarg_fpr ()
9597 {
9598 int i;
9599 int min_fpr;
9600 int max_fpr;
9601
9602 /* Save the FP argument regs for stdarg. f0, f2 for 31 bit and
9603 f0-f4 for 64 bit. */
9604 if (!cfun->stdarg
9605 || !TARGET_HARD_FLOAT
9606 || !cfun->va_list_fpr_size
9607 || crtl->args.info.fprs >= FP_ARG_NUM_REG)
9608 return;
9609
9610 min_fpr = crtl->args.info.fprs;
9611 max_fpr = min_fpr + cfun->va_list_fpr_size - 1;
9612 if (max_fpr >= FP_ARG_NUM_REG)
9613 max_fpr = FP_ARG_NUM_REG - 1;
9614
9615 /* FPR argument regs start at f0. */
9616 min_fpr += FPR0_REGNUM;
9617 max_fpr += FPR0_REGNUM;
9618
9619 for (i = min_fpr; i <= max_fpr; i++)
9620 cfun_set_fpr_save (i);
9621 }
9622
9623 /* Reserve the GPR save slots for GPRs which need to be saved due to
9624 stdarg.
9625 This is a helper routine for s390_register_info. */
9626
9627 static void
9628 s390_register_info_stdarg_gpr ()
9629 {
9630 int i;
9631 int min_gpr;
9632 int max_gpr;
9633
9634 if (!cfun->stdarg
9635 || !cfun->va_list_gpr_size
9636 || crtl->args.info.gprs >= GP_ARG_NUM_REG)
9637 return;
9638
9639 min_gpr = crtl->args.info.gprs;
9640 max_gpr = min_gpr + cfun->va_list_gpr_size - 1;
9641 if (max_gpr >= GP_ARG_NUM_REG)
9642 max_gpr = GP_ARG_NUM_REG - 1;
9643
9644 /* GPR argument regs start at r2. */
9645 min_gpr += GPR2_REGNUM;
9646 max_gpr += GPR2_REGNUM;
9647
9648   /* If r6 was supposed to be saved into an FPR and now needs to go to
9649      the stack for varargs, we have to adjust the restore range to make
9650      sure that the restore is done from the stack as well.  */
9651 if (FP_REGNO_P (cfun_gpr_save_slot (GPR6_REGNUM))
9652 && min_gpr <= GPR6_REGNUM
9653 && max_gpr >= GPR6_REGNUM)
9654 {
9655 if (cfun_frame_layout.first_restore_gpr == -1
9656 || cfun_frame_layout.first_restore_gpr > GPR6_REGNUM)
9657 cfun_frame_layout.first_restore_gpr = GPR6_REGNUM;
9658 if (cfun_frame_layout.last_restore_gpr == -1
9659 || cfun_frame_layout.last_restore_gpr < GPR6_REGNUM)
9660 cfun_frame_layout.last_restore_gpr = GPR6_REGNUM;
9661 }
9662
9663 if (cfun_frame_layout.first_save_gpr == -1
9664 || cfun_frame_layout.first_save_gpr > min_gpr)
9665 cfun_frame_layout.first_save_gpr = min_gpr;
9666
9667 if (cfun_frame_layout.last_save_gpr == -1
9668 || cfun_frame_layout.last_save_gpr < max_gpr)
9669 cfun_frame_layout.last_save_gpr = max_gpr;
9670
9671 for (i = min_gpr; i <= max_gpr; i++)
9672 cfun_gpr_save_slot (i) = SAVE_SLOT_STACK;
9673 }
9674
9675 /* Calculate the save and restore ranges for stm(g) and lm(g) in the
9676 prologue and epilogue. */
9677
9678 static void
9679 s390_register_info_set_ranges ()
9680 {
9681 int i, j;
9682
9683 /* Find the first and the last save slot supposed to use the stack
9684 to set the restore range.
9685 Vararg regs might be marked as save to stack but only the
9686 call-saved regs really need restoring (i.e. r6). This code
9687 assumes that the vararg regs have not yet been recorded in
9688 cfun_gpr_save_slot. */
9689 for (i = 0; i < 16 && cfun_gpr_save_slot (i) != SAVE_SLOT_STACK; i++);
9690 for (j = 15; j > i && cfun_gpr_save_slot (j) != SAVE_SLOT_STACK; j--);
9691 cfun_frame_layout.first_restore_gpr = (i == 16) ? -1 : i;
9692 cfun_frame_layout.last_restore_gpr = (i == 16) ? -1 : j;
9693 cfun_frame_layout.first_save_gpr = (i == 16) ? -1 : i;
9694 cfun_frame_layout.last_save_gpr = (i == 16) ? -1 : j;
9695 }
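/* Since stm/lm always operate on one contiguous register range, the
   ranges computed here may cover registers that strictly would not
   need saving: if only r6 and r14/r15 got stack slots, the range still
   becomes r6..r15 and r7..r13 are stored along with them.  */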
9696
9697 /* The GPR and FPR save slots in cfun->machine->frame_layout are set
9698 for registers which need to be saved in function prologue.
9699 This function can be used until the insns emitted for save/restore
9700 of the regs are visible in the RTL stream. */
9701
9702 static void
9703 s390_register_info ()
9704 {
9705 int i;
9706 char clobbered_regs[32];
9707
9708 gcc_assert (!epilogue_completed);
9709
9710 if (reload_completed)
9711 /* After reload we rely on our own routine to determine which
9712 registers need saving. */
9713 s390_regs_ever_clobbered (clobbered_regs);
9714 else
9715 /* During reload we use regs_ever_live as a base since reload
9716 does changes in there which we otherwise would not be aware
9717 of. */
9718 for (i = 0; i < 32; i++)
9719 clobbered_regs[i] = df_regs_ever_live_p (i);
9720
9721 for (i = 0; i < 32; i++)
9722 clobbered_regs[i] = clobbered_regs[i] && !global_regs[i];
9723
9724 /* Mark the call-saved FPRs which need to be saved.
9725 This needs to be done before checking the special GPRs since the
9726 stack pointer usage depends on whether high FPRs have to be saved
9727 or not. */
9728 cfun_frame_layout.fpr_bitmap = 0;
9729 cfun_frame_layout.high_fprs = 0;
9730 for (i = FPR0_REGNUM; i <= FPR15_REGNUM; i++)
9731 if (clobbered_regs[i] && !call_used_regs[i])
9732 {
9733 cfun_set_fpr_save (i);
9734 if (i >= FPR8_REGNUM)
9735 cfun_frame_layout.high_fprs++;
9736 }
9737
9738 /* Register 12 is used for GOT address, but also as temp in prologue
9739 for split-stack stdarg functions (unless r14 is available). */
9740 clobbered_regs[12]
9741 |= ((flag_pic && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
9742 || (flag_split_stack && cfun->stdarg
9743 && (crtl->is_leaf || TARGET_TPF_PROFILING
9744 || has_hard_reg_initial_val (Pmode, RETURN_REGNUM))));
9745
9746 clobbered_regs[BASE_REGNUM]
9747 |= (cfun->machine->base_reg
9748 && REGNO (cfun->machine->base_reg) == BASE_REGNUM);
9749
9750 clobbered_regs[HARD_FRAME_POINTER_REGNUM]
9751 |= !!frame_pointer_needed;
9752
9753   /* On pre-z900 machines this decision might not be made until
9754      machine-dependent reorg.
9755      save_return_addr_p will only be set on non-zarch machines, so
9756      there is no risk that r14 goes into an FPR instead of a stack
9757      slot.  */
9758 clobbered_regs[RETURN_REGNUM]
9759 |= (!crtl->is_leaf
9760 || TARGET_TPF_PROFILING
9761 || cfun_frame_layout.save_return_addr_p
9762 || crtl->calls_eh_return);
9763
9764 clobbered_regs[STACK_POINTER_REGNUM]
9765 |= (!crtl->is_leaf
9766 || TARGET_TPF_PROFILING
9767 || cfun_save_high_fprs_p
9768 || get_frame_size () > 0
9769 || (reload_completed && cfun_frame_layout.frame_size > 0)
9770 || cfun->calls_alloca);
9771
9772 memset (cfun_frame_layout.gpr_save_slots, SAVE_SLOT_NONE, 16);
9773
9774 for (i = 6; i < 16; i++)
9775 if (clobbered_regs[i])
9776 cfun_gpr_save_slot (i) = SAVE_SLOT_STACK;
9777
9778 s390_register_info_stdarg_fpr ();
9779 s390_register_info_gprtofpr ();
9780 s390_register_info_set_ranges ();
9781 /* stdarg functions might need to save GPRs 2 to 6. This might
9782 override the GPR->FPR save decision made by
9783 s390_register_info_gprtofpr for r6 since vararg regs must go to
9784 the stack. */
9785 s390_register_info_stdarg_gpr ();
9786 }
9787
9788 /* Return true if REGNO is a global register, but not one of the
9789    special ones that need to be saved/restored anyway.  */
9790
9791 static inline bool
9792 global_not_special_regno_p (int regno)
9793 {
9794 return (global_regs[regno]
9795 /* These registers are special and need to be
9796 restored in any case. */
9797 && !(regno == STACK_POINTER_REGNUM
9798 || regno == RETURN_REGNUM
9799 || regno == BASE_REGNUM
9800 || (flag_pic && regno == (int)PIC_OFFSET_TABLE_REGNUM)));
9801 }
9802
9803 /* This function is called by s390_optimize_prologue in order to get
9804 rid of unnecessary GPR save/restore instructions. The register info
9805 for the GPRs is re-computed and the ranges are re-calculated. */
9806
9807 static void
9808 s390_optimize_register_info ()
9809 {
9810 char clobbered_regs[32];
9811 int i;
9812
9813 gcc_assert (epilogue_completed);
9814
9815 s390_regs_ever_clobbered (clobbered_regs);
9816
9817   /* Global registers do not need to be saved and restored unless they
9818      are among our special regs (r12, r13, r14, or r15).  */
9819 for (i = 0; i < 32; i++)
9820 clobbered_regs[i] = clobbered_regs[i] && !global_not_special_regno_p (i);
9821
9822 /* There is still special treatment needed for cases invisible to
9823 s390_regs_ever_clobbered. */
9824 clobbered_regs[RETURN_REGNUM]
9825 |= (TARGET_TPF_PROFILING
9826 /* When expanding builtin_return_addr in ESA mode we do not
9827 know whether r14 will later be needed as scratch reg when
9828 doing branch splitting. So the builtin always accesses the
9829 r14 save slot and we need to stick to the save/restore
9830 decision for r14 even if it turns out that it didn't get
9831 clobbered. */
9832 || cfun_frame_layout.save_return_addr_p
9833 || crtl->calls_eh_return);
9834
9835 memset (cfun_frame_layout.gpr_save_slots, SAVE_SLOT_NONE, 6);
9836
9837 for (i = 6; i < 16; i++)
9838 if (!clobbered_regs[i])
9839 cfun_gpr_save_slot (i) = SAVE_SLOT_NONE;
9840
9841 s390_register_info_set_ranges ();
9842 s390_register_info_stdarg_gpr ();
9843 }
9844
9845 /* Fill cfun->machine with info about frame of current function. */
9846
9847 static void
9848 s390_frame_info (void)
9849 {
9850 HOST_WIDE_INT lowest_offset;
9851
9852 cfun_frame_layout.first_save_gpr_slot = cfun_frame_layout.first_save_gpr;
9853 cfun_frame_layout.last_save_gpr_slot = cfun_frame_layout.last_save_gpr;
9854
9855 /* The va_arg builtin uses a constant distance of 16 *
9856 UNITS_PER_LONG (r0-r15) to reach the FPRs from the reg_save_area
9857 pointer. So even if we are going to save the stack pointer in an
9858 FPR we need the stack space in order to keep the offsets
9859 correct. */
9860 if (cfun->stdarg && cfun_save_arg_fprs_p)
9861 {
9862 cfun_frame_layout.last_save_gpr_slot = STACK_POINTER_REGNUM;
9863
9864 if (cfun_frame_layout.first_save_gpr_slot == -1)
9865 cfun_frame_layout.first_save_gpr_slot = STACK_POINTER_REGNUM;
9866 }
9867
9868 cfun_frame_layout.frame_size = get_frame_size ();
9869 if (!TARGET_64BIT && cfun_frame_layout.frame_size > 0x7fff0000)
9870 fatal_error (input_location,
9871 "total size of local variables exceeds architecture limit");
9872
9873 if (!TARGET_PACKED_STACK)
9874 {
9875 /* Fixed stack layout. */
9876 cfun_frame_layout.backchain_offset = 0;
9877 cfun_frame_layout.f0_offset = 16 * UNITS_PER_LONG;
9878 cfun_frame_layout.f4_offset = cfun_frame_layout.f0_offset + 2 * 8;
9879 cfun_frame_layout.f8_offset = -cfun_frame_layout.high_fprs * 8;
9880 cfun_frame_layout.gprs_offset = (cfun_frame_layout.first_save_gpr_slot
9881 * UNITS_PER_LONG);
9882 }
9883 else if (TARGET_BACKCHAIN)
9884 {
9885 /* Kernel stack layout - packed stack, backchain, no float */
9886 gcc_assert (TARGET_SOFT_FLOAT);
9887 cfun_frame_layout.backchain_offset = (STACK_POINTER_OFFSET
9888 - UNITS_PER_LONG);
9889
9890 /* The distance between the backchain and the return address
9891 save slot must not change. So we always need a slot for the
9892 stack pointer which resides in between. */
9893 cfun_frame_layout.last_save_gpr_slot = STACK_POINTER_REGNUM;
9894
9895 cfun_frame_layout.gprs_offset
9896 = cfun_frame_layout.backchain_offset - cfun_gprs_save_area_size;
9897
9898 /* FPRs will not be saved. Nevertheless pick sane values to
9899 keep area calculations valid. */
9900 cfun_frame_layout.f0_offset =
9901 cfun_frame_layout.f4_offset =
9902 cfun_frame_layout.f8_offset = cfun_frame_layout.gprs_offset;
9903 }
9904 else
9905 {
9906 int num_fprs;
9907
9908 /* Packed stack layout without backchain. */
9909
9910 /* With stdarg FPRs need their dedicated slots. */
9911 num_fprs = (TARGET_64BIT && cfun->stdarg ? 2
9912 : (cfun_fpr_save_p (FPR4_REGNUM) +
9913 cfun_fpr_save_p (FPR6_REGNUM)));
9914 cfun_frame_layout.f4_offset = STACK_POINTER_OFFSET - 8 * num_fprs;
9915
9916 num_fprs = (cfun->stdarg ? 2
9917 : (cfun_fpr_save_p (FPR0_REGNUM)
9918 + cfun_fpr_save_p (FPR2_REGNUM)));
9919 cfun_frame_layout.f0_offset = cfun_frame_layout.f4_offset - 8 * num_fprs;
9920
9921 cfun_frame_layout.gprs_offset
9922 = cfun_frame_layout.f0_offset - cfun_gprs_save_area_size;
9923
9924 cfun_frame_layout.f8_offset = (cfun_frame_layout.gprs_offset
9925 - cfun_frame_layout.high_fprs * 8);
9926 }
9927
9928 if (cfun_save_high_fprs_p)
9929 cfun_frame_layout.frame_size += cfun_frame_layout.high_fprs * 8;
9930
9931 if (!crtl->is_leaf)
9932 cfun_frame_layout.frame_size += crtl->outgoing_args_size;
9933
9934 /* In the following cases we have to allocate a STACK_POINTER_OFFSET
9935 sized area at the bottom of the stack. This is required also for
9936 leaf functions. When GCC generates a local stack reference it
9937 will always add STACK_POINTER_OFFSET to all these references. */
9938 if (crtl->is_leaf
9939 && !TARGET_TPF_PROFILING
9940 && cfun_frame_layout.frame_size == 0
9941 && !cfun->calls_alloca)
9942 return;
9943
9944 /* Calculate the number of bytes we have used in our own register
9945 save area. With the packed stack layout we can re-use the
9946 remaining bytes for normal stack elements. */
9947
9948 if (TARGET_PACKED_STACK)
9949 lowest_offset = MIN (MIN (cfun_frame_layout.f0_offset,
9950 cfun_frame_layout.f4_offset),
9951 cfun_frame_layout.gprs_offset);
9952 else
9953 lowest_offset = 0;
9954
9955 if (TARGET_BACKCHAIN)
9956 lowest_offset = MIN (lowest_offset, cfun_frame_layout.backchain_offset);
9957
9958 cfun_frame_layout.frame_size += STACK_POINTER_OFFSET - lowest_offset;
9959
9960   /* If, under 31 bit, an odd number of GPRs has to be saved, we have
9961      to adjust the frame size to maintain the 8-byte alignment of
9962      stack frames.  */
9963 cfun_frame_layout.frame_size = ((cfun_frame_layout.frame_size +
9964 STACK_BOUNDARY / BITS_PER_UNIT - 1)
9965 & ~(STACK_BOUNDARY / BITS_PER_UNIT - 1));
9966 }
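/* The final statement is the usual power-of-two align-up.  For
   example, with STACK_BOUNDARY == 64 a raw frame size of 100 bytes
   becomes (100 + 7) & ~7 == 104, keeping every frame 8-byte
   aligned.  */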
9967
9968 /* Generate frame layout. Fills in register and frame data for the current
9969 function in cfun->machine. This routine can be called multiple times;
9970 it will re-do the complete frame layout every time. */
9971
9972 static void
9973 s390_init_frame_layout (void)
9974 {
9975 HOST_WIDE_INT frame_size;
9976 int base_used;
9977
9978 /* After LRA the frame layout is supposed to be read-only and should
9979 not be re-computed. */
9980 if (reload_completed)
9981 return;
9982
9983 do
9984 {
9985 frame_size = cfun_frame_layout.frame_size;
9986
9987 /* Try to predict whether we'll need the base register. */
9988 base_used = crtl->uses_const_pool
9989 || (!DISP_IN_RANGE (frame_size)
9990 && !CONST_OK_FOR_K (frame_size));
9991
9992 /* Decide which register to use as literal pool base. In small
9993 leaf functions, try to use an unused call-clobbered register
9994 as base register to avoid save/restore overhead. */
9995 if (!base_used)
9996 cfun->machine->base_reg = NULL_RTX;
9997 else
9998 {
9999 int br = 0;
10000
10001 if (crtl->is_leaf)
10002 /* Prefer r5 (most likely to be free). */
10003 for (br = 5; br >= 2 && df_regs_ever_live_p (br); br--)
10004 ;
10005 cfun->machine->base_reg =
10006 gen_rtx_REG (Pmode, (br >= 2) ? br : BASE_REGNUM);
10007 }
10008
10009 s390_register_info ();
10010 s390_frame_info ();
10011 }
10012 while (frame_size != cfun_frame_layout.frame_size);
10013 }
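/* The do/while loop above iterates to a fixed point: choosing a base
   register changes the register save area and hence the frame size,
   and the new frame size may in turn change whether a base register is
   needed at all.  Iteration stops as soon as a layout reproduces its
   own frame size.  */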
10014
10015 /* Remove the FPR clobbers from a tbegin insn if it can be proven that
10016 the TX is nonescaping. A transaction is considered escaping if
10017 there is at least one path from tbegin returning CC0 to the
10018    function exit block without a tend.
10019
10020 The check so far has some limitations:
10021 - only single tbegin/tend BBs are supported
10022 - the first cond jump after tbegin must separate the CC0 path from ~CC0
10023    - when CC is copied to a GPR and the CC0 check is done on the GPR,
10024      this is not supported
10025 */
10026
10027 static void
10028 s390_optimize_nonescaping_tx (void)
10029 {
10030 const unsigned int CC0 = 1 << 3;
10031 basic_block tbegin_bb = NULL;
10032 basic_block tend_bb = NULL;
10033 basic_block bb;
10034 rtx_insn *insn;
10035 bool result = true;
10036 int bb_index;
10037 rtx_insn *tbegin_insn = NULL;
10038
10039 if (!cfun->machine->tbegin_p)
10040 return;
10041
10042 for (bb_index = 0; bb_index < n_basic_blocks_for_fn (cfun); bb_index++)
10043 {
10044 bb = BASIC_BLOCK_FOR_FN (cfun, bb_index);
10045
10046 if (!bb)
10047 continue;
10048
10049 FOR_BB_INSNS (bb, insn)
10050 {
10051 rtx ite, cc, pat, target;
10052 unsigned HOST_WIDE_INT mask;
10053
10054 if (!INSN_P (insn) || INSN_CODE (insn) <= 0)
10055 continue;
10056
10057 pat = PATTERN (insn);
10058
10059 if (GET_CODE (pat) == PARALLEL)
10060 pat = XVECEXP (pat, 0, 0);
10061
10062 if (GET_CODE (pat) != SET
10063 || GET_CODE (SET_SRC (pat)) != UNSPEC_VOLATILE)
10064 continue;
10065
10066 if (XINT (SET_SRC (pat), 1) == UNSPECV_TBEGIN)
10067 {
10068 rtx_insn *tmp;
10069
10070 tbegin_insn = insn;
10071
10072 /* Just return if the tbegin doesn't have clobbers. */
10073 if (GET_CODE (PATTERN (insn)) != PARALLEL)
10074 return;
10075
10076 if (tbegin_bb != NULL)
10077 return;
10078
10079 /* Find the next conditional jump. */
10080 for (tmp = NEXT_INSN (insn);
10081 tmp != NULL_RTX;
10082 tmp = NEXT_INSN (tmp))
10083 {
10084 if (reg_set_p (gen_rtx_REG (CCmode, CC_REGNUM), tmp))
10085 return;
10086 if (!JUMP_P (tmp))
10087 continue;
10088
10089 ite = SET_SRC (PATTERN (tmp));
10090 if (GET_CODE (ite) != IF_THEN_ELSE)
10091 continue;
10092
10093 cc = XEXP (XEXP (ite, 0), 0);
10094 if (!REG_P (cc) || !CC_REGNO_P (REGNO (cc))
10095 || GET_MODE (cc) != CCRAWmode
10096 || GET_CODE (XEXP (XEXP (ite, 0), 1)) != CONST_INT)
10097 return;
10098
10099 if (bb->succs->length () != 2)
10100 return;
10101
10102 mask = INTVAL (XEXP (XEXP (ite, 0), 1));
10103 if (GET_CODE (XEXP (ite, 0)) == NE)
10104 mask ^= 0xf;
10105
10106 if (mask == CC0)
10107 target = XEXP (ite, 1);
10108 else if (mask == (CC0 ^ 0xf))
10109 target = XEXP (ite, 2);
10110 else
10111 return;
10112
10113 {
10114 edge_iterator ei;
10115 edge e1, e2;
10116
10117 ei = ei_start (bb->succs);
10118 e1 = ei_safe_edge (ei);
10119 ei_next (&ei);
10120 e2 = ei_safe_edge (ei);
10121
10122 if (e2->flags & EDGE_FALLTHRU)
10123 {
10124 e2 = e1;
10125 e1 = ei_safe_edge (ei);
10126 }
10127
10128 if (!(e1->flags & EDGE_FALLTHRU))
10129 return;
10130
10131 tbegin_bb = (target == pc_rtx) ? e1->dest : e2->dest;
10132 }
10133 if (tmp == BB_END (bb))
10134 break;
10135 }
10136 }
10137
10138 if (XINT (SET_SRC (pat), 1) == UNSPECV_TEND)
10139 {
10140 if (tend_bb != NULL)
10141 return;
10142 tend_bb = bb;
10143 }
10144 }
10145 }
10146
10147 /* Either we successfully remove the FPR clobbers here or we are not
10148 able to do anything for this TX. Both cases don't qualify for
10149 another look. */
10150 cfun->machine->tbegin_p = false;
10151
10152 if (tbegin_bb == NULL || tend_bb == NULL)
10153 return;
10154
10155 calculate_dominance_info (CDI_POST_DOMINATORS);
10156 result = dominated_by_p (CDI_POST_DOMINATORS, tbegin_bb, tend_bb);
10157 free_dominance_info (CDI_POST_DOMINATORS);
10158
10159 if (!result)
10160 return;
10161
10162 PATTERN (tbegin_insn) = gen_rtx_PARALLEL (VOIDmode,
10163 gen_rtvec (2,
10164 XVECEXP (PATTERN (tbegin_insn), 0, 0),
10165 XVECEXP (PATTERN (tbegin_insn), 0, 1)));
10166 INSN_CODE (tbegin_insn) = -1;
10167 df_insn_rescan (tbegin_insn);
10168
10169 return;
10170 }
10171
10172 /* Implement TARGET_HARD_REGNO_NREGS. Because all registers in a class
10173 have the same size, this is equivalent to CLASS_MAX_NREGS. */
10174
10175 static unsigned int
10176 s390_hard_regno_nregs (unsigned int regno, machine_mode mode)
10177 {
10178 return s390_class_max_nregs (REGNO_REG_CLASS (regno), mode);
10179 }
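/* For example, DImode occupies two GPRs on 31 bit (UNITS_PER_WORD ==
   4) but a single GPR on 64 bit, while with the vector facility a
   TImode value in VEC_REGS fits one 16-byte vector register.  */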
10180
10181 /* Implement TARGET_HARD_REGNO_MODE_OK.
10182
10183 Integer modes <= word size fit into any GPR.
10184 Integer modes > word size fit into successive GPRs, starting with
10185 an even-numbered register.
10186 SImode and DImode fit into FPRs as well.
10187
10188 Floating point modes <= word size fit into any FPR or GPR.
10189 Floating point modes > word size (i.e. DFmode on 32-bit) fit
10190 into any FPR, or an even-odd GPR pair.
10191 TFmode fits only into an even-odd FPR pair.
10192
10193 Complex floating point modes fit either into two FPRs, or into
10194 successive GPRs (again starting with an even number).
10195 TCmode fits only into two successive even-odd FPR pairs.
10196
10197 Condition code modes fit only into the CC register. */
10198
10199 static bool
10200 s390_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
10201 {
10202 if (!TARGET_VX && VECTOR_NOFP_REGNO_P (regno))
10203 return false;
10204
10205 switch (REGNO_REG_CLASS (regno))
10206 {
10207 case VEC_REGS:
10208 return ((GET_MODE_CLASS (mode) == MODE_INT
10209 && s390_class_max_nregs (VEC_REGS, mode) == 1)
10210 || mode == DFmode
10211 || (TARGET_VXE && mode == SFmode)
10212 || s390_vector_mode_supported_p (mode));
10213 break;
10214 case FP_REGS:
10215 if (TARGET_VX
10216 && ((GET_MODE_CLASS (mode) == MODE_INT
10217 && s390_class_max_nregs (FP_REGS, mode) == 1)
10218 || mode == DFmode
10219 || s390_vector_mode_supported_p (mode)))
10220 return true;
10221
10222 if (REGNO_PAIR_OK (regno, mode))
10223 {
10224 if (mode == SImode || mode == DImode)
10225 return true;
10226
10227 if (FLOAT_MODE_P (mode) && GET_MODE_CLASS (mode) != MODE_VECTOR_FLOAT)
10228 return true;
10229 }
10230 break;
10231 case ADDR_REGS:
10232 if (FRAME_REGNO_P (regno) && mode == Pmode)
10233 return true;
10234
10235 /* fallthrough */
10236 case GENERAL_REGS:
10237 if (REGNO_PAIR_OK (regno, mode))
10238 {
10239 if (TARGET_ZARCH
10240 || (mode != TFmode && mode != TCmode && mode != TDmode))
10241 return true;
10242 }
10243 break;
10244 case CC_REGS:
10245 if (GET_MODE_CLASS (mode) == MODE_CC)
10246 return true;
10247 break;
10248 case ACCESS_REGS:
10249 if (REGNO_PAIR_OK (regno, mode))
10250 {
10251 if (mode == SImode || mode == Pmode)
10252 return true;
10253 }
10254 break;
10255 default:
10256 return false;
10257 }
10258
10259 return false;
10260 }
10261
10262 /* Implement TARGET_MODES_TIEABLE_P. */
10263
10264 static bool
10265 s390_modes_tieable_p (machine_mode mode1, machine_mode mode2)
10266 {
10267 return ((mode1 == SFmode || mode1 == DFmode)
10268 == (mode2 == SFmode || mode2 == DFmode));
10269 }
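/* In other words, two modes are tieable iff they agree on being a
   scalar float that may live in an FPR: SFmode ties with DFmode and
   SImode ties with DImode, but SImode does not tie with SFmode.  */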
10270
10271 /* Return nonzero if register OLD_REG can be renamed to register NEW_REG. */
10272
10273 bool
10274 s390_hard_regno_rename_ok (unsigned int old_reg, unsigned int new_reg)
10275 {
10276 /* Once we've decided upon a register to use as base register, it must
10277 no longer be used for any other purpose. */
10278 if (cfun->machine->base_reg)
10279 if (REGNO (cfun->machine->base_reg) == old_reg
10280 || REGNO (cfun->machine->base_reg) == new_reg)
10281 return false;
10282
10283 /* Prevent regrename from using call-saved regs which haven't
10284 actually been saved. This is necessary since regrename assumes
10285 the backend save/restore decisions are based on
10286 df_regs_ever_live. Since we have our own routine we have to tell
10287 regrename manually about it. */
10288 if (GENERAL_REGNO_P (new_reg)
10289 && !call_used_regs[new_reg]
10290 && cfun_gpr_save_slot (new_reg) == SAVE_SLOT_NONE)
10291 return false;
10292
10293 return true;
10294 }
10295
10296 /* Return nonzero if register REGNO can be used as a scratch register
10297 in peephole2. */
10298
10299 static bool
10300 s390_hard_regno_scratch_ok (unsigned int regno)
10301 {
10302 /* See s390_hard_regno_rename_ok. */
10303 if (GENERAL_REGNO_P (regno)
10304 && !call_used_regs[regno]
10305 && cfun_gpr_save_slot (regno) == SAVE_SLOT_NONE)
10306 return false;
10307
10308 return true;
10309 }
10310
10311 /* Implement TARGET_HARD_REGNO_CALL_PART_CLOBBERED. When generating
10312 code that runs in z/Architecture mode, but conforms to the 31-bit
10313 ABI, GPRs can hold 8 bytes; the ABI guarantees only that the lower 4
10314 bytes are saved across calls, however. */
10315
10316 static bool
10317 s390_hard_regno_call_part_clobbered (unsigned int, unsigned int regno,
10318 machine_mode mode)
10319 {
10320 if (!TARGET_64BIT
10321 && TARGET_ZARCH
10322 && GET_MODE_SIZE (mode) > 4
10323 && ((regno >= 6 && regno <= 15) || regno == 32))
10324 return true;
10325
10326 if (TARGET_VX
10327 && GET_MODE_SIZE (mode) > 8
10328 && (((TARGET_64BIT && regno >= 24 && regno <= 31))
10329 || (!TARGET_64BIT && (regno == 18 || regno == 19))))
10330 return true;
10331
10332 return false;
10333 }
10334
10335 /* Maximum number of registers to represent a value of mode MODE
10336 in a register of class RCLASS. */
10337
10338 int
10339 s390_class_max_nregs (enum reg_class rclass, machine_mode mode)
10340 {
10341 int reg_size;
10342 bool reg_pair_required_p = false;
10343
10344 switch (rclass)
10345 {
10346 case FP_REGS:
10347 case VEC_REGS:
10348 reg_size = TARGET_VX ? 16 : 8;
10349
10350 /* TF and TD modes would fit into a VR but we put them into a
10351 register pair since we do not have 128bit FP instructions on
10352 full VRs. */
10353 if (TARGET_VX
10354 && SCALAR_FLOAT_MODE_P (mode)
10355 && GET_MODE_SIZE (mode) >= 16)
10356 reg_pair_required_p = true;
10357
10358 /* Even if complex types would fit into a single FPR/VR we force
10359 them into a register pair to deal with the parts more easily.
10360 (FIXME: What about complex ints?) */
10361 if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
10362 reg_pair_required_p = true;
10363 break;
10364 case ACCESS_REGS:
10365 reg_size = 4;
10366 break;
10367 default:
10368 reg_size = UNITS_PER_WORD;
10369 break;
10370 }
10371
10372 if (reg_pair_required_p)
10373 return 2 * ((GET_MODE_SIZE (mode) / 2 + reg_size - 1) / reg_size);
10374
10375 return (GET_MODE_SIZE (mode) + reg_size - 1) / reg_size;
10376 }
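/* Worked example: TFmode (16 bytes) in FP_REGS with the vector
   facility gives reg_size == 16 and reg_pair_required_p, hence
   2 * ((16/2 + 15) / 16) == 2 registers -- an even/odd FPR pair rather
   than a single 16-byte VR, as the comment above explains.  */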
10377
10378 /* Implement TARGET_CAN_CHANGE_MODE_CLASS. */
10379
10380 static bool
10381 s390_can_change_mode_class (machine_mode from_mode,
10382 machine_mode to_mode,
10383 reg_class_t rclass)
10384 {
10385 machine_mode small_mode;
10386 machine_mode big_mode;
10387
10388 /* V1TF and TF have different representations in vector
10389 registers. */
10390 if (reg_classes_intersect_p (VEC_REGS, rclass)
10391 && ((from_mode == V1TFmode && to_mode == TFmode)
10392 || (from_mode == TFmode && to_mode == V1TFmode)))
10393 return false;
10394
10395 if (GET_MODE_SIZE (from_mode) == GET_MODE_SIZE (to_mode))
10396 return true;
10397
10398 if (GET_MODE_SIZE (from_mode) < GET_MODE_SIZE (to_mode))
10399 {
10400 small_mode = from_mode;
10401 big_mode = to_mode;
10402 }
10403 else
10404 {
10405 small_mode = to_mode;
10406 big_mode = from_mode;
10407 }
10408
10409 /* Values residing in VRs are little-endian style. All modes are
10410      placed left-aligned in a VR.  This means that we cannot allow
10411 switching between modes with differing sizes. Also if the vector
10412 facility is available we still place TFmode values in VR register
10413 pairs, since the only instructions we have operating on TFmodes
10414 only deal with register pairs. Therefore we have to allow DFmode
10415 subregs of TFmodes to enable the TFmode splitters. */
10416 if (reg_classes_intersect_p (VEC_REGS, rclass)
10417 && (GET_MODE_SIZE (small_mode) < 8
10418 || s390_class_max_nregs (VEC_REGS, big_mode) == 1))
10419 return false;
10420
10421 /* Likewise for access registers, since they have only half the
10422 word size on 64-bit. */
10423 if (reg_classes_intersect_p (ACCESS_REGS, rclass))
10424 return false;
10425
10426 return true;
10427 }
10428
10429 /* Return true if we use LRA instead of reload pass. */
10430 static bool
10431 s390_lra_p (void)
10432 {
10433 return s390_lra_flag;
10434 }
10435
10436 /* Return true if register FROM can be eliminated via register TO. */
10437
10438 static bool
10439 s390_can_eliminate (const int from, const int to)
10440 {
10441 /* We have not marked the base register as fixed.
10442 Instead, we have an elimination rule BASE_REGNUM -> BASE_REGNUM.
10443 If a function requires the base register, we say here that this
10444 elimination cannot be performed. This will cause reload to free
10445 up the base register (as if it were fixed). On the other hand,
10446 if the current function does *not* require the base register, we
10447 say here the elimination succeeds, which in turn allows reload
10448 to allocate the base register for any other purpose. */
10449 if (from == BASE_REGNUM && to == BASE_REGNUM)
10450 {
10451 s390_init_frame_layout ();
10452 return cfun->machine->base_reg == NULL_RTX;
10453 }
10454
10455 /* Everything else must point into the stack frame. */
10456 gcc_assert (to == STACK_POINTER_REGNUM
10457 || to == HARD_FRAME_POINTER_REGNUM);
10458
10459 gcc_assert (from == FRAME_POINTER_REGNUM
10460 || from == ARG_POINTER_REGNUM
10461 || from == RETURN_ADDRESS_POINTER_REGNUM);
10462
10463 /* Make sure we actually saved the return address. */
10464 if (from == RETURN_ADDRESS_POINTER_REGNUM)
10465 if (!crtl->calls_eh_return
10466 && !cfun->stdarg
10467 && !cfun_frame_layout.save_return_addr_p)
10468 return false;
10469
10470 return true;
10471 }
10472
10473 /* Return offset between register FROM and TO initially after prolog. */
10474
10475 HOST_WIDE_INT
10476 s390_initial_elimination_offset (int from, int to)
10477 {
10478 HOST_WIDE_INT offset;
10479
10480 /* ??? Why are we called for non-eliminable pairs? */
10481 if (!s390_can_eliminate (from, to))
10482 return 0;
10483
10484 switch (from)
10485 {
10486 case FRAME_POINTER_REGNUM:
10487 offset = (get_frame_size()
10488 + STACK_POINTER_OFFSET
10489 + crtl->outgoing_args_size);
10490 break;
10491
10492 case ARG_POINTER_REGNUM:
10493 s390_init_frame_layout ();
10494 offset = cfun_frame_layout.frame_size + STACK_POINTER_OFFSET;
10495 break;
10496
10497 case RETURN_ADDRESS_POINTER_REGNUM:
10498 s390_init_frame_layout ();
10499
10500 if (cfun_frame_layout.first_save_gpr_slot == -1)
10501 {
10502 /* If it turns out that for stdarg nothing went into the reg
10503 save area we also do not need the return address
10504 pointer. */
10505 if (cfun->stdarg && !cfun_save_arg_fprs_p)
10506 return 0;
10507
10508 gcc_unreachable ();
10509 }
10510
10511 /* In order to make the following work it is not necessary for
10512 r14 to have a save slot. It is sufficient if one other GPR
10513 got one. Since the GPRs are always stored without gaps we
10514 are able to calculate where the r14 save slot would
10515 reside. */
10516 offset = (cfun_frame_layout.frame_size + cfun_frame_layout.gprs_offset +
10517 (RETURN_REGNUM - cfun_frame_layout.first_save_gpr_slot) *
10518 UNITS_PER_LONG);
10519 break;
10520
10521 case BASE_REGNUM:
10522 offset = 0;
10523 break;
10524
10525 default:
10526 gcc_unreachable ();
10527 }
10528
10529 return offset;
10530 }
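/* Example for the return-address case: with GPR saves starting at
   slot 6, the r14 slot lies (14 - 6) * UNITS_PER_LONG bytes above
   gprs_offset; frame_size is added because the offset is measured from
   the stack pointer as it stands after the prologue.  */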
10531
10532 /* Emit insn to save fpr REGNUM at offset OFFSET relative
10533 to register BASE. Return generated insn. */
10534
10535 static rtx
10536 save_fpr (rtx base, int offset, int regnum)
10537 {
10538 rtx addr;
10539 addr = gen_rtx_MEM (DFmode, plus_constant (Pmode, base, offset));
10540
10541 if (regnum >= 16 && regnum <= (16 + FP_ARG_NUM_REG))
10542 set_mem_alias_set (addr, get_varargs_alias_set ());
10543 else
10544 set_mem_alias_set (addr, get_frame_alias_set ());
10545
10546 return emit_move_insn (addr, gen_rtx_REG (DFmode, regnum));
10547 }
10548
10549 /* Emit insn to restore fpr REGNUM from offset OFFSET relative
10550 to register BASE. Return generated insn. */
10551
10552 static rtx
10553 restore_fpr (rtx base, int offset, int regnum)
10554 {
10555 rtx addr;
10556 addr = gen_rtx_MEM (DFmode, plus_constant (Pmode, base, offset));
10557 set_mem_alias_set (addr, get_frame_alias_set ());
10558
10559 return emit_move_insn (gen_rtx_REG (DFmode, regnum), addr);
10560 }
10561
10562 /* Generate insn to save registers FIRST to LAST into
10563 the register save area located at offset OFFSET
10564 relative to register BASE. */
10565
10566 static rtx
10567 save_gprs (rtx base, int offset, int first, int last)
10568 {
10569 rtx addr, insn, note;
10570 int i;
10571
10572 addr = plus_constant (Pmode, base, offset);
10573 addr = gen_rtx_MEM (Pmode, addr);
10574
10575 set_mem_alias_set (addr, get_frame_alias_set ());
10576
10577 /* Special-case single register. */
10578 if (first == last)
10579 {
10580 if (TARGET_64BIT)
10581 insn = gen_movdi (addr, gen_rtx_REG (Pmode, first));
10582 else
10583 insn = gen_movsi (addr, gen_rtx_REG (Pmode, first));
10584
10585 if (!global_not_special_regno_p (first))
10586 RTX_FRAME_RELATED_P (insn) = 1;
10587 return insn;
10588 }
10589
10590
10591 insn = gen_store_multiple (addr,
10592 gen_rtx_REG (Pmode, first),
10593 GEN_INT (last - first + 1));
10594
10595 if (first <= 6 && cfun->stdarg)
10596 for (i = 0; i < XVECLEN (PATTERN (insn), 0); i++)
10597 {
10598 rtx mem = XEXP (XVECEXP (PATTERN (insn), 0, i), 0);
10599
10600 if (first + i <= 6)
10601 set_mem_alias_set (mem, get_varargs_alias_set ());
10602 }
10603
10604 /* We need to set the FRAME_RELATED flag on all SETs
10605 inside the store-multiple pattern.
10606
10607 However, we must not emit DWARF records for registers 2..5
10608 if they are stored for use by variable arguments ...
10609
10610      ??? Unfortunately, it is not enough to simply not set the
10611      FRAME_RELATED flags for those SETs, because the first SET
10612 of the PARALLEL is always treated as if it had the flag
10613 set, even if it does not. Therefore we emit a new pattern
10614 without those registers as REG_FRAME_RELATED_EXPR note. */
10615
10616 if (first >= 6 && !global_not_special_regno_p (first))
10617 {
10618 rtx pat = PATTERN (insn);
10619
10620 for (i = 0; i < XVECLEN (pat, 0); i++)
10621 if (GET_CODE (XVECEXP (pat, 0, i)) == SET
10622 && !global_not_special_regno_p (REGNO (SET_SRC (XVECEXP (pat,
10623 0, i)))))
10624 RTX_FRAME_RELATED_P (XVECEXP (pat, 0, i)) = 1;
10625
10626 RTX_FRAME_RELATED_P (insn) = 1;
10627 }
10628 else if (last >= 6)
10629 {
10630 int start;
10631
10632 for (start = first >= 6 ? first : 6; start <= last; start++)
10633 if (!global_not_special_regno_p (start))
10634 break;
10635
10636 if (start > last)
10637 return insn;
10638
10639 addr = plus_constant (Pmode, base,
10640 offset + (start - first) * UNITS_PER_LONG);
10641
10642 if (start == last)
10643 {
10644 if (TARGET_64BIT)
10645 note = gen_movdi (gen_rtx_MEM (Pmode, addr),
10646 gen_rtx_REG (Pmode, start));
10647 else
10648 note = gen_movsi (gen_rtx_MEM (Pmode, addr),
10649 gen_rtx_REG (Pmode, start));
10650 note = PATTERN (note);
10651
10652 add_reg_note (insn, REG_FRAME_RELATED_EXPR, note);
10653 RTX_FRAME_RELATED_P (insn) = 1;
10654
10655 return insn;
10656 }
10657
10658 note = gen_store_multiple (gen_rtx_MEM (Pmode, addr),
10659 gen_rtx_REG (Pmode, start),
10660 GEN_INT (last - start + 1));
10661 note = PATTERN (note);
10662
10663 add_reg_note (insn, REG_FRAME_RELATED_EXPR, note);
10664
10665 for (i = 0; i < XVECLEN (note, 0); i++)
10666 if (GET_CODE (XVECEXP (note, 0, i)) == SET
10667 && !global_not_special_regno_p (REGNO (SET_SRC (XVECEXP (note,
10668 0, i)))))
10669 RTX_FRAME_RELATED_P (XVECEXP (note, 0, i)) = 1;
10670
10671 RTX_FRAME_RELATED_P (insn) = 1;
10672 }
10673
10674 return insn;
10675 }
10676
10677 /* Generate insn to restore registers FIRST to LAST from
10678 the register save area located at offset OFFSET
10679 relative to register BASE. */
10680
10681 static rtx
10682 restore_gprs (rtx base, int offset, int first, int last)
10683 {
10684 rtx addr, insn;
10685
10686 addr = plus_constant (Pmode, base, offset);
10687 addr = gen_rtx_MEM (Pmode, addr);
10688 set_mem_alias_set (addr, get_frame_alias_set ());
10689
10690 /* Special-case single register. */
10691 if (first == last)
10692 {
10693 if (TARGET_64BIT)
10694 insn = gen_movdi (gen_rtx_REG (Pmode, first), addr);
10695 else
10696 insn = gen_movsi (gen_rtx_REG (Pmode, first), addr);
10697
10698 RTX_FRAME_RELATED_P (insn) = 1;
10699 return insn;
10700 }
10701
10702 insn = gen_load_multiple (gen_rtx_REG (Pmode, first),
10703 addr,
10704 GEN_INT (last - first + 1));
10705 RTX_FRAME_RELATED_P (insn) = 1;
10706 return insn;
10707 }
10708
10709 /* Return insn sequence to load the GOT register. */
10710
10711 rtx_insn *
10712 s390_load_got (void)
10713 {
10714 rtx_insn *insns;
10715
10716 /* We cannot use pic_offset_table_rtx here since we use this
10717 function also for non-pic if __tls_get_offset is called and in
10718 that case PIC_OFFSET_TABLE_REGNUM as well as pic_offset_table_rtx
10719 aren't usable. */
10720 rtx got_rtx = gen_rtx_REG (Pmode, 12);
10721
10722 start_sequence ();
10723
10724 emit_move_insn (got_rtx, s390_got_symbol ());
10725
10726 insns = get_insns ();
10727 end_sequence ();
10728 return insns;
10729 }
10730
10731 /* This ties together stack memory (MEM with an alias set of frame_alias_set)
10732 and the change to the stack pointer. */
10733
10734 static void
10735 s390_emit_stack_tie (void)
10736 {
10737 rtx mem = gen_frame_mem (BLKmode,
10738 gen_rtx_REG (Pmode, STACK_POINTER_REGNUM));
10739
10740 emit_insn (gen_stack_tie (mem));
10741 }
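/* The tie acts as a barrier expressed in RTL: because the insn
   references a BLKmode MEM in the frame alias set, neither the
   scheduler nor alias analysis may move frame saves or restores across
   the stack pointer change it accompanies.  */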
10742
10743 /* Copy GPRS into FPR save slots. */
10744
10745 static void
10746 s390_save_gprs_to_fprs (void)
10747 {
10748 int i;
10749
10750 if (!TARGET_Z10 || !TARGET_HARD_FLOAT || !crtl->is_leaf)
10751 return;
10752
10753 for (i = 6; i < 16; i++)
10754 {
10755 if (FP_REGNO_P (cfun_gpr_save_slot (i)))
10756 {
10757 rtx_insn *insn =
10758 emit_move_insn (gen_rtx_REG (DImode, cfun_gpr_save_slot (i)),
10759 gen_rtx_REG (DImode, i));
10760 RTX_FRAME_RELATED_P (insn) = 1;
10761	  /* This prevents dwarf2cfi from interpreting the set.  If it did,
10762	     it might emit def_cfa_register notes setting an FPR as the
10763	     new CFA.  */
10764 add_reg_note (insn, REG_CFA_REGISTER, copy_rtx (PATTERN (insn)));
10765 }
10766 }
10767 }
10768
10769 /* Restore GPRs from FPR save slots. */
10770
10771 static void
10772 s390_restore_gprs_from_fprs (void)
10773 {
10774 int i;
10775
10776 if (!TARGET_Z10 || !TARGET_HARD_FLOAT || !crtl->is_leaf)
10777 return;
10778
10779 /* Restore the GPRs starting with the stack pointer. That way the
10780 stack pointer already has its original value when it comes to
10781 restoring the hard frame pointer. So we can set the cfa reg back
10782 to the stack pointer. */
10783 for (i = STACK_POINTER_REGNUM; i >= 6; i--)
10784 {
10785 rtx_insn *insn;
10786
10787 if (!FP_REGNO_P (cfun_gpr_save_slot (i)))
10788 continue;
10789
10790 rtx fpr = gen_rtx_REG (DImode, cfun_gpr_save_slot (i));
10791
10792 if (i == STACK_POINTER_REGNUM)
10793 insn = emit_insn (gen_stack_restore_from_fpr (fpr));
10794 else
10795 insn = emit_move_insn (gen_rtx_REG (DImode, i), fpr);
10796
10797 df_set_regs_ever_live (i, true);
10798 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DImode, i));
10799
10800      /* If either the stack pointer or the frame pointer gets restored,
10801	 set the CFA value to its value at function start.  Doing this
10802	 for the frame pointer results in .cfi_def_cfa_register 15,
10803	 which is ok since, if the stack pointer got modified, it has
10804	 already been restored.  */
10805 if (i == STACK_POINTER_REGNUM || i == HARD_FRAME_POINTER_REGNUM)
10806 add_reg_note (insn, REG_CFA_DEF_CFA,
10807 plus_constant (Pmode, stack_pointer_rtx,
10808 STACK_POINTER_OFFSET));
10809 RTX_FRAME_RELATED_P (insn) = 1;
10810 }
10811 }
10812
10813
10814 /* A pass run immediately before shrink-wrapping and prologue and epilogue
10815 generation. */
10816
10817 namespace {
10818
10819 const pass_data pass_data_s390_early_mach =
10820 {
10821 RTL_PASS, /* type */
10822 "early_mach", /* name */
10823 OPTGROUP_NONE, /* optinfo_flags */
10824 TV_MACH_DEP, /* tv_id */
10825 0, /* properties_required */
10826 0, /* properties_provided */
10827 0, /* properties_destroyed */
10828 0, /* todo_flags_start */
10829 ( TODO_df_verify | TODO_df_finish ), /* todo_flags_finish */
10830 };
10831
10832 class pass_s390_early_mach : public rtl_opt_pass
10833 {
10834 public:
10835   pass_s390_early_mach (gcc::context *ctxt)
10836 : rtl_opt_pass (pass_data_s390_early_mach, ctxt)
10837 {}
10838
10839 /* opt_pass methods: */
10840 virtual unsigned int execute (function *);
10841
10842 }; // class pass_s390_early_mach
10843
10844 unsigned int
10845 pass_s390_early_mach::execute (function *fun)
10846 {
10847 rtx_insn *insn;
10848
10849 /* Try to get rid of the FPR clobbers. */
10850 s390_optimize_nonescaping_tx ();
10851
10852 /* Re-compute register info. */
10853 s390_register_info ();
10854
10855 /* If we're using a base register, ensure that it is always valid for
10856 the first non-prologue instruction. */
10857 if (fun->machine->base_reg)
10858 emit_insn_at_entry (gen_main_pool (fun->machine->base_reg));
10859
10860 /* Annotate all constant pool references to let the scheduler know
10861 they implicitly use the base register. */
10862 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
10863 if (INSN_P (insn))
10864 {
10865 annotate_constant_pool_refs (insn);
10866 df_insn_rescan (insn);
10867 }
10868 return 0;
10869 }
10870
10871 } // anon namespace
10872
10873 rtl_opt_pass *
10874 make_pass_s390_early_mach (gcc::context *ctxt)
10875 {
10876 return new pass_s390_early_mach (ctxt);
10877 }
10878
10879 /* Calculate TARGET = REG + OFFSET as s390_emit_prologue would do it.
10880    - push immediates that are too big to the literal pool and annotate the refs
10881    - emit frame-related notes for stack pointer changes.  */
10882
10883 static rtx
10884 s390_prologue_plus_offset (rtx target, rtx reg, rtx offset, bool frame_related_p)
10885 {
10886 rtx_insn *insn;
10887 rtx orig_offset = offset;
10888
10889 gcc_assert (REG_P (target));
10890 gcc_assert (REG_P (reg));
10891 gcc_assert (CONST_INT_P (offset));
10892
10893 if (offset == const0_rtx) /* lr/lgr */
10894 {
10895 insn = emit_move_insn (target, reg);
10896 }
10897 else if (DISP_IN_RANGE (INTVAL (offset))) /* la */
10898 {
10899 insn = emit_move_insn (target, gen_rtx_PLUS (Pmode, reg,
10900 offset));
10901 }
10902 else
10903 {
10904 if (!satisfies_constraint_K (offset) /* ahi/aghi */
10905 && (!TARGET_EXTIMM
10906 || (!satisfies_constraint_Op (offset) /* alfi/algfi */
10907 && !satisfies_constraint_On (offset)))) /* slfi/slgfi */
10908 offset = force_const_mem (Pmode, offset);
10909
10910 if (target != reg)
10911 {
10912 insn = emit_move_insn (target, reg);
10913 RTX_FRAME_RELATED_P (insn) = frame_related_p ? 1 : 0;
10914 }
10915
10916 insn = emit_insn (gen_add2_insn (target, offset));
10917
10918 if (!CONST_INT_P (offset))
10919 {
10920 annotate_constant_pool_refs (insn);
10921
10922 if (frame_related_p)
10923 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
10924 gen_rtx_SET (target,
10925 gen_rtx_PLUS (Pmode, target,
10926 orig_offset)));
10927 }
10928 }
10929
10930 RTX_FRAME_RELATED_P (insn) = frame_related_p ? 1 : 0;
10931
10932 /* If this is a stack adjustment and we are generating a stack clash
10933 prologue, then add a REG_STACK_CHECK note to signal that this insn
10934 should be left alone. */
10935 if (flag_stack_clash_protection && target == stack_pointer_rtx)
10936 add_reg_note (insn, REG_STACK_CHECK, const0_rtx);
10937
10938 return insn;
10939 }
10940
10941 /* Emit a compare instruction with a volatile memory access as stack
10942 probe. It does not waste store tags and does not clobber any
10943 registers apart from the condition code. */
10944 static void
10945 s390_emit_stack_probe (rtx addr)
10946 {
10947 rtx tmp = gen_rtx_MEM (Pmode, addr);
10948 MEM_VOLATILE_P (tmp) = 1;
10949 s390_emit_compare (EQ, gen_rtx_REG (Pmode, 0), tmp);
10950 emit_insn (gen_blockage ());
10951 }
10952
10953 /* Use a runtime loop if we have to emit more probes than this. */
10954 #define MIN_UNROLL_PROBES 3
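/* Illustrative example, assuming the default 4 kB probe interval:
   allocating 10 kB yields rounded_size = 8 kB, i.e. two probes plus
   a 2 kB residual, so the probes are emitted unrolled; a 64 kB
   allocation (16 probes) would use the runtime loop instead.  */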
10955
10956 /* Allocate SIZE bytes of stack space, using TEMP_REG as a temporary
10957 if necessary. LAST_PROBE_OFFSET contains the offset of the closest
10958 probe relative to the stack pointer.
10959
10960 Note that SIZE is negative.
10961
10962 The return value is true if TEMP_REG has been clobbered. */
10963 static bool
10964 allocate_stack_space (rtx size, HOST_WIDE_INT last_probe_offset,
10965 rtx temp_reg)
10966 {
10967 bool temp_reg_clobbered_p = false;
10968 HOST_WIDE_INT probe_interval
10969 = 1 << param_stack_clash_protection_probe_interval;
10970 HOST_WIDE_INT guard_size
10971 = 1 << param_stack_clash_protection_guard_size;
10972
10973 if (flag_stack_clash_protection)
10974 {
10975 if (last_probe_offset + -INTVAL (size) < guard_size)
10976 dump_stack_clash_frame_info (NO_PROBE_SMALL_FRAME, true);
10977 else
10978 {
10979 rtx offset = GEN_INT (probe_interval - UNITS_PER_LONG);
10980 HOST_WIDE_INT rounded_size = -INTVAL (size) & -probe_interval;
10981 HOST_WIDE_INT num_probes = rounded_size / probe_interval;
10982 HOST_WIDE_INT residual = -INTVAL (size) - rounded_size;
10983
10984 if (num_probes < MIN_UNROLL_PROBES)
10985 {
10986 /* Emit unrolled probe statements. */
10987
10988 for (unsigned int i = 0; i < num_probes; i++)
10989 {
10990 s390_prologue_plus_offset (stack_pointer_rtx,
10991 stack_pointer_rtx,
10992 GEN_INT (-probe_interval), true);
10993 s390_emit_stack_probe (gen_rtx_PLUS (Pmode,
10994 stack_pointer_rtx,
10995 offset));
10996 }
10997 dump_stack_clash_frame_info (PROBE_INLINE, residual != 0);
10998 }
10999 else
11000 {
11001 /* Emit a loop probing the pages. */
11002
11003 rtx_code_label *loop_start_label = gen_label_rtx ();
11004
11005 /* From now on temp_reg will be the CFA register. */
11006 s390_prologue_plus_offset (temp_reg, stack_pointer_rtx,
11007 GEN_INT (-rounded_size), true);
11008 emit_label (loop_start_label);
11009
11010 s390_prologue_plus_offset (stack_pointer_rtx,
11011 stack_pointer_rtx,
11012 GEN_INT (-probe_interval), false);
11013 s390_emit_stack_probe (gen_rtx_PLUS (Pmode,
11014 stack_pointer_rtx,
11015 offset));
11016 emit_cmp_and_jump_insns (stack_pointer_rtx, temp_reg,
11017 GT, NULL_RTX,
11018 Pmode, 1, loop_start_label);
11019
11020 /* Without this make_edges ICEs. */
11021 JUMP_LABEL (get_last_insn ()) = loop_start_label;
11022 LABEL_NUSES (loop_start_label) = 1;
11023
11024 /* That's going to be a NOP since stack pointer and
11025 temp_reg are supposed to be the same here. We just
11026 emit it to set the CFA reg back to r15. */
11027 s390_prologue_plus_offset (stack_pointer_rtx, temp_reg,
11028 const0_rtx, true);
11029 temp_reg_clobbered_p = true;
11030 dump_stack_clash_frame_info (PROBE_LOOP, residual != 0);
11031 }
11032
11033 /* Handle any residual allocation request. */
11034 s390_prologue_plus_offset (stack_pointer_rtx,
11035 stack_pointer_rtx,
11036 GEN_INT (-residual), true);
11037 last_probe_offset += residual;
11038 if (last_probe_offset >= probe_interval)
11039 s390_emit_stack_probe (gen_rtx_PLUS (Pmode,
11040 stack_pointer_rtx,
11041 GEN_INT (residual
11042 - UNITS_PER_LONG)));
11043
11044 return temp_reg_clobbered_p;
11045 }
11046 }
11047
11048 /* Subtract frame size from stack pointer. */
11049 s390_prologue_plus_offset (stack_pointer_rtx,
11050 stack_pointer_rtx,
11051 size, true);
11052
11053 return temp_reg_clobbered_p;
11054 }
11055
11056 /* Expand the prologue into a bunch of separate insns. */
11057
11058 void
11059 s390_emit_prologue (void)
11060 {
11061 rtx insn, addr;
11062 rtx temp_reg;
11063 int i;
11064 int offset;
11065 int next_fpr = 0;
11066
11067 /* Choose best register to use for temp use within prologue.
11068 TPF with profiling must avoid the register 14 - the tracing function
11069 needs the original contents of r14 to be preserved. */
11070
11071 if (!has_hard_reg_initial_val (Pmode, RETURN_REGNUM)
11072 && !crtl->is_leaf
11073 && !TARGET_TPF_PROFILING)
11074 temp_reg = gen_rtx_REG (Pmode, RETURN_REGNUM);
11075 else if (flag_split_stack && cfun->stdarg)
11076 temp_reg = gen_rtx_REG (Pmode, 12);
11077 else
11078 temp_reg = gen_rtx_REG (Pmode, 1);
11079
11080 /* When probing for stack-clash mitigation, we have to track the distance
11081 between the stack pointer and closest known reference.
11082
11083 Most of the time we have to make a worst case assumption. The
11084 only exception is when TARGET_BACKCHAIN is active, in which case
11085 we know *sp (offset 0) was written. */
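/* Illustrative example: with the default 4 kB probe interval, no
   backchain and a 64 bit stack boundary this starts out as
   4096 - 8 = 4088 bytes.  */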
11086 HOST_WIDE_INT probe_interval
11087 = 1 << param_stack_clash_protection_probe_interval;
11088 HOST_WIDE_INT last_probe_offset
11089 = (TARGET_BACKCHAIN
11090 ? (TARGET_PACKED_STACK ? STACK_POINTER_OFFSET - UNITS_PER_LONG : 0)
11091 : probe_interval - (STACK_BOUNDARY / UNITS_PER_WORD));
11092
11093 s390_save_gprs_to_fprs ();
11094
11095 /* Save call saved gprs. */
11096 if (cfun_frame_layout.first_save_gpr != -1)
11097 {
11098 insn = save_gprs (stack_pointer_rtx,
11099 cfun_frame_layout.gprs_offset +
11100 UNITS_PER_LONG * (cfun_frame_layout.first_save_gpr
11101 - cfun_frame_layout.first_save_gpr_slot),
11102 cfun_frame_layout.first_save_gpr,
11103 cfun_frame_layout.last_save_gpr);
11104
11105 /* This is not 100% correct. If we have more than one register saved,
11106 then LAST_PROBE_OFFSET can move even closer to sp. */
11107 last_probe_offset
11108 = (cfun_frame_layout.gprs_offset +
11109 UNITS_PER_LONG * (cfun_frame_layout.first_save_gpr
11110 - cfun_frame_layout.first_save_gpr_slot));
11111
11112 emit_insn (insn);
11113 }
11114
11115 /* Dummy insn to mark literal pool slot. */
11116
11117 if (cfun->machine->base_reg)
11118 emit_insn (gen_main_pool (cfun->machine->base_reg));
11119
11120 offset = cfun_frame_layout.f0_offset;
11121
11122 /* Save f0 and f2. */
11123 for (i = FPR0_REGNUM; i <= FPR0_REGNUM + 1; i++)
11124 {
11125 if (cfun_fpr_save_p (i))
11126 {
11127 save_fpr (stack_pointer_rtx, offset, i);
11128 if (offset < last_probe_offset)
11129 last_probe_offset = offset;
11130 offset += 8;
11131 }
11132 else if (!TARGET_PACKED_STACK || cfun->stdarg)
11133 offset += 8;
11134 }
11135
11136 /* Save f4 and f6. */
11137 offset = cfun_frame_layout.f4_offset;
11138 for (i = FPR4_REGNUM; i <= FPR4_REGNUM + 1; i++)
11139 {
11140 if (cfun_fpr_save_p (i))
11141 {
11142 insn = save_fpr (stack_pointer_rtx, offset, i);
11143 if (offset < last_probe_offset)
11144 last_probe_offset = offset;
11145 offset += 8;
11146
11147 /* If f4 and f6 are call clobbered, they are saved due to
11148 stdargs and therefore are not frame related. */
11149 if (!call_used_regs[i])
11150 RTX_FRAME_RELATED_P (insn) = 1;
11151 }
11152 else if (!TARGET_PACKED_STACK || call_used_regs[i])
11153 offset += 8;
11154 }
11155
11156 if (TARGET_PACKED_STACK
11157 && cfun_save_high_fprs_p
11158 && cfun_frame_layout.f8_offset + cfun_frame_layout.high_fprs * 8 > 0)
11159 {
11160 offset = (cfun_frame_layout.f8_offset
11161 + (cfun_frame_layout.high_fprs - 1) * 8);
11162
11163 for (i = FPR15_REGNUM; i >= FPR8_REGNUM && offset >= 0; i--)
11164 if (cfun_fpr_save_p (i))
11165 {
11166 insn = save_fpr (stack_pointer_rtx, offset, i);
11167 if (offset < last_probe_offset)
11168 last_probe_offset = offset;
11169
11170 RTX_FRAME_RELATED_P (insn) = 1;
11171 offset -= 8;
11172 }
11173 if (offset >= cfun_frame_layout.f8_offset)
11174 next_fpr = i;
11175 }
11176
11177 if (!TARGET_PACKED_STACK)
11178 next_fpr = cfun_save_high_fprs_p ? FPR15_REGNUM : 0;
11179
11180 if (flag_stack_usage_info)
11181 current_function_static_stack_size = cfun_frame_layout.frame_size;
11182
11183 /* Decrement stack pointer. */
11184
11185 if (cfun_frame_layout.frame_size > 0)
11186 {
11187 rtx frame_off = GEN_INT (-cfun_frame_layout.frame_size);
11188 rtx_insn *stack_pointer_backup_loc;
11189 bool temp_reg_clobbered_p;
11190
11191 if (s390_stack_size)
11192 {
11193 HOST_WIDE_INT stack_guard;
11194
11195 if (s390_stack_guard)
11196 stack_guard = s390_stack_guard;
11197 else
11198 {
11199 /* If no value for the stack guard is provided, the smallest power of 2
11200 larger than the current frame size is chosen. */
11201 stack_guard = 1;
11202 while (stack_guard < cfun_frame_layout.frame_size)
11203 stack_guard <<= 1;
11204 }
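/* E.g. (illustrative): a frame size of 6000 bytes yields a
   stack_guard of 8192, the smallest power of 2 not below it.  */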
11205
11206 if (cfun_frame_layout.frame_size >= s390_stack_size)
11207 {
11208 warning (0, "frame size of function %qs is %wd"
11209 " bytes exceeding user provided stack limit of "
11210 "%d bytes. "
11211 "An unconditional trap is added.",
11212 current_function_name(), cfun_frame_layout.frame_size,
11213 s390_stack_size);
11214 emit_insn (gen_trap ());
11215 emit_barrier ();
11216 }
11217 else
11218 {
11219 /* stack_guard has to be smaller than s390_stack_size.
11220 Otherwise we would emit an AND with zero which would
11221 not match the test under mask pattern. */
11222 if (stack_guard >= s390_stack_size)
11223 {
11224 warning (0, "frame size of function %qs is %wd"
11225 " bytes which is more than half the stack size. "
11226 "The dynamic check would not be reliable. "
11227 "No check emitted for this function.",
11228 current_function_name(),
11229 cfun_frame_layout.frame_size);
11230 }
11231 else
11232 {
11233 HOST_WIDE_INT stack_check_mask = ((s390_stack_size - 1)
11234 & ~(stack_guard - 1));
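/* Illustrative example: s390_stack_size == 32768 and stack_guard ==
   4096 give a mask of 0x7000, so the conditional trap below fires
   once the stack pointer modulo the stack size drops below the
   guard size.  */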
11235
11236 rtx t = gen_rtx_AND (Pmode, stack_pointer_rtx,
11237 GEN_INT (stack_check_mask));
11238 if (TARGET_64BIT)
11239 emit_insn (gen_ctrapdi4 (gen_rtx_EQ (VOIDmode,
11240 t, const0_rtx),
11241 t, const0_rtx, const0_rtx));
11242 else
11243 emit_insn (gen_ctrapsi4 (gen_rtx_EQ (VOIDmode,
11244 t, const0_rtx),
11245 t, const0_rtx, const0_rtx));
11246 }
11247 }
11248 }
11249
11250 if (s390_warn_framesize > 0
11251 && cfun_frame_layout.frame_size >= s390_warn_framesize)
11252 warning (0, "frame size of %qs is %wd bytes",
11253 current_function_name (), cfun_frame_layout.frame_size);
11254
11255 if (s390_warn_dynamicstack_p && cfun->calls_alloca)
11256 warning (0, "%qs uses dynamic stack allocation", current_function_name ());
11257
11258 /* Save the location where we could backup the incoming stack
11259 pointer. */
11260 stack_pointer_backup_loc = get_last_insn ();
11261
11262 temp_reg_clobbered_p = allocate_stack_space (frame_off, last_probe_offset,
11263 temp_reg);
11264
11265 if (TARGET_BACKCHAIN || next_fpr)
11266 {
11267 if (temp_reg_clobbered_p)
11268 {
11269 /* allocate_stack_space had to make use of temp_reg and
11270 we need it to hold a backup of the incoming stack
11271 pointer. Calculate back that value from the current
11272 stack pointer. */
11273 s390_prologue_plus_offset (temp_reg, stack_pointer_rtx,
11274 GEN_INT (cfun_frame_layout.frame_size),
11275 false);
11276 }
11277 else
11278 {
11279 /* allocate_stack_space didn't actually require
11280 temp_reg. Insert the stack pointer backup insn
11281 before the stack pointer decrement code, knowing now
11282 that the value will survive. */
11283 emit_insn_after (gen_move_insn (temp_reg, stack_pointer_rtx),
11284 stack_pointer_backup_loc);
11285 }
11286 }
11287
11288 /* Set backchain. */
11289
11290 if (TARGET_BACKCHAIN)
11291 {
11292 if (cfun_frame_layout.backchain_offset)
11293 addr = gen_rtx_MEM (Pmode,
11294 plus_constant (Pmode, stack_pointer_rtx,
11295 cfun_frame_layout.backchain_offset));
11296 else
11297 addr = gen_rtx_MEM (Pmode, stack_pointer_rtx);
11298 set_mem_alias_set (addr, get_frame_alias_set ());
11299 insn = emit_insn (gen_move_insn (addr, temp_reg));
11300 }
11301
11302 /* If we support non-call exceptions (e.g. for Java),
11303 we need to make sure the backchain pointer is set up
11304 before any possibly trapping memory access. */
11305 if (TARGET_BACKCHAIN && cfun->can_throw_non_call_exceptions)
11306 {
11307 addr = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (VOIDmode));
11308 emit_clobber (addr);
11309 }
11310 }
11311 else if (flag_stack_clash_protection)
11312 dump_stack_clash_frame_info (NO_PROBE_NO_FRAME, false);
11313
11314 /* Save fprs 8 - 15 (64 bit ABI). */
11315
11316 if (cfun_save_high_fprs_p && next_fpr)
11317 {
11318 /* If the stack might be accessed through a different register
11319 we have to make sure that the stack pointer decrement is not
11320 moved below the use of the stack slots. */
11321 s390_emit_stack_tie ();
11322
11323 insn = emit_insn (gen_add2_insn (temp_reg,
11324 GEN_INT (cfun_frame_layout.f8_offset)));
11325
11326 offset = 0;
11327
11328 for (i = FPR8_REGNUM; i <= next_fpr; i++)
11329 if (cfun_fpr_save_p (i))
11330 {
11331 rtx addr = plus_constant (Pmode, stack_pointer_rtx,
11332 cfun_frame_layout.frame_size
11333 + cfun_frame_layout.f8_offset
11334 + offset);
11335
11336 insn = save_fpr (temp_reg, offset, i);
11337 offset += 8;
11338 RTX_FRAME_RELATED_P (insn) = 1;
11339 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
11340 gen_rtx_SET (gen_rtx_MEM (DFmode, addr),
11341 gen_rtx_REG (DFmode, i)));
11342 }
11343 }
11344
11345 /* Set frame pointer, if needed. */
11346
11347 if (frame_pointer_needed)
11348 {
11349 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
11350 RTX_FRAME_RELATED_P (insn) = 1;
11351 }
11352
11353 /* Set up got pointer, if needed. */
11354
11355 if (flag_pic && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
11356 {
11357 rtx_insn *insns = s390_load_got ();
11358
11359 for (rtx_insn *insn = insns; insn; insn = NEXT_INSN (insn))
11360 annotate_constant_pool_refs (insn);
11361
11362 emit_insn (insns);
11363 }
11364
11365 #if TARGET_TPF != 0
11366 if (TARGET_TPF_PROFILING)
11367 {
11368 /* Generate a BAS instruction to serve as a function entry
11369 intercept to facilitate the use of tracing algorithms located
11370 at the branch target. */
11371 emit_insn (gen_prologue_tpf (
11372 GEN_INT (s390_tpf_trace_hook_prologue_check),
11373 GEN_INT (s390_tpf_trace_hook_prologue_target)));
11374
11375 /* Emit a blockage here so that all code lies between the
11376 profiling mechanisms. */
11377 emit_insn (gen_blockage ());
11378 }
11379 #endif
11380 }
11381
11382 /* Expand the epilogue into a bunch of separate insns. */
11383
11384 void
11385 s390_emit_epilogue (bool sibcall)
11386 {
11387 rtx frame_pointer, return_reg = NULL_RTX, cfa_restores = NULL_RTX;
11388 int area_bottom, area_top, offset = 0;
11389 int next_offset;
11390 int i;
11391
11392 #if TARGET_TPF != 0
11393 if (TARGET_TPF_PROFILING)
11394 {
11395 /* Generate a BAS instruction to serve as a function entry
11396 intercept to facilitate the use of tracing algorithms located
11397 at the branch target. */
11398
11399 /* Emit a blockage here so that all code lies between the
11400 profiling mechanisms. */
11401 emit_insn (gen_blockage ());
11402
11403 emit_insn (gen_epilogue_tpf (
11404 GEN_INT (s390_tpf_trace_hook_epilogue_check),
11405 GEN_INT (s390_tpf_trace_hook_epilogue_target)));
11406 }
11407 #endif
11408
11409 /* Check whether to use frame or stack pointer for restore. */
11410
11411 frame_pointer = (frame_pointer_needed
11412 ? hard_frame_pointer_rtx : stack_pointer_rtx);
11413
11414 s390_frame_area (&area_bottom, &area_top);
11415
11416 /* Check whether we can access the register save area.
11417 If not, increment the frame pointer as required. */
11418
11419 if (area_top <= area_bottom)
11420 {
11421 /* Nothing to restore. */
11422 }
11423 else if (DISP_IN_RANGE (cfun_frame_layout.frame_size + area_bottom)
11424 && DISP_IN_RANGE (cfun_frame_layout.frame_size + area_top - 1))
11425 {
11426 /* Area is in range. */
11427 offset = cfun_frame_layout.frame_size;
11428 }
11429 else
11430 {
11431 rtx_insn *insn;
11432 rtx frame_off, cfa;
11433
11434 offset = area_bottom < 0 ? -area_bottom : 0;
11435 frame_off = GEN_INT (cfun_frame_layout.frame_size - offset);
11436
11437 cfa = gen_rtx_SET (frame_pointer,
11438 gen_rtx_PLUS (Pmode, frame_pointer, frame_off));
11439 if (DISP_IN_RANGE (INTVAL (frame_off)))
11440 {
11441 rtx set;
11442
11443 set = gen_rtx_SET (frame_pointer,
11444 gen_rtx_PLUS (Pmode, frame_pointer, frame_off));
11445 insn = emit_insn (set);
11446 }
11447 else
11448 {
11449 if (!CONST_OK_FOR_K (INTVAL (frame_off)))
11450 frame_off = force_const_mem (Pmode, frame_off);
11451
11452 insn = emit_insn (gen_add2_insn (frame_pointer, frame_off));
11453 annotate_constant_pool_refs (insn);
11454 }
11455 add_reg_note (insn, REG_CFA_ADJUST_CFA, cfa);
11456 RTX_FRAME_RELATED_P (insn) = 1;
11457 }
11458
11459 /* Restore call saved fprs. */
11460
11461 if (TARGET_64BIT)
11462 {
11463 if (cfun_save_high_fprs_p)
11464 {
11465 next_offset = cfun_frame_layout.f8_offset;
11466 for (i = FPR8_REGNUM; i <= FPR15_REGNUM; i++)
11467 {
11468 if (cfun_fpr_save_p (i))
11469 {
11470 restore_fpr (frame_pointer,
11471 offset + next_offset, i);
11472 cfa_restores
11473 = alloc_reg_note (REG_CFA_RESTORE,
11474 gen_rtx_REG (DFmode, i), cfa_restores);
11475 next_offset += 8;
11476 }
11477 }
11478 }
11479
11480 }
11481 else
11482 {
11483 next_offset = cfun_frame_layout.f4_offset;
11484 /* f4, f6 */
11485 for (i = FPR4_REGNUM; i <= FPR4_REGNUM + 1; i++)
11486 {
11487 if (cfun_fpr_save_p (i))
11488 {
11489 restore_fpr (frame_pointer,
11490 offset + next_offset, i);
11491 cfa_restores
11492 = alloc_reg_note (REG_CFA_RESTORE,
11493 gen_rtx_REG (DFmode, i), cfa_restores);
11494 next_offset += 8;
11495 }
11496 else if (!TARGET_PACKED_STACK)
11497 next_offset += 8;
11498 }
11499
11500 }
11501
11502 /* Restore call saved gprs. */
11503
11504 if (cfun_frame_layout.first_restore_gpr != -1)
11505 {
11506 rtx insn, addr;
11507 int i;
11508
11509 /* Check for global registers and save them
11510 to the stack location from where they get restored. */
11511
11512 for (i = cfun_frame_layout.first_restore_gpr;
11513 i <= cfun_frame_layout.last_restore_gpr;
11514 i++)
11515 {
11516 if (global_not_special_regno_p (i))
11517 {
11518 addr = plus_constant (Pmode, frame_pointer,
11519 offset + cfun_frame_layout.gprs_offset
11520 + (i - cfun_frame_layout.first_save_gpr_slot)
11521 * UNITS_PER_LONG);
11522 addr = gen_rtx_MEM (Pmode, addr);
11523 set_mem_alias_set (addr, get_frame_alias_set ());
11524 emit_move_insn (addr, gen_rtx_REG (Pmode, i));
11525 }
11526 else
11527 cfa_restores
11528 = alloc_reg_note (REG_CFA_RESTORE,
11529 gen_rtx_REG (Pmode, i), cfa_restores);
11530 }
11531
11532 /* Fetch the return address from the stack before the load multiple;
11533 this helps scheduling.
11534
11535 Only do this if we already decided that r14 needs to be
11536 saved to a stack slot. (And not just because r14 happens to
11537 be in between two GPRs which need saving.) Otherwise it
11538 would be difficult to take that decision back in
11539 s390_optimize_prologue.
11540
11541 This optimization is only helpful on in-order machines. */
11542 if (! sibcall
11543 && cfun_gpr_save_slot (RETURN_REGNUM) == SAVE_SLOT_STACK
11544 && s390_tune <= PROCESSOR_2097_Z10)
11545 {
11546 int return_regnum = find_unused_clobbered_reg();
11547 if (!return_regnum
11548 || (TARGET_INDIRECT_BRANCH_NOBP_RET_OPTION
11549 && !TARGET_CPU_Z10
11550 && return_regnum == INDIRECT_BRANCH_THUNK_REGNUM))
11551 {
11552 gcc_assert (INDIRECT_BRANCH_THUNK_REGNUM != 4);
11553 return_regnum = 4;
11554 }
11555 return_reg = gen_rtx_REG (Pmode, return_regnum);
11556
11557 addr = plus_constant (Pmode, frame_pointer,
11558 offset + cfun_frame_layout.gprs_offset
11559 + (RETURN_REGNUM
11560 - cfun_frame_layout.first_save_gpr_slot)
11561 * UNITS_PER_LONG);
11562 addr = gen_rtx_MEM (Pmode, addr);
11563 set_mem_alias_set (addr, get_frame_alias_set ());
11564 emit_move_insn (return_reg, addr);
11565
11566 /* Once we did that optimization we have to make sure
11567 s390_optimize_prologue does not try to remove the store
11568 of r14 since we will not be able to find the load issued
11569 here. */
11570 cfun_frame_layout.save_return_addr_p = true;
11571 }
11572
11573 insn = restore_gprs (frame_pointer,
11574 offset + cfun_frame_layout.gprs_offset
11575 + (cfun_frame_layout.first_restore_gpr
11576 - cfun_frame_layout.first_save_gpr_slot)
11577 * UNITS_PER_LONG,
11578 cfun_frame_layout.first_restore_gpr,
11579 cfun_frame_layout.last_restore_gpr);
11580 insn = emit_insn (insn);
11581 REG_NOTES (insn) = cfa_restores;
11582 add_reg_note (insn, REG_CFA_DEF_CFA,
11583 plus_constant (Pmode, stack_pointer_rtx,
11584 STACK_POINTER_OFFSET));
11585 RTX_FRAME_RELATED_P (insn) = 1;
11586 }
11587
11588 s390_restore_gprs_from_fprs ();
11589
11590 if (! sibcall)
11591 {
11592 if (!return_reg && !s390_can_use_return_insn ())
11593 /* We planned to emit (return), but we are not allowed to. */
11594 return_reg = gen_rtx_REG (Pmode, RETURN_REGNUM);
11595
11596 if (return_reg)
11597 /* Emit (return) and (use). */
11598 emit_jump_insn (gen_return_use (return_reg));
11599 else
11600 /* The fact that RETURN_REGNUM is used is already reflected by
11601 EPILOGUE_USES. Emit plain (return). */
11602 emit_jump_insn (gen_return ());
11603 }
11604 }
11605
11606 /* Implement TARGET_SET_UP_BY_PROLOGUE. */
11607
11608 static void
11609 s300_set_up_by_prologue (hard_reg_set_container *regs)
11610 {
11611 if (cfun->machine->base_reg
11612 && !call_used_regs[REGNO (cfun->machine->base_reg)])
11613 SET_HARD_REG_BIT (regs->set, REGNO (cfun->machine->base_reg));
11614 }
11615
11616 /* -fsplit-stack support. */
11617
11618 /* A SYMBOL_REF for __morestack. */
11619 static GTY(()) rtx morestack_ref;
11620
11621 /* When using -fsplit-stack, the allocation routines set a field in
11622 the TCB to the bottom of the stack plus this much space, measured
11623 in bytes. */
11624
11625 #define SPLIT_STACK_AVAILABLE 1024
11626
11627 /* Emit the parmblock for __morestack into the .rodata section. It
11628 consists of 3 pointer size entries:
11629 - frame size
11630 - size of stack arguments
11631 - offset between parm block and __morestack return label */
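/* For TARGET_64BIT the emitted block looks like this (illustrative
   sketch, label names made up):

	.align	8
   .Lparm:
	.quad	<frame size>
	.quad	<args size>
	.quad	.Lcall_done-.Lparm  */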
11632
11633 void
11634 s390_output_split_stack_data (rtx parm_block, rtx call_done,
11635 rtx frame_size, rtx args_size)
11636 {
11637 rtx ops[] = { parm_block, call_done };
11638
11639 switch_to_section (targetm.asm_out.function_rodata_section
11640 (current_function_decl));
11641
11642 if (TARGET_64BIT)
11643 output_asm_insn (".align\t8", NULL);
11644 else
11645 output_asm_insn (".align\t4", NULL);
11646
11647 (*targetm.asm_out.internal_label) (asm_out_file, "L",
11648 CODE_LABEL_NUMBER (parm_block));
11649 if (TARGET_64BIT)
11650 {
11651 output_asm_insn (".quad\t%0", &frame_size);
11652 output_asm_insn (".quad\t%0", &args_size);
11653 output_asm_insn (".quad\t%1-%0", ops);
11654 }
11655 else
11656 {
11657 output_asm_insn (".long\t%0", &frame_size);
11658 output_asm_insn (".long\t%0", &args_size);
11659 output_asm_insn (".long\t%1-%0", ops);
11660 }
11661
11662 switch_to_section (current_function_section ());
11663 }
11664
11665 /* Emit -fsplit-stack prologue, which goes before the regular function
11666 prologue. */
11667
11668 void
11669 s390_expand_split_stack_prologue (void)
11670 {
11671 rtx r1, guard, cc = NULL;
11672 rtx_insn *insn;
11673 /* Offset from thread pointer to __private_ss. */
11674 int psso = TARGET_64BIT ? 0x38 : 0x20;
11676 /* Frame size and argument size - the two parameters to __morestack. */
11677 HOST_WIDE_INT frame_size = cfun_frame_layout.frame_size;
11678 /* Align argument size to 8 bytes - simplifies __morestack code. */
11679 HOST_WIDE_INT args_size = crtl->args.size >= 0
11680 ? ((crtl->args.size + 7) & ~7)
11681 : 0;
11682 /* Label to be called by __morestack. */
11683 rtx_code_label *call_done = NULL;
11684 rtx_code_label *parm_base = NULL;
11685 rtx tmp;
11686
11687 gcc_assert (flag_split_stack && reload_completed);
11688
11689 r1 = gen_rtx_REG (Pmode, 1);
11690
11691 /* If no stack frame will be allocated, don't do anything. */
11692 if (!frame_size)
11693 {
11694 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
11695 {
11696 /* If va_start is used, just use r15. */
11697 emit_move_insn (r1,
11698 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
11699 GEN_INT (STACK_POINTER_OFFSET)));
11700
11701 }
11702 return;
11703 }
11704
11705 if (morestack_ref == NULL_RTX)
11706 {
11707 morestack_ref = gen_rtx_SYMBOL_REF (Pmode, "__morestack");
11708 SYMBOL_REF_FLAGS (morestack_ref) |= (SYMBOL_FLAG_LOCAL
11709 | SYMBOL_FLAG_FUNCTION);
11710 }
11711
11712 if (CONST_OK_FOR_K (frame_size) || CONST_OK_FOR_Op (frame_size))
11713 {
11714 /* If frame_size will fit in an add instruction, do a stack space
11715 check, and only call __morestack if there's not enough space. */
11716
11717 /* Get thread pointer. r1 is the only register we can always destroy - r0
11718 could contain a static chain (and cannot be used to address memory
11719 anyway), r2-r6 can contain parameters, and r6-r15 are callee-saved. */
11720 emit_insn (gen_get_thread_pointer (Pmode, r1));
11721 /* Aim at __private_ss. */
11722 guard = gen_rtx_MEM (Pmode, plus_constant (Pmode, r1, psso));
11723
11724 /* If less than 1 kiB is used, skip the addition and compare directly
11725 with __private_ss. */
11726 if (frame_size > SPLIT_STACK_AVAILABLE)
11727 {
11728 emit_move_insn (r1, guard);
11729 if (TARGET_64BIT)
11730 emit_insn (gen_adddi3 (r1, r1, GEN_INT (frame_size)));
11731 else
11732 emit_insn (gen_addsi3 (r1, r1, GEN_INT (frame_size)));
11733 guard = r1;
11734 }
11735
11736 /* Compare the (maybe adjusted) guard with the stack pointer. */
11737 cc = s390_emit_compare (LT, stack_pointer_rtx, guard);
11738 }
11739
11740 call_done = gen_label_rtx ();
11741 parm_base = gen_label_rtx ();
11742 LABEL_NUSES (parm_base)++;
11743 LABEL_NUSES (call_done)++;
11744
11745 /* %r1 = litbase. */
11746 insn = emit_move_insn (r1, gen_rtx_LABEL_REF (VOIDmode, parm_base));
11747 add_reg_note (insn, REG_LABEL_OPERAND, parm_base);
11748 LABEL_NUSES (parm_base)++;
11749
11750 /* Now, we need to call __morestack. It has very special calling
11751 conventions: it preserves param/return/static chain registers for
11752 calling the main function body, and looks for its own parameters at %r1. */
11753 if (cc != NULL)
11754 tmp = gen_split_stack_cond_call (Pmode,
11755 morestack_ref,
11756 parm_base,
11757 call_done,
11758 GEN_INT (frame_size),
11759 GEN_INT (args_size),
11760 cc);
11761 else
11762 tmp = gen_split_stack_call (Pmode,
11763 morestack_ref,
11764 parm_base,
11765 call_done,
11766 GEN_INT (frame_size),
11767 GEN_INT (args_size));
11768
11769 insn = emit_jump_insn (tmp);
11770 JUMP_LABEL (insn) = call_done;
11771 add_reg_note (insn, REG_LABEL_OPERAND, parm_base);
11772 add_reg_note (insn, REG_LABEL_OPERAND, call_done);
11773
11774 if (cc != NULL)
11775 {
11776 /* Mark the jump as very unlikely to be taken. */
11777 add_reg_br_prob_note (insn,
11778 profile_probability::very_unlikely ());
11779
11780 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
11781 {
11782 /* If va_start is used, and __morestack was not called, just use
11783 r15. */
11784 emit_move_insn (r1,
11785 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
11786 GEN_INT (STACK_POINTER_OFFSET)));
11787 }
11788 }
11789 else
11790 {
11791 emit_barrier ();
11792 }
11793
11794 /* __morestack will call us here. */
11795
11796 emit_label (call_done);
11797 }
11798
11799 /* We may have to tell the dataflow pass that the split stack prologue
11800 is initializing a register. */
11801
11802 static void
11803 s390_live_on_entry (bitmap regs)
11804 {
11805 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
11806 {
11807 gcc_assert (flag_split_stack);
11808 bitmap_set_bit (regs, 1);
11809 }
11810 }
11811
11812 /* Return true if the function can use simple_return to return outside
11813 of a shrink-wrapped region. At present shrink-wrapping is supported
11814 in all cases. */
11815
11816 bool
11817 s390_can_use_simple_return_insn (void)
11818 {
11819 return true;
11820 }
11821
11822 /* Return true if the epilogue is guaranteed to contain only a return
11823 instruction and if a direct return can therefore be used instead.
11824 One of the main advantages of using direct return instructions
11825 is that we can then use conditional returns. */
11826
11827 bool
11828 s390_can_use_return_insn (void)
11829 {
11830 int i;
11831
11832 if (!reload_completed)
11833 return false;
11834
11835 if (crtl->profile)
11836 return false;
11837
11838 if (TARGET_TPF_PROFILING)
11839 return false;
11840
11841 for (i = 0; i < 16; i++)
11842 if (cfun_gpr_save_slot (i) != SAVE_SLOT_NONE)
11843 return false;
11844
11845 /* For 31 bit this is not covered by the frame_size check below
11846 since f4, f6 are saved in the register save area without needing
11847 additional stack space. */
11848 if (!TARGET_64BIT
11849 && (cfun_fpr_save_p (FPR4_REGNUM) || cfun_fpr_save_p (FPR6_REGNUM)))
11850 return false;
11851
11852 if (cfun->machine->base_reg
11853 && !call_used_regs[REGNO (cfun->machine->base_reg)])
11854 return false;
11855
11856 return cfun_frame_layout.frame_size == 0;
11857 }
11858
11859 /* The VX ABI differs for vararg functions. Therefore we need the
11860 prototype of the callee to be available when passing vector type
11861 values. */
11862 static const char *
11863 s390_invalid_arg_for_unprototyped_fn (const_tree typelist, const_tree funcdecl, const_tree val)
11864 {
11865 return ((TARGET_VX_ABI
11866 && typelist == 0
11867 && VECTOR_TYPE_P (TREE_TYPE (val))
11868 && (funcdecl == NULL_TREE
11869 || (TREE_CODE (funcdecl) == FUNCTION_DECL
11870 && DECL_BUILT_IN_CLASS (funcdecl) != BUILT_IN_MD)))
11871 ? N_("vector argument passed to unprototyped function")
11872 : NULL);
11873 }
11874
11875
11876 /* Return the size in bytes of a function argument of
11877 type TYPE and/or mode MODE. At least one of TYPE or
11878 MODE must be specified. */
11879
11880 static int
11881 s390_function_arg_size (machine_mode mode, const_tree type)
11882 {
11883 if (type)
11884 return int_size_in_bytes (type);
11885
11886 /* No type info available for some library calls ... */
11887 if (mode != BLKmode)
11888 return GET_MODE_SIZE (mode);
11889
11890 /* If we have neither type nor mode, abort */
11891 gcc_unreachable ();
11892 }
11893
11894 /* Return true if a function argument of type TYPE and mode MODE
11895 is to be passed in a vector register, if available. */
11896
11897 bool
11898 s390_function_arg_vector (machine_mode mode, const_tree type)
11899 {
11900 if (!TARGET_VX_ABI)
11901 return false;
11902
11903 if (s390_function_arg_size (mode, type) > 16)
11904 return false;
11905
11906 /* No type info available for some library calls ... */
11907 if (!type)
11908 return VECTOR_MODE_P (mode);
11909
11910 /* The ABI says that record types with a single member are treated
11911 just like that member would be. */
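/* E.g. (illustrative): struct { __vector int x; } is passed in a
   vector register just like a plain __vector int would be.  */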
11912 int empty_base_seen = 0;
11913 const_tree orig_type = type;
11914 while (TREE_CODE (type) == RECORD_TYPE)
11915 {
11916 tree field, single = NULL_TREE;
11917
11918 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
11919 {
11920 if (TREE_CODE (field) != FIELD_DECL)
11921 continue;
11922
11923 if (DECL_FIELD_ABI_IGNORED (field))
11924 {
11925 if (lookup_attribute ("no_unique_address",
11926 DECL_ATTRIBUTES (field)))
11927 empty_base_seen |= 2;
11928 else
11929 empty_base_seen |= 1;
11930 continue;
11931 }
11932
11933 if (single == NULL_TREE)
11934 single = TREE_TYPE (field);
11935 else
11936 return false;
11937 }
11938
11939 if (single == NULL_TREE)
11940 return false;
11941 else
11942 {
11943 /* If the field declaration adds extra bytes due to
11944 e.g. padding, this is not accepted as a vector type. */
11945 if (int_size_in_bytes (single) <= 0
11946 || int_size_in_bytes (single) != int_size_in_bytes (type))
11947 return false;
11948 type = single;
11949 }
11950 }
11951
11952 if (!VECTOR_TYPE_P (type))
11953 return false;
11954
11955 if (warn_psabi && empty_base_seen)
11956 {
11957 static unsigned last_reported_type_uid;
11958 unsigned uid = TYPE_UID (TYPE_MAIN_VARIANT (orig_type));
11959 if (uid != last_reported_type_uid)
11960 {
11961 const char *url = CHANGES_ROOT_URL "gcc-10/changes.html#empty_base";
11962 last_reported_type_uid = uid;
11963 if (empty_base_seen & 1)
11964 inform (input_location,
11965 "parameter passing for argument of type %qT when C++17 "
11966 "is enabled changed to match C++14 %{in GCC 10.1%}",
11967 orig_type, url);
11968 else
11969 inform (input_location,
11970 "parameter passing for argument of type %qT with "
11971 "%<[[no_unique_address]]%> members changed "
11972 "%{in GCC 10.1%}", orig_type, url);
11973 }
11974 }
11975 return true;
11976 }
11977
11978 /* Return true if a function argument of type TYPE and mode MODE
11979 is to be passed in a floating-point register, if available. */
11980
11981 static bool
11982 s390_function_arg_float (machine_mode mode, const_tree type)
11983 {
11984 if (s390_function_arg_size (mode, type) > 8)
11985 return false;
11986
11987 /* Soft-float changes the ABI: no floating-point registers are used. */
11988 if (TARGET_SOFT_FLOAT)
11989 return false;
11990
11991 /* No type info available for some library calls ... */
11992 if (!type)
11993 return mode == SFmode || mode == DFmode || mode == SDmode || mode == DDmode;
11994
11995 /* The ABI says that record types with a single member are treated
11996 just like that member would be. */
11997 int empty_base_seen = 0;
11998 const_tree orig_type = type;
11999 while (TREE_CODE (type) == RECORD_TYPE)
12000 {
12001 tree field, single = NULL_TREE;
12002
12003 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
12004 {
12005 if (TREE_CODE (field) != FIELD_DECL)
12006 continue;
12007 if (DECL_FIELD_ABI_IGNORED (field))
12008 {
12009 if (lookup_attribute ("no_unique_address",
12010 DECL_ATTRIBUTES (field)))
12011 empty_base_seen |= 2;
12012 else
12013 empty_base_seen |= 1;
12014 continue;
12015 }
12016
12017 if (single == NULL_TREE)
12018 single = TREE_TYPE (field);
12019 else
12020 return false;
12021 }
12022
12023 if (single == NULL_TREE)
12024 return false;
12025 else
12026 type = single;
12027 }
12028
12029 if (TREE_CODE (type) != REAL_TYPE)
12030 return false;
12031
12032 if (warn_psabi && empty_base_seen)
12033 {
12034 static unsigned last_reported_type_uid;
12035 unsigned uid = TYPE_UID (TYPE_MAIN_VARIANT (orig_type));
12036 if (uid != last_reported_type_uid)
12037 {
12038 const char *url = CHANGES_ROOT_URL "gcc-10/changes.html#empty_base";
12039 last_reported_type_uid = uid;
12040 if (empty_base_seen & 1)
12041 inform (input_location,
12042 "parameter passing for argument of type %qT when C++17 "
12043 "is enabled changed to match C++14 %{in GCC 10.1%}",
12044 orig_type, url);
12045 else
12046 inform (input_location,
12047 "parameter passing for argument of type %qT with "
12048 "%<[[no_unique_address]]%> members changed "
12049 "%{in GCC 10.1%}", orig_type, url);
12050 }
12051 }
12052
12053 return true;
12054 }
12055
12056 /* Return true if a function argument of type TYPE and mode MODE
12057 is to be passed in an integer register, or a pair of integer
12058 registers, if available. */
12059
12060 static bool
12061 s390_function_arg_integer (machine_mode mode, const_tree type)
12062 {
12063 int size = s390_function_arg_size (mode, type);
12064 if (size > 8)
12065 return false;
12066
12067 /* No type info available for some library calls ... */
12068 if (!type)
12069 return GET_MODE_CLASS (mode) == MODE_INT
12070 || (TARGET_SOFT_FLOAT && SCALAR_FLOAT_MODE_P (mode));
12071
12072 /* We accept small integral (and similar) types. */
12073 if (INTEGRAL_TYPE_P (type)
12074 || POINTER_TYPE_P (type)
12075 || TREE_CODE (type) == NULLPTR_TYPE
12076 || TREE_CODE (type) == OFFSET_TYPE
12077 || (TARGET_SOFT_FLOAT && TREE_CODE (type) == REAL_TYPE))
12078 return true;
12079
12080 /* We also accept structs of size 1, 2, 4, 8 that are not
12081 passed in floating-point registers. */
12082 if (AGGREGATE_TYPE_P (type)
12083 && exact_log2 (size) >= 0
12084 && !s390_function_arg_float (mode, type))
12085 return true;
12086
12087 return false;
12088 }
12089
12090 /* Return 1 if a function argument ARG is to be passed by reference.
12091 The ABI specifies that only structures of size 1, 2, 4, or 8 bytes
12092 are passed by value, all other structures (and complex numbers) are
12093 passed by reference. */
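/* E.g. (illustrative): a struct of size 3 or a _Complex double is
   passed by reference, while a struct of size 8 or, with the VX ABI,
   a 16 byte vector is passed by value.  */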
12094
12095 static bool
12096 s390_pass_by_reference (cumulative_args_t, const function_arg_info &arg)
12097 {
12098 int size = s390_function_arg_size (arg.mode, arg.type);
12099
12100 if (s390_function_arg_vector (arg.mode, arg.type))
12101 return false;
12102
12103 if (size > 8)
12104 return true;
12105
12106 if (tree type = arg.type)
12107 {
12108 if (AGGREGATE_TYPE_P (type) && exact_log2 (size) < 0)
12109 return true;
12110
12111 if (TREE_CODE (type) == COMPLEX_TYPE
12112 || TREE_CODE (type) == VECTOR_TYPE)
12113 return true;
12114 }
12115
12116 return false;
12117 }
12118
12119 /* Update the data in CUM to advance over argument ARG. */
12120
12121 static void
12122 s390_function_arg_advance (cumulative_args_t cum_v,
12123 const function_arg_info &arg)
12124 {
12125 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
12126
12127 if (s390_function_arg_vector (arg.mode, arg.type))
12128 {
12129 /* We are called for unnamed vector stdarg arguments which are
12130 passed on the stack. In this case this hook does not have to
12131 do anything since stack arguments are tracked by common
12132 code. */
12133 if (!arg.named)
12134 return;
12135 cum->vrs += 1;
12136 }
12137 else if (s390_function_arg_float (arg.mode, arg.type))
12138 {
12139 cum->fprs += 1;
12140 }
12141 else if (s390_function_arg_integer (arg.mode, arg.type))
12142 {
12143 int size = s390_function_arg_size (arg.mode, arg.type);
12144 cum->gprs += ((size + UNITS_PER_LONG - 1) / UNITS_PER_LONG);
12145 }
12146 else
12147 gcc_unreachable ();
12148 }
12149
12150 /* Define where to put the arguments to a function.
12151 Value is zero to push the argument on the stack,
12152 or a hard register in which to store the argument.
12153
12154 CUM is a variable of type CUMULATIVE_ARGS which gives info about
12155 the preceding args and about the function being called.
12156 ARG is a description of the argument.
12157
12158 On S/390, we use general purpose registers 2 through 6 to
12159 pass integer, pointer, and certain structure arguments, and
12160 floating point registers 0 and 2 (0, 2, 4, and 6 on 64-bit)
12161 to pass floating point arguments. All remaining arguments
12162 are pushed to the stack. */
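/* Illustrative example for the 64 bit ABI: for
     void f (int a, double d, long b)
   a is passed in %r2, d in %f0 and b in %r3.  */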
12163
12164 static rtx
12165 s390_function_arg (cumulative_args_t cum_v, const function_arg_info &arg)
12166 {
12167 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
12168
12169 if (!arg.named)
12170 s390_check_type_for_vector_abi (arg.type, true, false);
12171
12172 if (s390_function_arg_vector (arg.mode, arg.type))
12173 {
12174 /* Vector arguments being part of the ellipsis are passed on the
12175 stack. */
12176 if (!arg.named || (cum->vrs + 1 > VEC_ARG_NUM_REG))
12177 return NULL_RTX;
12178
12179 return gen_rtx_REG (arg.mode, cum->vrs + FIRST_VEC_ARG_REGNO);
12180 }
12181 else if (s390_function_arg_float (arg.mode, arg.type))
12182 {
12183 if (cum->fprs + 1 > FP_ARG_NUM_REG)
12184 return NULL_RTX;
12185 else
12186 return gen_rtx_REG (arg.mode, cum->fprs + 16);
12187 }
12188 else if (s390_function_arg_integer (arg.mode, arg.type))
12189 {
12190 int size = s390_function_arg_size (arg.mode, arg.type);
12191 int n_gprs = (size + UNITS_PER_LONG - 1) / UNITS_PER_LONG;
12192
12193 if (cum->gprs + n_gprs > GP_ARG_NUM_REG)
12194 return NULL_RTX;
12195 else if (n_gprs == 1 || UNITS_PER_WORD == UNITS_PER_LONG)
12196 return gen_rtx_REG (arg.mode, cum->gprs + 2);
12197 else if (n_gprs == 2)
12198 {
12199 rtvec p = rtvec_alloc (2);
12200
12201 RTVEC_ELT (p, 0)
12202 = gen_rtx_EXPR_LIST (SImode, gen_rtx_REG (SImode, cum->gprs + 2),
12203 const0_rtx);
12204 RTVEC_ELT (p, 1)
12205 = gen_rtx_EXPR_LIST (SImode, gen_rtx_REG (SImode, cum->gprs + 3),
12206 GEN_INT (4));
12207
12208 return gen_rtx_PARALLEL (arg.mode, p);
12209 }
12210 }
12211
12212 /* After the real arguments, expand_call calls us once again with an
12213 end marker. Whatever we return here is passed as operand 2 to the
12214 call expanders.
12215
12216 We don't need this feature ... */
12217 else if (arg.end_marker_p ())
12218 return const0_rtx;
12219
12220 gcc_unreachable ();
12221 }
12222
12223 /* Implement TARGET_FUNCTION_ARG_BOUNDARY. Vector arguments are
12224 left-justified when placed on the stack during parameter passing. */
12225
12226 static pad_direction
12227 s390_function_arg_padding (machine_mode mode, const_tree type)
12228 {
12229 if (s390_function_arg_vector (mode, type))
12230 return PAD_UPWARD;
12231
12232 return default_function_arg_padding (mode, type);
12233 }
12234
12235 /* Return true if return values of type TYPE should be returned
12236 in a memory buffer whose address is passed by the caller as
12237 hidden first argument. */
12238
12239 static bool
12240 s390_return_in_memory (const_tree type, const_tree fundecl ATTRIBUTE_UNUSED)
12241 {
12242 /* We accept small integral (and similar) types. */
12243 if (INTEGRAL_TYPE_P (type)
12244 || POINTER_TYPE_P (type)
12245 || TREE_CODE (type) == OFFSET_TYPE
12246 || TREE_CODE (type) == REAL_TYPE)
12247 return int_size_in_bytes (type) > 8;
12248
12249 /* vector types which fit into a VR. */
12250 if (TARGET_VX_ABI
12251 && VECTOR_TYPE_P (type)
12252 && int_size_in_bytes (type) <= 16)
12253 return false;
12254
12255 /* Aggregates and similar constructs are always returned
12256 in memory. */
12257 if (AGGREGATE_TYPE_P (type)
12258 || TREE_CODE (type) == COMPLEX_TYPE
12259 || VECTOR_TYPE_P (type))
12260 return true;
12261
12262 /* ??? We get called on all sorts of random stuff from
12263 aggregate_value_p. We can't abort, but it's not clear
12264 what's safe to return. Pretend it's a struct I guess. */
12265 return true;
12266 }
12267
12268 /* Function arguments and return values are promoted to word size. */
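/* E.g. (illustrative): with -m64 a char or int argument is widened to
   a full 64 bit Pmode value, with pointers extended according to
   POINTERS_EXTEND_UNSIGNED.  */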
12269
12270 static machine_mode
12271 s390_promote_function_mode (const_tree type, machine_mode mode,
12272 int *punsignedp,
12273 const_tree fntype ATTRIBUTE_UNUSED,
12274 int for_return ATTRIBUTE_UNUSED)
12275 {
12276 if (INTEGRAL_MODE_P (mode)
12277 && GET_MODE_SIZE (mode) < UNITS_PER_LONG)
12278 {
12279 if (type != NULL_TREE && POINTER_TYPE_P (type))
12280 *punsignedp = POINTERS_EXTEND_UNSIGNED;
12281 return Pmode;
12282 }
12283
12284 return mode;
12285 }
12286
12287 /* Define where to return a (scalar) value of type RET_TYPE.
12288 If RET_TYPE is null, define where to return a (scalar)
12289 value of mode MODE from a libcall. */
12290
12291 static rtx
12292 s390_function_and_libcall_value (machine_mode mode,
12293 const_tree ret_type,
12294 const_tree fntype_or_decl,
12295 bool outgoing ATTRIBUTE_UNUSED)
12296 {
12297 /* For vector return types it is important to use the RET_TYPE
12298 argument whenever available since the middle-end might have
12299 changed the mode to a scalar mode. */
12300 bool vector_ret_type_p = ((ret_type && VECTOR_TYPE_P (ret_type))
12301 || (!ret_type && VECTOR_MODE_P (mode)));
12302
12303 /* For normal functions perform the promotion as
12304 promote_function_mode would do. */
12305 if (ret_type)
12306 {
12307 int unsignedp = TYPE_UNSIGNED (ret_type);
12308 mode = promote_function_mode (ret_type, mode, &unsignedp,
12309 fntype_or_decl, 1);
12310 }
12311
12312 gcc_assert (GET_MODE_CLASS (mode) == MODE_INT
12313 || SCALAR_FLOAT_MODE_P (mode)
12314 || (TARGET_VX_ABI && vector_ret_type_p));
12315 gcc_assert (GET_MODE_SIZE (mode) <= (TARGET_VX_ABI ? 16 : 8));
12316
12317 if (TARGET_VX_ABI && vector_ret_type_p)
12318 return gen_rtx_REG (mode, FIRST_VEC_ARG_REGNO);
12319 else if (TARGET_HARD_FLOAT && SCALAR_FLOAT_MODE_P (mode))
12320 return gen_rtx_REG (mode, 16);
12321 else if (GET_MODE_SIZE (mode) <= UNITS_PER_LONG
12322 || UNITS_PER_LONG == UNITS_PER_WORD)
12323 return gen_rtx_REG (mode, 2);
12324 else if (GET_MODE_SIZE (mode) == 2 * UNITS_PER_LONG)
12325 {
12326 /* This case is triggered when returning a 64 bit value with
12327 -m31 -mzarch. Although the value would fit into a single
12328 register it has to be forced into a 32 bit register pair in
12329 order to match the ABI. */
12330 rtvec p = rtvec_alloc (2);
12331
12332 RTVEC_ELT (p, 0)
12333 = gen_rtx_EXPR_LIST (SImode, gen_rtx_REG (SImode, 2), const0_rtx);
12334 RTVEC_ELT (p, 1)
12335 = gen_rtx_EXPR_LIST (SImode, gen_rtx_REG (SImode, 3), GEN_INT (4));
12336
12337 return gen_rtx_PARALLEL (mode, p);
12338 }
12339
12340 gcc_unreachable ();
12341 }
12342
12343 /* Define where to return a scalar return value of type RET_TYPE. */
12344
12345 static rtx
12346 s390_function_value (const_tree ret_type, const_tree fn_decl_or_type,
12347 bool outgoing)
12348 {
12349 return s390_function_and_libcall_value (TYPE_MODE (ret_type), ret_type,
12350 fn_decl_or_type, outgoing);
12351 }
12352
12353 /* Define where to return a scalar libcall return value of mode
12354 MODE. */
12355
12356 static rtx
12357 s390_libcall_value (machine_mode mode, const_rtx fun ATTRIBUTE_UNUSED)
12358 {
12359 return s390_function_and_libcall_value (mode, NULL_TREE,
12360 NULL_TREE, true);
12361 }
12362
12363
12364 /* Create and return the va_list datatype.
12365
12366 On S/390, va_list is an array type equivalent to
12367
12368 typedef struct __va_list_tag
12369 {
12370 long __gpr;
12371 long __fpr;
12372 void *__overflow_arg_area;
12373 void *__reg_save_area;
12374 } va_list[1];
12375
12376 where __gpr and __fpr hold the number of general purpose
12377 or floating point arguments used up to now, respectively,
12378 __overflow_arg_area points to the stack location of the
12379 next argument passed on the stack, and __reg_save_area
12380 always points to the start of the register area in the
12381 call frame of the current function. The function prologue
12382 saves all registers used for argument passing into this
12383 area if the function uses variable arguments. */
12384
12385 static tree
12386 s390_build_builtin_va_list (void)
12387 {
12388 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
12389
12390 record = lang_hooks.types.make_type (RECORD_TYPE);
12391
12392 type_decl =
12393 build_decl (BUILTINS_LOCATION,
12394 TYPE_DECL, get_identifier ("__va_list_tag"), record);
12395
12396 f_gpr = build_decl (BUILTINS_LOCATION,
12397 FIELD_DECL, get_identifier ("__gpr"),
12398 long_integer_type_node);
12399 f_fpr = build_decl (BUILTINS_LOCATION,
12400 FIELD_DECL, get_identifier ("__fpr"),
12401 long_integer_type_node);
12402 f_ovf = build_decl (BUILTINS_LOCATION,
12403 FIELD_DECL, get_identifier ("__overflow_arg_area"),
12404 ptr_type_node);
12405 f_sav = build_decl (BUILTINS_LOCATION,
12406 FIELD_DECL, get_identifier ("__reg_save_area"),
12407 ptr_type_node);
12408
12409 va_list_gpr_counter_field = f_gpr;
12410 va_list_fpr_counter_field = f_fpr;
12411
12412 DECL_FIELD_CONTEXT (f_gpr) = record;
12413 DECL_FIELD_CONTEXT (f_fpr) = record;
12414 DECL_FIELD_CONTEXT (f_ovf) = record;
12415 DECL_FIELD_CONTEXT (f_sav) = record;
12416
12417 TYPE_STUB_DECL (record) = type_decl;
12418 TYPE_NAME (record) = type_decl;
12419 TYPE_FIELDS (record) = f_gpr;
12420 DECL_CHAIN (f_gpr) = f_fpr;
12421 DECL_CHAIN (f_fpr) = f_ovf;
12422 DECL_CHAIN (f_ovf) = f_sav;
12423
12424 layout_type (record);
12425
12426 /* The correct type is an array type of one element. */
12427 return build_array_type (record, build_index_type (size_zero_node));
12428 }
12429
12430 /* Implement va_start by filling the va_list structure VALIST.
12431 STDARG_P is always true, and ignored.
12432 NEXTARG points to the first anonymous stack argument.
12433
12434 The following global variables are used to initialize
12435 the va_list structure:
12436
12437 crtl->args.info:
12438 holds number of gprs and fprs used for named arguments.
12439 crtl->args.arg_offset_rtx:
12440 holds the offset of the first anonymous stack argument
12441 (relative to the virtual arg pointer). */
12442
12443 static void
12444 s390_va_start (tree valist, rtx nextarg ATTRIBUTE_UNUSED)
12445 {
12446 HOST_WIDE_INT n_gpr, n_fpr;
12447 int off;
12448 tree f_gpr, f_fpr, f_ovf, f_sav;
12449 tree gpr, fpr, ovf, sav, t;
12450
12451 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
12452 f_fpr = DECL_CHAIN (f_gpr);
12453 f_ovf = DECL_CHAIN (f_fpr);
12454 f_sav = DECL_CHAIN (f_ovf);
12455
12456 valist = build_simple_mem_ref (valist);
12457 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
12458 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
12459 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
12460 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
12461
12462 /* Count number of gp and fp argument registers used. */
12463
12464 n_gpr = crtl->args.info.gprs;
12465 n_fpr = crtl->args.info.fprs;
12466
12467 if (cfun->va_list_gpr_size)
12468 {
12469 t = build2 (MODIFY_EXPR, TREE_TYPE (gpr), gpr,
12470 build_int_cst (NULL_TREE, n_gpr));
12471 TREE_SIDE_EFFECTS (t) = 1;
12472 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
12473 }
12474
12475 if (cfun->va_list_fpr_size)
12476 {
12477 t = build2 (MODIFY_EXPR, TREE_TYPE (fpr), fpr,
12478 build_int_cst (NULL_TREE, n_fpr));
12479 TREE_SIDE_EFFECTS (t) = 1;
12480 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
12481 }
12482
12483 if (flag_split_stack
12484 && (lookup_attribute ("no_split_stack", DECL_ATTRIBUTES (cfun->decl))
12485 == NULL)
12486 && cfun->machine->split_stack_varargs_pointer == NULL_RTX)
12487 {
12488 rtx reg;
12489 rtx_insn *seq;
12490
12491 reg = gen_reg_rtx (Pmode);
12492 cfun->machine->split_stack_varargs_pointer = reg;
12493
12494 start_sequence ();
12495 emit_move_insn (reg, gen_rtx_REG (Pmode, 1));
12496 seq = get_insns ();
12497 end_sequence ();
12498
12499 push_topmost_sequence ();
12500 emit_insn_after (seq, entry_of_function ());
12501 pop_topmost_sequence ();
12502 }
12503
12504 /* Find the overflow area.
12505 FIXME: This currently is too pessimistic when the vector ABI is
12506 enabled. In that case we *always* set up the overflow area
12507 pointer. */
12508 if (n_gpr + cfun->va_list_gpr_size > GP_ARG_NUM_REG
12509 || n_fpr + cfun->va_list_fpr_size > FP_ARG_NUM_REG
12510 || TARGET_VX_ABI)
12511 {
12512 if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
12513 t = make_tree (TREE_TYPE (ovf), virtual_incoming_args_rtx);
12514 else
12515 t = make_tree (TREE_TYPE (ovf), cfun->machine->split_stack_varargs_pointer);
12516
12517 off = INTVAL (crtl->args.arg_offset_rtx);
12518 off = off < 0 ? 0 : off;
12519 if (TARGET_DEBUG_ARG)
12520 fprintf (stderr, "va_start: n_gpr = %d, n_fpr = %d off %d\n",
12521 (int)n_gpr, (int)n_fpr, off);
12522
12523 t = fold_build_pointer_plus_hwi (t, off);
12524
12525 t = build2 (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
12526 TREE_SIDE_EFFECTS (t) = 1;
12527 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
12528 }
12529
12530 /* Find the register save area. */
12531 if ((cfun->va_list_gpr_size && n_gpr < GP_ARG_NUM_REG)
12532 || (cfun->va_list_fpr_size && n_fpr < FP_ARG_NUM_REG))
12533 {
12534 t = make_tree (TREE_TYPE (sav), return_address_pointer_rtx);
12535 t = fold_build_pointer_plus_hwi (t, -RETURN_REGNUM * UNITS_PER_LONG);
12536
12537 t = build2 (MODIFY_EXPR, TREE_TYPE (sav), sav, t);
12538 TREE_SIDE_EFFECTS (t) = 1;
12539 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
12540 }
12541 }
12542
12543 /* Implement va_arg by updating the va_list structure
12544 VALIST as required to retrieve an argument of type
12545 TYPE, and returning that argument.
12546
12547 Generates code equivalent to:
12548
12549 if (integral value) {
12550 if (size <= 4 && args.gpr < 5 ||
12551 size > 4 && args.gpr < 4 )
12552 ret = args.reg_save_area[args.gpr+8]
12553 else
12554 ret = *args.overflow_arg_area++;
12555 } else if (vector value) {
12556 ret = *args.overflow_arg_area;
12557 args.overflow_arg_area += size / 8;
12558 } else if (float value) {
12559 if (args.fpr < 2)
12560 ret = args.reg_save_area[args.fpr+64]
12561 else
12562 ret = *args.overflow_arg_area++;
12563 } else if (aggregate value) {
12564 if (args.gpr < 5)
12565 ret = *args.reg_save_area[args.gpr]
12566 else
12567 ret = **args.overflow_arg_area++;
12568 } */
12569
12570 static tree
12571 s390_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
12572 gimple_seq *post_p ATTRIBUTE_UNUSED)
12573 {
12574 tree f_gpr, f_fpr, f_ovf, f_sav;
12575 tree gpr, fpr, ovf, sav, reg, t, u;
12576 int indirect_p, size, n_reg, sav_ofs, sav_scale, max_reg;
12577 tree lab_false, lab_over = NULL_TREE;
12578 tree addr = create_tmp_var (ptr_type_node, "addr");
12579 bool left_align_p; /* How a value < UNITS_PER_LONG is aligned within
12580 a stack slot. */
12581
12582 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
12583 f_fpr = DECL_CHAIN (f_gpr);
12584 f_ovf = DECL_CHAIN (f_fpr);
12585 f_sav = DECL_CHAIN (f_ovf);
12586
12587 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
12588 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
12589 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
12590
12591 /* The tree for args* cannot be shared between gpr/fpr and ovf since
12592 both appear on a lhs. */
12593 valist = unshare_expr (valist);
12594 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
12595
12596 size = int_size_in_bytes (type);
12597
12598 s390_check_type_for_vector_abi (type, true, false);
12599
12600 if (pass_va_arg_by_reference (type))
12601 {
12602 if (TARGET_DEBUG_ARG)
12603 {
12604 fprintf (stderr, "va_arg: aggregate type");
12605 debug_tree (type);
12606 }
12607
12608 /* Aggregates are passed by reference. */
12609 indirect_p = 1;
12610 reg = gpr;
12611 n_reg = 1;
12612
12613 /* Kernel stack layout on 31 bit: It is assumed here that no padding
12614 will be added by s390_frame_info because for va_args an even number
12615 of GPRs always has to be saved (r15-r2 = 14 regs). */
12616 sav_ofs = 2 * UNITS_PER_LONG;
12617 sav_scale = UNITS_PER_LONG;
12618 size = UNITS_PER_LONG;
12619 max_reg = GP_ARG_NUM_REG - n_reg;
12620 left_align_p = false;
12621 }
12622 else if (s390_function_arg_vector (TYPE_MODE (type), type))
12623 {
12624 if (TARGET_DEBUG_ARG)
12625 {
12626 fprintf (stderr, "va_arg: vector type");
12627 debug_tree (type);
12628 }
12629
12630 indirect_p = 0;
12631 reg = NULL_TREE;
12632 n_reg = 0;
12633 sav_ofs = 0;
12634 sav_scale = 8;
12635 max_reg = 0;
12636 left_align_p = true;
12637 }
12638 else if (s390_function_arg_float (TYPE_MODE (type), type))
12639 {
12640 if (TARGET_DEBUG_ARG)
12641 {
12642 fprintf (stderr, "va_arg: float type");
12643 debug_tree (type);
12644 }
12645
12646 /* FP args go in FP registers, if present. */
12647 indirect_p = 0;
12648 reg = fpr;
12649 n_reg = 1;
12650 sav_ofs = 16 * UNITS_PER_LONG;
12651 sav_scale = 8;
12652 max_reg = FP_ARG_NUM_REG - n_reg;
12653 left_align_p = false;
12654 }
12655 else
12656 {
12657 if (TARGET_DEBUG_ARG)
12658 {
12659 fprintf (stderr, "va_arg: other type");
12660 debug_tree (type);
12661 }
12662
12663 /* Otherwise into GP registers. */
12664 indirect_p = 0;
12665 reg = gpr;
12666 n_reg = (size + UNITS_PER_LONG - 1) / UNITS_PER_LONG;
12667
12668 /* Kernel stack layout on 31 bit: It is assumed here that no padding
12669 will be added by s390_frame_info because for va_args an even number
12670 of GPRs always has to be saved (r15-r2 = 14 regs). */
12671 sav_ofs = 2 * UNITS_PER_LONG;
12672
12673 if (size < UNITS_PER_LONG)
12674 sav_ofs += UNITS_PER_LONG - size;
12675
12676 sav_scale = UNITS_PER_LONG;
12677 max_reg = GP_ARG_NUM_REG - n_reg;
12678 left_align_p = false;
12679 }
12680
12681 /* Pull the value out of the saved registers ... */
12682
12683 if (reg != NULL_TREE)
12684 {
12685 /*
12686 if (reg > ((typeof (reg))max_reg))
12687 goto lab_false;
12688
12689 addr = sav + sav_ofs + reg * sav_scale;
12690
12691 goto lab_over;
12692
12693 lab_false:
12694 */
12695
12696 lab_false = create_artificial_label (UNKNOWN_LOCATION);
12697 lab_over = create_artificial_label (UNKNOWN_LOCATION);
12698
12699 t = fold_convert (TREE_TYPE (reg), size_int (max_reg));
12700 t = build2 (GT_EXPR, boolean_type_node, reg, t);
12701 u = build1 (GOTO_EXPR, void_type_node, lab_false);
12702 t = build3 (COND_EXPR, void_type_node, t, u, NULL_TREE);
12703 gimplify_and_add (t, pre_p);
12704
12705 t = fold_build_pointer_plus_hwi (sav, sav_ofs);
12706 u = build2 (MULT_EXPR, TREE_TYPE (reg), reg,
12707 fold_convert (TREE_TYPE (reg), size_int (sav_scale)));
12708 t = fold_build_pointer_plus (t, u);
12709
12710 gimplify_assign (addr, t, pre_p);
12711
12712 gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));
12713
12714 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_false));
12715 }
12716
12717 /* ... Otherwise out of the overflow area. */
12718
12719 t = ovf;
12720 if (size < UNITS_PER_LONG && !left_align_p)
12721 t = fold_build_pointer_plus_hwi (t, UNITS_PER_LONG - size);
12722
12723 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
12724
12725 gimplify_assign (addr, t, pre_p);
12726
12727 if (size < UNITS_PER_LONG && left_align_p)
12728 t = fold_build_pointer_plus_hwi (t, UNITS_PER_LONG);
12729 else
12730 t = fold_build_pointer_plus_hwi (t, size);
12731
12732 gimplify_assign (ovf, t, pre_p);
12733
12734 if (reg != NULL_TREE)
12735 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_over));
12736
12737
12738 /* Increment register save count. */
12739
12740 if (n_reg > 0)
12741 {
12742 u = build2 (PREINCREMENT_EXPR, TREE_TYPE (reg), reg,
12743 fold_convert (TREE_TYPE (reg), size_int (n_reg)));
12744 gimplify_and_add (u, pre_p);
12745 }
12746
12747 if (indirect_p)
12748 {
12749 t = build_pointer_type_for_mode (build_pointer_type (type),
12750 ptr_mode, true);
12751 addr = fold_convert (t, addr);
12752 addr = build_va_arg_indirect_ref (addr);
12753 }
12754 else
12755 {
12756 t = build_pointer_type_for_mode (type, ptr_mode, true);
12757 addr = fold_convert (t, addr);
12758 }
12759
12760 return build_va_arg_indirect_ref (addr);
12761 }
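
/* For illustration only (a sketch, not ABI documentation): on a
   64-bit target with GP_ARG_NUM_REG == 5 and FP_ARG_NUM_REG == 4 the
   sequence gimplified above corresponds at the source level to:

     long
     f (int n, ...)
     {
       va_list ap;
       long sum;
       va_start (ap, n);
       sum = va_arg (ap, long);      // GPR path while args.gpr < 5
       sum += va_arg (ap, double);   // FPR path while args.fpr < 4
       va_end (ap);
       return sum;                   // further args go via the overflow area
     }
*/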
12762
12763 /* Emit rtl for the tbegin or tbegin_retry (RETRY != NULL_RTX)
12764 expanders.
12765 DEST - Register location where CC will be stored.
12766 TDB - Pointer to a 256 byte area where to store the transaction
12767 diagnostic block. NULL if TDB is not needed.
12768 RETRY - Retry count value. If non-NULL a retry loop for CC2
12769 is emitted.
12770 CLOBBER_FPRS_P - If true clobbers for all FPRs are emitted as part
12771 of the tbegin instruction pattern. */
12772
12773 void
12774 s390_expand_tbegin (rtx dest, rtx tdb, rtx retry, bool clobber_fprs_p)
12775 {
12776 rtx retry_plus_two = gen_reg_rtx (SImode);
12777 rtx retry_reg = gen_reg_rtx (SImode);
12778 rtx_code_label *retry_label = NULL;
12779
12780 if (retry != NULL_RTX)
12781 {
12782 emit_move_insn (retry_reg, retry);
12783 emit_insn (gen_addsi3 (retry_plus_two, retry_reg, const2_rtx));
12784 emit_insn (gen_addsi3 (retry_reg, retry_reg, const1_rtx));
12785 retry_label = gen_label_rtx ();
12786 emit_label (retry_label);
12787 }
12788
12789 if (clobber_fprs_p)
12790 {
12791 if (TARGET_VX)
12792 emit_insn (gen_tbegin_1_z13 (gen_rtx_CONST_INT (VOIDmode, TBEGIN_MASK),
12793 tdb));
12794 else
12795 emit_insn (gen_tbegin_1 (gen_rtx_CONST_INT (VOIDmode, TBEGIN_MASK),
12796 tdb));
12797 }
12798 else
12799 emit_insn (gen_tbegin_nofloat_1 (gen_rtx_CONST_INT (VOIDmode, TBEGIN_MASK),
12800 tdb));
12801
12802 emit_move_insn (dest, gen_rtx_UNSPEC (SImode,
12803 gen_rtvec (1, gen_rtx_REG (CCRAWmode,
12804 CC_REGNUM)),
12805 UNSPEC_CC_TO_INT));
12806 if (retry != NULL_RTX)
12807 {
12808 const int CC0 = 1 << 3;
12809 const int CC1 = 1 << 2;
12810 const int CC3 = 1 << 0;
12811 rtx jump;
12812 rtx count = gen_reg_rtx (SImode);
12813 rtx_code_label *leave_label = gen_label_rtx ();
12814
12815 /* Exit for success and permanent failures. */
12816 jump = s390_emit_jump (leave_label,
12817 gen_rtx_EQ (VOIDmode,
12818 gen_rtx_REG (CCRAWmode, CC_REGNUM),
12819 gen_rtx_CONST_INT (VOIDmode, CC0 | CC1 | CC3)));
12820 LABEL_NUSES (leave_label) = 1;
12821
12822 /* CC2 - transient failure. Perform retry with ppa. */
12823 emit_move_insn (count, retry_plus_two);
12824 emit_insn (gen_subsi3 (count, count, retry_reg));
12825 emit_insn (gen_tx_assist (count));
12826 jump = emit_jump_insn (gen_doloop_si64 (retry_label,
12827 retry_reg,
12828 retry_reg));
12829 JUMP_LABEL (jump) = retry_label;
12830 LABEL_NUSES (retry_label) = 1;
12831 emit_label (leave_label);
12832 }
12833 }
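
/* For illustration only: at the source level the retry variant of
   this expander corresponds roughly to the following use of the HTM
   builtins (a sketch assuming -mhtm and the _HTM_TBEGIN_STARTED
   constant from htmintrin.h):

     #include <htmintrin.h>

     int
     try_update (int *p)
     {
       // Retry up to 5 times on transient (CC2) failures; success and
       // permanent failures leave immediately, as with leave_label above.
       if (__builtin_tbegin_retry ((void *) 0, 5) == _HTM_TBEGIN_STARTED)
         {
           *p += 1;            // transactional update
           __builtin_tend ();  // commit
           return 1;
         }
       return 0;               // transaction could not be started
     }
*/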
12834
12835
12836 /* Return the decl for the target specific builtin with the function
12837 code FCODE. */
12838
12839 static tree
12840 s390_builtin_decl (unsigned fcode, bool initialized_p ATTRIBUTE_UNUSED)
12841 {
12842 if (fcode >= S390_BUILTIN_MAX)
12843 return error_mark_node;
12844
12845 return s390_builtin_decls[fcode];
12846 }
12847
12848 /* We call mcount before the function prologue. So a profiled leaf
12849 function should stay a leaf function. */
12850
12851 static bool
12852 s390_keep_leaf_when_profiled ()
12853 {
12854 return true;
12855 }
12856
12857 /* Output assembly code for the trampoline template to
12858 stdio stream FILE.
12859
12860 On S/390, we use gpr 1 internally in the trampoline code;
12861 gpr 0 is used to hold the static chain. */
12862
12863 static void
12864 s390_asm_trampoline_template (FILE *file)
12865 {
12866 rtx op[2];
12867 op[0] = gen_rtx_REG (Pmode, 0);
12868 op[1] = gen_rtx_REG (Pmode, 1);
12869
12870 if (TARGET_64BIT)
12871 {
12872 output_asm_insn ("basr\t%1,0", op); /* 2 byte */
12873 output_asm_insn ("lmg\t%0,%1,14(%1)", op); /* 6 byte */
12874 output_asm_insn ("br\t%1", op); /* 2 byte */
12875 ASM_OUTPUT_SKIP (file, (HOST_WIDE_INT)(TRAMPOLINE_SIZE - 10));
12876 }
12877 else
12878 {
12879 output_asm_insn ("basr\t%1,0", op); /* 2 byte */
12880 output_asm_insn ("lm\t%0,%1,6(%1)", op); /* 4 byte */
12881 output_asm_insn ("br\t%1", op); /* 2 byte */
12882 ASM_OUTPUT_SKIP (file, (HOST_WIDE_INT)(TRAMPOLINE_SIZE - 8));
12883 }
12884 }
12885
12886 /* Emit RTL insns to initialize the variable parts of a trampoline.
12887 FNADDR is an RTX for the address of the function's pure code.
12888 CXT is an RTX for the static chain value for the function. */
12889
12890 static void
12891 s390_trampoline_init (rtx m_tramp, tree fndecl, rtx cxt)
12892 {
12893 rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
12894 rtx mem;
12895
12896 emit_block_move (m_tramp, assemble_trampoline_template (),
12897 GEN_INT (2 * UNITS_PER_LONG), BLOCK_OP_NORMAL);
12898
12899 mem = adjust_address (m_tramp, Pmode, 2 * UNITS_PER_LONG);
12900 emit_move_insn (mem, cxt);
12901 mem = adjust_address (m_tramp, Pmode, 3 * UNITS_PER_LONG);
12902 emit_move_insn (mem, fnaddr);
12903 }
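
/* A worked example of the resulting 64-bit trampoline (assuming
   UNITS_PER_LONG == 8 and TRAMPOLINE_SIZE == 32):

     offset  0: basr %r1,0            // r1 = address of offset 2
     offset  2: lmg  %r0,%r1,14(%r1)  // loads from 2 + 14 == 16
     offset  8: br   %r1
     offset 10: padding up to offset 16
     offset 16: static chain value    // stored by s390_trampoline_init
     offset 24: target function address

   so the single lmg picks up the static chain into %r0 and the target
   address into %r1, consistent with the register use documented for
   s390_asm_trampoline_template above. */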
12904
12905 static void
12906 output_asm_nops (const char *user, int hw)
12907 {
12908 asm_fprintf (asm_out_file, "\t# NOPs for %s (%d halfwords)\n", user, hw);
12909 while (hw > 0)
12910 {
12911 if (hw >= 3)
12912 {
12913 output_asm_insn ("brcl\t0,0", NULL);
12914 hw -= 3;
12915 }
12916 else if (hw >= 2)
12917 {
12918 output_asm_insn ("bc\t0,0", NULL);
12919 hw -= 2;
12920 }
12921 else
12922 {
12923 output_asm_insn ("bcr\t0,0", NULL);
12924 hw -= 1;
12925 }
12926 }
12927 }
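
/* Worked example: a request for hw == 5 halfwords is covered greedily
   as brcl 0,0 (3 halfwords) followed by bc 0,0 (2 halfwords); a
   request for a single halfword degenerates to one bcr 0,0. */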
12928
12929 /* Output assembler code to FILE to increment profiler label # LABELNO
12930 for profiling a function entry. */
12931
12932 void
12933 s390_function_profiler (FILE *file, int labelno)
12934 {
12935 rtx op[8];
12936
12937 char label[128];
12938 ASM_GENERATE_INTERNAL_LABEL (label, "LP", labelno);
12939
12940 fprintf (file, "# function profiler \n");
12941
12942 op[0] = gen_rtx_REG (Pmode, RETURN_REGNUM);
12943 op[1] = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
12944 op[1] = gen_rtx_MEM (Pmode, plus_constant (Pmode, op[1], UNITS_PER_LONG));
12945 op[7] = GEN_INT (UNITS_PER_LONG);
12946
12947 op[2] = gen_rtx_REG (Pmode, 1);
12948 op[3] = gen_rtx_SYMBOL_REF (Pmode, label);
12949 SYMBOL_REF_FLAGS (op[3]) = SYMBOL_FLAG_LOCAL;
12950
12951 op[4] = gen_rtx_SYMBOL_REF (Pmode, flag_fentry ? "__fentry__" : "_mcount");
12952 if (flag_pic)
12953 {
12954 op[4] = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op[4]), UNSPEC_PLT);
12955 op[4] = gen_rtx_CONST (Pmode, op[4]);
12956 }
12957
12958 if (flag_record_mcount)
12959 fprintf (file, "1:\n");
12960
12961 if (flag_fentry)
12962 {
12963 if (flag_nop_mcount)
12964 output_asm_nops ("-mnop-mcount", /* brasl */ 3);
12965 else if (cfun->static_chain_decl)
12966 warning (OPT_Wcannot_profile, "nested functions cannot be profiled "
12967 "with %<-mfentry%> on s390");
12968 else
12969 output_asm_insn ("brasl\t0,%4", op);
12970 }
12971 else if (TARGET_64BIT)
12972 {
12973 if (flag_nop_mcount)
12974 output_asm_nops ("-mnop-mcount", /* stg */ 3 + /* larl */ 3 +
12975 /* brasl */ 3 + /* lg */ 3);
12976 else
12977 {
12978 output_asm_insn ("stg\t%0,%1", op);
12979 if (flag_dwarf2_cfi_asm)
12980 output_asm_insn (".cfi_rel_offset\t%0,%7", op);
12981 output_asm_insn ("larl\t%2,%3", op);
12982 output_asm_insn ("brasl\t%0,%4", op);
12983 output_asm_insn ("lg\t%0,%1", op);
12984 if (flag_dwarf2_cfi_asm)
12985 output_asm_insn (".cfi_restore\t%0", op);
12986 }
12987 }
12988 else
12989 {
12990 if (flag_nop_mcount)
12991 output_asm_nops ("-mnop-mcount", /* st */ 2 + /* larl */ 3 +
12992 /* brasl */ 3 + /* l */ 2);
12993 else
12994 {
12995 output_asm_insn ("st\t%0,%1", op);
12996 if (flag_dwarf2_cfi_asm)
12997 output_asm_insn (".cfi_rel_offset\t%0,%7", op);
12998 output_asm_insn ("larl\t%2,%3", op);
12999 output_asm_insn ("brasl\t%0,%4", op);
13000 output_asm_insn ("l\t%0,%1", op);
13001 if (flag_dwarf2_cfi_asm)
13002 output_asm_insn (".cfi_restore\t%0", op);
13003 }
13004 }
13005
13006 if (flag_record_mcount)
13007 {
13008 fprintf (file, "\t.section __mcount_loc, \"a\",@progbits\n");
13009 fprintf (file, "\t.%s 1b\n", TARGET_64BIT ? "quad" : "long");
13010 fprintf (file, "\t.previous\n");
13011 }
13012 }
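
/* For illustration, with -p on a 64-bit target (neither -mfentry nor
   -mnop-mcount, CFI directives omitted) the code above is expected to
   come out roughly as:

     stg   %r14,8(%r15)    // spill the return address to the stack
     larl  %r1,.LP0        // address of the profiler label
     brasl %r14,_mcount    // call the profiler
     lg    %r14,8(%r15)    // restore the return address

   where .LP0 stands in for the internal label built from LABELNO. */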
13013
13014 /* Encode symbol attributes (local vs. global, tls model) of a SYMBOL_REF
13015 into its SYMBOL_REF_FLAGS. */
13016
13017 static void
13018 s390_encode_section_info (tree decl, rtx rtl, int first)
13019 {
13020 default_encode_section_info (decl, rtl, first);
13021
13022 if (TREE_CODE (decl) == VAR_DECL)
13023 {
13024 /* Store the alignment to be able to check if we can use
13025 a larl/load-relative instruction. We only handle the cases
13026 that can go wrong (i.e. no FUNC_DECLs). */
13027 if (DECL_ALIGN (decl) == 0 || DECL_ALIGN (decl) % 16)
13028 SYMBOL_FLAG_SET_NOTALIGN2 (XEXP (rtl, 0));
13029 else if (DECL_ALIGN (decl) % 32)
13030 SYMBOL_FLAG_SET_NOTALIGN4 (XEXP (rtl, 0));
13031 else if (DECL_ALIGN (decl) % 64)
13032 SYMBOL_FLAG_SET_NOTALIGN8 (XEXP (rtl, 0));
13033 }
13034
13035 /* Literal pool references don't have a decl so they are handled
13036 differently here. We rely on the information in the MEM_ALIGN
13037 entry to decide upon the alignment. */
13038 if (MEM_P (rtl)
13039 && GET_CODE (XEXP (rtl, 0)) == SYMBOL_REF
13040 && TREE_CONSTANT_POOL_ADDRESS_P (XEXP (rtl, 0)))
13041 {
13042 if (MEM_ALIGN (rtl) == 0 || MEM_ALIGN (rtl) % 16)
13043 SYMBOL_FLAG_SET_NOTALIGN2 (XEXP (rtl, 0));
13044 else if (MEM_ALIGN (rtl) % 32)
13045 SYMBOL_FLAG_SET_NOTALIGN4 (XEXP (rtl, 0));
13046 else if (MEM_ALIGN (rtl) % 64)
13047 SYMBOL_FLAG_SET_NOTALIGN8 (XEXP (rtl, 0));
13048 }
13049 }
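
/* Example of the encoding above: DECL_ALIGN is in bits, so a global
   char with 1-byte alignment has DECL_ALIGN == 8; 8 % 16 != 0, hence
   NOTALIGN2 is set and larl (which requires an even target address)
   is not used for it.  A 4-byte aligned variable (DECL_ALIGN == 32)
   passes the % 16 and % 32 checks but fails % 64, so only NOTALIGN8
   is set. */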
13050
13051 /* Output thunk to FILE that implements a C++ virtual function call (with
13052 multiple inheritance) to FUNCTION. The thunk adjusts the this pointer
13053 by DELTA, and unless VCALL_OFFSET is zero, applies an additional adjustment
13054 stored at VCALL_OFFSET in the vtable whose address is located at offset 0
13055 relative to the resulting this pointer. */
13056
13057 static void
13058 s390_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
13059 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
13060 tree function)
13061 {
13062 const char *fnname = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (thunk));
13063 rtx op[10];
13064 int nonlocal = 0;
13065
13066 assemble_start_function (thunk, fnname);
13067 /* Make sure unwind info is emitted for the thunk if needed. */
13068 final_start_function (emit_barrier (), file, 1);
13069
13070 /* Operand 0 is the target function. */
13071 op[0] = XEXP (DECL_RTL (function), 0);
13072 if (flag_pic && !SYMBOL_REF_LOCAL_P (op[0]))
13073 {
13074 nonlocal = 1;
13075 op[0] = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op[0]),
13076 TARGET_64BIT ? UNSPEC_PLT : UNSPEC_GOT);
13077 op[0] = gen_rtx_CONST (Pmode, op[0]);
13078 }
13079
13080 /* Operand 1 is the 'this' pointer. */
13081 if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
13082 op[1] = gen_rtx_REG (Pmode, 3);
13083 else
13084 op[1] = gen_rtx_REG (Pmode, 2);
13085
13086 /* Operand 2 is the delta. */
13087 op[2] = GEN_INT (delta);
13088
13089 /* Operand 3 is the vcall_offset. */
13090 op[3] = GEN_INT (vcall_offset);
13091
13092 /* Operand 4 is the temporary register. */
13093 op[4] = gen_rtx_REG (Pmode, 1);
13094
13095 /* Operands 5 to 8 can be used as labels. */
13096 op[5] = NULL_RTX;
13097 op[6] = NULL_RTX;
13098 op[7] = NULL_RTX;
13099 op[8] = NULL_RTX;
13100
13101 /* Operand 9 can be used for temporary register. */
13102 op[9] = NULL_RTX;
13103
13104 /* Generate code. */
13105 if (TARGET_64BIT)
13106 {
13107 /* Setup literal pool pointer if required. */
13108 if ((!DISP_IN_RANGE (delta)
13109 && !CONST_OK_FOR_K (delta)
13110 && !CONST_OK_FOR_Os (delta))
13111 || (!DISP_IN_RANGE (vcall_offset)
13112 && !CONST_OK_FOR_K (vcall_offset)
13113 && !CONST_OK_FOR_Os (vcall_offset)))
13114 {
13115 op[5] = gen_label_rtx ();
13116 output_asm_insn ("larl\t%4,%5", op);
13117 }
13118
13119 /* Add DELTA to this pointer. */
13120 if (delta)
13121 {
13122 if (CONST_OK_FOR_J (delta))
13123 output_asm_insn ("la\t%1,%2(%1)", op);
13124 else if (DISP_IN_RANGE (delta))
13125 output_asm_insn ("lay\t%1,%2(%1)", op);
13126 else if (CONST_OK_FOR_K (delta))
13127 output_asm_insn ("aghi\t%1,%2", op);
13128 else if (CONST_OK_FOR_Os (delta))
13129 output_asm_insn ("agfi\t%1,%2", op);
13130 else
13131 {
13132 op[6] = gen_label_rtx ();
13133 output_asm_insn ("agf\t%1,%6-%5(%4)", op);
13134 }
13135 }
13136
13137 /* Perform vcall adjustment. */
13138 if (vcall_offset)
13139 {
13140 if (DISP_IN_RANGE (vcall_offset))
13141 {
13142 output_asm_insn ("lg\t%4,0(%1)", op);
13143 output_asm_insn ("ag\t%1,%3(%4)", op);
13144 }
13145 else if (CONST_OK_FOR_K (vcall_offset))
13146 {
13147 output_asm_insn ("lghi\t%4,%3", op);
13148 output_asm_insn ("ag\t%4,0(%1)", op);
13149 output_asm_insn ("ag\t%1,0(%4)", op);
13150 }
13151 else if (CONST_OK_FOR_Os (vcall_offset))
13152 {
13153 output_asm_insn ("lgfi\t%4,%3", op);
13154 output_asm_insn ("ag\t%4,0(%1)", op);
13155 output_asm_insn ("ag\t%1,0(%4)", op);
13156 }
13157 else
13158 {
13159 op[7] = gen_label_rtx ();
13160 output_asm_insn ("llgf\t%4,%7-%5(%4)", op);
13161 output_asm_insn ("ag\t%4,0(%1)", op);
13162 output_asm_insn ("ag\t%1,0(%4)", op);
13163 }
13164 }
13165
13166 /* Jump to target. */
13167 output_asm_insn ("jg\t%0", op);
13168
13169 /* Output literal pool if required. */
13170 if (op[5])
13171 {
13172 output_asm_insn (".align\t4", op);
13173 targetm.asm_out.internal_label (file, "L",
13174 CODE_LABEL_NUMBER (op[5]));
13175 }
13176 if (op[6])
13177 {
13178 targetm.asm_out.internal_label (file, "L",
13179 CODE_LABEL_NUMBER (op[6]));
13180 output_asm_insn (".long\t%2", op);
13181 }
13182 if (op[7])
13183 {
13184 targetm.asm_out.internal_label (file, "L",
13185 CODE_LABEL_NUMBER (op[7]));
13186 output_asm_insn (".long\t%3", op);
13187 }
13188 }
13189 else
13190 {
13191 /* Setup base pointer if required. */
13192 if (!vcall_offset
13193 || (!DISP_IN_RANGE (delta)
13194 && !CONST_OK_FOR_K (delta)
13195 && !CONST_OK_FOR_Os (delta))
13196 || (!DISP_IN_RANGE (vcall_offset)
13197 && !CONST_OK_FOR_K (vcall_offset)
13198 && !CONST_OK_FOR_Os (vcall_offset)))
13199 {
13200 op[5] = gen_label_rtx ();
13201 output_asm_insn ("basr\t%4,0", op);
13202 targetm.asm_out.internal_label (file, "L",
13203 CODE_LABEL_NUMBER (op[5]));
13204 }
13205
13206 /* Add DELTA to this pointer. */
13207 if (delta)
13208 {
13209 if (CONST_OK_FOR_J (delta))
13210 output_asm_insn ("la\t%1,%2(%1)", op);
13211 else if (DISP_IN_RANGE (delta))
13212 output_asm_insn ("lay\t%1,%2(%1)", op);
13213 else if (CONST_OK_FOR_K (delta))
13214 output_asm_insn ("ahi\t%1,%2", op);
13215 else if (CONST_OK_FOR_Os (delta))
13216 output_asm_insn ("afi\t%1,%2", op);
13217 else
13218 {
13219 op[6] = gen_label_rtx ();
13220 output_asm_insn ("a\t%1,%6-%5(%4)", op);
13221 }
13222 }
13223
13224 /* Perform vcall adjustment. */
13225 if (vcall_offset)
13226 {
13227 if (CONST_OK_FOR_J (vcall_offset))
13228 {
13229 output_asm_insn ("l\t%4,0(%1)", op);
13230 output_asm_insn ("a\t%1,%3(%4)", op);
13231 }
13232 else if (DISP_IN_RANGE (vcall_offset))
13233 {
13234 output_asm_insn ("l\t%4,0(%1)", op);
13235 output_asm_insn ("ay\t%1,%3(%4)", op);
13236 }
13237 else if (CONST_OK_FOR_K (vcall_offset))
13238 {
13239 output_asm_insn ("lhi\t%4,%3", op);
13240 output_asm_insn ("a\t%4,0(%1)", op);
13241 output_asm_insn ("a\t%1,0(%4)", op);
13242 }
13243 else if (CONST_OK_FOR_Os (vcall_offset))
13244 {
13245 output_asm_insn ("iilf\t%4,%3", op);
13246 output_asm_insn ("a\t%4,0(%1)", op);
13247 output_asm_insn ("a\t%1,0(%4)", op);
13248 }
13249 else
13250 {
13251 op[7] = gen_label_rtx ();
13252 output_asm_insn ("l\t%4,%7-%5(%4)", op);
13253 output_asm_insn ("a\t%4,0(%1)", op);
13254 output_asm_insn ("a\t%1,0(%4)", op);
13255 }
13256
13257 /* We had to clobber the base pointer register.
13258 Re-setup the base pointer (with a different base). */
13259 op[5] = gen_label_rtx ();
13260 output_asm_insn ("basr\t%4,0", op);
13261 targetm.asm_out.internal_label (file, "L",
13262 CODE_LABEL_NUMBER (op[5]));
13263 }
13264
13265 /* Jump to target. */
13266 op[8] = gen_label_rtx ();
13267
13268 if (!flag_pic)
13269 output_asm_insn ("l\t%4,%8-%5(%4)", op);
13270 else if (!nonlocal)
13271 output_asm_insn ("a\t%4,%8-%5(%4)", op);
13272 /* We cannot call through .plt, since .plt requires %r12 loaded. */
13273 else if (flag_pic == 1)
13274 {
13275 output_asm_insn ("a\t%4,%8-%5(%4)", op);
13276 output_asm_insn ("l\t%4,%0(%4)", op);
13277 }
13278 else if (flag_pic == 2)
13279 {
13280 op[9] = gen_rtx_REG (Pmode, 0);
13281 output_asm_insn ("l\t%9,%8-4-%5(%4)", op);
13282 output_asm_insn ("a\t%4,%8-%5(%4)", op);
13283 output_asm_insn ("ar\t%4,%9", op);
13284 output_asm_insn ("l\t%4,0(%4)", op);
13285 }
13286
13287 output_asm_insn ("br\t%4", op);
13288
13289 /* Output literal pool. */
13290 output_asm_insn (".align\t4", op);
13291
13292 if (nonlocal && flag_pic == 2)
13293 output_asm_insn (".long\t%0", op);
13294 if (nonlocal)
13295 {
13296 op[0] = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
13297 SYMBOL_REF_FLAGS (op[0]) = SYMBOL_FLAG_LOCAL;
13298 }
13299
13300 targetm.asm_out.internal_label (file, "L", CODE_LABEL_NUMBER (op[8]));
13301 if (!flag_pic)
13302 output_asm_insn (".long\t%0", op);
13303 else
13304 output_asm_insn (".long\t%0-%5", op);
13305
13306 if (op[6])
13307 {
13308 targetm.asm_out.internal_label (file, "L",
13309 CODE_LABEL_NUMBER (op[6]));
13310 output_asm_insn (".long\t%2", op);
13311 }
13312 if (op[7])
13313 {
13314 targetm.asm_out.internal_label (file, "L",
13315 CODE_LABEL_NUMBER (op[7]));
13316 output_asm_insn (".long\t%3", op);
13317 }
13318 }
13319 final_end_function ();
13320 assemble_end_function (thunk, fnname);
13321 }
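
/* A sketch of the simplest 64-bit case: for a thunk with DELTA == 16,
   VCALL_OFFSET == 0 and a target not returning in memory, the code
   above reduces to

     la %r2,16(%r2)   // adjust the this pointer
     jg target        // tail call the real function

   since 16 satisfies CONST_OK_FOR_J, no literal pool is needed. */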
13322
13323 /* Output either an indirect jump or an indirect call
13324 (RETURN_ADDR_REGNO != INVALID_REGNUM) with target register REGNO
13325 using a branch trampoline disabling branch target prediction. */
13326
13327 void
13328 s390_indirect_branch_via_thunk (unsigned int regno,
13329 unsigned int return_addr_regno,
13330 rtx comparison_operator,
13331 enum s390_indirect_branch_type type)
13332 {
13333 enum s390_indirect_branch_option option;
13334
13335 if (type == s390_indirect_branch_type_return)
13336 {
13337 if (s390_return_addr_from_memory ())
13338 option = s390_opt_function_return_mem;
13339 else
13340 option = s390_opt_function_return_reg;
13341 }
13342 else if (type == s390_indirect_branch_type_jump)
13343 option = s390_opt_indirect_branch_jump;
13344 else if (type == s390_indirect_branch_type_call)
13345 option = s390_opt_indirect_branch_call;
13346 else
13347 gcc_unreachable ();
13348
13349 if (TARGET_INDIRECT_BRANCH_TABLE)
13350 {
13351 char label[32];
13352
13353 ASM_GENERATE_INTERNAL_LABEL (label,
13354 indirect_branch_table_label[option],
13355 indirect_branch_table_label_no[option]++);
13356 ASM_OUTPUT_LABEL (asm_out_file, label);
13357 }
13358
13359 if (return_addr_regno != INVALID_REGNUM)
13360 {
13361 gcc_assert (comparison_operator == NULL_RTX);
13362 fprintf (asm_out_file, " \tbrasl\t%%r%d,", return_addr_regno);
13363 }
13364 else
13365 {
13366 fputs (" \tjg", asm_out_file);
13367 if (comparison_operator != NULL_RTX)
13368 print_operand (asm_out_file, comparison_operator, 'C');
13369
13370 fputs ("\t", asm_out_file);
13371 }
13372
13373 if (TARGET_CPU_Z10)
13374 fprintf (asm_out_file,
13375 TARGET_INDIRECT_BRANCH_THUNK_NAME_EXRL "\n",
13376 regno);
13377 else
13378 fprintf (asm_out_file,
13379 TARGET_INDIRECT_BRANCH_THUNK_NAME_EX "\n",
13380 INDIRECT_BRANCH_THUNK_REGNUM, regno);
13381
13382 if ((option == s390_opt_indirect_branch_jump
13383 && cfun->machine->indirect_branch_jump == indirect_branch_thunk)
13384 || (option == s390_opt_indirect_branch_call
13385 && cfun->machine->indirect_branch_call == indirect_branch_thunk)
13386 || (option == s390_opt_function_return_reg
13387 && cfun->machine->function_return_reg == indirect_branch_thunk)
13388 || (option == s390_opt_function_return_mem
13389 && cfun->machine->function_return_mem == indirect_branch_thunk))
13390 {
13391 if (TARGET_CPU_Z10)
13392 indirect_branch_z10thunk_mask |= (1 << regno);
13393 else
13394 indirect_branch_prez10thunk_mask |= (1 << regno);
13395 }
13396 }
13397
13398 /* Output an inline thunk for indirect jumps. EXECUTE_TARGET can
13399 either be an address register or a label pointing to the location
13400 of the jump instruction. */
13401
13402 void
13403 s390_indirect_branch_via_inline_thunk (rtx execute_target)
13404 {
13405 if (TARGET_INDIRECT_BRANCH_TABLE)
13406 {
13407 char label[32];
13408
13409 ASM_GENERATE_INTERNAL_LABEL (label,
13410 indirect_branch_table_label[s390_opt_indirect_branch_jump],
13411 indirect_branch_table_label_no[s390_opt_indirect_branch_jump]++);
13412 ASM_OUTPUT_LABEL (asm_out_file, label);
13413 }
13414
13415 if (!TARGET_ZARCH)
13416 fputs ("\t.machinemode zarch\n", asm_out_file);
13417
13418 if (REG_P (execute_target))
13419 fprintf (asm_out_file, "\tex\t%%r0,0(%%r%d)\n", REGNO (execute_target));
13420 else
13421 output_asm_insn ("\texrl\t%%r0,%0", &execute_target);
13422
13423 if (!TARGET_ZARCH)
13424 fputs ("\t.machinemode esa\n", asm_out_file);
13425
13426 fputs ("0:\tj\t0b\n", asm_out_file);
13427 }
13428
13429 static bool
13430 s390_valid_pointer_mode (scalar_int_mode mode)
13431 {
13432 return (mode == SImode || (TARGET_64BIT && mode == DImode));
13433 }
13434
13435 /* Checks whether the given CALL_EXPR would use a caller
13436 saved register. This is used to decide whether sibling call
13437 optimization could be performed on the respective function
13438 call. */
13439
13440 static bool
13441 s390_call_saved_register_used (tree call_expr)
13442 {
13443 CUMULATIVE_ARGS cum_v;
13444 cumulative_args_t cum;
13445 tree parameter;
13446 rtx parm_rtx;
13447 int reg, i;
13448
13449 INIT_CUMULATIVE_ARGS (cum_v, NULL, NULL, 0, 0);
13450 cum = pack_cumulative_args (&cum_v);
13451
13452 for (i = 0; i < call_expr_nargs (call_expr); i++)
13453 {
13454 parameter = CALL_EXPR_ARG (call_expr, i);
13455 gcc_assert (parameter);
13456
13457 /* For an undeclared variable passed as parameter we will get
13458 an ERROR_MARK node here. */
13459 if (TREE_CODE (parameter) == ERROR_MARK)
13460 return true;
13461
13462 /* We assume that in the target function all parameters are
13463 named. This only has an impact on vector argument register
13464 usage, none of which is call-saved. */
13465 function_arg_info arg (TREE_TYPE (parameter), /*named=*/true);
13466 apply_pass_by_reference_rules (&cum_v, arg);
13467
13468 parm_rtx = s390_function_arg (cum, arg);
13469
13470 s390_function_arg_advance (cum, arg);
13471
13472 if (!parm_rtx)
13473 continue;
13474
13475 if (REG_P (parm_rtx))
13476 {
13477 for (reg = 0; reg < REG_NREGS (parm_rtx); reg++)
13478 if (!call_used_or_fixed_reg_p (reg + REGNO (parm_rtx)))
13479 return true;
13480 }
13481
13482 if (GET_CODE (parm_rtx) == PARALLEL)
13483 {
13484 int i;
13485
13486 for (i = 0; i < XVECLEN (parm_rtx, 0); i++)
13487 {
13488 rtx r = XEXP (XVECEXP (parm_rtx, 0, i), 0);
13489
13490 gcc_assert (REG_P (r));
13491
13492 for (reg = 0; reg < REG_NREGS (r); reg++)
13493 if (!call_used_or_fixed_reg_p (reg + REGNO (r)))
13494 return true;
13495 }
13496 }
13497
13498 }
13499 return false;
13500 }
13501
13502 /* Return true if the given call expression can be
13503 turned into a sibling call.
13504 DECL holds the declaration of the function to be called whereas
13505 EXP is the call expression itself. */
13506
13507 static bool
13508 s390_function_ok_for_sibcall (tree decl, tree exp)
13509 {
13510 /* The TPF epilogue uses register 1. */
13511 if (TARGET_TPF_PROFILING)
13512 return false;
13513
13514 /* The 31 bit PLT code uses register 12 (GOT pointer - caller saved)
13515 which would have to be restored before the sibcall. */
13516 if (!TARGET_64BIT && flag_pic && decl && !targetm.binds_local_p (decl))
13517 return false;
13518
13519 /* The thunks for indirect branches require r1 if no exrl is
13520 available. r1 might not be available when doing a sibling
13521 call. */
13522 if (TARGET_INDIRECT_BRANCH_NOBP_CALL
13523 && !TARGET_CPU_Z10
13524 && !decl)
13525 return false;
13526
13527 /* Register 6 on s390 is available as an argument register but is
13528 call-saved. This makes functions needing this register for
13529 arguments not suitable for sibcalls. */
13530 return !s390_call_saved_register_used (exp);
13531 }
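
/* Example: a call to extern void f (int, int, int, int, int) passes
   the fifth integer argument in r6, so s390_call_saved_register_used
   returns true and the call is not turned into a sibcall; with four
   or fewer integer arguments (r2-r5 only) the optimization remains
   possible. */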
13532
13533 /* Return the fixed registers used for condition codes. */
13534
13535 static bool
13536 s390_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
13537 {
13538 *p1 = CC_REGNUM;
13539 *p2 = INVALID_REGNUM;
13540
13541 return true;
13542 }
13543
13544 /* This function is used by the call expanders of the machine description.
13545 It emits the call insn itself together with the necessary operations
13546 to adjust the target address and returns the emitted insn.
13547 ADDR_LOCATION is the target address rtx
13548 TLS_CALL the location of the thread-local symbol
13549 RESULT_REG the register where the result of the call should be stored
13550 RETADDR_REG the register where the return address should be stored
13551 If this parameter is NULL_RTX the call is considered
13552 to be a sibling call. */
13553
13554 rtx_insn *
13555 s390_emit_call (rtx addr_location, rtx tls_call, rtx result_reg,
13556 rtx retaddr_reg)
13557 {
13558 bool plt_call = false;
13559 rtx_insn *insn;
13560 rtx vec[4] = { NULL_RTX };
13561 int elts = 0;
13562 rtx *call = &vec[0];
13563 rtx *clobber_ret_reg = &vec[1];
13564 rtx *use = &vec[2];
13565 rtx *clobber_thunk_reg = &vec[3];
13566 int i;
13567
13568 /* Direct function calls need special treatment. */
13569 if (GET_CODE (addr_location) == SYMBOL_REF)
13570 {
13571 /* When calling a global routine in PIC mode, we must
13572 replace the symbol itself with the PLT stub. */
13573 if (flag_pic && !SYMBOL_REF_LOCAL_P (addr_location))
13574 {
13575 if (TARGET_64BIT || retaddr_reg != NULL_RTX)
13576 {
13577 addr_location = gen_rtx_UNSPEC (Pmode,
13578 gen_rtvec (1, addr_location),
13579 UNSPEC_PLT);
13580 addr_location = gen_rtx_CONST (Pmode, addr_location);
13581 plt_call = true;
13582 }
13583 else
13584 /* For -fpic code the PLT entries might use r12 which is
13585 call-saved. Therefore we cannot do a sibcall when
13586 calling directly using a symbol ref. When reaching
13587 this point we decided (in s390_function_ok_for_sibcall)
13588 to do a sibcall for a function pointer but one of the
13589 optimizers was able to get rid of the function pointer
13590 by propagating the symbol ref into the call. This
13591 optimization is illegal for S/390 so we turn the direct
13592 call into an indirect call again. */
13593 addr_location = force_reg (Pmode, addr_location);
13594 }
13595 }
13596
13597 /* If it is already an indirect call or the code above moved the
13598 SYMBOL_REF to somewhere else, make sure the address can be found
13599 in register 1. */
13600 if (retaddr_reg == NULL_RTX
13601 && GET_CODE (addr_location) != SYMBOL_REF
13602 && !plt_call)
13603 {
13604 emit_move_insn (gen_rtx_REG (Pmode, SIBCALL_REGNUM), addr_location);
13605 addr_location = gen_rtx_REG (Pmode, SIBCALL_REGNUM);
13606 }
13607
13608 if (TARGET_INDIRECT_BRANCH_NOBP_CALL
13609 && GET_CODE (addr_location) != SYMBOL_REF
13610 && !plt_call)
13611 {
13612 /* Indirect branch thunks require the target to be a single GPR. */
13613 addr_location = force_reg (Pmode, addr_location);
13614
13615 /* Without exrl the indirect branch thunks need an additional
13616 register for the larl;ex sequence. */
13617 if (!TARGET_CPU_Z10)
13618 {
13619 *clobber_thunk_reg = gen_rtx_REG (Pmode, INDIRECT_BRANCH_THUNK_REGNUM);
13620 *clobber_thunk_reg = gen_rtx_CLOBBER (VOIDmode, *clobber_thunk_reg);
13621 }
13622 }
13623
13624 addr_location = gen_rtx_MEM (QImode, addr_location);
13625 *call = gen_rtx_CALL (VOIDmode, addr_location, const0_rtx);
13626
13627 if (result_reg != NULL_RTX)
13628 *call = gen_rtx_SET (result_reg, *call);
13629
13630 if (retaddr_reg != NULL_RTX)
13631 {
13632 *clobber_ret_reg = gen_rtx_CLOBBER (VOIDmode, retaddr_reg);
13633
13634 if (tls_call != NULL_RTX)
13635 *use = gen_rtx_USE (VOIDmode, tls_call);
13636 }
13637
13638
13639 for (i = 0; i < 4; i++)
13640 if (vec[i] != NULL_RTX)
13641 elts++;
13642
13643 if (elts > 1)
13644 {
13645 rtvec v;
13646 int e = 0;
13647
13648 v = rtvec_alloc (elts);
13649 for (i = 0; i < 4; i++)
13650 if (vec[i] != NULL_RTX)
13651 {
13652 RTVEC_ELT (v, e) = vec[i];
13653 e++;
13654 }
13655
13656 *call = gen_rtx_PARALLEL (VOIDmode, v);
13657 }
13658
13659 insn = emit_call_insn (*call);
13660
13661 /* 31-bit PLT stubs and tls calls use the GOT register implicitly. */
13662 if ((!TARGET_64BIT && plt_call) || tls_call != NULL_RTX)
13663 {
13664 /* s390_function_ok_for_sibcall should
13665 have denied sibcalls in this case. */
13666 gcc_assert (retaddr_reg != NULL_RTX);
13667 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), gen_rtx_REG (Pmode, 12));
13668 }
13669 return insn;
13670 }
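
/* For illustration, a non-sibling call with a result register and a
   return address register ends up as a PARALLEL along the lines of

     (parallel [(set (reg result)
                     (call (mem:QI addr) (const_int 0)))
                (clobber (reg 14))])

   while a TLS call additionally carries a (use ...) of the TLS
   symbol, exactly as assembled in the vec[] slots above. */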
13671
13672 /* Implement TARGET_CONDITIONAL_REGISTER_USAGE. */
13673
13674 static void
13675 s390_conditional_register_usage (void)
13676 {
13677 int i;
13678
13679 if (flag_pic)
13680 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
13681 fixed_regs[BASE_REGNUM] = 0;
13682 fixed_regs[RETURN_REGNUM] = 0;
13683 if (TARGET_64BIT)
13684 {
13685 for (i = FPR8_REGNUM; i <= FPR15_REGNUM; i++)
13686 call_used_regs[i] = 0;
13687 }
13688 else
13689 {
13690 call_used_regs[FPR4_REGNUM] = 0;
13691 call_used_regs[FPR6_REGNUM] = 0;
13692 }
13693
13694 if (TARGET_SOFT_FLOAT)
13695 {
13696 for (i = FPR0_REGNUM; i <= FPR15_REGNUM; i++)
13697 fixed_regs[i] = 1;
13698 }
13699
13700 /* Disable v16 - v31 for non-vector target. */
13701 if (!TARGET_VX)
13702 {
13703 for (i = VR16_REGNUM; i <= VR31_REGNUM; i++)
13704 fixed_regs[i] = call_used_regs[i] = 1;
13705 }
13706 }
13707
13708 /* Corresponding function to eh_return expander. */
13709
13710 static GTY(()) rtx s390_tpf_eh_return_symbol;
13711 void
13712 s390_emit_tpf_eh_return (rtx target)
13713 {
13714 rtx_insn *insn;
13715 rtx reg, orig_ra;
13716
13717 if (!s390_tpf_eh_return_symbol)
13718 s390_tpf_eh_return_symbol = gen_rtx_SYMBOL_REF (Pmode, "__tpf_eh_return");
13719
13720 reg = gen_rtx_REG (Pmode, 2);
13721 orig_ra = gen_rtx_REG (Pmode, 3);
13722
13723 emit_move_insn (reg, target);
13724 emit_move_insn (orig_ra, get_hard_reg_initial_val (Pmode, RETURN_REGNUM));
13725 insn = s390_emit_call (s390_tpf_eh_return_symbol, NULL_RTX, reg,
13726 gen_rtx_REG (Pmode, RETURN_REGNUM));
13727 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), reg);
13728 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), orig_ra);
13729
13730 emit_move_insn (EH_RETURN_HANDLER_RTX, reg);
13731 }
13732
13733 /* Rework the prologue/epilogue to avoid saving/restoring
13734 registers unnecessarily. */
13735
13736 static void
13737 s390_optimize_prologue (void)
13738 {
13739 rtx_insn *insn, *new_insn, *next_insn;
13740
13741 /* Do a final recompute of the frame-related data. */
13742 s390_optimize_register_info ();
13743
13744 /* If all special registers are in fact used, there's nothing we
13745 can do, so no point in walking the insn list. */
13746
13747 if (cfun_frame_layout.first_save_gpr <= BASE_REGNUM
13748 && cfun_frame_layout.last_save_gpr >= BASE_REGNUM)
13749 return;
13750
13751 /* Search for prologue/epilogue insns and replace them. */
13752 for (insn = get_insns (); insn; insn = next_insn)
13753 {
13754 int first, last, off;
13755 rtx set, base, offset;
13756 rtx pat;
13757
13758 next_insn = NEXT_INSN (insn);
13759
13760 if (! NONJUMP_INSN_P (insn) || ! RTX_FRAME_RELATED_P (insn))
13761 continue;
13762
13763 pat = PATTERN (insn);
13764
13765 /* Remove ldgr/lgdr instructions used for saving and restoring
13766 GPRs if possible. */
13767 if (TARGET_Z10)
13768 {
13769 rtx tmp_pat = pat;
13770
13771 if (INSN_CODE (insn) == CODE_FOR_stack_restore_from_fpr)
13772 tmp_pat = XVECEXP (pat, 0, 0);
13773
13774 if (GET_CODE (tmp_pat) == SET
13775 && GET_MODE (SET_SRC (tmp_pat)) == DImode
13776 && REG_P (SET_SRC (tmp_pat))
13777 && REG_P (SET_DEST (tmp_pat)))
13778 {
13779 int src_regno = REGNO (SET_SRC (tmp_pat));
13780 int dest_regno = REGNO (SET_DEST (tmp_pat));
13781 int gpr_regno;
13782 int fpr_regno;
13783
13784 if (!((GENERAL_REGNO_P (src_regno)
13785 && FP_REGNO_P (dest_regno))
13786 || (FP_REGNO_P (src_regno)
13787 && GENERAL_REGNO_P (dest_regno))))
13788 continue;
13789
13790 gpr_regno = GENERAL_REGNO_P (src_regno) ? src_regno : dest_regno;
13791 fpr_regno = FP_REGNO_P (src_regno) ? src_regno : dest_regno;
13792
13793 /* GPR must be call-saved, FPR must be call-clobbered. */
13794 if (!call_used_regs[fpr_regno]
13795 || call_used_regs[gpr_regno])
13796 continue;
13797
13798 /* It must not happen that what we once saved in an FPR now
13799 needs a stack slot. */
13800 gcc_assert (cfun_gpr_save_slot (gpr_regno) != SAVE_SLOT_STACK);
13801
13802 if (cfun_gpr_save_slot (gpr_regno) == SAVE_SLOT_NONE)
13803 {
13804 remove_insn (insn);
13805 continue;
13806 }
13807 }
13808 }
13809
13810 if (GET_CODE (pat) == PARALLEL
13811 && store_multiple_operation (pat, VOIDmode))
13812 {
13813 set = XVECEXP (pat, 0, 0);
13814 first = REGNO (SET_SRC (set));
13815 last = first + XVECLEN (pat, 0) - 1;
13816 offset = const0_rtx;
13817 base = eliminate_constant_term (XEXP (SET_DEST (set), 0), &offset);
13818 off = INTVAL (offset);
13819
13820 if (GET_CODE (base) != REG || off < 0)
13821 continue;
13822 if (cfun_frame_layout.first_save_gpr != -1
13823 && (cfun_frame_layout.first_save_gpr < first
13824 || cfun_frame_layout.last_save_gpr > last))
13825 continue;
13826 if (REGNO (base) != STACK_POINTER_REGNUM
13827 && REGNO (base) != HARD_FRAME_POINTER_REGNUM)
13828 continue;
13829 if (first > BASE_REGNUM || last < BASE_REGNUM)
13830 continue;
13831
13832 if (cfun_frame_layout.first_save_gpr != -1)
13833 {
13834 rtx s_pat = save_gprs (base,
13835 off + (cfun_frame_layout.first_save_gpr
13836 - first) * UNITS_PER_LONG,
13837 cfun_frame_layout.first_save_gpr,
13838 cfun_frame_layout.last_save_gpr);
13839 new_insn = emit_insn_before (s_pat, insn);
13840 INSN_ADDRESSES_NEW (new_insn, -1);
13841 }
13842
13843 remove_insn (insn);
13844 continue;
13845 }
13846
13847 if (cfun_frame_layout.first_save_gpr == -1
13848 && GET_CODE (pat) == SET
13849 && GENERAL_REG_P (SET_SRC (pat))
13850 && GET_CODE (SET_DEST (pat)) == MEM)
13851 {
13852 set = pat;
13853 first = REGNO (SET_SRC (set));
13854 offset = const0_rtx;
13855 base = eliminate_constant_term (XEXP (SET_DEST (set), 0), &offset);
13856 off = INTVAL (offset);
13857
13858 if (GET_CODE (base) != REG || off < 0)
13859 continue;
13860 if (REGNO (base) != STACK_POINTER_REGNUM
13861 && REGNO (base) != HARD_FRAME_POINTER_REGNUM)
13862 continue;
13863
13864 remove_insn (insn);
13865 continue;
13866 }
13867
13868 if (GET_CODE (pat) == PARALLEL
13869 && load_multiple_operation (pat, VOIDmode))
13870 {
13871 set = XVECEXP (pat, 0, 0);
13872 first = REGNO (SET_DEST (set));
13873 last = first + XVECLEN (pat, 0) - 1;
13874 offset = const0_rtx;
13875 base = eliminate_constant_term (XEXP (SET_SRC (set), 0), &offset);
13876 off = INTVAL (offset);
13877
13878 if (GET_CODE (base) != REG || off < 0)
13879 continue;
13880
13881 if (cfun_frame_layout.first_restore_gpr != -1
13882 && (cfun_frame_layout.first_restore_gpr < first
13883 || cfun_frame_layout.last_restore_gpr > last))
13884 continue;
13885 if (REGNO (base) != STACK_POINTER_REGNUM
13886 && REGNO (base) != HARD_FRAME_POINTER_REGNUM)
13887 continue;
13888 if (first > BASE_REGNUM || last < BASE_REGNUM)
13889 continue;
13890
13891 if (cfun_frame_layout.first_restore_gpr != -1)
13892 {
13893 rtx rpat = restore_gprs (base,
13894 off + (cfun_frame_layout.first_restore_gpr
13895 - first) * UNITS_PER_LONG,
13896 cfun_frame_layout.first_restore_gpr,
13897 cfun_frame_layout.last_restore_gpr);
13898
13899 /* Remove REG_CFA_RESTOREs for registers that we no
13900 longer need to save. */
13901 REG_NOTES (rpat) = REG_NOTES (insn);
13902 for (rtx *ptr = &REG_NOTES (rpat); *ptr; )
13903 if (REG_NOTE_KIND (*ptr) == REG_CFA_RESTORE
13904 && ((int) REGNO (XEXP (*ptr, 0))
13905 < cfun_frame_layout.first_restore_gpr))
13906 *ptr = XEXP (*ptr, 1);
13907 else
13908 ptr = &XEXP (*ptr, 1);
13909 new_insn = emit_insn_before (rpat, insn);
13910 RTX_FRAME_RELATED_P (new_insn) = 1;
13911 INSN_ADDRESSES_NEW (new_insn, -1);
13912 }
13913
13914 remove_insn (insn);
13915 continue;
13916 }
13917
13918 if (cfun_frame_layout.first_restore_gpr == -1
13919 && GET_CODE (pat) == SET
13920 && GENERAL_REG_P (SET_DEST (pat))
13921 && GET_CODE (SET_SRC (pat)) == MEM)
13922 {
13923 set = pat;
13924 first = REGNO (SET_DEST (set));
13925 offset = const0_rtx;
13926 base = eliminate_constant_term (XEXP (SET_SRC (set), 0), &offset);
13927 off = INTVAL (offset);
13928
13929 if (GET_CODE (base) != REG || off < 0)
13930 continue;
13931
13932 if (REGNO (base) != STACK_POINTER_REGNUM
13933 && REGNO (base) != HARD_FRAME_POINTER_REGNUM)
13934 continue;
13935
13936 remove_insn (insn);
13937 continue;
13938 }
13939 }
13940 }
13941
13942 /* On z10 and later the dynamic branch prediction must see the
13943 backward jump within a certain window. If not, it falls back to
13944 the static prediction. This function rearranges the loop backward
13945 branch in a way which makes the static prediction always correct.
13946 The function returns true if it added an instruction. */
13947 static bool
13948 s390_fix_long_loop_prediction (rtx_insn *insn)
13949 {
13950 rtx set = single_set (insn);
13951 rtx code_label, label_ref;
13952 rtx_insn *uncond_jump;
13953 rtx_insn *cur_insn;
13954 rtx tmp;
13955 int distance;
13956
13957 /* This will exclude branch on count and branch on index patterns
13958 since these are correctly statically predicted. */
13959 if (!set
13960 || SET_DEST (set) != pc_rtx
13961 || GET_CODE (SET_SRC(set)) != IF_THEN_ELSE)
13962 return false;
13963
13964 /* Skip conditional returns. */
13965 if (ANY_RETURN_P (XEXP (SET_SRC (set), 1))
13966 && XEXP (SET_SRC (set), 2) == pc_rtx)
13967 return false;
13968
13969 label_ref = (GET_CODE (XEXP (SET_SRC (set), 1)) == LABEL_REF ?
13970 XEXP (SET_SRC (set), 1) : XEXP (SET_SRC (set), 2));
13971
13972 gcc_assert (GET_CODE (label_ref) == LABEL_REF);
13973
13974 code_label = XEXP (label_ref, 0);
13975
13976 if (INSN_ADDRESSES (INSN_UID (code_label)) == -1
13977 || INSN_ADDRESSES (INSN_UID (insn)) == -1
13978 || (INSN_ADDRESSES (INSN_UID (insn))
13979 - INSN_ADDRESSES (INSN_UID (code_label)) < PREDICT_DISTANCE))
13980 return false;
13981
13982 for (distance = 0, cur_insn = PREV_INSN (insn);
13983 distance < PREDICT_DISTANCE - 6;
13984 distance += get_attr_length (cur_insn), cur_insn = PREV_INSN (cur_insn))
13985 if (!cur_insn || JUMP_P (cur_insn) || LABEL_P (cur_insn))
13986 return false;
13987
13988 rtx_code_label *new_label = gen_label_rtx ();
13989 uncond_jump = emit_jump_insn_after (
13990 gen_rtx_SET (pc_rtx,
13991 gen_rtx_LABEL_REF (VOIDmode, code_label)),
13992 insn);
13993 emit_label_after (new_label, uncond_jump);
13994
13995 tmp = XEXP (SET_SRC (set), 1);
13996 XEXP (SET_SRC (set), 1) = XEXP (SET_SRC (set), 2);
13997 XEXP (SET_SRC (set), 2) = tmp;
13998 INSN_CODE (insn) = -1;
13999
14000 XEXP (label_ref, 0) = new_label;
14001 JUMP_LABEL (insn) = new_label;
14002 JUMP_LABEL (uncond_jump) = code_label;
14003
14004 return true;
14005 }
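
/* Illustration of the transformation: a loop whose backward branch is
   too far away for the prediction window,

     code_label:
       ...
       jne code_label     // conditional backward branch

   is rewritten into

     code_label:
       ...
       je  new_label      // inverted condition, short forward branch
       j   code_label     // unconditional backward branch
     new_label:

   so the conditional branch is now a forward branch (statically
   predicted as not taken) and the backward branch is unconditional. */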
14006
14007 /* Returns 1 if INSN reads the value of REG for purposes not related
14008 to addressing of memory, and 0 otherwise. */
14009 static int
14010 s390_non_addr_reg_read_p (rtx reg, rtx_insn *insn)
14011 {
14012 return reg_referenced_p (reg, PATTERN (insn))
14013 && !reg_used_in_mem_p (REGNO (reg), PATTERN (insn));
14014 }
14015
14016 /* Starting from INSN find_cond_jump looks downwards in the insn
14017 stream for a single jump insn which is the last user of the
14018 condition code set in INSN. */
14019 static rtx_insn *
14020 find_cond_jump (rtx_insn *insn)
14021 {
14022 for (; insn; insn = NEXT_INSN (insn))
14023 {
14024 rtx ite, cc;
14025
14026 if (LABEL_P (insn))
14027 break;
14028
14029 if (!JUMP_P (insn))
14030 {
14031 if (reg_mentioned_p (gen_rtx_REG (CCmode, CC_REGNUM), insn))
14032 break;
14033 continue;
14034 }
14035
14036 /* This will be triggered by a return. */
14037 if (GET_CODE (PATTERN (insn)) != SET)
14038 break;
14039
14040 gcc_assert (SET_DEST (PATTERN (insn)) == pc_rtx);
14041 ite = SET_SRC (PATTERN (insn));
14042
14043 if (GET_CODE (ite) != IF_THEN_ELSE)
14044 break;
14045
14046 cc = XEXP (XEXP (ite, 0), 0);
14047 if (!REG_P (cc) || !CC_REGNO_P (REGNO (cc)))
14048 break;
14049
14050 if (find_reg_note (insn, REG_DEAD, cc))
14051 return insn;
14052 break;
14053 }
14054
14055 return NULL;
14056 }
14057
14058 /* Swap the condition in COND and the operands in OP0 and OP1 so that
14059 the semantics does not change. If NULL_RTX is passed as COND the
14060 function tries to find the conditional jump starting with INSN. */
14061 static void
14062 s390_swap_cmp (rtx cond, rtx *op0, rtx *op1, rtx_insn *insn)
14063 {
14064 rtx tmp = *op0;
14065
14066 if (cond == NULL_RTX)
14067 {
14068 rtx_insn *jump = find_cond_jump (NEXT_INSN (insn));
14069 rtx set = jump ? single_set (jump) : NULL_RTX;
14070
14071 if (set == NULL_RTX)
14072 return;
14073
14074 cond = XEXP (SET_SRC (set), 0);
14075 }
14076
14077 *op0 = *op1;
14078 *op1 = tmp;
14079 PUT_CODE (cond, swap_condition (GET_CODE (cond)));
14080 }
14081
14082 /* On z10, instructions of the compare-and-branch family have the
14083 property of accessing the register occurring as the second operand
14084 with its bits complemented. If such a compare is grouped with a
14085 second instruction that accesses the same register non-complemented,
14086 and if that register's value is delivered via a bypass, then the
14087 pipeline recycles, thereby causing significant performance decline.
14088 This function locates such situations and exchanges the two
14089 operands of the compare. The function returns true whenever it
14090 added an insn. */
14091 static bool
14092 s390_z10_optimize_cmp (rtx_insn *insn)
14093 {
14094 rtx_insn *prev_insn, *next_insn;
14095 bool insn_added_p = false;
14096 rtx cond, *op0, *op1;
14097
14098 if (GET_CODE (PATTERN (insn)) == PARALLEL)
14099 {
14100 /* Handle compare and branch and branch on count
14101 instructions. */
14102 rtx pattern = single_set (insn);
14103
14104 if (!pattern
14105 || SET_DEST (pattern) != pc_rtx
14106 || GET_CODE (SET_SRC (pattern)) != IF_THEN_ELSE)
14107 return false;
14108
14109 cond = XEXP (SET_SRC (pattern), 0);
14110 op0 = &XEXP (cond, 0);
14111 op1 = &XEXP (cond, 1);
14112 }
14113 else if (GET_CODE (PATTERN (insn)) == SET)
14114 {
14115 rtx src, dest;
14116
14117 /* Handle normal compare instructions. */
14118 src = SET_SRC (PATTERN (insn));
14119 dest = SET_DEST (PATTERN (insn));
14120
14121 if (!REG_P (dest)
14122 || !CC_REGNO_P (REGNO (dest))
14123 || GET_CODE (src) != COMPARE)
14124 return false;
14125
14126 /* s390_swap_cmp will try to find the conditional
14127 jump when passing NULL_RTX as condition. */
14128 cond = NULL_RTX;
14129 op0 = &XEXP (src, 0);
14130 op1 = &XEXP (src, 1);
14131 }
14132 else
14133 return false;
14134
14135 if (!REG_P (*op0) || !REG_P (*op1))
14136 return false;
14137
14138 if (GET_MODE_CLASS (GET_MODE (*op0)) != MODE_INT)
14139 return false;
14140
14141 /* Swap the COMPARE arguments and its mask if there is a
14142 conflicting access in the previous insn. */
14143 prev_insn = prev_active_insn (insn);
14144 if (prev_insn != NULL_RTX && INSN_P (prev_insn)
14145 && reg_referenced_p (*op1, PATTERN (prev_insn)))
14146 s390_swap_cmp (cond, op0, op1, insn);
14147
14148 /* Check if there is a conflict with the next insn. If there
14149 was no conflict with the previous insn, then swap the
14150 COMPARE arguments and its mask. If we already swapped
14151 the operands, or if swapping them would cause a conflict
14152 with the previous insn, issue a NOP after the COMPARE in
14153 order to separate the two instructions. */
14154 next_insn = next_active_insn (insn);
14155 if (next_insn != NULL_RTX && INSN_P (next_insn)
14156 && s390_non_addr_reg_read_p (*op1, next_insn))
14157 {
14158 if (prev_insn != NULL_RTX && INSN_P (prev_insn)
14159 && s390_non_addr_reg_read_p (*op0, prev_insn))
14160 {
14161 if (REGNO (*op1) == 0)
14162 emit_insn_after (gen_nop_lr1 (), insn);
14163 else
14164 emit_insn_after (gen_nop_lr0 (), insn);
14165 insn_added_p = true;
14166 }
14167 else
14168 s390_swap_cmp (cond, op0, op1, insn);
14169 }
14170 return insn_added_p;
14171 }
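
/* Example of the hazard and the fix: in a group like

     lr   %r3,%r9          // previous insn delivers %r3 via bypass
     crj  %r2,%r3,...      // compare-and-branch reads %r3 complemented

   the compare operands (and the condition) are swapped so that the
   bypassed register is no longer the complemented second operand.  If
   swapping would merely move the conflict to the other neighbor, a
   register-to-register NOP is emitted after the compare instead. */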
14172
14173 /* Number of INSNs to be scanned backward in the last BB of the loop
14174 and forward in the first BB of the loop. This usually should be a
14175 bit more than the number of INSNs which could go into one
14176 group. */
14177 #define S390_OSC_SCAN_INSN_NUM 5
14178
14179 /* Scan LOOP for static OSC collisions and return true if an osc_break
14180 should be issued for this loop. */
14181 static bool
14182 s390_adjust_loop_scan_osc (struct loop *loop)
14183
14184 {
14185 HARD_REG_SET modregs, newregs;
14186 rtx_insn *insn, *store_insn = NULL;
14187 rtx set;
14188 struct s390_address addr_store, addr_load;
14189 subrtx_iterator::array_type array;
14190 int insn_count;
14191
14192 CLEAR_HARD_REG_SET (modregs);
14193
14194 insn_count = 0;
14195 FOR_BB_INSNS_REVERSE (loop->latch, insn)
14196 {
14197 if (!INSN_P (insn) || INSN_CODE (insn) <= 0)
14198 continue;
14199
14200 insn_count++;
14201 if (insn_count > S390_OSC_SCAN_INSN_NUM)
14202 return false;
14203
14204 find_all_hard_reg_sets (insn, &newregs, true);
14205 modregs |= newregs;
14206
14207 set = single_set (insn);
14208 if (!set)
14209 continue;
14210
14211 if (MEM_P (SET_DEST (set))
14212 && s390_decompose_address (XEXP (SET_DEST (set), 0), &addr_store))
14213 {
14214 store_insn = insn;
14215 break;
14216 }
14217 }
14218
14219 if (store_insn == NULL_RTX)
14220 return false;
14221
14222 insn_count = 0;
14223 FOR_BB_INSNS (loop->header, insn)
14224 {
14225 if (!INSN_P (insn) || INSN_CODE (insn) <= 0)
14226 continue;
14227
14228 if (insn == store_insn)
14229 return false;
14230
14231 insn_count++;
14232 if (insn_count > S390_OSC_SCAN_INSN_NUM)
14233 return false;
14234
14235 find_all_hard_reg_sets (insn, &newregs, true);
14236 modregs |= newregs;
14237
14238 set = single_set (insn);
14239 if (!set)
14240 continue;
14241
14242 /* An intermediate store disrupts static OSC checking
14243 anyway. */
14244 if (MEM_P (SET_DEST (set))
14245 && s390_decompose_address (XEXP (SET_DEST (set), 0), NULL))
14246 return false;
14247
14248 FOR_EACH_SUBRTX (iter, array, SET_SRC (set), NONCONST)
14249 if (MEM_P (*iter)
14250 && s390_decompose_address (XEXP (*iter, 0), &addr_load)
14251 && rtx_equal_p (addr_load.base, addr_store.base)
14252 && rtx_equal_p (addr_load.indx, addr_store.indx)
14253 && rtx_equal_p (addr_load.disp, addr_store.disp))
14254 {
14255 if ((addr_load.base != NULL_RTX
14256 && TEST_HARD_REG_BIT (modregs, REGNO (addr_load.base)))
14257 || (addr_load.indx != NULL_RTX
14258 && TEST_HARD_REG_BIT (modregs, REGNO (addr_load.indx))))
14259 return true;
14260 }
14261 }
14262 return false;
14263 }
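
/* Example of a loop this flags: the latch stores through 8(%r2,%r3)
   and the header loads from the same base/index/displacement while
   %r2 or %r3 was modified in between.  Such a load risks an
   operand-store-compare (OSC) stall on every iteration, so the
   caller is told to emit an osc_break. */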
14264
14265 /* Look for adjustments which can be done on simple innermost
14266 loops. */
14267 static void
14268 s390_adjust_loops ()
14269 {
14270 struct loop *loop = NULL;
14271
14272 df_analyze ();
14273 compute_bb_for_insn ();
14274
14275 /* Find the loops. */
14276 loop_optimizer_init (AVOID_CFG_MODIFICATIONS);
14277
14278 FOR_EACH_LOOP (loop, LI_ONLY_INNERMOST)
14279 {
14280 if (dump_file)
14281 {
14282 flow_loop_dump (loop, dump_file, NULL, 0);
14283 fprintf (dump_file, ";; OSC loop scan Loop: ");
14284 }
14285 if (loop->latch == NULL
14286 || pc_set (BB_END (loop->latch)) == NULL_RTX
14287 || !s390_adjust_loop_scan_osc (loop))
14288 {
14289 if (dump_file)
14290 {
14291 if (loop->latch == NULL)
14292 fprintf (dump_file, " multiple backward jumps\n");
14293 else
14294 {
14295 fprintf (dump_file, " header insn: %d latch insn: %d ",
14296 INSN_UID (BB_HEAD (loop->header)),
14297 INSN_UID (BB_END (loop->latch)));
14298 if (pc_set (BB_END (loop->latch)) == NULL_RTX)
14299 fprintf (dump_file, " loop does not end with jump\n");
14300 else
14301 fprintf (dump_file, " not instrumented\n");
14302 }
14303 }
14304 }
14305 else
14306 {
14307 rtx_insn *new_insn;
14308
14309 if (dump_file)
14310 fprintf (dump_file, " adding OSC break insn: ");
14311 new_insn = emit_insn_before (gen_osc_break (),
14312 BB_END (loop->latch));
14313 INSN_ADDRESSES_NEW (new_insn, -1);
14314 }
14315 }
14316
14317 loop_optimizer_finalize ();
14318
14319 df_finish_pass (false);
14320 }
14321
14322 /* Perform machine-dependent processing. */
14323
14324 static void
14325 s390_reorg (void)
14326 {
14327 struct constant_pool *pool;
14328 rtx_insn *insn;
14329 int hw_before, hw_after;
14330
14331 if (s390_tune == PROCESSOR_2964_Z13)
14332 s390_adjust_loops ();
14333
14334 /* Make sure all splits have been performed; splits after
14335 machine_dependent_reorg might confuse insn length counts. */
14336 split_all_insns_noflow ();
14337
14338 /* Install the main literal pool and the associated base
14339 register load insns. The literal pool might be > 4096 bytes in
14340 size, so that some of its elements cannot be directly accessed.
14341
14342 To fix this, we split the single literal pool into multiple
14343 pool chunks, reloading the pool base register at various
14344 points throughout the function to ensure it always points to
14345 the pool chunk the following code expects. */
14346
14347 /* Collect the literal pool. */
14348 pool = s390_mainpool_start ();
14349 if (pool)
14350 {
14351 /* Finish up literal pool related changes. */
14352 s390_mainpool_finish (pool);
14353 }
14354 else
14355 {
14356 /* If literal pool overflowed, chunkify it. */
14357 pool = s390_chunkify_start ();
14358 s390_chunkify_finish (pool);
14359 }
14360
14361 /* Generate out-of-pool execute target insns. */
14362 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
14363 {
14364 rtx label;
14365 rtx_insn *target;
14366
14367 label = s390_execute_label (insn);
14368 if (!label)
14369 continue;
14370
14371 gcc_assert (label != const0_rtx);
14372
14373 target = emit_label (XEXP (label, 0));
14374 INSN_ADDRESSES_NEW (target, -1);
14375
14376 if (JUMP_P (insn))
14377 {
14378 target = emit_jump_insn (s390_execute_target (insn));
14379 /* This is important in order to keep a table jump
14380 pointing at the jump table label. Only then is it
14381 recognized as a table jump. */
14382 JUMP_LABEL (target) = JUMP_LABEL (insn);
14383 }
14384 else
14385 target = emit_insn (s390_execute_target (insn));
14386 INSN_ADDRESSES_NEW (target, -1);
14387 }
14388
14389 /* Try to optimize prologue and epilogue further. */
14390 s390_optimize_prologue ();
14391
14392 /* Walk over the insns and do some >=z10 specific changes. */
14393 if (s390_tune >= PROCESSOR_2097_Z10)
14394 {
14395 rtx_insn *insn;
14396 bool insn_added_p = false;
14397
14398 /* The insn lengths and addresses have to be up to date for the
14399 following manipulations. */
14400 shorten_branches (get_insns ());
14401
14402 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
14403 {
14404 if (!INSN_P (insn) || INSN_CODE (insn) <= 0)
14405 continue;
14406
14407 if (JUMP_P (insn))
14408 insn_added_p |= s390_fix_long_loop_prediction (insn);
14409
14410 if ((GET_CODE (PATTERN (insn)) == PARALLEL
14411 || GET_CODE (PATTERN (insn)) == SET)
14412 && s390_tune == PROCESSOR_2097_Z10)
14413 insn_added_p |= s390_z10_optimize_cmp (insn);
14414 }
14415
14416 /* Adjust branches if we added new instructions. */
14417 if (insn_added_p)
14418 shorten_branches (get_insns ());
14419 }
14420
14421 s390_function_num_hotpatch_hw (current_function_decl, &hw_before, &hw_after);
14422 if (hw_after > 0)
14423 {
14424 rtx_insn *insn;
14425
14426 /* Insert NOPs for hotpatching. */
14427 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
14428 /* Emit NOPs
14429 1. inside the area covered by debug information to allow setting
14430 breakpoints at the NOPs,
14431 2. before any insn which results in an asm instruction,
14432 3. before in-function labels to avoid jumping to the NOPs, for
14433 example as part of a loop,
14434 4. before any barrier in case the function is completely empty
14435 (__builtin_unreachable ()) and has neither internal labels nor
14436 active insns.
14437 */
14438 if (active_insn_p (insn) || BARRIER_P (insn) || LABEL_P (insn))
14439 break;
14440 /* Output a series of NOPs before the first active insn. */
14441 while (insn && hw_after > 0)
14442 {
14443 if (hw_after >= 3)
14444 {
14445 emit_insn_before (gen_nop_6_byte (), insn);
14446 hw_after -= 3;
14447 }
14448 else if (hw_after >= 2)
14449 {
14450 emit_insn_before (gen_nop_4_byte (), insn);
14451 hw_after -= 2;
14452 }
14453 else
14454 {
14455 emit_insn_before (gen_nop_2_byte (), insn);
14456 hw_after -= 1;
14457 }
14458 }
14459 }
14460 }
14461
14462 /* Return true if INSN is an fp load insn writing register REGNO. */
14463 static inline bool
14464 s390_fpload_toreg (rtx_insn *insn, unsigned int regno)
14465 {
14466 rtx set;
14467 enum attr_type flag = s390_safe_attr_type (insn);
14468
14469 if (flag != TYPE_FLOADSF && flag != TYPE_FLOADDF)
14470 return false;
14471
14472 set = single_set (insn);
14473
14474 if (set == NULL_RTX)
14475 return false;
14476
14477 if (!REG_P (SET_DEST (set)) || !MEM_P (SET_SRC (set)))
14478 return false;
14479
14480 if (REGNO (SET_DEST (set)) != regno)
14481 return false;
14482
14483 return true;
14484 }
14485
14486 /* This value describes the distance to be avoided between an
14487 arithmetic fp instruction and an fp load writing the same register.
14488 Z10_EARLYLOAD_DISTANCE - 1 as well as Z10_EARLYLOAD_DISTANCE + 1 are
14489 fine, but the exact value has to be avoided. Otherwise the FP
14490 pipeline will throw an exception causing a major penalty. */
14491 #define Z10_EARLYLOAD_DISTANCE 7
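/* I.e. an fp load must not be issued exactly Z10_EARLYLOAD_DISTANCE
   active insns after an arithmetic fp instruction writing the same
   register; one insn more or less in between is fine.  */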
14492
14493 /* Rearrange the ready list in order to avoid the situation described
14494 for Z10_EARLYLOAD_DISTANCE. A problematic load instruction is
14495 moved to the very end of the ready list. */
14496 static void
14497 s390_z10_prevent_earlyload_conflicts (rtx_insn **ready, int *nready_p)
14498 {
14499 unsigned int regno;
14500 int nready = *nready_p;
14501 rtx_insn *tmp;
14502 int i;
14503 rtx_insn *insn;
14504 rtx set;
14505 enum attr_type flag;
14506 int distance;
14507
14508 /* Skip DISTANCE - 1 active insns. */
14509 for (insn = last_scheduled_insn, distance = Z10_EARLYLOAD_DISTANCE - 1;
14510 distance > 0 && insn != NULL_RTX;
14511 distance--, insn = prev_active_insn (insn))
14512 if (CALL_P (insn) || JUMP_P (insn))
14513 return;
14514
14515 if (insn == NULL_RTX)
14516 return;
14517
14518 set = single_set (insn);
14519
14520 if (set == NULL_RTX || !REG_P (SET_DEST (set))
14521 || GET_MODE_CLASS (GET_MODE (SET_DEST (set))) != MODE_FLOAT)
14522 return;
14523
14524 flag = s390_safe_attr_type (insn);
14525
14526 if (flag == TYPE_FLOADSF || flag == TYPE_FLOADDF)
14527 return;
14528
14529 regno = REGNO (SET_DEST (set));
14530 i = nready - 1;
14531
14532 while (!s390_fpload_toreg (ready[i], regno) && i > 0)
14533 i--;
14534
14535 if (!i)
14536 return;
14537
14538 tmp = ready[i];
14539 memmove (&ready[1], &ready[0], sizeof (rtx_insn *) * i);
14540 ready[0] = tmp;
14541 }
14542
14543 /* Returns TRUE if BB is entered via a fallthru edge and all other
14544 incoming edges are less than likely. */
14545 static bool
14546 s390_bb_fallthru_entry_likely (basic_block bb)
14547 {
14548 edge e, fallthru_edge;
14549 edge_iterator ei;
14550
14551 if (!bb)
14552 return false;
14553
14554 fallthru_edge = find_fallthru_edge (bb->preds);
14555 if (!fallthru_edge)
14556 return false;
14557
14558 FOR_EACH_EDGE (e, ei, bb->preds)
14559 if (e != fallthru_edge
14560 && e->probability >= profile_probability::likely ())
14561 return false;
14562
14563 return true;
14564 }
14565
14566 struct s390_sched_state
14567 {
14568 /* Number of insns in the group. */
14569 int group_state;
14570 /* Execution side of the group. */
14571 int side;
14572 /* Group can only hold two insns. */
14573 bool group_of_two;
14574 } s390_sched_state;
14575
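/* The current scheduling state.  It is kept across basic blocks which
   are most likely entered via a fallthru edge and reset otherwise (see
   s390_sched_init).  */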
14576 static struct s390_sched_state sched_state = {0, 1, false};
14577
14578 #define S390_SCHED_ATTR_MASK_CRACKED 0x1
14579 #define S390_SCHED_ATTR_MASK_EXPANDED 0x2
14580 #define S390_SCHED_ATTR_MASK_ENDGROUP 0x4
14581 #define S390_SCHED_ATTR_MASK_GROUPALONE 0x8
14582 #define S390_SCHED_ATTR_MASK_GROUPOFTWO 0x10
14583
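/* Return a bit mask of the OOO grouping attributes of INSN (cracked,
   expanded, endgroup, groupalone, groupoftwo) for the current
   s390_tune CPU level.  */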
14584 static unsigned int
14585 s390_get_sched_attrmask (rtx_insn *insn)
14586 {
14587 unsigned int mask = 0;
14588
14589 switch (s390_tune)
14590 {
14591 case PROCESSOR_2827_ZEC12:
14592 if (get_attr_zEC12_cracked (insn))
14593 mask |= S390_SCHED_ATTR_MASK_CRACKED;
14594 if (get_attr_zEC12_expanded (insn))
14595 mask |= S390_SCHED_ATTR_MASK_EXPANDED;
14596 if (get_attr_zEC12_endgroup (insn))
14597 mask |= S390_SCHED_ATTR_MASK_ENDGROUP;
14598 if (get_attr_zEC12_groupalone (insn))
14599 mask |= S390_SCHED_ATTR_MASK_GROUPALONE;
14600 break;
14601 case PROCESSOR_2964_Z13:
14602 if (get_attr_z13_cracked (insn))
14603 mask |= S390_SCHED_ATTR_MASK_CRACKED;
14604 if (get_attr_z13_expanded (insn))
14605 mask |= S390_SCHED_ATTR_MASK_EXPANDED;
14606 if (get_attr_z13_endgroup (insn))
14607 mask |= S390_SCHED_ATTR_MASK_ENDGROUP;
14608 if (get_attr_z13_groupalone (insn))
14609 mask |= S390_SCHED_ATTR_MASK_GROUPALONE;
14610 if (get_attr_z13_groupoftwo (insn))
14611 mask |= S390_SCHED_ATTR_MASK_GROUPOFTWO;
14612 break;
14613 case PROCESSOR_3906_Z14:
14614 if (get_attr_z14_cracked (insn))
14615 mask |= S390_SCHED_ATTR_MASK_CRACKED;
14616 if (get_attr_z14_expanded (insn))
14617 mask |= S390_SCHED_ATTR_MASK_EXPANDED;
14618 if (get_attr_z14_endgroup (insn))
14619 mask |= S390_SCHED_ATTR_MASK_ENDGROUP;
14620 if (get_attr_z14_groupalone (insn))
14621 mask |= S390_SCHED_ATTR_MASK_GROUPALONE;
14622 if (get_attr_z14_groupoftwo (insn))
14623 mask |= S390_SCHED_ATTR_MASK_GROUPOFTWO;
14624 break;
14625 case PROCESSOR_8561_Z15:
14626 if (get_attr_z15_cracked (insn))
14627 mask |= S390_SCHED_ATTR_MASK_CRACKED;
14628 if (get_attr_z15_expanded (insn))
14629 mask |= S390_SCHED_ATTR_MASK_EXPANDED;
14630 if (get_attr_z15_endgroup (insn))
14631 mask |= S390_SCHED_ATTR_MASK_ENDGROUP;
14632 if (get_attr_z15_groupalone (insn))
14633 mask |= S390_SCHED_ATTR_MASK_GROUPALONE;
14634 if (get_attr_z15_groupoftwo (insn))
14635 mask |= S390_SCHED_ATTR_MASK_GROUPOFTWO;
14636 break;
14637 default:
14638 gcc_unreachable ();
14639 }
14640 return mask;
14641 }
14642
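/* Return a bit mask of the execution units INSN may be dispatched to
   (LSU, FXA, FXB, VFU) for the current s390_tune CPU level and store
   the number of units in *UNITS.  */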
14643 static unsigned int
14644 s390_get_unit_mask (rtx_insn *insn, int *units)
14645 {
14646 unsigned int mask = 0;
14647
14648 switch (s390_tune)
14649 {
14650 case PROCESSOR_2964_Z13:
14651 *units = 4;
14652 if (get_attr_z13_unit_lsu (insn))
14653 mask |= 1 << 0;
14654 if (get_attr_z13_unit_fxa (insn))
14655 mask |= 1 << 1;
14656 if (get_attr_z13_unit_fxb (insn))
14657 mask |= 1 << 2;
14658 if (get_attr_z13_unit_vfu (insn))
14659 mask |= 1 << 3;
14660 break;
14661 case PROCESSOR_3906_Z14:
14662 *units = 4;
14663 if (get_attr_z14_unit_lsu (insn))
14664 mask |= 1 << 0;
14665 if (get_attr_z14_unit_fxa (insn))
14666 mask |= 1 << 1;
14667 if (get_attr_z14_unit_fxb (insn))
14668 mask |= 1 << 2;
14669 if (get_attr_z14_unit_vfu (insn))
14670 mask |= 1 << 3;
14671 break;
14672 case PROCESSOR_8561_Z15:
14673 *units = 4;
14674 if (get_attr_z15_unit_lsu (insn))
14675 mask |= 1 << 0;
14676 if (get_attr_z15_unit_fxa (insn))
14677 mask |= 1 << 1;
14678 if (get_attr_z15_unit_fxb (insn))
14679 mask |= 1 << 2;
14680 if (get_attr_z15_unit_vfu (insn))
14681 mask |= 1 << 3;
14682 break;
14683 default:
14684 gcc_unreachable ();
14685 }
14686 return mask;
14687 }
14688
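/* Return true if INSN uses the FPD unit on z13, z14, or z15.  */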
14689 static bool
14690 s390_is_fpd (rtx_insn *insn)
14691 {
14692 if (insn == NULL_RTX)
14693 return false;
14694
14695 return get_attr_z13_unit_fpd (insn) || get_attr_z14_unit_fpd (insn)
14696 || get_attr_z15_unit_fpd (insn);
14697 }
14698
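/* Return true if INSN uses the FXD unit on z13, z14, or z15.  */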
14699 static bool
14700 s390_is_fxd (rtx_insn *insn)
14701 {
14702 if (insn == NULL_RTX)
14703 return false;
14704
14705 return get_attr_z13_unit_fxd (insn) || get_attr_z14_unit_fxd (insn)
14706 || get_attr_z15_unit_fxd (insn);
14707 }
14708
14709 /* Returns TRUE if INSN is a long-running instruction. */
14710 static bool
14711 s390_is_longrunning (rtx_insn *insn)
14712 {
14713 if (insn == NULL_RTX)
14714 return false;
14715
14716 return s390_is_fxd (insn) || s390_is_fpd (insn);
14717 }
14718
14719
14720 /* Return the scheduling score for INSN. The higher the score the
14721 better. The score is calculated from the OOO scheduling attributes
14722 of INSN and the scheduling state sched_state. */
14723 static int
14724 s390_sched_score (rtx_insn *insn)
14725 {
14726 unsigned int mask = s390_get_sched_attrmask (insn);
14727 int score = 0;
14728
14729 switch (sched_state.group_state)
14730 {
14731 case 0:
14732 /* Try to put insns into the first slot which would otherwise
14733 break a group. */
14734 if ((mask & S390_SCHED_ATTR_MASK_CRACKED) != 0
14735 || (mask & S390_SCHED_ATTR_MASK_EXPANDED) != 0)
14736 score += 5;
14737 if ((mask & S390_SCHED_ATTR_MASK_GROUPALONE) != 0)
14738 score += 10;
14739 break;
14740 case 1:
14741 /* Prefer not cracked insns while trying to put together a
14742 group. */
14743 if ((mask & S390_SCHED_ATTR_MASK_CRACKED) == 0
14744 && (mask & S390_SCHED_ATTR_MASK_EXPANDED) == 0
14745 && (mask & S390_SCHED_ATTR_MASK_GROUPALONE) == 0)
14746 score += 10;
14747 if ((mask & S390_SCHED_ATTR_MASK_ENDGROUP) == 0)
14748 score += 5;
14749 /* If we are in a group of two already, try to schedule another
14750 group-of-two insn to avoid shortening another group. */
14751 if (sched_state.group_of_two
14752 && (mask & S390_SCHED_ATTR_MASK_GROUPOFTWO) != 0)
14753 score += 15;
14754 break;
14755 case 2:
14756 /* Prefer not cracked insns while trying to put together a
14757 group. */
14758 if ((mask & S390_SCHED_ATTR_MASK_CRACKED) == 0
14759 && (mask & S390_SCHED_ATTR_MASK_EXPANDED) == 0
14760 && (mask & S390_SCHED_ATTR_MASK_GROUPALONE) == 0)
14761 score += 10;
14762 /* Prefer endgroup insns in the last slot. */
14763 if ((mask & S390_SCHED_ATTR_MASK_ENDGROUP) != 0)
14764 score += 10;
14765 /* Try to avoid group-of-two insns in the last slot as they will
14766 shorten this group as well as the next one. */
14767 if ((mask & S390_SCHED_ATTR_MASK_GROUPOFTWO) != 0)
14768 score = MAX (0, score - 15);
14769 break;
14770 }
14771
14772 if (s390_tune >= PROCESSOR_2964_Z13)
14773 {
14774 int units, i;
14775 unsigned unit_mask, m = 1;
14776
14777 unit_mask = s390_get_unit_mask (insn, &units);
14778 gcc_assert (units <= MAX_SCHED_UNITS);
14779
14780 /* Add a score in range 0..MAX_SCHED_MIX_SCORE depending on how long
14781 ago the last insn of this unit type got scheduled. This is
14782 supposed to help provide a proper instruction mix to the
14783 CPU. */
14784 for (i = 0; i < units; i++, m <<= 1)
14785 if (m & unit_mask)
14786 score += (last_scheduled_unit_distance[i][sched_state.side]
14787 * MAX_SCHED_MIX_SCORE / MAX_SCHED_MIX_DISTANCE);
14788
14789 int other_side = 1 - sched_state.side;
14790
14791 /* Try to delay long-running insns when side is busy. */
14792 if (s390_is_longrunning (insn))
14793 {
14794 if (s390_is_fxd (insn))
14795 {
14796 if (fxd_longrunning[sched_state.side]
14797 && fxd_longrunning[other_side]
14798 <= fxd_longrunning[sched_state.side])
14799 score = MAX (0, score - 10);
14800
14801 else if (fxd_longrunning[other_side]
14802 >= fxd_longrunning[sched_state.side])
14803 score += 10;
14804 }
14805
14806 if (s390_is_fpd (insn))
14807 {
14808 if (fpd_longrunning[sched_state.side]
14809 && fpd_longrunning[other_side]
14810 <= fpd_longrunning[sched_state.side])
14811 score = MAX (0, score - 10);
14812
14813 else if (fpd_longrunning[other_side]
14814 >= fpd_longrunning[sched_state.side])
14815 score += 10;
14816 }
14817 }
14818 }
14819
14820 return score;
14821 }
14822
14823 /* This function is called via hook TARGET_SCHED_REORDER before
14824 issuing one insn from list READY which contains *NREADYP entries.
14825 For target z10 it reorders load instructions to avoid early load
14826 conflicts in the floating point pipeline. */
14827 static int
14828 s390_sched_reorder (FILE *file, int verbose,
14829 rtx_insn **ready, int *nreadyp, int clock ATTRIBUTE_UNUSED)
14830 {
14831 if (s390_tune == PROCESSOR_2097_Z10
14832 && reload_completed
14833 && *nreadyp > 1)
14834 s390_z10_prevent_earlyload_conflicts (ready, nreadyp);
14835
14836 if (s390_tune >= PROCESSOR_2827_ZEC12
14837 && reload_completed
14838 && *nreadyp > 1)
14839 {
14840 int i;
14841 int last_index = *nreadyp - 1;
14842 int max_index = -1;
14843 int max_score = -1;
14844 rtx_insn *tmp;
14845
14846 /* Just move the insn with the highest score to the top (the
14847 end) of the list. A full sort is not needed since a conflict
14848 in the hazard recognition cannot happen. So the top insn in
14849 the ready list will always be taken. */
14850 for (i = last_index; i >= 0; i--)
14851 {
14852 int score;
14853
14854 if (recog_memoized (ready[i]) < 0)
14855 continue;
14856
14857 score = s390_sched_score (ready[i]);
14858 if (score > max_score)
14859 {
14860 max_score = score;
14861 max_index = i;
14862 }
14863 }
14864
14865 if (max_index != -1)
14866 {
14867 if (max_index != last_index)
14868 {
14869 tmp = ready[max_index];
14870 ready[max_index] = ready[last_index];
14871 ready[last_index] = tmp;
14872
14873 if (verbose > 5)
14874 fprintf (file,
14875 ";;\t\tBACKEND: move insn %d to the top of list\n",
14876 INSN_UID (ready[last_index]));
14877 }
14878 else if (verbose > 5)
14879 fprintf (file,
14880 ";;\t\tBACKEND: best insn %d already on top\n",
14881 INSN_UID (ready[last_index]));
14882 }
14883
14884 if (verbose > 5)
14885 {
14886 fprintf (file, "ready list ooo attributes - sched state: %d\n",
14887 sched_state.group_state);
14888
14889 for (i = last_index; i >= 0; i--)
14890 {
14891 unsigned int sched_mask;
14892 rtx_insn *insn = ready[i];
14893
14894 if (recog_memoized (insn) < 0)
14895 continue;
14896
14897 sched_mask = s390_get_sched_attrmask (insn);
14898 fprintf (file, ";;\t\tBACKEND: insn %d score: %d: ",
14899 INSN_UID (insn),
14900 s390_sched_score (insn));
14901 #define PRINT_SCHED_ATTR(M, ATTR) fprintf (file, "%s ",\
14902 ((M) & sched_mask) ? #ATTR : "");
14903 PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_CRACKED, cracked);
14904 PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_EXPANDED, expanded);
14905 PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_ENDGROUP, endgroup);
14906 PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_GROUPALONE, groupalone);
14907 #undef PRINT_SCHED_ATTR
14908 if (s390_tune >= PROCESSOR_2964_Z13)
14909 {
14910 unsigned int unit_mask, m = 1;
14911 int units, j;
14912
14913 unit_mask = s390_get_unit_mask (insn, &units);
14914 fprintf (file, "(units:");
14915 for (j = 0; j < units; j++, m <<= 1)
14916 if (m & unit_mask)
14917 fprintf (file, " u%d", j);
14918 fprintf (file, ")");
14919 }
14920 fprintf (file, "\n");
14921 }
14922 }
14923 }
14924
14925 return s390_issue_rate ();
14926 }
14927
14928
14929 /* This function is called via hook TARGET_SCHED_VARIABLE_ISSUE after
14930 the scheduler has issued INSN. It stores the last issued insn into
14931 last_scheduled_insn in order to make it available for
14932 s390_sched_reorder. */
14933 static int
14934 s390_sched_variable_issue (FILE *file, int verbose, rtx_insn *insn, int more)
14935 {
14936 last_scheduled_insn = insn;
14937
14938 bool ends_group = false;
14939
14940 if (s390_tune >= PROCESSOR_2827_ZEC12
14941 && reload_completed
14942 && recog_memoized (insn) >= 0)
14943 {
14944 unsigned int mask = s390_get_sched_attrmask (insn);
14945
14946 if ((mask & S390_SCHED_ATTR_MASK_GROUPOFTWO) != 0)
14947 sched_state.group_of_two = true;
14948
14949 /* If this is a group-of-two insn, we actually ended the last group
14950 and this insn is the first one of the new group. */
14951 if (sched_state.group_state == 2 && sched_state.group_of_two)
14952 {
14953 sched_state.side = sched_state.side ? 0 : 1;
14954 sched_state.group_state = 0;
14955 }
14956
14957 /* Longrunning and side bookkeeping. */
14958 for (int i = 0; i < 2; i++)
14959 {
14960 fxd_longrunning[i] = MAX (0, fxd_longrunning[i] - 1);
14961 fpd_longrunning[i] = MAX (0, fpd_longrunning[i] - 1);
14962 }
14963
14964 unsigned latency = insn_default_latency (insn);
14965 if (s390_is_longrunning (insn))
14966 {
14967 if (s390_is_fxd (insn))
14968 fxd_longrunning[sched_state.side] = latency;
14969 else
14970 fpd_longrunning[sched_state.side] = latency;
14971 }
14972
14973 if (s390_tune >= PROCESSOR_2964_Z13)
14974 {
14975 int units, i;
14976 unsigned unit_mask, m = 1;
14977
14978 unit_mask = s390_get_unit_mask (insn, &units);
14979 gcc_assert (units <= MAX_SCHED_UNITS);
14980
14981 for (i = 0; i < units; i++, m <<= 1)
14982 if (m & unit_mask)
14983 last_scheduled_unit_distance[i][sched_state.side] = 0;
14984 else if (last_scheduled_unit_distance[i][sched_state.side]
14985 < MAX_SCHED_MIX_DISTANCE)
14986 last_scheduled_unit_distance[i][sched_state.side]++;
14987 }
14988
14989 if ((mask & S390_SCHED_ATTR_MASK_CRACKED) != 0
14990 || (mask & S390_SCHED_ATTR_MASK_EXPANDED) != 0
14991 || (mask & S390_SCHED_ATTR_MASK_GROUPALONE) != 0
14992 || (mask & S390_SCHED_ATTR_MASK_ENDGROUP) != 0)
14993 {
14994 sched_state.group_state = 0;
14995 ends_group = true;
14996 }
14997 else
14998 {
14999 switch (sched_state.group_state)
15000 {
15001 case 0:
15002 sched_state.group_state++;
15003 break;
15004 case 1:
15005 sched_state.group_state++;
15006 if (sched_state.group_of_two)
15007 {
15008 sched_state.group_state = 0;
15009 ends_group = true;
15010 }
15011 break;
15012 case 2:
15013 sched_state.group_state++;
15014 ends_group = true;
15015 break;
15016 }
15017 }
15018
15019 if (verbose > 5)
15020 {
15021 unsigned int sched_mask;
15022
15023 sched_mask = s390_get_sched_attrmask (insn);
15024
15025 fprintf (file, ";;\t\tBACKEND: insn %d: ", INSN_UID (insn));
15026 #define PRINT_SCHED_ATTR(M, ATTR) fprintf (file, "%s ", ((M) & sched_mask) ? #ATTR : "");
15027 PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_CRACKED, cracked);
15028 PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_EXPANDED, expanded);
15029 PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_ENDGROUP, endgroup);
15030 PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_GROUPALONE, groupalone);
15031 #undef PRINT_SCHED_ATTR
15032
15033 if (s390_tune >= PROCESSOR_2964_Z13)
15034 {
15035 unsigned int unit_mask, m = 1;
15036 int units, j;
15037
15038 unit_mask = s390_get_unit_mask (insn, &units);
15039 fprintf (file, "(units:");
15040 for (j = 0; j < units; j++, m <<= 1)
15041 if (m & unit_mask)
15042 fprintf (file, " %d", j);
15043 fprintf (file, ")");
15044 }
15045 fprintf (file, " sched state: %d\n", sched_state.group_state);
15046
15047 if (s390_tune >= PROCESSOR_2964_Z13)
15048 {
15049 int units, j;
15050
15051 s390_get_unit_mask (insn, &units);
15052
15053 fprintf (file, ";;\t\tBACKEND: units on this side unused for: ");
15054 for (j = 0; j < units; j++)
15055 fprintf (file, "%d:%d ", j,
15056 last_scheduled_unit_distance[j][sched_state.side]);
15057 fprintf (file, "\n");
15058 }
15059 }
15060
15061 /* If this insn ended a group, the next will be on the other side. */
15062 if (ends_group)
15063 {
15064 sched_state.group_state = 0;
15065 sched_state.side = sched_state.side ? 0 : 1;
15066 sched_state.group_of_two = false;
15067 }
15068 }
15069
15070 if (GET_CODE (PATTERN (insn)) != USE
15071 && GET_CODE (PATTERN (insn)) != CLOBBER)
15072 return more - 1;
15073 else
15074 return more;
15075 }
15076
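/* This function is called via hook TARGET_SCHED_INIT when starting to
   schedule a new block of insns.  */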
15077 static void
15078 s390_sched_init (FILE *file ATTRIBUTE_UNUSED,
15079 int verbose ATTRIBUTE_UNUSED,
15080 int max_ready ATTRIBUTE_UNUSED)
15081 {
15082 /* If the next basic block is most likely entered via a fallthru edge
15083 we keep the last sched state. Otherwise we start a new group.
15084 The scheduler traverses basic blocks in "instruction stream" ordering
15085 so if we see a fallthru edge here, sched_state will be that of its
15086 source block.
15087
15088 current_sched_info->prev_head is the insn before the first insn of the
15089 block of insns to be scheduled.
15090 */
15091 rtx_insn *insn = current_sched_info->prev_head
15092 ? NEXT_INSN (current_sched_info->prev_head) : NULL;
15093 basic_block bb = insn ? BLOCK_FOR_INSN (insn) : NULL;
15094 if (s390_tune < PROCESSOR_2964_Z13 || !s390_bb_fallthru_entry_likely (bb))
15095 {
15096 last_scheduled_insn = NULL;
15097 memset (last_scheduled_unit_distance, 0,
15098 MAX_SCHED_UNITS * NUM_SIDES * sizeof (int));
15099 sched_state.group_state = 0;
15100 sched_state.group_of_two = false;
15101 }
15102 }
15103
15104 /* This target hook implementation for TARGET_LOOP_UNROLL_ADJUST calculates
15105 the number of times struct loop *LOOP should be unrolled if tuned for CPUs
15106 with a built-in stride prefetcher.
15107 The loop is analyzed for memory accesses by walking over each rtx of
15108 the loop body. Depending on the loop_depth and the amount of
15109 memory accesses a new number <=nunroll is returned to improve the
15110 behavior of the hardware prefetch unit. */
15111 static unsigned
15112 s390_loop_unroll_adjust (unsigned nunroll, struct loop *loop)
15113 {
15114 basic_block *bbs;
15115 rtx_insn *insn;
15116 unsigned i;
15117 unsigned mem_count = 0;
15118
15119 if (s390_tune < PROCESSOR_2097_Z10)
15120 return nunroll;
15121
15122 /* Count the number of memory references within the loop body. */
15123 bbs = get_loop_body (loop);
15124 subrtx_iterator::array_type array;
15125 for (i = 0; i < loop->num_nodes; i++)
15126 FOR_BB_INSNS (bbs[i], insn)
15127 if (INSN_P (insn) && INSN_CODE (insn) != -1)
15128 {
15129 rtx set;
15130
15131 /* The runtime of small loops with memory block operations
15132 will be determined by the memory operation. Unrolling
15133 doesn't help here. Measurements to confirm
15134 this were only done on recent CPU levels. So better do
15135 not change anything for older CPUs. */
15136 if (s390_tune >= PROCESSOR_2964_Z13
15137 && loop->ninsns <= BLOCK_MEM_OPS_LOOP_INSNS
15138 && ((set = single_set (insn)) != NULL_RTX)
15139 && ((GET_MODE (SET_DEST (set)) == BLKmode
15140 && (GET_MODE (SET_SRC (set)) == BLKmode
15141 || SET_SRC (set) == const0_rtx))
15142 || (GET_CODE (SET_SRC (set)) == COMPARE
15143 && GET_MODE (XEXP (SET_SRC (set), 0)) == BLKmode
15144 && GET_MODE (XEXP (SET_SRC (set), 1)) == BLKmode)))
15145 return 1;
15146
15147 FOR_EACH_SUBRTX (iter, array, PATTERN (insn), NONCONST)
15148 if (MEM_P (*iter))
15149 mem_count += 1;
15150 }
15151 free (bbs);
15152
15153 /* Prevent division by zero; nunroll does not need to be adjusted in this case. */
15154 if (mem_count == 0)
15155 return nunroll;
15156
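/* The constant in each case, divided by the number of memory
   references, gives the unroll factor; e.g. a loop of depth 1
   containing 4 MEMs is unrolled at most MIN (nunroll, 28 / 4)
   = 7 times.  */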
15157 switch (loop_depth (loop))
15158 {
15159 case 1:
15160 return MIN (nunroll, 28 / mem_count);
15161 case 2:
15162 return MIN (nunroll, 22 / mem_count);
15163 default:
15164 return MIN (nunroll, 16 / mem_count);
15165 }
15166 }
15167
15168 /* Restore the current options. This is a hook function and also called
15169 internally. */
15170
15171 static void
15172 s390_function_specific_restore (struct gcc_options *opts,
15173 struct cl_target_option *ptr ATTRIBUTE_UNUSED)
15174 {
15175 opts->x_s390_cost_pointer = (long)processor_table[opts->x_s390_tune].cost;
15176 }
15177
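/* Set the default function alignment in OPTS unless overridden by the
   user.  */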
15178 static void
15179 s390_default_align (struct gcc_options *opts)
15180 {
15181 /* Set the default function alignment to 16 in order to get rid of
15182 some unwanted performance effects. */
15183 if (opts->x_flag_align_functions && !opts->x_str_align_functions
15184 && opts->x_s390_tune >= PROCESSOR_2964_Z13)
15185 opts->x_str_align_functions = "16";
15186 }
15187
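/* Implement the TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE hook.  */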
15188 static void
15189 s390_override_options_after_change (void)
15190 {
15191 s390_default_align (&global_options);
15192 }
15193
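/* Validate the target options in OPTS and set up defaults derived from
   them.  OPTS_SET tells which options have been explicitly set.  Called
   from s390_option_override as well as for target attribute/pragma
   processing.  */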
15194 static void
15195 s390_option_override_internal (struct gcc_options *opts,
15196 const struct gcc_options *opts_set)
15197 {
15198 /* Architecture mode defaults according to ABI. */
15199 if (!(opts_set->x_target_flags & MASK_ZARCH))
15200 {
15201 if (TARGET_64BIT)
15202 opts->x_target_flags |= MASK_ZARCH;
15203 else
15204 opts->x_target_flags &= ~MASK_ZARCH;
15205 }
15206
15207 /* Set the march default in case it hasn't been specified on cmdline. */
15208 if (!opts_set->x_s390_arch)
15209 opts->x_s390_arch = PROCESSOR_2064_Z900;
15210
15211 opts->x_s390_arch_flags = processor_flags_table[(int) opts->x_s390_arch];
15212
15213 /* Determine processor to tune for. */
15214 if (!opts_set->x_s390_tune)
15215 opts->x_s390_tune = opts->x_s390_arch;
15216
15217 opts->x_s390_tune_flags = processor_flags_table[opts->x_s390_tune];
15218
15219 /* Sanity checks. */
15220 if (opts->x_s390_arch == PROCESSOR_NATIVE
15221 || opts->x_s390_tune == PROCESSOR_NATIVE)
15222 gcc_unreachable ();
15223 if (TARGET_64BIT && !TARGET_ZARCH_P (opts->x_target_flags))
15224 error ("64-bit ABI not supported in ESA/390 mode");
15225
15226 if (opts->x_s390_indirect_branch == indirect_branch_thunk_inline
15227 || opts->x_s390_indirect_branch_call == indirect_branch_thunk_inline
15228 || opts->x_s390_function_return == indirect_branch_thunk_inline
15229 || opts->x_s390_function_return_reg == indirect_branch_thunk_inline
15230 || opts->x_s390_function_return_mem == indirect_branch_thunk_inline)
15231 error ("thunk-inline is only supported with %<-mindirect-branch-jump%>");
15232
15233 if (opts->x_s390_indirect_branch != indirect_branch_keep)
15234 {
15235 if (!opts_set->x_s390_indirect_branch_call)
15236 opts->x_s390_indirect_branch_call = opts->x_s390_indirect_branch;
15237
15238 if (!opts_set->x_s390_indirect_branch_jump)
15239 opts->x_s390_indirect_branch_jump = opts->x_s390_indirect_branch;
15240 }
15241
15242 if (opts->x_s390_function_return != indirect_branch_keep)
15243 {
15244 if (!opts_set->x_s390_function_return_reg)
15245 opts->x_s390_function_return_reg = opts->x_s390_function_return;
15246
15247 if (!opts_set->x_s390_function_return_mem)
15248 opts->x_s390_function_return_mem = opts->x_s390_function_return;
15249 }
15250
15251 /* Enable hardware transactions if available and not explicitly
15252 disabled by the user, e.g. with -m31 -march=zEC12 -mzarch. */
15253 if (!TARGET_OPT_HTM_P (opts_set->x_target_flags))
15254 {
15255 if (TARGET_CPU_HTM_P (opts) && TARGET_ZARCH_P (opts->x_target_flags))
15256 opts->x_target_flags |= MASK_OPT_HTM;
15257 else
15258 opts->x_target_flags &= ~MASK_OPT_HTM;
15259 }
15260
15261 if (TARGET_OPT_VX_P (opts_set->x_target_flags))
15262 {
15263 if (TARGET_OPT_VX_P (opts->x_target_flags))
15264 {
15265 if (!TARGET_CPU_VX_P (opts))
15266 error ("hardware vector support not available on %s",
15267 processor_table[(int)opts->x_s390_arch].name);
15268 if (TARGET_SOFT_FLOAT_P (opts->x_target_flags))
15269 error ("hardware vector support not available with "
15270 "%<-msoft-float%>");
15271 }
15272 }
15273 else
15274 {
15275 if (TARGET_CPU_VX_P (opts))
15276 /* Enable vector support if available and not explicitly disabled
15277 by the user, e.g. with -m31 -march=z13 -mzarch. */
15278 opts->x_target_flags |= MASK_OPT_VX;
15279 else
15280 opts->x_target_flags &= ~MASK_OPT_VX;
15281 }
15282
15283 /* Use hardware DFP if available and not explicitly disabled by
15284 the user, e.g. with -m31 -march=z10 -mzarch. */
15285 if (!TARGET_HARD_DFP_P (opts_set->x_target_flags))
15286 {
15287 if (TARGET_DFP_P (opts))
15288 opts->x_target_flags |= MASK_HARD_DFP;
15289 else
15290 opts->x_target_flags &= ~MASK_HARD_DFP;
15291 }
15292
15293 if (TARGET_HARD_DFP_P (opts->x_target_flags) && !TARGET_DFP_P (opts))
15294 {
15295 if (TARGET_HARD_DFP_P (opts_set->x_target_flags))
15296 {
15297 if (!TARGET_CPU_DFP_P (opts))
15298 error ("hardware decimal floating point instructions"
15299 " not available on %s",
15300 processor_table[(int)opts->x_s390_arch].name);
15301 if (!TARGET_ZARCH_P (opts->x_target_flags))
15302 error ("hardware decimal floating point instructions"
15303 " not available in ESA/390 mode");
15304 }
15305 else
15306 opts->x_target_flags &= ~MASK_HARD_DFP;
15307 }
15308
15309 if (TARGET_SOFT_FLOAT_P (opts_set->x_target_flags)
15310 && TARGET_SOFT_FLOAT_P (opts->x_target_flags))
15311 {
15312 if (TARGET_HARD_DFP_P (opts_set->x_target_flags)
15313 && TARGET_HARD_DFP_P (opts->x_target_flags))
15314 error ("%<-mhard-dfp%> can%'t be used in conjunction with "
15315 "%<-msoft-float%>");
15316
15317 opts->x_target_flags &= ~MASK_HARD_DFP;
15318 }
15319
15320 if (TARGET_BACKCHAIN_P (opts->x_target_flags)
15321 && TARGET_PACKED_STACK_P (opts->x_target_flags)
15322 && TARGET_HARD_FLOAT_P (opts->x_target_flags))
15323 error ("%<-mbackchain%> %<-mpacked-stack%> %<-mhard-float%> are not "
15324 "supported in combination");
15325
15326 if (opts->x_s390_stack_size)
15327 {
15328 if (opts->x_s390_stack_guard >= opts->x_s390_stack_size)
15329 error ("stack size must be greater than the stack guard value");
15330 else if (opts->x_s390_stack_size > 1 << 16)
15331 error ("stack size must not be greater than 64k");
15332 }
15333 else if (opts->x_s390_stack_guard)
15334 error ("%<-mstack-guard%> implies use of %<-mstack-size%>");
15335
15336 /* Our implementation of the stack probe requires the probe interval
15337 to be used as displacement in an address operand. The maximum
15338 probe interval currently is 64k. This would exceed short
15339 displacements. Trim that value down to 4k if that happens. This
15340 might result in too many probes being generated, but only on the
15341 oldest supported machine level, z900. */
15342 if (!DISP_IN_RANGE ((1 << param_stack_clash_protection_probe_interval)))
15343 param_stack_clash_protection_probe_interval = 12;
15344
15345 #if TARGET_TPF != 0
15346 if (!CONST_OK_FOR_J (opts->x_s390_tpf_trace_hook_prologue_check))
15347 error ("-mtpf-trace-hook-prologue-check requires integer in range 0..4095");
15348
15349 if (!CONST_OK_FOR_J (opts->x_s390_tpf_trace_hook_prologue_target))
15350 error ("-mtpf-trace-hook-prologue-target requires integer in range 0..4095");
15351
15352 if (!CONST_OK_FOR_J (opts->x_s390_tpf_trace_hook_epilogue_check))
15353 error ("-mtpf-trace-hook-epilogue-check requires integer in range 0..4095");
15354
15355 if (!CONST_OK_FOR_J (opts->x_s390_tpf_trace_hook_epilogue_target))
15356 error ("-mtpf-trace-hook-epilogue-target requires integer in range 0..4095");
15357
15358 if (s390_tpf_trace_skip)
15359 {
15360 opts->x_s390_tpf_trace_hook_prologue_target = TPF_TRACE_PROLOGUE_SKIP_TARGET;
15361 opts->x_s390_tpf_trace_hook_epilogue_target = TPF_TRACE_EPILOGUE_SKIP_TARGET;
15362 }
15363 #endif
15364
15365 #ifdef TARGET_DEFAULT_LONG_DOUBLE_128
15366 if (!TARGET_LONG_DOUBLE_128_P (opts_set->x_target_flags))
15367 opts->x_target_flags |= MASK_LONG_DOUBLE_128;
15368 #endif
15369
15370 if (opts->x_s390_tune >= PROCESSOR_2097_Z10)
15371 {
15372 SET_OPTION_IF_UNSET (opts, opts_set, param_max_unrolled_insns,
15373 100);
15374 SET_OPTION_IF_UNSET (opts, opts_set, param_max_unroll_times, 32);
15375 SET_OPTION_IF_UNSET (opts, opts_set, param_max_completely_peeled_insns,
15376 2000);
15377 SET_OPTION_IF_UNSET (opts, opts_set, param_max_completely_peel_times,
15378 64);
15379 }
15380
15381 SET_OPTION_IF_UNSET (opts, opts_set, param_max_pending_list_length,
15382 256);
15383 /* Values for loop prefetching. */
15384 SET_OPTION_IF_UNSET (opts, opts_set, param_l1_cache_line_size, 256);
15385 SET_OPTION_IF_UNSET (opts, opts_set, param_l1_cache_size, 128);
15386 /* s390 has more than 2 cache levels and the sizes are much larger.
15387 Since we are always running virtualized, assume that we only get a
15388 small part of the caches above L1. */
15389 SET_OPTION_IF_UNSET (opts, opts_set, param_l2_cache_size, 1500);
15390 SET_OPTION_IF_UNSET (opts, opts_set,
15391 param_prefetch_min_insn_to_mem_ratio, 2);
15392 SET_OPTION_IF_UNSET (opts, opts_set, param_simultaneous_prefetches, 6);
15393
15394 /* Use the alternative scheduling-pressure algorithm by default. */
15395 SET_OPTION_IF_UNSET (opts, opts_set, param_sched_pressure_algorithm, 2);
15396 SET_OPTION_IF_UNSET (opts, opts_set, param_min_vect_loop_bound, 2);
15397
15398 /* Use aggressive inlining parameters. */
15399 if (opts->x_s390_tune >= PROCESSOR_2964_Z13)
15400 {
15401 SET_OPTION_IF_UNSET (opts, opts_set, param_inline_min_speedup, 2);
15402 SET_OPTION_IF_UNSET (opts, opts_set, param_max_inline_insns_auto, 80);
15403 }
15404
15405 /* Set the default alignment. */
15406 s390_default_align (opts);
15407
15408 /* Call target specific restore function to do post-init work. At the moment,
15409 this just sets opts->x_s390_cost_pointer. */
15410 s390_function_specific_restore (opts, NULL);
15411
15412 /* Check whether -mfentry is supported. It cannot be used in 31-bit mode,
15413 because 31-bit PLT stubs assume that %r12 contains the GOT address, which
15414 is not the case when the code runs before the prologue. */
15415 if (opts->x_flag_fentry && !TARGET_64BIT)
15416 error ("%<-mfentry%> is supported only for 64-bit CPUs");
15417 }
15418
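/* Implement the TARGET_OPTION_OVERRIDE hook.  */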
15419 static void
15420 s390_option_override (void)
15421 {
15422 unsigned int i;
15423 cl_deferred_option *opt;
15424 vec<cl_deferred_option> *v =
15425 (vec<cl_deferred_option> *) s390_deferred_options;
15426
15427 if (v)
15428 FOR_EACH_VEC_ELT (*v, i, opt)
15429 {
15430 switch (opt->opt_index)
15431 {
15432 case OPT_mhotpatch_:
15433 {
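/* The argument has the form n,m, requesting n halfwords of
   NOPs before and m halfwords after the function label.  */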
15434 int val1;
15435 int val2;
15436 char *s = strtok (ASTRDUP (opt->arg), ",");
15437 char *t = strtok (NULL, "\0");
15438
15439 if (t != NULL)
15440 {
15441 val1 = integral_argument (s);
15442 val2 = integral_argument (t);
15443 }
15444 else
15445 {
15446 val1 = -1;
15447 val2 = -1;
15448 }
15449 if (val1 == -1 || val2 == -1)
15450 {
15451 /* The argument is not a plain number. */
15452 error ("arguments to %qs should be non-negative integers",
15453 "-mhotpatch=n,m");
15454 break;
15455 }
15456 else if (val1 > s390_hotpatch_hw_max
15457 || val2 > s390_hotpatch_hw_max)
15458 {
15459 error ("argument to %qs is too large (max. %d)",
15460 "-mhotpatch=n,m", s390_hotpatch_hw_max);
15461 break;
15462 }
15463 s390_hotpatch_hw_before_label = val1;
15464 s390_hotpatch_hw_after_label = val2;
15465 break;
15466 }
15467 default:
15468 gcc_unreachable ();
15469 }
15470 }
15471
15472 /* Set up function hooks. */
15473 init_machine_status = s390_init_machine_status;
15474
15475 s390_option_override_internal (&global_options, &global_options_set);
15476
15477 /* Save the initial options in case the user does function specific
15478 options. */
15479 target_option_default_node = build_target_option_node (&global_options);
15480 target_option_current_node = target_option_default_node;
15481
15482 /* This cannot reside in s390_option_optimization_table since HAVE_prefetch
15483 requires the arch flags to be evaluated already. Since prefetching
15484 is beneficial on s390, we enable it if available. */
15485 if (flag_prefetch_loop_arrays < 0 && HAVE_prefetch && optimize >= 3)
15486 flag_prefetch_loop_arrays = 1;
15487
15488 if (!s390_pic_data_is_text_relative && !flag_pic)
15489 error ("%<-mno-pic-data-is-text-relative%> cannot be used without "
15490 "%<-fpic%>/%<-fPIC%>");
15491
15492 if (TARGET_TPF)
15493 {
15494 /* Don't emit DWARF3/4 unless specifically selected. The TPF
15495 debuggers do not yet support DWARF 3/4. */
15496 if (!global_options_set.x_dwarf_strict)
15497 dwarf_strict = 1;
15498 if (!global_options_set.x_dwarf_version)
15499 dwarf_version = 2;
15500 }
15501 }
15502
15503 #if S390_USE_TARGET_ATTRIBUTE
15504 /* Inner function to process the attribute((target(...))); take an argument and
15505 set the current options from it. If we have a list, recursively go
15506 over the list. */
15507
15508 static bool
15509 s390_valid_target_attribute_inner_p (tree args,
15510 struct gcc_options *opts,
15511 struct gcc_options *new_opts_set,
15512 bool force_pragma)
15513 {
15514 char *next_optstr;
15515 bool ret = true;
15516
15517 #define S390_ATTRIB(S,O,A) { S, sizeof (S)-1, O, A, 0 }
15518 #define S390_PRAGMA(S,O,A) { S, sizeof (S)-1, O, A, 1 }
15519 static const struct
15520 {
15521 const char *string;
15522 size_t len;
15523 int opt;
15524 int has_arg;
15525 int only_as_pragma;
15526 } attrs[] = {
15527 /* enum options */
15528 S390_ATTRIB ("arch=", OPT_march_, 1),
15529 S390_ATTRIB ("tune=", OPT_mtune_, 1),
15530 /* uinteger options */
15531 S390_ATTRIB ("stack-guard=", OPT_mstack_guard_, 1),
15532 S390_ATTRIB ("stack-size=", OPT_mstack_size_, 1),
15533 S390_ATTRIB ("branch-cost=", OPT_mbranch_cost_, 1),
15534 S390_ATTRIB ("warn-framesize=", OPT_mwarn_framesize_, 1),
15535 /* flag options */
15536 S390_ATTRIB ("backchain", OPT_mbackchain, 0),
15537 S390_ATTRIB ("hard-dfp", OPT_mhard_dfp, 0),
15538 S390_ATTRIB ("hard-float", OPT_mhard_float, 0),
15539 S390_ATTRIB ("htm", OPT_mhtm, 0),
15540 S390_ATTRIB ("vx", OPT_mvx, 0),
15541 S390_ATTRIB ("packed-stack", OPT_mpacked_stack, 0),
15542 S390_ATTRIB ("small-exec", OPT_msmall_exec, 0),
15543 S390_ATTRIB ("soft-float", OPT_msoft_float, 0),
15544 S390_ATTRIB ("mvcle", OPT_mmvcle, 0),
15545 S390_PRAGMA ("zvector", OPT_mzvector, 0),
15546 /* boolean options */
15547 S390_ATTRIB ("warn-dynamicstack", OPT_mwarn_dynamicstack, 0),
15548 };
15549 #undef S390_ATTRIB
15550 #undef S390_PRAGMA
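/* E.g. __attribute__ ((target ("arch=z13,no-vx"))) is processed as the
   enum option "arch=" with argument "z13" followed by the negated flag
   option "vx".  */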
15551
15552 /* If this is a list, recurse to get the options. */
15553 if (TREE_CODE (args) == TREE_LIST)
15554 {
15555 bool ret = true;
15556 int num_pragma_values;
15557 int i;
15558
15559 /* Note: attribs.c:decl_attributes prepends the values from
15560 current_target_pragma to the list of target attributes. To determine
15561 whether we're looking at a value of the attribute or the pragma we
15562 assume that the first [list_length (current_target_pragma)] values in
15563 the list are the values from the pragma. */
15564 num_pragma_values = (!force_pragma && current_target_pragma != NULL)
15565 ? list_length (current_target_pragma) : 0;
15566 for (i = 0; args; args = TREE_CHAIN (args), i++)
15567 {
15568 bool is_pragma;
15569
15570 is_pragma = (force_pragma || i < num_pragma_values);
15571 if (TREE_VALUE (args)
15572 && !s390_valid_target_attribute_inner_p (TREE_VALUE (args),
15573 opts, new_opts_set,
15574 is_pragma))
15575 {
15576 ret = false;
15577 }
15578 }
15579 return ret;
15580 }
15581
15582 else if (TREE_CODE (args) != STRING_CST)
15583 {
15584 error ("attribute %<target%> argument not a string");
15585 return false;
15586 }
15587
15588 /* Handle multiple arguments separated by commas. */
15589 next_optstr = ASTRDUP (TREE_STRING_POINTER (args));
15590
15591 while (next_optstr && *next_optstr != '\0')
15592 {
15593 char *p = next_optstr;
15594 char *orig_p = p;
15595 char *comma = strchr (next_optstr, ',');
15596 size_t len, opt_len;
15597 int opt;
15598 bool opt_set_p;
15599 char ch;
15600 unsigned i;
15601 int mask = 0;
15602 enum cl_var_type var_type;
15603 bool found;
15604
15605 if (comma)
15606 {
15607 *comma = '\0';
15608 len = comma - next_optstr;
15609 next_optstr = comma + 1;
15610 }
15611 else
15612 {
15613 len = strlen (p);
15614 next_optstr = NULL;
15615 }
15616
15617 /* Recognize no-xxx. */
15618 if (len > 3 && p[0] == 'n' && p[1] == 'o' && p[2] == '-')
15619 {
15620 opt_set_p = false;
15621 p += 3;
15622 len -= 3;
15623 }
15624 else
15625 opt_set_p = true;
15626
15627 /* Find the option. */
15628 ch = *p;
15629 found = false;
15630 for (i = 0; i < ARRAY_SIZE (attrs); i++)
15631 {
15632 opt_len = attrs[i].len;
15633 if (ch == attrs[i].string[0]
15634 && ((attrs[i].has_arg) ? len > opt_len : len == opt_len)
15635 && memcmp (p, attrs[i].string, opt_len) == 0)
15636 {
15637 opt = attrs[i].opt;
15638 if (!opt_set_p && cl_options[opt].cl_reject_negative)
15639 continue;
15640 mask = cl_options[opt].var_value;
15641 var_type = cl_options[opt].var_type;
15642 found = true;
15643 break;
15644 }
15645 }
15646
15647 /* Process the option. */
15648 if (!found)
15649 {
15650 error ("attribute(target(\"%s\")) is unknown", orig_p);
15651 return false;
15652 }
15653 else if (attrs[i].only_as_pragma && !force_pragma)
15654 {
15655 /* Value is not allowed for the target attribute. */
15656 error ("value %qs is not supported by attribute %<target%>",
15657 attrs[i].string);
15658 return false;
15659 }
15660
15661 else if (var_type == CLVC_BIT_SET || var_type == CLVC_BIT_CLEAR)
15662 {
15663 if (var_type == CLVC_BIT_CLEAR)
15664 opt_set_p = !opt_set_p;
15665
15666 if (opt_set_p)
15667 opts->x_target_flags |= mask;
15668 else
15669 opts->x_target_flags &= ~mask;
15670 new_opts_set->x_target_flags |= mask;
15671 }
15672
15673 else if (cl_options[opt].var_type == CLVC_BOOLEAN)
15674 {
15675 int value;
15676
15677 if (cl_options[opt].cl_uinteger)
15678 {
15679 /* Unsigned integer argument. Code based on the function
15680 decode_cmdline_option () in opts-common.c. */
15681 value = integral_argument (p + opt_len);
15682 }
15683 else
15684 value = (opt_set_p) ? 1 : 0;
15685
15686 if (value != -1)
15687 {
15688 struct cl_decoded_option decoded;
15689
15690 /* Value range check; only implemented for numeric and boolean
15691 options at the moment. */
15692 generate_option (opt, NULL, value, CL_TARGET, &decoded);
15693 s390_handle_option (opts, new_opts_set, &decoded, input_location);
15694 set_option (opts, new_opts_set, opt, value,
15695 p + opt_len, DK_UNSPECIFIED, input_location,
15696 global_dc);
15697 }
15698 else
15699 {
15700 error ("attribute(target(\"%s\")) is unknown", orig_p);
15701 ret = false;
15702 }
15703 }
15704
15705 else if (cl_options[opt].var_type == CLVC_ENUM)
15706 {
15707 bool arg_ok;
15708 int value;
15709
15710 arg_ok = opt_enum_arg_to_value (opt, p + opt_len, &value, CL_TARGET);
15711 if (arg_ok)
15712 set_option (opts, new_opts_set, opt, value,
15713 p + opt_len, DK_UNSPECIFIED, input_location,
15714 global_dc);
15715 else
15716 {
15717 error ("attribute(target(\"%s\")) is unknown", orig_p);
15718 ret = false;
15719 }
15720 }
15721
15722 else
15723 gcc_unreachable ();
15724 }
15725 return ret;
15726 }
15727
15728 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */
15729
15730 tree
15731 s390_valid_target_attribute_tree (tree args,
15732 struct gcc_options *opts,
15733 const struct gcc_options *opts_set,
15734 bool force_pragma)
15735 {
15736 tree t = NULL_TREE;
15737 struct gcc_options new_opts_set;
15738
15739 memset (&new_opts_set, 0, sizeof (new_opts_set));
15740
15741 /* Process each of the options on the chain. */
15742 if (! s390_valid_target_attribute_inner_p (args, opts, &new_opts_set,
15743 force_pragma))
15744 return error_mark_node;
15745
15746 /* If some option was set (even if it has not changed), rerun
15747 s390_option_override_internal, and then save the options away. */
15748 if (new_opts_set.x_target_flags
15749 || new_opts_set.x_s390_arch
15750 || new_opts_set.x_s390_tune
15751 || new_opts_set.x_s390_stack_guard
15752 || new_opts_set.x_s390_stack_size
15753 || new_opts_set.x_s390_branch_cost
15754 || new_opts_set.x_s390_warn_framesize
15755 || new_opts_set.x_s390_warn_dynamicstack_p)
15756 {
15757 const unsigned char *src = (const unsigned char *)opts_set;
15758 unsigned char *dest = (unsigned char *)&new_opts_set;
15759 unsigned int i;
15760
15761 /* Merge the original option flags into the new ones. */
15762 for (i = 0; i < sizeof(*opts_set); i++)
15763 dest[i] |= src[i];
15764
15765 /* Do any overrides, such as arch=xxx, or tune=xxx support. */
15766 s390_option_override_internal (opts, &new_opts_set);
15767 /* Save the current options unless we are validating options for
15768 #pragma. */
15769 t = build_target_option_node (opts);
15770 }
15771 return t;
15772 }
15773
15774 /* Hook to validate attribute((target("string"))). */
15775
15776 static bool
15777 s390_valid_target_attribute_p (tree fndecl,
15778 tree ARG_UNUSED (name),
15779 tree args,
15780 int ARG_UNUSED (flags))
15781 {
15782 struct gcc_options func_options;
15783 tree new_target, new_optimize;
15784 bool ret = true;
15785
15786 /* attribute((target("default"))) does nothing, beyond
15787 affecting multi-versioning. */
15788 if (TREE_VALUE (args)
15789 && TREE_CODE (TREE_VALUE (args)) == STRING_CST
15790 && TREE_CHAIN (args) == NULL_TREE
15791 && strcmp (TREE_STRING_POINTER (TREE_VALUE (args)), "default") == 0)
15792 return true;
15793
15794 tree old_optimize = build_optimization_node (&global_options);
15795
15796 /* Get the optimization options of the current function. */
15797 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
15798
15799 if (!func_optimize)
15800 func_optimize = old_optimize;
15801
15802 /* Init func_options. */
15803 memset (&func_options, 0, sizeof (func_options));
15804 init_options_struct (&func_options, NULL);
15805 lang_hooks.init_options_struct (&func_options);
15806
15807 cl_optimization_restore (&func_options, TREE_OPTIMIZATION (func_optimize));
15808
15809 /* Initialize func_options to the default before its target options can
15810 be set. */
15811 cl_target_option_restore (&func_options,
15812 TREE_TARGET_OPTION (target_option_default_node));
15813
15814 new_target = s390_valid_target_attribute_tree (args, &func_options,
15815 &global_options_set,
15816 (args ==
15817 current_target_pragma));
15818 new_optimize = build_optimization_node (&func_options);
15819 if (new_target == error_mark_node)
15820 ret = false;
15821 else if (fndecl && new_target)
15822 {
15823 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;
15824 if (old_optimize != new_optimize)
15825 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
15826 }
15827 return ret;
15828 }
15829
15830 /* Hook to determine if one function can safely inline another. */
15831
15832 static bool
15833 s390_can_inline_p (tree caller, tree callee)
15834 {
15835 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
15836 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
15837
15838 if (!callee_tree)
15839 callee_tree = target_option_default_node;
15840 if (!caller_tree)
15841 caller_tree = target_option_default_node;
15842 if (callee_tree == caller_tree)
15843 return true;
15844
15845 struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
15846 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
15847 bool ret = true;
15848
15849 if ((caller_opts->x_target_flags & ~(MASK_SOFT_FLOAT | MASK_HARD_DFP))
15850 != (callee_opts->x_target_flags & ~(MASK_SOFT_FLOAT | MASK_HARD_DFP)))
15851 ret = false;
15852
15853 /* Don't inline functions to be compiled for a more recent arch into a
15854 function for an older arch. */
15855 else if (caller_opts->x_s390_arch < callee_opts->x_s390_arch)
15856 ret = false;
15857
15858 /* Inlining a hard float function into a soft float function is only
15859 allowed if the hard float function doesn't actually make use of
15860 floating point.
15861
15862 We are called from FEs for multi-versioning call optimization, so
15863 beware that ipa_fn_summaries may not be available. */
15864 else if (((TARGET_SOFT_FLOAT_P (caller_opts->x_target_flags)
15865 && !TARGET_SOFT_FLOAT_P (callee_opts->x_target_flags))
15866 || (!TARGET_HARD_DFP_P (caller_opts->x_target_flags)
15867 && TARGET_HARD_DFP_P (callee_opts->x_target_flags)))
15868 && (! ipa_fn_summaries
15869 || ipa_fn_summaries->get
15870 (cgraph_node::get (callee))->fp_expressions))
15871 ret = false;
15872
15873 return ret;
15874 }
15875 #endif
15876
15877 /* Set VAL to correct enum value according to the indirect-branch or
15878 function-return attribute in ATTR. */
15879
15880 static inline void
15881 s390_indirect_branch_attrvalue (tree attr, enum indirect_branch *val)
15882 {
15883 const char *str = TREE_STRING_POINTER (TREE_VALUE (TREE_VALUE (attr)));
15884 if (strcmp (str, "keep") == 0)
15885 *val = indirect_branch_keep;
15886 else if (strcmp (str, "thunk") == 0)
15887 *val = indirect_branch_thunk;
15888 else if (strcmp (str, "thunk-inline") == 0)
15889 *val = indirect_branch_thunk_inline;
15890 else if (strcmp (str, "thunk-extern") == 0)
15891 *val = indirect_branch_thunk_extern;
15892 }
15893
15894 /* Memorize the setting for -mindirect-branch* and -mfunction-return*
15895 from either the cmdline or the function attributes in
15896 cfun->machine. */
15897
15898 static void
15899 s390_indirect_branch_settings (tree fndecl)
15900 {
15901 tree attr;
15902
15903 if (!fndecl)
15904 return;
15905
15906 /* Initialize with the cmdline options and let the attributes
15907 override it. */
15908 cfun->machine->indirect_branch_jump = s390_indirect_branch_jump;
15909 cfun->machine->indirect_branch_call = s390_indirect_branch_call;
15910
15911 cfun->machine->function_return_reg = s390_function_return_reg;
15912 cfun->machine->function_return_mem = s390_function_return_mem;
15913
15914 if ((attr = lookup_attribute ("indirect_branch",
15915 DECL_ATTRIBUTES (fndecl))))
15916 {
15917 s390_indirect_branch_attrvalue (attr,
15918 &cfun->machine->indirect_branch_jump);
15919 s390_indirect_branch_attrvalue (attr,
15920 &cfun->machine->indirect_branch_call);
15921 }
15922
15923 if ((attr = lookup_attribute ("indirect_branch_jump",
15924 DECL_ATTRIBUTES (fndecl))))
15925 s390_indirect_branch_attrvalue (attr, &cfun->machine->indirect_branch_jump);
15926
15927 if ((attr = lookup_attribute ("indirect_branch_call",
15928 DECL_ATTRIBUTES (fndecl))))
15929 s390_indirect_branch_attrvalue (attr, &cfun->machine->indirect_branch_call);
15930
15931 if ((attr = lookup_attribute ("function_return",
15932 DECL_ATTRIBUTES (fndecl))))
15933 {
15934 s390_indirect_branch_attrvalue (attr,
15935 &cfun->machine->function_return_reg);
15936 s390_indirect_branch_attrvalue (attr,
15937 &cfun->machine->function_return_mem);
15938 }
15939
15940 if ((attr = lookup_attribute ("function_return_reg",
15941 DECL_ATTRIBUTES (fndecl))))
15942 s390_indirect_branch_attrvalue (attr, &cfun->machine->function_return_reg);
15943
15944 if ((attr = lookup_attribute ("function_return_mem",
15945 DECL_ATTRIBUTES (fndecl))))
15946 s390_indirect_branch_attrvalue (attr, &cfun->machine->function_return_mem);
15947 }
15948
15949 #if S390_USE_TARGET_ATTRIBUTE
15950 /* Restore targets globals from NEW_TREE and invalidate s390_previous_fndecl
15951 cache. */
15952
15953 void
15954 s390_activate_target_options (tree new_tree)
15955 {
15956 cl_target_option_restore (&global_options, TREE_TARGET_OPTION (new_tree));
15957 if (TREE_TARGET_GLOBALS (new_tree))
15958 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
15959 else if (new_tree == target_option_default_node)
15960 restore_target_globals (&default_target_globals);
15961 else
15962 TREE_TARGET_GLOBALS (new_tree) = save_target_globals_default_opts ();
15963 s390_previous_fndecl = NULL_TREE;
15964 }
15965 #endif
15966
15967 /* Establish appropriate back-end context for processing the function
15968 FNDECL. The argument might be NULL to indicate processing at top
15969 level, outside of any function scope. */
15970 static void
15971 s390_set_current_function (tree fndecl)
15972 {
15973 #if S390_USE_TARGET_ATTRIBUTE
15974 /* Only change the context if the function changes. This hook is called
15975 several times in the course of compiling a function, and we don't want to
15976 slow things down too much or call target_reinit when it isn't safe. */
15977 if (fndecl == s390_previous_fndecl)
15978 {
15979 s390_indirect_branch_settings (fndecl);
15980 return;
15981 }
15982
15983 tree old_tree;
15984 if (s390_previous_fndecl == NULL_TREE)
15985 old_tree = target_option_current_node;
15986 else if (DECL_FUNCTION_SPECIFIC_TARGET (s390_previous_fndecl))
15987 old_tree = DECL_FUNCTION_SPECIFIC_TARGET (s390_previous_fndecl);
15988 else
15989 old_tree = target_option_default_node;
15990
15991 if (fndecl == NULL_TREE)
15992 {
15993 if (old_tree != target_option_current_node)
15994 s390_activate_target_options (target_option_current_node);
15995 return;
15996 }
15997
15998 tree new_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
15999 if (new_tree == NULL_TREE)
16000 new_tree = target_option_default_node;
16001
16002 if (old_tree != new_tree)
16003 s390_activate_target_options (new_tree);
16004 s390_previous_fndecl = fndecl;
16005 #endif
16006 s390_indirect_branch_settings (fndecl);
16007 }
16008
16009 /* Implement TARGET_USE_BY_PIECES_INFRASTRUCTURE_P. */
16010
16011 static bool
16012 s390_use_by_pieces_infrastructure_p (unsigned HOST_WIDE_INT size,
16013 unsigned int align ATTRIBUTE_UNUSED,
16014 enum by_pieces_operation op ATTRIBUTE_UNUSED,
16015 bool speed_p ATTRIBUTE_UNUSED)
16016 {
16017 return (size == 1 || size == 2
16018 || size == 4 || (TARGET_ZARCH && size == 8));
16019 }
16020
16021 /* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV hook. */
16022
16023 static void
16024 s390_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
16025 {
16026 tree sfpc = s390_builtin_decls[S390_BUILTIN_s390_sfpc];
16027 tree efpc = s390_builtin_decls[S390_BUILTIN_s390_efpc];
16028 tree call_efpc = build_call_expr (efpc, 0);
16029 tree fenv_var = create_tmp_var_raw (unsigned_type_node);
16030
16031 #define FPC_EXCEPTION_MASK HOST_WIDE_INT_UC (0xf8000000)
16032 #define FPC_FLAGS_MASK HOST_WIDE_INT_UC (0x00f80000)
16033 #define FPC_DXC_MASK HOST_WIDE_INT_UC (0x0000ff00)
16034 #define FPC_EXCEPTION_MASK_SHIFT HOST_WIDE_INT_UC (24)
16035 #define FPC_FLAGS_SHIFT HOST_WIDE_INT_UC (16)
16036 #define FPC_DXC_SHIFT HOST_WIDE_INT_UC (8)
16037
16038 /* Generates the equivalent of feholdexcept (&fenv_var)
16039
16040 fenv_var = __builtin_s390_efpc ();
16041 __builtin_s390_sfpc (fenv_var & mask) */
16042 tree old_fpc = build2 (MODIFY_EXPR, unsigned_type_node, fenv_var, call_efpc);
16043 tree new_fpc =
16044 build2 (BIT_AND_EXPR, unsigned_type_node, fenv_var,
16045 build_int_cst (unsigned_type_node,
16046 ~(FPC_DXC_MASK | FPC_FLAGS_MASK |
16047 FPC_EXCEPTION_MASK)));
16048 tree set_new_fpc = build_call_expr (sfpc, 1, new_fpc);
16049 *hold = build2 (COMPOUND_EXPR, void_type_node, old_fpc, set_new_fpc);
16050
16051 /* Generates the equivalent of feclearexcept (FE_ALL_EXCEPT)
16052
16053 __builtin_s390_sfpc (__builtin_s390_efpc () & mask) */
16054 new_fpc = build2 (BIT_AND_EXPR, unsigned_type_node, call_efpc,
16055 build_int_cst (unsigned_type_node,
16056 ~(FPC_DXC_MASK | FPC_FLAGS_MASK)));
16057 *clear = build_call_expr (sfpc, 1, new_fpc);
16058
16059 /* Generates the equivalent of feupdateenv (fenv_var)
16060
16061 old_fpc = __builtin_s390_efpc ();
16062 __builtin_s390_sfpc (fenv_var);
16063 __atomic_feraiseexcept ((old_fpc & FPC_FLAGS_MASK) >> FPC_FLAGS_SHIFT); */
16064
16065 old_fpc = create_tmp_var_raw (unsigned_type_node);
16066 tree store_old_fpc = build2 (MODIFY_EXPR, void_type_node,
16067 old_fpc, call_efpc);
16068
16069 set_new_fpc = build_call_expr (sfpc, 1, fenv_var);
16070
16071 tree raise_old_except = build2 (BIT_AND_EXPR, unsigned_type_node, old_fpc,
16072 build_int_cst (unsigned_type_node,
16073 FPC_FLAGS_MASK));
16074 raise_old_except = build2 (RSHIFT_EXPR, unsigned_type_node, raise_old_except,
16075 build_int_cst (unsigned_type_node,
16076 FPC_FLAGS_SHIFT));
16077 tree atomic_feraiseexcept
16078 = builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT);
16079 raise_old_except = build_call_expr (atomic_feraiseexcept,
16080 1, raise_old_except);
16081
16082 *update = build2 (COMPOUND_EXPR, void_type_node,
16083 build2 (COMPOUND_EXPR, void_type_node,
16084 store_old_fpc, set_new_fpc),
16085 raise_old_except);
16086
16087 #undef FPC_EXCEPTION_MASK
16088 #undef FPC_FLAGS_MASK
16089 #undef FPC_DXC_MASK
16090 #undef FPC_EXCEPTION_MASK_SHIFT
16091 #undef FPC_FLAGS_SHIFT
16092 #undef FPC_DXC_SHIFT
16093 }
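
/* Editorial sketch, following the documented contract of the
   TARGET_ATOMIC_ASSIGN_EXPAND_FENV hook: for an atomic floating-point
   compound assignment such as

     _Atomic double d;
     d += 1.0;

   the front end wraps the compare-and-swap loop roughly as

     hold:    fenv_var = __builtin_s390_efpc ();
              __builtin_s390_sfpc (fenv_var with exceptions/flags/dxc
                                   masked off);
     loop:    tmp = d + 1.0; try to CAS the result into d
     clear:   on CAS failure, drop the flags accumulated by the
              speculative iteration and retry
     update:  on CAS success, restore fenv_var and re-raise the
              deferred flags via __atomic_feraiseexcept ().  */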

/* Return the vector mode to be used for inner mode MODE when doing
   vectorization.  */
static machine_mode
s390_preferred_simd_mode (scalar_mode mode)
{
  if (TARGET_VXE)
    switch (mode)
      {
      case E_SFmode:
	return V4SFmode;
      default:;
      }

  if (TARGET_VX)
    switch (mode)
      {
      case E_DFmode:
	return V2DFmode;
      case E_DImode:
	return V2DImode;
      case E_SImode:
	return V4SImode;
      case E_HImode:
	return V8HImode;
      case E_QImode:
	return V16QImode;
      default:;
      }
  return word_mode;
}
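
/* Editorial example: with the vector facility (-march=z13 and newer,
   TARGET_VX) the hook above asks the vectorizer to use V4SImode for an
   int loop such as

     void
     add4 (int *restrict a, int *restrict b, int n)
     {
       for (int i = 0; i < n; i++)
	 a[i] += b[i];
     }

   which is then expected to use the 16-byte vector registers.  SFmode
   is only listed under TARGET_VXE since single-precision vector
   arithmetic arrived with the z14 vector enhancements.  */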

/* Our hardware does not require vectors to be strictly aligned.  */
static bool
s390_support_vector_misalignment (machine_mode mode ATTRIBUTE_UNUSED,
				  const_tree type ATTRIBUTE_UNUSED,
				  int misalignment ATTRIBUTE_UNUSED,
				  bool is_packed ATTRIBUTE_UNUSED)
{
  if (TARGET_VX)
    return true;

  return default_builtin_support_vector_misalignment (mode, type, misalignment,
						      is_packed);
}
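
/* Editorial note: the z13 vector load/store instructions have no
   alignment requirement, so with TARGET_VX the vectorizer may emit
   direct unaligned accesses instead of a realignment sequence, e.g.

     typedef int v4si __attribute__ ((vector_size (16)));

     v4si
     load_any (const int *p)       p need not be 16-byte aligned
     {
       v4si v;
       __builtin_memcpy (&v, p, sizeof v);
       return v;
     }
*/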

/* The vector ABI requires vector types to be aligned on an 8 byte
   boundary (our stack alignment).  However, we allow the user to
   override this, even though doing so definitely breaks the ABI.  */
static HOST_WIDE_INT
s390_vector_alignment (const_tree type)
{
  tree size = TYPE_SIZE (type);

  if (!TARGET_VX_ABI)
    return default_vector_alignment (type);

  if (TYPE_USER_ALIGN (type))
    return TYPE_ALIGN (type);

  if (tree_fits_uhwi_p (size)
      && tree_to_uhwi (size) < BIGGEST_ALIGNMENT)
    return tree_to_uhwi (size);

  return BIGGEST_ALIGNMENT;
}
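
/* Editorial example of the rules above under the vector ABI: with
   BIGGEST_ALIGNMENT being 64 bits on s390,

     typedef int v4si __attribute__ ((vector_size (16)));
     v4si a;      16-byte vector: alignment capped at 8 bytes
     typedef short v2hi __attribute__ ((vector_size (4)));
     v2hi b;      4-byte vector: aligned to its size, 4 bytes
     typedef int v4si_32 __attribute__ ((vector_size (16),
					 aligned (32)));
     v4si_32 c;   user override honored (32 bytes), breaking the ABI  */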

/* Implement TARGET_CONSTANT_ALIGNMENT.  Constants accessed through the
   LARL instruction must reside on even addresses, so require at least
   16-bit alignment.  */

static HOST_WIDE_INT
s390_constant_alignment (const_tree, HOST_WIDE_INT align)
{
  return MAX (align, 16);
}
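
/* Editorial note: LARL addresses its target as a halfword offset
   relative to the PC, so everything reached through it must be at
   least 2-byte aligned.  The MAX above works on alignments in bits:
   a string literal with the default 8-bit alignment is raised to
   16 bits so that e.g.

     larl    %r2,.LC0

   remains valid for the constant placed at .LC0.  */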

#ifdef HAVE_AS_MACHINE_MACHINEMODE
/* Implement TARGET_ASM_FILE_START.  */
static void
s390_asm_file_start (void)
{
  default_file_start ();
  s390_asm_output_machine_for_arch (asm_out_file);
}
#endif

/* Implement TARGET_ASM_FILE_END.  */
static void
s390_asm_file_end (void)
{
#ifdef HAVE_AS_GNU_ATTRIBUTE
  varpool_node *vnode;
  cgraph_node *cnode;

  FOR_EACH_VARIABLE (vnode)
    if (TREE_PUBLIC (vnode->decl))
      s390_check_type_for_vector_abi (TREE_TYPE (vnode->decl), false, false);

  FOR_EACH_FUNCTION (cnode)
    if (TREE_PUBLIC (cnode->decl))
      s390_check_type_for_vector_abi (TREE_TYPE (cnode->decl), false, false);

  if (s390_vector_abi != 0)
    fprintf (asm_out_file, "\t.gnu_attribute 8, %d\n",
	     s390_vector_abi);
#endif
  file_end_indicate_exec_stack ();

  if (flag_split_stack)
    file_end_indicate_split_stack ();
}

/* Return true if TYPE is a vector bool type.  */
static inline bool
s390_vector_bool_type_p (const_tree type)
{
  return TYPE_VECTOR_OPAQUE (type);
}

/* Return the diagnostic message string if the binary operation OP is
   not permitted on TYPE1 and TYPE2, NULL otherwise.  */
static const char *
s390_invalid_binary_op (int op ATTRIBUTE_UNUSED, const_tree type1,
			const_tree type2)
{
  bool bool1_p, bool2_p;
  bool plusminus_p;
  bool muldiv_p;
  bool compare_p;
  machine_mode mode1, mode2;

  if (!TARGET_ZVECTOR)
    return NULL;

  if (!VECTOR_TYPE_P (type1) || !VECTOR_TYPE_P (type2))
    return NULL;

  bool1_p = s390_vector_bool_type_p (type1);
  bool2_p = s390_vector_bool_type_p (type2);

  /* Mixing signed and unsigned types is forbidden for all
     operators.  */
  if (!bool1_p && !bool2_p
      && TYPE_UNSIGNED (type1) != TYPE_UNSIGNED (type2))
    return N_("types differ in signedness");

  plusminus_p = (op == PLUS_EXPR || op == MINUS_EXPR);
  muldiv_p = (op == MULT_EXPR || op == RDIV_EXPR || op == TRUNC_DIV_EXPR
	      || op == CEIL_DIV_EXPR || op == FLOOR_DIV_EXPR
	      || op == ROUND_DIV_EXPR);
  compare_p = (op == LT_EXPR || op == LE_EXPR || op == GT_EXPR || op == GE_EXPR
	       || op == EQ_EXPR || op == NE_EXPR);

  if (bool1_p && bool2_p && (plusminus_p || muldiv_p))
    return N_("binary operator does not support two vector bool operands");

  if (bool1_p != bool2_p && (muldiv_p || compare_p))
    return N_("binary operator does not support vector bool operand");

  mode1 = TYPE_MODE (type1);
  mode2 = TYPE_MODE (type2);

  if (bool1_p != bool2_p && plusminus_p
      && (GET_MODE_CLASS (mode1) == MODE_VECTOR_FLOAT
	  || GET_MODE_CLASS (mode2) == MODE_VECTOR_FLOAT))
    return N_("binary operator does not support mixing vector "
	      "bool with floating point vector operands");

  return NULL;
}
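
/* Editorial examples of combinations diagnosed above, assuming the
   -mzvector vector language extensions:

     vector bool int b1, b2;
     vector signed int s;
     vector unsigned int u;

     b1 + b2;     rejected: two vector bool operands for +
     b1 * s;      rejected: vector bool operand for *
     s + u;       rejected: operands differ in signedness
     b1 + s;      accepted: bool may mix with integral for +/-  */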

/* Implement TARGET_C_EXCESS_PRECISION.

   FIXME: For historical reasons, float_t and double_t are typedef'ed to
   double on s390, causing operations on float_t to operate in a higher
   precision than is necessary.  However, it is not the case that SFmode
   operations have implicit excess precision, and we generate more optimal
   code if we let the compiler know no implicit extra precision is added.

   That means when we are compiling with -fexcess-precision=fast, the value
   we set for FLT_EVAL_METHOD will be out of line with the actual precision of
   float_t (though they would be correct for -fexcess-precision=standard).

   A complete fix would modify glibc to remove the unnecessary typedef
   of float_t to double.  */

static enum flt_eval_method
s390_excess_precision (enum excess_precision_type type)
{
  switch (type)
    {
    case EXCESS_PRECISION_TYPE_IMPLICIT:
    case EXCESS_PRECISION_TYPE_FAST:
      /* The fastest type to promote to will always be the native type,
	 whether that occurs with implicit excess precision or
	 otherwise.  */
      return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT;
    case EXCESS_PRECISION_TYPE_STANDARD:
      /* Otherwise, when we are in a standards compliant mode, to
	 ensure consistency with the implementation in glibc, report that
	 float is evaluated to the range and precision of double.  */
      return FLT_EVAL_METHOD_PROMOTE_TO_DOUBLE;
    default:
      gcc_unreachable ();
    }
  return FLT_EVAL_METHOD_UNPREDICTABLE;
}
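
/* Editorial illustration: under -fexcess-precision=standard the value
   returned above makes float arithmetic evaluate in double, matching
   glibc's float_t typedef:

     float a, b, c;
     a = b + c;     computed as (float) ((double) b + (double) c)

   while -fexcess-precision=fast keeps the addition in SFmode.  */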

/* Implement the TARGET_ASAN_SHADOW_OFFSET hook.  */

static unsigned HOST_WIDE_INT
s390_asan_shadow_offset (void)
{
  return TARGET_64BIT ? HOST_WIDE_INT_1U << 52 : HOST_WIDE_INT_UC (0x20000000);
}
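
/* Editorial sketch of how the offset above is used: ASan maps every
   8 application bytes to one shadow byte, so an instrumented access is
   checked roughly as (64-bit case)

     unsigned long shadow = (addr >> 3) + (1UL << 52);
     if (*(char *) shadow != 0)
       check_slow_path_and_maybe_report (addr);
*/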

#ifdef HAVE_GAS_HIDDEN
# define USE_HIDDEN_LINKONCE 1
#else
# define USE_HIDDEN_LINKONCE 0
#endif

/* Output an indirect branch trampoline for target register REGNO.  */

static void
s390_output_indirect_thunk_function (unsigned int regno, bool z10_p)
{
  tree decl;
  char thunk_label[32];
  int i;

  if (z10_p)
    sprintf (thunk_label, TARGET_INDIRECT_BRANCH_THUNK_NAME_EXRL, regno);
  else
    sprintf (thunk_label, TARGET_INDIRECT_BRANCH_THUNK_NAME_EX,
	     INDIRECT_BRANCH_THUNK_REGNUM, regno);

  decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
		     get_identifier (thunk_label),
		     build_function_type_list (void_type_node, NULL_TREE));
  DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
				   NULL_TREE, void_type_node);
  TREE_PUBLIC (decl) = 1;
  TREE_STATIC (decl) = 1;
  DECL_IGNORED_P (decl) = 1;

  if (USE_HIDDEN_LINKONCE)
    {
      cgraph_node::create (decl)->set_comdat_group (DECL_ASSEMBLER_NAME (decl));

      targetm.asm_out.unique_section (decl, 0);
      switch_to_section (get_named_section (decl, NULL, 0));

      targetm.asm_out.globalize_label (asm_out_file, thunk_label);
      fputs ("\t.hidden\t", asm_out_file);
      assemble_name (asm_out_file, thunk_label);
      putc ('\n', asm_out_file);
      ASM_DECLARE_FUNCTION_NAME (asm_out_file, thunk_label, decl);
    }
  else
    {
      switch_to_section (text_section);
      ASM_OUTPUT_LABEL (asm_out_file, thunk_label);
    }

  DECL_INITIAL (decl) = make_node (BLOCK);
  current_function_decl = decl;
  allocate_struct_function (decl, false);
  init_function_start (decl);
  cfun->is_thunk = true;
  first_function_block_is_cold = false;
  final_start_function (emit_barrier (), asm_out_file, 1);

  /* This makes CFI at least usable for indirect jumps.

     Stopping in the thunk: the backtrace will point to the thunk
     target as if it had been interrupted by a signal.  For a call
     this means that the call chain will be: caller->callee->thunk.  */
  if (flag_asynchronous_unwind_tables && flag_dwarf2_cfi_asm)
    {
      fputs ("\t.cfi_signal_frame\n", asm_out_file);
      fprintf (asm_out_file, "\t.cfi_return_column %d\n", regno);
      for (i = 0; i < FPR15_REGNUM; i++)
	fprintf (asm_out_file, "\t.cfi_same_value %s\n", reg_names[i]);
    }

  if (z10_p)
    {
      /* exrl  0,1f  */

      /* The thunk uses the z10-only EXRL instruction even when z10 is
	 not the selected architecture.  Tell the assembler to accept
	 the instruction anyway.  */
      if (!TARGET_CPU_Z10)
	{
	  fputs ("\t.machine push\n", asm_out_file);
	  fputs ("\t.machine z10\n", asm_out_file);
	}
      /* We use exrl even if -mzarch hasn't been specified on the
	 command line so we have to tell the assembler to accept
	 it.  */
      if (!TARGET_ZARCH)
	fputs ("\t.machinemode zarch\n", asm_out_file);

      fputs ("\texrl\t0,1f\n", asm_out_file);

      if (!TARGET_ZARCH)
	fputs ("\t.machinemode esa\n", asm_out_file);

      if (!TARGET_CPU_Z10)
	fputs ("\t.machine pop\n", asm_out_file);
    }
  else
    {
      /* larl  %r1,1f  */
      fprintf (asm_out_file, "\tlarl\t%%r%d,1f\n",
	       INDIRECT_BRANCH_THUNK_REGNUM);

      /* ex  0,0(%r1)  */
      fprintf (asm_out_file, "\tex\t0,0(%%r%d)\n",
	       INDIRECT_BRANCH_THUNK_REGNUM);
    }

  /* 0:  j 0b  */
  fputs ("0:\tj\t0b\n", asm_out_file);

  /* 1:  br <regno>  */
  fprintf (asm_out_file, "1:\tbr\t%%r%d\n", regno);

  final_end_function ();
  init_insn_lengths ();
  free_after_compilation (cfun);
  set_cfun (NULL);
  current_function_decl = NULL;
}
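
/* Editorial example of a complete thunk body as emitted above for
   target register %r1 on z10 or newer:

	exrl	0,1f
     0:	j	0b
     1:	br	%r1

   The BR is only ever executed out of line via EXRL (or EX on
   pre-z10), which keeps the indirect branch away from the branch
   prediction the mitigation wants to avoid, and the self-branch at
   label 0 stops speculative fall-through.  */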

/* Implement the asm.code_end target hook.  */

static void
s390_code_end (void)
{
  int i;

  for (i = 1; i < 16; i++)
    {
      if (indirect_branch_z10thunk_mask & (1 << i))
	s390_output_indirect_thunk_function (i, true);

      if (indirect_branch_prez10thunk_mask & (1 << i))
	s390_output_indirect_thunk_function (i, false);
    }

  if (TARGET_INDIRECT_BRANCH_TABLE)
    {
      int o;
      int i;

      for (o = 0; o < INDIRECT_BRANCH_NUM_OPTIONS; o++)
	{
	  if (indirect_branch_table_label_no[o] == 0)
	    continue;

	  switch_to_section (get_section (indirect_branch_table_name[o],
					  0,
					  NULL_TREE));
	  for (i = 0; i < indirect_branch_table_label_no[o]; i++)
	    {
	      char label_start[32];

	      ASM_GENERATE_INTERNAL_LABEL (label_start,
					   indirect_branch_table_label[o], i);

	      fputs ("\t.long\t", asm_out_file);
	      assemble_name_raw (asm_out_file, label_start);
	      fputs ("-.\n", asm_out_file);
	    }
	  switch_to_section (current_function_section ());
	}
    }
}

/* Implement the TARGET_CASE_VALUES_THRESHOLD target hook.  */

unsigned int
s390_case_values_threshold (void)
{
  /* Disabling branch prediction for indirect jumps makes jump tables
     much more expensive.  */
  if (TARGET_INDIRECT_BRANCH_NOBP_JUMP)
    return 20;

  return default_case_values_threshold ();
}

/* Evaluate the insns between HEAD and TAIL and install back-end
   specific dependencies.

   Establish an ANTI dependency between the r11 and r15 restores from
   FPRs to prevent the instruction scheduler from reordering them,
   since this would break CFI.  No further handling in the
   sched_reorder hook is required since the r11 and r15 restore will
   never appear in the same ready list with that change.  */
void
s390_sched_dependencies_evaluation (rtx_insn *head, rtx_insn *tail)
{
  if (!frame_pointer_needed || !epilogue_completed)
    return;

  while (head != tail && DEBUG_INSN_P (head))
    head = NEXT_INSN (head);

  rtx_insn *r15_restore = NULL, *r11_restore = NULL;

  for (rtx_insn *insn = tail; insn != head; insn = PREV_INSN (insn))
    {
      rtx set = single_set (insn);
      if (!INSN_P (insn)
	  || !RTX_FRAME_RELATED_P (insn)
	  || set == NULL_RTX
	  || !REG_P (SET_DEST (set))
	  || !FP_REG_P (SET_SRC (set)))
	continue;

      if (REGNO (SET_DEST (set)) == HARD_FRAME_POINTER_REGNUM)
	r11_restore = insn;

      if (REGNO (SET_DEST (set)) == STACK_POINTER_REGNUM)
	r15_restore = insn;
    }

  if (r11_restore == NULL || r15_restore == NULL)
    return;
  add_dependence (r11_restore, r15_restore, REG_DEP_ANTI);
}

/* Implement TARGET_SHIFT_TRUNCATION_MASK for integer shifts.  */

static unsigned HOST_WIDE_INT
s390_shift_truncation_mask (machine_mode mode)
{
  return mode == DImode || mode == SImode ? 63 : 0;
}
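
/* Editorial note: the mask above records that DImode and SImode shift
   counts are truncated to 6 bits by the hardware, so e.g.

     x << (n & 63)

   can be emitted as a single shift without the explicit AND, while the
   zero return for other modes promises no truncation.  */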

/* Initialize GCC target structure.  */

#undef TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP "\t.word\t"
#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
#undef TARGET_ASM_INTEGER
#define TARGET_ASM_INTEGER s390_assemble_integer

#undef TARGET_ASM_OPEN_PAREN
#define TARGET_ASM_OPEN_PAREN ""

#undef TARGET_ASM_CLOSE_PAREN
#define TARGET_ASM_CLOSE_PAREN ""

#undef TARGET_OPTION_OVERRIDE
#define TARGET_OPTION_OVERRIDE s390_option_override

#ifdef TARGET_THREAD_SSP_OFFSET
#undef TARGET_STACK_PROTECT_GUARD
#define TARGET_STACK_PROTECT_GUARD hook_tree_void_null
#endif

#undef TARGET_ENCODE_SECTION_INFO
#define TARGET_ENCODE_SECTION_INFO s390_encode_section_info

#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P s390_scalar_mode_supported_p

#ifdef HAVE_AS_TLS
#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS true
#endif
#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM s390_cannot_force_const_mem

#undef TARGET_DELEGITIMIZE_ADDRESS
#define TARGET_DELEGITIMIZE_ADDRESS s390_delegitimize_address

#undef TARGET_LEGITIMIZE_ADDRESS
#define TARGET_LEGITIMIZE_ADDRESS s390_legitimize_address

#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY s390_return_in_memory

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS s390_init_builtins
#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN s390_expand_builtin
#undef TARGET_BUILTIN_DECL
#define TARGET_BUILTIN_DECL s390_builtin_decl

#undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
#define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA s390_output_addr_const_extra

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK s390_output_mi_thunk
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true

#undef TARGET_C_EXCESS_PRECISION
#define TARGET_C_EXCESS_PRECISION s390_excess_precision

#undef TARGET_SCHED_ADJUST_PRIORITY
#define TARGET_SCHED_ADJUST_PRIORITY s390_adjust_priority
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE s390_issue_rate
#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD s390_first_cycle_multipass_dfa_lookahead

#undef TARGET_SCHED_VARIABLE_ISSUE
#define TARGET_SCHED_VARIABLE_ISSUE s390_sched_variable_issue
#undef TARGET_SCHED_REORDER
#define TARGET_SCHED_REORDER s390_sched_reorder
#undef TARGET_SCHED_INIT
#define TARGET_SCHED_INIT s390_sched_init

#undef TARGET_CANNOT_COPY_INSN_P
#define TARGET_CANNOT_COPY_INSN_P s390_cannot_copy_insn_p
#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS s390_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST s390_address_cost
#undef TARGET_REGISTER_MOVE_COST
#define TARGET_REGISTER_MOVE_COST s390_register_move_cost
#undef TARGET_MEMORY_MOVE_COST
#define TARGET_MEMORY_MOVE_COST s390_memory_move_cost
#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
  s390_builtin_vectorization_cost

#undef TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG s390_reorg

#undef TARGET_VALID_POINTER_MODE
#define TARGET_VALID_POINTER_MODE s390_valid_pointer_mode

#undef TARGET_BUILD_BUILTIN_VA_LIST
#define TARGET_BUILD_BUILTIN_VA_LIST s390_build_builtin_va_list
#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START s390_va_start
#undef TARGET_ASAN_SHADOW_OFFSET
#define TARGET_ASAN_SHADOW_OFFSET s390_asan_shadow_offset
#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR s390_gimplify_va_arg

#undef TARGET_PROMOTE_FUNCTION_MODE
#define TARGET_PROMOTE_FUNCTION_MODE s390_promote_function_mode
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE s390_pass_by_reference

#undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
#define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE s390_override_options_after_change

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL s390_function_ok_for_sibcall
#undef TARGET_FUNCTION_ARG
#define TARGET_FUNCTION_ARG s390_function_arg
#undef TARGET_FUNCTION_ARG_ADVANCE
#define TARGET_FUNCTION_ARG_ADVANCE s390_function_arg_advance
#undef TARGET_FUNCTION_ARG_PADDING
#define TARGET_FUNCTION_ARG_PADDING s390_function_arg_padding
#undef TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE s390_function_value
#undef TARGET_LIBCALL_VALUE
#define TARGET_LIBCALL_VALUE s390_libcall_value
#undef TARGET_STRICT_ARGUMENT_NAMING
#define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true

#undef TARGET_KEEP_LEAF_WHEN_PROFILED
#define TARGET_KEEP_LEAF_WHEN_PROFILED s390_keep_leaf_when_profiled

#undef TARGET_FIXED_CONDITION_CODE_REGS
#define TARGET_FIXED_CONDITION_CODE_REGS s390_fixed_condition_code_regs

#undef TARGET_CC_MODES_COMPATIBLE
#define TARGET_CC_MODES_COMPATIBLE s390_cc_modes_compatible

#undef TARGET_INVALID_WITHIN_DOLOOP
#define TARGET_INVALID_WITHIN_DOLOOP hook_constcharptr_const_rtx_insn_null

#ifdef HAVE_AS_TLS
#undef TARGET_ASM_OUTPUT_DWARF_DTPREL
#define TARGET_ASM_OUTPUT_DWARF_DTPREL s390_output_dwarf_dtprel
#endif

#undef TARGET_DWARF_FRAME_REG_MODE
#define TARGET_DWARF_FRAME_REG_MODE s390_dwarf_frame_reg_mode

#ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
#undef TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE s390_mangle_type
#endif

#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P s390_vector_mode_supported_p

#undef TARGET_PREFERRED_RELOAD_CLASS
#define TARGET_PREFERRED_RELOAD_CLASS s390_preferred_reload_class

#undef TARGET_SECONDARY_RELOAD
#define TARGET_SECONDARY_RELOAD s390_secondary_reload
#undef TARGET_SECONDARY_MEMORY_NEEDED
#define TARGET_SECONDARY_MEMORY_NEEDED s390_secondary_memory_needed
#undef TARGET_SECONDARY_MEMORY_NEEDED_MODE
#define TARGET_SECONDARY_MEMORY_NEEDED_MODE s390_secondary_memory_needed_mode

#undef TARGET_LIBGCC_CMP_RETURN_MODE
#define TARGET_LIBGCC_CMP_RETURN_MODE s390_libgcc_cmp_return_mode

#undef TARGET_LIBGCC_SHIFT_COUNT_MODE
#define TARGET_LIBGCC_SHIFT_COUNT_MODE s390_libgcc_shift_count_mode

#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P s390_legitimate_address_p

#undef TARGET_LEGITIMATE_CONSTANT_P
#define TARGET_LEGITIMATE_CONSTANT_P s390_legitimate_constant_p

#undef TARGET_LRA_P
#define TARGET_LRA_P s390_lra_p

#undef TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE s390_can_eliminate

#undef TARGET_CONDITIONAL_REGISTER_USAGE
#define TARGET_CONDITIONAL_REGISTER_USAGE s390_conditional_register_usage

#undef TARGET_LOOP_UNROLL_ADJUST
#define TARGET_LOOP_UNROLL_ADJUST s390_loop_unroll_adjust

#undef TARGET_ASM_TRAMPOLINE_TEMPLATE
#define TARGET_ASM_TRAMPOLINE_TEMPLATE s390_asm_trampoline_template
#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT s390_trampoline_init

/* PR 79421 */
#undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
#define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 1

#undef TARGET_UNWIND_WORD_MODE
#define TARGET_UNWIND_WORD_MODE s390_unwind_word_mode

#undef TARGET_CANONICALIZE_COMPARISON
#define TARGET_CANONICALIZE_COMPARISON s390_canonicalize_comparison

#undef TARGET_HARD_REGNO_SCRATCH_OK
#define TARGET_HARD_REGNO_SCRATCH_OK s390_hard_regno_scratch_ok

#undef TARGET_HARD_REGNO_NREGS
#define TARGET_HARD_REGNO_NREGS s390_hard_regno_nregs
#undef TARGET_HARD_REGNO_MODE_OK
#define TARGET_HARD_REGNO_MODE_OK s390_hard_regno_mode_ok
#undef TARGET_MODES_TIEABLE_P
#define TARGET_MODES_TIEABLE_P s390_modes_tieable_p

#undef TARGET_HARD_REGNO_CALL_PART_CLOBBERED
#define TARGET_HARD_REGNO_CALL_PART_CLOBBERED \
  s390_hard_regno_call_part_clobbered

#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE s390_attribute_table

#undef TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P
#define TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P hook_bool_const_tree_true

#undef TARGET_SET_UP_BY_PROLOGUE
#define TARGET_SET_UP_BY_PROLOGUE s300_set_up_by_prologue

#undef TARGET_EXTRA_LIVE_ON_ENTRY
#define TARGET_EXTRA_LIVE_ON_ENTRY s390_live_on_entry

#undef TARGET_USE_BY_PIECES_INFRASTRUCTURE_P
#define TARGET_USE_BY_PIECES_INFRASTRUCTURE_P \
  s390_use_by_pieces_infrastructure_p

#undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
#define TARGET_ATOMIC_ASSIGN_EXPAND_FENV s390_atomic_assign_expand_fenv

#undef TARGET_INVALID_ARG_FOR_UNPROTOTYPED_FN
#define TARGET_INVALID_ARG_FOR_UNPROTOTYPED_FN s390_invalid_arg_for_unprototyped_fn

#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE s390_preferred_simd_mode

#undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
#define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT s390_support_vector_misalignment

#undef TARGET_VECTOR_ALIGNMENT
#define TARGET_VECTOR_ALIGNMENT s390_vector_alignment

#undef TARGET_INVALID_BINARY_OP
#define TARGET_INVALID_BINARY_OP s390_invalid_binary_op

#ifdef HAVE_AS_MACHINE_MACHINEMODE
#undef TARGET_ASM_FILE_START
#define TARGET_ASM_FILE_START s390_asm_file_start
#endif

#undef TARGET_ASM_FILE_END
#define TARGET_ASM_FILE_END s390_asm_file_end

#undef TARGET_SET_CURRENT_FUNCTION
#define TARGET_SET_CURRENT_FUNCTION s390_set_current_function

#if S390_USE_TARGET_ATTRIBUTE
#undef TARGET_OPTION_VALID_ATTRIBUTE_P
#define TARGET_OPTION_VALID_ATTRIBUTE_P s390_valid_target_attribute_p

#undef TARGET_CAN_INLINE_P
#define TARGET_CAN_INLINE_P s390_can_inline_p
#endif

#undef TARGET_OPTION_RESTORE
#define TARGET_OPTION_RESTORE s390_function_specific_restore

#undef TARGET_CAN_CHANGE_MODE_CLASS
#define TARGET_CAN_CHANGE_MODE_CLASS s390_can_change_mode_class

#undef TARGET_CONSTANT_ALIGNMENT
#define TARGET_CONSTANT_ALIGNMENT s390_constant_alignment

#undef TARGET_ASM_CODE_END
#define TARGET_ASM_CODE_END s390_code_end

#undef TARGET_CASE_VALUES_THRESHOLD
#define TARGET_CASE_VALUES_THRESHOLD s390_case_values_threshold

#undef TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK
#define TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK \
  s390_sched_dependencies_evaluation

#undef TARGET_SHIFT_TRUNCATION_MASK
#define TARGET_SHIFT_TRUNCATION_MASK s390_shift_truncation_mask

/* Use only short displacement, since long displacement is not available for
   the floating point instructions.  */
#undef TARGET_MAX_ANCHOR_OFFSET
#define TARGET_MAX_ANCHOR_OFFSET 0xfff

struct gcc_target targetm = TARGET_INITIALIZER;

#include "gt-s390.h"