/* Subroutines used for code generation on IBM S/390 and zSeries
   Copyright (C) 1999-2019 Free Software Foundation, Inc.
   Contributed by Hartmut Penner (hpenner@de.ibm.com) and
                  Ulrich Weigand (uweigand@de.ibm.com) and
                  Andreas Krebbel (Andreas.Krebbel@de.ibm.com).

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#define IN_TARGET_CODE 1

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "target.h"
#include "target-globals.h"
#include "rtl.h"
#include "tree.h"
#include "gimple.h"
#include "cfghooks.h"
#include "cfgloop.h"
#include "df.h"
#include "memmodel.h"
#include "tm_p.h"
#include "stringpool.h"
#include "attribs.h"
#include "expmed.h"
#include "optabs.h"
#include "regs.h"
#include "emit-rtl.h"
#include "recog.h"
#include "cgraph.h"
#include "diagnostic-core.h"
#include "diagnostic.h"
#include "alias.h"
#include "fold-const.h"
#include "print-tree.h"
#include "stor-layout.h"
#include "varasm.h"
#include "calls.h"
#include "conditions.h"
#include "output.h"
#include "insn-attr.h"
#include "flags.h"
#include "except.h"
#include "dojump.h"
#include "explow.h"
#include "stmt.h"
#include "expr.h"
#include "reload.h"
#include "cfgrtl.h"
#include "cfganal.h"
#include "lcm.h"
#include "cfgbuild.h"
#include "cfgcleanup.h"
#include "debug.h"
#include "langhooks.h"
#include "internal-fn.h"
#include "gimple-fold.h"
#include "tree-eh.h"
#include "gimplify.h"
#include "params.h"
#include "opts.h"
#include "tree-pass.h"
#include "context.h"
#include "builtins.h"
#include "rtl-iter.h"
#include "intl.h"
#include "tm-constrs.h"
#include "tree-vrp.h"
#include "symbol-summary.h"
#include "ipa-prop.h"
#include "ipa-fnsummary.h"
#include "sched-int.h"

/* This file should be included last.  */
#include "target-def.h"

static bool s390_hard_regno_mode_ok (unsigned int, machine_mode);

/* Remember the last target of s390_set_current_function.  */
static GTY(()) tree s390_previous_fndecl;

/* Define the specific costs for a given cpu.  */

struct processor_costs
{
  /* multiplication */
  const int m;        /* cost of an M instruction.  */
  const int mghi;     /* cost of an MGHI instruction.  */
  const int mh;       /* cost of an MH instruction.  */
  const int mhi;      /* cost of an MHI instruction.  */
  const int ml;       /* cost of an ML instruction.  */
  const int mr;       /* cost of an MR instruction.  */
  const int ms;       /* cost of an MS instruction.  */
  const int msg;      /* cost of an MSG instruction.  */
  const int msgf;     /* cost of an MSGF instruction.  */
  const int msgfr;    /* cost of an MSGFR instruction.  */
  const int msgr;     /* cost of an MSGR instruction.  */
  const int msr;      /* cost of an MSR instruction.  */
  const int mult_df;  /* cost of multiplication in DFmode.  */
  const int mxbr;     /* cost of an MXBR instruction (multiplication
                         in TFmode).  */
  /* square root */
  const int sqxbr;    /* cost of square root in TFmode.  */
  const int sqdbr;    /* cost of square root in DFmode.  */
  const int sqebr;    /* cost of square root in SFmode.  */
  /* multiply and add */
  const int madbr;    /* cost of multiply and add in DFmode.  */
  const int maebr;    /* cost of multiply and add in SFmode.  */
  /* division */
  const int dxbr;     /* cost of division in TFmode (DXBR).  */
  const int ddbr;     /* cost of division in DFmode (DDBR).  */
  const int debr;     /* cost of division in SFmode (DEBR).  */
  const int dlgr;     /* cost of a DLGR instruction.  */
  const int dlr;      /* cost of a DLR instruction.  */
  const int dr;       /* cost of a DR instruction.  */
  const int dsgfr;    /* cost of a DSGFR instruction.  */
  const int dsgr;     /* cost of a DSGR instruction.  */
};

#define s390_cost ((const struct processor_costs *)(s390_cost_pointer))

static const
struct processor_costs z900_cost =
{
  COSTS_N_INSNS (5),     /* M     */
  COSTS_N_INSNS (10),    /* MGHI  */
  COSTS_N_INSNS (5),     /* MH    */
  COSTS_N_INSNS (4),     /* MHI   */
  COSTS_N_INSNS (5),     /* ML    */
  COSTS_N_INSNS (5),     /* MR    */
  COSTS_N_INSNS (4),     /* MS    */
  COSTS_N_INSNS (15),    /* MSG   */
  COSTS_N_INSNS (7),     /* MSGF  */
  COSTS_N_INSNS (7),     /* MSGFR */
  COSTS_N_INSNS (10),    /* MSGR  */
  COSTS_N_INSNS (4),     /* MSR   */
  COSTS_N_INSNS (7),     /* multiplication in DFmode */
  COSTS_N_INSNS (13),    /* MXBR */
  COSTS_N_INSNS (136),   /* SQXBR */
  COSTS_N_INSNS (44),    /* SQDBR */
  COSTS_N_INSNS (35),    /* SQEBR */
  COSTS_N_INSNS (18),    /* MADBR */
  COSTS_N_INSNS (13),    /* MAEBR */
  COSTS_N_INSNS (134),   /* DXBR */
  COSTS_N_INSNS (30),    /* DDBR */
  COSTS_N_INSNS (27),    /* DEBR */
  COSTS_N_INSNS (220),   /* DLGR */
  COSTS_N_INSNS (34),    /* DLR */
  COSTS_N_INSNS (34),    /* DR */
  COSTS_N_INSNS (32),    /* DSGFR */
  COSTS_N_INSNS (32),    /* DSGR */
};

static const
struct processor_costs z990_cost =
{
  COSTS_N_INSNS (4),     /* M     */
  COSTS_N_INSNS (2),     /* MGHI  */
  COSTS_N_INSNS (2),     /* MH    */
  COSTS_N_INSNS (2),     /* MHI   */
  COSTS_N_INSNS (4),     /* ML    */
  COSTS_N_INSNS (4),     /* MR    */
  COSTS_N_INSNS (5),     /* MS    */
  COSTS_N_INSNS (6),     /* MSG   */
  COSTS_N_INSNS (4),     /* MSGF  */
  COSTS_N_INSNS (4),     /* MSGFR */
  COSTS_N_INSNS (4),     /* MSGR  */
  COSTS_N_INSNS (4),     /* MSR   */
  COSTS_N_INSNS (1),     /* multiplication in DFmode */
  COSTS_N_INSNS (28),    /* MXBR */
  COSTS_N_INSNS (130),   /* SQXBR */
  COSTS_N_INSNS (66),    /* SQDBR */
  COSTS_N_INSNS (38),    /* SQEBR */
  COSTS_N_INSNS (1),     /* MADBR */
  COSTS_N_INSNS (1),     /* MAEBR */
  COSTS_N_INSNS (60),    /* DXBR */
  COSTS_N_INSNS (40),    /* DDBR */
  COSTS_N_INSNS (26),    /* DEBR */
  COSTS_N_INSNS (176),   /* DLGR */
  COSTS_N_INSNS (31),    /* DLR */
  COSTS_N_INSNS (31),    /* DR */
  COSTS_N_INSNS (31),    /* DSGFR */
  COSTS_N_INSNS (31),    /* DSGR */
};

static const
struct processor_costs z9_109_cost =
{
  COSTS_N_INSNS (4),     /* M     */
  COSTS_N_INSNS (2),     /* MGHI  */
  COSTS_N_INSNS (2),     /* MH    */
  COSTS_N_INSNS (2),     /* MHI   */
  COSTS_N_INSNS (4),     /* ML    */
  COSTS_N_INSNS (4),     /* MR    */
  COSTS_N_INSNS (5),     /* MS    */
  COSTS_N_INSNS (6),     /* MSG   */
  COSTS_N_INSNS (4),     /* MSGF  */
  COSTS_N_INSNS (4),     /* MSGFR */
  COSTS_N_INSNS (4),     /* MSGR  */
  COSTS_N_INSNS (4),     /* MSR   */
  COSTS_N_INSNS (1),     /* multiplication in DFmode */
  COSTS_N_INSNS (28),    /* MXBR */
  COSTS_N_INSNS (130),   /* SQXBR */
  COSTS_N_INSNS (66),    /* SQDBR */
  COSTS_N_INSNS (38),    /* SQEBR */
  COSTS_N_INSNS (1),     /* MADBR */
  COSTS_N_INSNS (1),     /* MAEBR */
  COSTS_N_INSNS (60),    /* DXBR */
  COSTS_N_INSNS (40),    /* DDBR */
  COSTS_N_INSNS (26),    /* DEBR */
  COSTS_N_INSNS (30),    /* DLGR */
  COSTS_N_INSNS (23),    /* DLR */
  COSTS_N_INSNS (23),    /* DR */
  COSTS_N_INSNS (24),    /* DSGFR */
  COSTS_N_INSNS (24),    /* DSGR */
};

static const
struct processor_costs z10_cost =
{
  COSTS_N_INSNS (10),    /* M     */
  COSTS_N_INSNS (10),    /* MGHI  */
  COSTS_N_INSNS (10),    /* MH    */
  COSTS_N_INSNS (10),    /* MHI   */
  COSTS_N_INSNS (10),    /* ML    */
  COSTS_N_INSNS (10),    /* MR    */
  COSTS_N_INSNS (10),    /* MS    */
  COSTS_N_INSNS (10),    /* MSG   */
  COSTS_N_INSNS (10),    /* MSGF  */
  COSTS_N_INSNS (10),    /* MSGFR */
  COSTS_N_INSNS (10),    /* MSGR  */
  COSTS_N_INSNS (10),    /* MSR   */
  COSTS_N_INSNS (1),     /* multiplication in DFmode */
  COSTS_N_INSNS (50),    /* MXBR */
  COSTS_N_INSNS (120),   /* SQXBR */
  COSTS_N_INSNS (52),    /* SQDBR */
  COSTS_N_INSNS (38),    /* SQEBR */
  COSTS_N_INSNS (1),     /* MADBR */
  COSTS_N_INSNS (1),     /* MAEBR */
  COSTS_N_INSNS (111),   /* DXBR */
  COSTS_N_INSNS (39),    /* DDBR */
  COSTS_N_INSNS (32),    /* DEBR */
  COSTS_N_INSNS (160),   /* DLGR */
  COSTS_N_INSNS (71),    /* DLR */
  COSTS_N_INSNS (71),    /* DR */
  COSTS_N_INSNS (71),    /* DSGFR */
  COSTS_N_INSNS (71),    /* DSGR */
};

static const
struct processor_costs z196_cost =
{
  COSTS_N_INSNS (7),     /* M     */
  COSTS_N_INSNS (5),     /* MGHI  */
  COSTS_N_INSNS (5),     /* MH    */
  COSTS_N_INSNS (5),     /* MHI   */
  COSTS_N_INSNS (7),     /* ML    */
  COSTS_N_INSNS (7),     /* MR    */
  COSTS_N_INSNS (6),     /* MS    */
  COSTS_N_INSNS (8),     /* MSG   */
  COSTS_N_INSNS (6),     /* MSGF  */
  COSTS_N_INSNS (6),     /* MSGFR */
  COSTS_N_INSNS (8),     /* MSGR  */
  COSTS_N_INSNS (6),     /* MSR   */
  COSTS_N_INSNS (1),     /* multiplication in DFmode */
  COSTS_N_INSNS (40),    /* MXBR B+40 */
  COSTS_N_INSNS (100),   /* SQXBR B+100 */
  COSTS_N_INSNS (42),    /* SQDBR B+42 */
  COSTS_N_INSNS (28),    /* SQEBR B+28 */
  COSTS_N_INSNS (1),     /* MADBR B */
  COSTS_N_INSNS (1),     /* MAEBR B */
  COSTS_N_INSNS (101),   /* DXBR B+101 */
  COSTS_N_INSNS (29),    /* DDBR */
  COSTS_N_INSNS (22),    /* DEBR */
  COSTS_N_INSNS (160),   /* DLGR cracked */
  COSTS_N_INSNS (160),   /* DLR cracked */
  COSTS_N_INSNS (160),   /* DR expanded */
  COSTS_N_INSNS (160),   /* DSGFR cracked */
  COSTS_N_INSNS (160),   /* DSGR cracked */
};

static const
struct processor_costs zEC12_cost =
{
  COSTS_N_INSNS (7),     /* M     */
  COSTS_N_INSNS (5),     /* MGHI  */
  COSTS_N_INSNS (5),     /* MH    */
  COSTS_N_INSNS (5),     /* MHI   */
  COSTS_N_INSNS (7),     /* ML    */
  COSTS_N_INSNS (7),     /* MR    */
  COSTS_N_INSNS (6),     /* MS    */
  COSTS_N_INSNS (8),     /* MSG   */
  COSTS_N_INSNS (6),     /* MSGF  */
  COSTS_N_INSNS (6),     /* MSGFR */
  COSTS_N_INSNS (8),     /* MSGR  */
  COSTS_N_INSNS (6),     /* MSR   */
  COSTS_N_INSNS (1),     /* multiplication in DFmode */
  COSTS_N_INSNS (40),    /* MXBR B+40 */
  COSTS_N_INSNS (100),   /* SQXBR B+100 */
  COSTS_N_INSNS (42),    /* SQDBR B+42 */
  COSTS_N_INSNS (28),    /* SQEBR B+28 */
  COSTS_N_INSNS (1),     /* MADBR B */
  COSTS_N_INSNS (1),     /* MAEBR B */
  COSTS_N_INSNS (131),   /* DXBR B+131 */
  COSTS_N_INSNS (29),    /* DDBR */
  COSTS_N_INSNS (22),    /* DEBR */
  COSTS_N_INSNS (160),   /* DLGR cracked */
  COSTS_N_INSNS (160),   /* DLR cracked */
  COSTS_N_INSNS (160),   /* DR expanded */
  COSTS_N_INSNS (160),   /* DSGFR cracked */
  COSTS_N_INSNS (160),   /* DSGR cracked */
};

const struct s390_processor processor_table[] =
{
  { "z900",   "z900",   PROCESSOR_2064_Z900,   &z900_cost,   5  },
  { "z990",   "z990",   PROCESSOR_2084_Z990,   &z990_cost,   6  },
  { "z9-109", "z9-109", PROCESSOR_2094_Z9_109, &z9_109_cost, 7  },
  { "z9-ec",  "z9-ec",  PROCESSOR_2094_Z9_EC,  &z9_109_cost, 7  },
  { "z10",    "z10",    PROCESSOR_2097_Z10,    &z10_cost,    8  },
  { "z196",   "z196",   PROCESSOR_2817_Z196,   &z196_cost,   9  },
  { "zEC12",  "zEC12",  PROCESSOR_2827_ZEC12,  &zEC12_cost,  10 },
  { "z13",    "z13",    PROCESSOR_2964_Z13,    &zEC12_cost,  11 },
  { "z14",    "arch12", PROCESSOR_3906_Z14,    &zEC12_cost,  12 },
  { "z15",    "arch13", PROCESSOR_8561_Z15,    &zEC12_cost,  13 },
  { "native", "",       PROCESSOR_NATIVE,      NULL,         0  }
};

extern int reload_completed;

/* Kept up to date using the SCHED_VARIABLE_ISSUE hook.  */
static rtx_insn *last_scheduled_insn;
#define NUM_SIDES 2

#define MAX_SCHED_UNITS 4
static int last_scheduled_unit_distance[MAX_SCHED_UNITS][NUM_SIDES];

/* Estimate of the number of cycles a long-running insn occupies an
   execution unit.  */
static int fxd_longrunning[NUM_SIDES];
static int fpd_longrunning[NUM_SIDES];

/* The maximum score added for an instruction whose unit hasn't been
   in use for MAX_SCHED_MIX_DISTANCE steps.  Increase this value to
   give instruction mix scheduling more priority over instruction
   grouping.  */
#define MAX_SCHED_MIX_SCORE 2

/* The maximum distance up to which individual scores will be
   calculated.  Everything beyond this gives MAX_SCHED_MIX_SCORE.
   Increase this with the OOO window size of the machine.  */
#define MAX_SCHED_MIX_DISTANCE 70

/* Structure used to hold the components of a S/390 memory
   address.  A legitimate address on S/390 is of the general
   form
        base + index + displacement
   where any of the components is optional.

   base and index are registers of the class ADDR_REGS,
   displacement is an unsigned 12-bit immediate constant.  */
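
/* As an example, with base register %r3, index register %r2 and
   displacement 200, an RX-format instruction like

       L  %r1,200(%r2,%r3)

   accesses the storage at address %r2 + %r3 + 200.  */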

/* The max number of insns of backend generated memset/memcpy/memcmp
   loops.  This value is used in the unroll adjust hook to detect such
   loops.  Current max is 9 coming from the memcmp loop.  */
#define BLOCK_MEM_OPS_LOOP_INSNS 9

struct s390_address
{
  rtx base;
  rtx indx;
  rtx disp;
  bool pointer;
  bool literal_pool;
};

/* Few accessor macros for struct cfun->machine->s390_frame_layout.  */

#define cfun_frame_layout (cfun->machine->frame_layout)
#define cfun_save_high_fprs_p (!!cfun_frame_layout.high_fprs)
#define cfun_save_arg_fprs_p (!!(TARGET_64BIT                           \
                                 ? cfun_frame_layout.fpr_bitmap & 0x0f  \
                                 : cfun_frame_layout.fpr_bitmap & 0x03))
#define cfun_gprs_save_area_size ((cfun_frame_layout.last_save_gpr_slot - \
  cfun_frame_layout.first_save_gpr_slot + 1) * UNITS_PER_LONG)
#define cfun_set_fpr_save(REGNO) (cfun->machine->frame_layout.fpr_bitmap |= \
  (1 << (REGNO - FPR0_REGNUM)))
#define cfun_fpr_save_p(REGNO) (!!(cfun->machine->frame_layout.fpr_bitmap & \
  (1 << (REGNO - FPR0_REGNUM))))
#define cfun_gpr_save_slot(REGNO) \
  cfun->machine->frame_layout.gpr_save_slots[REGNO]

/* Number of GPRs and FPRs used for argument passing.  */
#define GP_ARG_NUM_REG 5
#define FP_ARG_NUM_REG (TARGET_64BIT ? 4 : 2)
#define VEC_ARG_NUM_REG 8

/* A couple of shortcuts.  */
#define CONST_OK_FOR_J(x) \
        CONST_OK_FOR_CONSTRAINT_P((x), 'J', "J")
#define CONST_OK_FOR_K(x) \
        CONST_OK_FOR_CONSTRAINT_P((x), 'K', "K")
#define CONST_OK_FOR_Os(x) \
        CONST_OK_FOR_CONSTRAINT_P((x), 'O', "Os")
#define CONST_OK_FOR_Op(x) \
        CONST_OK_FOR_CONSTRAINT_P((x), 'O', "Op")
#define CONST_OK_FOR_On(x) \
        CONST_OK_FOR_CONSTRAINT_P((x), 'O', "On")

/* A value needing more than one hard register must start at an even
   register number.  */
#define REGNO_PAIR_OK(REGNO, MODE)                               \
  (s390_hard_regno_nregs ((REGNO), (MODE)) == 1 || !((REGNO) & 1))

/* That's the read-ahead of the dynamic branch prediction unit in
   bytes on a z10 (or higher) CPU.  */
#define PREDICT_DISTANCE (TARGET_Z10 ? 384 : 2048)

/* Masks per jump target register indicating which thunks need to be
   generated.  */
static GTY(()) int indirect_branch_prez10thunk_mask = 0;
static GTY(()) int indirect_branch_z10thunk_mask = 0;

#define INDIRECT_BRANCH_NUM_OPTIONS 4

enum s390_indirect_branch_option
  {
    s390_opt_indirect_branch_jump = 0,
    s390_opt_indirect_branch_call,
    s390_opt_function_return_reg,
    s390_opt_function_return_mem
  };

static GTY(()) int indirect_branch_table_label_no[INDIRECT_BRANCH_NUM_OPTIONS] = { 0 };
const char *indirect_branch_table_label[INDIRECT_BRANCH_NUM_OPTIONS] = \
  { "LJUMP", "LCALL", "LRETREG", "LRETMEM" };
const char *indirect_branch_table_name[INDIRECT_BRANCH_NUM_OPTIONS] = \
  { ".s390_indirect_jump", ".s390_indirect_call",
    ".s390_return_reg", ".s390_return_mem" };

bool
s390_return_addr_from_memory ()
{
  return cfun_gpr_save_slot (RETURN_REGNUM) == SAVE_SLOT_STACK;
}

/* Indicate which ABI has been used for passing vector args.
   0 - no vector type arguments have been passed where the ABI is relevant
   1 - the old ABI has been used
   2 - a vector type argument has been passed either in a vector register
       or on the stack by value  */
static int s390_vector_abi = 0;

/* Set the vector ABI marker if TYPE is subject to the vector ABI
   switch.  The vector ABI affects only vector data types.  There are
   two aspects of the vector ABI relevant here:

   1. vectors >= 16 bytes have an alignment of 8 bytes with the new
      ABI and natural alignment with the old.

   2. vectors <= 16 bytes are passed in VRs or by value on the stack
      with the new ABI but by reference on the stack with the old.

   If ARG_P is true TYPE is used for a function argument or return
   value.  The ABI marker then is set for all vector data types.  If
   ARG_P is false only type 1 vectors are being checked.  */

static void
s390_check_type_for_vector_abi (const_tree type, bool arg_p, bool in_struct_p)
{
  static hash_set<const_tree> visited_types_hash;

  if (s390_vector_abi)
    return;

  if (type == NULL_TREE || TREE_CODE (type) == ERROR_MARK)
    return;

  if (visited_types_hash.contains (type))
    return;

  visited_types_hash.add (type);

  if (VECTOR_TYPE_P (type))
    {
      int type_size = int_size_in_bytes (type);

      /* Outside of arguments only the alignment is changing and this
         only happens for vector types >= 16 bytes.  */
      if (!arg_p && type_size < 16)
        return;

      /* In arguments vector types > 16 bytes are passed as before
         (GCC never enforced the bigger alignment for arguments which
         was required by the old vector ABI).  However, it might still
         be ABI relevant due to the changed alignment if it is a
         struct member.  */
      if (arg_p && type_size > 16 && !in_struct_p)
        return;

      s390_vector_abi = TARGET_VX_ABI ? 2 : 1;
    }
  else if (POINTER_TYPE_P (type) || TREE_CODE (type) == ARRAY_TYPE)
    {
      /* ARRAY_TYPE: Since with neither of the ABIs we have more than
         natural alignment there will never be ABI dependent padding
         in an array type.  That's why we do not set in_struct_p to
         true here.  */
      s390_check_type_for_vector_abi (TREE_TYPE (type), arg_p, in_struct_p);
    }
  else if (TREE_CODE (type) == FUNCTION_TYPE || TREE_CODE (type) == METHOD_TYPE)
    {
      tree arg_chain;

      /* Check the return type.  */
      s390_check_type_for_vector_abi (TREE_TYPE (type), true, false);

      for (arg_chain = TYPE_ARG_TYPES (type);
           arg_chain;
           arg_chain = TREE_CHAIN (arg_chain))
        s390_check_type_for_vector_abi (TREE_VALUE (arg_chain), true, false);
    }
  else if (RECORD_OR_UNION_TYPE_P (type))
    {
      tree field;

      for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
        {
          if (TREE_CODE (field) != FIELD_DECL)
            continue;

          s390_check_type_for_vector_abi (TREE_TYPE (field), arg_p, true);
        }
    }
}
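
/* An example of aspect 1 above (a hypothetical type, for
   illustration only):

     typedef int v4si __attribute__ ((vector_size (16)));
     struct s { char c; v4si v; };

   The member V is aligned to 8 bytes under the new vector ABI but to
   its natural 16 bytes under the old one, so the layout of struct s
   differs between the two ABIs and the marker above gets set once
   such a type is encountered.  */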


/* System z builtins.  */

#include "s390-builtins.h"

const unsigned int bflags_builtin[S390_BUILTIN_MAX + 1] =
  {
#undef B_DEF
#undef OB_DEF
#undef OB_DEF_VAR
#define B_DEF(NAME, PATTERN, ATTRS, BFLAGS, ...) BFLAGS,
#define OB_DEF(...)
#define OB_DEF_VAR(...)
#include "s390-builtins.def"
    0
  };

const unsigned int opflags_builtin[S390_BUILTIN_MAX + 1] =
  {
#undef B_DEF
#undef OB_DEF
#undef OB_DEF_VAR
#define B_DEF(NAME, PATTERN, ATTRS, BFLAGS, OPFLAGS, ...) OPFLAGS,
#define OB_DEF(...)
#define OB_DEF_VAR(...)
#include "s390-builtins.def"
    0
  };

const unsigned int bflags_overloaded_builtin[S390_OVERLOADED_BUILTIN_MAX + 1] =
  {
#undef B_DEF
#undef OB_DEF
#undef OB_DEF_VAR
#define B_DEF(...)
#define OB_DEF(NAME, FIRST_VAR_NAME, LAST_VAR_NAME, BFLAGS, ...) BFLAGS,
#define OB_DEF_VAR(...)
#include "s390-builtins.def"
    0
  };

const unsigned int
bflags_overloaded_builtin_var[S390_OVERLOADED_BUILTIN_VAR_MAX + 1] =
  {
#undef B_DEF
#undef OB_DEF
#undef OB_DEF_VAR
#define B_DEF(...)
#define OB_DEF(...)
#define OB_DEF_VAR(NAME, PATTERN, FLAGS, OPFLAGS, FNTYPE) FLAGS,
#include "s390-builtins.def"
    0
  };

const unsigned int
opflags_overloaded_builtin_var[S390_OVERLOADED_BUILTIN_VAR_MAX + 1] =
  {
#undef B_DEF
#undef OB_DEF
#undef OB_DEF_VAR
#define B_DEF(...)
#define OB_DEF(...)
#define OB_DEF_VAR(NAME, PATTERN, FLAGS, OPFLAGS, FNTYPE) OPFLAGS,
#include "s390-builtins.def"
    0
  };

tree s390_builtin_types[BT_MAX];
tree s390_builtin_fn_types[BT_FN_MAX];
tree s390_builtin_decls[S390_BUILTIN_MAX +
                        S390_OVERLOADED_BUILTIN_MAX +
                        S390_OVERLOADED_BUILTIN_VAR_MAX];

static enum insn_code const code_for_builtin[S390_BUILTIN_MAX + 1] = {
#undef B_DEF
#undef OB_DEF
#undef OB_DEF_VAR
#define B_DEF(NAME, PATTERN, ...) CODE_FOR_##PATTERN,
#define OB_DEF(...)
#define OB_DEF_VAR(...)

#include "s390-builtins.def"
  CODE_FOR_nothing
};
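
/* A sketch of how the X-macro tables above expand, using a made-up
   .def entry (the real entries live in s390-builtins.def):

     B_DEF (s390_foo, foo_pattern, 0, B_VX, O1_U4, BT_FN_INT_INT)

   would contribute B_VX to bflags_builtin, O1_U4 to opflags_builtin
   and CODE_FOR_foo_pattern to code_for_builtin, all at index
   S390_BUILTIN_s390_foo.  */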

static void
s390_init_builtins (void)
{
  /* These definitions are being used in s390-builtins.def.  */
  tree returns_twice_attr = tree_cons (get_identifier ("returns_twice"),
                                       NULL, NULL);
  tree noreturn_attr = tree_cons (get_identifier ("noreturn"), NULL, NULL);
  tree c_uint64_type_node;

  /* The uint64_type_node from tree.c is not compatible with the C99
     uint64_t data type.  What we want is c_uint64_type_node from
     c-common.c.  But since backend code is not supposed to interface
     with the frontend we recreate it here.  */
  if (TARGET_64BIT)
    c_uint64_type_node = long_unsigned_type_node;
  else
    c_uint64_type_node = long_long_unsigned_type_node;

#undef DEF_TYPE
#define DEF_TYPE(INDEX, NODE, CONST_P)                  \
  if (s390_builtin_types[INDEX] == NULL)                \
    s390_builtin_types[INDEX] = (!CONST_P) ?            \
      (NODE) : build_type_variant ((NODE), 1, 0);

#undef DEF_POINTER_TYPE
#define DEF_POINTER_TYPE(INDEX, INDEX_BASE)                             \
  if (s390_builtin_types[INDEX] == NULL)                                \
    s390_builtin_types[INDEX] =                                         \
      build_pointer_type (s390_builtin_types[INDEX_BASE]);

#undef DEF_DISTINCT_TYPE
#define DEF_DISTINCT_TYPE(INDEX, INDEX_BASE)                            \
  if (s390_builtin_types[INDEX] == NULL)                                \
    s390_builtin_types[INDEX] =                                         \
      build_distinct_type_copy (s390_builtin_types[INDEX_BASE]);

#undef DEF_VECTOR_TYPE
#define DEF_VECTOR_TYPE(INDEX, INDEX_BASE, ELEMENTS)                    \
  if (s390_builtin_types[INDEX] == NULL)                                \
    s390_builtin_types[INDEX] =                                         \
      build_vector_type (s390_builtin_types[INDEX_BASE], ELEMENTS);

#undef DEF_OPAQUE_VECTOR_TYPE
#define DEF_OPAQUE_VECTOR_TYPE(INDEX, INDEX_BASE, ELEMENTS)             \
  if (s390_builtin_types[INDEX] == NULL)                                \
    s390_builtin_types[INDEX] =                                         \
      build_opaque_vector_type (s390_builtin_types[INDEX_BASE], ELEMENTS);

#undef DEF_FN_TYPE
#define DEF_FN_TYPE(INDEX, args...)                     \
  if (s390_builtin_fn_types[INDEX] == NULL)             \
    s390_builtin_fn_types[INDEX] =                      \
      build_function_type_list (args, NULL_TREE);
#undef DEF_OV_TYPE
#define DEF_OV_TYPE(...)
#include "s390-builtin-types.def"

#undef B_DEF
#define B_DEF(NAME, PATTERN, ATTRS, BFLAGS, OPFLAGS, FNTYPE)            \
  if (s390_builtin_decls[S390_BUILTIN_##NAME] == NULL)                  \
    s390_builtin_decls[S390_BUILTIN_##NAME] =                           \
      add_builtin_function ("__builtin_" #NAME,                         \
                            s390_builtin_fn_types[FNTYPE],              \
                            S390_BUILTIN_##NAME,                        \
                            BUILT_IN_MD,                                \
                            NULL,                                       \
                            ATTRS);
#undef OB_DEF
#define OB_DEF(NAME, FIRST_VAR_NAME, LAST_VAR_NAME, BFLAGS, FNTYPE)     \
  if (s390_builtin_decls[S390_OVERLOADED_BUILTIN_##NAME + S390_BUILTIN_MAX] \
      == NULL)                                                          \
    s390_builtin_decls[S390_OVERLOADED_BUILTIN_##NAME + S390_BUILTIN_MAX] = \
      add_builtin_function ("__builtin_" #NAME,                         \
                            s390_builtin_fn_types[FNTYPE],              \
                            S390_OVERLOADED_BUILTIN_##NAME + S390_BUILTIN_MAX, \
                            BUILT_IN_MD,                                \
                            NULL,                                       \
                            0);
#undef OB_DEF_VAR
#define OB_DEF_VAR(...)
#include "s390-builtins.def"

}

/* Return true if ARG is appropriate as argument number ARGNUM of
   builtin DECL.  The operand flags from s390-builtins.def have to be
   passed as OP_FLAGS.  */
bool
s390_const_operand_ok (tree arg, int argnum, int op_flags, tree decl)
{
  if (O_UIMM_P (op_flags))
    {
      int bitwidths[] = { 1, 2, 3, 4, 5, 8, 12, 16, 32 };
      int bitwidth = bitwidths[op_flags - O_U1];

      if (!tree_fits_uhwi_p (arg)
          || tree_to_uhwi (arg) > (HOST_WIDE_INT_1U << bitwidth) - 1)
        {
          error ("constant argument %d for builtin %qF is out of range "
                 "(0..%wu)", argnum, decl,
                 (HOST_WIDE_INT_1U << bitwidth) - 1);
          return false;
        }
    }

  if (O_SIMM_P (op_flags))
    {
      int bitwidths[] = { 2, 3, 4, 5, 8, 12, 16, 32 };
      int bitwidth = bitwidths[op_flags - O_S2];

      if (!tree_fits_shwi_p (arg)
          || tree_to_shwi (arg) < -(HOST_WIDE_INT_1 << (bitwidth - 1))
          || tree_to_shwi (arg) > ((HOST_WIDE_INT_1 << (bitwidth - 1)) - 1))
        {
          error ("constant argument %d for builtin %qF is out of range "
                 "(%wd..%wd)", argnum, decl,
                 -(HOST_WIDE_INT_1 << (bitwidth - 1)),
                 (HOST_WIDE_INT_1 << (bitwidth - 1)) - 1);
          return false;
        }
    }
  return true;
}
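
/* A worked example of the check above (assuming the O_U* flags are
   consecutive, as defined in s390-builtins.h): for an O_U4 operand,
   bitwidths[O_U4 - O_U1] is 4, so ARG must lie in 0..15 and a
   constant of 16 is rejected with the "out of range (0..15)"
   error.  */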

/* Expand an expression EXP that calls a built-in function,
   with result going to TARGET if that's convenient
   (and in mode MODE if that's convenient).
   SUBTARGET may be used as the target for computing one of EXP's operands.
   IGNORE is nonzero if the value is to be ignored.  */

static rtx
s390_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
                     machine_mode mode ATTRIBUTE_UNUSED,
                     int ignore ATTRIBUTE_UNUSED)
{
#define MAX_ARGS 6

  tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
  unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
  enum insn_code icode;
  rtx op[MAX_ARGS], pat;
  int arity;
  bool nonvoid;
  tree arg;
  call_expr_arg_iterator iter;
  unsigned int all_op_flags = opflags_for_builtin (fcode);
  machine_mode last_vec_mode = VOIDmode;

  if (TARGET_DEBUG_ARG)
    {
      fprintf (stderr,
               "s390_expand_builtin, code = %4d, %s, bflags = 0x%x\n",
               (int)fcode, IDENTIFIER_POINTER (DECL_NAME (fndecl)),
               bflags_for_builtin (fcode));
    }

  if (S390_USE_TARGET_ATTRIBUTE)
    {
      unsigned int bflags;

      bflags = bflags_for_builtin (fcode);
      if ((bflags & B_HTM) && !TARGET_HTM)
        {
          error ("builtin %qF is not supported without %<-mhtm%> "
                 "(default with %<-march=zEC12%> and higher)", fndecl);
          return const0_rtx;
        }
      if (((bflags & B_VX) || (bflags & B_VXE)) && !TARGET_VX)
        {
          error ("builtin %qF requires %<-mvx%> "
                 "(default with %<-march=z13%> and higher)", fndecl);
          return const0_rtx;
        }

      if ((bflags & B_VXE) && !TARGET_VXE)
        {
          error ("builtin %qF requires z14 or higher", fndecl);
          return const0_rtx;
        }

      if ((bflags & B_VXE2) && !TARGET_VXE2)
        {
          error ("builtin %qF requires z15 or higher", fndecl);
          return const0_rtx;
        }
    }
  if (fcode >= S390_OVERLOADED_BUILTIN_VAR_OFFSET
      && fcode < S390_ALL_BUILTIN_MAX)
    {
      gcc_unreachable ();
    }
  else if (fcode < S390_OVERLOADED_BUILTIN_OFFSET)
    {
      icode = code_for_builtin[fcode];
      /* Set a flag in the machine specific cfun part in order to support
         saving/restoring of FPRs.  */
      if (fcode == S390_BUILTIN_tbegin || fcode == S390_BUILTIN_tbegin_retry)
        cfun->machine->tbegin_p = true;
    }
  else if (fcode < S390_OVERLOADED_BUILTIN_VAR_OFFSET)
    {
      error ("unresolved overloaded builtin");
      return const0_rtx;
    }
  else
    internal_error ("bad builtin fcode");

  if (icode == 0)
    internal_error ("bad builtin icode");

  nonvoid = TREE_TYPE (TREE_TYPE (fndecl)) != void_type_node;

  if (nonvoid)
    {
      machine_mode tmode = insn_data[icode].operand[0].mode;
      if (!target
          || GET_MODE (target) != tmode
          || !(*insn_data[icode].operand[0].predicate) (target, tmode))
        target = gen_reg_rtx (tmode);

      /* There are builtins (e.g. vec_promote) with no vector
         arguments but an element selector.  So we have to also look
         at the vector return type when emitting the modulo
         operation.  */
      if (VECTOR_MODE_P (insn_data[icode].operand[0].mode))
        last_vec_mode = insn_data[icode].operand[0].mode;
    }

  arity = 0;
  FOR_EACH_CALL_EXPR_ARG (arg, iter, exp)
    {
      rtx tmp_rtx;
      const struct insn_operand_data *insn_op;
      unsigned int op_flags = all_op_flags & ((1 << O_SHIFT) - 1);

      all_op_flags = all_op_flags >> O_SHIFT;

      if (arg == error_mark_node)
        return NULL_RTX;
      if (arity >= MAX_ARGS)
        return NULL_RTX;

      if (O_IMM_P (op_flags)
          && TREE_CODE (arg) != INTEGER_CST)
        {
          error ("constant value required for builtin %qF argument %d",
                 fndecl, arity + 1);
          return const0_rtx;
        }

      if (!s390_const_operand_ok (arg, arity + 1, op_flags, fndecl))
        return const0_rtx;

      insn_op = &insn_data[icode].operand[arity + nonvoid];
      op[arity] = expand_expr (arg, NULL_RTX, insn_op->mode, EXPAND_NORMAL);

      /* expand_expr truncates constants to the target mode only if it
         is "convenient".  However, our checks below rely on this
         being done.  */
      if (CONST_INT_P (op[arity])
          && SCALAR_INT_MODE_P (insn_op->mode)
          && GET_MODE (op[arity]) != insn_op->mode)
        op[arity] = GEN_INT (trunc_int_for_mode (INTVAL (op[arity]),
                                                 insn_op->mode));

      /* Wrap the expanded RTX for pointer types into a MEM expr with
         the proper mode.  This allows us to use e.g. (match_operand
         "memory_operand"..) in the insn patterns instead of (mem
         (match_operand "address_operand)).  This is helpful for
         patterns not just accepting MEMs.  */
      if (POINTER_TYPE_P (TREE_TYPE (arg))
          && insn_op->predicate != address_operand)
        op[arity] = gen_rtx_MEM (insn_op->mode, op[arity]);

      /* Expand the modulo operation required on element selectors.  */
      if (op_flags == O_ELEM)
        {
          gcc_assert (last_vec_mode != VOIDmode);
          op[arity] = simplify_expand_binop (SImode, code_to_optab (AND),
                                             op[arity],
                                             GEN_INT (GET_MODE_NUNITS (last_vec_mode) - 1),
                                             NULL_RTX, 1, OPTAB_DIRECT);
        }

      /* Record the vector mode used for an element selector.  This assumes:
         1. There is no builtin with two different vector modes and an element selector
         2. The element selector comes after the vector type it is referring to.
         This is currently true for all the builtins but FIXME: we
         should better check for that.  */
      if (VECTOR_MODE_P (insn_op->mode))
        last_vec_mode = insn_op->mode;

      if (insn_op->predicate (op[arity], insn_op->mode))
        {
          arity++;
          continue;
        }

      /* A memory operand is rejected by the memory_operand predicate.
         Try making the address legal by copying it into a register.  */
      if (MEM_P (op[arity])
          && insn_op->predicate == memory_operand
          && (GET_MODE (XEXP (op[arity], 0)) == Pmode
              || GET_MODE (XEXP (op[arity], 0)) == VOIDmode))
        {
          op[arity] = replace_equiv_address (op[arity],
                                             copy_to_mode_reg (Pmode,
                                                               XEXP (op[arity], 0)));
        }
      /* Some of the builtins require different modes/types than the
         pattern in order to implement a specific API.  Instead of
         adding many expanders which do the mode change we do it here.
         E.g. s390_vec_add_u128, which is required to have vector
         unsigned char arguments, is mapped to addti3.  */
      else if (insn_op->mode != VOIDmode
               && GET_MODE (op[arity]) != VOIDmode
               && GET_MODE (op[arity]) != insn_op->mode
               && ((tmp_rtx = simplify_gen_subreg (insn_op->mode, op[arity],
                                                   GET_MODE (op[arity]), 0))
                   != NULL_RTX))
        {
          op[arity] = tmp_rtx;
        }

      /* The predicate rejects the operand although the mode is fine.
         Copy the operand to a register.  */
      if (!insn_op->predicate (op[arity], insn_op->mode)
          && (GET_MODE (op[arity]) == insn_op->mode
              || GET_MODE (op[arity]) == VOIDmode
              || (insn_op->predicate == address_operand
                  && GET_MODE (op[arity]) == Pmode)))
        {
          /* An address_operand usually has VOIDmode in the expander
             so we cannot use this.  */
          machine_mode target_mode =
            (insn_op->predicate == address_operand
             ? (machine_mode) Pmode : insn_op->mode);
          op[arity] = copy_to_mode_reg (target_mode, op[arity]);
        }

      if (!insn_op->predicate (op[arity], insn_op->mode))
        {
          error ("invalid argument %d for builtin %qF", arity + 1, fndecl);
          return const0_rtx;
        }
      arity++;
    }

  switch (arity)
    {
    case 0:
      pat = GEN_FCN (icode) (target);
      break;
    case 1:
      if (nonvoid)
        pat = GEN_FCN (icode) (target, op[0]);
      else
        pat = GEN_FCN (icode) (op[0]);
      break;
    case 2:
      if (nonvoid)
        pat = GEN_FCN (icode) (target, op[0], op[1]);
      else
        pat = GEN_FCN (icode) (op[0], op[1]);
      break;
    case 3:
      if (nonvoid)
        pat = GEN_FCN (icode) (target, op[0], op[1], op[2]);
      else
        pat = GEN_FCN (icode) (op[0], op[1], op[2]);
      break;
    case 4:
      if (nonvoid)
        pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3]);
      else
        pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
      break;
    case 5:
      if (nonvoid)
        pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3], op[4]);
      else
        pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3], op[4]);
      break;
    case 6:
      if (nonvoid)
        pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3], op[4], op[5]);
      else
        pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3], op[4], op[5]);
      break;
    default:
      gcc_unreachable ();
    }
  if (!pat)
    return NULL_RTX;
  emit_insn (pat);

  if (nonvoid)
    return target;
  else
    return const0_rtx;
}


static const int s390_hotpatch_hw_max = 1000000;
static int s390_hotpatch_hw_before_label = 0;
static int s390_hotpatch_hw_after_label = 0;

/* Check whether the hotpatch attribute is applied to a function and, if
   it has arguments, whether the arguments are valid.  */

static tree
s390_handle_hotpatch_attribute (tree *node, tree name, tree args,
                                int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
{
  tree expr;
  tree expr2;
  int err;

  if (TREE_CODE (*node) != FUNCTION_DECL)
    {
      warning (OPT_Wattributes, "%qE attribute only applies to functions",
               name);
      *no_add_attrs = true;
    }
  if (args != NULL && TREE_CHAIN (args) != NULL)
    {
      expr = TREE_VALUE (args);
      expr2 = TREE_VALUE (TREE_CHAIN (args));
    }
  if (args == NULL || TREE_CHAIN (args) == NULL)
    err = 1;
  else if (TREE_CODE (expr) != INTEGER_CST
           || !INTEGRAL_TYPE_P (TREE_TYPE (expr))
           || wi::gtu_p (wi::to_wide (expr), s390_hotpatch_hw_max))
    err = 1;
  else if (TREE_CODE (expr2) != INTEGER_CST
           || !INTEGRAL_TYPE_P (TREE_TYPE (expr2))
           || wi::gtu_p (wi::to_wide (expr2), s390_hotpatch_hw_max))
    err = 1;
  else
    err = 0;
  if (err)
    {
      error ("requested %qE attribute is not a comma separated pair of"
             " non-negative integer constants or too large (max. %d)", name,
             s390_hotpatch_hw_max);
      *no_add_attrs = true;
    }

  return NULL_TREE;
}

/* Expand the s390_vector_bool type attribute.  */

static tree
s390_handle_vectorbool_attribute (tree *node, tree name ATTRIBUTE_UNUSED,
                                  tree args ATTRIBUTE_UNUSED,
                                  int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
{
  tree type = *node, result = NULL_TREE;
  machine_mode mode;

  while (POINTER_TYPE_P (type)
         || TREE_CODE (type) == FUNCTION_TYPE
         || TREE_CODE (type) == METHOD_TYPE
         || TREE_CODE (type) == ARRAY_TYPE)
    type = TREE_TYPE (type);

  mode = TYPE_MODE (type);
  switch (mode)
    {
    case E_DImode: case E_V2DImode:
      result = s390_builtin_types[BT_BV2DI];
      break;
    case E_SImode: case E_V4SImode:
      result = s390_builtin_types[BT_BV4SI];
      break;
    case E_HImode: case E_V8HImode:
      result = s390_builtin_types[BT_BV8HI];
      break;
    case E_QImode: case E_V16QImode:
      result = s390_builtin_types[BT_BV16QI];
      break;
    default:
      break;
    }

  *no_add_attrs = true;  /* No need to hang on to the attribute.  */

  if (result)
    *node = lang_hooks.types.reconstruct_complex_type (*node, result);

  return NULL_TREE;
}
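
/* A usage sketch (the typedef names are made up; vecintrin.h applies
   the attribute through its __bool macro):

     typedef __attribute__ ((vector_size (16))) unsigned int uv4si;
     typedef __attribute__ ((s390_vector_bool)) uv4si bv4si;

   Since uv4si has V4SImode, the handler above rewrites bv4si to the
   BT_BV4SI builtin type.  */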

/* Check syntax of function decl attributes having a string type value.  */

static tree
s390_handle_string_attribute (tree *node, tree name ATTRIBUTE_UNUSED,
                              tree args ATTRIBUTE_UNUSED,
                              int flags ATTRIBUTE_UNUSED,
                              bool *no_add_attrs)
{
  tree cst;

  if (TREE_CODE (*node) != FUNCTION_DECL)
    {
      warning (OPT_Wattributes, "%qE attribute only applies to functions",
               name);
      *no_add_attrs = true;
    }

  cst = TREE_VALUE (args);

  if (TREE_CODE (cst) != STRING_CST)
    {
      warning (OPT_Wattributes,
               "%qE attribute requires a string constant argument",
               name);
      *no_add_attrs = true;
    }

  if (is_attribute_p ("indirect_branch", name)
      || is_attribute_p ("indirect_branch_call", name)
      || is_attribute_p ("function_return", name)
      || is_attribute_p ("function_return_reg", name)
      || is_attribute_p ("function_return_mem", name))
    {
      if (strcmp (TREE_STRING_POINTER (cst), "keep") != 0
          && strcmp (TREE_STRING_POINTER (cst), "thunk") != 0
          && strcmp (TREE_STRING_POINTER (cst), "thunk-extern") != 0)
        {
          warning (OPT_Wattributes,
                   "argument to %qE attribute is not "
                   "(keep|thunk|thunk-extern)", name);
          *no_add_attrs = true;
        }
    }

  if (is_attribute_p ("indirect_branch_jump", name)
      && strcmp (TREE_STRING_POINTER (cst), "keep") != 0
      && strcmp (TREE_STRING_POINTER (cst), "thunk") != 0
      && strcmp (TREE_STRING_POINTER (cst), "thunk-inline") != 0
      && strcmp (TREE_STRING_POINTER (cst), "thunk-extern") != 0)
    {
      warning (OPT_Wattributes,
               "argument to %qE attribute is not "
               "(keep|thunk|thunk-inline|thunk-extern)", name);
      *no_add_attrs = true;
    }

  return NULL_TREE;
}

static const struct attribute_spec s390_attribute_table[] = {
  { "hotpatch", 2, 2, true, false, false, false,
    s390_handle_hotpatch_attribute, NULL },
  { "s390_vector_bool", 0, 0, false, true, false, true,
    s390_handle_vectorbool_attribute, NULL },
  { "indirect_branch", 1, 1, true, false, false, false,
    s390_handle_string_attribute, NULL },
  { "indirect_branch_jump", 1, 1, true, false, false, false,
    s390_handle_string_attribute, NULL },
  { "indirect_branch_call", 1, 1, true, false, false, false,
    s390_handle_string_attribute, NULL },
  { "function_return", 1, 1, true, false, false, false,
    s390_handle_string_attribute, NULL },
  { "function_return_reg", 1, 1, true, false, false, false,
    s390_handle_string_attribute, NULL },
  { "function_return_mem", 1, 1, true, false, false, false,
    s390_handle_string_attribute, NULL },

  /* End element.  */
  { NULL, 0, 0, false, false, false, false, NULL, NULL }
};

/* Return the alignment for LABEL.  We default to the -falign-labels
   value except for the literal pool base label.  */
int
s390_label_align (rtx_insn *label)
{
  rtx_insn *prev_insn = prev_active_insn (label);
  rtx set, src;

  if (prev_insn == NULL_RTX)
    goto old;

  set = single_set (prev_insn);

  if (set == NULL_RTX)
    goto old;

  src = SET_SRC (set);

  /* Don't align literal pool base labels.  */
  if (GET_CODE (src) == UNSPEC
      && XINT (src, 1) == UNSPEC_MAIN_BASE)
    return 0;

 old:
  return align_labels.levels[0].log;
}

static GTY(()) rtx got_symbol;

/* Return the GOT table symbol.  The symbol will be created when the
   function is invoked for the first time.  */

static rtx
s390_got_symbol (void)
{
  if (!got_symbol)
    {
      got_symbol = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
      SYMBOL_REF_FLAGS (got_symbol) = SYMBOL_FLAG_LOCAL;
    }

  return got_symbol;
}

static scalar_int_mode
s390_libgcc_cmp_return_mode (void)
{
  return TARGET_64BIT ? DImode : SImode;
}

static scalar_int_mode
s390_libgcc_shift_count_mode (void)
{
  return TARGET_64BIT ? DImode : SImode;
}

static scalar_int_mode
s390_unwind_word_mode (void)
{
  return TARGET_64BIT ? DImode : SImode;
}

/* Return true if the back end supports mode MODE.  */
static bool
s390_scalar_mode_supported_p (scalar_mode mode)
{
  /* In contrast to the default implementation reject TImode constants on
     31-bit TARGET_ZARCH for ABI compliance.  */
  if (!TARGET_64BIT && TARGET_ZARCH && mode == TImode)
    return false;

  if (DECIMAL_FLOAT_MODE_P (mode))
    return default_decimal_float_supported_p ();

  return default_scalar_mode_supported_p (mode);
}

/* Return true if the back end supports vector mode MODE.  */
static bool
s390_vector_mode_supported_p (machine_mode mode)
{
  machine_mode inner;

  if (!VECTOR_MODE_P (mode)
      || !TARGET_VX
      || GET_MODE_SIZE (mode) > 16)
    return false;

  inner = GET_MODE_INNER (mode);

  switch (inner)
    {
    case E_QImode:
    case E_HImode:
    case E_SImode:
    case E_DImode:
    case E_TImode:
    case E_SFmode:
    case E_DFmode:
    case E_TFmode:
      return true;
    default:
      return false;
    }
}

/* Set the has_landing_pad_p flag in struct machine_function to VALUE.  */

void
s390_set_has_landing_pad_p (bool value)
{
  cfun->machine->has_landing_pad_p = value;
}

/* If two condition code modes are compatible, return a condition code
   mode which is compatible with both.  Otherwise, return
   VOIDmode.  */

static machine_mode
s390_cc_modes_compatible (machine_mode m1, machine_mode m2)
{
  if (m1 == m2)
    return m1;

  switch (m1)
    {
    case E_CCZmode:
      if (m2 == CCUmode || m2 == CCTmode || m2 == CCZ1mode
          || m2 == CCSmode || m2 == CCSRmode || m2 == CCURmode)
        return m2;
      return VOIDmode;

    case E_CCSmode:
    case E_CCUmode:
    case E_CCTmode:
    case E_CCSRmode:
    case E_CCURmode:
    case E_CCZ1mode:
      if (m2 == CCZmode)
        return m1;

      return VOIDmode;

    default:
      return VOIDmode;
    }
  return VOIDmode;
}

/* Return true if SET either doesn't set the CC register, or else
   the source and destination have matching CC modes and that
   CC mode is at least as constrained as REQ_MODE.  */

static bool
s390_match_ccmode_set (rtx set, machine_mode req_mode)
{
  machine_mode set_mode;

  gcc_assert (GET_CODE (set) == SET);

  /* These modes are supposed to be used only in CC consumer
     patterns.  */
  gcc_assert (req_mode != CCVIALLmode && req_mode != CCVIANYmode
              && req_mode != CCVFALLmode && req_mode != CCVFANYmode);

  if (GET_CODE (SET_DEST (set)) != REG || !CC_REGNO_P (REGNO (SET_DEST (set))))
    return 1;

  set_mode = GET_MODE (SET_DEST (set));
  switch (set_mode)
    {
    case E_CCZ1mode:
    case E_CCSmode:
    case E_CCSRmode:
    case E_CCUmode:
    case E_CCURmode:
    case E_CCLmode:
    case E_CCL1mode:
    case E_CCL2mode:
    case E_CCL3mode:
    case E_CCT1mode:
    case E_CCT2mode:
    case E_CCT3mode:
    case E_CCVEQmode:
    case E_CCVIHmode:
    case E_CCVIHUmode:
    case E_CCVFHmode:
    case E_CCVFHEmode:
      if (req_mode != set_mode)
        return 0;
      break;

    case E_CCZmode:
      if (req_mode != CCSmode && req_mode != CCUmode && req_mode != CCTmode
          && req_mode != CCSRmode && req_mode != CCURmode
          && req_mode != CCZ1mode)
        return 0;
      break;

    case E_CCAPmode:
    case E_CCANmode:
      if (req_mode != CCAmode)
        return 0;
      break;

    default:
      gcc_unreachable ();
    }

  return (GET_MODE (SET_SRC (set)) == set_mode);
}

/* Return true if every SET in INSN that sets the CC register
   has source and destination with matching CC modes and that
   CC mode is at least as constrained as REQ_MODE.
   If REQ_MODE is VOIDmode, always return false.  */

bool
s390_match_ccmode (rtx_insn *insn, machine_mode req_mode)
{
  int i;

  /* s390_tm_ccmode returns VOIDmode to indicate failure.  */
  if (req_mode == VOIDmode)
    return false;

  if (GET_CODE (PATTERN (insn)) == SET)
    return s390_match_ccmode_set (PATTERN (insn), req_mode);

  if (GET_CODE (PATTERN (insn)) == PARALLEL)
    for (i = 0; i < XVECLEN (PATTERN (insn), 0); i++)
      {
        rtx set = XVECEXP (PATTERN (insn), 0, i);
        if (GET_CODE (set) == SET)
          if (!s390_match_ccmode_set (set, req_mode))
            return false;
      }

  return true;
}

/* If a test-under-mask instruction can be used to implement
   (compare (and ... OP1) OP2), return the CC mode required
   to do that.  Otherwise, return VOIDmode.
   MIXED is true if the instruction can distinguish between
   CC1 and CC2 for mixed selected bits (TMxx), it is false
   if the instruction cannot (TM).  */

machine_mode
s390_tm_ccmode (rtx op1, rtx op2, bool mixed)
{
  int bit0, bit1;

  /* ??? Fixme: should work on CONST_WIDE_INT as well.  */
  if (GET_CODE (op1) != CONST_INT || GET_CODE (op2) != CONST_INT)
    return VOIDmode;

  /* Selected bits all zero: CC0.
     e.g.: int a; if ((a & (16 + 128)) == 0) */
  if (INTVAL (op2) == 0)
    return CCTmode;

  /* Selected bits all one: CC3.
     e.g.: int a; if ((a & (16 + 128)) == 16 + 128) */
  if (INTVAL (op2) == INTVAL (op1))
    return CCT3mode;

  /* Exactly two bits selected, mixed zeroes and ones: CC1 or CC2. e.g.:
     int a;
     if ((a & (16 + 128)) == 16)  -> CCT1
     if ((a & (16 + 128)) == 128) -> CCT2  */
  if (mixed)
    {
      bit1 = exact_log2 (INTVAL (op2));
      bit0 = exact_log2 (INTVAL (op1) ^ INTVAL (op2));
      if (bit0 != -1 && bit1 != -1)
        return bit0 > bit1 ? CCT1mode : CCT2mode;
    }

  return VOIDmode;
}

/* Given a comparison code OP (EQ, NE, etc.) and the operands
   OP0 and OP1 of a COMPARE, return the mode to be used for the
   comparison.  */

machine_mode
s390_select_ccmode (enum rtx_code code, rtx op0, rtx op1)
{
  switch (code)
    {
    case EQ:
    case NE:
      if ((GET_CODE (op0) == NEG || GET_CODE (op0) == ABS)
          && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)
        return CCAPmode;
      if (GET_CODE (op0) == PLUS && GET_CODE (XEXP (op0, 1)) == CONST_INT
          && CONST_OK_FOR_K (INTVAL (XEXP (op0, 1))))
        return CCAPmode;
      if ((GET_CODE (op0) == PLUS || GET_CODE (op0) == MINUS
           || GET_CODE (op1) == NEG)
          && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)
        return CCLmode;

      if (GET_CODE (op0) == AND)
        {
          /* Check whether we can potentially do it via TM.  */
          machine_mode ccmode;
          ccmode = s390_tm_ccmode (XEXP (op0, 1), op1, 1);
          if (ccmode != VOIDmode)
            {
              /* Relax CCTmode to CCZmode to allow fall-back to AND
                 if that turns out to be beneficial.  */
              return ccmode == CCTmode ? CCZmode : ccmode;
            }
        }

      if (register_operand (op0, HImode)
          && GET_CODE (op1) == CONST_INT
          && (INTVAL (op1) == -1 || INTVAL (op1) == 65535))
        return CCT3mode;
      if (register_operand (op0, QImode)
          && GET_CODE (op1) == CONST_INT
          && (INTVAL (op1) == -1 || INTVAL (op1) == 255))
        return CCT3mode;

      return CCZmode;

    case LE:
    case LT:
    case GE:
    case GT:
      /* The only overflow condition of NEG and ABS happens when the
         most negative value (INT_MIN) is used as parameter; the
         mathematically positive result wraps and stays negative.  So
         we have an overflow from a positive value to a negative.
         Using CCAP mode the resulting cc can be used for
         comparisons.  */
      if ((GET_CODE (op0) == NEG || GET_CODE (op0) == ABS)
          && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)
        return CCAPmode;

      /* If constants are involved in an add instruction it is possible to use
         the resulting cc for comparisons with zero.  Knowing the sign of the
         constant the overflow behavior gets predictable.  e.g.:
           int a, b; if ((b = a + c) > 0)
         with c as a constant value: c < 0 -> CCAN and c >= 0 -> CCAP  */
      if (GET_CODE (op0) == PLUS && GET_CODE (XEXP (op0, 1)) == CONST_INT
          && (CONST_OK_FOR_K (INTVAL (XEXP (op0, 1)))
              || (CONST_OK_FOR_CONSTRAINT_P (INTVAL (XEXP (op0, 1)), 'O', "Os")
                  /* Avoid INT32_MIN on 32 bit.  */
                  && (!TARGET_ZARCH || INTVAL (XEXP (op0, 1)) != -0x7fffffff - 1))))
        {
          if (INTVAL (XEXP (op0, 1)) < 0)
            return CCANmode;
          else
            return CCAPmode;
        }
      /* Fall through.  */
    case UNORDERED:
    case ORDERED:
    case UNEQ:
    case UNLE:
    case UNLT:
    case UNGE:
    case UNGT:
    case LTGT:
      if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
          && GET_CODE (op1) != CONST_INT)
        return CCSRmode;
      return CCSmode;

    case LTU:
    case GEU:
      if (GET_CODE (op0) == PLUS
          && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)
        return CCL1mode;

      if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
          && GET_CODE (op1) != CONST_INT)
        return CCURmode;
      return CCUmode;

    case LEU:
    case GTU:
      if (GET_CODE (op0) == MINUS
          && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)
        return CCL2mode;

      if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
          && GET_CODE (op1) != CONST_INT)
        return CCURmode;
      return CCUmode;

    default:
      gcc_unreachable ();
    }
}
1607
1608 /* Replace the comparison OP0 CODE OP1 by a semantically equivalent one
1609 that we can implement more efficiently. */
1610
1611 static void
s390_canonicalize_comparison(int * code,rtx * op0,rtx * op1,bool op0_preserve_value)1612 s390_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
1613 bool op0_preserve_value)
1614 {
1615 if (op0_preserve_value)
1616 return;
1617
1618 /* Convert ZERO_EXTRACT back to AND to enable TM patterns. */
1619 if ((*code == EQ || *code == NE)
1620 && *op1 == const0_rtx
1621 && GET_CODE (*op0) == ZERO_EXTRACT
1622 && GET_CODE (XEXP (*op0, 1)) == CONST_INT
1623 && GET_CODE (XEXP (*op0, 2)) == CONST_INT
1624 && SCALAR_INT_MODE_P (GET_MODE (XEXP (*op0, 0))))
1625 {
1626 rtx inner = XEXP (*op0, 0);
1627 HOST_WIDE_INT modesize = GET_MODE_BITSIZE (GET_MODE (inner));
1628 HOST_WIDE_INT len = INTVAL (XEXP (*op0, 1));
1629 HOST_WIDE_INT pos = INTVAL (XEXP (*op0, 2));
1630
1631 if (len > 0 && len < modesize
1632 && pos >= 0 && pos + len <= modesize
1633 && modesize <= HOST_BITS_PER_WIDE_INT)
1634 {
1635 unsigned HOST_WIDE_INT block;
1636 block = (HOST_WIDE_INT_1U << len) - 1;
1637 block <<= modesize - pos - len;
1638
1639 *op0 = gen_rtx_AND (GET_MODE (inner), inner,
1640 gen_int_mode (block, GET_MODE (inner)));
1641 }
1642 }
1643
1644 /* Narrow AND of memory against immediate to enable TM. */
1645 if ((*code == EQ || *code == NE)
1646 && *op1 == const0_rtx
1647 && GET_CODE (*op0) == AND
1648 && GET_CODE (XEXP (*op0, 1)) == CONST_INT
1649 && SCALAR_INT_MODE_P (GET_MODE (XEXP (*op0, 0))))
1650 {
1651 rtx inner = XEXP (*op0, 0);
1652 rtx mask = XEXP (*op0, 1);
1653
1654 /* Ignore paradoxical SUBREGs if all extra bits are masked out. */
1655 if (GET_CODE (inner) == SUBREG
1656 && SCALAR_INT_MODE_P (GET_MODE (SUBREG_REG (inner)))
1657 && (GET_MODE_SIZE (GET_MODE (inner))
1658 >= GET_MODE_SIZE (GET_MODE (SUBREG_REG (inner))))
1659 && ((INTVAL (mask)
1660 & GET_MODE_MASK (GET_MODE (inner))
1661 & ~GET_MODE_MASK (GET_MODE (SUBREG_REG (inner))))
	      == 0))
	inner = SUBREG_REG (inner);

      /* Do not change volatile MEMs.  */
      if (MEM_P (inner) && !MEM_VOLATILE_P (inner))
	{
	  int part = s390_single_part (XEXP (*op0, 1),
				       GET_MODE (inner), QImode, 0);
	  if (part >= 0)
	    {
	      mask = gen_int_mode (s390_extract_part (mask, QImode, 0), QImode);
	      inner = adjust_address_nv (inner, QImode, part);
	      *op0 = gen_rtx_AND (QImode, inner, mask);
	    }
	}
    }

  /* Narrow comparisons against 0xffff to HImode if possible.  */
  if ((*code == EQ || *code == NE)
      && GET_CODE (*op1) == CONST_INT
      && INTVAL (*op1) == 0xffff
      && SCALAR_INT_MODE_P (GET_MODE (*op0))
      && (nonzero_bits (*op0, GET_MODE (*op0))
	  & ~HOST_WIDE_INT_UC (0xffff)) == 0)
    {
      *op0 = gen_lowpart (HImode, *op0);
      *op1 = constm1_rtx;
    }

  /* Remove redundant UNSPEC_STRCMPCC_TO_INT conversions if possible.  */
  if (GET_CODE (*op0) == UNSPEC
      && XINT (*op0, 1) == UNSPEC_STRCMPCC_TO_INT
      && XVECLEN (*op0, 0) == 1
      && GET_MODE (XVECEXP (*op0, 0, 0)) == CCUmode
      && GET_CODE (XVECEXP (*op0, 0, 0)) == REG
      && REGNO (XVECEXP (*op0, 0, 0)) == CC_REGNUM
      && *op1 == const0_rtx)
    {
      enum rtx_code new_code = UNKNOWN;
      switch (*code)
	{
	case EQ: new_code = EQ;  break;
	case NE: new_code = NE;  break;
	case LT: new_code = GTU; break;
	case GT: new_code = LTU; break;
	case LE: new_code = GEU; break;
	case GE: new_code = LEU; break;
	default: break;
	}

      if (new_code != UNKNOWN)
	{
	  *op0 = XVECEXP (*op0, 0, 0);
	  *code = new_code;
	}
    }

  /* Remove redundant UNSPEC_CC_TO_INT conversions if possible.  */
  if (GET_CODE (*op0) == UNSPEC
      && XINT (*op0, 1) == UNSPEC_CC_TO_INT
      && XVECLEN (*op0, 0) == 1
      && GET_CODE (XVECEXP (*op0, 0, 0)) == REG
      && REGNO (XVECEXP (*op0, 0, 0)) == CC_REGNUM
      && CONST_INT_P (*op1))
    {
      enum rtx_code new_code = UNKNOWN;
      switch (GET_MODE (XVECEXP (*op0, 0, 0)))
	{
	case E_CCZmode:
	case E_CCRAWmode:
	  switch (*code)
	    {
	    case EQ: new_code = EQ; break;
	    case NE: new_code = NE; break;
	    default: break;
	    }
	  break;
	default: break;
	}

      if (new_code != UNKNOWN)
	{
	  /* For CCRAWmode put the required cc mask into the second
	     operand.  */
	  if (GET_MODE (XVECEXP (*op0, 0, 0)) == CCRAWmode
	      && INTVAL (*op1) >= 0 && INTVAL (*op1) <= 3)
	    *op1 = gen_rtx_CONST_INT (VOIDmode, 1 << (3 - INTVAL (*op1)));
	  *op0 = XVECEXP (*op0, 0, 0);
	  *code = new_code;
	}
    }

  /* Simplify cascaded EQ, NE with const0_rtx.  */
  if ((*code == NE || *code == EQ)
      && (GET_CODE (*op0) == EQ || GET_CODE (*op0) == NE)
      && GET_MODE (*op0) == SImode
      && GET_MODE (XEXP (*op0, 0)) == CCZ1mode
      && REG_P (XEXP (*op0, 0))
      && XEXP (*op0, 1) == const0_rtx
      && *op1 == const0_rtx)
    {
      if ((*code == EQ && GET_CODE (*op0) == NE)
	  || (*code == NE && GET_CODE (*op0) == EQ))
	*code = EQ;
      else
	*code = NE;
      *op0 = XEXP (*op0, 0);
    }

  /* Prefer register over memory as first operand.  */
  if (MEM_P (*op0) && REG_P (*op1))
    {
      rtx tem = *op0; *op0 = *op1; *op1 = tem;
      *code = (int) swap_condition ((enum rtx_code) *code);
    }

  /* A comparison result is compared against zero.  Replace it with
     the (perhaps inverted) original comparison.
     This probably should be done by simplify_relational_operation.  */
  if ((*code == EQ || *code == NE)
      && *op1 == const0_rtx
      && COMPARISON_P (*op0)
      && CC_REG_P (XEXP (*op0, 0)))
    {
      enum rtx_code new_code;

      if (*code == EQ)
	new_code = reversed_comparison_code_parts (GET_CODE (*op0),
						   XEXP (*op0, 0),
						   XEXP (*op0, 1), NULL);
      else
	new_code = GET_CODE (*op0);

      if (new_code != UNKNOWN)
	{
	  *code = new_code;
	  *op1 = XEXP (*op0, 1);
	  *op0 = XEXP (*op0, 0);
	}
    }

  /* ~a==b -> ~(a^b)==0   ~a!=b -> ~(a^b)!=0  */
  if (TARGET_Z15
      && (*code == EQ || *code == NE)
      && (GET_MODE (*op0) == DImode || GET_MODE (*op0) == SImode)
      && GET_CODE (*op0) == NOT)
    {
      machine_mode mode = GET_MODE (*op0);
      *op0 = gen_rtx_XOR (mode, XEXP (*op0, 0), *op1);
      *op0 = gen_rtx_NOT (mode, *op0);
      *op1 = const0_rtx;
    }

  /* a&b == -1 -> ~a|~b == 0   a|b == -1 -> ~a&~b == 0  */
  if (TARGET_Z15
      && (*code == EQ || *code == NE)
      && (GET_CODE (*op0) == AND || GET_CODE (*op0) == IOR)
      && (GET_MODE (*op0) == DImode || GET_MODE (*op0) == SImode)
      && CONST_INT_P (*op1)
      && *op1 == constm1_rtx)
    {
      machine_mode mode = GET_MODE (*op0);
      rtx op00 = gen_rtx_NOT (mode, XEXP (*op0, 0));
      rtx op01 = gen_rtx_NOT (mode, XEXP (*op0, 1));

      if (GET_CODE (*op0) == AND)
	*op0 = gen_rtx_IOR (mode, op00, op01);
      else
	*op0 = gen_rtx_AND (mode, op00, op01);

      *op1 = const0_rtx;
    }
}
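
/* Illustrative sketch (not part of the compiler proper; helper name is
   hypothetical): the 0xffff narrowing above relies on the low 16 bits
   being the only possibly-nonzero bits (the nonzero_bits test), so that
   "x == 0xffff" is equivalent to an HImode compare against -1.  */

static ATTRIBUTE_UNUSED int
s390_example_narrow_ffff_p (unsigned long long x)
{
  /* Only the low 16 bits may be nonzero (the nonzero_bits premise).  */
  if ((x & ~0xffffULL) != 0)
    return -1;
  /* On a two's complement host with 16-bit short,
     (x == 0xffff) <=> ((short) x == -1).  */
  return (x == 0xffffULL) == ((short) x == -1);
}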

/* Emit a compare instruction suitable to implement the comparison
   OP0 CODE OP1.  Return the correct condition RTL to be placed in
   the IF_THEN_ELSE of the conditional branch testing the result.  */

rtx
s390_emit_compare (enum rtx_code code, rtx op0, rtx op1)
{
  machine_mode mode = s390_select_ccmode (code, op0, op1);
  rtx cc;

  if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC)
    {
      /* Do not output a redundant compare instruction if a
	 compare_and_swap pattern already computed the result and the
	 machine modes are compatible.  */
      gcc_assert (s390_cc_modes_compatible (GET_MODE (op0), mode)
		  == GET_MODE (op0));
      cc = op0;
    }
  else
    {
      cc = gen_rtx_REG (mode, CC_REGNUM);
      emit_insn (gen_rtx_SET (cc, gen_rtx_COMPARE (mode, op0, op1)));
    }

  return gen_rtx_fmt_ee (code, VOIDmode, cc, const0_rtx);
}

/* If MEM is not a legitimate compare-and-swap memory operand, return a new
   MEM, whose address is a pseudo containing the original MEM's address.  */

static rtx
s390_legitimize_cs_operand (rtx mem)
{
  rtx tmp;

  if (!contains_symbol_ref_p (mem))
    return mem;
  tmp = gen_reg_rtx (Pmode);
  emit_move_insn (tmp, copy_rtx (XEXP (mem, 0)));
  return change_address (mem, VOIDmode, tmp);
}

/* Emit a SImode compare and swap instruction setting MEM to NEW_RTX if OLD
   matches CMP.
   Return the correct condition RTL to be placed in the IF_THEN_ELSE of the
   conditional branch testing the result.  */

static rtx
s390_emit_compare_and_swap (enum rtx_code code, rtx old, rtx mem,
			    rtx cmp, rtx new_rtx, machine_mode ccmode)
{
  rtx cc;

  mem = s390_legitimize_cs_operand (mem);
  cc = gen_rtx_REG (ccmode, CC_REGNUM);
  switch (GET_MODE (mem))
    {
    case E_SImode:
      emit_insn (gen_atomic_compare_and_swapsi_internal (old, mem, cmp,
							 new_rtx, cc));
      break;
    case E_DImode:
      emit_insn (gen_atomic_compare_and_swapdi_internal (old, mem, cmp,
							 new_rtx, cc));
      break;
    case E_TImode:
      emit_insn (gen_atomic_compare_and_swapti_internal (old, mem, cmp,
							 new_rtx, cc));
      break;
    case E_QImode:
    case E_HImode:
    default:
      gcc_unreachable ();
    }
  return s390_emit_compare (code, cc, const0_rtx);
}

/* Emit a jump instruction to TARGET and return it.  If COND is
   NULL_RTX, emit an unconditional jump, else a conditional jump under
   condition COND.  */

rtx_insn *
s390_emit_jump (rtx target, rtx cond)
{
  rtx insn;

  target = gen_rtx_LABEL_REF (VOIDmode, target);
  if (cond)
    target = gen_rtx_IF_THEN_ELSE (VOIDmode, cond, target, pc_rtx);

  insn = gen_rtx_SET (pc_rtx, target);
  return emit_jump_insn (insn);
}

/* Return branch condition mask to implement a branch
   specified by CODE.  Return -1 for invalid comparisons.  */

int
s390_branch_condition_mask (rtx code)
{
  const int CC0 = 1 << 3;
  const int CC1 = 1 << 2;
  const int CC2 = 1 << 1;
  const int CC3 = 1 << 0;

  gcc_assert (GET_CODE (XEXP (code, 0)) == REG);
  gcc_assert (REGNO (XEXP (code, 0)) == CC_REGNUM);
  gcc_assert (XEXP (code, 1) == const0_rtx
	      || (GET_MODE (XEXP (code, 0)) == CCRAWmode
		  && CONST_INT_P (XEXP (code, 1))));

  switch (GET_MODE (XEXP (code, 0)))
    {
    case E_CCZmode:
    case E_CCZ1mode:
      switch (GET_CODE (code))
	{
	case EQ: return CC0;
	case NE: return CC1 | CC2 | CC3;
	default: return -1;
	}
      break;

    case E_CCT1mode:
      switch (GET_CODE (code))
	{
	case EQ: return CC1;
	case NE: return CC0 | CC2 | CC3;
	default: return -1;
	}
      break;

    case E_CCT2mode:
      switch (GET_CODE (code))
	{
	case EQ: return CC2;
	case NE: return CC0 | CC1 | CC3;
	default: return -1;
	}
      break;

    case E_CCT3mode:
      switch (GET_CODE (code))
	{
	case EQ: return CC3;
	case NE: return CC0 | CC1 | CC2;
	default: return -1;
	}
      break;

    case E_CCLmode:
      switch (GET_CODE (code))
	{
	case EQ: return CC0 | CC2;
	case NE: return CC1 | CC3;
	default: return -1;
	}
      break;

    case E_CCL1mode:
      switch (GET_CODE (code))
	{
	case LTU: return CC2 | CC3;  /* carry */
	case GEU: return CC0 | CC1;  /* no carry */
	default: return -1;
	}
      break;

    case E_CCL2mode:
      switch (GET_CODE (code))
	{
	case GTU: return CC0 | CC1;  /* borrow */
	case LEU: return CC2 | CC3;  /* no borrow */
	default: return -1;
	}
      break;

    case E_CCL3mode:
      switch (GET_CODE (code))
	{
	case EQ: return CC0 | CC2;
	case NE: return CC1 | CC3;
	case LTU: return CC1;
	case GTU: return CC3;
	case LEU: return CC1 | CC2;
	case GEU: return CC2 | CC3;
	default: return -1;
	}

    case E_CCUmode:
      switch (GET_CODE (code))
	{
	case EQ: return CC0;
	case NE: return CC1 | CC2 | CC3;
	case LTU: return CC1;
	case GTU: return CC2;
	case LEU: return CC0 | CC1;
	case GEU: return CC0 | CC2;
	default: return -1;
	}
      break;

    case E_CCURmode:
      switch (GET_CODE (code))
	{
	case EQ: return CC0;
	case NE: return CC2 | CC1 | CC3;
	case LTU: return CC2;
	case GTU: return CC1;
	case LEU: return CC0 | CC2;
	case GEU: return CC0 | CC1;
	default: return -1;
	}
      break;

    case E_CCAPmode:
      switch (GET_CODE (code))
	{
	case EQ: return CC0;
	case NE: return CC1 | CC2 | CC3;
	case LT: return CC1 | CC3;
	case GT: return CC2;
	case LE: return CC0 | CC1 | CC3;
	case GE: return CC0 | CC2;
	default: return -1;
	}
      break;

    case E_CCANmode:
      switch (GET_CODE (code))
	{
	case EQ: return CC0;
	case NE: return CC1 | CC2 | CC3;
	case LT: return CC1;
	case GT: return CC2 | CC3;
	case LE: return CC0 | CC1;
	case GE: return CC0 | CC2 | CC3;
	default: return -1;
	}
      break;

    case E_CCSmode:
      switch (GET_CODE (code))
	{
	case EQ: return CC0;
	case NE: return CC1 | CC2 | CC3;
	case LT: return CC1;
	case GT: return CC2;
	case LE: return CC0 | CC1;
	case GE: return CC0 | CC2;
	case UNORDERED: return CC3;
	case ORDERED: return CC0 | CC1 | CC2;
	case UNEQ: return CC0 | CC3;
	case UNLT: return CC1 | CC3;
	case UNGT: return CC2 | CC3;
	case UNLE: return CC0 | CC1 | CC3;
	case UNGE: return CC0 | CC2 | CC3;
	case LTGT: return CC1 | CC2;
	default: return -1;
	}
      break;

    case E_CCSRmode:
      switch (GET_CODE (code))
	{
	case EQ: return CC0;
	case NE: return CC2 | CC1 | CC3;
	case LT: return CC2;
	case GT: return CC1;
	case LE: return CC0 | CC2;
	case GE: return CC0 | CC1;
	case UNORDERED: return CC3;
	case ORDERED: return CC0 | CC2 | CC1;
	case UNEQ: return CC0 | CC3;
	case UNLT: return CC2 | CC3;
	case UNGT: return CC1 | CC3;
	case UNLE: return CC0 | CC2 | CC3;
	case UNGE: return CC0 | CC1 | CC3;
	case LTGT: return CC2 | CC1;
	default: return -1;
	}
      break;

      /* Vector comparison modes.  */
      /* CC2 will never be set.  It however is part of the negated
	 masks.  */
    case E_CCVIALLmode:
      switch (GET_CODE (code))
	{
	case EQ:
	case GTU:
	case GT:
	case GE: return CC0;
	  /* The inverted modes are in fact *any* modes.  */
	case NE:
	case LEU:
	case LE:
	case LT: return CC3 | CC1 | CC2;
	default: return -1;
	}

    case E_CCVIANYmode:
      switch (GET_CODE (code))
	{
	case EQ:
	case GTU:
	case GT:
	case GE: return CC0 | CC1;
	  /* The inverted modes are in fact *all* modes.  */
	case NE:
	case LEU:
	case LE:
	case LT: return CC3 | CC2;
	default: return -1;
	}
    case E_CCVFALLmode:
      switch (GET_CODE (code))
	{
	case EQ:
	case GT:
	case GE: return CC0;
	  /* The inverted modes are in fact *any* modes.  */
	case NE:
	case UNLE:
	case UNLT: return CC3 | CC1 | CC2;
	default: return -1;
	}

    case E_CCVFANYmode:
      switch (GET_CODE (code))
	{
	case EQ:
	case GT:
	case GE: return CC0 | CC1;
	  /* The inverted modes are in fact *all* modes.  */
	case NE:
	case UNLE:
	case UNLT: return CC3 | CC2;
	default: return -1;
	}

    case E_CCRAWmode:
      switch (GET_CODE (code))
	{
	case EQ:
	  return INTVAL (XEXP (code, 1));
	case NE:
	  return (INTVAL (XEXP (code, 1))) ^ 0xf;
	default:
	  gcc_unreachable ();
	}

    default:
      return -1;
    }
}
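
/* Illustrative sketch (standalone; helper name is hypothetical): with
   CC0 as the most significant of the four mask bits, a branch with
   mask M is taken for condition code CC exactly when bit (3 - CC) of
   M is set -- the same "1 << (3 - cc)" encoding the CCRAWmode
   conversion above uses.  EQ in CCZmode (mask 8) is taken only for
   CC 0.  */

static ATTRIBUTE_UNUSED bool
s390_example_branch_taken_p (int mask, int cc)
{
  return (mask & (1 << (3 - cc))) != 0;
}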

/* Return branch condition mask to implement a compare and branch
   specified by CODE.  Return -1 for invalid comparisons.  */

int
s390_compare_and_branch_condition_mask (rtx code)
{
  const int CC0 = 1 << 3;
  const int CC1 = 1 << 2;
  const int CC2 = 1 << 1;

  switch (GET_CODE (code))
    {
    case EQ:
      return CC0;
    case NE:
      return CC1 | CC2;
    case LT:
    case LTU:
      return CC1;
    case GT:
    case GTU:
      return CC2;
    case LE:
    case LEU:
      return CC0 | CC1;
    case GE:
    case GEU:
      return CC0 | CC2;
    default:
      gcc_unreachable ();
    }
  return -1;
}

/* If INV is false, return assembler mnemonic string to implement
   a branch specified by CODE.  If INV is true, return mnemonic
   for the corresponding inverted branch.  */

static const char *
s390_branch_condition_mnemonic (rtx code, int inv)
{
  int mask;

  static const char *const mnemonic[16] =
    {
      NULL, "o", "h", "nle",
      "l", "nhe", "lh", "ne",
      "e", "nlh", "he", "nl",
      "le", "nh", "no", NULL
    };

  if (GET_CODE (XEXP (code, 0)) == REG
      && REGNO (XEXP (code, 0)) == CC_REGNUM
      && (XEXP (code, 1) == const0_rtx
	  || (GET_MODE (XEXP (code, 0)) == CCRAWmode
	      && CONST_INT_P (XEXP (code, 1)))))
    mask = s390_branch_condition_mask (code);
  else
    mask = s390_compare_and_branch_condition_mask (code);

  gcc_assert (mask >= 0);

  if (inv)
    mask ^= 15;

  gcc_assert (mask >= 1 && mask <= 14);

  return mnemonic[mask];
}
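
/* Illustrative sketch (standalone, mirroring the table above; helper
   name is hypothetical): the mnemonic is indexed directly by the
   4-bit mask, so mask 8 (CC0 only) yields "e" and its inversion
   8 ^ 15 == 7 yields "ne".  */

static ATTRIBUTE_UNUSED const char *
s390_example_mnemonic_for_eq (int inv)
{
  static const char *const tbl[16] =
    {
      NULL, "o", "h", "nle", "l", "nhe", "lh", "ne",
      "e", "nlh", "he", "nl", "le", "nh", "no", NULL
    };
  int mask = 1 << 3;	/* EQ in CCZmode: CC0 only.  */
  if (inv)
    mask ^= 15;		/* Inverted branch: accept CC1/CC2/CC3.  */
  return tbl[mask];
}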

/* Return the part of op which has a value different from def.
   The size of the part is determined by mode.
   Use this function only if you already know that op really
   contains such a part.  */

unsigned HOST_WIDE_INT
s390_extract_part (rtx op, machine_mode mode, int def)
{
  unsigned HOST_WIDE_INT value = 0;
  int max_parts = HOST_BITS_PER_WIDE_INT / GET_MODE_BITSIZE (mode);
  int part_bits = GET_MODE_BITSIZE (mode);
  unsigned HOST_WIDE_INT part_mask = (HOST_WIDE_INT_1U << part_bits) - 1;
  int i;

  for (i = 0; i < max_parts; i++)
    {
      if (i == 0)
	value = UINTVAL (op);
      else
	value >>= part_bits;

      if ((value & part_mask) != (def & part_mask))
	return value & part_mask;
    }

  gcc_unreachable ();
}
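
/* Illustrative sketch (standalone, QImode parts of a 64-bit value;
   helper name is hypothetical): scanning bytes from the least
   significant end, the first byte that differs from DEF is returned,
   e.g. for 0x00ab0000 with DEF 0 the result is 0xab.  */

static ATTRIBUTE_UNUSED unsigned int
s390_example_extract_byte (unsigned long long value, int def)
{
  for (int i = 0; i < 8; i++, value >>= 8)
    if ((value & 0xff) != (unsigned int) (def & 0xff))
      return value & 0xff;
  /* The caller guarantees such a byte exists.  */
  return 0;
}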

/* If OP is an integer constant of mode MODE with exactly one
   part of mode PART_MODE unequal to DEF, return the number of that
   part.  Otherwise, return -1.  */

int
s390_single_part (rtx op,
		  machine_mode mode,
		  machine_mode part_mode,
		  int def)
{
  unsigned HOST_WIDE_INT value = 0;
  int n_parts = GET_MODE_SIZE (mode) / GET_MODE_SIZE (part_mode);
  unsigned HOST_WIDE_INT part_mask
    = (HOST_WIDE_INT_1U << GET_MODE_BITSIZE (part_mode)) - 1;
  int i, part = -1;

  if (GET_CODE (op) != CONST_INT)
    return -1;

  for (i = 0; i < n_parts; i++)
    {
      if (i == 0)
	value = UINTVAL (op);
      else
	value >>= GET_MODE_BITSIZE (part_mode);

      if ((value & part_mask) != (def & part_mask))
	{
	  if (part != -1)
	    return -1;
	  else
	    part = i;
	}
    }
  return part == -1 ? -1 : n_parts - 1 - part;
}
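
/* Illustrative sketch (standalone; helper name is hypothetical): the
   returned part number is big-endian, i.e. counted from the most
   significant part.  For a 64-bit value with 16-bit parts, 0xffff
   (the lowest halfword) is part 3 and 0xffff000000000000 is part 0;
   values with two differing parts yield -1.  */

static ATTRIBUTE_UNUSED int
s390_example_single_hword (unsigned long long value, int def)
{
  unsigned int defpart = def & 0xffff;
  int part = -1;

  for (int i = 0; i < 4; i++, value >>= 16)
    if ((value & 0xffff) != defpart)
      {
	if (part != -1)
	  return -1;	/* More than one differing part.  */
	part = i;
      }
  return part == -1 ? -1 : 3 - part;
}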

/* Return true if IN contains a contiguous bitfield in the lower SIZE
   bits and no other bits are set in (the lower SIZE bits of) IN.

   PSTART and PEND can be used to obtain the start and end
   position (inclusive) of the bitfield relative to 64
   bits.  *PSTART / *PEND gives the position of the first/last bit
   of the bitfield counting from the highest order bit starting
   with zero.  */

bool
s390_contiguous_bitmask_nowrap_p (unsigned HOST_WIDE_INT in, int size,
				  int *pstart, int *pend)
{
  int start;
  int end = -1;
  int lowbit = HOST_BITS_PER_WIDE_INT - 1;
  int highbit = HOST_BITS_PER_WIDE_INT - size;
  unsigned HOST_WIDE_INT bitmask = HOST_WIDE_INT_1U;

  gcc_assert (!!pstart == !!pend);
  for (start = lowbit; start >= highbit; bitmask <<= 1, start--)
    if (end == -1)
      {
	/* Look for the rightmost bit of a contiguous range of ones.  */
	if (bitmask & in)
	  /* Found it.  */
	  end = start;
      }
    else
      {
	/* Look for the first zero bit after the range of ones.  */
	if (! (bitmask & in))
	  /* Found it.  */
	  break;
      }
  /* We're one past the last one-bit.  */
  start++;

  if (end == -1)
    /* No one bits found.  */
    return false;

  if (start > highbit)
    {
      unsigned HOST_WIDE_INT mask;

      /* Calculate a mask for all bits beyond the contiguous bits.  */
      mask = ((~HOST_WIDE_INT_0U >> highbit)
	      & (~HOST_WIDE_INT_0U << (lowbit - start + 1)));
      if (mask & in)
	/* There are more bits set beyond the first range of one bits.  */
	return false;
    }

  if (pstart)
    {
      *pstart = start;
      *pend = end;
    }

  return true;
}
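
/* Illustrative sketch (standalone, 64-bit; helper name is
   hypothetical): bit positions are counted from the most significant
   bit, so for IN == 0x0000ff00 and SIZE == 32 the run of ones sits at
   positions 48..55 and *PSTART/*PEND come back as 48/55; a value like
   0x0000ff0f is rejected.  The contiguity test itself reduces to
   stripping trailing zeros, then trailing ones.  */

static ATTRIBUTE_UNUSED bool
s390_example_contiguous_ones_p (unsigned long long in)
{
  if (in == 0)
    return false;
  /* Strip trailing zeros, then the run of ones; any leftover bit
     means the ones were not contiguous.  */
  while ((in & 1) == 0)
    in >>= 1;
  while (in & 1)
    in >>= 1;
  return in == 0;
}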

/* Same as s390_contiguous_bitmask_nowrap_p but also returns true
   if ~IN contains a contiguous bitfield.  In that case, *END is <
   *START.

   If WRAP_P is true, a bitmask that wraps around is also tested.
   When a wraparound occurs *START is greater than *END (in
   non-null pointers), and the uppermost (64 - SIZE) bits are thus
   part of the range.  If WRAP_P is false, no wraparound is
   tested.  */

bool
s390_contiguous_bitmask_p (unsigned HOST_WIDE_INT in, bool wrap_p,
			   int size, int *start, int *end)
{
  int bs = HOST_BITS_PER_WIDE_INT;
  bool b;

  gcc_assert (!!start == !!end);
  if ((in & ((~HOST_WIDE_INT_0U) >> (bs - size))) == 0)
    /* This cannot be expressed as a contiguous bitmask.  Exit early because
       the second call of s390_contiguous_bitmask_nowrap_p would accept this as
       a valid bitmask.  */
    return false;
  b = s390_contiguous_bitmask_nowrap_p (in, size, start, end);
  if (b)
    return true;
  if (! wrap_p)
    return false;
  b = s390_contiguous_bitmask_nowrap_p (~in, size, start, end);
  if (b && start)
    {
      int s = *start;
      int e = *end;

      gcc_assert (s >= 1);
      *start = ((e + 1) & (bs - 1));
      *end = ((s - 1 + bs) & (bs - 1));
    }

  return b;
}
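
/* Illustrative sketch (standalone, 64-bit, SIZE == 64; helper name is
   hypothetical): the wrap fallback above tests whether the complement
   of IN is contiguous.  For IN == 0xf00000000000000f the complement
   0x0ffffffffffffff0 spans positions 4..59, so the wrapped range is
   reported as start == 60, end == 3.  */

static ATTRIBUTE_UNUSED bool
s390_example_complement_contiguous_p (unsigned long long in)
{
  unsigned long long c = ~in;
  if (in == 0 || c == 0)
    return false;
  /* Same strip-zeros-then-ones contiguity test as in the sketch
     after s390_contiguous_bitmask_nowrap_p.  */
  while ((c & 1) == 0)
    c >>= 1;
  while (c & 1)
    c >>= 1;
  return c == 0;
}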

/* Return true if OP contains the same contiguous bitfield in *all*
   its elements.  START and END can be used to obtain the start and
   end position of the bitfield.

   START/STOP give the position of the first/last bit of the bitfield
   counting from the lowest order bit starting with zero.  In order to
   use these values for S/390 instructions this has to be converted to
   "bits big endian" style.  */

bool
s390_contiguous_bitmask_vector_p (rtx op, int *start, int *end)
{
  unsigned HOST_WIDE_INT mask;
  int size;
  rtx elt;
  bool b;

  gcc_assert (!!start == !!end);
  if (!const_vec_duplicate_p (op, &elt)
      || !CONST_INT_P (elt))
    return false;

  size = GET_MODE_UNIT_BITSIZE (GET_MODE (op));

  /* We cannot deal with V1TI/V1TF.  This would require a vgmq.  */
  if (size > 64)
    return false;

  mask = UINTVAL (elt);

  b = s390_contiguous_bitmask_p (mask, true, size, start, end);
  if (b)
    {
      if (start)
	{
	  *start -= (HOST_BITS_PER_WIDE_INT - size);
	  *end -= (HOST_BITS_PER_WIDE_INT - size);
	}
      return true;
    }
  else
    return false;
}

/* Return true if C consists only of byte chunks being either 0 or
   0xff.  If MASK is !=NULL a byte mask is generated which is
   appropriate for the vector generate byte mask instruction.  */

bool
s390_bytemask_vector_p (rtx op, unsigned *mask)
{
  int i;
  unsigned tmp_mask = 0;
  int nunit, unit_size;

  if (!VECTOR_MODE_P (GET_MODE (op))
      || GET_CODE (op) != CONST_VECTOR
      || !CONST_INT_P (XVECEXP (op, 0, 0)))
    return false;

  nunit = GET_MODE_NUNITS (GET_MODE (op));
  unit_size = GET_MODE_UNIT_SIZE (GET_MODE (op));

  for (i = 0; i < nunit; i++)
    {
      unsigned HOST_WIDE_INT c;
      int j;

      if (!CONST_INT_P (XVECEXP (op, 0, i)))
	return false;

      c = UINTVAL (XVECEXP (op, 0, i));
      for (j = 0; j < unit_size; j++)
	{
	  if ((c & 0xff) != 0 && (c & 0xff) != 0xff)
	    return false;
	  tmp_mask |= (c & 1) << ((nunit - 1 - i) * unit_size + j);
	  c = c >> BITS_PER_UNIT;
	}
    }

  if (mask != NULL)
    *mask = tmp_mask;

  return true;
}
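
/* Illustrative sketch (standalone, single 64-bit element; helper name
   is hypothetical): each 0xff byte of the constant contributes a 1 bit
   to the mask and each 0x00 byte a 0 bit, matching the operand of the
   vector generate byte mask (vgbm) instruction.  For
   0x00ff00ff00ff00ff the byte mask is 0x55.  */

static ATTRIBUTE_UNUSED bool
s390_example_bytemask (unsigned long long c, unsigned *mask)
{
  unsigned tmp = 0;

  for (int j = 0; j < 8; j++, c >>= 8)
    {
      if ((c & 0xff) != 0 && (c & 0xff) != 0xff)
	return false;	/* Mixed byte: not representable.  */
      tmp |= (c & 1) << j;
    }
  *mask = tmp;
  return true;
}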

/* Check whether a rotate of ROTL followed by an AND of CONTIG is
   equivalent to a shift followed by the AND.  In particular, CONTIG
   should not overlap the (rotated) bit 0/bit 63 gap.  Negative values
   for ROTL indicate a rotate to the right.  */

bool
s390_extzv_shift_ok (int bitsize, int rotl, unsigned HOST_WIDE_INT contig)
{
  int start, end;
  bool ok;

  ok = s390_contiguous_bitmask_nowrap_p (contig, bitsize, &start, &end);
  gcc_assert (ok);

  if (rotl >= 0)
    return (64 - end >= rotl);
  else
    {
      /* Translate "- rotate right" in BITSIZE mode to "rotate left" in
	 DImode.  */
      rotl = -rotl + (64 - bitsize);
      return (start >= rotl);
    }
}

/* Check whether we can (and want to) split a double-word
   move in mode MODE from SRC to DST into two single-word
   moves, moving the subword FIRST_SUBWORD first.  */

bool
s390_split_ok_p (rtx dst, rtx src, machine_mode mode, int first_subword)
{
  /* Floating point and vector registers cannot be split.  */
  if (FP_REG_P (src) || FP_REG_P (dst) || VECTOR_REG_P (src) || VECTOR_REG_P (dst))
    return false;

  /* Non-offsettable memory references cannot be split.  */
  if ((GET_CODE (src) == MEM && !offsettable_memref_p (src))
      || (GET_CODE (dst) == MEM && !offsettable_memref_p (dst)))
    return false;

  /* Moving the first subword must not clobber a register
     needed to move the second subword.  */
  if (register_operand (dst, mode))
    {
      rtx subreg = operand_subword (dst, first_subword, 0, mode);
      if (reg_overlap_mentioned_p (subreg, src))
	return false;
    }

  return true;
}

/* Return true if it can be proven that [MEM1, MEM1 + SIZE]
   and [MEM2, MEM2 + SIZE] do overlap and false
   otherwise.  */

bool
s390_overlap_p (rtx mem1, rtx mem2, HOST_WIDE_INT size)
{
  rtx addr1, addr2, addr_delta;
  HOST_WIDE_INT delta;

  if (GET_CODE (mem1) != MEM || GET_CODE (mem2) != MEM)
    return true;

  if (size == 0)
    return false;

  addr1 = XEXP (mem1, 0);
  addr2 = XEXP (mem2, 0);

  addr_delta = simplify_binary_operation (MINUS, Pmode, addr2, addr1);

  /* This overlapping check is used by peepholes merging memory block operations.
     Overlapping operations would otherwise be recognized by the S/390 hardware
     and would fall back to a slower implementation.  Allowing overlapping
     operations would lead to slow code but not to wrong code.  Therefore we are
     somewhat optimistic if we cannot prove that the memory blocks are
     overlapping.
     That's why we return false here although this may accept operations on
     overlapping memory areas.  */
  if (!addr_delta || GET_CODE (addr_delta) != CONST_INT)
    return false;

  delta = INTVAL (addr_delta);

  if (delta == 0
      || (delta > 0 && delta < size)
      || (delta < 0 && -delta < size))
    return true;

  return false;
}
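
/* Illustrative sketch (standalone; helper name is hypothetical): two
   SIZE-byte blocks whose start addresses differ by DELTA overlap
   exactly when |DELTA| < SIZE, which is what the three-way test above
   computes; 8-byte blocks at offsets 0 and 4 overlap, at offsets 0
   and 8 they do not.  */

static ATTRIBUTE_UNUSED bool
s390_example_blocks_overlap_p (long long delta, long long size)
{
  if (size == 0)
    return false;
  return delta > -size && delta < size;
}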

/* Check whether the address of memory reference MEM2 equals exactly
   the address of memory reference MEM1 plus DELTA.  Return true if
   we can prove this to be the case, false otherwise.  */

bool
s390_offset_p (rtx mem1, rtx mem2, rtx delta)
{
  rtx addr1, addr2, addr_delta;

  if (GET_CODE (mem1) != MEM || GET_CODE (mem2) != MEM)
    return false;

  addr1 = XEXP (mem1, 0);
  addr2 = XEXP (mem2, 0);

  addr_delta = simplify_binary_operation (MINUS, Pmode, addr2, addr1);
  if (!addr_delta || !rtx_equal_p (addr_delta, delta))
    return false;

  return true;
}

/* Expand logical operator CODE in mode MODE with operands OPERANDS.  */

void
s390_expand_logical_operator (enum rtx_code code, machine_mode mode,
			      rtx *operands)
{
  machine_mode wmode = mode;
  rtx dst = operands[0];
  rtx src1 = operands[1];
  rtx src2 = operands[2];
  rtx op, clob, tem;

  /* If we cannot handle the operation directly, use a temp register.  */
  if (!s390_logical_operator_ok_p (operands))
    dst = gen_reg_rtx (mode);

  /* QImode and HImode patterns make sense only if we have a destination
     in memory.  Otherwise perform the operation in SImode.  */
  if ((mode == QImode || mode == HImode) && GET_CODE (dst) != MEM)
    wmode = SImode;

  /* Widen operands if required.  */
  if (mode != wmode)
    {
      if (GET_CODE (dst) == SUBREG
	  && (tem = simplify_subreg (wmode, dst, mode, 0)) != 0)
	dst = tem;
      else if (REG_P (dst))
	dst = gen_rtx_SUBREG (wmode, dst, 0);
      else
	dst = gen_reg_rtx (wmode);

      if (GET_CODE (src1) == SUBREG
	  && (tem = simplify_subreg (wmode, src1, mode, 0)) != 0)
	src1 = tem;
      else if (GET_MODE (src1) != VOIDmode)
	src1 = gen_rtx_SUBREG (wmode, force_reg (mode, src1), 0);

      if (GET_CODE (src2) == SUBREG
	  && (tem = simplify_subreg (wmode, src2, mode, 0)) != 0)
	src2 = tem;
      else if (GET_MODE (src2) != VOIDmode)
	src2 = gen_rtx_SUBREG (wmode, force_reg (mode, src2), 0);
    }

  /* Emit the instruction.  */
  op = gen_rtx_SET (dst, gen_rtx_fmt_ee (code, wmode, src1, src2));
  clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, CC_REGNUM));
  emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));

  /* Fix up the destination if needed.  */
  if (dst != operands[0])
    emit_move_insn (operands[0], gen_lowpart (mode, dst));
}

/* Check whether OPERANDS are OK for a logical operation (AND, IOR, XOR).  */

bool
s390_logical_operator_ok_p (rtx *operands)
{
  /* If the destination operand is in memory, it needs to coincide
     with one of the source operands.  After reload, it has to be
     the first source operand.  */
  if (GET_CODE (operands[0]) == MEM)
    return rtx_equal_p (operands[0], operands[1])
	   || (!reload_completed && rtx_equal_p (operands[0], operands[2]));

  return true;
}

/* Narrow logical operation CODE of memory operand MEMOP with immediate
   operand IMMOP to switch from SS to SI type instructions.  */

void
s390_narrow_logical_operator (enum rtx_code code, rtx *memop, rtx *immop)
{
  int def = code == AND ? -1 : 0;
  HOST_WIDE_INT mask;
  int part;

  gcc_assert (GET_CODE (*memop) == MEM);
  gcc_assert (!MEM_VOLATILE_P (*memop));

  mask = s390_extract_part (*immop, QImode, def);
  part = s390_single_part (*immop, GET_MODE (*memop), QImode, def);
  gcc_assert (part >= 0);

  *memop = adjust_address (*memop, QImode, part);
  *immop = gen_int_mode (mask, QImode);
}
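
/* Illustrative sketch (standalone; helper name is hypothetical): the
   narrowing is possible when the immediate differs from the operation's
   neutral value (0xff per byte for AND, 0x00 for IOR/XOR) in a single
   byte.  E.g. x & 0xffffff00ffffffff in DImode only affects byte 3
   (counting from the most significant byte), so a one-byte NI with
   mask 0x00 on that byte is equivalent.  */

static ATTRIBUTE_UNUSED int
s390_example_narrowable_byte (unsigned long long imm, int is_and)
{
  unsigned int neutral = is_and ? 0xff : 0x00;
  int part = -1;

  for (int i = 0; i < 8; i++, imm >>= 8)
    if ((imm & 0xff) != neutral)
      {
	if (part != -1)
	  return -1;	/* More than one byte differs.  */
	part = i;
      }
  return part == -1 ? -1 : 7 - part;	/* Big-endian byte number.  */
}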

/* How to allocate a 'struct machine_function'.  */

static struct machine_function *
s390_init_machine_status (void)
{
  return ggc_cleared_alloc<machine_function> ();
}

/* Map for smallest class containing reg regno.  */

const enum reg_class regclass_map[FIRST_PSEUDO_REGISTER] =
{ GENERAL_REGS, ADDR_REGS,   ADDR_REGS, ADDR_REGS,  /*  0 */
  ADDR_REGS,    ADDR_REGS,   ADDR_REGS, ADDR_REGS,  /*  4 */
  ADDR_REGS,    ADDR_REGS,   ADDR_REGS, ADDR_REGS,  /*  8 */
  ADDR_REGS,    ADDR_REGS,   ADDR_REGS, ADDR_REGS,  /* 12 */
  FP_REGS,      FP_REGS,     FP_REGS,   FP_REGS,    /* 16 */
  FP_REGS,      FP_REGS,     FP_REGS,   FP_REGS,    /* 20 */
  FP_REGS,      FP_REGS,     FP_REGS,   FP_REGS,    /* 24 */
  FP_REGS,      FP_REGS,     FP_REGS,   FP_REGS,    /* 28 */
  ADDR_REGS,    CC_REGS,     ADDR_REGS, ADDR_REGS,  /* 32 */
  ACCESS_REGS,  ACCESS_REGS, VEC_REGS,  VEC_REGS,   /* 36 */
  VEC_REGS,     VEC_REGS,    VEC_REGS,  VEC_REGS,   /* 40 */
  VEC_REGS,     VEC_REGS,    VEC_REGS,  VEC_REGS,   /* 44 */
  VEC_REGS,     VEC_REGS,    VEC_REGS,  VEC_REGS,   /* 48 */
  VEC_REGS,     VEC_REGS                            /* 52 */
};

/* Return attribute type of insn.  */

static enum attr_type
s390_safe_attr_type (rtx_insn *insn)
{
  if (recog_memoized (insn) >= 0)
    return get_attr_type (insn);
  else
    return TYPE_NONE;
}

/* Return attribute relative_long of insn.  */

static bool
s390_safe_relative_long_p (rtx_insn *insn)
{
  if (recog_memoized (insn) >= 0)
    return get_attr_relative_long (insn) == RELATIVE_LONG_YES;
  else
    return false;
}

/* Return true if DISP is a valid short displacement.  */

static bool
s390_short_displacement (rtx disp)
{
  /* No displacement is OK.  */
  if (!disp)
    return true;

  /* Without the long displacement facility we don't need to
     distinguish between long and short displacement.  */
  if (!TARGET_LONG_DISPLACEMENT)
    return true;

  /* Integer displacement in range.  */
  if (GET_CODE (disp) == CONST_INT)
    return INTVAL (disp) >= 0 && INTVAL (disp) < 4096;

  /* GOT offset is not OK, the GOT can be large.  */
  if (GET_CODE (disp) == CONST
      && GET_CODE (XEXP (disp, 0)) == UNSPEC
      && (XINT (XEXP (disp, 0), 1) == UNSPEC_GOT
	  || XINT (XEXP (disp, 0), 1) == UNSPEC_GOTNTPOFF))
    return false;

  /* All other symbolic constants are literal pool references,
     which are OK as the literal pool must be small.  */
  if (GET_CODE (disp) == CONST)
    return true;

  return false;
}

/* Attempts to split `ref', which should be UNSPEC_LTREF, into
   (base + `disp').  If successful, also determines the following
   characteristics of `ref': `is_ptr' - whether it can be an LA
   argument, `is_base_ptr' - whether the resulting base is a well-known
   base register (stack/frame pointer, etc), `is_pool_ptr' - whether it
   is considered a literal pool pointer for purposes of avoiding two
   different literal pool pointers per insn during or after reload
   (`B' constraint).  */
static bool
s390_decompose_constant_pool_ref (rtx *ref, rtx *disp, bool *is_ptr,
				  bool *is_base_ptr, bool *is_pool_ptr)
{
  if (!*ref)
    return true;

  if (GET_CODE (*ref) == UNSPEC)
    switch (XINT (*ref, 1))
      {
      case UNSPEC_LTREF:
	if (!*disp)
	  *disp = gen_rtx_UNSPEC (Pmode,
				  gen_rtvec (1, XVECEXP (*ref, 0, 0)),
				  UNSPEC_LTREL_OFFSET);
	else
	  return false;

	*ref = XVECEXP (*ref, 0, 1);
	break;

      default:
	return false;
      }

  if (!REG_P (*ref) || GET_MODE (*ref) != Pmode)
    return false;

  if (REGNO (*ref) == STACK_POINTER_REGNUM
      || REGNO (*ref) == FRAME_POINTER_REGNUM
      || ((reload_completed || reload_in_progress)
	  && frame_pointer_needed
	  && REGNO (*ref) == HARD_FRAME_POINTER_REGNUM)
      || REGNO (*ref) == ARG_POINTER_REGNUM
      || (flag_pic
	  && REGNO (*ref) == PIC_OFFSET_TABLE_REGNUM))
    *is_ptr = *is_base_ptr = true;

  if ((reload_completed || reload_in_progress)
      && *ref == cfun->machine->base_reg)
    *is_ptr = *is_base_ptr = *is_pool_ptr = true;

  return true;
}

/* Decompose a RTL expression ADDR for a memory address into
   its components, returned in OUT.

   Returns false if ADDR is not a valid memory address, true
   otherwise.  If OUT is NULL, don't return the components,
   but check for validity only.

   Note: Only addresses in canonical form are recognized.
   LEGITIMIZE_ADDRESS should convert non-canonical forms to the
   canonical form so that they will be recognized.  */

static int
s390_decompose_address (rtx addr, struct s390_address *out)
{
  HOST_WIDE_INT offset = 0;
  rtx base = NULL_RTX;
  rtx indx = NULL_RTX;
  rtx disp = NULL_RTX;
  rtx orig_disp;
  bool pointer = false;
  bool base_ptr = false;
  bool indx_ptr = false;
  bool literal_pool = false;

  /* We may need to substitute the literal pool base register into the address
     below.  However, at this point we do not know which register is going to
     be used as base, so we substitute the arg pointer register.  This is going
     to be treated as holding a pointer below -- it shouldn't be used for any
     other purpose.  */
  rtx fake_pool_base = gen_rtx_REG (Pmode, ARG_POINTER_REGNUM);

  /* Decompose address into base + index + displacement.  */

  if (GET_CODE (addr) == REG || GET_CODE (addr) == UNSPEC)
    base = addr;

  else if (GET_CODE (addr) == PLUS)
    {
      rtx op0 = XEXP (addr, 0);
      rtx op1 = XEXP (addr, 1);
      enum rtx_code code0 = GET_CODE (op0);
      enum rtx_code code1 = GET_CODE (op1);

      if (code0 == REG || code0 == UNSPEC)
	{
	  if (code1 == REG || code1 == UNSPEC)
	    {
	      indx = op0;	/* index + base */
	      base = op1;
	    }

	  else
	    {
	      base = op0;	/* base + displacement */
	      disp = op1;
	    }
	}

      else if (code0 == PLUS)
	{
	  indx = XEXP (op0, 0);	/* index + base + disp */
	  base = XEXP (op0, 1);
	  disp = op1;
	}

      else
	{
	  return false;
	}
    }

  else
    disp = addr;		/* displacement */

  /* Extract integer part of displacement.  */
  orig_disp = disp;
  if (disp)
    {
      if (GET_CODE (disp) == CONST_INT)
	{
	  offset = INTVAL (disp);
	  disp = NULL_RTX;
	}
      else if (GET_CODE (disp) == CONST
	       && GET_CODE (XEXP (disp, 0)) == PLUS
	       && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
	{
	  offset = INTVAL (XEXP (XEXP (disp, 0), 1));
	  disp = XEXP (XEXP (disp, 0), 0);
	}
    }

  /* Strip off CONST here to avoid special case tests later.  */
  if (disp && GET_CODE (disp) == CONST)
    disp = XEXP (disp, 0);

  /* We can convert literal pool addresses to
     displacements by basing them off the base register.  */
  if (disp && GET_CODE (disp) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (disp))
    {
      if (base || indx)
	return false;

      base = fake_pool_base, literal_pool = true;

      /* Mark up the displacement.  */
      disp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, disp),
			     UNSPEC_LTREL_OFFSET);
    }

  /* Validate base register.  */
  if (!s390_decompose_constant_pool_ref (&base, &disp, &pointer, &base_ptr,
					 &literal_pool))
    return false;

  /* Validate index register.  */
  if (!s390_decompose_constant_pool_ref (&indx, &disp, &pointer, &indx_ptr,
					 &literal_pool))
    return false;

  /* Prefer to use pointer as base, not index.  */
  if (base && indx && !base_ptr
      && (indx_ptr || (!REG_POINTER (base) && REG_POINTER (indx))))
    {
      rtx tmp = base;
      base = indx;
      indx = tmp;
    }

  /* Validate displacement.  */
  if (!disp)
    {
      /* If virtual registers are involved, the displacement will change later
	 anyway as the virtual registers get eliminated.  This could make a
	 valid displacement invalid, but it is more likely to make an invalid
	 displacement valid, because we sometimes access the register save area
	 via negative offsets to one of those registers.
	 Thus we don't check the displacement for validity here.  If after
	 elimination the displacement turns out to be invalid after all,
	 this is fixed up by reload in any case.  */
      /* LRA always keeps displacements up to date, and we need to know
	 the displacement is right during all of LRA, not only at the
	 final elimination.  */
      if (lra_in_progress
	  || (base != arg_pointer_rtx
	      && indx != arg_pointer_rtx
	      && base != return_address_pointer_rtx
	      && indx != return_address_pointer_rtx
	      && base != frame_pointer_rtx
	      && indx != frame_pointer_rtx
	      && base != virtual_stack_vars_rtx
	      && indx != virtual_stack_vars_rtx))
	if (!DISP_IN_RANGE (offset))
	  return false;
    }
  else
    {
      /* All the special cases are pointers.  */
      pointer = true;

      /* In the small-PIC case, the linker converts @GOT
	 and @GOTNTPOFF offsets to possible displacements.  */
      if (GET_CODE (disp) == UNSPEC
	  && (XINT (disp, 1) == UNSPEC_GOT
	      || XINT (disp, 1) == UNSPEC_GOTNTPOFF)
	  && flag_pic == 1)
	{
	  ;
	}

      /* Accept pool label offsets.  */
      else if (GET_CODE (disp) == UNSPEC
	       && XINT (disp, 1) == UNSPEC_POOL_OFFSET)
	;

      /* Accept literal pool references.  */
      else if (GET_CODE (disp) == UNSPEC
	       && XINT (disp, 1) == UNSPEC_LTREL_OFFSET)
	{
	  /* In case CSE pulled a non literal pool reference out of
	     the pool we have to reject the address.  This is
	     especially important when loading the GOT pointer on non
	     zarch CPUs.  In this case the literal pool contains an lt
	     relative offset to the _GLOBAL_OFFSET_TABLE_ label which
	     will most likely exceed the displacement.  */
	  if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
	      || !CONSTANT_POOL_ADDRESS_P (XVECEXP (disp, 0, 0)))
	    return false;

	  orig_disp = gen_rtx_CONST (Pmode, disp);
	  if (offset)
	    {
	      /* If we have an offset, make sure it does not
		 exceed the size of the constant pool entry.
		 Otherwise we might generate an out-of-range
		 displacement for the base register form.  */
	      rtx sym = XVECEXP (disp, 0, 0);
	      if (offset >= GET_MODE_SIZE (get_pool_mode (sym)))
		return false;

	      orig_disp = plus_constant (Pmode, orig_disp, offset);
	    }
	}

      else
	return false;
    }

  if (!base && !indx)
    pointer = true;

  if (out)
    {
      out->base = base;
      out->indx = indx;
      out->disp = orig_disp;
      out->pointer = pointer;
      out->literal_pool = literal_pool;
    }

  return true;
}

/* Decompose a RTL expression OP for an address style operand into its
   components, and return the base register in BASE and the offset in
   OFFSET.  While OP looks like an address it is never supposed to be
   used as such.

   Return true if OP is a valid address operand, false if not.  */

bool
s390_decompose_addrstyle_without_index (rtx op, rtx *base,
					HOST_WIDE_INT *offset)
{
  rtx off = NULL_RTX;

  /* We can have an integer constant, an address register,
     or a sum of the two.  */
  if (CONST_SCALAR_INT_P (op))
    {
      off = op;
      op = NULL_RTX;
    }
  if (op && GET_CODE (op) == PLUS && CONST_SCALAR_INT_P (XEXP (op, 1)))
    {
      off = XEXP (op, 1);
      op = XEXP (op, 0);
    }
  while (op && GET_CODE (op) == SUBREG)
    op = SUBREG_REG (op);

  if (op && GET_CODE (op) != REG)
    return false;

  if (offset)
    {
      if (off == NULL_RTX)
	*offset = 0;
      else if (CONST_INT_P (off))
	*offset = INTVAL (off);
      else if (CONST_WIDE_INT_P (off))
	/* The offset will anyway be cut down to 12 bits so take just
	   the lowest order chunk of the wide int.  */
	*offset = CONST_WIDE_INT_ELT (off, 0);
      else
	gcc_unreachable ();
    }
  if (base)
    *base = op;

  return true;
}
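
/* Illustrative sketch (standalone, hypothetical types): an S/390 memory
   operand is at most base register + index register + displacement,
   which is what s390_decompose_address recovers from the canonical RTL
   form (plus (plus index base) disp).  The effective address is simply
   the sum of whichever components are present.  */

struct s390_example_address
{
  int base_regno;	/* -1 if absent.  */
  int indx_regno;	/* -1 if absent.  */
  long long disp;
};

static ATTRIBUTE_UNUSED long long
s390_example_effective_address (const struct s390_example_address *a,
				const long long *regs)
{
  long long ea = a->disp;
  if (a->base_regno >= 0)
    ea += regs[a->base_regno];
  if (a->indx_regno >= 0)
    ea += regs[a->indx_regno];
  return ea;
}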

/* Return true if CODE is a valid address without index.  */

bool
s390_legitimate_address_without_index_p (rtx op)
{
  struct s390_address addr;

  if (!s390_decompose_address (XEXP (op, 0), &addr))
    return false;
  if (addr.indx)
    return false;

  return true;
}

/* Return TRUE if ADDR is an operand valid for a load/store relative
   instruction.  Be aware that the alignment of the operand needs to
   be checked separately.
   Valid addresses are single references or a sum of a reference and a
   constant integer.  Return these parts in SYMREF and ADDEND.  You can
   pass NULL in SYMREF and/or ADDEND if you are not interested in these
   values.  */

static bool
s390_loadrelative_operand_p (rtx addr, rtx *symref, HOST_WIDE_INT *addend)
{
  HOST_WIDE_INT tmpaddend = 0;

  if (GET_CODE (addr) == CONST)
    addr = XEXP (addr, 0);

  if (GET_CODE (addr) == PLUS)
    {
      if (!CONST_INT_P (XEXP (addr, 1)))
	return false;

      tmpaddend = INTVAL (XEXP (addr, 1));
      addr = XEXP (addr, 0);
    }

  if (GET_CODE (addr) == SYMBOL_REF
      || (GET_CODE (addr) == UNSPEC
	  && (XINT (addr, 1) == UNSPEC_GOTENT
	      || XINT (addr, 1) == UNSPEC_PLT)))
    {
      if (symref)
	*symref = addr;
      if (addend)
	*addend = tmpaddend;

      return true;
    }
  return false;
}

/* Return true if the address in OP is valid for constraint letter C
   if wrapped in a MEM rtx.  Set LIT_POOL_OK to true if literal
   pool MEMs should be accepted.  Only the Q, R, S, T constraint
   letters are allowed for C.  */

static int
s390_check_qrst_address (char c, rtx op, bool lit_pool_ok)
{
  rtx symref;
  struct s390_address addr;
  bool decomposed = false;

  if (!address_operand (op, GET_MODE (op)))
    return 0;

  /* This check makes sure that no symbolic address (except literal
     pool references) are accepted by the R or T constraints.  */
  if (s390_loadrelative_operand_p (op, &symref, NULL)
      && (!lit_pool_ok
	  || !SYMBOL_REF_P (symref)
	  || !CONSTANT_POOL_ADDRESS_P (symref)))
    return 0;

  /* Ensure literal pool references are only accepted if LIT_POOL_OK.  */
  if (!lit_pool_ok)
    {
      if (!s390_decompose_address (op, &addr))
	return 0;
      if (addr.literal_pool)
	return 0;
      decomposed = true;
    }

  /* With reload, we sometimes get intermediate address forms that are
     actually invalid as-is, but we need to accept them in the most
     generic cases below ('R' or 'T'), since reload will in fact fix
     them up.  LRA behaves differently here; we never see such forms,
     but on the other hand, we need to strictly reject every invalid
     address form.  After both reload and LRA invalid address forms
     must be rejected, because nothing will fix them up later.  Perform
     this check right up front.  */
  if (lra_in_progress || reload_completed)
    {
      if (!decomposed && !s390_decompose_address (op, &addr))
	return 0;
      decomposed = true;
    }

  switch (c)
    {
    case 'Q': /* no index short displacement */
      if (!decomposed && !s390_decompose_address (op, &addr))
	return 0;
      if (addr.indx)
	return 0;
      if (!s390_short_displacement (addr.disp))
	return 0;
      break;

    case 'R': /* with index short displacement */
      if (TARGET_LONG_DISPLACEMENT)
	{
	  if (!decomposed && !s390_decompose_address (op, &addr))
	    return 0;
	  if (!s390_short_displacement (addr.disp))
	    return 0;
	}
      /* Any invalid address here will be fixed up by reload,
	 so accept it for the most generic constraint.  */
      break;

    case 'S': /* no index long displacement */
      if (!decomposed && !s390_decompose_address (op, &addr))
	return 0;
      if (addr.indx)
	return 0;
      break;

    case 'T': /* with index long displacement */
      /* Any invalid address here will be fixed up by reload,
	 so accept it for the most generic constraint.  */
      break;

    default:
      return 0;
    }
  return 1;
}

/* Evaluates constraint strings described by the regular expression
   ([A|B|Z](Q|R|S|T))|Y and returns 1 if OP is a valid operand for
   the constraint given in STR, or 0 otherwise.  */

int
s390_mem_constraint (const char *str, rtx op)
{
  char c = str[0];

  switch (c)
    {
    case 'A':
      /* Check for offsettable variants of memory constraints.  */
      if (!MEM_P (op) || MEM_VOLATILE_P (op))
	return 0;
      if ((reload_completed || reload_in_progress)
	  ? !offsettable_memref_p (op) : !offsettable_nonstrict_memref_p (op))
	return 0;
      return s390_check_qrst_address (str[1], XEXP (op, 0), true);
    case 'B':
      /* Check for non-literal-pool variants of memory constraints.  */
      if (!MEM_P (op))
	return 0;
      return s390_check_qrst_address (str[1], XEXP (op, 0), false);
    case 'Q':
    case 'R':
    case 'S':
    case 'T':
      if (GET_CODE (op) != MEM)
	return 0;
      return s390_check_qrst_address (c, XEXP (op, 0), true);
    case 'Y':
      /* Simply check for the basic form of a shift count.  Reload will
	 take care of making sure we have a proper base register.  */
      if (!s390_decompose_addrstyle_without_index (op, NULL, NULL))
	return 0;
      break;
    case 'Z':
      return s390_check_qrst_address (str[1], op, true);
    default:
      return 0;
    }
  return 1;
}

/* Evaluates constraint strings starting with letter O.  Input
   parameter C is the second letter following the "O" in the constraint
   string.  Returns 1 if VALUE meets the respective constraint and 0
   otherwise.  */

int
s390_O_constraint_str (const char c, HOST_WIDE_INT value)
{
  if (!TARGET_EXTIMM)
    return 0;

  switch (c)
    {
    case 's':
      return trunc_int_for_mode (value, SImode) == value;

    case 'p':
      return value == 0
	     || s390_single_part (GEN_INT (value), DImode, SImode, 0) == 1;

    case 'n':
      return s390_single_part (GEN_INT (value - 1), DImode, SImode, -1) == 1;

    default:
      gcc_unreachable ();
    }
}

/* Evaluates constraint strings starting with letter N.  Parameter STR
   contains the letters following letter "N" in the constraint string.
   Returns true if VALUE matches the constraint.  */

int
s390_N_constraint_str (const char *str, HOST_WIDE_INT value)
{
  machine_mode mode, part_mode;
  int def;
  int part, part_goal;

  if (str[0] == 'x')
    part_goal = -1;
  else
    part_goal = str[0] - '0';

  switch (str[1])
    {
    case 'Q':
      part_mode = QImode;
      break;
    case 'H':
      part_mode = HImode;
      break;
    case 'S':
      part_mode = SImode;
      break;
    default:
      return 0;
    }

  switch (str[2])
    {
    case 'H':
      mode = HImode;
      break;
    case 'S':
      mode = SImode;
      break;
    case 'D':
      mode = DImode;
      break;
    default:
      return 0;
    }

  switch (str[3])
    {
    case '0':
      def = 0;
      break;
    case 'F':
      def = -1;
      break;
    default:
      return 0;
    }

  if (GET_MODE_SIZE (mode) <= GET_MODE_SIZE (part_mode))
    return 0;

  part = s390_single_part (GEN_INT (value), mode, part_mode, def);
  if (part < 0)
    return 0;
  if (part_goal != -1 && part_goal != part)
    return 0;

  return 1;
}
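
/* Illustrative sketch (standalone; helper name is hypothetical): an
   "N" constraint string encodes part number, part mode, full mode and
   default value.  "3HD0", for instance, accepts a DImode constant
   whose only nonzero HImode part is part 3 (the least significant
   halfword), such as 0xffff.  */

static ATTRIBUTE_UNUSED int
s390_example_n3hd0_p (unsigned long long value)
{
  /* Exactly one halfword may differ from 0, and it must be the
     lowest one (big-endian part number 3 of 4).  */
  return (value & 0xffff) != 0 && (value >> 16) == 0;
}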
3432
3433
3434 /* Returns true if the input parameter VALUE is a float zero. */
3435
3436 int
s390_float_const_zero_p(rtx value)3437 s390_float_const_zero_p (rtx value)
3438 {
3439 return (GET_MODE_CLASS (GET_MODE (value)) == MODE_FLOAT
3440 && value == CONST0_RTX (GET_MODE (value)));
3441 }
3442
3443 /* Implement TARGET_REGISTER_MOVE_COST. */
3444
3445 static int
s390_register_move_cost(machine_mode mode,reg_class_t from,reg_class_t to)3446 s390_register_move_cost (machine_mode mode,
3447 reg_class_t from, reg_class_t to)
3448 {
3449 /* On s390, copy between fprs and gprs is expensive. */
3450
3451 /* It becomes somewhat faster having ldgr/lgdr. */
3452 if (TARGET_Z10 && GET_MODE_SIZE (mode) == 8)
3453 {
3454 /* ldgr is single cycle. */
3455 if (reg_classes_intersect_p (from, GENERAL_REGS)
3456 && reg_classes_intersect_p (to, FP_REGS))
3457 return 1;
3458 /* lgdr needs 3 cycles. */
3459 if (reg_classes_intersect_p (to, GENERAL_REGS)
3460 && reg_classes_intersect_p (from, FP_REGS))
3461 return 3;
3462 }
3463
3464 /* Otherwise copying is done via memory. */
3465 if ((reg_classes_intersect_p (from, GENERAL_REGS)
3466 && reg_classes_intersect_p (to, FP_REGS))
3467 || (reg_classes_intersect_p (from, FP_REGS)
3468 && reg_classes_intersect_p (to, GENERAL_REGS)))
3469 return 10;
3470
3471 /* We usually do not want to copy via CC. */
3472 if (reg_classes_intersect_p (from, CC_REGS)
3473 || reg_classes_intersect_p (to, CC_REGS))
3474 return 5;
3475
3476 return 1;
3477 }
3478
3479 /* Implement TARGET_MEMORY_MOVE_COST. */
3480
3481 static int
s390_memory_move_cost(machine_mode mode ATTRIBUTE_UNUSED,reg_class_t rclass ATTRIBUTE_UNUSED,bool in ATTRIBUTE_UNUSED)3482 s390_memory_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
3483 reg_class_t rclass ATTRIBUTE_UNUSED,
3484 bool in ATTRIBUTE_UNUSED)
3485 {
3486 return 2;
3487 }
3488
3489 /* Compute a (partial) cost for rtx X. Return true if the complete
3490 cost has been computed, and false if subexpressions should be
3491 scanned. In either case, *TOTAL contains the cost result. The
3492 initial value of *TOTAL is the default value computed by
3493 rtx_cost. It may be left unmodified. OUTER_CODE contains the
3494 code of the superexpression of x. */
3495
3496 static bool
s390_rtx_costs(rtx x,machine_mode mode,int outer_code,int opno ATTRIBUTE_UNUSED,int * total,bool speed ATTRIBUTE_UNUSED)3497 s390_rtx_costs (rtx x, machine_mode mode, int outer_code,
3498 int opno ATTRIBUTE_UNUSED,
3499 int *total, bool speed ATTRIBUTE_UNUSED)
3500 {
3501 int code = GET_CODE (x);
3502 switch (code)
3503 {
3504 case CONST:
3505 case CONST_INT:
3506 case LABEL_REF:
3507 case SYMBOL_REF:
3508 case CONST_DOUBLE:
3509 case CONST_WIDE_INT:
3510 case MEM:
3511 *total = 0;
3512 return true;
3513
3514 case SET:
3515 {
3516 /* Without this a conditional move instruction would be
3517 accounted as 3 * COSTS_N_INSNS (set, if_then_else,
3518 comparison operator). That's a bit pessimistic. */
3519
3520 if (!TARGET_Z196 || GET_CODE (SET_SRC (x)) != IF_THEN_ELSE)
3521 return false;
3522
3523 rtx cond = XEXP (SET_SRC (x), 0);
3524
3525 if (!CC_REG_P (XEXP (cond, 0)) || !CONST_INT_P (XEXP (cond, 1)))
3526 return false;
3527
3528 /* It is going to be a load/store on condition. Make it
3529 slightly more expensive than a normal load. */
3530 *total = COSTS_N_INSNS (1) + 1;
3531
3532 rtx dst = SET_DEST (x);
3533 rtx then = XEXP (SET_SRC (x), 1);
3534 rtx els = XEXP (SET_SRC (x), 2);
3535
3536 /* It is a real IF-THEN-ELSE. An additional move will be
3537 needed to implement that. */
3538 if (!TARGET_Z15
3539 && reload_completed
3540 && !rtx_equal_p (dst, then)
3541 && !rtx_equal_p (dst, els))
3542 *total += COSTS_N_INSNS (1) / 2;
3543
3544 /* A minor penalty for constants we cannot directly handle. */
3545 if ((CONST_INT_P (then) || CONST_INT_P (els))
3546 && (!TARGET_Z13 || MEM_P (dst)
3547 || (CONST_INT_P (then) && !satisfies_constraint_K (then))
3548 || (CONST_INT_P (els) && !satisfies_constraint_K (els))))
3549 *total += COSTS_N_INSNS (1) / 2;
3550
3551 /* A store on condition can only handle register src operands. */
3552 if (MEM_P (dst) && (!REG_P (then) || !REG_P (els)))
3553 *total += COSTS_N_INSNS (1) / 2;
3554
3555 return true;
3556 }
3557 case IOR:
3558
3559 /* nnrk, nngrk */
3560 if (TARGET_Z15
3561 && (mode == SImode || mode == DImode)
3562 && GET_CODE (XEXP (x, 0)) == NOT
3563 && GET_CODE (XEXP (x, 1)) == NOT)
3564 {
3565 *total = COSTS_N_INSNS (1);
3566 if (!REG_P (XEXP (XEXP (x, 0), 0)))
3567 *total += 1;
3568 if (!REG_P (XEXP (XEXP (x, 1), 0)))
3569 *total += 1;
3570 return true;
3571 }
3572
3573 /* risbg */
3574 if (GET_CODE (XEXP (x, 0)) == AND
3575 && GET_CODE (XEXP (x, 1)) == ASHIFT
3576 && REG_P (XEXP (XEXP (x, 0), 0))
3577 && REG_P (XEXP (XEXP (x, 1), 0))
3578 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
3579 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
3580 && (UINTVAL (XEXP (XEXP (x, 0), 1)) ==
3581 (HOST_WIDE_INT_1U << UINTVAL (XEXP (XEXP (x, 1), 1))) - 1))
3582 {
3583 *total = COSTS_N_INSNS (2);
3584 return true;
3585 }
3586
3587 /* ~AND on a 128 bit mode. This can be done using a vector
3588 instruction. */
3589 if (TARGET_VXE
3590 && GET_CODE (XEXP (x, 0)) == NOT
3591 && GET_CODE (XEXP (x, 1)) == NOT
3592 && REG_P (XEXP (XEXP (x, 0), 0))
3593 && REG_P (XEXP (XEXP (x, 1), 0))
3594 && GET_MODE_SIZE (GET_MODE (XEXP (XEXP (x, 0), 0))) == 16
3595 && s390_hard_regno_mode_ok (VR0_REGNUM,
3596 GET_MODE (XEXP (XEXP (x, 0), 0))))
3597 {
3598 *total = COSTS_N_INSNS (1);
3599 return true;
3600 }
3601
3602 *total = COSTS_N_INSNS (1);
3603 return false;
3604
3605 case AND:
3606 /* nork, nogrk */
3607 if (TARGET_Z15
3608 && (mode == SImode || mode == DImode)
3609 && GET_CODE (XEXP (x, 0)) == NOT
3610 && GET_CODE (XEXP (x, 1)) == NOT)
3611 {
3612 *total = COSTS_N_INSNS (1);
3613 if (!REG_P (XEXP (XEXP (x, 0), 0)))
3614 *total += 1;
3615 if (!REG_P (XEXP (XEXP (x, 1), 0)))
3616 *total += 1;
3617 return true;
3618 }
3619 /* fallthrough */
3620 case ASHIFT:
3621 case ASHIFTRT:
3622 case LSHIFTRT:
3623 case ROTATE:
3624 case ROTATERT:
3625 case XOR:
3626 case NEG:
3627 case NOT:
3628 case PLUS:
3629 case MINUS:
3630 *total = COSTS_N_INSNS (1);
3631 return false;
3632
3633 case MULT:
3634 switch (mode)
3635 {
3636 case E_SImode:
3637 {
3638 rtx left = XEXP (x, 0);
3639 rtx right = XEXP (x, 1);
3640 if (GET_CODE (right) == CONST_INT
3641 && CONST_OK_FOR_K (INTVAL (right)))
3642 *total = s390_cost->mhi;
3643 else if (GET_CODE (left) == SIGN_EXTEND)
3644 *total = s390_cost->mh;
3645 else
3646 *total = s390_cost->ms; /* msr, ms, msy */
3647 break;
3648 }
3649 case E_DImode:
3650 {
3651 rtx left = XEXP (x, 0);
3652 rtx right = XEXP (x, 1);
3653 if (TARGET_ZARCH)
3654 {
3655 if (GET_CODE (right) == CONST_INT
3656 && CONST_OK_FOR_K (INTVAL (right)))
3657 *total = s390_cost->mghi;
3658 else if (GET_CODE (left) == SIGN_EXTEND)
3659 *total = s390_cost->msgf;
3660 else
3661 *total = s390_cost->msg; /* msgr, msg */
3662 }
3663 else /* TARGET_31BIT */
3664 {
3665 if (GET_CODE (left) == SIGN_EXTEND
3666 && GET_CODE (right) == SIGN_EXTEND)
3667 /* mulsidi case: mr, m */
3668 *total = s390_cost->m;
3669 else if (GET_CODE (left) == ZERO_EXTEND
3670 && GET_CODE (right) == ZERO_EXTEND)
3671 /* umulsidi case: ml, mlr */
3672 *total = s390_cost->ml;
3673 else
3674 /* Complex calculation is required. */
3675 *total = COSTS_N_INSNS (40);
3676 }
3677 break;
3678 }
3679 case E_SFmode:
3680 case E_DFmode:
3681 *total = s390_cost->mult_df;
3682 break;
3683 case E_TFmode:
3684 *total = s390_cost->mxbr;
3685 break;
3686 default:
3687 return false;
3688 }
3689 return false;
3690
3691 case FMA:
3692 switch (mode)
3693 {
3694 case E_DFmode:
3695 *total = s390_cost->madbr;
3696 break;
3697 case E_SFmode:
3698 *total = s390_cost->maebr;
3699 break;
3700 default:
3701 return false;
3702 }
3703 /* Negate in the third argument is free: FMSUB. */
3704 if (GET_CODE (XEXP (x, 2)) == NEG)
3705 {
3706 *total += (rtx_cost (XEXP (x, 0), mode, FMA, 0, speed)
3707 + rtx_cost (XEXP (x, 1), mode, FMA, 1, speed)
3708 + rtx_cost (XEXP (XEXP (x, 2), 0), mode, FMA, 2, speed));
3709 return true;
3710 }
3711 return false;
3712
3713 case UDIV:
3714 case UMOD:
3715 if (mode == TImode) /* 128 bit division */
3716 *total = s390_cost->dlgr;
3717 else if (mode == DImode)
3718 {
3719 rtx right = XEXP (x, 1);
3720 if (GET_CODE (right) == ZERO_EXTEND) /* 64 by 32 bit division */
3721 *total = s390_cost->dlr;
3722 else /* 64 by 64 bit division */
3723 *total = s390_cost->dlgr;
3724 }
3725 else if (mode == SImode) /* 32 bit division */
3726 *total = s390_cost->dlr;
3727 return false;
3728
3729 case DIV:
3730 case MOD:
3731 if (mode == DImode)
3732 {
3733 rtx right = XEXP (x, 1);
3734 if (GET_CODE (right) == ZERO_EXTEND) /* 64 by 32 bit division */
3735 if (TARGET_ZARCH)
3736 *total = s390_cost->dsgfr;
3737 else
3738 *total = s390_cost->dr;
3739 else /* 64 by 64 bit division */
3740 *total = s390_cost->dsgr;
3741 }
3742 else if (mode == SImode) /* 32 bit division */
3743 *total = s390_cost->dlr;
3744 else if (mode == SFmode)
3745 {
3746 *total = s390_cost->debr;
3747 }
3748 else if (mode == DFmode)
3749 {
3750 *total = s390_cost->ddbr;
3751 }
3752 else if (mode == TFmode)
3753 {
3754 *total = s390_cost->dxbr;
3755 }
3756 return false;
3757
3758 case SQRT:
3759 if (mode == SFmode)
3760 *total = s390_cost->sqebr;
3761 else if (mode == DFmode)
3762 *total = s390_cost->sqdbr;
3763 else /* TFmode */
3764 *total = s390_cost->sqxbr;
3765 return false;
3766
3767 case SIGN_EXTEND:
3768 case ZERO_EXTEND:
3769 if (outer_code == MULT || outer_code == DIV || outer_code == MOD
3770 || outer_code == PLUS || outer_code == MINUS
3771 || outer_code == COMPARE)
3772 *total = 0;
3773 return false;
3774
3775 case COMPARE:
3776 *total = COSTS_N_INSNS (1);
3777
3778 /* nxrk, nxgrk ~(a^b)==0 */
3779 if (TARGET_Z15
3780 && GET_CODE (XEXP (x, 0)) == NOT
3781 && XEXP (x, 1) == const0_rtx
3782 && GET_CODE (XEXP (XEXP (x, 0), 0)) == XOR
3783 && (GET_MODE (XEXP (x, 0)) == SImode || GET_MODE (XEXP (x, 0)) == DImode)
3784 && mode == CCZmode)
3785 {
3786 if (!REG_P (XEXP (XEXP (XEXP (x, 0), 0), 0)))
3787 *total += 1;
3788 if (!REG_P (XEXP (XEXP (XEXP (x, 0), 0), 1)))
3789 *total += 1;
3790 return true;
3791 }
3792
3793 /* nnrk, nngrk, nork, nogrk */
3794 if (TARGET_Z15
3795 && (GET_CODE (XEXP (x, 0)) == AND || GET_CODE (XEXP (x, 0)) == IOR)
3796 && XEXP (x, 1) == const0_rtx
3797 && (GET_MODE (XEXP (x, 0)) == SImode || GET_MODE (XEXP (x, 0)) == DImode)
3798 && GET_CODE (XEXP (XEXP (x, 0), 0)) == NOT
3799 && GET_CODE (XEXP (XEXP (x, 0), 1)) == NOT
3800 && mode == CCZmode)
3801 {
3802 if (!REG_P (XEXP (XEXP (XEXP (x, 0), 0), 0)))
3803 *total += 1;
3804 if (!REG_P (XEXP (XEXP (XEXP (x, 0), 1), 0)))
3805 *total += 1;
3806 return true;
3807 }
3808
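    /* (compare (and X C1) C2) can often be implemented as a single
       TEST UNDER MASK (tm/tmll/...) instruction; accept it as a
       one-insn compare whenever a TM CC mode exists for the operands.  */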
3809 if (GET_CODE (XEXP (x, 0)) == AND
3810 && GET_CODE (XEXP (x, 1)) == CONST_INT
3811 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)
3812 {
3813 rtx op0 = XEXP (XEXP (x, 0), 0);
3814 rtx op1 = XEXP (XEXP (x, 0), 1);
3815 rtx op2 = XEXP (x, 1);
3816
3817 if (memory_operand (op0, GET_MODE (op0))
3818 && s390_tm_ccmode (op1, op2, 0) != VOIDmode)
3819 return true;
3820 if (register_operand (op0, GET_MODE (op0))
3821 && s390_tm_ccmode (op1, op2, 1) != VOIDmode)
3822 return true;
3823 }
3824 return false;
3825
3826 default:
3827 return false;
3828 }
3829 }
3830
3831 /* Return the cost of an address rtx ADDR. */
3832
3833 static int
3834 s390_address_cost (rtx addr, machine_mode mode ATTRIBUTE_UNUSED,
3835 addr_space_t as ATTRIBUTE_UNUSED,
3836 bool speed ATTRIBUTE_UNUSED)
3837 {
3838 struct s390_address ad;
3839 if (!s390_decompose_address (addr, &ad))
3840 return 1000;
3841
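  /* Addresses with an index register cost slightly more, so that
     base-only addresses are preferred when both forms are available.  */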
3842   return ad.indx ? COSTS_N_INSNS (1) + 1 : COSTS_N_INSNS (1);
3843 }
3844
3845 /* Implement targetm.vectorize.builtin_vectorization_cost. */
3846 static int
3847 s390_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
3848 tree vectype,
3849 int misalign ATTRIBUTE_UNUSED)
3850 {
3851 switch (type_of_cost)
3852 {
3853 case scalar_stmt:
3854 case scalar_load:
3855 case scalar_store:
3856 case vector_stmt:
3857 case vector_load:
3858 case vector_store:
3859 case vector_gather_load:
3860 case vector_scatter_store:
3861 case vec_to_scalar:
3862 case scalar_to_vec:
3863 case cond_branch_not_taken:
3864 case vec_perm:
3865 case vec_promote_demote:
3866 case unaligned_load:
3867 case unaligned_store:
3868 return 1;
3869
3870 case cond_branch_taken:
3871 return 3;
3872
3873 case vec_construct:
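      /* Building an N-element vector takes N - 1 element inserts
	 after the first element, e.g. 3 for V4SI.  */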
3874 return TYPE_VECTOR_SUBPARTS (vectype) - 1;
3875
3876 default:
3877 gcc_unreachable ();
3878 }
3879 }
3880
3881 /* If OP is a SYMBOL_REF of a thread-local symbol, return its TLS mode,
3882 otherwise return 0. */
3883
3884 int
3885 tls_symbolic_operand (rtx op)
3886 {
3887 if (GET_CODE (op) != SYMBOL_REF)
3888 return 0;
3889 return SYMBOL_REF_TLS_MODEL (op);
3890 }
3891
3892 /* Split DImode access register reference REG (on 64-bit) into its constituent
3893 low and high parts, and store them into LO and HI. Note that gen_lowpart/
3894 gen_highpart cannot be used as they assume all registers are word-sized,
3895 while our access registers have only half that size. */
3896
3897 void
3898 s390_split_access_reg (rtx reg, rtx *lo, rtx *hi)
3899 {
3900 gcc_assert (TARGET_64BIT);
3901 gcc_assert (ACCESS_REG_P (reg));
3902 gcc_assert (GET_MODE (reg) == DImode);
3903 gcc_assert (!(REGNO (reg) & 1));
3904
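  /* The value occupies an even/odd access-register pair; the even
     (lower-numbered) register holds the high part.  */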
3905 *lo = gen_rtx_REG (SImode, REGNO (reg) + 1);
3906 *hi = gen_rtx_REG (SImode, REGNO (reg));
3907 }
3908
3909 /* Return true if OP contains a symbol reference.  */
3910
3911 bool
3912 symbolic_reference_mentioned_p (rtx op)
3913 {
3914 const char *fmt;
3915 int i;
3916
3917 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
3918 return 1;
3919
3920 fmt = GET_RTX_FORMAT (GET_CODE (op));
3921 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
3922 {
3923 if (fmt[i] == 'E')
3924 {
3925 int j;
3926
3927 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
3928 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
3929 return 1;
3930 }
3931
3932 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
3933 return 1;
3934 }
3935
3936 return 0;
3937 }
3938
3939 /* Return true if OP contains a reference to a thread-local symbol. */
3940
3941 bool
3942 tls_symbolic_reference_mentioned_p (rtx op)
3943 {
3944 const char *fmt;
3945 int i;
3946
3947 if (GET_CODE (op) == SYMBOL_REF)
3948 return tls_symbolic_operand (op);
3949
3950 fmt = GET_RTX_FORMAT (GET_CODE (op));
3951 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
3952 {
3953 if (fmt[i] == 'E')
3954 {
3955 int j;
3956
3957 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
3958 if (tls_symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
3959 return true;
3960 }
3961
3962 else if (fmt[i] == 'e' && tls_symbolic_reference_mentioned_p (XEXP (op, i)))
3963 return true;
3964 }
3965
3966 return false;
3967 }
3968
3969
3970 /* Return true if OP is a legitimate general operand when
3971 generating PIC code. It is given that flag_pic is on
3972 and that OP satisfies CONSTANT_P. */
3973
3974 int
3975 legitimate_pic_operand_p (rtx op)
3976 {
3977 /* Accept all non-symbolic constants. */
3978 if (!SYMBOLIC_CONST (op))
3979 return 1;
3980
3981 /* Accept addresses that can be expressed relative to (pc). */
3982 if (larl_operand (op, VOIDmode))
3983 return 1;
3984
3985 /* Reject everything else; must be handled
3986 via emit_symbolic_move. */
3987 return 0;
3988 }
3989
3990 /* Returns true if the constant value OP is a legitimate general operand.
3991 It is given that OP satisfies CONSTANT_P. */
3992
3993 static bool
3994 s390_legitimate_constant_p (machine_mode mode, rtx op)
3995 {
3996 if (TARGET_VX && VECTOR_MODE_P (mode) && GET_CODE (op) == CONST_VECTOR)
3997 {
3998 if (GET_MODE_SIZE (mode) != 16)
3999 return 0;
4000
4001 if (!satisfies_constraint_j00 (op)
4002 && !satisfies_constraint_jm1 (op)
4003 && !satisfies_constraint_jKK (op)
4004 && !satisfies_constraint_jxx (op)
4005 && !satisfies_constraint_jyy (op))
4006 return 0;
4007 }
4008
4009 /* Accept all non-symbolic constants. */
4010 if (!SYMBOLIC_CONST (op))
4011 return 1;
4012
4013 /* Accept immediate LARL operands. */
4014 if (larl_operand (op, mode))
4015 return 1;
4016
4017 /* Thread-local symbols are never legal constants. This is
4018 so that emit_call knows that computing such addresses
4019 might require a function call. */
4020 if (TLS_SYMBOLIC_CONST (op))
4021 return 0;
4022
4023 /* In the PIC case, symbolic constants must *not* be
4024 forced into the literal pool. We accept them here,
4025 so that they will be handled by emit_symbolic_move. */
4026 if (flag_pic)
4027 return 1;
4028
4029 /* All remaining non-PIC symbolic constants are
4030 forced into the literal pool. */
4031 return 0;
4032 }
4033
4034 /* Determine if it's legal to put X into the constant pool. This
4035 is not possible if X contains the address of a symbol that is
4036 not constant (TLS) or not known at final link time (PIC). */
4037
4038 static bool
4039 s390_cannot_force_const_mem (machine_mode mode, rtx x)
4040 {
4041 switch (GET_CODE (x))
4042 {
4043 case CONST_INT:
4044 case CONST_DOUBLE:
4045 case CONST_WIDE_INT:
4046 case CONST_VECTOR:
4047 /* Accept all non-symbolic constants. */
4048 return false;
4049
4050 case LABEL_REF:
4051 /* Labels are OK iff we are non-PIC. */
4052 return flag_pic != 0;
4053
4054 case SYMBOL_REF:
4055 /* 'Naked' TLS symbol references are never OK,
4056 non-TLS symbols are OK iff we are non-PIC. */
4057 if (tls_symbolic_operand (x))
4058 return true;
4059 else
4060 return flag_pic != 0;
4061
4062 case CONST:
4063 return s390_cannot_force_const_mem (mode, XEXP (x, 0));
4064 case PLUS:
4065 case MINUS:
4066 return s390_cannot_force_const_mem (mode, XEXP (x, 0))
4067 || s390_cannot_force_const_mem (mode, XEXP (x, 1));
4068
4069 case UNSPEC:
4070 switch (XINT (x, 1))
4071 {
4072 /* Only lt-relative or GOT-relative UNSPECs are OK. */
4073 case UNSPEC_LTREL_OFFSET:
4074 case UNSPEC_GOT:
4075 case UNSPEC_GOTOFF:
4076 case UNSPEC_PLTOFF:
4077 case UNSPEC_TLSGD:
4078 case UNSPEC_TLSLDM:
4079 case UNSPEC_NTPOFF:
4080 case UNSPEC_DTPOFF:
4081 case UNSPEC_GOTNTPOFF:
4082 case UNSPEC_INDNTPOFF:
4083 return false;
4084
4085 	/* If the literal pool shares the code section, execute
4086 	   template placeholders may be put into the pool as well.  */
4087 case UNSPEC_INSN:
4088 default:
4089 return true;
4090 }
4091 break;
4092
4093 default:
4094 gcc_unreachable ();
4095 }
4096 }
4097
4098 /* Returns true if the constant value OP is a legitimate general
4099    operand during and after reload.  The difference from
4100 legitimate_constant_p is that this function will not accept
4101 a constant that would need to be forced to the literal pool
4102 before it can be used as operand.
4103 This function accepts all constants which can be loaded directly
4104 into a GPR. */
4105
4106 bool
4107 legitimate_reload_constant_p (rtx op)
4108 {
4109 /* Accept la(y) operands. */
4110 if (GET_CODE (op) == CONST_INT
4111 && DISP_IN_RANGE (INTVAL (op)))
4112 return true;
4113
4114 /* Accept l(g)hi/l(g)fi operands. */
4115 if (GET_CODE (op) == CONST_INT
4116 && (CONST_OK_FOR_K (INTVAL (op)) || CONST_OK_FOR_Os (INTVAL (op))))
4117 return true;
4118
4119 /* Accept lliXX operands. */
4120 if (TARGET_ZARCH
4121 && GET_CODE (op) == CONST_INT
4122 && trunc_int_for_mode (INTVAL (op), word_mode) == INTVAL (op)
4123 && s390_single_part (op, word_mode, HImode, 0) >= 0)
4124 return true;
4125
4126 if (TARGET_EXTIMM
4127 && GET_CODE (op) == CONST_INT
4128 && trunc_int_for_mode (INTVAL (op), word_mode) == INTVAL (op)
4129 && s390_single_part (op, word_mode, SImode, 0) >= 0)
4130 return true;
4131
4132 /* Accept larl operands. */
4133 if (larl_operand (op, VOIDmode))
4134 return true;
4135
4136 /* Accept floating-point zero operands that fit into a single GPR. */
4137 if (GET_CODE (op) == CONST_DOUBLE
4138 && s390_float_const_zero_p (op)
4139 && GET_MODE_SIZE (GET_MODE (op)) <= UNITS_PER_WORD)
4140 return true;
4141
4142 /* Accept double-word operands that can be split. */
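  /* E.g. on 31-bit the DImode constant 0x100000001 splits into two
     SImode words of value 1, each of which is trivially loadable.  */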
4143 if (GET_CODE (op) == CONST_WIDE_INT
4144 || (GET_CODE (op) == CONST_INT
4145 && trunc_int_for_mode (INTVAL (op), word_mode) != INTVAL (op)))
4146 {
4147 machine_mode dword_mode = word_mode == SImode ? DImode : TImode;
4148 rtx hi = operand_subword (op, 0, 0, dword_mode);
4149 rtx lo = operand_subword (op, 1, 0, dword_mode);
4150 return legitimate_reload_constant_p (hi)
4151 && legitimate_reload_constant_p (lo);
4152 }
4153
4154 /* Everything else cannot be handled without reload. */
4155 return false;
4156 }
4157
4158 /* Returns true if the constant value OP is a legitimate fp operand
4159 during and after reload.
4160 This function accepts all constants which can be loaded directly
4161 into an FPR. */
4162
4163 static bool
4164 legitimate_reload_fp_constant_p (rtx op)
4165 {
4166 /* Accept floating-point zero operands if the load zero instruction
4167 can be used. Prior to z196 the load fp zero instruction caused a
4168 performance penalty if the result is used as BFP number. */
4169 if (TARGET_Z196
4170 && GET_CODE (op) == CONST_DOUBLE
4171 && s390_float_const_zero_p (op))
4172 return true;
4173
4174 return false;
4175 }
4176
4177 /* Returns true if the constant value OP is a legitimate vector operand
4178 during and after reload.
4179 This function accepts all constants which can be loaded directly
4180    into a VR.  */
4181
4182 static bool
4183 legitimate_reload_vector_constant_p (rtx op)
4184 {
4185 if (TARGET_VX && GET_MODE_SIZE (GET_MODE (op)) == 16
4186 && (satisfies_constraint_j00 (op)
4187 || satisfies_constraint_jm1 (op)
4188 || satisfies_constraint_jKK (op)
4189 || satisfies_constraint_jxx (op)
4190 || satisfies_constraint_jyy (op)))
4191 return true;
4192
4193 return false;
4194 }
4195
4196 /* Given an rtx OP being reloaded into a reg required to be in class RCLASS,
4197 return the class of reg to actually use. */
4198
4199 static reg_class_t
4200 s390_preferred_reload_class (rtx op, reg_class_t rclass)
4201 {
4202 switch (GET_CODE (op))
4203 {
4204 /* Constants we cannot reload into general registers
4205 must be forced into the literal pool. */
4206 case CONST_VECTOR:
4207 case CONST_DOUBLE:
4208 case CONST_INT:
4209 case CONST_WIDE_INT:
4210 if (reg_class_subset_p (GENERAL_REGS, rclass)
4211 && legitimate_reload_constant_p (op))
4212 return GENERAL_REGS;
4213 else if (reg_class_subset_p (ADDR_REGS, rclass)
4214 && legitimate_reload_constant_p (op))
4215 return ADDR_REGS;
4216 else if (reg_class_subset_p (FP_REGS, rclass)
4217 && legitimate_reload_fp_constant_p (op))
4218 return FP_REGS;
4219 else if (reg_class_subset_p (VEC_REGS, rclass)
4220 && legitimate_reload_vector_constant_p (op))
4221 return VEC_REGS;
4222
4223 return NO_REGS;
4224
4225 /* If a symbolic constant or a PLUS is reloaded,
4226 it is most likely being used as an address, so
4227 prefer ADDR_REGS. If 'class' is not a superset
4228 of ADDR_REGS, e.g. FP_REGS, reject this reload. */
4229 case CONST:
4230 /* Symrefs cannot be pushed into the literal pool with -fPIC
4231 so we *MUST NOT* return NO_REGS for these cases
4232 (s390_cannot_force_const_mem will return true).
4233
4234 On the other hand we MUST return NO_REGS for symrefs with
4235 invalid addend which might have been pushed to the literal
4236 pool (no -fPIC). Usually we would expect them to be
4237 handled via secondary reload but this does not happen if
4238 they are used as literal pool slot replacement in reload
4239 inheritance (see emit_input_reload_insns). */
4240 if (GET_CODE (XEXP (op, 0)) == PLUS
4241 	  && GET_CODE (XEXP (XEXP (op, 0), 0)) == SYMBOL_REF
4242 	  && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
4243 {
4244 if (flag_pic && reg_class_subset_p (ADDR_REGS, rclass))
4245 return ADDR_REGS;
4246 else
4247 return NO_REGS;
4248 }
4249 /* fallthrough */
4250 case LABEL_REF:
4251 case SYMBOL_REF:
4252 if (!legitimate_reload_constant_p (op))
4253 return NO_REGS;
4254 /* fallthrough */
4255 case PLUS:
4256 /* load address will be used. */
4257 if (reg_class_subset_p (ADDR_REGS, rclass))
4258 return ADDR_REGS;
4259 else
4260 return NO_REGS;
4261
4262 default:
4263 break;
4264 }
4265
4266 return rclass;
4267 }
4268
4269 /* Return true if ADDR is SYMBOL_REF + addend with addend being a
4270 multiple of ALIGNMENT and the SYMBOL_REF being naturally
4271 aligned. */
4272
4273 bool
4274 s390_check_symref_alignment (rtx addr, HOST_WIDE_INT alignment)
4275 {
4276 HOST_WIDE_INT addend;
4277 rtx symref;
4278
4279 /* The "required alignment" might be 0 (e.g. for certain structs
4280 accessed via BLKmode). Early abort in this case, as well as when
4281 an alignment > 8 is required. */
4282 if (alignment < 2 || alignment > 8)
4283 return false;
4284
4285 if (!s390_loadrelative_operand_p (addr, &symref, &addend))
4286 return false;
4287
4288 if (addend & (alignment - 1))
4289 return false;
4290
4291 if (GET_CODE (symref) == SYMBOL_REF)
4292 {
4293 /* s390_encode_section_info is not called for anchors, since they don't
4294 have corresponding VAR_DECLs. Therefore, we cannot rely on
4295 SYMBOL_FLAG_NOTALIGN{2,4,8}_P returning useful information. */
4296 if (SYMBOL_REF_ANCHOR_P (symref))
4297 {
4298 HOST_WIDE_INT block_offset = SYMBOL_REF_BLOCK_OFFSET (symref);
4299 unsigned int block_alignment = (SYMBOL_REF_BLOCK (symref)->alignment
4300 / BITS_PER_UNIT);
4301
4302 gcc_assert (block_offset >= 0);
4303 return ((block_offset & (alignment - 1)) == 0
4304 && block_alignment >= alignment);
4305 }
4306
4307 /* We have load-relative instructions for 2-byte, 4-byte, and
4308 8-byte alignment so allow only these. */
4309 switch (alignment)
4310 {
4311 case 8: return !SYMBOL_FLAG_NOTALIGN8_P (symref);
4312 case 4: return !SYMBOL_FLAG_NOTALIGN4_P (symref);
4313 case 2: return !SYMBOL_FLAG_NOTALIGN2_P (symref);
4314 default: return false;
4315 }
4316 }
4317
4318 if (GET_CODE (symref) == UNSPEC
4319 && alignment <= UNITS_PER_LONG)
4320 return true;
4321
4322 return false;
4323 }
4324
4325 /* ADDR is moved into REG using larl. If ADDR isn't a valid larl
4326    operand, SCRATCH is used to reload the even part of the address,
4327    and one is then added.  */
4328
4329 void
4330 s390_reload_larl_operand (rtx reg, rtx addr, rtx scratch)
4331 {
4332 HOST_WIDE_INT addend;
4333 rtx symref;
4334
4335 if (!s390_loadrelative_operand_p (addr, &symref, &addend))
4336 gcc_unreachable ();
4337
4338 if (!(addend & 1))
4339 /* Easy case. The addend is even so larl will do fine. */
4340 emit_move_insn (reg, addr);
4341 else
4342 {
4343 /* We can leave the scratch register untouched if the target
4344 register is a valid base register. */
4345 if (REGNO (reg) < FIRST_PSEUDO_REGISTER
4346 && REGNO_REG_CLASS (REGNO (reg)) == ADDR_REGS)
4347 scratch = reg;
4348
4349 gcc_assert (REGNO (scratch) < FIRST_PSEUDO_REGISTER);
4350 gcc_assert (REGNO_REG_CLASS (REGNO (scratch)) == ADDR_REGS);
4351
4352 if (addend != 1)
4353 emit_move_insn (scratch,
4354 gen_rtx_CONST (Pmode,
4355 gen_rtx_PLUS (Pmode, symref,
4356 GEN_INT (addend - 1))));
4357 else
4358 emit_move_insn (scratch, symref);
4359
4360 /* Increment the address using la in order to avoid clobbering cc. */
4361 s390_load_address (reg, gen_rtx_PLUS (Pmode, scratch, const1_rtx));
4362 }
4363 }
4364
4365 /* Generate what is necessary to move between REG and MEM using
4366 SCRATCH. The direction is given by TOMEM. */
4367
4368 void
4369 s390_reload_symref_address (rtx reg, rtx mem, rtx scratch, bool tomem)
4370 {
4371 /* Reload might have pulled a constant out of the literal pool.
4372 Force it back in. */
4373 if (CONST_INT_P (mem) || GET_CODE (mem) == CONST_DOUBLE
4374 || GET_CODE (mem) == CONST_WIDE_INT
4375 || GET_CODE (mem) == CONST_VECTOR
4376 || GET_CODE (mem) == CONST)
4377 mem = force_const_mem (GET_MODE (reg), mem);
4378
4379 gcc_assert (MEM_P (mem));
4380
4381 /* For a load from memory we can leave the scratch register
4382 untouched if the target register is a valid base register. */
4383 if (!tomem
4384 && REGNO (reg) < FIRST_PSEUDO_REGISTER
4385 && REGNO_REG_CLASS (REGNO (reg)) == ADDR_REGS
4386 && GET_MODE (reg) == GET_MODE (scratch))
4387 scratch = reg;
4388
4389 /* Load address into scratch register. Since we can't have a
4390 secondary reload for a secondary reload we have to cover the case
4391 where larl would need a secondary reload here as well. */
4392 s390_reload_larl_operand (scratch, XEXP (mem, 0), scratch);
4393
4394 /* Now we can use a standard load/store to do the move. */
4395 if (tomem)
4396 emit_move_insn (replace_equiv_address (mem, scratch), reg);
4397 else
4398 emit_move_insn (reg, replace_equiv_address (mem, scratch));
4399 }
4400
4401 /* Inform reload about cases where moving X with a mode MODE to a register in
4402 RCLASS requires an extra scratch or immediate register. Return the class
4403 needed for the immediate register. */
4404
4405 static reg_class_t
4406 s390_secondary_reload (bool in_p, rtx x, reg_class_t rclass_i,
4407 machine_mode mode, secondary_reload_info *sri)
4408 {
4409 enum reg_class rclass = (enum reg_class) rclass_i;
4410
4411 /* Intermediate register needed. */
4412 if (reg_classes_intersect_p (CC_REGS, rclass))
4413 return GENERAL_REGS;
4414
4415 if (TARGET_VX)
4416 {
4417 /* The vst/vl vector move instructions allow only for short
4418 displacements. */
4419 if (MEM_P (x)
4420 && GET_CODE (XEXP (x, 0)) == PLUS
4421 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
4422 	  && !SHORT_DISP_IN_RANGE (INTVAL (XEXP (XEXP (x, 0), 1)))
4423 && reg_class_subset_p (rclass, VEC_REGS)
4424 && (!reg_class_subset_p (rclass, FP_REGS)
4425 || (GET_MODE_SIZE (mode) > 8
4426 && s390_class_max_nregs (FP_REGS, mode) == 1)))
4427 {
4428 if (in_p)
4429 sri->icode = (TARGET_64BIT ?
4430 CODE_FOR_reloaddi_la_in :
4431 CODE_FOR_reloadsi_la_in);
4432 else
4433 sri->icode = (TARGET_64BIT ?
4434 CODE_FOR_reloaddi_la_out :
4435 CODE_FOR_reloadsi_la_out);
4436 }
4437 }
4438
4439 if (TARGET_Z10)
4440 {
4441 HOST_WIDE_INT offset;
4442 rtx symref;
4443
4444 /* On z10 several optimizer steps may generate larl operands with
4445 an odd addend. */
4446 if (in_p
4447 && s390_loadrelative_operand_p (x, &symref, &offset)
4448 && mode == Pmode
4449 && !SYMBOL_FLAG_NOTALIGN2_P (symref)
4450 && (offset & 1) == 1)
4451 sri->icode = ((mode == DImode) ? CODE_FOR_reloaddi_larl_odd_addend_z10
4452 : CODE_FOR_reloadsi_larl_odd_addend_z10);
4453
4454 /* Handle all the (mem (symref)) accesses we cannot use the z10
4455 instructions for. */
4456 if (MEM_P (x)
4457 && s390_loadrelative_operand_p (XEXP (x, 0), NULL, NULL)
4458 && (mode == QImode
4459 || !reg_class_subset_p (rclass, GENERAL_REGS)
4460 || GET_MODE_SIZE (mode) > UNITS_PER_WORD
4461 || !s390_check_symref_alignment (XEXP (x, 0),
4462 GET_MODE_SIZE (mode))))
4463 {
4464 #define __SECONDARY_RELOAD_CASE(M,m) \
4465 case E_##M##mode: \
4466 if (TARGET_64BIT) \
4467 sri->icode = in_p ? CODE_FOR_reload##m##di_toreg_z10 : \
4468 CODE_FOR_reload##m##di_tomem_z10; \
4469 else \
4470 sri->icode = in_p ? CODE_FOR_reload##m##si_toreg_z10 : \
4471 CODE_FOR_reload##m##si_tomem_z10; \
4472 break;
4473
4474 switch (GET_MODE (x))
4475 {
4476 __SECONDARY_RELOAD_CASE (QI, qi);
4477 __SECONDARY_RELOAD_CASE (HI, hi);
4478 __SECONDARY_RELOAD_CASE (SI, si);
4479 __SECONDARY_RELOAD_CASE (DI, di);
4480 __SECONDARY_RELOAD_CASE (TI, ti);
4481 __SECONDARY_RELOAD_CASE (SF, sf);
4482 __SECONDARY_RELOAD_CASE (DF, df);
4483 __SECONDARY_RELOAD_CASE (TF, tf);
4484 __SECONDARY_RELOAD_CASE (SD, sd);
4485 __SECONDARY_RELOAD_CASE (DD, dd);
4486 __SECONDARY_RELOAD_CASE (TD, td);
4487 __SECONDARY_RELOAD_CASE (V1QI, v1qi);
4488 __SECONDARY_RELOAD_CASE (V2QI, v2qi);
4489 __SECONDARY_RELOAD_CASE (V4QI, v4qi);
4490 __SECONDARY_RELOAD_CASE (V8QI, v8qi);
4491 __SECONDARY_RELOAD_CASE (V16QI, v16qi);
4492 __SECONDARY_RELOAD_CASE (V1HI, v1hi);
4493 __SECONDARY_RELOAD_CASE (V2HI, v2hi);
4494 __SECONDARY_RELOAD_CASE (V4HI, v4hi);
4495 __SECONDARY_RELOAD_CASE (V8HI, v8hi);
4496 __SECONDARY_RELOAD_CASE (V1SI, v1si);
4497 __SECONDARY_RELOAD_CASE (V2SI, v2si);
4498 __SECONDARY_RELOAD_CASE (V4SI, v4si);
4499 __SECONDARY_RELOAD_CASE (V1DI, v1di);
4500 __SECONDARY_RELOAD_CASE (V2DI, v2di);
4501 __SECONDARY_RELOAD_CASE (V1TI, v1ti);
4502 __SECONDARY_RELOAD_CASE (V1SF, v1sf);
4503 __SECONDARY_RELOAD_CASE (V2SF, v2sf);
4504 __SECONDARY_RELOAD_CASE (V4SF, v4sf);
4505 __SECONDARY_RELOAD_CASE (V1DF, v1df);
4506 __SECONDARY_RELOAD_CASE (V2DF, v2df);
4507 __SECONDARY_RELOAD_CASE (V1TF, v1tf);
4508 default:
4509 gcc_unreachable ();
4510 }
4511 #undef __SECONDARY_RELOAD_CASE
4512 }
4513 }
4514
4515 /* We need a scratch register when loading a PLUS expression which
4516 is not a legitimate operand of the LOAD ADDRESS instruction. */
4517 /* LRA can deal with transformation of plus op very well -- so we
4518 don't need to prompt LRA in this case. */
4519 if (! lra_in_progress && in_p && s390_plus_operand (x, mode))
4520 sri->icode = (TARGET_64BIT ?
4521 CODE_FOR_reloaddi_plus : CODE_FOR_reloadsi_plus);
4522
4523 /* Performing a multiword move from or to memory we have to make sure the
4524 second chunk in memory is addressable without causing a displacement
4525 overflow. If that would be the case we calculate the address in
4526 a scratch register. */
4527 if (MEM_P (x)
4528 && GET_CODE (XEXP (x, 0)) == PLUS
4529 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
4530 && !DISP_IN_RANGE (INTVAL (XEXP (XEXP (x, 0), 1))
4531 + GET_MODE_SIZE (mode) - 1))
4532 {
4533 /* For GENERAL_REGS a displacement overflow is no problem if occurring
4534 	 in an s_operand address since we may fall back to lm/stm.  So we only
4535 have to care about overflows in the b+i+d case. */
4536 if ((reg_classes_intersect_p (GENERAL_REGS, rclass)
4537 && s390_class_max_nregs (GENERAL_REGS, mode) > 1
4538 && GET_CODE (XEXP (XEXP (x, 0), 0)) == PLUS)
4539 /* For FP_REGS no lm/stm is available so this check is triggered
4540 for displacement overflows in b+i+d and b+d like addresses. */
4541 || (reg_classes_intersect_p (FP_REGS, rclass)
4542 && s390_class_max_nregs (FP_REGS, mode) > 1))
4543 {
4544 if (in_p)
4545 sri->icode = (TARGET_64BIT ?
4546 CODE_FOR_reloaddi_la_in :
4547 CODE_FOR_reloadsi_la_in);
4548 else
4549 sri->icode = (TARGET_64BIT ?
4550 CODE_FOR_reloaddi_la_out :
4551 CODE_FOR_reloadsi_la_out);
4552 }
4553 }
4554
4555 /* A scratch address register is needed when a symbolic constant is
4556      copied to r0 when compiling with -fPIC.  In other cases the target
4557 register might be used as temporary (see legitimize_pic_address). */
4558 if (in_p && SYMBOLIC_CONST (x) && flag_pic == 2 && rclass != ADDR_REGS)
4559 sri->icode = (TARGET_64BIT ?
4560 CODE_FOR_reloaddi_PIC_addr :
4561 CODE_FOR_reloadsi_PIC_addr);
4562
4563 /* Either scratch or no register needed. */
4564 return NO_REGS;
4565 }
4566
4567 /* Implement TARGET_SECONDARY_MEMORY_NEEDED.
4568
4569 We need secondary memory to move data between GPRs and FPRs.
4570
4571 - With DFP the ldgr lgdr instructions are available. Due to the
4572 different alignment we cannot use them for SFmode. For 31 bit a
4573 64 bit value in GPR would be a register pair so here we still
4574 need to go via memory.
4575
4576 - With z13 we can do the SF/SImode moves with vlgvf. Due to the
4577 overlapping of FPRs and VRs we still disallow TF/TD modes to be
4578 in full VRs so as before also on z13 we do these moves via
4579 memory.
4580
4581 FIXME: Should we try splitting it into two vlgvg's/vlvg's instead? */
4582
4583 static bool
4584 s390_secondary_memory_needed (machine_mode mode,
4585 reg_class_t class1, reg_class_t class2)
4586 {
4587 return (((reg_classes_intersect_p (class1, VEC_REGS)
4588 && reg_classes_intersect_p (class2, GENERAL_REGS))
4589 || (reg_classes_intersect_p (class1, GENERAL_REGS)
4590 && reg_classes_intersect_p (class2, VEC_REGS)))
4591 && (!TARGET_DFP || !TARGET_64BIT || GET_MODE_SIZE (mode) != 8)
4592 && (!TARGET_VX || (SCALAR_FLOAT_MODE_P (mode)
4593 && GET_MODE_SIZE (mode) > 8)));
4594 }
4595
4596 /* Implement TARGET_SECONDARY_MEMORY_NEEDED_MODE.
4597
4598    get_secondary_mem widens its argument to BITS_PER_WORD, which loses on 64-bit
4599 because the movsi and movsf patterns don't handle r/f moves. */
4600
4601 static machine_mode
4602 s390_secondary_memory_needed_mode (machine_mode mode)
4603 {
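  /* E.g. QImode and HImode values are moved through a 32-bit slot.  */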
4604 if (GET_MODE_BITSIZE (mode) < 32)
4605 return mode_for_size (32, GET_MODE_CLASS (mode), 0).require ();
4606 return mode;
4607 }
4608
4609 /* Generate code to load SRC, which is PLUS that is not a
4610 legitimate operand for the LA instruction, into TARGET.
4611 SCRATCH may be used as scratch register. */
4612
4613 void
4614 s390_expand_plus_operand (rtx target, rtx src,
4615 rtx scratch)
4616 {
4617 rtx sum1, sum2;
4618 struct s390_address ad;
4619
4620 /* src must be a PLUS; get its two operands. */
4621 gcc_assert (GET_CODE (src) == PLUS);
4622 gcc_assert (GET_MODE (src) == Pmode);
4623
4624 /* Check if any of the two operands is already scheduled
4625 for replacement by reload. This can happen e.g. when
4626 float registers occur in an address. */
4627 sum1 = find_replacement (&XEXP (src, 0));
4628 sum2 = find_replacement (&XEXP (src, 1));
4629 src = gen_rtx_PLUS (Pmode, sum1, sum2);
4630
4631 /* If the address is already strictly valid, there's nothing to do. */
4632 if (!s390_decompose_address (src, &ad)
4633 || (ad.base && !REGNO_OK_FOR_BASE_P (REGNO (ad.base)))
4634 || (ad.indx && !REGNO_OK_FOR_INDEX_P (REGNO (ad.indx))))
4635 {
4636 /* Otherwise, one of the operands cannot be an address register;
4637 we reload its value into the scratch register. */
4638 if (true_regnum (sum1) < 1 || true_regnum (sum1) > 15)
4639 {
4640 emit_move_insn (scratch, sum1);
4641 sum1 = scratch;
4642 }
4643 if (true_regnum (sum2) < 1 || true_regnum (sum2) > 15)
4644 {
4645 emit_move_insn (scratch, sum2);
4646 sum2 = scratch;
4647 }
4648
4649 /* According to the way these invalid addresses are generated
4650 in reload.c, it should never happen (at least on s390) that
4651 *neither* of the PLUS components, after find_replacements
4652 was applied, is an address register. */
4653 if (sum1 == scratch && sum2 == scratch)
4654 {
4655 debug_rtx (src);
4656 gcc_unreachable ();
4657 }
4658
4659 src = gen_rtx_PLUS (Pmode, sum1, sum2);
4660 }
4661
4662 /* Emit the LOAD ADDRESS pattern. Note that reload of PLUS
4663 is only ever performed on addresses, so we can mark the
4664 sum as legitimate for LA in any case. */
4665 s390_load_address (target, src);
4666 }
4667
4668
4669 /* Return true if ADDR is a valid memory address.
4670 STRICT specifies whether strict register checking applies. */
4671
4672 static bool
4673 s390_legitimate_address_p (machine_mode mode, rtx addr, bool strict)
4674 {
4675 struct s390_address ad;
4676
4677 if (TARGET_Z10
4678 && larl_operand (addr, VOIDmode)
4679 && (mode == VOIDmode
4680 || s390_check_symref_alignment (addr, GET_MODE_SIZE (mode))))
4681 return true;
4682
4683 if (!s390_decompose_address (addr, &ad))
4684 return false;
4685
4686 /* The vector memory instructions only support short displacements.
4687 Reject invalid displacements early to prevent plenty of lay
4688      instructions from being generated later which then cannot be merged
4689 properly. */
4690 if (TARGET_VX
4691 && VECTOR_MODE_P (mode)
4692 && ad.disp != NULL_RTX
4693 && CONST_INT_P (ad.disp)
4694 && !SHORT_DISP_IN_RANGE (INTVAL (ad.disp)))
4695 return false;
4696
4697 if (strict)
4698 {
4699 if (ad.base && !REGNO_OK_FOR_BASE_P (REGNO (ad.base)))
4700 return false;
4701
4702 if (ad.indx && !REGNO_OK_FOR_INDEX_P (REGNO (ad.indx)))
4703 return false;
4704 }
4705 else
4706 {
4707 if (ad.base
4708 && !(REGNO (ad.base) >= FIRST_PSEUDO_REGISTER
4709 || REGNO_REG_CLASS (REGNO (ad.base)) == ADDR_REGS))
4710 return false;
4711
4712 if (ad.indx
4713 && !(REGNO (ad.indx) >= FIRST_PSEUDO_REGISTER
4714 || REGNO_REG_CLASS (REGNO (ad.indx)) == ADDR_REGS))
4715 return false;
4716 }
4717 return true;
4718 }
4719
4720 /* Return true if OP is a valid operand for the LA instruction.
4721 In 31-bit, we need to prove that the result is used as an
4722 address, as LA performs only a 31-bit addition. */
4723
4724 bool
4725 legitimate_la_operand_p (rtx op)
4726 {
4727 struct s390_address addr;
4728 if (!s390_decompose_address (op, &addr))
4729 return false;
4730
4731 return (TARGET_64BIT || addr.pointer);
4732 }
4733
4734 /* Return true if it is valid *and* preferable to use LA to
4735 compute the sum of OP1 and OP2. */
4736
4737 bool
4738 preferred_la_operand_p (rtx op1, rtx op2)
4739 {
4740 struct s390_address addr;
4741
4742 if (op2 != const0_rtx)
4743 op1 = gen_rtx_PLUS (Pmode, op1, op2);
4744
4745 if (!s390_decompose_address (op1, &addr))
4746 return false;
4747 if (addr.base && !REGNO_OK_FOR_BASE_P (REGNO (addr.base)))
4748 return false;
4749 if (addr.indx && !REGNO_OK_FOR_INDEX_P (REGNO (addr.indx)))
4750 return false;
4751
4752 /* Avoid LA instructions with index (and base) register on z196 or
4753 later; it is preferable to use regular add instructions when
4754 possible. Starting with zEC12 the la with index register is
4755 "uncracked" again but still slower than a regular add. */
4756 if (addr.indx && s390_tune >= PROCESSOR_2817_Z196)
4757 return false;
4758
4759 if (!TARGET_64BIT && !addr.pointer)
4760 return false;
4761
4762 if (addr.pointer)
4763 return true;
4764
4765 if ((addr.base && REG_P (addr.base) && REG_POINTER (addr.base))
4766 || (addr.indx && REG_P (addr.indx) && REG_POINTER (addr.indx)))
4767 return true;
4768
4769 return false;
4770 }
4771
4772 /* Emit a forced load-address operation to load SRC into DST.
4773 This will use the LOAD ADDRESS instruction even in situations
4774 where legitimate_la_operand_p (SRC) returns false. */
4775
4776 void
4777 s390_load_address (rtx dst, rtx src)
4778 {
4779 if (TARGET_64BIT)
4780 emit_move_insn (dst, src);
4781 else
4782 emit_insn (gen_force_la_31 (dst, src));
4783 }
4784
4785 /* Return true if it is ok to use SYMBOL_REF in a relative address.  */
4786
4787 bool
4788 s390_rel_address_ok_p (rtx symbol_ref)
4789 {
4790 tree decl;
4791
4792 if (symbol_ref == s390_got_symbol () || CONSTANT_POOL_ADDRESS_P (symbol_ref))
4793 return true;
4794
4795 decl = SYMBOL_REF_DECL (symbol_ref);
4796
4797 if (!flag_pic || SYMBOL_REF_LOCAL_P (symbol_ref))
4798 return (s390_pic_data_is_text_relative
4799 || (decl
4800 && TREE_CODE (decl) == FUNCTION_DECL));
4801
4802 return false;
4803 }
4804
4805 /* Return a legitimate reference for ORIG (an address) using the
4806 register REG. If REG is 0, a new pseudo is generated.
4807
4808 There are two types of references that must be handled:
4809
4810 1. Global data references must load the address from the GOT, via
4811 the PIC reg. An insn is emitted to do this load, and the reg is
4812 returned.
4813
4814 2. Static data references, constant pool addresses, and code labels
4815 compute the address as an offset from the GOT, whose base is in
4816 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
4817 differentiate them from global data objects. The returned
4818 address is the PIC reg + an unspec constant.
4819
4820 TARGET_LEGITIMIZE_ADDRESS_P rejects symbolic references unless the PIC
4821 reg also appears in the address. */
4822
4823 rtx
4824 legitimize_pic_address (rtx orig, rtx reg)
4825 {
4826 rtx addr = orig;
4827 rtx addend = const0_rtx;
4828 rtx new_rtx = orig;
4829
4830 gcc_assert (!TLS_SYMBOLIC_CONST (addr));
4831
4832 if (GET_CODE (addr) == CONST)
4833 addr = XEXP (addr, 0);
4834
4835 if (GET_CODE (addr) == PLUS)
4836 {
4837 addend = XEXP (addr, 1);
4838 addr = XEXP (addr, 0);
4839 }
4840
4841 if ((GET_CODE (addr) == LABEL_REF
4842 || (SYMBOL_REF_P (addr) && s390_rel_address_ok_p (addr))
4843        || (GET_CODE (addr) == UNSPEC
4844 	   && (XINT (addr, 1) == UNSPEC_GOTENT
4845 || XINT (addr, 1) == UNSPEC_PLT)))
4846 && GET_CODE (addend) == CONST_INT)
4847 {
4848 /* This can be locally addressed. */
4849
4850 /* larl_operand requires UNSPECs to be wrapped in a const rtx. */
4851 rtx const_addr = (GET_CODE (addr) == UNSPEC ?
4852 gen_rtx_CONST (Pmode, addr) : addr);
4853
4854 if (larl_operand (const_addr, VOIDmode)
4855 && INTVAL (addend) < HOST_WIDE_INT_1 << 31
4856 && INTVAL (addend) >= -(HOST_WIDE_INT_1 << 31))
4857 {
4858 if (INTVAL (addend) & 1)
4859 {
4860 /* LARL can't handle odd offsets, so emit a pair of LARL
4861 and LA. */
4862 	      rtx temp = reg ? reg : gen_reg_rtx (Pmode);
4863
4864 if (!DISP_IN_RANGE (INTVAL (addend)))
4865 {
4866 HOST_WIDE_INT even = INTVAL (addend) - 1;
4867 addr = gen_rtx_PLUS (Pmode, addr, GEN_INT (even));
4868 addr = gen_rtx_CONST (Pmode, addr);
4869 addend = const1_rtx;
4870 }
4871
4872 emit_move_insn (temp, addr);
4873 new_rtx = gen_rtx_PLUS (Pmode, temp, addend);
4874
4875 if (reg != 0)
4876 {
4877 s390_load_address (reg, new_rtx);
4878 new_rtx = reg;
4879 }
4880 }
4881 else
4882 {
4883 /* If the offset is even, we can just use LARL. This
4884 will happen automatically. */
4885 }
4886 }
4887 else
4888 {
4889 /* No larl - Access local symbols relative to the GOT. */
4890
4891 	  rtx temp = reg ? reg : gen_reg_rtx (Pmode);
4892
4893 if (reload_in_progress || reload_completed)
4894 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
4895
4896 addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
4897 if (addend != const0_rtx)
4898 addr = gen_rtx_PLUS (Pmode, addr, addend);
4899 addr = gen_rtx_CONST (Pmode, addr);
4900 addr = force_const_mem (Pmode, addr);
4901 emit_move_insn (temp, addr);
4902
4903 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, temp);
4904 if (reg != 0)
4905 {
4906 s390_load_address (reg, new_rtx);
4907 new_rtx = reg;
4908 }
4909 }
4910 }
4911 else if (GET_CODE (addr) == SYMBOL_REF && addend == const0_rtx)
4912 {
4913 /* A non-local symbol reference without addend.
4914
4915 The symbol ref is wrapped into an UNSPEC to make sure the
4916 proper operand modifier (@GOT or @GOTENT) will be emitted.
4917 This will tell the linker to put the symbol into the GOT.
4918
4919 Additionally the code dereferencing the GOT slot is emitted here.
4920
4921 An addend to the symref needs to be added afterwards.
4922 legitimize_pic_address calls itself recursively to handle
4923 that case. So no need to do it here. */
4924
4925 if (reg == 0)
4926 reg = gen_reg_rtx (Pmode);
4927
4928 if (TARGET_Z10)
4929 {
4930 /* Use load relative if possible.
4931 lgrl <target>, sym@GOTENT */
4932 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTENT);
4933 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
4934 new_rtx = gen_const_mem (GET_MODE (reg), new_rtx);
4935
4936 emit_move_insn (reg, new_rtx);
4937 new_rtx = reg;
4938 }
4939 else if (flag_pic == 1)
4940 {
4941 /* Assume GOT offset is a valid displacement operand (< 4k
4942 or < 512k with z990). This is handled the same way in
4943 both 31- and 64-bit code (@GOT).
4944 lg <target>, sym@GOT(r12) */
4945
4946 if (reload_in_progress || reload_completed)
4947 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
4948
4949 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
4950 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
4951 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
4952 new_rtx = gen_const_mem (Pmode, new_rtx);
4953 emit_move_insn (reg, new_rtx);
4954 new_rtx = reg;
4955 }
4956 else
4957 {
4958 /* If the GOT offset might be >= 4k, we determine the position
4959 of the GOT entry via a PC-relative LARL (@GOTENT).
4960 larl temp, sym@GOTENT
4961 lg <target>, 0(temp) */
4962
4963 rtx temp = reg ? reg : gen_reg_rtx (Pmode);
4964
4965 gcc_assert (REGNO (temp) >= FIRST_PSEUDO_REGISTER
4966 || REGNO_REG_CLASS (REGNO (temp)) == ADDR_REGS);
4967
4968 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTENT);
4969 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
4970 emit_move_insn (temp, new_rtx);
4971 new_rtx = gen_const_mem (Pmode, temp);
4972 emit_move_insn (reg, new_rtx);
4973
4974 new_rtx = reg;
4975 }
4976 }
4977 else if (GET_CODE (addr) == UNSPEC && GET_CODE (addend) == CONST_INT)
4978 {
4979 gcc_assert (XVECLEN (addr, 0) == 1);
4980 switch (XINT (addr, 1))
4981 {
4982 	/* These are addresses of symbols (or PLT slots) relative to the
4983 	   GOT (not GOT slots!).  In general they will exceed the
4984 	   displacement range, so these values belong in the literal
4985 	   pool.  */
4986 case UNSPEC_GOTOFF:
4987 case UNSPEC_PLTOFF:
4988 new_rtx = force_const_mem (Pmode, orig);
4989 break;
4990
4991 /* For -fPIC the GOT size might exceed the displacement
4992 range so make sure the value is in the literal pool. */
4993 case UNSPEC_GOT:
4994 if (flag_pic == 2)
4995 new_rtx = force_const_mem (Pmode, orig);
4996 break;
4997
4998 /* For @GOTENT larl is used. This is handled like local
4999 symbol refs. */
5000 case UNSPEC_GOTENT:
5001 gcc_unreachable ();
5002 break;
5003
5004 /* For @PLT larl is used. This is handled like local
5005 symbol refs. */
5006 case UNSPEC_PLT:
5007 gcc_unreachable ();
5008 break;
5009
5010 /* Everything else cannot happen. */
5011 default:
5012 gcc_unreachable ();
5013 }
5014 }
5015 else if (addend != const0_rtx)
5016 {
5017 /* Otherwise, compute the sum. */
5018
5019 rtx base = legitimize_pic_address (addr, reg);
5020 new_rtx = legitimize_pic_address (addend,
5021 base == reg ? NULL_RTX : reg);
5022 if (GET_CODE (new_rtx) == CONST_INT)
5023 new_rtx = plus_constant (Pmode, base, INTVAL (new_rtx));
5024 else
5025 {
5026 if (GET_CODE (new_rtx) == PLUS && CONSTANT_P (XEXP (new_rtx, 1)))
5027 {
5028 base = gen_rtx_PLUS (Pmode, base, XEXP (new_rtx, 0));
5029 new_rtx = XEXP (new_rtx, 1);
5030 }
5031 new_rtx = gen_rtx_PLUS (Pmode, base, new_rtx);
5032 }
5033
5034 if (GET_CODE (new_rtx) == CONST)
5035 new_rtx = XEXP (new_rtx, 0);
5036 new_rtx = force_operand (new_rtx, 0);
5037 }
5038
5039 return new_rtx;
5040 }
5041
5042 /* Load the thread pointer into a register. */
5043
5044 rtx
5045 s390_get_thread_pointer (void)
5046 {
5047 rtx tp = gen_reg_rtx (Pmode);
5048
5049 emit_move_insn (tp, gen_rtx_REG (Pmode, TP_REGNUM));
5050 mark_reg_pointer (tp, BITS_PER_WORD);
5051
5052 return tp;
5053 }
5054
5055 /* Emit a tls call insn. The call target is the SYMBOL_REF stored
5056 in s390_tls_symbol which always refers to __tls_get_offset.
5057    The returned offset is written to RESULT_REG and a USE rtx is
5058 generated for TLS_CALL. */
5059
5060 static GTY(()) rtx s390_tls_symbol;
5061
5062 static void
5063 s390_emit_tls_call_insn (rtx result_reg, rtx tls_call)
5064 {
5065 rtx insn;
5066
5067 if (!flag_pic)
5068 emit_insn (s390_load_got ());
5069
5070 if (!s390_tls_symbol)
5071 s390_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, "__tls_get_offset");
5072
5073 insn = s390_emit_call (s390_tls_symbol, tls_call, result_reg,
5074 gen_rtx_REG (Pmode, RETURN_REGNUM));
5075
5076 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), result_reg);
5077 RTL_CONST_CALL_P (insn) = 1;
5078 }
5079
5080 /* ADDR contains a thread-local SYMBOL_REF. Generate code to compute
5081 this (thread-local) address. REG may be used as temporary. */
5082
5083 static rtx
5084 legitimize_tls_address (rtx addr, rtx reg)
5085 {
5086 rtx new_rtx, tls_call, temp, base, r2;
5087 rtx_insn *insn;
5088
5089 if (GET_CODE (addr) == SYMBOL_REF)
5090 switch (tls_symbolic_operand (addr))
5091 {
5092 case TLS_MODEL_GLOBAL_DYNAMIC:
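      /* Global dynamic:  load the GD parameter-block offset from the
	 literal pool into %r2, call __tls_get_offset, and add the
	 result to the thread pointer.  */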
5093 start_sequence ();
5094 r2 = gen_rtx_REG (Pmode, 2);
5095 tls_call = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_TLSGD);
5096 new_rtx = gen_rtx_CONST (Pmode, tls_call);
5097 new_rtx = force_const_mem (Pmode, new_rtx);
5098 emit_move_insn (r2, new_rtx);
5099 s390_emit_tls_call_insn (r2, tls_call);
5100 insn = get_insns ();
5101 end_sequence ();
5102
5103 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_NTPOFF);
5104 temp = gen_reg_rtx (Pmode);
5105 emit_libcall_block (insn, temp, r2, new_rtx);
5106
5107 new_rtx = gen_rtx_PLUS (Pmode, s390_get_thread_pointer (), temp);
5108 if (reg != 0)
5109 {
5110 s390_load_address (reg, new_rtx);
5111 new_rtx = reg;
5112 }
5113 break;
5114
5115 case TLS_MODEL_LOCAL_DYNAMIC:
5116 start_sequence ();
5117 r2 = gen_rtx_REG (Pmode, 2);
5118 tls_call = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TLSLDM);
5119 new_rtx = gen_rtx_CONST (Pmode, tls_call);
5120 new_rtx = force_const_mem (Pmode, new_rtx);
5121 emit_move_insn (r2, new_rtx);
5122 s390_emit_tls_call_insn (r2, tls_call);
5123 insn = get_insns ();
5124 end_sequence ();
5125
5126 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TLSLDM_NTPOFF);
5127 temp = gen_reg_rtx (Pmode);
5128 emit_libcall_block (insn, temp, r2, new_rtx);
5129
5130 new_rtx = gen_rtx_PLUS (Pmode, s390_get_thread_pointer (), temp);
5131 base = gen_reg_rtx (Pmode);
5132 s390_load_address (base, new_rtx);
5133
5134 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_DTPOFF);
5135 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
5136 new_rtx = force_const_mem (Pmode, new_rtx);
5137 temp = gen_reg_rtx (Pmode);
5138 emit_move_insn (temp, new_rtx);
5139
5140 new_rtx = gen_rtx_PLUS (Pmode, base, temp);
5141 if (reg != 0)
5142 {
5143 s390_load_address (reg, new_rtx);
5144 new_rtx = reg;
5145 }
5146 break;
5147
5148 case TLS_MODEL_INITIAL_EXEC:
5149 if (flag_pic == 1)
5150 {
5151 /* Assume GOT offset < 4k. This is handled the same way
5152 in both 31- and 64-bit code. */
5153
5154 if (reload_in_progress || reload_completed)
5155 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
5156
5157 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTNTPOFF);
5158 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
5159 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
5160 new_rtx = gen_const_mem (Pmode, new_rtx);
5161 temp = gen_reg_rtx (Pmode);
5162 emit_move_insn (temp, new_rtx);
5163 }
5164 else
5165 {
5166 /* If the GOT offset might be >= 4k, we determine the position
5167 of the GOT entry via a PC-relative LARL. */
5168
5169 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_INDNTPOFF);
5170 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
5171 temp = gen_reg_rtx (Pmode);
5172 emit_move_insn (temp, new_rtx);
5173
5174 new_rtx = gen_const_mem (Pmode, temp);
5175 temp = gen_reg_rtx (Pmode);
5176 emit_move_insn (temp, new_rtx);
5177 }
5178
5179 new_rtx = gen_rtx_PLUS (Pmode, s390_get_thread_pointer (), temp);
5180 if (reg != 0)
5181 {
5182 s390_load_address (reg, new_rtx);
5183 new_rtx = reg;
5184 }
5185 break;
5186
5187 case TLS_MODEL_LOCAL_EXEC:
5188 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_NTPOFF);
5189 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
5190 new_rtx = force_const_mem (Pmode, new_rtx);
5191 temp = gen_reg_rtx (Pmode);
5192 emit_move_insn (temp, new_rtx);
5193
5194 new_rtx = gen_rtx_PLUS (Pmode, s390_get_thread_pointer (), temp);
5195 if (reg != 0)
5196 {
5197 s390_load_address (reg, new_rtx);
5198 new_rtx = reg;
5199 }
5200 break;
5201
5202 default:
5203 gcc_unreachable ();
5204 }
5205
5206 else if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == UNSPEC)
5207 {
5208 switch (XINT (XEXP (addr, 0), 1))
5209 {
5210 case UNSPEC_INDNTPOFF:
5211 new_rtx = addr;
5212 break;
5213
5214 default:
5215 gcc_unreachable ();
5216 }
5217 }
5218
5219 else if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS
5220 && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT)
5221 {
5222 new_rtx = XEXP (XEXP (addr, 0), 0);
5223 if (GET_CODE (new_rtx) != SYMBOL_REF)
5224 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
5225
5226 new_rtx = legitimize_tls_address (new_rtx, reg);
5227 new_rtx = plus_constant (Pmode, new_rtx,
5228 INTVAL (XEXP (XEXP (addr, 0), 1)));
5229 new_rtx = force_operand (new_rtx, 0);
5230 }
5231
5232 else
5233 gcc_unreachable (); /* for now ... */
5234
5235 return new_rtx;
5236 }
5237
5238 /* Emit insns making the address in operands[1] valid for a standard
5239 move to operands[0]. operands[1] is replaced by an address which
5240 should be used instead of the former RTX to emit the move
5241 pattern. */
5242
5243 void
5244 emit_symbolic_move (rtx *operands)
5245 {
5246 rtx temp = !can_create_pseudo_p () ? operands[0] : gen_reg_rtx (Pmode);
5247
5248 if (GET_CODE (operands[0]) == MEM)
5249 operands[1] = force_reg (Pmode, operands[1]);
5250 else if (TLS_SYMBOLIC_CONST (operands[1]))
5251 operands[1] = legitimize_tls_address (operands[1], temp);
5252 else if (flag_pic)
5253 operands[1] = legitimize_pic_address (operands[1], temp);
5254 }
5255
5256 /* Try machine-dependent ways of modifying an illegitimate address X
5257 to be legitimate. If we find one, return the new, valid address.
5258
5259 OLDX is the address as it was before break_out_memory_refs was called.
5260 In some cases it is useful to look at this to decide what needs to be done.
5261
5262 MODE is the mode of the operand pointed to by X.
5263
5264 When -fpic is used, special handling is needed for symbolic references.
5265 See comments by legitimize_pic_address for details. */
5266
5267 static rtx
5268 s390_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
5269 machine_mode mode ATTRIBUTE_UNUSED)
5270 {
5271 rtx constant_term = const0_rtx;
5272
5273 if (TLS_SYMBOLIC_CONST (x))
5274 {
5275 x = legitimize_tls_address (x, 0);
5276
5277 if (s390_legitimate_address_p (mode, x, FALSE))
5278 return x;
5279 }
5280 else if (GET_CODE (x) == PLUS
5281 && (TLS_SYMBOLIC_CONST (XEXP (x, 0))
5282 || TLS_SYMBOLIC_CONST (XEXP (x, 1))))
5283 {
5284 return x;
5285 }
5286 else if (flag_pic)
5287 {
5288 if (SYMBOLIC_CONST (x)
5289 || (GET_CODE (x) == PLUS
5290 && (SYMBOLIC_CONST (XEXP (x, 0))
5291 || SYMBOLIC_CONST (XEXP (x, 1)))))
5292 x = legitimize_pic_address (x, 0);
5293
5294 if (s390_legitimate_address_p (mode, x, FALSE))
5295 return x;
5296 }
5297
5298 x = eliminate_constant_term (x, &constant_term);
5299
5300 /* Optimize loading of large displacements by splitting them
5301 into the multiple of 4K and the rest; this allows the
5302 former to be CSE'd if possible.
5303
5304 Don't do this if the displacement is added to a register
5305 pointing into the stack frame, as the offsets will
5306 change later anyway. */
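  /* E.g. a displacement of 0x12345 is split into 0x12000 (loaded into
     a register and available for CSE) plus an in-range 0x345.  */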
5307
5308 if (GET_CODE (constant_term) == CONST_INT
5309 && !TARGET_LONG_DISPLACEMENT
5310 && !DISP_IN_RANGE (INTVAL (constant_term))
5311 && !(REG_P (x) && REGNO_PTR_FRAME_P (REGNO (x))))
5312 {
5313 HOST_WIDE_INT lower = INTVAL (constant_term) & 0xfff;
5314 HOST_WIDE_INT upper = INTVAL (constant_term) ^ lower;
5315
5316 rtx temp = gen_reg_rtx (Pmode);
5317 rtx val = force_operand (GEN_INT (upper), temp);
5318 if (val != temp)
5319 emit_move_insn (temp, val);
5320
5321 x = gen_rtx_PLUS (Pmode, x, temp);
5322 constant_term = GEN_INT (lower);
5323 }
5324
5325 if (GET_CODE (x) == PLUS)
5326 {
5327 if (GET_CODE (XEXP (x, 0)) == REG)
5328 {
5329 rtx temp = gen_reg_rtx (Pmode);
5330 rtx val = force_operand (XEXP (x, 1), temp);
5331 if (val != temp)
5332 emit_move_insn (temp, val);
5333
5334 x = gen_rtx_PLUS (Pmode, XEXP (x, 0), temp);
5335 }
5336
5337 else if (GET_CODE (XEXP (x, 1)) == REG)
5338 {
5339 rtx temp = gen_reg_rtx (Pmode);
5340 rtx val = force_operand (XEXP (x, 0), temp);
5341 if (val != temp)
5342 emit_move_insn (temp, val);
5343
5344 x = gen_rtx_PLUS (Pmode, temp, XEXP (x, 1));
5345 }
5346 }
5347
5348 if (constant_term != const0_rtx)
5349 x = gen_rtx_PLUS (Pmode, x, constant_term);
5350
5351 return x;
5352 }
5353
5354 /* Try a machine-dependent way of reloading an illegitimate address AD
5355 operand. If we find one, push the reload and return the new address.
5356
5357 MODE is the mode of the enclosing MEM. OPNUM is the operand number
5358 and TYPE is the reload type of the current reload. */
5359
5360 rtx
5361 legitimize_reload_address (rtx ad, machine_mode mode ATTRIBUTE_UNUSED,
5362 int opnum, int type)
5363 {
5364 if (!optimize || TARGET_LONG_DISPLACEMENT)
5365 return NULL_RTX;
5366
5367 if (GET_CODE (ad) == PLUS)
5368 {
5369 rtx tem = simplify_binary_operation (PLUS, Pmode,
5370 XEXP (ad, 0), XEXP (ad, 1));
5371 if (tem)
5372 ad = tem;
5373 }
5374
5375 if (GET_CODE (ad) == PLUS
5376 && GET_CODE (XEXP (ad, 0)) == REG
5377 && GET_CODE (XEXP (ad, 1)) == CONST_INT
5378 && !DISP_IN_RANGE (INTVAL (XEXP (ad, 1))))
5379 {
5380 HOST_WIDE_INT lower = INTVAL (XEXP (ad, 1)) & 0xfff;
5381 HOST_WIDE_INT upper = INTVAL (XEXP (ad, 1)) ^ lower;
5382 rtx cst, tem, new_rtx;
5383
5384 cst = GEN_INT (upper);
5385 if (!legitimate_reload_constant_p (cst))
5386 cst = force_const_mem (Pmode, cst);
5387
5388 tem = gen_rtx_PLUS (Pmode, XEXP (ad, 0), cst);
5389 new_rtx = gen_rtx_PLUS (Pmode, tem, GEN_INT (lower));
5390
5391 push_reload (XEXP (tem, 1), 0, &XEXP (tem, 1), 0,
5392 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
5393 opnum, (enum reload_type) type);
5394 return new_rtx;
5395 }
5396
5397 return NULL_RTX;
5398 }
5399
5400 /* Emit code to move LEN bytes from SRC to DST.  */
5401
5402 bool
5403 s390_expand_movmem (rtx dst, rtx src, rtx len)
5404 {
5405 /* When tuning for z10 or higher we rely on the Glibc functions to
5406      do the right thing.  Only for constant lengths below 64k will we
5407 generate inline code. */
5408 if (s390_tune >= PROCESSOR_2097_Z10
5409 && (GET_CODE (len) != CONST_INT || INTVAL (len) > (1<<16)))
5410 return false;
5411
5412 /* Expand memcpy for constant length operands without a loop if it
5413 is shorter that way.
5414
5415 With a constant length argument a
5416 memcpy loop (without pfd) is 36 bytes -> 6 * mvc */
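  /* E.g. a 600 byte copy is emitted as three blocks:
     an mvc of 256, an mvc of 256, and an mvc of 88 bytes.  */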
5417 if (GET_CODE (len) == CONST_INT
5418 && INTVAL (len) >= 0
5419 && INTVAL (len) <= 256 * 6
5420 && (!TARGET_MVCLE || INTVAL (len) <= 256))
5421 {
5422 HOST_WIDE_INT o, l;
5423
5424 for (l = INTVAL (len), o = 0; l > 0; l -= 256, o += 256)
5425 {
5426 rtx newdst = adjust_address (dst, BLKmode, o);
5427 rtx newsrc = adjust_address (src, BLKmode, o);
5428 emit_insn (gen_movmem_short (newdst, newsrc,
5429 GEN_INT (l > 256 ? 255 : l - 1)));
5430 }
5431 }
5432
5433 else if (TARGET_MVCLE)
5434 {
5435 emit_insn (gen_movmem_long (dst, src, convert_to_mode (Pmode, len, 1)));
5436 }
5437
5438 else
5439 {
5440 rtx dst_addr, src_addr, count, blocks, temp;
5441 rtx_code_label *loop_start_label = gen_label_rtx ();
5442 rtx_code_label *loop_end_label = gen_label_rtx ();
5443 rtx_code_label *end_label = gen_label_rtx ();
5444 machine_mode mode;
5445
5446 mode = GET_MODE (len);
5447 if (mode == VOIDmode)
5448 mode = Pmode;
5449
5450 dst_addr = gen_reg_rtx (Pmode);
5451 src_addr = gen_reg_rtx (Pmode);
5452 count = gen_reg_rtx (mode);
5453 blocks = gen_reg_rtx (mode);
5454
5455 convert_move (count, len, 1);
5456 emit_cmp_and_jump_insns (count, const0_rtx,
5457 EQ, NULL_RTX, mode, 1, end_label);
5458
5459 emit_move_insn (dst_addr, force_operand (XEXP (dst, 0), NULL_RTX));
5460 emit_move_insn (src_addr, force_operand (XEXP (src, 0), NULL_RTX));
5461 dst = change_address (dst, VOIDmode, dst_addr);
5462 src = change_address (src, VOIDmode, src_addr);
5463
5464 temp = expand_binop (mode, add_optab, count, constm1_rtx, count, 1,
5465 OPTAB_DIRECT);
5466 if (temp != count)
5467 emit_move_insn (count, temp);
5468
5469 temp = expand_binop (mode, lshr_optab, count, GEN_INT (8), blocks, 1,
5470 OPTAB_DIRECT);
5471 if (temp != blocks)
5472 emit_move_insn (blocks, temp);
5473
5474 emit_cmp_and_jump_insns (blocks, const0_rtx,
5475 EQ, NULL_RTX, mode, 1, loop_end_label);
5476
5477 emit_label (loop_start_label);
5478
5479 if (TARGET_Z10
5480 && (GET_CODE (len) != CONST_INT || INTVAL (len) > 768))
5481 {
5482 rtx prefetch;
5483
5484 /* Issue a read prefetch for the +3 cache line. */
5485 prefetch = gen_prefetch (gen_rtx_PLUS (Pmode, src_addr, GEN_INT (768)),
5486 const0_rtx, const0_rtx);
5487 PREFETCH_SCHEDULE_BARRIER_P (prefetch) = true;
5488 emit_insn (prefetch);
5489
5490 /* Issue a write prefetch for the +3 cache line. */
5491 prefetch = gen_prefetch (gen_rtx_PLUS (Pmode, dst_addr, GEN_INT (768)),
5492 const1_rtx, const0_rtx);
5493 PREFETCH_SCHEDULE_BARRIER_P (prefetch) = true;
5494 emit_insn (prefetch);
5495 }
5496
5497 emit_insn (gen_movmem_short (dst, src, GEN_INT (255)));
5498 s390_load_address (dst_addr,
5499 gen_rtx_PLUS (Pmode, dst_addr, GEN_INT (256)));
5500 s390_load_address (src_addr,
5501 gen_rtx_PLUS (Pmode, src_addr, GEN_INT (256)));
5502
5503 temp = expand_binop (mode, add_optab, blocks, constm1_rtx, blocks, 1,
5504 OPTAB_DIRECT);
5505 if (temp != blocks)
5506 emit_move_insn (blocks, temp);
5507
5508 emit_cmp_and_jump_insns (blocks, const0_rtx,
5509 EQ, NULL_RTX, mode, 1, loop_end_label);
5510
5511 emit_jump (loop_start_label);
5512 emit_label (loop_end_label);
5513
5514 emit_insn (gen_movmem_short (dst, src,
5515 convert_to_mode (Pmode, count, 1)));
5516 emit_label (end_label);
5517 }
5518 return true;
5519 }
5520
5521 /* Emit code to set LEN bytes at DST to VAL.
5522 Make use of clrmem if VAL is zero. */
5523
5524 void
5525 s390_expand_setmem (rtx dst, rtx len, rtx val)
5526 {
5527 if (GET_CODE (len) == CONST_INT && INTVAL (len) <= 0)
5528 return;
5529
5530 gcc_assert (GET_CODE (val) == CONST_INT || GET_MODE (val) == QImode);
5531
5532 /* Expand setmem/clrmem for a constant length operand without a
5533 loop if it will be shorter that way.
5534 clrmem loop (with PFD) is 30 bytes -> 5 * xc
5535 clrmem loop (without PFD) is 24 bytes -> 4 * xc
5536 setmem loop (with PFD) is 38 bytes -> ~4 * (mvi/stc + mvc)
5537 setmem loop (without PFD) is 32 bytes -> ~4 * (mvi/stc + mvc) */
5538 if (GET_CODE (len) == CONST_INT
5539 && ((val == const0_rtx
5540 && (INTVAL (len) <= 256 * 4
5541 || (INTVAL (len) <= 256 * 5 && TARGET_SETMEM_PFD (val, len))))
5542 || (val != const0_rtx && INTVAL (len) <= 257 * 4))
5543 && (!TARGET_MVCLE || INTVAL (len) <= 256))
5544 {
5545 HOST_WIDE_INT o, l;
5546
5547 if (val == const0_rtx)
5548 /* clrmem: emit 256 byte blockwise XCs. */
5549 for (l = INTVAL (len), o = 0; l > 0; l -= 256, o += 256)
5550 {
5551 rtx newdst = adjust_address (dst, BLKmode, o);
5552 emit_insn (gen_clrmem_short (newdst,
5553 GEN_INT (l > 256 ? 255 : l - 1)));
5554 }
5555 else
5556 /* setmem: emit 1 (mvi) + 256 (mvc) byte blockwise memsets by
5557 setting the first byte to val and using a 256 byte mvc with one
5558 byte overlap to propagate the byte through the block. */
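/* Illustrative sketch: for a constant length of 5 this emits roughly
     mvi 0(dst),val
     mvc 1(4,dst),0(dst)
   and, because MVC copies strictly left to right one byte at a time,
   the one-byte overlap replicates val across the block. */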
5559 for (l = INTVAL (len), o = 0; l > 0; l -= 257, o += 257)
5560 {
5561 rtx newdst = adjust_address (dst, BLKmode, o);
5562 emit_move_insn (adjust_address (dst, QImode, o), val);
5563 if (l > 1)
5564 {
5565 rtx newdstp1 = adjust_address (dst, BLKmode, o + 1);
5566 emit_insn (gen_movmem_short (newdstp1, newdst,
5567 GEN_INT (l > 257 ? 255 : l - 2)));
5568 }
5569 }
5570 }
5571
5572 else if (TARGET_MVCLE)
5573 {
5574 val = force_not_mem (convert_modes (Pmode, QImode, val, 1));
5575 if (TARGET_64BIT)
5576 emit_insn (gen_setmem_long_di (dst, convert_to_mode (Pmode, len, 1),
5577 val));
5578 else
5579 emit_insn (gen_setmem_long_si (dst, convert_to_mode (Pmode, len, 1),
5580 val));
5581 }
5582
5583 else
5584 {
5585 rtx dst_addr, count, blocks, temp, dstp1 = NULL_RTX;
5586 rtx_code_label *loop_start_label = gen_label_rtx ();
5587 rtx_code_label *onebyte_end_label = gen_label_rtx ();
5588 rtx_code_label *zerobyte_end_label = gen_label_rtx ();
5589 rtx_code_label *restbyte_end_label = gen_label_rtx ();
5590 machine_mode mode;
5591
5592 mode = GET_MODE (len);
5593 if (mode == VOIDmode)
5594 mode = Pmode;
5595
5596 dst_addr = gen_reg_rtx (Pmode);
5597 count = gen_reg_rtx (mode);
5598 blocks = gen_reg_rtx (mode);
5599
5600 convert_move (count, len, 1);
5601 emit_cmp_and_jump_insns (count, const0_rtx,
5602 EQ, NULL_RTX, mode, 1, zerobyte_end_label,
5603 profile_probability::very_unlikely ());
5604
5605 /* We need to make a copy of the target address since memset is
5606 supposed to return it unmodified. We have to do that here
5607 already since the new reg is used at onebyte_end_label. */
5608 emit_move_insn (dst_addr, force_operand (XEXP (dst, 0), NULL_RTX));
5609 dst = change_address (dst, VOIDmode, dst_addr);
5610
5611 if (val != const0_rtx)
5612 {
5613 /* When using the overlapping mvc the original target
5614 address is only accessed as single byte entity (even by
5615 the mvc reading this value). */
5616 set_mem_size (dst, 1);
5617 dstp1 = adjust_address (dst, VOIDmode, 1);
5618 emit_cmp_and_jump_insns (count,
5619 const1_rtx, EQ, NULL_RTX, mode, 1,
5620 onebyte_end_label,
5621 profile_probability::very_unlikely ());
5622 }
5623
5624 /* There is one unconditional (mvi+mvc)/xc after the loop
5625 dealing with the rest of the bytes; subtracting two (mvi+mvc)
5626 or one (xc) here leaves exactly that number of bytes to be
5627 handled by it. */
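/* Worked example (illustrative): for val != 0 and a runtime length
   of 260, count becomes 258 and blocks 1, so the loop stores 256
   bytes and the trailing mvi+mvc stores the remaining 4 (1 for the
   mvi plus an mvc whose length code is the low byte of count,
   i.e. 2, hence 3 bytes). */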
5628 temp = expand_binop (mode, add_optab, count,
5629 val == const0_rtx ? constm1_rtx : GEN_INT (-2),
5630 count, 1, OPTAB_DIRECT);
5631 if (temp != count)
5632 emit_move_insn (count, temp);
5633
5634 temp = expand_binop (mode, lshr_optab, count, GEN_INT (8), blocks, 1,
5635 OPTAB_DIRECT);
5636 if (temp != blocks)
5637 emit_move_insn (blocks, temp);
5638
5639 emit_cmp_and_jump_insns (blocks, const0_rtx,
5640 EQ, NULL_RTX, mode, 1, restbyte_end_label);
5641
5642 emit_jump (loop_start_label);
5643
5644 if (val != const0_rtx)
5645 {
5646 /* The 1 byte != 0 special case. Not handled efficiently
5647 since we require two jumps for that. However, this
5648 should be very rare. */
5649 emit_label (onebyte_end_label);
5650 emit_move_insn (adjust_address (dst, QImode, 0), val);
5651 emit_jump (zerobyte_end_label);
5652 }
5653
5654 emit_label (loop_start_label);
5655
5656 if (TARGET_SETMEM_PFD (val, len))
5657 {
5658 /* Issue a write prefetch. */
5659 rtx distance = GEN_INT (TARGET_SETMEM_PREFETCH_DISTANCE);
5660 rtx prefetch = gen_prefetch (gen_rtx_PLUS (Pmode, dst_addr, distance),
5661 const1_rtx, const0_rtx);
5662 emit_insn (prefetch);
5663 PREFETCH_SCHEDULE_BARRIER_P (prefetch) = true;
5664 }
5665
5666 if (val == const0_rtx)
5667 emit_insn (gen_clrmem_short (dst, GEN_INT (255)));
5668 else
5669 {
5670 /* Set the first byte in the block to the value and use an
5671 overlapping mvc for the block. */
5672 emit_move_insn (adjust_address (dst, QImode, 0), val);
5673 emit_insn (gen_movmem_short (dstp1, dst, GEN_INT (254)));
5674 }
5675 s390_load_address (dst_addr,
5676 gen_rtx_PLUS (Pmode, dst_addr, GEN_INT (256)));
5677
5678 temp = expand_binop (mode, add_optab, blocks, constm1_rtx, blocks, 1,
5679 OPTAB_DIRECT);
5680 if (temp != blocks)
5681 emit_move_insn (blocks, temp);
5682
5683 emit_cmp_and_jump_insns (blocks, const0_rtx,
5684 NE, NULL_RTX, mode, 1, loop_start_label);
5685
5686 emit_label (restbyte_end_label);
5687
5688 if (val == const0_rtx)
5689 emit_insn (gen_clrmem_short (dst, convert_to_mode (Pmode, count, 1)));
5690 else
5691 {
5692 /* Set the first byte in the block to the value and use an
5693 overlapping mvc for the block. */
5694 emit_move_insn (adjust_address (dst, QImode, 0), val);
5695 /* execute only uses the lowest 8 bits of count, which is
5696 exactly what we need here. */
5697 emit_insn (gen_movmem_short (dstp1, dst,
5698 convert_to_mode (Pmode, count, 1)));
5699 }
5700
5701 emit_label (zerobyte_end_label);
5702 }
5703 }
5704
5705 /* Emit code to compare LEN bytes at OP0 with those at OP1,
5706 and return the result in TARGET. */
5707
5708 bool
5709 s390_expand_cmpmem (rtx target, rtx op0, rtx op1, rtx len)
5710 {
5711 rtx ccreg = gen_rtx_REG (CCUmode, CC_REGNUM);
5712 rtx tmp;
5713
5714 /* When tuning for z10 or higher we rely on the Glibc functions to
5715 do the right thing. Only for constant lengths of at most 64k do
5716 we generate inline code. */
5717 if (s390_tune >= PROCESSOR_2097_Z10
5718 && (GET_CODE (len) != CONST_INT || INTVAL (len) > (1<<16)))
5719 return false;
5720
5721 /* As the result of CMPINT is inverted compared to what we need,
5722 we have to swap the operands. */
5723 tmp = op0; op0 = op1; op1 = tmp;
5724
5725 if (GET_CODE (len) == CONST_INT && INTVAL (len) >= 0 && INTVAL (len) <= 256)
5726 {
5727 if (INTVAL (len) > 0)
5728 {
5729 emit_insn (gen_cmpmem_short (op0, op1, GEN_INT (INTVAL (len) - 1)));
5730 emit_insn (gen_cmpint (target, ccreg));
5731 }
5732 else
5733 emit_move_insn (target, const0_rtx);
5734 }
5735 else if (TARGET_MVCLE)
5736 {
5737 emit_insn (gen_cmpmem_long (op0, op1, convert_to_mode (Pmode, len, 1)));
5738 emit_insn (gen_cmpint (target, ccreg));
5739 }
5740 else
5741 {
5742 rtx addr0, addr1, count, blocks, temp;
5743 rtx_code_label *loop_start_label = gen_label_rtx ();
5744 rtx_code_label *loop_end_label = gen_label_rtx ();
5745 rtx_code_label *end_label = gen_label_rtx ();
5746 machine_mode mode;
5747
5748 mode = GET_MODE (len);
5749 if (mode == VOIDmode)
5750 mode = Pmode;
5751
5752 addr0 = gen_reg_rtx (Pmode);
5753 addr1 = gen_reg_rtx (Pmode);
5754 count = gen_reg_rtx (mode);
5755 blocks = gen_reg_rtx (mode);
5756
5757 convert_move (count, len, 1);
5758 emit_cmp_and_jump_insns (count, const0_rtx,
5759 EQ, NULL_RTX, mode, 1, end_label);
5760
5761 emit_move_insn (addr0, force_operand (XEXP (op0, 0), NULL_RTX));
5762 emit_move_insn (addr1, force_operand (XEXP (op1, 0), NULL_RTX));
5763 op0 = change_address (op0, VOIDmode, addr0);
5764 op1 = change_address (op1, VOIDmode, addr1);
5765
5766 temp = expand_binop (mode, add_optab, count, constm1_rtx, count, 1,
5767 OPTAB_DIRECT);
5768 if (temp != count)
5769 emit_move_insn (count, temp);
5770
5771 temp = expand_binop (mode, lshr_optab, count, GEN_INT (8), blocks, 1,
5772 OPTAB_DIRECT);
5773 if (temp != blocks)
5774 emit_move_insn (blocks, temp);
5775
5776 emit_cmp_and_jump_insns (blocks, const0_rtx,
5777 EQ, NULL_RTX, mode, 1, loop_end_label);
5778
5779 emit_label (loop_start_label);
5780
5781 if (TARGET_Z10
5782 && (GET_CODE (len) != CONST_INT || INTVAL (len) > 512))
5783 {
5784 rtx prefetch;
5785
5786 /* Issue a read prefetch for the +2 cache line of operand 1. */
5787 prefetch = gen_prefetch (gen_rtx_PLUS (Pmode, addr0, GEN_INT (512)),
5788 const0_rtx, const0_rtx);
5789 emit_insn (prefetch);
5790 PREFETCH_SCHEDULE_BARRIER_P (prefetch) = true;
5791
5792 /* Issue a read prefetch for the +2 cache line of operand 2. */
5793 prefetch = gen_prefetch (gen_rtx_PLUS (Pmode, addr1, GEN_INT (512)),
5794 const0_rtx, const0_rtx);
5795 emit_insn (prefetch);
5796 PREFETCH_SCHEDULE_BARRIER_P (prefetch) = true;
5797 }
5798
5799 emit_insn (gen_cmpmem_short (op0, op1, GEN_INT (255)));
5800 temp = gen_rtx_NE (VOIDmode, ccreg, const0_rtx);
5801 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
5802 gen_rtx_LABEL_REF (VOIDmode, end_label), pc_rtx);
5803 temp = gen_rtx_SET (pc_rtx, temp);
5804 emit_jump_insn (temp);
5805
5806 s390_load_address (addr0,
5807 gen_rtx_PLUS (Pmode, addr0, GEN_INT (256)));
5808 s390_load_address (addr1,
5809 gen_rtx_PLUS (Pmode, addr1, GEN_INT (256)));
5810
5811 temp = expand_binop (mode, add_optab, blocks, constm1_rtx, blocks, 1,
5812 OPTAB_DIRECT);
5813 if (temp != blocks)
5814 emit_move_insn (blocks, temp);
5815
5816 emit_cmp_and_jump_insns (blocks, const0_rtx,
5817 EQ, NULL_RTX, mode, 1, loop_end_label);
5818
5819 emit_jump (loop_start_label);
5820 emit_label (loop_end_label);
5821
5822 emit_insn (gen_cmpmem_short (op0, op1,
5823 convert_to_mode (Pmode, count, 1)));
5824 emit_label (end_label);
5825
5826 emit_insn (gen_cmpint (target, ccreg));
5827 }
5828 return true;
5829 }
5830
5831 /* Emit a conditional jump to LABEL for condition code mask MASK using
5832 comparison operator COMPARISON. Return the emitted jump insn. */
5833
5834 static rtx_insn *
5835 s390_emit_ccraw_jump (HOST_WIDE_INT mask, enum rtx_code comparison, rtx label)
5836 {
5837 rtx temp;
5838
5839 gcc_assert (comparison == EQ || comparison == NE);
5840 gcc_assert (mask > 0 && mask < 15);
5841
5842 temp = gen_rtx_fmt_ee (comparison, VOIDmode,
5843 gen_rtx_REG (CCRAWmode, CC_REGNUM), GEN_INT (mask));
5844 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
5845 gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
5846 temp = gen_rtx_SET (pc_rtx, temp);
5847 return emit_jump_insn (temp);
5848 }
5849
5850 /* Emit the instructions to implement strlen of STRING and store the
5851 result in TARGET. The string has the known ALIGNMENT. This
5852 version uses vector instructions and is therefore not appropriate
5853 for targets prior to z13. */
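/* Rough equivalent of the emitted code (illustrative sketch): if
   STRING is not known to be 16-byte aligned, load up to the next
   16-byte boundary with vll (zero-filling the rest) and use vfene
   to look for a zero byte; if the zero found lies beyond the bytes
   actually loaded, fall into the main loop, which scans aligned
   16-byte chunks with vfene. The result is the accumulated chunk
   offset plus the byte index delivered by vfene. */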
5854
5855 void
5856 s390_expand_vec_strlen (rtx target, rtx string, rtx alignment)
5857 {
5858 rtx highest_index_to_load_reg = gen_reg_rtx (Pmode);
5859 rtx str_reg = gen_reg_rtx (V16QImode);
5860 rtx str_addr_base_reg = gen_reg_rtx (Pmode);
5861 rtx str_idx_reg = gen_reg_rtx (Pmode);
5862 rtx result_reg = gen_reg_rtx (V16QImode);
5863 rtx is_aligned_label = gen_label_rtx ();
5864 rtx into_loop_label = NULL_RTX;
5865 rtx loop_start_label = gen_label_rtx ();
5866 rtx temp;
5867 rtx len = gen_reg_rtx (QImode);
5868 rtx cond;
5869
5870 s390_load_address (str_addr_base_reg, XEXP (string, 0));
5871 emit_move_insn (str_idx_reg, const0_rtx);
5872
5873 if (INTVAL (alignment) < 16)
5874 {
5875 /* Check whether the address happens to be aligned properly, so
5876 that we can jump directly to the aligned loop. */
5877 emit_cmp_and_jump_insns (gen_rtx_AND (Pmode,
5878 str_addr_base_reg, GEN_INT (15)),
5879 const0_rtx, EQ, NULL_RTX,
5880 Pmode, 1, is_aligned_label);
5881
5882 temp = gen_reg_rtx (Pmode);
5883 temp = expand_binop (Pmode, and_optab, str_addr_base_reg,
5884 GEN_INT (15), temp, 1, OPTAB_DIRECT);
5885 gcc_assert (REG_P (temp));
5886 highest_index_to_load_reg =
5887 expand_binop (Pmode, sub_optab, GEN_INT (15), temp,
5888 highest_index_to_load_reg, 1, OPTAB_DIRECT);
5889 gcc_assert (REG_P (highest_index_to_load_reg));
5890 emit_insn (gen_vllv16qi (str_reg,
5891 convert_to_mode (SImode, highest_index_to_load_reg, 1),
5892 gen_rtx_MEM (BLKmode, str_addr_base_reg)));
5893
5894 into_loop_label = gen_label_rtx ();
5895 s390_emit_jump (into_loop_label, NULL_RTX);
5896 emit_barrier ();
5897 }
5898
5899 emit_label (is_aligned_label);
5900 LABEL_NUSES (is_aligned_label) = INTVAL (alignment) < 16 ? 2 : 1;
5901
5902 /* Reaching this point we are only performing 16-byte aligned
5903 loads. */
5904 emit_move_insn (highest_index_to_load_reg, GEN_INT (15));
5905
5906 emit_label (loop_start_label);
5907 LABEL_NUSES (loop_start_label) = 1;
5908
5909 /* Load 16 bytes of the string into VR. */
5910 emit_move_insn (str_reg,
5911 gen_rtx_MEM (V16QImode,
5912 gen_rtx_PLUS (Pmode, str_idx_reg,
5913 str_addr_base_reg)));
5914 if (into_loop_label != NULL_RTX)
5915 {
5916 emit_label (into_loop_label);
5917 LABEL_NUSES (into_loop_label) = 1;
5918 }
5919
5920 /* Increment string index by 16 bytes. */
5921 expand_binop (Pmode, add_optab, str_idx_reg, GEN_INT (16),
5922 str_idx_reg, 1, OPTAB_DIRECT);
5923
5924 emit_insn (gen_vec_vfenesv16qi (result_reg, str_reg, str_reg,
5925 GEN_INT (VSTRING_FLAG_ZS | VSTRING_FLAG_CS)));
5926
5927 add_int_reg_note (s390_emit_ccraw_jump (8, NE, loop_start_label),
5928 REG_BR_PROB,
5929 profile_probability::very_likely ().to_reg_br_prob_note ());
5930 emit_insn (gen_vec_extractv16qiqi (len, result_reg, GEN_INT (7)));
5931
5932 /* If the string pointer wasn't aligned we have loaded less than 16
5933 bytes and the remaining bytes got filled with zeros (by vll).
5934 Now we have to check whether the resulting index lies within the
5935 bytes actually part of the string. */
5936
5937 cond = s390_emit_compare (GT, convert_to_mode (Pmode, len, 1),
5938 highest_index_to_load_reg);
5939 s390_load_address (highest_index_to_load_reg,
5940 gen_rtx_PLUS (Pmode, highest_index_to_load_reg,
5941 const1_rtx));
5942 if (TARGET_64BIT)
5943 emit_insn (gen_movdicc (str_idx_reg, cond,
5944 highest_index_to_load_reg, str_idx_reg));
5945 else
5946 emit_insn (gen_movsicc (str_idx_reg, cond,
5947 highest_index_to_load_reg, str_idx_reg));
5948
5949 add_reg_br_prob_note (s390_emit_jump (is_aligned_label, cond),
5950 profile_probability::very_unlikely ());
5951
5952 expand_binop (Pmode, add_optab, str_idx_reg,
5953 GEN_INT (-16), str_idx_reg, 1, OPTAB_DIRECT);
5954 /* FIXME: len is already zero extended - so avoid the llgcr emitted
5955 here. */
5956 temp = expand_binop (Pmode, add_optab, str_idx_reg,
5957 convert_to_mode (Pmode, len, 1),
5958 target, 1, OPTAB_DIRECT);
5959 if (temp != target)
5960 emit_move_insn (target, temp);
5961 }
5962
5963 void
5964 s390_expand_vec_movstr (rtx result, rtx dst, rtx src)
5965 {
5966 rtx temp = gen_reg_rtx (Pmode);
5967 rtx src_addr = XEXP (src, 0);
5968 rtx dst_addr = XEXP (dst, 0);
5969 rtx src_addr_reg = gen_reg_rtx (Pmode);
5970 rtx dst_addr_reg = gen_reg_rtx (Pmode);
5971 rtx offset = gen_reg_rtx (Pmode);
5972 rtx vsrc = gen_reg_rtx (V16QImode);
5973 rtx vpos = gen_reg_rtx (V16QImode);
5974 rtx loadlen = gen_reg_rtx (SImode);
5975 rtx gpos_qi = gen_reg_rtx (QImode);
5976 rtx gpos = gen_reg_rtx (SImode);
5977 rtx done_label = gen_label_rtx ();
5978 rtx loop_label = gen_label_rtx ();
5979 rtx exit_label = gen_label_rtx ();
5980 rtx full_label = gen_label_rtx ();
5981
5982 /* Perform a quick check for the string ending within the first
5983 (at most 16) bytes and exit early if successful. */
5984
5985 emit_insn (gen_vlbb (vsrc, src, GEN_INT (6)));
5986 emit_insn (gen_lcbb (loadlen, src_addr, GEN_INT (6)));
5987 emit_insn (gen_vfenezv16qi (vpos, vsrc, vsrc));
5988 emit_insn (gen_vec_extractv16qiqi (gpos_qi, vpos, GEN_INT (7)));
5989 emit_move_insn (gpos, gen_rtx_SUBREG (SImode, gpos_qi, 0));
5990 /* gpos is the byte index if a zero was found and 16 otherwise.
5991 So if it is lower than the number of loaded bytes we have a hit. */
5992 emit_cmp_and_jump_insns (gpos, loadlen, GE, NULL_RTX, SImode, 1,
5993 full_label);
5994 emit_insn (gen_vstlv16qi (vsrc, gpos, dst));
5995
5996 force_expand_binop (Pmode, add_optab, dst_addr, gpos, result,
5997 1, OPTAB_DIRECT);
5998 emit_jump (exit_label);
5999 emit_barrier ();
6000
6001 emit_label (full_label);
6002 LABEL_NUSES (full_label) = 1;
6003
6004 /* Calculate `offset' so that src + offset points to the last byte
6005 before the next 16-byte boundary. */
6006
6007 /* temp = src_addr & 0xf */
6008 force_expand_binop (Pmode, and_optab, src_addr, GEN_INT (15), temp,
6009 1, OPTAB_DIRECT);
6010
6011 /* offset = 0xf - temp */
6012 emit_move_insn (offset, GEN_INT (15));
6013 force_expand_binop (Pmode, sub_optab, offset, temp, offset,
6014 1, OPTAB_DIRECT);
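/* Worked example (illustrative): for src_addr == 0x1009 we get
   temp = 9 and offset = 6, so src + offset is 0x100f, the last byte
   of the current 16-byte block; after the increment below,
   src + offset is 16-byte aligned. */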
6015
6016 /* Store `offset' bytes in the destination string. The quick check
6017 has loaded at least `offset' bytes into vsrc. */
6018
6019 emit_insn (gen_vstlv16qi (vsrc, gen_lowpart (SImode, offset), dst));
6020
6021 /* Advance to the next byte to be loaded. */
6022 force_expand_binop (Pmode, add_optab, offset, const1_rtx, offset,
6023 1, OPTAB_DIRECT);
6024
6025 /* Make sure the addresses are single regs which can be used as a
6026 base. */
6027 emit_move_insn (src_addr_reg, src_addr);
6028 emit_move_insn (dst_addr_reg, dst_addr);
6029
6030 /* MAIN LOOP */
6031
6032 emit_label (loop_label);
6033 LABEL_NUSES (loop_label) = 1;
6034
6035 emit_move_insn (vsrc,
6036 gen_rtx_MEM (V16QImode,
6037 gen_rtx_PLUS (Pmode, src_addr_reg, offset)));
6038
6039 emit_insn (gen_vec_vfenesv16qi (vpos, vsrc, vsrc,
6040 GEN_INT (VSTRING_FLAG_ZS | VSTRING_FLAG_CS)));
6041 add_int_reg_note (s390_emit_ccraw_jump (8, EQ, done_label),
6042 REG_BR_PROB, profile_probability::very_unlikely ()
6043 .to_reg_br_prob_note ());
6044
6045 emit_move_insn (gen_rtx_MEM (V16QImode,
6046 gen_rtx_PLUS (Pmode, dst_addr_reg, offset)),
6047 vsrc);
6048 /* offset += 16 */
6049 force_expand_binop (Pmode, add_optab, offset, GEN_INT (16),
6050 offset, 1, OPTAB_DIRECT);
6051
6052 emit_jump (loop_label);
6053 emit_barrier ();
6054
6055 /* REGULAR EXIT */
6056
6057 /* We are done. Add the offset of the zero character to the dst_addr
6058 pointer to get the result. */
6059
6060 emit_label (done_label);
6061 LABEL_NUSES (done_label) = 1;
6062
6063 force_expand_binop (Pmode, add_optab, dst_addr_reg, offset, dst_addr_reg,
6064 1, OPTAB_DIRECT);
6065
6066 emit_insn (gen_vec_extractv16qiqi (gpos_qi, vpos, GEN_INT (7)));
6067 emit_move_insn (gpos, gen_rtx_SUBREG (SImode, gpos_qi, 0));
6068
6069 emit_insn (gen_vstlv16qi (vsrc, gpos, gen_rtx_MEM (BLKmode, dst_addr_reg)));
6070
6071 force_expand_binop (Pmode, add_optab, dst_addr_reg, gpos, result,
6072 1, OPTAB_DIRECT);
6073
6074 /* EARLY EXIT */
6075
6076 emit_label (exit_label);
6077 LABEL_NUSES (exit_label) = 1;
6078 }
6079
6080
6081 /* Expand conditional increment or decrement using alc/slb instructions.
6082 Should generate code setting DST to either SRC or SRC + INCREMENT,
6083 depending on the result of the comparison CMP_OP0 CMP_CODE CMP_OP1.
6084 Returns true if successful, false otherwise.
6085
6086 That makes it possible to implement some if-constructs without jumps e.g.:
6087 (borrow = CC0 | CC1 and carry = CC2 | CC3)
6088 unsigned int a, b, c;
6089 if (a < b) c++; -> CCU b > a -> CC2; c += carry;
6090 if (a < b) c--; -> CCL3 a - b -> borrow; c -= borrow;
6091 if (a <= b) c++; -> CCL3 b - a -> borrow; c += carry;
6092 if (a <= b) c--; -> CCU a <= b -> borrow; c -= borrow;
6093
6094 Checks for EQ and NE with a nonzero value need an additional xor e.g.:
6095 if (a == b) c++; -> CCL3 a ^= b; 0 - a -> borrow; c += carry;
6096 if (a == b) c--; -> CCU a ^= b; a <= 0 -> CC0 | CC1; c -= borrow;
6097 if (a != b) c++; -> CCU a ^= b; a > 0 -> CC2; c += carry;
6098 if (a != b) c--; -> CCL3 a ^= b; 0 - a -> borrow; c -= borrow; */
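/* Illustrative sketch (SImode, register names hypothetical): for
   "if (a < b) c++;" with unsigned operands this expands to a
   CCUmode compare followed by an add-logical-with-carry, roughly
     clr %rb,%ra   # b > a sets CC2, i.e. the carry
     alcr %rc,%r0   # c = c + 0 + carry (%r0 here holding zero)
   so the increment happens without any branch. */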
6099
6100 bool
6101 s390_expand_addcc (enum rtx_code cmp_code, rtx cmp_op0, rtx cmp_op1,
6102 rtx dst, rtx src, rtx increment)
6103 {
6104 machine_mode cmp_mode;
6105 machine_mode cc_mode;
6106 rtx op_res;
6107 rtx insn;
6108 rtvec p;
6109 int ret;
6110
6111 if ((GET_MODE (cmp_op0) == SImode || GET_MODE (cmp_op0) == VOIDmode)
6112 && (GET_MODE (cmp_op1) == SImode || GET_MODE (cmp_op1) == VOIDmode))
6113 cmp_mode = SImode;
6114 else if ((GET_MODE (cmp_op0) == DImode || GET_MODE (cmp_op0) == VOIDmode)
6115 && (GET_MODE (cmp_op1) == DImode || GET_MODE (cmp_op1) == VOIDmode))
6116 cmp_mode = DImode;
6117 else
6118 return false;
6119
6120 /* Try ADD LOGICAL WITH CARRY. */
6121 if (increment == const1_rtx)
6122 {
6123 /* Determine CC mode to use. */
6124 if (cmp_code == EQ || cmp_code == NE)
6125 {
6126 if (cmp_op1 != const0_rtx)
6127 {
6128 cmp_op0 = expand_simple_binop (cmp_mode, XOR, cmp_op0, cmp_op1,
6129 NULL_RTX, 0, OPTAB_WIDEN);
6130 cmp_op1 = const0_rtx;
6131 }
6132
6133 cmp_code = cmp_code == EQ ? LEU : GTU;
6134 }
6135
6136 if (cmp_code == LTU || cmp_code == LEU)
6137 {
6138 rtx tem = cmp_op0;
6139 cmp_op0 = cmp_op1;
6140 cmp_op1 = tem;
6141 cmp_code = swap_condition (cmp_code);
6142 }
6143
6144 switch (cmp_code)
6145 {
6146 case GTU:
6147 cc_mode = CCUmode;
6148 break;
6149
6150 case GEU:
6151 cc_mode = CCL3mode;
6152 break;
6153
6154 default:
6155 return false;
6156 }
6157
6158 /* Emit comparison instruction pattern. */
6159 if (!register_operand (cmp_op0, cmp_mode))
6160 cmp_op0 = force_reg (cmp_mode, cmp_op0);
6161
6162 insn = gen_rtx_SET (gen_rtx_REG (cc_mode, CC_REGNUM),
6163 gen_rtx_COMPARE (cc_mode, cmp_op0, cmp_op1));
6164 /* We use insn_invalid_p here to add clobbers if required. */
6165 ret = insn_invalid_p (emit_insn (insn), false);
6166 gcc_assert (!ret);
6167
6168 /* Emit ALC instruction pattern. */
6169 op_res = gen_rtx_fmt_ee (cmp_code, GET_MODE (dst),
6170 gen_rtx_REG (cc_mode, CC_REGNUM),
6171 const0_rtx);
6172
6173 if (src != const0_rtx)
6174 {
6175 if (!register_operand (src, GET_MODE (dst)))
6176 src = force_reg (GET_MODE (dst), src);
6177
6178 op_res = gen_rtx_PLUS (GET_MODE (dst), op_res, src);
6179 op_res = gen_rtx_PLUS (GET_MODE (dst), op_res, const0_rtx);
6180 }
6181
6182 p = rtvec_alloc (2);
6183 RTVEC_ELT (p, 0) =
6184 gen_rtx_SET (dst, op_res);
6185 RTVEC_ELT (p, 1) =
6186 gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, CC_REGNUM));
6187 emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
6188
6189 return true;
6190 }
6191
6192 /* Try SUBTRACT LOGICAL WITH BORROW. */
6193 if (increment == constm1_rtx)
6194 {
6195 /* Determine CC mode to use. */
6196 if (cmp_code == EQ || cmp_code == NE)
6197 {
6198 if (cmp_op1 != const0_rtx)
6199 {
6200 cmp_op0 = expand_simple_binop (cmp_mode, XOR, cmp_op0, cmp_op1,
6201 NULL_RTX, 0, OPTAB_WIDEN);
6202 cmp_op1 = const0_rtx;
6203 }
6204
6205 cmp_code = cmp_code == EQ ? LEU : GTU;
6206 }
6207
6208 if (cmp_code == GTU || cmp_code == GEU)
6209 {
6210 rtx tem = cmp_op0;
6211 cmp_op0 = cmp_op1;
6212 cmp_op1 = tem;
6213 cmp_code = swap_condition (cmp_code);
6214 }
6215
6216 switch (cmp_code)
6217 {
6218 case LEU:
6219 cc_mode = CCUmode;
6220 break;
6221
6222 case LTU:
6223 cc_mode = CCL3mode;
6224 break;
6225
6226 default:
6227 return false;
6228 }
6229
6230 /* Emit comparison instruction pattern. */
6231 if (!register_operand (cmp_op0, cmp_mode))
6232 cmp_op0 = force_reg (cmp_mode, cmp_op0);
6233
6234 insn = gen_rtx_SET (gen_rtx_REG (cc_mode, CC_REGNUM),
6235 gen_rtx_COMPARE (cc_mode, cmp_op0, cmp_op1));
6236 /* We use insn_invalid_p here to add clobbers if required. */
6237 ret = insn_invalid_p (emit_insn (insn), false);
6238 gcc_assert (!ret);
6239
6240 /* Emit SLB instruction pattern. */
6241 if (!register_operand (src, GET_MODE (dst)))
6242 src = force_reg (GET_MODE (dst), src);
6243
6244 op_res = gen_rtx_MINUS (GET_MODE (dst),
6245 gen_rtx_MINUS (GET_MODE (dst), src, const0_rtx),
6246 gen_rtx_fmt_ee (cmp_code, GET_MODE (dst),
6247 gen_rtx_REG (cc_mode, CC_REGNUM),
6248 const0_rtx));
6249 p = rtvec_alloc (2);
6250 RTVEC_ELT (p, 0) =
6251 gen_rtx_SET (dst, op_res);
6252 RTVEC_ELT (p, 1) =
6253 gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, CC_REGNUM));
6254 emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
6255
6256 return true;
6257 }
6258
6259 return false;
6260 }
6261
6262 /* Expand code for the insv template. Return true if successful. */
6263
6264 bool
6265 s390_expand_insv (rtx dest, rtx op1, rtx op2, rtx src)
6266 {
6267 int bitsize = INTVAL (op1);
6268 int bitpos = INTVAL (op2);
6269 machine_mode mode = GET_MODE (dest);
6270 machine_mode smode;
6271 int smode_bsize, mode_bsize;
6272 rtx op, clobber;
6273
6274 if (bitsize + bitpos > GET_MODE_BITSIZE (mode))
6275 return false;
6276
6277 /* Generate INSERT IMMEDIATE (IILL et al). */
6278 /* (set (ze (reg)) (const_int)). */
6279 if (TARGET_ZARCH
6280 && register_operand (dest, word_mode)
6281 && (bitpos % 16) == 0
6282 && (bitsize % 16) == 0
6283 && const_int_operand (src, VOIDmode))
6284 {
6285 HOST_WIDE_INT val = INTVAL (src);
6286 int regpos = bitpos + bitsize;
6287
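/* E.g. (illustrative): a 64-bit constant at bitpos 0 with
   TARGET_EXTIMM is emitted as two SImode insertions, first the low
   32 bits of VAL at bit position 32, then the high 32 bits at
   position 0 (bit positions counting from the MSB). */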
6288 while (regpos > bitpos)
6289 {
6290 machine_mode putmode;
6291 int putsize;
6292
6293 if (TARGET_EXTIMM && (regpos % 32 == 0) && (regpos >= bitpos + 32))
6294 putmode = SImode;
6295 else
6296 putmode = HImode;
6297
6298 putsize = GET_MODE_BITSIZE (putmode);
6299 regpos -= putsize;
6300 emit_move_insn (gen_rtx_ZERO_EXTRACT (word_mode, dest,
6301 GEN_INT (putsize),
6302 GEN_INT (regpos)),
6303 gen_int_mode (val, putmode));
6304 val >>= putsize;
6305 }
6306 gcc_assert (regpos == bitpos);
6307 return true;
6308 }
6309
6310 smode = smallest_int_mode_for_size (bitsize);
6311 smode_bsize = GET_MODE_BITSIZE (smode);
6312 mode_bsize = GET_MODE_BITSIZE (mode);
6313
6314 /* Generate STORE CHARACTERS UNDER MASK (STCM et al). */
6315 if (bitpos == 0
6316 && (bitsize % BITS_PER_UNIT) == 0
6317 && MEM_P (dest)
6318 && (register_operand (src, word_mode)
6319 || const_int_operand (src, VOIDmode)))
6320 {
6321 /* Emit standard pattern if possible. */
6322 if (smode_bsize == bitsize)
6323 {
6324 emit_move_insn (adjust_address (dest, smode, 0),
6325 gen_lowpart (smode, src));
6326 return true;
6327 }
6328
6329 /* (set (ze (mem)) (const_int)). */
6330 else if (const_int_operand (src, VOIDmode))
6331 {
6332 int size = bitsize / BITS_PER_UNIT;
6333 rtx src_mem = adjust_address (force_const_mem (word_mode, src),
6334 BLKmode,
6335 UNITS_PER_WORD - size);
6336
6337 dest = adjust_address (dest, BLKmode, 0);
6338 set_mem_size (dest, size);
6339 s390_expand_movmem (dest, src_mem, GEN_INT (size));
6340 return true;
6341 }
6342
6343 /* (set (ze (mem)) (reg)). */
6344 else if (register_operand (src, word_mode))
6345 {
6346 if (bitsize <= 32)
6347 emit_move_insn (gen_rtx_ZERO_EXTRACT (word_mode, dest, op1,
6348 const0_rtx), src);
6349 else
6350 {
6351 /* Emit st,stcmh sequence. */
6352 int stcmh_width = bitsize - 32;
6353 int size = stcmh_width / BITS_PER_UNIT;
6354
6355 emit_move_insn (adjust_address (dest, SImode, size),
6356 gen_lowpart (SImode, src));
6357 set_mem_size (dest, size);
6358 emit_move_insn (gen_rtx_ZERO_EXTRACT (word_mode, dest,
6359 GEN_INT (stcmh_width),
6360 const0_rtx),
6361 gen_rtx_LSHIFTRT (word_mode, src, GEN_INT (32)));
6362 }
6363 return true;
6364 }
6365 }
6366
6367 /* Generate INSERT CHARACTERS UNDER MASK (IC, ICM et al). */
6368 if ((bitpos % BITS_PER_UNIT) == 0
6369 && (bitsize % BITS_PER_UNIT) == 0
6370 && (bitpos & 32) == ((bitpos + bitsize - 1) & 32)
6371 && MEM_P (src)
6372 && (mode == DImode || mode == SImode)
6373 && register_operand (dest, mode))
6374 {
6375 /* Emit a strict_low_part pattern if possible. */
6376 if (smode_bsize == bitsize && bitpos == mode_bsize - smode_bsize)
6377 {
6378 op = gen_rtx_STRICT_LOW_PART (VOIDmode, gen_lowpart (smode, dest));
6379 op = gen_rtx_SET (op, gen_lowpart (smode, src));
6380 clobber = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, CC_REGNUM));
6381 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clobber)));
6382 return true;
6383 }
6384
6385 /* ??? There are more powerful versions of ICM that are not
6386 completely represented in the md file. */
6387 }
6388
6389 /* For z10, generate ROTATE THEN INSERT SELECTED BITS (RISBG et al). */
6390 if (TARGET_Z10 && (mode == DImode || mode == SImode))
6391 {
6392 machine_mode mode_s = GET_MODE (src);
6393
6394 if (CONSTANT_P (src))
6395 {
6396 /* For constant zero values the representation with AND
6397 appears to be folded in more situations than the (set
6398 (zero_extract) ...).
6399 We only do this when the start and end of the bitfield
6400 remain in the same SImode chunk. That way nihf or nilf
6401 can be used.
6402 The AND patterns might still generate a risbg for this. */
6403 if (src == const0_rtx && bitpos / 32 == (bitpos + bitsize - 1) / 32)
6404 return false;
6405 else
6406 src = force_reg (mode, src);
6407 }
6408 else if (mode_s != mode)
6409 {
6410 gcc_assert (GET_MODE_BITSIZE (mode_s) >= bitsize);
6411 src = force_reg (mode_s, src);
6412 src = gen_lowpart (mode, src);
6413 }
6414
6415 op = gen_rtx_ZERO_EXTRACT (mode, dest, op1, op2);
6416 op = gen_rtx_SET (op, src);
6417
6418 if (!TARGET_ZEC12)
6419 {
6420 clobber = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, CC_REGNUM));
6421 op = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clobber));
6422 }
6423 emit_insn (op);
6424
6425 return true;
6426 }
6427
6428 return false;
6429 }
6430
6431 /* A subroutine of s390_expand_cs_hqi and s390_expand_atomic which returns a
6432 register that holds VAL of mode MODE shifted by COUNT bits. */
6433
6434 static inline rtx
6435 s390_expand_mask_and_shift (rtx val, machine_mode mode, rtx count)
6436 {
6437 val = expand_simple_binop (SImode, AND, val, GEN_INT (GET_MODE_MASK (mode)),
6438 NULL_RTX, 1, OPTAB_DIRECT);
6439 return expand_simple_binop (SImode, ASHIFT, val, count,
6440 NULL_RTX, 1, OPTAB_DIRECT);
6441 }
6442
6443 /* Generate a vector comparison COND of CMP_OP1 and CMP_OP2 and store
6444 the result in TARGET. */
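/* E.g. (illustrative): an integer LE comparison is emitted as GT
   with the result negated, since the hardware only provides equal,
   high and high-logical element compares (vceq/vch/vchl). */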
6445
6446 void
6447 s390_expand_vec_compare (rtx target, enum rtx_code cond,
6448 rtx cmp_op1, rtx cmp_op2)
6449 {
6450 machine_mode mode = GET_MODE (target);
6451 bool neg_p = false, swap_p = false;
6452 rtx tmp;
6453
6454 if (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_VECTOR_FLOAT)
6455 {
6456 switch (cond)
6457 {
6458 /* NE a != b -> !(a == b) */
6459 case NE: cond = EQ; neg_p = true; break;
6460 /* UNGT a u> b -> !(b >= a) */
6461 case UNGT: cond = GE; neg_p = true; swap_p = true; break;
6462 /* UNGE a u>= b -> !(b > a) */
6463 case UNGE: cond = GT; neg_p = true; swap_p = true; break;
6464 /* LE: a <= b -> b >= a */
6465 case LE: cond = GE; swap_p = true; break;
6466 /* UNLE: a u<= b -> !(a > b) */
6467 case UNLE: cond = GT; neg_p = true; break;
6468 /* LT: a < b -> b > a */
6469 case LT: cond = GT; swap_p = true; break;
6470 /* UNLT: a u< b -> !(a >= b) */
6471 case UNLT: cond = GE; neg_p = true; break;
6472 case UNEQ:
6473 emit_insn (gen_vec_cmpuneq (target, cmp_op1, cmp_op2));
6474 return;
6475 case LTGT:
6476 emit_insn (gen_vec_cmpltgt (target, cmp_op1, cmp_op2));
6477 return;
6478 case ORDERED:
6479 emit_insn (gen_vec_ordered (target, cmp_op1, cmp_op2));
6480 return;
6481 case UNORDERED:
6482 emit_insn (gen_vec_unordered (target, cmp_op1, cmp_op2));
6483 return;
6484 default: break;
6485 }
6486 }
6487 else
6488 {
6489 switch (cond)
6490 {
6491 /* NE: a != b -> !(a == b) */
6492 case NE: cond = EQ; neg_p = true; break;
6493 /* GE: a >= b -> !(b > a) */
6494 case GE: cond = GT; neg_p = true; swap_p = true; break;
6495 /* GEU: a >= b -> !(b > a) */
6496 case GEU: cond = GTU; neg_p = true; swap_p = true; break;
6497 /* LE: a <= b -> !(a > b) */
6498 case LE: cond = GT; neg_p = true; break;
6499 /* LEU: a <= b -> !(a > b) */
6500 case LEU: cond = GTU; neg_p = true; break;
6501 /* LT: a < b -> b > a */
6502 case LT: cond = GT; swap_p = true; break;
6503 /* LTU: a < b -> b > a */
6504 case LTU: cond = GTU; swap_p = true; break;
6505 default: break;
6506 }
6507 }
6508
6509 if (swap_p)
6510 {
6511 tmp = cmp_op1; cmp_op1 = cmp_op2; cmp_op2 = tmp;
6512 }
6513
6514 emit_insn (gen_rtx_SET (target, gen_rtx_fmt_ee (cond,
6515 mode,
6516 cmp_op1, cmp_op2)));
6517 if (neg_p)
6518 emit_insn (gen_rtx_SET (target, gen_rtx_NOT (mode, target)));
6519 }
6520
6521 /* Expand the comparison CODE of CMP1 and CMP2 and copy 1 or 0 into
6522 TARGET if either all (ALL_P is true) or any (ALL_P is false) of the
6523 elements in CMP1 and CMP2 fulfill the comparison.
6524 This function is only used to emit patterns for the vx builtins and
6525 therefore only handles comparison codes required by the
6526 builtins. */
6527 void
6528 s390_expand_vec_compare_cc (rtx target, enum rtx_code code,
6529 rtx cmp1, rtx cmp2, bool all_p)
6530 {
6531 machine_mode cc_producer_mode, cc_consumer_mode, scratch_mode;
6532 rtx tmp_reg = gen_reg_rtx (SImode);
6533 bool swap_p = false;
6534
6535 if (GET_MODE_CLASS (GET_MODE (cmp1)) == MODE_VECTOR_INT)
6536 {
6537 switch (code)
6538 {
6539 case EQ:
6540 case NE:
6541 cc_producer_mode = CCVEQmode;
6542 break;
6543 case GE:
6544 case LT:
6545 code = swap_condition (code);
6546 swap_p = true;
6547 /* fallthrough */
6548 case GT:
6549 case LE:
6550 cc_producer_mode = CCVIHmode;
6551 break;
6552 case GEU:
6553 case LTU:
6554 code = swap_condition (code);
6555 swap_p = true;
6556 /* fallthrough */
6557 case GTU:
6558 case LEU:
6559 cc_producer_mode = CCVIHUmode;
6560 break;
6561 default:
6562 gcc_unreachable ();
6563 }
6564
6565 scratch_mode = GET_MODE (cmp1);
6566 /* These codes represent inverted CC interpretations. Inverting
6567 an ALL CC mode results in an ANY CC mode and the other way
6568 around. Invert the all_p flag here to compensate for
6569 that. */
6570 if (code == NE || code == LE || code == LEU)
6571 all_p = !all_p;
6572
6573 cc_consumer_mode = all_p ? CCVIALLmode : CCVIANYmode;
6574 }
6575 else if (GET_MODE_CLASS (GET_MODE (cmp1)) == MODE_VECTOR_FLOAT)
6576 {
6577 bool inv_p = false;
6578
6579 switch (code)
6580 {
6581 case EQ: cc_producer_mode = CCVEQmode; break;
6582 case NE: cc_producer_mode = CCVEQmode; inv_p = true; break;
6583 case GT: cc_producer_mode = CCVFHmode; break;
6584 case GE: cc_producer_mode = CCVFHEmode; break;
6585 case UNLE: cc_producer_mode = CCVFHmode; inv_p = true; break;
6586 case UNLT: cc_producer_mode = CCVFHEmode; inv_p = true; break;
6587 case LT: cc_producer_mode = CCVFHmode; code = GT; swap_p = true; break;
6588 case LE: cc_producer_mode = CCVFHEmode; code = GE; swap_p = true; break;
6589 default: gcc_unreachable ();
6590 }
6591 scratch_mode = mode_for_int_vector (GET_MODE (cmp1)).require ();
6592
6593 if (inv_p)
6594 all_p = !all_p;
6595
6596 cc_consumer_mode = all_p ? CCVFALLmode : CCVFANYmode;
6597 }
6598 else
6599 gcc_unreachable ();
6600
6601 if (swap_p)
6602 {
6603 rtx tmp = cmp2;
6604 cmp2 = cmp1;
6605 cmp1 = tmp;
6606 }
6607
6608 emit_insn (gen_rtx_PARALLEL (VOIDmode,
6609 gen_rtvec (2, gen_rtx_SET (
6610 gen_rtx_REG (cc_producer_mode, CC_REGNUM),
6611 gen_rtx_COMPARE (cc_producer_mode, cmp1, cmp2)),
6612 gen_rtx_CLOBBER (VOIDmode,
6613 gen_rtx_SCRATCH (scratch_mode)))));
6614 emit_move_insn (target, const0_rtx);
6615 emit_move_insn (tmp_reg, const1_rtx);
6616
6617 emit_move_insn (target,
6618 gen_rtx_IF_THEN_ELSE (SImode,
6619 gen_rtx_fmt_ee (code, VOIDmode,
6620 gen_rtx_REG (cc_consumer_mode, CC_REGNUM),
6621 const0_rtx),
6622 tmp_reg, target));
6623 }
6624
6625 /* Invert the comparison CODE applied to a CC mode. This is only safe
6626 if we know whether the result was created by a floating point
6627 compare or not. For the CCV modes this is encoded as part of the
6628 mode. */
6629 enum rtx_code
6630 s390_reverse_condition (machine_mode mode, enum rtx_code code)
6631 {
6632 /* Reversal of FP compares needs special care -- an ordered compare
6633 becomes an unordered compare and vice versa. */
6634 if (mode == CCVFALLmode || mode == CCVFANYmode)
6635 return reverse_condition_maybe_unordered (code);
6636 else if (mode == CCVIALLmode || mode == CCVIANYmode)
6637 return reverse_condition (code);
6638 else
6639 gcc_unreachable ();
6640 }
6641
6642 /* Generate a vector comparison expression loading either elements of
6643 THEN or ELS into TARGET depending on the comparison COND of CMP_OP1
6644 and CMP_OP2. */
6645
6646 void
6647 s390_expand_vcond (rtx target, rtx then, rtx els,
6648 enum rtx_code cond, rtx cmp_op1, rtx cmp_op2)
6649 {
6650 rtx tmp;
6651 machine_mode result_mode;
6652 rtx result_target;
6653
6654 machine_mode target_mode = GET_MODE (target);
6655 machine_mode cmp_mode = GET_MODE (cmp_op1);
6656 rtx op = (cond == LT) ? els : then;
6657
6658 /* Try to optimize x < 0 ? -1 : 0 into (signed) x >> 31
6659 and x < 0 ? 1 : 0 into (unsigned) x >> 31. Likewise
6660 for short and byte (x >> 15 and x >> 7 respectively). */
6661 if ((cond == LT || cond == GE)
6662 && target_mode == cmp_mode
6663 && cmp_op2 == CONST0_RTX (cmp_mode)
6664 && op == CONST0_RTX (target_mode)
6665 && s390_vector_mode_supported_p (target_mode)
6666 && GET_MODE_CLASS (target_mode) == MODE_VECTOR_INT)
6667 {
6668 rtx negop = (cond == LT) ? then : els;
6669
6670 int shift = GET_MODE_BITSIZE (GET_MODE_INNER (target_mode)) - 1;
6671
6672 /* if x < 0 ? 1 : 0 or if x >= 0 ? 0 : 1 */
6673 if (negop == CONST1_RTX (target_mode))
6674 {
6675 rtx res = expand_simple_binop (cmp_mode, LSHIFTRT, cmp_op1,
6676 GEN_INT (shift), target,
6677 1, OPTAB_DIRECT);
6678 if (res != target)
6679 emit_move_insn (target, res);
6680 return;
6681 }
6682
6683 /* if x < 0 ? -1 : 0 or if x >= 0 ? 0 : -1 */
6684 else if (all_ones_operand (negop, target_mode))
6685 {
6686 rtx res = expand_simple_binop (cmp_mode, ASHIFTRT, cmp_op1,
6687 GEN_INT (shift), target,
6688 0, OPTAB_DIRECT);
6689 if (res != target)
6690 emit_move_insn (target, res);
6691 return;
6692 }
6693 }
6694
6695 /* We always use an integral type vector to hold the comparison
6696 result. */
6697 result_mode = mode_for_int_vector (cmp_mode).require ();
6698 result_target = gen_reg_rtx (result_mode);
6699
6700 /* We allow vector immediates as comparison operands that
6701 can be handled by the optimization above but not by the
6702 following code. Hence, force them into registers here. */
6703 if (!REG_P (cmp_op1))
6704 cmp_op1 = force_reg (GET_MODE (cmp_op1), cmp_op1);
6705
6706 if (!REG_P (cmp_op2))
6707 cmp_op2 = force_reg (GET_MODE (cmp_op2), cmp_op2);
6708
6709 s390_expand_vec_compare (result_target, cond,
6710 cmp_op1, cmp_op2);
6711
6712 /* If the results are supposed to be either -1 or 0 we are done
6713 since this is what our compare instructions generate anyway. */
6714 if (all_ones_operand (then, GET_MODE (then))
6715 && const0_operand (els, GET_MODE (els)))
6716 {
6717 emit_move_insn (target, gen_rtx_SUBREG (target_mode,
6718 result_target, 0));
6719 return;
6720 }
6721
6722 /* Otherwise we will do a vsel afterwards. */
6723 /* This gets triggered e.g.
6724 with gcc.c-torture/compile/pr53410-1.c */
6725 if (!REG_P (then))
6726 then = force_reg (target_mode, then);
6727
6728 if (!REG_P (els))
6729 els = force_reg (target_mode, els);
6730
6731 tmp = gen_rtx_fmt_ee (EQ, VOIDmode,
6732 result_target,
6733 CONST0_RTX (result_mode));
6734
6735 /* We compared the result against zero above so we have to swap then
6736 and els here. */
6737 tmp = gen_rtx_IF_THEN_ELSE (target_mode, tmp, els, then);
6738
6739 gcc_assert (target_mode == GET_MODE (then));
6740 emit_insn (gen_rtx_SET (target, tmp));
6741 }
6742
6743 /* Emit the RTX necessary to initialize the vector TARGET with values
6744 in VALS. */
6745 void
6746 s390_expand_vec_init (rtx target, rtx vals)
6747 {
6748 machine_mode mode = GET_MODE (target);
6749 machine_mode inner_mode = GET_MODE_INNER (mode);
6750 int n_elts = GET_MODE_NUNITS (mode);
6751 bool all_same = true, all_regs = true, all_const_int = true;
6752 rtx x;
6753 int i;
6754
6755 for (i = 0; i < n_elts; ++i)
6756 {
6757 x = XVECEXP (vals, 0, i);
6758
6759 if (!CONST_INT_P (x))
6760 all_const_int = false;
6761
6762 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
6763 all_same = false;
6764
6765 if (!REG_P (x))
6766 all_regs = false;
6767 }
6768
6769 /* Use vector gen mask or vector gen byte mask if possible. */
6770 if (all_same && all_const_int
6771 && (XVECEXP (vals, 0, 0) == const0_rtx
6772 || s390_contiguous_bitmask_vector_p (XVECEXP (vals, 0, 0),
6773 NULL, NULL)
6774 || s390_bytemask_vector_p (XVECEXP (vals, 0, 0), NULL)))
6775 {
6776 emit_insn (gen_rtx_SET (target,
6777 gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0))));
6778 return;
6779 }
6780
6781 /* Use vector replicate instructions. vlrep/vrepi/vrep */
6782 if (all_same)
6783 {
6784 rtx elem = XVECEXP (vals, 0, 0);
6785
6786 /* vec_splats accepts general_operand as source. */
6787 if (!general_operand (elem, GET_MODE (elem)))
6788 elem = force_reg (inner_mode, elem);
6789
6790 emit_insn (gen_rtx_SET (target, gen_rtx_VEC_DUPLICATE (mode, elem)));
6791 return;
6792 }
6793
6794 if (all_regs
6795 && REG_P (target)
6796 && n_elts == 2
6797 && GET_MODE_SIZE (inner_mode) == 8)
6798 {
6799 /* Use vector load pair. */
6800 emit_insn (gen_rtx_SET (target,
6801 gen_rtx_VEC_CONCAT (mode,
6802 XVECEXP (vals, 0, 0),
6803 XVECEXP (vals, 0, 1))));
6804 return;
6805 }
6806
6807 /* Use vector load logical element and zero. */
6808 if (TARGET_VXE && (mode == V4SImode || mode == V4SFmode))
6809 {
6810 bool found = true;
6811
6812 x = XVECEXP (vals, 0, 0);
6813 if (memory_operand (x, inner_mode))
6814 {
6815 for (i = 1; i < n_elts; ++i)
6816 found = found && XVECEXP (vals, 0, i) == const0_rtx;
6817
6818 if (found)
6819 {
6820 machine_mode half_mode = (inner_mode == SFmode
6821 ? V2SFmode : V2SImode);
6822 emit_insn (gen_rtx_SET (target,
6823 gen_rtx_VEC_CONCAT (mode,
6824 gen_rtx_VEC_CONCAT (half_mode,
6825 x,
6826 const0_rtx),
6827 gen_rtx_VEC_CONCAT (half_mode,
6828 const0_rtx,
6829 const0_rtx))));
6830 return;
6831 }
6832 }
6833 }
6834
6835 /* We are about to set the vector elements one by one. Zero out the
6836 full register first in order to help the data flow framework to
6837 detect it as a full VR set. */
6838 emit_insn (gen_rtx_SET (target, CONST0_RTX (mode)));
6839
6840 /* Unfortunately the vec_init expander is not allowed to fail. So
6841 we have to implement the fallback ourselves. */
6842 for (i = 0; i < n_elts; i++)
6843 {
6844 rtx elem = XVECEXP (vals, 0, i);
6845 if (!general_operand (elem, GET_MODE (elem)))
6846 elem = force_reg (inner_mode, elem);
6847
6848 emit_insn (gen_rtx_SET (target,
6849 gen_rtx_UNSPEC (mode,
6850 gen_rtvec (3, elem,
6851 GEN_INT (i), target),
6852 UNSPEC_VEC_SET)));
6853 }
6854 }
6855
6856 /* Structure to hold the initial parameters for a compare_and_swap operation
6857 in HImode and QImode. */
6858
6859 struct alignment_context
6860 {
6861 rtx memsi; /* SI aligned memory location. */
6862 rtx shift; /* Bit offset with regard to lsb. */
6863 rtx modemask; /* Mask of the HQImode shifted by SHIFT bits. */
6864 rtx modemaski; /* ~modemask */
6865 bool aligned; /* True if memory is aligned, false otherwise. */
6866 };
6867
6868 /* A subroutine of s390_expand_cs_hqi and s390_expand_atomic to initialize
6869 structure AC for transparent simplification, if the memory alignment is
6870 known to be at least 32 bit. MEM is the memory location for the actual
6871 operation and MODE its mode. */
6872
6873 static void
6874 init_alignment_context (struct alignment_context *ac, rtx mem,
6875 machine_mode mode)
6876 {
6877 ac->shift = GEN_INT (GET_MODE_SIZE (SImode) - GET_MODE_SIZE (mode));
6878 ac->aligned = (MEM_ALIGN (mem) >= GET_MODE_BITSIZE (SImode));
6879
6880 if (ac->aligned)
6881 ac->memsi = adjust_address (mem, SImode, 0); /* Memory is aligned. */
6882 else
6883 {
6884 /* Alignment is unknown. */
6885 rtx byteoffset, addr, align;
6886
6887 /* Force the address into a register. */
6888 addr = force_reg (Pmode, XEXP (mem, 0));
6889
6890 /* Align it to SImode. */
6891 align = expand_simple_binop (Pmode, AND, addr,
6892 GEN_INT (-GET_MODE_SIZE (SImode)),
6893 NULL_RTX, 1, OPTAB_DIRECT);
6894 /* Generate MEM. */
6895 ac->memsi = gen_rtx_MEM (SImode, align);
6896 MEM_VOLATILE_P (ac->memsi) = MEM_VOLATILE_P (mem);
6897 set_mem_alias_set (ac->memsi, ALIAS_SET_MEMORY_BARRIER);
6898 set_mem_align (ac->memsi, GET_MODE_BITSIZE (SImode));
6899
6900 /* Calculate shiftcount. */
6901 byteoffset = expand_simple_binop (Pmode, AND, addr,
6902 GEN_INT (GET_MODE_SIZE (SImode) - 1),
6903 NULL_RTX, 1, OPTAB_DIRECT);
6904 /* As we already have some offset, evaluate the remaining distance. */
6905 ac->shift = expand_simple_binop (SImode, MINUS, ac->shift, byteoffset,
6906 NULL_RTX, 1, OPTAB_DIRECT);
6907 }
6908
6909 /* Shift is the byte count, but we need the bitcount. */
6910 ac->shift = expand_simple_binop (SImode, ASHIFT, ac->shift, GEN_INT (3),
6911 NULL_RTX, 1, OPTAB_DIRECT);
6912
6913 /* Calculate masks. */
6914 ac->modemask = expand_simple_binop (SImode, ASHIFT,
6915 GEN_INT (GET_MODE_MASK (mode)),
6916 ac->shift, NULL_RTX, 1, OPTAB_DIRECT);
6917 ac->modemaski = expand_simple_unop (SImode, NOT, ac->modemask,
6918 NULL_RTX, 1);
6919 }
6920
6921 /* A subroutine of s390_expand_cs_hqi. Insert INS into VAL. If possible,
6922 use a single insv insn into SEQ2. Otherwise, put prep insns in SEQ1 and
6923 perform the merge in SEQ2. */
6924
6925 static rtx
6926 s390_two_part_insv (struct alignment_context *ac, rtx *seq1, rtx *seq2,
6927 machine_mode mode, rtx val, rtx ins)
6928 {
6929 rtx tmp;
6930
6931 if (ac->aligned)
6932 {
6933 start_sequence ();
6934 tmp = copy_to_mode_reg (SImode, val);
6935 if (s390_expand_insv (tmp, GEN_INT (GET_MODE_BITSIZE (mode)),
6936 const0_rtx, ins))
6937 {
6938 *seq1 = NULL;
6939 *seq2 = get_insns ();
6940 end_sequence ();
6941 return tmp;
6942 }
6943 end_sequence ();
6944 }
6945
6946 /* Failed to use insv. Generate a two part shift and mask. */
6947 start_sequence ();
6948 tmp = s390_expand_mask_and_shift (ins, mode, ac->shift);
6949 *seq1 = get_insns ();
6950 end_sequence ();
6951
6952 start_sequence ();
6953 tmp = expand_simple_binop (SImode, IOR, tmp, val, NULL_RTX, 1, OPTAB_DIRECT);
6954 *seq2 = get_insns ();
6955 end_sequence ();
6956
6957 return tmp;
6958 }
6959
6960 /* Expand an atomic compare and swap operation for HImode and QImode. MEM is
6961 the memory location, CMP the old value to compare MEM with and NEW_RTX the
6962 value to set if CMP == MEM. */
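/* The technique (sketch): the HImode/QImode field is embedded in
   its surrounding aligned SImode word, the compare-and-swap is done
   on the full word, and the loop retries whenever only bytes
   outside the field have changed in the meantime. */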
6963
6964 static void
6965 s390_expand_cs_hqi (machine_mode mode, rtx btarget, rtx vtarget, rtx mem,
6966 rtx cmp, rtx new_rtx, bool is_weak)
6967 {
6968 struct alignment_context ac;
6969 rtx cmpv, newv, val, cc, seq0, seq1, seq2, seq3;
6970 rtx res = gen_reg_rtx (SImode);
6971 rtx_code_label *csloop = NULL, *csend = NULL;
6972
6973 gcc_assert (MEM_P (mem));
6974
6975 init_alignment_context (&ac, mem, mode);
6976
6977 /* Load full word. Subsequent loads are performed by CS. */
6978 val = expand_simple_binop (SImode, AND, ac.memsi, ac.modemaski,
6979 NULL_RTX, 1, OPTAB_DIRECT);
6980
6981 /* Prepare insertions of cmp and new_rtx into the loaded value. When
6982 possible, we try to use insv to make this happen efficiently. If
6983 that fails we'll generate code both inside and outside the loop. */
6984 cmpv = s390_two_part_insv (&ac, &seq0, &seq2, mode, val, cmp);
6985 newv = s390_two_part_insv (&ac, &seq1, &seq3, mode, val, new_rtx);
6986
6987 if (seq0)
6988 emit_insn (seq0);
6989 if (seq1)
6990 emit_insn (seq1);
6991
6992 /* Start CS loop. */
6993 if (!is_weak)
6994 {
6995 /* Begin assuming success. */
6996 emit_move_insn (btarget, const1_rtx);
6997
6998 csloop = gen_label_rtx ();
6999 csend = gen_label_rtx ();
7000 emit_label (csloop);
7001 }
7002
7003 /* val = "<mem>00..0<mem>"
7004 * cmp = "00..0<cmp>00..0"
7005 * new = "00..0<new>00..0"
7006 */
7007
7008 emit_insn (seq2);
7009 emit_insn (seq3);
7010
7011 cc = s390_emit_compare_and_swap (EQ, res, ac.memsi, cmpv, newv, CCZ1mode);
7012 if (is_weak)
7013 emit_insn (gen_cstorecc4 (btarget, cc, XEXP (cc, 0), XEXP (cc, 1)));
7014 else
7015 {
7016 rtx tmp;
7017
7018 /* Jump to end if we're done (likely?). */
7019 s390_emit_jump (csend, cc);
7020
7021 /* Check for changes outside MODE, and loop internally if so.
7022 Arrange the moves so that the compare is adjacent to the
7023 branch so that we can generate CRJ. */
7024 tmp = copy_to_reg (val);
7025 force_expand_binop (SImode, and_optab, res, ac.modemaski, val,
7026 1, OPTAB_DIRECT);
7027 cc = s390_emit_compare (NE, val, tmp);
7028 s390_emit_jump (csloop, cc);
7029
7030 /* Failed. */
7031 emit_move_insn (btarget, const0_rtx);
7032 emit_label (csend);
7033 }
7034
7035 /* Return the correct part of the bitfield. */
7036 convert_move (vtarget, expand_simple_binop (SImode, LSHIFTRT, res, ac.shift,
7037 NULL_RTX, 1, OPTAB_DIRECT), 1);
7038 }
7039
7040 /* Variant of s390_expand_cs for SI, DI and TI modes. */
7041 static void
7042 s390_expand_cs_tdsi (machine_mode mode, rtx btarget, rtx vtarget, rtx mem,
7043 rtx cmp, rtx new_rtx, bool is_weak)
7044 {
7045 rtx output = vtarget;
7046 rtx_code_label *skip_cs_label = NULL;
7047 bool do_const_opt = false;
7048
7049 if (!register_operand (output, mode))
7050 output = gen_reg_rtx (mode);
7051
7052 /* If IS_WEAK is true and the INPUT value is a constant, compare the memory
7053 with the constant first and skip the compare_and_swap because it's very
7054 expensive and likely to fail anyway.
7055 Note 1: This is done only for IS_WEAK. C11 allows optimizations that may
7056 cause spurious failures in that case.
7057 Note 2: It may be useful to do this also for non-constant INPUT.
7058 Note 3: Currently only targets with "load on condition" are supported
7059 (z196 and newer). */
7060
7061 if (TARGET_Z196
7062 && (mode == SImode || mode == DImode))
7063 do_const_opt = (is_weak && CONST_INT_P (cmp));
7064
7065 if (do_const_opt)
7066 {
7067 rtx cc = gen_rtx_REG (CCZmode, CC_REGNUM);
7068
7069 skip_cs_label = gen_label_rtx ();
7070 emit_move_insn (btarget, const0_rtx);
7071 if (CONST_INT_P (cmp) && INTVAL (cmp) == 0)
7072 {
7073 rtvec lt = rtvec_alloc (2);
7074
7075 /* Load-and-test + conditional jump. */
7076 RTVEC_ELT (lt, 0)
7077 = gen_rtx_SET (cc, gen_rtx_COMPARE (CCZmode, mem, cmp));
7078 RTVEC_ELT (lt, 1) = gen_rtx_SET (output, mem);
7079 emit_insn (gen_rtx_PARALLEL (VOIDmode, lt));
7080 }
7081 else
7082 {
7083 emit_move_insn (output, mem);
7084 emit_insn (gen_rtx_SET (cc, gen_rtx_COMPARE (CCZmode, output, cmp)));
7085 }
7086 s390_emit_jump (skip_cs_label, gen_rtx_NE (VOIDmode, cc, const0_rtx));
7087 add_reg_br_prob_note (get_last_insn (),
7088 profile_probability::very_unlikely ());
7089 /* If the jump is not taken, OUTPUT is the expected value. */
7090 cmp = output;
7091 /* Reload newval to a register manually, *after* the compare and jump
7092 above. Otherwise Reload might place it before the jump. */
7093 }
7094 else
7095 cmp = force_reg (mode, cmp);
7096 new_rtx = force_reg (mode, new_rtx);
7097 s390_emit_compare_and_swap (EQ, output, mem, cmp, new_rtx,
7098 (do_const_opt) ? CCZmode : CCZ1mode);
7099 if (skip_cs_label != NULL)
7100 emit_label (skip_cs_label);
7101
7102 /* We deliberately accept non-register operands in the predicate
7103 to ensure the write back to the output operand happens *before*
7104 the store-flags code below. This makes it easier for combine
7105 to merge the store-flags code with a potential test-and-branch
7106 pattern following (immediately!) afterwards. */
7107 if (output != vtarget)
7108 emit_move_insn (vtarget, output);
7109
7110 if (do_const_opt)
7111 {
7112 rtx cc, cond, ite;
7113
7114 /* Do not use gen_cstorecc4 here because it writes either 1 or 0, but
7115 btarget has already been initialized with 0 above. */
7116 cc = gen_rtx_REG (CCZmode, CC_REGNUM);
7117 cond = gen_rtx_EQ (VOIDmode, cc, const0_rtx);
7118 ite = gen_rtx_IF_THEN_ELSE (SImode, cond, const1_rtx, btarget);
7119 emit_insn (gen_rtx_SET (btarget, ite));
7120 }
7121 else
7122 {
7123 rtx cc, cond;
7124
7125 cc = gen_rtx_REG (CCZ1mode, CC_REGNUM);
7126 cond = gen_rtx_EQ (SImode, cc, const0_rtx);
7127 emit_insn (gen_cstorecc4 (btarget, cond, cc, const0_rtx));
7128 }
7129 }
7130
7131 /* Expand an atomic compare and swap operation. MEM is the memory location,
7132 CMP the old value to compare MEM with and NEW_RTX the value to set if
7133 CMP == MEM. */
7134
7135 void
7136 s390_expand_cs (machine_mode mode, rtx btarget, rtx vtarget, rtx mem,
7137 rtx cmp, rtx new_rtx, bool is_weak)
7138 {
7139 switch (mode)
7140 {
7141 case E_TImode:
7142 case E_DImode:
7143 case E_SImode:
7144 s390_expand_cs_tdsi (mode, btarget, vtarget, mem, cmp, new_rtx, is_weak);
7145 break;
7146 case E_HImode:
7147 case E_QImode:
7148 s390_expand_cs_hqi (mode, btarget, vtarget, mem, cmp, new_rtx, is_weak);
7149 break;
7150 default:
7151 gcc_unreachable ();
7152 }
7153 }
7154
7155 /* Expand an atomic_exchange operation simulated with a compare-and-swap loop.
7156 The memory location MEM is set to INPUT. OUTPUT is set to the previous value
7157 of MEM. */
7158
7159 void
7160 s390_expand_atomic_exchange_tdsi (rtx output, rtx mem, rtx input)
7161 {
7162 machine_mode mode = GET_MODE (mem);
7163 rtx_code_label *csloop;
7164
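/* Exchanging in the constant 0 can be done as an atomic
   fetch-and-AND with 0 (LOAD AND AND on z196 and newer), which
   returns the old value and clears the location without needing a
   compare-and-swap loop. */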
7165 if (TARGET_Z196
7166 && (mode == DImode || mode == SImode)
7167 && CONST_INT_P (input) && INTVAL (input) == 0)
7168 {
7169 emit_move_insn (output, const0_rtx);
7170 if (mode == DImode)
7171 emit_insn (gen_atomic_fetch_anddi (output, mem, const0_rtx, input));
7172 else
7173 emit_insn (gen_atomic_fetch_andsi (output, mem, const0_rtx, input));
7174 return;
7175 }
7176
7177 input = force_reg (mode, input);
7178 emit_move_insn (output, mem);
7179 csloop = gen_label_rtx ();
7180 emit_label (csloop);
7181 s390_emit_jump (csloop, s390_emit_compare_and_swap (NE, output, mem, output,
7182 input, CCZ1mode));
7183 }
7184
7185 /* Expand an atomic operation CODE of mode MODE. MEM is the memory location
7186 and VAL the value to play with. If AFTER is true then store the value
7187 MEM holds after the operation, if AFTER is false then store the value MEM
7188 holds before the operation. If TARGET is zero then discard that value, else
7189 store it to TARGET. */
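/* Illustrative sketch of the emitted code (pseudo code):
     old = *memsi;                            // initial plain load
     do
       new = old with CODE applied inside the field;
     while (!compare_and_swap (memsi, &old, new));
     if (TARGET)
       *TARGET = ((AFTER ? new : old) >> shift) truncated to MODE;  */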
7190
7191 void
7192 s390_expand_atomic (machine_mode mode, enum rtx_code code,
7193 rtx target, rtx mem, rtx val, bool after)
7194 {
7195 struct alignment_context ac;
7196 rtx cmp;
7197 rtx new_rtx = gen_reg_rtx (SImode);
7198 rtx orig = gen_reg_rtx (SImode);
7199 rtx_code_label *csloop = gen_label_rtx ();
7200
7201 gcc_assert (!target || register_operand (target, VOIDmode));
7202 gcc_assert (MEM_P (mem));
7203
7204 init_alignment_context (&ac, mem, mode);
7205
7206 /* Shift val to the correct bit positions.
7207 Preserve "icm", but prevent "ex icm". */
7208 if (!(ac.aligned && code == SET && MEM_P (val)))
7209 val = s390_expand_mask_and_shift (val, mode, ac.shift);
7210
7211 /* Further preparation insns. */
7212 if (code == PLUS || code == MINUS)
7213 emit_move_insn (orig, val);
7214 else if (code == MULT || code == AND) /* val = "11..1<val>11..1" */
7215 val = expand_simple_binop (SImode, XOR, val, ac.modemaski,
7216 NULL_RTX, 1, OPTAB_DIRECT);
7217
7218 /* Load full word. Subsequent loads are performed by CS. */
7219 cmp = force_reg (SImode, ac.memsi);
7220
7221 /* Start CS loop. */
7222 emit_label (csloop);
7223 emit_move_insn (new_rtx, cmp);
7224
7225 /* Patch new with val at correct position. */
7226 switch (code)
7227 {
7228 case PLUS:
7229 case MINUS:
7230 val = expand_simple_binop (SImode, code, new_rtx, orig,
7231 NULL_RTX, 1, OPTAB_DIRECT);
7232 val = expand_simple_binop (SImode, AND, val, ac.modemask,
7233 NULL_RTX, 1, OPTAB_DIRECT);
7234 /* FALLTHRU */
7235 case SET:
7236 if (ac.aligned && MEM_P (val))
7237 store_bit_field (new_rtx, GET_MODE_BITSIZE (mode), 0,
7238 0, 0, SImode, val, false);
7239 else
7240 {
7241 new_rtx = expand_simple_binop (SImode, AND, new_rtx, ac.modemaski,
7242 NULL_RTX, 1, OPTAB_DIRECT);
7243 new_rtx = expand_simple_binop (SImode, IOR, new_rtx, val,
7244 NULL_RTX, 1, OPTAB_DIRECT);
7245 }
7246 break;
7247 case AND:
7248 case IOR:
7249 case XOR:
7250 new_rtx = expand_simple_binop (SImode, code, new_rtx, val,
7251 NULL_RTX, 1, OPTAB_DIRECT);
7252 break;
7253 case MULT: /* NAND */
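      /* Added note: the MULT rtx code is reused here as an encoding for
	 NAND.  Within the field selected by ac.modemask this computes
	 ~(old & val): first the AND, then an XOR with the mode mask,
	 which flips exactly the bits belonging to the field while
	 leaving the surrounding bytes intact for the CS comparison.  */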
7254 new_rtx = expand_simple_binop (SImode, AND, new_rtx, val,
7255 NULL_RTX, 1, OPTAB_DIRECT);
7256 new_rtx = expand_simple_binop (SImode, XOR, new_rtx, ac.modemask,
7257 NULL_RTX, 1, OPTAB_DIRECT);
7258 break;
7259 default:
7260 gcc_unreachable ();
7261 }
7262
7263 s390_emit_jump (csloop, s390_emit_compare_and_swap (NE, cmp,
7264 ac.memsi, cmp, new_rtx,
7265 CCZ1mode));
7266
7267 /* Return the correct part of the bitfield. */
7268 if (target)
7269 convert_move (target, expand_simple_binop (SImode, LSHIFTRT,
7270 after ? new_rtx : cmp, ac.shift,
7271 NULL_RTX, 1, OPTAB_DIRECT), 1);
7272 }
7273
7274 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
7275 We need to emit DTP-relative relocations. */
7276
7277 static void s390_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
7278
7279 static void
7280 s390_output_dwarf_dtprel (FILE *file, int size, rtx x)
7281 {
7282 switch (size)
7283 {
7284 case 4:
7285 fputs ("\t.long\t", file);
7286 break;
7287 case 8:
7288 fputs ("\t.quad\t", file);
7289 break;
7290 default:
7291 gcc_unreachable ();
7292 }
7293 output_addr_const (file, x);
7294 fputs ("@DTPOFF", file);
7295 }
7296
7297 /* Return the proper mode for REGNO being represented in the dwarf
7298 unwind table. */
7299 machine_mode
7300 s390_dwarf_frame_reg_mode (int regno)
7301 {
7302 machine_mode save_mode = default_dwarf_frame_reg_mode (regno);
7303
7304 /* Make sure not to return DImode for any GPR with -m31 -mzarch. */
7305 if (GENERAL_REGNO_P (regno))
7306 save_mode = Pmode;
7307
7308 /* The rightmost 64 bits of vector registers are call-clobbered. */
7309 if (GET_MODE_SIZE (save_mode) > 8)
7310 save_mode = DImode;
7311
7312 return save_mode;
7313 }
7314
7315 #ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
7316 /* Implement TARGET_MANGLE_TYPE. */
7317
7318 static const char *
7319 s390_mangle_type (const_tree type)
7320 {
7321 type = TYPE_MAIN_VARIANT (type);
7322
7323 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
7324 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
7325 return NULL;
7326
7327 if (type == s390_builtin_types[BT_BV16QI]) return "U6__boolc";
7328 if (type == s390_builtin_types[BT_BV8HI]) return "U6__bools";
7329 if (type == s390_builtin_types[BT_BV4SI]) return "U6__booli";
7330 if (type == s390_builtin_types[BT_BV2DI]) return "U6__booll";
7331
7332 if (TYPE_MAIN_VARIANT (type) == long_double_type_node
7333 && TARGET_LONG_DOUBLE_128)
7334 return "g";
7335
7336 /* For all other types, use normal C++ mangling. */
7337 return NULL;
7338 }
7339 #endif
7340
7341 /* In the name of slightly smaller debug output, and to cater to
7342 general assembler lossage, recognize various UNSPEC sequences
7343 and turn them back into a direct symbol reference. */
7344
7345 static rtx
7346 s390_delegitimize_address (rtx orig_x)
7347 {
7348 rtx x, y;
7349
7350 orig_x = delegitimize_mem_from_attrs (orig_x);
7351 x = orig_x;
7352
7353 /* Extract the symbol ref from:
7354 (plus:SI (reg:SI 12 %r12)
7355 (const:SI (unspec:SI [(symbol_ref/f:SI ("*.LC0"))]
7356 UNSPEC_GOTOFF/PLTOFF)))
7357 and
7358 (plus:SI (reg:SI 12 %r12)
7359 (const:SI (plus:SI (unspec:SI [(symbol_ref:SI ("L"))]
7360 UNSPEC_GOTOFF/PLTOFF)
7361 (const_int 4 [0x4])))) */
7362 if (GET_CODE (x) == PLUS
7363 && REG_P (XEXP (x, 0))
7364 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM
7365 && GET_CODE (XEXP (x, 1)) == CONST)
7366 {
7367 HOST_WIDE_INT offset = 0;
7368
7369 /* The const operand. */
7370 y = XEXP (XEXP (x, 1), 0);
7371
7372 if (GET_CODE (y) == PLUS
7373 && GET_CODE (XEXP (y, 1)) == CONST_INT)
7374 {
7375 offset = INTVAL (XEXP (y, 1));
7376 y = XEXP (y, 0);
7377 }
7378
7379 if (GET_CODE (y) == UNSPEC
7380 && (XINT (y, 1) == UNSPEC_GOTOFF
7381 || XINT (y, 1) == UNSPEC_PLTOFF))
7382 return plus_constant (Pmode, XVECEXP (y, 0, 0), offset);
7383 }
7384
7385 if (GET_CODE (x) != MEM)
7386 return orig_x;
7387
7388 x = XEXP (x, 0);
7389 if (GET_CODE (x) == PLUS
7390 && GET_CODE (XEXP (x, 1)) == CONST
7391 && GET_CODE (XEXP (x, 0)) == REG
7392 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
7393 {
7394 y = XEXP (XEXP (x, 1), 0);
7395 if (GET_CODE (y) == UNSPEC
7396 && XINT (y, 1) == UNSPEC_GOT)
7397 y = XVECEXP (y, 0, 0);
7398 else
7399 return orig_x;
7400 }
7401 else if (GET_CODE (x) == CONST)
7402 {
7403 /* Extract the symbol ref from:
7404 (mem:QI (const:DI (unspec:DI [(symbol_ref:DI ("foo"))]
7405 UNSPEC_PLT/GOTENT))) */
7406
7407 y = XEXP (x, 0);
7408 if (GET_CODE (y) == UNSPEC
7409 && (XINT (y, 1) == UNSPEC_GOTENT
7410 || XINT (y, 1) == UNSPEC_PLT))
7411 y = XVECEXP (y, 0, 0);
7412 else
7413 return orig_x;
7414 }
7415 else
7416 return orig_x;
7417
7418 if (GET_MODE (orig_x) != Pmode)
7419 {
7420 if (GET_MODE (orig_x) == BLKmode)
7421 return orig_x;
7422 y = lowpart_subreg (GET_MODE (orig_x), y, Pmode);
7423 if (y == NULL_RTX)
7424 return orig_x;
7425 }
7426 return y;
7427 }
7428
7429 /* Output operand OP to stdio stream FILE.
7430 OP is an address (register + offset) which is not used to address data;
7431 instead the rightmost bits are interpreted as the value. */
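/* Added illustrative example (assuming a shift count held in %r1 with a
   constant part of 3): this prints "3(%r1)"; for a plain constant
   operand only the 12-bit-masked offset, e.g. "3", is printed.  */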
7432
7433 static void
7434 print_addrstyle_operand (FILE *file, rtx op)
7435 {
7436 HOST_WIDE_INT offset;
7437 rtx base;
7438
7439 /* Extract base register and offset. */
7440 if (!s390_decompose_addrstyle_without_index (op, &base, &offset))
7441 gcc_unreachable ();
7442
7443 /* Sanity check. */
7444 if (base)
7445 {
7446 gcc_assert (GET_CODE (base) == REG);
7447 gcc_assert (REGNO (base) < FIRST_PSEUDO_REGISTER);
7448 gcc_assert (REGNO_REG_CLASS (REGNO (base)) == ADDR_REGS);
7449 }
7450
7451 /* Offsets are restricted to twelve bits. */
7452 fprintf (file, HOST_WIDE_INT_PRINT_DEC, offset & ((1 << 12) - 1));
7453 if (base)
7454 fprintf (file, "(%s)", reg_names[REGNO (base)]);
7455 }
7456
7457 /* Assigns the number of NOP halfwords to be emitted before and after the
7458 function label to *HW_BEFORE and *HW_AFTER. Neither pointer may be NULL.
7459 If hotpatching is disabled for the function, both values are set to zero.
7460 */
7461
7462 static void
7463 s390_function_num_hotpatch_hw (tree decl,
7464 int *hw_before,
7465 int *hw_after)
7466 {
7467 tree attr;
7468
7469 attr = lookup_attribute ("hotpatch", DECL_ATTRIBUTES (decl));
7470
7471 /* Handle the arguments of the hotpatch attribute. Values
7472 specified via the attribute override the command-line
7473 values. */
7474 if (attr)
7475 {
7476 tree args = TREE_VALUE (attr);
7477
7478 *hw_before = TREE_INT_CST_LOW (TREE_VALUE (args));
7479 *hw_after = TREE_INT_CST_LOW (TREE_VALUE (TREE_CHAIN (args)));
7480 }
7481 else
7482 {
7483 /* Use the values specified by the cmdline arguments. */
7484 *hw_before = s390_hotpatch_hw_before_label;
7485 *hw_after = s390_hotpatch_hw_after_label;
7486 }
7487 }
7488
7489 /* Write the current .machine and .machinemode specification to the assembler
7490 file. */
7491
7492 #ifdef HAVE_AS_MACHINE_MACHINEMODE
7493 static void
7494 s390_asm_output_machine_for_arch (FILE *asm_out_file)
7495 {
7496 fprintf (asm_out_file, "\t.machinemode %s\n",
7497 (TARGET_ZARCH) ? "zarch" : "esa");
7498 fprintf (asm_out_file, "\t.machine \"%s",
7499 processor_table[s390_arch].binutils_name);
7500 if (S390_USE_ARCHITECTURE_MODIFIERS)
7501 {
7502 int cpu_flags;
7503
7504 cpu_flags = processor_flags_table[(int) s390_arch];
7505 if (TARGET_HTM && !(cpu_flags & PF_TX))
7506 fprintf (asm_out_file, "+htm");
7507 else if (!TARGET_HTM && (cpu_flags & PF_TX))
7508 fprintf (asm_out_file, "+nohtm");
7509 if (TARGET_VX && !(cpu_flags & PF_VX))
7510 fprintf (asm_out_file, "+vx");
7511 else if (!TARGET_VX && (cpu_flags & PF_VX))
7512 fprintf (asm_out_file, "+novx");
7513 }
7514 fprintf (asm_out_file, "\"\n");
7515 }
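/* Added illustrative example: with -march=z13 -mzarch the directives

	.machinemode zarch
	.machine "z13"

   are emitted; a modifier such as "+nohtm" or "+vx" is appended
   whenever the effective -mhtm/-mvx setting differs from what the
   architecture level implies.  */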
7516
7517 /* Write an extra function header before the very start of the function. */
7518
7519 void
7520 s390_asm_output_function_prefix (FILE *asm_out_file,
7521 const char *fnname ATTRIBUTE_UNUSED)
7522 {
7523 if (DECL_FUNCTION_SPECIFIC_TARGET (current_function_decl) == NULL)
7524 return;
7525 /* Since only the function-specific options are saved, but not an indication
7526 of which options were explicitly set, it is too much work here to figure out
7527 which options have actually changed. Thus, generate .machine and
7528 .machinemode whenever a function has the target attribute or pragma. */
7529 fprintf (asm_out_file, "\t.machinemode push\n");
7530 fprintf (asm_out_file, "\t.machine push\n");
7531 s390_asm_output_machine_for_arch (asm_out_file);
7532 }
7533
7534 /* Write an extra function footer after the very end of the function. */
7535
7536 void
7537 s390_asm_declare_function_size (FILE *asm_out_file,
7538 const char *fnname, tree decl)
7539 {
7540 if (!flag_inhibit_size_directive)
7541 ASM_OUTPUT_MEASURED_SIZE (asm_out_file, fnname);
7542 if (DECL_FUNCTION_SPECIFIC_TARGET (decl) == NULL)
7543 return;
7544 fprintf (asm_out_file, "\t.machine pop\n");
7545 fprintf (asm_out_file, "\t.machinemode pop\n");
7546 }
7547 #endif
7548
7549 /* Write the extra assembler code needed to declare a function properly. */
7550
7551 void
7552 s390_asm_output_function_label (FILE *asm_out_file, const char *fname,
7553 tree decl)
7554 {
7555 int hw_before, hw_after;
7556
7557 s390_function_num_hotpatch_hw (decl, &hw_before, &hw_after);
7558 if (hw_before > 0)
7559 {
7560 unsigned int function_alignment;
7561 int i;
7562
7563 /* Add a trampoline code area before the function label and initialize it
7564 with two-byte nop instructions. This area can be overwritten with code
7565 that jumps to a patched version of the function. */
7566 asm_fprintf (asm_out_file, "\tnopr\t%%r0"
7567 "\t# pre-label NOPs for hotpatch (%d halfwords)\n",
7568 hw_before);
7569 for (i = 1; i < hw_before; i++)
7570 fputs ("\tnopr\t%r0\n", asm_out_file);
7571
7572 /* Note: The function label must be aligned so that (a) the bytes of the
7573 following nop do not cross a cacheline boundary, and (b) a jump address
7574 (eight bytes for 64-bit targets, four bytes for 32-bit targets) can be
7575 stored directly before the label without crossing a cacheline
7576 boundary. All this is necessary to make sure the trampoline code can
7577 be changed atomically.
7578 This alignment is done automatically via FUNCTION_BOUNDARY, but
7579 if there are NOPs before the function label, the alignment is placed
7580 before them. So it is necessary to duplicate the alignment after the
7581 NOPs. */
7582 function_alignment = MAX (8, DECL_ALIGN (decl) / BITS_PER_UNIT);
7583 if (! DECL_USER_ALIGN (decl))
7584 function_alignment
7585 = MAX (function_alignment,
7586 (unsigned int) align_functions.levels[0].get_value ());
7587 fputs ("\t# alignment for hotpatch\n", asm_out_file);
7588 ASM_OUTPUT_ALIGN (asm_out_file, align_functions.levels[0].log);
7589 }
7590
7591 if (S390_USE_TARGET_ATTRIBUTE && TARGET_DEBUG_ARG)
7592 {
7593 asm_fprintf (asm_out_file, "\t# fn:%s ar%d\n", fname, s390_arch);
7594 asm_fprintf (asm_out_file, "\t# fn:%s tu%d\n", fname, s390_tune);
7595 asm_fprintf (asm_out_file, "\t# fn:%s sg%d\n", fname, s390_stack_guard);
7596 asm_fprintf (asm_out_file, "\t# fn:%s ss%d\n", fname, s390_stack_size);
7597 asm_fprintf (asm_out_file, "\t# fn:%s bc%d\n", fname, s390_branch_cost);
7598 asm_fprintf (asm_out_file, "\t# fn:%s wf%d\n", fname,
7599 s390_warn_framesize);
7600 asm_fprintf (asm_out_file, "\t# fn:%s ba%d\n", fname, TARGET_BACKCHAIN);
7601 asm_fprintf (asm_out_file, "\t# fn:%s hd%d\n", fname, TARGET_HARD_DFP);
7602 asm_fprintf (asm_out_file, "\t# fn:%s hf%d\n", fname, !TARGET_SOFT_FLOAT);
7603 asm_fprintf (asm_out_file, "\t# fn:%s ht%d\n", fname, TARGET_OPT_HTM);
7604 asm_fprintf (asm_out_file, "\t# fn:%s vx%d\n", fname, TARGET_OPT_VX);
7605 asm_fprintf (asm_out_file, "\t# fn:%s ps%d\n", fname,
7606 TARGET_PACKED_STACK);
7607 asm_fprintf (asm_out_file, "\t# fn:%s se%d\n", fname, TARGET_SMALL_EXEC);
7608 asm_fprintf (asm_out_file, "\t# fn:%s mv%d\n", fname, TARGET_MVCLE);
7609 asm_fprintf (asm_out_file, "\t# fn:%s zv%d\n", fname, TARGET_ZVECTOR);
7610 asm_fprintf (asm_out_file, "\t# fn:%s wd%d\n", fname,
7611 s390_warn_dynamicstack_p);
7612 }
7613 ASM_OUTPUT_LABEL (asm_out_file, fname);
7614 if (hw_after > 0)
7615 asm_fprintf (asm_out_file,
7616 "\t# post-label NOPs for hotpatch (%d halfwords)\n",
7617 hw_after);
7618 }
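/* Added illustrative example: with -mhotpatch=2,4 the code above emits
   two "nopr %r0" halfwords and a duplicated alignment directive in
   front of the function label; the four post-label halfwords are only
   announced here by a comment and are presumably emitted as part of
   the function body itself.  */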
7619
7620 /* Output machine-dependent UNSPECs occurring in address constant X
7621 in assembler syntax to stdio stream FILE. Returns true if the
7622 constant X could be recognized, false otherwise. */
7623
7624 static bool
7625 s390_output_addr_const_extra (FILE *file, rtx x)
7626 {
7627 if (GET_CODE (x) == UNSPEC && XVECLEN (x, 0) == 1)
7628 switch (XINT (x, 1))
7629 {
7630 case UNSPEC_GOTENT:
7631 output_addr_const (file, XVECEXP (x, 0, 0));
7632 fprintf (file, "@GOTENT");
7633 return true;
7634 case UNSPEC_GOT:
7635 output_addr_const (file, XVECEXP (x, 0, 0));
7636 fprintf (file, "@GOT");
7637 return true;
7638 case UNSPEC_GOTOFF:
7639 output_addr_const (file, XVECEXP (x, 0, 0));
7640 fprintf (file, "@GOTOFF");
7641 return true;
7642 case UNSPEC_PLT:
7643 output_addr_const (file, XVECEXP (x, 0, 0));
7644 fprintf (file, "@PLT");
7645 return true;
7646 case UNSPEC_PLTOFF:
7647 output_addr_const (file, XVECEXP (x, 0, 0));
7648 fprintf (file, "@PLTOFF");
7649 return true;
7650 case UNSPEC_TLSGD:
7651 output_addr_const (file, XVECEXP (x, 0, 0));
7652 fprintf (file, "@TLSGD");
7653 return true;
7654 case UNSPEC_TLSLDM:
7655 assemble_name (file, get_some_local_dynamic_name ());
7656 fprintf (file, "@TLSLDM");
7657 return true;
7658 case UNSPEC_DTPOFF:
7659 output_addr_const (file, XVECEXP (x, 0, 0));
7660 fprintf (file, "@DTPOFF");
7661 return true;
7662 case UNSPEC_NTPOFF:
7663 output_addr_const (file, XVECEXP (x, 0, 0));
7664 fprintf (file, "@NTPOFF");
7665 return true;
7666 case UNSPEC_GOTNTPOFF:
7667 output_addr_const (file, XVECEXP (x, 0, 0));
7668 fprintf (file, "@GOTNTPOFF");
7669 return true;
7670 case UNSPEC_INDNTPOFF:
7671 output_addr_const (file, XVECEXP (x, 0, 0));
7672 fprintf (file, "@INDNTPOFF");
7673 return true;
7674 }
7675
7676 if (GET_CODE (x) == UNSPEC && XVECLEN (x, 0) == 2)
7677 switch (XINT (x, 1))
7678 {
7679 case UNSPEC_POOL_OFFSET:
7680 x = gen_rtx_MINUS (GET_MODE (x), XVECEXP (x, 0, 0), XVECEXP (x, 0, 1));
7681 output_addr_const (file, x);
7682 return true;
7683 }
7684 return false;
7685 }
7686
7687 /* Output address operand ADDR in assembler syntax to
7688 stdio stream FILE. */
7689
7690 void
7691 print_operand_address (FILE *file, rtx addr)
7692 {
7693 struct s390_address ad;
7694 memset (&ad, 0, sizeof (s390_address));
7695
7696 if (s390_loadrelative_operand_p (addr, NULL, NULL))
7697 {
7698 if (!TARGET_Z10)
7699 {
7700 output_operand_lossage ("symbolic memory references are "
7701 "only supported on z10 or later");
7702 return;
7703 }
7704 output_addr_const (file, addr);
7705 return;
7706 }
7707
7708 if (!s390_decompose_address (addr, &ad)
7709 || (ad.base && !REGNO_OK_FOR_BASE_P (REGNO (ad.base)))
7710 || (ad.indx && !REGNO_OK_FOR_INDEX_P (REGNO (ad.indx))))
7711 output_operand_lossage ("cannot decompose address");
7712
7713 if (ad.disp)
7714 output_addr_const (file, ad.disp);
7715 else
7716 fprintf (file, "0");
7717
7718 if (ad.base && ad.indx)
7719 fprintf (file, "(%s,%s)", reg_names[REGNO (ad.indx)],
7720 reg_names[REGNO (ad.base)]);
7721 else if (ad.base)
7722 fprintf (file, "(%s)", reg_names[REGNO (ad.base)]);
7723 }
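/* Added illustrative example: for an address decomposing into base %r2,
   index %r3 and displacement 8, the output is "8(%r3,%r2)" --
   displacement first, then index and base in s390 assembler order.  */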
7724
7725 /* Output operand X in assembler syntax to stdio stream FILE.
7726 CODE specified the format flag. The following format flags
7727 are recognized:
7728
7729 'A': On z14 or higher: if the operand is a MEM, print the alignment
7730 hint usable with vl/vst, prefixed by a comma.
7731 'C': print opcode suffix for branch condition.
7732 'D': print opcode suffix for inverse branch condition.
7733 'E': print opcode suffix for branch on index instruction.
7734 'G': print the size of the operand in bytes.
7735 'J': print tls_load/tls_gdcall/tls_ldcall suffix
7736 'M': print the second word of a TImode operand.
7737 'N': print the second word of a DImode operand.
7738 'O': print only the displacement of a memory reference or address.
7739 'R': print only the base register of a memory reference or address.
7740 'S': print S-type memory reference (base+displacement).
7741 'Y': print address style operand without index (e.g. shift count or setmem
7742 operand).
7743
7744 'b': print integer X as if it's an unsigned byte.
7745 'c': print integer X as if it's a signed byte.
7746 'e': "end" of contiguous bitmask X in either DImode or vector inner mode.
7747 'f': "end" of contiguous bitmask X in SImode.
7748 'h': print integer X as if it's a signed halfword.
7749 'i': print the first nonzero HImode part of X.
7750 'j': print the first HImode part unequal to -1 of X.
7751 'k': print the first nonzero SImode part of X.
7752 'm': print the first SImode part unequal to -1 of X.
7753 'o': print integer X as if it's an unsigned 32-bit word.
7754 's': "start" of contiguous bitmask X in either DImode or vector inner mode.
7755 't': CONST_INT: "start" of contiguous bitmask X in SImode.
7756 CONST_VECTOR: Generate a bitmask for vgbm instruction.
7757 'x': print integer X as if it's an unsigned halfword.
7758 'v': print register number as vector register (v1 instead of f1).
7759 */
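/* Added illustrative example: for a DImode operand held in the register
   pair %r2/%r3, "%N0" in an output template prints "%r3" (the second
   word); for a MEM known to be 16-byte aligned, "%A0" prints ",4" as a
   vl/vst alignment hint on z14 or later.  */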
7760
7761 void
7762 print_operand (FILE *file, rtx x, int code)
7763 {
7764 HOST_WIDE_INT ival;
7765
7766 switch (code)
7767 {
7768 case 'A':
7769 if (TARGET_VECTOR_LOADSTORE_ALIGNMENT_HINTS && MEM_P (x))
7770 {
7771 if (MEM_ALIGN (x) >= 128)
7772 fprintf (file, ",4");
7773 else if (MEM_ALIGN (x) == 64)
7774 fprintf (file, ",3");
7775 }
7776 return;
7777 case 'C':
7778 fprintf (file, s390_branch_condition_mnemonic (x, FALSE));
7779 return;
7780
7781 case 'D':
7782 fprintf (file, s390_branch_condition_mnemonic (x, TRUE));
7783 return;
7784
7785 case 'E':
7786 if (GET_CODE (x) == LE)
7787 fprintf (file, "l");
7788 else if (GET_CODE (x) == GT)
7789 fprintf (file, "h");
7790 else
7791 output_operand_lossage ("invalid comparison operator "
7792 "for 'E' output modifier");
7793 return;
7794
7795 case 'J':
7796 if (GET_CODE (x) == SYMBOL_REF)
7797 {
7798 fprintf (file, "%s", ":tls_load:");
7799 output_addr_const (file, x);
7800 }
7801 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLSGD)
7802 {
7803 fprintf (file, "%s", ":tls_gdcall:");
7804 output_addr_const (file, XVECEXP (x, 0, 0));
7805 }
7806 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLSLDM)
7807 {
7808 fprintf (file, "%s", ":tls_ldcall:");
7809 const char *name = get_some_local_dynamic_name ();
7810 gcc_assert (name);
7811 assemble_name (file, name);
7812 }
7813 else
7814 output_operand_lossage ("invalid reference for 'J' output modifier");
7815 return;
7816
7817 case 'G':
7818 fprintf (file, "%u", GET_MODE_SIZE (GET_MODE (x)));
7819 return;
7820
7821 case 'O':
7822 {
7823 struct s390_address ad;
7824 int ret;
7825
7826 ret = s390_decompose_address (MEM_P (x) ? XEXP (x, 0) : x, &ad);
7827
7828 if (!ret
7829 || (ad.base && !REGNO_OK_FOR_BASE_P (REGNO (ad.base)))
7830 || ad.indx)
7831 {
7832 output_operand_lossage ("invalid address for 'O' output modifier");
7833 return;
7834 }
7835
7836 if (ad.disp)
7837 output_addr_const (file, ad.disp);
7838 else
7839 fprintf (file, "0");
7840 }
7841 return;
7842
7843 case 'R':
7844 {
7845 struct s390_address ad;
7846 int ret;
7847
7848 ret = s390_decompose_address (MEM_P (x) ? XEXP (x, 0) : x, &ad);
7849
7850 if (!ret
7851 || (ad.base && !REGNO_OK_FOR_BASE_P (REGNO (ad.base)))
7852 || ad.indx)
7853 {
7854 output_operand_lossage ("invalid address for 'R' output modifier");
7855 return;
7856 }
7857
7858 if (ad.base)
7859 fprintf (file, "%s", reg_names[REGNO (ad.base)]);
7860 else
7861 fprintf (file, "0");
7862 }
7863 return;
7864
7865 case 'S':
7866 {
7867 struct s390_address ad;
7868 int ret;
7869
7870 if (!MEM_P (x))
7871 {
7872 output_operand_lossage ("memory reference expected for "
7873 "'S' output modifier");
7874 return;
7875 }
7876 ret = s390_decompose_address (XEXP (x, 0), &ad);
7877
7878 if (!ret
7879 || (ad.base && !REGNO_OK_FOR_BASE_P (REGNO (ad.base)))
7880 || ad.indx)
7881 {
7882 output_operand_lossage ("invalid address for 'S' output modifier");
7883 return;
7884 }
7885
7886 if (ad.disp)
7887 output_addr_const (file, ad.disp);
7888 else
7889 fprintf (file, "0");
7890
7891 if (ad.base)
7892 fprintf (file, "(%s)", reg_names[REGNO (ad.base)]);
7893 }
7894 return;
7895
7896 case 'N':
7897 if (GET_CODE (x) == REG)
7898 x = gen_rtx_REG (GET_MODE (x), REGNO (x) + 1);
7899 else if (GET_CODE (x) == MEM)
7900 x = change_address (x, VOIDmode,
7901 plus_constant (Pmode, XEXP (x, 0), 4));
7902 else
7903 output_operand_lossage ("register or memory expression expected "
7904 "for 'N' output modifier");
7905 break;
7906
7907 case 'M':
7908 if (GET_CODE (x) == REG)
7909 x = gen_rtx_REG (GET_MODE (x), REGNO (x) + 1);
7910 else if (GET_CODE (x) == MEM)
7911 x = change_address (x, VOIDmode,
7912 plus_constant (Pmode, XEXP (x, 0), 8));
7913 else
7914 output_operand_lossage ("register or memory expression expected "
7915 "for 'M' output modifier");
7916 break;
7917
7918 case 'Y':
7919 print_addrstyle_operand (file, x);
7920 return;
7921 }
7922
7923 switch (GET_CODE (x))
7924 {
7925 case REG:
7926 /* Print FP regs as fx instead of vx when they are accessed
7927 through non-vector mode. */
7928 if (code == 'v'
7929 || VECTOR_NOFP_REG_P (x)
7930 || (FP_REG_P (x) && VECTOR_MODE_P (GET_MODE (x)))
7931 || (VECTOR_REG_P (x)
7932 && (GET_MODE_SIZE (GET_MODE (x)) /
7933 s390_class_max_nregs (FP_REGS, GET_MODE (x))) > 8))
7934 fprintf (file, "%%v%s", reg_names[REGNO (x)] + 2);
7935 else
7936 fprintf (file, "%s", reg_names[REGNO (x)]);
7937 break;
7938
7939 case MEM:
7940 output_address (GET_MODE (x), XEXP (x, 0));
7941 break;
7942
7943 case CONST:
7944 case CODE_LABEL:
7945 case LABEL_REF:
7946 case SYMBOL_REF:
7947 output_addr_const (file, x);
7948 break;
7949
7950 case CONST_INT:
7951 ival = INTVAL (x);
7952 switch (code)
7953 {
7954 case 0:
7955 break;
7956 case 'b':
7957 ival &= 0xff;
7958 break;
7959 case 'c':
7960 ival = ((ival & 0xff) ^ 0x80) - 0x80;
7961 break;
7962 case 'x':
7963 ival &= 0xffff;
7964 break;
7965 case 'h':
7966 ival = ((ival & 0xffff) ^ 0x8000) - 0x8000;
7967 break;
7968 case 'i':
7969 ival = s390_extract_part (x, HImode, 0);
7970 break;
7971 case 'j':
7972 ival = s390_extract_part (x, HImode, -1);
7973 break;
7974 case 'k':
7975 ival = s390_extract_part (x, SImode, 0);
7976 break;
7977 case 'm':
7978 ival = s390_extract_part (x, SImode, -1);
7979 break;
7980 case 'o':
7981 ival &= 0xffffffff;
7982 break;
7983 case 'e': case 'f':
7984 case 's': case 't':
7985 {
7986 int start, end;
7987 int len;
7988 bool ok;
7989
7990 len = (code == 's' || code == 'e' ? 64 : 32);
7991 ok = s390_contiguous_bitmask_p (ival, true, len, &start, &end);
7992 gcc_assert (ok);
7993 if (code == 's' || code == 't')
7994 ival = start;
7995 else
7996 ival = end;
7997 }
7998 break;
7999 default:
8000 output_operand_lossage ("invalid constant for output modifier '%c'", code);
8001 }
8002 fprintf (file, HOST_WIDE_INT_PRINT_DEC, ival);
8003 break;
8004
8005 case CONST_WIDE_INT:
8006 if (code == 'b')
8007 fprintf (file, HOST_WIDE_INT_PRINT_DEC,
8008 CONST_WIDE_INT_ELT (x, 0) & 0xff);
8009 else if (code == 'x')
8010 fprintf (file, HOST_WIDE_INT_PRINT_DEC,
8011 CONST_WIDE_INT_ELT (x, 0) & 0xffff);
8012 else if (code == 'h')
8013 fprintf (file, HOST_WIDE_INT_PRINT_DEC,
8014 ((CONST_WIDE_INT_ELT (x, 0) & 0xffff) ^ 0x8000) - 0x8000);
8015 else
8016 {
8017 if (code == 0)
8018 output_operand_lossage ("invalid constant - try using "
8019 "an output modifier");
8020 else
8021 output_operand_lossage ("invalid constant for output modifier '%c'",
8022 code);
8023 }
8024 break;
8025 case CONST_VECTOR:
8026 switch (code)
8027 {
8028 case 'h':
8029 gcc_assert (const_vec_duplicate_p (x));
8030 fprintf (file, HOST_WIDE_INT_PRINT_DEC,
8031 ((INTVAL (XVECEXP (x, 0, 0)) & 0xffff) ^ 0x8000) - 0x8000);
8032 break;
8033 case 'e':
8034 case 's':
8035 {
8036 int start, end;
8037 bool ok;
8038
8039 ok = s390_contiguous_bitmask_vector_p (x, &start, &end);
8040 gcc_assert (ok);
8041 ival = (code == 's') ? start : end;
8042 fprintf (file, HOST_WIDE_INT_PRINT_DEC, ival);
8043 }
8044 break;
8045 case 't':
8046 {
8047 unsigned mask;
8048 bool ok = s390_bytemask_vector_p (x, &mask);
8049 gcc_assert (ok);
8050 fprintf (file, "%u", mask);
8051 }
8052 break;
8053
8054 default:
8055 output_operand_lossage ("invalid constant vector for output "
8056 "modifier '%c'", code);
8057 }
8058 break;
8059
8060 default:
8061 if (code == 0)
8062 output_operand_lossage ("invalid expression - try using "
8063 "an output modifier");
8064 else
8065 output_operand_lossage ("invalid expression for output "
8066 "modifier '%c'", code);
8067 break;
8068 }
8069 }
8070
8071 /* Target hook for assembling integer objects. We need to define it
8072 here to work around a bug in some versions of GAS, which couldn't
8073 handle values smaller than INT_MIN when printed in decimal. */
8074
8075 static bool
8076 s390_assemble_integer (rtx x, unsigned int size, int aligned_p)
8077 {
8078 if (size == 8 && aligned_p
8079 && GET_CODE (x) == CONST_INT && INTVAL (x) < INT_MIN)
8080 {
8081 fprintf (asm_out_file, "\t.quad\t" HOST_WIDE_INT_PRINT_HEX "\n",
8082 INTVAL (x));
8083 return true;
8084 }
8085 return default_assemble_integer (x, size, aligned_p);
8086 }
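/* Added illustrative example: for a CONST_INT of -2147483649 (one below
   INT_MIN) the hook above prints

	.quad	0xffffffff7fffffff

   sidestepping the decimal form that affected GAS versions could not
   assemble.  */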
8087
8088 /* Returns true if register REGNO is used for forming
8089 a memory address in expression X. */
8090
8091 static bool
8092 reg_used_in_mem_p (int regno, rtx x)
8093 {
8094 enum rtx_code code = GET_CODE (x);
8095 int i, j;
8096 const char *fmt;
8097
8098 if (code == MEM)
8099 {
8100 if (refers_to_regno_p (regno, XEXP (x, 0)))
8101 return true;
8102 }
8103 else if (code == SET
8104 && GET_CODE (SET_DEST (x)) == PC)
8105 {
8106 if (refers_to_regno_p (regno, SET_SRC (x)))
8107 return true;
8108 }
8109
8110 fmt = GET_RTX_FORMAT (code);
8111 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
8112 {
8113 if (fmt[i] == 'e'
8114 && reg_used_in_mem_p (regno, XEXP (x, i)))
8115 return true;
8116
8117 else if (fmt[i] == 'E')
8118 for (j = 0; j < XVECLEN (x, i); j++)
8119 if (reg_used_in_mem_p (regno, XVECEXP (x, i, j)))
8120 return true;
8121 }
8122 return false;
8123 }
8124
8125 /* Returns true if expression DEP_RTX sets an address register
8126 used by instruction INSN to address memory. */
8127
8128 static bool
8129 addr_generation_dependency_p (rtx dep_rtx, rtx_insn *insn)
8130 {
8131 rtx target, pat;
8132
8133 if (NONJUMP_INSN_P (dep_rtx))
8134 dep_rtx = PATTERN (dep_rtx);
8135
8136 if (GET_CODE (dep_rtx) == SET)
8137 {
8138 target = SET_DEST (dep_rtx);
8139 if (GET_CODE (target) == STRICT_LOW_PART)
8140 target = XEXP (target, 0);
8141 while (GET_CODE (target) == SUBREG)
8142 target = SUBREG_REG (target);
8143
8144 if (GET_CODE (target) == REG)
8145 {
8146 int regno = REGNO (target);
8147
8148 if (s390_safe_attr_type (insn) == TYPE_LA)
8149 {
8150 pat = PATTERN (insn);
8151 if (GET_CODE (pat) == PARALLEL)
8152 {
8153 gcc_assert (XVECLEN (pat, 0) == 2);
8154 pat = XVECEXP (pat, 0, 0);
8155 }
8156 gcc_assert (GET_CODE (pat) == SET);
8157 return refers_to_regno_p (regno, SET_SRC (pat));
8158 }
8159 else if (get_attr_atype (insn) == ATYPE_AGEN)
8160 return reg_used_in_mem_p (regno, PATTERN (insn));
8161 }
8162 }
8163 return false;
8164 }
8165
8166 /* Return 1 if DEP_INSN sets a register used by INSN in the agen unit. */
8167
8168 int
8169 s390_agen_dep_p (rtx_insn *dep_insn, rtx_insn *insn)
8170 {
8171 rtx dep_rtx = PATTERN (dep_insn);
8172 int i;
8173
8174 if (GET_CODE (dep_rtx) == SET
8175 && addr_generation_dependency_p (dep_rtx, insn))
8176 return 1;
8177 else if (GET_CODE (dep_rtx) == PARALLEL)
8178 {
8179 for (i = 0; i < XVECLEN (dep_rtx, 0); i++)
8180 {
8181 if (addr_generation_dependency_p (XVECEXP (dep_rtx, 0, i), insn))
8182 return 1;
8183 }
8184 }
8185 return 0;
8186 }
8187
8188
8189 /* A C statement (sans semicolon) to update the integer scheduling priority
8190 INSN_PRIORITY (INSN). Increase the priority to execute INSN earlier;
8191 reduce the priority to execute INSN later. Do not define this macro if
8192 you do not need to adjust the scheduling priorities of insns.
8193
8194 A STD instruction should be scheduled earlier,
8195 in order to use the bypass. */
8196 static int
8197 s390_adjust_priority (rtx_insn *insn, int priority)
8198 {
8199 if (! INSN_P (insn))
8200 return priority;
8201
8202 if (s390_tune <= PROCESSOR_2064_Z900)
8203 return priority;
8204
8205 switch (s390_safe_attr_type (insn))
8206 {
8207 case TYPE_FSTOREDF:
8208 case TYPE_FSTORESF:
8209 priority = priority << 3;
8210 break;
8211 case TYPE_STORE:
8212 case TYPE_STM:
8213 priority = priority << 1;
8214 break;
8215 default:
8216 break;
8217 }
8218 return priority;
8219 }
8220
8221
8222 /* The number of instructions that can be issued per cycle. */
8223
8224 static int
8225 s390_issue_rate (void)
8226 {
8227 switch (s390_tune)
8228 {
8229 case PROCESSOR_2084_Z990:
8230 case PROCESSOR_2094_Z9_109:
8231 case PROCESSOR_2094_Z9_EC:
8232 case PROCESSOR_2817_Z196:
8233 return 3;
8234 case PROCESSOR_2097_Z10:
8235 return 2;
8236 case PROCESSOR_2064_Z900:
8237 /* Starting with EC12 we use the sched_reorder hook to take care
8238 of instruction dispatch constraints. The algorithm only
8239 picks the best instruction and assumes only a single
8240 instruction gets issued per cycle. */
8241 case PROCESSOR_2827_ZEC12:
8242 case PROCESSOR_2964_Z13:
8243 case PROCESSOR_3906_Z14:
8244 default:
8245 return 1;
8246 }
8247 }
8248
8249 static int
8250 s390_first_cycle_multipass_dfa_lookahead (void)
8251 {
8252 return 4;
8253 }
8254
8255 static void
8256 annotate_constant_pool_refs_1 (rtx *x)
8257 {
8258 int i, j;
8259 const char *fmt;
8260
8261 gcc_assert (GET_CODE (*x) != SYMBOL_REF
8262 || !CONSTANT_POOL_ADDRESS_P (*x));
8263
8264 /* Literal pool references can only occur inside a MEM ... */
8265 if (GET_CODE (*x) == MEM)
8266 {
8267 rtx memref = XEXP (*x, 0);
8268
8269 if (GET_CODE (memref) == SYMBOL_REF
8270 && CONSTANT_POOL_ADDRESS_P (memref))
8271 {
8272 rtx base = cfun->machine->base_reg;
8273 rtx addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, memref, base),
8274 UNSPEC_LTREF);
8275
8276 *x = replace_equiv_address (*x, addr);
8277 return;
8278 }
8279
8280 if (GET_CODE (memref) == CONST
8281 && GET_CODE (XEXP (memref, 0)) == PLUS
8282 && GET_CODE (XEXP (XEXP (memref, 0), 1)) == CONST_INT
8283 && GET_CODE (XEXP (XEXP (memref, 0), 0)) == SYMBOL_REF
8284 && CONSTANT_POOL_ADDRESS_P (XEXP (XEXP (memref, 0), 0)))
8285 {
8286 HOST_WIDE_INT off = INTVAL (XEXP (XEXP (memref, 0), 1));
8287 rtx sym = XEXP (XEXP (memref, 0), 0);
8288 rtx base = cfun->machine->base_reg;
8289 rtx addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, sym, base),
8290 UNSPEC_LTREF);
8291
8292 *x = replace_equiv_address (*x, plus_constant (Pmode, addr, off));
8293 return;
8294 }
8295 }
8296
8297 /* ... or a load-address type pattern. */
8298 if (GET_CODE (*x) == SET)
8299 {
8300 rtx addrref = SET_SRC (*x);
8301
8302 if (GET_CODE (addrref) == SYMBOL_REF
8303 && CONSTANT_POOL_ADDRESS_P (addrref))
8304 {
8305 rtx base = cfun->machine->base_reg;
8306 rtx addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, addrref, base),
8307 UNSPEC_LTREF);
8308
8309 SET_SRC (*x) = addr;
8310 return;
8311 }
8312
8313 if (GET_CODE (addrref) == CONST
8314 && GET_CODE (XEXP (addrref, 0)) == PLUS
8315 && GET_CODE (XEXP (XEXP (addrref, 0), 1)) == CONST_INT
8316 && GET_CODE (XEXP (XEXP (addrref, 0), 0)) == SYMBOL_REF
8317 && CONSTANT_POOL_ADDRESS_P (XEXP (XEXP (addrref, 0), 0)))
8318 {
8319 HOST_WIDE_INT off = INTVAL (XEXP (XEXP (addrref, 0), 1));
8320 rtx sym = XEXP (XEXP (addrref, 0), 0);
8321 rtx base = cfun->machine->base_reg;
8322 rtx addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, sym, base),
8323 UNSPEC_LTREF);
8324
8325 SET_SRC (*x) = plus_constant (Pmode, addr, off);
8326 return;
8327 }
8328 }
8329
8330 fmt = GET_RTX_FORMAT (GET_CODE (*x));
8331 for (i = GET_RTX_LENGTH (GET_CODE (*x)) - 1; i >= 0; i--)
8332 {
8333 if (fmt[i] == 'e')
8334 {
8335 annotate_constant_pool_refs_1 (&XEXP (*x, i));
8336 }
8337 else if (fmt[i] == 'E')
8338 {
8339 for (j = 0; j < XVECLEN (*x, i); j++)
8340 annotate_constant_pool_refs_1 (&XVECEXP (*x, i, j));
8341 }
8342 }
8343 }
8344
8345 /* Annotate every literal pool reference in INSN by an UNSPEC_LTREF expression.
8346 Fix up MEMs as required.
8347 Skip insns which support relative addressing, because they do not use a base
8348 register. */
8349
8350 static void
8351 annotate_constant_pool_refs (rtx_insn *insn)
8352 {
8353 if (s390_safe_relative_long_p (insn))
8354 return;
8355 annotate_constant_pool_refs_1 (&PATTERN (insn));
8356 }
8357
8358 static void
8359 find_constant_pool_ref_1 (rtx x, rtx *ref)
8360 {
8361 int i, j;
8362 const char *fmt;
8363
8364 /* Ignore POOL_ENTRY insns. */
8365 if (GET_CODE (x) == UNSPEC_VOLATILE
8366 && XINT (x, 1) == UNSPECV_POOL_ENTRY)
8367 return;
8368
8369 gcc_assert (GET_CODE (x) != SYMBOL_REF
8370 || !CONSTANT_POOL_ADDRESS_P (x));
8371
8372 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_LTREF)
8373 {
8374 rtx sym = XVECEXP (x, 0, 0);
8375 gcc_assert (GET_CODE (sym) == SYMBOL_REF
8376 && CONSTANT_POOL_ADDRESS_P (sym));
8377
8378 if (*ref == NULL_RTX)
8379 *ref = sym;
8380 else
8381 gcc_assert (*ref == sym);
8382
8383 return;
8384 }
8385
8386 fmt = GET_RTX_FORMAT (GET_CODE (x));
8387 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
8388 {
8389 if (fmt[i] == 'e')
8390 {
8391 find_constant_pool_ref_1 (XEXP (x, i), ref);
8392 }
8393 else if (fmt[i] == 'E')
8394 {
8395 for (j = 0; j < XVECLEN (x, i); j++)
8396 find_constant_pool_ref_1 (XVECEXP (x, i, j), ref);
8397 }
8398 }
8399 }
8400
8401 /* Find an annotated literal pool symbol referenced in INSN,
8402 and store it at REF. Will abort if INSN contains references to
8403 more than one such pool symbol; multiple references to the same
8404 symbol are allowed, however.
8405
8406 The rtx pointed to by REF must be initialized to NULL_RTX
8407 by the caller before calling this routine.
8408
8409 Skip insns which support relative addressing, because they do not use a base
8410 register. */
8411
8412 static void
8413 find_constant_pool_ref (rtx_insn *insn, rtx *ref)
8414 {
8415 if (s390_safe_relative_long_p (insn))
8416 return;
8417 find_constant_pool_ref_1 (PATTERN (insn), ref);
8418 }
8419
8420 static void
8421 replace_constant_pool_ref_1 (rtx *x, rtx ref, rtx offset)
8422 {
8423 int i, j;
8424 const char *fmt;
8425
8426 gcc_assert (*x != ref);
8427
8428 if (GET_CODE (*x) == UNSPEC
8429 && XINT (*x, 1) == UNSPEC_LTREF
8430 && XVECEXP (*x, 0, 0) == ref)
8431 {
8432 *x = gen_rtx_PLUS (Pmode, XVECEXP (*x, 0, 1), offset);
8433 return;
8434 }
8435
8436 if (GET_CODE (*x) == PLUS
8437 && GET_CODE (XEXP (*x, 1)) == CONST_INT
8438 && GET_CODE (XEXP (*x, 0)) == UNSPEC
8439 && XINT (XEXP (*x, 0), 1) == UNSPEC_LTREF
8440 && XVECEXP (XEXP (*x, 0), 0, 0) == ref)
8441 {
8442 rtx addr = gen_rtx_PLUS (Pmode, XVECEXP (XEXP (*x, 0), 0, 1), offset);
8443 *x = plus_constant (Pmode, addr, INTVAL (XEXP (*x, 1)));
8444 return;
8445 }
8446
8447 fmt = GET_RTX_FORMAT (GET_CODE (*x));
8448 for (i = GET_RTX_LENGTH (GET_CODE (*x)) - 1; i >= 0; i--)
8449 {
8450 if (fmt[i] == 'e')
8451 {
8452 replace_constant_pool_ref_1 (&XEXP (*x, i), ref, offset);
8453 }
8454 else if (fmt[i] == 'E')
8455 {
8456 for (j = 0; j < XVECLEN (*x, i); j++)
8457 replace_constant_pool_ref_1 (&XVECEXP (*x, i, j), ref, offset);
8458 }
8459 }
8460 }
8461
8462 /* Replace every reference to the annotated literal pool
8463 symbol REF in INSN by its base plus OFFSET.
8464 Skip insns which support relative addressing, because they do not use a base
8465 register. */
8466
8467 static void
8468 replace_constant_pool_ref (rtx_insn *insn, rtx ref, rtx offset)
8469 {
8470 if (s390_safe_relative_long_p (insn))
8471 return;
8472 replace_constant_pool_ref_1 (&PATTERN (insn), ref, offset);
8473 }
8474
8475 /* We keep a list of constants which we have to add to internal
8476 constant tables in the middle of large functions. */
8477
8478 #define NR_C_MODES 32
8479 machine_mode constant_modes[NR_C_MODES] =
8480 {
8481 TFmode, TImode, TDmode,
8482 V16QImode, V8HImode, V4SImode, V2DImode, V1TImode,
8483 V4SFmode, V2DFmode, V1TFmode,
8484 DFmode, DImode, DDmode,
8485 V8QImode, V4HImode, V2SImode, V1DImode, V2SFmode, V1DFmode,
8486 SFmode, SImode, SDmode,
8487 V4QImode, V2HImode, V1SImode, V1SFmode,
8488 HImode,
8489 V2QImode, V1HImode,
8490 QImode,
8491 V1QImode
8492 };
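/* Added note: the array above is ordered by decreasing size and hence
   by decreasing alignment requirement; s390_dump_pool below walks the
   modes in this order, so each constant is emitted with sufficient
   alignment without extra padding between entries.  */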
8493
8494 struct constant
8495 {
8496 struct constant *next;
8497 rtx value;
8498 rtx_code_label *label;
8499 };
8500
8501 struct constant_pool
8502 {
8503 struct constant_pool *next;
8504 rtx_insn *first_insn;
8505 rtx_insn *pool_insn;
8506 bitmap insns;
8507 rtx_insn *emit_pool_after;
8508
8509 struct constant *constants[NR_C_MODES];
8510 struct constant *execute;
8511 rtx_code_label *label;
8512 int size;
8513 };
8514
8515 /* Allocate new constant_pool structure. */
8516
8517 static struct constant_pool *
8518 s390_alloc_pool (void)
8519 {
8520 struct constant_pool *pool;
8521 int i;
8522
8523 pool = (struct constant_pool *) xmalloc (sizeof *pool);
8524 pool->next = NULL;
8525 for (i = 0; i < NR_C_MODES; i++)
8526 pool->constants[i] = NULL;
8527
8528 pool->execute = NULL;
8529 pool->label = gen_label_rtx ();
8530 pool->first_insn = NULL;
8531 pool->pool_insn = NULL;
8532 pool->insns = BITMAP_ALLOC (NULL);
8533 pool->size = 0;
8534 pool->emit_pool_after = NULL;
8535
8536 return pool;
8537 }
8538
8539 /* Create new constant pool covering instructions starting at INSN
8540 and chain it to the end of POOL_LIST. */
8541
8542 static struct constant_pool *
8543 s390_start_pool (struct constant_pool **pool_list, rtx_insn *insn)
8544 {
8545 struct constant_pool *pool, **prev;
8546
8547 pool = s390_alloc_pool ();
8548 pool->first_insn = insn;
8549
8550 for (prev = pool_list; *prev; prev = &(*prev)->next)
8551 ;
8552 *prev = pool;
8553
8554 return pool;
8555 }
8556
8557 /* End range of instructions covered by POOL at INSN and emit
8558 placeholder insn representing the pool. */
8559
8560 static void
8561 s390_end_pool (struct constant_pool *pool, rtx_insn *insn)
8562 {
8563 rtx pool_size = GEN_INT (pool->size + 8 /* alignment slop */);
8564
8565 if (!insn)
8566 insn = get_last_insn ();
8567
8568 pool->pool_insn = emit_insn_after (gen_pool (pool_size), insn);
8569 INSN_ADDRESSES_NEW (pool->pool_insn, -1);
8570 }
8571
8572 /* Add INSN to the list of insns covered by POOL. */
8573
8574 static void
8575 s390_add_pool_insn (struct constant_pool *pool, rtx insn)
8576 {
8577 bitmap_set_bit (pool->insns, INSN_UID (insn));
8578 }
8579
8580 /* Return pool out of POOL_LIST that covers INSN. */
8581
8582 static struct constant_pool *
8583 s390_find_pool (struct constant_pool *pool_list, rtx insn)
8584 {
8585 struct constant_pool *pool;
8586
8587 for (pool = pool_list; pool; pool = pool->next)
8588 if (bitmap_bit_p (pool->insns, INSN_UID (insn)))
8589 break;
8590
8591 return pool;
8592 }
8593
8594 /* Add constant VAL of mode MODE to the constant pool POOL. */
8595
8596 static void
8597 s390_add_constant (struct constant_pool *pool, rtx val, machine_mode mode)
8598 {
8599 struct constant *c;
8600 int i;
8601
8602 for (i = 0; i < NR_C_MODES; i++)
8603 if (constant_modes[i] == mode)
8604 break;
8605 gcc_assert (i != NR_C_MODES);
8606
8607 for (c = pool->constants[i]; c != NULL; c = c->next)
8608 if (rtx_equal_p (val, c->value))
8609 break;
8610
8611 if (c == NULL)
8612 {
8613 c = (struct constant *) xmalloc (sizeof *c);
8614 c->value = val;
8615 c->label = gen_label_rtx ();
8616 c->next = pool->constants[i];
8617 pool->constants[i] = c;
8618 pool->size += GET_MODE_SIZE (mode);
8619 }
8620 }
8621
8622 /* Return an rtx that represents the offset of X from the start of
8623 pool POOL. */
8624
8625 static rtx
8626 s390_pool_offset (struct constant_pool *pool, rtx x)
8627 {
8628 rtx label;
8629
8630 label = gen_rtx_LABEL_REF (GET_MODE (x), pool->label);
8631 x = gen_rtx_UNSPEC (GET_MODE (x), gen_rtvec (2, x, label),
8632 UNSPEC_POOL_OFFSET);
8633 return gen_rtx_CONST (GET_MODE (x), x);
8634 }
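/* Added cross-reference note: the UNSPEC_POOL_OFFSET wrapper built above
   is expanded by s390_output_addr_const_extra into the difference
   "X - pool label", i.e. the constant's byte offset from the pool
   base.  */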
8635
8636 /* Find constant VAL of mode MODE in the constant pool POOL.
8637 Return an RTX describing the distance from the start of
8638 the pool to the location of the new constant. */
8639
8640 static rtx
8641 s390_find_constant (struct constant_pool *pool, rtx val,
8642 machine_mode mode)
8643 {
8644 struct constant *c;
8645 int i;
8646
8647 for (i = 0; i < NR_C_MODES; i++)
8648 if (constant_modes[i] == mode)
8649 break;
8650 gcc_assert (i != NR_C_MODES);
8651
8652 for (c = pool->constants[i]; c != NULL; c = c->next)
8653 if (rtx_equal_p (val, c->value))
8654 break;
8655
8656 gcc_assert (c);
8657
8658 return s390_pool_offset (pool, gen_rtx_LABEL_REF (Pmode, c->label));
8659 }
8660
8661 /* Check whether INSN is an execute. Return the label_ref to its
8662 execute target template if so, NULL_RTX otherwise. */
8663
8664 static rtx
8665 s390_execute_label (rtx insn)
8666 {
8667 if (INSN_P (insn)
8668 && GET_CODE (PATTERN (insn)) == PARALLEL
8669 && GET_CODE (XVECEXP (PATTERN (insn), 0, 0)) == UNSPEC
8670 && (XINT (XVECEXP (PATTERN (insn), 0, 0), 1) == UNSPEC_EXECUTE
8671 || XINT (XVECEXP (PATTERN (insn), 0, 0), 1) == UNSPEC_EXECUTE_JUMP))
8672 {
8673 if (XINT (XVECEXP (PATTERN (insn), 0, 0), 1) == UNSPEC_EXECUTE)
8674 return XVECEXP (XVECEXP (PATTERN (insn), 0, 0), 0, 2);
8675 else
8676 {
8677 gcc_assert (JUMP_P (insn));
8678 /* For jump insns as execute target:
8679 - There is one operand less in the parallel (the
8680 modification register of the execute is always 0).
8681 - The execute target label is wrapped into an
8682 if_then_else in order to hide it from jump analysis. */
8683 return XEXP (XVECEXP (XVECEXP (PATTERN (insn), 0, 0), 0, 0), 0);
8684 }
8685 }
8686
8687 return NULL_RTX;
8688 }
8689
8690 /* Find execute target for INSN in the constant pool POOL.
8691 Return an RTX describing the distance from the start of
8692 the pool to the location of the execute target. */
8693
8694 static rtx
8695 s390_find_execute (struct constant_pool *pool, rtx insn)
8696 {
8697 struct constant *c;
8698
8699 for (c = pool->execute; c != NULL; c = c->next)
8700 if (INSN_UID (insn) == INSN_UID (c->value))
8701 break;
8702
8703 gcc_assert (c);
8704
8705 return s390_pool_offset (pool, gen_rtx_LABEL_REF (Pmode, c->label));
8706 }
8707
8708 /* For an execute INSN, extract the execute target template. */
8709
8710 static rtx
8711 s390_execute_target (rtx insn)
8712 {
8713 rtx pattern = PATTERN (insn);
8714 gcc_assert (s390_execute_label (insn));
8715
8716 if (XVECLEN (pattern, 0) == 2)
8717 {
8718 pattern = copy_rtx (XVECEXP (pattern, 0, 1));
8719 }
8720 else
8721 {
8722 rtvec vec = rtvec_alloc (XVECLEN (pattern, 0) - 1);
8723 int i;
8724
8725 for (i = 0; i < XVECLEN (pattern, 0) - 1; i++)
8726 RTVEC_ELT (vec, i) = copy_rtx (XVECEXP (pattern, 0, i + 1));
8727
8728 pattern = gen_rtx_PARALLEL (VOIDmode, vec);
8729 }
8730
8731 return pattern;
8732 }
8733
8734 /* Indicate that INSN cannot be duplicated. This is the case for
8735 execute insns that carry a unique label. */
8736
8737 static bool
8738 s390_cannot_copy_insn_p (rtx_insn *insn)
8739 {
8740 rtx label = s390_execute_label (insn);
8741 return label && label != const0_rtx;
8742 }
8743
8744 /* Dump out the constants in POOL. If REMOTE_LABEL is true,
8745 do not emit the pool base label. */
8746
8747 static void
8748 s390_dump_pool (struct constant_pool *pool, bool remote_label)
8749 {
8750 struct constant *c;
8751 rtx_insn *insn = pool->pool_insn;
8752 int i;
8753
8754 /* Switch to rodata section. */
8755 insn = emit_insn_after (gen_pool_section_start (), insn);
8756 INSN_ADDRESSES_NEW (insn, -1);
8757
8758 /* Ensure minimum pool alignment. */
8759 insn = emit_insn_after (gen_pool_align (GEN_INT (8)), insn);
8760 INSN_ADDRESSES_NEW (insn, -1);
8761
8762 /* Emit pool base label. */
8763 if (!remote_label)
8764 {
8765 insn = emit_label_after (pool->label, insn);
8766 INSN_ADDRESSES_NEW (insn, -1);
8767 }
8768
8769 /* Dump constants in descending alignment requirement order,
8770 ensuring proper alignment for every constant. */
8771 for (i = 0; i < NR_C_MODES; i++)
8772 for (c = pool->constants[i]; c; c = c->next)
8773 {
8774 /* Convert UNSPEC_LTREL_OFFSET unspecs to pool-relative references. */
8775 rtx value = copy_rtx (c->value);
8776 if (GET_CODE (value) == CONST
8777 && GET_CODE (XEXP (value, 0)) == UNSPEC
8778 && XINT (XEXP (value, 0), 1) == UNSPEC_LTREL_OFFSET
8779 && XVECLEN (XEXP (value, 0), 0) == 1)
8780 value = s390_pool_offset (pool, XVECEXP (XEXP (value, 0), 0, 0));
8781
8782 insn = emit_label_after (c->label, insn);
8783 INSN_ADDRESSES_NEW (insn, -1);
8784
8785 value = gen_rtx_UNSPEC_VOLATILE (constant_modes[i],
8786 gen_rtvec (1, value),
8787 UNSPECV_POOL_ENTRY);
8788 insn = emit_insn_after (value, insn);
8789 INSN_ADDRESSES_NEW (insn, -1);
8790 }
8791
8792 /* Ensure minimum alignment for instructions. */
8793 insn = emit_insn_after (gen_pool_align (GEN_INT (2)), insn);
8794 INSN_ADDRESSES_NEW (insn, -1);
8795
8796 /* Output in-pool execute template insns. */
8797 for (c = pool->execute; c; c = c->next)
8798 {
8799 insn = emit_label_after (c->label, insn);
8800 INSN_ADDRESSES_NEW (insn, -1);
8801
8802 insn = emit_insn_after (s390_execute_target (c->value), insn);
8803 INSN_ADDRESSES_NEW (insn, -1);
8804 }
8805
8806 /* Switch back to previous section. */
8807 insn = emit_insn_after (gen_pool_section_end (), insn);
8808 INSN_ADDRESSES_NEW (insn, -1);
8809
8810 insn = emit_barrier_after (insn);
8811 INSN_ADDRESSES_NEW (insn, -1);
8812
8813 /* Remove placeholder insn. */
8814 remove_insn (pool->pool_insn);
8815 }
8816
8817 /* Free all memory used by POOL. */
8818
8819 static void
8820 s390_free_pool (struct constant_pool *pool)
8821 {
8822 struct constant *c, *next;
8823 int i;
8824
8825 for (i = 0; i < NR_C_MODES; i++)
8826 for (c = pool->constants[i]; c; c = next)
8827 {
8828 next = c->next;
8829 free (c);
8830 }
8831
8832 for (c = pool->execute; c; c = next)
8833 {
8834 next = c->next;
8835 free (c);
8836 }
8837
8838 BITMAP_FREE (pool->insns);
8839 free (pool);
8840 }
8841
8842
8843 /* Collect main literal pool. Return NULL on overflow. */
8844
8845 static struct constant_pool *
8846 s390_mainpool_start (void)
8847 {
8848 struct constant_pool *pool;
8849 rtx_insn *insn;
8850
8851 pool = s390_alloc_pool ();
8852
8853 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
8854 {
8855 if (NONJUMP_INSN_P (insn)
8856 && GET_CODE (PATTERN (insn)) == SET
8857 && GET_CODE (SET_SRC (PATTERN (insn))) == UNSPEC_VOLATILE
8858 && XINT (SET_SRC (PATTERN (insn)), 1) == UNSPECV_MAIN_POOL)
8859 {
8860 /* There might be two main_pool instructions if base_reg
8861 is call-clobbered; one for shrink-wrapped code and one
8862 for the rest. We want to keep the first. */
8863 if (pool->pool_insn)
8864 {
8865 insn = PREV_INSN (insn);
8866 delete_insn (NEXT_INSN (insn));
8867 continue;
8868 }
8869 pool->pool_insn = insn;
8870 }
8871
8872 if (NONJUMP_INSN_P (insn) || CALL_P (insn))
8873 {
8874 rtx pool_ref = NULL_RTX;
8875 find_constant_pool_ref (insn, &pool_ref);
8876 if (pool_ref)
8877 {
8878 rtx constant = get_pool_constant (pool_ref);
8879 machine_mode mode = get_pool_mode (pool_ref);
8880 s390_add_constant (pool, constant, mode);
8881 }
8882 }
8883
8884 /* If hot/cold partitioning is enabled we have to make sure that
8885 the literal pool is emitted in the same section where the
8886 initialization of the literal pool base pointer takes place.
8887 emit_pool_after is only used in the non-overflow case on non-Z
8888 CPUs, where we can emit the literal pool at the end of the
8889 function body within the text section. */
8890 if (NOTE_P (insn)
8891 && NOTE_KIND (insn) == NOTE_INSN_SWITCH_TEXT_SECTIONS
8892 && !pool->emit_pool_after)
8893 pool->emit_pool_after = PREV_INSN (insn);
8894 }
8895
8896 gcc_assert (pool->pool_insn || pool->size == 0);
8897
8898 if (pool->size >= 4096)
8899 {
8900 /* We're going to chunkify the pool, so remove the main
8901 pool placeholder insn. */
8902 remove_insn (pool->pool_insn);
8903
8904 s390_free_pool (pool);
8905 pool = NULL;
8906 }
8907
8908 /* If the function ends with the section where the literal pool
8909 should be emitted, set the marker to its end. */
8910 if (pool && !pool->emit_pool_after)
8911 pool->emit_pool_after = get_last_insn ();
8912
8913 return pool;
8914 }
8915
8916 /* POOL holds the main literal pool as collected by s390_mainpool_start.
8917 Modify the current function to output the pool constants as well as
8918 the pool register setup instruction. */
8919
8920 static void
8921 s390_mainpool_finish (struct constant_pool *pool)
8922 {
8923 rtx base_reg = cfun->machine->base_reg;
8924 rtx set;
8925 rtx_insn *insn;
8926
8927 /* If the pool is empty, we're done. */
8928 if (pool->size == 0)
8929 {
8930 /* We don't actually need a base register after all. */
8931 cfun->machine->base_reg = NULL_RTX;
8932
8933 if (pool->pool_insn)
8934 remove_insn (pool->pool_insn);
8935 s390_free_pool (pool);
8936 return;
8937 }
8938
8939 /* We need correct insn addresses. */
8940 shorten_branches (get_insns ());
8941
8942 /* Use a LARL to load the pool register. The pool is
8943 located in the .rodata section, so we emit it after the function. */
8944 set = gen_main_base_64 (base_reg, pool->label);
8945 insn = emit_insn_after (set, pool->pool_insn);
8946 INSN_ADDRESSES_NEW (insn, -1);
8947 remove_insn (pool->pool_insn);
8948
8949 insn = get_last_insn ();
8950 pool->pool_insn = emit_insn_after (gen_pool (const0_rtx), insn);
8951 INSN_ADDRESSES_NEW (pool->pool_insn, -1);
8952
8953 s390_dump_pool (pool, 0);
8954
8955 /* Replace all literal pool references. */
8956
8957 for (rtx_insn *insn = get_insns (); insn; insn = NEXT_INSN (insn))
8958 {
8959 if (NONJUMP_INSN_P (insn) || CALL_P (insn))
8960 {
8961 rtx addr, pool_ref = NULL_RTX;
8962 find_constant_pool_ref (insn, &pool_ref);
8963 if (pool_ref)
8964 {
8965 if (s390_execute_label (insn))
8966 addr = s390_find_execute (pool, insn);
8967 else
8968 addr = s390_find_constant (pool, get_pool_constant (pool_ref),
8969 get_pool_mode (pool_ref));
8970
8971 replace_constant_pool_ref (insn, pool_ref, addr);
8972 INSN_CODE (insn) = -1;
8973 }
8974 }
8975 }
8976
8977
8978 /* Free the pool. */
8979 s390_free_pool (pool);
8980 }
8981
8982 /* Chunkify the literal pool. */
8983
8984 #define S390_POOL_CHUNK_MIN 0xc00
8985 #define S390_POOL_CHUNK_MAX 0xe00
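/* Added note (assumption): both bounds are kept well below the 4 KB
   base+displacement addressing range, leaving slack for alignment
   padding and execute templates so that every entry of a chunk stays
   addressable from the chunk's base label.  */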
8986
8987 static struct constant_pool *
8988 s390_chunkify_start (void)
8989 {
8990 struct constant_pool *curr_pool = NULL, *pool_list = NULL;
8991 bitmap far_labels;
8992 rtx_insn *insn;
8993
8994 /* We need correct insn addresses. */
8995
8996 shorten_branches (get_insns ());
8997
8998 /* Scan all insns and move literals to pool chunks. */
8999
9000 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
9001 {
9002 if (NONJUMP_INSN_P (insn) || CALL_P (insn))
9003 {
9004 rtx pool_ref = NULL_RTX;
9005 find_constant_pool_ref (insn, &pool_ref);
9006 if (pool_ref)
9007 {
9008 rtx constant = get_pool_constant (pool_ref);
9009 machine_mode mode = get_pool_mode (pool_ref);
9010
9011 if (!curr_pool)
9012 curr_pool = s390_start_pool (&pool_list, insn);
9013
9014 s390_add_constant (curr_pool, constant, mode);
9015 s390_add_pool_insn (curr_pool, insn);
9016 }
9017 }
9018
9019 if (JUMP_P (insn) || JUMP_TABLE_DATA_P (insn) || LABEL_P (insn))
9020 {
9021 if (curr_pool)
9022 s390_add_pool_insn (curr_pool, insn);
9023 }
9024
9025 if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_VAR_LOCATION)
9026 continue;
9027
9028 if (!curr_pool
9029 || INSN_ADDRESSES_SIZE () <= (size_t) INSN_UID (insn)
9030 || INSN_ADDRESSES (INSN_UID (insn)) == -1)
9031 continue;
9032
9033 if (curr_pool->size < S390_POOL_CHUNK_MAX)
9034 continue;
9035
9036 s390_end_pool (curr_pool, NULL);
9037 curr_pool = NULL;
9038 }
9039
9040 if (curr_pool)
9041 s390_end_pool (curr_pool, NULL);
9042
9043 /* Find all labels that are branched into
9044 from an insn belonging to a different chunk. */
9045
9046 far_labels = BITMAP_ALLOC (NULL);
9047
9048 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
9049 {
9050 rtx_jump_table_data *table;
9051
9052 /* Labels marked with LABEL_PRESERVE_P can be target
9053 of non-local jumps, so we have to mark them.
9054 The same holds for named labels.
9055
9056 Don't do that, however, if it is the label before
9057 a jump table. */
9058
9059 if (LABEL_P (insn)
9060 && (LABEL_PRESERVE_P (insn) || LABEL_NAME (insn)))
9061 {
9062 rtx_insn *vec_insn = NEXT_INSN (insn);
9063 if (! vec_insn || ! JUMP_TABLE_DATA_P (vec_insn))
9064 bitmap_set_bit (far_labels, CODE_LABEL_NUMBER (insn));
9065 }
9066 /* Check potential targets in a table jump (casesi_jump). */
9067 else if (tablejump_p (insn, NULL, &table))
9068 {
9069 rtx vec_pat = PATTERN (table);
9070 int i, diff_p = GET_CODE (vec_pat) == ADDR_DIFF_VEC;
9071
9072 for (i = 0; i < XVECLEN (vec_pat, diff_p); i++)
9073 {
9074 rtx label = XEXP (XVECEXP (vec_pat, diff_p, i), 0);
9075
9076 if (s390_find_pool (pool_list, label)
9077 != s390_find_pool (pool_list, insn))
9078 bitmap_set_bit (far_labels, CODE_LABEL_NUMBER (label));
9079 }
9080 }
9081 /* If we have a direct jump (conditional or unconditional),
9082 check all potential targets. */
9083 else if (JUMP_P (insn))
9084 {
9085 rtx pat = PATTERN (insn);
9086
9087 if (GET_CODE (pat) == PARALLEL)
9088 pat = XVECEXP (pat, 0, 0);
9089
9090 if (GET_CODE (pat) == SET)
9091 {
9092 rtx label = JUMP_LABEL (insn);
9093 if (label && !ANY_RETURN_P (label))
9094 {
9095 if (s390_find_pool (pool_list, label)
9096 != s390_find_pool (pool_list, insn))
9097 bitmap_set_bit (far_labels, CODE_LABEL_NUMBER (label));
9098 }
9099 }
9100 }
9101 }
9102
9103 /* Insert base register reload insns before every pool. */
9104
9105 for (curr_pool = pool_list; curr_pool; curr_pool = curr_pool->next)
9106 {
9107 rtx new_insn = gen_reload_base_64 (cfun->machine->base_reg,
9108 curr_pool->label);
9109 rtx_insn *insn = curr_pool->first_insn;
9110 INSN_ADDRESSES_NEW (emit_insn_before (new_insn, insn), -1);
9111 }
9112
9113 /* Insert base register reload insns at every far label. */
9114
9115 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
9116 if (LABEL_P (insn)
9117 && bitmap_bit_p (far_labels, CODE_LABEL_NUMBER (insn)))
9118 {
9119 struct constant_pool *pool = s390_find_pool (pool_list, insn);
9120 if (pool)
9121 {
9122 rtx new_insn = gen_reload_base_64 (cfun->machine->base_reg,
9123 pool->label);
9124 INSN_ADDRESSES_NEW (emit_insn_after (new_insn, insn), -1);
9125 }
9126 }
9127
9128
9129 BITMAP_FREE (far_labels);
9130
9131
9132 /* Recompute insn addresses. */
9133
9134 init_insn_lengths ();
9135 shorten_branches (get_insns ());
9136
9137 return pool_list;
9138 }
9139
9140 /* POOL_LIST is a chunk list as prepared by s390_chunkify_start.
9141 After we have decided to use this list, finish implementing
9142 all changes to the current function as required. */
9143
9144 static void
9145 s390_chunkify_finish (struct constant_pool *pool_list)
9146 {
9147 struct constant_pool *curr_pool = NULL;
9148 rtx_insn *insn;
9149
9150
9151 /* Replace all literal pool references. */
9152
9153 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
9154 {
9155 curr_pool = s390_find_pool (pool_list, insn);
9156 if (!curr_pool)
9157 continue;
9158
9159 if (NONJUMP_INSN_P (insn) || CALL_P (insn))
9160 {
9161 rtx addr, pool_ref = NULL_RTX;
9162 find_constant_pool_ref (insn, &pool_ref);
9163 if (pool_ref)
9164 {
9165 if (s390_execute_label (insn))
9166 addr = s390_find_execute (curr_pool, insn);
9167 else
9168 addr = s390_find_constant (curr_pool,
9169 get_pool_constant (pool_ref),
9170 get_pool_mode (pool_ref));
9171
9172 replace_constant_pool_ref (insn, pool_ref, addr);
9173 INSN_CODE (insn) = -1;
9174 }
9175 }
9176 }
9177
9178 /* Dump out all literal pools. */
9179
9180 for (curr_pool = pool_list; curr_pool; curr_pool = curr_pool->next)
9181 s390_dump_pool (curr_pool, 0);
9182
9183 /* Free pool list. */
9184
9185 while (pool_list)
9186 {
9187 struct constant_pool *next = pool_list->next;
9188 s390_free_pool (pool_list);
9189 pool_list = next;
9190 }
9191 }
9192
9193 /* Output the constant pool entry EXP in mode MODE with alignment ALIGN. */
9194
9195 void
9196 s390_output_pool_entry (rtx exp, machine_mode mode, unsigned int align)
9197 {
9198 switch (GET_MODE_CLASS (mode))
9199 {
9200 case MODE_FLOAT:
9201 case MODE_DECIMAL_FLOAT:
9202 gcc_assert (GET_CODE (exp) == CONST_DOUBLE);
9203
9204 assemble_real (*CONST_DOUBLE_REAL_VALUE (exp),
9205 as_a <scalar_float_mode> (mode), align);
9206 break;
9207
9208 case MODE_INT:
9209 assemble_integer (exp, GET_MODE_SIZE (mode), align, 1);
9210 mark_symbol_refs_as_used (exp);
9211 break;
9212
9213 case MODE_VECTOR_INT:
9214 case MODE_VECTOR_FLOAT:
9215 {
9216 int i;
9217 machine_mode inner_mode;
9218 gcc_assert (GET_CODE (exp) == CONST_VECTOR);
9219
9220 inner_mode = GET_MODE_INNER (GET_MODE (exp));
9221 for (i = 0; i < XVECLEN (exp, 0); i++)
9222 s390_output_pool_entry (XVECEXP (exp, 0, i),
9223 inner_mode,
9224 i == 0
9225 ? align
9226 : GET_MODE_BITSIZE (inner_mode));
9227 }
9228 break;
9229
9230 default:
9231 gcc_unreachable ();
9232 }
9233 }
9234
9235
9236 /* Return an RTL expression representing the value of the return address
9237 for the frame COUNT steps up from the current frame. FRAME is the
9238 frame pointer of that frame. */
9239
9240 rtx
9241 s390_return_addr_rtx (int count, rtx frame ATTRIBUTE_UNUSED)
9242 {
9243 int offset;
9244 rtx addr;
9245
9246 /* Without backchain, we fail for all but the current frame. */
9247
9248 if (!TARGET_BACKCHAIN && count > 0)
9249 return NULL_RTX;
9250
9251 /* For the current frame, we need to make sure the initial
9252 value of RETURN_REGNUM is actually saved. */
9253
9254 if (count == 0)
9255 return get_hard_reg_initial_val (Pmode, RETURN_REGNUM);
9256
9257 if (TARGET_PACKED_STACK)
9258 offset = -2 * UNITS_PER_LONG;
9259 else
9260 offset = RETURN_REGNUM * UNITS_PER_LONG;
9261
9262 addr = plus_constant (Pmode, frame, offset);
9263 addr = memory_address (Pmode, addr);
9264 return gen_rtx_MEM (Pmode, addr);
9265 }
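/* Usage sketch (illustrative): this routine is what backs
   __builtin_return_address on S/390:

     void *own   = __builtin_return_address (0);  // always expandable
     void *outer = __builtin_return_address (1);  // only meaningful
                                                  // with -mbackchain

   Without the backchain, walking to COUNT > 0 fails above and the
   builtin cannot deliver a useful value.  */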
9266
9267 /* Return an RTL expression representing the back chain stored in
9268 the current stack frame. */
9269
9270 rtx
9271 s390_back_chain_rtx (void)
9272 {
9273 rtx chain;
9274
9275 gcc_assert (TARGET_BACKCHAIN);
9276
9277 if (TARGET_PACKED_STACK)
9278 chain = plus_constant (Pmode, stack_pointer_rtx,
9279 STACK_POINTER_OFFSET - UNITS_PER_LONG);
9280 else
9281 chain = stack_pointer_rtx;
9282
9283 chain = gen_rtx_MEM (Pmode, chain);
9284 return chain;
9285 }
9286
9287 /* Find first call clobbered register unused in a function.
9288 This could be used as base register in a leaf function
9289 or for holding the return address before epilogue. */
9290
9291 static int
9292 find_unused_clobbered_reg (void)
9293 {
9294 int i;
9295 for (i = 0; i < 6; i++)
9296 if (!df_regs_ever_live_p (i))
9297 return i;
9298 return 0;
9299 }
9300
9301
9302 /* Helper function for s390_regs_ever_clobbered. Sets the fields in DATA for all
9303 clobbered hard regs in SETREG. */
9304
9305 static void
9306 s390_reg_clobbered_rtx (rtx setreg, const_rtx set_insn ATTRIBUTE_UNUSED, void *data)
9307 {
9308 char *regs_ever_clobbered = (char *)data;
9309 unsigned int i, regno;
9310 machine_mode mode = GET_MODE (setreg);
9311
9312 if (GET_CODE (setreg) == SUBREG)
9313 {
9314 rtx inner = SUBREG_REG (setreg);
9315 if (!GENERAL_REG_P (inner) && !FP_REG_P (inner))
9316 return;
9317 regno = subreg_regno (setreg);
9318 }
9319 else if (GENERAL_REG_P (setreg) || FP_REG_P (setreg))
9320 regno = REGNO (setreg);
9321 else
9322 return;
9323
9324 for (i = regno;
9325 i < end_hard_regno (mode, regno);
9326 i++)
9327 regs_ever_clobbered[i] = 1;
9328 }
9329
9330 /* Walks through all basic blocks of the current function looking
9331 for clobbered hard regs using s390_reg_clobbered_rtx. The fields
9332 of the passed char array REGS_EVER_CLOBBERED are set to one for
9333 each of those regs. */
9334
9335 static void
9336 s390_regs_ever_clobbered (char regs_ever_clobbered[])
9337 {
9338 basic_block cur_bb;
9339 rtx_insn *cur_insn;
9340 unsigned int i;
9341
9342 memset (regs_ever_clobbered, 0, 32);
9343
9344 /* For non-leaf functions we have to consider all call clobbered regs to be
9345 clobbered. */
9346 if (!crtl->is_leaf)
9347 {
9348 for (i = 0; i < 32; i++)
9349 regs_ever_clobbered[i] = call_really_used_regs[i];
9350 }
9351
9352 /* Make the "magic" eh_return registers live if necessary. For regs_ever_live
9353 this work is done by liveness analysis (mark_regs_live_at_end).
9354 Special care is needed for functions containing landing pads. Landing pads
9355 may use the eh registers, but the code which sets these registers is not
9356 contained in that function. Hence s390_regs_ever_clobbered is not able to
9357 deal with this automatically. */
9358 if (crtl->calls_eh_return || cfun->machine->has_landing_pad_p)
9359 for (i = 0; EH_RETURN_DATA_REGNO (i) != INVALID_REGNUM ; i++)
9360 if (crtl->calls_eh_return
9361 || (cfun->machine->has_landing_pad_p
9362 && df_regs_ever_live_p (EH_RETURN_DATA_REGNO (i))))
9363 regs_ever_clobbered[EH_RETURN_DATA_REGNO (i)] = 1;
9364
9365 /* For nonlocal gotos all call-saved registers have to be saved.
9366 This flag is also set for the unwinding code in libgcc.
9367 See expand_builtin_unwind_init. For regs_ever_live this is done by
9368 reload. */
9369 if (crtl->saves_all_registers)
9370 for (i = 0; i < 32; i++)
9371 if (!call_really_used_regs[i])
9372 regs_ever_clobbered[i] = 1;
9373
9374 FOR_EACH_BB_FN (cur_bb, cfun)
9375 {
9376 FOR_BB_INSNS (cur_bb, cur_insn)
9377 {
9378 rtx pat;
9379
9380 if (!INSN_P (cur_insn))
9381 continue;
9382
9383 pat = PATTERN (cur_insn);
9384
9385 /* Ignore GPR restore insns. */
9386 if (epilogue_completed && RTX_FRAME_RELATED_P (cur_insn))
9387 {
9388 if (GET_CODE (pat) == SET
9389 && GENERAL_REG_P (SET_DEST (pat)))
9390 {
9391 /* lgdr */
9392 if (GET_MODE (SET_SRC (pat)) == DImode
9393 && FP_REG_P (SET_SRC (pat)))
9394 continue;
9395
9396 /* l / lg */
9397 if (GET_CODE (SET_SRC (pat)) == MEM)
9398 continue;
9399 }
9400
9401 /* lm / lmg */
9402 if (GET_CODE (pat) == PARALLEL
9403 && load_multiple_operation (pat, VOIDmode))
9404 continue;
9405 }
9406
9407 note_stores (pat,
9408 s390_reg_clobbered_rtx,
9409 regs_ever_clobbered);
9410 }
9411 }
9412 }
9413
9414 /* Determine the frame area which actually has to be accessed
9415 in the function epilogue. The values are stored at the
9416 given pointers AREA_BOTTOM (address of the lowest used stack
9417 address) and AREA_TOP (address of the first item which does
9418 not belong to the stack frame). */
9419
9420 static void
9421 s390_frame_area (int *area_bottom, int *area_top)
9422 {
9423 int b, t;
9424
9425 b = INT_MAX;
9426 t = INT_MIN;
9427
9428 if (cfun_frame_layout.first_restore_gpr != -1)
9429 {
9430 b = (cfun_frame_layout.gprs_offset
9431 + cfun_frame_layout.first_restore_gpr * UNITS_PER_LONG);
9432 t = b + (cfun_frame_layout.last_restore_gpr
9433 - cfun_frame_layout.first_restore_gpr + 1) * UNITS_PER_LONG;
9434 }
9435
9436 if (TARGET_64BIT && cfun_save_high_fprs_p)
9437 {
9438 b = MIN (b, cfun_frame_layout.f8_offset);
9439 t = MAX (t, (cfun_frame_layout.f8_offset
9440 + cfun_frame_layout.high_fprs * 8));
9441 }
9442
9443 if (!TARGET_64BIT)
9444 {
9445 if (cfun_fpr_save_p (FPR4_REGNUM))
9446 {
9447 b = MIN (b, cfun_frame_layout.f4_offset);
9448 t = MAX (t, cfun_frame_layout.f4_offset + 8);
9449 }
9450 if (cfun_fpr_save_p (FPR6_REGNUM))
9451 {
9452 b = MIN (b, cfun_frame_layout.f4_offset + 8);
9453 t = MAX (t, cfun_frame_layout.f4_offset + 16);
9454 }
9455 }
9456 *area_bottom = b;
9457 *area_top = t;
9458 }
9459 /* Update gpr_save_slots in the frame layout trying to make use of
9460 FPRs as GPR save slots.
9461 This is a helper routine of s390_register_info. */
9462
9463 static void
9464 s390_register_info_gprtofpr ()
9465 {
9466 int save_reg_slot = FPR0_REGNUM;
9467 int i, j;
9468
9469 if (!TARGET_Z10 || !TARGET_HARD_FLOAT || !crtl->is_leaf)
9470 return;
9471
9472 /* builtin_eh_return needs to be able to modify the return address
9473 on the stack.  It could also adjust the FPR save slot instead,
9474 but that hardly seems worth the trouble.  */
9475 if (crtl->calls_eh_return)
9476 return;
9477
9478 for (i = 15; i >= 6; i--)
9479 {
9480 if (cfun_gpr_save_slot (i) == SAVE_SLOT_NONE)
9481 continue;
9482
9483 /* Advance to the next FP register which can be used as a
9484 GPR save slot. */
9485 while ((!call_really_used_regs[save_reg_slot]
9486 || df_regs_ever_live_p (save_reg_slot)
9487 || cfun_fpr_save_p (save_reg_slot))
9488 && FP_REGNO_P (save_reg_slot))
9489 save_reg_slot++;
9490 if (!FP_REGNO_P (save_reg_slot))
9491 {
9492 /* We only want to use ldgr/lgdr if we can get rid of
9493 stm/lm entirely. So undo the gpr slot allocation in
9494 case we ran out of FPR save slots. */
9495 for (j = 6; j <= 15; j++)
9496 if (FP_REGNO_P (cfun_gpr_save_slot (j)))
9497 cfun_gpr_save_slot (j) = SAVE_SLOT_STACK;
9498 break;
9499 }
9500 cfun_gpr_save_slot (i) = save_reg_slot++;
9501 }
9502 }
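/* For illustration (assumed z10 leaf function; register choices are
   hypothetical): instead of saving call-saved GPRs on the stack with

     stmg %r6,%r15,48(%r15)   ...   lmg %r6,%r15,48(%r15)

   the prologue/epilogue can mirror each GPR into an otherwise unused
   call-clobbered FPR:

     ldgr %f0,%r6             ...   lgdr %r6,%f0

   which avoids the memory traffic of stm/lm entirely.  */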
9503
9504 /* Set the bits in fpr_bitmap for FPRs which need to be saved due to
9505 stdarg.
9506 This is a helper routine for s390_register_info. */
9507
9508 static void
9509 s390_register_info_stdarg_fpr ()
9510 {
9511 int i;
9512 int min_fpr;
9513 int max_fpr;
9514
9515 /* Save the FP argument regs for stdarg.  f0, f2 for 31 bit and
9516 f0, f2, f4, f6 for 64 bit.  */
9517 if (!cfun->stdarg
9518 || !TARGET_HARD_FLOAT
9519 || !cfun->va_list_fpr_size
9520 || crtl->args.info.fprs >= FP_ARG_NUM_REG)
9521 return;
9522
9523 min_fpr = crtl->args.info.fprs;
9524 max_fpr = min_fpr + cfun->va_list_fpr_size - 1;
9525 if (max_fpr >= FP_ARG_NUM_REG)
9526 max_fpr = FP_ARG_NUM_REG - 1;
9527
9528 /* FPR argument regs start at f0. */
9529 min_fpr += FPR0_REGNUM;
9530 max_fpr += FPR0_REGNUM;
9531
9532 for (i = min_fpr; i <= max_fpr; i++)
9533 cfun_set_fpr_save (i);
9534 }
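/* Worked example (hypothetical argument layout, assuming
   cfun->va_list_fpr_size covers the remaining regs): in a 64-bit
   varargs function whose fixed arguments consume one FP argument
   register (crtl->args.info.fprs == 1), the remaining argument FPRs
   f2, f4 and f6 may still be read through va_arg, so their
   fpr_bitmap bits are set here and the prologue dumps them to the
   register save area.  */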
9535
9536 /* Reserve the GPR save slots for GPRs which need to be saved due to
9537 stdarg.
9538 This is a helper routine for s390_register_info. */
9539
9540 static void
9541 s390_register_info_stdarg_gpr ()
9542 {
9543 int i;
9544 int min_gpr;
9545 int max_gpr;
9546
9547 if (!cfun->stdarg
9548 || !cfun->va_list_gpr_size
9549 || crtl->args.info.gprs >= GP_ARG_NUM_REG)
9550 return;
9551
9552 min_gpr = crtl->args.info.gprs;
9553 max_gpr = min_gpr + cfun->va_list_gpr_size - 1;
9554 if (max_gpr >= GP_ARG_NUM_REG)
9555 max_gpr = GP_ARG_NUM_REG - 1;
9556
9557 /* GPR argument regs start at r2. */
9558 min_gpr += GPR2_REGNUM;
9559 max_gpr += GPR2_REGNUM;
9560
9561 /* If r6 was supposed to be saved into an FPR and now needs to go to
9562 the stack for varargs, we have to adjust the restore range to make
9563 sure that the restore is done from the stack as well.  */
9564 if (FP_REGNO_P (cfun_gpr_save_slot (GPR6_REGNUM))
9565 && min_gpr <= GPR6_REGNUM
9566 && max_gpr >= GPR6_REGNUM)
9567 {
9568 if (cfun_frame_layout.first_restore_gpr == -1
9569 || cfun_frame_layout.first_restore_gpr > GPR6_REGNUM)
9570 cfun_frame_layout.first_restore_gpr = GPR6_REGNUM;
9571 if (cfun_frame_layout.last_restore_gpr == -1
9572 || cfun_frame_layout.last_restore_gpr < GPR6_REGNUM)
9573 cfun_frame_layout.last_restore_gpr = GPR6_REGNUM;
9574 }
9575
9576 if (cfun_frame_layout.first_save_gpr == -1
9577 || cfun_frame_layout.first_save_gpr > min_gpr)
9578 cfun_frame_layout.first_save_gpr = min_gpr;
9579
9580 if (cfun_frame_layout.last_save_gpr == -1
9581 || cfun_frame_layout.last_save_gpr < max_gpr)
9582 cfun_frame_layout.last_save_gpr = max_gpr;
9583
9584 for (i = min_gpr; i <= max_gpr; i++)
9585 cfun_gpr_save_slot (i) = SAVE_SLOT_STACK;
9586 }
9587
9588 /* Calculate the save and restore ranges for stm(g) and lm(g) in the
9589 prologue and epilogue. */
9590
9591 static void
9592 s390_register_info_set_ranges ()
9593 {
9594 int i, j;
9595
9596 /* Find the first and the last save slot supposed to use the stack
9597 to set the restore range.
9598 Vararg regs might be marked as save to stack but only the
9599 call-saved regs really need restoring (i.e. r6). This code
9600 assumes that the vararg regs have not yet been recorded in
9601 cfun_gpr_save_slot. */
9602 for (i = 0; i < 16 && cfun_gpr_save_slot (i) != SAVE_SLOT_STACK; i++);
9603 for (j = 15; j > i && cfun_gpr_save_slot (j) != SAVE_SLOT_STACK; j--);
9604 cfun_frame_layout.first_restore_gpr = (i == 16) ? -1 : i;
9605 cfun_frame_layout.last_restore_gpr = (i == 16) ? -1 : j;
9606 cfun_frame_layout.first_save_gpr = (i == 16) ? -1 : i;
9607 cfun_frame_layout.last_save_gpr = (i == 16) ? -1 : j;
9608 }
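/* Worked example: if only r7 and r12 carry SAVE_SLOT_STACK, the two
   scans above leave i == 7 and j == 12, so the save/restore range
   becomes r7..r12.  The contiguous range lets prologue and epilogue
   use a single stm(g)/lm(g) even though r8..r11 would not strictly
   need saving.  */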
9609
9610 /* The GPR and FPR save slots in cfun->machine->frame_layout are set
9611 for registers which need to be saved in function prologue.
9612 This function can be used until the insns emitted for save/restore
9613 of the regs are visible in the RTL stream. */
9614
9615 static void
9616 s390_register_info ()
9617 {
9618 int i;
9619 char clobbered_regs[32];
9620
9621 gcc_assert (!epilogue_completed);
9622
9623 if (reload_completed)
9624 /* After reload we rely on our own routine to determine which
9625 registers need saving. */
9626 s390_regs_ever_clobbered (clobbered_regs);
9627 else
9628 /* During reload we use regs_ever_live as a base since reload
9629 does changes in there which we otherwise would not be aware
9630 of. */
9631 for (i = 0; i < 32; i++)
9632 clobbered_regs[i] = df_regs_ever_live_p (i);
9633
9634 for (i = 0; i < 32; i++)
9635 clobbered_regs[i] = clobbered_regs[i] && !global_regs[i];
9636
9637 /* Mark the call-saved FPRs which need to be saved.
9638 This needs to be done before checking the special GPRs since the
9639 stack pointer usage depends on whether high FPRs have to be saved
9640 or not. */
9641 cfun_frame_layout.fpr_bitmap = 0;
9642 cfun_frame_layout.high_fprs = 0;
9643 for (i = FPR0_REGNUM; i <= FPR15_REGNUM; i++)
9644 if (clobbered_regs[i] && !call_really_used_regs[i])
9645 {
9646 cfun_set_fpr_save (i);
9647 if (i >= FPR8_REGNUM)
9648 cfun_frame_layout.high_fprs++;
9649 }
9650
9651 /* Register 12 is used for GOT address, but also as temp in prologue
9652 for split-stack stdarg functions (unless r14 is available). */
9653 clobbered_regs[12]
9654 |= ((flag_pic && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
9655 || (flag_split_stack && cfun->stdarg
9656 && (crtl->is_leaf || TARGET_TPF_PROFILING
9657 || has_hard_reg_initial_val (Pmode, RETURN_REGNUM))));
9658
9659 clobbered_regs[BASE_REGNUM]
9660 |= (cfun->machine->base_reg
9661 && REGNO (cfun->machine->base_reg) == BASE_REGNUM);
9662
9663 clobbered_regs[HARD_FRAME_POINTER_REGNUM]
9664 |= !!frame_pointer_needed;
9665
9666 /* On machines older than z900 this decision might not be taken
9667 until machine-dependent reorg.
9668 save_return_addr_p will only be set on non-zarch machines, so
9669 there is no risk that r14 goes into an FPR instead of a stack
9670 slot.  */
9671 clobbered_regs[RETURN_REGNUM]
9672 |= (!crtl->is_leaf
9673 || TARGET_TPF_PROFILING
9674 || cfun_frame_layout.save_return_addr_p
9675 || crtl->calls_eh_return);
9676
9677 clobbered_regs[STACK_POINTER_REGNUM]
9678 |= (!crtl->is_leaf
9679 || TARGET_TPF_PROFILING
9680 || cfun_save_high_fprs_p
9681 || get_frame_size () > 0
9682 || (reload_completed && cfun_frame_layout.frame_size > 0)
9683 || cfun->calls_alloca);
9684
9685 memset (cfun_frame_layout.gpr_save_slots, SAVE_SLOT_NONE, 16);
9686
9687 for (i = 6; i < 16; i++)
9688 if (clobbered_regs[i])
9689 cfun_gpr_save_slot (i) = SAVE_SLOT_STACK;
9690
9691 s390_register_info_stdarg_fpr ();
9692 s390_register_info_gprtofpr ();
9693 s390_register_info_set_ranges ();
9694 /* stdarg functions might need to save GPRs 2 to 6. This might
9695 override the GPR->FPR save decision made by
9696 s390_register_info_gprtofpr for r6 since vararg regs must go to
9697 the stack. */
9698 s390_register_info_stdarg_gpr ();
9699 }
9700
9701 /* Return true if REGNO is a global register, but not one
9702 of the special ones that need to be saved/restored in any case.  */
9703
9704 static inline bool
9705 global_not_special_regno_p (int regno)
9706 {
9707 return (global_regs[regno]
9708 /* These registers are special and need to be
9709 restored in any case. */
9710 && !(regno == STACK_POINTER_REGNUM
9711 || regno == RETURN_REGNUM
9712 || regno == BASE_REGNUM
9713 || (flag_pic && regno == (int)PIC_OFFSET_TABLE_REGNUM)));
9714 }
9715
9716 /* This function is called by s390_optimize_prologue in order to get
9717 rid of unnecessary GPR save/restore instructions. The register info
9718 for the GPRs is re-computed and the ranges are re-calculated. */
9719
9720 static void
9721 s390_optimize_register_info ()
9722 {
9723 char clobbered_regs[32];
9724 int i;
9725
9726 gcc_assert (epilogue_completed);
9727
9728 s390_regs_ever_clobbered (clobbered_regs);
9729
9730 /* Global registers do not need to be saved and restored unless it
9731 is one of our special regs. (r12, r13, r14, or r15). */
9732 for (i = 0; i < 32; i++)
9733 clobbered_regs[i] = clobbered_regs[i] && !global_not_special_regno_p (i);
9734
9735 /* There is still special treatment needed for cases invisible to
9736 s390_regs_ever_clobbered. */
9737 clobbered_regs[RETURN_REGNUM]
9738 |= (TARGET_TPF_PROFILING
9739 /* When expanding builtin_return_addr in ESA mode we do not
9740 know whether r14 will later be needed as scratch reg when
9741 doing branch splitting. So the builtin always accesses the
9742 r14 save slot and we need to stick to the save/restore
9743 decision for r14 even if it turns out that it didn't get
9744 clobbered. */
9745 || cfun_frame_layout.save_return_addr_p
9746 || crtl->calls_eh_return);
9747
9748 memset (cfun_frame_layout.gpr_save_slots, SAVE_SLOT_NONE, 6);
9749
9750 for (i = 6; i < 16; i++)
9751 if (!clobbered_regs[i])
9752 cfun_gpr_save_slot (i) = SAVE_SLOT_NONE;
9753
9754 s390_register_info_set_ranges ();
9755 s390_register_info_stdarg_gpr ();
9756 }
9757
9758 /* Fill cfun->machine with info about frame of current function. */
9759
9760 static void
9761 s390_frame_info (void)
9762 {
9763 HOST_WIDE_INT lowest_offset;
9764
9765 cfun_frame_layout.first_save_gpr_slot = cfun_frame_layout.first_save_gpr;
9766 cfun_frame_layout.last_save_gpr_slot = cfun_frame_layout.last_save_gpr;
9767
9768 /* The va_arg builtin uses a constant distance of 16 *
9769 UNITS_PER_LONG (r0-r15) to reach the FPRs from the reg_save_area
9770 pointer. So even if we are going to save the stack pointer in an
9771 FPR we need the stack space in order to keep the offsets
9772 correct. */
9773 if (cfun->stdarg && cfun_save_arg_fprs_p)
9774 {
9775 cfun_frame_layout.last_save_gpr_slot = STACK_POINTER_REGNUM;
9776
9777 if (cfun_frame_layout.first_save_gpr_slot == -1)
9778 cfun_frame_layout.first_save_gpr_slot = STACK_POINTER_REGNUM;
9779 }
9780
9781 cfun_frame_layout.frame_size = get_frame_size ();
9782 if (!TARGET_64BIT && cfun_frame_layout.frame_size > 0x7fff0000)
9783 fatal_error (input_location,
9784 "total size of local variables exceeds architecture limit");
9785
9786 if (!TARGET_PACKED_STACK)
9787 {
9788 /* Fixed stack layout. */
9789 cfun_frame_layout.backchain_offset = 0;
9790 cfun_frame_layout.f0_offset = 16 * UNITS_PER_LONG;
9791 cfun_frame_layout.f4_offset = cfun_frame_layout.f0_offset + 2 * 8;
9792 cfun_frame_layout.f8_offset = -cfun_frame_layout.high_fprs * 8;
9793 cfun_frame_layout.gprs_offset = (cfun_frame_layout.first_save_gpr_slot
9794 * UNITS_PER_LONG);
9795 }
9796 else if (TARGET_BACKCHAIN)
9797 {
9798 /* Kernel stack layout - packed stack, backchain, no float */
9799 gcc_assert (TARGET_SOFT_FLOAT);
9800 cfun_frame_layout.backchain_offset = (STACK_POINTER_OFFSET
9801 - UNITS_PER_LONG);
9802
9803 /* The distance between the backchain and the return address
9804 save slot must not change. So we always need a slot for the
9805 stack pointer which resides in between. */
9806 cfun_frame_layout.last_save_gpr_slot = STACK_POINTER_REGNUM;
9807
9808 cfun_frame_layout.gprs_offset
9809 = cfun_frame_layout.backchain_offset - cfun_gprs_save_area_size;
9810
9811 /* FPRs will not be saved. Nevertheless pick sane values to
9812 keep area calculations valid. */
9813 cfun_frame_layout.f0_offset =
9814 cfun_frame_layout.f4_offset =
9815 cfun_frame_layout.f8_offset = cfun_frame_layout.gprs_offset;
9816 }
9817 else
9818 {
9819 int num_fprs;
9820
9821 /* Packed stack layout without backchain. */
9822
9823 /* With stdarg FPRs need their dedicated slots. */
9824 num_fprs = (TARGET_64BIT && cfun->stdarg ? 2
9825 : (cfun_fpr_save_p (FPR4_REGNUM) +
9826 cfun_fpr_save_p (FPR6_REGNUM)));
9827 cfun_frame_layout.f4_offset = STACK_POINTER_OFFSET - 8 * num_fprs;
9828
9829 num_fprs = (cfun->stdarg ? 2
9830 : (cfun_fpr_save_p (FPR0_REGNUM)
9831 + cfun_fpr_save_p (FPR2_REGNUM)));
9832 cfun_frame_layout.f0_offset = cfun_frame_layout.f4_offset - 8 * num_fprs;
9833
9834 cfun_frame_layout.gprs_offset
9835 = cfun_frame_layout.f0_offset - cfun_gprs_save_area_size;
9836
9837 cfun_frame_layout.f8_offset = (cfun_frame_layout.gprs_offset
9838 - cfun_frame_layout.high_fprs * 8);
9839 }
9840
9841 if (cfun_save_high_fprs_p)
9842 cfun_frame_layout.frame_size += cfun_frame_layout.high_fprs * 8;
9843
9844 if (!crtl->is_leaf)
9845 cfun_frame_layout.frame_size += crtl->outgoing_args_size;
9846
9847 /* In the following cases we have to allocate a STACK_POINTER_OFFSET
9848 sized area at the bottom of the stack. This is required also for
9849 leaf functions. When GCC generates a local stack reference it
9850 will always add STACK_POINTER_OFFSET to all these references. */
9851 if (crtl->is_leaf
9852 && !TARGET_TPF_PROFILING
9853 && cfun_frame_layout.frame_size == 0
9854 && !cfun->calls_alloca)
9855 return;
9856
9857 /* Calculate the number of bytes we have used in our own register
9858 save area. With the packed stack layout we can re-use the
9859 remaining bytes for normal stack elements. */
9860
9861 if (TARGET_PACKED_STACK)
9862 lowest_offset = MIN (MIN (cfun_frame_layout.f0_offset,
9863 cfun_frame_layout.f4_offset),
9864 cfun_frame_layout.gprs_offset);
9865 else
9866 lowest_offset = 0;
9867
9868 if (TARGET_BACKCHAIN)
9869 lowest_offset = MIN (lowest_offset, cfun_frame_layout.backchain_offset);
9870
9871 cfun_frame_layout.frame_size += STACK_POINTER_OFFSET - lowest_offset;
9872
9873 /* On 31 bit, if an odd number of GPRs has to be saved, we have to
9874 adjust the frame size to maintain the 8-byte alignment of stack
9875 frames.  */
9876 cfun_frame_layout.frame_size = ((cfun_frame_layout.frame_size +
9877 STACK_BOUNDARY / BITS_PER_UNIT - 1)
9878 & ~(STACK_BOUNDARY / BITS_PER_UNIT - 1));
9879 }
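/* For orientation, the fixed (non-packed) 64-bit layout computed
   above matches the usual s390x register save area, with offsets
   relative to the entry value of %r15 (STACK_POINTER_OFFSET == 160):

     +0          back chain
     +8 * r      save slot of GPR r, i.e. r6 at +48 up to r15 at +120
     +128 f0     +136 f2     +144 f4     +152 f6

   The packed layouts deliberately re-use the unused parts of this
   area for ordinary stack slots.  */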
9880
9881 /* Generate frame layout. Fills in register and frame data for the current
9882 function in cfun->machine. This routine can be called multiple times;
9883 it will re-do the complete frame layout every time. */
9884
9885 static void
9886 s390_init_frame_layout (void)
9887 {
9888 HOST_WIDE_INT frame_size;
9889 int base_used;
9890
9891 /* After LRA the frame layout is supposed to be read-only and should
9892 not be re-computed. */
9893 if (reload_completed)
9894 return;
9895
9896 do
9897 {
9898 frame_size = cfun_frame_layout.frame_size;
9899
9900 /* Try to predict whether we'll need the base register. */
9901 base_used = crtl->uses_const_pool
9902 || (!DISP_IN_RANGE (frame_size)
9903 && !CONST_OK_FOR_K (frame_size));
9904
9905 /* Decide which register to use as literal pool base. In small
9906 leaf functions, try to use an unused call-clobbered register
9907 as base register to avoid save/restore overhead. */
9908 if (!base_used)
9909 cfun->machine->base_reg = NULL_RTX;
9910 else
9911 {
9912 int br = 0;
9913
9914 if (crtl->is_leaf)
9915 /* Prefer r5 (most likely to be free). */
9916 for (br = 5; br >= 2 && df_regs_ever_live_p (br); br--)
9917 ;
9918 cfun->machine->base_reg =
9919 gen_rtx_REG (Pmode, (br >= 2) ? br : BASE_REGNUM);
9920 }
9921
9922 s390_register_info ();
9923 s390_frame_info ();
9924 }
9925 while (frame_size != cfun_frame_layout.frame_size);
9926 }
9927
9928 /* Remove the FPR clobbers from a tbegin insn if it can be proven that
9929 the TX is nonescaping. A transaction is considered escaping if
9930 there is at least one path from tbegin returning CC0 to the
9931 function exit block without a tend.
9932
9933 The check so far has some limitations:
9934 - only single tbegin/tend BBs are supported
9935 - the first cond jump after tbegin must separate the CC0 path from ~CC0
9936 - if CC is copied to a GPR and the CC0 check is done on the GPR,
9937 this is not supported
9938 */
9939
9940 static void
9941 s390_optimize_nonescaping_tx (void)
9942 {
9943 const unsigned int CC0 = 1 << 3;
9944 basic_block tbegin_bb = NULL;
9945 basic_block tend_bb = NULL;
9946 basic_block bb;
9947 rtx_insn *insn;
9948 bool result = true;
9949 int bb_index;
9950 rtx_insn *tbegin_insn = NULL;
9951
9952 if (!cfun->machine->tbegin_p)
9953 return;
9954
9955 for (bb_index = 0; bb_index < n_basic_blocks_for_fn (cfun); bb_index++)
9956 {
9957 bb = BASIC_BLOCK_FOR_FN (cfun, bb_index);
9958
9959 if (!bb)
9960 continue;
9961
9962 FOR_BB_INSNS (bb, insn)
9963 {
9964 rtx ite, cc, pat, target;
9965 unsigned HOST_WIDE_INT mask;
9966
9967 if (!INSN_P (insn) || INSN_CODE (insn) <= 0)
9968 continue;
9969
9970 pat = PATTERN (insn);
9971
9972 if (GET_CODE (pat) == PARALLEL)
9973 pat = XVECEXP (pat, 0, 0);
9974
9975 if (GET_CODE (pat) != SET
9976 || GET_CODE (SET_SRC (pat)) != UNSPEC_VOLATILE)
9977 continue;
9978
9979 if (XINT (SET_SRC (pat), 1) == UNSPECV_TBEGIN)
9980 {
9981 rtx_insn *tmp;
9982
9983 tbegin_insn = insn;
9984
9985 /* Just return if the tbegin doesn't have clobbers. */
9986 if (GET_CODE (PATTERN (insn)) != PARALLEL)
9987 return;
9988
9989 if (tbegin_bb != NULL)
9990 return;
9991
9992 /* Find the next conditional jump. */
9993 for (tmp = NEXT_INSN (insn);
9994 tmp != NULL_RTX;
9995 tmp = NEXT_INSN (tmp))
9996 {
9997 if (reg_set_p (gen_rtx_REG (CCmode, CC_REGNUM), tmp))
9998 return;
9999 if (!JUMP_P (tmp))
10000 continue;
10001
10002 ite = SET_SRC (PATTERN (tmp));
10003 if (GET_CODE (ite) != IF_THEN_ELSE)
10004 continue;
10005
10006 cc = XEXP (XEXP (ite, 0), 0);
10007 if (!REG_P (cc) || !CC_REGNO_P (REGNO (cc))
10008 || GET_MODE (cc) != CCRAWmode
10009 || GET_CODE (XEXP (XEXP (ite, 0), 1)) != CONST_INT)
10010 return;
10011
10012 if (bb->succs->length () != 2)
10013 return;
10014
10015 mask = INTVAL (XEXP (XEXP (ite, 0), 1));
10016 if (GET_CODE (XEXP (ite, 0)) == NE)
10017 mask ^= 0xf;
10018
10019 if (mask == CC0)
10020 target = XEXP (ite, 1);
10021 else if (mask == (CC0 ^ 0xf))
10022 target = XEXP (ite, 2);
10023 else
10024 return;
10025
10026 {
10027 edge_iterator ei;
10028 edge e1, e2;
10029
10030 ei = ei_start (bb->succs);
10031 e1 = ei_safe_edge (ei);
10032 ei_next (&ei);
10033 e2 = ei_safe_edge (ei);
10034
10035 if (e2->flags & EDGE_FALLTHRU)
10036 {
10037 e2 = e1;
10038 e1 = ei_safe_edge (ei);
10039 }
10040
10041 if (!(e1->flags & EDGE_FALLTHRU))
10042 return;
10043
10044 tbegin_bb = (target == pc_rtx) ? e1->dest : e2->dest;
10045 }
10046 if (tmp == BB_END (bb))
10047 break;
10048 }
10049 }
10050
10051 if (XINT (SET_SRC (pat), 1) == UNSPECV_TEND)
10052 {
10053 if (tend_bb != NULL)
10054 return;
10055 tend_bb = bb;
10056 }
10057 }
10058 }
10059
10060 /* Either we successfully remove the FPR clobbers here or we are not
10061 able to do anything for this TX.  Neither case qualifies for
10062 another look.  */
10063 cfun->machine->tbegin_p = false;
10064
10065 if (tbegin_bb == NULL || tend_bb == NULL)
10066 return;
10067
10068 calculate_dominance_info (CDI_POST_DOMINATORS);
10069 result = dominated_by_p (CDI_POST_DOMINATORS, tbegin_bb, tend_bb);
10070 free_dominance_info (CDI_POST_DOMINATORS);
10071
10072 if (!result)
10073 return;
10074
10075 PATTERN (tbegin_insn) = gen_rtx_PARALLEL (VOIDmode,
10076 gen_rtvec (2,
10077 XVECEXP (PATTERN (tbegin_insn), 0, 0),
10078 XVECEXP (PATTERN (tbegin_insn), 0, 1)));
10079 INSN_CODE (tbegin_insn) = -1;
10080 df_insn_rescan (tbegin_insn);
10081
10082 return;
10083 }
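/* Shape of the code this transformation looks for (a pseudo-RTL
   sketch; the real insns are PARALLELs around UNSPEC_VOLATILEs):

     cc = tbegin              ; UNSPECV_TBEGIN, clobbers the FPRs
     if (cc == CC0) goto tx   ; first cond jump separates CC0 from ~CC0
     ... abort path ...
   tx:
     ... transaction body ...
     tend                     ; UNSPECV_TEND

   Only when the tend block post-dominates the CC0 successor of the
   tbegin are the FPR clobbers dropped from the tbegin PARALLEL.  */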
10084
10085 /* Implement TARGET_HARD_REGNO_NREGS. Because all registers in a class
10086 have the same size, this is equivalent to CLASS_MAX_NREGS. */
10087
10088 static unsigned int
10089 s390_hard_regno_nregs (unsigned int regno, machine_mode mode)
10090 {
10091 return s390_class_max_nregs (REGNO_REG_CLASS (regno), mode);
10092 }
10093
10094 /* Implement TARGET_HARD_REGNO_MODE_OK.
10095
10096 Integer modes <= word size fit into any GPR.
10097 Integer modes > word size fit into successive GPRs, starting with
10098 an even-numbered register.
10099 SImode and DImode fit into FPRs as well.
10100
10101 Floating point modes <= word size fit into any FPR or GPR.
10102 Floating point modes > word size (i.e. DFmode on 32-bit) fit
10103 into any FPR, or an even-odd GPR pair.
10104 TFmode fits only into an even-odd FPR pair.
10105
10106 Complex floating point modes fit either into two FPRs, or into
10107 successive GPRs (again starting with an even number).
10108 TCmode fits only into two successive even-odd FPR pairs.
10109
10110 Condition code modes fit only into the CC register. */
10111
10112 static bool
10113 s390_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
10114 {
10115 if (!TARGET_VX && VECTOR_NOFP_REGNO_P (regno))
10116 return false;
10117
10118 switch (REGNO_REG_CLASS (regno))
10119 {
10120 case VEC_REGS:
10121 return ((GET_MODE_CLASS (mode) == MODE_INT
10122 && s390_class_max_nregs (VEC_REGS, mode) == 1)
10123 || mode == DFmode
10124 || (TARGET_VXE && mode == SFmode)
10125 || s390_vector_mode_supported_p (mode));
10126 break;
10127 case FP_REGS:
10128 if (TARGET_VX
10129 && ((GET_MODE_CLASS (mode) == MODE_INT
10130 && s390_class_max_nregs (FP_REGS, mode) == 1)
10131 || mode == DFmode
10132 || s390_vector_mode_supported_p (mode)))
10133 return true;
10134
10135 if (REGNO_PAIR_OK (regno, mode))
10136 {
10137 if (mode == SImode || mode == DImode)
10138 return true;
10139
10140 if (FLOAT_MODE_P (mode) && GET_MODE_CLASS (mode) != MODE_VECTOR_FLOAT)
10141 return true;
10142 }
10143 break;
10144 case ADDR_REGS:
10145 if (FRAME_REGNO_P (regno) && mode == Pmode)
10146 return true;
10147
10148 /* fallthrough */
10149 case GENERAL_REGS:
10150 if (REGNO_PAIR_OK (regno, mode))
10151 {
10152 if (TARGET_ZARCH
10153 || (mode != TFmode && mode != TCmode && mode != TDmode))
10154 return true;
10155 }
10156 break;
10157 case CC_REGS:
10158 if (GET_MODE_CLASS (mode) == MODE_CC)
10159 return true;
10160 break;
10161 case ACCESS_REGS:
10162 if (REGNO_PAIR_OK (regno, mode))
10163 {
10164 if (mode == SImode || mode == Pmode)
10165 return true;
10166 }
10167 break;
10168 default:
10169 return false;
10170 }
10171
10172 return false;
10173 }
10174
10175 /* Implement TARGET_MODES_TIEABLE_P. */
10176
10177 static bool
10178 s390_modes_tieable_p (machine_mode mode1, machine_mode mode2)
10179 {
10180 return ((mode1 == SFmode || mode1 == DFmode)
10181 == (mode2 == SFmode || mode2 == DFmode));
10182 }
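/* Examples, read directly off the predicate: SImode ties with DImode,
   and SFmode ties with DFmode, but SFmode does not tie with SImode;
   SF/DF values are laid out differently in the registers than
   same-sized integer values.  */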
10183
10184 /* Return nonzero if register OLD_REG can be renamed to register NEW_REG. */
10185
10186 bool
10187 s390_hard_regno_rename_ok (unsigned int old_reg, unsigned int new_reg)
10188 {
10189 /* Once we've decided upon a register to use as base register, it must
10190 no longer be used for any other purpose. */
10191 if (cfun->machine->base_reg)
10192 if (REGNO (cfun->machine->base_reg) == old_reg
10193 || REGNO (cfun->machine->base_reg) == new_reg)
10194 return false;
10195
10196 /* Prevent regrename from using call-saved regs which haven't
10197 actually been saved. This is necessary since regrename assumes
10198 the backend save/restore decisions are based on
10199 df_regs_ever_live. Since we have our own routine we have to tell
10200 regrename manually about it. */
10201 if (GENERAL_REGNO_P (new_reg)
10202 && !call_really_used_regs[new_reg]
10203 && cfun_gpr_save_slot (new_reg) == SAVE_SLOT_NONE)
10204 return false;
10205
10206 return true;
10207 }
10208
10209 /* Return nonzero if register REGNO can be used as a scratch register
10210 in peephole2. */
10211
10212 static bool
10213 s390_hard_regno_scratch_ok (unsigned int regno)
10214 {
10215 /* See s390_hard_regno_rename_ok. */
10216 if (GENERAL_REGNO_P (regno)
10217 && !call_really_used_regs[regno]
10218 && cfun_gpr_save_slot (regno) == SAVE_SLOT_NONE)
10219 return false;
10220
10221 return true;
10222 }
10223
10224 /* Implement TARGET_HARD_REGNO_CALL_PART_CLOBBERED. When generating
10225 code that runs in z/Architecture mode, but conforms to the 31-bit
10226 ABI, GPRs can hold 8 bytes; the ABI guarantees only that the lower 4
10227 bytes are saved across calls, however. */
10228
10229 static bool
10230 s390_hard_regno_call_part_clobbered (rtx_insn *insn ATTRIBUTE_UNUSED,
10231 unsigned int regno, machine_mode mode)
10232 {
10233 if (!TARGET_64BIT
10234 && TARGET_ZARCH
10235 && GET_MODE_SIZE (mode) > 4
10236 && ((regno >= 6 && regno <= 15) || regno == 32))
10237 return true;
10238
10239 if (TARGET_VX
10240 && GET_MODE_SIZE (mode) > 8
10241 && (((TARGET_64BIT && regno >= 24 && regno <= 31))
10242 || (!TARGET_64BIT && (regno == 18 || regno == 19))))
10243 return true;
10244
10245 return false;
10246 }
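/* Example of the first case (assumed command line): with -m31 -mzarch
   a DImode value uses all 8 bytes of a GPR, but the 31-bit ABI only
   guarantees the low 4 bytes of r6..r15 across calls, so the upper
   half must be treated as call-clobbered.  The second case covers
   vector values wider than 8 bytes living in registers that overlap
   the call-saved FPRs (f8..f15 on 64 bit, f4/f6 on 31 bit): only the
   FPR part, i.e. the leftmost 8 bytes, survives a call.  */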
10247
10248 /* Maximum number of registers to represent a value of mode MODE
10249 in a register of class RCLASS. */
10250
10251 int
10252 s390_class_max_nregs (enum reg_class rclass, machine_mode mode)
10253 {
10254 int reg_size;
10255 bool reg_pair_required_p = false;
10256
10257 switch (rclass)
10258 {
10259 case FP_REGS:
10260 case VEC_REGS:
10261 reg_size = TARGET_VX ? 16 : 8;
10262
10263 /* TF and TD modes would fit into a VR but we put them into a
10264 register pair since we do not have 128-bit FP instructions on
10265 full VRs.  */
10266 if (TARGET_VX
10267 && SCALAR_FLOAT_MODE_P (mode)
10268 && GET_MODE_SIZE (mode) >= 16)
10269 reg_pair_required_p = true;
10270
10271 /* Even if complex types would fit into a single FPR/VR we force
10272 them into a register pair to deal with the parts more easily.
10273 (FIXME: What about complex ints?) */
10274 if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
10275 reg_pair_required_p = true;
10276 break;
10277 case ACCESS_REGS:
10278 reg_size = 4;
10279 break;
10280 default:
10281 reg_size = UNITS_PER_WORD;
10282 break;
10283 }
10284
10285 if (reg_pair_required_p)
10286 return 2 * ((GET_MODE_SIZE (mode) / 2 + reg_size - 1) / reg_size);
10287
10288 return (GET_MODE_SIZE (mode) + reg_size - 1) / reg_size;
10289 }
10290
10291 /* Implement TARGET_CAN_CHANGE_MODE_CLASS. */
10292
10293 static bool
10294 s390_can_change_mode_class (machine_mode from_mode,
10295 machine_mode to_mode,
10296 reg_class_t rclass)
10297 {
10298 machine_mode small_mode;
10299 machine_mode big_mode;
10300
10301 /* V1TF and TF have different representations in vector
10302 registers. */
10303 if (reg_classes_intersect_p (VEC_REGS, rclass)
10304 && ((from_mode == V1TFmode && to_mode == TFmode)
10305 || (from_mode == TFmode && to_mode == V1TFmode)))
10306 return false;
10307
10308 if (GET_MODE_SIZE (from_mode) == GET_MODE_SIZE (to_mode))
10309 return true;
10310
10311 if (GET_MODE_SIZE (from_mode) < GET_MODE_SIZE (to_mode))
10312 {
10313 small_mode = from_mode;
10314 big_mode = to_mode;
10315 }
10316 else
10317 {
10318 small_mode = to_mode;
10319 big_mode = from_mode;
10320 }
10321
10322 /* Values residing in VRs are little-endian style.  All modes are
10323 placed left-aligned in a VR.  This means that we cannot allow
10324 switching between modes with differing sizes.  Also if the vector
10325 facility is available we still place TFmode values in VR register
10326 pairs, since the only instructions we have operating on TFmode
10327 work on register pairs.  Therefore we have to allow DFmode
10328 subregs of TFmode to enable the TFmode splitters.  */
10329 if (reg_classes_intersect_p (VEC_REGS, rclass)
10330 && (GET_MODE_SIZE (small_mode) < 8
10331 || s390_class_max_nregs (VEC_REGS, big_mode) == 1))
10332 return false;
10333
10334 /* Likewise for access registers, since they have only half the
10335 word size on 64-bit. */
10336 if (reg_classes_intersect_p (ACCESS_REGS, rclass))
10337 return false;
10338
10339 return true;
10340 }
10341
10342 /* Return true if we use LRA instead of reload pass. */
10343 static bool
10344 s390_lra_p (void)
10345 {
10346 return s390_lra_flag;
10347 }
10348
10349 /* Return true if register FROM can be eliminated via register TO. */
10350
10351 static bool
10352 s390_can_eliminate (const int from, const int to)
10353 {
10354 /* We have not marked the base register as fixed.
10355 Instead, we have an elimination rule BASE_REGNUM -> BASE_REGNUM.
10356 If a function requires the base register, we say here that this
10357 elimination cannot be performed. This will cause reload to free
10358 up the base register (as if it were fixed). On the other hand,
10359 if the current function does *not* require the base register, we
10360 say here the elimination succeeds, which in turn allows reload
10361 to allocate the base register for any other purpose. */
10362 if (from == BASE_REGNUM && to == BASE_REGNUM)
10363 {
10364 s390_init_frame_layout ();
10365 return cfun->machine->base_reg == NULL_RTX;
10366 }
10367
10368 /* Everything else must point into the stack frame. */
10369 gcc_assert (to == STACK_POINTER_REGNUM
10370 || to == HARD_FRAME_POINTER_REGNUM);
10371
10372 gcc_assert (from == FRAME_POINTER_REGNUM
10373 || from == ARG_POINTER_REGNUM
10374 || from == RETURN_ADDRESS_POINTER_REGNUM);
10375
10376 /* Make sure we actually saved the return address. */
10377 if (from == RETURN_ADDRESS_POINTER_REGNUM)
10378 if (!crtl->calls_eh_return
10379 && !cfun->stdarg
10380 && !cfun_frame_layout.save_return_addr_p)
10381 return false;
10382
10383 return true;
10384 }
10385
10386 /* Return offset between register FROM and TO initially after prolog. */
10387
10388 HOST_WIDE_INT
10389 s390_initial_elimination_offset (int from, int to)
10390 {
10391 HOST_WIDE_INT offset;
10392
10393 /* ??? Why are we called for non-eliminable pairs? */
10394 if (!s390_can_eliminate (from, to))
10395 return 0;
10396
10397 switch (from)
10398 {
10399 case FRAME_POINTER_REGNUM:
10400 offset = (get_frame_size()
10401 + STACK_POINTER_OFFSET
10402 + crtl->outgoing_args_size);
10403 break;
10404
10405 case ARG_POINTER_REGNUM:
10406 s390_init_frame_layout ();
10407 offset = cfun_frame_layout.frame_size + STACK_POINTER_OFFSET;
10408 break;
10409
10410 case RETURN_ADDRESS_POINTER_REGNUM:
10411 s390_init_frame_layout ();
10412
10413 if (cfun_frame_layout.first_save_gpr_slot == -1)
10414 {
10415 /* If it turns out that for stdarg nothing went into the reg
10416 save area we also do not need the return address
10417 pointer. */
10418 if (cfun->stdarg && !cfun_save_arg_fprs_p)
10419 return 0;
10420
10421 gcc_unreachable ();
10422 }
10423
10424 /* In order to make the following work it is not necessary for
10425 r14 to have a save slot. It is sufficient if one other GPR
10426 got one. Since the GPRs are always stored without gaps we
10427 are able to calculate where the r14 save slot would
10428 reside. */
10429 offset = (cfun_frame_layout.frame_size + cfun_frame_layout.gprs_offset +
10430 (RETURN_REGNUM - cfun_frame_layout.first_save_gpr_slot) *
10431 UNITS_PER_LONG);
10432 break;
10433
10434 case BASE_REGNUM:
10435 offset = 0;
10436 break;
10437
10438 default:
10439 gcc_unreachable ();
10440 }
10441
10442 return offset;
10443 }
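/* Worked example for the RETURN_ADDRESS_POINTER_REGNUM case: with
   first_save_gpr_slot == 6, the r14 slot lies
   (RETURN_REGNUM - 6) * UNITS_PER_LONG == 64 bytes (on 64 bit) above
   gprs_offset, so the return address resolves to
   frame_size + gprs_offset + 64 relative to the stack pointer.  */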
10444
10445 /* Emit insn to save fpr REGNUM at offset OFFSET relative
10446 to register BASE. Return generated insn. */
10447
10448 static rtx
10449 save_fpr (rtx base, int offset, int regnum)
10450 {
10451 rtx addr;
10452 addr = gen_rtx_MEM (DFmode, plus_constant (Pmode, base, offset));
10453
10454 if (regnum >= 16 && regnum <= (16 + FP_ARG_NUM_REG))
10455 set_mem_alias_set (addr, get_varargs_alias_set ());
10456 else
10457 set_mem_alias_set (addr, get_frame_alias_set ());
10458
10459 return emit_move_insn (addr, gen_rtx_REG (DFmode, regnum));
10460 }
10461
10462 /* Emit insn to restore fpr REGNUM from offset OFFSET relative
10463 to register BASE. Return generated insn. */
10464
10465 static rtx
10466 restore_fpr (rtx base, int offset, int regnum)
10467 {
10468 rtx addr;
10469 addr = gen_rtx_MEM (DFmode, plus_constant (Pmode, base, offset));
10470 set_mem_alias_set (addr, get_frame_alias_set ());
10471
10472 return emit_move_insn (gen_rtx_REG (DFmode, regnum), addr);
10473 }
10474
10475 /* Generate insn to save registers FIRST to LAST into
10476 the register save area located at offset OFFSET
10477 relative to register BASE. */
10478
10479 static rtx
10480 save_gprs (rtx base, int offset, int first, int last)
10481 {
10482 rtx addr, insn, note;
10483 int i;
10484
10485 addr = plus_constant (Pmode, base, offset);
10486 addr = gen_rtx_MEM (Pmode, addr);
10487
10488 set_mem_alias_set (addr, get_frame_alias_set ());
10489
10490 /* Special-case single register. */
10491 if (first == last)
10492 {
10493 if (TARGET_64BIT)
10494 insn = gen_movdi (addr, gen_rtx_REG (Pmode, first));
10495 else
10496 insn = gen_movsi (addr, gen_rtx_REG (Pmode, first));
10497
10498 if (!global_not_special_regno_p (first))
10499 RTX_FRAME_RELATED_P (insn) = 1;
10500 return insn;
10501 }
10502
10503
10504 insn = gen_store_multiple (addr,
10505 gen_rtx_REG (Pmode, first),
10506 GEN_INT (last - first + 1));
10507
10508 if (first <= 6 && cfun->stdarg)
10509 for (i = 0; i < XVECLEN (PATTERN (insn), 0); i++)
10510 {
10511 rtx mem = XEXP (XVECEXP (PATTERN (insn), 0, i), 0);
10512
10513 if (first + i <= 6)
10514 set_mem_alias_set (mem, get_varargs_alias_set ());
10515 }
10516
10517 /* We need to set the FRAME_RELATED flag on all SETs
10518 inside the store-multiple pattern.
10519
10520 However, we must not emit DWARF records for registers 2..5
10521 if they are stored for use by variable arguments ...
10522
10523 ??? Unfortunately, it is not enough to simply omit the
10524 FRAME_RELATED flags for those SETs, because the first SET
10525 of the PARALLEL is always treated as if it had the flag
10526 set, even if it does not.  Therefore we emit a new pattern
10527 without those registers as a REG_FRAME_RELATED_EXPR note.  */
10528
10529 if (first >= 6 && !global_not_special_regno_p (first))
10530 {
10531 rtx pat = PATTERN (insn);
10532
10533 for (i = 0; i < XVECLEN (pat, 0); i++)
10534 if (GET_CODE (XVECEXP (pat, 0, i)) == SET
10535 && !global_not_special_regno_p (REGNO (SET_SRC (XVECEXP (pat,
10536 0, i)))))
10537 RTX_FRAME_RELATED_P (XVECEXP (pat, 0, i)) = 1;
10538
10539 RTX_FRAME_RELATED_P (insn) = 1;
10540 }
10541 else if (last >= 6)
10542 {
10543 int start;
10544
10545 for (start = first >= 6 ? first : 6; start <= last; start++)
10546 if (!global_not_special_regno_p (start))
10547 break;
10548
10549 if (start > last)
10550 return insn;
10551
10552 addr = plus_constant (Pmode, base,
10553 offset + (start - first) * UNITS_PER_LONG);
10554
10555 if (start == last)
10556 {
10557 if (TARGET_64BIT)
10558 note = gen_movdi (gen_rtx_MEM (Pmode, addr),
10559 gen_rtx_REG (Pmode, start));
10560 else
10561 note = gen_movsi (gen_rtx_MEM (Pmode, addr),
10562 gen_rtx_REG (Pmode, start));
10563 note = PATTERN (note);
10564
10565 add_reg_note (insn, REG_FRAME_RELATED_EXPR, note);
10566 RTX_FRAME_RELATED_P (insn) = 1;
10567
10568 return insn;
10569 }
10570
10571 note = gen_store_multiple (gen_rtx_MEM (Pmode, addr),
10572 gen_rtx_REG (Pmode, start),
10573 GEN_INT (last - start + 1));
10574 note = PATTERN (note);
10575
10576 add_reg_note (insn, REG_FRAME_RELATED_EXPR, note);
10577
10578 for (i = 0; i < XVECLEN (note, 0); i++)
10579 if (GET_CODE (XVECEXP (note, 0, i)) == SET
10580 && !global_not_special_regno_p (REGNO (SET_SRC (XVECEXP (note,
10581 0, i)))))
10582 RTX_FRAME_RELATED_P (XVECEXP (note, 0, i)) = 1;
10583
10584 RTX_FRAME_RELATED_P (insn) = 1;
10585 }
10586
10587 return insn;
10588 }
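/* Illustration (hypothetical offsets): for a standard 64-bit prologue
   save_gprs (stack_pointer_rtx, 48, 6, 15) boils down to

     stmg %r6,%r15,48(%r15)

   with the note machinery above ensuring that DWARF CFI is emitted
   for r6..r15 but never for the vararg registers r2..r5 should the
   range start below r6.  */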
10589
10590 /* Generate insn to restore registers FIRST to LAST from
10591 the register save area located at offset OFFSET
10592 relative to register BASE. */
10593
10594 static rtx
10595 restore_gprs (rtx base, int offset, int first, int last)
10596 {
10597 rtx addr, insn;
10598
10599 addr = plus_constant (Pmode, base, offset);
10600 addr = gen_rtx_MEM (Pmode, addr);
10601 set_mem_alias_set (addr, get_frame_alias_set ());
10602
10603 /* Special-case single register. */
10604 if (first == last)
10605 {
10606 if (TARGET_64BIT)
10607 insn = gen_movdi (gen_rtx_REG (Pmode, first), addr);
10608 else
10609 insn = gen_movsi (gen_rtx_REG (Pmode, first), addr);
10610
10611 RTX_FRAME_RELATED_P (insn) = 1;
10612 return insn;
10613 }
10614
10615 insn = gen_load_multiple (gen_rtx_REG (Pmode, first),
10616 addr,
10617 GEN_INT (last - first + 1));
10618 RTX_FRAME_RELATED_P (insn) = 1;
10619 return insn;
10620 }
10621
10622 /* Return insn sequence to load the GOT register. */
10623
10624 rtx_insn *
10625 s390_load_got (void)
10626 {
10627 rtx_insn *insns;
10628
10629 /* We cannot use pic_offset_table_rtx here since we use this
10630 function also for non-pic if __tls_get_offset is called and in
10631 that case PIC_OFFSET_TABLE_REGNUM as well as pic_offset_table_rtx
10632 aren't usable. */
10633 rtx got_rtx = gen_rtx_REG (Pmode, 12);
10634
10635 start_sequence ();
10636
10637 emit_move_insn (got_rtx, s390_got_symbol ());
10638
10639 insns = get_insns ();
10640 end_sequence ();
10641 return insns;
10642 }
10643
10644 /* This ties together stack memory (MEM with an alias set of frame_alias_set)
10645 and the change to the stack pointer. */
10646
10647 static void
10648 s390_emit_stack_tie (void)
10649 {
10650 rtx mem = gen_frame_mem (BLKmode,
10651 gen_rtx_REG (Pmode, STACK_POINTER_REGNUM));
10652
10653 emit_insn (gen_stack_tie (mem));
10654 }
10655
10656 /* Copy GPRs into FPR save slots.  */
10657
10658 static void
10659 s390_save_gprs_to_fprs (void)
10660 {
10661 int i;
10662
10663 if (!TARGET_Z10 || !TARGET_HARD_FLOAT || !crtl->is_leaf)
10664 return;
10665
10666 for (i = 6; i < 16; i++)
10667 {
10668 if (FP_REGNO_P (cfun_gpr_save_slot (i)))
10669 {
10670 rtx_insn *insn =
10671 emit_move_insn (gen_rtx_REG (DImode, cfun_gpr_save_slot (i)),
10672 gen_rtx_REG (DImode, i));
10673 RTX_FRAME_RELATED_P (insn) = 1;
10674 /* This prevents dwarf2cfi from interpreting the set.  If it
10675 did, it might emit def_cfa_register notes that set an FPR as
10676 the new CFA.  */
10677 add_reg_note (insn, REG_CFA_REGISTER, copy_rtx (PATTERN (insn)));
10678 }
10679 }
10680 }
10681
10682 /* Restore GPRs from FPR save slots. */
10683
10684 static void
10685 s390_restore_gprs_from_fprs (void)
10686 {
10687 int i;
10688
10689 if (!TARGET_Z10 || !TARGET_HARD_FLOAT || !crtl->is_leaf)
10690 return;
10691
10692 /* Restore the GPRs starting with the stack pointer. That way the
10693 stack pointer already has its original value when it comes to
10694 restoring the hard frame pointer. So we can set the cfa reg back
10695 to the stack pointer. */
10696 for (i = STACK_POINTER_REGNUM; i >= 6; i--)
10697 {
10698 rtx_insn *insn;
10699
10700 if (!FP_REGNO_P (cfun_gpr_save_slot (i)))
10701 continue;
10702
10703 rtx fpr = gen_rtx_REG (DImode, cfun_gpr_save_slot (i));
10704
10705 if (i == STACK_POINTER_REGNUM)
10706 insn = emit_insn (gen_stack_restore_from_fpr (fpr));
10707 else
10708 insn = emit_move_insn (gen_rtx_REG (DImode, i), fpr);
10709
10710 df_set_regs_ever_live (i, true);
10711 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DImode, i));
10712
10713 /* If either the stack pointer or the frame pointer gets restored,
10714 set the CFA value back to its value at function entry.  Doing this
10715 for the frame pointer results in .cfi_def_cfa_register 15,
10716 which is OK since if the stack pointer got modified it has
10717 been restored already.  */
10718 if (i == STACK_POINTER_REGNUM || i == HARD_FRAME_POINTER_REGNUM)
10719 add_reg_note (insn, REG_CFA_DEF_CFA,
10720 plus_constant (Pmode, stack_pointer_rtx,
10721 STACK_POINTER_OFFSET));
10722 RTX_FRAME_RELATED_P (insn) = 1;
10723 }
10724 }
10725
10726
10727 /* A pass run immediately before shrink-wrapping and prologue and epilogue
10728 generation. */
10729
10730 namespace {
10731
10732 const pass_data pass_data_s390_early_mach =
10733 {
10734 RTL_PASS, /* type */
10735 "early_mach", /* name */
10736 OPTGROUP_NONE, /* optinfo_flags */
10737 TV_MACH_DEP, /* tv_id */
10738 0, /* properties_required */
10739 0, /* properties_provided */
10740 0, /* properties_destroyed */
10741 0, /* todo_flags_start */
10742 ( TODO_df_verify | TODO_df_finish ), /* todo_flags_finish */
10743 };
10744
10745 class pass_s390_early_mach : public rtl_opt_pass
10746 {
10747 public:
10748 pass_s390_early_mach (gcc::context *ctxt)
10749 : rtl_opt_pass (pass_data_s390_early_mach, ctxt)
10750 {}
10751
10752 /* opt_pass methods: */
10753 virtual unsigned int execute (function *);
10754
10755 }; // class pass_s390_early_mach
10756
10757 unsigned int
10758 pass_s390_early_mach::execute (function *fun)
10759 {
10760 rtx_insn *insn;
10761
10762 /* Try to get rid of the FPR clobbers. */
10763 s390_optimize_nonescaping_tx ();
10764
10765 /* Re-compute register info. */
10766 s390_register_info ();
10767
10768 /* If we're using a base register, ensure that it is always valid for
10769 the first non-prologue instruction. */
10770 if (fun->machine->base_reg)
10771 emit_insn_at_entry (gen_main_pool (fun->machine->base_reg));
10772
10773 /* Annotate all constant pool references to let the scheduler know
10774 they implicitly use the base register. */
10775 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
10776 if (INSN_P (insn))
10777 {
10778 annotate_constant_pool_refs (insn);
10779 df_insn_rescan (insn);
10780 }
10781 return 0;
10782 }
10783
10784 } // anon namespace
10785
10786 rtl_opt_pass *
10787 make_pass_s390_early_mach (gcc::context *ctxt)
10788 {
10789 return new pass_s390_early_mach (ctxt);
10790 }
10791
10792 /* Calculate TARGET = REG + OFFSET as s390_emit_prologue would do it.
10793 - push immediates that are too big into the literal pool and annotate the refs
10794 - emit frame-related notes for stack pointer changes.  */
10795
10796 static rtx
10797 s390_prologue_plus_offset (rtx target, rtx reg, rtx offset, bool frame_related_p)
10798 {
10799 rtx_insn *insn;
10800 rtx orig_offset = offset;
10801
10802 gcc_assert (REG_P (target));
10803 gcc_assert (REG_P (reg));
10804 gcc_assert (CONST_INT_P (offset));
10805
10806 if (offset == const0_rtx) /* lr/lgr */
10807 {
10808 insn = emit_move_insn (target, reg);
10809 }
10810 else if (DISP_IN_RANGE (INTVAL (offset))) /* la */
10811 {
10812 insn = emit_move_insn (target, gen_rtx_PLUS (Pmode, reg,
10813 offset));
10814 }
10815 else
10816 {
10817 if (!satisfies_constraint_K (offset) /* ahi/aghi */
10818 && (!TARGET_EXTIMM
10819 || (!satisfies_constraint_Op (offset) /* alfi/algfi */
10820 && !satisfies_constraint_On (offset)))) /* slfi/slgfi */
10821 offset = force_const_mem (Pmode, offset);
10822
10823 if (target != reg)
10824 {
10825 insn = emit_move_insn (target, reg);
10826 RTX_FRAME_RELATED_P (insn) = frame_related_p ? 1 : 0;
10827 }
10828
10829 insn = emit_insn (gen_add2_insn (target, offset));
10830
10831 if (!CONST_INT_P (offset))
10832 {
10833 annotate_constant_pool_refs (insn);
10834
10835 if (frame_related_p)
10836 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
10837 gen_rtx_SET (target,
10838 gen_rtx_PLUS (Pmode, target,
10839 orig_offset)));
10840 }
10841 }
10842
10843 RTX_FRAME_RELATED_P (insn) = frame_related_p ? 1 : 0;
10844
10845 /* If this is a stack adjustment and we are generating a stack clash
10846 prologue, then add a REG_STACK_CHECK note to signal that this insn
10847 should be left alone. */
10848 if (flag_stack_clash_protection && target == stack_pointer_rtx)
10849 add_reg_note (insn, REG_STACK_CHECK, const0_rtx);
10850
10851 return insn;
10852 }
10853
10854 /* Emit a compare instruction with a volatile memory access as stack
10855 probe. It does not waste store tags and does not clobber any
10856 registers apart from the condition code. */
10857 static void
10858 s390_emit_stack_probe (rtx addr)
10859 {
10860 rtx tmp = gen_rtx_MEM (Pmode, addr);
10861 MEM_VOLATILE_P (tmp) = 1;
10862 s390_emit_compare (EQ, gen_rtx_REG (Pmode, 0), tmp);
10863 emit_insn (gen_blockage ());
10864 }
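/* On 64 bit the probe boils down to something like (illustrative;
   the exact compare mnemonic depends on the operands picked):

     cg %r0,0(%r1)    # read-only touch of the probe address

   The access faults if it hits the guard page; nothing is stored and
   only the condition code is clobbered.  */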
10865
10866 /* Use a runtime loop if we have to emit more probes than this. */
10867 #define MIN_UNROLL_PROBES 3
10868
10869 /* Allocate SIZE bytes of stack space, using TEMP_REG as a temporary
10870 if necessary. LAST_PROBE_OFFSET contains the offset of the closest
10871 probe relative to the stack pointer.
10872
10873 Note that SIZE is negative.
10874
10875 The return value is true if TEMP_REG has been clobbered. */
10876 static bool
10877 allocate_stack_space (rtx size, HOST_WIDE_INT last_probe_offset,
10878 rtx temp_reg)
10879 {
10880 bool temp_reg_clobbered_p = false;
10881 HOST_WIDE_INT probe_interval
10882 = 1 << PARAM_VALUE (PARAM_STACK_CLASH_PROTECTION_PROBE_INTERVAL);
10883 HOST_WIDE_INT guard_size
10884 = 1 << PARAM_VALUE (PARAM_STACK_CLASH_PROTECTION_GUARD_SIZE);
10885
10886 if (flag_stack_clash_protection)
10887 {
10888 if (last_probe_offset + -INTVAL (size) < guard_size)
10889 dump_stack_clash_frame_info (NO_PROBE_SMALL_FRAME, true);
10890 else
10891 {
10892 rtx offset = GEN_INT (probe_interval - UNITS_PER_LONG);
10893 HOST_WIDE_INT rounded_size = -INTVAL (size) & -probe_interval;
10894 HOST_WIDE_INT num_probes = rounded_size / probe_interval;
10895 HOST_WIDE_INT residual = -INTVAL (size) - rounded_size;
10896
10897 if (num_probes < MIN_UNROLL_PROBES)
10898 {
10899 /* Emit unrolled probe statements. */
10900
10901 for (unsigned int i = 0; i < num_probes; i++)
10902 {
10903 s390_prologue_plus_offset (stack_pointer_rtx,
10904 stack_pointer_rtx,
10905 GEN_INT (-probe_interval), true);
10906 s390_emit_stack_probe (gen_rtx_PLUS (Pmode,
10907 stack_pointer_rtx,
10908 offset));
10909 }
10910 dump_stack_clash_frame_info (PROBE_INLINE, residual != 0);
10911 }
10912 else
10913 {
10914 /* Emit a loop probing the pages. */
10915
10916 rtx_code_label *loop_start_label = gen_label_rtx ();
10917
10918 /* From now on temp_reg will be the CFA register. */
10919 s390_prologue_plus_offset (temp_reg, stack_pointer_rtx,
10920 GEN_INT (-rounded_size), true);
10921 emit_label (loop_start_label);
10922
10923 s390_prologue_plus_offset (stack_pointer_rtx,
10924 stack_pointer_rtx,
10925 GEN_INT (-probe_interval), false);
10926 s390_emit_stack_probe (gen_rtx_PLUS (Pmode,
10927 stack_pointer_rtx,
10928 offset));
10929 emit_cmp_and_jump_insns (stack_pointer_rtx, temp_reg,
10930 GT, NULL_RTX,
10931 Pmode, 1, loop_start_label);
10932
10933 	  /* Without this make_edges ICEs.  */
10934 JUMP_LABEL (get_last_insn ()) = loop_start_label;
10935 LABEL_NUSES (loop_start_label) = 1;
10936
10937 /* That's going to be a NOP since stack pointer and
10938 temp_reg are supposed to be the same here. We just
10939 emit it to set the CFA reg back to r15. */
10940 s390_prologue_plus_offset (stack_pointer_rtx, temp_reg,
10941 const0_rtx, true);
10942 temp_reg_clobbered_p = true;
10943 dump_stack_clash_frame_info (PROBE_LOOP, residual != 0);
10944 }
10945
10946 /* Handle any residual allocation request. */
10947 s390_prologue_plus_offset (stack_pointer_rtx,
10948 stack_pointer_rtx,
10949 GEN_INT (-residual), true);
10950 last_probe_offset += residual;
10951 if (last_probe_offset >= probe_interval)
10952 s390_emit_stack_probe (gen_rtx_PLUS (Pmode,
10953 stack_pointer_rtx,
10954 GEN_INT (residual
10955 - UNITS_PER_LONG)));
10956
10957 return temp_reg_clobbered_p;
10958 }
10959 }
10960
10961 /* Subtract frame size from stack pointer. */
10962 s390_prologue_plus_offset (stack_pointer_rtx,
10963 stack_pointer_rtx,
10964 size, true);
10965
10966 return temp_reg_clobbered_p;
10967 }
10968
10969 /* Expand the prologue into a bunch of separate insns. */
10970
10971 void
10972 s390_emit_prologue (void)
10973 {
10974 rtx insn, addr;
10975 rtx temp_reg;
10976 int i;
10977 int offset;
10978 int next_fpr = 0;
10979
10980   /* Choose the best register to use as a temporary within the prologue.
10981      With TPF profiling, register 14 must be avoided - the tracing function
10982      needs the original contents of r14 to be preserved.  */
10983
10984 if (!has_hard_reg_initial_val (Pmode, RETURN_REGNUM)
10985 && !crtl->is_leaf
10986 && !TARGET_TPF_PROFILING)
10987 temp_reg = gen_rtx_REG (Pmode, RETURN_REGNUM);
10988 else if (flag_split_stack && cfun->stdarg)
10989 temp_reg = gen_rtx_REG (Pmode, 12);
10990 else
10991 temp_reg = gen_rtx_REG (Pmode, 1);
10992
10993 /* When probing for stack-clash mitigation, we have to track the distance
10994 between the stack pointer and closest known reference.
10995
10996 Most of the time we have to make a worst case assumption. The
10997 only exception is when TARGET_BACKCHAIN is active, in which case
10998 we know *sp (offset 0) was written. */
10999 HOST_WIDE_INT probe_interval
11000 = 1 << PARAM_VALUE (PARAM_STACK_CLASH_PROTECTION_PROBE_INTERVAL);
11001 HOST_WIDE_INT last_probe_offset
11002 = (TARGET_BACKCHAIN
11003 ? (TARGET_PACKED_STACK ? STACK_POINTER_OFFSET - UNITS_PER_LONG : 0)
11004 : probe_interval - (STACK_BOUNDARY / UNITS_PER_WORD));
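  /* A worked example of the worst case: with -m64, no backchain, and a
     (hypothetical) 4 KiB probe interval this starts out as
     4096 - 64 / 8 = 4088 bytes, i.e. the largest distance the incoming
     code may already have put between the stack pointer and the last
     probed word.  */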
11005
11006 s390_save_gprs_to_fprs ();
11007
11008 /* Save call saved gprs. */
11009 if (cfun_frame_layout.first_save_gpr != -1)
11010 {
11011 insn = save_gprs (stack_pointer_rtx,
11012 cfun_frame_layout.gprs_offset +
11013 UNITS_PER_LONG * (cfun_frame_layout.first_save_gpr
11014 - cfun_frame_layout.first_save_gpr_slot),
11015 cfun_frame_layout.first_save_gpr,
11016 cfun_frame_layout.last_save_gpr);
11017
11018 /* This is not 100% correct. If we have more than one register saved,
11019 then LAST_PROBE_OFFSET can move even closer to sp. */
11020 last_probe_offset
11021 = (cfun_frame_layout.gprs_offset +
11022 UNITS_PER_LONG * (cfun_frame_layout.first_save_gpr
11023 - cfun_frame_layout.first_save_gpr_slot));
11024
11025 emit_insn (insn);
11026 }
11027
11028 /* Dummy insn to mark literal pool slot. */
11029
11030 if (cfun->machine->base_reg)
11031 emit_insn (gen_main_pool (cfun->machine->base_reg));
11032
11033 offset = cfun_frame_layout.f0_offset;
11034
11035 /* Save f0 and f2. */
11036 for (i = FPR0_REGNUM; i <= FPR0_REGNUM + 1; i++)
11037 {
11038 if (cfun_fpr_save_p (i))
11039 {
11040 save_fpr (stack_pointer_rtx, offset, i);
11041 if (offset < last_probe_offset)
11042 last_probe_offset = offset;
11043 offset += 8;
11044 }
11045 else if (!TARGET_PACKED_STACK || cfun->stdarg)
11046 offset += 8;
11047 }
11048
11049 /* Save f4 and f6. */
11050 offset = cfun_frame_layout.f4_offset;
11051 for (i = FPR4_REGNUM; i <= FPR4_REGNUM + 1; i++)
11052 {
11053 if (cfun_fpr_save_p (i))
11054 {
11055 insn = save_fpr (stack_pointer_rtx, offset, i);
11056 if (offset < last_probe_offset)
11057 last_probe_offset = offset;
11058 offset += 8;
11059
11060 /* If f4 and f6 are call clobbered they are saved due to
11061 stdargs and therefore are not frame related. */
11062 if (!call_really_used_regs[i])
11063 RTX_FRAME_RELATED_P (insn) = 1;
11064 }
11065 else if (!TARGET_PACKED_STACK || call_really_used_regs[i])
11066 offset += 8;
11067 }
11068
11069 if (TARGET_PACKED_STACK
11070 && cfun_save_high_fprs_p
11071 && cfun_frame_layout.f8_offset + cfun_frame_layout.high_fprs * 8 > 0)
11072 {
11073 offset = (cfun_frame_layout.f8_offset
11074 + (cfun_frame_layout.high_fprs - 1) * 8);
11075
11076 for (i = FPR15_REGNUM; i >= FPR8_REGNUM && offset >= 0; i--)
11077 if (cfun_fpr_save_p (i))
11078 {
11079 insn = save_fpr (stack_pointer_rtx, offset, i);
11080 if (offset < last_probe_offset)
11081 last_probe_offset = offset;
11082
11083 RTX_FRAME_RELATED_P (insn) = 1;
11084 offset -= 8;
11085 }
11086 if (offset >= cfun_frame_layout.f8_offset)
11087 next_fpr = i;
11088 }
11089
11090 if (!TARGET_PACKED_STACK)
11091 next_fpr = cfun_save_high_fprs_p ? FPR15_REGNUM : 0;
11092
11093 if (flag_stack_usage_info)
11094 current_function_static_stack_size = cfun_frame_layout.frame_size;
11095
11096 /* Decrement stack pointer. */
11097
11098 if (cfun_frame_layout.frame_size > 0)
11099 {
11100 rtx frame_off = GEN_INT (-cfun_frame_layout.frame_size);
11101 rtx_insn *stack_pointer_backup_loc;
11102 bool temp_reg_clobbered_p;
11103
11104 if (s390_stack_size)
11105 {
11106 HOST_WIDE_INT stack_guard;
11107
11108 if (s390_stack_guard)
11109 stack_guard = s390_stack_guard;
11110 else
11111 {
11112 	      /* If no value for the stack guard is provided, the smallest power
11113 		 of 2 larger than the current frame size is chosen.  */
11114 stack_guard = 1;
11115 while (stack_guard < cfun_frame_layout.frame_size)
11116 stack_guard <<= 1;
11117 }
11118
11119 if (cfun_frame_layout.frame_size >= s390_stack_size)
11120 {
11121 warning (0, "frame size of function %qs is %wd"
11122 " bytes exceeding user provided stack limit of "
11123 "%d bytes. "
11124 "An unconditional trap is added.",
11125 current_function_name(), cfun_frame_layout.frame_size,
11126 s390_stack_size);
11127 emit_insn (gen_trap ());
11128 emit_barrier ();
11129 }
11130 else
11131 {
11132 /* stack_guard has to be smaller than s390_stack_size.
11133 Otherwise we would emit an AND with zero which would
11134 not match the test under mask pattern. */
11135 if (stack_guard >= s390_stack_size)
11136 {
11137 warning (0, "frame size of function %qs is %wd"
11138 " bytes which is more than half the stack size. "
11139 "The dynamic check would not be reliable. "
11140 "No check emitted for this function.",
11141 current_function_name(),
11142 cfun_frame_layout.frame_size);
11143 }
11144 else
11145 {
11146 HOST_WIDE_INT stack_check_mask = ((s390_stack_size - 1)
11147 & ~(stack_guard - 1));
11148
11149 rtx t = gen_rtx_AND (Pmode, stack_pointer_rtx,
11150 GEN_INT (stack_check_mask));
11151 if (TARGET_64BIT)
11152 emit_insn (gen_ctrapdi4 (gen_rtx_EQ (VOIDmode,
11153 t, const0_rtx),
11154 t, const0_rtx, const0_rtx));
11155 else
11156 emit_insn (gen_ctrapsi4 (gen_rtx_EQ (VOIDmode,
11157 t, const0_rtx),
11158 t, const0_rtx, const0_rtx));
11159 }
11160 }
11161 }
11162
11163 if (s390_warn_framesize > 0
11164 && cfun_frame_layout.frame_size >= s390_warn_framesize)
11165 warning (0, "frame size of %qs is %wd bytes",
11166 current_function_name (), cfun_frame_layout.frame_size);
11167
11168 if (s390_warn_dynamicstack_p && cfun->calls_alloca)
11169 warning (0, "%qs uses dynamic stack allocation", current_function_name ());
11170
11171       /* Remember the location where we could back up the incoming stack
11172 	 pointer.  */
11173 stack_pointer_backup_loc = get_last_insn ();
11174
11175 temp_reg_clobbered_p = allocate_stack_space (frame_off, last_probe_offset,
11176 temp_reg);
11177
11178 if (TARGET_BACKCHAIN || next_fpr)
11179 {
11180 if (temp_reg_clobbered_p)
11181 {
11182 	      /* allocate_stack_space had to make use of temp_reg and
11183 		 we need it to hold a backup of the incoming stack
11184 		 pointer.  Reconstruct that value from the current
11185 		 stack pointer.  */
11186 s390_prologue_plus_offset (temp_reg, stack_pointer_rtx,
11187 GEN_INT (cfun_frame_layout.frame_size),
11188 false);
11189 }
11190 else
11191 {
11192 	      /* allocate_stack_space didn't actually require
11193 		 temp_reg.  Insert the stack pointer backup insn
11194 		 before the stack pointer decrement code, knowing now
11195 		 that the value will survive.  */
11196 emit_insn_after (gen_move_insn (temp_reg, stack_pointer_rtx),
11197 stack_pointer_backup_loc);
11198 }
11199 }
11200
11201 /* Set backchain. */
11202
11203 if (TARGET_BACKCHAIN)
11204 {
11205 if (cfun_frame_layout.backchain_offset)
11206 addr = gen_rtx_MEM (Pmode,
11207 plus_constant (Pmode, stack_pointer_rtx,
11208 cfun_frame_layout.backchain_offset));
11209 else
11210 addr = gen_rtx_MEM (Pmode, stack_pointer_rtx);
11211 set_mem_alias_set (addr, get_frame_alias_set ());
11212 insn = emit_insn (gen_move_insn (addr, temp_reg));
11213 }
11214
11215 /* If we support non-call exceptions (e.g. for Java),
11216 we need to make sure the backchain pointer is set up
11217 before any possibly trapping memory access. */
11218 if (TARGET_BACKCHAIN && cfun->can_throw_non_call_exceptions)
11219 {
11220 addr = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (VOIDmode));
11221 emit_clobber (addr);
11222 }
11223 }
11224 else if (flag_stack_clash_protection)
11225 dump_stack_clash_frame_info (NO_PROBE_NO_FRAME, false);
11226
11227 /* Save fprs 8 - 15 (64 bit ABI). */
11228
11229 if (cfun_save_high_fprs_p && next_fpr)
11230 {
11231 /* If the stack might be accessed through a different register
11232 we have to make sure that the stack pointer decrement is not
11233 moved below the use of the stack slots. */
11234 s390_emit_stack_tie ();
11235
11236 insn = emit_insn (gen_add2_insn (temp_reg,
11237 GEN_INT (cfun_frame_layout.f8_offset)));
11238
11239 offset = 0;
11240
11241 for (i = FPR8_REGNUM; i <= next_fpr; i++)
11242 if (cfun_fpr_save_p (i))
11243 {
11244 rtx addr = plus_constant (Pmode, stack_pointer_rtx,
11245 cfun_frame_layout.frame_size
11246 + cfun_frame_layout.f8_offset
11247 + offset);
11248
11249 insn = save_fpr (temp_reg, offset, i);
11250 offset += 8;
11251 RTX_FRAME_RELATED_P (insn) = 1;
11252 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
11253 gen_rtx_SET (gen_rtx_MEM (DFmode, addr),
11254 gen_rtx_REG (DFmode, i)));
11255 }
11256 }
11257
11258 /* Set frame pointer, if needed. */
11259
11260 if (frame_pointer_needed)
11261 {
11262 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
11263 RTX_FRAME_RELATED_P (insn) = 1;
11264 }
11265
11266 /* Set up got pointer, if needed. */
11267
11268 if (flag_pic && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
11269 {
11270 rtx_insn *insns = s390_load_got ();
11271
11272 for (rtx_insn *insn = insns; insn; insn = NEXT_INSN (insn))
11273 annotate_constant_pool_refs (insn);
11274
11275 emit_insn (insns);
11276 }
11277
11278 if (TARGET_TPF_PROFILING)
11279 {
11280 /* Generate a BAS instruction to serve as a function
11281 entry intercept to facilitate the use of tracing
11282 algorithms located at the branch target. */
11283 emit_insn (gen_prologue_tpf ());
11284
11285 /* Emit a blockage here so that all code
11286 lies between the profiling mechanisms. */
11287 emit_insn (gen_blockage ());
11288 }
11289 }
11290
11291 /* Expand the epilogue into a bunch of separate insns. */
11292
11293 void
11294 s390_emit_epilogue (bool sibcall)
11295 {
11296 rtx frame_pointer, return_reg = NULL_RTX, cfa_restores = NULL_RTX;
11297 int area_bottom, area_top, offset = 0;
11298 int next_offset;
11299 int i;
11300
11301 if (TARGET_TPF_PROFILING)
11302 {
11303
11304 /* Generate a BAS instruction to serve as a function
11305 entry intercept to facilitate the use of tracing
11306 algorithms located at the branch target. */
11307
11308 /* Emit a blockage here so that all code
11309 lies between the profiling mechanisms. */
11310 emit_insn (gen_blockage ());
11311
11312 emit_insn (gen_epilogue_tpf ());
11313 }
11314
11315 /* Check whether to use frame or stack pointer for restore. */
11316
11317 frame_pointer = (frame_pointer_needed
11318 ? hard_frame_pointer_rtx : stack_pointer_rtx);
11319
11320 s390_frame_area (&area_bottom, &area_top);
11321
11322 /* Check whether we can access the register save area.
11323 If not, increment the frame pointer as required. */
11324
11325 if (area_top <= area_bottom)
11326 {
11327 /* Nothing to restore. */
11328 }
11329 else if (DISP_IN_RANGE (cfun_frame_layout.frame_size + area_bottom)
11330 && DISP_IN_RANGE (cfun_frame_layout.frame_size + area_top - 1))
11331 {
11332 /* Area is in range. */
11333 offset = cfun_frame_layout.frame_size;
11334 }
11335 else
11336 {
11337 rtx_insn *insn;
11338 rtx frame_off, cfa;
11339
11340 offset = area_bottom < 0 ? -area_bottom : 0;
11341 frame_off = GEN_INT (cfun_frame_layout.frame_size - offset);
11342
11343 cfa = gen_rtx_SET (frame_pointer,
11344 gen_rtx_PLUS (Pmode, frame_pointer, frame_off));
11345 if (DISP_IN_RANGE (INTVAL (frame_off)))
11346 {
11347 rtx set;
11348
11349 set = gen_rtx_SET (frame_pointer,
11350 gen_rtx_PLUS (Pmode, frame_pointer, frame_off));
11351 insn = emit_insn (set);
11352 }
11353 else
11354 {
11355 if (!CONST_OK_FOR_K (INTVAL (frame_off)))
11356 frame_off = force_const_mem (Pmode, frame_off);
11357
11358 insn = emit_insn (gen_add2_insn (frame_pointer, frame_off));
11359 annotate_constant_pool_refs (insn);
11360 }
11361 add_reg_note (insn, REG_CFA_ADJUST_CFA, cfa);
11362 RTX_FRAME_RELATED_P (insn) = 1;
11363 }
11364
11365 /* Restore call saved fprs. */
11366
11367 if (TARGET_64BIT)
11368 {
11369 if (cfun_save_high_fprs_p)
11370 {
11371 next_offset = cfun_frame_layout.f8_offset;
11372 for (i = FPR8_REGNUM; i <= FPR15_REGNUM; i++)
11373 {
11374 if (cfun_fpr_save_p (i))
11375 {
11376 restore_fpr (frame_pointer,
11377 offset + next_offset, i);
11378 cfa_restores
11379 = alloc_reg_note (REG_CFA_RESTORE,
11380 gen_rtx_REG (DFmode, i), cfa_restores);
11381 next_offset += 8;
11382 }
11383 }
11384 }
11385
11386 }
11387 else
11388 {
11389 next_offset = cfun_frame_layout.f4_offset;
11390 /* f4, f6 */
11391 for (i = FPR4_REGNUM; i <= FPR4_REGNUM + 1; i++)
11392 {
11393 if (cfun_fpr_save_p (i))
11394 {
11395 restore_fpr (frame_pointer,
11396 offset + next_offset, i);
11397 cfa_restores
11398 = alloc_reg_note (REG_CFA_RESTORE,
11399 gen_rtx_REG (DFmode, i), cfa_restores);
11400 next_offset += 8;
11401 }
11402 else if (!TARGET_PACKED_STACK)
11403 next_offset += 8;
11404 }
11405
11406 }
11407
11408 /* Restore call saved gprs. */
11409
11410 if (cfun_frame_layout.first_restore_gpr != -1)
11411 {
11412 rtx insn, addr;
11413 int i;
11414
11415       /* Check for global registers and save their current values
11416 	 to the stack locations from which they are about to be restored.  */
11417
11418 for (i = cfun_frame_layout.first_restore_gpr;
11419 i <= cfun_frame_layout.last_restore_gpr;
11420 i++)
11421 {
11422 if (global_not_special_regno_p (i))
11423 {
11424 addr = plus_constant (Pmode, frame_pointer,
11425 offset + cfun_frame_layout.gprs_offset
11426 + (i - cfun_frame_layout.first_save_gpr_slot)
11427 * UNITS_PER_LONG);
11428 addr = gen_rtx_MEM (Pmode, addr);
11429 set_mem_alias_set (addr, get_frame_alias_set ());
11430 emit_move_insn (addr, gen_rtx_REG (Pmode, i));
11431 }
11432 else
11433 cfa_restores
11434 = alloc_reg_note (REG_CFA_RESTORE,
11435 gen_rtx_REG (Pmode, i), cfa_restores);
11436 }
11437
11438       /* Fetch the return address from the stack before the load multiple;
11439 	 this helps scheduling.
11440
11441 Only do this if we already decided that r14 needs to be
11442 saved to a stack slot. (And not just because r14 happens to
11443 be in between two GPRs which need saving.) Otherwise it
11444 would be difficult to take that decision back in
11445 s390_optimize_prologue.
11446
11447 This optimization is only helpful on in-order machines. */
11448 if (! sibcall
11449 && cfun_gpr_save_slot (RETURN_REGNUM) == SAVE_SLOT_STACK
11450 && s390_tune <= PROCESSOR_2097_Z10)
11451 {
11452 int return_regnum = find_unused_clobbered_reg();
11453 if (!return_regnum
11454 || (TARGET_INDIRECT_BRANCH_NOBP_RET_OPTION
11455 && !TARGET_CPU_Z10
11456 && return_regnum == INDIRECT_BRANCH_THUNK_REGNUM))
11457 {
11458 gcc_assert (INDIRECT_BRANCH_THUNK_REGNUM != 4);
11459 return_regnum = 4;
11460 }
11461 return_reg = gen_rtx_REG (Pmode, return_regnum);
11462
11463 addr = plus_constant (Pmode, frame_pointer,
11464 offset + cfun_frame_layout.gprs_offset
11465 + (RETURN_REGNUM
11466 - cfun_frame_layout.first_save_gpr_slot)
11467 * UNITS_PER_LONG);
11468 addr = gen_rtx_MEM (Pmode, addr);
11469 set_mem_alias_set (addr, get_frame_alias_set ());
11470 emit_move_insn (return_reg, addr);
11471
11472 	  /* Once we have applied this optimization we have to make sure
11473 	     s390_optimize_prologue does not try to remove the store
11474 	     of r14 since we will not be able to find the load issued
11475 	     here.  */
11476 cfun_frame_layout.save_return_addr_p = true;
11477 }
11478
11479 insn = restore_gprs (frame_pointer,
11480 offset + cfun_frame_layout.gprs_offset
11481 + (cfun_frame_layout.first_restore_gpr
11482 - cfun_frame_layout.first_save_gpr_slot)
11483 * UNITS_PER_LONG,
11484 cfun_frame_layout.first_restore_gpr,
11485 cfun_frame_layout.last_restore_gpr);
11486 insn = emit_insn (insn);
11487 REG_NOTES (insn) = cfa_restores;
11488 add_reg_note (insn, REG_CFA_DEF_CFA,
11489 plus_constant (Pmode, stack_pointer_rtx,
11490 STACK_POINTER_OFFSET));
11491 RTX_FRAME_RELATED_P (insn) = 1;
11492 }
11493
11494 s390_restore_gprs_from_fprs ();
11495
11496 if (! sibcall)
11497 {
11498 if (!return_reg && !s390_can_use_return_insn ())
11499 	/* We planned to emit (return), but we are not allowed to.  */
11500 return_reg = gen_rtx_REG (Pmode, RETURN_REGNUM);
11501
11502 if (return_reg)
11503 /* Emit (return) and (use). */
11504 emit_jump_insn (gen_return_use (return_reg));
11505 else
11506 /* The fact that RETURN_REGNUM is used is already reflected by
11507 EPILOGUE_USES. Emit plain (return). */
11508 emit_jump_insn (gen_return ());
11509 }
11510 }
11511
11512 /* Implement TARGET_SET_UP_BY_PROLOGUE. */
11513
11514 static void
11515 s300_set_up_by_prologue (hard_reg_set_container *regs)
11516 {
11517 if (cfun->machine->base_reg
11518 && !call_really_used_regs[REGNO (cfun->machine->base_reg)])
11519 SET_HARD_REG_BIT (regs->set, REGNO (cfun->machine->base_reg));
11520 }
11521
11522 /* -fsplit-stack support. */
11523
11524 /* A SYMBOL_REF for __morestack. */
11525 static GTY(()) rtx morestack_ref;
11526
11527 /* When using -fsplit-stack, the allocation routines set a field in
11528 the TCB to the bottom of the stack plus this much space, measured
11529 in bytes. */
11530
11531 #define SPLIT_STACK_AVAILABLE 1024
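/* In other words: if the frame needs no more than this, the code below can
   compare the stack pointer against __private_ss directly instead of first
   adding the frame size to it, saving an instruction on the fast path.  */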
11532
11533 /* Emit -fsplit-stack prologue, which goes before the regular function
11534 prologue. */
11535
11536 void
11537 s390_expand_split_stack_prologue (void)
11538 {
11539 rtx r1, guard, cc = NULL;
11540 rtx_insn *insn;
11541 /* Offset from thread pointer to __private_ss. */
11542 int psso = TARGET_64BIT ? 0x38 : 0x20;
11544 /* Frame size and argument size - the two parameters to __morestack. */
11545 HOST_WIDE_INT frame_size = cfun_frame_layout.frame_size;
11546 /* Align argument size to 8 bytes - simplifies __morestack code. */
11547 HOST_WIDE_INT args_size = crtl->args.size >= 0
11548 ? ((crtl->args.size + 7) & ~7)
11549 : 0;
11550 /* Label to be called by __morestack. */
11551 rtx_code_label *call_done = NULL;
11552 rtx_code_label *parm_base = NULL;
11553 rtx tmp;
11554
11555 gcc_assert (flag_split_stack && reload_completed);
11556
11557 r1 = gen_rtx_REG (Pmode, 1);
11558
11559 /* If no stack frame will be allocated, don't do anything. */
11560 if (!frame_size)
11561 {
11562 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
11563 {
11564 /* If va_start is used, just use r15. */
11565 emit_move_insn (r1,
11566 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
11567 GEN_INT (STACK_POINTER_OFFSET)));
11568
11569 }
11570 return;
11571 }
11572
11573 if (morestack_ref == NULL_RTX)
11574 {
11575 morestack_ref = gen_rtx_SYMBOL_REF (Pmode, "__morestack");
11576 SYMBOL_REF_FLAGS (morestack_ref) |= (SYMBOL_FLAG_LOCAL
11577 | SYMBOL_FLAG_FUNCTION);
11578 }
11579
11580 if (CONST_OK_FOR_K (frame_size) || CONST_OK_FOR_Op (frame_size))
11581 {
11582 /* If frame_size will fit in an add instruction, do a stack space
11583 check, and only call __morestack if there's not enough space. */
11584
11585 /* Get thread pointer. r1 is the only register we can always destroy - r0
11586 could contain a static chain (and cannot be used to address memory
11587 anyway), r2-r6 can contain parameters, and r6-r15 are callee-saved. */
11588 emit_move_insn (r1, gen_rtx_REG (Pmode, TP_REGNUM));
11589 /* Aim at __private_ss. */
11590 guard = gen_rtx_MEM (Pmode, plus_constant (Pmode, r1, psso));
11591
11592       /* If less than 1 KiB is used, skip the addition and compare directly
11593 	 with __private_ss.  */
11594 if (frame_size > SPLIT_STACK_AVAILABLE)
11595 {
11596 emit_move_insn (r1, guard);
11597 if (TARGET_64BIT)
11598 emit_insn (gen_adddi3 (r1, r1, GEN_INT (frame_size)));
11599 else
11600 emit_insn (gen_addsi3 (r1, r1, GEN_INT (frame_size)));
11601 guard = r1;
11602 }
11603
11604 /* Compare the (maybe adjusted) guard with the stack pointer. */
11605 cc = s390_emit_compare (LT, stack_pointer_rtx, guard);
11606 }
11607
11608 call_done = gen_label_rtx ();
11609 parm_base = gen_label_rtx ();
11610
11611 /* Emit the parameter block. */
11612 tmp = gen_split_stack_data (parm_base, call_done,
11613 GEN_INT (frame_size),
11614 GEN_INT (args_size));
11615 insn = emit_insn (tmp);
11616 add_reg_note (insn, REG_LABEL_OPERAND, call_done);
11617 LABEL_NUSES (call_done)++;
11618 add_reg_note (insn, REG_LABEL_OPERAND, parm_base);
11619 LABEL_NUSES (parm_base)++;
11620
11621 /* %r1 = litbase. */
11622 insn = emit_move_insn (r1, gen_rtx_LABEL_REF (VOIDmode, parm_base));
11623 add_reg_note (insn, REG_LABEL_OPERAND, parm_base);
11624 LABEL_NUSES (parm_base)++;
11625
11626 /* Now, we need to call __morestack. It has very special calling
11627 conventions: it preserves param/return/static chain registers for
11628 calling main function body, and looks for its own parameters at %r1. */
11629
11630 if (cc != NULL)
11631 {
11632 tmp = gen_split_stack_cond_call (morestack_ref, cc, call_done);
11633
11634 insn = emit_jump_insn (tmp);
11635 JUMP_LABEL (insn) = call_done;
11636 LABEL_NUSES (call_done)++;
11637
11638 /* Mark the jump as very unlikely to be taken. */
11639 add_reg_br_prob_note (insn,
11640 profile_probability::very_unlikely ());
11641
11642 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
11643 {
11644 /* If va_start is used, and __morestack was not called, just use
11645 r15. */
11646 emit_move_insn (r1,
11647 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
11648 GEN_INT (STACK_POINTER_OFFSET)));
11649 }
11650 }
11651 else
11652 {
11653 tmp = gen_split_stack_call (morestack_ref, call_done);
11654 insn = emit_jump_insn (tmp);
11655 JUMP_LABEL (insn) = call_done;
11656 LABEL_NUSES (call_done)++;
11657 emit_barrier ();
11658 }
11659
11660 /* __morestack will call us here. */
11661
11662 emit_label (call_done);
11663 }
11664
11665 /* We may have to tell the dataflow pass that the split stack prologue
11666 is initializing a register. */
11667
11668 static void
11669 s390_live_on_entry (bitmap regs)
11670 {
11671 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
11672 {
11673 gcc_assert (flag_split_stack);
11674 bitmap_set_bit (regs, 1);
11675 }
11676 }
11677
11678 /* Return true if the function can use simple_return to return outside
11679 of a shrink-wrapped region. At present shrink-wrapping is supported
11680 in all cases. */
11681
11682 bool
11683 s390_can_use_simple_return_insn (void)
11684 {
11685 return true;
11686 }
11687
11688 /* Return true if the epilogue is guaranteed to contain only a return
11689 instruction and if a direct return can therefore be used instead.
11690 One of the main advantages of using direct return instructions
11691 is that we can then use conditional returns. */
11692
11693 bool
11694 s390_can_use_return_insn (void)
11695 {
11696 int i;
11697
11698 if (!reload_completed)
11699 return false;
11700
11701 if (crtl->profile)
11702 return false;
11703
11704 if (TARGET_TPF_PROFILING)
11705 return false;
11706
11707 for (i = 0; i < 16; i++)
11708 if (cfun_gpr_save_slot (i) != SAVE_SLOT_NONE)
11709 return false;
11710
11711 /* For 31 bit this is not covered by the frame_size check below
11712 since f4, f6 are saved in the register save area without needing
11713 additional stack space. */
11714 if (!TARGET_64BIT
11715 && (cfun_fpr_save_p (FPR4_REGNUM) || cfun_fpr_save_p (FPR6_REGNUM)))
11716 return false;
11717
11718 if (cfun->machine->base_reg
11719 && !call_really_used_regs[REGNO (cfun->machine->base_reg)])
11720 return false;
11721
11722 return cfun_frame_layout.frame_size == 0;
11723 }
11724
11725 /* The VX ABI differs for vararg functions. Therefore we need the
11726 prototype of the callee to be available when passing vector type
11727 values. */
11728 static const char *
11729 s390_invalid_arg_for_unprototyped_fn (const_tree typelist, const_tree funcdecl, const_tree val)
11730 {
11731 return ((TARGET_VX_ABI
11732 && typelist == 0
11733 && VECTOR_TYPE_P (TREE_TYPE (val))
11734 && (funcdecl == NULL_TREE
11735 || (TREE_CODE (funcdecl) == FUNCTION_DECL
11736 && DECL_BUILT_IN_CLASS (funcdecl) != BUILT_IN_MD)))
11737 ? N_("vector argument passed to unprototyped function")
11738 : NULL);
11739 }
11740
11741
11742 /* Return the size in bytes of a function argument of
11743 type TYPE and/or mode MODE. At least one of TYPE or
11744 MODE must be specified. */
11745
11746 static int
11747 s390_function_arg_size (machine_mode mode, const_tree type)
11748 {
11749 if (type)
11750 return int_size_in_bytes (type);
11751
11752 /* No type info available for some library calls ... */
11753 if (mode != BLKmode)
11754 return GET_MODE_SIZE (mode);
11755
11756   /* If we have neither type nor mode, abort.  */
11757 gcc_unreachable ();
11758 }
11759
11760 /* Return true if a function argument of type TYPE and mode MODE
11761 is to be passed in a vector register, if available. */
11762
11763 bool
11764 s390_function_arg_vector (machine_mode mode, const_tree type)
11765 {
11766 if (!TARGET_VX_ABI)
11767 return false;
11768
11769 if (s390_function_arg_size (mode, type) > 16)
11770 return false;
11771
11772 /* No type info available for some library calls ... */
11773 if (!type)
11774 return VECTOR_MODE_P (mode);
11775
11776 /* The ABI says that record types with a single member are treated
11777 just like that member would be. */
11778 while (TREE_CODE (type) == RECORD_TYPE)
11779 {
11780 tree field, single = NULL_TREE;
11781
11782 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
11783 {
11784 if (TREE_CODE (field) != FIELD_DECL)
11785 continue;
11786
11787 if (single == NULL_TREE)
11788 single = TREE_TYPE (field);
11789 else
11790 return false;
11791 }
11792
11793 if (single == NULL_TREE)
11794 return false;
11795 else
11796 {
11797 	  /* If the field declaration adds extra bytes due to
11798 	     e.g. padding, this is not accepted as a vector type.  */
11799 if (int_size_in_bytes (single) <= 0
11800 || int_size_in_bytes (single) != int_size_in_bytes (type))
11801 return false;
11802 type = single;
11803 }
11804 }
11805
11806 return VECTOR_TYPE_P (type);
11807 }
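/* A sketch of the single-member rule above, using the vector_size
   attribute (assuming the VX ABI is in effect):

     typedef int v4si __attribute__ ((vector_size (16)));

     struct a { v4si x; };         // passed like a plain v4si
     struct b { v4si x; int y; };  // two members - not a vector argument

   Any padding that makes the struct larger than its member is likewise
   rejected by the size check above.  */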
11808
11809 /* Return true if a function argument of type TYPE and mode MODE
11810 is to be passed in a floating-point register, if available. */
11811
11812 static bool
11813 s390_function_arg_float (machine_mode mode, const_tree type)
11814 {
11815 if (s390_function_arg_size (mode, type) > 8)
11816 return false;
11817
11818 /* Soft-float changes the ABI: no floating-point registers are used. */
11819 if (TARGET_SOFT_FLOAT)
11820 return false;
11821
11822 /* No type info available for some library calls ... */
11823 if (!type)
11824 return mode == SFmode || mode == DFmode || mode == SDmode || mode == DDmode;
11825
11826 /* The ABI says that record types with a single member are treated
11827 just like that member would be. */
11828 while (TREE_CODE (type) == RECORD_TYPE)
11829 {
11830 tree field, single = NULL_TREE;
11831
11832 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
11833 {
11834 if (TREE_CODE (field) != FIELD_DECL)
11835 continue;
11836
11837 if (single == NULL_TREE)
11838 single = TREE_TYPE (field);
11839 else
11840 return false;
11841 }
11842
11843 if (single == NULL_TREE)
11844 return false;
11845 else
11846 type = single;
11847 }
11848
11849 return TREE_CODE (type) == REAL_TYPE;
11850 }
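/* E.g. struct { double d; } is passed like a plain double in an FPR
   (given hard float), while struct { float f; float g; } has two members
   and therefore takes the integer/stack path of s390_function_arg_integer
   below.  */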
11851
11852 /* Return true if a function argument of type TYPE and mode MODE
11853 is to be passed in an integer register, or a pair of integer
11854 registers, if available. */
11855
11856 static bool
11857 s390_function_arg_integer (machine_mode mode, const_tree type)
11858 {
11859 int size = s390_function_arg_size (mode, type);
11860 if (size > 8)
11861 return false;
11862
11863 /* No type info available for some library calls ... */
11864 if (!type)
11865 return GET_MODE_CLASS (mode) == MODE_INT
11866 || (TARGET_SOFT_FLOAT && SCALAR_FLOAT_MODE_P (mode));
11867
11868 /* We accept small integral (and similar) types. */
11869 if (INTEGRAL_TYPE_P (type)
11870 || POINTER_TYPE_P (type)
11871 || TREE_CODE (type) == NULLPTR_TYPE
11872 || TREE_CODE (type) == OFFSET_TYPE
11873 || (TARGET_SOFT_FLOAT && TREE_CODE (type) == REAL_TYPE))
11874 return true;
11875
11876 /* We also accept structs of size 1, 2, 4, 8 that are not
11877 passed in floating-point registers. */
11878 if (AGGREGATE_TYPE_P (type)
11879 && exact_log2 (size) >= 0
11880 && !s390_function_arg_float (mode, type))
11881 return true;
11882
11883 return false;
11884 }
11885
11886 /* Return 1 if a function argument of type TYPE and mode MODE
11887 is to be passed by reference. The ABI specifies that only
11888 structures of size 1, 2, 4, or 8 bytes are passed by value,
11889 all other structures (and complex numbers) are passed by
11890 reference. */
11891
11892 static bool
11893 s390_pass_by_reference (cumulative_args_t ca ATTRIBUTE_UNUSED,
11894 machine_mode mode, const_tree type,
11895 bool named ATTRIBUTE_UNUSED)
11896 {
11897 int size = s390_function_arg_size (mode, type);
11898
11899 if (s390_function_arg_vector (mode, type))
11900 return false;
11901
11902 if (size > 8)
11903 return true;
11904
11905 if (type)
11906 {
11907 if (AGGREGATE_TYPE_P (type) && exact_log2 (size) < 0)
11908 return true;
11909
11910 if (TREE_CODE (type) == COMPLEX_TYPE
11911 || TREE_CODE (type) == VECTOR_TYPE)
11912 return true;
11913 }
11914
11915 return false;
11916 }
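/* For instance struct { char c[8]; } (size 8, a power of two) is passed
   by value, while struct { char c[12]; } and _Complex double are passed
   by reference; a 16 byte vector is passed by value in a VR when the VX
   ABI applies, as checked first above.  */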
11917
11918 /* Update the data in CUM to advance over an argument of mode MODE and
11919 data type TYPE. (TYPE is null for libcalls where that information
11920    may not be available.)  The boolean NAMED specifies whether the
11921 argument is a named argument (as opposed to an unnamed argument
11922 matching an ellipsis). */
11923
11924 static void
11925 s390_function_arg_advance (cumulative_args_t cum_v, machine_mode mode,
11926 const_tree type, bool named)
11927 {
11928 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
11929
11930 if (s390_function_arg_vector (mode, type))
11931 {
11932 /* We are called for unnamed vector stdarg arguments which are
11933 passed on the stack. In this case this hook does not have to
11934 do anything since stack arguments are tracked by common
11935 code. */
11936 if (!named)
11937 return;
11938 cum->vrs += 1;
11939 }
11940 else if (s390_function_arg_float (mode, type))
11941 {
11942 cum->fprs += 1;
11943 }
11944 else if (s390_function_arg_integer (mode, type))
11945 {
11946 int size = s390_function_arg_size (mode, type);
11947 cum->gprs += ((size + UNITS_PER_LONG - 1) / UNITS_PER_LONG);
11948 }
11949 else
11950 gcc_unreachable ();
11951 }
11952
11953 /* Define where to put the arguments to a function.
11954 Value is zero to push the argument on the stack,
11955 or a hard register in which to store the argument.
11956
11957 MODE is the argument's machine mode.
11958 TYPE is the data type of the argument (as a tree).
11959 This is null for libcalls where that information may
11960 not be available.
11961 CUM is a variable of type CUMULATIVE_ARGS which gives info about
11962 the preceding args and about the function being called.
11963 NAMED is nonzero if this argument is a named parameter
11964 (otherwise it is an extra parameter matching an ellipsis).
11965
11966 On S/390, we use general purpose registers 2 through 6 to
11967 pass integer, pointer, and certain structure arguments, and
11968 floating point registers 0 and 2 (0, 2, 4, and 6 on 64-bit)
11969 to pass floating point arguments. All remaining arguments
11970 are pushed to the stack. */
11971
11972 static rtx
11973 s390_function_arg (cumulative_args_t cum_v, machine_mode mode,
11974 const_tree type, bool named)
11975 {
11976 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
11977
11978 if (!named)
11979 s390_check_type_for_vector_abi (type, true, false);
11980
11981 if (s390_function_arg_vector (mode, type))
11982 {
11983 /* Vector arguments being part of the ellipsis are passed on the
11984 stack. */
11985 if (!named || (cum->vrs + 1 > VEC_ARG_NUM_REG))
11986 return NULL_RTX;
11987
11988 return gen_rtx_REG (mode, cum->vrs + FIRST_VEC_ARG_REGNO);
11989 }
11990 else if (s390_function_arg_float (mode, type))
11991 {
11992 if (cum->fprs + 1 > FP_ARG_NUM_REG)
11993 return NULL_RTX;
11994 else
11995 return gen_rtx_REG (mode, cum->fprs + 16);
11996 }
11997 else if (s390_function_arg_integer (mode, type))
11998 {
11999 int size = s390_function_arg_size (mode, type);
12000 int n_gprs = (size + UNITS_PER_LONG - 1) / UNITS_PER_LONG;
12001
12002 if (cum->gprs + n_gprs > GP_ARG_NUM_REG)
12003 return NULL_RTX;
12004 else if (n_gprs == 1 || UNITS_PER_WORD == UNITS_PER_LONG)
12005 return gen_rtx_REG (mode, cum->gprs + 2);
12006 else if (n_gprs == 2)
12007 {
12008 rtvec p = rtvec_alloc (2);
12009
12010 RTVEC_ELT (p, 0)
12011 = gen_rtx_EXPR_LIST (SImode, gen_rtx_REG (SImode, cum->gprs + 2),
12012 const0_rtx);
12013 RTVEC_ELT (p, 1)
12014 = gen_rtx_EXPR_LIST (SImode, gen_rtx_REG (SImode, cum->gprs + 3),
12015 GEN_INT (4));
12016
12017 return gen_rtx_PARALLEL (mode, p);
12018 }
12019 }
12020
12021 /* After the real arguments, expand_call calls us once again
12022 with a void_type_node type. Whatever we return here is
12023 passed as operand 2 to the call expanders.
12024
12025 We don't need this feature ... */
12026 else if (type == void_type_node)
12027 return const0_rtx;
12028
12029 gcc_unreachable ();
12030 }
12031
12032 /* Implement TARGET_FUNCTION_ARG_BOUNDARY. Vector arguments are
12033 left-justified when placed on the stack during parameter passing. */
12034
12035 static pad_direction
12036 s390_function_arg_padding (machine_mode mode, const_tree type)
12037 {
12038 if (s390_function_arg_vector (mode, type))
12039 return PAD_UPWARD;
12040
12041 return default_function_arg_padding (mode, type);
12042 }
12043
12044 /* Return true if return values of type TYPE should be returned
12045 in a memory buffer whose address is passed by the caller as
12046 hidden first argument. */
12047
12048 static bool
12049 s390_return_in_memory (const_tree type, const_tree fundecl ATTRIBUTE_UNUSED)
12050 {
12051 /* We accept small integral (and similar) types. */
12052 if (INTEGRAL_TYPE_P (type)
12053 || POINTER_TYPE_P (type)
12054 || TREE_CODE (type) == OFFSET_TYPE
12055 || TREE_CODE (type) == REAL_TYPE)
12056 return int_size_in_bytes (type) > 8;
12057
12058 /* vector types which fit into a VR. */
12059 if (TARGET_VX_ABI
12060 && VECTOR_TYPE_P (type)
12061 && int_size_in_bytes (type) <= 16)
12062 return false;
12063
12064 /* Aggregates and similar constructs are always returned
12065 in memory. */
12066 if (AGGREGATE_TYPE_P (type)
12067 || TREE_CODE (type) == COMPLEX_TYPE
12068 || VECTOR_TYPE_P (type))
12069 return true;
12070
12071 /* ??? We get called on all sorts of random stuff from
12072 aggregate_value_p. We can't abort, but it's not clear
12073 what's safe to return. Pretend it's a struct I guess. */
12074 return true;
12075 }
12076
12077 /* Function arguments and return values are promoted to word size. */
12078
12079 static machine_mode
12080 s390_promote_function_mode (const_tree type, machine_mode mode,
12081 int *punsignedp,
12082 const_tree fntype ATTRIBUTE_UNUSED,
12083 int for_return ATTRIBUTE_UNUSED)
12084 {
12085 if (INTEGRAL_MODE_P (mode)
12086 && GET_MODE_SIZE (mode) < UNITS_PER_LONG)
12087 {
12088 if (type != NULL_TREE && POINTER_TYPE_P (type))
12089 *punsignedp = POINTERS_EXTEND_UNSIGNED;
12090 return Pmode;
12091 }
12092
12093 return mode;
12094 }
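/* E.g. with -m64 a `short int' argument or return value is widened to
   DImode (Pmode here), extended according to its signedness, while DImode
   and larger values are left untouched.  */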
12095
12096 /* Define where to return a (scalar) value of type RET_TYPE.
12097 If RET_TYPE is null, define where to return a (scalar)
12098 value of mode MODE from a libcall. */
12099
12100 static rtx
12101 s390_function_and_libcall_value (machine_mode mode,
12102 const_tree ret_type,
12103 const_tree fntype_or_decl,
12104 bool outgoing ATTRIBUTE_UNUSED)
12105 {
12106 /* For vector return types it is important to use the RET_TYPE
12107 argument whenever available since the middle-end might have
12108 changed the mode to a scalar mode. */
12109 bool vector_ret_type_p = ((ret_type && VECTOR_TYPE_P (ret_type))
12110 || (!ret_type && VECTOR_MODE_P (mode)));
12111
12112 /* For normal functions perform the promotion as
12113 promote_function_mode would do. */
12114 if (ret_type)
12115 {
12116 int unsignedp = TYPE_UNSIGNED (ret_type);
12117 mode = promote_function_mode (ret_type, mode, &unsignedp,
12118 fntype_or_decl, 1);
12119 }
12120
12121 gcc_assert (GET_MODE_CLASS (mode) == MODE_INT
12122 || SCALAR_FLOAT_MODE_P (mode)
12123 || (TARGET_VX_ABI && vector_ret_type_p));
12124 gcc_assert (GET_MODE_SIZE (mode) <= (TARGET_VX_ABI ? 16 : 8));
12125
12126 if (TARGET_VX_ABI && vector_ret_type_p)
12127 return gen_rtx_REG (mode, FIRST_VEC_ARG_REGNO);
12128 else if (TARGET_HARD_FLOAT && SCALAR_FLOAT_MODE_P (mode))
12129 return gen_rtx_REG (mode, 16);
12130 else if (GET_MODE_SIZE (mode) <= UNITS_PER_LONG
12131 || UNITS_PER_LONG == UNITS_PER_WORD)
12132 return gen_rtx_REG (mode, 2);
12133 else if (GET_MODE_SIZE (mode) == 2 * UNITS_PER_LONG)
12134 {
12135 /* This case is triggered when returning a 64 bit value with
12136 -m31 -mzarch. Although the value would fit into a single
12137 register it has to be forced into a 32 bit register pair in
12138 order to match the ABI. */
12139 rtvec p = rtvec_alloc (2);
12140
12141 RTVEC_ELT (p, 0)
12142 = gen_rtx_EXPR_LIST (SImode, gen_rtx_REG (SImode, 2), const0_rtx);
12143 RTVEC_ELT (p, 1)
12144 = gen_rtx_EXPR_LIST (SImode, gen_rtx_REG (SImode, 3), GEN_INT (4));
12145
12146 return gen_rtx_PARALLEL (mode, p);
12147 }
12148
12149 gcc_unreachable ();
12150 }
12151
12152 /* Define where to return a scalar return value of type RET_TYPE. */
12153
12154 static rtx
12155 s390_function_value (const_tree ret_type, const_tree fn_decl_or_type,
12156 bool outgoing)
12157 {
12158 return s390_function_and_libcall_value (TYPE_MODE (ret_type), ret_type,
12159 fn_decl_or_type, outgoing);
12160 }
12161
12162 /* Define where to return a scalar libcall return value of mode
12163 MODE. */
12164
12165 static rtx
12166 s390_libcall_value (machine_mode mode, const_rtx fun ATTRIBUTE_UNUSED)
12167 {
12168 return s390_function_and_libcall_value (mode, NULL_TREE,
12169 NULL_TREE, true);
12170 }
12171
12172
12173 /* Create and return the va_list datatype.
12174
12175 On S/390, va_list is an array type equivalent to
12176
12177 typedef struct __va_list_tag
12178 {
12179 long __gpr;
12180 long __fpr;
12181 void *__overflow_arg_area;
12182 void *__reg_save_area;
12183 } va_list[1];
12184
12185 where __gpr and __fpr hold the number of general purpose
12186 or floating point arguments used up to now, respectively,
12187 __overflow_arg_area points to the stack location of the
12188 next argument passed on the stack, and __reg_save_area
12189 always points to the start of the register area in the
12190 call frame of the current function. The function prologue
12191 saves all registers used for argument passing into this
12192 area if the function uses variable arguments. */
12193
12194 static tree
12195 s390_build_builtin_va_list (void)
12196 {
12197 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
12198
12199 record = lang_hooks.types.make_type (RECORD_TYPE);
12200
12201 type_decl =
12202 build_decl (BUILTINS_LOCATION,
12203 TYPE_DECL, get_identifier ("__va_list_tag"), record);
12204
12205 f_gpr = build_decl (BUILTINS_LOCATION,
12206 FIELD_DECL, get_identifier ("__gpr"),
12207 long_integer_type_node);
12208 f_fpr = build_decl (BUILTINS_LOCATION,
12209 FIELD_DECL, get_identifier ("__fpr"),
12210 long_integer_type_node);
12211 f_ovf = build_decl (BUILTINS_LOCATION,
12212 FIELD_DECL, get_identifier ("__overflow_arg_area"),
12213 ptr_type_node);
12214 f_sav = build_decl (BUILTINS_LOCATION,
12215 FIELD_DECL, get_identifier ("__reg_save_area"),
12216 ptr_type_node);
12217
12218 va_list_gpr_counter_field = f_gpr;
12219 va_list_fpr_counter_field = f_fpr;
12220
12221 DECL_FIELD_CONTEXT (f_gpr) = record;
12222 DECL_FIELD_CONTEXT (f_fpr) = record;
12223 DECL_FIELD_CONTEXT (f_ovf) = record;
12224 DECL_FIELD_CONTEXT (f_sav) = record;
12225
12226 TYPE_STUB_DECL (record) = type_decl;
12227 TYPE_NAME (record) = type_decl;
12228 TYPE_FIELDS (record) = f_gpr;
12229 DECL_CHAIN (f_gpr) = f_fpr;
12230 DECL_CHAIN (f_fpr) = f_ovf;
12231 DECL_CHAIN (f_ovf) = f_sav;
12232
12233 layout_type (record);
12234
12235 /* The correct type is an array type of one element. */
12236 return build_array_type (record, build_index_type (size_zero_node));
12237 }
12238
12239 /* Implement va_start by filling the va_list structure VALIST.
12240 STDARG_P is always true, and ignored.
12241 NEXTARG points to the first anonymous stack argument.
12242
12243 The following global variables are used to initialize
12244 the va_list structure:
12245
12246 crtl->args.info:
12247 holds number of gprs and fprs used for named arguments.
12248 crtl->args.arg_offset_rtx:
12249 holds the offset of the first anonymous stack argument
12250 (relative to the virtual arg pointer). */
12251
12252 static void
12253 s390_va_start (tree valist, rtx nextarg ATTRIBUTE_UNUSED)
12254 {
12255 HOST_WIDE_INT n_gpr, n_fpr;
12256 int off;
12257 tree f_gpr, f_fpr, f_ovf, f_sav;
12258 tree gpr, fpr, ovf, sav, t;
12259
12260 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
12261 f_fpr = DECL_CHAIN (f_gpr);
12262 f_ovf = DECL_CHAIN (f_fpr);
12263 f_sav = DECL_CHAIN (f_ovf);
12264
12265 valist = build_simple_mem_ref (valist);
12266 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
12267 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
12268 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
12269 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
12270
12271 /* Count number of gp and fp argument registers used. */
12272
12273 n_gpr = crtl->args.info.gprs;
12274 n_fpr = crtl->args.info.fprs;
12275
12276 if (cfun->va_list_gpr_size)
12277 {
12278 t = build2 (MODIFY_EXPR, TREE_TYPE (gpr), gpr,
12279 build_int_cst (NULL_TREE, n_gpr));
12280 TREE_SIDE_EFFECTS (t) = 1;
12281 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
12282 }
12283
12284 if (cfun->va_list_fpr_size)
12285 {
12286 t = build2 (MODIFY_EXPR, TREE_TYPE (fpr), fpr,
12287 build_int_cst (NULL_TREE, n_fpr));
12288 TREE_SIDE_EFFECTS (t) = 1;
12289 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
12290 }
12291
12292 if (flag_split_stack
12293 && (lookup_attribute ("no_split_stack", DECL_ATTRIBUTES (cfun->decl))
12294 == NULL)
12295 && cfun->machine->split_stack_varargs_pointer == NULL_RTX)
12296 {
12297 rtx reg;
12298 rtx_insn *seq;
12299
12300 reg = gen_reg_rtx (Pmode);
12301 cfun->machine->split_stack_varargs_pointer = reg;
12302
12303 start_sequence ();
12304 emit_move_insn (reg, gen_rtx_REG (Pmode, 1));
12305 seq = get_insns ();
12306 end_sequence ();
12307
12308 push_topmost_sequence ();
12309 emit_insn_after (seq, entry_of_function ());
12310 pop_topmost_sequence ();
12311 }
12312
12313 /* Find the overflow area.
12314 FIXME: This currently is too pessimistic when the vector ABI is
12315 enabled. In that case we *always* set up the overflow area
12316 pointer. */
12317 if (n_gpr + cfun->va_list_gpr_size > GP_ARG_NUM_REG
12318 || n_fpr + cfun->va_list_fpr_size > FP_ARG_NUM_REG
12319 || TARGET_VX_ABI)
12320 {
12321 if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
12322 t = make_tree (TREE_TYPE (ovf), virtual_incoming_args_rtx);
12323 else
12324 t = make_tree (TREE_TYPE (ovf), cfun->machine->split_stack_varargs_pointer);
12325
12326 off = INTVAL (crtl->args.arg_offset_rtx);
12327 off = off < 0 ? 0 : off;
12328 if (TARGET_DEBUG_ARG)
12329 fprintf (stderr, "va_start: n_gpr = %d, n_fpr = %d off %d\n",
12330 (int)n_gpr, (int)n_fpr, off);
12331
12332 t = fold_build_pointer_plus_hwi (t, off);
12333
12334 t = build2 (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
12335 TREE_SIDE_EFFECTS (t) = 1;
12336 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
12337 }
12338
12339 /* Find the register save area. */
12340 if ((cfun->va_list_gpr_size && n_gpr < GP_ARG_NUM_REG)
12341 || (cfun->va_list_fpr_size && n_fpr < FP_ARG_NUM_REG))
12342 {
12343 t = make_tree (TREE_TYPE (sav), return_address_pointer_rtx);
12344 t = fold_build_pointer_plus_hwi (t, -RETURN_REGNUM * UNITS_PER_LONG);
12345
12346 t = build2 (MODIFY_EXPR, TREE_TYPE (sav), sav, t);
12347 TREE_SIDE_EFFECTS (t) = 1;
12348 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
12349 }
12350 }
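/* A sketch of the resulting addressing (standard 64-bit layout assumed):
   __reg_save_area points at the start of the register save area, so the
   i-th GPR argument register (r2 being the 0th) is found at
   __reg_save_area + 16 + i * 8, and the j-th FPR argument register at
   __reg_save_area + 128 + j * 8, matching the sav_ofs/sav_scale values
   used by s390_gimplify_va_arg below.  */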
12351
12352 /* Implement va_arg by updating the va_list structure
12353 VALIST as required to retrieve an argument of type
12354 TYPE, and returning that argument.
12355
12356 Generates code equivalent to:
12357
12358 if (integral value) {
12359 if (size <= 4 && args.gpr < 5 ||
12360 size > 4 && args.gpr < 4 )
12361 ret = args.reg_save_area[args.gpr+8]
12362 else
12363 ret = *args.overflow_arg_area++;
12364 } else if (vector value) {
12365 ret = *args.overflow_arg_area;
12366 args.overflow_arg_area += size / 8;
12367 } else if (float value) {
12368 	 if (args.fpr < 2)
12369 ret = args.reg_save_area[args.fpr+64]
12370 else
12371 ret = *args.overflow_arg_area++;
12372 } else if (aggregate value) {
12373 if (args.gpr < 5)
12374 ret = *args.reg_save_area[args.gpr]
12375 else
12376 ret = **args.overflow_arg_area++;
12377 } */
12378
12379 static tree
12380 s390_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
12381 gimple_seq *post_p ATTRIBUTE_UNUSED)
12382 {
12383 tree f_gpr, f_fpr, f_ovf, f_sav;
12384 tree gpr, fpr, ovf, sav, reg, t, u;
12385 int indirect_p, size, n_reg, sav_ofs, sav_scale, max_reg;
12386 tree lab_false, lab_over = NULL_TREE;
12387 tree addr = create_tmp_var (ptr_type_node, "addr");
12388 bool left_align_p; /* How a value < UNITS_PER_LONG is aligned within
12389 a stack slot. */
12390
12391 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
12392 f_fpr = DECL_CHAIN (f_gpr);
12393 f_ovf = DECL_CHAIN (f_fpr);
12394 f_sav = DECL_CHAIN (f_ovf);
12395
12396 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
12397 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
12398 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
12399
12400 /* The tree for args* cannot be shared between gpr/fpr and ovf since
12401 both appear on a lhs. */
12402 valist = unshare_expr (valist);
12403 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
12404
12405 size = int_size_in_bytes (type);
12406
12407 s390_check_type_for_vector_abi (type, true, false);
12408
12409 if (pass_by_reference (NULL, TYPE_MODE (type), type, false))
12410 {
12411 if (TARGET_DEBUG_ARG)
12412 {
12413 fprintf (stderr, "va_arg: aggregate type");
12414 debug_tree (type);
12415 }
12416
12417 /* Aggregates are passed by reference. */
12418 indirect_p = 1;
12419 reg = gpr;
12420 n_reg = 1;
12421
12422       /* Kernel stack layout on 31 bit: it is assumed here that no padding
12423 	 will be added by s390_frame_info because for va_args an even
12424 	 number of GPRs always has to be saved (r15-r2 = 14 regs).  */
12425 sav_ofs = 2 * UNITS_PER_LONG;
12426 sav_scale = UNITS_PER_LONG;
12427 size = UNITS_PER_LONG;
12428 max_reg = GP_ARG_NUM_REG - n_reg;
12429 left_align_p = false;
12430 }
12431 else if (s390_function_arg_vector (TYPE_MODE (type), type))
12432 {
12433 if (TARGET_DEBUG_ARG)
12434 {
12435 fprintf (stderr, "va_arg: vector type");
12436 debug_tree (type);
12437 }
12438
12439 indirect_p = 0;
12440 reg = NULL_TREE;
12441 n_reg = 0;
12442 sav_ofs = 0;
12443 sav_scale = 8;
12444 max_reg = 0;
12445 left_align_p = true;
12446 }
12447 else if (s390_function_arg_float (TYPE_MODE (type), type))
12448 {
12449 if (TARGET_DEBUG_ARG)
12450 {
12451 fprintf (stderr, "va_arg: float type");
12452 debug_tree (type);
12453 }
12454
12455 /* FP args go in FP registers, if present. */
12456 indirect_p = 0;
12457 reg = fpr;
12458 n_reg = 1;
12459 sav_ofs = 16 * UNITS_PER_LONG;
12460 sav_scale = 8;
12461 max_reg = FP_ARG_NUM_REG - n_reg;
12462 left_align_p = false;
12463 }
12464 else
12465 {
12466 if (TARGET_DEBUG_ARG)
12467 {
12468 fprintf (stderr, "va_arg: other type");
12469 debug_tree (type);
12470 }
12471
12472 /* Otherwise into GP registers. */
12473 indirect_p = 0;
12474 reg = gpr;
12475 n_reg = (size + UNITS_PER_LONG - 1) / UNITS_PER_LONG;
12476
12477       /* Kernel stack layout on 31 bit: it is assumed here that no padding
12478 	 will be added by s390_frame_info because for va_args an even
12479 	 number of GPRs always has to be saved (r15-r2 = 14 regs).  */
12480 sav_ofs = 2 * UNITS_PER_LONG;
12481
12482 if (size < UNITS_PER_LONG)
12483 sav_ofs += UNITS_PER_LONG - size;
12484
12485 sav_scale = UNITS_PER_LONG;
12486 max_reg = GP_ARG_NUM_REG - n_reg;
12487 left_align_p = false;
12488 }
12489
12490 /* Pull the value out of the saved registers ... */
12491
12492 if (reg != NULL_TREE)
12493 {
12494 /*
12495 if (reg > ((typeof (reg))max_reg))
12496 goto lab_false;
12497
12498 	   addr = sav + sav_ofs + reg * sav_scale;
12499
12500 goto lab_over;
12501
12502 lab_false:
12503 */
12504
12505 lab_false = create_artificial_label (UNKNOWN_LOCATION);
12506 lab_over = create_artificial_label (UNKNOWN_LOCATION);
12507
12508 t = fold_convert (TREE_TYPE (reg), size_int (max_reg));
12509 t = build2 (GT_EXPR, boolean_type_node, reg, t);
12510 u = build1 (GOTO_EXPR, void_type_node, lab_false);
12511 t = build3 (COND_EXPR, void_type_node, t, u, NULL_TREE);
12512 gimplify_and_add (t, pre_p);
12513
12514 t = fold_build_pointer_plus_hwi (sav, sav_ofs);
12515 u = build2 (MULT_EXPR, TREE_TYPE (reg), reg,
12516 fold_convert (TREE_TYPE (reg), size_int (sav_scale)));
12517 t = fold_build_pointer_plus (t, u);
12518
12519 gimplify_assign (addr, t, pre_p);
12520
12521 gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));
12522
12523 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_false));
12524 }
12525
12526 /* ... Otherwise out of the overflow area. */
12527
12528 t = ovf;
12529 if (size < UNITS_PER_LONG && !left_align_p)
12530 t = fold_build_pointer_plus_hwi (t, UNITS_PER_LONG - size);
12531
12532 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
12533
12534 gimplify_assign (addr, t, pre_p);
12535
12536 if (size < UNITS_PER_LONG && left_align_p)
12537 t = fold_build_pointer_plus_hwi (t, UNITS_PER_LONG);
12538 else
12539 t = fold_build_pointer_plus_hwi (t, size);
12540
12541 gimplify_assign (ovf, t, pre_p);
12542
12543 if (reg != NULL_TREE)
12544 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_over));
12545
12546
12547 /* Increment register save count. */
12548
12549 if (n_reg > 0)
12550 {
12551 u = build2 (PREINCREMENT_EXPR, TREE_TYPE (reg), reg,
12552 fold_convert (TREE_TYPE (reg), size_int (n_reg)));
12553 gimplify_and_add (u, pre_p);
12554 }
12555
12556 if (indirect_p)
12557 {
12558 t = build_pointer_type_for_mode (build_pointer_type (type),
12559 ptr_mode, true);
12560 addr = fold_convert (t, addr);
12561 addr = build_va_arg_indirect_ref (addr);
12562 }
12563 else
12564 {
12565 t = build_pointer_type_for_mode (type, ptr_mode, true);
12566 addr = fold_convert (t, addr);
12567 }
12568
12569 return build_va_arg_indirect_ref (addr);
12570 }
12571
12572 /* Emit rtl for the tbegin or tbegin_retry (RETRY != NULL_RTX)
12573 expanders.
12574 DEST - Register location where CC will be stored.
12575    TDB - Pointer to a 256 byte area in which to store the transaction
12576 	 diagnostic block.  NULL if TDB is not needed.
12577    RETRY - Retry count value.  If non-NULL a retry loop for CC2
12578 	   is emitted.
12579 CLOBBER_FPRS_P - If true clobbers for all FPRs are emitted as part
12580 of the tbegin instruction pattern. */
12581
12582 void
12583 s390_expand_tbegin (rtx dest, rtx tdb, rtx retry, bool clobber_fprs_p)
12584 {
12585 rtx retry_plus_two = gen_reg_rtx (SImode);
12586 rtx retry_reg = gen_reg_rtx (SImode);
12587 rtx_code_label *retry_label = NULL;
12588
12589 if (retry != NULL_RTX)
12590 {
12591 emit_move_insn (retry_reg, retry);
12592 emit_insn (gen_addsi3 (retry_plus_two, retry_reg, const2_rtx));
12593 emit_insn (gen_addsi3 (retry_reg, retry_reg, const1_rtx));
12594 retry_label = gen_label_rtx ();
12595 emit_label (retry_label);
12596 }
12597
12598 if (clobber_fprs_p)
12599 {
12600 if (TARGET_VX)
12601 emit_insn (gen_tbegin_1_z13 (gen_rtx_CONST_INT (VOIDmode, TBEGIN_MASK),
12602 tdb));
12603 else
12604 emit_insn (gen_tbegin_1 (gen_rtx_CONST_INT (VOIDmode, TBEGIN_MASK),
12605 tdb));
12606 }
12607 else
12608 emit_insn (gen_tbegin_nofloat_1 (gen_rtx_CONST_INT (VOIDmode, TBEGIN_MASK),
12609 tdb));
12610
12611 emit_move_insn (dest, gen_rtx_UNSPEC (SImode,
12612 gen_rtvec (1, gen_rtx_REG (CCRAWmode,
12613 CC_REGNUM)),
12614 UNSPEC_CC_TO_INT));
12615 if (retry != NULL_RTX)
12616 {
12617 const int CC0 = 1 << 3;
12618 const int CC1 = 1 << 2;
12619 const int CC3 = 1 << 0;
12620 rtx jump;
12621 rtx count = gen_reg_rtx (SImode);
12622 rtx_code_label *leave_label = gen_label_rtx ();
12623
12624 /* Exit for success and permanent failures. */
12625 jump = s390_emit_jump (leave_label,
12626 gen_rtx_EQ (VOIDmode,
12627 gen_rtx_REG (CCRAWmode, CC_REGNUM),
12628 gen_rtx_CONST_INT (VOIDmode, CC0 | CC1 | CC3)));
12629 LABEL_NUSES (leave_label) = 1;
12630
12631 /* CC2 - transient failure. Perform retry with ppa. */
12632 emit_move_insn (count, retry_plus_two);
12633 emit_insn (gen_subsi3 (count, count, retry_reg));
12634 emit_insn (gen_tx_assist (count));
12635 jump = emit_jump_insn (gen_doloop_si64 (retry_label,
12636 retry_reg,
12637 retry_reg));
12638 JUMP_LABEL (jump) = retry_label;
12639 LABEL_NUSES (retry_label) = 1;
12640 emit_label (leave_label);
12641 }
12642 }
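/* Schematically, the retry case expands to something like the
   following (a sketch of the emitted sequence, not exact assembly):

     retry_reg = retry + 1;  retry_plus_two = retry + 2;
   retry_label:
     tbegin (TBEGIN_MASK, tdb);
     dest = CC (via UNSPEC_CC_TO_INT);
     if (CC in {0, 1, 3}) goto leave_label;    -- success/persistent abort
     count = retry_plus_two - retry_reg;       -- grows with each abort
     tx_assist (count);                        -- PPA transaction assist
     if (--retry_reg != 0) goto retry_label;   -- CC2: transient abort
   leave_label:
*/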
12643
12644
12645 /* Return the decl for the target specific builtin with the function
12646 code FCODE. */
12647
12648 static tree
12649 s390_builtin_decl (unsigned fcode, bool initialized_p ATTRIBUTE_UNUSED)
12650 {
12651 if (fcode >= S390_BUILTIN_MAX)
12652 return error_mark_node;
12653
12654 return s390_builtin_decls[fcode];
12655 }
12656
12657 /* We call mcount before the function prologue. So a profiled leaf
12658 function should stay a leaf function. */
12659
12660 static bool
12661 s390_keep_leaf_when_profiled ()
12662 {
12663 return true;
12664 }
12665
12666 /* Output assembly code for the trampoline template to
12667 stdio stream FILE.
12668
12669 On S/390, we use gpr 1 internally in the trampoline code;
12670 gpr 0 is used to hold the static chain. */
12671
12672 static void
12673 s390_asm_trampoline_template (FILE *file)
12674 {
12675 rtx op[2];
12676 op[0] = gen_rtx_REG (Pmode, 0);
12677 op[1] = gen_rtx_REG (Pmode, 1);
12678
12679 if (TARGET_64BIT)
12680 {
12681 output_asm_insn ("basr\t%1,0", op); /* 2 byte */
12682 output_asm_insn ("lmg\t%0,%1,14(%1)", op); /* 6 byte */
12683 output_asm_insn ("br\t%1", op); /* 2 byte */
12684 ASM_OUTPUT_SKIP (file, (HOST_WIDE_INT)(TRAMPOLINE_SIZE - 10));
12685 }
12686 else
12687 {
12688 output_asm_insn ("basr\t%1,0", op); /* 2 byte */
12689 output_asm_insn ("lm\t%0,%1,6(%1)", op); /* 4 byte */
12690 output_asm_insn ("br\t%1", op); /* 2 byte */
12691 ASM_OUTPUT_SKIP (file, (HOST_WIDE_INT)(TRAMPOLINE_SIZE - 8));
12692 }
12693 }
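/* The resulting 64-bit trampoline looks like this at run time (a
   sketch; the data words are filled in by s390_trampoline_init
   below):

     0:  basr  %r1,0              -- %r1 = address of the lmg (offset 2)
     2:  lmg   %r0,%r1,14(%r1)    -- loads from offsets 16 and 24
     8:  br    %r1
    10:  (padding)
    16:  static chain value       -> %r0  (2 * UNITS_PER_LONG)
    24:  target function address  -> %r1  (3 * UNITS_PER_LONG)
*/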
12694
12695 /* Emit RTL insns to initialize the variable parts of a trampoline.
12696 FNADDR is an RTX for the address of the function's pure code.
12697 CXT is an RTX for the static chain value for the function. */
12698
12699 static void
12700 s390_trampoline_init (rtx m_tramp, tree fndecl, rtx cxt)
12701 {
12702 rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
12703 rtx mem;
12704
12705 emit_block_move (m_tramp, assemble_trampoline_template (),
12706 GEN_INT (2 * UNITS_PER_LONG), BLOCK_OP_NORMAL);
12707
12708 mem = adjust_address (m_tramp, Pmode, 2 * UNITS_PER_LONG);
12709 emit_move_insn (mem, cxt);
12710 mem = adjust_address (m_tramp, Pmode, 3 * UNITS_PER_LONG);
12711 emit_move_insn (mem, fnaddr);
12712 }
12713
12714 static void
12715 output_asm_nops (const char *user, int hw)
12716 {
12717 asm_fprintf (asm_out_file, "\t# NOPs for %s (%d halfwords)\n", user, hw);
12718 while (hw > 0)
12719 {
12720 if (hw >= 3)
12721 {
12722 output_asm_insn ("brcl\t0,0", NULL);
12723 hw -= 3;
12724 }
12725 else if (hw >= 2)
12726 {
12727 output_asm_insn ("bc\t0,0", NULL);
12728 hw -= 2;
12729 }
12730 else
12731 {
12732 output_asm_insn ("bcr\t0,0", NULL);
12733 hw -= 1;
12734 }
12735 }
12736 }
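/* For example, hw = 7 is emitted as brcl 0,0 (3 halfwords), brcl 0,0
   (3 halfwords) and bcr 0,0 (1 halfword), always using the largest
   remaining NOP encoding first.  */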
12737
12738 /* Output assembler code to FILE to increment profiler label # LABELNO
12739 for profiling a function entry. */
12740
12741 void
12742 s390_function_profiler (FILE *file, int labelno)
12743 {
12744 rtx op[8];
12745
12746 char label[128];
12747 ASM_GENERATE_INTERNAL_LABEL (label, "LP", labelno);
12748
12749 fprintf (file, "# function profiler \n");
12750
12751 op[0] = gen_rtx_REG (Pmode, RETURN_REGNUM);
12752 op[1] = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
12753 op[1] = gen_rtx_MEM (Pmode, plus_constant (Pmode, op[1], UNITS_PER_LONG));
12754 op[7] = GEN_INT (UNITS_PER_LONG);
12755
12756 op[2] = gen_rtx_REG (Pmode, 1);
12757 op[3] = gen_rtx_SYMBOL_REF (Pmode, label);
12758 SYMBOL_REF_FLAGS (op[3]) = SYMBOL_FLAG_LOCAL;
12759
12760 op[4] = gen_rtx_SYMBOL_REF (Pmode, flag_fentry ? "__fentry__" : "_mcount");
12761 if (flag_pic)
12762 {
12763 op[4] = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op[4]), UNSPEC_PLT);
12764 op[4] = gen_rtx_CONST (Pmode, op[4]);
12765 }
12766
12767 if (flag_record_mcount)
12768 fprintf (file, "1:\n");
12769
12770 if (flag_fentry)
12771 {
12772 if (flag_nop_mcount)
12773 output_asm_nops ("-mnop-mcount", /* brasl */ 3);
12774 else if (cfun->static_chain_decl)
12775 warning (OPT_Wcannot_profile, "nested functions cannot be profiled "
12776 "with %<-mfentry%> on s390");
12777 else
12778 output_asm_insn ("brasl\t0,%4", op);
12779 }
12780 else if (TARGET_64BIT)
12781 {
12782 if (flag_nop_mcount)
12783 output_asm_nops ("-mnop-mcount", /* stg */ 3 + /* larl */ 3 +
12784 /* brasl */ 3 + /* lg */ 3);
12785 else
12786 {
12787 output_asm_insn ("stg\t%0,%1", op);
12788 if (flag_dwarf2_cfi_asm)
12789 output_asm_insn (".cfi_rel_offset\t%0,%7", op);
12790 output_asm_insn ("larl\t%2,%3", op);
12791 output_asm_insn ("brasl\t%0,%4", op);
12792 output_asm_insn ("lg\t%0,%1", op);
12793 if (flag_dwarf2_cfi_asm)
12794 output_asm_insn (".cfi_restore\t%0", op);
12795 }
12796 }
12797 else
12798 {
12799 if (flag_nop_mcount)
12800 output_asm_nops ("-mnop-mcount", /* st */ 2 + /* larl */ 3 +
12801 /* brasl */ 3 + /* l */ 2);
12802 else
12803 {
12804 output_asm_insn ("st\t%0,%1", op);
12805 if (flag_dwarf2_cfi_asm)
12806 output_asm_insn (".cfi_rel_offset\t%0,%7", op);
12807 output_asm_insn ("larl\t%2,%3", op);
12808 output_asm_insn ("brasl\t%0,%4", op);
12809 output_asm_insn ("l\t%0,%1", op);
12810 if (flag_dwarf2_cfi_asm)
12811 output_asm_insn (".cfi_restore\t%0", op);
12812 }
12813 }
12814
12815 if (flag_record_mcount)
12816 {
12817 fprintf (file, "\t.section __mcount_loc, \"a\",@progbits\n");
12818 fprintf (file, "\t.%s 1b\n", TARGET_64BIT ? "quad" : "long");
12819 fprintf (file, "\t.previous\n");
12820 }
12821 }
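/* For reference, the default 64-bit sequence (no -mfentry, no
   -mnop-mcount; CFI directives and label names omitted) is roughly:

	stg	%r14,8(%r15)	-- save return address
	larl	%r1,<counter label>
	brasl	%r14,_mcount	-- via PLT when -fPIC
	lg	%r14,8(%r15)	-- restore return address
*/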
12822
12823 /* Encode symbol attributes (local vs. global, tls model) of a SYMBOL_REF
12824 into its SYMBOL_REF_FLAGS. */
12825
12826 static void
12827 s390_encode_section_info (tree decl, rtx rtl, int first)
12828 {
12829 default_encode_section_info (decl, rtl, first);
12830
12831 if (TREE_CODE (decl) == VAR_DECL)
12832 {
12833 /* Store the alignment to be able to check if we can use
12834 a larl/load-relative instruction. We only handle the cases
12835 that can go wrong (i.e. no FUNC_DECLs). */
12836 if (DECL_ALIGN (decl) == 0 || DECL_ALIGN (decl) % 16)
12837 SYMBOL_FLAG_SET_NOTALIGN2 (XEXP (rtl, 0));
12838 else if (DECL_ALIGN (decl) % 32)
12839 SYMBOL_FLAG_SET_NOTALIGN4 (XEXP (rtl, 0));
12840 else if (DECL_ALIGN (decl) % 64)
12841 SYMBOL_FLAG_SET_NOTALIGN8 (XEXP (rtl, 0));
12842 }
12843
12844 /* Literal pool references don't have a decl so they are handled
12845 differently here. We rely on the information in the MEM_ALIGN
12846 entry to decide upon the alignment. */
12847 if (MEM_P (rtl)
12848 && GET_CODE (XEXP (rtl, 0)) == SYMBOL_REF
12849 && TREE_CONSTANT_POOL_ADDRESS_P (XEXP (rtl, 0)))
12850 {
12851 if (MEM_ALIGN (rtl) == 0 || MEM_ALIGN (rtl) % 16)
12852 SYMBOL_FLAG_SET_NOTALIGN2 (XEXP (rtl, 0));
12853 else if (MEM_ALIGN (rtl) % 32)
12854 SYMBOL_FLAG_SET_NOTALIGN4 (XEXP (rtl, 0));
12855 else if (MEM_ALIGN (rtl) % 64)
12856 SYMBOL_FLAG_SET_NOTALIGN8 (XEXP (rtl, 0));
12857 }
12858 }
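/* Note that DECL_ALIGN and MEM_ALIGN are measured in bits.  E.g. a
   variable with 1-byte alignment has DECL_ALIGN % 16 != 0 and thus
   gets the NOTALIGN2 flag: larl can only address halfword-aligned
   (2-byte) targets, so load-relative access must be avoided.  */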
12859
12860 /* Output thunk to FILE that implements a C++ virtual function call (with
12861 multiple inheritance) to FUNCTION. The thunk adjusts the this pointer
12862 by DELTA, and unless VCALL_OFFSET is zero, applies an additional adjustment
12863 stored at VCALL_OFFSET in the vtable whose address is located at offset 0
12864 relative to the resulting this pointer. */
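/* In C-like pseudocode the generated thunk performs (a sketch; the
   vcall adjustment slot is read as a pointer-sized integer):

     this += delta;
     if (vcall_offset)
       this += *(ptrdiff_t *) (*(char **) this + vcall_offset);
     goto *function;
*/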
12865
12866 static void
12867 s390_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
12868 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
12869 tree function)
12870 {
12871 rtx op[10];
12872 int nonlocal = 0;
12873
12874 /* Make sure unwind info is emitted for the thunk if needed. */
12875 final_start_function (emit_barrier (), file, 1);
12876
12877 /* Operand 0 is the target function. */
12878 op[0] = XEXP (DECL_RTL (function), 0);
12879 if (flag_pic && !SYMBOL_REF_LOCAL_P (op[0]))
12880 {
12881 nonlocal = 1;
12882 op[0] = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op[0]),
12883 TARGET_64BIT ? UNSPEC_PLT : UNSPEC_GOT);
12884 op[0] = gen_rtx_CONST (Pmode, op[0]);
12885 }
12886
12887 /* Operand 1 is the 'this' pointer. */
12888 if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
12889 op[1] = gen_rtx_REG (Pmode, 3);
12890 else
12891 op[1] = gen_rtx_REG (Pmode, 2);
12892
12893 /* Operand 2 is the delta. */
12894 op[2] = GEN_INT (delta);
12895
12896 /* Operand 3 is the vcall_offset. */
12897 op[3] = GEN_INT (vcall_offset);
12898
12899 /* Operand 4 is the temporary register. */
12900 op[4] = gen_rtx_REG (Pmode, 1);
12901
12902 /* Operands 5 to 8 can be used as labels. */
12903 op[5] = NULL_RTX;
12904 op[6] = NULL_RTX;
12905 op[7] = NULL_RTX;
12906 op[8] = NULL_RTX;
12907
12908 /* Operand 9 can be used for temporary register. */
12909 op[9] = NULL_RTX;
12910
12911 /* Generate code. */
12912 if (TARGET_64BIT)
12913 {
12914 /* Setup literal pool pointer if required. */
12915 if ((!DISP_IN_RANGE (delta)
12916 && !CONST_OK_FOR_K (delta)
12917 && !CONST_OK_FOR_Os (delta))
12918 || (!DISP_IN_RANGE (vcall_offset)
12919 && !CONST_OK_FOR_K (vcall_offset)
12920 && !CONST_OK_FOR_Os (vcall_offset)))
12921 {
12922 op[5] = gen_label_rtx ();
12923 output_asm_insn ("larl\t%4,%5", op);
12924 }
12925
12926 /* Add DELTA to this pointer. */
12927 if (delta)
12928 {
12929 if (CONST_OK_FOR_J (delta))
12930 output_asm_insn ("la\t%1,%2(%1)", op);
12931 else if (DISP_IN_RANGE (delta))
12932 output_asm_insn ("lay\t%1,%2(%1)", op);
12933 else if (CONST_OK_FOR_K (delta))
12934 output_asm_insn ("aghi\t%1,%2", op);
12935 else if (CONST_OK_FOR_Os (delta))
12936 output_asm_insn ("agfi\t%1,%2", op);
12937 else
12938 {
12939 op[6] = gen_label_rtx ();
12940 output_asm_insn ("agf\t%1,%6-%5(%4)", op);
12941 }
12942 }
12943
12944 /* Perform vcall adjustment. */
12945 if (vcall_offset)
12946 {
12947 if (DISP_IN_RANGE (vcall_offset))
12948 {
12949 output_asm_insn ("lg\t%4,0(%1)", op);
12950 output_asm_insn ("ag\t%1,%3(%4)", op);
12951 }
12952 else if (CONST_OK_FOR_K (vcall_offset))
12953 {
12954 output_asm_insn ("lghi\t%4,%3", op);
12955 output_asm_insn ("ag\t%4,0(%1)", op);
12956 output_asm_insn ("ag\t%1,0(%4)", op);
12957 }
12958 else if (CONST_OK_FOR_Os (vcall_offset))
12959 {
12960 output_asm_insn ("lgfi\t%4,%3", op);
12961 output_asm_insn ("ag\t%4,0(%1)", op);
12962 output_asm_insn ("ag\t%1,0(%4)", op);
12963 }
12964 else
12965 {
12966 op[7] = gen_label_rtx ();
12967 output_asm_insn ("llgf\t%4,%7-%5(%4)", op);
12968 output_asm_insn ("ag\t%4,0(%1)", op);
12969 output_asm_insn ("ag\t%1,0(%4)", op);
12970 }
12971 }
12972
12973 /* Jump to target. */
12974 output_asm_insn ("jg\t%0", op);
12975
12976 /* Output literal pool if required. */
12977 if (op[5])
12978 {
12979 output_asm_insn (".align\t4", op);
12980 targetm.asm_out.internal_label (file, "L",
12981 CODE_LABEL_NUMBER (op[5]));
12982 }
12983 if (op[6])
12984 {
12985 targetm.asm_out.internal_label (file, "L",
12986 CODE_LABEL_NUMBER (op[6]));
12987 output_asm_insn (".long\t%2", op);
12988 }
12989 if (op[7])
12990 {
12991 targetm.asm_out.internal_label (file, "L",
12992 CODE_LABEL_NUMBER (op[7]));
12993 output_asm_insn (".long\t%3", op);
12994 }
12995 }
12996 else
12997 {
12998 /* Setup base pointer if required. */
12999 if (!vcall_offset
13000 || (!DISP_IN_RANGE (delta)
13001 && !CONST_OK_FOR_K (delta)
13002 && !CONST_OK_FOR_Os (delta))
13003 || (!DISP_IN_RANGE (delta)
13004 && !CONST_OK_FOR_K (vcall_offset)
13005 && !CONST_OK_FOR_Os (vcall_offset)))
13006 {
13007 op[5] = gen_label_rtx ();
13008 output_asm_insn ("basr\t%4,0", op);
13009 targetm.asm_out.internal_label (file, "L",
13010 CODE_LABEL_NUMBER (op[5]));
13011 }
13012
13013 /* Add DELTA to this pointer. */
13014 if (delta)
13015 {
13016 if (CONST_OK_FOR_J (delta))
13017 output_asm_insn ("la\t%1,%2(%1)", op);
13018 else if (DISP_IN_RANGE (delta))
13019 output_asm_insn ("lay\t%1,%2(%1)", op);
13020 else if (CONST_OK_FOR_K (delta))
13021 output_asm_insn ("ahi\t%1,%2", op);
13022 else if (CONST_OK_FOR_Os (delta))
13023 output_asm_insn ("afi\t%1,%2", op);
13024 else
13025 {
13026 op[6] = gen_label_rtx ();
13027 output_asm_insn ("a\t%1,%6-%5(%4)", op);
13028 }
13029 }
13030
13031 /* Perform vcall adjustment. */
13032 if (vcall_offset)
13033 {
13034 if (CONST_OK_FOR_J (vcall_offset))
13035 {
13036 output_asm_insn ("l\t%4,0(%1)", op);
13037 output_asm_insn ("a\t%1,%3(%4)", op);
13038 }
13039 else if (DISP_IN_RANGE (vcall_offset))
13040 {
13041 output_asm_insn ("l\t%4,0(%1)", op);
13042 output_asm_insn ("ay\t%1,%3(%4)", op);
13043 }
13044 else if (CONST_OK_FOR_K (vcall_offset))
13045 {
13046 output_asm_insn ("lhi\t%4,%3", op);
13047 output_asm_insn ("a\t%4,0(%1)", op);
13048 output_asm_insn ("a\t%1,0(%4)", op);
13049 }
13050 else if (CONST_OK_FOR_Os (vcall_offset))
13051 {
13052 output_asm_insn ("iilf\t%4,%3", op);
13053 output_asm_insn ("a\t%4,0(%1)", op);
13054 output_asm_insn ("a\t%1,0(%4)", op);
13055 }
13056 else
13057 {
13058 op[7] = gen_label_rtx ();
13059 output_asm_insn ("l\t%4,%7-%5(%4)", op);
13060 output_asm_insn ("a\t%4,0(%1)", op);
13061 output_asm_insn ("a\t%1,0(%4)", op);
13062 }
13063
13064 /* We had to clobber the base pointer register.
13065 Re-setup the base pointer (with a different base). */
13066 op[5] = gen_label_rtx ();
13067 output_asm_insn ("basr\t%4,0", op);
13068 targetm.asm_out.internal_label (file, "L",
13069 CODE_LABEL_NUMBER (op[5]));
13070 }
13071
13072 /* Jump to target. */
13073 op[8] = gen_label_rtx ();
13074
13075 if (!flag_pic)
13076 output_asm_insn ("l\t%4,%8-%5(%4)", op);
13077 else if (!nonlocal)
13078 output_asm_insn ("a\t%4,%8-%5(%4)", op);
13079 /* We cannot call through .plt, since .plt requires %r12 loaded. */
13080 else if (flag_pic == 1)
13081 {
13082 output_asm_insn ("a\t%4,%8-%5(%4)", op);
13083 output_asm_insn ("l\t%4,%0(%4)", op);
13084 }
13085 else if (flag_pic == 2)
13086 {
13087 op[9] = gen_rtx_REG (Pmode, 0);
13088 output_asm_insn ("l\t%9,%8-4-%5(%4)", op);
13089 output_asm_insn ("a\t%4,%8-%5(%4)", op);
13090 output_asm_insn ("ar\t%4,%9", op);
13091 output_asm_insn ("l\t%4,0(%4)", op);
13092 }
13093
13094 output_asm_insn ("br\t%4", op);
13095
13096 /* Output literal pool. */
13097 output_asm_insn (".align\t4", op);
13098
13099 if (nonlocal && flag_pic == 2)
13100 output_asm_insn (".long\t%0", op);
13101 if (nonlocal)
13102 {
13103 op[0] = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
13104 SYMBOL_REF_FLAGS (op[0]) = SYMBOL_FLAG_LOCAL;
13105 }
13106
13107 targetm.asm_out.internal_label (file, "L", CODE_LABEL_NUMBER (op[8]));
13108 if (!flag_pic)
13109 output_asm_insn (".long\t%0", op);
13110 else
13111 output_asm_insn (".long\t%0-%5", op);
13112
13113 if (op[6])
13114 {
13115 targetm.asm_out.internal_label (file, "L",
13116 CODE_LABEL_NUMBER (op[6]));
13117 output_asm_insn (".long\t%2", op);
13118 }
13119 if (op[7])
13120 {
13121 targetm.asm_out.internal_label (file, "L",
13122 CODE_LABEL_NUMBER (op[7]));
13123 output_asm_insn (".long\t%3", op);
13124 }
13125 }
13126 final_end_function ();
13127 }
13128
13129 /* Output either an indirect jump or an indirect call
13130 (RETURN_ADDR_REGNO != INVALID_REGNUM) with target register REGNO
13131 using a branch trampoline disabling branch target prediction. */
13132
13133 void
13134 s390_indirect_branch_via_thunk (unsigned int regno,
13135 unsigned int return_addr_regno,
13136 rtx comparison_operator,
13137 enum s390_indirect_branch_type type)
13138 {
13139 enum s390_indirect_branch_option option;
13140
13141 if (type == s390_indirect_branch_type_return)
13142 {
13143 if (s390_return_addr_from_memory ())
13144 option = s390_opt_function_return_mem;
13145 else
13146 option = s390_opt_function_return_reg;
13147 }
13148 else if (type == s390_indirect_branch_type_jump)
13149 option = s390_opt_indirect_branch_jump;
13150 else if (type == s390_indirect_branch_type_call)
13151 option = s390_opt_indirect_branch_call;
13152 else
13153 gcc_unreachable ();
13154
13155 if (TARGET_INDIRECT_BRANCH_TABLE)
13156 {
13157 char label[32];
13158
13159 ASM_GENERATE_INTERNAL_LABEL (label,
13160 indirect_branch_table_label[option],
13161 indirect_branch_table_label_no[option]++);
13162 ASM_OUTPUT_LABEL (asm_out_file, label);
13163 }
13164
13165 if (return_addr_regno != INVALID_REGNUM)
13166 {
13167 gcc_assert (comparison_operator == NULL_RTX);
13168 fprintf (asm_out_file, " \tbrasl\t%%r%d,", return_addr_regno);
13169 }
13170 else
13171 {
13172 fputs (" \tjg", asm_out_file);
13173 if (comparison_operator != NULL_RTX)
13174 print_operand (asm_out_file, comparison_operator, 'C');
13175
13176 fputs ("\t", asm_out_file);
13177 }
13178
13179 if (TARGET_CPU_Z10)
13180 fprintf (asm_out_file,
13181 TARGET_INDIRECT_BRANCH_THUNK_NAME_EXRL "\n",
13182 regno);
13183 else
13184 fprintf (asm_out_file,
13185 TARGET_INDIRECT_BRANCH_THUNK_NAME_EX "\n",
13186 INDIRECT_BRANCH_THUNK_REGNUM, regno);
13187
13188 if ((option == s390_opt_indirect_branch_jump
13189 && cfun->machine->indirect_branch_jump == indirect_branch_thunk)
13190 || (option == s390_opt_indirect_branch_call
13191 && cfun->machine->indirect_branch_call == indirect_branch_thunk)
13192 || (option == s390_opt_function_return_reg
13193 && cfun->machine->function_return_reg == indirect_branch_thunk)
13194 || (option == s390_opt_function_return_mem
13195 && cfun->machine->function_return_mem == indirect_branch_thunk))
13196 {
13197 if (TARGET_CPU_Z10)
13198 indirect_branch_z10thunk_mask |= (1 << regno);
13199 else
13200 indirect_branch_prez10thunk_mask |= (1 << regno);
13201 }
13202 }
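/* For example, an indirect call through %r1 on z10 or newer comes out
   as (a sketch; the thunk symbol is produced by
   TARGET_INDIRECT_BRANCH_THUNK_NAME_EXRL above):

	brasl	%r14,<exrl-based thunk for %r1>

   and an indirect jump as jg <thunk>.  The thunk performs the actual
   branch with target prediction disabled.  */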
13203
13204 /* Output an inline thunk for indirect jumps. EXECUTE_TARGET can
13205 either be an address register or a label pointing to the location
13206 of the jump instruction. */
13207
13208 void
13209 s390_indirect_branch_via_inline_thunk (rtx execute_target)
13210 {
13211 if (TARGET_INDIRECT_BRANCH_TABLE)
13212 {
13213 char label[32];
13214
13215 ASM_GENERATE_INTERNAL_LABEL (label,
13216 indirect_branch_table_label[s390_opt_indirect_branch_jump],
13217 indirect_branch_table_label_no[s390_opt_indirect_branch_jump]++);
13218 ASM_OUTPUT_LABEL (asm_out_file, label);
13219 }
13220
13221 if (!TARGET_ZARCH)
13222 fputs ("\t.machinemode zarch\n", asm_out_file);
13223
13224 if (REG_P (execute_target))
13225 fprintf (asm_out_file, "\tex\t%%r0,0(%%r%d)\n", REGNO (execute_target));
13226 else
13227 output_asm_insn ("\texrl\t%%r0,%0", &execute_target);
13228
13229 if (!TARGET_ZARCH)
13230 fputs ("\t.machinemode esa\n", asm_out_file);
13231
13232 fputs ("0:\tj\t0b\n", asm_out_file);
13233 }
13234
13235 static bool
13236 s390_valid_pointer_mode (scalar_int_mode mode)
13237 {
13238 return (mode == SImode || (TARGET_64BIT && mode == DImode));
13239 }
13240
13241 /* Checks whether the given CALL_EXPR would use a caller
13242 saved register. This is used to decide whether sibling call
13243 optimization could be performed on the respective function
13244 call. */
13245
13246 static bool
13247 s390_call_saved_register_used (tree call_expr)
13248 {
13249 CUMULATIVE_ARGS cum_v;
13250 cumulative_args_t cum;
13251 tree parameter;
13252 machine_mode mode;
13253 tree type;
13254 rtx parm_rtx;
13255 int reg, i;
13256
13257 INIT_CUMULATIVE_ARGS (cum_v, NULL, NULL, 0, 0);
13258 cum = pack_cumulative_args (&cum_v);
13259
13260 for (i = 0; i < call_expr_nargs (call_expr); i++)
13261 {
13262 parameter = CALL_EXPR_ARG (call_expr, i);
13263 gcc_assert (parameter);
13264
13265 /* For an undeclared variable passed as parameter we will get
13266 an ERROR_MARK node here. */
13267 if (TREE_CODE (parameter) == ERROR_MARK)
13268 return true;
13269
13270 type = TREE_TYPE (parameter);
13271 gcc_assert (type);
13272
13273 mode = TYPE_MODE (type);
13274 gcc_assert (mode);
13275
13276 /* We assume that in the target function all parameters are
13277 named. This only has an impact on vector argument register
13278 usage none of which is call-saved. */
13279 if (pass_by_reference (&cum_v, mode, type, true))
13280 {
13281 mode = Pmode;
13282 type = build_pointer_type (type);
13283 }
13284
13285 parm_rtx = s390_function_arg (cum, mode, type, true);
13286
13287 s390_function_arg_advance (cum, mode, type, true);
13288
13289 if (!parm_rtx)
13290 continue;
13291
13292 if (REG_P (parm_rtx))
13293 {
13294 for (reg = 0; reg < REG_NREGS (parm_rtx); reg++)
13295 if (!call_used_regs[reg + REGNO (parm_rtx)])
13296 return true;
13297 }
13298
13299 if (GET_CODE (parm_rtx) == PARALLEL)
13300 {
13301 int i;
13302
13303 for (i = 0; i < XVECLEN (parm_rtx, 0); i++)
13304 {
13305 rtx r = XEXP (XVECEXP (parm_rtx, 0, i), 0);
13306
13307 gcc_assert (REG_P (r));
13308
13309 for (reg = 0; reg < REG_NREGS (r); reg++)
13310 if (!call_used_regs[reg + REGNO (r)])
13311 return true;
13312 }
13313 }
13314
13315 }
13316 return false;
13317 }
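/* Example: with integer arguments passed in the five GPR argument
   registers %r2..%r6, a call such as f (a, b, c, d, e) places the
   fifth argument in %r6.  %r6 is call-saved, so this function returns
   true and the sibcall is rejected (see s390_function_ok_for_sibcall
   below).  */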
13318
13319 /* Return true if the given call expression can be
13320 turned into a sibling call.
13321 DECL holds the declaration of the function to be called whereas
13322 EXP is the call expression itself. */
13323
13324 static bool
13325 s390_function_ok_for_sibcall (tree decl, tree exp)
13326 {
13327 /* The TPF epilogue uses register 1. */
13328 if (TARGET_TPF_PROFILING)
13329 return false;
13330
13331 /* The 31 bit PLT code uses register 12 (GOT pointer - caller saved)
13332 which would have to be restored before the sibcall. */
13333 if (!TARGET_64BIT && flag_pic && decl && !targetm.binds_local_p (decl))
13334 return false;
13335
13336 /* The thunks for indirect branches require r1 if no exrl is
13337 available. r1 might not be available when doing a sibling
13338 call. */
13339 if (TARGET_INDIRECT_BRANCH_NOBP_CALL
13340 && !TARGET_CPU_Z10
13341 && !decl)
13342 return false;
13343
13344 /* Register 6 on s390 is available as an argument register but is
13345 unfortunately call-saved (callee-saved). This makes functions needing
13346 this register for arguments not suitable for sibcalls. */
13347 return !s390_call_saved_register_used (exp);
13348 }
13349
13350 /* Return the fixed registers used for condition codes. */
13351
13352 static bool
13353 s390_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
13354 {
13355 *p1 = CC_REGNUM;
13356 *p2 = INVALID_REGNUM;
13357
13358 return true;
13359 }
13360
13361 /* This function is used by the call expanders of the machine description.
13362 It emits the call insn itself together with the necessary operations
13363 to adjust the target address and returns the emitted insn.
13364 ADDR_LOCATION is the target address rtx
13365 TLS_CALL the location of the thread-local symbol
13366 RESULT_REG the register where the result of the call should be stored
13367 RETADDR_REG the register where the return address should be stored
13368 If this parameter is NULL_RTX the call is considered
13369 to be a sibling call. */
13370
13371 rtx_insn *
13372 s390_emit_call (rtx addr_location, rtx tls_call, rtx result_reg,
13373 rtx retaddr_reg)
13374 {
13375 bool plt_call = false;
13376 rtx_insn *insn;
13377 rtx vec[4] = { NULL_RTX };
13378 int elts = 0;
13379 rtx *call = &vec[0];
13380 rtx *clobber_ret_reg = &vec[1];
13381 rtx *use = &vec[2];
13382 rtx *clobber_thunk_reg = &vec[3];
13383 int i;
13384
13385 /* Direct function calls need special treatment. */
13386 if (GET_CODE (addr_location) == SYMBOL_REF)
13387 {
13388 /* When calling a global routine in PIC mode, we must
13389 replace the symbol itself with the PLT stub. */
13390 if (flag_pic && !SYMBOL_REF_LOCAL_P (addr_location))
13391 {
13392 if (TARGET_64BIT || retaddr_reg != NULL_RTX)
13393 {
13394 addr_location = gen_rtx_UNSPEC (Pmode,
13395 gen_rtvec (1, addr_location),
13396 UNSPEC_PLT);
13397 addr_location = gen_rtx_CONST (Pmode, addr_location);
13398 plt_call = true;
13399 }
13400 else
13401 /* For -fpic code the PLT entries might use r12 which is
13402 call-saved. Therefore we cannot do a sibcall when
13403 calling directly using a symbol ref. When reaching
13404 this point we decided (in s390_function_ok_for_sibcall)
13405 to do a sibcall for a function pointer but one of the
13406 optimizers was able to get rid of the function pointer
13407 by propagating the symbol ref into the call. This
13408 optimization is illegal for S/390 so we turn the direct
13409 call into an indirect call again. */
13410 addr_location = force_reg (Pmode, addr_location);
13411 }
13412 }
13413
13414 /* If it is already an indirect call or the code above moved the
13415 SYMBOL_REF to somewhere else, make sure the address can be found in
13416 register 1. */
13417 if (retaddr_reg == NULL_RTX
13418 && GET_CODE (addr_location) != SYMBOL_REF
13419 && !plt_call)
13420 {
13421 emit_move_insn (gen_rtx_REG (Pmode, SIBCALL_REGNUM), addr_location);
13422 addr_location = gen_rtx_REG (Pmode, SIBCALL_REGNUM);
13423 }
13424
13425 if (TARGET_INDIRECT_BRANCH_NOBP_CALL
13426 && GET_CODE (addr_location) != SYMBOL_REF
13427 && !plt_call)
13428 {
13429 /* Indirect branch thunks require the target to be a single GPR. */
13430 addr_location = force_reg (Pmode, addr_location);
13431
13432 /* Without exrl the indirect branch thunks need an additional
13433 register for larl;ex. */
13434 if (!TARGET_CPU_Z10)
13435 {
13436 *clobber_thunk_reg = gen_rtx_REG (Pmode, INDIRECT_BRANCH_THUNK_REGNUM);
13437 *clobber_thunk_reg = gen_rtx_CLOBBER (VOIDmode, *clobber_thunk_reg);
13438 }
13439 }
13440
13441 addr_location = gen_rtx_MEM (QImode, addr_location);
13442 *call = gen_rtx_CALL (VOIDmode, addr_location, const0_rtx);
13443
13444 if (result_reg != NULL_RTX)
13445 *call = gen_rtx_SET (result_reg, *call);
13446
13447 if (retaddr_reg != NULL_RTX)
13448 {
13449 *clobber_ret_reg = gen_rtx_CLOBBER (VOIDmode, retaddr_reg);
13450
13451 if (tls_call != NULL_RTX)
13452 *use = gen_rtx_USE (VOIDmode, tls_call);
13453 }
13454
13455
13456 for (i = 0; i < 4; i++)
13457 if (vec[i] != NULL_RTX)
13458 elts++;
13459
13460 if (elts > 1)
13461 {
13462 rtvec v;
13463 int e = 0;
13464
13465 v = rtvec_alloc (elts);
13466 for (i = 0; i < 4; i++)
13467 if (vec[i] != NULL_RTX)
13468 {
13469 RTVEC_ELT (v, e) = vec[i];
13470 e++;
13471 }
13472
13473 *call = gen_rtx_PARALLEL (VOIDmode, v);
13474 }
13475
13476 insn = emit_call_insn (*call);
13477
13478 /* 31-bit PLT stubs and tls calls use the GOT register implicitly. */
13479 if ((!TARGET_64BIT && plt_call) || tls_call != NULL_RTX)
13480 {
13481 /* s390_function_ok_for_sibcall should
13482 have denied sibcalls in this case. */
13483 gcc_assert (retaddr_reg != NULL_RTX);
13484 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), gen_rtx_REG (Pmode, 12));
13485 }
13486 return insn;
13487 }
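/* The resulting pattern for a value-returning call with a return
   address register is roughly (a sketch):

     (parallel
       [(set (reg result) (call (mem:QI addr) (const_int 0)))
	(clobber (reg retaddr))])

   extended by a (use tls_call) element for TLS calls and by a clobber
   of INDIRECT_BRANCH_THUNK_REGNUM for pre-z10 indirect-branch
   thunks.  */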
13488
13489 /* Implement TARGET_CONDITIONAL_REGISTER_USAGE. */
13490
13491 static void
13492 s390_conditional_register_usage (void)
13493 {
13494 int i;
13495
13496 if (flag_pic)
13497 {
13498 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
13499 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
13500 }
13501 fixed_regs[BASE_REGNUM] = 0;
13502 call_used_regs[BASE_REGNUM] = 0;
13503 fixed_regs[RETURN_REGNUM] = 0;
13504 call_used_regs[RETURN_REGNUM] = 0;
13505 if (TARGET_64BIT)
13506 {
13507 for (i = FPR8_REGNUM; i <= FPR15_REGNUM; i++)
13508 call_used_regs[i] = call_really_used_regs[i] = 0;
13509 }
13510 else
13511 {
13512 call_used_regs[FPR4_REGNUM] = call_really_used_regs[FPR4_REGNUM] = 0;
13513 call_used_regs[FPR6_REGNUM] = call_really_used_regs[FPR6_REGNUM] = 0;
13514 }
13515
13516 if (TARGET_SOFT_FLOAT)
13517 {
13518 for (i = FPR0_REGNUM; i <= FPR15_REGNUM; i++)
13519 call_used_regs[i] = fixed_regs[i] = 1;
13520 }
13521
13522 /* Disable v16 - v31 for non-vector target. */
13523 if (!TARGET_VX)
13524 {
13525 for (i = VR16_REGNUM; i <= VR31_REGNUM; i++)
13526 fixed_regs[i] = call_used_regs[i] = call_really_used_regs[i] = 1;
13527 }
13528 }
13529
13530 /* Corresponding function to eh_return expander. */
13531
13532 static GTY(()) rtx s390_tpf_eh_return_symbol;
13533 void
13534 s390_emit_tpf_eh_return (rtx target)
13535 {
13536 rtx_insn *insn;
13537 rtx reg, orig_ra;
13538
13539 if (!s390_tpf_eh_return_symbol)
13540 s390_tpf_eh_return_symbol = gen_rtx_SYMBOL_REF (Pmode, "__tpf_eh_return");
13541
13542 reg = gen_rtx_REG (Pmode, 2);
13543 orig_ra = gen_rtx_REG (Pmode, 3);
13544
13545 emit_move_insn (reg, target);
13546 emit_move_insn (orig_ra, get_hard_reg_initial_val (Pmode, RETURN_REGNUM));
13547 insn = s390_emit_call (s390_tpf_eh_return_symbol, NULL_RTX, reg,
13548 gen_rtx_REG (Pmode, RETURN_REGNUM));
13549 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), reg);
13550 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), orig_ra);
13551
13552 emit_move_insn (EH_RETURN_HANDLER_RTX, reg);
13553 }
13554
13555 /* Rework the prologue/epilogue to avoid saving/restoring
13556 registers unnecessarily. */
13557
13558 static void
13559 s390_optimize_prologue (void)
13560 {
13561 rtx_insn *insn, *new_insn, *next_insn;
13562
13563 /* Do a final recompute of the frame-related data. */
13564 s390_optimize_register_info ();
13565
13566 /* If all special registers are in fact used, there's nothing we
13567 can do, so no point in walking the insn list. */
13568
13569 if (cfun_frame_layout.first_save_gpr <= BASE_REGNUM
13570 && cfun_frame_layout.last_save_gpr >= BASE_REGNUM)
13571 return;
13572
13573 /* Search for prologue/epilogue insns and replace them. */
13574 for (insn = get_insns (); insn; insn = next_insn)
13575 {
13576 int first, last, off;
13577 rtx set, base, offset;
13578 rtx pat;
13579
13580 next_insn = NEXT_INSN (insn);
13581
13582 if (! NONJUMP_INSN_P (insn) || ! RTX_FRAME_RELATED_P (insn))
13583 continue;
13584
13585 pat = PATTERN (insn);
13586
13587 /* Remove ldgr/lgdr instructions used for saving and restoring
13588 GPRs if possible. */
13589 if (TARGET_Z10)
13590 {
13591 rtx tmp_pat = pat;
13592
13593 if (INSN_CODE (insn) == CODE_FOR_stack_restore_from_fpr)
13594 tmp_pat = XVECEXP (pat, 0, 0);
13595
13596 if (GET_CODE (tmp_pat) == SET
13597 && GET_MODE (SET_SRC (tmp_pat)) == DImode
13598 && REG_P (SET_SRC (tmp_pat))
13599 && REG_P (SET_DEST (tmp_pat)))
13600 {
13601 int src_regno = REGNO (SET_SRC (tmp_pat));
13602 int dest_regno = REGNO (SET_DEST (tmp_pat));
13603 int gpr_regno;
13604 int fpr_regno;
13605
13606 if (!((GENERAL_REGNO_P (src_regno)
13607 && FP_REGNO_P (dest_regno))
13608 || (FP_REGNO_P (src_regno)
13609 && GENERAL_REGNO_P (dest_regno))))
13610 continue;
13611
13612 gpr_regno = GENERAL_REGNO_P (src_regno) ? src_regno : dest_regno;
13613 fpr_regno = FP_REGNO_P (src_regno) ? src_regno : dest_regno;
13614
13615 /* GPR must be call-saved, FPR must be call-clobbered. */
13616 if (!call_really_used_regs[fpr_regno]
13617 || call_really_used_regs[gpr_regno])
13618 continue;
13619
13620 /* It must not happen that what we once saved in an FPR now
13621 needs a stack slot. */
13622 gcc_assert (cfun_gpr_save_slot (gpr_regno) != SAVE_SLOT_STACK);
13623
13624 if (cfun_gpr_save_slot (gpr_regno) == SAVE_SLOT_NONE)
13625 {
13626 remove_insn (insn);
13627 continue;
13628 }
13629 }
13630 }
13631
13632 if (GET_CODE (pat) == PARALLEL
13633 && store_multiple_operation (pat, VOIDmode))
13634 {
13635 set = XVECEXP (pat, 0, 0);
13636 first = REGNO (SET_SRC (set));
13637 last = first + XVECLEN (pat, 0) - 1;
13638 offset = const0_rtx;
13639 base = eliminate_constant_term (XEXP (SET_DEST (set), 0), &offset);
13640 off = INTVAL (offset);
13641
13642 if (GET_CODE (base) != REG || off < 0)
13643 continue;
13644 if (cfun_frame_layout.first_save_gpr != -1
13645 && (cfun_frame_layout.first_save_gpr < first
13646 || cfun_frame_layout.last_save_gpr > last))
13647 continue;
13648 if (REGNO (base) != STACK_POINTER_REGNUM
13649 && REGNO (base) != HARD_FRAME_POINTER_REGNUM)
13650 continue;
13651 if (first > BASE_REGNUM || last < BASE_REGNUM)
13652 continue;
13653
13654 if (cfun_frame_layout.first_save_gpr != -1)
13655 {
13656 rtx s_pat = save_gprs (base,
13657 off + (cfun_frame_layout.first_save_gpr
13658 - first) * UNITS_PER_LONG,
13659 cfun_frame_layout.first_save_gpr,
13660 cfun_frame_layout.last_save_gpr);
13661 new_insn = emit_insn_before (s_pat, insn);
13662 INSN_ADDRESSES_NEW (new_insn, -1);
13663 }
13664
13665 remove_insn (insn);
13666 continue;
13667 }
13668
13669 if (cfun_frame_layout.first_save_gpr == -1
13670 && GET_CODE (pat) == SET
13671 && GENERAL_REG_P (SET_SRC (pat))
13672 && GET_CODE (SET_DEST (pat)) == MEM)
13673 {
13674 set = pat;
13675 first = REGNO (SET_SRC (set));
13676 offset = const0_rtx;
13677 base = eliminate_constant_term (XEXP (SET_DEST (set), 0), &offset);
13678 off = INTVAL (offset);
13679
13680 if (GET_CODE (base) != REG || off < 0)
13681 continue;
13682 if (REGNO (base) != STACK_POINTER_REGNUM
13683 && REGNO (base) != HARD_FRAME_POINTER_REGNUM)
13684 continue;
13685
13686 remove_insn (insn);
13687 continue;
13688 }
13689
13690 if (GET_CODE (pat) == PARALLEL
13691 && load_multiple_operation (pat, VOIDmode))
13692 {
13693 set = XVECEXP (pat, 0, 0);
13694 first = REGNO (SET_DEST (set));
13695 last = first + XVECLEN (pat, 0) - 1;
13696 offset = const0_rtx;
13697 base = eliminate_constant_term (XEXP (SET_SRC (set), 0), &offset);
13698 off = INTVAL (offset);
13699
13700 if (GET_CODE (base) != REG || off < 0)
13701 continue;
13702
13703 if (cfun_frame_layout.first_restore_gpr != -1
13704 && (cfun_frame_layout.first_restore_gpr < first
13705 || cfun_frame_layout.last_restore_gpr > last))
13706 continue;
13707 if (REGNO (base) != STACK_POINTER_REGNUM
13708 && REGNO (base) != HARD_FRAME_POINTER_REGNUM)
13709 continue;
13710 if (first > BASE_REGNUM || last < BASE_REGNUM)
13711 continue;
13712
13713 if (cfun_frame_layout.first_restore_gpr != -1)
13714 {
13715 rtx rpat = restore_gprs (base,
13716 off + (cfun_frame_layout.first_restore_gpr
13717 - first) * UNITS_PER_LONG,
13718 cfun_frame_layout.first_restore_gpr,
13719 cfun_frame_layout.last_restore_gpr);
13720
13721 /* Remove REG_CFA_RESTOREs for registers that we no
13722 longer need to save. */
13723 REG_NOTES (rpat) = REG_NOTES (insn);
13724 for (rtx *ptr = &REG_NOTES (rpat); *ptr; )
13725 if (REG_NOTE_KIND (*ptr) == REG_CFA_RESTORE
13726 && ((int) REGNO (XEXP (*ptr, 0))
13727 < cfun_frame_layout.first_restore_gpr))
13728 *ptr = XEXP (*ptr, 1);
13729 else
13730 ptr = &XEXP (*ptr, 1);
13731 new_insn = emit_insn_before (rpat, insn);
13732 RTX_FRAME_RELATED_P (new_insn) = 1;
13733 INSN_ADDRESSES_NEW (new_insn, -1);
13734 }
13735
13736 remove_insn (insn);
13737 continue;
13738 }
13739
13740 if (cfun_frame_layout.first_restore_gpr == -1
13741 && GET_CODE (pat) == SET
13742 && GENERAL_REG_P (SET_DEST (pat))
13743 && GET_CODE (SET_SRC (pat)) == MEM)
13744 {
13745 set = pat;
13746 first = REGNO (SET_DEST (set));
13747 offset = const0_rtx;
13748 base = eliminate_constant_term (XEXP (SET_SRC (set), 0), &offset);
13749 off = INTVAL (offset);
13750
13751 if (GET_CODE (base) != REG || off < 0)
13752 continue;
13753
13754 if (REGNO (base) != STACK_POINTER_REGNUM
13755 && REGNO (base) != HARD_FRAME_POINTER_REGNUM)
13756 continue;
13757
13758 remove_insn (insn);
13759 continue;
13760 }
13761 }
13762 }
13763
13764 /* On z10 and later the dynamic branch prediction must see the
13765 backward jump within a certain window. If not, it falls back to
13766 the static prediction. This function rearranges the loop backward
13767 branch in a way which makes the static prediction always correct.
13768 The function returns true if it added an instruction. */
13769 static bool
13770 s390_fix_long_loop_prediction (rtx_insn *insn)
13771 {
13772 rtx set = single_set (insn);
13773 rtx code_label, label_ref;
13774 rtx_insn *uncond_jump;
13775 rtx_insn *cur_insn;
13776 rtx tmp;
13777 int distance;
13778
13779 /* This will exclude branch on count and branch on index patterns
13780 since these are correctly statically predicted. */
13781 if (!set
13782 || SET_DEST (set) != pc_rtx
13783 || GET_CODE (SET_SRC (set)) != IF_THEN_ELSE)
13784 return false;
13785
13786 /* Skip conditional returns. */
13787 if (ANY_RETURN_P (XEXP (SET_SRC (set), 1))
13788 && XEXP (SET_SRC (set), 2) == pc_rtx)
13789 return false;
13790
13791 label_ref = (GET_CODE (XEXP (SET_SRC (set), 1)) == LABEL_REF ?
13792 XEXP (SET_SRC (set), 1) : XEXP (SET_SRC (set), 2));
13793
13794 gcc_assert (GET_CODE (label_ref) == LABEL_REF);
13795
13796 code_label = XEXP (label_ref, 0);
13797
13798 if (INSN_ADDRESSES (INSN_UID (code_label)) == -1
13799 || INSN_ADDRESSES (INSN_UID (insn)) == -1
13800 || (INSN_ADDRESSES (INSN_UID (insn))
13801 - INSN_ADDRESSES (INSN_UID (code_label)) < PREDICT_DISTANCE))
13802 return false;
13803
13804 for (distance = 0, cur_insn = PREV_INSN (insn);
13805 distance < PREDICT_DISTANCE - 6;
13806 distance += get_attr_length (cur_insn), cur_insn = PREV_INSN (cur_insn))
13807 if (!cur_insn || JUMP_P (cur_insn) || LABEL_P (cur_insn))
13808 return false;
13809
13810 rtx_code_label *new_label = gen_label_rtx ();
13811 uncond_jump = emit_jump_insn_after (
13812 gen_rtx_SET (pc_rtx,
13813 gen_rtx_LABEL_REF (VOIDmode, code_label)),
13814 insn);
13815 emit_label_after (new_label, uncond_jump);
13816
13817 tmp = XEXP (SET_SRC (set), 1);
13818 XEXP (SET_SRC (set), 1) = XEXP (SET_SRC (set), 2);
13819 XEXP (SET_SRC (set), 2) = tmp;
13820 INSN_CODE (insn) = -1;
13821
13822 XEXP (label_ref, 0) = new_label;
13823 JUMP_LABEL (insn) = new_label;
13824 JUMP_LABEL (uncond_jump) = code_label;
13825
13826 return true;
13827 }
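/* Schematically the transformation is (a sketch):

     loop:  ...                       loop:  ...
            if (cond) goto loop;  ->         if (!cond) goto skip;
                                             goto loop;
                                      skip:

   The backward branch becomes unconditional (statically predicted
   taken) and the conditional branch a short forward jump.  */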
13828
13829 /* Returns 1 if INSN reads the value of REG for purposes not related
13830 to addressing of memory, and 0 otherwise. */
13831 static int
13832 s390_non_addr_reg_read_p (rtx reg, rtx_insn *insn)
13833 {
13834 return reg_referenced_p (reg, PATTERN (insn))
13835 && !reg_used_in_mem_p (REGNO (reg), PATTERN (insn));
13836 }
13837
13838 /* Starting from INSN find_cond_jump looks downwards in the insn
13839 stream for a single jump insn which is the last user of the
13840 condition code set in INSN. */
13841 static rtx_insn *
13842 find_cond_jump (rtx_insn *insn)
13843 {
13844 for (; insn; insn = NEXT_INSN (insn))
13845 {
13846 rtx ite, cc;
13847
13848 if (LABEL_P (insn))
13849 break;
13850
13851 if (!JUMP_P (insn))
13852 {
13853 if (reg_mentioned_p (gen_rtx_REG (CCmode, CC_REGNUM), insn))
13854 break;
13855 continue;
13856 }
13857
13858 /* This will be triggered by a return. */
13859 if (GET_CODE (PATTERN (insn)) != SET)
13860 break;
13861
13862 gcc_assert (SET_DEST (PATTERN (insn)) == pc_rtx);
13863 ite = SET_SRC (PATTERN (insn));
13864
13865 if (GET_CODE (ite) != IF_THEN_ELSE)
13866 break;
13867
13868 cc = XEXP (XEXP (ite, 0), 0);
13869 if (!REG_P (cc) || !CC_REGNO_P (REGNO (cc)))
13870 break;
13871
13872 if (find_reg_note (insn, REG_DEAD, cc))
13873 return insn;
13874 break;
13875 }
13876
13877 return NULL;
13878 }
13879
13880 /* Swap the condition in COND and the operands in OP0 and OP1 so that
13881 the semantics does not change. If NULL_RTX is passed as COND the
13882 function tries to find the conditional jump starting with INSN. */
13883 static void
13884 s390_swap_cmp (rtx cond, rtx *op0, rtx *op1, rtx_insn *insn)
13885 {
13886 rtx tmp = *op0;
13887
13888 if (cond == NULL_RTX)
13889 {
13890 rtx_insn *jump = find_cond_jump (NEXT_INSN (insn));
13891 rtx set = jump ? single_set (jump) : NULL_RTX;
13892
13893 if (set == NULL_RTX)
13894 return;
13895
13896 cond = XEXP (SET_SRC (set), 0);
13897 }
13898
13899 *op0 = *op1;
13900 *op1 = tmp;
13901 PUT_CODE (cond, swap_condition (GET_CODE (cond)));
13902 }
13903
13904 /* On z10, instructions of the compare-and-branch family have the
13905 property to access the register occurring as second operand with
13906 its bits complemented. If such a compare is grouped with a second
13907 instruction that accesses the same register non-complemented, and
13908 if that register's value is delivered via a bypass, then the
13909 pipeline recycles, thereby causing significant performance decline.
13910 This function locates such situations and exchanges the two
13911 operands of the compare. The function returns true whenever it
13912 added an insn. */
13913 static bool
13914 s390_z10_optimize_cmp (rtx_insn *insn)
13915 {
13916 rtx_insn *prev_insn, *next_insn;
13917 bool insn_added_p = false;
13918 rtx cond, *op0, *op1;
13919
13920 if (GET_CODE (PATTERN (insn)) == PARALLEL)
13921 {
13922 /* Handle compare and branch and branch on count
13923 instructions. */
13924 rtx pattern = single_set (insn);
13925
13926 if (!pattern
13927 || SET_DEST (pattern) != pc_rtx
13928 || GET_CODE (SET_SRC (pattern)) != IF_THEN_ELSE)
13929 return false;
13930
13931 cond = XEXP (SET_SRC (pattern), 0);
13932 op0 = &XEXP (cond, 0);
13933 op1 = &XEXP (cond, 1);
13934 }
13935 else if (GET_CODE (PATTERN (insn)) == SET)
13936 {
13937 rtx src, dest;
13938
13939 /* Handle normal compare instructions. */
13940 src = SET_SRC (PATTERN (insn));
13941 dest = SET_DEST (PATTERN (insn));
13942
13943 if (!REG_P (dest)
13944 || !CC_REGNO_P (REGNO (dest))
13945 || GET_CODE (src) != COMPARE)
13946 return false;
13947
13948 /* s390_swap_cmp will try to find the conditional
13949 jump when passing NULL_RTX as condition. */
13950 cond = NULL_RTX;
13951 op0 = &XEXP (src, 0);
13952 op1 = &XEXP (src, 1);
13953 }
13954 else
13955 return false;
13956
13957 if (!REG_P (*op0) || !REG_P (*op1))
13958 return false;
13959
13960 if (GET_MODE_CLASS (GET_MODE (*op0)) != MODE_INT)
13961 return false;
13962
13963 /* Swap the COMPARE arguments and its mask if there is a
13964 conflicting access in the previous insn. */
13965 prev_insn = prev_active_insn (insn);
13966 if (prev_insn != NULL_RTX && INSN_P (prev_insn)
13967 && reg_referenced_p (*op1, PATTERN (prev_insn)))
13968 s390_swap_cmp (cond, op0, op1, insn);
13969
13970 /* Check if there is a conflict with the next insn. If there
13971 was no conflict with the previous insn, then swap the
13972 COMPARE arguments and its mask. If we already swapped
13973 the operands, or if swapping them would cause a conflict
13974 with the previous insn, issue a NOP after the COMPARE in
13975 order to separate the two instructions. */
13976 next_insn = next_active_insn (insn);
13977 if (next_insn != NULL_RTX && INSN_P (next_insn)
13978 && s390_non_addr_reg_read_p (*op1, next_insn))
13979 {
13980 if (prev_insn != NULL_RTX && INSN_P (prev_insn)
13981 && s390_non_addr_reg_read_p (*op0, prev_insn))
13982 {
13983 if (REGNO (*op1) == 0)
13984 emit_insn_after (gen_nop_lr1 (), insn);
13985 else
13986 emit_insn_after (gen_nop_lr0 (), insn);
13987 insn_added_p = true;
13988 }
13989 else
13990 s390_swap_cmp (cond, op0, op1, insn);
13991 }
13992 return insn_added_p;
13993 }
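/* For instance (hypothetical registers), if a compare-and-branch

	crj	%r1,%r2,...

   is grouped with a neighboring instruction reading %r2, the compare
   is rewritten as crj %r2,%r1 with a swapped condition mask, or, when
   swapping would conflict with the previous instruction, a
   register-copy NOP is inserted after the compare to split the
   group.  */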
13994
13995 /* Number of INSNs to be scanned backward in the last BB of the loop
13996 and forward in the first BB of the loop. This usually should be a
13997 bit more than the number of INSNs which could go into one
13998 group. */
13999 #define S390_OSC_SCAN_INSN_NUM 5
14000
14001 /* Scan LOOP for static OSC collisions and return true if an osc_break
14002 should be issued for this loop. */
14003 static bool
14004 s390_adjust_loop_scan_osc (struct loop *loop)
14006 {
14007 HARD_REG_SET modregs, newregs;
14008 rtx_insn *insn, *store_insn = NULL;
14009 rtx set;
14010 struct s390_address addr_store, addr_load;
14011 subrtx_iterator::array_type array;
14012 int insn_count;
14013
14014 CLEAR_HARD_REG_SET (modregs);
14015
14016 insn_count = 0;
14017 FOR_BB_INSNS_REVERSE (loop->latch, insn)
14018 {
14019 if (!INSN_P (insn) || INSN_CODE (insn) <= 0)
14020 continue;
14021
14022 insn_count++;
14023 if (insn_count > S390_OSC_SCAN_INSN_NUM)
14024 return false;
14025
14026 find_all_hard_reg_sets (insn, &newregs, true);
14027 IOR_HARD_REG_SET (modregs, newregs);
14028
14029 set = single_set (insn);
14030 if (!set)
14031 continue;
14032
14033 if (MEM_P (SET_DEST (set))
14034 && s390_decompose_address (XEXP (SET_DEST (set), 0), &addr_store))
14035 {
14036 store_insn = insn;
14037 break;
14038 }
14039 }
14040
14041 if (store_insn == NULL_RTX)
14042 return false;
14043
14044 insn_count = 0;
14045 FOR_BB_INSNS (loop->header, insn)
14046 {
14047 if (!INSN_P (insn) || INSN_CODE (insn) <= 0)
14048 continue;
14049
14050 if (insn == store_insn)
14051 return false;
14052
14053 insn_count++;
14054 if (insn_count > S390_OSC_SCAN_INSN_NUM)
14055 return false;
14056
14057 find_all_hard_reg_sets (insn, &newregs, true);
14058 IOR_HARD_REG_SET (modregs, newregs);
14059
14060 set = single_set (insn);
14061 if (!set)
14062 continue;
14063
14064 /* An intermediate store disrupts static OSC checking
14065 anyway. */
14066 if (MEM_P (SET_DEST (set))
14067 && s390_decompose_address (XEXP (SET_DEST (set), 0), NULL))
14068 return false;
14069
14070 FOR_EACH_SUBRTX (iter, array, SET_SRC (set), NONCONST)
14071 if (MEM_P (*iter)
14072 && s390_decompose_address (XEXP (*iter, 0), &addr_load)
14073 && rtx_equal_p (addr_load.base, addr_store.base)
14074 && rtx_equal_p (addr_load.indx, addr_store.indx)
14075 && rtx_equal_p (addr_load.disp, addr_store.disp))
14076 {
14077 if ((addr_load.base != NULL_RTX
14078 && TEST_HARD_REG_BIT (modregs, REGNO (addr_load.base)))
14079 || (addr_load.indx != NULL_RTX
14080 && TEST_HARD_REG_BIT (modregs, REGNO (addr_load.indx))))
14081 return true;
14082 }
14083 }
14084 return false;
14085 }
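/* A loop shape this detects (a sketch, hypothetical registers):

     header:  l    %r1,0(%r2)     -- load
              la   %r2,4(%r2)     -- base register modified
              ...
     latch:   st   %r1,0(%r2)     -- store with same base/index/disp
              j    header

   The store in the latch and the load in the header agree on base,
   index and displacement, but the base register was modified within
   the scanned window, so an osc_break is requested.  */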
14086
14087 /* Look for adjustments which can be done on simple innermost
14088 loops. */
14089 static void
14090 s390_adjust_loops ()
14091 {
14092 struct loop *loop = NULL;
14093
14094 df_analyze ();
14095 compute_bb_for_insn ();
14096
14097 /* Find the loops. */
14098 loop_optimizer_init (AVOID_CFG_MODIFICATIONS);
14099
14100 FOR_EACH_LOOP (loop, LI_ONLY_INNERMOST)
14101 {
14102 if (dump_file)
14103 {
14104 flow_loop_dump (loop, dump_file, NULL, 0);
14105 fprintf (dump_file, ";; OSC loop scan Loop: ");
14106 }
14107 if (loop->latch == NULL
14108 || pc_set (BB_END (loop->latch)) == NULL_RTX
14109 || !s390_adjust_loop_scan_osc (loop))
14110 {
14111 if (dump_file)
14112 {
14113 if (loop->latch == NULL)
14114 fprintf (dump_file, " multiple backward jumps\n");
14115 else
14116 {
14117 fprintf (dump_file, " header insn: %d latch insn: %d ",
14118 INSN_UID (BB_HEAD (loop->header)),
14119 INSN_UID (BB_END (loop->latch)));
14120 if (pc_set (BB_END (loop->latch)) == NULL_RTX)
14121 fprintf (dump_file, " loop does not end with jump\n");
14122 else
14123 fprintf (dump_file, " not instrumented\n");
14124 }
14125 }
14126 }
14127 else
14128 {
14129 rtx_insn *new_insn;
14130
14131 if (dump_file)
14132 fprintf (dump_file, " adding OSC break insn: ");
14133 new_insn = emit_insn_before (gen_osc_break (),
14134 BB_END (loop->latch));
14135 INSN_ADDRESSES_NEW (new_insn, -1);
14136 }
14137 }
14138
14139 loop_optimizer_finalize ();
14140
14141 df_finish_pass (false);
14142 }
14143
14144 /* Perform machine-dependent processing. */
14145
14146 static void
14147 s390_reorg (void)
14148 {
14149 struct constant_pool *pool;
14150 rtx_insn *insn;
14151 int hw_before, hw_after;
14152
14153 if (s390_tune == PROCESSOR_2964_Z13)
14154 s390_adjust_loops ();
14155
14156 /* Make sure all splits have been performed; splits after
14157 machine_dependent_reorg might confuse insn length counts. */
14158 split_all_insns_noflow ();
14159
14160 /* Install the main literal pool and the associated base
14161 register load insns. The literal pool might be > 4096 bytes in
14162 size, so that some of its elements cannot be directly accessed.
14163
14164 To fix this, we split the single literal pool into multiple
14165 pool chunks, reloading the pool base register at various
14166 points throughout the function to ensure it always points to
14167 the pool chunk the following code expects. */
14168
14169 /* Collect the literal pool. */
14170 pool = s390_mainpool_start ();
14171 if (pool)
14172 {
14173 /* Finish up literal pool related changes. */
14174 s390_mainpool_finish (pool);
14175 }
14176 else
14177 {
14178 /* If literal pool overflowed, chunkify it. */
14179 pool = s390_chunkify_start ();
14180 s390_chunkify_finish (pool);
14181 }
14182
14183 /* Generate out-of-pool execute target insns. */
14184 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
14185 {
14186 rtx label;
14187 rtx_insn *target;
14188
14189 label = s390_execute_label (insn);
14190 if (!label)
14191 continue;
14192
14193 gcc_assert (label != const0_rtx);
14194
14195 target = emit_label (XEXP (label, 0));
14196 INSN_ADDRESSES_NEW (target, -1);
14197
14198 if (JUMP_P (insn))
14199 {
14200 target = emit_jump_insn (s390_execute_target (insn));
14201 /* This is important in order to keep a table jump
14202 pointing at the jump table label. Only then is it
14203 recognized as a table jump. */
14204 JUMP_LABEL (target) = JUMP_LABEL (insn);
14205 }
14206 else
14207 target = emit_insn (s390_execute_target (insn));
14208 INSN_ADDRESSES_NEW (target, -1);
14209 }
14210
14211 /* Try to optimize prologue and epilogue further. */
14212 s390_optimize_prologue ();
14213
14214 /* Walk over the insns and do some >=z10 specific changes. */
14215 if (s390_tune >= PROCESSOR_2097_Z10)
14216 {
14217 rtx_insn *insn;
14218 bool insn_added_p = false;
14219
14220 /* The insn lengths and addresses have to be up to date for the
14221 following manipulations. */
14222 shorten_branches (get_insns ());
14223
14224 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
14225 {
14226 if (!INSN_P (insn) || INSN_CODE (insn) <= 0)
14227 continue;
14228
14229 if (JUMP_P (insn))
14230 insn_added_p |= s390_fix_long_loop_prediction (insn);
14231
14232 if ((GET_CODE (PATTERN (insn)) == PARALLEL
14233 || GET_CODE (PATTERN (insn)) == SET)
14234 && s390_tune == PROCESSOR_2097_Z10)
14235 insn_added_p |= s390_z10_optimize_cmp (insn);
14236 }
14237
14238 /* Adjust branches if we added new instructions. */
14239 if (insn_added_p)
14240 shorten_branches (get_insns ());
14241 }
14242
14243 s390_function_num_hotpatch_hw (current_function_decl, &hw_before, &hw_after);
14244 if (hw_after > 0)
14245 {
14246 rtx_insn *insn;
14247
14248 /* Insert NOPs for hotpatching. */
14249 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
14250 /* Emit NOPs
14251 1. inside the area covered by debug information to allow setting
14252 breakpoints at the NOPs,
14253 2. before any insn which results in an asm instruction,
14254 3. before in-function labels to avoid jumping to the NOPs, for
14255 example as part of a loop,
14256 4. before any barrier in case the function is completely empty
14257 (__builtin_unreachable ()) and has neither internal labels nor
14258 active insns.
14259 */
14260 if (active_insn_p (insn) || BARRIER_P (insn) || LABEL_P (insn))
14261 break;
14262 /* Output a series of NOPs before the first active insn. */
14263 while (insn && hw_after > 0)
14264 {
14265 if (hw_after >= 3)
14266 {
14267 emit_insn_before (gen_nop_6_byte (), insn);
14268 hw_after -= 3;
14269 }
14270 else if (hw_after >= 2)
14271 {
14272 emit_insn_before (gen_nop_4_byte (), insn);
14273 hw_after -= 2;
14274 }
14275 else
14276 {
14277 emit_insn_before (gen_nop_2_byte (), insn);
14278 hw_after -= 1;
14279 }
14280 }
14281 }
14282 }
14283
14284 /* Return true if INSN is a fp load insn writing register REGNO. */
14285 static inline bool
14286 s390_fpload_toreg (rtx_insn *insn, unsigned int regno)
14287 {
14288 rtx set;
14289 enum attr_type flag = s390_safe_attr_type (insn);
14290
14291 if (flag != TYPE_FLOADSF && flag != TYPE_FLOADDF)
14292 return false;
14293
14294 set = single_set (insn);
14295
14296 if (set == NULL_RTX)
14297 return false;
14298
14299 if (!REG_P (SET_DEST (set)) || !MEM_P (SET_SRC (set)))
14300 return false;
14301
14302 if (REGNO (SET_DEST (set)) != regno)
14303 return false;
14304
14305 return true;
14306 }
14307
14308 /* This value describes the distance to be avoided between an
14309 arithmetic fp instruction and an fp load writing the same register.
14310 Z10_EARLYLOAD_DISTANCE - 1 as well as Z10_EARLYLOAD_DISTANCE + 1 are
14311 fine, but the exact value has to be avoided. Otherwise the FP
14312 pipeline will throw an exception causing a major penalty. */
14313 #define Z10_EARLYLOAD_DISTANCE 7
14314
14315 /* Rearrange the ready list in order to avoid the situation described
14316 for Z10_EARLYLOAD_DISTANCE. A problematic load instruction is
14317 moved to the very end of the ready list. */
14318 static void
14319 s390_z10_prevent_earlyload_conflicts (rtx_insn **ready, int *nready_p)
14320 {
14321 unsigned int regno;
14322 int nready = *nready_p;
14323 rtx_insn *tmp;
14324 int i;
14325 rtx_insn *insn;
14326 rtx set;
14327 enum attr_type flag;
14328 int distance;
14329
14330 /* Skip DISTANCE - 1 active insns. */
14331 for (insn = last_scheduled_insn, distance = Z10_EARLYLOAD_DISTANCE - 1;
14332 distance > 0 && insn != NULL_RTX;
14333 distance--, insn = prev_active_insn (insn))
14334 if (CALL_P (insn) || JUMP_P (insn))
14335 return;
14336
14337 if (insn == NULL_RTX)
14338 return;
14339
14340 set = single_set (insn);
14341
14342 if (set == NULL_RTX || !REG_P (SET_DEST (set))
14343 || GET_MODE_CLASS (GET_MODE (SET_DEST (set))) != MODE_FLOAT)
14344 return;
14345
14346 flag = s390_safe_attr_type (insn);
14347
14348 if (flag == TYPE_FLOADSF || flag == TYPE_FLOADDF)
14349 return;
14350
14351 regno = REGNO (SET_DEST (set));
14352 i = nready - 1;
14353
14354 while (!s390_fpload_toreg (ready[i], regno) && i > 0)
14355 i--;
14356
14357 if (!i)
14358 return;
14359
14360 tmp = ready[i];
14361 memmove (&ready[1], &ready[0], sizeof (rtx_insn *) * i);
14362 ready[0] = tmp;
14363 }
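/* E.g. if ready[] is {a, b, load_x, c} and load_x is a load writing
   the critical FP register, the list becomes {load_x, a, b, c}: the
   load is rotated to slot 0, the "very end" of the ready list, so it
   is issued as late as possible.  (A sketch; insn names are
   hypothetical.)  */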
14364
14365 /* Returns TRUE if BB is entered via a fallthru edge and all other
14366 incoming edges are less than likely. */
14367 static bool
14368 s390_bb_fallthru_entry_likely (basic_block bb)
14369 {
14370 edge e, fallthru_edge;
14371 edge_iterator ei;
14372
14373 if (!bb)
14374 return false;
14375
14376 fallthru_edge = find_fallthru_edge (bb->preds);
14377 if (!fallthru_edge)
14378 return false;
14379
14380 FOR_EACH_EDGE (e, ei, bb->preds)
14381 if (e != fallthru_edge
14382 && e->probability >= profile_probability::likely ())
14383 return false;
14384
14385 return true;
14386 }
14387
14388 struct s390_sched_state
14389 {
14390 /* Number of insns in the group. */
14391 int group_state;
14392 /* Execution side of the group. */
14393 int side;
14394 /* Group can only hold two insns. */
14395 bool group_of_two;
14396 } s390_sched_state;
14397
14398 static struct s390_sched_state sched_state = {0, 1, false};
14399
14400 #define S390_SCHED_ATTR_MASK_CRACKED 0x1
14401 #define S390_SCHED_ATTR_MASK_EXPANDED 0x2
14402 #define S390_SCHED_ATTR_MASK_ENDGROUP 0x4
14403 #define S390_SCHED_ATTR_MASK_GROUPALONE 0x8
14404 #define S390_SCHED_ATTR_MASK_GROUPOFTWO 0x10
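/* For example, an insn whose tune-specific "cracked" and "endgroup"
   attributes are both set yields the mask
   S390_SCHED_ATTR_MASK_CRACKED | S390_SCHED_ATTR_MASK_ENDGROUP (0x5)
   from s390_get_sched_attrmask below.  */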
14405
14406 static unsigned int
14407 s390_get_sched_attrmask (rtx_insn *insn)
14408 {
14409 unsigned int mask = 0;
14410
14411 switch (s390_tune)
14412 {
14413 case PROCESSOR_2827_ZEC12:
14414 if (get_attr_zEC12_cracked (insn))
14415 mask |= S390_SCHED_ATTR_MASK_CRACKED;
14416 if (get_attr_zEC12_expanded (insn))
14417 mask |= S390_SCHED_ATTR_MASK_EXPANDED;
14418 if (get_attr_zEC12_endgroup (insn))
14419 mask |= S390_SCHED_ATTR_MASK_ENDGROUP;
14420 if (get_attr_zEC12_groupalone (insn))
14421 mask |= S390_SCHED_ATTR_MASK_GROUPALONE;
14422 break;
14423 case PROCESSOR_2964_Z13:
14424 if (get_attr_z13_cracked (insn))
14425 mask |= S390_SCHED_ATTR_MASK_CRACKED;
14426 if (get_attr_z13_expanded (insn))
14427 mask |= S390_SCHED_ATTR_MASK_EXPANDED;
14428 if (get_attr_z13_endgroup (insn))
14429 mask |= S390_SCHED_ATTR_MASK_ENDGROUP;
14430 if (get_attr_z13_groupalone (insn))
14431 mask |= S390_SCHED_ATTR_MASK_GROUPALONE;
14432 if (get_attr_z13_groupoftwo (insn))
14433 mask |= S390_SCHED_ATTR_MASK_GROUPOFTWO;
14434 break;
14435 case PROCESSOR_3906_Z14:
14436 if (get_attr_z14_cracked (insn))
14437 mask |= S390_SCHED_ATTR_MASK_CRACKED;
14438 if (get_attr_z14_expanded (insn))
14439 mask |= S390_SCHED_ATTR_MASK_EXPANDED;
14440 if (get_attr_z14_endgroup (insn))
14441 mask |= S390_SCHED_ATTR_MASK_ENDGROUP;
14442 if (get_attr_z14_groupalone (insn))
14443 mask |= S390_SCHED_ATTR_MASK_GROUPALONE;
14444 if (get_attr_z14_groupoftwo (insn))
14445 mask |= S390_SCHED_ATTR_MASK_GROUPOFTWO;
14446 break;
14447 case PROCESSOR_8561_Z15:
14448 if (get_attr_z15_cracked (insn))
14449 mask |= S390_SCHED_ATTR_MASK_CRACKED;
14450 if (get_attr_z15_expanded (insn))
14451 mask |= S390_SCHED_ATTR_MASK_EXPANDED;
14452 if (get_attr_z15_endgroup (insn))
14453 mask |= S390_SCHED_ATTR_MASK_ENDGROUP;
14454 if (get_attr_z15_groupalone (insn))
14455 mask |= S390_SCHED_ATTR_MASK_GROUPALONE;
14456 if (get_attr_z15_groupoftwo (insn))
14457 mask |= S390_SCHED_ATTR_MASK_GROUPOFTWO;
14458 break;
14459 default:
14460 gcc_unreachable ();
14461 }
14462 return mask;
14463 }
14464
14465 static unsigned int
14466 s390_get_unit_mask (rtx_insn *insn, int *units)
14467 {
14468 unsigned int mask = 0;
14469
14470 switch (s390_tune)
14471 {
14472 case PROCESSOR_2964_Z13:
14473 *units = 4;
14474 if (get_attr_z13_unit_lsu (insn))
14475 mask |= 1 << 0;
14476 if (get_attr_z13_unit_fxa (insn))
14477 mask |= 1 << 1;
14478 if (get_attr_z13_unit_fxb (insn))
14479 mask |= 1 << 2;
14480 if (get_attr_z13_unit_vfu (insn))
14481 mask |= 1 << 3;
14482 break;
14483 case PROCESSOR_3906_Z14:
14484 *units = 4;
14485 if (get_attr_z14_unit_lsu (insn))
14486 mask |= 1 << 0;
14487 if (get_attr_z14_unit_fxa (insn))
14488 mask |= 1 << 1;
14489 if (get_attr_z14_unit_fxb (insn))
14490 mask |= 1 << 2;
14491 if (get_attr_z14_unit_vfu (insn))
14492 mask |= 1 << 3;
14493 break;
14494 case PROCESSOR_8561_Z15:
14495 *units = 4;
14496 if (get_attr_z15_unit_lsu (insn))
14497 mask |= 1 << 0;
14498 if (get_attr_z15_unit_fxa (insn))
14499 mask |= 1 << 1;
14500 if (get_attr_z15_unit_fxb (insn))
14501 mask |= 1 << 2;
14502 if (get_attr_z15_unit_vfu (insn))
14503 mask |= 1 << 3;
14504 break;
14505 default:
14506 gcc_unreachable ();
14507 }
14508 return mask;
14509 }
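/* Example: on z13 an insn with the z13_unit_lsu and z13_unit_vfu
   attributes set yields *units == 4 and the mask
   (1 << 0) | (1 << 3) == 0x9, i.e. bit 0 for the LSU and bit 3 for
   the vector/FP unit.  */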
14510
14511 static bool
14512 s390_is_fpd (rtx_insn *insn)
14513 {
14514 if (insn == NULL_RTX)
14515 return false;
14516
14517 return get_attr_z13_unit_fpd (insn) || get_attr_z14_unit_fpd (insn)
14518 || get_attr_z15_unit_fpd (insn);
14519 }
14520
14521 static bool
14522 s390_is_fxd (rtx_insn *insn)
14523 {
14524 if (insn == NULL_RTX)
14525 return false;
14526
14527 return get_attr_z13_unit_fxd (insn) || get_attr_z14_unit_fxd (insn)
14528 || get_attr_z15_unit_fxd (insn);
14529 }
14530
14531 /* Returns TRUE if INSN is a long-running instruction. */
14532 static bool
14533 s390_is_longrunning (rtx_insn *insn)
14534 {
14535 if (insn == NULL_RTX)
14536 return false;
14537
14538 return s390_is_fxd (insn) || s390_is_fpd (insn);
14539 }
14540
14541
14542 /* Return the scheduling score for INSN. The higher the score the
14543 better. The score is calculated from the OOO scheduling attributes
14544 of INSN and the scheduling state sched_state. */
14545 static int
14546 s390_sched_score (rtx_insn *insn)
14547 {
14548 unsigned int mask = s390_get_sched_attrmask (insn);
14549 int score = 0;
14550
14551 switch (sched_state.group_state)
14552 {
14553 case 0:
14554 /* Try to put insns into the first slot which would otherwise
14555 break a group. */
14556 if ((mask & S390_SCHED_ATTR_MASK_CRACKED) != 0
14557 || (mask & S390_SCHED_ATTR_MASK_EXPANDED) != 0)
14558 score += 5;
14559 if ((mask & S390_SCHED_ATTR_MASK_GROUPALONE) != 0)
14560 score += 10;
14561 break;
14562 case 1:
14563 /* Prefer not cracked insns while trying to put together a
14564 group. */
14565 if ((mask & S390_SCHED_ATTR_MASK_CRACKED) == 0
14566 && (mask & S390_SCHED_ATTR_MASK_EXPANDED) == 0
14567 && (mask & S390_SCHED_ATTR_MASK_GROUPALONE) == 0)
14568 score += 10;
14569 if ((mask & S390_SCHED_ATTR_MASK_ENDGROUP) == 0)
14570 score += 5;
14571 /* If we are in a group of two already, try to schedule another
14572 group-of-two insn to avoid shortening another group. */
14573 if (sched_state.group_of_two
14574 && (mask & S390_SCHED_ATTR_MASK_GROUPOFTWO) != 0)
14575 score += 15;
14576 break;
14577 case 2:
14578 /* Prefer not cracked insns while trying to put together a
14579 group. */
14580 if ((mask & S390_SCHED_ATTR_MASK_CRACKED) == 0
14581 && (mask & S390_SCHED_ATTR_MASK_EXPANDED) == 0
14582 && (mask & S390_SCHED_ATTR_MASK_GROUPALONE) == 0)
14583 score += 10;
14584 /* Prefer endgroup insns in the last slot. */
14585 if ((mask & S390_SCHED_ATTR_MASK_ENDGROUP) != 0)
14586 score += 10;
14587 /* Try to avoid group-of-two insns in the last slot as they will
14588 shorten this group as well as the next one. */
14589 if ((mask & S390_SCHED_ATTR_MASK_GROUPOFTWO) != 0)
14590 score = MAX (0, score - 15);
14591 break;
14592 }
14593
14594 if (s390_tune >= PROCESSOR_2964_Z13)
14595 {
14596 int units, i;
14597 unsigned unit_mask, m = 1;
14598
14599 unit_mask = s390_get_unit_mask (insn, &units);
14600 gcc_assert (units <= MAX_SCHED_UNITS);
14601
14602 /* Add a score in range 0..MAX_SCHED_MIX_SCORE depending on how long
14603 ago the last insn of this unit type got scheduled. This is
14604 supposed to help provide a proper instruction mix to the
14605 CPU. */
14606 for (i = 0; i < units; i++, m <<= 1)
14607 if (m & unit_mask)
14608 score += (last_scheduled_unit_distance[i][sched_state.side]
14609 * MAX_SCHED_MIX_SCORE / MAX_SCHED_MIX_DISTANCE);
14610
14611 int other_side = 1 - sched_state.side;
14612
14613 /* Try to delay long-running insns when side is busy. */
14614 if (s390_is_longrunning (insn))
14615 {
14616 if (s390_is_fxd (insn))
14617 {
14618 if (fxd_longrunning[sched_state.side]
14619 && fxd_longrunning[other_side]
14620 <= fxd_longrunning[sched_state.side])
14621 score = MAX (0, score - 10);
14622
14623 else if (fxd_longrunning[other_side]
14624 >= fxd_longrunning[sched_state.side])
14625 score += 10;
14626 }
14627
14628 if (s390_is_fpd (insn))
14629 {
14630 if (fpd_longrunning[sched_state.side]
14631 && fpd_longrunning[other_side]
14632 <= fpd_longrunning[sched_state.side])
14633 score = MAX (0, score - 10);
14634
14635 else if (fpd_longrunning[other_side]
14636 >= fpd_longrunning[sched_state.side])
14637 score += 10;
14638 }
14639 }
14640 }
14641
14642 return score;
14643 }
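/* Worked example (hypothetical insn): with sched_state.group_state == 1
   an insn that is neither cracked, expanded nor groupalone scores +10,
   plus +5 if it is not an endgroup insn.  On z13 or later, if one of
   its units has not been used for MAX_SCHED_MIX_DISTANCE insns on this
   side, the full MAX_SCHED_MIX_SCORE is added on top of that.  */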
14644
14645 /* This function is called via hook TARGET_SCHED_REORDER before
14646 issuing one insn from list READY which contains *NREADYP entries.
14647 For target z10 it reorders load instructions to avoid early load
14648 conflicts in the floating point pipeline. */
14649 static int
14650 s390_sched_reorder (FILE *file, int verbose,
14651 rtx_insn **ready, int *nreadyp, int clock ATTRIBUTE_UNUSED)
14652 {
14653 if (s390_tune == PROCESSOR_2097_Z10
14654 && reload_completed
14655 && *nreadyp > 1)
14656 s390_z10_prevent_earlyload_conflicts (ready, nreadyp);
14657
14658 if (s390_tune >= PROCESSOR_2827_ZEC12
14659 && reload_completed
14660 && *nreadyp > 1)
14661 {
14662 int i;
14663 int last_index = *nreadyp - 1;
14664 int max_index = -1;
14665 int max_score = -1;
14666 rtx_insn *tmp;
14667
14668 /* Just move the insn with the highest score to the top (the
14669 end) of the list. A full sort is not needed since a conflict
14670 in the hazard recognition cannot happen. So the top insn in
14671 the ready list will always be taken. */
14672 for (i = last_index; i >= 0; i--)
14673 {
14674 int score;
14675
14676 if (recog_memoized (ready[i]) < 0)
14677 continue;
14678
14679 score = s390_sched_score (ready[i]);
14680 if (score > max_score)
14681 {
14682 max_score = score;
14683 max_index = i;
14684 }
14685 }
14686
14687 if (max_index != -1)
14688 {
14689 if (max_index != last_index)
14690 {
14691 tmp = ready[max_index];
14692 ready[max_index] = ready[last_index];
14693 ready[last_index] = tmp;
14694
14695 if (verbose > 5)
14696 fprintf (file,
14697 ";;\t\tBACKEND: move insn %d to the top of list\n",
14698 INSN_UID (ready[last_index]));
14699 }
14700 else if (verbose > 5)
14701 fprintf (file,
14702 ";;\t\tBACKEND: best insn %d already on top\n",
14703 INSN_UID (ready[last_index]));
14704 }
14705
14706 if (verbose > 5)
14707 {
14708 fprintf (file, "ready list ooo attributes - sched state: %d\n",
14709 sched_state.group_state);
14710
14711 for (i = last_index; i >= 0; i--)
14712 {
14713 unsigned int sched_mask;
14714 rtx_insn *insn = ready[i];
14715
14716 if (recog_memoized (insn) < 0)
14717 continue;
14718
14719 sched_mask = s390_get_sched_attrmask (insn);
14720 fprintf (file, ";;\t\tBACKEND: insn %d score: %d: ",
14721 INSN_UID (insn),
14722 s390_sched_score (insn));
14723 #define PRINT_SCHED_ATTR(M, ATTR) fprintf (file, "%s ",\
14724 ((M) & sched_mask) ? #ATTR : "");
14725 PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_CRACKED, cracked);
14726 PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_EXPANDED, expanded);
14727 PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_ENDGROUP, endgroup);
14728 PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_GROUPALONE, groupalone);
14729 #undef PRINT_SCHED_ATTR
14730 if (s390_tune >= PROCESSOR_2964_Z13)
14731 {
14732 unsigned int unit_mask, m = 1;
14733 int units, j;
14734
14735 unit_mask = s390_get_unit_mask (insn, &units);
14736 fprintf (file, "(units:");
14737 for (j = 0; j < units; j++, m <<= 1)
14738 if (m & unit_mask)
14739 fprintf (file, " u%d", j);
14740 fprintf (file, ")");
14741 }
14742 fprintf (file, "\n");
14743 }
14744 }
14745 }
14746
14747 return s390_issue_rate ();
14748 }
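/* Example of the reordering above (hypothetical UIDs and scores):
   given ready[] = { A:5, B:20, C:10 }, B and the last element C are
   swapped, yielding { A, C, B }, and B is issued next since the
   scheduler always takes the top (last) entry.  */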
14749
14750
14751 /* This function is called via hook TARGET_SCHED_VARIABLE_ISSUE after
14752 the scheduler has issued INSN. It stores the last issued insn into
14753 last_scheduled_insn in order to make it available for
14754 s390_sched_reorder. */
14755 static int
14756 s390_sched_variable_issue (FILE *file, int verbose, rtx_insn *insn, int more)
14757 {
14758 last_scheduled_insn = insn;
14759
14760 bool ends_group = false;
14761
14762 if (s390_tune >= PROCESSOR_2827_ZEC12
14763 && reload_completed
14764 && recog_memoized (insn) >= 0)
14765 {
14766 unsigned int mask = s390_get_sched_attrmask (insn);
14767
14768 if ((mask & S390_SCHED_ATTR_MASK_GROUPOFTWO) != 0)
14769 sched_state.group_of_two = true;
14770
14771 /* If this is a group-of-two insn, we actually ended the last group
14772 and this insn is the first one of the new group. */
14773 if (sched_state.group_state == 2 && sched_state.group_of_two)
14774 {
14775 sched_state.side = sched_state.side ? 0 : 1;
14776 sched_state.group_state = 0;
14777 }
14778
14779 /* Longrunning and side bookkeeping. */
14780 for (int i = 0; i < 2; i++)
14781 {
14782 fxd_longrunning[i] = MAX (0, fxd_longrunning[i] - 1);
14783 fpd_longrunning[i] = MAX (0, fpd_longrunning[i] - 1);
14784 }
14785
14786 unsigned latency = insn_default_latency (insn);
14787 if (s390_is_longrunning (insn))
14788 {
14789 if (s390_is_fxd (insn))
14790 fxd_longrunning[sched_state.side] = latency;
14791 else
14792 fpd_longrunning[sched_state.side] = latency;
14793 }
14794
14795 if (s390_tune >= PROCESSOR_2964_Z13)
14796 {
14797 int units, i;
14798 unsigned unit_mask, m = 1;
14799
14800 unit_mask = s390_get_unit_mask (insn, &units);
14801 gcc_assert (units <= MAX_SCHED_UNITS);
14802
14803 for (i = 0; i < units; i++, m <<= 1)
14804 if (m & unit_mask)
14805 last_scheduled_unit_distance[i][sched_state.side] = 0;
14806 else if (last_scheduled_unit_distance[i][sched_state.side]
14807 < MAX_SCHED_MIX_DISTANCE)
14808 last_scheduled_unit_distance[i][sched_state.side]++;
14809 }
14810
14811 if ((mask & S390_SCHED_ATTR_MASK_CRACKED) != 0
14812 || (mask & S390_SCHED_ATTR_MASK_EXPANDED) != 0
14813 || (mask & S390_SCHED_ATTR_MASK_GROUPALONE) != 0
14814 || (mask & S390_SCHED_ATTR_MASK_ENDGROUP) != 0)
14815 {
14816 sched_state.group_state = 0;
14817 ends_group = true;
14818 }
14819 else
14820 {
14821 switch (sched_state.group_state)
14822 {
14823 case 0:
14824 sched_state.group_state++;
14825 break;
14826 case 1:
14827 sched_state.group_state++;
14828 if (sched_state.group_of_two)
14829 {
14830 sched_state.group_state = 0;
14831 ends_group = true;
14832 }
14833 break;
14834 case 2:
14835 sched_state.group_state++;
14836 ends_group = true;
14837 break;
14838 }
14839 }
14840
14841 if (verbose > 5)
14842 {
14843 unsigned int sched_mask;
14844
14845 sched_mask = s390_get_sched_attrmask (insn);
14846
14847 fprintf (file, ";;\t\tBACKEND: insn %d: ", INSN_UID (insn));
14848 #define PRINT_SCHED_ATTR(M, ATTR) fprintf (file, "%s ", ((M) & sched_mask) ? #ATTR : "");
14849 PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_CRACKED, cracked);
14850 PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_EXPANDED, expanded);
14851 PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_ENDGROUP, endgroup);
14852 PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_GROUPALONE, groupalone);
14853 #undef PRINT_SCHED_ATTR
14854
14855 if (s390_tune >= PROCESSOR_2964_Z13)
14856 {
14857 unsigned int unit_mask, m = 1;
14858 int units, j;
14859
14860 unit_mask = s390_get_unit_mask (insn, &units);
14861 fprintf (file, "(units:");
14862 for (j = 0; j < units; j++, m <<= 1)
14863 if (m & unit_mask)
14864 fprintf (file, " %d", j);
14865 fprintf (file, ")");
14866 }
14867 fprintf (file, " sched state: %d\n", sched_state.group_state);
14868
14869 if (s390_tune >= PROCESSOR_2964_Z13)
14870 {
14871 int units, j;
14872
14873 s390_get_unit_mask (insn, &units);
14874
14875 fprintf (file, ";;\t\tBACKEND: units on this side unused for: ");
14876 for (j = 0; j < units; j++)
14877 fprintf (file, "%d:%d ", j,
14878 last_scheduled_unit_distance[j][sched_state.side]);
14879 fprintf (file, "\n");
14880 }
14881 }
14882
14883 /* If this insn ended a group, the next will be on the other side. */
14884 if (ends_group)
14885 {
14886 sched_state.group_state = 0;
14887 sched_state.side = sched_state.side ? 0 : 1;
14888 sched_state.group_of_two = false;
14889 }
14890 }
14891
14892 if (GET_CODE (PATTERN (insn)) != USE
14893 && GET_CODE (PATTERN (insn)) != CLOBBER)
14894 return more - 1;
14895 else
14896 return more;
14897 }
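/* Sketch of the grouping state machine above, assuming a stream of
   "plain" insns (none of the attribute mask bits set):

     issue #1: group_state 0 -> 1
     issue #2: group_state 1 -> 2
     issue #3: group_state 2 -> 3, the group ends, the state is reset
               to 0 and sched_state.side flips to the other side

   A cracked, expanded, groupalone or endgroup insn ends the current
   group immediately, with the same reset and side flip.  */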
14898
14899 static void
14900 s390_sched_init (FILE *file ATTRIBUTE_UNUSED,
14901 int verbose ATTRIBUTE_UNUSED,
14902 int max_ready ATTRIBUTE_UNUSED)
14903 {
14904 /* If the next basic block is most likely entered via a fallthru edge
14905 we keep the last sched state. Otherwise we start a new group.
14906 The scheduler traverses basic blocks in "instruction stream" ordering
14907 so if we see a fallthru edge here, sched_state will be of its
14908 source block.
14909
14910 current_sched_info->prev_head is the insn before the first insn of the
14911 block of insns to be scheduled.
14912 */
14913 rtx_insn *insn = current_sched_info->prev_head
14914 ? NEXT_INSN (current_sched_info->prev_head) : NULL;
14915 basic_block bb = insn ? BLOCK_FOR_INSN (insn) : NULL;
14916 if (s390_tune < PROCESSOR_2964_Z13 || !s390_bb_fallthru_entry_likely (bb))
14917 {
14918 last_scheduled_insn = NULL;
14919 memset (last_scheduled_unit_distance, 0,
14920 MAX_SCHED_UNITS * NUM_SIDES * sizeof (int));
14921 sched_state.group_state = 0;
14922 sched_state.group_of_two = false;
14923 }
14924 }
14925
14926 /* This target hook implementation for TARGET_LOOP_UNROLL_ADJUST calculates
14927 how many times struct loop *loop should be unrolled when tuning for
14928 CPUs with a built-in stride prefetcher.
14929 The loop is analyzed for memory accesses by iterating over all RTXs
14930 of the loop body. Depending on the loop_depth and the number of
14931 memory accesses a new number <= nunroll is returned to improve the
14932 behavior of the hardware prefetch unit. */
14933 static unsigned
14934 s390_loop_unroll_adjust (unsigned nunroll, struct loop *loop)
14935 {
14936 basic_block *bbs;
14937 rtx_insn *insn;
14938 unsigned i;
14939 unsigned mem_count = 0;
14940
14941 if (s390_tune < PROCESSOR_2097_Z10)
14942 return nunroll;
14943
14944 /* Count the number of memory references within the loop body. */
14945 bbs = get_loop_body (loop);
14946 subrtx_iterator::array_type array;
14947 for (i = 0; i < loop->num_nodes; i++)
14948 FOR_BB_INSNS (bbs[i], insn)
14949 if (INSN_P (insn) && INSN_CODE (insn) != -1)
14950 {
14951 rtx set;
14952
14953 /* The runtime of small loops with memory block operations
14954 will be determined by the memory operation. Unrolling
14955 doesn't help here. Measurements to confirm this were
14956 only done on recent CPU levels, so better not change
14957 anything for older CPUs. */
14958 if (s390_tune >= PROCESSOR_2964_Z13
14959 && loop->ninsns <= BLOCK_MEM_OPS_LOOP_INSNS
14960 && ((set = single_set (insn)) != NULL_RTX)
14961 && ((GET_MODE (SET_DEST (set)) == BLKmode
14962 && (GET_MODE (SET_SRC (set)) == BLKmode
14963 || SET_SRC (set) == const0_rtx))
14964 || (GET_CODE (SET_SRC (set)) == COMPARE
14965 && GET_MODE (XEXP (SET_SRC (set), 0)) == BLKmode
14966 && GET_MODE (XEXP (SET_SRC (set), 1)) == BLKmode)))
14967 return 1;
14968
14969 FOR_EACH_SUBRTX (iter, array, PATTERN (insn), NONCONST)
14970 if (MEM_P (*iter))
14971 mem_count += 1;
14972 }
14973 free (bbs);
14974
14975 /* Prevent division by zero, and we do not need to adjust nunroll in this case. */
14976 if (mem_count == 0)
14977 return nunroll;
14978
14979 switch (loop_depth (loop))
14980 {
14981 case 1:
14982 return MIN (nunroll, 28 / mem_count);
14983 case 2:
14984 return MIN (nunroll, 22 / mem_count);
14985 default:
14986 return MIN (nunroll, 16 / mem_count);
14987 }
14988 }
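/* Worked example: a depth-1 loop with 7 memory references gets
   MIN (nunroll, 28 / 7), i.e. at most 4 unrolled copies, while the
   same loop nested one level deeper would get at most 22 / 7 == 3.  */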
14989
14990 /* Restore the current options. This is a hook function and also called
14991 internally. */
14992
14993 static void
14994 s390_function_specific_restore (struct gcc_options *opts,
14995 struct cl_target_option *ptr ATTRIBUTE_UNUSED)
14996 {
14997 opts->x_s390_cost_pointer = (long)processor_table[opts->x_s390_tune].cost;
14998 }
14999
15000 static void
15001 s390_default_align (struct gcc_options *opts)
15002 {
15003 /* Set the default function alignment to 16 in order to get rid of
15004 some unwanted performance effects. */
15005 if (opts->x_flag_align_functions && !opts->x_str_align_functions
15006 && opts->x_s390_tune >= PROCESSOR_2964_Z13)
15007 opts->x_str_align_functions = "16";
15008 }
15009
15010 static void
15011 s390_override_options_after_change (void)
15012 {
15013 s390_default_align (&global_options);
15014 }
15015
15016 static void
15017 s390_option_override_internal (struct gcc_options *opts,
15018 const struct gcc_options *opts_set)
15019 {
15020 /* Architecture mode defaults according to ABI. */
15021 if (!(opts_set->x_target_flags & MASK_ZARCH))
15022 {
15023 if (TARGET_64BIT)
15024 opts->x_target_flags |= MASK_ZARCH;
15025 else
15026 opts->x_target_flags &= ~MASK_ZARCH;
15027 }
15028
15029 /* Set the march default in case it hasn't been specified on cmdline. */
15030 if (!opts_set->x_s390_arch)
15031 opts->x_s390_arch = PROCESSOR_2064_Z900;
15032
15033 opts->x_s390_arch_flags = processor_flags_table[(int) opts->x_s390_arch];
15034
15035 /* Determine processor to tune for. */
15036 if (!opts_set->x_s390_tune)
15037 opts->x_s390_tune = opts->x_s390_arch;
15038
15039 opts->x_s390_tune_flags = processor_flags_table[opts->x_s390_tune];
15040
15041 /* Sanity checks. */
15042 if (opts->x_s390_arch == PROCESSOR_NATIVE
15043 || opts->x_s390_tune == PROCESSOR_NATIVE)
15044 gcc_unreachable ();
15045 if (TARGET_64BIT && !TARGET_ZARCH_P (opts->x_target_flags))
15046 error ("64-bit ABI not supported in ESA/390 mode");
15047
15048 if (opts->x_s390_indirect_branch == indirect_branch_thunk_inline
15049 || opts->x_s390_indirect_branch_call == indirect_branch_thunk_inline
15050 || opts->x_s390_function_return == indirect_branch_thunk_inline
15051 || opts->x_s390_function_return_reg == indirect_branch_thunk_inline
15052 || opts->x_s390_function_return_mem == indirect_branch_thunk_inline)
15053 error ("thunk-inline is only supported with %<-mindirect-branch-jump%>");
15054
15055 if (opts->x_s390_indirect_branch != indirect_branch_keep)
15056 {
15057 if (!opts_set->x_s390_indirect_branch_call)
15058 opts->x_s390_indirect_branch_call = opts->x_s390_indirect_branch;
15059
15060 if (!opts_set->x_s390_indirect_branch_jump)
15061 opts->x_s390_indirect_branch_jump = opts->x_s390_indirect_branch;
15062 }
15063
15064 if (opts->x_s390_function_return != indirect_branch_keep)
15065 {
15066 if (!opts_set->x_s390_function_return_reg)
15067 opts->x_s390_function_return_reg = opts->x_s390_function_return;
15068
15069 if (!opts_set->x_s390_function_return_mem)
15070 opts->x_s390_function_return_mem = opts->x_s390_function_return;
15071 }
15072
15073 /* Enable hardware transactions if available and not explicitly
15074 disabled by the user, e.g. with -m31 -march=zEC12 -mzarch. */
15075 if (!TARGET_OPT_HTM_P (opts_set->x_target_flags))
15076 {
15077 if (TARGET_CPU_HTM_P (opts) && TARGET_ZARCH_P (opts->x_target_flags))
15078 opts->x_target_flags |= MASK_OPT_HTM;
15079 else
15080 opts->x_target_flags &= ~MASK_OPT_HTM;
15081 }
15082
15083 if (TARGET_OPT_VX_P (opts_set->x_target_flags))
15084 {
15085 if (TARGET_OPT_VX_P (opts->x_target_flags))
15086 {
15087 if (!TARGET_CPU_VX_P (opts))
15088 error ("hardware vector support not available on %s",
15089 processor_table[(int)opts->x_s390_arch].name);
15090 if (TARGET_SOFT_FLOAT_P (opts->x_target_flags))
15091 error ("hardware vector support not available with "
15092 "%<-msoft-float%>");
15093 }
15094 }
15095 else
15096 {
15097 if (TARGET_CPU_VX_P (opts))
15098 /* Enable vector support if available and not explicitly disabled
15099 by the user, e.g. with -m31 -march=z13 -mzarch. */
15100 opts->x_target_flags |= MASK_OPT_VX;
15101 else
15102 opts->x_target_flags &= ~MASK_OPT_VX;
15103 }
15104
15105 /* Use hardware DFP if available and not explicitly disabled by
15106 the user, e.g. with -m31 -march=z10 -mzarch. */
15107 if (!TARGET_HARD_DFP_P (opts_set->x_target_flags))
15108 {
15109 if (TARGET_DFP_P (opts))
15110 opts->x_target_flags |= MASK_HARD_DFP;
15111 else
15112 opts->x_target_flags &= ~MASK_HARD_DFP;
15113 }
15114
15115 if (TARGET_HARD_DFP_P (opts->x_target_flags) && !TARGET_DFP_P (opts))
15116 {
15117 if (TARGET_HARD_DFP_P (opts_set->x_target_flags))
15118 {
15119 if (!TARGET_CPU_DFP_P (opts))
15120 error ("hardware decimal floating point instructions"
15121 " not available on %s",
15122 processor_table[(int)opts->x_s390_arch].name);
15123 if (!TARGET_ZARCH_P (opts->x_target_flags))
15124 error ("hardware decimal floating point instructions"
15125 " not available in ESA/390 mode");
15126 }
15127 else
15128 opts->x_target_flags &= ~MASK_HARD_DFP;
15129 }
15130
15131 if (TARGET_SOFT_FLOAT_P (opts_set->x_target_flags)
15132 && TARGET_SOFT_FLOAT_P (opts->x_target_flags))
15133 {
15134 if (TARGET_HARD_DFP_P (opts_set->x_target_flags)
15135 && TARGET_HARD_DFP_P (opts->x_target_flags))
15136 error ("%<-mhard-dfp%> can%'t be used in conjunction with "
15137 "%<-msoft-float%>");
15138
15139 opts->x_target_flags &= ~MASK_HARD_DFP;
15140 }
15141
15142 if (TARGET_BACKCHAIN_P (opts->x_target_flags)
15143 && TARGET_PACKED_STACK_P (opts->x_target_flags)
15144 && TARGET_HARD_FLOAT_P (opts->x_target_flags))
15145 error ("%<-mbackchain%> %<-mpacked-stack%> %<-mhard-float%> are not "
15146 "supported in combination");
15147
15148 if (opts->x_s390_stack_size)
15149 {
15150 if (opts->x_s390_stack_guard >= opts->x_s390_stack_size)
15151 error ("stack size must be greater than the stack guard value");
15152 else if (opts->x_s390_stack_size > 1 << 16)
15153 error ("stack size must not be greater than 64k");
15154 }
15155 else if (opts->x_s390_stack_guard)
15156 error ("%<-mstack-guard%> implies use of %<-mstack-size%>");
15157
15158 /* Our implementation of the stack probe requires the probe interval
15159 to be used as displacement in an address operand. The maximum
15160 probe interval currently is 64k, which would exceed short
15161 displacements. Trim that value down to 4k if that happens. This
15162 might result in too many probes being generated, but only on the
15163 oldest supported machine level, z900. */
15164 if (!DISP_IN_RANGE ((1 << PARAM_VALUE (PARAM_STACK_CLASH_PROTECTION_PROBE_INTERVAL))))
15165 set_param_value ("stack-clash-protection-probe-interval", 12,
15166 opts->x_param_values,
15167 opts_set->x_param_values);
15168
15169 #ifdef TARGET_DEFAULT_LONG_DOUBLE_128
15170 if (!TARGET_LONG_DOUBLE_128_P (opts_set->x_target_flags))
15171 opts->x_target_flags |= MASK_LONG_DOUBLE_128;
15172 #endif
15173
15174 if (opts->x_s390_tune >= PROCESSOR_2097_Z10)
15175 {
15176 maybe_set_param_value (PARAM_MAX_UNROLLED_INSNS, 100,
15177 opts->x_param_values,
15178 opts_set->x_param_values);
15179 maybe_set_param_value (PARAM_MAX_UNROLL_TIMES, 32,
15180 opts->x_param_values,
15181 opts_set->x_param_values);
15182 maybe_set_param_value (PARAM_MAX_COMPLETELY_PEELED_INSNS, 2000,
15183 opts->x_param_values,
15184 opts_set->x_param_values);
15185 maybe_set_param_value (PARAM_MAX_COMPLETELY_PEEL_TIMES, 64,
15186 opts->x_param_values,
15187 opts_set->x_param_values);
15188 }
15189
15190 maybe_set_param_value (PARAM_MAX_PENDING_LIST_LENGTH, 256,
15191 opts->x_param_values,
15192 opts_set->x_param_values);
15193 /* Values for loop prefetching. */
15194 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE, 256,
15195 opts->x_param_values,
15196 opts_set->x_param_values);
15197 maybe_set_param_value (PARAM_L1_CACHE_SIZE, 128,
15198 opts->x_param_values,
15199 opts_set->x_param_values);
15200 /* s390 has more than 2 levels and the size is much larger. Since
15201 we are always running virtualized, assume that we only get a small
15202 part of the caches above l1. */
15203 maybe_set_param_value (PARAM_L2_CACHE_SIZE, 1500,
15204 opts->x_param_values,
15205 opts_set->x_param_values);
15206 maybe_set_param_value (PARAM_PREFETCH_MIN_INSN_TO_MEM_RATIO, 2,
15207 opts->x_param_values,
15208 opts_set->x_param_values);
15209 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES, 6,
15210 opts->x_param_values,
15211 opts_set->x_param_values);
15212
15213 /* Use the alternative scheduling-pressure algorithm by default. */
15214 maybe_set_param_value (PARAM_SCHED_PRESSURE_ALGORITHM, 2,
15215 opts->x_param_values,
15216 opts_set->x_param_values);
15217
15218 maybe_set_param_value (PARAM_MIN_VECT_LOOP_BOUND, 2,
15219 opts->x_param_values,
15220 opts_set->x_param_values);
15221
15222 /* Use aggressive inlining parameters. */
15223 if (opts->x_s390_tune >= PROCESSOR_2964_Z13)
15224 {
15225 maybe_set_param_value (PARAM_INLINE_MIN_SPEEDUP, 2,
15226 opts->x_param_values,
15227 opts_set->x_param_values);
15228
15229 maybe_set_param_value (PARAM_MAX_INLINE_INSNS_AUTO, 80,
15230 opts->x_param_values,
15231 opts_set->x_param_values);
15232 }
15233
15234 /* Set the default alignment. */
15235 s390_default_align (opts);
15236
15237 /* Call target specific restore function to do post-init work. At the moment,
15238 this just sets opts->x_s390_cost_pointer. */
15239 s390_function_specific_restore (opts, NULL);
15240
15241 /* Check whether -mfentry is supported. It cannot be used in 31-bit mode,
15242 because 31-bit PLT stubs assume that %r12 contains the GOT address, which
15243 is not the case when the code runs before the prologue. */
15244 if (opts->x_flag_fentry && !TARGET_64BIT)
15245 error ("%<-mfentry%> is supported only for 64-bit CPUs");
15246 }
15247
15248 static void
15249 s390_option_override (void)
15250 {
15251 unsigned int i;
15252 cl_deferred_option *opt;
15253 vec<cl_deferred_option> *v =
15254 (vec<cl_deferred_option> *) s390_deferred_options;
15255
15256 if (v)
15257 FOR_EACH_VEC_ELT (*v, i, opt)
15258 {
15259 switch (opt->opt_index)
15260 {
15261 case OPT_mhotpatch_:
15262 {
15263 int val1;
15264 int val2;
15265 char *s = strtok (ASTRDUP (opt->arg), ",");
15266 char *t = strtok (NULL, "\0");
15267
15268 if (t != NULL)
15269 {
15270 val1 = integral_argument (s);
15271 val2 = integral_argument (t);
15272 }
15273 else
15274 {
15275 val1 = -1;
15276 val2 = -1;
15277 }
15278 if (val1 == -1 || val2 == -1)
15279 {
15280 /* Argument is not a plain number. */
15281 error ("arguments to %qs should be non-negative integers",
15282 "-mhotpatch=n,m");
15283 break;
15284 }
15285 else if (val1 > s390_hotpatch_hw_max
15286 || val2 > s390_hotpatch_hw_max)
15287 {
15288 error ("argument to %qs is too large (max. %d)",
15289 "-mhotpatch=n,m", s390_hotpatch_hw_max);
15290 break;
15291 }
15292 s390_hotpatch_hw_before_label = val1;
15293 s390_hotpatch_hw_after_label = val2;
15294 break;
15295 }
15296 default:
15297 gcc_unreachable ();
15298 }
15299 }
15300
15301 /* Set up function hooks. */
15302 init_machine_status = s390_init_machine_status;
15303
15304 s390_option_override_internal (&global_options, &global_options_set);
15305
15306 /* Save the initial options in case the user does function specific
15307 options. */
15308 target_option_default_node = build_target_option_node (&global_options);
15309 target_option_current_node = target_option_default_node;
15310
15311 /* This cannot reside in s390_option_optimization_table since HAVE_prefetch
15312 requires the arch flags to be evaluated already. Since prefetching
15313 is beneficial on s390, we enable it if available. */
15314 if (flag_prefetch_loop_arrays < 0 && HAVE_prefetch && optimize >= 3)
15315 flag_prefetch_loop_arrays = 1;
15316
15317 if (!s390_pic_data_is_text_relative && !flag_pic)
15318 error ("%<-mno-pic-data-is-text-relative%> cannot be used without "
15319 "%<-fpic%>/%<-fPIC%>");
15320
15321 if (TARGET_TPF)
15322 {
15323 /* Don't emit DWARF3/4 unless specifically selected. The TPF
15324 debuggers do not yet support DWARF 3/4. */
15325 if (!global_options_set.x_dwarf_strict)
15326 dwarf_strict = 1;
15327 if (!global_options_set.x_dwarf_version)
15328 dwarf_version = 2;
15329 }
15330 }
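/* Example of the deferred -mhotpatch handling above: -mhotpatch=1,2
   is split at the comma and stores 1 in s390_hotpatch_hw_before_label
   and 2 in s390_hotpatch_hw_after_label, while something like
   -mhotpatch=1,foo is rejected because "foo" is not a non-negative
   integer.  */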
15331
15332 #if S390_USE_TARGET_ATTRIBUTE
15333 /* Inner function to process attribute((target(...))): take an argument
15334 and set the current options from it. If we have a list, recursively go
15335 over the list. */
15336
15337 static bool
15338 s390_valid_target_attribute_inner_p (tree args,
15339 struct gcc_options *opts,
15340 struct gcc_options *new_opts_set,
15341 bool force_pragma)
15342 {
15343 char *next_optstr;
15344 bool ret = true;
15345
15346 #define S390_ATTRIB(S,O,A) { S, sizeof (S)-1, O, A, 0 }
15347 #define S390_PRAGMA(S,O,A) { S, sizeof (S)-1, O, A, 1 }
15348 static const struct
15349 {
15350 const char *string;
15351 size_t len;
15352 int opt;
15353 int has_arg;
15354 int only_as_pragma;
15355 } attrs[] = {
15356 /* enum options */
15357 S390_ATTRIB ("arch=", OPT_march_, 1),
15358 S390_ATTRIB ("tune=", OPT_mtune_, 1),
15359 /* uinteger options */
15360 S390_ATTRIB ("stack-guard=", OPT_mstack_guard_, 1),
15361 S390_ATTRIB ("stack-size=", OPT_mstack_size_, 1),
15362 S390_ATTRIB ("branch-cost=", OPT_mbranch_cost_, 1),
15363 S390_ATTRIB ("warn-framesize=", OPT_mwarn_framesize_, 1),
15364 /* flag options */
15365 S390_ATTRIB ("backchain", OPT_mbackchain, 0),
15366 S390_ATTRIB ("hard-dfp", OPT_mhard_dfp, 0),
15367 S390_ATTRIB ("hard-float", OPT_mhard_float, 0),
15368 S390_ATTRIB ("htm", OPT_mhtm, 0),
15369 S390_ATTRIB ("vx", OPT_mvx, 0),
15370 S390_ATTRIB ("packed-stack", OPT_mpacked_stack, 0),
15371 S390_ATTRIB ("small-exec", OPT_msmall_exec, 0),
15372 S390_ATTRIB ("soft-float", OPT_msoft_float, 0),
15373 S390_ATTRIB ("mvcle", OPT_mmvcle, 0),
15374 S390_PRAGMA ("zvector", OPT_mzvector, 0),
15375 /* boolean options */
15376 S390_ATTRIB ("warn-dynamicstack", OPT_mwarn_dynamicstack, 0),
15377 };
15378 #undef S390_ATTRIB
15379 #undef S390_PRAGMA
15380
15381 /* If this is a list, recurse to get the options. */
15382 if (TREE_CODE (args) == TREE_LIST)
15383 {
15384 bool ret = true;
15385 int num_pragma_values;
15386 int i;
15387
15388 /* Note: attribs.c:decl_attributes prepends the values from
15389 current_target_pragma to the list of target attributes. To determine
15390 whether we're looking at a value of the attribute or the pragma we
15391 assume that the first [list_length (current_target_pragma)] values in
15392 the list are the values from the pragma. */
15393 num_pragma_values = (!force_pragma && current_target_pragma != NULL)
15394 ? list_length (current_target_pragma) : 0;
15395 for (i = 0; args; args = TREE_CHAIN (args), i++)
15396 {
15397 bool is_pragma;
15398
15399 is_pragma = (force_pragma || i < num_pragma_values);
15400 if (TREE_VALUE (args)
15401 && !s390_valid_target_attribute_inner_p (TREE_VALUE (args),
15402 opts, new_opts_set,
15403 is_pragma))
15404 {
15405 ret = false;
15406 }
15407 }
15408 return ret;
15409 }
15410
15411 else if (TREE_CODE (args) != STRING_CST)
15412 {
15413 error ("attribute %<target%> argument not a string");
15414 return false;
15415 }
15416
15417 /* Handle multiple arguments separated by commas. */
15418 next_optstr = ASTRDUP (TREE_STRING_POINTER (args));
15419
15420 while (next_optstr && *next_optstr != '\0')
15421 {
15422 char *p = next_optstr;
15423 char *orig_p = p;
15424 char *comma = strchr (next_optstr, ',');
15425 size_t len, opt_len;
15426 int opt;
15427 bool opt_set_p;
15428 char ch;
15429 unsigned i;
15430 int mask = 0;
15431 enum cl_var_type var_type;
15432 bool found;
15433
15434 if (comma)
15435 {
15436 *comma = '\0';
15437 len = comma - next_optstr;
15438 next_optstr = comma + 1;
15439 }
15440 else
15441 {
15442 len = strlen (p);
15443 next_optstr = NULL;
15444 }
15445
15446 /* Recognize no-xxx. */
15447 if (len > 3 && p[0] == 'n' && p[1] == 'o' && p[2] == '-')
15448 {
15449 opt_set_p = false;
15450 p += 3;
15451 len -= 3;
15452 }
15453 else
15454 opt_set_p = true;
15455
15456 /* Find the option. */
15457 ch = *p;
15458 found = false;
15459 for (i = 0; i < ARRAY_SIZE (attrs); i++)
15460 {
15461 opt_len = attrs[i].len;
15462 if (ch == attrs[i].string[0]
15463 && ((attrs[i].has_arg) ? len > opt_len : len == opt_len)
15464 && memcmp (p, attrs[i].string, opt_len) == 0)
15465 {
15466 opt = attrs[i].opt;
15467 if (!opt_set_p && cl_options[opt].cl_reject_negative)
15468 continue;
15469 mask = cl_options[opt].var_value;
15470 var_type = cl_options[opt].var_type;
15471 found = true;
15472 break;
15473 }
15474 }
15475
15476 /* Process the option. */
15477 if (!found)
15478 {
15479 error ("attribute(target(\"%s\")) is unknown", orig_p);
15480 return false;
15481 }
15482 else if (attrs[i].only_as_pragma && !force_pragma)
15483 {
15484 /* Value is not allowed for the target attribute. */
15485 error ("value %qs is not supported by attribute %<target%>",
15486 attrs[i].string);
15487 return false;
15488 }
15489
15490 else if (var_type == CLVC_BIT_SET || var_type == CLVC_BIT_CLEAR)
15491 {
15492 if (var_type == CLVC_BIT_CLEAR)
15493 opt_set_p = !opt_set_p;
15494
15495 if (opt_set_p)
15496 opts->x_target_flags |= mask;
15497 else
15498 opts->x_target_flags &= ~mask;
15499 new_opts_set->x_target_flags |= mask;
15500 }
15501
15502 else if (cl_options[opt].var_type == CLVC_BOOLEAN)
15503 {
15504 int value;
15505
15506 if (cl_options[opt].cl_uinteger)
15507 {
15508 /* Unsigned integer argument. Code based on the function
15509 decode_cmdline_option () in opts-common.c. */
15510 value = integral_argument (p + opt_len);
15511 }
15512 else
15513 value = (opt_set_p) ? 1 : 0;
15514
15515 if (value != -1)
15516 {
15517 struct cl_decoded_option decoded;
15518
15519 /* Value range check; only implemented for numeric and boolean
15520 options at the moment. */
15521 generate_option (opt, NULL, value, CL_TARGET, &decoded);
15522 s390_handle_option (opts, new_opts_set, &decoded, input_location);
15523 set_option (opts, new_opts_set, opt, value,
15524 p + opt_len, DK_UNSPECIFIED, input_location,
15525 global_dc);
15526 }
15527 else
15528 {
15529 error ("attribute(target(\"%s\")) is unknown", orig_p);
15530 ret = false;
15531 }
15532 }
15533
15534 else if (cl_options[opt].var_type == CLVC_ENUM)
15535 {
15536 bool arg_ok;
15537 int value;
15538
15539 arg_ok = opt_enum_arg_to_value (opt, p + opt_len, &value, CL_TARGET);
15540 if (arg_ok)
15541 set_option (opts, new_opts_set, opt, value,
15542 p + opt_len, DK_UNSPECIFIED, input_location,
15543 global_dc);
15544 else
15545 {
15546 error ("attribute(target(\"%s\")) is unknown", orig_p);
15547 ret = false;
15548 }
15549 }
15550
15551 else
15552 gcc_unreachable ();
15553 }
15554 return ret;
15555 }
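/* Example of the parsing above (user code, not part of GCC):

     __attribute__ ((target ("no-vx,stack-guard=128")))
     void foo (void);

   "no-vx" is recognized by stripping the "no-" prefix and matching
   the "vx" table entry, clearing MASK_OPT_VX in x_target_flags;
   "stack-guard=128" matches the uinteger entry and is handed to
   set_option with the value 128.  */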
15556
15557 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */
15558
15559 tree
15560 s390_valid_target_attribute_tree (tree args,
15561 struct gcc_options *opts,
15562 const struct gcc_options *opts_set,
15563 bool force_pragma)
15564 {
15565 tree t = NULL_TREE;
15566 struct gcc_options new_opts_set;
15567
15568 memset (&new_opts_set, 0, sizeof (new_opts_set));
15569
15570 /* Process each of the options on the chain. */
15571 if (! s390_valid_target_attribute_inner_p (args, opts, &new_opts_set,
15572 force_pragma))
15573 return error_mark_node;
15574
15575 /* If some option was set (even if it has not changed), rerun
15576 s390_option_override_internal, and then save the options away. */
15577 if (new_opts_set.x_target_flags
15578 || new_opts_set.x_s390_arch
15579 || new_opts_set.x_s390_tune
15580 || new_opts_set.x_s390_stack_guard
15581 || new_opts_set.x_s390_stack_size
15582 || new_opts_set.x_s390_branch_cost
15583 || new_opts_set.x_s390_warn_framesize
15584 || new_opts_set.x_s390_warn_dynamicstack_p)
15585 {
15586 const unsigned char *src = (const unsigned char *)opts_set;
15587 unsigned char *dest = (unsigned char *)&new_opts_set;
15588 unsigned int i;
15589
15590 /* Merge the original option flags into the new ones. */
15591 for (i = 0; i < sizeof(*opts_set); i++)
15592 dest[i] |= src[i];
15593
15594 /* Do any overrides, such as arch=xxx, or tune=xxx support. */
15595 s390_option_override_internal (opts, &new_opts_set);
15596 /* Save the current options unless we are validating options for
15597 #pragma. */
15598 t = build_target_option_node (opts);
15599 }
15600 return t;
15601 }
15602
15603 /* Hook to validate attribute((target("string"))). */
15604
15605 static bool
15606 s390_valid_target_attribute_p (tree fndecl,
15607 tree ARG_UNUSED (name),
15608 tree args,
15609 int ARG_UNUSED (flags))
15610 {
15611 struct gcc_options func_options;
15612 tree new_target, new_optimize;
15613 bool ret = true;
15614
15615 /* attribute((target("default"))) does nothing, beyond
15616 affecting multi-versioning. */
15617 if (TREE_VALUE (args)
15618 && TREE_CODE (TREE_VALUE (args)) == STRING_CST
15619 && TREE_CHAIN (args) == NULL_TREE
15620 && strcmp (TREE_STRING_POINTER (TREE_VALUE (args)), "default") == 0)
15621 return true;
15622
15623 tree old_optimize = build_optimization_node (&global_options);
15624
15625 /* Get the optimization options of the current function. */
15626 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
15627
15628 if (!func_optimize)
15629 func_optimize = old_optimize;
15630
15631 /* Init func_options. */
15632 memset (&func_options, 0, sizeof (func_options));
15633 init_options_struct (&func_options, NULL);
15634 lang_hooks.init_options_struct (&func_options);
15635
15636 cl_optimization_restore (&func_options, TREE_OPTIMIZATION (func_optimize));
15637
15638 /* Initialize func_options to the default before its target options can
15639 be set. */
15640 cl_target_option_restore (&func_options,
15641 TREE_TARGET_OPTION (target_option_default_node));
15642
15643 new_target = s390_valid_target_attribute_tree (args, &func_options,
15644 &global_options_set,
15645 (args ==
15646 current_target_pragma));
15647 new_optimize = build_optimization_node (&func_options);
15648 if (new_target == error_mark_node)
15649 ret = false;
15650 else if (fndecl && new_target)
15651 {
15652 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;
15653 if (old_optimize != new_optimize)
15654 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
15655 }
15656 return ret;
15657 }
15658
15659 /* Hook to determine if one function can safely inline another. */
15660
15661 static bool
15662 s390_can_inline_p (tree caller, tree callee)
15663 {
15664 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
15665 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
15666
15667 if (!callee_tree)
15668 callee_tree = target_option_default_node;
15669 if (!caller_tree)
15670 caller_tree = target_option_default_node;
15671 if (callee_tree == caller_tree)
15672 return true;
15673
15674 struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
15675 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
15676 bool ret = true;
15677
15678 if ((caller_opts->x_target_flags & ~(MASK_SOFT_FLOAT | MASK_HARD_DFP))
15679 != (callee_opts->x_target_flags & ~(MASK_SOFT_FLOAT | MASK_HARD_DFP)))
15680 ret = false;
15681
15682 /* Don't inline functions to be compiled for a more recent arch into a
15683 function for an older arch. */
15684 else if (caller_opts->x_s390_arch < callee_opts->x_s390_arch)
15685 ret = false;
15686
15687 /* Inlining a hard float function into a soft float function is only
15688 allowed if the hard float function doesn't actually make use of
15689 floating point.
15690
15691 We are called from FEs for multi-versioning call optimization, so
15692 beware of ipa_fn_summaries not being available. */
15693 else if (((TARGET_SOFT_FLOAT_P (caller_opts->x_target_flags)
15694 && !TARGET_SOFT_FLOAT_P (callee_opts->x_target_flags))
15695 || (!TARGET_HARD_DFP_P (caller_opts->x_target_flags)
15696 && TARGET_HARD_DFP_P (callee_opts->x_target_flags)))
15697 && (! ipa_fn_summaries
15698 || ipa_fn_summaries->get
15699 (cgraph_node::get (callee))->fp_expressions))
15700 ret = false;
15701
15702 return ret;
15703 }
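/* Example (user code): given

     __attribute__ ((target ("arch=z14"))) void callee (void);
     __attribute__ ((target ("arch=z13"))) void caller (void)
     { callee (); }

   the callee is compiled for a more recent arch than the caller, so
   the hook above refuses to inline it.  */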
15704 #endif
15705
15706 /* Set VAL to correct enum value according to the indirect-branch or
15707 function-return attribute in ATTR. */
15708
15709 static inline void
15710 s390_indirect_branch_attrvalue (tree attr, enum indirect_branch *val)
15711 {
15712 const char *str = TREE_STRING_POINTER (TREE_VALUE (TREE_VALUE (attr)));
15713 if (strcmp (str, "keep") == 0)
15714 *val = indirect_branch_keep;
15715 else if (strcmp (str, "thunk") == 0)
15716 *val = indirect_branch_thunk;
15717 else if (strcmp (str, "thunk-inline") == 0)
15718 *val = indirect_branch_thunk_inline;
15719 else if (strcmp (str, "thunk-extern") == 0)
15720 *val = indirect_branch_thunk_extern;
15721 }
15722
15723 /* Memorize the setting for -mindirect-branch* and -mfunction-return*
15724 from either the cmdline or the function attributes in
15725 cfun->machine. */
15726
15727 static void
15728 s390_indirect_branch_settings (tree fndecl)
15729 {
15730 tree attr;
15731
15732 if (!fndecl)
15733 return;
15734
15735 /* Initialize with the cmdline options and let the attributes
15736 override it. */
15737 cfun->machine->indirect_branch_jump = s390_indirect_branch_jump;
15738 cfun->machine->indirect_branch_call = s390_indirect_branch_call;
15739
15740 cfun->machine->function_return_reg = s390_function_return_reg;
15741 cfun->machine->function_return_mem = s390_function_return_mem;
15742
15743 if ((attr = lookup_attribute ("indirect_branch",
15744 DECL_ATTRIBUTES (fndecl))))
15745 {
15746 s390_indirect_branch_attrvalue (attr,
15747 &cfun->machine->indirect_branch_jump);
15748 s390_indirect_branch_attrvalue (attr,
15749 &cfun->machine->indirect_branch_call);
15750 }
15751
15752 if ((attr = lookup_attribute ("indirect_branch_jump",
15753 DECL_ATTRIBUTES (fndecl))))
15754 s390_indirect_branch_attrvalue (attr, &cfun->machine->indirect_branch_jump);
15755
15756 if ((attr = lookup_attribute ("indirect_branch_call",
15757 DECL_ATTRIBUTES (fndecl))))
15758 s390_indirect_branch_attrvalue (attr, &cfun->machine->indirect_branch_call);
15759
15760 if ((attr = lookup_attribute ("function_return",
15761 DECL_ATTRIBUTES (fndecl))))
15762 {
15763 s390_indirect_branch_attrvalue (attr,
15764 &cfun->machine->function_return_reg);
15765 s390_indirect_branch_attrvalue (attr,
15766 &cfun->machine->function_return_mem);
15767 }
15768
15769 if ((attr = lookup_attribute ("function_return_reg",
15770 DECL_ATTRIBUTES (fndecl))))
15771 s390_indirect_branch_attrvalue (attr, &cfun->machine->function_return_reg);
15772
15773 if ((attr = lookup_attribute ("function_return_mem",
15774 DECL_ATTRIBUTES (fndecl))))
15775 s390_indirect_branch_attrvalue (attr, &cfun->machine->function_return_mem);
15776 }
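/* Example (user code): the attribute overrides the corresponding
   -mindirect-branch* cmdline setting for a single function:

     __attribute__ ((indirect_branch ("thunk")))
     void f (void (*fp) (void)) { fp (); }

   Here both indirect_branch_jump and indirect_branch_call of f end
   up as indirect_branch_thunk.  */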
15777
15778 #if S390_USE_TARGET_ATTRIBUTE
15779 /* Restore targets globals from NEW_TREE and invalidate s390_previous_fndecl
15780 cache. */
15781
15782 void
15783 s390_activate_target_options (tree new_tree)
15784 {
15785 cl_target_option_restore (&global_options, TREE_TARGET_OPTION (new_tree));
15786 if (TREE_TARGET_GLOBALS (new_tree))
15787 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
15788 else if (new_tree == target_option_default_node)
15789 restore_target_globals (&default_target_globals);
15790 else
15791 TREE_TARGET_GLOBALS (new_tree) = save_target_globals_default_opts ();
15792 s390_previous_fndecl = NULL_TREE;
15793 }
15794 #endif
15795
15796 /* Establish appropriate back-end context for processing the function
15797 FNDECL. The argument might be NULL to indicate processing at top
15798 level, outside of any function scope. */
15799 static void
15800 s390_set_current_function (tree fndecl)
15801 {
15802 #if S390_USE_TARGET_ATTRIBUTE
15803 /* Only change the context if the function changes. This hook is called
15804 several times in the course of compiling a function, and we don't want to
15805 slow things down too much or call target_reinit when it isn't safe. */
15806 if (fndecl == s390_previous_fndecl)
15807 {
15808 s390_indirect_branch_settings (fndecl);
15809 return;
15810 }
15811
15812 tree old_tree;
15813 if (s390_previous_fndecl == NULL_TREE)
15814 old_tree = target_option_current_node;
15815 else if (DECL_FUNCTION_SPECIFIC_TARGET (s390_previous_fndecl))
15816 old_tree = DECL_FUNCTION_SPECIFIC_TARGET (s390_previous_fndecl);
15817 else
15818 old_tree = target_option_default_node;
15819
15820 if (fndecl == NULL_TREE)
15821 {
15822 if (old_tree != target_option_current_node)
15823 s390_activate_target_options (target_option_current_node);
15824 return;
15825 }
15826
15827 tree new_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
15828 if (new_tree == NULL_TREE)
15829 new_tree = target_option_default_node;
15830
15831 if (old_tree != new_tree)
15832 s390_activate_target_options (new_tree);
15833 s390_previous_fndecl = fndecl;
15834 #endif
15835 s390_indirect_branch_settings (fndecl);
15836 }
15837
15838 /* Implement TARGET_USE_BY_PIECES_INFRASTRUCTURE_P. */
15839
15840 static bool
15841 s390_use_by_pieces_infrastructure_p (unsigned HOST_WIDE_INT size,
15842 unsigned int align ATTRIBUTE_UNUSED,
15843 enum by_pieces_operation op ATTRIBUTE_UNUSED,
15844 bool speed_p ATTRIBUTE_UNUSED)
15845 {
15846 return (size == 1 || size == 2
15847 || size == 4 || (TARGET_ZARCH && size == 8));
15848 }
15849
15850 /* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV hook. */
15851
15852 static void
15853 s390_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
15854 {
15855 tree sfpc = s390_builtin_decls[S390_BUILTIN_s390_sfpc];
15856 tree efpc = s390_builtin_decls[S390_BUILTIN_s390_efpc];
15857 tree call_efpc = build_call_expr (efpc, 0);
15858 tree fenv_var = create_tmp_var_raw (unsigned_type_node);
15859
15860 #define FPC_EXCEPTION_MASK HOST_WIDE_INT_UC (0xf8000000)
15861 #define FPC_FLAGS_MASK HOST_WIDE_INT_UC (0x00f80000)
15862 #define FPC_DXC_MASK HOST_WIDE_INT_UC (0x0000ff00)
15863 #define FPC_EXCEPTION_MASK_SHIFT HOST_WIDE_INT_UC (24)
15864 #define FPC_FLAGS_SHIFT HOST_WIDE_INT_UC (16)
15865 #define FPC_DXC_SHIFT HOST_WIDE_INT_UC (8)
15866
15867 /* Generates the equivalent of feholdexcept (&fenv_var)
15868
15869 fenv_var = __builtin_s390_efpc ();
15870 __builtin_s390_sfpc (fenv_var & mask) */
15871 tree old_fpc = build4 (TARGET_EXPR, unsigned_type_node, fenv_var, call_efpc,
15872 NULL_TREE, NULL_TREE);
15873 tree new_fpc
15874 = build2 (BIT_AND_EXPR, unsigned_type_node, fenv_var,
15875 build_int_cst (unsigned_type_node,
15876 ~(FPC_DXC_MASK | FPC_FLAGS_MASK
15877 | FPC_EXCEPTION_MASK)));
15878 tree set_new_fpc = build_call_expr (sfpc, 1, new_fpc);
15879 *hold = build2 (COMPOUND_EXPR, void_type_node, old_fpc, set_new_fpc);
15880
15881 /* Generates the equivalent of feclearexcept (FE_ALL_EXCEPT)
15882
15883 __builtin_s390_sfpc (__builtin_s390_efpc () & mask) */
15884 new_fpc = build2 (BIT_AND_EXPR, unsigned_type_node, call_efpc,
15885 build_int_cst (unsigned_type_node,
15886 ~(FPC_DXC_MASK | FPC_FLAGS_MASK)));
15887 *clear = build_call_expr (sfpc, 1, new_fpc);
15888
15889 /* Generates the equivalent of feupdateenv (fenv_var)
15890
15891 old_fpc = __builtin_s390_efpc ();
15892 __builtin_s390_sfpc (fenv_var);
15893 __atomic_feraiseexcept ((old_fpc & FPC_FLAGS_MASK) >> FPC_FLAGS_SHIFT); */
15894
15895 old_fpc = create_tmp_var_raw (unsigned_type_node);
15896 tree store_old_fpc = build4 (TARGET_EXPR, void_type_node, old_fpc, call_efpc,
15897 NULL_TREE, NULL_TREE);
15898
15899 set_new_fpc = build_call_expr (sfpc, 1, fenv_var);
15900
15901 tree raise_old_except = build2 (BIT_AND_EXPR, unsigned_type_node, old_fpc,
15902 build_int_cst (unsigned_type_node,
15903 FPC_FLAGS_MASK));
15904 raise_old_except = build2 (RSHIFT_EXPR, unsigned_type_node, raise_old_except,
15905 build_int_cst (unsigned_type_node,
15906 FPC_FLAGS_SHIFT));
15907 tree atomic_feraiseexcept
15908 = builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT);
15909 raise_old_except = build_call_expr (atomic_feraiseexcept,
15910 1, raise_old_except);
15911
15912 *update = build2 (COMPOUND_EXPR, void_type_node,
15913 build2 (COMPOUND_EXPR, void_type_node,
15914 store_old_fpc, set_new_fpc),
15915 raise_old_except);
15916
15917 #undef FPC_EXCEPTION_MASK
15918 #undef FPC_FLAGS_MASK
15919 #undef FPC_DXC_MASK
15920 #undef FPC_EXCEPTION_MASK_SHIFT
15921 #undef FPC_FLAGS_SHIFT
15922 #undef FPC_DXC_SHIFT
15923 }
15924
15925 /* Return the vector mode to be used for inner mode MODE when doing
15926 vectorization. */
15927 static machine_mode
15928 s390_preferred_simd_mode (scalar_mode mode)
15929 {
15930 if (TARGET_VXE)
15931 switch (mode)
15932 {
15933 case E_SFmode:
15934 return V4SFmode;
15935 default:;
15936 }
15937
15938 if (TARGET_VX)
15939 switch (mode)
15940 {
15941 case E_DFmode:
15942 return V2DFmode;
15943 case E_DImode:
15944 return V2DImode;
15945 case E_SImode:
15946 return V4SImode;
15947 case E_HImode:
15948 return V8HImode;
15949 case E_QImode:
15950 return V16QImode;
15951 default:;
15952 }
15953 return word_mode;
15954 }
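/* Example: when vectorizing a loop over 32-bit ints with -march=z13,
   SImode maps to V4SImode, i.e. four lanes per 16-byte vector
   register; SFmode only maps to V4SFmode once the vector enhancements
   of z14 (TARGET_VXE) are available.  */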
15955
15956 /* Our hardware does not require vectors to be strictly aligned. */
15957 static bool
15958 s390_support_vector_misalignment (machine_mode mode ATTRIBUTE_UNUSED,
15959 const_tree type ATTRIBUTE_UNUSED,
15960 int misalignment ATTRIBUTE_UNUSED,
15961 bool is_packed ATTRIBUTE_UNUSED)
15962 {
15963 if (TARGET_VX)
15964 return true;
15965
15966 return default_builtin_support_vector_misalignment (mode, type, misalignment,
15967 is_packed);
15968 }
15969
15970 /* The vector ABI requires vector types to be aligned on an 8 byte
15971 boundary (our stack alignment). However, we allow this to be
15972 overridden by the user, although this definitely breaks the ABI. */
15973 static HOST_WIDE_INT
15974 s390_vector_alignment (const_tree type)
15975 {
15976 tree size = TYPE_SIZE (type);
15977
15978 if (!TARGET_VX_ABI)
15979 return default_vector_alignment (type);
15980
15981 if (TYPE_USER_ALIGN (type))
15982 return TYPE_ALIGN (type);
15983
15984 if (tree_fits_uhwi_p (size)
15985 && tree_to_uhwi (size) < BIGGEST_ALIGNMENT)
15986 return tree_to_uhwi (size);
15987
15988 return BIGGEST_ALIGNMENT;
15989 }
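/* Example: under the vector ABI a 16-byte vector such as
   __attribute__ ((vector_size (16))) int is limited to
   BIGGEST_ALIGNMENT, i.e. the 8-byte stack alignment, while a
   vector_size (4) char vector gets only its own size of 4 bytes
   as alignment.  */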
15990
15991 /* Implement TARGET_CONSTANT_ALIGNMENT. Alignment on even addresses for
15992 LARL instruction. */
15993
15994 static HOST_WIDE_INT
15995 s390_constant_alignment (const_tree, HOST_WIDE_INT align)
15996 {
15997 return MAX (align, 16);
15998 }
15999
16000 #ifdef HAVE_AS_MACHINE_MACHINEMODE
16001 /* Implement TARGET_ASM_FILE_START. */
16002 static void
16003 s390_asm_file_start (void)
16004 {
16005 default_file_start ();
16006 s390_asm_output_machine_for_arch (asm_out_file);
16007 }
16008 #endif
16009
16010 /* Implement TARGET_ASM_FILE_END. */
16011 static void
16012 s390_asm_file_end (void)
16013 {
16014 #ifdef HAVE_AS_GNU_ATTRIBUTE
16015 varpool_node *vnode;
16016 cgraph_node *cnode;
16017
16018 FOR_EACH_VARIABLE (vnode)
16019 if (TREE_PUBLIC (vnode->decl))
16020 s390_check_type_for_vector_abi (TREE_TYPE (vnode->decl), false, false);
16021
16022 FOR_EACH_FUNCTION (cnode)
16023 if (TREE_PUBLIC (cnode->decl))
16024 s390_check_type_for_vector_abi (TREE_TYPE (cnode->decl), false, false);
16025
16026
16027 if (s390_vector_abi != 0)
16028 fprintf (asm_out_file, "\t.gnu_attribute 8, %d\n",
16029 s390_vector_abi);
16030 #endif
16031 file_end_indicate_exec_stack ();
16032
16033 if (flag_split_stack)
16034 file_end_indicate_split_stack ();
16035 }
16036
16037 /* Return true if TYPE is a vector bool type. */
16038 static inline bool
16039 s390_vector_bool_type_p (const_tree type)
16040 {
16041 return TYPE_VECTOR_OPAQUE (type);
16042 }
16043
16044 /* Return the diagnostic message string if the binary operation OP is
16045 not permitted on TYPE1 and TYPE2, NULL otherwise. */
16046 static const char*
16047 s390_invalid_binary_op (int op ATTRIBUTE_UNUSED, const_tree type1, const_tree type2)
16048 {
16049 bool bool1_p, bool2_p;
16050 bool plusminus_p;
16051 bool muldiv_p;
16052 bool compare_p;
16053 machine_mode mode1, mode2;
16054
16055 if (!TARGET_ZVECTOR)
16056 return NULL;
16057
16058 if (!VECTOR_TYPE_P (type1) || !VECTOR_TYPE_P (type2))
16059 return NULL;
16060
16061 bool1_p = s390_vector_bool_type_p (type1);
16062 bool2_p = s390_vector_bool_type_p (type2);
16063
16064 /* Mixing signed and unsigned types is forbidden for all
16065 operators. */
16066 if (!bool1_p && !bool2_p
16067 && TYPE_UNSIGNED (type1) != TYPE_UNSIGNED (type2))
16068 return N_("types differ in signedness");
16069
16070 plusminus_p = (op == PLUS_EXPR || op == MINUS_EXPR);
16071 muldiv_p = (op == MULT_EXPR || op == RDIV_EXPR || op == TRUNC_DIV_EXPR
16072 || op == CEIL_DIV_EXPR || op == FLOOR_DIV_EXPR
16073 || op == ROUND_DIV_EXPR);
16074 compare_p = (op == LT_EXPR || op == LE_EXPR || op == GT_EXPR || op == GE_EXPR
16075 || op == EQ_EXPR || op == NE_EXPR);
16076
16077 if (bool1_p && bool2_p && (plusminus_p || muldiv_p))
16078 return N_("binary operator does not support two vector bool operands");
16079
16080 if (bool1_p != bool2_p && (muldiv_p || compare_p))
16081 return N_("binary operator does not support vector bool operand");
16082
16083 mode1 = TYPE_MODE (type1);
16084 mode2 = TYPE_MODE (type2);
16085
16086 if (bool1_p != bool2_p && plusminus_p
16087 && (GET_MODE_CLASS (mode1) == MODE_VECTOR_FLOAT
16088 || GET_MODE_CLASS (mode2) == MODE_VECTOR_FLOAT))
16089 return N_("binary operator does not support mixing vector "
16090 "bool with floating point vector operands");
16091
16092 return NULL;
16093 }
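
/* For example (hypothetical user code, not part of this file), with
   -mzvector:

     vector bool int a, b;
     vector signed int s;
     vector unsigned int u;

     a + b;   rejected: "binary operator does not support two vector
	      bool operands"
     s + u;   rejected: "types differ in signedness"
     a * s;   rejected: "binary operator does not support vector bool
	      operand"  */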

/* Implement TARGET_C_EXCESS_PRECISION.

   FIXME: For historical reasons, float_t and double_t are typedef'ed to
   double on s390, causing operations on float_t to operate in a higher
   precision than is necessary.  However, it is not the case that SFmode
   operations have implicit excess precision, and we generate more optimal
   code if we let the compiler know no implicit extra precision is added.

   That means when we are compiling with -fexcess-precision=fast, the value
   we set for FLT_EVAL_METHOD will be out of line with the actual precision of
   float_t (though they would be correct for -fexcess-precision=standard).

   A complete fix would modify glibc to remove the unnecessary typedef
   of float_t to double.  */

static enum flt_eval_method
s390_excess_precision (enum excess_precision_type type)
{
  switch (type)
    {
    case EXCESS_PRECISION_TYPE_IMPLICIT:
    case EXCESS_PRECISION_TYPE_FAST:
      /* The fastest type to promote to will always be the native type,
	 whether that occurs with implicit excess precision or
	 otherwise.  */
      return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT;
    case EXCESS_PRECISION_TYPE_STANDARD:
      /* Otherwise, when we are in a standards compliant mode, to
	 ensure consistency with the implementation in glibc, report that
	 float is evaluated to the range and precision of double.  */
      return FLT_EVAL_METHOD_PROMOTE_TO_DOUBLE;
    default:
      gcc_unreachable ();
    }
  return FLT_EVAL_METHOD_UNPREDICTABLE;
}
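
/* Illustration (hypothetical user code, not part of this file): with
   -fexcess-precision=standard the addition in

     float a, b, c;
     c = a + b;

   is carried out to the range and precision of double
   (__FLT_EVAL_METHOD__ == 1), matching glibc's float_t typedef,
   whereas -fexcess-precision=fast evaluates it directly in SFmode.  */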

/* Implement the TARGET_ASAN_SHADOW_OFFSET hook.  */

static unsigned HOST_WIDE_INT
s390_asan_shadow_offset (void)
{
  return TARGET_64BIT ? HOST_WIDE_INT_1U << 52 : HOST_WIDE_INT_UC (0x20000000);
}
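
/* AddressSanitizer maps each 8 bytes of application memory to one
   shadow byte via shadow = (addr >> 3) + offset, so the values above
   place the shadow region at 1 << 52 for 64-bit code and at
   0x20000000 for 31-bit code.  */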

#ifdef HAVE_GAS_HIDDEN
# define USE_HIDDEN_LINKONCE 1
#else
# define USE_HIDDEN_LINKONCE 0
#endif

/* Output an indirect branch trampoline for target register REGNO.  */

static void
s390_output_indirect_thunk_function (unsigned int regno, bool z10_p)
{
  tree decl;
  char thunk_label[32];
  int i;

  if (z10_p)
    sprintf (thunk_label, TARGET_INDIRECT_BRANCH_THUNK_NAME_EXRL, regno);
  else
    sprintf (thunk_label, TARGET_INDIRECT_BRANCH_THUNK_NAME_EX,
	     INDIRECT_BRANCH_THUNK_REGNUM, regno);

  decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
		     get_identifier (thunk_label),
		     build_function_type_list (void_type_node, NULL_TREE));
  DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
				   NULL_TREE, void_type_node);
  TREE_PUBLIC (decl) = 1;
  TREE_STATIC (decl) = 1;
  DECL_IGNORED_P (decl) = 1;

  if (USE_HIDDEN_LINKONCE)
    {
      cgraph_node::create (decl)->set_comdat_group (DECL_ASSEMBLER_NAME (decl));

      targetm.asm_out.unique_section (decl, 0);
      switch_to_section (get_named_section (decl, NULL, 0));

      targetm.asm_out.globalize_label (asm_out_file, thunk_label);
      fputs ("\t.hidden\t", asm_out_file);
      assemble_name (asm_out_file, thunk_label);
      putc ('\n', asm_out_file);
      ASM_DECLARE_FUNCTION_NAME (asm_out_file, thunk_label, decl);
    }
  else
    {
      switch_to_section (text_section);
      ASM_OUTPUT_LABEL (asm_out_file, thunk_label);
    }

  DECL_INITIAL (decl) = make_node (BLOCK);
  current_function_decl = decl;
  allocate_struct_function (decl, false);
  init_function_start (decl);
  cfun->is_thunk = true;
  first_function_block_is_cold = false;
  final_start_function (emit_barrier (), asm_out_file, 1);

  /* This makes CFI at least usable for indirect jumps.

     Stopping in the thunk: the backtrace will point to the thunk
     target as if it was interrupted by a signal.  For a call this
     means that the call chain will be: caller->callee->thunk.  */
  if (flag_asynchronous_unwind_tables && flag_dwarf2_cfi_asm)
    {
      fputs ("\t.cfi_signal_frame\n", asm_out_file);
      fprintf (asm_out_file, "\t.cfi_return_column %d\n", regno);
      for (i = 0; i < FPR15_REGNUM; i++)
	fprintf (asm_out_file, "\t.cfi_same_value %s\n", reg_names[i]);
    }

  if (z10_p)
    {
      /* exrl 0,1f  */

      /* We generate a thunk for z10 compiled code although z10 is
	 currently not enabled.  Tell the assembler to accept the
	 instruction.  */
      if (!TARGET_CPU_Z10)
	{
	  fputs ("\t.machine push\n", asm_out_file);
	  fputs ("\t.machine z10\n", asm_out_file);
	}
      /* We use exrl even if -mzarch hasn't been specified on the
	 command line so we have to tell the assembler to accept
	 it.  */
      if (!TARGET_ZARCH)
	fputs ("\t.machinemode zarch\n", asm_out_file);

      fputs ("\texrl\t0,1f\n", asm_out_file);

      if (!TARGET_ZARCH)
	fputs ("\t.machinemode esa\n", asm_out_file);

      if (!TARGET_CPU_Z10)
	fputs ("\t.machine pop\n", asm_out_file);
    }
  else
    {
      /* larl %r1,1f  */
      fprintf (asm_out_file, "\tlarl\t%%r%d,1f\n",
	       INDIRECT_BRANCH_THUNK_REGNUM);

      /* ex 0,0(%r1)  */
      fprintf (asm_out_file, "\tex\t0,0(%%r%d)\n",
	       INDIRECT_BRANCH_THUNK_REGNUM);
    }

  /* 0:     j 0b  */
  fputs ("0:\tj\t0b\n", asm_out_file);

  /* 1:     br <regno>  */
  fprintf (asm_out_file, "1:\tbr\t%%r%d\n", regno);

  final_end_function ();
  init_insn_lengths ();
  free_after_compilation (cfun);
  set_cfun (NULL);
  current_function_decl = NULL;
}
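
/* For reference, a sketch of the z10 (exrl) thunk body for target
   register %r9 (register number and spacing are illustrative; the
   label comes from TARGET_INDIRECT_BRANCH_THUNK_NAME_EXRL):

	exrl	0,1f
   0:	j	0b
   1:	br	%r9

   The exrl executes the br out of line; architecturally the 0b loop
   is never entered and only catches speculative fall-through, which
   is what keeps the predictor from speculating through the indirect
   branch.  */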

/* Implement the asm.code_end target hook.  */

static void
s390_code_end (void)
{
  int i;

  for (i = 1; i < 16; i++)
    {
      if (indirect_branch_z10thunk_mask & (1 << i))
	s390_output_indirect_thunk_function (i, true);

      if (indirect_branch_prez10thunk_mask & (1 << i))
	s390_output_indirect_thunk_function (i, false);
    }

  if (TARGET_INDIRECT_BRANCH_TABLE)
    {
      int o;
      int i;

      for (o = 0; o < INDIRECT_BRANCH_NUM_OPTIONS; o++)
	{
	  if (indirect_branch_table_label_no[o] == 0)
	    continue;

	  switch_to_section (get_section (indirect_branch_table_name[o],
					  0,
					  NULL_TREE));
	  for (i = 0; i < indirect_branch_table_label_no[o]; i++)
	    {
	      char label_start[32];

	      ASM_GENERATE_INTERNAL_LABEL (label_start,
					   indirect_branch_table_label[o], i);

	      fputs ("\t.long\t", asm_out_file);
	      assemble_name_raw (asm_out_file, label_start);
	      fputs ("-.\n", asm_out_file);
	    }
	  switch_to_section (current_function_section ());
	}
    }
}
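
/* Each table entry emitted above has the form ".long <label>-.", a
   32 bit self-relative offset to one indirect branch site, so tools
   that post-process the binary can locate every such branch without
   needing relocations.  */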

/* Implement the TARGET_CASE_VALUES_THRESHOLD target hook.  */

unsigned int
s390_case_values_threshold (void)
{
  /* Disabling branch prediction for indirect jumps makes jump tables
     much more expensive.  */
  if (TARGET_INDIRECT_BRANCH_NOBP_JUMP)
    return 20;

  return default_case_values_threshold ();
}

/* Evaluate the insns between HEAD and TAIL and install back-end
   specific dependencies.

   Establish an ANTI dependency between the r11 and r15 restores from
   FPRs to prevent the instruction scheduler from reordering them,
   since this would break CFI.  No further handling in the
   sched_reorder hook is required since the r11 and r15 restore will
   never appear in the same ready list with that change.  */
void
s390_sched_dependencies_evaluation (rtx_insn *head, rtx_insn *tail)
{
  if (!frame_pointer_needed || !epilogue_completed)
    return;

  while (head != tail && DEBUG_INSN_P (head))
    head = NEXT_INSN (head);

  rtx_insn *r15_restore = NULL, *r11_restore = NULL;

  for (rtx_insn *insn = tail; insn != head; insn = PREV_INSN (insn))
    {
      rtx set = single_set (insn);
      if (!INSN_P (insn)
	  || !RTX_FRAME_RELATED_P (insn)
	  || set == NULL_RTX
	  || !REG_P (SET_DEST (set))
	  || !FP_REG_P (SET_SRC (set)))
	continue;

      if (REGNO (SET_DEST (set)) == HARD_FRAME_POINTER_REGNUM)
	r11_restore = insn;

      if (REGNO (SET_DEST (set)) == STACK_POINTER_REGNUM)
	r15_restore = insn;
    }

  if (r11_restore == NULL || r15_restore == NULL)
    return;
  add_dependence (r11_restore, r15_restore, REG_DEP_ANTI);
}
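
/* Sketch of the epilogue sequence being protected (assuming the GPRs
   were saved into FPRs, so the restores are lgdr instructions; the
   FPR numbers are illustrative):

	lgdr	%r11,%f8	hard frame pointer restore
	lgdr	%r15,%f10	stack pointer restore

   If the scheduler swapped the two, the frame-related notes would be
   emitted in the wrong order and the resulting CFI would describe an
   inconsistent frame; the ANTI dependency pins the order.  */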

/* Initialize GCC target structure.  */

#undef TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP "\t.word\t"
#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
#undef TARGET_ASM_INTEGER
#define TARGET_ASM_INTEGER s390_assemble_integer

#undef TARGET_ASM_OPEN_PAREN
#define TARGET_ASM_OPEN_PAREN ""

#undef TARGET_ASM_CLOSE_PAREN
#define TARGET_ASM_CLOSE_PAREN ""

#undef TARGET_OPTION_OVERRIDE
#define TARGET_OPTION_OVERRIDE s390_option_override

#ifdef TARGET_THREAD_SSP_OFFSET
#undef TARGET_STACK_PROTECT_GUARD
#define TARGET_STACK_PROTECT_GUARD hook_tree_void_null
#endif

#undef TARGET_ENCODE_SECTION_INFO
#define TARGET_ENCODE_SECTION_INFO s390_encode_section_info

#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P s390_scalar_mode_supported_p

#ifdef HAVE_AS_TLS
#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS true
#endif
#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM s390_cannot_force_const_mem

#undef TARGET_DELEGITIMIZE_ADDRESS
#define TARGET_DELEGITIMIZE_ADDRESS s390_delegitimize_address

#undef TARGET_LEGITIMIZE_ADDRESS
#define TARGET_LEGITIMIZE_ADDRESS s390_legitimize_address

#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY s390_return_in_memory

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS s390_init_builtins
#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN s390_expand_builtin
#undef TARGET_BUILTIN_DECL
#define TARGET_BUILTIN_DECL s390_builtin_decl

#undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
#define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA s390_output_addr_const_extra

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK s390_output_mi_thunk
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true

#undef TARGET_C_EXCESS_PRECISION
#define TARGET_C_EXCESS_PRECISION s390_excess_precision

#undef TARGET_SCHED_ADJUST_PRIORITY
#define TARGET_SCHED_ADJUST_PRIORITY s390_adjust_priority
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE s390_issue_rate
#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD s390_first_cycle_multipass_dfa_lookahead

#undef TARGET_SCHED_VARIABLE_ISSUE
#define TARGET_SCHED_VARIABLE_ISSUE s390_sched_variable_issue
#undef TARGET_SCHED_REORDER
#define TARGET_SCHED_REORDER s390_sched_reorder
#undef TARGET_SCHED_INIT
#define TARGET_SCHED_INIT s390_sched_init

#undef TARGET_CANNOT_COPY_INSN_P
#define TARGET_CANNOT_COPY_INSN_P s390_cannot_copy_insn_p
#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS s390_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST s390_address_cost
#undef TARGET_REGISTER_MOVE_COST
#define TARGET_REGISTER_MOVE_COST s390_register_move_cost
#undef TARGET_MEMORY_MOVE_COST
#define TARGET_MEMORY_MOVE_COST s390_memory_move_cost
#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
  s390_builtin_vectorization_cost

#undef TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG s390_reorg

#undef TARGET_VALID_POINTER_MODE
#define TARGET_VALID_POINTER_MODE s390_valid_pointer_mode

#undef TARGET_BUILD_BUILTIN_VA_LIST
#define TARGET_BUILD_BUILTIN_VA_LIST s390_build_builtin_va_list
#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START s390_va_start
#undef TARGET_ASAN_SHADOW_OFFSET
#define TARGET_ASAN_SHADOW_OFFSET s390_asan_shadow_offset
#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR s390_gimplify_va_arg

#undef TARGET_PROMOTE_FUNCTION_MODE
#define TARGET_PROMOTE_FUNCTION_MODE s390_promote_function_mode
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE s390_pass_by_reference

#undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
#define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE s390_override_options_after_change

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL s390_function_ok_for_sibcall
#undef TARGET_FUNCTION_ARG
#define TARGET_FUNCTION_ARG s390_function_arg
#undef TARGET_FUNCTION_ARG_ADVANCE
#define TARGET_FUNCTION_ARG_ADVANCE s390_function_arg_advance
#undef TARGET_FUNCTION_ARG_PADDING
#define TARGET_FUNCTION_ARG_PADDING s390_function_arg_padding
#undef TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE s390_function_value
#undef TARGET_LIBCALL_VALUE
#define TARGET_LIBCALL_VALUE s390_libcall_value
#undef TARGET_STRICT_ARGUMENT_NAMING
#define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true

#undef TARGET_KEEP_LEAF_WHEN_PROFILED
#define TARGET_KEEP_LEAF_WHEN_PROFILED s390_keep_leaf_when_profiled

#undef TARGET_FIXED_CONDITION_CODE_REGS
#define TARGET_FIXED_CONDITION_CODE_REGS s390_fixed_condition_code_regs

#undef TARGET_CC_MODES_COMPATIBLE
#define TARGET_CC_MODES_COMPATIBLE s390_cc_modes_compatible

#undef TARGET_INVALID_WITHIN_DOLOOP
#define TARGET_INVALID_WITHIN_DOLOOP hook_constcharptr_const_rtx_insn_null

#ifdef HAVE_AS_TLS
#undef TARGET_ASM_OUTPUT_DWARF_DTPREL
#define TARGET_ASM_OUTPUT_DWARF_DTPREL s390_output_dwarf_dtprel
#endif

#undef TARGET_DWARF_FRAME_REG_MODE
#define TARGET_DWARF_FRAME_REG_MODE s390_dwarf_frame_reg_mode

#ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
#undef TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE s390_mangle_type
#endif

#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P s390_vector_mode_supported_p

#undef TARGET_PREFERRED_RELOAD_CLASS
#define TARGET_PREFERRED_RELOAD_CLASS s390_preferred_reload_class

#undef TARGET_SECONDARY_RELOAD
#define TARGET_SECONDARY_RELOAD s390_secondary_reload
#undef TARGET_SECONDARY_MEMORY_NEEDED
#define TARGET_SECONDARY_MEMORY_NEEDED s390_secondary_memory_needed
#undef TARGET_SECONDARY_MEMORY_NEEDED_MODE
#define TARGET_SECONDARY_MEMORY_NEEDED_MODE s390_secondary_memory_needed_mode

#undef TARGET_LIBGCC_CMP_RETURN_MODE
#define TARGET_LIBGCC_CMP_RETURN_MODE s390_libgcc_cmp_return_mode

#undef TARGET_LIBGCC_SHIFT_COUNT_MODE
#define TARGET_LIBGCC_SHIFT_COUNT_MODE s390_libgcc_shift_count_mode

#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P s390_legitimate_address_p

#undef TARGET_LEGITIMATE_CONSTANT_P
#define TARGET_LEGITIMATE_CONSTANT_P s390_legitimate_constant_p

#undef TARGET_LRA_P
#define TARGET_LRA_P s390_lra_p

#undef TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE s390_can_eliminate

#undef TARGET_CONDITIONAL_REGISTER_USAGE
#define TARGET_CONDITIONAL_REGISTER_USAGE s390_conditional_register_usage

#undef TARGET_LOOP_UNROLL_ADJUST
#define TARGET_LOOP_UNROLL_ADJUST s390_loop_unroll_adjust

#undef TARGET_ASM_TRAMPOLINE_TEMPLATE
#define TARGET_ASM_TRAMPOLINE_TEMPLATE s390_asm_trampoline_template
#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT s390_trampoline_init

/* PR 79421 */
#undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
#define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 1

#undef TARGET_UNWIND_WORD_MODE
#define TARGET_UNWIND_WORD_MODE s390_unwind_word_mode

#undef TARGET_CANONICALIZE_COMPARISON
#define TARGET_CANONICALIZE_COMPARISON s390_canonicalize_comparison

#undef TARGET_HARD_REGNO_SCRATCH_OK
#define TARGET_HARD_REGNO_SCRATCH_OK s390_hard_regno_scratch_ok

#undef TARGET_HARD_REGNO_NREGS
#define TARGET_HARD_REGNO_NREGS s390_hard_regno_nregs
#undef TARGET_HARD_REGNO_MODE_OK
#define TARGET_HARD_REGNO_MODE_OK s390_hard_regno_mode_ok
#undef TARGET_MODES_TIEABLE_P
#define TARGET_MODES_TIEABLE_P s390_modes_tieable_p

#undef TARGET_HARD_REGNO_CALL_PART_CLOBBERED
#define TARGET_HARD_REGNO_CALL_PART_CLOBBERED \
  s390_hard_regno_call_part_clobbered

#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE s390_attribute_table

#undef TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P
#define TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P hook_bool_const_tree_true

#undef TARGET_SET_UP_BY_PROLOGUE
#define TARGET_SET_UP_BY_PROLOGUE s300_set_up_by_prologue

#undef TARGET_EXTRA_LIVE_ON_ENTRY
#define TARGET_EXTRA_LIVE_ON_ENTRY s390_live_on_entry

#undef TARGET_USE_BY_PIECES_INFRASTRUCTURE_P
#define TARGET_USE_BY_PIECES_INFRASTRUCTURE_P \
  s390_use_by_pieces_infrastructure_p

#undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
#define TARGET_ATOMIC_ASSIGN_EXPAND_FENV s390_atomic_assign_expand_fenv

#undef TARGET_INVALID_ARG_FOR_UNPROTOTYPED_FN
#define TARGET_INVALID_ARG_FOR_UNPROTOTYPED_FN s390_invalid_arg_for_unprototyped_fn

#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE s390_preferred_simd_mode

#undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
#define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT s390_support_vector_misalignment

#undef TARGET_VECTOR_ALIGNMENT
#define TARGET_VECTOR_ALIGNMENT s390_vector_alignment

#undef TARGET_INVALID_BINARY_OP
#define TARGET_INVALID_BINARY_OP s390_invalid_binary_op

#ifdef HAVE_AS_MACHINE_MACHINEMODE
#undef TARGET_ASM_FILE_START
#define TARGET_ASM_FILE_START s390_asm_file_start
#endif

#undef TARGET_ASM_FILE_END
#define TARGET_ASM_FILE_END s390_asm_file_end

#undef TARGET_SET_CURRENT_FUNCTION
#define TARGET_SET_CURRENT_FUNCTION s390_set_current_function

#if S390_USE_TARGET_ATTRIBUTE
#undef TARGET_OPTION_VALID_ATTRIBUTE_P
#define TARGET_OPTION_VALID_ATTRIBUTE_P s390_valid_target_attribute_p

#undef TARGET_CAN_INLINE_P
#define TARGET_CAN_INLINE_P s390_can_inline_p
#endif

#undef TARGET_OPTION_RESTORE
#define TARGET_OPTION_RESTORE s390_function_specific_restore

#undef TARGET_CAN_CHANGE_MODE_CLASS
#define TARGET_CAN_CHANGE_MODE_CLASS s390_can_change_mode_class

#undef TARGET_CONSTANT_ALIGNMENT
#define TARGET_CONSTANT_ALIGNMENT s390_constant_alignment

#undef TARGET_ASM_CODE_END
#define TARGET_ASM_CODE_END s390_code_end

#undef TARGET_CASE_VALUES_THRESHOLD
#define TARGET_CASE_VALUES_THRESHOLD s390_case_values_threshold

#undef TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK
#define TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK \
  s390_sched_dependencies_evaluation

/* Use only short displacement, since long displacement is not available for
   the floating point instructions.  */
#undef TARGET_MAX_ANCHOR_OFFSET
#define TARGET_MAX_ANCHOR_OFFSET 0xfff

struct gcc_target targetm = TARGET_INITIALIZER;

#include "gt-s390.h"