/* Subroutines used for code generation on IBM S/390 and zSeries
   Copyright (C) 1999-2018 Free Software Foundation, Inc.
   Contributed by Hartmut Penner (hpenner@de.ibm.com) and
                  Ulrich Weigand (uweigand@de.ibm.com) and
                  Andreas Krebbel (Andreas.Krebbel@de.ibm.com).

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#define IN_TARGET_CODE 1

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "target.h"
#include "target-globals.h"
#include "rtl.h"
#include "tree.h"
#include "gimple.h"
#include "cfghooks.h"
#include "cfgloop.h"
#include "df.h"
#include "memmodel.h"
#include "tm_p.h"
#include "stringpool.h"
#include "attribs.h"
#include "expmed.h"
#include "optabs.h"
#include "regs.h"
#include "emit-rtl.h"
#include "recog.h"
#include "cgraph.h"
#include "diagnostic-core.h"
#include "diagnostic.h"
#include "alias.h"
#include "fold-const.h"
#include "print-tree.h"
#include "stor-layout.h"
#include "varasm.h"
#include "calls.h"
#include "conditions.h"
#include "output.h"
#include "insn-attr.h"
#include "flags.h"
#include "except.h"
#include "dojump.h"
#include "explow.h"
#include "stmt.h"
#include "expr.h"
#include "reload.h"
#include "cfgrtl.h"
#include "cfganal.h"
#include "lcm.h"
#include "cfgbuild.h"
#include "cfgcleanup.h"
#include "debug.h"
#include "langhooks.h"
#include "internal-fn.h"
#include "gimple-fold.h"
#include "tree-eh.h"
#include "gimplify.h"
#include "params.h"
#include "opts.h"
#include "tree-pass.h"
#include "context.h"
#include "builtins.h"
#include "rtl-iter.h"
#include "intl.h"
#include "tm-constrs.h"
#include "tree-vrp.h"
#include "symbol-summary.h"
#include "ipa-prop.h"
#include "ipa-fnsummary.h"
#include "sched-int.h"

/* This file should be included last.  */
#include "target-def.h"

static bool s390_hard_regno_mode_ok (unsigned int, machine_mode);
/* Remember the last target of s390_set_current_function.  */
static GTY(()) tree s390_previous_fndecl;

/* Define the specific costs for a given cpu.  */

struct processor_costs
{
  /* multiplication */
  const int m;        /* cost of an M instruction.  */
  const int mghi;     /* cost of an MGHI instruction.  */
  const int mh;       /* cost of an MH instruction.  */
  const int mhi;      /* cost of an MHI instruction.  */
  const int ml;       /* cost of an ML instruction.  */
  const int mr;       /* cost of an MR instruction.  */
  const int ms;       /* cost of an MS instruction.  */
  const int msg;      /* cost of an MSG instruction.  */
  const int msgf;     /* cost of an MSGF instruction.  */
  const int msgfr;    /* cost of an MSGFR instruction.  */
  const int msgr;     /* cost of an MSGR instruction.  */
  const int msr;      /* cost of an MSR instruction.  */
  const int mult_df;  /* cost of multiplication in DFmode.  */
  const int mxbr;
  /* square root */
  const int sqxbr;    /* cost of square root in TFmode.  */
  const int sqdbr;    /* cost of square root in DFmode.  */
  const int sqebr;    /* cost of square root in SFmode.  */
  /* multiply and add */
  const int madbr;    /* cost of multiply and add in DFmode.  */
  const int maebr;    /* cost of multiply and add in SFmode.  */
  /* division */
  const int dxbr;
  const int ddbr;
  const int debr;
  const int dlgr;
  const int dlr;
  const int dr;
  const int dsgfr;
  const int dsgr;
};

#define s390_cost ((const struct processor_costs *)(s390_cost_pointer))

static const
struct processor_costs z900_cost =
{
  COSTS_N_INSNS (5),     /* M     */
  COSTS_N_INSNS (10),    /* MGHI  */
  COSTS_N_INSNS (5),     /* MH    */
  COSTS_N_INSNS (4),     /* MHI   */
  COSTS_N_INSNS (5),     /* ML    */
  COSTS_N_INSNS (5),     /* MR    */
  COSTS_N_INSNS (4),     /* MS    */
  COSTS_N_INSNS (15),    /* MSG   */
  COSTS_N_INSNS (7),     /* MSGF  */
  COSTS_N_INSNS (7),     /* MSGFR */
  COSTS_N_INSNS (10),    /* MSGR  */
  COSTS_N_INSNS (4),     /* MSR   */
  COSTS_N_INSNS (7),     /* multiplication in DFmode */
  COSTS_N_INSNS (13),    /* MXBR */
  COSTS_N_INSNS (136),   /* SQXBR */
  COSTS_N_INSNS (44),    /* SQDBR */
  COSTS_N_INSNS (35),    /* SQEBR */
  COSTS_N_INSNS (18),    /* MADBR */
  COSTS_N_INSNS (13),    /* MAEBR */
  COSTS_N_INSNS (134),   /* DXBR */
  COSTS_N_INSNS (30),    /* DDBR */
  COSTS_N_INSNS (27),    /* DEBR */
  COSTS_N_INSNS (220),   /* DLGR */
  COSTS_N_INSNS (34),    /* DLR */
  COSTS_N_INSNS (34),    /* DR */
  COSTS_N_INSNS (32),    /* DSGFR */
  COSTS_N_INSNS (32),    /* DSGR */
};

static const
struct processor_costs z990_cost =
{
  COSTS_N_INSNS (4),     /* M     */
  COSTS_N_INSNS (2),     /* MGHI  */
  COSTS_N_INSNS (2),     /* MH    */
  COSTS_N_INSNS (2),     /* MHI   */
  COSTS_N_INSNS (4),     /* ML    */
  COSTS_N_INSNS (4),     /* MR    */
  COSTS_N_INSNS (5),     /* MS    */
  COSTS_N_INSNS (6),     /* MSG   */
  COSTS_N_INSNS (4),     /* MSGF  */
  COSTS_N_INSNS (4),     /* MSGFR */
  COSTS_N_INSNS (4),     /* MSGR  */
  COSTS_N_INSNS (4),     /* MSR   */
  COSTS_N_INSNS (1),     /* multiplication in DFmode */
  COSTS_N_INSNS (28),    /* MXBR */
  COSTS_N_INSNS (130),   /* SQXBR */
  COSTS_N_INSNS (66),    /* SQDBR */
  COSTS_N_INSNS (38),    /* SQEBR */
  COSTS_N_INSNS (1),     /* MADBR */
  COSTS_N_INSNS (1),     /* MAEBR */
  COSTS_N_INSNS (60),    /* DXBR */
  COSTS_N_INSNS (40),    /* DDBR */
  COSTS_N_INSNS (26),    /* DEBR */
  COSTS_N_INSNS (176),   /* DLGR */
  COSTS_N_INSNS (31),    /* DLR */
  COSTS_N_INSNS (31),    /* DR */
  COSTS_N_INSNS (31),    /* DSGFR */
  COSTS_N_INSNS (31),    /* DSGR */
};

static const
struct processor_costs z9_109_cost =
{
  COSTS_N_INSNS (4),     /* M     */
  COSTS_N_INSNS (2),     /* MGHI  */
  COSTS_N_INSNS (2),     /* MH    */
  COSTS_N_INSNS (2),     /* MHI   */
  COSTS_N_INSNS (4),     /* ML    */
  COSTS_N_INSNS (4),     /* MR    */
  COSTS_N_INSNS (5),     /* MS    */
  COSTS_N_INSNS (6),     /* MSG   */
  COSTS_N_INSNS (4),     /* MSGF  */
  COSTS_N_INSNS (4),     /* MSGFR */
  COSTS_N_INSNS (4),     /* MSGR  */
  COSTS_N_INSNS (4),     /* MSR   */
  COSTS_N_INSNS (1),     /* multiplication in DFmode */
  COSTS_N_INSNS (28),    /* MXBR */
  COSTS_N_INSNS (130),   /* SQXBR */
  COSTS_N_INSNS (66),    /* SQDBR */
  COSTS_N_INSNS (38),    /* SQEBR */
  COSTS_N_INSNS (1),     /* MADBR */
  COSTS_N_INSNS (1),     /* MAEBR */
  COSTS_N_INSNS (60),    /* DXBR */
  COSTS_N_INSNS (40),    /* DDBR */
  COSTS_N_INSNS (26),    /* DEBR */
  COSTS_N_INSNS (30),    /* DLGR */
  COSTS_N_INSNS (23),    /* DLR */
  COSTS_N_INSNS (23),    /* DR */
  COSTS_N_INSNS (24),    /* DSGFR */
  COSTS_N_INSNS (24),    /* DSGR */
};

static const
struct processor_costs z10_cost =
{
  COSTS_N_INSNS (10),    /* M     */
  COSTS_N_INSNS (10),    /* MGHI  */
  COSTS_N_INSNS (10),    /* MH    */
  COSTS_N_INSNS (10),    /* MHI   */
  COSTS_N_INSNS (10),    /* ML    */
  COSTS_N_INSNS (10),    /* MR    */
  COSTS_N_INSNS (10),    /* MS    */
  COSTS_N_INSNS (10),    /* MSG   */
  COSTS_N_INSNS (10),    /* MSGF  */
  COSTS_N_INSNS (10),    /* MSGFR */
  COSTS_N_INSNS (10),    /* MSGR  */
  COSTS_N_INSNS (10),    /* MSR   */
  COSTS_N_INSNS (1),     /* multiplication in DFmode */
  COSTS_N_INSNS (50),    /* MXBR */
  COSTS_N_INSNS (120),   /* SQXBR */
  COSTS_N_INSNS (52),    /* SQDBR */
  COSTS_N_INSNS (38),    /* SQEBR */
  COSTS_N_INSNS (1),     /* MADBR */
  COSTS_N_INSNS (1),     /* MAEBR */
  COSTS_N_INSNS (111),   /* DXBR */
  COSTS_N_INSNS (39),    /* DDBR */
  COSTS_N_INSNS (32),    /* DEBR */
  COSTS_N_INSNS (160),   /* DLGR */
  COSTS_N_INSNS (71),    /* DLR */
  COSTS_N_INSNS (71),    /* DR */
  COSTS_N_INSNS (71),    /* DSGFR */
  COSTS_N_INSNS (71),    /* DSGR */
};

static const
struct processor_costs z196_cost =
{
  COSTS_N_INSNS (7),     /* M     */
  COSTS_N_INSNS (5),     /* MGHI  */
  COSTS_N_INSNS (5),     /* MH    */
  COSTS_N_INSNS (5),     /* MHI   */
  COSTS_N_INSNS (7),     /* ML    */
  COSTS_N_INSNS (7),     /* MR    */
  COSTS_N_INSNS (6),     /* MS    */
  COSTS_N_INSNS (8),     /* MSG   */
  COSTS_N_INSNS (6),     /* MSGF  */
  COSTS_N_INSNS (6),     /* MSGFR */
  COSTS_N_INSNS (8),     /* MSGR  */
  COSTS_N_INSNS (6),     /* MSR   */
  COSTS_N_INSNS (1),     /* multiplication in DFmode */
  COSTS_N_INSNS (40),    /* MXBR B+40 */
  COSTS_N_INSNS (100),   /* SQXBR B+100 */
  COSTS_N_INSNS (42),    /* SQDBR B+42 */
  COSTS_N_INSNS (28),    /* SQEBR B+28 */
  COSTS_N_INSNS (1),     /* MADBR B */
  COSTS_N_INSNS (1),     /* MAEBR B */
  COSTS_N_INSNS (101),   /* DXBR B+101 */
  COSTS_N_INSNS (29),    /* DDBR */
  COSTS_N_INSNS (22),    /* DEBR */
  COSTS_N_INSNS (160),   /* DLGR cracked */
  COSTS_N_INSNS (160),   /* DLR cracked */
  COSTS_N_INSNS (160),   /* DR expanded */
  COSTS_N_INSNS (160),   /* DSGFR cracked */
  COSTS_N_INSNS (160),   /* DSGR cracked */
};

static const
struct processor_costs zEC12_cost =
{
  COSTS_N_INSNS (7),     /* M     */
  COSTS_N_INSNS (5),     /* MGHI  */
  COSTS_N_INSNS (5),     /* MH    */
  COSTS_N_INSNS (5),     /* MHI   */
  COSTS_N_INSNS (7),     /* ML    */
  COSTS_N_INSNS (7),     /* MR    */
  COSTS_N_INSNS (6),     /* MS    */
  COSTS_N_INSNS (8),     /* MSG   */
  COSTS_N_INSNS (6),     /* MSGF  */
  COSTS_N_INSNS (6),     /* MSGFR */
  COSTS_N_INSNS (8),     /* MSGR  */
  COSTS_N_INSNS (6),     /* MSR   */
  COSTS_N_INSNS (1),     /* multiplication in DFmode */
  COSTS_N_INSNS (40),    /* MXBR B+40 */
  COSTS_N_INSNS (100),   /* SQXBR B+100 */
  COSTS_N_INSNS (42),    /* SQDBR B+42 */
  COSTS_N_INSNS (28),    /* SQEBR B+28 */
  COSTS_N_INSNS (1),     /* MADBR B */
  COSTS_N_INSNS (1),     /* MAEBR B */
  COSTS_N_INSNS (131),   /* DXBR B+131 */
  COSTS_N_INSNS (29),    /* DDBR */
  COSTS_N_INSNS (22),    /* DEBR */
  COSTS_N_INSNS (160),   /* DLGR cracked */
  COSTS_N_INSNS (160),   /* DLR cracked */
  COSTS_N_INSNS (160),   /* DR expanded */
  COSTS_N_INSNS (160),   /* DSGFR cracked */
  COSTS_N_INSNS (160),   /* DSGR cracked */
};

static struct
{
  /* The preferred name to be used in user visible output.  */
  const char *const name;
  /* CPU name as it should be passed to Binutils via .machine  */
  const char *const binutils_name;
  const enum processor_type processor;
  const struct processor_costs *cost;
}
const processor_table[] =
{
  { "g5",     "g5",     PROCESSOR_9672_G5,     &z900_cost },
  { "g6",     "g6",     PROCESSOR_9672_G6,     &z900_cost },
  { "z900",   "z900",   PROCESSOR_2064_Z900,   &z900_cost },
  { "z990",   "z990",   PROCESSOR_2084_Z990,   &z990_cost },
  { "z9-109", "z9-109", PROCESSOR_2094_Z9_109, &z9_109_cost },
  { "z9-ec",  "z9-ec",  PROCESSOR_2094_Z9_EC,  &z9_109_cost },
  { "z10",    "z10",    PROCESSOR_2097_Z10,    &z10_cost },
  { "z196",   "z196",   PROCESSOR_2817_Z196,   &z196_cost },
  { "zEC12",  "zEC12",  PROCESSOR_2827_ZEC12,  &zEC12_cost },
  { "z13",    "z13",    PROCESSOR_2964_Z13,    &zEC12_cost },
  { "z14",    "arch12", PROCESSOR_3906_Z14,    &zEC12_cost },
  { "native", "",       PROCESSOR_NATIVE,      NULL }
};
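
/* For illustration (the wiring happens elsewhere in this file, not in
   this excerpt): tuning for z10 makes s390_cost_pointer refer to
   z10_cost above, so e.g. s390_cost->msg then evaluates to
   COSTS_N_INSNS (10).  */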

extern int reload_completed;

/* Kept up to date using the SCHED_VARIABLE_ISSUE hook.  */
static rtx_insn *last_scheduled_insn;
#define MAX_SCHED_UNITS 3
static int last_scheduled_unit_distance[MAX_SCHED_UNITS];

#define NUM_SIDES 2
static int current_side = 1;
#define LONGRUNNING_THRESHOLD 5

/* Estimate of the number of cycles a long-running insn occupies an
   execution unit.  */
static unsigned fxu_longrunning[NUM_SIDES];
static unsigned vfu_longrunning[NUM_SIDES];

/* Factor to scale latencies by, determined by measurements.  */
#define LATENCY_FACTOR 4

/* The maximum score added for an instruction whose unit hasn't been
   in use for MAX_SCHED_MIX_DISTANCE steps.  Increase this value to
   give instruction mix scheduling more priority over instruction
   grouping.  */
#define MAX_SCHED_MIX_SCORE 8

/* The maximum distance up to which individual scores will be
   calculated.  Everything beyond this gives MAX_SCHED_MIX_SCORE.
   Increase this with the OOO window size of the machine.  */
#define MAX_SCHED_MIX_DISTANCE 100

/* Structure used to hold the components of an S/390 memory
   address.  A legitimate address on S/390 is of the general
   form
	base + index + displacement
   where any of the components is optional.

   base and index are registers of the class ADDR_REGS,
   displacement is an unsigned 12-bit immediate constant.  */

struct s390_address
{
  rtx base;
  rtx indx;
  rtx disp;
  bool pointer;
  bool literal_pool;
};
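
/* Illustrative example (hypothetical operands): an assembler operand
   such as 8(%r2,%r3) decomposes into disp = 8, indx = %r2 and
   base = %r3, while a plain stack slot reference like 160(%r15) sets
   only base and disp.  */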

/* A few accessor macros for struct cfun->machine->s390_frame_layout.  */

#define cfun_frame_layout (cfun->machine->frame_layout)
#define cfun_save_high_fprs_p (!!cfun_frame_layout.high_fprs)
#define cfun_save_arg_fprs_p (!!(TARGET_64BIT                           \
                                 ? cfun_frame_layout.fpr_bitmap & 0x0f  \
                                 : cfun_frame_layout.fpr_bitmap & 0x03))
#define cfun_gprs_save_area_size ((cfun_frame_layout.last_save_gpr_slot - \
  cfun_frame_layout.first_save_gpr_slot + 1) * UNITS_PER_LONG)
#define cfun_set_fpr_save(REGNO) (cfun->machine->frame_layout.fpr_bitmap |= \
  (1 << (REGNO - FPR0_REGNUM)))
#define cfun_fpr_save_p(REGNO) (!!(cfun->machine->frame_layout.fpr_bitmap & \
  (1 << (REGNO - FPR0_REGNUM))))
#define cfun_gpr_save_slot(REGNO) \
  cfun->machine->frame_layout.gpr_save_slots[REGNO]

/* Number of GPRs and FPRs used for argument passing.  */
#define GP_ARG_NUM_REG 5
#define FP_ARG_NUM_REG (TARGET_64BIT? 4 : 2)
#define VEC_ARG_NUM_REG 8

/* A couple of shortcuts.  */
#define CONST_OK_FOR_J(x) \
        CONST_OK_FOR_CONSTRAINT_P((x), 'J', "J")
#define CONST_OK_FOR_K(x) \
        CONST_OK_FOR_CONSTRAINT_P((x), 'K', "K")
#define CONST_OK_FOR_Os(x) \
        CONST_OK_FOR_CONSTRAINT_P((x), 'O', "Os")
#define CONST_OK_FOR_Op(x) \
        CONST_OK_FOR_CONSTRAINT_P((x), 'O', "Op")
#define CONST_OK_FOR_On(x) \
        CONST_OK_FOR_CONSTRAINT_P((x), 'O', "On")

#define REGNO_PAIR_OK(REGNO, MODE)                                      \
  (s390_hard_regno_nregs ((REGNO), (MODE)) == 1 || !((REGNO) & 1))
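
/* For illustration: a multiword mode occupying a GPR pair (e.g. TImode
   on 64 bit) is only accepted starting at an even register number,
   while any mode fitting a single register passes this check for
   every REGNO.  */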

/* This is the read-ahead distance of the dynamic branch prediction
   unit in bytes on a z10 (or higher) CPU.  */
#define PREDICT_DISTANCE (TARGET_Z10 ? 384 : 2048)

/* Masks per jump target register indicating which thunks need to be
   generated.  */
static GTY(()) int indirect_branch_prez10thunk_mask = 0;
static GTY(()) int indirect_branch_z10thunk_mask = 0;

#define INDIRECT_BRANCH_NUM_OPTIONS 4

enum s390_indirect_branch_option
  {
    s390_opt_indirect_branch_jump = 0,
    s390_opt_indirect_branch_call,
    s390_opt_function_return_reg,
    s390_opt_function_return_mem
  };

static GTY(()) int indirect_branch_table_label_no[INDIRECT_BRANCH_NUM_OPTIONS] = { 0 };
const char *indirect_branch_table_label[INDIRECT_BRANCH_NUM_OPTIONS] = \
  { "LJUMP", "LCALL", "LRETREG", "LRETMEM" };
const char *indirect_branch_table_name[INDIRECT_BRANCH_NUM_OPTIONS] = \
  { ".s390_indirect_jump", ".s390_indirect_call",
    ".s390_return_reg", ".s390_return_mem" };
bool
s390_return_addr_from_memory ()
{
  return cfun_gpr_save_slot(RETURN_REGNUM) == SAVE_SLOT_STACK;
}

/* Indicate which ABI has been used for passing vector args.
   0 - no vector type arguments have been passed where the ABI is relevant
   1 - the old ABI has been used
   2 - a vector type argument has been passed either in a vector register
       or on the stack by value  */
static int s390_vector_abi = 0;

/* Set the vector ABI marker if TYPE is subject to the vector ABI
   switch.  The vector ABI affects only vector data types.  There are
   two aspects of the vector ABI relevant here:

   1. vectors >= 16 bytes have an alignment of 8 bytes with the new
      ABI and natural alignment with the old.

   2. vectors <= 16 bytes are passed in VRs or by value on the stack
      with the new ABI but by reference on the stack with the old.

   If ARG_P is true TYPE is used for a function argument or return
   value.  The ABI marker then is set for all vector data types.  If
   ARG_P is false only vectors relevant for aspect 1 are checked.  */
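
/* Illustrative example: a 32-byte vector type is relevant for aspect 1
   above (its alignment differs between the two ABIs), while a 16-byte
   vector argument is relevant for aspect 2 (passed in a VR or by value
   with the new ABI, but by reference with the old one).  */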

static void
s390_check_type_for_vector_abi (const_tree type, bool arg_p, bool in_struct_p)
{
  static hash_set<const_tree> visited_types_hash;

  if (s390_vector_abi)
    return;

  if (type == NULL_TREE || TREE_CODE (type) == ERROR_MARK)
    return;

  if (visited_types_hash.contains (type))
    return;

  visited_types_hash.add (type);

  if (VECTOR_TYPE_P (type))
    {
      int type_size = int_size_in_bytes (type);

      /* Outside of arguments only the alignment changes, and that
	 only happens for vector types >= 16 bytes.  */
      if (!arg_p && type_size < 16)
	return;

      /* As arguments, vector types > 16 bytes are passed as before
	 (GCC never enforced the bigger alignment for arguments which
	 the old vector ABI required).  However, the type might still
	 be ABI relevant due to the changed alignment if it is a
	 struct member.  */
      if (arg_p && type_size > 16 && !in_struct_p)
	return;

      s390_vector_abi = TARGET_VX_ABI ? 2 : 1;
    }
  else if (POINTER_TYPE_P (type) || TREE_CODE (type) == ARRAY_TYPE)
    {
      /* ARRAY_TYPE: Since neither ABI uses more than natural
	 alignment, there will never be ABI-dependent padding in an
	 array type.  That's why we do not set in_struct_p to true
	 here.  */
      s390_check_type_for_vector_abi (TREE_TYPE (type), arg_p, in_struct_p);
    }
  else if (TREE_CODE (type) == FUNCTION_TYPE || TREE_CODE (type) == METHOD_TYPE)
    {
      tree arg_chain;

      /* Check the return type.  */
      s390_check_type_for_vector_abi (TREE_TYPE (type), true, false);

      for (arg_chain = TYPE_ARG_TYPES (type);
	   arg_chain;
	   arg_chain = TREE_CHAIN (arg_chain))
	s390_check_type_for_vector_abi (TREE_VALUE (arg_chain), true, false);
    }
  else if (RECORD_OR_UNION_TYPE_P (type))
    {
      tree field;

      for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
	{
	  if (TREE_CODE (field) != FIELD_DECL)
	    continue;

	  s390_check_type_for_vector_abi (TREE_TYPE (field), arg_p, true);
	}
    }
}


/* System z builtins.  */

#include "s390-builtins.h"

const unsigned int bflags_builtin[S390_BUILTIN_MAX + 1] =
{
#undef B_DEF
#undef OB_DEF
#undef OB_DEF_VAR
#define B_DEF(NAME, PATTERN, ATTRS, BFLAGS, ...) BFLAGS,
#define OB_DEF(...)
#define OB_DEF_VAR(...)
#include "s390-builtins.def"
  0
};

const unsigned int opflags_builtin[S390_BUILTIN_MAX + 1] =
{
#undef B_DEF
#undef OB_DEF
#undef OB_DEF_VAR
#define B_DEF(NAME, PATTERN, ATTRS, BFLAGS, OPFLAGS, ...) OPFLAGS,
#define OB_DEF(...)
#define OB_DEF_VAR(...)
#include "s390-builtins.def"
  0
};

const unsigned int bflags_overloaded_builtin[S390_OVERLOADED_BUILTIN_MAX + 1] =
{
#undef B_DEF
#undef OB_DEF
#undef OB_DEF_VAR
#define B_DEF(...)
#define OB_DEF(NAME, FIRST_VAR_NAME, LAST_VAR_NAME, BFLAGS, ...) BFLAGS,
#define OB_DEF_VAR(...)
#include "s390-builtins.def"
  0
};

const unsigned int
bflags_overloaded_builtin_var[S390_OVERLOADED_BUILTIN_VAR_MAX + 1] =
{
#undef B_DEF
#undef OB_DEF
#undef OB_DEF_VAR
#define B_DEF(...)
#define OB_DEF(...)
#define OB_DEF_VAR(NAME, PATTERN, FLAGS, OPFLAGS, FNTYPE) FLAGS,
#include "s390-builtins.def"
  0
};

const unsigned int
opflags_overloaded_builtin_var[S390_OVERLOADED_BUILTIN_VAR_MAX + 1] =
{
#undef B_DEF
#undef OB_DEF
#undef OB_DEF_VAR
#define B_DEF(...)
#define OB_DEF(...)
#define OB_DEF_VAR(NAME, PATTERN, FLAGS, OPFLAGS, FNTYPE) OPFLAGS,
#include "s390-builtins.def"
  0
};

tree s390_builtin_types[BT_MAX];
tree s390_builtin_fn_types[BT_FN_MAX];
tree s390_builtin_decls[S390_BUILTIN_MAX +
			S390_OVERLOADED_BUILTIN_MAX +
			S390_OVERLOADED_BUILTIN_VAR_MAX];

static enum insn_code const code_for_builtin[S390_BUILTIN_MAX + 1] = {
#undef B_DEF
#undef OB_DEF
#undef OB_DEF_VAR
#define B_DEF(NAME, PATTERN, ...) CODE_FOR_##PATTERN,
#define OB_DEF(...)
#define OB_DEF_VAR(...)

#include "s390-builtins.def"
  CODE_FOR_nothing
};

static void
s390_init_builtins (void)
{
  /* These definitions are being used in s390-builtins.def.  */
  tree returns_twice_attr = tree_cons (get_identifier ("returns_twice"),
				       NULL, NULL);
  tree noreturn_attr = tree_cons (get_identifier ("noreturn"), NULL, NULL);
  tree c_uint64_type_node;

  /* The uint64_type_node from tree.c is not compatible with the C99
     uint64_t data type.  What we want is c_uint64_type_node from
     c-common.c.  But since backend code is not supposed to interface
     with the frontend we recreate it here.  */
  if (TARGET_64BIT)
    c_uint64_type_node = long_unsigned_type_node;
  else
    c_uint64_type_node = long_long_unsigned_type_node;

#undef DEF_TYPE
#define DEF_TYPE(INDEX, NODE, CONST_P)			\
  if (s390_builtin_types[INDEX] == NULL)		\
    s390_builtin_types[INDEX] = (!CONST_P) ?		\
      (NODE) : build_type_variant ((NODE), 1, 0);

#undef DEF_POINTER_TYPE
#define DEF_POINTER_TYPE(INDEX, INDEX_BASE)			\
  if (s390_builtin_types[INDEX] == NULL)			\
    s390_builtin_types[INDEX] =					\
      build_pointer_type (s390_builtin_types[INDEX_BASE]);

#undef DEF_DISTINCT_TYPE
#define DEF_DISTINCT_TYPE(INDEX, INDEX_BASE)			\
  if (s390_builtin_types[INDEX] == NULL)			\
    s390_builtin_types[INDEX] =					\
      build_distinct_type_copy (s390_builtin_types[INDEX_BASE]);

#undef DEF_VECTOR_TYPE
#define DEF_VECTOR_TYPE(INDEX, INDEX_BASE, ELEMENTS)			\
  if (s390_builtin_types[INDEX] == NULL)				\
    s390_builtin_types[INDEX] =						\
      build_vector_type (s390_builtin_types[INDEX_BASE], ELEMENTS);

#undef DEF_OPAQUE_VECTOR_TYPE
#define DEF_OPAQUE_VECTOR_TYPE(INDEX, INDEX_BASE, ELEMENTS)		\
  if (s390_builtin_types[INDEX] == NULL)				\
    s390_builtin_types[INDEX] =						\
      build_opaque_vector_type (s390_builtin_types[INDEX_BASE], ELEMENTS);

#undef DEF_FN_TYPE
#define DEF_FN_TYPE(INDEX, args...)			\
  if (s390_builtin_fn_types[INDEX] == NULL)		\
    s390_builtin_fn_types[INDEX] =			\
      build_function_type_list (args, NULL_TREE);
#undef DEF_OV_TYPE
#define DEF_OV_TYPE(...)
#include "s390-builtin-types.def"

#undef B_DEF
#define B_DEF(NAME, PATTERN, ATTRS, BFLAGS, OPFLAGS, FNTYPE)		\
  if (s390_builtin_decls[S390_BUILTIN_##NAME] == NULL)			\
    s390_builtin_decls[S390_BUILTIN_##NAME] =				\
      add_builtin_function ("__builtin_" #NAME,				\
			    s390_builtin_fn_types[FNTYPE],		\
			    S390_BUILTIN_##NAME,			\
			    BUILT_IN_MD,				\
			    NULL,					\
			    ATTRS);
#undef OB_DEF
#define OB_DEF(NAME, FIRST_VAR_NAME, LAST_VAR_NAME, BFLAGS, FNTYPE)	\
  if (s390_builtin_decls[S390_OVERLOADED_BUILTIN_##NAME + S390_BUILTIN_MAX] \
      == NULL)								\
    s390_builtin_decls[S390_OVERLOADED_BUILTIN_##NAME + S390_BUILTIN_MAX] = \
      add_builtin_function ("__builtin_" #NAME,				\
			    s390_builtin_fn_types[FNTYPE],		\
			    S390_OVERLOADED_BUILTIN_##NAME + S390_BUILTIN_MAX, \
			    BUILT_IN_MD,				\
			    NULL,					\
			    0);
#undef OB_DEF_VAR
#define OB_DEF_VAR(...)
#include "s390-builtins.def"

}

/* Return true if ARG is appropriate as argument number ARGNUM of
   builtin DECL.  The operand flags from s390-builtins.def have to be
   passed as OP_FLAGS.  */
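/* Illustrative example (values follow from the bitwidth tables below):
   with OP_FLAGS == O_U4 the bitwidth is 4, so only unsigned constants
   in the range 0..15 are accepted; with O_S8 the accepted signed range
   is -128..127.  */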
bool
s390_const_operand_ok (tree arg, int argnum, int op_flags, tree decl)
{
  if (O_UIMM_P (op_flags))
    {
      int bitwidths[] = { 1, 2, 3, 4, 5, 8, 12, 16, 32 };
      int bitwidth = bitwidths[op_flags - O_U1];

      if (!tree_fits_uhwi_p (arg)
	  || tree_to_uhwi (arg) > (HOST_WIDE_INT_1U << bitwidth) - 1)
	{
	  error("constant argument %d for builtin %qF is out of range (0.."
		HOST_WIDE_INT_PRINT_UNSIGNED ")",
		argnum, decl,
		(HOST_WIDE_INT_1U << bitwidth) - 1);
	  return false;
	}
    }

  if (O_SIMM_P (op_flags))
    {
      int bitwidths[] = { 2, 3, 4, 5, 8, 12, 16, 32 };
      int bitwidth = bitwidths[op_flags - O_S2];

      if (!tree_fits_shwi_p (arg)
	  || tree_to_shwi (arg) < -(HOST_WIDE_INT_1 << (bitwidth - 1))
	  || tree_to_shwi (arg) > ((HOST_WIDE_INT_1 << (bitwidth - 1)) - 1))
	{
	  error("constant argument %d for builtin %qF is out of range ("
		HOST_WIDE_INT_PRINT_DEC ".."
		HOST_WIDE_INT_PRINT_DEC ")",
		argnum, decl,
		-(HOST_WIDE_INT_1 << (bitwidth - 1)),
		(HOST_WIDE_INT_1 << (bitwidth - 1)) - 1);
	  return false;
	}
    }
  return true;
}

/* Expand an expression EXP that calls a built-in function,
   with result going to TARGET if that's convenient
   (and in mode MODE if that's convenient).
   SUBTARGET may be used as the target for computing one of EXP's operands.
   IGNORE is nonzero if the value is to be ignored.  */

static rtx
s390_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
		     machine_mode mode ATTRIBUTE_UNUSED,
		     int ignore ATTRIBUTE_UNUSED)
{
#define MAX_ARGS 6

  tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
  unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
  enum insn_code icode;
  rtx op[MAX_ARGS], pat;
  int arity;
  bool nonvoid;
  tree arg;
  call_expr_arg_iterator iter;
  unsigned int all_op_flags = opflags_for_builtin (fcode);
  machine_mode last_vec_mode = VOIDmode;

  if (TARGET_DEBUG_ARG)
    {
      fprintf (stderr,
	       "s390_expand_builtin, code = %4d, %s, bflags = 0x%x\n",
	       (int)fcode, IDENTIFIER_POINTER (DECL_NAME (fndecl)),
	       bflags_for_builtin (fcode));
    }

  if (S390_USE_TARGET_ATTRIBUTE)
    {
      unsigned int bflags;

      bflags = bflags_for_builtin (fcode);
      if ((bflags & B_HTM) && !TARGET_HTM)
	{
	  error ("builtin %qF is not supported without -mhtm "
		 "(default with -march=zEC12 and higher).", fndecl);
	  return const0_rtx;
	}
      if (((bflags & B_VX) || (bflags & B_VXE)) && !TARGET_VX)
	{
	  error ("builtin %qF requires -mvx "
		 "(default with -march=z13 and higher).", fndecl);
	  return const0_rtx;
	}

      if ((bflags & B_VXE) && !TARGET_VXE)
	{
	  error ("builtin %qF requires z14 or higher.", fndecl);
	  return const0_rtx;
	}
    }
  if (fcode >= S390_OVERLOADED_BUILTIN_VAR_OFFSET
      && fcode < S390_ALL_BUILTIN_MAX)
    {
      gcc_unreachable ();
    }
  else if (fcode < S390_OVERLOADED_BUILTIN_OFFSET)
    {
      icode = code_for_builtin[fcode];
      /* Set a flag in the machine specific cfun part in order to support
	 saving/restoring of FPRs.  */
      if (fcode == S390_BUILTIN_tbegin || fcode == S390_BUILTIN_tbegin_retry)
	cfun->machine->tbegin_p = true;
    }
  else if (fcode < S390_OVERLOADED_BUILTIN_VAR_OFFSET)
    {
      error ("unresolved overloaded builtin");
      return const0_rtx;
    }
  else
    internal_error ("bad builtin fcode");

  if (icode == 0)
    internal_error ("bad builtin icode");

  nonvoid = TREE_TYPE (TREE_TYPE (fndecl)) != void_type_node;

  if (nonvoid)
    {
      machine_mode tmode = insn_data[icode].operand[0].mode;
      if (!target
	  || GET_MODE (target) != tmode
	  || !(*insn_data[icode].operand[0].predicate) (target, tmode))
	target = gen_reg_rtx (tmode);

      /* There are builtins (e.g. vec_promote) with no vector
	 arguments but an element selector.  So we have to also look
	 at the vector return type when emitting the modulo
	 operation.  */
      if (VECTOR_MODE_P (insn_data[icode].operand[0].mode))
	last_vec_mode = insn_data[icode].operand[0].mode;
    }

  arity = 0;
  FOR_EACH_CALL_EXPR_ARG (arg, iter, exp)
    {
      rtx tmp_rtx;
      const struct insn_operand_data *insn_op;
      unsigned int op_flags = all_op_flags & ((1 << O_SHIFT) - 1);

      all_op_flags = all_op_flags >> O_SHIFT;

      if (arg == error_mark_node)
	return NULL_RTX;
      if (arity >= MAX_ARGS)
	return NULL_RTX;

      if (O_IMM_P (op_flags)
	  && TREE_CODE (arg) != INTEGER_CST)
	{
	  error ("constant value required for builtin %qF argument %d",
		 fndecl, arity + 1);
	  return const0_rtx;
	}

      if (!s390_const_operand_ok (arg, arity + 1, op_flags, fndecl))
	return const0_rtx;

      insn_op = &insn_data[icode].operand[arity + nonvoid];
      op[arity] = expand_expr (arg, NULL_RTX, insn_op->mode, EXPAND_NORMAL);

      /* expand_expr truncates constants to the target mode only if it
	 is "convenient".  However, our checks below rely on this
	 being done.  */
      if (CONST_INT_P (op[arity])
	  && SCALAR_INT_MODE_P (insn_op->mode)
	  && GET_MODE (op[arity]) != insn_op->mode)
	op[arity] = GEN_INT (trunc_int_for_mode (INTVAL (op[arity]),
						 insn_op->mode));

      /* Wrap the expanded RTX for pointer types into a MEM expr with
	 the proper mode.  This allows us to use e.g. (match_operand
	 "memory_operand"..) in the insn patterns instead of (mem
	 (match_operand "address_operand")).  This is helpful for
	 patterns not just accepting MEMs.  */
      if (POINTER_TYPE_P (TREE_TYPE (arg))
	  && insn_op->predicate != address_operand)
	op[arity] = gen_rtx_MEM (insn_op->mode, op[arity]);

      /* Expand the modulo operation required on element selectors.  */
      if (op_flags == O_ELEM)
	{
	  gcc_assert (last_vec_mode != VOIDmode);
	  op[arity] = simplify_expand_binop (SImode, code_to_optab (AND),
					     op[arity],
					     GEN_INT (GET_MODE_NUNITS (last_vec_mode) - 1),
					     NULL_RTX, 1, OPTAB_DIRECT);
	}

      /* Record the vector mode used for an element selector.  This assumes:
	 1. There is no builtin with two different vector modes and an element selector
	 2. The element selector comes after the vector type it is referring to.
	 This is currently true for all the builtins, but FIXME: we
	 should better check for that.  */
      if (VECTOR_MODE_P (insn_op->mode))
	last_vec_mode = insn_op->mode;

      if (insn_op->predicate (op[arity], insn_op->mode))
	{
	  arity++;
	  continue;
	}

      /* A memory operand is rejected by the memory_operand predicate.
	 Try making the address legal by copying it into a register.  */
      if (MEM_P (op[arity])
	  && insn_op->predicate == memory_operand
	  && (GET_MODE (XEXP (op[arity], 0)) == Pmode
	      || GET_MODE (XEXP (op[arity], 0)) == VOIDmode))
	{
	  op[arity] = replace_equiv_address (op[arity],
					     copy_to_mode_reg (Pmode,
							       XEXP (op[arity], 0)));
	}
      /* Some of the builtins require different modes/types than the
	 pattern in order to implement a specific API.  Instead of
	 adding many expanders which do the mode change we do it here.
	 E.g. s390_vec_add_u128, which is required to have vector
	 unsigned char arguments, is mapped to addti3.  */
      else if (insn_op->mode != VOIDmode
	       && GET_MODE (op[arity]) != VOIDmode
	       && GET_MODE (op[arity]) != insn_op->mode
	       && ((tmp_rtx = simplify_gen_subreg (insn_op->mode, op[arity],
						   GET_MODE (op[arity]), 0))
		   != NULL_RTX))
	{
	  op[arity] = tmp_rtx;
	}

      /* The predicate rejects the operand although the mode is fine.
	 Copy the operand into a register.  */
      if (!insn_op->predicate (op[arity], insn_op->mode)
	  && (GET_MODE (op[arity]) == insn_op->mode
	      || GET_MODE (op[arity]) == VOIDmode
	      || (insn_op->predicate == address_operand
		  && GET_MODE (op[arity]) == Pmode)))
	{
	  /* An address_operand usually has VOIDmode in the expander
	     so we cannot use this.  */
	  machine_mode target_mode =
	    (insn_op->predicate == address_operand
	     ? (machine_mode) Pmode : insn_op->mode);
	  op[arity] = copy_to_mode_reg (target_mode, op[arity]);
	}

      if (!insn_op->predicate (op[arity], insn_op->mode))
	{
	  error ("invalid argument %d for builtin %qF", arity + 1, fndecl);
	  return const0_rtx;
	}
      arity++;
    }

  switch (arity)
    {
    case 0:
      pat = GEN_FCN (icode) (target);
      break;
    case 1:
      if (nonvoid)
	pat = GEN_FCN (icode) (target, op[0]);
      else
	pat = GEN_FCN (icode) (op[0]);
      break;
    case 2:
      if (nonvoid)
	pat = GEN_FCN (icode) (target, op[0], op[1]);
      else
	pat = GEN_FCN (icode) (op[0], op[1]);
      break;
    case 3:
      if (nonvoid)
	pat = GEN_FCN (icode) (target, op[0], op[1], op[2]);
      else
	pat = GEN_FCN (icode) (op[0], op[1], op[2]);
      break;
    case 4:
      if (nonvoid)
	pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3]);
      else
	pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
      break;
    case 5:
      if (nonvoid)
	pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3], op[4]);
      else
	pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3], op[4]);
      break;
    case 6:
      if (nonvoid)
	pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3], op[4], op[5]);
      else
	pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3], op[4], op[5]);
      break;
    default:
      gcc_unreachable ();
    }
  if (!pat)
    return NULL_RTX;
  emit_insn (pat);

  if (nonvoid)
    return target;
  else
    return const0_rtx;
}


static const int s390_hotpatch_hw_max = 1000000;
static int s390_hotpatch_hw_before_label = 0;
static int s390_hotpatch_hw_after_label = 0;

/* Check whether the hotpatch attribute is applied to a function and,
   if it has arguments, whether the arguments are valid.  */

static tree
s390_handle_hotpatch_attribute (tree *node, tree name, tree args,
				int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
{
  tree expr;
  tree expr2;
  int err;

  if (TREE_CODE (*node) != FUNCTION_DECL)
    {
      warning (OPT_Wattributes, "%qE attribute only applies to functions",
	       name);
      *no_add_attrs = true;
    }
  if (args != NULL && TREE_CHAIN (args) != NULL)
    {
      expr = TREE_VALUE (args);
      expr2 = TREE_VALUE (TREE_CHAIN (args));
    }
  if (args == NULL || TREE_CHAIN (args) == NULL)
    err = 1;
  else if (TREE_CODE (expr) != INTEGER_CST
	   || !INTEGRAL_TYPE_P (TREE_TYPE (expr))
	   || wi::gtu_p (wi::to_wide (expr), s390_hotpatch_hw_max))
    err = 1;
  else if (TREE_CODE (expr2) != INTEGER_CST
	   || !INTEGRAL_TYPE_P (TREE_TYPE (expr2))
	   || wi::gtu_p (wi::to_wide (expr2), s390_hotpatch_hw_max))
    err = 1;
  else
    err = 0;
  if (err)
    {
      error ("requested %qE attribute is not a comma separated pair of"
	     " non-negative integer constants or too large (max. %d)", name,
	     s390_hotpatch_hw_max);
      *no_add_attrs = true;
    }

  return NULL_TREE;
}

/* Expand the s390_vector_bool type attribute.  */

static tree
s390_handle_vectorbool_attribute (tree *node, tree name ATTRIBUTE_UNUSED,
				  tree args ATTRIBUTE_UNUSED,
				  int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
{
  tree type = *node, result = NULL_TREE;
  machine_mode mode;

  while (POINTER_TYPE_P (type)
	 || TREE_CODE (type) == FUNCTION_TYPE
	 || TREE_CODE (type) == METHOD_TYPE
	 || TREE_CODE (type) == ARRAY_TYPE)
    type = TREE_TYPE (type);

  mode = TYPE_MODE (type);
  switch (mode)
    {
    case E_DImode: case E_V2DImode:
      result = s390_builtin_types[BT_BV2DI];
      break;
    case E_SImode: case E_V4SImode:
      result = s390_builtin_types[BT_BV4SI];
      break;
    case E_HImode: case E_V8HImode:
      result = s390_builtin_types[BT_BV8HI];
      break;
    case E_QImode: case E_V16QImode:
      result = s390_builtin_types[BT_BV16QI];
      break;
    default:
      break;
    }

  *no_add_attrs = true;  /* No need to hang on to the attribute.  */

  if (result)
    *node = lang_hooks.types.reconstruct_complex_type (*node, result);

  return NULL_TREE;
}
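
/* Illustrative example (hypothetical declaration): for
     vector int x __attribute__ ((s390_vector_bool));
   the type of x has mode V4SImode and is remapped to the BT_BV4SI
   boolean vector type selected above.  */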

/* Check syntax of function decl attributes having a string type value.  */

static tree
s390_handle_string_attribute (tree *node, tree name ATTRIBUTE_UNUSED,
			      tree args ATTRIBUTE_UNUSED,
			      int flags ATTRIBUTE_UNUSED,
			      bool *no_add_attrs)
{
  tree cst;

  if (TREE_CODE (*node) != FUNCTION_DECL)
    {
      warning (OPT_Wattributes, "%qE attribute only applies to functions",
	       name);
      *no_add_attrs = true;
    }

  cst = TREE_VALUE (args);

  if (TREE_CODE (cst) != STRING_CST)
    {
      warning (OPT_Wattributes,
	       "%qE attribute requires a string constant argument",
	       name);
      *no_add_attrs = true;
    }

  if (is_attribute_p ("indirect_branch", name)
      || is_attribute_p ("indirect_branch_call", name)
      || is_attribute_p ("function_return", name)
      || is_attribute_p ("function_return_reg", name)
      || is_attribute_p ("function_return_mem", name))
    {
      if (strcmp (TREE_STRING_POINTER (cst), "keep") != 0
	  && strcmp (TREE_STRING_POINTER (cst), "thunk") != 0
	  && strcmp (TREE_STRING_POINTER (cst), "thunk-extern") != 0)
	{
	  warning (OPT_Wattributes,
		   "argument to %qE attribute is not "
		   "(keep|thunk|thunk-extern)", name);
	  *no_add_attrs = true;
	}
    }

  if (is_attribute_p ("indirect_branch_jump", name)
      && strcmp (TREE_STRING_POINTER (cst), "keep") != 0
      && strcmp (TREE_STRING_POINTER (cst), "thunk") != 0
      && strcmp (TREE_STRING_POINTER (cst), "thunk-inline") != 0
      && strcmp (TREE_STRING_POINTER (cst), "thunk-extern") != 0)
    {
      warning (OPT_Wattributes,
	       "argument to %qE attribute is not "
	       "(keep|thunk|thunk-inline|thunk-extern)", name);
      *no_add_attrs = true;
    }

  return NULL_TREE;
}

static const struct attribute_spec s390_attribute_table[] = {
  { "hotpatch", 2, 2, true, false, false, false,
    s390_handle_hotpatch_attribute, NULL },
  { "s390_vector_bool", 0, 0, false, true, false, true,
    s390_handle_vectorbool_attribute, NULL },
  { "indirect_branch", 1, 1, true, false, false, false,
    s390_handle_string_attribute, NULL },
  { "indirect_branch_jump", 1, 1, true, false, false, false,
    s390_handle_string_attribute, NULL },
  { "indirect_branch_call", 1, 1, true, false, false, false,
    s390_handle_string_attribute, NULL },
  { "function_return", 1, 1, true, false, false, false,
    s390_handle_string_attribute, NULL },
  { "function_return_reg", 1, 1, true, false, false, false,
    s390_handle_string_attribute, NULL },
  { "function_return_mem", 1, 1, true, false, false, false,
    s390_handle_string_attribute, NULL },

  /* End element.  */
  { NULL, 0, 0, false, false, false, false, NULL, NULL }
};

/* Return the alignment for LABEL.  We default to the -falign-labels
   value except for the literal pool base label.  */
int
s390_label_align (rtx_insn *label)
{
  rtx_insn *prev_insn = prev_active_insn (label);
  rtx set, src;

  if (prev_insn == NULL_RTX)
    goto old;

  set = single_set (prev_insn);

  if (set == NULL_RTX)
    goto old;

  src = SET_SRC (set);

  /* Don't align literal pool base labels.  */
  if (GET_CODE (src) == UNSPEC
      && XINT (src, 1) == UNSPEC_MAIN_BASE)
    return 0;

 old:
  return align_labels_log;
}

static GTY(()) rtx got_symbol;

/* Return the GOT table symbol.  The symbol will be created when the
   function is invoked for the first time.  */

static rtx
s390_got_symbol (void)
{
  if (!got_symbol)
    {
      got_symbol = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
      SYMBOL_REF_FLAGS (got_symbol) = SYMBOL_FLAG_LOCAL;
    }

  return got_symbol;
}

static scalar_int_mode
s390_libgcc_cmp_return_mode (void)
{
  return TARGET_64BIT ? DImode : SImode;
}

static scalar_int_mode
s390_libgcc_shift_count_mode (void)
{
  return TARGET_64BIT ? DImode : SImode;
}

static scalar_int_mode
s390_unwind_word_mode (void)
{
  return TARGET_64BIT ? DImode : SImode;
}

/* Return true if the back end supports mode MODE.  */
static bool
s390_scalar_mode_supported_p (scalar_mode mode)
{
  /* In contrast to the default implementation, reject TImode constants
     on 31-bit TARGET_ZARCH for ABI compliance.  */
  if (!TARGET_64BIT && TARGET_ZARCH && mode == TImode)
    return false;

  if (DECIMAL_FLOAT_MODE_P (mode))
    return default_decimal_float_supported_p ();

  return default_scalar_mode_supported_p (mode);
}

/* Return true if the back end supports vector mode MODE.  */
static bool
s390_vector_mode_supported_p (machine_mode mode)
{
  machine_mode inner;

  if (!VECTOR_MODE_P (mode)
      || !TARGET_VX
      || GET_MODE_SIZE (mode) > 16)
    return false;

  inner = GET_MODE_INNER (mode);

  switch (inner)
    {
    case E_QImode:
    case E_HImode:
    case E_SImode:
    case E_DImode:
    case E_TImode:
    case E_SFmode:
    case E_DFmode:
    case E_TFmode:
      return true;
    default:
      return false;
    }
}

/* Set the has_landing_pad_p flag in struct machine_function to VALUE.  */

void
s390_set_has_landing_pad_p (bool value)
{
  cfun->machine->has_landing_pad_p = value;
}

/* If two condition code modes are compatible, return a condition code
   mode which is compatible with both.  Otherwise, return
   VOIDmode.  */

static machine_mode
s390_cc_modes_compatible (machine_mode m1, machine_mode m2)
{
  if (m1 == m2)
    return m1;

  switch (m1)
    {
    case E_CCZmode:
      if (m2 == CCUmode || m2 == CCTmode || m2 == CCZ1mode
	  || m2 == CCSmode || m2 == CCSRmode || m2 == CCURmode)
	return m2;
      return VOIDmode;

    case E_CCSmode:
    case E_CCUmode:
    case E_CCTmode:
    case E_CCSRmode:
    case E_CCURmode:
    case E_CCZ1mode:
      if (m2 == CCZmode)
	return m1;

      return VOIDmode;

    default:
      return VOIDmode;
    }
  return VOIDmode;
}

/* Return true if SET either doesn't set the CC register, or else
   the source and destination have matching CC modes and that
   CC mode is at least as constrained as REQ_MODE.  */

static bool
s390_match_ccmode_set (rtx set, machine_mode req_mode)
{
  machine_mode set_mode;

  gcc_assert (GET_CODE (set) == SET);

  /* These modes are supposed to be used only in CC consumer
     patterns.  */
  gcc_assert (req_mode != CCVIALLmode && req_mode != CCVIANYmode
	      && req_mode != CCVFALLmode && req_mode != CCVFANYmode);

  if (GET_CODE (SET_DEST (set)) != REG || !CC_REGNO_P (REGNO (SET_DEST (set))))
    return 1;

  set_mode = GET_MODE (SET_DEST (set));
  switch (set_mode)
    {
    case E_CCZ1mode:
    case E_CCSmode:
    case E_CCSRmode:
    case E_CCUmode:
    case E_CCURmode:
    case E_CCLmode:
    case E_CCL1mode:
    case E_CCL2mode:
    case E_CCL3mode:
    case E_CCT1mode:
    case E_CCT2mode:
    case E_CCT3mode:
    case E_CCVEQmode:
    case E_CCVIHmode:
    case E_CCVIHUmode:
    case E_CCVFHmode:
    case E_CCVFHEmode:
      if (req_mode != set_mode)
	return 0;
      break;

    case E_CCZmode:
      if (req_mode != CCSmode && req_mode != CCUmode && req_mode != CCTmode
	  && req_mode != CCSRmode && req_mode != CCURmode
	  && req_mode != CCZ1mode)
	return 0;
      break;

    case E_CCAPmode:
    case E_CCANmode:
      if (req_mode != CCAmode)
	return 0;
      break;

    default:
      gcc_unreachable ();
    }

  return (GET_MODE (SET_SRC (set)) == set_mode);
}

/* Return true if every SET in INSN that sets the CC register
   has source and destination with matching CC modes and that
   CC mode is at least as constrained as REQ_MODE.
   If REQ_MODE is VOIDmode, always return false.  */

bool
s390_match_ccmode (rtx_insn *insn, machine_mode req_mode)
{
  int i;

  /* s390_tm_ccmode returns VOIDmode to indicate failure.  */
  if (req_mode == VOIDmode)
    return false;

  if (GET_CODE (PATTERN (insn)) == SET)
    return s390_match_ccmode_set (PATTERN (insn), req_mode);

  if (GET_CODE (PATTERN (insn)) == PARALLEL)
    for (i = 0; i < XVECLEN (PATTERN (insn), 0); i++)
      {
	rtx set = XVECEXP (PATTERN (insn), 0, i);
	if (GET_CODE (set) == SET)
	  if (!s390_match_ccmode_set (set, req_mode))
	    return false;
      }

  return true;
}

/* If a test-under-mask instruction can be used to implement
   (compare (and ... OP1) OP2), return the CC mode required
   to do that.  Otherwise, return VOIDmode.
   MIXED is true if the instruction can distinguish between
   CC1 and CC2 for mixed selected bits (TMxx); it is false
   if the instruction cannot (TM).  */

machine_mode
s390_tm_ccmode (rtx op1, rtx op2, bool mixed)
{
  int bit0, bit1;

  /* ??? Fixme: should work on CONST_WIDE_INT as well.  */
  if (GET_CODE (op1) != CONST_INT || GET_CODE (op2) != CONST_INT)
    return VOIDmode;

  /* Selected bits all zero: CC0.
     e.g.: int a; if ((a & (16 + 128)) == 0) */
  if (INTVAL (op2) == 0)
    return CCTmode;

  /* Selected bits all one: CC3.
     e.g.: int a; if ((a & (16 + 128)) == 16 + 128) */
  if (INTVAL (op2) == INTVAL (op1))
    return CCT3mode;

  /* Exactly two bits selected, mixed zeroes and ones: CC1 or CC2. e.g.:
     int a;
     if ((a & (16 + 128)) == 16)  -> CCT1
     if ((a & (16 + 128)) == 128) -> CCT2  */
  if (mixed)
    {
      bit1 = exact_log2 (INTVAL (op2));
      bit0 = exact_log2 (INTVAL (op1) ^ INTVAL (op2));
      if (bit0 != -1 && bit1 != -1)
	return bit0 > bit1 ? CCT1mode : CCT2mode;
    }

  return VOIDmode;
}

/* Given a comparison code OP (EQ, NE, etc.) and the operands
   OP0 and OP1 of a COMPARE, return the mode to be used for the
   comparison.  */

machine_mode
s390_select_ccmode (enum rtx_code code, rtx op0, rtx op1)
{
  switch (code)
    {
    case EQ:
    case NE:
      if ((GET_CODE (op0) == NEG || GET_CODE (op0) == ABS)
	  && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)
	return CCAPmode;
      if (GET_CODE (op0) == PLUS && GET_CODE (XEXP (op0, 1)) == CONST_INT
	  && CONST_OK_FOR_K (INTVAL (XEXP (op0, 1))))
	return CCAPmode;
      if ((GET_CODE (op0) == PLUS || GET_CODE (op0) == MINUS
	   || GET_CODE (op1) == NEG)
	  && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)
	return CCLmode;

      if (GET_CODE (op0) == AND)
	{
	  /* Check whether we can potentially do it via TM.  */
	  machine_mode ccmode;
	  ccmode = s390_tm_ccmode (XEXP (op0, 1), op1, 1);
	  if (ccmode != VOIDmode)
	    {
	      /* Relax CCTmode to CCZmode to allow fall-back to AND
		 if that turns out to be beneficial.  */
	      return ccmode == CCTmode ? CCZmode : ccmode;
	    }
	}

      if (register_operand (op0, HImode)
	  && GET_CODE (op1) == CONST_INT
	  && (INTVAL (op1) == -1 || INTVAL (op1) == 65535))
	return CCT3mode;
      if (register_operand (op0, QImode)
	  && GET_CODE (op1) == CONST_INT
	  && (INTVAL (op1) == -1 || INTVAL (op1) == 255))
	return CCT3mode;

      return CCZmode;

    case LE:
    case LT:
    case GE:
    case GT:
      /* The only overflow condition of NEG and ABS happens when
	 INT_MIN is used as the parameter: the result stays negative.
	 So we have an overflow from a positive value to a negative.
	 Using CCAP mode the resulting cc can be used for
	 comparisons.  */
      if ((GET_CODE (op0) == NEG || GET_CODE (op0) == ABS)
	  && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)
	return CCAPmode;

      /* If a constant is involved in an add instruction, the resulting
	 cc can be used for comparisons with zero: knowing the sign of
	 the constant makes the overflow behavior predictable.  e.g.:
	   int a, b; if ((b = a + c) > 0)
	 with c as a constant value: c < 0 -> CCAN and c >= 0 -> CCAP  */
      if (GET_CODE (op0) == PLUS && GET_CODE (XEXP (op0, 1)) == CONST_INT
	  && (CONST_OK_FOR_K (INTVAL (XEXP (op0, 1)))
	      || (CONST_OK_FOR_CONSTRAINT_P (INTVAL (XEXP (op0, 1)), 'O', "Os")
		  /* Avoid INT32_MIN on 32 bit.  */
		  && (!TARGET_ZARCH || INTVAL (XEXP (op0, 1)) != -0x7fffffff - 1))))
	{
	  if (INTVAL (XEXP((op0), 1)) < 0)
	    return CCANmode;
	  else
	    return CCAPmode;
	}
      /* Fall through.  */
    case UNORDERED:
    case ORDERED:
    case UNEQ:
    case UNLE:
    case UNLT:
    case UNGE:
    case UNGT:
    case LTGT:
      if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
	  && GET_CODE (op1) != CONST_INT)
	return CCSRmode;
      return CCSmode;

    case LTU:
    case GEU:
      if (GET_CODE (op0) == PLUS
	  && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)
	return CCL1mode;

      if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
	  && GET_CODE (op1) != CONST_INT)
	return CCURmode;
      return CCUmode;

    case LEU:
    case GTU:
      if (GET_CODE (op0) == MINUS
	  && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)
	return CCL2mode;

      if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
	  && GET_CODE (op1) != CONST_INT)
	return CCURmode;
      return CCUmode;

    default:
      gcc_unreachable ();
    }
}

/* Replace the comparison OP0 CODE OP1 by a semantically equivalent one
   that we can implement more efficiently.  */

static void
s390_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
			      bool op0_preserve_value)
{
  if (op0_preserve_value)
    return;

  /* Convert ZERO_EXTRACT back to AND to enable TM patterns.  */
  if ((*code == EQ || *code == NE)
      && *op1 == const0_rtx
      && GET_CODE (*op0) == ZERO_EXTRACT
      && GET_CODE (XEXP (*op0, 1)) == CONST_INT
      && GET_CODE (XEXP (*op0, 2)) == CONST_INT
      && SCALAR_INT_MODE_P (GET_MODE (XEXP (*op0, 0))))
    {
      rtx inner = XEXP (*op0, 0);
      HOST_WIDE_INT modesize = GET_MODE_BITSIZE (GET_MODE (inner));
      HOST_WIDE_INT len = INTVAL (XEXP (*op0, 1));
      HOST_WIDE_INT pos = INTVAL (XEXP (*op0, 2));

      if (len > 0 && len < modesize
	  && pos >= 0 && pos + len <= modesize
	  && modesize <= HOST_BITS_PER_WIDE_INT)
	{
	  unsigned HOST_WIDE_INT block;
	  block = (HOST_WIDE_INT_1U << len) - 1;
	  block <<= modesize - pos - len;

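	  /* Worked example (illustrative values): with modesize == 32,
	     len == 4 and pos == 8, block becomes 0xf << 20, i.e. the
	     AND mask selects the four bits starting 8 bits below the
	     most significant bit, which is how the ZERO_EXTRACT
	     position is interpreted here.  */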
1646 *op0 = gen_rtx_AND (GET_MODE (inner), inner,
1647 gen_int_mode (block, GET_MODE (inner)));
1648 }
1649 }
1650
1651 /* Narrow AND of memory against immediate to enable TM. */
1652 if ((*code == EQ || *code == NE)
1653 && *op1 == const0_rtx
1654 && GET_CODE (*op0) == AND
1655 && GET_CODE (XEXP (*op0, 1)) == CONST_INT
1656 && SCALAR_INT_MODE_P (GET_MODE (XEXP (*op0, 0))))
1657 {
1658 rtx inner = XEXP (*op0, 0);
1659 rtx mask = XEXP (*op0, 1);
1660
1661 /* Ignore paradoxical SUBREGs if all extra bits are masked out. */
1662 if (GET_CODE (inner) == SUBREG
1663 && SCALAR_INT_MODE_P (GET_MODE (SUBREG_REG (inner)))
1664 && (GET_MODE_SIZE (GET_MODE (inner))
1665 >= GET_MODE_SIZE (GET_MODE (SUBREG_REG (inner))))
1666 && ((INTVAL (mask)
1667 & GET_MODE_MASK (GET_MODE (inner))
1668 & ~GET_MODE_MASK (GET_MODE (SUBREG_REG (inner))))
1669 == 0))
1670 inner = SUBREG_REG (inner);
1671
1672 /* Do not change volatile MEMs. */
1673 if (MEM_P (inner) && !MEM_VOLATILE_P (inner))
1674 {
1675 int part = s390_single_part (XEXP (*op0, 1),
1676 GET_MODE (inner), QImode, 0);
1677 if (part >= 0)
1678 {
1679 mask = gen_int_mode (s390_extract_part (mask, QImode, 0), QImode);
1680 inner = adjust_address_nv (inner, QImode, part);
1681 *op0 = gen_rtx_AND (QImode, inner, mask);
1682 }
1683 }
1684 }
1685
1686 /* Narrow comparisons against 0xffff to HImode if possible. */
1687 if ((*code == EQ || *code == NE)
1688 && GET_CODE (*op1) == CONST_INT
1689 && INTVAL (*op1) == 0xffff
1690 && SCALAR_INT_MODE_P (GET_MODE (*op0))
1691 && (nonzero_bits (*op0, GET_MODE (*op0))
1692 & ~HOST_WIDE_INT_UC (0xffff)) == 0)
1693 {
1694 *op0 = gen_lowpart (HImode, *op0);
1695 *op1 = constm1_rtx;
1696 }
1697
1698 /* Remove redundant UNSPEC_STRCMPCC_TO_INT conversions if possible. */
1699 if (GET_CODE (*op0) == UNSPEC
1700 && XINT (*op0, 1) == UNSPEC_STRCMPCC_TO_INT
1701 && XVECLEN (*op0, 0) == 1
1702 && GET_MODE (XVECEXP (*op0, 0, 0)) == CCUmode
1703 && GET_CODE (XVECEXP (*op0, 0, 0)) == REG
1704 && REGNO (XVECEXP (*op0, 0, 0)) == CC_REGNUM
1705 && *op1 == const0_rtx)
1706 {
1707 enum rtx_code new_code = UNKNOWN;
1708 switch (*code)
1709 {
1710 case EQ: new_code = EQ; break;
1711 case NE: new_code = NE; break;
1712 case LT: new_code = GTU; break;
1713 case GT: new_code = LTU; break;
1714 case LE: new_code = GEU; break;
1715 case GE: new_code = LEU; break;
1716 default: break;
1717 }
1718
1719 if (new_code != UNKNOWN)
1720 {
1721 *op0 = XVECEXP (*op0, 0, 0);
1722 *code = new_code;
1723 }
1724 }
1725
1726 /* Remove redundant UNSPEC_CC_TO_INT conversions if possible. */
1727 if (GET_CODE (*op0) == UNSPEC
1728 && XINT (*op0, 1) == UNSPEC_CC_TO_INT
1729 && XVECLEN (*op0, 0) == 1
1730 && GET_CODE (XVECEXP (*op0, 0, 0)) == REG
1731 && REGNO (XVECEXP (*op0, 0, 0)) == CC_REGNUM
1732 && CONST_INT_P (*op1))
1733 {
1734 enum rtx_code new_code = UNKNOWN;
1735 switch (GET_MODE (XVECEXP (*op0, 0, 0)))
1736 {
1737 case E_CCZmode:
1738 case E_CCRAWmode:
1739 switch (*code)
1740 {
1741 case EQ: new_code = EQ; break;
1742 case NE: new_code = NE; break;
1743 default: break;
1744 }
1745 break;
1746 default: break;
1747 }
1748
1749 if (new_code != UNKNOWN)
1750 {
1751 /* For CCRAWmode put the required cc mask into the second
1752 operand. */
1753 if (GET_MODE (XVECEXP (*op0, 0, 0)) == CCRAWmode
1754 && INTVAL (*op1) >= 0 && INTVAL (*op1) <= 3)
1755 *op1 = gen_rtx_CONST_INT (VOIDmode, 1 << (3 - INTVAL (*op1)));
1756 *op0 = XVECEXP (*op0, 0, 0);
1757 *code = new_code;
1758 }
1759 }
1760
1761 /* Simplify cascaded EQ, NE with const0_rtx. */
1762 if ((*code == NE || *code == EQ)
1763 && (GET_CODE (*op0) == EQ || GET_CODE (*op0) == NE)
1764 && GET_MODE (*op0) == SImode
1765 && GET_MODE (XEXP (*op0, 0)) == CCZ1mode
1766 && REG_P (XEXP (*op0, 0))
1767 && XEXP (*op0, 1) == const0_rtx
1768 && *op1 == const0_rtx)
1769 {
1770 if ((*code == EQ && GET_CODE (*op0) == NE)
1771 || (*code == NE && GET_CODE (*op0) == EQ))
1772 *code = EQ;
1773 else
1774 *code = NE;
1775 *op0 = XEXP (*op0, 0);
1776 }
1777
1778 /* Prefer register over memory as first operand. */
1779 if (MEM_P (*op0) && REG_P (*op1))
1780 {
1781 rtx tem = *op0; *op0 = *op1; *op1 = tem;
1782 *code = (int)swap_condition ((enum rtx_code)*code);
1783 }
1784
1785 /* A comparison result is compared against zero. Replace it with
1786 the (perhaps inverted) original comparison.
1787 This probably should be done by simplify_relational_operation. */
1788 if ((*code == EQ || *code == NE)
1789 && *op1 == const0_rtx
1790 && COMPARISON_P (*op0)
1791 && CC_REG_P (XEXP (*op0, 0)))
1792 {
1793 enum rtx_code new_code;
1794
1795 if (*code == EQ)
1796 new_code = reversed_comparison_code_parts (GET_CODE (*op0),
1797 XEXP (*op0, 0),
1798 XEXP (*op0, 1), NULL);
1799 else
1800 new_code = GET_CODE (*op0);
1801
1802 if (new_code != UNKNOWN)
1803 {
1804 *code = new_code;
1805 *op1 = XEXP (*op0, 1);
1806 *op0 = XEXP (*op0, 0);
1807 }
1808 }
1809 }
1810
1811
1812 /* Emit a compare instruction suitable to implement the comparison
1813 OP0 CODE OP1. Return the correct condition RTL to be placed in
1814 the IF_THEN_ELSE of the conditional branch testing the result. */
1815
1816 rtx
1817 s390_emit_compare (enum rtx_code code, rtx op0, rtx op1)
1818 {
1819 machine_mode mode = s390_select_ccmode (code, op0, op1);
1820 rtx cc;
1821
1822 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC)
1823 {
1824 /* Do not output a redundant compare instruction if a
1825 compare_and_swap pattern already computed the result and the
1826 machine modes are compatible. */
1827 gcc_assert (s390_cc_modes_compatible (GET_MODE (op0), mode)
1828 == GET_MODE (op0));
1829 cc = op0;
1830 }
1831 else
1832 {
1833 cc = gen_rtx_REG (mode, CC_REGNUM);
1834 emit_insn (gen_rtx_SET (cc, gen_rtx_COMPARE (mode, op0, op1)));
1835 }
1836
1837 return gen_rtx_fmt_ee (code, VOIDmode, cc, const0_rtx);
1838 }
1839
1840 /* Emit a SImode compare and swap instruction setting MEM to NEW_RTX if OLD
1841 matches CMP.
1842 Return the correct condition RTL to be placed in the IF_THEN_ELSE of the
1843 conditional branch testing the result. */
1844
1845 static rtx
1846 s390_emit_compare_and_swap (enum rtx_code code, rtx old, rtx mem,
1847 rtx cmp, rtx new_rtx, machine_mode ccmode)
1848 {
1849 rtx cc;
1850
1851 cc = gen_rtx_REG (ccmode, CC_REGNUM);
1852 switch (GET_MODE (mem))
1853 {
1854 case E_SImode:
1855 emit_insn (gen_atomic_compare_and_swapsi_internal (old, mem, cmp,
1856 new_rtx, cc));
1857 break;
1858 case E_DImode:
1859 emit_insn (gen_atomic_compare_and_swapdi_internal (old, mem, cmp,
1860 new_rtx, cc));
1861 break;
1862 case E_TImode:
1863 emit_insn (gen_atomic_compare_and_swapti_internal (old, mem, cmp,
1864 new_rtx, cc));
1865 break;
1866 case E_QImode:
1867 case E_HImode:
1868 default:
1869 gcc_unreachable ();
1870 }
1871 return s390_emit_compare (code, cc, const0_rtx);
1872 }
1873
1874 /* Emit a jump instruction to TARGET and return it. If COND is
1875 NULL_RTX, emit an unconditional jump, else a conditional jump under
1876 condition COND. */
1877
1878 rtx_insn *
1879 s390_emit_jump (rtx target, rtx cond)
1880 {
1881 rtx insn;
1882
1883 target = gen_rtx_LABEL_REF (VOIDmode, target);
1884 if (cond)
1885 target = gen_rtx_IF_THEN_ELSE (VOIDmode, cond, target, pc_rtx);
1886
1887 insn = gen_rtx_SET (pc_rtx, target);
1888 return emit_jump_insn (insn);
1889 }
1890
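/* A minimal usage sketch (illustrative only, not part of the build):
   to branch to LABEL when OP0 is greater than OP1, a caller combines
   the two helpers above:

     rtx cond = s390_emit_compare (GT, op0, op1);
     s390_emit_jump (label, cond);

   This emits (set (reg CC) (compare op0 op1)) followed by a jump
   whose IF_THEN_ELSE tests (gt (reg CC) (const_int 0)).  */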
1891 /* Return branch condition mask to implement a branch
1892 specified by CODE. Return -1 for invalid comparisons. */
1893
1894 int
1895 s390_branch_condition_mask (rtx code)
1896 {
1897 const int CC0 = 1 << 3;
1898 const int CC1 = 1 << 2;
1899 const int CC2 = 1 << 1;
1900 const int CC3 = 1 << 0;
1901
1902 gcc_assert (GET_CODE (XEXP (code, 0)) == REG);
1903 gcc_assert (REGNO (XEXP (code, 0)) == CC_REGNUM);
1904 gcc_assert (XEXP (code, 1) == const0_rtx
1905 || (GET_MODE (XEXP (code, 0)) == CCRAWmode
1906 && CONST_INT_P (XEXP (code, 1))));
1907
1908
1909 switch (GET_MODE (XEXP (code, 0)))
1910 {
1911 case E_CCZmode:
1912 case E_CCZ1mode:
1913 switch (GET_CODE (code))
1914 {
1915 case EQ: return CC0;
1916 case NE: return CC1 | CC2 | CC3;
1917 default: return -1;
1918 }
1919 break;
1920
1921 case E_CCT1mode:
1922 switch (GET_CODE (code))
1923 {
1924 case EQ: return CC1;
1925 case NE: return CC0 | CC2 | CC3;
1926 default: return -1;
1927 }
1928 break;
1929
1930 case E_CCT2mode:
1931 switch (GET_CODE (code))
1932 {
1933 case EQ: return CC2;
1934 case NE: return CC0 | CC1 | CC3;
1935 default: return -1;
1936 }
1937 break;
1938
1939 case E_CCT3mode:
1940 switch (GET_CODE (code))
1941 {
1942 case EQ: return CC3;
1943 case NE: return CC0 | CC1 | CC2;
1944 default: return -1;
1945 }
1946 break;
1947
1948 case E_CCLmode:
1949 switch (GET_CODE (code))
1950 {
1951 case EQ: return CC0 | CC2;
1952 case NE: return CC1 | CC3;
1953 default: return -1;
1954 }
1955 break;
1956
1957 case E_CCL1mode:
1958 switch (GET_CODE (code))
1959 {
1960 case LTU: return CC2 | CC3; /* carry */
1961 case GEU: return CC0 | CC1; /* no carry */
1962 default: return -1;
1963 }
1964 break;
1965
1966 case E_CCL2mode:
1967 switch (GET_CODE (code))
1968 {
1969 case GTU: return CC0 | CC1; /* borrow */
1970 case LEU: return CC2 | CC3; /* no borrow */
1971 default: return -1;
1972 }
1973 break;
1974
1975 case E_CCL3mode:
1976 switch (GET_CODE (code))
1977 {
1978 case EQ: return CC0 | CC2;
1979 case NE: return CC1 | CC3;
1980 case LTU: return CC1;
1981 case GTU: return CC3;
1982 case LEU: return CC1 | CC2;
1983 case GEU: return CC2 | CC3;
1984 default: return -1;
1985 }
1986
1987 case E_CCUmode:
1988 switch (GET_CODE (code))
1989 {
1990 case EQ: return CC0;
1991 case NE: return CC1 | CC2 | CC3;
1992 case LTU: return CC1;
1993 case GTU: return CC2;
1994 case LEU: return CC0 | CC1;
1995 case GEU: return CC0 | CC2;
1996 default: return -1;
1997 }
1998 break;
1999
2000 case E_CCURmode:
2001 switch (GET_CODE (code))
2002 {
2003 case EQ: return CC0;
2004 case NE: return CC2 | CC1 | CC3;
2005 case LTU: return CC2;
2006 case GTU: return CC1;
2007 case LEU: return CC0 | CC2;
2008 case GEU: return CC0 | CC1;
2009 default: return -1;
2010 }
2011 break;
2012
2013 case E_CCAPmode:
2014 switch (GET_CODE (code))
2015 {
2016 case EQ: return CC0;
2017 case NE: return CC1 | CC2 | CC3;
2018 case LT: return CC1 | CC3;
2019 case GT: return CC2;
2020 case LE: return CC0 | CC1 | CC3;
2021 case GE: return CC0 | CC2;
2022 default: return -1;
2023 }
2024 break;
2025
2026 case E_CCANmode:
2027 switch (GET_CODE (code))
2028 {
2029 case EQ: return CC0;
2030 case NE: return CC1 | CC2 | CC3;
2031 case LT: return CC1;
2032 case GT: return CC2 | CC3;
2033 case LE: return CC0 | CC1;
2034 case GE: return CC0 | CC2 | CC3;
2035 default: return -1;
2036 }
2037 break;
2038
2039 case E_CCSmode:
2040 switch (GET_CODE (code))
2041 {
2042 case EQ: return CC0;
2043 case NE: return CC1 | CC2 | CC3;
2044 case LT: return CC1;
2045 case GT: return CC2;
2046 case LE: return CC0 | CC1;
2047 case GE: return CC0 | CC2;
2048 case UNORDERED: return CC3;
2049 case ORDERED: return CC0 | CC1 | CC2;
2050 case UNEQ: return CC0 | CC3;
2051 case UNLT: return CC1 | CC3;
2052 case UNGT: return CC2 | CC3;
2053 case UNLE: return CC0 | CC1 | CC3;
2054 case UNGE: return CC0 | CC2 | CC3;
2055 case LTGT: return CC1 | CC2;
2056 default: return -1;
2057 }
2058 break;
2059
2060 case E_CCSRmode:
2061 switch (GET_CODE (code))
2062 {
2063 case EQ: return CC0;
2064 case NE: return CC2 | CC1 | CC3;
2065 case LT: return CC2;
2066 case GT: return CC1;
2067 case LE: return CC0 | CC2;
2068 case GE: return CC0 | CC1;
2069 case UNORDERED: return CC3;
2070 case ORDERED: return CC0 | CC2 | CC1;
2071 case UNEQ: return CC0 | CC3;
2072 case UNLT: return CC2 | CC3;
2073 case UNGT: return CC1 | CC3;
2074 case UNLE: return CC0 | CC2 | CC3;
2075 case UNGE: return CC0 | CC1 | CC3;
2076 case LTGT: return CC2 | CC1;
2077 default: return -1;
2078 }
2079 break;
2080
2081 /* Vector comparison modes. */
2082 /* CC2 will never be set.  It is, however, part of the negated
2083 masks.  */
2084 case E_CCVIALLmode:
2085 switch (GET_CODE (code))
2086 {
2087 case EQ:
2088 case GTU:
2089 case GT:
2090 case GE: return CC0;
2091 /* The inverted modes are in fact *any* modes. */
2092 case NE:
2093 case LEU:
2094 case LE:
2095 case LT: return CC3 | CC1 | CC2;
2096 default: return -1;
2097 }
2098
2099 case E_CCVIANYmode:
2100 switch (GET_CODE (code))
2101 {
2102 case EQ:
2103 case GTU:
2104 case GT:
2105 case GE: return CC0 | CC1;
2106 /* The inverted modes are in fact *all* modes. */
2107 case NE:
2108 case LEU:
2109 case LE:
2110 case LT: return CC3 | CC2;
2111 default: return -1;
2112 }
2113 case E_CCVFALLmode:
2114 switch (GET_CODE (code))
2115 {
2116 case EQ:
2117 case GT:
2118 case GE: return CC0;
2119 /* The inverted modes are in fact *any* modes. */
2120 case NE:
2121 case UNLE:
2122 case UNLT: return CC3 | CC1 | CC2;
2123 default: return -1;
2124 }
2125
2126 case E_CCVFANYmode:
2127 switch (GET_CODE (code))
2128 {
2129 case EQ:
2130 case GT:
2131 case GE: return CC0 | CC1;
2132 /* The inverted modes are in fact *all* modes. */
2133 case NE:
2134 case UNLE:
2135 case UNLT: return CC3 | CC2;
2136 default: return -1;
2137 }
2138
2139 case E_CCRAWmode:
2140 switch (GET_CODE (code))
2141 {
2142 case EQ:
2143 return INTVAL (XEXP (code, 1));
2144 case NE:
2145 return (INTVAL (XEXP (code, 1))) ^ 0xf;
2146 default:
2147 gcc_unreachable ();
2148 }
2149
2150 default:
2151 return -1;
2152 }
2153 }
2154
2155
2156 /* Return branch condition mask to implement a compare and branch
2157 specified by CODE. Return -1 for invalid comparisons. */
2158
2159 int
2160 s390_compare_and_branch_condition_mask (rtx code)
2161 {
2162 const int CC0 = 1 << 3;
2163 const int CC1 = 1 << 2;
2164 const int CC2 = 1 << 1;
2165
2166 switch (GET_CODE (code))
2167 {
2168 case EQ:
2169 return CC0;
2170 case NE:
2171 return CC1 | CC2;
2172 case LT:
2173 case LTU:
2174 return CC1;
2175 case GT:
2176 case GTU:
2177 return CC2;
2178 case LE:
2179 case LEU:
2180 return CC0 | CC1;
2181 case GE:
2182 case GEU:
2183 return CC0 | CC2;
2184 default:
2185 gcc_unreachable ();
2186 }
2187 return -1;
2188 }
2189
2190 /* If INV is false, return assembler mnemonic string to implement
2191 a branch specified by CODE. If INV is true, return mnemonic
2192 for the corresponding inverted branch. */
2193
2194 static const char *
2195 s390_branch_condition_mnemonic (rtx code, int inv)
2196 {
2197 int mask;
2198
2199 static const char *const mnemonic[16] =
2200 {
2201 NULL, "o", "h", "nle",
2202 "l", "nhe", "lh", "ne",
2203 "e", "nlh", "he", "nl",
2204 "le", "nh", "no", NULL
2205 };
2206
2207 if (GET_CODE (XEXP (code, 0)) == REG
2208 && REGNO (XEXP (code, 0)) == CC_REGNUM
2209 && (XEXP (code, 1) == const0_rtx
2210 || (GET_MODE (XEXP (code, 0)) == CCRAWmode
2211 && CONST_INT_P (XEXP (code, 1)))))
2212 mask = s390_branch_condition_mask (code);
2213 else
2214 mask = s390_compare_and_branch_condition_mask (code);
2215
2216 gcc_assert (mask >= 0);
2217
2218 if (inv)
2219 mask ^= 15;
2220
2221 gcc_assert (mask >= 1 && mask <= 14);
2222
2223 return mnemonic[mask];
2224 }
2225
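/* Worked example for the two functions above: a GE comparison in
   CCSmode yields the mask CC0 | CC2 = 8 | 2 = 10, and mnemonic[10]
   is "he" (branch on high or equal).  The inverted branch uses
   10 ^ 15 = 5, i.e. "nhe".  */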
2226 /* Return the part of OP which has a value different from DEF.
2227 The size of the part is determined by MODE.
2228 Use this function only if you already know that OP really
2229 contains such a part.  */
2230
2231 unsigned HOST_WIDE_INT
2232 s390_extract_part (rtx op, machine_mode mode, int def)
2233 {
2234 unsigned HOST_WIDE_INT value = 0;
2235 int max_parts = HOST_BITS_PER_WIDE_INT / GET_MODE_BITSIZE (mode);
2236 int part_bits = GET_MODE_BITSIZE (mode);
2237 unsigned HOST_WIDE_INT part_mask = (HOST_WIDE_INT_1U << part_bits) - 1;
2238 int i;
2239
2240 for (i = 0; i < max_parts; i++)
2241 {
2242 if (i == 0)
2243 value = UINTVAL (op);
2244 else
2245 value >>= part_bits;
2246
2247 if ((value & part_mask) != (def & part_mask))
2248 return value & part_mask;
2249 }
2250
2251 gcc_unreachable ();
2252 }
2253
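/* Worked example (values chosen for illustration): with
   OP = 0x12340000, MODE = HImode and DEF = 0, the halfword parts seen
   from the least significant end are 0x0000, 0x1234, 0x0000, 0x0000;
   the first part differing from DEF is 0x1234, which is returned.  */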
2254 /* If OP is an integer constant of mode MODE with exactly one
2255 part of mode PART_MODE unequal to DEF, return the number of that
2256 part. Otherwise, return -1. */
2257
2258 int
2259 s390_single_part (rtx op,
2260 machine_mode mode,
2261 machine_mode part_mode,
2262 int def)
2263 {
2264 unsigned HOST_WIDE_INT value = 0;
2265 int n_parts = GET_MODE_SIZE (mode) / GET_MODE_SIZE (part_mode);
2266 unsigned HOST_WIDE_INT part_mask
2267 = (HOST_WIDE_INT_1U << GET_MODE_BITSIZE (part_mode)) - 1;
2268 int i, part = -1;
2269
2270 if (GET_CODE (op) != CONST_INT)
2271 return -1;
2272
2273 for (i = 0; i < n_parts; i++)
2274 {
2275 if (i == 0)
2276 value = UINTVAL (op);
2277 else
2278 value >>= GET_MODE_BITSIZE (part_mode);
2279
2280 if ((value & part_mask) != (def & part_mask))
2281 {
2282 if (part != -1)
2283 return -1;
2284 else
2285 part = i;
2286 }
2287 }
2288 return part == -1 ? -1 : n_parts - 1 - part;
2289 }
2290
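/* Worked example: for OP = 0x12340000, MODE = DImode,
   PART_MODE = HImode and DEF = 0, the loop finds the (LSB-relative)
   part 1 as the only halfword unequal to DEF and returns
   n_parts - 1 - 1 = 2, i.e. the part number counted from the most
   significant halfword: 0x0000 0x0000 0x1234 0x0000.  */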
2291 /* Return true if IN contains a contiguous bitfield in the lower SIZE
2292 bits and no other bits are set in (the lower SIZE bits of) IN.
2293
2294 PSTART and PEND can be used to obtain the start and end
2295 position (inclusive) of the bitfield relative to 64
2296 bits. *PSTART / *PEND gives the position of the first/last bit
2297 of the bitfield counting from the highest order bit starting
2298 with zero. */
2299
2300 bool
2301 s390_contiguous_bitmask_nowrap_p (unsigned HOST_WIDE_INT in, int size,
2302 int *pstart, int *pend)
2303 {
2304 int start;
2305 int end = -1;
2306 int lowbit = HOST_BITS_PER_WIDE_INT - 1;
2307 int highbit = HOST_BITS_PER_WIDE_INT - size;
2308 unsigned HOST_WIDE_INT bitmask = HOST_WIDE_INT_1U;
2309
2310 gcc_assert (!!pstart == !!pend);
2311 for (start = lowbit; start >= highbit; bitmask <<= 1, start--)
2312 if (end == -1)
2313 {
2314 /* Look for the rightmost bit of a contiguous range of ones. */
2315 if (bitmask & in)
2316 /* Found it. */
2317 end = start;
2318 }
2319 else
2320 {
2321 /* Look for the first zero bit after the range of ones.  */
2322 if (! (bitmask & in))
2323 /* Found it. */
2324 break;
2325 }
2326 /* We're one past the last one-bit. */
2327 start++;
2328
2329 if (end == -1)
2330 /* No one-bits found.  */
2331 return false;
2332
2333 if (start > highbit)
2334 {
2335 unsigned HOST_WIDE_INT mask;
2336
2337 /* Calculate a mask for all bits beyond the contiguous bits. */
2338 mask = ((~HOST_WIDE_INT_0U >> highbit)
2339 & (~HOST_WIDE_INT_0U << (lowbit - start + 1)));
2340 if (mask & in)
2341 /* There are more bits set beyond the first range of one bits. */
2342 return false;
2343 }
2344
2345 if (pstart)
2346 {
2347 *pstart = start;
2348 *pend = end;
2349 }
2350
2351 return true;
2352 }
2353
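/* Worked example: IN = 0x0ff0 with SIZE = 16 contains the one-bits
   4..11 (counting from the least significant bit).  Relative to the
   highest order bit of the 64-bit value this corresponds to
   *PSTART = 52 and *PEND = 59, and the function returns true.  */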
2354 /* Same as s390_contiguous_bitmask_nowrap_p but also returns true
2355 if ~IN contains a contiguous bitfield. In that case, *END is <
2356 *START.
2357
2358 If WRAP_P is true, a bitmask that wraps around is also tested.
2359 When a wraparound occurs, *START is greater than *END (assuming
2360 non-null pointers), and the uppermost (64 - SIZE) bits are thus
2361 part of the range.  If WRAP_P is false, no wraparound is
2362 tested.  */
2363
2364 bool
2365 s390_contiguous_bitmask_p (unsigned HOST_WIDE_INT in, bool wrap_p,
2366 int size, int *start, int *end)
2367 {
2368 int bs = HOST_BITS_PER_WIDE_INT;
2369 bool b;
2370
2371 gcc_assert (!!start == !!end);
2372 if ((in & ((~HOST_WIDE_INT_0U) >> (bs - size))) == 0)
2373 /* This cannot be expressed as a contiguous bitmask. Exit early because
2374 the second call of s390_contiguous_bitmask_nowrap_p would accept this as
2375 a valid bitmask. */
2376 return false;
2377 b = s390_contiguous_bitmask_nowrap_p (in, size, start, end);
2378 if (b)
2379 return true;
2380 if (! wrap_p)
2381 return false;
2382 b = s390_contiguous_bitmask_nowrap_p (~in, size, start, end);
2383 if (b && start)
2384 {
2385 int s = *start;
2386 int e = *end;
2387
2388 gcc_assert (s >= 1);
2389 *start = ((e + 1) & (bs - 1));
2390 *end = ((s - 1 + bs) & (bs - 1));
2391 }
2392
2393 return b;
2394 }
2395
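/* Worked example of the wraparound case: for
   IN = 0xff000000000000ff with SIZE = 64, ~IN holds the contiguous
   one-bits 8..55, so the second call succeeds with start = 8 and
   end = 55 (MSB-relative).  After the swap the result is *START = 56
   and *END = 7; *START > *END signals that the bitfield wraps around
   the bit 63/0 boundary.  */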
2396 /* Return true if OP contains the same contiguous bitfield in *all*
2397 its elements. START and END can be used to obtain the start and
2398 end position of the bitfield.
2399
2400 START/END give the position of the first/last bit of the bitfield
2401 counting from the lowest order bit starting with zero. In order to
2402 use these values for S/390 instructions this has to be converted to
2403 "bits big endian" style. */
2404
2405 bool
2406 s390_contiguous_bitmask_vector_p (rtx op, int *start, int *end)
2407 {
2408 unsigned HOST_WIDE_INT mask;
2409 int size;
2410 rtx elt;
2411 bool b;
2412
2413 gcc_assert (!!start == !!end);
2414 if (!const_vec_duplicate_p (op, &elt)
2415 || !CONST_INT_P (elt))
2416 return false;
2417
2418 size = GET_MODE_UNIT_BITSIZE (GET_MODE (op));
2419
2420 /* We cannot deal with V1TI/V1TF. This would require a vgmq. */
2421 if (size > 64)
2422 return false;
2423
2424 mask = UINTVAL (elt);
2425
2426 b = s390_contiguous_bitmask_p (mask, true, size, start, end);
2427 if (b)
2428 {
2429 if (start)
2430 {
2431 *start -= (HOST_BITS_PER_WIDE_INT - size);
2432 *end -= (HOST_BITS_PER_WIDE_INT - size);
2433 }
2434 return true;
2435 }
2436 else
2437 return false;
2438 }
2439
2440 /* Return true if OP consists only of byte chunks being either 0 or
2441 0xff.  If MASK is non-NULL, a byte mask is generated which is
2442 appropriate for the vector generate byte mask (vgbm) instruction.  */
2443
2444 bool
2445 s390_bytemask_vector_p (rtx op, unsigned *mask)
2446 {
2447 int i;
2448 unsigned tmp_mask = 0;
2449 int nunit, unit_size;
2450
2451 if (!VECTOR_MODE_P (GET_MODE (op))
2452 || GET_CODE (op) != CONST_VECTOR
2453 || !CONST_INT_P (XVECEXP (op, 0, 0)))
2454 return false;
2455
2456 nunit = GET_MODE_NUNITS (GET_MODE (op));
2457 unit_size = GET_MODE_UNIT_SIZE (GET_MODE (op));
2458
2459 for (i = 0; i < nunit; i++)
2460 {
2461 unsigned HOST_WIDE_INT c;
2462 int j;
2463
2464 if (!CONST_INT_P (XVECEXP (op, 0, i)))
2465 return false;
2466
2467 c = UINTVAL (XVECEXP (op, 0, i));
2468 for (j = 0; j < unit_size; j++)
2469 {
2470 if ((c & 0xff) != 0 && (c & 0xff) != 0xff)
2471 return false;
2472 tmp_mask |= (c & 1) << ((nunit - 1 - i) * unit_size + j);
2473 c = c >> BITS_PER_UNIT;
2474 }
2475 }
2476
2477 if (mask != NULL)
2478 *mask = tmp_mask;
2479
2480 return true;
2481 }
2482
2483 /* Check whether a rotate of ROTL followed by an AND of CONTIG is
2484 equivalent to a shift followed by the AND. In particular, CONTIG
2485 should not overlap the (rotated) bit 0/bit 63 gap. Negative values
2486 for ROTL indicate a rotate to the right. */
2487
2488 bool
2489 s390_extzv_shift_ok (int bitsize, int rotl, unsigned HOST_WIDE_INT contig)
2490 {
2491 int start, end;
2492 bool ok;
2493
2494 ok = s390_contiguous_bitmask_nowrap_p (contig, bitsize, &start, &end);
2495 gcc_assert (ok);
2496
2497 if (rotl >= 0)
2498 return (64 - end >= rotl);
2499 else
2500 {
2501 /* Translate "- rotate right" in BITSIZE mode to "rotate left" in
2502 DImode.  */
2503 rotl = -rotl + (64 - bitsize);
2504 return (start >= rotl);
2505 }
2506 }
2507
2508 /* Check whether we can (and want to) split a double-word
2509 move in mode MODE from SRC to DST into two single-word
2510 moves, moving the subword FIRST_SUBWORD first. */
2511
2512 bool
2513 s390_split_ok_p (rtx dst, rtx src, machine_mode mode, int first_subword)
2514 {
2515 /* Floating point and vector registers cannot be split. */
2516 if (FP_REG_P (src) || FP_REG_P (dst) || VECTOR_REG_P (src) || VECTOR_REG_P (dst))
2517 return false;
2518
2519 /* Non-offsettable memory references cannot be split. */
2520 if ((GET_CODE (src) == MEM && !offsettable_memref_p (src))
2521 || (GET_CODE (dst) == MEM && !offsettable_memref_p (dst)))
2522 return false;
2523
2524 /* Moving the first subword must not clobber a register
2525 needed to move the second subword. */
2526 if (register_operand (dst, mode))
2527 {
2528 rtx subreg = operand_subword (dst, first_subword, 0, mode);
2529 if (reg_overlap_mentioned_p (subreg, src))
2530 return false;
2531 }
2532
2533 return true;
2534 }
2535
2536 /* Return true if it can be proven that [MEM1, MEM1 + SIZE]
2537 and [MEM2, MEM2 + SIZE] overlap, and false
2538 otherwise. */
2539
2540 bool
2541 s390_overlap_p (rtx mem1, rtx mem2, HOST_WIDE_INT size)
2542 {
2543 rtx addr1, addr2, addr_delta;
2544 HOST_WIDE_INT delta;
2545
2546 if (GET_CODE (mem1) != MEM || GET_CODE (mem2) != MEM)
2547 return true;
2548
2549 if (size == 0)
2550 return false;
2551
2552 addr1 = XEXP (mem1, 0);
2553 addr2 = XEXP (mem2, 0);
2554
2555 addr_delta = simplify_binary_operation (MINUS, Pmode, addr2, addr1);
2556
2557 /* This overlapping check is used by peepholes merging memory block operations.
2558 Overlapping operations would otherwise be recognized by the S/390 hardware
2559 and would fall back to a slower implementation. Allowing overlapping
2560 operations would lead to slow code but not to wrong code. Therefore we are
2561 somewhat optimistic if we cannot prove that the memory blocks are
2562 overlapping.
2563 That's why we return false here although this may accept operations on
2564 overlapping memory areas. */
2565 if (!addr_delta || GET_CODE (addr_delta) != CONST_INT)
2566 return false;
2567
2568 delta = INTVAL (addr_delta);
2569
2570 if (delta == 0
2571 || (delta > 0 && delta < size)
2572 || (delta < 0 && -delta < size))
2573 return true;
2574
2575 return false;
2576 }
2577
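/* Worked example: two memory blocks based off the same register at
   offsets 0 and 8 give an address delta of 8.  With SIZE = 16 the
   check 0 < delta < SIZE holds and the blocks are reported as
   overlapping; with SIZE = 8 the blocks merely touch, and false is
   returned.  */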
2578 /* Check whether the address of memory reference MEM2 equals exactly
2579 the address of memory reference MEM1 plus DELTA. Return true if
2580 we can prove this to be the case, false otherwise. */
2581
2582 bool
2583 s390_offset_p (rtx mem1, rtx mem2, rtx delta)
2584 {
2585 rtx addr1, addr2, addr_delta;
2586
2587 if (GET_CODE (mem1) != MEM || GET_CODE (mem2) != MEM)
2588 return false;
2589
2590 addr1 = XEXP (mem1, 0);
2591 addr2 = XEXP (mem2, 0);
2592
2593 addr_delta = simplify_binary_operation (MINUS, Pmode, addr2, addr1);
2594 if (!addr_delta || !rtx_equal_p (addr_delta, delta))
2595 return false;
2596
2597 return true;
2598 }
2599
2600 /* Expand logical operator CODE in mode MODE with operands OPERANDS. */
2601
2602 void
2603 s390_expand_logical_operator (enum rtx_code code, machine_mode mode,
2604 rtx *operands)
2605 {
2606 machine_mode wmode = mode;
2607 rtx dst = operands[0];
2608 rtx src1 = operands[1];
2609 rtx src2 = operands[2];
2610 rtx op, clob, tem;
2611
2612 /* If we cannot handle the operation directly, use a temp register. */
2613 if (!s390_logical_operator_ok_p (operands))
2614 dst = gen_reg_rtx (mode);
2615
2616 /* QImode and HImode patterns make sense only if we have a destination
2617 in memory. Otherwise perform the operation in SImode. */
2618 if ((mode == QImode || mode == HImode) && GET_CODE (dst) != MEM)
2619 wmode = SImode;
2620
2621 /* Widen operands if required. */
2622 if (mode != wmode)
2623 {
2624 if (GET_CODE (dst) == SUBREG
2625 && (tem = simplify_subreg (wmode, dst, mode, 0)) != 0)
2626 dst = tem;
2627 else if (REG_P (dst))
2628 dst = gen_rtx_SUBREG (wmode, dst, 0);
2629 else
2630 dst = gen_reg_rtx (wmode);
2631
2632 if (GET_CODE (src1) == SUBREG
2633 && (tem = simplify_subreg (wmode, src1, mode, 0)) != 0)
2634 src1 = tem;
2635 else if (GET_MODE (src1) != VOIDmode)
2636 src1 = gen_rtx_SUBREG (wmode, force_reg (mode, src1), 0);
2637
2638 if (GET_CODE (src2) == SUBREG
2639 && (tem = simplify_subreg (wmode, src2, mode, 0)) != 0)
2640 src2 = tem;
2641 else if (GET_MODE (src2) != VOIDmode)
2642 src2 = gen_rtx_SUBREG (wmode, force_reg (mode, src2), 0);
2643 }
2644
2645 /* Emit the instruction. */
2646 op = gen_rtx_SET (dst, gen_rtx_fmt_ee (code, wmode, src1, src2));
2647 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, CC_REGNUM));
2648 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
2649
2650 /* Fix up the destination if needed. */
2651 if (dst != operands[0])
2652 emit_move_insn (operands[0], gen_lowpart (mode, dst));
2653 }
2654
2655 /* Check whether OPERANDS are OK for a logical operation (AND, IOR, XOR). */
2656
2657 bool
2658 s390_logical_operator_ok_p (rtx *operands)
2659 {
2660 /* If the destination operand is in memory, it needs to coincide
2661 with one of the source operands. After reload, it has to be
2662 the first source operand. */
2663 if (GET_CODE (operands[0]) == MEM)
2664 return rtx_equal_p (operands[0], operands[1])
2665 || (!reload_completed && rtx_equal_p (operands[0], operands[2]));
2666
2667 return true;
2668 }
2669
2670 /* Narrow logical operation CODE of memory operand MEMOP with immediate
2671 operand IMMOP to switch from SS to SI type instructions. */
2672
2673 void
2674 s390_narrow_logical_operator (enum rtx_code code, rtx *memop, rtx *immop)
2675 {
2676 int def = code == AND ? -1 : 0;
2677 HOST_WIDE_INT mask;
2678 int part;
2679
2680 gcc_assert (GET_CODE (*memop) == MEM);
2681 gcc_assert (!MEM_VOLATILE_P (*memop));
2682
2683 mask = s390_extract_part (*immop, QImode, def);
2684 part = s390_single_part (*immop, GET_MODE (*memop), QImode, def);
2685 gcc_assert (part >= 0);
2686
2687 *memop = adjust_address (*memop, QImode, part);
2688 *immop = gen_int_mode (mask, QImode);
2689 }
2690
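/* Worked example of the narrowing above (an AND, so DEF = -1):
   for a HImode memory operand and the immediate 0xfff0, only the low
   byte differs from all-ones.  s390_extract_part returns 0xf0,
   s390_single_part returns part 1 (counted from the most significant
   byte), and the operation is rewritten as a QImode AND of the byte
   at offset 1 with 0xf0 -- a single SI-type "ni" instead of an
   SS-type "nc".  */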
2691
2692 /* How to allocate a 'struct machine_function'. */
2693
2694 static struct machine_function *
2695 s390_init_machine_status (void)
2696 {
2697 return ggc_cleared_alloc<machine_function> ();
2698 }
2699
2700 /* Map for smallest class containing reg regno. */
2701
2702 const enum reg_class regclass_map[FIRST_PSEUDO_REGISTER] =
2703 { GENERAL_REGS, ADDR_REGS, ADDR_REGS, ADDR_REGS, /* 0 */
2704 ADDR_REGS, ADDR_REGS, ADDR_REGS, ADDR_REGS, /* 4 */
2705 ADDR_REGS, ADDR_REGS, ADDR_REGS, ADDR_REGS, /* 8 */
2706 ADDR_REGS, ADDR_REGS, ADDR_REGS, ADDR_REGS, /* 12 */
2707 FP_REGS, FP_REGS, FP_REGS, FP_REGS, /* 16 */
2708 FP_REGS, FP_REGS, FP_REGS, FP_REGS, /* 20 */
2709 FP_REGS, FP_REGS, FP_REGS, FP_REGS, /* 24 */
2710 FP_REGS, FP_REGS, FP_REGS, FP_REGS, /* 28 */
2711 ADDR_REGS, CC_REGS, ADDR_REGS, ADDR_REGS, /* 32 */
2712 ACCESS_REGS, ACCESS_REGS, VEC_REGS, VEC_REGS, /* 36 */
2713 VEC_REGS, VEC_REGS, VEC_REGS, VEC_REGS, /* 40 */
2714 VEC_REGS, VEC_REGS, VEC_REGS, VEC_REGS, /* 44 */
2715 VEC_REGS, VEC_REGS, VEC_REGS, VEC_REGS, /* 48 */
2716 VEC_REGS, VEC_REGS /* 52 */
2717 };
2718
2719 /* Return attribute type of insn. */
2720
2721 static enum attr_type
2722 s390_safe_attr_type (rtx_insn *insn)
2723 {
2724 if (recog_memoized (insn) >= 0)
2725 return get_attr_type (insn);
2726 else
2727 return TYPE_NONE;
2728 }
2729
2730 /* Return true if DISP is a valid short displacement. */
2731
2732 static bool
2733 s390_short_displacement (rtx disp)
2734 {
2735 /* No displacement is OK. */
2736 if (!disp)
2737 return true;
2738
2739 /* Without the long displacement facility we don't need to
2740 distinguish between long and short displacements.  */
2741 if (!TARGET_LONG_DISPLACEMENT)
2742 return true;
2743
2744 /* Integer displacement in range. */
2745 if (GET_CODE (disp) == CONST_INT)
2746 return INTVAL (disp) >= 0 && INTVAL (disp) < 4096;
2747
2748 /* GOT offset is not OK, the GOT can be large. */
2749 if (GET_CODE (disp) == CONST
2750 && GET_CODE (XEXP (disp, 0)) == UNSPEC
2751 && (XINT (XEXP (disp, 0), 1) == UNSPEC_GOT
2752 || XINT (XEXP (disp, 0), 1) == UNSPEC_GOTNTPOFF))
2753 return false;
2754
2755 /* All other symbolic constants are literal pool references,
2756 which are OK as the literal pool must be small. */
2757 if (GET_CODE (disp) == CONST)
2758 return true;
2759
2760 return false;
2761 }
2762
2763 /* Decompose an RTL expression ADDR for a memory address into
2764 its components, returned in OUT.
2765
2766 Returns false if ADDR is not a valid memory address, true
2767 otherwise. If OUT is NULL, don't return the components,
2768 but check for validity only.
2769
2770 Note: Only addresses in canonical form are recognized.
2771 LEGITIMIZE_ADDRESS should convert non-canonical forms to the
2772 canonical form so that they will be recognized. */
2773
2774 static int
2775 s390_decompose_address (rtx addr, struct s390_address *out)
2776 {
2777 HOST_WIDE_INT offset = 0;
2778 rtx base = NULL_RTX;
2779 rtx indx = NULL_RTX;
2780 rtx disp = NULL_RTX;
2781 rtx orig_disp;
2782 bool pointer = false;
2783 bool base_ptr = false;
2784 bool indx_ptr = false;
2785 bool literal_pool = false;
2786
2787 /* We may need to substitute the literal pool base register into the address
2788 below. However, at this point we do not know which register is going to
2789 be used as base, so we substitute the arg pointer register. This is going
2790 to be treated as holding a pointer below -- it shouldn't be used for any
2791 other purpose. */
2792 rtx fake_pool_base = gen_rtx_REG (Pmode, ARG_POINTER_REGNUM);
2793
2794 /* Decompose address into base + index + displacement. */
2795
2796 if (GET_CODE (addr) == REG || GET_CODE (addr) == UNSPEC)
2797 base = addr;
2798
2799 else if (GET_CODE (addr) == PLUS)
2800 {
2801 rtx op0 = XEXP (addr, 0);
2802 rtx op1 = XEXP (addr, 1);
2803 enum rtx_code code0 = GET_CODE (op0);
2804 enum rtx_code code1 = GET_CODE (op1);
2805
2806 if (code0 == REG || code0 == UNSPEC)
2807 {
2808 if (code1 == REG || code1 == UNSPEC)
2809 {
2810 indx = op0; /* index + base */
2811 base = op1;
2812 }
2813
2814 else
2815 {
2816 base = op0; /* base + displacement */
2817 disp = op1;
2818 }
2819 }
2820
2821 else if (code0 == PLUS)
2822 {
2823 indx = XEXP (op0, 0); /* index + base + disp */
2824 base = XEXP (op0, 1);
2825 disp = op1;
2826 }
2827
2828 else
2829 {
2830 return false;
2831 }
2832 }
2833
2834 else
2835 disp = addr; /* displacement */
2836
2837 /* Extract integer part of displacement. */
2838 orig_disp = disp;
2839 if (disp)
2840 {
2841 if (GET_CODE (disp) == CONST_INT)
2842 {
2843 offset = INTVAL (disp);
2844 disp = NULL_RTX;
2845 }
2846 else if (GET_CODE (disp) == CONST
2847 && GET_CODE (XEXP (disp, 0)) == PLUS
2848 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
2849 {
2850 offset = INTVAL (XEXP (XEXP (disp, 0), 1));
2851 disp = XEXP (XEXP (disp, 0), 0);
2852 }
2853 }
2854
2855 /* Strip off CONST here to avoid special case tests later. */
2856 if (disp && GET_CODE (disp) == CONST)
2857 disp = XEXP (disp, 0);
2858
2859 /* We can convert literal pool addresses to
2860 displacements by basing them off the base register. */
2861 if (disp && GET_CODE (disp) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (disp))
2862 {
2863 if (base || indx)
2864 return false;
2865
2866 base = fake_pool_base, literal_pool = true;
2867
2868 /* Mark up the displacement. */
2869 disp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, disp),
2870 UNSPEC_LTREL_OFFSET);
2871 }
2872
2873 /* Validate base register. */
2874 if (base)
2875 {
2876 if (GET_CODE (base) == UNSPEC)
2877 switch (XINT (base, 1))
2878 {
2879 case UNSPEC_LTREF:
2880 if (!disp)
2881 disp = gen_rtx_UNSPEC (Pmode,
2882 gen_rtvec (1, XVECEXP (base, 0, 0)),
2883 UNSPEC_LTREL_OFFSET);
2884 else
2885 return false;
2886
2887 base = XVECEXP (base, 0, 1);
2888 break;
2889
2890 case UNSPEC_LTREL_BASE:
2891 if (XVECLEN (base, 0) == 1)
2892 base = fake_pool_base, literal_pool = true;
2893 else
2894 base = XVECEXP (base, 0, 1);
2895 break;
2896
2897 default:
2898 return false;
2899 }
2900
2901 if (!REG_P (base) || GET_MODE (base) != Pmode)
2902 return false;
2903
2904 if (REGNO (base) == STACK_POINTER_REGNUM
2905 || REGNO (base) == FRAME_POINTER_REGNUM
2906 || ((reload_completed || reload_in_progress)
2907 && frame_pointer_needed
2908 && REGNO (base) == HARD_FRAME_POINTER_REGNUM)
2909 || REGNO (base) == ARG_POINTER_REGNUM
2910 || (flag_pic
2911 && REGNO (base) == PIC_OFFSET_TABLE_REGNUM))
2912 pointer = base_ptr = true;
2913
2914 if ((reload_completed || reload_in_progress)
2915 && base == cfun->machine->base_reg)
2916 pointer = base_ptr = literal_pool = true;
2917 }
2918
2919 /* Validate index register. */
2920 if (indx)
2921 {
2922 if (GET_CODE (indx) == UNSPEC)
2923 switch (XINT (indx, 1))
2924 {
2925 case UNSPEC_LTREF:
2926 if (!disp)
2927 disp = gen_rtx_UNSPEC (Pmode,
2928 gen_rtvec (1, XVECEXP (indx, 0, 0)),
2929 UNSPEC_LTREL_OFFSET);
2930 else
2931 return false;
2932
2933 indx = XVECEXP (indx, 0, 1);
2934 break;
2935
2936 case UNSPEC_LTREL_BASE:
2937 if (XVECLEN (indx, 0) == 1)
2938 indx = fake_pool_base, literal_pool = true;
2939 else
2940 indx = XVECEXP (indx, 0, 1);
2941 break;
2942
2943 default:
2944 return false;
2945 }
2946
2947 if (!REG_P (indx) || GET_MODE (indx) != Pmode)
2948 return false;
2949
2950 if (REGNO (indx) == STACK_POINTER_REGNUM
2951 || REGNO (indx) == FRAME_POINTER_REGNUM
2952 || ((reload_completed || reload_in_progress)
2953 && frame_pointer_needed
2954 && REGNO (indx) == HARD_FRAME_POINTER_REGNUM)
2955 || REGNO (indx) == ARG_POINTER_REGNUM
2956 || (flag_pic
2957 && REGNO (indx) == PIC_OFFSET_TABLE_REGNUM))
2958 pointer = indx_ptr = true;
2959
2960 if ((reload_completed || reload_in_progress)
2961 && indx == cfun->machine->base_reg)
2962 pointer = indx_ptr = literal_pool = true;
2963 }
2964
2965 /* Prefer to use pointer as base, not index. */
2966 if (base && indx && !base_ptr
2967 && (indx_ptr || (!REG_POINTER (base) && REG_POINTER (indx))))
2968 {
2969 rtx tmp = base;
2970 base = indx;
2971 indx = tmp;
2972 }
2973
2974 /* Validate displacement. */
2975 if (!disp)
2976 {
2977 /* If virtual registers are involved, the displacement will change later
2978 anyway as the virtual registers get eliminated. This could make a
2979 valid displacement invalid, but it is more likely to make an invalid
2980 displacement valid, because we sometimes access the register save area
2981 via negative offsets to one of those registers.
2982 Thus we don't check the displacement for validity here. If after
2983 elimination the displacement turns out to be invalid after all,
2984 this is fixed up by reload in any case. */
2985 /* LRA always keeps displacements up to date, and we need to know
2986 that the displacement is correct during all of LRA, not only at
2987 the final elimination.  */
2988 if (lra_in_progress
2989 || (base != arg_pointer_rtx
2990 && indx != arg_pointer_rtx
2991 && base != return_address_pointer_rtx
2992 && indx != return_address_pointer_rtx
2993 && base != frame_pointer_rtx
2994 && indx != frame_pointer_rtx
2995 && base != virtual_stack_vars_rtx
2996 && indx != virtual_stack_vars_rtx))
2997 if (!DISP_IN_RANGE (offset))
2998 return false;
2999 }
3000 else
3001 {
3002 /* All the special cases are pointers. */
3003 pointer = true;
3004
3005 /* In the small-PIC case, the linker converts @GOT
3006 and @GOTNTPOFF offsets to possible displacements. */
3007 if (GET_CODE (disp) == UNSPEC
3008 && (XINT (disp, 1) == UNSPEC_GOT
3009 || XINT (disp, 1) == UNSPEC_GOTNTPOFF)
3010 && flag_pic == 1)
3011 {
3012 ;
3013 }
3014
3015 /* Accept pool label offsets. */
3016 else if (GET_CODE (disp) == UNSPEC
3017 && XINT (disp, 1) == UNSPEC_POOL_OFFSET)
3018 ;
3019
3020 /* Accept literal pool references. */
3021 else if (GET_CODE (disp) == UNSPEC
3022 && XINT (disp, 1) == UNSPEC_LTREL_OFFSET)
3023 {
3024 /* In case CSE pulled a non-literal-pool reference out of
3025 the pool, we have to reject the address.  This is
3026 especially important when loading the GOT pointer on
3027 non-zarch CPUs.  In this case the literal pool contains an
3028 lt-relative offset to the _GLOBAL_OFFSET_TABLE_ label which
3029 will most likely exceed the displacement.  */
3030 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
3031 || !CONSTANT_POOL_ADDRESS_P (XVECEXP (disp, 0, 0)))
3032 return false;
3033
3034 orig_disp = gen_rtx_CONST (Pmode, disp);
3035 if (offset)
3036 {
3037 /* If we have an offset, make sure it does not
3038 exceed the size of the constant pool entry. */
3039 rtx sym = XVECEXP (disp, 0, 0);
3040 if (offset >= GET_MODE_SIZE (get_pool_mode (sym)))
3041 return false;
3042
3043 orig_disp = plus_constant (Pmode, orig_disp, offset);
3044 }
3045 }
3046
3047 else
3048 return false;
3049 }
3050
3051 if (!base && !indx)
3052 pointer = true;
3053
3054 if (out)
3055 {
3056 out->base = base;
3057 out->indx = indx;
3058 out->disp = orig_disp;
3059 out->pointer = pointer;
3060 out->literal_pool = literal_pool;
3061 }
3062
3063 return true;
3064 }
3065
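/* Worked example: the canonical address
   (plus (plus (reg %r1) (reg %r2)) (const_int 4000)) decomposes into
   indx = %r1, base = %r2 and an integer displacement of 4000, which
   passes the DISP_IN_RANGE check; OUT then carries these three
   components.  */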
3066 /* Decompose an RTL expression OP for an address style operand into its
3067 components, and return the base register in BASE and the offset in
3068 OFFSET. While OP looks like an address it is never supposed to be
3069 used as such.
3070
3071 Return true if OP is a valid address operand, false if not. */
3072
3073 bool
3074 s390_decompose_addrstyle_without_index (rtx op, rtx *base,
3075 HOST_WIDE_INT *offset)
3076 {
3077 rtx off = NULL_RTX;
3078
3079 /* We can have an integer constant, an address register,
3080 or a sum of the two. */
3081 if (CONST_SCALAR_INT_P (op))
3082 {
3083 off = op;
3084 op = NULL_RTX;
3085 }
3086 if (op && GET_CODE (op) == PLUS && CONST_SCALAR_INT_P (XEXP (op, 1)))
3087 {
3088 off = XEXP (op, 1);
3089 op = XEXP (op, 0);
3090 }
3091 while (op && GET_CODE (op) == SUBREG)
3092 op = SUBREG_REG (op);
3093
3094 if (op && GET_CODE (op) != REG)
3095 return false;
3096
3097 if (offset)
3098 {
3099 if (off == NULL_RTX)
3100 *offset = 0;
3101 else if (CONST_INT_P (off))
3102 *offset = INTVAL (off);
3103 else if (CONST_WIDE_INT_P (off))
3104 /* The offset will be cut down to 12 bits anyway, so take just
3105 the lowest-order chunk of the wide int.  */
3106 *offset = CONST_WIDE_INT_ELT (off, 0);
3107 else
3108 gcc_unreachable ();
3109 }
3110 if (base)
3111 *base = op;
3112
3113 return true;
3114 }
3115
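/* Worked example: a shift-count style operand like
   (plus (reg %r2) (const_int 7)) decomposes into *BASE = %r2 and
   *OFFSET = 7; a plain (const_int 7) yields *BASE = NULL_RTX and
   *OFFSET = 7.  */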
3116
3117 /* Return true if OP is a valid address without index.  */
3118
3119 bool
3120 s390_legitimate_address_without_index_p (rtx op)
3121 {
3122 struct s390_address addr;
3123
3124 if (!s390_decompose_address (XEXP (op, 0), &addr))
3125 return false;
3126 if (addr.indx)
3127 return false;
3128
3129 return true;
3130 }
3131
3132
3133 /* Return TRUE if ADDR is an operand valid for a load/store relative
3134 instruction. Be aware that the alignment of the operand needs to
3135 be checked separately.
3136 Valid addresses are single references or a sum of a reference and a
3137 constant integer.  Return these parts in SYMREF and ADDEND.  You can
3138 pass NULL in SYMREF and/or ADDEND if you are not interested in these
3139 values.  Literal pool references are *not* considered symbol
3140 references. */
3141
3142 static bool
3143 s390_loadrelative_operand_p (rtx addr, rtx *symref, HOST_WIDE_INT *addend)
3144 {
3145 HOST_WIDE_INT tmpaddend = 0;
3146
3147 if (GET_CODE (addr) == CONST)
3148 addr = XEXP (addr, 0);
3149
3150 if (GET_CODE (addr) == PLUS)
3151 {
3152 if (!CONST_INT_P (XEXP (addr, 1)))
3153 return false;
3154
3155 tmpaddend = INTVAL (XEXP (addr, 1));
3156 addr = XEXP (addr, 0);
3157 }
3158
3159 if ((GET_CODE (addr) == SYMBOL_REF && !CONSTANT_POOL_ADDRESS_P (addr))
3160 || (GET_CODE (addr) == UNSPEC
3161 && (XINT (addr, 1) == UNSPEC_GOTENT
3162 || (TARGET_CPU_ZARCH && XINT (addr, 1) == UNSPEC_PLT))))
3163 {
3164 if (symref)
3165 *symref = addr;
3166 if (addend)
3167 *addend = tmpaddend;
3168
3169 return true;
3170 }
3171 return false;
3172 }
3173
3174 /* Return true if the address in OP is valid for constraint letter C
3175 if wrapped in a MEM rtx.  Set LIT_POOL_OK to true if literal
3176 pool MEMs should be accepted. Only the Q, R, S, T constraint
3177 letters are allowed for C. */
3178
3179 static int
3180 s390_check_qrst_address (char c, rtx op, bool lit_pool_ok)
3181 {
3182 struct s390_address addr;
3183 bool decomposed = false;
3184
3185 if (!address_operand (op, GET_MODE (op)))
3186 return 0;
3187
3188 /* This check makes sure that no symbolic address (except literal
3189 pool references) are accepted by the R or T constraints. */
3190 if (s390_loadrelative_operand_p (op, NULL, NULL))
3191 return 0;
3192
3193 /* Ensure literal pool references are only accepted if LIT_POOL_OK. */
3194 if (!lit_pool_ok)
3195 {
3196 if (!s390_decompose_address (op, &addr))
3197 return 0;
3198 if (addr.literal_pool)
3199 return 0;
3200 decomposed = true;
3201 }
3202
3203 /* With reload, we sometimes get intermediate address forms that are
3204 actually invalid as-is, but we need to accept them in the most
3205 generic cases below ('R' or 'T'), since reload will in fact fix
3206 them up. LRA behaves differently here; we never see such forms,
3207 but on the other hand, we need to strictly reject every invalid
3208 address form. Perform this check right up front. */
3209 if (lra_in_progress)
3210 {
3211 if (!decomposed && !s390_decompose_address (op, &addr))
3212 return 0;
3213 decomposed = true;
3214 }
3215
3216 switch (c)
3217 {
3218 case 'Q': /* no index short displacement */
3219 if (!decomposed && !s390_decompose_address (op, &addr))
3220 return 0;
3221 if (addr.indx)
3222 return 0;
3223 if (!s390_short_displacement (addr.disp))
3224 return 0;
3225 break;
3226
3227 case 'R': /* with index short displacement */
3228 if (TARGET_LONG_DISPLACEMENT)
3229 {
3230 if (!decomposed && !s390_decompose_address (op, &addr))
3231 return 0;
3232 if (!s390_short_displacement (addr.disp))
3233 return 0;
3234 }
3235 /* Any invalid address here will be fixed up by reload,
3236 so accept it for the most generic constraint. */
3237 break;
3238
3239 case 'S': /* no index long displacement */
3240 if (!decomposed && !s390_decompose_address (op, &addr))
3241 return 0;
3242 if (addr.indx)
3243 return 0;
3244 break;
3245
3246 case 'T': /* with index long displacement */
3247 /* Any invalid address here will be fixed up by reload,
3248 so accept it for the most generic constraint. */
3249 break;
3250
3251 default:
3252 return 0;
3253 }
3254 return 1;
3255 }
3256
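/* Summary of the constraint letters handled above (a reading aid,
   not new behavior): Q = base + short displacement, no index;
   R = base + index + short displacement; S = base + long
   displacement, no index; T = base + index + long displacement.
   "Short" means an unsigned 12-bit displacement as checked by
   s390_short_displacement; "long" is the 20-bit signed form of the
   long displacement facility.  */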
3257
3258 /* Evaluates constraint strings described by the regular expression
3259 ([ABZ][QRST])|Y and returns 1 if OP is a valid operand for
3260 the constraint given in STR, and 0 otherwise.  */
3261
3262 int
3263 s390_mem_constraint (const char *str, rtx op)
3264 {
3265 char c = str[0];
3266
3267 switch (c)
3268 {
3269 case 'A':
3270 /* Check for offsettable variants of memory constraints. */
3271 if (!MEM_P (op) || MEM_VOLATILE_P (op))
3272 return 0;
3273 if ((reload_completed || reload_in_progress)
3274 ? !offsettable_memref_p (op) : !offsettable_nonstrict_memref_p (op))
3275 return 0;
3276 return s390_check_qrst_address (str[1], XEXP (op, 0), true);
3277 case 'B':
3278 /* Check for non-literal-pool variants of memory constraints. */
3279 if (!MEM_P (op))
3280 return 0;
3281 return s390_check_qrst_address (str[1], XEXP (op, 0), false);
3282 case 'Q':
3283 case 'R':
3284 case 'S':
3285 case 'T':
3286 if (GET_CODE (op) != MEM)
3287 return 0;
3288 return s390_check_qrst_address (c, XEXP (op, 0), true);
3289 case 'Y':
3290 /* Simply check for the basic form of a shift count. Reload will
3291 take care of making sure we have a proper base register. */
3292 if (!s390_decompose_addrstyle_without_index (op, NULL, NULL))
3293 return 0;
3294 break;
3295 case 'Z':
3296 return s390_check_qrst_address (str[1], op, true);
3297 default:
3298 return 0;
3299 }
3300 return 1;
3301 }
3302
3303
3304 /* Evaluates constraint strings starting with letter O.  Input
3305 parameter C is the letter following the "O" in the constraint
3306 string (i.e. the second letter of the constraint).  Returns 1 if
3307 VALUE meets the respective constraint and 0 otherwise.  */
3308
3309 int
3310 s390_O_constraint_str (const char c, HOST_WIDE_INT value)
3311 {
3312 if (!TARGET_EXTIMM)
3313 return 0;
3314
3315 switch (c)
3316 {
3317 case 's':
3318 return trunc_int_for_mode (value, SImode) == value;
3319
3320 case 'p':
3321 return value == 0
3322 || s390_single_part (GEN_INT (value), DImode, SImode, 0) == 1;
3323
3324 case 'n':
3325 return s390_single_part (GEN_INT (value - 1), DImode, SImode, -1) == 1;
3326
3327 default:
3328 gcc_unreachable ();
3329 }
3330 }
3331
3332
3333 /* Evaluates constraint strings starting with letter N. Parameter STR
3334 contains the letters following letter "N" in the constraint string.
3335 Returns true if VALUE matches the constraint. */
3336
3337 int
3338 s390_N_constraint_str (const char *str, HOST_WIDE_INT value)
3339 {
3340 machine_mode mode, part_mode;
3341 int def;
3342 int part, part_goal;
3343
3344
3345 if (str[0] == 'x')
3346 part_goal = -1;
3347 else
3348 part_goal = str[0] - '0';
3349
3350 switch (str[1])
3351 {
3352 case 'Q':
3353 part_mode = QImode;
3354 break;
3355 case 'H':
3356 part_mode = HImode;
3357 break;
3358 case 'S':
3359 part_mode = SImode;
3360 break;
3361 default:
3362 return 0;
3363 }
3364
3365 switch (str[2])
3366 {
3367 case 'H':
3368 mode = HImode;
3369 break;
3370 case 'S':
3371 mode = SImode;
3372 break;
3373 case 'D':
3374 mode = DImode;
3375 break;
3376 default:
3377 return 0;
3378 }
3379
3380 switch (str[3])
3381 {
3382 case '0':
3383 def = 0;
3384 break;
3385 case 'F':
3386 def = -1;
3387 break;
3388 default:
3389 return 0;
3390 }
3391
3392 if (GET_MODE_SIZE (mode) <= GET_MODE_SIZE (part_mode))
3393 return 0;
3394
3395 part = s390_single_part (GEN_INT (value), mode, part_mode, def);
3396 if (part < 0)
3397 return 0;
3398 if (part_goal != -1 && part_goal != part)
3399 return 0;
3400
3401 return 1;
3402 }
3403
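/* Worked example: per the parsing above, the constraint string "3HD0"
   asks for a DImode value whose only HImode part unequal to 0 is part
   3 (counted from the most significant halfword).  VALUE = 0x1234
   qualifies: s390_single_part returns 3, matching part_goal, so the
   function returns 1.  */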
3404
3405 /* Returns true if the input parameter VALUE is a float zero. */
3406
3407 int
3408 s390_float_const_zero_p (rtx value)
3409 {
3410 return (GET_MODE_CLASS (GET_MODE (value)) == MODE_FLOAT
3411 && value == CONST0_RTX (GET_MODE (value)));
3412 }
3413
3414 /* Implement TARGET_REGISTER_MOVE_COST. */
3415
3416 static int
3417 s390_register_move_cost (machine_mode mode,
3418 reg_class_t from, reg_class_t to)
3419 {
3420 /* On s390, copy between fprs and gprs is expensive. */
3421
3422 /* It becomes somewhat faster having ldgr/lgdr. */
3423 if (TARGET_Z10 && GET_MODE_SIZE (mode) == 8)
3424 {
3425 /* ldgr is single cycle. */
3426 if (reg_classes_intersect_p (from, GENERAL_REGS)
3427 && reg_classes_intersect_p (to, FP_REGS))
3428 return 1;
3429 /* lgdr needs 3 cycles. */
3430 if (reg_classes_intersect_p (to, GENERAL_REGS)
3431 && reg_classes_intersect_p (from, FP_REGS))
3432 return 3;
3433 }
3434
3435 /* Otherwise copying is done via memory. */
3436 if ((reg_classes_intersect_p (from, GENERAL_REGS)
3437 && reg_classes_intersect_p (to, FP_REGS))
3438 || (reg_classes_intersect_p (from, FP_REGS)
3439 && reg_classes_intersect_p (to, GENERAL_REGS)))
3440 return 10;
3441
3442 return 1;
3443 }
3444
3445 /* Implement TARGET_MEMORY_MOVE_COST. */
3446
3447 static int
3448 s390_memory_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
3449 reg_class_t rclass ATTRIBUTE_UNUSED,
3450 bool in ATTRIBUTE_UNUSED)
3451 {
3452 return 2;
3453 }
3454
3455 /* Compute a (partial) cost for rtx X. Return true if the complete
3456 cost has been computed, and false if subexpressions should be
3457 scanned. In either case, *TOTAL contains the cost result. The
3458 initial value of *TOTAL is the default value computed by
3459 rtx_cost. It may be left unmodified. OUTER_CODE contains the
3460 code of the superexpression of x. */
3461
3462 static bool
3463 s390_rtx_costs (rtx x, machine_mode mode, int outer_code,
3464 int opno ATTRIBUTE_UNUSED,
3465 int *total, bool speed ATTRIBUTE_UNUSED)
3466 {
3467 int code = GET_CODE (x);
3468 switch (code)
3469 {
3470 case CONST:
3471 case CONST_INT:
3472 case LABEL_REF:
3473 case SYMBOL_REF:
3474 case CONST_DOUBLE:
3475 case CONST_WIDE_INT:
3476 case MEM:
3477 *total = 0;
3478 return true;
3479
3480 case SET:
3481 {
3482 /* Without this a conditional move instruction would be
3483 accounted as 3 * COSTS_N_INSNS (set, if_then_else,
3484 comparison operator). That's a bit pessimistic. */
3485
3486 if (!TARGET_Z196 || GET_CODE (SET_SRC (x)) != IF_THEN_ELSE)
3487 return false;
3488
3489 rtx cond = XEXP (SET_SRC (x), 0);
3490
3491 if (!CC_REG_P (XEXP (cond, 0)) || !CONST_INT_P (XEXP (cond, 1)))
3492 return false;
3493
3494 /* It is going to be a load/store on condition. Make it
3495 slightly more expensive than a normal load. */
3496 *total = COSTS_N_INSNS (1) + 1;
3497
3498 rtx dst = SET_DEST (x);
3499 rtx then = XEXP (SET_SRC (x), 1);
3500 rtx els = XEXP (SET_SRC (x), 2);
3501
3502 /* It is a real IF-THEN-ELSE. An additional move will be
3503 needed to implement that. */
3504 if (reload_completed
3505 && !rtx_equal_p (dst, then)
3506 && !rtx_equal_p (dst, els))
3507 *total += COSTS_N_INSNS (1) / 2;
3508
3509 /* A minor penalty for constants we cannot directly handle. */
3510 if ((CONST_INT_P (then) || CONST_INT_P (els))
3511 && (!TARGET_Z13 || MEM_P (dst)
3512 || (CONST_INT_P (then) && !satisfies_constraint_K (then))
3513 || (CONST_INT_P (els) && !satisfies_constraint_K (els))))
3514 *total += COSTS_N_INSNS (1) / 2;
3515
3516 /* A store on condition can only handle register src operands. */
3517 if (MEM_P (dst) && (!REG_P (then) || !REG_P (els)))
3518 *total += COSTS_N_INSNS (1) / 2;
3519
3520 return true;
3521 }
3522 case IOR:
3523 /* risbg */
3524 if (GET_CODE (XEXP (x, 0)) == AND
3525 && GET_CODE (XEXP (x, 1)) == ASHIFT
3526 && REG_P (XEXP (XEXP (x, 0), 0))
3527 && REG_P (XEXP (XEXP (x, 1), 0))
3528 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
3529 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
3530 && (UINTVAL (XEXP (XEXP (x, 0), 1)) ==
3531 (HOST_WIDE_INT_1U << UINTVAL (XEXP (XEXP (x, 1), 1))) - 1))
3532 {
3533 *total = COSTS_N_INSNS (2);
3534 return true;
3535 }
3536
3537 /* ~AND on a 128-bit mode.  This can be done using a vector
3538 instruction. */
3539 if (TARGET_VXE
3540 && GET_CODE (XEXP (x, 0)) == NOT
3541 && GET_CODE (XEXP (x, 1)) == NOT
3542 && REG_P (XEXP (XEXP (x, 0), 0))
3543 && REG_P (XEXP (XEXP (x, 1), 0))
3544 && GET_MODE_SIZE (GET_MODE (XEXP (XEXP (x, 0), 0))) == 16
3545 && s390_hard_regno_mode_ok (VR0_REGNUM,
3546 GET_MODE (XEXP (XEXP (x, 0), 0))))
3547 {
3548 *total = COSTS_N_INSNS (1);
3549 return true;
3550 }
3551 /* fallthrough */
3552 case ASHIFT:
3553 case ASHIFTRT:
3554 case LSHIFTRT:
3555 case ROTATE:
3556 case ROTATERT:
3557 case AND:
3558 case XOR:
3559 case NEG:
3560 case NOT:
3561 *total = COSTS_N_INSNS (1);
3562 return false;
3563
3564 case PLUS:
3565 case MINUS:
3566 *total = COSTS_N_INSNS (1);
3567 return false;
3568
3569 case MULT:
3570 switch (mode)
3571 {
3572 case E_SImode:
3573 {
3574 rtx left = XEXP (x, 0);
3575 rtx right = XEXP (x, 1);
3576 if (GET_CODE (right) == CONST_INT
3577 && CONST_OK_FOR_K (INTVAL (right)))
3578 *total = s390_cost->mhi;
3579 else if (GET_CODE (left) == SIGN_EXTEND)
3580 *total = s390_cost->mh;
3581 else
3582 *total = s390_cost->ms; /* msr, ms, msy */
3583 break;
3584 }
3585 case E_DImode:
3586 {
3587 rtx left = XEXP (x, 0);
3588 rtx right = XEXP (x, 1);
3589 if (TARGET_ZARCH)
3590 {
3591 if (GET_CODE (right) == CONST_INT
3592 && CONST_OK_FOR_K (INTVAL (right)))
3593 *total = s390_cost->mghi;
3594 else if (GET_CODE (left) == SIGN_EXTEND)
3595 *total = s390_cost->msgf;
3596 else
3597 *total = s390_cost->msg; /* msgr, msg */
3598 }
3599 else /* TARGET_31BIT */
3600 {
3601 if (GET_CODE (left) == SIGN_EXTEND
3602 && GET_CODE (right) == SIGN_EXTEND)
3603 /* mulsidi case: mr, m */
3604 *total = s390_cost->m;
3605 else if (GET_CODE (left) == ZERO_EXTEND
3606 && GET_CODE (right) == ZERO_EXTEND
3607 && TARGET_CPU_ZARCH)
3608 /* umulsidi case: ml, mlr */
3609 *total = s390_cost->ml;
3610 else
3611 /* Complex calculation is required. */
3612 *total = COSTS_N_INSNS (40);
3613 }
3614 break;
3615 }
3616 case E_SFmode:
3617 case E_DFmode:
3618 *total = s390_cost->mult_df;
3619 break;
3620 case E_TFmode:
3621 *total = s390_cost->mxbr;
3622 break;
3623 default:
3624 return false;
3625 }
3626 return false;
3627
3628 case FMA:
3629 switch (mode)
3630 {
3631 case E_DFmode:
3632 *total = s390_cost->madbr;
3633 break;
3634 case E_SFmode:
3635 *total = s390_cost->maebr;
3636 break;
3637 default:
3638 return false;
3639 }
3640 /* A negation in the third argument is free: FMSUB.  */
3641 if (GET_CODE (XEXP (x, 2)) == NEG)
3642 {
3643 *total += (rtx_cost (XEXP (x, 0), mode, FMA, 0, speed)
3644 + rtx_cost (XEXP (x, 1), mode, FMA, 1, speed)
3645 + rtx_cost (XEXP (XEXP (x, 2), 0), mode, FMA, 2, speed));
3646 return true;
3647 }
3648 return false;
3649
3650 case UDIV:
3651 case UMOD:
3652 if (mode == TImode) /* 128 bit division */
3653 *total = s390_cost->dlgr;
3654 else if (mode == DImode)
3655 {
3656 rtx right = XEXP (x, 1);
3657 if (GET_CODE (right) == ZERO_EXTEND) /* 64 by 32 bit division */
3658 *total = s390_cost->dlr;
3659 else /* 64 by 64 bit division */
3660 *total = s390_cost->dlgr;
3661 }
3662 else if (mode == SImode) /* 32 bit division */
3663 *total = s390_cost->dlr;
3664 return false;
3665
3666 case DIV:
3667 case MOD:
3668 if (mode == DImode)
3669 {
3670 rtx right = XEXP (x, 1);
3671 if (GET_CODE (right) == ZERO_EXTEND) /* 64 by 32 bit division */
3672 if (TARGET_ZARCH)
3673 *total = s390_cost->dsgfr;
3674 else
3675 *total = s390_cost->dr;
3676 else /* 64 by 64 bit division */
3677 *total = s390_cost->dsgr;
3678 }
3679 else if (mode == SImode) /* 32 bit division */
3680 *total = s390_cost->dlr;
3681 else if (mode == SFmode)
3682 {
3683 *total = s390_cost->debr;
3684 }
3685 else if (mode == DFmode)
3686 {
3687 *total = s390_cost->ddbr;
3688 }
3689 else if (mode == TFmode)
3690 {
3691 *total = s390_cost->dxbr;
3692 }
3693 return false;
3694
3695 case SQRT:
3696 if (mode == SFmode)
3697 *total = s390_cost->sqebr;
3698 else if (mode == DFmode)
3699 *total = s390_cost->sqdbr;
3700 else /* TFmode */
3701 *total = s390_cost->sqxbr;
3702 return false;
3703
3704 case SIGN_EXTEND:
3705 case ZERO_EXTEND:
3706 if (outer_code == MULT || outer_code == DIV || outer_code == MOD
3707 || outer_code == PLUS || outer_code == MINUS
3708 || outer_code == COMPARE)
3709 *total = 0;
3710 return false;
3711
3712 case COMPARE:
3713 *total = COSTS_N_INSNS (1);
3714 if (GET_CODE (XEXP (x, 0)) == AND
3715 && GET_CODE (XEXP (x, 1)) == CONST_INT
3716 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)
3717 {
3718 rtx op0 = XEXP (XEXP (x, 0), 0);
3719 rtx op1 = XEXP (XEXP (x, 0), 1);
3720 rtx op2 = XEXP (x, 1);
3721
3722 if (memory_operand (op0, GET_MODE (op0))
3723 && s390_tm_ccmode (op1, op2, 0) != VOIDmode)
3724 return true;
3725 if (register_operand (op0, GET_MODE (op0))
3726 && s390_tm_ccmode (op1, op2, 1) != VOIDmode)
3727 return true;
3728 }
3729 return false;
3730
3731 default:
3732 return false;
3733 }
3734 }
3735
3736 /* Return the cost of an address rtx ADDR. */
3737
3738 static int
3739 s390_address_cost (rtx addr, machine_mode mode ATTRIBUTE_UNUSED,
3740 addr_space_t as ATTRIBUTE_UNUSED,
3741 bool speed ATTRIBUTE_UNUSED)
3742 {
3743 struct s390_address ad;
3744 if (!s390_decompose_address (addr, &ad))
3745 return 1000;
3746
3747 return ad.indx ? COSTS_N_INSNS (1) + 1 : COSTS_N_INSNS (1);
3748 }
3749
3750 /* Implement targetm.vectorize.builtin_vectorization_cost. */
3751 static int
3752 s390_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
3753 tree vectype,
3754 int misalign ATTRIBUTE_UNUSED)
3755 {
3756 switch (type_of_cost)
3757 {
3758 case scalar_stmt:
3759 case scalar_load:
3760 case scalar_store:
3761 case vector_stmt:
3762 case vector_load:
3763 case vector_store:
3764 case vector_gather_load:
3765 case vector_scatter_store:
3766 case vec_to_scalar:
3767 case scalar_to_vec:
3768 case cond_branch_not_taken:
3769 case vec_perm:
3770 case vec_promote_demote:
3771 case unaligned_load:
3772 case unaligned_store:
3773 return 1;
3774
3775 case cond_branch_taken:
3776 return 3;
3777
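/* Building a vector from scalar elements costs roughly one insn
   per element beyond the first. */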
3778 case vec_construct:
3779 return TYPE_VECTOR_SUBPARTS (vectype) - 1;
3780
3781 default:
3782 gcc_unreachable ();
3783 }
3784 }
3785
3786 /* If OP is a SYMBOL_REF of a thread-local symbol, return its TLS mode,
3787 otherwise return 0. */
3788
3789 int
3790 tls_symbolic_operand (rtx op)
3791 {
3792 if (GET_CODE (op) != SYMBOL_REF)
3793 return 0;
3794 return SYMBOL_REF_TLS_MODEL (op);
3795 }
3796
3797 /* Split DImode access register reference REG (on 64-bit) into its constituent
3798 low and high parts, and store them into LO and HI. Note that gen_lowpart/
3799 gen_highpart cannot be used as they assume all registers are word-sized,
3800 while our access registers have only half that size. */
3801
3802 void
3803 s390_split_access_reg (rtx reg, rtx *lo, rtx *hi)
3804 {
3805 gcc_assert (TARGET_64BIT);
3806 gcc_assert (ACCESS_REG_P (reg));
3807 gcc_assert (GET_MODE (reg) == DImode);
3808 gcc_assert (!(REGNO (reg) & 1));
3809
3810 *lo = gen_rtx_REG (SImode, REGNO (reg) + 1);
3811 *hi = gen_rtx_REG (SImode, REGNO (reg));
3812 }
3813
3814 /* Return true if OP contains a symbol reference. */
3815
3816 bool
3817 symbolic_reference_mentioned_p (rtx op)
3818 {
3819 const char *fmt;
3820 int i;
3821
3822 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
3823 return 1;
3824
3825 fmt = GET_RTX_FORMAT (GET_CODE (op));
3826 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
3827 {
3828 if (fmt[i] == 'E')
3829 {
3830 int j;
3831
3832 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
3833 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
3834 return 1;
3835 }
3836
3837 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
3838 return 1;
3839 }
3840
3841 return 0;
3842 }
3843
3844 /* Return true if OP contains a reference to a thread-local symbol. */
3845
3846 bool
3847 tls_symbolic_reference_mentioned_p (rtx op)
3848 {
3849 const char *fmt;
3850 int i;
3851
3852 if (GET_CODE (op) == SYMBOL_REF)
3853 return tls_symbolic_operand (op);
3854
3855 fmt = GET_RTX_FORMAT (GET_CODE (op));
3856 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
3857 {
3858 if (fmt[i] == 'E')
3859 {
3860 int j;
3861
3862 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
3863 if (tls_symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
3864 return true;
3865 }
3866
3867 else if (fmt[i] == 'e' && tls_symbolic_reference_mentioned_p (XEXP (op, i)))
3868 return true;
3869 }
3870
3871 return false;
3872 }
3873
3874
3875 /* Return true if OP is a legitimate general operand when
3876 generating PIC code. It is given that flag_pic is on
3877 and that OP satisfies CONSTANT_P. */
3878
3879 int
3880 legitimate_pic_operand_p (rtx op)
3881 {
3882 /* Accept all non-symbolic constants. */
3883 if (!SYMBOLIC_CONST (op))
3884 return 1;
3885
3886 /* Reject everything else; must be handled
3887 via emit_symbolic_move. */
3888 return 0;
3889 }
3890
3891 /* Returns true if the constant value OP is a legitimate general operand.
3892 It is given that OP satisfies CONSTANT_P. */
3893
3894 static bool
3895 s390_legitimate_constant_p (machine_mode mode, rtx op)
3896 {
3897 if (TARGET_VX && VECTOR_MODE_P (mode) && GET_CODE (op) == CONST_VECTOR)
3898 {
3899 if (GET_MODE_SIZE (mode) != 16)
3900 return 0;
3901
3902 if (!satisfies_constraint_j00 (op)
3903 && !satisfies_constraint_jm1 (op)
3904 && !satisfies_constraint_jKK (op)
3905 && !satisfies_constraint_jxx (op)
3906 && !satisfies_constraint_jyy (op))
3907 return 0;
3908 }
3909
3910 /* Accept all non-symbolic constants. */
3911 if (!SYMBOLIC_CONST (op))
3912 return 1;
3913
3914 /* Accept immediate LARL operands. */
3915 if (TARGET_CPU_ZARCH && larl_operand (op, mode))
3916 return 1;
3917
3918 /* Thread-local symbols are never legal constants. This is
3919 so that emit_call knows that computing such addresses
3920 might require a function call. */
3921 if (TLS_SYMBOLIC_CONST (op))
3922 return 0;
3923
3924 /* In the PIC case, symbolic constants must *not* be
3925 forced into the literal pool. We accept them here,
3926 so that they will be handled by emit_symbolic_move. */
3927 if (flag_pic)
3928 return 1;
3929
3930 /* All remaining non-PIC symbolic constants are
3931 forced into the literal pool. */
3932 return 0;
3933 }
3934
3935 /* Determine if it's legal to put X into the constant pool. This
3936 is not possible if X contains the address of a symbol that is
3937 not constant (TLS) or not known at final link time (PIC). */
3938
3939 static bool
3940 s390_cannot_force_const_mem (machine_mode mode, rtx x)
3941 {
3942 switch (GET_CODE (x))
3943 {
3944 case CONST_INT:
3945 case CONST_DOUBLE:
3946 case CONST_WIDE_INT:
3947 case CONST_VECTOR:
3948 /* Accept all non-symbolic constants. */
3949 return false;
3950
3951 case LABEL_REF:
3952 /* Labels are OK iff we are non-PIC. */
3953 return flag_pic != 0;
3954
3955 case SYMBOL_REF:
3956 /* 'Naked' TLS symbol references are never OK,
3957 non-TLS symbols are OK iff we are non-PIC. */
3958 if (tls_symbolic_operand (x))
3959 return true;
3960 else
3961 return flag_pic != 0;
3962
3963 case CONST:
3964 return s390_cannot_force_const_mem (mode, XEXP (x, 0));
3965 case PLUS:
3966 case MINUS:
3967 return s390_cannot_force_const_mem (mode, XEXP (x, 0))
3968 || s390_cannot_force_const_mem (mode, XEXP (x, 1));
3969
3970 case UNSPEC:
3971 switch (XINT (x, 1))
3972 {
3973 /* Only lt-relative or GOT-relative UNSPECs are OK. */
3974 case UNSPEC_LTREL_OFFSET:
3975 case UNSPEC_GOT:
3976 case UNSPEC_GOTOFF:
3977 case UNSPEC_PLTOFF:
3978 case UNSPEC_TLSGD:
3979 case UNSPEC_TLSLDM:
3980 case UNSPEC_NTPOFF:
3981 case UNSPEC_DTPOFF:
3982 case UNSPEC_GOTNTPOFF:
3983 case UNSPEC_INDNTPOFF:
3984 return false;
3985
3986 /* If the literal pool shares the code section, we put
3987 execute template placeholders into the pool as well. */
3988 case UNSPEC_INSN:
3989 return TARGET_CPU_ZARCH;
3990
3991 default:
3992 return true;
3993 }
3994 break;
3995
3996 default:
3997 gcc_unreachable ();
3998 }
3999 }
4000
4001 /* Returns true if the constant value OP is a legitimate general
4002 operand during and after reload. The difference to
4003 legitimate_constant_p is that this function will not accept
4004 a constant that would need to be forced to the literal pool
4005 before it can be used as operand.
4006 This function accepts all constants which can be loaded directly
4007 into a GPR. */
4008
4009 bool
4010 legitimate_reload_constant_p (rtx op)
4011 {
4012 /* Accept la(y) operands. */
4013 if (GET_CODE (op) == CONST_INT
4014 && DISP_IN_RANGE (INTVAL (op)))
4015 return true;
4016
4017 /* Accept l(g)hi/l(g)fi operands. */
4018 if (GET_CODE (op) == CONST_INT
4019 && (CONST_OK_FOR_K (INTVAL (op)) || CONST_OK_FOR_Os (INTVAL (op))))
4020 return true;
4021
4022 /* Accept lliXX operands. */
4023 if (TARGET_ZARCH
4024 && GET_CODE (op) == CONST_INT
4025 && trunc_int_for_mode (INTVAL (op), word_mode) == INTVAL (op)
4026 && s390_single_part (op, word_mode, HImode, 0) >= 0)
4027 return true;
4028
4029 if (TARGET_EXTIMM
4030 && GET_CODE (op) == CONST_INT
4031 && trunc_int_for_mode (INTVAL (op), word_mode) == INTVAL (op)
4032 && s390_single_part (op, word_mode, SImode, 0) >= 0)
4033 return true;
4034
4035 /* Accept larl operands. */
4036 if (TARGET_CPU_ZARCH
4037 && larl_operand (op, VOIDmode))
4038 return true;
4039
4040 /* Accept floating-point zero operands that fit into a single GPR. */
4041 if (GET_CODE (op) == CONST_DOUBLE
4042 && s390_float_const_zero_p (op)
4043 && GET_MODE_SIZE (GET_MODE (op)) <= UNITS_PER_WORD)
4044 return true;
4045
4046 /* Accept double-word operands that can be split. */
4047 if (GET_CODE (op) == CONST_WIDE_INT
4048 || (GET_CODE (op) == CONST_INT
4049 && trunc_int_for_mode (INTVAL (op), word_mode) != INTVAL (op)))
4050 {
4051 machine_mode dword_mode = word_mode == SImode ? DImode : TImode;
4052 rtx hi = operand_subword (op, 0, 0, dword_mode);
4053 rtx lo = operand_subword (op, 1, 0, dword_mode);
4054 return legitimate_reload_constant_p (hi)
4055 && legitimate_reload_constant_p (lo);
4056 }
4057
4058 /* Everything else cannot be handled without reload. */
4059 return false;
4060 }
4061
4062 /* Returns true if the constant value OP is a legitimate fp operand
4063 during and after reload.
4064 This function accepts all constants which can be loaded directly
4065 into an FPR. */
4066
4067 static bool
4068 legitimate_reload_fp_constant_p (rtx op)
4069 {
4070 /* Accept floating-point zero operands if the load zero instruction
4071 can be used. Prior to z196 the load fp zero instruction caused a
4072 performance penalty if the result is used as BFP number. */
4073 if (TARGET_Z196
4074 && GET_CODE (op) == CONST_DOUBLE
4075 && s390_float_const_zero_p (op))
4076 return true;
4077
4078 return false;
4079 }
4080
4081 /* Returns true if the constant value OP is a legitimate vector operand
4082 during and after reload.
4083 This function accepts all constants which can be loaded directly
4084 into a VR. */
4085
4086 static bool
4087 legitimate_reload_vector_constant_p (rtx op)
4088 {
4089 if (TARGET_VX && GET_MODE_SIZE (GET_MODE (op)) == 16
4090 && (satisfies_constraint_j00 (op)
4091 || satisfies_constraint_jm1 (op)
4092 || satisfies_constraint_jKK (op)
4093 || satisfies_constraint_jxx (op)
4094 || satisfies_constraint_jyy (op)))
4095 return true;
4096
4097 return false;
4098 }
4099
4100 /* Given an rtx OP being reloaded into a reg required to be in class RCLASS,
4101 return the class of reg to actually use. */
4102
4103 static reg_class_t
4104 s390_preferred_reload_class (rtx op, reg_class_t rclass)
4105 {
4106 switch (GET_CODE (op))
4107 {
4108 /* Constants we cannot reload into general registers
4109 must be forced into the literal pool. */
4110 case CONST_VECTOR:
4111 case CONST_DOUBLE:
4112 case CONST_INT:
4113 case CONST_WIDE_INT:
4114 if (reg_class_subset_p (GENERAL_REGS, rclass)
4115 && legitimate_reload_constant_p (op))
4116 return GENERAL_REGS;
4117 else if (reg_class_subset_p (ADDR_REGS, rclass)
4118 && legitimate_reload_constant_p (op))
4119 return ADDR_REGS;
4120 else if (reg_class_subset_p (FP_REGS, rclass)
4121 && legitimate_reload_fp_constant_p (op))
4122 return FP_REGS;
4123 else if (reg_class_subset_p (VEC_REGS, rclass)
4124 && legitimate_reload_vector_constant_p (op))
4125 return VEC_REGS;
4126
4127 return NO_REGS;
4128
4129 /* If a symbolic constant or a PLUS is reloaded,
4130 it is most likely being used as an address, so
4131 prefer ADDR_REGS. If 'class' is not a superset
4132 of ADDR_REGS, e.g. FP_REGS, reject this reload. */
4133 case CONST:
4134 /* Symrefs cannot be pushed into the literal pool with -fPIC
4135 so we *MUST NOT* return NO_REGS for these cases
4136 (s390_cannot_force_const_mem will return true).
4137
4138 On the other hand we MUST return NO_REGS for symrefs with
4139 invalid addend which might have been pushed to the literal
4140 pool (no -fPIC). Usually we would expect them to be
4141 handled via secondary reload but this does not happen if
4142 they are used as literal pool slot replacement in reload
4143 inheritance (see emit_input_reload_insns). */
4144 if (TARGET_CPU_ZARCH
4145 && GET_CODE (XEXP (op, 0)) == PLUS
4146 && GET_CODE (XEXP (XEXP(op, 0), 0)) == SYMBOL_REF
4147 && GET_CODE (XEXP (XEXP(op, 0), 1)) == CONST_INT)
4148 {
4149 if (flag_pic && reg_class_subset_p (ADDR_REGS, rclass))
4150 return ADDR_REGS;
4151 else
4152 return NO_REGS;
4153 }
4154 /* fallthrough */
4155 case LABEL_REF:
4156 case SYMBOL_REF:
4157 if (!legitimate_reload_constant_p (op))
4158 return NO_REGS;
4159 /* fallthrough */
4160 case PLUS:
4161 /* load address will be used. */
4162 if (reg_class_subset_p (ADDR_REGS, rclass))
4163 return ADDR_REGS;
4164 else
4165 return NO_REGS;
4166
4167 default:
4168 break;
4169 }
4170
4171 return rclass;
4172 }
4173
4174 /* Return true if ADDR is SYMBOL_REF + addend with addend being a
4175 multiple of ALIGNMENT and the SYMBOL_REF being naturally
4176 aligned. */
4177
4178 bool
4179 s390_check_symref_alignment (rtx addr, HOST_WIDE_INT alignment)
4180 {
4181 HOST_WIDE_INT addend;
4182 rtx symref;
4183
4184 /* The "required alignment" might be 0 (e.g. for certain structs
4185 accessed via BLKmode). Early abort in this case, as well as when
4186 an alignment > 8 is required. */
4187 if (alignment < 2 || alignment > 8)
4188 return false;
4189
4190 if (!s390_loadrelative_operand_p (addr, &symref, &addend))
4191 return false;
4192
4193 if (addend & (alignment - 1))
4194 return false;
4195
4196 if (GET_CODE (symref) == SYMBOL_REF)
4197 {
4198 /* We have load-relative instructions for 2-byte, 4-byte, and
4199 8-byte alignment so allow only these. */
4200 switch (alignment)
4201 {
4202 case 8: return !SYMBOL_FLAG_NOTALIGN8_P (symref);
4203 case 4: return !SYMBOL_FLAG_NOTALIGN4_P (symref);
4204 case 2: return !SYMBOL_FLAG_NOTALIGN2_P (symref);
4205 default: return false;
4206 }
4207 }
4208
4209 if (GET_CODE (symref) == UNSPEC
4210 && alignment <= UNITS_PER_LONG)
4211 return true;
4212
4213 return false;
4214 }
4215
4216 /* ADDR is moved into REG using larl. If ADDR isn't a valid larl
4217 operand, SCRATCH is used to load the even part of the address;
4218 one is then added via la. */
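/* For example, reloading symbol+5 (odd addend) loads symbol+4 via
   larl and then adds the remaining 1 via la; this sketches the
   else branch below. */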
4219
4220 void
4221 s390_reload_larl_operand (rtx reg, rtx addr, rtx scratch)
4222 {
4223 HOST_WIDE_INT addend;
4224 rtx symref;
4225
4226 if (!s390_loadrelative_operand_p (addr, &symref, &addend))
4227 gcc_unreachable ();
4228
4229 if (!(addend & 1))
4230 /* Easy case. The addend is even so larl will do fine. */
4231 emit_move_insn (reg, addr);
4232 else
4233 {
4234 /* We can leave the scratch register untouched if the target
4235 register is a valid base register. */
4236 if (REGNO (reg) < FIRST_PSEUDO_REGISTER
4237 && REGNO_REG_CLASS (REGNO (reg)) == ADDR_REGS)
4238 scratch = reg;
4239
4240 gcc_assert (REGNO (scratch) < FIRST_PSEUDO_REGISTER);
4241 gcc_assert (REGNO_REG_CLASS (REGNO (scratch)) == ADDR_REGS);
4242
4243 if (addend != 1)
4244 emit_move_insn (scratch,
4245 gen_rtx_CONST (Pmode,
4246 gen_rtx_PLUS (Pmode, symref,
4247 GEN_INT (addend - 1))));
4248 else
4249 emit_move_insn (scratch, symref);
4250
4251 /* Increment the address using la in order to avoid clobbering cc. */
4252 s390_load_address (reg, gen_rtx_PLUS (Pmode, scratch, const1_rtx));
4253 }
4254 }
4255
4256 /* Generate what is necessary to move between REG and MEM using
4257 SCRATCH. The direction is given by TOMEM. */
4258
4259 void
4260 s390_reload_symref_address (rtx reg, rtx mem, rtx scratch, bool tomem)
4261 {
4262 /* Reload might have pulled a constant out of the literal pool.
4263 Force it back in. */
4264 if (CONST_INT_P (mem) || GET_CODE (mem) == CONST_DOUBLE
4265 || GET_CODE (mem) == CONST_WIDE_INT
4266 || GET_CODE (mem) == CONST_VECTOR
4267 || GET_CODE (mem) == CONST)
4268 mem = force_const_mem (GET_MODE (reg), mem);
4269
4270 gcc_assert (MEM_P (mem));
4271
4272 /* For a load from memory we can leave the scratch register
4273 untouched if the target register is a valid base register. */
4274 if (!tomem
4275 && REGNO (reg) < FIRST_PSEUDO_REGISTER
4276 && REGNO_REG_CLASS (REGNO (reg)) == ADDR_REGS
4277 && GET_MODE (reg) == GET_MODE (scratch))
4278 scratch = reg;
4279
4280 /* Load address into scratch register. Since we can't have a
4281 secondary reload for a secondary reload we have to cover the case
4282 where larl would need a secondary reload here as well. */
4283 s390_reload_larl_operand (scratch, XEXP (mem, 0), scratch);
4284
4285 /* Now we can use a standard load/store to do the move. */
4286 if (tomem)
4287 emit_move_insn (replace_equiv_address (mem, scratch), reg);
4288 else
4289 emit_move_insn (reg, replace_equiv_address (mem, scratch));
4290 }
4291
4292 /* Inform reload about cases where moving X with a mode MODE to a register in
4293 RCLASS requires an extra scratch or immediate register. Return the class
4294 needed for the immediate register. */
4295
4296 static reg_class_t
4297 s390_secondary_reload (bool in_p, rtx x, reg_class_t rclass_i,
4298 machine_mode mode, secondary_reload_info *sri)
4299 {
4300 enum reg_class rclass = (enum reg_class) rclass_i;
4301
4302 /* Intermediate register needed. */
4303 if (reg_classes_intersect_p (CC_REGS, rclass))
4304 return GENERAL_REGS;
4305
4306 if (TARGET_VX)
4307 {
4308 /* The vst/vl vector move instructions allow only for short
4309 displacements. */
4310 if (MEM_P (x)
4311 && GET_CODE (XEXP (x, 0)) == PLUS
4312 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
4313 && !SHORT_DISP_IN_RANGE(INTVAL (XEXP (XEXP (x, 0), 1)))
4314 && reg_class_subset_p (rclass, VEC_REGS)
4315 && (!reg_class_subset_p (rclass, FP_REGS)
4316 || (GET_MODE_SIZE (mode) > 8
4317 && s390_class_max_nregs (FP_REGS, mode) == 1)))
4318 {
4319 if (in_p)
4320 sri->icode = (TARGET_64BIT ?
4321 CODE_FOR_reloaddi_la_in :
4322 CODE_FOR_reloadsi_la_in);
4323 else
4324 sri->icode = (TARGET_64BIT ?
4325 CODE_FOR_reloaddi_la_out :
4326 CODE_FOR_reloadsi_la_out);
4327 }
4328 }
4329
4330 if (TARGET_Z10)
4331 {
4332 HOST_WIDE_INT offset;
4333 rtx symref;
4334
4335 /* On z10 several optimizer steps may generate larl operands with
4336 an odd addend. */
4337 if (in_p
4338 && s390_loadrelative_operand_p (x, &symref, &offset)
4339 && mode == Pmode
4340 && !SYMBOL_FLAG_NOTALIGN2_P (symref)
4341 && (offset & 1) == 1)
4342 sri->icode = ((mode == DImode) ? CODE_FOR_reloaddi_larl_odd_addend_z10
4343 : CODE_FOR_reloadsi_larl_odd_addend_z10);
4344
4345 /* Handle all the (mem (symref)) accesses we cannot use the z10
4346 instructions for. */
4347 if (MEM_P (x)
4348 && s390_loadrelative_operand_p (XEXP (x, 0), NULL, NULL)
4349 && (mode == QImode
4350 || !reg_class_subset_p (rclass, GENERAL_REGS)
4351 || GET_MODE_SIZE (mode) > UNITS_PER_WORD
4352 || !s390_check_symref_alignment (XEXP (x, 0),
4353 GET_MODE_SIZE (mode))))
4354 {
4355 #define __SECONDARY_RELOAD_CASE(M,m) \
4356 case E_##M##mode: \
4357 if (TARGET_64BIT) \
4358 sri->icode = in_p ? CODE_FOR_reload##m##di_toreg_z10 : \
4359 CODE_FOR_reload##m##di_tomem_z10; \
4360 else \
4361 sri->icode = in_p ? CODE_FOR_reload##m##si_toreg_z10 : \
4362 CODE_FOR_reload##m##si_tomem_z10; \
4363 break;
4364
4365 switch (GET_MODE (x))
4366 {
4367 __SECONDARY_RELOAD_CASE (QI, qi);
4368 __SECONDARY_RELOAD_CASE (HI, hi);
4369 __SECONDARY_RELOAD_CASE (SI, si);
4370 __SECONDARY_RELOAD_CASE (DI, di);
4371 __SECONDARY_RELOAD_CASE (TI, ti);
4372 __SECONDARY_RELOAD_CASE (SF, sf);
4373 __SECONDARY_RELOAD_CASE (DF, df);
4374 __SECONDARY_RELOAD_CASE (TF, tf);
4375 __SECONDARY_RELOAD_CASE (SD, sd);
4376 __SECONDARY_RELOAD_CASE (DD, dd);
4377 __SECONDARY_RELOAD_CASE (TD, td);
4378 __SECONDARY_RELOAD_CASE (V1QI, v1qi);
4379 __SECONDARY_RELOAD_CASE (V2QI, v2qi);
4380 __SECONDARY_RELOAD_CASE (V4QI, v4qi);
4381 __SECONDARY_RELOAD_CASE (V8QI, v8qi);
4382 __SECONDARY_RELOAD_CASE (V16QI, v16qi);
4383 __SECONDARY_RELOAD_CASE (V1HI, v1hi);
4384 __SECONDARY_RELOAD_CASE (V2HI, v2hi);
4385 __SECONDARY_RELOAD_CASE (V4HI, v4hi);
4386 __SECONDARY_RELOAD_CASE (V8HI, v8hi);
4387 __SECONDARY_RELOAD_CASE (V1SI, v1si);
4388 __SECONDARY_RELOAD_CASE (V2SI, v2si);
4389 __SECONDARY_RELOAD_CASE (V4SI, v4si);
4390 __SECONDARY_RELOAD_CASE (V1DI, v1di);
4391 __SECONDARY_RELOAD_CASE (V2DI, v2di);
4392 __SECONDARY_RELOAD_CASE (V1TI, v1ti);
4393 __SECONDARY_RELOAD_CASE (V1SF, v1sf);
4394 __SECONDARY_RELOAD_CASE (V2SF, v2sf);
4395 __SECONDARY_RELOAD_CASE (V4SF, v4sf);
4396 __SECONDARY_RELOAD_CASE (V1DF, v1df);
4397 __SECONDARY_RELOAD_CASE (V2DF, v2df);
4398 __SECONDARY_RELOAD_CASE (V1TF, v1tf);
4399 default:
4400 gcc_unreachable ();
4401 }
4402 #undef __SECONDARY_RELOAD_CASE
4403 }
4404 }
4405
4406 /* We need a scratch register when loading a PLUS expression which
4407 is not a legitimate operand of the LOAD ADDRESS instruction. */
4408 /* LRA handles the transformation of a PLUS operand very well on
4409 its own, so we don't need to prompt LRA in this case. */
4410 if (! lra_in_progress && in_p && s390_plus_operand (x, mode))
4411 sri->icode = (TARGET_64BIT ?
4412 CODE_FOR_reloaddi_plus : CODE_FOR_reloadsi_plus);
4413
4414 /* When performing a multiword move from or to memory, we have to make sure the
4415 second chunk in memory is addressable without causing a displacement
4416 overflow. If that would be the case we calculate the address in
4417 a scratch register. */
4418 if (MEM_P (x)
4419 && GET_CODE (XEXP (x, 0)) == PLUS
4420 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
4421 && !DISP_IN_RANGE (INTVAL (XEXP (XEXP (x, 0), 1))
4422 + GET_MODE_SIZE (mode) - 1))
4423 {
4424 /* For GENERAL_REGS a displacement overflow is no problem if occurring
4425 in an s_operand address since we may fall back to lm/stm. So we only
4426 have to care about overflows in the b+i+d case. */
4427 if ((reg_classes_intersect_p (GENERAL_REGS, rclass)
4428 && s390_class_max_nregs (GENERAL_REGS, mode) > 1
4429 && GET_CODE (XEXP (XEXP (x, 0), 0)) == PLUS)
4430 /* For FP_REGS no lm/stm is available so this check is triggered
4431 for displacement overflows in b+i+d and b+d like addresses. */
4432 || (reg_classes_intersect_p (FP_REGS, rclass)
4433 && s390_class_max_nregs (FP_REGS, mode) > 1))
4434 {
4435 if (in_p)
4436 sri->icode = (TARGET_64BIT ?
4437 CODE_FOR_reloaddi_la_in :
4438 CODE_FOR_reloadsi_la_in);
4439 else
4440 sri->icode = (TARGET_64BIT ?
4441 CODE_FOR_reloaddi_la_out :
4442 CODE_FOR_reloadsi_la_out);
4443 }
4444 }
4445
4446 /* A scratch address register is needed when a symbolic constant is
4447 copied to r0 when compiling with -fPIC. In other cases the target
4448 register might be used as temporary (see legitimize_pic_address). */
4449 if (in_p && SYMBOLIC_CONST (x) && flag_pic == 2 && rclass != ADDR_REGS)
4450 sri->icode = (TARGET_64BIT ?
4451 CODE_FOR_reloaddi_PIC_addr :
4452 CODE_FOR_reloadsi_PIC_addr);
4453
4454 /* Either scratch or no register needed. */
4455 return NO_REGS;
4456 }
4457
4458 /* Implement TARGET_SECONDARY_MEMORY_NEEDED.
4459
4460 We need secondary memory to move data between GPRs and FPRs.
4461
4462 - With DFP the ldgr/lgdr instructions are available. Due to the
4463 different alignment we cannot use them for SFmode. For 31 bit a
4464 64 bit value in GPR would be a register pair so here we still
4465 need to go via memory.
4466
4467 - With z13 we can do the SF/SImode moves with vlgvf. Due to the
4468 overlapping of FPRs and VRs we still disallow TF/TD modes to be
4469 in full VRs so as before also on z13 we do these moves via
4470 memory.
4471
4472 FIXME: Should we try splitting it into two vlgvg's/vlvg's instead? */
4473
4474 static bool
4475 s390_secondary_memory_needed (machine_mode mode,
4476 reg_class_t class1, reg_class_t class2)
4477 {
4478 return (((reg_classes_intersect_p (class1, VEC_REGS)
4479 && reg_classes_intersect_p (class2, GENERAL_REGS))
4480 || (reg_classes_intersect_p (class1, GENERAL_REGS)
4481 && reg_classes_intersect_p (class2, VEC_REGS)))
4482 && (!TARGET_DFP || !TARGET_64BIT || GET_MODE_SIZE (mode) != 8)
4483 && (!TARGET_VX || (SCALAR_FLOAT_MODE_P (mode)
4484 && GET_MODE_SIZE (mode) > 8)));
4485 }
4486
4487 /* Implement TARGET_SECONDARY_MEMORY_NEEDED_MODE.
4488
4489 get_secondary_mem widens its argument to BITS_PER_WORD which loses on 64bit
4490 because the movsi and movsf patterns don't handle r/f moves. */
4491
4492 static machine_mode
4493 s390_secondary_memory_needed_mode (machine_mode mode)
4494 {
4495 if (GET_MODE_BITSIZE (mode) < 32)
4496 return mode_for_size (32, GET_MODE_CLASS (mode), 0).require ();
4497 return mode;
4498 }
4499
4500 /* Generate code to load SRC, which is a PLUS that is not a
4501 legitimate operand for the LA instruction, into TARGET.
4502 SCRATCH may be used as scratch register. */
4503
4504 void
4505 s390_expand_plus_operand (rtx target, rtx src,
4506 rtx scratch)
4507 {
4508 rtx sum1, sum2;
4509 struct s390_address ad;
4510
4511 /* src must be a PLUS; get its two operands. */
4512 gcc_assert (GET_CODE (src) == PLUS);
4513 gcc_assert (GET_MODE (src) == Pmode);
4514
4515 /* Check if any of the two operands is already scheduled
4516 for replacement by reload. This can happen e.g. when
4517 float registers occur in an address. */
4518 sum1 = find_replacement (&XEXP (src, 0));
4519 sum2 = find_replacement (&XEXP (src, 1));
4520 src = gen_rtx_PLUS (Pmode, sum1, sum2);
4521
4522 /* If the address is already strictly valid, there's nothing to do. */
4523 if (!s390_decompose_address (src, &ad)
4524 || (ad.base && !REGNO_OK_FOR_BASE_P (REGNO (ad.base)))
4525 || (ad.indx && !REGNO_OK_FOR_INDEX_P (REGNO (ad.indx))))
4526 {
4527 /* Otherwise, one of the operands cannot be an address register;
4528 we reload its value into the scratch register. */
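/* Only hard regs 1 through 15 can serve as address registers on
   S/390; anything else (r0 included) forces a copy into SCRATCH. */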
4529 if (true_regnum (sum1) < 1 || true_regnum (sum1) > 15)
4530 {
4531 emit_move_insn (scratch, sum1);
4532 sum1 = scratch;
4533 }
4534 if (true_regnum (sum2) < 1 || true_regnum (sum2) > 15)
4535 {
4536 emit_move_insn (scratch, sum2);
4537 sum2 = scratch;
4538 }
4539
4540 /* According to the way these invalid addresses are generated
4541 in reload.c, it should never happen (at least on s390) that
4542 *neither* of the PLUS components, after find_replacements
4543 was applied, is an address register. */
4544 if (sum1 == scratch && sum2 == scratch)
4545 {
4546 debug_rtx (src);
4547 gcc_unreachable ();
4548 }
4549
4550 src = gen_rtx_PLUS (Pmode, sum1, sum2);
4551 }
4552
4553 /* Emit the LOAD ADDRESS pattern. Note that reload of PLUS
4554 is only ever performed on addresses, so we can mark the
4555 sum as legitimate for LA in any case. */
4556 s390_load_address (target, src);
4557 }
4558
4559
4560 /* Return true if ADDR is a valid memory address.
4561 STRICT specifies whether strict register checking applies. */
4562
4563 static bool
4564 s390_legitimate_address_p (machine_mode mode, rtx addr, bool strict)
4565 {
4566 struct s390_address ad;
4567
4568 if (TARGET_Z10
4569 && larl_operand (addr, VOIDmode)
4570 && (mode == VOIDmode
4571 || s390_check_symref_alignment (addr, GET_MODE_SIZE (mode))))
4572 return true;
4573
4574 if (!s390_decompose_address (addr, &ad))
4575 return false;
4576
4577 if (strict)
4578 {
4579 if (ad.base && !REGNO_OK_FOR_BASE_P (REGNO (ad.base)))
4580 return false;
4581
4582 if (ad.indx && !REGNO_OK_FOR_INDEX_P (REGNO (ad.indx)))
4583 return false;
4584 }
4585 else
4586 {
4587 if (ad.base
4588 && !(REGNO (ad.base) >= FIRST_PSEUDO_REGISTER
4589 || REGNO_REG_CLASS (REGNO (ad.base)) == ADDR_REGS))
4590 return false;
4591
4592 if (ad.indx
4593 && !(REGNO (ad.indx) >= FIRST_PSEUDO_REGISTER
4594 || REGNO_REG_CLASS (REGNO (ad.indx)) == ADDR_REGS))
4595 return false;
4596 }
4597 return true;
4598 }
4599
4600 /* Return true if OP is a valid operand for the LA instruction.
4601 In 31-bit, we need to prove that the result is used as an
4602 address, as LA performs only a 31-bit addition. */
4603
4604 bool
4605 legitimate_la_operand_p (rtx op)
4606 {
4607 struct s390_address addr;
4608 if (!s390_decompose_address (op, &addr))
4609 return false;
4610
4611 return (TARGET_64BIT || addr.pointer);
4612 }
4613
4614 /* Return true if it is valid *and* preferable to use LA to
4615 compute the sum of OP1 and OP2. */
4616
4617 bool
4618 preferred_la_operand_p (rtx op1, rtx op2)
4619 {
4620 struct s390_address addr;
4621
4622 if (op2 != const0_rtx)
4623 op1 = gen_rtx_PLUS (Pmode, op1, op2);
4624
4625 if (!s390_decompose_address (op1, &addr))
4626 return false;
4627 if (addr.base && !REGNO_OK_FOR_BASE_P (REGNO (addr.base)))
4628 return false;
4629 if (addr.indx && !REGNO_OK_FOR_INDEX_P (REGNO (addr.indx)))
4630 return false;
4631
4632 /* Avoid LA instructions with index register on z196; it is
4633 preferable to use regular add instructions when possible.
4634 Starting with zEC12 the la with index register is "uncracked"
4635 again. */
4636 if (addr.indx && s390_tune == PROCESSOR_2817_Z196)
4637 return false;
4638
4639 if (!TARGET_64BIT && !addr.pointer)
4640 return false;
4641
4642 if (addr.pointer)
4643 return true;
4644
4645 if ((addr.base && REG_P (addr.base) && REG_POINTER (addr.base))
4646 || (addr.indx && REG_P (addr.indx) && REG_POINTER (addr.indx)))
4647 return true;
4648
4649 return false;
4650 }
4651
4652 /* Emit a forced load-address operation to load SRC into DST.
4653 This will use the LOAD ADDRESS instruction even in situations
4654 where legitimate_la_operand_p (SRC) returns false. */
4655
4656 void
4657 s390_load_address (rtx dst, rtx src)
4658 {
4659 if (TARGET_64BIT)
4660 emit_move_insn (dst, src);
4661 else
4662 emit_insn (gen_force_la_31 (dst, src));
4663 }
4664
4665 /* Return true if it is OK to use SYMBOL_REF in a relative address. */
4666
4667 bool
4668 s390_rel_address_ok_p (rtx symbol_ref)
4669 {
4670 tree decl;
4671
4672 if (symbol_ref == s390_got_symbol () || CONSTANT_POOL_ADDRESS_P (symbol_ref))
4673 return true;
4674
4675 decl = SYMBOL_REF_DECL (symbol_ref);
4676
4677 if (!flag_pic || SYMBOL_REF_LOCAL_P (symbol_ref))
4678 return (s390_pic_data_is_text_relative
4679 || (decl
4680 && TREE_CODE (decl) == FUNCTION_DECL));
4681
4682 return false;
4683 }
4684
4685 /* Return a legitimate reference for ORIG (an address) using the
4686 register REG. If REG is 0, a new pseudo is generated.
4687
4688 There are two types of references that must be handled:
4689
4690 1. Global data references must load the address from the GOT, via
4691 the PIC reg. An insn is emitted to do this load, and the reg is
4692 returned.
4693
4694 2. Static data references, constant pool addresses, and code labels
4695 compute the address as an offset from the GOT, whose base is in
4696 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
4697 differentiate them from global data objects. The returned
4698 address is the PIC reg + an unspec constant.
4699
4700 TARGET_LEGITIMIZE_ADDRESS_P rejects symbolic references unless the PIC
4701 reg also appears in the address. */
4702
4703 rtx
4704 legitimize_pic_address (rtx orig, rtx reg)
4705 {
4706 rtx addr = orig;
4707 rtx addend = const0_rtx;
4708 rtx new_rtx = orig;
4709
4710 gcc_assert (!TLS_SYMBOLIC_CONST (addr));
4711
4712 if (GET_CODE (addr) == CONST)
4713 addr = XEXP (addr, 0);
4714
4715 if (GET_CODE (addr) == PLUS)
4716 {
4717 addend = XEXP (addr, 1);
4718 addr = XEXP (addr, 0);
4719 }
4720
4721 if ((GET_CODE (addr) == LABEL_REF
4722 || (SYMBOL_REF_P (addr) && s390_rel_address_ok_p (addr))
4723 || (GET_CODE (addr) == UNSPEC &&
4724 (XINT (addr, 1) == UNSPEC_GOTENT
4725 || (TARGET_CPU_ZARCH && XINT (addr, 1) == UNSPEC_PLT))))
4726 && GET_CODE (addend) == CONST_INT)
4727 {
4728 /* This can be locally addressed. */
4729
4730 /* larl_operand requires UNSPECs to be wrapped in a const rtx. */
4731 rtx const_addr = (GET_CODE (addr) == UNSPEC ?
4732 gen_rtx_CONST (Pmode, addr) : addr);
4733
4734 if (TARGET_CPU_ZARCH
4735 && larl_operand (const_addr, VOIDmode)
4736 && INTVAL (addend) < HOST_WIDE_INT_1 << 31
4737 && INTVAL (addend) >= -(HOST_WIDE_INT_1 << 31))
4738 {
4739 if (INTVAL (addend) & 1)
4740 {
4741 /* LARL can't handle odd offsets, so emit a pair of LARL
4742 and LA. */
4743 rtx temp = reg? reg : gen_reg_rtx (Pmode);
4744
4745 if (!DISP_IN_RANGE (INTVAL (addend)))
4746 {
4747 HOST_WIDE_INT even = INTVAL (addend) - 1;
4748 addr = gen_rtx_PLUS (Pmode, addr, GEN_INT (even));
4749 addr = gen_rtx_CONST (Pmode, addr);
4750 addend = const1_rtx;
4751 }
4752
4753 emit_move_insn (temp, addr);
4754 new_rtx = gen_rtx_PLUS (Pmode, temp, addend);
4755
4756 if (reg != 0)
4757 {
4758 s390_load_address (reg, new_rtx);
4759 new_rtx = reg;
4760 }
4761 }
4762 else
4763 {
4764 /* If the offset is even, we can just use LARL. This
4765 will happen automatically. */
4766 }
4767 }
4768 else
4769 {
4770 /* No larl - Access local symbols relative to the GOT. */
4771
4772 rtx temp = reg? reg : gen_reg_rtx (Pmode);
4773
4774 if (reload_in_progress || reload_completed)
4775 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
4776
4777 addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
4778 if (addend != const0_rtx)
4779 addr = gen_rtx_PLUS (Pmode, addr, addend);
4780 addr = gen_rtx_CONST (Pmode, addr);
4781 addr = force_const_mem (Pmode, addr);
4782 emit_move_insn (temp, addr);
4783
4784 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, temp);
4785 if (reg != 0)
4786 {
4787 s390_load_address (reg, new_rtx);
4788 new_rtx = reg;
4789 }
4790 }
4791 }
4792 else if (GET_CODE (addr) == SYMBOL_REF && addend == const0_rtx)
4793 {
4794 /* A non-local symbol reference without addend.
4795
4796 The symbol ref is wrapped into an UNSPEC to make sure the
4797 proper operand modifier (@GOT or @GOTENT) will be emitted.
4798 This will tell the linker to put the symbol into the GOT.
4799
4800 Additionally the code dereferencing the GOT slot is emitted here.
4801
4802 An addend to the symref needs to be added afterwards.
4803 legitimize_pic_address calls itself recursively to handle
4804 that case. So no need to do it here. */
4805
4806 if (reg == 0)
4807 reg = gen_reg_rtx (Pmode);
4808
4809 if (TARGET_Z10)
4810 {
4811 /* Use load relative if possible.
4812 lgrl <target>, sym@GOTENT */
4813 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTENT);
4814 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
4815 new_rtx = gen_const_mem (GET_MODE (reg), new_rtx);
4816
4817 emit_move_insn (reg, new_rtx);
4818 new_rtx = reg;
4819 }
4820 else if (flag_pic == 1)
4821 {
4822 /* Assume GOT offset is a valid displacement operand (< 4k
4823 or < 512k with z990). This is handled the same way in
4824 both 31- and 64-bit code (@GOT).
4825 lg <target>, sym@GOT(r12) */
4826
4827 if (reload_in_progress || reload_completed)
4828 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
4829
4830 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
4831 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
4832 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
4833 new_rtx = gen_const_mem (Pmode, new_rtx);
4834 emit_move_insn (reg, new_rtx);
4835 new_rtx = reg;
4836 }
4837 else if (TARGET_CPU_ZARCH)
4838 {
4839 /* If the GOT offset might be >= 4k, we determine the position
4840 of the GOT entry via a PC-relative LARL (@GOTENT).
4841 larl temp, sym@GOTENT
4842 lg <target>, 0(temp) */
4843
4844 rtx temp = reg ? reg : gen_reg_rtx (Pmode);
4845
4846 gcc_assert (REGNO (temp) >= FIRST_PSEUDO_REGISTER
4847 || REGNO_REG_CLASS (REGNO (temp)) == ADDR_REGS);
4848
4849 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTENT);
4850 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
4851 emit_move_insn (temp, new_rtx);
4852
4853 new_rtx = gen_const_mem (Pmode, temp);
4854 emit_move_insn (reg, new_rtx);
4855
4856 new_rtx = reg;
4857 }
4858 else
4859 {
4860 /* If the GOT offset might be >= 4k, we have to load it
4861 from the literal pool (@GOT).
4862
4863 lg temp, lit-litbase(r13)
4864 lg <target>, 0(temp)
4865 lit: .long sym@GOT */
4866
4867 rtx temp = reg ? reg : gen_reg_rtx (Pmode);
4868
4869 gcc_assert (REGNO (temp) >= FIRST_PSEUDO_REGISTER
4870 || REGNO_REG_CLASS (REGNO (temp)) == ADDR_REGS);
4871
4872 if (reload_in_progress || reload_completed)
4873 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
4874
4875 addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
4876 addr = gen_rtx_CONST (Pmode, addr);
4877 addr = force_const_mem (Pmode, addr);
4878 emit_move_insn (temp, addr);
4879
4880 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, temp);
4881 new_rtx = gen_const_mem (Pmode, new_rtx);
4882 emit_move_insn (reg, new_rtx);
4883 new_rtx = reg;
4884 }
4885 }
4886 else if (GET_CODE (addr) == UNSPEC && GET_CODE (addend) == CONST_INT)
4887 {
4888 gcc_assert (XVECLEN (addr, 0) == 1);
4889 switch (XINT (addr, 1))
4890 {
4891 /* These UNSPECs address symbols (or PLT slots) relative to the
4892 GOT (not GOT slots!). In general this will exceed the
4893 displacement range, so these values belong in the literal
4894 pool. */
4895 case UNSPEC_GOTOFF:
4896 case UNSPEC_PLTOFF:
4897 new_rtx = force_const_mem (Pmode, orig);
4898 break;
4899
4900 /* For -fPIC the GOT size might exceed the displacement
4901 range so make sure the value is in the literal pool. */
4902 case UNSPEC_GOT:
4903 if (flag_pic == 2)
4904 new_rtx = force_const_mem (Pmode, orig);
4905 break;
4906
4907 /* For @GOTENT larl is used. This is handled like local
4908 symbol refs. */
4909 case UNSPEC_GOTENT:
4910 gcc_unreachable ();
4911 break;
4912
4913 /* @PLT is OK as is on 64-bit, must be converted to
4914 GOT-relative @PLTOFF on 31-bit. */
4915 case UNSPEC_PLT:
4916 if (!TARGET_CPU_ZARCH)
4917 {
4918 rtx temp = reg? reg : gen_reg_rtx (Pmode);
4919
4920 if (reload_in_progress || reload_completed)
4921 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
4922
4923 addr = XVECEXP (addr, 0, 0);
4924 addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr),
4925 UNSPEC_PLTOFF);
4926 if (addend != const0_rtx)
4927 addr = gen_rtx_PLUS (Pmode, addr, addend);
4928 addr = gen_rtx_CONST (Pmode, addr);
4929 addr = force_const_mem (Pmode, addr);
4930 emit_move_insn (temp, addr);
4931
4932 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, temp);
4933 if (reg != 0)
4934 {
4935 s390_load_address (reg, new_rtx);
4936 new_rtx = reg;
4937 }
4938 }
4939 else
4940 /* On 64 bit larl can be used. This case is handled like
4941 local symbol refs. */
4942 gcc_unreachable ();
4943 break;
4944
4945 /* Everything else cannot happen. */
4946 default:
4947 gcc_unreachable ();
4948 }
4949 }
4950 else if (addend != const0_rtx)
4951 {
4952 /* Otherwise, compute the sum. */
4953
4954 rtx base = legitimize_pic_address (addr, reg);
4955 new_rtx = legitimize_pic_address (addend,
4956 base == reg ? NULL_RTX : reg);
4957 if (GET_CODE (new_rtx) == CONST_INT)
4958 new_rtx = plus_constant (Pmode, base, INTVAL (new_rtx));
4959 else
4960 {
4961 if (GET_CODE (new_rtx) == PLUS && CONSTANT_P (XEXP (new_rtx, 1)))
4962 {
4963 base = gen_rtx_PLUS (Pmode, base, XEXP (new_rtx, 0));
4964 new_rtx = XEXP (new_rtx, 1);
4965 }
4966 new_rtx = gen_rtx_PLUS (Pmode, base, new_rtx);
4967 }
4968
4969 if (GET_CODE (new_rtx) == CONST)
4970 new_rtx = XEXP (new_rtx, 0);
4971 new_rtx = force_operand (new_rtx, 0);
4972 }
4973
4974 return new_rtx;
4975 }
4976
4977 /* Load the thread pointer into a register. */
4978
4979 rtx
4980 s390_get_thread_pointer (void)
4981 {
4982 rtx tp = gen_reg_rtx (Pmode);
4983
4984 emit_move_insn (tp, gen_rtx_REG (Pmode, TP_REGNUM));
4985 mark_reg_pointer (tp, BITS_PER_WORD);
4986
4987 return tp;
4988 }
4989
4990 /* Emit a TLS call insn. The call target is the SYMBOL_REF stored
4991 in s390_tls_symbol which always refers to __tls_get_offset.
4992 The returned offset is written to RESULT_REG and a USE rtx is
4993 generated for TLS_CALL. */
4994
4995 static GTY(()) rtx s390_tls_symbol;
4996
4997 static void
4998 s390_emit_tls_call_insn (rtx result_reg, rtx tls_call)
4999 {
5000 rtx insn;
5001
5002 if (!flag_pic)
5003 emit_insn (s390_load_got ());
5004
5005 if (!s390_tls_symbol)
5006 s390_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, "__tls_get_offset");
5007
5008 insn = s390_emit_call (s390_tls_symbol, tls_call, result_reg,
5009 gen_rtx_REG (Pmode, RETURN_REGNUM));
5010
5011 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), result_reg);
5012 RTL_CONST_CALL_P (insn) = 1;
5013 }
5014
5015 /* ADDR contains a thread-local SYMBOL_REF. Generate code to compute
5016 this (thread-local) address. REG may be used as temporary. */
5017
5018 static rtx
5019 legitimize_tls_address (rtx addr, rtx reg)
5020 {
5021 rtx new_rtx, tls_call, temp, base, r2;
5022 rtx_insn *insn;
5023
5024 if (GET_CODE (addr) == SYMBOL_REF)
5025 switch (tls_symbolic_operand (addr))
5026 {
5027 case TLS_MODEL_GLOBAL_DYNAMIC:
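/* Global dynamic: call __tls_get_offset on the @TLSGD constant
   and add the result to the thread pointer. */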
5028 start_sequence ();
5029 r2 = gen_rtx_REG (Pmode, 2);
5030 tls_call = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_TLSGD);
5031 new_rtx = gen_rtx_CONST (Pmode, tls_call);
5032 new_rtx = force_const_mem (Pmode, new_rtx);
5033 emit_move_insn (r2, new_rtx);
5034 s390_emit_tls_call_insn (r2, tls_call);
5035 insn = get_insns ();
5036 end_sequence ();
5037
5038 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_NTPOFF);
5039 temp = gen_reg_rtx (Pmode);
5040 emit_libcall_block (insn, temp, r2, new_rtx);
5041
5042 new_rtx = gen_rtx_PLUS (Pmode, s390_get_thread_pointer (), temp);
5043 if (reg != 0)
5044 {
5045 s390_load_address (reg, new_rtx);
5046 new_rtx = reg;
5047 }
5048 break;
5049
5050 case TLS_MODEL_LOCAL_DYNAMIC:
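/* Local dynamic: call __tls_get_offset once for the module base
   (@TLSLDM), then add the symbol's @DTPOFF offset and the thread
   pointer. */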
5051 start_sequence ();
5052 r2 = gen_rtx_REG (Pmode, 2);
5053 tls_call = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TLSLDM);
5054 new_rtx = gen_rtx_CONST (Pmode, tls_call);
5055 new_rtx = force_const_mem (Pmode, new_rtx);
5056 emit_move_insn (r2, new_rtx);
5057 s390_emit_tls_call_insn (r2, tls_call);
5058 insn = get_insns ();
5059 end_sequence ();
5060
5061 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TLSLDM_NTPOFF);
5062 temp = gen_reg_rtx (Pmode);
5063 emit_libcall_block (insn, temp, r2, new_rtx);
5064
5065 new_rtx = gen_rtx_PLUS (Pmode, s390_get_thread_pointer (), temp);
5066 base = gen_reg_rtx (Pmode);
5067 s390_load_address (base, new_rtx);
5068
5069 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_DTPOFF);
5070 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
5071 new_rtx = force_const_mem (Pmode, new_rtx);
5072 temp = gen_reg_rtx (Pmode);
5073 emit_move_insn (temp, new_rtx);
5074
5075 new_rtx = gen_rtx_PLUS (Pmode, base, temp);
5076 if (reg != 0)
5077 {
5078 s390_load_address (reg, new_rtx);
5079 new_rtx = reg;
5080 }
5081 break;
5082
5083 case TLS_MODEL_INITIAL_EXEC:
5084 if (flag_pic == 1)
5085 {
5086 /* Assume GOT offset < 4k. This is handled the same way
5087 in both 31- and 64-bit code. */
5088
5089 if (reload_in_progress || reload_completed)
5090 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
5091
5092 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTNTPOFF);
5093 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
5094 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
5095 new_rtx = gen_const_mem (Pmode, new_rtx);
5096 temp = gen_reg_rtx (Pmode);
5097 emit_move_insn (temp, new_rtx);
5098 }
5099 else if (TARGET_CPU_ZARCH)
5100 {
5101 /* If the GOT offset might be >= 4k, we determine the position
5102 of the GOT entry via a PC-relative LARL. */
5103
5104 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_INDNTPOFF);
5105 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
5106 temp = gen_reg_rtx (Pmode);
5107 emit_move_insn (temp, new_rtx);
5108
5109 new_rtx = gen_const_mem (Pmode, temp);
5110 temp = gen_reg_rtx (Pmode);
5111 emit_move_insn (temp, new_rtx);
5112 }
5113 else if (flag_pic)
5114 {
5115 /* If the GOT offset might be >= 4k, we have to load it
5116 from the literal pool. */
5117
5118 if (reload_in_progress || reload_completed)
5119 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
5120
5121 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTNTPOFF);
5122 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
5123 new_rtx = force_const_mem (Pmode, new_rtx);
5124 temp = gen_reg_rtx (Pmode);
5125 emit_move_insn (temp, new_rtx);
5126
5127 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, temp);
5128 new_rtx = gen_const_mem (Pmode, new_rtx);
5129
5130 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, new_rtx, addr), UNSPEC_TLS_LOAD);
5131 temp = gen_reg_rtx (Pmode);
5132 emit_insn (gen_rtx_SET (temp, new_rtx));
5133 }
5134 else
5135 {
5136 /* In position-dependent code, load the absolute address of
5137 the GOT entry from the literal pool. */
5138
5139 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_INDNTPOFF);
5140 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
5141 new_rtx = force_const_mem (Pmode, new_rtx);
5142 temp = gen_reg_rtx (Pmode);
5143 emit_move_insn (temp, new_rtx);
5144
5145 new_rtx = temp;
5146 new_rtx = gen_const_mem (Pmode, new_rtx);
5147 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, new_rtx, addr), UNSPEC_TLS_LOAD);
5148 temp = gen_reg_rtx (Pmode);
5149 emit_insn (gen_rtx_SET (temp, new_rtx));
5150 }
5151
5152 new_rtx = gen_rtx_PLUS (Pmode, s390_get_thread_pointer (), temp);
5153 if (reg != 0)
5154 {
5155 s390_load_address (reg, new_rtx);
5156 new_rtx = reg;
5157 }
5158 break;
5159
5160 case TLS_MODEL_LOCAL_EXEC:
5161 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_NTPOFF);
5162 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
5163 new_rtx = force_const_mem (Pmode, new_rtx);
5164 temp = gen_reg_rtx (Pmode);
5165 emit_move_insn (temp, new_rtx);
5166
5167 new_rtx = gen_rtx_PLUS (Pmode, s390_get_thread_pointer (), temp);
5168 if (reg != 0)
5169 {
5170 s390_load_address (reg, new_rtx);
5171 new_rtx = reg;
5172 }
5173 break;
5174
5175 default:
5176 gcc_unreachable ();
5177 }
5178
5179 else if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == UNSPEC)
5180 {
5181 switch (XINT (XEXP (addr, 0), 1))
5182 {
5183 case UNSPEC_INDNTPOFF:
5184 gcc_assert (TARGET_CPU_ZARCH);
5185 new_rtx = addr;
5186 break;
5187
5188 default:
5189 gcc_unreachable ();
5190 }
5191 }
5192
5193 else if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS
5194 && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT)
5195 {
5196 new_rtx = XEXP (XEXP (addr, 0), 0);
5197 if (GET_CODE (new_rtx) != SYMBOL_REF)
5198 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
5199
5200 new_rtx = legitimize_tls_address (new_rtx, reg);
5201 new_rtx = plus_constant (Pmode, new_rtx,
5202 INTVAL (XEXP (XEXP (addr, 0), 1)));
5203 new_rtx = force_operand (new_rtx, 0);
5204 }
5205
5206 else
5207 gcc_unreachable (); /* for now ... */
5208
5209 return new_rtx;
5210 }
5211
5212 /* Emit insns making the address in operands[1] valid for a standard
5213 move to operands[0]. operands[1] is replaced by an address which
5214 should be used instead of the former RTX to emit the move
5215 pattern. */
5216
5217 void
5218 emit_symbolic_move (rtx *operands)
5219 {
5220 rtx temp = !can_create_pseudo_p () ? operands[0] : gen_reg_rtx (Pmode);
5221
5222 if (GET_CODE (operands[0]) == MEM)
5223 operands[1] = force_reg (Pmode, operands[1]);
5224 else if (TLS_SYMBOLIC_CONST (operands[1]))
5225 operands[1] = legitimize_tls_address (operands[1], temp);
5226 else if (flag_pic)
5227 operands[1] = legitimize_pic_address (operands[1], temp);
5228 }
5229
5230 /* Try machine-dependent ways of modifying an illegitimate address X
5231 to be legitimate. If we find one, return the new, valid address.
5232
5233 OLDX is the address as it was before break_out_memory_refs was called.
5234 In some cases it is useful to look at this to decide what needs to be done.
5235
5236 MODE is the mode of the operand pointed to by X.
5237
5238 When -fpic is used, special handling is needed for symbolic references.
5239 See comments by legitimize_pic_address for details. */
5240
5241 static rtx
5242 s390_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
5243 machine_mode mode ATTRIBUTE_UNUSED)
5244 {
5245 rtx constant_term = const0_rtx;
5246
5247 if (TLS_SYMBOLIC_CONST (x))
5248 {
5249 x = legitimize_tls_address (x, 0);
5250
5251 if (s390_legitimate_address_p (mode, x, FALSE))
5252 return x;
5253 }
5254 else if (GET_CODE (x) == PLUS
5255 && (TLS_SYMBOLIC_CONST (XEXP (x, 0))
5256 || TLS_SYMBOLIC_CONST (XEXP (x, 1))))
5257 {
5258 return x;
5259 }
5260 else if (flag_pic)
5261 {
5262 if (SYMBOLIC_CONST (x)
5263 || (GET_CODE (x) == PLUS
5264 && (SYMBOLIC_CONST (XEXP (x, 0))
5265 || SYMBOLIC_CONST (XEXP (x, 1)))))
5266 x = legitimize_pic_address (x, 0);
5267
5268 if (s390_legitimate_address_p (mode, x, FALSE))
5269 return x;
5270 }
5271
5272 x = eliminate_constant_term (x, &constant_term);
5273
5274 /* Optimize loading of large displacements by splitting them
5275 into the multiple of 4K and the rest; this allows the
5276 former to be CSE'd if possible.
5277
5278 Don't do this if the displacement is added to a register
5279 pointing into the stack frame, as the offsets will
5280 change later anyway. */
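/* For example, a displacement of 0x12345 is split into 0x12000,
   which is loaded into a register (and thus CSE'able), plus 0x345,
   which fits the 12-bit displacement field. */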
5281
5282 if (GET_CODE (constant_term) == CONST_INT
5283 && !TARGET_LONG_DISPLACEMENT
5284 && !DISP_IN_RANGE (INTVAL (constant_term))
5285 && !(REG_P (x) && REGNO_PTR_FRAME_P (REGNO (x))))
5286 {
5287 HOST_WIDE_INT lower = INTVAL (constant_term) & 0xfff;
5288 HOST_WIDE_INT upper = INTVAL (constant_term) ^ lower;
5289
5290 rtx temp = gen_reg_rtx (Pmode);
5291 rtx val = force_operand (GEN_INT (upper), temp);
5292 if (val != temp)
5293 emit_move_insn (temp, val);
5294
5295 x = gen_rtx_PLUS (Pmode, x, temp);
5296 constant_term = GEN_INT (lower);
5297 }
5298
5299 if (GET_CODE (x) == PLUS)
5300 {
5301 if (GET_CODE (XEXP (x, 0)) == REG)
5302 {
5303 rtx temp = gen_reg_rtx (Pmode);
5304 rtx val = force_operand (XEXP (x, 1), temp);
5305 if (val != temp)
5306 emit_move_insn (temp, val);
5307
5308 x = gen_rtx_PLUS (Pmode, XEXP (x, 0), temp);
5309 }
5310
5311 else if (GET_CODE (XEXP (x, 1)) == REG)
5312 {
5313 rtx temp = gen_reg_rtx (Pmode);
5314 rtx val = force_operand (XEXP (x, 0), temp);
5315 if (val != temp)
5316 emit_move_insn (temp, val);
5317
5318 x = gen_rtx_PLUS (Pmode, temp, XEXP (x, 1));
5319 }
5320 }
5321
5322 if (constant_term != const0_rtx)
5323 x = gen_rtx_PLUS (Pmode, x, constant_term);
5324
5325 return x;
5326 }
5327
5328 /* Try a machine-dependent way of reloading an illegitimate address AD
5329 operand. If we find one, push the reload and return the new address.
5330
5331 MODE is the mode of the enclosing MEM. OPNUM is the operand number
5332 and TYPE is the reload type of the current reload. */
5333
5334 rtx
5335 legitimize_reload_address (rtx ad, machine_mode mode ATTRIBUTE_UNUSED,
5336 int opnum, int type)
5337 {
5338 if (!optimize || TARGET_LONG_DISPLACEMENT)
5339 return NULL_RTX;
5340
5341 if (GET_CODE (ad) == PLUS)
5342 {
5343 rtx tem = simplify_binary_operation (PLUS, Pmode,
5344 XEXP (ad, 0), XEXP (ad, 1));
5345 if (tem)
5346 ad = tem;
5347 }
5348
5349 if (GET_CODE (ad) == PLUS
5350 && GET_CODE (XEXP (ad, 0)) == REG
5351 && GET_CODE (XEXP (ad, 1)) == CONST_INT
5352 && !DISP_IN_RANGE (INTVAL (XEXP (ad, 1))))
5353 {
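/* Split the displacement into a 12-bit lower part that fits the
   instruction's displacement field and a 4k-aligned upper part
   that gets reloaded into a base register. */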
5354 HOST_WIDE_INT lower = INTVAL (XEXP (ad, 1)) & 0xfff;
5355 HOST_WIDE_INT upper = INTVAL (XEXP (ad, 1)) ^ lower;
5356 rtx cst, tem, new_rtx;
5357
5358 cst = GEN_INT (upper);
5359 if (!legitimate_reload_constant_p (cst))
5360 cst = force_const_mem (Pmode, cst);
5361
5362 tem = gen_rtx_PLUS (Pmode, XEXP (ad, 0), cst);
5363 new_rtx = gen_rtx_PLUS (Pmode, tem, GEN_INT (lower));
5364
5365 push_reload (XEXP (tem, 1), 0, &XEXP (tem, 1), 0,
5366 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
5367 opnum, (enum reload_type) type);
5368 return new_rtx;
5369 }
5370
5371 return NULL_RTX;
5372 }
5373
5374 /* Emit code to move LEN bytes from SRC to DST. */
5375
5376 bool
5377 s390_expand_movmem (rtx dst, rtx src, rtx len)
5378 {
5379 /* When tuning for z10 or higher we rely on the Glibc functions to
5380 do the right thing. Only for constant lengths below 64k do we
5381 generate inline code. */
5382 if (s390_tune >= PROCESSOR_2097_Z10
5383 && (GET_CODE (len) != CONST_INT || INTVAL (len) > (1<<16)))
5384 return false;
5385
5386 /* Expand memcpy for constant length operands without a loop if it
5387 is shorter that way.
5388
5389 With a constant length argument a
5390 memcpy loop (without pfd) is 36 bytes -> 6 * mvc */
5391 if (GET_CODE (len) == CONST_INT
5392 && INTVAL (len) >= 0
5393 && INTVAL (len) <= 256 * 6
5394 && (!TARGET_MVCLE || INTVAL (len) <= 256))
5395 {
5396 HOST_WIDE_INT o, l;
5397
5398 for (l = INTVAL (len), o = 0; l > 0; l -= 256, o += 256)
5399 {
5400 rtx newdst = adjust_address (dst, BLKmode, o);
5401 rtx newsrc = adjust_address (src, BLKmode, o);
5402 emit_insn (gen_movmem_short (newdst, newsrc,
5403 GEN_INT (l > 256 ? 255 : l - 1)));
5404 }
5405 }
5406
5407 else if (TARGET_MVCLE)
5408 {
5409 emit_insn (gen_movmem_long (dst, src, convert_to_mode (Pmode, len, 1)));
5410 }
5411
5412 else
5413 {
5414 rtx dst_addr, src_addr, count, blocks, temp;
5415 rtx_code_label *loop_start_label = gen_label_rtx ();
5416 rtx_code_label *loop_end_label = gen_label_rtx ();
5417 rtx_code_label *end_label = gen_label_rtx ();
5418 machine_mode mode;
5419
5420 mode = GET_MODE (len);
5421 if (mode == VOIDmode)
5422 mode = Pmode;
5423
5424 dst_addr = gen_reg_rtx (Pmode);
5425 src_addr = gen_reg_rtx (Pmode);
5426 count = gen_reg_rtx (mode);
5427 blocks = gen_reg_rtx (mode);
5428
5429 convert_move (count, len, 1);
5430 emit_cmp_and_jump_insns (count, const0_rtx,
5431 EQ, NULL_RTX, mode, 1, end_label);
5432
5433 emit_move_insn (dst_addr, force_operand (XEXP (dst, 0), NULL_RTX));
5434 emit_move_insn (src_addr, force_operand (XEXP (src, 0), NULL_RTX));
5435 dst = change_address (dst, VOIDmode, dst_addr);
5436 src = change_address (src, VOIDmode, src_addr);
5437
5438 temp = expand_binop (mode, add_optab, count, constm1_rtx, count, 1,
5439 OPTAB_DIRECT);
5440 if (temp != count)
5441 emit_move_insn (count, temp);
5442
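/* blocks = (len - 1) >> 8 is the number of full 256-byte mvc
   blocks the loop below executes; the remainder is copied by the
   final mvc after the loop. */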
5443 temp = expand_binop (mode, lshr_optab, count, GEN_INT (8), blocks, 1,
5444 OPTAB_DIRECT);
5445 if (temp != blocks)
5446 emit_move_insn (blocks, temp);
5447
5448 emit_cmp_and_jump_insns (blocks, const0_rtx,
5449 EQ, NULL_RTX, mode, 1, loop_end_label);
5450
5451 emit_label (loop_start_label);
5452
5453 if (TARGET_Z10
5454 && (GET_CODE (len) != CONST_INT || INTVAL (len) > 768))
5455 {
5456 rtx prefetch;
5457
5458 /* Issue a read prefetch for the +3 cache line. */
5459 prefetch = gen_prefetch (gen_rtx_PLUS (Pmode, src_addr, GEN_INT (768)),
5460 const0_rtx, const0_rtx);
5461 PREFETCH_SCHEDULE_BARRIER_P (prefetch) = true;
5462 emit_insn (prefetch);
5463
5464 /* Issue a write prefetch for the +3 cache line. */
5465 prefetch = gen_prefetch (gen_rtx_PLUS (Pmode, dst_addr, GEN_INT (768)),
5466 const1_rtx, const0_rtx);
5467 PREFETCH_SCHEDULE_BARRIER_P (prefetch) = true;
5468 emit_insn (prefetch);
5469 }
5470
5471 emit_insn (gen_movmem_short (dst, src, GEN_INT (255)));
5472 s390_load_address (dst_addr,
5473 gen_rtx_PLUS (Pmode, dst_addr, GEN_INT (256)));
5474 s390_load_address (src_addr,
5475 gen_rtx_PLUS (Pmode, src_addr, GEN_INT (256)));
5476
5477 temp = expand_binop (mode, add_optab, blocks, constm1_rtx, blocks, 1,
5478 OPTAB_DIRECT);
5479 if (temp != blocks)
5480 emit_move_insn (blocks, temp);
5481
5482 emit_cmp_and_jump_insns (blocks, const0_rtx,
5483 EQ, NULL_RTX, mode, 1, loop_end_label);
5484
5485 emit_jump (loop_start_label);
5486 emit_label (loop_end_label);
5487
5488 emit_insn (gen_movmem_short (dst, src,
5489 convert_to_mode (Pmode, count, 1)));
5490 emit_label (end_label);
5491 }
5492 return true;
5493 }
5494
5495 /* Emit code to set LEN bytes at DST to VAL.
5496 Make use of clrmem if VAL is zero. */
5497
5498 void
5499 s390_expand_setmem (rtx dst, rtx len, rtx val)
5500 {
5501 if (GET_CODE (len) == CONST_INT && INTVAL (len) <= 0)
5502 return;
5503
5504 gcc_assert (GET_CODE (val) == CONST_INT || GET_MODE (val) == QImode);
5505
5506 /* Expand setmem/clrmem for a constant length operand without a
5507 loop if it will be shorter that way.
5508 With a constant length and without pfd argument a
5509 clrmem loop is 32 bytes -> 5.3 * xc
5510 setmem loop is 36 bytes -> 3.6 * (mvi/stc + mvc) */
5511 if (GET_CODE (len) == CONST_INT
5512 && ((INTVAL (len) <= 256 * 5 && val == const0_rtx)
5513 || INTVAL (len) <= 257 * 3)
5514 && (!TARGET_MVCLE || INTVAL (len) <= 256))
5515 {
5516 HOST_WIDE_INT o, l;
5517
5518 if (val == const0_rtx)
5519 /* clrmem: emit 256 byte blockwise XCs. */
5520 for (l = INTVAL (len), o = 0; l > 0; l -= 256, o += 256)
5521 {
5522 rtx newdst = adjust_address (dst, BLKmode, o);
5523 emit_insn (gen_clrmem_short (newdst,
5524 GEN_INT (l > 256 ? 255 : l - 1)));
5525 }
5526 else
5527 /* setmem: emit 1(mvi) + 256(mvc) byte blockwise memsets by
5528 setting first byte to val and using a 256 byte mvc with one
5529 byte overlap to propagate the byte. */
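/* Conceptually, one full 257 byte chunk with val = 0x42 becomes
   (sketch):
     mvi 0(dst),0x42         set the first byte to val
     mvc 1(256,dst),0(dst)   mvc copies strictly left to right, so
                             the byte just written keeps propagating
                             through the whole destination  */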
5530 for (l = INTVAL (len), o = 0; l > 0; l -= 257, o += 257)
5531 {
5532 rtx newdst = adjust_address (dst, BLKmode, o);
5533 emit_move_insn (adjust_address (dst, QImode, o), val);
5534 if (l > 1)
5535 {
5536 rtx newdstp1 = adjust_address (dst, BLKmode, o + 1);
5537 emit_insn (gen_movmem_short (newdstp1, newdst,
5538 GEN_INT (l > 257 ? 255 : l - 2)));
5539 }
5540 }
5541 }
5542
5543 else if (TARGET_MVCLE)
5544 {
5545 val = force_not_mem (convert_modes (Pmode, QImode, val, 1));
5546 if (TARGET_64BIT)
5547 emit_insn (gen_setmem_long_di (dst, convert_to_mode (Pmode, len, 1),
5548 val));
5549 else
5550 emit_insn (gen_setmem_long_si (dst, convert_to_mode (Pmode, len, 1),
5551 val));
5552 }
5553
5554 else
5555 {
5556 rtx dst_addr, count, blocks, temp, dstp1 = NULL_RTX;
5557 rtx_code_label *loop_start_label = gen_label_rtx ();
5558 rtx_code_label *onebyte_end_label = gen_label_rtx ();
5559 rtx_code_label *zerobyte_end_label = gen_label_rtx ();
5560 rtx_code_label *restbyte_end_label = gen_label_rtx ();
5561 machine_mode mode;
5562
5563 mode = GET_MODE (len);
5564 if (mode == VOIDmode)
5565 mode = Pmode;
5566
5567 dst_addr = gen_reg_rtx (Pmode);
5568 count = gen_reg_rtx (mode);
5569 blocks = gen_reg_rtx (mode);
5570
5571 convert_move (count, len, 1);
5572 emit_cmp_and_jump_insns (count, const0_rtx,
5573 EQ, NULL_RTX, mode, 1, zerobyte_end_label,
5574 profile_probability::very_unlikely ());
5575
5576 /* We need to make a copy of the target address since memset is
5577 supposed to return it unmodified. We have to make it here
5578 already since the new reg is used at onebyte_end_label. */
5579 emit_move_insn (dst_addr, force_operand (XEXP (dst, 0), NULL_RTX));
5580 dst = change_address (dst, VOIDmode, dst_addr);
5581
5582 if (val != const0_rtx)
5583 {
5584 /* When using the overlapping mvc the original target
5585 address is only accessed as single byte entity (even by
5586 the mvc reading this value). */
5587 set_mem_size (dst, 1);
5588 dstp1 = adjust_address (dst, VOIDmode, 1);
5589 emit_cmp_and_jump_insns (count,
5590 const1_rtx, EQ, NULL_RTX, mode, 1,
5591 onebyte_end_label,
5592 profile_probability::very_unlikely ());
5593 }
5594
5595 /* There is one unconditional (mvi+mvc)/xc after the loop
5596 dealing with the rest of the bytes.  Subtracting two (mvi+mvc)
5597 or one (xc) here leaves this number of bytes to be handled by
5598 it. */
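/* Worked example (illustrative): len = 600 with val != 0.  count
   becomes 598, blocks = 598 >> 8 = 2, so the loop runs twice and
   sets 2 * 256 = 512 bytes.  The trailing mvi+mvc then covers
   1 + ((598 & 255) + 1) = 88 bytes, 512 + 88 = 600 in total.  */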
5599 temp = expand_binop (mode, add_optab, count,
5600 val == const0_rtx ? constm1_rtx : GEN_INT (-2),
5601 count, 1, OPTAB_DIRECT);
5602 if (temp != count)
5603 emit_move_insn (count, temp);
5604
5605 temp = expand_binop (mode, lshr_optab, count, GEN_INT (8), blocks, 1,
5606 OPTAB_DIRECT);
5607 if (temp != blocks)
5608 emit_move_insn (blocks, temp);
5609
5610 emit_cmp_and_jump_insns (blocks, const0_rtx,
5611 EQ, NULL_RTX, mode, 1, restbyte_end_label);
5612
5613 emit_jump (loop_start_label);
5614
5615 if (val != const0_rtx)
5616 {
5617 /* The 1 byte != 0 special case. Not handled efficiently
5618 since we require two jumps for that. However, this
5619 should be very rare. */
5620 emit_label (onebyte_end_label);
5621 emit_move_insn (adjust_address (dst, QImode, 0), val);
5622 emit_jump (zerobyte_end_label);
5623 }
5624
5625 emit_label (loop_start_label);
5626
5627 if (TARGET_Z10
5628 && (GET_CODE (len) != CONST_INT || INTVAL (len) > 1024))
5629 {
5630 /* Issue a write prefetch for the +4 cache line. */
5631 rtx prefetch = gen_prefetch (gen_rtx_PLUS (Pmode, dst_addr,
5632 GEN_INT (1024)),
5633 const1_rtx, const0_rtx);
5634 emit_insn (prefetch);
5635 PREFETCH_SCHEDULE_BARRIER_P (prefetch) = true;
5636 }
5637
5638 if (val == const0_rtx)
5639 emit_insn (gen_clrmem_short (dst, GEN_INT (255)));
5640 else
5641 {
5642 /* Set the first byte in the block to the value and use an
5643 overlapping mvc for the block. */
5644 emit_move_insn (adjust_address (dst, QImode, 0), val);
5645 emit_insn (gen_movmem_short (dstp1, dst, GEN_INT (254)));
5646 }
5647 s390_load_address (dst_addr,
5648 gen_rtx_PLUS (Pmode, dst_addr, GEN_INT (256)));
5649
5650 temp = expand_binop (mode, add_optab, blocks, constm1_rtx, blocks, 1,
5651 OPTAB_DIRECT);
5652 if (temp != blocks)
5653 emit_move_insn (blocks, temp);
5654
5655 emit_cmp_and_jump_insns (blocks, const0_rtx,
5656 NE, NULL_RTX, mode, 1, loop_start_label);
5657
5658 emit_label (restbyte_end_label);
5659
5660 if (val == const0_rtx)
5661 emit_insn (gen_clrmem_short (dst, convert_to_mode (Pmode, count, 1)));
5662 else
5663 {
5664 /* Set the first byte in the block to the value and use an
5665 overlapping mvc for the block. */
5666 emit_move_insn (adjust_address (dst, QImode, 0), val);
5667 /* execute only uses the lowest 8 bits of count, which is
5668 exactly what we need here. */
5669 emit_insn (gen_movmem_short (dstp1, dst,
5670 convert_to_mode (Pmode, count, 1)));
5671 }
5672
5673 emit_label (zerobyte_end_label);
5674 }
5675 }
5676
5677 /* Emit code to compare LEN bytes at OP0 with those at OP1,
5678 and return the result in TARGET. */
5679
5680 bool
5681 s390_expand_cmpmem (rtx target, rtx op0, rtx op1, rtx len)
5682 {
5683 rtx ccreg = gen_rtx_REG (CCUmode, CC_REGNUM);
5684 rtx tmp;
5685
5686 /* When tuning for z10 or higher we rely on the Glibc functions to
5687 do the right thing. Only for constant lengths below 64k do we
5688 generate inline code. */
5689 if (s390_tune >= PROCESSOR_2097_Z10
5690 && (GET_CODE (len) != CONST_INT || INTVAL (len) > (1<<16)))
5691 return false;
5692
5693 /* As the result of CMPINT is inverted compared to what we need,
5694 we have to swap the operands. */
5695 tmp = op0; op0 = op1; op1 = tmp;
5696
5697 if (GET_CODE (len) == CONST_INT && INTVAL (len) >= 0 && INTVAL (len) <= 256)
5698 {
5699 if (INTVAL (len) > 0)
5700 {
5701 emit_insn (gen_cmpmem_short (op0, op1, GEN_INT (INTVAL (len) - 1)));
5702 emit_insn (gen_cmpint (target, ccreg));
5703 }
5704 else
5705 emit_move_insn (target, const0_rtx);
5706 }
5707 else if (TARGET_MVCLE)
5708 {
5709 emit_insn (gen_cmpmem_long (op0, op1, convert_to_mode (Pmode, len, 1)));
5710 emit_insn (gen_cmpint (target, ccreg));
5711 }
5712 else
5713 {
5714 rtx addr0, addr1, count, blocks, temp;
5715 rtx_code_label *loop_start_label = gen_label_rtx ();
5716 rtx_code_label *loop_end_label = gen_label_rtx ();
5717 rtx_code_label *end_label = gen_label_rtx ();
5718 machine_mode mode;
5719
5720 mode = GET_MODE (len);
5721 if (mode == VOIDmode)
5722 mode = Pmode;
5723
5724 addr0 = gen_reg_rtx (Pmode);
5725 addr1 = gen_reg_rtx (Pmode);
5726 count = gen_reg_rtx (mode);
5727 blocks = gen_reg_rtx (mode);
5728
5729 convert_move (count, len, 1);
5730 emit_cmp_and_jump_insns (count, const0_rtx,
5731 EQ, NULL_RTX, mode, 1, end_label);
5732
5733 emit_move_insn (addr0, force_operand (XEXP (op0, 0), NULL_RTX));
5734 emit_move_insn (addr1, force_operand (XEXP (op1, 0), NULL_RTX));
5735 op0 = change_address (op0, VOIDmode, addr0);
5736 op1 = change_address (op1, VOIDmode, addr1);
5737
5738 temp = expand_binop (mode, add_optab, count, constm1_rtx, count, 1,
5739 OPTAB_DIRECT);
5740 if (temp != count)
5741 emit_move_insn (count, temp);
5742
5743 temp = expand_binop (mode, lshr_optab, count, GEN_INT (8), blocks, 1,
5744 OPTAB_DIRECT);
5745 if (temp != blocks)
5746 emit_move_insn (blocks, temp);
5747
5748 emit_cmp_and_jump_insns (blocks, const0_rtx,
5749 EQ, NULL_RTX, mode, 1, loop_end_label);
5750
5751 emit_label (loop_start_label);
5752
5753 if (TARGET_Z10
5754 && (GET_CODE (len) != CONST_INT || INTVAL (len) > 512))
5755 {
5756 rtx prefetch;
5757
5758 /* Issue a read prefetch for the +2 cache line of operand 1. */
5759 prefetch = gen_prefetch (gen_rtx_PLUS (Pmode, addr0, GEN_INT (512)),
5760 const0_rtx, const0_rtx);
5761 emit_insn (prefetch);
5762 PREFETCH_SCHEDULE_BARRIER_P (prefetch) = true;
5763
5764 /* Issue a read prefetch for the +2 cache line of operand 2. */
5765 prefetch = gen_prefetch (gen_rtx_PLUS (Pmode, addr1, GEN_INT (512)),
5766 const0_rtx, const0_rtx);
5767 emit_insn (prefetch);
5768 PREFETCH_SCHEDULE_BARRIER_P (prefetch) = true;
5769 }
5770
5771 emit_insn (gen_cmpmem_short (op0, op1, GEN_INT (255)));
5772 temp = gen_rtx_NE (VOIDmode, ccreg, const0_rtx);
5773 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
5774 gen_rtx_LABEL_REF (VOIDmode, end_label), pc_rtx);
5775 temp = gen_rtx_SET (pc_rtx, temp);
5776 emit_jump_insn (temp);
5777
5778 s390_load_address (addr0,
5779 gen_rtx_PLUS (Pmode, addr0, GEN_INT (256)));
5780 s390_load_address (addr1,
5781 gen_rtx_PLUS (Pmode, addr1, GEN_INT (256)));
5782
5783 temp = expand_binop (mode, add_optab, blocks, constm1_rtx, blocks, 1,
5784 OPTAB_DIRECT);
5785 if (temp != blocks)
5786 emit_move_insn (blocks, temp);
5787
5788 emit_cmp_and_jump_insns (blocks, const0_rtx,
5789 EQ, NULL_RTX, mode, 1, loop_end_label);
5790
5791 emit_jump (loop_start_label);
5792 emit_label (loop_end_label);
5793
5794 emit_insn (gen_cmpmem_short (op0, op1,
5795 convert_to_mode (Pmode, count, 1)));
5796 emit_label (end_label);
5797
5798 emit_insn (gen_cmpint (target, ccreg));
5799 }
5800 return true;
5801 }
5802
5803 /* Emit a conditional jump to LABEL for condition code mask MASK using
5804 comparison operator COMPARISON. Return the emitted jump insn. */
5805
5806 static rtx_insn *
5807 s390_emit_ccraw_jump (HOST_WIDE_INT mask, enum rtx_code comparison, rtx label)
5808 {
5809 rtx temp;
5810
5811 gcc_assert (comparison == EQ || comparison == NE);
5812 gcc_assert (mask > 0 && mask < 15);
5813
5814 temp = gen_rtx_fmt_ee (comparison, VOIDmode,
5815 gen_rtx_REG (CCRAWmode, CC_REGNUM), GEN_INT (mask));
5816 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
5817 gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
5818 temp = gen_rtx_SET (pc_rtx, temp);
5819 return emit_jump_insn (temp);
5820 }
5821
5822 /* Emit the instructions to implement strlen of STRING and store the
5823 result in TARGET. The string has the known ALIGNMENT. This
5824 version uses vector instructions and is therefore not appropriate
5825 for targets prior to z13. */
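/* Rough sketch of the strategy below: the potentially unaligned head
   of the string is loaded with vll, which never touches bytes beyond
   the next 16 byte boundary and therefore cannot fault past the
   string.  The loop then loads full 16 byte aligned blocks and scans
   each with vfene; byte element 7 of the result vector contains the
   index of the first zero byte, or 16 if the block holds none.  */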
5826
5827 void
5828 s390_expand_vec_strlen (rtx target, rtx string, rtx alignment)
5829 {
5830 rtx highest_index_to_load_reg = gen_reg_rtx (Pmode);
5831 rtx str_reg = gen_reg_rtx (V16QImode);
5832 rtx str_addr_base_reg = gen_reg_rtx (Pmode);
5833 rtx str_idx_reg = gen_reg_rtx (Pmode);
5834 rtx result_reg = gen_reg_rtx (V16QImode);
5835 rtx is_aligned_label = gen_label_rtx ();
5836 rtx into_loop_label = NULL_RTX;
5837 rtx loop_start_label = gen_label_rtx ();
5838 rtx temp;
5839 rtx len = gen_reg_rtx (QImode);
5840 rtx cond;
5841
5842 s390_load_address (str_addr_base_reg, XEXP (string, 0));
5843 emit_move_insn (str_idx_reg, const0_rtx);
5844
5845 if (INTVAL (alignment) < 16)
5846 {
5847 /* Check whether the address happens to be aligned properly and,
5848 if so, jump directly to the aligned loop. */
5849 emit_cmp_and_jump_insns (gen_rtx_AND (Pmode,
5850 str_addr_base_reg, GEN_INT (15)),
5851 const0_rtx, EQ, NULL_RTX,
5852 Pmode, 1, is_aligned_label);
5853
5854 temp = gen_reg_rtx (Pmode);
5855 temp = expand_binop (Pmode, and_optab, str_addr_base_reg,
5856 GEN_INT (15), temp, 1, OPTAB_DIRECT);
5857 gcc_assert (REG_P (temp));
5858 highest_index_to_load_reg =
5859 expand_binop (Pmode, sub_optab, GEN_INT (15), temp,
5860 highest_index_to_load_reg, 1, OPTAB_DIRECT);
5861 gcc_assert (REG_P (highest_index_to_load_reg));
5862 emit_insn (gen_vllv16qi (str_reg,
5863 convert_to_mode (SImode, highest_index_to_load_reg, 1),
5864 gen_rtx_MEM (BLKmode, str_addr_base_reg)));
5865
5866 into_loop_label = gen_label_rtx ();
5867 s390_emit_jump (into_loop_label, NULL_RTX);
5868 emit_barrier ();
5869 }
5870
5871 emit_label (is_aligned_label);
5872 LABEL_NUSES (is_aligned_label) = INTVAL (alignment) < 16 ? 2 : 1;
5873
5874 /* From this point on we only perform 16-byte aligned
5875 loads. */
5876 emit_move_insn (highest_index_to_load_reg, GEN_INT (15));
5877
5878 emit_label (loop_start_label);
5879 LABEL_NUSES (loop_start_label) = 1;
5880
5881 /* Load 16 bytes of the string into VR. */
5882 emit_move_insn (str_reg,
5883 gen_rtx_MEM (V16QImode,
5884 gen_rtx_PLUS (Pmode, str_idx_reg,
5885 str_addr_base_reg)));
5886 if (into_loop_label != NULL_RTX)
5887 {
5888 emit_label (into_loop_label);
5889 LABEL_NUSES (into_loop_label) = 1;
5890 }
5891
5892 /* Increment string index by 16 bytes. */
5893 expand_binop (Pmode, add_optab, str_idx_reg, GEN_INT (16),
5894 str_idx_reg, 1, OPTAB_DIRECT);
5895
5896 emit_insn (gen_vec_vfenesv16qi (result_reg, str_reg, str_reg,
5897 GEN_INT (VSTRING_FLAG_ZS | VSTRING_FLAG_CS)));
5898
5899 add_int_reg_note (s390_emit_ccraw_jump (8, NE, loop_start_label),
5900 REG_BR_PROB,
5901 profile_probability::very_likely ().to_reg_br_prob_note ());
5902 emit_insn (gen_vec_extractv16qiqi (len, result_reg, GEN_INT (7)));
5903
5904 /* If the string pointer wasn't aligned we have loaded less than 16
5905 bytes and the remaining bytes got filled with zeros (by vll).
5906 Now we have to check whether the resulting index lies within the
5907 bytes actually part of the string. */
5908
5909 cond = s390_emit_compare (GT, convert_to_mode (Pmode, len, 1),
5910 highest_index_to_load_reg);
5911 s390_load_address (highest_index_to_load_reg,
5912 gen_rtx_PLUS (Pmode, highest_index_to_load_reg,
5913 const1_rtx));
5914 if (TARGET_64BIT)
5915 emit_insn (gen_movdicc (str_idx_reg, cond,
5916 highest_index_to_load_reg, str_idx_reg));
5917 else
5918 emit_insn (gen_movsicc (str_idx_reg, cond,
5919 highest_index_to_load_reg, str_idx_reg));
5920
5921 add_reg_br_prob_note (s390_emit_jump (is_aligned_label, cond),
5922 profile_probability::very_unlikely ());
5923
5924 expand_binop (Pmode, add_optab, str_idx_reg,
5925 GEN_INT (-16), str_idx_reg, 1, OPTAB_DIRECT);
5926 /* FIXME: len is already zero extended - so avoid the llgcr emitted
5927 here. */
5928 temp = expand_binop (Pmode, add_optab, str_idx_reg,
5929 convert_to_mode (Pmode, len, 1),
5930 target, 1, OPTAB_DIRECT);
5931 if (temp != target)
5932 emit_move_insn (target, temp);
5933 }
5934
5935 void
5936 s390_expand_vec_movstr (rtx result, rtx dst, rtx src)
5937 {
5938 rtx temp = gen_reg_rtx (Pmode);
5939 rtx src_addr = XEXP (src, 0);
5940 rtx dst_addr = XEXP (dst, 0);
5941 rtx src_addr_reg = gen_reg_rtx (Pmode);
5942 rtx dst_addr_reg = gen_reg_rtx (Pmode);
5943 rtx offset = gen_reg_rtx (Pmode);
5944 rtx vsrc = gen_reg_rtx (V16QImode);
5945 rtx vpos = gen_reg_rtx (V16QImode);
5946 rtx loadlen = gen_reg_rtx (SImode);
5947 rtx gpos_qi = gen_reg_rtx (QImode);
5948 rtx gpos = gen_reg_rtx (SImode);
5949 rtx done_label = gen_label_rtx ();
5950 rtx loop_label = gen_label_rtx ();
5951 rtx exit_label = gen_label_rtx ();
5952 rtx full_label = gen_label_rtx ();
5953
5954 /* Perform a quick check for a string ending within the first (up
5955 to) 16 bytes and exit early if successful. */
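/* The boundary code 6 used with vlbb/lcbb below selects a 4 KiB
   block, i.e. the load stops at the next 4 KiB boundary.  Since
   pages are at least 4 KiB this speculative load cannot fault
   beyond the end of the string.  */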
5956
5957 emit_insn (gen_vlbb (vsrc, src, GEN_INT (6)));
5958 emit_insn (gen_lcbb (loadlen, src_addr, GEN_INT (6)));
5959 emit_insn (gen_vfenezv16qi (vpos, vsrc, vsrc));
5960 emit_insn (gen_vec_extractv16qiqi (gpos_qi, vpos, GEN_INT (7)));
5961 emit_move_insn (gpos, gen_rtx_SUBREG (SImode, gpos_qi, 0));
5962 /* gpos is the byte index if a zero was found and 16 otherwise.
5963 So if it is lower than the loaded bytes we have a hit. */
5964 emit_cmp_and_jump_insns (gpos, loadlen, GE, NULL_RTX, SImode, 1,
5965 full_label);
5966 emit_insn (gen_vstlv16qi (vsrc, gpos, dst));
5967
5968 force_expand_binop (Pmode, add_optab, dst_addr, gpos, result,
5969 1, OPTAB_DIRECT);
5970 emit_jump (exit_label);
5971 emit_barrier ();
5972
5973 emit_label (full_label);
5974 LABEL_NUSES (full_label) = 1;
5975
5976 /* Calculate `offset' so that src + offset points to the last byte
5977 before 16 byte alignment. */
5978
5979 /* temp = src_addr & 0xf */
5980 force_expand_binop (Pmode, and_optab, src_addr, GEN_INT (15), temp,
5981 1, OPTAB_DIRECT);
5982
5983 /* offset = 0xf - temp */
5984 emit_move_insn (offset, GEN_INT (15));
5985 force_expand_binop (Pmode, sub_optab, offset, temp, offset,
5986 1, OPTAB_DIRECT);
5987
5988 /* Store `offset' bytes in the destination string. The quick check
5989 has loaded at least `offset' bytes into vsrc. */
5990
5991 emit_insn (gen_vstlv16qi (vsrc, gen_lowpart (SImode, offset), dst));
5992
5993 /* Advance to the next byte to be loaded. */
5994 force_expand_binop (Pmode, add_optab, offset, const1_rtx, offset,
5995 1, OPTAB_DIRECT);
5996
5997 /* Make sure the addresses are single regs which can be used as a
5998 base. */
5999 emit_move_insn (src_addr_reg, src_addr);
6000 emit_move_insn (dst_addr_reg, dst_addr);
6001
6002 /* MAIN LOOP */
6003
6004 emit_label (loop_label);
6005 LABEL_NUSES (loop_label) = 1;
6006
6007 emit_move_insn (vsrc,
6008 gen_rtx_MEM (V16QImode,
6009 gen_rtx_PLUS (Pmode, src_addr_reg, offset)));
6010
6011 emit_insn (gen_vec_vfenesv16qi (vpos, vsrc, vsrc,
6012 GEN_INT (VSTRING_FLAG_ZS | VSTRING_FLAG_CS)));
6013 add_int_reg_note (s390_emit_ccraw_jump (8, EQ, done_label),
6014 REG_BR_PROB, profile_probability::very_unlikely ()
6015 .to_reg_br_prob_note ());
6016
6017 emit_move_insn (gen_rtx_MEM (V16QImode,
6018 gen_rtx_PLUS (Pmode, dst_addr_reg, offset)),
6019 vsrc);
6020 /* offset += 16 */
6021 force_expand_binop (Pmode, add_optab, offset, GEN_INT (16),
6022 offset, 1, OPTAB_DIRECT);
6023
6024 emit_jump (loop_label);
6025 emit_barrier ();
6026
6027 /* REGULAR EXIT */
6028
6029 /* We are done. Add the offset of the zero character to the dst_addr
6030 pointer to get the result. */
6031
6032 emit_label (done_label);
6033 LABEL_NUSES (done_label) = 1;
6034
6035 force_expand_binop (Pmode, add_optab, dst_addr_reg, offset, dst_addr_reg,
6036 1, OPTAB_DIRECT);
6037
6038 emit_insn (gen_vec_extractv16qiqi (gpos_qi, vpos, GEN_INT (7)));
6039 emit_move_insn (gpos, gen_rtx_SUBREG (SImode, gpos_qi, 0));
6040
6041 emit_insn (gen_vstlv16qi (vsrc, gpos, gen_rtx_MEM (BLKmode, dst_addr_reg)));
6042
6043 force_expand_binop (Pmode, add_optab, dst_addr_reg, gpos, result,
6044 1, OPTAB_DIRECT);
6045
6046 /* EARLY EXIT */
6047
6048 emit_label (exit_label);
6049 LABEL_NUSES (exit_label) = 1;
6050 }
6051
6052
6053 /* Expand conditional increment or decrement using alc/slb instructions.
6054 Should generate code setting DST to either SRC or SRC + INCREMENT,
6055 depending on the result of the comparison CMP_OP0 CMP_CODE CMP_OP1.
6056 Returns true if successful, false otherwise.
6057
6058 That makes it possible to implement some if-constructs without jumps e.g.:
6059 (borrow = CC0 | CC1 and carry = CC2 | CC3)
6060 unsigned int a, b, c;
6061 if (a < b) c++; -> CCU b > a -> CC2; c += carry;
6062 if (a < b) c--; -> CCL3 a - b -> borrow; c -= borrow;
6063 if (a <= b) c++; -> CCL3 b - a -> borrow; c += carry;
6064 if (a <= b) c--; -> CCU a <= b -> borrow; c -= borrow;
6065
6066 Checks for EQ and NE with a nonzero value need an additional xor e.g.:
6067 if (a == b) c++; -> CCL3 a ^= b; 0 - a -> borrow; c += carry;
6068 if (a == b) c--; -> CCU a ^= b; a <= 0 -> CC0 | CC1; c -= borrow;
6069 if (a != b) c++; -> CCU a ^= b; a > 0 -> CC2; c += carry;
6070 if (a != b) c--; -> CCL3 a ^= b; 0 - a -> borrow; c -= borrow; */
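/* For instance, "if (a < b) c++;" with unsigned SImode operands can
   conceptually become (an illustrative sketch of the ALC path below):
     clr  %r_b,%r_a     CCU compare; b > a yields CC2, i.e. carry set
     alcr %r_c,%r_zero  c = c + 0 + carry
   so the conditional increment is performed without any branch.  */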
6071
6072 bool
6073 s390_expand_addcc (enum rtx_code cmp_code, rtx cmp_op0, rtx cmp_op1,
6074 rtx dst, rtx src, rtx increment)
6075 {
6076 machine_mode cmp_mode;
6077 machine_mode cc_mode;
6078 rtx op_res;
6079 rtx insn;
6080 rtvec p;
6081 int ret;
6082
6083 if ((GET_MODE (cmp_op0) == SImode || GET_MODE (cmp_op0) == VOIDmode)
6084 && (GET_MODE (cmp_op1) == SImode || GET_MODE (cmp_op1) == VOIDmode))
6085 cmp_mode = SImode;
6086 else if ((GET_MODE (cmp_op0) == DImode || GET_MODE (cmp_op0) == VOIDmode)
6087 && (GET_MODE (cmp_op1) == DImode || GET_MODE (cmp_op1) == VOIDmode))
6088 cmp_mode = DImode;
6089 else
6090 return false;
6091
6092 /* Try ADD LOGICAL WITH CARRY. */
6093 if (increment == const1_rtx)
6094 {
6095 /* Determine CC mode to use. */
6096 if (cmp_code == EQ || cmp_code == NE)
6097 {
6098 if (cmp_op1 != const0_rtx)
6099 {
6100 cmp_op0 = expand_simple_binop (cmp_mode, XOR, cmp_op0, cmp_op1,
6101 NULL_RTX, 0, OPTAB_WIDEN);
6102 cmp_op1 = const0_rtx;
6103 }
6104
6105 cmp_code = cmp_code == EQ ? LEU : GTU;
6106 }
6107
6108 if (cmp_code == LTU || cmp_code == LEU)
6109 {
6110 rtx tem = cmp_op0;
6111 cmp_op0 = cmp_op1;
6112 cmp_op1 = tem;
6113 cmp_code = swap_condition (cmp_code);
6114 }
6115
6116 switch (cmp_code)
6117 {
6118 case GTU:
6119 cc_mode = CCUmode;
6120 break;
6121
6122 case GEU:
6123 cc_mode = CCL3mode;
6124 break;
6125
6126 default:
6127 return false;
6128 }
6129
6130 /* Emit comparison instruction pattern. */
6131 if (!register_operand (cmp_op0, cmp_mode))
6132 cmp_op0 = force_reg (cmp_mode, cmp_op0);
6133
6134 insn = gen_rtx_SET (gen_rtx_REG (cc_mode, CC_REGNUM),
6135 gen_rtx_COMPARE (cc_mode, cmp_op0, cmp_op1));
6136 /* We use insn_invalid_p here to add clobbers if required. */
6137 ret = insn_invalid_p (emit_insn (insn), false);
6138 gcc_assert (!ret);
6139
6140 /* Emit ALC instruction pattern. */
6141 op_res = gen_rtx_fmt_ee (cmp_code, GET_MODE (dst),
6142 gen_rtx_REG (cc_mode, CC_REGNUM),
6143 const0_rtx);
6144
6145 if (src != const0_rtx)
6146 {
6147 if (!register_operand (src, GET_MODE (dst)))
6148 src = force_reg (GET_MODE (dst), src);
6149
6150 op_res = gen_rtx_PLUS (GET_MODE (dst), op_res, src);
6151 op_res = gen_rtx_PLUS (GET_MODE (dst), op_res, const0_rtx);
6152 }
6153
6154 p = rtvec_alloc (2);
6155 RTVEC_ELT (p, 0) =
6156 gen_rtx_SET (dst, op_res);
6157 RTVEC_ELT (p, 1) =
6158 gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, CC_REGNUM));
6159 emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
6160
6161 return true;
6162 }
6163
6164 /* Try SUBTRACT LOGICAL WITH BORROW. */
6165 if (increment == constm1_rtx)
6166 {
6167 /* Determine CC mode to use. */
6168 if (cmp_code == EQ || cmp_code == NE)
6169 {
6170 if (cmp_op1 != const0_rtx)
6171 {
6172 cmp_op0 = expand_simple_binop (cmp_mode, XOR, cmp_op0, cmp_op1,
6173 NULL_RTX, 0, OPTAB_WIDEN);
6174 cmp_op1 = const0_rtx;
6175 }
6176
6177 cmp_code = cmp_code == EQ ? LEU : GTU;
6178 }
6179
6180 if (cmp_code == GTU || cmp_code == GEU)
6181 {
6182 rtx tem = cmp_op0;
6183 cmp_op0 = cmp_op1;
6184 cmp_op1 = tem;
6185 cmp_code = swap_condition (cmp_code);
6186 }
6187
6188 switch (cmp_code)
6189 {
6190 case LEU:
6191 cc_mode = CCUmode;
6192 break;
6193
6194 case LTU:
6195 cc_mode = CCL3mode;
6196 break;
6197
6198 default:
6199 return false;
6200 }
6201
6202 /* Emit comparison instruction pattern. */
6203 if (!register_operand (cmp_op0, cmp_mode))
6204 cmp_op0 = force_reg (cmp_mode, cmp_op0);
6205
6206 insn = gen_rtx_SET (gen_rtx_REG (cc_mode, CC_REGNUM),
6207 gen_rtx_COMPARE (cc_mode, cmp_op0, cmp_op1));
6208 /* We use insn_invalid_p here to add clobbers if required. */
6209 ret = insn_invalid_p (emit_insn (insn), false);
6210 gcc_assert (!ret);
6211
6212 /* Emit SLB instruction pattern. */
6213 if (!register_operand (src, GET_MODE (dst)))
6214 src = force_reg (GET_MODE (dst), src);
6215
6216 op_res = gen_rtx_MINUS (GET_MODE (dst),
6217 gen_rtx_MINUS (GET_MODE (dst), src, const0_rtx),
6218 gen_rtx_fmt_ee (cmp_code, GET_MODE (dst),
6219 gen_rtx_REG (cc_mode, CC_REGNUM),
6220 const0_rtx));
6221 p = rtvec_alloc (2);
6222 RTVEC_ELT (p, 0) =
6223 gen_rtx_SET (dst, op_res);
6224 RTVEC_ELT (p, 1) =
6225 gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, CC_REGNUM));
6226 emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
6227
6228 return true;
6229 }
6230
6231 return false;
6232 }
6233
6234 /* Expand code for the insv template. Return true if successful. */
6235
6236 bool
6237 s390_expand_insv (rtx dest, rtx op1, rtx op2, rtx src)
6238 {
6239 int bitsize = INTVAL (op1);
6240 int bitpos = INTVAL (op2);
6241 machine_mode mode = GET_MODE (dest);
6242 machine_mode smode;
6243 int smode_bsize, mode_bsize;
6244 rtx op, clobber;
6245
6246 if (bitsize + bitpos > GET_MODE_BITSIZE (mode))
6247 return false;
6248
6249 /* Generate INSERT IMMEDIATE (IILL et al). */
6250 /* (set (ze (reg)) (const_int)). */
6251 if (TARGET_ZARCH
6252 && register_operand (dest, word_mode)
6253 && (bitpos % 16) == 0
6254 && (bitsize % 16) == 0
6255 && const_int_operand (src, VOIDmode))
6256 {
6257 HOST_WIDE_INT val = INTVAL (src);
6258 int regpos = bitpos + bitsize;
6259
6260 while (regpos > bitpos)
6261 {
6262 machine_mode putmode;
6263 int putsize;
6264
6265 if (TARGET_EXTIMM && (regpos % 32 == 0) && (regpos >= bitpos + 32))
6266 putmode = SImode;
6267 else
6268 putmode = HImode;
6269
6270 putsize = GET_MODE_BITSIZE (putmode);
6271 regpos -= putsize;
6272 emit_move_insn (gen_rtx_ZERO_EXTRACT (word_mode, dest,
6273 GEN_INT (putsize),
6274 GEN_INT (regpos)),
6275 gen_int_mode (val, putmode));
6276 val >>= putsize;
6277 }
6278 gcc_assert (regpos == bitpos);
6279 return true;
6280 }
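/* Example for the path above (illustrative): inserting a 32 bit
   constant at a 32 bit aligned position of a 64 bit register takes
   a single SImode chunk with TARGET_EXTIMM (one iilf/iihf-style
   insert immediate) and two HImode chunks (iill/iilh-style insert
   immediates) without it.  */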
6281
6282 smode = smallest_int_mode_for_size (bitsize);
6283 smode_bsize = GET_MODE_BITSIZE (smode);
6284 mode_bsize = GET_MODE_BITSIZE (mode);
6285
6286 /* Generate STORE CHARACTERS UNDER MASK (STCM et al). */
6287 if (bitpos == 0
6288 && (bitsize % BITS_PER_UNIT) == 0
6289 && MEM_P (dest)
6290 && (register_operand (src, word_mode)
6291 || const_int_operand (src, VOIDmode)))
6292 {
6293 /* Emit standard pattern if possible. */
6294 if (smode_bsize == bitsize)
6295 {
6296 emit_move_insn (adjust_address (dest, smode, 0),
6297 gen_lowpart (smode, src));
6298 return true;
6299 }
6300
6301 /* (set (ze (mem)) (const_int)). */
6302 else if (const_int_operand (src, VOIDmode))
6303 {
6304 int size = bitsize / BITS_PER_UNIT;
6305 rtx src_mem = adjust_address (force_const_mem (word_mode, src),
6306 BLKmode,
6307 UNITS_PER_WORD - size);
6308
6309 dest = adjust_address (dest, BLKmode, 0);
6310 set_mem_size (dest, size);
6311 s390_expand_movmem (dest, src_mem, GEN_INT (size));
6312 return true;
6313 }
6314
6315 /* (set (ze (mem)) (reg)). */
6316 else if (register_operand (src, word_mode))
6317 {
6318 if (bitsize <= 32)
6319 emit_move_insn (gen_rtx_ZERO_EXTRACT (word_mode, dest, op1,
6320 const0_rtx), src);
6321 else
6322 {
6323 /* Emit st,stcmh sequence. */
6324 int stcmh_width = bitsize - 32;
6325 int size = stcmh_width / BITS_PER_UNIT;
6326
6327 emit_move_insn (adjust_address (dest, SImode, size),
6328 gen_lowpart (SImode, src));
6329 set_mem_size (dest, size);
6330 emit_move_insn (gen_rtx_ZERO_EXTRACT (word_mode, dest,
6331 GEN_INT (stcmh_width),
6332 const0_rtx),
6333 gen_rtx_LSHIFTRT (word_mode, src, GEN_INT (32)));
6334 }
6335 return true;
6336 }
6337 }
6338
6339 /* Generate INSERT CHARACTERS UNDER MASK (IC, ICM et al). */
6340 if ((bitpos % BITS_PER_UNIT) == 0
6341 && (bitsize % BITS_PER_UNIT) == 0
6342 && (bitpos & 32) == ((bitpos + bitsize - 1) & 32)
6343 && MEM_P (src)
6344 && (mode == DImode || mode == SImode)
6345 && register_operand (dest, mode))
6346 {
6347 /* Emit a strict_low_part pattern if possible. */
6348 if (smode_bsize == bitsize && bitpos == mode_bsize - smode_bsize)
6349 {
6350 op = gen_rtx_STRICT_LOW_PART (VOIDmode, gen_lowpart (smode, dest));
6351 op = gen_rtx_SET (op, gen_lowpart (smode, src));
6352 clobber = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, CC_REGNUM));
6353 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clobber)));
6354 return true;
6355 }
6356
6357 /* ??? There are more powerful versions of ICM that are not
6358 completely represented in the md file. */
6359 }
6360
6361 /* For z10, generate ROTATE THEN INSERT SELECTED BITS (RISBG et al). */
6362 if (TARGET_Z10 && (mode == DImode || mode == SImode))
6363 {
6364 machine_mode mode_s = GET_MODE (src);
6365
6366 if (CONSTANT_P (src))
6367 {
6368 /* For constant zero values the representation with AND
6369 appears to be folded in more situations than the (set
6370 (zero_extract) ...).
6371 We only do this when the start and end of the bitfield
6372 remain in the same SImode chunk. That way nihf or nilf
6373 can be used.
6374 The AND patterns might still generate a risbg for this. */
6375 if (src == const0_rtx && bitpos / 32 == (bitpos + bitsize - 1) / 32)
6376 return false;
6377 else
6378 src = force_reg (mode, src);
6379 }
6380 else if (mode_s != mode)
6381 {
6382 gcc_assert (GET_MODE_BITSIZE (mode_s) >= bitsize);
6383 src = force_reg (mode_s, src);
6384 src = gen_lowpart (mode, src);
6385 }
6386
6387 op = gen_rtx_ZERO_EXTRACT (mode, dest, op1, op2);
6388 op = gen_rtx_SET (op, src);
6389
6390 if (!TARGET_ZEC12)
6391 {
6392 clobber = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, CC_REGNUM));
6393 op = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clobber));
6394 }
6395 emit_insn (op);
6396
6397 return true;
6398 }
6399
6400 return false;
6401 }
6402
6403 /* A subroutine of s390_expand_cs_hqi and s390_expand_atomic which returns a
6404 register that holds VAL of mode MODE shifted by COUNT bits. */
6405
6406 static inline rtx
6407 s390_expand_mask_and_shift (rtx val, machine_mode mode, rtx count)
6408 {
6409 val = expand_simple_binop (SImode, AND, val, GEN_INT (GET_MODE_MASK (mode)),
6410 NULL_RTX, 1, OPTAB_DIRECT);
6411 return expand_simple_binop (SImode, ASHIFT, val, count,
6412 NULL_RTX, 1, OPTAB_DIRECT);
6413 }
6414
6415 /* Generate a vector comparison COND of CMP_OP1 and CMP_OP2 and store
6416 the result in TARGET. */
6417
6418 void
6419 s390_expand_vec_compare (rtx target, enum rtx_code cond,
6420 rtx cmp_op1, rtx cmp_op2)
6421 {
6422 machine_mode mode = GET_MODE (target);
6423 bool neg_p = false, swap_p = false;
6424 rtx tmp;
6425
6426 if (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_VECTOR_FLOAT)
6427 {
6428 switch (cond)
6429 {
6430 /* NE a != b -> !(a == b) */
6431 case NE: cond = EQ; neg_p = true; break;
6432 /* UNGT a u> b -> !(b >= a) */
6433 case UNGT: cond = GE; neg_p = true; swap_p = true; break;
6434 /* UNGE a u>= b -> !(b > a) */
6435 case UNGE: cond = GT; neg_p = true; swap_p = true; break;
6436 /* LE: a <= b -> b >= a */
6437 case LE: cond = GE; swap_p = true; break;
6438 /* UNLE: a u<= b -> !(a > b) */
6439 case UNLE: cond = GT; neg_p = true; break;
6440 /* LT: a < b -> b > a */
6441 case LT: cond = GT; swap_p = true; break;
6442 /* UNLT: a u< b -> !(a >= b) */
6443 case UNLT: cond = GE; neg_p = true; break;
6444 case UNEQ:
6445 emit_insn (gen_vec_cmpuneq (target, cmp_op1, cmp_op2));
6446 return;
6447 case LTGT:
6448 emit_insn (gen_vec_cmpltgt (target, cmp_op1, cmp_op2));
6449 return;
6450 case ORDERED:
6451 emit_insn (gen_vec_ordered (target, cmp_op1, cmp_op2));
6452 return;
6453 case UNORDERED:
6454 emit_insn (gen_vec_unordered (target, cmp_op1, cmp_op2));
6455 return;
6456 default: break;
6457 }
6458 }
6459 else
6460 {
6461 switch (cond)
6462 {
6463 /* NE: a != b -> !(a == b) */
6464 case NE: cond = EQ; neg_p = true; break;
6465 /* GE: a >= b -> !(b > a) */
6466 case GE: cond = GT; neg_p = true; swap_p = true; break;
6467 /* GEU: a >= b -> !(b > a) */
6468 case GEU: cond = GTU; neg_p = true; swap_p = true; break;
6469 /* LE: a <= b -> !(a > b) */
6470 case LE: cond = GT; neg_p = true; break;
6471 /* LEU: a <= b -> !(a > b) */
6472 case LEU: cond = GTU; neg_p = true; break;
6473 /* LT: a < b -> b > a */
6474 case LT: cond = GT; swap_p = true; break;
6475 /* LTU: a < b -> b > a */
6476 case LTU: cond = GTU; swap_p = true; break;
6477 default: break;
6478 }
6479 }
6480
6481 if (swap_p)
6482 {
6483 tmp = cmp_op1; cmp_op1 = cmp_op2; cmp_op2 = tmp;
6484 }
6485
6486 emit_insn (gen_rtx_SET (target, gen_rtx_fmt_ee (cond,
6487 mode,
6488 cmp_op1, cmp_op2)));
6489 if (neg_p)
6490 emit_insn (gen_rtx_SET (target, gen_rtx_NOT (mode, target)));
6491 }
6492
6493 /* Expand the comparison CODE of CMP1 and CMP2 and copy 1 or 0 into
6494 TARGET if either all (ALL_P is true) or any (ALL_P is false) of the
6495 elements in CMP1 and CMP2 fulfill the comparison.
6496 This function is only used to emit patterns for the vx builtins and
6497 therefore only handles comparison codes required by the
6498 builtins. */
6499 void
6500 s390_expand_vec_compare_cc (rtx target, enum rtx_code code,
6501 rtx cmp1, rtx cmp2, bool all_p)
6502 {
6503 machine_mode cc_producer_mode, cc_consumer_mode, scratch_mode;
6504 rtx tmp_reg = gen_reg_rtx (SImode);
6505 bool swap_p = false;
6506
6507 if (GET_MODE_CLASS (GET_MODE (cmp1)) == MODE_VECTOR_INT)
6508 {
6509 switch (code)
6510 {
6511 case EQ:
6512 case NE:
6513 cc_producer_mode = CCVEQmode;
6514 break;
6515 case GE:
6516 case LT:
6517 code = swap_condition (code);
6518 swap_p = true;
6519 /* fallthrough */
6520 case GT:
6521 case LE:
6522 cc_producer_mode = CCVIHmode;
6523 break;
6524 case GEU:
6525 case LTU:
6526 code = swap_condition (code);
6527 swap_p = true;
6528 /* fallthrough */
6529 case GTU:
6530 case LEU:
6531 cc_producer_mode = CCVIHUmode;
6532 break;
6533 default:
6534 gcc_unreachable ();
6535 }
6536
6537 scratch_mode = GET_MODE (cmp1);
6538 /* These codes represent inverted CC interpretations. Inverting
6539 an ALL CC mode results in an ANY CC mode and the other way
6540 around. Invert the all_p flag here to compensate for
6541 that. */
6542 if (code == NE || code == LE || code == LEU)
6543 all_p = !all_p;
6544
6545 cc_consumer_mode = all_p ? CCVIALLmode : CCVIANYmode;
6546 }
6547 else if (GET_MODE_CLASS (GET_MODE (cmp1)) == MODE_VECTOR_FLOAT)
6548 {
6549 bool inv_p = false;
6550
6551 switch (code)
6552 {
6553 case EQ: cc_producer_mode = CCVEQmode; break;
6554 case NE: cc_producer_mode = CCVEQmode; inv_p = true; break;
6555 case GT: cc_producer_mode = CCVFHmode; break;
6556 case GE: cc_producer_mode = CCVFHEmode; break;
6557 case UNLE: cc_producer_mode = CCVFHmode; inv_p = true; break;
6558 case UNLT: cc_producer_mode = CCVFHEmode; inv_p = true; break;
6559 case LT: cc_producer_mode = CCVFHmode; code = GT; swap_p = true; break;
6560 case LE: cc_producer_mode = CCVFHEmode; code = GE; swap_p = true; break;
6561 default: gcc_unreachable ();
6562 }
6563 scratch_mode = mode_for_int_vector (GET_MODE (cmp1)).require ();
6564
6565 if (inv_p)
6566 all_p = !all_p;
6567
6568 cc_consumer_mode = all_p ? CCVFALLmode : CCVFANYmode;
6569 }
6570 else
6571 gcc_unreachable ();
6572
6573 if (swap_p)
6574 {
6575 rtx tmp = cmp2;
6576 cmp2 = cmp1;
6577 cmp1 = tmp;
6578 }
6579
6580 emit_insn (gen_rtx_PARALLEL (VOIDmode,
6581 gen_rtvec (2, gen_rtx_SET (
6582 gen_rtx_REG (cc_producer_mode, CC_REGNUM),
6583 gen_rtx_COMPARE (cc_producer_mode, cmp1, cmp2)),
6584 gen_rtx_CLOBBER (VOIDmode,
6585 gen_rtx_SCRATCH (scratch_mode)))));
6586 emit_move_insn (target, const0_rtx);
6587 emit_move_insn (tmp_reg, const1_rtx);
6588
6589 emit_move_insn (target,
6590 gen_rtx_IF_THEN_ELSE (SImode,
6591 gen_rtx_fmt_ee (code, VOIDmode,
6592 gen_rtx_REG (cc_consumer_mode, CC_REGNUM),
6593 const0_rtx),
6594 tmp_reg, target));
6595 }
6596
6597 /* Invert the comparison CODE applied to a CC mode. This is only safe
6598 if we know whether the result was created by a floating point
6599 compare or not. For the CCV modes this is encoded as part of the
6600 mode. */
6601 enum rtx_code
6602 s390_reverse_condition (machine_mode mode, enum rtx_code code)
6603 {
6604 /* Reversal of FP compares takes extra care -- an ordered compare
6605 becomes an unordered compare and vice versa. */
6606 if (mode == CCVFALLmode || mode == CCVFANYmode)
6607 return reverse_condition_maybe_unordered (code);
6608 else if (mode == CCVIALLmode || mode == CCVIANYmode)
6609 return reverse_condition (code);
6610 else
6611 gcc_unreachable ();
6612 }
6613
6614 /* Generate a vector comparison expression loading either elements of
6615 THEN or ELS into TARGET depending on the comparison COND of CMP_OP1
6616 and CMP_OP2. */
6617
6618 void
6619 s390_expand_vcond (rtx target, rtx then, rtx els,
6620 enum rtx_code cond, rtx cmp_op1, rtx cmp_op2)
6621 {
6622 rtx tmp;
6623 machine_mode result_mode;
6624 rtx result_target;
6625
6626 machine_mode target_mode = GET_MODE (target);
6627 machine_mode cmp_mode = GET_MODE (cmp_op1);
6628 rtx op = (cond == LT) ? els : then;
6629
6630 /* Try to optimize x < 0 ? -1 : 0 into (signed) x >> 31
6631 and x < 0 ? 1 : 0 into (unsigned) x >> 31. Likewise
6632 for short and byte (x >> 15 and x >> 7 respectively). */
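/* E.g. for V4SImode, "x < 0 ? -1 : 0" becomes a single element-wise
   arithmetic shift right by 31 (the sign bit is smeared across each
   element) and "x < 0 ? 1 : 0" a logical shift right by 31.  */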
6633 if ((cond == LT || cond == GE)
6634 && target_mode == cmp_mode
6635 && cmp_op2 == CONST0_RTX (cmp_mode)
6636 && op == CONST0_RTX (target_mode)
6637 && s390_vector_mode_supported_p (target_mode)
6638 && GET_MODE_CLASS (target_mode) == MODE_VECTOR_INT)
6639 {
6640 rtx negop = (cond == LT) ? then : els;
6641
6642 int shift = GET_MODE_BITSIZE (GET_MODE_INNER (target_mode)) - 1;
6643
6644 /* if x < 0 ? 1 : 0 or if x >= 0 ? 0 : 1 */
6645 if (negop == CONST1_RTX (target_mode))
6646 {
6647 rtx res = expand_simple_binop (cmp_mode, LSHIFTRT, cmp_op1,
6648 GEN_INT (shift), target,
6649 1, OPTAB_DIRECT);
6650 if (res != target)
6651 emit_move_insn (target, res);
6652 return;
6653 }
6654
6655 /* if x < 0 ? -1 : 0 or if x >= 0 ? 0 : -1 */
6656 else if (all_ones_operand (negop, target_mode))
6657 {
6658 rtx res = expand_simple_binop (cmp_mode, ASHIFTRT, cmp_op1,
6659 GEN_INT (shift), target,
6660 0, OPTAB_DIRECT);
6661 if (res != target)
6662 emit_move_insn (target, res);
6663 return;
6664 }
6665 }
6666
6667 /* We always use an integral type vector to hold the comparison
6668 result. */
6669 result_mode = mode_for_int_vector (cmp_mode).require ();
6670 result_target = gen_reg_rtx (result_mode);
6671
6672 /* We allow vector immediates as comparison operands that
6673 can be handled by the optimization above but not by the
6674 following code. Hence, force them into registers here. */
6675 if (!REG_P (cmp_op1))
6676 cmp_op1 = force_reg (GET_MODE (cmp_op1), cmp_op1);
6677
6678 if (!REG_P (cmp_op2))
6679 cmp_op2 = force_reg (GET_MODE (cmp_op2), cmp_op2);
6680
6681 s390_expand_vec_compare (result_target, cond,
6682 cmp_op1, cmp_op2);
6683
6684 /* If the results are supposed to be either -1 or 0 we are done
6685 since this is what our compare instructions generate anyway. */
6686 if (all_ones_operand (then, GET_MODE (then))
6687 && const0_operand (els, GET_MODE (els)))
6688 {
6689 emit_move_insn (target, gen_rtx_SUBREG (target_mode,
6690 result_target, 0));
6691 return;
6692 }
6693
6694 /* Otherwise we will do a vsel afterwards. */
6695 /* This gets triggered e.g.
6696 with gcc.c-torture/compile/pr53410-1.c */
6697 if (!REG_P (then))
6698 then = force_reg (target_mode, then);
6699
6700 if (!REG_P (els))
6701 els = force_reg (target_mode, els);
6702
6703 tmp = gen_rtx_fmt_ee (EQ, VOIDmode,
6704 result_target,
6705 CONST0_RTX (result_mode));
6706
6707 /* We compared the result against zero above so we have to swap then
6708 and els here. */
6709 tmp = gen_rtx_IF_THEN_ELSE (target_mode, tmp, els, then);
6710
6711 gcc_assert (target_mode == GET_MODE (then));
6712 emit_insn (gen_rtx_SET (target, tmp));
6713 }
6714
6715 /* Emit the RTX necessary to initialize the vector TARGET with values
6716 in VALS. */
6717 void
6718 s390_expand_vec_init (rtx target, rtx vals)
6719 {
6720 machine_mode mode = GET_MODE (target);
6721 machine_mode inner_mode = GET_MODE_INNER (mode);
6722 int n_elts = GET_MODE_NUNITS (mode);
6723 bool all_same = true, all_regs = true, all_const_int = true;
6724 rtx x;
6725 int i;
6726
6727 for (i = 0; i < n_elts; ++i)
6728 {
6729 x = XVECEXP (vals, 0, i);
6730
6731 if (!CONST_INT_P (x))
6732 all_const_int = false;
6733
6734 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
6735 all_same = false;
6736
6737 if (!REG_P (x))
6738 all_regs = false;
6739 }
6740
6741 /* Use vector gen mask or vector gen byte mask if possible. */
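/* Illustrative cases: a splat of 0x00fff000 has one contiguous run
   of 1-bits per element and matches the vector-gen-mask pattern,
   while a splat of 0x00ff00ff consists only of 0x00/0xff bytes and
   matches the byte-mask pattern.  Either way no literal pool load
   is required.  */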
6742 if (all_same && all_const_int
6743 && (XVECEXP (vals, 0, 0) == const0_rtx
6744 || s390_contiguous_bitmask_vector_p (XVECEXP (vals, 0, 0),
6745 NULL, NULL)
6746 || s390_bytemask_vector_p (XVECEXP (vals, 0, 0), NULL)))
6747 {
6748 emit_insn (gen_rtx_SET (target,
6749 gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0))));
6750 return;
6751 }
6752
6753 /* Use vector replicate instructions. vlrep/vrepi/vrep */
6754 if (all_same)
6755 {
6756 rtx elem = XVECEXP (vals, 0, 0);
6757
6758 /* vec_splats accepts general_operand as source. */
6759 if (!general_operand (elem, GET_MODE (elem)))
6760 elem = force_reg (inner_mode, elem);
6761
6762 emit_insn (gen_rtx_SET (target, gen_rtx_VEC_DUPLICATE (mode, elem)));
6763 return;
6764 }
6765
6766 if (all_regs
6767 && REG_P (target)
6768 && n_elts == 2
6769 && GET_MODE_SIZE (inner_mode) == 8)
6770 {
6771 /* Use vector load pair. */
6772 emit_insn (gen_rtx_SET (target,
6773 gen_rtx_VEC_CONCAT (mode,
6774 XVECEXP (vals, 0, 0),
6775 XVECEXP (vals, 0, 1))));
6776 return;
6777 }
6778
6779 /* Use vector load logical element and zero. */
6780 if (TARGET_VXE && (mode == V4SImode || mode == V4SFmode))
6781 {
6782 bool found = true;
6783
6784 x = XVECEXP (vals, 0, 0);
6785 if (memory_operand (x, inner_mode))
6786 {
6787 for (i = 1; i < n_elts; ++i)
6788 found = found && XVECEXP (vals, 0, i) == const0_rtx;
6789
6790 if (found)
6791 {
6792 machine_mode half_mode = (inner_mode == SFmode
6793 ? V2SFmode : V2SImode);
6794 emit_insn (gen_rtx_SET (target,
6795 gen_rtx_VEC_CONCAT (mode,
6796 gen_rtx_VEC_CONCAT (half_mode,
6797 x,
6798 const0_rtx),
6799 gen_rtx_VEC_CONCAT (half_mode,
6800 const0_rtx,
6801 const0_rtx))));
6802 return;
6803 }
6804 }
6805 }
6806
6807 /* We are about to set the vector elements one by one. Zero out the
6808 full register first in order to help the data flow framework to
6809 detect it as full VR set. */
6810 emit_insn (gen_rtx_SET (target, CONST0_RTX (mode)));
6811
6812 /* Unfortunately the vec_init expander is not allowed to fail. So
6813 we have to implement the fallback ourselves. */
6814 for (i = 0; i < n_elts; i++)
6815 {
6816 rtx elem = XVECEXP (vals, 0, i);
6817 if (!general_operand (elem, GET_MODE (elem)))
6818 elem = force_reg (inner_mode, elem);
6819
6820 emit_insn (gen_rtx_SET (target,
6821 gen_rtx_UNSPEC (mode,
6822 gen_rtvec (3, elem,
6823 GEN_INT (i), target),
6824 UNSPEC_VEC_SET)));
6825 }
6826 }
6827
6828 /* Structure to hold the initial parameters for a compare_and_swap operation
6829 in HImode and QImode. */
6830
6831 struct alignment_context
6832 {
6833 rtx memsi; /* SI aligned memory location. */
6834 rtx shift; /* Bit offset with regard to lsb. */
6835 rtx modemask; /* Mask of the HQImode shifted by SHIFT bits. */
6836 rtx modemaski; /* ~modemask */
6837 bool aligned; /* True if memory is aligned, false otherwise. */
6838 };
6839
6840 /* A subroutine of s390_expand_cs_hqi and s390_expand_atomic to initialize
6841 structure AC for transparent simplifying, if the memory alignment is known
6842 to be at least 32bit. MEM is the memory location for the actual operation
6843 and MODE its mode. */
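/* Worked example (illustrative): a QImode access at an address with
   (addr & 3) == 1.  The SImode base is addr & -4 and byteoffset = 1,
   so ac->shift = ((4 - 1) - 1) * 8 = 16 bits and
   ac->modemask = 0xff << 16 = 0x00ff0000: on this big-endian target
   the addressed byte occupies exactly bits 16..23 of the loaded
   word.  */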
6844
6845 static void
6846 init_alignment_context (struct alignment_context *ac, rtx mem,
6847 machine_mode mode)
6848 {
6849 ac->shift = GEN_INT (GET_MODE_SIZE (SImode) - GET_MODE_SIZE (mode));
6850 ac->aligned = (MEM_ALIGN (mem) >= GET_MODE_BITSIZE (SImode));
6851
6852 if (ac->aligned)
6853 ac->memsi = adjust_address (mem, SImode, 0); /* Memory is aligned. */
6854 else
6855 {
6856 /* Alignment is unknown. */
6857 rtx byteoffset, addr, align;
6858
6859 /* Force the address into a register. */
6860 addr = force_reg (Pmode, XEXP (mem, 0));
6861
6862 /* Align it to SImode. */
6863 align = expand_simple_binop (Pmode, AND, addr,
6864 GEN_INT (-GET_MODE_SIZE (SImode)),
6865 NULL_RTX, 1, OPTAB_DIRECT);
6866 /* Generate MEM. */
6867 ac->memsi = gen_rtx_MEM (SImode, align);
6868 MEM_VOLATILE_P (ac->memsi) = MEM_VOLATILE_P (mem);
6869 set_mem_alias_set (ac->memsi, ALIAS_SET_MEMORY_BARRIER);
6870 set_mem_align (ac->memsi, GET_MODE_BITSIZE (SImode));
6871
6872 /* Calculate shiftcount. */
6873 byteoffset = expand_simple_binop (Pmode, AND, addr,
6874 GEN_INT (GET_MODE_SIZE (SImode) - 1),
6875 NULL_RTX, 1, OPTAB_DIRECT);
6876 /* As we already have some offset, evaluate the remaining distance. */
6877 ac->shift = expand_simple_binop (SImode, MINUS, ac->shift, byteoffset,
6878 NULL_RTX, 1, OPTAB_DIRECT);
6879 }
6880
6881 /* Shift is the byte count, but we need the bitcount. */
6882 ac->shift = expand_simple_binop (SImode, ASHIFT, ac->shift, GEN_INT (3),
6883 NULL_RTX, 1, OPTAB_DIRECT);
6884
6885 /* Calculate masks. */
6886 ac->modemask = expand_simple_binop (SImode, ASHIFT,
6887 GEN_INT (GET_MODE_MASK (mode)),
6888 ac->shift, NULL_RTX, 1, OPTAB_DIRECT);
6889 ac->modemaski = expand_simple_unop (SImode, NOT, ac->modemask,
6890 NULL_RTX, 1);
6891 }
6892
6893 /* A subroutine of s390_expand_cs_hqi. Insert INS into VAL. If possible,
6894 use a single insv insn into SEQ2. Otherwise, put prep insns in SEQ1 and
6895 perform the merge in SEQ2. */
6896
6897 static rtx
6898 s390_two_part_insv (struct alignment_context *ac, rtx *seq1, rtx *seq2,
6899 machine_mode mode, rtx val, rtx ins)
6900 {
6901 rtx tmp;
6902
6903 if (ac->aligned)
6904 {
6905 start_sequence ();
6906 tmp = copy_to_mode_reg (SImode, val);
6907 if (s390_expand_insv (tmp, GEN_INT (GET_MODE_BITSIZE (mode)),
6908 const0_rtx, ins))
6909 {
6910 *seq1 = NULL;
6911 *seq2 = get_insns ();
6912 end_sequence ();
6913 return tmp;
6914 }
6915 end_sequence ();
6916 }
6917
6918 /* Failed to use insv. Generate a two part shift and mask. */
6919 start_sequence ();
6920 tmp = s390_expand_mask_and_shift (ins, mode, ac->shift);
6921 *seq1 = get_insns ();
6922 end_sequence ();
6923
6924 start_sequence ();
6925 tmp = expand_simple_binop (SImode, IOR, tmp, val, NULL_RTX, 1, OPTAB_DIRECT);
6926 *seq2 = get_insns ();
6927 end_sequence ();
6928
6929 return tmp;
6930 }
6931
6932 /* Expand an atomic compare and swap operation for HImode and QImode. MEM is
6933 the memory location, CMP the old value to compare MEM with and NEW_RTX the
6934 value to set if CMP == MEM. */
6935
6936 static void
6937 s390_expand_cs_hqi (machine_mode mode, rtx btarget, rtx vtarget, rtx mem,
6938 rtx cmp, rtx new_rtx, bool is_weak)
6939 {
6940 struct alignment_context ac;
6941 rtx cmpv, newv, val, cc, seq0, seq1, seq2, seq3;
6942 rtx res = gen_reg_rtx (SImode);
6943 rtx_code_label *csloop = NULL, *csend = NULL;
6944
6945 gcc_assert (MEM_P (mem));
6946
6947 init_alignment_context (&ac, mem, mode);
6948
6949 /* Load full word. Subsequent loads are performed by CS. */
6950 val = expand_simple_binop (SImode, AND, ac.memsi, ac.modemaski,
6951 NULL_RTX, 1, OPTAB_DIRECT);
6952
6953 /* Prepare insertions of cmp and new_rtx into the loaded value. When
6954 possible, we try to use insv to make this happen efficiently. If
6955 that fails we'll generate code both inside and outside the loop. */
6956 cmpv = s390_two_part_insv (&ac, &seq0, &seq2, mode, val, cmp);
6957 newv = s390_two_part_insv (&ac, &seq1, &seq3, mode, val, new_rtx);
6958
6959 if (seq0)
6960 emit_insn (seq0);
6961 if (seq1)
6962 emit_insn (seq1);
6963
6964 /* Start CS loop. */
6965 if (!is_weak)
6966 {
6967 /* Begin assuming success. */
6968 emit_move_insn (btarget, const1_rtx);
6969
6970 csloop = gen_label_rtx ();
6971 csend = gen_label_rtx ();
6972 emit_label (csloop);
6973 }
6974
6975 /* val = "<mem>00..0<mem>"
6976 * cmp = "00..0<cmp>00..0"
6977 * new = "00..0<new>00..0"
6978 */
6979
6980 emit_insn (seq2);
6981 emit_insn (seq3);
6982
6983 cc = s390_emit_compare_and_swap (EQ, res, ac.memsi, cmpv, newv, CCZ1mode);
6984 if (is_weak)
6985 emit_insn (gen_cstorecc4 (btarget, cc, XEXP (cc, 0), XEXP (cc, 1)));
6986 else
6987 {
6988 rtx tmp;
6989
6990 /* Jump to end if we're done (likely?). */
6991 s390_emit_jump (csend, cc);
6992
6993 /* Check for changes outside mode, and loop internally if so.
6994 Arrange the moves so that the compare is adjacent to the
6995 branch so that we can generate CRJ. */
6996 tmp = copy_to_reg (val);
6997 force_expand_binop (SImode, and_optab, res, ac.modemaski, val,
6998 1, OPTAB_DIRECT);
6999 cc = s390_emit_compare (NE, val, tmp);
7000 s390_emit_jump (csloop, cc);
7001
7002 /* Failed. */
7003 emit_move_insn (btarget, const0_rtx);
7004 emit_label (csend);
7005 }
7006
7007 /* Return the correct part of the bitfield. */
7008 convert_move (vtarget, expand_simple_binop (SImode, LSHIFTRT, res, ac.shift,
7009 NULL_RTX, 1, OPTAB_DIRECT), 1);
7010 }
7011
7012 /* Variant of s390_expand_cs for SI, DI and TI modes. */
7013 static void
7014 s390_expand_cs_tdsi (machine_mode mode, rtx btarget, rtx vtarget, rtx mem,
7015 rtx cmp, rtx new_rtx, bool is_weak)
7016 {
7017 rtx output = vtarget;
7018 rtx_code_label *skip_cs_label = NULL;
7019 bool do_const_opt = false;
7020
7021 if (!register_operand (output, mode))
7022 output = gen_reg_rtx (mode);
7023
7024 /* If IS_WEAK is true and the INPUT value is a constant, compare the memory
7025 with the constant first and skip the compare_and_swap because it's very
7026 expensive and likely to fail anyway.
7027 Note 1: This is done only for IS_WEAK. C11 allows optimizations that may
7028 cause spurious failures in that case.
7029 Note 2: It may be useful to do this also for non-constant INPUT.
7030 Note 3: Currently only targets with "load on condition" are supported
7031 (z196 and newer). */
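/* Sketch of the fast path for a weak CS against the constant 0
   (illustrative):
     lt   %r_out,<mem>        load and test the current value
     jne  skip                memory differs from the expected 0
     cs   %r_out,%r_new,<mem> the actual compare and swap
   skip:
     ...                      btarget derived from the CC via a
                              load-on-condition  */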
7032
7033 if (TARGET_Z196
7034 && (mode == SImode || mode == DImode))
7035 do_const_opt = (is_weak && CONST_INT_P (cmp));
7036
7037 if (do_const_opt)
7038 {
7039 rtx cc = gen_rtx_REG (CCZmode, CC_REGNUM);
7040
7041 skip_cs_label = gen_label_rtx ();
7042 emit_move_insn (btarget, const0_rtx);
7043 if (CONST_INT_P (cmp) && INTVAL (cmp) == 0)
7044 {
7045 rtvec lt = rtvec_alloc (2);
7046
7047 /* Load-and-test + conditional jump. */
7048 RTVEC_ELT (lt, 0)
7049 = gen_rtx_SET (cc, gen_rtx_COMPARE (CCZmode, mem, cmp));
7050 RTVEC_ELT (lt, 1) = gen_rtx_SET (output, mem);
7051 emit_insn (gen_rtx_PARALLEL (VOIDmode, lt));
7052 }
7053 else
7054 {
7055 emit_move_insn (output, mem);
7056 emit_insn (gen_rtx_SET (cc, gen_rtx_COMPARE (CCZmode, output, cmp)));
7057 }
7058 s390_emit_jump (skip_cs_label, gen_rtx_NE (VOIDmode, cc, const0_rtx));
7059 add_reg_br_prob_note (get_last_insn (),
7060 profile_probability::very_unlikely ());
7061 /* If the jump is not taken, OUTPUT is the expected value. */
7062 cmp = output;
7063 /* Reload newval to a register manually, *after* the compare and jump
7064 above. Otherwise Reload might place it before the jump. */
7065 }
7066 else
7067 cmp = force_reg (mode, cmp);
7068 new_rtx = force_reg (mode, new_rtx);
7069 s390_emit_compare_and_swap (EQ, output, mem, cmp, new_rtx,
7070 (do_const_opt) ? CCZmode : CCZ1mode);
7071 if (skip_cs_label != NULL)
7072 emit_label (skip_cs_label);
7073
7074 /* We deliberately accept non-register operands in the predicate
7075 to ensure the write back to the output operand happens *before*
7076 the store-flags code below. This makes it easier for combine
7077 to merge the store-flags code with a potential test-and-branch
7078 pattern following (immediately!) afterwards. */
7079 if (output != vtarget)
7080 emit_move_insn (vtarget, output);
7081
7082 if (do_const_opt)
7083 {
7084 rtx cc, cond, ite;
7085
7086 /* Do not use gen_cstorecc4 here because it writes either 1 or 0, but
7087 btarget has already been initialized with 0 above. */
7088 cc = gen_rtx_REG (CCZmode, CC_REGNUM);
7089 cond = gen_rtx_EQ (VOIDmode, cc, const0_rtx);
7090 ite = gen_rtx_IF_THEN_ELSE (SImode, cond, const1_rtx, btarget);
7091 emit_insn (gen_rtx_SET (btarget, ite));
7092 }
7093 else
7094 {
7095 rtx cc, cond;
7096
7097 cc = gen_rtx_REG (CCZ1mode, CC_REGNUM);
7098 cond = gen_rtx_EQ (SImode, cc, const0_rtx);
7099 emit_insn (gen_cstorecc4 (btarget, cond, cc, const0_rtx));
7100 }
7101 }
7102
7103 /* Expand an atomic compare and swap operation. MEM is the memory location,
7104 CMP the old value to compare MEM with and NEW_RTX the value to set if
7105 CMP == MEM. */
7106
7107 void
7108 s390_expand_cs (machine_mode mode, rtx btarget, rtx vtarget, rtx mem,
7109 rtx cmp, rtx new_rtx, bool is_weak)
7110 {
7111 switch (mode)
7112 {
7113 case E_TImode:
7114 case E_DImode:
7115 case E_SImode:
7116 s390_expand_cs_tdsi (mode, btarget, vtarget, mem, cmp, new_rtx, is_weak);
7117 break;
7118 case E_HImode:
7119 case E_QImode:
7120 s390_expand_cs_hqi (mode, btarget, vtarget, mem, cmp, new_rtx, is_weak);
7121 break;
7122 default:
7123 gcc_unreachable ();
7124 }
7125 }
7126
7127 /* Expand an atomic_exchange operation simulated with a compare-and-swap loop.
7128 The memory location MEM is set to INPUT. OUTPUT is set to the previous value
7129 of MEM. */
7130
7131 void
7132 s390_expand_atomic_exchange_tdsi (rtx output, rtx mem, rtx input)
7133 {
7134 machine_mode mode = GET_MODE (mem);
7135 rtx_code_label *csloop;
7136
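/* Exchanging the constant 0 can use a single interlocked "load and
   and" (lan/lang) with an operand of zero: it atomically fetches the
   old value and stores old & 0 = 0, avoiding the cs loop.  This
   needs the z196 interlocked-access facility.  */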
7137 if (TARGET_Z196
7138 && (mode == DImode || mode == SImode)
7139 && CONST_INT_P (input) && INTVAL (input) == 0)
7140 {
7141 emit_move_insn (output, const0_rtx);
7142 if (mode == DImode)
7143 emit_insn (gen_atomic_fetch_anddi (output, mem, const0_rtx, input));
7144 else
7145 emit_insn (gen_atomic_fetch_andsi (output, mem, const0_rtx, input));
7146 return;
7147 }
7148
7149 input = force_reg (mode, input);
7150 emit_move_insn (output, mem);
7151 csloop = gen_label_rtx ();
7152 emit_label (csloop);
7153 s390_emit_jump (csloop, s390_emit_compare_and_swap (NE, output, mem, output,
7154 input, CCZ1mode));
7155 }
7156
7157 /* Expand an atomic operation CODE of mode MODE. MEM is the memory location
7158 and VAL the value to play with. If AFTER is true then store the value
7159 MEM holds after the operation, if AFTER is false then store the value MEM
7160 holds before the operation. If TARGET is zero then discard that value, else
7161 store it to TARGET. */
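/* Example of the mask handling below (illustrative): a QImode AND
   with val = 0x0f on a byte whose shift is 16.  After mask-and-shift
   val is 0x000f0000; XORing with ac.modemaski (0xff00ffff) widens it
   to 0xff0fffff, so "word & 0xff0fffff" modifies only the addressed
   byte and leaves its neighbours untouched.  */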
7162
7163 void
7164 s390_expand_atomic (machine_mode mode, enum rtx_code code,
7165 rtx target, rtx mem, rtx val, bool after)
7166 {
7167 struct alignment_context ac;
7168 rtx cmp;
7169 rtx new_rtx = gen_reg_rtx (SImode);
7170 rtx orig = gen_reg_rtx (SImode);
7171 rtx_code_label *csloop = gen_label_rtx ();
7172
7173 gcc_assert (!target || register_operand (target, VOIDmode));
7174 gcc_assert (MEM_P (mem));
7175
7176 init_alignment_context (&ac, mem, mode);
7177
7178 /* Shift val to the correct bit positions.
7179 Preserve "icm", but prevent "ex icm". */
7180 if (!(ac.aligned && code == SET && MEM_P (val)))
7181 val = s390_expand_mask_and_shift (val, mode, ac.shift);
7182
7183 /* Further preparation insns. */
7184 if (code == PLUS || code == MINUS)
7185 emit_move_insn (orig, val);
7186 else if (code == MULT || code == AND) /* val = "11..1<val>11..1" */
7187 val = expand_simple_binop (SImode, XOR, val, ac.modemaski,
7188 NULL_RTX, 1, OPTAB_DIRECT);
7189
7190 /* Load full word. Subsequent loads are performed by CS. */
7191 cmp = force_reg (SImode, ac.memsi);
7192
7193 /* Start CS loop. */
7194 emit_label (csloop);
7195 emit_move_insn (new_rtx, cmp);
7196
7197 /* Patch new with val at correct position. */
7198 switch (code)
7199 {
7200 case PLUS:
7201 case MINUS:
7202 val = expand_simple_binop (SImode, code, new_rtx, orig,
7203 NULL_RTX, 1, OPTAB_DIRECT);
7204 val = expand_simple_binop (SImode, AND, val, ac.modemask,
7205 NULL_RTX, 1, OPTAB_DIRECT);
7206 /* FALLTHRU */
7207 case SET:
7208 if (ac.aligned && MEM_P (val))
7209 store_bit_field (new_rtx, GET_MODE_BITSIZE (mode), 0,
7210 0, 0, SImode, val, false);
7211 else
7212 {
7213 new_rtx = expand_simple_binop (SImode, AND, new_rtx, ac.modemaski,
7214 NULL_RTX, 1, OPTAB_DIRECT);
7215 new_rtx = expand_simple_binop (SImode, IOR, new_rtx, val,
7216 NULL_RTX, 1, OPTAB_DIRECT);
7217 }
7218 break;
7219 case AND:
7220 case IOR:
7221 case XOR:
7222 new_rtx = expand_simple_binop (SImode, code, new_rtx, val,
7223 NULL_RTX, 1, OPTAB_DIRECT);
7224 break;
7225 case MULT: /* NAND */
7226 new_rtx = expand_simple_binop (SImode, AND, new_rtx, val,
7227 NULL_RTX, 1, OPTAB_DIRECT);
7228 new_rtx = expand_simple_binop (SImode, XOR, new_rtx, ac.modemask,
7229 NULL_RTX, 1, OPTAB_DIRECT);
7230 break;
7231 default:
7232 gcc_unreachable ();
7233 }
7234
7235 s390_emit_jump (csloop, s390_emit_compare_and_swap (NE, cmp,
7236 ac.memsi, cmp, new_rtx,
7237 CCZ1mode));
7238
7239 /* Return the correct part of the bitfield. */
7240 if (target)
7241 convert_move (target, expand_simple_binop (SImode, LSHIFTRT,
7242 after ? new_rtx : cmp, ac.shift,
7243 NULL_RTX, 1, OPTAB_DIRECT), 1);
7244 }
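/* An illustrative sketch (not part of GCC) of the word-wide CS loop
   built above, shown for an atomic OR on a single byte: load the
   containing aligned word once, patch the affected bits at the
   position derived from the byte's address (big-endian, as on S/390),
   and let the compare-and-swap retry on contention.  The helper name
   demo_atomic_fetch_or_u8 is hypothetical.  */
#if 0
#include <stdint.h>
#include <stdbool.h>

static uint8_t
demo_atomic_fetch_or_u8 (uint8_t *mem, uint8_t val, bool after)
{
  uint32_t *memsi = (uint32_t *) ((uintptr_t) mem & ~(uintptr_t) 3);
  int shift = (3 - ((uintptr_t) mem & 3)) * 8;	/* Big-endian position.  */
  uint32_t cmp = *memsi;			/* Load full word once.  */
  uint32_t new_val;

  do
    new_val = cmp | ((uint32_t) val << shift);	/* Patch NEW with VAL.  */
  while (!__atomic_compare_exchange_n (memsi, &cmp, new_val,
				       false /* weak */,
				       __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST));

  /* Return the correct part of the bitfield, after or before the op.  */
  return (uint8_t) ((after ? new_val : cmp) >> shift);
}
#endif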
7245
7246 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
7247 We need to emit DTP-relative relocations. */
7248
7249 static void s390_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
7250
7251 static void
7252 s390_output_dwarf_dtprel (FILE *file, int size, rtx x)
7253 {
7254 switch (size)
7255 {
7256 case 4:
7257 fputs ("\t.long\t", file);
7258 break;
7259 case 8:
7260 fputs ("\t.quad\t", file);
7261 break;
7262 default:
7263 gcc_unreachable ();
7264 }
7265 output_addr_const (file, x);
7266 fputs ("@DTPOFF", file);
7267 }
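/* For example, with SIZE == 8 and X referring to a TLS symbol `foo',
   the code above would emit (sketch):

	.quad	foo@DTPOFF  */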
7268
7269 /* Return the proper mode for REGNO being represented in the dwarf
7270 unwind table. */
7271 machine_mode
7272 s390_dwarf_frame_reg_mode (int regno)
7273 {
7274 machine_mode save_mode = default_dwarf_frame_reg_mode (regno);
7275
7276 /* Make sure not to return DImode for any GPR with -m31 -mzarch. */
7277 if (GENERAL_REGNO_P (regno))
7278 save_mode = Pmode;
7279
7280 /* The rightmost 64 bits of vector registers are call-clobbered. */
7281 if (GET_MODE_SIZE (save_mode) > 8)
7282 save_mode = DImode;
7283
7284 return save_mode;
7285 }
7286
7287 #ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
7288 /* Implement TARGET_MANGLE_TYPE. */
7289
7290 static const char *
7291 s390_mangle_type (const_tree type)
7292 {
7293 type = TYPE_MAIN_VARIANT (type);
7294
7295 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
7296 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
7297 return NULL;
7298
7299 if (type == s390_builtin_types[BT_BV16QI]) return "U6__boolc";
7300 if (type == s390_builtin_types[BT_BV8HI]) return "U6__bools";
7301 if (type == s390_builtin_types[BT_BV4SI]) return "U6__booli";
7302 if (type == s390_builtin_types[BT_BV2DI]) return "U6__booll";
7303
7304 if (TYPE_MAIN_VARIANT (type) == long_double_type_node
7305 && TARGET_LONG_DOUBLE_128)
7306 return "g";
7307
7308 /* For all other types, use normal C++ mangling. */
7309 return NULL;
7310 }
7311 #endif
7312
7313 /* In the name of slightly smaller debug output, and to cater to
7314 general assembler lossage, recognize various UNSPEC sequences
7315 and turn them back into a direct symbol reference. */
7316
7317 static rtx
7318 s390_delegitimize_address (rtx orig_x)
7319 {
7320 rtx x, y;
7321
7322 orig_x = delegitimize_mem_from_attrs (orig_x);
7323 x = orig_x;
7324
7325 /* Extract the symbol ref from:
7326 (plus:SI (reg:SI 12 %r12)
7327 (const:SI (unspec:SI [(symbol_ref/f:SI ("*.LC0"))]
7328 UNSPEC_GOTOFF/PLTOFF)))
7329 and
7330 (plus:SI (reg:SI 12 %r12)
7331 (const:SI (plus:SI (unspec:SI [(symbol_ref:SI ("L"))]
7332 UNSPEC_GOTOFF/PLTOFF)
7333 (const_int 4 [0x4])))) */
7334 if (GET_CODE (x) == PLUS
7335 && REG_P (XEXP (x, 0))
7336 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM
7337 && GET_CODE (XEXP (x, 1)) == CONST)
7338 {
7339 HOST_WIDE_INT offset = 0;
7340
7341 /* The const operand. */
7342 y = XEXP (XEXP (x, 1), 0);
7343
7344 if (GET_CODE (y) == PLUS
7345 && GET_CODE (XEXP (y, 1)) == CONST_INT)
7346 {
7347 offset = INTVAL (XEXP (y, 1));
7348 y = XEXP (y, 0);
7349 }
7350
7351 if (GET_CODE (y) == UNSPEC
7352 && (XINT (y, 1) == UNSPEC_GOTOFF
7353 || XINT (y, 1) == UNSPEC_PLTOFF))
7354 return plus_constant (Pmode, XVECEXP (y, 0, 0), offset);
7355 }
7356
7357 if (GET_CODE (x) != MEM)
7358 return orig_x;
7359
7360 x = XEXP (x, 0);
7361 if (GET_CODE (x) == PLUS
7362 && GET_CODE (XEXP (x, 1)) == CONST
7363 && GET_CODE (XEXP (x, 0)) == REG
7364 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
7365 {
7366 y = XEXP (XEXP (x, 1), 0);
7367 if (GET_CODE (y) == UNSPEC
7368 && XINT (y, 1) == UNSPEC_GOT)
7369 y = XVECEXP (y, 0, 0);
7370 else
7371 return orig_x;
7372 }
7373 else if (GET_CODE (x) == CONST)
7374 {
7375 /* Extract the symbol ref from:
7376 (mem:QI (const:DI (unspec:DI [(symbol_ref:DI ("foo"))]
7377 UNSPEC_PLT/GOTENT))) */
7378
7379 y = XEXP (x, 0);
7380 if (GET_CODE (y) == UNSPEC
7381 && (XINT (y, 1) == UNSPEC_GOTENT
7382 || XINT (y, 1) == UNSPEC_PLT))
7383 y = XVECEXP (y, 0, 0);
7384 else
7385 return orig_x;
7386 }
7387 else
7388 return orig_x;
7389
7390 if (GET_MODE (orig_x) != Pmode)
7391 {
7392 if (GET_MODE (orig_x) == BLKmode)
7393 return orig_x;
7394 y = lowpart_subreg (GET_MODE (orig_x), y, Pmode);
7395 if (y == NULL_RTX)
7396 return orig_x;
7397 }
7398 return y;
7399 }
7400
7401 /* Output operand OP to stdio stream FILE.
7402 OP is an address (register + offset) which is not used to address data;
7403 instead the rightmost bits are interpreted as the value. */
7404
7405 static void
7406 print_addrstyle_operand (FILE *file, rtx op)
7407 {
7408 HOST_WIDE_INT offset;
7409 rtx base;
7410
7411 /* Extract base register and offset. */
7412 if (!s390_decompose_addrstyle_without_index (op, &base, &offset))
7413 gcc_unreachable ();
7414
7415 /* Sanity check. */
7416 if (base)
7417 {
7418 gcc_assert (GET_CODE (base) == REG);
7419 gcc_assert (REGNO (base) < FIRST_PSEUDO_REGISTER);
7420 gcc_assert (REGNO_REG_CLASS (REGNO (base)) == ADDR_REGS);
7421 }
7422
7423 /* Offsets are restricted to twelve bits. */
7424 fprintf (file, HOST_WIDE_INT_PRINT_DEC, offset & ((1 << 12) - 1));
7425 if (base)
7426 fprintf (file, "(%s)", reg_names[REGNO (base)]);
7427 }
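/* E.g. a shift-count operand (plus (reg %r3) (const_int 13)) is
   printed as "13(%r3)", and a plain (const_int 13) simply as "13".  */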
7428
7429 /* Assigns the number of NOP halfwords to be emitted before and after the
7430 function label to *HW_BEFORE and *HW_AFTER. Neither pointer may be NULL.
7431 If hotpatching is disabled for the function, the values are set to zero.
7432 */
7433
7434 static void
7435 s390_function_num_hotpatch_hw (tree decl,
7436 int *hw_before,
7437 int *hw_after)
7438 {
7439 tree attr;
7440
7441 attr = lookup_attribute ("hotpatch", DECL_ATTRIBUTES (decl));
7442
7443 /* Handle the arguments of the hotpatch attribute. The values
7444 specified via the attribute override the command-line
7445 values. */
7446 if (attr)
7447 {
7448 tree args = TREE_VALUE (attr);
7449
7450 *hw_before = TREE_INT_CST_LOW (TREE_VALUE (args));
7451 *hw_after = TREE_INT_CST_LOW (TREE_VALUE (TREE_CHAIN (args)));
7452 }
7453 else
7454 {
7455 /* Use the values specified by the cmdline arguments. */
7456 *hw_before = s390_hotpatch_hw_before_label;
7457 *hw_after = s390_hotpatch_hw_after_label;
7458 }
7459 }
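/* A hypothetical user-level example of the attribute handled above:
   FOO gets two halfwords of hotpatch NOPs before and four after its
   label, overriding any -mhotpatch= command-line setting, while BAR
   keeps the command-line (or default) values.  */
#if 0
void __attribute__ ((hotpatch (2, 4)))
foo (void)
{
}

void
bar (void)
{
}
#endif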
7460
7461 /* Write the current .machine and .machinemode specification to the assembler
7462 file. */
7463
7464 #ifdef HAVE_AS_MACHINE_MACHINEMODE
7465 static void
7466 s390_asm_output_machine_for_arch (FILE *asm_out_file)
7467 {
7468 fprintf (asm_out_file, "\t.machinemode %s\n",
7469 (TARGET_ZARCH) ? "zarch" : "esa");
7470 fprintf (asm_out_file, "\t.machine \"%s",
7471 processor_table[s390_arch].binutils_name);
7472 if (S390_USE_ARCHITECTURE_MODIFIERS)
7473 {
7474 int cpu_flags;
7475
7476 cpu_flags = processor_flags_table[(int) s390_arch];
7477 if (TARGET_HTM && !(cpu_flags & PF_TX))
7478 fprintf (asm_out_file, "+htm");
7479 else if (!TARGET_HTM && (cpu_flags & PF_TX))
7480 fprintf (asm_out_file, "+nohtm");
7481 if (TARGET_VX && !(cpu_flags & PF_VX))
7482 fprintf (asm_out_file, "+vx");
7483 else if (!TARGET_VX && (cpu_flags & PF_VX))
7484 fprintf (asm_out_file, "+novx");
7485 }
7486 fprintf (asm_out_file, "\"\n");
7487 }
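/* A sketch of the resulting directives for, e.g., -march=zEC12 -mvx,
   assuming the binutils name "zEC12" and that the vector facility is
   not part of that base architecture level:

	.machinemode zarch
	.machine "zEC12+vx"  */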
7488
7489 /* Write an extra function header before the very start of the function. */
7490
7491 void
7492 s390_asm_output_function_prefix (FILE *asm_out_file,
7493 const char *fnname ATTRIBUTE_UNUSED)
7494 {
7495 if (DECL_FUNCTION_SPECIFIC_TARGET (current_function_decl) == NULL)
7496 return;
7497 /* Since only the function-specific options are saved, and not an indication
7498 of which options were explicitly set, it is too much work here to figure out
7499 which options have actually changed. Thus, generate .machine and
7500 .machinemode whenever a function has the target attribute or pragma. */
7501 fprintf (asm_out_file, "\t.machinemode push\n");
7502 fprintf (asm_out_file, "\t.machine push\n");
7503 s390_asm_output_machine_for_arch (asm_out_file);
7504 }
7505
7506 /* Write an extra function footer after the very end of the function. */
7507
7508 void
7509 s390_asm_declare_function_size (FILE *asm_out_file,
7510 const char *fnname, tree decl)
7511 {
7512 if (!flag_inhibit_size_directive)
7513 ASM_OUTPUT_MEASURED_SIZE (asm_out_file, fnname);
7514 if (DECL_FUNCTION_SPECIFIC_TARGET (decl) == NULL)
7515 return;
7516 fprintf (asm_out_file, "\t.machine pop\n");
7517 fprintf (asm_out_file, "\t.machinemode pop\n");
7518 }
7519 #endif
7520
7521 /* Write the extra assembler code needed to declare a function properly. */
7522
7523 void
7524 s390_asm_output_function_label (FILE *asm_out_file, const char *fname,
7525 tree decl)
7526 {
7527 int hw_before, hw_after;
7528
7529 s390_function_num_hotpatch_hw (decl, &hw_before, &hw_after);
7530 if (hw_before > 0)
7531 {
7532 unsigned int function_alignment;
7533 int i;
7534
7535 /* Add a trampoline code area before the function label and initialize it
7536 with two-byte nop instructions. This area can be overwritten with code
7537 that jumps to a patched version of the function. */
7538 asm_fprintf (asm_out_file, "\tnopr\t%%r0"
7539 "\t# pre-label NOPs for hotpatch (%d halfwords)\n",
7540 hw_before);
7541 for (i = 1; i < hw_before; i++)
7542 fputs ("\tnopr\t%r0\n", asm_out_file);
7543
7544 /* Note: The function label must be aligned so that (a) the bytes of the
7545 following nop do not cross a cacheline boundary, and (b) a jump address
7546 (eight bytes for 64-bit targets, four bytes for 32-bit targets) can be
7547 stored directly before the label without crossing a cacheline
7548 boundary. All this is necessary to make sure the trampoline code can
7549 be changed atomically.
7550 This alignment is done automatically using FUNCTION_BOUNDARY, but
7551 if there are NOPs before the function label, the alignment is placed
7552 before them. So it is necessary to duplicate the alignment after the
7553 NOPs. */
7554 function_alignment = MAX (8, DECL_ALIGN (decl) / BITS_PER_UNIT);
7555 if (! DECL_USER_ALIGN (decl))
7556 function_alignment = MAX (function_alignment,
7557 (unsigned int) align_functions);
7558 fputs ("\t# alignment for hotpatch\n", asm_out_file);
7559 ASM_OUTPUT_ALIGN (asm_out_file, floor_log2 (function_alignment));
7560 }
7561
7562 if (S390_USE_TARGET_ATTRIBUTE && TARGET_DEBUG_ARG)
7563 {
7564 asm_fprintf (asm_out_file, "\t# fn:%s ar%d\n", fname, s390_arch);
7565 asm_fprintf (asm_out_file, "\t# fn:%s tu%d\n", fname, s390_tune);
7566 asm_fprintf (asm_out_file, "\t# fn:%s sg%d\n", fname, s390_stack_guard);
7567 asm_fprintf (asm_out_file, "\t# fn:%s ss%d\n", fname, s390_stack_size);
7568 asm_fprintf (asm_out_file, "\t# fn:%s bc%d\n", fname, s390_branch_cost);
7569 asm_fprintf (asm_out_file, "\t# fn:%s wf%d\n", fname,
7570 s390_warn_framesize);
7571 asm_fprintf (asm_out_file, "\t# fn:%s ba%d\n", fname, TARGET_BACKCHAIN);
7572 asm_fprintf (asm_out_file, "\t# fn:%s hd%d\n", fname, TARGET_HARD_DFP);
7573 asm_fprintf (asm_out_file, "\t# fn:%s hf%d\n", fname, !TARGET_SOFT_FLOAT);
7574 asm_fprintf (asm_out_file, "\t# fn:%s ht%d\n", fname, TARGET_OPT_HTM);
7575 asm_fprintf (asm_out_file, "\t# fn:%s vx%d\n", fname, TARGET_OPT_VX);
7576 asm_fprintf (asm_out_file, "\t# fn:%s ps%d\n", fname,
7577 TARGET_PACKED_STACK);
7578 asm_fprintf (asm_out_file, "\t# fn:%s se%d\n", fname, TARGET_SMALL_EXEC);
7579 asm_fprintf (asm_out_file, "\t# fn:%s mv%d\n", fname, TARGET_MVCLE);
7580 asm_fprintf (asm_out_file, "\t# fn:%s zv%d\n", fname, TARGET_ZVECTOR);
7581 asm_fprintf (asm_out_file, "\t# fn:%s wd%d\n", fname,
7582 s390_warn_dynamicstack_p);
7583 }
7584 ASM_OUTPUT_LABEL (asm_out_file, fname);
7585 if (hw_after > 0)
7586 asm_fprintf (asm_out_file,
7587 "\t# post-label NOPs for hotpatch (%d halfwords)\n",
7588 hw_after);
7589 }
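/* With hotpatch (2, 1) the output around the label would look roughly
   like this (sketch):

	nopr	%r0	# pre-label NOPs for hotpatch (2 halfwords)
	nopr	%r0
	# alignment for hotpatch
	.align	8
   foo:
	# post-label NOPs for hotpatch (1 halfwords)  */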
7590
7591 /* Output machine-dependent UNSPECs occurring in address constant X
7592 in assembler syntax to stdio stream FILE. Returns true if the
7593 constant X could be recognized, false otherwise. */
7594
7595 static bool
7596 s390_output_addr_const_extra (FILE *file, rtx x)
7597 {
7598 if (GET_CODE (x) == UNSPEC && XVECLEN (x, 0) == 1)
7599 switch (XINT (x, 1))
7600 {
7601 case UNSPEC_GOTENT:
7602 output_addr_const (file, XVECEXP (x, 0, 0));
7603 fprintf (file, "@GOTENT");
7604 return true;
7605 case UNSPEC_GOT:
7606 output_addr_const (file, XVECEXP (x, 0, 0));
7607 fprintf (file, "@GOT");
7608 return true;
7609 case UNSPEC_GOTOFF:
7610 output_addr_const (file, XVECEXP (x, 0, 0));
7611 fprintf (file, "@GOTOFF");
7612 return true;
7613 case UNSPEC_PLT:
7614 output_addr_const (file, XVECEXP (x, 0, 0));
7615 fprintf (file, "@PLT");
7616 return true;
7617 case UNSPEC_PLTOFF:
7618 output_addr_const (file, XVECEXP (x, 0, 0));
7619 fprintf (file, "@PLTOFF");
7620 return true;
7621 case UNSPEC_TLSGD:
7622 output_addr_const (file, XVECEXP (x, 0, 0));
7623 fprintf (file, "@TLSGD");
7624 return true;
7625 case UNSPEC_TLSLDM:
7626 assemble_name (file, get_some_local_dynamic_name ());
7627 fprintf (file, "@TLSLDM");
7628 return true;
7629 case UNSPEC_DTPOFF:
7630 output_addr_const (file, XVECEXP (x, 0, 0));
7631 fprintf (file, "@DTPOFF");
7632 return true;
7633 case UNSPEC_NTPOFF:
7634 output_addr_const (file, XVECEXP (x, 0, 0));
7635 fprintf (file, "@NTPOFF");
7636 return true;
7637 case UNSPEC_GOTNTPOFF:
7638 output_addr_const (file, XVECEXP (x, 0, 0));
7639 fprintf (file, "@GOTNTPOFF");
7640 return true;
7641 case UNSPEC_INDNTPOFF:
7642 output_addr_const (file, XVECEXP (x, 0, 0));
7643 fprintf (file, "@INDNTPOFF");
7644 return true;
7645 }
7646
7647 if (GET_CODE (x) == UNSPEC && XVECLEN (x, 0) == 2)
7648 switch (XINT (x, 1))
7649 {
7650 case UNSPEC_POOL_OFFSET:
7651 x = gen_rtx_MINUS (GET_MODE (x), XVECEXP (x, 0, 0), XVECEXP (x, 0, 1));
7652 output_addr_const (file, x);
7653 return true;
7654 }
7655 return false;
7656 }
7657
7658 /* Output address operand ADDR in assembler syntax to
7659 stdio stream FILE. */
7660
7661 void
7662 print_operand_address (FILE *file, rtx addr)
7663 {
7664 struct s390_address ad;
7665 memset (&ad, 0, sizeof (s390_address));
7666
7667 if (s390_loadrelative_operand_p (addr, NULL, NULL))
7668 {
7669 if (!TARGET_Z10)
7670 {
7671 output_operand_lossage ("symbolic memory references are "
7672 "only supported on z10 or later");
7673 return;
7674 }
7675 output_addr_const (file, addr);
7676 return;
7677 }
7678
7679 if (!s390_decompose_address (addr, &ad)
7680 || (ad.base && !REGNO_OK_FOR_BASE_P (REGNO (ad.base)))
7681 || (ad.indx && !REGNO_OK_FOR_INDEX_P (REGNO (ad.indx))))
7682 output_operand_lossage ("cannot decompose address");
7683
7684 if (ad.disp)
7685 output_addr_const (file, ad.disp);
7686 else
7687 fprintf (file, "0");
7688
7689 if (ad.base && ad.indx)
7690 fprintf (file, "(%s,%s)", reg_names[REGNO (ad.indx)],
7691 reg_names[REGNO (ad.base)]);
7692 else if (ad.base)
7693 fprintf (file, "(%s)", reg_names[REGNO (ad.base)]);
7694 }
7695
7696 /* Output operand X in assembler syntax to stdio stream FILE.
7697 CODE specified the format flag. The following format flags
7698 are recognized:
7699
7700 'C': print opcode suffix for branch condition.
7701 'D': print opcode suffix for inverse branch condition.
7702 'E': print opcode suffix for branch on index instruction.
7703 'G': print the size of the operand in bytes.
7704 'J': print tls_load/tls_gdcall/tls_ldcall suffix.
7705 'M': print the second word of a TImode operand.
7706 'N': print the second word of a DImode operand.
7707 'O': print only the displacement of a memory reference or address.
7708 'R': print only the base register of a memory reference or address.
7709 'S': print S-type memory reference (base+displacement).
7710 'Y': print address style operand without index (e.g. shift count or setmem
7711 operand).
7712
7713 'b': print integer X as if it's an unsigned byte.
7714 'c': print integer X as if it's a signed byte.
7715 'e': "end" contiguous bitmask X in either DImode or vector inner mode.
7716 'f': "end" contiguous bitmask X in SImode.
7717 'h': print integer X as if it's a signed halfword.
7718 'i': print the first nonzero HImode part of X.
7719 'j': print the first HImode part unequal to -1 of X.
7720 'k': print the first nonzero SImode part of X.
7721 'm': print the first SImode part unequal to -1 of X.
7722 'o': print integer X as if it's an unsigned 32-bit word.
7723 's': "start" of contiguous bitmask X in either DImode or vector inner mode.
7724 't': CONST_INT: "start" of contiguous bitmask X in SImode.
7725 CONST_VECTOR: Generate a bitmask for vgbm instruction.
7726 'x': print integer X as if it's an unsigned halfword.
7727 'v': print register number as vector register (v1 instead of f1).
7728 */
7729
7730 void
7731 print_operand (FILE *file, rtx x, int code)
7732 {
7733 HOST_WIDE_INT ival;
7734
7735 switch (code)
7736 {
7737 case 'C':
7738 fprintf (file, s390_branch_condition_mnemonic (x, FALSE));
7739 return;
7740
7741 case 'D':
7742 fprintf (file, s390_branch_condition_mnemonic (x, TRUE));
7743 return;
7744
7745 case 'E':
7746 if (GET_CODE (x) == LE)
7747 fprintf (file, "l");
7748 else if (GET_CODE (x) == GT)
7749 fprintf (file, "h");
7750 else
7751 output_operand_lossage ("invalid comparison operator "
7752 "for 'E' output modifier");
7753 return;
7754
7755 case 'J':
7756 if (GET_CODE (x) == SYMBOL_REF)
7757 {
7758 fprintf (file, "%s", ":tls_load:");
7759 output_addr_const (file, x);
7760 }
7761 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLSGD)
7762 {
7763 fprintf (file, "%s", ":tls_gdcall:");
7764 output_addr_const (file, XVECEXP (x, 0, 0));
7765 }
7766 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLSLDM)
7767 {
7768 fprintf (file, "%s", ":tls_ldcall:");
7769 const char *name = get_some_local_dynamic_name ();
7770 gcc_assert (name);
7771 assemble_name (file, name);
7772 }
7773 else
7774 output_operand_lossage ("invalid reference for 'J' output modifier");
7775 return;
7776
7777 case 'G':
7778 fprintf (file, "%u", GET_MODE_SIZE (GET_MODE (x)));
7779 return;
7780
7781 case 'O':
7782 {
7783 struct s390_address ad;
7784 int ret;
7785
7786 ret = s390_decompose_address (MEM_P (x) ? XEXP (x, 0) : x, &ad);
7787
7788 if (!ret
7789 || (ad.base && !REGNO_OK_FOR_BASE_P (REGNO (ad.base)))
7790 || ad.indx)
7791 {
7792 output_operand_lossage ("invalid address for 'O' output modifier");
7793 return;
7794 }
7795
7796 if (ad.disp)
7797 output_addr_const (file, ad.disp);
7798 else
7799 fprintf (file, "0");
7800 }
7801 return;
7802
7803 case 'R':
7804 {
7805 struct s390_address ad;
7806 int ret;
7807
7808 ret = s390_decompose_address (MEM_P (x) ? XEXP (x, 0) : x, &ad);
7809
7810 if (!ret
7811 || (ad.base && !REGNO_OK_FOR_BASE_P (REGNO (ad.base)))
7812 || ad.indx)
7813 {
7814 output_operand_lossage ("invalid address for 'R' output modifier");
7815 return;
7816 }
7817
7818 if (ad.base)
7819 fprintf (file, "%s", reg_names[REGNO (ad.base)]);
7820 else
7821 fprintf (file, "0");
7822 }
7823 return;
7824
7825 case 'S':
7826 {
7827 struct s390_address ad;
7828 int ret;
7829
7830 if (!MEM_P (x))
7831 {
7832 output_operand_lossage ("memory reference expected for "
7833 "'S' output modifier");
7834 return;
7835 }
7836 ret = s390_decompose_address (XEXP (x, 0), &ad);
7837
7838 if (!ret
7839 || (ad.base && !REGNO_OK_FOR_BASE_P (REGNO (ad.base)))
7840 || ad.indx)
7841 {
7842 output_operand_lossage ("invalid address for 'S' output modifier");
7843 return;
7844 }
7845
7846 if (ad.disp)
7847 output_addr_const (file, ad.disp);
7848 else
7849 fprintf (file, "0");
7850
7851 if (ad.base)
7852 fprintf (file, "(%s)", reg_names[REGNO (ad.base)]);
7853 }
7854 return;
7855
7856 case 'N':
7857 if (GET_CODE (x) == REG)
7858 x = gen_rtx_REG (GET_MODE (x), REGNO (x) + 1);
7859 else if (GET_CODE (x) == MEM)
7860 x = change_address (x, VOIDmode,
7861 plus_constant (Pmode, XEXP (x, 0), 4));
7862 else
7863 output_operand_lossage ("register or memory expression expected "
7864 "for 'N' output modifier");
7865 break;
7866
7867 case 'M':
7868 if (GET_CODE (x) == REG)
7869 x = gen_rtx_REG (GET_MODE (x), REGNO (x) + 1);
7870 else if (GET_CODE (x) == MEM)
7871 x = change_address (x, VOIDmode,
7872 plus_constant (Pmode, XEXP (x, 0), 8));
7873 else
7874 output_operand_lossage ("register or memory expression expected "
7875 "for 'M' output modifier");
7876 break;
7877
7878 case 'Y':
7879 print_addrstyle_operand (file, x);
7880 return;
7881 }
7882
7883 switch (GET_CODE (x))
7884 {
7885 case REG:
7886 /* Print FP regs as fx instead of vx when they are accessed
7887 through non-vector mode. */
7888 if (code == 'v'
7889 || VECTOR_NOFP_REG_P (x)
7890 || (FP_REG_P (x) && VECTOR_MODE_P (GET_MODE (x)))
7891 || (VECTOR_REG_P (x)
7892 && (GET_MODE_SIZE (GET_MODE (x)) /
7893 s390_class_max_nregs (FP_REGS, GET_MODE (x))) > 8))
7894 fprintf (file, "%%v%s", reg_names[REGNO (x)] + 2);
7895 else
7896 fprintf (file, "%s", reg_names[REGNO (x)]);
7897 break;
7898
7899 case MEM:
7900 output_address (GET_MODE (x), XEXP (x, 0));
7901 break;
7902
7903 case CONST:
7904 case CODE_LABEL:
7905 case LABEL_REF:
7906 case SYMBOL_REF:
7907 output_addr_const (file, x);
7908 break;
7909
7910 case CONST_INT:
7911 ival = INTVAL (x);
7912 switch (code)
7913 {
7914 case 0:
7915 break;
7916 case 'b':
7917 ival &= 0xff;
7918 break;
7919 case 'c':
7920 ival = ((ival & 0xff) ^ 0x80) - 0x80;
7921 break;
7922 case 'x':
7923 ival &= 0xffff;
7924 break;
7925 case 'h':
7926 ival = ((ival & 0xffff) ^ 0x8000) - 0x8000;
7927 break;
7928 case 'i':
7929 ival = s390_extract_part (x, HImode, 0);
7930 break;
7931 case 'j':
7932 ival = s390_extract_part (x, HImode, -1);
7933 break;
7934 case 'k':
7935 ival = s390_extract_part (x, SImode, 0);
7936 break;
7937 case 'm':
7938 ival = s390_extract_part (x, SImode, -1);
7939 break;
7940 case 'o':
7941 ival &= 0xffffffff;
7942 break;
7943 case 'e': case 'f':
7944 case 's': case 't':
7945 {
7946 int start, end;
7947 int len;
7948 bool ok;
7949
7950 len = (code == 's' || code == 'e' ? 64 : 32);
7951 ok = s390_contiguous_bitmask_p (ival, true, len, &start, &end);
7952 gcc_assert (ok);
7953 if (code == 's' || code == 't')
7954 ival = start;
7955 else
7956 ival = end;
7957 }
7958 break;
7959 default:
7960 output_operand_lossage ("invalid constant for output modifier '%c'", code);
7961 }
7962 fprintf (file, HOST_WIDE_INT_PRINT_DEC, ival);
7963 break;
7964
7965 case CONST_WIDE_INT:
7966 if (code == 'b')
7967 fprintf (file, HOST_WIDE_INT_PRINT_DEC,
7968 CONST_WIDE_INT_ELT (x, 0) & 0xff);
7969 else if (code == 'x')
7970 fprintf (file, HOST_WIDE_INT_PRINT_DEC,
7971 CONST_WIDE_INT_ELT (x, 0) & 0xffff);
7972 else if (code == 'h')
7973 fprintf (file, HOST_WIDE_INT_PRINT_DEC,
7974 ((CONST_WIDE_INT_ELT (x, 0) & 0xffff) ^ 0x8000) - 0x8000);
7975 else
7976 {
7977 if (code == 0)
7978 output_operand_lossage ("invalid constant - try using "
7979 "an output modifier");
7980 else
7981 output_operand_lossage ("invalid constant for output modifier '%c'",
7982 code);
7983 }
7984 break;
7985 case CONST_VECTOR:
7986 switch (code)
7987 {
7988 case 'h':
7989 gcc_assert (const_vec_duplicate_p (x));
7990 fprintf (file, HOST_WIDE_INT_PRINT_DEC,
7991 ((INTVAL (XVECEXP (x, 0, 0)) & 0xffff) ^ 0x8000) - 0x8000);
7992 break;
7993 case 'e':
7994 case 's':
7995 {
7996 int start, end;
7997 bool ok;
7998
7999 ok = s390_contiguous_bitmask_vector_p (x, &start, &end);
8000 gcc_assert (ok);
8001 ival = (code == 's') ? start : end;
8002 fprintf (file, HOST_WIDE_INT_PRINT_DEC, ival);
8003 }
8004 break;
8005 case 't':
8006 {
8007 unsigned mask;
8008 bool ok = s390_bytemask_vector_p (x, &mask);
8009 gcc_assert (ok);
8010 fprintf (file, "%u", mask);
8011 }
8012 break;
8013
8014 default:
8015 output_operand_lossage ("invalid constant vector for output "
8016 "modifier '%c'", code);
8017 }
8018 break;
8019
8020 default:
8021 if (code == 0)
8022 output_operand_lossage ("invalid expression - try using "
8023 "an output modifier");
8024 else
8025 output_operand_lossage ("invalid expression for output "
8026 "modifier '%c'", code);
8027 break;
8028 }
8029 }
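/* A stand-alone check (not part of GCC) of the masking trick used for
   the 'c' and 'h' modifiers above: ((V & MASK) ^ SIGNBIT) - SIGNBIT
   sign-extends the low bits of V portably, without relying on
   implementation-defined shifts of negative values.  */
#if 0
#include <assert.h>
#include <stdint.h>

int
main (void)
{
  int64_t v = 0x12348000;
  int64_t h = ((v & 0xffff) ^ 0x8000) - 0x8000;	/* Low halfword, signed.  */
  assert (h == -32768);
  int64_t c = ((v & 0xff) ^ 0x80) - 0x80;	/* Low byte, signed.  */
  assert (c == 0);
  return 0;
}
#endif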
8030
8031 /* Target hook for assembling integer objects. We need to define it
8032 here to work around a bug in some versions of GAS, which couldn't
8033 handle values smaller than INT_MIN when printed in decimal. */
8034
8035 static bool
8036 s390_assemble_integer (rtx x, unsigned int size, int aligned_p)
8037 {
8038 if (size == 8 && aligned_p
8039 && GET_CODE (x) == CONST_INT && INTVAL (x) < INT_MIN)
8040 {
8041 fprintf (asm_out_file, "\t.quad\t" HOST_WIDE_INT_PRINT_HEX "\n",
8042 INTVAL (x));
8043 return true;
8044 }
8045 return default_assemble_integer (x, size, aligned_p);
8046 }
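/* E.g. the value -2147483649 (one below INT_MIN), which the affected
   GAS versions could not parse in decimal, is emitted as (sketch):

	.quad	0xffffffff7fffffff  */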
8047
8048 /* Returns true if register REGNO is used for forming
8049 a memory address in expression X. */
8050
8051 static bool
8052 reg_used_in_mem_p (int regno, rtx x)
8053 {
8054 enum rtx_code code = GET_CODE (x);
8055 int i, j;
8056 const char *fmt;
8057
8058 if (code == MEM)
8059 {
8060 if (refers_to_regno_p (regno, XEXP (x, 0)))
8061 return true;
8062 }
8063 else if (code == SET
8064 && GET_CODE (SET_DEST (x)) == PC)
8065 {
8066 if (refers_to_regno_p (regno, SET_SRC (x)))
8067 return true;
8068 }
8069
8070 fmt = GET_RTX_FORMAT (code);
8071 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
8072 {
8073 if (fmt[i] == 'e'
8074 && reg_used_in_mem_p (regno, XEXP (x, i)))
8075 return true;
8076
8077 else if (fmt[i] == 'E')
8078 for (j = 0; j < XVECLEN (x, i); j++)
8079 if (reg_used_in_mem_p (regno, XVECEXP (x, i, j)))
8080 return true;
8081 }
8082 return false;
8083 }
8084
8085 /* Returns true if expression DEP_RTX sets an address register
8086 used by instruction INSN to address memory. */
8087
8088 static bool
8089 addr_generation_dependency_p (rtx dep_rtx, rtx_insn *insn)
8090 {
8091 rtx target, pat;
8092
8093 if (NONJUMP_INSN_P (dep_rtx))
8094 dep_rtx = PATTERN (dep_rtx);
8095
8096 if (GET_CODE (dep_rtx) == SET)
8097 {
8098 target = SET_DEST (dep_rtx);
8099 if (GET_CODE (target) == STRICT_LOW_PART)
8100 target = XEXP (target, 0);
8101 while (GET_CODE (target) == SUBREG)
8102 target = SUBREG_REG (target);
8103
8104 if (GET_CODE (target) == REG)
8105 {
8106 int regno = REGNO (target);
8107
8108 if (s390_safe_attr_type (insn) == TYPE_LA)
8109 {
8110 pat = PATTERN (insn);
8111 if (GET_CODE (pat) == PARALLEL)
8112 {
8113 gcc_assert (XVECLEN (pat, 0) == 2);
8114 pat = XVECEXP (pat, 0, 0);
8115 }
8116 gcc_assert (GET_CODE (pat) == SET);
8117 return refers_to_regno_p (regno, SET_SRC (pat));
8118 }
8119 else if (get_attr_atype (insn) == ATYPE_AGEN)
8120 return reg_used_in_mem_p (regno, PATTERN (insn));
8121 }
8122 }
8123 return false;
8124 }
8125
8126 /* Return 1 if DEP_INSN sets a register that INSN uses in the agen unit. */
8127
8128 int
8129 s390_agen_dep_p (rtx_insn *dep_insn, rtx_insn *insn)
8130 {
8131 rtx dep_rtx = PATTERN (dep_insn);
8132 int i;
8133
8134 if (GET_CODE (dep_rtx) == SET
8135 && addr_generation_dependency_p (dep_rtx, insn))
8136 return 1;
8137 else if (GET_CODE (dep_rtx) == PARALLEL)
8138 {
8139 for (i = 0; i < XVECLEN (dep_rtx, 0); i++)
8140 {
8141 if (addr_generation_dependency_p (XVECEXP (dep_rtx, 0, i), insn))
8142 return 1;
8143 }
8144 }
8145 return 0;
8146 }
8147
8148
8149 /* A C statement (sans semicolon) to update the integer scheduling priority
8150 INSN_PRIORITY (INSN). Increase the priority to execute the INSN earlier,
8151 reduce the priority to execute INSN later. Do not define this macro if
8152 you do not need to adjust the scheduling priorities of insns.
8153
8154 A STD instruction should be scheduled earlier,
8155 in order to use the bypass. */
8156 static int
8157 s390_adjust_priority (rtx_insn *insn, int priority)
8158 {
8159 if (! INSN_P (insn))
8160 return priority;
8161
8162 if (s390_tune <= PROCESSOR_2064_Z900)
8163 return priority;
8164
8165 switch (s390_safe_attr_type (insn))
8166 {
8167 case TYPE_FSTOREDF:
8168 case TYPE_FSTORESF:
8169 priority = priority << 3;
8170 break;
8171 case TYPE_STORE:
8172 case TYPE_STM:
8173 priority = priority << 1;
8174 break;
8175 default:
8176 break;
8177 }
8178 return priority;
8179 }
8180
8181
8182 /* The number of instructions that can be issued per cycle. */
8183
8184 static int
8185 s390_issue_rate (void)
8186 {
8187 switch (s390_tune)
8188 {
8189 case PROCESSOR_2084_Z990:
8190 case PROCESSOR_2094_Z9_109:
8191 case PROCESSOR_2094_Z9_EC:
8192 case PROCESSOR_2817_Z196:
8193 return 3;
8194 case PROCESSOR_2097_Z10:
8195 return 2;
8196 case PROCESSOR_9672_G5:
8197 case PROCESSOR_9672_G6:
8198 case PROCESSOR_2064_Z900:
8199 /* Starting with EC12 we use the sched_reorder hook to take care
8200 of instruction dispatch constraints. The algorithm only
8201 picks the best instruction and assumes only a single
8202 instruction gets issued per cycle. */
8203 case PROCESSOR_2827_ZEC12:
8204 case PROCESSOR_2964_Z13:
8205 case PROCESSOR_3906_Z14:
8206 default:
8207 return 1;
8208 }
8209 }
8210
8211 static int
8212 s390_first_cycle_multipass_dfa_lookahead (void)
8213 {
8214 return 4;
8215 }
8216
8217 /* Annotate every literal pool reference in X by an UNSPEC_LTREF expression.
8218 Fix up MEMs as required. */
8219
8220 static void
8221 annotate_constant_pool_refs (rtx *x)
8222 {
8223 int i, j;
8224 const char *fmt;
8225
8226 gcc_assert (GET_CODE (*x) != SYMBOL_REF
8227 || !CONSTANT_POOL_ADDRESS_P (*x));
8228
8229 /* Literal pool references can only occur inside a MEM ... */
8230 if (GET_CODE (*x) == MEM)
8231 {
8232 rtx memref = XEXP (*x, 0);
8233
8234 if (GET_CODE (memref) == SYMBOL_REF
8235 && CONSTANT_POOL_ADDRESS_P (memref))
8236 {
8237 rtx base = cfun->machine->base_reg;
8238 rtx addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, memref, base),
8239 UNSPEC_LTREF);
8240
8241 *x = replace_equiv_address (*x, addr);
8242 return;
8243 }
8244
8245 if (GET_CODE (memref) == CONST
8246 && GET_CODE (XEXP (memref, 0)) == PLUS
8247 && GET_CODE (XEXP (XEXP (memref, 0), 1)) == CONST_INT
8248 && GET_CODE (XEXP (XEXP (memref, 0), 0)) == SYMBOL_REF
8249 && CONSTANT_POOL_ADDRESS_P (XEXP (XEXP (memref, 0), 0)))
8250 {
8251 HOST_WIDE_INT off = INTVAL (XEXP (XEXP (memref, 0), 1));
8252 rtx sym = XEXP (XEXP (memref, 0), 0);
8253 rtx base = cfun->machine->base_reg;
8254 rtx addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, sym, base),
8255 UNSPEC_LTREF);
8256
8257 *x = replace_equiv_address (*x, plus_constant (Pmode, addr, off));
8258 return;
8259 }
8260 }
8261
8262 /* ... or a load-address type pattern. */
8263 if (GET_CODE (*x) == SET)
8264 {
8265 rtx addrref = SET_SRC (*x);
8266
8267 if (GET_CODE (addrref) == SYMBOL_REF
8268 && CONSTANT_POOL_ADDRESS_P (addrref))
8269 {
8270 rtx base = cfun->machine->base_reg;
8271 rtx addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, addrref, base),
8272 UNSPEC_LTREF);
8273
8274 SET_SRC (*x) = addr;
8275 return;
8276 }
8277
8278 if (GET_CODE (addrref) == CONST
8279 && GET_CODE (XEXP (addrref, 0)) == PLUS
8280 && GET_CODE (XEXP (XEXP (addrref, 0), 1)) == CONST_INT
8281 && GET_CODE (XEXP (XEXP (addrref, 0), 0)) == SYMBOL_REF
8282 && CONSTANT_POOL_ADDRESS_P (XEXP (XEXP (addrref, 0), 0)))
8283 {
8284 HOST_WIDE_INT off = INTVAL (XEXP (XEXP (addrref, 0), 1));
8285 rtx sym = XEXP (XEXP (addrref, 0), 0);
8286 rtx base = cfun->machine->base_reg;
8287 rtx addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, sym, base),
8288 UNSPEC_LTREF);
8289
8290 SET_SRC (*x) = plus_constant (Pmode, addr, off);
8291 return;
8292 }
8293 }
8294
8295 /* Annotate LTREL_BASE as well. */
8296 if (GET_CODE (*x) == UNSPEC
8297 && XINT (*x, 1) == UNSPEC_LTREL_BASE)
8298 {
8299 rtx base = cfun->machine->base_reg;
8300 *x = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, XVECEXP (*x, 0, 0), base),
8301 UNSPEC_LTREL_BASE);
8302 return;
8303 }
8304
8305 fmt = GET_RTX_FORMAT (GET_CODE (*x));
8306 for (i = GET_RTX_LENGTH (GET_CODE (*x)) - 1; i >= 0; i--)
8307 {
8308 if (fmt[i] == 'e')
8309 {
8310 annotate_constant_pool_refs (&XEXP (*x, i));
8311 }
8312 else if (fmt[i] == 'E')
8313 {
8314 for (j = 0; j < XVECLEN (*x, i); j++)
8315 annotate_constant_pool_refs (&XVECEXP (*x, i, j));
8316 }
8317 }
8318 }
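/* For instance, a literal pool access

       (mem (symbol_ref [in pool]))

   is rewritten above into

       (mem (unspec [(symbol_ref) (base_reg)] UNSPEC_LTREF))

   which makes the dependency on the pool base register explicit.  */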
8319
8320 /* Split all branches that exceed the maximum distance.
8321 Returns true if this created a new literal pool entry. */
8322
8323 static int
8324 s390_split_branches (void)
8325 {
8326 rtx temp_reg = gen_rtx_REG (Pmode, RETURN_REGNUM);
8327 int new_literal = 0, ret;
8328 rtx_insn *insn;
8329 rtx pat, target;
8330 rtx *label;
8331
8332 /* We need correct insn addresses. */
8333
8334 shorten_branches (get_insns ());
8335
8336 /* Find all branches that exceed 64KB, and split them. */
8337
8338 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
8339 {
8340 if (! JUMP_P (insn) || tablejump_p (insn, NULL, NULL))
8341 continue;
8342
8343 pat = PATTERN (insn);
8344 if (GET_CODE (pat) == PARALLEL)
8345 pat = XVECEXP (pat, 0, 0);
8346 if (GET_CODE (pat) != SET || SET_DEST (pat) != pc_rtx)
8347 continue;
8348
8349 if (GET_CODE (SET_SRC (pat)) == LABEL_REF)
8350 {
8351 label = &SET_SRC (pat);
8352 }
8353 else if (GET_CODE (SET_SRC (pat)) == IF_THEN_ELSE)
8354 {
8355 if (GET_CODE (XEXP (SET_SRC (pat), 1)) == LABEL_REF)
8356 label = &XEXP (SET_SRC (pat), 1);
8357 else if (GET_CODE (XEXP (SET_SRC (pat), 2)) == LABEL_REF)
8358 label = &XEXP (SET_SRC (pat), 2);
8359 else
8360 continue;
8361 }
8362 else
8363 continue;
8364
8365 if (get_attr_length (insn) <= 4)
8366 continue;
8367
8368 /* We are going to use the return register as scratch register,
8369 make sure it will be saved/restored by the prologue/epilogue. */
8370 cfun_frame_layout.save_return_addr_p = 1;
8371
8372 if (!flag_pic)
8373 {
8374 new_literal = 1;
8375 rtx mem = force_const_mem (Pmode, *label);
8376 rtx_insn *set_insn = emit_insn_before (gen_rtx_SET (temp_reg, mem),
8377 insn);
8378 INSN_ADDRESSES_NEW (set_insn, -1);
8379 annotate_constant_pool_refs (&PATTERN (set_insn));
8380
8381 target = temp_reg;
8382 }
8383 else
8384 {
8385 new_literal = 1;
8386 target = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, *label),
8387 UNSPEC_LTREL_OFFSET);
8388 target = gen_rtx_CONST (Pmode, target);
8389 target = force_const_mem (Pmode, target);
8390 rtx_insn *set_insn = emit_insn_before (gen_rtx_SET (temp_reg, target),
8391 insn);
8392 INSN_ADDRESSES_NEW (set_insn, -1);
8393 annotate_constant_pool_refs (&PATTERN (set_insn));
8394
8395 target = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, XEXP (target, 0),
8396 cfun->machine->base_reg),
8397 UNSPEC_LTREL_BASE);
8398 target = gen_rtx_PLUS (Pmode, temp_reg, target);
8399 }
8400
8401 ret = validate_change (insn, label, target, 0);
8402 gcc_assert (ret);
8403 }
8404
8405 return new_literal;
8406 }
8407
8408
8409 /* Find an annotated literal pool symbol referenced in RTX X,
8410 and store it at REF. Will abort if X contains references to
8411 more than one such pool symbol; multiple references to the same
8412 symbol are allowed, however.
8413
8414 The rtx pointed to by REF must be initialized to NULL_RTX
8415 by the caller before calling this routine. */
8416
8417 static void
8418 find_constant_pool_ref (rtx x, rtx *ref)
8419 {
8420 int i, j;
8421 const char *fmt;
8422
8423 /* Ignore LTREL_BASE references. */
8424 if (GET_CODE (x) == UNSPEC
8425 && XINT (x, 1) == UNSPEC_LTREL_BASE)
8426 return;
8427 /* Likewise POOL_ENTRY insns. */
8428 if (GET_CODE (x) == UNSPEC_VOLATILE
8429 && XINT (x, 1) == UNSPECV_POOL_ENTRY)
8430 return;
8431
8432 gcc_assert (GET_CODE (x) != SYMBOL_REF
8433 || !CONSTANT_POOL_ADDRESS_P (x));
8434
8435 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_LTREF)
8436 {
8437 rtx sym = XVECEXP (x, 0, 0);
8438 gcc_assert (GET_CODE (sym) == SYMBOL_REF
8439 && CONSTANT_POOL_ADDRESS_P (sym));
8440
8441 if (*ref == NULL_RTX)
8442 *ref = sym;
8443 else
8444 gcc_assert (*ref == sym);
8445
8446 return;
8447 }
8448
8449 fmt = GET_RTX_FORMAT (GET_CODE (x));
8450 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
8451 {
8452 if (fmt[i] == 'e')
8453 {
8454 find_constant_pool_ref (XEXP (x, i), ref);
8455 }
8456 else if (fmt[i] == 'E')
8457 {
8458 for (j = 0; j < XVECLEN (x, i); j++)
8459 find_constant_pool_ref (XVECEXP (x, i, j), ref);
8460 }
8461 }
8462 }
8463
8464 /* Replace every reference to the annotated literal pool
8465 symbol REF in X by its base plus OFFSET. */
8466
8467 static void
8468 replace_constant_pool_ref (rtx *x, rtx ref, rtx offset)
8469 {
8470 int i, j;
8471 const char *fmt;
8472
8473 gcc_assert (*x != ref);
8474
8475 if (GET_CODE (*x) == UNSPEC
8476 && XINT (*x, 1) == UNSPEC_LTREF
8477 && XVECEXP (*x, 0, 0) == ref)
8478 {
8479 *x = gen_rtx_PLUS (Pmode, XVECEXP (*x, 0, 1), offset);
8480 return;
8481 }
8482
8483 if (GET_CODE (*x) == PLUS
8484 && GET_CODE (XEXP (*x, 1)) == CONST_INT
8485 && GET_CODE (XEXP (*x, 0)) == UNSPEC
8486 && XINT (XEXP (*x, 0), 1) == UNSPEC_LTREF
8487 && XVECEXP (XEXP (*x, 0), 0, 0) == ref)
8488 {
8489 rtx addr = gen_rtx_PLUS (Pmode, XVECEXP (XEXP (*x, 0), 0, 1), offset);
8490 *x = plus_constant (Pmode, addr, INTVAL (XEXP (*x, 1)));
8491 return;
8492 }
8493
8494 fmt = GET_RTX_FORMAT (GET_CODE (*x));
8495 for (i = GET_RTX_LENGTH (GET_CODE (*x)) - 1; i >= 0; i--)
8496 {
8497 if (fmt[i] == 'e')
8498 {
8499 replace_constant_pool_ref (&XEXP (*x, i), ref, offset);
8500 }
8501 else if (fmt[i] == 'E')
8502 {
8503 for (j = 0; j < XVECLEN (*x, i); j++)
8504 replace_constant_pool_ref (&XVECEXP (*x, i, j), ref, offset);
8505 }
8506 }
8507 }
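/* Continuing the UNSPEC_LTREF example from annotate_constant_pool_refs:
   once the pool layout is fixed, the annotated reference

       (unspec [(symbol_ref REF) (base_reg)] UNSPEC_LTREF)

   is replaced above by the explicit address

       (plus (base_reg) OFFSET)  */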
8508
8509 /* Check whether X contains an UNSPEC_LTREL_BASE.
8510 Return its constant pool symbol if found, NULL_RTX otherwise. */
8511
8512 static rtx
8513 find_ltrel_base (rtx x)
8514 {
8515 int i, j;
8516 const char *fmt;
8517
8518 if (GET_CODE (x) == UNSPEC
8519 && XINT (x, 1) == UNSPEC_LTREL_BASE)
8520 return XVECEXP (x, 0, 0);
8521
8522 fmt = GET_RTX_FORMAT (GET_CODE (x));
8523 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
8524 {
8525 if (fmt[i] == 'e')
8526 {
8527 rtx fnd = find_ltrel_base (XEXP (x, i));
8528 if (fnd)
8529 return fnd;
8530 }
8531 else if (fmt[i] == 'E')
8532 {
8533 for (j = 0; j < XVECLEN (x, i); j++)
8534 {
8535 rtx fnd = find_ltrel_base (XVECEXP (x, i, j));
8536 if (fnd)
8537 return fnd;
8538 }
8539 }
8540 }
8541
8542 return NULL_RTX;
8543 }
8544
8545 /* Replace any occurrence of UNSPEC_LTREL_BASE in X with its base. */
8546
8547 static void
8548 replace_ltrel_base (rtx *x)
8549 {
8550 int i, j;
8551 const char *fmt;
8552
8553 if (GET_CODE (*x) == UNSPEC
8554 && XINT (*x, 1) == UNSPEC_LTREL_BASE)
8555 {
8556 *x = XVECEXP (*x, 0, 1);
8557 return;
8558 }
8559
8560 fmt = GET_RTX_FORMAT (GET_CODE (*x));
8561 for (i = GET_RTX_LENGTH (GET_CODE (*x)) - 1; i >= 0; i--)
8562 {
8563 if (fmt[i] == 'e')
8564 {
8565 replace_ltrel_base (&XEXP (*x, i));
8566 }
8567 else if (fmt[i] == 'E')
8568 {
8569 for (j = 0; j < XVECLEN (*x, i); j++)
8570 replace_ltrel_base (&XVECEXP (*x, i, j));
8571 }
8572 }
8573 }
8574
8575
8576 /* We keep a list of constants which we have to add to internal
8577 constant tables in the middle of large functions. */
8578
8579 #define NR_C_MODES 32
8580 machine_mode constant_modes[NR_C_MODES] =
8581 {
8582 TFmode, TImode, TDmode,
8583 V16QImode, V8HImode, V4SImode, V2DImode, V1TImode,
8584 V4SFmode, V2DFmode, V1TFmode,
8585 DFmode, DImode, DDmode,
8586 V8QImode, V4HImode, V2SImode, V1DImode, V2SFmode, V1DFmode,
8587 SFmode, SImode, SDmode,
8588 V4QImode, V2HImode, V1SImode, V1SFmode,
8589 HImode,
8590 V2QImode, V1HImode,
8591 QImode,
8592 V1QImode
8593 };
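/* The modes above are ordered by decreasing size (16, 8, 4, 2 and 1
   bytes).  s390_dump_pool emits the constants in this same order, so
   with the pool start aligned to 8 bytes (4 in ESA mode) every entry
   ends up aligned to at least MIN (size, pool alignment): a size class
   can only start at an offset that is a multiple of all smaller
   sizes.  */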
8594
8595 struct constant
8596 {
8597 struct constant *next;
8598 rtx value;
8599 rtx_code_label *label;
8600 };
8601
8602 struct constant_pool
8603 {
8604 struct constant_pool *next;
8605 rtx_insn *first_insn;
8606 rtx_insn *pool_insn;
8607 bitmap insns;
8608 rtx_insn *emit_pool_after;
8609
8610 struct constant *constants[NR_C_MODES];
8611 struct constant *execute;
8612 rtx_code_label *label;
8613 int size;
8614 };
8615
8616 /* Allocate new constant_pool structure. */
8617
8618 static struct constant_pool *
8619 s390_alloc_pool (void)
8620 {
8621 struct constant_pool *pool;
8622 int i;
8623
8624 pool = (struct constant_pool *) xmalloc (sizeof *pool);
8625 pool->next = NULL;
8626 for (i = 0; i < NR_C_MODES; i++)
8627 pool->constants[i] = NULL;
8628
8629 pool->execute = NULL;
8630 pool->label = gen_label_rtx ();
8631 pool->first_insn = NULL;
8632 pool->pool_insn = NULL;
8633 pool->insns = BITMAP_ALLOC (NULL);
8634 pool->size = 0;
8635 pool->emit_pool_after = NULL;
8636
8637 return pool;
8638 }
8639
8640 /* Create new constant pool covering instructions starting at INSN
8641 and chain it to the end of POOL_LIST. */
8642
8643 static struct constant_pool *
8644 s390_start_pool (struct constant_pool **pool_list, rtx_insn *insn)
8645 {
8646 struct constant_pool *pool, **prev;
8647
8648 pool = s390_alloc_pool ();
8649 pool->first_insn = insn;
8650
8651 for (prev = pool_list; *prev; prev = &(*prev)->next)
8652 ;
8653 *prev = pool;
8654
8655 return pool;
8656 }
8657
8658 /* End range of instructions covered by POOL at INSN and emit
8659 placeholder insn representing the pool. */
8660
8661 static void
8662 s390_end_pool (struct constant_pool *pool, rtx_insn *insn)
8663 {
8664 rtx pool_size = GEN_INT (pool->size + 8 /* alignment slop */);
8665
8666 if (!insn)
8667 insn = get_last_insn ();
8668
8669 pool->pool_insn = emit_insn_after (gen_pool (pool_size), insn);
8670 INSN_ADDRESSES_NEW (pool->pool_insn, -1);
8671 }
8672
8673 /* Add INSN to the list of insns covered by POOL. */
8674
8675 static void
8676 s390_add_pool_insn (struct constant_pool *pool, rtx insn)
8677 {
8678 bitmap_set_bit (pool->insns, INSN_UID (insn));
8679 }
8680
8681 /* Return pool out of POOL_LIST that covers INSN. */
8682
8683 static struct constant_pool *
8684 s390_find_pool (struct constant_pool *pool_list, rtx insn)
8685 {
8686 struct constant_pool *pool;
8687
8688 for (pool = pool_list; pool; pool = pool->next)
8689 if (bitmap_bit_p (pool->insns, INSN_UID (insn)))
8690 break;
8691
8692 return pool;
8693 }
8694
8695 /* Add constant VAL of mode MODE to the constant pool POOL. */
8696
8697 static void
8698 s390_add_constant (struct constant_pool *pool, rtx val, machine_mode mode)
8699 {
8700 struct constant *c;
8701 int i;
8702
8703 for (i = 0; i < NR_C_MODES; i++)
8704 if (constant_modes[i] == mode)
8705 break;
8706 gcc_assert (i != NR_C_MODES);
8707
8708 for (c = pool->constants[i]; c != NULL; c = c->next)
8709 if (rtx_equal_p (val, c->value))
8710 break;
8711
8712 if (c == NULL)
8713 {
8714 c = (struct constant *) xmalloc (sizeof *c);
8715 c->value = val;
8716 c->label = gen_label_rtx ();
8717 c->next = pool->constants[i];
8718 pool->constants[i] = c;
8719 pool->size += GET_MODE_SIZE (mode);
8720 }
8721 }
8722
8723 /* Return an rtx that represents the offset of X from the start of
8724 pool POOL. */
8725
8726 static rtx
8727 s390_pool_offset (struct constant_pool *pool, rtx x)
8728 {
8729 rtx label;
8730
8731 label = gen_rtx_LABEL_REF (GET_MODE (x), pool->label);
8732 x = gen_rtx_UNSPEC (GET_MODE (x), gen_rtvec (2, x, label),
8733 UNSPEC_POOL_OFFSET);
8734 return gen_rtx_CONST (GET_MODE (x), x);
8735 }
8736
8737 /* Find constant VAL of mode MODE in the constant pool POOL.
8738 Return an RTX describing the distance from the start of
8739 the pool to the location of the new constant. */
8740
8741 static rtx
8742 s390_find_constant (struct constant_pool *pool, rtx val,
8743 machine_mode mode)
8744 {
8745 struct constant *c;
8746 int i;
8747
8748 for (i = 0; i < NR_C_MODES; i++)
8749 if (constant_modes[i] == mode)
8750 break;
8751 gcc_assert (i != NR_C_MODES);
8752
8753 for (c = pool->constants[i]; c != NULL; c = c->next)
8754 if (rtx_equal_p (val, c->value))
8755 break;
8756
8757 gcc_assert (c);
8758
8759 return s390_pool_offset (pool, gen_rtx_LABEL_REF (Pmode, c->label));
8760 }
8761
8762 /* Check whether INSN is an execute. Return the label_ref to its
8763 execute target template if so, NULL_RTX otherwise. */
8764
8765 static rtx
8766 s390_execute_label (rtx insn)
8767 {
8768 if (INSN_P (insn)
8769 && GET_CODE (PATTERN (insn)) == PARALLEL
8770 && GET_CODE (XVECEXP (PATTERN (insn), 0, 0)) == UNSPEC
8771 && (XINT (XVECEXP (PATTERN (insn), 0, 0), 1) == UNSPEC_EXECUTE
8772 || XINT (XVECEXP (PATTERN (insn), 0, 0), 1) == UNSPEC_EXECUTE_JUMP))
8773 {
8774 if (XINT (XVECEXP (PATTERN (insn), 0, 0), 1) == UNSPEC_EXECUTE)
8775 return XVECEXP (XVECEXP (PATTERN (insn), 0, 0), 0, 2);
8776 else
8777 {
8778 gcc_assert (JUMP_P (insn));
8779 /* For jump insns as execute target:
8780 - There is one operand less in the parallel (the
8781 modification register of the execute is always 0).
8782 - The execute target label is wrapped into an
8783 if_then_else in order to hide it from jump analysis. */
8784 return XEXP (XVECEXP (XVECEXP (PATTERN (insn), 0, 0), 0, 0), 0);
8785 }
8786 }
8787
8788 return NULL_RTX;
8789 }
8790
8791 /* Add execute target for INSN to the constant pool POOL. */
8792
8793 static void
8794 s390_add_execute (struct constant_pool *pool, rtx insn)
8795 {
8796 struct constant *c;
8797
8798 for (c = pool->execute; c != NULL; c = c->next)
8799 if (INSN_UID (insn) == INSN_UID (c->value))
8800 break;
8801
8802 if (c == NULL)
8803 {
8804 c = (struct constant *) xmalloc (sizeof *c);
8805 c->value = insn;
8806 c->label = gen_label_rtx ();
8807 c->next = pool->execute;
8808 pool->execute = c;
8809 pool->size += 6;
8810 }
8811 }
8812
8813 /* Find execute target for INSN in the constant pool POOL.
8814 Return an RTX describing the distance from the start of
8815 the pool to the location of the execute target. */
8816
8817 static rtx
8818 s390_find_execute (struct constant_pool *pool, rtx insn)
8819 {
8820 struct constant *c;
8821
8822 for (c = pool->execute; c != NULL; c = c->next)
8823 if (INSN_UID (insn) == INSN_UID (c->value))
8824 break;
8825
8826 gcc_assert (c);
8827
8828 return s390_pool_offset (pool, gen_rtx_LABEL_REF (Pmode, c->label));
8829 }
8830
8831 /* For an execute INSN, extract the execute target template. */
8832
8833 static rtx
8834 s390_execute_target (rtx insn)
8835 {
8836 rtx pattern = PATTERN (insn);
8837 gcc_assert (s390_execute_label (insn));
8838
8839 if (XVECLEN (pattern, 0) == 2)
8840 {
8841 pattern = copy_rtx (XVECEXP (pattern, 0, 1));
8842 }
8843 else
8844 {
8845 rtvec vec = rtvec_alloc (XVECLEN (pattern, 0) - 1);
8846 int i;
8847
8848 for (i = 0; i < XVECLEN (pattern, 0) - 1; i++)
8849 RTVEC_ELT (vec, i) = copy_rtx (XVECEXP (pattern, 0, i + 1));
8850
8851 pattern = gen_rtx_PARALLEL (VOIDmode, vec);
8852 }
8853
8854 return pattern;
8855 }
8856
8857 /* Indicate that INSN cannot be duplicated. This is the case for
8858 execute insns that carry a unique label. */
8859
8860 static bool
8861 s390_cannot_copy_insn_p (rtx_insn *insn)
8862 {
8863 rtx label = s390_execute_label (insn);
8864 return label && label != const0_rtx;
8865 }
8866
8867 /* Dump out the constants in POOL. If REMOTE_LABEL is true,
8868 do not emit the pool base label. */
8869
8870 static void
8871 s390_dump_pool (struct constant_pool *pool, bool remote_label)
8872 {
8873 struct constant *c;
8874 rtx_insn *insn = pool->pool_insn;
8875 int i;
8876
8877 /* Switch to rodata section. */
8878 if (TARGET_CPU_ZARCH)
8879 {
8880 insn = emit_insn_after (gen_pool_section_start (), insn);
8881 INSN_ADDRESSES_NEW (insn, -1);
8882 }
8883
8884 /* Ensure minimum pool alignment. */
8885 if (TARGET_CPU_ZARCH)
8886 insn = emit_insn_after (gen_pool_align (GEN_INT (8)), insn);
8887 else
8888 insn = emit_insn_after (gen_pool_align (GEN_INT (4)), insn);
8889 INSN_ADDRESSES_NEW (insn, -1);
8890
8891 /* Emit pool base label. */
8892 if (!remote_label)
8893 {
8894 insn = emit_label_after (pool->label, insn);
8895 INSN_ADDRESSES_NEW (insn, -1);
8896 }
8897
8898 /* Dump constants in descending alignment requirement order,
8899 ensuring proper alignment for every constant. */
8900 for (i = 0; i < NR_C_MODES; i++)
8901 for (c = pool->constants[i]; c; c = c->next)
8902 {
8903 /* Convert UNSPEC_LTREL_OFFSET unspecs to pool-relative references. */
8904 rtx value = copy_rtx (c->value);
8905 if (GET_CODE (value) == CONST
8906 && GET_CODE (XEXP (value, 0)) == UNSPEC
8907 && XINT (XEXP (value, 0), 1) == UNSPEC_LTREL_OFFSET
8908 && XVECLEN (XEXP (value, 0), 0) == 1)
8909 value = s390_pool_offset (pool, XVECEXP (XEXP (value, 0), 0, 0));
8910
8911 insn = emit_label_after (c->label, insn);
8912 INSN_ADDRESSES_NEW (insn, -1);
8913
8914 value = gen_rtx_UNSPEC_VOLATILE (constant_modes[i],
8915 gen_rtvec (1, value),
8916 UNSPECV_POOL_ENTRY);
8917 insn = emit_insn_after (value, insn);
8918 INSN_ADDRESSES_NEW (insn, -1);
8919 }
8920
8921 /* Ensure minimum alignment for instructions. */
8922 insn = emit_insn_after (gen_pool_align (GEN_INT (2)), insn);
8923 INSN_ADDRESSES_NEW (insn, -1);
8924
8925 /* Output in-pool execute template insns. */
8926 for (c = pool->execute; c; c = c->next)
8927 {
8928 insn = emit_label_after (c->label, insn);
8929 INSN_ADDRESSES_NEW (insn, -1);
8930
8931 insn = emit_insn_after (s390_execute_target (c->value), insn);
8932 INSN_ADDRESSES_NEW (insn, -1);
8933 }
8934
8935 /* Switch back to previous section. */
8936 if (TARGET_CPU_ZARCH)
8937 {
8938 insn = emit_insn_after (gen_pool_section_end (), insn);
8939 INSN_ADDRESSES_NEW (insn, -1);
8940 }
8941
8942 insn = emit_barrier_after (insn);
8943 INSN_ADDRESSES_NEW (insn, -1);
8944
8945 /* Remove placeholder insn. */
8946 remove_insn (pool->pool_insn);
8947 }
8948
8949 /* Free all memory used by POOL. */
8950
8951 static void
8952 s390_free_pool (struct constant_pool *pool)
8953 {
8954 struct constant *c, *next;
8955 int i;
8956
8957 for (i = 0; i < NR_C_MODES; i++)
8958 for (c = pool->constants[i]; c; c = next)
8959 {
8960 next = c->next;
8961 free (c);
8962 }
8963
8964 for (c = pool->execute; c; c = next)
8965 {
8966 next = c->next;
8967 free (c);
8968 }
8969
8970 BITMAP_FREE (pool->insns);
8971 free (pool);
8972 }
8973
8974
8975 /* Collect main literal pool. Return NULL on overflow. */
8976
8977 static struct constant_pool *
8978 s390_mainpool_start (void)
8979 {
8980 struct constant_pool *pool;
8981 rtx_insn *insn;
8982
8983 pool = s390_alloc_pool ();
8984
8985 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
8986 {
8987 if (NONJUMP_INSN_P (insn)
8988 && GET_CODE (PATTERN (insn)) == SET
8989 && GET_CODE (SET_SRC (PATTERN (insn))) == UNSPEC_VOLATILE
8990 && XINT (SET_SRC (PATTERN (insn)), 1) == UNSPECV_MAIN_POOL)
8991 {
8992 /* There might be two main_pool instructions if base_reg
8993 is call-clobbered; one for shrink-wrapped code and one
8994 for the rest. We want to keep the first. */
8995 if (pool->pool_insn)
8996 {
8997 insn = PREV_INSN (insn);
8998 delete_insn (NEXT_INSN (insn));
8999 continue;
9000 }
9001 pool->pool_insn = insn;
9002 }
9003
9004 if (!TARGET_CPU_ZARCH && s390_execute_label (insn))
9005 {
9006 s390_add_execute (pool, insn);
9007 }
9008 else if (NONJUMP_INSN_P (insn) || CALL_P (insn))
9009 {
9010 rtx pool_ref = NULL_RTX;
9011 find_constant_pool_ref (PATTERN (insn), &pool_ref);
9012 if (pool_ref)
9013 {
9014 rtx constant = get_pool_constant (pool_ref);
9015 machine_mode mode = get_pool_mode (pool_ref);
9016 s390_add_constant (pool, constant, mode);
9017 }
9018 }
9019
9020 /* If hot/cold partitioning is enabled we have to make sure that
9021 the literal pool is emitted in the same section where the
9022 initialization of the literal pool base pointer takes place.
9023 emit_pool_after is only used in the non-overflow case on
9024 non-z/Architecture CPUs where we can emit the literal pool at the
9025 end of the function body within the text section. */
9026 if (NOTE_P (insn)
9027 && NOTE_KIND (insn) == NOTE_INSN_SWITCH_TEXT_SECTIONS
9028 && !pool->emit_pool_after)
9029 pool->emit_pool_after = PREV_INSN (insn);
9030 }
9031
9032 gcc_assert (pool->pool_insn || pool->size == 0);
9033
9034 if (pool->size >= 4096)
9035 {
9036 /* We're going to chunkify the pool, so remove the main
9037 pool placeholder insn. */
9038 remove_insn (pool->pool_insn);
9039
9040 s390_free_pool (pool);
9041 pool = NULL;
9042 }
9043
9044 /* If the function ends with the section where the literal pool
9045 should be emitted, set the marker to its end. */
9046 if (pool && !pool->emit_pool_after)
9047 pool->emit_pool_after = get_last_insn ();
9048
9049 return pool;
9050 }
9051
9052 /* POOL holds the main literal pool as collected by s390_mainpool_start.
9053 Modify the current function to output the pool constants as well as
9054 the pool register setup instruction. */
9055
9056 static void
9057 s390_mainpool_finish (struct constant_pool *pool)
9058 {
9059 rtx base_reg = cfun->machine->base_reg;
9060
9061 /* If the pool is empty, we're done. */
9062 if (pool->size == 0)
9063 {
9064 /* We don't actually need a base register after all. */
9065 cfun->machine->base_reg = NULL_RTX;
9066
9067 if (pool->pool_insn)
9068 remove_insn (pool->pool_insn);
9069 s390_free_pool (pool);
9070 return;
9071 }
9072
9073 /* We need correct insn addresses. */
9074 shorten_branches (get_insns ());
9075
9076 /* On zSeries, we use a LARL to load the pool register. The pool is
9077 located in the .rodata section, so we emit it after the function. */
9078 if (TARGET_CPU_ZARCH)
9079 {
9080 rtx set = gen_main_base_64 (base_reg, pool->label);
9081 rtx_insn *insn = emit_insn_after (set, pool->pool_insn);
9082 INSN_ADDRESSES_NEW (insn, -1);
9083 remove_insn (pool->pool_insn);
9084
9085 insn = get_last_insn ();
9086 pool->pool_insn = emit_insn_after (gen_pool (const0_rtx), insn);
9087 INSN_ADDRESSES_NEW (pool->pool_insn, -1);
9088
9089 s390_dump_pool (pool, 0);
9090 }
9091
9092 /* On S/390, if the total size of the function's code plus literal pool
9093 does not exceed 4096 bytes, we use BASR to set up a function base
9094 pointer, and emit the literal pool at the end of the function. */
9095 else if (INSN_ADDRESSES (INSN_UID (pool->emit_pool_after))
9096 + pool->size + 8 /* alignment slop */ < 4096)
9097 {
9098 rtx set = gen_main_base_31_small (base_reg, pool->label);
9099 rtx_insn *insn = emit_insn_after (set, pool->pool_insn);
9100 INSN_ADDRESSES_NEW (insn, -1);
9101 remove_insn (pool->pool_insn);
9102
9103 insn = emit_label_after (pool->label, insn);
9104 INSN_ADDRESSES_NEW (insn, -1);
9105
9106 /* emit_pool_after will be set by s390_mainpool_start to the
9107 last insn of the section where the literal pool should be
9108 emitted. */
9109 insn = pool->emit_pool_after;
9110
9111 pool->pool_insn = emit_insn_after (gen_pool (const0_rtx), insn);
9112 INSN_ADDRESSES_NEW (pool->pool_insn, -1);
9113
9114 s390_dump_pool (pool, 1);
9115 }
9116
9117 /* Otherwise, we emit an inline literal pool and use BASR to branch
9118 over it, setting up the pool register at the same time. */
9119 else
9120 {
9121 rtx_code_label *pool_end = gen_label_rtx ();
9122
9123 rtx pat = gen_main_base_31_large (base_reg, pool->label, pool_end);
9124 rtx_insn *insn = emit_jump_insn_after (pat, pool->pool_insn);
9125 JUMP_LABEL (insn) = pool_end;
9126 INSN_ADDRESSES_NEW (insn, -1);
9127 remove_insn (pool->pool_insn);
9128
9129 insn = emit_label_after (pool->label, insn);
9130 INSN_ADDRESSES_NEW (insn, -1);
9131
9132 pool->pool_insn = emit_insn_after (gen_pool (const0_rtx), insn);
9133 INSN_ADDRESSES_NEW (pool->pool_insn, -1);
9134
9135 insn = emit_label_after (pool_end, pool->pool_insn);
9136 INSN_ADDRESSES_NEW (insn, -1);
9137
9138 s390_dump_pool (pool, 1);
9139 }
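  /* Illustrative sketch of the three code shapes above (assumed
     mnemonics, not literal compiler output; %r13 stands in for
     cfun->machine->base_reg):

       z/Arch:	     larl %r13,.LPOOL	# pool in .rodata after the body
       small S/390:  basr %r13,0	# base at function start, pool
					# dumped at the end of the body
       large S/390:  bras %r13,.Lskip	# save pool address, skip pool
		     .LPOOL: <constants>
		     .Lskip:							*/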
9140
9141
9142 /* Replace all literal pool references. */
9143
9144 for (rtx_insn *insn = get_insns (); insn; insn = NEXT_INSN (insn))
9145 {
9146 if (INSN_P (insn))
9147 replace_ltrel_base (&PATTERN (insn));
9148
9149 if (NONJUMP_INSN_P (insn) || CALL_P (insn))
9150 {
9151 rtx addr, pool_ref = NULL_RTX;
9152 find_constant_pool_ref (PATTERN (insn), &pool_ref);
9153 if (pool_ref)
9154 {
9155 if (s390_execute_label (insn))
9156 addr = s390_find_execute (pool, insn);
9157 else
9158 addr = s390_find_constant (pool, get_pool_constant (pool_ref),
9159 get_pool_mode (pool_ref));
9160
9161 replace_constant_pool_ref (&PATTERN (insn), pool_ref, addr);
9162 INSN_CODE (insn) = -1;
9163 }
9164 }
9165 }
9166
9167
9168 /* Free the pool. */
9169 s390_free_pool (pool);
9170 }
9171
9172 /* POOL holds the main literal pool as collected by s390_mainpool_start.
9173 We have decided we cannot use this pool, so revert all changes
9174 to the current function that were done by s390_mainpool_start. */
9175 static void
9176 s390_mainpool_cancel (struct constant_pool *pool)
9177 {
9178 /* We didn't actually change the instruction stream, so simply
9179 free the pool memory. */
9180 s390_free_pool (pool);
9181 }
9182
9183
9184 /* Chunkify the literal pool. */
9185
9186 #define S390_POOL_CHUNK_MIN 0xc00
9187 #define S390_POOL_CHUNK_MAX 0xe00
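/* Illustrative reading of the bounds: both stay below the 4 KB
   displacement limit, and the gap up to 0x1000 leaves slack for the
   base reload insns and padding that chunking inserts later (cf. the
   pessimistic extra_size bookkeeping below).  */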
9188
9189 static struct constant_pool *
9190 s390_chunkify_start (void)
9191 {
9192 struct constant_pool *curr_pool = NULL, *pool_list = NULL;
9193 int extra_size = 0;
9194 bitmap far_labels;
9195 rtx pending_ltrel = NULL_RTX;
9196 rtx_insn *insn;
9197
9198 rtx (*gen_reload_base) (rtx, rtx) =
9199 TARGET_CPU_ZARCH? gen_reload_base_64 : gen_reload_base_31;
9200
9201
9202 /* We need correct insn addresses. */
9203
9204 shorten_branches (get_insns ());
9205
9206 /* Scan all insns and move literals to pool chunks. */
9207
9208 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
9209 {
9210 bool section_switch_p = false;
9211
9212 /* Check for pending LTREL_BASE. */
9213 if (INSN_P (insn))
9214 {
9215 rtx ltrel_base = find_ltrel_base (PATTERN (insn));
9216 if (ltrel_base)
9217 {
9218 gcc_assert (ltrel_base == pending_ltrel);
9219 pending_ltrel = NULL_RTX;
9220 }
9221 }
9222
9223 if (!TARGET_CPU_ZARCH && s390_execute_label (insn))
9224 {
9225 if (!curr_pool)
9226 curr_pool = s390_start_pool (&pool_list, insn);
9227
9228 s390_add_execute (curr_pool, insn);
9229 s390_add_pool_insn (curr_pool, insn);
9230 }
9231 else if (NONJUMP_INSN_P (insn) || CALL_P (insn))
9232 {
9233 rtx pool_ref = NULL_RTX;
9234 find_constant_pool_ref (PATTERN (insn), &pool_ref);
9235 if (pool_ref)
9236 {
9237 rtx constant = get_pool_constant (pool_ref);
9238 machine_mode mode = get_pool_mode (pool_ref);
9239
9240 if (!curr_pool)
9241 curr_pool = s390_start_pool (&pool_list, insn);
9242
9243 s390_add_constant (curr_pool, constant, mode);
9244 s390_add_pool_insn (curr_pool, insn);
9245
9246 /* Don't split the pool chunk between a LTREL_OFFSET load
9247 and the corresponding LTREL_BASE. */
9248 if (GET_CODE (constant) == CONST
9249 && GET_CODE (XEXP (constant, 0)) == UNSPEC
9250 && XINT (XEXP (constant, 0), 1) == UNSPEC_LTREL_OFFSET)
9251 {
9252 gcc_assert (!pending_ltrel);
9253 pending_ltrel = pool_ref;
9254 }
9255 }
9256 }
9257
9258 if (JUMP_P (insn) || JUMP_TABLE_DATA_P (insn) || LABEL_P (insn))
9259 {
9260 if (curr_pool)
9261 s390_add_pool_insn (curr_pool, insn);
9262 /* An LTREL_BASE must follow within the same basic block. */
9263 gcc_assert (!pending_ltrel);
9264 }
9265
9266 if (NOTE_P (insn))
9267 switch (NOTE_KIND (insn))
9268 {
9269 case NOTE_INSN_SWITCH_TEXT_SECTIONS:
9270 section_switch_p = true;
9271 break;
9272 case NOTE_INSN_VAR_LOCATION:
9273 continue;
9274 default:
9275 break;
9276 }
9277
9278 if (!curr_pool
9279 || INSN_ADDRESSES_SIZE () <= (size_t) INSN_UID (insn)
9280 || INSN_ADDRESSES (INSN_UID (insn)) == -1)
9281 continue;
9282
9283 if (TARGET_CPU_ZARCH)
9284 {
9285 if (curr_pool->size < S390_POOL_CHUNK_MAX)
9286 continue;
9287
9288 s390_end_pool (curr_pool, NULL);
9289 curr_pool = NULL;
9290 }
9291 else
9292 {
9293 int chunk_size = INSN_ADDRESSES (INSN_UID (insn))
9294 - INSN_ADDRESSES (INSN_UID (curr_pool->first_insn))
9295 + extra_size;
9296
9297 /* We will later have to insert base register reload insns.
9298 Those will have an effect on code size, which we need to
9299 consider here. This calculation makes rather pessimistic
9300 worst-case assumptions. */
9301 if (LABEL_P (insn))
9302 extra_size += 6;
9303
9304 if (chunk_size < S390_POOL_CHUNK_MIN
9305 && curr_pool->size < S390_POOL_CHUNK_MIN
9306 && !section_switch_p)
9307 continue;
9308
9309 /* Pool chunks can only be inserted after BARRIERs ... */
9310 if (BARRIER_P (insn))
9311 {
9312 s390_end_pool (curr_pool, insn);
9313 curr_pool = NULL;
9314 extra_size = 0;
9315 }
9316
9317 /* ... so if we don't find one in time, create one. */
9318 else if (chunk_size > S390_POOL_CHUNK_MAX
9319 || curr_pool->size > S390_POOL_CHUNK_MAX
9320 || section_switch_p)
9321 {
9322 rtx_insn *label, *jump, *barrier, *next, *prev;
9323
9324 if (!section_switch_p)
9325 {
9326 /* We can insert the barrier only after a 'real' insn. */
9327 if (! NONJUMP_INSN_P (insn) && ! CALL_P (insn))
9328 continue;
9329 if (get_attr_length (insn) == 0)
9330 continue;
9331 /* Don't separate LTREL_BASE from the corresponding
9332 LTREL_OFFSET load. */
9333 if (pending_ltrel)
9334 continue;
9335 next = insn;
9336 do
9337 {
9338 insn = next;
9339 next = NEXT_INSN (insn);
9340 }
9341 while (next
9342 && NOTE_P (next)
9343 && NOTE_KIND (next) == NOTE_INSN_VAR_LOCATION);
9344 }
9345 else
9346 {
9347 gcc_assert (!pending_ltrel);
9348
9349 /* The old pool has to end before the section switch
9350 note in order to make it part of the current
9351 section. */
9352 insn = PREV_INSN (insn);
9353 }
9354
9355 label = gen_label_rtx ();
9356 prev = insn;
9357 if (prev && NOTE_P (prev))
9358 prev = prev_nonnote_insn (prev);
9359 if (prev)
9360 jump = emit_jump_insn_after_setloc (gen_jump (label), insn,
9361 INSN_LOCATION (prev));
9362 else
9363 jump = emit_jump_insn_after_noloc (gen_jump (label), insn);
9364 barrier = emit_barrier_after (jump);
9365 insn = emit_label_after (label, barrier);
9366 JUMP_LABEL (jump) = label;
9367 LABEL_NUSES (label) = 1;
9368
9369 INSN_ADDRESSES_NEW (jump, -1);
9370 INSN_ADDRESSES_NEW (barrier, -1);
9371 INSN_ADDRESSES_NEW (insn, -1);
9372
9373 s390_end_pool (curr_pool, barrier);
9374 curr_pool = NULL;
9375 extra_size = 0;
9376 }
9377 }
9378 }
9379
9380 if (curr_pool)
9381 s390_end_pool (curr_pool, NULL);
9382 gcc_assert (!pending_ltrel);
9383
9384 /* Find all labels that are branched into
9385 from an insn belonging to a different chunk. */
9386
9387 far_labels = BITMAP_ALLOC (NULL);
9388
9389 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
9390 {
9391 rtx_jump_table_data *table;
9392
9393 /* Labels marked with LABEL_PRESERVE_P can be the target
9394 of non-local jumps, so we have to mark them.
9395 The same holds for named labels.
9396
9397 Don't do that, however, if it is the label before
9398 a jump table. */
9399
9400 if (LABEL_P (insn)
9401 && (LABEL_PRESERVE_P (insn) || LABEL_NAME (insn)))
9402 {
9403 rtx_insn *vec_insn = NEXT_INSN (insn);
9404 if (! vec_insn || ! JUMP_TABLE_DATA_P (vec_insn))
9405 bitmap_set_bit (far_labels, CODE_LABEL_NUMBER (insn));
9406 }
9407 /* Check potential targets in a table jump (casesi_jump). */
9408 else if (tablejump_p (insn, NULL, &table))
9409 {
9410 rtx vec_pat = PATTERN (table);
9411 int i, diff_p = GET_CODE (vec_pat) == ADDR_DIFF_VEC;
9412
9413 for (i = 0; i < XVECLEN (vec_pat, diff_p); i++)
9414 {
9415 rtx label = XEXP (XVECEXP (vec_pat, diff_p, i), 0);
9416
9417 if (s390_find_pool (pool_list, label)
9418 != s390_find_pool (pool_list, insn))
9419 bitmap_set_bit (far_labels, CODE_LABEL_NUMBER (label));
9420 }
9421 }
9422 /* If we have a direct jump (conditional or unconditional),
9423 check all potential targets. */
9424 else if (JUMP_P (insn))
9425 {
9426 rtx pat = PATTERN (insn);
9427
9428 if (GET_CODE (pat) == PARALLEL)
9429 pat = XVECEXP (pat, 0, 0);
9430
9431 if (GET_CODE (pat) == SET)
9432 {
9433 rtx label = JUMP_LABEL (insn);
9434 if (label && !ANY_RETURN_P (label))
9435 {
9436 if (s390_find_pool (pool_list, label)
9437 != s390_find_pool (pool_list, insn))
9438 bitmap_set_bit (far_labels, CODE_LABEL_NUMBER (label));
9439 }
9440 }
9441 }
9442 }
9443
9444 /* Insert base register reload insns before every pool. */
9445
9446 for (curr_pool = pool_list; curr_pool; curr_pool = curr_pool->next)
9447 {
9448 rtx new_insn = gen_reload_base (cfun->machine->base_reg,
9449 curr_pool->label);
9450 rtx_insn *insn = curr_pool->first_insn;
9451 INSN_ADDRESSES_NEW (emit_insn_before (new_insn, insn), -1);
9452 }
9453
9454 /* Insert base register reload insns at every far label. */
9455
9456 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
9457 if (LABEL_P (insn)
9458 && bitmap_bit_p (far_labels, CODE_LABEL_NUMBER (insn)))
9459 {
9460 struct constant_pool *pool = s390_find_pool (pool_list, insn);
9461 if (pool)
9462 {
9463 rtx new_insn = gen_reload_base (cfun->machine->base_reg,
9464 pool->label);
9465 INSN_ADDRESSES_NEW (emit_insn_after (new_insn, insn), -1);
9466 }
9467 }
9468
9469
9470 BITMAP_FREE (far_labels);
9471
9472
9473 /* Recompute insn addresses. */
9474
9475 init_insn_lengths ();
9476 shorten_branches (get_insns ());
9477
9478 return pool_list;
9479 }
9480
9481 /* POOL_LIST is a chunk list as prepared by s390_chunkify_start.
9482 After we have decided to use this list, finish implementing
9483 all changes to the current function as required. */
9484
9485 static void
9486 s390_chunkify_finish (struct constant_pool *pool_list)
9487 {
9488 struct constant_pool *curr_pool = NULL;
9489 rtx_insn *insn;
9490
9491
9492 /* Replace all literal pool references. */
9493
9494 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
9495 {
9496 if (INSN_P (insn))
9497 replace_ltrel_base (&PATTERN (insn));
9498
9499 curr_pool = s390_find_pool (pool_list, insn);
9500 if (!curr_pool)
9501 continue;
9502
9503 if (NONJUMP_INSN_P (insn) || CALL_P (insn))
9504 {
9505 rtx addr, pool_ref = NULL_RTX;
9506 find_constant_pool_ref (PATTERN (insn), &pool_ref);
9507 if (pool_ref)
9508 {
9509 if (s390_execute_label (insn))
9510 addr = s390_find_execute (curr_pool, insn);
9511 else
9512 addr = s390_find_constant (curr_pool,
9513 get_pool_constant (pool_ref),
9514 get_pool_mode (pool_ref));
9515
9516 replace_constant_pool_ref (&PATTERN (insn), pool_ref, addr);
9517 INSN_CODE (insn) = -1;
9518 }
9519 }
9520 }
9521
9522 /* Dump out all literal pools. */
9523
9524 for (curr_pool = pool_list; curr_pool; curr_pool = curr_pool->next)
9525 s390_dump_pool (curr_pool, 0);
9526
9527 /* Free pool list. */
9528
9529 while (pool_list)
9530 {
9531 struct constant_pool *next = pool_list->next;
9532 s390_free_pool (pool_list);
9533 pool_list = next;
9534 }
9535 }
9536
9537 /* POOL_LIST is a chunk list as prepared by s390_chunkify_start.
9538 We have decided we cannot use this list, so revert all changes
9539 to the current function that were done by s390_chunkify_start. */
9540
9541 static void
9542 s390_chunkify_cancel (struct constant_pool *pool_list)
9543 {
9544 struct constant_pool *curr_pool = NULL;
9545 rtx_insn *insn;
9546
9547 /* Remove all pool placeholder insns. */
9548
9549 for (curr_pool = pool_list; curr_pool; curr_pool = curr_pool->next)
9550 {
9551 /* Did we insert an extra barrier? Remove it. */
9552 rtx_insn *barrier = PREV_INSN (curr_pool->pool_insn);
9553 rtx_insn *jump = barrier? PREV_INSN (barrier) : NULL;
9554 rtx_insn *label = NEXT_INSN (curr_pool->pool_insn);
9555
9556 if (jump && JUMP_P (jump)
9557 && barrier && BARRIER_P (barrier)
9558 && label && LABEL_P (label)
9559 && GET_CODE (PATTERN (jump)) == SET
9560 && SET_DEST (PATTERN (jump)) == pc_rtx
9561 && GET_CODE (SET_SRC (PATTERN (jump))) == LABEL_REF
9562 && XEXP (SET_SRC (PATTERN (jump)), 0) == label)
9563 {
9564 remove_insn (jump);
9565 remove_insn (barrier);
9566 remove_insn (label);
9567 }
9568
9569 remove_insn (curr_pool->pool_insn);
9570 }
9571
9572 /* Remove all base register reload insns. */
9573
9574 for (insn = get_insns (); insn; )
9575 {
9576 rtx_insn *next_insn = NEXT_INSN (insn);
9577
9578 if (NONJUMP_INSN_P (insn)
9579 && GET_CODE (PATTERN (insn)) == SET
9580 && GET_CODE (SET_SRC (PATTERN (insn))) == UNSPEC
9581 && XINT (SET_SRC (PATTERN (insn)), 1) == UNSPEC_RELOAD_BASE)
9582 remove_insn (insn);
9583
9584 insn = next_insn;
9585 }
9586
9587 /* Free pool list. */
9588
9589 while (pool_list)
9590 {
9591 struct constant_pool *next = pool_list->next;
9592 s390_free_pool (pool_list);
9593 pool_list = next;
9594 }
9595 }
9596
9597 /* Output the constant pool entry EXP in mode MODE with alignment ALIGN. */
9598
9599 void
9600 s390_output_pool_entry (rtx exp, machine_mode mode, unsigned int align)
9601 {
9602 switch (GET_MODE_CLASS (mode))
9603 {
9604 case MODE_FLOAT:
9605 case MODE_DECIMAL_FLOAT:
9606 gcc_assert (GET_CODE (exp) == CONST_DOUBLE);
9607
9608 assemble_real (*CONST_DOUBLE_REAL_VALUE (exp),
9609 as_a <scalar_float_mode> (mode), align);
9610 break;
9611
9612 case MODE_INT:
9613 assemble_integer (exp, GET_MODE_SIZE (mode), align, 1);
9614 mark_symbol_refs_as_used (exp);
9615 break;
9616
9617 case MODE_VECTOR_INT:
9618 case MODE_VECTOR_FLOAT:
9619 {
9620 int i;
9621 machine_mode inner_mode;
9622 gcc_assert (GET_CODE (exp) == CONST_VECTOR);
9623
9624 inner_mode = GET_MODE_INNER (GET_MODE (exp));
9625 for (i = 0; i < XVECLEN (exp, 0); i++)
9626 s390_output_pool_entry (XVECEXP (exp, 0, i),
9627 inner_mode,
9628 i == 0
9629 ? align
9630 : GET_MODE_BITSIZE (inner_mode));
9631 }
9632 break;
9633
9634 default:
9635 gcc_unreachable ();
9636 }
9637 }
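/* An illustrative trace of the vector case above: a V4SImode constant
   is emitted as four SImode entries, the first using the pool entry's
   alignment ALIGN, the remaining three using GET_MODE_BITSIZE
   (SImode), i.e. 32-bit alignment.  */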
9638
9639
9640 /* Return an RTL expression representing the value of the return address
9641 for the frame COUNT steps up from the current frame. FRAME is the
9642 frame pointer of that frame. */
9643
9644 rtx
9645 s390_return_addr_rtx (int count, rtx frame ATTRIBUTE_UNUSED)
9646 {
9647 int offset;
9648 rtx addr;
9649
9650 /* Without backchain, we fail for all but the current frame. */
9651
9652 if (!TARGET_BACKCHAIN && count > 0)
9653 return NULL_RTX;
9654
9655 /* For the current frame, we need to make sure the initial
9656 value of RETURN_REGNUM is actually saved. */
9657
9658 if (count == 0)
9659 {
9660 /* On non-z architectures branch splitting could overwrite r14. */
9661 if (TARGET_CPU_ZARCH)
9662 return get_hard_reg_initial_val (Pmode, RETURN_REGNUM);
9663 else
9664 {
9665 cfun_frame_layout.save_return_addr_p = true;
9666 return gen_rtx_MEM (Pmode, return_address_pointer_rtx);
9667 }
9668 }
9669
9670 if (TARGET_PACKED_STACK)
9671 offset = -2 * UNITS_PER_LONG;
9672 else
9673 offset = RETURN_REGNUM * UNITS_PER_LONG;
9674
9675 addr = plus_constant (Pmode, frame, offset);
9676 addr = memory_address (Pmode, addr);
9677 return gen_rtx_MEM (Pmode, addr);
9678 }
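/* Worked example (standard, non-packed layout): with UNITS_PER_LONG
   of 8 on 64 bit, the r14 save slot sits at offset 14 * 8 == 112 from
   FRAME; on 31 bit it is 14 * 4 == 56.  With TARGET_PACKED_STACK the
   slot is assumed at -2 * UNITS_PER_LONG instead, as computed
   above.  */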
9679
9680 /* Return an RTL expression representing the back chain stored in
9681 the current stack frame. */
9682
9683 rtx
9684 s390_back_chain_rtx (void)
9685 {
9686 rtx chain;
9687
9688 gcc_assert (TARGET_BACKCHAIN);
9689
9690 if (TARGET_PACKED_STACK)
9691 chain = plus_constant (Pmode, stack_pointer_rtx,
9692 STACK_POINTER_OFFSET - UNITS_PER_LONG);
9693 else
9694 chain = stack_pointer_rtx;
9695
9696 chain = gen_rtx_MEM (Pmode, chain);
9697 return chain;
9698 }
9699
9700 /* Find first call clobbered register unused in a function.
9701 This could be used as base register in a leaf function
9702 or for holding the return address before epilogue. */
9703
9704 static int
9705 find_unused_clobbered_reg (void)
9706 {
9707 int i;
9708 for (i = 0; i < 6; i++)
9709 if (!df_regs_ever_live_p (i))
9710 return i;
9711 return 0;
9712 }
9713
9714
9715 /* Helper function for s390_regs_ever_clobbered. Sets the fields in DATA for all
9716 clobbered hard regs in SETREG. */
9717
9718 static void
9719 s390_reg_clobbered_rtx (rtx setreg, const_rtx set_insn ATTRIBUTE_UNUSED, void *data)
9720 {
9721 char *regs_ever_clobbered = (char *)data;
9722 unsigned int i, regno;
9723 machine_mode mode = GET_MODE (setreg);
9724
9725 if (GET_CODE (setreg) == SUBREG)
9726 {
9727 rtx inner = SUBREG_REG (setreg);
9728 if (!GENERAL_REG_P (inner) && !FP_REG_P (inner))
9729 return;
9730 regno = subreg_regno (setreg);
9731 }
9732 else if (GENERAL_REG_P (setreg) || FP_REG_P (setreg))
9733 regno = REGNO (setreg);
9734 else
9735 return;
9736
9737 for (i = regno;
9738 i < end_hard_regno (mode, regno);
9739 i++)
9740 regs_ever_clobbered[i] = 1;
9741 }
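/* For example, a store to (reg:DI 6) in 31-bit mode spans two hard
   registers, so the end_hard_regno loop above sets both
   regs_ever_clobbered[6] and regs_ever_clobbered[7].  */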
9742
9743 /* Walks through all basic blocks of the current function looking
9744 for clobbered hard regs using s390_reg_clobbered_rtx. The fields
9745 of the passed char array REGS_EVER_CLOBBERED are set to one for
9746 each of those regs. */
9747
9748 static void
9749 s390_regs_ever_clobbered (char regs_ever_clobbered[])
9750 {
9751 basic_block cur_bb;
9752 rtx_insn *cur_insn;
9753 unsigned int i;
9754
9755 memset (regs_ever_clobbered, 0, 32);
9756
9757 /* For non-leaf functions we have to consider all call clobbered regs to be
9758 clobbered. */
9759 if (!crtl->is_leaf)
9760 {
9761 for (i = 0; i < 32; i++)
9762 regs_ever_clobbered[i] = call_really_used_regs[i];
9763 }
9764
9765 /* Make the "magic" eh_return registers live if necessary. For regs_ever_live
9766 this work is done by liveness analysis (mark_regs_live_at_end).
9767 Special care is needed for functions containing landing pads. Landing pads
9768 may use the eh registers, but the code which sets these registers is not
9769 contained in that function. Hence s390_regs_ever_clobbered is not able to
9770 deal with this automatically. */
9771 if (crtl->calls_eh_return || cfun->machine->has_landing_pad_p)
9772 for (i = 0; EH_RETURN_DATA_REGNO (i) != INVALID_REGNUM ; i++)
9773 if (crtl->calls_eh_return
9774 || (cfun->machine->has_landing_pad_p
9775 && df_regs_ever_live_p (EH_RETURN_DATA_REGNO (i))))
9776 regs_ever_clobbered[EH_RETURN_DATA_REGNO (i)] = 1;
9777
9778 /* For nonlocal gotos all call-saved registers have to be saved.
9779 This flag is also set for the unwinding code in libgcc.
9780 See expand_builtin_unwind_init. For regs_ever_live this is done by
9781 reload. */
9782 if (crtl->saves_all_registers)
9783 for (i = 0; i < 32; i++)
9784 if (!call_really_used_regs[i])
9785 regs_ever_clobbered[i] = 1;
9786
9787 FOR_EACH_BB_FN (cur_bb, cfun)
9788 {
9789 FOR_BB_INSNS (cur_bb, cur_insn)
9790 {
9791 rtx pat;
9792
9793 if (!INSN_P (cur_insn))
9794 continue;
9795
9796 pat = PATTERN (cur_insn);
9797
9798 /* Ignore GPR restore insns. */
9799 if (epilogue_completed && RTX_FRAME_RELATED_P (cur_insn))
9800 {
9801 if (GET_CODE (pat) == SET
9802 && GENERAL_REG_P (SET_DEST (pat)))
9803 {
9804 /* lgdr */
9805 if (GET_MODE (SET_SRC (pat)) == DImode
9806 && FP_REG_P (SET_SRC (pat)))
9807 continue;
9808
9809 /* l / lg */
9810 if (GET_CODE (SET_SRC (pat)) == MEM)
9811 continue;
9812 }
9813
9814 /* lm / lmg */
9815 if (GET_CODE (pat) == PARALLEL
9816 && load_multiple_operation (pat, VOIDmode))
9817 continue;
9818 }
9819
9820 note_stores (pat,
9821 s390_reg_clobbered_rtx,
9822 regs_ever_clobbered);
9823 }
9824 }
9825 }
9826
9827 /* Determine the frame area which actually has to be accessed
9828 in the function epilogue. The values are stored at the
9829 given pointers AREA_BOTTOM (address of the lowest used stack
9830 address) and AREA_TOP (address of the first item which does
9831 not belong to the stack frame). */
9832
9833 static void
9834 s390_frame_area (int *area_bottom, int *area_top)
9835 {
9836 int b, t;
9837
9838 b = INT_MAX;
9839 t = INT_MIN;
9840
9841 if (cfun_frame_layout.first_restore_gpr != -1)
9842 {
9843 b = (cfun_frame_layout.gprs_offset
9844 + cfun_frame_layout.first_restore_gpr * UNITS_PER_LONG);
9845 t = b + (cfun_frame_layout.last_restore_gpr
9846 - cfun_frame_layout.first_restore_gpr + 1) * UNITS_PER_LONG;
9847 }
9848
9849 if (TARGET_64BIT && cfun_save_high_fprs_p)
9850 {
9851 b = MIN (b, cfun_frame_layout.f8_offset);
9852 t = MAX (t, (cfun_frame_layout.f8_offset
9853 + cfun_frame_layout.high_fprs * 8));
9854 }
9855
9856 if (!TARGET_64BIT)
9857 {
9858 if (cfun_fpr_save_p (FPR4_REGNUM))
9859 {
9860 b = MIN (b, cfun_frame_layout.f4_offset);
9861 t = MAX (t, cfun_frame_layout.f4_offset + 8);
9862 }
9863 if (cfun_fpr_save_p (FPR6_REGNUM))
9864 {
9865 b = MIN (b, cfun_frame_layout.f4_offset + 8);
9866 t = MAX (t, cfun_frame_layout.f4_offset + 16);
9867 }
9868 }
9869 *area_bottom = b;
9870 *area_top = t;
9871 }
9872 /* Update gpr_save_slots in the frame layout trying to make use of
9873 FPRs as GPR save slots.
9874 This is a helper routine of s390_register_info. */
9875
9876 static void
9877 s390_register_info_gprtofpr ()
9878 {
9879 int save_reg_slot = FPR0_REGNUM;
9880 int i, j;
9881
9882 if (!TARGET_Z10 || !TARGET_HARD_FLOAT || !crtl->is_leaf)
9883 return;
9884
9885 /* builtin_eh_return needs to be able to modify the return address
9886 on the stack. It could also adjust the FPR save slot instead but
9887 is it worth the trouble?! */
9888 if (crtl->calls_eh_return)
9889 return;
9890
9891 for (i = 15; i >= 6; i--)
9892 {
9893 if (cfun_gpr_save_slot (i) == SAVE_SLOT_NONE)
9894 continue;
9895
9896 /* Advance to the next FP register which can be used as a
9897 GPR save slot. */
9898 while ((!call_really_used_regs[save_reg_slot]
9899 || df_regs_ever_live_p (save_reg_slot)
9900 || cfun_fpr_save_p (save_reg_slot))
9901 && FP_REGNO_P (save_reg_slot))
9902 save_reg_slot++;
9903 if (!FP_REGNO_P (save_reg_slot))
9904 {
9905 /* We only want to use ldgr/lgdr if we can get rid of
9906 stm/lm entirely. So undo the gpr slot allocation in
9907 case we ran out of FPR save slots. */
9908 for (j = 6; j <= 15; j++)
9909 if (FP_REGNO_P (cfun_gpr_save_slot (j)))
9910 cfun_gpr_save_slot (j) = SAVE_SLOT_STACK;
9911 break;
9912 }
9913 cfun_gpr_save_slot (i) = save_reg_slot++;
9914 }
9915 }
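/* Intended effect, sketched for a leaf function on z10 with hard
   float: if r6 and r7 need saving and the first call-clobbered FPRs
   (say f0 and f2 in the backend's register ordering) are unused and
   not FPR-saved, the prologue can use ldgr/lgdr moves instead of a
   stm/lm pair, avoiding the stack accesses entirely.  */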
9916
9917 /* Set the bits in fpr_bitmap for FPRs which need to be saved due to
9918 stdarg.
9919 This is a helper routine for s390_register_info. */
9920
9921 static void
9922 s390_register_info_stdarg_fpr ()
9923 {
9924 int i;
9925 int min_fpr;
9926 int max_fpr;
9927
9928 /* Save the FP argument regs for stdarg. f0 and f2 for 31 bit;
9929 f0, f2, f4, and f6 for 64 bit. */
9930 if (!cfun->stdarg
9931 || !TARGET_HARD_FLOAT
9932 || !cfun->va_list_fpr_size
9933 || crtl->args.info.fprs >= FP_ARG_NUM_REG)
9934 return;
9935
9936 min_fpr = crtl->args.info.fprs;
9937 max_fpr = min_fpr + cfun->va_list_fpr_size - 1;
9938 if (max_fpr >= FP_ARG_NUM_REG)
9939 max_fpr = FP_ARG_NUM_REG - 1;
9940
9941 /* FPR argument regs start at f0. */
9942 min_fpr += FPR0_REGNUM;
9943 max_fpr += FPR0_REGNUM;
9944
9945 for (i = min_fpr; i <= max_fpr; i++)
9946 cfun_set_fpr_save (i);
9947 }
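/* Example: for a 64-bit stdarg function where no FP argument is
   named, crtl->args.info.fprs is 0, so all FP_ARG_NUM_REG argument
   FPRs (f0, f2, f4 and f6 in the 64-bit ABI) get their save bit set,
   unless cfun->va_list_fpr_size cuts the range short.  */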
9948
9949 /* Reserve the GPR save slots for GPRs which need to be saved due to
9950 stdarg.
9951 This is a helper routine for s390_register_info. */
9952
9953 static void
9954 s390_register_info_stdarg_gpr ()
9955 {
9956 int i;
9957 int min_gpr;
9958 int max_gpr;
9959
9960 if (!cfun->stdarg
9961 || !cfun->va_list_gpr_size
9962 || crtl->args.info.gprs >= GP_ARG_NUM_REG)
9963 return;
9964
9965 min_gpr = crtl->args.info.gprs;
9966 max_gpr = min_gpr + cfun->va_list_gpr_size - 1;
9967 if (max_gpr >= GP_ARG_NUM_REG)
9968 max_gpr = GP_ARG_NUM_REG - 1;
9969
9970 /* GPR argument regs start at r2. */
9971 min_gpr += GPR2_REGNUM;
9972 max_gpr += GPR2_REGNUM;
9973
9974 /* If r6 was supposed to be saved into an FPR and now needs to go to
9975 the stack for vararg we have to adjust the restore range to make
9976 sure that the restore is done from stack as well. */
9977 if (FP_REGNO_P (cfun_gpr_save_slot (GPR6_REGNUM))
9978 && min_gpr <= GPR6_REGNUM
9979 && max_gpr >= GPR6_REGNUM)
9980 {
9981 if (cfun_frame_layout.first_restore_gpr == -1
9982 || cfun_frame_layout.first_restore_gpr > GPR6_REGNUM)
9983 cfun_frame_layout.first_restore_gpr = GPR6_REGNUM;
9984 if (cfun_frame_layout.last_restore_gpr == -1
9985 || cfun_frame_layout.last_restore_gpr < GPR6_REGNUM)
9986 cfun_frame_layout.last_restore_gpr = GPR6_REGNUM;
9987 }
9988
9989 if (cfun_frame_layout.first_save_gpr == -1
9990 || cfun_frame_layout.first_save_gpr > min_gpr)
9991 cfun_frame_layout.first_save_gpr = min_gpr;
9992
9993 if (cfun_frame_layout.last_save_gpr == -1
9994 || cfun_frame_layout.last_save_gpr < max_gpr)
9995 cfun_frame_layout.last_save_gpr = max_gpr;
9996
9997 for (i = min_gpr; i <= max_gpr; i++)
9998 cfun_gpr_save_slot (i) = SAVE_SLOT_STACK;
9999 }
10000
10001 /* Calculate the save and restore ranges for stm(g) and lm(g) in the
10002 prologue and epilogue. */
10003
10004 static void
10005 s390_register_info_set_ranges ()
10006 {
10007 int i, j;
10008
10009 /* Find the first and the last save slot supposed to use the stack
10010 to set the restore range.
10011 Vararg regs might be marked as save to stack but only the
10012 call-saved regs really need restoring (i.e. r6). This code
10013 assumes that the vararg regs have not yet been recorded in
10014 cfun_gpr_save_slot. */
10015 for (i = 0; i < 16 && cfun_gpr_save_slot (i) != SAVE_SLOT_STACK; i++);
10016 for (j = 15; j > i && cfun_gpr_save_slot (j) != SAVE_SLOT_STACK; j--);
10017 cfun_frame_layout.first_restore_gpr = (i == 16) ? -1 : i;
10018 cfun_frame_layout.last_restore_gpr = (i == 16) ? -1 : j;
10019 cfun_frame_layout.first_save_gpr = (i == 16) ? -1 : i;
10020 cfun_frame_layout.last_save_gpr = (i == 16) ? -1 : j;
10021 }
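/* E.g. if only r6 and r14/r15 carry SAVE_SLOT_STACK, the scan yields
   i == 6 and j == 15, so one stm(g)/lm(g) covering r6-r15 is used;
   the GPRs are always stored as a single contiguous block even if
   some register in between would not strictly need saving.  */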
10022
10023 /* The GPR and FPR save slots in cfun->machine->frame_layout are set
10024 for registers which need to be saved in function prologue.
10025 This function can be used until the insns emitted for save/restore
10026 of the regs are visible in the RTL stream. */
10027
10028 static void
10029 s390_register_info ()
10030 {
10031 int i;
10032 char clobbered_regs[32];
10033
10034 gcc_assert (!epilogue_completed);
10035
10036 if (reload_completed)
10037 /* After reload we rely on our own routine to determine which
10038 registers need saving. */
10039 s390_regs_ever_clobbered (clobbered_regs);
10040 else
10041 /* During reload we use regs_ever_live as a base since reload
10042 does changes in there which we otherwise would not be aware
10043 of. */
10044 for (i = 0; i < 32; i++)
10045 clobbered_regs[i] = df_regs_ever_live_p (i);
10046
10047 for (i = 0; i < 32; i++)
10048 clobbered_regs[i] = clobbered_regs[i] && !global_regs[i];
10049
10050 /* Mark the call-saved FPRs which need to be saved.
10051 This needs to be done before checking the special GPRs since the
10052 stack pointer usage depends on whether high FPRs have to be saved
10053 or not. */
10054 cfun_frame_layout.fpr_bitmap = 0;
10055 cfun_frame_layout.high_fprs = 0;
10056 for (i = FPR0_REGNUM; i <= FPR15_REGNUM; i++)
10057 if (clobbered_regs[i] && !call_really_used_regs[i])
10058 {
10059 cfun_set_fpr_save (i);
10060 if (i >= FPR8_REGNUM)
10061 cfun_frame_layout.high_fprs++;
10062 }
10063
10064 /* Register 12 is used for GOT address, but also as temp in prologue
10065 for split-stack stdarg functions (unless r14 is available). */
10066 clobbered_regs[12]
10067 |= ((flag_pic && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
10068 || (flag_split_stack && cfun->stdarg
10069 && (crtl->is_leaf || TARGET_TPF_PROFILING
10070 || has_hard_reg_initial_val (Pmode, RETURN_REGNUM))));
10071
10072 clobbered_regs[BASE_REGNUM]
10073 |= (cfun->machine->base_reg
10074 && REGNO (cfun->machine->base_reg) == BASE_REGNUM);
10075
10076 clobbered_regs[HARD_FRAME_POINTER_REGNUM]
10077 |= !!frame_pointer_needed;
10078
10079 /* On pre z900 machines this might take until machine dependent
10080 reorg to decide.
10081 save_return_addr_p will only be set on non-zarch machines so
10082 there is no risk that r14 goes into an FPR instead of a stack
10083 slot. */
10084 clobbered_regs[RETURN_REGNUM]
10085 |= (!crtl->is_leaf
10086 || TARGET_TPF_PROFILING
10087 || cfun->machine->split_branches_pending_p
10088 || cfun_frame_layout.save_return_addr_p
10089 || crtl->calls_eh_return);
10090
10091 clobbered_regs[STACK_POINTER_REGNUM]
10092 |= (!crtl->is_leaf
10093 || TARGET_TPF_PROFILING
10094 || cfun_save_high_fprs_p
10095 || get_frame_size () > 0
10096 || (reload_completed && cfun_frame_layout.frame_size > 0)
10097 || cfun->calls_alloca);
10098
10099 memset (cfun_frame_layout.gpr_save_slots, SAVE_SLOT_NONE, 16);
10100
10101 for (i = 6; i < 16; i++)
10102 if (clobbered_regs[i])
10103 cfun_gpr_save_slot (i) = SAVE_SLOT_STACK;
10104
10105 s390_register_info_stdarg_fpr ();
10106 s390_register_info_gprtofpr ();
10107 s390_register_info_set_ranges ();
10108 /* stdarg functions might need to save GPRs 2 to 6. This might
10109 override the GPR->FPR save decision made by
10110 s390_register_info_gprtofpr for r6 since vararg regs must go to
10111 the stack. */
10112 s390_register_info_stdarg_gpr ();
10113 }
10114
10115 /* Return true if REGNO is a global register, but not one
10116 of the special ones that need to be saved/restored in any case. */
10117
10118 static inline bool
10119 global_not_special_regno_p (int regno)
10120 {
10121 return (global_regs[regno]
10122 /* These registers are special and need to be
10123 restored in any case. */
10124 && !(regno == STACK_POINTER_REGNUM
10125 || regno == RETURN_REGNUM
10126 || regno == BASE_REGNUM
10127 || (flag_pic && regno == (int)PIC_OFFSET_TABLE_REGNUM)));
10128 }
10129
10130 /* This function is called by s390_optimize_prologue in order to get
10131 rid of unnecessary GPR save/restore instructions. The register info
10132 for the GPRs is re-computed and the ranges are re-calculated. */
10133
10134 static void
10135 s390_optimize_register_info ()
10136 {
10137 char clobbered_regs[32];
10138 int i;
10139
10140 gcc_assert (epilogue_completed);
10141 gcc_assert (!cfun->machine->split_branches_pending_p);
10142
10143 s390_regs_ever_clobbered (clobbered_regs);
10144
10145 /* Global registers do not need to be saved and restored unless it
10146 is one of our special regs (r12, r13, r14, or r15). */
10147 for (i = 0; i < 32; i++)
10148 clobbered_regs[i] = clobbered_regs[i] && !global_not_special_regno_p (i);
10149
10150 /* There is still special treatment needed for cases invisible to
10151 s390_regs_ever_clobbered. */
10152 clobbered_regs[RETURN_REGNUM]
10153 |= (TARGET_TPF_PROFILING
10154 /* When expanding builtin_return_addr in ESA mode we do not
10155 know whether r14 will later be needed as scratch reg when
10156 doing branch splitting. So the builtin always accesses the
10157 r14 save slot and we need to stick to the save/restore
10158 decision for r14 even if it turns out that it didn't get
10159 clobbered. */
10160 || cfun_frame_layout.save_return_addr_p
10161 || crtl->calls_eh_return);
10162
10163 memset (cfun_frame_layout.gpr_save_slots, SAVE_SLOT_NONE, 6);
10164
10165 for (i = 6; i < 16; i++)
10166 if (!clobbered_regs[i])
10167 cfun_gpr_save_slot (i) = SAVE_SLOT_NONE;
10168
10169 s390_register_info_set_ranges ();
10170 s390_register_info_stdarg_gpr ();
10171 }
10172
10173 /* Fill cfun->machine with info about frame of current function. */
10174
10175 static void
10176 s390_frame_info (void)
10177 {
10178 HOST_WIDE_INT lowest_offset;
10179
10180 cfun_frame_layout.first_save_gpr_slot = cfun_frame_layout.first_save_gpr;
10181 cfun_frame_layout.last_save_gpr_slot = cfun_frame_layout.last_save_gpr;
10182
10183 /* The va_arg builtin uses a constant distance of 16 *
10184 UNITS_PER_LONG (r0-r15) to reach the FPRs from the reg_save_area
10185 pointer. So even if we are going to save the stack pointer in an
10186 FPR we need the stack space in order to keep the offsets
10187 correct. */
10188 if (cfun->stdarg && cfun_save_arg_fprs_p)
10189 {
10190 cfun_frame_layout.last_save_gpr_slot = STACK_POINTER_REGNUM;
10191
10192 if (cfun_frame_layout.first_save_gpr_slot == -1)
10193 cfun_frame_layout.first_save_gpr_slot = STACK_POINTER_REGNUM;
10194 }
10195
10196 cfun_frame_layout.frame_size = get_frame_size ();
10197 if (!TARGET_64BIT && cfun_frame_layout.frame_size > 0x7fff0000)
10198 fatal_error (input_location,
10199 "total size of local variables exceeds architecture limit");
10200
10201 if (!TARGET_PACKED_STACK)
10202 {
10203 /* Fixed stack layout. */
10204 cfun_frame_layout.backchain_offset = 0;
10205 cfun_frame_layout.f0_offset = 16 * UNITS_PER_LONG;
10206 cfun_frame_layout.f4_offset = cfun_frame_layout.f0_offset + 2 * 8;
10207 cfun_frame_layout.f8_offset = -cfun_frame_layout.high_fprs * 8;
10208 cfun_frame_layout.gprs_offset = (cfun_frame_layout.first_save_gpr_slot
10209 * UNITS_PER_LONG);
10210 }
10211 else if (TARGET_BACKCHAIN)
10212 {
10213 /* Kernel stack layout - packed stack, backchain, no float */
10214 gcc_assert (TARGET_SOFT_FLOAT);
10215 cfun_frame_layout.backchain_offset = (STACK_POINTER_OFFSET
10216 - UNITS_PER_LONG);
10217
10218 /* The distance between the backchain and the return address
10219 save slot must not change. So we always need a slot for the
10220 stack pointer which resides in between. */
10221 cfun_frame_layout.last_save_gpr_slot = STACK_POINTER_REGNUM;
10222
10223 cfun_frame_layout.gprs_offset
10224 = cfun_frame_layout.backchain_offset - cfun_gprs_save_area_size;
10225
10226 /* FPRs will not be saved. Nevertheless pick sane values to
10227 keep area calculations valid. */
10228 cfun_frame_layout.f0_offset =
10229 cfun_frame_layout.f4_offset =
10230 cfun_frame_layout.f8_offset = cfun_frame_layout.gprs_offset;
10231 }
10232 else
10233 {
10234 int num_fprs;
10235
10236 /* Packed stack layout without backchain. */
10237
10238 /* With stdarg FPRs need their dedicated slots. */
10239 num_fprs = (TARGET_64BIT && cfun->stdarg ? 2
10240 : (cfun_fpr_save_p (FPR4_REGNUM) +
10241 cfun_fpr_save_p (FPR6_REGNUM)));
10242 cfun_frame_layout.f4_offset = STACK_POINTER_OFFSET - 8 * num_fprs;
10243
10244 num_fprs = (cfun->stdarg ? 2
10245 : (cfun_fpr_save_p (FPR0_REGNUM)
10246 + cfun_fpr_save_p (FPR2_REGNUM)));
10247 cfun_frame_layout.f0_offset = cfun_frame_layout.f4_offset - 8 * num_fprs;
10248
10249 cfun_frame_layout.gprs_offset
10250 = cfun_frame_layout.f0_offset - cfun_gprs_save_area_size;
10251
10252 cfun_frame_layout.f8_offset = (cfun_frame_layout.gprs_offset
10253 - cfun_frame_layout.high_fprs * 8);
10254 }
10255
10256 if (cfun_save_high_fprs_p)
10257 cfun_frame_layout.frame_size += cfun_frame_layout.high_fprs * 8;
10258
10259 if (!crtl->is_leaf)
10260 cfun_frame_layout.frame_size += crtl->outgoing_args_size;
10261
10262 /* In the following cases we have to allocate a STACK_POINTER_OFFSET
10263 sized area at the bottom of the stack. This is required also for
10264 leaf functions. When GCC generates a local stack reference it
10265 will always add STACK_POINTER_OFFSET to all these references. */
10266 if (crtl->is_leaf
10267 && !TARGET_TPF_PROFILING
10268 && cfun_frame_layout.frame_size == 0
10269 && !cfun->calls_alloca)
10270 return;
10271
10272 /* Calculate the number of bytes we have used in our own register
10273 save area. With the packed stack layout we can re-use the
10274 remaining bytes for normal stack elements. */
10275
10276 if (TARGET_PACKED_STACK)
10277 lowest_offset = MIN (MIN (cfun_frame_layout.f0_offset,
10278 cfun_frame_layout.f4_offset),
10279 cfun_frame_layout.gprs_offset);
10280 else
10281 lowest_offset = 0;
10282
10283 if (TARGET_BACKCHAIN)
10284 lowest_offset = MIN (lowest_offset, cfun_frame_layout.backchain_offset);
10285
10286 cfun_frame_layout.frame_size += STACK_POINTER_OFFSET - lowest_offset;
10287
10288 /* If an odd number of GPRs has to be saved under 31 bit, we have
10289 to adjust the frame size to sustain 8 byte alignment of stack
10290 frames. */
10291 cfun_frame_layout.frame_size = ((cfun_frame_layout.frame_size +
10292 STACK_BOUNDARY / BITS_PER_UNIT - 1)
10293 & ~(STACK_BOUNDARY / BITS_PER_UNIT - 1));
10294 }
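/* Rough worked example for the standard (non-packed) 64-bit layout:
   a non-leaf function with 40 bytes of locals, no high FPRs and a
   zero outgoing argument area gets frame_size = 40 + 160
   (STACK_POINTER_OFFSET), already a multiple of the 8-byte stack
   boundary, i.e. 200 bytes.  */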
10295
10296 /* Generate frame layout. Fills in register and frame data for the current
10297 function in cfun->machine. This routine can be called multiple times;
10298 it will re-do the complete frame layout every time. */
10299
10300 static void
10301 s390_init_frame_layout (void)
10302 {
10303 HOST_WIDE_INT frame_size;
10304 int base_used;
10305
10306 /* After LRA the frame layout is supposed to be read-only and should
10307 not be re-computed. */
10308 if (reload_completed)
10309 return;
10310
10311 /* On S/390 machines, we may need to perform branch splitting, which
10312 will require both base and return address register. We have no
10313 choice but to assume we're going to need them until right at the
10314 end of the machine dependent reorg phase. */
10315 if (!TARGET_CPU_ZARCH)
10316 cfun->machine->split_branches_pending_p = true;
10317
10318 do
10319 {
10320 frame_size = cfun_frame_layout.frame_size;
10321
10322 /* Try to predict whether we'll need the base register. */
10323 base_used = cfun->machine->split_branches_pending_p
10324 || crtl->uses_const_pool
10325 || (!DISP_IN_RANGE (frame_size)
10326 && !CONST_OK_FOR_K (frame_size));
10327
10328 /* Decide which register to use as literal pool base. In small
10329 leaf functions, try to use an unused call-clobbered register
10330 as base register to avoid save/restore overhead. */
10331 if (!base_used)
10332 cfun->machine->base_reg = NULL_RTX;
10333 else
10334 {
10335 int br = 0;
10336
10337 if (crtl->is_leaf)
10338 /* Prefer r5 (most likely to be free). */
10339 for (br = 5; br >= 2 && df_regs_ever_live_p (br); br--)
10340 ;
10341 cfun->machine->base_reg =
10342 gen_rtx_REG (Pmode, (br >= 2) ? br : BASE_REGNUM);
10343 }
10344
10345 s390_register_info ();
10346 s390_frame_info ();
10347 }
10348 while (frame_size != cfun_frame_layout.frame_size);
10349 }
10350
10351 /* Remove the FPR clobbers from a tbegin insn if it can be proven that
10352 the TX is nonescaping. A transaction is considered escaping if
10353 there is at least one path from tbegin returning CC0 to the
10354 function exit block without a tend.
10355
10356 The check so far has some limitations:
10357 - only single tbegin/tend BBs are supported
10358 - the first cond jump after tbegin must separate the CC0 path from ~CC0
10359 - when CC is copied to a GPR and the CC0 check is done with the GPR
10360 this is not supported
10361 */
10362
10363 static void
10364 s390_optimize_nonescaping_tx (void)
10365 {
10366 const unsigned int CC0 = 1 << 3;
10367 basic_block tbegin_bb = NULL;
10368 basic_block tend_bb = NULL;
10369 basic_block bb;
10370 rtx_insn *insn;
10371 bool result = true;
10372 int bb_index;
10373 rtx_insn *tbegin_insn = NULL;
10374
10375 if (!cfun->machine->tbegin_p)
10376 return;
10377
10378 for (bb_index = 0; bb_index < n_basic_blocks_for_fn (cfun); bb_index++)
10379 {
10380 bb = BASIC_BLOCK_FOR_FN (cfun, bb_index);
10381
10382 if (!bb)
10383 continue;
10384
10385 FOR_BB_INSNS (bb, insn)
10386 {
10387 rtx ite, cc, pat, target;
10388 unsigned HOST_WIDE_INT mask;
10389
10390 if (!INSN_P (insn) || INSN_CODE (insn) <= 0)
10391 continue;
10392
10393 pat = PATTERN (insn);
10394
10395 if (GET_CODE (pat) == PARALLEL)
10396 pat = XVECEXP (pat, 0, 0);
10397
10398 if (GET_CODE (pat) != SET
10399 || GET_CODE (SET_SRC (pat)) != UNSPEC_VOLATILE)
10400 continue;
10401
10402 if (XINT (SET_SRC (pat), 1) == UNSPECV_TBEGIN)
10403 {
10404 rtx_insn *tmp;
10405
10406 tbegin_insn = insn;
10407
10408 /* Just return if the tbegin doesn't have clobbers. */
10409 if (GET_CODE (PATTERN (insn)) != PARALLEL)
10410 return;
10411
10412 if (tbegin_bb != NULL)
10413 return;
10414
10415 /* Find the next conditional jump. */
10416 for (tmp = NEXT_INSN (insn);
10417 tmp != NULL_RTX;
10418 tmp = NEXT_INSN (tmp))
10419 {
10420 if (reg_set_p (gen_rtx_REG (CCmode, CC_REGNUM), tmp))
10421 return;
10422 if (!JUMP_P (tmp))
10423 continue;
10424
10425 ite = SET_SRC (PATTERN (tmp));
10426 if (GET_CODE (ite) != IF_THEN_ELSE)
10427 continue;
10428
10429 cc = XEXP (XEXP (ite, 0), 0);
10430 if (!REG_P (cc) || !CC_REGNO_P (REGNO (cc))
10431 || GET_MODE (cc) != CCRAWmode
10432 || GET_CODE (XEXP (XEXP (ite, 0), 1)) != CONST_INT)
10433 return;
10434
10435 if (bb->succs->length () != 2)
10436 return;
10437
10438 mask = INTVAL (XEXP (XEXP (ite, 0), 1));
10439 if (GET_CODE (XEXP (ite, 0)) == NE)
10440 mask ^= 0xf;
10441
10442 if (mask == CC0)
10443 target = XEXP (ite, 1);
10444 else if (mask == (CC0 ^ 0xf))
10445 target = XEXP (ite, 2);
10446 else
10447 return;
10448
10449 {
10450 edge_iterator ei;
10451 edge e1, e2;
10452
10453 ei = ei_start (bb->succs);
10454 e1 = ei_safe_edge (ei);
10455 ei_next (&ei);
10456 e2 = ei_safe_edge (ei);
10457
10458 if (e2->flags & EDGE_FALLTHRU)
10459 {
10460 e2 = e1;
10461 e1 = ei_safe_edge (ei);
10462 }
10463
10464 if (!(e1->flags & EDGE_FALLTHRU))
10465 return;
10466
10467 tbegin_bb = (target == pc_rtx) ? e1->dest : e2->dest;
10468 }
10469 if (tmp == BB_END (bb))
10470 break;
10471 }
10472 }
10473
10474 if (XINT (SET_SRC (pat), 1) == UNSPECV_TEND)
10475 {
10476 if (tend_bb != NULL)
10477 return;
10478 tend_bb = bb;
10479 }
10480 }
10481 }
10482
10483 /* Either we successfully remove the FPR clobbers here or we are not
10484 able to do anything for this TX. Both cases don't qualify for
10485 another look. */
10486 cfun->machine->tbegin_p = false;
10487
10488 if (tbegin_bb == NULL || tend_bb == NULL)
10489 return;
10490
10491 calculate_dominance_info (CDI_POST_DOMINATORS);
10492 result = dominated_by_p (CDI_POST_DOMINATORS, tbegin_bb, tend_bb);
10493 free_dominance_info (CDI_POST_DOMINATORS);
10494
10495 if (!result)
10496 return;
10497
10498 PATTERN (tbegin_insn) = gen_rtx_PARALLEL (VOIDmode,
10499 gen_rtvec (2,
10500 XVECEXP (PATTERN (tbegin_insn), 0, 0),
10501 XVECEXP (PATTERN (tbegin_insn), 0, 1)));
10502 INSN_CODE (tbegin_insn) = -1;
10503 df_insn_rescan (tbegin_insn);
10504
10505 return;
10506 }
10507
10508 /* Implement TARGET_HARD_REGNO_NREGS. Because all registers in a class
10509 have the same size, this is equivalent to CLASS_MAX_NREGS. */
10510
10511 static unsigned int
10512 s390_hard_regno_nregs (unsigned int regno, machine_mode mode)
10513 {
10514 return s390_class_max_nregs (REGNO_REG_CLASS (regno), mode);
10515 }
10516
10517 /* Implement TARGET_HARD_REGNO_MODE_OK.
10518
10519 Integer modes <= word size fit into any GPR.
10520 Integer modes > word size fit into successive GPRs, starting with
10521 an even-numbered register.
10522 SImode and DImode fit into FPRs as well.
10523
10524 Floating point modes <= word size fit into any FPR or GPR.
10525 Floating point modes > word size (i.e. DFmode on 32-bit) fit
10526 into any FPR, or an even-odd GPR pair.
10527 TFmode fits only into an even-odd FPR pair.
10528
10529 Complex floating point modes fit either into two FPRs, or into
10530 successive GPRs (again starting with an even number).
10531 TCmode fits only into two successive even-odd FPR pairs.
10532
10533 Condition code modes fit only into the CC register. */
10534
10535 static bool
10536 s390_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
10537 {
10538 if (!TARGET_VX && VECTOR_NOFP_REGNO_P (regno))
10539 return false;
10540
10541 switch (REGNO_REG_CLASS (regno))
10542 {
10543 case VEC_REGS:
10544 return ((GET_MODE_CLASS (mode) == MODE_INT
10545 && s390_class_max_nregs (VEC_REGS, mode) == 1)
10546 || mode == DFmode
10547 || (TARGET_VXE && mode == SFmode)
10548 || s390_vector_mode_supported_p (mode));
10549 break;
10550 case FP_REGS:
10551 if (TARGET_VX
10552 && ((GET_MODE_CLASS (mode) == MODE_INT
10553 && s390_class_max_nregs (FP_REGS, mode) == 1)
10554 || mode == DFmode
10555 || s390_vector_mode_supported_p (mode)))
10556 return true;
10557
10558 if (REGNO_PAIR_OK (regno, mode))
10559 {
10560 if (mode == SImode || mode == DImode)
10561 return true;
10562
10563 if (FLOAT_MODE_P (mode) && GET_MODE_CLASS (mode) != MODE_VECTOR_FLOAT)
10564 return true;
10565 }
10566 break;
10567 case ADDR_REGS:
10568 if (FRAME_REGNO_P (regno) && mode == Pmode)
10569 return true;
10570
10571 /* fallthrough */
10572 case GENERAL_REGS:
10573 if (REGNO_PAIR_OK (regno, mode))
10574 {
10575 if (TARGET_ZARCH
10576 || (mode != TFmode && mode != TCmode && mode != TDmode))
10577 return true;
10578 }
10579 break;
10580 case CC_REGS:
10581 if (GET_MODE_CLASS (mode) == MODE_CC)
10582 return true;
10583 break;
10584 case ACCESS_REGS:
10585 if (REGNO_PAIR_OK (regno, mode))
10586 {
10587 if (mode == SImode || mode == Pmode)
10588 return true;
10589 }
10590 break;
10591 default:
10592 return false;
10593 }
10594
10595 return false;
10596 }
10597
10598 /* Implement TARGET_MODES_TIEABLE_P. */
10599
10600 static bool
10601 s390_modes_tieable_p (machine_mode mode1, machine_mode mode2)
10602 {
10603 return ((mode1 == SFmode || mode1 == DFmode)
10604 == (mode2 == SFmode || mode2 == DFmode));
10605 }
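/* Reading of the predicate, as far as this hook is concerned: SFmode
   and DFmode tie with each other, and all remaining modes tie among
   themselves, but a float value never shares a hard register with a
   non-float mode, since SF/DF values are placed in FPRs/VRs in a way
   that differs from the integer layout.  */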
10606
10607 /* Return nonzero if register OLD_REG can be renamed to register NEW_REG. */
10608
10609 bool
10610 s390_hard_regno_rename_ok (unsigned int old_reg, unsigned int new_reg)
10611 {
10612 /* Once we've decided upon a register to use as base register, it must
10613 no longer be used for any other purpose. */
10614 if (cfun->machine->base_reg)
10615 if (REGNO (cfun->machine->base_reg) == old_reg
10616 || REGNO (cfun->machine->base_reg) == new_reg)
10617 return false;
10618
10619 /* Prevent regrename from using call-saved regs which haven't
10620 actually been saved. This is necessary since regrename assumes
10621 the backend save/restore decisions are based on
10622 df_regs_ever_live. Since we have our own routine we have to tell
10623 regrename manually about it. */
10624 if (GENERAL_REGNO_P (new_reg)
10625 && !call_really_used_regs[new_reg]
10626 && cfun_gpr_save_slot (new_reg) == SAVE_SLOT_NONE)
10627 return false;
10628
10629 return true;
10630 }
10631
10632 /* Return nonzero if register REGNO can be used as a scratch register
10633 in peephole2. */
10634
10635 static bool
10636 s390_hard_regno_scratch_ok (unsigned int regno)
10637 {
10638 /* See s390_hard_regno_rename_ok. */
10639 if (GENERAL_REGNO_P (regno)
10640 && !call_really_used_regs[regno]
10641 && cfun_gpr_save_slot (regno) == SAVE_SLOT_NONE)
10642 return false;
10643
10644 return true;
10645 }
10646
10647 /* Implement TARGET_HARD_REGNO_CALL_PART_CLOBBERED. When generating
10648 code that runs in z/Architecture mode, but conforms to the 31-bit
10649 ABI, GPRs can hold 8 bytes; the ABI guarantees only that the lower 4
10650 bytes are saved across calls, however. */
10651
10652 static bool
10653 s390_hard_regno_call_part_clobbered (unsigned int regno, machine_mode mode)
10654 {
10655 if (!TARGET_64BIT
10656 && TARGET_ZARCH
10657 && GET_MODE_SIZE (mode) > 4
10658 && ((regno >= 6 && regno <= 15) || regno == 32))
10659 return true;
10660
10661 if (TARGET_VX
10662 && GET_MODE_SIZE (mode) > 8
10663 && (((TARGET_64BIT && regno >= 24 && regno <= 31))
10664 || (!TARGET_64BIT && (regno == 18 || regno == 19))))
10665 return true;
10666
10667 return false;
10668 }
10669
10670 /* Maximum number of registers to represent a value of mode MODE
10671 in a register of class RCLASS. */
10672
10673 int
10674 s390_class_max_nregs (enum reg_class rclass, machine_mode mode)
10675 {
10676 int reg_size;
10677 bool reg_pair_required_p = false;
10678
10679 switch (rclass)
10680 {
10681 case FP_REGS:
10682 case VEC_REGS:
10683 reg_size = TARGET_VX ? 16 : 8;
10684
10685 /* TF and TD modes would fit into a VR but we put them into a
10686 register pair since we do not have 128bit FP instructions on
10687 full VRs. */
10688 if (TARGET_VX
10689 && SCALAR_FLOAT_MODE_P (mode)
10690 && GET_MODE_SIZE (mode) >= 16)
10691 reg_pair_required_p = true;
10692
10693 /* Even if complex types would fit into a single FPR/VR we force
10694 them into a register pair to deal with the parts more easily.
10695 (FIXME: What about complex ints?) */
10696 if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
10697 reg_pair_required_p = true;
10698 break;
10699 case ACCESS_REGS:
10700 reg_size = 4;
10701 break;
10702 default:
10703 reg_size = UNITS_PER_WORD;
10704 break;
10705 }
10706
10707 if (reg_pair_required_p)
10708 return 2 * ((GET_MODE_SIZE (mode) / 2 + reg_size - 1) / reg_size);
10709
10710 return (GET_MODE_SIZE (mode) + reg_size - 1) / reg_size;
10711 }
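/* Worked examples: TFmode (16 bytes) in FP_REGS with TARGET_VX has
   reg_size 16 but requires a pair, giving 2 * ((16/2 + 15) / 16)
   == 2 regs; without TARGET_VX the plain formula yields
   (16 + 7) / 8 == 2 as well.  DImode in ACCESS_REGS needs
   (8 + 3) / 4 == 2 registers.  */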
10712
10713 /* Implement TARGET_CAN_CHANGE_MODE_CLASS. */
10714
10715 static bool
10716 s390_can_change_mode_class (machine_mode from_mode,
10717 machine_mode to_mode,
10718 reg_class_t rclass)
10719 {
10720 machine_mode small_mode;
10721 machine_mode big_mode;
10722
10723 /* V1TF and TF have different representations in vector
10724 registers. */
10725 if (reg_classes_intersect_p (VEC_REGS, rclass)
10726 && ((from_mode == V1TFmode && to_mode == TFmode)
10727 || (from_mode == TFmode && to_mode == V1TFmode)))
10728 return false;
10729
10730 if (GET_MODE_SIZE (from_mode) == GET_MODE_SIZE (to_mode))
10731 return true;
10732
10733 if (GET_MODE_SIZE (from_mode) < GET_MODE_SIZE (to_mode))
10734 {
10735 small_mode = from_mode;
10736 big_mode = to_mode;
10737 }
10738 else
10739 {
10740 small_mode = to_mode;
10741 big_mode = from_mode;
10742 }
10743
10744 /* Values residing in VRs are little-endian style. All modes are
10745 placed left-aligned in a VR. This means that we cannot allow
10746 switching between modes with differing sizes. Also if the vector
10747 facility is available we still place TFmode values in VR register
10748 pairs, since the only instructions we have operating on TFmodes
10749 only deal with register pairs. Therefore we have to allow DFmode
10750 subregs of TFmodes to enable the TFmode splitters. */
10751 if (reg_classes_intersect_p (VEC_REGS, rclass)
10752 && (GET_MODE_SIZE (small_mode) < 8
10753 || s390_class_max_nregs (VEC_REGS, big_mode) == 1))
10754 return false;
10755
10756 /* Likewise for access registers, since they have only half the
10757 word size on 64-bit. */
10758 if (reg_classes_intersect_p (ACCESS_REGS, rclass))
10759 return false;
10760
10761 return true;
10762 }
10763
10764 /* Return true if we use LRA instead of reload pass. */
10765 static bool
10766 s390_lra_p (void)
10767 {
10768 return s390_lra_flag;
10769 }
10770
10771 /* Return true if register FROM can be eliminated via register TO. */
10772
10773 static bool
10774 s390_can_eliminate (const int from, const int to)
10775 {
10776 /* On zSeries machines, we have not marked the base register as fixed.
10777 Instead, we have an elimination rule BASE_REGNUM -> BASE_REGNUM.
10778 If a function requires the base register, we say here that this
10779 elimination cannot be performed. This will cause reload to free
10780 up the base register (as if it were fixed). On the other hand,
10781 if the current function does *not* require the base register, we
10782 say here the elimination succeeds, which in turn allows reload
10783 to allocate the base register for any other purpose. */
10784 if (from == BASE_REGNUM && to == BASE_REGNUM)
10785 {
10786 if (TARGET_CPU_ZARCH)
10787 {
10788 s390_init_frame_layout ();
10789 return cfun->machine->base_reg == NULL_RTX;
10790 }
10791
10792 return false;
10793 }
10794
10795 /* Everything else must point into the stack frame. */
10796 gcc_assert (to == STACK_POINTER_REGNUM
10797 || to == HARD_FRAME_POINTER_REGNUM);
10798
10799 gcc_assert (from == FRAME_POINTER_REGNUM
10800 || from == ARG_POINTER_REGNUM
10801 || from == RETURN_ADDRESS_POINTER_REGNUM);
10802
10803 /* Make sure we actually saved the return address. */
10804 if (from == RETURN_ADDRESS_POINTER_REGNUM)
10805 if (!crtl->calls_eh_return
10806 && !cfun->stdarg
10807 && !cfun_frame_layout.save_return_addr_p)
10808 return false;
10809
10810 return true;
10811 }
10812
10813 /* Return offset between register FROM and TO initially after prolog. */
10814
10815 HOST_WIDE_INT
10816 s390_initial_elimination_offset (int from, int to)
10817 {
10818 HOST_WIDE_INT offset;
10819
10820 /* ??? Why are we called for non-eliminable pairs? */
10821 if (!s390_can_eliminate (from, to))
10822 return 0;
10823
10824 switch (from)
10825 {
10826 case FRAME_POINTER_REGNUM:
10827 offset = (get_frame_size()
10828 + STACK_POINTER_OFFSET
10829 + crtl->outgoing_args_size);
10830 break;
10831
10832 case ARG_POINTER_REGNUM:
10833 s390_init_frame_layout ();
10834 offset = cfun_frame_layout.frame_size + STACK_POINTER_OFFSET;
10835 break;
10836
10837 case RETURN_ADDRESS_POINTER_REGNUM:
10838 s390_init_frame_layout ();
10839
10840 if (cfun_frame_layout.first_save_gpr_slot == -1)
10841 {
10842 /* If it turns out that for stdarg nothing went into the reg
10843 save area we also do not need the return address
10844 pointer. */
10845 if (cfun->stdarg && !cfun_save_arg_fprs_p)
10846 return 0;
10847
10848 gcc_unreachable ();
10849 }
10850
10851 /* In order to make the following work it is not necessary for
10852 r14 to have a save slot. It is sufficient if one other GPR
10853 got one. Since the GPRs are always stored without gaps we
10854 are able to calculate where the r14 save slot would
10855 reside. */
10856 offset = (cfun_frame_layout.frame_size + cfun_frame_layout.gprs_offset +
10857 (RETURN_REGNUM - cfun_frame_layout.first_save_gpr_slot) *
10858 UNITS_PER_LONG);
10859 break;
10860
10861 case BASE_REGNUM:
10862 offset = 0;
10863 break;
10864
10865 default:
10866 gcc_unreachable ();
10867 }
10868
10869 return offset;
10870 }
10871
10872 /* Emit insn to save fpr REGNUM at offset OFFSET relative
10873 to register BASE. Return generated insn. */
10874
10875 static rtx
10876 save_fpr (rtx base, int offset, int regnum)
10877 {
10878 rtx addr;
10879 addr = gen_rtx_MEM (DFmode, plus_constant (Pmode, base, offset));
10880
10881 if (regnum >= 16 && regnum <= (16 + FP_ARG_NUM_REG))
10882 set_mem_alias_set (addr, get_varargs_alias_set ());
10883 else
10884 set_mem_alias_set (addr, get_frame_alias_set ());
10885
10886 return emit_move_insn (addr, gen_rtx_REG (DFmode, regnum));
10887 }
10888
10889 /* Emit insn to restore fpr REGNUM from offset OFFSET relative
10890 to register BASE. Return generated insn. */
10891
10892 static rtx
10893 restore_fpr (rtx base, int offset, int regnum)
10894 {
10895 rtx addr;
10896 addr = gen_rtx_MEM (DFmode, plus_constant (Pmode, base, offset));
10897 set_mem_alias_set (addr, get_frame_alias_set ());
10898
10899 return emit_move_insn (gen_rtx_REG (DFmode, regnum), addr);
10900 }
10901
10902 /* Generate insn to save registers FIRST to LAST into
10903 the register save area located at offset OFFSET
10904 relative to register BASE. */
10905
10906 static rtx
10907 save_gprs (rtx base, int offset, int first, int last)
10908 {
10909 rtx addr, insn, note;
10910 int i;
10911
10912 addr = plus_constant (Pmode, base, offset);
10913 addr = gen_rtx_MEM (Pmode, addr);
10914
10915 set_mem_alias_set (addr, get_frame_alias_set ());
10916
10917 /* Special-case single register. */
10918 if (first == last)
10919 {
10920 if (TARGET_64BIT)
10921 insn = gen_movdi (addr, gen_rtx_REG (Pmode, first));
10922 else
10923 insn = gen_movsi (addr, gen_rtx_REG (Pmode, first));
10924
10925 if (!global_not_special_regno_p (first))
10926 RTX_FRAME_RELATED_P (insn) = 1;
10927 return insn;
10928 }
10929
10930
10931 insn = gen_store_multiple (addr,
10932 gen_rtx_REG (Pmode, first),
10933 GEN_INT (last - first + 1));
10934
10935 if (first <= 6 && cfun->stdarg)
10936 for (i = 0; i < XVECLEN (PATTERN (insn), 0); i++)
10937 {
10938 rtx mem = XEXP (XVECEXP (PATTERN (insn), 0, i), 0);
10939
10940 if (first + i <= 6)
10941 set_mem_alias_set (mem, get_varargs_alias_set ());
10942 }
10943
  /* We need to set the FRAME_RELATED flag on all SETs
     inside the store-multiple pattern.

     However, we must not emit DWARF records for registers 2..5
     if they are stored for use by variable arguments ...

     ??? Unfortunately, it is not enough to simply not set the
     FRAME_RELATED flags for those SETs, because the first SET
     of the PARALLEL is always treated as if it had the flag
     set, even if it does not.  Therefore we emit a new pattern
     without those registers as REG_FRAME_RELATED_EXPR note.  */

  if (first >= 6 && !global_not_special_regno_p (first))
    {
      rtx pat = PATTERN (insn);

      for (i = 0; i < XVECLEN (pat, 0); i++)
        if (GET_CODE (XVECEXP (pat, 0, i)) == SET
            && !global_not_special_regno_p (REGNO (SET_SRC (XVECEXP (pat,
                                                                     0, i)))))
          RTX_FRAME_RELATED_P (XVECEXP (pat, 0, i)) = 1;

      RTX_FRAME_RELATED_P (insn) = 1;
    }
  else if (last >= 6)
    {
      int start;

      for (start = first >= 6 ? first : 6; start <= last; start++)
        if (!global_not_special_regno_p (start))
          break;

      if (start > last)
        return insn;

      addr = plus_constant (Pmode, base,
                            offset + (start - first) * UNITS_PER_LONG);

      if (start == last)
        {
          if (TARGET_64BIT)
            note = gen_movdi (gen_rtx_MEM (Pmode, addr),
                              gen_rtx_REG (Pmode, start));
          else
            note = gen_movsi (gen_rtx_MEM (Pmode, addr),
                              gen_rtx_REG (Pmode, start));
          note = PATTERN (note);

          add_reg_note (insn, REG_FRAME_RELATED_EXPR, note);
          RTX_FRAME_RELATED_P (insn) = 1;

          return insn;
        }

      note = gen_store_multiple (gen_rtx_MEM (Pmode, addr),
                                 gen_rtx_REG (Pmode, start),
                                 GEN_INT (last - start + 1));
      note = PATTERN (note);

      add_reg_note (insn, REG_FRAME_RELATED_EXPR, note);

      for (i = 0; i < XVECLEN (note, 0); i++)
        if (GET_CODE (XVECEXP (note, 0, i)) == SET
            && !global_not_special_regno_p (REGNO (SET_SRC (XVECEXP (note,
                                                                     0, i)))))
          RTX_FRAME_RELATED_P (XVECEXP (note, 0, i)) = 1;

      RTX_FRAME_RELATED_P (insn) = 1;
    }

  return insn;
}

/* Generate insn to restore registers FIRST to LAST from
   the register save area located at offset OFFSET
   relative to register BASE.  */

static rtx
restore_gprs (rtx base, int offset, int first, int last)
{
  rtx addr, insn;

  addr = plus_constant (Pmode, base, offset);
  addr = gen_rtx_MEM (Pmode, addr);
  set_mem_alias_set (addr, get_frame_alias_set ());

  /* Special-case single register.  */
  if (first == last)
    {
      if (TARGET_64BIT)
        insn = gen_movdi (gen_rtx_REG (Pmode, first), addr);
      else
        insn = gen_movsi (gen_rtx_REG (Pmode, first), addr);

      RTX_FRAME_RELATED_P (insn) = 1;
      return insn;
    }

  insn = gen_load_multiple (gen_rtx_REG (Pmode, first),
                            addr,
                            GEN_INT (last - first + 1));
  RTX_FRAME_RELATED_P (insn) = 1;
  return insn;
}

/* Return insn sequence to load the GOT register.  */

rtx_insn *
s390_load_got (void)
{
  rtx_insn *insns;

  /* We cannot use pic_offset_table_rtx here since we use this
     function also for non-pic if __tls_get_offset is called and in
     that case PIC_OFFSET_TABLE_REGNUM as well as pic_offset_table_rtx
     aren't usable.  */
  rtx got_rtx = gen_rtx_REG (Pmode, 12);

  start_sequence ();

  if (TARGET_CPU_ZARCH)
    {
      emit_move_insn (got_rtx, s390_got_symbol ());
    }
  else
    {
      rtx offset;

      offset = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, s390_got_symbol ()),
                               UNSPEC_LTREL_OFFSET);
      offset = gen_rtx_CONST (Pmode, offset);
      offset = force_const_mem (Pmode, offset);

      emit_move_insn (got_rtx, offset);

      offset = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (offset, 0)),
                               UNSPEC_LTREL_BASE);
      offset = gen_rtx_PLUS (Pmode, got_rtx, offset);

      emit_move_insn (got_rtx, offset);
    }

  insns = get_insns ();
  end_sequence ();
  return insns;
}

/* This ties together stack memory (MEM with an alias set of frame_alias_set)
   and the change to the stack pointer.  */

static void
s390_emit_stack_tie (void)
{
  rtx mem = gen_frame_mem (BLKmode,
                           gen_rtx_REG (Pmode, STACK_POINTER_REGNUM));

  emit_insn (gen_stack_tie (mem));
}

/* Copy GPRS into FPR save slots.  */

static void
s390_save_gprs_to_fprs (void)
{
  int i;

  if (!TARGET_Z10 || !TARGET_HARD_FLOAT || !crtl->is_leaf)
    return;

  for (i = 6; i < 16; i++)
    {
      if (FP_REGNO_P (cfun_gpr_save_slot (i)))
        {
          rtx_insn *insn =
            emit_move_insn (gen_rtx_REG (DImode, cfun_gpr_save_slot (i)),
                            gen_rtx_REG (DImode, i));
          RTX_FRAME_RELATED_P (insn) = 1;
          /* This prevents dwarf2cfi from interpreting the set.  If it
             did, it might emit def_cfa_register notes setting an FPR
             as the new CFA.  */
          add_reg_note (insn, REG_CFA_REGISTER, copy_rtx (PATTERN (insn)));
        }
    }
}

/* Restore GPRs from FPR save slots.  */

static void
s390_restore_gprs_from_fprs (void)
{
  int i;

  if (!TARGET_Z10 || !TARGET_HARD_FLOAT || !crtl->is_leaf)
    return;

  for (i = 6; i < 16; i++)
    {
      rtx_insn *insn;

      if (!FP_REGNO_P (cfun_gpr_save_slot (i)))
        continue;

      rtx fpr = gen_rtx_REG (DImode, cfun_gpr_save_slot (i));

      if (i == STACK_POINTER_REGNUM)
        insn = emit_insn (gen_stack_restore_from_fpr (fpr));
      else
        insn = emit_move_insn (gen_rtx_REG (DImode, i), fpr);

      df_set_regs_ever_live (i, true);
      add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DImode, i));
      if (i == STACK_POINTER_REGNUM)
        add_reg_note (insn, REG_CFA_DEF_CFA,
                      plus_constant (Pmode, stack_pointer_rtx,
                                     STACK_POINTER_OFFSET));
      RTX_FRAME_RELATED_P (insn) = 1;
    }
}


/* A pass run immediately before shrink-wrapping and prologue and epilogue
   generation.  */

namespace {

const pass_data pass_data_s390_early_mach =
{
  RTL_PASS, /* type */
  "early_mach", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_MACH_DEP, /* tv_id */
  0, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  ( TODO_df_verify | TODO_df_finish ), /* todo_flags_finish */
};

class pass_s390_early_mach : public rtl_opt_pass
{
public:
  pass_s390_early_mach (gcc::context *ctxt)
    : rtl_opt_pass (pass_data_s390_early_mach, ctxt)
  {}

  /* opt_pass methods: */
  virtual unsigned int execute (function *);

}; // class pass_s390_early_mach

unsigned int
pass_s390_early_mach::execute (function *fun)
{
  rtx_insn *insn;

  /* Try to get rid of the FPR clobbers.  */
  s390_optimize_nonescaping_tx ();

  /* Re-compute register info.  */
  s390_register_info ();

  /* If we're using a base register, ensure that it is always valid for
     the first non-prologue instruction.  */
  if (fun->machine->base_reg)
    emit_insn_at_entry (gen_main_pool (fun->machine->base_reg));

  /* Annotate all constant pool references to let the scheduler know
     they implicitly use the base register.  */
  for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
    if (INSN_P (insn))
      {
        annotate_constant_pool_refs (&PATTERN (insn));
        df_insn_rescan (insn);
      }
  return 0;
}

} // anon namespace
/* Calculate TARGET = REG + OFFSET as s390_emit_prologue would do it.
   - push immediates that are too big into the literal pool and
     annotate the refs
   - emit frame-related notes for stack pointer changes.  */

static rtx
s390_prologue_plus_offset (rtx target, rtx reg, rtx offset, bool frame_related_p)
{
  rtx insn;
  rtx orig_offset = offset;

  gcc_assert (REG_P (target));
  gcc_assert (REG_P (reg));
  gcc_assert (CONST_INT_P (offset));

  if (offset == const0_rtx)                          /* lr/lgr */
    {
      insn = emit_move_insn (target, reg);
    }
  else if (DISP_IN_RANGE (INTVAL (offset)))          /* la */
    {
      insn = emit_move_insn (target, gen_rtx_PLUS (Pmode, reg,
                                                   offset));
    }
  else
    {
      if (!satisfies_constraint_K (offset)           /* ahi/aghi */
          && (!TARGET_EXTIMM
              || (!satisfies_constraint_Op (offset)  /* alfi/algfi */
                  && !satisfies_constraint_On (offset))))  /* slfi/slgfi */
        offset = force_const_mem (Pmode, offset);

      if (target != reg)
        {
          insn = emit_move_insn (target, reg);
          RTX_FRAME_RELATED_P (insn) = frame_related_p ? 1 : 0;
        }

      insn = emit_insn (gen_add2_insn (target, offset));

      if (!CONST_INT_P (offset))
        {
          annotate_constant_pool_refs (&PATTERN (insn));

          if (frame_related_p)
            add_reg_note (insn, REG_FRAME_RELATED_EXPR,
                          gen_rtx_SET (target,
                                       gen_rtx_PLUS (Pmode, target,
                                                     orig_offset)));
        }
    }

  RTX_FRAME_RELATED_P (insn) = frame_related_p ? 1 : 0;

  /* If this is a stack adjustment and we are generating a stack clash
     prologue, then add a REG_STACK_CHECK note to signal that this insn
     should be left alone.  */
  if (flag_stack_clash_protection && target == stack_pointer_rtx)
    add_reg_note (insn, REG_STACK_CHECK, const0_rtx);

  return insn;
}

/* Emit a compare instruction with a volatile memory access as stack
   probe.  It does not waste store tags and does not clobber any
   registers apart from the condition code.  */
static void
s390_emit_stack_probe (rtx addr)
{
  rtx tmp = gen_rtx_MEM (Pmode, addr);
  MEM_VOLATILE_P (tmp) = 1;
  s390_emit_compare (EQ, gen_rtx_REG (Pmode, 0), tmp);
  emit_insn (gen_blockage ());
}

/* Use a runtime loop if we have to emit more probes than this.  */
#define MIN_UNROLL_PROBES 3
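
/* Worked example of the probing arithmetic used below, assuming a
   hypothetical probe interval of 4 KB (0x1000) and an allocation
   request of -INTVAL (size) == 0x2800 (10 KB):

     rounded_size = 0x2800 & -0x1000 = 0x2000
     num_probes   = 0x2000 / 0x1000  = 2
     residual     = 0x2800 - 0x2000  = 0x800

   With num_probes == 2 < MIN_UNROLL_PROBES the probes are emitted
   unrolled; the 0x800-byte residual is allocated afterwards and is
   probed only if the distance to the last probe reaches the probe
   interval.  */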

/* Allocate SIZE bytes of stack space, using TEMP_REG as a temporary
   if necessary.  LAST_PROBE_OFFSET contains the offset of the closest
   probe relative to the stack pointer.

   Note that SIZE is negative.

   The return value is true if TEMP_REG has been clobbered.  */
static bool
allocate_stack_space (rtx size, HOST_WIDE_INT last_probe_offset,
                      rtx temp_reg)
{
  bool temp_reg_clobbered_p = false;
  HOST_WIDE_INT probe_interval
    = 1 << PARAM_VALUE (PARAM_STACK_CLASH_PROTECTION_PROBE_INTERVAL);
  HOST_WIDE_INT guard_size
    = 1 << PARAM_VALUE (PARAM_STACK_CLASH_PROTECTION_GUARD_SIZE);

  if (flag_stack_clash_protection)
    {
      if (last_probe_offset + -INTVAL (size) < guard_size)
        dump_stack_clash_frame_info (NO_PROBE_SMALL_FRAME, true);
      else
        {
          rtx offset = GEN_INT (probe_interval - UNITS_PER_LONG);
          HOST_WIDE_INT rounded_size = -INTVAL (size) & -probe_interval;
          HOST_WIDE_INT num_probes = rounded_size / probe_interval;
          HOST_WIDE_INT residual = -INTVAL (size) - rounded_size;

          if (num_probes < MIN_UNROLL_PROBES)
            {
              /* Emit unrolled probe statements.  */

              for (unsigned int i = 0; i < num_probes; i++)
                {
                  s390_prologue_plus_offset (stack_pointer_rtx,
                                             stack_pointer_rtx,
                                             GEN_INT (-probe_interval), true);
                  s390_emit_stack_probe (gen_rtx_PLUS (Pmode,
                                                       stack_pointer_rtx,
                                                       offset));
                }
              dump_stack_clash_frame_info (PROBE_INLINE, residual != 0);
            }
          else
            {
              /* Emit a loop probing the pages.  */

              rtx_code_label *loop_start_label = gen_label_rtx ();

              /* From now on temp_reg will be the CFA register.  */
              s390_prologue_plus_offset (temp_reg, stack_pointer_rtx,
                                         GEN_INT (-rounded_size), true);
              emit_label (loop_start_label);

              s390_prologue_plus_offset (stack_pointer_rtx,
                                         stack_pointer_rtx,
                                         GEN_INT (-probe_interval), false);
              s390_emit_stack_probe (gen_rtx_PLUS (Pmode,
                                                   stack_pointer_rtx,
                                                   offset));
              emit_cmp_and_jump_insns (stack_pointer_rtx, temp_reg,
                                       GT, NULL_RTX,
                                       Pmode, 1, loop_start_label);

              /* Without this make_edges ICEes.  */
              JUMP_LABEL (get_last_insn ()) = loop_start_label;
              LABEL_NUSES (loop_start_label) = 1;

              /* That's going to be a NOP since stack pointer and
                 temp_reg are supposed to be the same here.  We just
                 emit it to set the CFA reg back to r15.  */
              s390_prologue_plus_offset (stack_pointer_rtx, temp_reg,
                                         const0_rtx, true);
              temp_reg_clobbered_p = true;
              dump_stack_clash_frame_info (PROBE_LOOP, residual != 0);
            }

          /* Handle any residual allocation request.  */
          s390_prologue_plus_offset (stack_pointer_rtx,
                                     stack_pointer_rtx,
                                     GEN_INT (-residual), true);
          last_probe_offset += residual;
          if (last_probe_offset >= probe_interval)
            s390_emit_stack_probe (gen_rtx_PLUS (Pmode,
                                                 stack_pointer_rtx,
                                                 GEN_INT (residual
                                                          - UNITS_PER_LONG)));

          return temp_reg_clobbered_p;
        }
    }

  /* Subtract frame size from stack pointer.  */
  s390_prologue_plus_offset (stack_pointer_rtx,
                             stack_pointer_rtx,
                             size, true);

  return temp_reg_clobbered_p;
}

/* Expand the prologue into a bunch of separate insns.  */

void
s390_emit_prologue (void)
{
  rtx insn, addr;
  rtx temp_reg;
  int i;
  int offset;
  int next_fpr = 0;

  /* Choose best register to use for temp use within prologue.
     TPF with profiling must avoid the register 14 - the tracing function
     needs the original contents of r14 to be preserved.  */

  if (!has_hard_reg_initial_val (Pmode, RETURN_REGNUM)
      && !crtl->is_leaf
      && !TARGET_TPF_PROFILING)
    temp_reg = gen_rtx_REG (Pmode, RETURN_REGNUM);
  else if (flag_split_stack && cfun->stdarg)
    temp_reg = gen_rtx_REG (Pmode, 12);
  else
    temp_reg = gen_rtx_REG (Pmode, 1);

  /* When probing for stack-clash mitigation, we have to track the distance
     between the stack pointer and closest known reference.

     Most of the time we have to make a worst case assumption.  The
     only exception is when TARGET_BACKCHAIN is active, in which case
     we know *sp (offset 0) was written.  */
  HOST_WIDE_INT probe_interval
    = 1 << PARAM_VALUE (PARAM_STACK_CLASH_PROTECTION_PROBE_INTERVAL);
  HOST_WIDE_INT last_probe_offset
    = (TARGET_BACKCHAIN
       ? (TARGET_PACKED_STACK ? STACK_POINTER_OFFSET - UNITS_PER_LONG : 0)
       : probe_interval - (STACK_BOUNDARY / UNITS_PER_WORD));

  s390_save_gprs_to_fprs ();

  /* Save call saved gprs.  */
  if (cfun_frame_layout.first_save_gpr != -1)
    {
      insn = save_gprs (stack_pointer_rtx,
                        cfun_frame_layout.gprs_offset
                        + UNITS_PER_LONG * (cfun_frame_layout.first_save_gpr
                                            - cfun_frame_layout.first_save_gpr_slot),
                        cfun_frame_layout.first_save_gpr,
                        cfun_frame_layout.last_save_gpr);

      /* This is not 100% correct.  If we have more than one register saved,
         then LAST_PROBE_OFFSET can move even closer to sp.  */
      last_probe_offset
        = (cfun_frame_layout.gprs_offset
           + UNITS_PER_LONG * (cfun_frame_layout.first_save_gpr
                               - cfun_frame_layout.first_save_gpr_slot));

      emit_insn (insn);
    }

  /* Dummy insn to mark literal pool slot.  */

  if (cfun->machine->base_reg)
    emit_insn (gen_main_pool (cfun->machine->base_reg));

  offset = cfun_frame_layout.f0_offset;

  /* Save f0 and f2.  */
  for (i = FPR0_REGNUM; i <= FPR0_REGNUM + 1; i++)
    {
      if (cfun_fpr_save_p (i))
        {
          save_fpr (stack_pointer_rtx, offset, i);
          if (offset < last_probe_offset)
            last_probe_offset = offset;
          offset += 8;
        }
      else if (!TARGET_PACKED_STACK || cfun->stdarg)
        offset += 8;
    }

  /* Save f4 and f6.  */
  offset = cfun_frame_layout.f4_offset;
  for (i = FPR4_REGNUM; i <= FPR4_REGNUM + 1; i++)
    {
      if (cfun_fpr_save_p (i))
        {
          insn = save_fpr (stack_pointer_rtx, offset, i);
          if (offset < last_probe_offset)
            last_probe_offset = offset;
          offset += 8;

          /* If f4 and f6 are call clobbered they are saved due to
             stdargs and therefore are not frame related.  */
          if (!call_really_used_regs[i])
            RTX_FRAME_RELATED_P (insn) = 1;
        }
      else if (!TARGET_PACKED_STACK || call_really_used_regs[i])
        offset += 8;
    }

  if (TARGET_PACKED_STACK
      && cfun_save_high_fprs_p
      && cfun_frame_layout.f8_offset + cfun_frame_layout.high_fprs * 8 > 0)
    {
      offset = (cfun_frame_layout.f8_offset
                + (cfun_frame_layout.high_fprs - 1) * 8);

      for (i = FPR15_REGNUM; i >= FPR8_REGNUM && offset >= 0; i--)
        if (cfun_fpr_save_p (i))
          {
            insn = save_fpr (stack_pointer_rtx, offset, i);
            if (offset < last_probe_offset)
              last_probe_offset = offset;

            RTX_FRAME_RELATED_P (insn) = 1;
            offset -= 8;
          }
      if (offset >= cfun_frame_layout.f8_offset)
        next_fpr = i;
    }

  if (!TARGET_PACKED_STACK)
    next_fpr = cfun_save_high_fprs_p ? FPR15_REGNUM : 0;

  if (flag_stack_usage_info)
    current_function_static_stack_size = cfun_frame_layout.frame_size;

  /* Decrement stack pointer.  */

  if (cfun_frame_layout.frame_size > 0)
    {
      rtx frame_off = GEN_INT (-cfun_frame_layout.frame_size);
      rtx_insn *stack_pointer_backup_loc;
      bool temp_reg_clobbered_p;

      if (s390_stack_size)
        {
          HOST_WIDE_INT stack_guard;

          if (s390_stack_guard)
            stack_guard = s390_stack_guard;
          else
            {
              /* If no value for stack guard is provided the smallest power of 2
                 larger than the current frame size is chosen.  */
              stack_guard = 1;
              while (stack_guard < cfun_frame_layout.frame_size)
                stack_guard <<= 1;
            }

          if (cfun_frame_layout.frame_size >= s390_stack_size)
            {
              warning (0, "frame size of function %qs is %wd"
                       " bytes exceeding user provided stack limit of "
                       "%d bytes.  "
                       "An unconditional trap is added.",
                       current_function_name (), cfun_frame_layout.frame_size,
                       s390_stack_size);
              emit_insn (gen_trap ());
              emit_barrier ();
            }
          else
            {
              /* stack_guard has to be smaller than s390_stack_size.
                 Otherwise we would emit an AND with zero which would
                 not match the test under mask pattern.  */
              if (stack_guard >= s390_stack_size)
                {
                  warning (0, "frame size of function %qs is %wd"
                           " bytes which is more than half the stack size. "
                           "The dynamic check would not be reliable. "
                           "No check emitted for this function.",
                           current_function_name (),
                           cfun_frame_layout.frame_size);
                }
              else
                {
                  HOST_WIDE_INT stack_check_mask = ((s390_stack_size - 1)
                                                    & ~(stack_guard - 1));

                  rtx t = gen_rtx_AND (Pmode, stack_pointer_rtx,
                                       GEN_INT (stack_check_mask));
                  if (TARGET_64BIT)
                    emit_insn (gen_ctrapdi4 (gen_rtx_EQ (VOIDmode,
                                                         t, const0_rtx),
                                             t, const0_rtx, const0_rtx));
                  else
                    emit_insn (gen_ctrapsi4 (gen_rtx_EQ (VOIDmode,
                                                         t, const0_rtx),
                                             t, const0_rtx, const0_rtx));
                }
            }
        }

      if (s390_warn_framesize > 0
          && cfun_frame_layout.frame_size >= s390_warn_framesize)
        warning (0, "frame size of %qs is %wd bytes",
                 current_function_name (), cfun_frame_layout.frame_size);

      if (s390_warn_dynamicstack_p && cfun->calls_alloca)
        warning (0, "%qs uses dynamic stack allocation", current_function_name ());

      /* Save the location where we could backup the incoming stack
         pointer.  */
      stack_pointer_backup_loc = get_last_insn ();

      temp_reg_clobbered_p = allocate_stack_space (frame_off, last_probe_offset,
                                                   temp_reg);

      if (TARGET_BACKCHAIN || next_fpr)
        {
          if (temp_reg_clobbered_p)
            {
              /* allocate_stack_space had to make use of temp_reg and
                 we need it to hold a backup of the incoming stack
                 pointer.  Calculate back that value from the current
                 stack pointer.  */
              s390_prologue_plus_offset (temp_reg, stack_pointer_rtx,
                                         GEN_INT (cfun_frame_layout.frame_size),
                                         false);
            }
          else
            {
              /* allocate_stack_space didn't actually require
                 temp_reg.  Insert the stack pointer backup insn
                 before the stack pointer decrement code - knowing now
                 that the value will survive.  */
              emit_insn_after (gen_move_insn (temp_reg, stack_pointer_rtx),
                               stack_pointer_backup_loc);
            }
        }

      /* Set backchain.  */

      if (TARGET_BACKCHAIN)
        {
          if (cfun_frame_layout.backchain_offset)
            addr = gen_rtx_MEM (Pmode,
                                plus_constant (Pmode, stack_pointer_rtx,
                                               cfun_frame_layout.backchain_offset));
          else
            addr = gen_rtx_MEM (Pmode, stack_pointer_rtx);
          set_mem_alias_set (addr, get_frame_alias_set ());
          insn = emit_insn (gen_move_insn (addr, temp_reg));
        }

      /* If we support non-call exceptions (e.g. for Java),
         we need to make sure the backchain pointer is set up
         before any possibly trapping memory access.  */
      if (TARGET_BACKCHAIN && cfun->can_throw_non_call_exceptions)
        {
          addr = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (VOIDmode));
          emit_clobber (addr);
        }
    }
  else if (flag_stack_clash_protection)
    dump_stack_clash_frame_info (NO_PROBE_NO_FRAME, false);

  /* Save fprs 8 - 15 (64 bit ABI).  */

  if (cfun_save_high_fprs_p && next_fpr)
    {
      /* If the stack might be accessed through a different register
         we have to make sure that the stack pointer decrement is not
         moved below the use of the stack slots.  */
      s390_emit_stack_tie ();

      insn = emit_insn (gen_add2_insn (temp_reg,
                                       GEN_INT (cfun_frame_layout.f8_offset)));

      offset = 0;

      for (i = FPR8_REGNUM; i <= next_fpr; i++)
        if (cfun_fpr_save_p (i))
          {
            rtx addr = plus_constant (Pmode, stack_pointer_rtx,
                                      cfun_frame_layout.frame_size
                                      + cfun_frame_layout.f8_offset
                                      + offset);

            insn = save_fpr (temp_reg, offset, i);
            offset += 8;
            RTX_FRAME_RELATED_P (insn) = 1;
            add_reg_note (insn, REG_FRAME_RELATED_EXPR,
                          gen_rtx_SET (gen_rtx_MEM (DFmode, addr),
                                       gen_rtx_REG (DFmode, i)));
          }
    }

  /* Set frame pointer, if needed.  */

  if (frame_pointer_needed)
    {
      insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
      RTX_FRAME_RELATED_P (insn) = 1;
    }

  /* Set up got pointer, if needed.  */

  if (flag_pic && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
    {
      rtx_insn *insns = s390_load_got ();

      for (rtx_insn *insn = insns; insn; insn = NEXT_INSN (insn))
        annotate_constant_pool_refs (&PATTERN (insn));

      emit_insn (insns);
    }

  if (TARGET_TPF_PROFILING)
    {
      /* Generate a BAS instruction to serve as a function
         entry intercept to facilitate the use of tracing
         algorithms located at the branch target.  */
      emit_insn (gen_prologue_tpf ());

      /* Emit a blockage here so that all code
         lies between the profiling mechanisms.  */
      emit_insn (gen_blockage ());
    }
}

/* Expand the epilogue into a bunch of separate insns.  */

void
s390_emit_epilogue (bool sibcall)
{
  rtx frame_pointer, return_reg, cfa_restores = NULL_RTX;
  int area_bottom, area_top, offset = 0;
  int next_offset;
  int i;

  if (TARGET_TPF_PROFILING)
    {

      /* Generate a BAS instruction to serve as a function
         entry intercept to facilitate the use of tracing
         algorithms located at the branch target.  */

      /* Emit a blockage here so that all code
         lies between the profiling mechanisms.  */
      emit_insn (gen_blockage ());

      emit_insn (gen_epilogue_tpf ());
    }

  /* Check whether to use frame or stack pointer for restore.  */

  frame_pointer = (frame_pointer_needed
                   ? hard_frame_pointer_rtx : stack_pointer_rtx);

  s390_frame_area (&area_bottom, &area_top);

  /* Check whether we can access the register save area.
     If not, increment the frame pointer as required.  */

  if (area_top <= area_bottom)
    {
      /* Nothing to restore.  */
    }
  else if (DISP_IN_RANGE (cfun_frame_layout.frame_size + area_bottom)
           && DISP_IN_RANGE (cfun_frame_layout.frame_size + area_top - 1))
    {
      /* Area is in range.  */
      offset = cfun_frame_layout.frame_size;
    }
  else
    {
      rtx insn, frame_off, cfa;

      offset = area_bottom < 0 ? -area_bottom : 0;
      frame_off = GEN_INT (cfun_frame_layout.frame_size - offset);

      cfa = gen_rtx_SET (frame_pointer,
                         gen_rtx_PLUS (Pmode, frame_pointer, frame_off));
      if (DISP_IN_RANGE (INTVAL (frame_off)))
        {
          insn = gen_rtx_SET (frame_pointer,
                              gen_rtx_PLUS (Pmode, frame_pointer, frame_off));
          insn = emit_insn (insn);
        }
      else
        {
          if (!CONST_OK_FOR_K (INTVAL (frame_off)))
            frame_off = force_const_mem (Pmode, frame_off);

          insn = emit_insn (gen_add2_insn (frame_pointer, frame_off));
          annotate_constant_pool_refs (&PATTERN (insn));
        }
      add_reg_note (insn, REG_CFA_ADJUST_CFA, cfa);
      RTX_FRAME_RELATED_P (insn) = 1;
    }

  /* Restore call saved fprs.  */

  if (TARGET_64BIT)
    {
      if (cfun_save_high_fprs_p)
        {
          next_offset = cfun_frame_layout.f8_offset;
          for (i = FPR8_REGNUM; i <= FPR15_REGNUM; i++)
            {
              if (cfun_fpr_save_p (i))
                {
                  restore_fpr (frame_pointer,
                               offset + next_offset, i);
                  cfa_restores
                    = alloc_reg_note (REG_CFA_RESTORE,
                                      gen_rtx_REG (DFmode, i), cfa_restores);
                  next_offset += 8;
                }
            }
        }

    }
  else
    {
      next_offset = cfun_frame_layout.f4_offset;
      /* f4, f6 */
      for (i = FPR4_REGNUM; i <= FPR4_REGNUM + 1; i++)
        {
          if (cfun_fpr_save_p (i))
            {
              restore_fpr (frame_pointer,
                           offset + next_offset, i);
              cfa_restores
                = alloc_reg_note (REG_CFA_RESTORE,
                                  gen_rtx_REG (DFmode, i), cfa_restores);
              next_offset += 8;
            }
          else if (!TARGET_PACKED_STACK)
            next_offset += 8;
        }

    }

  /* Return register.  */

  return_reg = gen_rtx_REG (Pmode, RETURN_REGNUM);

  /* Restore call saved gprs.  */

  if (cfun_frame_layout.first_restore_gpr != -1)
    {
      rtx insn, addr;
      int i;

      /* Check for global registers and save them
         to the stack location from where they get restored.  */

      for (i = cfun_frame_layout.first_restore_gpr;
           i <= cfun_frame_layout.last_restore_gpr;
           i++)
        {
          if (global_not_special_regno_p (i))
            {
              addr = plus_constant (Pmode, frame_pointer,
                                    offset + cfun_frame_layout.gprs_offset
                                    + (i - cfun_frame_layout.first_save_gpr_slot)
                                    * UNITS_PER_LONG);
              addr = gen_rtx_MEM (Pmode, addr);
              set_mem_alias_set (addr, get_frame_alias_set ());
              emit_move_insn (addr, gen_rtx_REG (Pmode, i));
            }
          else
            cfa_restores
              = alloc_reg_note (REG_CFA_RESTORE,
                                gen_rtx_REG (Pmode, i), cfa_restores);
        }

      /* Fetch return address from stack before load multiple;
         this helps scheduling.

         Only do this if we already decided that r14 needs to be
         saved to a stack slot.  (And not just because r14 happens to
         be in between two GPRs which need saving.)  Otherwise it
         would be difficult to take that decision back in
         s390_optimize_prologue.

         This optimization is only helpful on in-order machines.  */
      if (! sibcall
          && cfun_gpr_save_slot (RETURN_REGNUM) == SAVE_SLOT_STACK
          && s390_tune <= PROCESSOR_2097_Z10)
        {
          int return_regnum = find_unused_clobbered_reg ();
          if (!return_regnum
              || (TARGET_INDIRECT_BRANCH_NOBP_RET_OPTION
                  && !TARGET_CPU_Z10
                  && return_regnum == INDIRECT_BRANCH_THUNK_REGNUM))
            {
              gcc_assert (INDIRECT_BRANCH_THUNK_REGNUM != 4);
              return_regnum = 4;
            }
          return_reg = gen_rtx_REG (Pmode, return_regnum);

          addr = plus_constant (Pmode, frame_pointer,
                                offset + cfun_frame_layout.gprs_offset
                                + (RETURN_REGNUM
                                   - cfun_frame_layout.first_save_gpr_slot)
                                * UNITS_PER_LONG);
          addr = gen_rtx_MEM (Pmode, addr);
          set_mem_alias_set (addr, get_frame_alias_set ());
          emit_move_insn (return_reg, addr);

          /* Once we did that optimization we have to make sure
             s390_optimize_prologue does not try to remove the store
             of r14 since we will not be able to find the load issued
             here.  */
          cfun_frame_layout.save_return_addr_p = true;
        }

      insn = restore_gprs (frame_pointer,
                           offset + cfun_frame_layout.gprs_offset
                           + (cfun_frame_layout.first_restore_gpr
                              - cfun_frame_layout.first_save_gpr_slot)
                           * UNITS_PER_LONG,
                           cfun_frame_layout.first_restore_gpr,
                           cfun_frame_layout.last_restore_gpr);
      insn = emit_insn (insn);
      REG_NOTES (insn) = cfa_restores;
      add_reg_note (insn, REG_CFA_DEF_CFA,
                    plus_constant (Pmode, stack_pointer_rtx,
                                   STACK_POINTER_OFFSET));
      RTX_FRAME_RELATED_P (insn) = 1;
    }

  s390_restore_gprs_from_fprs ();

  if (! sibcall)
    emit_jump_insn (gen_return_use (return_reg));
}

/* Implement TARGET_SET_UP_BY_PROLOGUE.  */

static void
s300_set_up_by_prologue (hard_reg_set_container *regs)
{
  if (cfun->machine->base_reg
      && !call_really_used_regs[REGNO (cfun->machine->base_reg)])
    SET_HARD_REG_BIT (regs->set, REGNO (cfun->machine->base_reg));
}

/* -fsplit-stack support.  */

/* A SYMBOL_REF for __morestack.  */
static GTY(()) rtx morestack_ref;

/* When using -fsplit-stack, the allocation routines set a field in
   the TCB to the bottom of the stack plus this much space, measured
   in bytes.  */

#define SPLIT_STACK_AVAILABLE 1024
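
/* Example of how SPLIT_STACK_AVAILABLE is used below, with
   hypothetical frame sizes: a 512-byte frame satisfies
   frame_size <= 1024, so the prologue compares the stack pointer
   directly against __private_ss, relying on the 1 KiB of slack the
   allocation routines guarantee below that mark.  A 4 KiB frame
   instead first adds frame_size to __private_ss and compares the
   stack pointer against that adjusted guard.  */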

/* Emit -fsplit-stack prologue, which goes before the regular function
   prologue.  */

void
s390_expand_split_stack_prologue (void)
{
  rtx r1, guard, cc = NULL;
  rtx_insn *insn;
  /* Offset from thread pointer to __private_ss.  */
  int psso = TARGET_64BIT ? 0x38 : 0x20;
  /* Pointer size in bytes.  */
  /* Frame size and argument size - the two parameters to __morestack.  */
  HOST_WIDE_INT frame_size = cfun_frame_layout.frame_size;
  /* Align argument size to 8 bytes - simplifies __morestack code.  */
  HOST_WIDE_INT args_size = crtl->args.size >= 0
                            ? ((crtl->args.size + 7) & ~7)
                            : 0;
  /* Label to be called by __morestack.  */
  rtx_code_label *call_done = NULL;
  rtx_code_label *parm_base = NULL;
  rtx tmp;

  gcc_assert (flag_split_stack && reload_completed);
  if (!TARGET_CPU_ZARCH)
    {
      sorry ("CPUs older than z900 are not supported for -fsplit-stack");
      return;
    }

  r1 = gen_rtx_REG (Pmode, 1);

  /* If no stack frame will be allocated, don't do anything.  */
  if (!frame_size)
    {
      if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
        {
          /* If va_start is used, just use r15.  */
          emit_move_insn (r1,
                          gen_rtx_PLUS (Pmode, stack_pointer_rtx,
                                        GEN_INT (STACK_POINTER_OFFSET)));

        }
      return;
    }

  if (morestack_ref == NULL_RTX)
    {
      morestack_ref = gen_rtx_SYMBOL_REF (Pmode, "__morestack");
      SYMBOL_REF_FLAGS (morestack_ref) |= (SYMBOL_FLAG_LOCAL
                                           | SYMBOL_FLAG_FUNCTION);
    }

  if (CONST_OK_FOR_K (frame_size) || CONST_OK_FOR_Op (frame_size))
    {
      /* If frame_size will fit in an add instruction, do a stack space
         check, and only call __morestack if there's not enough space.  */

      /* Get thread pointer.  r1 is the only register we can always destroy - r0
         could contain a static chain (and cannot be used to address memory
         anyway), r2-r6 can contain parameters, and r6-r15 are callee-saved.  */
      emit_move_insn (r1, gen_rtx_REG (Pmode, TP_REGNUM));
      /* Aim at __private_ss.  */
      guard = gen_rtx_MEM (Pmode, plus_constant (Pmode, r1, psso));
      /* If less than 1 KiB is used, skip the addition and compare
         directly with __private_ss.  */
      if (frame_size > SPLIT_STACK_AVAILABLE)
        {
          emit_move_insn (r1, guard);
          if (TARGET_64BIT)
            emit_insn (gen_adddi3 (r1, r1, GEN_INT (frame_size)));
          else
            emit_insn (gen_addsi3 (r1, r1, GEN_INT (frame_size)));
          guard = r1;
        }

      /* Compare the (maybe adjusted) guard with the stack pointer.  */
      cc = s390_emit_compare (LT, stack_pointer_rtx, guard);
    }

  call_done = gen_label_rtx ();
  parm_base = gen_label_rtx ();

  /* Emit the parameter block.  */
  tmp = gen_split_stack_data (parm_base, call_done,
                              GEN_INT (frame_size),
                              GEN_INT (args_size));
  insn = emit_insn (tmp);
  add_reg_note (insn, REG_LABEL_OPERAND, call_done);
  LABEL_NUSES (call_done)++;
  add_reg_note (insn, REG_LABEL_OPERAND, parm_base);
  LABEL_NUSES (parm_base)++;

  /* %r1 = litbase.  */
  insn = emit_move_insn (r1, gen_rtx_LABEL_REF (VOIDmode, parm_base));
  add_reg_note (insn, REG_LABEL_OPERAND, parm_base);
  LABEL_NUSES (parm_base)++;

  /* Now, we need to call __morestack.  It has very special calling
     conventions: it preserves param/return/static chain registers for
     calling main function body, and looks for its own parameters at %r1.  */

  if (cc != NULL)
    {
      tmp = gen_split_stack_cond_call (morestack_ref, cc, call_done);

      insn = emit_jump_insn (tmp);
      JUMP_LABEL (insn) = call_done;
      LABEL_NUSES (call_done)++;

      /* Mark the jump as very unlikely to be taken.  */
      add_reg_br_prob_note (insn,
                            profile_probability::very_unlikely ());

      if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
        {
          /* If va_start is used, and __morestack was not called, just use
             r15.  */
          emit_move_insn (r1,
                          gen_rtx_PLUS (Pmode, stack_pointer_rtx,
                                        GEN_INT (STACK_POINTER_OFFSET)));
        }
    }
  else
    {
      tmp = gen_split_stack_call (morestack_ref, call_done);
      insn = emit_jump_insn (tmp);
      JUMP_LABEL (insn) = call_done;
      LABEL_NUSES (call_done)++;
      emit_barrier ();
    }

  /* __morestack will call us here.  */

  emit_label (call_done);
}

/* We may have to tell the dataflow pass that the split stack prologue
   is initializing a register.  */

static void
s390_live_on_entry (bitmap regs)
{
  if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
    {
      gcc_assert (flag_split_stack);
      bitmap_set_bit (regs, 1);
    }
}

/* Return true if the function can use simple_return to return outside
   of a shrink-wrapped region.  At present shrink-wrapping is supported
   in all cases.  */

bool
s390_can_use_simple_return_insn (void)
{
  return true;
}

/* Return true if the epilogue is guaranteed to contain only a return
   instruction and if a direct return can therefore be used instead.
   One of the main advantages of using direct return instructions
   is that we can then use conditional returns.  */

bool
s390_can_use_return_insn (void)
{
  int i;

  if (!reload_completed)
    return false;

  if (crtl->profile)
    return false;

  if (TARGET_TPF_PROFILING)
    return false;

  for (i = 0; i < 16; i++)
    if (cfun_gpr_save_slot (i) != SAVE_SLOT_NONE)
      return false;

  /* For 31 bit this is not covered by the frame_size check below
     since f4, f6 are saved in the register save area without needing
     additional stack space.  */
  if (!TARGET_64BIT
      && (cfun_fpr_save_p (FPR4_REGNUM) || cfun_fpr_save_p (FPR6_REGNUM)))
    return false;

  if (cfun->machine->base_reg
      && !call_really_used_regs[REGNO (cfun->machine->base_reg)])
    return false;

  return cfun_frame_layout.frame_size == 0;
}

/* The VX ABI differs for vararg functions.  Therefore we need the
   prototype of the callee to be available when passing vector type
   values.  */
static const char *
s390_invalid_arg_for_unprototyped_fn (const_tree typelist, const_tree funcdecl, const_tree val)
{
  return ((TARGET_VX_ABI
           && typelist == 0
           && VECTOR_TYPE_P (TREE_TYPE (val))
           && (funcdecl == NULL_TREE
               || (TREE_CODE (funcdecl) == FUNCTION_DECL
                   && DECL_BUILT_IN_CLASS (funcdecl) != BUILT_IN_MD)))
          ? N_("vector argument passed to unprototyped function")
          : NULL);
}


/* Return the size in bytes of a function argument of
   type TYPE and/or mode MODE.  At least one of TYPE or
   MODE must be specified.  */

static int
s390_function_arg_size (machine_mode mode, const_tree type)
{
  if (type)
    return int_size_in_bytes (type);

  /* No type info available for some library calls ...  */
  if (mode != BLKmode)
    return GET_MODE_SIZE (mode);

  /* If we have neither type nor mode, abort.  */
  gcc_unreachable ();
}

/* Return true if a function argument of type TYPE and mode MODE
   is to be passed in a vector register, if available.  */

bool
s390_function_arg_vector (machine_mode mode, const_tree type)
{
  if (!TARGET_VX_ABI)
    return false;

  if (s390_function_arg_size (mode, type) > 16)
    return false;

  /* No type info available for some library calls ...  */
  if (!type)
    return VECTOR_MODE_P (mode);

  /* The ABI says that record types with a single member are treated
     just like that member would be.  */
  while (TREE_CODE (type) == RECORD_TYPE)
    {
      tree field, single = NULL_TREE;

      for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
        {
          if (TREE_CODE (field) != FIELD_DECL)
            continue;

          if (single == NULL_TREE)
            single = TREE_TYPE (field);
          else
            return false;
        }

      if (single == NULL_TREE)
        return false;
      else
        {
          /* If the field declaration adds extra bytes due to
             e.g. padding, this is not accepted as a vector type.  */
          if (int_size_in_bytes (single) <= 0
              || int_size_in_bytes (single) != int_size_in_bytes (type))
            return false;
          type = single;
        }
    }

  return VECTOR_TYPE_P (type);
}
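
/* To illustrate the single-member rule above with hypothetical types:

     typedef double v2df __attribute__ ((vector_size (16)));
     struct a { v2df x; };
     struct b { struct a a; };

   both structs are unwrapped down to the vector member and treated as
   vectors, while

     struct c { v2df x; double pad; };

   has two members and is therefore not treated as a vector.  */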

/* Return true if a function argument of type TYPE and mode MODE
   is to be passed in a floating-point register, if available.  */

static bool
s390_function_arg_float (machine_mode mode, const_tree type)
{
  if (s390_function_arg_size (mode, type) > 8)
    return false;

  /* Soft-float changes the ABI: no floating-point registers are used.  */
  if (TARGET_SOFT_FLOAT)
    return false;

  /* No type info available for some library calls ...  */
  if (!type)
    return mode == SFmode || mode == DFmode || mode == SDmode || mode == DDmode;

  /* The ABI says that record types with a single member are treated
     just like that member would be.  */
  while (TREE_CODE (type) == RECORD_TYPE)
    {
      tree field, single = NULL_TREE;

      for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
        {
          if (TREE_CODE (field) != FIELD_DECL)
            continue;

          if (single == NULL_TREE)
            single = TREE_TYPE (field);
          else
            return false;
        }

      if (single == NULL_TREE)
        return false;
      else
        type = single;
    }

  return TREE_CODE (type) == REAL_TYPE;
}

/* Return true if a function argument of type TYPE and mode MODE
   is to be passed in an integer register, or a pair of integer
   registers, if available.  */

static bool
s390_function_arg_integer (machine_mode mode, const_tree type)
{
  int size = s390_function_arg_size (mode, type);
  if (size > 8)
    return false;

  /* No type info available for some library calls ...  */
  if (!type)
    return GET_MODE_CLASS (mode) == MODE_INT
           || (TARGET_SOFT_FLOAT && SCALAR_FLOAT_MODE_P (mode));

  /* We accept small integral (and similar) types.  */
  if (INTEGRAL_TYPE_P (type)
      || POINTER_TYPE_P (type)
      || TREE_CODE (type) == NULLPTR_TYPE
      || TREE_CODE (type) == OFFSET_TYPE
      || (TARGET_SOFT_FLOAT && TREE_CODE (type) == REAL_TYPE))
    return true;

  /* We also accept structs of size 1, 2, 4, 8 that are not
     passed in floating-point registers.  */
  if (AGGREGATE_TYPE_P (type)
      && exact_log2 (size) >= 0
      && !s390_function_arg_float (mode, type))
    return true;

  return false;
}

/* Return 1 if a function argument of type TYPE and mode MODE
   is to be passed by reference.  The ABI specifies that only
   structures of size 1, 2, 4, or 8 bytes are passed by value,
   all other structures (and complex numbers) are passed by
   reference.  */

static bool
s390_pass_by_reference (cumulative_args_t ca ATTRIBUTE_UNUSED,
                        machine_mode mode, const_tree type,
                        bool named ATTRIBUTE_UNUSED)
{
  int size = s390_function_arg_size (mode, type);

  if (s390_function_arg_vector (mode, type))
    return false;

  if (size > 8)
    return true;

  if (type)
    {
      if (AGGREGATE_TYPE_P (type) && exact_log2 (size) < 0)
        return true;

      if (TREE_CODE (type) == COMPLEX_TYPE
          || TREE_CODE (type) == VECTOR_TYPE)
        return true;
    }

  return false;
}
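
/* A few hypothetical examples of the pass-by-reference rule above:

     struct s1 { char c; };            size 1  -> by value
     struct s3 { char c[3]; };         size 3  -> by reference
     struct s8 { long l; };            size 8  -> by value
     struct s12 { char c[12]; };       size 12 -> by reference
     _Complex double                           -> by reference

   i.e. only aggregates whose size is a power of two no larger than
   8 bytes travel by value.  */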

/* Update the data in CUM to advance over an argument of mode MODE and
   data type TYPE.  (TYPE is null for libcalls where that information
   may not be available.)  The boolean NAMED specifies whether the
   argument is a named argument (as opposed to an unnamed argument
   matching an ellipsis).  */

static void
s390_function_arg_advance (cumulative_args_t cum_v, machine_mode mode,
                           const_tree type, bool named)
{
  CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);

  if (s390_function_arg_vector (mode, type))
    {
      /* We are called for unnamed vector stdarg arguments which are
         passed on the stack.  In this case this hook does not have to
         do anything since stack arguments are tracked by common
         code.  */
      if (!named)
        return;
      cum->vrs += 1;
    }
  else if (s390_function_arg_float (mode, type))
    {
      cum->fprs += 1;
    }
  else if (s390_function_arg_integer (mode, type))
    {
      int size = s390_function_arg_size (mode, type);
      cum->gprs += ((size + UNITS_PER_LONG - 1) / UNITS_PER_LONG);
    }
  else
    gcc_unreachable ();
}

/* Define where to put the arguments to a function.
   Value is zero to push the argument on the stack,
   or a hard register in which to store the argument.

   MODE is the argument's machine mode.
   TYPE is the data type of the argument (as a tree).
    This is null for libcalls where that information may
    not be available.
   CUM is a variable of type CUMULATIVE_ARGS which gives info about
    the preceding args and about the function being called.
   NAMED is nonzero if this argument is a named parameter
    (otherwise it is an extra parameter matching an ellipsis).

   On S/390, we use general purpose registers 2 through 6 to
   pass integer, pointer, and certain structure arguments, and
   floating point registers 0 and 2 (0, 2, 4, and 6 on 64-bit)
   to pass floating point arguments.  All remaining arguments
   are pushed to the stack.  */

static rtx
s390_function_arg (cumulative_args_t cum_v, machine_mode mode,
                   const_tree type, bool named)
{
  CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);

  if (!named)
    s390_check_type_for_vector_abi (type, true, false);

  if (s390_function_arg_vector (mode, type))
    {
      /* Vector arguments being part of the ellipsis are passed on the
         stack.  */
      if (!named || (cum->vrs + 1 > VEC_ARG_NUM_REG))
        return NULL_RTX;

      return gen_rtx_REG (mode, cum->vrs + FIRST_VEC_ARG_REGNO);
    }
  else if (s390_function_arg_float (mode, type))
    {
      if (cum->fprs + 1 > FP_ARG_NUM_REG)
        return NULL_RTX;
      else
        return gen_rtx_REG (mode, cum->fprs + 16);
    }
  else if (s390_function_arg_integer (mode, type))
    {
      int size = s390_function_arg_size (mode, type);
      int n_gprs = (size + UNITS_PER_LONG - 1) / UNITS_PER_LONG;

      if (cum->gprs + n_gprs > GP_ARG_NUM_REG)
        return NULL_RTX;
      else if (n_gprs == 1 || UNITS_PER_WORD == UNITS_PER_LONG)
        return gen_rtx_REG (mode, cum->gprs + 2);
      else if (n_gprs == 2)
        {
          rtvec p = rtvec_alloc (2);

          RTVEC_ELT (p, 0)
            = gen_rtx_EXPR_LIST (SImode, gen_rtx_REG (SImode, cum->gprs + 2),
                                 const0_rtx);
          RTVEC_ELT (p, 1)
            = gen_rtx_EXPR_LIST (SImode, gen_rtx_REG (SImode, cum->gprs + 3),
                                 GEN_INT (4));

          return gen_rtx_PARALLEL (mode, p);
        }
    }

  /* After the real arguments, expand_call calls us once again
     with a void_type_node type.  Whatever we return here is
     passed as operand 2 to the call expanders.

     We don't need this feature ...  */
  else if (type == void_type_node)
    return const0_rtx;

  gcc_unreachable ();
}
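
/* As a hypothetical example of the assignment rules above, a 64-bit
   call with hardware floating point

     f (int a, double b, long c);

   places a in %r2, b in %f0 and c in %r3: the GPR and FPR counters
   advance independently, so c takes the next free GPR even though b
   consumed no GPR.  Once %r2-%r6 are exhausted, further integer
   arguments are passed on the stack and the hook returns NULL_RTX.  */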

/* Implement TARGET_FUNCTION_ARG_PADDING.  Vector arguments are
   left-justified when placed on the stack during parameter passing.  */

static pad_direction
s390_function_arg_padding (machine_mode mode, const_tree type)
{
  if (s390_function_arg_vector (mode, type))
    return PAD_UPWARD;

  return default_function_arg_padding (mode, type);
}

/* Return true if return values of type TYPE should be returned
   in a memory buffer whose address is passed by the caller as
   hidden first argument.  */

static bool
s390_return_in_memory (const_tree type, const_tree fundecl ATTRIBUTE_UNUSED)
{
  /* We accept small integral (and similar) types.  */
  if (INTEGRAL_TYPE_P (type)
      || POINTER_TYPE_P (type)
      || TREE_CODE (type) == OFFSET_TYPE
      || TREE_CODE (type) == REAL_TYPE)
    return int_size_in_bytes (type) > 8;

  /* vector types which fit into a VR.  */
  if (TARGET_VX_ABI
      && VECTOR_TYPE_P (type)
      && int_size_in_bytes (type) <= 16)
    return false;

  /* Aggregates and similar constructs are always returned
     in memory.  */
  if (AGGREGATE_TYPE_P (type)
      || TREE_CODE (type) == COMPLEX_TYPE
      || VECTOR_TYPE_P (type))
    return true;

  /* ??? We get called on all sorts of random stuff from
     aggregate_value_p.  We can't abort, but it's not clear
     what's safe to return.  Pretend it's a struct I guess.  */
  return true;
}

/* Function arguments and return values are promoted to word size.  */

static machine_mode
s390_promote_function_mode (const_tree type, machine_mode mode,
                            int *punsignedp,
                            const_tree fntype ATTRIBUTE_UNUSED,
                            int for_return ATTRIBUTE_UNUSED)
{
  if (INTEGRAL_MODE_P (mode)
      && GET_MODE_SIZE (mode) < UNITS_PER_LONG)
    {
      if (type != NULL_TREE && POINTER_TYPE_P (type))
        *punsignedp = POINTERS_EXTEND_UNSIGNED;
      return Pmode;
    }

  return mode;
}
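
/* For example, assuming a 64-bit configuration, a `short' argument
   (HImode, 2 bytes < UNITS_PER_LONG) is widened to Pmode before being
   placed in a GPR, while a `long' already occupies a full word and
   keeps its mode.  Pointer types additionally take their extension
   kind from POINTERS_EXTEND_UNSIGNED.  */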

/* Define where to return a (scalar) value of type RET_TYPE.
   If RET_TYPE is null, define where to return a (scalar)
   value of mode MODE from a libcall.  */

static rtx
s390_function_and_libcall_value (machine_mode mode,
                                 const_tree ret_type,
                                 const_tree fntype_or_decl,
                                 bool outgoing ATTRIBUTE_UNUSED)
{
  /* For vector return types it is important to use the RET_TYPE
     argument whenever available since the middle-end might have
     changed the mode to a scalar mode.  */
  bool vector_ret_type_p = ((ret_type && VECTOR_TYPE_P (ret_type))
                            || (!ret_type && VECTOR_MODE_P (mode)));

  /* For normal functions perform the promotion as
     promote_function_mode would do.  */
  if (ret_type)
    {
      int unsignedp = TYPE_UNSIGNED (ret_type);
      mode = promote_function_mode (ret_type, mode, &unsignedp,
                                    fntype_or_decl, 1);
    }

  gcc_assert (GET_MODE_CLASS (mode) == MODE_INT
              || SCALAR_FLOAT_MODE_P (mode)
              || (TARGET_VX_ABI && vector_ret_type_p));
  gcc_assert (GET_MODE_SIZE (mode) <= (TARGET_VX_ABI ? 16 : 8));

  if (TARGET_VX_ABI && vector_ret_type_p)
    return gen_rtx_REG (mode, FIRST_VEC_ARG_REGNO);
  else if (TARGET_HARD_FLOAT && SCALAR_FLOAT_MODE_P (mode))
    return gen_rtx_REG (mode, 16);
  else if (GET_MODE_SIZE (mode) <= UNITS_PER_LONG
           || UNITS_PER_LONG == UNITS_PER_WORD)
    return gen_rtx_REG (mode, 2);
  else if (GET_MODE_SIZE (mode) == 2 * UNITS_PER_LONG)
    {
      /* This case is triggered when returning a 64 bit value with
         -m31 -mzarch.  Although the value would fit into a single
         register it has to be forced into a 32 bit register pair in
         order to match the ABI.  */
      rtvec p = rtvec_alloc (2);

      RTVEC_ELT (p, 0)
        = gen_rtx_EXPR_LIST (SImode, gen_rtx_REG (SImode, 2), const0_rtx);
      RTVEC_ELT (p, 1)
        = gen_rtx_EXPR_LIST (SImode, gen_rtx_REG (SImode, 3), GEN_INT (4));

      return gen_rtx_PARALLEL (mode, p);
    }

  gcc_unreachable ();
}
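
/* To illustrate the register choice above with hypothetical cases: a
   `double' returned with hardware floating point lands in %f0
   (register 16), an `int' or pointer in %r2, and a `long long' under
   -m31 in the %r2/%r3 pair built by the PARALLEL, since a 64-bit
   value must span two 32-bit GPRs there to match the ABI.  */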
12576
12577 /* Define where to return a scalar return value of type RET_TYPE. */
12578
12579 static rtx
s390_function_value(const_tree ret_type,const_tree fn_decl_or_type,bool outgoing)12580 s390_function_value (const_tree ret_type, const_tree fn_decl_or_type,
12581 bool outgoing)
12582 {
12583 return s390_function_and_libcall_value (TYPE_MODE (ret_type), ret_type,
12584 fn_decl_or_type, outgoing);
12585 }
12586
12587 /* Define where to return a scalar libcall return value of mode
12588 MODE. */
12589
12590 static rtx
s390_libcall_value(machine_mode mode,const_rtx fun ATTRIBUTE_UNUSED)12591 s390_libcall_value (machine_mode mode, const_rtx fun ATTRIBUTE_UNUSED)
12592 {
12593 return s390_function_and_libcall_value (mode, NULL_TREE,
12594 NULL_TREE, true);
12595 }
12596
12597
12598 /* Create and return the va_list datatype.
12599
12600 On S/390, va_list is an array type equivalent to
12601
12602 typedef struct __va_list_tag
12603 {
12604 long __gpr;
12605 long __fpr;
12606 void *__overflow_arg_area;
12607 void *__reg_save_area;
12608 } va_list[1];
12609
12610 where __gpr and __fpr hold the number of general purpose
12611 or floating point arguments used up to now, respectively,
12612 __overflow_arg_area points to the stack location of the
12613 next argument passed on the stack, and __reg_save_area
12614 always points to the start of the register area in the
12615 call frame of the current function. The function prologue
12616 saves all registers used for argument passing into this
12617 area if the function uses variable arguments. */
12618
12619 static tree
s390_build_builtin_va_list(void)12620 s390_build_builtin_va_list (void)
12621 {
12622 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
12623
12624 record = lang_hooks.types.make_type (RECORD_TYPE);
12625
12626 type_decl =
12627 build_decl (BUILTINS_LOCATION,
12628 TYPE_DECL, get_identifier ("__va_list_tag"), record);
12629
12630 f_gpr = build_decl (BUILTINS_LOCATION,
12631 FIELD_DECL, get_identifier ("__gpr"),
12632 long_integer_type_node);
12633 f_fpr = build_decl (BUILTINS_LOCATION,
12634 FIELD_DECL, get_identifier ("__fpr"),
12635 long_integer_type_node);
12636 f_ovf = build_decl (BUILTINS_LOCATION,
12637 FIELD_DECL, get_identifier ("__overflow_arg_area"),
12638 ptr_type_node);
12639 f_sav = build_decl (BUILTINS_LOCATION,
12640 FIELD_DECL, get_identifier ("__reg_save_area"),
12641 ptr_type_node);
12642
12643 va_list_gpr_counter_field = f_gpr;
12644 va_list_fpr_counter_field = f_fpr;
12645
12646 DECL_FIELD_CONTEXT (f_gpr) = record;
12647 DECL_FIELD_CONTEXT (f_fpr) = record;
12648 DECL_FIELD_CONTEXT (f_ovf) = record;
12649 DECL_FIELD_CONTEXT (f_sav) = record;
12650
12651 TYPE_STUB_DECL (record) = type_decl;
12652 TYPE_NAME (record) = type_decl;
12653 TYPE_FIELDS (record) = f_gpr;
12654 DECL_CHAIN (f_gpr) = f_fpr;
12655 DECL_CHAIN (f_fpr) = f_ovf;
12656 DECL_CHAIN (f_ovf) = f_sav;
12657
12658 layout_type (record);
12659
12660 /* The correct type is an array type of one element. */
12661 return build_array_type (record, build_index_type (size_zero_node));
12662 }
12663
12664 /* Implement va_start by filling the va_list structure VALIST.
12665 STDARG_P is always true, and ignored.
12666 NEXTARG points to the first anonymous stack argument.
12667
12668 The following global variables are used to initialize
12669 the va_list structure:
12670
12671 crtl->args.info:
12672 holds number of gprs and fprs used for named arguments.
12673 crtl->args.arg_offset_rtx:
12674 holds the offset of the first anonymous stack argument
12675 (relative to the virtual arg pointer). */
12676
12677 static void
12678 s390_va_start (tree valist, rtx nextarg ATTRIBUTE_UNUSED)
12679 {
12680 HOST_WIDE_INT n_gpr, n_fpr;
12681 int off;
12682 tree f_gpr, f_fpr, f_ovf, f_sav;
12683 tree gpr, fpr, ovf, sav, t;
12684
12685 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
12686 f_fpr = DECL_CHAIN (f_gpr);
12687 f_ovf = DECL_CHAIN (f_fpr);
12688 f_sav = DECL_CHAIN (f_ovf);
12689
12690 valist = build_simple_mem_ref (valist);
12691 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
12692 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
12693 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
12694 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
12695
12696 /* Count number of gp and fp argument registers used. */
12697
12698 n_gpr = crtl->args.info.gprs;
12699 n_fpr = crtl->args.info.fprs;
12700
12701 if (cfun->va_list_gpr_size)
12702 {
12703 t = build2 (MODIFY_EXPR, TREE_TYPE (gpr), gpr,
12704 build_int_cst (NULL_TREE, n_gpr));
12705 TREE_SIDE_EFFECTS (t) = 1;
12706 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
12707 }
12708
12709 if (cfun->va_list_fpr_size)
12710 {
12711 t = build2 (MODIFY_EXPR, TREE_TYPE (fpr), fpr,
12712 build_int_cst (NULL_TREE, n_fpr));
12713 TREE_SIDE_EFFECTS (t) = 1;
12714 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
12715 }
12716
12717 if (flag_split_stack
12718 && (lookup_attribute ("no_split_stack", DECL_ATTRIBUTES (cfun->decl))
12719 == NULL)
12720 && cfun->machine->split_stack_varargs_pointer == NULL_RTX)
12721 {
12722 rtx reg;
12723 rtx_insn *seq;
12724
12725 reg = gen_reg_rtx (Pmode);
12726 cfun->machine->split_stack_varargs_pointer = reg;
12727
12728 start_sequence ();
12729 emit_move_insn (reg, gen_rtx_REG (Pmode, 1));
12730 seq = get_insns ();
12731 end_sequence ();
12732
12733 push_topmost_sequence ();
12734 emit_insn_after (seq, entry_of_function ());
12735 pop_topmost_sequence ();
12736 }
12737
12738 /* Find the overflow area.
12739 FIXME: This currently is too pessimistic when the vector ABI is
12740 enabled. In that case we *always* set up the overflow area
12741 pointer. */
12742 if (n_gpr + cfun->va_list_gpr_size > GP_ARG_NUM_REG
12743 || n_fpr + cfun->va_list_fpr_size > FP_ARG_NUM_REG
12744 || TARGET_VX_ABI)
12745 {
12746 if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
12747 t = make_tree (TREE_TYPE (ovf), virtual_incoming_args_rtx);
12748 else
12749 t = make_tree (TREE_TYPE (ovf), cfun->machine->split_stack_varargs_pointer);
12750
12751 off = INTVAL (crtl->args.arg_offset_rtx);
12752 off = off < 0 ? 0 : off;
12753 if (TARGET_DEBUG_ARG)
12754 fprintf (stderr, "va_start: n_gpr = %d, n_fpr = %d off %d\n",
12755 (int)n_gpr, (int)n_fpr, off);
12756
12757 t = fold_build_pointer_plus_hwi (t, off);
12758
12759 t = build2 (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
12760 TREE_SIDE_EFFECTS (t) = 1;
12761 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
12762 }
12763
12764 /* Find the register save area. */
12765 if ((cfun->va_list_gpr_size && n_gpr < GP_ARG_NUM_REG)
12766 || (cfun->va_list_fpr_size && n_fpr < FP_ARG_NUM_REG))
12767 {
12768 t = make_tree (TREE_TYPE (sav), return_address_pointer_rtx);
12769 t = fold_build_pointer_plus_hwi (t, -RETURN_REGNUM * UNITS_PER_LONG);
12770
12771 t = build2 (MODIFY_EXPR, TREE_TYPE (sav), sav, t);
12772 TREE_SIDE_EFFECTS (t) = 1;
12773 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
12774 }
12775 }
12776
12777 /* Implement va_arg by updating the va_list structure
12778 VALIST as required to retrieve an argument of type
12779 TYPE, and returning that argument.
12780
12781 Generates code equivalent to:
12782
12783 if (integral value) {
12784 if (size <= 4 && args.gpr < 5 ||
12785 size > 4 && args.gpr < 4 )
12786 ret = args.reg_save_area[args.gpr+8]
12787 else
12788 ret = *args.overflow_arg_area++;
12789 } else if (vector value) {
12790 ret = *args.overflow_arg_area;
12791 args.overflow_arg_area += size / 8;
12792 } else if (float value) {
12793 if (args.fpr < 2)
12794 ret = args.reg_save_area[args.fpr+64]
12795 else
12796 ret = *args.overflow_arg_area++;
12797 } else if (aggregate value) {
12798 if (args.gpr < 5)
12799 ret = *args.reg_save_area[args.gpr]
12800 else
12801 ret = **args.overflow_arg_area++;
12802 } */
12803
12804 static tree
12805 s390_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
12806 gimple_seq *post_p ATTRIBUTE_UNUSED)
12807 {
12808 tree f_gpr, f_fpr, f_ovf, f_sav;
12809 tree gpr, fpr, ovf, sav, reg, t, u;
12810 int indirect_p, size, n_reg, sav_ofs, sav_scale, max_reg;
12811 tree lab_false, lab_over = NULL_TREE;
12812 tree addr = create_tmp_var (ptr_type_node, "addr");
12813 bool left_align_p; /* How a value < UNITS_PER_LONG is aligned within
12814 a stack slot. */
12815
12816 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
12817 f_fpr = DECL_CHAIN (f_gpr);
12818 f_ovf = DECL_CHAIN (f_fpr);
12819 f_sav = DECL_CHAIN (f_ovf);
12820
12821 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
12822 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
12823 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
12824
12825 /* The tree for args* cannot be shared between gpr/fpr and ovf since
12826 both appear on a lhs. */
12827 valist = unshare_expr (valist);
12828 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
12829
12830 size = int_size_in_bytes (type);
12831
12832 s390_check_type_for_vector_abi (type, true, false);
12833
12834 if (pass_by_reference (NULL, TYPE_MODE (type), type, false))
12835 {
12836 if (TARGET_DEBUG_ARG)
12837 {
12838 fprintf (stderr, "va_arg: aggregate type");
12839 debug_tree (type);
12840 }
12841
12842 /* Aggregates are passed by reference. */
12843 indirect_p = 1;
12844 reg = gpr;
12845 n_reg = 1;
12846
12847 /* Kernel stack layout on 31 bit: It is assumed here that no padding
12848 will be added by s390_frame_info because for va_args an even number
12849 of gprs always has to be saved (r15-r2 = 14 regs). */
12850 sav_ofs = 2 * UNITS_PER_LONG;
12851 sav_scale = UNITS_PER_LONG;
12852 size = UNITS_PER_LONG;
12853 max_reg = GP_ARG_NUM_REG - n_reg;
12854 left_align_p = false;
12855 }
12856 else if (s390_function_arg_vector (TYPE_MODE (type), type))
12857 {
12858 if (TARGET_DEBUG_ARG)
12859 {
12860 fprintf (stderr, "va_arg: vector type");
12861 debug_tree (type);
12862 }
12863
12864 indirect_p = 0;
12865 reg = NULL_TREE;
12866 n_reg = 0;
12867 sav_ofs = 0;
12868 sav_scale = 8;
12869 max_reg = 0;
12870 left_align_p = true;
12871 }
12872 else if (s390_function_arg_float (TYPE_MODE (type), type))
12873 {
12874 if (TARGET_DEBUG_ARG)
12875 {
12876 fprintf (stderr, "va_arg: float type");
12877 debug_tree (type);
12878 }
12879
12880 /* FP args go in FP registers, if present. */
12881 indirect_p = 0;
12882 reg = fpr;
12883 n_reg = 1;
12884 sav_ofs = 16 * UNITS_PER_LONG;
12885 sav_scale = 8;
12886 max_reg = FP_ARG_NUM_REG - n_reg;
12887 left_align_p = false;
12888 }
12889 else
12890 {
12891 if (TARGET_DEBUG_ARG)
12892 {
12893 fprintf (stderr, "va_arg: other type");
12894 debug_tree (type);
12895 }
12896
12897 /* Otherwise into GP registers. */
12898 indirect_p = 0;
12899 reg = gpr;
12900 n_reg = (size + UNITS_PER_LONG - 1) / UNITS_PER_LONG;
12901
12902 /* Kernel stack layout on 31 bit: It is assumed here that no padding
12903 will be added by s390_frame_info because for va_args an even number
12904 of gprs always has to be saved (r15-r2 = 14 regs). */
12905 sav_ofs = 2 * UNITS_PER_LONG;
12906
12907 if (size < UNITS_PER_LONG)
12908 sav_ofs += UNITS_PER_LONG - size;
12909
12910 sav_scale = UNITS_PER_LONG;
12911 max_reg = GP_ARG_NUM_REG - n_reg;
12912 left_align_p = false;
12913 }
12914
12915 /* Pull the value out of the saved registers ... */
12916
12917 if (reg != NULL_TREE)
12918 {
12919 /*
12920 if (reg > ((typeof (reg))max_reg))
12921 goto lab_false;
12922
12923 addr = sav + sav_ofs + reg * sav_scale;
12924
12925 goto lab_over;
12926
12927 lab_false:
12928 */
12929
12930 lab_false = create_artificial_label (UNKNOWN_LOCATION);
12931 lab_over = create_artificial_label (UNKNOWN_LOCATION);
12932
12933 t = fold_convert (TREE_TYPE (reg), size_int (max_reg));
12934 t = build2 (GT_EXPR, boolean_type_node, reg, t);
12935 u = build1 (GOTO_EXPR, void_type_node, lab_false);
12936 t = build3 (COND_EXPR, void_type_node, t, u, NULL_TREE);
12937 gimplify_and_add (t, pre_p);
12938
12939 t = fold_build_pointer_plus_hwi (sav, sav_ofs);
12940 u = build2 (MULT_EXPR, TREE_TYPE (reg), reg,
12941 fold_convert (TREE_TYPE (reg), size_int (sav_scale)));
12942 t = fold_build_pointer_plus (t, u);
12943
12944 gimplify_assign (addr, t, pre_p);
12945
12946 gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));
12947
12948 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_false));
12949 }
12950
12951 /* ... Otherwise out of the overflow area. */
12952
12953 t = ovf;
12954 if (size < UNITS_PER_LONG && !left_align_p)
12955 t = fold_build_pointer_plus_hwi (t, UNITS_PER_LONG - size);
12956
12957 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
12958
12959 gimplify_assign (addr, t, pre_p);
12960
12961 if (size < UNITS_PER_LONG && left_align_p)
12962 t = fold_build_pointer_plus_hwi (t, UNITS_PER_LONG);
12963 else
12964 t = fold_build_pointer_plus_hwi (t, size);
12965
12966 gimplify_assign (ovf, t, pre_p);
12967
12968 if (reg != NULL_TREE)
12969 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_over));
12970
12971
12972 /* Increment register save count. */
12973
12974 if (n_reg > 0)
12975 {
12976 u = build2 (PREINCREMENT_EXPR, TREE_TYPE (reg), reg,
12977 fold_convert (TREE_TYPE (reg), size_int (n_reg)));
12978 gimplify_and_add (u, pre_p);
12979 }
12980
12981 if (indirect_p)
12982 {
12983 t = build_pointer_type_for_mode (build_pointer_type (type),
12984 ptr_mode, true);
12985 addr = fold_convert (t, addr);
12986 addr = build_va_arg_indirect_ref (addr);
12987 }
12988 else
12989 {
12990 t = build_pointer_type_for_mode (type, ptr_mode, true);
12991 addr = fold_convert (t, addr);
12992 }
12993
12994 return build_va_arg_indirect_ref (addr);
12995 }
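
/* Illustration only (a sketch, not from the original sources): user-level
   varargs code whose va_arg expansion is produced by s390_gimplify_va_arg
   above.  "sum" is a made-up example name:

     #include <stdarg.h>

     long
     sum (int n, ...)
     {
       va_list ap;
       long s = 0;

       va_start (ap, n);
       while (n-- > 0)
         s += va_arg (ap, long);  // integral case: fetched from the
                                  // register save area while GPRs remain,
                                  // then from the overflow area
       va_end (ap);
       return s;
     }  */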
12996
12997 /* Emit rtl for the tbegin or tbegin_retry (RETRY != NULL_RTX)
12998 expanders.
12999 DEST - Register location where CC will be stored.
13000 TDB - Pointer to a 256 byte area where to store the transaction
13001 diagnostic block. NULL if TDB is not needed.
13002 RETRY - Retry count value. If non-NULL a retry loop for CC2
13003 is emitted.
13004 CLOBBER_FPRS_P - If true, clobbers for all FPRs are emitted as part
13005 of the tbegin instruction pattern. */
13006
13007 void
13008 s390_expand_tbegin (rtx dest, rtx tdb, rtx retry, bool clobber_fprs_p)
13009 {
13010 rtx retry_plus_two = gen_reg_rtx (SImode);
13011 rtx retry_reg = gen_reg_rtx (SImode);
13012 rtx_code_label *retry_label = NULL;
13013
13014 if (retry != NULL_RTX)
13015 {
13016 emit_move_insn (retry_reg, retry);
13017 emit_insn (gen_addsi3 (retry_plus_two, retry_reg, const2_rtx));
13018 emit_insn (gen_addsi3 (retry_reg, retry_reg, const1_rtx));
13019 retry_label = gen_label_rtx ();
13020 emit_label (retry_label);
13021 }
13022
13023 if (clobber_fprs_p)
13024 {
13025 if (TARGET_VX)
13026 emit_insn (gen_tbegin_1_z13 (gen_rtx_CONST_INT (VOIDmode, TBEGIN_MASK),
13027 tdb));
13028 else
13029 emit_insn (gen_tbegin_1 (gen_rtx_CONST_INT (VOIDmode, TBEGIN_MASK),
13030 tdb));
13031 }
13032 else
13033 emit_insn (gen_tbegin_nofloat_1 (gen_rtx_CONST_INT (VOIDmode, TBEGIN_MASK),
13034 tdb));
13035
13036 emit_move_insn (dest, gen_rtx_UNSPEC (SImode,
13037 gen_rtvec (1, gen_rtx_REG (CCRAWmode,
13038 CC_REGNUM)),
13039 UNSPEC_CC_TO_INT));
13040 if (retry != NULL_RTX)
13041 {
13042 const int CC0 = 1 << 3;
13043 const int CC1 = 1 << 2;
13044 const int CC3 = 1 << 0;
13045 rtx jump;
13046 rtx count = gen_reg_rtx (SImode);
13047 rtx_code_label *leave_label = gen_label_rtx ();
13048
13049 /* Exit for success and permanent failures. */
13050 jump = s390_emit_jump (leave_label,
13051 gen_rtx_EQ (VOIDmode,
13052 gen_rtx_REG (CCRAWmode, CC_REGNUM),
13053 gen_rtx_CONST_INT (VOIDmode, CC0 | CC1 | CC3)));
13054 LABEL_NUSES (leave_label) = 1;
13055
13056 /* CC2 - transient failure. Perform retry with ppa. */
13057 emit_move_insn (count, retry_plus_two);
13058 emit_insn (gen_subsi3 (count, count, retry_reg));
13059 emit_insn (gen_tx_assist (count));
13060 jump = emit_jump_insn (gen_doloop_si64 (retry_label,
13061 retry_reg,
13062 retry_reg));
13063 JUMP_LABEL (jump) = retry_label;
13064 LABEL_NUSES (retry_label) = 1;
13065 emit_label (leave_label);
13066 }
13067 }
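
/* Illustration only (a sketch, not from the original sources): typical
   user code reaching this expander via the documented s390 HTM builtins;
   "counter" and "increment" are made-up names, and _HTM_TBEGIN_STARTED
   comes from htmintrin.h:

     #include <htmintrin.h>

     int counter;

     void
     increment (void)
     {
       if (__builtin_tbegin (0) == _HTM_TBEGIN_STARTED)
         {
           counter++;            // transactional path
           __builtin_tend ();
         }
       else
         counter++;              // abort path; real code would take a lock
     }  */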
13068
13069
13070 /* Return the decl for the target specific builtin with the function
13071 code FCODE. */
13072
13073 static tree
13074 s390_builtin_decl (unsigned fcode, bool initialized_p ATTRIBUTE_UNUSED)
13075 {
13076 if (fcode >= S390_BUILTIN_MAX)
13077 return error_mark_node;
13078
13079 return s390_builtin_decls[fcode];
13080 }
13081
13082 /* We call mcount before the function prologue. So a profiled leaf
13083 function should stay a leaf function. */
13084
13085 static bool
13086 s390_keep_leaf_when_profiled ()
13087 {
13088 return true;
13089 }
13090
13091 /* Output assembly code for the trampoline template to
13092 stdio stream FILE.
13093
13094 On S/390, we use gpr 1 internally in the trampoline code;
13095 gpr 0 is used to hold the static chain. */
13096
13097 static void
13098 s390_asm_trampoline_template (FILE *file)
13099 {
13100 rtx op[2];
13101 op[0] = gen_rtx_REG (Pmode, 0);
13102 op[1] = gen_rtx_REG (Pmode, 1);
13103
13104 if (TARGET_64BIT)
13105 {
13106 output_asm_insn ("basr\t%1,0", op); /* 2 byte */
13107 output_asm_insn ("lmg\t%0,%1,14(%1)", op); /* 6 byte */
13108 output_asm_insn ("br\t%1", op); /* 2 byte */
13109 ASM_OUTPUT_SKIP (file, (HOST_WIDE_INT)(TRAMPOLINE_SIZE - 10));
13110 }
13111 else
13112 {
13113 output_asm_insn ("basr\t%1,0", op); /* 2 byte */
13114 output_asm_insn ("lm\t%0,%1,6(%1)", op); /* 4 byte */
13115 output_asm_insn ("br\t%1", op); /* 2 byte */
13116 ASM_OUTPUT_SKIP (file, (HOST_WIDE_INT)(TRAMPOLINE_SIZE - 8));
13117 }
13118 }
13119
13120 /* Emit RTL insns to initialize the variable parts of a trampoline.
13121 FNADDR is an RTX for the address of the function's pure code.
13122 CXT is an RTX for the static chain value for the function. */
13123
13124 static void
13125 s390_trampoline_init (rtx m_tramp, tree fndecl, rtx cxt)
13126 {
13127 rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
13128 rtx mem;
13129
13130 emit_block_move (m_tramp, assemble_trampoline_template (),
13131 GEN_INT (2 * UNITS_PER_LONG), BLOCK_OP_NORMAL);
13132
13133 mem = adjust_address (m_tramp, Pmode, 2 * UNITS_PER_LONG);
13134 emit_move_insn (mem, cxt);
13135 mem = adjust_address (m_tramp, Pmode, 3 * UNITS_PER_LONG);
13136 emit_move_insn (mem, fnaddr);
13137 }
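
/* Illustration only (a sketch, not from the original sources): a
   trampoline is instantiated when the address of a nested function
   escapes, e.g. with the GNU C nested-function extension; "outer" and
   "inner" are made-up names:

     int
     outer (int x)
     {
       int inner (int y) { return x + y; }  // needs the static chain (gpr 0)
       int (*fp) (int) = inner;             // taking the address forces
                                            // a stack trampoline
       return fp (1);
     }  */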
13138
13139 /* Output assembler code to FILE to increment profiler label # LABELNO
13140 for profiling a function entry. */
13141
13142 void
13143 s390_function_profiler (FILE *file, int labelno)
13144 {
13145 rtx op[7];
13146
13147 char label[128];
13148 ASM_GENERATE_INTERNAL_LABEL (label, "LP", labelno);
13149
13150 fprintf (file, "# function profiler \n");
13151
13152 op[0] = gen_rtx_REG (Pmode, RETURN_REGNUM);
13153 op[1] = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
13154 op[1] = gen_rtx_MEM (Pmode, plus_constant (Pmode, op[1], UNITS_PER_LONG));
13155
13156 op[2] = gen_rtx_REG (Pmode, 1);
13157 op[3] = gen_rtx_SYMBOL_REF (Pmode, label);
13158 SYMBOL_REF_FLAGS (op[3]) = SYMBOL_FLAG_LOCAL;
13159
13160 op[4] = gen_rtx_SYMBOL_REF (Pmode, "_mcount");
13161 if (flag_pic)
13162 {
13163 op[4] = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op[4]), UNSPEC_PLT);
13164 op[4] = gen_rtx_CONST (Pmode, op[4]);
13165 }
13166
13167 if (TARGET_64BIT)
13168 {
13169 output_asm_insn ("stg\t%0,%1", op);
13170 output_asm_insn ("larl\t%2,%3", op);
13171 output_asm_insn ("brasl\t%0,%4", op);
13172 output_asm_insn ("lg\t%0,%1", op);
13173 }
13174 else if (TARGET_CPU_ZARCH)
13175 {
13176 output_asm_insn ("st\t%0,%1", op);
13177 output_asm_insn ("larl\t%2,%3", op);
13178 output_asm_insn ("brasl\t%0,%4", op);
13179 output_asm_insn ("l\t%0,%1", op);
13180 }
13181 else if (!flag_pic)
13182 {
13183 op[6] = gen_label_rtx ();
13184
13185 output_asm_insn ("st\t%0,%1", op);
13186 output_asm_insn ("bras\t%2,%l6", op);
13187 output_asm_insn (".long\t%4", op);
13188 output_asm_insn (".long\t%3", op);
13189 targetm.asm_out.internal_label (file, "L", CODE_LABEL_NUMBER (op[6]));
13190 output_asm_insn ("l\t%0,0(%2)", op);
13191 output_asm_insn ("l\t%2,4(%2)", op);
13192 output_asm_insn ("basr\t%0,%0", op);
13193 output_asm_insn ("l\t%0,%1", op);
13194 }
13195 else
13196 {
13197 op[5] = gen_label_rtx ();
13198 op[6] = gen_label_rtx ();
13199
13200 output_asm_insn ("st\t%0,%1", op);
13201 output_asm_insn ("bras\t%2,%l6", op);
13202 targetm.asm_out.internal_label (file, "L", CODE_LABEL_NUMBER (op[5]));
13203 output_asm_insn (".long\t%4-%l5", op);
13204 output_asm_insn (".long\t%3-%l5", op);
13205 targetm.asm_out.internal_label (file, "L", CODE_LABEL_NUMBER (op[6]));
13206 output_asm_insn ("lr\t%0,%2", op);
13207 output_asm_insn ("a\t%0,0(%2)", op);
13208 output_asm_insn ("a\t%2,4(%2)", op);
13209 output_asm_insn ("basr\t%0,%0", op);
13210 output_asm_insn ("l\t%0,%1", op);
13211 }
13212 }
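
/* Illustration only (a sketch, not from the original sources): for a
   64-bit target the sequence emitted above at each -pg function entry
   looks roughly like this (the label name depends on LABELNO):

	stg	%r14,8(%r15)	# save return address
	larl	%r1,.LP0	# address of the counter label
	brasl	%r14,_mcount	# call the profiler
	lg	%r14,8(%r15)	# restore return address  */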
13213
13214 /* Encode symbol attributes (local vs. global, tls model) of a SYMBOL_REF
13215 into its SYMBOL_REF_FLAGS. */
13216
13217 static void
13218 s390_encode_section_info (tree decl, rtx rtl, int first)
13219 {
13220 default_encode_section_info (decl, rtl, first);
13221
13222 if (TREE_CODE (decl) == VAR_DECL)
13223 {
13224 /* Store the alignment to be able to check if we can use
13225 a larl/load-relative instruction. We only handle the cases
13226 that can go wrong (i.e. no FUNC_DECLs). */
13227 if (DECL_ALIGN (decl) == 0 || DECL_ALIGN (decl) % 16)
13228 SYMBOL_FLAG_SET_NOTALIGN2 (XEXP (rtl, 0));
13229 else if (DECL_ALIGN (decl) % 32)
13230 SYMBOL_FLAG_SET_NOTALIGN4 (XEXP (rtl, 0));
13231 else if (DECL_ALIGN (decl) % 64)
13232 SYMBOL_FLAG_SET_NOTALIGN8 (XEXP (rtl, 0));
13233 }
13234
13235 /* Literal pool references don't have a decl so they are handled
13236 differently here. We rely on the information in the MEM_ALIGN
13237 entry to decide upon the alignment. */
13238 if (MEM_P (rtl)
13239 && GET_CODE (XEXP (rtl, 0)) == SYMBOL_REF
13240 && TREE_CONSTANT_POOL_ADDRESS_P (XEXP (rtl, 0)))
13241 {
13242 if (MEM_ALIGN (rtl) == 0 || MEM_ALIGN (rtl) % 16)
13243 SYMBOL_FLAG_SET_NOTALIGN2 (XEXP (rtl, 0));
13244 else if (MEM_ALIGN (rtl) % 32)
13245 SYMBOL_FLAG_SET_NOTALIGN4 (XEXP (rtl, 0));
13246 else if (MEM_ALIGN (rtl) % 64)
13247 SYMBOL_FLAG_SET_NOTALIGN8 (XEXP (rtl, 0));
13248 }
13249 }
13250
13251 /* Output thunk to FILE that implements a C++ virtual function call (with
13252 multiple inheritance) to FUNCTION. The thunk adjusts the this pointer
13253 by DELTA, and unless VCALL_OFFSET is zero, applies an additional adjustment
13254 stored at VCALL_OFFSET in the vtable whose address is located at offset 0
13255 relative to the resulting this pointer. */
13256
13257 static void
13258 s390_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
13259 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
13260 tree function)
13261 {
13262 rtx op[10];
13263 int nonlocal = 0;
13264
13265 /* Make sure unwind info is emitted for the thunk if needed. */
13266 final_start_function (emit_barrier (), file, 1);
13267
13268 /* Operand 0 is the target function. */
13269 op[0] = XEXP (DECL_RTL (function), 0);
13270 if (flag_pic && !SYMBOL_REF_LOCAL_P (op[0]))
13271 {
13272 nonlocal = 1;
13273 op[0] = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op[0]),
13274 TARGET_64BIT ? UNSPEC_PLT : UNSPEC_GOT);
13275 op[0] = gen_rtx_CONST (Pmode, op[0]);
13276 }
13277
13278 /* Operand 1 is the 'this' pointer. */
13279 if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
13280 op[1] = gen_rtx_REG (Pmode, 3);
13281 else
13282 op[1] = gen_rtx_REG (Pmode, 2);
13283
13284 /* Operand 2 is the delta. */
13285 op[2] = GEN_INT (delta);
13286
13287 /* Operand 3 is the vcall_offset. */
13288 op[3] = GEN_INT (vcall_offset);
13289
13290 /* Operand 4 is the temporary register. */
13291 op[4] = gen_rtx_REG (Pmode, 1);
13292
13293 /* Operands 5 to 8 can be used as labels. */
13294 op[5] = NULL_RTX;
13295 op[6] = NULL_RTX;
13296 op[7] = NULL_RTX;
13297 op[8] = NULL_RTX;
13298
13299 /* Operand 9 can be used as a temporary register. */
13300 op[9] = NULL_RTX;
13301
13302 /* Generate code. */
13303 if (TARGET_64BIT)
13304 {
13305 /* Setup literal pool pointer if required. */
13306 if ((!DISP_IN_RANGE (delta)
13307 && !CONST_OK_FOR_K (delta)
13308 && !CONST_OK_FOR_Os (delta))
13309 || (!DISP_IN_RANGE (vcall_offset)
13310 && !CONST_OK_FOR_K (vcall_offset)
13311 && !CONST_OK_FOR_Os (vcall_offset)))
13312 {
13313 op[5] = gen_label_rtx ();
13314 output_asm_insn ("larl\t%4,%5", op);
13315 }
13316
13317 /* Add DELTA to this pointer. */
13318 if (delta)
13319 {
13320 if (CONST_OK_FOR_J (delta))
13321 output_asm_insn ("la\t%1,%2(%1)", op);
13322 else if (DISP_IN_RANGE (delta))
13323 output_asm_insn ("lay\t%1,%2(%1)", op);
13324 else if (CONST_OK_FOR_K (delta))
13325 output_asm_insn ("aghi\t%1,%2", op);
13326 else if (CONST_OK_FOR_Os (delta))
13327 output_asm_insn ("agfi\t%1,%2", op);
13328 else
13329 {
13330 op[6] = gen_label_rtx ();
13331 output_asm_insn ("agf\t%1,%6-%5(%4)", op);
13332 }
13333 }
13334
13335 /* Perform vcall adjustment. */
13336 if (vcall_offset)
13337 {
13338 if (DISP_IN_RANGE (vcall_offset))
13339 {
13340 output_asm_insn ("lg\t%4,0(%1)", op);
13341 output_asm_insn ("ag\t%1,%3(%4)", op);
13342 }
13343 else if (CONST_OK_FOR_K (vcall_offset))
13344 {
13345 output_asm_insn ("lghi\t%4,%3", op);
13346 output_asm_insn ("ag\t%4,0(%1)", op);
13347 output_asm_insn ("ag\t%1,0(%4)", op);
13348 }
13349 else if (CONST_OK_FOR_Os (vcall_offset))
13350 {
13351 output_asm_insn ("lgfi\t%4,%3", op);
13352 output_asm_insn ("ag\t%4,0(%1)", op);
13353 output_asm_insn ("ag\t%1,0(%4)", op);
13354 }
13355 else
13356 {
13357 op[7] = gen_label_rtx ();
13358 output_asm_insn ("llgf\t%4,%7-%5(%4)", op);
13359 output_asm_insn ("ag\t%4,0(%1)", op);
13360 output_asm_insn ("ag\t%1,0(%4)", op);
13361 }
13362 }
13363
13364 /* Jump to target. */
13365 output_asm_insn ("jg\t%0", op);
13366
13367 /* Output literal pool if required. */
13368 if (op[5])
13369 {
13370 output_asm_insn (".align\t4", op);
13371 targetm.asm_out.internal_label (file, "L",
13372 CODE_LABEL_NUMBER (op[5]));
13373 }
13374 if (op[6])
13375 {
13376 targetm.asm_out.internal_label (file, "L",
13377 CODE_LABEL_NUMBER (op[6]));
13378 output_asm_insn (".long\t%2", op);
13379 }
13380 if (op[7])
13381 {
13382 targetm.asm_out.internal_label (file, "L",
13383 CODE_LABEL_NUMBER (op[7]));
13384 output_asm_insn (".long\t%3", op);
13385 }
13386 }
13387 else
13388 {
13389 /* Setup base pointer if required. */
13390 if (!vcall_offset
13391 || (!DISP_IN_RANGE (delta)
13392 && !CONST_OK_FOR_K (delta)
13393 && !CONST_OK_FOR_Os (delta))
13394 || (!DISP_IN_RANGE (vcall_offset)
13395 && !CONST_OK_FOR_K (vcall_offset)
13396 && !CONST_OK_FOR_Os (vcall_offset)))
13397 {
13398 op[5] = gen_label_rtx ();
13399 output_asm_insn ("basr\t%4,0", op);
13400 targetm.asm_out.internal_label (file, "L",
13401 CODE_LABEL_NUMBER (op[5]));
13402 }
13403
13404 /* Add DELTA to this pointer. */
13405 if (delta)
13406 {
13407 if (CONST_OK_FOR_J (delta))
13408 output_asm_insn ("la\t%1,%2(%1)", op);
13409 else if (DISP_IN_RANGE (delta))
13410 output_asm_insn ("lay\t%1,%2(%1)", op);
13411 else if (CONST_OK_FOR_K (delta))
13412 output_asm_insn ("ahi\t%1,%2", op);
13413 else if (CONST_OK_FOR_Os (delta))
13414 output_asm_insn ("afi\t%1,%2", op);
13415 else
13416 {
13417 op[6] = gen_label_rtx ();
13418 output_asm_insn ("a\t%1,%6-%5(%4)", op);
13419 }
13420 }
13421
13422 /* Perform vcall adjustment. */
13423 if (vcall_offset)
13424 {
13425 if (CONST_OK_FOR_J (vcall_offset))
13426 {
13427 output_asm_insn ("l\t%4,0(%1)", op);
13428 output_asm_insn ("a\t%1,%3(%4)", op);
13429 }
13430 else if (DISP_IN_RANGE (vcall_offset))
13431 {
13432 output_asm_insn ("l\t%4,0(%1)", op);
13433 output_asm_insn ("ay\t%1,%3(%4)", op);
13434 }
13435 else if (CONST_OK_FOR_K (vcall_offset))
13436 {
13437 output_asm_insn ("lhi\t%4,%3", op);
13438 output_asm_insn ("a\t%4,0(%1)", op);
13439 output_asm_insn ("a\t%1,0(%4)", op);
13440 }
13441 else if (CONST_OK_FOR_Os (vcall_offset))
13442 {
13443 output_asm_insn ("iilf\t%4,%3", op);
13444 output_asm_insn ("a\t%4,0(%1)", op);
13445 output_asm_insn ("a\t%1,0(%4)", op);
13446 }
13447 else
13448 {
13449 op[7] = gen_label_rtx ();
13450 output_asm_insn ("l\t%4,%7-%5(%4)", op);
13451 output_asm_insn ("a\t%4,0(%1)", op);
13452 output_asm_insn ("a\t%1,0(%4)", op);
13453 }
13454
13455 /* We had to clobber the base pointer register.
13456 Re-setup the base pointer (with a different base). */
13457 op[5] = gen_label_rtx ();
13458 output_asm_insn ("basr\t%4,0", op);
13459 targetm.asm_out.internal_label (file, "L",
13460 CODE_LABEL_NUMBER (op[5]));
13461 }
13462
13463 /* Jump to target. */
13464 op[8] = gen_label_rtx ();
13465
13466 if (!flag_pic)
13467 output_asm_insn ("l\t%4,%8-%5(%4)", op);
13468 else if (!nonlocal)
13469 output_asm_insn ("a\t%4,%8-%5(%4)", op);
13470 /* We cannot call through .plt, since .plt requires %r12 loaded. */
13471 else if (flag_pic == 1)
13472 {
13473 output_asm_insn ("a\t%4,%8-%5(%4)", op);
13474 output_asm_insn ("l\t%4,%0(%4)", op);
13475 }
13476 else if (flag_pic == 2)
13477 {
13478 op[9] = gen_rtx_REG (Pmode, 0);
13479 output_asm_insn ("l\t%9,%8-4-%5(%4)", op);
13480 output_asm_insn ("a\t%4,%8-%5(%4)", op);
13481 output_asm_insn ("ar\t%4,%9", op);
13482 output_asm_insn ("l\t%4,0(%4)", op);
13483 }
13484
13485 output_asm_insn ("br\t%4", op);
13486
13487 /* Output literal pool. */
13488 output_asm_insn (".align\t4", op);
13489
13490 if (nonlocal && flag_pic == 2)
13491 output_asm_insn (".long\t%0", op);
13492 if (nonlocal)
13493 {
13494 op[0] = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
13495 SYMBOL_REF_FLAGS (op[0]) = SYMBOL_FLAG_LOCAL;
13496 }
13497
13498 targetm.asm_out.internal_label (file, "L", CODE_LABEL_NUMBER (op[8]));
13499 if (!flag_pic)
13500 output_asm_insn (".long\t%0", op);
13501 else
13502 output_asm_insn (".long\t%0-%5", op);
13503
13504 if (op[6])
13505 {
13506 targetm.asm_out.internal_label (file, "L",
13507 CODE_LABEL_NUMBER (op[6]));
13508 output_asm_insn (".long\t%2", op);
13509 }
13510 if (op[7])
13511 {
13512 targetm.asm_out.internal_label (file, "L",
13513 CODE_LABEL_NUMBER (op[7]));
13514 output_asm_insn (".long\t%3", op);
13515 }
13516 }
13517 final_end_function ();
13518 }
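
/* Illustration only (a sketch, not from the original sources): thunks
   like the one above are needed for C++ multiple inheritance; A, B and
   C are made-up names:

     struct A { virtual void f (); };
     struct B { virtual void g (); };
     struct C : A, B { void g (); };

   Calling g through a B* that points to a C object goes via a thunk
   which adjusts the this pointer by DELTA before jumping to C::g.  */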
13519
13520 /* Output either an indirect jump or an indirect call
13521 (RETURN_ADDR_REGNO != INVALID_REGNUM) with target register REGNO
13522 using a branch trampoline disabling branch target prediction. */
13523
13524 void
13525 s390_indirect_branch_via_thunk (unsigned int regno,
13526 unsigned int return_addr_regno,
13527 rtx comparison_operator,
13528 enum s390_indirect_branch_type type)
13529 {
13530 enum s390_indirect_branch_option option;
13531
13532 if (type == s390_indirect_branch_type_return)
13533 {
13534 if (s390_return_addr_from_memory ())
13535 option = s390_opt_function_return_mem;
13536 else
13537 option = s390_opt_function_return_reg;
13538 }
13539 else if (type == s390_indirect_branch_type_jump)
13540 option = s390_opt_indirect_branch_jump;
13541 else if (type == s390_indirect_branch_type_call)
13542 option = s390_opt_indirect_branch_call;
13543 else
13544 gcc_unreachable ();
13545
13546 if (TARGET_INDIRECT_BRANCH_TABLE)
13547 {
13548 char label[32];
13549
13550 ASM_GENERATE_INTERNAL_LABEL (label,
13551 indirect_branch_table_label[option],
13552 indirect_branch_table_label_no[option]++);
13553 ASM_OUTPUT_LABEL (asm_out_file, label);
13554 }
13555
13556 if (return_addr_regno != INVALID_REGNUM)
13557 {
13558 gcc_assert (comparison_operator == NULL_RTX);
13559 fprintf (asm_out_file, " \tbrasl\t%%r%d,", return_addr_regno);
13560 }
13561 else
13562 {
13563 fputs (" \tjg", asm_out_file);
13564 if (comparison_operator != NULL_RTX)
13565 print_operand (asm_out_file, comparison_operator, 'C');
13566
13567 fputs ("\t", asm_out_file);
13568 }
13569
13570 if (TARGET_CPU_Z10)
13571 fprintf (asm_out_file,
13572 TARGET_INDIRECT_BRANCH_THUNK_NAME_EXRL "\n",
13573 regno);
13574 else
13575 fprintf (asm_out_file,
13576 TARGET_INDIRECT_BRANCH_THUNK_NAME_EX "\n",
13577 INDIRECT_BRANCH_THUNK_REGNUM, regno);
13578
13579 if ((option == s390_opt_indirect_branch_jump
13580 && cfun->machine->indirect_branch_jump == indirect_branch_thunk)
13581 || (option == s390_opt_indirect_branch_call
13582 && cfun->machine->indirect_branch_call == indirect_branch_thunk)
13583 || (option == s390_opt_function_return_reg
13584 && cfun->machine->function_return_reg == indirect_branch_thunk)
13585 || (option == s390_opt_function_return_mem
13586 && cfun->machine->function_return_mem == indirect_branch_thunk))
13587 {
13588 if (TARGET_CPU_Z10)
13589 indirect_branch_z10thunk_mask |= (1 << regno);
13590 else
13591 indirect_branch_prez10thunk_mask |= (1 << regno);
13592 }
13593 }
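
/* Illustration only (a sketch, not from the original sources): with
   -mindirect-branch=thunk an indirect call through %r1 on z10 or newer
   is emitted along the lines of

	brasl	%r14,__s390_indirect_jump_r1

   where the exact thunk name comes from
   TARGET_INDIRECT_BRANCH_THUNK_NAME_EXRL and the out-of-line thunk
   reaches the target via exrl instead of a predicted indirect
   branch.  */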
13594
13595 /* Output an inline thunk for indirect jumps. EXECUTE_TARGET can
13596 either be an address register or a label pointing to the location
13597 of the jump instruction. */
13598
13599 void
13600 s390_indirect_branch_via_inline_thunk (rtx execute_target)
13601 {
13602 if (TARGET_INDIRECT_BRANCH_TABLE)
13603 {
13604 char label[32];
13605
13606 ASM_GENERATE_INTERNAL_LABEL (label,
13607 indirect_branch_table_label[s390_opt_indirect_branch_jump],
13608 indirect_branch_table_label_no[s390_opt_indirect_branch_jump]++);
13609 ASM_OUTPUT_LABEL (asm_out_file, label);
13610 }
13611
13612 if (!TARGET_ZARCH)
13613 fputs ("\t.machinemode zarch\n", asm_out_file);
13614
13615 if (REG_P (execute_target))
13616 fprintf (asm_out_file, "\tex\t%%r0,0(%%r%d)\n", REGNO (execute_target));
13617 else
13618 output_asm_insn ("\texrl\t%%r0,%0", &execute_target);
13619
13620 if (!TARGET_ZARCH)
13621 fputs ("\t.machinemode esa\n", asm_out_file);
13622
13623 fputs ("0:\tj\t0b\n", asm_out_file);
13624 }
13625
13626 static bool
13627 s390_valid_pointer_mode (scalar_int_mode mode)
13628 {
13629 return (mode == SImode || (TARGET_64BIT && mode == DImode));
13630 }
13631
13632 /* Checks whether the given CALL_EXPR would use a call-saved
13633 register. This is used to decide whether sibling call
13634 optimization could be performed on the respective function
13635 call. */
13636
13637 static bool
13638 s390_call_saved_register_used (tree call_expr)
13639 {
13640 CUMULATIVE_ARGS cum_v;
13641 cumulative_args_t cum;
13642 tree parameter;
13643 machine_mode mode;
13644 tree type;
13645 rtx parm_rtx;
13646 int reg, i;
13647
13648 INIT_CUMULATIVE_ARGS (cum_v, NULL, NULL, 0, 0);
13649 cum = pack_cumulative_args (&cum_v);
13650
13651 for (i = 0; i < call_expr_nargs (call_expr); i++)
13652 {
13653 parameter = CALL_EXPR_ARG (call_expr, i);
13654 gcc_assert (parameter);
13655
13656 /* For an undeclared variable passed as parameter we will get
13657 an ERROR_MARK node here. */
13658 if (TREE_CODE (parameter) == ERROR_MARK)
13659 return true;
13660
13661 type = TREE_TYPE (parameter);
13662 gcc_assert (type);
13663
13664 mode = TYPE_MODE (type);
13665 gcc_assert (mode);
13666
13667 /* We assume that in the target function all parameters are
13668 named. This only has an impact on vector argument register
13669 usage none of which is call-saved. */
13670 if (pass_by_reference (&cum_v, mode, type, true))
13671 {
13672 mode = Pmode;
13673 type = build_pointer_type (type);
13674 }
13675
13676 parm_rtx = s390_function_arg (cum, mode, type, true);
13677
13678 s390_function_arg_advance (cum, mode, type, true);
13679
13680 if (!parm_rtx)
13681 continue;
13682
13683 if (REG_P (parm_rtx))
13684 {
13685 for (reg = 0; reg < REG_NREGS (parm_rtx); reg++)
13686 if (!call_used_regs[reg + REGNO (parm_rtx)])
13687 return true;
13688 }
13689
13690 if (GET_CODE (parm_rtx) == PARALLEL)
13691 {
13692 int i;
13693
13694 for (i = 0; i < XVECLEN (parm_rtx, 0); i++)
13695 {
13696 rtx r = XEXP (XVECEXP (parm_rtx, 0, i), 0);
13697
13698 gcc_assert (REG_P (r));
13699
13700 for (reg = 0; reg < REG_NREGS (r); reg++)
13701 if (!call_used_regs[reg + REGNO (r)])
13702 return true;
13703 }
13704 }
13705
13706 }
13707 return false;
13708 }
13709
13710 /* Return true if the given call expression can be
13711 turned into a sibling call.
13712 DECL holds the declaration of the function to be called whereas
13713 EXP is the call expression itself. */
13714
13715 static bool
13716 s390_function_ok_for_sibcall (tree decl, tree exp)
13717 {
13718 /* The TPF epilogue uses register 1. */
13719 if (TARGET_TPF_PROFILING)
13720 return false;
13721
13722 /* The 31 bit PLT code uses register 12 (GOT pointer - call-saved)
13723 which would have to be restored before the sibcall. */
13724 if (!TARGET_64BIT && flag_pic && decl && !targetm.binds_local_p (decl))
13725 return false;
13726
13727 /* The thunks for indirect branches require r1 if no exrl is
13728 available. r1 might not be available when doing a sibling
13729 call. */
13730 if (TARGET_INDIRECT_BRANCH_NOBP_CALL
13731 && !TARGET_CPU_Z10
13732 && !decl)
13733 return false;
13734
13735 /* Register 6 on s390 is available as an argument register but is
13736 call-saved. This makes functions needing this register for arguments
13737 not suitable for sibcalls. */
13738 return !s390_call_saved_register_used (exp);
13739 }
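
/* Illustration only (a sketch, not from the original sources): %r6 is
   both the fifth integer argument register and call-saved, so a call
   like the following ("callee" and "caller" are made-up names)

     void callee (int, int, int, int, int);
     void caller (void) { callee (0, 1, 2, 3, 4); }  // 5th arg in %r6

   makes s390_call_saved_register_used return true and is therefore
   never turned into a sibling call.  */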
13740
13741 /* Return the fixed registers used for condition codes. */
13742
13743 static bool
13744 s390_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
13745 {
13746 *p1 = CC_REGNUM;
13747 *p2 = INVALID_REGNUM;
13748
13749 return true;
13750 }
13751
13752 /* This function is used by the call expanders of the machine description.
13753 It emits the call insn itself together with the necessary operations
13754 to adjust the target address and returns the emitted insn.
13755 ADDR_LOCATION is the target address rtx
13756 TLS_CALL the location of the thread-local symbol
13757 RESULT_REG the register where the result of the call should be stored
13758 RETADDR_REG the register where the return address should be stored
13759 If this parameter is NULL_RTX the call is considered
13760 to be a sibling call. */
13761
13762 rtx_insn *
13763 s390_emit_call (rtx addr_location, rtx tls_call, rtx result_reg,
13764 rtx retaddr_reg)
13765 {
13766 bool plt_call = false;
13767 rtx_insn *insn;
13768 rtx vec[4] = { NULL_RTX };
13769 int elts = 0;
13770 rtx *call = &vec[0];
13771 rtx *clobber_ret_reg = &vec[1];
13772 rtx *use = &vec[2];
13773 rtx *clobber_thunk_reg = &vec[3];
13774 int i;
13775
13776 /* Direct function calls need special treatment. */
13777 if (GET_CODE (addr_location) == SYMBOL_REF)
13778 {
13779 /* When calling a global routine in PIC mode, we must
13780 replace the symbol itself with the PLT stub. */
13781 if (flag_pic && !SYMBOL_REF_LOCAL_P (addr_location))
13782 {
13783 if (TARGET_64BIT || retaddr_reg != NULL_RTX)
13784 {
13785 addr_location = gen_rtx_UNSPEC (Pmode,
13786 gen_rtvec (1, addr_location),
13787 UNSPEC_PLT);
13788 addr_location = gen_rtx_CONST (Pmode, addr_location);
13789 plt_call = true;
13790 }
13791 else
13792 /* For -fpic code the PLT entries might use r12 which is
13793 call-saved. Therefore we cannot do a sibcall when
13794 calling directly using a symbol ref. When reaching
13795 this point we decided (in s390_function_ok_for_sibcall)
13796 to do a sibcall for a function pointer but one of the
13797 optimizers was able to get rid of the function pointer
13798 by propagating the symbol ref into the call. This
13799 optimization is illegal for S/390 so we turn the direct
13800 call into an indirect call again. */
13801 addr_location = force_reg (Pmode, addr_location);
13802 }
13803
13804 /* Unless we can use the bras(l) insn, force the
13805 routine address into a register. */
13806 if (!TARGET_SMALL_EXEC && !TARGET_CPU_ZARCH)
13807 {
13808 if (flag_pic)
13809 addr_location = legitimize_pic_address (addr_location, 0);
13810 else
13811 addr_location = force_reg (Pmode, addr_location);
13812 }
13813 }
13814
13815 /* If it is already an indirect call or the code above moved the
13816 SYMBOL_REF to somewhere else, make sure the address can be found in
13817 register 1. */
13818 if (retaddr_reg == NULL_RTX
13819 && GET_CODE (addr_location) != SYMBOL_REF
13820 && !plt_call)
13821 {
13822 emit_move_insn (gen_rtx_REG (Pmode, SIBCALL_REGNUM), addr_location);
13823 addr_location = gen_rtx_REG (Pmode, SIBCALL_REGNUM);
13824 }
13825
13826 if (TARGET_INDIRECT_BRANCH_NOBP_CALL
13827 && GET_CODE (addr_location) != SYMBOL_REF
13828 && !plt_call)
13829 {
13830 /* Indirect branch thunks require the target to be a single GPR. */
13831 addr_location = force_reg (Pmode, addr_location);
13832
13833 /* Without exrl the indirect branch thunks need an additional
13834 register for larl;ex. */
13835 if (!TARGET_CPU_Z10)
13836 {
13837 *clobber_thunk_reg = gen_rtx_REG (Pmode, INDIRECT_BRANCH_THUNK_REGNUM);
13838 *clobber_thunk_reg = gen_rtx_CLOBBER (VOIDmode, *clobber_thunk_reg);
13839 }
13840 }
13841
13842 addr_location = gen_rtx_MEM (QImode, addr_location);
13843 *call = gen_rtx_CALL (VOIDmode, addr_location, const0_rtx);
13844
13845 if (result_reg != NULL_RTX)
13846 *call = gen_rtx_SET (result_reg, *call);
13847
13848 if (retaddr_reg != NULL_RTX)
13849 {
13850 *clobber_ret_reg = gen_rtx_CLOBBER (VOIDmode, retaddr_reg);
13851
13852 if (tls_call != NULL_RTX)
13853 *use = gen_rtx_USE (VOIDmode, tls_call);
13854 }
13855
13856
13857 for (i = 0; i < 4; i++)
13858 if (vec[i] != NULL_RTX)
13859 elts++;
13860
13861 if (elts > 1)
13862 {
13863 rtvec v;
13864 int e = 0;
13865
13866 v = rtvec_alloc (elts);
13867 for (i = 0; i < 4; i++)
13868 if (vec[i] != NULL_RTX)
13869 {
13870 RTVEC_ELT (v, e) = vec[i];
13871 e++;
13872 }
13873
13874 *call = gen_rtx_PARALLEL (VOIDmode, v);
13875 }
13876
13877 insn = emit_call_insn (*call);
13878
13879 /* 31-bit PLT stubs and tls calls use the GOT register implicitly. */
13880 if ((!TARGET_64BIT && plt_call) || tls_call != NULL_RTX)
13881 {
13882 /* s390_function_ok_for_sibcall should
13883 have denied sibcalls in this case. */
13884 gcc_assert (retaddr_reg != NULL_RTX);
13885 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), gen_rtx_REG (Pmode, 12));
13886 }
13887 return insn;
13888 }
13889
13890 /* Implement TARGET_CONDITIONAL_REGISTER_USAGE. */
13891
13892 static void
13893 s390_conditional_register_usage (void)
13894 {
13895 int i;
13896
13897 if (flag_pic)
13898 {
13899 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
13900 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
13901 }
13902 if (TARGET_CPU_ZARCH)
13903 {
13904 fixed_regs[BASE_REGNUM] = 0;
13905 call_used_regs[BASE_REGNUM] = 0;
13906 fixed_regs[RETURN_REGNUM] = 0;
13907 call_used_regs[RETURN_REGNUM] = 0;
13908 }
13909 if (TARGET_64BIT)
13910 {
13911 for (i = FPR8_REGNUM; i <= FPR15_REGNUM; i++)
13912 call_used_regs[i] = call_really_used_regs[i] = 0;
13913 }
13914 else
13915 {
13916 call_used_regs[FPR4_REGNUM] = call_really_used_regs[FPR4_REGNUM] = 0;
13917 call_used_regs[FPR6_REGNUM] = call_really_used_regs[FPR6_REGNUM] = 0;
13918 }
13919
13920 if (TARGET_SOFT_FLOAT)
13921 {
13922 for (i = FPR0_REGNUM; i <= FPR15_REGNUM; i++)
13923 call_used_regs[i] = fixed_regs[i] = 1;
13924 }
13925
13926 /* Disable v16 - v31 for non-vector target. */
13927 if (!TARGET_VX)
13928 {
13929 for (i = VR16_REGNUM; i <= VR31_REGNUM; i++)
13930 fixed_regs[i] = call_used_regs[i] = call_really_used_regs[i] = 1;
13931 }
13932 }
13933
13934 /* Corresponding function to eh_return expander. */
13935
13936 static GTY(()) rtx s390_tpf_eh_return_symbol;
13937 void
13938 s390_emit_tpf_eh_return (rtx target)
13939 {
13940 rtx_insn *insn;
13941 rtx reg, orig_ra;
13942
13943 if (!s390_tpf_eh_return_symbol)
13944 s390_tpf_eh_return_symbol = gen_rtx_SYMBOL_REF (Pmode, "__tpf_eh_return");
13945
13946 reg = gen_rtx_REG (Pmode, 2);
13947 orig_ra = gen_rtx_REG (Pmode, 3);
13948
13949 emit_move_insn (reg, target);
13950 emit_move_insn (orig_ra, get_hard_reg_initial_val (Pmode, RETURN_REGNUM));
13951 insn = s390_emit_call (s390_tpf_eh_return_symbol, NULL_RTX, reg,
13952 gen_rtx_REG (Pmode, RETURN_REGNUM));
13953 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), reg);
13954 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), orig_ra);
13955
13956 emit_move_insn (EH_RETURN_HANDLER_RTX, reg);
13957 }
13958
13959 /* Rework the prologue/epilogue to avoid saving/restoring
13960 registers unnecessarily. */
13961
13962 static void
13963 s390_optimize_prologue (void)
13964 {
13965 rtx_insn *insn, *new_insn, *next_insn;
13966
13967 /* Do a final recompute of the frame-related data. */
13968 s390_optimize_register_info ();
13969
13970 /* If all special registers are in fact used, there's nothing we
13971 can do, so no point in walking the insn list. */
13972
13973 if (cfun_frame_layout.first_save_gpr <= BASE_REGNUM
13974 && cfun_frame_layout.last_save_gpr >= BASE_REGNUM
13975 && (TARGET_CPU_ZARCH
13976 || (cfun_frame_layout.first_save_gpr <= RETURN_REGNUM
13977 && cfun_frame_layout.last_save_gpr >= RETURN_REGNUM)))
13978 return;
13979
13980 /* Search for prologue/epilogue insns and replace them. */
13981
13982 for (insn = get_insns (); insn; insn = next_insn)
13983 {
13984 int first, last, off;
13985 rtx set, base, offset;
13986 rtx pat;
13987
13988 next_insn = NEXT_INSN (insn);
13989
13990 if (! NONJUMP_INSN_P (insn) || ! RTX_FRAME_RELATED_P (insn))
13991 continue;
13992
13993 pat = PATTERN (insn);
13994
13995 /* Remove ldgr/lgdr instructions used for saving and restoring
13996 GPRs if possible. */
13997 if (TARGET_Z10)
13998 {
13999 rtx tmp_pat = pat;
14000
14001 if (INSN_CODE (insn) == CODE_FOR_stack_restore_from_fpr)
14002 tmp_pat = XVECEXP (pat, 0, 0);
14003
14004 if (GET_CODE (tmp_pat) == SET
14005 && GET_MODE (SET_SRC (tmp_pat)) == DImode
14006 && REG_P (SET_SRC (tmp_pat))
14007 && REG_P (SET_DEST (tmp_pat)))
14008 {
14009 int src_regno = REGNO (SET_SRC (tmp_pat));
14010 int dest_regno = REGNO (SET_DEST (tmp_pat));
14011 int gpr_regno;
14012 int fpr_regno;
14013
14014 if (!((GENERAL_REGNO_P (src_regno)
14015 && FP_REGNO_P (dest_regno))
14016 || (FP_REGNO_P (src_regno)
14017 && GENERAL_REGNO_P (dest_regno))))
14018 continue;
14019
14020 gpr_regno = GENERAL_REGNO_P (src_regno) ? src_regno : dest_regno;
14021 fpr_regno = FP_REGNO_P (src_regno) ? src_regno : dest_regno;
14022
14023 /* GPR must be call-saved, FPR must be call-clobbered. */
14024 if (!call_really_used_regs[fpr_regno]
14025 || call_really_used_regs[gpr_regno])
14026 continue;
14027
14028 /* It must not happen that what we once saved in an FPR now
14029 needs a stack slot. */
14030 gcc_assert (cfun_gpr_save_slot (gpr_regno) != SAVE_SLOT_STACK);
14031
14032 if (cfun_gpr_save_slot (gpr_regno) == SAVE_SLOT_NONE)
14033 {
14034 remove_insn (insn);
14035 continue;
14036 }
14037 }
14038 }
14039
14040 if (GET_CODE (pat) == PARALLEL
14041 && store_multiple_operation (pat, VOIDmode))
14042 {
14043 set = XVECEXP (pat, 0, 0);
14044 first = REGNO (SET_SRC (set));
14045 last = first + XVECLEN (pat, 0) - 1;
14046 offset = const0_rtx;
14047 base = eliminate_constant_term (XEXP (SET_DEST (set), 0), &offset);
14048 off = INTVAL (offset);
14049
14050 if (GET_CODE (base) != REG || off < 0)
14051 continue;
14052 if (cfun_frame_layout.first_save_gpr != -1
14053 && (cfun_frame_layout.first_save_gpr < first
14054 || cfun_frame_layout.last_save_gpr > last))
14055 continue;
14056 if (REGNO (base) != STACK_POINTER_REGNUM
14057 && REGNO (base) != HARD_FRAME_POINTER_REGNUM)
14058 continue;
14059 if (first > BASE_REGNUM || last < BASE_REGNUM)
14060 continue;
14061
14062 if (cfun_frame_layout.first_save_gpr != -1)
14063 {
14064 rtx s_pat = save_gprs (base,
14065 off + (cfun_frame_layout.first_save_gpr
14066 - first) * UNITS_PER_LONG,
14067 cfun_frame_layout.first_save_gpr,
14068 cfun_frame_layout.last_save_gpr);
14069 new_insn = emit_insn_before (s_pat, insn);
14070 INSN_ADDRESSES_NEW (new_insn, -1);
14071 }
14072
14073 remove_insn (insn);
14074 continue;
14075 }
14076
14077 if (cfun_frame_layout.first_save_gpr == -1
14078 && GET_CODE (pat) == SET
14079 && GENERAL_REG_P (SET_SRC (pat))
14080 && GET_CODE (SET_DEST (pat)) == MEM)
14081 {
14082 set = pat;
14083 first = REGNO (SET_SRC (set));
14084 offset = const0_rtx;
14085 base = eliminate_constant_term (XEXP (SET_DEST (set), 0), &offset);
14086 off = INTVAL (offset);
14087
14088 if (GET_CODE (base) != REG || off < 0)
14089 continue;
14090 if (REGNO (base) != STACK_POINTER_REGNUM
14091 && REGNO (base) != HARD_FRAME_POINTER_REGNUM)
14092 continue;
14093
14094 remove_insn (insn);
14095 continue;
14096 }
14097
14098 if (GET_CODE (pat) == PARALLEL
14099 && load_multiple_operation (pat, VOIDmode))
14100 {
14101 set = XVECEXP (pat, 0, 0);
14102 first = REGNO (SET_DEST (set));
14103 last = first + XVECLEN (pat, 0) - 1;
14104 offset = const0_rtx;
14105 base = eliminate_constant_term (XEXP (SET_SRC (set), 0), &offset);
14106 off = INTVAL (offset);
14107
14108 if (GET_CODE (base) != REG || off < 0)
14109 continue;
14110
14111 if (cfun_frame_layout.first_restore_gpr != -1
14112 && (cfun_frame_layout.first_restore_gpr < first
14113 || cfun_frame_layout.last_restore_gpr > last))
14114 continue;
14115 if (REGNO (base) != STACK_POINTER_REGNUM
14116 && REGNO (base) != HARD_FRAME_POINTER_REGNUM)
14117 continue;
14118 if (first > BASE_REGNUM || last < BASE_REGNUM)
14119 continue;
14120
14121 if (cfun_frame_layout.first_restore_gpr != -1)
14122 {
14123 rtx rpat = restore_gprs (base,
14124 off + (cfun_frame_layout.first_restore_gpr
14125 - first) * UNITS_PER_LONG,
14126 cfun_frame_layout.first_restore_gpr,
14127 cfun_frame_layout.last_restore_gpr);
14128
14129 /* Remove REG_CFA_RESTOREs for registers that we no
14130 longer need to save. */
14131 REG_NOTES (rpat) = REG_NOTES (insn);
14132 for (rtx *ptr = &REG_NOTES (rpat); *ptr; )
14133 if (REG_NOTE_KIND (*ptr) == REG_CFA_RESTORE
14134 && ((int) REGNO (XEXP (*ptr, 0))
14135 < cfun_frame_layout.first_restore_gpr))
14136 *ptr = XEXP (*ptr, 1);
14137 else
14138 ptr = &XEXP (*ptr, 1);
14139 new_insn = emit_insn_before (rpat, insn);
14140 RTX_FRAME_RELATED_P (new_insn) = 1;
14141 INSN_ADDRESSES_NEW (new_insn, -1);
14142 }
14143
14144 remove_insn (insn);
14145 continue;
14146 }
14147
14148 if (cfun_frame_layout.first_restore_gpr == -1
14149 && GET_CODE (pat) == SET
14150 && GENERAL_REG_P (SET_DEST (pat))
14151 && GET_CODE (SET_SRC (pat)) == MEM)
14152 {
14153 set = pat;
14154 first = REGNO (SET_DEST (set));
14155 offset = const0_rtx;
14156 base = eliminate_constant_term (XEXP (SET_SRC (set), 0), &offset);
14157 off = INTVAL (offset);
14158
14159 if (GET_CODE (base) != REG || off < 0)
14160 continue;
14161
14162 if (REGNO (base) != STACK_POINTER_REGNUM
14163 && REGNO (base) != HARD_FRAME_POINTER_REGNUM)
14164 continue;
14165
14166 remove_insn (insn);
14167 continue;
14168 }
14169 }
14170 }
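
/* Illustration only (a sketch, not from the original sources): on a
   64-bit target a conservatively emitted prologue save such as

	stmg	%r6,%r15,48(%r15)

   can be narrowed here to the registers that turned out to be live,
   e.g.

	stmg	%r12,%r15,96(%r15)

   with the matching load-multiple in the epilogue adjusted the same
   way.  */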
14171
14172 /* On z10 and later the dynamic branch prediction must see the
14173 backward jump within a certain window. If not, it falls back to
14174 the static prediction. This function rearranges the loop backward
14175 branch in a way which makes the static prediction always correct.
14176 The function returns true if it added an instruction. */
14177 static bool
14178 s390_fix_long_loop_prediction (rtx_insn *insn)
14179 {
14180 rtx set = single_set (insn);
14181 rtx code_label, label_ref;
14182 rtx_insn *uncond_jump;
14183 rtx_insn *cur_insn;
14184 rtx tmp;
14185 int distance;
14186
14187 /* This will exclude branch on count and branch on index patterns
14188 since these are correctly statically predicted. */
14189 if (!set
14190 || SET_DEST (set) != pc_rtx
14191 || GET_CODE (SET_SRC(set)) != IF_THEN_ELSE)
14192 return false;
14193
14194 /* Skip conditional returns. */
14195 if (ANY_RETURN_P (XEXP (SET_SRC (set), 1))
14196 && XEXP (SET_SRC (set), 2) == pc_rtx)
14197 return false;
14198
14199 label_ref = (GET_CODE (XEXP (SET_SRC (set), 1)) == LABEL_REF ?
14200 XEXP (SET_SRC (set), 1) : XEXP (SET_SRC (set), 2));
14201
14202 gcc_assert (GET_CODE (label_ref) == LABEL_REF);
14203
14204 code_label = XEXP (label_ref, 0);
14205
14206 if (INSN_ADDRESSES (INSN_UID (code_label)) == -1
14207 || INSN_ADDRESSES (INSN_UID (insn)) == -1
14208 || (INSN_ADDRESSES (INSN_UID (insn))
14209 - INSN_ADDRESSES (INSN_UID (code_label)) < PREDICT_DISTANCE))
14210 return false;
14211
14212 for (distance = 0, cur_insn = PREV_INSN (insn);
14213 distance < PREDICT_DISTANCE - 6;
14214 distance += get_attr_length (cur_insn), cur_insn = PREV_INSN (cur_insn))
14215 if (!cur_insn || JUMP_P (cur_insn) || LABEL_P (cur_insn))
14216 return false;
14217
14218 rtx_code_label *new_label = gen_label_rtx ();
14219 uncond_jump = emit_jump_insn_after (
14220 gen_rtx_SET (pc_rtx,
14221 gen_rtx_LABEL_REF (VOIDmode, code_label)),
14222 insn);
14223 emit_label_after (new_label, uncond_jump);
14224
14225 tmp = XEXP (SET_SRC (set), 1);
14226 XEXP (SET_SRC (set), 1) = XEXP (SET_SRC (set), 2);
14227 XEXP (SET_SRC (set), 2) = tmp;
14228 INSN_CODE (insn) = -1;
14229
14230 XEXP (label_ref, 0) = new_label;
14231 JUMP_LABEL (insn) = new_label;
14232 JUMP_LABEL (uncond_jump) = code_label;
14233
14234 return true;
14235 }
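
/* Illustration only (a sketch, not from the original sources): the
   rewrite above effectively turns

     .Lloop:	...
		jne	.Lloop		# distant backward conditional jump

   into

     .Lloop:	...
		je	.Lskip		# inverted, short forward branch
		j	.Lloop		# unconditional backward jump
     .Lskip:

   so the static prediction (forward conditional not taken, backward
   jump taken) matches typical loop behavior.  */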
14236
14237 /* Returns 1 if INSN reads the value of REG for purposes not related
14238 to addressing of memory, and 0 otherwise. */
14239 static int
14240 s390_non_addr_reg_read_p (rtx reg, rtx_insn *insn)
14241 {
14242 return reg_referenced_p (reg, PATTERN (insn))
14243 && !reg_used_in_mem_p (REGNO (reg), PATTERN (insn));
14244 }
14245
14246 /* Starting from INSN find_cond_jump looks downwards in the insn
14247 stream for a single jump insn which is the last user of the
14248 condition code set in INSN. */
14249 static rtx_insn *
14250 find_cond_jump (rtx_insn *insn)
14251 {
14252 for (; insn; insn = NEXT_INSN (insn))
14253 {
14254 rtx ite, cc;
14255
14256 if (LABEL_P (insn))
14257 break;
14258
14259 if (!JUMP_P (insn))
14260 {
14261 if (reg_mentioned_p (gen_rtx_REG (CCmode, CC_REGNUM), insn))
14262 break;
14263 continue;
14264 }
14265
14266 /* This will be triggered by a return. */
14267 if (GET_CODE (PATTERN (insn)) != SET)
14268 break;
14269
14270 gcc_assert (SET_DEST (PATTERN (insn)) == pc_rtx);
14271 ite = SET_SRC (PATTERN (insn));
14272
14273 if (GET_CODE (ite) != IF_THEN_ELSE)
14274 break;
14275
14276 cc = XEXP (XEXP (ite, 0), 0);
14277 if (!REG_P (cc) || !CC_REGNO_P (REGNO (cc)))
14278 break;
14279
14280 if (find_reg_note (insn, REG_DEAD, cc))
14281 return insn;
14282 break;
14283 }
14284
14285 return NULL;
14286 }
14287
14288 /* Swap the condition in COND and the operands in OP0 and OP1 so that
14289 the semantics does not change. If NULL_RTX is passed as COND the
14290 function tries to find the conditional jump starting with INSN. */
14291 static void
14292 s390_swap_cmp (rtx cond, rtx *op0, rtx *op1, rtx_insn *insn)
14293 {
14294 rtx tmp = *op0;
14295
14296 if (cond == NULL_RTX)
14297 {
14298 rtx_insn *jump = find_cond_jump (NEXT_INSN (insn));
14299 rtx set = jump ? single_set (jump) : NULL_RTX;
14300
14301 if (set == NULL_RTX)
14302 return;
14303
14304 cond = XEXP (SET_SRC (set), 0);
14305 }
14306
14307 *op0 = *op1;
14308 *op1 = tmp;
14309 PUT_CODE (cond, swap_condition (GET_CODE (cond)));
14310 }
14311
14312 /* On z10, instructions of the compare-and-branch family have the
14313 property of accessing the register occurring as second operand with
14314 its bits complemented. If such a compare is grouped with a second
14315 instruction that accesses the same register non-complemented, and
14316 if that register's value is delivered via a bypass, then the
14317 pipeline recycles, thereby causing significant performance decline.
14318 This function locates such situations and exchanges the two
14319 operands of the compare. The function returns true whenever it
14320 added an insn. */
14321 static bool
14322 s390_z10_optimize_cmp (rtx_insn *insn)
14323 {
14324 rtx_insn *prev_insn, *next_insn;
14325 bool insn_added_p = false;
14326 rtx cond, *op0, *op1;
14327
14328 if (GET_CODE (PATTERN (insn)) == PARALLEL)
14329 {
14330 /* Handle compare and branch and branch on count
14331 instructions. */
14332 rtx pattern = single_set (insn);
14333
14334 if (!pattern
14335 || SET_DEST (pattern) != pc_rtx
14336 || GET_CODE (SET_SRC (pattern)) != IF_THEN_ELSE)
14337 return false;
14338
14339 cond = XEXP (SET_SRC (pattern), 0);
14340 op0 = &XEXP (cond, 0);
14341 op1 = &XEXP (cond, 1);
14342 }
14343 else if (GET_CODE (PATTERN (insn)) == SET)
14344 {
14345 rtx src, dest;
14346
14347 /* Handle normal compare instructions. */
14348 src = SET_SRC (PATTERN (insn));
14349 dest = SET_DEST (PATTERN (insn));
14350
14351 if (!REG_P (dest)
14352 || !CC_REGNO_P (REGNO (dest))
14353 || GET_CODE (src) != COMPARE)
14354 return false;
14355
14356 /* s390_swap_cmp will try to find the conditional
14357 jump when passing NULL_RTX as condition. */
14358 cond = NULL_RTX;
14359 op0 = &XEXP (src, 0);
14360 op1 = &XEXP (src, 1);
14361 }
14362 else
14363 return false;
14364
14365 if (!REG_P (*op0) || !REG_P (*op1))
14366 return false;
14367
14368 if (GET_MODE_CLASS (GET_MODE (*op0)) != MODE_INT)
14369 return false;
14370
14371 /* Swap the COMPARE arguments and its mask if there is a
14372 conflicting access in the previous insn. */
14373 prev_insn = prev_active_insn (insn);
14374 if (prev_insn != NULL_RTX && INSN_P (prev_insn)
14375 && reg_referenced_p (*op1, PATTERN (prev_insn)))
14376 s390_swap_cmp (cond, op0, op1, insn);
14377
14378 /* Check if there is a conflict with the next insn. If there
14379 was no conflict with the previous insn, then swap the
14380 COMPARE arguments and its mask. If we already swapped
14381 the operands, or if swapping them would cause a conflict
14382 with the previous insn, issue a NOP after the COMPARE in
14383 order to separate the two instructions. */
14384 next_insn = next_active_insn (insn);
14385 if (next_insn != NULL_RTX && INSN_P (next_insn)
14386 && s390_non_addr_reg_read_p (*op1, next_insn))
14387 {
14388 if (prev_insn != NULL_RTX && INSN_P (prev_insn)
14389 && s390_non_addr_reg_read_p (*op0, prev_insn))
14390 {
14391 if (REGNO (*op1) == 0)
14392 emit_insn_after (gen_nop_lr1 (), insn);
14393 else
14394 emit_insn_after (gen_nop_lr0 (), insn);
14395 insn_added_p = true;
14396 }
14397 else
14398 s390_swap_cmp (cond, op0, op1, insn);
14399 }
14400 return insn_added_p;
14401 }
14402
14403 /* Number of INSNs to be scanned backward in the last BB of the loop
14404 and forward in the first BB of the loop. This usually should be a
14405 bit more than the number of INSNs which could go into one
14406 group. */
14407 #define S390_OSC_SCAN_INSN_NUM 5
14408
14409 /* Scan LOOP for static OSC collisions and return true if an osc_break
14410 should be issued for this loop. */
14411 static bool
14412 s390_adjust_loop_scan_osc (struct loop *loop)
14414 {
  HARD_REG_SET modregs, newregs;
  rtx_insn *insn, *store_insn = NULL;
  rtx set;
  struct s390_address addr_store, addr_load;
  subrtx_iterator::array_type array;
  int insn_count;

  CLEAR_HARD_REG_SET (modregs);

  insn_count = 0;
  FOR_BB_INSNS_REVERSE (loop->latch, insn)
    {
      if (!INSN_P (insn) || INSN_CODE (insn) <= 0)
        continue;

      insn_count++;
      if (insn_count > S390_OSC_SCAN_INSN_NUM)
        return false;

      find_all_hard_reg_sets (insn, &newregs, true);
      IOR_HARD_REG_SET (modregs, newregs);

      set = single_set (insn);
      if (!set)
        continue;

      if (MEM_P (SET_DEST (set))
          && s390_decompose_address (XEXP (SET_DEST (set), 0), &addr_store))
        {
          store_insn = insn;
          break;
        }
    }

  if (store_insn == NULL_RTX)
    return false;

  insn_count = 0;
  FOR_BB_INSNS (loop->header, insn)
    {
      if (!INSN_P (insn) || INSN_CODE (insn) <= 0)
        continue;

      if (insn == store_insn)
        return false;

      insn_count++;
      if (insn_count > S390_OSC_SCAN_INSN_NUM)
        return false;

      find_all_hard_reg_sets (insn, &newregs, true);
      IOR_HARD_REG_SET (modregs, newregs);

      set = single_set (insn);
      if (!set)
        continue;

      /* An intermediate store disrupts static OSC checking
         anyway.  */
      if (MEM_P (SET_DEST (set))
          && s390_decompose_address (XEXP (SET_DEST (set), 0), NULL))
        return false;

      FOR_EACH_SUBRTX (iter, array, SET_SRC (set), NONCONST)
        if (MEM_P (*iter)
            && s390_decompose_address (XEXP (*iter, 0), &addr_load)
            && rtx_equal_p (addr_load.base, addr_store.base)
            && rtx_equal_p (addr_load.indx, addr_store.indx)
            && rtx_equal_p (addr_load.disp, addr_store.disp))
          {
            if ((addr_load.base != NULL_RTX
                 && TEST_HARD_REG_BIT (modregs, REGNO (addr_load.base)))
                || (addr_load.indx != NULL_RTX
                    && TEST_HARD_REG_BIT (modregs, REGNO (addr_load.indx))))
              return true;
          }
    }
  return false;
}
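
/* Minimal sketch of the hazard scanned for above (hypothetical C
   source, not part of GCC): a latch store and a header load through
   a syntactically identical address whose base register changes
   inside the scanned window, e.g.

     while (n-- > 0)
       {
         int t = *p;    // load near the top of the header
         *p = t + 1;    // store near the bottom of the latch
         p++;           // base register modified in the window
       }

   Such loops are candidates for the osc_break emitted by
   s390_adjust_loops below.  */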

/* Look for adjustments which can be done on simple innermost
   loops.  */
static void
s390_adjust_loops ()
{
  struct loop *loop = NULL;

  df_analyze ();
  compute_bb_for_insn ();

  /* Find the loops.  */
  loop_optimizer_init (AVOID_CFG_MODIFICATIONS);

  FOR_EACH_LOOP (loop, LI_ONLY_INNERMOST)
    {
      if (dump_file)
        {
          flow_loop_dump (loop, dump_file, NULL, 0);
          fprintf (dump_file, ";; OSC loop scan Loop: ");
        }
      if (loop->latch == NULL
          || pc_set (BB_END (loop->latch)) == NULL_RTX
          || !s390_adjust_loop_scan_osc (loop))
        {
          if (dump_file)
            {
              if (loop->latch == NULL)
                fprintf (dump_file, " multiple backward jumps\n");
              else
                {
                  fprintf (dump_file, " header insn: %d latch insn: %d ",
                           INSN_UID (BB_HEAD (loop->header)),
                           INSN_UID (BB_END (loop->latch)));
                  if (pc_set (BB_END (loop->latch)) == NULL_RTX)
                    fprintf (dump_file, " loop does not end with jump\n");
                  else
                    fprintf (dump_file, " not instrumented\n");
                }
            }
        }
      else
        {
          rtx_insn *new_insn;

          if (dump_file)
            fprintf (dump_file, " adding OSC break insn: ");
          new_insn = emit_insn_before (gen_osc_break (),
                                       BB_END (loop->latch));
          INSN_ADDRESSES_NEW (new_insn, -1);
        }
    }

  loop_optimizer_finalize ();

  df_finish_pass (false);
}

/* Perform machine-dependent processing.  */

static void
s390_reorg (void)
{
  bool pool_overflow = false;
  int hw_before, hw_after;

  if (s390_tune == PROCESSOR_2964_Z13)
    s390_adjust_loops ();

  /* Make sure all splits have been performed; splits after
     machine_dependent_reorg might confuse insn length counts.  */
  split_all_insns_noflow ();

  /* Install the main literal pool and the associated base
     register load insns.

     In addition, there are two problematic situations we need
     to correct:

     - the literal pool might be > 4096 bytes in size, so that
       some of its elements cannot be directly accessed

     - a branch target might be > 64K away from the branch, so that
       it is not possible to use a PC-relative instruction.

     To fix those, we split the single literal pool into multiple
     pool chunks, reloading the pool base register at various
     points throughout the function to ensure it always points to
     the pool chunk the following code expects, and / or replace
     PC-relative branches by absolute branches.

     However, the two problems are interdependent: splitting the
     literal pool can move a branch further away from its target,
     causing the 64K limit to overflow, and on the other hand,
     replacing a PC-relative branch by an absolute branch means
     we need to put the branch target address into the literal
     pool, possibly causing it to overflow.

     So, we loop trying to fix up both problems until we manage
     to satisfy both conditions at the same time.  Note that the
     loop is guaranteed to terminate as every pass of the loop
     strictly decreases the total number of PC-relative branches
     in the function.  (This is not completely true as there
     might be branch-over-pool insns introduced by chunkify_start.
     Those never need to be split however.)  */

  for (;;)
    {
      struct constant_pool *pool = NULL;

      /* Collect the literal pool.  */
      if (!pool_overflow)
        {
          pool = s390_mainpool_start ();
          if (!pool)
            pool_overflow = true;
        }

      /* If literal pool overflowed, start to chunkify it.  */
      if (pool_overflow)
        pool = s390_chunkify_start ();

      /* Split out-of-range branches.  If this has created new
         literal pool entries, cancel current chunk list and
         recompute it.  zSeries machines have large branch
         instructions, so we never need to split a branch.  */
      if (!TARGET_CPU_ZARCH && s390_split_branches ())
        {
          if (pool_overflow)
            s390_chunkify_cancel (pool);
          else
            s390_mainpool_cancel (pool);

          continue;
        }

      /* If we made it up to here, both conditions are satisfied.
         Finish up literal pool related changes.  */
      if (pool_overflow)
        s390_chunkify_finish (pool);
      else
        s390_mainpool_finish (pool);

      /* We're done splitting branches.  */
      cfun->machine->split_branches_pending_p = false;
      break;
    }

  /* Generate out-of-pool execute target insns.  */
  if (TARGET_CPU_ZARCH)
    {
      rtx_insn *insn, *target;
      rtx label;

      for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
        {
          label = s390_execute_label (insn);
          if (!label)
            continue;

          gcc_assert (label != const0_rtx);

          target = emit_label (XEXP (label, 0));
          INSN_ADDRESSES_NEW (target, -1);

          if (JUMP_P (insn))
            {
              target = emit_jump_insn (s390_execute_target (insn));
              /* This is important in order to keep a table jump
                 pointing at the jump table label; only then is it
                 recognized as a table jump.  */
              JUMP_LABEL (target) = JUMP_LABEL (insn);
            }
          else
            target = emit_insn (s390_execute_target (insn));
          INSN_ADDRESSES_NEW (target, -1);
        }
    }

  /* Try to optimize prologue and epilogue further.  */
  s390_optimize_prologue ();

  /* Walk over the insns and do some >=z10 specific changes.  */
  if (s390_tune >= PROCESSOR_2097_Z10)
    {
      rtx_insn *insn;
      bool insn_added_p = false;

      /* The insn lengths and addresses have to be up to date for the
         following manipulations.  */
      shorten_branches (get_insns ());

      for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
        {
          if (!INSN_P (insn) || INSN_CODE (insn) <= 0)
            continue;

          if (JUMP_P (insn))
            insn_added_p |= s390_fix_long_loop_prediction (insn);

          if ((GET_CODE (PATTERN (insn)) == PARALLEL
               || GET_CODE (PATTERN (insn)) == SET)
              && s390_tune == PROCESSOR_2097_Z10)
            insn_added_p |= s390_z10_optimize_cmp (insn);
        }

      /* Adjust branches if we added new instructions.  */
      if (insn_added_p)
        shorten_branches (get_insns ());
    }

  s390_function_num_hotpatch_hw (current_function_decl, &hw_before, &hw_after);
  if (hw_after > 0)
    {
      rtx_insn *insn;

      /* Insert NOPs for hotpatching.  */
      for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
        /* Emit NOPs
            1. inside the area covered by debug information to allow setting
               breakpoints at the NOPs,
            2. before any insn which results in an asm instruction,
            3. before in-function labels to avoid jumping to the NOPs, for
               example as part of a loop,
            4. before any barrier in case the function is completely empty
               (__builtin_unreachable ()) and has neither internal labels nor
               active insns.  */
        if (active_insn_p (insn) || BARRIER_P (insn) || LABEL_P (insn))
          break;
      /* Output a series of NOPs before the first active insn.  */
      while (insn && hw_after > 0)
        {
          if (hw_after >= 3 && TARGET_CPU_ZARCH)
            {
              emit_insn_before (gen_nop_6_byte (), insn);
              hw_after -= 3;
            }
          else if (hw_after >= 2)
            {
              emit_insn_before (gen_nop_4_byte (), insn);
              hw_after -= 2;
            }
          else
            {
              emit_insn_before (gen_nop_2_byte (), insn);
              hw_after -= 1;
            }
        }
    }
}

/* Return true if INSN is an fp load insn writing register REGNO.  */
static inline bool
s390_fpload_toreg (rtx_insn *insn, unsigned int regno)
{
  rtx set;
  enum attr_type flag = s390_safe_attr_type (insn);

  if (flag != TYPE_FLOADSF && flag != TYPE_FLOADDF)
    return false;

  set = single_set (insn);

  if (set == NULL_RTX)
    return false;

  if (!REG_P (SET_DEST (set)) || !MEM_P (SET_SRC (set)))
    return false;

  if (REGNO (SET_DEST (set)) != regno)
    return false;

  return true;
}

/* This value describes the distance to be avoided between an
   arithmetic fp instruction and an fp load writing the same register.
   Z10_EARLYLOAD_DISTANCE - 1 as well as Z10_EARLYLOAD_DISTANCE + 1
   are fine, but the exact value has to be avoided.  Otherwise the FP
   pipeline will throw an exception causing a major penalty.  */
#define Z10_EARLYLOAD_DISTANCE 7
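
/* Worked illustration (hypothetical schedule): with the value 7
   above, the pattern to avoid is an fp load into a register exactly
   seven active insns after an arithmetic fp insn writing it:

     adbr  %f0,%f2       # insn 0: arithmetic fp insn writing %f0
                         # insns 1..6: unrelated active insns
     ld    %f0,0(%r2)    # insn 7: load hitting the exact distance

   A distance of 6 or 8 would be fine; only the exact distance of 7
   triggers the penalty, which is why the reordering below pushes such
   a load to the back of the ready list.  */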

/* Rearrange the ready list in order to avoid the situation described
   for Z10_EARLYLOAD_DISTANCE.  A problematic load instruction is
   moved to the very end of the ready list.  */
static void
s390_z10_prevent_earlyload_conflicts (rtx_insn **ready, int *nready_p)
{
  unsigned int regno;
  int nready = *nready_p;
  rtx_insn *tmp;
  int i;
  rtx_insn *insn;
  rtx set;
  enum attr_type flag;
  int distance;

  /* Skip DISTANCE - 1 active insns.  */
  for (insn = last_scheduled_insn, distance = Z10_EARLYLOAD_DISTANCE - 1;
       distance > 0 && insn != NULL_RTX;
       distance--, insn = prev_active_insn (insn))
    if (CALL_P (insn) || JUMP_P (insn))
      return;

  if (insn == NULL_RTX)
    return;

  set = single_set (insn);

  if (set == NULL_RTX || !REG_P (SET_DEST (set))
      || GET_MODE_CLASS (GET_MODE (SET_DEST (set))) != MODE_FLOAT)
    return;

  flag = s390_safe_attr_type (insn);

  if (flag == TYPE_FLOADSF || flag == TYPE_FLOADDF)
    return;

  regno = REGNO (SET_DEST (set));
  i = nready - 1;

  while (!s390_fpload_toreg (ready[i], regno) && i > 0)
    i--;

  if (!i)
    return;

  tmp = ready[i];
  memmove (&ready[1], &ready[0], sizeof (rtx_insn *) * i);
  ready[0] = tmp;
}

/* Returns TRUE if BB is entered via a fallthru edge and all other
   incoming edges are less than unlikely.  */
static bool
s390_bb_fallthru_entry_likely (basic_block bb)
{
  edge e, fallthru_edge;
  edge_iterator ei;

  if (!bb)
    return false;

  fallthru_edge = find_fallthru_edge (bb->preds);
  if (!fallthru_edge)
    return false;

  FOR_EACH_EDGE (e, ei, bb->preds)
    if (e != fallthru_edge
        && e->probability >= profile_probability::unlikely ())
      return false;

  return true;
}

/* The s390_sched_state variable tracks the state of the current or
   the last instruction group.

   0,1,2 number of instructions scheduled in the current group
   3     the last group is complete - normal insns
   4     the last group was a cracked/expanded insn */

static int s390_sched_state = 0;

#define S390_SCHED_STATE_NORMAL  3
#define S390_SCHED_STATE_CRACKED 4

#define S390_SCHED_ATTR_MASK_CRACKED    0x1
#define S390_SCHED_ATTR_MASK_EXPANDED   0x2
#define S390_SCHED_ATTR_MASK_ENDGROUP   0x4
#define S390_SCHED_ATTR_MASK_GROUPALONE 0x8
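
/* Minimal usage sketch (illustrative only): the bits above are meant
   to be combined into a set and tested together, e.g.

     unsigned int mask = s390_get_sched_attrmask (insn);
     bool breaks_group
       = (mask & (S390_SCHED_ATTR_MASK_CRACKED
                  | S390_SCHED_ATTR_MASK_EXPANDED
                  | S390_SCHED_ATTR_MASK_GROUPALONE)) != 0;

   which mirrors the tests performed in s390_sched_score and
   s390_sched_variable_issue below.  */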

static unsigned int
s390_get_sched_attrmask (rtx_insn *insn)
{
  unsigned int mask = 0;

  switch (s390_tune)
    {
    case PROCESSOR_2827_ZEC12:
      if (get_attr_zEC12_cracked (insn))
        mask |= S390_SCHED_ATTR_MASK_CRACKED;
      if (get_attr_zEC12_expanded (insn))
        mask |= S390_SCHED_ATTR_MASK_EXPANDED;
      if (get_attr_zEC12_endgroup (insn))
        mask |= S390_SCHED_ATTR_MASK_ENDGROUP;
      if (get_attr_zEC12_groupalone (insn))
        mask |= S390_SCHED_ATTR_MASK_GROUPALONE;
      break;
    case PROCESSOR_2964_Z13:
    case PROCESSOR_3906_Z14:
      if (get_attr_z13_cracked (insn))
        mask |= S390_SCHED_ATTR_MASK_CRACKED;
      if (get_attr_z13_expanded (insn))
        mask |= S390_SCHED_ATTR_MASK_EXPANDED;
      if (get_attr_z13_endgroup (insn))
        mask |= S390_SCHED_ATTR_MASK_ENDGROUP;
      if (get_attr_z13_groupalone (insn))
        mask |= S390_SCHED_ATTR_MASK_GROUPALONE;
      break;
    default:
      gcc_unreachable ();
    }
  return mask;
}

static unsigned int
s390_get_unit_mask (rtx_insn *insn, int *units)
{
  unsigned int mask = 0;

  switch (s390_tune)
    {
    case PROCESSOR_2964_Z13:
    case PROCESSOR_3906_Z14:
      *units = 3;
      if (get_attr_z13_unit_lsu (insn))
        mask |= 1 << 0;
      if (get_attr_z13_unit_fxu (insn))
        mask |= 1 << 1;
      if (get_attr_z13_unit_vfu (insn))
        mask |= 1 << 2;
      break;
    default:
      gcc_unreachable ();
    }
  return mask;
}

/* Return the scheduling score for INSN.  The higher the score the
   better.  The score is calculated from the OOO scheduling attributes
   of INSN and the scheduling state s390_sched_state.  */
static int
s390_sched_score (rtx_insn *insn)
{
  unsigned int mask = s390_get_sched_attrmask (insn);
  int score = 0;

  switch (s390_sched_state)
    {
    case 0:
      /* Try to put insns into the first slot which would otherwise
         break a group.  */
      if ((mask & S390_SCHED_ATTR_MASK_CRACKED) != 0
          || (mask & S390_SCHED_ATTR_MASK_EXPANDED) != 0)
        score += 5;
      if ((mask & S390_SCHED_ATTR_MASK_GROUPALONE) != 0)
        score += 10;
      /* fallthrough */
    case 1:
      /* Prefer not cracked insns while trying to put together a
         group.  */
      if ((mask & S390_SCHED_ATTR_MASK_CRACKED) == 0
          && (mask & S390_SCHED_ATTR_MASK_EXPANDED) == 0
          && (mask & S390_SCHED_ATTR_MASK_GROUPALONE) == 0)
        score += 10;
      if ((mask & S390_SCHED_ATTR_MASK_ENDGROUP) == 0)
        score += 5;
      break;
    case 2:
      /* Prefer not cracked insns while trying to put together a
         group.  */
      if ((mask & S390_SCHED_ATTR_MASK_CRACKED) == 0
          && (mask & S390_SCHED_ATTR_MASK_EXPANDED) == 0
          && (mask & S390_SCHED_ATTR_MASK_GROUPALONE) == 0)
        score += 10;
      /* Prefer endgroup insns in the last slot.  */
      if ((mask & S390_SCHED_ATTR_MASK_ENDGROUP) != 0)
        score += 10;
      break;
    case S390_SCHED_STATE_NORMAL:
      /* Prefer not cracked insns if the last was not cracked.  */
      if ((mask & S390_SCHED_ATTR_MASK_CRACKED) == 0
          && (mask & S390_SCHED_ATTR_MASK_EXPANDED) == 0)
        score += 5;
      if ((mask & S390_SCHED_ATTR_MASK_GROUPALONE) != 0)
        score += 10;
      break;
    case S390_SCHED_STATE_CRACKED:
      /* Try to keep cracked insns together to prevent them from
         interrupting groups.  */
      if ((mask & S390_SCHED_ATTR_MASK_CRACKED) != 0
          || (mask & S390_SCHED_ATTR_MASK_EXPANDED) != 0)
        score += 5;
      break;
    }

  if (s390_tune >= PROCESSOR_2964_Z13)
    {
      int units, i;
      unsigned unit_mask, m = 1;

      unit_mask = s390_get_unit_mask (insn, &units);
      gcc_assert (units <= MAX_SCHED_UNITS);

      /* Add a score in range 0..MAX_SCHED_MIX_SCORE depending on how long
         ago the last insn of this unit type got scheduled.  This is
         supposed to help provide a proper instruction mix to the
         CPU.  */
      for (i = 0; i < units; i++, m <<= 1)
        if (m & unit_mask)
          score += (last_scheduled_unit_distance[i] * MAX_SCHED_MIX_SCORE
                    / MAX_SCHED_MIX_DISTANCE);

      unsigned latency = insn_default_latency (insn);

      int other_side = 1 - current_side;

      /* Try to delay long-running insns when side is busy.  */
      if (latency > LONGRUNNING_THRESHOLD)
        {
          if (get_attr_z13_unit_fxu (insn) && fxu_longrunning[current_side]
              && fxu_longrunning[other_side] <= fxu_longrunning[current_side])
            score = MAX (0, score - 10);

          if (get_attr_z13_unit_vfu (insn) && vfu_longrunning[current_side]
              && vfu_longrunning[other_side] <= vfu_longrunning[current_side])
            score = MAX (0, score - 10);
        }
    }

  return score;
}
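
/* Worked example (the numbers follow from the cases above): with
   s390_sched_state == 0, a cracked insn scores 5 (case 0) plus 5 for
   its clear endgroup bit (via the fallthrough into case 1), i.e. 10,
   while a plain insn with no attribute bits set scores 10 + 5 = 15.
   The plain insn is therefore preferred for opening a new group, and
   the cracked insn is deferred.  */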

/* This function is called via hook TARGET_SCHED_REORDER before
   issuing one insn from list READY which contains *NREADYP entries.
   For target z10 it reorders load instructions to avoid early load
   conflicts in the floating point pipeline.  */
static int
s390_sched_reorder (FILE *file, int verbose,
                    rtx_insn **ready, int *nreadyp, int clock ATTRIBUTE_UNUSED)
{
  if (s390_tune == PROCESSOR_2097_Z10
      && reload_completed
      && *nreadyp > 1)
    s390_z10_prevent_earlyload_conflicts (ready, nreadyp);

  if (s390_tune >= PROCESSOR_2827_ZEC12
      && reload_completed
      && *nreadyp > 1)
    {
      int i;
      int last_index = *nreadyp - 1;
      int max_index = -1;
      int max_score = -1;
      rtx_insn *tmp;

      /* Just move the insn with the highest score to the top (the
         end) of the list.  A full sort is not needed since a conflict
         in the hazard recognition cannot happen.  So the top insn in
         the ready list will always be taken.  */
      for (i = last_index; i >= 0; i--)
        {
          int score;

          if (recog_memoized (ready[i]) < 0)
            continue;

          score = s390_sched_score (ready[i]);
          if (score > max_score)
            {
              max_score = score;
              max_index = i;
            }
        }

      if (max_index != -1)
        {
          if (max_index != last_index)
            {
              tmp = ready[max_index];
              ready[max_index] = ready[last_index];
              ready[last_index] = tmp;

              if (verbose > 5)
                fprintf (file,
                         ";;\t\tBACKEND: move insn %d to the top of list\n",
                         INSN_UID (ready[last_index]));
            }
          else if (verbose > 5)
            fprintf (file,
                     ";;\t\tBACKEND: best insn %d already on top\n",
                     INSN_UID (ready[last_index]));
        }

      if (verbose > 5)
        {
          fprintf (file, "ready list ooo attributes - sched state: %d\n",
                   s390_sched_state);

          for (i = last_index; i >= 0; i--)
            {
              unsigned int sched_mask;
              rtx_insn *insn = ready[i];

              if (recog_memoized (insn) < 0)
                continue;

              sched_mask = s390_get_sched_attrmask (insn);
              fprintf (file, ";;\t\tBACKEND: insn %d score: %d: ",
                       INSN_UID (insn),
                       s390_sched_score (insn));
#define PRINT_SCHED_ATTR(M, ATTR) fprintf (file, "%s ", \
                                           ((M) & sched_mask) ? #ATTR : "");
              PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_CRACKED, cracked);
              PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_EXPANDED, expanded);
              PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_ENDGROUP, endgroup);
              PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_GROUPALONE, groupalone);
#undef PRINT_SCHED_ATTR
              if (s390_tune >= PROCESSOR_2964_Z13)
                {
                  unsigned int unit_mask, m = 1;
                  int units, j;

                  unit_mask = s390_get_unit_mask (insn, &units);
                  fprintf (file, "(units:");
                  for (j = 0; j < units; j++, m <<= 1)
                    if (m & unit_mask)
                      fprintf (file, " u%d", j);
                  fprintf (file, ")");
                }
              fprintf (file, "\n");
            }
        }
    }

  return s390_issue_rate ();
}

/* This function is called via hook TARGET_SCHED_VARIABLE_ISSUE after
   the scheduler has issued INSN.  It stores the last issued insn into
   last_scheduled_insn in order to make it available for
   s390_sched_reorder.  */
static int
s390_sched_variable_issue (FILE *file, int verbose, rtx_insn *insn, int more)
{
  last_scheduled_insn = insn;

  bool starts_group = false;

  if (s390_tune >= PROCESSOR_2827_ZEC12
      && reload_completed
      && recog_memoized (insn) >= 0)
    {
      unsigned int mask = s390_get_sched_attrmask (insn);

      if ((mask & S390_SCHED_ATTR_MASK_CRACKED) != 0
          || (mask & S390_SCHED_ATTR_MASK_EXPANDED) != 0
          || (mask & S390_SCHED_ATTR_MASK_GROUPALONE) != 0)
        starts_group = true;

      if ((mask & S390_SCHED_ATTR_MASK_CRACKED) != 0
          || (mask & S390_SCHED_ATTR_MASK_EXPANDED) != 0)
        s390_sched_state = S390_SCHED_STATE_CRACKED;
      else if ((mask & S390_SCHED_ATTR_MASK_ENDGROUP) != 0
               || (mask & S390_SCHED_ATTR_MASK_GROUPALONE) != 0)
        s390_sched_state = S390_SCHED_STATE_NORMAL;
      else
        {
          /* Only normal insns are left (mask == 0).  */
          switch (s390_sched_state)
            {
            case 0:
              starts_group = true;
              /* fallthrough */
            case 1:
            case 2:
              s390_sched_state++;
              break;
            case S390_SCHED_STATE_NORMAL:
              starts_group = true;
              s390_sched_state = 1;
              break;
            case S390_SCHED_STATE_CRACKED:
              s390_sched_state = S390_SCHED_STATE_NORMAL;
              break;
            }
        }

      if (s390_tune >= PROCESSOR_2964_Z13)
        {
          int units, i;
          unsigned unit_mask, m = 1;

          unit_mask = s390_get_unit_mask (insn, &units);
          gcc_assert (units <= MAX_SCHED_UNITS);

          for (i = 0; i < units; i++, m <<= 1)
            if (m & unit_mask)
              last_scheduled_unit_distance[i] = 0;
            else if (last_scheduled_unit_distance[i] < MAX_SCHED_MIX_DISTANCE)
              last_scheduled_unit_distance[i]++;
        }

      /* If this insn started a new group, the side flipped.  */
      if (starts_group)
        current_side = current_side ? 0 : 1;

      for (int i = 0; i < 2; i++)
        {
          if (fxu_longrunning[i] >= 1)
            fxu_longrunning[i] -= 1;
          if (vfu_longrunning[i] >= 1)
            vfu_longrunning[i] -= 1;
        }

      unsigned latency = insn_default_latency (insn);
      if (latency > LONGRUNNING_THRESHOLD)
        {
          if (get_attr_z13_unit_fxu (insn))
            fxu_longrunning[current_side] = latency * LATENCY_FACTOR;
          else
            vfu_longrunning[current_side] = latency * LATENCY_FACTOR;
        }

      if (verbose > 5)
        {
          unsigned int sched_mask;

          sched_mask = s390_get_sched_attrmask (insn);

          fprintf (file, ";;\t\tBACKEND: insn %d: ", INSN_UID (insn));
#define PRINT_SCHED_ATTR(M, ATTR) fprintf (file, "%s ", \
                                           ((M) & sched_mask) ? #ATTR : "");
          PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_CRACKED, cracked);
          PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_EXPANDED, expanded);
          PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_ENDGROUP, endgroup);
          PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_GROUPALONE, groupalone);
#undef PRINT_SCHED_ATTR

          if (s390_tune >= PROCESSOR_2964_Z13)
            {
              unsigned int unit_mask, m = 1;
              int units, j;

              unit_mask = s390_get_unit_mask (insn, &units);
              fprintf (file, "(units:");
              for (j = 0; j < units; j++, m <<= 1)
                if (m & unit_mask)
                  fprintf (file, " %d", j);
              fprintf (file, ")");
            }
          fprintf (file, " sched state: %d\n", s390_sched_state);

          if (s390_tune >= PROCESSOR_2964_Z13)
            {
              int units, j;

              s390_get_unit_mask (insn, &units);

              fprintf (file, ";;\t\tBACKEND: units unused for: ");
              for (j = 0; j < units; j++)
                fprintf (file, "%d:%d ", j, last_scheduled_unit_distance[j]);
              fprintf (file, "\n");
            }
        }
    }

  if (GET_CODE (PATTERN (insn)) != USE
      && GET_CODE (PATTERN (insn)) != CLOBBER)
    return more - 1;
  else
    return more;
}

static void
s390_sched_init (FILE *file ATTRIBUTE_UNUSED,
                 int verbose ATTRIBUTE_UNUSED,
                 int max_ready ATTRIBUTE_UNUSED)
{
  last_scheduled_insn = NULL;
  memset (last_scheduled_unit_distance, 0, MAX_SCHED_UNITS * sizeof (int));

  /* If the next basic block is most likely entered via a fallthru edge
     we keep the last sched state.  Otherwise we start a new group.
     The scheduler traverses basic blocks in "instruction stream" ordering
     so if we see a fallthru edge here, s390_sched_state will be of its
     source block.

     current_sched_info->prev_head is the insn before the first insn of the
     block of insns to be scheduled.  */
  rtx_insn *insn = current_sched_info->prev_head
    ? NEXT_INSN (current_sched_info->prev_head) : NULL;
  basic_block bb = insn ? BLOCK_FOR_INSN (insn) : NULL;
  if (s390_tune < PROCESSOR_2964_Z13 || !s390_bb_fallthru_entry_likely (bb))
    s390_sched_state = 0;
}

/* This target hook implementation for TARGET_LOOP_UNROLL_ADJUST
   calculates the number of times LOOP should be unrolled when tuning
   for CPUs with a built-in stride prefetcher.  The loop body is
   scanned for memory accesses, and depending on the loop depth and
   the number of memory accesses a new number <= nunroll is returned
   to improve the behavior of the hardware prefetch unit.  */
static unsigned
s390_loop_unroll_adjust (unsigned nunroll, struct loop *loop)
{
  basic_block *bbs;
  rtx_insn *insn;
  unsigned i;
  unsigned mem_count = 0;

  if (s390_tune < PROCESSOR_2097_Z10)
    return nunroll;

  /* Count the number of memory references within the loop body.  */
  bbs = get_loop_body (loop);
  subrtx_iterator::array_type array;
  for (i = 0; i < loop->num_nodes; i++)
    FOR_BB_INSNS (bbs[i], insn)
      if (INSN_P (insn) && INSN_CODE (insn) != -1)
        FOR_EACH_SUBRTX (iter, array, PATTERN (insn), NONCONST)
          if (MEM_P (*iter))
            mem_count += 1;
  free (bbs);

  /* Prevent division by zero; we do not need to adjust nunroll in
     this case.  */
  if (mem_count == 0)
    return nunroll;

  switch (loop_depth (loop))
    {
    case 1:
      return MIN (nunroll, 28 / mem_count);
    case 2:
      return MIN (nunroll, 22 / mem_count);
    default:
      return MIN (nunroll, 16 / mem_count);
    }
}
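
/* Worked example (numbers from the table above): a depth-1 loop
   containing 4 memory references yields 28 / 4 = 7, so a requested
   unroll factor of 16 is cut down to MIN (16, 7) = 7; the same loop
   nested at depth 2 would get MIN (16, 22 / 4) = 5.  */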

/* Restore the current options.  This is a hook function and also called
   internally.  */

static void
s390_function_specific_restore (struct gcc_options *opts,
                                struct cl_target_option *ptr ATTRIBUTE_UNUSED)
{
  opts->x_s390_cost_pointer = (long) processor_table[opts->x_s390_tune].cost;
}

static void
s390_option_override_internal (bool main_args_p,
                               struct gcc_options *opts,
                               const struct gcc_options *opts_set)
{
  const char *prefix;
  const char *suffix;

  /* Set up prefix/suffix so the error messages refer to either the command
     line argument or the attribute(target).  */
  if (main_args_p)
    {
      prefix = "-m";
      suffix = "";
    }
  else
    {
      prefix = "option(\"";
      suffix = "\")";
    }

  /* Architecture mode defaults according to ABI.  */
  if (!(opts_set->x_target_flags & MASK_ZARCH))
    {
      if (TARGET_64BIT)
        opts->x_target_flags |= MASK_ZARCH;
      else
        opts->x_target_flags &= ~MASK_ZARCH;
    }

  /* Set the march default in case it hasn't been specified on cmdline.  */
  if (!opts_set->x_s390_arch)
    opts->x_s390_arch = PROCESSOR_2064_Z900;
  else if (opts->x_s390_arch == PROCESSOR_9672_G5
           || opts->x_s390_arch == PROCESSOR_9672_G6)
    warning (OPT_Wdeprecated, "%sarch=%s%s is deprecated and will be removed "
             "in future releases; use at least %sarch=z900%s",
             prefix, opts->x_s390_arch == PROCESSOR_9672_G5 ? "g5" : "g6",
             suffix, prefix, suffix);

  opts->x_s390_arch_flags = processor_flags_table[(int) opts->x_s390_arch];

  /* Determine processor to tune for.  */
  if (!opts_set->x_s390_tune)
    opts->x_s390_tune = opts->x_s390_arch;
  else if (opts->x_s390_tune == PROCESSOR_9672_G5
           || opts->x_s390_tune == PROCESSOR_9672_G6)
    warning (OPT_Wdeprecated, "%stune=%s%s is deprecated and will be removed "
             "in future releases; use at least %stune=z900%s",
             prefix, opts->x_s390_tune == PROCESSOR_9672_G5 ? "g5" : "g6",
             suffix, prefix, suffix);

  opts->x_s390_tune_flags = processor_flags_table[opts->x_s390_tune];

  /* Sanity checks.  */
  if (opts->x_s390_arch == PROCESSOR_NATIVE
      || opts->x_s390_tune == PROCESSOR_NATIVE)
    gcc_unreachable ();
  if (TARGET_ZARCH_P (opts->x_target_flags) && !TARGET_CPU_ZARCH_P (opts))
    error ("z/Architecture mode not supported on %s",
           processor_table[(int) opts->x_s390_arch].name);
  if (TARGET_64BIT && !TARGET_ZARCH_P (opts->x_target_flags))
    error ("64-bit ABI not supported in ESA/390 mode");

  if (opts->x_s390_indirect_branch == indirect_branch_thunk_inline
      || opts->x_s390_indirect_branch_call == indirect_branch_thunk_inline
      || opts->x_s390_function_return == indirect_branch_thunk_inline
      || opts->x_s390_function_return_reg == indirect_branch_thunk_inline
      || opts->x_s390_function_return_mem == indirect_branch_thunk_inline)
    error ("thunk-inline is only supported with -mindirect-branch-jump");

  if (opts->x_s390_indirect_branch != indirect_branch_keep)
    {
      if (!opts_set->x_s390_indirect_branch_call)
        opts->x_s390_indirect_branch_call = opts->x_s390_indirect_branch;

      if (!opts_set->x_s390_indirect_branch_jump)
        opts->x_s390_indirect_branch_jump = opts->x_s390_indirect_branch;
    }

  if (opts->x_s390_function_return != indirect_branch_keep)
    {
      if (!opts_set->x_s390_function_return_reg)
        opts->x_s390_function_return_reg = opts->x_s390_function_return;

      if (!opts_set->x_s390_function_return_mem)
        opts->x_s390_function_return_mem = opts->x_s390_function_return;
    }

  if (!TARGET_CPU_ZARCH)
    {
      if (opts->x_s390_indirect_branch_call != indirect_branch_keep
          || opts->x_s390_indirect_branch_jump != indirect_branch_keep)
        error ("-mindirect-branch* options require -march=z900 or higher");
      if (opts->x_s390_function_return_reg != indirect_branch_keep
          || opts->x_s390_function_return_mem != indirect_branch_keep)
        error ("-mfunction-return* options require -march=z900 or higher");
    }

  /* Enable hardware transactions if available and not explicitly
     disabled by the user, e.g. with -m31 -march=zEC12 -mzarch.  */
  if (!TARGET_OPT_HTM_P (opts_set->x_target_flags))
    {
      if (TARGET_CPU_HTM_P (opts) && TARGET_ZARCH_P (opts->x_target_flags))
        opts->x_target_flags |= MASK_OPT_HTM;
      else
        opts->x_target_flags &= ~MASK_OPT_HTM;
    }

  if (TARGET_OPT_VX_P (opts_set->x_target_flags))
    {
      if (TARGET_OPT_VX_P (opts->x_target_flags))
        {
          if (!TARGET_CPU_VX_P (opts))
            error ("hardware vector support not available on %s",
                   processor_table[(int) opts->x_s390_arch].name);
          if (TARGET_SOFT_FLOAT_P (opts->x_target_flags))
            error ("hardware vector support not available with -msoft-float");
        }
    }
  else
    {
      if (TARGET_CPU_VX_P (opts))
        /* Enable vector support if available and not explicitly disabled
           by the user, e.g. with -m31 -march=z13 -mzarch.  */
        opts->x_target_flags |= MASK_OPT_VX;
      else
        opts->x_target_flags &= ~MASK_OPT_VX;
    }

  /* Use hardware DFP if available and not explicitly disabled by
     the user, e.g. with -m31 -march=z10 -mzarch.  */
  if (!TARGET_HARD_DFP_P (opts_set->x_target_flags))
    {
      if (TARGET_DFP_P (opts))
        opts->x_target_flags |= MASK_HARD_DFP;
      else
        opts->x_target_flags &= ~MASK_HARD_DFP;
    }

  if (TARGET_HARD_DFP_P (opts->x_target_flags) && !TARGET_DFP_P (opts))
    {
      if (TARGET_HARD_DFP_P (opts_set->x_target_flags))
        {
          if (!TARGET_CPU_DFP_P (opts))
            error ("hardware decimal floating point instructions"
                   " not available on %s",
                   processor_table[(int) opts->x_s390_arch].name);
          if (!TARGET_ZARCH_P (opts->x_target_flags))
            error ("hardware decimal floating point instructions"
                   " not available in ESA/390 mode");
        }
      else
        opts->x_target_flags &= ~MASK_HARD_DFP;
    }

  if (TARGET_SOFT_FLOAT_P (opts_set->x_target_flags)
      && TARGET_SOFT_FLOAT_P (opts->x_target_flags))
    {
      if (TARGET_HARD_DFP_P (opts_set->x_target_flags)
          && TARGET_HARD_DFP_P (opts->x_target_flags))
        error ("-mhard-dfp can%'t be used in conjunction with -msoft-float");

      opts->x_target_flags &= ~MASK_HARD_DFP;
    }

  if (TARGET_BACKCHAIN_P (opts->x_target_flags)
      && TARGET_PACKED_STACK_P (opts->x_target_flags)
      && TARGET_HARD_FLOAT_P (opts->x_target_flags))
    error ("-mbackchain -mpacked-stack -mhard-float are not supported "
           "in combination");

  if (opts->x_s390_stack_size)
    {
      if (opts->x_s390_stack_guard >= opts->x_s390_stack_size)
        error ("stack size must be greater than the stack guard value");
      else if (opts->x_s390_stack_size > 1 << 16)
        error ("stack size must not be greater than 64k");
    }
  else if (opts->x_s390_stack_guard)
    error ("-mstack-guard implies use of -mstack-size");
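
/* Usage illustration (hypothetical command line): the checks above
   accept a guard value smaller than the total size and a size of at
   most 64k, e.g.

     gcc -mstack-size=32768 -mstack-guard=4096 foo.c

   whereas -mstack-size=32768 -mstack-guard=32768, or any -mstack-size
   larger than 65536, is rejected with one of the errors emitted
   above.  */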

#ifdef TARGET_DEFAULT_LONG_DOUBLE_128
  if (!TARGET_LONG_DOUBLE_128_P (opts_set->x_target_flags))
    opts->x_target_flags |= MASK_LONG_DOUBLE_128;
#endif

  if (opts->x_s390_tune >= PROCESSOR_2097_Z10)
    {
      maybe_set_param_value (PARAM_MAX_UNROLLED_INSNS, 100,
                             opts->x_param_values,
                             opts_set->x_param_values);
      maybe_set_param_value (PARAM_MAX_UNROLL_TIMES, 32,
                             opts->x_param_values,
                             opts_set->x_param_values);
      maybe_set_param_value (PARAM_MAX_COMPLETELY_PEELED_INSNS, 2000,
                             opts->x_param_values,
                             opts_set->x_param_values);
      maybe_set_param_value (PARAM_MAX_COMPLETELY_PEEL_TIMES, 64,
                             opts->x_param_values,
                             opts_set->x_param_values);
    }

  maybe_set_param_value (PARAM_MAX_PENDING_LIST_LENGTH, 256,
                         opts->x_param_values,
                         opts_set->x_param_values);
  /* Values for loop prefetching.  */
  maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE, 256,
                         opts->x_param_values,
                         opts_set->x_param_values);
  maybe_set_param_value (PARAM_L1_CACHE_SIZE, 128,
                         opts->x_param_values,
                         opts_set->x_param_values);
  /* s390 has more than 2 levels and the size is much larger.  Since
     we are always running virtualized, assume that we only get a
     small part of the caches above l1.  */
  maybe_set_param_value (PARAM_L2_CACHE_SIZE, 1500,
                         opts->x_param_values,
                         opts_set->x_param_values);
  maybe_set_param_value (PARAM_PREFETCH_MIN_INSN_TO_MEM_RATIO, 2,
                         opts->x_param_values,
                         opts_set->x_param_values);
  maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES, 6,
                         opts->x_param_values,
                         opts_set->x_param_values);

  /* Use the alternative scheduling-pressure algorithm by default.  */
  maybe_set_param_value (PARAM_SCHED_PRESSURE_ALGORITHM, 2,
                         opts->x_param_values,
                         opts_set->x_param_values);

  maybe_set_param_value (PARAM_MIN_VECT_LOOP_BOUND, 2,
                         opts->x_param_values,
                         opts_set->x_param_values);

  /* Call target specific restore function to do post-init work.  At the
     moment, this just sets opts->x_s390_cost_pointer.  */
  s390_function_specific_restore (opts, NULL);
}

static void
s390_option_override (void)
{
  unsigned int i;
  cl_deferred_option *opt;
  vec<cl_deferred_option> *v =
    (vec<cl_deferred_option> *) s390_deferred_options;

  if (v)
    FOR_EACH_VEC_ELT (*v, i, opt)
      {
        switch (opt->opt_index)
          {
          case OPT_mhotpatch_:
            {
              int val1;
              int val2;
              char *s = strtok (ASTRDUP (opt->arg), ",");
              char *t = strtok (NULL, "\0");

              if (t != NULL)
                {
                  val1 = integral_argument (s);
                  val2 = integral_argument (t);
                }
              else
                {
                  val1 = -1;
                  val2 = -1;
                }
              if (val1 == -1 || val2 == -1)
                {
                  /* Argument is not a plain number.  */
                  error ("arguments to %qs should be non-negative integers",
                         "-mhotpatch=n,m");
                  break;
                }
              else if (val1 > s390_hotpatch_hw_max
                       || val2 > s390_hotpatch_hw_max)
                {
                  error ("argument to %qs is too large (max. %d)",
                         "-mhotpatch=n,m", s390_hotpatch_hw_max);
                  break;
                }
              s390_hotpatch_hw_before_label = val1;
              s390_hotpatch_hw_after_label = val2;
              break;
            }
          default:
            gcc_unreachable ();
          }
      }

  /* Set up function hooks.  */
  init_machine_status = s390_init_machine_status;

  s390_option_override_internal (true, &global_options, &global_options_set);

  /* Save the initial options in case the user does function specific
     options.  */
  target_option_default_node = build_target_option_node (&global_options);
  target_option_current_node = target_option_default_node;

  /* This cannot reside in s390_option_optimization_table since HAVE_prefetch
     requires the arch flags to be evaluated already.  Since prefetching
     is beneficial on s390, we enable it if available.  */
  if (flag_prefetch_loop_arrays < 0 && HAVE_prefetch && optimize >= 3)
    flag_prefetch_loop_arrays = 1;

  if (!s390_pic_data_is_text_relative && !flag_pic)
    error ("-mno-pic-data-is-text-relative cannot be used without -fpic/-fPIC");

  if (TARGET_TPF)
    {
      /* Don't emit DWARF3/4 unless specifically selected.  The TPF
         debuggers do not yet support DWARF 3/4.  */
      if (!global_options_set.x_dwarf_strict)
        dwarf_strict = 1;
      if (!global_options_set.x_dwarf_version)
        dwarf_version = 2;
    }

  /* Register a target-specific optimization-and-lowering pass
     to run immediately before prologue and epilogue generation.

     Registering the pass must be done at start up.  It's
     convenient to do it here.  */
  opt_pass *new_pass = new pass_s390_early_mach (g);
  struct register_pass_info insert_pass_s390_early_mach =
    {
      new_pass,                 /* pass */
      "pro_and_epilogue",       /* reference_pass_name */
      1,                        /* ref_pass_instance_number */
      PASS_POS_INSERT_BEFORE    /* po_op */
    };
  register_pass (&insert_pass_s390_early_mach);
}

#if S390_USE_TARGET_ATTRIBUTE
/* Inner function to process the attribute((target(...))), take an argument
   and set the current options from the argument.  If we have a list,
   recursively go over the list.  */

static bool
s390_valid_target_attribute_inner_p (tree args,
                                     struct gcc_options *opts,
                                     struct gcc_options *new_opts_set,
                                     bool force_pragma)
{
  char *next_optstr;
  bool ret = true;

#define S390_ATTRIB(S, O, A)  { S, sizeof (S) - 1, O, A, 0 }
#define S390_PRAGMA(S, O, A)  { S, sizeof (S) - 1, O, A, 1 }
  static const struct
  {
    const char *string;
    size_t len;
    int opt;
    int has_arg;
    int only_as_pragma;
  } attrs[] = {
    /* enum options */
    S390_ATTRIB ("arch=", OPT_march_, 1),
    S390_ATTRIB ("tune=", OPT_mtune_, 1),
    /* uinteger options */
    S390_ATTRIB ("stack-guard=", OPT_mstack_guard_, 1),
    S390_ATTRIB ("stack-size=", OPT_mstack_size_, 1),
    S390_ATTRIB ("branch-cost=", OPT_mbranch_cost_, 1),
    S390_ATTRIB ("warn-framesize=", OPT_mwarn_framesize_, 1),
    /* flag options */
    S390_ATTRIB ("backchain", OPT_mbackchain, 0),
    S390_ATTRIB ("hard-dfp", OPT_mhard_dfp, 0),
    S390_ATTRIB ("hard-float", OPT_mhard_float, 0),
    S390_ATTRIB ("htm", OPT_mhtm, 0),
    S390_ATTRIB ("vx", OPT_mvx, 0),
    S390_ATTRIB ("packed-stack", OPT_mpacked_stack, 0),
    S390_ATTRIB ("small-exec", OPT_msmall_exec, 0),
    S390_ATTRIB ("soft-float", OPT_msoft_float, 0),
    S390_ATTRIB ("mvcle", OPT_mmvcle, 0),
    S390_PRAGMA ("zvector", OPT_mzvector, 0),
    /* boolean options */
    S390_ATTRIB ("warn-dynamicstack", OPT_mwarn_dynamicstack, 0),
  };
#undef S390_ATTRIB
#undef S390_PRAGMA

  /* If this is a list, recurse to get the options.  */
  if (TREE_CODE (args) == TREE_LIST)
    {
      bool ret = true;
      int num_pragma_values;
      int i;

      /* Note: attribs.c:decl_attributes prepends the values from
         current_target_pragma to the list of target attributes.  To determine
         whether we're looking at a value of the attribute or the pragma we
         assume that the first [list_length (current_target_pragma)] values in
         the list are the values from the pragma.  */
      num_pragma_values = (!force_pragma && current_target_pragma != NULL)
        ? list_length (current_target_pragma) : 0;
      for (i = 0; args; args = TREE_CHAIN (args), i++)
        {
          bool is_pragma;

          is_pragma = (force_pragma || i < num_pragma_values);
          if (TREE_VALUE (args)
              && !s390_valid_target_attribute_inner_p (TREE_VALUE (args),
                                                       opts, new_opts_set,
                                                       is_pragma))
            {
              ret = false;
            }
        }
      return ret;
    }

  else if (TREE_CODE (args) != STRING_CST)
    {
      error ("attribute %<target%> argument not a string");
      return false;
    }

  /* Handle multiple arguments separated by commas.  */
  next_optstr = ASTRDUP (TREE_STRING_POINTER (args));

  while (next_optstr && *next_optstr != '\0')
    {
      char *p = next_optstr;
      char *orig_p = p;
      char *comma = strchr (next_optstr, ',');
      size_t len, opt_len;
      int opt;
      bool opt_set_p;
      char ch;
      unsigned i;
      int mask = 0;
      enum cl_var_type var_type;
      bool found;

      if (comma)
        {
          *comma = '\0';
          len = comma - next_optstr;
          next_optstr = comma + 1;
        }
      else
        {
          len = strlen (p);
          next_optstr = NULL;
        }

      /* Recognize no-xxx.  */
      if (len > 3 && p[0] == 'n' && p[1] == 'o' && p[2] == '-')
        {
          opt_set_p = false;
          p += 3;
          len -= 3;
        }
      else
        opt_set_p = true;

      /* Find the option.  */
      ch = *p;
      found = false;
      for (i = 0; i < ARRAY_SIZE (attrs); i++)
        {
          opt_len = attrs[i].len;
          if (ch == attrs[i].string[0]
              && ((attrs[i].has_arg) ? len > opt_len : len == opt_len)
              && memcmp (p, attrs[i].string, opt_len) == 0)
            {
              opt = attrs[i].opt;
              if (!opt_set_p && cl_options[opt].cl_reject_negative)
                continue;
              mask = cl_options[opt].var_value;
              var_type = cl_options[opt].var_type;
              found = true;
              break;
            }
        }

      /* Process the option.  */
      if (!found)
        {
          error ("attribute(target(\"%s\")) is unknown", orig_p);
          return false;
        }
      else if (attrs[i].only_as_pragma && !force_pragma)
        {
          /* Value is not allowed for the target attribute.  */
          error ("value %qs is not supported by attribute %<target%>",
                 attrs[i].string);
          return false;
        }

      else if (var_type == CLVC_BIT_SET || var_type == CLVC_BIT_CLEAR)
        {
          if (var_type == CLVC_BIT_CLEAR)
            opt_set_p = !opt_set_p;

          if (opt_set_p)
            opts->x_target_flags |= mask;
          else
            opts->x_target_flags &= ~mask;
          new_opts_set->x_target_flags |= mask;
        }

      else if (cl_options[opt].var_type == CLVC_BOOLEAN)
        {
          int value;

          if (cl_options[opt].cl_uinteger)
            {
              /* Unsigned integer argument.  Code based on the function
                 decode_cmdline_option () in opts-common.c.  */
              value = integral_argument (p + opt_len);
            }
          else
            value = (opt_set_p) ? 1 : 0;

          if (value != -1)
            {
              struct cl_decoded_option decoded;

              /* Value range check; only implemented for numeric and boolean
                 options at the moment.  */
              generate_option (opt, NULL, value, CL_TARGET, &decoded);
              s390_handle_option (opts, new_opts_set, &decoded, input_location);
              set_option (opts, new_opts_set, opt, value,
                          p + opt_len, DK_UNSPECIFIED, input_location,
                          global_dc);
            }
          else
            {
              error ("attribute(target(\"%s\")) is unknown", orig_p);
              ret = false;
            }
        }

      else if (cl_options[opt].var_type == CLVC_ENUM)
        {
          bool arg_ok;
          int value;

          arg_ok = opt_enum_arg_to_value (opt, p + opt_len, &value, CL_TARGET);
          if (arg_ok)
            set_option (opts, new_opts_set, opt, value,
                        p + opt_len, DK_UNSPECIFIED, input_location,
                        global_dc);
          else
            {
              error ("attribute(target(\"%s\")) is unknown", orig_p);
              ret = false;
            }
        }

      else
        gcc_unreachable ();
    }
  return ret;
}
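
/* Usage illustration (hypothetical user code): the strings accepted
   by the parser above are the entries of the attrs[] table, each
   optionally prefixed with "no-" and separated by commas, e.g.

     void foo (void) __attribute__ ((target ("arch=z13,no-vx")));

   which selects the z13 architecture while disabling the vector
   facility for this one function.  */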

/* Return a TARGET_OPTION_NODE tree of the target options listed or NULL.  */

tree
s390_valid_target_attribute_tree (tree args,
                                  struct gcc_options *opts,
                                  const struct gcc_options *opts_set,
                                  bool force_pragma)
{
  tree t = NULL_TREE;
  struct gcc_options new_opts_set;

  memset (&new_opts_set, 0, sizeof (new_opts_set));

  /* Process each of the options on the chain.  */
  if (! s390_valid_target_attribute_inner_p (args, opts, &new_opts_set,
                                             force_pragma))
    return error_mark_node;

  /* If some option was set (even if it has not changed), rerun
     s390_option_override_internal, and then save the options away.  */
  if (new_opts_set.x_target_flags
      || new_opts_set.x_s390_arch
      || new_opts_set.x_s390_tune
      || new_opts_set.x_s390_stack_guard
      || new_opts_set.x_s390_stack_size
      || new_opts_set.x_s390_branch_cost
      || new_opts_set.x_s390_warn_framesize
      || new_opts_set.x_s390_warn_dynamicstack_p)
    {
      const unsigned char *src = (const unsigned char *) opts_set;
      unsigned char *dest = (unsigned char *) &new_opts_set;
      unsigned int i;

      /* Merge the original option flags into the new ones.  */
      for (i = 0; i < sizeof (*opts_set); i++)
        dest[i] |= src[i];

      /* Do any overrides, such as arch=xxx, or tune=xxx support.  */
      s390_option_override_internal (false, opts, &new_opts_set);
      /* Save the current options unless we are validating options for
         #pragma.  */
      t = build_target_option_node (opts);
    }
  return t;
}

/* Hook to validate attribute((target("string"))).  */

static bool
s390_valid_target_attribute_p (tree fndecl,
                               tree ARG_UNUSED (name),
                               tree args,
                               int ARG_UNUSED (flags))
{
  struct gcc_options func_options;
  tree new_target, new_optimize;
  bool ret = true;

  /* attribute((target("default"))) does nothing, beyond
     affecting multi-versioning.  */
  if (TREE_VALUE (args)
      && TREE_CODE (TREE_VALUE (args)) == STRING_CST
      && TREE_CHAIN (args) == NULL_TREE
      && strcmp (TREE_STRING_POINTER (TREE_VALUE (args)), "default") == 0)
    return true;

  tree old_optimize = build_optimization_node (&global_options);

  /* Get the optimization options of the current function.  */
  tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);

  if (!func_optimize)
    func_optimize = old_optimize;

  /* Init func_options.  */
  memset (&func_options, 0, sizeof (func_options));
  init_options_struct (&func_options, NULL);
  lang_hooks.init_options_struct (&func_options);

  cl_optimization_restore (&func_options, TREE_OPTIMIZATION (func_optimize));

  /* Initialize func_options to the default before its target options can
     be set.  */
  cl_target_option_restore (&func_options,
                            TREE_TARGET_OPTION (target_option_default_node));

  new_target = s390_valid_target_attribute_tree (args, &func_options,
                                                 &global_options_set,
                                                 (args
                                                  == current_target_pragma));
  new_optimize = build_optimization_node (&func_options);
  if (new_target == error_mark_node)
    ret = false;
  else if (fndecl && new_target)
    {
      DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;
      if (old_optimize != new_optimize)
        DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
    }
  return ret;
}

/* Hook to determine if one function can safely inline another.  */

static bool
s390_can_inline_p (tree caller, tree callee)
{
  tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
  tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);

  if (!callee_tree)
    callee_tree = target_option_default_node;
  if (!caller_tree)
    caller_tree = target_option_default_node;
  if (callee_tree == caller_tree)
    return true;

  struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
  struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
  bool ret = true;

  if ((caller_opts->x_target_flags & ~(MASK_SOFT_FLOAT | MASK_HARD_DFP))
      != (callee_opts->x_target_flags & ~(MASK_SOFT_FLOAT | MASK_HARD_DFP)))
    ret = false;

  /* Don't inline functions to be compiled for a more recent arch into a
     function for an older arch.  */
  else if (caller_opts->x_s390_arch < callee_opts->x_s390_arch)
    ret = false;

  /* Inlining a hard float function into a soft float function is only
     allowed if the hard float function doesn't actually make use of
     floating point.

     We are called from FEs for multi-versioning call optimization, so
     beware that ipa_fn_summaries may not be available.  */
  else if (((TARGET_SOFT_FLOAT_P (caller_opts->x_target_flags)
             && !TARGET_SOFT_FLOAT_P (callee_opts->x_target_flags))
            || (!TARGET_HARD_DFP_P (caller_opts->x_target_flags)
                && TARGET_HARD_DFP_P (callee_opts->x_target_flags)))
           && (! ipa_fn_summaries
               || ipa_fn_summaries->get
                    (cgraph_node::get (callee))->fp_expressions))
    ret = false;

  return ret;
}
#endif

/* Set VAL to correct enum value according to the indirect-branch or
   function-return attribute in ATTR.  */

static inline void
s390_indirect_branch_attrvalue (tree attr, enum indirect_branch *val)
{
  const char *str = TREE_STRING_POINTER (TREE_VALUE (TREE_VALUE (attr)));
  if (strcmp (str, "keep") == 0)
    *val = indirect_branch_keep;
  else if (strcmp (str, "thunk") == 0)
    *val = indirect_branch_thunk;
  else if (strcmp (str, "thunk-inline") == 0)
    *val = indirect_branch_thunk_inline;
  else if (strcmp (str, "thunk-extern") == 0)
    *val = indirect_branch_thunk_extern;
}
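
/* Usage illustration (hypothetical user code): the strings decoded
   above come from per-function attributes which override the
   -mindirect-branch/-mfunction-return command line defaults, e.g.

     void dispatch (void (*fn) (void))
       __attribute__ ((indirect_branch ("thunk")));

   which routes the indirect jumps and calls within dispatch through
   a thunk regardless of the global setting.  */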

/* Memorize the setting for -mindirect-branch* and -mfunction-return*
   from either the cmdline or the function attributes in
   cfun->machine.  */

static void
s390_indirect_branch_settings (tree fndecl)
{
  tree attr;

  if (!fndecl)
    return;

  /* Initialize with the cmdline options and let the attributes
     override it.  */
  cfun->machine->indirect_branch_jump = s390_indirect_branch_jump;
  cfun->machine->indirect_branch_call = s390_indirect_branch_call;

  cfun->machine->function_return_reg = s390_function_return_reg;
  cfun->machine->function_return_mem = s390_function_return_mem;

  if ((attr = lookup_attribute ("indirect_branch",
                                DECL_ATTRIBUTES (fndecl))))
    {
      s390_indirect_branch_attrvalue (attr,
                                      &cfun->machine->indirect_branch_jump);
      s390_indirect_branch_attrvalue (attr,
                                      &cfun->machine->indirect_branch_call);
    }

  if ((attr = lookup_attribute ("indirect_branch_jump",
                                DECL_ATTRIBUTES (fndecl))))
    s390_indirect_branch_attrvalue (attr, &cfun->machine->indirect_branch_jump);

  if ((attr = lookup_attribute ("indirect_branch_call",
                                DECL_ATTRIBUTES (fndecl))))
    s390_indirect_branch_attrvalue (attr, &cfun->machine->indirect_branch_call);

  if ((attr = lookup_attribute ("function_return",
                                DECL_ATTRIBUTES (fndecl))))
    {
      s390_indirect_branch_attrvalue (attr,
                                      &cfun->machine->function_return_reg);
      s390_indirect_branch_attrvalue (attr,
                                      &cfun->machine->function_return_mem);
    }

  if ((attr = lookup_attribute ("function_return_reg",
                                DECL_ATTRIBUTES (fndecl))))
    s390_indirect_branch_attrvalue (attr, &cfun->machine->function_return_reg);

  if ((attr = lookup_attribute ("function_return_mem",
                                DECL_ATTRIBUTES (fndecl))))
    s390_indirect_branch_attrvalue (attr, &cfun->machine->function_return_mem);
}

#if S390_USE_TARGET_ATTRIBUTE
/* Restore target globals from NEW_TREE and invalidate the
   s390_previous_fndecl cache.  */

void
s390_activate_target_options (tree new_tree)
16130 {
16131 cl_target_option_restore (&global_options, TREE_TARGET_OPTION (new_tree));
16132 if (TREE_TARGET_GLOBALS (new_tree))
16133 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
16134 else if (new_tree == target_option_default_node)
16135 restore_target_globals (&default_target_globals);
16136 else
16137 TREE_TARGET_GLOBALS (new_tree) = save_target_globals_default_opts ();
16138 s390_previous_fndecl = NULL_TREE;
16139 }
16140 #endif
16141
16142 /* Establish appropriate back-end context for processing the function
16143 FNDECL. The argument might be NULL to indicate processing at top
16144 level, outside of any function scope. */
16145 static void
s390_set_current_function(tree fndecl)16146 s390_set_current_function (tree fndecl)
16147 {
16148 #if S390_USE_TARGET_ATTRIBUTE
16149 /* Only change the context if the function changes. This hook is called
16150 several times in the course of compiling a function, and we don't want to
16151 slow things down too much or call target_reinit when it isn't safe. */
16152 if (fndecl == s390_previous_fndecl)
16153 {
16154 s390_indirect_branch_settings (fndecl);
16155 return;
16156 }
16157
16158 tree old_tree;
16159 if (s390_previous_fndecl == NULL_TREE)
16160 old_tree = target_option_current_node;
16161 else if (DECL_FUNCTION_SPECIFIC_TARGET (s390_previous_fndecl))
16162 old_tree = DECL_FUNCTION_SPECIFIC_TARGET (s390_previous_fndecl);
16163 else
16164 old_tree = target_option_default_node;
16165
16166 if (fndecl == NULL_TREE)
16167 {
16168 if (old_tree != target_option_current_node)
16169 s390_activate_target_options (target_option_current_node);
16170 return;
16171 }
16172
16173 tree new_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
16174 if (new_tree == NULL_TREE)
16175 new_tree = target_option_default_node;
16176
16177 if (old_tree != new_tree)
16178 s390_activate_target_options (new_tree);
16179 s390_previous_fndecl = fndecl;
16180 #endif
16181 s390_indirect_branch_settings (fndecl);
16182 }
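
/* Sketch of the intended behavior (assuming the s390 target attribute
   supports an "arch=" selector):

     __attribute__ ((target ("arch=z13"))) void f (void) { ... }
     void g (void) { ... }

   Compiling f should switch the back-end context to the z13 options and
   compiling g should switch back to the command-line options, with
   s390_previous_fndecl avoiding redundant switches when the hook fires
   several times for the same function.  */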

/* Implement TARGET_USE_BY_PIECES_INFRASTRUCTURE_P.  */

static bool
s390_use_by_pieces_infrastructure_p (unsigned HOST_WIDE_INT size,
                                     unsigned int align ATTRIBUTE_UNUSED,
                                     enum by_pieces_operation op ATTRIBUTE_UNUSED,
                                     bool speed_p ATTRIBUTE_UNUSED)
{
  return (size == 1 || size == 2
          || size == 4 || (TARGET_ZARCH && size == 8));
}
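
/* Consequently, a fixed-size copy like

     memcpy (dst, src, 8);

   can be expanded inline piece-by-piece when TARGET_ZARCH, while sizes
   such as 3 or 7 bytes are left to the block-operation expanders
   (hypothetical example; the middle end decides based on this hook).  */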

/* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV hook.  */

static void
s390_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
{
  tree sfpc = s390_builtin_decls[S390_BUILTIN_s390_sfpc];
  tree efpc = s390_builtin_decls[S390_BUILTIN_s390_efpc];
  tree call_efpc = build_call_expr (efpc, 0);
  tree fenv_var = create_tmp_var_raw (unsigned_type_node);

#define FPC_EXCEPTION_MASK       HOST_WIDE_INT_UC (0xf8000000)
#define FPC_FLAGS_MASK           HOST_WIDE_INT_UC (0x00f80000)
#define FPC_DXC_MASK             HOST_WIDE_INT_UC (0x0000ff00)
#define FPC_EXCEPTION_MASK_SHIFT HOST_WIDE_INT_UC (24)
#define FPC_FLAGS_SHIFT          HOST_WIDE_INT_UC (16)
#define FPC_DXC_SHIFT            HOST_WIDE_INT_UC (8)

  /* Generates the equivalent of feholdexcept (&fenv_var)

     fenv_var = __builtin_s390_efpc ();
     __builtin_s390_sfpc (fenv_var & mask) */
  tree old_fpc = build2 (MODIFY_EXPR, unsigned_type_node, fenv_var, call_efpc);
  tree new_fpc =
    build2 (BIT_AND_EXPR, unsigned_type_node, fenv_var,
            build_int_cst (unsigned_type_node,
                           ~(FPC_DXC_MASK | FPC_FLAGS_MASK |
                             FPC_EXCEPTION_MASK)));
  tree set_new_fpc = build_call_expr (sfpc, 1, new_fpc);
  *hold = build2 (COMPOUND_EXPR, void_type_node, old_fpc, set_new_fpc);

  /* Generates the equivalent of feclearexcept (FE_ALL_EXCEPT)

     __builtin_s390_sfpc (__builtin_s390_efpc () & mask) */
  new_fpc = build2 (BIT_AND_EXPR, unsigned_type_node, call_efpc,
                    build_int_cst (unsigned_type_node,
                                   ~(FPC_DXC_MASK | FPC_FLAGS_MASK)));
  *clear = build_call_expr (sfpc, 1, new_fpc);

  /* Generates the equivalent of feupdateenv (fenv_var)

     old_fpc = __builtin_s390_efpc ();
     __builtin_s390_sfpc (fenv_var);
     __atomic_feraiseexcept ((old_fpc & FPC_FLAGS_MASK) >> FPC_FLAGS_SHIFT); */

  old_fpc = create_tmp_var_raw (unsigned_type_node);
  tree store_old_fpc = build2 (MODIFY_EXPR, void_type_node,
                               old_fpc, call_efpc);

  set_new_fpc = build_call_expr (sfpc, 1, fenv_var);

  tree raise_old_except = build2 (BIT_AND_EXPR, unsigned_type_node, old_fpc,
                                  build_int_cst (unsigned_type_node,
                                                 FPC_FLAGS_MASK));
  raise_old_except = build2 (RSHIFT_EXPR, unsigned_type_node, raise_old_except,
                             build_int_cst (unsigned_type_node,
                                            FPC_FLAGS_SHIFT));
  tree atomic_feraiseexcept
    = builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT);
  raise_old_except = build_call_expr (atomic_feraiseexcept,
                                      1, raise_old_except);

  *update = build2 (COMPOUND_EXPR, void_type_node,
                    build2 (COMPOUND_EXPR, void_type_node,
                            store_old_fpc, set_new_fpc),
                    raise_old_except);

#undef FPC_EXCEPTION_MASK
#undef FPC_FLAGS_MASK
#undef FPC_DXC_MASK
#undef FPC_EXCEPTION_MASK_SHIFT
#undef FPC_FLAGS_SHIFT
#undef FPC_DXC_SHIFT
}
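
/* These three sequences are consumed by the middle end when expanding
   a C11 atomic compound assignment on a floating point type, roughly:

     hold    -- save the FPC and mask exceptions (feholdexcept)
     ... compute the new value, retrying on contention ...
     clear   -- drop exceptions raised by a failed attempt
     update  -- restore the FPC and re-raise the deferred exceptions

   (sketch only; the exact placement is determined by generic code).  */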

/* Return the vector mode to be used for inner mode MODE when doing
   vectorization.  */
static machine_mode
s390_preferred_simd_mode (scalar_mode mode)
{
  if (TARGET_VXE)
    switch (mode)
      {
      case E_SFmode:
        return V4SFmode;
      default:;
      }

  if (TARGET_VX)
    switch (mode)
      {
      case E_DFmode:
        return V2DFmode;
      case E_DImode:
        return V2DImode;
      case E_SImode:
        return V4SImode;
      case E_HImode:
        return V8HImode;
      case E_QImode:
        return V16QImode;
      default:;
      }
  return word_mode;
}
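
/* For example, a loop like

     for (i = 0; i < n; i++)
       x[i] = y[i] + z[i];

   over double elements should be vectorized with V2DFmode under
   TARGET_VX, while float elements additionally need TARGET_VXE to get
   V4SFmode; without vector support only word_mode is offered.  */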

/* Our hardware does not require vectors to be strictly aligned.  */
static bool
s390_support_vector_misalignment (machine_mode mode ATTRIBUTE_UNUSED,
                                  const_tree type ATTRIBUTE_UNUSED,
                                  int misalignment ATTRIBUTE_UNUSED,
                                  bool is_packed ATTRIBUTE_UNUSED)
{
  if (TARGET_VX)
    return true;

  return default_builtin_support_vector_misalignment (mode, type, misalignment,
                                                      is_packed);
}

/* The vector ABI requires vector types to be aligned on an 8 byte
   boundary (our stack alignment).  However, we allow this to be
   overridden by the user, even though this definitely breaks the ABI.  */
static HOST_WIDE_INT
s390_vector_alignment (const_tree type)
{
  if (!TARGET_VX_ABI)
    return default_vector_alignment (type);

  if (TYPE_USER_ALIGN (type))
    return TYPE_ALIGN (type);

  return MIN (64, tree_to_shwi (TYPE_SIZE (type)));
}
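
/* For instance, a 16-byte vector type (TYPE_SIZE of 128 bits) gets
   MIN (64, 128) = 64 bits, i.e. the 8-byte stack alignment, under the
   vector ABI, while __attribute__ ((aligned (16))) is still honored
   via TYPE_USER_ALIGN despite the ABI caveat above.  */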

/* Implement TARGET_CONSTANT_ALIGNMENT.  Alignment on even addresses for
   LARL instruction.  */

static HOST_WIDE_INT
s390_constant_alignment (const_tree, HOST_WIDE_INT align)
{
  return MAX (align, 16);
}
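
/* ALIGN is measured in bits, so e.g. a string literal with 8-bit
   alignment is raised to MAX (8, 16) = 16 bits: a 2-byte boundary is
   the minimum LARL can address.  */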

#ifdef HAVE_AS_MACHINE_MACHINEMODE
/* Implement TARGET_ASM_FILE_START.  */
static void
s390_asm_file_start (void)
{
  default_file_start ();
  s390_asm_output_machine_for_arch (asm_out_file);
}
#endif

/* Implement TARGET_ASM_FILE_END.  */
static void
s390_asm_file_end (void)
{
#ifdef HAVE_AS_GNU_ATTRIBUTE
  varpool_node *vnode;
  cgraph_node *cnode;

  FOR_EACH_VARIABLE (vnode)
    if (TREE_PUBLIC (vnode->decl))
      s390_check_type_for_vector_abi (TREE_TYPE (vnode->decl), false, false);

  FOR_EACH_FUNCTION (cnode)
    if (TREE_PUBLIC (cnode->decl))
      s390_check_type_for_vector_abi (TREE_TYPE (cnode->decl), false, false);

  if (s390_vector_abi != 0)
    fprintf (asm_out_file, "\t.gnu_attribute 8, %d\n",
             s390_vector_abi);
#endif
  file_end_indicate_exec_stack ();

  if (flag_split_stack)
    file_end_indicate_split_stack ();
}

/* Return true if TYPE is a vector bool type.  */
static inline bool
s390_vector_bool_type_p (const_tree type)
{
  return TYPE_VECTOR_OPAQUE (type);
}

/* Return the diagnostic message string if the binary operation OP is
   not permitted on TYPE1 and TYPE2, NULL otherwise.  */
static const char*
s390_invalid_binary_op (int op ATTRIBUTE_UNUSED, const_tree type1,
                        const_tree type2)
{
  bool bool1_p, bool2_p;
  bool plusminus_p;
  bool muldiv_p;
  bool compare_p;
  machine_mode mode1, mode2;

  if (!TARGET_ZVECTOR)
    return NULL;

  if (!VECTOR_TYPE_P (type1) || !VECTOR_TYPE_P (type2))
    return NULL;

  bool1_p = s390_vector_bool_type_p (type1);
  bool2_p = s390_vector_bool_type_p (type2);

  /* Mixing signed and unsigned types is forbidden for all
     operators.  */
  if (!bool1_p && !bool2_p
      && TYPE_UNSIGNED (type1) != TYPE_UNSIGNED (type2))
    return N_("types differ in signedness");

  plusminus_p = (op == PLUS_EXPR || op == MINUS_EXPR);
  muldiv_p = (op == MULT_EXPR || op == RDIV_EXPR || op == TRUNC_DIV_EXPR
              || op == CEIL_DIV_EXPR || op == FLOOR_DIV_EXPR
              || op == ROUND_DIV_EXPR);
  compare_p = (op == LT_EXPR || op == LE_EXPR || op == GT_EXPR || op == GE_EXPR
               || op == EQ_EXPR || op == NE_EXPR);

  if (bool1_p && bool2_p && (plusminus_p || muldiv_p))
    return N_("binary operator does not support two vector bool operands");

  if (bool1_p != bool2_p && (muldiv_p || compare_p))
    return N_("binary operator does not support vector bool operand");

  mode1 = TYPE_MODE (type1);
  mode2 = TYPE_MODE (type2);

  if (bool1_p != bool2_p && plusminus_p
      && (GET_MODE_CLASS (mode1) == MODE_VECTOR_FLOAT
          || GET_MODE_CLASS (mode2) == MODE_VECTOR_FLOAT))
    return N_("binary operator does not support mixing vector "
              "bool with floating point vector operands");

  return NULL;
}
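
/* Example diagnostics, assuming the zvector language extension is
   enabled:

     vector signed int a;
     vector unsigned int b;
     a + b;     error: types differ in signedness

     vector bool int c, d;
     c * d;     error: binary operator does not support two vector
                bool operands  */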

/* Implement TARGET_C_EXCESS_PRECISION.

   FIXME: For historical reasons, float_t and double_t are typedef'ed to
   double on s390, causing operations on float_t to operate in a higher
   precision than is necessary.  However, it is not the case that SFmode
   operations have implicit excess precision, and we generate more optimal
   code if we let the compiler know no implicit extra precision is added.

   That means when we are compiling with -fexcess-precision=fast, the value
   we set for FLT_EVAL_METHOD will be out of line with the actual precision of
   float_t (though they would be correct for -fexcess-precision=standard).

   A complete fix would modify glibc to remove the unnecessary typedef
   of float_t to double.  */

static enum flt_eval_method
s390_excess_precision (enum excess_precision_type type)
{
  switch (type)
    {
    case EXCESS_PRECISION_TYPE_IMPLICIT:
    case EXCESS_PRECISION_TYPE_FAST:
      /* The fastest type to promote to will always be the native type,
         whether that occurs with implicit excess precision or
         otherwise.  */
      return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT;
    case EXCESS_PRECISION_TYPE_STANDARD:
      /* Otherwise, when we are in a standards compliant mode, to
         ensure consistency with the implementation in glibc, report that
         float is evaluated to the range and precision of double.  */
      return FLT_EVAL_METHOD_PROMOTE_TO_DOUBLE;
    default:
      gcc_unreachable ();
    }
  return FLT_EVAL_METHOD_UNPREDICTABLE;
}
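
/* E.g. for

     float a, b, c;
     c = a + b;

   -fexcess-precision=standard evaluates (and FLT_EVAL_METHOD reports)
   the addition as if in double, matching glibc's float_t, whereas
   -fexcess-precision=fast keeps it in SFmode as described above.  */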

/* Implement the TARGET_ASAN_SHADOW_OFFSET hook.  */

static unsigned HOST_WIDE_INT
s390_asan_shadow_offset (void)
{
  return TARGET_64BIT ? HOST_WIDE_INT_1U << 52 : HOST_WIDE_INT_UC (0x20000000);
}
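
/* Under the usual ASan shadow mapping

     shadow_addr = (addr >> 3) + s390_asan_shadow_offset ()

   this puts the shadow at 1 << 52 for 64-bit and at 0x20000000 for
   31-bit code (sketch; the mapping itself is implemented by the
   sanitizer support code, not here).  */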

#ifdef HAVE_GAS_HIDDEN
# define USE_HIDDEN_LINKONCE 1
#else
# define USE_HIDDEN_LINKONCE 0
#endif

/* Output an indirect branch trampoline for target register REGNO.  */

static void
s390_output_indirect_thunk_function (unsigned int regno, bool z10_p)
{
  tree decl;
  char thunk_label[32];
  int i;

  if (z10_p)
    sprintf (thunk_label, TARGET_INDIRECT_BRANCH_THUNK_NAME_EXRL, regno);
  else
    sprintf (thunk_label, TARGET_INDIRECT_BRANCH_THUNK_NAME_EX,
             INDIRECT_BRANCH_THUNK_REGNUM, regno);

  decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
                     get_identifier (thunk_label),
                     build_function_type_list (void_type_node, NULL_TREE));
  DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
                                   NULL_TREE, void_type_node);
  TREE_PUBLIC (decl) = 1;
  TREE_STATIC (decl) = 1;
  DECL_IGNORED_P (decl) = 1;

  if (USE_HIDDEN_LINKONCE)
    {
      cgraph_node::create (decl)->set_comdat_group (DECL_ASSEMBLER_NAME (decl));

      targetm.asm_out.unique_section (decl, 0);
      switch_to_section (get_named_section (decl, NULL, 0));

      targetm.asm_out.globalize_label (asm_out_file, thunk_label);
      fputs ("\t.hidden\t", asm_out_file);
      assemble_name (asm_out_file, thunk_label);
      putc ('\n', asm_out_file);
      ASM_DECLARE_FUNCTION_NAME (asm_out_file, thunk_label, decl);
    }
  else
    {
      switch_to_section (text_section);
      ASM_OUTPUT_LABEL (asm_out_file, thunk_label);
    }

  DECL_INITIAL (decl) = make_node (BLOCK);
  current_function_decl = decl;
  allocate_struct_function (decl, false);
  init_function_start (decl);
  cfun->is_thunk = true;
  first_function_block_is_cold = false;
  final_start_function (emit_barrier (), asm_out_file, 1);
  /* This makes CFI at least usable for indirect jumps.

     When stopped in the thunk, the backtrace will point to the thunk
     target as if it had been interrupted by a signal.  For a call this
     means that the call chain will be: caller->callee->thunk.  */
  if (flag_asynchronous_unwind_tables && flag_dwarf2_cfi_asm)
    {
      fputs ("\t.cfi_signal_frame\n", asm_out_file);
      fprintf (asm_out_file, "\t.cfi_return_column %d\n", regno);
      for (i = 0; i < FPR15_REGNUM; i++)
        fprintf (asm_out_file, "\t.cfi_same_value %s\n", reg_names[i]);
    }

  if (z10_p)
    {
      /* exrl  0,1f  */

      /* We generate a thunk for z10 compiled code although z10 is
         currently not enabled.  Tell the assembler to accept the
         instruction.  */
      if (!TARGET_CPU_Z10)
        {
          fputs ("\t.machine push\n", asm_out_file);
          fputs ("\t.machine z10\n", asm_out_file);
        }
      /* We use exrl even if -mzarch hasn't been specified on the
         command line so we have to tell the assembler to accept
         it.  */
      if (!TARGET_ZARCH)
        fputs ("\t.machinemode zarch\n", asm_out_file);

      fputs ("\texrl\t0,1f\n", asm_out_file);

      if (!TARGET_ZARCH)
        fputs ("\t.machinemode esa\n", asm_out_file);

      if (!TARGET_CPU_Z10)
        fputs ("\t.machine pop\n", asm_out_file);
    }
  else if (TARGET_CPU_ZARCH)
    {
      /* larl %r1,1f  */
      fprintf (asm_out_file, "\tlarl\t%%r%d,1f\n",
               INDIRECT_BRANCH_THUNK_REGNUM);

      /* ex 0,0(%r1)  */
      fprintf (asm_out_file, "\tex\t0,0(%%r%d)\n",
               INDIRECT_BRANCH_THUNK_REGNUM);
    }
  else
    gcc_unreachable ();

  /* 0:  j  0b  */
  fputs ("0:\tj\t0b\n", asm_out_file);

  /* 1:  br  <regno>  */
  fprintf (asm_out_file, "1:\tbr\t%%r%d\n", regno);

  final_end_function ();
  init_insn_lengths ();
  free_after_compilation (cfun);
  set_cfun (NULL);
  current_function_decl = NULL;
}
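
/* For REGNO == 1 in the Z10_P case the emitted thunk boils down to

        __thunk:  exrl  0,1f
        0:        j     0b
        1:        br    %r1

   i.e. the EXRL executes the BR out of line while the fall-through
   path spins in the branch at label 0, catching any mis-speculation
   past the indirect branch (label name illustrative; the real one
   comes from TARGET_INDIRECT_BRANCH_THUNK_NAME_EXRL).  */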

/* Implement the asm.code_end target hook.  */

static void
s390_code_end (void)
{
  int i;

  for (i = 1; i < 16; i++)
    {
      if (indirect_branch_z10thunk_mask & (1 << i))
        s390_output_indirect_thunk_function (i, true);

      if (indirect_branch_prez10thunk_mask & (1 << i))
        s390_output_indirect_thunk_function (i, false);
    }

  if (TARGET_INDIRECT_BRANCH_TABLE)
    {
      int o;
      int i;

      for (o = 0; o < INDIRECT_BRANCH_NUM_OPTIONS; o++)
        {
          if (indirect_branch_table_label_no[o] == 0)
            continue;

          switch_to_section (get_section (indirect_branch_table_name[o],
                                          0,
                                          NULL_TREE));
          for (i = 0; i < indirect_branch_table_label_no[o]; i++)
            {
              char label_start[32];

              ASM_GENERATE_INTERNAL_LABEL (label_start,
                                           indirect_branch_table_label[o], i);

              fputs ("\t.long\t", asm_out_file);
              assemble_name_raw (asm_out_file, label_start);
              fputs ("-.\n", asm_out_file);
            }
          switch_to_section (current_function_section ());
        }
    }
}

/* Implement the TARGET_CASE_VALUES_THRESHOLD target hook.  */

unsigned int
s390_case_values_threshold (void)
{
  /* Disabling branch prediction for indirect jumps makes jump tables
     much more expensive.  */
  if (TARGET_INDIRECT_BRANCH_NOBP_JUMP)
    return 20;

  return default_case_values_threshold ();
}
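
/* E.g. when TARGET_INDIRECT_BRANCH_NOBP_JUMP is in effect, a switch
   with fewer than 20 case labels is expanded as a compare-and-branch
   chain instead of a jump table, since every jump-table dispatch would
   otherwise pay for a non-predicted indirect jump.  */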

/* Initialize GCC target structure.  */

#undef TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP "\t.word\t"
#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
#undef TARGET_ASM_INTEGER
#define TARGET_ASM_INTEGER s390_assemble_integer

#undef TARGET_ASM_OPEN_PAREN
#define TARGET_ASM_OPEN_PAREN ""

#undef TARGET_ASM_CLOSE_PAREN
#define TARGET_ASM_CLOSE_PAREN ""

#undef TARGET_OPTION_OVERRIDE
#define TARGET_OPTION_OVERRIDE s390_option_override

#ifdef TARGET_THREAD_SSP_OFFSET
#undef TARGET_STACK_PROTECT_GUARD
#define TARGET_STACK_PROTECT_GUARD hook_tree_void_null
#endif

#undef TARGET_ENCODE_SECTION_INFO
#define TARGET_ENCODE_SECTION_INFO s390_encode_section_info

#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P s390_scalar_mode_supported_p

#ifdef HAVE_AS_TLS
#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS true
#endif
#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM s390_cannot_force_const_mem

#undef TARGET_DELEGITIMIZE_ADDRESS
#define TARGET_DELEGITIMIZE_ADDRESS s390_delegitimize_address

#undef TARGET_LEGITIMIZE_ADDRESS
#define TARGET_LEGITIMIZE_ADDRESS s390_legitimize_address

#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY s390_return_in_memory

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS s390_init_builtins
#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN s390_expand_builtin
#undef TARGET_BUILTIN_DECL
#define TARGET_BUILTIN_DECL s390_builtin_decl

#undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
#define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA s390_output_addr_const_extra

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK s390_output_mi_thunk
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true

#undef TARGET_C_EXCESS_PRECISION
#define TARGET_C_EXCESS_PRECISION s390_excess_precision

#undef TARGET_SCHED_ADJUST_PRIORITY
#define TARGET_SCHED_ADJUST_PRIORITY s390_adjust_priority
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE s390_issue_rate
#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD s390_first_cycle_multipass_dfa_lookahead

#undef TARGET_SCHED_VARIABLE_ISSUE
#define TARGET_SCHED_VARIABLE_ISSUE s390_sched_variable_issue
#undef TARGET_SCHED_REORDER
#define TARGET_SCHED_REORDER s390_sched_reorder
#undef TARGET_SCHED_INIT
#define TARGET_SCHED_INIT s390_sched_init

#undef TARGET_CANNOT_COPY_INSN_P
#define TARGET_CANNOT_COPY_INSN_P s390_cannot_copy_insn_p
#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS s390_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST s390_address_cost
#undef TARGET_REGISTER_MOVE_COST
#define TARGET_REGISTER_MOVE_COST s390_register_move_cost
#undef TARGET_MEMORY_MOVE_COST
#define TARGET_MEMORY_MOVE_COST s390_memory_move_cost
#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
  s390_builtin_vectorization_cost

#undef TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG s390_reorg

#undef TARGET_VALID_POINTER_MODE
#define TARGET_VALID_POINTER_MODE s390_valid_pointer_mode

#undef TARGET_BUILD_BUILTIN_VA_LIST
#define TARGET_BUILD_BUILTIN_VA_LIST s390_build_builtin_va_list
#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START s390_va_start
#undef TARGET_ASAN_SHADOW_OFFSET
#define TARGET_ASAN_SHADOW_OFFSET s390_asan_shadow_offset
#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR s390_gimplify_va_arg

#undef TARGET_PROMOTE_FUNCTION_MODE
#define TARGET_PROMOTE_FUNCTION_MODE s390_promote_function_mode
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE s390_pass_by_reference

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL s390_function_ok_for_sibcall
#undef TARGET_FUNCTION_ARG
#define TARGET_FUNCTION_ARG s390_function_arg
#undef TARGET_FUNCTION_ARG_ADVANCE
#define TARGET_FUNCTION_ARG_ADVANCE s390_function_arg_advance
#undef TARGET_FUNCTION_ARG_PADDING
#define TARGET_FUNCTION_ARG_PADDING s390_function_arg_padding
#undef TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE s390_function_value
#undef TARGET_LIBCALL_VALUE
#define TARGET_LIBCALL_VALUE s390_libcall_value
#undef TARGET_STRICT_ARGUMENT_NAMING
#define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true

#undef TARGET_KEEP_LEAF_WHEN_PROFILED
#define TARGET_KEEP_LEAF_WHEN_PROFILED s390_keep_leaf_when_profiled

#undef TARGET_FIXED_CONDITION_CODE_REGS
#define TARGET_FIXED_CONDITION_CODE_REGS s390_fixed_condition_code_regs

#undef TARGET_CC_MODES_COMPATIBLE
#define TARGET_CC_MODES_COMPATIBLE s390_cc_modes_compatible

#undef TARGET_INVALID_WITHIN_DOLOOP
#define TARGET_INVALID_WITHIN_DOLOOP hook_constcharptr_const_rtx_insn_null

#ifdef HAVE_AS_TLS
#undef TARGET_ASM_OUTPUT_DWARF_DTPREL
#define TARGET_ASM_OUTPUT_DWARF_DTPREL s390_output_dwarf_dtprel
#endif

#undef TARGET_DWARF_FRAME_REG_MODE
#define TARGET_DWARF_FRAME_REG_MODE s390_dwarf_frame_reg_mode

#ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
#undef TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE s390_mangle_type
#endif

#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P s390_scalar_mode_supported_p

#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P s390_vector_mode_supported_p

#undef TARGET_PREFERRED_RELOAD_CLASS
#define TARGET_PREFERRED_RELOAD_CLASS s390_preferred_reload_class

#undef TARGET_SECONDARY_RELOAD
#define TARGET_SECONDARY_RELOAD s390_secondary_reload
#undef TARGET_SECONDARY_MEMORY_NEEDED
#define TARGET_SECONDARY_MEMORY_NEEDED s390_secondary_memory_needed
#undef TARGET_SECONDARY_MEMORY_NEEDED_MODE
#define TARGET_SECONDARY_MEMORY_NEEDED_MODE s390_secondary_memory_needed_mode

#undef TARGET_LIBGCC_CMP_RETURN_MODE
#define TARGET_LIBGCC_CMP_RETURN_MODE s390_libgcc_cmp_return_mode

#undef TARGET_LIBGCC_SHIFT_COUNT_MODE
#define TARGET_LIBGCC_SHIFT_COUNT_MODE s390_libgcc_shift_count_mode

#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P s390_legitimate_address_p

#undef TARGET_LEGITIMATE_CONSTANT_P
#define TARGET_LEGITIMATE_CONSTANT_P s390_legitimate_constant_p

#undef TARGET_LRA_P
#define TARGET_LRA_P s390_lra_p

#undef TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE s390_can_eliminate

#undef TARGET_CONDITIONAL_REGISTER_USAGE
#define TARGET_CONDITIONAL_REGISTER_USAGE s390_conditional_register_usage

#undef TARGET_LOOP_UNROLL_ADJUST
#define TARGET_LOOP_UNROLL_ADJUST s390_loop_unroll_adjust

#undef TARGET_ASM_TRAMPOLINE_TEMPLATE
#define TARGET_ASM_TRAMPOLINE_TEMPLATE s390_asm_trampoline_template
#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT s390_trampoline_init

/* PR 79421 */
#undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
#define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 1

#undef TARGET_UNWIND_WORD_MODE
#define TARGET_UNWIND_WORD_MODE s390_unwind_word_mode

#undef TARGET_CANONICALIZE_COMPARISON
#define TARGET_CANONICALIZE_COMPARISON s390_canonicalize_comparison

#undef TARGET_HARD_REGNO_SCRATCH_OK
#define TARGET_HARD_REGNO_SCRATCH_OK s390_hard_regno_scratch_ok

#undef TARGET_HARD_REGNO_NREGS
#define TARGET_HARD_REGNO_NREGS s390_hard_regno_nregs
#undef TARGET_HARD_REGNO_MODE_OK
#define TARGET_HARD_REGNO_MODE_OK s390_hard_regno_mode_ok
#undef TARGET_MODES_TIEABLE_P
#define TARGET_MODES_TIEABLE_P s390_modes_tieable_p

#undef TARGET_HARD_REGNO_CALL_PART_CLOBBERED
#define TARGET_HARD_REGNO_CALL_PART_CLOBBERED \
  s390_hard_regno_call_part_clobbered

#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE s390_attribute_table

#undef TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P
#define TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P hook_bool_const_tree_true

#undef TARGET_SET_UP_BY_PROLOGUE
#define TARGET_SET_UP_BY_PROLOGUE s300_set_up_by_prologue

#undef TARGET_EXTRA_LIVE_ON_ENTRY
#define TARGET_EXTRA_LIVE_ON_ENTRY s390_live_on_entry

#undef TARGET_USE_BY_PIECES_INFRASTRUCTURE_P
#define TARGET_USE_BY_PIECES_INFRASTRUCTURE_P \
  s390_use_by_pieces_infrastructure_p

#undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
#define TARGET_ATOMIC_ASSIGN_EXPAND_FENV s390_atomic_assign_expand_fenv

#undef TARGET_INVALID_ARG_FOR_UNPROTOTYPED_FN
#define TARGET_INVALID_ARG_FOR_UNPROTOTYPED_FN s390_invalid_arg_for_unprototyped_fn

#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE s390_preferred_simd_mode

#undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
#define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT s390_support_vector_misalignment

#undef TARGET_VECTOR_ALIGNMENT
#define TARGET_VECTOR_ALIGNMENT s390_vector_alignment

#undef TARGET_INVALID_BINARY_OP
#define TARGET_INVALID_BINARY_OP s390_invalid_binary_op

#ifdef HAVE_AS_MACHINE_MACHINEMODE
#undef TARGET_ASM_FILE_START
#define TARGET_ASM_FILE_START s390_asm_file_start
#endif

#undef TARGET_ASM_FILE_END
#define TARGET_ASM_FILE_END s390_asm_file_end

#undef TARGET_SET_CURRENT_FUNCTION
#define TARGET_SET_CURRENT_FUNCTION s390_set_current_function

#if S390_USE_TARGET_ATTRIBUTE
#undef TARGET_OPTION_VALID_ATTRIBUTE_P
#define TARGET_OPTION_VALID_ATTRIBUTE_P s390_valid_target_attribute_p

#undef TARGET_CAN_INLINE_P
#define TARGET_CAN_INLINE_P s390_can_inline_p
#endif

#undef TARGET_OPTION_RESTORE
#define TARGET_OPTION_RESTORE s390_function_specific_restore

#undef TARGET_CAN_CHANGE_MODE_CLASS
#define TARGET_CAN_CHANGE_MODE_CLASS s390_can_change_mode_class

#undef TARGET_CONSTANT_ALIGNMENT
#define TARGET_CONSTANT_ALIGNMENT s390_constant_alignment

#undef TARGET_ASM_CODE_END
#define TARGET_ASM_CODE_END s390_code_end

#undef TARGET_CASE_VALUES_THRESHOLD
#define TARGET_CASE_VALUES_THRESHOLD s390_case_values_threshold

struct gcc_target targetm = TARGET_INITIALIZER;

#include "gt-s390.h"