/* Subroutines used for code generation on IBM S/390 and zSeries
   Copyright (C) 1999-2018 Free Software Foundation, Inc.
   Contributed by Hartmut Penner (hpenner@de.ibm.com) and
                  Ulrich Weigand (uweigand@de.ibm.com) and
                  Andreas Krebbel (Andreas.Krebbel@de.ibm.com).

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#define IN_TARGET_CODE 1

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "target.h"
#include "target-globals.h"
#include "rtl.h"
#include "tree.h"
#include "gimple.h"
#include "cfghooks.h"
#include "cfgloop.h"
#include "df.h"
#include "memmodel.h"
#include "tm_p.h"
#include "stringpool.h"
#include "attribs.h"
#include "expmed.h"
#include "optabs.h"
#include "regs.h"
#include "emit-rtl.h"
#include "recog.h"
#include "cgraph.h"
#include "diagnostic-core.h"
#include "diagnostic.h"
#include "alias.h"
#include "fold-const.h"
#include "print-tree.h"
#include "stor-layout.h"
#include "varasm.h"
#include "calls.h"
#include "conditions.h"
#include "output.h"
#include "insn-attr.h"
#include "flags.h"
#include "except.h"
#include "dojump.h"
#include "explow.h"
#include "stmt.h"
#include "expr.h"
#include "reload.h"
#include "cfgrtl.h"
#include "cfganal.h"
#include "lcm.h"
#include "cfgbuild.h"
#include "cfgcleanup.h"
#include "debug.h"
#include "langhooks.h"
#include "internal-fn.h"
#include "gimple-fold.h"
#include "tree-eh.h"
#include "gimplify.h"
#include "params.h"
#include "opts.h"
#include "tree-pass.h"
#include "context.h"
#include "builtins.h"
#include "rtl-iter.h"
#include "intl.h"
#include "tm-constrs.h"
#include "tree-vrp.h"
#include "symbol-summary.h"
#include "ipa-prop.h"
#include "ipa-fnsummary.h"
#include "sched-int.h"

/* This file should be included last.  */
#include "target-def.h"

static bool s390_hard_regno_mode_ok (unsigned int, machine_mode);

/* Remember the last target of s390_set_current_function.  */
static GTY(()) tree s390_previous_fndecl;

/* Define the specific costs for a given cpu.  */

struct processor_costs
{
  /* multiplication */
  const int m;        /* cost of an M instruction.  */
  const int mghi;     /* cost of an MGHI instruction.  */
  const int mh;       /* cost of an MH instruction.  */
  const int mhi;      /* cost of an MHI instruction.  */
  const int ml;       /* cost of an ML instruction.  */
  const int mr;       /* cost of an MR instruction.  */
  const int ms;       /* cost of an MS instruction.  */
  const int msg;      /* cost of an MSG instruction.  */
  const int msgf;     /* cost of an MSGF instruction.  */
  const int msgfr;    /* cost of an MSGFR instruction.  */
  const int msgr;     /* cost of an MSGR instruction.  */
  const int msr;      /* cost of an MSR instruction.  */
  const int mult_df;  /* cost of multiplication in DFmode.  */
  const int mxbr;
  /* square root */
  const int sqxbr;    /* cost of square root in TFmode.  */
  const int sqdbr;    /* cost of square root in DFmode.  */
  const int sqebr;    /* cost of square root in SFmode.  */
  /* multiply and add */
  const int madbr;    /* cost of multiply and add in DFmode.  */
  const int maebr;    /* cost of multiply and add in SFmode.  */
  /* division */
  const int dxbr;
  const int ddbr;
  const int debr;
  const int dlgr;
  const int dlr;
  const int dr;
  const int dsgfr;
  const int dsgr;
};

#define s390_cost ((const struct processor_costs *)(s390_cost_pointer))
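
/* For illustration: the cost tables below are consulted through the
   s390_cost pointer, e.g. the rtx cost hook compares values such as
   "s390_cost->msgfr" when costing a multiply.  (A sketch; the actual
   uses appear further down in this file.)  */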

static const
struct processor_costs z900_cost =
{
  COSTS_N_INSNS (5),   /* M */
  COSTS_N_INSNS (10),  /* MGHI */
  COSTS_N_INSNS (5),   /* MH */
  COSTS_N_INSNS (4),   /* MHI */
  COSTS_N_INSNS (5),   /* ML */
  COSTS_N_INSNS (5),   /* MR */
  COSTS_N_INSNS (4),   /* MS */
  COSTS_N_INSNS (15),  /* MSG */
  COSTS_N_INSNS (7),   /* MSGF */
  COSTS_N_INSNS (7),   /* MSGFR */
  COSTS_N_INSNS (10),  /* MSGR */
  COSTS_N_INSNS (4),   /* MSR */
  COSTS_N_INSNS (7),   /* multiplication in DFmode */
  COSTS_N_INSNS (13),  /* MXBR */
  COSTS_N_INSNS (136), /* SQXBR */
  COSTS_N_INSNS (44),  /* SQDBR */
  COSTS_N_INSNS (35),  /* SQEBR */
  COSTS_N_INSNS (18),  /* MADBR */
  COSTS_N_INSNS (13),  /* MAEBR */
  COSTS_N_INSNS (134), /* DXBR */
  COSTS_N_INSNS (30),  /* DDBR */
  COSTS_N_INSNS (27),  /* DEBR */
  COSTS_N_INSNS (220), /* DLGR */
  COSTS_N_INSNS (34),  /* DLR */
  COSTS_N_INSNS (34),  /* DR */
  COSTS_N_INSNS (32),  /* DSGFR */
  COSTS_N_INSNS (32),  /* DSGR */
};

static const
struct processor_costs z990_cost =
{
  COSTS_N_INSNS (4),   /* M */
  COSTS_N_INSNS (2),   /* MGHI */
  COSTS_N_INSNS (2),   /* MH */
  COSTS_N_INSNS (2),   /* MHI */
  COSTS_N_INSNS (4),   /* ML */
  COSTS_N_INSNS (4),   /* MR */
  COSTS_N_INSNS (5),   /* MS */
  COSTS_N_INSNS (6),   /* MSG */
  COSTS_N_INSNS (4),   /* MSGF */
  COSTS_N_INSNS (4),   /* MSGFR */
  COSTS_N_INSNS (4),   /* MSGR */
  COSTS_N_INSNS (4),   /* MSR */
  COSTS_N_INSNS (1),   /* multiplication in DFmode */
  COSTS_N_INSNS (28),  /* MXBR */
  COSTS_N_INSNS (130), /* SQXBR */
  COSTS_N_INSNS (66),  /* SQDBR */
  COSTS_N_INSNS (38),  /* SQEBR */
  COSTS_N_INSNS (1),   /* MADBR */
  COSTS_N_INSNS (1),   /* MAEBR */
  COSTS_N_INSNS (60),  /* DXBR */
  COSTS_N_INSNS (40),  /* DDBR */
  COSTS_N_INSNS (26),  /* DEBR */
  COSTS_N_INSNS (176), /* DLGR */
  COSTS_N_INSNS (31),  /* DLR */
  COSTS_N_INSNS (31),  /* DR */
  COSTS_N_INSNS (31),  /* DSGFR */
  COSTS_N_INSNS (31),  /* DSGR */
};

static const
struct processor_costs z9_109_cost =
{
  COSTS_N_INSNS (4),   /* M */
  COSTS_N_INSNS (2),   /* MGHI */
  COSTS_N_INSNS (2),   /* MH */
  COSTS_N_INSNS (2),   /* MHI */
  COSTS_N_INSNS (4),   /* ML */
  COSTS_N_INSNS (4),   /* MR */
  COSTS_N_INSNS (5),   /* MS */
  COSTS_N_INSNS (6),   /* MSG */
  COSTS_N_INSNS (4),   /* MSGF */
  COSTS_N_INSNS (4),   /* MSGFR */
  COSTS_N_INSNS (4),   /* MSGR */
  COSTS_N_INSNS (4),   /* MSR */
  COSTS_N_INSNS (1),   /* multiplication in DFmode */
  COSTS_N_INSNS (28),  /* MXBR */
  COSTS_N_INSNS (130), /* SQXBR */
  COSTS_N_INSNS (66),  /* SQDBR */
  COSTS_N_INSNS (38),  /* SQEBR */
  COSTS_N_INSNS (1),   /* MADBR */
  COSTS_N_INSNS (1),   /* MAEBR */
  COSTS_N_INSNS (60),  /* DXBR */
  COSTS_N_INSNS (40),  /* DDBR */
  COSTS_N_INSNS (26),  /* DEBR */
  COSTS_N_INSNS (30),  /* DLGR */
  COSTS_N_INSNS (23),  /* DLR */
  COSTS_N_INSNS (23),  /* DR */
  COSTS_N_INSNS (24),  /* DSGFR */
  COSTS_N_INSNS (24),  /* DSGR */
};

static const
struct processor_costs z10_cost =
{
  COSTS_N_INSNS (10),  /* M */
  COSTS_N_INSNS (10),  /* MGHI */
  COSTS_N_INSNS (10),  /* MH */
  COSTS_N_INSNS (10),  /* MHI */
  COSTS_N_INSNS (10),  /* ML */
  COSTS_N_INSNS (10),  /* MR */
  COSTS_N_INSNS (10),  /* MS */
  COSTS_N_INSNS (10),  /* MSG */
  COSTS_N_INSNS (10),  /* MSGF */
  COSTS_N_INSNS (10),  /* MSGFR */
  COSTS_N_INSNS (10),  /* MSGR */
  COSTS_N_INSNS (10),  /* MSR */
  COSTS_N_INSNS (1),   /* multiplication in DFmode */
  COSTS_N_INSNS (50),  /* MXBR */
  COSTS_N_INSNS (120), /* SQXBR */
  COSTS_N_INSNS (52),  /* SQDBR */
  COSTS_N_INSNS (38),  /* SQEBR */
  COSTS_N_INSNS (1),   /* MADBR */
  COSTS_N_INSNS (1),   /* MAEBR */
  COSTS_N_INSNS (111), /* DXBR */
  COSTS_N_INSNS (39),  /* DDBR */
  COSTS_N_INSNS (32),  /* DEBR */
  COSTS_N_INSNS (160), /* DLGR */
  COSTS_N_INSNS (71),  /* DLR */
  COSTS_N_INSNS (71),  /* DR */
  COSTS_N_INSNS (71),  /* DSGFR */
  COSTS_N_INSNS (71),  /* DSGR */
};

static const
struct processor_costs z196_cost =
{
  COSTS_N_INSNS (7),   /* M */
  COSTS_N_INSNS (5),   /* MGHI */
  COSTS_N_INSNS (5),   /* MH */
  COSTS_N_INSNS (5),   /* MHI */
  COSTS_N_INSNS (7),   /* ML */
  COSTS_N_INSNS (7),   /* MR */
  COSTS_N_INSNS (6),   /* MS */
  COSTS_N_INSNS (8),   /* MSG */
  COSTS_N_INSNS (6),   /* MSGF */
  COSTS_N_INSNS (6),   /* MSGFR */
  COSTS_N_INSNS (8),   /* MSGR */
  COSTS_N_INSNS (6),   /* MSR */
  COSTS_N_INSNS (1),   /* multiplication in DFmode */
  COSTS_N_INSNS (40),  /* MXBR B+40 */
  COSTS_N_INSNS (100), /* SQXBR B+100 */
  COSTS_N_INSNS (42),  /* SQDBR B+42 */
  COSTS_N_INSNS (28),  /* SQEBR B+28 */
  COSTS_N_INSNS (1),   /* MADBR B */
  COSTS_N_INSNS (1),   /* MAEBR B */
  COSTS_N_INSNS (101), /* DXBR B+101 */
  COSTS_N_INSNS (29),  /* DDBR */
  COSTS_N_INSNS (22),  /* DEBR */
  COSTS_N_INSNS (160), /* DLGR cracked */
  COSTS_N_INSNS (160), /* DLR cracked */
  COSTS_N_INSNS (160), /* DR expanded */
  COSTS_N_INSNS (160), /* DSGFR cracked */
  COSTS_N_INSNS (160), /* DSGR cracked */
};

static const
struct processor_costs zEC12_cost =
{
  COSTS_N_INSNS (7),   /* M */
  COSTS_N_INSNS (5),   /* MGHI */
  COSTS_N_INSNS (5),   /* MH */
  COSTS_N_INSNS (5),   /* MHI */
  COSTS_N_INSNS (7),   /* ML */
  COSTS_N_INSNS (7),   /* MR */
  COSTS_N_INSNS (6),   /* MS */
  COSTS_N_INSNS (8),   /* MSG */
  COSTS_N_INSNS (6),   /* MSGF */
  COSTS_N_INSNS (6),   /* MSGFR */
  COSTS_N_INSNS (8),   /* MSGR */
  COSTS_N_INSNS (6),   /* MSR */
  COSTS_N_INSNS (1),   /* multiplication in DFmode */
  COSTS_N_INSNS (40),  /* MXBR B+40 */
  COSTS_N_INSNS (100), /* SQXBR B+100 */
  COSTS_N_INSNS (42),  /* SQDBR B+42 */
  COSTS_N_INSNS (28),  /* SQEBR B+28 */
  COSTS_N_INSNS (1),   /* MADBR B */
  COSTS_N_INSNS (1),   /* MAEBR B */
  COSTS_N_INSNS (131), /* DXBR B+131 */
  COSTS_N_INSNS (29),  /* DDBR */
  COSTS_N_INSNS (22),  /* DEBR */
  COSTS_N_INSNS (160), /* DLGR cracked */
  COSTS_N_INSNS (160), /* DLR cracked */
  COSTS_N_INSNS (160), /* DR expanded */
  COSTS_N_INSNS (160), /* DSGFR cracked */
  COSTS_N_INSNS (160), /* DSGR cracked */
};

static struct
{
  /* The preferred name to be used in user visible output.  */
  const char *const name;
  /* CPU name as it should be passed to Binutils via .machine  */
  const char *const binutils_name;
  const enum processor_type processor;
  const struct processor_costs *cost;
}
const processor_table[] =
{
  { "g5",     "g5",     PROCESSOR_9672_G5,     &z900_cost },
  { "g6",     "g6",     PROCESSOR_9672_G6,     &z900_cost },
  { "z900",   "z900",   PROCESSOR_2064_Z900,   &z900_cost },
  { "z990",   "z990",   PROCESSOR_2084_Z990,   &z990_cost },
  { "z9-109", "z9-109", PROCESSOR_2094_Z9_109, &z9_109_cost },
  { "z9-ec",  "z9-ec",  PROCESSOR_2094_Z9_EC,  &z9_109_cost },
  { "z10",    "z10",    PROCESSOR_2097_Z10,    &z10_cost },
  { "z196",   "z196",   PROCESSOR_2817_Z196,   &z196_cost },
  { "zEC12",  "zEC12",  PROCESSOR_2827_ZEC12,  &zEC12_cost },
  { "z13",    "z13",    PROCESSOR_2964_Z13,    &zEC12_cost },
  { "z14",    "arch12", PROCESSOR_3906_Z14,    &zEC12_cost },
  { "native", "",       PROCESSOR_NATIVE,      NULL }
};

extern int reload_completed;

/* Kept up to date using the SCHED_VARIABLE_ISSUE hook.  */
static rtx_insn *last_scheduled_insn;
#define MAX_SCHED_UNITS 3
static int last_scheduled_unit_distance[MAX_SCHED_UNITS];

#define NUM_SIDES 2
static int current_side = 1;
#define LONGRUNNING_THRESHOLD 5

/* Estimate of the number of cycles for which a long-running insn
   occupies an execution unit.  */
static unsigned fxu_longrunning[NUM_SIDES];
static unsigned vfu_longrunning[NUM_SIDES];

/* Factor to scale latencies by, determined by measurements.  */
#define LATENCY_FACTOR 4

/* The maximum score added for an instruction whose unit hasn't been
   in use for MAX_SCHED_MIX_DISTANCE steps.  Increase this value to
   give instruction mix scheduling more priority over instruction
   grouping.  */
#define MAX_SCHED_MIX_SCORE 8

/* The maximum distance up to which individual scores will be
   calculated.  Everything beyond this gives MAX_SCHED_MIX_SCORE.
   Increase this with the OOO window size of the machine.  */
#define MAX_SCHED_MIX_DISTANCE 100

/* Structure used to hold the components of an S/390 memory
   address.  A legitimate address on S/390 is of the general
   form
     base + index + displacement
   where any of the components is optional.

   base and index are registers of the class ADDR_REGS,
   displacement is an unsigned 12-bit immediate constant.  */

struct s390_address
{
  rtx base;
  rtx indx;
  rtx disp;
  bool pointer;
  bool literal_pool;
};
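
/* For illustration: the memory operand of an instruction like
   "l %r1,8(%r2,%r3)" decomposes into base = %r3, indx = %r2 and
   disp = (const_int 8).  (A sketch; the exact base/index assignment
   is up to the address decomposition code later in this file.)  */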

/* A few accessor macros for struct cfun->machine->s390_frame_layout.  */

#define cfun_frame_layout (cfun->machine->frame_layout)
#define cfun_save_high_fprs_p (!!cfun_frame_layout.high_fprs)
#define cfun_save_arg_fprs_p (!!(TARGET_64BIT \
				 ? cfun_frame_layout.fpr_bitmap & 0x0f \
				 : cfun_frame_layout.fpr_bitmap & 0x03))
#define cfun_gprs_save_area_size ((cfun_frame_layout.last_save_gpr_slot - \
  cfun_frame_layout.first_save_gpr_slot + 1) * UNITS_PER_LONG)
#define cfun_set_fpr_save(REGNO) (cfun->machine->frame_layout.fpr_bitmap |= \
  (1 << (REGNO - FPR0_REGNUM)))
#define cfun_fpr_save_p(REGNO) (!!(cfun->machine->frame_layout.fpr_bitmap & \
  (1 << (REGNO - FPR0_REGNUM))))
#define cfun_gpr_save_slot(REGNO) \
  cfun->machine->frame_layout.gpr_save_slots[REGNO]

/* Number of GPRs and FPRs used for argument passing.  */
#define GP_ARG_NUM_REG 5
#define FP_ARG_NUM_REG (TARGET_64BIT ? 4 : 2)
#define VEC_ARG_NUM_REG 8
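
/* For reference (per the zSeries ELF ABI): the argument GPRs are
   %r2..%r6, the argument FPRs are %f0, %f2, %f4 and %f6 (only %f0
   and %f2 on 31-bit targets), and the argument VRs are %v24..%v31.  */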

/* A couple of shortcuts.  */
#define CONST_OK_FOR_J(x) \
  CONST_OK_FOR_CONSTRAINT_P((x), 'J', "J")
#define CONST_OK_FOR_K(x) \
  CONST_OK_FOR_CONSTRAINT_P((x), 'K', "K")
#define CONST_OK_FOR_Os(x) \
  CONST_OK_FOR_CONSTRAINT_P((x), 'O', "Os")
#define CONST_OK_FOR_Op(x) \
  CONST_OK_FOR_CONSTRAINT_P((x), 'O', "Op")
#define CONST_OK_FOR_On(x) \
  CONST_OK_FOR_CONSTRAINT_P((x), 'O', "On")

#define REGNO_PAIR_OK(REGNO, MODE) \
  (s390_hard_regno_nregs ((REGNO), (MODE)) == 1 || !((REGNO) & 1))
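
/* E.g. on a 31-bit target a DImode value occupies two GPRs, so
   REGNO_PAIR_OK (6, DImode) holds (the even/odd pair %r6/%r7) while
   REGNO_PAIR_OK (7, DImode) does not.  (Illustration only.)  */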

/* That's the read-ahead of the dynamic branch prediction unit in
   bytes on a z10 (or higher) CPU.  */
#define PREDICT_DISTANCE (TARGET_Z10 ? 384 : 2048)

/* Masks per jump target register indicating which thunks need to be
   generated.  */
static GTY(()) int indirect_branch_prez10thunk_mask = 0;
static GTY(()) int indirect_branch_z10thunk_mask = 0;

#define INDIRECT_BRANCH_NUM_OPTIONS 4

enum s390_indirect_branch_option
  {
    s390_opt_indirect_branch_jump = 0,
    s390_opt_indirect_branch_call,
    s390_opt_function_return_reg,
    s390_opt_function_return_mem
  };

static GTY(()) int indirect_branch_table_label_no[INDIRECT_BRANCH_NUM_OPTIONS] = { 0 };
const char *indirect_branch_table_label[INDIRECT_BRANCH_NUM_OPTIONS] =
  { "LJUMP", "LCALL", "LRETREG", "LRETMEM" };
const char *indirect_branch_table_name[INDIRECT_BRANCH_NUM_OPTIONS] =
  { ".s390_indirect_jump", ".s390_indirect_call",
    ".s390_return_reg", ".s390_return_mem" };

bool
s390_return_addr_from_memory ()
{
  return cfun_gpr_save_slot (RETURN_REGNUM) == SAVE_SLOT_STACK;
}

/* Indicate which ABI has been used for passing vector args.
   0 - no vector type arguments have been passed where the ABI is relevant
   1 - the old ABI has been used
   2 - a vector type argument has been passed either in a vector register
       or on the stack by value  */
static int s390_vector_abi = 0;

/* Set the vector ABI marker if TYPE is subject to the vector ABI
   switch.  The vector ABI affects only vector data types.  There are
   two aspects of the vector ABI relevant here:

   1. vectors >= 16 bytes have an alignment of 8 bytes with the new
      ABI and natural alignment with the old.

   2. vectors <= 16 bytes are passed in VRs or by value on the stack
      with the new ABI but by reference on the stack with the old.

   If ARG_P is true TYPE is used for a function argument or return
   value.  The ABI marker then is set for all vector data types.  If
   ARG_P is false only type 1 vectors are being checked.  */

static void
s390_check_type_for_vector_abi (const_tree type, bool arg_p, bool in_struct_p)
{
  static hash_set<const_tree> visited_types_hash;

  if (s390_vector_abi)
    return;

  if (type == NULL_TREE || TREE_CODE (type) == ERROR_MARK)
    return;

  if (visited_types_hash.contains (type))
    return;

  visited_types_hash.add (type);

  if (VECTOR_TYPE_P (type))
    {
      int type_size = int_size_in_bytes (type);

      /* Outside arguments only the alignment is changing and this
	 only happens for vector types >= 16 bytes.  */
      if (!arg_p && type_size < 16)
	return;

      /* In arguments vector types > 16 bytes are passed as before
	 (GCC never enforced the bigger alignment for arguments which
	 was required by the old vector ABI).  However, it might still
	 be ABI relevant due to the changed alignment if it is a
	 struct member.  */
      if (arg_p && type_size > 16 && !in_struct_p)
	return;

      s390_vector_abi = TARGET_VX_ABI ? 2 : 1;
    }
  else if (POINTER_TYPE_P (type) || TREE_CODE (type) == ARRAY_TYPE)
    {
      /* ARRAY_TYPE: Since with neither of the ABIs we have more than
	 natural alignment there will never be ABI dependent padding
	 in an array type.  That's why we do not set in_struct_p to
	 true here.  */
      s390_check_type_for_vector_abi (TREE_TYPE (type), arg_p, in_struct_p);
    }
  else if (TREE_CODE (type) == FUNCTION_TYPE || TREE_CODE (type) == METHOD_TYPE)
    {
      tree arg_chain;

      /* Check the return type.  */
      s390_check_type_for_vector_abi (TREE_TYPE (type), true, false);

      for (arg_chain = TYPE_ARG_TYPES (type);
	   arg_chain;
	   arg_chain = TREE_CHAIN (arg_chain))
	s390_check_type_for_vector_abi (TREE_VALUE (arg_chain), true, false);
    }
  else if (RECORD_OR_UNION_TYPE_P (type))
    {
      tree field;

      for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
	{
	  if (TREE_CODE (field) != FIELD_DECL)
	    continue;

	  s390_check_type_for_vector_abi (TREE_TYPE (field), arg_p, true);
	}
    }
}
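
/* For illustration: with the new (vector) ABI a 16-byte "vector
   double" used as a global object falls under aspect 1 above (its
   alignment drops from 16 to 8 bytes), and the same type used as a
   by-value argument falls under aspect 2; either use sets the
   marker.  (Example only.)  */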


/* System z builtins.  */

#include "s390-builtins.h"

const unsigned int bflags_builtin[S390_BUILTIN_MAX + 1] =
  {
#undef B_DEF
#undef OB_DEF
#undef OB_DEF_VAR
#define B_DEF(NAME, PATTERN, ATTRS, BFLAGS, ...) BFLAGS,
#define OB_DEF(...)
#define OB_DEF_VAR(...)
#include "s390-builtins.def"
    0
  };

const unsigned int opflags_builtin[S390_BUILTIN_MAX + 1] =
  {
#undef B_DEF
#undef OB_DEF
#undef OB_DEF_VAR
#define B_DEF(NAME, PATTERN, ATTRS, BFLAGS, OPFLAGS, ...) OPFLAGS,
#define OB_DEF(...)
#define OB_DEF_VAR(...)
#include "s390-builtins.def"
    0
  };

const unsigned int bflags_overloaded_builtin[S390_OVERLOADED_BUILTIN_MAX + 1] =
  {
#undef B_DEF
#undef OB_DEF
#undef OB_DEF_VAR
#define B_DEF(...)
#define OB_DEF(NAME, FIRST_VAR_NAME, LAST_VAR_NAME, BFLAGS, ...) BFLAGS,
#define OB_DEF_VAR(...)
#include "s390-builtins.def"
    0
  };

const unsigned int
bflags_overloaded_builtin_var[S390_OVERLOADED_BUILTIN_VAR_MAX + 1] =
  {
#undef B_DEF
#undef OB_DEF
#undef OB_DEF_VAR
#define B_DEF(...)
#define OB_DEF(...)
#define OB_DEF_VAR(NAME, PATTERN, FLAGS, OPFLAGS, FNTYPE) FLAGS,
#include "s390-builtins.def"
    0
  };

const unsigned int
opflags_overloaded_builtin_var[S390_OVERLOADED_BUILTIN_VAR_MAX + 1] =
  {
#undef B_DEF
#undef OB_DEF
#undef OB_DEF_VAR
#define B_DEF(...)
#define OB_DEF(...)
#define OB_DEF_VAR(NAME, PATTERN, FLAGS, OPFLAGS, FNTYPE) OPFLAGS,
#include "s390-builtins.def"
    0
  };

tree s390_builtin_types[BT_MAX];
tree s390_builtin_fn_types[BT_FN_MAX];
tree s390_builtin_decls[S390_BUILTIN_MAX +
			S390_OVERLOADED_BUILTIN_MAX +
			S390_OVERLOADED_BUILTIN_VAR_MAX];

static enum insn_code const code_for_builtin[S390_BUILTIN_MAX + 1] = {
#undef B_DEF
#undef OB_DEF
#undef OB_DEF_VAR
#define B_DEF(NAME, PATTERN, ...) CODE_FOR_##PATTERN,
#define OB_DEF(...)
#define OB_DEF_VAR(...)

#include "s390-builtins.def"
  CODE_FOR_nothing
};
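
/* How the X-macro tables above get populated, with a hypothetical
   s390-builtins.def entry for illustration:

     B_DEF (foo, foo_pattern, 0, B_VX, O1_U4, BT_FN_INT_INT)

   would contribute B_VX to bflags_builtin[], O1_U4 to
   opflags_builtin[] and CODE_FOR_foo_pattern to code_for_builtin[],
   all at index S390_BUILTIN_foo.  (The entry and its flag values are
   made up; see s390-builtins.def for the real ones.)  */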

static void
s390_init_builtins (void)
{
  /* These definitions are being used in s390-builtins.def.  */
  tree returns_twice_attr = tree_cons (get_identifier ("returns_twice"),
				       NULL, NULL);
  tree noreturn_attr = tree_cons (get_identifier ("noreturn"), NULL, NULL);
  tree c_uint64_type_node;

  /* The uint64_type_node from tree.c is not compatible with the C99
     uint64_t data type.  What we want is c_uint64_type_node from
     c-common.c.  But since backend code is not supposed to interface
     with the frontend we recreate it here.  */
  if (TARGET_64BIT)
    c_uint64_type_node = long_unsigned_type_node;
  else
    c_uint64_type_node = long_long_unsigned_type_node;

#undef DEF_TYPE
#define DEF_TYPE(INDEX, NODE, CONST_P) \
  if (s390_builtin_types[INDEX] == NULL) \
    s390_builtin_types[INDEX] = (!CONST_P) ? \
      (NODE) : build_type_variant ((NODE), 1, 0);

#undef DEF_POINTER_TYPE
#define DEF_POINTER_TYPE(INDEX, INDEX_BASE) \
  if (s390_builtin_types[INDEX] == NULL) \
    s390_builtin_types[INDEX] = \
      build_pointer_type (s390_builtin_types[INDEX_BASE]);

#undef DEF_DISTINCT_TYPE
#define DEF_DISTINCT_TYPE(INDEX, INDEX_BASE) \
  if (s390_builtin_types[INDEX] == NULL) \
    s390_builtin_types[INDEX] = \
      build_distinct_type_copy (s390_builtin_types[INDEX_BASE]);

#undef DEF_VECTOR_TYPE
#define DEF_VECTOR_TYPE(INDEX, INDEX_BASE, ELEMENTS) \
  if (s390_builtin_types[INDEX] == NULL) \
    s390_builtin_types[INDEX] = \
      build_vector_type (s390_builtin_types[INDEX_BASE], ELEMENTS);

#undef DEF_OPAQUE_VECTOR_TYPE
#define DEF_OPAQUE_VECTOR_TYPE(INDEX, INDEX_BASE, ELEMENTS) \
  if (s390_builtin_types[INDEX] == NULL) \
    s390_builtin_types[INDEX] = \
      build_opaque_vector_type (s390_builtin_types[INDEX_BASE], ELEMENTS);

#undef DEF_FN_TYPE
#define DEF_FN_TYPE(INDEX, args...) \
  if (s390_builtin_fn_types[INDEX] == NULL) \
    s390_builtin_fn_types[INDEX] = \
      build_function_type_list (args, NULL_TREE);
#undef DEF_OV_TYPE
#define DEF_OV_TYPE(...)
#include "s390-builtin-types.def"

#undef B_DEF
#define B_DEF(NAME, PATTERN, ATTRS, BFLAGS, OPFLAGS, FNTYPE) \
  if (s390_builtin_decls[S390_BUILTIN_##NAME] == NULL) \
    s390_builtin_decls[S390_BUILTIN_##NAME] = \
      add_builtin_function ("__builtin_" #NAME, \
			    s390_builtin_fn_types[FNTYPE], \
			    S390_BUILTIN_##NAME, \
			    BUILT_IN_MD, \
			    NULL, \
			    ATTRS);
#undef OB_DEF
#define OB_DEF(NAME, FIRST_VAR_NAME, LAST_VAR_NAME, BFLAGS, FNTYPE) \
  if (s390_builtin_decls[S390_OVERLOADED_BUILTIN_##NAME + S390_BUILTIN_MAX] \
      == NULL) \
    s390_builtin_decls[S390_OVERLOADED_BUILTIN_##NAME + S390_BUILTIN_MAX] = \
      add_builtin_function ("__builtin_" #NAME, \
			    s390_builtin_fn_types[FNTYPE], \
			    S390_OVERLOADED_BUILTIN_##NAME + S390_BUILTIN_MAX, \
			    BUILT_IN_MD, \
			    NULL, \
			    0);
#undef OB_DEF_VAR
#define OB_DEF_VAR(...)
#include "s390-builtins.def"

}

/* Return true if ARG is appropriate as argument number ARGNUM of
   builtin DECL.  The operand flags from s390-builtins.def have to be
   passed as OP_FLAGS.  */
bool
s390_const_operand_ok (tree arg, int argnum, int op_flags, tree decl)
{
  if (O_UIMM_P (op_flags))
    {
      int bitwidths[] = { 1, 2, 3, 4, 5, 8, 12, 16, 32 };
      int bitwidth = bitwidths[op_flags - O_U1];

      if (!tree_fits_uhwi_p (arg)
	  || tree_to_uhwi (arg) > (HOST_WIDE_INT_1U << bitwidth) - 1)
	{
	  error ("constant argument %d for builtin %qF is out of range (0.."
		 HOST_WIDE_INT_PRINT_UNSIGNED ")",
		 argnum, decl,
		 (HOST_WIDE_INT_1U << bitwidth) - 1);
	  return false;
	}
    }

  if (O_SIMM_P (op_flags))
    {
      int bitwidths[] = { 2, 3, 4, 5, 8, 12, 16, 32 };
      int bitwidth = bitwidths[op_flags - O_S2];

      if (!tree_fits_shwi_p (arg)
	  || tree_to_shwi (arg) < -(HOST_WIDE_INT_1 << (bitwidth - 1))
	  || tree_to_shwi (arg) > ((HOST_WIDE_INT_1 << (bitwidth - 1)) - 1))
	{
	  error ("constant argument %d for builtin %qF is out of range ("
		 HOST_WIDE_INT_PRINT_DEC ".."
		 HOST_WIDE_INT_PRINT_DEC ")",
		 argnum, decl,
		 -(HOST_WIDE_INT_1 << (bitwidth - 1)),
		 (HOST_WIDE_INT_1 << (bitwidth - 1)) - 1);
	  return false;
	}
    }
  return true;
}
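
/* For example, an O_U4 (unsigned 4-bit) operand accepts 0..15 and an
   O_S8 (signed 8-bit) operand accepts -128..127; anything outside
   that range is diagnosed above.  (Illustration only.)  */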

/* Expand an expression EXP that calls a built-in function,
   with result going to TARGET if that's convenient
   (and in mode MODE if that's convenient).
   SUBTARGET may be used as the target for computing one of EXP's operands.
   IGNORE is nonzero if the value is to be ignored.  */

static rtx
s390_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
		     machine_mode mode ATTRIBUTE_UNUSED,
		     int ignore ATTRIBUTE_UNUSED)
{
#define MAX_ARGS 6

  tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
  unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
  enum insn_code icode;
  rtx op[MAX_ARGS], pat;
  int arity;
  bool nonvoid;
  tree arg;
  call_expr_arg_iterator iter;
  unsigned int all_op_flags = opflags_for_builtin (fcode);
  machine_mode last_vec_mode = VOIDmode;

  if (TARGET_DEBUG_ARG)
    {
      fprintf (stderr,
	       "s390_expand_builtin, code = %4d, %s, bflags = 0x%x\n",
	       (int)fcode, IDENTIFIER_POINTER (DECL_NAME (fndecl)),
	       bflags_for_builtin (fcode));
    }

  if (S390_USE_TARGET_ATTRIBUTE)
    {
      unsigned int bflags;

      bflags = bflags_for_builtin (fcode);
      if ((bflags & B_HTM) && !TARGET_HTM)
	{
	  error ("builtin %qF is not supported without -mhtm "
		 "(default with -march=zEC12 and higher).", fndecl);
	  return const0_rtx;
	}
      if (((bflags & B_VX) || (bflags & B_VXE)) && !TARGET_VX)
	{
	  error ("builtin %qF requires -mvx "
		 "(default with -march=z13 and higher).", fndecl);
	  return const0_rtx;
	}

      if ((bflags & B_VXE) && !TARGET_VXE)
	{
	  error ("builtin %qF requires z14 or higher.", fndecl);
	  return const0_rtx;
	}
    }
  if (fcode >= S390_OVERLOADED_BUILTIN_VAR_OFFSET
      && fcode < S390_ALL_BUILTIN_MAX)
    {
      gcc_unreachable ();
    }
  else if (fcode < S390_OVERLOADED_BUILTIN_OFFSET)
    {
      icode = code_for_builtin[fcode];
      /* Set a flag in the machine specific cfun part in order to support
	 saving/restoring of FPRs.  */
      if (fcode == S390_BUILTIN_tbegin || fcode == S390_BUILTIN_tbegin_retry)
	cfun->machine->tbegin_p = true;
    }
  else if (fcode < S390_OVERLOADED_BUILTIN_VAR_OFFSET)
    {
      error ("unresolved overloaded builtin");
      return const0_rtx;
    }
  else
    internal_error ("bad builtin fcode");

  if (icode == 0)
    internal_error ("bad builtin icode");

  nonvoid = TREE_TYPE (TREE_TYPE (fndecl)) != void_type_node;

  if (nonvoid)
    {
      machine_mode tmode = insn_data[icode].operand[0].mode;
      if (!target
	  || GET_MODE (target) != tmode
	  || !(*insn_data[icode].operand[0].predicate) (target, tmode))
	target = gen_reg_rtx (tmode);

      /* There are builtins (e.g. vec_promote) with no vector
	 arguments but an element selector.  So we have to also look
	 at the vector return type when emitting the modulo
	 operation.  */
      if (VECTOR_MODE_P (insn_data[icode].operand[0].mode))
	last_vec_mode = insn_data[icode].operand[0].mode;
    }

  arity = 0;
  FOR_EACH_CALL_EXPR_ARG (arg, iter, exp)
    {
      rtx tmp_rtx;
      const struct insn_operand_data *insn_op;
      unsigned int op_flags = all_op_flags & ((1 << O_SHIFT) - 1);

      all_op_flags = all_op_flags >> O_SHIFT;

      if (arg == error_mark_node)
	return NULL_RTX;
      if (arity >= MAX_ARGS)
	return NULL_RTX;

      if (O_IMM_P (op_flags)
	  && TREE_CODE (arg) != INTEGER_CST)
	{
	  error ("constant value required for builtin %qF argument %d",
		 fndecl, arity + 1);
	  return const0_rtx;
	}

      if (!s390_const_operand_ok (arg, arity + 1, op_flags, fndecl))
	return const0_rtx;

      insn_op = &insn_data[icode].operand[arity + nonvoid];
      op[arity] = expand_expr (arg, NULL_RTX, insn_op->mode, EXPAND_NORMAL);

      /* expand_expr truncates constants to the target mode only if it
	 is "convenient".  However, our checks below rely on this
	 being done.  */
      if (CONST_INT_P (op[arity])
	  && SCALAR_INT_MODE_P (insn_op->mode)
	  && GET_MODE (op[arity]) != insn_op->mode)
	op[arity] = GEN_INT (trunc_int_for_mode (INTVAL (op[arity]),
						 insn_op->mode));

      /* Wrap the expanded RTX for pointer types into a MEM expr with
	 the proper mode.  This allows us to use e.g. (match_operand
	 "memory_operand"..) in the insn patterns instead of (mem
	 (match_operand "address_operand)).  This is helpful for
	 patterns not just accepting MEMs.  */
      if (POINTER_TYPE_P (TREE_TYPE (arg))
	  && insn_op->predicate != address_operand)
	op[arity] = gen_rtx_MEM (insn_op->mode, op[arity]);

      /* Expand the modulo operation required on element selectors.  */
      if (op_flags == O_ELEM)
	{
	  gcc_assert (last_vec_mode != VOIDmode);
	  op[arity] = simplify_expand_binop (SImode, code_to_optab (AND),
					     op[arity],
					     GEN_INT (GET_MODE_NUNITS (last_vec_mode) - 1),
					     NULL_RTX, 1, OPTAB_DIRECT);
	}

      /* Record the vector mode used for an element selector.  This assumes:
	 1. There is no builtin with two different vector modes and an
	    element selector.
	 2. The element selector comes after the vector type it is
	    referring to.
	 This is currently true for all the builtins, but FIXME: we
	 should better check for that.  */
      if (VECTOR_MODE_P (insn_op->mode))
	last_vec_mode = insn_op->mode;

      if (insn_op->predicate (op[arity], insn_op->mode))
	{
	  arity++;
	  continue;
	}

      if (MEM_P (op[arity])
	  && insn_op->predicate == memory_operand
	  && (GET_MODE (XEXP (op[arity], 0)) == Pmode
	      || GET_MODE (XEXP (op[arity], 0)) == VOIDmode))
	{
	  op[arity] = replace_equiv_address (op[arity],
					     copy_to_mode_reg (Pmode,
							       XEXP (op[arity], 0)));
	}
      /* Some of the builtins require different modes/types than the
	 pattern in order to implement a specific API.  Instead of
	 adding many expanders which do the mode change we do it here.
	 E.g. s390_vec_add_u128, which is required to have vector
	 unsigned char arguments, is mapped to addti3.  */
      else if (insn_op->mode != VOIDmode
	       && GET_MODE (op[arity]) != VOIDmode
	       && GET_MODE (op[arity]) != insn_op->mode
	       && ((tmp_rtx = simplify_gen_subreg (insn_op->mode, op[arity],
						   GET_MODE (op[arity]), 0))
		   != NULL_RTX))
	{
	  op[arity] = tmp_rtx;
	}
      else if (GET_MODE (op[arity]) == insn_op->mode
	       || GET_MODE (op[arity]) == VOIDmode
	       || (insn_op->predicate == address_operand
		   && GET_MODE (op[arity]) == Pmode))
	{
	  /* An address_operand usually has VOIDmode in the expander
	     so we cannot use this.  */
	  machine_mode target_mode =
	    (insn_op->predicate == address_operand
	     ? (machine_mode) Pmode : insn_op->mode);
	  op[arity] = copy_to_mode_reg (target_mode, op[arity]);
	}

      if (!insn_op->predicate (op[arity], insn_op->mode))
	{
	  error ("invalid argument %d for builtin %qF", arity + 1, fndecl);
	  return const0_rtx;
	}
      arity++;
    }

  switch (arity)
    {
    case 0:
      pat = GEN_FCN (icode) (target);
      break;
    case 1:
      if (nonvoid)
	pat = GEN_FCN (icode) (target, op[0]);
      else
	pat = GEN_FCN (icode) (op[0]);
      break;
    case 2:
      if (nonvoid)
	pat = GEN_FCN (icode) (target, op[0], op[1]);
      else
	pat = GEN_FCN (icode) (op[0], op[1]);
      break;
    case 3:
      if (nonvoid)
	pat = GEN_FCN (icode) (target, op[0], op[1], op[2]);
      else
	pat = GEN_FCN (icode) (op[0], op[1], op[2]);
      break;
    case 4:
      if (nonvoid)
	pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3]);
      else
	pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
      break;
    case 5:
      if (nonvoid)
	pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3], op[4]);
      else
	pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3], op[4]);
      break;
    case 6:
      if (nonvoid)
	pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3], op[4], op[5]);
      else
	pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3], op[4], op[5]);
      break;
    default:
      gcc_unreachable ();
    }
  if (!pat)
    return NULL_RTX;
  emit_insn (pat);

  if (nonvoid)
    return target;
  else
    return const0_rtx;
}


static const int s390_hotpatch_hw_max = 1000000;
static int s390_hotpatch_hw_before_label = 0;
static int s390_hotpatch_hw_after_label = 0;

/* Check whether the hotpatch attribute is applied to a function and,
   if it has arguments, whether the arguments are valid.  */

static tree
s390_handle_hotpatch_attribute (tree *node, tree name, tree args,
				int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
{
  tree expr;
  tree expr2;
  int err;

  if (TREE_CODE (*node) != FUNCTION_DECL)
    {
      warning (OPT_Wattributes, "%qE attribute only applies to functions",
	       name);
      *no_add_attrs = true;
    }
  if (args != NULL && TREE_CHAIN (args) != NULL)
    {
      expr = TREE_VALUE (args);
      expr2 = TREE_VALUE (TREE_CHAIN (args));
    }
  if (args == NULL || TREE_CHAIN (args) == NULL)
    err = 1;
  else if (TREE_CODE (expr) != INTEGER_CST
	   || !INTEGRAL_TYPE_P (TREE_TYPE (expr))
	   || wi::gtu_p (wi::to_wide (expr), s390_hotpatch_hw_max))
    err = 1;
  else if (TREE_CODE (expr2) != INTEGER_CST
	   || !INTEGRAL_TYPE_P (TREE_TYPE (expr2))
	   || wi::gtu_p (wi::to_wide (expr2), s390_hotpatch_hw_max))
    err = 1;
  else
    err = 0;
  if (err)
    {
      error ("requested %qE attribute is not a comma separated pair of"
	     " non-negative integer constants or too large (max. %d)", name,
	     s390_hotpatch_hw_max);
      *no_add_attrs = true;
    }

  return NULL_TREE;
}
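
/* Typical use of the attribute (both arguments are counted in
   halfwords):

     void f (void) __attribute__ ((hotpatch (1, 2)));

   (Illustration only.)  */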

/* Expand the s390_vector_bool type attribute.  */

static tree
s390_handle_vectorbool_attribute (tree *node, tree name ATTRIBUTE_UNUSED,
				  tree args ATTRIBUTE_UNUSED,
				  int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
{
  tree type = *node, result = NULL_TREE;
  machine_mode mode;

  while (POINTER_TYPE_P (type)
	 || TREE_CODE (type) == FUNCTION_TYPE
	 || TREE_CODE (type) == METHOD_TYPE
	 || TREE_CODE (type) == ARRAY_TYPE)
    type = TREE_TYPE (type);

  mode = TYPE_MODE (type);
  switch (mode)
    {
    case E_DImode: case E_V2DImode:
      result = s390_builtin_types[BT_BV2DI];
      break;
    case E_SImode: case E_V4SImode:
      result = s390_builtin_types[BT_BV4SI];
      break;
    case E_HImode: case E_V8HImode:
      result = s390_builtin_types[BT_BV8HI];
      break;
    case E_QImode: case E_V16QImode:
      result = s390_builtin_types[BT_BV16QI];
      break;
    default:
      break;
    }

  *no_add_attrs = true; /* No need to hang on to the attribute.  */

  if (result)
    *node = lang_hooks.types.reconstruct_complex_type (*node, result);

  return NULL_TREE;
}
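
/* A sketch of how the attribute is meant to be used: applied to a
   vector (or element) type it rewrites the type to the corresponding
   boolean vector type, e.g.

     typedef __attribute__ ((vector_size (16))) signed int __vi;
     typedef __attribute__ ((s390_vector_bool)) __vi __bv4si;

   maps __bv4si to BT_BV4SI above.  (Hypothetical typedef names.)  */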

/* Check syntax of function decl attributes having a string type value.  */

static tree
s390_handle_string_attribute (tree *node, tree name ATTRIBUTE_UNUSED,
			      tree args ATTRIBUTE_UNUSED,
			      int flags ATTRIBUTE_UNUSED,
			      bool *no_add_attrs)
{
  tree cst;

  if (TREE_CODE (*node) != FUNCTION_DECL)
    {
      warning (OPT_Wattributes, "%qE attribute only applies to functions",
	       name);
      *no_add_attrs = true;
    }

  cst = TREE_VALUE (args);

  if (TREE_CODE (cst) != STRING_CST)
    {
      warning (OPT_Wattributes,
	       "%qE attribute requires a string constant argument",
	       name);
      *no_add_attrs = true;
    }

  if (is_attribute_p ("indirect_branch", name)
      || is_attribute_p ("indirect_branch_call", name)
      || is_attribute_p ("function_return", name)
      || is_attribute_p ("function_return_reg", name)
      || is_attribute_p ("function_return_mem", name))
    {
      if (strcmp (TREE_STRING_POINTER (cst), "keep") != 0
	  && strcmp (TREE_STRING_POINTER (cst), "thunk") != 0
	  && strcmp (TREE_STRING_POINTER (cst), "thunk-extern") != 0)
	{
	  warning (OPT_Wattributes,
		   "argument to %qE attribute is not "
		   "(keep|thunk|thunk-extern)", name);
	  *no_add_attrs = true;
	}
    }

  if (is_attribute_p ("indirect_branch_jump", name)
      && strcmp (TREE_STRING_POINTER (cst), "keep") != 0
      && strcmp (TREE_STRING_POINTER (cst), "thunk") != 0
      && strcmp (TREE_STRING_POINTER (cst), "thunk-inline") != 0
      && strcmp (TREE_STRING_POINTER (cst), "thunk-extern") != 0)
    {
      warning (OPT_Wattributes,
	       "argument to %qE attribute is not "
	       "(keep|thunk|thunk-inline|thunk-extern)", name);
      *no_add_attrs = true;
    }

  return NULL_TREE;
}
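
/* Typical uses of the string attributes checked above:

     void f (void) __attribute__ ((indirect_branch ("thunk")));
     int g (int) __attribute__ ((function_return ("thunk-extern")));

   (Illustration only.)  */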

static const struct attribute_spec s390_attribute_table[] = {
  { "hotpatch", 2, 2, true, false, false, false,
    s390_handle_hotpatch_attribute, NULL },
  { "s390_vector_bool", 0, 0, false, true, false, true,
    s390_handle_vectorbool_attribute, NULL },
  { "indirect_branch", 1, 1, true, false, false, false,
    s390_handle_string_attribute, NULL },
  { "indirect_branch_jump", 1, 1, true, false, false, false,
    s390_handle_string_attribute, NULL },
  { "indirect_branch_call", 1, 1, true, false, false, false,
    s390_handle_string_attribute, NULL },
  { "function_return", 1, 1, true, false, false, false,
    s390_handle_string_attribute, NULL },
  { "function_return_reg", 1, 1, true, false, false, false,
    s390_handle_string_attribute, NULL },
  { "function_return_mem", 1, 1, true, false, false, false,
    s390_handle_string_attribute, NULL },

  /* End element.  */
  { NULL, 0, 0, false, false, false, false, NULL, NULL }
};

/* Return the alignment for LABEL.  We default to the -falign-labels
   value except for the literal pool base label.  */
int
s390_label_align (rtx_insn *label)
{
  rtx_insn *prev_insn = prev_active_insn (label);
  rtx set, src;

  if (prev_insn == NULL_RTX)
    goto old;

  set = single_set (prev_insn);

  if (set == NULL_RTX)
    goto old;

  src = SET_SRC (set);

  /* Don't align literal pool base labels.  */
  if (GET_CODE (src) == UNSPEC
      && XINT (src, 1) == UNSPEC_MAIN_BASE)
    return 0;

 old:
  return align_labels_log;
}

static GTY(()) rtx got_symbol;

/* Return the GOT table symbol.  The symbol will be created when the
   function is invoked for the first time.  */

static rtx
s390_got_symbol (void)
{
  if (!got_symbol)
    {
      got_symbol = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
      SYMBOL_REF_FLAGS (got_symbol) = SYMBOL_FLAG_LOCAL;
    }

  return got_symbol;
}

static scalar_int_mode
s390_libgcc_cmp_return_mode (void)
{
  return TARGET_64BIT ? DImode : SImode;
}

static scalar_int_mode
s390_libgcc_shift_count_mode (void)
{
  return TARGET_64BIT ? DImode : SImode;
}

static scalar_int_mode
s390_unwind_word_mode (void)
{
  return TARGET_64BIT ? DImode : SImode;
}

/* Return true if the back end supports mode MODE.  */
static bool
s390_scalar_mode_supported_p (scalar_mode mode)
{
  /* In contrast to the default implementation reject TImode constants
     on 31-bit TARGET_ZARCH for ABI compliance.  */
  if (!TARGET_64BIT && TARGET_ZARCH && mode == TImode)
    return false;

  if (DECIMAL_FLOAT_MODE_P (mode))
    return default_decimal_float_supported_p ();

  return default_scalar_mode_supported_p (mode);
}

/* Return true if the back end supports vector mode MODE.  */
static bool
s390_vector_mode_supported_p (machine_mode mode)
{
  machine_mode inner;

  if (!VECTOR_MODE_P (mode)
      || !TARGET_VX
      || GET_MODE_SIZE (mode) > 16)
    return false;

  inner = GET_MODE_INNER (mode);

  switch (inner)
    {
    case E_QImode:
    case E_HImode:
    case E_SImode:
    case E_DImode:
    case E_TImode:
    case E_SFmode:
    case E_DFmode:
    case E_TFmode:
      return true;
    default:
      return false;
    }
}

/* Set the has_landing_pad_p flag in struct machine_function to VALUE.  */

void
s390_set_has_landing_pad_p (bool value)
{
  cfun->machine->has_landing_pad_p = value;
}

/* If two condition code modes are compatible, return a condition code
   mode which is compatible with both.  Otherwise, return
   VOIDmode.  */

static machine_mode
s390_cc_modes_compatible (machine_mode m1, machine_mode m2)
{
  if (m1 == m2)
    return m1;

  switch (m1)
    {
    case E_CCZmode:
      if (m2 == CCUmode || m2 == CCTmode || m2 == CCZ1mode
	  || m2 == CCSmode || m2 == CCSRmode || m2 == CCURmode)
	return m2;
      return VOIDmode;

    case E_CCSmode:
    case E_CCUmode:
    case E_CCTmode:
    case E_CCSRmode:
    case E_CCURmode:
    case E_CCZ1mode:
      if (m2 == CCZmode)
	return m1;

      return VOIDmode;

    default:
      return VOIDmode;
    }
  return VOIDmode;
}
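
/* E.g. s390_cc_modes_compatible (CCZmode, CCUmode) yields CCUmode: a
   consumer that only needs the CCZ information can also work with a
   CC value produced in CCUmode.  (Illustration only.)  */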

/* Return true if SET either doesn't set the CC register, or else
   the source and destination have matching CC modes and that
   CC mode is at least as constrained as REQ_MODE.  */

static bool
s390_match_ccmode_set (rtx set, machine_mode req_mode)
{
  machine_mode set_mode;

  gcc_assert (GET_CODE (set) == SET);

  /* These modes are supposed to be used only in CC consumer
     patterns.  */
  gcc_assert (req_mode != CCVIALLmode && req_mode != CCVIANYmode
	      && req_mode != CCVFALLmode && req_mode != CCVFANYmode);

  if (GET_CODE (SET_DEST (set)) != REG || !CC_REGNO_P (REGNO (SET_DEST (set))))
    return 1;

  set_mode = GET_MODE (SET_DEST (set));
  switch (set_mode)
    {
    case E_CCZ1mode:
    case E_CCSmode:
    case E_CCSRmode:
    case E_CCUmode:
    case E_CCURmode:
    case E_CCLmode:
    case E_CCL1mode:
    case E_CCL2mode:
    case E_CCL3mode:
    case E_CCT1mode:
    case E_CCT2mode:
    case E_CCT3mode:
    case E_CCVEQmode:
    case E_CCVIHmode:
    case E_CCVIHUmode:
    case E_CCVFHmode:
    case E_CCVFHEmode:
      if (req_mode != set_mode)
	return 0;
      break;

    case E_CCZmode:
      if (req_mode != CCSmode && req_mode != CCUmode && req_mode != CCTmode
	  && req_mode != CCSRmode && req_mode != CCURmode
	  && req_mode != CCZ1mode)
	return 0;
      break;

    case E_CCAPmode:
    case E_CCANmode:
      if (req_mode != CCAmode)
	return 0;
      break;

    default:
      gcc_unreachable ();
    }

  return (GET_MODE (SET_SRC (set)) == set_mode);
}

/* Return true if every SET in INSN that sets the CC register
   has source and destination with matching CC modes and that
   CC mode is at least as constrained as REQ_MODE.
   If REQ_MODE is VOIDmode, always return false.  */

bool
s390_match_ccmode (rtx_insn *insn, machine_mode req_mode)
{
  int i;

  /* s390_tm_ccmode returns VOIDmode to indicate failure.  */
  if (req_mode == VOIDmode)
    return false;

  if (GET_CODE (PATTERN (insn)) == SET)
    return s390_match_ccmode_set (PATTERN (insn), req_mode);

  if (GET_CODE (PATTERN (insn)) == PARALLEL)
    for (i = 0; i < XVECLEN (PATTERN (insn), 0); i++)
      {
	rtx set = XVECEXP (PATTERN (insn), 0, i);
	if (GET_CODE (set) == SET)
	  if (!s390_match_ccmode_set (set, req_mode))
	    return false;
      }

  return true;
}

/* If a test-under-mask instruction can be used to implement
   (compare (and ... OP1) OP2), return the CC mode required
   to do that.  Otherwise, return VOIDmode.
   MIXED is true if the instruction can distinguish between
   CC1 and CC2 for mixed selected bits (TMxx); it is false
   if the instruction cannot (TM).  */

machine_mode
s390_tm_ccmode (rtx op1, rtx op2, bool mixed)
{
  int bit0, bit1;

  /* ??? Fixme: should work on CONST_WIDE_INT as well.  */
  if (GET_CODE (op1) != CONST_INT || GET_CODE (op2) != CONST_INT)
    return VOIDmode;

  /* Selected bits all zero: CC0.
     e.g.: int a; if ((a & (16 + 128)) == 0) */
  if (INTVAL (op2) == 0)
    return CCTmode;

  /* Selected bits all one: CC3.
     e.g.: int a; if ((a & (16 + 128)) == 16 + 128) */
  if (INTVAL (op2) == INTVAL (op1))
    return CCT3mode;

  /* Exactly two bits selected, mixed zeroes and ones: CC1 or CC2. e.g.:
     int a;
     if ((a & (16 + 128)) == 16) -> CCT1
     if ((a & (16 + 128)) == 128) -> CCT2 */
  if (mixed)
    {
      bit1 = exact_log2 (INTVAL (op2));
      bit0 = exact_log2 (INTVAL (op1) ^ INTVAL (op2));
      if (bit0 != -1 && bit1 != -1)
	return bit0 > bit1 ? CCT1mode : CCT2mode;
    }

  return VOIDmode;
}

/* Given a comparison code OP (EQ, NE, etc.) and the operands
   OP0 and OP1 of a COMPARE, return the mode to be used for the
   comparison.  */

machine_mode
s390_select_ccmode (enum rtx_code code, rtx op0, rtx op1)
{
  switch (code)
    {
    case EQ:
    case NE:
      if ((GET_CODE (op0) == NEG || GET_CODE (op0) == ABS)
	  && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)
	return CCAPmode;
      if (GET_CODE (op0) == PLUS && GET_CODE (XEXP (op0, 1)) == CONST_INT
	  && CONST_OK_FOR_K (INTVAL (XEXP (op0, 1))))
	return CCAPmode;
      if ((GET_CODE (op0) == PLUS || GET_CODE (op0) == MINUS
	   || GET_CODE (op1) == NEG)
	  && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)
	return CCLmode;

      if (GET_CODE (op0) == AND)
	{
	  /* Check whether we can potentially do it via TM.  */
	  machine_mode ccmode;
	  ccmode = s390_tm_ccmode (XEXP (op0, 1), op1, 1);
	  if (ccmode != VOIDmode)
	    {
	      /* Relax CCTmode to CCZmode to allow fall-back to AND
		 if that turns out to be beneficial.  */
	      return ccmode == CCTmode ? CCZmode : ccmode;
	    }
	}

      if (register_operand (op0, HImode)
	  && GET_CODE (op1) == CONST_INT
	  && (INTVAL (op1) == -1 || INTVAL (op1) == 65535))
	return CCT3mode;
      if (register_operand (op0, QImode)
	  && GET_CODE (op1) == CONST_INT
	  && (INTVAL (op1) == -1 || INTVAL (op1) == 255))
	return CCT3mode;

      return CCZmode;

    case LE:
    case LT:
    case GE:
    case GT:
      /* The only overflow condition of NEG and ABS happens when
	 INT_MIN is used as the parameter; the result stays negative.
	 So we have an overflow from an expected positive value to a
	 negative one.  Using CCAPmode the resulting cc can be used
	 for comparisons.  */
      if ((GET_CODE (op0) == NEG || GET_CODE (op0) == ABS)
	  && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)
	return CCAPmode;

      /* If constants are involved in an add instruction it is possible to use
	 the resulting cc for comparisons with zero.  Knowing the sign of the
	 constant, the overflow behavior becomes predictable.  e.g.:
	   int a, b; if ((b = a + c) > 0)
	 with c as a constant value: c < 0 -> CCAN and c >= 0 -> CCAP  */
      if (GET_CODE (op0) == PLUS && GET_CODE (XEXP (op0, 1)) == CONST_INT
	  && (CONST_OK_FOR_K (INTVAL (XEXP (op0, 1)))
	      || (CONST_OK_FOR_CONSTRAINT_P (INTVAL (XEXP (op0, 1)), 'O', "Os")
		  /* Avoid INT32_MIN on 32 bit.  */
		  && (!TARGET_ZARCH || INTVAL (XEXP (op0, 1)) != -0x7fffffff - 1))))
	{
	  if (INTVAL (XEXP (op0, 1)) < 0)
	    return CCANmode;
	  else
	    return CCAPmode;
	}
      /* Fall through.  */
    case UNORDERED:
    case ORDERED:
    case UNEQ:
    case UNLE:
    case UNLT:
    case UNGE:
    case UNGT:
    case LTGT:
      if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
	  && GET_CODE (op1) != CONST_INT)
	return CCSRmode;
      return CCSmode;

    case LTU:
    case GEU:
      if (GET_CODE (op0) == PLUS
	  && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)
	return CCL1mode;

      if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
	  && GET_CODE (op1) != CONST_INT)
	return CCURmode;
      return CCUmode;

    case LEU:
    case GTU:
      if (GET_CODE (op0) == MINUS
	  && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)
	return CCL2mode;

      if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
	  && GET_CODE (op1) != CONST_INT)
	return CCURmode;
      return CCUmode;

    default:
      gcc_unreachable ();
    }
}

/* Replace the comparison OP0 CODE OP1 by a semantically equivalent one
   that we can implement more efficiently.  */

static void
s390_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
			      bool op0_preserve_value)
{
  if (op0_preserve_value)
    return;

  /* Convert ZERO_EXTRACT back to AND to enable TM patterns.  */
  if ((*code == EQ || *code == NE)
      && *op1 == const0_rtx
      && GET_CODE (*op0) == ZERO_EXTRACT
      && GET_CODE (XEXP (*op0, 1)) == CONST_INT
      && GET_CODE (XEXP (*op0, 2)) == CONST_INT
      && SCALAR_INT_MODE_P (GET_MODE (XEXP (*op0, 0))))
    {
      rtx inner = XEXP (*op0, 0);
      HOST_WIDE_INT modesize = GET_MODE_BITSIZE (GET_MODE (inner));
      HOST_WIDE_INT len = INTVAL (XEXP (*op0, 1));
      HOST_WIDE_INT pos = INTVAL (XEXP (*op0, 2));

      if (len > 0 && len < modesize
	  && pos >= 0 && pos + len <= modesize
	  && modesize <= HOST_BITS_PER_WIDE_INT)
	{
	  unsigned HOST_WIDE_INT block;
	  block = (HOST_WIDE_INT_1U << len) - 1;
	  block <<= modesize - pos - len;

	  *op0 = gen_rtx_AND (GET_MODE (inner), inner,
			      gen_int_mode (block, GET_MODE (inner)));
	}
    }

  /* Narrow AND of memory against immediate to enable TM.  */
  if ((*code == EQ || *code == NE)
      && *op1 == const0_rtx
      && GET_CODE (*op0) == AND
      && GET_CODE (XEXP (*op0, 1)) == CONST_INT
      && SCALAR_INT_MODE_P (GET_MODE (XEXP (*op0, 0))))
    {
      rtx inner = XEXP (*op0, 0);
      rtx mask = XEXP (*op0, 1);

      /* Ignore paradoxical SUBREGs if all extra bits are masked out.  */
      if (GET_CODE (inner) == SUBREG
	  && SCALAR_INT_MODE_P (GET_MODE (SUBREG_REG (inner)))
	  && (GET_MODE_SIZE (GET_MODE (inner))
	      >= GET_MODE_SIZE (GET_MODE (SUBREG_REG (inner))))
	  && ((INTVAL (mask)
	       & GET_MODE_MASK (GET_MODE (inner))
	       & ~GET_MODE_MASK (GET_MODE (SUBREG_REG (inner))))
	      == 0))
	inner = SUBREG_REG (inner);

      /* Do not change volatile MEMs.  */
      if (MEM_P (inner) && !MEM_VOLATILE_P (inner))
	{
	  int part = s390_single_part (XEXP (*op0, 1),
				       GET_MODE (inner), QImode, 0);
	  if (part >= 0)
	    {
	      mask = gen_int_mode (s390_extract_part (mask, QImode, 0), QImode);
	      inner = adjust_address_nv (inner, QImode, part);
	      *op0 = gen_rtx_AND (QImode, inner, mask);
	    }
	}
    }

  /* Narrow comparisons against 0xffff to HImode if possible.  */
  if ((*code == EQ || *code == NE)
      && GET_CODE (*op1) == CONST_INT
      && INTVAL (*op1) == 0xffff
      && SCALAR_INT_MODE_P (GET_MODE (*op0))
      && (nonzero_bits (*op0, GET_MODE (*op0))
	  & ~HOST_WIDE_INT_UC (0xffff)) == 0)
    {
      *op0 = gen_lowpart (HImode, *op0);
      *op1 = constm1_rtx;
    }
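
  /* E.g. "(reg:SI x) == 0xffff" with the upper bits of x known to be
     zero has just become "(subreg:HI x) == -1", which matches the
     all-ones test-under-mask case handled above.  (Illustration
     only.)  */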

  /* Remove redundant UNSPEC_STRCMPCC_TO_INT conversions if possible.  */
  if (GET_CODE (*op0) == UNSPEC
      && XINT (*op0, 1) == UNSPEC_STRCMPCC_TO_INT
      && XVECLEN (*op0, 0) == 1
      && GET_MODE (XVECEXP (*op0, 0, 0)) == CCUmode
      && GET_CODE (XVECEXP (*op0, 0, 0)) == REG
      && REGNO (XVECEXP (*op0, 0, 0)) == CC_REGNUM
      && *op1 == const0_rtx)
    {
      enum rtx_code new_code = UNKNOWN;
      switch (*code)
	{
	case EQ: new_code = EQ;  break;
	case NE: new_code = NE;  break;
	case LT: new_code = GTU; break;
	case GT: new_code = LTU; break;
	case LE: new_code = GEU; break;
	case GE: new_code = LEU; break;
	default: break;
	}

      if (new_code != UNKNOWN)
	{
	  *op0 = XVECEXP (*op0, 0, 0);
	  *code = new_code;
	}
    }

  /* Remove redundant UNSPEC_CC_TO_INT conversions if possible.  */
  if (GET_CODE (*op0) == UNSPEC
      && XINT (*op0, 1) == UNSPEC_CC_TO_INT
      && XVECLEN (*op0, 0) == 1
      && GET_CODE (XVECEXP (*op0, 0, 0)) == REG
      && REGNO (XVECEXP (*op0, 0, 0)) == CC_REGNUM
      && CONST_INT_P (*op1))
    {
      enum rtx_code new_code = UNKNOWN;
      switch (GET_MODE (XVECEXP (*op0, 0, 0)))
	{
	case E_CCZmode:
	case E_CCRAWmode:
	  switch (*code)
	    {
	    case EQ: new_code = EQ; break;
	    case NE: new_code = NE; break;
	    default: break;
	    }
	  break;
	default: break;
	}

      if (new_code != UNKNOWN)
	{
	  /* For CCRAWmode put the required cc mask into the second
	     operand.  */
	  if (GET_MODE (XVECEXP (*op0, 0, 0)) == CCRAWmode
	      && INTVAL (*op1) >= 0 && INTVAL (*op1) <= 3)
	    *op1 = gen_rtx_CONST_INT (VOIDmode, 1 << (3 - INTVAL (*op1)));
	  *op0 = XVECEXP (*op0, 0, 0);
	  *code = new_code;
	}
    }

  /* Simplify cascaded EQ, NE with const0_rtx.  */
  if ((*code == NE || *code == EQ)
      && (GET_CODE (*op0) == EQ || GET_CODE (*op0) == NE)
      && GET_MODE (*op0) == SImode
      && GET_MODE (XEXP (*op0, 0)) == CCZ1mode
      && REG_P (XEXP (*op0, 0))
      && XEXP (*op0, 1) == const0_rtx
      && *op1 == const0_rtx)
    {
      if ((*code == EQ && GET_CODE (*op0) == NE)
	  || (*code == NE && GET_CODE (*op0) == EQ))
	*code = EQ;
      else
	*code = NE;
      *op0 = XEXP (*op0, 0);
    }

  /* Prefer register over memory as first operand.  */
  if (MEM_P (*op0) && REG_P (*op1))
    {
      rtx tem = *op0; *op0 = *op1; *op1 = tem;
      *code = (int)swap_condition ((enum rtx_code)*code);
    }

1779 /* A comparison result is compared against zero. Replace it with
1780 the (perhaps inverted) original comparison.
1781 This probably should be done by simplify_relational_operation. */
1782 if ((*code == EQ || *code == NE)
1783 && *op1 == const0_rtx
1784 && COMPARISON_P (*op0)
1785 && CC_REG_P (XEXP (*op0, 0)))
1786 {
1787 enum rtx_code new_code;
1788
1789 if (*code == EQ)
1790 new_code = reversed_comparison_code_parts (GET_CODE (*op0),
1791 XEXP (*op0, 0),
1792 XEXP (*op0, 1), NULL);
1793 else
1794 new_code = GET_CODE (*op0);
1795
1796 if (new_code != UNKNOWN)
1797 {
1798 *code = new_code;
1799 *op1 = XEXP (*op0, 1);
1800 *op0 = XEXP (*op0, 0);
1801 }
1802 }
1803 }
1804
1805
1806 /* Emit a compare instruction suitable to implement the comparison
1807 OP0 CODE OP1. Return the correct condition RTL to be placed in
1808 the IF_THEN_ELSE of the conditional branch testing the result. */
1809
1810 rtx
1811 s390_emit_compare (enum rtx_code code, rtx op0, rtx op1)
1812 {
1813 machine_mode mode = s390_select_ccmode (code, op0, op1);
1814 rtx cc;
1815
1816 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC)
1817 {
1818 /* Do not output a redundant compare instruction if a
1819 compare_and_swap pattern already computed the result and the
1820 machine modes are compatible. */
1821 gcc_assert (s390_cc_modes_compatible (GET_MODE (op0), mode)
1822 == GET_MODE (op0));
1823 cc = op0;
1824 }
1825 else
1826 {
1827 cc = gen_rtx_REG (mode, CC_REGNUM);
1828 emit_insn (gen_rtx_SET (cc, gen_rtx_COMPARE (mode, op0, op1)));
1829 }
1830
1831 return gen_rtx_fmt_ee (code, VOIDmode, cc, const0_rtx);
1832 }
1833
1834 /* Emit a SImode compare and swap instruction setting MEM to NEW_RTX if OLD
1835 matches CMP.
1836 Return the correct condition RTL to be placed in the IF_THEN_ELSE of the
1837 conditional branch testing the result. */
1838
1839 static rtx
1840 s390_emit_compare_and_swap (enum rtx_code code, rtx old, rtx mem,
1841 rtx cmp, rtx new_rtx, machine_mode ccmode)
1842 {
1843 rtx cc;
1844
1845 cc = gen_rtx_REG (ccmode, CC_REGNUM);
1846 switch (GET_MODE (mem))
1847 {
1848 case E_SImode:
1849 emit_insn (gen_atomic_compare_and_swapsi_internal (old, mem, cmp,
1850 new_rtx, cc));
1851 break;
1852 case E_DImode:
1853 emit_insn (gen_atomic_compare_and_swapdi_internal (old, mem, cmp,
1854 new_rtx, cc));
1855 break;
1856 case E_TImode:
1857 emit_insn (gen_atomic_compare_and_swapti_internal (old, mem, cmp,
1858 new_rtx, cc));
1859 break;
1860 case E_QImode:
1861 case E_HImode:
1862 default:
1863 gcc_unreachable ();
1864 }
1865 return s390_emit_compare (code, cc, const0_rtx);
1866 }
1867
1868 /* Emit a jump instruction to TARGET and return it. If COND is
1869 NULL_RTX, emit an unconditional jump, else a conditional jump under
1870 condition COND. */
1871
1872 rtx_insn *
1873 s390_emit_jump (rtx target, rtx cond)
1874 {
1875 rtx insn;
1876
1877 target = gen_rtx_LABEL_REF (VOIDmode, target);
1878 if (cond)
1879 target = gen_rtx_IF_THEN_ELSE (VOIDmode, cond, target, pc_rtx);
1880
1881 insn = gen_rtx_SET (pc_rtx, target);
1882 return emit_jump_insn (insn);
1883 }
1884
1885 /* Return branch condition mask to implement a branch
1886 specified by CODE. Return -1 for invalid comparisons. */
1887
1888 int
1889 s390_branch_condition_mask (rtx code)
1890 {
1891 const int CC0 = 1 << 3;
1892 const int CC1 = 1 << 2;
1893 const int CC2 = 1 << 1;
1894 const int CC3 = 1 << 0;
1895
1896 gcc_assert (GET_CODE (XEXP (code, 0)) == REG);
1897 gcc_assert (REGNO (XEXP (code, 0)) == CC_REGNUM);
1898 gcc_assert (XEXP (code, 1) == const0_rtx
1899 || (GET_MODE (XEXP (code, 0)) == CCRAWmode
1900 && CONST_INT_P (XEXP (code, 1))));
1901
1902
1903 switch (GET_MODE (XEXP (code, 0)))
1904 {
1905 case E_CCZmode:
1906 case E_CCZ1mode:
1907 switch (GET_CODE (code))
1908 {
1909 case EQ: return CC0;
1910 case NE: return CC1 | CC2 | CC3;
1911 default: return -1;
1912 }
1913 break;
1914
1915 case E_CCT1mode:
1916 switch (GET_CODE (code))
1917 {
1918 case EQ: return CC1;
1919 case NE: return CC0 | CC2 | CC3;
1920 default: return -1;
1921 }
1922 break;
1923
1924 case E_CCT2mode:
1925 switch (GET_CODE (code))
1926 {
1927 case EQ: return CC2;
1928 case NE: return CC0 | CC1 | CC3;
1929 default: return -1;
1930 }
1931 break;
1932
1933 case E_CCT3mode:
1934 switch (GET_CODE (code))
1935 {
1936 case EQ: return CC3;
1937 case NE: return CC0 | CC1 | CC2;
1938 default: return -1;
1939 }
1940 break;
1941
1942 case E_CCLmode:
1943 switch (GET_CODE (code))
1944 {
1945 case EQ: return CC0 | CC2;
1946 case NE: return CC1 | CC3;
1947 default: return -1;
1948 }
1949 break;
1950
1951 case E_CCL1mode:
1952 switch (GET_CODE (code))
1953 {
1954 case LTU: return CC2 | CC3; /* carry */
1955 case GEU: return CC0 | CC1; /* no carry */
1956 default: return -1;
1957 }
1958 break;
1959
1960 case E_CCL2mode:
1961 switch (GET_CODE (code))
1962 {
1963 case GTU: return CC0 | CC1; /* borrow */
1964 case LEU: return CC2 | CC3; /* no borrow */
1965 default: return -1;
1966 }
1967 break;
1968
1969 case E_CCL3mode:
1970 switch (GET_CODE (code))
1971 {
1972 case EQ: return CC0 | CC2;
1973 case NE: return CC1 | CC3;
1974 case LTU: return CC1;
1975 case GTU: return CC3;
1976 case LEU: return CC1 | CC2;
1977 case GEU: return CC2 | CC3;
1978 default: return -1;
1979 }
1980
1981 case E_CCUmode:
1982 switch (GET_CODE (code))
1983 {
1984 case EQ: return CC0;
1985 case NE: return CC1 | CC2 | CC3;
1986 case LTU: return CC1;
1987 case GTU: return CC2;
1988 case LEU: return CC0 | CC1;
1989 case GEU: return CC0 | CC2;
1990 default: return -1;
1991 }
1992 break;
1993
1994 case E_CCURmode:
1995 switch (GET_CODE (code))
1996 {
1997 case EQ: return CC0;
1998 case NE: return CC2 | CC1 | CC3;
1999 case LTU: return CC2;
2000 case GTU: return CC1;
2001 case LEU: return CC0 | CC2;
2002 case GEU: return CC0 | CC1;
2003 default: return -1;
2004 }
2005 break;
2006
2007 case E_CCAPmode:
2008 switch (GET_CODE (code))
2009 {
2010 case EQ: return CC0;
2011 case NE: return CC1 | CC2 | CC3;
2012 case LT: return CC1 | CC3;
2013 case GT: return CC2;
2014 case LE: return CC0 | CC1 | CC3;
2015 case GE: return CC0 | CC2;
2016 default: return -1;
2017 }
2018 break;
2019
2020 case E_CCANmode:
2021 switch (GET_CODE (code))
2022 {
2023 case EQ: return CC0;
2024 case NE: return CC1 | CC2 | CC3;
2025 case LT: return CC1;
2026 case GT: return CC2 | CC3;
2027 case LE: return CC0 | CC1;
2028 case GE: return CC0 | CC2 | CC3;
2029 default: return -1;
2030 }
2031 break;
2032
2033 case E_CCSmode:
2034 switch (GET_CODE (code))
2035 {
2036 case EQ: return CC0;
2037 case NE: return CC1 | CC2 | CC3;
2038 case LT: return CC1;
2039 case GT: return CC2;
2040 case LE: return CC0 | CC1;
2041 case GE: return CC0 | CC2;
2042 case UNORDERED: return CC3;
2043 case ORDERED: return CC0 | CC1 | CC2;
2044 case UNEQ: return CC0 | CC3;
2045 case UNLT: return CC1 | CC3;
2046 case UNGT: return CC2 | CC3;
2047 case UNLE: return CC0 | CC1 | CC3;
2048 case UNGE: return CC0 | CC2 | CC3;
2049 case LTGT: return CC1 | CC2;
2050 default: return -1;
2051 }
2052 break;
2053
2054 case E_CCSRmode:
2055 switch (GET_CODE (code))
2056 {
2057 case EQ: return CC0;
2058 case NE: return CC2 | CC1 | CC3;
2059 case LT: return CC2;
2060 case GT: return CC1;
2061 case LE: return CC0 | CC2;
2062 case GE: return CC0 | CC1;
2063 case UNORDERED: return CC3;
2064 case ORDERED: return CC0 | CC2 | CC1;
2065 case UNEQ: return CC0 | CC3;
2066 case UNLT: return CC2 | CC3;
2067 case UNGT: return CC1 | CC3;
2068 case UNLE: return CC0 | CC2 | CC3;
2069 case UNGE: return CC0 | CC1 | CC3;
2070 case LTGT: return CC2 | CC1;
2071 default: return -1;
2072 }
2073 break;
2074
2075 /* Vector comparison modes. */
2076 /* CC2 will never be set. It is, however, part of the negated
2077 masks. */
2078 case E_CCVIALLmode:
2079 switch (GET_CODE (code))
2080 {
2081 case EQ:
2082 case GTU:
2083 case GT:
2084 case GE: return CC0;
2085 /* The inverted codes correspond to the *any* variants. */
2086 case NE:
2087 case LEU:
2088 case LE:
2089 case LT: return CC3 | CC1 | CC2;
2090 default: return -1;
2091 }
2092
2093 case E_CCVIANYmode:
2094 switch (GET_CODE (code))
2095 {
2096 case EQ:
2097 case GTU:
2098 case GT:
2099 case GE: return CC0 | CC1;
2100 /* The inverted codes correspond to the *all* variants. */
2101 case NE:
2102 case LEU:
2103 case LE:
2104 case LT: return CC3 | CC2;
2105 default: return -1;
2106 }
2107 case E_CCVFALLmode:
2108 switch (GET_CODE (code))
2109 {
2110 case EQ:
2111 case GT:
2112 case GE: return CC0;
2113 /* The inverted codes correspond to the *any* variants. */
2114 case NE:
2115 case UNLE:
2116 case UNLT: return CC3 | CC1 | CC2;
2117 default: return -1;
2118 }
2119
2120 case E_CCVFANYmode:
2121 switch (GET_CODE (code))
2122 {
2123 case EQ:
2124 case GT:
2125 case GE: return CC0 | CC1;
2126 /* The inverted codes correspond to the *all* variants. */
2127 case NE:
2128 case UNLE:
2129 case UNLT: return CC3 | CC2;
2130 default: return -1;
2131 }
2132
2133 case E_CCRAWmode:
2134 switch (GET_CODE (code))
2135 {
2136 case EQ:
2137 return INTVAL (XEXP (code, 1));
2138 case NE:
2139 return (INTVAL (XEXP (code, 1))) ^ 0xf;
2140 default:
2141 gcc_unreachable ();
2142 }
2143
2144 default:
2145 return -1;
2146 }
2147 }
2148
2149
2150 /* Return branch condition mask to implement a compare and branch
2151 specified by CODE. Return -1 for invalid comparisons. */
2152
2153 int
2154 s390_compare_and_branch_condition_mask (rtx code)
2155 {
2156 const int CC0 = 1 << 3;
2157 const int CC1 = 1 << 2;
2158 const int CC2 = 1 << 1;
2159
2160 switch (GET_CODE (code))
2161 {
2162 case EQ:
2163 return CC0;
2164 case NE:
2165 return CC1 | CC2;
2166 case LT:
2167 case LTU:
2168 return CC1;
2169 case GT:
2170 case GTU:
2171 return CC2;
2172 case LE:
2173 case LEU:
2174 return CC0 | CC1;
2175 case GE:
2176 case GEU:
2177 return CC0 | CC2;
2178 default:
2179 gcc_unreachable ();
2180 }
2181 return -1;
2182 }
2183
2184 /* If INV is false, return assembler mnemonic string to implement
2185 a branch specified by CODE. If INV is true, return mnemonic
2186 for the corresponding inverted branch. */
2187
2188 static const char *
2189 s390_branch_condition_mnemonic (rtx code, int inv)
2190 {
2191 int mask;
2192
2193 static const char *const mnemonic[16] =
2194 {
2195 NULL, "o", "h", "nle",
2196 "l", "nhe", "lh", "ne",
2197 "e", "nlh", "he", "nl",
2198 "le", "nh", "no", NULL
2199 };
2200
2201 if (GET_CODE (XEXP (code, 0)) == REG
2202 && REGNO (XEXP (code, 0)) == CC_REGNUM
2203 && (XEXP (code, 1) == const0_rtx
2204 || (GET_MODE (XEXP (code, 0)) == CCRAWmode
2205 && CONST_INT_P (XEXP (code, 1)))))
2206 mask = s390_branch_condition_mask (code);
2207 else
2208 mask = s390_compare_and_branch_condition_mask (code);
2209
2210 gcc_assert (mask >= 0);
2211
2212 if (inv)
2213 mask ^= 15;
2214
2215 gcc_assert (mask >= 1 && mask <= 14);
2216
2217 return mnemonic[mask];
2218 }
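
/* A worked example (illustrative, not from the original sources):
   for (eq (reg:CCU 33) (const_int 0)) the branch condition mask is
   CC0 = 0b1000 = 8, and mnemonic[8] is "e"; inverting gives
   8 ^ 15 = 7, i.e. "ne". For (leu (reg:CCU 33) (const_int 0)) the
   mask is CC0 | CC1 = 12 ("le"), inverted 3 ("nle"). */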
2219
2220 /* Return the part of OP which has a value different from DEF.
2221 The size of the part is determined by MODE.
2222 Use this function only if you already know that OP really
2223 contains such a part. */
2224
2225 unsigned HOST_WIDE_INT
2226 s390_extract_part (rtx op, machine_mode mode, int def)
2227 {
2228 unsigned HOST_WIDE_INT value = 0;
2229 int max_parts = HOST_BITS_PER_WIDE_INT / GET_MODE_BITSIZE (mode);
2230 int part_bits = GET_MODE_BITSIZE (mode);
2231 unsigned HOST_WIDE_INT part_mask = (HOST_WIDE_INT_1U << part_bits) - 1;
2232 int i;
2233
2234 for (i = 0; i < max_parts; i++)
2235 {
2236 if (i == 0)
2237 value = UINTVAL (op);
2238 else
2239 value >>= part_bits;
2240
2241 if ((value & part_mask) != (def & part_mask))
2242 return value & part_mask;
2243 }
2244
2245 gcc_unreachable ();
2246 }
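
/* For example (illustrative): s390_extract_part (GEN_INT (0x12340000),
   HImode, 0) scans the 16-bit parts from least to most significant,
   skips the zero low part, and returns 0x1234 -- the first part whose
   value differs from DEF. */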
2247
2248 /* If OP is an integer constant of mode MODE with exactly one
2249 part of mode PART_MODE unequal to DEF, return the number of that
2250 part. Otherwise, return -1. */
2251
2252 int
2253 s390_single_part (rtx op,
2254 machine_mode mode,
2255 machine_mode part_mode,
2256 int def)
2257 {
2258 unsigned HOST_WIDE_INT value = 0;
2259 int n_parts = GET_MODE_SIZE (mode) / GET_MODE_SIZE (part_mode);
2260 unsigned HOST_WIDE_INT part_mask
2261 = (HOST_WIDE_INT_1U << GET_MODE_BITSIZE (part_mode)) - 1;
2262 int i, part = -1;
2263
2264 if (GET_CODE (op) != CONST_INT)
2265 return -1;
2266
2267 for (i = 0; i < n_parts; i++)
2268 {
2269 if (i == 0)
2270 value = UINTVAL (op);
2271 else
2272 value >>= GET_MODE_BITSIZE (part_mode);
2273
2274 if ((value & part_mask) != (def & part_mask))
2275 {
2276 if (part != -1)
2277 return -1;
2278 else
2279 part = i;
2280 }
2281 }
2282 return part == -1 ? -1 : n_parts - 1 - part;
2283 }
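
/* Parts are numbered starting from the most significant one. For
   example (illustrative): with MODE == DImode, PART_MODE == SImode and
   DEF == 0, the value 0x00000000ffffffff yields part 1 (the low word),
   0xffffffff00000000 yields part 0, and 0x0000000100000001 yields -1
   because two parts differ from DEF. */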
2284
2285 /* Return true if IN contains a contiguous bitfield in the lower SIZE
2286 bits and no other bits are set in (the lower SIZE bits of) IN.
2287
2288 PSTART and PEND can be used to obtain the start and end
2289 position (inclusive) of the bitfield relative to 64
2290 bits. *PSTART / *PEND gives the position of the first/last bit
2291 of the bitfield counting from the highest order bit starting
2292 with zero. */
2293
2294 bool
2295 s390_contiguous_bitmask_nowrap_p (unsigned HOST_WIDE_INT in, int size,
2296 int *pstart, int *pend)
2297 {
2298 int start;
2299 int end = -1;
2300 int lowbit = HOST_BITS_PER_WIDE_INT - 1;
2301 int highbit = HOST_BITS_PER_WIDE_INT - size;
2302 unsigned HOST_WIDE_INT bitmask = HOST_WIDE_INT_1U;
2303
2304 gcc_assert (!!pstart == !!pend);
2305 for (start = lowbit; start >= highbit; bitmask <<= 1, start--)
2306 if (end == -1)
2307 {
2308 /* Look for the rightmost bit of a contiguous range of ones. */
2309 if (bitmask & in)
2310 /* Found it. */
2311 end = start;
2312 }
2313 else
2314 {
2315 /* Look for the first zero bit after the range of ones. */
2316 if (! (bitmask & in))
2317 /* Found it. */
2318 break;
2319 }
2320 /* We're one past the last one-bit. */
2321 start++;
2322
2323 if (end == -1)
2324 /* No one bits found. */
2325 return false;
2326
2327 if (start > highbit)
2328 {
2329 unsigned HOST_WIDE_INT mask;
2330
2331 /* Calculate a mask for all bits beyond the contiguous bits. */
2332 mask = ((~HOST_WIDE_INT_0U >> highbit)
2333 & (~HOST_WIDE_INT_0U << (lowbit - start + 1)));
2334 if (mask & in)
2335 /* There are more bits set beyond the first range of one bits. */
2336 return false;
2337 }
2338
2339 if (pstart)
2340 {
2341 *pstart = start;
2342 *pend = end;
2343 }
2344
2345 return true;
2346 }
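
/* Example (illustrative): for IN == 0xf0 and SIZE == 64 the function
   returns true with *PSTART == 56 and *PEND == 59, since the four one
   bits occupy positions 56..59 when counting from the most significant
   bit (bit 0) of a 64-bit value. */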
2347
2348 /* Same as s390_contiguous_bitmask_nowrap_p but also returns true
2349 if ~IN contains a contiguous bitfield. In that case, *END is <
2350 *START.
2351
2352 If WRAP_P is true, a bitmask that wraps around is also tested.
2353 When a wraparound occurs *START is greater than *END (in
2354 non-null pointers), and the uppermost (64 - SIZE) bits are thus
2355 part of the range. If WRAP_P is false, no wraparound is
2356 tested. */
2357
2358 bool
2359 s390_contiguous_bitmask_p (unsigned HOST_WIDE_INT in, bool wrap_p,
2360 int size, int *start, int *end)
2361 {
2362 int bs = HOST_BITS_PER_WIDE_INT;
2363 bool b;
2364
2365 gcc_assert (!!start == !!end);
2366 if ((in & ((~HOST_WIDE_INT_0U) >> (bs - size))) == 0)
2367 /* This cannot be expressed as a contiguous bitmask. Exit early because
2368 the second call of s390_contiguous_bitmask_nowrap_p would accept this as
2369 a valid bitmask. */
2370 return false;
2371 b = s390_contiguous_bitmask_nowrap_p (in, size, start, end);
2372 if (b)
2373 return true;
2374 if (! wrap_p)
2375 return false;
2376 b = s390_contiguous_bitmask_nowrap_p (~in, size, start, end);
2377 if (b && start)
2378 {
2379 int s = *start;
2380 int e = *end;
2381
2382 gcc_assert (s >= 1);
2383 *start = ((e + 1) & (bs - 1));
2384 *end = ((s - 1 + bs) & (bs - 1));
2385 }
2386
2387 return b;
2388 }
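
/* Example (illustrative): IN == 0x8000000000000001 with SIZE == 64 and
   WRAP_P == true is accepted as a wrapping mask; the non-wrapping test
   fails, but ~IN is contiguous, so the function returns true with
   *START == 63 and *END == 0 (*START > *END indicating wraparound). */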
2389
2390 /* Return true if OP contains the same contiguous bitfield in *all*
2391 its elements. START and END can be used to obtain the start and
2392 end position of the bitfield.
2393
2394 START/END give the position of the first/last bit of the bitfield
2395 counting from the lowest order bit starting with zero. In order to
2396 use these values for S/390 instructions this has to be converted to
2397 "bits big endian" style. */
2398
2399 bool
2400 s390_contiguous_bitmask_vector_p (rtx op, int *start, int *end)
2401 {
2402 unsigned HOST_WIDE_INT mask;
2403 int size;
2404 rtx elt;
2405 bool b;
2406
2407 gcc_assert (!!start == !!end);
2408 if (!const_vec_duplicate_p (op, &elt)
2409 || !CONST_INT_P (elt))
2410 return false;
2411
2412 size = GET_MODE_UNIT_BITSIZE (GET_MODE (op));
2413
2414 /* We cannot deal with V1TI/V1TF. This would require a vgmq. */
2415 if (size > 64)
2416 return false;
2417
2418 mask = UINTVAL (elt);
2419
2420 b = s390_contiguous_bitmask_p (mask, true, size, start, end);
2421 if (b)
2422 {
2423 if (start)
2424 {
2425 *start -= (HOST_BITS_PER_WIDE_INT - size);
2426 *end -= (HOST_BITS_PER_WIDE_INT - size);
2427 }
2428 return true;
2429 }
2430 else
2431 return false;
2432 }
2433
2434 /* Return true if OP consists only of byte chunks that are either 0 or
2435 0xff. If MASK is non-NULL, a byte mask is generated which is
2436 appropriate for the vector generate byte mask instruction. */
2437
2438 bool
2439 s390_bytemask_vector_p (rtx op, unsigned *mask)
2440 {
2441 int i;
2442 unsigned tmp_mask = 0;
2443 int nunit, unit_size;
2444
2445 if (!VECTOR_MODE_P (GET_MODE (op))
2446 || GET_CODE (op) != CONST_VECTOR
2447 || !CONST_INT_P (XVECEXP (op, 0, 0)))
2448 return false;
2449
2450 nunit = GET_MODE_NUNITS (GET_MODE (op));
2451 unit_size = GET_MODE_UNIT_SIZE (GET_MODE (op));
2452
2453 for (i = 0; i < nunit; i++)
2454 {
2455 unsigned HOST_WIDE_INT c;
2456 int j;
2457
2458 if (!CONST_INT_P (XVECEXP (op, 0, i)))
2459 return false;
2460
2461 c = UINTVAL (XVECEXP (op, 0, i));
2462 for (j = 0; j < unit_size; j++)
2463 {
2464 if ((c & 0xff) != 0 && (c & 0xff) != 0xff)
2465 return false;
2466 tmp_mask |= (c & 1) << ((nunit - 1 - i) * unit_size + j);
2467 c = c >> BITS_PER_UNIT;
2468 }
2469 }
2470
2471 if (mask != NULL)
2472 *mask = tmp_mask;
2473
2474 return true;
2475 }
2476
2477 /* Check whether a rotate of ROTL followed by an AND of CONTIG is
2478 equivalent to a shift followed by the AND. In particular, CONTIG
2479 should not overlap the (rotated) bit 0/bit 63 gap. Negative values
2480 for ROTL indicate a rotate to the right. */
2481
2482 bool
2483 s390_extzv_shift_ok (int bitsize, int rotl, unsigned HOST_WIDE_INT contig)
2484 {
2485 int start, end;
2486 bool ok;
2487
2488 ok = s390_contiguous_bitmask_nowrap_p (contig, bitsize, &start, &end);
2489 gcc_assert (ok);
2490
2491 if (rotl >= 0)
2492 return (64 - end >= rotl);
2493 else
2494 {
2495 /* Translate "- rotate right" in BITSIZE mode to "rotate left" in
2496 DImode. */
2497 rotl = -rotl + (64 - bitsize);
2498 return (start >= rotl);
2499 }
2500 }
2501
2502 /* Check whether we can (and want to) split a double-word
2503 move in mode MODE from SRC to DST into two single-word
2504 moves, moving the subword FIRST_SUBWORD first. */
2505
2506 bool
2507 s390_split_ok_p (rtx dst, rtx src, machine_mode mode, int first_subword)
2508 {
2509 /* Floating point and vector registers cannot be split. */
2510 if (FP_REG_P (src) || FP_REG_P (dst) || VECTOR_REG_P (src) || VECTOR_REG_P (dst))
2511 return false;
2512
2513 /* Non-offsettable memory references cannot be split. */
2514 if ((GET_CODE (src) == MEM && !offsettable_memref_p (src))
2515 || (GET_CODE (dst) == MEM && !offsettable_memref_p (dst)))
2516 return false;
2517
2518 /* Moving the first subword must not clobber a register
2519 needed to move the second subword. */
2520 if (register_operand (dst, mode))
2521 {
2522 rtx subreg = operand_subword (dst, first_subword, 0, mode);
2523 if (reg_overlap_mentioned_p (subreg, src))
2524 return false;
2525 }
2526
2527 return true;
2528 }
2529
2530 /* Return true if it can be proven that [MEM1, MEM1 + SIZE]
2531 and [MEM2, MEM2 + SIZE] overlap, and false
2532 otherwise. */
2533
2534 bool
2535 s390_overlap_p (rtx mem1, rtx mem2, HOST_WIDE_INT size)
2536 {
2537 rtx addr1, addr2, addr_delta;
2538 HOST_WIDE_INT delta;
2539
2540 if (GET_CODE (mem1) != MEM || GET_CODE (mem2) != MEM)
2541 return true;
2542
2543 if (size == 0)
2544 return false;
2545
2546 addr1 = XEXP (mem1, 0);
2547 addr2 = XEXP (mem2, 0);
2548
2549 addr_delta = simplify_binary_operation (MINUS, Pmode, addr2, addr1);
2550
2551 /* This overlapping check is used by peepholes merging memory block operations.
2552 Overlapping operations would otherwise be recognized by the S/390 hardware
2553 and would fall back to a slower implementation. Allowing overlapping
2554 operations would lead to slow code but not to wrong code. Therefore we are
2555 somewhat optimistic if we cannot prove that the memory blocks are
2556 overlapping.
2557 That's why we return false here although this may accept operations on
2558 overlapping memory areas. */
2559 if (!addr_delta || GET_CODE (addr_delta) != CONST_INT)
2560 return false;
2561
2562 delta = INTVAL (addr_delta);
2563
2564 if (delta == 0
2565 || (delta > 0 && delta < size)
2566 || (delta < 0 && -delta < size))
2567 return true;
2568
2569 return false;
2570 }
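
/* Example (illustrative): if MEM2's address is MEM1's address plus 8
   and SIZE is 16, the blocks provably overlap and true is returned;
   with SIZE 8 they are merely adjacent and false is returned. If the
   address difference does not simplify to a constant, false is
   returned as well, in line with the optimistic policy above. */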
2571
2572 /* Check whether the address of memory reference MEM2 equals exactly
2573 the address of memory reference MEM1 plus DELTA. Return true if
2574 we can prove this to be the case, false otherwise. */
2575
2576 bool
2577 s390_offset_p (rtx mem1, rtx mem2, rtx delta)
2578 {
2579 rtx addr1, addr2, addr_delta;
2580
2581 if (GET_CODE (mem1) != MEM || GET_CODE (mem2) != MEM)
2582 return false;
2583
2584 addr1 = XEXP (mem1, 0);
2585 addr2 = XEXP (mem2, 0);
2586
2587 addr_delta = simplify_binary_operation (MINUS, Pmode, addr2, addr1);
2588 if (!addr_delta || !rtx_equal_p (addr_delta, delta))
2589 return false;
2590
2591 return true;
2592 }
2593
2594 /* Expand logical operator CODE in mode MODE with operands OPERANDS. */
2595
2596 void
2597 s390_expand_logical_operator (enum rtx_code code, machine_mode mode,
2598 rtx *operands)
2599 {
2600 machine_mode wmode = mode;
2601 rtx dst = operands[0];
2602 rtx src1 = operands[1];
2603 rtx src2 = operands[2];
2604 rtx op, clob, tem;
2605
2606 /* If we cannot handle the operation directly, use a temp register. */
2607 if (!s390_logical_operator_ok_p (operands))
2608 dst = gen_reg_rtx (mode);
2609
2610 /* QImode and HImode patterns make sense only if we have a destination
2611 in memory. Otherwise perform the operation in SImode. */
2612 if ((mode == QImode || mode == HImode) && GET_CODE (dst) != MEM)
2613 wmode = SImode;
2614
2615 /* Widen operands if required. */
2616 if (mode != wmode)
2617 {
2618 if (GET_CODE (dst) == SUBREG
2619 && (tem = simplify_subreg (wmode, dst, mode, 0)) != 0)
2620 dst = tem;
2621 else if (REG_P (dst))
2622 dst = gen_rtx_SUBREG (wmode, dst, 0);
2623 else
2624 dst = gen_reg_rtx (wmode);
2625
2626 if (GET_CODE (src1) == SUBREG
2627 && (tem = simplify_subreg (wmode, src1, mode, 0)) != 0)
2628 src1 = tem;
2629 else if (GET_MODE (src1) != VOIDmode)
2630 src1 = gen_rtx_SUBREG (wmode, force_reg (mode, src1), 0);
2631
2632 if (GET_CODE (src2) == SUBREG
2633 && (tem = simplify_subreg (wmode, src2, mode, 0)) != 0)
2634 src2 = tem;
2635 else if (GET_MODE (src2) != VOIDmode)
2636 src2 = gen_rtx_SUBREG (wmode, force_reg (mode, src2), 0);
2637 }
2638
2639 /* Emit the instruction. */
2640 op = gen_rtx_SET (dst, gen_rtx_fmt_ee (code, wmode, src1, src2));
2641 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, CC_REGNUM));
2642 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
2643
2644 /* Fix up the destination if needed. */
2645 if (dst != operands[0])
2646 emit_move_insn (operands[0], gen_lowpart (mode, dst));
2647 }
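
/* Sketch of the widening path above (illustrative): an
   (xor:QI (reg:QI a) (reg:QI b)) with a register destination is
   rewritten to operate on SImode subregs of the inputs, emitted
   together with a CC clobber, and the QImode lowpart of the result
   is copied back to the original destination. */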
2648
2649 /* Check whether OPERANDS are OK for a logical operation (AND, IOR, XOR). */
2650
2651 bool
2652 s390_logical_operator_ok_p (rtx *operands)
2653 {
2654 /* If the destination operand is in memory, it needs to coincide
2655 with one of the source operands. After reload, it has to be
2656 the first source operand. */
2657 if (GET_CODE (operands[0]) == MEM)
2658 return rtx_equal_p (operands[0], operands[1])
2659 || (!reload_completed && rtx_equal_p (operands[0], operands[2]));
2660
2661 return true;
2662 }
2663
2664 /* Narrow logical operation CODE of memory operand MEMOP with immediate
2665 operand IMMOP to switch from SS to SI type instructions. */
2666
2667 void
2668 s390_narrow_logical_operator (enum rtx_code code, rtx *memop, rtx *immop)
2669 {
2670 int def = code == AND ? -1 : 0;
2671 HOST_WIDE_INT mask;
2672 int part;
2673
2674 gcc_assert (GET_CODE (*memop) == MEM);
2675 gcc_assert (!MEM_VOLATILE_P (*memop));
2676
2677 mask = s390_extract_part (*immop, QImode, def);
2678 part = s390_single_part (*immop, GET_MODE (*memop), QImode, def);
2679 gcc_assert (part >= 0);
2680
2681 *memop = adjust_address (*memop, QImode, part);
2682 *immop = gen_int_mode (mask, QImode);
2683 }
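
/* Example (illustrative): (and:SI (mem:SI X) (const_int 0xffffff00))
   has exactly one QImode part differing from DEF == -1, namely the
   least significant byte. The operation is narrowed to
   (and:QI (mem:QI X+3) (const_int 0)), i.e. a single NI instead of an
   SS-type NC over four bytes. */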
2684
2685
2686 /* How to allocate a 'struct machine_function'. */
2687
2688 static struct machine_function *
2689 s390_init_machine_status (void)
2690 {
2691 return ggc_cleared_alloc<machine_function> ();
2692 }
2693
2694 /* Map for smallest class containing reg regno. */
2695
2696 const enum reg_class regclass_map[FIRST_PSEUDO_REGISTER] =
2697 { GENERAL_REGS, ADDR_REGS, ADDR_REGS, ADDR_REGS, /* 0 */
2698 ADDR_REGS, ADDR_REGS, ADDR_REGS, ADDR_REGS, /* 4 */
2699 ADDR_REGS, ADDR_REGS, ADDR_REGS, ADDR_REGS, /* 8 */
2700 ADDR_REGS, ADDR_REGS, ADDR_REGS, ADDR_REGS, /* 12 */
2701 FP_REGS, FP_REGS, FP_REGS, FP_REGS, /* 16 */
2702 FP_REGS, FP_REGS, FP_REGS, FP_REGS, /* 20 */
2703 FP_REGS, FP_REGS, FP_REGS, FP_REGS, /* 24 */
2704 FP_REGS, FP_REGS, FP_REGS, FP_REGS, /* 28 */
2705 ADDR_REGS, CC_REGS, ADDR_REGS, ADDR_REGS, /* 32 */
2706 ACCESS_REGS, ACCESS_REGS, VEC_REGS, VEC_REGS, /* 36 */
2707 VEC_REGS, VEC_REGS, VEC_REGS, VEC_REGS, /* 40 */
2708 VEC_REGS, VEC_REGS, VEC_REGS, VEC_REGS, /* 44 */
2709 VEC_REGS, VEC_REGS, VEC_REGS, VEC_REGS, /* 48 */
2710 VEC_REGS, VEC_REGS /* 52 */
2711 };
2712
2713 /* Return attribute type of insn. */
2714
2715 static enum attr_type
2716 s390_safe_attr_type (rtx_insn *insn)
2717 {
2718 if (recog_memoized (insn) >= 0)
2719 return get_attr_type (insn);
2720 else
2721 return TYPE_NONE;
2722 }
2723
2724 /* Return true if DISP is a valid short displacement. */
2725
2726 static bool
2727 s390_short_displacement (rtx disp)
2728 {
2729 /* No displacement is OK. */
2730 if (!disp)
2731 return true;
2732
2733 /* Without the long displacement facility we don't need to
2734 distinguish between long and short displacements. */
2735 if (!TARGET_LONG_DISPLACEMENT)
2736 return true;
2737
2738 /* Integer displacement in range. */
2739 if (GET_CODE (disp) == CONST_INT)
2740 return INTVAL (disp) >= 0 && INTVAL (disp) < 4096;
2741
2742 /* GOT offset is not OK, the GOT can be large. */
2743 if (GET_CODE (disp) == CONST
2744 && GET_CODE (XEXP (disp, 0)) == UNSPEC
2745 && (XINT (XEXP (disp, 0), 1) == UNSPEC_GOT
2746 || XINT (XEXP (disp, 0), 1) == UNSPEC_GOTNTPOFF))
2747 return false;
2748
2749 /* All other symbolic constants are literal pool references,
2750 which are OK as the literal pool must be small. */
2751 if (GET_CODE (disp) == CONST)
2752 return true;
2753
2754 return false;
2755 }
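
/* Note (assumption for illustration): short displacements are the
   unsigned 12-bit D fields of the classic instruction formats
   (0..4095); the long-displacement facility adds formats with signed
   20-bit displacement fields (see DISP_IN_RANGE). */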
2756
2757 /* Decompose an RTL expression ADDR for a memory address into
2758 its components, returned in OUT.
2759
2760 Returns false if ADDR is not a valid memory address, true
2761 otherwise. If OUT is NULL, don't return the components,
2762 but check for validity only.
2763
2764 Note: Only addresses in canonical form are recognized.
2765 LEGITIMIZE_ADDRESS should convert non-canonical forms to the
2766 canonical form so that they will be recognized. */
2767
2768 static int
2769 s390_decompose_address (rtx addr, struct s390_address *out)
2770 {
2771 HOST_WIDE_INT offset = 0;
2772 rtx base = NULL_RTX;
2773 rtx indx = NULL_RTX;
2774 rtx disp = NULL_RTX;
2775 rtx orig_disp;
2776 bool pointer = false;
2777 bool base_ptr = false;
2778 bool indx_ptr = false;
2779 bool literal_pool = false;
2780
2781 /* We may need to substitute the literal pool base register into the address
2782 below. However, at this point we do not know which register is going to
2783 be used as base, so we substitute the arg pointer register. This is going
2784 to be treated as holding a pointer below -- it shouldn't be used for any
2785 other purpose. */
2786 rtx fake_pool_base = gen_rtx_REG (Pmode, ARG_POINTER_REGNUM);
2787
2788 /* Decompose address into base + index + displacement. */
2789
2790 if (GET_CODE (addr) == REG || GET_CODE (addr) == UNSPEC)
2791 base = addr;
2792
2793 else if (GET_CODE (addr) == PLUS)
2794 {
2795 rtx op0 = XEXP (addr, 0);
2796 rtx op1 = XEXP (addr, 1);
2797 enum rtx_code code0 = GET_CODE (op0);
2798 enum rtx_code code1 = GET_CODE (op1);
2799
2800 if (code0 == REG || code0 == UNSPEC)
2801 {
2802 if (code1 == REG || code1 == UNSPEC)
2803 {
2804 indx = op0; /* index + base */
2805 base = op1;
2806 }
2807
2808 else
2809 {
2810 base = op0; /* base + displacement */
2811 disp = op1;
2812 }
2813 }
2814
2815 else if (code0 == PLUS)
2816 {
2817 indx = XEXP (op0, 0); /* index + base + disp */
2818 base = XEXP (op0, 1);
2819 disp = op1;
2820 }
2821
2822 else
2823 {
2824 return false;
2825 }
2826 }
2827
2828 else
2829 disp = addr; /* displacement */
2830
2831 /* Extract integer part of displacement. */
2832 orig_disp = disp;
2833 if (disp)
2834 {
2835 if (GET_CODE (disp) == CONST_INT)
2836 {
2837 offset = INTVAL (disp);
2838 disp = NULL_RTX;
2839 }
2840 else if (GET_CODE (disp) == CONST
2841 && GET_CODE (XEXP (disp, 0)) == PLUS
2842 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
2843 {
2844 offset = INTVAL (XEXP (XEXP (disp, 0), 1));
2845 disp = XEXP (XEXP (disp, 0), 0);
2846 }
2847 }
2848
2849 /* Strip off CONST here to avoid special case tests later. */
2850 if (disp && GET_CODE (disp) == CONST)
2851 disp = XEXP (disp, 0);
2852
2853 /* We can convert literal pool addresses to
2854 displacements by basing them off the base register. */
2855 if (disp && GET_CODE (disp) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (disp))
2856 {
2857 if (base || indx)
2858 return false;
2859
2860 base = fake_pool_base, literal_pool = true;
2861
2862 /* Mark up the displacement. */
2863 disp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, disp),
2864 UNSPEC_LTREL_OFFSET);
2865 }
2866
2867 /* Validate base register. */
2868 if (base)
2869 {
2870 if (GET_CODE (base) == UNSPEC)
2871 switch (XINT (base, 1))
2872 {
2873 case UNSPEC_LTREF:
2874 if (!disp)
2875 disp = gen_rtx_UNSPEC (Pmode,
2876 gen_rtvec (1, XVECEXP (base, 0, 0)),
2877 UNSPEC_LTREL_OFFSET);
2878 else
2879 return false;
2880
2881 base = XVECEXP (base, 0, 1);
2882 break;
2883
2884 case UNSPEC_LTREL_BASE:
2885 if (XVECLEN (base, 0) == 1)
2886 base = fake_pool_base, literal_pool = true;
2887 else
2888 base = XVECEXP (base, 0, 1);
2889 break;
2890
2891 default:
2892 return false;
2893 }
2894
2895 if (!REG_P (base) || GET_MODE (base) != Pmode)
2896 return false;
2897
2898 if (REGNO (base) == STACK_POINTER_REGNUM
2899 || REGNO (base) == FRAME_POINTER_REGNUM
2900 || ((reload_completed || reload_in_progress)
2901 && frame_pointer_needed
2902 && REGNO (base) == HARD_FRAME_POINTER_REGNUM)
2903 || REGNO (base) == ARG_POINTER_REGNUM
2904 || (flag_pic
2905 && REGNO (base) == PIC_OFFSET_TABLE_REGNUM))
2906 pointer = base_ptr = true;
2907
2908 if ((reload_completed || reload_in_progress)
2909 && base == cfun->machine->base_reg)
2910 pointer = base_ptr = literal_pool = true;
2911 }
2912
2913 /* Validate index register. */
2914 if (indx)
2915 {
2916 if (GET_CODE (indx) == UNSPEC)
2917 switch (XINT (indx, 1))
2918 {
2919 case UNSPEC_LTREF:
2920 if (!disp)
2921 disp = gen_rtx_UNSPEC (Pmode,
2922 gen_rtvec (1, XVECEXP (indx, 0, 0)),
2923 UNSPEC_LTREL_OFFSET);
2924 else
2925 return false;
2926
2927 indx = XVECEXP (indx, 0, 1);
2928 break;
2929
2930 case UNSPEC_LTREL_BASE:
2931 if (XVECLEN (indx, 0) == 1)
2932 indx = fake_pool_base, literal_pool = true;
2933 else
2934 indx = XVECEXP (indx, 0, 1);
2935 break;
2936
2937 default:
2938 return false;
2939 }
2940
2941 if (!REG_P (indx) || GET_MODE (indx) != Pmode)
2942 return false;
2943
2944 if (REGNO (indx) == STACK_POINTER_REGNUM
2945 || REGNO (indx) == FRAME_POINTER_REGNUM
2946 || ((reload_completed || reload_in_progress)
2947 && frame_pointer_needed
2948 && REGNO (indx) == HARD_FRAME_POINTER_REGNUM)
2949 || REGNO (indx) == ARG_POINTER_REGNUM
2950 || (flag_pic
2951 && REGNO (indx) == PIC_OFFSET_TABLE_REGNUM))
2952 pointer = indx_ptr = true;
2953
2954 if ((reload_completed || reload_in_progress)
2955 && indx == cfun->machine->base_reg)
2956 pointer = indx_ptr = literal_pool = true;
2957 }
2958
2959 /* Prefer to use pointer as base, not index. */
2960 if (base && indx && !base_ptr
2961 && (indx_ptr || (!REG_POINTER (base) && REG_POINTER (indx))))
2962 {
2963 rtx tmp = base;
2964 base = indx;
2965 indx = tmp;
2966 }
2967
2968 /* Validate displacement. */
2969 if (!disp)
2970 {
2971 /* If virtual registers are involved, the displacement will change later
2972 anyway as the virtual registers get eliminated. This could make a
2973 valid displacement invalid, but it is more likely to make an invalid
2974 displacement valid, because we sometimes access the register save area
2975 via negative offsets to one of those registers.
2976 Thus we don't check the displacement for validity here. If after
2977 elimination the displacement turns out to be invalid after all,
2978 this is fixed up by reload in any case. */
2979 /* LRA always keeps displacements up to date and we need to
2980 know that the displacement is correct during all of LRA, not
2981 only at the final elimination. */
2982 if (lra_in_progress
2983 || (base != arg_pointer_rtx
2984 && indx != arg_pointer_rtx
2985 && base != return_address_pointer_rtx
2986 && indx != return_address_pointer_rtx
2987 && base != frame_pointer_rtx
2988 && indx != frame_pointer_rtx
2989 && base != virtual_stack_vars_rtx
2990 && indx != virtual_stack_vars_rtx))
2991 if (!DISP_IN_RANGE (offset))
2992 return false;
2993 }
2994 else
2995 {
2996 /* All the special cases are pointers. */
2997 pointer = true;
2998
2999 /* In the small-PIC case, the linker converts @GOT
3000 and @GOTNTPOFF offsets to possible displacements. */
3001 if (GET_CODE (disp) == UNSPEC
3002 && (XINT (disp, 1) == UNSPEC_GOT
3003 || XINT (disp, 1) == UNSPEC_GOTNTPOFF)
3004 && flag_pic == 1)
3005 {
3006 ;
3007 }
3008
3009 /* Accept pool label offsets. */
3010 else if (GET_CODE (disp) == UNSPEC
3011 && XINT (disp, 1) == UNSPEC_POOL_OFFSET)
3012 ;
3013
3014 /* Accept literal pool references. */
3015 else if (GET_CODE (disp) == UNSPEC
3016 && XINT (disp, 1) == UNSPEC_LTREL_OFFSET)
3017 {
3018 /* In case CSE pulled a non-literal-pool reference out of
3019 the pool we have to reject the address. This is
3020 especially important when loading the GOT pointer on
3021 non-zarch CPUs. In this case the literal pool contains an
3022 lt-relative offset to the _GLOBAL_OFFSET_TABLE_ label which
3023 will most likely exceed the displacement. */
3024 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
3025 || !CONSTANT_POOL_ADDRESS_P (XVECEXP (disp, 0, 0)))
3026 return false;
3027
3028 orig_disp = gen_rtx_CONST (Pmode, disp);
3029 if (offset)
3030 {
3031 /* If we have an offset, make sure it does not
3032 exceed the size of the constant pool entry. */
3033 rtx sym = XVECEXP (disp, 0, 0);
3034 if (offset >= GET_MODE_SIZE (get_pool_mode (sym)))
3035 return false;
3036
3037 orig_disp = plus_constant (Pmode, orig_disp, offset);
3038 }
3039 }
3040
3041 else
3042 return false;
3043 }
3044
3045 if (!base && !indx)
3046 pointer = true;
3047
3048 if (out)
3049 {
3050 out->base = base;
3051 out->indx = indx;
3052 out->disp = orig_disp;
3053 out->pointer = pointer;
3054 out->literal_pool = literal_pool;
3055 }
3056
3057 return true;
3058 }
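
/* Example (illustrative): (plus (plus (reg A) (reg B)) (const_int 42))
   is decomposed into indx = A, base = B and offset 42; the base/index
   swap above may then exchange A and B so that a known pointer ends up
   as the base register. */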
3059
3060 /* Decompose an RTL expression OP for an address style operand into its
3061 components, and return the base register in BASE and the offset in
3062 OFFSET. While OP looks like an address it is never supposed to be
3063 used as such.
3064
3065 Return true if OP is a valid address operand, false if not. */
3066
3067 bool
3068 s390_decompose_addrstyle_without_index (rtx op, rtx *base,
3069 HOST_WIDE_INT *offset)
3070 {
3071 rtx off = NULL_RTX;
3072
3073 /* We can have an integer constant, an address register,
3074 or a sum of the two. */
3075 if (CONST_SCALAR_INT_P (op))
3076 {
3077 off = op;
3078 op = NULL_RTX;
3079 }
3080 if (op && GET_CODE (op) == PLUS && CONST_SCALAR_INT_P (XEXP (op, 1)))
3081 {
3082 off = XEXP (op, 1);
3083 op = XEXP (op, 0);
3084 }
3085 while (op && GET_CODE (op) == SUBREG)
3086 op = SUBREG_REG (op);
3087
3088 if (op && GET_CODE (op) != REG)
3089 return false;
3090
3091 if (offset)
3092 {
3093 if (off == NULL_RTX)
3094 *offset = 0;
3095 else if (CONST_INT_P (off))
3096 *offset = INTVAL (off);
3097 else if (CONST_WIDE_INT_P (off))
3098 /* The offset will be truncated to 12 bits anyway, so just take
3099 the lowest-order chunk of the wide int. */
3100 *offset = CONST_WIDE_INT_ELT (off, 0);
3101 else
3102 gcc_unreachable ();
3103 }
3104 if (base)
3105 *base = op;
3106
3107 return true;
3108 }
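
/* Example (illustrative): (plus (reg 1) (const_int 7)) yields
   *BASE == (reg 1) and *OFFSET == 7, while a plain (const_int 63)
   yields *BASE == NULL_RTX and *OFFSET == 63 -- the typical shape of
   a shift count operand. */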
3109
3110
3111 /* Return true if OP is a valid address without index. */
3112
3113 bool
3114 s390_legitimate_address_without_index_p (rtx op)
3115 {
3116 struct s390_address addr;
3117
3118 if (!s390_decompose_address (XEXP (op, 0), &addr))
3119 return false;
3120 if (addr.indx)
3121 return false;
3122
3123 return true;
3124 }
3125
3126
3127 /* Return TRUE if ADDR is an operand valid for a load/store relative
3128 instruction. Be aware that the alignment of the operand needs to
3129 be checked separately.
3130 Valid addresses are single references or a sum of a reference and a
3131 constant integer. Return these parts in SYMREF and ADDEND. You can
3132 pass NULL in REF and/or ADDEND if you are not interested in these
3133 values. Literal pool references are *not* considered symbol
3134 references. */
3135
3136 static bool
3137 s390_loadrelative_operand_p (rtx addr, rtx *symref, HOST_WIDE_INT *addend)
3138 {
3139 HOST_WIDE_INT tmpaddend = 0;
3140
3141 if (GET_CODE (addr) == CONST)
3142 addr = XEXP (addr, 0);
3143
3144 if (GET_CODE (addr) == PLUS)
3145 {
3146 if (!CONST_INT_P (XEXP (addr, 1)))
3147 return false;
3148
3149 tmpaddend = INTVAL (XEXP (addr, 1));
3150 addr = XEXP (addr, 0);
3151 }
3152
3153 if ((GET_CODE (addr) == SYMBOL_REF && !CONSTANT_POOL_ADDRESS_P (addr))
3154 || (GET_CODE (addr) == UNSPEC
3155 && (XINT (addr, 1) == UNSPEC_GOTENT
3156 || (TARGET_CPU_ZARCH && XINT (addr, 1) == UNSPEC_PLT))))
3157 {
3158 if (symref)
3159 *symref = addr;
3160 if (addend)
3161 *addend = tmpaddend;
3162
3163 return true;
3164 }
3165 return false;
3166 }
3167
3168 /* Return true if the address in OP is valid for constraint letter C
3169 if wrapped in a MEM rtx. Set LIT_POOL_OK to true if literal
3170 pool MEMs should be accepted. Only the Q, R, S, T constraint
3171 letters are allowed for C. */
3172
3173 static int
3174 s390_check_qrst_address (char c, rtx op, bool lit_pool_ok)
3175 {
3176 struct s390_address addr;
3177 bool decomposed = false;
3178
3179 if (!address_operand (op, GET_MODE (op)))
3180 return 0;
3181
3182 /* This check makes sure that no symbolic address (except literal
3183 pool references) are accepted by the R or T constraints. */
3184 if (s390_loadrelative_operand_p (op, NULL, NULL))
3185 return 0;
3186
3187 /* Ensure literal pool references are only accepted if LIT_POOL_OK. */
3188 if (!lit_pool_ok)
3189 {
3190 if (!s390_decompose_address (op, &addr))
3191 return 0;
3192 if (addr.literal_pool)
3193 return 0;
3194 decomposed = true;
3195 }
3196
3197 /* With reload, we sometimes get intermediate address forms that are
3198 actually invalid as-is, but we need to accept them in the most
3199 generic cases below ('R' or 'T'), since reload will in fact fix
3200 them up. LRA behaves differently here; we never see such forms,
3201 but on the other hand, we need to strictly reject every invalid
3202 address form. Perform this check right up front. */
3203 if (lra_in_progress)
3204 {
3205 if (!decomposed && !s390_decompose_address (op, &addr))
3206 return 0;
3207 decomposed = true;
3208 }
3209
3210 switch (c)
3211 {
3212 case 'Q': /* no index short displacement */
3213 if (!decomposed && !s390_decompose_address (op, &addr))
3214 return 0;
3215 if (addr.indx)
3216 return 0;
3217 if (!s390_short_displacement (addr.disp))
3218 return 0;
3219 break;
3220
3221 case 'R': /* with index short displacement */
3222 if (TARGET_LONG_DISPLACEMENT)
3223 {
3224 if (!decomposed && !s390_decompose_address (op, &addr))
3225 return 0;
3226 if (!s390_short_displacement (addr.disp))
3227 return 0;
3228 }
3229 /* Any invalid address here will be fixed up by reload,
3230 so accept it for the most generic constraint. */
3231 break;
3232
3233 case 'S': /* no index long displacement */
3234 if (!decomposed && !s390_decompose_address (op, &addr))
3235 return 0;
3236 if (addr.indx)
3237 return 0;
3238 break;
3239
3240 case 'T': /* with index long displacement */
3241 /* Any invalid address here will be fixed up by reload,
3242 so accept it for the most generic constraint. */
3243 break;
3244
3245 default:
3246 return 0;
3247 }
3248 return 1;
3249 }
3250
3251
3252 /* Evaluates constraint strings described by the regular expression
3253 ([A|B|Z](Q|R|S|T))|Y and returns 1 if OP is a valid operand for
3254 the constraint given in STR, and 0 otherwise. */
3255
3256 int
3257 s390_mem_constraint (const char *str, rtx op)
3258 {
3259 char c = str[0];
3260
3261 switch (c)
3262 {
3263 case 'A':
3264 /* Check for offsettable variants of memory constraints. */
3265 if (!MEM_P (op) || MEM_VOLATILE_P (op))
3266 return 0;
3267 if ((reload_completed || reload_in_progress)
3268 ? !offsettable_memref_p (op) : !offsettable_nonstrict_memref_p (op))
3269 return 0;
3270 return s390_check_qrst_address (str[1], XEXP (op, 0), true);
3271 case 'B':
3272 /* Check for non-literal-pool variants of memory constraints. */
3273 if (!MEM_P (op))
3274 return 0;
3275 return s390_check_qrst_address (str[1], XEXP (op, 0), false);
3276 case 'Q':
3277 case 'R':
3278 case 'S':
3279 case 'T':
3280 if (GET_CODE (op) != MEM)
3281 return 0;
3282 return s390_check_qrst_address (c, XEXP (op, 0), true);
3283 case 'Y':
3284 /* Simply check for the basic form of a shift count. Reload will
3285 take care of making sure we have a proper base register. */
3286 if (!s390_decompose_addrstyle_without_index (op, NULL, NULL))
3287 return 0;
3288 break;
3289 case 'Z':
3290 return s390_check_qrst_address (str[1], op, true);
3291 default:
3292 return 0;
3293 }
3294 return 1;
3295 }
3296
3297
3298 /* Evaluates constraint strings starting with letter O. Input
3299 parameter C is the letter following the "O" in the constraint
3300 string. Returns 1 if VALUE meets the respective constraint and 0
3301 otherwise. */
3302
3303 int
3304 s390_O_constraint_str (const char c, HOST_WIDE_INT value)
3305 {
3306 if (!TARGET_EXTIMM)
3307 return 0;
3308
3309 switch (c)
3310 {
3311 case 's':
3312 return trunc_int_for_mode (value, SImode) == value;
3313
3314 case 'p':
3315 return value == 0
3316 || s390_single_part (GEN_INT (value), DImode, SImode, 0) == 1;
3317
3318 case 'n':
3319 return s390_single_part (GEN_INT (value - 1), DImode, SImode, -1) == 1;
3320
3321 default:
3322 gcc_unreachable ();
3323 }
3324 }
3325
3326
3327 /* Evaluates constraint strings starting with letter N. Parameter STR
3328 contains the letters following letter "N" in the constraint string.
3329 Returns true if VALUE matches the constraint. */
3330
3331 int
3332 s390_N_constraint_str (const char *str, HOST_WIDE_INT value)
3333 {
3334 machine_mode mode, part_mode;
3335 int def;
3336 int part, part_goal;
3337
3338
3339 if (str[0] == 'x')
3340 part_goal = -1;
3341 else
3342 part_goal = str[0] - '0';
3343
3344 switch (str[1])
3345 {
3346 case 'Q':
3347 part_mode = QImode;
3348 break;
3349 case 'H':
3350 part_mode = HImode;
3351 break;
3352 case 'S':
3353 part_mode = SImode;
3354 break;
3355 default:
3356 return 0;
3357 }
3358
3359 switch (str[2])
3360 {
3361 case 'H':
3362 mode = HImode;
3363 break;
3364 case 'S':
3365 mode = SImode;
3366 break;
3367 case 'D':
3368 mode = DImode;
3369 break;
3370 default:
3371 return 0;
3372 }
3373
3374 switch (str[3])
3375 {
3376 case '0':
3377 def = 0;
3378 break;
3379 case 'F':
3380 def = -1;
3381 break;
3382 default:
3383 return 0;
3384 }
3385
3386 if (GET_MODE_SIZE (mode) <= GET_MODE_SIZE (part_mode))
3387 return 0;
3388
3389 part = s390_single_part (GEN_INT (value), mode, part_mode, def);
3390 if (part < 0)
3391 return 0;
3392 if (part_goal != -1 && part_goal != part)
3393 return 0;
3394
3395 return 1;
3396 }
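
/* Example (illustrative): the constraint "N0HD0" (STR == "0HD0")
   matches a DImode constant whose only HImode part differing from
   zero is part 0, the most significant one -- e.g.
   0x1234000000000000. */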
3397
3398
3399 /* Returns true if the input parameter VALUE is a float zero. */
3400
3401 int
3402 s390_float_const_zero_p (rtx value)
3403 {
3404 return (GET_MODE_CLASS (GET_MODE (value)) == MODE_FLOAT
3405 && value == CONST0_RTX (GET_MODE (value)));
3406 }
3407
3408 /* Implement TARGET_REGISTER_MOVE_COST. */
3409
3410 static int
3411 s390_register_move_cost (machine_mode mode,
3412 reg_class_t from, reg_class_t to)
3413 {
3414 /* On s390, copying between FPRs and GPRs is expensive. */
3415
3416 /* It becomes somewhat faster with ldgr/lgdr. */
3417 if (TARGET_Z10 && GET_MODE_SIZE (mode) == 8)
3418 {
3419 /* ldgr is single cycle. */
3420 if (reg_classes_intersect_p (from, GENERAL_REGS)
3421 && reg_classes_intersect_p (to, FP_REGS))
3422 return 1;
3423 /* lgdr needs 3 cycles. */
3424 if (reg_classes_intersect_p (to, GENERAL_REGS)
3425 && reg_classes_intersect_p (from, FP_REGS))
3426 return 3;
3427 }
3428
3429 /* Otherwise copying is done via memory. */
3430 if ((reg_classes_intersect_p (from, GENERAL_REGS)
3431 && reg_classes_intersect_p (to, FP_REGS))
3432 || (reg_classes_intersect_p (from, FP_REGS)
3433 && reg_classes_intersect_p (to, GENERAL_REGS)))
3434 return 10;
3435
3436 return 1;
3437 }
3438
3439 /* Implement TARGET_MEMORY_MOVE_COST. */
3440
3441 static int
3442 s390_memory_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
3443 reg_class_t rclass ATTRIBUTE_UNUSED,
3444 bool in ATTRIBUTE_UNUSED)
3445 {
3446 return 2;
3447 }
3448
3449 /* Compute a (partial) cost for rtx X. Return true if the complete
3450 cost has been computed, and false if subexpressions should be
3451 scanned. In either case, *TOTAL contains the cost result. The
3452 initial value of *TOTAL is the default value computed by
3453 rtx_cost. It may be left unmodified. OUTER_CODE contains the
3454 code of the superexpression of x. */
3455
3456 static bool
3457 s390_rtx_costs (rtx x, machine_mode mode, int outer_code,
3458 int opno ATTRIBUTE_UNUSED,
3459 int *total, bool speed ATTRIBUTE_UNUSED)
3460 {
3461 int code = GET_CODE (x);
3462 switch (code)
3463 {
3464 case CONST:
3465 case CONST_INT:
3466 case LABEL_REF:
3467 case SYMBOL_REF:
3468 case CONST_DOUBLE:
3469 case CONST_WIDE_INT:
3470 case MEM:
3471 *total = 0;
3472 return true;
3473
3474 case SET:
3475 {
3476 /* Without this a conditional move instruction would be
3477 accounted as 3 * COSTS_N_INSNS (set, if_then_else,
3478 comparison operator). That's a bit pessimistic. */
3479
3480 if (!TARGET_Z196 || GET_CODE (SET_SRC (x)) != IF_THEN_ELSE)
3481 return false;
3482
3483 rtx cond = XEXP (SET_SRC (x), 0);
3484
3485 if (!CC_REG_P (XEXP (cond, 0)) || !CONST_INT_P (XEXP (cond, 1)))
3486 return false;
3487
3488 /* It is going to be a load/store on condition. Make it
3489 slightly more expensive than a normal load. */
3490 *total = COSTS_N_INSNS (1) + 1;
3491
3492 rtx dst = SET_DEST (x);
3493 rtx then = XEXP (SET_SRC (x), 1);
3494 rtx els = XEXP (SET_SRC (x), 2);
3495
3496 /* It is a real IF-THEN-ELSE. An additional move will be
3497 needed to implement that. */
3498 if (reload_completed
3499 && !rtx_equal_p (dst, then)
3500 && !rtx_equal_p (dst, els))
3501 *total += COSTS_N_INSNS (1) / 2;
3502
3503 /* A minor penalty for constants we cannot directly handle. */
3504 if ((CONST_INT_P (then) || CONST_INT_P (els))
3505 && (!TARGET_Z13 || MEM_P (dst)
3506 || (CONST_INT_P (then) && !satisfies_constraint_K (then))
3507 || (CONST_INT_P (els) && !satisfies_constraint_K (els))))
3508 *total += COSTS_N_INSNS (1) / 2;
3509
3510 /* A store on condition can only handle register src operands. */
3511 if (MEM_P (dst) && (!REG_P (then) || !REG_P (els)))
3512 *total += COSTS_N_INSNS (1) / 2;
3513
3514 return true;
3515 }
3516 case IOR:
3517 /* risbg */
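/* Matches e.g. (ior (and (reg a) (const_int 0xff))
   (ashift (reg b) (const_int 8))) -- illustrative RTL, not from the
   original sources: the AND mask covers exactly the bits below the
   shift count, so the bit insertion can be implemented with a
   risbg-based two-insn sequence. */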
3518 if (GET_CODE (XEXP (x, 0)) == AND
3519 && GET_CODE (XEXP (x, 1)) == ASHIFT
3520 && REG_P (XEXP (XEXP (x, 0), 0))
3521 && REG_P (XEXP (XEXP (x, 1), 0))
3522 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
3523 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
3524 && (UINTVAL (XEXP (XEXP (x, 0), 1)) ==
3525 (HOST_WIDE_INT_1U << UINTVAL (XEXP (XEXP (x, 1), 1))) - 1))
3526 {
3527 *total = COSTS_N_INSNS (2);
3528 return true;
3529 }
3530
3531 /* ~AND on a 128-bit mode. This can be done using a vector
3532 instruction. */
3533 if (TARGET_VXE
3534 && GET_CODE (XEXP (x, 0)) == NOT
3535 && GET_CODE (XEXP (x, 1)) == NOT
3536 && REG_P (XEXP (XEXP (x, 0), 0))
3537 && REG_P (XEXP (XEXP (x, 1), 0))
3538 && GET_MODE_SIZE (GET_MODE (XEXP (XEXP (x, 0), 0))) == 16
3539 && s390_hard_regno_mode_ok (VR0_REGNUM,
3540 GET_MODE (XEXP (XEXP (x, 0), 0))))
3541 {
3542 *total = COSTS_N_INSNS (1);
3543 return true;
3544 }
3545 /* fallthrough */
3546 case ASHIFT:
3547 case ASHIFTRT:
3548 case LSHIFTRT:
3549 case ROTATE:
3550 case ROTATERT:
3551 case AND:
3552 case XOR:
3553 case NEG:
3554 case NOT:
3555 *total = COSTS_N_INSNS (1);
3556 return false;
3557
3558 case PLUS:
3559 case MINUS:
3560 *total = COSTS_N_INSNS (1);
3561 return false;
3562
3563 case MULT:
3564 switch (mode)
3565 {
3566 case E_SImode:
3567 {
3568 rtx left = XEXP (x, 0);
3569 rtx right = XEXP (x, 1);
3570 if (GET_CODE (right) == CONST_INT
3571 && CONST_OK_FOR_K (INTVAL (right)))
3572 *total = s390_cost->mhi;
3573 else if (GET_CODE (left) == SIGN_EXTEND)
3574 *total = s390_cost->mh;
3575 else
3576 *total = s390_cost->ms; /* msr, ms, msy */
3577 break;
3578 }
3579 case E_DImode:
3580 {
3581 rtx left = XEXP (x, 0);
3582 rtx right = XEXP (x, 1);
3583 if (TARGET_ZARCH)
3584 {
3585 if (GET_CODE (right) == CONST_INT
3586 && CONST_OK_FOR_K (INTVAL (right)))
3587 *total = s390_cost->mghi;
3588 else if (GET_CODE (left) == SIGN_EXTEND)
3589 *total = s390_cost->msgf;
3590 else
3591 *total = s390_cost->msg; /* msgr, msg */
3592 }
3593 else /* TARGET_31BIT */
3594 {
3595 if (GET_CODE (left) == SIGN_EXTEND
3596 && GET_CODE (right) == SIGN_EXTEND)
3597 /* mulsidi case: mr, m */
3598 *total = s390_cost->m;
3599 else if (GET_CODE (left) == ZERO_EXTEND
3600 && GET_CODE (right) == ZERO_EXTEND
3601 && TARGET_CPU_ZARCH)
3602 /* umulsidi case: ml, mlr */
3603 *total = s390_cost->ml;
3604 else
3605 /* Complex calculation is required. */
3606 *total = COSTS_N_INSNS (40);
3607 }
3608 break;
3609 }
3610 case E_SFmode:
3611 case E_DFmode:
3612 *total = s390_cost->mult_df;
3613 break;
3614 case E_TFmode:
3615 *total = s390_cost->mxbr;
3616 break;
3617 default:
3618 return false;
3619 }
3620 return false;
3621
3622 case FMA:
3623 switch (mode)
3624 {
3625 case E_DFmode:
3626 *total = s390_cost->madbr;
3627 break;
3628 case E_SFmode:
3629 *total = s390_cost->maebr;
3630 break;
3631 default:
3632 return false;
3633 }
3634 /* A negation of the third argument is free: FMSUB. */
3635 if (GET_CODE (XEXP (x, 2)) == NEG)
3636 {
3637 *total += (rtx_cost (XEXP (x, 0), mode, FMA, 0, speed)
3638 + rtx_cost (XEXP (x, 1), mode, FMA, 1, speed)
3639 + rtx_cost (XEXP (XEXP (x, 2), 0), mode, FMA, 2, speed));
3640 return true;
3641 }
3642 return false;
3643
3644 case UDIV:
3645 case UMOD:
3646 if (mode == TImode) /* 128 bit division */
3647 *total = s390_cost->dlgr;
3648 else if (mode == DImode)
3649 {
3650 rtx right = XEXP (x, 1);
3651 if (GET_CODE (right) == ZERO_EXTEND) /* 64 by 32 bit division */
3652 *total = s390_cost->dlr;
3653 else /* 64 by 64 bit division */
3654 *total = s390_cost->dlgr;
3655 }
3656 else if (mode == SImode) /* 32 bit division */
3657 *total = s390_cost->dlr;
3658 return false;
3659
3660 case DIV:
3661 case MOD:
3662 if (mode == DImode)
3663 {
3664 rtx right = XEXP (x, 1);
3665 if (GET_CODE (right) == ZERO_EXTEND) /* 64 by 32 bit division */
3666 if (TARGET_ZARCH)
3667 *total = s390_cost->dsgfr;
3668 else
3669 *total = s390_cost->dr;
3670 else /* 64 by 64 bit division */
3671 *total = s390_cost->dsgr;
3672 }
3673 else if (mode == SImode) /* 32 bit division */
3674 *total = s390_cost->dlr;
3675 else if (mode == SFmode)
3676 {
3677 *total = s390_cost->debr;
3678 }
3679 else if (mode == DFmode)
3680 {
3681 *total = s390_cost->ddbr;
3682 }
3683 else if (mode == TFmode)
3684 {
3685 *total = s390_cost->dxbr;
3686 }
3687 return false;
3688
3689 case SQRT:
3690 if (mode == SFmode)
3691 *total = s390_cost->sqebr;
3692 else if (mode == DFmode)
3693 *total = s390_cost->sqdbr;
3694 else /* TFmode */
3695 *total = s390_cost->sqxbr;
3696 return false;
3697
3698 case SIGN_EXTEND:
3699 case ZERO_EXTEND:
3700 if (outer_code == MULT || outer_code == DIV || outer_code == MOD
3701 || outer_code == PLUS || outer_code == MINUS
3702 || outer_code == COMPARE)
3703 *total = 0;
3704 return false;
3705
3706 case COMPARE:
3707 *total = COSTS_N_INSNS (1);
3708 if (GET_CODE (XEXP (x, 0)) == AND
3709 && GET_CODE (XEXP (x, 1)) == CONST_INT
3710 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)
3711 {
3712 rtx op0 = XEXP (XEXP (x, 0), 0);
3713 rtx op1 = XEXP (XEXP (x, 0), 1);
3714 rtx op2 = XEXP (x, 1);
3715
3716 if (memory_operand (op0, GET_MODE (op0))
3717 && s390_tm_ccmode (op1, op2, 0) != VOIDmode)
3718 return true;
3719 if (register_operand (op0, GET_MODE (op0))
3720 && s390_tm_ccmode (op1, op2, 1) != VOIDmode)
3721 return true;
3722 }
3723 return false;
3724
3725 default:
3726 return false;
3727 }
3728 }
3729
3730 /* Return the cost of an address rtx ADDR. */
3731
3732 static int
3733 s390_address_cost (rtx addr, machine_mode mode ATTRIBUTE_UNUSED,
3734 addr_space_t as ATTRIBUTE_UNUSED,
3735 bool speed ATTRIBUTE_UNUSED)
3736 {
3737 struct s390_address ad;
3738 if (!s390_decompose_address (addr, &ad))
3739 return 1000;
3740
3741 return ad.indx ? COSTS_N_INSNS (1) + 1 : COSTS_N_INSNS (1);
3742 }
3743
3744 /* Implement targetm.vectorize.builtin_vectorization_cost. */
3745 static int
3746 s390_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
3747 tree vectype,
3748 int misalign ATTRIBUTE_UNUSED)
3749 {
3750 switch (type_of_cost)
3751 {
3752 case scalar_stmt:
3753 case scalar_load:
3754 case scalar_store:
3755 case vector_stmt:
3756 case vector_load:
3757 case vector_store:
3758 case vector_gather_load:
3759 case vector_scatter_store:
3760 case vec_to_scalar:
3761 case scalar_to_vec:
3762 case cond_branch_not_taken:
3763 case vec_perm:
3764 case vec_promote_demote:
3765 case unaligned_load:
3766 case unaligned_store:
3767 return 1;
3768
3769 case cond_branch_taken:
3770 return 3;
3771
3772 case vec_construct:
3773 return TYPE_VECTOR_SUBPARTS (vectype) - 1;
3774
3775 default:
3776 gcc_unreachable ();
3777 }
3778 }
3779
3780 /* If OP is a SYMBOL_REF of a thread-local symbol, return its TLS mode,
3781 otherwise return 0. */
3782
3783 int
3784 tls_symbolic_operand (rtx op)
3785 {
3786 if (GET_CODE (op) != SYMBOL_REF)
3787 return 0;
3788 return SYMBOL_REF_TLS_MODEL (op);
3789 }
3790
3791 /* Split DImode access register reference REG (on 64-bit) into its constituent
3792 low and high parts, and store them into LO and HI. Note that gen_lowpart/
3793 gen_highpart cannot be used as they assume all registers are word-sized,
3794 while our access registers have only half that size. */
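/* E.g. a DImode value held in the even/odd access-register pair
(a0, a1) is split into *HI = a0 and *LO = a1 (illustrative; see
the REGNO arithmetic below). */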
3795
3796 void
3797 s390_split_access_reg (rtx reg, rtx *lo, rtx *hi)
3798 {
3799 gcc_assert (TARGET_64BIT);
3800 gcc_assert (ACCESS_REG_P (reg));
3801 gcc_assert (GET_MODE (reg) == DImode);
3802 gcc_assert (!(REGNO (reg) & 1));
3803
3804 *lo = gen_rtx_REG (SImode, REGNO (reg) + 1);
3805 *hi = gen_rtx_REG (SImode, REGNO (reg));
3806 }
3807
3808 /* Return true if OP contains a symbol reference. */
3809
3810 bool
3811 symbolic_reference_mentioned_p (rtx op)
3812 {
3813 const char *fmt;
3814 int i;
3815
3816 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
3817 return 1;
3818
3819 fmt = GET_RTX_FORMAT (GET_CODE (op));
3820 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
3821 {
3822 if (fmt[i] == 'E')
3823 {
3824 int j;
3825
3826 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
3827 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
3828 return 1;
3829 }
3830
3831 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
3832 return 1;
3833 }
3834
3835 return 0;
3836 }
3837
3838 /* Return true if OP contains a reference to a thread-local symbol. */
3839
3840 bool
3841 tls_symbolic_reference_mentioned_p (rtx op)
3842 {
3843 const char *fmt;
3844 int i;
3845
3846 if (GET_CODE (op) == SYMBOL_REF)
3847 return tls_symbolic_operand (op);
3848
3849 fmt = GET_RTX_FORMAT (GET_CODE (op));
3850 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
3851 {
3852 if (fmt[i] == 'E')
3853 {
3854 int j;
3855
3856 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
3857 if (tls_symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
3858 return true;
3859 }
3860
3861 else if (fmt[i] == 'e' && tls_symbolic_reference_mentioned_p (XEXP (op, i)))
3862 return true;
3863 }
3864
3865 return false;
3866 }
3867
3868
3869 /* Return true if OP is a legitimate general operand when
3870 generating PIC code. It is given that flag_pic is on
3871 and that OP satisfies CONSTANT_P. */
3872
3873 int
3874 legitimate_pic_operand_p (rtx op)
3875 {
3876 /* Accept all non-symbolic constants. */
3877 if (!SYMBOLIC_CONST (op))
3878 return 1;
3879
3880 /* Reject everything else; must be handled
3881 via emit_symbolic_move. */
3882 return 0;
3883 }
3884
3885 /* Returns true if the constant value OP is a legitimate general operand.
3886 It is given that OP satisfies CONSTANT_P. */
3887
3888 static bool
3889 s390_legitimate_constant_p (machine_mode mode, rtx op)
3890 {
3891 if (TARGET_VX && VECTOR_MODE_P (mode) && GET_CODE (op) == CONST_VECTOR)
3892 {
3893 if (GET_MODE_SIZE (mode) != 16)
3894 return 0;
3895
3896 if (!satisfies_constraint_j00 (op)
3897 && !satisfies_constraint_jm1 (op)
3898 && !satisfies_constraint_jKK (op)
3899 && !satisfies_constraint_jxx (op)
3900 && !satisfies_constraint_jyy (op))
3901 return 0;
3902 }
3903
3904 /* Accept all non-symbolic constants. */
3905 if (!SYMBOLIC_CONST (op))
3906 return 1;
3907
3908 /* Accept immediate LARL operands. */
3909 if (TARGET_CPU_ZARCH && larl_operand (op, mode))
3910 return 1;
3911
3912 /* Thread-local symbols are never legal constants. This is
3913 so that emit_call knows that computing such addresses
3914 might require a function call. */
3915 if (TLS_SYMBOLIC_CONST (op))
3916 return 0;
3917
3918 /* In the PIC case, symbolic constants must *not* be
3919 forced into the literal pool. We accept them here,
3920 so that they will be handled by emit_symbolic_move. */
3921 if (flag_pic)
3922 return 1;
3923
3924 /* All remaining non-PIC symbolic constants are
3925 forced into the literal pool. */
3926 return 0;
3927 }
3928
3929 /* Determine if it's legal to put X into the constant pool. This
3930 is not possible if X contains the address of a symbol that is
3931 not constant (TLS) or not known at final link time (PIC). */
3932
3933 static bool
3934 s390_cannot_force_const_mem (machine_mode mode, rtx x)
3935 {
3936 switch (GET_CODE (x))
3937 {
3938 case CONST_INT:
3939 case CONST_DOUBLE:
3940 case CONST_WIDE_INT:
3941 case CONST_VECTOR:
3942 /* Accept all non-symbolic constants. */
3943 return false;
3944
3945 case LABEL_REF:
3946 /* Labels are OK iff we are non-PIC. */
3947 return flag_pic != 0;
3948
3949 case SYMBOL_REF:
3950 /* 'Naked' TLS symbol references are never OK,
3951 non-TLS symbols are OK iff we are non-PIC. */
3952 if (tls_symbolic_operand (x))
3953 return true;
3954 else
3955 return flag_pic != 0;
3956
3957 case CONST:
3958 return s390_cannot_force_const_mem (mode, XEXP (x, 0));
3959 case PLUS:
3960 case MINUS:
3961 return s390_cannot_force_const_mem (mode, XEXP (x, 0))
3962 || s390_cannot_force_const_mem (mode, XEXP (x, 1));
3963
3964 case UNSPEC:
3965 switch (XINT (x, 1))
3966 {
3967 /* Only lt-relative or GOT-relative UNSPECs are OK. */
3968 case UNSPEC_LTREL_OFFSET:
3969 case UNSPEC_GOT:
3970 case UNSPEC_GOTOFF:
3971 case UNSPEC_PLTOFF:
3972 case UNSPEC_TLSGD:
3973 case UNSPEC_TLSLDM:
3974 case UNSPEC_NTPOFF:
3975 case UNSPEC_DTPOFF:
3976 case UNSPEC_GOTNTPOFF:
3977 case UNSPEC_INDNTPOFF:
3978 return false;
3979
3980 /* If the literal pool shares the code section, execute
3981 template placeholders may be put into the pool as well. */
3982 case UNSPEC_INSN:
3983 return TARGET_CPU_ZARCH;
3984
3985 default:
3986 return true;
3987 }
3988 break;
3989
3990 default:
3991 gcc_unreachable ();
3992 }
3993 }
3994
3995 /* Returns true if the constant value OP is a legitimate general
3996 operand during and after reload. The difference to
3997 legitimate_constant_p is that this function will not accept
3998 a constant that would need to be forced to the literal pool
3999 before it can be used as operand.
4000 This function accepts all constants which can be loaded directly
4001 into a GPR. */
4002
4003 bool
4004 legitimate_reload_constant_p (rtx op)
4005 {
4006 /* Accept la(y) operands. */
4007 if (GET_CODE (op) == CONST_INT
4008 && DISP_IN_RANGE (INTVAL (op)))
4009 return true;
4010
4011 /* Accept l(g)hi/l(g)fi operands. */
4012 if (GET_CODE (op) == CONST_INT
4013 && (CONST_OK_FOR_K (INTVAL (op)) || CONST_OK_FOR_Os (INTVAL (op))))
4014 return true;
4015
4016 /* Accept lliXX operands. */
4017 if (TARGET_ZARCH
4018 && GET_CODE (op) == CONST_INT
4019 && trunc_int_for_mode (INTVAL (op), word_mode) == INTVAL (op)
4020 && s390_single_part (op, word_mode, HImode, 0) >= 0)
4021 return true;
4022
4023 if (TARGET_EXTIMM
4024 && GET_CODE (op) == CONST_INT
4025 && trunc_int_for_mode (INTVAL (op), word_mode) == INTVAL (op)
4026 && s390_single_part (op, word_mode, SImode, 0) >= 0)
4027 return true;
4028
4029 /* Accept larl operands. */
4030 if (TARGET_CPU_ZARCH
4031 && larl_operand (op, VOIDmode))
4032 return true;
4033
4034 /* Accept floating-point zero operands that fit into a single GPR. */
4035 if (GET_CODE (op) == CONST_DOUBLE
4036 && s390_float_const_zero_p (op)
4037 && GET_MODE_SIZE (GET_MODE (op)) <= UNITS_PER_WORD)
4038 return true;
4039
4040 /* Accept double-word operands that can be split. */
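/* E.g. on 31 bit the DImode constant 0x0000000100000002 is split
into the SImode words 0x00000001 and 0x00000002; the constant is
accepted only if both halves are themselves loadable
(illustrative). */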
4041 if (GET_CODE (op) == CONST_WIDE_INT
4042 || (GET_CODE (op) == CONST_INT
4043 && trunc_int_for_mode (INTVAL (op), word_mode) != INTVAL (op)))
4044 {
4045 machine_mode dword_mode = word_mode == SImode ? DImode : TImode;
4046 rtx hi = operand_subword (op, 0, 0, dword_mode);
4047 rtx lo = operand_subword (op, 1, 0, dword_mode);
4048 return legitimate_reload_constant_p (hi)
4049 && legitimate_reload_constant_p (lo);
4050 }
4051
4052 /* Everything else cannot be handled without reload. */
4053 return false;
4054 }
4055
4056 /* Returns true if the constant value OP is a legitimate fp operand
4057 during and after reload.
4058 This function accepts all constants which can be loaded directly
4059 into an FPR. */
4060
4061 static bool
4062 legitimate_reload_fp_constant_p (rtx op)
4063 {
4064 /* Accept floating-point zero operands if the load zero instruction
4065 can be used. Prior to z196 the load fp zero instruction caused a
4066 performance penalty if the result is used as a BFP number. */
4067 if (TARGET_Z196
4068 && GET_CODE (op) == CONST_DOUBLE
4069 && s390_float_const_zero_p (op))
4070 return true;
4071
4072 return false;
4073 }
4074
4075 /* Returns true if the constant value OP is a legitimate vector operand
4076 during and after reload.
4077 This function accepts all constants which can be loaded directly
4078 into a VR. */
4079
4080 static bool
4081 legitimate_reload_vector_constant_p (rtx op)
4082 {
4083 if (TARGET_VX && GET_MODE_SIZE (GET_MODE (op)) == 16
4084 && (satisfies_constraint_j00 (op)
4085 || satisfies_constraint_jm1 (op)
4086 || satisfies_constraint_jKK (op)
4087 || satisfies_constraint_jxx (op)
4088 || satisfies_constraint_jyy (op)))
4089 return true;
4090
4091 return false;
4092 }
4093
4094 /* Given an rtx OP being reloaded into a reg required to be in class RCLASS,
4095 return the class of reg to actually use. */
4096
4097 static reg_class_t
4098 s390_preferred_reload_class (rtx op, reg_class_t rclass)
4099 {
4100 switch (GET_CODE (op))
4101 {
4102 /* Constants we cannot reload into general registers
4103 must be forced into the literal pool. */
4104 case CONST_VECTOR:
4105 case CONST_DOUBLE:
4106 case CONST_INT:
4107 case CONST_WIDE_INT:
4108 if (reg_class_subset_p (GENERAL_REGS, rclass)
4109 && legitimate_reload_constant_p (op))
4110 return GENERAL_REGS;
4111 else if (reg_class_subset_p (ADDR_REGS, rclass)
4112 && legitimate_reload_constant_p (op))
4113 return ADDR_REGS;
4114 else if (reg_class_subset_p (FP_REGS, rclass)
4115 && legitimate_reload_fp_constant_p (op))
4116 return FP_REGS;
4117 else if (reg_class_subset_p (VEC_REGS, rclass)
4118 && legitimate_reload_vector_constant_p (op))
4119 return VEC_REGS;
4120
4121 return NO_REGS;
4122
4123 /* If a symbolic constant or a PLUS is reloaded,
4124 it is most likely being used as an address, so
4125 prefer ADDR_REGS. If RCLASS is not a superset
4126 of ADDR_REGS, e.g. FP_REGS, reject this reload. */
4127 case CONST:
4128 /* Symrefs cannot be pushed into the literal pool with -fPIC
4129 so we *MUST NOT* return NO_REGS for these cases
4130 (s390_cannot_force_const_mem will return true).
4131
4132 On the other hand we MUST return NO_REGS for symrefs with
4133 invalid addend which might have been pushed to the literal
4134 pool (no -fPIC). Usually we would expect them to be
4135 handled via secondary reload but this does not happen if
4136 they are used as literal pool slot replacement in reload
4137 inheritance (see emit_input_reload_insns). */
4138 if (TARGET_CPU_ZARCH
4139 && GET_CODE (XEXP (op, 0)) == PLUS
4140 && GET_CODE (XEXP (XEXP (op, 0), 0)) == SYMBOL_REF
4141 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
4142 {
4143 if (flag_pic && reg_class_subset_p (ADDR_REGS, rclass))
4144 return ADDR_REGS;
4145 else
4146 return NO_REGS;
4147 }
4148 /* fallthrough */
4149 case LABEL_REF:
4150 case SYMBOL_REF:
4151 if (!legitimate_reload_constant_p (op))
4152 return NO_REGS;
4153 /* fallthrough */
4154 case PLUS:
4155 /* load address will be used. */
4156 if (reg_class_subset_p (ADDR_REGS, rclass))
4157 return ADDR_REGS;
4158 else
4159 return NO_REGS;
4160
4161 default:
4162 break;
4163 }
4164
4165 return rclass;
4166 }
4167
4168 /* Return true if ADDR is SYMBOL_REF + addend with addend being a
4169 multiple of ALIGNMENT and the SYMBOL_REF being naturally
4170 aligned. */
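/* E.g. "sym + 12" passes for an ALIGNMENT of 4 provided sym is
known to be at least 4-byte aligned, while "sym + 2" fails
(illustrative). */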
4171
4172 bool
4173 s390_check_symref_alignment (rtx addr, HOST_WIDE_INT alignment)
4174 {
4175 HOST_WIDE_INT addend;
4176 rtx symref;
4177
4178 /* The "required alignment" might be 0 (e.g. for certain structs
4179 accessed via BLKmode). Early abort in this case, as well as when
4180 an alignment > 8 is required. */
4181 if (alignment < 2 || alignment > 8)
4182 return false;
4183
4184 if (!s390_loadrelative_operand_p (addr, &symref, &addend))
4185 return false;
4186
4187 if (addend & (alignment - 1))
4188 return false;
4189
4190 if (GET_CODE (symref) == SYMBOL_REF)
4191 {
4192 /* We have load-relative instructions for 2-byte, 4-byte, and
4193 8-byte alignment so allow only these. */
4194 switch (alignment)
4195 {
4196 case 8: return !SYMBOL_FLAG_NOTALIGN8_P (symref);
4197 case 4: return !SYMBOL_FLAG_NOTALIGN4_P (symref);
4198 case 2: return !SYMBOL_FLAG_NOTALIGN2_P (symref);
4199 default: return false;
4200 }
4201 }
4202
4203 if (GET_CODE (symref) == UNSPEC
4204 && alignment <= UNITS_PER_LONG)
4205 return true;
4206
4207 return false;
4208 }
4209
4210 /* ADDR is moved into REG using larl. If ADDR isn't a valid larl
4211 operand, SCRATCH is used to reload the even part of the address
4212 and one is then added. */
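/* E.g. for "sym + 5" SCRATCH is loaded with "sym + 4" (a valid
larl operand) and REG then becomes SCRATCH + 1 via la
(illustrative). */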
4213
4214 void
4215 s390_reload_larl_operand (rtx reg, rtx addr, rtx scratch)
4216 {
4217 HOST_WIDE_INT addend;
4218 rtx symref;
4219
4220 if (!s390_loadrelative_operand_p (addr, &symref, &addend))
4221 gcc_unreachable ();
4222
4223 if (!(addend & 1))
4224 /* Easy case. The addend is even so larl will do fine. */
4225 emit_move_insn (reg, addr);
4226 else
4227 {
4228 /* We can leave the scratch register untouched if the target
4229 register is a valid base register. */
4230 if (REGNO (reg) < FIRST_PSEUDO_REGISTER
4231 && REGNO_REG_CLASS (REGNO (reg)) == ADDR_REGS)
4232 scratch = reg;
4233
4234 gcc_assert (REGNO (scratch) < FIRST_PSEUDO_REGISTER);
4235 gcc_assert (REGNO_REG_CLASS (REGNO (scratch)) == ADDR_REGS);
4236
4237 if (addend != 1)
4238 emit_move_insn (scratch,
4239 gen_rtx_CONST (Pmode,
4240 gen_rtx_PLUS (Pmode, symref,
4241 GEN_INT (addend - 1))));
4242 else
4243 emit_move_insn (scratch, symref);
4244
4245 /* Increment the address using la in order to avoid clobbering cc. */
4246 s390_load_address (reg, gen_rtx_PLUS (Pmode, scratch, const1_rtx));
4247 }
4248 }
4249
4250 /* Generate what is necessary to move between REG and MEM using
4251 SCRATCH. The direction is given by TOMEM. */
4252
4253 void
4254 s390_reload_symref_address (rtx reg, rtx mem, rtx scratch, bool tomem)
4255 {
4256 /* Reload might have pulled a constant out of the literal pool.
4257 Force it back in. */
4258 if (CONST_INT_P (mem) || GET_CODE (mem) == CONST_DOUBLE
4259 || GET_CODE (mem) == CONST_WIDE_INT
4260 || GET_CODE (mem) == CONST_VECTOR
4261 || GET_CODE (mem) == CONST)
4262 mem = force_const_mem (GET_MODE (reg), mem);
4263
4264 gcc_assert (MEM_P (mem));
4265
4266 /* For a load from memory we can leave the scratch register
4267 untouched if the target register is a valid base register. */
4268 if (!tomem
4269 && REGNO (reg) < FIRST_PSEUDO_REGISTER
4270 && REGNO_REG_CLASS (REGNO (reg)) == ADDR_REGS
4271 && GET_MODE (reg) == GET_MODE (scratch))
4272 scratch = reg;
4273
4274 /* Load address into scratch register. Since we can't have a
4275 secondary reload for a secondary reload we have to cover the case
4276 where larl would need a secondary reload here as well. */
4277 s390_reload_larl_operand (scratch, XEXP (mem, 0), scratch);
4278
4279 /* Now we can use a standard load/store to do the move. */
4280 if (tomem)
4281 emit_move_insn (replace_equiv_address (mem, scratch), reg);
4282 else
4283 emit_move_insn (reg, replace_equiv_address (mem, scratch));
4284 }
4285
4286 /* Inform reload about cases where moving X with a mode MODE to a register in
4287 RCLASS requires an extra scratch or immediate register. Return the class
4288 needed for the immediate register. */
4289
4290 static reg_class_t
4291 s390_secondary_reload (bool in_p, rtx x, reg_class_t rclass_i,
4292 machine_mode mode, secondary_reload_info *sri)
4293 {
4294 enum reg_class rclass = (enum reg_class) rclass_i;
4295
4296 /* Intermediate register needed. */
4297 if (reg_classes_intersect_p (CC_REGS, rclass))
4298 return GENERAL_REGS;
4299
4300 if (TARGET_VX)
4301 {
4302 /* The vst/vl vector move instructions allow only short
4303 displacements. */
4304 if (MEM_P (x)
4305 && GET_CODE (XEXP (x, 0)) == PLUS
4306 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
4307 && !SHORT_DISP_IN_RANGE (INTVAL (XEXP (XEXP (x, 0), 1)))
4308 && reg_class_subset_p (rclass, VEC_REGS)
4309 && (!reg_class_subset_p (rclass, FP_REGS)
4310 || (GET_MODE_SIZE (mode) > 8
4311 && s390_class_max_nregs (FP_REGS, mode) == 1)))
4312 {
4313 if (in_p)
4314 sri->icode = (TARGET_64BIT ?
4315 CODE_FOR_reloaddi_la_in :
4316 CODE_FOR_reloadsi_la_in);
4317 else
4318 sri->icode = (TARGET_64BIT ?
4319 CODE_FOR_reloaddi_la_out :
4320 CODE_FOR_reloadsi_la_out);
4321 }
4322 }
4323
4324 if (TARGET_Z10)
4325 {
4326 HOST_WIDE_INT offset;
4327 rtx symref;
4328
4329 /* On z10 several optimizer steps may generate larl operands with
4330 an odd addend. */
4331 if (in_p
4332 && s390_loadrelative_operand_p (x, &symref, &offset)
4333 && mode == Pmode
4334 && !SYMBOL_FLAG_NOTALIGN2_P (symref)
4335 && (offset & 1) == 1)
4336 sri->icode = ((mode == DImode) ? CODE_FOR_reloaddi_larl_odd_addend_z10
4337 : CODE_FOR_reloadsi_larl_odd_addend_z10);
4338
4339 /* Handle all the (mem (symref)) accesses we cannot use the z10
4340 instructions for. */
4341 if (MEM_P (x)
4342 && s390_loadrelative_operand_p (XEXP (x, 0), NULL, NULL)
4343 && (mode == QImode
4344 || !reg_class_subset_p (rclass, GENERAL_REGS)
4345 || GET_MODE_SIZE (mode) > UNITS_PER_WORD
4346 || !s390_check_symref_alignment (XEXP (x, 0),
4347 GET_MODE_SIZE (mode))))
4348 {
4349 #define __SECONDARY_RELOAD_CASE(M,m) \
4350 case E_##M##mode: \
4351 if (TARGET_64BIT) \
4352 sri->icode = in_p ? CODE_FOR_reload##m##di_toreg_z10 : \
4353 CODE_FOR_reload##m##di_tomem_z10; \
4354 else \
4355 sri->icode = in_p ? CODE_FOR_reload##m##si_toreg_z10 : \
4356 CODE_FOR_reload##m##si_tomem_z10; \
4357 break;
4358
4359 switch (GET_MODE (x))
4360 {
4361 __SECONDARY_RELOAD_CASE (QI, qi);
4362 __SECONDARY_RELOAD_CASE (HI, hi);
4363 __SECONDARY_RELOAD_CASE (SI, si);
4364 __SECONDARY_RELOAD_CASE (DI, di);
4365 __SECONDARY_RELOAD_CASE (TI, ti);
4366 __SECONDARY_RELOAD_CASE (SF, sf);
4367 __SECONDARY_RELOAD_CASE (DF, df);
4368 __SECONDARY_RELOAD_CASE (TF, tf);
4369 __SECONDARY_RELOAD_CASE (SD, sd);
4370 __SECONDARY_RELOAD_CASE (DD, dd);
4371 __SECONDARY_RELOAD_CASE (TD, td);
4372 __SECONDARY_RELOAD_CASE (V1QI, v1qi);
4373 __SECONDARY_RELOAD_CASE (V2QI, v2qi);
4374 __SECONDARY_RELOAD_CASE (V4QI, v4qi);
4375 __SECONDARY_RELOAD_CASE (V8QI, v8qi);
4376 __SECONDARY_RELOAD_CASE (V16QI, v16qi);
4377 __SECONDARY_RELOAD_CASE (V1HI, v1hi);
4378 __SECONDARY_RELOAD_CASE (V2HI, v2hi);
4379 __SECONDARY_RELOAD_CASE (V4HI, v4hi);
4380 __SECONDARY_RELOAD_CASE (V8HI, v8hi);
4381 __SECONDARY_RELOAD_CASE (V1SI, v1si);
4382 __SECONDARY_RELOAD_CASE (V2SI, v2si);
4383 __SECONDARY_RELOAD_CASE (V4SI, v4si);
4384 __SECONDARY_RELOAD_CASE (V1DI, v1di);
4385 __SECONDARY_RELOAD_CASE (V2DI, v2di);
4386 __SECONDARY_RELOAD_CASE (V1TI, v1ti);
4387 __SECONDARY_RELOAD_CASE (V1SF, v1sf);
4388 __SECONDARY_RELOAD_CASE (V2SF, v2sf);
4389 __SECONDARY_RELOAD_CASE (V4SF, v4sf);
4390 __SECONDARY_RELOAD_CASE (V1DF, v1df);
4391 __SECONDARY_RELOAD_CASE (V2DF, v2df);
4392 __SECONDARY_RELOAD_CASE (V1TF, v1tf);
4393 default:
4394 gcc_unreachable ();
4395 }
4396 #undef __SECONDARY_RELOAD_CASE
4397 }
4398 }
4399
4400 /* We need a scratch register when loading a PLUS expression which
4401 is not a legitimate operand of the LOAD ADDRESS instruction. */
4402 /* LRA can deal with transformation of plus op very well -- so we
4403 don't need to prompt LRA in this case. */
4404 if (! lra_in_progress && in_p && s390_plus_operand (x, mode))
4405 sri->icode = (TARGET_64BIT ?
4406 CODE_FOR_reloaddi_plus : CODE_FOR_reloadsi_plus);
4407
4408 /* When performing a multiword move from or to memory we have to make sure the
4409 second chunk in memory is addressable without causing a displacement
4410 overflow. If that would be the case we calculate the address in
4411 a scratch register. */
4412 if (MEM_P (x)
4413 && GET_CODE (XEXP (x, 0)) == PLUS
4414 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
4415 && !DISP_IN_RANGE (INTVAL (XEXP (XEXP (x, 0), 1))
4416 + GET_MODE_SIZE (mode) - 1))
4417 {
4418 /* For GENERAL_REGS a displacement overflow is no problem if it occurs
4419 in an s_operand address, since we may fall back to lm/stm. So we only
4420 have to care about overflows in the b+i+d case. */
4421 if ((reg_classes_intersect_p (GENERAL_REGS, rclass)
4422 && s390_class_max_nregs (GENERAL_REGS, mode) > 1
4423 && GET_CODE (XEXP (XEXP (x, 0), 0)) == PLUS)
4424 /* For FP_REGS no lm/stm is available so this check is triggered
4425 for displacement overflows in b+i+d and b+d like addresses. */
4426 || (reg_classes_intersect_p (FP_REGS, rclass)
4427 && s390_class_max_nregs (FP_REGS, mode) > 1))
4428 {
4429 if (in_p)
4430 sri->icode = (TARGET_64BIT ?
4431 CODE_FOR_reloaddi_la_in :
4432 CODE_FOR_reloadsi_la_in);
4433 else
4434 sri->icode = (TARGET_64BIT ?
4435 CODE_FOR_reloaddi_la_out :
4436 CODE_FOR_reloadsi_la_out);
4437 }
4438 }
4439
4440 /* A scratch address register is needed when a symbolic constant is
4441 copied to r0 when compiling with -fPIC. In other cases the target
4442 register might be used as a temporary (see legitimize_pic_address). */
4443 if (in_p && SYMBOLIC_CONST (x) && flag_pic == 2 && rclass != ADDR_REGS)
4444 sri->icode = (TARGET_64BIT ?
4445 CODE_FOR_reloaddi_PIC_addr :
4446 CODE_FOR_reloadsi_PIC_addr);
4447
4448 /* Either scratch or no register needed. */
4449 return NO_REGS;
4450 }
4451
4452 /* Implement TARGET_SECONDARY_MEMORY_NEEDED.
4453
4454 We need secondary memory to move data between GPRs and FPRs.
4455
4456 - With DFP the ldgr/lgdr instructions are available. Due to the
4457 different alignment we cannot use them for SFmode. For 31 bit a
4458 64 bit value in a GPR would be a register pair, so here we still
4459 need to go via memory.
4460
4461 - With z13 we can do the SF/SImode moves with vlgvf. Due to the
4462 overlapping of FPRs and VRs we still disallow TF/TD modes to be
4463 in full VRs, so on z13, as before, we do these moves via
4464 memory.
4465
4466 FIXME: Should we try splitting it into two vlgvg's/vlvg's instead? */
4467
4468 static bool
4469 s390_secondary_memory_needed (machine_mode mode,
4470 reg_class_t class1, reg_class_t class2)
4471 {
4472 return (((reg_classes_intersect_p (class1, VEC_REGS)
4473 && reg_classes_intersect_p (class2, GENERAL_REGS))
4474 || (reg_classes_intersect_p (class1, GENERAL_REGS)
4475 && reg_classes_intersect_p (class2, VEC_REGS)))
4476 && (!TARGET_DFP || !TARGET_64BIT || GET_MODE_SIZE (mode) != 8)
4477 && (!TARGET_VX || (SCALAR_FLOAT_MODE_P (mode)
4478 && GET_MODE_SIZE (mode) > 8)));
4479 }
4480
4481 /* Implement TARGET_SECONDARY_MEMORY_NEEDED_MODE.
4482
4483 get_secondary_mem widens its argument to BITS_PER_WORD which loses on 64-bit
4484 because the movsi and movsf patterns don't handle r/f moves. */
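/* E.g. a QImode or HImode value gets a 32-bit slot for the
GPR<->FPR transfer; wider modes keep their own mode (sketch of
the code below). */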
4485
4486 static machine_mode
4487 s390_secondary_memory_needed_mode (machine_mode mode)
4488 {
4489 if (GET_MODE_BITSIZE (mode) < 32)
4490 return mode_for_size (32, GET_MODE_CLASS (mode), 0).require ();
4491 return mode;
4492 }
4493
4494 /* Generate code to load SRC, which is a PLUS that is not a
4495 legitimate operand for the LA instruction, into TARGET.
4496 SCRATCH may be used as scratch register. */
4497
4498 void
4499 s390_expand_plus_operand (rtx target, rtx src,
4500 rtx scratch)
4501 {
4502 rtx sum1, sum2;
4503 struct s390_address ad;
4504
4505 /* src must be a PLUS; get its two operands. */
4506 gcc_assert (GET_CODE (src) == PLUS);
4507 gcc_assert (GET_MODE (src) == Pmode);
4508
4509 /* Check if any of the two operands is already scheduled
4510 for replacement by reload. This can happen e.g. when
4511 float registers occur in an address. */
4512 sum1 = find_replacement (&XEXP (src, 0));
4513 sum2 = find_replacement (&XEXP (src, 1));
4514 src = gen_rtx_PLUS (Pmode, sum1, sum2);
4515
4516 /* If the address is already strictly valid, there's nothing to do. */
4517 if (!s390_decompose_address (src, &ad)
4518 || (ad.base && !REGNO_OK_FOR_BASE_P (REGNO (ad.base)))
4519 || (ad.indx && !REGNO_OK_FOR_INDEX_P (REGNO (ad.indx))))
4520 {
4521 /* Otherwise, one of the operands cannot be an address register;
4522 we reload its value into the scratch register. */
4523 if (true_regnum (sum1) < 1 || true_regnum (sum1) > 15)
4524 {
4525 emit_move_insn (scratch, sum1);
4526 sum1 = scratch;
4527 }
4528 if (true_regnum (sum2) < 1 || true_regnum (sum2) > 15)
4529 {
4530 emit_move_insn (scratch, sum2);
4531 sum2 = scratch;
4532 }
4533
4534 /* According to the way these invalid addresses are generated
4535 in reload.c, it should never happen (at least on s390) that
4536 *neither* of the PLUS components, after find_replacement
4537 was applied, is an address register. */
4538 if (sum1 == scratch && sum2 == scratch)
4539 {
4540 debug_rtx (src);
4541 gcc_unreachable ();
4542 }
4543
4544 src = gen_rtx_PLUS (Pmode, sum1, sum2);
4545 }
4546
4547 /* Emit the LOAD ADDRESS pattern. Note that reload of PLUS
4548 is only ever performed on addresses, so we can mark the
4549 sum as legitimate for LA in any case. */
4550 s390_load_address (target, src);
4551 }
4552
4553
4554 /* Return true if ADDR is a valid memory address.
4555 STRICT specifies whether strict register checking applies. */
4556
4557 static bool
4558 s390_legitimate_address_p (machine_mode mode, rtx addr, bool strict)
4559 {
4560 struct s390_address ad;
4561
4562 if (TARGET_Z10
4563 && larl_operand (addr, VOIDmode)
4564 && (mode == VOIDmode
4565 || s390_check_symref_alignment (addr, GET_MODE_SIZE (mode))))
4566 return true;
4567
4568 if (!s390_decompose_address (addr, &ad))
4569 return false;
4570
4571 if (strict)
4572 {
4573 if (ad.base && !REGNO_OK_FOR_BASE_P (REGNO (ad.base)))
4574 return false;
4575
4576 if (ad.indx && !REGNO_OK_FOR_INDEX_P (REGNO (ad.indx)))
4577 return false;
4578 }
4579 else
4580 {
4581 if (ad.base
4582 && !(REGNO (ad.base) >= FIRST_PSEUDO_REGISTER
4583 || REGNO_REG_CLASS (REGNO (ad.base)) == ADDR_REGS))
4584 return false;
4585
4586 if (ad.indx
4587 && !(REGNO (ad.indx) >= FIRST_PSEUDO_REGISTER
4588 || REGNO_REG_CLASS (REGNO (ad.indx)) == ADDR_REGS))
4589 return false;
4590 }
4591 return true;
4592 }
4593
4594 /* Return true if OP is a valid operand for the LA instruction.
4595 In 31-bit, we need to prove that the result is used as an
4596 address, as LA performs only a 31-bit addition. */
4597
4598 bool
4599 legitimate_la_operand_p (rtx op)
4600 {
4601 struct s390_address addr;
4602 if (!s390_decompose_address (op, &addr))
4603 return false;
4604
4605 return (TARGET_64BIT || addr.pointer);
4606 }
4607
4608 /* Return true if it is valid *and* preferable to use LA to
4609 compute the sum of OP1 and OP2. */
4610
4611 bool
4612 preferred_la_operand_p (rtx op1, rtx op2)
4613 {
4614 struct s390_address addr;
4615
4616 if (op2 != const0_rtx)
4617 op1 = gen_rtx_PLUS (Pmode, op1, op2);
4618
4619 if (!s390_decompose_address (op1, &addr))
4620 return false;
4621 if (addr.base && !REGNO_OK_FOR_BASE_P (REGNO (addr.base)))
4622 return false;
4623 if (addr.indx && !REGNO_OK_FOR_INDEX_P (REGNO (addr.indx)))
4624 return false;
4625
4626 /* Avoid LA instructions with index register on z196; it is
4627 preferable to use regular add instructions when possible.
4628 Starting with zEC12 the la with index register is "uncracked"
4629 again. */
4630 if (addr.indx && s390_tune == PROCESSOR_2817_Z196)
4631 return false;
4632
4633 if (!TARGET_64BIT && !addr.pointer)
4634 return false;
4635
4636 if (addr.pointer)
4637 return true;
4638
4639 if ((addr.base && REG_P (addr.base) && REG_POINTER (addr.base))
4640 || (addr.indx && REG_P (addr.indx) && REG_POINTER (addr.indx)))
4641 return true;
4642
4643 return false;
4644 }
4645
4646 /* Emit a forced load-address operation to load SRC into DST.
4647 This will use the LOAD ADDRESS instruction even in situations
4648 where legitimate_la_operand_p (SRC) returns false. */
4649
4650 void
4651 s390_load_address (rtx dst, rtx src)
4652 {
4653 if (TARGET_64BIT)
4654 emit_move_insn (dst, src);
4655 else
4656 emit_insn (gen_force_la_31 (dst, src));
4657 }
4658
4659 /* Return true if it is ok to use SYMBOL_REF in a relative address. */
4660
4661 bool
4662 s390_rel_address_ok_p (rtx symbol_ref)
4663 {
4664 tree decl;
4665
4666 if (symbol_ref == s390_got_symbol () || CONSTANT_POOL_ADDRESS_P (symbol_ref))
4667 return true;
4668
4669 decl = SYMBOL_REF_DECL (symbol_ref);
4670
4671 if (!flag_pic || SYMBOL_REF_LOCAL_P (symbol_ref))
4672 return (s390_pic_data_is_text_relative
4673 || (decl
4674 && TREE_CODE (decl) == FUNCTION_DECL));
4675
4676 return false;
4677 }
4678
4679 /* Return a legitimate reference for ORIG (an address) using the
4680 register REG. If REG is 0, a new pseudo is generated.
4681
4682 There are two types of references that must be handled:
4683
4684 1. Global data references must load the address from the GOT, via
4685 the PIC reg. An insn is emitted to do this load, and the reg is
4686 returned.
4687
4688 2. Static data references, constant pool addresses, and code labels
4689 compute the address as an offset from the GOT, whose base is in
4690 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
4691 differentiate them from global data objects. The returned
4692 address is the PIC reg + an unspec constant.
4693
4694 TARGET_LEGITIMIZE_ADDRESS_P rejects symbolic references unless the PIC
4695 reg also appears in the address. */
4696
4697 rtx
4698 legitimize_pic_address (rtx orig, rtx reg)
4699 {
4700 rtx addr = orig;
4701 rtx addend = const0_rtx;
4702 rtx new_rtx = orig;
4703
4704 gcc_assert (!TLS_SYMBOLIC_CONST (addr));
4705
4706 if (GET_CODE (addr) == CONST)
4707 addr = XEXP (addr, 0);
4708
4709 if (GET_CODE (addr) == PLUS)
4710 {
4711 addend = XEXP (addr, 1);
4712 addr = XEXP (addr, 0);
4713 }
4714
4715 if ((GET_CODE (addr) == LABEL_REF
4716 || (SYMBOL_REF_P (addr) && s390_rel_address_ok_p (addr))
4717 || (GET_CODE (addr) == UNSPEC &&
4718 (XINT (addr, 1) == UNSPEC_GOTENT
4719 || (TARGET_CPU_ZARCH && XINT (addr, 1) == UNSPEC_PLT))))
4720 && GET_CODE (addend) == CONST_INT)
4721 {
4722 /* This can be locally addressed. */
4723
4724 /* larl_operand requires UNSPECs to be wrapped in a const rtx. */
4725 rtx const_addr = (GET_CODE (addr) == UNSPEC ?
4726 gen_rtx_CONST (Pmode, addr) : addr);
4727
4728 if (TARGET_CPU_ZARCH
4729 && larl_operand (const_addr, VOIDmode)
4730 && INTVAL (addend) < HOST_WIDE_INT_1 << 31
4731 && INTVAL (addend) >= -(HOST_WIDE_INT_1 << 31))
4732 {
4733 if (INTVAL (addend) & 1)
4734 {
4735 /* LARL can't handle odd offsets, so emit a pair of LARL
4736 and LA. */
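/* E.g. "sym + 3" becomes "larl temp,sym" plus "la reg,3(temp)";
for an addend outside the displacement range the even part is
folded into the larl target and only 1 is added via la
(illustrative). */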
4737 rtx temp = reg ? reg : gen_reg_rtx (Pmode);
4738
4739 if (!DISP_IN_RANGE (INTVAL (addend)))
4740 {
4741 HOST_WIDE_INT even = INTVAL (addend) - 1;
4742 addr = gen_rtx_PLUS (Pmode, addr, GEN_INT (even));
4743 addr = gen_rtx_CONST (Pmode, addr);
4744 addend = const1_rtx;
4745 }
4746
4747 emit_move_insn (temp, addr);
4748 new_rtx = gen_rtx_PLUS (Pmode, temp, addend);
4749
4750 if (reg != 0)
4751 {
4752 s390_load_address (reg, new_rtx);
4753 new_rtx = reg;
4754 }
4755 }
4756 else
4757 {
4758 /* If the offset is even, we can just use LARL. This
4759 will happen automatically. */
4760 }
4761 }
4762 else
4763 {
4764 /* No larl - Access local symbols relative to the GOT. */
4765
4766 rtx temp = reg ? reg : gen_reg_rtx (Pmode);
4767
4768 if (reload_in_progress || reload_completed)
4769 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
4770
4771 addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
4772 if (addend != const0_rtx)
4773 addr = gen_rtx_PLUS (Pmode, addr, addend);
4774 addr = gen_rtx_CONST (Pmode, addr);
4775 addr = force_const_mem (Pmode, addr);
4776 emit_move_insn (temp, addr);
4777
4778 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, temp);
4779 if (reg != 0)
4780 {
4781 s390_load_address (reg, new_rtx);
4782 new_rtx = reg;
4783 }
4784 }
4785 }
4786 else if (GET_CODE (addr) == SYMBOL_REF && addend == const0_rtx)
4787 {
4788 /* A non-local symbol reference without addend.
4789
4790 The symbol ref is wrapped into an UNSPEC to make sure the
4791 proper operand modifier (@GOT or @GOTENT) will be emitted.
4792 This will tell the linker to put the symbol into the GOT.
4793
4794 Additionally the code dereferencing the GOT slot is emitted here.
4795
4796 An addend to the symref needs to be added afterwards.
4797 legitimize_pic_address calls itself recursively to handle
4798 that case. So no need to do it here. */
4799
4800 if (reg == 0)
4801 reg = gen_reg_rtx (Pmode);
4802
4803 if (TARGET_Z10)
4804 {
4805 /* Use load relative if possible.
4806 lgrl <target>, sym@GOTENT */
4807 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTENT);
4808 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
4809 new_rtx = gen_const_mem (GET_MODE (reg), new_rtx);
4810
4811 emit_move_insn (reg, new_rtx);
4812 new_rtx = reg;
4813 }
4814 else if (flag_pic == 1)
4815 {
4816 /* Assume GOT offset is a valid displacement operand (< 4k
4817 or < 512k with z990). This is handled the same way in
4818 both 31- and 64-bit code (@GOT).
4819 lg <target>, sym@GOT(r12) */
4820
4821 if (reload_in_progress || reload_completed)
4822 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
4823
4824 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
4825 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
4826 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
4827 new_rtx = gen_const_mem (Pmode, new_rtx);
4828 emit_move_insn (reg, new_rtx);
4829 new_rtx = reg;
4830 }
4831 else if (TARGET_CPU_ZARCH)
4832 {
4833 /* If the GOT offset might be >= 4k, we determine the position
4834 of the GOT entry via a PC-relative LARL (@GOTENT).
4835 larl temp, sym@GOTENT
4836 lg <target>, 0(temp) */
4837
4838 rtx temp = reg ? reg : gen_reg_rtx (Pmode);
4839
4840 gcc_assert (REGNO (temp) >= FIRST_PSEUDO_REGISTER
4841 || REGNO_REG_CLASS (REGNO (temp)) == ADDR_REGS);
4842
4843 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTENT);
4844 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
4845 emit_move_insn (temp, new_rtx);
4846
4847 new_rtx = gen_const_mem (Pmode, temp);
4848 emit_move_insn (reg, new_rtx);
4849
4850 new_rtx = reg;
4851 }
4852 else
4853 {
4854 /* If the GOT offset might be >= 4k, we have to load it
4855 from the literal pool (@GOT).
4856
4857 lg temp, lit-litbase(r13)
4858 lg <target>, 0(temp)
4859 lit: .long sym@GOT */
4860
4861 rtx temp = reg ? reg : gen_reg_rtx (Pmode);
4862
4863 gcc_assert (REGNO (temp) >= FIRST_PSEUDO_REGISTER
4864 || REGNO_REG_CLASS (REGNO (temp)) == ADDR_REGS);
4865
4866 if (reload_in_progress || reload_completed)
4867 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
4868
4869 addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
4870 addr = gen_rtx_CONST (Pmode, addr);
4871 addr = force_const_mem (Pmode, addr);
4872 emit_move_insn (temp, addr);
4873
4874 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, temp);
4875 new_rtx = gen_const_mem (Pmode, new_rtx);
4876 emit_move_insn (reg, new_rtx);
4877 new_rtx = reg;
4878 }
4879 }
4880 else if (GET_CODE (addr) == UNSPEC && GET_CODE (addend) == CONST_INT)
4881 {
4882 gcc_assert (XVECLEN (addr, 0) == 1);
4883 switch (XINT (addr, 1))
4884 {
4885 /* These address symbols (or PLT slots) relative to the GOT
4886 (not GOT slots!). In general this will exceed the
4887 displacement range, so these values belong in the literal
4888 pool. */
4889 case UNSPEC_GOTOFF:
4890 case UNSPEC_PLTOFF:
4891 new_rtx = force_const_mem (Pmode, orig);
4892 break;
4893
4894 /* For -fPIC the GOT size might exceed the displacement
4895 range so make sure the value is in the literal pool. */
4896 case UNSPEC_GOT:
4897 if (flag_pic == 2)
4898 new_rtx = force_const_mem (Pmode, orig);
4899 break;
4900
4901 /* For @GOTENT larl is used. This is handled like local
4902 symbol refs. */
4903 case UNSPEC_GOTENT:
4904 gcc_unreachable ();
4905 break;
4906
4907 /* @PLT is OK as is on 64-bit, must be converted to
4908 GOT-relative @PLTOFF on 31-bit. */
4909 case UNSPEC_PLT:
4910 if (!TARGET_CPU_ZARCH)
4911 {
4912 rtx temp = reg ? reg : gen_reg_rtx (Pmode);
4913
4914 if (reload_in_progress || reload_completed)
4915 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
4916
4917 addr = XVECEXP (addr, 0, 0);
4918 addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr),
4919 UNSPEC_PLTOFF);
4920 if (addend != const0_rtx)
4921 addr = gen_rtx_PLUS (Pmode, addr, addend);
4922 addr = gen_rtx_CONST (Pmode, addr);
4923 addr = force_const_mem (Pmode, addr);
4924 emit_move_insn (temp, addr);
4925
4926 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, temp);
4927 if (reg != 0)
4928 {
4929 s390_load_address (reg, new_rtx);
4930 new_rtx = reg;
4931 }
4932 }
4933 else
4934 /* On 64 bit larl can be used. This case is handled like
4935 local symbol refs. */
4936 gcc_unreachable ();
4937 break;
4938
4939 /* Everything else cannot happen. */
4940 default:
4941 gcc_unreachable ();
4942 }
4943 }
4944 else if (addend != const0_rtx)
4945 {
4946 /* Otherwise, compute the sum. */
4947
4948 rtx base = legitimize_pic_address (addr, reg);
4949 new_rtx = legitimize_pic_address (addend,
4950 base == reg ? NULL_RTX : reg);
4951 if (GET_CODE (new_rtx) == CONST_INT)
4952 new_rtx = plus_constant (Pmode, base, INTVAL (new_rtx));
4953 else
4954 {
4955 if (GET_CODE (new_rtx) == PLUS && CONSTANT_P (XEXP (new_rtx, 1)))
4956 {
4957 base = gen_rtx_PLUS (Pmode, base, XEXP (new_rtx, 0));
4958 new_rtx = XEXP (new_rtx, 1);
4959 }
4960 new_rtx = gen_rtx_PLUS (Pmode, base, new_rtx);
4961 }
4962
4963 if (GET_CODE (new_rtx) == CONST)
4964 new_rtx = XEXP (new_rtx, 0);
4965 new_rtx = force_operand (new_rtx, 0);
4966 }
4967
4968 return new_rtx;
4969 }
4970
4971 /* Load the thread pointer into a register. */
4972
4973 rtx
4974 s390_get_thread_pointer (void)
4975 {
4976 rtx tp = gen_reg_rtx (Pmode);
4977
4978 emit_move_insn (tp, gen_rtx_REG (Pmode, TP_REGNUM));
4979 mark_reg_pointer (tp, BITS_PER_WORD);
4980
4981 return tp;
4982 }
4983
4984 /* Emit a tls call insn. The call target is the SYMBOL_REF stored
4985 in s390_tls_symbol which always refers to __tls_get_offset.
4986 The returned offset is written to RESULT_REG and a USE rtx is
4987 generated for TLS_CALL. */
4988
4989 static GTY(()) rtx s390_tls_symbol;
4990
4991 static void
4992 s390_emit_tls_call_insn (rtx result_reg, rtx tls_call)
4993 {
4994 rtx insn;
4995
4996 if (!flag_pic)
4997 emit_insn (s390_load_got ());
4998
4999 if (!s390_tls_symbol)
5000 s390_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, "__tls_get_offset");
5001
5002 insn = s390_emit_call (s390_tls_symbol, tls_call, result_reg,
5003 gen_rtx_REG (Pmode, RETURN_REGNUM));
5004
5005 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), result_reg);
5006 RTL_CONST_CALL_P (insn) = 1;
5007 }
5008
5009 /* ADDR contains a thread-local SYMBOL_REF. Generate code to compute
5010 this (thread-local) address. REG may be used as temporary. */
5011
5012 static rtx
5013 legitimize_tls_address (rtx addr, rtx reg)
5014 {
5015 rtx new_rtx, tls_call, temp, base, r2;
5016 rtx_insn *insn;
5017
5018 if (GET_CODE (addr) == SYMBOL_REF)
5019 switch (tls_symbolic_operand (addr))
5020 {
5021 case TLS_MODEL_GLOBAL_DYNAMIC:
5022 start_sequence ();
5023 r2 = gen_rtx_REG (Pmode, 2);
5024 tls_call = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_TLSGD);
5025 new_rtx = gen_rtx_CONST (Pmode, tls_call);
5026 new_rtx = force_const_mem (Pmode, new_rtx);
5027 emit_move_insn (r2, new_rtx);
5028 s390_emit_tls_call_insn (r2, tls_call);
5029 insn = get_insns ();
5030 end_sequence ();
5031
5032 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_NTPOFF);
5033 temp = gen_reg_rtx (Pmode);
5034 emit_libcall_block (insn, temp, r2, new_rtx);
5035
5036 new_rtx = gen_rtx_PLUS (Pmode, s390_get_thread_pointer (), temp);
5037 if (reg != 0)
5038 {
5039 s390_load_address (reg, new_rtx);
5040 new_rtx = reg;
5041 }
5042 break;
5043
5044 case TLS_MODEL_LOCAL_DYNAMIC:
5045 start_sequence ();
5046 r2 = gen_rtx_REG (Pmode, 2);
5047 tls_call = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TLSLDM);
5048 new_rtx = gen_rtx_CONST (Pmode, tls_call);
5049 new_rtx = force_const_mem (Pmode, new_rtx);
5050 emit_move_insn (r2, new_rtx);
5051 s390_emit_tls_call_insn (r2, tls_call);
5052 insn = get_insns ();
5053 end_sequence ();
5054
5055 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TLSLDM_NTPOFF);
5056 temp = gen_reg_rtx (Pmode);
5057 emit_libcall_block (insn, temp, r2, new_rtx);
5058
5059 new_rtx = gen_rtx_PLUS (Pmode, s390_get_thread_pointer (), temp);
5060 base = gen_reg_rtx (Pmode);
5061 s390_load_address (base, new_rtx);
5062
5063 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_DTPOFF);
5064 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
5065 new_rtx = force_const_mem (Pmode, new_rtx);
5066 temp = gen_reg_rtx (Pmode);
5067 emit_move_insn (temp, new_rtx);
5068
5069 new_rtx = gen_rtx_PLUS (Pmode, base, temp);
5070 if (reg != 0)
5071 {
5072 s390_load_address (reg, new_rtx);
5073 new_rtx = reg;
5074 }
5075 break;
5076
5077 case TLS_MODEL_INITIAL_EXEC:
5078 if (flag_pic == 1)
5079 {
5080 /* Assume GOT offset < 4k. This is handled the same way
5081 in both 31- and 64-bit code. */
5082
5083 if (reload_in_progress || reload_completed)
5084 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
5085
5086 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTNTPOFF);
5087 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
5088 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
5089 new_rtx = gen_const_mem (Pmode, new_rtx);
5090 temp = gen_reg_rtx (Pmode);
5091 emit_move_insn (temp, new_rtx);
5092 }
5093 else if (TARGET_CPU_ZARCH)
5094 {
5095 /* If the GOT offset might be >= 4k, we determine the position
5096 of the GOT entry via a PC-relative LARL. */
5097
5098 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_INDNTPOFF);
5099 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
5100 temp = gen_reg_rtx (Pmode);
5101 emit_move_insn (temp, new_rtx);
5102
5103 new_rtx = gen_const_mem (Pmode, temp);
5104 temp = gen_reg_rtx (Pmode);
5105 emit_move_insn (temp, new_rtx);
5106 }
5107 else if (flag_pic)
5108 {
5109 /* If the GOT offset might be >= 4k, we have to load it
5110 from the literal pool. */
5111
5112 if (reload_in_progress || reload_completed)
5113 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
5114
5115 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTNTPOFF);
5116 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
5117 new_rtx = force_const_mem (Pmode, new_rtx);
5118 temp = gen_reg_rtx (Pmode);
5119 emit_move_insn (temp, new_rtx);
5120
5121 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, temp);
5122 new_rtx = gen_const_mem (Pmode, new_rtx);
5123
5124 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, new_rtx, addr), UNSPEC_TLS_LOAD);
5125 temp = gen_reg_rtx (Pmode);
5126 emit_insn (gen_rtx_SET (temp, new_rtx));
5127 }
5128 else
5129 {
5130 /* In position-dependent code, load the absolute address of
5131 the GOT entry from the literal pool. */
5132
5133 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_INDNTPOFF);
5134 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
5135 new_rtx = force_const_mem (Pmode, new_rtx);
5136 temp = gen_reg_rtx (Pmode);
5137 emit_move_insn (temp, new_rtx);
5138
5139 new_rtx = temp;
5140 new_rtx = gen_const_mem (Pmode, new_rtx);
5141 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, new_rtx, addr), UNSPEC_TLS_LOAD);
5142 temp = gen_reg_rtx (Pmode);
5143 emit_insn (gen_rtx_SET (temp, new_rtx));
5144 }
5145
5146 new_rtx = gen_rtx_PLUS (Pmode, s390_get_thread_pointer (), temp);
5147 if (reg != 0)
5148 {
5149 s390_load_address (reg, new_rtx);
5150 new_rtx = reg;
5151 }
5152 break;
5153
5154 case TLS_MODEL_LOCAL_EXEC:
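/* For local exec the offset sym@NTPOFF is taken from the literal
pool and simply added to the thread pointer; no call and no GOT
access is needed (sketch of the code below). */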
5155 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_NTPOFF);
5156 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
5157 new_rtx = force_const_mem (Pmode, new_rtx);
5158 temp = gen_reg_rtx (Pmode);
5159 emit_move_insn (temp, new_rtx);
5160
5161 new_rtx = gen_rtx_PLUS (Pmode, s390_get_thread_pointer (), temp);
5162 if (reg != 0)
5163 {
5164 s390_load_address (reg, new_rtx);
5165 new_rtx = reg;
5166 }
5167 break;
5168
5169 default:
5170 gcc_unreachable ();
5171 }
5172
5173 else if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == UNSPEC)
5174 {
5175 switch (XINT (XEXP (addr, 0), 1))
5176 {
5177 case UNSPEC_INDNTPOFF:
5178 gcc_assert (TARGET_CPU_ZARCH);
5179 new_rtx = addr;
5180 break;
5181
5182 default:
5183 gcc_unreachable ();
5184 }
5185 }
5186
5187 else if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS
5188 && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT)
5189 {
5190 new_rtx = XEXP (XEXP (addr, 0), 0);
5191 if (GET_CODE (new_rtx) != SYMBOL_REF)
5192 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
5193
5194 new_rtx = legitimize_tls_address (new_rtx, reg);
5195 new_rtx = plus_constant (Pmode, new_rtx,
5196 INTVAL (XEXP (XEXP (addr, 0), 1)));
5197 new_rtx = force_operand (new_rtx, 0);
5198 }
5199
5200 else
5201 gcc_unreachable (); /* for now ... */
5202
5203 return new_rtx;
5204 }
5205
5206 /* Emit insns making the address in operands[1] valid for a standard
5207 move to operands[0]. operands[1] is replaced by an address which
5208 should be used instead of the former RTX to emit the move
5209 pattern. */
5210
5211 void
5212 emit_symbolic_move (rtx *operands)
5213 {
5214 rtx temp = !can_create_pseudo_p () ? operands[0] : gen_reg_rtx (Pmode);
5215
5216 if (GET_CODE (operands[0]) == MEM)
5217 operands[1] = force_reg (Pmode, operands[1]);
5218 else if (TLS_SYMBOLIC_CONST (operands[1]))
5219 operands[1] = legitimize_tls_address (operands[1], temp);
5220 else if (flag_pic)
5221 operands[1] = legitimize_pic_address (operands[1], temp);
5222 }
5223
5224 /* Try machine-dependent ways of modifying an illegitimate address X
5225 to be legitimate. If we find one, return the new, valid address.
5226
5227 OLDX is the address as it was before break_out_memory_refs was called.
5228 In some cases it is useful to look at this to decide what needs to be done.
5229
5230 MODE is the mode of the operand pointed to by X.
5231
5232 When -fpic is used, special handling is needed for symbolic references.
5233 See comments by legitimize_pic_address for details. */
5234
5235 static rtx
5236 s390_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
5237 machine_mode mode ATTRIBUTE_UNUSED)
5238 {
5239 rtx constant_term = const0_rtx;
5240
5241 if (TLS_SYMBOLIC_CONST (x))
5242 {
5243 x = legitimize_tls_address (x, 0);
5244
5245 if (s390_legitimate_address_p (mode, x, FALSE))
5246 return x;
5247 }
5248 else if (GET_CODE (x) == PLUS
5249 && (TLS_SYMBOLIC_CONST (XEXP (x, 0))
5250 || TLS_SYMBOLIC_CONST (XEXP (x, 1))))
5251 {
5252 return x;
5253 }
5254 else if (flag_pic)
5255 {
5256 if (SYMBOLIC_CONST (x)
5257 || (GET_CODE (x) == PLUS
5258 && (SYMBOLIC_CONST (XEXP (x, 0))
5259 || SYMBOLIC_CONST (XEXP (x, 1)))))
5260 x = legitimize_pic_address (x, 0);
5261
5262 if (s390_legitimate_address_p (mode, x, FALSE))
5263 return x;
5264 }
5265
5266 x = eliminate_constant_term (x, &constant_term);
5267
5268 /* Optimize loading of large displacements by splitting them
5269 into the multiple of 4K and the rest; this allows the
5270 former to be CSE'd if possible.
5271
5272 Don't do this if the displacement is added to a register
5273 pointing into the stack frame, as the offsets will
5274 change later anyway. */
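/* E.g. a displacement of 0x12345 is split into upper = 0x12000
and lower = 0x345; the load of the 4K-aligned part can then be
CSE'd across neighboring accesses (illustrative values). */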
5275
5276 if (GET_CODE (constant_term) == CONST_INT
5277 && !TARGET_LONG_DISPLACEMENT
5278 && !DISP_IN_RANGE (INTVAL (constant_term))
5279 && !(REG_P (x) && REGNO_PTR_FRAME_P (REGNO (x))))
5280 {
5281 HOST_WIDE_INT lower = INTVAL (constant_term) & 0xfff;
5282 HOST_WIDE_INT upper = INTVAL (constant_term) ^ lower;
5283
5284 rtx temp = gen_reg_rtx (Pmode);
5285 rtx val = force_operand (GEN_INT (upper), temp);
5286 if (val != temp)
5287 emit_move_insn (temp, val);
5288
5289 x = gen_rtx_PLUS (Pmode, x, temp);
5290 constant_term = GEN_INT (lower);
5291 }
5292
5293 if (GET_CODE (x) == PLUS)
5294 {
5295 if (GET_CODE (XEXP (x, 0)) == REG)
5296 {
5297 rtx temp = gen_reg_rtx (Pmode);
5298 rtx val = force_operand (XEXP (x, 1), temp);
5299 if (val != temp)
5300 emit_move_insn (temp, val);
5301
5302 x = gen_rtx_PLUS (Pmode, XEXP (x, 0), temp);
5303 }
5304
5305 else if (GET_CODE (XEXP (x, 1)) == REG)
5306 {
5307 rtx temp = gen_reg_rtx (Pmode);
5308 rtx val = force_operand (XEXP (x, 0), temp);
5309 if (val != temp)
5310 emit_move_insn (temp, val);
5311
5312 x = gen_rtx_PLUS (Pmode, temp, XEXP (x, 1));
5313 }
5314 }
5315
5316 if (constant_term != const0_rtx)
5317 x = gen_rtx_PLUS (Pmode, x, constant_term);
5318
5319 return x;
5320 }
5321
5322 /* Try a machine-dependent way of reloading an illegitimate address AD
5323 operand. If we find one, push the reload and return the new address.
5324
5325 MODE is the mode of the enclosing MEM. OPNUM is the operand number
5326 and TYPE is the reload type of the current reload. */
5327
5328 rtx
5329 legitimize_reload_address (rtx ad, machine_mode mode ATTRIBUTE_UNUSED,
5330 int opnum, int type)
5331 {
5332 if (!optimize || TARGET_LONG_DISPLACEMENT)
5333 return NULL_RTX;
5334
5335 if (GET_CODE (ad) == PLUS)
5336 {
5337 rtx tem = simplify_binary_operation (PLUS, Pmode,
5338 XEXP (ad, 0), XEXP (ad, 1));
5339 if (tem)
5340 ad = tem;
5341 }
5342
5343 if (GET_CODE (ad) == PLUS
5344 && GET_CODE (XEXP (ad, 0)) == REG
5345 && GET_CODE (XEXP (ad, 1)) == CONST_INT
5346 && !DISP_IN_RANGE (INTVAL (XEXP (ad, 1))))
5347 {
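/* Same 4K split as in s390_legitimize_address: e.g. (plus r1
0x12345) is rewritten as (plus (plus r1 0x12000) 0x345) and the
inner sum is pushed as a base-register reload (illustrative). */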
5348 HOST_WIDE_INT lower = INTVAL (XEXP (ad, 1)) & 0xfff;
5349 HOST_WIDE_INT upper = INTVAL (XEXP (ad, 1)) ^ lower;
5350 rtx cst, tem, new_rtx;
5351
5352 cst = GEN_INT (upper);
5353 if (!legitimate_reload_constant_p (cst))
5354 cst = force_const_mem (Pmode, cst);
5355
5356 tem = gen_rtx_PLUS (Pmode, XEXP (ad, 0), cst);
5357 new_rtx = gen_rtx_PLUS (Pmode, tem, GEN_INT (lower));
5358
5359 push_reload (XEXP (tem, 1), 0, &XEXP (tem, 1), 0,
5360 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
5361 opnum, (enum reload_type) type);
5362 return new_rtx;
5363 }
5364
5365 return NULL_RTX;
5366 }
5367
5368 /* Emit code to move LEN bytes from SRC to DST. */
5369
5370 bool
5371 s390_expand_movmem (rtx dst, rtx src, rtx len)
5372 {
5373 /* When tuning for z10 or higher we rely on the Glibc functions to
5374 do the right thing. Only for constant lengths below 64k will we
5375 generate inline code. */
5376 if (s390_tune >= PROCESSOR_2097_Z10
5377 && (GET_CODE (len) != CONST_INT || INTVAL (len) > (1<<16)))
5378 return false;
5379
5380 /* Expand memcpy for constant length operands without a loop if it
5381 is shorter that way.
5382
5383 With a constant length argument a
5384 memcpy loop (without pfd) is 36 bytes -> 6 * mvc */
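/* E.g. a constant length of 600 is expanded into three mvc
blocks covering 256 + 256 + 88 bytes (illustrative). */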
5385 if (GET_CODE (len) == CONST_INT
5386 && INTVAL (len) >= 0
5387 && INTVAL (len) <= 256 * 6
5388 && (!TARGET_MVCLE || INTVAL (len) <= 256))
5389 {
5390 HOST_WIDE_INT o, l;
5391
5392 for (l = INTVAL (len), o = 0; l > 0; l -= 256, o += 256)
5393 {
5394 rtx newdst = adjust_address (dst, BLKmode, o);
5395 rtx newsrc = adjust_address (src, BLKmode, o);
5396 emit_insn (gen_movmem_short (newdst, newsrc,
5397 GEN_INT (l > 256 ? 255 : l - 1)));
5398 }
5399 }
5400
5401 else if (TARGET_MVCLE)
5402 {
5403 emit_insn (gen_movmem_long (dst, src, convert_to_mode (Pmode, len, 1)));
5404 }
5405
5406 else
5407 {
5408 rtx dst_addr, src_addr, count, blocks, temp;
5409 rtx_code_label *loop_start_label = gen_label_rtx ();
5410 rtx_code_label *loop_end_label = gen_label_rtx ();
5411 rtx_code_label *end_label = gen_label_rtx ();
5412 machine_mode mode;
5413
5414 mode = GET_MODE (len);
5415 if (mode == VOIDmode)
5416 mode = Pmode;
5417
5418 dst_addr = gen_reg_rtx (Pmode);
5419 src_addr = gen_reg_rtx (Pmode);
5420 count = gen_reg_rtx (mode);
5421 blocks = gen_reg_rtx (mode);
5422
5423 convert_move (count, len, 1);
5424 emit_cmp_and_jump_insns (count, const0_rtx,
5425 EQ, NULL_RTX, mode, 1, end_label);
5426
5427 emit_move_insn (dst_addr, force_operand (XEXP (dst, 0), NULL_RTX));
5428 emit_move_insn (src_addr, force_operand (XEXP (src, 0), NULL_RTX));
5429 dst = change_address (dst, VOIDmode, dst_addr);
5430 src = change_address (src, VOIDmode, src_addr);
5431
5432 temp = expand_binop (mode, add_optab, count, constm1_rtx, count, 1,
5433 OPTAB_DIRECT);
5434 if (temp != count)
5435 emit_move_insn (count, temp);
5436
5437 temp = expand_binop (mode, lshr_optab, count, GEN_INT (8), blocks, 1,
5438 OPTAB_DIRECT);
5439 if (temp != blocks)
5440 emit_move_insn (blocks, temp);
5441
5442 emit_cmp_and_jump_insns (blocks, const0_rtx,
5443 EQ, NULL_RTX, mode, 1, loop_end_label);
5444
5445 emit_label (loop_start_label);
5446
5447 if (TARGET_Z10
5448 && (GET_CODE (len) != CONST_INT || INTVAL (len) > 768))
5449 {
5450 rtx prefetch;
5451
5452 /* Issue a read prefetch for the +3 cache line. */
5453 prefetch = gen_prefetch (gen_rtx_PLUS (Pmode, src_addr, GEN_INT (768)),
5454 const0_rtx, const0_rtx);
5455 PREFETCH_SCHEDULE_BARRIER_P (prefetch) = true;
5456 emit_insn (prefetch);
5457
5458 /* Issue a write prefetch for the +3 cache line. */
5459 prefetch = gen_prefetch (gen_rtx_PLUS (Pmode, dst_addr, GEN_INT (768)),
5460 const1_rtx, const0_rtx);
5461 PREFETCH_SCHEDULE_BARRIER_P (prefetch) = true;
5462 emit_insn (prefetch);
5463 }
5464
5465 emit_insn (gen_movmem_short (dst, src, GEN_INT (255)));
5466 s390_load_address (dst_addr,
5467 gen_rtx_PLUS (Pmode, dst_addr, GEN_INT (256)));
5468 s390_load_address (src_addr,
5469 gen_rtx_PLUS (Pmode, src_addr, GEN_INT (256)));
5470
5471 temp = expand_binop (mode, add_optab, blocks, constm1_rtx, blocks, 1,
5472 OPTAB_DIRECT);
5473 if (temp != blocks)
5474 emit_move_insn (blocks, temp);
5475
5476 emit_cmp_and_jump_insns (blocks, const0_rtx,
5477 EQ, NULL_RTX, mode, 1, loop_end_label);
5478
5479 emit_jump (loop_start_label);
5480 emit_label (loop_end_label);
5481
5482 emit_insn (gen_movmem_short (dst, src,
5483 convert_to_mode (Pmode, count, 1)));
5484 emit_label (end_label);
5485 }
5486 return true;
5487 }
5488
5489 /* Emit code to set LEN bytes at DST to VAL.
5490 Make use of clrmem if VAL is zero. */
5491
5492 void
5493 s390_expand_setmem (rtx dst, rtx len, rtx val)
5494 {
5495 if (GET_CODE (len) == CONST_INT && INTVAL (len) <= 0)
5496 return;
5497
5498 gcc_assert (GET_CODE (val) == CONST_INT || GET_MODE (val) == QImode);
5499
5500 /* Expand setmem/clrmem for a constant length operand without a
5501 loop if it will be shorter that way.
5502 With a constant length and without pfd argument a
5503 clrmem loop is 32 bytes -> 5.3 * xc
5504 setmem loop is 36 bytes -> 3.6 * (mvi/stc + mvc) */
5505 if (GET_CODE (len) == CONST_INT
5506 && ((INTVAL (len) <= 256 * 5 && val == const0_rtx)
5507 || INTVAL (len) <= 257 * 3)
5508 && (!TARGET_MVCLE || INTVAL (len) <= 256))
5509 {
5510 HOST_WIDE_INT o, l;
5511
5512 if (val == const0_rtx)
5513 /* clrmem: emit 256 byte blockwise XCs. */
5514 for (l = INTVAL (len), o = 0; l > 0; l -= 256, o += 256)
5515 {
5516 rtx newdst = adjust_address (dst, BLKmode, o);
5517 emit_insn (gen_clrmem_short (newdst,
5518 GEN_INT (l > 256 ? 255 : l - 1)));
5519 }
5520 else
5521 /* setmem: emit 1(mvi) + 256(mvc) byte blockwise memsets by
5522 setting first byte to val and using a 256 byte mvc with one
5523 byte overlap to propagate the byte. */
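/* E.g. with val = 0xAA: "mvi 0(%r1),0xAA" followed by
"mvc 1(255,%r1),0(%r1)" replicates the byte over 256 bytes,
since mvc copies strictly left to right (illustrative). */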
5524 for (l = INTVAL (len), o = 0; l > 0; l -= 257, o += 257)
5525 {
5526 rtx newdst = adjust_address (dst, BLKmode, o);
5527 emit_move_insn (adjust_address (dst, QImode, o), val);
5528 if (l > 1)
5529 {
5530 rtx newdstp1 = adjust_address (dst, BLKmode, o + 1);
5531 emit_insn (gen_movmem_short (newdstp1, newdst,
5532 GEN_INT (l > 257 ? 255 : l - 2)));
5533 }
5534 }
5535 }
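/* Illustration of the overlap trick for a hypothetical 5 byte
   nonzero memset: the mvi/mvc pair above behaves like

       dst[0] = val;
       for (i = 0; i < 4; i++)
         dst[i + 1] = dst[i];

   because MVC copies strictly left to right, so the one byte overlap
   replicates the first byte across the whole block.  */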
5536
5537 else if (TARGET_MVCLE)
5538 {
5539 val = force_not_mem (convert_modes (Pmode, QImode, val, 1));
5540 if (TARGET_64BIT)
5541 emit_insn (gen_setmem_long_di (dst, convert_to_mode (Pmode, len, 1),
5542 val));
5543 else
5544 emit_insn (gen_setmem_long_si (dst, convert_to_mode (Pmode, len, 1),
5545 val));
5546 }
5547
5548 else
5549 {
5550 rtx dst_addr, count, blocks, temp, dstp1 = NULL_RTX;
5551 rtx_code_label *loop_start_label = gen_label_rtx ();
5552 rtx_code_label *onebyte_end_label = gen_label_rtx ();
5553 rtx_code_label *zerobyte_end_label = gen_label_rtx ();
5554 rtx_code_label *restbyte_end_label = gen_label_rtx ();
5555 machine_mode mode;
5556
5557 mode = GET_MODE (len);
5558 if (mode == VOIDmode)
5559 mode = Pmode;
5560
5561 dst_addr = gen_reg_rtx (Pmode);
5562 count = gen_reg_rtx (mode);
5563 blocks = gen_reg_rtx (mode);
5564
5565 convert_move (count, len, 1);
5566 emit_cmp_and_jump_insns (count, const0_rtx,
5567 EQ, NULL_RTX, mode, 1, zerobyte_end_label,
5568 profile_probability::very_unlikely ());
5569
5570 /* We need to make a copy of the target address since memset is
5571 supposed to return it unmodified. We have to make it here
5572 already since the new reg is used at onebyte_end_label. */
5573 emit_move_insn (dst_addr, force_operand (XEXP (dst, 0), NULL_RTX));
5574 dst = change_address (dst, VOIDmode, dst_addr);
5575
5576 if (val != const0_rtx)
5577 {
5578 /* When using the overlapping mvc the original target
5579 address is only accessed as a single byte entity (even by
5580 the mvc reading this value).  */
5581 set_mem_size (dst, 1);
5582 dstp1 = adjust_address (dst, VOIDmode, 1);
5583 emit_cmp_and_jump_insns (count,
5584 const1_rtx, EQ, NULL_RTX, mode, 1,
5585 onebyte_end_label,
5586 profile_probability::very_unlikely ());
5587 }
5588
5589 /* There is one unconditional (mvi+mvc)/xc after the loop
5590 dealing with the rest of the bytes, subtracting two (mvi+mvc)
5591 or one (xc) here leaves this number of bytes to be handled by
5592 it. */
5593 temp = expand_binop (mode, add_optab, count,
5594 val == const0_rtx ? constm1_rtx : GEN_INT (-2),
5595 count, 1, OPTAB_DIRECT);
5596 if (temp != count)
5597 emit_move_insn (count, temp);
5598
5599 temp = expand_binop (mode, lshr_optab, count, GEN_INT (8), blocks, 1,
5600 OPTAB_DIRECT);
5601 if (temp != blocks)
5602 emit_move_insn (blocks, temp);
5603
5604 emit_cmp_and_jump_insns (blocks, const0_rtx,
5605 EQ, NULL_RTX, mode, 1, restbyte_end_label);
5606
5607 emit_jump (loop_start_label);
5608
5609 if (val != const0_rtx)
5610 {
5611 /* The 1 byte != 0 special case. Not handled efficiently
5612 since we require two jumps for that. However, this
5613 should be very rare. */
5614 emit_label (onebyte_end_label);
5615 emit_move_insn (adjust_address (dst, QImode, 0), val);
5616 emit_jump (zerobyte_end_label);
5617 }
5618
5619 emit_label (loop_start_label);
5620
5621 if (TARGET_Z10
5622 && (GET_CODE (len) != CONST_INT || INTVAL (len) > 1024))
5623 {
5624 /* Issue a write prefetch for the +4 cache line. */
5625 rtx prefetch = gen_prefetch (gen_rtx_PLUS (Pmode, dst_addr,
5626 GEN_INT (1024)),
5627 const1_rtx, const0_rtx);
5628 emit_insn (prefetch);
5629 PREFETCH_SCHEDULE_BARRIER_P (prefetch) = true;
5630 }
5631
5632 if (val == const0_rtx)
5633 emit_insn (gen_clrmem_short (dst, GEN_INT (255)));
5634 else
5635 {
5636 /* Set the first byte in the block to the value and use an
5637 overlapping mvc for the block. */
5638 emit_move_insn (adjust_address (dst, QImode, 0), val);
5639 emit_insn (gen_movmem_short (dstp1, dst, GEN_INT (254)));
5640 }
5641 s390_load_address (dst_addr,
5642 gen_rtx_PLUS (Pmode, dst_addr, GEN_INT (256)));
5643
5644 temp = expand_binop (mode, add_optab, blocks, constm1_rtx, blocks, 1,
5645 OPTAB_DIRECT);
5646 if (temp != blocks)
5647 emit_move_insn (blocks, temp);
5648
5649 emit_cmp_and_jump_insns (blocks, const0_rtx,
5650 NE, NULL_RTX, mode, 1, loop_start_label);
5651
5652 emit_label (restbyte_end_label);
5653
5654 if (val == const0_rtx)
5655 emit_insn (gen_clrmem_short (dst, convert_to_mode (Pmode, count, 1)));
5656 else
5657 {
5658 /* Set the first byte in the block to the value and use an
5659 overlapping mvc for the block. */
5660 emit_move_insn (adjust_address (dst, QImode, 0), val);
5661 /* EXECUTE only uses the lowest 8 bits of count, which is
5662 exactly what we need here.  */
5663 emit_insn (gen_movmem_short (dstp1, dst,
5664 convert_to_mode (Pmode, count, 1)));
5665 }
5666
5667 emit_label (zerobyte_end_label);
5668 }
5669 }
5670
5671 /* Emit code to compare LEN bytes at OP0 with those at OP1,
5672 and return the result in TARGET. */
5673
5674 bool
5675 s390_expand_cmpmem (rtx target, rtx op0, rtx op1, rtx len)
5676 {
5677 rtx ccreg = gen_rtx_REG (CCUmode, CC_REGNUM);
5678 rtx tmp;
5679
5680 /* When tuning for z10 or higher we rely on the Glibc functions to
5681 do the right thing.  Inline code is generated only for constant
5682 lengths below 64k.  */
5683 if (s390_tune >= PROCESSOR_2097_Z10
5684 && (GET_CODE (len) != CONST_INT || INTVAL (len) > (1<<16)))
5685 return false;
5686
5687 /* As the result of CMPINT is inverted compared to what we need,
5688 we have to swap the operands. */
5689 tmp = op0; op0 = op1; op1 = tmp;
5690
5691 if (GET_CODE (len) == CONST_INT && INTVAL (len) >= 0 && INTVAL (len) <= 256)
5692 {
5693 if (INTVAL (len) > 0)
5694 {
5695 emit_insn (gen_cmpmem_short (op0, op1, GEN_INT (INTVAL (len) - 1)));
5696 emit_insn (gen_cmpint (target, ccreg));
5697 }
5698 else
5699 emit_move_insn (target, const0_rtx);
5700 }
5701 else if (TARGET_MVCLE)
5702 {
5703 emit_insn (gen_cmpmem_long (op0, op1, convert_to_mode (Pmode, len, 1)));
5704 emit_insn (gen_cmpint (target, ccreg));
5705 }
5706 else
5707 {
5708 rtx addr0, addr1, count, blocks, temp;
5709 rtx_code_label *loop_start_label = gen_label_rtx ();
5710 rtx_code_label *loop_end_label = gen_label_rtx ();
5711 rtx_code_label *end_label = gen_label_rtx ();
5712 machine_mode mode;
5713
5714 mode = GET_MODE (len);
5715 if (mode == VOIDmode)
5716 mode = Pmode;
5717
5718 addr0 = gen_reg_rtx (Pmode);
5719 addr1 = gen_reg_rtx (Pmode);
5720 count = gen_reg_rtx (mode);
5721 blocks = gen_reg_rtx (mode);
5722
5723 convert_move (count, len, 1);
5724 emit_cmp_and_jump_insns (count, const0_rtx,
5725 EQ, NULL_RTX, mode, 1, end_label);
5726
5727 emit_move_insn (addr0, force_operand (XEXP (op0, 0), NULL_RTX));
5728 emit_move_insn (addr1, force_operand (XEXP (op1, 0), NULL_RTX));
5729 op0 = change_address (op0, VOIDmode, addr0);
5730 op1 = change_address (op1, VOIDmode, addr1);
5731
5732 temp = expand_binop (mode, add_optab, count, constm1_rtx, count, 1,
5733 OPTAB_DIRECT);
5734 if (temp != count)
5735 emit_move_insn (count, temp);
5736
5737 temp = expand_binop (mode, lshr_optab, count, GEN_INT (8), blocks, 1,
5738 OPTAB_DIRECT);
5739 if (temp != blocks)
5740 emit_move_insn (blocks, temp);
5741
5742 emit_cmp_and_jump_insns (blocks, const0_rtx,
5743 EQ, NULL_RTX, mode, 1, loop_end_label);
5744
5745 emit_label (loop_start_label);
5746
5747 if (TARGET_Z10
5748 && (GET_CODE (len) != CONST_INT || INTVAL (len) > 512))
5749 {
5750 rtx prefetch;
5751
5752 /* Issue a read prefetch for the +2 cache line of operand 1. */
5753 prefetch = gen_prefetch (gen_rtx_PLUS (Pmode, addr0, GEN_INT (512)),
5754 const0_rtx, const0_rtx);
5755 emit_insn (prefetch);
5756 PREFETCH_SCHEDULE_BARRIER_P (prefetch) = true;
5757
5758 /* Issue a read prefetch for the +2 cache line of operand 2. */
5759 prefetch = gen_prefetch (gen_rtx_PLUS (Pmode, addr1, GEN_INT (512)),
5760 const0_rtx, const0_rtx);
5761 emit_insn (prefetch);
5762 PREFETCH_SCHEDULE_BARRIER_P (prefetch) = true;
5763 }
5764
5765 emit_insn (gen_cmpmem_short (op0, op1, GEN_INT (255)));
5766 temp = gen_rtx_NE (VOIDmode, ccreg, const0_rtx);
5767 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
5768 gen_rtx_LABEL_REF (VOIDmode, end_label), pc_rtx);
5769 temp = gen_rtx_SET (pc_rtx, temp);
5770 emit_jump_insn (temp);
5771
5772 s390_load_address (addr0,
5773 gen_rtx_PLUS (Pmode, addr0, GEN_INT (256)));
5774 s390_load_address (addr1,
5775 gen_rtx_PLUS (Pmode, addr1, GEN_INT (256)));
5776
5777 temp = expand_binop (mode, add_optab, blocks, constm1_rtx, blocks, 1,
5778 OPTAB_DIRECT);
5779 if (temp != blocks)
5780 emit_move_insn (blocks, temp);
5781
5782 emit_cmp_and_jump_insns (blocks, const0_rtx,
5783 EQ, NULL_RTX, mode, 1, loop_end_label);
5784
5785 emit_jump (loop_start_label);
5786 emit_label (loop_end_label);
5787
5788 emit_insn (gen_cmpmem_short (op0, op1,
5789 convert_to_mode (Pmode, count, 1)));
5790 emit_label (end_label);
5791
5792 emit_insn (gen_cmpint (target, ccreg));
5793 }
5794 return true;
5795 }
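/* In outline, the variable-length path above corresponds roughly to:

       if (count == 0) goto end;
       count--;
       for (blocks = count >> 8; blocks != 0; blocks--)
         { CLC 256 bytes; if (cc != 0) goto end; advance op0, op1; }
       CLC (count & 0xff) + 1 bytes;
     end:
       target = CMPINT (cc);

   with CLC standing for the cmpmem_short pattern and CMPINT mapping
   the condition code to a signed result.  */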
5796
5797 /* Emit a conditional jump to LABEL for condition code mask MASK using
5798 comparison operator COMPARISON.  Return the emitted jump insn.  */
5799
5800 static rtx_insn *
5801 s390_emit_ccraw_jump (HOST_WIDE_INT mask, enum rtx_code comparison, rtx label)
5802 {
5803 rtx temp;
5804
5805 gcc_assert (comparison == EQ || comparison == NE);
5806 gcc_assert (mask > 0 && mask < 15);
5807
5808 temp = gen_rtx_fmt_ee (comparison, VOIDmode,
5809 gen_rtx_REG (CCRAWmode, CC_REGNUM), GEN_INT (mask));
5810 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
5811 gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
5812 temp = gen_rtx_SET (pc_rtx, temp);
5813 return emit_jump_insn (temp);
5814 }
5815
5816 /* Emit the instructions to implement strlen of STRING and store the
5817 result in TARGET. The string has the known ALIGNMENT. This
5818 version uses vector instructions and is therefore not appropriate
5819 for targets prior to z13. */
5820
5821 void
5822 s390_expand_vec_strlen (rtx target, rtx string, rtx alignment)
5823 {
5824 rtx highest_index_to_load_reg = gen_reg_rtx (Pmode);
5825 rtx str_reg = gen_reg_rtx (V16QImode);
5826 rtx str_addr_base_reg = gen_reg_rtx (Pmode);
5827 rtx str_idx_reg = gen_reg_rtx (Pmode);
5828 rtx result_reg = gen_reg_rtx (V16QImode);
5829 rtx is_aligned_label = gen_label_rtx ();
5830 rtx into_loop_label = NULL_RTX;
5831 rtx loop_start_label = gen_label_rtx ();
5832 rtx temp;
5833 rtx len = gen_reg_rtx (QImode);
5834 rtx cond;
5835
5836 s390_load_address (str_addr_base_reg, XEXP (string, 0));
5837 emit_move_insn (str_idx_reg, const0_rtx);
5838
5839 if (INTVAL (alignment) < 16)
5840 {
5841 /* Check whether the address happens to be properly aligned and,
5842 if so, jump directly to the aligned loop.  */
5843 emit_cmp_and_jump_insns (gen_rtx_AND (Pmode,
5844 str_addr_base_reg, GEN_INT (15)),
5845 const0_rtx, EQ, NULL_RTX,
5846 Pmode, 1, is_aligned_label);
5847
5848 temp = gen_reg_rtx (Pmode);
5849 temp = expand_binop (Pmode, and_optab, str_addr_base_reg,
5850 GEN_INT (15), temp, 1, OPTAB_DIRECT);
5851 gcc_assert (REG_P (temp));
5852 highest_index_to_load_reg =
5853 expand_binop (Pmode, sub_optab, GEN_INT (15), temp,
5854 highest_index_to_load_reg, 1, OPTAB_DIRECT);
5855 gcc_assert (REG_P (highest_index_to_load_reg));
5856 emit_insn (gen_vllv16qi (str_reg,
5857 convert_to_mode (SImode, highest_index_to_load_reg, 1),
5858 gen_rtx_MEM (BLKmode, str_addr_base_reg)));
5859
5860 into_loop_label = gen_label_rtx ();
5861 s390_emit_jump (into_loop_label, NULL_RTX);
5862 emit_barrier ();
5863 }
5864
5865 emit_label (is_aligned_label);
5866 LABEL_NUSES (is_aligned_label) = INTVAL (alignment) < 16 ? 2 : 1;
5867
5868 /* From this point on we perform only 16-byte aligned
5869 loads.  */
5870 emit_move_insn (highest_index_to_load_reg, GEN_INT (15));
5871
5872 emit_label (loop_start_label);
5873 LABEL_NUSES (loop_start_label) = 1;
5874
5875 /* Load 16 bytes of the string into VR. */
5876 emit_move_insn (str_reg,
5877 gen_rtx_MEM (V16QImode,
5878 gen_rtx_PLUS (Pmode, str_idx_reg,
5879 str_addr_base_reg)));
5880 if (into_loop_label != NULL_RTX)
5881 {
5882 emit_label (into_loop_label);
5883 LABEL_NUSES (into_loop_label) = 1;
5884 }
5885
5886 /* Increment string index by 16 bytes. */
5887 expand_binop (Pmode, add_optab, str_idx_reg, GEN_INT (16),
5888 str_idx_reg, 1, OPTAB_DIRECT);
5889
5890 emit_insn (gen_vec_vfenesv16qi (result_reg, str_reg, str_reg,
5891 GEN_INT (VSTRING_FLAG_ZS | VSTRING_FLAG_CS)));
5892
5893 add_int_reg_note (s390_emit_ccraw_jump (8, NE, loop_start_label),
5894 REG_BR_PROB,
5895 profile_probability::very_likely ().to_reg_br_prob_note ());
5896 emit_insn (gen_vec_extractv16qiqi (len, result_reg, GEN_INT (7)));
5897
5898 /* If the string pointer wasn't aligned we have loaded less than 16
5899 bytes and the remaining bytes got filled with zeros (by vll).
5900 Now we have to check whether the resulting index lies within the
5901 bytes actually part of the string. */
5902
5903 cond = s390_emit_compare (GT, convert_to_mode (Pmode, len, 1),
5904 highest_index_to_load_reg);
5905 s390_load_address (highest_index_to_load_reg,
5906 gen_rtx_PLUS (Pmode, highest_index_to_load_reg,
5907 const1_rtx));
5908 if (TARGET_64BIT)
5909 emit_insn (gen_movdicc (str_idx_reg, cond,
5910 highest_index_to_load_reg, str_idx_reg));
5911 else
5912 emit_insn (gen_movsicc (str_idx_reg, cond,
5913 highest_index_to_load_reg, str_idx_reg));
5914
5915 add_reg_br_prob_note (s390_emit_jump (is_aligned_label, cond),
5916 profile_probability::very_unlikely ());
5917
5918 expand_binop (Pmode, add_optab, str_idx_reg,
5919 GEN_INT (-16), str_idx_reg, 1, OPTAB_DIRECT);
5920 /* FIXME: len is already zero extended - so avoid the llgcr emitted
5921 here. */
5922 temp = expand_binop (Pmode, add_optab, str_idx_reg,
5923 convert_to_mode (Pmode, len, 1),
5924 target, 1, OPTAB_DIRECT);
5925 if (temp != target)
5926 emit_move_insn (target, temp);
5927 }
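/* Rough C model of the strlen loop above, with vfenez denoting the
   zero-search vector find-element instruction, which yields the byte
   index of the first zero or 16 if there is none:

       idx = 0;
       do { v = load16 (str + idx); idx += 16; pos = vfenez (v); }
       while (pos == 16);
       return idx - 16 + pos;

   preceded by the prologue that performs the first, possibly partial
   load with VECTOR LOAD WITH LENGTH so it stays within the current
   aligned 16 byte block.  */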
5928
5929 void
5930 s390_expand_vec_movstr (rtx result, rtx dst, rtx src)
5931 {
5932 rtx temp = gen_reg_rtx (Pmode);
5933 rtx src_addr = XEXP (src, 0);
5934 rtx dst_addr = XEXP (dst, 0);
5935 rtx src_addr_reg = gen_reg_rtx (Pmode);
5936 rtx dst_addr_reg = gen_reg_rtx (Pmode);
5937 rtx offset = gen_reg_rtx (Pmode);
5938 rtx vsrc = gen_reg_rtx (V16QImode);
5939 rtx vpos = gen_reg_rtx (V16QImode);
5940 rtx loadlen = gen_reg_rtx (SImode);
5941 rtx gpos_qi = gen_reg_rtx (QImode);
5942 rtx gpos = gen_reg_rtx (SImode);
5943 rtx done_label = gen_label_rtx ();
5944 rtx loop_label = gen_label_rtx ();
5945 rtx exit_label = gen_label_rtx ();
5946 rtx full_label = gen_label_rtx ();
5947
5948 /* Perform a quick check for a terminating zero within the first (up
5949 to) 16 bytes and exit early if successful.  */
5950
5951 emit_insn (gen_vlbb (vsrc, src, GEN_INT (6)));
5952 emit_insn (gen_lcbb (loadlen, src_addr, GEN_INT (6)));
5953 emit_insn (gen_vfenezv16qi (vpos, vsrc, vsrc));
5954 emit_insn (gen_vec_extractv16qiqi (gpos_qi, vpos, GEN_INT (7)));
5955 emit_move_insn (gpos, gen_rtx_SUBREG (SImode, gpos_qi, 0));
5956 /* gpos is the byte index if a zero was found and 16 otherwise.
5957 So if it is lower than the number of bytes loaded we have a hit.  */
5958 emit_cmp_and_jump_insns (gpos, loadlen, GE, NULL_RTX, SImode, 1,
5959 full_label);
5960 emit_insn (gen_vstlv16qi (vsrc, gpos, dst));
5961
5962 force_expand_binop (Pmode, add_optab, dst_addr, gpos, result,
5963 1, OPTAB_DIRECT);
5964 emit_jump (exit_label);
5965 emit_barrier ();
5966
5967 emit_label (full_label);
5968 LABEL_NUSES (full_label) = 1;
5969
5970 /* Calculate `offset' so that src + offset points to the last byte
5971 before 16 byte alignment. */
5972
5973 /* temp = src_addr & 0xf */
5974 force_expand_binop (Pmode, and_optab, src_addr, GEN_INT (15), temp,
5975 1, OPTAB_DIRECT);
5976
5977 /* offset = 0xf - temp */
5978 emit_move_insn (offset, GEN_INT (15));
5979 force_expand_binop (Pmode, sub_optab, offset, temp, offset,
5980 1, OPTAB_DIRECT);
5981
5982 /* Store `offset' bytes in the destination string.  The quick check
5983 has loaded at least `offset' bytes into vsrc.  */
5984
5985 emit_insn (gen_vstlv16qi (vsrc, gen_lowpart (SImode, offset), dst));
5986
5987 /* Advance to the next byte to be loaded. */
5988 force_expand_binop (Pmode, add_optab, offset, const1_rtx, offset,
5989 1, OPTAB_DIRECT);
5990
5991 /* Make sure the addresses are single regs which can be used as a
5992 base. */
5993 emit_move_insn (src_addr_reg, src_addr);
5994 emit_move_insn (dst_addr_reg, dst_addr);
5995
5996 /* MAIN LOOP */
5997
5998 emit_label (loop_label);
5999 LABEL_NUSES (loop_label) = 1;
6000
6001 emit_move_insn (vsrc,
6002 gen_rtx_MEM (V16QImode,
6003 gen_rtx_PLUS (Pmode, src_addr_reg, offset)));
6004
6005 emit_insn (gen_vec_vfenesv16qi (vpos, vsrc, vsrc,
6006 GEN_INT (VSTRING_FLAG_ZS | VSTRING_FLAG_CS)));
6007 add_int_reg_note (s390_emit_ccraw_jump (8, EQ, done_label),
6008 REG_BR_PROB, profile_probability::very_unlikely ()
6009 .to_reg_br_prob_note ());
6010
6011 emit_move_insn (gen_rtx_MEM (V16QImode,
6012 gen_rtx_PLUS (Pmode, dst_addr_reg, offset)),
6013 vsrc);
6014 /* offset += 16 */
6015 force_expand_binop (Pmode, add_optab, offset, GEN_INT (16),
6016 offset, 1, OPTAB_DIRECT);
6017
6018 emit_jump (loop_label);
6019 emit_barrier ();
6020
6021 /* REGULAR EXIT */
6022
6023 /* We are done. Add the offset of the zero character to the dst_addr
6024 pointer to get the result. */
6025
6026 emit_label (done_label);
6027 LABEL_NUSES (done_label) = 1;
6028
6029 force_expand_binop (Pmode, add_optab, dst_addr_reg, offset, dst_addr_reg,
6030 1, OPTAB_DIRECT);
6031
6032 emit_insn (gen_vec_extractv16qiqi (gpos_qi, vpos, GEN_INT (7)));
6033 emit_move_insn (gpos, gen_rtx_SUBREG (SImode, gpos_qi, 0));
6034
6035 emit_insn (gen_vstlv16qi (vsrc, gpos, gen_rtx_MEM (BLKmode, dst_addr_reg)));
6036
6037 force_expand_binop (Pmode, add_optab, dst_addr_reg, gpos, result,
6038 1, OPTAB_DIRECT);
6039
6040 /* EARLY EXIT */
6041
6042 emit_label (exit_label);
6043 LABEL_NUSES (exit_label) = 1;
6044 }
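/* Rough outline of the expander above, with vstl denoting VECTOR
   STORE WITH LENGTH and vfenez as in the strlen expander:

       v = vlbb (src);                     -- load up to block boundary
       pos = vfenez (v);
       if (pos < bytes_loaded)             -- zero within the first load
         { vstl (v, pos, dst); return dst + pos; }
       store the bytes up to the next 16 byte source boundary;
       for (;; offset += 16)
         { v = load16 (src + offset);
           pos = vfenez (v);
           if (pos < 16) break;            -- zero found
           store16 (dst + offset, v); }
       vstl (v, pos, dst + offset);        -- final block with the zero
       return dst + offset + pos;
*/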
6045
6046
6047 /* Expand conditional increment or decrement using alc/slb instructions.
6048 Should generate code setting DST to either SRC or SRC + INCREMENT,
6049 depending on the result of the comparison CMP_OP0 CMP_CODE CMP_OP1.
6050 Returns true if successful, false otherwise.
6051
6052 That makes it possible to implement some if-constructs without jumps e.g.:
6053 (borrow = CC0 | CC1 and carry = CC2 | CC3)
6054 unsigned int a, b, c;
6055 if (a < b) c++; -> CCU b > a -> CC2; c += carry;
6056 if (a < b) c--; -> CCL3 a - b -> borrow; c -= borrow;
6057 if (a <= b) c++; -> CCL3 b - a -> borrow; c += carry;
6058 if (a <= b) c--; -> CCU a <= b -> borrow; c -= borrow;
6059
6060 Checks for EQ and NE with a nonzero value need an additional xor e.g.:
6061 if (a == b) c++; -> CCL3 a ^= b; 0 - a -> borrow; c += carry;
6062 if (a == b) c--; -> CCU a ^= b; a <= 0 -> CC0 | CC1; c -= borrow;
6063 if (a != b) c++; -> CCU a ^= b; a > 0 -> CC2; c += carry;
6064 if (a != b) c--; -> CCL3 a ^= b; 0 - a -> borrow; c -= borrow; */
6065
6066 bool
6067 s390_expand_addcc (enum rtx_code cmp_code, rtx cmp_op0, rtx cmp_op1,
6068 rtx dst, rtx src, rtx increment)
6069 {
6070 machine_mode cmp_mode;
6071 machine_mode cc_mode;
6072 rtx op_res;
6073 rtx insn;
6074 rtvec p;
6075 int ret;
6076
6077 if ((GET_MODE (cmp_op0) == SImode || GET_MODE (cmp_op0) == VOIDmode)
6078 && (GET_MODE (cmp_op1) == SImode || GET_MODE (cmp_op1) == VOIDmode))
6079 cmp_mode = SImode;
6080 else if ((GET_MODE (cmp_op0) == DImode || GET_MODE (cmp_op0) == VOIDmode)
6081 && (GET_MODE (cmp_op1) == DImode || GET_MODE (cmp_op1) == VOIDmode))
6082 cmp_mode = DImode;
6083 else
6084 return false;
6085
6086 /* Try ADD LOGICAL WITH CARRY. */
6087 if (increment == const1_rtx)
6088 {
6089 /* Determine CC mode to use. */
6090 if (cmp_code == EQ || cmp_code == NE)
6091 {
6092 if (cmp_op1 != const0_rtx)
6093 {
6094 cmp_op0 = expand_simple_binop (cmp_mode, XOR, cmp_op0, cmp_op1,
6095 NULL_RTX, 0, OPTAB_WIDEN);
6096 cmp_op1 = const0_rtx;
6097 }
6098
6099 cmp_code = cmp_code == EQ ? LEU : GTU;
6100 }
6101
6102 if (cmp_code == LTU || cmp_code == LEU)
6103 {
6104 rtx tem = cmp_op0;
6105 cmp_op0 = cmp_op1;
6106 cmp_op1 = tem;
6107 cmp_code = swap_condition (cmp_code);
6108 }
6109
6110 switch (cmp_code)
6111 {
6112 case GTU:
6113 cc_mode = CCUmode;
6114 break;
6115
6116 case GEU:
6117 cc_mode = CCL3mode;
6118 break;
6119
6120 default:
6121 return false;
6122 }
6123
6124 /* Emit comparison instruction pattern. */
6125 if (!register_operand (cmp_op0, cmp_mode))
6126 cmp_op0 = force_reg (cmp_mode, cmp_op0);
6127
6128 insn = gen_rtx_SET (gen_rtx_REG (cc_mode, CC_REGNUM),
6129 gen_rtx_COMPARE (cc_mode, cmp_op0, cmp_op1));
6130 /* We use insn_invalid_p here to add clobbers if required. */
6131 ret = insn_invalid_p (emit_insn (insn), false);
6132 gcc_assert (!ret);
6133
6134 /* Emit ALC instruction pattern. */
6135 op_res = gen_rtx_fmt_ee (cmp_code, GET_MODE (dst),
6136 gen_rtx_REG (cc_mode, CC_REGNUM),
6137 const0_rtx);
6138
6139 if (src != const0_rtx)
6140 {
6141 if (!register_operand (src, GET_MODE (dst)))
6142 src = force_reg (GET_MODE (dst), src);
6143
6144 op_res = gen_rtx_PLUS (GET_MODE (dst), op_res, src);
6145 op_res = gen_rtx_PLUS (GET_MODE (dst), op_res, const0_rtx);
6146 }
6147
6148 p = rtvec_alloc (2);
6149 RTVEC_ELT (p, 0) =
6150 gen_rtx_SET (dst, op_res);
6151 RTVEC_ELT (p, 1) =
6152 gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, CC_REGNUM));
6153 emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
6154
6155 return true;
6156 }
6157
6158 /* Try SUBTRACT LOGICAL WITH BORROW. */
6159 if (increment == constm1_rtx)
6160 {
6161 /* Determine CC mode to use. */
6162 if (cmp_code == EQ || cmp_code == NE)
6163 {
6164 if (cmp_op1 != const0_rtx)
6165 {
6166 cmp_op0 = expand_simple_binop (cmp_mode, XOR, cmp_op0, cmp_op1,
6167 NULL_RTX, 0, OPTAB_WIDEN);
6168 cmp_op1 = const0_rtx;
6169 }
6170
6171 cmp_code = cmp_code == EQ ? LEU : GTU;
6172 }
6173
6174 if (cmp_code == GTU || cmp_code == GEU)
6175 {
6176 rtx tem = cmp_op0;
6177 cmp_op0 = cmp_op1;
6178 cmp_op1 = tem;
6179 cmp_code = swap_condition (cmp_code);
6180 }
6181
6182 switch (cmp_code)
6183 {
6184 case LEU:
6185 cc_mode = CCUmode;
6186 break;
6187
6188 case LTU:
6189 cc_mode = CCL3mode;
6190 break;
6191
6192 default:
6193 return false;
6194 }
6195
6196 /* Emit comparison instruction pattern. */
6197 if (!register_operand (cmp_op0, cmp_mode))
6198 cmp_op0 = force_reg (cmp_mode, cmp_op0);
6199
6200 insn = gen_rtx_SET (gen_rtx_REG (cc_mode, CC_REGNUM),
6201 gen_rtx_COMPARE (cc_mode, cmp_op0, cmp_op1));
6202 /* We use insn_invalid_p here to add clobbers if required. */
6203 ret = insn_invalid_p (emit_insn (insn), false);
6204 gcc_assert (!ret);
6205
6206 /* Emit SLB instruction pattern. */
6207 if (!register_operand (src, GET_MODE (dst)))
6208 src = force_reg (GET_MODE (dst), src);
6209
6210 op_res = gen_rtx_MINUS (GET_MODE (dst),
6211 gen_rtx_MINUS (GET_MODE (dst), src, const0_rtx),
6212 gen_rtx_fmt_ee (cmp_code, GET_MODE (dst),
6213 gen_rtx_REG (cc_mode, CC_REGNUM),
6214 const0_rtx));
6215 p = rtvec_alloc (2);
6216 RTVEC_ELT (p, 0) =
6217 gen_rtx_SET (dst, op_res);
6218 RTVEC_ELT (p, 1) =
6219 gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, CC_REGNUM));
6220 emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
6221
6222 return true;
6223 }
6224
6225 return false;
6226 }
6227
6228 /* Expand code for the insv template. Return true if successful. */
6229
6230 bool
6231 s390_expand_insv (rtx dest, rtx op1, rtx op2, rtx src)
6232 {
6233 int bitsize = INTVAL (op1);
6234 int bitpos = INTVAL (op2);
6235 machine_mode mode = GET_MODE (dest);
6236 machine_mode smode;
6237 int smode_bsize, mode_bsize;
6238 rtx op, clobber;
6239
6240 if (bitsize + bitpos > GET_MODE_BITSIZE (mode))
6241 return false;
6242
6243 /* Generate INSERT IMMEDIATE (IILL et al). */
6244 /* (set (ze (reg)) (const_int)). */
6245 if (TARGET_ZARCH
6246 && register_operand (dest, word_mode)
6247 && (bitpos % 16) == 0
6248 && (bitsize % 16) == 0
6249 && const_int_operand (src, VOIDmode))
6250 {
6251 HOST_WIDE_INT val = INTVAL (src);
6252 int regpos = bitpos + bitsize;
6253
6254 while (regpos > bitpos)
6255 {
6256 machine_mode putmode;
6257 int putsize;
6258
6259 if (TARGET_EXTIMM && (regpos % 32 == 0) && (regpos >= bitpos + 32))
6260 putmode = SImode;
6261 else
6262 putmode = HImode;
6263
6264 putsize = GET_MODE_BITSIZE (putmode);
6265 regpos -= putsize;
6266 emit_move_insn (gen_rtx_ZERO_EXTRACT (word_mode, dest,
6267 GEN_INT (putsize),
6268 GEN_INT (regpos)),
6269 gen_int_mode (val, putmode));
6270 val >>= putsize;
6271 }
6272 gcc_assert (regpos == bitpos);
6273 return true;
6274 }
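/* Worked example: BITSIZE 48, BITPOS 0 on a 64-bit register yields
   three 16 bit inserts, or one 16 bit plus one 32 bit insert with
   TARGET_EXTIMM, consuming VAL from its least significant end
   upwards.  */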
6275
6276 smode = smallest_int_mode_for_size (bitsize);
6277 smode_bsize = GET_MODE_BITSIZE (smode);
6278 mode_bsize = GET_MODE_BITSIZE (mode);
6279
6280 /* Generate STORE CHARACTERS UNDER MASK (STCM et al). */
6281 if (bitpos == 0
6282 && (bitsize % BITS_PER_UNIT) == 0
6283 && MEM_P (dest)
6284 && (register_operand (src, word_mode)
6285 || const_int_operand (src, VOIDmode)))
6286 {
6287 /* Emit standard pattern if possible. */
6288 if (smode_bsize == bitsize)
6289 {
6290 emit_move_insn (adjust_address (dest, smode, 0),
6291 gen_lowpart (smode, src));
6292 return true;
6293 }
6294
6295 /* (set (ze (mem)) (const_int)). */
6296 else if (const_int_operand (src, VOIDmode))
6297 {
6298 int size = bitsize / BITS_PER_UNIT;
6299 rtx src_mem = adjust_address (force_const_mem (word_mode, src),
6300 BLKmode,
6301 UNITS_PER_WORD - size);
6302
6303 dest = adjust_address (dest, BLKmode, 0);
6304 set_mem_size (dest, size);
6305 s390_expand_movmem (dest, src_mem, GEN_INT (size));
6306 return true;
6307 }
6308
6309 /* (set (ze (mem)) (reg)). */
6310 else if (register_operand (src, word_mode))
6311 {
6312 if (bitsize <= 32)
6313 emit_move_insn (gen_rtx_ZERO_EXTRACT (word_mode, dest, op1,
6314 const0_rtx), src);
6315 else
6316 {
6317 /* Emit st,stcmh sequence. */
6318 int stcmh_width = bitsize - 32;
6319 int size = stcmh_width / BITS_PER_UNIT;
6320
6321 emit_move_insn (adjust_address (dest, SImode, size),
6322 gen_lowpart (SImode, src));
6323 set_mem_size (dest, size);
6324 emit_move_insn (gen_rtx_ZERO_EXTRACT (word_mode, dest,
6325 GEN_INT (stcmh_width),
6326 const0_rtx),
6327 gen_rtx_LSHIFTRT (word_mode, src, GEN_INT (32)));
6328 }
6329 return true;
6330 }
6331 }
6332
6333 /* Generate INSERT CHARACTERS UNDER MASK (IC, ICM et al). */
6334 if ((bitpos % BITS_PER_UNIT) == 0
6335 && (bitsize % BITS_PER_UNIT) == 0
6336 && (bitpos & 32) == ((bitpos + bitsize - 1) & 32)
6337 && MEM_P (src)
6338 && (mode == DImode || mode == SImode)
6339 && register_operand (dest, mode))
6340 {
6341 /* Emit a strict_low_part pattern if possible. */
6342 if (smode_bsize == bitsize && bitpos == mode_bsize - smode_bsize)
6343 {
6344 op = gen_rtx_STRICT_LOW_PART (VOIDmode, gen_lowpart (smode, dest));
6345 op = gen_rtx_SET (op, gen_lowpart (smode, src));
6346 clobber = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, CC_REGNUM));
6347 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clobber)));
6348 return true;
6349 }
6350
6351 /* ??? There are more powerful versions of ICM that are not
6352 completely represented in the md file. */
6353 }
6354
6355 /* For z10, generate ROTATE THEN INSERT SELECTED BITS (RISBG et al). */
6356 if (TARGET_Z10 && (mode == DImode || mode == SImode))
6357 {
6358 machine_mode mode_s = GET_MODE (src);
6359
6360 if (CONSTANT_P (src))
6361 {
6362 /* For constant zero values the representation with AND
6363 appears to be folded in more situations than the (set
6364 (zero_extract) ...).
6365 We only do this when the start and end of the bitfield
6366 remain in the same SImode chunk. That way nihf or nilf
6367 can be used.
6368 The AND patterns might still generate a risbg for this. */
6369 if (src == const0_rtx && bitpos / 32 == (bitpos + bitsize - 1) / 32)
6370 return false;
6371 else
6372 src = force_reg (mode, src);
6373 }
6374 else if (mode_s != mode)
6375 {
6376 gcc_assert (GET_MODE_BITSIZE (mode_s) >= bitsize);
6377 src = force_reg (mode_s, src);
6378 src = gen_lowpart (mode, src);
6379 }
6380
6381 op = gen_rtx_ZERO_EXTRACT (mode, dest, op1, op2);
6382 op = gen_rtx_SET (op, src);
6383
6384 if (!TARGET_ZEC12)
6385 {
6386 clobber = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, CC_REGNUM));
6387 op = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clobber));
6388 }
6389 emit_insn (op);
6390
6391 return true;
6392 }
6393
6394 return false;
6395 }
6396
6397 /* A subroutine of s390_expand_cs_hqi and s390_expand_atomic which returns a
6398 register that holds VAL of mode MODE shifted by COUNT bits. */
6399
6400 static inline rtx
6401 s390_expand_mask_and_shift (rtx val, machine_mode mode, rtx count)
6402 {
6403 val = expand_simple_binop (SImode, AND, val, GEN_INT (GET_MODE_MASK (mode)),
6404 NULL_RTX, 1, OPTAB_DIRECT);
6405 return expand_simple_binop (SImode, ASHIFT, val, count,
6406 NULL_RTX, 1, OPTAB_DIRECT);
6407 }
6408
6409 /* Generate a vector comparison COND of CMP_OP1 and CMP_OP2 and store
6410 the result in TARGET. */
6411
6412 void
6413 s390_expand_vec_compare (rtx target, enum rtx_code cond,
6414 rtx cmp_op1, rtx cmp_op2)
6415 {
6416 machine_mode mode = GET_MODE (target);
6417 bool neg_p = false, swap_p = false;
6418 rtx tmp;
6419
6420 if (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_VECTOR_FLOAT)
6421 {
6422 switch (cond)
6423 {
6424 /* NE a != b -> !(a == b) */
6425 case NE: cond = EQ; neg_p = true; break;
6426 /* UNGT a u> b -> !(b >= a) */
6427 case UNGT: cond = GE; neg_p = true; swap_p = true; break;
6428 /* UNGE a u>= b -> !(b > a) */
6429 case UNGE: cond = GT; neg_p = true; swap_p = true; break;
6430 /* LE: a <= b -> b >= a */
6431 case LE: cond = GE; swap_p = true; break;
6432 /* UNLE: a u<= b -> !(a > b) */
6433 case UNLE: cond = GT; neg_p = true; break;
6434 /* LT: a < b -> b > a */
6435 case LT: cond = GT; swap_p = true; break;
6436 /* UNLT: a u< b -> !(a >= b) */
6437 case UNLT: cond = GE; neg_p = true; break;
6438 case UNEQ:
6439 emit_insn (gen_vec_cmpuneq (target, cmp_op1, cmp_op2));
6440 return;
6441 case LTGT:
6442 emit_insn (gen_vec_cmpltgt (target, cmp_op1, cmp_op2));
6443 return;
6444 case ORDERED:
6445 emit_insn (gen_vec_ordered (target, cmp_op1, cmp_op2));
6446 return;
6447 case UNORDERED:
6448 emit_insn (gen_vec_unordered (target, cmp_op1, cmp_op2));
6449 return;
6450 default: break;
6451 }
6452 }
6453 else
6454 {
6455 switch (cond)
6456 {
6457 /* NE: a != b -> !(a == b) */
6458 case NE: cond = EQ; neg_p = true; break;
6459 /* GE: a >= b -> !(b > a) */
6460 case GE: cond = GT; neg_p = true; swap_p = true; break;
6461 /* GEU: a >= b -> !(b > a) */
6462 case GEU: cond = GTU; neg_p = true; swap_p = true; break;
6463 /* LE: a <= b -> !(a > b) */
6464 case LE: cond = GT; neg_p = true; break;
6465 /* LEU: a <= b -> !(a > b) */
6466 case LEU: cond = GTU; neg_p = true; break;
6467 /* LT: a < b -> b > a */
6468 case LT: cond = GT; swap_p = true; break;
6469 /* LTU: a < b -> b > a */
6470 case LTU: cond = GTU; swap_p = true; break;
6471 default: break;
6472 }
6473 }
6474
6475 if (swap_p)
6476 {
6477 tmp = cmp_op1; cmp_op1 = cmp_op2; cmp_op2 = tmp;
6478 }
6479
6480 emit_insn (gen_rtx_SET (target, gen_rtx_fmt_ee (cond,
6481 mode,
6482 cmp_op1, cmp_op2)));
6483 if (neg_p)
6484 emit_insn (gen_rtx_SET (target, gen_rtx_NOT (mode, target)));
6485 }
6486
6487 /* Expand the comparison CODE of CMP1 and CMP2 and copy 1 or 0 into
6488 TARGET if either all (ALL_P is true) or any (ALL_P is false) of the
6489 elements in CMP1 and CMP2 fulfill the comparison.
6490 This function is only used to emit patterns for the vx builtins and
6491 therefore only handles comparison codes required by the
6492 builtins. */
6493 void
6494 s390_expand_vec_compare_cc (rtx target, enum rtx_code code,
6495 rtx cmp1, rtx cmp2, bool all_p)
6496 {
6497 machine_mode cc_producer_mode, cc_consumer_mode, scratch_mode;
6498 rtx tmp_reg = gen_reg_rtx (SImode);
6499 bool swap_p = false;
6500
6501 if (GET_MODE_CLASS (GET_MODE (cmp1)) == MODE_VECTOR_INT)
6502 {
6503 switch (code)
6504 {
6505 case EQ:
6506 case NE:
6507 cc_producer_mode = CCVEQmode;
6508 break;
6509 case GE:
6510 case LT:
6511 code = swap_condition (code);
6512 swap_p = true;
6513 /* fallthrough */
6514 case GT:
6515 case LE:
6516 cc_producer_mode = CCVIHmode;
6517 break;
6518 case GEU:
6519 case LTU:
6520 code = swap_condition (code);
6521 swap_p = true;
6522 /* fallthrough */
6523 case GTU:
6524 case LEU:
6525 cc_producer_mode = CCVIHUmode;
6526 break;
6527 default:
6528 gcc_unreachable ();
6529 }
6530
6531 scratch_mode = GET_MODE (cmp1);
6532 /* These codes represent inverted CC interpretations. Inverting
6533 an ALL CC mode results in an ANY CC mode and the other way
6534 around. Invert the all_p flag here to compensate for
6535 that. */
6536 if (code == NE || code == LE || code == LEU)
6537 all_p = !all_p;
6538
6539 cc_consumer_mode = all_p ? CCVIALLmode : CCVIANYmode;
6540 }
6541 else if (GET_MODE_CLASS (GET_MODE (cmp1)) == MODE_VECTOR_FLOAT)
6542 {
6543 bool inv_p = false;
6544
6545 switch (code)
6546 {
6547 case EQ: cc_producer_mode = CCVEQmode; break;
6548 case NE: cc_producer_mode = CCVEQmode; inv_p = true; break;
6549 case GT: cc_producer_mode = CCVFHmode; break;
6550 case GE: cc_producer_mode = CCVFHEmode; break;
6551 case UNLE: cc_producer_mode = CCVFHmode; inv_p = true; break;
6552 case UNLT: cc_producer_mode = CCVFHEmode; inv_p = true; break;
6553 case LT: cc_producer_mode = CCVFHmode; code = GT; swap_p = true; break;
6554 case LE: cc_producer_mode = CCVFHEmode; code = GE; swap_p = true; break;
6555 default: gcc_unreachable ();
6556 }
6557 scratch_mode = mode_for_int_vector (GET_MODE (cmp1)).require ();
6558
6559 if (inv_p)
6560 all_p = !all_p;
6561
6562 cc_consumer_mode = all_p ? CCVFALLmode : CCVFANYmode;
6563 }
6564 else
6565 gcc_unreachable ();
6566
6567 if (swap_p)
6568 {
6569 rtx tmp = cmp2;
6570 cmp2 = cmp1;
6571 cmp1 = tmp;
6572 }
6573
6574 emit_insn (gen_rtx_PARALLEL (VOIDmode,
6575 gen_rtvec (2, gen_rtx_SET (
6576 gen_rtx_REG (cc_producer_mode, CC_REGNUM),
6577 gen_rtx_COMPARE (cc_producer_mode, cmp1, cmp2)),
6578 gen_rtx_CLOBBER (VOIDmode,
6579 gen_rtx_SCRATCH (scratch_mode)))));
6580 emit_move_insn (target, const0_rtx);
6581 emit_move_insn (tmp_reg, const1_rtx);
6582
6583 emit_move_insn (target,
6584 gen_rtx_IF_THEN_ELSE (SImode,
6585 gen_rtx_fmt_ee (code, VOIDmode,
6586 gen_rtx_REG (cc_consumer_mode, CC_REGNUM),
6587 const0_rtx),
6588 tmp_reg, target));
6589 }
6590
6591 /* Invert the comparison CODE applied to a CC mode. This is only safe
6592 if we know whether the result was created by a floating point
6593 compare or not. For the CCV modes this is encoded as part of the
6594 mode. */
6595 enum rtx_code
6596 s390_reverse_condition (machine_mode mode, enum rtx_code code)
6597 {
6598 /* Reversing an FP compare needs care -- an ordered compare
6599 becomes an unordered compare and vice versa.  */
6600 if (mode == CCVFALLmode || mode == CCVFANYmode)
6601 return reverse_condition_maybe_unordered (code);
6602 else if (mode == CCVIALLmode || mode == CCVIANYmode)
6603 return reverse_condition (code);
6604 else
6605 gcc_unreachable ();
6606 }
6607
6608 /* Generate a vector comparison expression loading either elements of
6609 THEN or ELS into TARGET depending on the comparison COND of CMP_OP1
6610 and CMP_OP2. */
6611
6612 void
6613 s390_expand_vcond (rtx target, rtx then, rtx els,
6614 enum rtx_code cond, rtx cmp_op1, rtx cmp_op2)
6615 {
6616 rtx tmp;
6617 machine_mode result_mode;
6618 rtx result_target;
6619
6620 machine_mode target_mode = GET_MODE (target);
6621 machine_mode cmp_mode = GET_MODE (cmp_op1);
6622 rtx op = (cond == LT) ? els : then;
6623
6624 /* Try to optimize x < 0 ? -1 : 0 into (signed) x >> 31
6625 and x < 0 ? 1 : 0 into (unsigned) x >> 31. Likewise
6626 for short and byte (x >> 15 and x >> 7 respectively). */
6627 if ((cond == LT || cond == GE)
6628 && target_mode == cmp_mode
6629 && cmp_op2 == CONST0_RTX (cmp_mode)
6630 && op == CONST0_RTX (target_mode)
6631 && s390_vector_mode_supported_p (target_mode)
6632 && GET_MODE_CLASS (target_mode) == MODE_VECTOR_INT)
6633 {
6634 rtx negop = (cond == LT) ? then : els;
6635
6636 int shift = GET_MODE_BITSIZE (GET_MODE_INNER (target_mode)) - 1;
6637
6638 /* if x < 0 ? 1 : 0 or if x >= 0 ? 0 : 1 */
6639 if (negop == CONST1_RTX (target_mode))
6640 {
6641 rtx res = expand_simple_binop (cmp_mode, LSHIFTRT, cmp_op1,
6642 GEN_INT (shift), target,
6643 1, OPTAB_DIRECT);
6644 if (res != target)
6645 emit_move_insn (target, res);
6646 return;
6647 }
6648
6649 /* if x < 0 ? -1 : 0 or if x >= 0 ? 0 : -1 */
6650 else if (all_ones_operand (negop, target_mode))
6651 {
6652 rtx res = expand_simple_binop (cmp_mode, ASHIFTRT, cmp_op1,
6653 GEN_INT (shift), target,
6654 0, OPTAB_DIRECT);
6655 if (res != target)
6656 emit_move_insn (target, res);
6657 return;
6658 }
6659 }
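/* Example: for V4SImode, x < 0 ? -1 : 0 becomes an element-wise
   arithmetic right shift by 31, and x < 0 ? 1 : 0 a logical one.  */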
6660
6661 /* We always use an integral type vector to hold the comparison
6662 result. */
6663 result_mode = mode_for_int_vector (cmp_mode).require ();
6664 result_target = gen_reg_rtx (result_mode);
6665
6666 /* We allow vector immediates as comparison operands that
6667 can be handled by the optimization above but not by the
6668 following code. Hence, force them into registers here. */
6669 if (!REG_P (cmp_op1))
6670 cmp_op1 = force_reg (GET_MODE (cmp_op1), cmp_op1);
6671
6672 if (!REG_P (cmp_op2))
6673 cmp_op2 = force_reg (GET_MODE (cmp_op2), cmp_op2);
6674
6675 s390_expand_vec_compare (result_target, cond,
6676 cmp_op1, cmp_op2);
6677
6678 /* If the results are supposed to be either -1 or 0 we are done
6679 since this is what our compare instructions generate anyway. */
6680 if (all_ones_operand (then, GET_MODE (then))
6681 && const0_operand (els, GET_MODE (els)))
6682 {
6683 emit_move_insn (target, gen_rtx_SUBREG (target_mode,
6684 result_target, 0));
6685 return;
6686 }
6687
6688 /* Otherwise we will do a vsel afterwards. */
6689 /* This gets triggered e.g.
6690 with gcc.c-torture/compile/pr53410-1.c */
6691 if (!REG_P (then))
6692 then = force_reg (target_mode, then);
6693
6694 if (!REG_P (els))
6695 els = force_reg (target_mode, els);
6696
6697 tmp = gen_rtx_fmt_ee (EQ, VOIDmode,
6698 result_target,
6699 CONST0_RTX (result_mode));
6700
6701 /* We compared the result against zero above so we have to swap then
6702 and els here. */
6703 tmp = gen_rtx_IF_THEN_ELSE (target_mode, tmp, els, then);
6704
6705 gcc_assert (target_mode == GET_MODE (then));
6706 emit_insn (gen_rtx_SET (target, tmp));
6707 }
6708
6709 /* Emit the RTX necessary to initialize the vector TARGET with values
6710 in VALS. */
6711 void
6712 s390_expand_vec_init (rtx target, rtx vals)
6713 {
6714 machine_mode mode = GET_MODE (target);
6715 machine_mode inner_mode = GET_MODE_INNER (mode);
6716 int n_elts = GET_MODE_NUNITS (mode);
6717 bool all_same = true, all_regs = true, all_const_int = true;
6718 rtx x;
6719 int i;
6720
6721 for (i = 0; i < n_elts; ++i)
6722 {
6723 x = XVECEXP (vals, 0, i);
6724
6725 if (!CONST_INT_P (x))
6726 all_const_int = false;
6727
6728 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
6729 all_same = false;
6730
6731 if (!REG_P (x))
6732 all_regs = false;
6733 }
6734
6735 /* Use vector gen mask or vector gen byte mask if possible. */
6736 if (all_same && all_const_int
6737 && (XVECEXP (vals, 0, 0) == const0_rtx
6738 || s390_contiguous_bitmask_vector_p (XVECEXP (vals, 0, 0),
6739 NULL, NULL)
6740 || s390_bytemask_vector_p (XVECEXP (vals, 0, 0), NULL)))
6741 {
6742 emit_insn (gen_rtx_SET (target,
6743 gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0))));
6744 return;
6745 }
6746
6747 /* Use vector replicate instructions. vlrep/vrepi/vrep */
6748 if (all_same)
6749 {
6750 rtx elem = XVECEXP (vals, 0, 0);
6751
6752 /* vec_splats accepts general_operand as source. */
6753 if (!general_operand (elem, GET_MODE (elem)))
6754 elem = force_reg (inner_mode, elem);
6755
6756 emit_insn (gen_rtx_SET (target, gen_rtx_VEC_DUPLICATE (mode, elem)));
6757 return;
6758 }
6759
6760 if (all_regs
6761 && REG_P (target)
6762 && n_elts == 2
6763 && GET_MODE_SIZE (inner_mode) == 8)
6764 {
6765 /* Use vector load pair. */
6766 emit_insn (gen_rtx_SET (target,
6767 gen_rtx_VEC_CONCAT (mode,
6768 XVECEXP (vals, 0, 0),
6769 XVECEXP (vals, 0, 1))));
6770 return;
6771 }
6772
6773 /* Use vector load logical element and zero. */
6774 if (TARGET_VXE && (mode == V4SImode || mode == V4SFmode))
6775 {
6776 bool found = true;
6777
6778 x = XVECEXP (vals, 0, 0);
6779 if (memory_operand (x, inner_mode))
6780 {
6781 for (i = 1; i < n_elts; ++i)
6782 found = found && XVECEXP (vals, 0, i) == const0_rtx;
6783
6784 if (found)
6785 {
6786 machine_mode half_mode = (inner_mode == SFmode
6787 ? V2SFmode : V2SImode);
6788 emit_insn (gen_rtx_SET (target,
6789 gen_rtx_VEC_CONCAT (mode,
6790 gen_rtx_VEC_CONCAT (half_mode,
6791 x,
6792 const0_rtx),
6793 gen_rtx_VEC_CONCAT (half_mode,
6794 const0_rtx,
6795 const0_rtx))));
6796 return;
6797 }
6798 }
6799 }
6800
6801 /* We are about to set the vector elements one by one.  Zero out the
6802 full register first in order to help the data flow framework
6803 detect it as a full VR set.  */
6804 emit_insn (gen_rtx_SET (target, CONST0_RTX (mode)));
6805
6806 /* Unfortunately the vec_init expander is not allowed to fail. So
6807 we have to implement the fallback ourselves. */
6808 for (i = 0; i < n_elts; i++)
6809 {
6810 rtx elem = XVECEXP (vals, 0, i);
6811 if (!general_operand (elem, GET_MODE (elem)))
6812 elem = force_reg (inner_mode, elem);
6813
6814 emit_insn (gen_rtx_SET (target,
6815 gen_rtx_UNSPEC (mode,
6816 gen_rtvec (3, elem,
6817 GEN_INT (i), target),
6818 UNSPEC_VEC_SET)));
6819 }
6820 }
6821
6822 /* Structure to hold the initial parameters for a compare_and_swap operation
6823 in HImode and QImode. */
6824
6825 struct alignment_context
6826 {
6827 rtx memsi; /* SI aligned memory location. */
6828 rtx shift; /* Bit offset with regard to lsb. */
6829 rtx modemask; /* Mask of the HQImode shifted by SHIFT bits. */
6830 rtx modemaski; /* ~modemask */
6831 bool aligned; /* True if memory is aligned, false else. */
6832 };
6833
6834 /* A subroutine of s390_expand_cs_hqi and s390_expand_atomic to initialize
6835 structure AC for transparent simplification, if the memory alignment is known
6836 to be at least 32 bits.  MEM is the memory location for the actual operation
6837 and MODE its mode. */
6838
6839 static void
6840 init_alignment_context (struct alignment_context *ac, rtx mem,
6841 machine_mode mode)
6842 {
6843 ac->shift = GEN_INT (GET_MODE_SIZE (SImode) - GET_MODE_SIZE (mode));
6844 ac->aligned = (MEM_ALIGN (mem) >= GET_MODE_BITSIZE (SImode));
6845
6846 if (ac->aligned)
6847 ac->memsi = adjust_address (mem, SImode, 0); /* Memory is aligned. */
6848 else
6849 {
6850 /* Alignment is unknown. */
6851 rtx byteoffset, addr, align;
6852
6853 /* Force the address into a register. */
6854 addr = force_reg (Pmode, XEXP (mem, 0));
6855
6856 /* Align it to SImode. */
6857 align = expand_simple_binop (Pmode, AND, addr,
6858 GEN_INT (-GET_MODE_SIZE (SImode)),
6859 NULL_RTX, 1, OPTAB_DIRECT);
6860 /* Generate MEM. */
6861 ac->memsi = gen_rtx_MEM (SImode, align);
6862 MEM_VOLATILE_P (ac->memsi) = MEM_VOLATILE_P (mem);
6863 set_mem_alias_set (ac->memsi, ALIAS_SET_MEMORY_BARRIER);
6864 set_mem_align (ac->memsi, GET_MODE_BITSIZE (SImode));
6865
6866 /* Calculate shiftcount. */
6867 byteoffset = expand_simple_binop (Pmode, AND, addr,
6868 GEN_INT (GET_MODE_SIZE (SImode) - 1),
6869 NULL_RTX, 1, OPTAB_DIRECT);
6870 /* As we already have some offset, evaluate the remaining distance. */
6871 ac->shift = expand_simple_binop (SImode, MINUS, ac->shift, byteoffset,
6872 NULL_RTX, 1, OPTAB_DIRECT);
6873 }
6874
6875 /* Shift is the byte count, but we need the bitcount. */
6876 ac->shift = expand_simple_binop (SImode, ASHIFT, ac->shift, GEN_INT (3),
6877 NULL_RTX, 1, OPTAB_DIRECT);
6878
6879 /* Calculate masks. */
6880 ac->modemask = expand_simple_binop (SImode, ASHIFT,
6881 GEN_INT (GET_MODE_MASK (mode)),
6882 ac->shift, NULL_RTX, 1, OPTAB_DIRECT);
6883 ac->modemaski = expand_simple_unop (SImode, NOT, ac->modemask,
6884 NULL_RTX, 1);
6885 }
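/* Worked example: for a QImode access with unknown alignment on this
   big-endian target we get memsi = *(addr & -4), byteoffset = addr & 3
   and shift = (3 - byteoffset) * 8, so a byte at offset 0 ends up with
   shift == 24 and modemask == 0xff000000.  */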
6886
6887 /* A subroutine of s390_expand_cs_hqi. Insert INS into VAL. If possible,
6888 use a single insv insn into SEQ2. Otherwise, put prep insns in SEQ1 and
6889 perform the merge in SEQ2. */
6890
6891 static rtx
6892 s390_two_part_insv (struct alignment_context *ac, rtx *seq1, rtx *seq2,
6893 machine_mode mode, rtx val, rtx ins)
6894 {
6895 rtx tmp;
6896
6897 if (ac->aligned)
6898 {
6899 start_sequence ();
6900 tmp = copy_to_mode_reg (SImode, val);
6901 if (s390_expand_insv (tmp, GEN_INT (GET_MODE_BITSIZE (mode)),
6902 const0_rtx, ins))
6903 {
6904 *seq1 = NULL;
6905 *seq2 = get_insns ();
6906 end_sequence ();
6907 return tmp;
6908 }
6909 end_sequence ();
6910 }
6911
6912 /* Failed to use insv. Generate a two part shift and mask. */
6913 start_sequence ();
6914 tmp = s390_expand_mask_and_shift (ins, mode, ac->shift);
6915 *seq1 = get_insns ();
6916 end_sequence ();
6917
6918 start_sequence ();
6919 tmp = expand_simple_binop (SImode, IOR, tmp, val, NULL_RTX, 1, OPTAB_DIRECT);
6920 *seq2 = get_insns ();
6921 end_sequence ();
6922
6923 return tmp;
6924 }
6925
6926 /* Expand an atomic compare and swap operation for HImode and QImode. MEM is
6927 the memory location, CMP the old value to compare MEM with and NEW_RTX the
6928 value to set if CMP == MEM. */
6929
6930 static void
6931 s390_expand_cs_hqi (machine_mode mode, rtx btarget, rtx vtarget, rtx mem,
6932 rtx cmp, rtx new_rtx, bool is_weak)
6933 {
6934 struct alignment_context ac;
6935 rtx cmpv, newv, val, cc, seq0, seq1, seq2, seq3;
6936 rtx res = gen_reg_rtx (SImode);
6937 rtx_code_label *csloop = NULL, *csend = NULL;
6938
6939 gcc_assert (MEM_P (mem));
6940
6941 init_alignment_context (&ac, mem, mode);
6942
6943 /* Load full word. Subsequent loads are performed by CS. */
6944 val = expand_simple_binop (SImode, AND, ac.memsi, ac.modemaski,
6945 NULL_RTX, 1, OPTAB_DIRECT);
6946
6947 /* Prepare insertions of cmp and new_rtx into the loaded value. When
6948 possible, we try to use insv to make this happen efficiently. If
6949 that fails we'll generate code both inside and outside the loop. */
6950 cmpv = s390_two_part_insv (&ac, &seq0, &seq2, mode, val, cmp);
6951 newv = s390_two_part_insv (&ac, &seq1, &seq3, mode, val, new_rtx);
6952
6953 if (seq0)
6954 emit_insn (seq0);
6955 if (seq1)
6956 emit_insn (seq1);
6957
6958 /* Start CS loop. */
6959 if (!is_weak)
6960 {
6961 /* Begin assuming success. */
6962 emit_move_insn (btarget, const1_rtx);
6963
6964 csloop = gen_label_rtx ();
6965 csend = gen_label_rtx ();
6966 emit_label (csloop);
6967 }
6968
6969 /* val = "<mem>00..0<mem>"
6970 * cmp = "00..0<cmp>00..0"
6971 * new = "00..0<new>00..0"
6972 */
6973
6974 emit_insn (seq2);
6975 emit_insn (seq3);
6976
6977 cc = s390_emit_compare_and_swap (EQ, res, ac.memsi, cmpv, newv, CCZ1mode);
6978 if (is_weak)
6979 emit_insn (gen_cstorecc4 (btarget, cc, XEXP (cc, 0), XEXP (cc, 1)));
6980 else
6981 {
6982 rtx tmp;
6983
6984 /* Jump to end if we're done (likely?). */
6985 s390_emit_jump (csend, cc);
6986
6987 /* Check for changes outside mode, and loop internal if so.
6988 Arrange the moves so that the compare is adjacent to the
6989 branch so that we can generate CRJ. */
6990 tmp = copy_to_reg (val);
6991 force_expand_binop (SImode, and_optab, res, ac.modemaski, val,
6992 1, OPTAB_DIRECT);
6993 cc = s390_emit_compare (NE, val, tmp);
6994 s390_emit_jump (csloop, cc);
6995
6996 /* Failed. */
6997 emit_move_insn (btarget, const0_rtx);
6998 emit_label (csend);
6999 }
7000
7001 /* Return the correct part of the bitfield. */
7002 convert_move (vtarget, expand_simple_binop (SImode, LSHIFTRT, res, ac.shift,
7003 NULL_RTX, 1, OPTAB_DIRECT), 1);
7004 }
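/* Rough C model of the strong variant above, with cs (p, c, n)
   standing for COMPARE AND SWAP on the containing word, returning the
   old memory contents:

       for (;;)
         {
           cmpv = insert (val, cmp);        -- val holds the outside bits
           newv = insert (val, new);
           res = cs (&word, cmpv, newv);
           if (res == cmpv)
             return 1;                      -- success
           if ((res & ~modemask) == val)
             return 0;                      -- our field itself differed
           val = res & ~modemask;           -- outside bits changed; retry
         }
*/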
7005
7006 /* Variant of s390_expand_cs for SI, DI and TI modes. */
7007 static void
7008 s390_expand_cs_tdsi (machine_mode mode, rtx btarget, rtx vtarget, rtx mem,
7009 rtx cmp, rtx new_rtx, bool is_weak)
7010 {
7011 rtx output = vtarget;
7012 rtx_code_label *skip_cs_label = NULL;
7013 bool do_const_opt = false;
7014
7015 if (!register_operand (output, mode))
7016 output = gen_reg_rtx (mode);
7017
7018 /* If IS_WEAK is true and the INPUT value is a constant, compare the memory
7019 with the constant first and skip the compare_and_swap because it is very
7020 expensive and likely to fail anyway.
7021 Note 1: This is done only for IS_WEAK.  C11 allows optimizations that may
7022 cause spurious failures in that case.
7023 Note 2: It may be useful to do this also for non-constant INPUT.
7024 Note 3: Currently only targets with "load on condition" are supported
7025 (z196 and newer). */
7026
7027 if (TARGET_Z196
7028 && (mode == SImode || mode == DImode))
7029 do_const_opt = (is_weak && CONST_INT_P (cmp));
7030
7031 if (do_const_opt)
7032 {
7033 rtx cc = gen_rtx_REG (CCZmode, CC_REGNUM);
7034
7035 skip_cs_label = gen_label_rtx ();
7036 emit_move_insn (btarget, const0_rtx);
7037 if (CONST_INT_P (cmp) && INTVAL (cmp) == 0)
7038 {
7039 rtvec lt = rtvec_alloc (2);
7040
7041 /* Load-and-test + conditional jump. */
7042 RTVEC_ELT (lt, 0)
7043 = gen_rtx_SET (cc, gen_rtx_COMPARE (CCZmode, mem, cmp));
7044 RTVEC_ELT (lt, 1) = gen_rtx_SET (output, mem);
7045 emit_insn (gen_rtx_PARALLEL (VOIDmode, lt));
7046 }
7047 else
7048 {
7049 emit_move_insn (output, mem);
7050 emit_insn (gen_rtx_SET (cc, gen_rtx_COMPARE (CCZmode, output, cmp)));
7051 }
7052 s390_emit_jump (skip_cs_label, gen_rtx_NE (VOIDmode, cc, const0_rtx));
7053 add_reg_br_prob_note (get_last_insn (),
7054 profile_probability::very_unlikely ());
7055 /* If the jump is not taken, OUTPUT is the expected value. */
7056 cmp = output;
7057 /* Reload newval to a register manually, *after* the compare and jump
7058 above. Otherwise Reload might place it before the jump. */
7059 }
7060 else
7061 cmp = force_reg (mode, cmp);
7062 new_rtx = force_reg (mode, new_rtx);
7063 s390_emit_compare_and_swap (EQ, output, mem, cmp, new_rtx,
7064 (do_const_opt) ? CCZmode : CCZ1mode);
7065 if (skip_cs_label != NULL)
7066 emit_label (skip_cs_label);
7067
7068 /* We deliberately accept non-register operands in the predicate
7069 to ensure the write back to the output operand happens *before*
7070 the store-flags code below. This makes it easier for combine
7071 to merge the store-flags code with a potential test-and-branch
7072 pattern following (immediately!) afterwards. */
7073 if (output != vtarget)
7074 emit_move_insn (vtarget, output);
7075
7076 if (do_const_opt)
7077 {
7078 rtx cc, cond, ite;
7079
7080 /* Do not use gen_cstorecc4 here because it writes either 1 or 0, but
7081 btarget has already been initialized with 0 above. */
7082 cc = gen_rtx_REG (CCZmode, CC_REGNUM);
7083 cond = gen_rtx_EQ (VOIDmode, cc, const0_rtx);
7084 ite = gen_rtx_IF_THEN_ELSE (SImode, cond, const1_rtx, btarget);
7085 emit_insn (gen_rtx_SET (btarget, ite));
7086 }
7087 else
7088 {
7089 rtx cc, cond;
7090
7091 cc = gen_rtx_REG (CCZ1mode, CC_REGNUM);
7092 cond = gen_rtx_EQ (SImode, cc, const0_rtx);
7093 emit_insn (gen_cstorecc4 (btarget, cond, cc, const0_rtx));
7094 }
7095 }
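/* Outline of the do_const_opt path, with cs as in the comment after
   s390_expand_cs_hqi:

       btarget = 0;
       output = mem;                        -- load (and test if cmp == 0)
       if (output != cmp) goto skip;        -- expected value not present
       output = cs (mem, cmp, new);
     skip:
       btarget = cc == 0 ? 1 : btarget;     -- via load on condition
*/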
7096
7097 /* Expand an atomic compare and swap operation. MEM is the memory location,
7098 CMP the old value to compare MEM with and NEW_RTX the value to set if
7099 CMP == MEM. */
7100
7101 void
7102 s390_expand_cs (machine_mode mode, rtx btarget, rtx vtarget, rtx mem,
7103 rtx cmp, rtx new_rtx, bool is_weak)
7104 {
7105 switch (mode)
7106 {
7107 case E_TImode:
7108 case E_DImode:
7109 case E_SImode:
7110 s390_expand_cs_tdsi (mode, btarget, vtarget, mem, cmp, new_rtx, is_weak);
7111 break;
7112 case E_HImode:
7113 case E_QImode:
7114 s390_expand_cs_hqi (mode, btarget, vtarget, mem, cmp, new_rtx, is_weak);
7115 break;
7116 default:
7117 gcc_unreachable ();
7118 }
7119 }
7120
7121 /* Expand an atomic_exchange operation simulated with a compare-and-swap loop.
7122 The memory location MEM is set to INPUT. OUTPUT is set to the previous value
7123 of MEM. */
7124
7125 void
7126 s390_expand_atomic_exchange_tdsi (rtx output, rtx mem, rtx input)
7127 {
7128 machine_mode mode = GET_MODE (mem);
7129 rtx_code_label *csloop;
7130
7131 if (TARGET_Z196
7132 && (mode == DImode || mode == SImode)
7133 && CONST_INT_P (input) && INTVAL (input) == 0)
7134 {
7135 emit_move_insn (output, const0_rtx);
7136 if (mode == DImode)
7137 emit_insn (gen_atomic_fetch_anddi (output, mem, const0_rtx, input));
7138 else
7139 emit_insn (gen_atomic_fetch_andsi (output, mem, const0_rtx, input));
7140 return;
7141 }
7142
7143 input = force_reg (mode, input);
7144 emit_move_insn (output, mem);
7145 csloop = gen_label_rtx ();
7146 emit_label (csloop);
7147 s390_emit_jump (csloop, s390_emit_compare_and_swap (NE, output, mem, output,
7148 input, CCZ1mode));
7149 }
7150
7151 /* Expand an atomic operation CODE of mode MODE. MEM is the memory location
7152 and VAL the value to play with. If AFTER is true then store the value
7153 MEM holds after the operation, if AFTER is false then store the value MEM
7154 holds before the operation. If TARGET is zero then discard that value, else
7155 store it to TARGET. */
7156
7157 void
7158 s390_expand_atomic (machine_mode mode, enum rtx_code code,
7159 rtx target, rtx mem, rtx val, bool after)
7160 {
7161 struct alignment_context ac;
7162 rtx cmp;
7163 rtx new_rtx = gen_reg_rtx (SImode);
7164 rtx orig = gen_reg_rtx (SImode);
7165 rtx_code_label *csloop = gen_label_rtx ();
7166
7167 gcc_assert (!target || register_operand (target, VOIDmode));
7168 gcc_assert (MEM_P (mem));
7169
7170 init_alignment_context (&ac, mem, mode);
7171
7172 /* Shift val to the correct bit positions.
7173 Preserve "icm", but prevent "ex icm". */
7174 if (!(ac.aligned && code == SET && MEM_P (val)))
7175 val = s390_expand_mask_and_shift (val, mode, ac.shift);
7176
7177 /* Further preparation insns. */
7178 if (code == PLUS || code == MINUS)
7179 emit_move_insn (orig, val);
7180 else if (code == MULT || code == AND) /* val = "11..1<val>11..1" */
7181 val = expand_simple_binop (SImode, XOR, val, ac.modemaski,
7182 NULL_RTX, 1, OPTAB_DIRECT);
7183
7184 /* Load full word. Subsequent loads are performed by CS. */
7185 cmp = force_reg (SImode, ac.memsi);
7186
7187 /* Start CS loop. */
7188 emit_label (csloop);
7189 emit_move_insn (new_rtx, cmp);
7190
7191 /* Patch new with val at correct position. */
7192 switch (code)
7193 {
7194 case PLUS:
7195 case MINUS:
7196 val = expand_simple_binop (SImode, code, new_rtx, orig,
7197 NULL_RTX, 1, OPTAB_DIRECT);
7198 val = expand_simple_binop (SImode, AND, val, ac.modemask,
7199 NULL_RTX, 1, OPTAB_DIRECT);
7200 /* FALLTHRU */
7201 case SET:
7202 if (ac.aligned && MEM_P (val))
7203 store_bit_field (new_rtx, GET_MODE_BITSIZE (mode), 0,
7204 0, 0, SImode, val, false);
7205 else
7206 {
7207 new_rtx = expand_simple_binop (SImode, AND, new_rtx, ac.modemaski,
7208 NULL_RTX, 1, OPTAB_DIRECT);
7209 new_rtx = expand_simple_binop (SImode, IOR, new_rtx, val,
7210 NULL_RTX, 1, OPTAB_DIRECT);
7211 }
7212 break;
7213 case AND:
7214 case IOR:
7215 case XOR:
7216 new_rtx = expand_simple_binop (SImode, code, new_rtx, val,
7217 NULL_RTX, 1, OPTAB_DIRECT);
7218 break;
7219 case MULT: /* NAND */
7220 new_rtx = expand_simple_binop (SImode, AND, new_rtx, val,
7221 NULL_RTX, 1, OPTAB_DIRECT);
7222 new_rtx = expand_simple_binop (SImode, XOR, new_rtx, ac.modemask,
7223 NULL_RTX, 1, OPTAB_DIRECT);
7224 break;
7225 default:
7226 gcc_unreachable ();
7227 }
7228
7229 s390_emit_jump (csloop, s390_emit_compare_and_swap (NE, cmp,
7230 ac.memsi, cmp, new_rtx,
7231 CCZ1mode));
7232
7233 /* Return the correct part of the bitfield. */
7234 if (target)
7235 convert_move (target, expand_simple_binop (SImode, LSHIFTRT,
7236 after ? new_rtx : cmp, ac.shift,
7237 NULL_RTX, 1, OPTAB_DIRECT), 1);
7238 }
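/* Illustrative sketch only (not the exact emitted sequence): an atomic
   OR on a halfword becomes a compare-and-swap loop on the containing
   word, roughly

        l     %r1,0(%r5)        # load containing word
   .Lcsloop:
        lr    %r2,%r1           # new = cmp
        or    %r2,%r4           # patch in the shifted value
        cs    %r1,%r2,0(%r5)    # try swap; %r1 reloaded on failure
        jne   .Lcsloop

   with the shift and mask insns supplied by init_alignment_context.  */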
7239
7240 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
7241 We need to emit DTP-relative relocations. */
7242
7243 static void s390_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
7244
7245 static void
7246 s390_output_dwarf_dtprel (FILE *file, int size, rtx x)
7247 {
7248 switch (size)
7249 {
7250 case 4:
7251 fputs ("\t.long\t", file);
7252 break;
7253 case 8:
7254 fputs ("\t.quad\t", file);
7255 break;
7256 default:
7257 gcc_unreachable ();
7258 }
7259 output_addr_const (file, x);
7260 fputs ("@DTPOFF", file);
7261 }
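/* For example, with SIZE == 8 and a reference to symbol foo this
   emits (illustrative):

        .quad   foo@DTPOFF  */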
7262
7263 /* Return the proper mode for REGNO being represented in the dwarf
7264 unwind table. */
7265 machine_mode
7266 s390_dwarf_frame_reg_mode (int regno)
7267 {
7268 machine_mode save_mode = default_dwarf_frame_reg_mode (regno);
7269
7270 /* Make sure not to return DImode for any GPR with -m31 -mzarch. */
7271 if (GENERAL_REGNO_P (regno))
7272 save_mode = Pmode;
7273
7274 /* The rightmost 64 bits of vector registers are call-clobbered. */
7275 if (GET_MODE_SIZE (save_mode) > 8)
7276 save_mode = DImode;
7277
7278 return save_mode;
7279 }
7280
7281 #ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
7282 /* Implement TARGET_MANGLE_TYPE. */
7283
7284 static const char *
7285 s390_mangle_type (const_tree type)
7286 {
7287 type = TYPE_MAIN_VARIANT (type);
7288
7289 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
7290 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
7291 return NULL;
7292
7293 if (type == s390_builtin_types[BT_BV16QI]) return "U6__boolc";
7294 if (type == s390_builtin_types[BT_BV8HI]) return "U6__bools";
7295 if (type == s390_builtin_types[BT_BV4SI]) return "U6__booli";
7296 if (type == s390_builtin_types[BT_BV2DI]) return "U6__booll";
7297
7298 if (TYPE_MAIN_VARIANT (type) == long_double_type_node
7299 && TARGET_LONG_DOUBLE_128)
7300 return "g";
7301
7302 /* For all other types, use normal C++ mangling. */
7303 return NULL;
7304 }
7305 #endif
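/* Mangling example (illustrative): with the vector extension, the
   "vector bool int" type mangles to "U6__booli", and "long double"
   with 128-bit long double support mangles to "g".  */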
7306
7307 /* In the name of slightly smaller debug output, and to cater to
7308 general assembler lossage, recognize various UNSPEC sequences
7309 and turn them back into a direct symbol reference. */
7310
7311 static rtx
7312 s390_delegitimize_address (rtx orig_x)
7313 {
7314 rtx x, y;
7315
7316 orig_x = delegitimize_mem_from_attrs (orig_x);
7317 x = orig_x;
7318
7319 /* Extract the symbol ref from:
7320 (plus:SI (reg:SI 12 %r12)
7321 (const:SI (unspec:SI [(symbol_ref/f:SI ("*.LC0"))]
7322 UNSPEC_GOTOFF/PLTOFF)))
7323 and
7324 (plus:SI (reg:SI 12 %r12)
7325 (const:SI (plus:SI (unspec:SI [(symbol_ref:SI ("L"))]
7326 UNSPEC_GOTOFF/PLTOFF)
7327 (const_int 4 [0x4])))) */
7328 if (GET_CODE (x) == PLUS
7329 && REG_P (XEXP (x, 0))
7330 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM
7331 && GET_CODE (XEXP (x, 1)) == CONST)
7332 {
7333 HOST_WIDE_INT offset = 0;
7334
7335 /* The const operand. */
7336 y = XEXP (XEXP (x, 1), 0);
7337
7338 if (GET_CODE (y) == PLUS
7339 && GET_CODE (XEXP (y, 1)) == CONST_INT)
7340 {
7341 offset = INTVAL (XEXP (y, 1));
7342 y = XEXP (y, 0);
7343 }
7344
7345 if (GET_CODE (y) == UNSPEC
7346 && (XINT (y, 1) == UNSPEC_GOTOFF
7347 || XINT (y, 1) == UNSPEC_PLTOFF))
7348 return plus_constant (Pmode, XVECEXP (y, 0, 0), offset);
7349 }
7350
7351 if (GET_CODE (x) != MEM)
7352 return orig_x;
7353
7354 x = XEXP (x, 0);
7355 if (GET_CODE (x) == PLUS
7356 && GET_CODE (XEXP (x, 1)) == CONST
7357 && GET_CODE (XEXP (x, 0)) == REG
7358 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
7359 {
7360 y = XEXP (XEXP (x, 1), 0);
7361 if (GET_CODE (y) == UNSPEC
7362 && XINT (y, 1) == UNSPEC_GOT)
7363 y = XVECEXP (y, 0, 0);
7364 else
7365 return orig_x;
7366 }
7367 else if (GET_CODE (x) == CONST)
7368 {
7369 /* Extract the symbol ref from:
7370 (mem:QI (const:DI (unspec:DI [(symbol_ref:DI ("foo"))]
7371 UNSPEC_PLT/GOTENT))) */
7372
7373 y = XEXP (x, 0);
7374 if (GET_CODE (y) == UNSPEC
7375 && (XINT (y, 1) == UNSPEC_GOTENT
7376 || XINT (y, 1) == UNSPEC_PLT))
7377 y = XVECEXP (y, 0, 0);
7378 else
7379 return orig_x;
7380 }
7381 else
7382 return orig_x;
7383
7384 if (GET_MODE (orig_x) != Pmode)
7385 {
7386 if (GET_MODE (orig_x) == BLKmode)
7387 return orig_x;
7388 y = lowpart_subreg (GET_MODE (orig_x), y, Pmode);
7389 if (y == NULL_RTX)
7390 return orig_x;
7391 }
7392 return y;
7393 }
7394
7395 /* Output operand OP to stdio stream FILE.
7396 OP is an address (register + offset) which is not used to address data;
7397 instead the rightmost bits are interpreted as the value. */
7398
7399 static void
7400 print_addrstyle_operand (FILE *file, rtx op)
7401 {
7402 HOST_WIDE_INT offset;
7403 rtx base;
7404
7405 /* Extract base register and offset. */
7406 if (!s390_decompose_addrstyle_without_index (op, &base, &offset))
7407 gcc_unreachable ();
7408
7409 /* Sanity check. */
7410 if (base)
7411 {
7412 gcc_assert (GET_CODE (base) == REG);
7413 gcc_assert (REGNO (base) < FIRST_PSEUDO_REGISTER);
7414 gcc_assert (REGNO_REG_CLASS (REGNO (base)) == ADDR_REGS);
7415 }
7416
7417 /* Offsets are restricted to twelve bits. */
7418 fprintf (file, HOST_WIDE_INT_PRINT_DEC, offset & ((1 << 12) - 1));
7419 if (base)
7420 fprintf (file, "(%s)", reg_names[REGNO (base)]);
7421 }
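/* E.g., a shift count of (plus (reg %r1) (const_int 3)) prints as
   "3(%r1)", while a plain (const_int 7) prints simply as "7"
   (illustrative).  */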
7422
7423 /* Assigns the number of NOP halfwords to be emitted before and after the
7424 function label to *HW_BEFORE and *HW_AFTER. Both pointers must not be NULL.
7425 If hotpatching is disabled for the function, the values are set to zero.
7426 */
7427
7428 static void
7429 s390_function_num_hotpatch_hw (tree decl,
7430 int *hw_before,
7431 int *hw_after)
7432 {
7433 tree attr;
7434
7435 attr = lookup_attribute ("hotpatch", DECL_ATTRIBUTES (decl));
7436
7437 /* Handle the arguments of the hotpatch attribute. The values
7438 specified via attribute might override the cmdline argument
7439 values. */
7440 if (attr)
7441 {
7442 tree args = TREE_VALUE (attr);
7443
7444 *hw_before = TREE_INT_CST_LOW (TREE_VALUE (args));
7445 *hw_after = TREE_INT_CST_LOW (TREE_VALUE (TREE_CHAIN (args)));
7446 }
7447 else
7448 {
7449 /* Use the values specified by the cmdline arguments. */
7450 *hw_before = s390_hotpatch_hw_before_label;
7451 *hw_after = s390_hotpatch_hw_after_label;
7452 }
7453 }
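/* Usage example (illustrative): a function declared as

        void foo (void) __attribute__ ((hotpatch (1, 2)));

   gets one halfword of NOPs before its label and two after,
   overriding any -mhotpatch= command line setting.  */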
7454
7455 /* Write the current .machine and .machinemode specification to the assembler
7456 file. */
7457
7458 #ifdef HAVE_AS_MACHINE_MACHINEMODE
7459 static void
7460 s390_asm_output_machine_for_arch (FILE *asm_out_file)
7461 {
7462 fprintf (asm_out_file, "\t.machinemode %s\n",
7463 (TARGET_ZARCH) ? "zarch" : "esa");
7464 fprintf (asm_out_file, "\t.machine \"%s",
7465 processor_table[s390_arch].binutils_name);
7466 if (S390_USE_ARCHITECTURE_MODIFIERS)
7467 {
7468 int cpu_flags;
7469
7470 cpu_flags = processor_flags_table[(int) s390_arch];
7471 if (TARGET_HTM && !(cpu_flags & PF_TX))
7472 fprintf (asm_out_file, "+htm");
7473 else if (!TARGET_HTM && (cpu_flags & PF_TX))
7474 fprintf (asm_out_file, "+nohtm");
7475 if (TARGET_VX && !(cpu_flags & PF_VX))
7476 fprintf (asm_out_file, "+vx");
7477 else if (!TARGET_VX && (cpu_flags & PF_VX))
7478 fprintf (asm_out_file, "+novx");
7479 }
7480 fprintf (asm_out_file, "\"\n");
7481 }
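/* E.g., compiling with -march=z13 -mzarch -mno-vx might produce
   (illustrative):

        .machinemode zarch
        .machine "z13+novx"  */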
7482
7483 /* Write an extra function header before the very start of the function. */
7484
7485 void
7486 s390_asm_output_function_prefix (FILE *asm_out_file,
7487 const char *fnname ATTRIBUTE_UNUSED)
7488 {
7489 if (DECL_FUNCTION_SPECIFIC_TARGET (current_function_decl) == NULL)
7490 return;
7491 /* Since only the function specific options are saved but not the indications
7492 which options are set, it's too much work here to figure out which options
7493 have actually changed. Thus, generate .machine and .machinemode whenever a
7494 function has the target attribute or pragma. */
7495 fprintf (asm_out_file, "\t.machinemode push\n");
7496 fprintf (asm_out_file, "\t.machine push\n");
7497 s390_asm_output_machine_for_arch (asm_out_file);
7498 }
7499
7500 /* Write an extra function footer after the very end of the function. */
7501
7502 void
7503 s390_asm_declare_function_size (FILE *asm_out_file,
7504 const char *fnname, tree decl)
7505 {
7506 if (!flag_inhibit_size_directive)
7507 ASM_OUTPUT_MEASURED_SIZE (asm_out_file, fnname);
7508 if (DECL_FUNCTION_SPECIFIC_TARGET (decl) == NULL)
7509 return;
7510 fprintf (asm_out_file, "\t.machine pop\n");
7511 fprintf (asm_out_file, "\t.machinemode pop\n");
7512 }
7513 #endif
7514
7515 /* Write the extra assembler code needed to declare a function properly. */
7516
7517 void
7518 s390_asm_output_function_label (FILE *asm_out_file, const char *fname,
7519 tree decl)
7520 {
7521 int hw_before, hw_after;
7522
7523 s390_function_num_hotpatch_hw (decl, &hw_before, &hw_after);
7524 if (hw_before > 0)
7525 {
7526 unsigned int function_alignment;
7527 int i;
7528
7529 /* Add a trampoline code area before the function label and initialize it
7530 with two-byte nop instructions. This area can be overwritten with code
7531 that jumps to a patched version of the function. */
7532 asm_fprintf (asm_out_file, "\tnopr\t%%r0"
7533 "\t# pre-label NOPs for hotpatch (%d halfwords)\n",
7534 hw_before);
7535 for (i = 1; i < hw_before; i++)
7536 fputs ("\tnopr\t%r0\n", asm_out_file);
7537
7538 /* Note: The function label must be aligned so that (a) the bytes of the
7539 following nop do not cross a cacheline boundary, and (b) a jump address
7540 (eight bytes for 64-bit targets, four bytes for 32-bit targets) can be
7541 stored directly before the label without crossing a cacheline
7542 boundary. All this is necessary to make sure the trampoline code can
7543 be changed atomically.
7544 This alignment is done automatically using FUNCTION_BOUNDARY, but
7545 if there are NOPs before the function label, the alignment is placed
7546 before them. So it is necessary to duplicate the alignment after the
7547 NOPs. */
7548 function_alignment = MAX (8, DECL_ALIGN (decl) / BITS_PER_UNIT);
7549 if (! DECL_USER_ALIGN (decl))
7550 function_alignment = MAX (function_alignment,
7551 (unsigned int) align_functions);
7552 fputs ("\t# alignment for hotpatch\n", asm_out_file);
7553 ASM_OUTPUT_ALIGN (asm_out_file, floor_log2 (function_alignment));
7554 }
7555
7556 if (S390_USE_TARGET_ATTRIBUTE && TARGET_DEBUG_ARG)
7557 {
7558 asm_fprintf (asm_out_file, "\t# fn:%s ar%d\n", fname, s390_arch);
7559 asm_fprintf (asm_out_file, "\t# fn:%s tu%d\n", fname, s390_tune);
7560 asm_fprintf (asm_out_file, "\t# fn:%s sg%d\n", fname, s390_stack_guard);
7561 asm_fprintf (asm_out_file, "\t# fn:%s ss%d\n", fname, s390_stack_size);
7562 asm_fprintf (asm_out_file, "\t# fn:%s bc%d\n", fname, s390_branch_cost);
7563 asm_fprintf (asm_out_file, "\t# fn:%s wf%d\n", fname,
7564 s390_warn_framesize);
7565 asm_fprintf (asm_out_file, "\t# fn:%s ba%d\n", fname, TARGET_BACKCHAIN);
7566 asm_fprintf (asm_out_file, "\t# fn:%s hd%d\n", fname, TARGET_HARD_DFP);
7567 asm_fprintf (asm_out_file, "\t# fn:%s hf%d\n", fname, !TARGET_SOFT_FLOAT);
7568 asm_fprintf (asm_out_file, "\t# fn:%s ht%d\n", fname, TARGET_OPT_HTM);
7569 asm_fprintf (asm_out_file, "\t# fn:%s vx%d\n", fname, TARGET_OPT_VX);
7570 asm_fprintf (asm_out_file, "\t# fn:%s ps%d\n", fname,
7571 TARGET_PACKED_STACK);
7572 asm_fprintf (asm_out_file, "\t# fn:%s se%d\n", fname, TARGET_SMALL_EXEC);
7573 asm_fprintf (asm_out_file, "\t# fn:%s mv%d\n", fname, TARGET_MVCLE);
7574 asm_fprintf (asm_out_file, "\t# fn:%s zv%d\n", fname, TARGET_ZVECTOR);
7575 asm_fprintf (asm_out_file, "\t# fn:%s wd%d\n", fname,
7576 s390_warn_dynamicstack_p);
7577 }
7578 ASM_OUTPUT_LABEL (asm_out_file, fname);
7579 if (hw_after > 0)
7580 asm_fprintf (asm_out_file,
7581 "\t# post-label NOPs for hotpatch (%d halfwords)\n",
7582 hw_after);
7583 }
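/* Sketch of the output for hotpatch (2, 1) (illustrative):

        nopr    %r0     # pre-label NOPs for hotpatch (2 halfwords)
        nopr    %r0
        # alignment for hotpatch
        .align  8
   foo:
        # post-label NOPs for hotpatch (1 halfwords)

   The post-label NOPs themselves are emitted along with the function
   body.  */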
7584
7585 /* Output machine-dependent UNSPECs occurring in address constant X
7586 in assembler syntax to stdio stream FILE. Returns true if the
7587 constant X could be recognized, false otherwise. */
7588
7589 static bool
7590 s390_output_addr_const_extra (FILE *file, rtx x)
7591 {
7592 if (GET_CODE (x) == UNSPEC && XVECLEN (x, 0) == 1)
7593 switch (XINT (x, 1))
7594 {
7595 case UNSPEC_GOTENT:
7596 output_addr_const (file, XVECEXP (x, 0, 0));
7597 fprintf (file, "@GOTENT");
7598 return true;
7599 case UNSPEC_GOT:
7600 output_addr_const (file, XVECEXP (x, 0, 0));
7601 fprintf (file, "@GOT");
7602 return true;
7603 case UNSPEC_GOTOFF:
7604 output_addr_const (file, XVECEXP (x, 0, 0));
7605 fprintf (file, "@GOTOFF");
7606 return true;
7607 case UNSPEC_PLT:
7608 output_addr_const (file, XVECEXP (x, 0, 0));
7609 fprintf (file, "@PLT");
7610 return true;
7611 case UNSPEC_PLTOFF:
7612 output_addr_const (file, XVECEXP (x, 0, 0));
7613 fprintf (file, "@PLTOFF");
7614 return true;
7615 case UNSPEC_TLSGD:
7616 output_addr_const (file, XVECEXP (x, 0, 0));
7617 fprintf (file, "@TLSGD");
7618 return true;
7619 case UNSPEC_TLSLDM:
7620 assemble_name (file, get_some_local_dynamic_name ());
7621 fprintf (file, "@TLSLDM");
7622 return true;
7623 case UNSPEC_DTPOFF:
7624 output_addr_const (file, XVECEXP (x, 0, 0));
7625 fprintf (file, "@DTPOFF");
7626 return true;
7627 case UNSPEC_NTPOFF:
7628 output_addr_const (file, XVECEXP (x, 0, 0));
7629 fprintf (file, "@NTPOFF");
7630 return true;
7631 case UNSPEC_GOTNTPOFF:
7632 output_addr_const (file, XVECEXP (x, 0, 0));
7633 fprintf (file, "@GOTNTPOFF");
7634 return true;
7635 case UNSPEC_INDNTPOFF:
7636 output_addr_const (file, XVECEXP (x, 0, 0));
7637 fprintf (file, "@INDNTPOFF");
7638 return true;
7639 }
7640
7641 if (GET_CODE (x) == UNSPEC && XVECLEN (x, 0) == 2)
7642 switch (XINT (x, 1))
7643 {
7644 case UNSPEC_POOL_OFFSET:
7645 x = gen_rtx_MINUS (GET_MODE (x), XVECEXP (x, 0, 0), XVECEXP (x, 0, 1));
7646 output_addr_const (file, x);
7647 return true;
7648 }
7649 return false;
7650 }
7651
7652 /* Output address operand ADDR in assembler syntax to
7653 stdio stream FILE. */
7654
7655 void
7656 print_operand_address (FILE *file, rtx addr)
7657 {
7658 struct s390_address ad;
7659 memset (&ad, 0, sizeof (s390_address));
7660
7661 if (s390_loadrelative_operand_p (addr, NULL, NULL))
7662 {
7663 if (!TARGET_Z10)
7664 {
7665 output_operand_lossage ("symbolic memory references are "
7666 "only supported on z10 or later");
7667 return;
7668 }
7669 output_addr_const (file, addr);
7670 return;
7671 }
7672
7673 if (!s390_decompose_address (addr, &ad)
7674 || (ad.base && !REGNO_OK_FOR_BASE_P (REGNO (ad.base)))
7675 || (ad.indx && !REGNO_OK_FOR_INDEX_P (REGNO (ad.indx))))
7676 output_operand_lossage ("cannot decompose address");
7677
7678 if (ad.disp)
7679 output_addr_const (file, ad.disp);
7680 else
7681 fprintf (file, "0");
7682
7683 if (ad.base && ad.indx)
7684 fprintf (file, "(%s,%s)", reg_names[REGNO (ad.indx)],
7685 reg_names[REGNO (ad.base)]);
7686 else if (ad.base)
7687 fprintf (file, "(%s)", reg_names[REGNO (ad.base)]);
7688 }
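/* Examples (illustrative): a base+index+displacement address prints
   as "8(%r3,%r2)" (index first), base+displacement as "8(%r2)", and
   a bare displacement as "8".  */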
7689
7690 /* Output operand X in assembler syntax to stdio stream FILE.
7691 CODE specified the format flag. The following format flags
7692 are recognized:
7693
7694 'C': print opcode suffix for branch condition.
7695 'D': print opcode suffix for inverse branch condition.
7696 'E': print opcode suffix for branch on index instruction.
7697 'G': print the size of the operand in bytes.
7698 'J': print tls_load/tls_gdcall/tls_ldcall suffix
7699 'M': print the second word of a TImode operand.
7700 'N': print the second word of a DImode operand.
7701 'O': print only the displacement of a memory reference or address.
7702 'R': print only the base register of a memory reference or address.
7703 'S': print S-type memory reference (base+displacement).
7704 'Y': print address style operand without index (e.g. shift count or setmem
7705 operand).
7706
7707 'b': print integer X as if it's an unsigned byte.
7708 'c': print integer X as if it's a signed byte.
7709 'e': "end" contiguous bitmask X in either DImode or vector inner mode.
7710 'f': "end" contiguous bitmask X in SImode.
7711 'h': print integer X as if it's a signed halfword.
7712 'i': print the first nonzero HImode part of X.
7713 'j': print the first HImode part unequal to -1 of X.
7714 'k': print the first nonzero SImode part of X.
7715 'm': print the first SImode part unequal to -1 of X.
7716 'o': print integer X as if it's an unsigned 32-bit word.
7717 's': "start" of contiguous bitmask X in either DImode or vector inner mode.
7718 't': CONST_INT: "start" of contiguous bitmask X in SImode.
7719 CONST_VECTOR: Generate a bitmask for vgbm instruction.
7720 'x': print integer X as if it's an unsigned halfword.
7721 'v': print register number as vector register (v1 instead of f1).
7722 */
7723
7724 void
7725 print_operand (FILE *file, rtx x, int code)
7726 {
7727 HOST_WIDE_INT ival;
7728
7729 switch (code)
7730 {
7731 case 'C':
7732 fprintf (file, s390_branch_condition_mnemonic (x, FALSE));
7733 return;
7734
7735 case 'D':
7736 fprintf (file, s390_branch_condition_mnemonic (x, TRUE));
7737 return;
7738
7739 case 'E':
7740 if (GET_CODE (x) == LE)
7741 fprintf (file, "l");
7742 else if (GET_CODE (x) == GT)
7743 fprintf (file, "h");
7744 else
7745 output_operand_lossage ("invalid comparison operator "
7746 "for 'E' output modifier");
7747 return;
7748
7749 case 'J':
7750 if (GET_CODE (x) == SYMBOL_REF)
7751 {
7752 fprintf (file, "%s", ":tls_load:");
7753 output_addr_const (file, x);
7754 }
7755 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLSGD)
7756 {
7757 fprintf (file, "%s", ":tls_gdcall:");
7758 output_addr_const (file, XVECEXP (x, 0, 0));
7759 }
7760 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLSLDM)
7761 {
7762 fprintf (file, "%s", ":tls_ldcall:");
7763 const char *name = get_some_local_dynamic_name ();
7764 gcc_assert (name);
7765 assemble_name (file, name);
7766 }
7767 else
7768 output_operand_lossage ("invalid reference for 'J' output modifier");
7769 return;
7770
7771 case 'G':
7772 fprintf (file, "%u", GET_MODE_SIZE (GET_MODE (x)));
7773 return;
7774
7775 case 'O':
7776 {
7777 struct s390_address ad;
7778 int ret;
7779
7780 ret = s390_decompose_address (MEM_P (x) ? XEXP (x, 0) : x, &ad);
7781
7782 if (!ret
7783 || (ad.base && !REGNO_OK_FOR_BASE_P (REGNO (ad.base)))
7784 || ad.indx)
7785 {
7786 output_operand_lossage ("invalid address for 'O' output modifier");
7787 return;
7788 }
7789
7790 if (ad.disp)
7791 output_addr_const (file, ad.disp);
7792 else
7793 fprintf (file, "0");
7794 }
7795 return;
7796
7797 case 'R':
7798 {
7799 struct s390_address ad;
7800 int ret;
7801
7802 ret = s390_decompose_address (MEM_P (x) ? XEXP (x, 0) : x, &ad);
7803
7804 if (!ret
7805 || (ad.base && !REGNO_OK_FOR_BASE_P (REGNO (ad.base)))
7806 || ad.indx)
7807 {
7808 output_operand_lossage ("invalid address for 'R' output modifier");
7809 return;
7810 }
7811
7812 if (ad.base)
7813 fprintf (file, "%s", reg_names[REGNO (ad.base)]);
7814 else
7815 fprintf (file, "0");
7816 }
7817 return;
7818
7819 case 'S':
7820 {
7821 struct s390_address ad;
7822 int ret;
7823
7824 if (!MEM_P (x))
7825 {
7826 output_operand_lossage ("memory reference expected for "
7827 "'S' output modifier");
7828 return;
7829 }
7830 ret = s390_decompose_address (XEXP (x, 0), &ad);
7831
7832 if (!ret
7833 || (ad.base && !REGNO_OK_FOR_BASE_P (REGNO (ad.base)))
7834 || ad.indx)
7835 {
7836 output_operand_lossage ("invalid address for 'S' output modifier");
7837 return;
7838 }
7839
7840 if (ad.disp)
7841 output_addr_const (file, ad.disp);
7842 else
7843 fprintf (file, "0");
7844
7845 if (ad.base)
7846 fprintf (file, "(%s)", reg_names[REGNO (ad.base)]);
7847 }
7848 return;
7849
7850 case 'N':
7851 if (GET_CODE (x) == REG)
7852 x = gen_rtx_REG (GET_MODE (x), REGNO (x) + 1);
7853 else if (GET_CODE (x) == MEM)
7854 x = change_address (x, VOIDmode,
7855 plus_constant (Pmode, XEXP (x, 0), 4));
7856 else
7857 output_operand_lossage ("register or memory expression expected "
7858 "for 'N' output modifier");
7859 break;
7860
7861 case 'M':
7862 if (GET_CODE (x) == REG)
7863 x = gen_rtx_REG (GET_MODE (x), REGNO (x) + 1);
7864 else if (GET_CODE (x) == MEM)
7865 x = change_address (x, VOIDmode,
7866 plus_constant (Pmode, XEXP (x, 0), 8));
7867 else
7868 output_operand_lossage ("register or memory expression expected "
7869 "for 'M' output modifier");
7870 break;
7871
7872 case 'Y':
7873 print_addrstyle_operand (file, x);
7874 return;
7875 }
7876
7877 switch (GET_CODE (x))
7878 {
7879 case REG:
7880 /* Print FP regs as fx instead of vx when they are accessed
7881 through non-vector mode. */
7882 if (code == 'v'
7883 || VECTOR_NOFP_REG_P (x)
7884 || (FP_REG_P (x) && VECTOR_MODE_P (GET_MODE (x)))
7885 || (VECTOR_REG_P (x)
7886 && (GET_MODE_SIZE (GET_MODE (x)) /
7887 s390_class_max_nregs (FP_REGS, GET_MODE (x))) > 8))
7888 fprintf (file, "%%v%s", reg_names[REGNO (x)] + 2);
7889 else
7890 fprintf (file, "%s", reg_names[REGNO (x)]);
7891 break;
7892
7893 case MEM:
7894 output_address (GET_MODE (x), XEXP (x, 0));
7895 break;
7896
7897 case CONST:
7898 case CODE_LABEL:
7899 case LABEL_REF:
7900 case SYMBOL_REF:
7901 output_addr_const (file, x);
7902 break;
7903
7904 case CONST_INT:
7905 ival = INTVAL (x);
7906 switch (code)
7907 {
7908 case 0:
7909 break;
7910 case 'b':
7911 ival &= 0xff;
7912 break;
7913 case 'c':
7914 ival = ((ival & 0xff) ^ 0x80) - 0x80;
7915 break;
7916 case 'x':
7917 ival &= 0xffff;
7918 break;
7919 case 'h':
7920 ival = ((ival & 0xffff) ^ 0x8000) - 0x8000;
7921 break;
7922 case 'i':
7923 ival = s390_extract_part (x, HImode, 0);
7924 break;
7925 case 'j':
7926 ival = s390_extract_part (x, HImode, -1);
7927 break;
7928 case 'k':
7929 ival = s390_extract_part (x, SImode, 0);
7930 break;
7931 case 'm':
7932 ival = s390_extract_part (x, SImode, -1);
7933 break;
7934 case 'o':
7935 ival &= 0xffffffff;
7936 break;
7937 case 'e': case 'f':
7938 case 's': case 't':
7939 {
7940 int start, end;
7941 int len;
7942 bool ok;
7943
7944 len = (code == 's' || code == 'e' ? 64 : 32);
7945 ok = s390_contiguous_bitmask_p (ival, true, len, &start, &end);
7946 gcc_assert (ok);
7947 if (code == 's' || code == 't')
7948 ival = start;
7949 else
7950 ival = end;
7951 }
7952 break;
7953 default:
7954 output_operand_lossage ("invalid constant for output modifier '%c'", code);
7955 }
7956 fprintf (file, HOST_WIDE_INT_PRINT_DEC, ival);
7957 break;
7958
7959 case CONST_WIDE_INT:
7960 if (code == 'b')
7961 fprintf (file, HOST_WIDE_INT_PRINT_DEC,
7962 CONST_WIDE_INT_ELT (x, 0) & 0xff);
7963 else if (code == 'x')
7964 fprintf (file, HOST_WIDE_INT_PRINT_DEC,
7965 CONST_WIDE_INT_ELT (x, 0) & 0xffff);
7966 else if (code == 'h')
7967 fprintf (file, HOST_WIDE_INT_PRINT_DEC,
7968 ((CONST_WIDE_INT_ELT (x, 0) & 0xffff) ^ 0x8000) - 0x8000);
7969 else
7970 {
7971 if (code == 0)
7972 output_operand_lossage ("invalid constant - try using "
7973 "an output modifier");
7974 else
7975 output_operand_lossage ("invalid constant for output modifier '%c'",
7976 code);
7977 }
7978 break;
7979 case CONST_VECTOR:
7980 switch (code)
7981 {
7982 case 'h':
7983 gcc_assert (const_vec_duplicate_p (x));
7984 fprintf (file, HOST_WIDE_INT_PRINT_DEC,
7985 ((INTVAL (XVECEXP (x, 0, 0)) & 0xffff) ^ 0x8000) - 0x8000);
7986 break;
7987 case 'e':
7988 case 's':
7989 {
7990 int start, end;
7991 bool ok;
7992
7993 ok = s390_contiguous_bitmask_vector_p (x, &start, &end);
7994 gcc_assert (ok);
7995 ival = (code == 's') ? start : end;
7996 fprintf (file, HOST_WIDE_INT_PRINT_DEC, ival);
7997 }
7998 break;
7999 case 't':
8000 {
8001 unsigned mask;
8002 bool ok = s390_bytemask_vector_p (x, &mask);
8003 gcc_assert (ok);
8004 fprintf (file, "%u", mask);
8005 }
8006 break;
8007
8008 default:
8009 output_operand_lossage ("invalid constant vector for output "
8010 "modifier '%c'", code);
8011 }
8012 break;
8013
8014 default:
8015 if (code == 0)
8016 output_operand_lossage ("invalid expression - try using "
8017 "an output modifier");
8018 else
8019 output_operand_lossage ("invalid expression for output "
8020 "modifier '%c'", code);
8021 break;
8022 }
8023 }
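/* Examples for a CONST_INT operand of -1 (illustrative): 'b' prints
   255, 'c' prints -1, 'x' prints 65535, 'h' prints -1, and 'o'
   prints 4294967295.  */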
8024
8025 /* Target hook for assembling integer objects. We need to define it
8026 here to work around a bug in some versions of GAS, which couldn't
8027 handle values smaller than INT_MIN when printed in decimal. */
8028
8029 static bool
8030 s390_assemble_integer (rtx x, unsigned int size, int aligned_p)
8031 {
8032 if (size == 8 && aligned_p
8033 && GET_CODE (x) == CONST_INT && INTVAL (x) < INT_MIN)
8034 {
8035 fprintf (asm_out_file, "\t.quad\t" HOST_WIDE_INT_PRINT_HEX "\n",
8036 INTVAL (x));
8037 return true;
8038 }
8039 return default_assemble_integer (x, size, aligned_p);
8040 }
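/* E.g., the value -2147483649 (just below INT_MIN) is emitted as

        .quad   0xffffffff7fffffff

   rather than in decimal (illustrative).  */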
8041
8042 /* Returns true if register REGNO is used for forming
8043 a memory address in expression X. */
8044
8045 static bool
8046 reg_used_in_mem_p (int regno, rtx x)
8047 {
8048 enum rtx_code code = GET_CODE (x);
8049 int i, j;
8050 const char *fmt;
8051
8052 if (code == MEM)
8053 {
8054 if (refers_to_regno_p (regno, XEXP (x, 0)))
8055 return true;
8056 }
8057 else if (code == SET
8058 && GET_CODE (SET_DEST (x)) == PC)
8059 {
8060 if (refers_to_regno_p (regno, SET_SRC (x)))
8061 return true;
8062 }
8063
8064 fmt = GET_RTX_FORMAT (code);
8065 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
8066 {
8067 if (fmt[i] == 'e'
8068 && reg_used_in_mem_p (regno, XEXP (x, i)))
8069 return true;
8070
8071 else if (fmt[i] == 'E')
8072 for (j = 0; j < XVECLEN (x, i); j++)
8073 if (reg_used_in_mem_p (regno, XVECEXP (x, i, j)))
8074 return true;
8075 }
8076 return false;
8077 }
8078
8079 /* Returns true if expression DEP_RTX sets an address register
8080 used by instruction INSN to address memory. */
8081
8082 static bool
8083 addr_generation_dependency_p (rtx dep_rtx, rtx_insn *insn)
8084 {
8085 rtx target, pat;
8086
8087 if (NONJUMP_INSN_P (dep_rtx))
8088 dep_rtx = PATTERN (dep_rtx);
8089
8090 if (GET_CODE (dep_rtx) == SET)
8091 {
8092 target = SET_DEST (dep_rtx);
8093 if (GET_CODE (target) == STRICT_LOW_PART)
8094 target = XEXP (target, 0);
8095 while (GET_CODE (target) == SUBREG)
8096 target = SUBREG_REG (target);
8097
8098 if (GET_CODE (target) == REG)
8099 {
8100 int regno = REGNO (target);
8101
8102 if (s390_safe_attr_type (insn) == TYPE_LA)
8103 {
8104 pat = PATTERN (insn);
8105 if (GET_CODE (pat) == PARALLEL)
8106 {
8107 gcc_assert (XVECLEN (pat, 0) == 2);
8108 pat = XVECEXP (pat, 0, 0);
8109 }
8110 gcc_assert (GET_CODE (pat) == SET);
8111 return refers_to_regno_p (regno, SET_SRC (pat));
8112 }
8113 else if (get_attr_atype (insn) == ATYPE_AGEN)
8114 return reg_used_in_mem_p (regno, PATTERN (insn));
8115 }
8116 }
8117 return false;
8118 }
8119
8120 /* Return 1 if DEP_INSN sets a register used by INSN in the agen unit. */
8121
8122 int
8123 s390_agen_dep_p (rtx_insn *dep_insn, rtx_insn *insn)
8124 {
8125 rtx dep_rtx = PATTERN (dep_insn);
8126 int i;
8127
8128 if (GET_CODE (dep_rtx) == SET
8129 && addr_generation_dependency_p (dep_rtx, insn))
8130 return 1;
8131 else if (GET_CODE (dep_rtx) == PARALLEL)
8132 {
8133 for (i = 0; i < XVECLEN (dep_rtx, 0); i++)
8134 {
8135 if (addr_generation_dependency_p (XVECEXP (dep_rtx, 0, i), insn))
8136 return 1;
8137 }
8138 }
8139 return 0;
8140 }
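/* Classic address-generation interlock (illustrative):

        la      %r1,8(%r2)      # dep_insn sets %r1
        l       %r3,0(%r1)      # insn uses %r1 to address memory

   For such a pair s390_agen_dep_p returns 1.  */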
8141
8142
8143 /* A C statement (sans semicolon) to update the integer scheduling priority
8144 INSN_PRIORITY (INSN). Increase the priority to execute the INSN earlier,
8145 reduce the priority to execute INSN later. Do not define this macro if
8146 you do not need to adjust the scheduling priorities of insns.
8147
8148 A STD instruction should be scheduled earlier,
8149 in order to use the bypass. */
8150 static int
8151 s390_adjust_priority (rtx_insn *insn, int priority)
8152 {
8153 if (! INSN_P (insn))
8154 return priority;
8155
8156 if (s390_tune <= PROCESSOR_2064_Z900)
8157 return priority;
8158
8159 switch (s390_safe_attr_type (insn))
8160 {
8161 case TYPE_FSTOREDF:
8162 case TYPE_FSTORESF:
8163 priority = priority << 3;
8164 break;
8165 case TYPE_STORE:
8166 case TYPE_STM:
8167 priority = priority << 1;
8168 break;
8169 default:
8170 break;
8171 }
8172 return priority;
8173 }
8174
8175
8176 /* The number of instructions that can be issued per cycle. */
8177
8178 static int
8179 s390_issue_rate (void)
8180 {
8181 switch (s390_tune)
8182 {
8183 case PROCESSOR_2084_Z990:
8184 case PROCESSOR_2094_Z9_109:
8185 case PROCESSOR_2094_Z9_EC:
8186 case PROCESSOR_2817_Z196:
8187 return 3;
8188 case PROCESSOR_2097_Z10:
8189 return 2;
8190 case PROCESSOR_9672_G5:
8191 case PROCESSOR_9672_G6:
8192 case PROCESSOR_2064_Z900:
8193 /* Starting with EC12 we use the sched_reorder hook to take care
8194 of instruction dispatch constraints. The algorithm only
8195 picks the best instruction and assumes only a single
8196 instruction gets issued per cycle. */
8197 case PROCESSOR_2827_ZEC12:
8198 case PROCESSOR_2964_Z13:
8199 case PROCESSOR_3906_Z14:
8200 default:
8201 return 1;
8202 }
8203 }
8204
8205 static int
8206 s390_first_cycle_multipass_dfa_lookahead (void)
8207 {
8208 return 4;
8209 }
8210
8211 /* Annotate every literal pool reference in X by an UNSPEC_LTREF expression.
8212 Fix up MEMs as required. */
8213
8214 static void
8215 annotate_constant_pool_refs (rtx *x)
8216 {
8217 int i, j;
8218 const char *fmt;
8219
8220 gcc_assert (GET_CODE (*x) != SYMBOL_REF
8221 || !CONSTANT_POOL_ADDRESS_P (*x));
8222
8223 /* Literal pool references can only occur inside a MEM ... */
8224 if (GET_CODE (*x) == MEM)
8225 {
8226 rtx memref = XEXP (*x, 0);
8227
8228 if (GET_CODE (memref) == SYMBOL_REF
8229 && CONSTANT_POOL_ADDRESS_P (memref))
8230 {
8231 rtx base = cfun->machine->base_reg;
8232 rtx addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, memref, base),
8233 UNSPEC_LTREF);
8234
8235 *x = replace_equiv_address (*x, addr);
8236 return;
8237 }
8238
8239 if (GET_CODE (memref) == CONST
8240 && GET_CODE (XEXP (memref, 0)) == PLUS
8241 && GET_CODE (XEXP (XEXP (memref, 0), 1)) == CONST_INT
8242 && GET_CODE (XEXP (XEXP (memref, 0), 0)) == SYMBOL_REF
8243 && CONSTANT_POOL_ADDRESS_P (XEXP (XEXP (memref, 0), 0)))
8244 {
8245 HOST_WIDE_INT off = INTVAL (XEXP (XEXP (memref, 0), 1));
8246 rtx sym = XEXP (XEXP (memref, 0), 0);
8247 rtx base = cfun->machine->base_reg;
8248 rtx addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, sym, base),
8249 UNSPEC_LTREF);
8250
8251 *x = replace_equiv_address (*x, plus_constant (Pmode, addr, off));
8252 return;
8253 }
8254 }
8255
8256 /* ... or a load-address type pattern. */
8257 if (GET_CODE (*x) == SET)
8258 {
8259 rtx addrref = SET_SRC (*x);
8260
8261 if (GET_CODE (addrref) == SYMBOL_REF
8262 && CONSTANT_POOL_ADDRESS_P (addrref))
8263 {
8264 rtx base = cfun->machine->base_reg;
8265 rtx addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, addrref, base),
8266 UNSPEC_LTREF);
8267
8268 SET_SRC (*x) = addr;
8269 return;
8270 }
8271
8272 if (GET_CODE (addrref) == CONST
8273 && GET_CODE (XEXP (addrref, 0)) == PLUS
8274 && GET_CODE (XEXP (XEXP (addrref, 0), 1)) == CONST_INT
8275 && GET_CODE (XEXP (XEXP (addrref, 0), 0)) == SYMBOL_REF
8276 && CONSTANT_POOL_ADDRESS_P (XEXP (XEXP (addrref, 0), 0)))
8277 {
8278 HOST_WIDE_INT off = INTVAL (XEXP (XEXP (addrref, 0), 1));
8279 rtx sym = XEXP (XEXP (addrref, 0), 0);
8280 rtx base = cfun->machine->base_reg;
8281 rtx addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, sym, base),
8282 UNSPEC_LTREF);
8283
8284 SET_SRC (*x) = plus_constant (Pmode, addr, off);
8285 return;
8286 }
8287 }
8288
8289 /* Annotate LTREL_BASE as well. */
8290 if (GET_CODE (*x) == UNSPEC
8291 && XINT (*x, 1) == UNSPEC_LTREL_BASE)
8292 {
8293 rtx base = cfun->machine->base_reg;
8294 *x = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, XVECEXP (*x, 0, 0), base),
8295 UNSPEC_LTREL_BASE);
8296 return;
8297 }
8298
8299 fmt = GET_RTX_FORMAT (GET_CODE (*x));
8300 for (i = GET_RTX_LENGTH (GET_CODE (*x)) - 1; i >= 0; i--)
8301 {
8302 if (fmt[i] == 'e')
8303 {
8304 annotate_constant_pool_refs (&XEXP (*x, i));
8305 }
8306 else if (fmt[i] == 'E')
8307 {
8308 for (j = 0; j < XVECLEN (*x, i); j++)
8309 annotate_constant_pool_refs (&XVECEXP (*x, i, j));
8310 }
8311 }
8312 }
8313
8314 /* Split all branches that exceed the maximum distance.
8315 Returns true if this created a new literal pool entry. */
8316
8317 static int
8318 s390_split_branches (void)
8319 {
8320 rtx temp_reg = gen_rtx_REG (Pmode, RETURN_REGNUM);
8321 int new_literal = 0, ret;
8322 rtx_insn *insn;
8323 rtx pat, target;
8324 rtx *label;
8325
8326 /* We need correct insn addresses. */
8327
8328 shorten_branches (get_insns ());
8329
8330 /* Find all branches that exceed 64KB, and split them. */
8331
8332 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
8333 {
8334 if (! JUMP_P (insn) || tablejump_p (insn, NULL, NULL))
8335 continue;
8336
8337 pat = PATTERN (insn);
8338 if (GET_CODE (pat) == PARALLEL)
8339 pat = XVECEXP (pat, 0, 0);
8340 if (GET_CODE (pat) != SET || SET_DEST (pat) != pc_rtx)
8341 continue;
8342
8343 if (GET_CODE (SET_SRC (pat)) == LABEL_REF)
8344 {
8345 label = &SET_SRC (pat);
8346 }
8347 else if (GET_CODE (SET_SRC (pat)) == IF_THEN_ELSE)
8348 {
8349 if (GET_CODE (XEXP (SET_SRC (pat), 1)) == LABEL_REF)
8350 label = &XEXP (SET_SRC (pat), 1);
8351 else if (GET_CODE (XEXP (SET_SRC (pat), 2)) == LABEL_REF)
8352 label = &XEXP (SET_SRC (pat), 2);
8353 else
8354 continue;
8355 }
8356 else
8357 continue;
8358
8359 if (get_attr_length (insn) <= 4)
8360 continue;
8361
8362 /* We are going to use the return register as scratch register,
8363 make sure it will be saved/restored by the prologue/epilogue. */
8364 cfun_frame_layout.save_return_addr_p = 1;
8365
8366 if (!flag_pic)
8367 {
8368 new_literal = 1;
8369 rtx mem = force_const_mem (Pmode, *label);
8370 rtx_insn *set_insn = emit_insn_before (gen_rtx_SET (temp_reg, mem),
8371 insn);
8372 INSN_ADDRESSES_NEW (set_insn, -1);
8373 annotate_constant_pool_refs (&PATTERN (set_insn));
8374
8375 target = temp_reg;
8376 }
8377 else
8378 {
8379 new_literal = 1;
8380 target = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, *label),
8381 UNSPEC_LTREL_OFFSET);
8382 target = gen_rtx_CONST (Pmode, target);
8383 target = force_const_mem (Pmode, target);
8384 rtx_insn *set_insn = emit_insn_before (gen_rtx_SET (temp_reg, target),
8385 insn);
8386 INSN_ADDRESSES_NEW (set_insn, -1);
8387 annotate_constant_pool_refs (&PATTERN (set_insn));
8388
8389 target = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, XEXP (target, 0),
8390 cfun->machine->base_reg),
8391 UNSPEC_LTREL_BASE);
8392 target = gen_rtx_PLUS (Pmode, temp_reg, target);
8393 }
8394
8395 ret = validate_change (insn, label, target, 0);
8396 gcc_assert (ret);
8397 }
8398
8399 return new_literal;
8400 }
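/* Sketch of the non-PIC transformation (illustrative): a conditional
   branch to an out-of-range label

        jne     .Lfar

   is rewritten to load the label address from the literal pool into
   the return register and branch indirectly, roughly

        l       %r14,.LC0-.Lbase(%r13)
        bner    %r14  */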
8401
8402
8403 /* Find an annotated literal pool symbol referenced in RTX X,
8404 and store it at REF. Will abort if X contains references to
8405 more than one such pool symbol; multiple references to the same
8406 symbol are allowed, however.
8407
8408 The rtx pointed to by REF must be initialized to NULL_RTX
8409 by the caller before calling this routine. */
8410
8411 static void
8412 find_constant_pool_ref (rtx x, rtx *ref)
8413 {
8414 int i, j;
8415 const char *fmt;
8416
8417 /* Ignore LTREL_BASE references. */
8418 if (GET_CODE (x) == UNSPEC
8419 && XINT (x, 1) == UNSPEC_LTREL_BASE)
8420 return;
8421 /* Likewise POOL_ENTRY insns. */
8422 if (GET_CODE (x) == UNSPEC_VOLATILE
8423 && XINT (x, 1) == UNSPECV_POOL_ENTRY)
8424 return;
8425
8426 gcc_assert (GET_CODE (x) != SYMBOL_REF
8427 || !CONSTANT_POOL_ADDRESS_P (x));
8428
8429 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_LTREF)
8430 {
8431 rtx sym = XVECEXP (x, 0, 0);
8432 gcc_assert (GET_CODE (sym) == SYMBOL_REF
8433 && CONSTANT_POOL_ADDRESS_P (sym));
8434
8435 if (*ref == NULL_RTX)
8436 *ref = sym;
8437 else
8438 gcc_assert (*ref == sym);
8439
8440 return;
8441 }
8442
8443 fmt = GET_RTX_FORMAT (GET_CODE (x));
8444 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
8445 {
8446 if (fmt[i] == 'e')
8447 {
8448 find_constant_pool_ref (XEXP (x, i), ref);
8449 }
8450 else if (fmt[i] == 'E')
8451 {
8452 for (j = 0; j < XVECLEN (x, i); j++)
8453 find_constant_pool_ref (XVECEXP (x, i, j), ref);
8454 }
8455 }
8456 }
8457
8458 /* Replace every reference to the annotated literal pool
8459 symbol REF in X by its base plus OFFSET. */
8460
8461 static void
8462 replace_constant_pool_ref (rtx *x, rtx ref, rtx offset)
8463 {
8464 int i, j;
8465 const char *fmt;
8466
8467 gcc_assert (*x != ref);
8468
8469 if (GET_CODE (*x) == UNSPEC
8470 && XINT (*x, 1) == UNSPEC_LTREF
8471 && XVECEXP (*x, 0, 0) == ref)
8472 {
8473 *x = gen_rtx_PLUS (Pmode, XVECEXP (*x, 0, 1), offset);
8474 return;
8475 }
8476
8477 if (GET_CODE (*x) == PLUS
8478 && GET_CODE (XEXP (*x, 1)) == CONST_INT
8479 && GET_CODE (XEXP (*x, 0)) == UNSPEC
8480 && XINT (XEXP (*x, 0), 1) == UNSPEC_LTREF
8481 && XVECEXP (XEXP (*x, 0), 0, 0) == ref)
8482 {
8483 rtx addr = gen_rtx_PLUS (Pmode, XVECEXP (XEXP (*x, 0), 0, 1), offset);
8484 *x = plus_constant (Pmode, addr, INTVAL (XEXP (*x, 1)));
8485 return;
8486 }
8487
8488 fmt = GET_RTX_FORMAT (GET_CODE (*x));
8489 for (i = GET_RTX_LENGTH (GET_CODE (*x)) - 1; i >= 0; i--)
8490 {
8491 if (fmt[i] == 'e')
8492 {
8493 replace_constant_pool_ref (&XEXP (*x, i), ref, offset);
8494 }
8495 else if (fmt[i] == 'E')
8496 {
8497 for (j = 0; j < XVECLEN (*x, i); j++)
8498 replace_constant_pool_ref (&XVECEXP (*x, i, j), ref, offset);
8499 }
8500 }
8501 }
8502
8503 /* Check whether X contains an UNSPEC_LTREL_BASE.
8504 Return its constant pool symbol if found, NULL_RTX otherwise. */
8505
8506 static rtx
8507 find_ltrel_base (rtx x)
8508 {
8509 int i, j;
8510 const char *fmt;
8511
8512 if (GET_CODE (x) == UNSPEC
8513 && XINT (x, 1) == UNSPEC_LTREL_BASE)
8514 return XVECEXP (x, 0, 0);
8515
8516 fmt = GET_RTX_FORMAT (GET_CODE (x));
8517 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
8518 {
8519 if (fmt[i] == 'e')
8520 {
8521 rtx fnd = find_ltrel_base (XEXP (x, i));
8522 if (fnd)
8523 return fnd;
8524 }
8525 else if (fmt[i] == 'E')
8526 {
8527 for (j = 0; j < XVECLEN (x, i); j++)
8528 {
8529 rtx fnd = find_ltrel_base (XVECEXP (x, i, j));
8530 if (fnd)
8531 return fnd;
8532 }
8533 }
8534 }
8535
8536 return NULL_RTX;
8537 }
8538
8539 /* Replace any occurrence of UNSPEC_LTREL_BASE in X with its base. */
8540
8541 static void
8542 replace_ltrel_base (rtx *x)
8543 {
8544 int i, j;
8545 const char *fmt;
8546
8547 if (GET_CODE (*x) == UNSPEC
8548 && XINT (*x, 1) == UNSPEC_LTREL_BASE)
8549 {
8550 *x = XVECEXP (*x, 0, 1);
8551 return;
8552 }
8553
8554 fmt = GET_RTX_FORMAT (GET_CODE (*x));
8555 for (i = GET_RTX_LENGTH (GET_CODE (*x)) - 1; i >= 0; i--)
8556 {
8557 if (fmt[i] == 'e')
8558 {
8559 replace_ltrel_base (&XEXP (*x, i));
8560 }
8561 else if (fmt[i] == 'E')
8562 {
8563 for (j = 0; j < XVECLEN (*x, i); j++)
8564 replace_ltrel_base (&XVECEXP (*x, i, j));
8565 }
8566 }
8567 }
8568
8569
8570 /* We keep a list of constants which we have to add to internal
8571 constant tables in the middle of large functions. */
8572
8573 #define NR_C_MODES 32
8574 machine_mode constant_modes[NR_C_MODES] =
8575 {
8576 TFmode, TImode, TDmode,
8577 V16QImode, V8HImode, V4SImode, V2DImode, V1TImode,
8578 V4SFmode, V2DFmode, V1TFmode,
8579 DFmode, DImode, DDmode,
8580 V8QImode, V4HImode, V2SImode, V1DImode, V2SFmode, V1DFmode,
8581 SFmode, SImode, SDmode,
8582 V4QImode, V2HImode, V1SImode, V1SFmode,
8583 HImode,
8584 V2QImode, V1HImode,
8585 QImode,
8586 V1QImode
8587 };
8588
8589 struct constant
8590 {
8591 struct constant *next;
8592 rtx value;
8593 rtx_code_label *label;
8594 };
8595
8596 struct constant_pool
8597 {
8598 struct constant_pool *next;
8599 rtx_insn *first_insn;
8600 rtx_insn *pool_insn;
8601 bitmap insns;
8602 rtx_insn *emit_pool_after;
8603
8604 struct constant *constants[NR_C_MODES];
8605 struct constant *execute;
8606 rtx_code_label *label;
8607 int size;
8608 };
8609
8610 /* Allocate new constant_pool structure. */
8611
8612 static struct constant_pool *
8613 s390_alloc_pool (void)
8614 {
8615 struct constant_pool *pool;
8616 int i;
8617
8618 pool = (struct constant_pool *) xmalloc (sizeof *pool);
8619 pool->next = NULL;
8620 for (i = 0; i < NR_C_MODES; i++)
8621 pool->constants[i] = NULL;
8622
8623 pool->execute = NULL;
8624 pool->label = gen_label_rtx ();
8625 pool->first_insn = NULL;
8626 pool->pool_insn = NULL;
8627 pool->insns = BITMAP_ALLOC (NULL);
8628 pool->size = 0;
8629 pool->emit_pool_after = NULL;
8630
8631 return pool;
8632 }
8633
8634 /* Create new constant pool covering instructions starting at INSN
8635 and chain it to the end of POOL_LIST. */
8636
8637 static struct constant_pool *
8638 s390_start_pool (struct constant_pool **pool_list, rtx_insn *insn)
8639 {
8640 struct constant_pool *pool, **prev;
8641
8642 pool = s390_alloc_pool ();
8643 pool->first_insn = insn;
8644
8645 for (prev = pool_list; *prev; prev = &(*prev)->next)
8646 ;
8647 *prev = pool;
8648
8649 return pool;
8650 }
8651
8652 /* End range of instructions covered by POOL at INSN and emit
8653 placeholder insn representing the pool. */
8654
8655 static void
8656 s390_end_pool (struct constant_pool *pool, rtx_insn *insn)
8657 {
8658 rtx pool_size = GEN_INT (pool->size + 8 /* alignment slop */);
8659
8660 if (!insn)
8661 insn = get_last_insn ();
8662
8663 pool->pool_insn = emit_insn_after (gen_pool (pool_size), insn);
8664 INSN_ADDRESSES_NEW (pool->pool_insn, -1);
8665 }
8666
8667 /* Add INSN to the list of insns covered by POOL. */
8668
8669 static void
8670 s390_add_pool_insn (struct constant_pool *pool, rtx insn)
8671 {
8672 bitmap_set_bit (pool->insns, INSN_UID (insn));
8673 }
8674
8675 /* Return pool out of POOL_LIST that covers INSN. */
8676
8677 static struct constant_pool *
8678 s390_find_pool (struct constant_pool *pool_list, rtx insn)
8679 {
8680 struct constant_pool *pool;
8681
8682 for (pool = pool_list; pool; pool = pool->next)
8683 if (bitmap_bit_p (pool->insns, INSN_UID (insn)))
8684 break;
8685
8686 return pool;
8687 }
8688
8689 /* Add constant VAL of mode MODE to the constant pool POOL. */
8690
8691 static void
8692 s390_add_constant (struct constant_pool *pool, rtx val, machine_mode mode)
8693 {
8694 struct constant *c;
8695 int i;
8696
8697 for (i = 0; i < NR_C_MODES; i++)
8698 if (constant_modes[i] == mode)
8699 break;
8700 gcc_assert (i != NR_C_MODES);
8701
8702 for (c = pool->constants[i]; c != NULL; c = c->next)
8703 if (rtx_equal_p (val, c->value))
8704 break;
8705
8706 if (c == NULL)
8707 {
8708 c = (struct constant *) xmalloc (sizeof *c);
8709 c->value = val;
8710 c->label = gen_label_rtx ();
8711 c->next = pool->constants[i];
8712 pool->constants[i] = c;
8713 pool->size += GET_MODE_SIZE (mode);
8714 }
8715 }
8716
8717 /* Return an rtx that represents the offset of X from the start of
8718 pool POOL. */
8719
8720 static rtx
8721 s390_pool_offset (struct constant_pool *pool, rtx x)
8722 {
8723 rtx label;
8724
8725 label = gen_rtx_LABEL_REF (GET_MODE (x), pool->label);
8726 x = gen_rtx_UNSPEC (GET_MODE (x), gen_rtvec (2, x, label),
8727 UNSPEC_POOL_OFFSET);
8728 return gen_rtx_CONST (GET_MODE (x), x);
8729 }
8730
8731 /* Find constant VAL of mode MODE in the constant pool POOL.
8732 Return an RTX describing the distance from the start of
8733 the pool to the location of the new constant. */
8734
8735 static rtx
8736 s390_find_constant (struct constant_pool *pool, rtx val,
8737 machine_mode mode)
8738 {
8739 struct constant *c;
8740 int i;
8741
8742 for (i = 0; i < NR_C_MODES; i++)
8743 if (constant_modes[i] == mode)
8744 break;
8745 gcc_assert (i != NR_C_MODES);
8746
8747 for (c = pool->constants[i]; c != NULL; c = c->next)
8748 if (rtx_equal_p (val, c->value))
8749 break;
8750
8751 gcc_assert (c);
8752
8753 return s390_pool_offset (pool, gen_rtx_LABEL_REF (Pmode, c->label));
8754 }
8755
8756 /* Check whether INSN is an execute. Return the label_ref to its
8757 execute target template if so, NULL_RTX otherwise. */
8758
8759 static rtx
8760 s390_execute_label (rtx insn)
8761 {
8762 if (INSN_P (insn)
8763 && GET_CODE (PATTERN (insn)) == PARALLEL
8764 && GET_CODE (XVECEXP (PATTERN (insn), 0, 0)) == UNSPEC
8765 && (XINT (XVECEXP (PATTERN (insn), 0, 0), 1) == UNSPEC_EXECUTE
8766 || XINT (XVECEXP (PATTERN (insn), 0, 0), 1) == UNSPEC_EXECUTE_JUMP))
8767 {
8768 if (XINT (XVECEXP (PATTERN (insn), 0, 0), 1) == UNSPEC_EXECUTE)
8769 return XVECEXP (XVECEXP (PATTERN (insn), 0, 0), 0, 2);
8770 else
8771 {
8772 gcc_assert (JUMP_P (insn));
8773 /* For jump insns as execute target:
8774 - There is one operand less in the parallel (the
8775 modification register of the execute is always 0).
8776 - The execute target label is wrapped into an
8777 if_then_else in order to hide it from jump analysis. */
8778 return XEXP (XVECEXP (XVECEXP (PATTERN (insn), 0, 0), 0, 0), 0);
8779 }
8780 }
8781
8782 return NULL_RTX;
8783 }
8784
8785 /* Add execute target for INSN to the constant pool POOL. */
8786
8787 static void
8788 s390_add_execute (struct constant_pool *pool, rtx insn)
8789 {
8790 struct constant *c;
8791
8792 for (c = pool->execute; c != NULL; c = c->next)
8793 if (INSN_UID (insn) == INSN_UID (c->value))
8794 break;
8795
8796 if (c == NULL)
8797 {
8798 c = (struct constant *) xmalloc (sizeof *c);
8799 c->value = insn;
8800 c->label = gen_label_rtx ();
8801 c->next = pool->execute;
8802 pool->execute = c;
8803 pool->size += 6;
8804 }
8805 }
8806
8807 /* Find execute target for INSN in the constant pool POOL.
8808 Return an RTX describing the distance from the start of
8809 the pool to the location of the execute target. */
8810
8811 static rtx
8812 s390_find_execute (struct constant_pool *pool, rtx insn)
8813 {
8814 struct constant *c;
8815
8816 for (c = pool->execute; c != NULL; c = c->next)
8817 if (INSN_UID (insn) == INSN_UID (c->value))
8818 break;
8819
8820 gcc_assert (c);
8821
8822 return s390_pool_offset (pool, gen_rtx_LABEL_REF (Pmode, c->label));
8823 }
8824
8825 /* For an execute INSN, extract the execute target template. */
8826
8827 static rtx
8828 s390_execute_target (rtx insn)
8829 {
8830 rtx pattern = PATTERN (insn);
8831 gcc_assert (s390_execute_label (insn));
8832
8833 if (XVECLEN (pattern, 0) == 2)
8834 {
8835 pattern = copy_rtx (XVECEXP (pattern, 0, 1));
8836 }
8837 else
8838 {
8839 rtvec vec = rtvec_alloc (XVECLEN (pattern, 0) - 1);
8840 int i;
8841
8842 for (i = 0; i < XVECLEN (pattern, 0) - 1; i++)
8843 RTVEC_ELT (vec, i) = copy_rtx (XVECEXP (pattern, 0, i + 1));
8844
8845 pattern = gen_rtx_PARALLEL (VOIDmode, vec);
8846 }
8847
8848 return pattern;
8849 }
8850
8851 /* Indicate that INSN cannot be duplicated. This is the case for
8852 execute insns that carry a unique label. */
8853
8854 static bool
8855 s390_cannot_copy_insn_p (rtx_insn *insn)
8856 {
8857 rtx label = s390_execute_label (insn);
8858 return label && label != const0_rtx;
8859 }
8860
8861 /* Dump out the constants in POOL. If REMOTE_LABEL is true,
8862 do not emit the pool base label. */
8863
8864 static void
8865 s390_dump_pool (struct constant_pool *pool, bool remote_label)
8866 {
8867 struct constant *c;
8868 rtx_insn *insn = pool->pool_insn;
8869 int i;
8870
8871 /* Switch to rodata section. */
8872 if (TARGET_CPU_ZARCH)
8873 {
8874 insn = emit_insn_after (gen_pool_section_start (), insn);
8875 INSN_ADDRESSES_NEW (insn, -1);
8876 }
8877
8878 /* Ensure minimum pool alignment. */
8879 if (TARGET_CPU_ZARCH)
8880 insn = emit_insn_after (gen_pool_align (GEN_INT (8)), insn);
8881 else
8882 insn = emit_insn_after (gen_pool_align (GEN_INT (4)), insn);
8883 INSN_ADDRESSES_NEW (insn, -1);
8884
8885 /* Emit pool base label. */
8886 if (!remote_label)
8887 {
8888 insn = emit_label_after (pool->label, insn);
8889 INSN_ADDRESSES_NEW (insn, -1);
8890 }
8891
8892 /* Dump constants in descending alignment requirement order,
8893 ensuring proper alignment for every constant. */
8894 for (i = 0; i < NR_C_MODES; i++)
8895 for (c = pool->constants[i]; c; c = c->next)
8896 {
8897 /* Convert UNSPEC_LTREL_OFFSET unspecs to pool-relative references. */
8898 rtx value = copy_rtx (c->value);
8899 if (GET_CODE (value) == CONST
8900 && GET_CODE (XEXP (value, 0)) == UNSPEC
8901 && XINT (XEXP (value, 0), 1) == UNSPEC_LTREL_OFFSET
8902 && XVECLEN (XEXP (value, 0), 0) == 1)
8903 value = s390_pool_offset (pool, XVECEXP (XEXP (value, 0), 0, 0));
8904
8905 insn = emit_label_after (c->label, insn);
8906 INSN_ADDRESSES_NEW (insn, -1);
8907
8908 value = gen_rtx_UNSPEC_VOLATILE (constant_modes[i],
8909 gen_rtvec (1, value),
8910 UNSPECV_POOL_ENTRY);
8911 insn = emit_insn_after (value, insn);
8912 INSN_ADDRESSES_NEW (insn, -1);
8913 }
8914
8915 /* Ensure minimum alignment for instructions. */
8916 insn = emit_insn_after (gen_pool_align (GEN_INT (2)), insn);
8917 INSN_ADDRESSES_NEW (insn, -1);
8918
8919 /* Output in-pool execute template insns. */
8920 for (c = pool->execute; c; c = c->next)
8921 {
8922 insn = emit_label_after (c->label, insn);
8923 INSN_ADDRESSES_NEW (insn, -1);
8924
8925 insn = emit_insn_after (s390_execute_target (c->value), insn);
8926 INSN_ADDRESSES_NEW (insn, -1);
8927 }
8928
8929 /* Switch back to previous section. */
8930 if (TARGET_CPU_ZARCH)
8931 {
8932 insn = emit_insn_after (gen_pool_section_end (), insn);
8933 INSN_ADDRESSES_NEW (insn, -1);
8934 }
8935
8936 insn = emit_barrier_after (insn);
8937 INSN_ADDRESSES_NEW (insn, -1);
8938
8939 /* Remove placeholder insn. */
8940 remove_insn (pool->pool_insn);
8941 }
8942
8943 /* Free all memory used by POOL. */
8944
8945 static void
8946 s390_free_pool (struct constant_pool *pool)
8947 {
8948 struct constant *c, *next;
8949 int i;
8950
8951 for (i = 0; i < NR_C_MODES; i++)
8952 for (c = pool->constants[i]; c; c = next)
8953 {
8954 next = c->next;
8955 free (c);
8956 }
8957
8958 for (c = pool->execute; c; c = next)
8959 {
8960 next = c->next;
8961 free (c);
8962 }
8963
8964 BITMAP_FREE (pool->insns);
8965 free (pool);
8966 }
8967
8968
8969 /* Collect main literal pool. Return NULL on overflow. */
8970
8971 static struct constant_pool *
8972 s390_mainpool_start (void)
8973 {
8974 struct constant_pool *pool;
8975 rtx_insn *insn;
8976
8977 pool = s390_alloc_pool ();
8978
8979 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
8980 {
8981 if (NONJUMP_INSN_P (insn)
8982 && GET_CODE (PATTERN (insn)) == SET
8983 && GET_CODE (SET_SRC (PATTERN (insn))) == UNSPEC_VOLATILE
8984 && XINT (SET_SRC (PATTERN (insn)), 1) == UNSPECV_MAIN_POOL)
8985 {
8986 /* There might be two main_pool instructions if base_reg
8987 is call-clobbered; one for shrink-wrapped code and one
8988 for the rest. We want to keep the first. */
8989 if (pool->pool_insn)
8990 {
8991 insn = PREV_INSN (insn);
8992 delete_insn (NEXT_INSN (insn));
8993 continue;
8994 }
8995 pool->pool_insn = insn;
8996 }
8997
8998 if (!TARGET_CPU_ZARCH && s390_execute_label (insn))
8999 {
9000 s390_add_execute (pool, insn);
9001 }
9002 else if (NONJUMP_INSN_P (insn) || CALL_P (insn))
9003 {
9004 rtx pool_ref = NULL_RTX;
9005 find_constant_pool_ref (PATTERN (insn), &pool_ref);
9006 if (pool_ref)
9007 {
9008 rtx constant = get_pool_constant (pool_ref);
9009 machine_mode mode = get_pool_mode (pool_ref);
9010 s390_add_constant (pool, constant, mode);
9011 }
9012 }
9013
9014 /* If hot/cold partitioning is enabled we have to make sure that
9015 the literal pool is emitted in the same section where the
9016 initialization of the literal pool base pointer takes place.
9017 emit_pool_after is only used in the non-overflow case on non-Z
9018 CPUs where we can emit the literal pool at the end of the
9019 function body within the text section. */
9020 if (NOTE_P (insn)
9021 && NOTE_KIND (insn) == NOTE_INSN_SWITCH_TEXT_SECTIONS
9022 && !pool->emit_pool_after)
9023 pool->emit_pool_after = PREV_INSN (insn);
9024 }
9025
9026 gcc_assert (pool->pool_insn || pool->size == 0);
9027
9028 if (pool->size >= 4096)
9029 {
9030 /* We're going to chunkify the pool, so remove the main
9031 pool placeholder insn. */
9032 remove_insn (pool->pool_insn);
9033
9034 s390_free_pool (pool);
9035 pool = NULL;
9036 }
9037
9038 /* If the function ends with the section where the literal pool
9039 should be emitted set the marker to its end. */
9040 if (pool && !pool->emit_pool_after)
9041 pool->emit_pool_after = get_last_insn ();
9042
9043 return pool;
9044 }
9045
9046 /* POOL holds the main literal pool as collected by s390_mainpool_start.
9047 Modify the current function to output the pool constants as well as
9048 the pool register setup instruction. */
9049
9050 static void
9051 s390_mainpool_finish (struct constant_pool *pool)
9052 {
9053 rtx base_reg = cfun->machine->base_reg;
9054
9055 /* If the pool is empty, we're done. */
9056 if (pool->size == 0)
9057 {
9058 /* We don't actually need a base register after all. */
9059 cfun->machine->base_reg = NULL_RTX;
9060
9061 if (pool->pool_insn)
9062 remove_insn (pool->pool_insn);
9063 s390_free_pool (pool);
9064 return;
9065 }
9066
9067 /* We need correct insn addresses. */
9068 shorten_branches (get_insns ());
9069
9070 /* On zSeries, we use a LARL to load the pool register. The pool is
9071 located in the .rodata section, so we emit it after the function. */
9072 if (TARGET_CPU_ZARCH)
9073 {
9074 rtx set = gen_main_base_64 (base_reg, pool->label);
9075 rtx_insn *insn = emit_insn_after (set, pool->pool_insn);
9076 INSN_ADDRESSES_NEW (insn, -1);
9077 remove_insn (pool->pool_insn);
9078
9079 insn = get_last_insn ();
9080 pool->pool_insn = emit_insn_after (gen_pool (const0_rtx), insn);
9081 INSN_ADDRESSES_NEW (pool->pool_insn, -1);
9082
9083 s390_dump_pool (pool, 0);
9084 }
9085
9086 /* On S/390, if the total size of the function's code plus literal pool
9087 does not exceed 4096 bytes, we use BASR to set up a function base
9088 pointer, and emit the literal pool at the end of the function. */
9089 else if (INSN_ADDRESSES (INSN_UID (pool->emit_pool_after))
9090 + pool->size + 8 /* alignment slop */ < 4096)
9091 {
9092 rtx set = gen_main_base_31_small (base_reg, pool->label);
9093 rtx_insn *insn = emit_insn_after (set, pool->pool_insn);
9094 INSN_ADDRESSES_NEW (insn, -1);
9095 remove_insn (pool->pool_insn);
9096
9097 insn = emit_label_after (pool->label, insn);
9098 INSN_ADDRESSES_NEW (insn, -1);
9099
9100 /* emit_pool_after will be set by s390_mainpool_start to the
9101 last insn of the section where the literal pool should be
9102 emitted. */
9103 insn = pool->emit_pool_after;
9104
9105 pool->pool_insn = emit_insn_after (gen_pool (const0_rtx), insn);
9106 INSN_ADDRESSES_NEW (pool->pool_insn, -1);
9107
9108 s390_dump_pool (pool, 1);
9109 }
9110
9111 /* Otherwise, we emit an inline literal pool and use BASR to branch
9112 over it, setting up the pool register at the same time. */
9113 else
9114 {
9115 rtx_code_label *pool_end = gen_label_rtx ();
9116
9117 rtx pat = gen_main_base_31_large (base_reg, pool->label, pool_end);
9118 rtx_insn *insn = emit_jump_insn_after (pat, pool->pool_insn);
9119 JUMP_LABEL (insn) = pool_end;
9120 INSN_ADDRESSES_NEW (insn, -1);
9121 remove_insn (pool->pool_insn);
9122
9123 insn = emit_label_after (pool->label, insn);
9124 INSN_ADDRESSES_NEW (insn, -1);
9125
9126 pool->pool_insn = emit_insn_after (gen_pool (const0_rtx), insn);
9127 INSN_ADDRESSES_NEW (pool->pool_insn, -1);
9128
9129 insn = emit_label_after (pool_end, pool->pool_insn);
9130 INSN_ADDRESSES_NEW (insn, -1);
9131
9132 s390_dump_pool (pool, 1);
9133 }
9134
9135
9136 /* Replace all literal pool references. */
9137
9138 for (rtx_insn *insn = get_insns (); insn; insn = NEXT_INSN (insn))
9139 {
9140 if (INSN_P (insn))
9141 replace_ltrel_base (&PATTERN (insn));
9142
9143 if (NONJUMP_INSN_P (insn) || CALL_P (insn))
9144 {
9145 rtx addr, pool_ref = NULL_RTX;
9146 find_constant_pool_ref (PATTERN (insn), &pool_ref);
9147 if (pool_ref)
9148 {
9149 if (s390_execute_label (insn))
9150 addr = s390_find_execute (pool, insn);
9151 else
9152 addr = s390_find_constant (pool, get_pool_constant (pool_ref),
9153 get_pool_mode (pool_ref));
9154
9155 replace_constant_pool_ref (&PATTERN (insn), pool_ref, addr);
9156 INSN_CODE (insn) = -1;
9157 }
9158 }
9159 }
9160
9161
9162 /* Free the pool. */
9163 s390_free_pool (pool);
9164 }
9165
9166 /* POOL holds the main literal pool as collected by s390_mainpool_start.
9167 We have decided we cannot use this pool, so revert all changes
9168 to the current function that were done by s390_mainpool_start. */
9169 static void
9170 s390_mainpool_cancel (struct constant_pool *pool)
9171 {
9172 /* We didn't actually change the instruction stream, so simply
9173 free the pool memory. */
9174 s390_free_pool (pool);
9175 }
9176
9177
9178 /* Chunkify the literal pool. */
9179
9180 #define S390_POOL_CHUNK_MIN 0xc00
9181 #define S390_POOL_CHUNK_MAX 0xe00
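/* A worked example of the thresholds above: a chunk may be closed once
   it exceeds S390_POOL_CHUNK_MIN (0xc00 == 3072 bytes) at a convenient
   barrier and is forced closed beyond S390_POOL_CHUNK_MAX (0xe00 ==
   3584 bytes).  The remaining slack up to the 4096-byte displacement
   limit is left for alignment padding and for the base register reload
   insns accounted for by the pessimistic extra_size bookkeeping
   below.  */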
9182
9183 static struct constant_pool *
9184 s390_chunkify_start (void)
9185 {
9186 struct constant_pool *curr_pool = NULL, *pool_list = NULL;
9187 int extra_size = 0;
9188 bitmap far_labels;
9189 rtx pending_ltrel = NULL_RTX;
9190 rtx_insn *insn;
9191
9192 rtx (*gen_reload_base) (rtx, rtx) =
9193 TARGET_CPU_ZARCH ? gen_reload_base_64 : gen_reload_base_31;
9194
9195
9196 /* We need correct insn addresses. */
9197
9198 shorten_branches (get_insns ());
9199
9200 /* Scan all insns and move literals to pool chunks. */
9201
9202 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
9203 {
9204 bool section_switch_p = false;
9205
9206 /* Check for pending LTREL_BASE. */
9207 if (INSN_P (insn))
9208 {
9209 rtx ltrel_base = find_ltrel_base (PATTERN (insn));
9210 if (ltrel_base)
9211 {
9212 gcc_assert (ltrel_base == pending_ltrel);
9213 pending_ltrel = NULL_RTX;
9214 }
9215 }
9216
9217 if (!TARGET_CPU_ZARCH && s390_execute_label (insn))
9218 {
9219 if (!curr_pool)
9220 curr_pool = s390_start_pool (&pool_list, insn);
9221
9222 s390_add_execute (curr_pool, insn);
9223 s390_add_pool_insn (curr_pool, insn);
9224 }
9225 else if (NONJUMP_INSN_P (insn) || CALL_P (insn))
9226 {
9227 rtx pool_ref = NULL_RTX;
9228 find_constant_pool_ref (PATTERN (insn), &pool_ref);
9229 if (pool_ref)
9230 {
9231 rtx constant = get_pool_constant (pool_ref);
9232 machine_mode mode = get_pool_mode (pool_ref);
9233
9234 if (!curr_pool)
9235 curr_pool = s390_start_pool (&pool_list, insn);
9236
9237 s390_add_constant (curr_pool, constant, mode);
9238 s390_add_pool_insn (curr_pool, insn);
9239
9240 /* Don't split the pool chunk between a LTREL_OFFSET load
9241 and the corresponding LTREL_BASE. */
9242 if (GET_CODE (constant) == CONST
9243 && GET_CODE (XEXP (constant, 0)) == UNSPEC
9244 && XINT (XEXP (constant, 0), 1) == UNSPEC_LTREL_OFFSET)
9245 {
9246 gcc_assert (!pending_ltrel);
9247 pending_ltrel = pool_ref;
9248 }
9249 }
9250 }
9251
9252 if (JUMP_P (insn) || JUMP_TABLE_DATA_P (insn) || LABEL_P (insn))
9253 {
9254 if (curr_pool)
9255 s390_add_pool_insn (curr_pool, insn);
9256 /* An LTREL_BASE must follow within the same basic block. */
9257 gcc_assert (!pending_ltrel);
9258 }
9259
9260 if (NOTE_P (insn))
9261 switch (NOTE_KIND (insn))
9262 {
9263 case NOTE_INSN_SWITCH_TEXT_SECTIONS:
9264 section_switch_p = true;
9265 break;
9266 case NOTE_INSN_VAR_LOCATION:
9267 continue;
9268 default:
9269 break;
9270 }
9271
9272 if (!curr_pool
9273 || INSN_ADDRESSES_SIZE () <= (size_t) INSN_UID (insn)
9274 || INSN_ADDRESSES (INSN_UID (insn)) == -1)
9275 continue;
9276
9277 if (TARGET_CPU_ZARCH)
9278 {
9279 if (curr_pool->size < S390_POOL_CHUNK_MAX)
9280 continue;
9281
9282 s390_end_pool (curr_pool, NULL);
9283 curr_pool = NULL;
9284 }
9285 else
9286 {
9287 int chunk_size = INSN_ADDRESSES (INSN_UID (insn))
9288 - INSN_ADDRESSES (INSN_UID (curr_pool->first_insn))
9289 + extra_size;
9290
9291 /* We will later have to insert base register reload insns.
9292 Those will have an effect on code size, which we need to
9293 consider here. This calculation makes rather pessimistic
9294 worst-case assumptions. */
9295 if (LABEL_P (insn))
9296 extra_size += 6;
9297
9298 if (chunk_size < S390_POOL_CHUNK_MIN
9299 && curr_pool->size < S390_POOL_CHUNK_MIN
9300 && !section_switch_p)
9301 continue;
9302
9303 /* Pool chunks can only be inserted after BARRIERs ... */
9304 if (BARRIER_P (insn))
9305 {
9306 s390_end_pool (curr_pool, insn);
9307 curr_pool = NULL;
9308 extra_size = 0;
9309 }
9310
9311 /* ... so if we don't find one in time, create one. */
9312 else if (chunk_size > S390_POOL_CHUNK_MAX
9313 || curr_pool->size > S390_POOL_CHUNK_MAX
9314 || section_switch_p)
9315 {
9316 rtx_insn *label, *jump, *barrier, *next, *prev;
9317
9318 if (!section_switch_p)
9319 {
9320 /* We can insert the barrier only after a 'real' insn. */
9321 if (! NONJUMP_INSN_P (insn) && ! CALL_P (insn))
9322 continue;
9323 if (get_attr_length (insn) == 0)
9324 continue;
9325 /* Don't separate LTREL_BASE from the corresponding
9326 LTREL_OFFSET load. */
9327 if (pending_ltrel)
9328 continue;
9329 next = insn;
9330 do
9331 {
9332 insn = next;
9333 next = NEXT_INSN (insn);
9334 }
9335 while (next
9336 && NOTE_P (next)
9337 && NOTE_KIND (next) == NOTE_INSN_VAR_LOCATION);
9338 }
9339 else
9340 {
9341 gcc_assert (!pending_ltrel);
9342
9343 /* The old pool has to end before the section switch
9344 note in order to make it part of the current
9345 section. */
9346 insn = PREV_INSN (insn);
9347 }
9348
9349 label = gen_label_rtx ();
9350 prev = insn;
9351 if (prev && NOTE_P (prev))
9352 prev = prev_nonnote_insn (prev);
9353 if (prev)
9354 jump = emit_jump_insn_after_setloc (gen_jump (label), insn,
9355 INSN_LOCATION (prev));
9356 else
9357 jump = emit_jump_insn_after_noloc (gen_jump (label), insn);
9358 barrier = emit_barrier_after (jump);
9359 insn = emit_label_after (label, barrier);
9360 JUMP_LABEL (jump) = label;
9361 LABEL_NUSES (label) = 1;
9362
9363 INSN_ADDRESSES_NEW (jump, -1);
9364 INSN_ADDRESSES_NEW (barrier, -1);
9365 INSN_ADDRESSES_NEW (insn, -1);
9366
9367 s390_end_pool (curr_pool, barrier);
9368 curr_pool = NULL;
9369 extra_size = 0;
9370 }
9371 }
9372 }
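/* For illustration: when no barrier shows up in time, the code above
   forces one, yielding a sequence along these lines (label name
   hypothetical):

       j     .Lskip          # emitted jump, JUMP_LABEL = .Lskip
                             # barrier ends the chunk here
       ...literal pool chunk dumped here by s390_chunkify_finish...
     .Lskip:                 # execution continues behind the pool

   so the new chunk is branched over at run time.  */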
9373
9374 if (curr_pool)
9375 s390_end_pool (curr_pool, NULL);
9376 gcc_assert (!pending_ltrel);
9377
9378 /* Find all labels that are branched into
9379 from an insn belonging to a different chunk. */
9380
9381 far_labels = BITMAP_ALLOC (NULL);
9382
9383 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
9384 {
9385 rtx_jump_table_data *table;
9386
9387 /* Labels marked with LABEL_PRESERVE_P can be target
9388 of non-local jumps, so we have to mark them.
9389 The same holds for named labels.
9390
9391 Don't do that, however, if it is the label before
9392 a jump table. */
9393
9394 if (LABEL_P (insn)
9395 && (LABEL_PRESERVE_P (insn) || LABEL_NAME (insn)))
9396 {
9397 rtx_insn *vec_insn = NEXT_INSN (insn);
9398 if (! vec_insn || ! JUMP_TABLE_DATA_P (vec_insn))
9399 bitmap_set_bit (far_labels, CODE_LABEL_NUMBER (insn));
9400 }
9401 /* Check potential targets in a table jump (casesi_jump). */
9402 else if (tablejump_p (insn, NULL, &table))
9403 {
9404 rtx vec_pat = PATTERN (table);
9405 int i, diff_p = GET_CODE (vec_pat) == ADDR_DIFF_VEC;
9406
9407 for (i = 0; i < XVECLEN (vec_pat, diff_p); i++)
9408 {
9409 rtx label = XEXP (XVECEXP (vec_pat, diff_p, i), 0);
9410
9411 if (s390_find_pool (pool_list, label)
9412 != s390_find_pool (pool_list, insn))
9413 bitmap_set_bit (far_labels, CODE_LABEL_NUMBER (label));
9414 }
9415 }
9416 /* If we have a direct jump (conditional or unconditional),
9417 check all potential targets. */
9418 else if (JUMP_P (insn))
9419 {
9420 rtx pat = PATTERN (insn);
9421
9422 if (GET_CODE (pat) == PARALLEL)
9423 pat = XVECEXP (pat, 0, 0);
9424
9425 if (GET_CODE (pat) == SET)
9426 {
9427 rtx label = JUMP_LABEL (insn);
9428 if (label && !ANY_RETURN_P (label))
9429 {
9430 if (s390_find_pool (pool_list, label)
9431 != s390_find_pool (pool_list, insn))
9432 bitmap_set_bit (far_labels, CODE_LABEL_NUMBER (label));
9433 }
9434 }
9435 }
9436 }
9437
9438 /* Insert base register reload insns before every pool. */
9439
9440 for (curr_pool = pool_list; curr_pool; curr_pool = curr_pool->next)
9441 {
9442 rtx new_insn = gen_reload_base (cfun->machine->base_reg,
9443 curr_pool->label);
9444 rtx_insn *insn = curr_pool->first_insn;
9445 INSN_ADDRESSES_NEW (emit_insn_before (new_insn, insn), -1);
9446 }
9447
9448 /* Insert base register reload insns at every far label. */
9449
9450 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
9451 if (LABEL_P (insn)
9452 && bitmap_bit_p (far_labels, CODE_LABEL_NUMBER (insn)))
9453 {
9454 struct constant_pool *pool = s390_find_pool (pool_list, insn);
9455 if (pool)
9456 {
9457 rtx new_insn = gen_reload_base (cfun->machine->base_reg,
9458 pool->label);
9459 INSN_ADDRESSES_NEW (emit_insn_after (new_insn, insn), -1);
9460 }
9461 }
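/* Illustrative sketch: on z/Architecture the reload insn emitted here
   is a pc-relative load of the chunk's pool label into the base
   register, roughly

       larl  %r13,.LTNx      # register and label names hypothetical

   so code entered through a far label re-establishes addressability
   of its own chunk before any literal is referenced.  */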
9462
9463
9464 BITMAP_FREE (far_labels);
9465
9466
9467 /* Recompute insn addresses. */
9468
9469 init_insn_lengths ();
9470 shorten_branches (get_insns ());
9471
9472 return pool_list;
9473 }
9474
9475 /* POOL_LIST is a chunk list as prepared by s390_chunkify_start.
9476 After we have decided to use this list, finish implementing
9477 all changes to the current function as required. */
9478
9479 static void
9480 s390_chunkify_finish (struct constant_pool *pool_list)
9481 {
9482 struct constant_pool *curr_pool = NULL;
9483 rtx_insn *insn;
9484
9485
9486 /* Replace all literal pool references. */
9487
9488 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
9489 {
9490 if (INSN_P (insn))
9491 replace_ltrel_base (&PATTERN (insn));
9492
9493 curr_pool = s390_find_pool (pool_list, insn);
9494 if (!curr_pool)
9495 continue;
9496
9497 if (NONJUMP_INSN_P (insn) || CALL_P (insn))
9498 {
9499 rtx addr, pool_ref = NULL_RTX;
9500 find_constant_pool_ref (PATTERN (insn), &pool_ref);
9501 if (pool_ref)
9502 {
9503 if (s390_execute_label (insn))
9504 addr = s390_find_execute (curr_pool, insn);
9505 else
9506 addr = s390_find_constant (curr_pool,
9507 get_pool_constant (pool_ref),
9508 get_pool_mode (pool_ref));
9509
9510 replace_constant_pool_ref (&PATTERN (insn), pool_ref, addr);
9511 INSN_CODE (insn) = -1;
9512 }
9513 }
9514 }
9515
9516 /* Dump out all literal pools. */
9517
9518 for (curr_pool = pool_list; curr_pool; curr_pool = curr_pool->next)
9519 s390_dump_pool (curr_pool, 0);
9520
9521 /* Free pool list. */
9522
9523 while (pool_list)
9524 {
9525 struct constant_pool *next = pool_list->next;
9526 s390_free_pool (pool_list);
9527 pool_list = next;
9528 }
9529 }
9530
9531 /* POOL_LIST is a chunk list as prepared by s390_chunkify_start.
9532 We have decided we cannot use this list, so revert all changes
9533 to the current function that were done by s390_chunkify_start. */
9534
9535 static void
9536 s390_chunkify_cancel (struct constant_pool *pool_list)
9537 {
9538 struct constant_pool *curr_pool = NULL;
9539 rtx_insn *insn;
9540
9541 /* Remove all pool placeholder insns. */
9542
9543 for (curr_pool = pool_list; curr_pool; curr_pool = curr_pool->next)
9544 {
9545 /* Did we insert an extra barrier? Remove it. */
9546 rtx_insn *barrier = PREV_INSN (curr_pool->pool_insn);
9547 rtx_insn *jump = barrier ? PREV_INSN (barrier) : NULL;
9548 rtx_insn *label = NEXT_INSN (curr_pool->pool_insn);
9549
9550 if (jump && JUMP_P (jump)
9551 && barrier && BARRIER_P (barrier)
9552 && label && LABEL_P (label)
9553 && GET_CODE (PATTERN (jump)) == SET
9554 && SET_DEST (PATTERN (jump)) == pc_rtx
9555 && GET_CODE (SET_SRC (PATTERN (jump))) == LABEL_REF
9556 && XEXP (SET_SRC (PATTERN (jump)), 0) == label)
9557 {
9558 remove_insn (jump);
9559 remove_insn (barrier);
9560 remove_insn (label);
9561 }
9562
9563 remove_insn (curr_pool->pool_insn);
9564 }
9565
9566 /* Remove all base register reload insns. */
9567
9568 for (insn = get_insns (); insn; )
9569 {
9570 rtx_insn *next_insn = NEXT_INSN (insn);
9571
9572 if (NONJUMP_INSN_P (insn)
9573 && GET_CODE (PATTERN (insn)) == SET
9574 && GET_CODE (SET_SRC (PATTERN (insn))) == UNSPEC
9575 && XINT (SET_SRC (PATTERN (insn)), 1) == UNSPEC_RELOAD_BASE)
9576 remove_insn (insn);
9577
9578 insn = next_insn;
9579 }
9580
9581 /* Free pool list. */
9582
9583 while (pool_list)
9584 {
9585 struct constant_pool *next = pool_list->next;
9586 s390_free_pool (pool_list);
9587 pool_list = next;
9588 }
9589 }
9590
9591 /* Output the constant pool entry EXP in mode MODE with alignment ALIGN. */
9592
9593 void
9594 s390_output_pool_entry (rtx exp, machine_mode mode, unsigned int align)
9595 {
9596 switch (GET_MODE_CLASS (mode))
9597 {
9598 case MODE_FLOAT:
9599 case MODE_DECIMAL_FLOAT:
9600 gcc_assert (GET_CODE (exp) == CONST_DOUBLE);
9601
9602 assemble_real (*CONST_DOUBLE_REAL_VALUE (exp),
9603 as_a <scalar_float_mode> (mode), align);
9604 break;
9605
9606 case MODE_INT:
9607 assemble_integer (exp, GET_MODE_SIZE (mode), align, 1);
9608 mark_symbol_refs_as_used (exp);
9609 break;
9610
9611 case MODE_VECTOR_INT:
9612 case MODE_VECTOR_FLOAT:
9613 {
9614 int i;
9615 machine_mode inner_mode;
9616 gcc_assert (GET_CODE (exp) == CONST_VECTOR);
9617
9618 inner_mode = GET_MODE_INNER (GET_MODE (exp));
9619 for (i = 0; i < XVECLEN (exp, 0); i++)
9620 s390_output_pool_entry (XVECEXP (exp, 0, i),
9621 inner_mode,
9622 i == 0
9623 ? align
9624 : GET_MODE_BITSIZE (inner_mode));
9625 }
9626 break;
9627
9628 default:
9629 gcc_unreachable ();
9630 }
9631 }
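/* Worked example of the recursion above, assuming a V4SImode constant
   and 16-byte pool alignment, e.g. the (hypothetical) call

     s390_output_pool_entry (vec_cst, V4SImode, 128);

   emits the first SImode element with the full 128-bit alignment and
   the remaining three with GET_MODE_BITSIZE (SImode) == 32, i.e. four
   consecutive 4-byte assemble_integer calls.  */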
9632
9633
9634 /* Return an RTL expression representing the value of the return address
9635 for the frame COUNT steps up from the current frame. FRAME is the
9636 frame pointer of that frame. */
9637
9638 rtx
9639 s390_return_addr_rtx (int count, rtx frame ATTRIBUTE_UNUSED)
9640 {
9641 int offset;
9642 rtx addr;
9643
9644 /* Without backchain, we fail for all but the current frame. */
9645
9646 if (!TARGET_BACKCHAIN && count > 0)
9647 return NULL_RTX;
9648
9649 /* For the current frame, we need to make sure the initial
9650 value of RETURN_REGNUM is actually saved. */
9651
9652 if (count == 0)
9653 {
9654 /* On non-z architectures branch splitting could overwrite r14. */
9655 if (TARGET_CPU_ZARCH)
9656 return get_hard_reg_initial_val (Pmode, RETURN_REGNUM);
9657 else
9658 {
9659 cfun_frame_layout.save_return_addr_p = true;
9660 return gen_rtx_MEM (Pmode, return_address_pointer_rtx);
9661 }
9662 }
9663
9664 if (TARGET_PACKED_STACK)
9665 offset = -2 * UNITS_PER_LONG;
9666 else
9667 offset = RETURN_REGNUM * UNITS_PER_LONG;
9668
9669 addr = plus_constant (Pmode, frame, offset);
9670 addr = memory_address (Pmode, addr);
9671 return gen_rtx_MEM (Pmode, addr);
9672 }
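/* A worked example of the offsets above, assuming UNITS_PER_LONG == 8:
   with the default layout the r14 slot of the selected frame is
   addressed at FRAME + 14 * 8 == FRAME + 112, while with
   -mpacked-stack it sits at FRAME - 16.  From user code a non-zero
   COUNT corresponds to, e.g.,

     void *ra = __builtin_return_address (1);   // needs -mbackchain

   since, as noted above, outer frames can only be reached when a
   backchain is maintained.  */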
9673
9674 /* Return an RTL expression representing the back chain stored in
9675 the current stack frame. */
9676
9677 rtx
9678 s390_back_chain_rtx (void)
9679 {
9680 rtx chain;
9681
9682 gcc_assert (TARGET_BACKCHAIN);
9683
9684 if (TARGET_PACKED_STACK)
9685 chain = plus_constant (Pmode, stack_pointer_rtx,
9686 STACK_POINTER_OFFSET - UNITS_PER_LONG);
9687 else
9688 chain = stack_pointer_rtx;
9689
9690 chain = gen_rtx_MEM (Pmode, chain);
9691 return chain;
9692 }
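/* Sketch of what the returned MEM denotes, assuming the standard
   (non-packed) layout where the back chain is the word at offset 0 of
   the frame: conceptually the chain can be walked like

     void **frame = (void **) current_sp;   // current_sp: hypothetical
     while (frame)
       frame = (void **) *frame;            // follow the back chain

   With -mpacked-stack the chain word instead lives at
   STACK_POINTER_OFFSET - UNITS_PER_LONG.  */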
9693
9694 /* Find first call clobbered register unused in a function.
9695 This could be used as base register in a leaf function
9696 or for holding the return address before epilogue. */
9697
9698 static int
9699 find_unused_clobbered_reg (void)
9700 {
9701 int i;
9702 for (i = 0; i < 6; i++)
9703 if (!df_regs_ever_live_p (i))
9704 return i;
9705 return 0;
9706 }
9707
9708
9709 /* Helper function for s390_regs_ever_clobbered. Sets the fields in DATA for all
9710 clobbered hard regs in SETREG. */
9711
9712 static void
9713 s390_reg_clobbered_rtx (rtx setreg, const_rtx set_insn ATTRIBUTE_UNUSED, void *data)
9714 {
9715 char *regs_ever_clobbered = (char *)data;
9716 unsigned int i, regno;
9717 machine_mode mode = GET_MODE (setreg);
9718
9719 if (GET_CODE (setreg) == SUBREG)
9720 {
9721 rtx inner = SUBREG_REG (setreg);
9722 if (!GENERAL_REG_P (inner) && !FP_REG_P (inner))
9723 return;
9724 regno = subreg_regno (setreg);
9725 }
9726 else if (GENERAL_REG_P (setreg) || FP_REG_P (setreg))
9727 regno = REGNO (setreg);
9728 else
9729 return;
9730
9731 for (i = regno;
9732 i < end_hard_regno (mode, regno);
9733 i++)
9734 regs_ever_clobbered[i] = 1;
9735 }
9736
9737 /* Walks through all basic blocks of the current function looking
9738 for clobbered hard regs using s390_reg_clobbered_rtx. The fields
9739    of the passed char array REGS_EVER_CLOBBERED are set to one for
9740 each of those regs. */
9741
9742 static void
9743 s390_regs_ever_clobbered (char regs_ever_clobbered[])
9744 {
9745 basic_block cur_bb;
9746 rtx_insn *cur_insn;
9747 unsigned int i;
9748
9749 memset (regs_ever_clobbered, 0, 32);
9750
9751 /* For non-leaf functions we have to consider all call clobbered regs to be
9752 clobbered. */
9753 if (!crtl->is_leaf)
9754 {
9755 for (i = 0; i < 32; i++)
9756 regs_ever_clobbered[i] = call_really_used_regs[i];
9757 }
9758
9759 /* Make the "magic" eh_return registers live if necessary. For regs_ever_live
9760 this work is done by liveness analysis (mark_regs_live_at_end).
9761 Special care is needed for functions containing landing pads. Landing pads
9762 may use the eh registers, but the code which sets these registers is not
9763 contained in that function. Hence s390_regs_ever_clobbered is not able to
9764 deal with this automatically. */
9765 if (crtl->calls_eh_return || cfun->machine->has_landing_pad_p)
9766 for (i = 0; EH_RETURN_DATA_REGNO (i) != INVALID_REGNUM ; i++)
9767 if (crtl->calls_eh_return
9768 || (cfun->machine->has_landing_pad_p
9769 && df_regs_ever_live_p (EH_RETURN_DATA_REGNO (i))))
9770 regs_ever_clobbered[EH_RETURN_DATA_REGNO (i)] = 1;
9771
9772 /* For nonlocal gotos all call-saved registers have to be saved.
9773 This flag is also set for the unwinding code in libgcc.
9774 See expand_builtin_unwind_init. For regs_ever_live this is done by
9775 reload. */
9776 if (crtl->saves_all_registers)
9777 for (i = 0; i < 32; i++)
9778 if (!call_really_used_regs[i])
9779 regs_ever_clobbered[i] = 1;
9780
9781 FOR_EACH_BB_FN (cur_bb, cfun)
9782 {
9783 FOR_BB_INSNS (cur_bb, cur_insn)
9784 {
9785 rtx pat;
9786
9787 if (!INSN_P (cur_insn))
9788 continue;
9789
9790 pat = PATTERN (cur_insn);
9791
9792 /* Ignore GPR restore insns. */
9793 if (epilogue_completed && RTX_FRAME_RELATED_P (cur_insn))
9794 {
9795 if (GET_CODE (pat) == SET
9796 && GENERAL_REG_P (SET_DEST (pat)))
9797 {
9798 /* lgdr */
9799 if (GET_MODE (SET_SRC (pat)) == DImode
9800 && FP_REG_P (SET_SRC (pat)))
9801 continue;
9802
9803 /* l / lg */
9804 if (GET_CODE (SET_SRC (pat)) == MEM)
9805 continue;
9806 }
9807
9808 /* lm / lmg */
9809 if (GET_CODE (pat) == PARALLEL
9810 && load_multiple_operation (pat, VOIDmode))
9811 continue;
9812 }
9813
9814 note_stores (pat,
9815 s390_reg_clobbered_rtx,
9816 regs_ever_clobbered);
9817 }
9818 }
9819 }
9820
9821 /* Determine the frame area which actually has to be accessed
9822 in the function epilogue. The values are stored at the
9823 given pointers AREA_BOTTOM (address of the lowest used stack
9824 address) and AREA_TOP (address of the first item which does
9825 not belong to the stack frame). */
9826
9827 static void
9828 s390_frame_area (int *area_bottom, int *area_top)
9829 {
9830 int b, t;
9831
9832 b = INT_MAX;
9833 t = INT_MIN;
9834
9835 if (cfun_frame_layout.first_restore_gpr != -1)
9836 {
9837 b = (cfun_frame_layout.gprs_offset
9838 + cfun_frame_layout.first_restore_gpr * UNITS_PER_LONG);
9839 t = b + (cfun_frame_layout.last_restore_gpr
9840 - cfun_frame_layout.first_restore_gpr + 1) * UNITS_PER_LONG;
9841 }
9842
9843 if (TARGET_64BIT && cfun_save_high_fprs_p)
9844 {
9845 b = MIN (b, cfun_frame_layout.f8_offset);
9846 t = MAX (t, (cfun_frame_layout.f8_offset
9847 + cfun_frame_layout.high_fprs * 8));
9848 }
9849
9850 if (!TARGET_64BIT)
9851 {
9852 if (cfun_fpr_save_p (FPR4_REGNUM))
9853 {
9854 b = MIN (b, cfun_frame_layout.f4_offset);
9855 t = MAX (t, cfun_frame_layout.f4_offset + 8);
9856 }
9857 if (cfun_fpr_save_p (FPR6_REGNUM))
9858 {
9859 b = MIN (b, cfun_frame_layout.f4_offset + 8);
9860 t = MAX (t, cfun_frame_layout.f4_offset + 16);
9861 }
9862 }
9863 *area_bottom = b;
9864 *area_top = t;
9865 }

9866 /* Update gpr_save_slots in the frame layout trying to make use of
9867 FPRs as GPR save slots.
9868 This is a helper routine of s390_register_info. */
9869
9870 static void
9871 s390_register_info_gprtofpr ()
9872 {
9873 int save_reg_slot = FPR0_REGNUM;
9874 int i, j;
9875
9876 if (!TARGET_Z10 || !TARGET_HARD_FLOAT || !crtl->is_leaf)
9877 return;
9878
9879 /* builtin_eh_return needs to be able to modify the return address
9880 on the stack. It could also adjust the FPR save slot instead but
9881 is it worth the trouble?! */
9882 if (crtl->calls_eh_return)
9883 return;
9884
9885 for (i = 15; i >= 6; i--)
9886 {
9887 if (cfun_gpr_save_slot (i) == SAVE_SLOT_NONE)
9888 continue;
9889
9890 /* Advance to the next FP register which can be used as a
9891 GPR save slot. */
9892 while ((!call_really_used_regs[save_reg_slot]
9893 || df_regs_ever_live_p (save_reg_slot)
9894 || cfun_fpr_save_p (save_reg_slot))
9895 && FP_REGNO_P (save_reg_slot))
9896 save_reg_slot++;
9897 if (!FP_REGNO_P (save_reg_slot))
9898 {
9899 /* We only want to use ldgr/lgdr if we can get rid of
9900 stm/lm entirely. So undo the gpr slot allocation in
9901 case we ran out of FPR save slots. */
9902 for (j = 6; j <= 15; j++)
9903 if (FP_REGNO_P (cfun_gpr_save_slot (j)))
9904 cfun_gpr_save_slot (j) = SAVE_SLOT_STACK;
9905 break;
9906 }
9907 cfun_gpr_save_slot (i) = save_reg_slot++;
9908 }
9909 }
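/* Illustrative outcome, assuming a z10 leaf function that clobbers
   only r14 and r6 while f0 and f1 are unused call-clobbered FPRs: the
   downward loop assigns cfun_gpr_save_slot (14) = f0 and
   cfun_gpr_save_slot (6) = f1, so prologue and epilogue can use

       ldgr  %f0,%r14        # save r14 into f0
       ldgr  %f1,%r6         # save r6 into f1
       ...
       lgdr  %r6,%f1         # restore r6
       lgdr  %r14,%f0        # restore r14

   without touching the stack.  The concrete register choices are
   hypothetical; the loop just takes the next eligible FPR.  */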
9910
9911 /* Set the bits in fpr_bitmap for FPRs which need to be saved due to
9912 stdarg.
9913 This is a helper routine for s390_register_info. */
9914
9915 static void
9916 s390_register_info_stdarg_fpr ()
9917 {
9918 int i;
9919 int min_fpr;
9920 int max_fpr;
9921
9922 /* Save the FP argument regs for stdarg. f0, f2 for 31 bit and
9923 f0-f4 for 64 bit. */
9924 if (!cfun->stdarg
9925 || !TARGET_HARD_FLOAT
9926 || !cfun->va_list_fpr_size
9927 || crtl->args.info.fprs >= FP_ARG_NUM_REG)
9928 return;
9929
9930 min_fpr = crtl->args.info.fprs;
9931 max_fpr = min_fpr + cfun->va_list_fpr_size - 1;
9932 if (max_fpr >= FP_ARG_NUM_REG)
9933 max_fpr = FP_ARG_NUM_REG - 1;
9934
9935 /* FPR argument regs start at f0. */
9936 min_fpr += FPR0_REGNUM;
9937 max_fpr += FPR0_REGNUM;
9938
9939 for (i = min_fpr; i <= max_fpr; i++)
9940 cfun_set_fpr_save (i);
9941 }
9942
9943 /* Reserve the GPR save slots for GPRs which need to be saved due to
9944 stdarg.
9945 This is a helper routine for s390_register_info. */
9946
9947 static void
9948 s390_register_info_stdarg_gpr ()
9949 {
9950 int i;
9951 int min_gpr;
9952 int max_gpr;
9953
9954 if (!cfun->stdarg
9955 || !cfun->va_list_gpr_size
9956 || crtl->args.info.gprs >= GP_ARG_NUM_REG)
9957 return;
9958
9959 min_gpr = crtl->args.info.gprs;
9960 max_gpr = min_gpr + cfun->va_list_gpr_size - 1;
9961 if (max_gpr >= GP_ARG_NUM_REG)
9962 max_gpr = GP_ARG_NUM_REG - 1;
9963
9964 /* GPR argument regs start at r2. */
9965 min_gpr += GPR2_REGNUM;
9966 max_gpr += GPR2_REGNUM;
9967
9968 /* If r6 was supposed to be saved into an FPR and now needs to go to
9969 the stack for vararg we have to adjust the restore range to make
9970 sure that the restore is done from stack as well. */
9971 if (FP_REGNO_P (cfun_gpr_save_slot (GPR6_REGNUM))
9972 && min_gpr <= GPR6_REGNUM
9973 && max_gpr >= GPR6_REGNUM)
9974 {
9975 if (cfun_frame_layout.first_restore_gpr == -1
9976 || cfun_frame_layout.first_restore_gpr > GPR6_REGNUM)
9977 cfun_frame_layout.first_restore_gpr = GPR6_REGNUM;
9978 if (cfun_frame_layout.last_restore_gpr == -1
9979 || cfun_frame_layout.last_restore_gpr < GPR6_REGNUM)
9980 cfun_frame_layout.last_restore_gpr = GPR6_REGNUM;
9981 }
9982
9983 if (cfun_frame_layout.first_save_gpr == -1
9984 || cfun_frame_layout.first_save_gpr > min_gpr)
9985 cfun_frame_layout.first_save_gpr = min_gpr;
9986
9987 if (cfun_frame_layout.last_save_gpr == -1
9988 || cfun_frame_layout.last_save_gpr < max_gpr)
9989 cfun_frame_layout.last_save_gpr = max_gpr;
9990
9991 for (i = min_gpr; i <= max_gpr; i++)
9992 cfun_gpr_save_slot (i) = SAVE_SLOT_STACK;
9993 }
9994
9995 /* Calculate the save and restore ranges for stm(g) and lm(g) in the
9996 prologue and epilogue. */
9997
9998 static void
9999 s390_register_info_set_ranges ()
10000 {
10001 int i, j;
10002
10003 /* Find the first and the last save slot supposed to use the stack
10004 to set the restore range.
10005    Vararg regs might be marked to be saved to the stack but only the
10006 call-saved regs really need restoring (i.e. r6). This code
10007 assumes that the vararg regs have not yet been recorded in
10008 cfun_gpr_save_slot. */
10009 for (i = 0; i < 16 && cfun_gpr_save_slot (i) != SAVE_SLOT_STACK; i++);
10010 for (j = 15; j > i && cfun_gpr_save_slot (j) != SAVE_SLOT_STACK; j--);
10011 cfun_frame_layout.first_restore_gpr = (i == 16) ? -1 : i;
10012 cfun_frame_layout.last_restore_gpr = (i == 16) ? -1 : j;
10013 cfun_frame_layout.first_save_gpr = (i == 16) ? -1 : i;
10014 cfun_frame_layout.last_save_gpr = (i == 16) ? -1 : j;
10015 }
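/* Worked example: if at this point only r6 and r15 carry
   SAVE_SLOT_STACK, the two scans above yield i == 6 and j == 15, so
   both the save and the restore range become r6 .. r15 and a single
   stm(g)/lm(g) pair covers them, relying on the GPR save area being
   contiguous.  */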
10016
10017 /* The GPR and FPR save slots in cfun->machine->frame_layout are set
10018 for registers which need to be saved in function prologue.
10019 This function can be used until the insns emitted for save/restore
10020 of the regs are visible in the RTL stream. */
10021
10022 static void
10023 s390_register_info ()
10024 {
10025 int i;
10026 char clobbered_regs[32];
10027
10028 gcc_assert (!epilogue_completed);
10029
10030 if (reload_completed)
10031 /* After reload we rely on our own routine to determine which
10032 registers need saving. */
10033 s390_regs_ever_clobbered (clobbered_regs);
10034 else
10035 /* During reload we use regs_ever_live as a base since reload
10036 does changes in there which we otherwise would not be aware
10037 of. */
10038 for (i = 0; i < 32; i++)
10039 clobbered_regs[i] = df_regs_ever_live_p (i);
10040
10041 for (i = 0; i < 32; i++)
10042 clobbered_regs[i] = clobbered_regs[i] && !global_regs[i];
10043
10044 /* Mark the call-saved FPRs which need to be saved.
10045 This needs to be done before checking the special GPRs since the
10046 stack pointer usage depends on whether high FPRs have to be saved
10047 or not. */
10048 cfun_frame_layout.fpr_bitmap = 0;
10049 cfun_frame_layout.high_fprs = 0;
10050 for (i = FPR0_REGNUM; i <= FPR15_REGNUM; i++)
10051 if (clobbered_regs[i] && !call_really_used_regs[i])
10052 {
10053 cfun_set_fpr_save (i);
10054 if (i >= FPR8_REGNUM)
10055 cfun_frame_layout.high_fprs++;
10056 }
10057
10058 /* Register 12 is used for GOT address, but also as temp in prologue
10059 for split-stack stdarg functions (unless r14 is available). */
10060 clobbered_regs[12]
10061 |= ((flag_pic && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
10062 || (flag_split_stack && cfun->stdarg
10063 && (crtl->is_leaf || TARGET_TPF_PROFILING
10064 || has_hard_reg_initial_val (Pmode, RETURN_REGNUM))));
10065
10066 clobbered_regs[BASE_REGNUM]
10067 |= (cfun->machine->base_reg
10068 && REGNO (cfun->machine->base_reg) == BASE_REGNUM);
10069
10070 clobbered_regs[HARD_FRAME_POINTER_REGNUM]
10071 |= !!frame_pointer_needed;
10072
10073   /* On pre-z900 machines this might take until machine-dependent
10074 reorg to decide.
10075 save_return_addr_p will only be set on non-zarch machines so
10076 there is no risk that r14 goes into an FPR instead of a stack
10077 slot. */
10078 clobbered_regs[RETURN_REGNUM]
10079 |= (!crtl->is_leaf
10080 || TARGET_TPF_PROFILING
10081 || cfun->machine->split_branches_pending_p
10082 || cfun_frame_layout.save_return_addr_p
10083 || crtl->calls_eh_return);
10084
10085 clobbered_regs[STACK_POINTER_REGNUM]
10086 |= (!crtl->is_leaf
10087 || TARGET_TPF_PROFILING
10088 || cfun_save_high_fprs_p
10089 || get_frame_size () > 0
10090 || (reload_completed && cfun_frame_layout.frame_size > 0)
10091 || cfun->calls_alloca);
10092
10093 memset (cfun_frame_layout.gpr_save_slots, SAVE_SLOT_NONE, 16);
10094
10095 for (i = 6; i < 16; i++)
10096 if (clobbered_regs[i])
10097 cfun_gpr_save_slot (i) = SAVE_SLOT_STACK;
10098
10099 s390_register_info_stdarg_fpr ();
10100 s390_register_info_gprtofpr ();
10101 s390_register_info_set_ranges ();
10102 /* stdarg functions might need to save GPRs 2 to 6. This might
10103 override the GPR->FPR save decision made by
10104 s390_register_info_gprtofpr for r6 since vararg regs must go to
10105 the stack. */
10106 s390_register_info_stdarg_gpr ();
10107 }
10108
10109 /* This function is called by s390_optimize_prologue in order to get
10110 rid of unnecessary GPR save/restore instructions. The register info
10111 for the GPRs is re-computed and the ranges are re-calculated. */
10112
10113 static void
10114 s390_optimize_register_info ()
10115 {
10116 char clobbered_regs[32];
10117 int i;
10118
10119 gcc_assert (epilogue_completed);
10120 gcc_assert (!cfun->machine->split_branches_pending_p);
10121
10122 s390_regs_ever_clobbered (clobbered_regs);
10123
10124 for (i = 0; i < 32; i++)
10125 clobbered_regs[i] = clobbered_regs[i] && !global_regs[i];
10126
10127 /* There is still special treatment needed for cases invisible to
10128 s390_regs_ever_clobbered. */
10129 clobbered_regs[RETURN_REGNUM]
10130 |= (TARGET_TPF_PROFILING
10131 /* When expanding builtin_return_addr in ESA mode we do not
10132 know whether r14 will later be needed as scratch reg when
10133 doing branch splitting. So the builtin always accesses the
10134 r14 save slot and we need to stick to the save/restore
10135 decision for r14 even if it turns out that it didn't get
10136 clobbered. */
10137 || cfun_frame_layout.save_return_addr_p
10138 || crtl->calls_eh_return);
10139
10140 memset (cfun_frame_layout.gpr_save_slots, SAVE_SLOT_NONE, 6);
10141
10142 for (i = 6; i < 16; i++)
10143 if (!clobbered_regs[i])
10144 cfun_gpr_save_slot (i) = SAVE_SLOT_NONE;
10145
10146 s390_register_info_set_ranges ();
10147 s390_register_info_stdarg_gpr ();
10148 }
10149
10150 /* Fill cfun->machine with info about frame of current function. */
10151
10152 static void
10153 s390_frame_info (void)
10154 {
10155 HOST_WIDE_INT lowest_offset;
10156
10157 cfun_frame_layout.first_save_gpr_slot = cfun_frame_layout.first_save_gpr;
10158 cfun_frame_layout.last_save_gpr_slot = cfun_frame_layout.last_save_gpr;
10159
10160 /* The va_arg builtin uses a constant distance of 16 *
10161 UNITS_PER_LONG (r0-r15) to reach the FPRs from the reg_save_area
10162 pointer. So even if we are going to save the stack pointer in an
10163 FPR we need the stack space in order to keep the offsets
10164 correct. */
10165 if (cfun->stdarg && cfun_save_arg_fprs_p)
10166 {
10167 cfun_frame_layout.last_save_gpr_slot = STACK_POINTER_REGNUM;
10168
10169 if (cfun_frame_layout.first_save_gpr_slot == -1)
10170 cfun_frame_layout.first_save_gpr_slot = STACK_POINTER_REGNUM;
10171 }
10172
10173 cfun_frame_layout.frame_size = get_frame_size ();
10174 if (!TARGET_64BIT && cfun_frame_layout.frame_size > 0x7fff0000)
10175 fatal_error (input_location,
10176 "total size of local variables exceeds architecture limit");
10177
10178 if (!TARGET_PACKED_STACK)
10179 {
10180 /* Fixed stack layout. */
10181 cfun_frame_layout.backchain_offset = 0;
10182 cfun_frame_layout.f0_offset = 16 * UNITS_PER_LONG;
10183 cfun_frame_layout.f4_offset = cfun_frame_layout.f0_offset + 2 * 8;
10184 cfun_frame_layout.f8_offset = -cfun_frame_layout.high_fprs * 8;
10185 cfun_frame_layout.gprs_offset = (cfun_frame_layout.first_save_gpr_slot
10186 * UNITS_PER_LONG);
10187 }
10188 else if (TARGET_BACKCHAIN)
10189 {
10190 /* Kernel stack layout - packed stack, backchain, no float */
10191 gcc_assert (TARGET_SOFT_FLOAT);
10192 cfun_frame_layout.backchain_offset = (STACK_POINTER_OFFSET
10193 - UNITS_PER_LONG);
10194
10195 /* The distance between the backchain and the return address
10196 save slot must not change. So we always need a slot for the
10197 stack pointer which resides in between. */
10198 cfun_frame_layout.last_save_gpr_slot = STACK_POINTER_REGNUM;
10199
10200 cfun_frame_layout.gprs_offset
10201 = cfun_frame_layout.backchain_offset - cfun_gprs_save_area_size;
10202
10203 /* FPRs will not be saved. Nevertheless pick sane values to
10204 keep area calculations valid. */
10205 cfun_frame_layout.f0_offset =
10206 cfun_frame_layout.f4_offset =
10207 cfun_frame_layout.f8_offset = cfun_frame_layout.gprs_offset;
10208 }
10209 else
10210 {
10211 int num_fprs;
10212
10213 /* Packed stack layout without backchain. */
10214
10215 /* With stdarg FPRs need their dedicated slots. */
10216 num_fprs = (TARGET_64BIT && cfun->stdarg ? 2
10217 : (cfun_fpr_save_p (FPR4_REGNUM) +
10218 cfun_fpr_save_p (FPR6_REGNUM)));
10219 cfun_frame_layout.f4_offset = STACK_POINTER_OFFSET - 8 * num_fprs;
10220
10221 num_fprs = (cfun->stdarg ? 2
10222 : (cfun_fpr_save_p (FPR0_REGNUM)
10223 + cfun_fpr_save_p (FPR2_REGNUM)));
10224 cfun_frame_layout.f0_offset = cfun_frame_layout.f4_offset - 8 * num_fprs;
10225
10226 cfun_frame_layout.gprs_offset
10227 = cfun_frame_layout.f0_offset - cfun_gprs_save_area_size;
10228
10229 cfun_frame_layout.f8_offset = (cfun_frame_layout.gprs_offset
10230 - cfun_frame_layout.high_fprs * 8);
10231 }
10232
10233 if (cfun_save_high_fprs_p)
10234 cfun_frame_layout.frame_size += cfun_frame_layout.high_fprs * 8;
10235
10236 if (!crtl->is_leaf)
10237 cfun_frame_layout.frame_size += crtl->outgoing_args_size;
10238
10239 /* In the following cases we have to allocate a STACK_POINTER_OFFSET
10240 sized area at the bottom of the stack. This is required also for
10241 leaf functions. When GCC generates a local stack reference it
10242 will always add STACK_POINTER_OFFSET to all these references. */
10243 if (crtl->is_leaf
10244 && !TARGET_TPF_PROFILING
10245 && cfun_frame_layout.frame_size == 0
10246 && !cfun->calls_alloca)
10247 return;
10248
10249 /* Calculate the number of bytes we have used in our own register
10250 save area. With the packed stack layout we can re-use the
10251 remaining bytes for normal stack elements. */
10252
10253 if (TARGET_PACKED_STACK)
10254 lowest_offset = MIN (MIN (cfun_frame_layout.f0_offset,
10255 cfun_frame_layout.f4_offset),
10256 cfun_frame_layout.gprs_offset);
10257 else
10258 lowest_offset = 0;
10259
10260 if (TARGET_BACKCHAIN)
10261 lowest_offset = MIN (lowest_offset, cfun_frame_layout.backchain_offset);
10262
10263 cfun_frame_layout.frame_size += STACK_POINTER_OFFSET - lowest_offset;
10264
10265 /* If under 31 bit an odd number of gprs has to be saved we have to
10266 adjust the frame size to sustain 8 byte alignment of stack
10267 frames. */
10268 cfun_frame_layout.frame_size = ((cfun_frame_layout.frame_size +
10269 STACK_BOUNDARY / BITS_PER_UNIT - 1)
10270 & ~(STACK_BOUNDARY / BITS_PER_UNIT - 1));
10271 }
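/* Worked example of the final rounding above, assuming a 31-bit target
   with STACK_BOUNDARY == 64: a raw frame size of 92 bytes becomes

     (92 + 8 - 1) & ~(8 - 1) == 96

   i.e. the next multiple of 8, which restores 8-byte stack frame
   alignment when an odd number of 4-byte GPR slots is in use.  */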
10272
10273 /* Generate frame layout. Fills in register and frame data for the current
10274 function in cfun->machine. This routine can be called multiple times;
10275 it will re-do the complete frame layout every time. */
10276
10277 static void
10278 s390_init_frame_layout (void)
10279 {
10280 HOST_WIDE_INT frame_size;
10281 int base_used;
10282
10283 /* After LRA the frame layout is supposed to be read-only and should
10284 not be re-computed. */
10285 if (reload_completed)
10286 return;
10287
10288 /* On S/390 machines, we may need to perform branch splitting, which
10289 will require both base and return address register. We have no
10290 choice but to assume we're going to need them until right at the
10291 end of the machine dependent reorg phase. */
10292 if (!TARGET_CPU_ZARCH)
10293 cfun->machine->split_branches_pending_p = true;
10294
10295 do
10296 {
10297 frame_size = cfun_frame_layout.frame_size;
10298
10299 /* Try to predict whether we'll need the base register. */
10300 base_used = cfun->machine->split_branches_pending_p
10301 || crtl->uses_const_pool
10302 || (!DISP_IN_RANGE (frame_size)
10303 && !CONST_OK_FOR_K (frame_size));
10304
10305 /* Decide which register to use as literal pool base. In small
10306 leaf functions, try to use an unused call-clobbered register
10307 as base register to avoid save/restore overhead. */
10308 if (!base_used)
10309 cfun->machine->base_reg = NULL_RTX;
10310 else
10311 {
10312 int br = 0;
10313
10314 if (crtl->is_leaf)
10315 /* Prefer r5 (most likely to be free). */
10316 for (br = 5; br >= 2 && df_regs_ever_live_p (br); br--)
10317 ;
10318 cfun->machine->base_reg =
10319 gen_rtx_REG (Pmode, (br >= 2) ? br : BASE_REGNUM);
10320 }
10321
10322 s390_register_info ();
10323 s390_frame_info ();
10324 }
10325 while (frame_size != cfun_frame_layout.frame_size);
10326 }
10327
10328 /* Remove the FPR clobbers from a tbegin insn if it can be proven that
10329 the TX is nonescaping. A transaction is considered escaping if
10330 there is at least one path from tbegin returning CC0 to the
10331    function exit block without a tend.
10332
10333 The check so far has some limitations:
10334 - only single tbegin/tend BBs are supported
10335 - the first cond jump after tbegin must separate the CC0 path from ~CC0
10336 - when CC is copied to a GPR and the CC0 check is done with the GPR
10337 this is not supported
10338 */
10339
10340 static void
10341 s390_optimize_nonescaping_tx (void)
10342 {
10343 const unsigned int CC0 = 1 << 3;
10344 basic_block tbegin_bb = NULL;
10345 basic_block tend_bb = NULL;
10346 basic_block bb;
10347 rtx_insn *insn;
10348 bool result = true;
10349 int bb_index;
10350 rtx_insn *tbegin_insn = NULL;
10351
10352 if (!cfun->machine->tbegin_p)
10353 return;
10354
10355 for (bb_index = 0; bb_index < n_basic_blocks_for_fn (cfun); bb_index++)
10356 {
10357 bb = BASIC_BLOCK_FOR_FN (cfun, bb_index);
10358
10359 if (!bb)
10360 continue;
10361
10362 FOR_BB_INSNS (bb, insn)
10363 {
10364 rtx ite, cc, pat, target;
10365 unsigned HOST_WIDE_INT mask;
10366
10367 if (!INSN_P (insn) || INSN_CODE (insn) <= 0)
10368 continue;
10369
10370 pat = PATTERN (insn);
10371
10372 if (GET_CODE (pat) == PARALLEL)
10373 pat = XVECEXP (pat, 0, 0);
10374
10375 if (GET_CODE (pat) != SET
10376 || GET_CODE (SET_SRC (pat)) != UNSPEC_VOLATILE)
10377 continue;
10378
10379 if (XINT (SET_SRC (pat), 1) == UNSPECV_TBEGIN)
10380 {
10381 rtx_insn *tmp;
10382
10383 tbegin_insn = insn;
10384
10385 /* Just return if the tbegin doesn't have clobbers. */
10386 if (GET_CODE (PATTERN (insn)) != PARALLEL)
10387 return;
10388
10389 if (tbegin_bb != NULL)
10390 return;
10391
10392 /* Find the next conditional jump. */
10393 for (tmp = NEXT_INSN (insn);
10394 tmp != NULL_RTX;
10395 tmp = NEXT_INSN (tmp))
10396 {
10397 if (reg_set_p (gen_rtx_REG (CCmode, CC_REGNUM), tmp))
10398 return;
10399 if (!JUMP_P (tmp))
10400 continue;
10401
10402 ite = SET_SRC (PATTERN (tmp));
10403 if (GET_CODE (ite) != IF_THEN_ELSE)
10404 continue;
10405
10406 cc = XEXP (XEXP (ite, 0), 0);
10407 if (!REG_P (cc) || !CC_REGNO_P (REGNO (cc))
10408 || GET_MODE (cc) != CCRAWmode
10409 || GET_CODE (XEXP (XEXP (ite, 0), 1)) != CONST_INT)
10410 return;
10411
10412 if (bb->succs->length () != 2)
10413 return;
10414
10415 mask = INTVAL (XEXP (XEXP (ite, 0), 1));
10416 if (GET_CODE (XEXP (ite, 0)) == NE)
10417 mask ^= 0xf;
10418
10419 if (mask == CC0)
10420 target = XEXP (ite, 1);
10421 else if (mask == (CC0 ^ 0xf))
10422 target = XEXP (ite, 2);
10423 else
10424 return;
10425
10426 {
10427 edge_iterator ei;
10428 edge e1, e2;
10429
10430 ei = ei_start (bb->succs);
10431 e1 = ei_safe_edge (ei);
10432 ei_next (&ei);
10433 e2 = ei_safe_edge (ei);
10434
10435 if (e2->flags & EDGE_FALLTHRU)
10436 {
10437 e2 = e1;
10438 e1 = ei_safe_edge (ei);
10439 }
10440
10441 if (!(e1->flags & EDGE_FALLTHRU))
10442 return;
10443
10444 tbegin_bb = (target == pc_rtx) ? e1->dest : e2->dest;
10445 }
10446 if (tmp == BB_END (bb))
10447 break;
10448 }
10449 }
10450
10451 if (XINT (SET_SRC (pat), 1) == UNSPECV_TEND)
10452 {
10453 if (tend_bb != NULL)
10454 return;
10455 tend_bb = bb;
10456 }
10457 }
10458 }
10459
10460 /* Either we successfully remove the FPR clobbers here or we are not
10461 able to do anything for this TX. Both cases don't qualify for
10462 another look. */
10463 cfun->machine->tbegin_p = false;
10464
10465 if (tbegin_bb == NULL || tend_bb == NULL)
10466 return;
10467
10468 calculate_dominance_info (CDI_POST_DOMINATORS);
10469 result = dominated_by_p (CDI_POST_DOMINATORS, tbegin_bb, tend_bb);
10470 free_dominance_info (CDI_POST_DOMINATORS);
10471
10472 if (!result)
10473 return;
10474
10475 PATTERN (tbegin_insn) = gen_rtx_PARALLEL (VOIDmode,
10476 gen_rtvec (2,
10477 XVECEXP (PATTERN (tbegin_insn), 0, 0),
10478 XVECEXP (PATTERN (tbegin_insn), 0, 1)));
10479 INSN_CODE (tbegin_insn) = -1;
10480 df_insn_rescan (tbegin_insn);
10481
10482 return;
10483 }
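/* Worked example of the mask arithmetic above: in CCRAWmode the branch
   mask carries one bit per condition code value, CC0 being 1 << 3 == 8.
   For (eq cc 8) the THEN arm is taken exactly on CC0, so TARGET becomes
   XEXP (ite, 1); for (ne cc 8) the mask is flipped first (8 ^ 0xf == 7
   == CC0 ^ 0xf), making the ELSE arm the CC0 path and TARGET
   XEXP (ite, 2).  */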
10484
10485 /* Implement TARGET_HARD_REGNO_NREGS. Because all registers in a class
10486 have the same size, this is equivalent to CLASS_MAX_NREGS. */
10487
10488 static unsigned int
10489 s390_hard_regno_nregs (unsigned int regno, machine_mode mode)
10490 {
10491 return s390_class_max_nregs (REGNO_REG_CLASS (regno), mode);
10492 }
10493
10494 /* Implement TARGET_HARD_REGNO_MODE_OK.
10495
10496 Integer modes <= word size fit into any GPR.
10497 Integer modes > word size fit into successive GPRs, starting with
10498 an even-numbered register.
10499 SImode and DImode fit into FPRs as well.
10500
10501 Floating point modes <= word size fit into any FPR or GPR.
10502 Floating point modes > word size (i.e. DFmode on 32-bit) fit
10503 into any FPR, or an even-odd GPR pair.
10504 TFmode fits only into an even-odd FPR pair.
10505
10506 Complex floating point modes fit either into two FPRs, or into
10507 successive GPRs (again starting with an even number).
10508 TCmode fits only into two successive even-odd FPR pairs.
10509
10510 Condition code modes fit only into the CC register. */
10511
10512 static bool
10513 s390_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
10514 {
10515 if (!TARGET_VX && VECTOR_NOFP_REGNO_P (regno))
10516 return false;
10517
10518 switch (REGNO_REG_CLASS (regno))
10519 {
10520 case VEC_REGS:
10521 return ((GET_MODE_CLASS (mode) == MODE_INT
10522 && s390_class_max_nregs (VEC_REGS, mode) == 1)
10523 || mode == DFmode
10524 || (TARGET_VXE && mode == SFmode)
10525 || s390_vector_mode_supported_p (mode));
10526 break;
10527 case FP_REGS:
10528 if (TARGET_VX
10529 && ((GET_MODE_CLASS (mode) == MODE_INT
10530 && s390_class_max_nregs (FP_REGS, mode) == 1)
10531 || mode == DFmode
10532 || s390_vector_mode_supported_p (mode)))
10533 return true;
10534
10535 if (REGNO_PAIR_OK (regno, mode))
10536 {
10537 if (mode == SImode || mode == DImode)
10538 return true;
10539
10540 if (FLOAT_MODE_P (mode) && GET_MODE_CLASS (mode) != MODE_VECTOR_FLOAT)
10541 return true;
10542 }
10543 break;
10544 case ADDR_REGS:
10545 if (FRAME_REGNO_P (regno) && mode == Pmode)
10546 return true;
10547
10548 /* fallthrough */
10549 case GENERAL_REGS:
10550 if (REGNO_PAIR_OK (regno, mode))
10551 {
10552 if (TARGET_ZARCH
10553 || (mode != TFmode && mode != TCmode && mode != TDmode))
10554 return true;
10555 }
10556 break;
10557 case CC_REGS:
10558 if (GET_MODE_CLASS (mode) == MODE_CC)
10559 return true;
10560 break;
10561 case ACCESS_REGS:
10562 if (REGNO_PAIR_OK (regno, mode))
10563 {
10564 if (mode == SImode || mode == Pmode)
10565 return true;
10566 }
10567 break;
10568 default:
10569 return false;
10570 }
10571
10572 return false;
10573 }
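/* A concrete instance of the REGNO_PAIR_OK checks above, assuming the
   31-bit ABI where DImode is wider than a word (shown for illustration
   only):

     s390_hard_regno_mode_ok (2, DImode)   // true: even pair r2/r3
     s390_hard_regno_mode_ok (3, DImode)   // false: odd starting regno
*/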
10574
10575 /* Implement TARGET_MODES_TIEABLE_P. */
10576
10577 static bool
10578 s390_modes_tieable_p (machine_mode mode1, machine_mode mode2)
10579 {
10580 return ((mode1 == SFmode || mode1 == DFmode)
10581 == (mode2 == SFmode || mode2 == DFmode));
10582 }
10583
10584 /* Return nonzero if register OLD_REG can be renamed to register NEW_REG. */
10585
10586 bool
10587 s390_hard_regno_rename_ok (unsigned int old_reg, unsigned int new_reg)
10588 {
10589 /* Once we've decided upon a register to use as base register, it must
10590 no longer be used for any other purpose. */
10591 if (cfun->machine->base_reg)
10592 if (REGNO (cfun->machine->base_reg) == old_reg
10593 || REGNO (cfun->machine->base_reg) == new_reg)
10594 return false;
10595
10596 /* Prevent regrename from using call-saved regs which haven't
10597 actually been saved. This is necessary since regrename assumes
10598 the backend save/restore decisions are based on
10599 df_regs_ever_live. Since we have our own routine we have to tell
10600 regrename manually about it. */
10601 if (GENERAL_REGNO_P (new_reg)
10602 && !call_really_used_regs[new_reg]
10603 && cfun_gpr_save_slot (new_reg) == SAVE_SLOT_NONE)
10604 return false;
10605
10606 return true;
10607 }
10608
10609 /* Return nonzero if register REGNO can be used as a scratch register
10610 in peephole2. */
10611
10612 static bool
10613 s390_hard_regno_scratch_ok (unsigned int regno)
10614 {
10615 /* See s390_hard_regno_rename_ok. */
10616 if (GENERAL_REGNO_P (regno)
10617 && !call_really_used_regs[regno]
10618 && cfun_gpr_save_slot (regno) == SAVE_SLOT_NONE)
10619 return false;
10620
10621 return true;
10622 }
10623
10624 /* Implement TARGET_HARD_REGNO_CALL_PART_CLOBBERED. When generating
10625 code that runs in z/Architecture mode, but conforms to the 31-bit
10626 ABI, GPRs can hold 8 bytes; the ABI guarantees only that the lower 4
10627 bytes are saved across calls, however. */
10628
10629 static bool
10630 s390_hard_regno_call_part_clobbered (unsigned int regno, machine_mode mode)
10631 {
10632 if (!TARGET_64BIT
10633 && TARGET_ZARCH
10634 && GET_MODE_SIZE (mode) > 4
10635 && ((regno >= 6 && regno <= 15) || regno == 32))
10636 return true;
10637
10638 if (TARGET_VX
10639 && GET_MODE_SIZE (mode) > 8
10640 && (((TARGET_64BIT && regno >= 24 && regno <= 31))
10641 || (!TARGET_64BIT && (regno == 18 || regno == 19))))
10642 return true;
10643
10644 return false;
10645 }
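/* Worked example for the first test above, assuming -m31 -mzarch: a
   DImode value occupies a full 64-bit GPR, but the 31-bit ABI only
   guarantees the low 4 bytes of r6-r15 across calls, so

     s390_hard_regno_call_part_clobbered (7, DImode)   // true

   whereas the same register holding an SImode value is fully
   call-saved.  */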
10646
10647 /* Maximum number of registers to represent a value of mode MODE
10648 in a register of class RCLASS. */
10649
10650 int
10651 s390_class_max_nregs (enum reg_class rclass, machine_mode mode)
10652 {
10653 int reg_size;
10654 bool reg_pair_required_p = false;
10655
10656 switch (rclass)
10657 {
10658 case FP_REGS:
10659 case VEC_REGS:
10660 reg_size = TARGET_VX ? 16 : 8;
10661
10662 /* TF and TD modes would fit into a VR but we put them into a
10663        register pair since we do not have 128-bit FP instructions on
10664 full VRs. */
10665 if (TARGET_VX
10666 && SCALAR_FLOAT_MODE_P (mode)
10667 && GET_MODE_SIZE (mode) >= 16)
10668 reg_pair_required_p = true;
10669
10670 /* Even if complex types would fit into a single FPR/VR we force
10671 them into a register pair to deal with the parts more easily.
10672 (FIXME: What about complex ints?) */
10673 if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
10674 reg_pair_required_p = true;
10675 break;
10676 case ACCESS_REGS:
10677 reg_size = 4;
10678 break;
10679 default:
10680 reg_size = UNITS_PER_WORD;
10681 break;
10682 }
10683
10684 if (reg_pair_required_p)
10685 return 2 * ((GET_MODE_SIZE (mode) / 2 + reg_size - 1) / reg_size);
10686
10687 return (GET_MODE_SIZE (mode) + reg_size - 1) / reg_size;
10688 }
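/* Worked examples of the formula above: TFmode (16 bytes) in FP_REGS
   with the vector facility sets reg_pair_required_p and yields
   2 * ((16/2 + 16 - 1) / 16) == 2 registers, matching the FPR pairs
   used by the 128-bit FP instructions; without VX the plain formula
   (16 + 8 - 1) / 8 == 2 gives the same pair.  */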
10689
10690 /* Implement TARGET_CAN_CHANGE_MODE_CLASS. */
10691
10692 static bool
10693 s390_can_change_mode_class (machine_mode from_mode,
10694 machine_mode to_mode,
10695 reg_class_t rclass)
10696 {
10697 machine_mode small_mode;
10698 machine_mode big_mode;
10699
10700 /* V1TF and TF have different representations in vector
10701 registers. */
10702 if (reg_classes_intersect_p (VEC_REGS, rclass)
10703 && ((from_mode == V1TFmode && to_mode == TFmode)
10704 || (from_mode == TFmode && to_mode == V1TFmode)))
10705 return false;
10706
10707 if (GET_MODE_SIZE (from_mode) == GET_MODE_SIZE (to_mode))
10708 return true;
10709
10710 if (GET_MODE_SIZE (from_mode) < GET_MODE_SIZE (to_mode))
10711 {
10712 small_mode = from_mode;
10713 big_mode = to_mode;
10714 }
10715 else
10716 {
10717 small_mode = to_mode;
10718 big_mode = from_mode;
10719 }
10720
10721 /* Values residing in VRs are little-endian style. All modes are
10722 placed left-aligned in an VR. This means that we cannot allow
10723 switching between modes with differing sizes. Also if the vector
10724 facility is available we still place TFmode values in VR register
10725 pairs, since the only instructions we have operating on TFmodes
10726 only deal with register pairs. Therefore we have to allow DFmode
10727 subregs of TFmodes to enable the TFmode splitters. */
10728 if (reg_classes_intersect_p (VEC_REGS, rclass)
10729 && (GET_MODE_SIZE (small_mode) < 8
10730 || s390_class_max_nregs (VEC_REGS, big_mode) == 1))
10731 return false;
10732
10733 /* Likewise for access registers, since they have only half the
10734 word size on 64-bit. */
10735 if (reg_classes_intersect_p (ACCESS_REGS, rclass))
10736 return false;
10737
10738 return true;
10739 }
10740
10741 /* Return true if we use LRA instead of reload pass. */
10742 static bool
10743 s390_lra_p (void)
10744 {
10745 return s390_lra_flag;
10746 }
10747
10748 /* Return true if register FROM can be eliminated via register TO. */
10749
10750 static bool
10751 s390_can_eliminate (const int from, const int to)
10752 {
10753 /* On zSeries machines, we have not marked the base register as fixed.
10754 Instead, we have an elimination rule BASE_REGNUM -> BASE_REGNUM.
10755 If a function requires the base register, we say here that this
10756 elimination cannot be performed. This will cause reload to free
10757 up the base register (as if it were fixed). On the other hand,
10758 if the current function does *not* require the base register, we
10759 say here the elimination succeeds, which in turn allows reload
10760 to allocate the base register for any other purpose. */
10761 if (from == BASE_REGNUM && to == BASE_REGNUM)
10762 {
10763 if (TARGET_CPU_ZARCH)
10764 {
10765 s390_init_frame_layout ();
10766 return cfun->machine->base_reg == NULL_RTX;
10767 }
10768
10769 return false;
10770 }
10771
10772 /* Everything else must point into the stack frame. */
10773 gcc_assert (to == STACK_POINTER_REGNUM
10774 || to == HARD_FRAME_POINTER_REGNUM);
10775
10776 gcc_assert (from == FRAME_POINTER_REGNUM
10777 || from == ARG_POINTER_REGNUM
10778 || from == RETURN_ADDRESS_POINTER_REGNUM);
10779
10780 /* Make sure we actually saved the return address. */
10781 if (from == RETURN_ADDRESS_POINTER_REGNUM)
10782 if (!crtl->calls_eh_return
10783 && !cfun->stdarg
10784 && !cfun_frame_layout.save_return_addr_p)
10785 return false;
10786
10787 return true;
10788 }
10789
10790 /* Return offset between register FROM and TO initially after prolog. */
10791
10792 HOST_WIDE_INT
10793 s390_initial_elimination_offset (int from, int to)
10794 {
10795 HOST_WIDE_INT offset;
10796
10797 /* ??? Why are we called for non-eliminable pairs? */
10798 if (!s390_can_eliminate (from, to))
10799 return 0;
10800
10801 switch (from)
10802 {
10803 case FRAME_POINTER_REGNUM:
10804 offset = (get_frame_size()
10805 + STACK_POINTER_OFFSET
10806 + crtl->outgoing_args_size);
10807 break;
10808
10809 case ARG_POINTER_REGNUM:
10810 s390_init_frame_layout ();
10811 offset = cfun_frame_layout.frame_size + STACK_POINTER_OFFSET;
10812 break;
10813
10814 case RETURN_ADDRESS_POINTER_REGNUM:
10815 s390_init_frame_layout ();
10816
10817 if (cfun_frame_layout.first_save_gpr_slot == -1)
10818 {
10819 /* If it turns out that for stdarg nothing went into the reg
10820 save area we also do not need the return address
10821 pointer. */
10822 if (cfun->stdarg && !cfun_save_arg_fprs_p)
10823 return 0;
10824
10825 gcc_unreachable ();
10826 }
10827
10828 /* In order to make the following work it is not necessary for
10829 r14 to have a save slot. It is sufficient if one other GPR
10830 got one. Since the GPRs are always stored without gaps we
10831 are able to calculate where the r14 save slot would
10832 reside. */
10833 offset = (cfun_frame_layout.frame_size + cfun_frame_layout.gprs_offset +
10834 (RETURN_REGNUM - cfun_frame_layout.first_save_gpr_slot) *
10835 UNITS_PER_LONG);
10836 break;
10837
10838 case BASE_REGNUM:
10839 offset = 0;
10840 break;
10841
10842 default:
10843 gcc_unreachable ();
10844 }
10845
10846 return offset;
10847 }
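/* Worked example for the RETURN_ADDRESS_POINTER case above, assuming
   UNITS_PER_LONG == 8 and the usual fixed layout with
   first_save_gpr_slot == 6 (so gprs_offset == 48): the r14 slot is
   located at

     frame_size + 48 + (14 - 6) * 8 == frame_size + 112

   whether or not r14 itself was stored, because the GPR save area is
   contiguous.  */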
10848
10849 /* Emit insn to save fpr REGNUM at offset OFFSET relative
10850 to register BASE. Return generated insn. */
10851
10852 static rtx
10853 save_fpr (rtx base, int offset, int regnum)
10854 {
10855 rtx addr;
10856 addr = gen_rtx_MEM (DFmode, plus_constant (Pmode, base, offset));
10857
10858 if (regnum >= 16 && regnum <= (16 + FP_ARG_NUM_REG))
10859 set_mem_alias_set (addr, get_varargs_alias_set ());
10860 else
10861 set_mem_alias_set (addr, get_frame_alias_set ());
10862
10863 return emit_move_insn (addr, gen_rtx_REG (DFmode, regnum));
10864 }
10865
10866 /* Emit insn to restore fpr REGNUM from offset OFFSET relative
10867 to register BASE. Return generated insn. */
10868
10869 static rtx
10870 restore_fpr (rtx base, int offset, int regnum)
10871 {
10872 rtx addr;
10873 addr = gen_rtx_MEM (DFmode, plus_constant (Pmode, base, offset));
10874 set_mem_alias_set (addr, get_frame_alias_set ());
10875
10876 return emit_move_insn (gen_rtx_REG (DFmode, regnum), addr);
10877 }
10878
10879 /* Return true if REGNO is a global register, but not one
10880    of the special ones that need to be saved/restored anyway. */
10881
10882 static inline bool
10883 global_not_special_regno_p (int regno)
10884 {
10885 return (global_regs[regno]
10886 /* These registers are special and need to be
10887 restored in any case. */
10888 && !(regno == STACK_POINTER_REGNUM
10889 || regno == RETURN_REGNUM
10890 || regno == BASE_REGNUM
10891 || (flag_pic && regno == (int)PIC_OFFSET_TABLE_REGNUM)));
10892 }
10893
10894 /* Generate insn to save registers FIRST to LAST into
10895 the register save area located at offset OFFSET
10896 relative to register BASE. */
10897
10898 static rtx
10899 save_gprs (rtx base, int offset, int first, int last)
10900 {
10901 rtx addr, insn, note;
10902 int i;
10903
10904 addr = plus_constant (Pmode, base, offset);
10905 addr = gen_rtx_MEM (Pmode, addr);
10906
10907 set_mem_alias_set (addr, get_frame_alias_set ());
10908
10909 /* Special-case single register. */
10910 if (first == last)
10911 {
10912 if (TARGET_64BIT)
10913 insn = gen_movdi (addr, gen_rtx_REG (Pmode, first));
10914 else
10915 insn = gen_movsi (addr, gen_rtx_REG (Pmode, first));
10916
10917 if (!global_not_special_regno_p (first))
10918 RTX_FRAME_RELATED_P (insn) = 1;
10919 return insn;
10920 }
10921
10922
10923 insn = gen_store_multiple (addr,
10924 gen_rtx_REG (Pmode, first),
10925 GEN_INT (last - first + 1));
10926
10927 if (first <= 6 && cfun->stdarg)
10928 for (i = 0; i < XVECLEN (PATTERN (insn), 0); i++)
10929 {
10930 rtx mem = XEXP (XVECEXP (PATTERN (insn), 0, i), 0);
10931
10932 if (first + i <= 6)
10933 set_mem_alias_set (mem, get_varargs_alias_set ());
10934 }
10935
10936 /* We need to set the FRAME_RELATED flag on all SETs
10937 inside the store-multiple pattern.
10938
10939 However, we must not emit DWARF records for registers 2..5
10940 if they are stored for use by variable arguments ...
10941
   ??? Unfortunately, it is not enough to simply not set the
   FRAME_RELATED flag for those SETs, because the first SET
   of the PARALLEL is always treated as if it had the flag
   set, even if it does not.  Therefore we emit a new pattern
   without those registers as a REG_FRAME_RELATED_EXPR note.  */
10947
10948 if (first >= 6 && !global_not_special_regno_p (first))
10949 {
10950 rtx pat = PATTERN (insn);
10951
10952 for (i = 0; i < XVECLEN (pat, 0); i++)
10953 if (GET_CODE (XVECEXP (pat, 0, i)) == SET
10954 && !global_not_special_regno_p (REGNO (SET_SRC (XVECEXP (pat,
10955 0, i)))))
10956 RTX_FRAME_RELATED_P (XVECEXP (pat, 0, i)) = 1;
10957
10958 RTX_FRAME_RELATED_P (insn) = 1;
10959 }
10960 else if (last >= 6)
10961 {
10962 int start;
10963
10964 for (start = first >= 6 ? first : 6; start <= last; start++)
10965 if (!global_not_special_regno_p (start))
10966 break;
10967
10968 if (start > last)
10969 return insn;
10970
10971 addr = plus_constant (Pmode, base,
10972 offset + (start - first) * UNITS_PER_LONG);
10973
10974 if (start == last)
10975 {
10976 if (TARGET_64BIT)
10977 note = gen_movdi (gen_rtx_MEM (Pmode, addr),
10978 gen_rtx_REG (Pmode, start));
10979 else
10980 note = gen_movsi (gen_rtx_MEM (Pmode, addr),
10981 gen_rtx_REG (Pmode, start));
10982 note = PATTERN (note);
10983
10984 add_reg_note (insn, REG_FRAME_RELATED_EXPR, note);
10985 RTX_FRAME_RELATED_P (insn) = 1;
10986
10987 return insn;
10988 }
10989
10990 note = gen_store_multiple (gen_rtx_MEM (Pmode, addr),
10991 gen_rtx_REG (Pmode, start),
10992 GEN_INT (last - start + 1));
10993 note = PATTERN (note);
10994
10995 add_reg_note (insn, REG_FRAME_RELATED_EXPR, note);
10996
10997 for (i = 0; i < XVECLEN (note, 0); i++)
10998 if (GET_CODE (XVECEXP (note, 0, i)) == SET
10999 && !global_not_special_regno_p (REGNO (SET_SRC (XVECEXP (note,
11000 0, i)))))
11001 RTX_FRAME_RELATED_P (XVECEXP (note, 0, i)) = 1;
11002
11003 RTX_FRAME_RELATED_P (insn) = 1;
11004 }
11005
11006 return insn;
11007 }
11008
11009 /* Generate insn to restore registers FIRST to LAST from
11010 the register save area located at offset OFFSET
11011 relative to register BASE. */
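/* This is the counterpart of save_gprs above; a register range is
   usually reloaded with a single load-multiple ("lmg"/"lm").  */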
11012
11013 static rtx
restore_gprs (rtx base, int offset, int first, int last)
11015 {
11016 rtx addr, insn;
11017
11018 addr = plus_constant (Pmode, base, offset);
11019 addr = gen_rtx_MEM (Pmode, addr);
11020 set_mem_alias_set (addr, get_frame_alias_set ());
11021
11022 /* Special-case single register. */
11023 if (first == last)
11024 {
11025 if (TARGET_64BIT)
11026 insn = gen_movdi (gen_rtx_REG (Pmode, first), addr);
11027 else
11028 insn = gen_movsi (gen_rtx_REG (Pmode, first), addr);
11029
11030 RTX_FRAME_RELATED_P (insn) = 1;
11031 return insn;
11032 }
11033
11034 insn = gen_load_multiple (gen_rtx_REG (Pmode, first),
11035 addr,
11036 GEN_INT (last - first + 1));
11037 RTX_FRAME_RELATED_P (insn) = 1;
11038 return insn;
11039 }
11040
11041 /* Return insn sequence to load the GOT register. */
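/* On z/Architecture CPUs this typically amounts to a single
   "larl %r12,_GLOBAL_OFFSET_TABLE_"; on older CPUs the GOT address is
   composed from a literal-pool entry plus the literal-pool base
   (a sketch of the two branches below).  */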
11042
11043 rtx_insn *
s390_load_got (void)
11045 {
11046 rtx_insn *insns;
11047
  /* We cannot use pic_offset_table_rtx here since this function is
     also used for non-PIC code when __tls_get_offset is called; in
     that case neither PIC_OFFSET_TABLE_REGNUM nor pic_offset_table_rtx
     is usable.  */
11052 rtx got_rtx = gen_rtx_REG (Pmode, 12);
11053
11054 start_sequence ();
11055
11056 if (TARGET_CPU_ZARCH)
11057 {
11058 emit_move_insn (got_rtx, s390_got_symbol ());
11059 }
11060 else
11061 {
11062 rtx offset;
11063
11064 offset = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, s390_got_symbol ()),
11065 UNSPEC_LTREL_OFFSET);
11066 offset = gen_rtx_CONST (Pmode, offset);
11067 offset = force_const_mem (Pmode, offset);
11068
11069 emit_move_insn (got_rtx, offset);
11070
11071 offset = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (offset, 0)),
11072 UNSPEC_LTREL_BASE);
11073 offset = gen_rtx_PLUS (Pmode, got_rtx, offset);
11074
11075 emit_move_insn (got_rtx, offset);
11076 }
11077
11078 insns = get_insns ();
11079 end_sequence ();
11080 return insns;
11081 }
11082
11083 /* This ties together stack memory (MEM with an alias set of frame_alias_set)
11084 and the change to the stack pointer. */
11085
11086 static void
s390_emit_stack_tie (void)
11088 {
11089 rtx mem = gen_frame_mem (BLKmode,
11090 gen_rtx_REG (Pmode, STACK_POINTER_REGNUM));
11091
11092 emit_insn (gen_stack_tie (mem));
11093 }
11094
11095 /* Copy GPRS into FPR save slots. */
11096
11097 static void
s390_save_gprs_to_fprs (void)
11099 {
11100 int i;
11101
11102 if (!TARGET_Z10 || !TARGET_HARD_FLOAT || !crtl->is_leaf)
11103 return;
11104
11105 for (i = 6; i < 16; i++)
11106 {
11107 if (FP_REGNO_P (cfun_gpr_save_slot (i)))
11108 {
11109 rtx_insn *insn =
11110 emit_move_insn (gen_rtx_REG (DImode, cfun_gpr_save_slot (i)),
11111 gen_rtx_REG (DImode, i));
11112 RTX_FRAME_RELATED_P (insn) = 1;
	  /* This prevents dwarf2cfi from interpreting the set.  If it
	     did, it might emit def_cfa_register notes setting an FPR
	     as the new CFA.  */
11116 add_reg_note (insn, REG_CFA_REGISTER, copy_rtx (PATTERN (insn)));
11117 }
11118 }
11119 }
11120
11121 /* Restore GPRs from FPR save slots. */
11122
11123 static void
s390_restore_gprs_from_fprs (void)
11125 {
11126 int i;
11127
11128 if (!TARGET_Z10 || !TARGET_HARD_FLOAT || !crtl->is_leaf)
11129 return;
11130
11131 for (i = 6; i < 16; i++)
11132 {
11133 rtx_insn *insn;
11134
11135 if (!FP_REGNO_P (cfun_gpr_save_slot (i)))
11136 continue;
11137
11138 rtx fpr = gen_rtx_REG (DImode, cfun_gpr_save_slot (i));
11139
11140 if (i == STACK_POINTER_REGNUM)
11141 insn = emit_insn (gen_stack_restore_from_fpr (fpr));
11142 else
11143 insn = emit_move_insn (gen_rtx_REG (DImode, i), fpr);
11144
11145 df_set_regs_ever_live (i, true);
11146 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DImode, i));
11147 if (i == STACK_POINTER_REGNUM)
11148 add_reg_note (insn, REG_CFA_DEF_CFA,
11149 plus_constant (Pmode, stack_pointer_rtx,
11150 STACK_POINTER_OFFSET));
11151 RTX_FRAME_RELATED_P (insn) = 1;
11152 }
11153 }
11154
11155
11156 /* A pass run immediately before shrink-wrapping and prologue and epilogue
11157 generation. */
11158
11159 namespace {
11160
11161 const pass_data pass_data_s390_early_mach =
11162 {
11163 RTL_PASS, /* type */
11164 "early_mach", /* name */
11165 OPTGROUP_NONE, /* optinfo_flags */
11166 TV_MACH_DEP, /* tv_id */
11167 0, /* properties_required */
11168 0, /* properties_provided */
11169 0, /* properties_destroyed */
11170 0, /* todo_flags_start */
11171 ( TODO_df_verify | TODO_df_finish ), /* todo_flags_finish */
11172 };
11173
11174 class pass_s390_early_mach : public rtl_opt_pass
11175 {
11176 public:
  pass_s390_early_mach (gcc::context *ctxt)
11178 : rtl_opt_pass (pass_data_s390_early_mach, ctxt)
11179 {}
11180
11181 /* opt_pass methods: */
11182 virtual unsigned int execute (function *);
11183
11184 }; // class pass_s390_early_mach
11185
11186 unsigned int
pass_s390_early_mach::execute (function *fun)
11188 {
11189 rtx_insn *insn;
11190
11191 /* Try to get rid of the FPR clobbers. */
11192 s390_optimize_nonescaping_tx ();
11193
11194 /* Re-compute register info. */
11195 s390_register_info ();
11196
11197 /* If we're using a base register, ensure that it is always valid for
11198 the first non-prologue instruction. */
11199 if (fun->machine->base_reg)
11200 emit_insn_at_entry (gen_main_pool (fun->machine->base_reg));
11201
11202 /* Annotate all constant pool references to let the scheduler know
11203 they implicitly use the base register. */
11204 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
11205 if (INSN_P (insn))
11206 {
11207 annotate_constant_pool_refs (&PATTERN (insn));
11208 df_insn_rescan (insn);
11209 }
11210 return 0;
11211 }
11212
11213 } // anon namespace
11214
/* Calculate TARGET = REG + OFFSET as s390_emit_prologue would do it:
   - push immediates that are too big into the literal pool and
     annotate the refs,
   - emit frame-related notes for stack pointer changes.  */
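/* A rough sketch of the instruction selection below:
     offset == 0                          -> lr/lgr
     offset within the displacement range -> la
     offset satisfying constraint K       -> ahi/aghi
     Op/On with TARGET_EXTIMM             -> alfi/algfi resp. slfi/slgfi
   Anything else is forced into the literal pool and added from there.  */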
11218
11219 static rtx
s390_prologue_plus_offset (rtx target, rtx reg, rtx offset,
			   bool frame_related_p)
11221 {
11222 rtx insn;
11223 rtx orig_offset = offset;
11224
11225 gcc_assert (REG_P (target));
11226 gcc_assert (REG_P (reg));
11227 gcc_assert (CONST_INT_P (offset));
11228
11229 if (offset == const0_rtx) /* lr/lgr */
11230 {
11231 insn = emit_move_insn (target, reg);
11232 }
11233 else if (DISP_IN_RANGE (INTVAL (offset))) /* la */
11234 {
11235 insn = emit_move_insn (target, gen_rtx_PLUS (Pmode, reg,
11236 offset));
11237 }
11238 else
11239 {
11240 if (!satisfies_constraint_K (offset) /* ahi/aghi */
11241 && (!TARGET_EXTIMM
11242 || (!satisfies_constraint_Op (offset) /* alfi/algfi */
11243 && !satisfies_constraint_On (offset)))) /* slfi/slgfi */
11244 offset = force_const_mem (Pmode, offset);
11245
11246 if (target != reg)
11247 {
11248 insn = emit_move_insn (target, reg);
11249 RTX_FRAME_RELATED_P (insn) = frame_related_p ? 1 : 0;
11250 }
11251
11252 insn = emit_insn (gen_add2_insn (target, offset));
11253
11254 if (!CONST_INT_P (offset))
11255 {
11256 annotate_constant_pool_refs (&PATTERN (insn));
11257
11258 if (frame_related_p)
11259 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
11260 gen_rtx_SET (target,
11261 gen_rtx_PLUS (Pmode, target,
11262 orig_offset)));
11263 }
11264 }
11265
11266 RTX_FRAME_RELATED_P (insn) = frame_related_p ? 1 : 0;
11267
11268 /* If this is a stack adjustment and we are generating a stack clash
11269 prologue, then add a REG_STACK_CHECK note to signal that this insn
11270 should be left alone. */
11271 if (flag_stack_clash_protection && target == stack_pointer_rtx)
11272 add_reg_note (insn, REG_STACK_CHECK, const0_rtx);
11273
11274 return insn;
11275 }
11276
11277 /* Emit a compare instruction with a volatile memory access as stack
11278 probe. It does not waste store tags and does not clobber any
11279 registers apart from the condition code. */
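/* The probe itself is just a compare of %r0 against the volatile word
   at ADDR - e.g. something like "cg %r0,4088(%r15)" (illustrative) -
   so it touches the page without modifying anything but the condition
   code.  */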
11280 static void
s390_emit_stack_probe (rtx addr)
11282 {
11283 rtx tmp = gen_rtx_MEM (Pmode, addr);
11284 MEM_VOLATILE_P (tmp) = 1;
11285 s390_emit_compare (EQ, gen_rtx_REG (Pmode, 0), tmp);
11286 emit_insn (gen_blockage ());
11287 }
11288
11289 /* Use a runtime loop if we have to emit more probes than this. */
11290 #define MIN_UNROLL_PROBES 3
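/* E.g. with the default 4 KiB probe interval, frames needing one or
   two probes are probed with straight-line code; anything needing
   three or more uses the loop (assuming the usual parameter
   defaults).  */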
11291
11292 /* Allocate SIZE bytes of stack space, using TEMP_REG as a temporary
11293 if necessary. LAST_PROBE_OFFSET contains the offset of the closest
11294 probe relative to the stack pointer.
11295
11296 Note that SIZE is negative.
11297
11298 The return value is true if TEMP_REG has been clobbered. */
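/* A worked example, assuming a 4 KiB probe interval: SIZE == -16384
   gives rounded_size == 16384, num_probes == 4 and residual == 0, so
   the probe loop is used; SIZE == -9000 gives rounded_size == 8192,
   num_probes == 2 and residual == 808, so two unrolled probes are
   emitted followed by the residual allocation.  */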
11299 static bool
allocate_stack_space (rtx size, HOST_WIDE_INT last_probe_offset,
		      rtx temp_reg)
11302 {
11303 bool temp_reg_clobbered_p = false;
11304 HOST_WIDE_INT probe_interval
11305 = 1 << PARAM_VALUE (PARAM_STACK_CLASH_PROTECTION_PROBE_INTERVAL);
11306 HOST_WIDE_INT guard_size
11307 = 1 << PARAM_VALUE (PARAM_STACK_CLASH_PROTECTION_GUARD_SIZE);
11308
11309 if (flag_stack_clash_protection)
11310 {
11311 if (last_probe_offset + -INTVAL (size) < guard_size)
11312 dump_stack_clash_frame_info (NO_PROBE_SMALL_FRAME, true);
11313 else
11314 {
11315 rtx offset = GEN_INT (probe_interval - UNITS_PER_LONG);
11316 HOST_WIDE_INT rounded_size = -INTVAL (size) & -probe_interval;
11317 HOST_WIDE_INT num_probes = rounded_size / probe_interval;
11318 HOST_WIDE_INT residual = -INTVAL (size) - rounded_size;
11319
11320 if (num_probes < MIN_UNROLL_PROBES)
11321 {
11322 /* Emit unrolled probe statements. */
11323
11324 for (unsigned int i = 0; i < num_probes; i++)
11325 {
11326 s390_prologue_plus_offset (stack_pointer_rtx,
11327 stack_pointer_rtx,
11328 GEN_INT (-probe_interval), true);
11329 s390_emit_stack_probe (gen_rtx_PLUS (Pmode,
11330 stack_pointer_rtx,
11331 offset));
11332 }
11333 dump_stack_clash_frame_info (PROBE_INLINE, residual != 0);
11334 }
11335 else
11336 {
11337 /* Emit a loop probing the pages. */
11338
11339 rtx_code_label *loop_start_label = gen_label_rtx ();
11340
11341 /* From now on temp_reg will be the CFA register. */
11342 s390_prologue_plus_offset (temp_reg, stack_pointer_rtx,
11343 GEN_INT (-rounded_size), true);
11344 emit_label (loop_start_label);
11345
11346 s390_prologue_plus_offset (stack_pointer_rtx,
11347 stack_pointer_rtx,
11348 GEN_INT (-probe_interval), false);
11349 s390_emit_stack_probe (gen_rtx_PLUS (Pmode,
11350 stack_pointer_rtx,
11351 offset));
11352 emit_cmp_and_jump_insns (stack_pointer_rtx, temp_reg,
11353 GT, NULL_RTX,
11354 Pmode, 1, loop_start_label);
11355
	      /* Without this, make_edges ICEs.  */
11357 JUMP_LABEL (get_last_insn ()) = loop_start_label;
11358 LABEL_NUSES (loop_start_label) = 1;
11359
11360 /* That's going to be a NOP since stack pointer and
11361 temp_reg are supposed to be the same here. We just
11362 emit it to set the CFA reg back to r15. */
11363 s390_prologue_plus_offset (stack_pointer_rtx, temp_reg,
11364 const0_rtx, true);
11365 temp_reg_clobbered_p = true;
11366 dump_stack_clash_frame_info (PROBE_LOOP, residual != 0);
11367 }
11368
11369 /* Handle any residual allocation request. */
11370 s390_prologue_plus_offset (stack_pointer_rtx,
11371 stack_pointer_rtx,
11372 GEN_INT (-residual), true);
11373 last_probe_offset += residual;
11374 if (last_probe_offset >= probe_interval)
11375 s390_emit_stack_probe (gen_rtx_PLUS (Pmode,
11376 stack_pointer_rtx,
11377 GEN_INT (residual
11378 - UNITS_PER_LONG)));
11379
11380 return temp_reg_clobbered_p;
11381 }
11382 }
11383
11384 /* Subtract frame size from stack pointer. */
11385 s390_prologue_plus_offset (stack_pointer_rtx,
11386 stack_pointer_rtx,
11387 size, true);
11388
11389 return temp_reg_clobbered_p;
11390 }
11391
11392 /* Expand the prologue into a bunch of separate insns. */
11393
11394 void
s390_emit_prologue (void)
11396 {
11397 rtx insn, addr;
11398 rtx temp_reg;
11399 int i;
11400 int offset;
11401 int next_fpr = 0;
11402
  /* Choose the best register to use as a temporary within the prologue.
     TPF with profiling must avoid register 14 - the tracing function
     needs the original contents of r14 to be preserved.  */
11406
11407 if (!has_hard_reg_initial_val (Pmode, RETURN_REGNUM)
11408 && !crtl->is_leaf
11409 && !TARGET_TPF_PROFILING)
11410 temp_reg = gen_rtx_REG (Pmode, RETURN_REGNUM);
11411 else if (flag_split_stack && cfun->stdarg)
11412 temp_reg = gen_rtx_REG (Pmode, 12);
11413 else
11414 temp_reg = gen_rtx_REG (Pmode, 1);
11415
11416 /* When probing for stack-clash mitigation, we have to track the distance
11417 between the stack pointer and closest known reference.
11418
11419 Most of the time we have to make a worst case assumption. The
11420 only exception is when TARGET_BACKCHAIN is active, in which case
11421 we know *sp (offset 0) was written. */
11422 HOST_WIDE_INT probe_interval
11423 = 1 << PARAM_VALUE (PARAM_STACK_CLASH_PROTECTION_PROBE_INTERVAL);
11424 HOST_WIDE_INT last_probe_offset
11425 = (TARGET_BACKCHAIN
11426 ? (TARGET_PACKED_STACK ? STACK_POINTER_OFFSET - UNITS_PER_LONG : 0)
11427 : probe_interval - (STACK_BOUNDARY / UNITS_PER_WORD));
11428
11429 s390_save_gprs_to_fprs ();
11430
11431 /* Save call saved gprs. */
11432 if (cfun_frame_layout.first_save_gpr != -1)
11433 {
11434 insn = save_gprs (stack_pointer_rtx,
11435 cfun_frame_layout.gprs_offset +
11436 UNITS_PER_LONG * (cfun_frame_layout.first_save_gpr
11437 - cfun_frame_layout.first_save_gpr_slot),
11438 cfun_frame_layout.first_save_gpr,
11439 cfun_frame_layout.last_save_gpr);
11440
11441 /* This is not 100% correct. If we have more than one register saved,
11442 then LAST_PROBE_OFFSET can move even closer to sp. */
11443 last_probe_offset
11444 = (cfun_frame_layout.gprs_offset +
11445 UNITS_PER_LONG * (cfun_frame_layout.first_save_gpr
11446 - cfun_frame_layout.first_save_gpr_slot));
11447
11448 emit_insn (insn);
11449 }
11450
11451 /* Dummy insn to mark literal pool slot. */
11452
11453 if (cfun->machine->base_reg)
11454 emit_insn (gen_main_pool (cfun->machine->base_reg));
11455
11456 offset = cfun_frame_layout.f0_offset;
11457
11458 /* Save f0 and f2. */
11459 for (i = FPR0_REGNUM; i <= FPR0_REGNUM + 1; i++)
11460 {
11461 if (cfun_fpr_save_p (i))
11462 {
11463 save_fpr (stack_pointer_rtx, offset, i);
11464 if (offset < last_probe_offset)
11465 last_probe_offset = offset;
11466 offset += 8;
11467 }
11468 else if (!TARGET_PACKED_STACK || cfun->stdarg)
11469 offset += 8;
11470 }
11471
11472 /* Save f4 and f6. */
11473 offset = cfun_frame_layout.f4_offset;
11474 for (i = FPR4_REGNUM; i <= FPR4_REGNUM + 1; i++)
11475 {
11476 if (cfun_fpr_save_p (i))
11477 {
11478 insn = save_fpr (stack_pointer_rtx, offset, i);
11479 if (offset < last_probe_offset)
11480 last_probe_offset = offset;
11481 offset += 8;
11482
11483 /* If f4 and f6 are call clobbered they are saved due to
11484 stdargs and therefore are not frame related. */
11485 if (!call_really_used_regs[i])
11486 RTX_FRAME_RELATED_P (insn) = 1;
11487 }
11488 else if (!TARGET_PACKED_STACK || call_really_used_regs[i])
11489 offset += 8;
11490 }
11491
11492 if (TARGET_PACKED_STACK
11493 && cfun_save_high_fprs_p
11494 && cfun_frame_layout.f8_offset + cfun_frame_layout.high_fprs * 8 > 0)
11495 {
11496 offset = (cfun_frame_layout.f8_offset
11497 + (cfun_frame_layout.high_fprs - 1) * 8);
11498
11499 for (i = FPR15_REGNUM; i >= FPR8_REGNUM && offset >= 0; i--)
11500 if (cfun_fpr_save_p (i))
11501 {
11502 insn = save_fpr (stack_pointer_rtx, offset, i);
11503 if (offset < last_probe_offset)
11504 last_probe_offset = offset;
11505
11506 RTX_FRAME_RELATED_P (insn) = 1;
11507 offset -= 8;
11508 }
11509 if (offset >= cfun_frame_layout.f8_offset)
11510 next_fpr = i;
11511 }
11512
11513 if (!TARGET_PACKED_STACK)
11514 next_fpr = cfun_save_high_fprs_p ? FPR15_REGNUM : 0;
11515
11516 if (flag_stack_usage_info)
11517 current_function_static_stack_size = cfun_frame_layout.frame_size;
11518
11519 /* Decrement stack pointer. */
11520
11521 if (cfun_frame_layout.frame_size > 0)
11522 {
11523 rtx frame_off = GEN_INT (-cfun_frame_layout.frame_size);
11524 rtx_insn *stack_pointer_backup_loc;
11525 bool temp_reg_clobbered_p;
11526
11527 if (s390_stack_size)
11528 {
11529 HOST_WIDE_INT stack_guard;
11530
11531 if (s390_stack_guard)
11532 stack_guard = s390_stack_guard;
11533 else
11534 {
	      /* If no value for the stack guard is provided, the smallest
		 power of 2 larger than the current frame size is chosen.  */
11537 stack_guard = 1;
11538 while (stack_guard < cfun_frame_layout.frame_size)
11539 stack_guard <<= 1;
11540 }
11541
11542 if (cfun_frame_layout.frame_size >= s390_stack_size)
11543 {
11544 warning (0, "frame size of function %qs is %wd"
11545 " bytes exceeding user provided stack limit of "
11546 "%d bytes. "
11547 "An unconditional trap is added.",
11548 current_function_name(), cfun_frame_layout.frame_size,
11549 s390_stack_size);
11550 emit_insn (gen_trap ());
11551 emit_barrier ();
11552 }
11553 else
11554 {
11555 /* stack_guard has to be smaller than s390_stack_size.
11556 Otherwise we would emit an AND with zero which would
11557 not match the test under mask pattern. */
11558 if (stack_guard >= s390_stack_size)
11559 {
11560 warning (0, "frame size of function %qs is %wd"
11561 " bytes which is more than half the stack size. "
11562 "The dynamic check would not be reliable. "
11563 "No check emitted for this function.",
11564 current_function_name(),
11565 cfun_frame_layout.frame_size);
11566 }
11567 else
11568 {
11569 HOST_WIDE_INT stack_check_mask = ((s390_stack_size - 1)
11570 & ~(stack_guard - 1));
11571
11572 rtx t = gen_rtx_AND (Pmode, stack_pointer_rtx,
11573 GEN_INT (stack_check_mask));
11574 if (TARGET_64BIT)
11575 emit_insn (gen_ctrapdi4 (gen_rtx_EQ (VOIDmode,
11576 t, const0_rtx),
11577 t, const0_rtx, const0_rtx));
11578 else
11579 emit_insn (gen_ctrapsi4 (gen_rtx_EQ (VOIDmode,
11580 t, const0_rtx),
11581 t, const0_rtx, const0_rtx));
11582 }
11583 }
11584 }
11585
11586 if (s390_warn_framesize > 0
11587 && cfun_frame_layout.frame_size >= s390_warn_framesize)
11588 warning (0, "frame size of %qs is %wd bytes",
11589 current_function_name (), cfun_frame_layout.frame_size);
11590
11591 if (s390_warn_dynamicstack_p && cfun->calls_alloca)
11592 warning (0, "%qs uses dynamic stack allocation", current_function_name ());
11593
11594 /* Save the location where we could backup the incoming stack
11595 pointer. */
11596 stack_pointer_backup_loc = get_last_insn ();
11597
11598 temp_reg_clobbered_p = allocate_stack_space (frame_off, last_probe_offset,
11599 temp_reg);
11600
11601 if (TARGET_BACKCHAIN || next_fpr)
11602 {
11603 if (temp_reg_clobbered_p)
11604 {
11605 /* allocate_stack_space had to make use of temp_reg and
11606 we need it to hold a backup of the incoming stack
11607 pointer. Calculate back that value from the current
11608 stack pointer. */
11609 s390_prologue_plus_offset (temp_reg, stack_pointer_rtx,
11610 GEN_INT (cfun_frame_layout.frame_size),
11611 false);
11612 }
11613 else
11614 {
	      /* allocate_stack_space didn't actually require temp_reg.
		 Insert the stack pointer backup insn before the stack
		 pointer decrement code - knowing now that the value
		 will survive.  */
11619 emit_insn_after (gen_move_insn (temp_reg, stack_pointer_rtx),
11620 stack_pointer_backup_loc);
11621 }
11622 }
11623
11624 /* Set backchain. */
11625
11626 if (TARGET_BACKCHAIN)
11627 {
11628 if (cfun_frame_layout.backchain_offset)
11629 addr = gen_rtx_MEM (Pmode,
11630 plus_constant (Pmode, stack_pointer_rtx,
11631 cfun_frame_layout.backchain_offset));
11632 else
11633 addr = gen_rtx_MEM (Pmode, stack_pointer_rtx);
11634 set_mem_alias_set (addr, get_frame_alias_set ());
11635 insn = emit_insn (gen_move_insn (addr, temp_reg));
11636 }
11637
11638 /* If we support non-call exceptions (e.g. for Java),
11639 we need to make sure the backchain pointer is set up
11640 before any possibly trapping memory access. */
11641 if (TARGET_BACKCHAIN && cfun->can_throw_non_call_exceptions)
11642 {
11643 addr = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (VOIDmode));
11644 emit_clobber (addr);
11645 }
11646 }
11647 else if (flag_stack_clash_protection)
11648 dump_stack_clash_frame_info (NO_PROBE_NO_FRAME, false);
11649
11650 /* Save fprs 8 - 15 (64 bit ABI). */
11651
11652 if (cfun_save_high_fprs_p && next_fpr)
11653 {
11654 /* If the stack might be accessed through a different register
11655 we have to make sure that the stack pointer decrement is not
11656 moved below the use of the stack slots. */
11657 s390_emit_stack_tie ();
11658
11659 insn = emit_insn (gen_add2_insn (temp_reg,
11660 GEN_INT (cfun_frame_layout.f8_offset)));
11661
11662 offset = 0;
11663
11664 for (i = FPR8_REGNUM; i <= next_fpr; i++)
11665 if (cfun_fpr_save_p (i))
11666 {
11667 rtx addr = plus_constant (Pmode, stack_pointer_rtx,
11668 cfun_frame_layout.frame_size
11669 + cfun_frame_layout.f8_offset
11670 + offset);
11671
11672 insn = save_fpr (temp_reg, offset, i);
11673 offset += 8;
11674 RTX_FRAME_RELATED_P (insn) = 1;
11675 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
11676 gen_rtx_SET (gen_rtx_MEM (DFmode, addr),
11677 gen_rtx_REG (DFmode, i)));
11678 }
11679 }
11680
11681 /* Set frame pointer, if needed. */
11682
11683 if (frame_pointer_needed)
11684 {
11685 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
11686 RTX_FRAME_RELATED_P (insn) = 1;
11687 }
11688
11689 /* Set up got pointer, if needed. */
11690
11691 if (flag_pic && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
11692 {
11693 rtx_insn *insns = s390_load_got ();
11694
11695 for (rtx_insn *insn = insns; insn; insn = NEXT_INSN (insn))
11696 annotate_constant_pool_refs (&PATTERN (insn));
11697
11698 emit_insn (insns);
11699 }
11700
11701 if (TARGET_TPF_PROFILING)
11702 {
11703 /* Generate a BAS instruction to serve as a function
11704 entry intercept to facilitate the use of tracing
11705 algorithms located at the branch target. */
11706 emit_insn (gen_prologue_tpf ());
11707
11708 /* Emit a blockage here so that all code
11709 lies between the profiling mechanisms. */
11710 emit_insn (gen_blockage ());
11711 }
11712 }
11713
11714 /* Expand the epilogue into a bunch of separate insns. */
11715
11716 void
s390_emit_epilogue (bool sibcall)
11718 {
11719 rtx frame_pointer, return_reg, cfa_restores = NULL_RTX;
11720 int area_bottom, area_top, offset = 0;
11721 int next_offset;
11722 int i;
11723
11724 if (TARGET_TPF_PROFILING)
11725 {
11726
      /* Generate a BAS instruction to serve as a function
	 exit intercept to facilitate the use of tracing
	 algorithms located at the branch target.  */
11730
11731 /* Emit a blockage here so that all code
11732 lies between the profiling mechanisms. */
11733 emit_insn (gen_blockage ());
11734
11735 emit_insn (gen_epilogue_tpf ());
11736 }
11737
11738 /* Check whether to use frame or stack pointer for restore. */
11739
11740 frame_pointer = (frame_pointer_needed
11741 ? hard_frame_pointer_rtx : stack_pointer_rtx);
11742
11743 s390_frame_area (&area_bottom, &area_top);
11744
11745 /* Check whether we can access the register save area.
11746 If not, increment the frame pointer as required. */
11747
11748 if (area_top <= area_bottom)
11749 {
11750 /* Nothing to restore. */
11751 }
11752 else if (DISP_IN_RANGE (cfun_frame_layout.frame_size + area_bottom)
11753 && DISP_IN_RANGE (cfun_frame_layout.frame_size + area_top - 1))
11754 {
11755 /* Area is in range. */
11756 offset = cfun_frame_layout.frame_size;
11757 }
11758 else
11759 {
11760 rtx insn, frame_off, cfa;
11761
11762 offset = area_bottom < 0 ? -area_bottom : 0;
11763 frame_off = GEN_INT (cfun_frame_layout.frame_size - offset);
11764
11765 cfa = gen_rtx_SET (frame_pointer,
11766 gen_rtx_PLUS (Pmode, frame_pointer, frame_off));
11767 if (DISP_IN_RANGE (INTVAL (frame_off)))
11768 {
11769 insn = gen_rtx_SET (frame_pointer,
11770 gen_rtx_PLUS (Pmode, frame_pointer, frame_off));
11771 insn = emit_insn (insn);
11772 }
11773 else
11774 {
11775 if (!CONST_OK_FOR_K (INTVAL (frame_off)))
11776 frame_off = force_const_mem (Pmode, frame_off);
11777
11778 insn = emit_insn (gen_add2_insn (frame_pointer, frame_off));
11779 annotate_constant_pool_refs (&PATTERN (insn));
11780 }
11781 add_reg_note (insn, REG_CFA_ADJUST_CFA, cfa);
11782 RTX_FRAME_RELATED_P (insn) = 1;
11783 }
11784
11785 /* Restore call saved fprs. */
11786
11787 if (TARGET_64BIT)
11788 {
11789 if (cfun_save_high_fprs_p)
11790 {
11791 next_offset = cfun_frame_layout.f8_offset;
11792 for (i = FPR8_REGNUM; i <= FPR15_REGNUM; i++)
11793 {
11794 if (cfun_fpr_save_p (i))
11795 {
11796 restore_fpr (frame_pointer,
11797 offset + next_offset, i);
11798 cfa_restores
11799 = alloc_reg_note (REG_CFA_RESTORE,
11800 gen_rtx_REG (DFmode, i), cfa_restores);
11801 next_offset += 8;
11802 }
11803 }
11804 }
11805
11806 }
11807 else
11808 {
11809 next_offset = cfun_frame_layout.f4_offset;
11810 /* f4, f6 */
11811 for (i = FPR4_REGNUM; i <= FPR4_REGNUM + 1; i++)
11812 {
11813 if (cfun_fpr_save_p (i))
11814 {
11815 restore_fpr (frame_pointer,
11816 offset + next_offset, i);
11817 cfa_restores
11818 = alloc_reg_note (REG_CFA_RESTORE,
11819 gen_rtx_REG (DFmode, i), cfa_restores);
11820 next_offset += 8;
11821 }
11822 else if (!TARGET_PACKED_STACK)
11823 next_offset += 8;
11824 }
11825
11826 }
11827
11828 /* Return register. */
11829
11830 return_reg = gen_rtx_REG (Pmode, RETURN_REGNUM);
11831
11832 /* Restore call saved gprs. */
11833
11834 if (cfun_frame_layout.first_restore_gpr != -1)
11835 {
11836 rtx insn, addr;
11837 int i;
11838
      /* Check for global registers and save them
	 to the stack locations from where they get restored.  */
11841
11842 for (i = cfun_frame_layout.first_restore_gpr;
11843 i <= cfun_frame_layout.last_restore_gpr;
11844 i++)
11845 {
11846 if (global_not_special_regno_p (i))
11847 {
11848 addr = plus_constant (Pmode, frame_pointer,
11849 offset + cfun_frame_layout.gprs_offset
11850 + (i - cfun_frame_layout.first_save_gpr_slot)
11851 * UNITS_PER_LONG);
11852 addr = gen_rtx_MEM (Pmode, addr);
11853 set_mem_alias_set (addr, get_frame_alias_set ());
11854 emit_move_insn (addr, gen_rtx_REG (Pmode, i));
11855 }
11856 else
11857 cfa_restores
11858 = alloc_reg_note (REG_CFA_RESTORE,
11859 gen_rtx_REG (Pmode, i), cfa_restores);
11860 }
11861
      /* Fetch the return address from the stack before the load
	 multiple; this helps scheduling.
11864
11865 Only do this if we already decided that r14 needs to be
11866 saved to a stack slot. (And not just because r14 happens to
11867 be in between two GPRs which need saving.) Otherwise it
11868 would be difficult to take that decision back in
11869 s390_optimize_prologue.
11870
11871 This optimization is only helpful on in-order machines. */
11872 if (! sibcall
11873 && cfun_gpr_save_slot (RETURN_REGNUM) == SAVE_SLOT_STACK
11874 && s390_tune <= PROCESSOR_2097_Z10)
11875 {
11876 int return_regnum = find_unused_clobbered_reg();
11877 if (!return_regnum
11878 || (TARGET_INDIRECT_BRANCH_NOBP_RET_OPTION
11879 && !TARGET_CPU_Z10
11880 && return_regnum == INDIRECT_BRANCH_THUNK_REGNUM))
11881 {
11882 gcc_assert (INDIRECT_BRANCH_THUNK_REGNUM != 4);
11883 return_regnum = 4;
11884 }
11885 return_reg = gen_rtx_REG (Pmode, return_regnum);
11886
11887 addr = plus_constant (Pmode, frame_pointer,
11888 offset + cfun_frame_layout.gprs_offset
11889 + (RETURN_REGNUM
11890 - cfun_frame_layout.first_save_gpr_slot)
11891 * UNITS_PER_LONG);
11892 addr = gen_rtx_MEM (Pmode, addr);
11893 set_mem_alias_set (addr, get_frame_alias_set ());
11894 emit_move_insn (return_reg, addr);
11895
11896 /* Once we did that optimization we have to make sure
11897 s390_optimize_prologue does not try to remove the store
11898 of r14 since we will not be able to find the load issued
11899 here. */
11900 cfun_frame_layout.save_return_addr_p = true;
11901 }
11902
11903 insn = restore_gprs (frame_pointer,
11904 offset + cfun_frame_layout.gprs_offset
11905 + (cfun_frame_layout.first_restore_gpr
11906 - cfun_frame_layout.first_save_gpr_slot)
11907 * UNITS_PER_LONG,
11908 cfun_frame_layout.first_restore_gpr,
11909 cfun_frame_layout.last_restore_gpr);
11910 insn = emit_insn (insn);
11911 REG_NOTES (insn) = cfa_restores;
11912 add_reg_note (insn, REG_CFA_DEF_CFA,
11913 plus_constant (Pmode, stack_pointer_rtx,
11914 STACK_POINTER_OFFSET));
11915 RTX_FRAME_RELATED_P (insn) = 1;
11916 }
11917
11918 s390_restore_gprs_from_fprs ();
11919
11920 if (! sibcall)
11921 emit_jump_insn (gen_return_use (return_reg));
11922 }
11923
11924 /* Implement TARGET_SET_UP_BY_PROLOGUE. */
11925
11926 static void
s300_set_up_by_prologue (hard_reg_set_container *regs)
11928 {
11929 if (cfun->machine->base_reg
11930 && !call_really_used_regs[REGNO (cfun->machine->base_reg)])
11931 SET_HARD_REG_BIT (regs->set, REGNO (cfun->machine->base_reg));
11932 }
11933
11934 /* -fsplit-stack support. */
11935
11936 /* A SYMBOL_REF for __morestack. */
11937 static GTY(()) rtx morestack_ref;
11938
11939 /* When using -fsplit-stack, the allocation routines set a field in
11940 the TCB to the bottom of the stack plus this much space, measured
11941 in bytes. */
11942
11943 #define SPLIT_STACK_AVAILABLE 1024
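/* As a consequence, frames of up to SPLIT_STACK_AVAILABLE bytes can
   skip the addition in the check below and compare the stack pointer
   directly against __private_ss.  */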
11944
11945 /* Emit -fsplit-stack prologue, which goes before the regular function
11946 prologue. */
11947
11948 void
s390_expand_split_stack_prologue (void)
11950 {
11951 rtx r1, guard, cc = NULL;
11952 rtx_insn *insn;
11953 /* Offset from thread pointer to __private_ss. */
11954 int psso = TARGET_64BIT ? 0x38 : 0x20;
11955 /* Pointer size in bytes. */
11956 /* Frame size and argument size - the two parameters to __morestack. */
11957 HOST_WIDE_INT frame_size = cfun_frame_layout.frame_size;
11958 /* Align argument size to 8 bytes - simplifies __morestack code. */
11959 HOST_WIDE_INT args_size = crtl->args.size >= 0
11960 ? ((crtl->args.size + 7) & ~7)
11961 : 0;
11962 /* Label to be called by __morestack. */
11963 rtx_code_label *call_done = NULL;
11964 rtx_code_label *parm_base = NULL;
11965 rtx tmp;
11966
11967 gcc_assert (flag_split_stack && reload_completed);
11968 if (!TARGET_CPU_ZARCH)
11969 {
11970 sorry ("CPUs older than z900 are not supported for -fsplit-stack");
11971 return;
11972 }
11973
11974 r1 = gen_rtx_REG (Pmode, 1);
11975
11976 /* If no stack frame will be allocated, don't do anything. */
11977 if (!frame_size)
11978 {
11979 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
11980 {
11981 /* If va_start is used, just use r15. */
11982 emit_move_insn (r1,
11983 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
11984 GEN_INT (STACK_POINTER_OFFSET)));
11985
11986 }
11987 return;
11988 }
11989
11990 if (morestack_ref == NULL_RTX)
11991 {
11992 morestack_ref = gen_rtx_SYMBOL_REF (Pmode, "__morestack");
11993 SYMBOL_REF_FLAGS (morestack_ref) |= (SYMBOL_FLAG_LOCAL
11994 | SYMBOL_FLAG_FUNCTION);
11995 }
11996
11997 if (CONST_OK_FOR_K (frame_size) || CONST_OK_FOR_Op (frame_size))
11998 {
11999 /* If frame_size will fit in an add instruction, do a stack space
12000 check, and only call __morestack if there's not enough space. */
12001
12002 /* Get thread pointer. r1 is the only register we can always destroy - r0
12003 could contain a static chain (and cannot be used to address memory
12004 anyway), r2-r6 can contain parameters, and r6-r15 are callee-saved. */
12005 emit_move_insn (r1, gen_rtx_REG (Pmode, TP_REGNUM));
12006 /* Aim at __private_ss. */
12007 guard = gen_rtx_MEM (Pmode, plus_constant (Pmode, r1, psso));
12008
      /* If less than 1 KiB is used, skip the addition and compare
	 directly with __private_ss.  */
12011 if (frame_size > SPLIT_STACK_AVAILABLE)
12012 {
12013 emit_move_insn (r1, guard);
12014 if (TARGET_64BIT)
12015 emit_insn (gen_adddi3 (r1, r1, GEN_INT (frame_size)));
12016 else
12017 emit_insn (gen_addsi3 (r1, r1, GEN_INT (frame_size)));
12018 guard = r1;
12019 }
12020
12021 /* Compare the (maybe adjusted) guard with the stack pointer. */
12022 cc = s390_emit_compare (LT, stack_pointer_rtx, guard);
12023 }
12024
12025 call_done = gen_label_rtx ();
12026 parm_base = gen_label_rtx ();
12027
12028 /* Emit the parameter block. */
12029 tmp = gen_split_stack_data (parm_base, call_done,
12030 GEN_INT (frame_size),
12031 GEN_INT (args_size));
12032 insn = emit_insn (tmp);
12033 add_reg_note (insn, REG_LABEL_OPERAND, call_done);
12034 LABEL_NUSES (call_done)++;
12035 add_reg_note (insn, REG_LABEL_OPERAND, parm_base);
12036 LABEL_NUSES (parm_base)++;
12037
12038 /* %r1 = litbase. */
12039 insn = emit_move_insn (r1, gen_rtx_LABEL_REF (VOIDmode, parm_base));
12040 add_reg_note (insn, REG_LABEL_OPERAND, parm_base);
12041 LABEL_NUSES (parm_base)++;
12042
  /* Now, we need to call __morestack.  It has very special calling
     conventions: it preserves the param/return/static chain registers
     for the call to the main function body, and looks for its own
     parameters at %r1.  */
12046
12047 if (cc != NULL)
12048 {
12049 tmp = gen_split_stack_cond_call (morestack_ref, cc, call_done);
12050
12051 insn = emit_jump_insn (tmp);
12052 JUMP_LABEL (insn) = call_done;
12053 LABEL_NUSES (call_done)++;
12054
12055 /* Mark the jump as very unlikely to be taken. */
12056 add_reg_br_prob_note (insn,
12057 profile_probability::very_unlikely ());
12058
12059 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
12060 {
12061 /* If va_start is used, and __morestack was not called, just use
12062 r15. */
12063 emit_move_insn (r1,
12064 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
12065 GEN_INT (STACK_POINTER_OFFSET)));
12066 }
12067 }
12068 else
12069 {
12070 tmp = gen_split_stack_call (morestack_ref, call_done);
12071 insn = emit_jump_insn (tmp);
12072 JUMP_LABEL (insn) = call_done;
12073 LABEL_NUSES (call_done)++;
12074 emit_barrier ();
12075 }
12076
12077 /* __morestack will call us here. */
12078
12079 emit_label (call_done);
12080 }
12081
12082 /* We may have to tell the dataflow pass that the split stack prologue
12083 is initializing a register. */
12084
12085 static void
s390_live_on_entry (bitmap regs)
12087 {
12088 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
12089 {
12090 gcc_assert (flag_split_stack);
12091 bitmap_set_bit (regs, 1);
12092 }
12093 }
12094
12095 /* Return true if the function can use simple_return to return outside
12096 of a shrink-wrapped region. At present shrink-wrapping is supported
12097 in all cases. */
12098
12099 bool
s390_can_use_simple_return_insn (void)
12101 {
12102 return true;
12103 }
12104
12105 /* Return true if the epilogue is guaranteed to contain only a return
12106 instruction and if a direct return can therefore be used instead.
12107 One of the main advantages of using direct return instructions
12108 is that we can then use conditional returns. */
12109
12110 bool
s390_can_use_return_insn (void)
12112 {
12113 int i;
12114
12115 if (!reload_completed)
12116 return false;
12117
12118 if (crtl->profile)
12119 return false;
12120
12121 if (TARGET_TPF_PROFILING)
12122 return false;
12123
12124 for (i = 0; i < 16; i++)
12125 if (cfun_gpr_save_slot (i) != SAVE_SLOT_NONE)
12126 return false;
12127
12128 /* For 31 bit this is not covered by the frame_size check below
12129 since f4, f6 are saved in the register save area without needing
12130 additional stack space. */
12131 if (!TARGET_64BIT
12132 && (cfun_fpr_save_p (FPR4_REGNUM) || cfun_fpr_save_p (FPR6_REGNUM)))
12133 return false;
12134
12135 if (cfun->machine->base_reg
12136 && !call_really_used_regs[REGNO (cfun->machine->base_reg)])
12137 return false;
12138
12139 return cfun_frame_layout.frame_size == 0;
12140 }
12141
12142 /* The VX ABI differs for vararg functions. Therefore we need the
12143 prototype of the callee to be available when passing vector type
12144 values. */
12145 static const char *
s390_invalid_arg_for_unprototyped_fn (const_tree typelist,
				      const_tree funcdecl, const_tree val)
12147 {
12148 return ((TARGET_VX_ABI
12149 && typelist == 0
12150 && VECTOR_TYPE_P (TREE_TYPE (val))
12151 && (funcdecl == NULL_TREE
12152 || (TREE_CODE (funcdecl) == FUNCTION_DECL
12153 && DECL_BUILT_IN_CLASS (funcdecl) != BUILT_IN_MD)))
12154 ? N_("vector argument passed to unprototyped function")
12155 : NULL);
12156 }
12157
12158
12159 /* Return the size in bytes of a function argument of
12160 type TYPE and/or mode MODE. At least one of TYPE or
12161 MODE must be specified. */
12162
12163 static int
s390_function_arg_size (machine_mode mode, const_tree type)
12165 {
12166 if (type)
12167 return int_size_in_bytes (type);
12168
12169 /* No type info available for some library calls ... */
12170 if (mode != BLKmode)
12171 return GET_MODE_SIZE (mode);
12172
  /* If we have neither type nor mode, abort.  */
12174 gcc_unreachable ();
12175 }
12176
12177 /* Return true if a function argument of type TYPE and mode MODE
12178 is to be passed in a vector register, if available. */
12179
12180 bool
s390_function_arg_vector (machine_mode mode, const_tree type)
12182 {
12183 if (!TARGET_VX_ABI)
12184 return false;
12185
12186 if (s390_function_arg_size (mode, type) > 16)
12187 return false;
12188
12189 /* No type info available for some library calls ... */
12190 if (!type)
12191 return VECTOR_MODE_P (mode);
12192
12193 /* The ABI says that record types with a single member are treated
12194 just like that member would be. */
12195 while (TREE_CODE (type) == RECORD_TYPE)
12196 {
12197 tree field, single = NULL_TREE;
12198
12199 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
12200 {
12201 if (TREE_CODE (field) != FIELD_DECL)
12202 continue;
12203
12204 if (single == NULL_TREE)
12205 single = TREE_TYPE (field);
12206 else
12207 return false;
12208 }
12209
12210 if (single == NULL_TREE)
12211 return false;
12212 else
12213 {
	  /* If the field declaration adds extra bytes due to
	     e.g. padding, this is not accepted as a vector type.  */
12216 if (int_size_in_bytes (single) <= 0
12217 || int_size_in_bytes (single) != int_size_in_bytes (type))
12218 return false;
12219 type = single;
12220 }
12221 }
12222
12223 return VECTOR_TYPE_P (type);
12224 }
12225
12226 /* Return true if a function argument of type TYPE and mode MODE
12227 is to be passed in a floating-point register, if available. */
12228
12229 static bool
s390_function_arg_float (machine_mode mode, const_tree type)
12231 {
12232 if (s390_function_arg_size (mode, type) > 8)
12233 return false;
12234
12235 /* Soft-float changes the ABI: no floating-point registers are used. */
12236 if (TARGET_SOFT_FLOAT)
12237 return false;
12238
12239 /* No type info available for some library calls ... */
12240 if (!type)
12241 return mode == SFmode || mode == DFmode || mode == SDmode || mode == DDmode;
12242
12243 /* The ABI says that record types with a single member are treated
12244 just like that member would be. */
12245 while (TREE_CODE (type) == RECORD_TYPE)
12246 {
12247 tree field, single = NULL_TREE;
12248
12249 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
12250 {
12251 if (TREE_CODE (field) != FIELD_DECL)
12252 continue;
12253
12254 if (single == NULL_TREE)
12255 single = TREE_TYPE (field);
12256 else
12257 return false;
12258 }
12259
12260 if (single == NULL_TREE)
12261 return false;
12262 else
12263 type = single;
12264 }
12265
12266 return TREE_CODE (type) == REAL_TYPE;
12267 }
12268
12269 /* Return true if a function argument of type TYPE and mode MODE
12270 is to be passed in an integer register, or a pair of integer
12271 registers, if available. */
12272
12273 static bool
s390_function_arg_integer (machine_mode mode, const_tree type)
12275 {
12276 int size = s390_function_arg_size (mode, type);
12277 if (size > 8)
12278 return false;
12279
12280 /* No type info available for some library calls ... */
12281 if (!type)
12282 return GET_MODE_CLASS (mode) == MODE_INT
12283 || (TARGET_SOFT_FLOAT && SCALAR_FLOAT_MODE_P (mode));
12284
12285 /* We accept small integral (and similar) types. */
12286 if (INTEGRAL_TYPE_P (type)
12287 || POINTER_TYPE_P (type)
12288 || TREE_CODE (type) == NULLPTR_TYPE
12289 || TREE_CODE (type) == OFFSET_TYPE
12290 || (TARGET_SOFT_FLOAT && TREE_CODE (type) == REAL_TYPE))
12291 return true;
12292
12293 /* We also accept structs of size 1, 2, 4, 8 that are not
12294 passed in floating-point registers. */
12295 if (AGGREGATE_TYPE_P (type)
12296 && exact_log2 (size) >= 0
12297 && !s390_function_arg_float (mode, type))
12298 return true;
12299
12300 return false;
12301 }
12302
12303 /* Return 1 if a function argument of type TYPE and mode MODE
12304 is to be passed by reference. The ABI specifies that only
12305 structures of size 1, 2, 4, or 8 bytes are passed by value,
12306 all other structures (and complex numbers) are passed by
12307 reference. */
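/* For example, "struct { int a; }" (size 4) and "struct { char c[8]; }"
   (size 8) are passed by value, while "struct { char c[3]; }" (size 3,
   not a power of 2) and anything larger than 8 bytes go by
   reference.  */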
12308
12309 static bool
s390_pass_by_reference (cumulative_args_t ca ATTRIBUTE_UNUSED,
			machine_mode mode, const_tree type,
			bool named ATTRIBUTE_UNUSED)
12313 {
12314 int size = s390_function_arg_size (mode, type);
12315
12316 if (s390_function_arg_vector (mode, type))
12317 return false;
12318
12319 if (size > 8)
12320 return true;
12321
12322 if (type)
12323 {
12324 if (AGGREGATE_TYPE_P (type) && exact_log2 (size) < 0)
12325 return true;
12326
12327 if (TREE_CODE (type) == COMPLEX_TYPE
12328 || TREE_CODE (type) == VECTOR_TYPE)
12329 return true;
12330 }
12331
12332 return false;
12333 }
12334
12335 /* Update the data in CUM to advance over an argument of mode MODE and
12336 data type TYPE. (TYPE is null for libcalls where that information
   may not be available.)  The boolean NAMED specifies whether the
12338 argument is a named argument (as opposed to an unnamed argument
12339 matching an ellipsis). */
12340
12341 static void
s390_function_arg_advance (cumulative_args_t cum_v, machine_mode mode,
			   const_tree type, bool named)
12344 {
12345 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
12346
12347 if (s390_function_arg_vector (mode, type))
12348 {
12349 /* We are called for unnamed vector stdarg arguments which are
12350 passed on the stack. In this case this hook does not have to
12351 do anything since stack arguments are tracked by common
12352 code. */
12353 if (!named)
12354 return;
12355 cum->vrs += 1;
12356 }
12357 else if (s390_function_arg_float (mode, type))
12358 {
12359 cum->fprs += 1;
12360 }
12361 else if (s390_function_arg_integer (mode, type))
12362 {
12363 int size = s390_function_arg_size (mode, type);
12364 cum->gprs += ((size + UNITS_PER_LONG - 1) / UNITS_PER_LONG);
12365 }
12366 else
12367 gcc_unreachable ();
12368 }
12369
12370 /* Define where to put the arguments to a function.
12371 Value is zero to push the argument on the stack,
12372 or a hard register in which to store the argument.
12373
12374 MODE is the argument's machine mode.
12375 TYPE is the data type of the argument (as a tree).
12376 This is null for libcalls where that information may
12377 not be available.
12378 CUM is a variable of type CUMULATIVE_ARGS which gives info about
12379 the preceding args and about the function being called.
12380 NAMED is nonzero if this argument is a named parameter
12381 (otherwise it is an extra parameter matching an ellipsis).
12382
12383 On S/390, we use general purpose registers 2 through 6 to
12384 pass integer, pointer, and certain structure arguments, and
12385 floating point registers 0 and 2 (0, 2, 4, and 6 on 64-bit)
12386 to pass floating point arguments. All remaining arguments
12387 are pushed to the stack. */
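/* As an illustration: for "void f (int a, double b, int c)" on 64 bit,
   A ends up in %r2, B in %f0 and C in %r3 - the GPR and FPR counters
   advance independently (see s390_function_arg_advance above).  */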
12388
12389 static rtx
s390_function_arg (cumulative_args_t cum_v, machine_mode mode,
		   const_tree type, bool named)
12392 {
12393 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
12394
12395 if (!named)
12396 s390_check_type_for_vector_abi (type, true, false);
12397
12398 if (s390_function_arg_vector (mode, type))
12399 {
12400 /* Vector arguments being part of the ellipsis are passed on the
12401 stack. */
12402 if (!named || (cum->vrs + 1 > VEC_ARG_NUM_REG))
12403 return NULL_RTX;
12404
12405 return gen_rtx_REG (mode, cum->vrs + FIRST_VEC_ARG_REGNO);
12406 }
12407 else if (s390_function_arg_float (mode, type))
12408 {
12409 if (cum->fprs + 1 > FP_ARG_NUM_REG)
12410 return NULL_RTX;
12411 else
12412 return gen_rtx_REG (mode, cum->fprs + 16);
12413 }
12414 else if (s390_function_arg_integer (mode, type))
12415 {
12416 int size = s390_function_arg_size (mode, type);
12417 int n_gprs = (size + UNITS_PER_LONG - 1) / UNITS_PER_LONG;
12418
12419 if (cum->gprs + n_gprs > GP_ARG_NUM_REG)
12420 return NULL_RTX;
12421 else if (n_gprs == 1 || UNITS_PER_WORD == UNITS_PER_LONG)
12422 return gen_rtx_REG (mode, cum->gprs + 2);
12423 else if (n_gprs == 2)
12424 {
12425 rtvec p = rtvec_alloc (2);
12426
12427 RTVEC_ELT (p, 0)
12428 = gen_rtx_EXPR_LIST (SImode, gen_rtx_REG (SImode, cum->gprs + 2),
12429 const0_rtx);
12430 RTVEC_ELT (p, 1)
12431 = gen_rtx_EXPR_LIST (SImode, gen_rtx_REG (SImode, cum->gprs + 3),
12432 GEN_INT (4));
12433
12434 return gen_rtx_PARALLEL (mode, p);
12435 }
12436 }
12437
12438 /* After the real arguments, expand_call calls us once again
12439 with a void_type_node type. Whatever we return here is
12440 passed as operand 2 to the call expanders.
12441
12442 We don't need this feature ... */
12443 else if (type == void_type_node)
12444 return const0_rtx;
12445
12446 gcc_unreachable ();
12447 }
12448
12449 /* Implement TARGET_FUNCTION_ARG_BOUNDARY. Vector arguments are
12450 left-justified when placed on the stack during parameter passing. */
12451
12452 static pad_direction
s390_function_arg_padding (machine_mode mode, const_tree type)
12454 {
12455 if (s390_function_arg_vector (mode, type))
12456 return PAD_UPWARD;
12457
12458 return default_function_arg_padding (mode, type);
12459 }
12460
12461 /* Return true if return values of type TYPE should be returned
12462 in a memory buffer whose address is passed by the caller as
12463 hidden first argument. */
12464
12465 static bool
s390_return_in_memory (const_tree type, const_tree fundecl ATTRIBUTE_UNUSED)
12467 {
12468 /* We accept small integral (and similar) types. */
12469 if (INTEGRAL_TYPE_P (type)
12470 || POINTER_TYPE_P (type)
12471 || TREE_CODE (type) == OFFSET_TYPE
12472 || TREE_CODE (type) == REAL_TYPE)
12473 return int_size_in_bytes (type) > 8;
12474
12475 /* vector types which fit into a VR. */
12476 if (TARGET_VX_ABI
12477 && VECTOR_TYPE_P (type)
12478 && int_size_in_bytes (type) <= 16)
12479 return false;
12480
12481 /* Aggregates and similar constructs are always returned
12482 in memory. */
12483 if (AGGREGATE_TYPE_P (type)
12484 || TREE_CODE (type) == COMPLEX_TYPE
12485 || VECTOR_TYPE_P (type))
12486 return true;
12487
12488 /* ??? We get called on all sorts of random stuff from
12489 aggregate_value_p. We can't abort, but it's not clear
12490 what's safe to return. Pretend it's a struct I guess. */
12491 return true;
12492 }
12493
12494 /* Function arguments and return values are promoted to word size. */
12495
12496 static machine_mode
s390_promote_function_mode (const_tree type, machine_mode mode,
			    int *punsignedp,
			    const_tree fntype ATTRIBUTE_UNUSED,
			    int for_return ATTRIBUTE_UNUSED)
12501 {
12502 if (INTEGRAL_MODE_P (mode)
12503 && GET_MODE_SIZE (mode) < UNITS_PER_LONG)
12504 {
12505 if (type != NULL_TREE && POINTER_TYPE_P (type))
12506 *punsignedp = POINTERS_EXTEND_UNSIGNED;
12507 return Pmode;
12508 }
12509
12510 return mode;
12511 }
12512
12513 /* Define where to return a (scalar) value of type RET_TYPE.
12514 If RET_TYPE is null, define where to return a (scalar)
12515 value of mode MODE from a libcall. */
12516
12517 static rtx
s390_function_and_libcall_value (machine_mode mode,
				 const_tree ret_type,
				 const_tree fntype_or_decl,
				 bool outgoing ATTRIBUTE_UNUSED)
12522 {
12523 /* For vector return types it is important to use the RET_TYPE
12524 argument whenever available since the middle-end might have
12525 changed the mode to a scalar mode. */
12526 bool vector_ret_type_p = ((ret_type && VECTOR_TYPE_P (ret_type))
12527 || (!ret_type && VECTOR_MODE_P (mode)));
12528
12529 /* For normal functions perform the promotion as
12530 promote_function_mode would do. */
12531 if (ret_type)
12532 {
12533 int unsignedp = TYPE_UNSIGNED (ret_type);
12534 mode = promote_function_mode (ret_type, mode, &unsignedp,
12535 fntype_or_decl, 1);
12536 }
12537
12538 gcc_assert (GET_MODE_CLASS (mode) == MODE_INT
12539 || SCALAR_FLOAT_MODE_P (mode)
12540 || (TARGET_VX_ABI && vector_ret_type_p));
12541 gcc_assert (GET_MODE_SIZE (mode) <= (TARGET_VX_ABI ? 16 : 8));
12542
12543 if (TARGET_VX_ABI && vector_ret_type_p)
12544 return gen_rtx_REG (mode, FIRST_VEC_ARG_REGNO);
12545 else if (TARGET_HARD_FLOAT && SCALAR_FLOAT_MODE_P (mode))
12546 return gen_rtx_REG (mode, 16);
12547 else if (GET_MODE_SIZE (mode) <= UNITS_PER_LONG
12548 || UNITS_PER_LONG == UNITS_PER_WORD)
12549 return gen_rtx_REG (mode, 2);
12550 else if (GET_MODE_SIZE (mode) == 2 * UNITS_PER_LONG)
12551 {
12552 /* This case is triggered when returning a 64 bit value with
12553 -m31 -mzarch. Although the value would fit into a single
12554 register it has to be forced into a 32 bit register pair in
12555 order to match the ABI. */
12556 rtvec p = rtvec_alloc (2);
12557
12558 RTVEC_ELT (p, 0)
12559 = gen_rtx_EXPR_LIST (SImode, gen_rtx_REG (SImode, 2), const0_rtx);
12560 RTVEC_ELT (p, 1)
12561 = gen_rtx_EXPR_LIST (SImode, gen_rtx_REG (SImode, 3), GEN_INT (4));
12562
12563 return gen_rtx_PARALLEL (mode, p);
12564 }
12565
12566 gcc_unreachable ();
12567 }
12568
12569 /* Define where to return a scalar return value of type RET_TYPE. */
12570
12571 static rtx
s390_function_value (const_tree ret_type, const_tree fn_decl_or_type,
		     bool outgoing)
12574 {
12575 return s390_function_and_libcall_value (TYPE_MODE (ret_type), ret_type,
12576 fn_decl_or_type, outgoing);
12577 }
12578
12579 /* Define where to return a scalar libcall return value of mode
12580 MODE. */
12581
12582 static rtx
s390_libcall_value (machine_mode mode, const_rtx fun ATTRIBUTE_UNUSED)
12584 {
12585 return s390_function_and_libcall_value (mode, NULL_TREE,
12586 NULL_TREE, true);
12587 }
12588
12589
12590 /* Create and return the va_list datatype.
12591
12592 On S/390, va_list is an array type equivalent to
12593
12594 typedef struct __va_list_tag
12595 {
12596 long __gpr;
12597 long __fpr;
12598 void *__overflow_arg_area;
12599 void *__reg_save_area;
12600 } va_list[1];
12601
12602 where __gpr and __fpr hold the number of general purpose
12603 or floating point arguments used up to now, respectively,
12604 __overflow_arg_area points to the stack location of the
12605 next argument passed on the stack, and __reg_save_area
12606 always points to the start of the register area in the
12607 call frame of the current function. The function prologue
12608 saves all registers used for argument passing into this
12609 area if the function uses variable arguments. */
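/* Illustration (an assumption about the va_arg expansion, which lives
   elsewhere in this file): fetching the next GPR argument amounts to
   loading from __reg_save_area + (2 + __gpr) * UNITS_PER_LONG and
   incrementing __gpr, falling back to __overflow_arg_area once the
   argument registers are exhausted.  */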
12610
12611 static tree
s390_build_builtin_va_list (void)
12613 {
12614 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
12615
12616 record = lang_hooks.types.make_type (RECORD_TYPE);
12617
12618 type_decl =
12619 build_decl (BUILTINS_LOCATION,
12620 TYPE_DECL, get_identifier ("__va_list_tag"), record);
12621
12622 f_gpr = build_decl (BUILTINS_LOCATION,
12623 FIELD_DECL, get_identifier ("__gpr"),
12624 long_integer_type_node);
12625 f_fpr = build_decl (BUILTINS_LOCATION,
12626 FIELD_DECL, get_identifier ("__fpr"),
12627 long_integer_type_node);
12628 f_ovf = build_decl (BUILTINS_LOCATION,
12629 FIELD_DECL, get_identifier ("__overflow_arg_area"),
12630 ptr_type_node);
12631 f_sav = build_decl (BUILTINS_LOCATION,
12632 FIELD_DECL, get_identifier ("__reg_save_area"),
12633 ptr_type_node);
12634
12635 va_list_gpr_counter_field = f_gpr;
12636 va_list_fpr_counter_field = f_fpr;
12637
12638 DECL_FIELD_CONTEXT (f_gpr) = record;
12639 DECL_FIELD_CONTEXT (f_fpr) = record;
12640 DECL_FIELD_CONTEXT (f_ovf) = record;
12641 DECL_FIELD_CONTEXT (f_sav) = record;
12642
12643 TYPE_STUB_DECL (record) = type_decl;
12644 TYPE_NAME (record) = type_decl;
12645 TYPE_FIELDS (record) = f_gpr;
12646 DECL_CHAIN (f_gpr) = f_fpr;
12647 DECL_CHAIN (f_fpr) = f_ovf;
12648 DECL_CHAIN (f_ovf) = f_sav;
12649
12650 layout_type (record);
12651
12652 /* The correct type is an array type of one element. */
12653 return build_array_type (record, build_index_type (size_zero_node));
12654 }
12655
12656 /* Implement va_start by filling the va_list structure VALIST.
12657 STDARG_P is always true, and ignored.
12658 NEXTARG points to the first anonymous stack argument.
12659
12660 The following global variables are used to initialize
12661 the va_list structure:
12662
12663 crtl->args.info:
12664 holds number of gprs and fprs used for named arguments.
12665 crtl->args.arg_offset_rtx:
12666 holds the offset of the first anonymous stack argument
12667 (relative to the virtual arg pointer). */
12668
12669 static void
s390_va_start (tree valist, rtx nextarg ATTRIBUTE_UNUSED)
12671 {
12672 HOST_WIDE_INT n_gpr, n_fpr;
12673 int off;
12674 tree f_gpr, f_fpr, f_ovf, f_sav;
12675 tree gpr, fpr, ovf, sav, t;
12676
12677 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
12678 f_fpr = DECL_CHAIN (f_gpr);
12679 f_ovf = DECL_CHAIN (f_fpr);
12680 f_sav = DECL_CHAIN (f_ovf);
12681
12682 valist = build_simple_mem_ref (valist);
12683 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
12684 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
12685 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
12686 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
12687
12688 /* Count number of gp and fp argument registers used. */
12689
12690 n_gpr = crtl->args.info.gprs;
12691 n_fpr = crtl->args.info.fprs;
12692
12693 if (cfun->va_list_gpr_size)
12694 {
12695 t = build2 (MODIFY_EXPR, TREE_TYPE (gpr), gpr,
12696 build_int_cst (NULL_TREE, n_gpr));
12697 TREE_SIDE_EFFECTS (t) = 1;
12698 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
12699 }
12700
12701 if (cfun->va_list_fpr_size)
12702 {
12703 t = build2 (MODIFY_EXPR, TREE_TYPE (fpr), fpr,
12704 build_int_cst (NULL_TREE, n_fpr));
12705 TREE_SIDE_EFFECTS (t) = 1;
12706 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
12707 }
12708
12709 if (flag_split_stack
12710 && (lookup_attribute ("no_split_stack", DECL_ATTRIBUTES (cfun->decl))
12711 == NULL)
12712 && cfun->machine->split_stack_varargs_pointer == NULL_RTX)
12713 {
12714 rtx reg;
12715 rtx_insn *seq;
12716
12717 reg = gen_reg_rtx (Pmode);
12718 cfun->machine->split_stack_varargs_pointer = reg;
12719
12720 start_sequence ();
12721 emit_move_insn (reg, gen_rtx_REG (Pmode, 1));
12722 seq = get_insns ();
12723 end_sequence ();
12724
12725 push_topmost_sequence ();
12726 emit_insn_after (seq, entry_of_function ());
12727 pop_topmost_sequence ();
12728 }
12729
12730 /* Find the overflow area.
12731 FIXME: This currently is too pessimistic when the vector ABI is
12732 enabled. In that case we *always* set up the overflow area
12733 pointer. */
12734 if (n_gpr + cfun->va_list_gpr_size > GP_ARG_NUM_REG
12735 || n_fpr + cfun->va_list_fpr_size > FP_ARG_NUM_REG
12736 || TARGET_VX_ABI)
12737 {
12738 if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
12739 t = make_tree (TREE_TYPE (ovf), virtual_incoming_args_rtx);
12740 else
12741 t = make_tree (TREE_TYPE (ovf), cfun->machine->split_stack_varargs_pointer);
12742
12743 off = INTVAL (crtl->args.arg_offset_rtx);
12744 off = off < 0 ? 0 : off;
12745 if (TARGET_DEBUG_ARG)
12746 fprintf (stderr, "va_start: n_gpr = %d, n_fpr = %d off %d\n",
12747 (int)n_gpr, (int)n_fpr, off);
12748
12749 t = fold_build_pointer_plus_hwi (t, off);
12750
12751 t = build2 (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
12752 TREE_SIDE_EFFECTS (t) = 1;
12753 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
12754 }
12755
12756 /* Find the register save area. */
12757 if ((cfun->va_list_gpr_size && n_gpr < GP_ARG_NUM_REG)
12758 || (cfun->va_list_fpr_size && n_fpr < FP_ARG_NUM_REG))
12759 {
12760 t = make_tree (TREE_TYPE (sav), return_address_pointer_rtx);
12761 t = fold_build_pointer_plus_hwi (t, -RETURN_REGNUM * UNITS_PER_LONG);
12762
12763 t = build2 (MODIFY_EXPR, TREE_TYPE (sav), sav, t);
12764 TREE_SIDE_EFFECTS (t) = 1;
12765 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
12766 }
12767 }
12768
12769 /* Implement va_arg by updating the va_list structure
12770 VALIST as required to retrieve an argument of type
12771 TYPE, and returning that argument.
12772
12773 Generates code equivalent to:
12774
12775 if (integral value) {
12776 if (size <= 4 && args.gpr < 5 ||
12777 size > 4 && args.gpr < 4 )
12778 ret = args.reg_save_area[args.gpr+8]
12779 else
12780 ret = *args.overflow_arg_area++;
12781 } else if (vector value) {
12782 ret = *args.overflow_arg_area;
12783 args.overflow_arg_area += size / 8;
12784 } else if (float value) {
     if (args.fpr < 2)
       ret = args.reg_save_area[args.fpr+64]
     else
       ret = *args.overflow_arg_area++;
   } else if (aggregate value) {
     if (args.gpr < 5)
       ret = *args.reg_save_area[args.gpr]
     else
       ret = **args.overflow_arg_area++;
   } */

static tree
s390_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
                      gimple_seq *post_p ATTRIBUTE_UNUSED)
{
  tree f_gpr, f_fpr, f_ovf, f_sav;
  tree gpr, fpr, ovf, sav, reg, t, u;
  int indirect_p, size, n_reg, sav_ofs, sav_scale, max_reg;
  tree lab_false, lab_over = NULL_TREE;
  tree addr = create_tmp_var (ptr_type_node, "addr");
  bool left_align_p;   /* How a value < UNITS_PER_LONG is aligned within
                          a stack slot.  */

  f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
  f_fpr = DECL_CHAIN (f_gpr);
  f_ovf = DECL_CHAIN (f_fpr);
  f_sav = DECL_CHAIN (f_ovf);

  gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
  fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
  sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);

  /* The tree for args* cannot be shared between gpr/fpr and ovf since
     both appear on a lhs.  */
  valist = unshare_expr (valist);
  ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);

  size = int_size_in_bytes (type);

  s390_check_type_for_vector_abi (type, true, false);

  if (pass_by_reference (NULL, TYPE_MODE (type), type, false))
    {
      if (TARGET_DEBUG_ARG)
        {
          fprintf (stderr, "va_arg: aggregate type");
          debug_tree (type);
        }

      /* Aggregates are passed by reference.  */
      indirect_p = 1;
      reg = gpr;
      n_reg = 1;

      /* kernel stack layout on 31 bit: It is assumed here that no padding
         will be added by s390_frame_info because for va_args an even number
         of GPRs always has to be saved (r15-r2 = 14 regs).  */
      sav_ofs = 2 * UNITS_PER_LONG;
      sav_scale = UNITS_PER_LONG;
      size = UNITS_PER_LONG;
      max_reg = GP_ARG_NUM_REG - n_reg;
      left_align_p = false;
    }
  else if (s390_function_arg_vector (TYPE_MODE (type), type))
    {
      if (TARGET_DEBUG_ARG)
        {
          fprintf (stderr, "va_arg: vector type");
          debug_tree (type);
        }

      indirect_p = 0;
      reg = NULL_TREE;
      n_reg = 0;
      sav_ofs = 0;
      sav_scale = 8;
      max_reg = 0;
      left_align_p = true;
    }
  else if (s390_function_arg_float (TYPE_MODE (type), type))
    {
      if (TARGET_DEBUG_ARG)
        {
          fprintf (stderr, "va_arg: float type");
          debug_tree (type);
        }

      /* FP args go in FP registers, if present.  */
      indirect_p = 0;
      reg = fpr;
      n_reg = 1;
      sav_ofs = 16 * UNITS_PER_LONG;
      sav_scale = 8;
      max_reg = FP_ARG_NUM_REG - n_reg;
      left_align_p = false;
    }
  else
    {
      if (TARGET_DEBUG_ARG)
        {
          fprintf (stderr, "va_arg: other type");
          debug_tree (type);
        }

      /* Otherwise into GP registers.  */
      indirect_p = 0;
      reg = gpr;
      n_reg = (size + UNITS_PER_LONG - 1) / UNITS_PER_LONG;

      /* kernel stack layout on 31 bit: It is assumed here that no padding
         will be added by s390_frame_info because for va_args an even number
         of GPRs always has to be saved (r15-r2 = 14 regs).  */
      sav_ofs = 2 * UNITS_PER_LONG;

      if (size < UNITS_PER_LONG)
        sav_ofs += UNITS_PER_LONG - size;

      sav_scale = UNITS_PER_LONG;
      max_reg = GP_ARG_NUM_REG - n_reg;
      left_align_p = false;
    }

  /* Pull the value out of the saved registers ...  */

  if (reg != NULL_TREE)
    {
      /*
        if (reg > ((typeof (reg))max_reg))
          goto lab_false;

        addr = sav + sav_ofs + reg * sav_scale;

        goto lab_over;

        lab_false:
      */

      lab_false = create_artificial_label (UNKNOWN_LOCATION);
      lab_over = create_artificial_label (UNKNOWN_LOCATION);

      t = fold_convert (TREE_TYPE (reg), size_int (max_reg));
      t = build2 (GT_EXPR, boolean_type_node, reg, t);
      u = build1 (GOTO_EXPR, void_type_node, lab_false);
      t = build3 (COND_EXPR, void_type_node, t, u, NULL_TREE);
      gimplify_and_add (t, pre_p);

      t = fold_build_pointer_plus_hwi (sav, sav_ofs);
      u = build2 (MULT_EXPR, TREE_TYPE (reg), reg,
                  fold_convert (TREE_TYPE (reg), size_int (sav_scale)));
      t = fold_build_pointer_plus (t, u);

      gimplify_assign (addr, t, pre_p);

      gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));

      gimple_seq_add_stmt (pre_p, gimple_build_label (lab_false));
    }

  /* ... Otherwise out of the overflow area.  */

  t = ovf;
  if (size < UNITS_PER_LONG && !left_align_p)
    t = fold_build_pointer_plus_hwi (t, UNITS_PER_LONG - size);

  gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);

  gimplify_assign (addr, t, pre_p);

  if (size < UNITS_PER_LONG && left_align_p)
    t = fold_build_pointer_plus_hwi (t, UNITS_PER_LONG);
  else
    t = fold_build_pointer_plus_hwi (t, size);

  gimplify_assign (ovf, t, pre_p);

  if (reg != NULL_TREE)
    gimple_seq_add_stmt (pre_p, gimple_build_label (lab_over));


  /* Increment register save count.  */

  if (n_reg > 0)
    {
      u = build2 (PREINCREMENT_EXPR, TREE_TYPE (reg), reg,
                  fold_convert (TREE_TYPE (reg), size_int (n_reg)));
      gimplify_and_add (u, pre_p);
    }

  if (indirect_p)
    {
      t = build_pointer_type_for_mode (build_pointer_type (type),
                                       ptr_mode, true);
      addr = fold_convert (t, addr);
      addr = build_va_arg_indirect_ref (addr);
    }
  else
    {
      t = build_pointer_type_for_mode (type, ptr_mode, true);
      addr = fold_convert (t, addr);
    }

  return build_va_arg_indirect_ref (addr);
}

/* Emit rtl for the tbegin or tbegin_retry (RETRY != NULL_RTX)
   expanders.
   DEST  - Register location where CC will be stored.
   TDB   - Pointer to a 256 byte area where to store the transaction
           diagnostic block.  NULL if TDB is not needed.
   RETRY - Retry count value.  If non-NULL a retry loop for CC2
           is emitted
   CLOBBER_FPRS_P - If true clobbers for all FPRs are emitted as part
                    of the tbegin instruction pattern.  */

void
s390_expand_tbegin (rtx dest, rtx tdb, rtx retry, bool clobber_fprs_p)
{
  rtx retry_plus_two = gen_reg_rtx (SImode);
  rtx retry_reg = gen_reg_rtx (SImode);
  rtx_code_label *retry_label = NULL;

  if (retry != NULL_RTX)
    {
      emit_move_insn (retry_reg, retry);
      emit_insn (gen_addsi3 (retry_plus_two, retry_reg, const2_rtx));
      emit_insn (gen_addsi3 (retry_reg, retry_reg, const1_rtx));
      retry_label = gen_label_rtx ();
      emit_label (retry_label);
    }

  if (clobber_fprs_p)
    {
      if (TARGET_VX)
        emit_insn (gen_tbegin_1_z13 (gen_rtx_CONST_INT (VOIDmode, TBEGIN_MASK),
                                     tdb));
      else
        emit_insn (gen_tbegin_1 (gen_rtx_CONST_INT (VOIDmode, TBEGIN_MASK),
                                 tdb));
    }
  else
    emit_insn (gen_tbegin_nofloat_1 (gen_rtx_CONST_INT (VOIDmode, TBEGIN_MASK),
                                     tdb));

  emit_move_insn (dest, gen_rtx_UNSPEC (SImode,
                                        gen_rtvec (1, gen_rtx_REG (CCRAWmode,
                                                                   CC_REGNUM)),
                                        UNSPEC_CC_TO_INT));
  if (retry != NULL_RTX)
    {
      const int CC0 = 1 << 3;
      const int CC1 = 1 << 2;
      const int CC3 = 1 << 0;
      rtx jump;
      rtx count = gen_reg_rtx (SImode);
      rtx_code_label *leave_label = gen_label_rtx ();

      /* Exit for success and permanent failures.  */
      jump = s390_emit_jump (leave_label,
                             gen_rtx_EQ (VOIDmode,
                                         gen_rtx_REG (CCRAWmode, CC_REGNUM),
                                         gen_rtx_CONST_INT (VOIDmode, CC0 | CC1 | CC3)));
      LABEL_NUSES (leave_label) = 1;

      /* CC2 - transient failure.  Perform retry with ppa.  */
      emit_move_insn (count, retry_plus_two);
      emit_insn (gen_subsi3 (count, count, retry_reg));
      emit_insn (gen_tx_assist (count));
      jump = emit_jump_insn (gen_doloop_si64 (retry_label,
                                              retry_reg,
                                              retry_reg));
      JUMP_LABEL (jump) = retry_label;
      LABEL_NUSES (retry_label) = 1;
      emit_label (leave_label);
    }
}
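
/* Sketch (assumed pseudo-assembly, not emitted verbatim) of the retry
   loop built above for tbegin_retry:

       retry_reg = retry + 1
     retry:
       tbegin
       cc in {0,1,3}?  -> goto leave      ; success or permanent failure
       count = (retry + 2) - retry_reg    ; number of attempts so far
       ppa   count                        ; transaction-abort assist
       brct  retry_reg, retry             ; decrement and loop on CC2
     leave:
*/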


/* Return the decl for the target specific builtin with the function
   code FCODE.  */

static tree
s390_builtin_decl (unsigned fcode, bool initialized_p ATTRIBUTE_UNUSED)
{
  if (fcode >= S390_BUILTIN_MAX)
    return error_mark_node;

  return s390_builtin_decls[fcode];
}

/* We call mcount before the function prologue.  So a profiled leaf
   function should stay a leaf function.  */

static bool
s390_keep_leaf_when_profiled ()
{
  return true;
}

/* Output assembly code for the trampoline template to
   stdio stream FILE.

   On S/390, we use gpr 1 internally in the trampoline code;
   gpr 0 is used to hold the static chain.  */

static void
s390_asm_trampoline_template (FILE *file)
{
  rtx op[2];
  op[0] = gen_rtx_REG (Pmode, 0);
  op[1] = gen_rtx_REG (Pmode, 1);

  if (TARGET_64BIT)
    {
      output_asm_insn ("basr\t%1,0", op);         /* 2 byte */
      output_asm_insn ("lmg\t%0,%1,14(%1)", op);  /* 6 byte */
      output_asm_insn ("br\t%1", op);             /* 2 byte */
      ASM_OUTPUT_SKIP (file, (HOST_WIDE_INT)(TRAMPOLINE_SIZE - 10));
    }
  else
    {
      output_asm_insn ("basr\t%1,0", op);         /* 2 byte */
      output_asm_insn ("lm\t%0,%1,6(%1)", op);    /* 4 byte */
      output_asm_insn ("br\t%1", op);             /* 2 byte */
      ASM_OUTPUT_SKIP (file, (HOST_WIDE_INT)(TRAMPOLINE_SIZE - 8));
    }
}

/* Emit RTL insns to initialize the variable parts of a trampoline.
   FNADDR is an RTX for the address of the function's pure code.
   CXT is an RTX for the static chain value for the function.  */

static void
s390_trampoline_init (rtx m_tramp, tree fndecl, rtx cxt)
{
  rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
  rtx mem;

  emit_block_move (m_tramp, assemble_trampoline_template (),
                   GEN_INT (2 * UNITS_PER_LONG), BLOCK_OP_NORMAL);

  mem = adjust_address (m_tramp, Pmode, 2 * UNITS_PER_LONG);
  emit_move_insn (mem, cxt);
  mem = adjust_address (m_tramp, Pmode, 3 * UNITS_PER_LONG);
  emit_move_insn (mem, fnaddr);
}
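
/* Resulting 64-bit trampoline layout (a sketch; byte offsets follow the
   code above, with 2 * UNITS_PER_LONG == 16):

      0:  basr %r1,0           ; %r1 = trampoline address + 2
      2:  lmg  %r0,%r1,14(%r1) ; %r0 = chain (at 16), %r1 = target (at 24)
      8:  br   %r1             ; jump to target
     16:  <static chain>       ; stored by s390_trampoline_init
     24:  <function address>   ; stored by s390_trampoline_init
*/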

/* Output assembler code to FILE to increment profiler label # LABELNO
   for profiling a function entry.  */

void
s390_function_profiler (FILE *file, int labelno)
{
  rtx op[7];

  char label[128];
  ASM_GENERATE_INTERNAL_LABEL (label, "LP", labelno);

  fprintf (file, "# function profiler \n");

  op[0] = gen_rtx_REG (Pmode, RETURN_REGNUM);
  op[1] = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
  op[1] = gen_rtx_MEM (Pmode, plus_constant (Pmode, op[1], UNITS_PER_LONG));

  op[2] = gen_rtx_REG (Pmode, 1);
  op[3] = gen_rtx_SYMBOL_REF (Pmode, label);
  SYMBOL_REF_FLAGS (op[3]) = SYMBOL_FLAG_LOCAL;

  op[4] = gen_rtx_SYMBOL_REF (Pmode, "_mcount");
  if (flag_pic)
    {
      op[4] = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op[4]), UNSPEC_PLT);
      op[4] = gen_rtx_CONST (Pmode, op[4]);
    }

  if (TARGET_64BIT)
    {
      output_asm_insn ("stg\t%0,%1", op);
      output_asm_insn ("larl\t%2,%3", op);
      output_asm_insn ("brasl\t%0,%4", op);
      output_asm_insn ("lg\t%0,%1", op);
    }
  else if (TARGET_CPU_ZARCH)
    {
      output_asm_insn ("st\t%0,%1", op);
      output_asm_insn ("larl\t%2,%3", op);
      output_asm_insn ("brasl\t%0,%4", op);
      output_asm_insn ("l\t%0,%1", op);
    }
  else if (!flag_pic)
    {
      op[6] = gen_label_rtx ();

      output_asm_insn ("st\t%0,%1", op);
      output_asm_insn ("bras\t%2,%l6", op);
      output_asm_insn (".long\t%4", op);
      output_asm_insn (".long\t%3", op);
      targetm.asm_out.internal_label (file, "L", CODE_LABEL_NUMBER (op[6]));
      output_asm_insn ("l\t%0,0(%2)", op);
      output_asm_insn ("l\t%2,4(%2)", op);
      output_asm_insn ("basr\t%0,%0", op);
      output_asm_insn ("l\t%0,%1", op);
    }
  else
    {
      op[5] = gen_label_rtx ();
      op[6] = gen_label_rtx ();

      output_asm_insn ("st\t%0,%1", op);
      output_asm_insn ("bras\t%2,%l6", op);
      targetm.asm_out.internal_label (file, "L", CODE_LABEL_NUMBER (op[5]));
      output_asm_insn (".long\t%4-%l5", op);
      output_asm_insn (".long\t%3-%l5", op);
      targetm.asm_out.internal_label (file, "L", CODE_LABEL_NUMBER (op[6]));
      output_asm_insn ("lr\t%0,%2", op);
      output_asm_insn ("a\t%0,0(%2)", op);
      output_asm_insn ("a\t%2,4(%2)", op);
      output_asm_insn ("basr\t%0,%0", op);
      output_asm_insn ("l\t%0,%1", op);
    }
}

/* Encode symbol attributes (local vs. global, tls model) of a SYMBOL_REF
   into its SYMBOL_REF_FLAGS.  */

static void
s390_encode_section_info (tree decl, rtx rtl, int first)
{
  default_encode_section_info (decl, rtl, first);

  if (TREE_CODE (decl) == VAR_DECL)
    {
      /* Store the alignment to be able to check if we can use
         a larl/load-relative instruction.  We only handle the cases
         that can go wrong (i.e. no FUNC_DECLs).  */
      if (DECL_ALIGN (decl) == 0 || DECL_ALIGN (decl) % 16)
        SYMBOL_FLAG_SET_NOTALIGN2 (XEXP (rtl, 0));
      else if (DECL_ALIGN (decl) % 32)
        SYMBOL_FLAG_SET_NOTALIGN4 (XEXP (rtl, 0));
      else if (DECL_ALIGN (decl) % 64)
        SYMBOL_FLAG_SET_NOTALIGN8 (XEXP (rtl, 0));
    }

  /* Literal pool references don't have a decl so they are handled
     differently here.  We rely on the information in the MEM_ALIGN
     entry to decide upon the alignment.  */
  if (MEM_P (rtl)
      && GET_CODE (XEXP (rtl, 0)) == SYMBOL_REF
      && TREE_CONSTANT_POOL_ADDRESS_P (XEXP (rtl, 0)))
    {
      if (MEM_ALIGN (rtl) == 0 || MEM_ALIGN (rtl) % 16)
        SYMBOL_FLAG_SET_NOTALIGN2 (XEXP (rtl, 0));
      else if (MEM_ALIGN (rtl) % 32)
        SYMBOL_FLAG_SET_NOTALIGN4 (XEXP (rtl, 0));
      else if (MEM_ALIGN (rtl) % 64)
        SYMBOL_FLAG_SET_NOTALIGN8 (XEXP (rtl, 0));
    }
}
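
/* Example (illustrative): DECL_ALIGN and MEM_ALIGN are measured in
   bits, so a variable aligned to 4 bytes (DECL_ALIGN == 32) passes the
   % 16 and % 32 checks above but gets SYMBOL_FLAG_SET_NOTALIGN8,
   i.e. load-relative instructions requiring 8-byte alignment must not
   be used for it.  */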

/* Output thunk to FILE that implements a C++ virtual function call (with
   multiple inheritance) to FUNCTION.  The thunk adjusts the this pointer
   by DELTA, and unless VCALL_OFFSET is zero, applies an additional adjustment
   stored at VCALL_OFFSET in the vtable whose address is located at offset 0
   relative to the resulting this pointer.  */

static void
s390_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
                      HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
                      tree function)
{
  rtx op[10];
  int nonlocal = 0;

  /* Make sure unwind info is emitted for the thunk if needed.  */
  final_start_function (emit_barrier (), file, 1);

  /* Operand 0 is the target function.  */
  op[0] = XEXP (DECL_RTL (function), 0);
  if (flag_pic && !SYMBOL_REF_LOCAL_P (op[0]))
    {
      nonlocal = 1;
      op[0] = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op[0]),
                              TARGET_64BIT ? UNSPEC_PLT : UNSPEC_GOT);
      op[0] = gen_rtx_CONST (Pmode, op[0]);
    }

  /* Operand 1 is the 'this' pointer.  */
  if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
    op[1] = gen_rtx_REG (Pmode, 3);
  else
    op[1] = gen_rtx_REG (Pmode, 2);

  /* Operand 2 is the delta.  */
  op[2] = GEN_INT (delta);

  /* Operand 3 is the vcall_offset.  */
  op[3] = GEN_INT (vcall_offset);

  /* Operand 4 is the temporary register.  */
  op[4] = gen_rtx_REG (Pmode, 1);

  /* Operands 5 to 8 can be used as labels.  */
  op[5] = NULL_RTX;
  op[6] = NULL_RTX;
  op[7] = NULL_RTX;
  op[8] = NULL_RTX;

  /* Operand 9 can be used for temporary register.  */
  op[9] = NULL_RTX;

  /* Generate code.  */
  if (TARGET_64BIT)
    {
      /* Setup literal pool pointer if required.  */
      if ((!DISP_IN_RANGE (delta)
           && !CONST_OK_FOR_K (delta)
           && !CONST_OK_FOR_Os (delta))
          || (!DISP_IN_RANGE (vcall_offset)
              && !CONST_OK_FOR_K (vcall_offset)
              && !CONST_OK_FOR_Os (vcall_offset)))
        {
          op[5] = gen_label_rtx ();
          output_asm_insn ("larl\t%4,%5", op);
        }

      /* Add DELTA to this pointer.  */
      if (delta)
        {
          if (CONST_OK_FOR_J (delta))
            output_asm_insn ("la\t%1,%2(%1)", op);
          else if (DISP_IN_RANGE (delta))
            output_asm_insn ("lay\t%1,%2(%1)", op);
          else if (CONST_OK_FOR_K (delta))
            output_asm_insn ("aghi\t%1,%2", op);
          else if (CONST_OK_FOR_Os (delta))
            output_asm_insn ("agfi\t%1,%2", op);
          else
            {
              op[6] = gen_label_rtx ();
              output_asm_insn ("agf\t%1,%6-%5(%4)", op);
            }
        }

      /* Perform vcall adjustment.  */
      if (vcall_offset)
        {
          if (DISP_IN_RANGE (vcall_offset))
            {
              output_asm_insn ("lg\t%4,0(%1)", op);
              output_asm_insn ("ag\t%1,%3(%4)", op);
            }
          else if (CONST_OK_FOR_K (vcall_offset))
            {
              output_asm_insn ("lghi\t%4,%3", op);
              output_asm_insn ("ag\t%4,0(%1)", op);
              output_asm_insn ("ag\t%1,0(%4)", op);
            }
          else if (CONST_OK_FOR_Os (vcall_offset))
            {
              output_asm_insn ("lgfi\t%4,%3", op);
              output_asm_insn ("ag\t%4,0(%1)", op);
              output_asm_insn ("ag\t%1,0(%4)", op);
            }
          else
            {
              op[7] = gen_label_rtx ();
              output_asm_insn ("llgf\t%4,%7-%5(%4)", op);
              output_asm_insn ("ag\t%4,0(%1)", op);
              output_asm_insn ("ag\t%1,0(%4)", op);
            }
        }

      /* Jump to target.  */
      output_asm_insn ("jg\t%0", op);

      /* Output literal pool if required.  */
      if (op[5])
        {
          output_asm_insn (".align\t4", op);
          targetm.asm_out.internal_label (file, "L",
                                          CODE_LABEL_NUMBER (op[5]));
        }
      if (op[6])
        {
          targetm.asm_out.internal_label (file, "L",
                                          CODE_LABEL_NUMBER (op[6]));
          output_asm_insn (".long\t%2", op);
        }
      if (op[7])
        {
          targetm.asm_out.internal_label (file, "L",
                                          CODE_LABEL_NUMBER (op[7]));
          output_asm_insn (".long\t%3", op);
        }
    }
  else
    {
      /* Setup base pointer if required.  */
      if (!vcall_offset
          || (!DISP_IN_RANGE (delta)
              && !CONST_OK_FOR_K (delta)
              && !CONST_OK_FOR_Os (delta))
          || (!DISP_IN_RANGE (delta)
              && !CONST_OK_FOR_K (vcall_offset)
              && !CONST_OK_FOR_Os (vcall_offset)))
        {
          op[5] = gen_label_rtx ();
          output_asm_insn ("basr\t%4,0", op);
          targetm.asm_out.internal_label (file, "L",
                                          CODE_LABEL_NUMBER (op[5]));
        }

      /* Add DELTA to this pointer.  */
      if (delta)
        {
          if (CONST_OK_FOR_J (delta))
            output_asm_insn ("la\t%1,%2(%1)", op);
          else if (DISP_IN_RANGE (delta))
            output_asm_insn ("lay\t%1,%2(%1)", op);
          else if (CONST_OK_FOR_K (delta))
            output_asm_insn ("ahi\t%1,%2", op);
          else if (CONST_OK_FOR_Os (delta))
            output_asm_insn ("afi\t%1,%2", op);
          else
            {
              op[6] = gen_label_rtx ();
              output_asm_insn ("a\t%1,%6-%5(%4)", op);
            }
        }

      /* Perform vcall adjustment.  */
      if (vcall_offset)
        {
          if (CONST_OK_FOR_J (vcall_offset))
            {
              output_asm_insn ("l\t%4,0(%1)", op);
              output_asm_insn ("a\t%1,%3(%4)", op);
            }
          else if (DISP_IN_RANGE (vcall_offset))
            {
              output_asm_insn ("l\t%4,0(%1)", op);
              output_asm_insn ("ay\t%1,%3(%4)", op);
            }
          else if (CONST_OK_FOR_K (vcall_offset))
            {
              output_asm_insn ("lhi\t%4,%3", op);
              output_asm_insn ("a\t%4,0(%1)", op);
              output_asm_insn ("a\t%1,0(%4)", op);
            }
          else if (CONST_OK_FOR_Os (vcall_offset))
            {
              output_asm_insn ("iilf\t%4,%3", op);
              output_asm_insn ("a\t%4,0(%1)", op);
              output_asm_insn ("a\t%1,0(%4)", op);
            }
          else
            {
              op[7] = gen_label_rtx ();
              output_asm_insn ("l\t%4,%7-%5(%4)", op);
              output_asm_insn ("a\t%4,0(%1)", op);
              output_asm_insn ("a\t%1,0(%4)", op);
            }

          /* We had to clobber the base pointer register.
             Re-setup the base pointer (with a different base).  */
          op[5] = gen_label_rtx ();
          output_asm_insn ("basr\t%4,0", op);
          targetm.asm_out.internal_label (file, "L",
                                          CODE_LABEL_NUMBER (op[5]));
        }

      /* Jump to target.  */
      op[8] = gen_label_rtx ();

      if (!flag_pic)
        output_asm_insn ("l\t%4,%8-%5(%4)", op);
      else if (!nonlocal)
        output_asm_insn ("a\t%4,%8-%5(%4)", op);
      /* We cannot call through .plt, since .plt requires %r12 loaded.  */
      else if (flag_pic == 1)
        {
          output_asm_insn ("a\t%4,%8-%5(%4)", op);
          output_asm_insn ("l\t%4,%0(%4)", op);
        }
      else if (flag_pic == 2)
        {
          op[9] = gen_rtx_REG (Pmode, 0);
          output_asm_insn ("l\t%9,%8-4-%5(%4)", op);
          output_asm_insn ("a\t%4,%8-%5(%4)", op);
          output_asm_insn ("ar\t%4,%9", op);
          output_asm_insn ("l\t%4,0(%4)", op);
        }

      output_asm_insn ("br\t%4", op);

      /* Output literal pool.  */
      output_asm_insn (".align\t4", op);

      if (nonlocal && flag_pic == 2)
        output_asm_insn (".long\t%0", op);
      if (nonlocal)
        {
          op[0] = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
          SYMBOL_REF_FLAGS (op[0]) = SYMBOL_FLAG_LOCAL;
        }

      targetm.asm_out.internal_label (file, "L", CODE_LABEL_NUMBER (op[8]));
      if (!flag_pic)
        output_asm_insn (".long\t%0", op);
      else
        output_asm_insn (".long\t%0-%5", op);

      if (op[6])
        {
          targetm.asm_out.internal_label (file, "L",
                                          CODE_LABEL_NUMBER (op[6]));
          output_asm_insn (".long\t%2", op);
        }
      if (op[7])
        {
          targetm.asm_out.internal_label (file, "L",
                                          CODE_LABEL_NUMBER (op[7]));
          output_asm_insn (".long\t%3", op);
        }
    }
  final_end_function ();
}

/* Output either an indirect jump or an indirect call
   (RETURN_ADDR_REGNO != INVALID_REGNUM) with target register REGNO
   using a branch trampoline disabling branch target prediction.  */

void
s390_indirect_branch_via_thunk (unsigned int regno,
                                unsigned int return_addr_regno,
                                rtx comparison_operator,
                                enum s390_indirect_branch_type type)
{
  enum s390_indirect_branch_option option;

  if (type == s390_indirect_branch_type_return)
    {
      if (s390_return_addr_from_memory ())
        option = s390_opt_function_return_mem;
      else
        option = s390_opt_function_return_reg;
    }
  else if (type == s390_indirect_branch_type_jump)
    option = s390_opt_indirect_branch_jump;
  else if (type == s390_indirect_branch_type_call)
    option = s390_opt_indirect_branch_call;
  else
    gcc_unreachable ();

  if (TARGET_INDIRECT_BRANCH_TABLE)
    {
      char label[32];

      ASM_GENERATE_INTERNAL_LABEL (label,
                                   indirect_branch_table_label[option],
                                   indirect_branch_table_label_no[option]++);
      ASM_OUTPUT_LABEL (asm_out_file, label);
    }

  if (return_addr_regno != INVALID_REGNUM)
    {
      gcc_assert (comparison_operator == NULL_RTX);
      fprintf (asm_out_file, " \tbrasl\t%%r%d,", return_addr_regno);
    }
  else
    {
      fputs (" \tjg", asm_out_file);
      if (comparison_operator != NULL_RTX)
        print_operand (asm_out_file, comparison_operator, 'C');

      fputs ("\t", asm_out_file);
    }

  if (TARGET_CPU_Z10)
    fprintf (asm_out_file,
             TARGET_INDIRECT_BRANCH_THUNK_NAME_EXRL "\n",
             regno);
  else
    fprintf (asm_out_file,
             TARGET_INDIRECT_BRANCH_THUNK_NAME_EX "\n",
             INDIRECT_BRANCH_THUNK_REGNUM, regno);

  if ((option == s390_opt_indirect_branch_jump
       && cfun->machine->indirect_branch_jump == indirect_branch_thunk)
      || (option == s390_opt_indirect_branch_call
          && cfun->machine->indirect_branch_call == indirect_branch_thunk)
      || (option == s390_opt_function_return_reg
          && cfun->machine->function_return_reg == indirect_branch_thunk)
      || (option == s390_opt_function_return_mem
          && cfun->machine->function_return_mem == indirect_branch_thunk))
    {
      if (TARGET_CPU_Z10)
        indirect_branch_z10thunk_mask |= (1 << regno);
      else
        indirect_branch_prez10thunk_mask |= (1 << regno);
    }
}

/* Output an inline thunk for indirect jumps.  EXECUTE_TARGET can
   either be an address register or a label pointing to the location
   of the jump instruction.  */

void
s390_indirect_branch_via_inline_thunk (rtx execute_target)
{
  if (TARGET_INDIRECT_BRANCH_TABLE)
    {
      char label[32];

      ASM_GENERATE_INTERNAL_LABEL (label,
                                   indirect_branch_table_label[s390_opt_indirect_branch_jump],
                                   indirect_branch_table_label_no[s390_opt_indirect_branch_jump]++);
      ASM_OUTPUT_LABEL (asm_out_file, label);
    }

  if (!TARGET_ZARCH)
    fputs ("\t.machinemode zarch\n", asm_out_file);

  if (REG_P (execute_target))
    fprintf (asm_out_file, "\tex\t%%r0,0(%%r%d)\n", REGNO (execute_target));
  else
    output_asm_insn ("\texrl\t%%r0,%0", &execute_target);

  if (!TARGET_ZARCH)
    fputs ("\t.machinemode esa\n", asm_out_file);

  fputs ("0:\tj\t0b\n", asm_out_file);
}
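
/* Emitted sequence for an address register, e.g. %r1 (a sketch):

       ex   %r0,0(%r1)   ; execute the branch instruction found at 0(%r1)
    0: j    0b           ; endless loop catching speculative execution
*/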

static bool
s390_valid_pointer_mode (scalar_int_mode mode)
{
  return (mode == SImode || (TARGET_64BIT && mode == DImode));
}

/* Checks whether the given CALL_EXPR would use a caller
   saved register.  This is used to decide whether sibling call
   optimization could be performed on the respective function
   call.  */

static bool
s390_call_saved_register_used (tree call_expr)
{
  CUMULATIVE_ARGS cum_v;
  cumulative_args_t cum;
  tree parameter;
  machine_mode mode;
  tree type;
  rtx parm_rtx;
  int reg, i;

  INIT_CUMULATIVE_ARGS (cum_v, NULL, NULL, 0, 0);
  cum = pack_cumulative_args (&cum_v);

  for (i = 0; i < call_expr_nargs (call_expr); i++)
    {
      parameter = CALL_EXPR_ARG (call_expr, i);
      gcc_assert (parameter);

      /* For an undeclared variable passed as parameter we will get
         an ERROR_MARK node here.  */
      if (TREE_CODE (parameter) == ERROR_MARK)
        return true;

      type = TREE_TYPE (parameter);
      gcc_assert (type);

      mode = TYPE_MODE (type);
      gcc_assert (mode);

      /* We assume that in the target function all parameters are
         named.  This only has an impact on vector argument register
         usage; none of those registers are call-saved.  */
      if (pass_by_reference (&cum_v, mode, type, true))
        {
          mode = Pmode;
          type = build_pointer_type (type);
        }

      parm_rtx = s390_function_arg (cum, mode, type, true);

      s390_function_arg_advance (cum, mode, type, true);

      if (!parm_rtx)
        continue;

      if (REG_P (parm_rtx))
        {
          for (reg = 0; reg < REG_NREGS (parm_rtx); reg++)
            if (!call_used_regs[reg + REGNO (parm_rtx)])
              return true;
        }

      if (GET_CODE (parm_rtx) == PARALLEL)
        {
          int i;

          for (i = 0; i < XVECLEN (parm_rtx, 0); i++)
            {
              rtx r = XEXP (XVECEXP (parm_rtx, 0, i), 0);

              gcc_assert (REG_P (r));

              for (reg = 0; reg < REG_NREGS (r); reg++)
                if (!call_used_regs[reg + REGNO (r)])
                  return true;
            }
        }

    }
  return false;
}

/* Return true if the given call expression can be
   turned into a sibling call.
   DECL holds the declaration of the function to be called whereas
   EXP is the call expression itself.  */

static bool
s390_function_ok_for_sibcall (tree decl, tree exp)
{
  /* The TPF epilogue uses register 1.  */
  if (TARGET_TPF_PROFILING)
    return false;

  /* The 31 bit PLT code uses register 12 (GOT pointer - caller saved)
     which would have to be restored before the sibcall.  */
  if (!TARGET_64BIT && flag_pic && decl && !targetm.binds_local_p (decl))
    return false;

  /* The thunks for indirect branches require r1 if no exrl is
     available.  r1 might not be available when doing a sibling
     call.  */
  if (TARGET_INDIRECT_BRANCH_NOBP_CALL
      && !TARGET_CPU_Z10
      && !decl)
    return false;

  /* Register 6 on s390 is available as an argument register but unfortunately
     "caller saved".  This makes functions needing this register for arguments
     not suitable for sibcalls.  */
  return !s390_call_saved_register_used (exp);
}

/* Return the fixed registers used for condition codes.  */

static bool
s390_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
{
  *p1 = CC_REGNUM;
  *p2 = INVALID_REGNUM;

  return true;
}

/* This function is used by the call expanders of the machine description.
   It emits the call insn itself together with the necessary operations
   to adjust the target address and returns the emitted insn.
   ADDR_LOCATION is the target address rtx
   TLS_CALL the location of the thread-local symbol
   RESULT_REG the register where the result of the call should be stored
   RETADDR_REG the register where the return address should be stored
               If this parameter is NULL_RTX the call is considered
               to be a sibling call.  */

rtx_insn *
s390_emit_call (rtx addr_location, rtx tls_call, rtx result_reg,
                rtx retaddr_reg)
{
  bool plt_call = false;
  rtx_insn *insn;
  rtx vec[4] = { NULL_RTX };
  int elts = 0;
  rtx *call = &vec[0];
  rtx *clobber_ret_reg = &vec[1];
  rtx *use = &vec[2];
  rtx *clobber_thunk_reg = &vec[3];
  int i;

  /* Direct function calls need special treatment.  */
  if (GET_CODE (addr_location) == SYMBOL_REF)
    {
      /* When calling a global routine in PIC mode, we must
         replace the symbol itself with the PLT stub.  */
      if (flag_pic && !SYMBOL_REF_LOCAL_P (addr_location))
        {
          if (TARGET_64BIT || retaddr_reg != NULL_RTX)
            {
              addr_location = gen_rtx_UNSPEC (Pmode,
                                              gen_rtvec (1, addr_location),
                                              UNSPEC_PLT);
              addr_location = gen_rtx_CONST (Pmode, addr_location);
              plt_call = true;
            }
13784 /* For -fpic code the PLT entries might use r12 which is
13785 call-saved. Therefore we cannot do a sibcall when
13786 calling directly using a symbol ref. When reaching
13787 this point we decided (in s390_function_ok_for_sibcall)
13788 to do a sibcall for a function pointer but one of the
13789 optimizers was able to get rid of the function pointer
13790 by propagating the symbol ref into the call. This
13791 optimization is illegal for S/390 so we turn the direct
13792 call into a indirect call again. */
13793 addr_location = force_reg (Pmode, addr_location);
        }

      /* Unless we can use the bras(l) insn, force the
         routine address into a register.  */
      if (!TARGET_SMALL_EXEC && !TARGET_CPU_ZARCH)
        {
          if (flag_pic)
            addr_location = legitimize_pic_address (addr_location, 0);
          else
            addr_location = force_reg (Pmode, addr_location);
        }
    }

  /* If it is already an indirect call or the code above moved the
     SYMBOL_REF to somewhere else make sure the address can be found in
     register 1.  */
  if (retaddr_reg == NULL_RTX
      && GET_CODE (addr_location) != SYMBOL_REF
      && !plt_call)
    {
      emit_move_insn (gen_rtx_REG (Pmode, SIBCALL_REGNUM), addr_location);
      addr_location = gen_rtx_REG (Pmode, SIBCALL_REGNUM);
    }

  if (TARGET_INDIRECT_BRANCH_NOBP_CALL
      && GET_CODE (addr_location) != SYMBOL_REF
      && !plt_call)
    {
      /* Indirect branch thunks require the target to be a single GPR.  */
      addr_location = force_reg (Pmode, addr_location);

      /* Without exrl the indirect branch thunks need an additional
         register for larl;ex  */
      if (!TARGET_CPU_Z10)
        {
          *clobber_thunk_reg = gen_rtx_REG (Pmode, INDIRECT_BRANCH_THUNK_REGNUM);
          *clobber_thunk_reg = gen_rtx_CLOBBER (VOIDmode, *clobber_thunk_reg);
        }
    }

  addr_location = gen_rtx_MEM (QImode, addr_location);
  *call = gen_rtx_CALL (VOIDmode, addr_location, const0_rtx);

  if (result_reg != NULL_RTX)
    *call = gen_rtx_SET (result_reg, *call);

  if (retaddr_reg != NULL_RTX)
    {
      *clobber_ret_reg = gen_rtx_CLOBBER (VOIDmode, retaddr_reg);

      if (tls_call != NULL_RTX)
        *use = gen_rtx_USE (VOIDmode, tls_call);
    }


  for (i = 0; i < 4; i++)
    if (vec[i] != NULL_RTX)
      elts++;

  if (elts > 1)
    {
      rtvec v;
      int e = 0;

      v = rtvec_alloc (elts);
      for (i = 0; i < 4; i++)
        if (vec[i] != NULL_RTX)
          {
            RTVEC_ELT (v, e) = vec[i];
            e++;
          }

      *call = gen_rtx_PARALLEL (VOIDmode, v);
    }

  insn = emit_call_insn (*call);

  /* 31-bit PLT stubs and tls calls use the GOT register implicitly.  */
  if ((!TARGET_64BIT && plt_call) || tls_call != NULL_RTX)
    {
      /* s390_function_ok_for_sibcall should
         have denied sibcalls in this case.  */
      gcc_assert (retaddr_reg != NULL_RTX);
      use_reg (&CALL_INSN_FUNCTION_USAGE (insn), gen_rtx_REG (Pmode, 12));
    }
  return insn;
}

/* Implement TARGET_CONDITIONAL_REGISTER_USAGE.  */

static void
s390_conditional_register_usage (void)
{
  int i;

  if (flag_pic)
    {
      fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
      call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
    }
  if (TARGET_CPU_ZARCH)
    {
      fixed_regs[BASE_REGNUM] = 0;
      call_used_regs[BASE_REGNUM] = 0;
      fixed_regs[RETURN_REGNUM] = 0;
      call_used_regs[RETURN_REGNUM] = 0;
    }
  if (TARGET_64BIT)
    {
      for (i = FPR8_REGNUM; i <= FPR15_REGNUM; i++)
        call_used_regs[i] = call_really_used_regs[i] = 0;
    }
  else
    {
      call_used_regs[FPR4_REGNUM] = call_really_used_regs[FPR4_REGNUM] = 0;
      call_used_regs[FPR6_REGNUM] = call_really_used_regs[FPR6_REGNUM] = 0;
    }

  if (TARGET_SOFT_FLOAT)
    {
      for (i = FPR0_REGNUM; i <= FPR15_REGNUM; i++)
        call_used_regs[i] = fixed_regs[i] = 1;
    }

  /* Disable v16 - v31 for non-vector target.  */
  if (!TARGET_VX)
    {
      for (i = VR16_REGNUM; i <= VR31_REGNUM; i++)
        fixed_regs[i] = call_used_regs[i] = call_really_used_regs[i] = 1;
    }
}

/* Corresponding function to eh_return expander.  */

static GTY(()) rtx s390_tpf_eh_return_symbol;
void
s390_emit_tpf_eh_return (rtx target)
{
  rtx_insn *insn;
  rtx reg, orig_ra;

  if (!s390_tpf_eh_return_symbol)
    s390_tpf_eh_return_symbol = gen_rtx_SYMBOL_REF (Pmode, "__tpf_eh_return");

  reg = gen_rtx_REG (Pmode, 2);
  orig_ra = gen_rtx_REG (Pmode, 3);

  emit_move_insn (reg, target);
  emit_move_insn (orig_ra, get_hard_reg_initial_val (Pmode, RETURN_REGNUM));
  insn = s390_emit_call (s390_tpf_eh_return_symbol, NULL_RTX, reg,
                         gen_rtx_REG (Pmode, RETURN_REGNUM));
  use_reg (&CALL_INSN_FUNCTION_USAGE (insn), reg);
  use_reg (&CALL_INSN_FUNCTION_USAGE (insn), orig_ra);

  emit_move_insn (EH_RETURN_HANDLER_RTX, reg);
}

/* Rework the prologue/epilogue to avoid saving/restoring
   registers unnecessarily.  */

static void
s390_optimize_prologue (void)
{
  rtx_insn *insn, *new_insn, *next_insn;

  /* Do a final recompute of the frame-related data.  */
  s390_optimize_register_info ();

  /* If all special registers are in fact used, there's nothing we
     can do, so no point in walking the insn list.  */

  if (cfun_frame_layout.first_save_gpr <= BASE_REGNUM
      && cfun_frame_layout.last_save_gpr >= BASE_REGNUM
      && (TARGET_CPU_ZARCH
          || (cfun_frame_layout.first_save_gpr <= RETURN_REGNUM
              && cfun_frame_layout.last_save_gpr >= RETURN_REGNUM)))
    return;

  /* Search for prologue/epilogue insns and replace them.  */

  for (insn = get_insns (); insn; insn = next_insn)
    {
      int first, last, off;
      rtx set, base, offset;
      rtx pat;

      next_insn = NEXT_INSN (insn);

      if (! NONJUMP_INSN_P (insn) || ! RTX_FRAME_RELATED_P (insn))
        continue;

      pat = PATTERN (insn);

      /* Remove ldgr/lgdr instructions used for saving and restoring
         GPRs if possible.  */
      if (TARGET_Z10)
        {
          rtx tmp_pat = pat;

          if (INSN_CODE (insn) == CODE_FOR_stack_restore_from_fpr)
            tmp_pat = XVECEXP (pat, 0, 0);

          if (GET_CODE (tmp_pat) == SET
              && GET_MODE (SET_SRC (tmp_pat)) == DImode
              && REG_P (SET_SRC (tmp_pat))
              && REG_P (SET_DEST (tmp_pat)))
            {
              int src_regno = REGNO (SET_SRC (tmp_pat));
              int dest_regno = REGNO (SET_DEST (tmp_pat));
              int gpr_regno;
              int fpr_regno;

              if (!((GENERAL_REGNO_P (src_regno)
                     && FP_REGNO_P (dest_regno))
                    || (FP_REGNO_P (src_regno)
                        && GENERAL_REGNO_P (dest_regno))))
                continue;

              gpr_regno = GENERAL_REGNO_P (src_regno) ? src_regno : dest_regno;
              fpr_regno = FP_REGNO_P (src_regno) ? src_regno : dest_regno;

              /* GPR must be call-saved, FPR must be call-clobbered.  */
              if (!call_really_used_regs[fpr_regno]
                  || call_really_used_regs[gpr_regno])
                continue;

              /* It must not happen that what we once saved in an FPR now
                 needs a stack slot.  */
              gcc_assert (cfun_gpr_save_slot (gpr_regno) != SAVE_SLOT_STACK);

              if (cfun_gpr_save_slot (gpr_regno) == SAVE_SLOT_NONE)
                {
                  remove_insn (insn);
                  continue;
                }
            }
        }

      if (GET_CODE (pat) == PARALLEL
          && store_multiple_operation (pat, VOIDmode))
        {
          set = XVECEXP (pat, 0, 0);
          first = REGNO (SET_SRC (set));
          last = first + XVECLEN (pat, 0) - 1;
          offset = const0_rtx;
          base = eliminate_constant_term (XEXP (SET_DEST (set), 0), &offset);
          off = INTVAL (offset);

          if (GET_CODE (base) != REG || off < 0)
            continue;
          if (cfun_frame_layout.first_save_gpr != -1
              && (cfun_frame_layout.first_save_gpr < first
                  || cfun_frame_layout.last_save_gpr > last))
            continue;
          if (REGNO (base) != STACK_POINTER_REGNUM
              && REGNO (base) != HARD_FRAME_POINTER_REGNUM)
            continue;
          if (first > BASE_REGNUM || last < BASE_REGNUM)
            continue;

          if (cfun_frame_layout.first_save_gpr != -1)
            {
              rtx s_pat = save_gprs (base,
                                     off + (cfun_frame_layout.first_save_gpr
                                            - first) * UNITS_PER_LONG,
                                     cfun_frame_layout.first_save_gpr,
                                     cfun_frame_layout.last_save_gpr);
              new_insn = emit_insn_before (s_pat, insn);
              INSN_ADDRESSES_NEW (new_insn, -1);
            }

          remove_insn (insn);
          continue;
        }

      if (cfun_frame_layout.first_save_gpr == -1
          && GET_CODE (pat) == SET
          && GENERAL_REG_P (SET_SRC (pat))
          && GET_CODE (SET_DEST (pat)) == MEM)
        {
          set = pat;
          first = REGNO (SET_SRC (set));
          offset = const0_rtx;
          base = eliminate_constant_term (XEXP (SET_DEST (set), 0), &offset);
          off = INTVAL (offset);

          if (GET_CODE (base) != REG || off < 0)
            continue;
          if (REGNO (base) != STACK_POINTER_REGNUM
              && REGNO (base) != HARD_FRAME_POINTER_REGNUM)
            continue;

          remove_insn (insn);
          continue;
        }

      if (GET_CODE (pat) == PARALLEL
          && load_multiple_operation (pat, VOIDmode))
        {
          set = XVECEXP (pat, 0, 0);
          first = REGNO (SET_DEST (set));
          last = first + XVECLEN (pat, 0) - 1;
          offset = const0_rtx;
          base = eliminate_constant_term (XEXP (SET_SRC (set), 0), &offset);
          off = INTVAL (offset);

          if (GET_CODE (base) != REG || off < 0)
            continue;

          if (cfun_frame_layout.first_restore_gpr != -1
              && (cfun_frame_layout.first_restore_gpr < first
                  || cfun_frame_layout.last_restore_gpr > last))
            continue;
          if (REGNO (base) != STACK_POINTER_REGNUM
              && REGNO (base) != HARD_FRAME_POINTER_REGNUM)
            continue;
          if (first > BASE_REGNUM || last < BASE_REGNUM)
            continue;

          if (cfun_frame_layout.first_restore_gpr != -1)
            {
              rtx rpat = restore_gprs (base,
                                       off + (cfun_frame_layout.first_restore_gpr
                                              - first) * UNITS_PER_LONG,
                                       cfun_frame_layout.first_restore_gpr,
                                       cfun_frame_layout.last_restore_gpr);

              /* Remove REG_CFA_RESTOREs for registers that we no
                 longer need to save.  */
              REG_NOTES (rpat) = REG_NOTES (insn);
              for (rtx *ptr = &REG_NOTES (rpat); *ptr; )
                if (REG_NOTE_KIND (*ptr) == REG_CFA_RESTORE
                    && ((int) REGNO (XEXP (*ptr, 0))
                        < cfun_frame_layout.first_restore_gpr))
                  *ptr = XEXP (*ptr, 1);
                else
                  ptr = &XEXP (*ptr, 1);
              new_insn = emit_insn_before (rpat, insn);
              RTX_FRAME_RELATED_P (new_insn) = 1;
              INSN_ADDRESSES_NEW (new_insn, -1);
            }

          remove_insn (insn);
          continue;
        }

      if (cfun_frame_layout.first_restore_gpr == -1
          && GET_CODE (pat) == SET
          && GENERAL_REG_P (SET_DEST (pat))
          && GET_CODE (SET_SRC (pat)) == MEM)
        {
          set = pat;
          first = REGNO (SET_DEST (set));
          offset = const0_rtx;
          base = eliminate_constant_term (XEXP (SET_SRC (set), 0), &offset);
          off = INTVAL (offset);

          if (GET_CODE (base) != REG || off < 0)
            continue;

          if (REGNO (base) != STACK_POINTER_REGNUM
              && REGNO (base) != HARD_FRAME_POINTER_REGNUM)
            continue;

          remove_insn (insn);
          continue;
        }
    }
}

/* On z10 and later the dynamic branch prediction must see the
   backward jump within a certain window.  If not, it falls back to
   the static prediction.  This function rearranges the loop backward
   branch in a way which makes the static prediction always correct.
   The function returns true if it added an instruction.  */
static bool
s390_fix_long_loop_prediction (rtx_insn *insn)
{
  rtx set = single_set (insn);
  rtx code_label, label_ref;
  rtx_insn *uncond_jump;
  rtx_insn *cur_insn;
  rtx tmp;
  int distance;

  /* This will exclude branch on count and branch on index patterns
     since these are correctly statically predicted.  */
  if (!set
      || SET_DEST (set) != pc_rtx
      || GET_CODE (SET_SRC(set)) != IF_THEN_ELSE)
    return false;

  /* Skip conditional returns.  */
  if (ANY_RETURN_P (XEXP (SET_SRC (set), 1))
      && XEXP (SET_SRC (set), 2) == pc_rtx)
    return false;

  label_ref = (GET_CODE (XEXP (SET_SRC (set), 1)) == LABEL_REF ?
               XEXP (SET_SRC (set), 1) : XEXP (SET_SRC (set), 2));

  gcc_assert (GET_CODE (label_ref) == LABEL_REF);

  code_label = XEXP (label_ref, 0);

  if (INSN_ADDRESSES (INSN_UID (code_label)) == -1
      || INSN_ADDRESSES (INSN_UID (insn)) == -1
      || (INSN_ADDRESSES (INSN_UID (insn))
          - INSN_ADDRESSES (INSN_UID (code_label)) < PREDICT_DISTANCE))
    return false;

  for (distance = 0, cur_insn = PREV_INSN (insn);
       distance < PREDICT_DISTANCE - 6;
       distance += get_attr_length (cur_insn), cur_insn = PREV_INSN (cur_insn))
    if (!cur_insn || JUMP_P (cur_insn) || LABEL_P (cur_insn))
      return false;

  rtx_code_label *new_label = gen_label_rtx ();
  uncond_jump = emit_jump_insn_after (
                  gen_rtx_SET (pc_rtx,
                               gen_rtx_LABEL_REF (VOIDmode, code_label)),
                  insn);
  emit_label_after (new_label, uncond_jump);

  tmp = XEXP (SET_SRC (set), 1);
  XEXP (SET_SRC (set), 1) = XEXP (SET_SRC (set), 2);
  XEXP (SET_SRC (set), 2) = tmp;
  INSN_CODE (insn) = -1;

  XEXP (label_ref, 0) = new_label;
  JUMP_LABEL (insn) = new_label;
  JUMP_LABEL (uncond_jump) = code_label;

  return true;
}
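
/* Illustration (a sketch) of the transformation performed above:

     before:                        after:
       loop:  ...                     loop:  ...
              ...                            ...
              jne  loop                      je   skip
                                             j    loop
                                      skip:  ...

   The backward branch becomes unconditional (statically predicted
   taken) while the inverted conditional branch jumps forward
   (statically predicted not taken).  */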

/* Returns 1 if INSN reads the value of REG for purposes not related
   to addressing of memory, and 0 otherwise.  */
static int
s390_non_addr_reg_read_p (rtx reg, rtx_insn *insn)
{
  return reg_referenced_p (reg, PATTERN (insn))
         && !reg_used_in_mem_p (REGNO (reg), PATTERN (insn));
}

/* Starting from INSN find_cond_jump looks downwards in the insn
   stream for a single jump insn which is the last user of the
   condition code set in INSN.  */
static rtx_insn *
find_cond_jump (rtx_insn *insn)
{
  for (; insn; insn = NEXT_INSN (insn))
    {
      rtx ite, cc;

      if (LABEL_P (insn))
        break;

      if (!JUMP_P (insn))
        {
          if (reg_mentioned_p (gen_rtx_REG (CCmode, CC_REGNUM), insn))
            break;
          continue;
        }

      /* This will be triggered by a return.  */
      if (GET_CODE (PATTERN (insn)) != SET)
        break;

      gcc_assert (SET_DEST (PATTERN (insn)) == pc_rtx);
      ite = SET_SRC (PATTERN (insn));

      if (GET_CODE (ite) != IF_THEN_ELSE)
        break;

      cc = XEXP (XEXP (ite, 0), 0);
      if (!REG_P (cc) || !CC_REGNO_P (REGNO (cc)))
        break;

      if (find_reg_note (insn, REG_DEAD, cc))
        return insn;
      break;
    }

  return NULL;
}

/* Swap the condition in COND and the operands in OP0 and OP1 so that
   the semantics does not change.  If NULL_RTX is passed as COND the
   function tries to find the conditional jump starting with INSN.  */
static void
s390_swap_cmp (rtx cond, rtx *op0, rtx *op1, rtx_insn *insn)
{
  rtx tmp = *op0;

  if (cond == NULL_RTX)
    {
      rtx_insn *jump = find_cond_jump (NEXT_INSN (insn));
      rtx set = jump ? single_set (jump) : NULL_RTX;

      if (set == NULL_RTX)
        return;

      cond = XEXP (SET_SRC (set), 0);
    }

  *op0 = *op1;
  *op1 = tmp;
  PUT_CODE (cond, swap_condition (GET_CODE (cond)));
}
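
/* Example (a sketch): a compare "cc = COMPARE (%r1, %r2)" feeding
   "if (cc > 0) goto L" becomes "cc = COMPARE (%r2, %r1)" with
   "if (cc < 0) goto L" -- the same semantics, but the complemented
   register access now hits the other operand.  */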

/* On z10, instructions of the compare-and-branch family have the
   property of accessing the register occurring as second operand with
   its bits complemented.  If such a compare is grouped with a second
   instruction that accesses the same register non-complemented, and
   if that register's value is delivered via a bypass, then the
   pipeline recycles, thereby causing significant performance decline.
   This function locates such situations and exchanges the two
   operands of the compare.  The function returns true whenever it
   added an insn.  */
static bool
s390_z10_optimize_cmp (rtx_insn *insn)
{
  rtx_insn *prev_insn, *next_insn;
  bool insn_added_p = false;
  rtx cond, *op0, *op1;

  if (GET_CODE (PATTERN (insn)) == PARALLEL)
    {
      /* Handle compare and branch and branch on count
         instructions.  */
      rtx pattern = single_set (insn);

      if (!pattern
          || SET_DEST (pattern) != pc_rtx
          || GET_CODE (SET_SRC (pattern)) != IF_THEN_ELSE)
        return false;

      cond = XEXP (SET_SRC (pattern), 0);
      op0 = &XEXP (cond, 0);
      op1 = &XEXP (cond, 1);
    }
  else if (GET_CODE (PATTERN (insn)) == SET)
    {
      rtx src, dest;

      /* Handle normal compare instructions.  */
      src = SET_SRC (PATTERN (insn));
      dest = SET_DEST (PATTERN (insn));

      if (!REG_P (dest)
          || !CC_REGNO_P (REGNO (dest))
          || GET_CODE (src) != COMPARE)
        return false;

      /* s390_swap_cmp will try to find the conditional
         jump when passing NULL_RTX as condition.  */
      cond = NULL_RTX;
      op0 = &XEXP (src, 0);
      op1 = &XEXP (src, 1);
    }
  else
    return false;

  if (!REG_P (*op0) || !REG_P (*op1))
    return false;

  if (GET_MODE_CLASS (GET_MODE (*op0)) != MODE_INT)
    return false;

  /* Swap the COMPARE arguments and its mask if there is a
     conflicting access in the previous insn.  */
  prev_insn = prev_active_insn (insn);
  if (prev_insn != NULL_RTX && INSN_P (prev_insn)
      && reg_referenced_p (*op1, PATTERN (prev_insn)))
    s390_swap_cmp (cond, op0, op1, insn);

  /* Check if there is a conflict with the next insn.  If there
     was no conflict with the previous insn, then swap the
     COMPARE arguments and its mask.  If we already swapped
     the operands, or if swapping them would cause a conflict
     with the previous insn, issue a NOP after the COMPARE in
     order to separate the two instructions.  */
  next_insn = next_active_insn (insn);
  if (next_insn != NULL_RTX && INSN_P (next_insn)
      && s390_non_addr_reg_read_p (*op1, next_insn))
    {
      if (prev_insn != NULL_RTX && INSN_P (prev_insn)
          && s390_non_addr_reg_read_p (*op0, prev_insn))
        {
          if (REGNO (*op1) == 0)
            emit_insn_after (gen_nop_lr1 (), insn);
          else
            emit_insn_after (gen_nop_lr0 (), insn);
          insn_added_p = true;
        }
      else
        s390_swap_cmp (cond, op0, op1, insn);
    }
  return insn_added_p;
}

/* Number of INSNs to be scanned backward in the last BB of the loop
   and forward in the first BB of the loop.  This usually should be a
   bit more than the number of INSNs which could go into one
   group.  */
#define S390_OSC_SCAN_INSN_NUM 5

/* Scan LOOP for static OSC collisions and return true if an osc_break
   should be issued for this loop.  */
static bool
s390_adjust_loop_scan_osc (struct loop* loop)
{
  HARD_REG_SET modregs, newregs;
  rtx_insn *insn, *store_insn = NULL;
  rtx set;
  struct s390_address addr_store, addr_load;
  subrtx_iterator::array_type array;
  int insn_count;

  CLEAR_HARD_REG_SET (modregs);

  insn_count = 0;
  FOR_BB_INSNS_REVERSE (loop->latch, insn)
    {
      if (!INSN_P (insn) || INSN_CODE (insn) <= 0)
        continue;

      insn_count++;
      if (insn_count > S390_OSC_SCAN_INSN_NUM)
        return false;

      find_all_hard_reg_sets (insn, &newregs, true);
      IOR_HARD_REG_SET (modregs, newregs);

      set = single_set (insn);
      if (!set)
        continue;

      if (MEM_P (SET_DEST (set))
          && s390_decompose_address (XEXP (SET_DEST (set), 0), &addr_store))
        {
          store_insn = insn;
          break;
        }
    }

  if (store_insn == NULL_RTX)
    return false;

  insn_count = 0;
  FOR_BB_INSNS (loop->header, insn)
    {
      if (!INSN_P (insn) || INSN_CODE (insn) <= 0)
        continue;

      if (insn == store_insn)
        return false;

      insn_count++;
      if (insn_count > S390_OSC_SCAN_INSN_NUM)
        return false;

      find_all_hard_reg_sets (insn, &newregs, true);
      IOR_HARD_REG_SET (modregs, newregs);

      set = single_set (insn);
      if (!set)
        continue;

      /* An intermediate store disrupts static OSC checking
         anyway.  */
      if (MEM_P (SET_DEST (set))
          && s390_decompose_address (XEXP (SET_DEST (set), 0), NULL))
        return false;

      FOR_EACH_SUBRTX (iter, array, SET_SRC (set), NONCONST)
        if (MEM_P (*iter)
            && s390_decompose_address (XEXP (*iter, 0), &addr_load)
            && rtx_equal_p (addr_load.base, addr_store.base)
            && rtx_equal_p (addr_load.indx, addr_store.indx)
            && rtx_equal_p (addr_load.disp, addr_store.disp))
          {
            if ((addr_load.base != NULL_RTX
                 && TEST_HARD_REG_BIT (modregs, REGNO (addr_load.base)))
                || (addr_load.indx != NULL_RTX
                    && TEST_HARD_REG_BIT (modregs, REGNO (addr_load.indx))))
              return true;
          }
    }
  return false;
}
14486
14487 /* Look for adjustments which can be done on simple innermost
14488 loops. */
14489 static void
14490 s390_adjust_loops ()
14491 {
14492 struct loop *loop = NULL;
14493
14494 df_analyze ();
14495 compute_bb_for_insn ();
14496
14497 /* Find the loops. */
14498 loop_optimizer_init (AVOID_CFG_MODIFICATIONS);
14499
14500 FOR_EACH_LOOP (loop, LI_ONLY_INNERMOST)
14501 {
14502 if (dump_file)
14503 {
14504 flow_loop_dump (loop, dump_file, NULL, 0);
14505 fprintf (dump_file, ";; OSC loop scan Loop: ");
14506 }
14507 if (loop->latch == NULL
14508 || pc_set (BB_END (loop->latch)) == NULL_RTX
14509 || !s390_adjust_loop_scan_osc (loop))
14510 {
14511 if (dump_file)
14512 {
14513 if (loop->latch == NULL)
14514 fprintf (dump_file, " multiple backward jumps\n");
14515 else
14516 {
14517 fprintf (dump_file, " header insn: %d latch insn: %d ",
14518 INSN_UID (BB_HEAD (loop->header)),
14519 INSN_UID (BB_END (loop->latch)));
14520 if (pc_set (BB_END (loop->latch)) == NULL_RTX)
14521 fprintf (dump_file, " loop does not end with jump\n");
14522 else
14523 fprintf (dump_file, " not instrumented\n");
14524 }
14525 }
14526 }
14527 else
14528 {
14529 rtx_insn *new_insn;
14530
14531 if (dump_file)
14532 fprintf (dump_file, " adding OSC break insn: ");
14533 new_insn = emit_insn_before (gen_osc_break (),
14534 BB_END (loop->latch));
14535 INSN_ADDRESSES_NEW (new_insn, -1);
14536 }
14537 }
14538
14539 loop_optimizer_finalize ();
14540
14541 df_finish_pass (false);
14542 }
14543
14544 /* Perform machine-dependent processing. */
14545
14546 static void
14547 s390_reorg (void)
14548 {
14549 bool pool_overflow = false;
14550 int hw_before, hw_after;
14551
14552 if (s390_tune == PROCESSOR_2964_Z13)
14553 s390_adjust_loops ();
14554
14555 /* Make sure all splits have been performed; splits after
14556 machine_dependent_reorg might confuse insn length counts. */
14557 split_all_insns_noflow ();
14558
14559 /* Install the main literal pool and the associated base
14560 register load insns.
14561
14562 In addition, there are two problematic situations we need
14563 to correct:
14564
14565 - the literal pool might be > 4096 bytes in size, so that
14566 some of its elements cannot be directly accessed
14567
14568 - a branch target might be > 64K away from the branch, so that
14569 it is not possible to use a PC-relative instruction.
14570
14571 To fix those, we split the single literal pool into multiple
14572 pool chunks, reloading the pool base register at various
14573 points throughout the function to ensure it always points to
14574 the pool chunk the following code expects, and/or replace
14575 PC-relative branches by absolute branches.
14576
14577 However, the two problems are interdependent: splitting the
14578 literal pool can move a branch further away from its target,
14579 causing the 64K limit to overflow, and on the other hand,
14580 replacing a PC-relative branch by an absolute branch means
14581 we need to put the branch target address into the literal
14582 pool, possibly causing it to overflow.
14583
14584 So, we loop trying to fix up both problems until we manage
14585 to satisfy both conditions at the same time. Note that the
14586 loop is guaranteed to terminate as every pass of the loop
14587 strictly decreases the total number of PC-relative branches
14588 in the function. (This is not completely true as there
14589 might be branch-over-pool insns introduced by chunkify_start.
14590 Those never need to be split however.) */
14591
14592 for (;;)
14593 {
14594 struct constant_pool *pool = NULL;
14595
14596 /* Collect the literal pool. */
14597 if (!pool_overflow)
14598 {
14599 pool = s390_mainpool_start ();
14600 if (!pool)
14601 pool_overflow = true;
14602 }
14603
14604 /* If literal pool overflowed, start to chunkify it. */
14605 if (pool_overflow)
14606 pool = s390_chunkify_start ();
14607
14608 /* Split out-of-range branches. If this has created new
14609 literal pool entries, cancel current chunk list and
14610 recompute it. zSeries machines have large branch
14611 instructions, so we never need to split a branch. */
14612 if (!TARGET_CPU_ZARCH && s390_split_branches ())
14613 {
14614 if (pool_overflow)
14615 s390_chunkify_cancel (pool);
14616 else
14617 s390_mainpool_cancel (pool);
14618
14619 continue;
14620 }
14621
14622 /* If we made it up to here, both conditions are satisfied.
14623 Finish up literal pool related changes. */
14624 if (pool_overflow)
14625 s390_chunkify_finish (pool);
14626 else
14627 s390_mainpool_finish (pool);
14628
14629 /* We're done splitting branches. */
14630 cfun->machine->split_branches_pending_p = false;
14631 break;
14632 }
14633
14634 /* Generate out-of-pool execute target insns. */
14635 if (TARGET_CPU_ZARCH)
14636 {
14637 rtx_insn *insn, *target;
14638 rtx label;
14639
14640 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
14641 {
14642 label = s390_execute_label (insn);
14643 if (!label)
14644 continue;
14645
14646 gcc_assert (label != const0_rtx);
14647
14648 target = emit_label (XEXP (label, 0));
14649 INSN_ADDRESSES_NEW (target, -1);
14650
14651 if (JUMP_P (insn))
14652 {
14653 target = emit_jump_insn (s390_execute_target (insn));
14654 /* This is important in order to keep a table jump
14655 pointing at the jump table label. Only then is it
14656 recognized as a table jump. */
14657 JUMP_LABEL (target) = JUMP_LABEL (insn);
14658 }
14659 else
14660 target = emit_insn (s390_execute_target (insn));
14661 INSN_ADDRESSES_NEW (target, -1);
14662 }
14663 }
14664
14665 /* Try to optimize prologue and epilogue further. */
14666 s390_optimize_prologue ();
14667
14668 /* Walk over the insns and do some >=z10 specific changes. */
14669 if (s390_tune >= PROCESSOR_2097_Z10)
14670 {
14671 rtx_insn *insn;
14672 bool insn_added_p = false;
14673
14674 /* The insn lengths and addresses have to be up to date for the
14675 following manipulations. */
14676 shorten_branches (get_insns ());
14677
14678 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
14679 {
14680 if (!INSN_P (insn) || INSN_CODE (insn) <= 0)
14681 continue;
14682
14683 if (JUMP_P (insn))
14684 insn_added_p |= s390_fix_long_loop_prediction (insn);
14685
14686 if ((GET_CODE (PATTERN (insn)) == PARALLEL
14687 || GET_CODE (PATTERN (insn)) == SET)
14688 && s390_tune == PROCESSOR_2097_Z10)
14689 insn_added_p |= s390_z10_optimize_cmp (insn);
14690 }
14691
14692 /* Adjust branches if we added new instructions. */
14693 if (insn_added_p)
14694 shorten_branches (get_insns ());
14695 }
14696
14697 s390_function_num_hotpatch_hw (current_function_decl, &hw_before, &hw_after);
14698 if (hw_after > 0)
14699 {
14700 rtx_insn *insn;
14701
14702 /* Insert NOPs for hotpatching. */
14703 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
14704 /* Emit NOPs
14705 1. inside the area covered by debug information to allow setting
14706 breakpoints at the NOPs,
14707 2. before any insn which results in an asm instruction,
14708 3. before in-function labels to avoid jumping to the NOPs, for
14709 example as part of a loop,
14710 4. before any barrier in case the function is completely empty
14711 (__builtin_unreachable ()) and has neither internal labels nor
14712 active insns.
14713 */
14714 if (active_insn_p (insn) || BARRIER_P (insn) || LABEL_P (insn))
14715 break;
14716 /* Output a series of NOPs before the first active insn. */
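/* For example, hw_after == 5 halfwords on a zarch target is covered
   below by one 6-byte NOP (3 halfwords) followed by one 4-byte NOP
   (2 halfwords). */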
14717 while (insn && hw_after > 0)
14718 {
14719 if (hw_after >= 3 && TARGET_CPU_ZARCH)
14720 {
14721 emit_insn_before (gen_nop_6_byte (), insn);
14722 hw_after -= 3;
14723 }
14724 else if (hw_after >= 2)
14725 {
14726 emit_insn_before (gen_nop_4_byte (), insn);
14727 hw_after -= 2;
14728 }
14729 else
14730 {
14731 emit_insn_before (gen_nop_2_byte (), insn);
14732 hw_after -= 1;
14733 }
14734 }
14735 }
14736 }
14737
14738 /* Return true if INSN is a fp load insn writing register REGNO. */
14739 static inline bool
14740 s390_fpload_toreg (rtx_insn *insn, unsigned int regno)
14741 {
14742 rtx set;
14743 enum attr_type flag = s390_safe_attr_type (insn);
14744
14745 if (flag != TYPE_FLOADSF && flag != TYPE_FLOADDF)
14746 return false;
14747
14748 set = single_set (insn);
14749
14750 if (set == NULL_RTX)
14751 return false;
14752
14753 if (!REG_P (SET_DEST (set)) || !MEM_P (SET_SRC (set)))
14754 return false;
14755
14756 if (REGNO (SET_DEST (set)) != regno)
14757 return false;
14758
14759 return true;
14760 }
14761
14762 /* This value describes the distance to be avoided between an
14763 arithmetic fp instruction and an fp load writing the same register.
14764 Z10_EARLYLOAD_DISTANCE - 1 as well as Z10_EARLYLOAD_DISTANCE + 1
14765 are fine, but the exact value has to be avoided. Otherwise the FP
14766 pipeline will throw an exception causing a major penalty. */
14767 #define Z10_EARLYLOAD_DISTANCE 7
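/* Example: s390_z10_prevent_earlyload_conflicts below looks at the
   insn scheduled Z10_EARLYLOAD_DISTANCE slots before the one to be
   issued next; if that was an FP arithmetic insn setting %f2, an
   "ld %f2,..." issued now would sit at exactly the forbidden
   distance, so such a load is moved to the far end of the ready list
   instead. */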
14768
14769 /* Rearrange the ready list in order to avoid the situation described
14770 for Z10_EARLYLOAD_DISTANCE. A problematic load instruction is
14771 moved to the very end of the ready list. */
14772 static void
14773 s390_z10_prevent_earlyload_conflicts (rtx_insn **ready, int *nready_p)
14774 {
14775 unsigned int regno;
14776 int nready = *nready_p;
14777 rtx_insn *tmp;
14778 int i;
14779 rtx_insn *insn;
14780 rtx set;
14781 enum attr_type flag;
14782 int distance;
14783
14784 /* Skip DISTANCE - 1 active insns. */
14785 for (insn = last_scheduled_insn, distance = Z10_EARLYLOAD_DISTANCE - 1;
14786 distance > 0 && insn != NULL_RTX;
14787 distance--, insn = prev_active_insn (insn))
14788 if (CALL_P (insn) || JUMP_P (insn))
14789 return;
14790
14791 if (insn == NULL_RTX)
14792 return;
14793
14794 set = single_set (insn);
14795
14796 if (set == NULL_RTX || !REG_P (SET_DEST (set))
14797 || GET_MODE_CLASS (GET_MODE (SET_DEST (set))) != MODE_FLOAT)
14798 return;
14799
14800 flag = s390_safe_attr_type (insn);
14801
14802 if (flag == TYPE_FLOADSF || flag == TYPE_FLOADDF)
14803 return;
14804
14805 regno = REGNO (SET_DEST (set));
14806 i = nready - 1;
14807
14808 while (!s390_fpload_toreg (ready[i], regno) && i > 0)
14809 i--;
14810
14811 if (!i)
14812 return;
14813
14814 tmp = ready[i];
14815 memmove (&ready[1], &ready[0], sizeof (rtx_insn *) * i);
14816 ready[0] = tmp;
14817 }
14818
14819 /* Returns TRUE if BB is entered via a fallthru edge and all other
14820 incoming edges have a probability below 'unlikely'. */
14821 static bool
14822 s390_bb_fallthru_entry_likely (basic_block bb)
14823 {
14824 edge e, fallthru_edge;
14825 edge_iterator ei;
14826
14827 if (!bb)
14828 return false;
14829
14830 fallthru_edge = find_fallthru_edge (bb->preds);
14831 if (!fallthru_edge)
14832 return false;
14833
14834 FOR_EACH_EDGE (e, ei, bb->preds)
14835 if (e != fallthru_edge
14836 && e->probability >= profile_probability::unlikely ())
14837 return false;
14838
14839 return true;
14840 }
14841
14842 /* The s390_sched_state variable tracks the state of the current or
14843 the last instruction group.
14844
14845 0,1,2 number of instructions scheduled in the current group
14846 3 the last group is complete - normal insns
14847 4 the last group was a cracked/expanded insn */
14848
14849 static int s390_sched_state = 0;
14850
14851 #define S390_SCHED_STATE_NORMAL 3
14852 #define S390_SCHED_STATE_CRACKED 4
14853
14854 #define S390_SCHED_ATTR_MASK_CRACKED 0x1
14855 #define S390_SCHED_ATTR_MASK_EXPANDED 0x2
14856 #define S390_SCHED_ATTR_MASK_ENDGROUP 0x4
14857 #define S390_SCHED_ATTR_MASK_GROUPALONE 0x8
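/* The state transitions as performed in s390_sched_variable_issue:

     0 -> 1 -> 2 -> S390_SCHED_STATE_NORMAL   plain insns fill up a group
     any -> S390_SCHED_STATE_CRACKED          cracked/expanded insn issued
     any -> S390_SCHED_STATE_NORMAL           endgroup/groupalone insn issued
     S390_SCHED_STATE_NORMAL -> 1             plain insn opens the next group
     S390_SCHED_STATE_CRACKED -> S390_SCHED_STATE_NORMAL
                                              plain insn closes the group. */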
14858
14859 static unsigned int
14860 s390_get_sched_attrmask (rtx_insn *insn)
14861 {
14862 unsigned int mask = 0;
14863
14864 switch (s390_tune)
14865 {
14866 case PROCESSOR_2827_ZEC12:
14867 if (get_attr_zEC12_cracked (insn))
14868 mask |= S390_SCHED_ATTR_MASK_CRACKED;
14869 if (get_attr_zEC12_expanded (insn))
14870 mask |= S390_SCHED_ATTR_MASK_EXPANDED;
14871 if (get_attr_zEC12_endgroup (insn))
14872 mask |= S390_SCHED_ATTR_MASK_ENDGROUP;
14873 if (get_attr_zEC12_groupalone (insn))
14874 mask |= S390_SCHED_ATTR_MASK_GROUPALONE;
14875 break;
14876 case PROCESSOR_2964_Z13:
14877 case PROCESSOR_3906_Z14:
14878 if (get_attr_z13_cracked (insn))
14879 mask |= S390_SCHED_ATTR_MASK_CRACKED;
14880 if (get_attr_z13_expanded (insn))
14881 mask |= S390_SCHED_ATTR_MASK_EXPANDED;
14882 if (get_attr_z13_endgroup (insn))
14883 mask |= S390_SCHED_ATTR_MASK_ENDGROUP;
14884 if (get_attr_z13_groupalone (insn))
14885 mask |= S390_SCHED_ATTR_MASK_GROUPALONE;
14886 break;
14887 default:
14888 gcc_unreachable ();
14889 }
14890 return mask;
14891 }
14892
14893 static unsigned int
14894 s390_get_unit_mask (rtx_insn *insn, int *units)
14895 {
14896 unsigned int mask = 0;
14897
14898 switch (s390_tune)
14899 {
14900 case PROCESSOR_2964_Z13:
14901 case PROCESSOR_3906_Z14:
14902 *units = 3;
14903 if (get_attr_z13_unit_lsu (insn))
14904 mask |= 1 << 0;
14905 if (get_attr_z13_unit_fxu (insn))
14906 mask |= 1 << 1;
14907 if (get_attr_z13_unit_vfu (insn))
14908 mask |= 1 << 2;
14909 break;
14910 default:
14911 gcc_unreachable ();
14912 }
14913 return mask;
14914 }
14915
14916 /* Return the scheduling score for INSN. The higher the score the
14917 better. The score is calculated from the OOO scheduling attributes
14918 of INSN and the scheduling state s390_sched_state. */
14919 static int
14920 s390_sched_score (rtx_insn *insn)
14921 {
14922 unsigned int mask = s390_get_sched_attrmask (insn);
14923 int score = 0;
14924
14925 switch (s390_sched_state)
14926 {
14927 case 0:
14928 /* Try to put insns into the first slot which would otherwise
14929 break a group. */
14930 if ((mask & S390_SCHED_ATTR_MASK_CRACKED) != 0
14931 || (mask & S390_SCHED_ATTR_MASK_EXPANDED) != 0)
14932 score += 5;
14933 if ((mask & S390_SCHED_ATTR_MASK_GROUPALONE) != 0)
14934 score += 10;
14935 /* fallthrough */
14936 case 1:
14937 /* Prefer not cracked insns while trying to put together a
14938 group. */
14939 if ((mask & S390_SCHED_ATTR_MASK_CRACKED) == 0
14940 && (mask & S390_SCHED_ATTR_MASK_EXPANDED) == 0
14941 && (mask & S390_SCHED_ATTR_MASK_GROUPALONE) == 0)
14942 score += 10;
14943 if ((mask & S390_SCHED_ATTR_MASK_ENDGROUP) == 0)
14944 score += 5;
14945 break;
14946 case 2:
14947 /* Prefer not cracked insns while trying to put together a
14948 group. */
14949 if ((mask & S390_SCHED_ATTR_MASK_CRACKED) == 0
14950 && (mask & S390_SCHED_ATTR_MASK_EXPANDED) == 0
14951 && (mask & S390_SCHED_ATTR_MASK_GROUPALONE) == 0)
14952 score += 10;
14953 /* Prefer endgroup insns in the last slot. */
14954 if ((mask & S390_SCHED_ATTR_MASK_ENDGROUP) != 0)
14955 score += 10;
14956 break;
14957 case S390_SCHED_STATE_NORMAL:
14958 /* Prefer not cracked insns if the last was not cracked. */
14959 if ((mask & S390_SCHED_ATTR_MASK_CRACKED) == 0
14960 && (mask & S390_SCHED_ATTR_MASK_EXPANDED) == 0)
14961 score += 5;
14962 if ((mask & S390_SCHED_ATTR_MASK_GROUPALONE) != 0)
14963 score += 10;
14964 break;
14965 case S390_SCHED_STATE_CRACKED:
14966 /* Try to keep cracked insns together to prevent them from
14967 interrupting groups. */
14968 if ((mask & S390_SCHED_ATTR_MASK_CRACKED) != 0
14969 || (mask & S390_SCHED_ATTR_MASK_EXPANDED) != 0)
14970 score += 5;
14971 break;
14972 }
14973
14974 if (s390_tune >= PROCESSOR_2964_Z13)
14975 {
14976 int units, i;
14977 unsigned unit_mask, m = 1;
14978
14979 unit_mask = s390_get_unit_mask (insn, &units);
14980 gcc_assert (units <= MAX_SCHED_UNITS);
14981
14982 /* Add a score in range 0..MAX_SCHED_MIX_SCORE depending on how long
14983 ago the last insn of this unit type got scheduled. This is
14984 supposed to help provide a proper instruction mix to the
14985 CPU. */
14986 for (i = 0; i < units; i++, m <<= 1)
14987 if (m & unit_mask)
14988 score += (last_scheduled_unit_distance[i] * MAX_SCHED_MIX_SCORE /
14989 MAX_SCHED_MIX_DISTANCE);
14990
14991 unsigned latency = insn_default_latency (insn);
14992
14993 int other_side = 1 - current_side;
14994
14995 /* Try to delay long-running insns when side is busy. */
14996 if (latency > LONGRUNNING_THRESHOLD)
14997 {
14998 if (get_attr_z13_unit_fxu (insn) && fxu_longrunning[current_side]
14999 && fxu_longrunning[other_side] <= fxu_longrunning[current_side])
15000 score = MAX (0, score - 10);
15001
15002 if (get_attr_z13_unit_vfu (insn) && vfu_longrunning[current_side]
15003 && vfu_longrunning[other_side] <= vfu_longrunning[current_side])
15004 score = MAX (0, score - 10);
15005 }
15006 }
15007
15008 return score;
15009 }
15010
15011 /* This function is called via hook TARGET_SCHED_REORDER before
15012 issuing one insn from list READY which contains *NREADYP entries.
15013 For target z10 it reorders load instructions to avoid early load
15014 conflicts in the floating point pipeline. */
15015 static int
15016 s390_sched_reorder (FILE *file, int verbose,
15017 rtx_insn **ready, int *nreadyp, int clock ATTRIBUTE_UNUSED)
15018 {
15019 if (s390_tune == PROCESSOR_2097_Z10
15020 && reload_completed
15021 && *nreadyp > 1)
15022 s390_z10_prevent_earlyload_conflicts (ready, nreadyp);
15023
15024 if (s390_tune >= PROCESSOR_2827_ZEC12
15025 && reload_completed
15026 && *nreadyp > 1)
15027 {
15028 int i;
15029 int last_index = *nreadyp - 1;
15030 int max_index = -1;
15031 int max_score = -1;
15032 rtx_insn *tmp;
15033
15034 /* Just move the insn with the highest score to the top (the
15035 end) of the list. A full sort is not needed since a conflict
15036 in the hazard recognition cannot happen. So the top insn in
15037 the ready list will always be taken. */
15038 for (i = last_index; i >= 0; i--)
15039 {
15040 int score;
15041
15042 if (recog_memoized (ready[i]) < 0)
15043 continue;
15044
15045 score = s390_sched_score (ready[i]);
15046 if (score > max_score)
15047 {
15048 max_score = score;
15049 max_index = i;
15050 }
15051 }
15052
15053 if (max_index != -1)
15054 {
15055 if (max_index != last_index)
15056 {
15057 tmp = ready[max_index];
15058 ready[max_index] = ready[last_index];
15059 ready[last_index] = tmp;
15060
15061 if (verbose > 5)
15062 fprintf (file,
15063 ";;\t\tBACKEND: move insn %d to the top of list\n",
15064 INSN_UID (ready[last_index]));
15065 }
15066 else if (verbose > 5)
15067 fprintf (file,
15068 ";;\t\tBACKEND: best insn %d already on top\n",
15069 INSN_UID (ready[last_index]));
15070 }
15071
15072 if (verbose > 5)
15073 {
15074 fprintf (file, "ready list ooo attributes - sched state: %d\n",
15075 s390_sched_state);
15076
15077 for (i = last_index; i >= 0; i--)
15078 {
15079 unsigned int sched_mask;
15080 rtx_insn *insn = ready[i];
15081
15082 if (recog_memoized (insn) < 0)
15083 continue;
15084
15085 sched_mask = s390_get_sched_attrmask (insn);
15086 fprintf (file, ";;\t\tBACKEND: insn %d score: %d: ",
15087 INSN_UID (insn),
15088 s390_sched_score (insn));
15089 #define PRINT_SCHED_ATTR(M, ATTR) fprintf (file, "%s ",\
15090 ((M) & sched_mask) ? #ATTR : "");
15091 PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_CRACKED, cracked);
15092 PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_EXPANDED, expanded);
15093 PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_ENDGROUP, endgroup);
15094 PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_GROUPALONE, groupalone);
15095 #undef PRINT_SCHED_ATTR
15096 if (s390_tune >= PROCESSOR_2964_Z13)
15097 {
15098 unsigned int unit_mask, m = 1;
15099 int units, j;
15100
15101 unit_mask = s390_get_unit_mask (insn, &units);
15102 fprintf (file, "(units:");
15103 for (j = 0; j < units; j++, m <<= 1)
15104 if (m & unit_mask)
15105 fprintf (file, " u%d", j);
15106 fprintf (file, ")");
15107 }
15108 fprintf (file, "\n");
15109 }
15110 }
15111 }
15112
15113 return s390_issue_rate ();
15114 }
15115
15116
15117 /* This function is called via hook TARGET_SCHED_VARIABLE_ISSUE after
15118 the scheduler has issued INSN. It stores the last issued insn into
15119 last_scheduled_insn in order to make it available for
15120 s390_sched_reorder. */
15121 static int
15122 s390_sched_variable_issue (FILE *file, int verbose, rtx_insn *insn, int more)
15123 {
15124 last_scheduled_insn = insn;
15125
15126 bool starts_group = false;
15127
15128 if (s390_tune >= PROCESSOR_2827_ZEC12
15129 && reload_completed
15130 && recog_memoized (insn) >= 0)
15131 {
15132 unsigned int mask = s390_get_sched_attrmask (insn);
15133
15134 if ((mask & S390_SCHED_ATTR_MASK_CRACKED) != 0
15135 || (mask & S390_SCHED_ATTR_MASK_EXPANDED) != 0
15136 || (mask & S390_SCHED_ATTR_MASK_GROUPALONE) != 0)
15137 starts_group = true;
15138
15139 if ((mask & S390_SCHED_ATTR_MASK_CRACKED) != 0
15140 || (mask & S390_SCHED_ATTR_MASK_EXPANDED) != 0)
15141 s390_sched_state = S390_SCHED_STATE_CRACKED;
15142 else if ((mask & S390_SCHED_ATTR_MASK_ENDGROUP) != 0
15143 || (mask & S390_SCHED_ATTR_MASK_GROUPALONE) != 0)
15144 s390_sched_state = S390_SCHED_STATE_NORMAL;
15145 else
15146 {
15147 /* Only normal insns are left (mask == 0). */
15148 switch (s390_sched_state)
15149 {
15150 case 0:
15151 starts_group = true;
15152 /* fallthrough */
15153 case 1:
15154 case 2:
15155 s390_sched_state++;
15156 break;
15157 case S390_SCHED_STATE_NORMAL:
15158 starts_group = true;
15159 s390_sched_state = 1;
15160 break;
15161 case S390_SCHED_STATE_CRACKED:
15162 s390_sched_state = S390_SCHED_STATE_NORMAL;
15163 break;
15164 }
15165 }
15166
15167 if (s390_tune >= PROCESSOR_2964_Z13)
15168 {
15169 int units, i;
15170 unsigned unit_mask, m = 1;
15171
15172 unit_mask = s390_get_unit_mask (insn, &units);
15173 gcc_assert (units <= MAX_SCHED_UNITS);
15174
15175 for (i = 0; i < units; i++, m <<= 1)
15176 if (m & unit_mask)
15177 last_scheduled_unit_distance[i] = 0;
15178 else if (last_scheduled_unit_distance[i] < MAX_SCHED_MIX_DISTANCE)
15179 last_scheduled_unit_distance[i]++;
15180 }
15181
15182 /* If this insn started a new group, the side flipped. */
15183 if (starts_group)
15184 current_side = current_side ? 0 : 1;
15185
15186 for (int i = 0; i < 2; i++)
15187 {
15188 if (fxu_longrunning[i] >= 1)
15189 fxu_longrunning[i] -= 1;
15190 if (vfu_longrunning[i] >= 1)
15191 vfu_longrunning[i] -= 1;
15192 }
15193
15194 unsigned latency = insn_default_latency (insn);
15195 if (latency > LONGRUNNING_THRESHOLD)
15196 {
15197 if (get_attr_z13_unit_fxu (insn))
15198 fxu_longrunning[current_side] = latency * LATENCY_FACTOR;
15199 else
15200 vfu_longrunning[current_side] = latency * LATENCY_FACTOR;
15201 }
15202
15203 if (verbose > 5)
15204 {
15205 unsigned int sched_mask;
15206
15207 sched_mask = s390_get_sched_attrmask (insn);
15208
15209 fprintf (file, ";;\t\tBACKEND: insn %d: ", INSN_UID (insn));
15210 #define PRINT_SCHED_ATTR(M, ATTR) fprintf (file, "%s ", ((M) & sched_mask) ? #ATTR : "");
15211 PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_CRACKED, cracked);
15212 PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_EXPANDED, expanded);
15213 PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_ENDGROUP, endgroup);
15214 PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_GROUPALONE, groupalone);
15215 #undef PRINT_SCHED_ATTR
15216
15217 if (s390_tune >= PROCESSOR_2964_Z13)
15218 {
15219 unsigned int unit_mask, m = 1;
15220 int units, j;
15221
15222 unit_mask = s390_get_unit_mask (insn, &units);
15223 fprintf (file, "(units:");
15224 for (j = 0; j < units; j++, m <<= 1)
15225 if (m & unit_mask)
15226 fprintf (file, " %d", j);
15227 fprintf (file, ")");
15228 }
15229 fprintf (file, " sched state: %d\n", s390_sched_state);
15230
15231 if (s390_tune >= PROCESSOR_2964_Z13)
15232 {
15233 int units, j;
15234
15235 s390_get_unit_mask (insn, &units);
15236
15237 fprintf (file, ";;\t\tBACKEND: units unused for: ");
15238 for (j = 0; j < units; j++)
15239 fprintf (file, "%d:%d ", j, last_scheduled_unit_distance[j]);
15240 fprintf (file, "\n");
15241 }
15242 }
15243 }
15244
15245 if (GET_CODE (PATTERN (insn)) != USE
15246 && GET_CODE (PATTERN (insn)) != CLOBBER)
15247 return more - 1;
15248 else
15249 return more;
15250 }
15251
15252 static void
15253 s390_sched_init (FILE *file ATTRIBUTE_UNUSED,
15254 int verbose ATTRIBUTE_UNUSED,
15255 int max_ready ATTRIBUTE_UNUSED)
15256 {
15257 last_scheduled_insn = NULL;
15258 memset (last_scheduled_unit_distance, 0, MAX_SCHED_UNITS * sizeof (int));
15259
15260 /* If the next basic block is most likely entered via a fallthru edge
15261 we keep the last sched state. Otherwise we start a new group.
15262 The scheduler traverses basic blocks in "instruction stream"
15263 ordering, so if we see a fallthru edge here, s390_sched_state
15264 still holds the state of the edge's source block.
15265
15266 current_sched_info->prev_head is the insn before the first insn of the
15267 block of insns to be scheduled.
15268 */
15269 rtx_insn *insn = current_sched_info->prev_head
15270 ? NEXT_INSN (current_sched_info->prev_head) : NULL;
15271 basic_block bb = insn ? BLOCK_FOR_INSN (insn) : NULL;
15272 if (s390_tune < PROCESSOR_2964_Z13 || !s390_bb_fallthru_entry_likely (bb))
15273 s390_sched_state = 0;
15274 }
15275
15276 /* This target hook implementation for TARGET_LOOP_UNROLL_ADJUST
15277 calculates the number of times struct loop *loop should be unrolled
15278 when tuning for CPUs with a built-in stride prefetcher.
15279 The loop is analyzed for memory accesses by walking over each rtx
15280 of the loop body. Depending on the loop_depth and the amount of
15281 memory accesses a new number <= nunroll is returned to improve the
15282 behavior of the hardware prefetch unit. */
15283 static unsigned
15284 s390_loop_unroll_adjust (unsigned nunroll, struct loop *loop)
15285 {
15286 basic_block *bbs;
15287 rtx_insn *insn;
15288 unsigned i;
15289 unsigned mem_count = 0;
15290
15291 if (s390_tune < PROCESSOR_2097_Z10)
15292 return nunroll;
15293
15294 /* Count the number of memory references within the loop body. */
15295 bbs = get_loop_body (loop);
15296 subrtx_iterator::array_type array;
15297 for (i = 0; i < loop->num_nodes; i++)
15298 FOR_BB_INSNS (bbs[i], insn)
15299 if (INSN_P (insn) && INSN_CODE (insn) != -1)
15300 FOR_EACH_SUBRTX (iter, array, PATTERN (insn), NONCONST)
15301 if (MEM_P (*iter))
15302 mem_count += 1;
15303 free (bbs);
15304
15305 /* Prevent division by zero; nunroll needs no adjusting when the loop contains no memory references. */
15306 if (mem_count == 0)
15307 return nunroll;
15308
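/* For example, a depth-1 loop body containing 7 memory references is
   unrolled at most MIN (nunroll, 28 / 7) = MIN (nunroll, 4) times. */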
15309 switch (loop_depth (loop))
15310 {
15311 case 1:
15312 return MIN (nunroll, 28 / mem_count);
15313 case 2:
15314 return MIN (nunroll, 22 / mem_count);
15315 default:
15316 return MIN (nunroll, 16 / mem_count);
15317 }
15318 }
15319
15320 /* Restore the current options. This is a hook function and also called
15321 internally. */
15322
15323 static void
15324 s390_function_specific_restore (struct gcc_options *opts,
15325 struct cl_target_option *ptr ATTRIBUTE_UNUSED)
15326 {
15327 opts->x_s390_cost_pointer = (long)processor_table[opts->x_s390_tune].cost;
15328 }
15329
15330 static void
15331 s390_option_override_internal (bool main_args_p,
15332 struct gcc_options *opts,
15333 const struct gcc_options *opts_set)
15334 {
15335 const char *prefix;
15336 const char *suffix;
15337
15338 /* Set up prefix/suffix so the error messages refer to either the command
15339 line argument, or the attribute(target). */
15340 if (main_args_p)
15341 {
15342 prefix = "-m";
15343 suffix = "";
15344 }
15345 else
15346 {
15347 prefix = "option(\"";
15348 suffix = "\")";
15349 }
15350
15351
15352 /* Architecture mode defaults according to ABI. */
15353 if (!(opts_set->x_target_flags & MASK_ZARCH))
15354 {
15355 if (TARGET_64BIT)
15356 opts->x_target_flags |= MASK_ZARCH;
15357 else
15358 opts->x_target_flags &= ~MASK_ZARCH;
15359 }
15360
15361 /* Set the march default in case it hasn't been specified on cmdline. */
15362 if (!opts_set->x_s390_arch)
15363 opts->x_s390_arch = PROCESSOR_2064_Z900;
15364 else if (opts->x_s390_arch == PROCESSOR_9672_G5
15365 || opts->x_s390_arch == PROCESSOR_9672_G6)
15366 warning (OPT_Wdeprecated, "%sarch=%s%s is deprecated and will be removed "
15367 "in future releases; use at least %sarch=z900%s",
15368 prefix, opts->x_s390_arch == PROCESSOR_9672_G5 ? "g5" : "g6",
15369 suffix, prefix, suffix);
15370
15371 opts->x_s390_arch_flags = processor_flags_table[(int) opts->x_s390_arch];
15372
15373 /* Determine processor to tune for. */
15374 if (!opts_set->x_s390_tune)
15375 opts->x_s390_tune = opts->x_s390_arch;
15376 else if (opts->x_s390_tune == PROCESSOR_9672_G5
15377 || opts->x_s390_tune == PROCESSOR_9672_G6)
15378 warning (OPT_Wdeprecated, "%stune=%s%s is deprecated and will be removed "
15379 "in future releases; use at least %stune=z900%s",
15380 prefix, opts->x_s390_tune == PROCESSOR_9672_G5 ? "g5" : "g6",
15381 suffix, prefix, suffix);
15382
15383 opts->x_s390_tune_flags = processor_flags_table[opts->x_s390_tune];
15384
15385 /* Sanity checks. */
15386 if (opts->x_s390_arch == PROCESSOR_NATIVE
15387 || opts->x_s390_tune == PROCESSOR_NATIVE)
15388 gcc_unreachable ();
15389 if (TARGET_ZARCH_P (opts->x_target_flags) && !TARGET_CPU_ZARCH_P (opts))
15390 error ("z/Architecture mode not supported on %s",
15391 processor_table[(int)opts->x_s390_arch].name);
15392 if (TARGET_64BIT && !TARGET_ZARCH_P (opts->x_target_flags))
15393 error ("64-bit ABI not supported in ESA/390 mode");
15394
15395 if (opts->x_s390_indirect_branch == indirect_branch_thunk_inline
15396 || opts->x_s390_indirect_branch_call == indirect_branch_thunk_inline
15397 || opts->x_s390_function_return == indirect_branch_thunk_inline
15398 || opts->x_s390_function_return_reg == indirect_branch_thunk_inline
15399 || opts->x_s390_function_return_mem == indirect_branch_thunk_inline)
15400 error ("thunk-inline is only supported with -mindirect-branch-jump");
15401
15402 if (opts->x_s390_indirect_branch != indirect_branch_keep)
15403 {
15404 if (!opts_set->x_s390_indirect_branch_call)
15405 opts->x_s390_indirect_branch_call = opts->x_s390_indirect_branch;
15406
15407 if (!opts_set->x_s390_indirect_branch_jump)
15408 opts->x_s390_indirect_branch_jump = opts->x_s390_indirect_branch;
15409 }
15410
15411 if (opts->x_s390_function_return != indirect_branch_keep)
15412 {
15413 if (!opts_set->x_s390_function_return_reg)
15414 opts->x_s390_function_return_reg = opts->x_s390_function_return;
15415
15416 if (!opts_set->x_s390_function_return_mem)
15417 opts->x_s390_function_return_mem = opts->x_s390_function_return;
15418 }
15419
15420 if (!TARGET_CPU_ZARCH)
15421 {
15422 if (opts->x_s390_indirect_branch_call != indirect_branch_keep
15423 || opts->x_s390_indirect_branch_jump != indirect_branch_keep)
15424 error ("-mindirect-branch* options require -march=z900 or higher");
15425 if (opts->x_s390_function_return_reg != indirect_branch_keep
15426 || opts->x_s390_function_return_mem != indirect_branch_keep)
15427 error ("-mfunction-return* options require -march=z900 or higher");
15428 }
15429
15430
15431 /* Enable hardware transactions if available and not explicitly
15432 disabled by user. E.g. with -m31 -march=zEC12 -mzarch. */
15433 if (!TARGET_OPT_HTM_P (opts_set->x_target_flags))
15434 {
15435 if (TARGET_CPU_HTM_P (opts) && TARGET_ZARCH_P (opts->x_target_flags))
15436 opts->x_target_flags |= MASK_OPT_HTM;
15437 else
15438 opts->x_target_flags &= ~MASK_OPT_HTM;
15439 }
15440
15441 if (TARGET_OPT_VX_P (opts_set->x_target_flags))
15442 {
15443 if (TARGET_OPT_VX_P (opts->x_target_flags))
15444 {
15445 if (!TARGET_CPU_VX_P (opts))
15446 error ("hardware vector support not available on %s",
15447 processor_table[(int)opts->x_s390_arch].name);
15448 if (TARGET_SOFT_FLOAT_P (opts->x_target_flags))
15449 error ("hardware vector support not available with -msoft-float");
15450 }
15451 }
15452 else
15453 {
15454 if (TARGET_CPU_VX_P (opts))
15455 /* Enable vector support if available and not explicitly disabled
15456 by user. E.g. with -m31 -march=z13 -mzarch. */
15457 opts->x_target_flags |= MASK_OPT_VX;
15458 else
15459 opts->x_target_flags &= ~MASK_OPT_VX;
15460 }
15461
15462 /* Use hardware DFP if available and not explicitly disabled by
15463 user. E.g. with -m31 -march=z10 -mzarch. */
15464 if (!TARGET_HARD_DFP_P (opts_set->x_target_flags))
15465 {
15466 if (TARGET_DFP_P (opts))
15467 opts->x_target_flags |= MASK_HARD_DFP;
15468 else
15469 opts->x_target_flags &= ~MASK_HARD_DFP;
15470 }
15471
15472 if (TARGET_HARD_DFP_P (opts->x_target_flags) && !TARGET_DFP_P (opts))
15473 {
15474 if (TARGET_HARD_DFP_P (opts_set->x_target_flags))
15475 {
15476 if (!TARGET_CPU_DFP_P (opts))
15477 error ("hardware decimal floating point instructions"
15478 " not available on %s",
15479 processor_table[(int)opts->x_s390_arch].name);
15480 if (!TARGET_ZARCH_P (opts->x_target_flags))
15481 error ("hardware decimal floating point instructions"
15482 " not available in ESA/390 mode");
15483 }
15484 else
15485 opts->x_target_flags &= ~MASK_HARD_DFP;
15486 }
15487
15488 if (TARGET_SOFT_FLOAT_P (opts_set->x_target_flags)
15489 && TARGET_SOFT_FLOAT_P (opts->x_target_flags))
15490 {
15491 if (TARGET_HARD_DFP_P (opts_set->x_target_flags)
15492 && TARGET_HARD_DFP_P (opts->x_target_flags))
15493 error ("-mhard-dfp can%'t be used in conjunction with -msoft-float");
15494
15495 opts->x_target_flags &= ~MASK_HARD_DFP;
15496 }
15497
15498 if (TARGET_BACKCHAIN_P (opts->x_target_flags)
15499 && TARGET_PACKED_STACK_P (opts->x_target_flags)
15500 && TARGET_HARD_FLOAT_P (opts->x_target_flags))
15501 error ("-mbackchain -mpacked-stack -mhard-float are not supported "
15502 "in combination");
15503
15504 if (opts->x_s390_stack_size)
15505 {
15506 if (opts->x_s390_stack_guard >= opts->x_s390_stack_size)
15507 error ("stack size must be greater than the stack guard value");
15508 else if (opts->x_s390_stack_size > 1 << 16)
15509 error ("stack size must not be greater than 64k");
15510 }
15511 else if (opts->x_s390_stack_guard)
15512 error ("-mstack-guard implies use of -mstack-size");
15513
15514 #ifdef TARGET_DEFAULT_LONG_DOUBLE_128
15515 if (!TARGET_LONG_DOUBLE_128_P (opts_set->x_target_flags))
15516 opts->x_target_flags |= MASK_LONG_DOUBLE_128;
15517 #endif
15518
15519 if (opts->x_s390_tune >= PROCESSOR_2097_Z10)
15520 {
15521 maybe_set_param_value (PARAM_MAX_UNROLLED_INSNS, 100,
15522 opts->x_param_values,
15523 opts_set->x_param_values);
15524 maybe_set_param_value (PARAM_MAX_UNROLL_TIMES, 32,
15525 opts->x_param_values,
15526 opts_set->x_param_values);
15527 maybe_set_param_value (PARAM_MAX_COMPLETELY_PEELED_INSNS, 2000,
15528 opts->x_param_values,
15529 opts_set->x_param_values);
15530 maybe_set_param_value (PARAM_MAX_COMPLETELY_PEEL_TIMES, 64,
15531 opts->x_param_values,
15532 opts_set->x_param_values);
15533 }
15534
15535 maybe_set_param_value (PARAM_MAX_PENDING_LIST_LENGTH, 256,
15536 opts->x_param_values,
15537 opts_set->x_param_values);
15538 /* Values for loop prefetching. */
15539 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE, 256,
15540 opts->x_param_values,
15541 opts_set->x_param_values);
15542 maybe_set_param_value (PARAM_L1_CACHE_SIZE, 128,
15543 opts->x_param_values,
15544 opts_set->x_param_values);
15545 /* s390 has more than 2 levels and the size is much larger. Since
15546 we are always running virtualized, assume that we only get a small
15547 part of the caches above L1. */
15548 maybe_set_param_value (PARAM_L2_CACHE_SIZE, 1500,
15549 opts->x_param_values,
15550 opts_set->x_param_values);
15551 maybe_set_param_value (PARAM_PREFETCH_MIN_INSN_TO_MEM_RATIO, 2,
15552 opts->x_param_values,
15553 opts_set->x_param_values);
15554 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES, 6,
15555 opts->x_param_values,
15556 opts_set->x_param_values);
15557
15558 /* Use the alternative scheduling-pressure algorithm by default. */
15559 maybe_set_param_value (PARAM_SCHED_PRESSURE_ALGORITHM, 2,
15560 opts->x_param_values,
15561 opts_set->x_param_values);
15562
15563 maybe_set_param_value (PARAM_MIN_VECT_LOOP_BOUND, 2,
15564 opts->x_param_values,
15565 opts_set->x_param_values);
15566
15567 /* Call target specific restore function to do post-init work. At the moment,
15568 this just sets opts->x_s390_cost_pointer. */
15569 s390_function_specific_restore (opts, NULL);
15570 }
15571
15572 static void
15573 s390_option_override (void)
15574 {
15575 unsigned int i;
15576 cl_deferred_option *opt;
15577 vec<cl_deferred_option> *v =
15578 (vec<cl_deferred_option> *) s390_deferred_options;
15579
15580 if (v)
15581 FOR_EACH_VEC_ELT (*v, i, opt)
15582 {
15583 switch (opt->opt_index)
15584 {
15585 case OPT_mhotpatch_:
15586 {
15587 int val1;
15588 int val2;
15589 char *s = strtok (ASTRDUP (opt->arg), ",");
15590 char *t = strtok (NULL, "\0");
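/* E.g. -mhotpatch=12,34 arrives here as "12,34"; s then points to
   "12" and t to "34". If the second value is missing, t is NULL and
   both values get rejected below. */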
15591
15592 if (t != NULL)
15593 {
15594 val1 = integral_argument (s);
15595 val2 = integral_argument (t);
15596 }
15597 else
15598 {
15599 val1 = -1;
15600 val2 = -1;
15601 }
15602 if (val1 == -1 || val2 == -1)
15603 {
15604 /* Argument is not a plain number. */
15605 error ("arguments to %qs should be non-negative integers",
15606 "-mhotpatch=n,m");
15607 break;
15608 }
15609 else if (val1 > s390_hotpatch_hw_max
15610 || val2 > s390_hotpatch_hw_max)
15611 {
15612 error ("argument to %qs is too large (max. %d)",
15613 "-mhotpatch=n,m", s390_hotpatch_hw_max);
15614 break;
15615 }
15616 s390_hotpatch_hw_before_label = val1;
15617 s390_hotpatch_hw_after_label = val2;
15618 break;
15619 }
15620 default:
15621 gcc_unreachable ();
15622 }
15623 }
15624
15625 /* Set up function hooks. */
15626 init_machine_status = s390_init_machine_status;
15627
15628 s390_option_override_internal (true, &global_options, &global_options_set);
15629
15630 /* Save the initial options in case the user does function specific
15631 options. */
15632 target_option_default_node = build_target_option_node (&global_options);
15633 target_option_current_node = target_option_default_node;
15634
15635 /* This cannot reside in s390_option_optimization_table since HAVE_prefetch
15636 requires the arch flags to be evaluated already. Since prefetching
15637 is beneficial on s390, we enable it if available. */
15638 if (flag_prefetch_loop_arrays < 0 && HAVE_prefetch && optimize >= 3)
15639 flag_prefetch_loop_arrays = 1;
15640
15641 if (!s390_pic_data_is_text_relative && !flag_pic)
15642 error ("-mno-pic-data-is-text-relative cannot be used without -fpic/-fPIC");
15643
15644 if (TARGET_TPF)
15645 {
15646 /* Don't emit DWARF3/4 unless specifically selected. The TPF
15647 debuggers do not yet support DWARF 3/4. */
15648 if (!global_options_set.x_dwarf_strict)
15649 dwarf_strict = 1;
15650 if (!global_options_set.x_dwarf_version)
15651 dwarf_version = 2;
15652 }
15653
15654 /* Register a target-specific optimization-and-lowering pass
15655 to run immediately before prologue and epilogue generation.
15656
15657 Registering the pass must be done at start up. It's
15658 convenient to do it here. */
15659 opt_pass *new_pass = new pass_s390_early_mach (g);
15660 struct register_pass_info insert_pass_s390_early_mach =
15661 {
15662 new_pass, /* pass */
15663 "pro_and_epilogue", /* reference_pass_name */
15664 1, /* ref_pass_instance_number */
15665 PASS_POS_INSERT_BEFORE /* po_op */
15666 };
15667 register_pass (&insert_pass_s390_early_mach);
15668 }
15669
15670 #if S390_USE_TARGET_ATTRIBUTE
15671 /* Inner function to process the attribute((target(...))), take an argument and
15672 set the current options from the argument. If we have a list, recursively go
15673 over the list. */
15674
15675 static bool
15676 s390_valid_target_attribute_inner_p (tree args,
15677 struct gcc_options *opts,
15678 struct gcc_options *new_opts_set,
15679 bool force_pragma)
15680 {
15681 char *next_optstr;
15682 bool ret = true;
15683
15684 #define S390_ATTRIB(S,O,A) { S, sizeof (S)-1, O, A, 0 }
15685 #define S390_PRAGMA(S,O,A) { S, sizeof (S)-1, O, A, 1 }
15686 static const struct
15687 {
15688 const char *string;
15689 size_t len;
15690 int opt;
15691 int has_arg;
15692 int only_as_pragma;
15693 } attrs[] = {
15694 /* enum options */
15695 S390_ATTRIB ("arch=", OPT_march_, 1),
15696 S390_ATTRIB ("tune=", OPT_mtune_, 1),
15697 /* uinteger options */
15698 S390_ATTRIB ("stack-guard=", OPT_mstack_guard_, 1),
15699 S390_ATTRIB ("stack-size=", OPT_mstack_size_, 1),
15700 S390_ATTRIB ("branch-cost=", OPT_mbranch_cost_, 1),
15701 S390_ATTRIB ("warn-framesize=", OPT_mwarn_framesize_, 1),
15702 /* flag options */
15703 S390_ATTRIB ("backchain", OPT_mbackchain, 0),
15704 S390_ATTRIB ("hard-dfp", OPT_mhard_dfp, 0),
15705 S390_ATTRIB ("hard-float", OPT_mhard_float, 0),
15706 S390_ATTRIB ("htm", OPT_mhtm, 0),
15707 S390_ATTRIB ("vx", OPT_mvx, 0),
15708 S390_ATTRIB ("packed-stack", OPT_mpacked_stack, 0),
15709 S390_ATTRIB ("small-exec", OPT_msmall_exec, 0),
15710 S390_ATTRIB ("soft-float", OPT_msoft_float, 0),
15711 S390_ATTRIB ("mvcle", OPT_mmvcle, 0),
15712 S390_PRAGMA ("zvector", OPT_mzvector, 0),
15713 /* boolean options */
15714 S390_ATTRIB ("warn-dynamicstack", OPT_mwarn_dynamicstack, 0),
15715 };
15716 #undef S390_ATTRIB
15717 #undef S390_PRAGMA
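/* For example, __attribute__ ((target ("no-htm,stack-size=8192")))
   is handled below as the negated flag option "htm" followed by the
   uinteger option "stack-size=" with argument "8192". */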
15718
15719 /* If this is a list, recurse to get the options. */
15720 if (TREE_CODE (args) == TREE_LIST)
15721 {
15722 bool ret = true;
15723 int num_pragma_values;
15724 int i;
15725
15726 /* Note: attribs.c:decl_attributes prepends the values from
15727 current_target_pragma to the list of target attributes. To determine
15728 whether we're looking at a value of the attribute or the pragma we
15729 assume that the first [list_length (current_target_pragma)] values in
15730 the list are the values from the pragma. */
15731 num_pragma_values = (!force_pragma && current_target_pragma != NULL)
15732 ? list_length (current_target_pragma) : 0;
15733 for (i = 0; args; args = TREE_CHAIN (args), i++)
15734 {
15735 bool is_pragma;
15736
15737 is_pragma = (force_pragma || i < num_pragma_values);
15738 if (TREE_VALUE (args)
15739 && !s390_valid_target_attribute_inner_p (TREE_VALUE (args),
15740 opts, new_opts_set,
15741 is_pragma))
15742 {
15743 ret = false;
15744 }
15745 }
15746 return ret;
15747 }
15748
15749 else if (TREE_CODE (args) != STRING_CST)
15750 {
15751 error ("attribute %<target%> argument not a string");
15752 return false;
15753 }
15754
15755 /* Handle multiple arguments separated by commas. */
15756 next_optstr = ASTRDUP (TREE_STRING_POINTER (args));
15757
15758 while (next_optstr && *next_optstr != '\0')
15759 {
15760 char *p = next_optstr;
15761 char *orig_p = p;
15762 char *comma = strchr (next_optstr, ',');
15763 size_t len, opt_len;
15764 int opt;
15765 bool opt_set_p;
15766 char ch;
15767 unsigned i;
15768 int mask = 0;
15769 enum cl_var_type var_type;
15770 bool found;
15771
15772 if (comma)
15773 {
15774 *comma = '\0';
15775 len = comma - next_optstr;
15776 next_optstr = comma + 1;
15777 }
15778 else
15779 {
15780 len = strlen (p);
15781 next_optstr = NULL;
15782 }
15783
15784 /* Recognize no-xxx. */
15785 if (len > 3 && p[0] == 'n' && p[1] == 'o' && p[2] == '-')
15786 {
15787 opt_set_p = false;
15788 p += 3;
15789 len -= 3;
15790 }
15791 else
15792 opt_set_p = true;
15793
15794 /* Find the option. */
15795 ch = *p;
15796 found = false;
15797 for (i = 0; i < ARRAY_SIZE (attrs); i++)
15798 {
15799 opt_len = attrs[i].len;
15800 if (ch == attrs[i].string[0]
15801 && ((attrs[i].has_arg) ? len > opt_len : len == opt_len)
15802 && memcmp (p, attrs[i].string, opt_len) == 0)
15803 {
15804 opt = attrs[i].opt;
15805 if (!opt_set_p && cl_options[opt].cl_reject_negative)
15806 continue;
15807 mask = cl_options[opt].var_value;
15808 var_type = cl_options[opt].var_type;
15809 found = true;
15810 break;
15811 }
15812 }
15813
15814 /* Process the option. */
15815 if (!found)
15816 {
15817 error ("attribute(target(\"%s\")) is unknown", orig_p);
15818 return false;
15819 }
15820 else if (attrs[i].only_as_pragma && !force_pragma)
15821 {
15822 /* Value is not allowed for the target attribute. */
15823 error ("value %qs is not supported by attribute %<target%>",
15824 attrs[i].string);
15825 return false;
15826 }
15827
15828 else if (var_type == CLVC_BIT_SET || var_type == CLVC_BIT_CLEAR)
15829 {
15830 if (var_type == CLVC_BIT_CLEAR)
15831 opt_set_p = !opt_set_p;
15832
15833 if (opt_set_p)
15834 opts->x_target_flags |= mask;
15835 else
15836 opts->x_target_flags &= ~mask;
15837 new_opts_set->x_target_flags |= mask;
15838 }
15839
15840 else if (cl_options[opt].var_type == CLVC_BOOLEAN)
15841 {
15842 int value;
15843
15844 if (cl_options[opt].cl_uinteger)
15845 {
15846 /* Unsigned integer argument. Code based on the function
15847 decode_cmdline_option () in opts-common.c. */
15848 value = integral_argument (p + opt_len);
15849 }
15850 else
15851 value = (opt_set_p) ? 1 : 0;
15852
15853 if (value != -1)
15854 {
15855 struct cl_decoded_option decoded;
15856
15857 /* Value range check; only implemented for numeric and boolean
15858 options at the moment. */
15859 generate_option (opt, NULL, value, CL_TARGET, &decoded);
15860 s390_handle_option (opts, new_opts_set, &decoded, input_location);
15861 set_option (opts, new_opts_set, opt, value,
15862 p + opt_len, DK_UNSPECIFIED, input_location,
15863 global_dc);
15864 }
15865 else
15866 {
15867 error ("attribute(target(\"%s\")) is unknown", orig_p);
15868 ret = false;
15869 }
15870 }
15871
15872 else if (cl_options[opt].var_type == CLVC_ENUM)
15873 {
15874 bool arg_ok;
15875 int value;
15876
15877 arg_ok = opt_enum_arg_to_value (opt, p + opt_len, &value, CL_TARGET);
15878 if (arg_ok)
15879 set_option (opts, new_opts_set, opt, value,
15880 p + opt_len, DK_UNSPECIFIED, input_location,
15881 global_dc);
15882 else
15883 {
15884 error ("attribute(target(\"%s\")) is unknown", orig_p);
15885 ret = false;
15886 }
15887 }
15888
15889 else
15890 gcc_unreachable ();
15891 }
15892 return ret;
15893 }
15894
15895 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */
15896
15897 tree
15898 s390_valid_target_attribute_tree (tree args,
15899 struct gcc_options *opts,
15900 const struct gcc_options *opts_set,
15901 bool force_pragma)
15902 {
15903 tree t = NULL_TREE;
15904 struct gcc_options new_opts_set;
15905
15906 memset (&new_opts_set, 0, sizeof (new_opts_set));
15907
15908 /* Process each of the options on the chain. */
15909 if (! s390_valid_target_attribute_inner_p (args, opts, &new_opts_set,
15910 force_pragma))
15911 return error_mark_node;
15912
15913 /* If some option was set (even if it has not changed), rerun
15914 s390_option_override_internal, and then save the options away. */
15915 if (new_opts_set.x_target_flags
15916 || new_opts_set.x_s390_arch
15917 || new_opts_set.x_s390_tune
15918 || new_opts_set.x_s390_stack_guard
15919 || new_opts_set.x_s390_stack_size
15920 || new_opts_set.x_s390_branch_cost
15921 || new_opts_set.x_s390_warn_framesize
15922 || new_opts_set.x_s390_warn_dynamicstack_p)
15923 {
15924 const unsigned char *src = (const unsigned char *)opts_set;
15925 unsigned char *dest = (unsigned char *)&new_opts_set;
15926 unsigned int i;
15927
15928 /* Merge the original option flags into the new ones. */
15929 for (i = 0; i < sizeof(*opts_set); i++)
15930 dest[i] |= src[i];
15931
15932 /* Do any overrides, such as arch=xxx, or tune=xxx support. */
15933 s390_option_override_internal (false, opts, &new_opts_set);
15934 /* Save the current options unless we are validating options for
15935 #pragma. */
15936 t = build_target_option_node (opts);
15937 }
15938 return t;
15939 }
15940
15941 /* Hook to validate attribute((target("string"))). */
15942
15943 static bool
15944 s390_valid_target_attribute_p (tree fndecl,
15945 tree ARG_UNUSED (name),
15946 tree args,
15947 int ARG_UNUSED (flags))
15948 {
15949 struct gcc_options func_options;
15950 tree new_target, new_optimize;
15951 bool ret = true;
15952
15953 /* attribute((target("default"))) does nothing, beyond
15954 affecting multi-versioning. */
15955 if (TREE_VALUE (args)
15956 && TREE_CODE (TREE_VALUE (args)) == STRING_CST
15957 && TREE_CHAIN (args) == NULL_TREE
15958 && strcmp (TREE_STRING_POINTER (TREE_VALUE (args)), "default") == 0)
15959 return true;
15960
15961 tree old_optimize = build_optimization_node (&global_options);
15962
15963 /* Get the optimization options of the current function. */
15964 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
15965
15966 if (!func_optimize)
15967 func_optimize = old_optimize;
15968
15969 /* Init func_options. */
15970 memset (&func_options, 0, sizeof (func_options));
15971 init_options_struct (&func_options, NULL);
15972 lang_hooks.init_options_struct (&func_options);
15973
15974 cl_optimization_restore (&func_options, TREE_OPTIMIZATION (func_optimize));
15975
15976 /* Initialize func_options to the default before its target options can
15977 be set. */
15978 cl_target_option_restore (&func_options,
15979 TREE_TARGET_OPTION (target_option_default_node));
15980
15981 new_target = s390_valid_target_attribute_tree (args, &func_options,
15982 &global_options_set,
15983 (args ==
15984 current_target_pragma));
15985 new_optimize = build_optimization_node (&func_options);
15986 if (new_target == error_mark_node)
15987 ret = false;
15988 else if (fndecl && new_target)
15989 {
15990 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;
15991 if (old_optimize != new_optimize)
15992 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
15993 }
15994 return ret;
15995 }
15996
15997 /* Hook to determine if one function can safely inline another. */
15998
15999 static bool
16000 s390_can_inline_p (tree caller, tree callee)
16001 {
16002 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
16003 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
16004
16005 if (!callee_tree)
16006 callee_tree = target_option_default_node;
16007 if (!caller_tree)
16008 caller_tree = target_option_default_node;
16009 if (callee_tree == caller_tree)
16010 return true;
16011
16012 struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
16013 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
16014 bool ret = true;
16015
16016 if ((caller_opts->x_target_flags & ~(MASK_SOFT_FLOAT | MASK_HARD_DFP))
16017 != (callee_opts->x_target_flags & ~(MASK_SOFT_FLOAT | MASK_HARD_DFP)))
16018 ret = false;
16019
16020 /* Don't inline functions to be compiled for a more recent arch into a
16021 function for an older arch. */
16022 else if (caller_opts->x_s390_arch < callee_opts->x_s390_arch)
16023 ret = false;
16024
16025 /* Inlining a hard float function into a soft float function is only
16026 allowed if the hard float function doesn't actually make use of
16027 floating point.
16028
16029 We are called from FEs for multi-versioning call optimization, so
16030 ipa_fn_summaries may not be available yet. */
16031 else if (((TARGET_SOFT_FLOAT_P (caller_opts->x_target_flags)
16032 && !TARGET_SOFT_FLOAT_P (callee_opts->x_target_flags))
16033 || (!TARGET_HARD_DFP_P (caller_opts->x_target_flags)
16034 && TARGET_HARD_DFP_P (callee_opts->x_target_flags)))
16035 && (! ipa_fn_summaries
16036 || ipa_fn_summaries->get
16037 (cgraph_node::get (callee))->fp_expressions))
16038 ret = false;
16039
16040 return ret;
16041 }
16042
16043 /* Set VAL to correct enum value according to the indirect-branch or
16044 function-return attribute in ATTR. */
16045
16046 static inline void
16047 s390_indirect_branch_attrvalue (tree attr, enum indirect_branch *val)
16048 {
16049 const char *str = TREE_STRING_POINTER (TREE_VALUE (TREE_VALUE (attr)));
16050 if (strcmp (str, "keep") == 0)
16051 *val = indirect_branch_keep;
16052 else if (strcmp (str, "thunk") == 0)
16053 *val = indirect_branch_thunk;
16054 else if (strcmp (str, "thunk-inline") == 0)
16055 *val = indirect_branch_thunk_inline;
16056 else if (strcmp (str, "thunk-extern") == 0)
16057 *val = indirect_branch_thunk_extern;
16058 }
16059
16060 /* Memorize the setting for -mindirect-branch* and -mfunction-return*
16061 from either the cmdline or the function attributes in
16062 cfun->machine. */
16063
16064 static void
16065 s390_indirect_branch_settings (tree fndecl)
16066 {
16067 tree attr;
16068
16069 if (!fndecl)
16070 return;
16071
16072 /* Initialize with the cmdline options and let the attributes
16073 override it. */
16074 cfun->machine->indirect_branch_jump = s390_indirect_branch_jump;
16075 cfun->machine->indirect_branch_call = s390_indirect_branch_call;
16076
16077 cfun->machine->function_return_reg = s390_function_return_reg;
16078 cfun->machine->function_return_mem = s390_function_return_mem;
16079
16080 if ((attr = lookup_attribute ("indirect_branch",
16081 DECL_ATTRIBUTES (fndecl))))
16082 {
16083 s390_indirect_branch_attrvalue (attr,
16084 &cfun->machine->indirect_branch_jump);
16085 s390_indirect_branch_attrvalue (attr,
16086 &cfun->machine->indirect_branch_call);
16087 }
16088
16089 if ((attr = lookup_attribute ("indirect_branch_jump",
16090 DECL_ATTRIBUTES (fndecl))))
16091 s390_indirect_branch_attrvalue (attr, &cfun->machine->indirect_branch_jump);
16092
16093 if ((attr = lookup_attribute ("indirect_branch_call",
16094 DECL_ATTRIBUTES (fndecl))))
16095 s390_indirect_branch_attrvalue (attr, &cfun->machine->indirect_branch_call);
16096
16097 if ((attr = lookup_attribute ("function_return",
16098 DECL_ATTRIBUTES (fndecl))))
16099 {
16100 s390_indirect_branch_attrvalue (attr,
16101 &cfun->machine->function_return_reg);
16102 s390_indirect_branch_attrvalue (attr,
16103 &cfun->machine->function_return_mem);
16104 }
16105
16106 if ((attr = lookup_attribute ("function_return_reg",
16107 DECL_ATTRIBUTES (fndecl))))
16108 s390_indirect_branch_attrvalue (attr, &cfun->machine->function_return_reg);
16109
16110 if ((attr = lookup_attribute ("function_return_mem",
16111 DECL_ATTRIBUTES (fndecl))))
16112 s390_indirect_branch_attrvalue (attr, &cfun->machine->function_return_mem);
16113 }
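/* For example, a declaration like

     void f (void) __attribute__ ((indirect_branch ("thunk")));

   sets both indirect_branch_jump and indirect_branch_call of f to
   indirect_branch_thunk, overriding the -mindirect-branch= command
   line setting; the more specific indirect_branch_jump and
   indirect_branch_call attributes would in turn override this
   again. */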
16114
16115 /* Restore targets globals from NEW_TREE and invalidate s390_previous_fndecl
16116 cache. */
16117
16118 void
16119 s390_activate_target_options (tree new_tree)
16120 {
16121 cl_target_option_restore (&global_options, TREE_TARGET_OPTION (new_tree));
16122 if (TREE_TARGET_GLOBALS (new_tree))
16123 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
16124 else if (new_tree == target_option_default_node)
16125 restore_target_globals (&default_target_globals);
16126 else
16127 TREE_TARGET_GLOBALS (new_tree) = save_target_globals_default_opts ();
16128 s390_previous_fndecl = NULL_TREE;
16129 }
16130
16131 /* Establish appropriate back-end context for processing the function
16132 FNDECL. The argument might be NULL to indicate processing at top
16133 level, outside of any function scope. */
16134 static void
16135 s390_set_current_function (tree fndecl)
16136 {
16137 /* Only change the context if the function changes. This hook is called
16138 several times in the course of compiling a function, and we don't want to
16139 slow things down too much or call target_reinit when it isn't safe. */
16140 if (fndecl == s390_previous_fndecl)
16141 {
16142 s390_indirect_branch_settings (fndecl);
16143 return;
16144 }
16145
16146 tree old_tree;
16147 if (s390_previous_fndecl == NULL_TREE)
16148 old_tree = target_option_current_node;
16149 else if (DECL_FUNCTION_SPECIFIC_TARGET (s390_previous_fndecl))
16150 old_tree = DECL_FUNCTION_SPECIFIC_TARGET (s390_previous_fndecl);
16151 else
16152 old_tree = target_option_default_node;
16153
16154 if (fndecl == NULL_TREE)
16155 {
16156 if (old_tree != target_option_current_node)
16157 s390_activate_target_options (target_option_current_node);
16158 return;
16159 }
16160
16161 tree new_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
16162 if (new_tree == NULL_TREE)
16163 new_tree = target_option_default_node;
16164
16165 if (old_tree != new_tree)
16166 s390_activate_target_options (new_tree);
16167 s390_previous_fndecl = fndecl;
16168
16169 s390_indirect_branch_settings (fndecl);
16170 }
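
/* Illustrative example (assuming the s390 "target" attribute accepts
   the same arch= selectors as -march):

     void newer (void) __attribute__ ((target ("arch=z13")));

   Compiling NEWER makes DECL_FUNCTION_SPECIFIC_TARGET differ from the
   previous function's options, so the hook above switches the target
   globals just for its body and back again afterwards.  */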
16171 #endif
16172
16173 /* Implement TARGET_USE_BY_PIECES_INFRASTRUCTURE_P. */
16174
16175 static bool
16176 s390_use_by_pieces_infrastructure_p (unsigned HOST_WIDE_INT size,
16177 unsigned int align ATTRIBUTE_UNUSED,
16178 enum by_pieces_operation op ATTRIBUTE_UNUSED,
16179 bool speed_p ATTRIBUTE_UNUSED)
16180 {
16181 return (size == 1 || size == 2
16182 || size == 4 || (TARGET_ZARCH && size == 8));
16183 }
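
/* Example (sketch, not from the sources): with TARGET_ZARCH a call like

     void cp8 (char *dst, const char *src)
     {
       __builtin_memcpy (dst, src, 8);
     }

   is expanded by the by-pieces infrastructure into a single 8-byte
   load/store pair, while sizes not listed above fall back to the
   normal block-move expansion.  */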
16184
16185 /* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV hook. */
16186
16187 static void
16188 s390_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
16189 {
16190 tree sfpc = s390_builtin_decls[S390_BUILTIN_s390_sfpc];
16191 tree efpc = s390_builtin_decls[S390_BUILTIN_s390_efpc];
16192 tree call_efpc = build_call_expr (efpc, 0);
16193 tree fenv_var = create_tmp_var_raw (unsigned_type_node);
16194
16195 #define FPC_EXCEPTION_MASK HOST_WIDE_INT_UC (0xf8000000)
16196 #define FPC_FLAGS_MASK HOST_WIDE_INT_UC (0x00f80000)
16197 #define FPC_DXC_MASK HOST_WIDE_INT_UC (0x0000ff00)
16198 #define FPC_EXCEPTION_MASK_SHIFT HOST_WIDE_INT_UC (24)
16199 #define FPC_FLAGS_SHIFT HOST_WIDE_INT_UC (16)
16200 #define FPC_DXC_SHIFT HOST_WIDE_INT_UC (8)
16201
16202 /* Generates the equivalent of feholdexcept (&fenv_var)
16203
16204 fenv_var = __builtin_s390_efpc ();
16205 __builtin_s390_sfpc (fenv_var & mask) */
16206 tree old_fpc = build2 (MODIFY_EXPR, unsigned_type_node, fenv_var, call_efpc);
16207 tree new_fpc =
16208 build2 (BIT_AND_EXPR, unsigned_type_node, fenv_var,
16209 build_int_cst (unsigned_type_node,
16210 ~(FPC_DXC_MASK | FPC_FLAGS_MASK |
16211 FPC_EXCEPTION_MASK)));
16212 tree set_new_fpc = build_call_expr (sfpc, 1, new_fpc);
16213 *hold = build2 (COMPOUND_EXPR, void_type_node, old_fpc, set_new_fpc);
16214
16215 /* Generates the equivalent of feclearexcept (FE_ALL_EXCEPT)
16216
16217 __builtin_s390_sfpc (__builtin_s390_efpc () & mask) */
16218 new_fpc = build2 (BIT_AND_EXPR, unsigned_type_node, call_efpc,
16219 build_int_cst (unsigned_type_node,
16220 ~(FPC_DXC_MASK | FPC_FLAGS_MASK)));
16221 *clear = build_call_expr (sfpc, 1, new_fpc);
16222
16223 /* Generates the equivalent of feupdateenv (fenv_var)
16224
16225 old_fpc = __builtin_s390_efpc ();
16226 __builtin_s390_sfpc (fenv_var);
16227 __atomic_feraiseexcept ((old_fpc & FPC_FLAGS_MASK) >> FPC_FLAGS_SHIFT); */
16228
16229 old_fpc = create_tmp_var_raw (unsigned_type_node);
16230 tree store_old_fpc = build2 (MODIFY_EXPR, void_type_node,
16231 old_fpc, call_efpc);
16232
16233 set_new_fpc = build_call_expr (sfpc, 1, fenv_var);
16234
16235 tree raise_old_except = build2 (BIT_AND_EXPR, unsigned_type_node, old_fpc,
16236 build_int_cst (unsigned_type_node,
16237 FPC_FLAGS_MASK));
16238 raise_old_except = build2 (RSHIFT_EXPR, unsigned_type_node, raise_old_except,
16239 build_int_cst (unsigned_type_node,
16240 FPC_FLAGS_SHIFT));
16241 tree atomic_feraiseexcept
16242 = builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT);
16243 raise_old_except = build_call_expr (atomic_feraiseexcept,
16244 1, raise_old_except);
16245
16246 *update = build2 (COMPOUND_EXPR, void_type_node,
16247 build2 (COMPOUND_EXPR, void_type_node,
16248 store_old_fpc, set_new_fpc),
16249 raise_old_except);
16250
16251 #undef FPC_EXCEPTION_MASK
16252 #undef FPC_FLAGS_MASK
16253 #undef FPC_DXC_MASK
16254 #undef FPC_EXCEPTION_MASK_SHIFT
16255 #undef FPC_FLAGS_SHIFT
16256 #undef FPC_DXC_SHIFT
16257 }
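
/* Putting the three sequences together (illustrative sketch, assuming
   the generic expansion of a C11 atomic floating-point compound
   assignment such as "_Atomic double x; ... x += 1.0;"):

     unsigned fenv_var = __builtin_s390_efpc ();          // *hold
     __builtin_s390_sfpc (fenv_var & ~(exc | flags | dxc));

     ... speculative FP computation + compare-and-swap loop ...

     __builtin_s390_sfpc (__builtin_s390_efpc ()          // *clear,
                          & ~(flags | dxc));              // on retry

     unsigned old_fpc = __builtin_s390_efpc ();           // *update,
     __builtin_s390_sfpc (fenv_var);                      // on success
     __atomic_feraiseexcept ((old_fpc & flags) >> 16);

   where exc, flags and dxc stand for the FPC_* masks #undef'ed
   above.  */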
16258
16259 /* Return the vector mode to be used for inner mode MODE when doing
16260 vectorization. */
16261 static machine_mode
16262 s390_preferred_simd_mode (scalar_mode mode)
16263 {
16264 if (TARGET_VXE)
16265 switch (mode)
16266 {
16267 case E_SFmode:
16268 return V4SFmode;
16269 default:;
16270 }
16271
16272 if (TARGET_VX)
16273 switch (mode)
16274 {
16275 case E_DFmode:
16276 return V2DFmode;
16277 case E_DImode:
16278 return V2DImode;
16279 case E_SImode:
16280 return V4SImode;
16281 case E_HImode:
16282 return V8HImode;
16283 case E_QImode:
16284 return V16QImode;
16285 default:;
16286 }
16287 return word_mode;
16288 }
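
/* Illustrative example (assuming VXE corresponds to -march=z14 and VX
   to -march=z13): a loop such as

     void add (float *a, float *b, int n)
     {
       for (int i = 0; i < n; i++)
         a[i] += b[i];
     }

   is vectorized with V4SFmode on z14, while on z13 this hook sends
   SFmode to word_mode and the loop remains scalar.  */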
16289
16290 /* Our hardware does not require vectors to be strictly aligned. */
16291 static bool
16292 s390_support_vector_misalignment (machine_mode mode ATTRIBUTE_UNUSED,
16293 const_tree type ATTRIBUTE_UNUSED,
16294 int misalignment ATTRIBUTE_UNUSED,
16295 bool is_packed ATTRIBUTE_UNUSED)
16296 {
16297 if (TARGET_VX)
16298 return true;
16299
16300 return default_builtin_support_vector_misalignment (mode, type, misalignment,
16301 is_packed);
16302 }
16303
16304 /* The vector ABI requires vector types to be aligned on an 8 byte
16305 boundary (our stack alignment). However, we allow this to be
16306 overridden by the user, although this definitely breaks the ABI. */
16307 static HOST_WIDE_INT
16308 s390_vector_alignment (const_tree type)
16309 {
16310 if (!TARGET_VX_ABI)
16311 return default_vector_alignment (type);
16312
16313 if (TYPE_USER_ALIGN (type))
16314 return TYPE_ALIGN (type);
16315
16316 return MIN (64, tree_to_shwi (TYPE_SIZE (type)));
16317 }
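
/* Example (illustrative): MIN (64, size) caps the ABI alignment of a
   16-byte vector at 8 bytes, but TYPE_USER_ALIGN lets

     typedef int v4si __attribute__ ((vector_size (16), aligned (16)));

   keep its requested 16-byte alignment, at the cost of breaking the
   vector ABI described above.  */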
16318
16319 /* Implement TARGET_CONSTANT_ALIGNMENT. Alignment on even addresses for
16320 the LARL instruction. */
16321
16322 static HOST_WIDE_INT
16323 s390_constant_alignment (const_tree, HOST_WIDE_INT align)
16324 {
16325 return MAX (align, 16);
16326 }
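
/* E.g. a string literal that would otherwise be byte-aligned is raised
   by MAX (align, 16) to at least 16-bit (2-byte) alignment, since LARL
   can only materialize even addresses.  */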
16327
16328 #ifdef HAVE_AS_MACHINE_MACHINEMODE
16329 /* Implement TARGET_ASM_FILE_START. */
16330 static void
16331 s390_asm_file_start (void)
16332 {
16333 default_file_start ();
16334 s390_asm_output_machine_for_arch (asm_out_file);
16335 }
16336 #endif
16337
16338 /* Implement TARGET_ASM_FILE_END. */
16339 static void
16340 s390_asm_file_end (void)
16341 {
16342 #ifdef HAVE_AS_GNU_ATTRIBUTE
16343 varpool_node *vnode;
16344 cgraph_node *cnode;
16345
16346 FOR_EACH_VARIABLE (vnode)
16347 if (TREE_PUBLIC (vnode->decl))
16348 s390_check_type_for_vector_abi (TREE_TYPE (vnode->decl), false, false);
16349
16350 FOR_EACH_FUNCTION (cnode)
16351 if (TREE_PUBLIC (cnode->decl))
16352 s390_check_type_for_vector_abi (TREE_TYPE (cnode->decl), false, false);
16353
16354
16355 if (s390_vector_abi != 0)
16356 fprintf (asm_out_file, "\t.gnu_attribute 8, %d\n",
16357 s390_vector_abi);
16358 #endif
16359 file_end_indicate_exec_stack ();
16360
16361 if (flag_split_stack)
16362 file_end_indicate_split_stack ();
16363 }
16364
16365 /* Return true if TYPE is a vector bool type. */
16366 static inline bool
16367 s390_vector_bool_type_p (const_tree type)
16368 {
16369 return TYPE_VECTOR_OPAQUE (type);
16370 }
16371
16372 /* Return the diagnostic message string if the binary operation OP is
16373 not permitted on TYPE1 and TYPE2, NULL otherwise. */
16374 static const char*
16375 s390_invalid_binary_op (int op ATTRIBUTE_UNUSED, const_tree type1, const_tree type2)
16376 {
16377 bool bool1_p, bool2_p;
16378 bool plusminus_p;
16379 bool muldiv_p;
16380 bool compare_p;
16381 machine_mode mode1, mode2;
16382
16383 if (!TARGET_ZVECTOR)
16384 return NULL;
16385
16386 if (!VECTOR_TYPE_P (type1) || !VECTOR_TYPE_P (type2))
16387 return NULL;
16388
16389 bool1_p = s390_vector_bool_type_p (type1);
16390 bool2_p = s390_vector_bool_type_p (type2);
16391
16392 /* Mixing signed and unsigned types is forbidden for all
16393 operators. */
16394 if (!bool1_p && !bool2_p
16395 && TYPE_UNSIGNED (type1) != TYPE_UNSIGNED (type2))
16396 return N_("types differ in signedness");
16397
16398 plusminus_p = (op == PLUS_EXPR || op == MINUS_EXPR);
16399 muldiv_p = (op == MULT_EXPR || op == RDIV_EXPR || op == TRUNC_DIV_EXPR
16400 || op == CEIL_DIV_EXPR || op == FLOOR_DIV_EXPR
16401 || op == ROUND_DIV_EXPR);
16402 compare_p = (op == LT_EXPR || op == LE_EXPR || op == GT_EXPR || op == GE_EXPR
16403 || op == EQ_EXPR || op == NE_EXPR);
16404
16405 if (bool1_p && bool2_p && (plusminus_p || muldiv_p))
16406 return N_("binary operator does not support two vector bool operands");
16407
16408 if (bool1_p != bool2_p && (muldiv_p || compare_p))
16409 return N_("binary operator does not support vector bool operand");
16410
16411 mode1 = TYPE_MODE (type1);
16412 mode2 = TYPE_MODE (type2);
16413
16414 if (bool1_p != bool2_p && plusminus_p
16415 && (GET_MODE_CLASS (mode1) == MODE_VECTOR_FLOAT
16416 || GET_MODE_CLASS (mode2) == MODE_VECTOR_FLOAT))
16417 return N_("binary operator does not support mixing vector "
16418 "bool with floating point vector operands");
16419
16420 return NULL;
16421 }
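
/* Illustration of the rules above (zvector syntax, enabled with
   -mzvector; example only):

     vector bool int b;
     vector signed int i;
     vector float f;

     b + i;   // OK: bool may mix with an integer vector for +/-
     b * i;   // rejected: bool operand with a multiplicative operator
     b == i;  // rejected: bool operand in a comparison
     b + f;   // rejected: bool mixed with a floating point vector  */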
16422
16423 /* Implement TARGET_C_EXCESS_PRECISION.
16424
16425 FIXME: For historical reasons, float_t and double_t are typedef'ed to
16426 double on s390, causing operations on float_t to operate in a higher
16427 precision than is necessary. However, it is not the case that SFmode
16428 operations have implicit excess precision, and we generate better
16429 code if we let the compiler know that no implicit extra precision is added.
16430
16431 That means when we are compiling with -fexcess-precision=fast, the value
16432 we set for FLT_EVAL_METHOD will be out of line with the actual precision of
16433 float_t (though they would be correct for -fexcess-precision=standard).
16434
16435 A complete fix would modify glibc to remove the unnecessary typedef
16436 of float_t to double. */
16437
16438 static enum flt_eval_method
16439 s390_excess_precision (enum excess_precision_type type)
16440 {
16441 switch (type)
16442 {
16443 case EXCESS_PRECISION_TYPE_IMPLICIT:
16444 case EXCESS_PRECISION_TYPE_FAST:
16445 /* The fastest type to promote to will always be the native type,
16446 whether that occurs with implicit excess precision or
16447 otherwise. */
16448 return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT;
16449 case EXCESS_PRECISION_TYPE_STANDARD:
16450 /* Otherwise, when we are in a standards compliant mode, to
16451 ensure consistency with the implementation in glibc, report that
16452 float is evaluated to the range and precision of double. */
16453 return FLT_EVAL_METHOD_PROMOTE_TO_DOUBLE;
16454 default:
16455 gcc_unreachable ();
16456 }
16457 return FLT_EVAL_METHOD_UNPREDICTABLE;
16458 }
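
/* Example of the user-visible effect (illustrative): given

     float a, b, c;
     float r = a * b + c;

   -fexcess-precision=standard evaluates the expression in double
   (matching glibc's float_t), while -fexcess-precision=fast keeps it
   in SFmode, which this hook reports as having no implicit excess
   precision.  */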
16459
16460 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
16461
16462 static unsigned HOST_WIDE_INT
16463 s390_asan_shadow_offset (void)
16464 {
16465 return TARGET_64BIT ? HOST_WIDE_INT_1U << 52 : HOST_WIDE_INT_UC (0x20000000);
16466 }
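
/* Illustrative sketch (assuming the default ASan shadow scale of 3):
   the sanitizer computes

     shadow_addr = (addr >> 3) + shadow_offset;

   so the values above place the shadow region at 1 << 52 for 64-bit
   code and at 0x20000000 for 31-bit code.  */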
16467
16468 #ifdef HAVE_GAS_HIDDEN
16469 # define USE_HIDDEN_LINKONCE 1
16470 #else
16471 # define USE_HIDDEN_LINKONCE 0
16472 #endif
16473
16474 /* Output an indirect branch trampoline for target register REGNO. */
16475
16476 static void
16477 s390_output_indirect_thunk_function (unsigned int regno, bool z10_p)
16478 {
16479 tree decl;
16480 char thunk_label[32];
16481 int i;
16482
16483 if (z10_p)
16484 sprintf (thunk_label, TARGET_INDIRECT_BRANCH_THUNK_NAME_EXRL, regno);
16485 else
16486 sprintf (thunk_label, TARGET_INDIRECT_BRANCH_THUNK_NAME_EX,
16487 INDIRECT_BRANCH_THUNK_REGNUM, regno);
16488
16489 decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
16490 get_identifier (thunk_label),
16491 build_function_type_list (void_type_node, NULL_TREE));
16492 DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
16493 NULL_TREE, void_type_node);
16494 TREE_PUBLIC (decl) = 1;
16495 TREE_STATIC (decl) = 1;
16496 DECL_IGNORED_P (decl) = 1;
16497
16498 if (USE_HIDDEN_LINKONCE)
16499 {
16500 cgraph_node::create (decl)->set_comdat_group (DECL_ASSEMBLER_NAME (decl));
16501
16502 targetm.asm_out.unique_section (decl, 0);
16503 switch_to_section (get_named_section (decl, NULL, 0));
16504
16505 targetm.asm_out.globalize_label (asm_out_file, thunk_label);
16506 fputs ("\t.hidden\t", asm_out_file);
16507 assemble_name (asm_out_file, thunk_label);
16508 putc ('\n', asm_out_file);
16509 ASM_DECLARE_FUNCTION_NAME (asm_out_file, thunk_label, decl);
16510 }
16511 else
16512 {
16513 switch_to_section (text_section);
16514 ASM_OUTPUT_LABEL (asm_out_file, thunk_label);
16515 }
16516
16517 DECL_INITIAL (decl) = make_node (BLOCK);
16518 current_function_decl = decl;
16519 allocate_struct_function (decl, false);
16520 init_function_start (decl);
16521 cfun->is_thunk = true;
16522 first_function_block_is_cold = false;
16523 final_start_function (emit_barrier (), asm_out_file, 1);
16524
16525 /* This makes CFI at least usable for indirect jumps.
16526
16527 Stopping in the thunk: the backtrace will point to the thunk target
16528 as if it was interrupted by a signal. For a call this means that
16529 the call chain will be: caller->callee->thunk */
16530 if (flag_asynchronous_unwind_tables && flag_dwarf2_cfi_asm)
16531 {
16532 fputs ("\t.cfi_signal_frame\n", asm_out_file);
16533 fprintf (asm_out_file, "\t.cfi_return_column %d\n", regno);
16534 for (i = 0; i < FPR15_REGNUM; i++)
16535 fprintf (asm_out_file, "\t.cfi_same_value %s\n", reg_names[i]);
16536 }
16537
16538 if (z10_p)
16539 {
16540 /* exrl 0,1f */
16541
16542 /* We generate a thunk for z10 compiled code although z10 is
16543 currently not enabled. Tell the assembler to accept the
16544 instruction. */
16545 if (!TARGET_CPU_Z10)
16546 {
16547 fputs ("\t.machine push\n", asm_out_file);
16548 fputs ("\t.machine z10\n", asm_out_file);
16549 }
16550 /* We use exrl even if -mzarch hasn't been specified on the
16551 command line so we have to tell the assembler to accept
16552 it. */
16553 if (!TARGET_ZARCH)
16554 fputs ("\t.machinemode zarch\n", asm_out_file);
16555
16556 fputs ("\texrl\t0,1f\n", asm_out_file);
16557
16558 if (!TARGET_ZARCH)
16559 fputs ("\t.machinemode esa\n", asm_out_file);
16560
16561 if (!TARGET_CPU_Z10)
16562 fputs ("\t.machine pop\n", asm_out_file);
16563 }
16564 else if (TARGET_CPU_ZARCH)
16565 {
16566 /* larl %r1,1f */
16567 fprintf (asm_out_file, "\tlarl\t%%r%d,1f\n",
16568 INDIRECT_BRANCH_THUNK_REGNUM);
16569
16570 /* ex 0,0(%r1) */
16571 fprintf (asm_out_file, "\tex\t0,0(%%r%d)\n",
16572 INDIRECT_BRANCH_THUNK_REGNUM);
16573 }
16574 else
16575 gcc_unreachable ();
16576
16577 /* 0: j 0b */
16578 fputs ("0:\tj\t0b\n", asm_out_file);
16579
16580 /* 1: br <regno> */
16581 fprintf (asm_out_file, "1:\tbr\t%%r%d\n", regno);
16582
16583 final_end_function ();
16584 init_insn_lengths ();
16585 free_after_compilation (cfun);
16586 set_cfun (NULL);
16587 current_function_decl = NULL;
16588 }
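
/* Expected assembler output, as a sketch (assuming REGNO == 1 and
   Z10_P; the label spelling comes from
   TARGET_INDIRECT_BRANCH_THUNK_NAME_EXRL and is abbreviated here):

     __s390_indirect_jump_r1:
             exrl    0,1f
     0:      j       0b
     1:      br      %r1

   The BR at label 1 is only ever executed out of line via EXRL;
   architecturally the EXRL transfers control to the branch target,
   while the self-loop at label 0 catches speculative fall-through.  */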
16589
16590 /* Implement the asm.code_end target hook. */
16591
16592 static void
16593 s390_code_end (void)
16594 {
16595 int i;
16596
16597 for (i = 1; i < 16; i++)
16598 {
16599 if (indirect_branch_z10thunk_mask & (1 << i))
16600 s390_output_indirect_thunk_function (i, true);
16601
16602 if (indirect_branch_prez10thunk_mask & (1 << i))
16603 s390_output_indirect_thunk_function (i, false);
16604 }
16605
16606 if (TARGET_INDIRECT_BRANCH_TABLE)
16607 {
16608 int o;
16609 int i;
16610
16611 for (o = 0; o < INDIRECT_BRANCH_NUM_OPTIONS; o++)
16612 {
16613 if (indirect_branch_table_label_no[o] == 0)
16614 continue;
16615
16616 switch_to_section (get_section (indirect_branch_table_name[o],
16617 0,
16618 NULL_TREE));
16619 for (i = 0; i < indirect_branch_table_label_no[o]; i++)
16620 {
16621 char label_start[32];
16622
16623 ASM_GENERATE_INTERNAL_LABEL (label_start,
16624 indirect_branch_table_label[o], i);
16625
16626 fputs ("\t.long\t", asm_out_file);
16627 assemble_name_raw (asm_out_file, label_start);
16628 fputs ("-.\n", asm_out_file);
16629 }
16630 switch_to_section (current_function_section ());
16631 }
16632 }
16633 }
16634
16635 /* Implement the TARGET_CASE_VALUES_THRESHOLD target hook. */
16636
16637 unsigned int
16638 s390_case_values_threshold (void)
16639 {
16640 /* Disabling branch prediction for indirect jumps makes jump tables
16641 much more expensive. */
16642 if (TARGET_INDIRECT_BRANCH_NOBP_JUMP)
16643 return 20;
16644
16645 return default_case_values_threshold ();
16646 }
16647
16648 /* Initialize GCC target structure. */
16649
16650 #undef TARGET_ASM_ALIGNED_HI_OP
16651 #define TARGET_ASM_ALIGNED_HI_OP "\t.word\t"
16652 #undef TARGET_ASM_ALIGNED_DI_OP
16653 #define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
16654 #undef TARGET_ASM_INTEGER
16655 #define TARGET_ASM_INTEGER s390_assemble_integer
16656
16657 #undef TARGET_ASM_OPEN_PAREN
16658 #define TARGET_ASM_OPEN_PAREN ""
16659
16660 #undef TARGET_ASM_CLOSE_PAREN
16661 #define TARGET_ASM_CLOSE_PAREN ""
16662
16663 #undef TARGET_OPTION_OVERRIDE
16664 #define TARGET_OPTION_OVERRIDE s390_option_override
16665
16666 #ifdef TARGET_THREAD_SSP_OFFSET
16667 #undef TARGET_STACK_PROTECT_GUARD
16668 #define TARGET_STACK_PROTECT_GUARD hook_tree_void_null
16669 #endif
16670
16671 #undef TARGET_ENCODE_SECTION_INFO
16672 #define TARGET_ENCODE_SECTION_INFO s390_encode_section_info
16673
16674 #undef TARGET_SCALAR_MODE_SUPPORTED_P
16675 #define TARGET_SCALAR_MODE_SUPPORTED_P s390_scalar_mode_supported_p
16676
16677 #ifdef HAVE_AS_TLS
16678 #undef TARGET_HAVE_TLS
16679 #define TARGET_HAVE_TLS true
16680 #endif
16681 #undef TARGET_CANNOT_FORCE_CONST_MEM
16682 #define TARGET_CANNOT_FORCE_CONST_MEM s390_cannot_force_const_mem
16683
16684 #undef TARGET_DELEGITIMIZE_ADDRESS
16685 #define TARGET_DELEGITIMIZE_ADDRESS s390_delegitimize_address
16686
16687 #undef TARGET_LEGITIMIZE_ADDRESS
16688 #define TARGET_LEGITIMIZE_ADDRESS s390_legitimize_address
16689
16690 #undef TARGET_RETURN_IN_MEMORY
16691 #define TARGET_RETURN_IN_MEMORY s390_return_in_memory
16692
16693 #undef TARGET_INIT_BUILTINS
16694 #define TARGET_INIT_BUILTINS s390_init_builtins
16695 #undef TARGET_EXPAND_BUILTIN
16696 #define TARGET_EXPAND_BUILTIN s390_expand_builtin
16697 #undef TARGET_BUILTIN_DECL
16698 #define TARGET_BUILTIN_DECL s390_builtin_decl
16699
16700 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
16701 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA s390_output_addr_const_extra
16702
16703 #undef TARGET_ASM_OUTPUT_MI_THUNK
16704 #define TARGET_ASM_OUTPUT_MI_THUNK s390_output_mi_thunk
16705 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
16706 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true
16707
16708 #undef TARGET_C_EXCESS_PRECISION
16709 #define TARGET_C_EXCESS_PRECISION s390_excess_precision
16710
16711 #undef TARGET_SCHED_ADJUST_PRIORITY
16712 #define TARGET_SCHED_ADJUST_PRIORITY s390_adjust_priority
16713 #undef TARGET_SCHED_ISSUE_RATE
16714 #define TARGET_SCHED_ISSUE_RATE s390_issue_rate
16715 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
16716 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD s390_first_cycle_multipass_dfa_lookahead
16717
16718 #undef TARGET_SCHED_VARIABLE_ISSUE
16719 #define TARGET_SCHED_VARIABLE_ISSUE s390_sched_variable_issue
16720 #undef TARGET_SCHED_REORDER
16721 #define TARGET_SCHED_REORDER s390_sched_reorder
16722 #undef TARGET_SCHED_INIT
16723 #define TARGET_SCHED_INIT s390_sched_init
16724
16725 #undef TARGET_CANNOT_COPY_INSN_P
16726 #define TARGET_CANNOT_COPY_INSN_P s390_cannot_copy_insn_p
16727 #undef TARGET_RTX_COSTS
16728 #define TARGET_RTX_COSTS s390_rtx_costs
16729 #undef TARGET_ADDRESS_COST
16730 #define TARGET_ADDRESS_COST s390_address_cost
16731 #undef TARGET_REGISTER_MOVE_COST
16732 #define TARGET_REGISTER_MOVE_COST s390_register_move_cost
16733 #undef TARGET_MEMORY_MOVE_COST
16734 #define TARGET_MEMORY_MOVE_COST s390_memory_move_cost
16735 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
16736 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
16737 s390_builtin_vectorization_cost
16738
16739 #undef TARGET_MACHINE_DEPENDENT_REORG
16740 #define TARGET_MACHINE_DEPENDENT_REORG s390_reorg
16741
16742 #undef TARGET_VALID_POINTER_MODE
16743 #define TARGET_VALID_POINTER_MODE s390_valid_pointer_mode
16744
16745 #undef TARGET_BUILD_BUILTIN_VA_LIST
16746 #define TARGET_BUILD_BUILTIN_VA_LIST s390_build_builtin_va_list
16747 #undef TARGET_EXPAND_BUILTIN_VA_START
16748 #define TARGET_EXPAND_BUILTIN_VA_START s390_va_start
16749 #undef TARGET_ASAN_SHADOW_OFFSET
16750 #define TARGET_ASAN_SHADOW_OFFSET s390_asan_shadow_offset
16751 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
16752 #define TARGET_GIMPLIFY_VA_ARG_EXPR s390_gimplify_va_arg
16753
16754 #undef TARGET_PROMOTE_FUNCTION_MODE
16755 #define TARGET_PROMOTE_FUNCTION_MODE s390_promote_function_mode
16756 #undef TARGET_PASS_BY_REFERENCE
16757 #define TARGET_PASS_BY_REFERENCE s390_pass_by_reference
16758
16759 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
16760 #define TARGET_FUNCTION_OK_FOR_SIBCALL s390_function_ok_for_sibcall
16761 #undef TARGET_FUNCTION_ARG
16762 #define TARGET_FUNCTION_ARG s390_function_arg
16763 #undef TARGET_FUNCTION_ARG_ADVANCE
16764 #define TARGET_FUNCTION_ARG_ADVANCE s390_function_arg_advance
16765 #undef TARGET_FUNCTION_ARG_PADDING
16766 #define TARGET_FUNCTION_ARG_PADDING s390_function_arg_padding
16767 #undef TARGET_FUNCTION_VALUE
16768 #define TARGET_FUNCTION_VALUE s390_function_value
16769 #undef TARGET_LIBCALL_VALUE
16770 #define TARGET_LIBCALL_VALUE s390_libcall_value
16771 #undef TARGET_STRICT_ARGUMENT_NAMING
16772 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
16773
16774 #undef TARGET_KEEP_LEAF_WHEN_PROFILED
16775 #define TARGET_KEEP_LEAF_WHEN_PROFILED s390_keep_leaf_when_profiled
16776
16777 #undef TARGET_FIXED_CONDITION_CODE_REGS
16778 #define TARGET_FIXED_CONDITION_CODE_REGS s390_fixed_condition_code_regs
16779
16780 #undef TARGET_CC_MODES_COMPATIBLE
16781 #define TARGET_CC_MODES_COMPATIBLE s390_cc_modes_compatible
16782
16783 #undef TARGET_INVALID_WITHIN_DOLOOP
16784 #define TARGET_INVALID_WITHIN_DOLOOP hook_constcharptr_const_rtx_insn_null
16785
16786 #ifdef HAVE_AS_TLS
16787 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
16788 #define TARGET_ASM_OUTPUT_DWARF_DTPREL s390_output_dwarf_dtprel
16789 #endif
16790
16791 #undef TARGET_DWARF_FRAME_REG_MODE
16792 #define TARGET_DWARF_FRAME_REG_MODE s390_dwarf_frame_reg_mode
16793
16794 #ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
16795 #undef TARGET_MANGLE_TYPE
16796 #define TARGET_MANGLE_TYPE s390_mangle_type
16797 #endif
16798
16799 #undef TARGET_SCALAR_MODE_SUPPORTED_P
16800 #define TARGET_SCALAR_MODE_SUPPORTED_P s390_scalar_mode_supported_p
16801
16802 #undef TARGET_VECTOR_MODE_SUPPORTED_P
16803 #define TARGET_VECTOR_MODE_SUPPORTED_P s390_vector_mode_supported_p
16804
16805 #undef TARGET_PREFERRED_RELOAD_CLASS
16806 #define TARGET_PREFERRED_RELOAD_CLASS s390_preferred_reload_class
16807
16808 #undef TARGET_SECONDARY_RELOAD
16809 #define TARGET_SECONDARY_RELOAD s390_secondary_reload
16810 #undef TARGET_SECONDARY_MEMORY_NEEDED
16811 #define TARGET_SECONDARY_MEMORY_NEEDED s390_secondary_memory_needed
16812 #undef TARGET_SECONDARY_MEMORY_NEEDED_MODE
16813 #define TARGET_SECONDARY_MEMORY_NEEDED_MODE s390_secondary_memory_needed_mode
16814
16815 #undef TARGET_LIBGCC_CMP_RETURN_MODE
16816 #define TARGET_LIBGCC_CMP_RETURN_MODE s390_libgcc_cmp_return_mode
16817
16818 #undef TARGET_LIBGCC_SHIFT_COUNT_MODE
16819 #define TARGET_LIBGCC_SHIFT_COUNT_MODE s390_libgcc_shift_count_mode
16820
16821 #undef TARGET_LEGITIMATE_ADDRESS_P
16822 #define TARGET_LEGITIMATE_ADDRESS_P s390_legitimate_address_p
16823
16824 #undef TARGET_LEGITIMATE_CONSTANT_P
16825 #define TARGET_LEGITIMATE_CONSTANT_P s390_legitimate_constant_p
16826
16827 #undef TARGET_LRA_P
16828 #define TARGET_LRA_P s390_lra_p
16829
16830 #undef TARGET_CAN_ELIMINATE
16831 #define TARGET_CAN_ELIMINATE s390_can_eliminate
16832
16833 #undef TARGET_CONDITIONAL_REGISTER_USAGE
16834 #define TARGET_CONDITIONAL_REGISTER_USAGE s390_conditional_register_usage
16835
16836 #undef TARGET_LOOP_UNROLL_ADJUST
16837 #define TARGET_LOOP_UNROLL_ADJUST s390_loop_unroll_adjust
16838
16839 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
16840 #define TARGET_ASM_TRAMPOLINE_TEMPLATE s390_asm_trampoline_template
16841 #undef TARGET_TRAMPOLINE_INIT
16842 #define TARGET_TRAMPOLINE_INIT s390_trampoline_init
16843
16844 /* PR 79421 */
16845 #undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
16846 #define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 1
16847
16848 #undef TARGET_UNWIND_WORD_MODE
16849 #define TARGET_UNWIND_WORD_MODE s390_unwind_word_mode
16850
16851 #undef TARGET_CANONICALIZE_COMPARISON
16852 #define TARGET_CANONICALIZE_COMPARISON s390_canonicalize_comparison
16853
16854 #undef TARGET_HARD_REGNO_SCRATCH_OK
16855 #define TARGET_HARD_REGNO_SCRATCH_OK s390_hard_regno_scratch_ok
16856
16857 #undef TARGET_HARD_REGNO_NREGS
16858 #define TARGET_HARD_REGNO_NREGS s390_hard_regno_nregs
16859 #undef TARGET_HARD_REGNO_MODE_OK
16860 #define TARGET_HARD_REGNO_MODE_OK s390_hard_regno_mode_ok
16861 #undef TARGET_MODES_TIEABLE_P
16862 #define TARGET_MODES_TIEABLE_P s390_modes_tieable_p
16863
16864 #undef TARGET_HARD_REGNO_CALL_PART_CLOBBERED
16865 #define TARGET_HARD_REGNO_CALL_PART_CLOBBERED \
16866 s390_hard_regno_call_part_clobbered
16867
16868 #undef TARGET_ATTRIBUTE_TABLE
16869 #define TARGET_ATTRIBUTE_TABLE s390_attribute_table
16870
16871 #undef TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P
16872 #define TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P hook_bool_const_tree_true
16873
16874 #undef TARGET_SET_UP_BY_PROLOGUE
16875 #define TARGET_SET_UP_BY_PROLOGUE s300_set_up_by_prologue
16876
16877 #undef TARGET_EXTRA_LIVE_ON_ENTRY
16878 #define TARGET_EXTRA_LIVE_ON_ENTRY s390_live_on_entry
16879
16880 #undef TARGET_USE_BY_PIECES_INFRASTRUCTURE_P
16881 #define TARGET_USE_BY_PIECES_INFRASTRUCTURE_P \
16882 s390_use_by_pieces_infrastructure_p
16883
16884 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
16885 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV s390_atomic_assign_expand_fenv
16886
16887 #undef TARGET_INVALID_ARG_FOR_UNPROTOTYPED_FN
16888 #define TARGET_INVALID_ARG_FOR_UNPROTOTYPED_FN s390_invalid_arg_for_unprototyped_fn
16889
16890 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
16891 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE s390_preferred_simd_mode
16892
16893 #undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
16894 #define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT s390_support_vector_misalignment
16895
16896 #undef TARGET_VECTOR_ALIGNMENT
16897 #define TARGET_VECTOR_ALIGNMENT s390_vector_alignment
16898
16899 #undef TARGET_INVALID_BINARY_OP
16900 #define TARGET_INVALID_BINARY_OP s390_invalid_binary_op
16901
16902 #ifdef HAVE_AS_MACHINE_MACHINEMODE
16903 #undef TARGET_ASM_FILE_START
16904 #define TARGET_ASM_FILE_START s390_asm_file_start
16905 #endif
16906
16907 #undef TARGET_ASM_FILE_END
16908 #define TARGET_ASM_FILE_END s390_asm_file_end
16909
16910 #if S390_USE_TARGET_ATTRIBUTE
16911 #undef TARGET_SET_CURRENT_FUNCTION
16912 #define TARGET_SET_CURRENT_FUNCTION s390_set_current_function
16913
16914 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
16915 #define TARGET_OPTION_VALID_ATTRIBUTE_P s390_valid_target_attribute_p
16916
16917 #undef TARGET_CAN_INLINE_P
16918 #define TARGET_CAN_INLINE_P s390_can_inline_p
16919 #endif
16920
16921 #undef TARGET_OPTION_RESTORE
16922 #define TARGET_OPTION_RESTORE s390_function_specific_restore
16923
16924 #undef TARGET_CAN_CHANGE_MODE_CLASS
16925 #define TARGET_CAN_CHANGE_MODE_CLASS s390_can_change_mode_class
16926
16927 #undef TARGET_CONSTANT_ALIGNMENT
16928 #define TARGET_CONSTANT_ALIGNMENT s390_constant_alignment
16929
16930 #undef TARGET_ASM_CODE_END
16931 #define TARGET_ASM_CODE_END s390_code_end
16932
16933 #undef TARGET_CASE_VALUES_THRESHOLD
16934 #define TARGET_CASE_VALUES_THRESHOLD s390_case_values_threshold
16935
16936 struct gcc_target targetm = TARGET_INITIALIZER;
16937
16938 #include "gt-s390.h"
16939