/* Subroutines used for code generation on IBM S/390 and zSeries
   Copyright (C) 1999-2014 Free Software Foundation, Inc.
   Contributed by Hartmut Penner (hpenner@de.ibm.com) and
                  Ulrich Weigand (uweigand@de.ibm.com) and
                  Andreas Krebbel (Andreas.Krebbel@de.ibm.com).

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "rtl.h"
#include "tree.h"
#include "print-tree.h"
#include "stringpool.h"
#include "stor-layout.h"
#include "varasm.h"
#include "calls.h"
#include "tm_p.h"
#include "regs.h"
#include "hard-reg-set.h"
#include "insn-config.h"
#include "conditions.h"
#include "output.h"
#include "insn-attr.h"
#include "flags.h"
#include "except.h"
#include "function.h"
#include "recog.h"
#include "expr.h"
#include "reload.h"
#include "diagnostic-core.h"
#include "basic-block.h"
#include "ggc.h"
#include "target.h"
#include "target-def.h"
#include "debug.h"
#include "langhooks.h"
#include "optabs.h"
#include "pointer-set.h"
#include "hash-table.h"
#include "vec.h"
#include "basic-block.h"
#include "tree-ssa-alias.h"
#include "internal-fn.h"
#include "gimple-fold.h"
#include "tree-eh.h"
#include "gimple-expr.h"
#include "is-a.h"
#include "gimple.h"
#include "gimplify.h"
#include "df.h"
#include "params.h"
#include "cfgloop.h"
#include "opts.h"
#include "tree-pass.h"
#include "context.h"

/* Define the specific costs for a given cpu.  */

struct processor_costs
{
  /* multiplication */
  const int m;        /* cost of an M instruction.  */
  const int mghi;     /* cost of an MGHI instruction.  */
  const int mh;       /* cost of an MH instruction.  */
  const int mhi;      /* cost of an MHI instruction.  */
  const int ml;       /* cost of an ML instruction.  */
  const int mr;       /* cost of an MR instruction.  */
  const int ms;       /* cost of an MS instruction.  */
  const int msg;      /* cost of an MSG instruction.  */
  const int msgf;     /* cost of an MSGF instruction.  */
  const int msgfr;    /* cost of an MSGFR instruction.  */
  const int msgr;     /* cost of an MSGR instruction.  */
  const int msr;      /* cost of an MSR instruction.  */
  const int mult_df;  /* cost of multiplication in DFmode.  */
  const int mxbr;
  /* square root */
  const int sqxbr;    /* cost of square root in TFmode.  */
  const int sqdbr;    /* cost of square root in DFmode.  */
  const int sqebr;    /* cost of square root in SFmode.  */
  /* multiply and add */
  const int madbr;    /* cost of multiply and add in DFmode.  */
  const int maebr;    /* cost of multiply and add in SFmode.  */
  /* division */
  const int dxbr;
  const int ddbr;
  const int debr;
  const int dlgr;
  const int dlr;
  const int dr;
  const int dsgfr;
  const int dsgr;
};

const struct processor_costs *s390_cost;
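
/* NOTE (illustrative, added by the editor, not part of the original
   source): s390_cost points at the cost table matching the CPU
   selected with -mtune.  A typical consumer is the rtx_costs target
   hook, which might read it roughly like

     case MULT:
       *total = s390_cost->m;    (hypothetical sketch)
       break;

   so that combine and the register allocator see per-CPU latencies.  */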

static const
struct processor_costs z900_cost =
{
  COSTS_N_INSNS (5),     /* M     */
  COSTS_N_INSNS (10),    /* MGHI  */
  COSTS_N_INSNS (5),     /* MH    */
  COSTS_N_INSNS (4),     /* MHI   */
  COSTS_N_INSNS (5),     /* ML    */
  COSTS_N_INSNS (5),     /* MR    */
  COSTS_N_INSNS (4),     /* MS    */
  COSTS_N_INSNS (15),    /* MSG   */
  COSTS_N_INSNS (7),     /* MSGF  */
  COSTS_N_INSNS (7),     /* MSGFR */
  COSTS_N_INSNS (10),    /* MSGR  */
  COSTS_N_INSNS (4),     /* MSR   */
  COSTS_N_INSNS (7),     /* multiplication in DFmode */
  COSTS_N_INSNS (13),    /* MXBR */
  COSTS_N_INSNS (136),   /* SQXBR */
  COSTS_N_INSNS (44),    /* SQDBR */
  COSTS_N_INSNS (35),    /* SQEBR */
  COSTS_N_INSNS (18),    /* MADBR */
  COSTS_N_INSNS (13),    /* MAEBR */
  COSTS_N_INSNS (134),   /* DXBR */
  COSTS_N_INSNS (30),    /* DDBR */
  COSTS_N_INSNS (27),    /* DEBR */
  COSTS_N_INSNS (220),   /* DLGR */
  COSTS_N_INSNS (34),    /* DLR */
  COSTS_N_INSNS (34),    /* DR */
  COSTS_N_INSNS (32),    /* DSGFR */
  COSTS_N_INSNS (32),    /* DSGR */
};

static const
struct processor_costs z990_cost =
{
  COSTS_N_INSNS (4),     /* M     */
  COSTS_N_INSNS (2),     /* MGHI  */
  COSTS_N_INSNS (2),     /* MH    */
  COSTS_N_INSNS (2),     /* MHI   */
  COSTS_N_INSNS (4),     /* ML    */
  COSTS_N_INSNS (4),     /* MR    */
  COSTS_N_INSNS (5),     /* MS    */
  COSTS_N_INSNS (6),     /* MSG   */
  COSTS_N_INSNS (4),     /* MSGF  */
  COSTS_N_INSNS (4),     /* MSGFR */
  COSTS_N_INSNS (4),     /* MSGR  */
  COSTS_N_INSNS (4),     /* MSR   */
  COSTS_N_INSNS (1),     /* multiplication in DFmode */
  COSTS_N_INSNS (28),    /* MXBR */
  COSTS_N_INSNS (130),   /* SQXBR */
  COSTS_N_INSNS (66),    /* SQDBR */
  COSTS_N_INSNS (38),    /* SQEBR */
  COSTS_N_INSNS (1),     /* MADBR */
  COSTS_N_INSNS (1),     /* MAEBR */
  COSTS_N_INSNS (60),    /* DXBR */
  COSTS_N_INSNS (40),    /* DDBR */
  COSTS_N_INSNS (26),    /* DEBR */
  COSTS_N_INSNS (176),   /* DLGR */
  COSTS_N_INSNS (31),    /* DLR */
  COSTS_N_INSNS (31),    /* DR */
  COSTS_N_INSNS (31),    /* DSGFR */
  COSTS_N_INSNS (31),    /* DSGR */
};

static const
struct processor_costs z9_109_cost =
{
  COSTS_N_INSNS (4),     /* M     */
  COSTS_N_INSNS (2),     /* MGHI  */
  COSTS_N_INSNS (2),     /* MH    */
  COSTS_N_INSNS (2),     /* MHI   */
  COSTS_N_INSNS (4),     /* ML    */
  COSTS_N_INSNS (4),     /* MR    */
  COSTS_N_INSNS (5),     /* MS    */
  COSTS_N_INSNS (6),     /* MSG   */
  COSTS_N_INSNS (4),     /* MSGF  */
  COSTS_N_INSNS (4),     /* MSGFR */
  COSTS_N_INSNS (4),     /* MSGR  */
  COSTS_N_INSNS (4),     /* MSR   */
  COSTS_N_INSNS (1),     /* multiplication in DFmode */
  COSTS_N_INSNS (28),    /* MXBR */
  COSTS_N_INSNS (130),   /* SQXBR */
  COSTS_N_INSNS (66),    /* SQDBR */
  COSTS_N_INSNS (38),    /* SQEBR */
  COSTS_N_INSNS (1),     /* MADBR */
  COSTS_N_INSNS (1),     /* MAEBR */
  COSTS_N_INSNS (60),    /* DXBR */
  COSTS_N_INSNS (40),    /* DDBR */
  COSTS_N_INSNS (26),    /* DEBR */
  COSTS_N_INSNS (30),    /* DLGR */
  COSTS_N_INSNS (23),    /* DLR */
  COSTS_N_INSNS (23),    /* DR */
  COSTS_N_INSNS (24),    /* DSGFR */
  COSTS_N_INSNS (24),    /* DSGR */
};

static const
struct processor_costs z10_cost =
{
  COSTS_N_INSNS (10),    /* M     */
  COSTS_N_INSNS (10),    /* MGHI  */
  COSTS_N_INSNS (10),    /* MH    */
  COSTS_N_INSNS (10),    /* MHI   */
  COSTS_N_INSNS (10),    /* ML    */
  COSTS_N_INSNS (10),    /* MR    */
  COSTS_N_INSNS (10),    /* MS    */
  COSTS_N_INSNS (10),    /* MSG   */
  COSTS_N_INSNS (10),    /* MSGF  */
  COSTS_N_INSNS (10),    /* MSGFR */
  COSTS_N_INSNS (10),    /* MSGR  */
  COSTS_N_INSNS (10),    /* MSR   */
  COSTS_N_INSNS (1),     /* multiplication in DFmode */
  COSTS_N_INSNS (50),    /* MXBR */
  COSTS_N_INSNS (120),   /* SQXBR */
  COSTS_N_INSNS (52),    /* SQDBR */
  COSTS_N_INSNS (38),    /* SQEBR */
  COSTS_N_INSNS (1),     /* MADBR */
  COSTS_N_INSNS (1),     /* MAEBR */
  COSTS_N_INSNS (111),   /* DXBR */
  COSTS_N_INSNS (39),    /* DDBR */
  COSTS_N_INSNS (32),    /* DEBR */
  COSTS_N_INSNS (160),   /* DLGR */
  COSTS_N_INSNS (71),    /* DLR */
  COSTS_N_INSNS (71),    /* DR */
  COSTS_N_INSNS (71),    /* DSGFR */
  COSTS_N_INSNS (71),    /* DSGR */
};

static const
struct processor_costs z196_cost =
{
  COSTS_N_INSNS (7),     /* M     */
  COSTS_N_INSNS (5),     /* MGHI  */
  COSTS_N_INSNS (5),     /* MH    */
  COSTS_N_INSNS (5),     /* MHI   */
  COSTS_N_INSNS (7),     /* ML    */
  COSTS_N_INSNS (7),     /* MR    */
  COSTS_N_INSNS (6),     /* MS    */
  COSTS_N_INSNS (8),     /* MSG   */
  COSTS_N_INSNS (6),     /* MSGF  */
  COSTS_N_INSNS (6),     /* MSGFR */
  COSTS_N_INSNS (8),     /* MSGR  */
  COSTS_N_INSNS (6),     /* MSR   */
  COSTS_N_INSNS (1),     /* multiplication in DFmode */
  COSTS_N_INSNS (40),    /* MXBR B+40 */
  COSTS_N_INSNS (100),   /* SQXBR B+100 */
  COSTS_N_INSNS (42),    /* SQDBR B+42 */
  COSTS_N_INSNS (28),    /* SQEBR B+28 */
  COSTS_N_INSNS (1),     /* MADBR B */
  COSTS_N_INSNS (1),     /* MAEBR B */
  COSTS_N_INSNS (101),   /* DXBR B+101 */
  COSTS_N_INSNS (29),    /* DDBR */
  COSTS_N_INSNS (22),    /* DEBR */
  COSTS_N_INSNS (160),   /* DLGR cracked */
  COSTS_N_INSNS (160),   /* DLR cracked */
  COSTS_N_INSNS (160),   /* DR expanded */
  COSTS_N_INSNS (160),   /* DSGFR cracked */
  COSTS_N_INSNS (160),   /* DSGR cracked */
};

static const
struct processor_costs zEC12_cost =
{
  COSTS_N_INSNS (7),     /* M     */
  COSTS_N_INSNS (5),     /* MGHI  */
  COSTS_N_INSNS (5),     /* MH    */
  COSTS_N_INSNS (5),     /* MHI   */
  COSTS_N_INSNS (7),     /* ML    */
  COSTS_N_INSNS (7),     /* MR    */
  COSTS_N_INSNS (6),     /* MS    */
  COSTS_N_INSNS (8),     /* MSG   */
  COSTS_N_INSNS (6),     /* MSGF  */
  COSTS_N_INSNS (6),     /* MSGFR */
  COSTS_N_INSNS (8),     /* MSGR  */
  COSTS_N_INSNS (6),     /* MSR   */
  COSTS_N_INSNS (1),     /* multiplication in DFmode */
  COSTS_N_INSNS (40),    /* MXBR B+40 */
  COSTS_N_INSNS (100),   /* SQXBR B+100 */
  COSTS_N_INSNS (42),    /* SQDBR B+42 */
  COSTS_N_INSNS (28),    /* SQEBR B+28 */
  COSTS_N_INSNS (1),     /* MADBR B */
  COSTS_N_INSNS (1),     /* MAEBR B */
  COSTS_N_INSNS (131),   /* DXBR B+131 */
  COSTS_N_INSNS (29),    /* DDBR */
  COSTS_N_INSNS (22),    /* DEBR */
  COSTS_N_INSNS (160),   /* DLGR cracked */
  COSTS_N_INSNS (160),   /* DLR cracked */
  COSTS_N_INSNS (160),   /* DR expanded */
  COSTS_N_INSNS (160),   /* DSGFR cracked */
  COSTS_N_INSNS (160),   /* DSGR cracked */
};

extern int reload_completed;

/* Kept up to date using the SCHED_VARIABLE_ISSUE hook.  */
static rtx last_scheduled_insn;

/* Structure used to hold the components of an S/390 memory
   address.  A legitimate address on S/390 is of the general
   form
        base + index + displacement
   where any of the components is optional.

   base and index are registers of the class ADDR_REGS,
   displacement is an unsigned 12-bit immediate constant.  */

struct s390_address
{
  rtx base;
  rtx indx;
  rtx disp;
  bool pointer;
  bool literal_pool;
};
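
/* NOTE (illustrative, added by the editor, not part of the original
   source): for an address such as
     (plus (plus (reg %r2) (reg %r3)) (const_int 12))
   s390_decompose_address below fills in indx = %r2, base = %r3 and a
   displacement of 12; each component may also be absent (NULL_RTX).  */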

/* The following structure is embedded in the machine
   specific part of struct function.  */

struct GTY (()) s390_frame_layout
{
  /* Offset within stack frame.  */
  HOST_WIDE_INT gprs_offset;
  HOST_WIDE_INT f0_offset;
  HOST_WIDE_INT f4_offset;
  HOST_WIDE_INT f8_offset;
  HOST_WIDE_INT backchain_offset;

  /* Numbers of the first and last GPRs for which slots in the
     register save area are reserved.  */
  int first_save_gpr_slot;
  int last_save_gpr_slot;

  /* Save slot for each GPR (r0-r15):
     0  - the register does not need to be saved at all
     -1 - save to a stack slot
     otherwise the number of the FPR to save the GPR into.  */
  signed char gpr_save_slots[16];

  /* Number of first and last gpr to be saved, restored.  */
  int first_save_gpr;
  int first_restore_gpr;
  int last_save_gpr;
  int last_restore_gpr;

  /* Bits standing for floating point registers.  Set, if the
     respective register has to be saved.  Starting with reg 16 (f0)
     at the rightmost bit.
     Bit 15 14 13 12 11 10  9  8  7  6  5  4  3  2  1  0
     fpr 15 13 11  9 14 12 10  8  7  5  3  1  6  4  2  0
     reg 31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16  */
  unsigned int fpr_bitmap;

  /* Number of floating point registers f8-f15 which must be saved.  */
  int high_fprs;

  /* Set if return address needs to be saved.
     This flag is set by s390_return_addr_rtx if it could not use
     the initial value of r14 and therefore depends on r14 saved
     to the stack.  */
  bool save_return_addr_p;

  /* Size of stack frame.  */
  HOST_WIDE_INT frame_size;
};

/* Define the structure for the machine field in struct function.  */

struct GTY(()) machine_function
{
  struct s390_frame_layout frame_layout;

  /* Literal pool base register.  */
  rtx base_reg;

  /* True if we may need to perform branch splitting.  */
  bool split_branches_pending_p;

  /* Some local-dynamic TLS symbol name.  */
  const char *some_ld_name;

  bool has_landing_pad_p;

  /* True if the current function may contain a tbegin clobbering
     FPRs.  */
  bool tbegin_p;
};

/* A few accessor macros for struct cfun->machine->s390_frame_layout.  */

#define cfun_frame_layout (cfun->machine->frame_layout)
#define cfun_save_high_fprs_p (!!cfun_frame_layout.high_fprs)
#define cfun_save_arg_fprs_p (!!(TARGET_64BIT                          \
                                 ? cfun_frame_layout.fpr_bitmap & 0x0f \
                                 : cfun_frame_layout.fpr_bitmap & 0x03))
#define cfun_gprs_save_area_size ((cfun_frame_layout.last_save_gpr_slot - \
  cfun_frame_layout.first_save_gpr_slot + 1) * UNITS_PER_LONG)
#define cfun_set_fpr_save(REGNO) (cfun->machine->frame_layout.fpr_bitmap |= \
  (1 << (REGNO - FPR0_REGNUM)))
#define cfun_fpr_save_p(REGNO) (!!(cfun->machine->frame_layout.fpr_bitmap & \
  (1 << (REGNO - FPR0_REGNUM))))
#define cfun_gpr_save_slot(REGNO) \
  cfun->machine->frame_layout.gpr_save_slots[REGNO]

/* Number of GPRs and FPRs used for argument passing.  */
#define GP_ARG_NUM_REG 5
#define FP_ARG_NUM_REG (TARGET_64BIT ? 4 : 2)

/* A couple of shortcuts.  */
#define CONST_OK_FOR_J(x) \
        CONST_OK_FOR_CONSTRAINT_P((x), 'J', "J")
#define CONST_OK_FOR_K(x) \
        CONST_OK_FOR_CONSTRAINT_P((x), 'K', "K")
#define CONST_OK_FOR_Os(x) \
        CONST_OK_FOR_CONSTRAINT_P((x), 'O', "Os")
#define CONST_OK_FOR_Op(x) \
        CONST_OK_FOR_CONSTRAINT_P((x), 'O', "Op")
#define CONST_OK_FOR_On(x) \
        CONST_OK_FOR_CONSTRAINT_P((x), 'O', "On")

#define REGNO_PAIR_OK(REGNO, MODE) \
  (HARD_REGNO_NREGS ((REGNO), (MODE)) == 1 || !((REGNO) & 1))

/* The read-ahead distance, in bytes, of the dynamic branch-prediction
   unit on a z10 (or newer) CPU.  */
#define PREDICT_DISTANCE (TARGET_Z10 ? 384 : 2048)

static const int s390_hotpatch_trampoline_halfwords_default = 12;
static const int s390_hotpatch_trampoline_halfwords_max = 1000000;
static int s390_hotpatch_trampoline_halfwords = -1;

/* Return the argument of the given hotpatch attribute or the default value if
   no argument is present.  */

static inline int
get_hotpatch_attribute (tree hotpatch_attr)
{
  const_tree args;

  args = TREE_VALUE (hotpatch_attr);

  return (args) ?
    TREE_INT_CST_LOW (TREE_VALUE (args)) :
    s390_hotpatch_trampoline_halfwords_default;
}
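
/* NOTE (illustrative, added by the editor, not part of the original
   source): the attribute handled here is written on function
   declarations, e.g.

     void foo (void) __attribute__ ((hotpatch));       default size
     void bar (void) __attribute__ ((hotpatch (8)));   8 halfwords

   The optional argument gives the hotpatch trampoline area size in
   halfwords; without it the default of
   s390_hotpatch_trampoline_halfwords_default (12) is used.  */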

/* Check whether the hotpatch attribute is applied to a function and, if it has
   an argument, the argument is valid.  */

static tree
s390_handle_hotpatch_attribute (tree *node, tree name, tree args,
                                int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
{
  if (TREE_CODE (*node) != FUNCTION_DECL)
    {
      warning (OPT_Wattributes, "%qE attribute only applies to functions",
               name);
      *no_add_attrs = true;
    }
  else if (args)
    {
      tree expr = TREE_VALUE (args);

      if (TREE_CODE (expr) != INTEGER_CST
          || !INTEGRAL_TYPE_P (TREE_TYPE (expr))
          || TREE_INT_CST_HIGH (expr) != 0
          || TREE_INT_CST_LOW (expr) > (unsigned int)
          s390_hotpatch_trampoline_halfwords_max)
        {
          error ("requested %qE attribute is not a non-negative integer"
                 " constant or is too large (max. %d)", name,
                 s390_hotpatch_trampoline_halfwords_max);
          *no_add_attrs = true;
        }
    }

  return NULL_TREE;
}

static const struct attribute_spec s390_attribute_table[] = {
  { "hotpatch", 0, 1, true, false, false, s390_handle_hotpatch_attribute,
    false },
  /* End element.  */
  { NULL, 0, 0, false, false, false, NULL, false }
};

/* Return the alignment for LABEL.  We default to the -falign-labels
   value except for the literal pool base label.  */
int
s390_label_align (rtx label)
{
  rtx prev_insn = prev_active_insn (label);

  if (prev_insn == NULL_RTX)
    goto old;

  prev_insn = single_set (prev_insn);

  if (prev_insn == NULL_RTX)
    goto old;

  prev_insn = SET_SRC (prev_insn);

  /* Don't align literal pool base labels.  */
  if (GET_CODE (prev_insn) == UNSPEC
      && XINT (prev_insn, 1) == UNSPEC_MAIN_BASE)
    return 0;

 old:
  return align_labels_log;
}

static enum machine_mode
s390_libgcc_cmp_return_mode (void)
{
  return TARGET_64BIT ? DImode : SImode;
}

static enum machine_mode
s390_libgcc_shift_count_mode (void)
{
  return TARGET_64BIT ? DImode : SImode;
}

static enum machine_mode
s390_unwind_word_mode (void)
{
  return TARGET_64BIT ? DImode : SImode;
}

/* Return true if the back end supports mode MODE.  */
static bool
s390_scalar_mode_supported_p (enum machine_mode mode)
{
  /* In contrast to the default implementation, reject TImode constants
     on 31-bit TARGET_ZARCH for ABI compliance.  */
  if (!TARGET_64BIT && TARGET_ZARCH && mode == TImode)
    return false;

  if (DECIMAL_FLOAT_MODE_P (mode))
    return default_decimal_float_supported_p ();

  return default_scalar_mode_supported_p (mode);
}

/* Set the has_landing_pad_p flag in struct machine_function to VALUE.  */

void
s390_set_has_landing_pad_p (bool value)
{
  cfun->machine->has_landing_pad_p = value;
}

/* If two condition code modes are compatible, return a condition code
   mode which is compatible with both.  Otherwise, return
   VOIDmode.  */

static enum machine_mode
s390_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
{
  if (m1 == m2)
    return m1;

  switch (m1)
    {
    case CCZmode:
      if (m2 == CCUmode || m2 == CCTmode || m2 == CCZ1mode
          || m2 == CCSmode || m2 == CCSRmode || m2 == CCURmode)
        return m2;
      return VOIDmode;

    case CCSmode:
    case CCUmode:
    case CCTmode:
    case CCSRmode:
    case CCURmode:
    case CCZ1mode:
      if (m2 == CCZmode)
        return m1;

      return VOIDmode;

    default:
      return VOIDmode;
    }
  return VOIDmode;
}
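
/* NOTE (illustrative, added by the editor, not part of the original
   source): CCZmode only distinguishes "zero" from "not zero", so it
   is compatible with the richer modes listed above; e.g. combining
   CCZmode with CCUmode yields CCUmode, and any EQ/NE test that was
   valid on the CCZmode side remains valid on the result.  */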

/* Return true if SET either doesn't set the CC register, or else
   the source and destination have matching CC modes and that
   CC mode is at least as constrained as REQ_MODE.  */

static bool
s390_match_ccmode_set (rtx set, enum machine_mode req_mode)
{
  enum machine_mode set_mode;

  gcc_assert (GET_CODE (set) == SET);

  if (GET_CODE (SET_DEST (set)) != REG || !CC_REGNO_P (REGNO (SET_DEST (set))))
    return true;

  set_mode = GET_MODE (SET_DEST (set));
  switch (set_mode)
    {
    case CCSmode:
    case CCSRmode:
    case CCUmode:
    case CCURmode:
    case CCLmode:
    case CCL1mode:
    case CCL2mode:
    case CCL3mode:
    case CCT1mode:
    case CCT2mode:
    case CCT3mode:
      if (req_mode != set_mode)
        return false;
      break;

    case CCZmode:
      if (req_mode != CCSmode && req_mode != CCUmode && req_mode != CCTmode
          && req_mode != CCSRmode && req_mode != CCURmode)
        return false;
      break;

    case CCAPmode:
    case CCANmode:
      if (req_mode != CCAmode)
        return false;
      break;

    default:
      gcc_unreachable ();
    }

  return (GET_MODE (SET_SRC (set)) == set_mode);
}

/* Return true if every SET in INSN that sets the CC register
   has source and destination with matching CC modes and that
   CC mode is at least as constrained as REQ_MODE.
   If REQ_MODE is VOIDmode, always return false.  */

bool
s390_match_ccmode (rtx insn, enum machine_mode req_mode)
{
  int i;

  /* s390_tm_ccmode returns VOIDmode to indicate failure.  */
  if (req_mode == VOIDmode)
    return false;

  if (GET_CODE (PATTERN (insn)) == SET)
    return s390_match_ccmode_set (PATTERN (insn), req_mode);

  if (GET_CODE (PATTERN (insn)) == PARALLEL)
    for (i = 0; i < XVECLEN (PATTERN (insn), 0); i++)
      {
        rtx set = XVECEXP (PATTERN (insn), 0, i);
        if (GET_CODE (set) == SET)
          if (!s390_match_ccmode_set (set, req_mode))
            return false;
      }

  return true;
}

/* If a test-under-mask instruction can be used to implement
   (compare (and ... OP1) OP2), return the CC mode required
   to do that.  Otherwise, return VOIDmode.
   MIXED is true if the instruction can distinguish between
   CC1 and CC2 for mixed selected bits (TMxx), it is false
   if the instruction cannot (TM).  */

enum machine_mode
s390_tm_ccmode (rtx op1, rtx op2, bool mixed)
{
  int bit0, bit1;

  /* ??? Fixme: should work on CONST_DOUBLE as well.  */
  if (GET_CODE (op1) != CONST_INT || GET_CODE (op2) != CONST_INT)
    return VOIDmode;

  /* Selected bits all zero: CC0.
     e.g.: int a; if ((a & (16 + 128)) == 0) */
  if (INTVAL (op2) == 0)
    return CCTmode;

  /* Selected bits all one: CC3.
     e.g.: int a; if ((a & (16 + 128)) == 16 + 128) */
  if (INTVAL (op2) == INTVAL (op1))
    return CCT3mode;

  /* Exactly two bits selected, mixed zeroes and ones: CC1 or CC2. e.g.:
     int a;
     if ((a & (16 + 128)) == 16)  -> CCT1
     if ((a & (16 + 128)) == 128) -> CCT2  */
  if (mixed)
    {
      bit1 = exact_log2 (INTVAL (op2));
      bit0 = exact_log2 (INTVAL (op1) ^ INTVAL (op2));
      if (bit0 != -1 && bit1 != -1)
        return bit0 > bit1 ? CCT1mode : CCT2mode;
    }

  return VOIDmode;
}

/* Given a comparison code OP (EQ, NE, etc.) and the operands
   OP0 and OP1 of a COMPARE, return the mode to be used for the
   comparison.  */

enum machine_mode
s390_select_ccmode (enum rtx_code code, rtx op0, rtx op1)
{
  switch (code)
    {
    case EQ:
    case NE:
      if ((GET_CODE (op0) == NEG || GET_CODE (op0) == ABS)
          && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)
        return CCAPmode;
      if (GET_CODE (op0) == PLUS && GET_CODE (XEXP (op0, 1)) == CONST_INT
          && CONST_OK_FOR_K (INTVAL (XEXP (op0, 1))))
        return CCAPmode;
      if ((GET_CODE (op0) == PLUS || GET_CODE (op0) == MINUS
           || GET_CODE (op1) == NEG)
          && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)
        return CCLmode;

      if (GET_CODE (op0) == AND)
        {
          /* Check whether we can potentially do it via TM.  */
          enum machine_mode ccmode;
          ccmode = s390_tm_ccmode (XEXP (op0, 1), op1, 1);
          if (ccmode != VOIDmode)
            {
              /* Relax CCTmode to CCZmode to allow fall-back to AND
                 if that turns out to be beneficial.  */
              return ccmode == CCTmode ? CCZmode : ccmode;
            }
        }

      if (register_operand (op0, HImode)
          && GET_CODE (op1) == CONST_INT
          && (INTVAL (op1) == -1 || INTVAL (op1) == 65535))
        return CCT3mode;
      if (register_operand (op0, QImode)
          && GET_CODE (op1) == CONST_INT
          && (INTVAL (op1) == -1 || INTVAL (op1) == 255))
        return CCT3mode;

      return CCZmode;

    case LE:
    case LT:
    case GE:
    case GT:
      /* The only overflow condition of NEG and ABS happens when the
         operand is INT_MIN; the result then stays negative, so we have
         an overflow from a positive value to a negative.
         Using CCAP mode the resulting cc can be used for comparisons.  */
      if ((GET_CODE (op0) == NEG || GET_CODE (op0) == ABS)
          && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)
        return CCAPmode;

      /* If constants are involved in an add instruction it is possible to use
         the resulting cc for comparisons with zero.  Knowing the sign of the
         constant the overflow behavior gets predictable.  e.g.:
           int a, b; if ((b = a + c) > 0)
         with c as a constant value: c < 0 -> CCAN and c >= 0 -> CCAP  */
      if (GET_CODE (op0) == PLUS && GET_CODE (XEXP (op0, 1)) == CONST_INT
          && (CONST_OK_FOR_K (INTVAL (XEXP (op0, 1)))
              || (CONST_OK_FOR_CONSTRAINT_P (INTVAL (XEXP (op0, 1)), 'O', "Os")
                  /* Avoid INT32_MIN on 32 bit.  */
                  && (!TARGET_ZARCH || INTVAL (XEXP (op0, 1)) != -0x7fffffff - 1))))
        {
          if (INTVAL (XEXP ((op0), 1)) < 0)
            return CCANmode;
          else
            return CCAPmode;
        }
      /* Fall through.  */
    case UNORDERED:
    case ORDERED:
    case UNEQ:
    case UNLE:
    case UNLT:
    case UNGE:
    case UNGT:
    case LTGT:
      if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
          && GET_CODE (op1) != CONST_INT)
        return CCSRmode;
      return CCSmode;

    case LTU:
    case GEU:
      if (GET_CODE (op0) == PLUS
          && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)
        return CCL1mode;

      if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
          && GET_CODE (op1) != CONST_INT)
        return CCURmode;
      return CCUmode;

    case LEU:
    case GTU:
      if (GET_CODE (op0) == MINUS
          && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)
        return CCL2mode;

      if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
          && GET_CODE (op1) != CONST_INT)
        return CCURmode;
      return CCUmode;

    default:
      gcc_unreachable ();
    }
}
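
/* NOTE (illustrative, added by the editor, not part of the original
   source): e.g. for EQ applied to (plus (reg a) (const_int 42)) and
   const0_rtx this returns CCAPmode, because 42 satisfies
   CONST_OK_FOR_K; the compare against zero can then reuse the
   condition code already set by the immediate add instead of
   emitting a separate compare.  */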

/* Replace the comparison OP0 CODE OP1 by a semantically equivalent one
   that we can implement more efficiently.  */

static void
s390_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
                              bool op0_preserve_value)
{
  if (op0_preserve_value)
    return;

  /* Convert ZERO_EXTRACT back to AND to enable TM patterns.  */
  if ((*code == EQ || *code == NE)
      && *op1 == const0_rtx
      && GET_CODE (*op0) == ZERO_EXTRACT
      && GET_CODE (XEXP (*op0, 1)) == CONST_INT
      && GET_CODE (XEXP (*op0, 2)) == CONST_INT
      && SCALAR_INT_MODE_P (GET_MODE (XEXP (*op0, 0))))
    {
      rtx inner = XEXP (*op0, 0);
      HOST_WIDE_INT modesize = GET_MODE_BITSIZE (GET_MODE (inner));
      HOST_WIDE_INT len = INTVAL (XEXP (*op0, 1));
      HOST_WIDE_INT pos = INTVAL (XEXP (*op0, 2));

      if (len > 0 && len < modesize
          && pos >= 0 && pos + len <= modesize
          && modesize <= HOST_BITS_PER_WIDE_INT)
        {
          unsigned HOST_WIDE_INT block;
          block = ((unsigned HOST_WIDE_INT) 1 << len) - 1;
          block <<= modesize - pos - len;

          *op0 = gen_rtx_AND (GET_MODE (inner), inner,
                              gen_int_mode (block, GET_MODE (inner)));
        }
    }

  /* Narrow AND of memory against immediate to enable TM.  */
  if ((*code == EQ || *code == NE)
      && *op1 == const0_rtx
      && GET_CODE (*op0) == AND
      && GET_CODE (XEXP (*op0, 1)) == CONST_INT
      && SCALAR_INT_MODE_P (GET_MODE (XEXP (*op0, 0))))
    {
      rtx inner = XEXP (*op0, 0);
      rtx mask = XEXP (*op0, 1);

      /* Ignore paradoxical SUBREGs if all extra bits are masked out.  */
      if (GET_CODE (inner) == SUBREG
          && SCALAR_INT_MODE_P (GET_MODE (SUBREG_REG (inner)))
          && (GET_MODE_SIZE (GET_MODE (inner))
              >= GET_MODE_SIZE (GET_MODE (SUBREG_REG (inner))))
          && ((INTVAL (mask)
               & GET_MODE_MASK (GET_MODE (inner))
               & ~GET_MODE_MASK (GET_MODE (SUBREG_REG (inner))))
              == 0))
        inner = SUBREG_REG (inner);

      /* Do not change volatile MEMs.  */
      if (MEM_P (inner) && !MEM_VOLATILE_P (inner))
        {
          int part = s390_single_part (XEXP (*op0, 1),
                                       GET_MODE (inner), QImode, 0);
          if (part >= 0)
            {
              mask = gen_int_mode (s390_extract_part (mask, QImode, 0), QImode);
              inner = adjust_address_nv (inner, QImode, part);
              *op0 = gen_rtx_AND (QImode, inner, mask);
            }
        }
    }

  /* Narrow comparisons against 0xffff to HImode if possible.  */
  if ((*code == EQ || *code == NE)
      && GET_CODE (*op1) == CONST_INT
      && INTVAL (*op1) == 0xffff
      && SCALAR_INT_MODE_P (GET_MODE (*op0))
      && (nonzero_bits (*op0, GET_MODE (*op0))
          & ~(unsigned HOST_WIDE_INT) 0xffff) == 0)
    {
      *op0 = gen_lowpart (HImode, *op0);
      *op1 = constm1_rtx;
    }

  /* Remove redundant UNSPEC_STRCMPCC_TO_INT conversions if possible.  */
  if (GET_CODE (*op0) == UNSPEC
      && XINT (*op0, 1) == UNSPEC_STRCMPCC_TO_INT
      && XVECLEN (*op0, 0) == 1
      && GET_MODE (XVECEXP (*op0, 0, 0)) == CCUmode
      && GET_CODE (XVECEXP (*op0, 0, 0)) == REG
      && REGNO (XVECEXP (*op0, 0, 0)) == CC_REGNUM
      && *op1 == const0_rtx)
    {
      enum rtx_code new_code = UNKNOWN;
      switch (*code)
        {
        case EQ: new_code = EQ;  break;
        case NE: new_code = NE;  break;
        case LT: new_code = GTU; break;
        case GT: new_code = LTU; break;
        case LE: new_code = GEU; break;
        case GE: new_code = LEU; break;
        default: break;
        }

      if (new_code != UNKNOWN)
        {
          *op0 = XVECEXP (*op0, 0, 0);
          *code = new_code;
        }
    }

  /* Remove redundant UNSPEC_CC_TO_INT conversions if possible.  */
  if (GET_CODE (*op0) == UNSPEC
      && XINT (*op0, 1) == UNSPEC_CC_TO_INT
      && XVECLEN (*op0, 0) == 1
      && GET_CODE (XVECEXP (*op0, 0, 0)) == REG
      && REGNO (XVECEXP (*op0, 0, 0)) == CC_REGNUM
      && CONST_INT_P (*op1))
    {
      enum rtx_code new_code = UNKNOWN;
      switch (GET_MODE (XVECEXP (*op0, 0, 0)))
        {
        case CCZmode:
        case CCRAWmode:
          switch (*code)
            {
            case EQ: new_code = EQ; break;
            case NE: new_code = NE; break;
            default: break;
            }
          break;
        default: break;
        }

      if (new_code != UNKNOWN)
        {
          /* For CCRAWmode put the required cc mask into the second
             operand.  */
          if (GET_MODE (XVECEXP (*op0, 0, 0)) == CCRAWmode
              && INTVAL (*op1) >= 0 && INTVAL (*op1) <= 3)
            *op1 = gen_rtx_CONST_INT (VOIDmode, 1 << (3 - INTVAL (*op1)));
          *op0 = XVECEXP (*op0, 0, 0);
          *code = new_code;
        }
    }

  /* Simplify cascaded EQ, NE with const0_rtx.  */
  if ((*code == NE || *code == EQ)
      && (GET_CODE (*op0) == EQ || GET_CODE (*op0) == NE)
      && GET_MODE (*op0) == SImode
      && GET_MODE (XEXP (*op0, 0)) == CCZ1mode
      && REG_P (XEXP (*op0, 0))
      && XEXP (*op0, 1) == const0_rtx
      && *op1 == const0_rtx)
    {
      if ((*code == EQ && GET_CODE (*op0) == NE)
          || (*code == NE && GET_CODE (*op0) == EQ))
        *code = EQ;
      else
        *code = NE;
      *op0 = XEXP (*op0, 0);
    }

  /* Prefer register over memory as first operand.  */
  if (MEM_P (*op0) && REG_P (*op1))
    {
      rtx tem = *op0; *op0 = *op1; *op1 = tem;
      *code = (int) swap_condition ((enum rtx_code) *code);
    }
}
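
/* NOTE (illustrative, added by the editor, not part of the original
   source): the first transformation above turns, e.g., a 1-bit
   extract in SImode

     (eq (zero_extract:SI (reg x) (const_int 1) (const_int 7))
         (const_int 0))

   into (eq (and:SI (reg x) (const_int 0x1000000)) (const_int 0)):
   with len = 1 and pos = 7 the computed block mask is
   1 << (32 - 7 - 1) = 0x1000000, i.e. positions count from the most
   significant bit, matching the S/390 TM bit numbering.  */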

/* Emit a compare instruction suitable to implement the comparison
   OP0 CODE OP1.  Return the correct condition RTL to be placed in
   the IF_THEN_ELSE of the conditional branch testing the result.  */

rtx
s390_emit_compare (enum rtx_code code, rtx op0, rtx op1)
{
  enum machine_mode mode = s390_select_ccmode (code, op0, op1);
  rtx cc;

  /* Do not output a redundant compare instruction if a compare_and_swap
     pattern already computed the result and the machine modes are
     compatible.  */
  if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC)
    {
      gcc_assert (s390_cc_modes_compatible (GET_MODE (op0), mode)
                  == GET_MODE (op0));
      cc = op0;
    }
  else
    {
      cc = gen_rtx_REG (mode, CC_REGNUM);
      emit_insn (gen_rtx_SET (VOIDmode, cc, gen_rtx_COMPARE (mode, op0, op1)));
    }

  return gen_rtx_fmt_ee (code, VOIDmode, cc, const0_rtx);
}

/* Emit a SImode compare and swap instruction setting MEM to NEW_RTX if OLD
   matches CMP.
   Return the correct condition RTL to be placed in the IF_THEN_ELSE of the
   conditional branch testing the result.  */

static rtx
s390_emit_compare_and_swap (enum rtx_code code, rtx old, rtx mem,
                            rtx cmp, rtx new_rtx)
{
  emit_insn (gen_atomic_compare_and_swapsi_internal (old, mem, cmp, new_rtx));
  return s390_emit_compare (code, gen_rtx_REG (CCZ1mode, CC_REGNUM),
                            const0_rtx);
}

/* Emit a jump instruction to TARGET and return it.  If COND is
   NULL_RTX, emit an unconditional jump, else a conditional jump under
   condition COND.  */

rtx
s390_emit_jump (rtx target, rtx cond)
{
  rtx insn;

  target = gen_rtx_LABEL_REF (VOIDmode, target);
  if (cond)
    target = gen_rtx_IF_THEN_ELSE (VOIDmode, cond, target, pc_rtx);

  insn = gen_rtx_SET (VOIDmode, pc_rtx, target);
  return emit_jump_insn (insn);
}

/* Return branch condition mask to implement a branch
   specified by CODE.  Return -1 for invalid comparisons.  */

int
s390_branch_condition_mask (rtx code)
{
  const int CC0 = 1 << 3;
  const int CC1 = 1 << 2;
  const int CC2 = 1 << 1;
  const int CC3 = 1 << 0;

  gcc_assert (GET_CODE (XEXP (code, 0)) == REG);
  gcc_assert (REGNO (XEXP (code, 0)) == CC_REGNUM);
  gcc_assert (XEXP (code, 1) == const0_rtx
              || (GET_MODE (XEXP (code, 0)) == CCRAWmode
                  && CONST_INT_P (XEXP (code, 1))));

  switch (GET_MODE (XEXP (code, 0)))
    {
    case CCZmode:
    case CCZ1mode:
      switch (GET_CODE (code))
        {
        case EQ: return CC0;
        case NE: return CC1 | CC2 | CC3;
        default: return -1;
        }
      break;

    case CCT1mode:
      switch (GET_CODE (code))
        {
        case EQ: return CC1;
        case NE: return CC0 | CC2 | CC3;
        default: return -1;
        }
      break;

    case CCT2mode:
      switch (GET_CODE (code))
        {
        case EQ: return CC2;
        case NE: return CC0 | CC1 | CC3;
        default: return -1;
        }
      break;

    case CCT3mode:
      switch (GET_CODE (code))
        {
        case EQ: return CC3;
        case NE: return CC0 | CC1 | CC2;
        default: return -1;
        }
      break;

    case CCLmode:
      switch (GET_CODE (code))
        {
        case EQ: return CC0 | CC2;
        case NE: return CC1 | CC3;
        default: return -1;
        }
      break;

    case CCL1mode:
      switch (GET_CODE (code))
        {
        case LTU: return CC2 | CC3;  /* carry */
        case GEU: return CC0 | CC1;  /* no carry */
        default: return -1;
        }
      break;

    case CCL2mode:
      switch (GET_CODE (code))
        {
        case GTU: return CC0 | CC1;  /* borrow */
        case LEU: return CC2 | CC3;  /* no borrow */
        default: return -1;
        }
      break;

    case CCL3mode:
      switch (GET_CODE (code))
        {
        case EQ: return CC0 | CC2;
        case NE: return CC1 | CC3;
        case LTU: return CC1;
        case GTU: return CC3;
        case LEU: return CC1 | CC2;
        case GEU: return CC2 | CC3;
        default: return -1;
        }

    case CCUmode:
      switch (GET_CODE (code))
        {
        case EQ: return CC0;
        case NE: return CC1 | CC2 | CC3;
        case LTU: return CC1;
        case GTU: return CC2;
        case LEU: return CC0 | CC1;
        case GEU: return CC0 | CC2;
        default: return -1;
        }
      break;

    case CCURmode:
      switch (GET_CODE (code))
        {
        case EQ: return CC0;
        case NE: return CC2 | CC1 | CC3;
        case LTU: return CC2;
        case GTU: return CC1;
        case LEU: return CC0 | CC2;
        case GEU: return CC0 | CC1;
        default: return -1;
        }
      break;

    case CCAPmode:
      switch (GET_CODE (code))
        {
        case EQ: return CC0;
        case NE: return CC1 | CC2 | CC3;
        case LT: return CC1 | CC3;
        case GT: return CC2;
        case LE: return CC0 | CC1 | CC3;
        case GE: return CC0 | CC2;
        default: return -1;
        }
      break;

    case CCANmode:
      switch (GET_CODE (code))
        {
        case EQ: return CC0;
        case NE: return CC1 | CC2 | CC3;
        case LT: return CC1;
        case GT: return CC2 | CC3;
        case LE: return CC0 | CC1;
        case GE: return CC0 | CC2 | CC3;
        default: return -1;
        }
      break;

    case CCSmode:
      switch (GET_CODE (code))
        {
        case EQ: return CC0;
        case NE: return CC1 | CC2 | CC3;
        case LT: return CC1;
        case GT: return CC2;
        case LE: return CC0 | CC1;
        case GE: return CC0 | CC2;
        case UNORDERED: return CC3;
        case ORDERED: return CC0 | CC1 | CC2;
        case UNEQ: return CC0 | CC3;
        case UNLT: return CC1 | CC3;
        case UNGT: return CC2 | CC3;
        case UNLE: return CC0 | CC1 | CC3;
        case UNGE: return CC0 | CC2 | CC3;
        case LTGT: return CC1 | CC2;
        default: return -1;
        }
      break;

    case CCSRmode:
      switch (GET_CODE (code))
        {
        case EQ: return CC0;
        case NE: return CC2 | CC1 | CC3;
        case LT: return CC2;
        case GT: return CC1;
        case LE: return CC0 | CC2;
        case GE: return CC0 | CC1;
        case UNORDERED: return CC3;
        case ORDERED: return CC0 | CC2 | CC1;
        case UNEQ: return CC0 | CC3;
        case UNLT: return CC2 | CC3;
        case UNGT: return CC1 | CC3;
        case UNLE: return CC0 | CC2 | CC3;
        case UNGE: return CC0 | CC1 | CC3;
        case LTGT: return CC2 | CC1;
        default: return -1;
        }
      break;

    case CCRAWmode:
      switch (GET_CODE (code))
        {
        case EQ:
          return INTVAL (XEXP (code, 1));
        case NE:
          return (INTVAL (XEXP (code, 1))) ^ 0xf;
        default:
          gcc_unreachable ();
        }

    default:
      return -1;
    }
}

/* Return branch condition mask to implement a compare and branch
   specified by CODE.  Return -1 for invalid comparisons.  */

int
s390_compare_and_branch_condition_mask (rtx code)
{
  const int CC0 = 1 << 3;
  const int CC1 = 1 << 2;
  const int CC2 = 1 << 1;

  switch (GET_CODE (code))
    {
    case EQ:
      return CC0;
    case NE:
      return CC1 | CC2;
    case LT:
    case LTU:
      return CC1;
    case GT:
    case GTU:
      return CC2;
    case LE:
    case LEU:
      return CC0 | CC1;
    case GE:
    case GEU:
      return CC0 | CC2;
    default:
      gcc_unreachable ();
    }
  return -1;
}

/* If INV is false, return assembler mnemonic string to implement
   a branch specified by CODE.  If INV is true, return mnemonic
   for the corresponding inverted branch.  */

static const char *
s390_branch_condition_mnemonic (rtx code, int inv)
{
  int mask;

  static const char *const mnemonic[16] =
    {
      NULL, "o", "h", "nle",
      "l", "nhe", "lh", "ne",
      "e", "nlh", "he", "nl",
      "le", "nh", "no", NULL
    };

  if (GET_CODE (XEXP (code, 0)) == REG
      && REGNO (XEXP (code, 0)) == CC_REGNUM
      && (XEXP (code, 1) == const0_rtx
          || (GET_MODE (XEXP (code, 0)) == CCRAWmode
              && CONST_INT_P (XEXP (code, 1)))))
    mask = s390_branch_condition_mask (code);
  else
    mask = s390_compare_and_branch_condition_mask (code);

  gcc_assert (mask >= 0);

  if (inv)
    mask ^= 15;

  gcc_assert (mask >= 1 && mask <= 14);

  return mnemonic[mask];
}
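
/* NOTE (illustrative, added by the editor, not part of the original
   source): for a CCSmode GT comparison s390_branch_condition_mask
   returns CC2 = 0x2, and mnemonic[2] is "h" ("branch on high"); the
   inverted branch uses mask 0x2 ^ 0xf = 0xd, i.e. "nh".  */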

/* Return the part of op which has a value different from def.
   The size of the part is determined by mode.
   Use this function only if you already know that op really
   contains such a part.  */

unsigned HOST_WIDE_INT
s390_extract_part (rtx op, enum machine_mode mode, int def)
{
  unsigned HOST_WIDE_INT value = 0;
  int max_parts = HOST_BITS_PER_WIDE_INT / GET_MODE_BITSIZE (mode);
  int part_bits = GET_MODE_BITSIZE (mode);
  unsigned HOST_WIDE_INT part_mask
    = ((unsigned HOST_WIDE_INT)1 << part_bits) - 1;
  int i;

  for (i = 0; i < max_parts; i++)
    {
      if (i == 0)
        value = (unsigned HOST_WIDE_INT) INTVAL (op);
      else
        value >>= part_bits;

      if ((value & part_mask) != (def & part_mask))
        return value & part_mask;
    }

  gcc_unreachable ();
}
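
/* NOTE (illustrative, added by the editor, not part of the original
   source): with op = (const_int 0xa000), mode = HImode and def = 0,
   the scan starts at the least significant halfword and returns
   0xa000, the first part that differs from DEF.  */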

/* If OP is an integer constant of mode MODE with exactly one
   part of mode PART_MODE unequal to DEF, return the number of that
   part.  Otherwise, return -1.  */

int
s390_single_part (rtx op,
                  enum machine_mode mode,
                  enum machine_mode part_mode,
                  int def)
{
  unsigned HOST_WIDE_INT value = 0;
  int n_parts = GET_MODE_SIZE (mode) / GET_MODE_SIZE (part_mode);
  unsigned HOST_WIDE_INT part_mask
    = ((unsigned HOST_WIDE_INT)1 << GET_MODE_BITSIZE (part_mode)) - 1;
  int i, part = -1;

  if (GET_CODE (op) != CONST_INT)
    return -1;

  for (i = 0; i < n_parts; i++)
    {
      if (i == 0)
        value = (unsigned HOST_WIDE_INT) INTVAL (op);
      else
        value >>= GET_MODE_BITSIZE (part_mode);

      if ((value & part_mask) != (def & part_mask))
        {
          if (part != -1)
            return -1;
          else
            part = i;
        }
    }
  return part == -1 ? -1 : n_parts - 1 - part;
}
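
/* NOTE (illustrative, added by the editor, not part of the original
   source): for op = (const_int 0x00120000), mode = SImode,
   part_mode = HImode and def = 0, only the high halfword (0x0012)
   differs from DEF, so this returns 0; as the final
   "n_parts - 1 - part" shows, the returned part number counts from
   the most significant part.  */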

/* Return true if IN contains a contiguous bitfield in the lower SIZE
   bits and no other bits are set in IN.  POS and LENGTH can be used
   to obtain the start position and the length of the bitfield.

   POS gives the position of the first bit of the bitfield counting
   from the lowest order bit starting with zero.  In order to use this
   value for S/390 instructions this has to be converted to "bits big
   endian" style.  */

bool
s390_contiguous_bitmask_p (unsigned HOST_WIDE_INT in, int size,
                           int *pos, int *length)
{
  int tmp_pos = 0;
  int tmp_length = 0;
  int i;
  unsigned HOST_WIDE_INT mask = 1ULL;
  bool contiguous = false;

  for (i = 0; i < size; mask <<= 1, i++)
    {
      if (contiguous)
        {
          if (mask & in)
            tmp_length++;
          else
            break;
        }
      else
        {
          if (mask & in)
            {
              contiguous = true;
              tmp_length++;
            }
          else
            tmp_pos++;
        }
    }

  if (!tmp_length)
    return false;

  /* Calculate a mask for all bits beyond the contiguous bits.  */
  mask = (-1LL & ~(((1ULL << (tmp_length + tmp_pos - 1)) << 1) - 1));

  if (mask & in)
    return false;

  if (tmp_length + tmp_pos - 1 > size)
    return false;

  if (length)
    *length = tmp_length;

  if (pos)
    *pos = tmp_pos;

  return true;
}
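
/* NOTE (illustrative, added by the editor, not part of the original
   source): for in = 0x0ff0 and size = 16 this returns true with
   *pos = 4 and *length = 8: bits 4 through 11 (counting from the
   least significant bit) form the contiguous field and no other bits
   are set.  */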

/* Check whether a rotate of ROTL followed by an AND of CONTIG is
   equivalent to a shift followed by the AND.  In particular, CONTIG
   should not overlap the (rotated) bit 0/bit 63 gap.  Negative values
   for ROTL indicate a rotate to the right.  */

bool
s390_extzv_shift_ok (int bitsize, int rotl, unsigned HOST_WIDE_INT contig)
{
  int pos, len;
  bool ok;

  ok = s390_contiguous_bitmask_p (contig, bitsize, &pos, &len);
  gcc_assert (ok);

  return ((rotl >= 0 && rotl <= pos)
          || (rotl < 0 && -rotl <= bitsize - len - pos));
}

/* Check whether we can (and want to) split a double-word
   move in mode MODE from SRC to DST into two single-word
   moves, moving the subword FIRST_SUBWORD first.  */

bool
s390_split_ok_p (rtx dst, rtx src, enum machine_mode mode, int first_subword)
{
  /* Floating point registers cannot be split.  */
  if (FP_REG_P (src) || FP_REG_P (dst))
    return false;

  /* We don't need to split if operands are directly accessible.  */
  if (s_operand (src, mode) || s_operand (dst, mode))
    return false;

  /* Non-offsettable memory references cannot be split.  */
  if ((GET_CODE (src) == MEM && !offsettable_memref_p (src))
      || (GET_CODE (dst) == MEM && !offsettable_memref_p (dst)))
    return false;

  /* Moving the first subword must not clobber a register
     needed to move the second subword.  */
  if (register_operand (dst, mode))
    {
      rtx subreg = operand_subword (dst, first_subword, 0, mode);
      if (reg_overlap_mentioned_p (subreg, src))
        return false;
    }

  return true;
}

/* Return true if it can be proven that [MEM1, MEM1 + SIZE]
   and [MEM2, MEM2 + SIZE] do overlap and false
   otherwise.  */

bool
s390_overlap_p (rtx mem1, rtx mem2, HOST_WIDE_INT size)
{
  rtx addr1, addr2, addr_delta;
  HOST_WIDE_INT delta;

  if (GET_CODE (mem1) != MEM || GET_CODE (mem2) != MEM)
    return true;

  if (size == 0)
    return false;

  addr1 = XEXP (mem1, 0);
  addr2 = XEXP (mem2, 0);

  addr_delta = simplify_binary_operation (MINUS, Pmode, addr2, addr1);

  /* This overlapping check is used by peepholes merging memory block
     operations.  Overlapping operations would otherwise be recognized by
     the S/390 hardware and would fall back to a slower implementation.
     Allowing overlapping operations would lead to slow code but not to
     wrong code.  Therefore we are somewhat optimistic if we cannot prove
     that the memory blocks are overlapping.
     That's why we return false here although this may accept operations on
     overlapping memory areas.  */
  if (!addr_delta || GET_CODE (addr_delta) != CONST_INT)
    return false;

  delta = INTVAL (addr_delta);

  if (delta == 0
      || (delta > 0 && delta < size)
      || (delta < 0 && -delta < size))
    return true;

  return false;
}

/* Check whether the address of memory reference MEM2 equals exactly
   the address of memory reference MEM1 plus DELTA.  Return true if
   we can prove this to be the case, false otherwise.  */

bool
s390_offset_p (rtx mem1, rtx mem2, rtx delta)
{
  rtx addr1, addr2, addr_delta;

  if (GET_CODE (mem1) != MEM || GET_CODE (mem2) != MEM)
    return false;

  addr1 = XEXP (mem1, 0);
  addr2 = XEXP (mem2, 0);

  addr_delta = simplify_binary_operation (MINUS, Pmode, addr2, addr1);
  if (!addr_delta || !rtx_equal_p (addr_delta, delta))
    return false;

  return true;
}

/* Expand logical operator CODE in mode MODE with operands OPERANDS.  */

void
s390_expand_logical_operator (enum rtx_code code, enum machine_mode mode,
                              rtx *operands)
{
  enum machine_mode wmode = mode;
  rtx dst = operands[0];
  rtx src1 = operands[1];
  rtx src2 = operands[2];
  rtx op, clob, tem;

  /* If we cannot handle the operation directly, use a temp register.  */
  if (!s390_logical_operator_ok_p (operands))
    dst = gen_reg_rtx (mode);

  /* QImode and HImode patterns make sense only if we have a destination
     in memory.  Otherwise perform the operation in SImode.  */
  if ((mode == QImode || mode == HImode) && GET_CODE (dst) != MEM)
    wmode = SImode;

  /* Widen operands if required.  */
  if (mode != wmode)
    {
      if (GET_CODE (dst) == SUBREG
          && (tem = simplify_subreg (wmode, dst, mode, 0)) != 0)
        dst = tem;
      else if (REG_P (dst))
        dst = gen_rtx_SUBREG (wmode, dst, 0);
      else
        dst = gen_reg_rtx (wmode);

      if (GET_CODE (src1) == SUBREG
          && (tem = simplify_subreg (wmode, src1, mode, 0)) != 0)
        src1 = tem;
      else if (GET_MODE (src1) != VOIDmode)
        src1 = gen_rtx_SUBREG (wmode, force_reg (mode, src1), 0);

      if (GET_CODE (src2) == SUBREG
          && (tem = simplify_subreg (wmode, src2, mode, 0)) != 0)
        src2 = tem;
      else if (GET_MODE (src2) != VOIDmode)
        src2 = gen_rtx_SUBREG (wmode, force_reg (mode, src2), 0);
    }

  /* Emit the instruction.  */
  op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, wmode, src1, src2));
  clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, CC_REGNUM));
  emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));

  /* Fix up the destination if needed.  */
  if (dst != operands[0])
    emit_move_insn (operands[0], gen_lowpart (mode, dst));
}

/* Check whether OPERANDS are OK for a logical operation (AND, IOR, XOR).  */

bool
s390_logical_operator_ok_p (rtx *operands)
{
  /* If the destination operand is in memory, it needs to coincide
     with one of the source operands.  After reload, it has to be
     the first source operand.  */
  if (GET_CODE (operands[0]) == MEM)
    return rtx_equal_p (operands[0], operands[1])
           || (!reload_completed && rtx_equal_p (operands[0], operands[2]));

  return true;
}

/* Narrow logical operation CODE of memory operand MEMOP with immediate
   operand IMMOP to switch from SS to SI type instructions.  */

void
s390_narrow_logical_operator (enum rtx_code code, rtx *memop, rtx *immop)
{
  int def = code == AND ? -1 : 0;
  HOST_WIDE_INT mask;
  int part;

  gcc_assert (GET_CODE (*memop) == MEM);
  gcc_assert (!MEM_VOLATILE_P (*memop));

  mask = s390_extract_part (*immop, QImode, def);
  part = s390_single_part (*immop, GET_MODE (*memop), QImode, def);
  gcc_assert (part >= 0);

  *memop = adjust_address (*memop, QImode, part);
  *immop = gen_int_mode (mask, QImode);
}
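
/* NOTE (illustrative, added by the editor, not part of the original
   source): e.g. an SImode AND of a memory operand with 0xffff00ff
   touches only one byte, so the operation is narrowed to a QImode AND
   of the byte at offset 2 with mask 0x00, which can then be emitted
   as a single NI instruction instead of an SS-type NC.  */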

/* How to allocate a 'struct machine_function'.  */

static struct machine_function *
s390_init_machine_status (void)
{
  return ggc_alloc_cleared_machine_function ();
}

/* Map for smallest class containing reg regno.  */

const enum reg_class regclass_map[FIRST_PSEUDO_REGISTER] =
{ GENERAL_REGS, ADDR_REGS, ADDR_REGS, ADDR_REGS,
  ADDR_REGS,    ADDR_REGS, ADDR_REGS, ADDR_REGS,
  ADDR_REGS,    ADDR_REGS, ADDR_REGS, ADDR_REGS,
  ADDR_REGS,    ADDR_REGS, ADDR_REGS, ADDR_REGS,
  FP_REGS,      FP_REGS,   FP_REGS,   FP_REGS,
  FP_REGS,      FP_REGS,   FP_REGS,   FP_REGS,
  FP_REGS,      FP_REGS,   FP_REGS,   FP_REGS,
  FP_REGS,      FP_REGS,   FP_REGS,   FP_REGS,
  ADDR_REGS,    CC_REGS,   ADDR_REGS, ADDR_REGS,
  ACCESS_REGS,  ACCESS_REGS
};

/* Return attribute type of insn.  */

static enum attr_type
s390_safe_attr_type (rtx insn)
{
  if (recog_memoized (insn) >= 0)
    return get_attr_type (insn);
  else
    return TYPE_NONE;
}

/* Return true if DISP is a valid short displacement.  */

static bool
s390_short_displacement (rtx disp)
{
  /* No displacement is OK.  */
  if (!disp)
    return true;

  /* Without the long displacement facility we don't need to
     distinguish between long and short displacement.  */
  if (!TARGET_LONG_DISPLACEMENT)
    return true;

  /* Integer displacement in range.  */
  if (GET_CODE (disp) == CONST_INT)
    return INTVAL (disp) >= 0 && INTVAL (disp) < 4096;

  /* GOT offset is not OK, the GOT can be large.  */
  if (GET_CODE (disp) == CONST
      && GET_CODE (XEXP (disp, 0)) == UNSPEC
      && (XINT (XEXP (disp, 0), 1) == UNSPEC_GOT
          || XINT (XEXP (disp, 0), 1) == UNSPEC_GOTNTPOFF))
    return false;

  /* All other symbolic constants are literal pool references,
     which are OK as the literal pool must be small.  */
  if (GET_CODE (disp) == CONST)
    return true;

  return false;
}

/* Decompose a RTL expression ADDR for a memory address into
   its components, returned in OUT.

   Returns false if ADDR is not a valid memory address, true
   otherwise.  If OUT is NULL, don't return the components,
   but check for validity only.

   Note: Only addresses in canonical form are recognized.
   LEGITIMIZE_ADDRESS should convert non-canonical forms to the
   canonical form so that they will be recognized.  */

static int
s390_decompose_address (rtx addr, struct s390_address *out)
{
  HOST_WIDE_INT offset = 0;
  rtx base = NULL_RTX;
  rtx indx = NULL_RTX;
  rtx disp = NULL_RTX;
  rtx orig_disp;
  bool pointer = false;
  bool base_ptr = false;
  bool indx_ptr = false;
  bool literal_pool = false;

  /* We may need to substitute the literal pool base register into the address
     below.  However, at this point we do not know which register is going to
     be used as base, so we substitute the arg pointer register.  This is going
     to be treated as holding a pointer below -- it shouldn't be used for any
     other purpose.  */
  rtx fake_pool_base = gen_rtx_REG (Pmode, ARG_POINTER_REGNUM);

  /* Decompose address into base + index + displacement.  */

  if (GET_CODE (addr) == REG || GET_CODE (addr) == UNSPEC)
    base = addr;

  else if (GET_CODE (addr) == PLUS)
    {
      rtx op0 = XEXP (addr, 0);
      rtx op1 = XEXP (addr, 1);
      enum rtx_code code0 = GET_CODE (op0);
      enum rtx_code code1 = GET_CODE (op1);

      if (code0 == REG || code0 == UNSPEC)
        {
          if (code1 == REG || code1 == UNSPEC)
            {
              indx = op0;       /* index + base */
              base = op1;
            }

          else
            {
              base = op0;       /* base + displacement */
              disp = op1;
            }
        }

      else if (code0 == PLUS)
        {
          indx = XEXP (op0, 0); /* index + base + disp */
          base = XEXP (op0, 1);
          disp = op1;
        }

      else
        {
          return false;
        }
    }

  else
    disp = addr;                /* displacement */

  /* Extract integer part of displacement.  */
  orig_disp = disp;
  if (disp)
    {
      if (GET_CODE (disp) == CONST_INT)
        {
          offset = INTVAL (disp);
          disp = NULL_RTX;
        }
      else if (GET_CODE (disp) == CONST
               && GET_CODE (XEXP (disp, 0)) == PLUS
               && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
        {
          offset = INTVAL (XEXP (XEXP (disp, 0), 1));
          disp = XEXP (XEXP (disp, 0), 0);
        }
    }

  /* Strip off CONST here to avoid special case tests later.  */
  if (disp && GET_CODE (disp) == CONST)
    disp = XEXP (disp, 0);

  /* We can convert literal pool addresses to
     displacements by basing them off the base register.  */
  if (disp && GET_CODE (disp) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (disp))
    {
      /* Either base or index must be free to hold the base register.  */
      if (!base)
        base = fake_pool_base, literal_pool = true;
      else if (!indx)
        indx = fake_pool_base, literal_pool = true;
      else
        return false;

      /* Mark up the displacement.  */
      disp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, disp),
                             UNSPEC_LTREL_OFFSET);
    }

  /* Validate base register.  */
  if (base)
    {
      if (GET_CODE (base) == UNSPEC)
        switch (XINT (base, 1))
          {
          case UNSPEC_LTREF:
            if (!disp)
              disp = gen_rtx_UNSPEC (Pmode,
                                     gen_rtvec (1, XVECEXP (base, 0, 0)),
                                     UNSPEC_LTREL_OFFSET);
            else
              return false;

            base = XVECEXP (base, 0, 1);
            break;

          case UNSPEC_LTREL_BASE:
            if (XVECLEN (base, 0) == 1)
              base = fake_pool_base, literal_pool = true;
            else
              base = XVECEXP (base, 0, 1);
            break;

          default:
            return false;
          }

      if (!REG_P (base)
          || (GET_MODE (base) != SImode
              && GET_MODE (base) != Pmode))
        return false;

      if (REGNO (base) == STACK_POINTER_REGNUM
          || REGNO (base) == FRAME_POINTER_REGNUM
          || ((reload_completed || reload_in_progress)
              && frame_pointer_needed
              && REGNO (base) == HARD_FRAME_POINTER_REGNUM)
          || REGNO (base) == ARG_POINTER_REGNUM
          || (flag_pic
              && REGNO (base) == PIC_OFFSET_TABLE_REGNUM))
        pointer = base_ptr = true;

      if ((reload_completed || reload_in_progress)
          && base == cfun->machine->base_reg)
        pointer = base_ptr = literal_pool = true;
    }

  /* Validate index register.  */
  if (indx)
    {
      if (GET_CODE (indx) == UNSPEC)
        switch (XINT (indx, 1))
          {
          case UNSPEC_LTREF:
            if (!disp)
              disp = gen_rtx_UNSPEC (Pmode,
                                     gen_rtvec (1, XVECEXP (indx, 0, 0)),
                                     UNSPEC_LTREL_OFFSET);
            else
              return false;

            indx = XVECEXP (indx, 0, 1);
            break;

          case UNSPEC_LTREL_BASE:
            if (XVECLEN (indx, 0) == 1)
              indx = fake_pool_base, literal_pool = true;
            else
              indx = XVECEXP (indx, 0, 1);
            break;

          default:
            return false;
          }

      if (!REG_P (indx)
          || (GET_MODE (indx) != SImode
              && GET_MODE (indx) != Pmode))
        return false;

      if (REGNO (indx) == STACK_POINTER_REGNUM
          || REGNO (indx) == FRAME_POINTER_REGNUM
          || ((reload_completed || reload_in_progress)
              && frame_pointer_needed
              && REGNO (indx) == HARD_FRAME_POINTER_REGNUM)
          || REGNO (indx) == ARG_POINTER_REGNUM
          || (flag_pic
              && REGNO (indx) == PIC_OFFSET_TABLE_REGNUM))
        pointer = indx_ptr = true;

      if ((reload_completed || reload_in_progress)
          && indx == cfun->machine->base_reg)
        pointer = indx_ptr = literal_pool = true;
    }

  /* Prefer to use pointer as base, not index.  */
  if (base && indx && !base_ptr
      && (indx_ptr || (!REG_POINTER (base) && REG_POINTER (indx))))
    {
      rtx tmp = base;
      base = indx;
      indx = tmp;
    }

  /* Validate displacement.  */
  if (!disp)
    {
      /* If virtual registers are involved, the displacement will change later
         anyway as the virtual registers get eliminated.  This could make a
         valid displacement invalid, but it is more likely to make an invalid
         displacement valid, because we sometimes access the register save area
         via negative offsets to one of those registers.
         Thus we don't check the displacement for validity here.  If after
         elimination the displacement turns out to be invalid after all,
         this is fixed up by reload in any case.  */
1971 /* LRA always keeps displacements up to date, and we need to
1972 know that the displacement is correct throughout LRA, not
1973 only at the final elimination. */
1974 if (lra_in_progress
1975 || (base != arg_pointer_rtx
1976 && indx != arg_pointer_rtx
1977 && base != return_address_pointer_rtx
1978 && indx != return_address_pointer_rtx
1979 && base != frame_pointer_rtx
1980 && indx != frame_pointer_rtx
1981 && base != virtual_stack_vars_rtx
1982 && indx != virtual_stack_vars_rtx))
1983 if (!DISP_IN_RANGE (offset))
1984 return false;
1985 }
1986 else
1987 {
1988 /* All the special cases are pointers. */
1989 pointer = true;
1990
1991 /* In the small-PIC case, the linker converts @GOT
1992 and @GOTNTPOFF offsets to possible displacements. */
1993 if (GET_CODE (disp) == UNSPEC
1994 && (XINT (disp, 1) == UNSPEC_GOT
1995 || XINT (disp, 1) == UNSPEC_GOTNTPOFF)
1996 && flag_pic == 1)
1997 {
1998 ;
1999 }
2000
2001 /* Accept pool label offsets. */
2002 else if (GET_CODE (disp) == UNSPEC
2003 && XINT (disp, 1) == UNSPEC_POOL_OFFSET)
2004 ;
2005
2006 /* Accept literal pool references. */
2007 else if (GET_CODE (disp) == UNSPEC
2008 && XINT (disp, 1) == UNSPEC_LTREL_OFFSET)
2009 {
2010 /* In case CSE pulled a non-literal-pool reference out of
2011 the pool we have to reject the address. This is
2012 especially important when loading the GOT pointer on
2013 non-zarch CPUs. In this case the literal pool contains an
2014 lt-relative offset to the _GLOBAL_OFFSET_TABLE_ label,
2015 which will most likely exceed the displacement range. */
2016 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
2017 || !CONSTANT_POOL_ADDRESS_P (XVECEXP (disp, 0, 0)))
2018 return false;
2019
2020 orig_disp = gen_rtx_CONST (Pmode, disp);
2021 if (offset)
2022 {
2023 /* If we have an offset, make sure it does not
2024 exceed the size of the constant pool entry. */
2025 rtx sym = XVECEXP (disp, 0, 0);
2026 if (offset >= GET_MODE_SIZE (get_pool_mode (sym)))
2027 return false;
2028
2029 orig_disp = plus_constant (Pmode, orig_disp, offset);
2030 }
2031 }
2032
2033 else
2034 return false;
2035 }
2036
2037 if (!base && !indx)
2038 pointer = true;
2039
2040 if (out)
2041 {
2042 out->base = base;
2043 out->indx = indx;
2044 out->disp = orig_disp;
2045 out->pointer = pointer;
2046 out->literal_pool = literal_pool;
2047 }
2048
2049 return true;
2050 }
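/* Example (illustrative): an address such as
   (plus (plus (reg %r3) (reg %r2)) (const_int 40)) decomposes into
   indx == %r3, base == %r2 and disp == 40, matching the D(X,B)
   operand form used by the RX/RXY instruction formats.  */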
2051
2052 /* Decompose an RTL expression OP for a shift count into its components,
2053 and return the base register in BASE and the offset in OFFSET.
2054
2055 Return true if OP is a valid shift count, false if not. */
2056
2057 bool
2058 s390_decompose_shift_count (rtx op, rtx *base, HOST_WIDE_INT *offset)
2059 {
2060 HOST_WIDE_INT off = 0;
2061
2062 /* We can have an integer constant, an address register,
2063 or a sum of the two. */
2064 if (GET_CODE (op) == CONST_INT)
2065 {
2066 off = INTVAL (op);
2067 op = NULL_RTX;
2068 }
2069 if (op && GET_CODE (op) == PLUS && GET_CODE (XEXP (op, 1)) == CONST_INT)
2070 {
2071 off = INTVAL (XEXP (op, 1));
2072 op = XEXP (op, 0);
2073 }
2074 while (op && GET_CODE (op) == SUBREG)
2075 op = SUBREG_REG (op);
2076
2077 if (op && GET_CODE (op) != REG)
2078 return false;
2079
2080 if (offset)
2081 *offset = off;
2082 if (base)
2083 *base = op;
2084
2085 return true;
2086 }
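/* Example (illustrative): (const_int 7) yields BASE == NULL and
   OFFSET == 7, while (plus (reg %r1) (const_int 15)) yields
   BASE == %r1 and OFFSET == 15; anything that is not a (subreg of
   a) register, a constant, or their sum is rejected.  */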
2087
2088
2089 /* Return true if OP is a memory operand whose address contains no index register. */
2090
2091 bool
2092 s390_legitimate_address_without_index_p (rtx op)
2093 {
2094 struct s390_address addr;
2095
2096 if (!s390_decompose_address (XEXP (op, 0), &addr))
2097 return false;
2098 if (addr.indx)
2099 return false;
2100
2101 return true;
2102 }
2103
2104
2105 /* Return TRUE if ADDR is an operand valid for a load/store relative
2106 instruction. Be aware that the alignment of the operand needs to
2107 be checked separately.
2108 Valid addresses are single references or a sum of a reference and a
2109 constant integer. Return these parts in SYMREF and ADDEND. You can
2110 pass NULL in SYMREF and/or ADDEND if you are not interested in these
2111 values. Literal pool references are *not* considered symbol
2112 references. */
2113
2114 static bool
2115 s390_loadrelative_operand_p (rtx addr, rtx *symref, HOST_WIDE_INT *addend)
2116 {
2117 HOST_WIDE_INT tmpaddend = 0;
2118
2119 if (GET_CODE (addr) == CONST)
2120 addr = XEXP (addr, 0);
2121
2122 if (GET_CODE (addr) == PLUS)
2123 {
2124 if (!CONST_INT_P (XEXP (addr, 1)))
2125 return false;
2126
2127 tmpaddend = INTVAL (XEXP (addr, 1));
2128 addr = XEXP (addr, 0);
2129 }
2130
2131 if ((GET_CODE (addr) == SYMBOL_REF && !CONSTANT_POOL_ADDRESS_P (addr))
2132 || (GET_CODE (addr) == UNSPEC
2133 && (XINT (addr, 1) == UNSPEC_GOTENT
2134 || (TARGET_CPU_ZARCH && XINT (addr, 1) == UNSPEC_PLT))))
2135 {
2136 if (symref)
2137 *symref = addr;
2138 if (addend)
2139 *addend = tmpaddend;
2140
2141 return true;
2142 }
2143 return false;
2144 }
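/* Example (illustrative): for (const (plus (symbol_ref "x")
   (const_int 8))) this returns true with SYMREF set to the
   symbol_ref for "x" and ADDEND set to 8, provided "x" is not a
   constant pool entry.  */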
2145
2146 /* Return true if the address in OP is valid for constraint letter C
2147 if wrapped in a MEM rtx. Set LIT_POOL_OK to true if literal
2148 pool MEMs should be accepted. Only the Q, R, S, T constraint
2149 letters are allowed for C. */
2150
2151 static int
2152 s390_check_qrst_address (char c, rtx op, bool lit_pool_ok)
2153 {
2154 struct s390_address addr;
2155 bool decomposed = false;
2156
2157 /* This check makes sure that no symbolic address (except literal
2158 pool references) are accepted by the R or T constraints. */
2159 if (s390_loadrelative_operand_p (op, NULL, NULL))
2160 return 0;
2161
2162 /* Ensure literal pool references are only accepted if LIT_POOL_OK. */
2163 if (!lit_pool_ok)
2164 {
2165 if (!s390_decompose_address (op, &addr))
2166 return 0;
2167 if (addr.literal_pool)
2168 return 0;
2169 decomposed = true;
2170 }
2171
2172 switch (c)
2173 {
2174 case 'Q': /* no index short displacement */
2175 if (!decomposed && !s390_decompose_address (op, &addr))
2176 return 0;
2177 if (addr.indx)
2178 return 0;
2179 if (!s390_short_displacement (addr.disp))
2180 return 0;
2181 break;
2182
2183 case 'R': /* with index short displacement */
2184 if (TARGET_LONG_DISPLACEMENT)
2185 {
2186 if (!decomposed && !s390_decompose_address (op, &addr))
2187 return 0;
2188 if (!s390_short_displacement (addr.disp))
2189 return 0;
2190 }
2191 /* Any invalid address here will be fixed up by reload,
2192 so accept it for the most generic constraint. */
2193 break;
2194
2195 case 'S': /* no index long displacement */
2196 if (!TARGET_LONG_DISPLACEMENT)
2197 return 0;
2198 if (!decomposed && !s390_decompose_address (op, &addr))
2199 return 0;
2200 if (addr.indx)
2201 return 0;
2202 if (s390_short_displacement (addr.disp))
2203 return 0;
2204 break;
2205
2206 case 'T': /* with index long displacement */
2207 if (!TARGET_LONG_DISPLACEMENT)
2208 return 0;
2209 /* Any invalid address here will be fixed up by reload,
2210 so accept it for the most generic constraint. */
2211 if ((decomposed || s390_decompose_address (op, &addr))
2212 && s390_short_displacement (addr.disp))
2213 return 0;
2214 break;
2215 default:
2216 return 0;
2217 }
2218 return 1;
2219 }
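/* Summary of the address constraint letters checked above:
   Q: base + short displacement, no index
   R: base + index + short displacement
   S: base + long displacement, no index
   T: base + index + long displacement
   Without TARGET_LONG_DISPLACEMENT, S and T are rejected outright
   and R accepts any address that reload can still fix up.  */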
2220
2221
2222 /* Evaluates constraint strings described by the regular expression
2223 ([A|B|Z](Q|R|S|T))|U|W|Y and returns 1 if OP is a valid operand for
2224 the constraint given in STR, and 0 otherwise. */
2225
2226 int
2227 s390_mem_constraint (const char *str, rtx op)
2228 {
2229 char c = str[0];
2230
2231 switch (c)
2232 {
2233 case 'A':
2234 /* Check for offsettable variants of memory constraints. */
2235 if (!MEM_P (op) || MEM_VOLATILE_P (op))
2236 return 0;
2237 if ((reload_completed || reload_in_progress)
2238 ? !offsettable_memref_p (op) : !offsettable_nonstrict_memref_p (op))
2239 return 0;
2240 return s390_check_qrst_address (str[1], XEXP (op, 0), true);
2241 case 'B':
2242 /* Check for non-literal-pool variants of memory constraints. */
2243 if (!MEM_P (op))
2244 return 0;
2245 return s390_check_qrst_address (str[1], XEXP (op, 0), false);
2246 case 'Q':
2247 case 'R':
2248 case 'S':
2249 case 'T':
2250 if (GET_CODE (op) != MEM)
2251 return 0;
2252 return s390_check_qrst_address (c, XEXP (op, 0), true);
2253 case 'U':
2254 return (s390_check_qrst_address ('Q', op, true)
2255 || s390_check_qrst_address ('R', op, true));
2256 case 'W':
2257 return (s390_check_qrst_address ('S', op, true)
2258 || s390_check_qrst_address ('T', op, true));
2259 case 'Y':
2260 /* Simply check for the basic form of a shift count. Reload will
2261 take care of making sure we have a proper base register. */
2262 if (!s390_decompose_shift_count (op, NULL, NULL))
2263 return 0;
2264 break;
2265 case 'Z':
2266 return s390_check_qrst_address (str[1], op, true);
2267 default:
2268 return 0;
2269 }
2270 return 1;
2271 }
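/* Illustrative mapping of the letters above: 'U' is Q-or-R (short
   displacement), 'W' is S-or-T (long displacement); the two-letter
   'A'/'B'/'Z' forms re-dispatch their second letter to the Q/R/S/T
   checks with, respectively, an offsettable-MEM, a non-literal-pool,
   or a plain-address interpretation of OP.  */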
2272
2273
2274 /* Evaluates constraint strings starting with letter O. Input
2275 parameter C is the second letter following the "O" in the constraint
2276 string. Returns 1 if VALUE meets the respective constraint and 0
2277 otherwise. */
2278
2279 int
2280 s390_O_constraint_str (const char c, HOST_WIDE_INT value)
2281 {
2282 if (!TARGET_EXTIMM)
2283 return 0;
2284
2285 switch (c)
2286 {
2287 case 's':
2288 return trunc_int_for_mode (value, SImode) == value;
2289
2290 case 'p':
2291 return value == 0
2292 || s390_single_part (GEN_INT (value), DImode, SImode, 0) == 1;
2293
2294 case 'n':
2295 return s390_single_part (GEN_INT (value - 1), DImode, SImode, -1) == 1;
2296
2297 default:
2298 gcc_unreachable ();
2299 }
2300 }
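/* Illustrative reading of the checks above: Os accepts any value
   that survives sign extension from 32 bits, while Op and On use
   s390_single_part to require that only a single 32-bit part of
   VALUE (respectively VALUE - 1) differs from an all-zeros (Op) or
   all-ones (On) background pattern.  */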
2301
2302
2303 /* Evaluates constraint strings starting with letter N. Parameter STR
2304 contains the letters following letter "N" in the constraint string.
2305 Returns true if VALUE matches the constraint. */
2306
2307 int
2308 s390_N_constraint_str (const char *str, HOST_WIDE_INT value)
2309 {
2310 enum machine_mode mode, part_mode;
2311 int def;
2312 int part, part_goal;
2313
2314
2315 if (str[0] == 'x')
2316 part_goal = -1;
2317 else
2318 part_goal = str[0] - '0';
2319
2320 switch (str[1])
2321 {
2322 case 'Q':
2323 part_mode = QImode;
2324 break;
2325 case 'H':
2326 part_mode = HImode;
2327 break;
2328 case 'S':
2329 part_mode = SImode;
2330 break;
2331 default:
2332 return 0;
2333 }
2334
2335 switch (str[2])
2336 {
2337 case 'H':
2338 mode = HImode;
2339 break;
2340 case 'S':
2341 mode = SImode;
2342 break;
2343 case 'D':
2344 mode = DImode;
2345 break;
2346 default:
2347 return 0;
2348 }
2349
2350 switch (str[3])
2351 {
2352 case '0':
2353 def = 0;
2354 break;
2355 case 'F':
2356 def = -1;
2357 break;
2358 default:
2359 return 0;
2360 }
2361
2362 if (GET_MODE_SIZE (mode) <= GET_MODE_SIZE (part_mode))
2363 return 0;
2364
2365 part = s390_single_part (GEN_INT (value), mode, part_mode, def);
2366 if (part < 0)
2367 return 0;
2368 if (part_goal != -1 && part_goal != part)
2369 return 0;
2370
2371 return 1;
2372 }
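/* Illustrative decoding of an N constraint string: in "N3HD0" the
   '3' is the required part number ('x' accepts any part), 'H' is
   the mode of the single differing part, 'D' the mode of the whole
   value, and '0' (or 'F' for all-ones) the value every other part
   must hold.  */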
2373
2374
2375 /* Returns true if the input parameter VALUE is a float zero. */
2376
2377 int
2378 s390_float_const_zero_p (rtx value)
2379 {
2380 return (GET_MODE_CLASS (GET_MODE (value)) == MODE_FLOAT
2381 && value == CONST0_RTX (GET_MODE (value)));
2382 }
2383
2384 /* Implement TARGET_REGISTER_MOVE_COST. */
2385
2386 static int
2387 s390_register_move_cost (enum machine_mode mode ATTRIBUTE_UNUSED,
2388 reg_class_t from, reg_class_t to)
2389 {
2390 /* On s390, copy between fprs and gprs is expensive as long as no
2391 ldgr/lgdr can be used. */
2392 if ((!TARGET_Z10 || GET_MODE_SIZE (mode) != 8)
2393 && ((reg_classes_intersect_p (from, GENERAL_REGS)
2394 && reg_classes_intersect_p (to, FP_REGS))
2395 || (reg_classes_intersect_p (from, FP_REGS)
2396 && reg_classes_intersect_p (to, GENERAL_REGS))))
2397 return 10;
2398
2399 return 1;
2400 }
2401
2402 /* Implement TARGET_MEMORY_MOVE_COST. */
2403
2404 static int
2405 s390_memory_move_cost (enum machine_mode mode ATTRIBUTE_UNUSED,
2406 reg_class_t rclass ATTRIBUTE_UNUSED,
2407 bool in ATTRIBUTE_UNUSED)
2408 {
2409 return 1;
2410 }
2411
2412 /* Compute a (partial) cost for rtx X. Return true if the complete
2413 cost has been computed, and false if subexpressions should be
2414 scanned. In either case, *TOTAL contains the cost result.
2415 CODE contains GET_CODE (x), OUTER_CODE contains the code
2416 of the superexpression of x. */
2417
2418 static bool
2419 s390_rtx_costs (rtx x, int code, int outer_code, int opno ATTRIBUTE_UNUSED,
2420 int *total, bool speed ATTRIBUTE_UNUSED)
2421 {
2422 switch (code)
2423 {
2424 case CONST:
2425 case CONST_INT:
2426 case LABEL_REF:
2427 case SYMBOL_REF:
2428 case CONST_DOUBLE:
2429 case MEM:
2430 *total = 0;
2431 return true;
2432
2433 case ASHIFT:
2434 case ASHIFTRT:
2435 case LSHIFTRT:
2436 case ROTATE:
2437 case ROTATERT:
2438 case AND:
2439 case IOR:
2440 case XOR:
2441 case NEG:
2442 case NOT:
2443 *total = COSTS_N_INSNS (1);
2444 return false;
2445
2446 case PLUS:
2447 case MINUS:
2448 *total = COSTS_N_INSNS (1);
2449 return false;
2450
2451 case MULT:
2452 switch (GET_MODE (x))
2453 {
2454 case SImode:
2455 {
2456 rtx left = XEXP (x, 0);
2457 rtx right = XEXP (x, 1);
2458 if (GET_CODE (right) == CONST_INT
2459 && CONST_OK_FOR_K (INTVAL (right)))
2460 *total = s390_cost->mhi;
2461 else if (GET_CODE (left) == SIGN_EXTEND)
2462 *total = s390_cost->mh;
2463 else
2464 *total = s390_cost->ms; /* msr, ms, msy */
2465 break;
2466 }
2467 case DImode:
2468 {
2469 rtx left = XEXP (x, 0);
2470 rtx right = XEXP (x, 1);
2471 if (TARGET_ZARCH)
2472 {
2473 if (GET_CODE (right) == CONST_INT
2474 && CONST_OK_FOR_K (INTVAL (right)))
2475 *total = s390_cost->mghi;
2476 else if (GET_CODE (left) == SIGN_EXTEND)
2477 *total = s390_cost->msgf;
2478 else
2479 *total = s390_cost->msg; /* msgr, msg */
2480 }
2481 else /* TARGET_31BIT */
2482 {
2483 if (GET_CODE (left) == SIGN_EXTEND
2484 && GET_CODE (right) == SIGN_EXTEND)
2485 /* mulsidi case: mr, m */
2486 *total = s390_cost->m;
2487 else if (GET_CODE (left) == ZERO_EXTEND
2488 && GET_CODE (right) == ZERO_EXTEND
2489 && TARGET_CPU_ZARCH)
2490 /* umulsidi case: ml, mlr */
2491 *total = s390_cost->ml;
2492 else
2493 /* Complex calculation is required. */
2494 *total = COSTS_N_INSNS (40);
2495 }
2496 break;
2497 }
2498 case SFmode:
2499 case DFmode:
2500 *total = s390_cost->mult_df;
2501 break;
2502 case TFmode:
2503 *total = s390_cost->mxbr;
2504 break;
2505 default:
2506 return false;
2507 }
2508 return false;
2509
2510 case FMA:
2511 switch (GET_MODE (x))
2512 {
2513 case DFmode:
2514 *total = s390_cost->madbr;
2515 break;
2516 case SFmode:
2517 *total = s390_cost->maebr;
2518 break;
2519 default:
2520 return false;
2521 }
2522 /* A NEG in the third argument is free: FMSUB. */
2523 if (GET_CODE (XEXP (x, 2)) == NEG)
2524 {
2525 *total += (rtx_cost (XEXP (x, 0), FMA, 0, speed)
2526 + rtx_cost (XEXP (x, 1), FMA, 1, speed)
2527 + rtx_cost (XEXP (XEXP (x, 2), 0), FMA, 2, speed));
2528 return true;
2529 }
2530 return false;
2531
2532 case UDIV:
2533 case UMOD:
2534 if (GET_MODE (x) == TImode) /* 128 bit division */
2535 *total = s390_cost->dlgr;
2536 else if (GET_MODE (x) == DImode)
2537 {
2538 rtx right = XEXP (x, 1);
2539 if (GET_CODE (right) == ZERO_EXTEND) /* 64 by 32 bit division */
2540 *total = s390_cost->dlr;
2541 else /* 64 by 64 bit division */
2542 *total = s390_cost->dlgr;
2543 }
2544 else if (GET_MODE (x) == SImode) /* 32 bit division */
2545 *total = s390_cost->dlr;
2546 return false;
2547
2548 case DIV:
2549 case MOD:
2550 if (GET_MODE (x) == DImode)
2551 {
2552 rtx right = XEXP (x, 1);
2553 if (GET_CODE (right) == ZERO_EXTEND) /* 64 by 32 bit division */
2554 if (TARGET_ZARCH)
2555 *total = s390_cost->dsgfr;
2556 else
2557 *total = s390_cost->dr;
2558 else /* 64 by 64 bit division */
2559 *total = s390_cost->dsgr;
2560 }
2561 else if (GET_MODE (x) == SImode) /* 32 bit division */
2562 *total = s390_cost->dlr;
2563 else if (GET_MODE (x) == SFmode)
2564 {
2565 *total = s390_cost->debr;
2566 }
2567 else if (GET_MODE (x) == DFmode)
2568 {
2569 *total = s390_cost->ddbr;
2570 }
2571 else if (GET_MODE (x) == TFmode)
2572 {
2573 *total = s390_cost->dxbr;
2574 }
2575 return false;
2576
2577 case SQRT:
2578 if (GET_MODE (x) == SFmode)
2579 *total = s390_cost->sqebr;
2580 else if (GET_MODE (x) == DFmode)
2581 *total = s390_cost->sqdbr;
2582 else /* TFmode */
2583 *total = s390_cost->sqxbr;
2584 return false;
2585
2586 case SIGN_EXTEND:
2587 case ZERO_EXTEND:
2588 if (outer_code == MULT || outer_code == DIV || outer_code == MOD
2589 || outer_code == PLUS || outer_code == MINUS
2590 || outer_code == COMPARE)
2591 *total = 0;
2592 return false;
2593
2594 case COMPARE:
2595 *total = COSTS_N_INSNS (1);
2596 if (GET_CODE (XEXP (x, 0)) == AND
2597 && GET_CODE (XEXP (x, 1)) == CONST_INT
2598 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)
2599 {
2600 rtx op0 = XEXP (XEXP (x, 0), 0);
2601 rtx op1 = XEXP (XEXP (x, 0), 1);
2602 rtx op2 = XEXP (x, 1);
2603
2604 if (memory_operand (op0, GET_MODE (op0))
2605 && s390_tm_ccmode (op1, op2, 0) != VOIDmode)
2606 return true;
2607 if (register_operand (op0, GET_MODE (op0))
2608 && s390_tm_ccmode (op1, op2, 1) != VOIDmode)
2609 return true;
2610 }
2611 return false;
2612
2613 default:
2614 return false;
2615 }
2616 }
2617
2618 /* Return the cost of an address rtx ADDR. */
2619
2620 static int
2621 s390_address_cost (rtx addr, enum machine_mode mode ATTRIBUTE_UNUSED,
2622 addr_space_t as ATTRIBUTE_UNUSED,
2623 bool speed ATTRIBUTE_UNUSED)
2624 {
2625 struct s390_address ad;
2626 if (!s390_decompose_address (addr, &ad))
2627 return 1000;
2628
2629 return ad.indx ? COSTS_N_INSNS (1) + 1 : COSTS_N_INSNS (1);
2630 }
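/* The extra unit charged for an index register above mildly biases
   address selection toward base + displacement forms, which more
   instruction formats can encode directly.  */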
2631
2632 /* If OP is a SYMBOL_REF of a thread-local symbol, return its TLS mode,
2633 otherwise return 0. */
2634
2635 int
2636 tls_symbolic_operand (rtx op)
2637 {
2638 if (GET_CODE (op) != SYMBOL_REF)
2639 return 0;
2640 return SYMBOL_REF_TLS_MODEL (op);
2641 }
2642
2643 /* Split DImode access register reference REG (on 64-bit) into its constituent
2644 low and high parts, and store them into LO and HI. Note that gen_lowpart/
2645 gen_highpart cannot be used as they assume all registers are word-sized,
2646 while our access registers have only half that size. */
2647
2648 void
2649 s390_split_access_reg (rtx reg, rtx *lo, rtx *hi)
2650 {
2651 gcc_assert (TARGET_64BIT);
2652 gcc_assert (ACCESS_REG_P (reg));
2653 gcc_assert (GET_MODE (reg) == DImode);
2654 gcc_assert (!(REGNO (reg) & 1));
2655
2656 *lo = gen_rtx_REG (SImode, REGNO (reg) + 1);
2657 *hi = gen_rtx_REG (SImode, REGNO (reg));
2658 }
2659
2660 /* Return true if OP contains a symbol reference. */
2661
2662 bool
2663 symbolic_reference_mentioned_p (rtx op)
2664 {
2665 const char *fmt;
2666 int i;
2667
2668 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
2669 return 1;
2670
2671 fmt = GET_RTX_FORMAT (GET_CODE (op));
2672 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
2673 {
2674 if (fmt[i] == 'E')
2675 {
2676 int j;
2677
2678 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
2679 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
2680 return 1;
2681 }
2682
2683 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
2684 return 1;
2685 }
2686
2687 return 0;
2688 }
2689
2690 /* Return true if OP contains a reference to a thread-local symbol. */
2691
2692 bool
2693 tls_symbolic_reference_mentioned_p (rtx op)
2694 {
2695 const char *fmt;
2696 int i;
2697
2698 if (GET_CODE (op) == SYMBOL_REF)
2699 return tls_symbolic_operand (op);
2700
2701 fmt = GET_RTX_FORMAT (GET_CODE (op));
2702 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
2703 {
2704 if (fmt[i] == 'E')
2705 {
2706 int j;
2707
2708 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
2709 if (tls_symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
2710 return true;
2711 }
2712
2713 else if (fmt[i] == 'e' && tls_symbolic_reference_mentioned_p (XEXP (op, i)))
2714 return true;
2715 }
2716
2717 return false;
2718 }
2719
2720
2721 /* Return true if OP is a legitimate general operand when
2722 generating PIC code. It is given that flag_pic is on
2723 and that OP satisfies CONSTANT_P or is a CONST_DOUBLE. */
2724
2725 int
2726 legitimate_pic_operand_p (rtx op)
2727 {
2728 /* Accept all non-symbolic constants. */
2729 if (!SYMBOLIC_CONST (op))
2730 return 1;
2731
2732 /* Reject everything else; must be handled
2733 via emit_symbolic_move. */
2734 return 0;
2735 }
2736
2737 /* Returns true if the constant value OP is a legitimate general operand.
2738 It is given that OP satisfies CONSTANT_P or is a CONST_DOUBLE. */
2739
2740 static bool
2741 s390_legitimate_constant_p (enum machine_mode mode, rtx op)
2742 {
2743 /* Accept all non-symbolic constants. */
2744 if (!SYMBOLIC_CONST (op))
2745 return 1;
2746
2747 /* Accept immediate LARL operands. */
2748 if (TARGET_CPU_ZARCH && larl_operand (op, mode))
2749 return 1;
2750
2751 /* Thread-local symbols are never legal constants. This is
2752 so that emit_call knows that computing such addresses
2753 might require a function call. */
2754 if (TLS_SYMBOLIC_CONST (op))
2755 return 0;
2756
2757 /* In the PIC case, symbolic constants must *not* be
2758 forced into the literal pool. We accept them here,
2759 so that they will be handled by emit_symbolic_move. */
2760 if (flag_pic)
2761 return 1;
2762
2763 /* All remaining non-PIC symbolic constants are
2764 forced into the literal pool. */
2765 return 0;
2766 }
2767
2768 /* Determine if it's legal to put X into the constant pool. This
2769 is not possible if X contains the address of a symbol that is
2770 not constant (TLS) or not known at final link time (PIC). */
2771
2772 static bool
2773 s390_cannot_force_const_mem (enum machine_mode mode, rtx x)
2774 {
2775 switch (GET_CODE (x))
2776 {
2777 case CONST_INT:
2778 case CONST_DOUBLE:
2779 /* Accept all non-symbolic constants. */
2780 return false;
2781
2782 case LABEL_REF:
2783 /* Labels are OK iff we are non-PIC. */
2784 return flag_pic != 0;
2785
2786 case SYMBOL_REF:
2787 /* 'Naked' TLS symbol references are never OK,
2788 non-TLS symbols are OK iff we are non-PIC. */
2789 if (tls_symbolic_operand (x))
2790 return true;
2791 else
2792 return flag_pic != 0;
2793
2794 case CONST:
2795 return s390_cannot_force_const_mem (mode, XEXP (x, 0));
2796 case PLUS:
2797 case MINUS:
2798 return s390_cannot_force_const_mem (mode, XEXP (x, 0))
2799 || s390_cannot_force_const_mem (mode, XEXP (x, 1));
2800
2801 case UNSPEC:
2802 switch (XINT (x, 1))
2803 {
2804 /* Only lt-relative or GOT-relative UNSPECs are OK. */
2805 case UNSPEC_LTREL_OFFSET:
2806 case UNSPEC_GOT:
2807 case UNSPEC_GOTOFF:
2808 case UNSPEC_PLTOFF:
2809 case UNSPEC_TLSGD:
2810 case UNSPEC_TLSLDM:
2811 case UNSPEC_NTPOFF:
2812 case UNSPEC_DTPOFF:
2813 case UNSPEC_GOTNTPOFF:
2814 case UNSPEC_INDNTPOFF:
2815 return false;
2816
2817 /* If the literal pool shares the code section, we put
2818 execute template placeholders into the pool as well. */
2819 case UNSPEC_INSN:
2820 return TARGET_CPU_ZARCH;
2821
2822 default:
2823 return true;
2824 }
2825 break;
2826
2827 default:
2828 gcc_unreachable ();
2829 }
2830 }
2831
2832 /* Returns true if the constant value OP is a legitimate general
2833 operand during and after reload. The difference from
2834 legitimate_constant_p is that this function will not accept
2835 a constant that would need to be forced to the literal pool
2836 before it can be used as operand.
2837 This function accepts all constants which can be loaded directly
2838 into a GPR. */
2839
2840 bool
2841 legitimate_reload_constant_p (rtx op)
2842 {
2843 /* Accept la(y) operands. */
2844 if (GET_CODE (op) == CONST_INT
2845 && DISP_IN_RANGE (INTVAL (op)))
2846 return true;
2847
2848 /* Accept l(g)hi/l(g)fi operands. */
2849 if (GET_CODE (op) == CONST_INT
2850 && (CONST_OK_FOR_K (INTVAL (op)) || CONST_OK_FOR_Os (INTVAL (op))))
2851 return true;
2852
2853 /* Accept lliXX operands. */
2854 if (TARGET_ZARCH
2855 && GET_CODE (op) == CONST_INT
2856 && trunc_int_for_mode (INTVAL (op), word_mode) == INTVAL (op)
2857 && s390_single_part (op, word_mode, HImode, 0) >= 0)
2858 return true;
2859
2860 if (TARGET_EXTIMM
2861 && GET_CODE (op) == CONST_INT
2862 && trunc_int_for_mode (INTVAL (op), word_mode) == INTVAL (op)
2863 && s390_single_part (op, word_mode, SImode, 0) >= 0)
2864 return true;
2865
2866 /* Accept larl operands. */
2867 if (TARGET_CPU_ZARCH
2868 && larl_operand (op, VOIDmode))
2869 return true;
2870
2871 /* Accept floating-point zero operands that fit into a single GPR. */
2872 if (GET_CODE (op) == CONST_DOUBLE
2873 && s390_float_const_zero_p (op)
2874 && GET_MODE_SIZE (GET_MODE (op)) <= UNITS_PER_WORD)
2875 return true;
2876
2877 /* Accept double-word operands that can be split. */
2878 if (GET_CODE (op) == CONST_INT
2879 && trunc_int_for_mode (INTVAL (op), word_mode) != INTVAL (op))
2880 {
2881 enum machine_mode dword_mode = word_mode == SImode ? DImode : TImode;
2882 rtx hi = operand_subword (op, 0, 0, dword_mode);
2883 rtx lo = operand_subword (op, 1, 0, dword_mode);
2884 return legitimate_reload_constant_p (hi)
2885 && legitimate_reload_constant_p (lo);
2886 }
2887
2888 /* Everything else cannot be handled without reload. */
2889 return false;
2890 }
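/* Example (illustrative): on a 31-bit target a DImode constant is
   accepted here only if both of its SImode subwords are themselves
   directly loadable (e.g. via lhi or la); otherwise the value has
   to go through the literal pool.  */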
2891
2892 /* Returns true if the constant value OP is a legitimate fp operand
2893 during and after reload.
2894 This function accepts all constants which can be loaded directly
2895 into an FPR. */
2896
2897 static bool
2898 legitimate_reload_fp_constant_p (rtx op)
2899 {
2900 /* Accept floating-point zero operands if the load zero instruction
2901 can be used. Prior to z196 the load fp zero instruction caused a
2902 performance penalty if the result was used as a BFP number. */
2903 if (TARGET_Z196
2904 && GET_CODE (op) == CONST_DOUBLE
2905 && s390_float_const_zero_p (op))
2906 return true;
2907
2908 return false;
2909 }
2910
2911 /* Given an rtx OP being reloaded into a reg required to be in class RCLASS,
2912 return the class of reg to actually use. */
2913
2914 static reg_class_t
2915 s390_preferred_reload_class (rtx op, reg_class_t rclass)
2916 {
2917 switch (GET_CODE (op))
2918 {
2919 /* Constants we cannot reload into general registers
2920 must be forced into the literal pool. */
2921 case CONST_DOUBLE:
2922 case CONST_INT:
2923 if (reg_class_subset_p (GENERAL_REGS, rclass)
2924 && legitimate_reload_constant_p (op))
2925 return GENERAL_REGS;
2926 else if (reg_class_subset_p (ADDR_REGS, rclass)
2927 && legitimate_reload_constant_p (op))
2928 return ADDR_REGS;
2929 else if (reg_class_subset_p (FP_REGS, rclass)
2930 && legitimate_reload_fp_constant_p (op))
2931 return FP_REGS;
2932 return NO_REGS;
2933
2934 /* If a symbolic constant or a PLUS is reloaded,
2935 it is most likely being used as an address, so
2936 prefer ADDR_REGS. If RCLASS is not a superset
2937 of ADDR_REGS, e.g. FP_REGS, reject this reload. */
2938 case CONST:
2939 /* Symrefs cannot be pushed into the literal pool with -fPIC
2940 so we *MUST NOT* return NO_REGS for these cases
2941 (s390_cannot_force_const_mem will return true).
2942
2943 On the other hand we MUST return NO_REGS for symrefs with
2944 invalid addend which might have been pushed to the literal
2945 pool (no -fPIC). Usually we would expect them to be
2946 handled via secondary reload but this does not happen if
2947 they are used as literal pool slot replacement in reload
2948 inheritance (see emit_input_reload_insns). */
2949 if (TARGET_CPU_ZARCH
2950 && GET_CODE (XEXP (op, 0)) == PLUS
2951 && GET_CODE (XEXP (XEXP(op, 0), 0)) == SYMBOL_REF
2952 && GET_CODE (XEXP (XEXP(op, 0), 1)) == CONST_INT)
2953 {
2954 if (flag_pic && reg_class_subset_p (ADDR_REGS, rclass))
2955 return ADDR_REGS;
2956 else
2957 return NO_REGS;
2958 }
2959 /* fallthrough */
2960 case LABEL_REF:
2961 case SYMBOL_REF:
2962 if (!legitimate_reload_constant_p (op))
2963 return NO_REGS;
2964 /* fallthrough */
2965 case PLUS:
2966 /* load address will be used. */
2967 if (reg_class_subset_p (ADDR_REGS, rclass))
2968 return ADDR_REGS;
2969 else
2970 return NO_REGS;
2971
2972 default:
2973 break;
2974 }
2975
2976 return rclass;
2977 }
2978
2979 /* Return true if ADDR is SYMBOL_REF + addend with addend being a
2980 multiple of ALIGNMENT and the SYMBOL_REF being naturally
2981 aligned. */
2982
2983 bool
2984 s390_check_symref_alignment (rtx addr, HOST_WIDE_INT alignment)
2985 {
2986 HOST_WIDE_INT addend;
2987 rtx symref;
2988
2989 if (!s390_loadrelative_operand_p (addr, &symref, &addend))
2990 return false;
2991
2992 if (addend & (alignment - 1))
2993 return false;
2994
2995 if (GET_CODE (symref) == SYMBOL_REF
2996 && !SYMBOL_REF_NOT_NATURALLY_ALIGNED_P (symref))
2997 return true;
2998
2999 if (GET_CODE (symref) == UNSPEC
3000 && alignment <= UNITS_PER_LONG)
3001 return true;
3002
3003 return false;
3004 }
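/* Example (illustrative): with ALIGNMENT == 4, sym + 8 passes the
   addend test while sym + 2 fails it, and a SYMBOL_REF marked as
   not naturally aligned is rejected regardless of the addend.  */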
3005
3006 /* ADDR is moved into REG using larl. If ADDR isn't a valid larl
3007 operand, SCRATCH is used to load the even part of the address,
3008 after which one is added. */
3009
3010 void
3011 s390_reload_larl_operand (rtx reg, rtx addr, rtx scratch)
3012 {
3013 HOST_WIDE_INT addend;
3014 rtx symref;
3015
3016 if (!s390_loadrelative_operand_p (addr, &symref, &addend))
3017 gcc_unreachable ();
3018
3019 if (!(addend & 1))
3020 /* Easy case. The addend is even so larl will do fine. */
3021 emit_move_insn (reg, addr);
3022 else
3023 {
3024 /* We can leave the scratch register untouched if the target
3025 register is a valid base register. */
3026 if (REGNO (reg) < FIRST_PSEUDO_REGISTER
3027 && REGNO_REG_CLASS (REGNO (reg)) == ADDR_REGS)
3028 scratch = reg;
3029
3030 gcc_assert (REGNO (scratch) < FIRST_PSEUDO_REGISTER);
3031 gcc_assert (REGNO_REG_CLASS (REGNO (scratch)) == ADDR_REGS);
3032
3033 if (addend != 1)
3034 emit_move_insn (scratch,
3035 gen_rtx_CONST (Pmode,
3036 gen_rtx_PLUS (Pmode, symref,
3037 GEN_INT (addend - 1))));
3038 else
3039 emit_move_insn (scratch, symref);
3040
3041 /* Increment the address using la in order to avoid clobbering cc. */
3042 s390_load_address (reg, gen_rtx_PLUS (Pmode, scratch, const1_rtx));
3043 }
3044 }
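/* Example (illustrative): for ADDR == sym + 5 this emits
   "larl %scratch, sym+4" followed by "la %reg, 1(%scratch)",
   since larl can only materialize even addresses.  */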
3045
3046 /* Generate what is necessary to move between REG and MEM using
3047 SCRATCH. The direction is given by TOMEM. */
3048
3049 void
3050 s390_reload_symref_address (rtx reg, rtx mem, rtx scratch, bool tomem)
3051 {
3052 /* Reload might have pulled a constant out of the literal pool.
3053 Force it back in. */
3054 if (CONST_INT_P (mem) || GET_CODE (mem) == CONST_DOUBLE
3055 || GET_CODE (mem) == CONST)
3056 mem = force_const_mem (GET_MODE (reg), mem);
3057
3058 gcc_assert (MEM_P (mem));
3059
3060 /* For a load from memory we can leave the scratch register
3061 untouched if the target register is a valid base register. */
3062 if (!tomem
3063 && REGNO (reg) < FIRST_PSEUDO_REGISTER
3064 && REGNO_REG_CLASS (REGNO (reg)) == ADDR_REGS
3065 && GET_MODE (reg) == GET_MODE (scratch))
3066 scratch = reg;
3067
3068 /* Load address into scratch register. Since we can't have a
3069 secondary reload for a secondary reload we have to cover the case
3070 where larl would need a secondary reload here as well. */
3071 s390_reload_larl_operand (scratch, XEXP (mem, 0), scratch);
3072
3073 /* Now we can use a standard load/store to do the move. */
3074 if (tomem)
3075 emit_move_insn (replace_equiv_address (mem, scratch), reg);
3076 else
3077 emit_move_insn (reg, replace_equiv_address (mem, scratch));
3078 }
3079
3080 /* Inform reload about cases where moving X with a mode MODE to a register in
3081 RCLASS requires an extra scratch or immediate register. Return the class
3082 needed for the immediate register. */
3083
3084 static reg_class_t
3085 s390_secondary_reload (bool in_p, rtx x, reg_class_t rclass_i,
3086 enum machine_mode mode, secondary_reload_info *sri)
3087 {
3088 enum reg_class rclass = (enum reg_class) rclass_i;
3089
3090 /* Intermediate register needed. */
3091 if (reg_classes_intersect_p (CC_REGS, rclass))
3092 return GENERAL_REGS;
3093
3094 if (TARGET_Z10)
3095 {
3096 HOST_WIDE_INT offset;
3097 rtx symref;
3098
3099 /* On z10 several optimizer steps may generate larl operands with
3100 an odd addend. */
3101 if (in_p
3102 && s390_loadrelative_operand_p (x, &symref, &offset)
3103 && mode == Pmode
3104 && !SYMBOL_REF_ALIGN1_P (symref)
3105 && (offset & 1) == 1)
3106 sri->icode = ((mode == DImode) ? CODE_FOR_reloaddi_larl_odd_addend_z10
3107 : CODE_FOR_reloadsi_larl_odd_addend_z10);
3108
3109 /* On z10 we need a scratch register when moving QI, TI or floating
3110 point mode values from or to a memory location with a SYMBOL_REF
3111 or if the symref addend of a SI or DI move is not aligned to the
3112 width of the access. */
3113 if (MEM_P (x)
3114 && s390_loadrelative_operand_p (XEXP (x, 0), NULL, NULL)
3115 && (mode == QImode || mode == TImode || FLOAT_MODE_P (mode)
3116 || (!TARGET_ZARCH && mode == DImode)
3117 || ((mode == HImode || mode == SImode || mode == DImode)
3118 && (!s390_check_symref_alignment (XEXP (x, 0),
3119 GET_MODE_SIZE (mode))))))
3120 {
3121 #define __SECONDARY_RELOAD_CASE(M,m) \
3122 case M##mode: \
3123 if (TARGET_64BIT) \
3124 sri->icode = in_p ? CODE_FOR_reload##m##di_toreg_z10 : \
3125 CODE_FOR_reload##m##di_tomem_z10; \
3126 else \
3127 sri->icode = in_p ? CODE_FOR_reload##m##si_toreg_z10 : \
3128 CODE_FOR_reload##m##si_tomem_z10; \
3129 break;
3130
3131 switch (GET_MODE (x))
3132 {
3133 __SECONDARY_RELOAD_CASE (QI, qi);
3134 __SECONDARY_RELOAD_CASE (HI, hi);
3135 __SECONDARY_RELOAD_CASE (SI, si);
3136 __SECONDARY_RELOAD_CASE (DI, di);
3137 __SECONDARY_RELOAD_CASE (TI, ti);
3138 __SECONDARY_RELOAD_CASE (SF, sf);
3139 __SECONDARY_RELOAD_CASE (DF, df);
3140 __SECONDARY_RELOAD_CASE (TF, tf);
3141 __SECONDARY_RELOAD_CASE (SD, sd);
3142 __SECONDARY_RELOAD_CASE (DD, dd);
3143 __SECONDARY_RELOAD_CASE (TD, td);
3144
3145 default:
3146 gcc_unreachable ();
3147 }
3148 #undef __SECONDARY_RELOAD_CASE
3149 }
3150 }
3151
3152 /* We need a scratch register when loading a PLUS expression which
3153 is not a legitimate operand of the LOAD ADDRESS instruction. */
3154 /* LRA can deal with the transformation of a PLUS operand very well,
3155 so we don't need to prompt LRA in this case. */
3156 if (! lra_in_progress && in_p && s390_plus_operand (x, mode))
3157 sri->icode = (TARGET_64BIT ?
3158 CODE_FOR_reloaddi_plus : CODE_FOR_reloadsi_plus);
3159
3160 /* When performing a multiword move from or to memory, we have to make
3161 sure the second chunk in memory is addressable without causing a
3162 displacement overflow. If it is not, we calculate the address in
3163 a scratch register. */
3164 if (MEM_P (x)
3165 && GET_CODE (XEXP (x, 0)) == PLUS
3166 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
3167 && !DISP_IN_RANGE (INTVAL (XEXP (XEXP (x, 0), 1))
3168 + GET_MODE_SIZE (mode) - 1))
3169 {
3170 /* For GENERAL_REGS a displacement overflow is no problem if occurring
3171 in an s_operand address since we may fall back to lm/stm. So we only
3172 have to care about overflows in the b+i+d case. */
3173 if ((reg_classes_intersect_p (GENERAL_REGS, rclass)
3174 && s390_class_max_nregs (GENERAL_REGS, mode) > 1
3175 && GET_CODE (XEXP (XEXP (x, 0), 0)) == PLUS)
3176 /* For FP_REGS no lm/stm is available so this check is triggered
3177 for displacement overflows in b+i+d and b+d like addresses. */
3178 || (reg_classes_intersect_p (FP_REGS, rclass)
3179 && s390_class_max_nregs (FP_REGS, mode) > 1))
3180 {
3181 if (in_p)
3182 sri->icode = (TARGET_64BIT ?
3183 CODE_FOR_reloaddi_nonoffmem_in :
3184 CODE_FOR_reloadsi_nonoffmem_in);
3185 else
3186 sri->icode = (TARGET_64BIT ?
3187 CODE_FOR_reloaddi_nonoffmem_out :
3188 CODE_FOR_reloadsi_nonoffmem_out);
3189 }
3190 }
3191
3192 /* A scratch address register is needed when a symbolic constant is
3193 copied to r0 compiling with -fPIC. In other cases the target
3194 register might be used as temporary (see legitimize_pic_address). */
3195 if (in_p && SYMBOLIC_CONST (x) && flag_pic == 2 && rclass != ADDR_REGS)
3196 sri->icode = (TARGET_64BIT ?
3197 CODE_FOR_reloaddi_PIC_addr :
3198 CODE_FOR_reloadsi_PIC_addr);
3199
3200 /* Either scratch or no register needed. */
3201 return NO_REGS;
3202 }
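/* Example of the multiword overflow case handled above
   (illustrative, no long displacements): a 16-byte access based at
   offset 4088 needs bytes up to offset 4103, which exceeds the
   12-bit unsigned displacement field, so the address of the second
   chunk must be computed in a scratch register.  */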
3203
3204 /* Generate code to load SRC, which is PLUS that is not a
3205 legitimate operand for the LA instruction, into TARGET.
3206 SCRATCH may be used as scratch register. */
3207
3208 void
3209 s390_expand_plus_operand (rtx target, rtx src,
3210 rtx scratch)
3211 {
3212 rtx sum1, sum2;
3213 struct s390_address ad;
3214
3215 /* src must be a PLUS; get its two operands. */
3216 gcc_assert (GET_CODE (src) == PLUS);
3217 gcc_assert (GET_MODE (src) == Pmode);
3218
3219 /* Check if any of the two operands is already scheduled
3220 for replacement by reload. This can happen e.g. when
3221 float registers occur in an address. */
3222 sum1 = find_replacement (&XEXP (src, 0));
3223 sum2 = find_replacement (&XEXP (src, 1));
3224 src = gen_rtx_PLUS (Pmode, sum1, sum2);
3225
3226 /* If the address is already strictly valid, there's nothing to do. */
3227 if (!s390_decompose_address (src, &ad)
3228 || (ad.base && !REGNO_OK_FOR_BASE_P (REGNO (ad.base)))
3229 || (ad.indx && !REGNO_OK_FOR_INDEX_P (REGNO (ad.indx))))
3230 {
3231 /* Otherwise, one of the operands cannot be an address register;
3232 we reload its value into the scratch register. */
3233 if (true_regnum (sum1) < 1 || true_regnum (sum1) > 15)
3234 {
3235 emit_move_insn (scratch, sum1);
3236 sum1 = scratch;
3237 }
3238 if (true_regnum (sum2) < 1 || true_regnum (sum2) > 15)
3239 {
3240 emit_move_insn (scratch, sum2);
3241 sum2 = scratch;
3242 }
3243
3244 /* According to the way these invalid addresses are generated
3245 in reload.c, it should never happen (at least on s390) that
3246 *neither* of the PLUS components, after find_replacements
3247 was applied, is an address register. */
3248 if (sum1 == scratch && sum2 == scratch)
3249 {
3250 debug_rtx (src);
3251 gcc_unreachable ();
3252 }
3253
3254 src = gen_rtx_PLUS (Pmode, sum1, sum2);
3255 }
3256
3257 /* Emit the LOAD ADDRESS pattern. Note that reload of PLUS
3258 is only ever performed on addresses, so we can mark the
3259 sum as legitimate for LA in any case. */
3260 s390_load_address (target, src);
3261 }
3262
3263
3264 /* Return true if ADDR is a valid memory address.
3265 STRICT specifies whether strict register checking applies. */
3266
3267 static bool
3268 s390_legitimate_address_p (enum machine_mode mode, rtx addr, bool strict)
3269 {
3270 struct s390_address ad;
3271
3272 if (TARGET_Z10
3273 && larl_operand (addr, VOIDmode)
3274 && (mode == VOIDmode
3275 || s390_check_symref_alignment (addr, GET_MODE_SIZE (mode))))
3276 return true;
3277
3278 if (!s390_decompose_address (addr, &ad))
3279 return false;
3280
3281 if (strict)
3282 {
3283 if (ad.base && !REGNO_OK_FOR_BASE_P (REGNO (ad.base)))
3284 return false;
3285
3286 if (ad.indx && !REGNO_OK_FOR_INDEX_P (REGNO (ad.indx)))
3287 return false;
3288 }
3289 else
3290 {
3291 if (ad.base
3292 && !(REGNO (ad.base) >= FIRST_PSEUDO_REGISTER
3293 || REGNO_REG_CLASS (REGNO (ad.base)) == ADDR_REGS))
3294 return false;
3295
3296 if (ad.indx
3297 && !(REGNO (ad.indx) >= FIRST_PSEUDO_REGISTER
3298 || REGNO_REG_CLASS (REGNO (ad.indx)) == ADDR_REGS))
3299 return false;
3300 }
3301 return true;
3302 }
3303
3304 /* Return true if OP is a valid operand for the LA instruction.
3305 In 31-bit, we need to prove that the result is used as an
3306 address, as LA performs only a 31-bit addition. */
3307
3308 bool
3309 legitimate_la_operand_p (rtx op)
3310 {
3311 struct s390_address addr;
3312 if (!s390_decompose_address (op, &addr))
3313 return false;
3314
3315 return (TARGET_64BIT || addr.pointer);
3316 }
3317
3318 /* Return true if it is valid *and* preferable to use LA to
3319 compute the sum of OP1 and OP2. */
3320
3321 bool
3322 preferred_la_operand_p (rtx op1, rtx op2)
3323 {
3324 struct s390_address addr;
3325
3326 if (op2 != const0_rtx)
3327 op1 = gen_rtx_PLUS (Pmode, op1, op2);
3328
3329 if (!s390_decompose_address (op1, &addr))
3330 return false;
3331 if (addr.base && !REGNO_OK_FOR_BASE_P (REGNO (addr.base)))
3332 return false;
3333 if (addr.indx && !REGNO_OK_FOR_INDEX_P (REGNO (addr.indx)))
3334 return false;
3335
3336 /* Avoid LA instructions with index register on z196; it is
3337 preferable to use regular add instructions when possible.
3338 Starting with zEC12 the la with index register is "uncracked"
3339 again. */
3340 if (addr.indx && s390_tune == PROCESSOR_2817_Z196)
3341 return false;
3342
3343 if (!TARGET_64BIT && !addr.pointer)
3344 return false;
3345
3346 if (addr.pointer)
3347 return true;
3348
3349 if ((addr.base && REG_P (addr.base) && REG_POINTER (addr.base))
3350 || (addr.indx && REG_P (addr.indx) && REG_POINTER (addr.indx)))
3351 return true;
3352
3353 return false;
3354 }
3355
3356 /* Emit a forced load-address operation to load SRC into DST.
3357 This will use the LOAD ADDRESS instruction even in situations
3358 where legitimate_la_operand_p (SRC) returns false. */
3359
3360 void
3361 s390_load_address (rtx dst, rtx src)
3362 {
3363 if (TARGET_64BIT)
3364 emit_move_insn (dst, src);
3365 else
3366 emit_insn (gen_force_la_31 (dst, src));
3367 }
3368
3369 /* Return a legitimate reference for ORIG (an address) using the
3370 register REG. If REG is 0, a new pseudo is generated.
3371
3372 There are two types of references that must be handled:
3373
3374 1. Global data references must load the address from the GOT, via
3375 the PIC reg. An insn is emitted to do this load, and the reg is
3376 returned.
3377
3378 2. Static data references, constant pool addresses, and code labels
3379 compute the address as an offset from the GOT, whose base is in
3380 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
3381 differentiate them from global data objects. The returned
3382 address is the PIC reg + an unspec constant.
3383
3384 TARGET_LEGITIMIZE_ADDRESS_P rejects symbolic references unless the PIC
3385 reg also appears in the address. */
3386
3387 rtx
3388 legitimize_pic_address (rtx orig, rtx reg)
3389 {
3390 rtx addr = orig;
3391 rtx addend = const0_rtx;
3392 rtx new_rtx = orig;
3393
3394 gcc_assert (!TLS_SYMBOLIC_CONST (addr));
3395
3396 if (GET_CODE (addr) == CONST)
3397 addr = XEXP (addr, 0);
3398
3399 if (GET_CODE (addr) == PLUS)
3400 {
3401 addend = XEXP (addr, 1);
3402 addr = XEXP (addr, 0);
3403 }
3404
3405 if ((GET_CODE (addr) == LABEL_REF
3406 || (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_LOCAL_P (addr))
3407 || (GET_CODE (addr) == UNSPEC &&
3408 (XINT (addr, 1) == UNSPEC_GOTENT
3409 || (TARGET_CPU_ZARCH && XINT (addr, 1) == UNSPEC_PLT))))
3410 && GET_CODE (addend) == CONST_INT)
3411 {
3412 /* This can be locally addressed. */
3413
3414 /* larl_operand requires UNSPECs to be wrapped in a const rtx. */
3415 rtx const_addr = (GET_CODE (addr) == UNSPEC ?
3416 gen_rtx_CONST (Pmode, addr) : addr);
3417
3418 if (TARGET_CPU_ZARCH
3419 && larl_operand (const_addr, VOIDmode)
3420 && INTVAL (addend) < (HOST_WIDE_INT)1 << 31
3421 && INTVAL (addend) >= -((HOST_WIDE_INT)1 << 31))
3422 {
3423 if (INTVAL (addend) & 1)
3424 {
3425 /* LARL can't handle odd offsets, so emit a pair of LARL
3426 and LA. */
3427 rtx temp = reg? reg : gen_reg_rtx (Pmode);
3428
3429 if (!DISP_IN_RANGE (INTVAL (addend)))
3430 {
3431 HOST_WIDE_INT even = INTVAL (addend) - 1;
3432 addr = gen_rtx_PLUS (Pmode, addr, GEN_INT (even));
3433 addr = gen_rtx_CONST (Pmode, addr);
3434 addend = const1_rtx;
3435 }
3436
3437 emit_move_insn (temp, addr);
3438 new_rtx = gen_rtx_PLUS (Pmode, temp, addend);
3439
3440 if (reg != 0)
3441 {
3442 s390_load_address (reg, new_rtx);
3443 new_rtx = reg;
3444 }
3445 }
3446 else
3447 {
3448 /* If the offset is even, we can just use LARL. This
3449 will happen automatically. */
3450 }
3451 }
3452 else
3453 {
3454 /* No larl - Access local symbols relative to the GOT. */
3455
3456 rtx temp = reg? reg : gen_reg_rtx (Pmode);
3457
3458 if (reload_in_progress || reload_completed)
3459 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
3460
3461 addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
3462 if (addend != const0_rtx)
3463 addr = gen_rtx_PLUS (Pmode, addr, addend);
3464 addr = gen_rtx_CONST (Pmode, addr);
3465 addr = force_const_mem (Pmode, addr);
3466 emit_move_insn (temp, addr);
3467
3468 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, temp);
3469 if (reg != 0)
3470 {
3471 s390_load_address (reg, new_rtx);
3472 new_rtx = reg;
3473 }
3474 }
3475 }
3476 else if (GET_CODE (addr) == SYMBOL_REF && addend == const0_rtx)
3477 {
3478 /* A non-local symbol reference without addend.
3479
3480 The symbol ref is wrapped into an UNSPEC to make sure the
3481 proper operand modifier (@GOT or @GOTENT) will be emitted.
3482 This will tell the linker to put the symbol into the GOT.
3483
3484 Additionally the code dereferencing the GOT slot is emitted here.
3485
3486 An addend to the symref needs to be added afterwards.
3487 legitimize_pic_address calls itself recursively to handle
3488 that case. So no need to do it here. */
3489
3490 if (reg == 0)
3491 reg = gen_reg_rtx (Pmode);
3492
3493 if (TARGET_Z10)
3494 {
3495 /* Use load relative if possible.
3496 lgrl <target>, sym@GOTENT */
3497 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTENT);
3498 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
3499 new_rtx = gen_const_mem (GET_MODE (reg), new_rtx);
3500
3501 emit_move_insn (reg, new_rtx);
3502 new_rtx = reg;
3503 }
3504 else if (flag_pic == 1)
3505 {
3506 /* Assume GOT offset is a valid displacement operand (< 4k
3507 or < 512k with z990). This is handled the same way in
3508 both 31- and 64-bit code (@GOT).
3509 lg <target>, sym@GOT(r12) */
3510
3511 if (reload_in_progress || reload_completed)
3512 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
3513
3514 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
3515 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
3516 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
3517 new_rtx = gen_const_mem (Pmode, new_rtx);
3518 emit_move_insn (reg, new_rtx);
3519 new_rtx = reg;
3520 }
3521 else if (TARGET_CPU_ZARCH)
3522 {
3523 /* If the GOT offset might be >= 4k, we determine the position
3524 of the GOT entry via a PC-relative LARL (@GOTENT).
3525 larl temp, sym@GOTENT
3526 lg <target>, 0(temp) */
3527
3528 rtx temp = reg ? reg : gen_reg_rtx (Pmode);
3529
3530 gcc_assert (REGNO (temp) >= FIRST_PSEUDO_REGISTER
3531 || REGNO_REG_CLASS (REGNO (temp)) == ADDR_REGS);
3532
3533 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTENT);
3534 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
3535 emit_move_insn (temp, new_rtx);
3536
3537 new_rtx = gen_const_mem (Pmode, temp);
3538 emit_move_insn (reg, new_rtx);
3539
3540 new_rtx = reg;
3541 }
3542 else
3543 {
3544 /* If the GOT offset might be >= 4k, we have to load it
3545 from the literal pool (@GOT).
3546
3547 lg temp, lit-litbase(r13)
3548 lg <target>, 0(temp)
3549 lit: .long sym@GOT */
3550
3551 rtx temp = reg ? reg : gen_reg_rtx (Pmode);
3552
3553 gcc_assert (REGNO (temp) >= FIRST_PSEUDO_REGISTER
3554 || REGNO_REG_CLASS (REGNO (temp)) == ADDR_REGS);
3555
3556 if (reload_in_progress || reload_completed)
3557 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
3558
3559 addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
3560 addr = gen_rtx_CONST (Pmode, addr);
3561 addr = force_const_mem (Pmode, addr);
3562 emit_move_insn (temp, addr);
3563
3564 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, temp);
3565 new_rtx = gen_const_mem (Pmode, new_rtx);
3566 emit_move_insn (reg, new_rtx);
3567 new_rtx = reg;
3568 }
3569 }
3570 else if (GET_CODE (addr) == UNSPEC && GET_CODE (addend) == CONST_INT)
3571 {
3572 gcc_assert (XVECLEN (addr, 0) == 1);
3573 switch (XINT (addr, 1))
3574 {
3575 /* These UNSPECs address symbols (or PLT slots) relative to the
3576 GOT (not GOT slots!). In general this will exceed the
3577 displacement range, so these values belong in the literal
3578 pool. */
3579 case UNSPEC_GOTOFF:
3580 case UNSPEC_PLTOFF:
3581 new_rtx = force_const_mem (Pmode, orig);
3582 break;
3583
3584 /* For -fPIC the GOT size might exceed the displacement
3585 range so make sure the value is in the literal pool. */
3586 case UNSPEC_GOT:
3587 if (flag_pic == 2)
3588 new_rtx = force_const_mem (Pmode, orig);
3589 break;
3590
3591 /* For @GOTENT larl is used. This is handled like local
3592 symbol refs. */
3593 case UNSPEC_GOTENT:
3594 gcc_unreachable ();
3595 break;
3596
3597 /* @PLT is OK as is on 64-bit, must be converted to
3598 GOT-relative @PLTOFF on 31-bit. */
3599 case UNSPEC_PLT:
3600 if (!TARGET_CPU_ZARCH)
3601 {
3602 rtx temp = reg? reg : gen_reg_rtx (Pmode);
3603
3604 if (reload_in_progress || reload_completed)
3605 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
3606
3607 addr = XVECEXP (addr, 0, 0);
3608 addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr),
3609 UNSPEC_PLTOFF);
3610 if (addend != const0_rtx)
3611 addr = gen_rtx_PLUS (Pmode, addr, addend);
3612 addr = gen_rtx_CONST (Pmode, addr);
3613 addr = force_const_mem (Pmode, addr);
3614 emit_move_insn (temp, addr);
3615
3616 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, temp);
3617 if (reg != 0)
3618 {
3619 s390_load_address (reg, new_rtx);
3620 new_rtx = reg;
3621 }
3622 }
3623 else
3624 /* On 64-bit, larl can be used. This case is handled like
3625 local symbol refs. */
3626 gcc_unreachable ();
3627 break;
3628
3629 /* Everything else cannot happen. */
3630 default:
3631 gcc_unreachable ();
3632 }
3633 }
3634 else if (addend != const0_rtx)
3635 {
3636 /* Otherwise, compute the sum. */
3637
3638 rtx base = legitimize_pic_address (addr, reg);
3639 new_rtx = legitimize_pic_address (addend,
3640 base == reg ? NULL_RTX : reg);
3641 if (GET_CODE (new_rtx) == CONST_INT)
3642 new_rtx = plus_constant (Pmode, base, INTVAL (new_rtx));
3643 else
3644 {
3645 if (GET_CODE (new_rtx) == PLUS && CONSTANT_P (XEXP (new_rtx, 1)))
3646 {
3647 base = gen_rtx_PLUS (Pmode, base, XEXP (new_rtx, 0));
3648 new_rtx = XEXP (new_rtx, 1);
3649 }
3650 new_rtx = gen_rtx_PLUS (Pmode, base, new_rtx);
3651 }
3652
3653 if (GET_CODE (new_rtx) == CONST)
3654 new_rtx = XEXP (new_rtx, 0);
3655 new_rtx = force_operand (new_rtx, 0);
3656 }
3657
3658 return new_rtx;
3659 }
3660
3661 /* Load the thread pointer into a register. */
3662
3663 rtx
3664 s390_get_thread_pointer (void)
3665 {
3666 rtx tp = gen_reg_rtx (Pmode);
3667
3668 emit_move_insn (tp, gen_rtx_REG (Pmode, TP_REGNUM));
3669 mark_reg_pointer (tp, BITS_PER_WORD);
3670
3671 return tp;
3672 }
3673
3674 /* Emit a tls call insn. The call target is the SYMBOL_REF stored
3675 in s390_tls_symbol which always refers to __tls_get_offset.
3676 The returned offset is written to RESULT_REG and a USE rtx is
3677 generated for TLS_CALL. */
3678
3679 static GTY(()) rtx s390_tls_symbol;
3680
3681 static void
3682 s390_emit_tls_call_insn (rtx result_reg, rtx tls_call)
3683 {
3684 rtx insn;
3685
3686 if (!flag_pic)
3687 emit_insn (s390_load_got ());
3688
3689 if (!s390_tls_symbol)
3690 s390_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, "__tls_get_offset");
3691
3692 insn = s390_emit_call (s390_tls_symbol, tls_call, result_reg,
3693 gen_rtx_REG (Pmode, RETURN_REGNUM));
3694
3695 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), result_reg);
3696 RTL_CONST_CALL_P (insn) = 1;
3697 }
3698
3699 /* ADDR contains a thread-local SYMBOL_REF. Generate code to compute
3700 this (thread-local) address. REG may be used as temporary. */
3701
3702 static rtx
3703 legitimize_tls_address (rtx addr, rtx reg)
3704 {
3705 rtx new_rtx, tls_call, temp, base, r2, insn;
3706
3707 if (GET_CODE (addr) == SYMBOL_REF)
3708 switch (tls_symbolic_operand (addr))
3709 {
3710 case TLS_MODEL_GLOBAL_DYNAMIC:
3711 start_sequence ();
3712 r2 = gen_rtx_REG (Pmode, 2);
3713 tls_call = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_TLSGD);
3714 new_rtx = gen_rtx_CONST (Pmode, tls_call);
3715 new_rtx = force_const_mem (Pmode, new_rtx);
3716 emit_move_insn (r2, new_rtx);
3717 s390_emit_tls_call_insn (r2, tls_call);
3718 insn = get_insns ();
3719 end_sequence ();
3720
3721 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_NTPOFF);
3722 temp = gen_reg_rtx (Pmode);
3723 emit_libcall_block (insn, temp, r2, new_rtx);
3724
3725 new_rtx = gen_rtx_PLUS (Pmode, s390_get_thread_pointer (), temp);
3726 if (reg != 0)
3727 {
3728 s390_load_address (reg, new_rtx);
3729 new_rtx = reg;
3730 }
3731 break;
3732
3733 case TLS_MODEL_LOCAL_DYNAMIC:
3734 start_sequence ();
3735 r2 = gen_rtx_REG (Pmode, 2);
3736 tls_call = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TLSLDM);
3737 new_rtx = gen_rtx_CONST (Pmode, tls_call);
3738 new_rtx = force_const_mem (Pmode, new_rtx);
3739 emit_move_insn (r2, new_rtx);
3740 s390_emit_tls_call_insn (r2, tls_call);
3741 insn = get_insns ();
3742 end_sequence ();
3743
3744 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TLSLDM_NTPOFF);
3745 temp = gen_reg_rtx (Pmode);
3746 emit_libcall_block (insn, temp, r2, new_rtx);
3747
3748 new_rtx = gen_rtx_PLUS (Pmode, s390_get_thread_pointer (), temp);
3749 base = gen_reg_rtx (Pmode);
3750 s390_load_address (base, new_rtx);
3751
3752 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_DTPOFF);
3753 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
3754 new_rtx = force_const_mem (Pmode, new_rtx);
3755 temp = gen_reg_rtx (Pmode);
3756 emit_move_insn (temp, new_rtx);
3757
3758 new_rtx = gen_rtx_PLUS (Pmode, base, temp);
3759 if (reg != 0)
3760 {
3761 s390_load_address (reg, new_rtx);
3762 new_rtx = reg;
3763 }
3764 break;
3765
3766 case TLS_MODEL_INITIAL_EXEC:
3767 if (flag_pic == 1)
3768 {
3769 /* Assume GOT offset < 4k. This is handled the same way
3770 in both 31- and 64-bit code. */
3771
3772 if (reload_in_progress || reload_completed)
3773 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
3774
3775 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTNTPOFF);
3776 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
3777 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
3778 new_rtx = gen_const_mem (Pmode, new_rtx);
3779 temp = gen_reg_rtx (Pmode);
3780 emit_move_insn (temp, new_rtx);
3781 }
3782 else if (TARGET_CPU_ZARCH)
3783 {
3784 /* If the GOT offset might be >= 4k, we determine the position
3785 of the GOT entry via a PC-relative LARL. */
3786
3787 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_INDNTPOFF);
3788 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
3789 temp = gen_reg_rtx (Pmode);
3790 emit_move_insn (temp, new_rtx);
3791
3792 new_rtx = gen_const_mem (Pmode, temp);
3793 temp = gen_reg_rtx (Pmode);
3794 emit_move_insn (temp, new_rtx);
3795 }
3796 else if (flag_pic)
3797 {
3798 /* If the GOT offset might be >= 4k, we have to load it
3799 from the literal pool. */
3800
3801 if (reload_in_progress || reload_completed)
3802 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
3803
3804 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTNTPOFF);
3805 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
3806 new_rtx = force_const_mem (Pmode, new_rtx);
3807 temp = gen_reg_rtx (Pmode);
3808 emit_move_insn (temp, new_rtx);
3809
3810 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, temp);
3811 new_rtx = gen_const_mem (Pmode, new_rtx);
3812
3813 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, new_rtx, addr), UNSPEC_TLS_LOAD);
3814 temp = gen_reg_rtx (Pmode);
3815 emit_insn (gen_rtx_SET (Pmode, temp, new_rtx));
3816 }
3817 else
3818 {
3819 /* In position-dependent code, load the absolute address of
3820 the GOT entry from the literal pool. */
3821
3822 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_INDNTPOFF);
3823 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
3824 new_rtx = force_const_mem (Pmode, new_rtx);
3825 temp = gen_reg_rtx (Pmode);
3826 emit_move_insn (temp, new_rtx);
3827
3828 new_rtx = temp;
3829 new_rtx = gen_const_mem (Pmode, new_rtx);
3830 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, new_rtx, addr), UNSPEC_TLS_LOAD);
3831 temp = gen_reg_rtx (Pmode);
3832 emit_insn (gen_rtx_SET (Pmode, temp, new_rtx));
3833 }
3834
3835 new_rtx = gen_rtx_PLUS (Pmode, s390_get_thread_pointer (), temp);
3836 if (reg != 0)
3837 {
3838 s390_load_address (reg, new_rtx);
3839 new_rtx = reg;
3840 }
3841 break;
3842
3843 case TLS_MODEL_LOCAL_EXEC:
3844 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_NTPOFF);
3845 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
3846 new_rtx = force_const_mem (Pmode, new_rtx);
3847 temp = gen_reg_rtx (Pmode);
3848 emit_move_insn (temp, new_rtx);
3849
3850 new_rtx = gen_rtx_PLUS (Pmode, s390_get_thread_pointer (), temp);
3851 if (reg != 0)
3852 {
3853 s390_load_address (reg, new_rtx);
3854 new_rtx = reg;
3855 }
3856 break;
3857
3858 default:
3859 gcc_unreachable ();
3860 }
3861
3862 else if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == UNSPEC)
3863 {
3864 switch (XINT (XEXP (addr, 0), 1))
3865 {
3866 case UNSPEC_INDNTPOFF:
3867 gcc_assert (TARGET_CPU_ZARCH);
3868 new_rtx = addr;
3869 break;
3870
3871 default:
3872 gcc_unreachable ();
3873 }
3874 }
3875
3876 else if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS
3877 && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT)
3878 {
3879 new_rtx = XEXP (XEXP (addr, 0), 0);
3880 if (GET_CODE (new_rtx) != SYMBOL_REF)
3881 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
3882
3883 new_rtx = legitimize_tls_address (new_rtx, reg);
3884 new_rtx = plus_constant (Pmode, new_rtx,
3885 INTVAL (XEXP (XEXP (addr, 0), 1)));
3886 new_rtx = force_operand (new_rtx, 0);
3887 }
3888
3889 else
3890 gcc_unreachable (); /* for now ... */
3891
3892 return new_rtx;
3893 }
3894
3895 /* Emit insns making the address in operands[1] valid for a standard
3896 move to operands[0]. operands[1] is replaced by an address which
3897 should be used instead of the former RTX to emit the move
3898 pattern. */
3899
3900 void
3901 emit_symbolic_move (rtx *operands)
3902 {
3903 rtx temp = !can_create_pseudo_p () ? operands[0] : gen_reg_rtx (Pmode);
3904
3905 if (GET_CODE (operands[0]) == MEM)
3906 operands[1] = force_reg (Pmode, operands[1]);
3907 else if (TLS_SYMBOLIC_CONST (operands[1]))
3908 operands[1] = legitimize_tls_address (operands[1], temp);
3909 else if (flag_pic)
3910 operands[1] = legitimize_pic_address (operands[1], temp);
3911 }
3912
3913 /* Try machine-dependent ways of modifying an illegitimate address X
3914 to be legitimate. If we find one, return the new, valid address.
3915
3916 OLDX is the address as it was before break_out_memory_refs was called.
3917 In some cases it is useful to look at this to decide what needs to be done.
3918
3919 MODE is the mode of the operand pointed to by X.
3920
3921 When -fpic is used, special handling is needed for symbolic references.
3922 See comments by legitimize_pic_address for details. */
3923
3924 static rtx
3925 s390_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
3926 enum machine_mode mode ATTRIBUTE_UNUSED)
3927 {
3928 rtx constant_term = const0_rtx;
3929
3930 if (TLS_SYMBOLIC_CONST (x))
3931 {
3932 x = legitimize_tls_address (x, 0);
3933
3934 if (s390_legitimate_address_p (mode, x, FALSE))
3935 return x;
3936 }
3937 else if (GET_CODE (x) == PLUS
3938 && (TLS_SYMBOLIC_CONST (XEXP (x, 0))
3939 || TLS_SYMBOLIC_CONST (XEXP (x, 1))))
3940 {
3941 return x;
3942 }
3943 else if (flag_pic)
3944 {
3945 if (SYMBOLIC_CONST (x)
3946 || (GET_CODE (x) == PLUS
3947 && (SYMBOLIC_CONST (XEXP (x, 0))
3948 || SYMBOLIC_CONST (XEXP (x, 1)))))
3949 x = legitimize_pic_address (x, 0);
3950
3951 if (s390_legitimate_address_p (mode, x, FALSE))
3952 return x;
3953 }
3954
3955 x = eliminate_constant_term (x, &constant_term);
3956
3957 /* Optimize loading of large displacements by splitting them
3958 into the multiple of 4K and the rest; this allows the
3959 former to be CSE'd if possible.
3960
3961 Don't do this if the displacement is added to a register
3962 pointing into the stack frame, as the offsets will
3963 change later anyway. */
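/* Worked example (illustrative): for a displacement of 0x12345 we get
   lower = 0x12345 & 0xfff = 0x345 and upper = 0x12345 ^ 0x345 = 0x12000.
   The 4K-aligned part 0x12000 is loaded into a register, where it is a
   candidate for CSE, while 0x345 remains in range of the 12-bit
   displacement field.  */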
3964
3965 if (GET_CODE (constant_term) == CONST_INT
3966 && !TARGET_LONG_DISPLACEMENT
3967 && !DISP_IN_RANGE (INTVAL (constant_term))
3968 && !(REG_P (x) && REGNO_PTR_FRAME_P (REGNO (x))))
3969 {
3970 HOST_WIDE_INT lower = INTVAL (constant_term) & 0xfff;
3971 HOST_WIDE_INT upper = INTVAL (constant_term) ^ lower;
3972
3973 rtx temp = gen_reg_rtx (Pmode);
3974 rtx val = force_operand (GEN_INT (upper), temp);
3975 if (val != temp)
3976 emit_move_insn (temp, val);
3977
3978 x = gen_rtx_PLUS (Pmode, x, temp);
3979 constant_term = GEN_INT (lower);
3980 }
3981
3982 if (GET_CODE (x) == PLUS)
3983 {
3984 if (GET_CODE (XEXP (x, 0)) == REG)
3985 {
3986 rtx temp = gen_reg_rtx (Pmode);
3987 rtx val = force_operand (XEXP (x, 1), temp);
3988 if (val != temp)
3989 emit_move_insn (temp, val);
3990
3991 x = gen_rtx_PLUS (Pmode, XEXP (x, 0), temp);
3992 }
3993
3994 else if (GET_CODE (XEXP (x, 1)) == REG)
3995 {
3996 rtx temp = gen_reg_rtx (Pmode);
3997 rtx val = force_operand (XEXP (x, 0), temp);
3998 if (val != temp)
3999 emit_move_insn (temp, val);
4000
4001 x = gen_rtx_PLUS (Pmode, temp, XEXP (x, 1));
4002 }
4003 }
4004
4005 if (constant_term != const0_rtx)
4006 x = gen_rtx_PLUS (Pmode, x, constant_term);
4007
4008 return x;
4009 }
4010
4011 /* Try a machine-dependent way of reloading an illegitimate address AD
4012 operand. If we find one, push the reload and return the new address.
4013
4014 MODE is the mode of the enclosing MEM. OPNUM is the operand number
4015 and TYPE is the reload type of the current reload. */
4016
4017 rtx
4018 legitimize_reload_address (rtx ad, enum machine_mode mode ATTRIBUTE_UNUSED,
4019 int opnum, int type)
4020 {
4021 if (!optimize || TARGET_LONG_DISPLACEMENT)
4022 return NULL_RTX;
4023
4024 if (GET_CODE (ad) == PLUS)
4025 {
4026 rtx tem = simplify_binary_operation (PLUS, Pmode,
4027 XEXP (ad, 0), XEXP (ad, 1));
4028 if (tem)
4029 ad = tem;
4030 }
4031
4032 if (GET_CODE (ad) == PLUS
4033 && GET_CODE (XEXP (ad, 0)) == REG
4034 && GET_CODE (XEXP (ad, 1)) == CONST_INT
4035 && !DISP_IN_RANGE (INTVAL (XEXP (ad, 1))))
4036 {
4037 HOST_WIDE_INT lower = INTVAL (XEXP (ad, 1)) & 0xfff;
4038 HOST_WIDE_INT upper = INTVAL (XEXP (ad, 1)) ^ lower;
4039 rtx cst, tem, new_rtx;
4040
4041 cst = GEN_INT (upper);
4042 if (!legitimate_reload_constant_p (cst))
4043 cst = force_const_mem (Pmode, cst);
4044
4045 tem = gen_rtx_PLUS (Pmode, XEXP (ad, 0), cst);
4046 new_rtx = gen_rtx_PLUS (Pmode, tem, GEN_INT (lower));
4047
4048 push_reload (XEXP (tem, 1), 0, &XEXP (tem, 1), 0,
4049 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
4050 opnum, (enum reload_type) type);
4051 return new_rtx;
4052 }
4053
4054 return NULL_RTX;
4055 }
4056
4057 /* Emit code to move LEN bytes from SRC to DST. */
4058
4059 bool
4060 s390_expand_movmem (rtx dst, rtx src, rtx len)
4061 {
4062 /* When tuning for z10 or higher we rely on the Glibc functions to
4063 do the right thing. Only for constant lengths below 64k do we
4064 generate inline code. */
4065 if (s390_tune >= PROCESSOR_2097_Z10
4066 && (GET_CODE (len) != CONST_INT || INTVAL (len) > (1<<16)))
4067 return false;
4068
4069 if (GET_CODE (len) == CONST_INT && INTVAL (len) >= 0 && INTVAL (len) <= 256)
4070 {
4071 if (INTVAL (len) > 0)
4072 emit_insn (gen_movmem_short (dst, src, GEN_INT (INTVAL (len) - 1)));
4073 }
4074
4075 else if (TARGET_MVCLE)
4076 {
4077 emit_insn (gen_movmem_long (dst, src, convert_to_mode (Pmode, len, 1)));
4078 }
4079
4080 else
4081 {
4082 rtx dst_addr, src_addr, count, blocks, temp;
4083 rtx loop_start_label = gen_label_rtx ();
4084 rtx loop_end_label = gen_label_rtx ();
4085 rtx end_label = gen_label_rtx ();
4086 enum machine_mode mode;
4087
4088 mode = GET_MODE (len);
4089 if (mode == VOIDmode)
4090 mode = Pmode;
4091
4092 dst_addr = gen_reg_rtx (Pmode);
4093 src_addr = gen_reg_rtx (Pmode);
4094 count = gen_reg_rtx (mode);
4095 blocks = gen_reg_rtx (mode);
4096
4097 convert_move (count, len, 1);
4098 emit_cmp_and_jump_insns (count, const0_rtx,
4099 EQ, NULL_RTX, mode, 1, end_label);
4100
4101 emit_move_insn (dst_addr, force_operand (XEXP (dst, 0), NULL_RTX));
4102 emit_move_insn (src_addr, force_operand (XEXP (src, 0), NULL_RTX));
4103 dst = change_address (dst, VOIDmode, dst_addr);
4104 src = change_address (src, VOIDmode, src_addr);
4105
4106 temp = expand_binop (mode, add_optab, count, constm1_rtx, count, 1,
4107 OPTAB_DIRECT);
4108 if (temp != count)
4109 emit_move_insn (count, temp);
4110
4111 temp = expand_binop (mode, lshr_optab, count, GEN_INT (8), blocks, 1,
4112 OPTAB_DIRECT);
4113 if (temp != blocks)
4114 emit_move_insn (blocks, temp);
4115
4116 emit_cmp_and_jump_insns (blocks, const0_rtx,
4117 EQ, NULL_RTX, mode, 1, loop_end_label);
4118
4119 emit_label (loop_start_label);
4120
4121 if (TARGET_Z10
4122 && (GET_CODE (len) != CONST_INT || INTVAL (len) > 768))
4123 {
4124 rtx prefetch;
4125
4126 /* Issue a read prefetch for the +3 cache line. */
4127 prefetch = gen_prefetch (gen_rtx_PLUS (Pmode, src_addr, GEN_INT (768)),
4128 const0_rtx, const0_rtx);
4129 PREFETCH_SCHEDULE_BARRIER_P (prefetch) = true;
4130 emit_insn (prefetch);
4131
4132 /* Issue a write prefetch for the +3 cache line. */
4133 prefetch = gen_prefetch (gen_rtx_PLUS (Pmode, dst_addr, GEN_INT (768)),
4134 const1_rtx, const0_rtx);
4135 PREFETCH_SCHEDULE_BARRIER_P (prefetch) = true;
4136 emit_insn (prefetch);
4137 }
4138
4139 emit_insn (gen_movmem_short (dst, src, GEN_INT (255)));
4140 s390_load_address (dst_addr,
4141 gen_rtx_PLUS (Pmode, dst_addr, GEN_INT (256)));
4142 s390_load_address (src_addr,
4143 gen_rtx_PLUS (Pmode, src_addr, GEN_INT (256)));
4144
4145 temp = expand_binop (mode, add_optab, blocks, constm1_rtx, blocks, 1,
4146 OPTAB_DIRECT);
4147 if (temp != blocks)
4148 emit_move_insn (blocks, temp);
4149
4150 emit_cmp_and_jump_insns (blocks, const0_rtx,
4151 EQ, NULL_RTX, mode, 1, loop_end_label);
4152
4153 emit_jump (loop_start_label);
4154 emit_label (loop_end_label);
4155
4156 emit_insn (gen_movmem_short (dst, src,
4157 convert_to_mode (Pmode, count, 1)));
4158 emit_label (end_label);
4159 }
4160 return true;
4161 }
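/* For reference, a C-like sketch (an approximation, not taken from the
   sources) of the loop emitted by the fallback path above for a variable
   length; MVC (d, s, n) stands for a block move of n bytes:

     count = len;
     if (count == 0) goto end;
     count = count - 1;
     blocks = count >> 8;
     while (blocks != 0)
       {
         MVC (dst, src, 256);
         dst += 256;
         src += 256;
         blocks = blocks - 1;
       }
     MVC (dst, src, (count & 0xff) + 1);
   end:;
*/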
4162
4163 /* Emit code to set LEN bytes at DST to VAL.
4164 Make use of clrmem if VAL is zero. */
4165
4166 void
4167 s390_expand_setmem (rtx dst, rtx len, rtx val)
4168 {
4169 if (GET_CODE (len) == CONST_INT && INTVAL (len) == 0)
4170 return;
4171
4172 gcc_assert (GET_CODE (val) == CONST_INT || GET_MODE (val) == QImode);
4173
4174 if (GET_CODE (len) == CONST_INT && INTVAL (len) > 0 && INTVAL (len) <= 257)
4175 {
4176 if (val == const0_rtx && INTVAL (len) <= 256)
4177 emit_insn (gen_clrmem_short (dst, GEN_INT (INTVAL (len) - 1)));
4178 else
4179 {
4180 /* Initialize memory by storing the first byte. */
4181 emit_move_insn (adjust_address (dst, QImode, 0), val);
4182
4183 if (INTVAL (len) > 1)
4184 {
4185 /* Initiate a 1-byte overlap move.
4186 The first byte of DST is propagated through DSTP1.
4187 Prepare a movmem for: DST+1 = DST (length = LEN - 1).
4188 DST is set to size 1 so the rest of the memory location
4189 does not count as a source operand. */
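/* Illustrative effect for VAL = 0xAA and LEN = 4: the byte store above
   puts 0xAA at DST[0]; the overlapping move DST+1(3),DST then copies
   DST[0] to DST[1], DST[1] to DST[2] and DST[2] to DST[3], because MVC
   proceeds one byte at a time from left to right.  */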
4190 rtx dstp1 = adjust_address (dst, VOIDmode, 1);
4191 set_mem_size (dst, 1);
4192
4193 emit_insn (gen_movmem_short (dstp1, dst,
4194 GEN_INT (INTVAL (len) - 2)));
4195 }
4196 }
4197 }
4198
4199 else if (TARGET_MVCLE)
4200 {
4201 val = force_not_mem (convert_modes (Pmode, QImode, val, 1));
4202 emit_insn (gen_setmem_long (dst, convert_to_mode (Pmode, len, 1), val));
4203 }
4204
4205 else
4206 {
4207 rtx dst_addr, count, blocks, temp, dstp1 = NULL_RTX;
4208 rtx loop_start_label = gen_label_rtx ();
4209 rtx loop_end_label = gen_label_rtx ();
4210 rtx end_label = gen_label_rtx ();
4211 enum machine_mode mode;
4212
4213 mode = GET_MODE (len);
4214 if (mode == VOIDmode)
4215 mode = Pmode;
4216
4217 dst_addr = gen_reg_rtx (Pmode);
4218 count = gen_reg_rtx (mode);
4219 blocks = gen_reg_rtx (mode);
4220
4221 convert_move (count, len, 1);
4222 emit_cmp_and_jump_insns (count, const0_rtx,
4223 EQ, NULL_RTX, mode, 1, end_label);
4224
4225 emit_move_insn (dst_addr, force_operand (XEXP (dst, 0), NULL_RTX));
4226 dst = change_address (dst, VOIDmode, dst_addr);
4227
4228 if (val == const0_rtx)
4229 temp = expand_binop (mode, add_optab, count, constm1_rtx, count, 1,
4230 OPTAB_DIRECT);
4231 else
4232 {
4233 dstp1 = adjust_address (dst, VOIDmode, 1);
4234 set_mem_size (dst, 1);
4235
4236 /* Initialize memory by storing the first byte. */
4237 emit_move_insn (adjust_address (dst, QImode, 0), val);
4238
4239 /* If count is 1 we are done. */
4240 emit_cmp_and_jump_insns (count, const1_rtx,
4241 EQ, NULL_RTX, mode, 1, end_label);
4242
4243 temp = expand_binop (mode, add_optab, count, GEN_INT (-2), count, 1,
4244 OPTAB_DIRECT);
4245 }
4246 if (temp != count)
4247 emit_move_insn (count, temp);
4248
4249 temp = expand_binop (mode, lshr_optab, count, GEN_INT (8), blocks, 1,
4250 OPTAB_DIRECT);
4251 if (temp != blocks)
4252 emit_move_insn (blocks, temp);
4253
4254 emit_cmp_and_jump_insns (blocks, const0_rtx,
4255 EQ, NULL_RTX, mode, 1, loop_end_label);
4256
4257 emit_label (loop_start_label);
4258
4259 if (TARGET_Z10
4260 && (GET_CODE (len) != CONST_INT || INTVAL (len) > 1024))
4261 {
4262 /* Issue a write prefetch for the +4 cache line. */
4263 rtx prefetch = gen_prefetch (gen_rtx_PLUS (Pmode, dst_addr,
4264 GEN_INT (1024)),
4265 const1_rtx, const0_rtx);
4266 emit_insn (prefetch);
4267 PREFETCH_SCHEDULE_BARRIER_P (prefetch) = true;
4268 }
4269
4270 if (val == const0_rtx)
4271 emit_insn (gen_clrmem_short (dst, GEN_INT (255)));
4272 else
4273 emit_insn (gen_movmem_short (dstp1, dst, GEN_INT (255)));
4274 s390_load_address (dst_addr,
4275 gen_rtx_PLUS (Pmode, dst_addr, GEN_INT (256)));
4276
4277 temp = expand_binop (mode, add_optab, blocks, constm1_rtx, blocks, 1,
4278 OPTAB_DIRECT);
4279 if (temp != blocks)
4280 emit_move_insn (blocks, temp);
4281
4282 emit_cmp_and_jump_insns (blocks, const0_rtx,
4283 EQ, NULL_RTX, mode, 1, loop_end_label);
4284
4285 emit_jump (loop_start_label);
4286 emit_label (loop_end_label);
4287
4288 if (val == const0_rtx)
4289 emit_insn (gen_clrmem_short (dst, convert_to_mode (Pmode, count, 1)));
4290 else
4291 emit_insn (gen_movmem_short (dstp1, dst, convert_to_mode (Pmode, count, 1)));
4292 emit_label (end_label);
4293 }
4294 }
4295
4296 /* Emit code to compare LEN bytes at OP0 with those at OP1,
4297 and return the result in TARGET. */
4298
4299 bool
4300 s390_expand_cmpmem (rtx target, rtx op0, rtx op1, rtx len)
4301 {
4302 rtx ccreg = gen_rtx_REG (CCUmode, CC_REGNUM);
4303 rtx tmp;
4304
4305 /* When tuning for z10 or higher we rely on the Glibc functions to
4306 do the right thing. Only for constant lengths below 64k do we
4307 generate inline code. */
4308 if (s390_tune >= PROCESSOR_2097_Z10
4309 && (GET_CODE (len) != CONST_INT || INTVAL (len) > (1<<16)))
4310 return false;
4311
4312 /* As the result of CMPINT is inverted compared to what we need,
4313 we have to swap the operands. */
4314 tmp = op0; op0 = op1; op1 = tmp;
4315
4316 if (GET_CODE (len) == CONST_INT && INTVAL (len) >= 0 && INTVAL (len) <= 256)
4317 {
4318 if (INTVAL (len) > 0)
4319 {
4320 emit_insn (gen_cmpmem_short (op0, op1, GEN_INT (INTVAL (len) - 1)));
4321 emit_insn (gen_cmpint (target, ccreg));
4322 }
4323 else
4324 emit_move_insn (target, const0_rtx);
4325 }
4326 else if (TARGET_MVCLE)
4327 {
4328 emit_insn (gen_cmpmem_long (op0, op1, convert_to_mode (Pmode, len, 1)));
4329 emit_insn (gen_cmpint (target, ccreg));
4330 }
4331 else
4332 {
4333 rtx addr0, addr1, count, blocks, temp;
4334 rtx loop_start_label = gen_label_rtx ();
4335 rtx loop_end_label = gen_label_rtx ();
4336 rtx end_label = gen_label_rtx ();
4337 enum machine_mode mode;
4338
4339 mode = GET_MODE (len);
4340 if (mode == VOIDmode)
4341 mode = Pmode;
4342
4343 addr0 = gen_reg_rtx (Pmode);
4344 addr1 = gen_reg_rtx (Pmode);
4345 count = gen_reg_rtx (mode);
4346 blocks = gen_reg_rtx (mode);
4347
4348 convert_move (count, len, 1);
4349 emit_cmp_and_jump_insns (count, const0_rtx,
4350 EQ, NULL_RTX, mode, 1, end_label);
4351
4352 emit_move_insn (addr0, force_operand (XEXP (op0, 0), NULL_RTX));
4353 emit_move_insn (addr1, force_operand (XEXP (op1, 0), NULL_RTX));
4354 op0 = change_address (op0, VOIDmode, addr0);
4355 op1 = change_address (op1, VOIDmode, addr1);
4356
4357 temp = expand_binop (mode, add_optab, count, constm1_rtx, count, 1,
4358 OPTAB_DIRECT);
4359 if (temp != count)
4360 emit_move_insn (count, temp);
4361
4362 temp = expand_binop (mode, lshr_optab, count, GEN_INT (8), blocks, 1,
4363 OPTAB_DIRECT);
4364 if (temp != blocks)
4365 emit_move_insn (blocks, temp);
4366
4367 emit_cmp_and_jump_insns (blocks, const0_rtx,
4368 EQ, NULL_RTX, mode, 1, loop_end_label);
4369
4370 emit_label (loop_start_label);
4371
4372 if (TARGET_Z10
4373 && (GET_CODE (len) != CONST_INT || INTVAL (len) > 512))
4374 {
4375 rtx prefetch;
4376
4377 /* Issue a read prefetch for the +2 cache line of operand 1. */
4378 prefetch = gen_prefetch (gen_rtx_PLUS (Pmode, addr0, GEN_INT (512)),
4379 const0_rtx, const0_rtx);
4380 emit_insn (prefetch);
4381 PREFETCH_SCHEDULE_BARRIER_P (prefetch) = true;
4382
4383 /* Issue a read prefetch for the +2 cache line of operand 2. */
4384 prefetch = gen_prefetch (gen_rtx_PLUS (Pmode, addr1, GEN_INT (512)),
4385 const0_rtx, const0_rtx);
4386 emit_insn (prefetch);
4387 PREFETCH_SCHEDULE_BARRIER_P (prefetch) = true;
4388 }
4389
4390 emit_insn (gen_cmpmem_short (op0, op1, GEN_INT (255)));
4391 temp = gen_rtx_NE (VOIDmode, ccreg, const0_rtx);
4392 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
4393 gen_rtx_LABEL_REF (VOIDmode, end_label), pc_rtx);
4394 temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
4395 emit_jump_insn (temp);
4396
4397 s390_load_address (addr0,
4398 gen_rtx_PLUS (Pmode, addr0, GEN_INT (256)));
4399 s390_load_address (addr1,
4400 gen_rtx_PLUS (Pmode, addr1, GEN_INT (256)));
4401
4402 temp = expand_binop (mode, add_optab, blocks, constm1_rtx, blocks, 1,
4403 OPTAB_DIRECT);
4404 if (temp != blocks)
4405 emit_move_insn (blocks, temp);
4406
4407 emit_cmp_and_jump_insns (blocks, const0_rtx,
4408 EQ, NULL_RTX, mode, 1, loop_end_label);
4409
4410 emit_jump (loop_start_label);
4411 emit_label (loop_end_label);
4412
4413 emit_insn (gen_cmpmem_short (op0, op1,
4414 convert_to_mode (Pmode, count, 1)));
4415 emit_label (end_label);
4416
4417 emit_insn (gen_cmpint (target, ccreg));
4418 }
4419 return true;
4420 }
4421
4422
4423 /* Expand conditional increment or decrement using alc/slb instructions.
4424 Should generate code setting DST to either SRC or SRC + INCREMENT,
4425 depending on the result of the comparison CMP_OP0 CMP_CODE CMP_OP1.
4426 Returns true if successful, false otherwise.
4427
4428 That makes it possible to implement some if-constructs without jumps e.g.:
4429 (borrow = CC0 | CC1 and carry = CC2 | CC3)
4430 unsigned int a, b, c;
4431 if (a < b) c++; -> CCU b > a -> CC2; c += carry;
4432 if (a < b) c--; -> CCL3 a - b -> borrow; c -= borrow;
4433 if (a <= b) c++; -> CCL3 b - a -> borrow; c += carry;
4434 if (a <= b) c--; -> CCU a <= b -> borrow; c -= borrow;
4435
4436 Checks for EQ and NE with a nonzero value need an additional xor e.g.:
4437 if (a == b) c++; -> CCL3 a ^= b; 0 - a -> borrow; c += carry;
4438 if (a == b) c--; -> CCU a ^= b; a <= 0 -> CC0 | CC1; c -= borrow;
4439 if (a != b) c++; -> CCU a ^= b; a > 0 -> CC2; c += carry;
4440 if (a != b) c--; -> CCL3 a ^= b; 0 - a -> borrow; c -= borrow; */
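/* As a concrete sketch (illustrative, not taken from the sources): for

     unsigned int a, b, c;
     if (a < b) c++;

   the emitted sequence is roughly

     CLR   b,a       # CCU compare; b > a sets CC2, i.e. the carry
     ALCR  c,rz      # c = c + 0 + carry, rz being a register holding 0

   so the conditional increment is performed without a branch.  */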
4441
4442 bool
4443 s390_expand_addcc (enum rtx_code cmp_code, rtx cmp_op0, rtx cmp_op1,
4444 rtx dst, rtx src, rtx increment)
4445 {
4446 enum machine_mode cmp_mode;
4447 enum machine_mode cc_mode;
4448 rtx op_res;
4449 rtx insn;
4450 rtvec p;
4451 int ret;
4452
4453 if ((GET_MODE (cmp_op0) == SImode || GET_MODE (cmp_op0) == VOIDmode)
4454 && (GET_MODE (cmp_op1) == SImode || GET_MODE (cmp_op1) == VOIDmode))
4455 cmp_mode = SImode;
4456 else if ((GET_MODE (cmp_op0) == DImode || GET_MODE (cmp_op0) == VOIDmode)
4457 && (GET_MODE (cmp_op1) == DImode || GET_MODE (cmp_op1) == VOIDmode))
4458 cmp_mode = DImode;
4459 else
4460 return false;
4461
4462 /* Try ADD LOGICAL WITH CARRY. */
4463 if (increment == const1_rtx)
4464 {
4465 /* Determine CC mode to use. */
4466 if (cmp_code == EQ || cmp_code == NE)
4467 {
4468 if (cmp_op1 != const0_rtx)
4469 {
4470 cmp_op0 = expand_simple_binop (cmp_mode, XOR, cmp_op0, cmp_op1,
4471 NULL_RTX, 0, OPTAB_WIDEN);
4472 cmp_op1 = const0_rtx;
4473 }
4474
4475 cmp_code = cmp_code == EQ ? LEU : GTU;
4476 }
4477
4478 if (cmp_code == LTU || cmp_code == LEU)
4479 {
4480 rtx tem = cmp_op0;
4481 cmp_op0 = cmp_op1;
4482 cmp_op1 = tem;
4483 cmp_code = swap_condition (cmp_code);
4484 }
4485
4486 switch (cmp_code)
4487 {
4488 case GTU:
4489 cc_mode = CCUmode;
4490 break;
4491
4492 case GEU:
4493 cc_mode = CCL3mode;
4494 break;
4495
4496 default:
4497 return false;
4498 }
4499
4500 /* Emit comparison instruction pattern. */
4501 if (!register_operand (cmp_op0, cmp_mode))
4502 cmp_op0 = force_reg (cmp_mode, cmp_op0);
4503
4504 insn = gen_rtx_SET (VOIDmode, gen_rtx_REG (cc_mode, CC_REGNUM),
4505 gen_rtx_COMPARE (cc_mode, cmp_op0, cmp_op1));
4506 /* We use insn_invalid_p here to add clobbers if required. */
4507 ret = insn_invalid_p (emit_insn (insn), false);
4508 gcc_assert (!ret);
4509
4510 /* Emit ALC instruction pattern. */
4511 op_res = gen_rtx_fmt_ee (cmp_code, GET_MODE (dst),
4512 gen_rtx_REG (cc_mode, CC_REGNUM),
4513 const0_rtx);
4514
4515 if (src != const0_rtx)
4516 {
4517 if (!register_operand (src, GET_MODE (dst)))
4518 src = force_reg (GET_MODE (dst), src);
4519
4520 op_res = gen_rtx_PLUS (GET_MODE (dst), op_res, src);
4521 op_res = gen_rtx_PLUS (GET_MODE (dst), op_res, const0_rtx);
4522 }
4523
4524 p = rtvec_alloc (2);
4525 RTVEC_ELT (p, 0) =
4526 gen_rtx_SET (VOIDmode, dst, op_res);
4527 RTVEC_ELT (p, 1) =
4528 gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, CC_REGNUM));
4529 emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
4530
4531 return true;
4532 }
4533
4534 /* Try SUBTRACT LOGICAL WITH BORROW. */
4535 if (increment == constm1_rtx)
4536 {
4537 /* Determine CC mode to use. */
4538 if (cmp_code == EQ || cmp_code == NE)
4539 {
4540 if (cmp_op1 != const0_rtx)
4541 {
4542 cmp_op0 = expand_simple_binop (cmp_mode, XOR, cmp_op0, cmp_op1,
4543 NULL_RTX, 0, OPTAB_WIDEN);
4544 cmp_op1 = const0_rtx;
4545 }
4546
4547 cmp_code = cmp_code == EQ ? LEU : GTU;
4548 }
4549
4550 if (cmp_code == GTU || cmp_code == GEU)
4551 {
4552 rtx tem = cmp_op0;
4553 cmp_op0 = cmp_op1;
4554 cmp_op1 = tem;
4555 cmp_code = swap_condition (cmp_code);
4556 }
4557
4558 switch (cmp_code)
4559 {
4560 case LEU:
4561 cc_mode = CCUmode;
4562 break;
4563
4564 case LTU:
4565 cc_mode = CCL3mode;
4566 break;
4567
4568 default:
4569 return false;
4570 }
4571
4572 /* Emit comparison instruction pattern. */
4573 if (!register_operand (cmp_op0, cmp_mode))
4574 cmp_op0 = force_reg (cmp_mode, cmp_op0);
4575
4576 insn = gen_rtx_SET (VOIDmode, gen_rtx_REG (cc_mode, CC_REGNUM),
4577 gen_rtx_COMPARE (cc_mode, cmp_op0, cmp_op1));
4578 /* We use insn_invalid_p here to add clobbers if required. */
4579 ret = insn_invalid_p (emit_insn (insn), false);
4580 gcc_assert (!ret);
4581
4582 /* Emit SLB instruction pattern. */
4583 if (!register_operand (src, GET_MODE (dst)))
4584 src = force_reg (GET_MODE (dst), src);
4585
4586 op_res = gen_rtx_MINUS (GET_MODE (dst),
4587 gen_rtx_MINUS (GET_MODE (dst), src, const0_rtx),
4588 gen_rtx_fmt_ee (cmp_code, GET_MODE (dst),
4589 gen_rtx_REG (cc_mode, CC_REGNUM),
4590 const0_rtx));
4591 p = rtvec_alloc (2);
4592 RTVEC_ELT (p, 0) =
4593 gen_rtx_SET (VOIDmode, dst, op_res);
4594 RTVEC_ELT (p, 1) =
4595 gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, CC_REGNUM));
4596 emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
4597
4598 return true;
4599 }
4600
4601 return false;
4602 }
4603
4604 /* Expand code for the insv template. Return true if successful. */
4605
4606 bool
4607 s390_expand_insv (rtx dest, rtx op1, rtx op2, rtx src)
4608 {
4609 int bitsize = INTVAL (op1);
4610 int bitpos = INTVAL (op2);
4611 enum machine_mode mode = GET_MODE (dest);
4612 enum machine_mode smode;
4613 int smode_bsize, mode_bsize;
4614 rtx op, clobber;
4615
4616 if (bitsize + bitpos > GET_MODE_BITSIZE (mode))
4617 return false;
4618
4619 /* Generate INSERT IMMEDIATE (IILL et al). */
4620 /* (set (ze (reg)) (const_int)). */
4621 if (TARGET_ZARCH
4622 && register_operand (dest, word_mode)
4623 && (bitpos % 16) == 0
4624 && (bitsize % 16) == 0
4625 && const_int_operand (src, VOIDmode))
4626 {
4627 HOST_WIDE_INT val = INTVAL (src);
4628 int regpos = bitpos + bitsize;
4629
4630 while (regpos > bitpos)
4631 {
4632 enum machine_mode putmode;
4633 int putsize;
4634
4635 if (TARGET_EXTIMM && (regpos % 32 == 0) && (regpos >= bitpos + 32))
4636 putmode = SImode;
4637 else
4638 putmode = HImode;
4639
4640 putsize = GET_MODE_BITSIZE (putmode);
4641 regpos -= putsize;
4642 emit_move_insn (gen_rtx_ZERO_EXTRACT (word_mode, dest,
4643 GEN_INT (putsize),
4644 GEN_INT (regpos)),
4645 gen_int_mode (val, putmode));
4646 val >>= putsize;
4647 }
4648 gcc_assert (regpos == bitpos);
4649 return true;
4650 }
4651
4652 smode = smallest_mode_for_size (bitsize, MODE_INT);
4653 smode_bsize = GET_MODE_BITSIZE (smode);
4654 mode_bsize = GET_MODE_BITSIZE (mode);
4655
4656 /* Generate STORE CHARACTERS UNDER MASK (STCM et al). */
4657 if (bitpos == 0
4658 && (bitsize % BITS_PER_UNIT) == 0
4659 && MEM_P (dest)
4660 && (register_operand (src, word_mode)
4661 || const_int_operand (src, VOIDmode)))
4662 {
4663 /* Emit standard pattern if possible. */
4664 if (smode_bsize == bitsize)
4665 {
4666 emit_move_insn (adjust_address (dest, smode, 0),
4667 gen_lowpart (smode, src));
4668 return true;
4669 }
4670
4671 /* (set (ze (mem)) (const_int)). */
4672 else if (const_int_operand (src, VOIDmode))
4673 {
4674 int size = bitsize / BITS_PER_UNIT;
4675 rtx src_mem = adjust_address (force_const_mem (word_mode, src),
4676 BLKmode,
4677 UNITS_PER_WORD - size);
4678
4679 dest = adjust_address (dest, BLKmode, 0);
4680 set_mem_size (dest, size);
4681 s390_expand_movmem (dest, src_mem, GEN_INT (size));
4682 return true;
4683 }
4684
4685 /* (set (ze (mem)) (reg)). */
4686 else if (register_operand (src, word_mode))
4687 {
4688 if (bitsize <= 32)
4689 emit_move_insn (gen_rtx_ZERO_EXTRACT (word_mode, dest, op1,
4690 const0_rtx), src);
4691 else
4692 {
4693 /* Emit st,stcmh sequence. */
4694 int stcmh_width = bitsize - 32;
4695 int size = stcmh_width / BITS_PER_UNIT;
4696
4697 emit_move_insn (adjust_address (dest, SImode, size),
4698 gen_lowpart (SImode, src));
4699 set_mem_size (dest, size);
4700 emit_move_insn (gen_rtx_ZERO_EXTRACT (word_mode, dest,
4701 GEN_INT (stcmh_width),
4702 const0_rtx),
4703 gen_rtx_LSHIFTRT (word_mode, src, GEN_INT (32)));
4704 }
4705 return true;
4706 }
4707 }
4708
4709 /* Generate INSERT CHARACTERS UNDER MASK (IC, ICM et al). */
4710 if ((bitpos % BITS_PER_UNIT) == 0
4711 && (bitsize % BITS_PER_UNIT) == 0
4712 && (bitpos & 32) == ((bitpos + bitsize - 1) & 32)
4713 && MEM_P (src)
4714 && (mode == DImode || mode == SImode)
4715 && register_operand (dest, mode))
4716 {
4717 /* Emit a strict_low_part pattern if possible. */
4718 if (smode_bsize == bitsize && bitpos == mode_bsize - smode_bsize)
4719 {
4720 op = gen_rtx_STRICT_LOW_PART (VOIDmode, gen_lowpart (smode, dest));
4721 op = gen_rtx_SET (VOIDmode, op, gen_lowpart (smode, src));
4722 clobber = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, CC_REGNUM));
4723 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clobber)));
4724 return true;
4725 }
4726
4727 /* ??? There are more powerful versions of ICM that are not
4728 completely represented in the md file. */
4729 }
4730
4731 /* For z10, generate ROTATE THEN INSERT SELECTED BITS (RISBG et al). */
4732 if (TARGET_Z10 && (mode == DImode || mode == SImode))
4733 {
4734 enum machine_mode mode_s = GET_MODE (src);
4735
4736 if (mode_s == VOIDmode)
4737 {
4738 /* Assume const_int etc already in the proper mode. */
4739 src = force_reg (mode, src);
4740 }
4741 else if (mode_s != mode)
4742 {
4743 gcc_assert (GET_MODE_BITSIZE (mode_s) >= bitsize);
4744 src = force_reg (mode_s, src);
4745 src = gen_lowpart (mode, src);
4746 }
4747
4748 op = gen_rtx_ZERO_EXTRACT (mode, dest, op1, op2);
4749 op = gen_rtx_SET (VOIDmode, op, src);
4750
4751 if (!TARGET_ZEC12)
4752 {
4753 clobber = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, CC_REGNUM));
4754 op = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clobber));
4755 }
4756 emit_insn (op);
4757
4758 return true;
4759 }
4760
4761 return false;
4762 }
4763
4764 /* A subroutine of s390_expand_cs_hqi and s390_expand_atomic which returns a
4765 register that holds VAL of mode MODE shifted by COUNT bits. */
4766
4767 static inline rtx
4768 s390_expand_mask_and_shift (rtx val, enum machine_mode mode, rtx count)
4769 {
4770 val = expand_simple_binop (SImode, AND, val, GEN_INT (GET_MODE_MASK (mode)),
4771 NULL_RTX, 1, OPTAB_DIRECT);
4772 return expand_simple_binop (SImode, ASHIFT, val, count,
4773 NULL_RTX, 1, OPTAB_DIRECT);
4774 }
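/* Worked example (illustrative): for VAL = 0x1234 in QImode and COUNT = 16
   the AND first clips VAL to the QImode mask 0xff, giving 0x34; the shift
   then yields 0x340000, i.e. the byte placed at bit position 16 of the
   SImode word.  */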
4775
4776 /* Structure to hold the initial parameters for a compare_and_swap operation
4777 in HImode and QImode. */
4778
4779 struct alignment_context
4780 {
4781 rtx memsi; /* SI aligned memory location. */
4782 rtx shift; /* Bit offset with regard to lsb. */
4783 rtx modemask; /* Mask of the HQImode shifted by SHIFT bits. */
4784 rtx modemaski; /* ~modemask */
4785 bool aligned; /* True if memory is aligned, false otherwise. */
4786 };
4787
4788 /* A subroutine of s390_expand_cs_hqi and s390_expand_atomic to initialize
4789 structure AC for transparent simplification, if the memory alignment is known
4790 to be at least 32 bits. MEM is the memory location for the actual operation
4791 and MODE its mode. */
4792
4793 static void
4794 init_alignment_context (struct alignment_context *ac, rtx mem,
4795 enum machine_mode mode)
4796 {
4797 ac->shift = GEN_INT (GET_MODE_SIZE (SImode) - GET_MODE_SIZE (mode));
4798 ac->aligned = (MEM_ALIGN (mem) >= GET_MODE_BITSIZE (SImode));
4799
4800 if (ac->aligned)
4801 ac->memsi = adjust_address (mem, SImode, 0); /* Memory is aligned. */
4802 else
4803 {
4804 /* Alignment is unknown. */
4805 rtx byteoffset, addr, align;
4806
4807 /* Force the address into a register. */
4808 addr = force_reg (Pmode, XEXP (mem, 0));
4809
4810 /* Align it to SImode. */
4811 align = expand_simple_binop (Pmode, AND, addr,
4812 GEN_INT (-GET_MODE_SIZE (SImode)),
4813 NULL_RTX, 1, OPTAB_DIRECT);
4814 /* Generate MEM. */
4815 ac->memsi = gen_rtx_MEM (SImode, align);
4816 MEM_VOLATILE_P (ac->memsi) = MEM_VOLATILE_P (mem);
4817 set_mem_alias_set (ac->memsi, ALIAS_SET_MEMORY_BARRIER);
4818 set_mem_align (ac->memsi, GET_MODE_BITSIZE (SImode));
4819
4820 /* Calculate shiftcount. */
4821 byteoffset = expand_simple_binop (Pmode, AND, addr,
4822 GEN_INT (GET_MODE_SIZE (SImode) - 1),
4823 NULL_RTX, 1, OPTAB_DIRECT);
4824 /* As we already have some offset, evaluate the remaining distance. */
4825 ac->shift = expand_simple_binop (SImode, MINUS, ac->shift, byteoffset,
4826 NULL_RTX, 1, OPTAB_DIRECT);
4827 }
4828
4829 /* Shift is the byte count, but we need the bitcount. */
4830 ac->shift = expand_simple_binop (SImode, ASHIFT, ac->shift, GEN_INT (3),
4831 NULL_RTX, 1, OPTAB_DIRECT);
4832
4833 /* Calculate masks. */
4834 ac->modemask = expand_simple_binop (SImode, ASHIFT,
4835 GEN_INT (GET_MODE_MASK (mode)),
4836 ac->shift, NULL_RTX, 1, OPTAB_DIRECT);
4837 ac->modemaski = expand_simple_unop (SImode, NOT, ac->modemask,
4838 NULL_RTX, 1);
4839 }
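/* Worked example for the context above (illustrative; S/390 is
   big-endian): a QImode access to address 0x1003 uses the SImode word at
   0x1000.  The initial shift is 4 - 1 = 3 bytes and byteoffset is
   0x1003 & 3 = 3, so shift ends up as (3 - 3) * 8 = 0 bits: the byte at
   the highest address is the least significant.  For address 0x1000 the
   shift would be 3 * 8 = 24 bits and modemask would be 0xff << 24.  */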
4840
4841 /* A subroutine of s390_expand_cs_hqi. Insert INS into VAL. If possible,
4842 emit a single insv insn into SEQ2. Otherwise, put the prep insns in SEQ1
4843 and perform the merge in SEQ2. */
4844
4845 static rtx
4846 s390_two_part_insv (struct alignment_context *ac, rtx *seq1, rtx *seq2,
4847 enum machine_mode mode, rtx val, rtx ins)
4848 {
4849 rtx tmp;
4850
4851 if (ac->aligned)
4852 {
4853 start_sequence ();
4854 tmp = copy_to_mode_reg (SImode, val);
4855 if (s390_expand_insv (tmp, GEN_INT (GET_MODE_BITSIZE (mode)),
4856 const0_rtx, ins))
4857 {
4858 *seq1 = NULL;
4859 *seq2 = get_insns ();
4860 end_sequence ();
4861 return tmp;
4862 }
4863 end_sequence ();
4864 }
4865
4866 /* Failed to use insv. Generate a two part shift and mask. */
4867 start_sequence ();
4868 tmp = s390_expand_mask_and_shift (ins, mode, ac->shift);
4869 *seq1 = get_insns ();
4870 end_sequence ();
4871
4872 start_sequence ();
4873 tmp = expand_simple_binop (SImode, IOR, tmp, val, NULL_RTX, 1, OPTAB_DIRECT);
4874 *seq2 = get_insns ();
4875 end_sequence ();
4876
4877 return tmp;
4878 }
4879
4880 /* Expand an atomic compare and swap operation for HImode and QImode. MEM is
4881 the memory location, CMP the old value to compare MEM with and NEW_RTX the
4882 value to set if CMP == MEM. */
4883
4884 void
4885 s390_expand_cs_hqi (enum machine_mode mode, rtx btarget, rtx vtarget, rtx mem,
4886 rtx cmp, rtx new_rtx, bool is_weak)
4887 {
4888 struct alignment_context ac;
4889 rtx cmpv, newv, val, cc, seq0, seq1, seq2, seq3;
4890 rtx res = gen_reg_rtx (SImode);
4891 rtx csloop = NULL, csend = NULL;
4892
4893 gcc_assert (MEM_P (mem));
4894
4895 init_alignment_context (&ac, mem, mode);
4896
4897 /* Load full word. Subsequent loads are performed by CS. */
4898 val = expand_simple_binop (SImode, AND, ac.memsi, ac.modemaski,
4899 NULL_RTX, 1, OPTAB_DIRECT);
4900
4901 /* Prepare insertions of cmp and new_rtx into the loaded value. When
4902 possible, we try to use insv to make this happen efficiently. If
4903 that fails we'll generate code both inside and outside the loop. */
4904 cmpv = s390_two_part_insv (&ac, &seq0, &seq2, mode, val, cmp);
4905 newv = s390_two_part_insv (&ac, &seq1, &seq3, mode, val, new_rtx);
4906
4907 if (seq0)
4908 emit_insn (seq0);
4909 if (seq1)
4910 emit_insn (seq1);
4911
4912 /* Start CS loop. */
4913 if (!is_weak)
4914 {
4915 /* Begin assuming success. */
4916 emit_move_insn (btarget, const1_rtx);
4917
4918 csloop = gen_label_rtx ();
4919 csend = gen_label_rtx ();
4920 emit_label (csloop);
4921 }
4922
4923 /* val = "<mem>00..0<mem>"
4924 * cmp = "00..0<cmp>00..0"
4925 * new = "00..0<new>00..0"
4926 */
4927
4928 emit_insn (seq2);
4929 emit_insn (seq3);
4930
4931 cc = s390_emit_compare_and_swap (EQ, res, ac.memsi, cmpv, newv);
4932 if (is_weak)
4933 emit_insn (gen_cstorecc4 (btarget, cc, XEXP (cc, 0), XEXP (cc, 1)));
4934 else
4935 {
4936 rtx tmp;
4937
4938 /* Jump to end if we're done (likely?). */
4939 s390_emit_jump (csend, cc);
4940
4941 /* Check for changes outside the mode, and restart the loop if so.
4942 Arrange the moves so that the compare is adjacent to the
4943 branch, allowing us to generate CRJ. */
4944 tmp = copy_to_reg (val);
4945 force_expand_binop (SImode, and_optab, res, ac.modemaski, val,
4946 1, OPTAB_DIRECT);
4947 cc = s390_emit_compare (NE, val, tmp);
4948 s390_emit_jump (csloop, cc);
4949
4950 /* Failed. */
4951 emit_move_insn (btarget, const0_rtx);
4952 emit_label (csend);
4953 }
4954
4955 /* Return the correct part of the bitfield. */
4956 convert_move (vtarget, expand_simple_binop (SImode, LSHIFTRT, res, ac.shift,
4957 NULL_RTX, 1, OPTAB_DIRECT), 1);
4958 }
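/* For reference, a C-like sketch (an approximation, not taken from the
   sources) of what the non-weak path above expands to; CS (m, o, n) is
   the compare-and-swap returning the old memory contents:

     val = *memsi & ~modemask;
     btarget = 1;                      // assume success
   csloop:
     cmpv = val | (cmp << shift);
     newv = val | (new << shift);
     res = CS (memsi, cmpv, newv);
     if (res == cmpv) goto csend;      // swap succeeded
     tmp = val;
     val = res & ~modemask;
     if (val != tmp) goto csloop;      // only surrounding bytes changed
     btarget = 0;                      // genuine mismatch
   csend:
     vtarget = res >> shift;           // truncated to the narrow mode
*/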
4959
4960 /* Expand an atomic operation CODE of mode MODE. MEM is the memory location
4961 and VAL the value to play with. If AFTER is true then store the value
4962 MEM holds after the operation, if AFTER is false then store the value MEM
4963 holds before the operation. If TARGET is zero then discard that value, else
4964 store it to TARGET. */
4965
4966 void
4967 s390_expand_atomic (enum machine_mode mode, enum rtx_code code,
4968 rtx target, rtx mem, rtx val, bool after)
4969 {
4970 struct alignment_context ac;
4971 rtx cmp;
4972 rtx new_rtx = gen_reg_rtx (SImode);
4973 rtx orig = gen_reg_rtx (SImode);
4974 rtx csloop = gen_label_rtx ();
4975
4976 gcc_assert (!target || register_operand (target, VOIDmode));
4977 gcc_assert (MEM_P (mem));
4978
4979 init_alignment_context (&ac, mem, mode);
4980
4981 /* Shift val to the correct bit positions.
4982 Preserve "icm", but prevent "ex icm". */
4983 if (!(ac.aligned && code == SET && MEM_P (val)))
4984 val = s390_expand_mask_and_shift (val, mode, ac.shift);
4985
4986 /* Further preparation insns. */
4987 if (code == PLUS || code == MINUS)
4988 emit_move_insn (orig, val);
4989 else if (code == MULT || code == AND) /* val = "11..1<val>11..1" */
4990 val = expand_simple_binop (SImode, XOR, val, ac.modemaski,
4991 NULL_RTX, 1, OPTAB_DIRECT);
4992
4993 /* Load full word. Subsequent loads are performed by CS. */
4994 cmp = force_reg (SImode, ac.memsi);
4995
4996 /* Start CS loop. */
4997 emit_label (csloop);
4998 emit_move_insn (new_rtx, cmp);
4999
5000 /* Patch new with val at correct position. */
5001 switch (code)
5002 {
5003 case PLUS:
5004 case MINUS:
5005 val = expand_simple_binop (SImode, code, new_rtx, orig,
5006 NULL_RTX, 1, OPTAB_DIRECT);
5007 val = expand_simple_binop (SImode, AND, val, ac.modemask,
5008 NULL_RTX, 1, OPTAB_DIRECT);
5009 /* FALLTHRU */
5010 case SET:
5011 if (ac.aligned && MEM_P (val))
5012 store_bit_field (new_rtx, GET_MODE_BITSIZE (mode), 0,
5013 0, 0, SImode, val);
5014 else
5015 {
5016 new_rtx = expand_simple_binop (SImode, AND, new_rtx, ac.modemaski,
5017 NULL_RTX, 1, OPTAB_DIRECT);
5018 new_rtx = expand_simple_binop (SImode, IOR, new_rtx, val,
5019 NULL_RTX, 1, OPTAB_DIRECT);
5020 }
5021 break;
5022 case AND:
5023 case IOR:
5024 case XOR:
5025 new_rtx = expand_simple_binop (SImode, code, new_rtx, val,
5026 NULL_RTX, 1, OPTAB_DIRECT);
5027 break;
5028 case MULT: /* NAND */
5029 new_rtx = expand_simple_binop (SImode, AND, new_rtx, val,
5030 NULL_RTX, 1, OPTAB_DIRECT);
5031 new_rtx = expand_simple_binop (SImode, XOR, new_rtx, ac.modemask,
5032 NULL_RTX, 1, OPTAB_DIRECT);
5033 break;
5034 default:
5035 gcc_unreachable ();
5036 }
5037
5038 s390_emit_jump (csloop, s390_emit_compare_and_swap (NE, cmp,
5039 ac.memsi, cmp, new_rtx));
5040
5041 /* Return the correct part of the bitfield. */
5042 if (target)
5043 convert_move (target, expand_simple_binop (SImode, LSHIFTRT,
5044 after ? new_rtx : cmp, ac.shift,
5045 NULL_RTX, 1, OPTAB_DIRECT), 1);
5046 }
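/* For reference, a C-like sketch (an approximation, not taken from the
   sources) of the emitted sequence, shown for CODE == PLUS; val0 denotes
   VAL already shifted into the field selected by modemask:

     cmp = *memsi;
   csloop:
     new = cmp;
     t = (new + val0) & modemask;
     new = (new & ~modemask) | t;
     old = CS (memsi, cmp, new);
     if (old != cmp) { cmp = old; goto csloop; }
     if (target)
       target = (after ? new : cmp) >> shift;  // truncated to MODE
*/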
5047
5048 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
5049 We need to emit DTP-relative relocations. */
5050
5051 static void s390_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
5052
5053 static void
5054 s390_output_dwarf_dtprel (FILE *file, int size, rtx x)
5055 {
5056 switch (size)
5057 {
5058 case 4:
5059 fputs ("\t.long\t", file);
5060 break;
5061 case 8:
5062 fputs ("\t.quad\t", file);
5063 break;
5064 default:
5065 gcc_unreachable ();
5066 }
5067 output_addr_const (file, x);
5068 fputs ("@DTPOFF", file);
5069 }
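/* For example (illustrative), for SIZE == 8 and X being a SYMBOL_REF
   for "foo" this emits:

     .quad  foo@DTPOFF
*/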
5070
5071 #ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
5072 /* Implement TARGET_MANGLE_TYPE. */
5073
5074 static const char *
5075 s390_mangle_type (const_tree type)
5076 {
5077 if (TYPE_MAIN_VARIANT (type) == long_double_type_node
5078 && TARGET_LONG_DOUBLE_128)
5079 return "g";
5080
5081 /* For all other types, use normal C++ mangling. */
5082 return NULL;
5083 }
5084 #endif
5085
5086 /* In the name of slightly smaller debug output, and to cater to
5087 general assembler lossage, recognize various UNSPEC sequences
5088 and turn them back into a direct symbol reference. */
5089
5090 static rtx
5091 s390_delegitimize_address (rtx orig_x)
5092 {
5093 rtx x, y;
5094
5095 orig_x = delegitimize_mem_from_attrs (orig_x);
5096 x = orig_x;
5097
5098 /* Extract the symbol ref from:
5099 (plus:SI (reg:SI 12 %r12)
5100 (const:SI (unspec:SI [(symbol_ref/f:SI ("*.LC0"))]
5101 UNSPEC_GOTOFF/PLTOFF)))
5102 and
5103 (plus:SI (reg:SI 12 %r12)
5104 (const:SI (plus:SI (unspec:SI [(symbol_ref:SI ("L"))]
5105 UNSPEC_GOTOFF/PLTOFF)
5106 (const_int 4 [0x4])))) */
5107 if (GET_CODE (x) == PLUS
5108 && REG_P (XEXP (x, 0))
5109 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM
5110 && GET_CODE (XEXP (x, 1)) == CONST)
5111 {
5112 HOST_WIDE_INT offset = 0;
5113
5114 /* The const operand. */
5115 y = XEXP (XEXP (x, 1), 0);
5116
5117 if (GET_CODE (y) == PLUS
5118 && GET_CODE (XEXP (y, 1)) == CONST_INT)
5119 {
5120 offset = INTVAL (XEXP (y, 1));
5121 y = XEXP (y, 0);
5122 }
5123
5124 if (GET_CODE (y) == UNSPEC
5125 && (XINT (y, 1) == UNSPEC_GOTOFF
5126 || XINT (y, 1) == UNSPEC_PLTOFF))
5127 return plus_constant (Pmode, XVECEXP (y, 0, 0), offset);
5128 }
5129
5130 if (GET_CODE (x) != MEM)
5131 return orig_x;
5132
5133 x = XEXP (x, 0);
5134 if (GET_CODE (x) == PLUS
5135 && GET_CODE (XEXP (x, 1)) == CONST
5136 && GET_CODE (XEXP (x, 0)) == REG
5137 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
5138 {
5139 y = XEXP (XEXP (x, 1), 0);
5140 if (GET_CODE (y) == UNSPEC
5141 && XINT (y, 1) == UNSPEC_GOT)
5142 y = XVECEXP (y, 0, 0);
5143 else
5144 return orig_x;
5145 }
5146 else if (GET_CODE (x) == CONST)
5147 {
5148 /* Extract the symbol ref from:
5149 (mem:QI (const:DI (unspec:DI [(symbol_ref:DI ("foo"))]
5150 UNSPEC_PLT/GOTENT))) */
5151
5152 y = XEXP (x, 0);
5153 if (GET_CODE (y) == UNSPEC
5154 && (XINT (y, 1) == UNSPEC_GOTENT
5155 || XINT (y, 1) == UNSPEC_PLT))
5156 y = XVECEXP (y, 0, 0);
5157 else
5158 return orig_x;
5159 }
5160 else
5161 return orig_x;
5162
5163 if (GET_MODE (orig_x) != Pmode)
5164 {
5165 if (GET_MODE (orig_x) == BLKmode)
5166 return orig_x;
5167 y = lowpart_subreg (GET_MODE (orig_x), y, Pmode);
5168 if (y == NULL_RTX)
5169 return orig_x;
5170 }
5171 return y;
5172 }
5173
5174 /* Output operand OP to stdio stream FILE.
5175 OP is an address (register + offset) which is not used to address data;
5176 instead the rightmost bits are interpreted as the value. */
5177
5178 static void
5179 print_shift_count_operand (FILE *file, rtx op)
5180 {
5181 HOST_WIDE_INT offset;
5182 rtx base;
5183
5184 /* Extract base register and offset. */
5185 if (!s390_decompose_shift_count (op, &base, &offset))
5186 gcc_unreachable ();
5187
5188 /* Sanity check. */
5189 if (base)
5190 {
5191 gcc_assert (GET_CODE (base) == REG);
5192 gcc_assert (REGNO (base) < FIRST_PSEUDO_REGISTER);
5193 gcc_assert (REGNO_REG_CLASS (REGNO (base)) == ADDR_REGS);
5194 }
5195
5196 /* Offsets are restricted to twelve bits. */
5197 fprintf (file, HOST_WIDE_INT_PRINT_DEC, offset & ((1 << 12) - 1));
5198 if (base)
5199 fprintf (file, "(%s)", reg_names[REGNO (base)]);
5200 }
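/* Example (illustrative): for OP = (plus (reg %r3) (const_int 4101))
   this prints "5(%r3)", since only the low twelve bits of the offset
   are significant: 4101 & 4095 = 5.  */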
5201
5202 /* See 'get_some_local_dynamic_name'. */
5203
5204 static int
5205 get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
5206 {
5207 rtx x = *px;
5208
5209 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
5210 {
5211 x = get_pool_constant (x);
5212 return for_each_rtx (&x, get_some_local_dynamic_name_1, 0);
5213 }
5214
5215 if (GET_CODE (x) == SYMBOL_REF
5216 && tls_symbolic_operand (x) == TLS_MODEL_LOCAL_DYNAMIC)
5217 {
5218 cfun->machine->some_ld_name = XSTR (x, 0);
5219 return 1;
5220 }
5221
5222 return 0;
5223 }
5224
5225 /* Locate some local-dynamic symbol still in use by this function
5226 so that we can print its name in local-dynamic base patterns. */
5227
5228 static const char *
5229 get_some_local_dynamic_name (void)
5230 {
5231 rtx insn;
5232
5233 if (cfun->machine->some_ld_name)
5234 return cfun->machine->some_ld_name;
5235
5236 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
5237 if (INSN_P (insn)
5238 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
5239 return cfun->machine->some_ld_name;
5240
5241 gcc_unreachable ();
5242 }
5243
5244 /* Returns -1 if the function should not be made hotpatchable. Otherwise it
5245 returns a number >= 0 that is the desired size of the hotpatch trampoline
5246 in halfwords. */
5247
5248 static int s390_function_num_hotpatch_trampoline_halfwords (tree decl,
5249 bool do_warn)
5250 {
5251 tree attr;
5252
5253 if (DECL_DECLARED_INLINE_P (decl)
5254 || DECL_ARTIFICIAL (decl)
5255 || MAIN_NAME_P (DECL_NAME (decl)))
5256 {
5257 /* - Explicitly inlined functions cannot be hotpatched.
5258 - Artificial functions need not be hotpatched.
5259 - Making the main function hotpatchable is useless. */
5260 return -1;
5261 }
5262 attr = lookup_attribute ("hotpatch", DECL_ATTRIBUTES (decl));
5263 if (attr || s390_hotpatch_trampoline_halfwords >= 0)
5264 {
5265 if (lookup_attribute ("always_inline", DECL_ATTRIBUTES (decl)))
5266 {
5267 if (do_warn)
5268 warning (OPT_Wattributes, "function %qE with the %qs attribute"
5269 " is not hotpatchable", DECL_NAME (decl), "always_inline");
5270 return -1;
5271 }
5272 else
5273 {
5274 return (attr) ?
5275 get_hotpatch_attribute (attr) : s390_hotpatch_trampoline_halfwords;
5276 }
5277 }
5278
5279 return -1;
5280 }
5281
5282 /* Hook to determine if one function can safely inline another. */
5283
5284 static bool
5285 s390_can_inline_p (tree caller, tree callee)
5286 {
5287 if (s390_function_num_hotpatch_trampoline_halfwords (callee, false) >= 0)
5288 return false;
5289
5290 return default_target_can_inline_p (caller, callee);
5291 }
5292
5293 /* Write the extra assembler code needed to declare a function properly. */
5294
5295 void
5296 s390_asm_output_function_label (FILE *asm_out_file, const char *fname,
5297 tree decl)
5298 {
5299 int hotpatch_trampoline_halfwords = -1;
5300
5301 if (decl)
5302 {
5303 hotpatch_trampoline_halfwords =
5304 s390_function_num_hotpatch_trampoline_halfwords (decl, true);
5305 if (hotpatch_trampoline_halfwords >= 0
5306 && decl_function_context (decl) != NULL_TREE)
5307 {
5308 warning_at (DECL_SOURCE_LOCATION (decl), OPT_mhotpatch,
5309 "hotpatching is not compatible with nested functions");
5310 hotpatch_trampoline_halfwords = -1;
5311 }
5312 }
5313
5314 if (hotpatch_trampoline_halfwords > 0)
5315 {
5316 int i;
5317
5318 /* Add a trampoline code area before the function label and initialize it
5319 with two-byte nop instructions. This area can be overwritten with code
5320 that jumps to a patched version of the function. */
5321 for (i = 0; i < hotpatch_trampoline_halfwords; i++)
5322 asm_fprintf (asm_out_file, "\tnopr\t%%r7\n");
5323 /* Note: The function label must be aligned so that (a) the bytes of the
5324 following nop do not cross a cacheline boundary, and (b) a jump address
5325 (eight bytes for 64-bit targets, four bytes for 32-bit targets) can be
5326 stored directly before the label without crossing a cacheline
5327 boundary. All this is necessary to make sure the trampoline code can
5328 be changed atomically. */
5329 }
5330
5331 ASM_OUTPUT_LABEL (asm_out_file, fname);
5332
5333 /* Output a four-byte nop if hotpatching is enabled. This can be overwritten
5334 atomically with a relative backwards jump to the trampoline area. */
5335 if (hotpatch_trampoline_halfwords >= 0)
5336 asm_fprintf (asm_out_file, "\tnop\t0\n");
5337 }
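/* For illustration, with a trampoline size of two halfwords the output
   for a function foo looks like:

     nopr  %r7
     nopr  %r7
   foo:
     nop   0

   The nopr halfwords form the patchable area before the label; the
   four-byte nop after the label can later be replaced atomically by a
   backwards jump into that area.  */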
5338
5339 /* Output machine-dependent UNSPECs occurring in address constant X
5340 in assembler syntax to stdio stream FILE. Returns true if the
5341 constant X could be recognized, false otherwise. */
5342
5343 static bool
5344 s390_output_addr_const_extra (FILE *file, rtx x)
5345 {
5346 if (GET_CODE (x) == UNSPEC && XVECLEN (x, 0) == 1)
5347 switch (XINT (x, 1))
5348 {
5349 case UNSPEC_GOTENT:
5350 output_addr_const (file, XVECEXP (x, 0, 0));
5351 fprintf (file, "@GOTENT");
5352 return true;
5353 case UNSPEC_GOT:
5354 output_addr_const (file, XVECEXP (x, 0, 0));
5355 fprintf (file, "@GOT");
5356 return true;
5357 case UNSPEC_GOTOFF:
5358 output_addr_const (file, XVECEXP (x, 0, 0));
5359 fprintf (file, "@GOTOFF");
5360 return true;
5361 case UNSPEC_PLT:
5362 output_addr_const (file, XVECEXP (x, 0, 0));
5363 fprintf (file, "@PLT");
5364 return true;
5365 case UNSPEC_PLTOFF:
5366 output_addr_const (file, XVECEXP (x, 0, 0));
5367 fprintf (file, "@PLTOFF");
5368 return true;
5369 case UNSPEC_TLSGD:
5370 output_addr_const (file, XVECEXP (x, 0, 0));
5371 fprintf (file, "@TLSGD");
5372 return true;
5373 case UNSPEC_TLSLDM:
5374 assemble_name (file, get_some_local_dynamic_name ());
5375 fprintf (file, "@TLSLDM");
5376 return true;
5377 case UNSPEC_DTPOFF:
5378 output_addr_const (file, XVECEXP (x, 0, 0));
5379 fprintf (file, "@DTPOFF");
5380 return true;
5381 case UNSPEC_NTPOFF:
5382 output_addr_const (file, XVECEXP (x, 0, 0));
5383 fprintf (file, "@NTPOFF");
5384 return true;
5385 case UNSPEC_GOTNTPOFF:
5386 output_addr_const (file, XVECEXP (x, 0, 0));
5387 fprintf (file, "@GOTNTPOFF");
5388 return true;
5389 case UNSPEC_INDNTPOFF:
5390 output_addr_const (file, XVECEXP (x, 0, 0));
5391 fprintf (file, "@INDNTPOFF");
5392 return true;
5393 }
5394
5395 if (GET_CODE (x) == UNSPEC && XVECLEN (x, 0) == 2)
5396 switch (XINT (x, 1))
5397 {
5398 case UNSPEC_POOL_OFFSET:
5399 x = gen_rtx_MINUS (GET_MODE (x), XVECEXP (x, 0, 0), XVECEXP (x, 0, 1));
5400 output_addr_const (file, x);
5401 return true;
5402 }
5403 return false;
5404 }
5405
5406 /* Output address operand ADDR in assembler syntax to
5407 stdio stream FILE. */
5408
5409 void
5410 print_operand_address (FILE *file, rtx addr)
5411 {
5412 struct s390_address ad;
5413
5414 if (s390_loadrelative_operand_p (addr, NULL, NULL))
5415 {
5416 if (!TARGET_Z10)
5417 {
5418 output_operand_lossage ("symbolic memory references are "
5419 "only supported on z10 or later");
5420 return;
5421 }
5422 output_addr_const (file, addr);
5423 return;
5424 }
5425
5426 if (!s390_decompose_address (addr, &ad)
5427 || (ad.base && !REGNO_OK_FOR_BASE_P (REGNO (ad.base)))
5428 || (ad.indx && !REGNO_OK_FOR_INDEX_P (REGNO (ad.indx))))
5429 output_operand_lossage ("cannot decompose address");
5430
5431 if (ad.disp)
5432 output_addr_const (file, ad.disp);
5433 else
5434 fprintf (file, "0");
5435
5436 if (ad.base && ad.indx)
5437 fprintf (file, "(%s,%s)", reg_names[REGNO (ad.indx)],
5438 reg_names[REGNO (ad.base)]);
5439 else if (ad.base)
5440 fprintf (file, "(%s)", reg_names[REGNO (ad.base)]);
5441 }
5442
5443 /* Output operand X in assembler syntax to stdio stream FILE.
5444 CODE specified the format flag. The following format flags
5445 are recognized:
5446
5447 'C': print opcode suffix for branch condition.
5448 'D': print opcode suffix for inverse branch condition.
5449 'E': print opcode suffix for branch on index instruction.
5450 'G': print the size of the operand in bytes.
5451 'J': print tls_load/tls_gdcall/tls_ldcall suffix.
5452 'M': print the second word of a TImode operand.
5453 'N': print the second word of a DImode operand.
5454 'O': print only the displacement of a memory reference.
5455 'R': print only the base register of a memory reference.
5456 'S': print S-type memory reference (base+displacement).
5457 'Y': print shift count operand.
5458
5459 'b': print integer X as if it's an unsigned byte.
5460 'c': print integer X as if it's a signed byte.
5461 'e': "end" of DImode contiguous bitmask X.
5462 'f': "end" of SImode contiguous bitmask X.
5463 'h': print integer X as if it's a signed halfword.
5464 'i': print the first nonzero HImode part of X.
5465 'j': print the first HImode part unequal to -1 of X.
5466 'k': print the first nonzero SImode part of X.
5467 'm': print the first SImode part unequal to -1 of X.
5468 'o': print integer X as if it's an unsigned 32bit word.
5469 's': "start" of DImode contiguous bitmask X.
5470 't': "start" of SImode contiguous bitmask X.
5471 'x': print integer X as if it's an unsigned halfword.
5472 */
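/* For example (illustrative), with X = (const_int -1) the integer
   modifiers yield 'b' -> 255, 'c' -> -1, 'x' -> 65535, 'h' -> -1 and
   'o' -> 4294967295, following the masking and sign-extension steps in
   the CONST_INT case below.  */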
5473
5474 void
5475 print_operand (FILE *file, rtx x, int code)
5476 {
5477 HOST_WIDE_INT ival;
5478
5479 switch (code)
5480 {
5481 case 'C':
5482 fprintf (file, s390_branch_condition_mnemonic (x, FALSE));
5483 return;
5484
5485 case 'D':
5486 fprintf (file, s390_branch_condition_mnemonic (x, TRUE));
5487 return;
5488
5489 case 'E':
5490 if (GET_CODE (x) == LE)
5491 fprintf (file, "l");
5492 else if (GET_CODE (x) == GT)
5493 fprintf (file, "h");
5494 else
5495 output_operand_lossage ("invalid comparison operator "
5496 "for 'E' output modifier");
5497 return;
5498
5499 case 'J':
5500 if (GET_CODE (x) == SYMBOL_REF)
5501 {
5502 fprintf (file, "%s", ":tls_load:");
5503 output_addr_const (file, x);
5504 }
5505 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLSGD)
5506 {
5507 fprintf (file, "%s", ":tls_gdcall:");
5508 output_addr_const (file, XVECEXP (x, 0, 0));
5509 }
5510 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLSLDM)
5511 {
5512 fprintf (file, "%s", ":tls_ldcall:");
5513 assemble_name (file, get_some_local_dynamic_name ());
5514 }
5515 else
5516 output_operand_lossage ("invalid reference for 'J' output modifier");
5517 return;
5518
5519 case 'G':
5520 fprintf (file, "%u", GET_MODE_SIZE (GET_MODE (x)));
5521 return;
5522
5523 case 'O':
5524 {
5525 struct s390_address ad;
5526 int ret;
5527
5528 if (!MEM_P (x))
5529 {
5530 output_operand_lossage ("memory reference expected for "
5531 "'O' output modifier");
5532 return;
5533 }
5534
5535 ret = s390_decompose_address (XEXP (x, 0), &ad);
5536
5537 if (!ret
5538 || (ad.base && !REGNO_OK_FOR_BASE_P (REGNO (ad.base)))
5539 || ad.indx)
5540 {
5541 output_operand_lossage ("invalid address for 'O' output modifier");
5542 return;
5543 }
5544
5545 if (ad.disp)
5546 output_addr_const (file, ad.disp);
5547 else
5548 fprintf (file, "0");
5549 }
5550 return;
5551
5552 case 'R':
5553 {
5554 struct s390_address ad;
5555 int ret;
5556
5557 if (!MEM_P (x))
5558 {
5559 output_operand_lossage ("memory reference expected for "
5560 "'R' output modifier");
5561 return;
5562 }
5563
5564 ret = s390_decompose_address (XEXP (x, 0), &ad);
5565
5566 if (!ret
5567 || (ad.base && !REGNO_OK_FOR_BASE_P (REGNO (ad.base)))
5568 || ad.indx)
5569 {
5570 output_operand_lossage ("invalid address for 'R' output modifier");
5571 return;
5572 }
5573
5574 if (ad.base)
5575 fprintf (file, "%s", reg_names[REGNO (ad.base)]);
5576 else
5577 fprintf (file, "0");
5578 }
5579 return;
5580
5581 case 'S':
5582 {
5583 struct s390_address ad;
5584 int ret;
5585
5586 if (!MEM_P (x))
5587 {
5588 output_operand_lossage ("memory reference expected for "
5589 "'S' output modifier");
5590 return;
5591 }
5592 ret = s390_decompose_address (XEXP (x, 0), &ad);
5593
5594 if (!ret
5595 || (ad.base && !REGNO_OK_FOR_BASE_P (REGNO (ad.base)))
5596 || ad.indx)
5597 {
5598 output_operand_lossage ("invalid address for 'S' output modifier");
5599 return;
5600 }
5601
5602 if (ad.disp)
5603 output_addr_const (file, ad.disp);
5604 else
5605 fprintf (file, "0");
5606
5607 if (ad.base)
5608 fprintf (file, "(%s)", reg_names[REGNO (ad.base)]);
5609 }
5610 return;
5611
5612 case 'N':
5613 if (GET_CODE (x) == REG)
5614 x = gen_rtx_REG (GET_MODE (x), REGNO (x) + 1);
5615 else if (GET_CODE (x) == MEM)
5616 x = change_address (x, VOIDmode,
5617 plus_constant (Pmode, XEXP (x, 0), 4));
5618 else
5619 output_operand_lossage ("register or memory expression expected "
5620 "for 'N' output modifier");
5621 break;
5622
5623 case 'M':
5624 if (GET_CODE (x) == REG)
5625 x = gen_rtx_REG (GET_MODE (x), REGNO (x) + 1);
5626 else if (GET_CODE (x) == MEM)
5627 x = change_address (x, VOIDmode,
5628 plus_constant (Pmode, XEXP (x, 0), 8));
5629 else
5630 output_operand_lossage ("register or memory expression expected "
5631 "for 'M' output modifier");
5632 break;
5633
5634 case 'Y':
5635 print_shift_count_operand (file, x);
5636 return;
5637 }
5638
5639 switch (GET_CODE (x))
5640 {
5641 case REG:
5642 fprintf (file, "%s", reg_names[REGNO (x)]);
5643 break;
5644
5645 case MEM:
5646 output_address (XEXP (x, 0));
5647 break;
5648
5649 case CONST:
5650 case CODE_LABEL:
5651 case LABEL_REF:
5652 case SYMBOL_REF:
5653 output_addr_const (file, x);
5654 break;
5655
5656 case CONST_INT:
5657 ival = INTVAL (x);
5658 switch (code)
5659 {
5660 case 0:
5661 break;
5662 case 'b':
5663 ival &= 0xff;
5664 break;
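/* For 'c' and 'h' below, the ((v & mask) ^ sign) - sign idiom
   sign-extends the low byte/halfword in plain C: e.g. under 'c'
   an ival of 0xff comes out as -1. */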
5665 case 'c':
5666 ival = ((ival & 0xff) ^ 0x80) - 0x80;
5667 break;
5668 case 'x':
5669 ival &= 0xffff;
5670 break;
5671 case 'h':
5672 ival = ((ival & 0xffff) ^ 0x8000) - 0x8000;
5673 break;
5674 case 'i':
5675 ival = s390_extract_part (x, HImode, 0);
5676 break;
5677 case 'j':
5678 ival = s390_extract_part (x, HImode, -1);
5679 break;
5680 case 'k':
5681 ival = s390_extract_part (x, SImode, 0);
5682 break;
5683 case 'm':
5684 ival = s390_extract_part (x, SImode, -1);
5685 break;
5686 case 'o':
5687 ival &= 0xffffffff;
5688 break;
5689 case 'e': case 'f':
5690 case 's': case 't':
5691 {
5692 int pos, len;
5693 bool ok;
5694
5695 len = (code == 's' || code == 'e' ? 64 : 32);
5696 ok = s390_contiguous_bitmask_p (ival, len, &pos, &len);
5697 gcc_assert (ok);
5698 if (code == 's' || code == 't')
5699 ival = 64 - pos - len;
5700 else
5701 ival = 64 - 1 - pos;
5702 }
5703 break;
5704 default:
5705 output_operand_lossage ("invalid constant for output modifier '%c'", code);
5706 }
5707 fprintf (file, HOST_WIDE_INT_PRINT_DEC, ival);
5708 break;
5709
5710 case CONST_DOUBLE:
5711 gcc_assert (GET_MODE (x) == VOIDmode);
5712 if (code == 'b')
5713 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x) & 0xff);
5714 else if (code == 'x')
5715 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x) & 0xffff);
5716 else if (code == 'h')
5717 fprintf (file, HOST_WIDE_INT_PRINT_DEC,
5718 ((CONST_DOUBLE_LOW (x) & 0xffff) ^ 0x8000) - 0x8000);
5719 else
5720 {
5721 if (code == 0)
5722 output_operand_lossage ("invalid constant - try using "
5723 "an output modifier");
5724 else
5725 output_operand_lossage ("invalid constant for output modifier '%c'",
5726 code);
5727 }
5728 break;
5729
5730 default:
5731 if (code == 0)
5732 output_operand_lossage ("invalid expression - try using "
5733 "an output modifier");
5734 else
5735 output_operand_lossage ("invalid expression for output "
5736 "modifier '%c'", code);
5737 break;
5738 }
5739 }
5740
5741 /* Target hook for assembling integer objects. We need to define it
5742 here to work around a bug in some versions of GAS, which couldn't
5743 handle values smaller than INT_MIN when printed in decimal. */
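/* Illustrative only: a CONST_INT below INT_MIN, say -0x80000001, is
   emitted as "\t.quad\t0xffffffff7fffffff" in hex instead of the
   decimal spelling the affected assemblers mishandled (assuming a
   64-bit HOST_WIDE_INT). */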
5744
5745 static bool
5746 s390_assemble_integer (rtx x, unsigned int size, int aligned_p)
5747 {
5748 if (size == 8 && aligned_p
5749 && GET_CODE (x) == CONST_INT && INTVAL (x) < INT_MIN)
5750 {
5751 fprintf (asm_out_file, "\t.quad\t" HOST_WIDE_INT_PRINT_HEX "\n",
5752 INTVAL (x));
5753 return true;
5754 }
5755 return default_assemble_integer (x, size, aligned_p);
5756 }
5757
5758 /* Returns true if register REGNO is used for forming
5759 a memory address in expression X. */
5760
5761 static bool
5762 reg_used_in_mem_p (int regno, rtx x)
5763 {
5764 enum rtx_code code = GET_CODE (x);
5765 int i, j;
5766 const char *fmt;
5767
5768 if (code == MEM)
5769 {
5770 if (refers_to_regno_p (regno, regno+1,
5771 XEXP (x, 0), 0))
5772 return true;
5773 }
5774 else if (code == SET
5775 && GET_CODE (SET_DEST (x)) == PC)
5776 {
5777 if (refers_to_regno_p (regno, regno+1,
5778 SET_SRC (x), 0))
5779 return true;
5780 }
5781
5782 fmt = GET_RTX_FORMAT (code);
5783 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
5784 {
5785 if (fmt[i] == 'e'
5786 && reg_used_in_mem_p (regno, XEXP (x, i)))
5787 return true;
5788
5789 else if (fmt[i] == 'E')
5790 for (j = 0; j < XVECLEN (x, i); j++)
5791 if (reg_used_in_mem_p (regno, XVECEXP (x, i, j)))
5792 return true;
5793 }
5794 return false;
5795 }
5796
5797 /* Returns true if expression DEP_RTX sets an address register
5798 used by instruction INSN to address memory. */
5799
5800 static bool
5801 addr_generation_dependency_p (rtx dep_rtx, rtx insn)
5802 {
5803 rtx target, pat;
5804
5805 if (NONJUMP_INSN_P (dep_rtx))
5806 dep_rtx = PATTERN (dep_rtx);
5807
5808 if (GET_CODE (dep_rtx) == SET)
5809 {
5810 target = SET_DEST (dep_rtx);
5811 if (GET_CODE (target) == STRICT_LOW_PART)
5812 target = XEXP (target, 0);
5813 while (GET_CODE (target) == SUBREG)
5814 target = SUBREG_REG (target);
5815
5816 if (GET_CODE (target) == REG)
5817 {
5818 int regno = REGNO (target);
5819
5820 if (s390_safe_attr_type (insn) == TYPE_LA)
5821 {
5822 pat = PATTERN (insn);
5823 if (GET_CODE (pat) == PARALLEL)
5824 {
5825 gcc_assert (XVECLEN (pat, 0) == 2);
5826 pat = XVECEXP (pat, 0, 0);
5827 }
5828 gcc_assert (GET_CODE (pat) == SET);
5829 return refers_to_regno_p (regno, regno+1, SET_SRC (pat), 0);
5830 }
5831 else if (get_attr_atype (insn) == ATYPE_AGEN)
5832 return reg_used_in_mem_p (regno, PATTERN (insn));
5833 }
5834 }
5835 return false;
5836 }
5837
5838 /* Return 1 if DEP_INSN sets a register used by INSN in the agen unit. */
5839
5840 int
5841 s390_agen_dep_p (rtx dep_insn, rtx insn)
5842 {
5843 rtx dep_rtx = PATTERN (dep_insn);
5844 int i;
5845
5846 if (GET_CODE (dep_rtx) == SET
5847 && addr_generation_dependency_p (dep_rtx, insn))
5848 return 1;
5849 else if (GET_CODE (dep_rtx) == PARALLEL)
5850 {
5851 for (i = 0; i < XVECLEN (dep_rtx, 0); i++)
5852 {
5853 if (addr_generation_dependency_p (XVECEXP (dep_rtx, 0, i), insn))
5854 return 1;
5855 }
5856 }
5857 return 0;
5858 }
5859
5860
5861 /* A C statement (sans semicolon) to update the integer scheduling priority
5862 INSN_PRIORITY (INSN). Increase the priority to execute the INSN earlier;
5863 reduce the priority to execute INSN later. Do not define this macro if
5864 you do not need to adjust the scheduling priorities of insns.
5865
5866 A STD instruction should be scheduled earlier,
5867 in order to use the bypass. */
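/* Example of the effect: a priority of 2 becomes 16 for an FP store
   (<< 3) but only 4 for an ordinary store (<< 1), so FP stores are
   hoisted most aggressively toward the bypass. */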
5868 static int
5869 s390_adjust_priority (rtx insn ATTRIBUTE_UNUSED, int priority)
5870 {
5871 if (! INSN_P (insn))
5872 return priority;
5873
5874 if (s390_tune != PROCESSOR_2084_Z990
5875 && s390_tune != PROCESSOR_2094_Z9_109
5876 && s390_tune != PROCESSOR_2097_Z10
5877 && s390_tune != PROCESSOR_2817_Z196
5878 && s390_tune != PROCESSOR_2827_ZEC12)
5879 return priority;
5880
5881 switch (s390_safe_attr_type (insn))
5882 {
5883 case TYPE_FSTOREDF:
5884 case TYPE_FSTORESF:
5885 priority = priority << 3;
5886 break;
5887 case TYPE_STORE:
5888 case TYPE_STM:
5889 priority = priority << 1;
5890 break;
5891 default:
5892 break;
5893 }
5894 return priority;
5895 }
5896
5897
5898 /* The number of instructions that can be issued per cycle. */
5899
5900 static int
5901 s390_issue_rate (void)
5902 {
5903 switch (s390_tune)
5904 {
5905 case PROCESSOR_2084_Z990:
5906 case PROCESSOR_2094_Z9_109:
5907 case PROCESSOR_2817_Z196:
5908 return 3;
5909 case PROCESSOR_2097_Z10:
5910 case PROCESSOR_2827_ZEC12:
5911 return 2;
5912 default:
5913 return 1;
5914 }
5915 }
5916
5917 static int
5918 s390_first_cycle_multipass_dfa_lookahead (void)
5919 {
5920 return 4;
5921 }
5922
5923 /* Annotate every literal pool reference in X by an UNSPEC_LTREF expression.
5924 Fix up MEMs as required. */
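/* Sketch of the rewrite done below: a reference such as
   (mem (symbol_ref <pool constant>))
   becomes
   (mem (unspec [(symbol_ref <pool constant>) (base_reg)] UNSPEC_LTREF))
   making the implicit use of the pool base register explicit. */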
5925
5926 static void
5927 annotate_constant_pool_refs (rtx *x)
5928 {
5929 int i, j;
5930 const char *fmt;
5931
5932 gcc_assert (GET_CODE (*x) != SYMBOL_REF
5933 || !CONSTANT_POOL_ADDRESS_P (*x));
5934
5935 /* Literal pool references can only occur inside a MEM ... */
5936 if (GET_CODE (*x) == MEM)
5937 {
5938 rtx memref = XEXP (*x, 0);
5939
5940 if (GET_CODE (memref) == SYMBOL_REF
5941 && CONSTANT_POOL_ADDRESS_P (memref))
5942 {
5943 rtx base = cfun->machine->base_reg;
5944 rtx addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, memref, base),
5945 UNSPEC_LTREF);
5946
5947 *x = replace_equiv_address (*x, addr);
5948 return;
5949 }
5950
5951 if (GET_CODE (memref) == CONST
5952 && GET_CODE (XEXP (memref, 0)) == PLUS
5953 && GET_CODE (XEXP (XEXP (memref, 0), 1)) == CONST_INT
5954 && GET_CODE (XEXP (XEXP (memref, 0), 0)) == SYMBOL_REF
5955 && CONSTANT_POOL_ADDRESS_P (XEXP (XEXP (memref, 0), 0)))
5956 {
5957 HOST_WIDE_INT off = INTVAL (XEXP (XEXP (memref, 0), 1));
5958 rtx sym = XEXP (XEXP (memref, 0), 0);
5959 rtx base = cfun->machine->base_reg;
5960 rtx addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, sym, base),
5961 UNSPEC_LTREF);
5962
5963 *x = replace_equiv_address (*x, plus_constant (Pmode, addr, off));
5964 return;
5965 }
5966 }
5967
5968 /* ... or a load-address type pattern. */
5969 if (GET_CODE (*x) == SET)
5970 {
5971 rtx addrref = SET_SRC (*x);
5972
5973 if (GET_CODE (addrref) == SYMBOL_REF
5974 && CONSTANT_POOL_ADDRESS_P (addrref))
5975 {
5976 rtx base = cfun->machine->base_reg;
5977 rtx addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, addrref, base),
5978 UNSPEC_LTREF);
5979
5980 SET_SRC (*x) = addr;
5981 return;
5982 }
5983
5984 if (GET_CODE (addrref) == CONST
5985 && GET_CODE (XEXP (addrref, 0)) == PLUS
5986 && GET_CODE (XEXP (XEXP (addrref, 0), 1)) == CONST_INT
5987 && GET_CODE (XEXP (XEXP (addrref, 0), 0)) == SYMBOL_REF
5988 && CONSTANT_POOL_ADDRESS_P (XEXP (XEXP (addrref, 0), 0)))
5989 {
5990 HOST_WIDE_INT off = INTVAL (XEXP (XEXP (addrref, 0), 1));
5991 rtx sym = XEXP (XEXP (addrref, 0), 0);
5992 rtx base = cfun->machine->base_reg;
5993 rtx addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, sym, base),
5994 UNSPEC_LTREF);
5995
5996 SET_SRC (*x) = plus_constant (Pmode, addr, off);
5997 return;
5998 }
5999 }
6000
6001 /* Annotate LTREL_BASE as well. */
6002 if (GET_CODE (*x) == UNSPEC
6003 && XINT (*x, 1) == UNSPEC_LTREL_BASE)
6004 {
6005 rtx base = cfun->machine->base_reg;
6006 *x = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, XVECEXP (*x, 0, 0), base),
6007 UNSPEC_LTREL_BASE);
6008 return;
6009 }
6010
6011 fmt = GET_RTX_FORMAT (GET_CODE (*x));
6012 for (i = GET_RTX_LENGTH (GET_CODE (*x)) - 1; i >= 0; i--)
6013 {
6014 if (fmt[i] == 'e')
6015 {
6016 annotate_constant_pool_refs (&XEXP (*x, i));
6017 }
6018 else if (fmt[i] == 'E')
6019 {
6020 for (j = 0; j < XVECLEN (*x, i); j++)
6021 annotate_constant_pool_refs (&XVECEXP (*x, i, j));
6022 }
6023 }
6024 }
6025
6026 /* Split all branches that exceed the maximum distance.
6027 Returns true if this created a new literal pool entry. */
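/* Roughly: a relative branch reaches +-64KB, so anything farther is
   rewritten to load the target address into the return register
   (from the literal pool, or via a PC-relative literal when PIC)
   and branch indirectly through it. A hedged summary of the code
   below, not an exact instruction sequence. */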
6028
6029 static int
6030 s390_split_branches (void)
6031 {
6032 rtx temp_reg = gen_rtx_REG (Pmode, RETURN_REGNUM);
6033 int new_literal = 0, ret;
6034 rtx insn, pat, tmp, target;
6035 rtx *label;
6036
6037 /* We need correct insn addresses. */
6038
6039 shorten_branches (get_insns ());
6040
6041 /* Find all branches that exceed 64KB, and split them. */
6042
6043 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
6044 {
6045 if (! JUMP_P (insn) || tablejump_p (insn, NULL, NULL))
6046 continue;
6047
6048 pat = PATTERN (insn);
6049 if (GET_CODE (pat) == PARALLEL)
6050 pat = XVECEXP (pat, 0, 0);
6051 if (GET_CODE (pat) != SET || SET_DEST (pat) != pc_rtx)
6052 continue;
6053
6054 if (GET_CODE (SET_SRC (pat)) == LABEL_REF)
6055 {
6056 label = &SET_SRC (pat);
6057 }
6058 else if (GET_CODE (SET_SRC (pat)) == IF_THEN_ELSE)
6059 {
6060 if (GET_CODE (XEXP (SET_SRC (pat), 1)) == LABEL_REF)
6061 label = &XEXP (SET_SRC (pat), 1);
6062 else if (GET_CODE (XEXP (SET_SRC (pat), 2)) == LABEL_REF)
6063 label = &XEXP (SET_SRC (pat), 2);
6064 else
6065 continue;
6066 }
6067 else
6068 continue;
6069
6070 if (get_attr_length (insn) <= 4)
6071 continue;
6072
6073 /* We are going to use the return register as a scratch register;
6074 make sure it will be saved/restored by the prologue/epilogue. */
6075 cfun_frame_layout.save_return_addr_p = 1;
6076
6077 if (!flag_pic)
6078 {
6079 new_literal = 1;
6080 tmp = force_const_mem (Pmode, *label);
6081 tmp = emit_insn_before (gen_rtx_SET (Pmode, temp_reg, tmp), insn);
6082 INSN_ADDRESSES_NEW (tmp, -1);
6083 annotate_constant_pool_refs (&PATTERN (tmp));
6084
6085 target = temp_reg;
6086 }
6087 else
6088 {
6089 new_literal = 1;
6090 target = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, *label),
6091 UNSPEC_LTREL_OFFSET);
6092 target = gen_rtx_CONST (Pmode, target);
6093 target = force_const_mem (Pmode, target);
6094 tmp = emit_insn_before (gen_rtx_SET (Pmode, temp_reg, target), insn);
6095 INSN_ADDRESSES_NEW (tmp, -1);
6096 annotate_constant_pool_refs (&PATTERN (tmp));
6097
6098 target = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, XEXP (target, 0),
6099 cfun->machine->base_reg),
6100 UNSPEC_LTREL_BASE);
6101 target = gen_rtx_PLUS (Pmode, temp_reg, target);
6102 }
6103
6104 ret = validate_change (insn, label, target, 0);
6105 gcc_assert (ret);
6106 }
6107
6108 return new_literal;
6109 }
6110
6111
6112 /* Find an annotated literal pool symbol referenced in RTX X,
6113 and store it at REF. Will abort if X contains references to
6114 more than one such pool symbol; multiple references to the same
6115 symbol are allowed, however.
6116
6117 The rtx pointed to by REF must be initialized to NULL_RTX
6118 by the caller before calling this routine. */
6119
6120 static void
6121 find_constant_pool_ref (rtx x, rtx *ref)
6122 {
6123 int i, j;
6124 const char *fmt;
6125
6126 /* Ignore LTREL_BASE references. */
6127 if (GET_CODE (x) == UNSPEC
6128 && XINT (x, 1) == UNSPEC_LTREL_BASE)
6129 return;
6130 /* Likewise POOL_ENTRY insns. */
6131 if (GET_CODE (x) == UNSPEC_VOLATILE
6132 && XINT (x, 1) == UNSPECV_POOL_ENTRY)
6133 return;
6134
6135 gcc_assert (GET_CODE (x) != SYMBOL_REF
6136 || !CONSTANT_POOL_ADDRESS_P (x));
6137
6138 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_LTREF)
6139 {
6140 rtx sym = XVECEXP (x, 0, 0);
6141 gcc_assert (GET_CODE (sym) == SYMBOL_REF
6142 && CONSTANT_POOL_ADDRESS_P (sym));
6143
6144 if (*ref == NULL_RTX)
6145 *ref = sym;
6146 else
6147 gcc_assert (*ref == sym);
6148
6149 return;
6150 }
6151
6152 fmt = GET_RTX_FORMAT (GET_CODE (x));
6153 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
6154 {
6155 if (fmt[i] == 'e')
6156 {
6157 find_constant_pool_ref (XEXP (x, i), ref);
6158 }
6159 else if (fmt[i] == 'E')
6160 {
6161 for (j = 0; j < XVECLEN (x, i); j++)
6162 find_constant_pool_ref (XVECEXP (x, i, j), ref);
6163 }
6164 }
6165 }
6166
6167 /* Replace every reference to the annotated literal pool
6168 symbol REF in X by its base plus OFFSET. */
6169
6170 static void
6171 replace_constant_pool_ref (rtx *x, rtx ref, rtx offset)
6172 {
6173 int i, j;
6174 const char *fmt;
6175
6176 gcc_assert (*x != ref);
6177
6178 if (GET_CODE (*x) == UNSPEC
6179 && XINT (*x, 1) == UNSPEC_LTREF
6180 && XVECEXP (*x, 0, 0) == ref)
6181 {
6182 *x = gen_rtx_PLUS (Pmode, XVECEXP (*x, 0, 1), offset);
6183 return;
6184 }
6185
6186 if (GET_CODE (*x) == PLUS
6187 && GET_CODE (XEXP (*x, 1)) == CONST_INT
6188 && GET_CODE (XEXP (*x, 0)) == UNSPEC
6189 && XINT (XEXP (*x, 0), 1) == UNSPEC_LTREF
6190 && XVECEXP (XEXP (*x, 0), 0, 0) == ref)
6191 {
6192 rtx addr = gen_rtx_PLUS (Pmode, XVECEXP (XEXP (*x, 0), 0, 1), offset);
6193 *x = plus_constant (Pmode, addr, INTVAL (XEXP (*x, 1)));
6194 return;
6195 }
6196
6197 fmt = GET_RTX_FORMAT (GET_CODE (*x));
6198 for (i = GET_RTX_LENGTH (GET_CODE (*x)) - 1; i >= 0; i--)
6199 {
6200 if (fmt[i] == 'e')
6201 {
6202 replace_constant_pool_ref (&XEXP (*x, i), ref, offset);
6203 }
6204 else if (fmt[i] == 'E')
6205 {
6206 for (j = 0; j < XVECLEN (*x, i); j++)
6207 replace_constant_pool_ref (&XVECEXP (*x, i, j), ref, offset);
6208 }
6209 }
6210 }
6211
6212 /* Check whether X contains an UNSPEC_LTREL_BASE.
6213 Return its constant pool symbol if found, NULL_RTX otherwise. */
6214
6215 static rtx
6216 find_ltrel_base (rtx x)
6217 {
6218 int i, j;
6219 const char *fmt;
6220
6221 if (GET_CODE (x) == UNSPEC
6222 && XINT (x, 1) == UNSPEC_LTREL_BASE)
6223 return XVECEXP (x, 0, 0);
6224
6225 fmt = GET_RTX_FORMAT (GET_CODE (x));
6226 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
6227 {
6228 if (fmt[i] == 'e')
6229 {
6230 rtx fnd = find_ltrel_base (XEXP (x, i));
6231 if (fnd)
6232 return fnd;
6233 }
6234 else if (fmt[i] == 'E')
6235 {
6236 for (j = 0; j < XVECLEN (x, i); j++)
6237 {
6238 rtx fnd = find_ltrel_base (XVECEXP (x, i, j));
6239 if (fnd)
6240 return fnd;
6241 }
6242 }
6243 }
6244
6245 return NULL_RTX;
6246 }
6247
6248 /* Replace any occurrence of UNSPEC_LTREL_BASE in X with its base. */
6249
6250 static void
6251 replace_ltrel_base (rtx *x)
6252 {
6253 int i, j;
6254 const char *fmt;
6255
6256 if (GET_CODE (*x) == UNSPEC
6257 && XINT (*x, 1) == UNSPEC_LTREL_BASE)
6258 {
6259 *x = XVECEXP (*x, 0, 1);
6260 return;
6261 }
6262
6263 fmt = GET_RTX_FORMAT (GET_CODE (*x));
6264 for (i = GET_RTX_LENGTH (GET_CODE (*x)) - 1; i >= 0; i--)
6265 {
6266 if (fmt[i] == 'e')
6267 {
6268 replace_ltrel_base (&XEXP (*x, i));
6269 }
6270 else if (fmt[i] == 'E')
6271 {
6272 for (j = 0; j < XVECLEN (*x, i); j++)
6273 replace_ltrel_base (&XVECEXP (*x, i, j));
6274 }
6275 }
6276 }
6277
6278
6279 /* We keep a list of constants which we have to add to internal
6280 constant tables in the middle of large functions. */
6281
6282 #define NR_C_MODES 11
6283 enum machine_mode constant_modes[NR_C_MODES] =
6284 {
6285 TFmode, TImode, TDmode,
6286 DFmode, DImode, DDmode,
6287 SFmode, SImode, SDmode,
6288 HImode,
6289 QImode
6290 };
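/* The modes above are ordered by decreasing size and hence alignment;
   s390_dump_pool relies on this ordering to keep every pool entry
   naturally aligned without extra padding. */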
6291
6292 struct constant
6293 {
6294 struct constant *next;
6295 rtx value;
6296 rtx label;
6297 };
6298
6299 struct constant_pool
6300 {
6301 struct constant_pool *next;
6302 rtx first_insn;
6303 rtx pool_insn;
6304 bitmap insns;
6305 rtx emit_pool_after;
6306
6307 struct constant *constants[NR_C_MODES];
6308 struct constant *execute;
6309 rtx label;
6310 int size;
6311 };
6312
6313 /* Allocate new constant_pool structure. */
6314
6315 static struct constant_pool *
6316 s390_alloc_pool (void)
6317 {
6318 struct constant_pool *pool;
6319 int i;
6320
6321 pool = (struct constant_pool *) xmalloc (sizeof *pool);
6322 pool->next = NULL;
6323 for (i = 0; i < NR_C_MODES; i++)
6324 pool->constants[i] = NULL;
6325
6326 pool->execute = NULL;
6327 pool->label = gen_label_rtx ();
6328 pool->first_insn = NULL_RTX;
6329 pool->pool_insn = NULL_RTX;
6330 pool->insns = BITMAP_ALLOC (NULL);
6331 pool->size = 0;
6332 pool->emit_pool_after = NULL_RTX;
6333
6334 return pool;
6335 }
6336
6337 /* Create new constant pool covering instructions starting at INSN
6338 and chain it to the end of POOL_LIST. */
6339
6340 static struct constant_pool *
6341 s390_start_pool (struct constant_pool **pool_list, rtx insn)
6342 {
6343 struct constant_pool *pool, **prev;
6344
6345 pool = s390_alloc_pool ();
6346 pool->first_insn = insn;
6347
6348 for (prev = pool_list; *prev; prev = &(*prev)->next)
6349 ;
6350 *prev = pool;
6351
6352 return pool;
6353 }
6354
6355 /* End range of instructions covered by POOL at INSN and emit
6356 placeholder insn representing the pool. */
6357
6358 static void
6359 s390_end_pool (struct constant_pool *pool, rtx insn)
6360 {
6361 rtx pool_size = GEN_INT (pool->size + 8 /* alignment slop */);
6362
6363 if (!insn)
6364 insn = get_last_insn ();
6365
6366 pool->pool_insn = emit_insn_after (gen_pool (pool_size), insn);
6367 INSN_ADDRESSES_NEW (pool->pool_insn, -1);
6368 }
6369
6370 /* Add INSN to the list of insns covered by POOL. */
6371
6372 static void
6373 s390_add_pool_insn (struct constant_pool *pool, rtx insn)
6374 {
6375 bitmap_set_bit (pool->insns, INSN_UID (insn));
6376 }
6377
6378 /* Return pool out of POOL_LIST that covers INSN. */
6379
6380 static struct constant_pool *
6381 s390_find_pool (struct constant_pool *pool_list, rtx insn)
6382 {
6383 struct constant_pool *pool;
6384
6385 for (pool = pool_list; pool; pool = pool->next)
6386 if (bitmap_bit_p (pool->insns, INSN_UID (insn)))
6387 break;
6388
6389 return pool;
6390 }
6391
6392 /* Add constant VAL of mode MODE to the constant pool POOL. */
6393
6394 static void
6395 s390_add_constant (struct constant_pool *pool, rtx val, enum machine_mode mode)
6396 {
6397 struct constant *c;
6398 int i;
6399
6400 for (i = 0; i < NR_C_MODES; i++)
6401 if (constant_modes[i] == mode)
6402 break;
6403 gcc_assert (i != NR_C_MODES);
6404
6405 for (c = pool->constants[i]; c != NULL; c = c->next)
6406 if (rtx_equal_p (val, c->value))
6407 break;
6408
6409 if (c == NULL)
6410 {
6411 c = (struct constant *) xmalloc (sizeof *c);
6412 c->value = val;
6413 c->label = gen_label_rtx ();
6414 c->next = pool->constants[i];
6415 pool->constants[i] = c;
6416 pool->size += GET_MODE_SIZE (mode);
6417 }
6418 }
6419
6420 /* Return an rtx that represents the offset of X from the start of
6421 pool POOL. */
6422
6423 static rtx
6424 s390_pool_offset (struct constant_pool *pool, rtx x)
6425 {
6426 rtx label;
6427
6428 label = gen_rtx_LABEL_REF (GET_MODE (x), pool->label);
6429 x = gen_rtx_UNSPEC (GET_MODE (x), gen_rtvec (2, x, label),
6430 UNSPEC_POOL_OFFSET);
6431 return gen_rtx_CONST (GET_MODE (x), x);
6432 }
6433
6434 /* Find constant VAL of mode MODE in the constant pool POOL.
6435 Return an RTX describing the distance from the start of
6436 the pool to the location of the new constant. */
6437
6438 static rtx
6439 s390_find_constant (struct constant_pool *pool, rtx val,
6440 enum machine_mode mode)
6441 {
6442 struct constant *c;
6443 int i;
6444
6445 for (i = 0; i < NR_C_MODES; i++)
6446 if (constant_modes[i] == mode)
6447 break;
6448 gcc_assert (i != NR_C_MODES);
6449
6450 for (c = pool->constants[i]; c != NULL; c = c->next)
6451 if (rtx_equal_p (val, c->value))
6452 break;
6453
6454 gcc_assert (c);
6455
6456 return s390_pool_offset (pool, gen_rtx_LABEL_REF (Pmode, c->label));
6457 }
6458
6459 /* Check whether INSN is an execute. Return the label_ref to its
6460 execute target template if so, NULL_RTX otherwise. */
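/* In this backend an execute insn's PATTERN is a PARALLEL whose first
   element is an UNSPEC_EXECUTE; operand 2 of that unspec carries the
   label_ref of the out-of-line target template. */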
6461
6462 static rtx
6463 s390_execute_label (rtx insn)
6464 {
6465 if (NONJUMP_INSN_P (insn)
6466 && GET_CODE (PATTERN (insn)) == PARALLEL
6467 && GET_CODE (XVECEXP (PATTERN (insn), 0, 0)) == UNSPEC
6468 && XINT (XVECEXP (PATTERN (insn), 0, 0), 1) == UNSPEC_EXECUTE)
6469 return XVECEXP (XVECEXP (PATTERN (insn), 0, 0), 0, 2);
6470
6471 return NULL_RTX;
6472 }
6473
6474 /* Add execute target for INSN to the constant pool POOL. */
6475
6476 static void
6477 s390_add_execute (struct constant_pool *pool, rtx insn)
6478 {
6479 struct constant *c;
6480
6481 for (c = pool->execute; c != NULL; c = c->next)
6482 if (INSN_UID (insn) == INSN_UID (c->value))
6483 break;
6484
6485 if (c == NULL)
6486 {
6487 c = (struct constant *) xmalloc (sizeof *c);
6488 c->value = insn;
6489 c->label = gen_label_rtx ();
6490 c->next = pool->execute;
6491 pool->execute = c;
6492 pool->size += 6;
6493 }
6494 }
6495
6496 /* Find execute target for INSN in the constant pool POOL.
6497 Return an RTX describing the distance from the start of
6498 the pool to the location of the execute target. */
6499
6500 static rtx
6501 s390_find_execute (struct constant_pool *pool, rtx insn)
6502 {
6503 struct constant *c;
6504
6505 for (c = pool->execute; c != NULL; c = c->next)
6506 if (INSN_UID (insn) == INSN_UID (c->value))
6507 break;
6508
6509 gcc_assert (c);
6510
6511 return s390_pool_offset (pool, gen_rtx_LABEL_REF (Pmode, c->label));
6512 }
6513
6514 /* For an execute INSN, extract the execute target template. */
6515
6516 static rtx
6517 s390_execute_target (rtx insn)
6518 {
6519 rtx pattern = PATTERN (insn);
6520 gcc_assert (s390_execute_label (insn));
6521
6522 if (XVECLEN (pattern, 0) == 2)
6523 {
6524 pattern = copy_rtx (XVECEXP (pattern, 0, 1));
6525 }
6526 else
6527 {
6528 rtvec vec = rtvec_alloc (XVECLEN (pattern, 0) - 1);
6529 int i;
6530
6531 for (i = 0; i < XVECLEN (pattern, 0) - 1; i++)
6532 RTVEC_ELT (vec, i) = copy_rtx (XVECEXP (pattern, 0, i + 1));
6533
6534 pattern = gen_rtx_PARALLEL (VOIDmode, vec);
6535 }
6536
6537 return pattern;
6538 }
6539
6540 /* Indicate that INSN cannot be duplicated. This is the case for
6541 execute insns that carry a unique label. */
6542
6543 static bool
6544 s390_cannot_copy_insn_p (rtx insn)
6545 {
6546 rtx label = s390_execute_label (insn);
6547 return label && label != const0_rtx;
6548 }
6549
6550 /* Dump out the constants in POOL. If REMOTE_LABEL is true,
6551 do not emit the pool base label. */
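/* Resulting layout, roughly: an optional switch to the rodata section
   (zarch only), 8- or 4-byte alignment, the pool base label, constants
   grouped by mode from largest to smallest, 2-byte alignment, any
   execute templates, then the switch back and a barrier. */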
6552
6553 static void
6554 s390_dump_pool (struct constant_pool *pool, bool remote_label)
6555 {
6556 struct constant *c;
6557 rtx insn = pool->pool_insn;
6558 int i;
6559
6560 /* Switch to rodata section. */
6561 if (TARGET_CPU_ZARCH)
6562 {
6563 insn = emit_insn_after (gen_pool_section_start (), insn);
6564 INSN_ADDRESSES_NEW (insn, -1);
6565 }
6566
6567 /* Ensure minimum pool alignment. */
6568 if (TARGET_CPU_ZARCH)
6569 insn = emit_insn_after (gen_pool_align (GEN_INT (8)), insn);
6570 else
6571 insn = emit_insn_after (gen_pool_align (GEN_INT (4)), insn);
6572 INSN_ADDRESSES_NEW (insn, -1);
6573
6574 /* Emit pool base label. */
6575 if (!remote_label)
6576 {
6577 insn = emit_label_after (pool->label, insn);
6578 INSN_ADDRESSES_NEW (insn, -1);
6579 }
6580
6581 /* Dump constants in descending alignment requirement order,
6582 ensuring proper alignment for every constant. */
6583 for (i = 0; i < NR_C_MODES; i++)
6584 for (c = pool->constants[i]; c; c = c->next)
6585 {
6586 /* Convert UNSPEC_LTREL_OFFSET unspecs to pool-relative references. */
6587 rtx value = copy_rtx (c->value);
6588 if (GET_CODE (value) == CONST
6589 && GET_CODE (XEXP (value, 0)) == UNSPEC
6590 && XINT (XEXP (value, 0), 1) == UNSPEC_LTREL_OFFSET
6591 && XVECLEN (XEXP (value, 0), 0) == 1)
6592 value = s390_pool_offset (pool, XVECEXP (XEXP (value, 0), 0, 0));
6593
6594 insn = emit_label_after (c->label, insn);
6595 INSN_ADDRESSES_NEW (insn, -1);
6596
6597 value = gen_rtx_UNSPEC_VOLATILE (constant_modes[i],
6598 gen_rtvec (1, value),
6599 UNSPECV_POOL_ENTRY);
6600 insn = emit_insn_after (value, insn);
6601 INSN_ADDRESSES_NEW (insn, -1);
6602 }
6603
6604 /* Ensure minimum alignment for instructions. */
6605 insn = emit_insn_after (gen_pool_align (GEN_INT (2)), insn);
6606 INSN_ADDRESSES_NEW (insn, -1);
6607
6608 /* Output in-pool execute template insns. */
6609 for (c = pool->execute; c; c = c->next)
6610 {
6611 insn = emit_label_after (c->label, insn);
6612 INSN_ADDRESSES_NEW (insn, -1);
6613
6614 insn = emit_insn_after (s390_execute_target (c->value), insn);
6615 INSN_ADDRESSES_NEW (insn, -1);
6616 }
6617
6618 /* Switch back to previous section. */
6619 if (TARGET_CPU_ZARCH)
6620 {
6621 insn = emit_insn_after (gen_pool_section_end (), insn);
6622 INSN_ADDRESSES_NEW (insn, -1);
6623 }
6624
6625 insn = emit_barrier_after (insn);
6626 INSN_ADDRESSES_NEW (insn, -1);
6627
6628 /* Remove placeholder insn. */
6629 remove_insn (pool->pool_insn);
6630 }
6631
6632 /* Free all memory used by POOL. */
6633
6634 static void
6635 s390_free_pool (struct constant_pool *pool)
6636 {
6637 struct constant *c, *next;
6638 int i;
6639
6640 for (i = 0; i < NR_C_MODES; i++)
6641 for (c = pool->constants[i]; c; c = next)
6642 {
6643 next = c->next;
6644 free (c);
6645 }
6646
6647 for (c = pool->execute; c; c = next)
6648 {
6649 next = c->next;
6650 free (c);
6651 }
6652
6653 BITMAP_FREE (pool->insns);
6654 free (pool);
6655 }
6656
6657
6658 /* Collect main literal pool. Return NULL on overflow. */
6659
6660 static struct constant_pool *
6661 s390_mainpool_start (void)
6662 {
6663 struct constant_pool *pool;
6664 rtx insn;
6665
6666 pool = s390_alloc_pool ();
6667
6668 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
6669 {
6670 if (NONJUMP_INSN_P (insn)
6671 && GET_CODE (PATTERN (insn)) == SET
6672 && GET_CODE (SET_SRC (PATTERN (insn))) == UNSPEC_VOLATILE
6673 && XINT (SET_SRC (PATTERN (insn)), 1) == UNSPECV_MAIN_POOL)
6674 {
6675 /* There might be two main_pool instructions if base_reg
6676 is call-clobbered; one for shrink-wrapped code and one
6677 for the rest. We want to keep the first. */
6678 if (pool->pool_insn)
6679 {
6680 insn = PREV_INSN (insn);
6681 delete_insn (NEXT_INSN (insn));
6682 continue;
6683 }
6684 pool->pool_insn = insn;
6685 }
6686
6687 if (!TARGET_CPU_ZARCH && s390_execute_label (insn))
6688 {
6689 s390_add_execute (pool, insn);
6690 }
6691 else if (NONJUMP_INSN_P (insn) || CALL_P (insn))
6692 {
6693 rtx pool_ref = NULL_RTX;
6694 find_constant_pool_ref (PATTERN (insn), &pool_ref);
6695 if (pool_ref)
6696 {
6697 rtx constant = get_pool_constant (pool_ref);
6698 enum machine_mode mode = get_pool_mode (pool_ref);
6699 s390_add_constant (pool, constant, mode);
6700 }
6701 }
6702
6703 /* If hot/cold partitioning is enabled we have to make sure that
6704 the literal pool is emitted in the same section where the
6705 initialization of the literal pool base pointer takes place.
6706 emit_pool_after is only used in the non-overflow case on
6707 non-z/Architecture CPUs, where we can emit the literal pool at
6708 the end of the function body within the text section. */
6709 if (NOTE_P (insn)
6710 && NOTE_KIND (insn) == NOTE_INSN_SWITCH_TEXT_SECTIONS
6711 && !pool->emit_pool_after)
6712 pool->emit_pool_after = PREV_INSN (insn);
6713 }
6714
6715 gcc_assert (pool->pool_insn || pool->size == 0);
6716
6717 if (pool->size >= 4096)
6718 {
6719 /* We're going to chunkify the pool, so remove the main
6720 pool placeholder insn. */
6721 remove_insn (pool->pool_insn);
6722
6723 s390_free_pool (pool);
6724 pool = NULL;
6725 }
6726
6727 /* If the function ends with the section where the literal pool
6728 should be emitted, set the marker to its end. */
6729 if (pool && !pool->emit_pool_after)
6730 pool->emit_pool_after = get_last_insn ();
6731
6732 return pool;
6733 }
6734
6735 /* POOL holds the main literal pool as collected by s390_mainpool_start.
6736 Modify the current function to output the pool constants as well as
6737 the pool register setup instruction. */
6738
6739 static void
6740 s390_mainpool_finish (struct constant_pool *pool)
6741 {
6742 rtx base_reg = cfun->machine->base_reg;
6743 rtx insn;
6744
6745 /* If the pool is empty, we're done. */
6746 if (pool->size == 0)
6747 {
6748 /* We don't actually need a base register after all. */
6749 cfun->machine->base_reg = NULL_RTX;
6750
6751 if (pool->pool_insn)
6752 remove_insn (pool->pool_insn);
6753 s390_free_pool (pool);
6754 return;
6755 }
6756
6757 /* We need correct insn addresses. */
6758 shorten_branches (get_insns ());
6759
6760 /* On zSeries, we use a LARL to load the pool register. The pool is
6761 located in the .rodata section, so we emit it after the function. */
6762 if (TARGET_CPU_ZARCH)
6763 {
6764 insn = gen_main_base_64 (base_reg, pool->label);
6765 insn = emit_insn_after (insn, pool->pool_insn);
6766 INSN_ADDRESSES_NEW (insn, -1);
6767 remove_insn (pool->pool_insn);
6768
6769 insn = get_last_insn ();
6770 pool->pool_insn = emit_insn_after (gen_pool (const0_rtx), insn);
6771 INSN_ADDRESSES_NEW (pool->pool_insn, -1);
6772
6773 s390_dump_pool (pool, 0);
6774 }
6775
6776 /* On S/390, if the total size of the function's code plus literal pool
6777 does not exceed 4096 bytes, we use BASR to set up a function base
6778 pointer, and emit the literal pool at the end of the function. */
6779 else if (INSN_ADDRESSES (INSN_UID (pool->emit_pool_after))
6780 + pool->size + 8 /* alignment slop */ < 4096)
6781 {
6782 insn = gen_main_base_31_small (base_reg, pool->label);
6783 insn = emit_insn_after (insn, pool->pool_insn);
6784 INSN_ADDRESSES_NEW (insn, -1);
6785 remove_insn (pool->pool_insn);
6786
6787 insn = emit_label_after (pool->label, insn);
6788 INSN_ADDRESSES_NEW (insn, -1);
6789
6790 /* emit_pool_after will be set by s390_mainpool_start to the
6791 last insn of the section where the literal pool should be
6792 emitted. */
6793 insn = pool->emit_pool_after;
6794
6795 pool->pool_insn = emit_insn_after (gen_pool (const0_rtx), insn);
6796 INSN_ADDRESSES_NEW (pool->pool_insn, -1);
6797
6798 s390_dump_pool (pool, 1);
6799 }
6800
6801 /* Otherwise, we emit an inline literal pool and use BASR to branch
6802 over it, setting up the pool register at the same time. */
6803 else
6804 {
6805 rtx pool_end = gen_label_rtx ();
6806
6807 insn = gen_main_base_31_large (base_reg, pool->label, pool_end);
6808 insn = emit_jump_insn_after (insn, pool->pool_insn);
6809 JUMP_LABEL (insn) = pool_end;
6810 INSN_ADDRESSES_NEW (insn, -1);
6811 remove_insn (pool->pool_insn);
6812
6813 insn = emit_label_after (pool->label, insn);
6814 INSN_ADDRESSES_NEW (insn, -1);
6815
6816 pool->pool_insn = emit_insn_after (gen_pool (const0_rtx), insn);
6817 INSN_ADDRESSES_NEW (pool->pool_insn, -1);
6818
6819 insn = emit_label_after (pool_end, pool->pool_insn);
6820 INSN_ADDRESSES_NEW (insn, -1);
6821
6822 s390_dump_pool (pool, 1);
6823 }
6824
6825
6826 /* Replace all literal pool references. */
6827
6828 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
6829 {
6830 if (INSN_P (insn))
6831 replace_ltrel_base (&PATTERN (insn));
6832
6833 if (NONJUMP_INSN_P (insn) || CALL_P (insn))
6834 {
6835 rtx addr, pool_ref = NULL_RTX;
6836 find_constant_pool_ref (PATTERN (insn), &pool_ref);
6837 if (pool_ref)
6838 {
6839 if (s390_execute_label (insn))
6840 addr = s390_find_execute (pool, insn);
6841 else
6842 addr = s390_find_constant (pool, get_pool_constant (pool_ref),
6843 get_pool_mode (pool_ref));
6844
6845 replace_constant_pool_ref (&PATTERN (insn), pool_ref, addr);
6846 INSN_CODE (insn) = -1;
6847 }
6848 }
6849 }
6850
6851
6852 /* Free the pool. */
6853 s390_free_pool (pool);
6854 }
6855
6856 /* POOL holds the main literal pool as collected by s390_mainpool_start.
6857 We have decided we cannot use this pool, so revert all changes
6858 to the current function that were done by s390_mainpool_start. */
6859 static void
6860 s390_mainpool_cancel (struct constant_pool *pool)
6861 {
6862 /* We didn't actually change the instruction stream, so simply
6863 free the pool memory. */
6864 s390_free_pool (pool);
6865 }
6866
6867
6868 /* Chunkify the literal pool. */
6869
6870 #define S390_POOL_CHUNK_MIN 0xc00
6871 #define S390_POOL_CHUNK_MAX 0xe00
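/* Pool entries are reached via base plus the classic 12-bit unsigned
   displacement, i.e. at most 4095 bytes from the base label. Closing
   chunks between 0xc00 and 0xe00 bytes leaves headroom for alignment
   padding, execute templates and the reload insns inserted later. */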
6872
6873 static struct constant_pool *
6874 s390_chunkify_start (void)
6875 {
6876 struct constant_pool *curr_pool = NULL, *pool_list = NULL;
6877 int extra_size = 0;
6878 bitmap far_labels;
6879 rtx pending_ltrel = NULL_RTX;
6880 rtx insn;
6881
6882 rtx (*gen_reload_base) (rtx, rtx) =
6883 TARGET_CPU_ZARCH ? gen_reload_base_64 : gen_reload_base_31;
6884
6885
6886 /* We need correct insn addresses. */
6887
6888 shorten_branches (get_insns ());
6889
6890 /* Scan all insns and move literals to pool chunks. */
6891
6892 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
6893 {
6894 bool section_switch_p = false;
6895
6896 /* Check for pending LTREL_BASE. */
6897 if (INSN_P (insn))
6898 {
6899 rtx ltrel_base = find_ltrel_base (PATTERN (insn));
6900 if (ltrel_base)
6901 {
6902 gcc_assert (ltrel_base == pending_ltrel);
6903 pending_ltrel = NULL_RTX;
6904 }
6905 }
6906
6907 if (!TARGET_CPU_ZARCH && s390_execute_label (insn))
6908 {
6909 if (!curr_pool)
6910 curr_pool = s390_start_pool (&pool_list, insn);
6911
6912 s390_add_execute (curr_pool, insn);
6913 s390_add_pool_insn (curr_pool, insn);
6914 }
6915 else if (NONJUMP_INSN_P (insn) || CALL_P (insn))
6916 {
6917 rtx pool_ref = NULL_RTX;
6918 find_constant_pool_ref (PATTERN (insn), &pool_ref);
6919 if (pool_ref)
6920 {
6921 rtx constant = get_pool_constant (pool_ref);
6922 enum machine_mode mode = get_pool_mode (pool_ref);
6923
6924 if (!curr_pool)
6925 curr_pool = s390_start_pool (&pool_list, insn);
6926
6927 s390_add_constant (curr_pool, constant, mode);
6928 s390_add_pool_insn (curr_pool, insn);
6929
6930 /* Don't split the pool chunk between a LTREL_OFFSET load
6931 and the corresponding LTREL_BASE. */
6932 if (GET_CODE (constant) == CONST
6933 && GET_CODE (XEXP (constant, 0)) == UNSPEC
6934 && XINT (XEXP (constant, 0), 1) == UNSPEC_LTREL_OFFSET)
6935 {
6936 gcc_assert (!pending_ltrel);
6937 pending_ltrel = pool_ref;
6938 }
6939 }
6940 }
6941
6942 if (JUMP_P (insn) || JUMP_TABLE_DATA_P (insn) || LABEL_P (insn))
6943 {
6944 if (curr_pool)
6945 s390_add_pool_insn (curr_pool, insn);
6946 /* An LTREL_BASE must follow within the same basic block. */
6947 gcc_assert (!pending_ltrel);
6948 }
6949
6950 if (NOTE_P (insn))
6951 switch (NOTE_KIND (insn))
6952 {
6953 case NOTE_INSN_SWITCH_TEXT_SECTIONS:
6954 section_switch_p = true;
6955 break;
6956 case NOTE_INSN_VAR_LOCATION:
6957 case NOTE_INSN_CALL_ARG_LOCATION:
6958 continue;
6959 default:
6960 break;
6961 }
6962
6963 if (!curr_pool
6964 || INSN_ADDRESSES_SIZE () <= (size_t) INSN_UID (insn)
6965 || INSN_ADDRESSES (INSN_UID (insn)) == -1)
6966 continue;
6967
6968 if (TARGET_CPU_ZARCH)
6969 {
6970 if (curr_pool->size < S390_POOL_CHUNK_MAX)
6971 continue;
6972
6973 s390_end_pool (curr_pool, NULL_RTX);
6974 curr_pool = NULL;
6975 }
6976 else
6977 {
6978 int chunk_size = INSN_ADDRESSES (INSN_UID (insn))
6979 - INSN_ADDRESSES (INSN_UID (curr_pool->first_insn))
6980 + extra_size;
6981
6982 /* We will later have to insert base register reload insns.
6983 Those will have an effect on code size, which we need to
6984 consider here. This calculation makes rather pessimistic
6985 worst-case assumptions. */
6986 if (LABEL_P (insn))
6987 extra_size += 6;
6988
6989 if (chunk_size < S390_POOL_CHUNK_MIN
6990 && curr_pool->size < S390_POOL_CHUNK_MIN
6991 && !section_switch_p)
6992 continue;
6993
6994 /* Pool chunks can only be inserted after BARRIERs ... */
6995 if (BARRIER_P (insn))
6996 {
6997 s390_end_pool (curr_pool, insn);
6998 curr_pool = NULL;
6999 extra_size = 0;
7000 }
7001
7002 /* ... so if we don't find one in time, create one. */
7003 else if (chunk_size > S390_POOL_CHUNK_MAX
7004 || curr_pool->size > S390_POOL_CHUNK_MAX
7005 || section_switch_p)
7006 {
7007 rtx label, jump, barrier, next, prev;
7008
7009 if (!section_switch_p)
7010 {
7011 /* We can insert the barrier only after a 'real' insn. */
7012 if (! NONJUMP_INSN_P (insn) && ! CALL_P (insn))
7013 continue;
7014 if (get_attr_length (insn) == 0)
7015 continue;
7016 /* Don't separate LTREL_BASE from the corresponding
7017 LTREL_OFFSET load. */
7018 if (pending_ltrel)
7019 continue;
7020 next = insn;
7021 do
7022 {
7023 insn = next;
7024 next = NEXT_INSN (insn);
7025 }
7026 while (next
7027 && NOTE_P (next)
7028 && (NOTE_KIND (next) == NOTE_INSN_VAR_LOCATION
7029 || NOTE_KIND (next) == NOTE_INSN_CALL_ARG_LOCATION));
7030 }
7031 else
7032 {
7033 gcc_assert (!pending_ltrel);
7034
7035 /* The old pool has to end before the section switch
7036 note in order to make it part of the current
7037 section. */
7038 insn = PREV_INSN (insn);
7039 }
7040
7041 label = gen_label_rtx ();
7042 prev = insn;
7043 if (prev && NOTE_P (prev))
7044 prev = prev_nonnote_insn (prev);
7045 if (prev)
7046 jump = emit_jump_insn_after_setloc (gen_jump (label), insn,
7047 INSN_LOCATION (prev));
7048 else
7049 jump = emit_jump_insn_after_noloc (gen_jump (label), insn);
7050 barrier = emit_barrier_after (jump);
7051 insn = emit_label_after (label, barrier);
7052 JUMP_LABEL (jump) = label;
7053 LABEL_NUSES (label) = 1;
7054
7055 INSN_ADDRESSES_NEW (jump, -1);
7056 INSN_ADDRESSES_NEW (barrier, -1);
7057 INSN_ADDRESSES_NEW (insn, -1);
7058
7059 s390_end_pool (curr_pool, barrier);
7060 curr_pool = NULL;
7061 extra_size = 0;
7062 }
7063 }
7064 }
7065
7066 if (curr_pool)
7067 s390_end_pool (curr_pool, NULL_RTX);
7068 gcc_assert (!pending_ltrel);
7069
7070 /* Find all labels that are branched into
7071 from an insn belonging to a different chunk. */
7072
7073 far_labels = BITMAP_ALLOC (NULL);
7074
7075 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
7076 {
7077 rtx table;
7078
7079 /* Labels marked with LABEL_PRESERVE_P can be targets
7080 of non-local jumps, so we have to mark them.
7081 The same holds for named labels.
7082
7083 Don't do that, however, if it is the label before
7084 a jump table. */
7085
7086 if (LABEL_P (insn)
7087 && (LABEL_PRESERVE_P (insn) || LABEL_NAME (insn)))
7088 {
7089 rtx vec_insn = NEXT_INSN (insn);
7090 if (! vec_insn || ! JUMP_TABLE_DATA_P (vec_insn))
7091 bitmap_set_bit (far_labels, CODE_LABEL_NUMBER (insn));
7092 }
7093 /* Check potential targets in a table jump (casesi_jump). */
7094 else if (tablejump_p (insn, NULL, &table))
7095 {
7096 rtx vec_pat = PATTERN (table);
7097 int i, diff_p = GET_CODE (vec_pat) == ADDR_DIFF_VEC;
7098
7099 for (i = 0; i < XVECLEN (vec_pat, diff_p); i++)
7100 {
7101 rtx label = XEXP (XVECEXP (vec_pat, diff_p, i), 0);
7102
7103 if (s390_find_pool (pool_list, label)
7104 != s390_find_pool (pool_list, insn))
7105 bitmap_set_bit (far_labels, CODE_LABEL_NUMBER (label));
7106 }
7107 }
7108 /* If we have a direct jump (conditional or unconditional),
7109 check all potential targets. */
7110 else if (JUMP_P (insn))
7111 {
7112 rtx pat = PATTERN (insn);
7113
7114 if (GET_CODE (pat) == PARALLEL)
7115 pat = XVECEXP (pat, 0, 0);
7116
7117 if (GET_CODE (pat) == SET)
7118 {
7119 rtx label = JUMP_LABEL (insn);
7120 if (label && !ANY_RETURN_P (label))
7121 {
7122 if (s390_find_pool (pool_list, label)
7123 != s390_find_pool (pool_list, insn))
7124 bitmap_set_bit (far_labels, CODE_LABEL_NUMBER (label));
7125 }
7126 }
7127 }
7128 }
7129
7130 /* Insert base register reload insns before every pool. */
7131
7132 for (curr_pool = pool_list; curr_pool; curr_pool = curr_pool->next)
7133 {
7134 rtx new_insn = gen_reload_base (cfun->machine->base_reg,
7135 curr_pool->label);
7136 rtx insn = curr_pool->first_insn;
7137 INSN_ADDRESSES_NEW (emit_insn_before (new_insn, insn), -1);
7138 }
7139
7140 /* Insert base register reload insns at every far label. */
7141
7142 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
7143 if (LABEL_P (insn)
7144 && bitmap_bit_p (far_labels, CODE_LABEL_NUMBER (insn)))
7145 {
7146 struct constant_pool *pool = s390_find_pool (pool_list, insn);
7147 if (pool)
7148 {
7149 rtx new_insn = gen_reload_base (cfun->machine->base_reg,
7150 pool->label);
7151 INSN_ADDRESSES_NEW (emit_insn_after (new_insn, insn), -1);
7152 }
7153 }
7154
7155
7156 BITMAP_FREE (far_labels);
7157
7158
7159 /* Recompute insn addresses. */
7160
7161 init_insn_lengths ();
7162 shorten_branches (get_insns ());
7163
7164 return pool_list;
7165 }
7166
7167 /* POOL_LIST is a chunk list as prepared by s390_chunkify_start.
7168 After we have decided to use this list, finish implementing
7169 all changes to the current function as required. */
7170
7171 static void
7172 s390_chunkify_finish (struct constant_pool *pool_list)
7173 {
7174 struct constant_pool *curr_pool = NULL;
7175 rtx insn;
7176
7177
7178 /* Replace all literal pool references. */
7179
7180 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
7181 {
7182 if (INSN_P (insn))
7183 replace_ltrel_base (&PATTERN (insn));
7184
7185 curr_pool = s390_find_pool (pool_list, insn);
7186 if (!curr_pool)
7187 continue;
7188
7189 if (NONJUMP_INSN_P (insn) || CALL_P (insn))
7190 {
7191 rtx addr, pool_ref = NULL_RTX;
7192 find_constant_pool_ref (PATTERN (insn), &pool_ref);
7193 if (pool_ref)
7194 {
7195 if (s390_execute_label (insn))
7196 addr = s390_find_execute (curr_pool, insn);
7197 else
7198 addr = s390_find_constant (curr_pool,
7199 get_pool_constant (pool_ref),
7200 get_pool_mode (pool_ref));
7201
7202 replace_constant_pool_ref (&PATTERN (insn), pool_ref, addr);
7203 INSN_CODE (insn) = -1;
7204 }
7205 }
7206 }
7207
7208 /* Dump out all literal pools. */
7209
7210 for (curr_pool = pool_list; curr_pool; curr_pool = curr_pool->next)
7211 s390_dump_pool (curr_pool, 0);
7212
7213 /* Free pool list. */
7214
7215 while (pool_list)
7216 {
7217 struct constant_pool *next = pool_list->next;
7218 s390_free_pool (pool_list);
7219 pool_list = next;
7220 }
7221 }
7222
7223 /* POOL_LIST is a chunk list as prepared by s390_chunkify_start.
7224 We have decided we cannot use this list, so revert all changes
7225 to the current function that were done by s390_chunkify_start. */
7226
7227 static void
7228 s390_chunkify_cancel (struct constant_pool *pool_list)
7229 {
7230 struct constant_pool *curr_pool = NULL;
7231 rtx insn;
7232
7233 /* Remove all pool placeholder insns. */
7234
7235 for (curr_pool = pool_list; curr_pool; curr_pool = curr_pool->next)
7236 {
7237 /* Did we insert an extra barrier? Remove it. */
7238 rtx barrier = PREV_INSN (curr_pool->pool_insn);
7239 rtx jump = barrier ? PREV_INSN (barrier) : NULL_RTX;
7240 rtx label = NEXT_INSN (curr_pool->pool_insn);
7241
7242 if (jump && JUMP_P (jump)
7243 && barrier && BARRIER_P (barrier)
7244 && label && LABEL_P (label)
7245 && GET_CODE (PATTERN (jump)) == SET
7246 && SET_DEST (PATTERN (jump)) == pc_rtx
7247 && GET_CODE (SET_SRC (PATTERN (jump))) == LABEL_REF
7248 && XEXP (SET_SRC (PATTERN (jump)), 0) == label)
7249 {
7250 remove_insn (jump);
7251 remove_insn (barrier);
7252 remove_insn (label);
7253 }
7254
7255 remove_insn (curr_pool->pool_insn);
7256 }
7257
7258 /* Remove all base register reload insns. */
7259
7260 for (insn = get_insns (); insn; )
7261 {
7262 rtx next_insn = NEXT_INSN (insn);
7263
7264 if (NONJUMP_INSN_P (insn)
7265 && GET_CODE (PATTERN (insn)) == SET
7266 && GET_CODE (SET_SRC (PATTERN (insn))) == UNSPEC
7267 && XINT (SET_SRC (PATTERN (insn)), 1) == UNSPEC_RELOAD_BASE)
7268 remove_insn (insn);
7269
7270 insn = next_insn;
7271 }
7272
7273 /* Free pool list. */
7274
7275 while (pool_list)
7276 {
7277 struct constant_pool *next = pool_list->next;
7278 s390_free_pool (pool_list);
7279 pool_list = next;
7280 }
7281 }
7282
7283 /* Output the constant pool entry EXP in mode MODE with alignment ALIGN. */
7284
7285 void
7286 s390_output_pool_entry (rtx exp, enum machine_mode mode, unsigned int align)
7287 {
7288 REAL_VALUE_TYPE r;
7289
7290 switch (GET_MODE_CLASS (mode))
7291 {
7292 case MODE_FLOAT:
7293 case MODE_DECIMAL_FLOAT:
7294 gcc_assert (GET_CODE (exp) == CONST_DOUBLE);
7295
7296 REAL_VALUE_FROM_CONST_DOUBLE (r, exp);
7297 assemble_real (r, mode, align);
7298 break;
7299
7300 case MODE_INT:
7301 assemble_integer (exp, GET_MODE_SIZE (mode), align, 1);
7302 mark_symbol_refs_as_used (exp);
7303 break;
7304
7305 default:
7306 gcc_unreachable ();
7307 }
7308 }
7309
7310
7311 /* Return an RTL expression representing the value of the return address
7312 for the frame COUNT steps up from the current frame. FRAME is the
7313 frame pointer of that frame. */
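/* For the standard (non-packed) layout this is simply the r14 save
   slot: RETURN_REGNUM is 14, so the offset below works out to
   56(frame) with 4-byte and 112(frame) with 8-byte UNITS_PER_LONG. */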
7314
7315 rtx
7316 s390_return_addr_rtx (int count, rtx frame ATTRIBUTE_UNUSED)
7317 {
7318 int offset;
7319 rtx addr;
7320
7321 /* Without backchain, we fail for all but the current frame. */
7322
7323 if (!TARGET_BACKCHAIN && count > 0)
7324 return NULL_RTX;
7325
7326 /* For the current frame, we need to make sure the initial
7327 value of RETURN_REGNUM is actually saved. */
7328
7329 if (count == 0)
7330 {
7331 /* On non-z architectures branch splitting could overwrite r14. */
7332 if (TARGET_CPU_ZARCH)
7333 return get_hard_reg_initial_val (Pmode, RETURN_REGNUM);
7334 else
7335 {
7336 cfun_frame_layout.save_return_addr_p = true;
7337 return gen_rtx_MEM (Pmode, return_address_pointer_rtx);
7338 }
7339 }
7340
7341 if (TARGET_PACKED_STACK)
7342 offset = -2 * UNITS_PER_LONG;
7343 else
7344 offset = RETURN_REGNUM * UNITS_PER_LONG;
7345
7346 addr = plus_constant (Pmode, frame, offset);
7347 addr = memory_address (Pmode, addr);
7348 return gen_rtx_MEM (Pmode, addr);
7349 }
7350
7351 /* Return an RTL expression representing the back chain stored in
7352 the current stack frame. */
7353
7354 rtx
7355 s390_back_chain_rtx (void)
7356 {
7357 rtx chain;
7358
7359 gcc_assert (TARGET_BACKCHAIN);
7360
7361 if (TARGET_PACKED_STACK)
7362 chain = plus_constant (Pmode, stack_pointer_rtx,
7363 STACK_POINTER_OFFSET - UNITS_PER_LONG);
7364 else
7365 chain = stack_pointer_rtx;
7366
7367 chain = gen_rtx_MEM (Pmode, chain);
7368 return chain;
7369 }
7370
7371 /* Find the first call-clobbered register unused in a function.
7372 This could be used as a base register in a leaf function
7373 or for holding the return address before the epilogue. */
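/* Only r0..r5 - the call-clobbered GPRs of the s390 ABI - are
   scanned; if all of them are live, 0 (i.e. r0) is returned as a
   fallback. */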
7374
7375 static int
7376 find_unused_clobbered_reg (void)
7377 {
7378 int i;
7379 for (i = 0; i < 6; i++)
7380 if (!df_regs_ever_live_p (i))
7381 return i;
7382 return 0;
7383 }
7384
7385
7386 /* Helper function for s390_regs_ever_clobbered. Sets the fields in DATA for all
7387 clobbered hard regs in SETREG. */
7388
7389 static void
7390 s390_reg_clobbered_rtx (rtx setreg, const_rtx set_insn ATTRIBUTE_UNUSED, void *data)
7391 {
7392 char *regs_ever_clobbered = (char *)data;
7393 unsigned int i, regno;
7394 enum machine_mode mode = GET_MODE (setreg);
7395
7396 if (GET_CODE (setreg) == SUBREG)
7397 {
7398 rtx inner = SUBREG_REG (setreg);
7399 if (!GENERAL_REG_P (inner) && !FP_REG_P (inner))
7400 return;
7401 regno = subreg_regno (setreg);
7402 }
7403 else if (GENERAL_REG_P (setreg) || FP_REG_P (setreg))
7404 regno = REGNO (setreg);
7405 else
7406 return;
7407
7408 for (i = regno;
7409 i < regno + HARD_REGNO_NREGS (regno, mode);
7410 i++)
7411 regs_ever_clobbered[i] = 1;
7412 }
7413
7414 /* Walks through all basic blocks of the current function looking
7415 for clobbered hard regs using s390_reg_clobbered_rtx. The fields
7416 of the passed char array REGS_EVER_CLOBBERED are set to one for
7417 each of those regs. */
7418
7419 static void
7420 s390_regs_ever_clobbered (char regs_ever_clobbered[])
7421 {
7422 basic_block cur_bb;
7423 rtx cur_insn;
7424 unsigned int i;
7425
7426 memset (regs_ever_clobbered, 0, 32);
7427
7428 /* For non-leaf functions we have to consider all call clobbered regs to be
7429 clobbered. */
7430 if (!crtl->is_leaf)
7431 {
7432 for (i = 0; i < 32; i++)
7433 regs_ever_clobbered[i] = call_really_used_regs[i];
7434 }
7435
7436 /* Make the "magic" eh_return registers live if necessary. For regs_ever_live
7437 this work is done by liveness analysis (mark_regs_live_at_end).
7438 Special care is needed for functions containing landing pads. Landing pads
7439 may use the eh registers, but the code which sets these registers is not
7440 contained in that function. Hence s390_regs_ever_clobbered is not able to
7441 deal with this automatically. */
7442 if (crtl->calls_eh_return || cfun->machine->has_landing_pad_p)
7443 for (i = 0; EH_RETURN_DATA_REGNO (i) != INVALID_REGNUM ; i++)
7444 if (crtl->calls_eh_return
7445 || (cfun->machine->has_landing_pad_p
7446 && df_regs_ever_live_p (EH_RETURN_DATA_REGNO (i))))
7447 regs_ever_clobbered[EH_RETURN_DATA_REGNO (i)] = 1;
7448
7449 /* For nonlocal gotos all call-saved registers have to be saved.
7450 This flag is also set for the unwinding code in libgcc.
7451 See expand_builtin_unwind_init. For regs_ever_live this is done by
7452 reload. */
7453 if (crtl->saves_all_registers)
7454 for (i = 0; i < 32; i++)
7455 if (!call_really_used_regs[i])
7456 regs_ever_clobbered[i] = 1;
7457
7458 FOR_EACH_BB_FN (cur_bb, cfun)
7459 {
7460 FOR_BB_INSNS (cur_bb, cur_insn)
7461 {
7462 rtx pat;
7463
7464 if (!INSN_P (cur_insn))
7465 continue;
7466
7467 pat = PATTERN (cur_insn);
7468
7469 /* Ignore GPR restore insns. */
7470 if (epilogue_completed && RTX_FRAME_RELATED_P (cur_insn))
7471 {
7472 if (GET_CODE (pat) == SET
7473 && GENERAL_REG_P (SET_DEST (pat)))
7474 {
7475 /* lgdr */
7476 if (GET_MODE (SET_SRC (pat)) == DImode
7477 && FP_REG_P (SET_SRC (pat)))
7478 continue;
7479
7480 /* l / lg */
7481 if (GET_CODE (SET_SRC (pat)) == MEM)
7482 continue;
7483 }
7484
7485 /* lm / lmg */
7486 if (GET_CODE (pat) == PARALLEL
7487 && load_multiple_operation (pat, VOIDmode))
7488 continue;
7489 }
7490
7491 note_stores (pat,
7492 s390_reg_clobbered_rtx,
7493 regs_ever_clobbered);
7494 }
7495 }
7496 }
7497
7498 /* Determine the frame area which actually has to be accessed
7499 in the function epilogue. The values are stored at the
7500 given pointers AREA_BOTTOM (address of the lowest used stack
7501 address) and AREA_TOP (address of the first item which does
7502 not belong to the stack frame). */
7503
7504 static void
7505 s390_frame_area (int *area_bottom, int *area_top)
7506 {
7507 int b, t;
7508
7509 b = INT_MAX;
7510 t = INT_MIN;
7511
7512 if (cfun_frame_layout.first_restore_gpr != -1)
7513 {
7514 b = (cfun_frame_layout.gprs_offset
7515 + cfun_frame_layout.first_restore_gpr * UNITS_PER_LONG);
7516 t = b + (cfun_frame_layout.last_restore_gpr
7517 - cfun_frame_layout.first_restore_gpr + 1) * UNITS_PER_LONG;
7518 }
7519
7520 if (TARGET_64BIT && cfun_save_high_fprs_p)
7521 {
7522 b = MIN (b, cfun_frame_layout.f8_offset);
7523 t = MAX (t, (cfun_frame_layout.f8_offset
7524 + cfun_frame_layout.high_fprs * 8));
7525 }
7526
7527 if (!TARGET_64BIT)
7528 {
7529 if (cfun_fpr_save_p (FPR4_REGNUM))
7530 {
7531 b = MIN (b, cfun_frame_layout.f4_offset);
7532 t = MAX (t, cfun_frame_layout.f4_offset + 8);
7533 }
7534 if (cfun_fpr_save_p (FPR6_REGNUM))
7535 {
7536 b = MIN (b, cfun_frame_layout.f4_offset + 8);
7537 t = MAX (t, cfun_frame_layout.f4_offset + 16);
7538 }
7539 }
7540 *area_bottom = b;
7541 *area_top = t;
7542 }
7543 /* Update gpr_save_slots in the frame layout trying to make use of
7544 FPRs as GPR save slots.
7545 This is a helper routine of s390_register_info. */
7546
7547 static void
7548 s390_register_info_gprtofpr ()
7549 {
7550 int save_reg_slot = FPR0_REGNUM;
7551 int i, j;
7552
7553 if (!TARGET_Z10 || !TARGET_HARD_FLOAT || !crtl->is_leaf)
7554 return;
7555
7556 for (i = 15; i >= 6; i--)
7557 {
7558 if (cfun_gpr_save_slot (i) == 0)
7559 continue;
7560
7561 /* Advance to the next FP register which can be used as a
7562 GPR save slot. */
7563 while ((!call_really_used_regs[save_reg_slot]
7564 || df_regs_ever_live_p (save_reg_slot)
7565 || cfun_fpr_save_p (save_reg_slot))
7566 && FP_REGNO_P (save_reg_slot))
7567 save_reg_slot++;
7568 if (!FP_REGNO_P (save_reg_slot))
7569 {
7570 /* We only want to use ldgr/lgdr if we can get rid of
7571 stm/lm entirely. So undo the gpr slot allocation in
7572 case we ran out of FPR save slots. */
7573 for (j = 6; j <= 15; j++)
7574 if (FP_REGNO_P (cfun_gpr_save_slot (j)))
7575 cfun_gpr_save_slot (j) = -1;
7576 break;
7577 }
7578 cfun_gpr_save_slot (i) = save_reg_slot++;
7579 }
7580 }
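
/* Illustrative example (hypothetical register state): in a leaf
   function on z10 where r15, r14 and r11 have pending save slots and
   the first few FPRs are call-clobbered, not live and not otherwise
   saved, the loop above runs i = 15, 14, ..., 6 and assigns r15 to
   the first usable FPR, r14 to the next one, and r11 to the one after
   that.  If the supply of usable FPRs runs out before every pending
   GPR got one, all FPR slots are reverted to stack slots (-1) so that
   a single stm/lm pair can still be used.  */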
7581
7582 /* Set the bits in fpr_bitmap for FPRs which need to be saved due to
7583 stdarg.
7584 This is a helper routine for s390_register_info. */
7585
7586 static void
7587 s390_register_info_stdarg_fpr ()
7588 {
7589 int i;
7590 int min_fpr;
7591 int max_fpr;
7592
7593 /* Save the FP argument regs for stdarg. f0, f2 for 31 bit and
7594 f0, f2, f4, f6 for 64 bit. */
7595 if (!cfun->stdarg
7596 || !TARGET_HARD_FLOAT
7597 || !cfun->va_list_fpr_size
7598 || crtl->args.info.fprs >= FP_ARG_NUM_REG)
7599 return;
7600
7601 min_fpr = crtl->args.info.fprs;
7602 max_fpr = min_fpr + cfun->va_list_fpr_size;
7603 if (max_fpr > FP_ARG_NUM_REG)
7604 max_fpr = FP_ARG_NUM_REG;
7605
7606 for (i = min_fpr; i < max_fpr; i++)
7607 cfun_set_fpr_save (i + FPR0_REGNUM);
7608 }
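
/* For illustration (hypothetical values): with FP_ARG_NUM_REG == 4,
   one FP argument register already consumed (crtl->args.info.fprs
   == 1) and cfun->va_list_fpr_size == 8, we get min_fpr == 1 and
   max_fpr clamped to 4, so the FPRs at argument positions 1, 2 and 3
   (hard regs FPR0_REGNUM + 1 .. FPR0_REGNUM + 3) are marked as
   needing a save slot.  */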
7609
7610 /* Reserve the GPR save slots for GPRs which need to be saved due to
7611 stdarg.
7612 This is a helper routine for s390_register_info. */
7613
7614 static void
7615 s390_register_info_stdarg_gpr ()
7616 {
7617 int i;
7618 int min_gpr;
7619 int max_gpr;
7620
7621 if (!cfun->stdarg
7622 || !cfun->va_list_gpr_size
7623 || crtl->args.info.gprs >= GP_ARG_NUM_REG)
7624 return;
7625
7626 min_gpr = crtl->args.info.gprs;
7627 max_gpr = min_gpr + cfun->va_list_gpr_size;
7628 if (max_gpr > GP_ARG_NUM_REG)
7629 max_gpr = GP_ARG_NUM_REG;
7630
7631 for (i = min_gpr; i < max_gpr; i++)
7632 cfun_gpr_save_slot (2 + i) = -1;
7633 }
7634
7635 /* The GPR and FPR save slots in cfun->machine->frame_layout are set
7636 for registers which need to be saved in function prologue.
7637 This function can be used until the insns emitted for save/restore
7638 of the regs are visible in the RTL stream. */
7639
7640 static void
7641 s390_register_info ()
7642 {
7643 int i, j;
7644 char clobbered_regs[32];
7645
7646 gcc_assert (!epilogue_completed);
7647
7648 if (reload_completed)
7649 /* After reload we rely on our own routine to determine which
7650 registers need saving. */
7651 s390_regs_ever_clobbered (clobbered_regs);
7652 else
7653 /* During reload we use regs_ever_live as a base since reload
7654 makes changes in there which we otherwise would not be aware
7655 of. */
7656 for (i = 0; i < 32; i++)
7657 clobbered_regs[i] = df_regs_ever_live_p (i);
7658
7659 for (i = 0; i < 32; i++)
7660 clobbered_regs[i] = clobbered_regs[i] && !global_regs[i];
7661
7662 /* Mark the call-saved FPRs which need to be saved.
7663 This needs to be done before checking the special GPRs since the
7664 stack pointer usage depends on whether high FPRs have to be saved
7665 or not. */
7666 cfun_frame_layout.fpr_bitmap = 0;
7667 cfun_frame_layout.high_fprs = 0;
7668 for (i = FPR0_REGNUM; i <= FPR15_REGNUM; i++)
7669 if (clobbered_regs[i] && !call_really_used_regs[i])
7670 {
7671 cfun_set_fpr_save (i);
7672 if (i >= FPR8_REGNUM)
7673 cfun_frame_layout.high_fprs++;
7674 }
7675
7676 if (flag_pic)
7677 clobbered_regs[PIC_OFFSET_TABLE_REGNUM]
7678 |= !!df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM);
7679
7680 clobbered_regs[BASE_REGNUM]
7681 |= (cfun->machine->base_reg
7682 && REGNO (cfun->machine->base_reg) == BASE_REGNUM);
7683
7684 clobbered_regs[HARD_FRAME_POINTER_REGNUM]
7685 |= !!frame_pointer_needed;
7686
7687 /* On pre z900 machines this might take until machine dependent
7688 reorg to decide.
7689 save_return_addr_p will only be set on non-zarch machines so
7690 there is no risk that r14 goes into an FPR instead of a stack
7691 slot. */
7692 clobbered_regs[RETURN_REGNUM]
7693 |= (!crtl->is_leaf
7694 || TARGET_TPF_PROFILING
7695 || cfun->machine->split_branches_pending_p
7696 || cfun_frame_layout.save_return_addr_p
7697 || crtl->calls_eh_return);
7698
7699 clobbered_regs[STACK_POINTER_REGNUM]
7700 |= (!crtl->is_leaf
7701 || TARGET_TPF_PROFILING
7702 || cfun_save_high_fprs_p
7703 || get_frame_size () > 0
7704 || (reload_completed && cfun_frame_layout.frame_size > 0)
7705 || cfun->calls_alloca);
7706
7707 memset (cfun_frame_layout.gpr_save_slots, 0, 16);
7708
7709 for (i = 6; i < 16; i++)
7710 if (clobbered_regs[i])
7711 cfun_gpr_save_slot (i) = -1;
7712
7713 s390_register_info_stdarg_fpr ();
7714 s390_register_info_gprtofpr ();
7715
7716 /* First find the range of GPRs to be restored. Vararg regs don't
7717 need to be restored, so we compute this range before assigning
7718 stack slots to the vararg GPRs. */
7719 for (i = 0; i < 16 && cfun_gpr_save_slot (i) != -1; i++);
7720 for (j = 15; j > i && cfun_gpr_save_slot (j) != -1; j--);
7721 cfun_frame_layout.first_restore_gpr = (i == 16) ? -1 : i;
7722 cfun_frame_layout.last_restore_gpr = (i == 16) ? -1 : j;
7723
7724 /* stdarg functions might need to save GPRs 2 to 6. This might
7725 override the GPR->FPR save decision made above for r6 since
7726 vararg regs must go to the stack. */
7727 s390_register_info_stdarg_gpr ();
7728
7729 /* Now the range of GPRs which need saving. */
7730 for (i = 0; i < 16 && cfun_gpr_save_slot (i) != -1; i++);
7731 for (j = 15; j > i && cfun_gpr_save_slot (j) != -1; j--);
7732 cfun_frame_layout.first_save_gpr = (i == 16) ? -1 : i;
7733 cfun_frame_layout.last_save_gpr = (i == 16) ? -1 : j;
7734 }
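
/* Example of the range scans above (hypothetical slot assignment):
   if r6..r13 were redirected to FPR save slots (the slot value is an
   FPR regno, i.e. != -1) while r14 and r15 kept stack slots (-1), the
   first loop stops at i == 14 and the second at j == 15, giving a
   restore/save range of r14..r15 only; the FPR-saved GPRs are handled
   by s390_save_gprs_to_fprs/s390_restore_gprs_from_fprs instead.  */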
7735
7736 /* This function is called by s390_optimize_prologue in order to get
7737 rid of unnecessary GPR save/restore instructions. The register info
7738 for the GPRs is re-computed and the ranges are re-calculated. */
7739
7740 static void
7741 s390_optimize_register_info ()
7742 {
7743 char clobbered_regs[32];
7744 int i, j;
7745
7746 gcc_assert (epilogue_completed);
7747 gcc_assert (!cfun->machine->split_branches_pending_p);
7748
7749 s390_regs_ever_clobbered (clobbered_regs);
7750
7751 for (i = 0; i < 32; i++)
7752 clobbered_regs[i] = clobbered_regs[i] && !global_regs[i];
7753
7754 /* There is still special treatment needed for cases invisible to
7755 s390_regs_ever_clobbered. */
7756 clobbered_regs[RETURN_REGNUM]
7757 |= (TARGET_TPF_PROFILING
7758 /* When expanding builtin_return_addr in ESA mode we do not
7759 know whether r14 will later be needed as scratch reg when
7760 doing branch splitting. So the builtin always accesses the
7761 r14 save slot and we need to stick to the save/restore
7762 decision for r14 even if it turns out that it didn't get
7763 clobbered. */
7764 || cfun_frame_layout.save_return_addr_p
7765 || crtl->calls_eh_return);
7766
7767 memset (cfun_frame_layout.gpr_save_slots, 0, 6);
7768
7769 for (i = 6; i < 16; i++)
7770 if (!clobbered_regs[i])
7771 cfun_gpr_save_slot (i) = 0;
7772
7773 for (i = 0; i < 16 && cfun_gpr_save_slot (i) != -1; i++);
7774 for (j = 15; j > i && cfun_gpr_save_slot (j) != -1; j--);
7775 cfun_frame_layout.first_restore_gpr = (i == 16) ? -1 : i;
7776 cfun_frame_layout.last_restore_gpr = (i == 16) ? -1 : j;
7777
7778 s390_register_info_stdarg_gpr ();
7779
7780 for (i = 0; i < 16 && cfun_gpr_save_slot (i) != -1; i++);
7781 for (j = 15; j > i && cfun_gpr_save_slot (j) != -1; j--);
7782 cfun_frame_layout.first_save_gpr = (i == 16) ? -1 : i;
7783 cfun_frame_layout.last_save_gpr = (i == 16) ? -1 : j;
7784 }
7785
7786 /* Fill cfun->machine with info about frame of current function. */
7787
7788 static void
7789 s390_frame_info (void)
7790 {
7791 HOST_WIDE_INT lowest_offset;
7792
7793 cfun_frame_layout.first_save_gpr_slot = cfun_frame_layout.first_save_gpr;
7794 cfun_frame_layout.last_save_gpr_slot = cfun_frame_layout.last_save_gpr;
7795
7796 /* The va_arg builtin uses a constant distance of 16 *
7797 UNITS_PER_LONG (r0-r15) to reach the FPRs from the reg_save_area
7798 pointer. So even if we are going to save the stack pointer in an
7799 FPR we need the stack space in order to keep the offsets
7800 correct. */
7801 if (cfun->stdarg && cfun_save_arg_fprs_p)
7802 {
7803 cfun_frame_layout.last_save_gpr_slot = STACK_POINTER_REGNUM;
7804
7805 if (cfun_frame_layout.first_save_gpr_slot == -1)
7806 cfun_frame_layout.first_save_gpr_slot = STACK_POINTER_REGNUM;
7807 }
7808
7809 cfun_frame_layout.frame_size = get_frame_size ();
7810 if (!TARGET_64BIT && cfun_frame_layout.frame_size > 0x7fff0000)
7811 fatal_error ("total size of local variables exceeds architecture limit");
7812
7813 if (!TARGET_PACKED_STACK)
7814 {
7815 /* Fixed stack layout. */
7816 cfun_frame_layout.backchain_offset = 0;
7817 cfun_frame_layout.f0_offset = 16 * UNITS_PER_LONG;
7818 cfun_frame_layout.f4_offset = cfun_frame_layout.f0_offset + 2 * 8;
7819 cfun_frame_layout.f8_offset = -cfun_frame_layout.high_fprs * 8;
7820 cfun_frame_layout.gprs_offset = (cfun_frame_layout.first_save_gpr_slot
7821 * UNITS_PER_LONG);
7822 }
7823 else if (TARGET_BACKCHAIN)
7824 {
7825 /* Kernel stack layout - packed stack, backchain, no float */
7826 gcc_assert (TARGET_SOFT_FLOAT);
7827 cfun_frame_layout.backchain_offset = (STACK_POINTER_OFFSET
7828 - UNITS_PER_LONG);
7829
7830 /* The distance between the backchain and the return address
7831 save slot must not change. So we always need a slot for the
7832 stack pointer which resides in between. */
7833 cfun_frame_layout.last_save_gpr_slot = STACK_POINTER_REGNUM;
7834
7835 cfun_frame_layout.gprs_offset
7836 = cfun_frame_layout.backchain_offset - cfun_gprs_save_area_size;
7837
7838 /* FPRs will not be saved. Nevertheless pick sane values to
7839 keep area calculations valid. */
7840 cfun_frame_layout.f0_offset =
7841 cfun_frame_layout.f4_offset =
7842 cfun_frame_layout.f8_offset = cfun_frame_layout.gprs_offset;
7843 }
7844 else
7845 {
7846 int num_fprs;
7847
7848 /* Packed stack layout without backchain. */
7849
7850 /* With stdarg FPRs need their dedicated slots. */
7851 num_fprs = (TARGET_64BIT && cfun->stdarg ? 2
7852 : (cfun_fpr_save_p (FPR4_REGNUM) +
7853 cfun_fpr_save_p (FPR6_REGNUM)));
7854 cfun_frame_layout.f4_offset = STACK_POINTER_OFFSET - 8 * num_fprs;
7855
7856 num_fprs = (cfun->stdarg ? 2
7857 : (cfun_fpr_save_p (FPR0_REGNUM)
7858 + cfun_fpr_save_p (FPR2_REGNUM)));
7859 cfun_frame_layout.f0_offset = cfun_frame_layout.f4_offset - 8 * num_fprs;
7860
7861 cfun_frame_layout.gprs_offset
7862 = cfun_frame_layout.f0_offset - cfun_gprs_save_area_size;
7863
7864 cfun_frame_layout.f8_offset = (cfun_frame_layout.gprs_offset
7865 - cfun_frame_layout.high_fprs * 8);
7866 }
7867
7868 if (cfun_save_high_fprs_p)
7869 cfun_frame_layout.frame_size += cfun_frame_layout.high_fprs * 8;
7870
7871 if (!crtl->is_leaf)
7872 cfun_frame_layout.frame_size += crtl->outgoing_args_size;
7873
7874 /* In the following cases we have to allocate a STACK_POINTER_OFFSET
7875 sized area at the bottom of the stack. This is required also for
7876 leaf functions. When GCC generates a local stack reference it
7877 will always add STACK_POINTER_OFFSET to all these references. */
7878 if (crtl->is_leaf
7879 && !TARGET_TPF_PROFILING
7880 && cfun_frame_layout.frame_size == 0
7881 && !cfun->calls_alloca)
7882 return;
7883
7884 /* Calculate the number of bytes we have used in our own register
7885 save area. With the packed stack layout we can re-use the
7886 remaining bytes for normal stack elements. */
7887
7888 if (TARGET_PACKED_STACK)
7889 lowest_offset = MIN (MIN (cfun_frame_layout.f0_offset,
7890 cfun_frame_layout.f4_offset),
7891 cfun_frame_layout.gprs_offset);
7892 else
7893 lowest_offset = 0;
7894
7895 if (TARGET_BACKCHAIN)
7896 lowest_offset = MIN (lowest_offset, cfun_frame_layout.backchain_offset);
7897
7898 cfun_frame_layout.frame_size += STACK_POINTER_OFFSET - lowest_offset;
7899
7900 /* If, on 31 bit, an odd number of GPRs has to be saved, we have to
7901 adjust the frame size to sustain 8 byte alignment of stack
7902 frames. */
7903 cfun_frame_layout.frame_size = ((cfun_frame_layout.frame_size +
7904 STACK_BOUNDARY / BITS_PER_UNIT - 1)
7905 & ~(STACK_BOUNDARY / BITS_PER_UNIT - 1));
7906 }
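
/* Rounding example for the final alignment step above: with
   STACK_BOUNDARY == 64 (8 bytes) a raw frame size of 92 bytes becomes
   (92 + 7) & ~7 == 96, keeping stack frames 8-byte aligned when an
   odd number of 4-byte GPR slots was allocated on 31 bit.  */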
7907
7908 /* Generate frame layout. Fills in register and frame data for the current
7909 function in cfun->machine. This routine can be called multiple times;
7910 it will re-do the complete frame layout every time. */
7911
7912 static void
7913 s390_init_frame_layout (void)
7914 {
7915 HOST_WIDE_INT frame_size;
7916 int base_used;
7917
7918 gcc_assert (!reload_completed);
7919
7920 /* On S/390 machines, we may need to perform branch splitting, which
7921 will require both base and return address register. We have no
7922 choice but to assume we're going to need them until right at the
7923 end of the machine dependent reorg phase. */
7924 if (!TARGET_CPU_ZARCH)
7925 cfun->machine->split_branches_pending_p = true;
7926
7927 do
7928 {
7929 frame_size = cfun_frame_layout.frame_size;
7930
7931 /* Try to predict whether we'll need the base register. */
7932 base_used = cfun->machine->split_branches_pending_p
7933 || crtl->uses_const_pool
7934 || (!DISP_IN_RANGE (frame_size)
7935 && !CONST_OK_FOR_K (frame_size));
7936
7937 /* Decide which register to use as literal pool base. In small
7938 leaf functions, try to use an unused call-clobbered register
7939 as base register to avoid save/restore overhead. */
7940 if (!base_used)
7941 cfun->machine->base_reg = NULL_RTX;
7942 else if (crtl->is_leaf && !df_regs_ever_live_p (5))
7943 cfun->machine->base_reg = gen_rtx_REG (Pmode, 5);
7944 else
7945 cfun->machine->base_reg = gen_rtx_REG (Pmode, BASE_REGNUM);
7946
7947 s390_register_info ();
7948 s390_frame_info ();
7949 }
7950 while (frame_size != cfun_frame_layout.frame_size);
7951 }
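
/* The loop above iterates because the two computations feed into each
   other: s390_frame_info may grow the frame, which can push the frame
   size out of DISP_IN_RANGE and thus flip the base_used decision on
   the next pass, which in turn changes the register and frame info.
   The iteration stops once frame_size reaches a fixed point.  */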
7952
7953 /* Remove the FPR clobbers from a tbegin insn if it can be proven that
7954 the TX is nonescaping. A transaction is considered escaping if
7955 there is at least one path from tbegin returning CC0 to the
7956 function exit block without a tend.
7957
7958 The check so far has some limitations:
7959 - only single tbegin/tend BBs are supported
7960 - the first cond jump after tbegin must separate the CC0 path from ~CC0
7961 - when CC is copied to a GPR and the CC0 check is done with the GPR
7962 this is not supported
7963 */
7964
7965 static void
7966 s390_optimize_nonescaping_tx (void)
7967 {
7968 const unsigned int CC0 = 1 << 3;
7969 basic_block tbegin_bb = NULL;
7970 basic_block tend_bb = NULL;
7971 basic_block bb;
7972 rtx insn;
7973 bool result = true;
7974 int bb_index;
7975 rtx tbegin_insn = NULL_RTX;
7976
7977 if (!cfun->machine->tbegin_p)
7978 return;
7979
7980 for (bb_index = 0; bb_index < n_basic_blocks_for_fn (cfun); bb_index++)
7981 {
7982 bb = BASIC_BLOCK_FOR_FN (cfun, bb_index);
7983
7984 if (!bb)
7985 continue;
7986
7987 FOR_BB_INSNS (bb, insn)
7988 {
7989 rtx ite, cc, pat, target;
7990 unsigned HOST_WIDE_INT mask;
7991
7992 if (!INSN_P (insn) || INSN_CODE (insn) <= 0)
7993 continue;
7994
7995 pat = PATTERN (insn);
7996
7997 if (GET_CODE (pat) == PARALLEL)
7998 pat = XVECEXP (pat, 0, 0);
7999
8000 if (GET_CODE (pat) != SET
8001 || GET_CODE (SET_SRC (pat)) != UNSPEC_VOLATILE)
8002 continue;
8003
8004 if (XINT (SET_SRC (pat), 1) == UNSPECV_TBEGIN)
8005 {
8006 rtx tmp;
8007
8008 tbegin_insn = insn;
8009
8010 /* Just return if the tbegin doesn't have clobbers. */
8011 if (GET_CODE (PATTERN (insn)) != PARALLEL)
8012 return;
8013
8014 if (tbegin_bb != NULL)
8015 return;
8016
8017 /* Find the next conditional jump. */
8018 for (tmp = NEXT_INSN (insn);
8019 tmp != NULL_RTX;
8020 tmp = NEXT_INSN (tmp))
8021 {
8022 if (reg_set_p (gen_rtx_REG (CCmode, CC_REGNUM), tmp))
8023 return;
8024 if (!JUMP_P (tmp))
8025 continue;
8026
8027 ite = SET_SRC (PATTERN (tmp));
8028 if (GET_CODE (ite) != IF_THEN_ELSE)
8029 continue;
8030
8031 cc = XEXP (XEXP (ite, 0), 0);
8032 if (!REG_P (cc) || !CC_REGNO_P (REGNO (cc))
8033 || GET_MODE (cc) != CCRAWmode
8034 || GET_CODE (XEXP (XEXP (ite, 0), 1)) != CONST_INT)
8035 return;
8036
8037 if (bb->succs->length () != 2)
8038 return;
8039
8040 mask = INTVAL (XEXP (XEXP (ite, 0), 1));
8041 if (GET_CODE (XEXP (ite, 0)) == NE)
8042 mask ^= 0xf;
8043
8044 if (mask == CC0)
8045 target = XEXP (ite, 1);
8046 else if (mask == (CC0 ^ 0xf))
8047 target = XEXP (ite, 2);
8048 else
8049 return;
8050
8051 {
8052 edge_iterator ei;
8053 edge e1, e2;
8054
8055 ei = ei_start (bb->succs);
8056 e1 = ei_safe_edge (ei);
8057 ei_next (&ei);
8058 e2 = ei_safe_edge (ei);
8059
8060 if (e2->flags & EDGE_FALLTHRU)
8061 {
8062 e2 = e1;
8063 e1 = ei_safe_edge (ei);
8064 }
8065
8066 if (!(e1->flags & EDGE_FALLTHRU))
8067 return;
8068
8069 tbegin_bb = (target == pc_rtx) ? e1->dest : e2->dest;
8070 }
8071 if (tmp == BB_END (bb))
8072 break;
8073 }
8074 }
8075
8076 if (XINT (SET_SRC (pat), 1) == UNSPECV_TEND)
8077 {
8078 if (tend_bb != NULL)
8079 return;
8080 tend_bb = bb;
8081 }
8082 }
8083 }
8084
8085 /* Either we successfully remove the FPR clobbers here or we are not
8086 able to do anything for this TX. Neither case qualifies for
8087 another look. */
8088 cfun->machine->tbegin_p = false;
8089
8090 if (tbegin_bb == NULL || tend_bb == NULL)
8091 return;
8092
8093 calculate_dominance_info (CDI_POST_DOMINATORS);
8094 result = dominated_by_p (CDI_POST_DOMINATORS, tbegin_bb, tend_bb);
8095 free_dominance_info (CDI_POST_DOMINATORS);
8096
8097 if (!result)
8098 return;
8099
8100 PATTERN (tbegin_insn) = gen_rtx_PARALLEL (VOIDmode,
8101 gen_rtvec (2,
8102 XVECEXP (PATTERN (tbegin_insn), 0, 0),
8103 XVECEXP (PATTERN (tbegin_insn), 0, 1)));
8104 INSN_CODE (tbegin_insn) = -1;
8105 df_insn_rescan (tbegin_insn);
8106
8107 return;
8108 }
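
/* CC mask arithmetic used above, for illustration: the condition
   codes form a 4-bit mask with CC0 == 1 << 3 == 0b1000.  An EQ test
   whose mask is 0b1000 takes the THEN arm on the CC0 path, a mask of
   0b0111 (CC0 ^ 0xf) takes the ELSE arm, and an NE test is first
   normalized by mask ^= 0xf so the same two checks apply.  Any other
   mask means the jump does not cleanly separate the CC0 path, and the
   transformation is abandoned.  */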
8109
8110 /* Return true if it is legal to put a value with MODE into REGNO. */
8111
8112 bool
8113 s390_hard_regno_mode_ok (unsigned int regno, enum machine_mode mode)
8114 {
8115 switch (REGNO_REG_CLASS (regno))
8116 {
8117 case FP_REGS:
8118 if (REGNO_PAIR_OK (regno, mode))
8119 {
8120 if (mode == SImode || mode == DImode)
8121 return true;
8122
8123 if (FLOAT_MODE_P (mode) && GET_MODE_CLASS (mode) != MODE_VECTOR_FLOAT)
8124 return true;
8125 }
8126 break;
8127 case ADDR_REGS:
8128 if (FRAME_REGNO_P (regno) && mode == Pmode)
8129 return true;
8130
8131 /* fallthrough */
8132 case GENERAL_REGS:
8133 if (REGNO_PAIR_OK (regno, mode))
8134 {
8135 if (TARGET_ZARCH
8136 || (mode != TFmode && mode != TCmode && mode != TDmode))
8137 return true;
8138 }
8139 break;
8140 case CC_REGS:
8141 if (GET_MODE_CLASS (mode) == MODE_CC)
8142 return true;
8143 break;
8144 case ACCESS_REGS:
8145 if (REGNO_PAIR_OK (regno, mode))
8146 {
8147 if (mode == SImode || mode == Pmode)
8148 return true;
8149 }
8150 break;
8151 default:
8152 return false;
8153 }
8154
8155 return false;
8156 }
8157
8158 /* Return nonzero if register OLD_REG can be renamed to register NEW_REG. */
8159
8160 bool
8161 s390_hard_regno_rename_ok (unsigned int old_reg, unsigned int new_reg)
8162 {
8163 /* Once we've decided upon a register to use as base register, it must
8164 no longer be used for any other purpose. */
8165 if (cfun->machine->base_reg)
8166 if (REGNO (cfun->machine->base_reg) == old_reg
8167 || REGNO (cfun->machine->base_reg) == new_reg)
8168 return false;
8169
8170 /* Prevent regrename from using call-saved regs which haven't
8171 actually been saved. This is necessary since regrename assumes
8172 the backend save/restore decisions are based on
8173 df_regs_ever_live. Since we have our own routine we have to tell
8174 regrename manually about it. */
8175 if (GENERAL_REGNO_P (new_reg)
8176 && !call_really_used_regs[new_reg]
8177 && cfun_gpr_save_slot (new_reg) == 0)
8178 return false;
8179
8180 return true;
8181 }
8182
8183 /* Return nonzero if register REGNO can be used as a scratch register
8184 in peephole2. */
8185
8186 static bool
8187 s390_hard_regno_scratch_ok (unsigned int regno)
8188 {
8189 /* See s390_hard_regno_rename_ok. */
8190 if (GENERAL_REGNO_P (regno)
8191 && !call_really_used_regs[regno]
8192 && cfun_gpr_save_slot (regno) == 0)
8193 return false;
8194
8195 return true;
8196 }
8197
8198 /* Maximum number of registers to represent a value of mode MODE
8199 in a register of class RCLASS. */
8200
8201 int
8202 s390_class_max_nregs (enum reg_class rclass, enum machine_mode mode)
8203 {
8204 switch (rclass)
8205 {
8206 case FP_REGS:
8207 if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
8208 return 2 * ((GET_MODE_SIZE (mode) / 2 + 8 - 1) / 8);
8209 else
8210 return (GET_MODE_SIZE (mode) + 8 - 1) / 8;
8211 case ACCESS_REGS:
8212 return (GET_MODE_SIZE (mode) + 4 - 1) / 4;
8213 default:
8214 break;
8215 }
8216 return (GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
8217 }
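
/* Worked examples for the above: a TFmode value (16 bytes) in FP_REGS
   needs (16 + 7) / 8 == 2 registers; a complex DFmode pair (16 bytes,
   MODE_COMPLEX_FLOAT) needs 2 * ((16 / 2 + 7) / 8) == 2; and on
   31 bit (UNITS_PER_WORD == 4) a DImode value in GENERAL_REGS needs
   (8 + 3) / 4 == 2 registers.  */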
8218
8219 /* Return true if we use LRA instead of reload pass. */
8220 static bool
8221 s390_lra_p (void)
8222 {
8223 return s390_lra_flag;
8224 }
8225
8226 /* Return true if register FROM can be eliminated via register TO. */
8227
8228 static bool
8229 s390_can_eliminate (const int from, const int to)
8230 {
8231 /* On zSeries machines, we have not marked the base register as fixed.
8232 Instead, we have an elimination rule BASE_REGNUM -> BASE_REGNUM.
8233 If a function requires the base register, we say here that this
8234 elimination cannot be performed. This will cause reload to free
8235 up the base register (as if it were fixed). On the other hand,
8236 if the current function does *not* require the base register, we
8237 say here the elimination succeeds, which in turn allows reload
8238 to allocate the base register for any other purpose. */
8239 if (from == BASE_REGNUM && to == BASE_REGNUM)
8240 {
8241 if (TARGET_CPU_ZARCH)
8242 {
8243 s390_init_frame_layout ();
8244 return cfun->machine->base_reg == NULL_RTX;
8245 }
8246
8247 return false;
8248 }
8249
8250 /* Everything else must point into the stack frame. */
8251 gcc_assert (to == STACK_POINTER_REGNUM
8252 || to == HARD_FRAME_POINTER_REGNUM);
8253
8254 gcc_assert (from == FRAME_POINTER_REGNUM
8255 || from == ARG_POINTER_REGNUM
8256 || from == RETURN_ADDRESS_POINTER_REGNUM);
8257
8258 /* Make sure we actually saved the return address. */
8259 if (from == RETURN_ADDRESS_POINTER_REGNUM)
8260 if (!crtl->calls_eh_return
8261 && !cfun->stdarg
8262 && !cfun_frame_layout.save_return_addr_p)
8263 return false;
8264
8265 return true;
8266 }
8267
8268 /* Return offset between register FROM and TO initially after prolog. */
8269
8270 HOST_WIDE_INT
8271 s390_initial_elimination_offset (int from, int to)
8272 {
8273 HOST_WIDE_INT offset;
8274
8275 /* ??? Why are we called for non-eliminable pairs? */
8276 if (!s390_can_eliminate (from, to))
8277 return 0;
8278
8279 switch (from)
8280 {
8281 case FRAME_POINTER_REGNUM:
8282 offset = (get_frame_size()
8283 + STACK_POINTER_OFFSET
8284 + crtl->outgoing_args_size);
8285 break;
8286
8287 case ARG_POINTER_REGNUM:
8288 s390_init_frame_layout ();
8289 offset = cfun_frame_layout.frame_size + STACK_POINTER_OFFSET;
8290 break;
8291
8292 case RETURN_ADDRESS_POINTER_REGNUM:
8293 s390_init_frame_layout ();
8294
8295 if (cfun_frame_layout.first_save_gpr_slot == -1)
8296 {
8297 /* If it turns out that for stdarg nothing went into the reg
8298 save area we also do not need the return address
8299 pointer. */
8300 if (cfun->stdarg && !cfun_save_arg_fprs_p)
8301 return 0;
8302
8303 gcc_unreachable ();
8304 }
8305
8306 /* In order to make the following work it is not necessary for
8307 r14 to have a save slot. It is sufficient if one other GPR
8308 got one. Since the GPRs are always stored without gaps we
8309 are able to calculate where the r14 save slot would
8310 reside. */
8311 offset = (cfun_frame_layout.frame_size + cfun_frame_layout.gprs_offset +
8312 (RETURN_REGNUM - cfun_frame_layout.first_save_gpr_slot) *
8313 UNITS_PER_LONG);
8314 break;
8315
8316 case BASE_REGNUM:
8317 offset = 0;
8318 break;
8319
8320 default:
8321 gcc_unreachable ();
8322 }
8323
8324 return offset;
8325 }
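
/* Illustration of the r14 case above (hypothetical layout): with
   frame_size == 160, gprs_offset == 48, first_save_gpr_slot == 6 and
   UNITS_PER_LONG == 8, the return address slot is computed as
   160 + 48 + (14 - 6) * 8 == 272 bytes above the stack pointer.  */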
8326
8327 /* Emit insn to save fpr REGNUM at offset OFFSET relative
8328 to register BASE. Return generated insn. */
8329
8330 static rtx
8331 save_fpr (rtx base, int offset, int regnum)
8332 {
8333 rtx addr;
8334 addr = gen_rtx_MEM (DFmode, plus_constant (Pmode, base, offset));
8335
8336 if (regnum >= 16 && regnum <= (16 + FP_ARG_NUM_REG))
8337 set_mem_alias_set (addr, get_varargs_alias_set ());
8338 else
8339 set_mem_alias_set (addr, get_frame_alias_set ());
8340
8341 return emit_move_insn (addr, gen_rtx_REG (DFmode, regnum));
8342 }
8343
8344 /* Emit insn to restore fpr REGNUM from offset OFFSET relative
8345 to register BASE. Return generated insn. */
8346
8347 static rtx
8348 restore_fpr (rtx base, int offset, int regnum)
8349 {
8350 rtx addr;
8351 addr = gen_rtx_MEM (DFmode, plus_constant (Pmode, base, offset));
8352 set_mem_alias_set (addr, get_frame_alias_set ());
8353
8354 return emit_move_insn (gen_rtx_REG (DFmode, regnum), addr);
8355 }
8356
8357 /* Return true if REGNO is a global register, but not one
8358 of the special ones that need to be saved/restored anyway. */
8359
8360 static inline bool
8361 global_not_special_regno_p (int regno)
8362 {
8363 return (global_regs[regno]
8364 /* These registers are special and need to be
8365 restored in any case. */
8366 && !(regno == STACK_POINTER_REGNUM
8367 || regno == RETURN_REGNUM
8368 || regno == BASE_REGNUM
8369 || (flag_pic && regno == (int)PIC_OFFSET_TABLE_REGNUM)));
8370 }
8371
8372 /* Generate insn to save registers FIRST to LAST into
8373 the register save area located at offset OFFSET
8374 relative to register BASE. */
8375
8376 static rtx
8377 save_gprs (rtx base, int offset, int first, int last)
8378 {
8379 rtx addr, insn, note;
8380 int i;
8381
8382 addr = plus_constant (Pmode, base, offset);
8383 addr = gen_rtx_MEM (Pmode, addr);
8384
8385 set_mem_alias_set (addr, get_frame_alias_set ());
8386
8387 /* Special-case single register. */
8388 if (first == last)
8389 {
8390 if (TARGET_64BIT)
8391 insn = gen_movdi (addr, gen_rtx_REG (Pmode, first));
8392 else
8393 insn = gen_movsi (addr, gen_rtx_REG (Pmode, first));
8394
8395 if (!global_not_special_regno_p (first))
8396 RTX_FRAME_RELATED_P (insn) = 1;
8397 return insn;
8398 }
8399
8400
8401 insn = gen_store_multiple (addr,
8402 gen_rtx_REG (Pmode, first),
8403 GEN_INT (last - first + 1));
8404
8405 if (first <= 6 && cfun->stdarg)
8406 for (i = 0; i < XVECLEN (PATTERN (insn), 0); i++)
8407 {
8408 rtx mem = XEXP (XVECEXP (PATTERN (insn), 0, i), 0);
8409
8410 if (first + i <= 6)
8411 set_mem_alias_set (mem, get_varargs_alias_set ());
8412 }
8413
8414 /* We need to set the FRAME_RELATED flag on all SETs
8415 inside the store-multiple pattern.
8416
8417 However, we must not emit DWARF records for registers 2..5
8418 if they are stored for use by variable arguments ...
8419
8420 ??? Unfortunately, it is not enough to simply omit the
8421 FRAME_RELATED flags for those SETs, because the first SET
8422 of the PARALLEL is always treated as if it had the flag
8423 set, even if it does not. Therefore we emit a new pattern
8424 without those registers as a REG_FRAME_RELATED_EXPR note. */
8425
8426 if (first >= 6 && !global_not_special_regno_p (first))
8427 {
8428 rtx pat = PATTERN (insn);
8429
8430 for (i = 0; i < XVECLEN (pat, 0); i++)
8431 if (GET_CODE (XVECEXP (pat, 0, i)) == SET
8432 && !global_not_special_regno_p (REGNO (SET_SRC (XVECEXP (pat,
8433 0, i)))))
8434 RTX_FRAME_RELATED_P (XVECEXP (pat, 0, i)) = 1;
8435
8436 RTX_FRAME_RELATED_P (insn) = 1;
8437 }
8438 else if (last >= 6)
8439 {
8440 int start;
8441
8442 for (start = first >= 6 ? first : 6; start <= last; start++)
8443 if (!global_not_special_regno_p (start))
8444 break;
8445
8446 if (start > last)
8447 return insn;
8448
8449 addr = plus_constant (Pmode, base,
8450 offset + (start - first) * UNITS_PER_LONG);
8451
8452 if (start == last)
8453 {
8454 if (TARGET_64BIT)
8455 note = gen_movdi (gen_rtx_MEM (Pmode, addr),
8456 gen_rtx_REG (Pmode, start));
8457 else
8458 note = gen_movsi (gen_rtx_MEM (Pmode, addr),
8459 gen_rtx_REG (Pmode, start));
8460 note = PATTERN (note);
8461
8462 add_reg_note (insn, REG_FRAME_RELATED_EXPR, note);
8463 RTX_FRAME_RELATED_P (insn) = 1;
8464
8465 return insn;
8466 }
8467
8468 note = gen_store_multiple (gen_rtx_MEM (Pmode, addr),
8469 gen_rtx_REG (Pmode, start),
8470 GEN_INT (last - start + 1));
8471 note = PATTERN (note);
8472
8473 add_reg_note (insn, REG_FRAME_RELATED_EXPR, note);
8474
8475 for (i = 0; i < XVECLEN (note, 0); i++)
8476 if (GET_CODE (XVECEXP (note, 0, i)) == SET
8477 && !global_not_special_regno_p (REGNO (SET_SRC (XVECEXP (note,
8478 0, i)))))
8479 RTX_FRAME_RELATED_P (XVECEXP (note, 0, i)) = 1;
8480
8481 RTX_FRAME_RELATED_P (insn) = 1;
8482 }
8483
8484 return insn;
8485 }
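
/* Example of the note rewriting above (hypothetical stdarg function):
   for a store multiple of r2..r15 emitted on behalf of varargs, the
   SETs for r2..r5 must not produce DWARF records, so a narrower
   store-multiple pattern covering only r6..r15 is attached as the
   REG_FRAME_RELATED_EXPR note, and its SETs (minus any global
   registers) are marked frame-related instead.  */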
8486
8487 /* Generate insn to restore registers FIRST to LAST from
8488 the register save area located at offset OFFSET
8489 relative to register BASE. */
8490
8491 static rtx
8492 restore_gprs (rtx base, int offset, int first, int last)
8493 {
8494 rtx addr, insn;
8495
8496 addr = plus_constant (Pmode, base, offset);
8497 addr = gen_rtx_MEM (Pmode, addr);
8498 set_mem_alias_set (addr, get_frame_alias_set ());
8499
8500 /* Special-case single register. */
8501 if (first == last)
8502 {
8503 if (TARGET_64BIT)
8504 insn = gen_movdi (gen_rtx_REG (Pmode, first), addr);
8505 else
8506 insn = gen_movsi (gen_rtx_REG (Pmode, first), addr);
8507
8508 RTX_FRAME_RELATED_P (insn) = 1;
8509 return insn;
8510 }
8511
8512 insn = gen_load_multiple (gen_rtx_REG (Pmode, first),
8513 addr,
8514 GEN_INT (last - first + 1));
8515 RTX_FRAME_RELATED_P (insn) = 1;
8516 return insn;
8517 }
8518
8519 /* Return insn sequence to load the GOT register. */
8520
8521 static GTY(()) rtx got_symbol;
8522 rtx
8523 s390_load_got (void)
8524 {
8525 rtx insns;
8526
8527 /* We cannot use pic_offset_table_rtx here since we use this
8528 function also for non-pic if __tls_get_offset is called and in
8529 that case PIC_OFFSET_TABLE_REGNUM as well as pic_offset_table_rtx
8530 aren't usable. */
8531 rtx got_rtx = gen_rtx_REG (Pmode, 12);
8532
8533 if (!got_symbol)
8534 {
8535 got_symbol = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
8536 SYMBOL_REF_FLAGS (got_symbol) = SYMBOL_FLAG_LOCAL;
8537 }
8538
8539 start_sequence ();
8540
8541 if (TARGET_CPU_ZARCH)
8542 {
8543 emit_move_insn (got_rtx, got_symbol);
8544 }
8545 else
8546 {
8547 rtx offset;
8548
8549 offset = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, got_symbol),
8550 UNSPEC_LTREL_OFFSET);
8551 offset = gen_rtx_CONST (Pmode, offset);
8552 offset = force_const_mem (Pmode, offset);
8553
8554 emit_move_insn (got_rtx, offset);
8555
8556 offset = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (offset, 0)),
8557 UNSPEC_LTREL_BASE);
8558 offset = gen_rtx_PLUS (Pmode, got_rtx, offset);
8559
8560 emit_move_insn (got_rtx, offset);
8561 }
8562
8563 insns = get_insns ();
8564 end_sequence ();
8565 return insns;
8566 }
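
/* The sequence returned above has two shapes: on z/Arch targets the
   GOT address is loaded with a single PC-relative move (typically a
   larl), while on older machines it is loaded from the literal pool
   and then fixed up via UNSPEC_LTREL_BASE relative to the pool base.
   The exact insn is decided by the move expanders.  */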
8567
8568 /* This ties together stack memory (MEM with an alias set of frame_alias_set)
8569 and the change to the stack pointer. */
8570
8571 static void
8572 s390_emit_stack_tie (void)
8573 {
8574 rtx mem = gen_frame_mem (BLKmode,
8575 gen_rtx_REG (Pmode, STACK_POINTER_REGNUM));
8576
8577 emit_insn (gen_stack_tie (mem));
8578 }
8579
8580 /* Copy GPRS into FPR save slots. */
8581
8582 static void
8583 s390_save_gprs_to_fprs (void)
8584 {
8585 int i;
8586
8587 if (!TARGET_Z10 || !TARGET_HARD_FLOAT || !crtl->is_leaf)
8588 return;
8589
8590 for (i = 6; i < 16; i++)
8591 {
8592 if (FP_REGNO_P (cfun_gpr_save_slot (i)))
8593 {
8594 rtx insn =
8595 emit_move_insn (gen_rtx_REG (DImode, cfun_gpr_save_slot (i)),
8596 gen_rtx_REG (DImode, i));
8597 RTX_FRAME_RELATED_P (insn) = 1;
8598 }
8599 }
8600 }
8601
8602 /* Restore GPRs from FPR save slots. */
8603
8604 static void
8605 s390_restore_gprs_from_fprs (void)
8606 {
8607 int i;
8608
8609 if (!TARGET_Z10 || !TARGET_HARD_FLOAT || !crtl->is_leaf)
8610 return;
8611
8612 for (i = 6; i < 16; i++)
8613 {
8614 if (FP_REGNO_P (cfun_gpr_save_slot (i)))
8615 {
8616 rtx insn =
8617 emit_move_insn (gen_rtx_REG (DImode, i),
8618 gen_rtx_REG (DImode, cfun_gpr_save_slot (i)));
8619 df_set_regs_ever_live (i, true);
8620 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DImode, i));
8621 if (i == STACK_POINTER_REGNUM)
8622 add_reg_note (insn, REG_CFA_DEF_CFA,
8623 plus_constant (Pmode, stack_pointer_rtx,
8624 STACK_POINTER_OFFSET));
8625 RTX_FRAME_RELATED_P (insn) = 1;
8626 }
8627 }
8628 }
8629
8630
8631 /* A pass run immediately before shrink-wrapping and prologue and epilogue
8632 generation. */
8633
8634 static unsigned int
8635 s390_early_mach (void)
8636 {
8637 rtx insn;
8638
8639 /* Try to get rid of the FPR clobbers. */
8640 s390_optimize_nonescaping_tx ();
8641
8642 /* Re-compute register info. */
8643 s390_register_info ();
8644
8645 /* If we're using a base register, ensure that it is always valid for
8646 the first non-prologue instruction. */
8647 if (cfun->machine->base_reg)
8648 emit_insn_at_entry (gen_main_pool (cfun->machine->base_reg));
8649
8650 /* Annotate all constant pool references to let the scheduler know
8651 they implicitly use the base register. */
8652 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
8653 if (INSN_P (insn))
8654 {
8655 annotate_constant_pool_refs (&PATTERN (insn));
8656 df_insn_rescan (insn);
8657 }
8658 return 0;
8659 }
8660
8661 namespace {
8662
8663 const pass_data pass_data_s390_early_mach =
8664 {
8665 RTL_PASS, /* type */
8666 "early_mach", /* name */
8667 OPTGROUP_NONE, /* optinfo_flags */
8668 false, /* has_gate */
8669 true, /* has_execute */
8670 TV_MACH_DEP, /* tv_id */
8671 0, /* properties_required */
8672 0, /* properties_provided */
8673 0, /* properties_destroyed */
8674 0, /* todo_flags_start */
8675 ( TODO_df_verify | TODO_df_finish
8676 | TODO_verify_rtl_sharing ), /* todo_flags_finish */
8677 };
8678
8679 class pass_s390_early_mach : public rtl_opt_pass
8680 {
8681 public:
8682 pass_s390_early_mach (gcc::context *ctxt)
8683 : rtl_opt_pass (pass_data_s390_early_mach, ctxt)
8684 {}
8685
8686 /* opt_pass methods: */
8687 unsigned int execute () { return s390_early_mach (); }
8688
8689 }; // class pass_s390_early_mach
8690
8691 } // anon namespace
8692
8693 /* Expand the prologue into a bunch of separate insns. */
8694
8695 void
8696 s390_emit_prologue (void)
8697 {
8698 rtx insn, addr;
8699 rtx temp_reg;
8700 int i;
8701 int offset;
8702 int next_fpr = 0;
8703
8704 /* Choose best register to use for temp use within prologue.
8705 See below for why TPF must use register 1. */
8706
8707 if (!has_hard_reg_initial_val (Pmode, RETURN_REGNUM)
8708 && !crtl->is_leaf
8709 && !TARGET_TPF_PROFILING)
8710 temp_reg = gen_rtx_REG (Pmode, RETURN_REGNUM);
8711 else
8712 temp_reg = gen_rtx_REG (Pmode, 1);
8713
8714 s390_save_gprs_to_fprs ();
8715
8716 /* Save call saved gprs. */
8717 if (cfun_frame_layout.first_save_gpr != -1)
8718 {
8719 insn = save_gprs (stack_pointer_rtx,
8720 cfun_frame_layout.gprs_offset +
8721 UNITS_PER_LONG * (cfun_frame_layout.first_save_gpr
8722 - cfun_frame_layout.first_save_gpr_slot),
8723 cfun_frame_layout.first_save_gpr,
8724 cfun_frame_layout.last_save_gpr);
8725 emit_insn (insn);
8726 }
8727
8728 /* Dummy insn to mark literal pool slot. */
8729
8730 if (cfun->machine->base_reg)
8731 emit_insn (gen_main_pool (cfun->machine->base_reg));
8732
8733 offset = cfun_frame_layout.f0_offset;
8734
8735 /* Save f0 and f2. */
8736 for (i = FPR0_REGNUM; i <= FPR0_REGNUM + 1; i++)
8737 {
8738 if (cfun_fpr_save_p (i))
8739 {
8740 save_fpr (stack_pointer_rtx, offset, i);
8741 offset += 8;
8742 }
8743 else if (!TARGET_PACKED_STACK || cfun->stdarg)
8744 offset += 8;
8745 }
8746
8747 /* Save f4 and f6. */
8748 offset = cfun_frame_layout.f4_offset;
8749 for (i = FPR4_REGNUM; i <= FPR4_REGNUM + 1; i++)
8750 {
8751 if (cfun_fpr_save_p (i))
8752 {
8753 insn = save_fpr (stack_pointer_rtx, offset, i);
8754 offset += 8;
8755
8756 /* If f4 and f6 are call clobbered they are saved due to
8757 stdarg and therefore are not frame related. */
8758 if (!call_really_used_regs[i])
8759 RTX_FRAME_RELATED_P (insn) = 1;
8760 }
8761 else if (!TARGET_PACKED_STACK || call_really_used_regs[i])
8762 offset += 8;
8763 }
8764
8765 if (TARGET_PACKED_STACK
8766 && cfun_save_high_fprs_p
8767 && cfun_frame_layout.f8_offset + cfun_frame_layout.high_fprs * 8 > 0)
8768 {
8769 offset = (cfun_frame_layout.f8_offset
8770 + (cfun_frame_layout.high_fprs - 1) * 8);
8771
8772 for (i = FPR15_REGNUM; i >= FPR8_REGNUM && offset >= 0; i--)
8773 if (cfun_fpr_save_p (i))
8774 {
8775 insn = save_fpr (stack_pointer_rtx, offset, i);
8776
8777 RTX_FRAME_RELATED_P (insn) = 1;
8778 offset -= 8;
8779 }
8780 if (offset >= cfun_frame_layout.f8_offset)
8781 next_fpr = i;
8782 }
8783
8784 if (!TARGET_PACKED_STACK)
8785 next_fpr = cfun_save_high_fprs_p ? FPR15_REGNUM : 0;
8786
8787 if (flag_stack_usage_info)
8788 current_function_static_stack_size = cfun_frame_layout.frame_size;
8789
8790 /* Decrement stack pointer. */
8791
8792 if (cfun_frame_layout.frame_size > 0)
8793 {
8794 rtx frame_off = GEN_INT (-cfun_frame_layout.frame_size);
8795 rtx real_frame_off;
8796
8797 if (s390_stack_size)
8798 {
8799 HOST_WIDE_INT stack_guard;
8800
8801 if (s390_stack_guard)
8802 stack_guard = s390_stack_guard;
8803 else
8804 {
8805 /* If no value for the stack guard is provided the smallest power
8806 of 2 not smaller than the current frame size is chosen. */
8807 stack_guard = 1;
8808 while (stack_guard < cfun_frame_layout.frame_size)
8809 stack_guard <<= 1;
8810 }
8811
8812 if (cfun_frame_layout.frame_size >= s390_stack_size)
8813 {
8814 warning (0, "frame size of function %qs is %wd"
8815 " bytes exceeding user provided stack limit of "
8816 "%d bytes. "
8817 "An unconditional trap is added.",
8818 current_function_name(), cfun_frame_layout.frame_size,
8819 s390_stack_size);
8820 emit_insn (gen_trap ());
8821 }
8822 else
8823 {
8824 /* stack_guard has to be smaller than s390_stack_size.
8825 Otherwise we would emit an AND with zero which would
8826 not match the test under mask pattern. */
8827 if (stack_guard >= s390_stack_size)
8828 {
8829 warning (0, "frame size of function %qs is %wd"
8830 " bytes which is more than half the stack size. "
8831 "The dynamic check would not be reliable. "
8832 "No check emitted for this function.",
8833 current_function_name(),
8834 cfun_frame_layout.frame_size);
8835 }
8836 else
8837 {
8838 HOST_WIDE_INT stack_check_mask = ((s390_stack_size - 1)
8839 & ~(stack_guard - 1));
8840
8841 rtx t = gen_rtx_AND (Pmode, stack_pointer_rtx,
8842 GEN_INT (stack_check_mask));
8843 if (TARGET_64BIT)
8844 emit_insn (gen_ctrapdi4 (gen_rtx_EQ (VOIDmode,
8845 t, const0_rtx),
8846 t, const0_rtx, const0_rtx));
8847 else
8848 emit_insn (gen_ctrapsi4 (gen_rtx_EQ (VOIDmode,
8849 t, const0_rtx),
8850 t, const0_rtx, const0_rtx));
8851 }
8852 }
8853 }
8854
8855 if (s390_warn_framesize > 0
8856 && cfun_frame_layout.frame_size >= s390_warn_framesize)
8857 warning (0, "frame size of %qs is %wd bytes",
8858 current_function_name (), cfun_frame_layout.frame_size);
8859
8860 if (s390_warn_dynamicstack_p && cfun->calls_alloca)
8861 warning (0, "%qs uses dynamic stack allocation", current_function_name ());
8862
8863 /* Save incoming stack pointer into temp reg. */
8864 if (TARGET_BACKCHAIN || next_fpr)
8865 insn = emit_insn (gen_move_insn (temp_reg, stack_pointer_rtx));
8866
8867 /* Subtract frame size from stack pointer. */
8868
8869 if (DISP_IN_RANGE (INTVAL (frame_off)))
8870 {
8871 insn = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
8872 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
8873 frame_off));
8874 insn = emit_insn (insn);
8875 }
8876 else
8877 {
8878 if (!CONST_OK_FOR_K (INTVAL (frame_off)))
8879 frame_off = force_const_mem (Pmode, frame_off);
8880
8881 insn = emit_insn (gen_add2_insn (stack_pointer_rtx, frame_off));
8882 annotate_constant_pool_refs (&PATTERN (insn));
8883 }
8884
8885 RTX_FRAME_RELATED_P (insn) = 1;
8886 real_frame_off = GEN_INT (-cfun_frame_layout.frame_size);
8887 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
8888 gen_rtx_SET (VOIDmode, stack_pointer_rtx,
8889 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
8890 real_frame_off)));
8891
8892 /* Set backchain. */
8893
8894 if (TARGET_BACKCHAIN)
8895 {
8896 if (cfun_frame_layout.backchain_offset)
8897 addr = gen_rtx_MEM (Pmode,
8898 plus_constant (Pmode, stack_pointer_rtx,
8899 cfun_frame_layout.backchain_offset));
8900 else
8901 addr = gen_rtx_MEM (Pmode, stack_pointer_rtx);
8902 set_mem_alias_set (addr, get_frame_alias_set ());
8903 insn = emit_insn (gen_move_insn (addr, temp_reg));
8904 }
8905
8906 /* If we support non-call exceptions (e.g. for Java),
8907 we need to make sure the backchain pointer is set up
8908 before any possibly trapping memory access. */
8909 if (TARGET_BACKCHAIN && cfun->can_throw_non_call_exceptions)
8910 {
8911 addr = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (VOIDmode));
8912 emit_clobber (addr);
8913 }
8914 }
8915
8916 /* Save fprs 8 - 15 (64 bit ABI). */
8917
8918 if (cfun_save_high_fprs_p && next_fpr)
8919 {
8920 /* If the stack might be accessed through a different register
8921 we have to make sure that the stack pointer decrement is not
8922 moved below the use of the stack slots. */
8923 s390_emit_stack_tie ();
8924
8925 insn = emit_insn (gen_add2_insn (temp_reg,
8926 GEN_INT (cfun_frame_layout.f8_offset)));
8927
8928 offset = 0;
8929
8930 for (i = FPR8_REGNUM; i <= next_fpr; i++)
8931 if (cfun_fpr_save_p (i))
8932 {
8933 rtx addr = plus_constant (Pmode, stack_pointer_rtx,
8934 cfun_frame_layout.frame_size
8935 + cfun_frame_layout.f8_offset
8936 + offset);
8937
8938 insn = save_fpr (temp_reg, offset, i);
8939 offset += 8;
8940 RTX_FRAME_RELATED_P (insn) = 1;
8941 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
8942 gen_rtx_SET (VOIDmode,
8943 gen_rtx_MEM (DFmode, addr),
8944 gen_rtx_REG (DFmode, i)));
8945 }
8946 }
8947
8948 /* Set frame pointer, if needed. */
8949
8950 if (frame_pointer_needed)
8951 {
8952 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
8953 RTX_FRAME_RELATED_P (insn) = 1;
8954 }
8955
8956 /* Set up got pointer, if needed. */
8957
8958 if (flag_pic && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
8959 {
8960 rtx insns = s390_load_got ();
8961
8962 for (insn = insns; insn; insn = NEXT_INSN (insn))
8963 annotate_constant_pool_refs (&PATTERN (insn));
8964
8965 emit_insn (insns);
8966 }
8967
8968 if (TARGET_TPF_PROFILING)
8969 {
8970 /* Generate a BAS instruction to serve as a function
8971 entry intercept to facilitate the use of tracing
8972 algorithms located at the branch target. */
8973 emit_insn (gen_prologue_tpf ());
8974
8975 /* Emit a blockage here so that all code
8976 lies between the profiling mechanisms. */
8977 emit_insn (gen_blockage ());
8978 }
8979 }
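
/* Stack-guard arithmetic used in the prologue above, for illustration
   (hypothetical option values): with s390_stack_size == 65536 and a
   frame size of 1000 bytes, the implicit guard becomes 1024 (smallest
   power of 2 not below the frame size) and the compare mask is
   (65536 - 1) & ~(1024 - 1) == 0xfc00; the conditional trap fires
   when the new stack pointer ANDed with that mask is zero, i.e. when
   the stack has run into the guard area.  */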
8980
8981 /* Expand the epilogue into a bunch of separate insns. */
8982
8983 void
8984 s390_emit_epilogue (bool sibcall)
8985 {
8986 rtx frame_pointer, return_reg, cfa_restores = NULL_RTX;
8987 int area_bottom, area_top, offset = 0;
8988 int next_offset;
8989 rtvec p;
8990 int i;
8991
8992 if (TARGET_TPF_PROFILING)
8993 {
8994
8995 /* Generate a BAS instruction to serve as a function
8996 entry intercept to facilitate the use of tracing
8997 algorithms located at the branch target. */
8998
8999 /* Emit a blockage here so that all code
9000 lies between the profiling mechanisms. */
9001 emit_insn (gen_blockage ());
9002
9003 emit_insn (gen_epilogue_tpf ());
9004 }
9005
9006 /* Check whether to use frame or stack pointer for restore. */
9007
9008 frame_pointer = (frame_pointer_needed
9009 ? hard_frame_pointer_rtx : stack_pointer_rtx);
9010
9011 s390_frame_area (&area_bottom, &area_top);
9012
9013 /* Check whether we can access the register save area.
9014 If not, increment the frame pointer as required. */
9015
9016 if (area_top <= area_bottom)
9017 {
9018 /* Nothing to restore. */
9019 }
9020 else if (DISP_IN_RANGE (cfun_frame_layout.frame_size + area_bottom)
9021 && DISP_IN_RANGE (cfun_frame_layout.frame_size + area_top - 1))
9022 {
9023 /* Area is in range. */
9024 offset = cfun_frame_layout.frame_size;
9025 }
9026 else
9027 {
9028 rtx insn, frame_off, cfa;
9029
9030 offset = area_bottom < 0 ? -area_bottom : 0;
9031 frame_off = GEN_INT (cfun_frame_layout.frame_size - offset);
9032
9033 cfa = gen_rtx_SET (VOIDmode, frame_pointer,
9034 gen_rtx_PLUS (Pmode, frame_pointer, frame_off));
9035 if (DISP_IN_RANGE (INTVAL (frame_off)))
9036 {
9037 insn = gen_rtx_SET (VOIDmode, frame_pointer,
9038 gen_rtx_PLUS (Pmode, frame_pointer, frame_off));
9039 insn = emit_insn (insn);
9040 }
9041 else
9042 {
9043 if (!CONST_OK_FOR_K (INTVAL (frame_off)))
9044 frame_off = force_const_mem (Pmode, frame_off);
9045
9046 insn = emit_insn (gen_add2_insn (frame_pointer, frame_off));
9047 annotate_constant_pool_refs (&PATTERN (insn));
9048 }
9049 add_reg_note (insn, REG_CFA_ADJUST_CFA, cfa);
9050 RTX_FRAME_RELATED_P (insn) = 1;
9051 }
9052
9053 /* Restore call saved fprs. */
9054
9055 if (TARGET_64BIT)
9056 {
9057 if (cfun_save_high_fprs_p)
9058 {
9059 next_offset = cfun_frame_layout.f8_offset;
9060 for (i = FPR8_REGNUM; i <= FPR15_REGNUM; i++)
9061 {
9062 if (cfun_fpr_save_p (i))
9063 {
9064 restore_fpr (frame_pointer,
9065 offset + next_offset, i);
9066 cfa_restores
9067 = alloc_reg_note (REG_CFA_RESTORE,
9068 gen_rtx_REG (DFmode, i), cfa_restores);
9069 next_offset += 8;
9070 }
9071 }
9072 }
9073
9074 }
9075 else
9076 {
9077 next_offset = cfun_frame_layout.f4_offset;
9078 /* f4, f6 */
9079 for (i = FPR4_REGNUM; i <= FPR4_REGNUM + 1; i++)
9080 {
9081 if (cfun_fpr_save_p (i))
9082 {
9083 restore_fpr (frame_pointer,
9084 offset + next_offset, i);
9085 cfa_restores
9086 = alloc_reg_note (REG_CFA_RESTORE,
9087 gen_rtx_REG (DFmode, i), cfa_restores);
9088 next_offset += 8;
9089 }
9090 else if (!TARGET_PACKED_STACK)
9091 next_offset += 8;
9092 }
9093
9094 }
9095
9096 /* Return register. */
9097
9098 return_reg = gen_rtx_REG (Pmode, RETURN_REGNUM);
9099
9100 /* Restore call saved gprs. */
9101
9102 if (cfun_frame_layout.first_restore_gpr != -1)
9103 {
9104 rtx insn, addr;
9105 int i;
9106
9107 /* Check for global registers and save them to the stack
9108 locations from which they will be restored. */
9109
9110 for (i = cfun_frame_layout.first_restore_gpr;
9111 i <= cfun_frame_layout.last_restore_gpr;
9112 i++)
9113 {
9114 if (global_not_special_regno_p (i))
9115 {
9116 addr = plus_constant (Pmode, frame_pointer,
9117 offset + cfun_frame_layout.gprs_offset
9118 + (i - cfun_frame_layout.first_save_gpr_slot)
9119 * UNITS_PER_LONG);
9120 addr = gen_rtx_MEM (Pmode, addr);
9121 set_mem_alias_set (addr, get_frame_alias_set ());
9122 emit_move_insn (addr, gen_rtx_REG (Pmode, i));
9123 }
9124 else
9125 cfa_restores
9126 = alloc_reg_note (REG_CFA_RESTORE,
9127 gen_rtx_REG (Pmode, i), cfa_restores);
9128 }
9129
9130 if (! sibcall)
9131 {
9132 /* Fetch return address from stack before the load multiple;
9133 this helps scheduling.
9134
9135 Only do this if we already decided that r14 needs to be
9136 saved to a stack slot. (And not just because r14 happens to
9137 be in between two GPRs which need saving.) Otherwise it
9138 would be difficult to take that decision back in
9139 s390_optimize_prologue. */
9140 if (cfun_gpr_save_slot (RETURN_REGNUM) == -1)
9141 {
9142 int return_regnum = find_unused_clobbered_reg();
9143 if (!return_regnum)
9144 return_regnum = 4;
9145 return_reg = gen_rtx_REG (Pmode, return_regnum);
9146
9147 addr = plus_constant (Pmode, frame_pointer,
9148 offset + cfun_frame_layout.gprs_offset
9149 + (RETURN_REGNUM
9150 - cfun_frame_layout.first_save_gpr_slot)
9151 * UNITS_PER_LONG);
9152 addr = gen_rtx_MEM (Pmode, addr);
9153 set_mem_alias_set (addr, get_frame_alias_set ());
9154 emit_move_insn (return_reg, addr);
9155
9156 /* Once we did that optimization we have to make sure
9157 s390_optimize_prologue does not try to remove the
9158 store of r14 since we will not be able to find the
9159 load issued here. */
9160 cfun_frame_layout.save_return_addr_p = true;
9161 }
9162 }
9163
9164 insn = restore_gprs (frame_pointer,
9165 offset + cfun_frame_layout.gprs_offset
9166 + (cfun_frame_layout.first_restore_gpr
9167 - cfun_frame_layout.first_save_gpr_slot)
9168 * UNITS_PER_LONG,
9169 cfun_frame_layout.first_restore_gpr,
9170 cfun_frame_layout.last_restore_gpr);
9171 insn = emit_insn (insn);
9172 REG_NOTES (insn) = cfa_restores;
9173 add_reg_note (insn, REG_CFA_DEF_CFA,
9174 plus_constant (Pmode, stack_pointer_rtx,
9175 STACK_POINTER_OFFSET));
9176 RTX_FRAME_RELATED_P (insn) = 1;
9177 }
9178
9179 s390_restore_gprs_from_fprs ();
9180
9181 if (! sibcall)
9182 {
9183
9184 /* Return to caller. */
9185
9186 p = rtvec_alloc (2);
9187
9188 RTVEC_ELT (p, 0) = ret_rtx;
9189 RTVEC_ELT (p, 1) = gen_rtx_USE (VOIDmode, return_reg);
9190 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, p));
9191 }
9192 }
9193
9194 /* Implement TARGET_SET_UP_BY_PROLOGUE. */
9195
9196 static void
9197 s300_set_up_by_prologue (hard_reg_set_container *regs)
9198 {
9199 if (cfun->machine->base_reg
9200 && !call_really_used_regs[REGNO (cfun->machine->base_reg)])
9201 SET_HARD_REG_BIT (regs->set, REGNO (cfun->machine->base_reg));
9202 }
9203
9204 /* Return true if the function can use simple_return to return outside
9205 of a shrink-wrapped region. At present shrink-wrapping is supported
9206 in all cases. */
9207
9208 bool
9209 s390_can_use_simple_return_insn (void)
9210 {
9211 return true;
9212 }
9213
9214 /* Return true if the epilogue is guaranteed to contain only a return
9215 instruction and if a direct return can therefore be used instead.
9216 One of the main advantages of using direct return instructions
9217 is that we can then use conditional returns. */
9218
9219 bool
9220 s390_can_use_return_insn (void)
9221 {
9222 int i;
9223
9224 if (!reload_completed)
9225 return false;
9226
9227 if (crtl->profile)
9228 return false;
9229
9230 if (TARGET_TPF_PROFILING)
9231 return false;
9232
9233 for (i = 0; i < 16; i++)
9234 if (cfun_gpr_save_slot (i))
9235 return false;
9236
9237 /* For 31 bit this is not covered by the frame_size check below
9238 since f4, f6 are saved in the register save area without needing
9239 additional stack space. */
9240 if (!TARGET_64BIT
9241 && (cfun_fpr_save_p (FPR4_REGNUM) || cfun_fpr_save_p (FPR6_REGNUM)))
9242 return false;
9243
9244 if (cfun->machine->base_reg
9245 && !call_really_used_regs[REGNO (cfun->machine->base_reg)])
9246 return false;
9247
9248 return cfun_frame_layout.frame_size == 0;
9249 }
9250
9251 /* Return the size in bytes of a function argument of
9252 type TYPE and/or mode MODE. At least one of TYPE or
9253 MODE must be specified. */
9254
9255 static int
9256 s390_function_arg_size (enum machine_mode mode, const_tree type)
9257 {
9258 if (type)
9259 return int_size_in_bytes (type);
9260
9261 /* No type info available for some library calls ... */
9262 if (mode != BLKmode)
9263 return GET_MODE_SIZE (mode);
9264
9265 /* If we have neither type nor mode, abort. */
9266 gcc_unreachable ();
9267 }
9268
9269 /* Return true if a function argument of type TYPE and mode MODE
9270 is to be passed in a floating-point register, if available. */
9271
9272 static bool
9273 s390_function_arg_float (enum machine_mode mode, const_tree type)
9274 {
9275 int size = s390_function_arg_size (mode, type);
9276 if (size > 8)
9277 return false;
9278
9279 /* Soft-float changes the ABI: no floating-point registers are used. */
9280 if (TARGET_SOFT_FLOAT)
9281 return false;
9282
9283 /* No type info available for some library calls ... */
9284 if (!type)
9285 return mode == SFmode || mode == DFmode || mode == SDmode || mode == DDmode;
9286
9287 /* The ABI says that record types with a single member are treated
9288 just like that member would be. */
9289 while (TREE_CODE (type) == RECORD_TYPE)
9290 {
9291 tree field, single = NULL_TREE;
9292
9293 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
9294 {
9295 if (TREE_CODE (field) != FIELD_DECL)
9296 continue;
9297
9298 if (single == NULL_TREE)
9299 single = TREE_TYPE (field);
9300 else
9301 return false;
9302 }
9303
9304 if (single == NULL_TREE)
9305 return false;
9306 else
9307 type = single;
9308 }
9309
9310 return TREE_CODE (type) == REAL_TYPE;
9311 }
9312
9313 /* Return true if a function argument of type TYPE and mode MODE
9314 is to be passed in an integer register, or a pair of integer
9315 registers, if available. */
9316
9317 static bool
9318 s390_function_arg_integer (enum machine_mode mode, const_tree type)
9319 {
9320 int size = s390_function_arg_size (mode, type);
9321 if (size > 8)
9322 return false;
9323
9324 /* No type info available for some library calls ... */
9325 if (!type)
9326 return GET_MODE_CLASS (mode) == MODE_INT
9327 || (TARGET_SOFT_FLOAT && SCALAR_FLOAT_MODE_P (mode));
9328
9329 /* We accept small integral (and similar) types. */
9330 if (INTEGRAL_TYPE_P (type)
9331 || POINTER_TYPE_P (type)
9332 || TREE_CODE (type) == NULLPTR_TYPE
9333 || TREE_CODE (type) == OFFSET_TYPE
9334 || (TARGET_SOFT_FLOAT && TREE_CODE (type) == REAL_TYPE))
9335 return true;
9336
9337 /* We also accept structs of size 1, 2, 4, 8 that are not
9338 passed in floating-point registers. */
9339 if (AGGREGATE_TYPE_P (type)
9340 && exact_log2 (size) >= 0
9341 && !s390_function_arg_float (mode, type))
9342 return true;
9343
9344 return false;
9345 }
9346
9347 /* Return 1 if a function argument of type TYPE and mode MODE
9348 is to be passed by reference. The ABI specifies that only
9349 structures of size 1, 2, 4, or 8 bytes are passed by value,
9350 all other structures (and complex numbers) are passed by
9351 reference. */
9352
9353 static bool
9354 s390_pass_by_reference (cumulative_args_t ca ATTRIBUTE_UNUSED,
9355 enum machine_mode mode, const_tree type,
9356 bool named ATTRIBUTE_UNUSED)
9357 {
9358 int size = s390_function_arg_size (mode, type);
9359 if (size > 8)
9360 return true;
9361
9362 if (type)
9363 {
9364 if (AGGREGATE_TYPE_P (type) && exact_log2 (size) < 0)
9365 return 1;
9366
9367 if (TREE_CODE (type) == COMPLEX_TYPE
9368 || TREE_CODE (type) == VECTOR_TYPE)
9369 return 1;
9370 }
9371
9372 return 0;
9373 }
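
/* Examples of the rules above: an 8-byte struct is passed by value, a
   3-byte struct (size not a power of 2) by reference, a 16-byte
   struct by reference (size > 8), and _Complex double as well as any
   vector type always by reference.  */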
9374
9375 /* Update the data in CUM to advance over an argument of mode MODE and
9376 data type TYPE. (TYPE is null for libcalls where that information
9377 may not be available.). The boolean NAMED specifies whether the
9378 argument is a named argument (as opposed to an unnamed argument
9379 matching an ellipsis). */
9380
9381 static void
9382 s390_function_arg_advance (cumulative_args_t cum_v, enum machine_mode mode,
9383 const_tree type, bool named ATTRIBUTE_UNUSED)
9384 {
9385 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
9386
9387 if (s390_function_arg_float (mode, type))
9388 {
9389 cum->fprs += 1;
9390 }
9391 else if (s390_function_arg_integer (mode, type))
9392 {
9393 int size = s390_function_arg_size (mode, type);
9394 cum->gprs += ((size + UNITS_PER_LONG - 1) / UNITS_PER_LONG);
9395 }
9396 else
9397 gcc_unreachable ();
9398 }
9399
9400 /* Define where to put the arguments to a function.
9401 Value is zero to push the argument on the stack,
9402 or a hard register in which to store the argument.
9403
9404 MODE is the argument's machine mode.
9405 TYPE is the data type of the argument (as a tree).
9406 This is null for libcalls where that information may
9407 not be available.
9408 CUM is a variable of type CUMULATIVE_ARGS which gives info about
9409 the preceding args and about the function being called.
9410 NAMED is nonzero if this argument is a named parameter
9411 (otherwise it is an extra parameter matching an ellipsis).
9412
9413 On S/390, we use general purpose registers 2 through 6 to
9414 pass integer, pointer, and certain structure arguments, and
9415 floating point registers 0 and 2 (0, 2, 4, and 6 on 64-bit)
9416 to pass floating point arguments. All remaining arguments
9417 are pushed to the stack. */
9418
9419 static rtx
9420 s390_function_arg (cumulative_args_t cum_v, enum machine_mode mode,
9421 const_tree type, bool named ATTRIBUTE_UNUSED)
9422 {
9423 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
9424
9425 if (s390_function_arg_float (mode, type))
9426 {
9427 if (cum->fprs + 1 > FP_ARG_NUM_REG)
9428 return 0;
9429 else
9430 return gen_rtx_REG (mode, cum->fprs + 16);
9431 }
9432 else if (s390_function_arg_integer (mode, type))
9433 {
9434 int size = s390_function_arg_size (mode, type);
9435 int n_gprs = (size + UNITS_PER_LONG - 1) / UNITS_PER_LONG;
9436
9437 if (cum->gprs + n_gprs > GP_ARG_NUM_REG)
9438 return 0;
9439 else if (n_gprs == 1 || UNITS_PER_WORD == UNITS_PER_LONG)
9440 return gen_rtx_REG (mode, cum->gprs + 2);
9441 else if (n_gprs == 2)
9442 {
9443 rtvec p = rtvec_alloc (2);
9444
9445 RTVEC_ELT (p, 0)
9446 = gen_rtx_EXPR_LIST (SImode, gen_rtx_REG (SImode, cum->gprs + 2),
9447 const0_rtx);
9448 RTVEC_ELT (p, 1)
9449 = gen_rtx_EXPR_LIST (SImode, gen_rtx_REG (SImode, cum->gprs + 3),
9450 GEN_INT (4));
9451
9452 return gen_rtx_PARALLEL (mode, p);
9453 }
9454 }
9455
9456 /* After the real arguments, expand_call calls us once again
9457 with a void_type_node type. Whatever we return here is
9458 passed as operand 2 to the call expanders.
9459
9460 We don't need this feature ... */
9461 else if (type == void_type_node)
9462 return const0_rtx;
9463
9464 gcc_unreachable ();
9465 }
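
/* Editor's sketch (not part of GCC): for an 8-byte integer argument
   with -m31 (UNITS_PER_LONG == 4, so n_gprs == 2) starting in gpr 2,
   the code above builds roughly

     (parallel [(expr_list (reg:SI 2) (const_int 0))
                (expr_list (reg:SI 3) (const_int 4))])

   i.e. the argument is split across the %r2/%r3 register pair.  */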
9466
9467 /* Return true if return values of type TYPE should be returned
9468 in a memory buffer whose address is passed by the caller as
9469 hidden first argument. */
9470
9471 static bool
9472 s390_return_in_memory (const_tree type, const_tree fundecl ATTRIBUTE_UNUSED)
9473 {
9474 /* We accept small integral (and similar) types. */
9475 if (INTEGRAL_TYPE_P (type)
9476 || POINTER_TYPE_P (type)
9477 || TREE_CODE (type) == OFFSET_TYPE
9478 || TREE_CODE (type) == REAL_TYPE)
9479 return int_size_in_bytes (type) > 8;
9480
9481 /* Aggregates and similar constructs are always returned
9482 in memory. */
9483 if (AGGREGATE_TYPE_P (type)
9484 || TREE_CODE (type) == COMPLEX_TYPE
9485 || TREE_CODE (type) == VECTOR_TYPE)
9486 return true;
9487
9488 /* ??? We get called on all sorts of random stuff from
9489 aggregate_value_p. We can't abort, but it's not clear
9490 what's safe to return. Pretend it's a struct I guess. */
9491 return true;
9492 }
9493
9494 /* Function arguments and return values are promoted to word size. */
9495
9496 static enum machine_mode
9497 s390_promote_function_mode (const_tree type, enum machine_mode mode,
9498 int *punsignedp,
9499 const_tree fntype ATTRIBUTE_UNUSED,
9500 int for_return ATTRIBUTE_UNUSED)
9501 {
9502 if (INTEGRAL_MODE_P (mode)
9503 && GET_MODE_SIZE (mode) < UNITS_PER_LONG)
9504 {
9505 if (type != NULL_TREE && POINTER_TYPE_P (type))
9506 *punsignedp = POINTERS_EXTEND_UNSIGNED;
9507 return Pmode;
9508 }
9509
9510 return mode;
9511 }
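
/* Editor's example (a sketch, not part of GCC): with -m64
   (UNITS_PER_LONG == 8, Pmode == DImode) a 'short' or 'int' argument
   is widened to DImode here, and a pointer argument is additionally
   zero-extended because *punsignedp is forced to
   POINTERS_EXTEND_UNSIGNED.  */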
9512
9513 /* Define where to return a (scalar) value of type RET_TYPE.
9514 If RET_TYPE is null, define where to return a (scalar)
9515 value of mode MODE from a libcall. */
9516
9517 static rtx
9518 s390_function_and_libcall_value (enum machine_mode mode,
9519 const_tree ret_type,
9520 const_tree fntype_or_decl,
9521 bool outgoing ATTRIBUTE_UNUSED)
9522 {
9523 /* For normal functions perform the promotion as
9524 promote_function_mode would do. */
9525 if (ret_type)
9526 {
9527 int unsignedp = TYPE_UNSIGNED (ret_type);
9528 mode = promote_function_mode (ret_type, mode, &unsignedp,
9529 fntype_or_decl, 1);
9530 }
9531
9532 gcc_assert (GET_MODE_CLASS (mode) == MODE_INT || SCALAR_FLOAT_MODE_P (mode));
9533 gcc_assert (GET_MODE_SIZE (mode) <= 8);
9534
9535 if (TARGET_HARD_FLOAT && SCALAR_FLOAT_MODE_P (mode))
9536 return gen_rtx_REG (mode, 16);
9537 else if (GET_MODE_SIZE (mode) <= UNITS_PER_LONG
9538 || UNITS_PER_LONG == UNITS_PER_WORD)
9539 return gen_rtx_REG (mode, 2);
9540 else if (GET_MODE_SIZE (mode) == 2 * UNITS_PER_LONG)
9541 {
9542 /* This case is triggered when returning a 64 bit value with
9543 -m31 -mzarch. Although the value would fit into a single
9544 register it has to be forced into a 32 bit register pair in
9545 order to match the ABI. */
9546 rtvec p = rtvec_alloc (2);
9547
9548 RTVEC_ELT (p, 0)
9549 = gen_rtx_EXPR_LIST (SImode, gen_rtx_REG (SImode, 2), const0_rtx);
9550 RTVEC_ELT (p, 1)
9551 = gen_rtx_EXPR_LIST (SImode, gen_rtx_REG (SImode, 3), GEN_INT (4));
9552
9553 return gen_rtx_PARALLEL (mode, p);
9554 }
9555
9556 gcc_unreachable ();
9557 }
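
/* Editor's example (a sketch, not part of GCC): per the code above, a
   'double' return value lands in %f0 (hard reg 16) under
   TARGET_HARD_FLOAT, a 32-bit 'int' in %r2, and a 64-bit value with
   -m31 -mzarch in the %r2/%r3 pair built by the PARALLEL case.  */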
9558
9559 /* Define where to return a scalar return value of type RET_TYPE. */
9560
9561 static rtx
9562 s390_function_value (const_tree ret_type, const_tree fn_decl_or_type,
9563 bool outgoing)
9564 {
9565 return s390_function_and_libcall_value (TYPE_MODE (ret_type), ret_type,
9566 fn_decl_or_type, outgoing);
9567 }
9568
9569 /* Define where to return a scalar libcall return value of mode
9570 MODE. */
9571
9572 static rtx
9573 s390_libcall_value (enum machine_mode mode, const_rtx fun ATTRIBUTE_UNUSED)
9574 {
9575 return s390_function_and_libcall_value (mode, NULL_TREE,
9576 NULL_TREE, true);
9577 }
9578
9579
9580 /* Create and return the va_list datatype.
9581
9582 On S/390, va_list is an array type equivalent to
9583
9584 typedef struct __va_list_tag
9585 {
9586 long __gpr;
9587 long __fpr;
9588 void *__overflow_arg_area;
9589 void *__reg_save_area;
9590 } va_list[1];
9591
9592 where __gpr and __fpr hold the number of general purpose
9593 or floating point arguments used up to now, respectively,
9594 __overflow_arg_area points to the stack location of the
9595 next argument passed on the stack, and __reg_save_area
9596 always points to the start of the register area in the
9597 call frame of the current function. The function prologue
9598 saves all registers used for argument passing into this
9599 area if the function uses variable arguments. */
9600
9601 static tree
9602 s390_build_builtin_va_list (void)
9603 {
9604 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
9605
9606 record = lang_hooks.types.make_type (RECORD_TYPE);
9607
9608 type_decl =
9609 build_decl (BUILTINS_LOCATION,
9610 TYPE_DECL, get_identifier ("__va_list_tag"), record);
9611
9612 f_gpr = build_decl (BUILTINS_LOCATION,
9613 FIELD_DECL, get_identifier ("__gpr"),
9614 long_integer_type_node);
9615 f_fpr = build_decl (BUILTINS_LOCATION,
9616 FIELD_DECL, get_identifier ("__fpr"),
9617 long_integer_type_node);
9618 f_ovf = build_decl (BUILTINS_LOCATION,
9619 FIELD_DECL, get_identifier ("__overflow_arg_area"),
9620 ptr_type_node);
9621 f_sav = build_decl (BUILTINS_LOCATION,
9622 FIELD_DECL, get_identifier ("__reg_save_area"),
9623 ptr_type_node);
9624
9625 va_list_gpr_counter_field = f_gpr;
9626 va_list_fpr_counter_field = f_fpr;
9627
9628 DECL_FIELD_CONTEXT (f_gpr) = record;
9629 DECL_FIELD_CONTEXT (f_fpr) = record;
9630 DECL_FIELD_CONTEXT (f_ovf) = record;
9631 DECL_FIELD_CONTEXT (f_sav) = record;
9632
9633 TYPE_STUB_DECL (record) = type_decl;
9634 TYPE_NAME (record) = type_decl;
9635 TYPE_FIELDS (record) = f_gpr;
9636 DECL_CHAIN (f_gpr) = f_fpr;
9637 DECL_CHAIN (f_fpr) = f_ovf;
9638 DECL_CHAIN (f_ovf) = f_sav;
9639
9640 layout_type (record);
9641
9642 /* The correct type is an array type of one element. */
9643 return build_array_type (record, build_index_type (size_zero_node));
9644 }
9645
9646 /* Implement va_start by filling the va_list structure VALIST.
9647 STDARG_P is always true, and ignored.
9648 NEXTARG points to the first anonymous stack argument.
9649
9650 The following global variables are used to initialize
9651 the va_list structure:
9652
9653 crtl->args.info:
9654 holds number of gprs and fprs used for named arguments.
9655 crtl->args.arg_offset_rtx:
9656 holds the offset of the first anonymous stack argument
9657 (relative to the virtual arg pointer). */
9658
9659 static void
9660 s390_va_start (tree valist, rtx nextarg ATTRIBUTE_UNUSED)
9661 {
9662 HOST_WIDE_INT n_gpr, n_fpr;
9663 int off;
9664 tree f_gpr, f_fpr, f_ovf, f_sav;
9665 tree gpr, fpr, ovf, sav, t;
9666
9667 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
9668 f_fpr = DECL_CHAIN (f_gpr);
9669 f_ovf = DECL_CHAIN (f_fpr);
9670 f_sav = DECL_CHAIN (f_ovf);
9671
9672 valist = build_simple_mem_ref (valist);
9673 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
9674 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
9675 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
9676 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
9677
9678 /* Count number of gp and fp argument registers used. */
9679
9680 n_gpr = crtl->args.info.gprs;
9681 n_fpr = crtl->args.info.fprs;
9682
9683 if (cfun->va_list_gpr_size)
9684 {
9685 t = build2 (MODIFY_EXPR, TREE_TYPE (gpr), gpr,
9686 build_int_cst (NULL_TREE, n_gpr));
9687 TREE_SIDE_EFFECTS (t) = 1;
9688 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
9689 }
9690
9691 if (cfun->va_list_fpr_size)
9692 {
9693 t = build2 (MODIFY_EXPR, TREE_TYPE (fpr), fpr,
9694 build_int_cst (NULL_TREE, n_fpr));
9695 TREE_SIDE_EFFECTS (t) = 1;
9696 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
9697 }
9698
9699 /* Find the overflow area. */
9700 if (n_gpr + cfun->va_list_gpr_size > GP_ARG_NUM_REG
9701 || n_fpr + cfun->va_list_fpr_size > FP_ARG_NUM_REG)
9702 {
9703 t = make_tree (TREE_TYPE (ovf), virtual_incoming_args_rtx);
9704
9705 off = INTVAL (crtl->args.arg_offset_rtx);
9706 off = off < 0 ? 0 : off;
9707 if (TARGET_DEBUG_ARG)
9708 fprintf (stderr, "va_start: n_gpr = %d, n_fpr = %d off %d\n",
9709 (int)n_gpr, (int)n_fpr, off);
9710
9711 t = fold_build_pointer_plus_hwi (t, off);
9712
9713 t = build2 (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
9714 TREE_SIDE_EFFECTS (t) = 1;
9715 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
9716 }
9717
9718 /* Find the register save area. */
9719 if ((cfun->va_list_gpr_size && n_gpr < GP_ARG_NUM_REG)
9720 || (cfun->va_list_fpr_size && n_fpr < FP_ARG_NUM_REG))
9721 {
9722 t = make_tree (TREE_TYPE (sav), return_address_pointer_rtx);
9723 t = fold_build_pointer_plus_hwi (t, -RETURN_REGNUM * UNITS_PER_LONG);
9724
9725 t = build2 (MODIFY_EXPR, TREE_TYPE (sav), sav, t);
9726 TREE_SIDE_EFFECTS (t) = 1;
9727 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
9728 }
9729 }
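
/* Editor's example (a sketch, not part of GCC): for

     int f (int a, double b, ...)

   crtl->args.info records one named GPR and one named FPR, so
   va_start above sets __gpr = 1 and __fpr = 1, points
   __overflow_arg_area at the first anonymous stack argument, and
   points __reg_save_area at the register save area of the current
   call frame.  */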
9730
9731 /* Implement va_arg by updating the va_list structure
9732 VALIST as required to retrieve an argument of type
9733 TYPE, and returning that argument.
9734
9735 Generates code equivalent to:
9736
9737 if (integral value) {
9738 if (size <= 4 && args.gpr < 5 ||
9739 size > 4 && args.gpr < 4 )
9740 ret = args.reg_save_area[args.gpr+8]
9741 else
9742 ret = *args.overflow_arg_area++;
9743 } else if (float value) {
9744 if (args.fpr < 2)
9745 ret = args.reg_save_area[args.fpr+64]
9746 else
9747 ret = *args.overflow_arg_area++;
9748 } else if (aggregate value) {
9749 if (args.gpr < 5)
9750 ret = *args.reg_save_area[args.gpr]
9751 else
9752 ret = **args.overflow_arg_area++;
9753 } */
9754
9755 static tree
9756 s390_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
9757 gimple_seq *post_p ATTRIBUTE_UNUSED)
9758 {
9759 tree f_gpr, f_fpr, f_ovf, f_sav;
9760 tree gpr, fpr, ovf, sav, reg, t, u;
9761 int indirect_p, size, n_reg, sav_ofs, sav_scale, max_reg;
9762 tree lab_false, lab_over, addr;
9763
9764 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
9765 f_fpr = DECL_CHAIN (f_gpr);
9766 f_ovf = DECL_CHAIN (f_fpr);
9767 f_sav = DECL_CHAIN (f_ovf);
9768
9769 valist = build_va_arg_indirect_ref (valist);
9770 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
9771 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
9772 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
9773
9774 /* The tree for args* cannot be shared between gpr/fpr and ovf since
9775 both appear on a lhs. */
9776 valist = unshare_expr (valist);
9777 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
9778
9779 size = int_size_in_bytes (type);
9780
9781 if (pass_by_reference (NULL, TYPE_MODE (type), type, false))
9782 {
9783 if (TARGET_DEBUG_ARG)
9784 {
9785 fprintf (stderr, "va_arg: aggregate type");
9786 debug_tree (type);
9787 }
9788
9789 /* Aggregates are passed by reference. */
9790 indirect_p = 1;
9791 reg = gpr;
9792 n_reg = 1;
9793
9794 /* Kernel stack layout on 31 bit: it is assumed here that no padding
9795 will be added by s390_frame_info because for va_args an even number
9796 of GPRs always has to be saved (r15-r2 = 14 regs). */
9797 sav_ofs = 2 * UNITS_PER_LONG;
9798 sav_scale = UNITS_PER_LONG;
9799 size = UNITS_PER_LONG;
9800 max_reg = GP_ARG_NUM_REG - n_reg;
9801 }
9802 else if (s390_function_arg_float (TYPE_MODE (type), type))
9803 {
9804 if (TARGET_DEBUG_ARG)
9805 {
9806 fprintf (stderr, "va_arg: float type");
9807 debug_tree (type);
9808 }
9809
9810 /* FP args go in FP registers, if present. */
9811 indirect_p = 0;
9812 reg = fpr;
9813 n_reg = 1;
9814 sav_ofs = 16 * UNITS_PER_LONG;
9815 sav_scale = 8;
9816 max_reg = FP_ARG_NUM_REG - n_reg;
9817 }
9818 else
9819 {
9820 if (TARGET_DEBUG_ARG)
9821 {
9822 fprintf (stderr, "va_arg: other type");
9823 debug_tree (type);
9824 }
9825
9826 /* Otherwise into GP registers. */
9827 indirect_p = 0;
9828 reg = gpr;
9829 n_reg = (size + UNITS_PER_LONG - 1) / UNITS_PER_LONG;
9830
9831 /* Kernel stack layout on 31 bit: it is assumed here that no padding
9832 will be added by s390_frame_info because for va_args an even number
9833 of GPRs always has to be saved (r15-r2 = 14 regs). */
9834 sav_ofs = 2 * UNITS_PER_LONG;
9835
9836 if (size < UNITS_PER_LONG)
9837 sav_ofs += UNITS_PER_LONG - size;
9838
9839 sav_scale = UNITS_PER_LONG;
9840 max_reg = GP_ARG_NUM_REG - n_reg;
9841 }
9842
9843 /* Pull the value out of the saved registers ... */
9844
9845 lab_false = create_artificial_label (UNKNOWN_LOCATION);
9846 lab_over = create_artificial_label (UNKNOWN_LOCATION);
9847 addr = create_tmp_var (ptr_type_node, "addr");
9848
9849 t = fold_convert (TREE_TYPE (reg), size_int (max_reg));
9850 t = build2 (GT_EXPR, boolean_type_node, reg, t);
9851 u = build1 (GOTO_EXPR, void_type_node, lab_false);
9852 t = build3 (COND_EXPR, void_type_node, t, u, NULL_TREE);
9853 gimplify_and_add (t, pre_p);
9854
9855 t = fold_build_pointer_plus_hwi (sav, sav_ofs);
9856 u = build2 (MULT_EXPR, TREE_TYPE (reg), reg,
9857 fold_convert (TREE_TYPE (reg), size_int (sav_scale)));
9858 t = fold_build_pointer_plus (t, u);
9859
9860 gimplify_assign (addr, t, pre_p);
9861
9862 gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));
9863
9864 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_false));
9865
9866
9867 /* ... Otherwise out of the overflow area. */
9868
9869 t = ovf;
9870 if (size < UNITS_PER_LONG)
9871 t = fold_build_pointer_plus_hwi (t, UNITS_PER_LONG - size);
9872
9873 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
9874
9875 gimplify_assign (addr, t, pre_p);
9876
9877 t = fold_build_pointer_plus_hwi (t, size);
9878 gimplify_assign (ovf, t, pre_p);
9879
9880 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_over));
9881
9882
9883 /* Increment register save count. */
9884
9885 u = build2 (PREINCREMENT_EXPR, TREE_TYPE (reg), reg,
9886 fold_convert (TREE_TYPE (reg), size_int (n_reg)));
9887 gimplify_and_add (u, pre_p);
9888
9889 if (indirect_p)
9890 {
9891 t = build_pointer_type_for_mode (build_pointer_type (type),
9892 ptr_mode, true);
9893 addr = fold_convert (t, addr);
9894 addr = build_va_arg_indirect_ref (addr);
9895 }
9896 else
9897 {
9898 t = build_pointer_type_for_mode (type, ptr_mode, true);
9899 addr = fold_convert (t, addr);
9900 }
9901
9902 return build_va_arg_indirect_ref (addr);
9903 }
9904
9905 /* Emit rtl for the tbegin or tbegin_retry (RETRY != NULL_RTX)
9906 expanders.
9907 DEST - Register location where CC will be stored.
9908 TDB - Pointer to a 256 byte area where to store the transaction
9909 diagnostic block. NULL if TDB is not needed.
9910 RETRY - Retry count value. If non-NULL a retry loop for CC2
9911 is emitted.
9912 CLOBBER_FPRS_P - If true clobbers for all FPRs are emitted as part
9913 of the tbegin instruction pattern. */
9914
9915 void
9916 s390_expand_tbegin (rtx dest, rtx tdb, rtx retry, bool clobber_fprs_p)
9917 {
9918 rtx retry_plus_two = gen_reg_rtx (SImode);
9919 rtx retry_reg = gen_reg_rtx (SImode);
9920 rtx retry_label = NULL_RTX;
9921
9922 if (retry != NULL_RTX)
9923 {
9924 emit_move_insn (retry_reg, retry);
9925 emit_insn (gen_addsi3 (retry_plus_two, retry_reg, const2_rtx));
9926 emit_insn (gen_addsi3 (retry_reg, retry_reg, const1_rtx));
9927 retry_label = gen_label_rtx ();
9928 emit_label (retry_label);
9929 }
9930
9931 if (clobber_fprs_p)
9932 emit_insn (gen_tbegin_1 (gen_rtx_CONST_INT (VOIDmode, TBEGIN_MASK), tdb));
9933 else
9934 emit_insn (gen_tbegin_nofloat_1 (gen_rtx_CONST_INT (VOIDmode, TBEGIN_MASK),
9935 tdb));
9936
9937 emit_move_insn (dest, gen_rtx_UNSPEC (SImode,
9938 gen_rtvec (1, gen_rtx_REG (CCRAWmode,
9939 CC_REGNUM)),
9940 UNSPEC_CC_TO_INT));
9941 if (retry != NULL_RTX)
9942 {
9943 const int CC0 = 1 << 3;
9944 const int CC1 = 1 << 2;
9945 const int CC3 = 1 << 0;
9946 rtx jump;
9947 rtx count = gen_reg_rtx (SImode);
9948 rtx leave_label = gen_label_rtx ();
9949
9950 /* Exit for success and permanent failures. */
9951 jump = s390_emit_jump (leave_label,
9952 gen_rtx_EQ (VOIDmode,
9953 gen_rtx_REG (CCRAWmode, CC_REGNUM),
9954 gen_rtx_CONST_INT (VOIDmode, CC0 | CC1 | CC3)));
9955 LABEL_NUSES (leave_label) = 1;
9956
9957 /* CC2 - transient failure. Perform retry with ppa. */
9958 emit_move_insn (count, retry_plus_two);
9959 emit_insn (gen_subsi3 (count, count, retry_reg));
9960 emit_insn (gen_tx_assist (count));
9961 jump = emit_jump_insn (gen_doloop_si64 (retry_label,
9962 retry_reg,
9963 retry_reg));
9964 JUMP_LABEL (jump) = retry_label;
9965 LABEL_NUSES (retry_label) = 1;
9966 emit_label (leave_label);
9967 }
9968 }
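
/* Editor's sketch of the RTL emitted above for tbegin_retry, written
   as C-like pseudo code (illustrative only, not part of GCC):

     retry_plus_two = retry + 2;
     retry_reg = retry + 1;
   retry_label:
     cc = tbegin ();
     if (cc & (CC0 | CC1 | CC3))              // success or permanent failure
       goto leave_label;
     tx_assist (retry_plus_two - retry_reg);  // ppa processor hint
     if (--retry_reg != 0)                    // doloop pattern
       goto retry_label;
   leave_label: ;
*/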
9969
9970 /* Builtins. */
9971
9972 enum s390_builtin
9973 {
9974 S390_BUILTIN_TBEGIN,
9975 S390_BUILTIN_TBEGIN_NOFLOAT,
9976 S390_BUILTIN_TBEGIN_RETRY,
9977 S390_BUILTIN_TBEGIN_RETRY_NOFLOAT,
9978 S390_BUILTIN_TBEGINC,
9979 S390_BUILTIN_TEND,
9980 S390_BUILTIN_TABORT,
9981 S390_BUILTIN_NON_TX_STORE,
9982 S390_BUILTIN_TX_NESTING_DEPTH,
9983 S390_BUILTIN_TX_ASSIST,
9984
9985 S390_BUILTIN_max
9986 };
9987
9988 static enum insn_code const code_for_builtin[S390_BUILTIN_max] = {
9989 CODE_FOR_tbegin,
9990 CODE_FOR_tbegin_nofloat,
9991 CODE_FOR_tbegin_retry,
9992 CODE_FOR_tbegin_retry_nofloat,
9993 CODE_FOR_tbeginc,
9994 CODE_FOR_tend,
9995 CODE_FOR_tabort,
9996 CODE_FOR_ntstg,
9997 CODE_FOR_etnd,
9998 CODE_FOR_tx_assist
9999 };
10000
10001 static void
10002 s390_init_builtins (void)
10003 {
10004 tree ftype, uint64_type;
10005 tree returns_twice_attr = tree_cons (get_identifier ("returns_twice"),
10006 NULL, NULL);
10007 tree noreturn_attr = tree_cons (get_identifier ("noreturn"), NULL, NULL);
10008
10009 /* void foo (void) */
10010 ftype = build_function_type_list (void_type_node, NULL_TREE);
10011 add_builtin_function ("__builtin_tbeginc", ftype, S390_BUILTIN_TBEGINC,
10012 BUILT_IN_MD, NULL, NULL_TREE);
10013
10014 /* void foo (int) */
10015 ftype = build_function_type_list (void_type_node, integer_type_node,
10016 NULL_TREE);
10017 add_builtin_function ("__builtin_tabort", ftype,
10018 S390_BUILTIN_TABORT, BUILT_IN_MD, NULL, noreturn_attr);
10019 add_builtin_function ("__builtin_tx_assist", ftype,
10020 S390_BUILTIN_TX_ASSIST, BUILT_IN_MD, NULL, NULL_TREE);
10021
10022 /* int foo (void *) */
10023 ftype = build_function_type_list (integer_type_node, ptr_type_node, NULL_TREE);
10024 add_builtin_function ("__builtin_tbegin", ftype, S390_BUILTIN_TBEGIN,
10025 BUILT_IN_MD, NULL, returns_twice_attr);
10026 add_builtin_function ("__builtin_tbegin_nofloat", ftype,
10027 S390_BUILTIN_TBEGIN_NOFLOAT,
10028 BUILT_IN_MD, NULL, returns_twice_attr);
10029
10030 /* int foo (void *, int) */
10031 ftype = build_function_type_list (integer_type_node, ptr_type_node,
10032 integer_type_node, NULL_TREE);
10033 add_builtin_function ("__builtin_tbegin_retry", ftype,
10034 S390_BUILTIN_TBEGIN_RETRY,
10035 BUILT_IN_MD,
10036 NULL, returns_twice_attr);
10037 add_builtin_function ("__builtin_tbegin_retry_nofloat", ftype,
10038 S390_BUILTIN_TBEGIN_RETRY_NOFLOAT,
10039 BUILT_IN_MD,
10040 NULL, returns_twice_attr);
10041
10042 /* int foo (void) */
10043 ftype = build_function_type_list (integer_type_node, NULL_TREE);
10044 add_builtin_function ("__builtin_tx_nesting_depth", ftype,
10045 S390_BUILTIN_TX_NESTING_DEPTH,
10046 BUILT_IN_MD, NULL, NULL_TREE);
10047 add_builtin_function ("__builtin_tend", ftype,
10048 S390_BUILTIN_TEND, BUILT_IN_MD, NULL, NULL_TREE);
10049
10050 /* void foo (uint64_t *, uint64_t) */
10051 if (TARGET_64BIT)
10052 uint64_type = long_unsigned_type_node;
10053 else
10054 uint64_type = long_long_unsigned_type_node;
10055
10056 ftype = build_function_type_list (void_type_node,
10057 build_pointer_type (uint64_type),
10058 uint64_type, NULL_TREE);
10059 add_builtin_function ("__builtin_non_tx_store", ftype,
10060 S390_BUILTIN_NON_TX_STORE,
10061 BUILT_IN_MD, NULL, NULL_TREE);
10062 }
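
/* Editor's usage sketch for the builtins registered above (user code,
   not part of GCC; requires -mhtm). A zero condition code from tbegin
   indicates the transaction has started:

     if (__builtin_tbegin ((void *) 0) == 0)
       {
         counter++;              // transactional code
         __builtin_tend ();
       }
*/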
10063
10064 /* Expand an expression EXP that calls a built-in function,
10065 with result going to TARGET if that's convenient
10066 (and in mode MODE if that's convenient).
10067 SUBTARGET may be used as the target for computing one of EXP's operands.
10068 IGNORE is nonzero if the value is to be ignored. */
10069
10070 static rtx
10071 s390_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
10072 enum machine_mode mode ATTRIBUTE_UNUSED,
10073 int ignore ATTRIBUTE_UNUSED)
10074 {
10075 #define MAX_ARGS 2
10076
10077 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
10078 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
10079 enum insn_code icode;
10080 rtx op[MAX_ARGS], pat;
10081 int arity;
10082 bool nonvoid;
10083 tree arg;
10084 call_expr_arg_iterator iter;
10085
10086 if (fcode >= S390_BUILTIN_max)
10087 internal_error ("bad builtin fcode");
10088 icode = code_for_builtin[fcode];
10089 if (icode == 0)
10090 internal_error ("bad builtin fcode");
10091
10092 if (!TARGET_HTM)
10093 error ("Transactional execution builtins not enabled (-mhtm)\n");
10094
10095 /* Set a flag in the machine specific cfun part in order to support
10096 saving/restoring of FPRs. */
10097 if (fcode == S390_BUILTIN_TBEGIN || fcode == S390_BUILTIN_TBEGIN_RETRY)
10098 cfun->machine->tbegin_p = true;
10099
10100 nonvoid = TREE_TYPE (TREE_TYPE (fndecl)) != void_type_node;
10101
10102 arity = 0;
10103 FOR_EACH_CALL_EXPR_ARG (arg, iter, exp)
10104 {
10105 const struct insn_operand_data *insn_op;
10106
10107 if (arg == error_mark_node)
10108 return NULL_RTX;
10109 if (arity >= MAX_ARGS)
10110 return NULL_RTX;
10111
10112 insn_op = &insn_data[icode].operand[arity + nonvoid];
10113
10114 op[arity] = expand_expr (arg, NULL_RTX, insn_op->mode, EXPAND_NORMAL);
10115
10116 if (!(*insn_op->predicate) (op[arity], insn_op->mode))
10117 {
10118 if (insn_op->predicate == memory_operand)
10119 {
10120 /* Don't move a NULL pointer into a register. Otherwise
10121 we have to rely on combine being able to move it back
10122 in order to get an immediate 0 in the instruction. */
10123 if (op[arity] != const0_rtx)
10124 op[arity] = copy_to_mode_reg (Pmode, op[arity]);
10125 op[arity] = gen_rtx_MEM (insn_op->mode, op[arity]);
10126 }
10127 else
10128 op[arity] = copy_to_mode_reg (insn_op->mode, op[arity]);
10129 }
10130
10131 arity++;
10132 }
10133
10134 if (nonvoid)
10135 {
10136 enum machine_mode tmode = insn_data[icode].operand[0].mode;
10137 if (!target
10138 || GET_MODE (target) != tmode
10139 || !(*insn_data[icode].operand[0].predicate) (target, tmode))
10140 target = gen_reg_rtx (tmode);
10141 }
10142
10143 switch (arity)
10144 {
10145 case 0:
10146 pat = GEN_FCN (icode) (target);
10147 break;
10148 case 1:
10149 if (nonvoid)
10150 pat = GEN_FCN (icode) (target, op[0]);
10151 else
10152 pat = GEN_FCN (icode) (op[0]);
10153 break;
10154 case 2:
10155 if (nonvoid)
10156 pat = GEN_FCN (icode) (target, op[0], op[1]);
10157 else
10158 pat = GEN_FCN (icode) (op[0], op[1]);
10159 break;
10160 default:
10161 gcc_unreachable ();
10162 }
10163 if (!pat)
10164 return NULL_RTX;
10165 emit_insn (pat);
10166
10167 if (nonvoid)
10168 return target;
10169 else
10170 return const0_rtx;
10171 }
10172
10173
10174 /* Output assembly code for the trampoline template to
10175 stdio stream FILE.
10176
10177 On S/390, we use gpr 1 internally in the trampoline code;
10178 gpr 0 is used to hold the static chain. */
10179
10180 static void
10181 s390_asm_trampoline_template (FILE *file)
10182 {
10183 rtx op[2];
10184 op[0] = gen_rtx_REG (Pmode, 0);
10185 op[1] = gen_rtx_REG (Pmode, 1);
10186
10187 if (TARGET_64BIT)
10188 {
10189 output_asm_insn ("basr\t%1,0", op); /* 2 byte */
10190 output_asm_insn ("lmg\t%0,%1,14(%1)", op); /* 6 byte */
10191 output_asm_insn ("br\t%1", op); /* 2 byte */
10192 ASM_OUTPUT_SKIP (file, (HOST_WIDE_INT)(TRAMPOLINE_SIZE - 10));
10193 }
10194 else
10195 {
10196 output_asm_insn ("basr\t%1,0", op); /* 2 byte */
10197 output_asm_insn ("lm\t%0,%1,6(%1)", op); /* 4 byte */
10198 output_asm_insn ("br\t%1", op); /* 2 byte */
10199 ASM_OUTPUT_SKIP (file, (HOST_WIDE_INT)(TRAMPOLINE_SIZE - 8));
10200 }
10201 }
10202
10203 /* Emit RTL insns to initialize the variable parts of a trampoline.
10204 FNADDR is an RTX for the address of the function's pure code.
10205 CXT is an RTX for the static chain value for the function. */
10206
10207 static void
10208 s390_trampoline_init (rtx m_tramp, tree fndecl, rtx cxt)
10209 {
10210 rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
10211 rtx mem;
10212
10213 emit_block_move (m_tramp, assemble_trampoline_template (),
10214 GEN_INT (2 * UNITS_PER_LONG), BLOCK_OP_NORMAL);
10215
10216 mem = adjust_address (m_tramp, Pmode, 2 * UNITS_PER_LONG);
10217 emit_move_insn (mem, cxt);
10218 mem = adjust_address (m_tramp, Pmode, 3 * UNITS_PER_LONG);
10219 emit_move_insn (mem, fnaddr);
10220 }
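
/* Editor's note (a sketch, not part of GCC): together with the
   template above, the initialized trampoline occupies

     offset 0*UNITS_PER_LONG: basr/lm(g)/br code copied from the template
     offset 2*UNITS_PER_LONG: static chain value (loaded into %r0)
     offset 3*UNITS_PER_LONG: target function address (loaded into %r1)  */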
10221
10222 /* Output assembler code to FILE to increment profiler label # LABELNO
10223 for profiling a function entry. */
10224
10225 void
10226 s390_function_profiler (FILE *file, int labelno)
10227 {
10228 rtx op[7];
10229
10230 char label[128];
10231 ASM_GENERATE_INTERNAL_LABEL (label, "LP", labelno);
10232
10233 fprintf (file, "# function profiler \n");
10234
10235 op[0] = gen_rtx_REG (Pmode, RETURN_REGNUM);
10236 op[1] = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
10237 op[1] = gen_rtx_MEM (Pmode, plus_constant (Pmode, op[1], UNITS_PER_LONG));
10238
10239 op[2] = gen_rtx_REG (Pmode, 1);
10240 op[3] = gen_rtx_SYMBOL_REF (Pmode, label);
10241 SYMBOL_REF_FLAGS (op[3]) = SYMBOL_FLAG_LOCAL;
10242
10243 op[4] = gen_rtx_SYMBOL_REF (Pmode, "_mcount");
10244 if (flag_pic)
10245 {
10246 op[4] = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op[4]), UNSPEC_PLT);
10247 op[4] = gen_rtx_CONST (Pmode, op[4]);
10248 }
10249
10250 if (TARGET_64BIT)
10251 {
10252 output_asm_insn ("stg\t%0,%1", op);
10253 output_asm_insn ("larl\t%2,%3", op);
10254 output_asm_insn ("brasl\t%0,%4", op);
10255 output_asm_insn ("lg\t%0,%1", op);
10256 }
10257 else if (!flag_pic)
10258 {
10259 op[6] = gen_label_rtx ();
10260
10261 output_asm_insn ("st\t%0,%1", op);
10262 output_asm_insn ("bras\t%2,%l6", op);
10263 output_asm_insn (".long\t%4", op);
10264 output_asm_insn (".long\t%3", op);
10265 targetm.asm_out.internal_label (file, "L", CODE_LABEL_NUMBER (op[6]));
10266 output_asm_insn ("l\t%0,0(%2)", op);
10267 output_asm_insn ("l\t%2,4(%2)", op);
10268 output_asm_insn ("basr\t%0,%0", op);
10269 output_asm_insn ("l\t%0,%1", op);
10270 }
10271 else
10272 {
10273 op[5] = gen_label_rtx ();
10274 op[6] = gen_label_rtx ();
10275
10276 output_asm_insn ("st\t%0,%1", op);
10277 output_asm_insn ("bras\t%2,%l6", op);
10278 targetm.asm_out.internal_label (file, "L", CODE_LABEL_NUMBER (op[5]));
10279 output_asm_insn (".long\t%4-%l5", op);
10280 output_asm_insn (".long\t%3-%l5", op);
10281 targetm.asm_out.internal_label (file, "L", CODE_LABEL_NUMBER (op[6]));
10282 output_asm_insn ("lr\t%0,%2", op);
10283 output_asm_insn ("a\t%0,0(%2)", op);
10284 output_asm_insn ("a\t%2,4(%2)", op);
10285 output_asm_insn ("basr\t%0,%0", op);
10286 output_asm_insn ("l\t%0,%1", op);
10287 }
10288 }
10289
10290 /* Encode symbol attributes (local vs. global, tls model) of a SYMBOL_REF
10291 into its SYMBOL_REF_FLAGS. */
10292
10293 static void
10294 s390_encode_section_info (tree decl, rtx rtl, int first)
10295 {
10296 default_encode_section_info (decl, rtl, first);
10297
10298 if (TREE_CODE (decl) == VAR_DECL)
10299 {
10300 /* If a variable has a forced alignment to < 2 bytes, mark it
10301 with SYMBOL_FLAG_ALIGN1 to prevent it from being used as LARL
10302 operand. */
10303 if (DECL_USER_ALIGN (decl) && DECL_ALIGN (decl) < 16)
10304 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_ALIGN1;
10305 if (!DECL_SIZE (decl)
10306 || !DECL_ALIGN (decl)
10307 || !tree_fits_shwi_p (DECL_SIZE (decl))
10308 || (DECL_ALIGN (decl) <= 64
10309 && DECL_ALIGN (decl) != tree_to_shwi (DECL_SIZE (decl))))
10310 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_NOT_NATURALLY_ALIGNED;
10311 }
10312
10313 /* Literal pool references don't have a decl so they are handled
10314 differently here. We rely on the information in the MEM_ALIGN
10315 entry to decide upon natural alignment. */
10316 if (MEM_P (rtl)
10317 && GET_CODE (XEXP (rtl, 0)) == SYMBOL_REF
10318 && TREE_CONSTANT_POOL_ADDRESS_P (XEXP (rtl, 0))
10319 && (MEM_ALIGN (rtl) == 0
10320 || GET_MODE_BITSIZE (GET_MODE (rtl)) == 0
10321 || MEM_ALIGN (rtl) < GET_MODE_BITSIZE (GET_MODE (rtl))))
10322 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_NOT_NATURALLY_ALIGNED;
10323 }
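
/* Editor's example (a sketch, not part of GCC): a declaration like

     char buf[10] __attribute__ ((aligned (1)));

   has DECL_USER_ALIGN set and DECL_ALIGN < 16 bits, so its SYMBOL_REF
   is marked SYMBOL_FLAG_ALIGN1 above and will not be used as a LARL
   operand, since LARL can only address even addresses.  */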
10324
10325 /* Output thunk to FILE that implements a C++ virtual function call (with
10326 multiple inheritance) to FUNCTION. The thunk adjusts the this pointer
10327 by DELTA, and unless VCALL_OFFSET is zero, applies an additional adjustment
10328 stored at VCALL_OFFSET in the vtable whose address is located at offset 0
10329 relative to the resulting this pointer. */
10330
10331 static void
10332 s390_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
10333 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
10334 tree function)
10335 {
10336 rtx op[10];
10337 int nonlocal = 0;
10338
10339 /* Make sure unwind info is emitted for the thunk if needed. */
10340 final_start_function (emit_barrier (), file, 1);
10341
10342 /* Operand 0 is the target function. */
10343 op[0] = XEXP (DECL_RTL (function), 0);
10344 if (flag_pic && !SYMBOL_REF_LOCAL_P (op[0]))
10345 {
10346 nonlocal = 1;
10347 op[0] = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op[0]),
10348 TARGET_64BIT ? UNSPEC_PLT : UNSPEC_GOT);
10349 op[0] = gen_rtx_CONST (Pmode, op[0]);
10350 }
10351
10352 /* Operand 1 is the 'this' pointer. */
10353 if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
10354 op[1] = gen_rtx_REG (Pmode, 3);
10355 else
10356 op[1] = gen_rtx_REG (Pmode, 2);
10357
10358 /* Operand 2 is the delta. */
10359 op[2] = GEN_INT (delta);
10360
10361 /* Operand 3 is the vcall_offset. */
10362 op[3] = GEN_INT (vcall_offset);
10363
10364 /* Operand 4 is the temporary register. */
10365 op[4] = gen_rtx_REG (Pmode, 1);
10366
10367 /* Operands 5 to 8 can be used as labels. */
10368 op[5] = NULL_RTX;
10369 op[6] = NULL_RTX;
10370 op[7] = NULL_RTX;
10371 op[8] = NULL_RTX;
10372
10373 /* Operand 9 can be used for temporary register. */
10374 op[9] = NULL_RTX;
10375
10376 /* Generate code. */
10377 if (TARGET_64BIT)
10378 {
10379 /* Setup literal pool pointer if required. */
10380 if ((!DISP_IN_RANGE (delta)
10381 && !CONST_OK_FOR_K (delta)
10382 && !CONST_OK_FOR_Os (delta))
10383 || (!DISP_IN_RANGE (vcall_offset)
10384 && !CONST_OK_FOR_K (vcall_offset)
10385 && !CONST_OK_FOR_Os (vcall_offset)))
10386 {
10387 op[5] = gen_label_rtx ();
10388 output_asm_insn ("larl\t%4,%5", op);
10389 }
10390
10391 /* Add DELTA to this pointer. */
10392 if (delta)
10393 {
10394 if (CONST_OK_FOR_J (delta))
10395 output_asm_insn ("la\t%1,%2(%1)", op);
10396 else if (DISP_IN_RANGE (delta))
10397 output_asm_insn ("lay\t%1,%2(%1)", op);
10398 else if (CONST_OK_FOR_K (delta))
10399 output_asm_insn ("aghi\t%1,%2", op);
10400 else if (CONST_OK_FOR_Os (delta))
10401 output_asm_insn ("agfi\t%1,%2", op);
10402 else
10403 {
10404 op[6] = gen_label_rtx ();
10405 output_asm_insn ("agf\t%1,%6-%5(%4)", op);
10406 }
10407 }
10408
10409 /* Perform vcall adjustment. */
10410 if (vcall_offset)
10411 {
10412 if (DISP_IN_RANGE (vcall_offset))
10413 {
10414 output_asm_insn ("lg\t%4,0(%1)", op);
10415 output_asm_insn ("ag\t%1,%3(%4)", op);
10416 }
10417 else if (CONST_OK_FOR_K (vcall_offset))
10418 {
10419 output_asm_insn ("lghi\t%4,%3", op);
10420 output_asm_insn ("ag\t%4,0(%1)", op);
10421 output_asm_insn ("ag\t%1,0(%4)", op);
10422 }
10423 else if (CONST_OK_FOR_Os (vcall_offset))
10424 {
10425 output_asm_insn ("lgfi\t%4,%3", op);
10426 output_asm_insn ("ag\t%4,0(%1)", op);
10427 output_asm_insn ("ag\t%1,0(%4)", op);
10428 }
10429 else
10430 {
10431 op[7] = gen_label_rtx ();
10432 output_asm_insn ("llgf\t%4,%7-%5(%4)", op);
10433 output_asm_insn ("ag\t%4,0(%1)", op);
10434 output_asm_insn ("ag\t%1,0(%4)", op);
10435 }
10436 }
10437
10438 /* Jump to target. */
10439 output_asm_insn ("jg\t%0", op);
10440
10441 /* Output literal pool if required. */
10442 if (op[5])
10443 {
10444 output_asm_insn (".align\t4", op);
10445 targetm.asm_out.internal_label (file, "L",
10446 CODE_LABEL_NUMBER (op[5]));
10447 }
10448 if (op[6])
10449 {
10450 targetm.asm_out.internal_label (file, "L",
10451 CODE_LABEL_NUMBER (op[6]));
10452 output_asm_insn (".long\t%2", op);
10453 }
10454 if (op[7])
10455 {
10456 targetm.asm_out.internal_label (file, "L",
10457 CODE_LABEL_NUMBER (op[7]));
10458 output_asm_insn (".long\t%3", op);
10459 }
10460 }
10461 else
10462 {
10463 /* Setup base pointer if required. */
10464 if (!vcall_offset
10465 || (!DISP_IN_RANGE (delta)
10466 && !CONST_OK_FOR_K (delta)
10467 && !CONST_OK_FOR_Os (delta))
10468 || (!DISP_IN_RANGE (vcall_offset)
10469 && !CONST_OK_FOR_K (vcall_offset)
10470 && !CONST_OK_FOR_Os (vcall_offset)))
10471 {
10472 op[5] = gen_label_rtx ();
10473 output_asm_insn ("basr\t%4,0", op);
10474 targetm.asm_out.internal_label (file, "L",
10475 CODE_LABEL_NUMBER (op[5]));
10476 }
10477
10478 /* Add DELTA to this pointer. */
10479 if (delta)
10480 {
10481 if (CONST_OK_FOR_J (delta))
10482 output_asm_insn ("la\t%1,%2(%1)", op);
10483 else if (DISP_IN_RANGE (delta))
10484 output_asm_insn ("lay\t%1,%2(%1)", op);
10485 else if (CONST_OK_FOR_K (delta))
10486 output_asm_insn ("ahi\t%1,%2", op);
10487 else if (CONST_OK_FOR_Os (delta))
10488 output_asm_insn ("afi\t%1,%2", op);
10489 else
10490 {
10491 op[6] = gen_label_rtx ();
10492 output_asm_insn ("a\t%1,%6-%5(%4)", op);
10493 }
10494 }
10495
10496 /* Perform vcall adjustment. */
10497 if (vcall_offset)
10498 {
10499 if (CONST_OK_FOR_J (vcall_offset))
10500 {
10501 output_asm_insn ("l\t%4,0(%1)", op);
10502 output_asm_insn ("a\t%1,%3(%4)", op);
10503 }
10504 else if (DISP_IN_RANGE (vcall_offset))
10505 {
10506 output_asm_insn ("l\t%4,0(%1)", op);
10507 output_asm_insn ("ay\t%1,%3(%4)", op);
10508 }
10509 else if (CONST_OK_FOR_K (vcall_offset))
10510 {
10511 output_asm_insn ("lhi\t%4,%3", op);
10512 output_asm_insn ("a\t%4,0(%1)", op);
10513 output_asm_insn ("a\t%1,0(%4)", op);
10514 }
10515 else if (CONST_OK_FOR_Os (vcall_offset))
10516 {
10517 output_asm_insn ("iilf\t%4,%3", op);
10518 output_asm_insn ("a\t%4,0(%1)", op);
10519 output_asm_insn ("a\t%1,0(%4)", op);
10520 }
10521 else
10522 {
10523 op[7] = gen_label_rtx ();
10524 output_asm_insn ("l\t%4,%7-%5(%4)", op);
10525 output_asm_insn ("a\t%4,0(%1)", op);
10526 output_asm_insn ("a\t%1,0(%4)", op);
10527 }
10528
10529 /* We had to clobber the base pointer register.
10530 Re-setup the base pointer (with a different base). */
10531 op[5] = gen_label_rtx ();
10532 output_asm_insn ("basr\t%4,0", op);
10533 targetm.asm_out.internal_label (file, "L",
10534 CODE_LABEL_NUMBER (op[5]));
10535 }
10536
10537 /* Jump to target. */
10538 op[8] = gen_label_rtx ();
10539
10540 if (!flag_pic)
10541 output_asm_insn ("l\t%4,%8-%5(%4)", op);
10542 else if (!nonlocal)
10543 output_asm_insn ("a\t%4,%8-%5(%4)", op);
10544 /* We cannot call through .plt, since .plt requires %r12 loaded. */
10545 else if (flag_pic == 1)
10546 {
10547 output_asm_insn ("a\t%4,%8-%5(%4)", op);
10548 output_asm_insn ("l\t%4,%0(%4)", op);
10549 }
10550 else if (flag_pic == 2)
10551 {
10552 op[9] = gen_rtx_REG (Pmode, 0);
10553 output_asm_insn ("l\t%9,%8-4-%5(%4)", op);
10554 output_asm_insn ("a\t%4,%8-%5(%4)", op);
10555 output_asm_insn ("ar\t%4,%9", op);
10556 output_asm_insn ("l\t%4,0(%4)", op);
10557 }
10558
10559 output_asm_insn ("br\t%4", op);
10560
10561 /* Output literal pool. */
10562 output_asm_insn (".align\t4", op);
10563
10564 if (nonlocal && flag_pic == 2)
10565 output_asm_insn (".long\t%0", op);
10566 if (nonlocal)
10567 {
10568 op[0] = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
10569 SYMBOL_REF_FLAGS (op[0]) = SYMBOL_FLAG_LOCAL;
10570 }
10571
10572 targetm.asm_out.internal_label (file, "L", CODE_LABEL_NUMBER (op[8]));
10573 if (!flag_pic)
10574 output_asm_insn (".long\t%0", op);
10575 else
10576 output_asm_insn (".long\t%0-%5", op);
10577
10578 if (op[6])
10579 {
10580 targetm.asm_out.internal_label (file, "L",
10581 CODE_LABEL_NUMBER (op[6]));
10582 output_asm_insn (".long\t%2", op);
10583 }
10584 if (op[7])
10585 {
10586 targetm.asm_out.internal_label (file, "L",
10587 CODE_LABEL_NUMBER (op[7]));
10588 output_asm_insn (".long\t%3", op);
10589 }
10590 }
10591 final_end_function ();
10592 }
10593
10594 static bool
10595 s390_valid_pointer_mode (enum machine_mode mode)
10596 {
10597 return (mode == SImode || (TARGET_64BIT && mode == DImode));
10598 }
10599
10600 /* Checks whether the given CALL_EXPR would use a call-saved
10601 register. This is used to decide whether sibling call
10602 optimization could be performed on the respective function
10603 call. */
10604
10605 static bool
10606 s390_call_saved_register_used (tree call_expr)
10607 {
10608 CUMULATIVE_ARGS cum_v;
10609 cumulative_args_t cum;
10610 tree parameter;
10611 enum machine_mode mode;
10612 tree type;
10613 rtx parm_rtx;
10614 int reg, i;
10615
10616 INIT_CUMULATIVE_ARGS (cum_v, NULL, NULL, 0, 0);
10617 cum = pack_cumulative_args (&cum_v);
10618
10619 for (i = 0; i < call_expr_nargs (call_expr); i++)
10620 {
10621 parameter = CALL_EXPR_ARG (call_expr, i);
10622 gcc_assert (parameter);
10623
10624 /* For an undeclared variable passed as parameter we will get
10625 an ERROR_MARK node here. */
10626 if (TREE_CODE (parameter) == ERROR_MARK)
10627 return true;
10628
10629 type = TREE_TYPE (parameter);
10630 gcc_assert (type);
10631
10632 mode = TYPE_MODE (type);
10633 gcc_assert (mode);
10634
10635 if (pass_by_reference (&cum_v, mode, type, true))
10636 {
10637 mode = Pmode;
10638 type = build_pointer_type (type);
10639 }
10640
10641 parm_rtx = s390_function_arg (cum, mode, type, 0);
10642
10643 s390_function_arg_advance (cum, mode, type, 0);
10644
10645 if (!parm_rtx)
10646 continue;
10647
10648 if (REG_P (parm_rtx))
10649 {
10650 for (reg = 0;
10651 reg < HARD_REGNO_NREGS (REGNO (parm_rtx), GET_MODE (parm_rtx));
10652 reg++)
10653 if (!call_used_regs[reg + REGNO (parm_rtx)])
10654 return true;
10655 }
10656
10657 if (GET_CODE (parm_rtx) == PARALLEL)
10658 {
10659 int i;
10660
10661 for (i = 0; i < XVECLEN (parm_rtx, 0); i++)
10662 {
10663 rtx r = XEXP (XVECEXP (parm_rtx, 0, i), 0);
10664
10665 gcc_assert (REG_P (r));
10666
10667 for (reg = 0;
10668 reg < HARD_REGNO_NREGS (REGNO (r), GET_MODE (r));
10669 reg++)
10670 if (!call_used_regs[reg + REGNO (r)])
10671 return true;
10672 }
10673 }
10674
10675 }
10676 return false;
10677 }
10678
10679 /* Return true if the given call expression can be
10680 turned into a sibling call.
10681 DECL holds the declaration of the function to be called whereas
10682 EXP is the call expression itself. */
10683
10684 static bool
10685 s390_function_ok_for_sibcall (tree decl, tree exp)
10686 {
10687 /* The TPF epilogue uses register 1. */
10688 if (TARGET_TPF_PROFILING)
10689 return false;
10690
10691 /* The 31 bit PLT code uses register 12 (GOT pointer - caller saved)
10692 which would have to be restored before the sibcall. */
10693 if (!TARGET_64BIT && flag_pic && decl && !targetm.binds_local_p (decl))
10694 return false;
10695
10696 /* Register 6 on s390 is available as an argument register but is
10697 call-saved. This makes functions needing this register for arguments
10698 not suitable for sibcalls. */
10699 return !s390_call_saved_register_used (exp);
10700 }
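
/* Editor's example (a sketch, not part of GCC): with the standard ABI
   gprs 2-6 carry integer arguments, so a call such as

     void callee (long a, long b, long c, long d, long e);

   places 'e' in %r6. Since %r6 is call-saved,
   s390_call_saved_register_used returns true and the call is not
   turned into a sibcall.  */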
10701
10702 /* Return the fixed registers used for condition codes. */
10703
10704 static bool
10705 s390_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
10706 {
10707 *p1 = CC_REGNUM;
10708 *p2 = INVALID_REGNUM;
10709
10710 return true;
10711 }
10712
10713 /* This function is used by the call expanders of the machine description.
10714 It emits the call insn itself together with the necessary operations
10715 to adjust the target address and returns the emitted insn.
10716 ADDR_LOCATION is the target address rtx
10717 TLS_CALL the location of the thread-local symbol
10718 RESULT_REG the register where the result of the call should be stored
10719 RETADDR_REG the register where the return address should be stored
10720 If this parameter is NULL_RTX the call is considered
10721 to be a sibling call. */
10722
10723 rtx
10724 s390_emit_call (rtx addr_location, rtx tls_call, rtx result_reg,
10725 rtx retaddr_reg)
10726 {
10727 bool plt_call = false;
10728 rtx insn;
10729 rtx call;
10730 rtx clobber;
10731 rtvec vec;
10732
10733 /* Direct function calls need special treatment. */
10734 if (GET_CODE (addr_location) == SYMBOL_REF)
10735 {
10736 /* When calling a global routine in PIC mode, we must
10737 replace the symbol itself with the PLT stub. */
10738 if (flag_pic && !SYMBOL_REF_LOCAL_P (addr_location))
10739 {
10740 if (retaddr_reg != NULL_RTX)
10741 {
10742 addr_location = gen_rtx_UNSPEC (Pmode,
10743 gen_rtvec (1, addr_location),
10744 UNSPEC_PLT);
10745 addr_location = gen_rtx_CONST (Pmode, addr_location);
10746 plt_call = true;
10747 }
10748 else
10749 /* For -fpic code the PLT entries might use r12 which is
10750 call-saved. Therefore we cannot do a sibcall when
10751 calling directly using a symbol ref. When reaching
10752 this point we decided (in s390_function_ok_for_sibcall)
10753 to do a sibcall for a function pointer but one of the
10754 optimizers was able to get rid of the function pointer
10755 by propagating the symbol ref into the call. This
10756 optimization is illegal for S/390 so we turn the direct
10757 call into an indirect call again. */
10758 addr_location = force_reg (Pmode, addr_location);
10759 }
10760
10761 /* Unless we can use the bras(l) insn, force the
10762 routine address into a register. */
10763 if (!TARGET_SMALL_EXEC && !TARGET_CPU_ZARCH)
10764 {
10765 if (flag_pic)
10766 addr_location = legitimize_pic_address (addr_location, 0);
10767 else
10768 addr_location = force_reg (Pmode, addr_location);
10769 }
10770 }
10771
10772 /* If it is already an indirect call or the code above moved the
10773 SYMBOL_REF to somewhere else make sure the address can be found in
10774 register 1. */
10775 if (retaddr_reg == NULL_RTX
10776 && GET_CODE (addr_location) != SYMBOL_REF
10777 && !plt_call)
10778 {
10779 emit_move_insn (gen_rtx_REG (Pmode, SIBCALL_REGNUM), addr_location);
10780 addr_location = gen_rtx_REG (Pmode, SIBCALL_REGNUM);
10781 }
10782
10783 addr_location = gen_rtx_MEM (QImode, addr_location);
10784 call = gen_rtx_CALL (VOIDmode, addr_location, const0_rtx);
10785
10786 if (result_reg != NULL_RTX)
10787 call = gen_rtx_SET (VOIDmode, result_reg, call);
10788
10789 if (retaddr_reg != NULL_RTX)
10790 {
10791 clobber = gen_rtx_CLOBBER (VOIDmode, retaddr_reg);
10792
10793 if (tls_call != NULL_RTX)
10794 vec = gen_rtvec (3, call, clobber,
10795 gen_rtx_USE (VOIDmode, tls_call));
10796 else
10797 vec = gen_rtvec (2, call, clobber);
10798
10799 call = gen_rtx_PARALLEL (VOIDmode, vec);
10800 }
10801
10802 insn = emit_call_insn (call);
10803
10804 /* 31-bit PLT stubs and tls calls use the GOT register implicitly. */
10805 if ((!TARGET_64BIT && plt_call) || tls_call != NULL_RTX)
10806 {
10807 /* s390_function_ok_for_sibcall should
10808 have denied sibcalls in this case. */
10809 gcc_assert (retaddr_reg != NULL_RTX);
10810 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), gen_rtx_REG (Pmode, 12));
10811 }
10812 return insn;
10813 }
10814
10815 /* Implement TARGET_CONDITIONAL_REGISTER_USAGE. */
10816
10817 static void
10818 s390_conditional_register_usage (void)
10819 {
10820 int i;
10821
10822 if (flag_pic)
10823 {
10824 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
10825 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
10826 }
10827 if (TARGET_CPU_ZARCH)
10828 {
10829 fixed_regs[BASE_REGNUM] = 0;
10830 call_used_regs[BASE_REGNUM] = 0;
10831 fixed_regs[RETURN_REGNUM] = 0;
10832 call_used_regs[RETURN_REGNUM] = 0;
10833 }
10834 if (TARGET_64BIT)
10835 {
10836 for (i = FPR8_REGNUM; i <= FPR15_REGNUM; i++)
10837 call_used_regs[i] = call_really_used_regs[i] = 0;
10838 }
10839 else
10840 {
10841 call_used_regs[FPR4_REGNUM] = call_really_used_regs[FPR4_REGNUM] = 0;
10842 call_used_regs[FPR6_REGNUM] = call_really_used_regs[FPR6_REGNUM] = 0;
10843 }
10844
10845 if (TARGET_SOFT_FLOAT)
10846 {
10847 for (i = FPR0_REGNUM; i <= FPR15_REGNUM; i++)
10848 call_used_regs[i] = fixed_regs[i] = 1;
10849 }
10850 }
10851
10852 /* Corresponding function to eh_return expander. */
10853
10854 static GTY(()) rtx s390_tpf_eh_return_symbol;
10855 void
10856 s390_emit_tpf_eh_return (rtx target)
10857 {
10858 rtx insn, reg;
10859
10860 if (!s390_tpf_eh_return_symbol)
10861 s390_tpf_eh_return_symbol = gen_rtx_SYMBOL_REF (Pmode, "__tpf_eh_return");
10862
10863 reg = gen_rtx_REG (Pmode, 2);
10864
10865 emit_move_insn (reg, target);
10866 insn = s390_emit_call (s390_tpf_eh_return_symbol, NULL_RTX, reg,
10867 gen_rtx_REG (Pmode, RETURN_REGNUM));
10868 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), reg);
10869
10870 emit_move_insn (EH_RETURN_HANDLER_RTX, reg);
10871 }
10872
10873 /* Rework the prologue/epilogue to avoid saving/restoring
10874 registers unnecessarily. */
10875
10876 static void
10877 s390_optimize_prologue (void)
10878 {
10879 rtx insn, new_insn, next_insn;
10880
10881 /* Do a final recompute of the frame-related data. */
10882 s390_optimize_register_info ();
10883
10884 /* If all special registers are in fact used, there's nothing we
10885 can do, so no point in walking the insn list. */
10886
10887 if (cfun_frame_layout.first_save_gpr <= BASE_REGNUM
10888 && cfun_frame_layout.last_save_gpr >= BASE_REGNUM
10889 && (TARGET_CPU_ZARCH
10890 || (cfun_frame_layout.first_save_gpr <= RETURN_REGNUM
10891 && cfun_frame_layout.last_save_gpr >= RETURN_REGNUM)))
10892 return;
10893
10894 /* Search for prologue/epilogue insns and replace them. */
10895
10896 for (insn = get_insns (); insn; insn = next_insn)
10897 {
10898 int first, last, off;
10899 rtx set, base, offset;
10900 rtx pat;
10901
10902 next_insn = NEXT_INSN (insn);
10903
10904 if (! NONJUMP_INSN_P (insn) || ! RTX_FRAME_RELATED_P (insn))
10905 continue;
10906
10907 pat = PATTERN (insn);
10908
10909 /* Remove ldgr/lgdr instructions used for saving and restoring
10910 GPRs if possible. */
10911 if (TARGET_Z10
10912 && GET_CODE (pat) == SET
10913 && GET_MODE (SET_SRC (pat)) == DImode
10914 && REG_P (SET_SRC (pat))
10915 && REG_P (SET_DEST (pat)))
10916 {
10917 int src_regno = REGNO (SET_SRC (pat));
10918 int dest_regno = REGNO (SET_DEST (pat));
10919 int gpr_regno;
10920 int fpr_regno;
10921
10922 if (!((GENERAL_REGNO_P (src_regno) && FP_REGNO_P (dest_regno))
10923 || (FP_REGNO_P (src_regno) && GENERAL_REGNO_P (dest_regno))))
10924 continue;
10925
10926 gpr_regno = GENERAL_REGNO_P (src_regno) ? src_regno : dest_regno;
10927 fpr_regno = FP_REGNO_P (src_regno) ? src_regno : dest_regno;
10928
10929 /* GPR must be call-saved, FPR must be call-clobbered. */
10930 if (!call_really_used_regs[fpr_regno]
10931 || call_really_used_regs[gpr_regno])
10932 continue;
10933
10934 /* It must not happen that what we once saved in an FPR now
10935 needs a stack slot. */
10936 gcc_assert (cfun_gpr_save_slot (gpr_regno) != -1);
10937
10938 if (cfun_gpr_save_slot (gpr_regno) == 0)
10939 {
10940 remove_insn (insn);
10941 continue;
10942 }
10943 }
10944
10945 if (GET_CODE (pat) == PARALLEL
10946 && store_multiple_operation (pat, VOIDmode))
10947 {
10948 set = XVECEXP (pat, 0, 0);
10949 first = REGNO (SET_SRC (set));
10950 last = first + XVECLEN (pat, 0) - 1;
10951 offset = const0_rtx;
10952 base = eliminate_constant_term (XEXP (SET_DEST (set), 0), &offset);
10953 off = INTVAL (offset);
10954
10955 if (GET_CODE (base) != REG || off < 0)
10956 continue;
10957 if (cfun_frame_layout.first_save_gpr != -1
10958 && (cfun_frame_layout.first_save_gpr < first
10959 || cfun_frame_layout.last_save_gpr > last))
10960 continue;
10961 if (REGNO (base) != STACK_POINTER_REGNUM
10962 && REGNO (base) != HARD_FRAME_POINTER_REGNUM)
10963 continue;
10964 if (first > BASE_REGNUM || last < BASE_REGNUM)
10965 continue;
10966
10967 if (cfun_frame_layout.first_save_gpr != -1)
10968 {
10969 new_insn = save_gprs (base,
10970 off + (cfun_frame_layout.first_save_gpr
10971 - first) * UNITS_PER_LONG,
10972 cfun_frame_layout.first_save_gpr,
10973 cfun_frame_layout.last_save_gpr);
10974 new_insn = emit_insn_before (new_insn, insn);
10975 INSN_ADDRESSES_NEW (new_insn, -1);
10976 }
10977
10978 remove_insn (insn);
10979 continue;
10980 }
10981
10982 if (cfun_frame_layout.first_save_gpr == -1
10983 && GET_CODE (pat) == SET
10984 && GENERAL_REG_P (SET_SRC (pat))
10985 && GET_CODE (SET_DEST (pat)) == MEM)
10986 {
10987 set = pat;
10988 first = REGNO (SET_SRC (set));
10989 offset = const0_rtx;
10990 base = eliminate_constant_term (XEXP (SET_DEST (set), 0), &offset);
10991 off = INTVAL (offset);
10992
10993 if (GET_CODE (base) != REG || off < 0)
10994 continue;
10995 if (REGNO (base) != STACK_POINTER_REGNUM
10996 && REGNO (base) != HARD_FRAME_POINTER_REGNUM)
10997 continue;
10998
10999 remove_insn (insn);
11000 continue;
11001 }
11002
11003 if (GET_CODE (pat) == PARALLEL
11004 && load_multiple_operation (pat, VOIDmode))
11005 {
11006 set = XVECEXP (pat, 0, 0);
11007 first = REGNO (SET_DEST (set));
11008 last = first + XVECLEN (pat, 0) - 1;
11009 offset = const0_rtx;
11010 base = eliminate_constant_term (XEXP (SET_SRC (set), 0), &offset);
11011 off = INTVAL (offset);
11012
11013 if (GET_CODE (base) != REG || off < 0)
11014 continue;
11015
11016 if (cfun_frame_layout.first_restore_gpr != -1
11017 && (cfun_frame_layout.first_restore_gpr < first
11018 || cfun_frame_layout.last_restore_gpr > last))
11019 continue;
11020 if (REGNO (base) != STACK_POINTER_REGNUM
11021 && REGNO (base) != HARD_FRAME_POINTER_REGNUM)
11022 continue;
11023 if (first > BASE_REGNUM || last < BASE_REGNUM)
11024 continue;
11025
11026 if (cfun_frame_layout.first_restore_gpr != -1)
11027 {
11028 new_insn = restore_gprs (base,
11029 off + (cfun_frame_layout.first_restore_gpr
11030 - first) * UNITS_PER_LONG,
11031 cfun_frame_layout.first_restore_gpr,
11032 cfun_frame_layout.last_restore_gpr);
11033
11034 /* Remove REG_CFA_RESTOREs for registers that we no
11035 longer need to save. */
11036 REG_NOTES (new_insn) = REG_NOTES (insn);
11037 for (rtx *ptr = &REG_NOTES (new_insn); *ptr; )
11038 if (REG_NOTE_KIND (*ptr) == REG_CFA_RESTORE
11039 && ((int) REGNO (XEXP (*ptr, 0))
11040 < cfun_frame_layout.first_restore_gpr))
11041 *ptr = XEXP (*ptr, 1);
11042 else
11043 ptr = &XEXP (*ptr, 1);
11044 new_insn = emit_insn_before (new_insn, insn);
11045 RTX_FRAME_RELATED_P (new_insn) = 1;
11046 INSN_ADDRESSES_NEW (new_insn, -1);
11047 }
11048
11049 remove_insn (insn);
11050 continue;
11051 }
11052
11053 if (cfun_frame_layout.first_restore_gpr == -1
11054 && GET_CODE (pat) == SET
11055 && GENERAL_REG_P (SET_DEST (pat))
11056 && GET_CODE (SET_SRC (pat)) == MEM)
11057 {
11058 set = pat;
11059 first = REGNO (SET_DEST (set));
11060 offset = const0_rtx;
11061 base = eliminate_constant_term (XEXP (SET_SRC (set), 0), &offset);
11062 off = INTVAL (offset);
11063
11064 if (GET_CODE (base) != REG || off < 0)
11065 continue;
11066
11067 if (REGNO (base) != STACK_POINTER_REGNUM
11068 && REGNO (base) != HARD_FRAME_POINTER_REGNUM)
11069 continue;
11070
11071 remove_insn (insn);
11072 continue;
11073 }
11074 }
11075 }
11076
11077 /* On z10 and later the dynamic branch prediction must see the
11078 backward jump within a certain window. If not, it falls back to
11079 the static prediction. This function rearranges the loop backward
11080 branch in a way which makes the static prediction always correct.
11081 The function returns true if it added an instruction. */
11082 static bool
11083 s390_fix_long_loop_prediction (rtx insn)
11084 {
11085 rtx set = single_set (insn);
11086 rtx code_label, label_ref, new_label;
11087 rtx uncond_jump;
11088 rtx cur_insn;
11089 rtx tmp;
11090 int distance;
11091
11092 /* This will exclude branch on count and branch on index patterns
11093 since these are correctly statically predicted. */
11094 if (!set
11095 || SET_DEST (set) != pc_rtx
11096 || GET_CODE (SET_SRC(set)) != IF_THEN_ELSE)
11097 return false;
11098
11099 /* Skip conditional returns. */
11100 if (ANY_RETURN_P (XEXP (SET_SRC (set), 1))
11101 && XEXP (SET_SRC (set), 2) == pc_rtx)
11102 return false;
11103
11104 label_ref = (GET_CODE (XEXP (SET_SRC (set), 1)) == LABEL_REF ?
11105 XEXP (SET_SRC (set), 1) : XEXP (SET_SRC (set), 2));
11106
11107 gcc_assert (GET_CODE (label_ref) == LABEL_REF);
11108
11109 code_label = XEXP (label_ref, 0);
11110
11111 if (INSN_ADDRESSES (INSN_UID (code_label)) == -1
11112 || INSN_ADDRESSES (INSN_UID (insn)) == -1
11113 || (INSN_ADDRESSES (INSN_UID (insn))
11114 - INSN_ADDRESSES (INSN_UID (code_label)) < PREDICT_DISTANCE))
11115 return false;
11116
11117 for (distance = 0, cur_insn = PREV_INSN (insn);
11118 distance < PREDICT_DISTANCE - 6;
11119 distance += get_attr_length (cur_insn), cur_insn = PREV_INSN (cur_insn))
11120 if (!cur_insn || JUMP_P (cur_insn) || LABEL_P (cur_insn))
11121 return false;
11122
11123 new_label = gen_label_rtx ();
11124 uncond_jump = emit_jump_insn_after (
11125 gen_rtx_SET (VOIDmode, pc_rtx,
11126 gen_rtx_LABEL_REF (VOIDmode, code_label)),
11127 insn);
11128 emit_label_after (new_label, uncond_jump);
11129
11130 tmp = XEXP (SET_SRC (set), 1);
11131 XEXP (SET_SRC (set), 1) = XEXP (SET_SRC (set), 2);
11132 XEXP (SET_SRC (set), 2) = tmp;
11133 INSN_CODE (insn) = -1;
11134
11135 XEXP (label_ref, 0) = new_label;
11136 JUMP_LABEL (insn) = new_label;
11137 JUMP_LABEL (uncond_jump) = code_label;
11138
11139 return true;
11140 }
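
/* Illustration (a sketch with a made-up loop, not taken from a real
   insn stream): a backward conditional branch whose distance exceeds
   the prediction window

       L:    ...
             jne L          # too far for the dynamic predictor

   is rewritten into a short forward branch around an unconditional
   backward jump, both of which the static predictor handles
   correctly:

       L:    ...
             je  skip       # condition inverted, short forward branch
             j   L          # unconditional backward jump
       skip:  */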

/* Returns 1 if INSN reads the value of REG for purposes not related
   to addressing of memory, and 0 otherwise.  */
static int
s390_non_addr_reg_read_p (rtx reg, rtx insn)
{
  return reg_referenced_p (reg, PATTERN (insn))
         && !reg_used_in_mem_p (REGNO (reg), PATTERN (insn));
}

/* Starting from INSN find_cond_jump looks downwards in the insn
   stream for a single jump insn which is the last user of the
   condition code set in INSN.  */
static rtx
find_cond_jump (rtx insn)
{
  for (; insn; insn = NEXT_INSN (insn))
    {
      rtx ite, cc;

      if (LABEL_P (insn))
        break;

      if (!JUMP_P (insn))
        {
          if (reg_mentioned_p (gen_rtx_REG (CCmode, CC_REGNUM), insn))
            break;
          continue;
        }

      /* This will be triggered by a return.  */
      if (GET_CODE (PATTERN (insn)) != SET)
        break;

      gcc_assert (SET_DEST (PATTERN (insn)) == pc_rtx);
      ite = SET_SRC (PATTERN (insn));

      if (GET_CODE (ite) != IF_THEN_ELSE)
        break;

      cc = XEXP (XEXP (ite, 0), 0);
      if (!REG_P (cc) || !CC_REGNO_P (REGNO (cc)))
        break;

      if (find_reg_note (insn, REG_DEAD, cc))
        return insn;
      break;
    }

  return NULL_RTX;
}

/* Swap the condition in COND and the operands in OP0 and OP1 so that
   the semantics do not change.  If NULL_RTX is passed as COND the
   function tries to find the conditional jump starting with INSN.  */
static void
s390_swap_cmp (rtx cond, rtx *op0, rtx *op1, rtx insn)
{
  rtx tmp = *op0;

  if (cond == NULL_RTX)
    {
      rtx jump = find_cond_jump (NEXT_INSN (insn));
      jump = jump ? single_set (jump) : NULL_RTX;

      if (jump == NULL_RTX)
        return;

      cond = XEXP (XEXP (jump, 1), 0);
    }

  *op0 = *op1;
  *op1 = tmp;
  PUT_CODE (cond, swap_condition (GET_CODE (cond)));
}
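
/* For example (a sketch with made-up registers): if the compare
   feeding "(gt (reg A) (reg B))" has its operands exchanged, the
   condition is rewritten to "(lt (reg B) (reg A))", so the branch
   outcome is unchanged.  */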

/* On z10, instructions of the compare-and-branch family have the
   property to access the register occurring as second operand with
   its bits complemented.  If such a compare is grouped with a second
   instruction that accesses the same register non-complemented, and
   if that register's value is delivered via a bypass, then the
   pipeline recycles, thereby causing significant performance decline.
   This function locates such situations and exchanges the two
   operands of the compare.  The function returns true whenever it
   added an insn.  */
static bool
s390_z10_optimize_cmp (rtx insn)
{
  rtx prev_insn, next_insn;
  bool insn_added_p = false;
  rtx cond, *op0, *op1;

  if (GET_CODE (PATTERN (insn)) == PARALLEL)
    {
      /* Handle compare and branch and branch on count
         instructions.  */
      rtx pattern = single_set (insn);

      if (!pattern
          || SET_DEST (pattern) != pc_rtx
          || GET_CODE (SET_SRC (pattern)) != IF_THEN_ELSE)
        return false;

      cond = XEXP (SET_SRC (pattern), 0);
      op0 = &XEXP (cond, 0);
      op1 = &XEXP (cond, 1);
    }
  else if (GET_CODE (PATTERN (insn)) == SET)
    {
      rtx src, dest;

      /* Handle normal compare instructions.  */
      src = SET_SRC (PATTERN (insn));
      dest = SET_DEST (PATTERN (insn));

      if (!REG_P (dest)
          || !CC_REGNO_P (REGNO (dest))
          || GET_CODE (src) != COMPARE)
        return false;

      /* s390_swap_cmp will try to find the conditional
         jump when passing NULL_RTX as condition.  */
      cond = NULL_RTX;
      op0 = &XEXP (src, 0);
      op1 = &XEXP (src, 1);
    }
  else
    return false;

  if (!REG_P (*op0) || !REG_P (*op1))
    return false;

  if (GET_MODE_CLASS (GET_MODE (*op0)) != MODE_INT)
    return false;

  /* Swap the COMPARE arguments and its mask if there is a
     conflicting access in the previous insn.  */
  prev_insn = prev_active_insn (insn);
  if (prev_insn != NULL_RTX && INSN_P (prev_insn)
      && reg_referenced_p (*op1, PATTERN (prev_insn)))
    s390_swap_cmp (cond, op0, op1, insn);

  /* Check if there is a conflict with the next insn.  If there
     was no conflict with the previous insn, then swap the
     COMPARE arguments and its mask.  If we already swapped
     the operands, or if swapping them would cause a conflict
     with the previous insn, issue a NOP after the COMPARE in
     order to separate the two instructions.  */
  next_insn = next_active_insn (insn);
  if (next_insn != NULL_RTX && INSN_P (next_insn)
      && s390_non_addr_reg_read_p (*op1, next_insn))
    {
      if (prev_insn != NULL_RTX && INSN_P (prev_insn)
          && s390_non_addr_reg_read_p (*op0, prev_insn))
        {
          if (REGNO (*op1) == 0)
            emit_insn_after (gen_nop1 (), insn);
          else
            emit_insn_after (gen_nop (), insn);
          insn_added_p = true;
        }
      else
        s390_swap_cmp (cond, op0, op1, insn);
    }
  return insn_added_p;
}
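
/* Sketch of the two fixups (the register numbers are made up): given
   a compare-and-branch on %r1,%r2 immediately followed by another
   read of %r2, the compare operands are swapped when the surrounding
   insns permit it; when both neighbours conflict with either operand
   order, a nop is emitted after the compare instead, so the two
   conflicting insns cannot end up in the same group.  */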

/* Perform machine-dependent processing.  */

static void
s390_reorg (void)
{
  bool pool_overflow = false;

  /* Make sure all splits have been performed; splits after
     machine_dependent_reorg might confuse insn length counts.  */
  split_all_insns_noflow ();

  /* Install the main literal pool and the associated base
     register load insns.

     In addition, there are two problematic situations we need
     to correct:

     - the literal pool might be > 4096 bytes in size, so that
       some of its elements cannot be directly accessed

     - a branch target might be > 64K away from the branch, so that
       it is not possible to use a PC-relative instruction.

     To fix those, we split the single literal pool into multiple
     pool chunks, reloading the pool base register at various
     points throughout the function to ensure it always points to
     the pool chunk the following code expects, and / or replace
     PC-relative branches by absolute branches.

     However, the two problems are interdependent: splitting the
     literal pool can move a branch further away from its target,
     causing the 64K limit to overflow, and on the other hand,
     replacing a PC-relative branch by an absolute branch means
     we need to put the branch target address into the literal
     pool, possibly causing it to overflow.

     So, we loop trying to fix up both problems until we manage
     to satisfy both conditions at the same time.  Note that the
     loop is guaranteed to terminate as every pass of the loop
     strictly decreases the total number of PC-relative branches
     in the function.  (This is not completely true as there
     might be branch-over-pool insns introduced by chunkify_start.
     Those never need to be split however.)  */

  for (;;)
    {
      struct constant_pool *pool = NULL;

      /* Collect the literal pool.  */
      if (!pool_overflow)
        {
          pool = s390_mainpool_start ();
          if (!pool)
            pool_overflow = true;
        }

      /* If literal pool overflowed, start to chunkify it.  */
      if (pool_overflow)
        pool = s390_chunkify_start ();

      /* Split out-of-range branches.  If this has created new
         literal pool entries, cancel current chunk list and
         recompute it.  zSeries machines have large branch
         instructions, so we never need to split a branch.  */
      if (!TARGET_CPU_ZARCH && s390_split_branches ())
        {
          if (pool_overflow)
            s390_chunkify_cancel (pool);
          else
            s390_mainpool_cancel (pool);

          continue;
        }

      /* If we made it up to here, both conditions are satisfied.
         Finish up literal pool related changes.  */
      if (pool_overflow)
        s390_chunkify_finish (pool);
      else
        s390_mainpool_finish (pool);

      /* We're done splitting branches.  */
      cfun->machine->split_branches_pending_p = false;
      break;
    }

  /* Generate out-of-pool execute target insns.  */
  if (TARGET_CPU_ZARCH)
    {
      rtx insn, label, target;

      for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
        {
          label = s390_execute_label (insn);
          if (!label)
            continue;

          gcc_assert (label != const0_rtx);

          target = emit_label (XEXP (label, 0));
          INSN_ADDRESSES_NEW (target, -1);

          target = emit_insn (s390_execute_target (insn));
          INSN_ADDRESSES_NEW (target, -1);
        }
    }

  /* Try to optimize prologue and epilogue further.  */
  s390_optimize_prologue ();

  /* Walk over the insns and do some >= z10 specific changes.  */
  if (s390_tune == PROCESSOR_2097_Z10
      || s390_tune == PROCESSOR_2817_Z196
      || s390_tune == PROCESSOR_2827_ZEC12)
    {
      rtx insn;
      bool insn_added_p = false;

      /* The insn lengths and addresses have to be up to date for the
         following manipulations.  */
      shorten_branches (get_insns ());

      for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
        {
          if (!INSN_P (insn) || INSN_CODE (insn) <= 0)
            continue;

          if (JUMP_P (insn))
            insn_added_p |= s390_fix_long_loop_prediction (insn);

          if ((GET_CODE (PATTERN (insn)) == PARALLEL
               || GET_CODE (PATTERN (insn)) == SET)
              && s390_tune == PROCESSOR_2097_Z10)
            insn_added_p |= s390_z10_optimize_cmp (insn);
        }

      /* Adjust branches if we added new instructions.  */
      if (insn_added_p)
        shorten_branches (get_insns ());
    }
}

/* Return true if INSN is a fp load insn writing register REGNO.  */
static inline bool
s390_fpload_toreg (rtx insn, unsigned int regno)
{
  rtx set;
  enum attr_type flag = s390_safe_attr_type (insn);

  if (flag != TYPE_FLOADSF && flag != TYPE_FLOADDF)
    return false;

  set = single_set (insn);

  if (set == NULL_RTX)
    return false;

  if (!REG_P (SET_DEST (set)) || !MEM_P (SET_SRC (set)))
    return false;

  if (REGNO (SET_DEST (set)) != regno)
    return false;

  return true;
}

/* This value describes the distance to be avoided between an
   arithmetic fp instruction and an fp load writing the same register.
   Z10_EARLYLOAD_DISTANCE - 1 as well as Z10_EARLYLOAD_DISTANCE + 1
   are fine, but the exact value has to be avoided.  Otherwise the FP
   pipeline will throw an exception causing a major penalty.  */
#define Z10_EARLYLOAD_DISTANCE 7

/* Rearrange the ready list in order to avoid the situation described
   for Z10_EARLYLOAD_DISTANCE.  A problematic load instruction is
   moved to the very end of the ready list.  */
static void
s390_z10_prevent_earlyload_conflicts (rtx *ready, int *nready_p)
{
  unsigned int regno;
  int nready = *nready_p;
  rtx tmp;
  int i;
  rtx insn;
  rtx set;
  enum attr_type flag;
  int distance;

  /* Skip DISTANCE - 1 active insns.  */
  for (insn = last_scheduled_insn, distance = Z10_EARLYLOAD_DISTANCE - 1;
       distance > 0 && insn != NULL_RTX;
       distance--, insn = prev_active_insn (insn))
    if (CALL_P (insn) || JUMP_P (insn))
      return;

  if (insn == NULL_RTX)
    return;

  set = single_set (insn);

  if (set == NULL_RTX || !REG_P (SET_DEST (set))
      || GET_MODE_CLASS (GET_MODE (SET_DEST (set))) != MODE_FLOAT)
    return;

  flag = s390_safe_attr_type (insn);

  if (flag == TYPE_FLOADSF || flag == TYPE_FLOADDF)
    return;

  regno = REGNO (SET_DEST (set));
  i = nready - 1;

  while (!s390_fpload_toreg (ready[i], regno) && i > 0)
    i--;

  if (!i)
    return;
11525
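  /* Index 0 is the back of the ready list (the scheduler issues from
     the other end), so rotating the offending load down to slot 0
     delays it for as long as possible.  */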
  tmp = ready[i];
  memmove (&ready[1], &ready[0], sizeof (rtx) * i);
  ready[0] = tmp;
}


/* The s390_sched_state variable tracks the state of the current or
   the last instruction group.

   0,1,2 number of instructions scheduled in the current group
   3     the last group is complete - normal insns
   4     the last group was a cracked/expanded insn */

static int s390_sched_state;

#define S390_OOO_SCHED_STATE_NORMAL  3
#define S390_OOO_SCHED_STATE_CRACKED 4

#define S390_OOO_SCHED_ATTR_MASK_CRACKED    0x1
#define S390_OOO_SCHED_ATTR_MASK_EXPANDED   0x2
#define S390_OOO_SCHED_ATTR_MASK_ENDGROUP   0x4
#define S390_OOO_SCHED_ATTR_MASK_GROUPALONE 0x8
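
/* Rough transition sketch (see s390_sched_variable_issue): issuing a
   cracked or expanded insn moves the state to CRACKED; an endgroup or
   group-alone insn moves it to NORMAL; a plain insn advances
   0 -> 1 -> 2 -> NORMAL, restarts at 1 after a complete group, and
   closes a CRACKED group by moving to NORMAL.  */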

static unsigned int
s390_get_sched_attrmask (rtx insn)
{
  unsigned int mask = 0;

  if (get_attr_ooo_cracked (insn))
    mask |= S390_OOO_SCHED_ATTR_MASK_CRACKED;
  if (get_attr_ooo_expanded (insn))
    mask |= S390_OOO_SCHED_ATTR_MASK_EXPANDED;
  if (get_attr_ooo_endgroup (insn))
    mask |= S390_OOO_SCHED_ATTR_MASK_ENDGROUP;
  if (get_attr_ooo_groupalone (insn))
    mask |= S390_OOO_SCHED_ATTR_MASK_GROUPALONE;
  return mask;
}
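
/* For instance, an insn with both the ooo_cracked and ooo_endgroup
   attributes set yields a mask of 0x5, i.e.
   S390_OOO_SCHED_ATTR_MASK_CRACKED | S390_OOO_SCHED_ATTR_MASK_ENDGROUP.  */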

/* Return the scheduling score for INSN.  The higher the score the
   better.  The score is calculated from the OOO scheduling attributes
   of INSN and the scheduling state s390_sched_state.  */
static int
s390_sched_score (rtx insn)
{
  unsigned int mask = s390_get_sched_attrmask (insn);
  int score = 0;

  switch (s390_sched_state)
    {
    case 0:
      /* Try to put insns into the first slot which would otherwise
         break a group.  */
      if ((mask & S390_OOO_SCHED_ATTR_MASK_CRACKED) != 0
          || (mask & S390_OOO_SCHED_ATTR_MASK_EXPANDED) != 0)
        score += 5;
      if ((mask & S390_OOO_SCHED_ATTR_MASK_GROUPALONE) != 0)
        score += 10;
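      /* fallthru */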
    case 1:
      /* Prefer not cracked insns while trying to put together a
         group.  */
      if ((mask & S390_OOO_SCHED_ATTR_MASK_CRACKED) == 0
          && (mask & S390_OOO_SCHED_ATTR_MASK_EXPANDED) == 0
          && (mask & S390_OOO_SCHED_ATTR_MASK_GROUPALONE) == 0)
        score += 10;
      if ((mask & S390_OOO_SCHED_ATTR_MASK_ENDGROUP) == 0)
        score += 5;
      break;
    case 2:
      /* Prefer not cracked insns while trying to put together a
         group.  */
      if ((mask & S390_OOO_SCHED_ATTR_MASK_CRACKED) == 0
          && (mask & S390_OOO_SCHED_ATTR_MASK_EXPANDED) == 0
          && (mask & S390_OOO_SCHED_ATTR_MASK_GROUPALONE) == 0)
        score += 10;
      /* Prefer endgroup insns in the last slot.  */
      if ((mask & S390_OOO_SCHED_ATTR_MASK_ENDGROUP) != 0)
        score += 10;
      break;
    case S390_OOO_SCHED_STATE_NORMAL:
      /* Prefer not cracked insns if the last was not cracked.  */
      if ((mask & S390_OOO_SCHED_ATTR_MASK_CRACKED) == 0
          && (mask & S390_OOO_SCHED_ATTR_MASK_EXPANDED) == 0)
        score += 5;
      if ((mask & S390_OOO_SCHED_ATTR_MASK_GROUPALONE) != 0)
        score += 10;
      break;
    case S390_OOO_SCHED_STATE_CRACKED:
      /* Try to keep cracked insns together to prevent them from
         interrupting groups.  */
      if ((mask & S390_OOO_SCHED_ATTR_MASK_CRACKED) != 0
          || (mask & S390_OOO_SCHED_ATTR_MASK_EXPANDED) != 0)
        score += 5;
      break;
    }
  return score;
}

/* This function is called via hook TARGET_SCHED_REORDER before
   issuing one insn from list READY which contains *NREADYP entries.
   For target z10 it reorders load instructions to avoid early load
   conflicts in the floating point pipeline.  */
static int
s390_sched_reorder (FILE *file, int verbose,
                    rtx *ready, int *nreadyp, int clock ATTRIBUTE_UNUSED)
{
  if (s390_tune == PROCESSOR_2097_Z10
      && reload_completed && *nreadyp > 1)
    s390_z10_prevent_earlyload_conflicts (ready, nreadyp);

  if (s390_tune == PROCESSOR_2827_ZEC12
      && reload_completed
      && *nreadyp > 1)
    {
      int i;
      int last_index = *nreadyp - 1;
      int max_index = -1;
      int max_score = -1;
      rtx tmp;

      /* Just move the insn with the highest score to the top (the
         end) of the list.  A full sort is not needed since a conflict
         in the hazard recognition cannot happen.  So the top insn in
         the ready list will always be taken.  */
      for (i = last_index; i >= 0; i--)
        {
          int score;

          if (recog_memoized (ready[i]) < 0)
            continue;

          score = s390_sched_score (ready[i]);
          if (score > max_score)
            {
              max_score = score;
              max_index = i;
            }
        }

      if (max_index != -1)
        {
          if (max_index != last_index)
            {
              tmp = ready[max_index];
              ready[max_index] = ready[last_index];
              ready[last_index] = tmp;

              if (verbose > 5)
                fprintf (file,
                         "move insn %d to the top of list\n",
                         INSN_UID (ready[last_index]));
            }
          else if (verbose > 5)
            fprintf (file,
                     "best insn %d already on top\n",
                     INSN_UID (ready[last_index]));
        }

      if (verbose > 5)
        {
          fprintf (file, "ready list ooo attributes - sched state: %d\n",
                   s390_sched_state);

          for (i = last_index; i >= 0; i--)
            {
              if (recog_memoized (ready[i]) < 0)
                continue;
              fprintf (file, "insn %d score: %d: ", INSN_UID (ready[i]),
                       s390_sched_score (ready[i]));
#define PRINT_OOO_ATTR(ATTR) fprintf (file, "%s ", get_attr_##ATTR (ready[i]) ? #ATTR : "!" #ATTR);
              PRINT_OOO_ATTR (ooo_cracked);
              PRINT_OOO_ATTR (ooo_expanded);
              PRINT_OOO_ATTR (ooo_endgroup);
              PRINT_OOO_ATTR (ooo_groupalone);
#undef PRINT_OOO_ATTR
              fprintf (file, "\n");
            }
        }
    }

  return s390_issue_rate ();
}


/* This function is called via hook TARGET_SCHED_VARIABLE_ISSUE after
   the scheduler has issued INSN.  It stores the last issued insn into
   last_scheduled_insn in order to make it available for
   s390_sched_reorder.  */
static int
s390_sched_variable_issue (FILE *file, int verbose, rtx insn, int more)
{
  last_scheduled_insn = insn;

  if (s390_tune == PROCESSOR_2827_ZEC12
      && reload_completed
      && recog_memoized (insn) >= 0)
    {
      unsigned int mask = s390_get_sched_attrmask (insn);

      if ((mask & S390_OOO_SCHED_ATTR_MASK_CRACKED) != 0
          || (mask & S390_OOO_SCHED_ATTR_MASK_EXPANDED) != 0)
        s390_sched_state = S390_OOO_SCHED_STATE_CRACKED;
      else if ((mask & S390_OOO_SCHED_ATTR_MASK_ENDGROUP) != 0
               || (mask & S390_OOO_SCHED_ATTR_MASK_GROUPALONE) != 0)
        s390_sched_state = S390_OOO_SCHED_STATE_NORMAL;
      else
        {
          /* Only normal insns are left (mask == 0).  */
          switch (s390_sched_state)
            {
            case 0:
            case 1:
            case 2:
            case S390_OOO_SCHED_STATE_NORMAL:
              if (s390_sched_state == S390_OOO_SCHED_STATE_NORMAL)
                s390_sched_state = 1;
              else
                s390_sched_state++;

              break;
            case S390_OOO_SCHED_STATE_CRACKED:
              s390_sched_state = S390_OOO_SCHED_STATE_NORMAL;
              break;
            }
        }
      if (verbose > 5)
        {
          fprintf (file, "insn %d: ", INSN_UID (insn));
#define PRINT_OOO_ATTR(ATTR) \
          fprintf (file, "%s ", get_attr_##ATTR (insn) ? #ATTR : "");
          PRINT_OOO_ATTR (ooo_cracked);
          PRINT_OOO_ATTR (ooo_expanded);
          PRINT_OOO_ATTR (ooo_endgroup);
          PRINT_OOO_ATTR (ooo_groupalone);
#undef PRINT_OOO_ATTR
          fprintf (file, "\n");
          fprintf (file, "sched state: %d\n", s390_sched_state);
        }
    }

  if (GET_CODE (PATTERN (insn)) != USE
      && GET_CODE (PATTERN (insn)) != CLOBBER)
    return more - 1;
  else
    return more;
}

static void
s390_sched_init (FILE *file ATTRIBUTE_UNUSED,
                 int verbose ATTRIBUTE_UNUSED,
                 int max_ready ATTRIBUTE_UNUSED)
{
  last_scheduled_insn = NULL_RTX;
  s390_sched_state = 0;
}

/* This function checks the whole of insn X for memory references.  The
   function always returns zero because the framework it is called
   from would stop recursively analyzing the insn upon a return value
   other than zero.  The real result of this function is updating
   counter variable MEM_COUNT.  */
static int
check_dpu (rtx *x, unsigned *mem_count)
{
  if (*x != NULL_RTX && MEM_P (*x))
    (*mem_count)++;
  return 0;
}

/* This target hook implementation for TARGET_LOOP_UNROLL_ADJUST
   calculates the number of times struct loop *LOOP should be unrolled
   when tuned for cpus with a built-in stride prefetcher.
   The loop is analyzed for memory accesses by calling check_dpu for
   each rtx of the loop.  Depending on the loop_depth and the amount of
   memory accesses a new number <= nunroll is returned to improve the
   behaviour of the hardware prefetch unit.  */
static unsigned
s390_loop_unroll_adjust (unsigned nunroll, struct loop *loop)
{
  basic_block *bbs;
  rtx insn;
  unsigned i;
  unsigned mem_count = 0;

  if (s390_tune != PROCESSOR_2097_Z10
      && s390_tune != PROCESSOR_2817_Z196
      && s390_tune != PROCESSOR_2827_ZEC12)
    return nunroll;

  /* Count the number of memory references within the loop body.  */
  bbs = get_loop_body (loop);
  for (i = 0; i < loop->num_nodes; i++)
    {
      for (insn = BB_HEAD (bbs[i]); insn != BB_END (bbs[i]);
           insn = NEXT_INSN (insn))
        if (INSN_P (insn) && INSN_CODE (insn) != -1)
          for_each_rtx (&insn, (rtx_function) check_dpu, &mem_count);
    }
  free (bbs);

  /* Prevent division by zero; nunroll needs no adjustment in this
     case.  */
  if (mem_count == 0)
    return nunroll;

  switch (loop_depth (loop))
    {
    case 1:
      return MIN (nunroll, 28 / mem_count);
    case 2:
      return MIN (nunroll, 22 / mem_count);
    default:
      return MIN (nunroll, 16 / mem_count);
    }
}
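
/* Worked example: for a depth-1 loop containing 4 memory references,
   the adjusted bound is MIN (nunroll, 28 / 4), i.e. at most 7 copies
   are unrolled regardless of the generic unroller's request.  */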

static void
s390_option_override (void)
{
  unsigned int i;
  cl_deferred_option *opt;
  vec<cl_deferred_option> *v =
    (vec<cl_deferred_option> *) s390_deferred_options;

  if (v)
    FOR_EACH_VEC_ELT (*v, i, opt)
      {
        switch (opt->opt_index)
          {
          case OPT_mhotpatch:
            s390_hotpatch_trampoline_halfwords = (opt->value) ?
              s390_hotpatch_trampoline_halfwords_default : -1;
            break;
          case OPT_mhotpatch_:
            {
              int val;

              val = integral_argument (opt->arg);
              if (val == -1)
                {
                  /* argument is not a plain number */
                  error ("argument to %qs should be a non-negative integer",
                         "-mhotpatch=");
                  break;
                }
              else if (val > s390_hotpatch_trampoline_halfwords_max)
                {
                  error ("argument to %qs is too large (max. %d)",
                         "-mhotpatch=", s390_hotpatch_trampoline_halfwords_max);
                  break;
                }
              s390_hotpatch_trampoline_halfwords = val;
              break;
            }
          default:
            gcc_unreachable ();
          }
      }

  /* Set up function hooks.  */
  init_machine_status = s390_init_machine_status;

  /* Architecture mode defaults according to ABI.  */
  if (!(target_flags_explicit & MASK_ZARCH))
    {
      if (TARGET_64BIT)
        target_flags |= MASK_ZARCH;
      else
        target_flags &= ~MASK_ZARCH;
    }

  /* Set the march default in case it hasn't been specified on
     cmdline.  */
  if (s390_arch == PROCESSOR_max)
    {
      s390_arch_string = TARGET_ZARCH ? "z900" : "g5";
      s390_arch = TARGET_ZARCH ? PROCESSOR_2064_Z900 : PROCESSOR_9672_G5;
      s390_arch_flags = processor_flags_table[(int) s390_arch];
    }

  /* Determine processor to tune for.  */
  if (s390_tune == PROCESSOR_max)
    {
      s390_tune = s390_arch;
      s390_tune_flags = s390_arch_flags;
    }

  /* Sanity checks.  */
  if (TARGET_ZARCH && !TARGET_CPU_ZARCH)
    error ("z/Architecture mode not supported on %s", s390_arch_string);
  if (TARGET_64BIT && !TARGET_ZARCH)
    error ("64-bit ABI not supported in ESA/390 mode");

  /* Use hardware DFP if available and not explicitly disabled by
     user.  E.g. with -m31 -march=z10 -mzarch.  */
  if (!(target_flags_explicit & MASK_HARD_DFP) && TARGET_DFP)
    target_flags |= MASK_HARD_DFP;

  /* Enable hardware transactions if available and not explicitly
     disabled by user.  E.g. with -m31 -march=zEC12 -mzarch.  */
  if (!(target_flags_explicit & MASK_OPT_HTM) && TARGET_CPU_HTM && TARGET_ZARCH)
    target_flags |= MASK_OPT_HTM;

  if (TARGET_HARD_DFP && !TARGET_DFP)
    {
      if (target_flags_explicit & MASK_HARD_DFP)
        {
          if (!TARGET_CPU_DFP)
            error ("hardware decimal floating point instructions"
                   " not available on %s", s390_arch_string);
          if (!TARGET_ZARCH)
            error ("hardware decimal floating point instructions"
                   " not available in ESA/390 mode");
        }
      else
        target_flags &= ~MASK_HARD_DFP;
    }

  if ((target_flags_explicit & MASK_SOFT_FLOAT) && TARGET_SOFT_FLOAT)
    {
      if ((target_flags_explicit & MASK_HARD_DFP) && TARGET_HARD_DFP)
        error ("-mhard-dfp can%'t be used in conjunction with -msoft-float");

      target_flags &= ~MASK_HARD_DFP;
    }

  /* Set processor cost function.  */
  switch (s390_tune)
    {
    case PROCESSOR_2084_Z990:
      s390_cost = &z990_cost;
      break;
    case PROCESSOR_2094_Z9_109:
      s390_cost = &z9_109_cost;
      break;
    case PROCESSOR_2097_Z10:
      s390_cost = &z10_cost;
      break;
    case PROCESSOR_2817_Z196:
      s390_cost = &z196_cost;
      break;
    case PROCESSOR_2827_ZEC12:
      s390_cost = &zEC12_cost;
      break;
    default:
      s390_cost = &z900_cost;
    }

  if (TARGET_BACKCHAIN && TARGET_PACKED_STACK && TARGET_HARD_FLOAT)
    error ("-mbackchain -mpacked-stack -mhard-float are not supported "
           "in combination");

  if (s390_stack_size)
    {
      if (s390_stack_guard >= s390_stack_size)
        error ("stack size must be greater than the stack guard value");
      else if (s390_stack_size > 1 << 16)
        error ("stack size must not be greater than 64k");
    }
  else if (s390_stack_guard)
    error ("-mstack-guard implies use of -mstack-size");

#ifdef TARGET_DEFAULT_LONG_DOUBLE_128
  if (!(target_flags_explicit & MASK_LONG_DOUBLE_128))
    target_flags |= MASK_LONG_DOUBLE_128;
#endif

  if (s390_tune == PROCESSOR_2097_Z10
      || s390_tune == PROCESSOR_2817_Z196
      || s390_tune == PROCESSOR_2827_ZEC12)
    {
      maybe_set_param_value (PARAM_MAX_UNROLLED_INSNS, 100,
                             global_options.x_param_values,
                             global_options_set.x_param_values);
      maybe_set_param_value (PARAM_MAX_UNROLL_TIMES, 32,
                             global_options.x_param_values,
                             global_options_set.x_param_values);
      maybe_set_param_value (PARAM_MAX_COMPLETELY_PEELED_INSNS, 2000,
                             global_options.x_param_values,
                             global_options_set.x_param_values);
      maybe_set_param_value (PARAM_MAX_COMPLETELY_PEEL_TIMES, 64,
                             global_options.x_param_values,
                             global_options_set.x_param_values);
    }

  maybe_set_param_value (PARAM_MAX_PENDING_LIST_LENGTH, 256,
                         global_options.x_param_values,
                         global_options_set.x_param_values);
  /* values for loop prefetching */
  maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE, 256,
                         global_options.x_param_values,
                         global_options_set.x_param_values);
  maybe_set_param_value (PARAM_L1_CACHE_SIZE, 128,
                         global_options.x_param_values,
                         global_options_set.x_param_values);
  /* s390 has more than 2 levels and the size is much larger.  Since
     we are always running virtualized, assume that we only get a
     small part of the caches above l1.  */
  maybe_set_param_value (PARAM_L2_CACHE_SIZE, 1500,
                         global_options.x_param_values,
                         global_options_set.x_param_values);
  maybe_set_param_value (PARAM_PREFETCH_MIN_INSN_TO_MEM_RATIO, 2,
                         global_options.x_param_values,
                         global_options_set.x_param_values);
  maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES, 6,
                         global_options.x_param_values,
                         global_options_set.x_param_values);

  /* This cannot reside in s390_option_optimization_table since HAVE_prefetch
     requires the arch flags to be evaluated already.  Since prefetching
     is beneficial on s390, we enable it if available.  */
  if (flag_prefetch_loop_arrays < 0 && HAVE_prefetch && optimize >= 3)
    flag_prefetch_loop_arrays = 1;

  /* Use the alternative scheduling-pressure algorithm by default.  */
  maybe_set_param_value (PARAM_SCHED_PRESSURE_ALGORITHM, 2,
                         global_options.x_param_values,
                         global_options_set.x_param_values);

  if (TARGET_TPF)
    {
      /* Don't emit DWARF3/4 unless specifically selected.  The TPF
         debuggers do not yet support DWARF 3/4.  */
      if (!global_options_set.x_dwarf_strict)
        dwarf_strict = 1;
      if (!global_options_set.x_dwarf_version)
        dwarf_version = 2;
    }

  /* Register a target-specific optimization-and-lowering pass
     to run immediately before prologue and epilogue generation.

     Registering the pass must be done at start up.  It's
     convenient to do it here.  */
  opt_pass *new_pass = new pass_s390_early_mach (g);
  struct register_pass_info insert_pass_s390_early_mach =
    {
      new_pass,                 /* pass */
      "pro_and_epilogue",       /* reference_pass_name */
      1,                        /* ref_pass_instance_number */
      PASS_POS_INSERT_BEFORE    /* po_op */
    };
  register_pass (&insert_pass_s390_early_mach);
}

/* Initialize GCC target structure.  */

#undef TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP "\t.word\t"
#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
#undef TARGET_ASM_INTEGER
#define TARGET_ASM_INTEGER s390_assemble_integer

#undef TARGET_ASM_OPEN_PAREN
#define TARGET_ASM_OPEN_PAREN ""

#undef TARGET_ASM_CLOSE_PAREN
#define TARGET_ASM_CLOSE_PAREN ""

#undef TARGET_OPTION_OVERRIDE
#define TARGET_OPTION_OVERRIDE s390_option_override

#undef TARGET_ENCODE_SECTION_INFO
#define TARGET_ENCODE_SECTION_INFO s390_encode_section_info

#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P s390_scalar_mode_supported_p

#ifdef HAVE_AS_TLS
#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS true
#endif
#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM s390_cannot_force_const_mem

#undef TARGET_DELEGITIMIZE_ADDRESS
#define TARGET_DELEGITIMIZE_ADDRESS s390_delegitimize_address

#undef TARGET_LEGITIMIZE_ADDRESS
#define TARGET_LEGITIMIZE_ADDRESS s390_legitimize_address

#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY s390_return_in_memory

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS s390_init_builtins
#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN s390_expand_builtin

#undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
#define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA s390_output_addr_const_extra

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK s390_output_mi_thunk
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true

#undef TARGET_SCHED_ADJUST_PRIORITY
#define TARGET_SCHED_ADJUST_PRIORITY s390_adjust_priority
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE s390_issue_rate
#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD s390_first_cycle_multipass_dfa_lookahead

#undef TARGET_SCHED_VARIABLE_ISSUE
#define TARGET_SCHED_VARIABLE_ISSUE s390_sched_variable_issue
#undef TARGET_SCHED_REORDER
#define TARGET_SCHED_REORDER s390_sched_reorder
#undef TARGET_SCHED_INIT
#define TARGET_SCHED_INIT s390_sched_init

#undef TARGET_CANNOT_COPY_INSN_P
#define TARGET_CANNOT_COPY_INSN_P s390_cannot_copy_insn_p
#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS s390_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST s390_address_cost
#undef TARGET_REGISTER_MOVE_COST
#define TARGET_REGISTER_MOVE_COST s390_register_move_cost
#undef TARGET_MEMORY_MOVE_COST
#define TARGET_MEMORY_MOVE_COST s390_memory_move_cost

#undef TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG s390_reorg

#undef TARGET_VALID_POINTER_MODE
#define TARGET_VALID_POINTER_MODE s390_valid_pointer_mode

#undef TARGET_BUILD_BUILTIN_VA_LIST
#define TARGET_BUILD_BUILTIN_VA_LIST s390_build_builtin_va_list
#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START s390_va_start
#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR s390_gimplify_va_arg

#undef TARGET_PROMOTE_FUNCTION_MODE
#define TARGET_PROMOTE_FUNCTION_MODE s390_promote_function_mode
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE s390_pass_by_reference

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL s390_function_ok_for_sibcall
#undef TARGET_FUNCTION_ARG
#define TARGET_FUNCTION_ARG s390_function_arg
#undef TARGET_FUNCTION_ARG_ADVANCE
#define TARGET_FUNCTION_ARG_ADVANCE s390_function_arg_advance
#undef TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE s390_function_value
#undef TARGET_LIBCALL_VALUE
#define TARGET_LIBCALL_VALUE s390_libcall_value

#undef TARGET_FIXED_CONDITION_CODE_REGS
#define TARGET_FIXED_CONDITION_CODE_REGS s390_fixed_condition_code_regs

#undef TARGET_CC_MODES_COMPATIBLE
#define TARGET_CC_MODES_COMPATIBLE s390_cc_modes_compatible

#undef TARGET_INVALID_WITHIN_DOLOOP
#define TARGET_INVALID_WITHIN_DOLOOP hook_constcharptr_const_rtx_null

#ifdef HAVE_AS_TLS
#undef TARGET_ASM_OUTPUT_DWARF_DTPREL
#define TARGET_ASM_OUTPUT_DWARF_DTPREL s390_output_dwarf_dtprel
#endif

#ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
#undef TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE s390_mangle_type
#endif

#undef TARGET_PREFERRED_RELOAD_CLASS
#define TARGET_PREFERRED_RELOAD_CLASS s390_preferred_reload_class

#undef TARGET_SECONDARY_RELOAD
#define TARGET_SECONDARY_RELOAD s390_secondary_reload

#undef TARGET_LIBGCC_CMP_RETURN_MODE
#define TARGET_LIBGCC_CMP_RETURN_MODE s390_libgcc_cmp_return_mode

#undef TARGET_LIBGCC_SHIFT_COUNT_MODE
#define TARGET_LIBGCC_SHIFT_COUNT_MODE s390_libgcc_shift_count_mode

#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P s390_legitimate_address_p

#undef TARGET_LEGITIMATE_CONSTANT_P
#define TARGET_LEGITIMATE_CONSTANT_P s390_legitimate_constant_p

#undef TARGET_LRA_P
#define TARGET_LRA_P s390_lra_p

#undef TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE s390_can_eliminate

#undef TARGET_CONDITIONAL_REGISTER_USAGE
#define TARGET_CONDITIONAL_REGISTER_USAGE s390_conditional_register_usage

#undef TARGET_LOOP_UNROLL_ADJUST
#define TARGET_LOOP_UNROLL_ADJUST s390_loop_unroll_adjust

#undef TARGET_ASM_TRAMPOLINE_TEMPLATE
#define TARGET_ASM_TRAMPOLINE_TEMPLATE s390_asm_trampoline_template
#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT s390_trampoline_init

#undef TARGET_UNWIND_WORD_MODE
#define TARGET_UNWIND_WORD_MODE s390_unwind_word_mode

#undef TARGET_CANONICALIZE_COMPARISON
#define TARGET_CANONICALIZE_COMPARISON s390_canonicalize_comparison

#undef TARGET_HARD_REGNO_SCRATCH_OK
#define TARGET_HARD_REGNO_SCRATCH_OK s390_hard_regno_scratch_ok

#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE s390_attribute_table

#undef TARGET_CAN_INLINE_P
#define TARGET_CAN_INLINE_P s390_can_inline_p

#undef TARGET_SET_UP_BY_PROLOGUE
#define TARGET_SET_UP_BY_PROLOGUE s300_set_up_by_prologue

struct gcc_target targetm = TARGET_INITIALIZER;

#include "gt-s390.h"