1 /* Subroutines used for code generation on IBM S/390 and zSeries
2    Copyright (C) 1999-2018 Free Software Foundation, Inc.
3    Contributed by Hartmut Penner (hpenner@de.ibm.com) and
4                   Ulrich Weigand (uweigand@de.ibm.com) and
5                   Andreas Krebbel (Andreas.Krebbel@de.ibm.com).
6 
7 This file is part of GCC.
8 
9 GCC is free software; you can redistribute it and/or modify it under
10 the terms of the GNU General Public License as published by the Free
11 Software Foundation; either version 3, or (at your option) any later
12 version.
13 
14 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
15 WARRANTY; without even the implied warranty of MERCHANTABILITY or
16 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
17 for more details.
18 
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3.  If not see
21 <http://www.gnu.org/licenses/>.  */
22 
23 #define IN_TARGET_CODE 1
24 
25 #include "config.h"
26 #include "system.h"
27 #include "coretypes.h"
28 #include "backend.h"
29 #include "target.h"
30 #include "target-globals.h"
31 #include "rtl.h"
32 #include "tree.h"
33 #include "gimple.h"
34 #include "cfghooks.h"
35 #include "cfgloop.h"
36 #include "df.h"
37 #include "memmodel.h"
38 #include "tm_p.h"
39 #include "stringpool.h"
40 #include "attribs.h"
41 #include "expmed.h"
42 #include "optabs.h"
43 #include "regs.h"
44 #include "emit-rtl.h"
45 #include "recog.h"
46 #include "cgraph.h"
47 #include "diagnostic-core.h"
48 #include "diagnostic.h"
49 #include "alias.h"
50 #include "fold-const.h"
51 #include "print-tree.h"
52 #include "stor-layout.h"
53 #include "varasm.h"
54 #include "calls.h"
55 #include "conditions.h"
56 #include "output.h"
57 #include "insn-attr.h"
58 #include "flags.h"
59 #include "except.h"
60 #include "dojump.h"
61 #include "explow.h"
62 #include "stmt.h"
63 #include "expr.h"
64 #include "reload.h"
65 #include "cfgrtl.h"
66 #include "cfganal.h"
67 #include "lcm.h"
68 #include "cfgbuild.h"
69 #include "cfgcleanup.h"
70 #include "debug.h"
71 #include "langhooks.h"
72 #include "internal-fn.h"
73 #include "gimple-fold.h"
74 #include "tree-eh.h"
75 #include "gimplify.h"
76 #include "params.h"
77 #include "opts.h"
78 #include "tree-pass.h"
79 #include "context.h"
80 #include "builtins.h"
81 #include "rtl-iter.h"
82 #include "intl.h"
83 #include "tm-constrs.h"
84 #include "tree-vrp.h"
85 #include "symbol-summary.h"
86 #include "ipa-prop.h"
87 #include "ipa-fnsummary.h"
88 #include "sched-int.h"
89 
90 /* This file should be included last.  */
91 #include "target-def.h"
92 
93 static bool s390_hard_regno_mode_ok (unsigned int, machine_mode);
94 
95 /* Remember the last target of s390_set_current_function.  */
96 static GTY(()) tree s390_previous_fndecl;
97 
98 /* Define the specific costs for a given cpu.  */
99 
100 struct processor_costs
101 {
102   /* multiplication */
103   const int m;        /* cost of an M instruction.  */
104   const int mghi;     /* cost of an MGHI instruction.  */
105   const int mh;       /* cost of an MH instruction.  */
106   const int mhi;      /* cost of an MHI instruction.  */
107   const int ml;       /* cost of an ML instruction.  */
108   const int mr;       /* cost of an MR instruction.  */
109   const int ms;       /* cost of an MS instruction.  */
110   const int msg;      /* cost of an MSG instruction.  */
111   const int msgf;     /* cost of an MSGF instruction.  */
112   const int msgfr;    /* cost of an MSGFR instruction.  */
113   const int msgr;     /* cost of an MSGR instruction.  */
114   const int msr;      /* cost of an MSR instruction.  */
115   const int mult_df;  /* cost of multiplication in DFmode.  */
116   const int mxbr;
117   /* square root */
118   const int sqxbr;    /* cost of square root in TFmode.  */
119   const int sqdbr;    /* cost of square root in DFmode.  */
120   const int sqebr;    /* cost of square root in SFmode.  */
121   /* multiply and add */
122   const int madbr;    /* cost of multiply and add in DFmode.  */
123   const int maebr;    /* cost of multiply and add in SFmode.  */
124   /* division */
125   const int dxbr;
126   const int ddbr;
127   const int debr;
128   const int dlgr;
129   const int dlr;
130   const int dr;
131   const int dsgfr;
132   const int dsgr;
133 };
134 
135 #define s390_cost ((const struct processor_costs *)(s390_cost_pointer))
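/* Illustrative note, not part of the original sources: the accessor
   above is how the per-CPU cost tables below are consulted, e.g. the
   rtx cost computation further down in this file charges roughly
   s390_cost->mult_df for a DFmode multiplication on the currently
   selected CPU.  */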
136 
137 static const
138 struct processor_costs z900_cost =
139 {
140   COSTS_N_INSNS (5),     /* M     */
141   COSTS_N_INSNS (10),    /* MGHI  */
142   COSTS_N_INSNS (5),     /* MH    */
143   COSTS_N_INSNS (4),     /* MHI   */
144   COSTS_N_INSNS (5),     /* ML    */
145   COSTS_N_INSNS (5),     /* MR    */
146   COSTS_N_INSNS (4),     /* MS    */
147   COSTS_N_INSNS (15),    /* MSG   */
148   COSTS_N_INSNS (7),     /* MSGF  */
149   COSTS_N_INSNS (7),     /* MSGFR */
150   COSTS_N_INSNS (10),    /* MSGR  */
151   COSTS_N_INSNS (4),     /* MSR   */
152   COSTS_N_INSNS (7),     /* multiplication in DFmode */
153   COSTS_N_INSNS (13),    /* MXBR */
154   COSTS_N_INSNS (136),   /* SQXBR */
155   COSTS_N_INSNS (44),    /* SQDBR */
156   COSTS_N_INSNS (35),    /* SQEBR */
157   COSTS_N_INSNS (18),    /* MADBR */
158   COSTS_N_INSNS (13),    /* MAEBR */
159   COSTS_N_INSNS (134),   /* DXBR */
160   COSTS_N_INSNS (30),    /* DDBR */
161   COSTS_N_INSNS (27),    /* DEBR */
162   COSTS_N_INSNS (220),   /* DLGR */
163   COSTS_N_INSNS (34),    /* DLR */
164   COSTS_N_INSNS (34),    /* DR */
165   COSTS_N_INSNS (32),    /* DSGFR */
166   COSTS_N_INSNS (32),    /* DSGR */
167 };
168 
169 static const
170 struct processor_costs z990_cost =
171 {
172   COSTS_N_INSNS (4),     /* M     */
173   COSTS_N_INSNS (2),     /* MGHI  */
174   COSTS_N_INSNS (2),     /* MH    */
175   COSTS_N_INSNS (2),     /* MHI   */
176   COSTS_N_INSNS (4),     /* ML    */
177   COSTS_N_INSNS (4),     /* MR    */
178   COSTS_N_INSNS (5),     /* MS    */
179   COSTS_N_INSNS (6),     /* MSG   */
180   COSTS_N_INSNS (4),     /* MSGF  */
181   COSTS_N_INSNS (4),     /* MSGFR */
182   COSTS_N_INSNS (4),     /* MSGR  */
183   COSTS_N_INSNS (4),     /* MSR   */
184   COSTS_N_INSNS (1),     /* multiplication in DFmode */
185   COSTS_N_INSNS (28),    /* MXBR */
186   COSTS_N_INSNS (130),   /* SQXBR */
187   COSTS_N_INSNS (66),    /* SQDBR */
188   COSTS_N_INSNS (38),    /* SQEBR */
189   COSTS_N_INSNS (1),     /* MADBR */
190   COSTS_N_INSNS (1),     /* MAEBR */
191   COSTS_N_INSNS (60),    /* DXBR */
192   COSTS_N_INSNS (40),    /* DDBR */
193   COSTS_N_INSNS (26),    /* DEBR */
194   COSTS_N_INSNS (176),   /* DLGR */
195   COSTS_N_INSNS (31),    /* DLR */
196   COSTS_N_INSNS (31),    /* DR */
197   COSTS_N_INSNS (31),    /* DSGFR */
198   COSTS_N_INSNS (31),    /* DSGR */
199 };
200 
201 static const
202 struct processor_costs z9_109_cost =
203 {
204   COSTS_N_INSNS (4),     /* M     */
205   COSTS_N_INSNS (2),     /* MGHI  */
206   COSTS_N_INSNS (2),     /* MH    */
207   COSTS_N_INSNS (2),     /* MHI   */
208   COSTS_N_INSNS (4),     /* ML    */
209   COSTS_N_INSNS (4),     /* MR    */
210   COSTS_N_INSNS (5),     /* MS    */
211   COSTS_N_INSNS (6),     /* MSG   */
212   COSTS_N_INSNS (4),     /* MSGF  */
213   COSTS_N_INSNS (4),     /* MSGFR */
214   COSTS_N_INSNS (4),     /* MSGR  */
215   COSTS_N_INSNS (4),     /* MSR   */
216   COSTS_N_INSNS (1),     /* multiplication in DFmode */
217   COSTS_N_INSNS (28),    /* MXBR */
218   COSTS_N_INSNS (130),   /* SQXBR */
219   COSTS_N_INSNS (66),    /* SQDBR */
220   COSTS_N_INSNS (38),    /* SQEBR */
221   COSTS_N_INSNS (1),     /* MADBR */
222   COSTS_N_INSNS (1),     /* MAEBR */
223   COSTS_N_INSNS (60),    /* DXBR */
224   COSTS_N_INSNS (40),    /* DDBR */
225   COSTS_N_INSNS (26),    /* DEBR */
226   COSTS_N_INSNS (30),    /* DLGR */
227   COSTS_N_INSNS (23),    /* DLR */
228   COSTS_N_INSNS (23),    /* DR */
229   COSTS_N_INSNS (24),    /* DSGFR */
230   COSTS_N_INSNS (24),    /* DSGR */
231 };
232 
233 static const
234 struct processor_costs z10_cost =
235 {
236   COSTS_N_INSNS (10),    /* M     */
237   COSTS_N_INSNS (10),    /* MGHI  */
238   COSTS_N_INSNS (10),    /* MH    */
239   COSTS_N_INSNS (10),    /* MHI   */
240   COSTS_N_INSNS (10),    /* ML    */
241   COSTS_N_INSNS (10),    /* MR    */
242   COSTS_N_INSNS (10),    /* MS    */
243   COSTS_N_INSNS (10),    /* MSG   */
244   COSTS_N_INSNS (10),    /* MSGF  */
245   COSTS_N_INSNS (10),    /* MSGFR */
246   COSTS_N_INSNS (10),    /* MSGR  */
247   COSTS_N_INSNS (10),    /* MSR   */
248   COSTS_N_INSNS (1),     /* multiplication in DFmode */
249   COSTS_N_INSNS (50),    /* MXBR */
250   COSTS_N_INSNS (120),   /* SQXBR */
251   COSTS_N_INSNS (52),    /* SQDBR */
252   COSTS_N_INSNS (38),    /* SQEBR */
253   COSTS_N_INSNS (1),     /* MADBR */
254   COSTS_N_INSNS (1),     /* MAEBR */
255   COSTS_N_INSNS (111),   /* DXBR */
256   COSTS_N_INSNS (39),    /* DDBR */
257   COSTS_N_INSNS (32),    /* DEBR */
258   COSTS_N_INSNS (160),   /* DLGR */
259   COSTS_N_INSNS (71),    /* DLR */
260   COSTS_N_INSNS (71),    /* DR */
261   COSTS_N_INSNS (71),    /* DSGFR */
262   COSTS_N_INSNS (71),    /* DSGR */
263 };
264 
265 static const
266 struct processor_costs z196_cost =
267 {
268   COSTS_N_INSNS (7),     /* M     */
269   COSTS_N_INSNS (5),     /* MGHI  */
270   COSTS_N_INSNS (5),     /* MH    */
271   COSTS_N_INSNS (5),     /* MHI   */
272   COSTS_N_INSNS (7),     /* ML    */
273   COSTS_N_INSNS (7),     /* MR    */
274   COSTS_N_INSNS (6),     /* MS    */
275   COSTS_N_INSNS (8),     /* MSG   */
276   COSTS_N_INSNS (6),     /* MSGF  */
277   COSTS_N_INSNS (6),     /* MSGFR */
278   COSTS_N_INSNS (8),     /* MSGR  */
279   COSTS_N_INSNS (6),     /* MSR   */
280   COSTS_N_INSNS (1),     /* multiplication in DFmode */
281   COSTS_N_INSNS (40),    /* MXBR B+40 */
282   COSTS_N_INSNS (100),   /* SQXBR B+100 */
283   COSTS_N_INSNS (42),    /* SQDBR B+42 */
284   COSTS_N_INSNS (28),    /* SQEBR B+28 */
285   COSTS_N_INSNS (1),     /* MADBR B */
286   COSTS_N_INSNS (1),     /* MAEBR B */
287   COSTS_N_INSNS (101),   /* DXBR B+101 */
288   COSTS_N_INSNS (29),    /* DDBR */
289   COSTS_N_INSNS (22),    /* DEBR */
290   COSTS_N_INSNS (160),   /* DLGR cracked */
291   COSTS_N_INSNS (160),   /* DLR cracked */
292   COSTS_N_INSNS (160),   /* DR expanded */
293   COSTS_N_INSNS (160),   /* DSGFR cracked */
294   COSTS_N_INSNS (160),   /* DSGR cracked */
295 };
296 
297 static const
298 struct processor_costs zEC12_cost =
299 {
300   COSTS_N_INSNS (7),     /* M     */
301   COSTS_N_INSNS (5),     /* MGHI  */
302   COSTS_N_INSNS (5),     /* MH    */
303   COSTS_N_INSNS (5),     /* MHI   */
304   COSTS_N_INSNS (7),     /* ML    */
305   COSTS_N_INSNS (7),     /* MR    */
306   COSTS_N_INSNS (6),     /* MS    */
307   COSTS_N_INSNS (8),     /* MSG   */
308   COSTS_N_INSNS (6),     /* MSGF  */
309   COSTS_N_INSNS (6),     /* MSGFR */
310   COSTS_N_INSNS (8),     /* MSGR  */
311   COSTS_N_INSNS (6),     /* MSR   */
312   COSTS_N_INSNS (1),     /* multiplication in DFmode */
313   COSTS_N_INSNS (40),    /* MXBR B+40 */
314   COSTS_N_INSNS (100),   /* SQXBR B+100 */
315   COSTS_N_INSNS (42),    /* SQDBR B+42 */
316   COSTS_N_INSNS (28),    /* SQEBR B+28 */
317   COSTS_N_INSNS (1),     /* MADBR B */
318   COSTS_N_INSNS (1),     /* MAEBR B */
319   COSTS_N_INSNS (131),   /* DXBR B+131 */
320   COSTS_N_INSNS (29),    /* DDBR */
321   COSTS_N_INSNS (22),    /* DEBR */
322   COSTS_N_INSNS (160),   /* DLGR cracked */
323   COSTS_N_INSNS (160),   /* DLR cracked */
324   COSTS_N_INSNS (160),   /* DR expanded */
325   COSTS_N_INSNS (160),   /* DSGFR cracked */
326   COSTS_N_INSNS (160),   /* DSGR cracked */
327 };
328 
329 static struct
330 {
331   /* The preferred name to be used in user visible output.  */
332   const char *const name;
333   /* CPU name as it should be passed to Binutils via .machine  */
334   const char *const binutils_name;
335   const enum processor_type processor;
336   const struct processor_costs *cost;
337 }
338 const processor_table[] =
339 {
340   { "g5",     "g5",     PROCESSOR_9672_G5,     &z900_cost },
341   { "g6",     "g6",     PROCESSOR_9672_G6,     &z900_cost },
342   { "z900",   "z900",   PROCESSOR_2064_Z900,   &z900_cost },
343   { "z990",   "z990",   PROCESSOR_2084_Z990,   &z990_cost },
344   { "z9-109", "z9-109", PROCESSOR_2094_Z9_109, &z9_109_cost },
345   { "z9-ec",  "z9-ec",  PROCESSOR_2094_Z9_EC,  &z9_109_cost },
346   { "z10",    "z10",    PROCESSOR_2097_Z10,    &z10_cost },
347   { "z196",   "z196",   PROCESSOR_2817_Z196,   &z196_cost },
348   { "zEC12",  "zEC12",  PROCESSOR_2827_ZEC12,  &zEC12_cost },
349   { "z13",    "z13",    PROCESSOR_2964_Z13,    &zEC12_cost },
350   { "z14",    "arch12", PROCESSOR_3906_Z14,    &zEC12_cost },
351   { "native", "",       PROCESSOR_NATIVE,      NULL }
352 };
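/* Illustrative sketch, not part of the original sources: a
   -march=/-mtune= name could be mapped to a processor_table index
   with a scan like the one below (the helper name and return protocol
   are hypothetical; the real option handling in this backend is more
   involved).  */
#if 0
static int
s390_processor_table_index (const char *arg)
{
  for (unsigned int i = 0; i < ARRAY_SIZE (processor_table); i++)
    if (strcmp (arg, processor_table[i].name) == 0)
      return (int) i;
  return -1;
}
#endif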
353 
354 extern int reload_completed;
355 
356 /* Kept up to date using the SCHED_VARIABLE_ISSUE hook.  */
357 static rtx_insn *last_scheduled_insn;
358 #define MAX_SCHED_UNITS 3
359 static int last_scheduled_unit_distance[MAX_SCHED_UNITS];
360 
361 #define NUM_SIDES 2
362 static int current_side = 1;
363 #define LONGRUNNING_THRESHOLD 5
364 
365 /* Estimate of the number of cycles for which a long-running insn
366    occupies an execution unit.  */
367 static unsigned fxu_longrunning[NUM_SIDES];
368 static unsigned vfu_longrunning[NUM_SIDES];
369 
370 /* Factor to scale latencies by, determined by measurements.  */
371 #define LATENCY_FACTOR 4
372 
373 /* The maximum score added for an instruction whose unit hasn't been
374    in use for MAX_SCHED_MIX_DISTANCE steps.  Increase this value to
375    give instruction mix scheduling more priority over instruction
376    grouping.  */
377 #define MAX_SCHED_MIX_SCORE      8
378 
379 /* The maximum distance up to which individual scores will be
380    calculated.  Everything beyond this gives MAX_SCHED_MIX_SCORE.
381    Increase this with the OOO window size of the machine.  */
382 #define MAX_SCHED_MIX_DISTANCE 100
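/* Rough sketch of how the two knobs above are meant to interact
   (illustrative only; the real scoring is done by the scheduling hooks
   further down in this file).  For a unit that has not been used for
   DISTANCE scheduling steps, the mix bonus is approximately

     MIN (MAX_SCHED_MIX_SCORE,
	  MAX_SCHED_MIX_SCORE * DISTANCE / MAX_SCHED_MIX_DISTANCE)

   so a unit idle for MAX_SCHED_MIX_DISTANCE steps or more receives the
   full MAX_SCHED_MIX_SCORE.  */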
383 
384 /* Structure used to hold the components of a S/390 memory
385    address.  A legitimate address on S/390 is of the general
386    form
387           base + index + displacement
388    where any of the components is optional.
389 
390    base and index are registers of the class ADDR_REGS,
391    displacement is an unsigned 12-bit immediate constant.  */
392 
393 struct s390_address
394 {
395   rtx base;
396   rtx indx;
397   rtx disp;
398   bool pointer;
399   bool literal_pool;
400 };
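/* Illustrative example, not from the original sources: an address such
   as

     (plus (plus (reg %r2) (reg %r3)) (const_int 4000))

   would be decomposed into base = (reg %r2), indx = (reg %r3) and
   disp = (const_int 4000), assuming the displacement fits into the
   unsigned 12-bit field described above.  */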
401 
402 /* A few accessor macros for struct cfun->machine->s390_frame_layout.  */
403 
404 #define cfun_frame_layout (cfun->machine->frame_layout)
405 #define cfun_save_high_fprs_p (!!cfun_frame_layout.high_fprs)
406 #define cfun_save_arg_fprs_p (!!(TARGET_64BIT				\
407 				 ? cfun_frame_layout.fpr_bitmap & 0x0f	\
408 				 : cfun_frame_layout.fpr_bitmap & 0x03))
409 #define cfun_gprs_save_area_size ((cfun_frame_layout.last_save_gpr_slot - \
410   cfun_frame_layout.first_save_gpr_slot + 1) * UNITS_PER_LONG)
411 #define cfun_set_fpr_save(REGNO) (cfun->machine->frame_layout.fpr_bitmap |=    \
412   (1 << (REGNO - FPR0_REGNUM)))
413 #define cfun_fpr_save_p(REGNO) (!!(cfun->machine->frame_layout.fpr_bitmap &    \
414   (1 << (REGNO - FPR0_REGNUM))))
415 #define cfun_gpr_save_slot(REGNO) \
416   cfun->machine->frame_layout.gpr_save_slots[REGNO]
417 
418 /* Number of GPRs and FPRs used for argument passing.  */
419 #define GP_ARG_NUM_REG 5
420 #define FP_ARG_NUM_REG (TARGET_64BIT ? 4 : 2)
421 #define VEC_ARG_NUM_REG 8
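/* Illustrative note, not from the original sources: with the limits
   above, at most 5 integer arguments are passed in GPRs (%r2..%r6 in
   the s390 ELF ABI); further integer arguments are passed on the
   stack.  */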
422 
423 /* A couple of shortcuts.  */
424 #define CONST_OK_FOR_J(x) \
425 	CONST_OK_FOR_CONSTRAINT_P((x), 'J', "J")
426 #define CONST_OK_FOR_K(x) \
427 	CONST_OK_FOR_CONSTRAINT_P((x), 'K', "K")
428 #define CONST_OK_FOR_Os(x) \
429         CONST_OK_FOR_CONSTRAINT_P((x), 'O', "Os")
430 #define CONST_OK_FOR_Op(x) \
431         CONST_OK_FOR_CONSTRAINT_P((x), 'O', "Op")
432 #define CONST_OK_FOR_On(x) \
433         CONST_OK_FOR_CONSTRAINT_P((x), 'O', "On")
434 
435 #define REGNO_PAIR_OK(REGNO, MODE)                               \
436   (s390_hard_regno_nregs ((REGNO), (MODE)) == 1 || !((REGNO) & 1))
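/* Illustrative example, not from the original sources: a value that
   needs two hard registers, e.g. a TImode value held in GPRs, must
   start at an even register number, so REGNO_PAIR_OK accepts GPR 6
   but rejects GPR 7 for such a value (assuming s390_hard_regno_nregs
   returns 2 here).  */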
437 
438 /* This is the read-ahead distance of the dynamic branch prediction
439    unit in bytes on a z10 (or higher) CPU.  */
440 #define PREDICT_DISTANCE (TARGET_Z10 ? 384 : 2048)
441 
442 /* Masks per jump target register indicating which thunks need to be
443    generated.  */
444 static GTY(()) int indirect_branch_prez10thunk_mask = 0;
445 static GTY(()) int indirect_branch_z10thunk_mask = 0;
446 
447 #define INDIRECT_BRANCH_NUM_OPTIONS 4
448 
449 enum s390_indirect_branch_option
450   {
451     s390_opt_indirect_branch_jump = 0,
452     s390_opt_indirect_branch_call,
453     s390_opt_function_return_reg,
454     s390_opt_function_return_mem
455   };
456 
457 static GTY(()) int indirect_branch_table_label_no[INDIRECT_BRANCH_NUM_OPTIONS] = { 0 };
458 const char *indirect_branch_table_label[INDIRECT_BRANCH_NUM_OPTIONS] = \
459   { "LJUMP", "LCALL", "LRETREG", "LRETMEM" };
460 const char *indirect_branch_table_name[INDIRECT_BRANCH_NUM_OPTIONS] =	\
461   { ".s390_indirect_jump", ".s390_indirect_call",
462     ".s390_return_reg", ".s390_return_mem" };
463 
464 bool
465 s390_return_addr_from_memory ()
466 {
467   return cfun_gpr_save_slot(RETURN_REGNUM) == SAVE_SLOT_STACK;
468 }
469 
470 /* Indicate which ABI has been used for passing vector args.
471    0 - no vector type arguments have been passed where the ABI is relevant
472    1 - the old ABI has been used
473    2 - a vector type argument has been passed either in a vector register
474        or on the stack by value  */
475 static int s390_vector_abi = 0;
476 
477 /* Set the vector ABI marker if TYPE is subject to the vector ABI
478    switch.  The vector ABI affects only vector data types.  There are
479    two aspects of the vector ABI relevant here:
480 
481    1. vectors >= 16 bytes have an alignment of 8 bytes with the new
482    ABI and natural alignment with the old.
483 
484    2. vectors <= 16 bytes are passed in VRs or by value on the stack
485    with the new ABI but by reference on the stack with the old.
486 
487    If ARG_P is true TYPE is used for a function argument or return
488    value.  The ABI marker then is set for all vector data types.  If
489    ARG_P is false only type 1 vectors are being checked.  */
490 
491 static void
492 s390_check_type_for_vector_abi (const_tree type, bool arg_p, bool in_struct_p)
493 {
494   static hash_set<const_tree> visited_types_hash;
495 
496   if (s390_vector_abi)
497     return;
498 
499   if (type == NULL_TREE || TREE_CODE (type) == ERROR_MARK)
500     return;
501 
502   if (visited_types_hash.contains (type))
503     return;
504 
505   visited_types_hash.add (type);
506 
507   if (VECTOR_TYPE_P (type))
508     {
509       int type_size = int_size_in_bytes (type);
510 
511       /* Outside of arguments only the alignment changes, and this
512 	 only happens for vector types >= 16 bytes.  */
513       if (!arg_p && type_size < 16)
514 	return;
515 
516       /* In arguments vector types > 16 bytes are passed as before (GCC
517 	 never enforced the bigger alignment for arguments which was
518 	 required by the old vector ABI).  However, it might still be
519 	 ABI relevant due to the changed alignment if it is a struct
520 	 member.  */
521       if (arg_p && type_size > 16 && !in_struct_p)
522 	return;
523 
524       s390_vector_abi = TARGET_VX_ABI ? 2 : 1;
525     }
526   else if (POINTER_TYPE_P (type) || TREE_CODE (type) == ARRAY_TYPE)
527     {
528       /* ARRAY_TYPE: Since neither of the ABIs imposes more than
529 	 natural alignment, there will never be ABI-dependent padding
530 	 in an array type.  That's why we do not set in_struct_p to
531 	 true here.  */
532       s390_check_type_for_vector_abi (TREE_TYPE (type), arg_p, in_struct_p);
533     }
534   else if (TREE_CODE (type) == FUNCTION_TYPE || TREE_CODE (type) == METHOD_TYPE)
535     {
536       tree arg_chain;
537 
538       /* Check the return type.  */
539       s390_check_type_for_vector_abi (TREE_TYPE (type), true, false);
540 
541       for (arg_chain = TYPE_ARG_TYPES (type);
542 	   arg_chain;
543 	   arg_chain = TREE_CHAIN (arg_chain))
544 	s390_check_type_for_vector_abi (TREE_VALUE (arg_chain), true, false);
545     }
546   else if (RECORD_OR_UNION_TYPE_P (type))
547     {
548       tree field;
549 
550       for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
551 	{
552 	  if (TREE_CODE (field) != FIELD_DECL)
553 	    continue;
554 
555 	  s390_check_type_for_vector_abi (TREE_TYPE (field), arg_p, true);
556 	}
557     }
558 }
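/* Illustrative example for the check above, not from the original
   sources: a 16-byte vector argument such as

     typedef double v2df __attribute__ ((vector_size (16)));
     void foo (v2df x);

   is subject to the vector ABI switch, so seeing it sets the
   s390_vector_abi marker (to 2 with the new ABI, 1 otherwise).  */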
559 
560 
561 /* System z builtins.  */
562 
563 #include "s390-builtins.h"
564 
565 const unsigned int bflags_builtin[S390_BUILTIN_MAX + 1] =
566   {
567 #undef B_DEF
568 #undef OB_DEF
569 #undef OB_DEF_VAR
570 #define B_DEF(NAME, PATTERN, ATTRS, BFLAGS, ...) BFLAGS,
571 #define OB_DEF(...)
572 #define OB_DEF_VAR(...)
573 #include "s390-builtins.def"
574     0
575   };
576 
577 const unsigned int opflags_builtin[S390_BUILTIN_MAX + 1] =
578   {
579 #undef B_DEF
580 #undef OB_DEF
581 #undef OB_DEF_VAR
582 #define B_DEF(NAME, PATTERN, ATTRS, BFLAGS, OPFLAGS, ...) OPFLAGS,
583 #define OB_DEF(...)
584 #define OB_DEF_VAR(...)
585 #include "s390-builtins.def"
586     0
587   };
588 
589 const unsigned int bflags_overloaded_builtin[S390_OVERLOADED_BUILTIN_MAX + 1] =
590   {
591 #undef B_DEF
592 #undef OB_DEF
593 #undef OB_DEF_VAR
594 #define B_DEF(...)
595 #define OB_DEF(NAME, FIRST_VAR_NAME, LAST_VAR_NAME, BFLAGS, ...) BFLAGS,
596 #define OB_DEF_VAR(...)
597 #include "s390-builtins.def"
598     0
599   };
600 
601 const unsigned int
602 bflags_overloaded_builtin_var[S390_OVERLOADED_BUILTIN_VAR_MAX + 1] =
603   {
604 #undef B_DEF
605 #undef OB_DEF
606 #undef OB_DEF_VAR
607 #define B_DEF(...)
608 #define OB_DEF(...)
609 #define OB_DEF_VAR(NAME, PATTERN, FLAGS, OPFLAGS, FNTYPE) FLAGS,
610 #include "s390-builtins.def"
611     0
612   };
613 
614 const unsigned int
615 opflags_overloaded_builtin_var[S390_OVERLOADED_BUILTIN_VAR_MAX + 1] =
616   {
617 #undef B_DEF
618 #undef OB_DEF
619 #undef OB_DEF_VAR
620 #define B_DEF(...)
621 #define OB_DEF(...)
622 #define OB_DEF_VAR(NAME, PATTERN, FLAGS, OPFLAGS, FNTYPE) OPFLAGS,
623 #include "s390-builtins.def"
624     0
625   };
626 
627 tree s390_builtin_types[BT_MAX];
628 tree s390_builtin_fn_types[BT_FN_MAX];
629 tree s390_builtin_decls[S390_BUILTIN_MAX +
630 			S390_OVERLOADED_BUILTIN_MAX +
631 			S390_OVERLOADED_BUILTIN_VAR_MAX];
632 
633 static enum insn_code const code_for_builtin[S390_BUILTIN_MAX + 1] = {
634 #undef B_DEF
635 #undef OB_DEF
636 #undef OB_DEF_VAR
637 #define B_DEF(NAME, PATTERN, ...) CODE_FOR_##PATTERN,
638 #define OB_DEF(...)
639 #define OB_DEF_VAR(...)
640 
641 #include "s390-builtins.def"
642   CODE_FOR_nothing
643 };
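/* Illustrative note, not from the original sources: with the macro
   definitions above, a hypothetical s390-builtins.def entry such as

     B_DEF (foo, foo_pattern, 0, B_VX, 0, BT_FN_INT)

   contributes CODE_FOR_foo_pattern to this table, i.e. the table maps
   each builtin's function code to the insn code of its expander
   pattern.  */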
644 
645 static void
646 s390_init_builtins (void)
647 {
648   /* These definitions are being used in s390-builtins.def.  */
649   tree returns_twice_attr = tree_cons (get_identifier ("returns_twice"),
650 				       NULL, NULL);
651   tree noreturn_attr = tree_cons (get_identifier ("noreturn"), NULL, NULL);
652   tree c_uint64_type_node;
653 
654   /* The uint64_type_node from tree.c is not compatible with the C99
655      uint64_t data type.  What we want is c_uint64_type_node from
656      c-common.c.  But since backend code is not supposed to interface
657      with the frontend we recreate it here.  */
658   if (TARGET_64BIT)
659     c_uint64_type_node = long_unsigned_type_node;
660   else
661     c_uint64_type_node = long_long_unsigned_type_node;
662 
663 #undef DEF_TYPE
664 #define DEF_TYPE(INDEX, NODE, CONST_P)			\
665   if (s390_builtin_types[INDEX] == NULL)		\
666     s390_builtin_types[INDEX] = (!CONST_P) ?		\
667       (NODE) : build_type_variant ((NODE), 1, 0);
668 
669 #undef DEF_POINTER_TYPE
670 #define DEF_POINTER_TYPE(INDEX, INDEX_BASE)				\
671   if (s390_builtin_types[INDEX] == NULL)				\
672     s390_builtin_types[INDEX] =						\
673       build_pointer_type (s390_builtin_types[INDEX_BASE]);
674 
675 #undef DEF_DISTINCT_TYPE
676 #define DEF_DISTINCT_TYPE(INDEX, INDEX_BASE)				\
677   if (s390_builtin_types[INDEX] == NULL)				\
678     s390_builtin_types[INDEX] =						\
679       build_distinct_type_copy (s390_builtin_types[INDEX_BASE]);
680 
681 #undef DEF_VECTOR_TYPE
682 #define DEF_VECTOR_TYPE(INDEX, INDEX_BASE, ELEMENTS)			\
683   if (s390_builtin_types[INDEX] == NULL)				\
684     s390_builtin_types[INDEX] =						\
685       build_vector_type (s390_builtin_types[INDEX_BASE], ELEMENTS);
686 
687 #undef DEF_OPAQUE_VECTOR_TYPE
688 #define DEF_OPAQUE_VECTOR_TYPE(INDEX, INDEX_BASE, ELEMENTS)		\
689   if (s390_builtin_types[INDEX] == NULL)				\
690     s390_builtin_types[INDEX] =						\
691       build_opaque_vector_type (s390_builtin_types[INDEX_BASE], ELEMENTS);
692 
693 #undef DEF_FN_TYPE
694 #define DEF_FN_TYPE(INDEX, args...)				\
695   if (s390_builtin_fn_types[INDEX] == NULL)			\
696     s390_builtin_fn_types[INDEX] =				\
697       build_function_type_list (args, NULL_TREE);
698 #undef DEF_OV_TYPE
699 #define DEF_OV_TYPE(...)
700 #include "s390-builtin-types.def"
701 
702 #undef B_DEF
703 #define B_DEF(NAME, PATTERN, ATTRS, BFLAGS, OPFLAGS, FNTYPE)		\
704   if (s390_builtin_decls[S390_BUILTIN_##NAME] == NULL)			\
705     s390_builtin_decls[S390_BUILTIN_##NAME] =				\
706       add_builtin_function ("__builtin_" #NAME,				\
707 			    s390_builtin_fn_types[FNTYPE],		\
708 			    S390_BUILTIN_##NAME,			\
709 			    BUILT_IN_MD,				\
710 			    NULL,					\
711 			    ATTRS);
712 #undef OB_DEF
713 #define OB_DEF(NAME, FIRST_VAR_NAME, LAST_VAR_NAME, BFLAGS, FNTYPE)	\
714   if (s390_builtin_decls[S390_OVERLOADED_BUILTIN_##NAME + S390_BUILTIN_MAX] \
715       == NULL)								\
716     s390_builtin_decls[S390_OVERLOADED_BUILTIN_##NAME + S390_BUILTIN_MAX] = \
717       add_builtin_function ("__builtin_" #NAME,				\
718 			    s390_builtin_fn_types[FNTYPE],		\
719 			    S390_OVERLOADED_BUILTIN_##NAME + S390_BUILTIN_MAX, \
720 			    BUILT_IN_MD,				\
721 			    NULL,					\
722 			    0);
723 #undef OB_DEF_VAR
724 #define OB_DEF_VAR(...)
725 #include "s390-builtins.def"
726 
727 }
728 
729 /* Return true if ARG is appropriate as argument number ARGNUM of
730    builtin DECL.  The operand flags from s390-builtins.def have to be
731    passed as OP_FLAGS.  */
732 bool
733 s390_const_operand_ok (tree arg, int argnum, int op_flags, tree decl)
734 {
735   if (O_UIMM_P (op_flags))
736     {
737       int bitwidths[] = { 1, 2, 3, 4, 5, 8, 12, 16, 32 };
738       int bitwidth = bitwidths[op_flags - O_U1];
739 
740       if (!tree_fits_uhwi_p (arg)
741 	  || tree_to_uhwi (arg) > (HOST_WIDE_INT_1U << bitwidth) - 1)
742 	{
743 	  error("constant argument %d for builtin %qF is out of range (0.."
744 		HOST_WIDE_INT_PRINT_UNSIGNED ")",
745 		argnum, decl,
746 		(HOST_WIDE_INT_1U << bitwidth) - 1);
747 	  return false;
748 	}
749     }
750 
751   if (O_SIMM_P (op_flags))
752     {
753       int bitwidths[] = { 2, 3, 4, 5, 8, 12, 16, 32 };
754       int bitwidth = bitwidths[op_flags - O_S2];
755 
756       if (!tree_fits_shwi_p (arg)
757 	  || tree_to_shwi (arg) < -(HOST_WIDE_INT_1 << (bitwidth - 1))
758 	  || tree_to_shwi (arg) > ((HOST_WIDE_INT_1 << (bitwidth - 1)) - 1))
759 	{
760 	  error("constant argument %d for builtin %qF is out of range ("
761 		HOST_WIDE_INT_PRINT_DEC ".."
762 		HOST_WIDE_INT_PRINT_DEC ")",
763 		argnum, decl,
764 		-(HOST_WIDE_INT_1 << (bitwidth - 1)),
765 		(HOST_WIDE_INT_1 << (bitwidth - 1)) - 1);
766 	  return false;
767 	}
768     }
769   return true;
770 }
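/* Illustrative example, not from the original sources: for an operand
   flagged as, say, O_U4 the bitwidth is 4, so constant arguments 0..15
   are accepted and e.g. 16 triggers the out-of-range error above.  */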
771 
772 /* Expand an expression EXP that calls a built-in function,
773    with result going to TARGET if that's convenient
774    (and in mode MODE if that's convenient).
775    SUBTARGET may be used as the target for computing one of EXP's operands.
776    IGNORE is nonzero if the value is to be ignored.  */
777 
778 static rtx
779 s390_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
780 		     machine_mode mode ATTRIBUTE_UNUSED,
781 		     int ignore ATTRIBUTE_UNUSED)
782 {
783 #define MAX_ARGS 6
784 
785   tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
786   unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
787   enum insn_code icode;
788   rtx op[MAX_ARGS], pat;
789   int arity;
790   bool nonvoid;
791   tree arg;
792   call_expr_arg_iterator iter;
793   unsigned int all_op_flags = opflags_for_builtin (fcode);
794   machine_mode last_vec_mode = VOIDmode;
795 
796   if (TARGET_DEBUG_ARG)
797     {
798       fprintf (stderr,
799 	       "s390_expand_builtin, code = %4d, %s, bflags = 0x%x\n",
800 	       (int)fcode, IDENTIFIER_POINTER (DECL_NAME (fndecl)),
801 	       bflags_for_builtin (fcode));
802     }
803 
804   if (S390_USE_TARGET_ATTRIBUTE)
805     {
806       unsigned int bflags;
807 
808       bflags = bflags_for_builtin (fcode);
809       if ((bflags & B_HTM) && !TARGET_HTM)
810 	{
811 	  error ("builtin %qF is not supported without -mhtm "
812 		 "(default with -march=zEC12 and higher).", fndecl);
813 	  return const0_rtx;
814 	}
815       if (((bflags & B_VX) || (bflags & B_VXE)) && !TARGET_VX)
816 	{
817 	  error ("builtin %qF requires -mvx "
818 		 "(default with -march=z13 and higher).", fndecl);
819 	  return const0_rtx;
820 	}
821 
822       if ((bflags & B_VXE) && !TARGET_VXE)
823 	{
824 	  error ("builtin %qF requires z14 or higher.", fndecl);
825 	  return const0_rtx;
826 	}
827     }
828   if (fcode >= S390_OVERLOADED_BUILTIN_VAR_OFFSET
829       && fcode < S390_ALL_BUILTIN_MAX)
830     {
831       gcc_unreachable ();
832     }
833   else if (fcode < S390_OVERLOADED_BUILTIN_OFFSET)
834     {
835       icode = code_for_builtin[fcode];
836       /* Set a flag in the machine specific cfun part in order to support
837 	 saving/restoring of FPRs.  */
838       if (fcode == S390_BUILTIN_tbegin || fcode == S390_BUILTIN_tbegin_retry)
839 	cfun->machine->tbegin_p = true;
840     }
841   else if (fcode < S390_OVERLOADED_BUILTIN_VAR_OFFSET)
842     {
843       error ("unresolved overloaded builtin");
844       return const0_rtx;
845     }
846   else
847     internal_error ("bad builtin fcode");
848 
849   if (icode == 0)
850     internal_error ("bad builtin icode");
851 
852   nonvoid = TREE_TYPE (TREE_TYPE (fndecl)) != void_type_node;
853 
854   if (nonvoid)
855     {
856       machine_mode tmode = insn_data[icode].operand[0].mode;
857       if (!target
858 	  || GET_MODE (target) != tmode
859 	  || !(*insn_data[icode].operand[0].predicate) (target, tmode))
860 	target = gen_reg_rtx (tmode);
861 
862       /* There are builtins (e.g. vec_promote) with no vector
863 	 arguments but an element selector.  So we have to also look
864 	 at the vector return type when emitting the modulo
865 	 operation.  */
866       if (VECTOR_MODE_P (insn_data[icode].operand[0].mode))
867 	last_vec_mode = insn_data[icode].operand[0].mode;
868     }
869 
870   arity = 0;
871   FOR_EACH_CALL_EXPR_ARG (arg, iter, exp)
872     {
873       rtx tmp_rtx;
874       const struct insn_operand_data *insn_op;
875       unsigned int op_flags = all_op_flags & ((1 << O_SHIFT) - 1);
876 
877       all_op_flags = all_op_flags >> O_SHIFT;
878 
879       if (arg == error_mark_node)
880 	return NULL_RTX;
881       if (arity >= MAX_ARGS)
882 	return NULL_RTX;
883 
884       if (O_IMM_P (op_flags)
885 	  && TREE_CODE (arg) != INTEGER_CST)
886 	{
887 	  error ("constant value required for builtin %qF argument %d",
888 		 fndecl, arity + 1);
889 	  return const0_rtx;
890 	}
891 
892       if (!s390_const_operand_ok (arg, arity + 1, op_flags, fndecl))
893 	return const0_rtx;
894 
895       insn_op = &insn_data[icode].operand[arity + nonvoid];
896       op[arity] = expand_expr (arg, NULL_RTX, insn_op->mode, EXPAND_NORMAL);
897 
898       /* expand_expr truncates constants to the target mode only if it
899 	 is "convenient".  However, our checks below rely on this
900 	 being done.  */
901       if (CONST_INT_P (op[arity])
902 	  && SCALAR_INT_MODE_P (insn_op->mode)
903 	  && GET_MODE (op[arity]) != insn_op->mode)
904 	op[arity] = GEN_INT (trunc_int_for_mode (INTVAL (op[arity]),
905 						 insn_op->mode));
906 
907       /* Wrap the expanded RTX for pointer types into a MEM expr with
908 	 the proper mode.  This allows us to use e.g. (match_operand
909 	 "memory_operand"..) in the insn patterns instead of (mem
910 	 (match_operand "address_operand")).  This is helpful for
911 	 patterns that do not just accept MEMs.  */
912       if (POINTER_TYPE_P (TREE_TYPE (arg))
913 	  && insn_op->predicate != address_operand)
914 	op[arity] = gen_rtx_MEM (insn_op->mode, op[arity]);
915 
916       /* Expand the modulo operation required on element selectors.  */
917       if (op_flags == O_ELEM)
918 	{
919 	  gcc_assert (last_vec_mode != VOIDmode);
920 	  op[arity] = simplify_expand_binop (SImode, code_to_optab (AND),
921 					     op[arity],
922 					     GEN_INT (GET_MODE_NUNITS (last_vec_mode) - 1),
923 					     NULL_RTX, 1, OPTAB_DIRECT);
924 	}
925 
926       /* Record the vector mode used for an element selector.  This assumes:
927 	 1. There is no builtin with two different vector modes and an element selector
928 	 2. The element selector comes after the vector type it is referring to.
929 	 This is currently true for all the builtins, but FIXME: we
930 	 should better check for that.  */
931       if (VECTOR_MODE_P (insn_op->mode))
932 	last_vec_mode = insn_op->mode;
933 
934       if (insn_op->predicate (op[arity], insn_op->mode))
935 	{
936 	  arity++;
937 	  continue;
938 	}
939 
940       /* A memory operand is rejected by the memory_operand predicate.
941 	 Try making the address legal by copying it into a register.  */
942       if (MEM_P (op[arity])
943 	  && insn_op->predicate == memory_operand
944 	  && (GET_MODE (XEXP (op[arity], 0)) == Pmode
945 	      || GET_MODE (XEXP (op[arity], 0)) == VOIDmode))
946 	{
947 	  op[arity] = replace_equiv_address (op[arity],
948 					     copy_to_mode_reg (Pmode,
949 					       XEXP (op[arity], 0)));
950 	}
951       /* Some of the builtins require different modes/types than the
952 	 pattern in order to implement a specific API.  Instead of
953 	 adding many expanders which do the mode change we do it here.
954 	 E.g. s390_vec_add_u128, which is required to have vector unsigned
955 	 char arguments, is mapped to addti3.  */
956       else if (insn_op->mode != VOIDmode
957 	       && GET_MODE (op[arity]) != VOIDmode
958 	       && GET_MODE (op[arity]) != insn_op->mode
959 	       && ((tmp_rtx = simplify_gen_subreg (insn_op->mode, op[arity],
960 						   GET_MODE (op[arity]), 0))
961 		   != NULL_RTX))
962 	{
963 	  op[arity] = tmp_rtx;
964 	}
965 
966       /* The predicate rejects the operand although the mode is fine.
967 	 Copy the operand to a register.  */
968       if (!insn_op->predicate (op[arity], insn_op->mode)
969 	  && (GET_MODE (op[arity]) == insn_op->mode
970 	      || GET_MODE (op[arity]) == VOIDmode
971 	      || (insn_op->predicate == address_operand
972 		  && GET_MODE (op[arity]) == Pmode)))
973 	{
974 	  /* An address_operand usually has VOIDmode in the expander
975 	     so we cannot use this.  */
976 	  machine_mode target_mode =
977 	    (insn_op->predicate == address_operand
978 	     ? (machine_mode) Pmode : insn_op->mode);
979 	  op[arity] = copy_to_mode_reg (target_mode, op[arity]);
980 	}
981 
982       if (!insn_op->predicate (op[arity], insn_op->mode))
983 	{
984 	  error ("invalid argument %d for builtin %qF", arity + 1, fndecl);
985 	  return const0_rtx;
986 	}
987       arity++;
988     }
989 
990   switch (arity)
991     {
992     case 0:
993       pat = GEN_FCN (icode) (target);
994       break;
995     case 1:
996       if (nonvoid)
997         pat = GEN_FCN (icode) (target, op[0]);
998       else
999 	pat = GEN_FCN (icode) (op[0]);
1000       break;
1001     case 2:
1002       if (nonvoid)
1003 	pat = GEN_FCN (icode) (target, op[0], op[1]);
1004       else
1005 	pat = GEN_FCN (icode) (op[0], op[1]);
1006       break;
1007     case 3:
1008       if (nonvoid)
1009 	pat = GEN_FCN (icode) (target, op[0], op[1], op[2]);
1010       else
1011 	pat = GEN_FCN (icode) (op[0], op[1], op[2]);
1012       break;
1013     case 4:
1014       if (nonvoid)
1015 	pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3]);
1016       else
1017 	pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
1018       break;
1019     case 5:
1020       if (nonvoid)
1021 	pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3], op[4]);
1022       else
1023 	pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3], op[4]);
1024       break;
1025     case 6:
1026       if (nonvoid)
1027 	pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3], op[4], op[5]);
1028       else
1029 	pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3], op[4], op[5]);
1030       break;
1031     default:
1032       gcc_unreachable ();
1033     }
1034   if (!pat)
1035     return NULL_RTX;
1036   emit_insn (pat);
1037 
1038   if (nonvoid)
1039     return target;
1040   else
1041     return const0_rtx;
1042 }
1043 
1044 
1045 static const int s390_hotpatch_hw_max = 1000000;
1046 static int s390_hotpatch_hw_before_label = 0;
1047 static int s390_hotpatch_hw_after_label = 0;
1048 
1049 /* Check whether the hotpatch attribute is applied to a function and,
1050    if it has arguments, whether the arguments are valid.  */
1051 
1052 static tree
1053 s390_handle_hotpatch_attribute (tree *node, tree name, tree args,
1054 				int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
1055 {
1056   tree expr;
1057   tree expr2;
1058   int err;
1059 
1060   if (TREE_CODE (*node) != FUNCTION_DECL)
1061     {
1062       warning (OPT_Wattributes, "%qE attribute only applies to functions",
1063 	       name);
1064       *no_add_attrs = true;
1065     }
1066   if (args != NULL && TREE_CHAIN (args) != NULL)
1067     {
1068       expr = TREE_VALUE (args);
1069       expr2 = TREE_VALUE (TREE_CHAIN (args));
1070     }
1071   if (args == NULL || TREE_CHAIN (args) == NULL)
1072     err = 1;
1073   else if (TREE_CODE (expr) != INTEGER_CST
1074 	   || !INTEGRAL_TYPE_P (TREE_TYPE (expr))
1075 	   || wi::gtu_p (wi::to_wide (expr), s390_hotpatch_hw_max))
1076     err = 1;
1077   else if (TREE_CODE (expr2) != INTEGER_CST
1078 	   || !INTEGRAL_TYPE_P (TREE_TYPE (expr2))
1079 	   || wi::gtu_p (wi::to_wide (expr2), s390_hotpatch_hw_max))
1080     err = 1;
1081   else
1082     err = 0;
1083   if (err)
1084     {
1085       error ("requested %qE attribute is not a comma separated pair of"
1086 	     " non-negative integer constants or too large (max. %d)", name,
1087 	     s390_hotpatch_hw_max);
1088       *no_add_attrs = true;
1089     }
1090 
1091   return NULL_TREE;
1092 }
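/* Example of the attribute checked above (illustrative only):

     void foo (void) __attribute__ ((hotpatch (1, 2)));

   requests roughly one halfword of padding before and two halfwords
   after the function label; both values must be non-negative and not
   larger than s390_hotpatch_hw_max.  */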
1093 
1094 /* Expand the s390_vector_bool type attribute.  */
1095 
1096 static tree
1097 s390_handle_vectorbool_attribute (tree *node, tree name ATTRIBUTE_UNUSED,
1098 				  tree args ATTRIBUTE_UNUSED,
1099 				  int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
1100 {
1101   tree type = *node, result = NULL_TREE;
1102   machine_mode mode;
1103 
1104   while (POINTER_TYPE_P (type)
1105 	 || TREE_CODE (type) == FUNCTION_TYPE
1106 	 || TREE_CODE (type) == METHOD_TYPE
1107 	 || TREE_CODE (type) == ARRAY_TYPE)
1108     type = TREE_TYPE (type);
1109 
1110   mode = TYPE_MODE (type);
1111   switch (mode)
1112     {
1113     case E_DImode: case E_V2DImode:
1114       result = s390_builtin_types[BT_BV2DI];
1115       break;
1116     case E_SImode: case E_V4SImode:
1117       result = s390_builtin_types[BT_BV4SI];
1118       break;
1119     case E_HImode: case E_V8HImode:
1120       result = s390_builtin_types[BT_BV8HI];
1121       break;
1122     case E_QImode: case E_V16QImode:
1123       result = s390_builtin_types[BT_BV16QI];
1124       break;
1125     default:
1126       break;
1127     }
1128 
1129   *no_add_attrs = true;  /* No need to hang on to the attribute.  */
1130 
1131   if (result)
1132     *node = lang_hooks.types.reconstruct_complex_type (*node, result);
1133 
1134   return NULL_TREE;
1135 }
1136 
1137 /* Check syntax of function decl attributes having a string type value.  */
1138 
1139 static tree
1140 s390_handle_string_attribute (tree *node, tree name ATTRIBUTE_UNUSED,
1141 			      tree args ATTRIBUTE_UNUSED,
1142 			      int flags ATTRIBUTE_UNUSED,
1143 			      bool *no_add_attrs)
1144 {
1145   tree cst;
1146 
1147   if (TREE_CODE (*node) != FUNCTION_DECL)
1148     {
1149       warning (OPT_Wattributes, "%qE attribute only applies to functions",
1150 	       name);
1151       *no_add_attrs = true;
1152     }
1153 
1154   cst = TREE_VALUE (args);
1155 
1156   if (TREE_CODE (cst) != STRING_CST)
1157     {
1158       warning (OPT_Wattributes,
1159 	       "%qE attribute requires a string constant argument",
1160 	       name);
1161       *no_add_attrs = true;
1162     }
1163 
1164   if (is_attribute_p ("indirect_branch", name)
1165       || is_attribute_p ("indirect_branch_call", name)
1166       || is_attribute_p ("function_return", name)
1167       || is_attribute_p ("function_return_reg", name)
1168       || is_attribute_p ("function_return_mem", name))
1169     {
1170       if (strcmp (TREE_STRING_POINTER (cst), "keep") != 0
1171 	  && strcmp (TREE_STRING_POINTER (cst), "thunk") != 0
1172 	  && strcmp (TREE_STRING_POINTER (cst), "thunk-extern") != 0)
1173       {
1174 	warning (OPT_Wattributes,
1175 		 "argument to %qE attribute is not "
1176 		 "(keep|thunk|thunk-extern)", name);
1177 	*no_add_attrs = true;
1178       }
1179     }
1180 
1181   if (is_attribute_p ("indirect_branch_jump", name)
1182       && strcmp (TREE_STRING_POINTER (cst), "keep") != 0
1183       && strcmp (TREE_STRING_POINTER (cst), "thunk") != 0
1184       && strcmp (TREE_STRING_POINTER (cst), "thunk-inline") != 0
1185       && strcmp (TREE_STRING_POINTER (cst), "thunk-extern") != 0)
1186     {
1187       warning (OPT_Wattributes,
1188 	       "argument to %qE attribute is not "
1189 	       "(keep|thunk|thunk-inline|thunk-extern)", name);
1190       *no_add_attrs = true;
1191     }
1192 
1193   return NULL_TREE;
1194 }
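/* Examples of the string attributes checked above (illustrative only):

     void foo (void) __attribute__ ((indirect_branch ("thunk")));
     void bar (void) __attribute__ ((function_return ("keep")));

   For "indirect_branch_jump" the additional value "thunk-inline" is
   accepted as well, as reflected in the second check above.  */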
1195 
1196 static const struct attribute_spec s390_attribute_table[] = {
1197   { "hotpatch", 2, 2, true, false, false, false,
1198     s390_handle_hotpatch_attribute, NULL },
1199   { "s390_vector_bool", 0, 0, false, true, false, true,
1200     s390_handle_vectorbool_attribute, NULL },
1201   { "indirect_branch", 1, 1, true, false, false, false,
1202     s390_handle_string_attribute, NULL },
1203   { "indirect_branch_jump", 1, 1, true, false, false, false,
1204     s390_handle_string_attribute, NULL },
1205   { "indirect_branch_call", 1, 1, true, false, false, false,
1206     s390_handle_string_attribute, NULL },
1207   { "function_return", 1, 1, true, false, false, false,
1208     s390_handle_string_attribute, NULL },
1209   { "function_return_reg", 1, 1, true, false, false, false,
1210     s390_handle_string_attribute, NULL },
1211   { "function_return_mem", 1, 1, true, false, false, false,
1212     s390_handle_string_attribute, NULL },
1213 
1214   /* End element.  */
1215   { NULL,        0, 0, false, false, false, false, NULL, NULL }
1216 };
1217 
1218 /* Return the alignment for LABEL.  We default to the -falign-labels
1219    value except for the literal pool base label.  */
1220 int
1221 s390_label_align (rtx_insn *label)
1222 {
1223   rtx_insn *prev_insn = prev_active_insn (label);
1224   rtx set, src;
1225 
1226   if (prev_insn == NULL_RTX)
1227     goto old;
1228 
1229   set = single_set (prev_insn);
1230 
1231   if (set == NULL_RTX)
1232     goto old;
1233 
1234   src = SET_SRC (set);
1235 
1236   /* Don't align literal pool base labels.  */
1237   if (GET_CODE (src) == UNSPEC
1238       && XINT (src, 1) == UNSPEC_MAIN_BASE)
1239     return 0;
1240 
1241  old:
1242   return align_labels_log;
1243 }
1244 
1245 static GTY(()) rtx got_symbol;
1246 
1247 /* Return the GOT table symbol.  The symbol will be created when the
1248    function is invoked for the first time.  */
1249 
1250 static rtx
1251 s390_got_symbol (void)
1252 {
1253   if (!got_symbol)
1254     {
1255       got_symbol = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
1256       SYMBOL_REF_FLAGS (got_symbol) = SYMBOL_FLAG_LOCAL;
1257     }
1258 
1259   return got_symbol;
1260 }
1261 
1262 static scalar_int_mode
1263 s390_libgcc_cmp_return_mode (void)
1264 {
1265   return TARGET_64BIT ? DImode : SImode;
1266 }
1267 
1268 static scalar_int_mode
1269 s390_libgcc_shift_count_mode (void)
1270 {
1271   return TARGET_64BIT ? DImode : SImode;
1272 }
1273 
1274 static scalar_int_mode
1275 s390_unwind_word_mode (void)
1276 {
1277   return TARGET_64BIT ? DImode : SImode;
1278 }
1279 
1280 /* Return true if the back end supports mode MODE.  */
1281 static bool
1282 s390_scalar_mode_supported_p (scalar_mode mode)
1283 {
1284   /* In contrast to the default implementation, reject TImode constants
1285      on 31-bit TARGET_ZARCH for ABI compliance.  */
1286   if (!TARGET_64BIT && TARGET_ZARCH && mode == TImode)
1287     return false;
1288 
1289   if (DECIMAL_FLOAT_MODE_P (mode))
1290     return default_decimal_float_supported_p ();
1291 
1292   return default_scalar_mode_supported_p (mode);
1293 }
1294 
1295 /* Return true if the back end supports vector mode MODE.  */
1296 static bool
1297 s390_vector_mode_supported_p (machine_mode mode)
1298 {
1299   machine_mode inner;
1300 
1301   if (!VECTOR_MODE_P (mode)
1302       || !TARGET_VX
1303       || GET_MODE_SIZE (mode) > 16)
1304     return false;
1305 
1306   inner = GET_MODE_INNER (mode);
1307 
1308   switch (inner)
1309     {
1310     case E_QImode:
1311     case E_HImode:
1312     case E_SImode:
1313     case E_DImode:
1314     case E_TImode:
1315     case E_SFmode:
1316     case E_DFmode:
1317     case E_TFmode:
1318       return true;
1319     default:
1320       return false;
1321     }
1322 }
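/* Illustrative examples for the check above, not from the original
   sources: V16QImode and V2DFmode (16 bytes with QImode resp. DFmode
   elements) are supported when TARGET_VX is enabled, whereas a 32-byte
   mode such as V4DFmode is rejected by the size check.  */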
1323 
1324 /* Set the has_landing_pad_p flag in struct machine_function to VALUE.  */
1325 
1326 void
1327 s390_set_has_landing_pad_p (bool value)
1328 {
1329   cfun->machine->has_landing_pad_p = value;
1330 }
1331 
1332 /* If two condition code modes are compatible, return a condition code
1333    mode which is compatible with both.  Otherwise, return
1334    VOIDmode.  */
1335 
1336 static machine_mode
1337 s390_cc_modes_compatible (machine_mode m1, machine_mode m2)
1338 {
1339   if (m1 == m2)
1340     return m1;
1341 
1342   switch (m1)
1343     {
1344     case E_CCZmode:
1345       if (m2 == CCUmode || m2 == CCTmode || m2 == CCZ1mode
1346 	  || m2 == CCSmode || m2 == CCSRmode || m2 == CCURmode)
1347         return m2;
1348       return VOIDmode;
1349 
1350     case E_CCSmode:
1351     case E_CCUmode:
1352     case E_CCTmode:
1353     case E_CCSRmode:
1354     case E_CCURmode:
1355     case E_CCZ1mode:
1356       if (m2 == CCZmode)
1357 	return m1;
1358 
1359       return VOIDmode;
1360 
1361     default:
1362       return VOIDmode;
1363     }
1364   return VOIDmode;
1365 }
1366 
1367 /* Return true if SET either doesn't set the CC register, or else
1368    the source and destination have matching CC modes and that
1369    CC mode is at least as constrained as REQ_MODE.  */
1370 
1371 static bool
1372 s390_match_ccmode_set (rtx set, machine_mode req_mode)
1373 {
1374   machine_mode set_mode;
1375 
1376   gcc_assert (GET_CODE (set) == SET);
1377 
1378   /* These modes are supposed to be used only in CC consumer
1379      patterns.  */
1380   gcc_assert (req_mode != CCVIALLmode && req_mode != CCVIANYmode
1381 	      && req_mode != CCVFALLmode && req_mode != CCVFANYmode);
1382 
1383   if (GET_CODE (SET_DEST (set)) != REG || !CC_REGNO_P (REGNO (SET_DEST (set))))
1384     return 1;
1385 
1386   set_mode = GET_MODE (SET_DEST (set));
1387   switch (set_mode)
1388     {
1389     case E_CCZ1mode:
1390     case E_CCSmode:
1391     case E_CCSRmode:
1392     case E_CCUmode:
1393     case E_CCURmode:
1394     case E_CCLmode:
1395     case E_CCL1mode:
1396     case E_CCL2mode:
1397     case E_CCL3mode:
1398     case E_CCT1mode:
1399     case E_CCT2mode:
1400     case E_CCT3mode:
1401     case E_CCVEQmode:
1402     case E_CCVIHmode:
1403     case E_CCVIHUmode:
1404     case E_CCVFHmode:
1405     case E_CCVFHEmode:
1406       if (req_mode != set_mode)
1407         return 0;
1408       break;
1409 
1410     case E_CCZmode:
1411       if (req_mode != CCSmode && req_mode != CCUmode && req_mode != CCTmode
1412 	  && req_mode != CCSRmode && req_mode != CCURmode
1413 	  && req_mode != CCZ1mode)
1414         return 0;
1415       break;
1416 
1417     case E_CCAPmode:
1418     case E_CCANmode:
1419       if (req_mode != CCAmode)
1420         return 0;
1421       break;
1422 
1423     default:
1424       gcc_unreachable ();
1425     }
1426 
1427   return (GET_MODE (SET_SRC (set)) == set_mode);
1428 }
1429 
1430 /* Return true if every SET in INSN that sets the CC register
1431    has source and destination with matching CC modes and that
1432    CC mode is at least as constrained as REQ_MODE.
1433    If REQ_MODE is VOIDmode, always return false.  */
1434 
1435 bool
1436 s390_match_ccmode (rtx_insn *insn, machine_mode req_mode)
1437 {
1438   int i;
1439 
1440   /* s390_tm_ccmode returns VOIDmode to indicate failure.  */
1441   if (req_mode == VOIDmode)
1442     return false;
1443 
1444   if (GET_CODE (PATTERN (insn)) == SET)
1445     return s390_match_ccmode_set (PATTERN (insn), req_mode);
1446 
1447   if (GET_CODE (PATTERN (insn)) == PARALLEL)
1448       for (i = 0; i < XVECLEN (PATTERN (insn), 0); i++)
1449         {
1450           rtx set = XVECEXP (PATTERN (insn), 0, i);
1451           if (GET_CODE (set) == SET)
1452             if (!s390_match_ccmode_set (set, req_mode))
1453               return false;
1454         }
1455 
1456   return true;
1457 }
1458 
1459 /* If a test-under-mask instruction can be used to implement
1460    (compare (and ... OP1) OP2), return the CC mode required
1461    to do that.  Otherwise, return VOIDmode.
1462    MIXED is true if the instruction can distinguish between
1463    CC1 and CC2 for mixed selected bits (TMxx), it is false
1464    if the instruction cannot (TM).  */
1465 
1466 machine_mode
1467 s390_tm_ccmode (rtx op1, rtx op2, bool mixed)
1468 {
1469   int bit0, bit1;
1470 
1471   /* ??? Fixme: should work on CONST_WIDE_INT as well.  */
1472   if (GET_CODE (op1) != CONST_INT || GET_CODE (op2) != CONST_INT)
1473     return VOIDmode;
1474 
1475   /* Selected bits all zero: CC0.
1476      e.g.: int a; if ((a & (16 + 128)) == 0) */
1477   if (INTVAL (op2) == 0)
1478     return CCTmode;
1479 
1480   /* Selected bits all one: CC3.
1481      e.g.: int a; if ((a & (16 + 128)) == 16 + 128) */
1482   if (INTVAL (op2) == INTVAL (op1))
1483     return CCT3mode;
1484 
1485   /* Exactly two bits selected, mixed zeroes and ones: CC1 or CC2. e.g.:
1486      int a;
1487      if ((a & (16 + 128)) == 16)         -> CCT1
1488      if ((a & (16 + 128)) == 128)        -> CCT2  */
1489   if (mixed)
1490     {
1491       bit1 = exact_log2 (INTVAL (op2));
1492       bit0 = exact_log2 (INTVAL (op1) ^ INTVAL (op2));
1493       if (bit0 != -1 && bit1 != -1)
1494         return bit0 > bit1 ? CCT1mode : CCT2mode;
1495     }
1496 
1497   return VOIDmode;
1498 }
1499 
1500 /* Given a comparison code OP (EQ, NE, etc.) and the operands
1501    OP0 and OP1 of a COMPARE, return the mode to be used for the
1502    comparison.  */
1503 
1504 machine_mode
1505 s390_select_ccmode (enum rtx_code code, rtx op0, rtx op1)
1506 {
1507   switch (code)
1508     {
1509       case EQ:
1510       case NE:
1511 	if ((GET_CODE (op0) == NEG || GET_CODE (op0) == ABS)
1512 	    && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)
1513 	  return CCAPmode;
1514 	if (GET_CODE (op0) == PLUS && GET_CODE (XEXP (op0, 1)) == CONST_INT
1515 	    && CONST_OK_FOR_K (INTVAL (XEXP (op0, 1))))
1516 	  return CCAPmode;
1517 	if ((GET_CODE (op0) == PLUS || GET_CODE (op0) == MINUS
1518 	     || GET_CODE (op1) == NEG)
1519 	    && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)
1520 	  return CCLmode;
1521 
1522 	if (GET_CODE (op0) == AND)
1523 	  {
1524 	    /* Check whether we can potentially do it via TM.  */
1525 	    machine_mode ccmode;
1526 	    ccmode = s390_tm_ccmode (XEXP (op0, 1), op1, 1);
1527 	    if (ccmode != VOIDmode)
1528 	      {
1529 		/* Relax CCTmode to CCZmode to allow fall-back to AND
1530 		   if that turns out to be beneficial.  */
1531 	        return ccmode == CCTmode ? CCZmode : ccmode;
1532 	      }
1533 	  }
1534 
1535 	if (register_operand (op0, HImode)
1536 	    && GET_CODE (op1) == CONST_INT
1537 	    && (INTVAL (op1) == -1 || INTVAL (op1) == 65535))
1538 	  return CCT3mode;
1539 	if (register_operand (op0, QImode)
1540 	    && GET_CODE (op1) == CONST_INT
1541 	    && (INTVAL (op1) == -1 || INTVAL (op1) == 255))
1542 	  return CCT3mode;
1543 
1544 	return CCZmode;
1545 
1546       case LE:
1547       case LT:
1548       case GE:
1549       case GT:
1550 	/* The only overflow condition of NEG and ABS happens when
1551 	   INT_MIN is used as parameter: the result stays negative.  So
1552 	   we have an overflow from a positive value to a negative.
1553 	   Using CCAP mode the resulting cc can be used for comparisons.  */
1554 	if ((GET_CODE (op0) == NEG || GET_CODE (op0) == ABS)
1555 	    && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)
1556 	  return CCAPmode;
1557 
1558  	/* If constants are involved in an add instruction it is possible to use
1559  	   the resulting cc for comparisons with zero. Knowing the sign of the
1560 	   constant the overflow behavior gets predictable. e.g.:
1561  	     int a, b; if ((b = a + c) > 0)
1562  	   with c as a constant value: c < 0 -> CCAN and c >= 0 -> CCAP  */
1563 	if (GET_CODE (op0) == PLUS && GET_CODE (XEXP (op0, 1)) == CONST_INT
1564 	    && (CONST_OK_FOR_K (INTVAL (XEXP (op0, 1)))
1565 		|| (CONST_OK_FOR_CONSTRAINT_P (INTVAL (XEXP (op0, 1)), 'O', "Os")
1566 		    /* Avoid INT32_MIN on 32 bit.  */
1567 		    && (!TARGET_ZARCH || INTVAL (XEXP (op0, 1)) != -0x7fffffff - 1))))
1568 	  {
1569 	    if (INTVAL (XEXP((op0), 1)) < 0)
1570 	      return CCANmode;
1571 	    else
1572 	      return CCAPmode;
1573 	  }
1574 	/* Fall through.  */
1575       case UNORDERED:
1576       case ORDERED:
1577       case UNEQ:
1578       case UNLE:
1579       case UNLT:
1580       case UNGE:
1581       case UNGT:
1582       case LTGT:
1583 	if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
1584 	    && GET_CODE (op1) != CONST_INT)
1585 	  return CCSRmode;
1586 	return CCSmode;
1587 
1588       case LTU:
1589       case GEU:
1590 	if (GET_CODE (op0) == PLUS
1591 	    && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)
1592 	  return CCL1mode;
1593 
1594 	if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
1595 	    && GET_CODE (op1) != CONST_INT)
1596 	  return CCURmode;
1597 	return CCUmode;
1598 
1599       case LEU:
1600       case GTU:
1601 	if (GET_CODE (op0) == MINUS
1602 	    && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)
1603 	  return CCL2mode;
1604 
1605 	if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
1606 	    && GET_CODE (op1) != CONST_INT)
1607 	  return CCURmode;
1608 	return CCUmode;
1609 
1610       default:
1611 	gcc_unreachable ();
1612     }
1613 }
1614 
1615 /* Replace the comparison OP0 CODE OP1 by a semantically equivalent one
1616    that we can implement more efficiently.  */
1617 
1618 static void
1619 s390_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
1620 			      bool op0_preserve_value)
1621 {
1622   if (op0_preserve_value)
1623     return;
1624 
1625   /* Convert ZERO_EXTRACT back to AND to enable TM patterns.  */
1626   if ((*code == EQ || *code == NE)
1627       && *op1 == const0_rtx
1628       && GET_CODE (*op0) == ZERO_EXTRACT
1629       && GET_CODE (XEXP (*op0, 1)) == CONST_INT
1630       && GET_CODE (XEXP (*op0, 2)) == CONST_INT
1631       && SCALAR_INT_MODE_P (GET_MODE (XEXP (*op0, 0))))
1632     {
1633       rtx inner = XEXP (*op0, 0);
1634       HOST_WIDE_INT modesize = GET_MODE_BITSIZE (GET_MODE (inner));
1635       HOST_WIDE_INT len = INTVAL (XEXP (*op0, 1));
1636       HOST_WIDE_INT pos = INTVAL (XEXP (*op0, 2));
1637 
1638       if (len > 0 && len < modesize
1639 	  && pos >= 0 && pos + len <= modesize
1640 	  && modesize <= HOST_BITS_PER_WIDE_INT)
1641 	{
1642 	  unsigned HOST_WIDE_INT block;
1643 	  block = (HOST_WIDE_INT_1U << len) - 1;
1644 	  block <<= modesize - pos - len;
1645 
1646 	  *op0 = gen_rtx_AND (GET_MODE (inner), inner,
1647 			      gen_int_mode (block, GET_MODE (inner)));
1648 	}
1649     }
1650 
1651   /* Narrow AND of memory against immediate to enable TM.  */
1652   if ((*code == EQ || *code == NE)
1653       && *op1 == const0_rtx
1654       && GET_CODE (*op0) == AND
1655       && GET_CODE (XEXP (*op0, 1)) == CONST_INT
1656       && SCALAR_INT_MODE_P (GET_MODE (XEXP (*op0, 0))))
1657     {
1658       rtx inner = XEXP (*op0, 0);
1659       rtx mask = XEXP (*op0, 1);
1660 
1661       /* Ignore paradoxical SUBREGs if all extra bits are masked out.  */
1662       if (GET_CODE (inner) == SUBREG
1663 	  && SCALAR_INT_MODE_P (GET_MODE (SUBREG_REG (inner)))
1664 	  && (GET_MODE_SIZE (GET_MODE (inner))
1665 	      >= GET_MODE_SIZE (GET_MODE (SUBREG_REG (inner))))
1666 	  && ((INTVAL (mask)
1667                & GET_MODE_MASK (GET_MODE (inner))
1668                & ~GET_MODE_MASK (GET_MODE (SUBREG_REG (inner))))
1669 	      == 0))
1670 	inner = SUBREG_REG (inner);
1671 
1672       /* Do not change volatile MEMs.  */
1673       if (MEM_P (inner) && !MEM_VOLATILE_P (inner))
1674 	{
1675 	  int part = s390_single_part (XEXP (*op0, 1),
1676 				       GET_MODE (inner), QImode, 0);
1677 	  if (part >= 0)
1678 	    {
1679 	      mask = gen_int_mode (s390_extract_part (mask, QImode, 0), QImode);
1680 	      inner = adjust_address_nv (inner, QImode, part);
1681 	      *op0 = gen_rtx_AND (QImode, inner, mask);
1682 	    }
1683 	}
1684     }
1685 
1686   /* Narrow comparisons against 0xffff to HImode if possible.  */
1687   if ((*code == EQ || *code == NE)
1688       && GET_CODE (*op1) == CONST_INT
1689       && INTVAL (*op1) == 0xffff
1690       && SCALAR_INT_MODE_P (GET_MODE (*op0))
1691       && (nonzero_bits (*op0, GET_MODE (*op0))
1692 	  & ~HOST_WIDE_INT_UC (0xffff)) == 0)
1693     {
1694       *op0 = gen_lowpart (HImode, *op0);
1695       *op1 = constm1_rtx;
1696     }
1697 
1698   /* Remove redundant UNSPEC_STRCMPCC_TO_INT conversions if possible.  */
1699   if (GET_CODE (*op0) == UNSPEC
1700       && XINT (*op0, 1) == UNSPEC_STRCMPCC_TO_INT
1701       && XVECLEN (*op0, 0) == 1
1702       && GET_MODE (XVECEXP (*op0, 0, 0)) == CCUmode
1703       && GET_CODE (XVECEXP (*op0, 0, 0)) == REG
1704       && REGNO (XVECEXP (*op0, 0, 0)) == CC_REGNUM
1705       && *op1 == const0_rtx)
1706     {
1707       enum rtx_code new_code = UNKNOWN;
1708       switch (*code)
1709 	{
1710 	  case EQ: new_code = EQ;  break;
1711 	  case NE: new_code = NE;  break;
1712 	  case LT: new_code = GTU; break;
1713 	  case GT: new_code = LTU; break;
1714 	  case LE: new_code = GEU; break;
1715 	  case GE: new_code = LEU; break;
1716 	  default: break;
1717 	}
1718 
1719       if (new_code != UNKNOWN)
1720 	{
1721 	  *op0 = XVECEXP (*op0, 0, 0);
1722 	  *code = new_code;
1723 	}
1724     }
1725 
1726   /* Remove redundant UNSPEC_CC_TO_INT conversions if possible.  */
1727   if (GET_CODE (*op0) == UNSPEC
1728       && XINT (*op0, 1) == UNSPEC_CC_TO_INT
1729       && XVECLEN (*op0, 0) == 1
1730       && GET_CODE (XVECEXP (*op0, 0, 0)) == REG
1731       && REGNO (XVECEXP (*op0, 0, 0)) == CC_REGNUM
1732       && CONST_INT_P (*op1))
1733     {
1734       enum rtx_code new_code = UNKNOWN;
1735       switch (GET_MODE (XVECEXP (*op0, 0, 0)))
1736 	{
1737 	case E_CCZmode:
1738 	case E_CCRAWmode:
1739 	  switch (*code)
1740 	    {
1741 	    case EQ: new_code = EQ;  break;
1742 	    case NE: new_code = NE;  break;
1743 	    default: break;
1744 	    }
1745 	  break;
1746 	default: break;
1747 	}
1748 
1749       if (new_code != UNKNOWN)
1750 	{
1751 	  /* For CCRAWmode put the required cc mask into the second
1752 	     operand.  */
1753 	  if (GET_MODE (XVECEXP (*op0, 0, 0)) == CCRAWmode
1754 	      && INTVAL (*op1) >= 0 && INTVAL (*op1) <= 3)
1755 	    *op1 = gen_rtx_CONST_INT (VOIDmode, 1 << (3 - INTVAL (*op1)));
1756 	  *op0 = XVECEXP (*op0, 0, 0);
1757 	  *code = new_code;
1758 	}
1759     }
1760 
1761   /* Simplify cascaded EQ, NE with const0_rtx.  */
1762   if ((*code == NE || *code == EQ)
1763       && (GET_CODE (*op0) == EQ || GET_CODE (*op0) == NE)
1764       && GET_MODE (*op0) == SImode
1765       && GET_MODE (XEXP (*op0, 0)) == CCZ1mode
1766       && REG_P (XEXP (*op0, 0))
1767       && XEXP (*op0, 1) == const0_rtx
1768       && *op1 == const0_rtx)
1769     {
1770       if ((*code == EQ && GET_CODE (*op0) == NE)
1771           || (*code == NE && GET_CODE (*op0) == EQ))
1772 	*code = EQ;
1773       else
1774 	*code = NE;
1775       *op0 = XEXP (*op0, 0);
1776     }
1777 
1778   /* Prefer register over memory as first operand.  */
1779   if (MEM_P (*op0) && REG_P (*op1))
1780     {
1781       rtx tem = *op0; *op0 = *op1; *op1 = tem;
1782       *code = (int)swap_condition ((enum rtx_code)*code);
1783     }
1784 
1785   /* A comparison result is compared against zero.  Replace it with
1786      the (perhaps inverted) original comparison.
1787      This probably should be done by simplify_relational_operation.  */
1788   if ((*code == EQ || *code == NE)
1789       && *op1 == const0_rtx
1790       && COMPARISON_P (*op0)
1791       && CC_REG_P (XEXP (*op0, 0)))
1792     {
1793       enum rtx_code new_code;
1794 
1795       if (*code == EQ)
1796 	new_code = reversed_comparison_code_parts (GET_CODE (*op0),
1797 						   XEXP (*op0, 0),
1798 						   XEXP (*op0, 1), NULL);
1799       else
1800 	new_code = GET_CODE (*op0);
1801 
1802       if (new_code != UNKNOWN)
1803 	{
1804 	  *code = new_code;
1805 	  *op1 = XEXP (*op0, 1);
1806 	  *op0 = XEXP (*op0, 0);
1807 	}
1808     }
1809 }
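
/* Illustrative example (added for exposition, not part of the original
   sources): the ZERO_EXTRACT case above turns
     (eq (zero_extract:SI (reg:SI x) (const_int 1) (const_int 7)) (const_int 0))
   into
     (eq (and:SI (reg:SI x) (const_int 0x1000000)) (const_int 0))
   since block = ((1 << 1) - 1) << (32 - 7 - 1); the bit position is
   counted from the most significant bit, as that shift assumes.  */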
1810 
1811 
1812 /* Emit a compare instruction suitable to implement the comparison
1813    OP0 CODE OP1.  Return the correct condition RTL to be placed in
1814    the IF_THEN_ELSE of the conditional branch testing the result.  */
1815 
1816 rtx
1817 s390_emit_compare (enum rtx_code code, rtx op0, rtx op1)
1818 {
1819   machine_mode mode = s390_select_ccmode (code, op0, op1);
1820   rtx cc;
1821 
1822   if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC)
1823     {
1824       /* Do not output a redundant compare instruction if a
1825 	 compare_and_swap pattern already computed the result and the
1826 	 machine modes are compatible.  */
1827       gcc_assert (s390_cc_modes_compatible (GET_MODE (op0), mode)
1828 		  == GET_MODE (op0));
1829       cc = op0;
1830     }
1831   else
1832     {
1833       cc = gen_rtx_REG (mode, CC_REGNUM);
1834       emit_insn (gen_rtx_SET (cc, gen_rtx_COMPARE (mode, op0, op1)));
1835     }
1836 
1837   return gen_rtx_fmt_ee (code, VOIDmode, cc, const0_rtx);
1838 }
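
/* Illustrative example (added for exposition, not part of the original
   sources): for two SImode registers A and B, s390_emit_compare (GT, A, B)
   selects CCSmode, emits
     (set (reg:CCS CC_REGNUM) (compare:CCS A B))
   and returns (gt (reg:CCS CC_REGNUM) (const_int 0)) for use in the
   IF_THEN_ELSE of the conditional branch.  */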
1839 
1840 /* Emit a compare-and-swap instruction, in the mode of MEM, setting MEM to
1841    NEW_RTX if OLD matches CMP.
1842    Return the correct condition RTL to be placed in the IF_THEN_ELSE of the
1843    conditional branch testing the result.  */
1844 
1845 static rtx
1846 s390_emit_compare_and_swap (enum rtx_code code, rtx old, rtx mem,
1847 			    rtx cmp, rtx new_rtx, machine_mode ccmode)
1848 {
1849   rtx cc;
1850 
1851   cc = gen_rtx_REG (ccmode, CC_REGNUM);
1852   switch (GET_MODE (mem))
1853     {
1854     case E_SImode:
1855       emit_insn (gen_atomic_compare_and_swapsi_internal (old, mem, cmp,
1856 							 new_rtx, cc));
1857       break;
1858     case E_DImode:
1859       emit_insn (gen_atomic_compare_and_swapdi_internal (old, mem, cmp,
1860 							 new_rtx, cc));
1861       break;
1862     case E_TImode:
1863 	emit_insn (gen_atomic_compare_and_swapti_internal (old, mem, cmp,
1864 							   new_rtx, cc));
1865       break;
1866     case E_QImode:
1867     case E_HImode:
1868     default:
1869       gcc_unreachable ();
1870     }
1871   return s390_emit_compare (code, cc, const0_rtx);
1872 }
1873 
1874 /* Emit a jump instruction to TARGET and return it.  If COND is
1875    NULL_RTX, emit an unconditional jump, else a conditional jump under
1876    condition COND.  */
1877 
1878 rtx_insn *
1879 s390_emit_jump (rtx target, rtx cond)
1880 {
1881   rtx insn;
1882 
1883   target = gen_rtx_LABEL_REF (VOIDmode, target);
1884   if (cond)
1885     target = gen_rtx_IF_THEN_ELSE (VOIDmode, cond, target, pc_rtx);
1886 
1887   insn = gen_rtx_SET (pc_rtx, target);
1888   return emit_jump_insn (insn);
1889 }
1890 
1891 /* Return branch condition mask to implement a branch
1892    specified by CODE.  Return -1 for invalid comparisons.  */
1893 
1894 int
1895 s390_branch_condition_mask (rtx code)
1896 {
1897   const int CC0 = 1 << 3;
1898   const int CC1 = 1 << 2;
1899   const int CC2 = 1 << 1;
1900   const int CC3 = 1 << 0;
1901 
1902   gcc_assert (GET_CODE (XEXP (code, 0)) == REG);
1903   gcc_assert (REGNO (XEXP (code, 0)) == CC_REGNUM);
1904   gcc_assert (XEXP (code, 1) == const0_rtx
1905 	      || (GET_MODE (XEXP (code, 0)) == CCRAWmode
1906 		  && CONST_INT_P (XEXP (code, 1))));
1907 
1908 
1909   switch (GET_MODE (XEXP (code, 0)))
1910     {
1911     case E_CCZmode:
1912     case E_CCZ1mode:
1913       switch (GET_CODE (code))
1914         {
1915         case EQ:	return CC0;
1916 	case NE:	return CC1 | CC2 | CC3;
1917 	default:	return -1;
1918         }
1919       break;
1920 
1921     case E_CCT1mode:
1922       switch (GET_CODE (code))
1923         {
1924         case EQ:	return CC1;
1925 	case NE:	return CC0 | CC2 | CC3;
1926 	default:	return -1;
1927         }
1928       break;
1929 
1930     case E_CCT2mode:
1931       switch (GET_CODE (code))
1932         {
1933         case EQ:	return CC2;
1934 	case NE:	return CC0 | CC1 | CC3;
1935 	default:	return -1;
1936         }
1937       break;
1938 
1939     case E_CCT3mode:
1940       switch (GET_CODE (code))
1941         {
1942         case EQ:	return CC3;
1943 	case NE:	return CC0 | CC1 | CC2;
1944 	default:	return -1;
1945         }
1946       break;
1947 
1948     case E_CCLmode:
1949       switch (GET_CODE (code))
1950         {
1951         case EQ:	return CC0 | CC2;
1952 	case NE:	return CC1 | CC3;
1953 	default:	return -1;
1954         }
1955       break;
1956 
1957     case E_CCL1mode:
1958       switch (GET_CODE (code))
1959         {
1960 	case LTU:	return CC2 | CC3;  /* carry */
1961 	case GEU:	return CC0 | CC1;  /* no carry */
1962 	default:	return -1;
1963         }
1964       break;
1965 
1966     case E_CCL2mode:
1967       switch (GET_CODE (code))
1968         {
1969 	case GTU:	return CC0 | CC1;  /* borrow */
1970 	case LEU:	return CC2 | CC3;  /* no borrow */
1971 	default:	return -1;
1972         }
1973       break;
1974 
1975     case E_CCL3mode:
1976       switch (GET_CODE (code))
1977 	{
1978 	case EQ:	return CC0 | CC2;
1979 	case NE:	return CC1 | CC3;
1980 	case LTU:	return CC1;
1981 	case GTU:	return CC3;
1982 	case LEU:	return CC1 | CC2;
1983 	case GEU:	return CC2 | CC3;
1984 	default:	return -1;
1985 	}
1986 
1987     case E_CCUmode:
1988       switch (GET_CODE (code))
1989         {
1990         case EQ:	return CC0;
1991         case NE:	return CC1 | CC2 | CC3;
1992         case LTU:	return CC1;
1993         case GTU:	return CC2;
1994         case LEU:	return CC0 | CC1;
1995         case GEU:	return CC0 | CC2;
1996 	default:	return -1;
1997         }
1998       break;
1999 
2000     case E_CCURmode:
2001       switch (GET_CODE (code))
2002         {
2003         case EQ:	return CC0;
2004         case NE:	return CC2 | CC1 | CC3;
2005         case LTU:	return CC2;
2006         case GTU:	return CC1;
2007         case LEU:	return CC0 | CC2;
2008         case GEU:	return CC0 | CC1;
2009 	default:	return -1;
2010         }
2011       break;
2012 
2013     case E_CCAPmode:
2014       switch (GET_CODE (code))
2015         {
2016         case EQ:	return CC0;
2017         case NE:	return CC1 | CC2 | CC3;
2018         case LT:	return CC1 | CC3;
2019         case GT:	return CC2;
2020         case LE:	return CC0 | CC1 | CC3;
2021         case GE:	return CC0 | CC2;
2022 	default:	return -1;
2023         }
2024       break;
2025 
2026     case E_CCANmode:
2027       switch (GET_CODE (code))
2028         {
2029         case EQ:	return CC0;
2030         case NE:	return CC1 | CC2 | CC3;
2031         case LT:	return CC1;
2032         case GT:	return CC2 | CC3;
2033         case LE:	return CC0 | CC1;
2034         case GE:	return CC0 | CC2 | CC3;
2035 	default:	return -1;
2036         }
2037       break;
2038 
2039     case E_CCSmode:
2040       switch (GET_CODE (code))
2041         {
2042         case EQ:	return CC0;
2043         case NE:	return CC1 | CC2 | CC3;
2044         case LT:	return CC1;
2045         case GT:	return CC2;
2046         case LE:	return CC0 | CC1;
2047         case GE:	return CC0 | CC2;
2048 	case UNORDERED:	return CC3;
2049 	case ORDERED:	return CC0 | CC1 | CC2;
2050 	case UNEQ:	return CC0 | CC3;
2051         case UNLT:	return CC1 | CC3;
2052         case UNGT:	return CC2 | CC3;
2053         case UNLE:	return CC0 | CC1 | CC3;
2054         case UNGE:	return CC0 | CC2 | CC3;
2055 	case LTGT:	return CC1 | CC2;
2056 	default:	return -1;
2057         }
2058       break;
2059 
2060     case E_CCSRmode:
2061       switch (GET_CODE (code))
2062         {
2063         case EQ:	return CC0;
2064         case NE:	return CC2 | CC1 | CC3;
2065         case LT:	return CC2;
2066         case GT:	return CC1;
2067         case LE:	return CC0 | CC2;
2068         case GE:	return CC0 | CC1;
2069 	case UNORDERED:	return CC3;
2070 	case ORDERED:	return CC0 | CC2 | CC1;
2071 	case UNEQ:	return CC0 | CC3;
2072         case UNLT:	return CC2 | CC3;
2073         case UNGT:	return CC1 | CC3;
2074         case UNLE:	return CC0 | CC2 | CC3;
2075         case UNGE:	return CC0 | CC1 | CC3;
2076 	case LTGT:	return CC2 | CC1;
2077 	default:	return -1;
2078         }
2079       break;
2080 
2081       /* Vector comparison modes.  */
2082       /* CC2 will never be set.  It is, however, part of the negated
2083 	 masks.  */
2084     case E_CCVIALLmode:
2085       switch (GET_CODE (code))
2086 	{
2087 	case EQ:
2088 	case GTU:
2089 	case GT:
2090 	case GE:        return CC0;
2091 	  /* The inverted modes are in fact *any* modes.  */
2092 	case NE:
2093 	case LEU:
2094 	case LE:
2095 	case LT:        return CC3 | CC1 | CC2;
2096 	default:        return -1;
2097 	}
2098 
2099     case E_CCVIANYmode:
2100       switch (GET_CODE (code))
2101 	{
2102 	case EQ:
2103 	case GTU:
2104 	case GT:
2105 	case GE:        return CC0 | CC1;
2106 	  /* The inverted modes are in fact *all* modes.  */
2107 	case NE:
2108 	case LEU:
2109 	case LE:
2110 	case LT:        return CC3 | CC2;
2111 	default:        return -1;
2112 	}
2113     case E_CCVFALLmode:
2114       switch (GET_CODE (code))
2115 	{
2116 	case EQ:
2117 	case GT:
2118 	case GE:        return CC0;
2119 	  /* The inverted modes are in fact *any* modes.  */
2120 	case NE:
2121 	case UNLE:
2122 	case UNLT:      return CC3 | CC1 | CC2;
2123 	default:        return -1;
2124 	}
2125 
2126     case E_CCVFANYmode:
2127       switch (GET_CODE (code))
2128 	{
2129 	case EQ:
2130 	case GT:
2131 	case GE:        return CC0 | CC1;
2132 	  /* The inverted modes are in fact *all* modes.  */
2133 	case NE:
2134 	case UNLE:
2135 	case UNLT:      return CC3 | CC2;
2136 	default:        return -1;
2137 	}
2138 
2139     case E_CCRAWmode:
2140       switch (GET_CODE (code))
2141 	{
2142 	case EQ:
2143 	  return INTVAL (XEXP (code, 1));
2144 	case NE:
2145 	  return (INTVAL (XEXP (code, 1))) ^ 0xf;
2146 	default:
2147 	  gcc_unreachable ();
2148 	}
2149 
2150     default:
2151       return -1;
2152     }
2153 }
2154 
2155 
2156 /* Return branch condition mask to implement a compare and branch
2157    specified by CODE.  Return -1 for invalid comparisons.  */
2158 
2159 int
2160 s390_compare_and_branch_condition_mask (rtx code)
2161 {
2162   const int CC0 = 1 << 3;
2163   const int CC1 = 1 << 2;
2164   const int CC2 = 1 << 1;
2165 
2166   switch (GET_CODE (code))
2167     {
2168     case EQ:
2169       return CC0;
2170     case NE:
2171       return CC1 | CC2;
2172     case LT:
2173     case LTU:
2174       return CC1;
2175     case GT:
2176     case GTU:
2177       return CC2;
2178     case LE:
2179     case LEU:
2180       return CC0 | CC1;
2181     case GE:
2182     case GEU:
2183       return CC0 | CC2;
2184     default:
2185       gcc_unreachable ();
2186     }
2187   return -1;
2188 }
2189 
2190 /* If INV is false, return assembler mnemonic string to implement
2191    a branch specified by CODE.  If INV is true, return mnemonic
2192    for the corresponding inverted branch.  */
2193 
2194 static const char *
2195 s390_branch_condition_mnemonic (rtx code, int inv)
2196 {
2197   int mask;
2198 
2199   static const char *const mnemonic[16] =
2200     {
2201       NULL, "o", "h", "nle",
2202       "l", "nhe", "lh", "ne",
2203       "e", "nlh", "he", "nl",
2204       "le", "nh", "no", NULL
2205     };
2206 
2207   if (GET_CODE (XEXP (code, 0)) == REG
2208       && REGNO (XEXP (code, 0)) == CC_REGNUM
2209       && (XEXP (code, 1) == const0_rtx
2210 	  || (GET_MODE (XEXP (code, 0)) == CCRAWmode
2211 	      && CONST_INT_P (XEXP (code, 1)))))
2212     mask = s390_branch_condition_mask (code);
2213   else
2214     mask = s390_compare_and_branch_condition_mask (code);
2215 
2216   gcc_assert (mask >= 0);
2217 
2218   if (inv)
2219     mask ^= 15;
2220 
2221   gcc_assert (mask >= 1 && mask <= 14);
2222 
2223   return mnemonic[mask];
2224 }
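
/* Illustrative example (added for exposition, not part of the original
   sources): for (gt (reg:CCS CC_REGNUM) (const_int 0)) the condition mask
   is CC2 = 0b0010 = 2, so the table above yields "h"; the inverted branch
   uses mask 2 ^ 15 = 13 (CC0 | CC1 | CC3) and yields "nh".  */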
2225 
2226 /* Return the part of op which has a value different from def.
2227    The size of the part is determined by mode.
2228    Use this function only if you already know that op really
2229    contains such a part.  */
2230 
2231 unsigned HOST_WIDE_INT
2232 s390_extract_part (rtx op, machine_mode mode, int def)
2233 {
2234   unsigned HOST_WIDE_INT value = 0;
2235   int max_parts = HOST_BITS_PER_WIDE_INT / GET_MODE_BITSIZE (mode);
2236   int part_bits = GET_MODE_BITSIZE (mode);
2237   unsigned HOST_WIDE_INT part_mask = (HOST_WIDE_INT_1U << part_bits) - 1;
2238   int i;
2239 
2240   for (i = 0; i < max_parts; i++)
2241     {
2242       if (i == 0)
2243 	value = UINTVAL (op);
2244       else
2245 	value >>= part_bits;
2246 
2247       if ((value & part_mask) != (def & part_mask))
2248 	return value & part_mask;
2249     }
2250 
2251   gcc_unreachable ();
2252 }
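
/* Illustrative example (added for exposition, not part of the original
   sources): s390_extract_part (GEN_INT (0xff00), QImode, 0) scans the
   QImode chunks starting with the least significant one; chunk 0 is 0x00
   (equal to DEF) and chunk 1 is 0xff, so 0xff is returned.  */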
2253 
2254 /* If OP is an integer constant of mode MODE with exactly one
2255    part of mode PART_MODE unequal to DEF, return the number of that
2256    part. Otherwise, return -1.  */
2257 
2258 int
2259 s390_single_part (rtx op,
2260 		  machine_mode mode,
2261 		  machine_mode part_mode,
2262 		  int def)
2263 {
2264   unsigned HOST_WIDE_INT value = 0;
2265   int n_parts = GET_MODE_SIZE (mode) / GET_MODE_SIZE (part_mode);
2266   unsigned HOST_WIDE_INT part_mask
2267     = (HOST_WIDE_INT_1U << GET_MODE_BITSIZE (part_mode)) - 1;
2268   int i, part = -1;
2269 
2270   if (GET_CODE (op) != CONST_INT)
2271     return -1;
2272 
2273   for (i = 0; i < n_parts; i++)
2274     {
2275       if (i == 0)
2276 	value = UINTVAL (op);
2277       else
2278 	value >>= GET_MODE_BITSIZE (part_mode);
2279 
2280       if ((value & part_mask) != (def & part_mask))
2281 	{
2282 	  if (part != -1)
2283 	    return -1;
2284 	  else
2285 	    part = i;
2286 	}
2287     }
2288   return part == -1 ? -1 : n_parts - 1 - part;
2289 }
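
/* Illustrative example (added for exposition, not part of the original
   sources): s390_single_part (GEN_INT (0xff00), SImode, QImode, 0) sees
   the QImode parts 0x00, 0x00, 0xff, 0x00 (most significant first); only
   one of them differs from DEF, so the function returns 2, the index of
   that part counted from the most significant part.  */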
2290 
2291 /* Return true if IN contains a contiguous bitfield in the lower SIZE
2292    bits and no other bits are set in (the lower SIZE bits of) IN.
2293 
2294    PSTART and PEND can be used to obtain the start and end
2295    position (inclusive) of the bitfield relative to 64
2296    bits. *PSTART / *PEND gives the position of the first/last bit
2297    of the bitfield counting from the highest order bit starting
2298    with zero.  */
2299 
2300 bool
2301 s390_contiguous_bitmask_nowrap_p (unsigned HOST_WIDE_INT in, int size,
2302 				  int *pstart, int *pend)
2303 {
2304   int start;
2305   int end = -1;
2306   int lowbit = HOST_BITS_PER_WIDE_INT - 1;
2307   int highbit = HOST_BITS_PER_WIDE_INT - size;
2308   unsigned HOST_WIDE_INT bitmask = HOST_WIDE_INT_1U;
2309 
2310   gcc_assert (!!pstart == !!pend);
2311   for (start = lowbit; start >= highbit; bitmask <<= 1, start--)
2312     if (end == -1)
2313       {
2314 	/* Look for the rightmost bit of a contiguous range of ones.  */
2315 	if (bitmask & in)
2316 	  /* Found it.  */
2317 	  end = start;
2318       }
2319     else
2320       {
2321 	/* Look for the first zero bit after the range of ones.  */
2322 	if (! (bitmask & in))
2323 	  /* Found it.  */
2324 	  break;
2325       }
2326   /* We're one past the last one-bit.  */
2327   start++;
2328 
2329   if (end == -1)
2330     /* No one bits found.  */
2331     return false;
2332 
2333   if (start > highbit)
2334     {
2335       unsigned HOST_WIDE_INT mask;
2336 
2337       /* Calculate a mask for all bits beyond the contiguous bits.  */
2338       mask = ((~HOST_WIDE_INT_0U >> highbit)
2339 	      & (~HOST_WIDE_INT_0U << (lowbit - start + 1)));
2340       if (mask & in)
2341 	/* There are more bits set beyond the first range of one bits.  */
2342 	return false;
2343     }
2344 
2345   if (pstart)
2346     {
2347       *pstart = start;
2348       *pend = end;
2349     }
2350 
2351   return true;
2352 }
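
/* Illustrative example (added for exposition, not part of the original
   sources): s390_contiguous_bitmask_nowrap_p (0x0ff0, 16, &s, &e) returns
   true with *PSTART = 52 and *PEND = 59 (positions relative to 64 bits,
   counted from the most significant bit), while 0x0f0f is rejected
   because its set bits are not contiguous.  */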
2353 
2354 /* Same as s390_contiguous_bitmask_nowrap_p but also returns true
2355    if ~IN contains a contiguous bitfield.  In that case, *END is <
2356    *START.
2357 
2358    If WRAP_P is true, a bitmask that wraps around is also tested.
2359    When a wraparound occurs, *START is greater than *END (if the
2360    pointers are non-null), and the uppermost (64 - SIZE) bits are
2361    thus part of the range.  If WRAP_P is false, no wraparound is
2362    tested.  */
2363 
2364 bool
2365 s390_contiguous_bitmask_p (unsigned HOST_WIDE_INT in, bool wrap_p,
2366 			   int size, int *start, int *end)
2367 {
2368   int bs = HOST_BITS_PER_WIDE_INT;
2369   bool b;
2370 
2371   gcc_assert (!!start == !!end);
2372   if ((in & ((~HOST_WIDE_INT_0U) >> (bs - size))) == 0)
2373     /* This cannot be expressed as a contiguous bitmask.  Exit early because
2374        the second call of s390_contiguous_bitmask_nowrap_p would accept this as
2375        a valid bitmask.  */
2376     return false;
2377   b = s390_contiguous_bitmask_nowrap_p (in, size, start, end);
2378   if (b)
2379     return true;
2380   if (! wrap_p)
2381     return false;
2382   b = s390_contiguous_bitmask_nowrap_p (~in, size, start, end);
2383   if (b && start)
2384     {
2385       int s = *start;
2386       int e = *end;
2387 
2388       gcc_assert (s >= 1);
2389       *start = ((e + 1) & (bs - 1));
2390       *end = ((s - 1 + bs) & (bs - 1));
2391     }
2392 
2393   return b;
2394 }
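
/* Illustrative example (added for exposition, not part of the original
   sources): s390_contiguous_bitmask_p (0xc3, true, 8, &s, &e) fails the
   non-wrapping test but succeeds on ~IN and returns true with *START = 62
   and *END = 57, i.e. a range that wraps around the bit 63/bit 0 boundary
   and covers the two lowest and the two highest bits of the 8-bit field.  */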
2395 
2396 /* Return true if OP contains the same contiguous bitfield in *all*
2397    its elements.  START and END can be used to obtain the start and
2398    end position of the bitfield.
2399 
2400    START/END give the position of the first/last bit of the bitfield
2401    counting from the lowest order bit starting with zero.  In order to
2402    use these values for S/390 instructions this has to be converted to
2403    "bits big endian" style.  */
2404 
2405 bool
2406 s390_contiguous_bitmask_vector_p (rtx op, int *start, int *end)
2407 {
2408   unsigned HOST_WIDE_INT mask;
2409   int size;
2410   rtx elt;
2411   bool b;
2412 
2413   gcc_assert (!!start == !!end);
2414   if (!const_vec_duplicate_p (op, &elt)
2415       || !CONST_INT_P (elt))
2416     return false;
2417 
2418   size = GET_MODE_UNIT_BITSIZE (GET_MODE (op));
2419 
2420   /* We cannot deal with V1TI/V1TF. This would require a vgmq.  */
2421   if (size > 64)
2422     return false;
2423 
2424   mask = UINTVAL (elt);
2425 
2426   b = s390_contiguous_bitmask_p (mask, true, size, start, end);
2427   if (b)
2428     {
2429       if (start)
2430 	{
2431 	  *start -= (HOST_BITS_PER_WIDE_INT - size);
2432 	  *end -= (HOST_BITS_PER_WIDE_INT - size);
2433 	}
2434       return true;
2435     }
2436   else
2437     return false;
2438 }
2439 
2440 /* Return true if C consists only of byte chunks being either 0 or
2441    0xff.  If MASK is nonnull, a byte mask is generated which is
2442    appropriate for the vector generate byte mask instruction.  */
2443 
2444 bool
2445 s390_bytemask_vector_p (rtx op, unsigned *mask)
2446 {
2447   int i;
2448   unsigned tmp_mask = 0;
2449   int nunit, unit_size;
2450 
2451   if (!VECTOR_MODE_P (GET_MODE (op))
2452       || GET_CODE (op) != CONST_VECTOR
2453       || !CONST_INT_P (XVECEXP (op, 0, 0)))
2454     return false;
2455 
2456   nunit = GET_MODE_NUNITS (GET_MODE (op));
2457   unit_size = GET_MODE_UNIT_SIZE (GET_MODE (op));
2458 
2459   for (i = 0; i < nunit; i++)
2460     {
2461       unsigned HOST_WIDE_INT c;
2462       int j;
2463 
2464       if (!CONST_INT_P (XVECEXP (op, 0, i)))
2465 	return false;
2466 
2467       c = UINTVAL (XVECEXP (op, 0, i));
2468       for (j = 0; j < unit_size; j++)
2469 	{
2470 	  if ((c & 0xff) != 0 && (c & 0xff) != 0xff)
2471 	    return false;
2472 	  tmp_mask |= (c & 1) << ((nunit - 1 - i) * unit_size + j);
2473 	  c = c >> BITS_PER_UNIT;
2474 	}
2475     }
2476 
2477   if (mask != NULL)
2478     *mask = tmp_mask;
2479 
2480   return true;
2481 }
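
/* Illustrative example (added for exposition, not part of the original
   sources): for a V2DImode CONST_VECTOR { 0xff00000000000000, 0 } only
   vector byte 0 is 0xff, so *MASK is set to 0x8000, the most significant
   of the 16 mask bits.  */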
2482 
2483 /* Check whether a rotate of ROTL followed by an AND of CONTIG is
2484    equivalent to a shift followed by the AND.  In particular, CONTIG
2485    should not overlap the (rotated) bit 0/bit 63 gap.  Negative values
2486    for ROTL indicate a rotate to the right.  */
2487 
2488 bool
2489 s390_extzv_shift_ok (int bitsize, int rotl, unsigned HOST_WIDE_INT contig)
2490 {
2491   int start, end;
2492   bool ok;
2493 
2494   ok = s390_contiguous_bitmask_nowrap_p (contig, bitsize, &start, &end);
2495   gcc_assert (ok);
2496 
2497   if (rotl >= 0)
2498     return (64 - end >= rotl);
2499   else
2500     {
2501       /* Translate "- rotate right" in BITSIZE mode to "rotate left" in
2502 	 DImode.  */
2503       rotl = -rotl + (64 - bitsize);
2504       return (start >= rotl);
2505     }
2506 }
2507 
2508 /* Check whether we can (and want to) split a double-word
2509    move in mode MODE from SRC to DST into two single-word
2510    moves, moving the subword FIRST_SUBWORD first.  */
2511 
2512 bool
2513 s390_split_ok_p (rtx dst, rtx src, machine_mode mode, int first_subword)
2514 {
2515   /* Floating point and vector registers cannot be split.  */
2516   if (FP_REG_P (src) || FP_REG_P (dst) || VECTOR_REG_P (src) || VECTOR_REG_P (dst))
2517     return false;
2518 
2519   /* Non-offsettable memory references cannot be split.  */
2520   if ((GET_CODE (src) == MEM && !offsettable_memref_p (src))
2521       || (GET_CODE (dst) == MEM && !offsettable_memref_p (dst)))
2522     return false;
2523 
2524   /* Moving the first subword must not clobber a register
2525      needed to move the second subword.  */
2526   if (register_operand (dst, mode))
2527     {
2528       rtx subreg = operand_subword (dst, first_subword, 0, mode);
2529       if (reg_overlap_mentioned_p (subreg, src))
2530         return false;
2531     }
2532 
2533   return true;
2534 }
2535 
2536 /* Return true if it can be proven that [MEM1, MEM1 + SIZE]
2537    and [MEM2, MEM2 + SIZE] do overlap and false
2538    otherwise.  */
2539 
2540 bool
2541 s390_overlap_p (rtx mem1, rtx mem2, HOST_WIDE_INT size)
2542 {
2543   rtx addr1, addr2, addr_delta;
2544   HOST_WIDE_INT delta;
2545 
2546   if (GET_CODE (mem1) != MEM || GET_CODE (mem2) != MEM)
2547     return true;
2548 
2549   if (size == 0)
2550     return false;
2551 
2552   addr1 = XEXP (mem1, 0);
2553   addr2 = XEXP (mem2, 0);
2554 
2555   addr_delta = simplify_binary_operation (MINUS, Pmode, addr2, addr1);
2556 
2557   /* This overlapping check is used by peepholes merging memory block operations.
2558      Overlapping operations would otherwise be recognized by the S/390 hardware
2559      and would fall back to a slower implementation. Allowing overlapping
2560      operations would lead to slow code but not to wrong code. Therefore we are
2561      somewhat optimistic if we cannot prove that the memory blocks are
2562      overlapping.
2563      That's why we return false here although this may accept operations on
2564      overlapping memory areas.  */
2565   if (!addr_delta || GET_CODE (addr_delta) != CONST_INT)
2566     return false;
2567 
2568   delta = INTVAL (addr_delta);
2569 
2570   if (delta == 0
2571       || (delta > 0 && delta < size)
2572       || (delta < 0 && -delta < size))
2573     return true;
2574 
2575   return false;
2576 }
2577 
2578 /* Check whether the address of memory reference MEM2 equals exactly
2579    the address of memory reference MEM1 plus DELTA.  Return true if
2580    we can prove this to be the case, false otherwise.  */
2581 
2582 bool
2583 s390_offset_p (rtx mem1, rtx mem2, rtx delta)
2584 {
2585   rtx addr1, addr2, addr_delta;
2586 
2587   if (GET_CODE (mem1) != MEM || GET_CODE (mem2) != MEM)
2588     return false;
2589 
2590   addr1 = XEXP (mem1, 0);
2591   addr2 = XEXP (mem2, 0);
2592 
2593   addr_delta = simplify_binary_operation (MINUS, Pmode, addr2, addr1);
2594   if (!addr_delta || !rtx_equal_p (addr_delta, delta))
2595     return false;
2596 
2597   return true;
2598 }
2599 
2600 /* Expand logical operator CODE in mode MODE with operands OPERANDS.  */
2601 
2602 void
2603 s390_expand_logical_operator (enum rtx_code code, machine_mode mode,
2604 			      rtx *operands)
2605 {
2606   machine_mode wmode = mode;
2607   rtx dst = operands[0];
2608   rtx src1 = operands[1];
2609   rtx src2 = operands[2];
2610   rtx op, clob, tem;
2611 
2612   /* If we cannot handle the operation directly, use a temp register.  */
2613   if (!s390_logical_operator_ok_p (operands))
2614     dst = gen_reg_rtx (mode);
2615 
2616   /* QImode and HImode patterns make sense only if we have a destination
2617      in memory.  Otherwise perform the operation in SImode.  */
2618   if ((mode == QImode || mode == HImode) && GET_CODE (dst) != MEM)
2619     wmode = SImode;
2620 
2621   /* Widen operands if required.  */
2622   if (mode != wmode)
2623     {
2624       if (GET_CODE (dst) == SUBREG
2625 	  && (tem = simplify_subreg (wmode, dst, mode, 0)) != 0)
2626 	dst = tem;
2627       else if (REG_P (dst))
2628 	dst = gen_rtx_SUBREG (wmode, dst, 0);
2629       else
2630         dst = gen_reg_rtx (wmode);
2631 
2632       if (GET_CODE (src1) == SUBREG
2633 	  && (tem = simplify_subreg (wmode, src1, mode, 0)) != 0)
2634 	src1 = tem;
2635       else if (GET_MODE (src1) != VOIDmode)
2636 	src1 = gen_rtx_SUBREG (wmode, force_reg (mode, src1), 0);
2637 
2638       if (GET_CODE (src2) == SUBREG
2639 	  && (tem = simplify_subreg (wmode, src2, mode, 0)) != 0)
2640 	src2 = tem;
2641       else if (GET_MODE (src2) != VOIDmode)
2642 	src2 = gen_rtx_SUBREG (wmode, force_reg (mode, src2), 0);
2643     }
2644 
2645   /* Emit the instruction.  */
2646   op = gen_rtx_SET (dst, gen_rtx_fmt_ee (code, wmode, src1, src2));
2647   clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, CC_REGNUM));
2648   emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
2649 
2650   /* Fix up the destination if needed.  */
2651   if (dst != operands[0])
2652     emit_move_insn (operands[0], gen_lowpart (mode, dst));
2653 }
2654 
2655 /* Check whether OPERANDS are OK for a logical operation (AND, IOR, XOR).  */
2656 
2657 bool
2658 s390_logical_operator_ok_p (rtx *operands)
2659 {
2660   /* If the destination operand is in memory, it needs to coincide
2661      with one of the source operands.  After reload, it has to be
2662      the first source operand.  */
2663   if (GET_CODE (operands[0]) == MEM)
2664     return rtx_equal_p (operands[0], operands[1])
2665 	   || (!reload_completed && rtx_equal_p (operands[0], operands[2]));
2666 
2667   return true;
2668 }
2669 
2670 /* Narrow logical operation CODE of memory operand MEMOP with immediate
2671    operand IMMOP to switch from SS to SI type instructions.  */
2672 
2673 void
2674 s390_narrow_logical_operator (enum rtx_code code, rtx *memop, rtx *immop)
2675 {
2676   int def = code == AND ? -1 : 0;
2677   HOST_WIDE_INT mask;
2678   int part;
2679 
2680   gcc_assert (GET_CODE (*memop) == MEM);
2681   gcc_assert (!MEM_VOLATILE_P (*memop));
2682 
2683   mask = s390_extract_part (*immop, QImode, def);
2684   part = s390_single_part (*immop, GET_MODE (*memop), QImode, def);
2685   gcc_assert (part >= 0);
2686 
2687   *memop = adjust_address (*memop, QImode, part);
2688   *immop = gen_int_mode (mask, QImode);
2689 }
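
/* Illustrative example (added for exposition, not part of the original
   sources): for an SImode AND of a MEM with (const_int -256), i.e. mask
   0xffffff00, only the least significant byte differs from the AND
   default of all ones.  The MEM is narrowed to QImode at byte offset 3
   and the immediate becomes 0x00, which fits the SI-type instructions
   this narrowing is meant to enable.  */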
2690 
2691 
2692 /* How to allocate a 'struct machine_function'.  */
2693 
2694 static struct machine_function *
2695 s390_init_machine_status (void)
2696 {
2697   return ggc_cleared_alloc<machine_function> ();
2698 }
2699 
2700 /* Map for smallest class containing reg regno.  */
2701 
2702 const enum reg_class regclass_map[FIRST_PSEUDO_REGISTER] =
2703 { GENERAL_REGS, ADDR_REGS, ADDR_REGS, ADDR_REGS,  /*  0 */
2704   ADDR_REGS,    ADDR_REGS, ADDR_REGS, ADDR_REGS,  /*  4 */
2705   ADDR_REGS,    ADDR_REGS, ADDR_REGS, ADDR_REGS,  /*  8 */
2706   ADDR_REGS,    ADDR_REGS, ADDR_REGS, ADDR_REGS,  /* 12 */
2707   FP_REGS,      FP_REGS,   FP_REGS,   FP_REGS,    /* 16 */
2708   FP_REGS,      FP_REGS,   FP_REGS,   FP_REGS,    /* 20 */
2709   FP_REGS,      FP_REGS,   FP_REGS,   FP_REGS,    /* 24 */
2710   FP_REGS,      FP_REGS,   FP_REGS,   FP_REGS,    /* 28 */
2711   ADDR_REGS,    CC_REGS,   ADDR_REGS, ADDR_REGS,  /* 32 */
2712   ACCESS_REGS,	ACCESS_REGS, VEC_REGS, VEC_REGS,  /* 36 */
2713   VEC_REGS, VEC_REGS, VEC_REGS, VEC_REGS,         /* 40 */
2714   VEC_REGS, VEC_REGS, VEC_REGS, VEC_REGS,         /* 44 */
2715   VEC_REGS, VEC_REGS, VEC_REGS, VEC_REGS,         /* 48 */
2716   VEC_REGS, VEC_REGS                              /* 52 */
2717 };
2718 
2719 /* Return attribute type of insn.  */
2720 
2721 static enum attr_type
2722 s390_safe_attr_type (rtx_insn *insn)
2723 {
2724   if (recog_memoized (insn) >= 0)
2725     return get_attr_type (insn);
2726   else
2727     return TYPE_NONE;
2728 }
2729 
2730 /* Return true if DISP is a valid short displacement.  */
2731 
2732 static bool
2733 s390_short_displacement (rtx disp)
2734 {
2735   /* No displacement is OK.  */
2736   if (!disp)
2737     return true;
2738 
2739   /* Without the long displacement facility we don't need to
2740      distinguish between long and short displacements.  */
2741   if (!TARGET_LONG_DISPLACEMENT)
2742     return true;
2743 
2744   /* Integer displacement in range.  */
2745   if (GET_CODE (disp) == CONST_INT)
2746     return INTVAL (disp) >= 0 && INTVAL (disp) < 4096;
2747 
2748   /* GOT offset is not OK, the GOT can be large.  */
2749   if (GET_CODE (disp) == CONST
2750       && GET_CODE (XEXP (disp, 0)) == UNSPEC
2751       && (XINT (XEXP (disp, 0), 1) == UNSPEC_GOT
2752           || XINT (XEXP (disp, 0), 1) == UNSPEC_GOTNTPOFF))
2753     return false;
2754 
2755   /* All other symbolic constants are literal pool references,
2756      which are OK as the literal pool must be small.  */
2757   if (GET_CODE (disp) == CONST)
2758     return true;
2759 
2760   return false;
2761 }
2762 
2763 /* Decompose an RTL expression ADDR for a memory address into
2764    its components, returned in OUT.
2765 
2766    Returns false if ADDR is not a valid memory address, true
2767    otherwise.  If OUT is NULL, don't return the components,
2768    but check for validity only.
2769 
2770    Note: Only addresses in canonical form are recognized.
2771    LEGITIMIZE_ADDRESS should convert non-canonical forms to the
2772    canonical form so that they will be recognized.  */
2773 
2774 static int
2775 s390_decompose_address (rtx addr, struct s390_address *out)
2776 {
2777   HOST_WIDE_INT offset = 0;
2778   rtx base = NULL_RTX;
2779   rtx indx = NULL_RTX;
2780   rtx disp = NULL_RTX;
2781   rtx orig_disp;
2782   bool pointer = false;
2783   bool base_ptr = false;
2784   bool indx_ptr = false;
2785   bool literal_pool = false;
2786 
2787   /* We may need to substitute the literal pool base register into the address
2788      below.  However, at this point we do not know which register is going to
2789      be used as base, so we substitute the arg pointer register.  This is going
2790      to be treated as holding a pointer below -- it shouldn't be used for any
2791      other purpose.  */
2792   rtx fake_pool_base = gen_rtx_REG (Pmode, ARG_POINTER_REGNUM);
2793 
2794   /* Decompose address into base + index + displacement.  */
2795 
2796   if (GET_CODE (addr) == REG || GET_CODE (addr) == UNSPEC)
2797     base = addr;
2798 
2799   else if (GET_CODE (addr) == PLUS)
2800     {
2801       rtx op0 = XEXP (addr, 0);
2802       rtx op1 = XEXP (addr, 1);
2803       enum rtx_code code0 = GET_CODE (op0);
2804       enum rtx_code code1 = GET_CODE (op1);
2805 
2806       if (code0 == REG || code0 == UNSPEC)
2807 	{
2808 	  if (code1 == REG || code1 == UNSPEC)
2809 	    {
2810 	      indx = op0;	/* index + base */
2811 	      base = op1;
2812 	    }
2813 
2814 	  else
2815 	    {
2816 	      base = op0;	/* base + displacement */
2817 	      disp = op1;
2818 	    }
2819 	}
2820 
2821       else if (code0 == PLUS)
2822 	{
2823 	  indx = XEXP (op0, 0);	/* index + base + disp */
2824 	  base = XEXP (op0, 1);
2825 	  disp = op1;
2826 	}
2827 
2828       else
2829 	{
2830 	  return false;
2831 	}
2832     }
2833 
2834   else
2835     disp = addr;		/* displacement */
2836 
2837   /* Extract integer part of displacement.  */
2838   orig_disp = disp;
2839   if (disp)
2840     {
2841       if (GET_CODE (disp) == CONST_INT)
2842 	{
2843 	  offset = INTVAL (disp);
2844 	  disp = NULL_RTX;
2845 	}
2846       else if (GET_CODE (disp) == CONST
2847 	       && GET_CODE (XEXP (disp, 0)) == PLUS
2848 	       && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
2849 	{
2850 	  offset = INTVAL (XEXP (XEXP (disp, 0), 1));
2851 	  disp = XEXP (XEXP (disp, 0), 0);
2852 	}
2853     }
2854 
2855   /* Strip off CONST here to avoid special case tests later.  */
2856   if (disp && GET_CODE (disp) == CONST)
2857     disp = XEXP (disp, 0);
2858 
2859   /* We can convert literal pool addresses to
2860      displacements by basing them off the base register.  */
2861   if (disp && GET_CODE (disp) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (disp))
2862     {
2863       if (base || indx)
2864 	return false;
2865 
2866       base = fake_pool_base, literal_pool = true;
2867 
2868       /* Mark up the displacement.  */
2869       disp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, disp),
2870 			     UNSPEC_LTREL_OFFSET);
2871     }
2872 
2873   /* Validate base register.  */
2874   if (base)
2875     {
2876       if (GET_CODE (base) == UNSPEC)
2877 	switch (XINT (base, 1))
2878 	  {
2879 	  case UNSPEC_LTREF:
2880 	    if (!disp)
2881 	      disp = gen_rtx_UNSPEC (Pmode,
2882 				     gen_rtvec (1, XVECEXP (base, 0, 0)),
2883 				     UNSPEC_LTREL_OFFSET);
2884 	    else
2885 	      return false;
2886 
2887 	    base = XVECEXP (base, 0, 1);
2888 	    break;
2889 
2890 	  case UNSPEC_LTREL_BASE:
2891 	    if (XVECLEN (base, 0) == 1)
2892 	      base = fake_pool_base, literal_pool = true;
2893 	    else
2894 	      base = XVECEXP (base, 0, 1);
2895 	    break;
2896 
2897 	  default:
2898 	    return false;
2899 	  }
2900 
2901       if (!REG_P (base) || GET_MODE (base) != Pmode)
2902 	return false;
2903 
2904       if (REGNO (base) == STACK_POINTER_REGNUM
2905 	  || REGNO (base) == FRAME_POINTER_REGNUM
2906 	  || ((reload_completed || reload_in_progress)
2907 	      && frame_pointer_needed
2908 	      && REGNO (base) == HARD_FRAME_POINTER_REGNUM)
2909 	  || REGNO (base) == ARG_POINTER_REGNUM
2910           || (flag_pic
2911               && REGNO (base) == PIC_OFFSET_TABLE_REGNUM))
2912         pointer = base_ptr = true;
2913 
2914       if ((reload_completed || reload_in_progress)
2915 	  && base == cfun->machine->base_reg)
2916         pointer = base_ptr = literal_pool = true;
2917     }
2918 
2919   /* Validate index register.  */
2920   if (indx)
2921     {
2922       if (GET_CODE (indx) == UNSPEC)
2923 	switch (XINT (indx, 1))
2924 	  {
2925 	  case UNSPEC_LTREF:
2926 	    if (!disp)
2927 	      disp = gen_rtx_UNSPEC (Pmode,
2928 				     gen_rtvec (1, XVECEXP (indx, 0, 0)),
2929 				     UNSPEC_LTREL_OFFSET);
2930 	    else
2931 	      return false;
2932 
2933 	    indx = XVECEXP (indx, 0, 1);
2934 	    break;
2935 
2936 	  case UNSPEC_LTREL_BASE:
2937 	    if (XVECLEN (indx, 0) == 1)
2938 	      indx = fake_pool_base, literal_pool = true;
2939 	    else
2940 	      indx = XVECEXP (indx, 0, 1);
2941 	    break;
2942 
2943 	  default:
2944 	    return false;
2945 	  }
2946 
2947       if (!REG_P (indx) || GET_MODE (indx) != Pmode)
2948 	return false;
2949 
2950       if (REGNO (indx) == STACK_POINTER_REGNUM
2951 	  || REGNO (indx) == FRAME_POINTER_REGNUM
2952 	  || ((reload_completed || reload_in_progress)
2953 	      && frame_pointer_needed
2954 	      && REGNO (indx) == HARD_FRAME_POINTER_REGNUM)
2955 	  || REGNO (indx) == ARG_POINTER_REGNUM
2956           || (flag_pic
2957               && REGNO (indx) == PIC_OFFSET_TABLE_REGNUM))
2958         pointer = indx_ptr = true;
2959 
2960       if ((reload_completed || reload_in_progress)
2961 	  && indx == cfun->machine->base_reg)
2962         pointer = indx_ptr = literal_pool = true;
2963     }
2964 
2965   /* Prefer to use pointer as base, not index.  */
2966   if (base && indx && !base_ptr
2967       && (indx_ptr || (!REG_POINTER (base) && REG_POINTER (indx))))
2968     {
2969       rtx tmp = base;
2970       base = indx;
2971       indx = tmp;
2972     }
2973 
2974   /* Validate displacement.  */
2975   if (!disp)
2976     {
2977       /* If virtual registers are involved, the displacement will change later
2978 	 anyway as the virtual registers get eliminated.  This could make a
2979 	 valid displacement invalid, but it is more likely to make an invalid
2980 	 displacement valid, because we sometimes access the register save area
2981 	 via negative offsets to one of those registers.
2982 	 Thus we don't check the displacement for validity here.  If after
2983 	 elimination the displacement turns out to be invalid after all,
2984 	 this is fixed up by reload in any case.  */
2985       /* LRA always keeps displacements up to date, and we need the
2986 	 displacement to be correct throughout LRA, not only at the
2987 	 final elimination.  */
2988       if (lra_in_progress
2989 	  || (base != arg_pointer_rtx
2990 	      && indx != arg_pointer_rtx
2991 	      && base != return_address_pointer_rtx
2992 	      && indx != return_address_pointer_rtx
2993 	      && base != frame_pointer_rtx
2994 	      && indx != frame_pointer_rtx
2995 	      && base != virtual_stack_vars_rtx
2996 	      && indx != virtual_stack_vars_rtx))
2997 	if (!DISP_IN_RANGE (offset))
2998 	  return false;
2999     }
3000   else
3001     {
3002       /* All the special cases are pointers.  */
3003       pointer = true;
3004 
3005       /* In the small-PIC case, the linker converts @GOT
3006          and @GOTNTPOFF offsets to possible displacements.  */
3007       if (GET_CODE (disp) == UNSPEC
3008           && (XINT (disp, 1) == UNSPEC_GOT
3009 	      || XINT (disp, 1) == UNSPEC_GOTNTPOFF)
3010 	  && flag_pic == 1)
3011         {
3012 	  ;
3013         }
3014 
3015       /* Accept pool label offsets.  */
3016       else if (GET_CODE (disp) == UNSPEC
3017 	       && XINT (disp, 1) == UNSPEC_POOL_OFFSET)
3018 	;
3019 
3020       /* Accept literal pool references.  */
3021       else if (GET_CODE (disp) == UNSPEC
3022 	       && XINT (disp, 1) == UNSPEC_LTREL_OFFSET)
3023         {
3024 	  /* In case CSE pulled a non-literal-pool reference out of
3025 	     the pool we have to reject the address.  This is
3026 	     especially important when loading the GOT pointer on non
3027 	     zarch CPUs.  In this case the literal pool contains an lt
3028 	     relative offset to the _GLOBAL_OFFSET_TABLE_ label which
3029 	     will most likely exceed the displacement.  */
3030 	  if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
3031 	      || !CONSTANT_POOL_ADDRESS_P (XVECEXP (disp, 0, 0)))
3032 	    return false;
3033 
3034 	  orig_disp = gen_rtx_CONST (Pmode, disp);
3035 	  if (offset)
3036 	    {
3037 	      /* If we have an offset, make sure it does not
3038 		 exceed the size of the constant pool entry.  */
3039 	      rtx sym = XVECEXP (disp, 0, 0);
3040 	      if (offset >= GET_MODE_SIZE (get_pool_mode (sym)))
3041 		return false;
3042 
3043               orig_disp = plus_constant (Pmode, orig_disp, offset);
3044 	    }
3045         }
3046 
3047       else
3048 	return false;
3049     }
3050 
3051   if (!base && !indx)
3052     pointer = true;
3053 
3054   if (out)
3055     {
3056       out->base = base;
3057       out->indx = indx;
3058       out->disp = orig_disp;
3059       out->pointer = pointer;
3060       out->literal_pool = literal_pool;
3061     }
3062 
3063   return true;
3064 }
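
/* Illustrative example (added for exposition, not part of the original
   sources): for ADDR = (plus (plus (reg %r2) (reg %r3)) (const_int 100))
   the code above initially splits the address into indx = %r2,
   base = %r3 and an offset of 100, the canonical base + index +
   displacement form; base and index may then be swapped so that a
   register known to hold a pointer ends up as base.  */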
3065 
3066 /* Decompose an RTL expression OP for an address style operand into its
3067    components, and return the base register in BASE and the offset in
3068    OFFSET.  While OP looks like an address it is never supposed to be
3069    used as such.
3070 
3071    Return true if OP is a valid address operand, false if not.  */
3072 
3073 bool
3074 s390_decompose_addrstyle_without_index (rtx op, rtx *base,
3075 					HOST_WIDE_INT *offset)
3076 {
3077   rtx off = NULL_RTX;
3078 
3079   /* We can have an integer constant, an address register,
3080      or a sum of the two.  */
3081   if (CONST_SCALAR_INT_P (op))
3082     {
3083       off = op;
3084       op = NULL_RTX;
3085     }
3086   if (op && GET_CODE (op) == PLUS && CONST_SCALAR_INT_P (XEXP (op, 1)))
3087     {
3088       off = XEXP (op, 1);
3089       op = XEXP (op, 0);
3090     }
3091   while (op && GET_CODE (op) == SUBREG)
3092     op = SUBREG_REG (op);
3093 
3094   if (op && GET_CODE (op) != REG)
3095     return false;
3096 
3097   if (offset)
3098     {
3099       if (off == NULL_RTX)
3100 	*offset = 0;
3101       else if (CONST_INT_P (off))
3102 	*offset = INTVAL (off);
3103       else if (CONST_WIDE_INT_P (off))
3104 	/* The offset will anyway be cut down to 12 bits so take just
3105 	   the lowest order chunk of the wide int.  */
3106 	*offset = CONST_WIDE_INT_ELT (off, 0);
3107       else
3108 	gcc_unreachable ();
3109     }
3110   if (base)
3111     *base = op;
3112 
3113    return true;
3114 }
3115 
3116 
3117 /* Return true if OP is a valid address without index.  */
3118 
3119 bool
3120 s390_legitimate_address_without_index_p (rtx op)
3121 {
3122   struct s390_address addr;
3123 
3124   if (!s390_decompose_address (XEXP (op, 0), &addr))
3125     return false;
3126   if (addr.indx)
3127     return false;
3128 
3129   return true;
3130 }
3131 
3132 
3133 /* Return TRUE if ADDR is an operand valid for a load/store relative
3134    instruction.  Be aware that the alignment of the operand needs to
3135    be checked separately.
3136    Valid addresses are single references or a sum of a reference and a
3137    constant integer. Return these parts in SYMREF and ADDEND.  You can
3138    pass NULL in SYMREF and/or ADDEND if you are not interested in these
3139    values.  Literal pool references are *not* considered symbol
3140    references.  */
3141 
3142 static bool
3143 s390_loadrelative_operand_p (rtx addr, rtx *symref, HOST_WIDE_INT *addend)
3144 {
3145   HOST_WIDE_INT tmpaddend = 0;
3146 
3147   if (GET_CODE (addr) == CONST)
3148     addr = XEXP (addr, 0);
3149 
3150   if (GET_CODE (addr) == PLUS)
3151     {
3152       if (!CONST_INT_P (XEXP (addr, 1)))
3153 	return false;
3154 
3155       tmpaddend = INTVAL (XEXP (addr, 1));
3156       addr = XEXP (addr, 0);
3157     }
3158 
3159   if ((GET_CODE (addr) == SYMBOL_REF && !CONSTANT_POOL_ADDRESS_P (addr))
3160       || (GET_CODE (addr) == UNSPEC
3161 	  && (XINT (addr, 1) == UNSPEC_GOTENT
3162 	      || (TARGET_CPU_ZARCH && XINT (addr, 1) == UNSPEC_PLT))))
3163     {
3164       if (symref)
3165 	*symref = addr;
3166       if (addend)
3167 	*addend = tmpaddend;
3168 
3169       return true;
3170     }
3171   return false;
3172 }
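
/* Illustrative example (added for exposition, not part of the original
   sources): for ADDR = (const (plus (symbol_ref "foo") (const_int 8)))
   the function returns true with *SYMREF = (symbol_ref "foo") and
   *ADDEND = 8, provided "foo" is not a literal pool address.  */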
3173 
3174 /* Return true if the address in OP is valid for constraint letter C
3175    if wrapped in a MEM rtx.  Set LIT_POOL_OK to true if literal
3176    pool MEMs should be accepted.  Only the Q, R, S, T constraint
3177    letters are allowed for C.  */
3178 
3179 static int
3180 s390_check_qrst_address (char c, rtx op, bool lit_pool_ok)
3181 {
3182   struct s390_address addr;
3183   bool decomposed = false;
3184 
3185   if (!address_operand (op, GET_MODE (op)))
3186     return 0;
3187 
3188   /* This check makes sure that no symbolic addresses (except literal
3189      pool references) are accepted by the R or T constraints.  */
3190   if (s390_loadrelative_operand_p (op, NULL, NULL))
3191     return 0;
3192 
3193   /* Ensure literal pool references are only accepted if LIT_POOL_OK.  */
3194   if (!lit_pool_ok)
3195     {
3196       if (!s390_decompose_address (op, &addr))
3197 	return 0;
3198       if (addr.literal_pool)
3199 	return 0;
3200       decomposed = true;
3201     }
3202 
3203   /* With reload, we sometimes get intermediate address forms that are
3204      actually invalid as-is, but we need to accept them in the most
3205      generic cases below ('R' or 'T'), since reload will in fact fix
3206      them up.  LRA behaves differently here; we never see such forms,
3207      but on the other hand, we need to strictly reject every invalid
3208      address form.  Perform this check right up front.  */
3209   if (lra_in_progress)
3210     {
3211       if (!decomposed && !s390_decompose_address (op, &addr))
3212 	return 0;
3213       decomposed = true;
3214     }
3215 
3216   switch (c)
3217     {
3218     case 'Q': /* no index short displacement */
3219       if (!decomposed && !s390_decompose_address (op, &addr))
3220 	return 0;
3221       if (addr.indx)
3222 	return 0;
3223       if (!s390_short_displacement (addr.disp))
3224 	return 0;
3225       break;
3226 
3227     case 'R': /* with index short displacement */
3228       if (TARGET_LONG_DISPLACEMENT)
3229 	{
3230 	  if (!decomposed && !s390_decompose_address (op, &addr))
3231 	    return 0;
3232 	  if (!s390_short_displacement (addr.disp))
3233 	    return 0;
3234 	}
3235       /* Any invalid address here will be fixed up by reload,
3236 	 so accept it for the most generic constraint.  */
3237       break;
3238 
3239     case 'S': /* no index long displacement */
3240       if (!decomposed && !s390_decompose_address (op, &addr))
3241 	return 0;
3242       if (addr.indx)
3243 	return 0;
3244       break;
3245 
3246     case 'T': /* with index long displacement */
3247       /* Any invalid address here will be fixed up by reload,
3248 	 so accept it for the most generic constraint.  */
3249       break;
3250 
3251     default:
3252       return 0;
3253     }
3254   return 1;
3255 }
3256 
3257 
3258 /* Evaluates constraint strings described by the regular expression
3259    ([A|B|Z](Q|R|S|T))|Y and returns 1 if OP is a valid operand for
3260    the constraint given in STR, and 0 otherwise.  */
3261 
3262 int
3263 s390_mem_constraint (const char *str, rtx op)
3264 {
3265   char c = str[0];
3266 
3267   switch (c)
3268     {
3269     case 'A':
3270       /* Check for offsettable variants of memory constraints.  */
3271       if (!MEM_P (op) || MEM_VOLATILE_P (op))
3272 	return 0;
3273       if ((reload_completed || reload_in_progress)
3274 	  ? !offsettable_memref_p (op) : !offsettable_nonstrict_memref_p (op))
3275 	return 0;
3276       return s390_check_qrst_address (str[1], XEXP (op, 0), true);
3277     case 'B':
3278       /* Check for non-literal-pool variants of memory constraints.  */
3279       if (!MEM_P (op))
3280 	return 0;
3281       return s390_check_qrst_address (str[1], XEXP (op, 0), false);
3282     case 'Q':
3283     case 'R':
3284     case 'S':
3285     case 'T':
3286       if (GET_CODE (op) != MEM)
3287 	return 0;
3288       return s390_check_qrst_address (c, XEXP (op, 0), true);
3289     case 'Y':
3290       /* Simply check for the basic form of a shift count.  Reload will
3291 	 take care of making sure we have a proper base register.  */
3292       if (!s390_decompose_addrstyle_without_index (op, NULL, NULL))
3293 	return 0;
3294       break;
3295     case 'Z':
3296       return s390_check_qrst_address (str[1], op, true);
3297     default:
3298       return 0;
3299     }
3300   return 1;
3301 }
3302 
3303 
3304 /* Evaluates constraint strings starting with letter O.  Input
3305    parameter C is the letter following the "O" in the constraint
3306    string. Returns 1 if VALUE meets the respective constraint and 0
3307    otherwise.  */
3308 
3309 int
3310 s390_O_constraint_str (const char c, HOST_WIDE_INT value)
3311 {
3312   if (!TARGET_EXTIMM)
3313     return 0;
3314 
3315   switch (c)
3316     {
3317     case 's':
3318       return trunc_int_for_mode (value, SImode) == value;
3319 
3320     case 'p':
3321       return value == 0
3322 	|| s390_single_part (GEN_INT (value), DImode, SImode, 0) == 1;
3323 
3324     case 'n':
3325       return s390_single_part (GEN_INT (value - 1), DImode, SImode, -1) == 1;
3326 
3327     default:
3328       gcc_unreachable ();
3329     }
3330 }
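
/* Illustrative example (added for exposition, not part of the original
   sources): 0x00000000ffff0000 satisfies "Op" because its only nonzero
   SImode part is the low word (s390_single_part returns 1), whereas
   0xffff000000000000 does not.  */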
3331 
3332 
3333 /* Evaluates constraint strings starting with letter N.  Parameter STR
3334    contains the letters following letter "N" in the constraint string.
3335    Returns true if VALUE matches the constraint.  */
3336 
3337 int
3338 s390_N_constraint_str (const char *str, HOST_WIDE_INT value)
3339 {
3340   machine_mode mode, part_mode;
3341   int def;
3342   int part, part_goal;
3343 
3344 
3345   if (str[0] == 'x')
3346     part_goal = -1;
3347   else
3348     part_goal = str[0] - '0';
3349 
3350   switch (str[1])
3351     {
3352     case 'Q':
3353       part_mode = QImode;
3354       break;
3355     case 'H':
3356       part_mode = HImode;
3357       break;
3358     case 'S':
3359       part_mode = SImode;
3360       break;
3361     default:
3362       return 0;
3363     }
3364 
3365   switch (str[2])
3366     {
3367     case 'H':
3368       mode = HImode;
3369       break;
3370     case 'S':
3371       mode = SImode;
3372       break;
3373     case 'D':
3374       mode = DImode;
3375       break;
3376     default:
3377       return 0;
3378     }
3379 
3380   switch (str[3])
3381     {
3382     case '0':
3383       def = 0;
3384       break;
3385     case 'F':
3386       def = -1;
3387       break;
3388     default:
3389       return 0;
3390     }
3391 
3392   if (GET_MODE_SIZE (mode) <= GET_MODE_SIZE (part_mode))
3393     return 0;
3394 
3395   part = s390_single_part (GEN_INT (value), mode, part_mode, def);
3396   if (part < 0)
3397     return 0;
3398   if (part_goal != -1 && part_goal != part)
3399     return 0;
3400 
3401   return 1;
3402 }
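
/* Illustrative example (not part of the original sources): the suffix
   "xHD0" parses as part_goal = any, part_mode = HImode, mode = DImode,
   def = 0, i.e. it matches a DImode constant in which exactly one
   16-bit halfword is nonzero.  0x0000ffff00000000 qualifies, while
   0x0000ffff0000ffff has two nonzero halfwords and is rejected.  */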
3403 
3404 
3405 /* Returns true if the input parameter VALUE is a float zero.  */
3406 
3407 int
3408 s390_float_const_zero_p (rtx value)
3409 {
3410   return (GET_MODE_CLASS (GET_MODE (value)) == MODE_FLOAT
3411 	  && value == CONST0_RTX (GET_MODE (value)));
3412 }
3413 
3414 /* Implement TARGET_REGISTER_MOVE_COST.  */
3415 
3416 static int
3417 s390_register_move_cost (machine_mode mode,
3418                          reg_class_t from, reg_class_t to)
3419 {
3420   /* On s390, copy between fprs and gprs is expensive.  */
3421 
3422   /* Such copies become somewhat faster once ldgr/lgdr are available.  */
3423   if (TARGET_Z10 && GET_MODE_SIZE (mode) == 8)
3424     {
3425       /* ldgr is single cycle. */
3426       if (reg_classes_intersect_p (from, GENERAL_REGS)
3427 	  && reg_classes_intersect_p (to, FP_REGS))
3428 	return 1;
3429       /* lgdr needs 3 cycles. */
3430       if (reg_classes_intersect_p (to, GENERAL_REGS)
3431 	  && reg_classes_intersect_p (from, FP_REGS))
3432 	return 3;
3433     }
3434 
3435   /* Otherwise copying is done via memory.  */
3436   if ((reg_classes_intersect_p (from, GENERAL_REGS)
3437        && reg_classes_intersect_p (to, FP_REGS))
3438       || (reg_classes_intersect_p (from, FP_REGS)
3439 	  && reg_classes_intersect_p (to, GENERAL_REGS)))
3440     return 10;
3441 
3442   return 1;
3443 }
3444 
3445 /* Implement TARGET_MEMORY_MOVE_COST.  */
3446 
3447 static int
3448 s390_memory_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
3449 		       reg_class_t rclass ATTRIBUTE_UNUSED,
3450 		       bool in ATTRIBUTE_UNUSED)
3451 {
3452   return 2;
3453 }
3454 
3455 /* Compute a (partial) cost for rtx X.  Return true if the complete
3456    cost has been computed, and false if subexpressions should be
3457    scanned.  In either case, *TOTAL contains the cost result.  The
3458    initial value of *TOTAL is the default value computed by
3459    rtx_cost.  It may be left unmodified.  OUTER_CODE contains the
3460    code of the superexpression of x.  */
3461 
3462 static bool
3463 s390_rtx_costs (rtx x, machine_mode mode, int outer_code,
3464 		int opno ATTRIBUTE_UNUSED,
3465 		int *total, bool speed ATTRIBUTE_UNUSED)
3466 {
3467   int code = GET_CODE (x);
3468   switch (code)
3469     {
3470     case CONST:
3471     case CONST_INT:
3472     case LABEL_REF:
3473     case SYMBOL_REF:
3474     case CONST_DOUBLE:
3475     case CONST_WIDE_INT:
3476     case MEM:
3477       *total = 0;
3478       return true;
3479 
3480     case SET:
3481       {
3482 	/* Without this a conditional move instruction would be
3483 	   accounted as 3 * COSTS_N_INSNS (set, if_then_else,
3484 	   comparison operator).  That's a bit pessimistic.  */
3485 
3486 	if (!TARGET_Z196 || GET_CODE (SET_SRC (x)) != IF_THEN_ELSE)
3487 	  return false;
3488 
3489 	rtx cond = XEXP (SET_SRC (x), 0);
3490 
3491 	if (!CC_REG_P (XEXP (cond, 0)) || !CONST_INT_P (XEXP (cond, 1)))
3492 	  return false;
3493 
3494 	/* It is going to be a load/store on condition.  Make it
3495 	   slightly more expensive than a normal load.  */
3496 	*total = COSTS_N_INSNS (1) + 1;
3497 
3498 	rtx dst = SET_DEST (x);
3499 	rtx then = XEXP (SET_SRC (x), 1);
3500 	rtx els = XEXP (SET_SRC (x), 2);
3501 
3502 	/* It is a real IF-THEN-ELSE.  An additional move will be
3503 	   needed to implement that.  */
3504 	if (reload_completed
3505 	    && !rtx_equal_p (dst, then)
3506 	    && !rtx_equal_p (dst, els))
3507 	  *total += COSTS_N_INSNS (1) / 2;
3508 
3509 	/* A minor penalty for constants we cannot directly handle.  */
3510 	if ((CONST_INT_P (then) || CONST_INT_P (els))
3511 	    && (!TARGET_Z13 || MEM_P (dst)
3512 		|| (CONST_INT_P (then) && !satisfies_constraint_K (then))
3513 		|| (CONST_INT_P (els) && !satisfies_constraint_K (els))))
3514 	  *total += COSTS_N_INSNS (1) / 2;
3515 
3516 	/* A store on condition can only handle register src operands.  */
3517 	if (MEM_P (dst) && (!REG_P (then) || !REG_P (els)))
3518 	  *total += COSTS_N_INSNS (1) / 2;
3519 
3520 	return true;
3521       }
3522     case IOR:
3523       /* risbg */
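      /* I.e. an expression of the shape
	   (ior (and (reg A) (const_int (1 << N) - 1))
		(ashift (reg B) (const_int N)))
	 which maps onto a risbg-style rotate-and-insert; the fixed
	 two-insn cost below is the estimate used here.  */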
3524       if (GET_CODE (XEXP (x, 0)) == AND
3525 	  && GET_CODE (XEXP (x, 1)) == ASHIFT
3526 	  && REG_P (XEXP (XEXP (x, 0), 0))
3527 	  && REG_P (XEXP (XEXP (x, 1), 0))
3528 	  && CONST_INT_P (XEXP (XEXP (x, 0), 1))
3529 	  && CONST_INT_P (XEXP (XEXP (x, 1), 1))
3530 	  && (UINTVAL (XEXP (XEXP (x, 0), 1)) ==
3531 	      (HOST_WIDE_INT_1U << UINTVAL (XEXP (XEXP (x, 1), 1))) - 1))
3532 	{
3533 	  *total = COSTS_N_INSNS (2);
3534 	  return true;
3535 	}
3536 
3537       /* ~AND on a 128 bit mode.  This can be done using a vector
3538 	 instruction.  */
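      /* I.e. (ior (not (reg A)) (not (reg B))), which by De Morgan is
	 ~(A & B); the vector-enhancements facility can do this NAND in
	 one vector instruction, hence the single-insn cost below.  */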
3539       if (TARGET_VXE
3540 	  && GET_CODE (XEXP (x, 0)) == NOT
3541 	  && GET_CODE (XEXP (x, 1)) == NOT
3542 	  && REG_P (XEXP (XEXP (x, 0), 0))
3543 	  && REG_P (XEXP (XEXP (x, 1), 0))
3544 	  && GET_MODE_SIZE (GET_MODE (XEXP (XEXP (x, 0), 0))) == 16
3545 	  && s390_hard_regno_mode_ok (VR0_REGNUM,
3546 				      GET_MODE (XEXP (XEXP (x, 0), 0))))
3547 	{
3548 	  *total = COSTS_N_INSNS (1);
3549 	  return true;
3550 	}
3551       /* fallthrough */
3552     case ASHIFT:
3553     case ASHIFTRT:
3554     case LSHIFTRT:
3555     case ROTATE:
3556     case ROTATERT:
3557     case AND:
3558     case XOR:
3559     case NEG:
3560     case NOT:
3561       *total = COSTS_N_INSNS (1);
3562       return false;
3563 
3564     case PLUS:
3565     case MINUS:
3566       *total = COSTS_N_INSNS (1);
3567       return false;
3568 
3569     case MULT:
3570       switch (mode)
3571 	{
3572 	case E_SImode:
3573 	  {
3574 	    rtx left = XEXP (x, 0);
3575 	    rtx right = XEXP (x, 1);
3576 	    if (GET_CODE (right) == CONST_INT
3577 		&& CONST_OK_FOR_K (INTVAL (right)))
3578 	      *total = s390_cost->mhi;
3579 	    else if (GET_CODE (left) == SIGN_EXTEND)
3580 	      *total = s390_cost->mh;
3581 	    else
3582 	      *total = s390_cost->ms;  /* msr, ms, msy */
3583 	    break;
3584 	  }
3585 	case E_DImode:
3586 	  {
3587 	    rtx left = XEXP (x, 0);
3588 	    rtx right = XEXP (x, 1);
3589 	    if (TARGET_ZARCH)
3590 	      {
3591 		if (GET_CODE (right) == CONST_INT
3592 		    && CONST_OK_FOR_K (INTVAL (right)))
3593 		  *total = s390_cost->mghi;
3594 		else if (GET_CODE (left) == SIGN_EXTEND)
3595 		  *total = s390_cost->msgf;
3596 		else
3597 		  *total = s390_cost->msg;  /* msgr, msg */
3598 	      }
3599 	    else /* TARGET_31BIT */
3600 	      {
3601 		if (GET_CODE (left) == SIGN_EXTEND
3602 		    && GET_CODE (right) == SIGN_EXTEND)
3603 		  /* mulsidi case: mr, m */
3604 		  *total = s390_cost->m;
3605 		else if (GET_CODE (left) == ZERO_EXTEND
3606 			 && GET_CODE (right) == ZERO_EXTEND
3607 			 && TARGET_CPU_ZARCH)
3608 		  /* umulsidi case: ml, mlr */
3609 		  *total = s390_cost->ml;
3610 		else
3611 		  /* Complex calculation is required.  */
3612 		  *total = COSTS_N_INSNS (40);
3613 	      }
3614 	    break;
3615 	  }
3616 	case E_SFmode:
3617 	case E_DFmode:
3618 	  *total = s390_cost->mult_df;
3619 	  break;
3620 	case E_TFmode:
3621 	  *total = s390_cost->mxbr;
3622 	  break;
3623 	default:
3624 	  return false;
3625 	}
3626       return false;
3627 
3628     case FMA:
3629       switch (mode)
3630 	{
3631 	case E_DFmode:
3632 	  *total = s390_cost->madbr;
3633 	  break;
3634 	case E_SFmode:
3635 	  *total = s390_cost->maebr;
3636 	  break;
3637 	default:
3638 	  return false;
3639 	}
3640       /* Negate in the third argument is free: FMSUB.  */
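      /* E.g. fma (a, b, -c): the fused multiply-and-subtract form
	 absorbs the negation, so only the costs of the three operands
	 are summed and the NEG itself contributes nothing.  */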
3641       if (GET_CODE (XEXP (x, 2)) == NEG)
3642 	{
3643 	  *total += (rtx_cost (XEXP (x, 0), mode, FMA, 0, speed)
3644 		     + rtx_cost (XEXP (x, 1), mode, FMA, 1, speed)
3645 		     + rtx_cost (XEXP (XEXP (x, 2), 0), mode, FMA, 2, speed));
3646 	  return true;
3647 	}
3648       return false;
3649 
3650     case UDIV:
3651     case UMOD:
3652       if (mode == TImode) 	       /* 128 bit division */
3653 	*total = s390_cost->dlgr;
3654       else if (mode == DImode)
3655 	{
3656 	  rtx right = XEXP (x, 1);
3657 	  if (GET_CODE (right) == ZERO_EXTEND) /* 64 by 32 bit division */
3658 	    *total = s390_cost->dlr;
3659 	  else 	                               /* 64 by 64 bit division */
3660 	    *total = s390_cost->dlgr;
3661 	}
3662       else if (mode == SImode)         /* 32 bit division */
3663 	*total = s390_cost->dlr;
3664       return false;
3665 
3666     case DIV:
3667     case MOD:
3668       if (mode == DImode)
3669 	{
3670 	  rtx right = XEXP (x, 1);
3671 	  if (GET_CODE (right) == ZERO_EXTEND) /* 64 by 32 bit division */
3672 	    if (TARGET_ZARCH)
3673 	      *total = s390_cost->dsgfr;
3674 	    else
3675 	      *total = s390_cost->dr;
3676 	  else 	                               /* 64 by 64 bit division */
3677 	    *total = s390_cost->dsgr;
3678 	}
3679       else if (mode == SImode)         /* 32 bit division */
3680 	*total = s390_cost->dlr;
3681       else if (mode == SFmode)
3682 	{
3683 	  *total = s390_cost->debr;
3684 	}
3685       else if (mode == DFmode)
3686 	{
3687 	  *total = s390_cost->ddbr;
3688 	}
3689       else if (mode == TFmode)
3690 	{
3691 	  *total = s390_cost->dxbr;
3692 	}
3693       return false;
3694 
3695     case SQRT:
3696       if (mode == SFmode)
3697 	*total = s390_cost->sqebr;
3698       else if (mode == DFmode)
3699 	*total = s390_cost->sqdbr;
3700       else /* TFmode */
3701 	*total = s390_cost->sqxbr;
3702       return false;
3703 
3704     case SIGN_EXTEND:
3705     case ZERO_EXTEND:
3706       if (outer_code == MULT || outer_code == DIV || outer_code == MOD
3707 	  || outer_code == PLUS || outer_code == MINUS
3708 	  || outer_code == COMPARE)
3709 	*total = 0;
3710       return false;
3711 
3712     case COMPARE:
3713       *total = COSTS_N_INSNS (1);
3714       if (GET_CODE (XEXP (x, 0)) == AND
3715 	  && GET_CODE (XEXP (x, 1)) == CONST_INT
3716 	  && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)
3717 	{
3718 	  rtx op0 = XEXP (XEXP (x, 0), 0);
3719 	  rtx op1 = XEXP (XEXP (x, 0), 1);
3720 	  rtx op2 = XEXP (x, 1);
3721 
3722 	  if (memory_operand (op0, GET_MODE (op0))
3723 	      && s390_tm_ccmode (op1, op2, 0) != VOIDmode)
3724 	    return true;
3725 	  if (register_operand (op0, GET_MODE (op0))
3726 	      && s390_tm_ccmode (op1, op2, 1) != VOIDmode)
3727 	    return true;
3728 	}
3729       return false;
3730 
3731     default:
3732       return false;
3733     }
3734 }
3735 
3736 /* Return the cost of an address rtx ADDR.  */
3737 
3738 static int
3739 s390_address_cost (rtx addr, machine_mode mode ATTRIBUTE_UNUSED,
3740 		   addr_space_t as ATTRIBUTE_UNUSED,
3741 		   bool speed ATTRIBUTE_UNUSED)
3742 {
3743   struct s390_address ad;
3744   if (!s390_decompose_address (addr, &ad))
3745     return 1000;
3746 
3747   return ad.indx? COSTS_N_INSNS (1) + 1 : COSTS_N_INSNS (1);
3748 }
3749 
3750 /* Implement targetm.vectorize.builtin_vectorization_cost.  */
3751 static int
3752 s390_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
3753 				 tree vectype,
3754 				 int misalign ATTRIBUTE_UNUSED)
3755 {
3756   switch (type_of_cost)
3757     {
3758       case scalar_stmt:
3759       case scalar_load:
3760       case scalar_store:
3761       case vector_stmt:
3762       case vector_load:
3763       case vector_store:
3764       case vector_gather_load:
3765       case vector_scatter_store:
3766       case vec_to_scalar:
3767       case scalar_to_vec:
3768       case cond_branch_not_taken:
3769       case vec_perm:
3770       case vec_promote_demote:
3771       case unaligned_load:
3772       case unaligned_store:
3773 	return 1;
3774 
3775       case cond_branch_taken:
3776 	return 3;
3777 
3778       case vec_construct:
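	/* Building an N-element vector is modeled as roughly one
	   element insert per element beyond the first.  */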
3779 	return TYPE_VECTOR_SUBPARTS (vectype) - 1;
3780 
3781       default:
3782 	gcc_unreachable ();
3783     }
3784 }
3785 
3786 /* If OP is a SYMBOL_REF of a thread-local symbol, return its TLS mode,
3787    otherwise return 0.  */
3788 
3789 int
3790 tls_symbolic_operand (rtx op)
3791 {
3792   if (GET_CODE (op) != SYMBOL_REF)
3793     return 0;
3794   return SYMBOL_REF_TLS_MODEL (op);
3795 }
3796 
3797 /* Split DImode access register reference REG (on 64-bit) into its constituent
3798    low and high parts, and store them into LO and HI.  Note that gen_lowpart/
3799    gen_highpart cannot be used as they assume all registers are word-sized,
3800    while our access registers have only half that size.  */
3801 
3802 void
3803 s390_split_access_reg (rtx reg, rtx *lo, rtx *hi)
3804 {
3805   gcc_assert (TARGET_64BIT);
3806   gcc_assert (ACCESS_REG_P (reg));
3807   gcc_assert (GET_MODE (reg) == DImode);
3808   gcc_assert (!(REGNO (reg) & 1));
3809 
3810   *lo = gen_rtx_REG (SImode, REGNO (reg) + 1);
3811   *hi = gen_rtx_REG (SImode, REGNO (reg));
3812 }
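
/* Illustrative reading of the above: for the DImode pair starting at
   access register REGNO, *hi becomes (reg:SI REGNO) and *lo becomes
   (reg:SI REGNO + 1), i.e. the high word lives in the even-numbered
   access register.  */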
3813 
3814 /* Return true if OP contains a symbol reference */
3815 
3816 bool
3817 symbolic_reference_mentioned_p (rtx op)
3818 {
3819   const char *fmt;
3820   int i;
3821 
3822   if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
3823     return 1;
3824 
3825   fmt = GET_RTX_FORMAT (GET_CODE (op));
3826   for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
3827     {
3828       if (fmt[i] == 'E')
3829 	{
3830 	  int j;
3831 
3832 	  for (j = XVECLEN (op, i) - 1; j >= 0; j--)
3833 	    if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
3834 	      return 1;
3835 	}
3836 
3837       else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
3838 	return 1;
3839     }
3840 
3841   return 0;
3842 }
3843 
3844 /* Return true if OP contains a reference to a thread-local symbol.  */
3845 
3846 bool
3847 tls_symbolic_reference_mentioned_p (rtx op)
3848 {
3849   const char *fmt;
3850   int i;
3851 
3852   if (GET_CODE (op) == SYMBOL_REF)
3853     return tls_symbolic_operand (op);
3854 
3855   fmt = GET_RTX_FORMAT (GET_CODE (op));
3856   for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
3857     {
3858       if (fmt[i] == 'E')
3859 	{
3860 	  int j;
3861 
3862 	  for (j = XVECLEN (op, i) - 1; j >= 0; j--)
3863 	    if (tls_symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
3864 	      return true;
3865 	}
3866 
3867       else if (fmt[i] == 'e' && tls_symbolic_reference_mentioned_p (XEXP (op, i)))
3868 	return true;
3869     }
3870 
3871   return false;
3872 }
3873 
3874 
3875 /* Return true if OP is a legitimate general operand when
3876    generating PIC code.  It is given that flag_pic is on
3877    and that OP satisfies CONSTANT_P.  */
3878 
3879 int
3880 legitimate_pic_operand_p (rtx op)
3881 {
3882   /* Accept all non-symbolic constants.  */
3883   if (!SYMBOLIC_CONST (op))
3884     return 1;
3885 
3886   /* Reject everything else; must be handled
3887      via emit_symbolic_move.  */
3888   return 0;
3889 }
3890 
3891 /* Returns true if the constant value OP is a legitimate general operand.
3892    It is given that OP satisfies CONSTANT_P.  */
3893 
3894 static bool
3895 s390_legitimate_constant_p (machine_mode mode, rtx op)
3896 {
3897   if (TARGET_VX && VECTOR_MODE_P (mode) && GET_CODE (op) == CONST_VECTOR)
3898     {
3899       if (GET_MODE_SIZE (mode) != 16)
3900 	return 0;
3901 
3902       if (!satisfies_constraint_j00 (op)
3903 	  && !satisfies_constraint_jm1 (op)
3904 	  && !satisfies_constraint_jKK (op)
3905 	  && !satisfies_constraint_jxx (op)
3906 	  && !satisfies_constraint_jyy (op))
3907 	return 0;
3908     }
3909 
3910   /* Accept all non-symbolic constants.  */
3911   if (!SYMBOLIC_CONST (op))
3912     return 1;
3913 
3914   /* Accept immediate LARL operands.  */
3915   if (TARGET_CPU_ZARCH && larl_operand (op, mode))
3916     return 1;
3917 
3918   /* Thread-local symbols are never legal constants.  This is
3919      so that emit_call knows that computing such addresses
3920      might require a function call.  */
3921   if (TLS_SYMBOLIC_CONST (op))
3922     return 0;
3923 
3924   /* In the PIC case, symbolic constants must *not* be
3925      forced into the literal pool.  We accept them here,
3926      so that they will be handled by emit_symbolic_move.  */
3927   if (flag_pic)
3928     return 1;
3929 
3930   /* All remaining non-PIC symbolic constants are
3931      forced into the literal pool.  */
3932   return 0;
3933 }
3934 
3935 /* Determine if it's legal to put X into the constant pool.  This
3936    is not possible if X contains the address of a symbol that is
3937    not constant (TLS) or not known at final link time (PIC).  */
3938 
3939 static bool
3940 s390_cannot_force_const_mem (machine_mode mode, rtx x)
3941 {
3942   switch (GET_CODE (x))
3943     {
3944     case CONST_INT:
3945     case CONST_DOUBLE:
3946     case CONST_WIDE_INT:
3947     case CONST_VECTOR:
3948       /* Accept all non-symbolic constants.  */
3949       return false;
3950 
3951     case LABEL_REF:
3952       /* Labels are OK iff we are non-PIC.  */
3953       return flag_pic != 0;
3954 
3955     case SYMBOL_REF:
3956       /* 'Naked' TLS symbol references are never OK,
3957          non-TLS symbols are OK iff we are non-PIC.  */
3958       if (tls_symbolic_operand (x))
3959 	return true;
3960       else
3961 	return flag_pic != 0;
3962 
3963     case CONST:
3964       return s390_cannot_force_const_mem (mode, XEXP (x, 0));
3965     case PLUS:
3966     case MINUS:
3967       return s390_cannot_force_const_mem (mode, XEXP (x, 0))
3968 	     || s390_cannot_force_const_mem (mode, XEXP (x, 1));
3969 
3970     case UNSPEC:
3971       switch (XINT (x, 1))
3972 	{
3973 	/* Only lt-relative or GOT-relative UNSPECs are OK.  */
3974 	case UNSPEC_LTREL_OFFSET:
3975 	case UNSPEC_GOT:
3976 	case UNSPEC_GOTOFF:
3977 	case UNSPEC_PLTOFF:
3978 	case UNSPEC_TLSGD:
3979 	case UNSPEC_TLSLDM:
3980 	case UNSPEC_NTPOFF:
3981 	case UNSPEC_DTPOFF:
3982 	case UNSPEC_GOTNTPOFF:
3983 	case UNSPEC_INDNTPOFF:
3984 	  return false;
3985 
3986 	/* If the literal pool shares the code section, execute
3987 	   template placeholders may be put into the pool as well.  */
3988 	case UNSPEC_INSN:
3989 	  return TARGET_CPU_ZARCH;
3990 
3991 	default:
3992 	  return true;
3993 	}
3994       break;
3995 
3996     default:
3997       gcc_unreachable ();
3998     }
3999 }
4000 
4001 /* Returns true if the constant value OP is a legitimate general
4002    operand during and after reload.  The difference from
4003    legitimate_constant_p is that this function will not accept
4004    a constant that would need to be forced to the literal pool
4005    before it can be used as operand.
4006    This function accepts all constants which can be loaded directly
4007    into a GPR.  */
4008 
4009 bool
4010 legitimate_reload_constant_p (rtx op)
4011 {
4012   /* Accept la(y) operands.  */
4013   if (GET_CODE (op) == CONST_INT
4014       && DISP_IN_RANGE (INTVAL (op)))
4015     return true;
4016 
4017   /* Accept l(g)hi/l(g)fi operands.  */
4018   if (GET_CODE (op) == CONST_INT
4019       && (CONST_OK_FOR_K (INTVAL (op)) || CONST_OK_FOR_Os (INTVAL (op))))
4020     return true;
4021 
4022   /* Accept lliXX operands.  */
4023   if (TARGET_ZARCH
4024       && GET_CODE (op) == CONST_INT
4025       && trunc_int_for_mode (INTVAL (op), word_mode) == INTVAL (op)
4026       && s390_single_part (op, word_mode, HImode, 0) >= 0)
4027     return true;
4028 
4029   if (TARGET_EXTIMM
4030       && GET_CODE (op) == CONST_INT
4031       && trunc_int_for_mode (INTVAL (op), word_mode) == INTVAL (op)
4032       && s390_single_part (op, word_mode, SImode, 0) >= 0)
4033     return true;
4034 
4035   /* Accept larl operands.  */
4036   if (TARGET_CPU_ZARCH
4037       && larl_operand (op, VOIDmode))
4038     return true;
4039 
4040   /* Accept floating-point zero operands that fit into a single GPR.  */
4041   if (GET_CODE (op) == CONST_DOUBLE
4042       && s390_float_const_zero_p (op)
4043       && GET_MODE_SIZE (GET_MODE (op)) <= UNITS_PER_WORD)
4044     return true;
4045 
4046   /* Accept double-word operands that can be split.  */
4047   if (GET_CODE (op) == CONST_WIDE_INT
4048       || (GET_CODE (op) == CONST_INT
4049 	  && trunc_int_for_mode (INTVAL (op), word_mode) != INTVAL (op)))
4050     {
4051       machine_mode dword_mode = word_mode == SImode ? DImode : TImode;
4052       rtx hi = operand_subword (op, 0, 0, dword_mode);
4053       rtx lo = operand_subword (op, 1, 0, dword_mode);
4054       return legitimate_reload_constant_p (hi)
4055 	     && legitimate_reload_constant_p (lo);
4056     }
4057 
4058   /* Everything else cannot be handled without reload.  */
4059   return false;
4060 }
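
/* A few illustrative examples (not exhaustive, assuming a 64-bit
   word_mode): (const_int 300) is accepted as an la/lay displacement,
   (const_int -30000) fits the signed 16-bit range of lhi/lghi,
   (const_int 0x7fff0000) has a single nonzero 16-bit part and can be
   loaded with an lliXX instruction, and a double-word constant is
   accepted only if both of its word-sized halves are themselves
   loadable.  */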
4061 
4062 /* Returns true if the constant value OP is a legitimate fp operand
4063    during and after reload.
4064    This function accepts all constants which can be loaded directly
4065    into an FPR.  */
4066 
4067 static bool
4068 legitimate_reload_fp_constant_p (rtx op)
4069 {
4070   /* Accept floating-point zero operands if the load zero instruction
4071      can be used.  Prior to z196 the load fp zero instruction caused a
4072      performance penalty if the result is used as BFP number.  */
4073   if (TARGET_Z196
4074       && GET_CODE (op) == CONST_DOUBLE
4075       && s390_float_const_zero_p (op))
4076     return true;
4077 
4078   return false;
4079 }
4080 
4081 /* Returns true if the constant value OP is a legitimate vector operand
4082    during and after reload.
4083    This function accepts all constants which can be loaded directly
4084    into a VR.  */
4085 
4086 static bool
4087 legitimate_reload_vector_constant_p (rtx op)
4088 {
4089   if (TARGET_VX && GET_MODE_SIZE (GET_MODE (op)) == 16
4090       && (satisfies_constraint_j00 (op)
4091 	  || satisfies_constraint_jm1 (op)
4092 	  || satisfies_constraint_jKK (op)
4093 	  || satisfies_constraint_jxx (op)
4094 	  || satisfies_constraint_jyy (op)))
4095     return true;
4096 
4097   return false;
4098 }
4099 
4100 /* Given an rtx OP being reloaded into a reg required to be in class RCLASS,
4101    return the class of reg to actually use.  */
4102 
4103 static reg_class_t
4104 s390_preferred_reload_class (rtx op, reg_class_t rclass)
4105 {
4106   switch (GET_CODE (op))
4107     {
4108       /* Constants we cannot reload into general registers
4109 	 must be forced into the literal pool.  */
4110       case CONST_VECTOR:
4111       case CONST_DOUBLE:
4112       case CONST_INT:
4113       case CONST_WIDE_INT:
4114 	if (reg_class_subset_p (GENERAL_REGS, rclass)
4115 	    && legitimate_reload_constant_p (op))
4116 	  return GENERAL_REGS;
4117 	else if (reg_class_subset_p (ADDR_REGS, rclass)
4118 		 && legitimate_reload_constant_p (op))
4119 	  return ADDR_REGS;
4120 	else if (reg_class_subset_p (FP_REGS, rclass)
4121 		 && legitimate_reload_fp_constant_p (op))
4122 	  return FP_REGS;
4123 	else if (reg_class_subset_p (VEC_REGS, rclass)
4124 		 && legitimate_reload_vector_constant_p (op))
4125 	  return VEC_REGS;
4126 
4127 	return NO_REGS;
4128 
4129       /* If a symbolic constant or a PLUS is reloaded,
4130 	 it is most likely being used as an address, so
4131 	 prefer ADDR_REGS.  If 'class' is not a superset
4132 	 of ADDR_REGS, e.g. FP_REGS, reject this reload.  */
4133       case CONST:
4134 	/* Symrefs cannot be pushed into the literal pool with -fPIC
4135 	   so we *MUST NOT* return NO_REGS for these cases
4136 	   (s390_cannot_force_const_mem will return true).
4137 
4138 	   On the other hand we MUST return NO_REGS for symrefs with
4139 	   invalid addend which might have been pushed to the literal
4140 	   pool (no -fPIC).  Usually we would expect them to be
4141 	   handled via secondary reload but this does not happen if
4142 	   they are used as literal pool slot replacement in reload
4143 	   inheritance (see emit_input_reload_insns).  */
4144 	if (TARGET_CPU_ZARCH
4145 	    && GET_CODE (XEXP (op, 0)) == PLUS
4146 	    && GET_CODE (XEXP (XEXP(op, 0), 0)) == SYMBOL_REF
4147 	    && GET_CODE (XEXP (XEXP(op, 0), 1)) == CONST_INT)
4148 	  {
4149 	    if (flag_pic && reg_class_subset_p (ADDR_REGS, rclass))
4150 	      return ADDR_REGS;
4151 	    else
4152 	      return NO_REGS;
4153 	  }
4154 	/* fallthrough */
4155       case LABEL_REF:
4156       case SYMBOL_REF:
4157 	if (!legitimate_reload_constant_p (op))
4158           return NO_REGS;
4159 	/* fallthrough */
4160       case PLUS:
4161 	/* load address will be used.  */
4162 	if (reg_class_subset_p (ADDR_REGS, rclass))
4163 	  return ADDR_REGS;
4164 	else
4165 	  return NO_REGS;
4166 
4167       default:
4168 	break;
4169     }
4170 
4171   return rclass;
4172 }
4173 
4174 /* Return true if ADDR is SYMBOL_REF + addend with addend being a
4175    multiple of ALIGNMENT and the SYMBOL_REF being naturally
4176    aligned.  */
4177 
4178 bool
4179 s390_check_symref_alignment (rtx addr, HOST_WIDE_INT alignment)
4180 {
4181   HOST_WIDE_INT addend;
4182   rtx symref;
4183 
4184   /* The "required alignment" might be 0 (e.g. for certain structs
4185      accessed via BLKmode).  Early abort in this case, as well as when
4186      an alignment > 8 is required.  */
4187   if (alignment < 2 || alignment > 8)
4188     return false;
4189 
4190   if (!s390_loadrelative_operand_p (addr, &symref, &addend))
4191     return false;
4192 
4193   if (addend & (alignment - 1))
4194     return false;
4195 
4196   if (GET_CODE (symref) == SYMBOL_REF)
4197     {
4198       /* We have load-relative instructions for 2-byte, 4-byte, and
4199          8-byte alignment so allow only these.  */
4200       switch (alignment)
4201 	{
4202 	case 8:	return !SYMBOL_FLAG_NOTALIGN8_P (symref);
4203 	case 4:	return !SYMBOL_FLAG_NOTALIGN4_P (symref);
4204 	case 2:	return !SYMBOL_FLAG_NOTALIGN2_P (symref);
4205 	default: return false;
4206 	}
4207     }
4208 
4209   if (GET_CODE (symref) == UNSPEC
4210       && alignment <= UNITS_PER_LONG)
4211     return true;
4212 
4213   return false;
4214 }
4215 
4216 /* ADDR is moved into REG using larl.  If ADDR isn't a valid larl
4217    operand, SCRATCH is used to load the even part of the address,
4218    and one is then added.  */
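
/* Illustrative expansion for an odd addend (hypothetical symbol "sym"
   with addend 5):

       larl	SCRATCH, sym+4
       la	REG, 1(SCRATCH)

   The even part is loaded with larl and the final odd offset is added
   with la so that the condition code is not clobbered.  */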
4219 
4220 void
4221 s390_reload_larl_operand (rtx reg, rtx addr, rtx scratch)
4222 {
4223   HOST_WIDE_INT addend;
4224   rtx symref;
4225 
4226   if (!s390_loadrelative_operand_p (addr, &symref, &addend))
4227     gcc_unreachable ();
4228 
4229   if (!(addend & 1))
4230     /* Easy case.  The addend is even so larl will do fine.  */
4231     emit_move_insn (reg, addr);
4232   else
4233     {
4234       /* We can leave the scratch register untouched if the target
4235 	 register is a valid base register.  */
4236       if (REGNO (reg) < FIRST_PSEUDO_REGISTER
4237 	  && REGNO_REG_CLASS (REGNO (reg)) == ADDR_REGS)
4238 	scratch = reg;
4239 
4240       gcc_assert (REGNO (scratch) < FIRST_PSEUDO_REGISTER);
4241       gcc_assert (REGNO_REG_CLASS (REGNO (scratch)) == ADDR_REGS);
4242 
4243       if (addend != 1)
4244 	emit_move_insn (scratch,
4245 			gen_rtx_CONST (Pmode,
4246 				       gen_rtx_PLUS (Pmode, symref,
4247 						     GEN_INT (addend - 1))));
4248       else
4249 	emit_move_insn (scratch, symref);
4250 
4251       /* Increment the address using la in order to avoid clobbering cc.  */
4252       s390_load_address (reg, gen_rtx_PLUS (Pmode, scratch, const1_rtx));
4253     }
4254 }
4255 
4256 /* Generate what is necessary to move between REG and MEM using
4257    SCRATCH.  The direction is given by TOMEM.  */
4258 
4259 void
4260 s390_reload_symref_address (rtx reg, rtx mem, rtx scratch, bool tomem)
4261 {
4262   /* Reload might have pulled a constant out of the literal pool.
4263      Force it back in.  */
4264   if (CONST_INT_P (mem) || GET_CODE (mem) == CONST_DOUBLE
4265       || GET_CODE (mem) == CONST_WIDE_INT
4266       || GET_CODE (mem) == CONST_VECTOR
4267       || GET_CODE (mem) == CONST)
4268     mem = force_const_mem (GET_MODE (reg), mem);
4269 
4270   gcc_assert (MEM_P (mem));
4271 
4272   /* For a load from memory we can leave the scratch register
4273      untouched if the target register is a valid base register.  */
4274   if (!tomem
4275       && REGNO (reg) < FIRST_PSEUDO_REGISTER
4276       && REGNO_REG_CLASS (REGNO (reg)) == ADDR_REGS
4277       && GET_MODE (reg) == GET_MODE (scratch))
4278     scratch = reg;
4279 
4280   /* Load address into scratch register.  Since we can't have a
4281      secondary reload for a secondary reload we have to cover the case
4282      where larl would need a secondary reload here as well.  */
4283   s390_reload_larl_operand (scratch, XEXP (mem, 0), scratch);
4284 
4285   /* Now we can use a standard load/store to do the move.  */
4286   if (tomem)
4287     emit_move_insn (replace_equiv_address (mem, scratch), reg);
4288   else
4289     emit_move_insn (reg, replace_equiv_address (mem, scratch));
4290 }
4291 
4292 /* Inform reload about cases where moving X with a mode MODE to a register in
4293    RCLASS requires an extra scratch or immediate register.  Return the class
4294    needed for the immediate register.  */
4295 
4296 static reg_class_t
4297 s390_secondary_reload (bool in_p, rtx x, reg_class_t rclass_i,
4298 		       machine_mode mode, secondary_reload_info *sri)
4299 {
4300   enum reg_class rclass = (enum reg_class) rclass_i;
4301 
4302   /* Intermediate register needed.  */
4303   if (reg_classes_intersect_p (CC_REGS, rclass))
4304     return GENERAL_REGS;
4305 
4306   if (TARGET_VX)
4307     {
4308       /* The vst/vl vector move instructions allow only for short
4309 	 displacements.  */
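      /* "Short" means the 12-bit unsigned displacement of the
	 base + displacement form; for larger offsets the address is
	 first computed into an address register via the
	 reload..._la_in/_la_out patterns selected below.  */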
4310       if (MEM_P (x)
4311 	  && GET_CODE (XEXP (x, 0)) == PLUS
4312 	  && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
4313 	  && !SHORT_DISP_IN_RANGE(INTVAL (XEXP (XEXP (x, 0), 1)))
4314 	  && reg_class_subset_p (rclass, VEC_REGS)
4315 	  && (!reg_class_subset_p (rclass, FP_REGS)
4316 	      || (GET_MODE_SIZE (mode) > 8
4317 		  && s390_class_max_nregs (FP_REGS, mode) == 1)))
4318 	{
4319 	  if (in_p)
4320 	    sri->icode = (TARGET_64BIT ?
4321 			  CODE_FOR_reloaddi_la_in :
4322 			  CODE_FOR_reloadsi_la_in);
4323 	  else
4324 	    sri->icode = (TARGET_64BIT ?
4325 			  CODE_FOR_reloaddi_la_out :
4326 			  CODE_FOR_reloadsi_la_out);
4327 	}
4328     }
4329 
4330   if (TARGET_Z10)
4331     {
4332       HOST_WIDE_INT offset;
4333       rtx symref;
4334 
4335       /* On z10 several optimizer steps may generate larl operands with
4336 	 an odd addend.  */
4337       if (in_p
4338 	  && s390_loadrelative_operand_p (x, &symref, &offset)
4339 	  && mode == Pmode
4340 	  && !SYMBOL_FLAG_NOTALIGN2_P (symref)
4341 	  && (offset & 1) == 1)
4342 	sri->icode = ((mode == DImode) ? CODE_FOR_reloaddi_larl_odd_addend_z10
4343 		      : CODE_FOR_reloadsi_larl_odd_addend_z10);
4344 
4345       /* Handle all the (mem (symref)) accesses we cannot use the z10
4346 	 instructions for.  */
4347       if (MEM_P (x)
4348 	  && s390_loadrelative_operand_p (XEXP (x, 0), NULL, NULL)
4349 	  && (mode == QImode
4350 	      || !reg_class_subset_p (rclass, GENERAL_REGS)
4351 	      || GET_MODE_SIZE (mode) > UNITS_PER_WORD
4352 	      || !s390_check_symref_alignment (XEXP (x, 0),
4353 					       GET_MODE_SIZE (mode))))
4354 	{
4355 #define __SECONDARY_RELOAD_CASE(M,m)					\
4356 	  case E_##M##mode:						\
4357 	    if (TARGET_64BIT)						\
4358 	      sri->icode = in_p ? CODE_FOR_reload##m##di_toreg_z10 :	\
4359                                   CODE_FOR_reload##m##di_tomem_z10;	\
4360 	    else							\
4361   	      sri->icode = in_p ? CODE_FOR_reload##m##si_toreg_z10 :	\
4362                                   CODE_FOR_reload##m##si_tomem_z10;	\
4363 	  break;
4364 
4365 	  switch (GET_MODE (x))
4366 	    {
4367 	      __SECONDARY_RELOAD_CASE (QI, qi);
4368 	      __SECONDARY_RELOAD_CASE (HI, hi);
4369 	      __SECONDARY_RELOAD_CASE (SI, si);
4370 	      __SECONDARY_RELOAD_CASE (DI, di);
4371 	      __SECONDARY_RELOAD_CASE (TI, ti);
4372 	      __SECONDARY_RELOAD_CASE (SF, sf);
4373 	      __SECONDARY_RELOAD_CASE (DF, df);
4374 	      __SECONDARY_RELOAD_CASE (TF, tf);
4375 	      __SECONDARY_RELOAD_CASE (SD, sd);
4376 	      __SECONDARY_RELOAD_CASE (DD, dd);
4377 	      __SECONDARY_RELOAD_CASE (TD, td);
4378 	      __SECONDARY_RELOAD_CASE (V1QI, v1qi);
4379 	      __SECONDARY_RELOAD_CASE (V2QI, v2qi);
4380 	      __SECONDARY_RELOAD_CASE (V4QI, v4qi);
4381 	      __SECONDARY_RELOAD_CASE (V8QI, v8qi);
4382 	      __SECONDARY_RELOAD_CASE (V16QI, v16qi);
4383 	      __SECONDARY_RELOAD_CASE (V1HI, v1hi);
4384 	      __SECONDARY_RELOAD_CASE (V2HI, v2hi);
4385 	      __SECONDARY_RELOAD_CASE (V4HI, v4hi);
4386 	      __SECONDARY_RELOAD_CASE (V8HI, v8hi);
4387 	      __SECONDARY_RELOAD_CASE (V1SI, v1si);
4388 	      __SECONDARY_RELOAD_CASE (V2SI, v2si);
4389 	      __SECONDARY_RELOAD_CASE (V4SI, v4si);
4390 	      __SECONDARY_RELOAD_CASE (V1DI, v1di);
4391 	      __SECONDARY_RELOAD_CASE (V2DI, v2di);
4392 	      __SECONDARY_RELOAD_CASE (V1TI, v1ti);
4393 	      __SECONDARY_RELOAD_CASE (V1SF, v1sf);
4394 	      __SECONDARY_RELOAD_CASE (V2SF, v2sf);
4395 	      __SECONDARY_RELOAD_CASE (V4SF, v4sf);
4396 	      __SECONDARY_RELOAD_CASE (V1DF, v1df);
4397 	      __SECONDARY_RELOAD_CASE (V2DF, v2df);
4398 	      __SECONDARY_RELOAD_CASE (V1TF, v1tf);
4399 	    default:
4400 	      gcc_unreachable ();
4401 	    }
4402 #undef __SECONDARY_RELOAD_CASE
4403 	}
4404     }
4405 
4406   /* We need a scratch register when loading a PLUS expression which
4407      is not a legitimate operand of the LOAD ADDRESS instruction.  */
4408   /* LRA can deal with transformation of plus op very well -- so we
4409      don't need to prompt LRA in this case.  */
4410   if (! lra_in_progress && in_p && s390_plus_operand (x, mode))
4411     sri->icode = (TARGET_64BIT ?
4412 		  CODE_FOR_reloaddi_plus : CODE_FOR_reloadsi_plus);
4413 
4414   /* Performing a multiword move from or to memory we have to make sure the
4415      second chunk in memory is addressable without causing a displacement
4416      overflow.  If that would be the case we calculate the address in
4417      a scratch register.  */
4418   if (MEM_P (x)
4419       && GET_CODE (XEXP (x, 0)) == PLUS
4420       && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
4421       && !DISP_IN_RANGE (INTVAL (XEXP (XEXP (x, 0), 1))
4422 			 + GET_MODE_SIZE (mode) - 1))
4423     {
4424       /* For GENERAL_REGS a displacement overflow is no problem if occurring
4425 	 in a s_operand address since we may fallback to lm/stm.  So we only
4426 	 have to care about overflows in the b+i+d case.  */
4427       if ((reg_classes_intersect_p (GENERAL_REGS, rclass)
4428 	   && s390_class_max_nregs (GENERAL_REGS, mode) > 1
4429 	   && GET_CODE (XEXP (XEXP (x, 0), 0)) == PLUS)
4430 	  /* For FP_REGS no lm/stm is available so this check is triggered
4431 	     for displacement overflows in b+i+d and b+d like addresses.  */
4432 	  || (reg_classes_intersect_p (FP_REGS, rclass)
4433 	      && s390_class_max_nregs (FP_REGS, mode) > 1))
4434 	{
4435 	  if (in_p)
4436 	    sri->icode = (TARGET_64BIT ?
4437 			  CODE_FOR_reloaddi_la_in :
4438 			  CODE_FOR_reloadsi_la_in);
4439 	  else
4440 	    sri->icode = (TARGET_64BIT ?
4441 			  CODE_FOR_reloaddi_la_out :
4442 			  CODE_FOR_reloadsi_la_out);
4443 	}
4444     }
4445 
4446   /* A scratch address register is needed when a symbolic constant is
4447      copied to r0 compiling with -fPIC.  In other cases the target
4448      register might be used as temporary (see legitimize_pic_address).  */
4449   if (in_p && SYMBOLIC_CONST (x) && flag_pic == 2 && rclass != ADDR_REGS)
4450     sri->icode = (TARGET_64BIT ?
4451 		  CODE_FOR_reloaddi_PIC_addr :
4452 		  CODE_FOR_reloadsi_PIC_addr);
4453 
4454   /* Either scratch or no register needed.  */
4455   return NO_REGS;
4456 }
4457 
4458 /* Implement TARGET_SECONDARY_MEMORY_NEEDED.
4459 
4460    We need secondary memory to move data between GPRs and FPRs.
4461 
4462    - With DFP the ldgr/lgdr instructions are available.  Due to the
4463      different alignment we cannot use them for SFmode.  For 31 bit a
4464      64 bit value in GPR would be a register pair so here we still
4465      need to go via memory.
4466 
4467    - With z13 we can do the SF/SImode moves with vlgvf.  Due to the
4468      overlapping of FPRs and VRs we still disallow TF/TD modes to be
4469      in full VRs so as before also on z13 we do these moves via
4470      memory.
4471 
4472      FIXME: Should we try splitting it into two vlgvg's/vlvg's instead?  */
4473 
4474 static bool
4475 s390_secondary_memory_needed (machine_mode mode,
4476 			      reg_class_t class1, reg_class_t class2)
4477 {
4478   return (((reg_classes_intersect_p (class1, VEC_REGS)
4479 	    && reg_classes_intersect_p (class2, GENERAL_REGS))
4480 	   || (reg_classes_intersect_p (class1, GENERAL_REGS)
4481 	       && reg_classes_intersect_p (class2, VEC_REGS)))
4482 	  && (!TARGET_DFP || !TARGET_64BIT || GET_MODE_SIZE (mode) != 8)
4483 	  && (!TARGET_VX || (SCALAR_FLOAT_MODE_P (mode)
4484 			     && GET_MODE_SIZE (mode) > 8)));
4485 }
4486 
4487 /* Implement TARGET_SECONDARY_MEMORY_NEEDED_MODE.
4488 
4489    get_secondary_mem widens its argument to BITS_PER_WORD which loses on 64bit
4490    because the movsi and movsf patterns don't handle r/f moves.  */
4491 
4492 static machine_mode
4493 s390_secondary_memory_needed_mode (machine_mode mode)
4494 {
4495   if (GET_MODE_BITSIZE (mode) < 32)
4496     return mode_for_size (32, GET_MODE_CLASS (mode), 0).require ();
4497   return mode;
4498 }
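
/* E.g. a QImode or HImode value going through the secondary memory
   slot is widened only to the 32-bit mode of the same class
   (QImode -> SImode), rather than to the full 64-bit word mode.  */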
4499 
4500 /* Generate code to load SRC, which is PLUS that is not a
4501    legitimate operand for the LA instruction, into TARGET.
4502    SCRATCH may be used as scratch register.  */
4503 
4504 void
4505 s390_expand_plus_operand (rtx target, rtx src,
4506 			  rtx scratch)
4507 {
4508   rtx sum1, sum2;
4509   struct s390_address ad;
4510 
4511   /* src must be a PLUS; get its two operands.  */
4512   gcc_assert (GET_CODE (src) == PLUS);
4513   gcc_assert (GET_MODE (src) == Pmode);
4514 
4515   /* Check if any of the two operands is already scheduled
4516      for replacement by reload.  This can happen e.g. when
4517      float registers occur in an address.  */
4518   sum1 = find_replacement (&XEXP (src, 0));
4519   sum2 = find_replacement (&XEXP (src, 1));
4520   src = gen_rtx_PLUS (Pmode, sum1, sum2);
4521 
4522   /* If the address is already strictly valid, there's nothing to do.  */
4523   if (!s390_decompose_address (src, &ad)
4524       || (ad.base && !REGNO_OK_FOR_BASE_P (REGNO (ad.base)))
4525       || (ad.indx && !REGNO_OK_FOR_INDEX_P (REGNO (ad.indx))))
4526     {
4527       /* Otherwise, one of the operands cannot be an address register;
4528          we reload its value into the scratch register.  */
4529       if (true_regnum (sum1) < 1 || true_regnum (sum1) > 15)
4530 	{
4531 	  emit_move_insn (scratch, sum1);
4532 	  sum1 = scratch;
4533 	}
4534       if (true_regnum (sum2) < 1 || true_regnum (sum2) > 15)
4535 	{
4536 	  emit_move_insn (scratch, sum2);
4537 	  sum2 = scratch;
4538 	}
4539 
4540       /* According to the way these invalid addresses are generated
4541          in reload.c, it should never happen (at least on s390) that
4542          *neither* of the PLUS components, after find_replacements
4543          was applied, is an address register.  */
4544       if (sum1 == scratch && sum2 == scratch)
4545 	{
4546 	  debug_rtx (src);
4547 	  gcc_unreachable ();
4548 	}
4549 
4550       src = gen_rtx_PLUS (Pmode, sum1, sum2);
4551     }
4552 
4553   /* Emit the LOAD ADDRESS pattern.  Note that reload of PLUS
4554      is only ever performed on addresses, so we can mark the
4555      sum as legitimate for LA in any case.  */
4556   s390_load_address (target, src);
4557 }
4558 
4559 
4560 /* Return true if ADDR is a valid memory address.
4561    STRICT specifies whether strict register checking applies.  */
4562 
4563 static bool
4564 s390_legitimate_address_p (machine_mode mode, rtx addr, bool strict)
4565 {
4566   struct s390_address ad;
4567 
4568   if (TARGET_Z10
4569       && larl_operand (addr, VOIDmode)
4570       && (mode == VOIDmode
4571 	  || s390_check_symref_alignment (addr, GET_MODE_SIZE (mode))))
4572     return true;
4573 
4574   if (!s390_decompose_address (addr, &ad))
4575     return false;
4576 
4577   if (strict)
4578     {
4579       if (ad.base && !REGNO_OK_FOR_BASE_P (REGNO (ad.base)))
4580 	return false;
4581 
4582       if (ad.indx && !REGNO_OK_FOR_INDEX_P (REGNO (ad.indx)))
4583 	return false;
4584     }
4585   else
4586     {
4587       if (ad.base
4588 	  && !(REGNO (ad.base) >= FIRST_PSEUDO_REGISTER
4589 	       || REGNO_REG_CLASS (REGNO (ad.base)) == ADDR_REGS))
4590 	return false;
4591 
4592       if (ad.indx
4593 	  && !(REGNO (ad.indx) >= FIRST_PSEUDO_REGISTER
4594 	       || REGNO_REG_CLASS (REGNO (ad.indx)) == ADDR_REGS))
4595 	  return false;
4596     }
4597   return true;
4598 }
4599 
4600 /* Return true if OP is a valid operand for the LA instruction.
4601    In 31-bit, we need to prove that the result is used as an
4602    address, as LA performs only a 31-bit addition.  */
4603 
4604 bool
4605 legitimate_la_operand_p (rtx op)
4606 {
4607   struct s390_address addr;
4608   if (!s390_decompose_address (op, &addr))
4609     return false;
4610 
4611   return (TARGET_64BIT || addr.pointer);
4612 }
4613 
4614 /* Return true if it is valid *and* preferable to use LA to
4615    compute the sum of OP1 and OP2.  */
4616 
4617 bool
4618 preferred_la_operand_p (rtx op1, rtx op2)
4619 {
4620   struct s390_address addr;
4621 
4622   if (op2 != const0_rtx)
4623     op1 = gen_rtx_PLUS (Pmode, op1, op2);
4624 
4625   if (!s390_decompose_address (op1, &addr))
4626     return false;
4627   if (addr.base && !REGNO_OK_FOR_BASE_P (REGNO (addr.base)))
4628     return false;
4629   if (addr.indx && !REGNO_OK_FOR_INDEX_P (REGNO (addr.indx)))
4630     return false;
4631 
4632   /* Avoid LA instructions with index register on z196; it is
4633      preferable to use regular add instructions when possible.
4634      Starting with zEC12 the la with index register is "uncracked"
4635      again.  */
4636   if (addr.indx && s390_tune == PROCESSOR_2817_Z196)
4637     return false;
4638 
4639   if (!TARGET_64BIT && !addr.pointer)
4640     return false;
4641 
4642   if (addr.pointer)
4643     return true;
4644 
4645   if ((addr.base && REG_P (addr.base) && REG_POINTER (addr.base))
4646       || (addr.indx && REG_P (addr.indx) && REG_POINTER (addr.indx)))
4647     return true;
4648 
4649   return false;
4650 }
4651 
4652 /* Emit a forced load-address operation to load SRC into DST.
4653    This will use the LOAD ADDRESS instruction even in situations
4654    where legitimate_la_operand_p (SRC) returns false.  */
4655 
4656 void
4657 s390_load_address (rtx dst, rtx src)
4658 {
4659   if (TARGET_64BIT)
4660     emit_move_insn (dst, src);
4661   else
4662     emit_insn (gen_force_la_31 (dst, src));
4663 }
4664 
4665 /* Return true if it is ok to use SYMBOL_REF in a relative address.  */
4666 
4667 bool
4668 s390_rel_address_ok_p (rtx symbol_ref)
4669 {
4670   tree decl;
4671 
4672   if (symbol_ref == s390_got_symbol () || CONSTANT_POOL_ADDRESS_P (symbol_ref))
4673     return true;
4674 
4675   decl = SYMBOL_REF_DECL (symbol_ref);
4676 
4677   if (!flag_pic || SYMBOL_REF_LOCAL_P (symbol_ref))
4678     return (s390_pic_data_is_text_relative
4679 	    || (decl
4680 		&& TREE_CODE (decl) == FUNCTION_DECL));
4681 
4682   return false;
4683 }
4684 
4685 /* Return a legitimate reference for ORIG (an address) using the
4686    register REG.  If REG is 0, a new pseudo is generated.
4687 
4688    There are two types of references that must be handled:
4689 
4690    1. Global data references must load the address from the GOT, via
4691       the PIC reg.  An insn is emitted to do this load, and the reg is
4692       returned.
4693 
4694    2. Static data references, constant pool addresses, and code labels
4695       compute the address as an offset from the GOT, whose base is in
4696       the PIC reg.  Static data objects have SYMBOL_FLAG_LOCAL set to
4697       differentiate them from global data objects.  The returned
4698       address is the PIC reg + an unspec constant.
4699 
4700    TARGET_LEGITIMIZE_ADDRESS_P rejects symbolic references unless the PIC
4701    reg also appears in the address.  */
4702 
4703 rtx
4704 legitimize_pic_address (rtx orig, rtx reg)
4705 {
4706   rtx addr = orig;
4707   rtx addend = const0_rtx;
4708   rtx new_rtx = orig;
4709 
4710   gcc_assert (!TLS_SYMBOLIC_CONST (addr));
4711 
4712   if (GET_CODE (addr) == CONST)
4713     addr = XEXP (addr, 0);
4714 
4715   if (GET_CODE (addr) == PLUS)
4716     {
4717       addend = XEXP (addr, 1);
4718       addr = XEXP (addr, 0);
4719     }
4720 
4721   if ((GET_CODE (addr) == LABEL_REF
4722        || (SYMBOL_REF_P (addr) && s390_rel_address_ok_p (addr))
4723        || (GET_CODE (addr) == UNSPEC &&
4724 	   (XINT (addr, 1) == UNSPEC_GOTENT
4725 	    || (TARGET_CPU_ZARCH && XINT (addr, 1) == UNSPEC_PLT))))
4726       && GET_CODE (addend) == CONST_INT)
4727     {
4728       /* This can be locally addressed.  */
4729 
4730       /* larl_operand requires UNSPECs to be wrapped in a const rtx.  */
4731       rtx const_addr = (GET_CODE (addr) == UNSPEC ?
4732 			gen_rtx_CONST (Pmode, addr) : addr);
4733 
4734       if (TARGET_CPU_ZARCH
4735 	  && larl_operand (const_addr, VOIDmode)
4736 	  && INTVAL (addend) < HOST_WIDE_INT_1 << 31
4737 	  && INTVAL (addend) >= -(HOST_WIDE_INT_1 << 31))
4738 	{
4739 	  if (INTVAL (addend) & 1)
4740 	    {
4741 	      /* LARL can't handle odd offsets, so emit a pair of LARL
4742 		 and LA.  */
4743 	      rtx temp = reg? reg : gen_reg_rtx (Pmode);
4744 
4745 	      if (!DISP_IN_RANGE (INTVAL (addend)))
4746 		{
4747 		  HOST_WIDE_INT even = INTVAL (addend) - 1;
4748 		  addr = gen_rtx_PLUS (Pmode, addr, GEN_INT (even));
4749 		  addr = gen_rtx_CONST (Pmode, addr);
4750 		  addend = const1_rtx;
4751 		}
4752 
4753 	      emit_move_insn (temp, addr);
4754 	      new_rtx = gen_rtx_PLUS (Pmode, temp, addend);
4755 
4756 	      if (reg != 0)
4757 		{
4758 		  s390_load_address (reg, new_rtx);
4759 		  new_rtx = reg;
4760 		}
4761 	    }
4762 	  else
4763 	    {
4764 	      /* If the offset is even, we can just use LARL.  This
4765 		 will happen automatically.  */
4766 	    }
4767 	}
4768       else
4769 	{
4770 	  /* No larl - Access local symbols relative to the GOT.  */
4771 
4772 	  rtx temp = reg? reg : gen_reg_rtx (Pmode);
4773 
4774 	  if (reload_in_progress || reload_completed)
4775 	    df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
4776 
4777 	  addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
4778 	  if (addend != const0_rtx)
4779 	    addr = gen_rtx_PLUS (Pmode, addr, addend);
4780 	  addr = gen_rtx_CONST (Pmode, addr);
4781 	  addr = force_const_mem (Pmode, addr);
4782 	  emit_move_insn (temp, addr);
4783 
4784 	  new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, temp);
4785 	  if (reg != 0)
4786 	    {
4787 	      s390_load_address (reg, new_rtx);
4788 	      new_rtx = reg;
4789 	    }
4790 	}
4791     }
4792   else if (GET_CODE (addr) == SYMBOL_REF && addend == const0_rtx)
4793     {
4794       /* A non-local symbol reference without addend.
4795 
4796 	 The symbol ref is wrapped into an UNSPEC to make sure the
4797 	 proper operand modifier (@GOT or @GOTENT) will be emitted.
4798 	 This will tell the linker to put the symbol into the GOT.
4799 
4800 	 Additionally the code dereferencing the GOT slot is emitted here.
4801 
4802 	 An addend to the symref needs to be added afterwards.
4803 	 legitimize_pic_address calls itself recursively to handle
4804 	 that case.  So no need to do it here.  */
4805 
4806       if (reg == 0)
4807         reg = gen_reg_rtx (Pmode);
4808 
4809       if (TARGET_Z10)
4810 	{
4811 	  /* Use load relative if possible.
4812 	     lgrl <target>, sym@GOTENT  */
4813 	  new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTENT);
4814 	  new_rtx = gen_rtx_CONST (Pmode, new_rtx);
4815 	  new_rtx = gen_const_mem (GET_MODE (reg), new_rtx);
4816 
4817 	  emit_move_insn (reg, new_rtx);
4818 	  new_rtx = reg;
4819 	}
4820       else if (flag_pic == 1)
4821         {
4822           /* Assume GOT offset is a valid displacement operand (< 4k
4823              or < 512k with z990).  This is handled the same way in
4824              both 31- and 64-bit code (@GOT).
4825              lg <target>, sym@GOT(r12)  */
4826 
4827 	  if (reload_in_progress || reload_completed)
4828 	    df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
4829 
4830           new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
4831           new_rtx = gen_rtx_CONST (Pmode, new_rtx);
4832           new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
4833           new_rtx = gen_const_mem (Pmode, new_rtx);
4834           emit_move_insn (reg, new_rtx);
4835           new_rtx = reg;
4836         }
4837       else if (TARGET_CPU_ZARCH)
4838         {
4839           /* If the GOT offset might be >= 4k, we determine the position
4840              of the GOT entry via a PC-relative LARL (@GOTENT).
4841 	     larl temp, sym@GOTENT
4842              lg   <target>, 0(temp) */
4843 
4844           rtx temp = reg ? reg : gen_reg_rtx (Pmode);
4845 
4846 	  gcc_assert (REGNO (temp) >= FIRST_PSEUDO_REGISTER
4847 		      || REGNO_REG_CLASS (REGNO (temp)) == ADDR_REGS);
4848 
4849           new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTENT);
4850           new_rtx = gen_rtx_CONST (Pmode, new_rtx);
4851 	  emit_move_insn (temp, new_rtx);
4852 
4853 	  new_rtx = gen_const_mem (Pmode, temp);
4854           emit_move_insn (reg, new_rtx);
4855 
4856           new_rtx = reg;
4857         }
4858       else
4859         {
4860           /* If the GOT offset might be >= 4k, we have to load it
4861              from the literal pool (@GOT).
4862 
4863 	     lg temp, lit-litbase(r13)
4864              lg <target>, 0(temp)
4865 	     lit:  .long sym@GOT  */
4866 
4867           rtx temp = reg ? reg : gen_reg_rtx (Pmode);
4868 
4869 	  gcc_assert (REGNO (temp) >= FIRST_PSEUDO_REGISTER
4870 		      || REGNO_REG_CLASS (REGNO (temp)) == ADDR_REGS);
4871 
4872 	  if (reload_in_progress || reload_completed)
4873 	    df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
4874 
4875           addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
4876           addr = gen_rtx_CONST (Pmode, addr);
4877           addr = force_const_mem (Pmode, addr);
4878           emit_move_insn (temp, addr);
4879 
4880           new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, temp);
4881           new_rtx = gen_const_mem (Pmode, new_rtx);
4882           emit_move_insn (reg, new_rtx);
4883           new_rtx = reg;
4884         }
4885     }
4886   else if (GET_CODE (addr) == UNSPEC && GET_CODE (addend) == CONST_INT)
4887     {
4888       gcc_assert (XVECLEN (addr, 0) == 1);
4889       switch (XINT (addr, 1))
4890 	{
4891 	  /* These are addresses of symbols (or PLT slots) relative to
4892 	     the GOT (not GOT slots!).  In general this will exceed the
4893 	     displacement range, so these values belong in the literal
4894 	     pool.  */
4895 	case UNSPEC_GOTOFF:
4896 	case UNSPEC_PLTOFF:
4897 	  new_rtx = force_const_mem (Pmode, orig);
4898 	  break;
4899 
4900 	  /* For -fPIC the GOT size might exceed the displacement
4901 	     range so make sure the value is in the literal pool.  */
4902 	case UNSPEC_GOT:
4903 	  if (flag_pic == 2)
4904 	    new_rtx = force_const_mem (Pmode, orig);
4905 	  break;
4906 
4907 	  /* For @GOTENT larl is used.  This is handled like local
4908 	     symbol refs.  */
4909 	case UNSPEC_GOTENT:
4910 	  gcc_unreachable ();
4911 	  break;
4912 
4913 	  /* @PLT is OK as is on 64-bit, must be converted to
4914 	     GOT-relative @PLTOFF on 31-bit.  */
4915 	case UNSPEC_PLT:
4916 	  if (!TARGET_CPU_ZARCH)
4917 	    {
4918 	      rtx temp = reg? reg : gen_reg_rtx (Pmode);
4919 
4920 	      if (reload_in_progress || reload_completed)
4921 		df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
4922 
4923 	      addr = XVECEXP (addr, 0, 0);
4924 	      addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr),
4925 				     UNSPEC_PLTOFF);
4926 	      if (addend != const0_rtx)
4927 		addr = gen_rtx_PLUS (Pmode, addr, addend);
4928 	      addr = gen_rtx_CONST (Pmode, addr);
4929 	      addr = force_const_mem (Pmode, addr);
4930 	      emit_move_insn (temp, addr);
4931 
4932 	      new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, temp);
4933 	      if (reg != 0)
4934 		{
4935 		  s390_load_address (reg, new_rtx);
4936 		  new_rtx = reg;
4937 		}
4938 	    }
4939 	  else
4940 	    /* On 64 bit larl can be used.  This case is handled like
4941 	       local symbol refs.  */
4942 	    gcc_unreachable ();
4943 	  break;
4944 
4945 	  /* Everything else cannot happen.  */
4946 	default:
4947 	  gcc_unreachable ();
4948 	}
4949     }
4950   else if (addend != const0_rtx)
4951     {
4952       /* Otherwise, compute the sum.  */
4953 
4954       rtx base = legitimize_pic_address (addr, reg);
4955       new_rtx  = legitimize_pic_address (addend,
4956 					 base == reg ? NULL_RTX : reg);
4957       if (GET_CODE (new_rtx) == CONST_INT)
4958 	new_rtx = plus_constant (Pmode, base, INTVAL (new_rtx));
4959       else
4960 	{
4961 	  if (GET_CODE (new_rtx) == PLUS && CONSTANT_P (XEXP (new_rtx, 1)))
4962 	    {
4963 	      base = gen_rtx_PLUS (Pmode, base, XEXP (new_rtx, 0));
4964 	      new_rtx = XEXP (new_rtx, 1);
4965 	    }
4966 	  new_rtx = gen_rtx_PLUS (Pmode, base, new_rtx);
4967 	}
4968 
4969       if (GET_CODE (new_rtx) == CONST)
4970 	new_rtx = XEXP (new_rtx, 0);
4971       new_rtx = force_operand (new_rtx, 0);
4972     }
4973 
4974   return new_rtx;
4975 }
4976 
4977 /* Load the thread pointer into a register.  */
4978 
4979 rtx
4980 s390_get_thread_pointer (void)
4981 {
4982   rtx tp = gen_reg_rtx (Pmode);
4983 
4984   emit_move_insn (tp, gen_rtx_REG (Pmode, TP_REGNUM));
4985   mark_reg_pointer (tp, BITS_PER_WORD);
4986 
4987   return tp;
4988 }
4989 
4990 /* Emit a tls call insn. The call target is the SYMBOL_REF stored
4991    in s390_tls_symbol which always refers to __tls_get_offset.
4992    The returned offset is written to RESULT_REG and an USE rtx is
4993    generated for TLS_CALL.  */
4994 
4995 static GTY(()) rtx s390_tls_symbol;
4996 
4997 static void
4998 s390_emit_tls_call_insn (rtx result_reg, rtx tls_call)
4999 {
5000   rtx insn;
5001 
5002   if (!flag_pic)
5003     emit_insn (s390_load_got ());
5004 
5005   if (!s390_tls_symbol)
5006     s390_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, "__tls_get_offset");
5007 
5008   insn = s390_emit_call (s390_tls_symbol, tls_call, result_reg,
5009 			 gen_rtx_REG (Pmode, RETURN_REGNUM));
5010 
5011   use_reg (&CALL_INSN_FUNCTION_USAGE (insn), result_reg);
5012   RTL_CONST_CALL_P (insn) = 1;
5013 }
5014 
5015 /* ADDR contains a thread-local SYMBOL_REF.  Generate code to compute
5016    this (thread-local) address.  REG may be used as temporary.  */
5017 
5018 static rtx
5019 legitimize_tls_address (rtx addr, rtx reg)
5020 {
5021   rtx new_rtx, tls_call, temp, base, r2;
5022   rtx_insn *insn;
5023 
5024   if (GET_CODE (addr) == SYMBOL_REF)
5025     switch (tls_symbolic_operand (addr))
5026       {
5027       case TLS_MODEL_GLOBAL_DYNAMIC:
5028 	start_sequence ();
5029 	r2 = gen_rtx_REG (Pmode, 2);
5030 	tls_call = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_TLSGD);
5031 	new_rtx = gen_rtx_CONST (Pmode, tls_call);
5032 	new_rtx = force_const_mem (Pmode, new_rtx);
5033 	emit_move_insn (r2, new_rtx);
5034 	s390_emit_tls_call_insn (r2, tls_call);
5035 	insn = get_insns ();
5036 	end_sequence ();
5037 
5038 	new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_NTPOFF);
5039 	temp = gen_reg_rtx (Pmode);
5040 	emit_libcall_block (insn, temp, r2, new_rtx);
5041 
5042 	new_rtx = gen_rtx_PLUS (Pmode, s390_get_thread_pointer (), temp);
5043 	if (reg != 0)
5044 	  {
5045 	    s390_load_address (reg, new_rtx);
5046 	    new_rtx = reg;
5047 	  }
5048 	break;
5049 
5050       case TLS_MODEL_LOCAL_DYNAMIC:
5051 	start_sequence ();
5052 	r2 = gen_rtx_REG (Pmode, 2);
5053 	tls_call = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TLSLDM);
5054 	new_rtx = gen_rtx_CONST (Pmode, tls_call);
5055 	new_rtx = force_const_mem (Pmode, new_rtx);
5056 	emit_move_insn (r2, new_rtx);
5057 	s390_emit_tls_call_insn (r2, tls_call);
5058 	insn = get_insns ();
5059 	end_sequence ();
5060 
5061 	new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TLSLDM_NTPOFF);
5062 	temp = gen_reg_rtx (Pmode);
5063 	emit_libcall_block (insn, temp, r2, new_rtx);
5064 
5065 	new_rtx = gen_rtx_PLUS (Pmode, s390_get_thread_pointer (), temp);
5066 	base = gen_reg_rtx (Pmode);
5067 	s390_load_address (base, new_rtx);
5068 
5069 	new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_DTPOFF);
5070 	new_rtx = gen_rtx_CONST (Pmode, new_rtx);
5071 	new_rtx = force_const_mem (Pmode, new_rtx);
5072 	temp = gen_reg_rtx (Pmode);
5073 	emit_move_insn (temp, new_rtx);
5074 
5075 	new_rtx = gen_rtx_PLUS (Pmode, base, temp);
5076 	if (reg != 0)
5077 	  {
5078 	    s390_load_address (reg, new_rtx);
5079 	    new_rtx = reg;
5080 	  }
5081 	break;
5082 
5083       case TLS_MODEL_INITIAL_EXEC:
5084 	if (flag_pic == 1)
5085 	  {
5086 	    /* Assume GOT offset < 4k.  This is handled the same way
5087 	       in both 31- and 64-bit code.  */
5088 
5089 	    if (reload_in_progress || reload_completed)
5090 	      df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
5091 
5092 	    new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTNTPOFF);
5093 	    new_rtx = gen_rtx_CONST (Pmode, new_rtx);
5094 	    new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
5095 	    new_rtx = gen_const_mem (Pmode, new_rtx);
5096 	    temp = gen_reg_rtx (Pmode);
5097 	    emit_move_insn (temp, new_rtx);
5098 	  }
5099 	else if (TARGET_CPU_ZARCH)
5100 	  {
5101 	    /* If the GOT offset might be >= 4k, we determine the position
5102 	       of the GOT entry via a PC-relative LARL.  */
5103 
5104 	    new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_INDNTPOFF);
5105 	    new_rtx = gen_rtx_CONST (Pmode, new_rtx);
5106 	    temp = gen_reg_rtx (Pmode);
5107 	    emit_move_insn (temp, new_rtx);
5108 
5109 	    new_rtx = gen_const_mem (Pmode, temp);
5110 	    temp = gen_reg_rtx (Pmode);
5111 	    emit_move_insn (temp, new_rtx);
5112 	  }
5113 	else if (flag_pic)
5114 	  {
5115 	    /* If the GOT offset might be >= 4k, we have to load it
5116 	       from the literal pool.  */
5117 
5118 	    if (reload_in_progress || reload_completed)
5119 	      df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
5120 
5121 	    new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTNTPOFF);
5122 	    new_rtx = gen_rtx_CONST (Pmode, new_rtx);
5123 	    new_rtx = force_const_mem (Pmode, new_rtx);
5124 	    temp = gen_reg_rtx (Pmode);
5125 	    emit_move_insn (temp, new_rtx);
5126 
5127             new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, temp);
5128 	    new_rtx = gen_const_mem (Pmode, new_rtx);
5129 
5130 	    new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, new_rtx, addr), UNSPEC_TLS_LOAD);
5131 	    temp = gen_reg_rtx (Pmode);
5132 	    emit_insn (gen_rtx_SET (temp, new_rtx));
5133 	  }
5134 	else
5135 	  {
5136 	    /* In position-dependent code, load the absolute address of
5137 	       the GOT entry from the literal pool.  */
5138 
5139 	    new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_INDNTPOFF);
5140 	    new_rtx = gen_rtx_CONST (Pmode, new_rtx);
5141 	    new_rtx = force_const_mem (Pmode, new_rtx);
5142 	    temp = gen_reg_rtx (Pmode);
5143 	    emit_move_insn (temp, new_rtx);
5144 
5145 	    new_rtx = temp;
5146 	    new_rtx = gen_const_mem (Pmode, new_rtx);
5147 	    new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, new_rtx, addr), UNSPEC_TLS_LOAD);
5148 	    temp = gen_reg_rtx (Pmode);
5149 	    emit_insn (gen_rtx_SET (temp, new_rtx));
5150 	  }
5151 
5152 	new_rtx = gen_rtx_PLUS (Pmode, s390_get_thread_pointer (), temp);
5153 	if (reg != 0)
5154 	  {
5155 	    s390_load_address (reg, new_rtx);
5156 	    new_rtx = reg;
5157 	  }
5158 	break;
5159 
5160       case TLS_MODEL_LOCAL_EXEC:
5161 	new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_NTPOFF);
5162 	new_rtx = gen_rtx_CONST (Pmode, new_rtx);
5163 	new_rtx = force_const_mem (Pmode, new_rtx);
5164         temp = gen_reg_rtx (Pmode);
5165 	emit_move_insn (temp, new_rtx);
5166 
5167 	new_rtx = gen_rtx_PLUS (Pmode, s390_get_thread_pointer (), temp);
5168 	if (reg != 0)
5169 	  {
5170 	    s390_load_address (reg, new_rtx);
5171 	    new_rtx = reg;
5172 	  }
5173 	break;
5174 
5175       default:
5176 	gcc_unreachable ();
5177       }
5178 
5179   else if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == UNSPEC)
5180     {
5181       switch (XINT (XEXP (addr, 0), 1))
5182 	{
5183 	case UNSPEC_INDNTPOFF:
5184 	  gcc_assert (TARGET_CPU_ZARCH);
5185 	  new_rtx = addr;
5186 	  break;
5187 
5188 	default:
5189 	  gcc_unreachable ();
5190 	}
5191     }
5192 
5193   else if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS
5194 	   && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT)
5195     {
5196       new_rtx = XEXP (XEXP (addr, 0), 0);
5197       if (GET_CODE (new_rtx) != SYMBOL_REF)
5198 	new_rtx = gen_rtx_CONST (Pmode, new_rtx);
5199 
5200       new_rtx = legitimize_tls_address (new_rtx, reg);
5201       new_rtx = plus_constant (Pmode, new_rtx,
5202 			       INTVAL (XEXP (XEXP (addr, 0), 1)));
5203       new_rtx = force_operand (new_rtx, 0);
5204     }
5205 
5206   else
5207     gcc_unreachable ();  /* for now ... */
5208 
5209   return new_rtx;
5210 }
5211 
5212 /* Emit insns making the address in operands[1] valid for a standard
5213    move to operands[0].  operands[1] is replaced by an address which
5214    should be used instead of the former RTX to emit the move
5215    pattern.  */
5216 
5217 void
5218 emit_symbolic_move (rtx *operands)
5219 {
5220   rtx temp = !can_create_pseudo_p () ? operands[0] : gen_reg_rtx (Pmode);
5221 
5222   if (GET_CODE (operands[0]) == MEM)
5223     operands[1] = force_reg (Pmode, operands[1]);
5224   else if (TLS_SYMBOLIC_CONST (operands[1]))
5225     operands[1] = legitimize_tls_address (operands[1], temp);
5226   else if (flag_pic)
5227     operands[1] = legitimize_pic_address (operands[1], temp);
5228 }
5229 
5230 /* Try machine-dependent ways of modifying an illegitimate address X
5231    to be legitimate.  If we find one, return the new, valid address.
5232 
5233    OLDX is the address as it was before break_out_memory_refs was called.
5234    In some cases it is useful to look at this to decide what needs to be done.
5235 
5236    MODE is the mode of the operand pointed to by X.
5237 
5238    When -fpic is used, special handling is needed for symbolic references.
5239    See comments by legitimize_pic_address for details.  */
5240 
5241 static rtx
5242 s390_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
5243 			 machine_mode mode ATTRIBUTE_UNUSED)
5244 {
5245   rtx constant_term = const0_rtx;
5246 
5247   if (TLS_SYMBOLIC_CONST (x))
5248     {
5249       x = legitimize_tls_address (x, 0);
5250 
5251       if (s390_legitimate_address_p (mode, x, FALSE))
5252 	return x;
5253     }
5254   else if (GET_CODE (x) == PLUS
5255 	   && (TLS_SYMBOLIC_CONST (XEXP (x, 0))
5256 	       || TLS_SYMBOLIC_CONST (XEXP (x, 1))))
5257     {
5258       return x;
5259     }
5260   else if (flag_pic)
5261     {
5262       if (SYMBOLIC_CONST (x)
5263           || (GET_CODE (x) == PLUS
5264               && (SYMBOLIC_CONST (XEXP (x, 0))
5265                   || SYMBOLIC_CONST (XEXP (x, 1)))))
5266 	  x = legitimize_pic_address (x, 0);
5267 
5268       if (s390_legitimate_address_p (mode, x, FALSE))
5269 	return x;
5270     }
5271 
5272   x = eliminate_constant_term (x, &constant_term);
5273 
5274   /* Optimize loading of large displacements by splitting them
5275      into the multiple of 4K and the rest; this allows the
5276      former to be CSE'd if possible.
5277 
5278      Don't do this if the displacement is added to a register
5279      pointing into the stack frame, as the offsets will
5280      change later anyway.  */
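  /* Illustrative sketch (not part of the original code): for a
     displacement of 0x12345 the split below computes
       lower = 0x12345 & 0xfff = 0x345
       upper = 0x12345 ^ 0x345 = 0x12000
     so the 4K-aligned part can be loaded into a register once and
     CSE'd, while the remaining 0x345 still fits the short
     displacement field.  */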
5281 
5282   if (GET_CODE (constant_term) == CONST_INT
5283       && !TARGET_LONG_DISPLACEMENT
5284       && !DISP_IN_RANGE (INTVAL (constant_term))
5285       && !(REG_P (x) && REGNO_PTR_FRAME_P (REGNO (x))))
5286     {
5287       HOST_WIDE_INT lower = INTVAL (constant_term) & 0xfff;
5288       HOST_WIDE_INT upper = INTVAL (constant_term) ^ lower;
5289 
5290       rtx temp = gen_reg_rtx (Pmode);
5291       rtx val  = force_operand (GEN_INT (upper), temp);
5292       if (val != temp)
5293 	emit_move_insn (temp, val);
5294 
5295       x = gen_rtx_PLUS (Pmode, x, temp);
5296       constant_term = GEN_INT (lower);
5297     }
5298 
5299   if (GET_CODE (x) == PLUS)
5300     {
5301       if (GET_CODE (XEXP (x, 0)) == REG)
5302 	{
5303 	  rtx temp = gen_reg_rtx (Pmode);
5304 	  rtx val  = force_operand (XEXP (x, 1), temp);
5305 	  if (val != temp)
5306 	    emit_move_insn (temp, val);
5307 
5308 	  x = gen_rtx_PLUS (Pmode, XEXP (x, 0), temp);
5309 	}
5310 
5311       else if (GET_CODE (XEXP (x, 1)) == REG)
5312 	{
5313 	  rtx temp = gen_reg_rtx (Pmode);
5314 	  rtx val  = force_operand (XEXP (x, 0), temp);
5315 	  if (val != temp)
5316 	    emit_move_insn (temp, val);
5317 
5318 	  x = gen_rtx_PLUS (Pmode, temp, XEXP (x, 1));
5319 	}
5320     }
5321 
5322   if (constant_term != const0_rtx)
5323     x = gen_rtx_PLUS (Pmode, x, constant_term);
5324 
5325   return x;
5326 }
5327 
5328 /* Try a machine-dependent way of reloading an illegitimate address AD
5329    operand.  If we find one, push the reload and return the new address.
5330 
5331    MODE is the mode of the enclosing MEM.  OPNUM is the operand number
5332    and TYPE is the reload type of the current reload.  */
5333 
5334 rtx
5335 legitimize_reload_address (rtx ad, machine_mode mode ATTRIBUTE_UNUSED,
5336 			   int opnum, int type)
5337 {
5338   if (!optimize || TARGET_LONG_DISPLACEMENT)
5339     return NULL_RTX;
5340 
5341   if (GET_CODE (ad) == PLUS)
5342     {
5343       rtx tem = simplify_binary_operation (PLUS, Pmode,
5344 					   XEXP (ad, 0), XEXP (ad, 1));
5345       if (tem)
5346 	ad = tem;
5347     }
5348 
5349   if (GET_CODE (ad) == PLUS
5350       && GET_CODE (XEXP (ad, 0)) == REG
5351       && GET_CODE (XEXP (ad, 1)) == CONST_INT
5352       && !DISP_IN_RANGE (INTVAL (XEXP (ad, 1))))
5353     {
5354       HOST_WIDE_INT lower = INTVAL (XEXP (ad, 1)) & 0xfff;
5355       HOST_WIDE_INT upper = INTVAL (XEXP (ad, 1)) ^ lower;
5356       rtx cst, tem, new_rtx;
5357 
5358       cst = GEN_INT (upper);
5359       if (!legitimate_reload_constant_p (cst))
5360 	cst = force_const_mem (Pmode, cst);
5361 
5362       tem = gen_rtx_PLUS (Pmode, XEXP (ad, 0), cst);
5363       new_rtx = gen_rtx_PLUS (Pmode, tem, GEN_INT (lower));
5364 
5365       push_reload (XEXP (tem, 1), 0, &XEXP (tem, 1), 0,
5366 		   BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
5367 		   opnum, (enum reload_type) type);
5368       return new_rtx;
5369     }
5370 
5371   return NULL_RTX;
5372 }
5373 
5374 /* Emit code to move LEN bytes from SRC to DST.  */
5375 
5376 bool
5377 s390_expand_movmem (rtx dst, rtx src, rtx len)
5378 {
5379   /* When tuning for z10 or higher we rely on the Glibc functions to
5380      do the right thing.  Only for constant lengths below 64k do we
5381      generate inline code.  */
5382   if (s390_tune >= PROCESSOR_2097_Z10
5383       && (GET_CODE (len) != CONST_INT || INTVAL (len) > (1<<16)))
5384     return false;
5385 
5386   /* Expand memcpy for constant length operands without a loop if it
5387      is shorter that way.
5388 
5389      With a constant length argument a
5390      memcpy loop (without pfd) is 36 bytes -> 6 * mvc  */
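  /* Worked example (illustrative only): for a constant LEN of 600 the
     code below emits three mvc blocks (256 bytes at offset 0, 256
     bytes at offset 256 and 88 bytes at offset 512) instead of a
     runtime loop.  Note that the length operand passed to
     gen_movmem_short is the block length minus one, matching the mvc
     length encoding.  */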
5391   if (GET_CODE (len) == CONST_INT
5392       && INTVAL (len) >= 0
5393       && INTVAL (len) <= 256 * 6
5394       && (!TARGET_MVCLE || INTVAL (len) <= 256))
5395     {
5396       HOST_WIDE_INT o, l;
5397 
5398       for (l = INTVAL (len), o = 0; l > 0; l -= 256, o += 256)
5399 	{
5400 	  rtx newdst = adjust_address (dst, BLKmode, o);
5401 	  rtx newsrc = adjust_address (src, BLKmode, o);
5402 	  emit_insn (gen_movmem_short (newdst, newsrc,
5403 				       GEN_INT (l > 256 ? 255 : l - 1)));
5404 	}
5405     }
5406 
5407   else if (TARGET_MVCLE)
5408     {
5409       emit_insn (gen_movmem_long (dst, src, convert_to_mode (Pmode, len, 1)));
5410     }
5411 
5412   else
5413     {
5414       rtx dst_addr, src_addr, count, blocks, temp;
5415       rtx_code_label *loop_start_label = gen_label_rtx ();
5416       rtx_code_label *loop_end_label = gen_label_rtx ();
5417       rtx_code_label *end_label = gen_label_rtx ();
5418       machine_mode mode;
5419 
5420       mode = GET_MODE (len);
5421       if (mode == VOIDmode)
5422         mode = Pmode;
5423 
5424       dst_addr = gen_reg_rtx (Pmode);
5425       src_addr = gen_reg_rtx (Pmode);
5426       count = gen_reg_rtx (mode);
5427       blocks = gen_reg_rtx (mode);
5428 
5429       convert_move (count, len, 1);
5430       emit_cmp_and_jump_insns (count, const0_rtx,
5431 			       EQ, NULL_RTX, mode, 1, end_label);
5432 
5433       emit_move_insn (dst_addr, force_operand (XEXP (dst, 0), NULL_RTX));
5434       emit_move_insn (src_addr, force_operand (XEXP (src, 0), NULL_RTX));
5435       dst = change_address (dst, VOIDmode, dst_addr);
5436       src = change_address (src, VOIDmode, src_addr);
5437 
5438       temp = expand_binop (mode, add_optab, count, constm1_rtx, count, 1,
5439 			   OPTAB_DIRECT);
5440       if (temp != count)
5441         emit_move_insn (count, temp);
5442 
5443       temp = expand_binop (mode, lshr_optab, count, GEN_INT (8), blocks, 1,
5444 			   OPTAB_DIRECT);
5445       if (temp != blocks)
5446         emit_move_insn (blocks, temp);
5447 
5448       emit_cmp_and_jump_insns (blocks, const0_rtx,
5449 			       EQ, NULL_RTX, mode, 1, loop_end_label);
5450 
5451       emit_label (loop_start_label);
5452 
5453       if (TARGET_Z10
5454 	  && (GET_CODE (len) != CONST_INT || INTVAL (len) > 768))
5455 	{
5456 	  rtx prefetch;
5457 
5458 	  /* Issue a read prefetch for the +3 cache line.  */
5459 	  prefetch = gen_prefetch (gen_rtx_PLUS (Pmode, src_addr, GEN_INT (768)),
5460 				   const0_rtx, const0_rtx);
5461 	  PREFETCH_SCHEDULE_BARRIER_P (prefetch) = true;
5462 	  emit_insn (prefetch);
5463 
5464 	  /* Issue a write prefetch for the +3 cache line.  */
5465 	  prefetch = gen_prefetch (gen_rtx_PLUS (Pmode, dst_addr, GEN_INT (768)),
5466 				   const1_rtx, const0_rtx);
5467 	  PREFETCH_SCHEDULE_BARRIER_P (prefetch) = true;
5468 	  emit_insn (prefetch);
5469 	}
5470 
5471       emit_insn (gen_movmem_short (dst, src, GEN_INT (255)));
5472       s390_load_address (dst_addr,
5473 			 gen_rtx_PLUS (Pmode, dst_addr, GEN_INT (256)));
5474       s390_load_address (src_addr,
5475 			 gen_rtx_PLUS (Pmode, src_addr, GEN_INT (256)));
5476 
5477       temp = expand_binop (mode, add_optab, blocks, constm1_rtx, blocks, 1,
5478 			   OPTAB_DIRECT);
5479       if (temp != blocks)
5480         emit_move_insn (blocks, temp);
5481 
5482       emit_cmp_and_jump_insns (blocks, const0_rtx,
5483 			       EQ, NULL_RTX, mode, 1, loop_end_label);
5484 
5485       emit_jump (loop_start_label);
5486       emit_label (loop_end_label);
5487 
5488       emit_insn (gen_movmem_short (dst, src,
5489 				   convert_to_mode (Pmode, count, 1)));
5490       emit_label (end_label);
5491     }
5492   return true;
5493 }
5494 
5495 /* Emit code to set LEN bytes at DST to VAL.
5496    Make use of clrmem if VAL is zero.  */
5497 
5498 void
5499 s390_expand_setmem (rtx dst, rtx len, rtx val)
5500 {
5501   if (GET_CODE (len) == CONST_INT && INTVAL (len) <= 0)
5502     return;
5503 
5504   gcc_assert (GET_CODE (val) == CONST_INT || GET_MODE (val) == QImode);
5505 
5506   /* Expand setmem/clrmem for a constant length operand without a
5507      loop if it will be shorter that way.
5508      With a constant length and without pfd argument a
5509      clrmem loop is 32 bytes -> 5.3 * xc
5510      setmem loop is 36 bytes -> 3.6 * (mvi/stc + mvc) */
5511   if (GET_CODE (len) == CONST_INT
5512       && ((INTVAL (len) <= 256 * 5 && val == const0_rtx)
5513 	  || INTVAL (len) <= 257 * 3)
5514       && (!TARGET_MVCLE || INTVAL (len) <= 256))
5515     {
5516       HOST_WIDE_INT o, l;
5517 
5518       if (val == const0_rtx)
5519 	/* clrmem: emit 256 byte blockwise XCs.  */
5520 	for (l = INTVAL (len), o = 0; l > 0; l -= 256, o += 256)
5521 	  {
5522 	    rtx newdst = adjust_address (dst, BLKmode, o);
5523 	    emit_insn (gen_clrmem_short (newdst,
5524 					 GEN_INT (l > 256 ? 255 : l - 1)));
5525 	  }
5526       else
5527 	/* setmem: emit 1(mvi) + 256(mvc) byte blockwise memsets by
5528 	   setting the first byte to val and using a 256 byte mvc with
5529 	   one byte overlap to propagate the byte.  */
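	/* Illustrative sketch of one emitted 257 byte chunk, assuming
	   VAL lives in a register (approximate assembly):
	     stc   %val,0(%dst)          ; store VAL into the first byte
	     mvc   1(256,%dst),0(%dst)   ; copy it forward byte by byte
	   Since mvc copies in ascending byte order, the one byte
	   overlap replicates VAL across the whole block.  */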
5530 	for (l = INTVAL (len), o = 0; l > 0; l -= 257, o += 257)
5531 	  {
5532 	    rtx newdst = adjust_address (dst, BLKmode, o);
5533 	    emit_move_insn (adjust_address (dst, QImode, o), val);
5534 	    if (l > 1)
5535 	      {
5536 		rtx newdstp1 = adjust_address (dst, BLKmode, o + 1);
5537 		emit_insn (gen_movmem_short (newdstp1, newdst,
5538 					     GEN_INT (l > 257 ? 255 : l - 2)));
5539 	      }
5540 	  }
5541     }
5542 
5543   else if (TARGET_MVCLE)
5544     {
5545       val = force_not_mem (convert_modes (Pmode, QImode, val, 1));
5546       if (TARGET_64BIT)
5547 	emit_insn (gen_setmem_long_di (dst, convert_to_mode (Pmode, len, 1),
5548 				       val));
5549       else
5550 	emit_insn (gen_setmem_long_si (dst, convert_to_mode (Pmode, len, 1),
5551 				       val));
5552     }
5553 
5554   else
5555     {
5556       rtx dst_addr, count, blocks, temp, dstp1 = NULL_RTX;
5557       rtx_code_label *loop_start_label = gen_label_rtx ();
5558       rtx_code_label *onebyte_end_label = gen_label_rtx ();
5559       rtx_code_label *zerobyte_end_label = gen_label_rtx ();
5560       rtx_code_label *restbyte_end_label = gen_label_rtx ();
5561       machine_mode mode;
5562 
5563       mode = GET_MODE (len);
5564       if (mode == VOIDmode)
5565 	mode = Pmode;
5566 
5567       dst_addr = gen_reg_rtx (Pmode);
5568       count = gen_reg_rtx (mode);
5569       blocks = gen_reg_rtx (mode);
5570 
5571       convert_move (count, len, 1);
5572       emit_cmp_and_jump_insns (count, const0_rtx,
5573 			       EQ, NULL_RTX, mode, 1, zerobyte_end_label,
5574 			       profile_probability::very_unlikely ());
5575 
5576       /* We need to make a copy of the target address since memset is
5577 	 supposed to return it unmodified.  We have to make it here
5578 	 already since the new reg is used at onebyte_end_label.  */
5579       emit_move_insn (dst_addr, force_operand (XEXP (dst, 0), NULL_RTX));
5580       dst = change_address (dst, VOIDmode, dst_addr);
5581 
5582       if (val != const0_rtx)
5583 	{
5584 	  /* When using the overlapping mvc the original target
5585 	     address is only accessed as a single byte entity (even by
5586 	     the mvc reading this value).  */
5587 	  set_mem_size (dst, 1);
5588 	  dstp1 = adjust_address (dst, VOIDmode, 1);
5589 	  emit_cmp_and_jump_insns (count,
5590 				   const1_rtx, EQ, NULL_RTX, mode, 1,
5591 				   onebyte_end_label,
5592 				   profile_probability::very_unlikely ());
5593 	}
5594 
5595       /* There is one unconditional (mvi+mvc)/xc after the loop
5596 	 dealing with the rest of the bytes.  Subtracting two (mvi+mvc)
5597 	 or one (xc) here leaves exactly that number of bytes to be
5598 	 handled by it.  */
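      /* Worked example (illustrative): for LEN = 600 and a nonzero
	 VAL, count becomes 598 and blocks = 598 >> 8 = 2, so the loop
	 handles 2 * 256 = 512 bytes and the trailing mvi+mvc after
	 restbyte_end_label sets the remaining 88 bytes (one mvi byte
	 plus an mvc of (598 & 0xff) + 1 = 87 bytes).  For VAL == 0
	 only one byte is subtracted since the tail is a single xc.  */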
5599       temp = expand_binop (mode, add_optab, count,
5600 			   val == const0_rtx ? constm1_rtx : GEN_INT (-2),
5601 			   count, 1, OPTAB_DIRECT);
5602       if (temp != count)
5603 	emit_move_insn (count, temp);
5604 
5605       temp = expand_binop (mode, lshr_optab, count, GEN_INT (8), blocks, 1,
5606 			   OPTAB_DIRECT);
5607       if (temp != blocks)
5608 	emit_move_insn (blocks, temp);
5609 
5610       emit_cmp_and_jump_insns (blocks, const0_rtx,
5611 			       EQ, NULL_RTX, mode, 1, restbyte_end_label);
5612 
5613       emit_jump (loop_start_label);
5614 
5615       if (val != const0_rtx)
5616 	{
5617 	  /* The 1 byte != 0 special case.  Not handled efficiently
5618 	     since we require two jumps for that.  However, this
5619 	     should be very rare.  */
5620 	  emit_label (onebyte_end_label);
5621 	  emit_move_insn (adjust_address (dst, QImode, 0), val);
5622 	  emit_jump (zerobyte_end_label);
5623 	}
5624 
5625       emit_label (loop_start_label);
5626 
5627       if (TARGET_Z10
5628 	  && (GET_CODE (len) != CONST_INT || INTVAL (len) > 1024))
5629 	{
5630 	  /* Issue a write prefetch for the +4 cache line.  */
5631 	  rtx prefetch = gen_prefetch (gen_rtx_PLUS (Pmode, dst_addr,
5632 						     GEN_INT (1024)),
5633 				       const1_rtx, const0_rtx);
5634 	  emit_insn (prefetch);
5635 	  PREFETCH_SCHEDULE_BARRIER_P (prefetch) = true;
5636 	}
5637 
5638       if (val == const0_rtx)
5639 	emit_insn (gen_clrmem_short (dst, GEN_INT (255)));
5640       else
5641 	{
5642 	  /* Set the first byte in the block to the value and use an
5643 	     overlapping mvc for the block.  */
5644 	  emit_move_insn (adjust_address (dst, QImode, 0), val);
5645 	  emit_insn (gen_movmem_short (dstp1, dst, GEN_INT (254)));
5646 	}
5647       s390_load_address (dst_addr,
5648 			 gen_rtx_PLUS (Pmode, dst_addr, GEN_INT (256)));
5649 
5650       temp = expand_binop (mode, add_optab, blocks, constm1_rtx, blocks, 1,
5651 			   OPTAB_DIRECT);
5652       if (temp != blocks)
5653 	emit_move_insn (blocks, temp);
5654 
5655       emit_cmp_and_jump_insns (blocks, const0_rtx,
5656 			       NE, NULL_RTX, mode, 1, loop_start_label);
5657 
5658       emit_label (restbyte_end_label);
5659 
5660       if (val == const0_rtx)
5661 	emit_insn (gen_clrmem_short (dst, convert_to_mode (Pmode, count, 1)));
5662       else
5663 	{
5664 	  /* Set the first byte in the block to the value and use an
5665 	     overlapping mvc for the block.  */
5666 	  emit_move_insn (adjust_address (dst, QImode, 0), val);
5667 	  /* execute only uses the lowest 8 bits of count; that's
5668 	     exactly what we need here.  */
5669 	  emit_insn (gen_movmem_short (dstp1, dst,
5670 				       convert_to_mode (Pmode, count, 1)));
5671 	}
5672 
5673       emit_label (zerobyte_end_label);
5674     }
5675 }
5676 
5677 /* Emit code to compare LEN bytes at OP0 with those at OP1,
5678    and return the result in TARGET.  */
5679 
5680 bool
5681 s390_expand_cmpmem (rtx target, rtx op0, rtx op1, rtx len)
5682 {
5683   rtx ccreg = gen_rtx_REG (CCUmode, CC_REGNUM);
5684   rtx tmp;
5685 
5686   /* When tuning for z10 or higher we rely on the Glibc functions to
5687      do the right thing.  Only for constant lengths below 64k do we
5688      generate inline code.  */
5689   if (s390_tune >= PROCESSOR_2097_Z10
5690       && (GET_CODE (len) != CONST_INT || INTVAL (len) > (1<<16)))
5691     return false;
5692 
5693   /* As the result of CMPINT is inverted compared to what we need,
5694      we have to swap the operands.  */
5695   tmp = op0; op0 = op1; op1 = tmp;
5696 
5697   if (GET_CODE (len) == CONST_INT && INTVAL (len) >= 0 && INTVAL (len) <= 256)
5698     {
5699       if (INTVAL (len) > 0)
5700         {
5701           emit_insn (gen_cmpmem_short (op0, op1, GEN_INT (INTVAL (len) - 1)));
5702           emit_insn (gen_cmpint (target, ccreg));
5703         }
5704       else
5705         emit_move_insn (target, const0_rtx);
5706     }
5707   else if (TARGET_MVCLE)
5708     {
5709       emit_insn (gen_cmpmem_long (op0, op1, convert_to_mode (Pmode, len, 1)));
5710       emit_insn (gen_cmpint (target, ccreg));
5711     }
5712   else
5713     {
5714       rtx addr0, addr1, count, blocks, temp;
5715       rtx_code_label *loop_start_label = gen_label_rtx ();
5716       rtx_code_label *loop_end_label = gen_label_rtx ();
5717       rtx_code_label *end_label = gen_label_rtx ();
5718       machine_mode mode;
5719 
5720       mode = GET_MODE (len);
5721       if (mode == VOIDmode)
5722         mode = Pmode;
5723 
5724       addr0 = gen_reg_rtx (Pmode);
5725       addr1 = gen_reg_rtx (Pmode);
5726       count = gen_reg_rtx (mode);
5727       blocks = gen_reg_rtx (mode);
5728 
5729       convert_move (count, len, 1);
5730       emit_cmp_and_jump_insns (count, const0_rtx,
5731 			       EQ, NULL_RTX, mode, 1, end_label);
5732 
5733       emit_move_insn (addr0, force_operand (XEXP (op0, 0), NULL_RTX));
5734       emit_move_insn (addr1, force_operand (XEXP (op1, 0), NULL_RTX));
5735       op0 = change_address (op0, VOIDmode, addr0);
5736       op1 = change_address (op1, VOIDmode, addr1);
5737 
5738       temp = expand_binop (mode, add_optab, count, constm1_rtx, count, 1,
5739 			   OPTAB_DIRECT);
5740       if (temp != count)
5741         emit_move_insn (count, temp);
5742 
5743       temp = expand_binop (mode, lshr_optab, count, GEN_INT (8), blocks, 1,
5744 			   OPTAB_DIRECT);
5745       if (temp != blocks)
5746         emit_move_insn (blocks, temp);
5747 
5748       emit_cmp_and_jump_insns (blocks, const0_rtx,
5749 			       EQ, NULL_RTX, mode, 1, loop_end_label);
5750 
5751       emit_label (loop_start_label);
5752 
5753       if (TARGET_Z10
5754 	  && (GET_CODE (len) != CONST_INT || INTVAL (len) > 512))
5755 	{
5756 	  rtx prefetch;
5757 
5758 	  /* Issue a read prefetch for the +2 cache line of operand 1.  */
5759 	  prefetch = gen_prefetch (gen_rtx_PLUS (Pmode, addr0, GEN_INT (512)),
5760 				   const0_rtx, const0_rtx);
5761 	  emit_insn (prefetch);
5762 	  PREFETCH_SCHEDULE_BARRIER_P (prefetch) = true;
5763 
5764 	  /* Issue a read prefetch for the +2 cache line of operand 2.  */
5765 	  prefetch = gen_prefetch (gen_rtx_PLUS (Pmode, addr1, GEN_INT (512)),
5766 				   const0_rtx, const0_rtx);
5767 	  emit_insn (prefetch);
5768 	  PREFETCH_SCHEDULE_BARRIER_P (prefetch) = true;
5769 	}
5770 
5771       emit_insn (gen_cmpmem_short (op0, op1, GEN_INT (255)));
5772       temp = gen_rtx_NE (VOIDmode, ccreg, const0_rtx);
5773       temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
5774 			gen_rtx_LABEL_REF (VOIDmode, end_label), pc_rtx);
5775       temp = gen_rtx_SET (pc_rtx, temp);
5776       emit_jump_insn (temp);
5777 
5778       s390_load_address (addr0,
5779 			 gen_rtx_PLUS (Pmode, addr0, GEN_INT (256)));
5780       s390_load_address (addr1,
5781 			 gen_rtx_PLUS (Pmode, addr1, GEN_INT (256)));
5782 
5783       temp = expand_binop (mode, add_optab, blocks, constm1_rtx, blocks, 1,
5784 			   OPTAB_DIRECT);
5785       if (temp != blocks)
5786         emit_move_insn (blocks, temp);
5787 
5788       emit_cmp_and_jump_insns (blocks, const0_rtx,
5789 			       EQ, NULL_RTX, mode, 1, loop_end_label);
5790 
5791       emit_jump (loop_start_label);
5792       emit_label (loop_end_label);
5793 
5794       emit_insn (gen_cmpmem_short (op0, op1,
5795 				   convert_to_mode (Pmode, count, 1)));
5796       emit_label (end_label);
5797 
5798       emit_insn (gen_cmpint (target, ccreg));
5799     }
5800   return true;
5801 }
5802 
5803 /* Emit a conditional jump to LABEL for condition code mask MASK using
5804    comparison operator COMPARISON.  Return the emitted jump insn.  */
5805 
5806 static rtx_insn *
5807 s390_emit_ccraw_jump (HOST_WIDE_INT mask, enum rtx_code comparison, rtx label)
5808 {
5809   rtx temp;
5810 
5811   gcc_assert (comparison == EQ || comparison == NE);
5812   gcc_assert (mask > 0 && mask < 15);
5813 
5814   temp = gen_rtx_fmt_ee (comparison, VOIDmode,
5815 			 gen_rtx_REG (CCRAWmode, CC_REGNUM), GEN_INT (mask));
5816   temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
5817 			       gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
5818   temp = gen_rtx_SET (pc_rtx, temp);
5819   return emit_jump_insn (temp);
5820 }
5821 
5822 /* Emit the instructions to implement strlen of STRING and store the
5823    result in TARGET.  The string has the known ALIGNMENT.  This
5824    version uses vector instructions and is therefore not appropriate
5825    for targets prior to z13.  */
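/* Rough outline of the emitted code (an illustrative sketch, not the
   generated RTL itself):
     - if STRING is not 16 byte aligned, the bytes up to the next
       boundary are loaded with vll (zero padded) and the loop is
       entered after its vector load;
     - each loop iteration loads 16 aligned bytes and uses vfene to
       find the leftmost zero byte (index 16 if there is none);
     - a final check rejects a zero byte stemming only from the vll
       padding of the unaligned first chunk;
     - the result is the accumulated byte index plus the vfene index,
       corrected for the last 16 byte increment.  */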
5826 
5827 void
5828 s390_expand_vec_strlen (rtx target, rtx string, rtx alignment)
5829 {
5830   rtx highest_index_to_load_reg = gen_reg_rtx (Pmode);
5831   rtx str_reg = gen_reg_rtx (V16QImode);
5832   rtx str_addr_base_reg = gen_reg_rtx (Pmode);
5833   rtx str_idx_reg = gen_reg_rtx (Pmode);
5834   rtx result_reg = gen_reg_rtx (V16QImode);
5835   rtx is_aligned_label = gen_label_rtx ();
5836   rtx into_loop_label = NULL_RTX;
5837   rtx loop_start_label = gen_label_rtx ();
5838   rtx temp;
5839   rtx len = gen_reg_rtx (QImode);
5840   rtx cond;
5841 
5842   s390_load_address (str_addr_base_reg, XEXP (string, 0));
5843   emit_move_insn (str_idx_reg, const0_rtx);
5844 
5845   if (INTVAL (alignment) < 16)
5846     {
5847       /* Check whether the address happens to be aligned properly and,
5848 	 if so, jump directly to the aligned loop.  */
5849       emit_cmp_and_jump_insns (gen_rtx_AND (Pmode,
5850 					    str_addr_base_reg, GEN_INT (15)),
5851 			       const0_rtx, EQ, NULL_RTX,
5852 			       Pmode, 1, is_aligned_label);
5853 
5854       temp = gen_reg_rtx (Pmode);
5855       temp = expand_binop (Pmode, and_optab, str_addr_base_reg,
5856 			   GEN_INT (15), temp, 1, OPTAB_DIRECT);
5857       gcc_assert (REG_P (temp));
5858       highest_index_to_load_reg =
5859 	expand_binop (Pmode, sub_optab, GEN_INT (15), temp,
5860 		      highest_index_to_load_reg, 1, OPTAB_DIRECT);
5861       gcc_assert (REG_P (highest_index_to_load_reg));
5862       emit_insn (gen_vllv16qi (str_reg,
5863 		   convert_to_mode (SImode, highest_index_to_load_reg, 1),
5864 		   gen_rtx_MEM (BLKmode, str_addr_base_reg)));
5865 
5866       into_loop_label = gen_label_rtx ();
5867       s390_emit_jump (into_loop_label, NULL_RTX);
5868       emit_barrier ();
5869     }
5870 
5871   emit_label (is_aligned_label);
5872   LABEL_NUSES (is_aligned_label) = INTVAL (alignment) < 16 ? 2 : 1;
5873 
5874   /* From this point on we are only performing 16-byte aligned
5875      loads.  */
5876   emit_move_insn (highest_index_to_load_reg, GEN_INT (15));
5877 
5878   emit_label (loop_start_label);
5879   LABEL_NUSES (loop_start_label) = 1;
5880 
5881   /* Load 16 bytes of the string into VR.  */
5882   emit_move_insn (str_reg,
5883 		  gen_rtx_MEM (V16QImode,
5884 			       gen_rtx_PLUS (Pmode, str_idx_reg,
5885 					     str_addr_base_reg)));
5886   if (into_loop_label != NULL_RTX)
5887     {
5888       emit_label (into_loop_label);
5889       LABEL_NUSES (into_loop_label) = 1;
5890     }
5891 
5892   /* Increment string index by 16 bytes.  */
5893   expand_binop (Pmode, add_optab, str_idx_reg, GEN_INT (16),
5894 		str_idx_reg, 1, OPTAB_DIRECT);
5895 
5896   emit_insn (gen_vec_vfenesv16qi (result_reg, str_reg, str_reg,
5897 				  GEN_INT (VSTRING_FLAG_ZS | VSTRING_FLAG_CS)));
5898 
5899   add_int_reg_note (s390_emit_ccraw_jump (8, NE, loop_start_label),
5900 		    REG_BR_PROB,
5901 		    profile_probability::very_likely ().to_reg_br_prob_note ());
5902   emit_insn (gen_vec_extractv16qiqi (len, result_reg, GEN_INT (7)));
5903 
5904   /* If the string pointer wasn't aligned we have loaded less than 16
5905      bytes and the remaining bytes got filled with zeros (by vll).
5906      Now we have to check whether the resulting index lies within the
5907      bytes that are actually part of the string.  */
5908 
5909   cond = s390_emit_compare (GT, convert_to_mode (Pmode, len, 1),
5910 			    highest_index_to_load_reg);
5911   s390_load_address (highest_index_to_load_reg,
5912 		     gen_rtx_PLUS (Pmode, highest_index_to_load_reg,
5913 				   const1_rtx));
5914   if (TARGET_64BIT)
5915     emit_insn (gen_movdicc (str_idx_reg, cond,
5916 			    highest_index_to_load_reg, str_idx_reg));
5917   else
5918     emit_insn (gen_movsicc (str_idx_reg, cond,
5919 			    highest_index_to_load_reg, str_idx_reg));
5920 
5921   add_reg_br_prob_note (s390_emit_jump (is_aligned_label, cond),
5922 		        profile_probability::very_unlikely ());
5923 
5924   expand_binop (Pmode, add_optab, str_idx_reg,
5925 		GEN_INT (-16), str_idx_reg, 1, OPTAB_DIRECT);
5926   /* FIXME: len is already zero extended - so avoid the llgcr emitted
5927      here.  */
5928   temp = expand_binop (Pmode, add_optab, str_idx_reg,
5929 		       convert_to_mode (Pmode, len, 1),
5930 		       target, 1, OPTAB_DIRECT);
5931   if (temp != target)
5932     emit_move_insn (target, temp);
5933 }
5934 
5935 void
5936 s390_expand_vec_movstr (rtx result, rtx dst, rtx src)
5937 {
5938   rtx temp = gen_reg_rtx (Pmode);
5939   rtx src_addr = XEXP (src, 0);
5940   rtx dst_addr = XEXP (dst, 0);
5941   rtx src_addr_reg = gen_reg_rtx (Pmode);
5942   rtx dst_addr_reg = gen_reg_rtx (Pmode);
5943   rtx offset = gen_reg_rtx (Pmode);
5944   rtx vsrc = gen_reg_rtx (V16QImode);
5945   rtx vpos = gen_reg_rtx (V16QImode);
5946   rtx loadlen = gen_reg_rtx (SImode);
5947   rtx gpos_qi = gen_reg_rtx (QImode);
5948   rtx gpos = gen_reg_rtx (SImode);
5949   rtx done_label = gen_label_rtx ();
5950   rtx loop_label = gen_label_rtx ();
5951   rtx exit_label = gen_label_rtx ();
5952   rtx full_label = gen_label_rtx ();
5953 
5954   /* Perform a quick check for the string ending within the first
5955      (up to) 16 bytes and exit early if it does.  */
5956 
5957   emit_insn (gen_vlbb (vsrc, src, GEN_INT (6)));
5958   emit_insn (gen_lcbb (loadlen, src_addr, GEN_INT (6)));
5959   emit_insn (gen_vfenezv16qi (vpos, vsrc, vsrc));
5960   emit_insn (gen_vec_extractv16qiqi (gpos_qi, vpos, GEN_INT (7)));
5961   emit_move_insn (gpos, gen_rtx_SUBREG (SImode, gpos_qi, 0));
5962   /* gpos is the byte index if a zero was found and 16 otherwise.
5963      So if it is lower than the number of loaded bytes we have a hit.  */
5964   emit_cmp_and_jump_insns (gpos, loadlen, GE, NULL_RTX, SImode, 1,
5965 			   full_label);
5966   emit_insn (gen_vstlv16qi (vsrc, gpos, dst));
5967 
5968   force_expand_binop (Pmode, add_optab, dst_addr, gpos, result,
5969 		      1, OPTAB_DIRECT);
5970   emit_jump (exit_label);
5971   emit_barrier ();
5972 
5973   emit_label (full_label);
5974   LABEL_NUSES (full_label) = 1;
5975 
5976   /* Calculate `offset' so that src + offset points to the last byte
5977      before 16 byte alignment.  */
5978 
5979   /* temp = src_addr & 0xf */
5980   force_expand_binop (Pmode, and_optab, src_addr, GEN_INT (15), temp,
5981 		      1, OPTAB_DIRECT);
5982 
5983   /* offset = 0xf - temp */
5984   emit_move_insn (offset, GEN_INT (15));
5985   force_expand_binop (Pmode, sub_optab, offset, temp, offset,
5986 		      1, OPTAB_DIRECT);
5987 
5988   /* Store `offset' bytes in the destination string.  The quick check
5989      has loaded at least `offset' bytes into vsrc.  */
5990 
5991   emit_insn (gen_vstlv16qi (vsrc, gen_lowpart (SImode, offset), dst));
5992 
5993   /* Advance to the next byte to be loaded.  */
5994   force_expand_binop (Pmode, add_optab, offset, const1_rtx, offset,
5995 		      1, OPTAB_DIRECT);
5996 
5997   /* Make sure the addresses are single regs which can be used as a
5998      base.  */
5999   emit_move_insn (src_addr_reg, src_addr);
6000   emit_move_insn (dst_addr_reg, dst_addr);
6001 
6002   /* MAIN LOOP */
6003 
6004   emit_label (loop_label);
6005   LABEL_NUSES (loop_label) = 1;
6006 
6007   emit_move_insn (vsrc,
6008 		  gen_rtx_MEM (V16QImode,
6009 			       gen_rtx_PLUS (Pmode, src_addr_reg, offset)));
6010 
6011   emit_insn (gen_vec_vfenesv16qi (vpos, vsrc, vsrc,
6012 				  GEN_INT (VSTRING_FLAG_ZS | VSTRING_FLAG_CS)));
6013   add_int_reg_note (s390_emit_ccraw_jump (8, EQ, done_label),
6014 		    REG_BR_PROB, profile_probability::very_unlikely ()
6015 				  .to_reg_br_prob_note ());
6016 
6017   emit_move_insn (gen_rtx_MEM (V16QImode,
6018 			       gen_rtx_PLUS (Pmode, dst_addr_reg, offset)),
6019 		  vsrc);
6020   /* offset += 16 */
6021   force_expand_binop (Pmode, add_optab, offset, GEN_INT (16),
6022 		      offset,  1, OPTAB_DIRECT);
6023 
6024   emit_jump (loop_label);
6025   emit_barrier ();
6026 
6027   /* REGULAR EXIT */
6028 
6029   /* We are done.  Add the offset of the zero character to the dst_addr
6030      pointer to get the result.  */
6031 
6032   emit_label (done_label);
6033   LABEL_NUSES (done_label) = 1;
6034 
6035   force_expand_binop (Pmode, add_optab, dst_addr_reg, offset, dst_addr_reg,
6036 		      1, OPTAB_DIRECT);
6037 
6038   emit_insn (gen_vec_extractv16qiqi (gpos_qi, vpos, GEN_INT (7)));
6039   emit_move_insn (gpos, gen_rtx_SUBREG (SImode, gpos_qi, 0));
6040 
6041   emit_insn (gen_vstlv16qi (vsrc, gpos, gen_rtx_MEM (BLKmode, dst_addr_reg)));
6042 
6043   force_expand_binop (Pmode, add_optab, dst_addr_reg, gpos, result,
6044 		      1, OPTAB_DIRECT);
6045 
6046   /* EARLY EXIT */
6047 
6048   emit_label (exit_label);
6049   LABEL_NUSES (exit_label) = 1;
6050 }
6051 
6052 
6053 /* Expand conditional increment or decrement using alc/slb instructions.
6054    Should generate code setting DST to either SRC or SRC + INCREMENT,
6055    depending on the result of the comparison CMP_OP0 CMP_CODE CMP_OP1.
6056    Returns true if successful, false otherwise.
6057 
6058    That makes it possible to implement some if-constructs without jumps e.g.:
6059    (borrow = CC0 | CC1 and carry = CC2 | CC3)
6060    unsigned int a, b, c;
6061    if (a < b)  c++; -> CCU  b > a  -> CC2;    c += carry;
6062    if (a < b)  c--; -> CCL3 a - b  -> borrow; c -= borrow;
6063    if (a <= b) c++; -> CCL3 b - a  -> borrow; c += carry;
6064    if (a <= b) c--; -> CCU  a <= b -> borrow; c -= borrow;
6065 
6066    Checks for EQ and NE with a nonzero value need an additional xor e.g.:
6067    if (a == b) c++; -> CCL3 a ^= b; 0 - a  -> borrow;    c += carry;
6068    if (a == b) c--; -> CCU  a ^= b; a <= 0 -> CC0 | CC1; c -= borrow;
6069    if (a != b) c++; -> CCU  a ^= b; a > 0  -> CC2;       c += carry;
6070    if (a != b) c--; -> CCL3 a ^= b; 0 - a  -> borrow;    c -= borrow; */
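/* Illustrative example (assembly is approximate and assumes a spare
   register holding zero): for

     unsigned int a, b, c;
     if (a < b) c++;

   the expansion emits an unsigned compare whose condition code is
   consumed as a carry by add logical with carry:

     clr   %b,%a        ; CC2, i.e. carry, iff b > a (a < b)
     alcr  %c,%zero     ; c = c + 0 + carry

   so no conditional branch is needed.  */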
6071 
6072 bool
6073 s390_expand_addcc (enum rtx_code cmp_code, rtx cmp_op0, rtx cmp_op1,
6074 		   rtx dst, rtx src, rtx increment)
6075 {
6076   machine_mode cmp_mode;
6077   machine_mode cc_mode;
6078   rtx op_res;
6079   rtx insn;
6080   rtvec p;
6081   int ret;
6082 
6083   if ((GET_MODE (cmp_op0) == SImode || GET_MODE (cmp_op0) == VOIDmode)
6084       && (GET_MODE (cmp_op1) == SImode || GET_MODE (cmp_op1) == VOIDmode))
6085     cmp_mode = SImode;
6086   else if ((GET_MODE (cmp_op0) == DImode || GET_MODE (cmp_op0) == VOIDmode)
6087 	   && (GET_MODE (cmp_op1) == DImode || GET_MODE (cmp_op1) == VOIDmode))
6088     cmp_mode = DImode;
6089   else
6090     return false;
6091 
6092   /* Try ADD LOGICAL WITH CARRY.  */
6093   if (increment == const1_rtx)
6094     {
6095       /* Determine CC mode to use.  */
6096       if (cmp_code == EQ || cmp_code == NE)
6097 	{
6098 	  if (cmp_op1 != const0_rtx)
6099 	    {
6100 	      cmp_op0 = expand_simple_binop (cmp_mode, XOR, cmp_op0, cmp_op1,
6101 					     NULL_RTX, 0, OPTAB_WIDEN);
6102 	      cmp_op1 = const0_rtx;
6103 	    }
6104 
6105 	  cmp_code = cmp_code == EQ ? LEU : GTU;
6106 	}
6107 
6108       if (cmp_code == LTU || cmp_code == LEU)
6109 	{
6110 	  rtx tem = cmp_op0;
6111 	  cmp_op0 = cmp_op1;
6112 	  cmp_op1 = tem;
6113 	  cmp_code = swap_condition (cmp_code);
6114 	}
6115 
6116       switch (cmp_code)
6117 	{
6118 	  case GTU:
6119 	    cc_mode = CCUmode;
6120 	    break;
6121 
6122 	  case GEU:
6123 	    cc_mode = CCL3mode;
6124 	    break;
6125 
6126 	  default:
6127 	    return false;
6128 	}
6129 
6130       /* Emit comparison instruction pattern. */
6131       if (!register_operand (cmp_op0, cmp_mode))
6132 	cmp_op0 = force_reg (cmp_mode, cmp_op0);
6133 
6134       insn = gen_rtx_SET (gen_rtx_REG (cc_mode, CC_REGNUM),
6135 			  gen_rtx_COMPARE (cc_mode, cmp_op0, cmp_op1));
6136       /* We use insn_invalid_p here to add clobbers if required.  */
6137       ret = insn_invalid_p (emit_insn (insn), false);
6138       gcc_assert (!ret);
6139 
6140       /* Emit ALC instruction pattern.  */
6141       op_res = gen_rtx_fmt_ee (cmp_code, GET_MODE (dst),
6142 			       gen_rtx_REG (cc_mode, CC_REGNUM),
6143 			       const0_rtx);
6144 
6145       if (src != const0_rtx)
6146 	{
6147 	  if (!register_operand (src, GET_MODE (dst)))
6148 	    src = force_reg (GET_MODE (dst), src);
6149 
6150 	  op_res = gen_rtx_PLUS (GET_MODE (dst), op_res, src);
6151 	  op_res = gen_rtx_PLUS (GET_MODE (dst), op_res, const0_rtx);
6152 	}
6153 
6154       p = rtvec_alloc (2);
6155       RTVEC_ELT (p, 0) =
6156         gen_rtx_SET (dst, op_res);
6157       RTVEC_ELT (p, 1) =
6158 	gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, CC_REGNUM));
6159       emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
6160 
6161       return true;
6162     }
6163 
6164   /* Try SUBTRACT LOGICAL WITH BORROW.  */
6165   if (increment == constm1_rtx)
6166     {
6167       /* Determine CC mode to use.  */
6168       if (cmp_code == EQ || cmp_code == NE)
6169 	{
6170 	  if (cmp_op1 != const0_rtx)
6171 	    {
6172 	      cmp_op0 = expand_simple_binop (cmp_mode, XOR, cmp_op0, cmp_op1,
6173 					     NULL_RTX, 0, OPTAB_WIDEN);
6174 	      cmp_op1 = const0_rtx;
6175 	    }
6176 
6177 	  cmp_code = cmp_code == EQ ? LEU : GTU;
6178 	}
6179 
6180       if (cmp_code == GTU || cmp_code == GEU)
6181 	{
6182 	  rtx tem = cmp_op0;
6183 	  cmp_op0 = cmp_op1;
6184 	  cmp_op1 = tem;
6185 	  cmp_code = swap_condition (cmp_code);
6186 	}
6187 
6188       switch (cmp_code)
6189 	{
6190 	  case LEU:
6191 	    cc_mode = CCUmode;
6192 	    break;
6193 
6194 	  case LTU:
6195 	    cc_mode = CCL3mode;
6196 	    break;
6197 
6198 	  default:
6199 	    return false;
6200 	}
6201 
6202       /* Emit comparison instruction pattern. */
6203       if (!register_operand (cmp_op0, cmp_mode))
6204 	cmp_op0 = force_reg (cmp_mode, cmp_op0);
6205 
6206       insn = gen_rtx_SET (gen_rtx_REG (cc_mode, CC_REGNUM),
6207 			  gen_rtx_COMPARE (cc_mode, cmp_op0, cmp_op1));
6208       /* We use insn_invalid_p here to add clobbers if required.  */
6209       ret = insn_invalid_p (emit_insn (insn), false);
6210       gcc_assert (!ret);
6211 
6212       /* Emit SLB instruction pattern.  */
6213       if (!register_operand (src, GET_MODE (dst)))
6214 	src = force_reg (GET_MODE (dst), src);
6215 
6216       op_res = gen_rtx_MINUS (GET_MODE (dst),
6217 			      gen_rtx_MINUS (GET_MODE (dst), src, const0_rtx),
6218 			      gen_rtx_fmt_ee (cmp_code, GET_MODE (dst),
6219 					      gen_rtx_REG (cc_mode, CC_REGNUM),
6220 					      const0_rtx));
6221       p = rtvec_alloc (2);
6222       RTVEC_ELT (p, 0) =
6223         gen_rtx_SET (dst, op_res);
6224       RTVEC_ELT (p, 1) =
6225 	gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, CC_REGNUM));
6226       emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
6227 
6228       return true;
6229     }
6230 
6231   return false;
6232 }
6233 
6234 /* Expand code for the insv template. Return true if successful.  */
6235 
6236 bool
6237 s390_expand_insv (rtx dest, rtx op1, rtx op2, rtx src)
6238 {
6239   int bitsize = INTVAL (op1);
6240   int bitpos = INTVAL (op2);
6241   machine_mode mode = GET_MODE (dest);
6242   machine_mode smode;
6243   int smode_bsize, mode_bsize;
6244   rtx op, clobber;
6245 
6246   if (bitsize + bitpos > GET_MODE_BITSIZE (mode))
6247     return false;
6248 
6249   /* Generate INSERT IMMEDIATE (IILL et al).  */
6250   /* (set (ze (reg)) (const_int)).  */
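  /* Example of the splitting done below (illustrative): with
     TARGET_EXTIMM, inserting a constant into a 32 bit wide, 32 bit
     aligned field of a 64 bit register takes a single 32 bit insert
     immediate; without extended immediates the same field is filled
     with two 16 bit insert immediates, consuming VAL from its low
     bits upwards.  */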
6251   if (TARGET_ZARCH
6252       && register_operand (dest, word_mode)
6253       && (bitpos % 16) == 0
6254       && (bitsize % 16) == 0
6255       && const_int_operand (src, VOIDmode))
6256     {
6257       HOST_WIDE_INT val = INTVAL (src);
6258       int regpos = bitpos + bitsize;
6259 
6260       while (regpos > bitpos)
6261 	{
6262 	  machine_mode putmode;
6263 	  int putsize;
6264 
6265 	  if (TARGET_EXTIMM && (regpos % 32 == 0) && (regpos >= bitpos + 32))
6266 	    putmode = SImode;
6267 	  else
6268 	    putmode = HImode;
6269 
6270 	  putsize = GET_MODE_BITSIZE (putmode);
6271 	  regpos -= putsize;
6272 	  emit_move_insn (gen_rtx_ZERO_EXTRACT (word_mode, dest,
6273 						GEN_INT (putsize),
6274 						GEN_INT (regpos)),
6275 			  gen_int_mode (val, putmode));
6276 	  val >>= putsize;
6277 	}
6278       gcc_assert (regpos == bitpos);
6279       return true;
6280     }
6281 
6282   smode = smallest_int_mode_for_size (bitsize);
6283   smode_bsize = GET_MODE_BITSIZE (smode);
6284   mode_bsize = GET_MODE_BITSIZE (mode);
6285 
6286   /* Generate STORE CHARACTERS UNDER MASK (STCM et al).  */
6287   if (bitpos == 0
6288       && (bitsize % BITS_PER_UNIT) == 0
6289       && MEM_P (dest)
6290       && (register_operand (src, word_mode)
6291 	  || const_int_operand (src, VOIDmode)))
6292     {
6293       /* Emit standard pattern if possible.  */
6294       if (smode_bsize == bitsize)
6295 	{
6296 	  emit_move_insn (adjust_address (dest, smode, 0),
6297 			  gen_lowpart (smode, src));
6298 	  return true;
6299 	}
6300 
6301       /* (set (ze (mem)) (const_int)).  */
6302       else if (const_int_operand (src, VOIDmode))
6303 	{
6304 	  int size = bitsize / BITS_PER_UNIT;
6305 	  rtx src_mem = adjust_address (force_const_mem (word_mode, src),
6306 					BLKmode,
6307 					UNITS_PER_WORD - size);
6308 
6309 	  dest = adjust_address (dest, BLKmode, 0);
6310 	  set_mem_size (dest, size);
6311 	  s390_expand_movmem (dest, src_mem, GEN_INT (size));
6312 	  return true;
6313 	}
6314 
6315       /* (set (ze (mem)) (reg)).  */
6316       else if (register_operand (src, word_mode))
6317 	{
6318 	  if (bitsize <= 32)
6319 	    emit_move_insn (gen_rtx_ZERO_EXTRACT (word_mode, dest, op1,
6320 						  const0_rtx), src);
6321 	  else
6322 	    {
6323 	      /* Emit st,stcmh sequence.  */
6324 	      int stcmh_width = bitsize - 32;
6325 	      int size = stcmh_width / BITS_PER_UNIT;
6326 
6327 	      emit_move_insn (adjust_address (dest, SImode, size),
6328 			      gen_lowpart (SImode, src));
6329 	      set_mem_size (dest, size);
6330 	      emit_move_insn (gen_rtx_ZERO_EXTRACT (word_mode, dest,
6331 						    GEN_INT (stcmh_width),
6332 						    const0_rtx),
6333 			      gen_rtx_LSHIFTRT (word_mode, src, GEN_INT (32)));
6334 	    }
6335 	  return true;
6336 	}
6337     }
6338 
6339   /* Generate INSERT CHARACTERS UNDER MASK (IC, ICM et al).  */
6340   if ((bitpos % BITS_PER_UNIT) == 0
6341       && (bitsize % BITS_PER_UNIT) == 0
6342       && (bitpos & 32) == ((bitpos + bitsize - 1) & 32)
6343       && MEM_P (src)
6344       && (mode == DImode || mode == SImode)
6345       && register_operand (dest, mode))
6346     {
6347       /* Emit a strict_low_part pattern if possible.  */
6348       if (smode_bsize == bitsize && bitpos == mode_bsize - smode_bsize)
6349 	{
6350 	  op = gen_rtx_STRICT_LOW_PART (VOIDmode, gen_lowpart (smode, dest));
6351 	  op = gen_rtx_SET (op, gen_lowpart (smode, src));
6352 	  clobber = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, CC_REGNUM));
6353 	  emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clobber)));
6354 	  return true;
6355 	}
6356 
6357       /* ??? There are more powerful versions of ICM that are not
6358 	 completely represented in the md file.  */
6359     }
6360 
6361   /* For z10, generate ROTATE THEN INSERT SELECTED BITS (RISBG et al).  */
6362   if (TARGET_Z10 && (mode == DImode || mode == SImode))
6363     {
6364       machine_mode mode_s = GET_MODE (src);
6365 
6366       if (CONSTANT_P (src))
6367 	{
6368 	  /* For constant zero values the representation with AND
6369 	     appears to be folded in more situations than the (set
6370 	     (zero_extract) ...).
6371 	     We only do this when the start and end of the bitfield
6372 	     remain in the same SImode chunk.  That way nihf or nilf
6373 	     can be used.
6374 	     The AND patterns might still generate a risbg for this.  */
6375 	  if (src == const0_rtx && bitpos / 32  == (bitpos + bitsize - 1) / 32)
6376 	    return false;
6377 	  else
6378 	    src = force_reg (mode, src);
6379 	}
6380       else if (mode_s != mode)
6381 	{
6382 	  gcc_assert (GET_MODE_BITSIZE (mode_s) >= bitsize);
6383 	  src = force_reg (mode_s, src);
6384 	  src = gen_lowpart (mode, src);
6385 	}
6386 
6387       op = gen_rtx_ZERO_EXTRACT (mode, dest, op1, op2),
6388       op = gen_rtx_SET (op, src);
6389 
6390       if (!TARGET_ZEC12)
6391 	{
6392 	  clobber = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, CC_REGNUM));
6393 	  op = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clobber));
6394 	}
6395       emit_insn (op);
6396 
6397       return true;
6398     }
6399 
6400   return false;
6401 }
6402 
6403 /* A subroutine of s390_expand_cs_hqi and s390_expand_atomic which returns a
6404    register that holds VAL of mode MODE shifted by COUNT bits.  */
6405 
6406 static inline rtx
6407 s390_expand_mask_and_shift (rtx val, machine_mode mode, rtx count)
6408 {
6409   val = expand_simple_binop (SImode, AND, val, GEN_INT (GET_MODE_MASK (mode)),
6410 			     NULL_RTX, 1, OPTAB_DIRECT);
6411   return expand_simple_binop (SImode, ASHIFT, val, count,
6412 			      NULL_RTX, 1, OPTAB_DIRECT);
6413 }
6414 
6415 /* Generate a vector comparison COND of CMP_OP1 and CMP_OP2 and store
6416    the result in TARGET.  */
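/* Example of the mapping performed below (illustrative): the vector
   compare instructions essentially provide only "equal" and "greater
   than" (signed and unsigned), so e.g. a signed a <= b on integer
   elements is emitted as the compare a > b followed by a NOT of the
   resulting mask, while a < b simply becomes the swapped compare
   b > a.  */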
6417 
6418 void
6419 s390_expand_vec_compare (rtx target, enum rtx_code cond,
6420 			 rtx cmp_op1, rtx cmp_op2)
6421 {
6422   machine_mode mode = GET_MODE (target);
6423   bool neg_p = false, swap_p = false;
6424   rtx tmp;
6425 
6426   if (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_VECTOR_FLOAT)
6427     {
6428       switch (cond)
6429 	{
6430 	  /* NE a != b -> !(a == b) */
6431 	case NE:   cond = EQ; neg_p = true;                break;
6432 	  /* UNGT a u> b -> !(b >= a) */
6433 	case UNGT: cond = GE; neg_p = true; swap_p = true; break;
6434 	  /* UNGE a u>= b -> !(b > a) */
6435 	case UNGE: cond = GT; neg_p = true; swap_p = true; break;
6436 	  /* LE: a <= b -> b >= a */
6437 	case LE:   cond = GE;               swap_p = true; break;
6438 	  /* UNLE: a u<= b -> !(a > b) */
6439 	case UNLE: cond = GT; neg_p = true;                break;
6440 	  /* LT: a < b -> b > a */
6441 	case LT:   cond = GT;               swap_p = true; break;
6442 	  /* UNLT: a u< b -> !(a >= b) */
6443 	case UNLT: cond = GE; neg_p = true;                break;
6444 	case UNEQ:
6445 	  emit_insn (gen_vec_cmpuneq (target, cmp_op1, cmp_op2));
6446 	  return;
6447 	case LTGT:
6448 	  emit_insn (gen_vec_cmpltgt (target, cmp_op1, cmp_op2));
6449 	  return;
6450 	case ORDERED:
6451 	  emit_insn (gen_vec_ordered (target, cmp_op1, cmp_op2));
6452 	  return;
6453 	case UNORDERED:
6454 	  emit_insn (gen_vec_unordered (target, cmp_op1, cmp_op2));
6455 	  return;
6456 	default: break;
6457 	}
6458     }
6459   else
6460     {
6461       switch (cond)
6462 	{
6463 	  /* NE: a != b -> !(a == b) */
6464 	case NE:  cond = EQ;  neg_p = true;                break;
6465 	  /* GE: a >= b -> !(b > a) */
6466 	case GE:  cond = GT;  neg_p = true; swap_p = true; break;
6467 	  /* GEU: a >= b -> !(b > a) */
6468 	case GEU: cond = GTU; neg_p = true; swap_p = true; break;
6469 	  /* LE: a <= b -> !(a > b) */
6470 	case LE:  cond = GT;  neg_p = true;                break;
6471 	  /* LEU: a <= b -> !(a > b) */
6472 	case LEU: cond = GTU; neg_p = true;                break;
6473 	  /* LT: a < b -> b > a */
6474 	case LT:  cond = GT;                swap_p = true; break;
6475 	  /* LTU: a < b -> b > a */
6476 	case LTU: cond = GTU;               swap_p = true; break;
6477 	default: break;
6478 	}
6479     }
6480 
6481   if (swap_p)
6482     {
6483       tmp = cmp_op1; cmp_op1 = cmp_op2; cmp_op2 = tmp;
6484     }
6485 
6486   emit_insn (gen_rtx_SET (target, gen_rtx_fmt_ee (cond,
6487 						  mode,
6488 						  cmp_op1, cmp_op2)));
6489   if (neg_p)
6490     emit_insn (gen_rtx_SET (target, gen_rtx_NOT (mode, target)));
6491 }
6492 
6493 /* Expand the comparison CODE of CMP1 and CMP2 and copy 1 or 0 into
6494    TARGET if either all (ALL_P is true) or any (ALL_P is false) of the
6495    elements in CMP1 and CMP2 fulfill the comparison.
6496    This function is only used to emit patterns for the vx builtins and
6497    therefore only handles comparison codes required by the
6498    builtins.  */
6499 void
6500 s390_expand_vec_compare_cc (rtx target, enum rtx_code code,
6501 			    rtx cmp1, rtx cmp2, bool all_p)
6502 {
6503   machine_mode cc_producer_mode, cc_consumer_mode, scratch_mode;
6504   rtx tmp_reg = gen_reg_rtx (SImode);
6505   bool swap_p = false;
6506 
6507   if (GET_MODE_CLASS (GET_MODE (cmp1)) == MODE_VECTOR_INT)
6508     {
6509       switch (code)
6510 	{
6511 	case EQ:
6512 	case NE:
6513 	  cc_producer_mode = CCVEQmode;
6514 	  break;
6515 	case GE:
6516 	case LT:
6517 	  code = swap_condition (code);
6518 	  swap_p = true;
6519 	  /* fallthrough */
6520 	case GT:
6521 	case LE:
6522 	  cc_producer_mode = CCVIHmode;
6523 	  break;
6524 	case GEU:
6525 	case LTU:
6526 	  code = swap_condition (code);
6527 	  swap_p = true;
6528 	  /* fallthrough */
6529 	case GTU:
6530 	case LEU:
6531 	  cc_producer_mode = CCVIHUmode;
6532 	  break;
6533 	default:
6534 	  gcc_unreachable ();
6535 	}
6536 
6537       scratch_mode = GET_MODE (cmp1);
6538       /* These codes represent inverted CC interpretations.  Inverting
6539 	 an ALL CC mode results in an ANY CC mode and the other way
6540 	 around.  Invert the all_p flag here to compensate for
6541 	 that.  */
6542       if (code == NE || code == LE || code == LEU)
6543 	all_p = !all_p;
6544 
6545       cc_consumer_mode = all_p ? CCVIALLmode : CCVIANYmode;
6546     }
6547   else if (GET_MODE_CLASS (GET_MODE (cmp1)) == MODE_VECTOR_FLOAT)
6548     {
6549       bool inv_p = false;
6550 
6551       switch (code)
6552 	{
6553 	case EQ:   cc_producer_mode = CCVEQmode;  break;
6554 	case NE:   cc_producer_mode = CCVEQmode;  inv_p = true; break;
6555 	case GT:   cc_producer_mode = CCVFHmode;  break;
6556 	case GE:   cc_producer_mode = CCVFHEmode; break;
6557 	case UNLE: cc_producer_mode = CCVFHmode;  inv_p = true; break;
6558 	case UNLT: cc_producer_mode = CCVFHEmode; inv_p = true; break;
6559 	case LT:   cc_producer_mode = CCVFHmode;  code = GT; swap_p = true; break;
6560 	case LE:   cc_producer_mode = CCVFHEmode; code = GE; swap_p = true; break;
6561 	default: gcc_unreachable ();
6562 	}
6563       scratch_mode = mode_for_int_vector (GET_MODE (cmp1)).require ();
6564 
6565       if (inv_p)
6566 	all_p = !all_p;
6567 
6568       cc_consumer_mode = all_p ? CCVFALLmode : CCVFANYmode;
6569     }
6570   else
6571     gcc_unreachable ();
6572 
6573   if (swap_p)
6574     {
6575       rtx tmp = cmp2;
6576       cmp2 = cmp1;
6577       cmp1 = tmp;
6578     }
6579 
6580   emit_insn (gen_rtx_PARALLEL (VOIDmode,
6581 	       gen_rtvec (2, gen_rtx_SET (
6582 			       gen_rtx_REG (cc_producer_mode, CC_REGNUM),
6583 			       gen_rtx_COMPARE (cc_producer_mode, cmp1, cmp2)),
6584 			  gen_rtx_CLOBBER (VOIDmode,
6585 					   gen_rtx_SCRATCH (scratch_mode)))));
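  /* Materialize the boolean result: TARGET starts out as 0 and is
     conditionally overwritten with 1 when the consumer condition on the
     CC register holds.  */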
6586   emit_move_insn (target, const0_rtx);
6587   emit_move_insn (tmp_reg, const1_rtx);
6588 
6589   emit_move_insn (target,
6590 		  gen_rtx_IF_THEN_ELSE (SImode,
6591 		    gen_rtx_fmt_ee (code, VOIDmode,
6592 				    gen_rtx_REG (cc_consumer_mode, CC_REGNUM),
6593 				    const0_rtx),
6594 					tmp_reg, target));
6595 }
6596 
6597 /* Invert the comparison CODE applied to a CC mode.  This is only safe
6598    if we know whether the result was created by a floating point
6599    compare or not.  For the CCV modes this is encoded as part of the
6600    mode.  */
6601 enum rtx_code
6602 s390_reverse_condition (machine_mode mode, enum rtx_code code)
6603 {
6604   /* Reversal of FP compares needs special care -- an ordered compare
6605      becomes an unordered compare and vice versa.  */
6606   if (mode == CCVFALLmode || mode == CCVFANYmode)
6607     return reverse_condition_maybe_unordered (code);
6608   else if (mode == CCVIALLmode || mode == CCVIANYmode)
6609     return reverse_condition (code);
6610   else
6611     gcc_unreachable ();
6612 }
6613 
6614 /* Generate a vector comparison expression loading either elements of
6615    THEN or ELS into TARGET depending on the comparison COND of CMP_OP1
6616    and CMP_OP2.  */
6617 
6618 void
6619 s390_expand_vcond (rtx target, rtx then, rtx els,
6620 		   enum rtx_code cond, rtx cmp_op1, rtx cmp_op2)
6621 {
6622   rtx tmp;
6623   machine_mode result_mode;
6624   rtx result_target;
6625 
6626   machine_mode target_mode = GET_MODE (target);
6627   machine_mode cmp_mode = GET_MODE (cmp_op1);
6628   rtx op = (cond == LT) ? els : then;
6629 
6630   /* Try to optimize x < 0 ? -1 : 0 into (signed) x >> 31
6631      and x < 0 ? 1 : 0 into (unsigned) x >> 31.  Likewise
6632      for short and byte (x >> 15 and x >> 7 respectively).  */
6633   if ((cond == LT || cond == GE)
6634       && target_mode == cmp_mode
6635       && cmp_op2 == CONST0_RTX (cmp_mode)
6636       && op == CONST0_RTX (target_mode)
6637       && s390_vector_mode_supported_p (target_mode)
6638       && GET_MODE_CLASS (target_mode) == MODE_VECTOR_INT)
6639     {
6640       rtx negop = (cond == LT) ? then : els;
6641 
6642       int shift = GET_MODE_BITSIZE (GET_MODE_INNER (target_mode)) - 1;
6643 
6644       /* if x < 0 ? 1 : 0 or if x >= 0 ? 0 : 1 */
6645       if (negop == CONST1_RTX (target_mode))
6646 	{
6647 	  rtx res = expand_simple_binop (cmp_mode, LSHIFTRT, cmp_op1,
6648 					 GEN_INT (shift), target,
6649 					 1, OPTAB_DIRECT);
6650 	  if (res != target)
6651 	    emit_move_insn (target, res);
6652 	  return;
6653 	}
6654 
6655       /* if x < 0 ? -1 : 0 or if x >= 0 ? 0 : -1 */
6656       else if (all_ones_operand (negop, target_mode))
6657 	{
6658 	  rtx res = expand_simple_binop (cmp_mode, ASHIFTRT, cmp_op1,
6659 					 GEN_INT (shift), target,
6660 					 0, OPTAB_DIRECT);
6661 	  if (res != target)
6662 	    emit_move_insn (target, res);
6663 	  return;
6664 	}
6665     }
6666 
6667   /* We always use an integral type vector to hold the comparison
6668      result.  */
6669   result_mode = mode_for_int_vector (cmp_mode).require ();
6670   result_target = gen_reg_rtx (result_mode);
6671 
6672   /* We allow vector immediates as comparison operands that
6673      can be handled by the optimization above but not by the
6674      following code.  Hence, force them into registers here.  */
6675   if (!REG_P (cmp_op1))
6676     cmp_op1 = force_reg (GET_MODE (cmp_op1), cmp_op1);
6677 
6678   if (!REG_P (cmp_op2))
6679     cmp_op2 = force_reg (GET_MODE (cmp_op2), cmp_op2);
6680 
6681   s390_expand_vec_compare (result_target, cond,
6682 			   cmp_op1, cmp_op2);
6683 
6684   /* If the results are supposed to be either -1 or 0 we are done
6685      since this is what our compare instructions generate anyway.  */
6686   if (all_ones_operand (then, GET_MODE (then))
6687       && const0_operand (els, GET_MODE (els)))
6688     {
6689       emit_move_insn (target, gen_rtx_SUBREG (target_mode,
6690 					      result_target, 0));
6691       return;
6692     }
6693 
6694   /* Otherwise we will do a vsel afterwards.  */
6695   /* This gets triggered e.g.
6696      with gcc.c-torture/compile/pr53410-1.c */
6697   if (!REG_P (then))
6698     then = force_reg (target_mode, then);
6699 
6700   if (!REG_P (els))
6701     els = force_reg (target_mode, els);
6702 
6703   tmp = gen_rtx_fmt_ee (EQ, VOIDmode,
6704 			result_target,
6705 			CONST0_RTX (result_mode));
6706 
6707   /* We compared the result against zero above so we have to swap then
6708      and els here.  */
6709   tmp = gen_rtx_IF_THEN_ELSE (target_mode, tmp, els, then);
6710 
6711   gcc_assert (target_mode == GET_MODE (then));
6712   emit_insn (gen_rtx_SET (target, tmp));
6713 }
6714 
6715 /* Emit the RTX necessary to initialize the vector TARGET with values
6716    in VALS.  */
6717 void
6718 s390_expand_vec_init (rtx target, rtx vals)
6719 {
6720   machine_mode mode = GET_MODE (target);
6721   machine_mode inner_mode = GET_MODE_INNER (mode);
6722   int n_elts = GET_MODE_NUNITS (mode);
6723   bool all_same = true, all_regs = true, all_const_int = true;
6724   rtx x;
6725   int i;
6726 
6727   for (i = 0; i < n_elts; ++i)
6728     {
6729       x = XVECEXP (vals, 0, i);
6730 
6731       if (!CONST_INT_P (x))
6732 	all_const_int = false;
6733 
6734       if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
6735 	all_same = false;
6736 
6737       if (!REG_P (x))
6738 	all_regs = false;
6739     }
6740 
6741   /* Use vector gen mask or vector gen byte mask if possible.  */
6742   if (all_same && all_const_int
6743       && (XVECEXP (vals, 0, 0) == const0_rtx
6744 	  || s390_contiguous_bitmask_vector_p (XVECEXP (vals, 0, 0),
6745 					       NULL, NULL)
6746 	  || s390_bytemask_vector_p (XVECEXP (vals, 0, 0), NULL)))
6747     {
6748       emit_insn (gen_rtx_SET (target,
6749 			      gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0))));
6750       return;
6751     }
6752 
6753   /* Use vector replicate instructions.  vlrep/vrepi/vrep  */
6754   if (all_same)
6755     {
6756       rtx elem = XVECEXP (vals, 0, 0);
6757 
6758       /* vec_splats accepts general_operand as source.  */
6759       if (!general_operand (elem, GET_MODE (elem)))
6760 	elem = force_reg (inner_mode, elem);
6761 
6762       emit_insn (gen_rtx_SET (target, gen_rtx_VEC_DUPLICATE (mode, elem)));
6763       return;
6764     }
6765 
6766   if (all_regs
6767       && REG_P (target)
6768       && n_elts == 2
6769       && GET_MODE_SIZE (inner_mode) == 8)
6770     {
6771       /* Use vector load pair.  */
6772       emit_insn (gen_rtx_SET (target,
6773 			      gen_rtx_VEC_CONCAT (mode,
6774 						  XVECEXP (vals, 0, 0),
6775 						  XVECEXP (vals, 0, 1))));
6776       return;
6777     }
6778 
6779   /* Use vector load logical element and zero.  */
6780   if (TARGET_VXE && (mode == V4SImode || mode == V4SFmode))
6781     {
6782       bool found = true;
6783 
6784       x = XVECEXP (vals, 0, 0);
6785       if (memory_operand (x, inner_mode))
6786 	{
6787 	  for (i = 1; i < n_elts; ++i)
6788 	    found = found && XVECEXP (vals, 0, i) == const0_rtx;
6789 
6790 	  if (found)
6791 	    {
6792 	      machine_mode half_mode = (inner_mode == SFmode
6793 					? V2SFmode : V2SImode);
6794 	      emit_insn (gen_rtx_SET (target,
6795 			      gen_rtx_VEC_CONCAT (mode,
6796 						  gen_rtx_VEC_CONCAT (half_mode,
6797 								      x,
6798 								      const0_rtx),
6799 						  gen_rtx_VEC_CONCAT (half_mode,
6800 								      const0_rtx,
6801 								      const0_rtx))));
6802 	      return;
6803 	    }
6804 	}
6805     }
6806 
6807   /* We are about to set the vector elements one by one.  Zero out the
6808      full register first in order to help the data flow framework to
6809      detect it as a full VR set.  */
6810   emit_insn (gen_rtx_SET (target, CONST0_RTX (mode)));
6811 
6812   /* Unfortunately the vec_init expander is not allowed to fail.  So
6813      we have to implement the fallback ourselves.  */
6814   for (i = 0; i < n_elts; i++)
6815     {
6816       rtx elem = XVECEXP (vals, 0, i);
6817       if (!general_operand (elem, GET_MODE (elem)))
6818 	elem = force_reg (inner_mode, elem);
6819 
6820       emit_insn (gen_rtx_SET (target,
6821 			      gen_rtx_UNSPEC (mode,
6822 					      gen_rtvec (3, elem,
6823 							 GEN_INT (i), target),
6824 					      UNSPEC_VEC_SET)));
6825     }
6826 }
6827 
6828 /* Structure to hold the initial parameters for a compare_and_swap operation
6829    in HImode and QImode.  */
6830 
6831 struct alignment_context
6832 {
6833   rtx memsi;	  /* SI aligned memory location.  */
6834   rtx shift;	  /* Bit offset with regard to lsb.  */
6835   rtx modemask;	  /* Mask of the HQImode shifted by SHIFT bits.  */
6836   rtx modemaski;  /* ~modemask */
6837   bool aligned;	  /* True if memory is aligned, false else.  */
6838 };
6839 
6840 /* A subroutine of s390_expand_cs_hqi and s390_expand_atomic to initialize
6841    structure AC for transparent simplification if the memory alignment is known
6842    to be at least 32 bits.  MEM is the memory location for the actual operation
6843    and MODE its mode.  */
6844 
6845 static void
6846 init_alignment_context (struct alignment_context *ac, rtx mem,
6847 			machine_mode mode)
6848 {
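  /* On big-endian S/390 a MODE value placed at the start of an aligned
     SImode word lies GET_MODE_SIZE (SImode) - GET_MODE_SIZE (mode) bytes
     above the least significant byte; use that as the default shift.  */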
6849   ac->shift = GEN_INT (GET_MODE_SIZE (SImode) - GET_MODE_SIZE (mode));
6850   ac->aligned = (MEM_ALIGN (mem) >= GET_MODE_BITSIZE (SImode));
6851 
6852   if (ac->aligned)
6853     ac->memsi = adjust_address (mem, SImode, 0); /* Memory is aligned.  */
6854   else
6855     {
6856       /* Alignment is unknown.  */
6857       rtx byteoffset, addr, align;
6858 
6859       /* Force the address into a register.  */
6860       addr = force_reg (Pmode, XEXP (mem, 0));
6861 
6862       /* Align it to SImode.  */
6863       align = expand_simple_binop (Pmode, AND, addr,
6864 				   GEN_INT (-GET_MODE_SIZE (SImode)),
6865 				   NULL_RTX, 1, OPTAB_DIRECT);
6866       /* Generate MEM.  */
6867       ac->memsi = gen_rtx_MEM (SImode, align);
6868       MEM_VOLATILE_P (ac->memsi) = MEM_VOLATILE_P (mem);
6869       set_mem_alias_set (ac->memsi, ALIAS_SET_MEMORY_BARRIER);
6870       set_mem_align (ac->memsi, GET_MODE_BITSIZE (SImode));
6871 
6872       /* Calculate shiftcount.  */
6873       byteoffset = expand_simple_binop (Pmode, AND, addr,
6874 					GEN_INT (GET_MODE_SIZE (SImode) - 1),
6875 					NULL_RTX, 1, OPTAB_DIRECT);
6876       /* As we already have some offset, evaluate the remaining distance.  */
6877       ac->shift = expand_simple_binop (SImode, MINUS, ac->shift, byteoffset,
6878 				      NULL_RTX, 1, OPTAB_DIRECT);
6879     }
6880 
6881   /* Shift is the byte count, but we need the bitcount.  */
6882   ac->shift = expand_simple_binop (SImode, ASHIFT, ac->shift, GEN_INT (3),
6883 				   NULL_RTX, 1, OPTAB_DIRECT);
6884 
6885   /* Calculate masks.  */
6886   ac->modemask = expand_simple_binop (SImode, ASHIFT,
6887 				      GEN_INT (GET_MODE_MASK (mode)),
6888 				      ac->shift, NULL_RTX, 1, OPTAB_DIRECT);
6889   ac->modemaski = expand_simple_unop (SImode, NOT, ac->modemask,
6890 				      NULL_RTX, 1);
6891 }
6892 
6893 /* A subroutine of s390_expand_cs_hqi.  Insert INS into VAL.  If possible,
6894    use a single insv insn into SEQ2.  Otherwise, put prep insns in SEQ1 and
6895    perform the merge in SEQ2.  */
6896 
6897 static rtx
6898 s390_two_part_insv (struct alignment_context *ac, rtx *seq1, rtx *seq2,
6899 		    machine_mode mode, rtx val, rtx ins)
6900 {
6901   rtx tmp;
6902 
6903   if (ac->aligned)
6904     {
6905       start_sequence ();
6906       tmp = copy_to_mode_reg (SImode, val);
6907       if (s390_expand_insv (tmp, GEN_INT (GET_MODE_BITSIZE (mode)),
6908 			    const0_rtx, ins))
6909 	{
6910 	  *seq1 = NULL;
6911 	  *seq2 = get_insns ();
6912 	  end_sequence ();
6913 	  return tmp;
6914 	}
6915       end_sequence ();
6916     }
6917 
6918   /* Failed to use insv.  Generate a two part shift and mask.  */
6919   start_sequence ();
6920   tmp = s390_expand_mask_and_shift (ins, mode, ac->shift);
6921   *seq1 = get_insns ();
6922   end_sequence ();
6923 
6924   start_sequence ();
6925   tmp = expand_simple_binop (SImode, IOR, tmp, val, NULL_RTX, 1, OPTAB_DIRECT);
6926   *seq2 = get_insns ();
6927   end_sequence ();
6928 
6929   return tmp;
6930 }
6931 
6932 /* Expand an atomic compare and swap operation for HImode and QImode.  MEM is
6933    the memory location, CMP the old value to compare MEM with and NEW_RTX the
6934    value to set if CMP == MEM.  */
6935 
6936 static void
6937 s390_expand_cs_hqi (machine_mode mode, rtx btarget, rtx vtarget, rtx mem,
6938 		    rtx cmp, rtx new_rtx, bool is_weak)
6939 {
6940   struct alignment_context ac;
6941   rtx cmpv, newv, val, cc, seq0, seq1, seq2, seq3;
6942   rtx res = gen_reg_rtx (SImode);
6943   rtx_code_label *csloop = NULL, *csend = NULL;
6944 
6945   gcc_assert (MEM_P (mem));
6946 
6947   init_alignment_context (&ac, mem, mode);
6948 
6949   /* Load full word.  Subsequent loads are performed by CS.  */
6950   val = expand_simple_binop (SImode, AND, ac.memsi, ac.modemaski,
6951 			     NULL_RTX, 1, OPTAB_DIRECT);
6952 
6953   /* Prepare insertions of cmp and new_rtx into the loaded value.  When
6954      possible, we try to use insv to make this happen efficiently.  If
6955      that fails we'll generate code both inside and outside the loop.  */
6956   cmpv = s390_two_part_insv (&ac, &seq0, &seq2, mode, val, cmp);
6957   newv = s390_two_part_insv (&ac, &seq1, &seq3, mode, val, new_rtx);
6958 
6959   if (seq0)
6960     emit_insn (seq0);
6961   if (seq1)
6962     emit_insn (seq1);
6963 
6964   /* Start CS loop.  */
6965   if (!is_weak)
6966     {
6967       /* Begin assuming success.  */
6968       emit_move_insn (btarget, const1_rtx);
6969 
6970       csloop = gen_label_rtx ();
6971       csend = gen_label_rtx ();
6972       emit_label (csloop);
6973     }
6974 
6975   /* val = "<mem>00..0<mem>"
6976    * cmp = "00..0<cmp>00..0"
6977    * new = "00..0<new>00..0"
6978    */
6979 
6980   emit_insn (seq2);
6981   emit_insn (seq3);
6982 
6983   cc = s390_emit_compare_and_swap (EQ, res, ac.memsi, cmpv, newv, CCZ1mode);
6984   if (is_weak)
6985     emit_insn (gen_cstorecc4 (btarget, cc, XEXP (cc, 0), XEXP (cc, 1)));
6986   else
6987     {
6988       rtx tmp;
6989 
6990       /* Jump to end if we're done (likely?).  */
6991       s390_emit_jump (csend, cc);
6992 
6993       /* Check for changes outside the mode, and loop internally if so.
6994 	 Arrange the moves so that the compare is adjacent to the
6995 	 branch so that we can generate CRJ.  */
6996       tmp = copy_to_reg (val);
6997       force_expand_binop (SImode, and_optab, res, ac.modemaski, val,
6998 			  1, OPTAB_DIRECT);
6999       cc = s390_emit_compare (NE, val, tmp);
7000       s390_emit_jump (csloop, cc);
7001 
7002       /* Failed.  */
7003       emit_move_insn (btarget, const0_rtx);
7004       emit_label (csend);
7005     }
7006 
7007   /* Return the correct part of the bitfield.  */
7008   convert_move (vtarget, expand_simple_binop (SImode, LSHIFTRT, res, ac.shift,
7009 					      NULL_RTX, 1, OPTAB_DIRECT), 1);
7010 }
7011 
7012 /* Variant of s390_expand_cs for SI, DI and TI modes.  */
7013 static void
7014 s390_expand_cs_tdsi (machine_mode mode, rtx btarget, rtx vtarget, rtx mem,
7015 		     rtx cmp, rtx new_rtx, bool is_weak)
7016 {
7017   rtx output = vtarget;
7018   rtx_code_label *skip_cs_label = NULL;
7019   bool do_const_opt = false;
7020 
7021   if (!register_operand (output, mode))
7022     output = gen_reg_rtx (mode);
7023 
7024   /* If IS_WEAK is true and the INPUT value is a constant, compare the memory
7025      with the constant first and skip the compare_and_swap because it is very
7026      expensive and likely to fail anyway.
7027      Note 1: This is done only for IS_WEAK.  C11 allows optimizations that may
7028      cause spurious failures in that case.
7029      Note 2: It may be useful to do this also for non-constant INPUT.
7030      Note 3: Currently only targets with "load on condition" are supported
7031      (z196 and newer).  */
7032 
7033   if (TARGET_Z196
7034       && (mode == SImode || mode == DImode))
7035     do_const_opt = (is_weak && CONST_INT_P (cmp));
7036 
7037   if (do_const_opt)
7038     {
7039       rtx cc = gen_rtx_REG (CCZmode, CC_REGNUM);
7040 
7041       skip_cs_label = gen_label_rtx ();
7042       emit_move_insn (btarget, const0_rtx);
7043       if (CONST_INT_P (cmp) && INTVAL (cmp) == 0)
7044 	{
7045 	  rtvec lt = rtvec_alloc (2);
7046 
7047 	  /* Load-and-test + conditional jump.  */
7048 	  RTVEC_ELT (lt, 0)
7049 	    = gen_rtx_SET (cc, gen_rtx_COMPARE (CCZmode, mem, cmp));
7050 	  RTVEC_ELT (lt, 1) = gen_rtx_SET (output, mem);
7051 	  emit_insn (gen_rtx_PARALLEL (VOIDmode, lt));
7052 	}
7053       else
7054 	{
7055 	  emit_move_insn (output, mem);
7056 	  emit_insn (gen_rtx_SET (cc, gen_rtx_COMPARE (CCZmode, output, cmp)));
7057 	}
7058       s390_emit_jump (skip_cs_label, gen_rtx_NE (VOIDmode, cc, const0_rtx));
7059       add_reg_br_prob_note (get_last_insn (),
7060 		            profile_probability::very_unlikely ());
7061       /* If the jump is not taken, OUTPUT is the expected value.  */
7062       cmp = output;
7063       /* Reload newval to a register manually, *after* the compare and jump
7064 	 above.  Otherwise Reload might place it before the jump.  */
7065     }
7066   else
7067     cmp = force_reg (mode, cmp);
7068   new_rtx = force_reg (mode, new_rtx);
7069   s390_emit_compare_and_swap (EQ, output, mem, cmp, new_rtx,
7070 			      (do_const_opt) ? CCZmode : CCZ1mode);
7071   if (skip_cs_label != NULL)
7072     emit_label (skip_cs_label);
7073 
7074   /* We deliberately accept non-register operands in the predicate
7075      to ensure the write back to the output operand happens *before*
7076      the store-flags code below.  This makes it easier for combine
7077      to merge the store-flags code with a potential test-and-branch
7078      pattern following (immediately!) afterwards.  */
7079   if (output != vtarget)
7080     emit_move_insn (vtarget, output);
7081 
7082   if (do_const_opt)
7083     {
7084       rtx cc, cond, ite;
7085 
7086       /* Do not use gen_cstorecc4 here because it writes either 1 or 0, but
7087 	 btarget has already been initialized with 0 above.  */
7088       cc = gen_rtx_REG (CCZmode, CC_REGNUM);
7089       cond = gen_rtx_EQ (VOIDmode, cc, const0_rtx);
7090       ite = gen_rtx_IF_THEN_ELSE (SImode, cond, const1_rtx, btarget);
7091       emit_insn (gen_rtx_SET (btarget, ite));
7092     }
7093   else
7094     {
7095       rtx cc, cond;
7096 
7097       cc = gen_rtx_REG (CCZ1mode, CC_REGNUM);
7098       cond = gen_rtx_EQ (SImode, cc, const0_rtx);
7099       emit_insn (gen_cstorecc4 (btarget, cond, cc, const0_rtx));
7100     }
7101 }
7102 
7103 /* Expand an atomic compare and swap operation.  MEM is the memory location,
7104    CMP the old value to compare MEM with and NEW_RTX the value to set if
7105    CMP == MEM.  */
7106 
7107 void
7108 s390_expand_cs (machine_mode mode, rtx btarget, rtx vtarget, rtx mem,
7109 		rtx cmp, rtx new_rtx, bool is_weak)
7110 {
7111   switch (mode)
7112     {
7113     case E_TImode:
7114     case E_DImode:
7115     case E_SImode:
7116       s390_expand_cs_tdsi (mode, btarget, vtarget, mem, cmp, new_rtx, is_weak);
7117       break;
7118     case E_HImode:
7119     case E_QImode:
7120       s390_expand_cs_hqi (mode, btarget, vtarget, mem, cmp, new_rtx, is_weak);
7121       break;
7122     default:
7123       gcc_unreachable ();
7124     }
7125 }
7126 
7127 /* Expand an atomic_exchange operation simulated with a compare-and-swap loop.
7128    The memory location MEM is set to INPUT.  OUTPUT is set to the previous value
7129    of MEM.  */
7130 
7131 void
7132 s390_expand_atomic_exchange_tdsi (rtx output, rtx mem, rtx input)
7133 {
7134   machine_mode mode = GET_MODE (mem);
7135   rtx_code_label *csloop;
7136 
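  /* Exchanging with zero needs no CS loop: an atomic fetch-and-AND with
     zero clears MEM and returns its previous value, which the
     interlocked-access facility of z196 and later provides directly.  */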
7137   if (TARGET_Z196
7138       && (mode == DImode || mode == SImode)
7139       && CONST_INT_P (input) && INTVAL (input) == 0)
7140     {
7141       emit_move_insn (output, const0_rtx);
7142       if (mode == DImode)
7143 	emit_insn (gen_atomic_fetch_anddi (output, mem, const0_rtx, input));
7144       else
7145 	emit_insn (gen_atomic_fetch_andsi (output, mem, const0_rtx, input));
7146       return;
7147     }
7148 
7149   input = force_reg (mode, input);
7150   emit_move_insn (output, mem);
7151   csloop = gen_label_rtx ();
7152   emit_label (csloop);
7153   s390_emit_jump (csloop, s390_emit_compare_and_swap (NE, output, mem, output,
7154 						      input, CCZ1mode));
7155 }
7156 
7157 /* Expand an atomic operation CODE of mode MODE.  MEM is the memory location
7158    and VAL the value to play with.  If AFTER is true then store the value
7159    MEM holds after the operation, if AFTER is false then store the value MEM
7160    holds before the operation.  If TARGET is zero then discard that value, else
7161    store it to TARGET.  */
7162 
7163 void
7164 s390_expand_atomic (machine_mode mode, enum rtx_code code,
7165 		    rtx target, rtx mem, rtx val, bool after)
7166 {
7167   struct alignment_context ac;
7168   rtx cmp;
7169   rtx new_rtx = gen_reg_rtx (SImode);
7170   rtx orig = gen_reg_rtx (SImode);
7171   rtx_code_label *csloop = gen_label_rtx ();
7172 
7173   gcc_assert (!target || register_operand (target, VOIDmode));
7174   gcc_assert (MEM_P (mem));
7175 
7176   init_alignment_context (&ac, mem, mode);
7177 
7178   /* Shift val to the correct bit positions.
7179      Preserve "icm", but prevent "ex icm".  */
7180   if (!(ac.aligned && code == SET && MEM_P (val)))
7181     val = s390_expand_mask_and_shift (val, mode, ac.shift);
7182 
7183   /* Further preparation insns.  */
7184   if (code == PLUS || code == MINUS)
7185     emit_move_insn (orig, val);
7186   else if (code == MULT || code == AND) /* val = "11..1<val>11..1" */
7187     val = expand_simple_binop (SImode, XOR, val, ac.modemaski,
7188 			       NULL_RTX, 1, OPTAB_DIRECT);
7189 
7190   /* Load full word.  Subsequent loads are performed by CS.  */
7191   cmp = force_reg (SImode, ac.memsi);
7192 
7193   /* Start CS loop.  */
7194   emit_label (csloop);
7195   emit_move_insn (new_rtx, cmp);
7196 
7197   /* Patch new with val at correct position.  */
7198   switch (code)
7199     {
7200     case PLUS:
7201     case MINUS:
7202       val = expand_simple_binop (SImode, code, new_rtx, orig,
7203 				 NULL_RTX, 1, OPTAB_DIRECT);
7204       val = expand_simple_binop (SImode, AND, val, ac.modemask,
7205 				 NULL_RTX, 1, OPTAB_DIRECT);
7206       /* FALLTHRU */
7207     case SET:
7208       if (ac.aligned && MEM_P (val))
7209 	store_bit_field (new_rtx, GET_MODE_BITSIZE (mode), 0,
7210 			 0, 0, SImode, val, false);
7211       else
7212 	{
7213 	  new_rtx = expand_simple_binop (SImode, AND, new_rtx, ac.modemaski,
7214 				     NULL_RTX, 1, OPTAB_DIRECT);
7215 	  new_rtx = expand_simple_binop (SImode, IOR, new_rtx, val,
7216 				     NULL_RTX, 1, OPTAB_DIRECT);
7217 	}
7218       break;
7219     case AND:
7220     case IOR:
7221     case XOR:
7222       new_rtx = expand_simple_binop (SImode, code, new_rtx, val,
7223 				 NULL_RTX, 1, OPTAB_DIRECT);
7224       break;
7225     case MULT: /* NAND */
7226       new_rtx = expand_simple_binop (SImode, AND, new_rtx, val,
7227 				 NULL_RTX, 1, OPTAB_DIRECT);
7228       new_rtx = expand_simple_binop (SImode, XOR, new_rtx, ac.modemask,
7229 				 NULL_RTX, 1, OPTAB_DIRECT);
7230       break;
7231     default:
7232       gcc_unreachable ();
7233     }
7234 
7235   s390_emit_jump (csloop, s390_emit_compare_and_swap (NE, cmp,
7236 						      ac.memsi, cmp, new_rtx,
7237 						      CCZ1mode));
7238 
7239   /* Return the correct part of the bitfield.  */
7240   if (target)
7241     convert_move (target, expand_simple_binop (SImode, LSHIFTRT,
7242 					       after ? new_rtx : cmp, ac.shift,
7243 					       NULL_RTX, 1, OPTAB_DIRECT), 1);
7244 }
7245 
7246 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
7247    We need to emit DTP-relative relocations.  */
7248 
7249 static void s390_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
7250 
7251 static void
7252 s390_output_dwarf_dtprel (FILE *file, int size, rtx x)
7253 {
7254   switch (size)
7255     {
7256     case 4:
7257       fputs ("\t.long\t", file);
7258       break;
7259     case 8:
7260       fputs ("\t.quad\t", file);
7261       break;
7262     default:
7263       gcc_unreachable ();
7264     }
7265   output_addr_const (file, x);
7266   fputs ("@DTPOFF", file);
7267 }
7268 
7269 /* Return the proper mode for REGNO being represented in the dwarf
7270    unwind table.  */
7271 machine_mode
7272 s390_dwarf_frame_reg_mode (int regno)
7273 {
7274   machine_mode save_mode = default_dwarf_frame_reg_mode (regno);
7275 
7276   /* Make sure not to return DImode for any GPR with -m31 -mzarch.  */
7277   if (GENERAL_REGNO_P (regno))
7278     save_mode = Pmode;
7279 
7280   /* The rightmost 64 bits of vector registers are call-clobbered.  */
7281   if (GET_MODE_SIZE (save_mode) > 8)
7282     save_mode = DImode;
7283 
7284   return save_mode;
7285 }
7286 
7287 #ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
7288 /* Implement TARGET_MANGLE_TYPE.  */
7289 
7290 static const char *
7291 s390_mangle_type (const_tree type)
7292 {
7293   type = TYPE_MAIN_VARIANT (type);
7294 
7295   if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
7296       && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
7297     return NULL;
7298 
7299   if (type == s390_builtin_types[BT_BV16QI]) return "U6__boolc";
7300   if (type == s390_builtin_types[BT_BV8HI]) return "U6__bools";
7301   if (type == s390_builtin_types[BT_BV4SI]) return "U6__booli";
7302   if (type == s390_builtin_types[BT_BV2DI]) return "U6__booll";
7303 
7304   if (TYPE_MAIN_VARIANT (type) == long_double_type_node
7305       && TARGET_LONG_DOUBLE_128)
7306     return "g";
7307 
7308   /* For all other types, use normal C++ mangling.  */
7309   return NULL;
7310 }
7311 #endif
7312 
7313 /* In the name of slightly smaller debug output, and to cater to
7314    general assembler lossage, recognize various UNSPEC sequences
7315    and turn them back into a direct symbol reference.  */
7316 
7317 static rtx
7318 s390_delegitimize_address (rtx orig_x)
7319 {
7320   rtx x, y;
7321 
7322   orig_x = delegitimize_mem_from_attrs (orig_x);
7323   x = orig_x;
7324 
7325   /* Extract the symbol ref from:
7326      (plus:SI (reg:SI 12 %r12)
7327               (const:SI (unspec:SI [(symbol_ref/f:SI ("*.LC0"))]
7328 	                            UNSPEC_GOTOFF/PLTOFF)))
7329      and
7330      (plus:SI (reg:SI 12 %r12)
7331               (const:SI (plus:SI (unspec:SI [(symbol_ref:SI ("L"))]
7332                                              UNSPEC_GOTOFF/PLTOFF)
7333 				 (const_int 4 [0x4]))))  */
7334   if (GET_CODE (x) == PLUS
7335       && REG_P (XEXP (x, 0))
7336       && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM
7337       && GET_CODE (XEXP (x, 1)) == CONST)
7338     {
7339       HOST_WIDE_INT offset = 0;
7340 
7341       /* The const operand.  */
7342       y = XEXP (XEXP (x, 1), 0);
7343 
7344       if (GET_CODE (y) == PLUS
7345 	  && GET_CODE (XEXP (y, 1)) == CONST_INT)
7346 	{
7347 	  offset = INTVAL (XEXP (y, 1));
7348 	  y = XEXP (y, 0);
7349 	}
7350 
7351       if (GET_CODE (y) == UNSPEC
7352 	  && (XINT (y, 1) == UNSPEC_GOTOFF
7353 	      || XINT (y, 1) == UNSPEC_PLTOFF))
7354 	return plus_constant (Pmode, XVECEXP (y, 0, 0), offset);
7355     }
7356 
7357   if (GET_CODE (x) != MEM)
7358     return orig_x;
7359 
7360   x = XEXP (x, 0);
7361   if (GET_CODE (x) == PLUS
7362       && GET_CODE (XEXP (x, 1)) == CONST
7363       && GET_CODE (XEXP (x, 0)) == REG
7364       && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
7365     {
7366       y = XEXP (XEXP (x, 1), 0);
7367       if (GET_CODE (y) == UNSPEC
7368 	  && XINT (y, 1) == UNSPEC_GOT)
7369 	y = XVECEXP (y, 0, 0);
7370       else
7371 	return orig_x;
7372     }
7373   else if (GET_CODE (x) == CONST)
7374     {
7375       /* Extract the symbol ref from:
7376 	 (mem:QI (const:DI (unspec:DI [(symbol_ref:DI ("foo"))]
7377 	                               UNSPEC_PLT/GOTENT)))  */
7378 
7379       y = XEXP (x, 0);
7380       if (GET_CODE (y) == UNSPEC
7381 	  && (XINT (y, 1) == UNSPEC_GOTENT
7382 	      || XINT (y, 1) == UNSPEC_PLT))
7383 	y = XVECEXP (y, 0, 0);
7384       else
7385 	return orig_x;
7386     }
7387   else
7388     return orig_x;
7389 
7390   if (GET_MODE (orig_x) != Pmode)
7391     {
7392       if (GET_MODE (orig_x) == BLKmode)
7393 	return orig_x;
7394       y = lowpart_subreg (GET_MODE (orig_x), y, Pmode);
7395       if (y == NULL_RTX)
7396 	return orig_x;
7397     }
7398   return y;
7399 }
7400 
7401 /* Output operand OP to stdio stream FILE.
7402    OP is an address (register + offset) which is not used to address data;
7403    instead the rightmost bits are interpreted as the value.  */
7404 
7405 static void
7406 print_addrstyle_operand (FILE *file, rtx op)
7407 {
7408   HOST_WIDE_INT offset;
7409   rtx base;
7410 
7411   /* Extract base register and offset.  */
7412   if (!s390_decompose_addrstyle_without_index (op, &base, &offset))
7413     gcc_unreachable ();
7414 
7415   /* Sanity check.  */
7416   if (base)
7417     {
7418       gcc_assert (GET_CODE (base) == REG);
7419       gcc_assert (REGNO (base) < FIRST_PSEUDO_REGISTER);
7420       gcc_assert (REGNO_REG_CLASS (REGNO (base)) == ADDR_REGS);
7421     }
7422 
7423   /* Offsets are restricted to twelve bits.  */
7424   fprintf (file, HOST_WIDE_INT_PRINT_DEC, offset & ((1 << 12) - 1));
7425   if (base)
7426     fprintf (file, "(%s)", reg_names[REGNO (base)]);
7427 }
7428 
7429 /* Assigns the number of NOP halfwords to be emitted before and after the
7430    function label to *HW_BEFORE and *HW_AFTER.  Both pointers must not be NULL.
7431    If hotpatching is disabled for the function, the values are set to zero.
7432 */
7433 
7434 static void
7435 s390_function_num_hotpatch_hw (tree decl,
7436 			       int *hw_before,
7437 			       int *hw_after)
7438 {
7439   tree attr;
7440 
7441   attr = lookup_attribute ("hotpatch", DECL_ATTRIBUTES (decl));
7442 
7443   /* Handle the arguments of the hotpatch attribute.  The values
7444      specified via attribute might override the cmdline argument
7445      values.  */
7446   if (attr)
7447     {
7448       tree args = TREE_VALUE (attr);
7449 
7450       *hw_before = TREE_INT_CST_LOW (TREE_VALUE (args));
7451       *hw_after = TREE_INT_CST_LOW (TREE_VALUE (TREE_CHAIN (args)));
7452     }
7453   else
7454     {
7455       /* Use the values specified by the cmdline arguments.  */
7456       *hw_before = s390_hotpatch_hw_before_label;
7457       *hw_after = s390_hotpatch_hw_after_label;
7458     }
7459 }
7460 
7461 /* Write the current .machine and .machinemode specification to the assembler
7462    file.  */
7463 
7464 #ifdef HAVE_AS_MACHINE_MACHINEMODE
7465 static void
7466 s390_asm_output_machine_for_arch (FILE *asm_out_file)
7467 {
7468   fprintf (asm_out_file, "\t.machinemode %s\n",
7469 	   (TARGET_ZARCH) ? "zarch" : "esa");
7470   fprintf (asm_out_file, "\t.machine \"%s",
7471 	   processor_table[s390_arch].binutils_name);
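  /* Append facility modifiers only where the selected options differ from
     what the base architecture level already implies.  */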
7472   if (S390_USE_ARCHITECTURE_MODIFIERS)
7473     {
7474       int cpu_flags;
7475 
7476       cpu_flags = processor_flags_table[(int) s390_arch];
7477       if (TARGET_HTM && !(cpu_flags & PF_TX))
7478 	fprintf (asm_out_file, "+htm");
7479       else if (!TARGET_HTM && (cpu_flags & PF_TX))
7480 	fprintf (asm_out_file, "+nohtm");
7481       if (TARGET_VX && !(cpu_flags & PF_VX))
7482 	fprintf (asm_out_file, "+vx");
7483       else if (!TARGET_VX && (cpu_flags & PF_VX))
7484 	fprintf (asm_out_file, "+novx");
7485     }
7486   fprintf (asm_out_file, "\"\n");
7487 }
7488 
7489 /* Write an extra function header before the very start of the function.  */
7490 
7491 void
7492 s390_asm_output_function_prefix (FILE *asm_out_file,
7493 				 const char *fnname ATTRIBUTE_UNUSED)
7494 {
7495   if (DECL_FUNCTION_SPECIFIC_TARGET (current_function_decl) == NULL)
7496     return;
7497   /* Since only the function specific options are saved but not the indications
7498      which options are set, it's too much work here to figure out which options
7499      have actually changed.  Thus, generate .machine and .machinemode whenever a
7500      function has the target attribute or pragma.  */
7501   fprintf (asm_out_file, "\t.machinemode push\n");
7502   fprintf (asm_out_file, "\t.machine push\n");
7503   s390_asm_output_machine_for_arch (asm_out_file);
7504 }
7505 
7506 /* Write an extra function footer after the very end of the function.  */
7507 
7508 void
7509 s390_asm_declare_function_size (FILE *asm_out_file,
7510 				const char *fnname, tree decl)
7511 {
7512   if (!flag_inhibit_size_directive)
7513     ASM_OUTPUT_MEASURED_SIZE (asm_out_file, fnname);
7514   if (DECL_FUNCTION_SPECIFIC_TARGET (decl) == NULL)
7515     return;
7516   fprintf (asm_out_file, "\t.machine pop\n");
7517   fprintf (asm_out_file, "\t.machinemode pop\n");
7518 }
7519 #endif
7520 
7521 /* Write the extra assembler code needed to declare a function properly.  */
7522 
7523 void
7524 s390_asm_output_function_label (FILE *asm_out_file, const char *fname,
7525 				tree decl)
7526 {
7527   int hw_before, hw_after;
7528 
7529   s390_function_num_hotpatch_hw (decl, &hw_before, &hw_after);
7530   if (hw_before > 0)
7531     {
7532       unsigned int function_alignment;
7533       int i;
7534 
7535       /* Add a trampoline code area before the function label and initialize it
7536 	 with two-byte nop instructions.  This area can be overwritten with code
7537 	 that jumps to a patched version of the function.  */
7538       asm_fprintf (asm_out_file, "\tnopr\t%%r0"
7539 		   "\t# pre-label NOPs for hotpatch (%d halfwords)\n",
7540 		   hw_before);
7541       for (i = 1; i < hw_before; i++)
7542 	fputs ("\tnopr\t%r0\n", asm_out_file);
7543 
7544       /* Note:  The function label must be aligned so that (a) the bytes of the
7545 	 following nop do not cross a cacheline boundary, and (b) a jump address
7546 	 (eight bytes for 64 bit targets, 4 bytes for 32 bit targets) can be
7547 	 stored directly before the label without crossing a cacheline
7548 	 boundary.  All this is necessary to make sure the trampoline code can
7549 	 be changed atomically.
7550 	 This alignment is done automatically using the FUNCTION_BOUNDARY, but
7551 	 if there are NOPs before the function label, the alignment is placed
7552 	 before them.  So it is necessary to duplicate the alignment after the
7553 	 NOPs.  */
7554       function_alignment = MAX (8, DECL_ALIGN (decl) / BITS_PER_UNIT);
7555       if (! DECL_USER_ALIGN (decl))
7556 	function_alignment = MAX (function_alignment,
7557 				  (unsigned int) align_functions);
7558       fputs ("\t# alignment for hotpatch\n", asm_out_file);
7559       ASM_OUTPUT_ALIGN (asm_out_file, floor_log2 (function_alignment));
7560     }
7561 
7562   if (S390_USE_TARGET_ATTRIBUTE && TARGET_DEBUG_ARG)
7563     {
7564       asm_fprintf (asm_out_file, "\t# fn:%s ar%d\n", fname, s390_arch);
7565       asm_fprintf (asm_out_file, "\t# fn:%s tu%d\n", fname, s390_tune);
7566       asm_fprintf (asm_out_file, "\t# fn:%s sg%d\n", fname, s390_stack_guard);
7567       asm_fprintf (asm_out_file, "\t# fn:%s ss%d\n", fname, s390_stack_size);
7568       asm_fprintf (asm_out_file, "\t# fn:%s bc%d\n", fname, s390_branch_cost);
7569       asm_fprintf (asm_out_file, "\t# fn:%s wf%d\n", fname,
7570 		   s390_warn_framesize);
7571       asm_fprintf (asm_out_file, "\t# fn:%s ba%d\n", fname, TARGET_BACKCHAIN);
7572       asm_fprintf (asm_out_file, "\t# fn:%s hd%d\n", fname, TARGET_HARD_DFP);
7573       asm_fprintf (asm_out_file, "\t# fn:%s hf%d\n", fname, !TARGET_SOFT_FLOAT);
7574       asm_fprintf (asm_out_file, "\t# fn:%s ht%d\n", fname, TARGET_OPT_HTM);
7575       asm_fprintf (asm_out_file, "\t# fn:%s vx%d\n", fname, TARGET_OPT_VX);
7576       asm_fprintf (asm_out_file, "\t# fn:%s ps%d\n", fname,
7577 		   TARGET_PACKED_STACK);
7578       asm_fprintf (asm_out_file, "\t# fn:%s se%d\n", fname, TARGET_SMALL_EXEC);
7579       asm_fprintf (asm_out_file, "\t# fn:%s mv%d\n", fname, TARGET_MVCLE);
7580       asm_fprintf (asm_out_file, "\t# fn:%s zv%d\n", fname, TARGET_ZVECTOR);
7581       asm_fprintf (asm_out_file, "\t# fn:%s wd%d\n", fname,
7582 		   s390_warn_dynamicstack_p);
7583     }
7584   ASM_OUTPUT_LABEL (asm_out_file, fname);
7585   if (hw_after > 0)
7586     asm_fprintf (asm_out_file,
7587 		 "\t# post-label NOPs for hotpatch (%d halfwords)\n",
7588 		 hw_after);
7589 }
7590 
7591 /* Output machine-dependent UNSPECs occurring in address constant X
7592    in assembler syntax to stdio stream FILE.  Returns true if the
7593    constant X could be recognized, false otherwise.  */
7594 
7595 static bool
7596 s390_output_addr_const_extra (FILE *file, rtx x)
7597 {
7598   if (GET_CODE (x) == UNSPEC && XVECLEN (x, 0) == 1)
7599     switch (XINT (x, 1))
7600       {
7601       case UNSPEC_GOTENT:
7602 	output_addr_const (file, XVECEXP (x, 0, 0));
7603 	fprintf (file, "@GOTENT");
7604 	return true;
7605       case UNSPEC_GOT:
7606 	output_addr_const (file, XVECEXP (x, 0, 0));
7607 	fprintf (file, "@GOT");
7608 	return true;
7609       case UNSPEC_GOTOFF:
7610 	output_addr_const (file, XVECEXP (x, 0, 0));
7611 	fprintf (file, "@GOTOFF");
7612 	return true;
7613       case UNSPEC_PLT:
7614 	output_addr_const (file, XVECEXP (x, 0, 0));
7615 	fprintf (file, "@PLT");
7616 	return true;
7617       case UNSPEC_PLTOFF:
7618 	output_addr_const (file, XVECEXP (x, 0, 0));
7619 	fprintf (file, "@PLTOFF");
7620 	return true;
7621       case UNSPEC_TLSGD:
7622 	output_addr_const (file, XVECEXP (x, 0, 0));
7623 	fprintf (file, "@TLSGD");
7624 	return true;
7625       case UNSPEC_TLSLDM:
7626 	assemble_name (file, get_some_local_dynamic_name ());
7627 	fprintf (file, "@TLSLDM");
7628 	return true;
7629       case UNSPEC_DTPOFF:
7630 	output_addr_const (file, XVECEXP (x, 0, 0));
7631 	fprintf (file, "@DTPOFF");
7632 	return true;
7633       case UNSPEC_NTPOFF:
7634 	output_addr_const (file, XVECEXP (x, 0, 0));
7635 	fprintf (file, "@NTPOFF");
7636 	return true;
7637       case UNSPEC_GOTNTPOFF:
7638 	output_addr_const (file, XVECEXP (x, 0, 0));
7639 	fprintf (file, "@GOTNTPOFF");
7640 	return true;
7641       case UNSPEC_INDNTPOFF:
7642 	output_addr_const (file, XVECEXP (x, 0, 0));
7643 	fprintf (file, "@INDNTPOFF");
7644 	return true;
7645       }
7646 
7647   if (GET_CODE (x) == UNSPEC && XVECLEN (x, 0) == 2)
7648     switch (XINT (x, 1))
7649       {
7650       case UNSPEC_POOL_OFFSET:
7651 	x = gen_rtx_MINUS (GET_MODE (x), XVECEXP (x, 0, 0), XVECEXP (x, 0, 1));
7652 	output_addr_const (file, x);
7653 	return true;
7654       }
7655   return false;
7656 }
7657 
7658 /* Output address operand ADDR in assembler syntax to
7659    stdio stream FILE.  */
7660 
7661 void
7662 print_operand_address (FILE *file, rtx addr)
7663 {
7664   struct s390_address ad;
7665   memset (&ad, 0, sizeof (s390_address));
7666 
7667   if (s390_loadrelative_operand_p (addr, NULL, NULL))
7668     {
7669       if (!TARGET_Z10)
7670 	{
7671 	  output_operand_lossage ("symbolic memory references are "
7672 				  "only supported on z10 or later");
7673 	  return;
7674 	}
7675       output_addr_const (file, addr);
7676       return;
7677     }
7678 
7679   if (!s390_decompose_address (addr, &ad)
7680       || (ad.base && !REGNO_OK_FOR_BASE_P (REGNO (ad.base)))
7681       || (ad.indx && !REGNO_OK_FOR_INDEX_P (REGNO (ad.indx))))
7682     output_operand_lossage ("cannot decompose address");
7683 
7684   if (ad.disp)
7685     output_addr_const (file, ad.disp);
7686   else
7687     fprintf (file, "0");
7688 
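  /* Emit the usual D(X,B) / D(B) assembler operand syntax.  */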
7689   if (ad.base && ad.indx)
7690     fprintf (file, "(%s,%s)", reg_names[REGNO (ad.indx)],
7691                               reg_names[REGNO (ad.base)]);
7692   else if (ad.base)
7693     fprintf (file, "(%s)", reg_names[REGNO (ad.base)]);
7694 }
7695 
7696 /* Output operand X in assembler syntax to stdio stream FILE.
7697    CODE specified the format flag.  The following format flags
7698    are recognized:
7699 
7700     'C': print opcode suffix for branch condition.
7701     'D': print opcode suffix for inverse branch condition.
7702     'E': print opcode suffix for branch on index instruction.
7703     'G': print the size of the operand in bytes.
7704     'J': print tls_load/tls_gdcall/tls_ldcall suffix
7705     'M': print the second word of a TImode operand.
7706     'N': print the second word of a DImode operand.
7707     'O': print only the displacement of a memory reference or address.
7708     'R': print only the base register of a memory reference or address.
7709     'S': print S-type memory reference (base+displacement).
7710     'Y': print address style operand without index (e.g. shift count or setmem
7711 	 operand).
7712 
7713     'b': print integer X as if it's an unsigned byte.
7714     'c': print integer X as if it's a signed byte.
7715     'e': "end" contiguous bitmask X in either DImode or vector inner mode.
7716     'f': "end" contiguous bitmask X in SImode.
7717     'h': print integer X as if it's a signed halfword.
7718     'i': print the first nonzero HImode part of X.
7719     'j': print the first HImode part unequal to -1 of X.
7720     'k': print the first nonzero SImode part of X.
7721     'm': print the first SImode part unequal to -1 of X.
7722     'o': print integer X as if it's an unsigned 32bit word.
7723     's': "start" of contiguous bitmask X in either DImode or vector inner mode.
7724     't': CONST_INT: "start" of contiguous bitmask X in SImode.
7725          CONST_VECTOR: Generate a bitmask for vgbm instruction.
7726     'x': print integer X as if it's an unsigned halfword.
7727     'v': print register number as vector register (v1 instead of f1).
7728 */
7729 
7730 void
7731 print_operand (FILE *file, rtx x, int code)
7732 {
7733   HOST_WIDE_INT ival;
7734 
7735   switch (code)
7736     {
7737     case 'C':
7738       fprintf (file, s390_branch_condition_mnemonic (x, FALSE));
7739       return;
7740 
7741     case 'D':
7742       fprintf (file, s390_branch_condition_mnemonic (x, TRUE));
7743       return;
7744 
7745     case 'E':
7746       if (GET_CODE (x) == LE)
7747 	fprintf (file, "l");
7748       else if (GET_CODE (x) == GT)
7749 	fprintf (file, "h");
7750       else
7751 	output_operand_lossage ("invalid comparison operator "
7752 				"for 'E' output modifier");
7753       return;
7754 
7755     case 'J':
7756       if (GET_CODE (x) == SYMBOL_REF)
7757 	{
7758 	  fprintf (file, "%s", ":tls_load:");
7759 	  output_addr_const (file, x);
7760 	}
7761       else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLSGD)
7762 	{
7763 	  fprintf (file, "%s", ":tls_gdcall:");
7764 	  output_addr_const (file, XVECEXP (x, 0, 0));
7765 	}
7766       else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLSLDM)
7767 	{
7768 	  fprintf (file, "%s", ":tls_ldcall:");
7769 	  const char *name = get_some_local_dynamic_name ();
7770 	  gcc_assert (name);
7771 	  assemble_name (file, name);
7772 	}
7773       else
7774 	output_operand_lossage ("invalid reference for 'J' output modifier");
7775       return;
7776 
7777     case 'G':
7778       fprintf (file, "%u", GET_MODE_SIZE (GET_MODE (x)));
7779       return;
7780 
7781     case 'O':
7782       {
7783         struct s390_address ad;
7784 	int ret;
7785 
7786 	ret = s390_decompose_address (MEM_P (x) ? XEXP (x, 0) : x, &ad);
7787 
7788 	if (!ret
7789 	    || (ad.base && !REGNO_OK_FOR_BASE_P (REGNO (ad.base)))
7790 	    || ad.indx)
7791 	  {
7792 	    output_operand_lossage ("invalid address for 'O' output modifier");
7793 	    return;
7794 	  }
7795 
7796         if (ad.disp)
7797           output_addr_const (file, ad.disp);
7798         else
7799           fprintf (file, "0");
7800       }
7801       return;
7802 
7803     case 'R':
7804       {
7805         struct s390_address ad;
7806 	int ret;
7807 
7808 	ret = s390_decompose_address (MEM_P (x) ? XEXP (x, 0) : x, &ad);
7809 
7810 	if (!ret
7811 	    || (ad.base && !REGNO_OK_FOR_BASE_P (REGNO (ad.base)))
7812 	    || ad.indx)
7813 	  {
7814 	    output_operand_lossage ("invalid address for 'R' output modifier");
7815 	    return;
7816 	  }
7817 
7818         if (ad.base)
7819           fprintf (file, "%s", reg_names[REGNO (ad.base)]);
7820         else
7821           fprintf (file, "0");
7822       }
7823       return;
7824 
7825     case 'S':
7826       {
7827 	struct s390_address ad;
7828 	int ret;
7829 
7830 	if (!MEM_P (x))
7831 	  {
7832 	    output_operand_lossage ("memory reference expected for "
7833 				    "'S' output modifier");
7834 	    return;
7835 	  }
7836 	ret = s390_decompose_address (XEXP (x, 0), &ad);
7837 
7838 	if (!ret
7839 	    || (ad.base && !REGNO_OK_FOR_BASE_P (REGNO (ad.base)))
7840 	    || ad.indx)
7841 	  {
7842 	    output_operand_lossage ("invalid address for 'S' output modifier");
7843 	    return;
7844 	  }
7845 
7846 	if (ad.disp)
7847 	  output_addr_const (file, ad.disp);
7848 	else
7849 	  fprintf (file, "0");
7850 
7851 	if (ad.base)
7852 	  fprintf (file, "(%s)", reg_names[REGNO (ad.base)]);
7853       }
7854       return;
7855 
7856     case 'N':
7857       if (GET_CODE (x) == REG)
7858 	x = gen_rtx_REG (GET_MODE (x), REGNO (x) + 1);
7859       else if (GET_CODE (x) == MEM)
7860 	x = change_address (x, VOIDmode,
7861 			    plus_constant (Pmode, XEXP (x, 0), 4));
7862       else
7863 	output_operand_lossage ("register or memory expression expected "
7864 				"for 'N' output modifier");
7865       break;
7866 
7867     case 'M':
7868       if (GET_CODE (x) == REG)
7869 	x = gen_rtx_REG (GET_MODE (x), REGNO (x) + 1);
7870       else if (GET_CODE (x) == MEM)
7871 	x = change_address (x, VOIDmode,
7872 			    plus_constant (Pmode, XEXP (x, 0), 8));
7873       else
7874 	output_operand_lossage ("register or memory expression expected "
7875 				"for 'M' output modifier");
7876       break;
7877 
7878     case 'Y':
7879       print_addrstyle_operand (file, x);
7880       return;
7881     }
7882 
7883   switch (GET_CODE (x))
7884     {
7885     case REG:
7886       /* Print FP regs as fx instead of vx when they are accessed
7887 	 through non-vector mode.  */
7888       if (code == 'v'
7889 	  || VECTOR_NOFP_REG_P (x)
7890 	  || (FP_REG_P (x) && VECTOR_MODE_P (GET_MODE (x)))
7891 	  || (VECTOR_REG_P (x)
7892 	      && (GET_MODE_SIZE (GET_MODE (x)) /
7893 		  s390_class_max_nregs (FP_REGS, GET_MODE (x))) > 8))
7894 	fprintf (file, "%%v%s", reg_names[REGNO (x)] + 2);
7895       else
7896 	fprintf (file, "%s", reg_names[REGNO (x)]);
7897       break;
7898 
7899     case MEM:
7900       output_address (GET_MODE (x), XEXP (x, 0));
7901       break;
7902 
7903     case CONST:
7904     case CODE_LABEL:
7905     case LABEL_REF:
7906     case SYMBOL_REF:
7907       output_addr_const (file, x);
7908       break;
7909 
7910     case CONST_INT:
7911       ival = INTVAL (x);
7912       switch (code)
7913 	{
7914 	case 0:
7915 	  break;
7916 	case 'b':
7917 	  ival &= 0xff;
7918 	  break;
7919 	case 'c':
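	  /* Mask to the low byte, then sign-extend it via the xor/subtract
	     idiom.  */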
7920 	  ival = ((ival & 0xff) ^ 0x80) - 0x80;
7921 	  break;
7922 	case 'x':
7923 	  ival &= 0xffff;
7924 	  break;
7925 	case 'h':
7926 	  ival = ((ival & 0xffff) ^ 0x8000) - 0x8000;
7927 	  break;
7928 	case 'i':
7929 	  ival = s390_extract_part (x, HImode, 0);
7930 	  break;
7931 	case 'j':
7932 	  ival = s390_extract_part (x, HImode, -1);
7933 	  break;
7934 	case 'k':
7935 	  ival = s390_extract_part (x, SImode, 0);
7936 	  break;
7937 	case 'm':
7938 	  ival = s390_extract_part (x, SImode, -1);
7939 	  break;
7940 	case 'o':
7941 	  ival &= 0xffffffff;
7942 	  break;
7943 	case 'e': case 'f':
7944 	case 's': case 't':
7945 	  {
7946 	    int start, end;
7947 	    int len;
7948 	    bool ok;
7949 
7950 	    len = (code == 's' || code == 'e' ? 64 : 32);
7951 	    ok = s390_contiguous_bitmask_p (ival, true, len, &start, &end);
7952 	    gcc_assert (ok);
7953 	    if (code == 's' || code == 't')
7954 	      ival = start;
7955 	    else
7956 	      ival = end;
7957 	  }
7958 	  break;
7959 	default:
7960 	  output_operand_lossage ("invalid constant for output modifier '%c'", code);
7961 	}
7962       fprintf (file, HOST_WIDE_INT_PRINT_DEC, ival);
7963       break;
7964 
7965     case CONST_WIDE_INT:
7966       if (code == 'b')
7967         fprintf (file, HOST_WIDE_INT_PRINT_DEC,
7968 		 CONST_WIDE_INT_ELT (x, 0) & 0xff);
7969       else if (code == 'x')
7970         fprintf (file, HOST_WIDE_INT_PRINT_DEC,
7971 		 CONST_WIDE_INT_ELT (x, 0) & 0xffff);
7972       else if (code == 'h')
7973         fprintf (file, HOST_WIDE_INT_PRINT_DEC,
7974 		 ((CONST_WIDE_INT_ELT (x, 0) & 0xffff) ^ 0x8000) - 0x8000);
7975       else
7976 	{
7977 	  if (code == 0)
7978 	    output_operand_lossage ("invalid constant - try using "
7979 				    "an output modifier");
7980 	  else
7981 	    output_operand_lossage ("invalid constant for output modifier '%c'",
7982 				    code);
7983 	}
7984       break;
7985     case CONST_VECTOR:
7986       switch (code)
7987 	{
7988 	case 'h':
7989 	  gcc_assert (const_vec_duplicate_p (x));
7990 	  fprintf (file, HOST_WIDE_INT_PRINT_DEC,
7991 		   ((INTVAL (XVECEXP (x, 0, 0)) & 0xffff) ^ 0x8000) - 0x8000);
7992 	  break;
7993 	case 'e':
7994 	case 's':
7995 	  {
7996 	    int start, end;
7997 	    bool ok;
7998 
7999 	    ok = s390_contiguous_bitmask_vector_p (x, &start, &end);
8000 	    gcc_assert (ok);
8001 	    ival = (code == 's') ? start : end;
8002 	    fprintf (file, HOST_WIDE_INT_PRINT_DEC, ival);
8003 	  }
8004 	  break;
8005 	case 't':
8006 	  {
8007 	    unsigned mask;
8008 	    bool ok = s390_bytemask_vector_p (x, &mask);
8009 	    gcc_assert (ok);
8010 	    fprintf (file, "%u", mask);
8011 	  }
8012 	  break;
8013 
8014 	default:
8015 	  output_operand_lossage ("invalid constant vector for output "
8016 				  "modifier '%c'", code);
8017 	}
8018       break;
8019 
8020     default:
8021       if (code == 0)
8022 	output_operand_lossage ("invalid expression - try using "
8023 				"an output modifier");
8024       else
8025 	output_operand_lossage ("invalid expression for output "
8026 				"modifier '%c'", code);
8027       break;
8028     }
8029 }
8030 
8031 /* Target hook for assembling integer objects.  We need to define it
8032    here to work around a bug in some versions of GAS, which couldn't
8033    handle values smaller than INT_MIN when printed in decimal.  */
8034 
8035 static bool
8036 s390_assemble_integer (rtx x, unsigned int size, int aligned_p)
8037 {
8038   if (size == 8 && aligned_p
8039       && GET_CODE (x) == CONST_INT && INTVAL (x) < INT_MIN)
8040     {
8041       fprintf (asm_out_file, "\t.quad\t" HOST_WIDE_INT_PRINT_HEX "\n",
8042 	       INTVAL (x));
8043       return true;
8044     }
8045   return default_assemble_integer (x, size, aligned_p);
8046 }
8047 
8048 /* Returns true if register REGNO is used for forming
8049    a memory address in expression X.  */
8050 
8051 static bool
8052 reg_used_in_mem_p (int regno, rtx x)
8053 {
8054   enum rtx_code code = GET_CODE (x);
8055   int i, j;
8056   const char *fmt;
8057 
8058   if (code == MEM)
8059     {
8060       if (refers_to_regno_p (regno, XEXP (x, 0)))
8061 	return true;
8062     }
8063   else if (code == SET
8064 	   && GET_CODE (SET_DEST (x)) == PC)
8065     {
8066       if (refers_to_regno_p (regno, SET_SRC (x)))
8067 	return true;
8068     }
8069 
8070   fmt = GET_RTX_FORMAT (code);
8071   for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
8072     {
8073       if (fmt[i] == 'e'
8074 	  && reg_used_in_mem_p (regno, XEXP (x, i)))
8075 	return true;
8076 
8077       else if (fmt[i] == 'E')
8078 	for (j = 0; j < XVECLEN (x, i); j++)
8079 	  if (reg_used_in_mem_p (regno, XVECEXP (x, i, j)))
8080 	    return true;
8081     }
8082   return false;
8083 }
8084 
8085 /* Returns true if expression DEP_RTX sets an address register
8086    used by instruction INSN to address memory.  */
8087 
8088 static bool
8089 addr_generation_dependency_p (rtx dep_rtx, rtx_insn *insn)
8090 {
8091   rtx target, pat;
8092 
8093   if (NONJUMP_INSN_P (dep_rtx))
8094     dep_rtx = PATTERN (dep_rtx);
8095 
8096   if (GET_CODE (dep_rtx) == SET)
8097     {
8098       target = SET_DEST (dep_rtx);
8099       if (GET_CODE (target) == STRICT_LOW_PART)
8100 	target = XEXP (target, 0);
8101       while (GET_CODE (target) == SUBREG)
8102 	target = SUBREG_REG (target);
8103 
8104       if (GET_CODE (target) == REG)
8105 	{
8106 	  int regno = REGNO (target);
8107 
8108 	  if (s390_safe_attr_type (insn) == TYPE_LA)
8109 	    {
8110 	      pat = PATTERN (insn);
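	      /* An LA-type pattern may be wrapped in a two-element PARALLEL;
		 the address computation is the first SET.  */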
8111 	      if (GET_CODE (pat) == PARALLEL)
8112 		{
8113 		  gcc_assert (XVECLEN (pat, 0) == 2);
8114 		  pat = XVECEXP (pat, 0, 0);
8115 		}
8116 	      gcc_assert (GET_CODE (pat) == SET);
8117 	      return refers_to_regno_p (regno, SET_SRC (pat));
8118 	    }
8119 	  else if (get_attr_atype (insn) == ATYPE_AGEN)
8120 	    return reg_used_in_mem_p (regno, PATTERN (insn));
8121 	}
8122     }
8123   return false;
8124 }
8125 
8126 /* Return 1 if DEP_INSN sets a register that INSN uses in the agen unit.  */
8127 
8128 int
8129 s390_agen_dep_p (rtx_insn *dep_insn, rtx_insn *insn)
8130 {
8131   rtx dep_rtx = PATTERN (dep_insn);
8132   int i;
8133 
8134   if (GET_CODE (dep_rtx) == SET
8135       && addr_generation_dependency_p (dep_rtx, insn))
8136     return 1;
8137   else if (GET_CODE (dep_rtx) == PARALLEL)
8138     {
8139       for (i = 0; i < XVECLEN (dep_rtx, 0); i++)
8140 	{
8141 	  if (addr_generation_dependency_p (XVECEXP (dep_rtx, 0, i), insn))
8142 	    return 1;
8143 	}
8144     }
8145   return 0;
8146 }
8147 
8148 
8149 /* A C statement (sans semicolon) to update the integer scheduling priority
8150    INSN_PRIORITY (INSN).  Increase the priority to execute the INSN earlier,
8151    reduce the priority to execute INSN later.  Do not define this macro if
8152    you do not need to adjust the scheduling priorities of insns.
8153 
8154    A STD instruction should be scheduled earlier,
8155    in order to use the bypass.  */
8156 static int
8157 s390_adjust_priority (rtx_insn *insn, int priority)
8158 {
8159   if (! INSN_P (insn))
8160     return priority;
8161 
8162   if (s390_tune <= PROCESSOR_2064_Z900)
8163     return priority;
8164 
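  /* Boost floating-point stores strongly and general stores mildly by
     scaling up their priority.  */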
8165   switch (s390_safe_attr_type (insn))
8166     {
8167       case TYPE_FSTOREDF:
8168       case TYPE_FSTORESF:
8169 	priority = priority << 3;
8170 	break;
8171       case TYPE_STORE:
8172       case TYPE_STM:
8173 	priority = priority << 1;
8174 	break;
8175       default:
8176         break;
8177     }
8178   return priority;
8179 }
8180 
8181 
8182 /* The number of instructions that can be issued per cycle.  */
8183 
8184 static int
8185 s390_issue_rate (void)
8186 {
8187   switch (s390_tune)
8188     {
8189     case PROCESSOR_2084_Z990:
8190     case PROCESSOR_2094_Z9_109:
8191     case PROCESSOR_2094_Z9_EC:
8192     case PROCESSOR_2817_Z196:
8193       return 3;
8194     case PROCESSOR_2097_Z10:
8195       return 2;
8196     case PROCESSOR_9672_G5:
8197     case PROCESSOR_9672_G6:
8198     case PROCESSOR_2064_Z900:
8199       /* Starting with EC12 we use the sched_reorder hook to take care
8200 	 of instruction dispatch constraints.  The algorithm only
8201 	 picks the best instruction and assumes only a single
8202 	 instruction gets issued per cycle.  */
8203     case PROCESSOR_2827_ZEC12:
8204     case PROCESSOR_2964_Z13:
8205     case PROCESSOR_3906_Z14:
8206     default:
8207       return 1;
8208     }
8209 }
8210 
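/* The number of ready insns the first-cycle multipass scheduling
   lookahead may consider.  */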
8211 static int
8212 s390_first_cycle_multipass_dfa_lookahead (void)
8213 {
8214   return 4;
8215 }
8216 
8217 /* Annotate every literal pool reference in X by an UNSPEC_LTREF expression.
8218    Fix up MEMs as required.  */
8219 
8220 static void
8221 annotate_constant_pool_refs (rtx *x)
8222 {
8223   int i, j;
8224   const char *fmt;
8225 
8226   gcc_assert (GET_CODE (*x) != SYMBOL_REF
8227 	      || !CONSTANT_POOL_ADDRESS_P (*x));
8228 
8229   /* Literal pool references can only occur inside a MEM ...  */
8230   if (GET_CODE (*x) == MEM)
8231     {
8232       rtx memref = XEXP (*x, 0);
8233 
8234       if (GET_CODE (memref) == SYMBOL_REF
8235 	  && CONSTANT_POOL_ADDRESS_P (memref))
8236 	{
8237 	  rtx base = cfun->machine->base_reg;
8238 	  rtx addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, memref, base),
8239 				     UNSPEC_LTREF);
8240 
8241 	  *x = replace_equiv_address (*x, addr);
8242 	  return;
8243 	}
8244 
8245       if (GET_CODE (memref) == CONST
8246 	  && GET_CODE (XEXP (memref, 0)) == PLUS
8247 	  && GET_CODE (XEXP (XEXP (memref, 0), 1)) == CONST_INT
8248 	  && GET_CODE (XEXP (XEXP (memref, 0), 0)) == SYMBOL_REF
8249 	  && CONSTANT_POOL_ADDRESS_P (XEXP (XEXP (memref, 0), 0)))
8250 	{
8251 	  HOST_WIDE_INT off = INTVAL (XEXP (XEXP (memref, 0), 1));
8252 	  rtx sym = XEXP (XEXP (memref, 0), 0);
8253 	  rtx base = cfun->machine->base_reg;
8254 	  rtx addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, sym, base),
8255 				     UNSPEC_LTREF);
8256 
8257 	  *x = replace_equiv_address (*x, plus_constant (Pmode, addr, off));
8258 	  return;
8259 	}
8260     }
8261 
8262   /* ... or a load-address type pattern.  */
8263   if (GET_CODE (*x) == SET)
8264     {
8265       rtx addrref = SET_SRC (*x);
8266 
8267       if (GET_CODE (addrref) == SYMBOL_REF
8268 	  && CONSTANT_POOL_ADDRESS_P (addrref))
8269 	{
8270 	  rtx base = cfun->machine->base_reg;
8271 	  rtx addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, addrref, base),
8272 				     UNSPEC_LTREF);
8273 
8274 	  SET_SRC (*x) = addr;
8275 	  return;
8276 	}
8277 
8278       if (GET_CODE (addrref) == CONST
8279 	  && GET_CODE (XEXP (addrref, 0)) == PLUS
8280 	  && GET_CODE (XEXP (XEXP (addrref, 0), 1)) == CONST_INT
8281 	  && GET_CODE (XEXP (XEXP (addrref, 0), 0)) == SYMBOL_REF
8282 	  && CONSTANT_POOL_ADDRESS_P (XEXP (XEXP (addrref, 0), 0)))
8283 	{
8284 	  HOST_WIDE_INT off = INTVAL (XEXP (XEXP (addrref, 0), 1));
8285 	  rtx sym = XEXP (XEXP (addrref, 0), 0);
8286 	  rtx base = cfun->machine->base_reg;
8287 	  rtx addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, sym, base),
8288 				     UNSPEC_LTREF);
8289 
8290 	  SET_SRC (*x) = plus_constant (Pmode, addr, off);
8291 	  return;
8292 	}
8293     }
8294 
8295   /* Annotate LTREL_BASE as well.  */
8296   if (GET_CODE (*x) == UNSPEC
8297       && XINT (*x, 1) == UNSPEC_LTREL_BASE)
8298     {
8299       rtx base = cfun->machine->base_reg;
8300       *x = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, XVECEXP (*x, 0, 0), base),
8301 				  UNSPEC_LTREL_BASE);
8302       return;
8303     }
8304 
8305   fmt = GET_RTX_FORMAT (GET_CODE (*x));
8306   for (i = GET_RTX_LENGTH (GET_CODE (*x)) - 1; i >= 0; i--)
8307     {
8308       if (fmt[i] == 'e')
8309         {
8310           annotate_constant_pool_refs (&XEXP (*x, i));
8311         }
8312       else if (fmt[i] == 'E')
8313         {
8314           for (j = 0; j < XVECLEN (*x, i); j++)
8315             annotate_constant_pool_refs (&XVECEXP (*x, i, j));
8316         }
8317     }
8318 }
8319 
8320 /* Split all branches that exceed the maximum distance.
8321    Returns true if this created a new literal pool entry.  */
8322 
8323 static int
8324 s390_split_branches (void)
8325 {
8326   rtx temp_reg = gen_rtx_REG (Pmode, RETURN_REGNUM);
8327   int new_literal = 0, ret;
8328   rtx_insn *insn;
8329   rtx pat, target;
8330   rtx *label;
8331 
8332   /* We need correct insn addresses.  */
8333 
8334   shorten_branches (get_insns ());
8335 
8336   /* Find all branches that exceed 64KB, and split them.  */
8337 
8338   for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
8339     {
8340       if (! JUMP_P (insn) || tablejump_p (insn, NULL, NULL))
8341 	continue;
8342 
8343       pat = PATTERN (insn);
8344       if (GET_CODE (pat) == PARALLEL)
8345 	pat = XVECEXP (pat, 0, 0);
8346       if (GET_CODE (pat) != SET || SET_DEST (pat) != pc_rtx)
8347 	continue;
8348 
8349       if (GET_CODE (SET_SRC (pat)) == LABEL_REF)
8350 	{
8351 	  label = &SET_SRC (pat);
8352 	}
8353       else if (GET_CODE (SET_SRC (pat)) == IF_THEN_ELSE)
8354 	{
8355 	  if (GET_CODE (XEXP (SET_SRC (pat), 1)) == LABEL_REF)
8356 	    label = &XEXP (SET_SRC (pat), 1);
8357           else if (GET_CODE (XEXP (SET_SRC (pat), 2)) == LABEL_REF)
8358             label = &XEXP (SET_SRC (pat), 2);
8359 	  else
8360 	    continue;
8361         }
8362       else
8363 	continue;
8364 
8365       if (get_attr_length (insn) <= 4)
8366 	continue;
8367 
8368       /* We are going to use the return register as scratch register,
8369 	 make sure it will be saved/restored by the prologue/epilogue.  */
8370       cfun_frame_layout.save_return_addr_p = 1;
8371 
8372       if (!flag_pic)
8373 	{
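	  /* Non-PIC: put the label address itself into the literal pool,
	     load it into the scratch register and branch via that
	     register.  */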
8374 	  new_literal = 1;
8375 	  rtx mem = force_const_mem (Pmode, *label);
8376 	  rtx_insn *set_insn = emit_insn_before (gen_rtx_SET (temp_reg, mem),
8377 						 insn);
8378 	  INSN_ADDRESSES_NEW (set_insn, -1);
8379 	  annotate_constant_pool_refs (&PATTERN (set_insn));
8380 
8381 	  target = temp_reg;
8382 	}
8383       else
8384 	{
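	  /* PIC: put the label's pool-relative offset into the literal
	     pool, load it into the scratch register and use scratch
	     register plus literal pool base as the new branch target.  */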
8385 	  new_literal = 1;
8386 	  target = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, *label),
8387 				   UNSPEC_LTREL_OFFSET);
8388 	  target = gen_rtx_CONST (Pmode, target);
8389 	  target = force_const_mem (Pmode, target);
8390 	  rtx_insn *set_insn = emit_insn_before (gen_rtx_SET (temp_reg, target),
8391 						 insn);
8392 	  INSN_ADDRESSES_NEW (set_insn, -1);
8393 	  annotate_constant_pool_refs (&PATTERN (set_insn));
8394 
8395           target = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, XEXP (target, 0),
8396 							cfun->machine->base_reg),
8397 				   UNSPEC_LTREL_BASE);
8398 	  target = gen_rtx_PLUS (Pmode, temp_reg, target);
8399 	}
8400 
8401       ret = validate_change (insn, label, target, 0);
8402       gcc_assert (ret);
8403     }
8404 
8405   return new_literal;
8406 }
8407 
8408 
8409 /* Find an annotated literal pool symbol referenced in RTX X,
8410    and store it at REF.  Will abort if X contains references to
8411    more than one such pool symbol; multiple references to the same
8412    symbol are allowed, however.
8413 
8414    The rtx pointed to by REF must be initialized to NULL_RTX
8415    by the caller before calling this routine.  */
8416 
8417 static void
8418 find_constant_pool_ref (rtx x, rtx *ref)
8419 {
8420   int i, j;
8421   const char *fmt;
8422 
8423   /* Ignore LTREL_BASE references.  */
8424   if (GET_CODE (x) == UNSPEC
8425       && XINT (x, 1) == UNSPEC_LTREL_BASE)
8426     return;
8427   /* Likewise POOL_ENTRY insns.  */
8428   if (GET_CODE (x) == UNSPEC_VOLATILE
8429       && XINT (x, 1) == UNSPECV_POOL_ENTRY)
8430     return;
8431 
8432   gcc_assert (GET_CODE (x) != SYMBOL_REF
8433               || !CONSTANT_POOL_ADDRESS_P (x));
8434 
8435   if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_LTREF)
8436     {
8437       rtx sym = XVECEXP (x, 0, 0);
8438       gcc_assert (GET_CODE (sym) == SYMBOL_REF
8439 	          && CONSTANT_POOL_ADDRESS_P (sym));
8440 
8441       if (*ref == NULL_RTX)
8442 	*ref = sym;
8443       else
8444 	gcc_assert (*ref == sym);
8445 
8446       return;
8447     }
8448 
8449   fmt = GET_RTX_FORMAT (GET_CODE (x));
8450   for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
8451     {
8452       if (fmt[i] == 'e')
8453         {
8454           find_constant_pool_ref (XEXP (x, i), ref);
8455         }
8456       else if (fmt[i] == 'E')
8457         {
8458           for (j = 0; j < XVECLEN (x, i); j++)
8459             find_constant_pool_ref (XVECEXP (x, i, j), ref);
8460         }
8461     }
8462 }
8463 
8464 /* Replace every reference to the annotated literal pool
8465    symbol REF in X by its base plus OFFSET.  */
8466 
8467 static void
8468 replace_constant_pool_ref (rtx *x, rtx ref, rtx offset)
8469 {
8470   int i, j;
8471   const char *fmt;
8472 
8473   gcc_assert (*x != ref);
8474 
8475   if (GET_CODE (*x) == UNSPEC
8476       && XINT (*x, 1) == UNSPEC_LTREF
8477       && XVECEXP (*x, 0, 0) == ref)
8478     {
8479       *x = gen_rtx_PLUS (Pmode, XVECEXP (*x, 0, 1), offset);
8480       return;
8481     }
8482 
8483   if (GET_CODE (*x) == PLUS
8484       && GET_CODE (XEXP (*x, 1)) == CONST_INT
8485       && GET_CODE (XEXP (*x, 0)) == UNSPEC
8486       && XINT (XEXP (*x, 0), 1) == UNSPEC_LTREF
8487       && XVECEXP (XEXP (*x, 0), 0, 0) == ref)
8488     {
8489       rtx addr = gen_rtx_PLUS (Pmode, XVECEXP (XEXP (*x, 0), 0, 1), offset);
8490       *x = plus_constant (Pmode, addr, INTVAL (XEXP (*x, 1)));
8491       return;
8492     }
8493 
8494   fmt = GET_RTX_FORMAT (GET_CODE (*x));
8495   for (i = GET_RTX_LENGTH (GET_CODE (*x)) - 1; i >= 0; i--)
8496     {
8497       if (fmt[i] == 'e')
8498         {
8499           replace_constant_pool_ref (&XEXP (*x, i), ref, offset);
8500         }
8501       else if (fmt[i] == 'E')
8502         {
8503           for (j = 0; j < XVECLEN (*x, i); j++)
8504             replace_constant_pool_ref (&XVECEXP (*x, i, j), ref, offset);
8505         }
8506     }
8507 }
8508 
8509 /* Check whether X contains an UNSPEC_LTREL_BASE.
8510    Return its constant pool symbol if found, NULL_RTX otherwise.  */
8511 
8512 static rtx
8513 find_ltrel_base (rtx x)
8514 {
8515   int i, j;
8516   const char *fmt;
8517 
8518   if (GET_CODE (x) == UNSPEC
8519       && XINT (x, 1) == UNSPEC_LTREL_BASE)
8520     return XVECEXP (x, 0, 0);
8521 
8522   fmt = GET_RTX_FORMAT (GET_CODE (x));
8523   for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
8524     {
8525       if (fmt[i] == 'e')
8526         {
8527           rtx fnd = find_ltrel_base (XEXP (x, i));
8528 	  if (fnd)
8529 	    return fnd;
8530         }
8531       else if (fmt[i] == 'E')
8532         {
8533           for (j = 0; j < XVECLEN (x, i); j++)
8534 	    {
8535               rtx fnd = find_ltrel_base (XVECEXP (x, i, j));
8536 	      if (fnd)
8537 		return fnd;
8538 	    }
8539         }
8540     }
8541 
8542   return NULL_RTX;
8543 }
8544 
8545 /* Replace any occurrence of UNSPEC_LTREL_BASE in X with its base.  */
8546 
8547 static void
8548 replace_ltrel_base (rtx *x)
8549 {
8550   int i, j;
8551   const char *fmt;
8552 
8553   if (GET_CODE (*x) == UNSPEC
8554       && XINT (*x, 1) == UNSPEC_LTREL_BASE)
8555     {
8556       *x = XVECEXP (*x, 0, 1);
8557       return;
8558     }
8559 
8560   fmt = GET_RTX_FORMAT (GET_CODE (*x));
8561   for (i = GET_RTX_LENGTH (GET_CODE (*x)) - 1; i >= 0; i--)
8562     {
8563       if (fmt[i] == 'e')
8564         {
8565           replace_ltrel_base (&XEXP (*x, i));
8566         }
8567       else if (fmt[i] == 'E')
8568         {
8569           for (j = 0; j < XVECLEN (*x, i); j++)
8570             replace_ltrel_base (&XVECEXP (*x, i, j));
8571         }
8572     }
8573 }
8574 
8575 
8576 /* We keep a list of constants which we have to add to internal
8577    constant tables in the middle of large functions.  */
8578 
8579 #define NR_C_MODES 32
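/* The modes below are listed in order of decreasing size, which lets
   s390_dump_pool emit the entries in descending alignment order.  */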
8580 machine_mode constant_modes[NR_C_MODES] =
8581 {
8582   TFmode, TImode, TDmode,
8583   V16QImode, V8HImode, V4SImode, V2DImode, V1TImode,
8584   V4SFmode, V2DFmode, V1TFmode,
8585   DFmode, DImode, DDmode,
8586   V8QImode, V4HImode, V2SImode, V1DImode, V2SFmode, V1DFmode,
8587   SFmode, SImode, SDmode,
8588   V4QImode, V2HImode, V1SImode,  V1SFmode,
8589   HImode,
8590   V2QImode, V1HImode,
8591   QImode,
8592   V1QImode
8593 };
8594 
8595 struct constant
8596 {
8597   struct constant *next;
8598   rtx value;
8599   rtx_code_label *label;
8600 };
8601 
8602 struct constant_pool
8603 {
8604   struct constant_pool *next;            /* Next pool in the chunk list.  */
8605   rtx_insn *first_insn;                  /* First insn covered by this pool.  */
8606   rtx_insn *pool_insn;                   /* Placeholder insn representing the pool.  */
8607   bitmap insns;                          /* UIDs of the insns covered by this pool.  */
8608   rtx_insn *emit_pool_after;             /* Insn after which the pool is to be emitted.  */
8609 
8610   struct constant *constants[NR_C_MODES]; /* Constants, one chain per mode.  */
8611   struct constant *execute;              /* Execute target templates.  */
8612   rtx_code_label *label;                 /* Label marking the pool base.  */
8613   int size;                              /* Size of the pool in bytes.  */
8614 };
8615 
8616 /* Allocate new constant_pool structure.  */
8617 
8618 static struct constant_pool *
8619 s390_alloc_pool (void)
8620 {
8621   struct constant_pool *pool;
8622   int i;
8623 
8624   pool = (struct constant_pool *) xmalloc (sizeof *pool);
8625   pool->next = NULL;
8626   for (i = 0; i < NR_C_MODES; i++)
8627     pool->constants[i] = NULL;
8628 
8629   pool->execute = NULL;
8630   pool->label = gen_label_rtx ();
8631   pool->first_insn = NULL;
8632   pool->pool_insn = NULL;
8633   pool->insns = BITMAP_ALLOC (NULL);
8634   pool->size = 0;
8635   pool->emit_pool_after = NULL;
8636 
8637   return pool;
8638 }
8639 
8640 /* Create new constant pool covering instructions starting at INSN
8641    and chain it to the end of POOL_LIST.  */
8642 
8643 static struct constant_pool *
8644 s390_start_pool (struct constant_pool **pool_list, rtx_insn *insn)
8645 {
8646   struct constant_pool *pool, **prev;
8647 
8648   pool = s390_alloc_pool ();
8649   pool->first_insn = insn;
8650 
8651   for (prev = pool_list; *prev; prev = &(*prev)->next)
8652     ;
8653   *prev = pool;
8654 
8655   return pool;
8656 }
8657 
8658 /* End range of instructions covered by POOL at INSN and emit
8659    placeholder insn representing the pool.  */
8660 
8661 static void
8662 s390_end_pool (struct constant_pool *pool, rtx_insn *insn)
8663 {
8664   rtx pool_size = GEN_INT (pool->size + 8 /* alignment slop */);
8665 
8666   if (!insn)
8667     insn = get_last_insn ();
8668 
8669   pool->pool_insn = emit_insn_after (gen_pool (pool_size), insn);
8670   INSN_ADDRESSES_NEW (pool->pool_insn, -1);
8671 }
8672 
8673 /* Add INSN to the list of insns covered by POOL.  */
8674 
8675 static void
8676 s390_add_pool_insn (struct constant_pool *pool, rtx insn)
8677 {
8678   bitmap_set_bit (pool->insns, INSN_UID (insn));
8679 }
8680 
8681 /* Return pool out of POOL_LIST that covers INSN.  */
8682 
8683 static struct constant_pool *
8684 s390_find_pool (struct constant_pool *pool_list, rtx insn)
8685 {
8686   struct constant_pool *pool;
8687 
8688   for (pool = pool_list; pool; pool = pool->next)
8689     if (bitmap_bit_p (pool->insns, INSN_UID (insn)))
8690       break;
8691 
8692   return pool;
8693 }
8694 
8695 /* Add constant VAL of mode MODE to the constant pool POOL.  */
8696 
8697 static void
8698 s390_add_constant (struct constant_pool *pool, rtx val, machine_mode mode)
8699 {
8700   struct constant *c;
8701   int i;
8702 
8703   for (i = 0; i < NR_C_MODES; i++)
8704     if (constant_modes[i] == mode)
8705       break;
8706   gcc_assert (i != NR_C_MODES);
8707 
8708   for (c = pool->constants[i]; c != NULL; c = c->next)
8709     if (rtx_equal_p (val, c->value))
8710       break;
8711 
8712   if (c == NULL)
8713     {
8714       c = (struct constant *) xmalloc (sizeof *c);
8715       c->value = val;
8716       c->label = gen_label_rtx ();
8717       c->next = pool->constants[i];
8718       pool->constants[i] = c;
8719       pool->size += GET_MODE_SIZE (mode);
8720     }
8721 }
8722 
8723 /* Return an rtx that represents the offset of X from the start of
8724    pool POOL.  */
8725 
8726 static rtx
8727 s390_pool_offset (struct constant_pool *pool, rtx x)
8728 {
8729   rtx label;
8730 
8731   label = gen_rtx_LABEL_REF (GET_MODE (x), pool->label);
8732   x = gen_rtx_UNSPEC (GET_MODE (x), gen_rtvec (2, x, label),
8733 		      UNSPEC_POOL_OFFSET);
8734   return gen_rtx_CONST (GET_MODE (x), x);
8735 }
8736 
8737 /* Find constant VAL of mode MODE in the constant pool POOL.
8738    Return an RTX describing the distance from the start of
8739    the pool to the location of the new constant.  */
8740 
8741 static rtx
8742 s390_find_constant (struct constant_pool *pool, rtx val,
8743 		    machine_mode mode)
8744 {
8745   struct constant *c;
8746   int i;
8747 
8748   for (i = 0; i < NR_C_MODES; i++)
8749     if (constant_modes[i] == mode)
8750       break;
8751   gcc_assert (i != NR_C_MODES);
8752 
8753   for (c = pool->constants[i]; c != NULL; c = c->next)
8754     if (rtx_equal_p (val, c->value))
8755       break;
8756 
8757   gcc_assert (c);
8758 
8759   return s390_pool_offset (pool, gen_rtx_LABEL_REF (Pmode, c->label));
8760 }
8761 
8762 /* Check whether INSN is an execute.  Return the label_ref to its
8763    execute target template if so, NULL_RTX otherwise.  */
8764 
8765 static rtx
8766 s390_execute_label (rtx insn)
8767 {
8768   if (INSN_P (insn)
8769       && GET_CODE (PATTERN (insn)) == PARALLEL
8770       && GET_CODE (XVECEXP (PATTERN (insn), 0, 0)) == UNSPEC
8771       && (XINT (XVECEXP (PATTERN (insn), 0, 0), 1) == UNSPEC_EXECUTE
8772 	  || XINT (XVECEXP (PATTERN (insn), 0, 0), 1) == UNSPEC_EXECUTE_JUMP))
8773     {
8774       if (XINT (XVECEXP (PATTERN (insn), 0, 0), 1) == UNSPEC_EXECUTE)
8775 	return XVECEXP (XVECEXP (PATTERN (insn), 0, 0), 0, 2);
8776       else
8777 	{
8778 	  gcc_assert (JUMP_P (insn));
8779 	  /* For jump insns as execute target:
8780 	     - There is one operand less in the parallel (the
8781 	       modification register of the execute is always 0).
8782 	     - The execute target label is wrapped into an
8783 	       if_then_else in order to hide it from jump analysis.  */
8784 	  return XEXP (XVECEXP (XVECEXP (PATTERN (insn), 0, 0), 0, 0), 0);
8785 	}
8786     }
8787 
8788   return NULL_RTX;
8789 }
8790 
8791 /* Add execute target for INSN to the constant pool POOL.  */
8792 
8793 static void
8794 s390_add_execute (struct constant_pool *pool, rtx insn)
8795 {
8796   struct constant *c;
8797 
8798   for (c = pool->execute; c != NULL; c = c->next)
8799     if (INSN_UID (insn) == INSN_UID (c->value))
8800       break;
8801 
8802   if (c == NULL)
8803     {
8804       c = (struct constant *) xmalloc (sizeof *c);
8805       c->value = insn;
8806       c->label = gen_label_rtx ();
8807       c->next = pool->execute;
8808       pool->execute = c;
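      /* Reserve space for the execute target template; 6 bytes is the
	 maximum length of a single instruction.  */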
8809       pool->size += 6;
8810     }
8811 }
8812 
8813 /* Find execute target for INSN in the constant pool POOL.
8814    Return an RTX describing the distance from the start of
8815    the pool to the location of the execute target.  */
8816 
8817 static rtx
8818 s390_find_execute (struct constant_pool *pool, rtx insn)
8819 {
8820   struct constant *c;
8821 
8822   for (c = pool->execute; c != NULL; c = c->next)
8823     if (INSN_UID (insn) == INSN_UID (c->value))
8824       break;
8825 
8826   gcc_assert (c);
8827 
8828   return s390_pool_offset (pool, gen_rtx_LABEL_REF (Pmode, c->label));
8829 }
8830 
8831 /* For an execute INSN, extract the execute target template.  */
8832 
8833 static rtx
8834 s390_execute_target (rtx insn)
8835 {
8836   rtx pattern = PATTERN (insn);
8837   gcc_assert (s390_execute_label (insn));
8838 
8839   if (XVECLEN (pattern, 0) == 2)
8840     {
8841       pattern = copy_rtx (XVECEXP (pattern, 0, 1));
8842     }
8843   else
8844     {
8845       rtvec vec = rtvec_alloc (XVECLEN (pattern, 0) - 1);
8846       int i;
8847 
8848       for (i = 0; i < XVECLEN (pattern, 0) - 1; i++)
8849 	RTVEC_ELT (vec, i) = copy_rtx (XVECEXP (pattern, 0, i + 1));
8850 
8851       pattern = gen_rtx_PARALLEL (VOIDmode, vec);
8852     }
8853 
8854   return pattern;
8855 }
8856 
8857 /* Indicate that INSN cannot be duplicated.  This is the case for
8858    execute insns that carry a unique label.  */
8859 
8860 static bool
8861 s390_cannot_copy_insn_p (rtx_insn *insn)
8862 {
8863   rtx label = s390_execute_label (insn);
8864   return label && label != const0_rtx;
8865 }
8866 
8867 /* Dump out the constants in POOL.  If REMOTE_LABEL is true,
8868    do not emit the pool base label.  */
8869 
8870 static void
8871 s390_dump_pool (struct constant_pool *pool, bool remote_label)
8872 {
8873   struct constant *c;
8874   rtx_insn *insn = pool->pool_insn;
8875   int i;
8876 
8877   /* Switch to rodata section.  */
8878   if (TARGET_CPU_ZARCH)
8879     {
8880       insn = emit_insn_after (gen_pool_section_start (), insn);
8881       INSN_ADDRESSES_NEW (insn, -1);
8882     }
8883 
8884   /* Ensure minimum pool alignment.  */
8885   if (TARGET_CPU_ZARCH)
8886     insn = emit_insn_after (gen_pool_align (GEN_INT (8)), insn);
8887   else
8888     insn = emit_insn_after (gen_pool_align (GEN_INT (4)), insn);
8889   INSN_ADDRESSES_NEW (insn, -1);
8890 
8891   /* Emit pool base label.  */
8892   if (!remote_label)
8893     {
8894       insn = emit_label_after (pool->label, insn);
8895       INSN_ADDRESSES_NEW (insn, -1);
8896     }
8897 
8898   /* Dump constants in descending alignment requirement order,
8899      ensuring proper alignment for every constant.  */
8900   for (i = 0; i < NR_C_MODES; i++)
8901     for (c = pool->constants[i]; c; c = c->next)
8902       {
8903 	/* Convert UNSPEC_LTREL_OFFSET unspecs to pool-relative references.  */
8904 	rtx value = copy_rtx (c->value);
8905 	if (GET_CODE (value) == CONST
8906 	    && GET_CODE (XEXP (value, 0)) == UNSPEC
8907 	    && XINT (XEXP (value, 0), 1) == UNSPEC_LTREL_OFFSET
8908 	    && XVECLEN (XEXP (value, 0), 0) == 1)
8909 	  value = s390_pool_offset (pool, XVECEXP (XEXP (value, 0), 0, 0));
8910 
8911 	insn = emit_label_after (c->label, insn);
8912 	INSN_ADDRESSES_NEW (insn, -1);
8913 
8914 	value = gen_rtx_UNSPEC_VOLATILE (constant_modes[i],
8915 					 gen_rtvec (1, value),
8916 					 UNSPECV_POOL_ENTRY);
8917 	insn = emit_insn_after (value, insn);
8918 	INSN_ADDRESSES_NEW (insn, -1);
8919       }
8920 
8921   /* Ensure minimum alignment for instructions.  */
8922   insn = emit_insn_after (gen_pool_align (GEN_INT (2)), insn);
8923   INSN_ADDRESSES_NEW (insn, -1);
8924 
8925   /* Output in-pool execute template insns.  */
8926   for (c = pool->execute; c; c = c->next)
8927     {
8928       insn = emit_label_after (c->label, insn);
8929       INSN_ADDRESSES_NEW (insn, -1);
8930 
8931       insn = emit_insn_after (s390_execute_target (c->value), insn);
8932       INSN_ADDRESSES_NEW (insn, -1);
8933     }
8934 
8935   /* Switch back to previous section.  */
8936   if (TARGET_CPU_ZARCH)
8937     {
8938       insn = emit_insn_after (gen_pool_section_end (), insn);
8939       INSN_ADDRESSES_NEW (insn, -1);
8940     }
8941 
8942   insn = emit_barrier_after (insn);
8943   INSN_ADDRESSES_NEW (insn, -1);
8944 
8945   /* Remove placeholder insn.  */
8946   remove_insn (pool->pool_insn);
8947 }
8948 
8949 /* Free all memory used by POOL.  */
8950 
8951 static void
8952 s390_free_pool (struct constant_pool *pool)
8953 {
8954   struct constant *c, *next;
8955   int i;
8956 
8957   for (i = 0; i < NR_C_MODES; i++)
8958     for (c = pool->constants[i]; c; c = next)
8959       {
8960 	next = c->next;
8961 	free (c);
8962       }
8963 
8964   for (c = pool->execute; c; c = next)
8965     {
8966       next = c->next;
8967       free (c);
8968     }
8969 
8970   BITMAP_FREE (pool->insns);
8971   free (pool);
8972 }
8973 
8974 
8975 /* Collect main literal pool.  Return NULL on overflow.  */
8976 
8977 static struct constant_pool *
8978 s390_mainpool_start (void)
8979 {
8980   struct constant_pool *pool;
8981   rtx_insn *insn;
8982 
8983   pool = s390_alloc_pool ();
8984 
8985   for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
8986     {
8987       if (NONJUMP_INSN_P (insn)
8988 	  && GET_CODE (PATTERN (insn)) == SET
8989 	  && GET_CODE (SET_SRC (PATTERN (insn))) == UNSPEC_VOLATILE
8990 	  && XINT (SET_SRC (PATTERN (insn)), 1) == UNSPECV_MAIN_POOL)
8991 	{
8992 	  /* There might be two main_pool instructions if base_reg
8993 	     is call-clobbered; one for shrink-wrapped code and one
8994 	     for the rest.  We want to keep the first.  */
8995 	  if (pool->pool_insn)
8996 	    {
8997 	      insn = PREV_INSN (insn);
8998 	      delete_insn (NEXT_INSN (insn));
8999 	      continue;
9000 	    }
9001 	  pool->pool_insn = insn;
9002 	}
9003 
9004       if (!TARGET_CPU_ZARCH && s390_execute_label (insn))
9005 	{
9006 	  s390_add_execute (pool, insn);
9007 	}
9008       else if (NONJUMP_INSN_P (insn) || CALL_P (insn))
9009 	{
9010 	  rtx pool_ref = NULL_RTX;
9011 	  find_constant_pool_ref (PATTERN (insn), &pool_ref);
9012 	  if (pool_ref)
9013 	    {
9014 	      rtx constant = get_pool_constant (pool_ref);
9015 	      machine_mode mode = get_pool_mode (pool_ref);
9016 	      s390_add_constant (pool, constant, mode);
9017 	    }
9018 	}
9019 
9020       /* If hot/cold partitioning is enabled we have to make sure that
9021 	 the literal pool is emitted in the same section where the
9022 	 initialization of the literal pool base pointer takes place.
9023 	 emit_pool_after is only used in the non-overflow case on
9024 	 non-zarch CPUs, where we can emit the literal pool at the end
9025 	 of the function body within the text section.  */
9026       if (NOTE_P (insn)
9027 	  && NOTE_KIND (insn) == NOTE_INSN_SWITCH_TEXT_SECTIONS
9028 	  && !pool->emit_pool_after)
9029 	pool->emit_pool_after = PREV_INSN (insn);
9030     }
9031 
9032   gcc_assert (pool->pool_insn || pool->size == 0);
9033 
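  /* The pool entries have to stay addressable via a 12-bit unsigned
     displacement from the pool base register, i.e. within 4096 bytes.
     A larger pool has to be split into chunks instead.  */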
9034   if (pool->size >= 4096)
9035     {
9036       /* We're going to chunkify the pool, so remove the main
9037 	 pool placeholder insn.  */
9038       remove_insn (pool->pool_insn);
9039 
9040       s390_free_pool (pool);
9041       pool = NULL;
9042     }
9043 
9044   /* If the function ends in the section where the literal pool
9045      should be emitted, set the marker to the end of the function.  */
9046   if (pool && !pool->emit_pool_after)
9047     pool->emit_pool_after = get_last_insn ();
9048 
9049   return pool;
9050 }
9051 
9052 /* POOL holds the main literal pool as collected by s390_mainpool_start.
9053    Modify the current function to output the pool constants as well as
9054    the pool register setup instruction.  */
9055 
9056 static void
9057 s390_mainpool_finish (struct constant_pool *pool)
9058 {
9059   rtx base_reg = cfun->machine->base_reg;
9060 
9061   /* If the pool is empty, we're done.  */
9062   if (pool->size == 0)
9063     {
9064       /* We don't actually need a base register after all.  */
9065       cfun->machine->base_reg = NULL_RTX;
9066 
9067       if (pool->pool_insn)
9068 	remove_insn (pool->pool_insn);
9069       s390_free_pool (pool);
9070       return;
9071     }
9072 
9073   /* We need correct insn addresses.  */
9074   shorten_branches (get_insns ());
9075 
9076   /* On zSeries, we use a LARL to load the pool register.  The pool is
9077      located in the .rodata section, so we emit it after the function.  */
9078   if (TARGET_CPU_ZARCH)
9079     {
9080       rtx set = gen_main_base_64 (base_reg, pool->label);
9081       rtx_insn *insn = emit_insn_after (set, pool->pool_insn);
9082       INSN_ADDRESSES_NEW (insn, -1);
9083       remove_insn (pool->pool_insn);
9084 
9085       insn = get_last_insn ();
9086       pool->pool_insn = emit_insn_after (gen_pool (const0_rtx), insn);
9087       INSN_ADDRESSES_NEW (pool->pool_insn, -1);
9088 
9089       s390_dump_pool (pool, 0);
9090     }
9091 
9092   /* On S/390, if the total size of the function's code plus literal pool
9093      does not exceed 4096 bytes, we use BASR to set up a function base
9094      pointer, and emit the literal pool at the end of the function.  */
9095   else if (INSN_ADDRESSES (INSN_UID (pool->emit_pool_after))
9096 	   + pool->size + 8 /* alignment slop */ < 4096)
9097     {
9098       rtx set = gen_main_base_31_small (base_reg, pool->label);
9099       rtx_insn *insn = emit_insn_after (set, pool->pool_insn);
9100       INSN_ADDRESSES_NEW (insn, -1);
9101       remove_insn (pool->pool_insn);
9102 
9103       insn = emit_label_after (pool->label, insn);
9104       INSN_ADDRESSES_NEW (insn, -1);
9105 
9106       /* emit_pool_after will be set by s390_mainpool_start to the
9107 	 last insn of the section where the literal pool should be
9108 	 emitted.  */
9109       insn = pool->emit_pool_after;
9110 
9111       pool->pool_insn = emit_insn_after (gen_pool (const0_rtx), insn);
9112       INSN_ADDRESSES_NEW (pool->pool_insn, -1);
9113 
9114       s390_dump_pool (pool, 1);
9115     }
9116 
9117   /* Otherwise, we emit an inline literal pool and use BASR to branch
9118      over it, setting up the pool register at the same time.  */
9119   else
9120     {
9121       rtx_code_label *pool_end = gen_label_rtx ();
9122 
9123       rtx pat = gen_main_base_31_large (base_reg, pool->label, pool_end);
9124       rtx_insn *insn = emit_jump_insn_after (pat, pool->pool_insn);
9125       JUMP_LABEL (insn) = pool_end;
9126       INSN_ADDRESSES_NEW (insn, -1);
9127       remove_insn (pool->pool_insn);
9128 
9129       insn = emit_label_after (pool->label, insn);
9130       INSN_ADDRESSES_NEW (insn, -1);
9131 
9132       pool->pool_insn = emit_insn_after (gen_pool (const0_rtx), insn);
9133       INSN_ADDRESSES_NEW (pool->pool_insn, -1);
9134 
9135       insn = emit_label_after (pool_end, pool->pool_insn);
9136       INSN_ADDRESSES_NEW (insn, -1);
9137 
9138       s390_dump_pool (pool, 1);
9139     }
9140 
9141 
9142   /* Replace all literal pool references.  */
9143 
9144   for (rtx_insn *insn = get_insns (); insn; insn = NEXT_INSN (insn))
9145     {
9146       if (INSN_P (insn))
9147 	replace_ltrel_base (&PATTERN (insn));
9148 
9149       if (NONJUMP_INSN_P (insn) || CALL_P (insn))
9150         {
9151           rtx addr, pool_ref = NULL_RTX;
9152           find_constant_pool_ref (PATTERN (insn), &pool_ref);
9153           if (pool_ref)
9154             {
9155 	      if (s390_execute_label (insn))
9156 		addr = s390_find_execute (pool, insn);
9157 	      else
9158 		addr = s390_find_constant (pool, get_pool_constant (pool_ref),
9159 						 get_pool_mode (pool_ref));
9160 
9161               replace_constant_pool_ref (&PATTERN (insn), pool_ref, addr);
9162               INSN_CODE (insn) = -1;
9163             }
9164         }
9165     }
9166 
9167 
9168   /* Free the pool.  */
9169   s390_free_pool (pool);
9170 }
9171 
9172 /* POOL holds the main literal pool as collected by s390_mainpool_start.
9173    We have decided we cannot use this pool, so revert all changes
9174    to the current function that were done by s390_mainpool_start.  */
9175 static void
9176 s390_mainpool_cancel (struct constant_pool *pool)
9177 {
9178   /* We didn't actually change the instruction stream, so simply
9179      free the pool memory.  */
9180   s390_free_pool (pool);
9181 }
9182 
9183 
9184 /* Chunkify the literal pool.  */
9185 
9186 #define S390_POOL_CHUNK_MIN	0xc00
9187 #define S390_POOL_CHUNK_MAX	0xe00
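/* The chunk limits stay well below the 4 KB displacement range to leave
   headroom for alignment padding and the size estimates made below.  */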
9188 
9189 static struct constant_pool *
9190 s390_chunkify_start (void)
9191 {
9192   struct constant_pool *curr_pool = NULL, *pool_list = NULL;
9193   int extra_size = 0;
9194   bitmap far_labels;
9195   rtx pending_ltrel = NULL_RTX;
9196   rtx_insn *insn;
9197 
9198   rtx (*gen_reload_base) (rtx, rtx) =
9199     TARGET_CPU_ZARCH? gen_reload_base_64 : gen_reload_base_31;
9200 
9201 
9202   /* We need correct insn addresses.  */
9203 
9204   shorten_branches (get_insns ());
9205 
9206   /* Scan all insns and move literals to pool chunks.  */
9207 
9208   for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
9209     {
9210       bool section_switch_p = false;
9211 
9212       /* Check for pending LTREL_BASE.  */
9213       if (INSN_P (insn))
9214 	{
9215 	  rtx ltrel_base = find_ltrel_base (PATTERN (insn));
9216 	  if (ltrel_base)
9217 	    {
9218 	      gcc_assert (ltrel_base == pending_ltrel);
9219 	      pending_ltrel = NULL_RTX;
9220 	    }
9221 	}
9222 
9223       if (!TARGET_CPU_ZARCH && s390_execute_label (insn))
9224 	{
9225 	  if (!curr_pool)
9226 	    curr_pool = s390_start_pool (&pool_list, insn);
9227 
9228 	  s390_add_execute (curr_pool, insn);
9229 	  s390_add_pool_insn (curr_pool, insn);
9230 	}
9231       else if (NONJUMP_INSN_P (insn) || CALL_P (insn))
9232 	{
9233 	  rtx pool_ref = NULL_RTX;
9234 	  find_constant_pool_ref (PATTERN (insn), &pool_ref);
9235 	  if (pool_ref)
9236 	    {
9237 	      rtx constant = get_pool_constant (pool_ref);
9238 	      machine_mode mode = get_pool_mode (pool_ref);
9239 
9240 	      if (!curr_pool)
9241 		curr_pool = s390_start_pool (&pool_list, insn);
9242 
9243 	      s390_add_constant (curr_pool, constant, mode);
9244 	      s390_add_pool_insn (curr_pool, insn);
9245 
9246 	      /* Don't split the pool chunk between a LTREL_OFFSET load
9247 		 and the corresponding LTREL_BASE.  */
9248 	      if (GET_CODE (constant) == CONST
9249 		  && GET_CODE (XEXP (constant, 0)) == UNSPEC
9250 		  && XINT (XEXP (constant, 0), 1) == UNSPEC_LTREL_OFFSET)
9251 		{
9252 		  gcc_assert (!pending_ltrel);
9253 		  pending_ltrel = pool_ref;
9254 		}
9255 	    }
9256 	}
9257 
9258       if (JUMP_P (insn) || JUMP_TABLE_DATA_P (insn) || LABEL_P (insn))
9259 	{
9260 	  if (curr_pool)
9261 	    s390_add_pool_insn (curr_pool, insn);
9262 	  /* An LTREL_BASE must follow within the same basic block.  */
9263 	  gcc_assert (!pending_ltrel);
9264 	}
9265 
9266       if (NOTE_P (insn))
9267 	switch (NOTE_KIND (insn))
9268 	  {
9269 	  case NOTE_INSN_SWITCH_TEXT_SECTIONS:
9270 	    section_switch_p = true;
9271 	    break;
9272 	  case NOTE_INSN_VAR_LOCATION:
9273 	    continue;
9274 	  default:
9275 	    break;
9276 	  }
9277 
9278       if (!curr_pool
9279 	  || INSN_ADDRESSES_SIZE () <= (size_t) INSN_UID (insn)
9280           || INSN_ADDRESSES (INSN_UID (insn)) == -1)
9281 	continue;
9282 
9283       if (TARGET_CPU_ZARCH)
9284 	{
9285 	  if (curr_pool->size < S390_POOL_CHUNK_MAX)
9286 	    continue;
9287 
9288 	  s390_end_pool (curr_pool, NULL);
9289 	  curr_pool = NULL;
9290 	}
9291       else
9292 	{
9293           int chunk_size = INSN_ADDRESSES (INSN_UID (insn))
9294 			   - INSN_ADDRESSES (INSN_UID (curr_pool->first_insn))
9295 			 + extra_size;
9296 
9297 	  /* We will later have to insert base register reload insns.
9298 	     Those will have an effect on code size, which we need to
9299 	     consider here.  This calculation makes rather pessimistic
9300 	     worst-case assumptions.  */
9301 	  if (LABEL_P (insn))
9302 	    extra_size += 6;
9303 
9304 	  if (chunk_size < S390_POOL_CHUNK_MIN
9305 	      && curr_pool->size < S390_POOL_CHUNK_MIN
9306 	      && !section_switch_p)
9307 	    continue;
9308 
9309 	  /* Pool chunks can only be inserted after BARRIERs ...  */
9310 	  if (BARRIER_P (insn))
9311 	    {
9312 	      s390_end_pool (curr_pool, insn);
9313 	      curr_pool = NULL;
9314 	      extra_size = 0;
9315 	    }
9316 
9317 	  /* ... so if we don't find one in time, create one.  */
9318           else if (chunk_size > S390_POOL_CHUNK_MAX
9319 	           || curr_pool->size > S390_POOL_CHUNK_MAX
9320 		   || section_switch_p)
9321 	    {
9322 	      rtx_insn *label, *jump, *barrier, *next, *prev;
9323 
9324 	      if (!section_switch_p)
9325 		{
9326 		  /* We can insert the barrier only after a 'real' insn.  */
9327 		  if (! NONJUMP_INSN_P (insn) && ! CALL_P (insn))
9328 		    continue;
9329 		  if (get_attr_length (insn) == 0)
9330 		    continue;
9331 		  /* Don't separate LTREL_BASE from the corresponding
9332 		     LTREL_OFFSET load.  */
9333 		  if (pending_ltrel)
9334 		    continue;
9335 		  next = insn;
9336 		  do
9337 		    {
9338 		      insn = next;
9339 		      next = NEXT_INSN (insn);
9340 		    }
9341 		  while (next
9342 			 && NOTE_P (next)
9343 			 && NOTE_KIND (next) == NOTE_INSN_VAR_LOCATION);
9344 		}
9345 	      else
9346 		{
9347 		  gcc_assert (!pending_ltrel);
9348 
9349 		  /* The old pool has to end before the section switch
9350 		     note in order to make it part of the current
9351 		     section.  */
9352 		  insn = PREV_INSN (insn);
9353 		}
9354 
9355 	      label = gen_label_rtx ();
9356 	      prev = insn;
9357 	      if (prev && NOTE_P (prev))
9358 		prev = prev_nonnote_insn (prev);
9359 	      if (prev)
9360 		jump = emit_jump_insn_after_setloc (gen_jump (label), insn,
9361 						    INSN_LOCATION (prev));
9362 	      else
9363 		jump = emit_jump_insn_after_noloc (gen_jump (label), insn);
9364 	      barrier = emit_barrier_after (jump);
9365 	      insn = emit_label_after (label, barrier);
9366 	      JUMP_LABEL (jump) = label;
9367 	      LABEL_NUSES (label) = 1;
9368 
9369 	      INSN_ADDRESSES_NEW (jump, -1);
9370 	      INSN_ADDRESSES_NEW (barrier, -1);
9371 	      INSN_ADDRESSES_NEW (insn, -1);
9372 
9373 	      s390_end_pool (curr_pool, barrier);
9374 	      curr_pool = NULL;
9375 	      extra_size = 0;
9376 	    }
9377 	}
9378     }
9379 
9380   if (curr_pool)
9381     s390_end_pool (curr_pool, NULL);
9382   gcc_assert (!pending_ltrel);
9383 
9384   /* Find all labels that are branched into
9385      from an insn belonging to a different chunk.  */
9386 
9387   far_labels = BITMAP_ALLOC (NULL);
9388 
9389   for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
9390     {
9391       rtx_jump_table_data *table;
9392 
9393       /* Labels marked with LABEL_PRESERVE_P can be the target
9394 	 of non-local jumps, so we have to mark them.
9395 	 The same holds for named labels.
9396 
9397 	 Don't do that, however, if it is the label before
9398 	 a jump table.  */
9399 
9400       if (LABEL_P (insn)
9401 	  && (LABEL_PRESERVE_P (insn) || LABEL_NAME (insn)))
9402 	{
9403 	  rtx_insn *vec_insn = NEXT_INSN (insn);
9404 	  if (! vec_insn || ! JUMP_TABLE_DATA_P (vec_insn))
9405 	    bitmap_set_bit (far_labels, CODE_LABEL_NUMBER (insn));
9406 	}
9407       /* Check potential targets in a table jump (casesi_jump).  */
9408       else if (tablejump_p (insn, NULL, &table))
9409 	{
9410 	  rtx vec_pat = PATTERN (table);
9411 	  int i, diff_p = GET_CODE (vec_pat) == ADDR_DIFF_VEC;
9412 
9413 	  for (i = 0; i < XVECLEN (vec_pat, diff_p); i++)
9414 	    {
9415 	      rtx label = XEXP (XVECEXP (vec_pat, diff_p, i), 0);
9416 
9417 	      if (s390_find_pool (pool_list, label)
9418 		  != s390_find_pool (pool_list, insn))
9419 		bitmap_set_bit (far_labels, CODE_LABEL_NUMBER (label));
9420 	    }
9421 	}
9422       /* If we have a direct jump (conditional or unconditional),
9423 	 check all potential targets.  */
9424       else if (JUMP_P (insn))
9425 	{
9426 	  rtx pat = PATTERN (insn);
9427 
9428 	  if (GET_CODE (pat) == PARALLEL)
9429 	    pat = XVECEXP (pat, 0, 0);
9430 
9431 	  if (GET_CODE (pat) == SET)
9432 	    {
9433 	      rtx label = JUMP_LABEL (insn);
9434 	      if (label && !ANY_RETURN_P (label))
9435 		{
9436 		  if (s390_find_pool (pool_list, label)
9437 		      != s390_find_pool (pool_list, insn))
9438 		    bitmap_set_bit (far_labels, CODE_LABEL_NUMBER (label));
9439 		}
9440 	    }
9441 	}
9442     }
9443 
9444   /* Insert base register reload insns before every pool.  */
9445 
9446   for (curr_pool = pool_list; curr_pool; curr_pool = curr_pool->next)
9447     {
9448       rtx new_insn = gen_reload_base (cfun->machine->base_reg,
9449 				      curr_pool->label);
9450       rtx_insn *insn = curr_pool->first_insn;
9451       INSN_ADDRESSES_NEW (emit_insn_before (new_insn, insn), -1);
9452     }
9453 
9454   /* Insert base register reload insns at every far label.  */
9455 
9456   for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
9457     if (LABEL_P (insn)
9458         && bitmap_bit_p (far_labels, CODE_LABEL_NUMBER (insn)))
9459       {
9460 	struct constant_pool *pool = s390_find_pool (pool_list, insn);
9461 	if (pool)
9462 	  {
9463 	    rtx new_insn = gen_reload_base (cfun->machine->base_reg,
9464 					    pool->label);
9465 	    INSN_ADDRESSES_NEW (emit_insn_after (new_insn, insn), -1);
9466 	  }
9467       }
9468 
9469 
9470   BITMAP_FREE (far_labels);
9471 
9472 
9473   /* Recompute insn addresses.  */
9474 
9475   init_insn_lengths ();
9476   shorten_branches (get_insns ());
9477 
9478   return pool_list;
9479 }
9480 
9481 /* POOL_LIST is a chunk list as prepared by s390_chunkify_start.
9482    After we have decided to use this list, finish implementing
9483    all changes to the current function as required.  */
9484 
9485 static void
9486 s390_chunkify_finish (struct constant_pool *pool_list)
9487 {
9488   struct constant_pool *curr_pool = NULL;
9489   rtx_insn *insn;
9490 
9491 
9492   /* Replace all literal pool references.  */
9493 
9494   for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
9495     {
9496       if (INSN_P (insn))
9497 	replace_ltrel_base (&PATTERN (insn));
9498 
9499       curr_pool = s390_find_pool (pool_list, insn);
9500       if (!curr_pool)
9501 	continue;
9502 
9503       if (NONJUMP_INSN_P (insn) || CALL_P (insn))
9504         {
9505           rtx addr, pool_ref = NULL_RTX;
9506           find_constant_pool_ref (PATTERN (insn), &pool_ref);
9507           if (pool_ref)
9508             {
9509 	      if (s390_execute_label (insn))
9510 		addr = s390_find_execute (curr_pool, insn);
9511 	      else
9512 		addr = s390_find_constant (curr_pool,
9513 					   get_pool_constant (pool_ref),
9514 					   get_pool_mode (pool_ref));
9515 
9516               replace_constant_pool_ref (&PATTERN (insn), pool_ref, addr);
9517               INSN_CODE (insn) = -1;
9518             }
9519         }
9520     }
9521 
9522   /* Dump out all literal pools.  */
9523 
9524   for (curr_pool = pool_list; curr_pool; curr_pool = curr_pool->next)
9525     s390_dump_pool (curr_pool, 0);
9526 
9527   /* Free pool list.  */
9528 
9529   while (pool_list)
9530     {
9531       struct constant_pool *next = pool_list->next;
9532       s390_free_pool (pool_list);
9533       pool_list = next;
9534     }
9535 }
9536 
9537 /* POOL_LIST is a chunk list as prepared by s390_chunkify_start.
9538    We have decided we cannot use this list, so revert all changes
9539    to the current function that were done by s390_chunkify_start.  */
9540 
9541 static void
9542 s390_chunkify_cancel (struct constant_pool *pool_list)
9543 {
9544   struct constant_pool *curr_pool = NULL;
9545   rtx_insn *insn;
9546 
9547   /* Remove all pool placeholder insns.  */
9548 
9549   for (curr_pool = pool_list; curr_pool; curr_pool = curr_pool->next)
9550     {
9551       /* Did we insert an extra barrier?  Remove it.  */
9552       rtx_insn *barrier = PREV_INSN (curr_pool->pool_insn);
9553       rtx_insn *jump = barrier? PREV_INSN (barrier) : NULL;
9554       rtx_insn *label = NEXT_INSN (curr_pool->pool_insn);
9555 
9556       if (jump && JUMP_P (jump)
9557 	  && barrier && BARRIER_P (barrier)
9558 	  && label && LABEL_P (label)
9559 	  && GET_CODE (PATTERN (jump)) == SET
9560 	  && SET_DEST (PATTERN (jump)) == pc_rtx
9561 	  && GET_CODE (SET_SRC (PATTERN (jump))) == LABEL_REF
9562 	  && XEXP (SET_SRC (PATTERN (jump)), 0) == label)
9563 	{
9564 	  remove_insn (jump);
9565 	  remove_insn (barrier);
9566 	  remove_insn (label);
9567 	}
9568 
9569       remove_insn (curr_pool->pool_insn);
9570     }
9571 
9572   /* Remove all base register reload insns.  */
9573 
9574   for (insn = get_insns (); insn; )
9575     {
9576       rtx_insn *next_insn = NEXT_INSN (insn);
9577 
9578       if (NONJUMP_INSN_P (insn)
9579 	  && GET_CODE (PATTERN (insn)) == SET
9580 	  && GET_CODE (SET_SRC (PATTERN (insn))) == UNSPEC
9581 	  && XINT (SET_SRC (PATTERN (insn)), 1) == UNSPEC_RELOAD_BASE)
9582 	remove_insn (insn);
9583 
9584       insn = next_insn;
9585     }
9586 
9587   /* Free pool list.  */
9588 
9589   while (pool_list)
9590     {
9591       struct constant_pool *next = pool_list->next;
9592       s390_free_pool (pool_list);
9593       pool_list = next;
9594     }
9595 }
9596 
9597 /* Output the constant pool entry EXP in mode MODE with alignment ALIGN.  */
9598 
9599 void
9600 s390_output_pool_entry (rtx exp, machine_mode mode, unsigned int align)
9601 {
9602   switch (GET_MODE_CLASS (mode))
9603     {
9604     case MODE_FLOAT:
9605     case MODE_DECIMAL_FLOAT:
9606       gcc_assert (GET_CODE (exp) == CONST_DOUBLE);
9607 
9608       assemble_real (*CONST_DOUBLE_REAL_VALUE (exp),
9609 		     as_a <scalar_float_mode> (mode), align);
9610       break;
9611 
9612     case MODE_INT:
9613       assemble_integer (exp, GET_MODE_SIZE (mode), align, 1);
9614       mark_symbol_refs_as_used (exp);
9615       break;
9616 
9617     case MODE_VECTOR_INT:
9618     case MODE_VECTOR_FLOAT:
9619       {
9620 	int i;
9621 	machine_mode inner_mode;
9622 	gcc_assert (GET_CODE (exp) == CONST_VECTOR);
9623 
9624 	inner_mode = GET_MODE_INNER (GET_MODE (exp));
9625 	for (i = 0; i < XVECLEN (exp, 0); i++)
9626 	  s390_output_pool_entry (XVECEXP (exp, 0, i),
9627 				  inner_mode,
9628 				  i == 0
9629 				  ? align
9630 				  : GET_MODE_BITSIZE (inner_mode));
9631       }
9632       break;
9633 
9634     default:
9635       gcc_unreachable ();
9636     }
9637 }
9638 
9639 
9640 /* Return an RTL expression representing the value of the return address
9641    for the frame COUNT steps up from the current frame.  FRAME is the
9642    frame pointer of that frame.  */
9643 
9644 rtx
9645 s390_return_addr_rtx (int count, rtx frame ATTRIBUTE_UNUSED)
9646 {
9647   int offset;
9648   rtx addr;
9649 
9650   /* Without backchain, we fail for all but the current frame.  */
9651 
9652   if (!TARGET_BACKCHAIN && count > 0)
9653     return NULL_RTX;
9654 
9655   /* For the current frame, we need to make sure the initial
9656      value of RETURN_REGNUM is actually saved.  */
9657 
9658   if (count == 0)
9659     {
9660       /* On non-z architectures branch splitting could overwrite r14.  */
9661       if (TARGET_CPU_ZARCH)
9662 	return get_hard_reg_initial_val (Pmode, RETURN_REGNUM);
9663       else
9664 	{
9665 	  cfun_frame_layout.save_return_addr_p = true;
9666 	  return gen_rtx_MEM (Pmode, return_address_pointer_rtx);
9667 	}
9668     }
9669 
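  /* For an outer frame, compute the address of the slot where r14
     (RETURN_REGNUM) was saved, relative to FRAME, and return a MEM.  */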
9670   if (TARGET_PACKED_STACK)
9671     offset = -2 * UNITS_PER_LONG;
9672   else
9673     offset = RETURN_REGNUM * UNITS_PER_LONG;
9674 
9675   addr = plus_constant (Pmode, frame, offset);
9676   addr = memory_address (Pmode, addr);
9677   return gen_rtx_MEM (Pmode, addr);
9678 }
9679 
9680 /* Return an RTL expression representing the back chain stored in
9681    the current stack frame.  */
9682 
9683 rtx
9684 s390_back_chain_rtx (void)
9685 {
9686   rtx chain;
9687 
9688   gcc_assert (TARGET_BACKCHAIN);
9689 
9690   if (TARGET_PACKED_STACK)
9691     chain = plus_constant (Pmode, stack_pointer_rtx,
9692 			   STACK_POINTER_OFFSET - UNITS_PER_LONG);
9693   else
9694     chain = stack_pointer_rtx;
9695 
9696   chain = gen_rtx_MEM (Pmode, chain);
9697   return chain;
9698 }
9699 
9700 /* Find the first call-clobbered register unused in the current function.
9701    Such a register can be used as base register in a leaf function
9702    or to hold the return address before the epilogue.  */
9703 
9704 static int
9705 find_unused_clobbered_reg (void)
9706 {
9707   int i;
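  /* GPRs 0 through 5 are the call-clobbered general purpose registers
     and therefore the only candidates.  */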
9708   for (i = 0; i < 6; i++)
9709     if (!df_regs_ever_live_p (i))
9710       return i;
9711   return 0;
9712 }
9713 
9714 
9715 /* Helper function for s390_regs_ever_clobbered.  Sets the fields in DATA for all
9716    clobbered hard regs in SETREG.  */
9717 
9718 static void
9719 s390_reg_clobbered_rtx (rtx setreg, const_rtx set_insn ATTRIBUTE_UNUSED, void *data)
9720 {
9721   char *regs_ever_clobbered = (char *)data;
9722   unsigned int i, regno;
9723   machine_mode mode = GET_MODE (setreg);
9724 
9725   if (GET_CODE (setreg) == SUBREG)
9726     {
9727       rtx inner = SUBREG_REG (setreg);
9728       if (!GENERAL_REG_P (inner) && !FP_REG_P (inner))
9729 	return;
9730       regno = subreg_regno (setreg);
9731     }
9732   else if (GENERAL_REG_P (setreg) || FP_REG_P (setreg))
9733     regno = REGNO (setreg);
9734   else
9735     return;
9736 
9737   for (i = regno;
9738        i < end_hard_regno (mode, regno);
9739        i++)
9740     regs_ever_clobbered[i] = 1;
9741 }
9742 
9743 /* Walk through all basic blocks of the current function looking
9744    for clobbered hard regs using s390_reg_clobbered_rtx.  The fields
9745    of the passed char array REGS_EVER_CLOBBERED are set to one for
9746    each of those regs.  */
9747 
9748 static void
9749 s390_regs_ever_clobbered (char regs_ever_clobbered[])
9750 {
9751   basic_block cur_bb;
9752   rtx_insn *cur_insn;
9753   unsigned int i;
9754 
9755   memset (regs_ever_clobbered, 0, 32);
9756 
9757   /* For non-leaf functions we have to consider all call clobbered regs to be
9758      clobbered.  */
9759   if (!crtl->is_leaf)
9760     {
9761       for (i = 0; i < 32; i++)
9762 	regs_ever_clobbered[i] = call_really_used_regs[i];
9763     }
9764 
9765   /* Make the "magic" eh_return registers live if necessary.  For regs_ever_live
9766      this work is done by liveness analysis (mark_regs_live_at_end).
9767      Special care is needed for functions containing landing pads.  Landing pads
9768      may use the eh registers, but the code which sets these registers is not
9769      contained in that function.  Hence s390_regs_ever_clobbered is not able to
9770      deal with this automatically.  */
9771   if (crtl->calls_eh_return || cfun->machine->has_landing_pad_p)
9772     for (i = 0; EH_RETURN_DATA_REGNO (i) != INVALID_REGNUM ; i++)
9773       if (crtl->calls_eh_return
9774 	  || (cfun->machine->has_landing_pad_p
9775 	      && df_regs_ever_live_p (EH_RETURN_DATA_REGNO (i))))
9776 	regs_ever_clobbered[EH_RETURN_DATA_REGNO (i)] = 1;
9777 
9778   /* For nonlocal gotos all call-saved registers have to be saved.
9779      This flag is also set for the unwinding code in libgcc.
9780      See expand_builtin_unwind_init.  For regs_ever_live this is done by
9781      reload.  */
9782   if (crtl->saves_all_registers)
9783     for (i = 0; i < 32; i++)
9784       if (!call_really_used_regs[i])
9785 	regs_ever_clobbered[i] = 1;
9786 
9787   FOR_EACH_BB_FN (cur_bb, cfun)
9788     {
9789       FOR_BB_INSNS (cur_bb, cur_insn)
9790 	{
9791 	  rtx pat;
9792 
9793 	  if (!INSN_P (cur_insn))
9794 	    continue;
9795 
9796 	  pat = PATTERN (cur_insn);
9797 
9798 	  /* Ignore GPR restore insns.  */
9799 	  if (epilogue_completed && RTX_FRAME_RELATED_P (cur_insn))
9800 	    {
9801 	      if (GET_CODE (pat) == SET
9802 		  && GENERAL_REG_P (SET_DEST (pat)))
9803 		{
9804 		  /* lgdr  */
9805 		  if (GET_MODE (SET_SRC (pat)) == DImode
9806 		      && FP_REG_P (SET_SRC (pat)))
9807 		    continue;
9808 
9809 		  /* l / lg  */
9810 		  if (GET_CODE (SET_SRC (pat)) == MEM)
9811 		    continue;
9812 		}
9813 
9814 	      /* lm / lmg */
9815 	      if (GET_CODE (pat) == PARALLEL
9816 		  && load_multiple_operation (pat, VOIDmode))
9817 		continue;
9818 	    }
9819 
9820 	  note_stores (pat,
9821 		       s390_reg_clobbered_rtx,
9822 		       regs_ever_clobbered);
9823 	}
9824     }
9825 }
9826 
9827 /* Determine the frame area which actually has to be accessed
9828    in the function epilogue. The values are stored at the
9829    given pointers AREA_BOTTOM (address of the lowest used stack
9830    address) and AREA_TOP (address of the first item which does
9831    not belong to the stack frame).  */
9832 
9833 static void
9834 s390_frame_area (int *area_bottom, int *area_top)
9835 {
9836   int b, t;
9837 
9838   b = INT_MAX;
9839   t = INT_MIN;
9840 
9841   if (cfun_frame_layout.first_restore_gpr != -1)
9842     {
9843       b = (cfun_frame_layout.gprs_offset
9844 	   + cfun_frame_layout.first_restore_gpr * UNITS_PER_LONG);
9845       t = b + (cfun_frame_layout.last_restore_gpr
9846 	       - cfun_frame_layout.first_restore_gpr + 1) * UNITS_PER_LONG;
9847     }
9848 
9849   if (TARGET_64BIT && cfun_save_high_fprs_p)
9850     {
9851       b = MIN (b, cfun_frame_layout.f8_offset);
9852       t = MAX (t, (cfun_frame_layout.f8_offset
9853 		   + cfun_frame_layout.high_fprs * 8));
9854     }
9855 
9856   if (!TARGET_64BIT)
9857     {
9858       if (cfun_fpr_save_p (FPR4_REGNUM))
9859 	{
9860 	  b = MIN (b, cfun_frame_layout.f4_offset);
9861 	  t = MAX (t, cfun_frame_layout.f4_offset + 8);
9862 	}
9863       if (cfun_fpr_save_p (FPR6_REGNUM))
9864 	{
9865 	  b = MIN (b, cfun_frame_layout.f4_offset + 8);
9866 	  t = MAX (t, cfun_frame_layout.f4_offset + 16);
9867 	}
9868     }
9869   *area_bottom = b;
9870   *area_top = t;
9871 }
9872 /* Update gpr_save_slots in the frame layout trying to make use of
9873    FPRs as GPR save slots.
9874    This is a helper routine of s390_register_info.  */
9875 
9876 static void
9877 s390_register_info_gprtofpr ()
9878 {
9879   int save_reg_slot = FPR0_REGNUM;
9880   int i, j;
9881 
9882   if (!TARGET_Z10 || !TARGET_HARD_FLOAT || !crtl->is_leaf)
9883     return;
9884 
9885   /* builtin_eh_return needs to be able to modify the return address
9886      on the stack.  It could also adjust the FPR save slot instead but
9887      is it worth the trouble?!  */
9888   if (crtl->calls_eh_return)
9889     return;
9890 
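  /* Walk the call-saved GPRs r15 down to r6 and try to give each one
     that needs saving an unused call-clobbered FPR as its save slot.  */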
9891   for (i = 15; i >= 6; i--)
9892     {
9893       if (cfun_gpr_save_slot (i) == SAVE_SLOT_NONE)
9894 	continue;
9895 
9896       /* Advance to the next FP register which can be used as a
9897 	 GPR save slot.  */
9898       while ((!call_really_used_regs[save_reg_slot]
9899 	      || df_regs_ever_live_p (save_reg_slot)
9900 	      || cfun_fpr_save_p (save_reg_slot))
9901 	     && FP_REGNO_P (save_reg_slot))
9902 	save_reg_slot++;
9903       if (!FP_REGNO_P (save_reg_slot))
9904 	{
9905 	  /* We only want to use ldgr/lgdr if we can get rid of
9906 	     stm/lm entirely.  So undo the gpr slot allocation in
9907 	     case we ran out of FPR save slots.  */
9908 	  for (j = 6; j <= 15; j++)
9909 	    if (FP_REGNO_P (cfun_gpr_save_slot (j)))
9910 	      cfun_gpr_save_slot (j) = SAVE_SLOT_STACK;
9911 	  break;
9912 	}
9913       cfun_gpr_save_slot (i) = save_reg_slot++;
9914     }
9915 }
9916 
9917 /* Set the bits in fpr_bitmap for FPRs which need to be saved due to
9918    stdarg.
9919    This is a helper routine for s390_register_info.  */
9920 
9921 static void
9922 s390_register_info_stdarg_fpr ()
9923 {
9924   int i;
9925   int min_fpr;
9926   int max_fpr;
9927 
9928   /* Save the FP argument regs for stdarg: f0, f2 for 31 bit and
9929      f0, f2, f4, f6 for 64 bit.  */
9930   if (!cfun->stdarg
9931       || !TARGET_HARD_FLOAT
9932       || !cfun->va_list_fpr_size
9933       || crtl->args.info.fprs >= FP_ARG_NUM_REG)
9934     return;
9935 
9936   min_fpr = crtl->args.info.fprs;
9937   max_fpr = min_fpr + cfun->va_list_fpr_size - 1;
9938   if (max_fpr >= FP_ARG_NUM_REG)
9939     max_fpr = FP_ARG_NUM_REG - 1;
9940 
9941   /* FPR argument regs start at f0.  */
9942   min_fpr += FPR0_REGNUM;
9943   max_fpr += FPR0_REGNUM;
9944 
9945   for (i = min_fpr; i <= max_fpr; i++)
9946     cfun_set_fpr_save (i);
9947 }
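
/* Example (hypothetical): if a single named double argument consumed f0,
   crtl->args.info.fprs is 1, so the loop above marks the remaining FP
   argument registers (starting right after FPR0_REGNUM) as needing a
   save slot for later va_arg accesses.  */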
9948 
9949 /* Reserve the GPR save slots for GPRs which need to be saved due to
9950    stdarg.
9951    This is a helper routine for s390_register_info.  */
9952 
9953 static void
9954 s390_register_info_stdarg_gpr ()
9955 {
9956   int i;
9957   int min_gpr;
9958   int max_gpr;
9959 
9960   if (!cfun->stdarg
9961       || !cfun->va_list_gpr_size
9962       || crtl->args.info.gprs >= GP_ARG_NUM_REG)
9963     return;
9964 
9965   min_gpr = crtl->args.info.gprs;
9966   max_gpr = min_gpr + cfun->va_list_gpr_size - 1;
9967   if (max_gpr >= GP_ARG_NUM_REG)
9968     max_gpr = GP_ARG_NUM_REG - 1;
9969 
9970   /* GPR argument regs start at r2.  */
9971   min_gpr += GPR2_REGNUM;
9972   max_gpr += GPR2_REGNUM;
9973 
9974   /* If r6 was supposed to be saved into an FPR and now needs to go to
9975      the stack for vararg we have to adjust the restore range to make
9976      sure that the restore is done from stack as well.  */
9977   if (FP_REGNO_P (cfun_gpr_save_slot (GPR6_REGNUM))
9978       && min_gpr <= GPR6_REGNUM
9979       && max_gpr >= GPR6_REGNUM)
9980     {
9981       if (cfun_frame_layout.first_restore_gpr == -1
9982 	  || cfun_frame_layout.first_restore_gpr > GPR6_REGNUM)
9983 	cfun_frame_layout.first_restore_gpr = GPR6_REGNUM;
9984       if (cfun_frame_layout.last_restore_gpr == -1
9985 	  || cfun_frame_layout.last_restore_gpr < GPR6_REGNUM)
9986 	cfun_frame_layout.last_restore_gpr = GPR6_REGNUM;
9987     }
9988 
9989   if (cfun_frame_layout.first_save_gpr == -1
9990       || cfun_frame_layout.first_save_gpr > min_gpr)
9991     cfun_frame_layout.first_save_gpr = min_gpr;
9992 
9993   if (cfun_frame_layout.last_save_gpr == -1
9994       || cfun_frame_layout.last_save_gpr < max_gpr)
9995     cfun_frame_layout.last_save_gpr = max_gpr;
9996 
9997   for (i = min_gpr; i <= max_gpr; i++)
9998     cfun_gpr_save_slot (i) = SAVE_SLOT_STACK;
9999 }
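
/* Worked example (hypothetical): with two named integer arguments,
   crtl->args.info.gprs == 2, so min_gpr starts at 2 and max_gpr is
   clamped to GP_ARG_NUM_REG - 1; after adding GPR2_REGNUM the reserved
   stack slots cover r4 up to r6 and the save/restore ranges are widened
   to include them.  */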
10000 
10001 /* Calculate the save and restore ranges for stm(g) and lm(g) in the
10002    prologue and epilogue.  */
10003 
10004 static void
10005 s390_register_info_set_ranges ()
10006 {
10007   int i, j;
10008 
10009   /* Find the first and the last save slot supposed to use the stack
10010      to set the restore range.
10011      Vararg regs might be marked as saved to the stack but only the
10012      call-saved regs really need restoring (i.e. r6).  This code
10013      assumes that the vararg regs have not yet been recorded in
10014      cfun_gpr_save_slot.  */
10015   for (i = 0; i < 16 && cfun_gpr_save_slot (i) != SAVE_SLOT_STACK; i++);
10016   for (j = 15; j > i && cfun_gpr_save_slot (j) != SAVE_SLOT_STACK; j--);
10017   cfun_frame_layout.first_restore_gpr = (i == 16) ? -1 : i;
10018   cfun_frame_layout.last_restore_gpr = (i == 16) ? -1 : j;
10019   cfun_frame_layout.first_save_gpr = (i == 16) ? -1 : i;
10020   cfun_frame_layout.last_save_gpr = (i == 16) ? -1 : j;
10021 }
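
/* For instance (hypothetical slot assignment): if exactly r6 ... r13 are
   marked SAVE_SLOT_STACK, the two scans above yield i == 6 and j == 13,
   so both the save and the restore range become [6, 13]; if no GPR uses
   a stack slot at all, i reaches 16 and all four fields end up as -1.  */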
10022 
10023 /* The GPR and FPR save slots in cfun->machine->frame_layout are set
10024    for registers which need to be saved in function prologue.
10025    This function can be used until the insns emitted for save/restore
10026    of the regs are visible in the RTL stream.  */
10027 
10028 static void
10029 s390_register_info ()
10030 {
10031   int i;
10032   char clobbered_regs[32];
10033 
10034   gcc_assert (!epilogue_completed);
10035 
10036   if (reload_completed)
10037     /* After reload we rely on our own routine to determine which
10038        registers need saving.  */
10039     s390_regs_ever_clobbered (clobbered_regs);
10040   else
10041     /* During reload we use regs_ever_live as a base since reload
10042        does changes in there which we otherwise would not be aware
10043        of.  */
10044     for (i = 0; i < 32; i++)
10045       clobbered_regs[i] = df_regs_ever_live_p (i);
10046 
10047   for (i = 0; i < 32; i++)
10048     clobbered_regs[i] = clobbered_regs[i] && !global_regs[i];
10049 
10050   /* Mark the call-saved FPRs which need to be saved.
10051      This needs to be done before checking the special GPRs since the
10052      stack pointer usage depends on whether high FPRs have to be saved
10053      or not.  */
10054   cfun_frame_layout.fpr_bitmap = 0;
10055   cfun_frame_layout.high_fprs = 0;
10056   for (i = FPR0_REGNUM; i <= FPR15_REGNUM; i++)
10057     if (clobbered_regs[i] && !call_really_used_regs[i])
10058       {
10059 	cfun_set_fpr_save (i);
10060 	if (i >= FPR8_REGNUM)
10061 	  cfun_frame_layout.high_fprs++;
10062       }
10063 
10064   /* Register 12 is used for GOT address, but also as temp in prologue
10065      for split-stack stdarg functions (unless r14 is available).  */
10066   clobbered_regs[12]
10067     |= ((flag_pic && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
10068 	|| (flag_split_stack && cfun->stdarg
10069 	    && (crtl->is_leaf || TARGET_TPF_PROFILING
10070 		|| has_hard_reg_initial_val (Pmode, RETURN_REGNUM))));
10071 
10072   clobbered_regs[BASE_REGNUM]
10073     |= (cfun->machine->base_reg
10074 	&& REGNO (cfun->machine->base_reg) == BASE_REGNUM);
10075 
10076   clobbered_regs[HARD_FRAME_POINTER_REGNUM]
10077     |= !!frame_pointer_needed;
10078 
10079   /* On pre z900 machines this might take until machine dependent
10080      reorg to decide.
10081      save_return_addr_p will only be set on non-zarch machines so
10082      there is no risk that r14 goes into an FPR instead of a stack
10083      slot.  */
10084   clobbered_regs[RETURN_REGNUM]
10085     |= (!crtl->is_leaf
10086 	|| TARGET_TPF_PROFILING
10087 	|| cfun->machine->split_branches_pending_p
10088 	|| cfun_frame_layout.save_return_addr_p
10089 	|| crtl->calls_eh_return);
10090 
10091   clobbered_regs[STACK_POINTER_REGNUM]
10092     |= (!crtl->is_leaf
10093 	|| TARGET_TPF_PROFILING
10094 	|| cfun_save_high_fprs_p
10095 	|| get_frame_size () > 0
10096 	|| (reload_completed && cfun_frame_layout.frame_size > 0)
10097 	|| cfun->calls_alloca);
10098 
10099   memset (cfun_frame_layout.gpr_save_slots, SAVE_SLOT_NONE, 16);
10100 
10101   for (i = 6; i < 16; i++)
10102     if (clobbered_regs[i])
10103       cfun_gpr_save_slot (i) = SAVE_SLOT_STACK;
10104 
10105   s390_register_info_stdarg_fpr ();
10106   s390_register_info_gprtofpr ();
10107   s390_register_info_set_ranges ();
10108   /* stdarg functions might need to save GPRs 2 to 6.  This might
10109      override the GPR->FPR save decision made by
10110      s390_register_info_gprtofpr for r6 since vararg regs must go to
10111      the stack.  */
10112   s390_register_info_stdarg_gpr ();
10113 }
10114 
10115 /* Return true if REGNO is a global register, but not one
10116    of the special ones that need to be saved/restored anyway.  */
10117 
10118 static inline bool
10119 global_not_special_regno_p (int regno)
10120 {
10121   return (global_regs[regno]
10122 	  /* These registers are special and need to be
10123 	     restored in any case.  */
10124 	  && !(regno == STACK_POINTER_REGNUM
10125 	       || regno == RETURN_REGNUM
10126 	       || regno == BASE_REGNUM
10127 	       || (flag_pic && regno == (int)PIC_OFFSET_TABLE_REGNUM)));
10128 }
10129 
10130 /* This function is called by s390_optimize_prologue in order to get
10131    rid of unnecessary GPR save/restore instructions.  The register info
10132    for the GPRs is re-computed and the ranges are re-calculated.  */
10133 
10134 static void
10135 s390_optimize_register_info ()
10136 {
10137   char clobbered_regs[32];
10138   int i;
10139 
10140   gcc_assert (epilogue_completed);
10141   gcc_assert (!cfun->machine->split_branches_pending_p);
10142 
10143   s390_regs_ever_clobbered (clobbered_regs);
10144 
10145   /* Global registers do not need to be saved and restored unless it
10146      is one of our special regs.  (r12, r13, r14, or r15).  */
10147   for (i = 0; i < 32; i++)
10148     clobbered_regs[i] = clobbered_regs[i] && !global_not_special_regno_p (i);
10149 
10150   /* There is still special treatment needed for cases invisible to
10151      s390_regs_ever_clobbered.  */
10152   clobbered_regs[RETURN_REGNUM]
10153     |= (TARGET_TPF_PROFILING
10154 	/* When expanding builtin_return_addr in ESA mode we do not
10155 	   know whether r14 will later be needed as scratch reg when
10156 	   doing branch splitting.  So the builtin always accesses the
10157 	   r14 save slot and we need to stick to the save/restore
10158 	   decision for r14 even if it turns out that it didn't get
10159 	   clobbered.  */
10160 	|| cfun_frame_layout.save_return_addr_p
10161 	|| crtl->calls_eh_return);
10162 
10163   memset (cfun_frame_layout.gpr_save_slots, SAVE_SLOT_NONE, 6);
10164 
10165   for (i = 6; i < 16; i++)
10166     if (!clobbered_regs[i])
10167       cfun_gpr_save_slot (i) = SAVE_SLOT_NONE;
10168 
10169   s390_register_info_set_ranges ();
10170   s390_register_info_stdarg_gpr ();
10171 }
10172 
10173 /* Fill cfun->machine with info about frame of current function.  */
10174 
10175 static void
10176 s390_frame_info (void)
10177 {
10178   HOST_WIDE_INT lowest_offset;
10179 
10180   cfun_frame_layout.first_save_gpr_slot = cfun_frame_layout.first_save_gpr;
10181   cfun_frame_layout.last_save_gpr_slot = cfun_frame_layout.last_save_gpr;
10182 
10183   /* The va_arg builtin uses a constant distance of 16 *
10184      UNITS_PER_LONG (r0-r15) to reach the FPRs from the reg_save_area
10185      pointer.  So even if we are going to save the stack pointer in an
10186      FPR we need the stack space in order to keep the offsets
10187      correct.  */
10188   if (cfun->stdarg && cfun_save_arg_fprs_p)
10189     {
10190       cfun_frame_layout.last_save_gpr_slot = STACK_POINTER_REGNUM;
10191 
10192       if (cfun_frame_layout.first_save_gpr_slot == -1)
10193 	cfun_frame_layout.first_save_gpr_slot = STACK_POINTER_REGNUM;
10194     }
10195 
10196   cfun_frame_layout.frame_size = get_frame_size ();
10197   if (!TARGET_64BIT && cfun_frame_layout.frame_size > 0x7fff0000)
10198     fatal_error (input_location,
10199 		 "total size of local variables exceeds architecture limit");
10200 
10201   if (!TARGET_PACKED_STACK)
10202     {
10203       /* Fixed stack layout.  */
10204       cfun_frame_layout.backchain_offset = 0;
10205       cfun_frame_layout.f0_offset = 16 * UNITS_PER_LONG;
10206       cfun_frame_layout.f4_offset = cfun_frame_layout.f0_offset + 2 * 8;
10207       cfun_frame_layout.f8_offset = -cfun_frame_layout.high_fprs * 8;
10208       cfun_frame_layout.gprs_offset = (cfun_frame_layout.first_save_gpr_slot
10209 				       * UNITS_PER_LONG);
10210     }
10211   else if (TARGET_BACKCHAIN)
10212     {
10213       /* Kernel stack layout - packed stack, backchain, no float  */
10214       gcc_assert (TARGET_SOFT_FLOAT);
10215       cfun_frame_layout.backchain_offset = (STACK_POINTER_OFFSET
10216 					    - UNITS_PER_LONG);
10217 
10218       /* The distance between the backchain and the return address
10219 	 save slot must not change.  So we always need a slot for the
10220 	 stack pointer which resides in between.  */
10221       cfun_frame_layout.last_save_gpr_slot = STACK_POINTER_REGNUM;
10222 
10223       cfun_frame_layout.gprs_offset
10224 	= cfun_frame_layout.backchain_offset - cfun_gprs_save_area_size;
10225 
10226       /* FPRs will not be saved.  Nevertheless pick sane values to
10227 	 keep area calculations valid.  */
10228       cfun_frame_layout.f0_offset =
10229 	cfun_frame_layout.f4_offset =
10230 	cfun_frame_layout.f8_offset = cfun_frame_layout.gprs_offset;
10231     }
10232   else
10233     {
10234       int num_fprs;
10235 
10236       /* Packed stack layout without backchain.  */
10237 
10238       /* With stdarg FPRs need their dedicated slots.  */
10239       num_fprs = (TARGET_64BIT && cfun->stdarg ? 2
10240 		  : (cfun_fpr_save_p (FPR4_REGNUM) +
10241 		     cfun_fpr_save_p (FPR6_REGNUM)));
10242       cfun_frame_layout.f4_offset = STACK_POINTER_OFFSET - 8 * num_fprs;
10243 
10244       num_fprs = (cfun->stdarg ? 2
10245 		  : (cfun_fpr_save_p (FPR0_REGNUM)
10246 		     + cfun_fpr_save_p (FPR2_REGNUM)));
10247       cfun_frame_layout.f0_offset = cfun_frame_layout.f4_offset - 8 * num_fprs;
10248 
10249       cfun_frame_layout.gprs_offset
10250 	= cfun_frame_layout.f0_offset - cfun_gprs_save_area_size;
10251 
10252       cfun_frame_layout.f8_offset = (cfun_frame_layout.gprs_offset
10253 				     - cfun_frame_layout.high_fprs * 8);
10254     }
10255 
10256   if (cfun_save_high_fprs_p)
10257     cfun_frame_layout.frame_size += cfun_frame_layout.high_fprs * 8;
10258 
10259   if (!crtl->is_leaf)
10260     cfun_frame_layout.frame_size += crtl->outgoing_args_size;
10261 
10262   /* In the following cases we have to allocate a STACK_POINTER_OFFSET
10263      sized area at the bottom of the stack.  This is required also for
10264      leaf functions.  When GCC generates a local stack reference it
10265      will always add STACK_POINTER_OFFSET to all these references.  */
10266   if (crtl->is_leaf
10267       && !TARGET_TPF_PROFILING
10268       && cfun_frame_layout.frame_size == 0
10269       && !cfun->calls_alloca)
10270     return;
10271 
10272   /* Calculate the number of bytes we have used in our own register
10273      save area.  With the packed stack layout we can re-use the
10274      remaining bytes for normal stack elements.  */
10275 
10276   if (TARGET_PACKED_STACK)
10277     lowest_offset = MIN (MIN (cfun_frame_layout.f0_offset,
10278 			      cfun_frame_layout.f4_offset),
10279 			 cfun_frame_layout.gprs_offset);
10280   else
10281     lowest_offset = 0;
10282 
10283   if (TARGET_BACKCHAIN)
10284     lowest_offset = MIN (lowest_offset, cfun_frame_layout.backchain_offset);
10285 
10286   cfun_frame_layout.frame_size += STACK_POINTER_OFFSET - lowest_offset;
10287 
10288   /* Under 31 bit, if an odd number of GPRs has to be saved, we have to
10289      adjust the frame size to maintain 8 byte alignment of stack
10290      frames.  */
10291   cfun_frame_layout.frame_size = ((cfun_frame_layout.frame_size +
10292 				   STACK_BOUNDARY / BITS_PER_UNIT - 1)
10293 				  & ~(STACK_BOUNDARY / BITS_PER_UNIT - 1));
10294 }
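
/* Rough sketch of the fixed (non-packed) 64-bit layout implied by the
   offsets above (illustrative only, relative to the stack pointer value
   at function entry):

     gprs_offset = first_save_gpr_slot * 8    GPR slot for reg i at i * 8
     f0_offset   = 128                        f0, f2
     f4_offset   = 144                        f4, f6
     f8_offset   = -high_fprs * 8             high FPRs below the area

   i.e. the GPR and FP argument save slots live in the 160-byte register
   save area at the low end of the caller's frame.  */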
10295 
10296 /* Generate frame layout.  Fills in register and frame data for the current
10297    function in cfun->machine.  This routine can be called multiple times;
10298    it will re-do the complete frame layout every time.  */
10299 
10300 static void
10301 s390_init_frame_layout (void)
10302 {
10303   HOST_WIDE_INT frame_size;
10304   int base_used;
10305 
10306   /* After LRA the frame layout is supposed to be read-only and should
10307      not be re-computed.  */
10308   if (reload_completed)
10309     return;
10310 
10311   /* On S/390 machines, we may need to perform branch splitting, which
10312      will require both base and return address register.  We have no
10313      choice but to assume we're going to need them until right at the
10314      end of the machine dependent reorg phase.  */
10315   if (!TARGET_CPU_ZARCH)
10316     cfun->machine->split_branches_pending_p = true;
10317 
10318   do
10319     {
10320       frame_size = cfun_frame_layout.frame_size;
10321 
10322       /* Try to predict whether we'll need the base register.  */
10323       base_used = cfun->machine->split_branches_pending_p
10324 		  || crtl->uses_const_pool
10325 		  || (!DISP_IN_RANGE (frame_size)
10326 		      && !CONST_OK_FOR_K (frame_size));
10327 
10328       /* Decide which register to use as literal pool base.  In small
10329 	 leaf functions, try to use an unused call-clobbered register
10330 	 as base register to avoid save/restore overhead.  */
10331       if (!base_used)
10332 	cfun->machine->base_reg = NULL_RTX;
10333       else
10334 	{
10335 	  int br = 0;
10336 
10337 	  if (crtl->is_leaf)
10338 	    /* Prefer r5 (most likely to be free).  */
10339 	    for (br = 5; br >= 2 && df_regs_ever_live_p (br); br--)
10340 	      ;
10341 	  cfun->machine->base_reg =
10342 	    gen_rtx_REG (Pmode, (br >= 2) ? br : BASE_REGNUM);
10343 	}
10344 
10345       s390_register_info ();
10346       s390_frame_info ();
10347     }
10348   while (frame_size != cfun_frame_layout.frame_size);
10349 }
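
/* Note on the loop above (informal): choosing or dropping the literal
   pool base register changes which GPRs need saving, which can change
   frame_size and hence whether the frame offset still satisfies
   DISP_IN_RANGE / CONST_OK_FOR_K.  Iterating until frame_size stops
   changing lets the layout settle on a fixed point.  */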
10350 
10351 /* Remove the FPR clobbers from a tbegin insn if it can be proven that
10352    the TX is nonescaping.  A transaction is considered escaping if
10353    there is at least one path from tbegin returning CC0 to the
10354    function exit block without a tend.
10355 
10356    The check so far has some limitations:
10357    - only single tbegin/tend BBs are supported
10358    - the first cond jump after tbegin must separate the CC0 path from ~CC0
10359    - when CC is copied to a GPR and the CC0 check is done with the GPR
10360      this is not supported
10361 */
10362 
10363 static void
10364 s390_optimize_nonescaping_tx (void)
10365 {
10366   const unsigned int CC0 = 1 << 3;
10367   basic_block tbegin_bb = NULL;
10368   basic_block tend_bb = NULL;
10369   basic_block bb;
10370   rtx_insn *insn;
10371   bool result = true;
10372   int bb_index;
10373   rtx_insn *tbegin_insn = NULL;
10374 
10375   if (!cfun->machine->tbegin_p)
10376     return;
10377 
10378   for (bb_index = 0; bb_index < n_basic_blocks_for_fn (cfun); bb_index++)
10379     {
10380       bb = BASIC_BLOCK_FOR_FN (cfun, bb_index);
10381 
10382       if (!bb)
10383 	continue;
10384 
10385       FOR_BB_INSNS (bb, insn)
10386 	{
10387 	  rtx ite, cc, pat, target;
10388 	  unsigned HOST_WIDE_INT mask;
10389 
10390 	  if (!INSN_P (insn) || INSN_CODE (insn) <= 0)
10391 	    continue;
10392 
10393 	  pat = PATTERN (insn);
10394 
10395 	  if (GET_CODE (pat) == PARALLEL)
10396 	    pat = XVECEXP (pat, 0, 0);
10397 
10398 	  if (GET_CODE (pat) != SET
10399 	      || GET_CODE (SET_SRC (pat)) != UNSPEC_VOLATILE)
10400 	    continue;
10401 
10402 	  if (XINT (SET_SRC (pat), 1) == UNSPECV_TBEGIN)
10403 	    {
10404 	      rtx_insn *tmp;
10405 
10406 	      tbegin_insn = insn;
10407 
10408 	      /* Just return if the tbegin doesn't have clobbers.  */
10409 	      if (GET_CODE (PATTERN (insn)) != PARALLEL)
10410 		return;
10411 
10412 	      if (tbegin_bb != NULL)
10413 		return;
10414 
10415 	      /* Find the next conditional jump.  */
10416 	      for (tmp = NEXT_INSN (insn);
10417 		   tmp != NULL_RTX;
10418 		   tmp = NEXT_INSN (tmp))
10419 		{
10420 		  if (reg_set_p (gen_rtx_REG (CCmode, CC_REGNUM), tmp))
10421 		    return;
10422 		  if (!JUMP_P (tmp))
10423 		    continue;
10424 
10425 		  ite = SET_SRC (PATTERN (tmp));
10426 		  if (GET_CODE (ite) != IF_THEN_ELSE)
10427 		    continue;
10428 
10429 		  cc = XEXP (XEXP (ite, 0), 0);
10430 		  if (!REG_P (cc) || !CC_REGNO_P (REGNO (cc))
10431 		      || GET_MODE (cc) != CCRAWmode
10432 		      || GET_CODE (XEXP (XEXP (ite, 0), 1)) != CONST_INT)
10433 		    return;
10434 
10435 		  if (bb->succs->length () != 2)
10436 		    return;
10437 
10438 		  mask = INTVAL (XEXP (XEXP (ite, 0), 1));
10439 		  if (GET_CODE (XEXP (ite, 0)) == NE)
10440 		    mask ^= 0xf;
10441 
10442 		  if (mask == CC0)
10443 		    target = XEXP (ite, 1);
10444 		  else if (mask == (CC0 ^ 0xf))
10445 		    target = XEXP (ite, 2);
10446 		  else
10447 		    return;
10448 
10449 		  {
10450 		    edge_iterator ei;
10451 		    edge e1, e2;
10452 
10453 		    ei = ei_start (bb->succs);
10454 		    e1 = ei_safe_edge (ei);
10455 		    ei_next (&ei);
10456 		    e2 = ei_safe_edge (ei);
10457 
10458 		    if (e2->flags & EDGE_FALLTHRU)
10459 		      {
10460 			e2 = e1;
10461 			e1 = ei_safe_edge (ei);
10462 		      }
10463 
10464 		    if (!(e1->flags & EDGE_FALLTHRU))
10465 		      return;
10466 
10467 		    tbegin_bb = (target == pc_rtx) ? e1->dest : e2->dest;
10468 		  }
10469 		  if (tmp == BB_END (bb))
10470 		    break;
10471 		}
10472 	    }
10473 
10474 	  if (XINT (SET_SRC (pat), 1) == UNSPECV_TEND)
10475 	    {
10476 	      if (tend_bb != NULL)
10477 		return;
10478 	      tend_bb = bb;
10479 	    }
10480 	}
10481     }
10482 
10483   /* Either we successfully remove the FPR clobbers here or we are not
10484      able to do anything for this TX.  Both cases don't qualify for
10485      another look.  */
10486   cfun->machine->tbegin_p = false;
10487 
10488   if (tbegin_bb == NULL || tend_bb == NULL)
10489     return;
10490 
10491   calculate_dominance_info (CDI_POST_DOMINATORS);
10492   result = dominated_by_p (CDI_POST_DOMINATORS, tbegin_bb, tend_bb);
10493   free_dominance_info (CDI_POST_DOMINATORS);
10494 
10495   if (!result)
10496     return;
10497 
10498   PATTERN (tbegin_insn) = gen_rtx_PARALLEL (VOIDmode,
10499 			    gen_rtvec (2,
10500 				       XVECEXP (PATTERN (tbegin_insn), 0, 0),
10501 				       XVECEXP (PATTERN (tbegin_insn), 0, 1)));
10502   INSN_CODE (tbegin_insn) = -1;
10503   df_insn_rescan (tbegin_insn);
10504 
10505   return;
10506 }
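
/* Mask convention used above (informal restatement): CC0 is bit 3 of the
   4-bit condition mask (value 8), so a branch whose mask equals CC0 is
   taken exactly when tbegin produced condition code 0, while CC0 ^ 0xf
   selects the opposite edge; an NE comparison simply inverts the mask
   before this check.  */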
10507 
10508 /* Implement TARGET_HARD_REGNO_NREGS.  Because all registers in a class
10509    have the same size, this is equivalent to CLASS_MAX_NREGS.  */
10510 
10511 static unsigned int
10512 s390_hard_regno_nregs (unsigned int regno, machine_mode mode)
10513 {
10514   return s390_class_max_nregs (REGNO_REG_CLASS (regno), mode);
10515 }
10516 
10517 /* Implement TARGET_HARD_REGNO_MODE_OK.
10518 
10519    Integer modes <= word size fit into any GPR.
10520    Integer modes > word size fit into successive GPRs, starting with
10521    an even-numbered register.
10522    SImode and DImode fit into FPRs as well.
10523 
10524    Floating point modes <= word size fit into any FPR or GPR.
10525    Floating point modes > word size (i.e. DFmode on 32-bit) fit
10526    into any FPR, or an even-odd GPR pair.
10527    TFmode fits only into an even-odd FPR pair.
10528 
10529    Complex floating point modes fit either into two FPRs, or into
10530    successive GPRs (again starting with an even number).
10531    TCmode fits only into two successive even-odd FPR pairs.
10532 
10533    Condition code modes fit only into the CC register.  */
10534 
10535 static bool
10536 s390_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
10537 {
10538   if (!TARGET_VX && VECTOR_NOFP_REGNO_P (regno))
10539     return false;
10540 
10541   switch (REGNO_REG_CLASS (regno))
10542     {
10543     case VEC_REGS:
10544       return ((GET_MODE_CLASS (mode) == MODE_INT
10545 	       && s390_class_max_nregs (VEC_REGS, mode) == 1)
10546 	      || mode == DFmode
10547 	      || (TARGET_VXE && mode == SFmode)
10548 	      || s390_vector_mode_supported_p (mode));
10549       break;
10550     case FP_REGS:
10551       if (TARGET_VX
10552 	  && ((GET_MODE_CLASS (mode) == MODE_INT
10553 	       && s390_class_max_nregs (FP_REGS, mode) == 1)
10554 	      || mode == DFmode
10555 	      || s390_vector_mode_supported_p (mode)))
10556 	return true;
10557 
10558       if (REGNO_PAIR_OK (regno, mode))
10559 	{
10560 	  if (mode == SImode || mode == DImode)
10561 	    return true;
10562 
10563 	  if (FLOAT_MODE_P (mode) && GET_MODE_CLASS (mode) != MODE_VECTOR_FLOAT)
10564 	    return true;
10565 	}
10566       break;
10567     case ADDR_REGS:
10568       if (FRAME_REGNO_P (regno) && mode == Pmode)
10569 	return true;
10570 
10571       /* fallthrough */
10572     case GENERAL_REGS:
10573       if (REGNO_PAIR_OK (regno, mode))
10574 	{
10575 	  if (TARGET_ZARCH
10576 	      || (mode != TFmode && mode != TCmode && mode != TDmode))
10577 	    return true;
10578 	}
10579       break;
10580     case CC_REGS:
10581       if (GET_MODE_CLASS (mode) == MODE_CC)
10582 	return true;
10583       break;
10584     case ACCESS_REGS:
10585       if (REGNO_PAIR_OK (regno, mode))
10586 	{
10587 	  if (mode == SImode || mode == Pmode)
10588 	    return true;
10589 	}
10590       break;
10591     default:
10592       return false;
10593     }
10594 
10595   return false;
10596 }
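
/* A few consequences of the rules above (illustrative): DImode is allowed
   in any single FPR, TImode in GENERAL_REGS needs an even/odd GPR pair
   and is rejected for odd register numbers by REGNO_PAIR_OK, and MODE_CC
   modes are only ever placed in the condition code register.  */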
10597 
10598 /* Implement TARGET_MODES_TIEABLE_P.  */
10599 
10600 static bool
10601 s390_modes_tieable_p (machine_mode mode1, machine_mode mode2)
10602 {
10603   return ((mode1 == SFmode || mode1 == DFmode)
10604 	  == (mode2 == SFmode || mode2 == DFmode));
10605 }
10606 
10607 /* Return nonzero if register OLD_REG can be renamed to register NEW_REG.  */
10608 
10609 bool
10610 s390_hard_regno_rename_ok (unsigned int old_reg, unsigned int new_reg)
10611 {
10612    /* Once we've decided upon a register to use as base register, it must
10613       no longer be used for any other purpose.  */
10614   if (cfun->machine->base_reg)
10615     if (REGNO (cfun->machine->base_reg) == old_reg
10616 	|| REGNO (cfun->machine->base_reg) == new_reg)
10617       return false;
10618 
10619   /* Prevent regrename from using call-saved regs which haven't
10620      actually been saved.  This is necessary since regrename assumes
10621      the backend save/restore decisions are based on
10622      df_regs_ever_live.  Since we have our own routine we have to tell
10623      regrename manually about it.  */
10624   if (GENERAL_REGNO_P (new_reg)
10625       && !call_really_used_regs[new_reg]
10626       && cfun_gpr_save_slot (new_reg) == SAVE_SLOT_NONE)
10627     return false;
10628 
10629   return true;
10630 }
10631 
10632 /* Return nonzero if register REGNO can be used as a scratch register
10633    in peephole2.  */
10634 
10635 static bool
10636 s390_hard_regno_scratch_ok (unsigned int regno)
10637 {
10638   /* See s390_hard_regno_rename_ok.  */
10639   if (GENERAL_REGNO_P (regno)
10640       && !call_really_used_regs[regno]
10641       && cfun_gpr_save_slot (regno) == SAVE_SLOT_NONE)
10642     return false;
10643 
10644   return true;
10645 }
10646 
10647 /* Implement TARGET_HARD_REGNO_CALL_PART_CLOBBERED.  When generating
10648    code that runs in z/Architecture mode, but conforms to the 31-bit
10649    ABI, GPRs can hold 8 bytes; the ABI guarantees only that the lower 4
10650    bytes are saved across calls, however.  */
10651 
10652 static bool
10653 s390_hard_regno_call_part_clobbered (unsigned int regno, machine_mode mode)
10654 {
10655   if (!TARGET_64BIT
10656       && TARGET_ZARCH
10657       && GET_MODE_SIZE (mode) > 4
10658       && ((regno >= 6 && regno <= 15) || regno == 32))
10659     return true;
10660 
10661   if (TARGET_VX
10662       && GET_MODE_SIZE (mode) > 8
10663       && (((TARGET_64BIT && regno >= 24 && regno <= 31))
10664 	  || (!TARGET_64BIT && (regno == 18 || regno == 19))))
10665     return true;
10666 
10667   return false;
10668 }
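
/* Examples (illustrative): with -m31 -mzarch a DImode value kept in r7
   only has its low 32 bits preserved across a call, so the hook returns
   true; likewise vector values wider than 8 bytes kept in the upper
   call-saved FPRs (regnos 24-31 in the checks above) only survive calls
   in their low 8 bytes under the 64-bit vector ABI.  */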
10669 
10670 /* Maximum number of registers to represent a value of mode MODE
10671    in a register of class RCLASS.  */
10672 
10673 int
10674 s390_class_max_nregs (enum reg_class rclass, machine_mode mode)
10675 {
10676   int reg_size;
10677   bool reg_pair_required_p = false;
10678 
10679   switch (rclass)
10680     {
10681     case FP_REGS:
10682     case VEC_REGS:
10683       reg_size = TARGET_VX ? 16 : 8;
10684 
10685       /* TF and TD modes would fit into a VR but we put them into a
10686 	 register pair since we do not have 128bit FP instructions on
10687 	 full VRs.  */
10688       if (TARGET_VX
10689 	  && SCALAR_FLOAT_MODE_P (mode)
10690 	  && GET_MODE_SIZE (mode) >= 16)
10691 	reg_pair_required_p = true;
10692 
10693       /* Even if complex types would fit into a single FPR/VR we force
10694 	 them into a register pair to deal with the parts more easily.
10695 	 (FIXME: What about complex ints?)  */
10696       if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
10697 	reg_pair_required_p = true;
10698       break;
10699     case ACCESS_REGS:
10700       reg_size = 4;
10701       break;
10702     default:
10703       reg_size = UNITS_PER_WORD;
10704       break;
10705     }
10706 
10707   if (reg_pair_required_p)
10708     return 2 * ((GET_MODE_SIZE (mode) / 2 + reg_size - 1) / reg_size);
10709 
10710   return (GET_MODE_SIZE (mode) + reg_size - 1) / reg_size;
10711 }
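
/* Example calculations (illustrative): TFmode (16 bytes) in FP_REGS with
   TARGET_VX takes the register pair path: 2 * ((16 / 2 + 16 - 1) / 16)
   == 2 registers.  Without the vector facility reg_size is 8 and
   (16 + 8 - 1) / 8 == 2 as well, while SImode in ACCESS_REGS needs
   (4 + 4 - 1) / 4 == 1 register.  */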
10712 
10713 /* Implement TARGET_CAN_CHANGE_MODE_CLASS.  */
10714 
10715 static bool
10716 s390_can_change_mode_class (machine_mode from_mode,
10717 			    machine_mode to_mode,
10718 			    reg_class_t rclass)
10719 {
10720   machine_mode small_mode;
10721   machine_mode big_mode;
10722 
10723   /* V1TF and TF have different representations in vector
10724      registers.  */
10725   if (reg_classes_intersect_p (VEC_REGS, rclass)
10726       && ((from_mode == V1TFmode && to_mode == TFmode)
10727 	  || (from_mode == TFmode && to_mode == V1TFmode)))
10728     return false;
10729 
10730   if (GET_MODE_SIZE (from_mode) == GET_MODE_SIZE (to_mode))
10731     return true;
10732 
10733   if (GET_MODE_SIZE (from_mode) < GET_MODE_SIZE (to_mode))
10734     {
10735       small_mode = from_mode;
10736       big_mode = to_mode;
10737     }
10738   else
10739     {
10740       small_mode = to_mode;
10741       big_mode = from_mode;
10742     }
10743 
10744   /* Values residing in VRs are little-endian style.  All modes are
10745      placed left-aligned in a VR.  This means that we cannot allow
10746      switching between modes with differing sizes.  Also if the vector
10747      facility is available we still place TFmode values in VR register
10748      pairs, since the only instructions we have operating on TFmodes
10749      only deal with register pairs.  Therefore we have to allow DFmode
10750      subregs of TFmodes to enable the TFmode splitters.  */
10751   if (reg_classes_intersect_p (VEC_REGS, rclass)
10752       && (GET_MODE_SIZE (small_mode) < 8
10753 	  || s390_class_max_nregs (VEC_REGS, big_mode) == 1))
10754     return false;
10755 
10756   /* Likewise for access registers, since they have only half the
10757      word size on 64-bit.  */
10758   if (reg_classes_intersect_p (ACCESS_REGS, rclass))
10759     return false;
10760 
10761   return true;
10762 }
10763 
10764 /* Return true if we use LRA instead of reload pass.  */
10765 static bool
10766 s390_lra_p (void)
10767 {
10768   return s390_lra_flag;
10769 }
10770 
10771 /* Return true if register FROM can be eliminated via register TO.  */
10772 
10773 static bool
10774 s390_can_eliminate (const int from, const int to)
10775 {
10776   /* On zSeries machines, we have not marked the base register as fixed.
10777      Instead, we have an elimination rule BASE_REGNUM -> BASE_REGNUM.
10778      If a function requires the base register, we say here that this
10779      elimination cannot be performed.  This will cause reload to free
10780      up the base register (as if it were fixed).  On the other hand,
10781      if the current function does *not* require the base register, we
10782      say here the elimination succeeds, which in turn allows reload
10783      to allocate the base register for any other purpose.  */
10784   if (from == BASE_REGNUM && to == BASE_REGNUM)
10785     {
10786       if (TARGET_CPU_ZARCH)
10787 	{
10788 	  s390_init_frame_layout ();
10789 	  return cfun->machine->base_reg == NULL_RTX;
10790 	}
10791 
10792       return false;
10793     }
10794 
10795   /* Everything else must point into the stack frame.  */
10796   gcc_assert (to == STACK_POINTER_REGNUM
10797 	      || to == HARD_FRAME_POINTER_REGNUM);
10798 
10799   gcc_assert (from == FRAME_POINTER_REGNUM
10800 	      || from == ARG_POINTER_REGNUM
10801 	      || from == RETURN_ADDRESS_POINTER_REGNUM);
10802 
10803   /* Make sure we actually saved the return address.  */
10804   if (from == RETURN_ADDRESS_POINTER_REGNUM)
10805     if (!crtl->calls_eh_return
10806 	&& !cfun->stdarg
10807 	&& !cfun_frame_layout.save_return_addr_p)
10808       return false;
10809 
10810   return true;
10811 }
10812 
10813 /* Return offset between register FROM and TO initially after prolog.  */
10814 
10815 HOST_WIDE_INT
10816 s390_initial_elimination_offset (int from, int to)
10817 {
10818   HOST_WIDE_INT offset;
10819 
10820   /* ??? Why are we called for non-eliminable pairs?  */
10821   if (!s390_can_eliminate (from, to))
10822     return 0;
10823 
10824   switch (from)
10825     {
10826     case FRAME_POINTER_REGNUM:
10827       offset = (get_frame_size()
10828 		+ STACK_POINTER_OFFSET
10829 		+ crtl->outgoing_args_size);
10830       break;
10831 
10832     case ARG_POINTER_REGNUM:
10833       s390_init_frame_layout ();
10834       offset = cfun_frame_layout.frame_size + STACK_POINTER_OFFSET;
10835       break;
10836 
10837     case RETURN_ADDRESS_POINTER_REGNUM:
10838       s390_init_frame_layout ();
10839 
10840       if (cfun_frame_layout.first_save_gpr_slot == -1)
10841 	{
10842 	  /* If it turns out that for stdarg nothing went into the reg
10843 	     save area we also do not need the return address
10844 	     pointer.  */
10845 	  if (cfun->stdarg && !cfun_save_arg_fprs_p)
10846 	    return 0;
10847 
10848 	  gcc_unreachable ();
10849 	}
10850 
10851       /* In order to make the following work it is not necessary for
10852 	 r14 to have a save slot.  It is sufficient if one other GPR
10853 	 got one.  Since the GPRs are always stored without gaps we
10854 	 are able to calculate where the r14 save slot would
10855 	 reside.  */
10856       offset = (cfun_frame_layout.frame_size + cfun_frame_layout.gprs_offset +
10857 		(RETURN_REGNUM - cfun_frame_layout.first_save_gpr_slot) *
10858 		UNITS_PER_LONG);
10859       break;
10860 
10861     case BASE_REGNUM:
10862       offset = 0;
10863       break;
10864 
10865     default:
10866       gcc_unreachable ();
10867     }
10868 
10869   return offset;
10870 }
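
/* Worked example (hypothetical figures): with frame_size == 160,
   gprs_offset == 48 and first_save_gpr_slot == 6, the r14 slot assumed by
   the RETURN_ADDRESS_POINTER_REGNUM case lies at
   160 + 48 + (14 - 6) * 8 == 272 bytes above the new stack pointer.  */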
10871 
10872 /* Emit insn to save fpr REGNUM at offset OFFSET relative
10873    to register BASE.  Return generated insn.  */
10874 
10875 static rtx
10876 save_fpr (rtx base, int offset, int regnum)
10877 {
10878   rtx addr;
10879   addr = gen_rtx_MEM (DFmode, plus_constant (Pmode, base, offset));
10880 
10881   if (regnum >= 16 && regnum <= (16 + FP_ARG_NUM_REG))
10882     set_mem_alias_set (addr, get_varargs_alias_set ());
10883   else
10884     set_mem_alias_set (addr, get_frame_alias_set ());
10885 
10886   return emit_move_insn (addr, gen_rtx_REG (DFmode, regnum));
10887 }
10888 
10889 /* Emit insn to restore fpr REGNUM from offset OFFSET relative
10890    to register BASE.  Return generated insn.  */
10891 
10892 static rtx
10893 restore_fpr (rtx base, int offset, int regnum)
10894 {
10895   rtx addr;
10896   addr = gen_rtx_MEM (DFmode, plus_constant (Pmode, base, offset));
10897   set_mem_alias_set (addr, get_frame_alias_set ());
10898 
10899   return emit_move_insn (gen_rtx_REG (DFmode, regnum), addr);
10900 }
10901 
10902 /* Generate insn to save registers FIRST to LAST into
10903    the register save area located at offset OFFSET
10904    relative to register BASE.  */
10905 
10906 static rtx
10907 save_gprs (rtx base, int offset, int first, int last)
10908 {
10909   rtx addr, insn, note;
10910   int i;
10911 
10912   addr = plus_constant (Pmode, base, offset);
10913   addr = gen_rtx_MEM (Pmode, addr);
10914 
10915   set_mem_alias_set (addr, get_frame_alias_set ());
10916 
10917   /* Special-case single register.  */
10918   if (first == last)
10919     {
10920       if (TARGET_64BIT)
10921         insn = gen_movdi (addr, gen_rtx_REG (Pmode, first));
10922       else
10923         insn = gen_movsi (addr, gen_rtx_REG (Pmode, first));
10924 
10925       if (!global_not_special_regno_p (first))
10926 	RTX_FRAME_RELATED_P (insn) = 1;
10927       return insn;
10928     }
10929 
10930 
10931   insn = gen_store_multiple (addr,
10932 			     gen_rtx_REG (Pmode, first),
10933 			     GEN_INT (last - first + 1));
10934 
10935   if (first <= 6 && cfun->stdarg)
10936     for (i = 0; i < XVECLEN (PATTERN (insn), 0); i++)
10937       {
10938 	rtx mem = XEXP (XVECEXP (PATTERN (insn), 0, i), 0);
10939 
10940 	if (first + i <= 6)
10941 	  set_mem_alias_set (mem, get_varargs_alias_set ());
10942       }
10943 
10944   /* We need to set the FRAME_RELATED flag on all SETs
10945      inside the store-multiple pattern.
10946 
10947      However, we must not emit DWARF records for registers 2..5
10948      if they are stored for use by variable arguments ...
10949 
10950      ??? Unfortunately, it is not enough to simply not set the
10951      FRAME_RELATED flags for those SETs, because the first SET
10952      of the PARALLEL is always treated as if it had the flag
10953      set, even if it does not.  Therefore we emit a new pattern
10954      without those registers as REG_FRAME_RELATED_EXPR note.  */
10955 
10956   if (first >= 6 && !global_not_special_regno_p (first))
10957     {
10958       rtx pat = PATTERN (insn);
10959 
10960       for (i = 0; i < XVECLEN (pat, 0); i++)
10961 	if (GET_CODE (XVECEXP (pat, 0, i)) == SET
10962 	    && !global_not_special_regno_p (REGNO (SET_SRC (XVECEXP (pat,
10963 								     0, i)))))
10964 	  RTX_FRAME_RELATED_P (XVECEXP (pat, 0, i)) = 1;
10965 
10966       RTX_FRAME_RELATED_P (insn) = 1;
10967     }
10968   else if (last >= 6)
10969     {
10970       int start;
10971 
10972       for (start = first >= 6 ? first : 6; start <= last; start++)
10973 	if (!global_not_special_regno_p (start))
10974 	  break;
10975 
10976       if (start > last)
10977 	return insn;
10978 
10979       addr = plus_constant (Pmode, base,
10980 			    offset + (start - first) * UNITS_PER_LONG);
10981 
10982       if (start == last)
10983 	{
10984 	  if (TARGET_64BIT)
10985 	    note = gen_movdi (gen_rtx_MEM (Pmode, addr),
10986 			      gen_rtx_REG (Pmode, start));
10987 	  else
10988 	    note = gen_movsi (gen_rtx_MEM (Pmode, addr),
10989 			      gen_rtx_REG (Pmode, start));
10990 	  note = PATTERN (note);
10991 
10992 	  add_reg_note (insn, REG_FRAME_RELATED_EXPR, note);
10993 	  RTX_FRAME_RELATED_P (insn) = 1;
10994 
10995 	  return insn;
10996 	}
10997 
10998       note = gen_store_multiple (gen_rtx_MEM (Pmode, addr),
10999 				 gen_rtx_REG (Pmode, start),
11000 				 GEN_INT (last - start + 1));
11001       note = PATTERN (note);
11002 
11003       add_reg_note (insn, REG_FRAME_RELATED_EXPR, note);
11004 
11005       for (i = 0; i < XVECLEN (note, 0); i++)
11006 	if (GET_CODE (XVECEXP (note, 0, i)) == SET
11007 	    && !global_not_special_regno_p (REGNO (SET_SRC (XVECEXP (note,
11008 								     0, i)))))
11009 	  RTX_FRAME_RELATED_P (XVECEXP (note, 0, i)) = 1;
11010 
11011       RTX_FRAME_RELATED_P (insn) = 1;
11012     }
11013 
11014   return insn;
11015 }
11016 
11017 /* Generate insn to restore registers FIRST to LAST from
11018    the register save area located at offset OFFSET
11019    relative to register BASE.  */
11020 
11021 static rtx
11022 restore_gprs (rtx base, int offset, int first, int last)
11023 {
11024   rtx addr, insn;
11025 
11026   addr = plus_constant (Pmode, base, offset);
11027   addr = gen_rtx_MEM (Pmode, addr);
11028   set_mem_alias_set (addr, get_frame_alias_set ());
11029 
11030   /* Special-case single register.  */
11031   if (first == last)
11032     {
11033       if (TARGET_64BIT)
11034         insn = gen_movdi (gen_rtx_REG (Pmode, first), addr);
11035       else
11036         insn = gen_movsi (gen_rtx_REG (Pmode, first), addr);
11037 
11038       RTX_FRAME_RELATED_P (insn) = 1;
11039       return insn;
11040     }
11041 
11042   insn = gen_load_multiple (gen_rtx_REG (Pmode, first),
11043 			    addr,
11044 			    GEN_INT (last - first + 1));
11045   RTX_FRAME_RELATED_P (insn) = 1;
11046   return insn;
11047 }
11048 
11049 /* Return insn sequence to load the GOT register.  */
11050 
11051 rtx_insn *
11052 s390_load_got (void)
11053 {
11054   rtx_insn *insns;
11055 
11056   /* We cannot use pic_offset_table_rtx here since we use this
11057      function also for non-pic if __tls_get_offset is called and in
11058      that case PIC_OFFSET_TABLE_REGNUM as well as pic_offset_table_rtx
11059      aren't usable.  */
11060   rtx got_rtx = gen_rtx_REG (Pmode, 12);
11061 
11062   start_sequence ();
11063 
11064   if (TARGET_CPU_ZARCH)
11065     {
11066       emit_move_insn (got_rtx, s390_got_symbol ());
11067     }
11068   else
11069     {
11070       rtx offset;
11071 
11072       offset = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, s390_got_symbol ()),
11073 			       UNSPEC_LTREL_OFFSET);
11074       offset = gen_rtx_CONST (Pmode, offset);
11075       offset = force_const_mem (Pmode, offset);
11076 
11077       emit_move_insn (got_rtx, offset);
11078 
11079       offset = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (offset, 0)),
11080 			       UNSPEC_LTREL_BASE);
11081       offset = gen_rtx_PLUS (Pmode, got_rtx, offset);
11082 
11083       emit_move_insn (got_rtx, offset);
11084     }
11085 
11086   insns = get_insns ();
11087   end_sequence ();
11088   return insns;
11089 }
11090 
11091 /* This ties together stack memory (MEM with an alias set of frame_alias_set)
11092    and the change to the stack pointer.  */
11093 
11094 static void
11095 s390_emit_stack_tie (void)
11096 {
11097   rtx mem = gen_frame_mem (BLKmode,
11098 			   gen_rtx_REG (Pmode, STACK_POINTER_REGNUM));
11099 
11100   emit_insn (gen_stack_tie (mem));
11101 }
11102 
11103 /* Copy GPRS into FPR save slots.  */
11104 
11105 static void
11106 s390_save_gprs_to_fprs (void)
11107 {
11108   int i;
11109 
11110   if (!TARGET_Z10 || !TARGET_HARD_FLOAT || !crtl->is_leaf)
11111     return;
11112 
11113   for (i = 6; i < 16; i++)
11114     {
11115       if (FP_REGNO_P (cfun_gpr_save_slot (i)))
11116 	{
11117 	  rtx_insn *insn =
11118 	    emit_move_insn (gen_rtx_REG (DImode, cfun_gpr_save_slot (i)),
11119 			    gen_rtx_REG (DImode, i));
11120 	  RTX_FRAME_RELATED_P (insn) = 1;
11121 	  /* This prevents dwarf2cfi from interpreting the set.  If it
11122 	     did, it might emit def_cfa_register infos setting an FPR as
11123 	     the new CFA.  */
11124 	  add_reg_note (insn, REG_CFA_REGISTER, copy_rtx (PATTERN (insn)));
11125 	}
11126     }
11127 }
11128 
11129 /* Restore GPRs from FPR save slots.  */
11130 
11131 static void
11132 s390_restore_gprs_from_fprs (void)
11133 {
11134   int i;
11135 
11136   if (!TARGET_Z10 || !TARGET_HARD_FLOAT || !crtl->is_leaf)
11137     return;
11138 
11139   for (i = 6; i < 16; i++)
11140     {
11141       rtx_insn *insn;
11142 
11143       if (!FP_REGNO_P (cfun_gpr_save_slot (i)))
11144 	continue;
11145 
11146       rtx fpr = gen_rtx_REG (DImode, cfun_gpr_save_slot (i));
11147 
11148       if (i == STACK_POINTER_REGNUM)
11149 	insn = emit_insn (gen_stack_restore_from_fpr (fpr));
11150       else
11151 	insn = emit_move_insn (gen_rtx_REG (DImode, i), fpr);
11152 
11153       df_set_regs_ever_live (i, true);
11154       add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DImode, i));
11155       if (i == STACK_POINTER_REGNUM)
11156 	add_reg_note (insn, REG_CFA_DEF_CFA,
11157 		      plus_constant (Pmode, stack_pointer_rtx,
11158 				     STACK_POINTER_OFFSET));
11159       RTX_FRAME_RELATED_P (insn) = 1;
11160     }
11161 }
11162 
11163 
11164 /* A pass run immediately before shrink-wrapping and prologue and epilogue
11165    generation.  */
11166 
11167 namespace {
11168 
11169 const pass_data pass_data_s390_early_mach =
11170 {
11171   RTL_PASS, /* type */
11172   "early_mach", /* name */
11173   OPTGROUP_NONE, /* optinfo_flags */
11174   TV_MACH_DEP, /* tv_id */
11175   0, /* properties_required */
11176   0, /* properties_provided */
11177   0, /* properties_destroyed */
11178   0, /* todo_flags_start */
11179   ( TODO_df_verify | TODO_df_finish ), /* todo_flags_finish */
11180 };
11181 
11182 class pass_s390_early_mach : public rtl_opt_pass
11183 {
11184 public:
11185   pass_s390_early_mach (gcc::context *ctxt)
11186     : rtl_opt_pass (pass_data_s390_early_mach, ctxt)
11187   {}
11188 
11189   /* opt_pass methods: */
11190   virtual unsigned int execute (function *);
11191 
11192 }; // class pass_s390_early_mach
11193 
11194 unsigned int
11195 pass_s390_early_mach::execute (function *fun)
11196 {
11197   rtx_insn *insn;
11198 
11199   /* Try to get rid of the FPR clobbers.  */
11200   s390_optimize_nonescaping_tx ();
11201 
11202   /* Re-compute register info.  */
11203   s390_register_info ();
11204 
11205   /* If we're using a base register, ensure that it is always valid for
11206      the first non-prologue instruction.  */
11207   if (fun->machine->base_reg)
11208     emit_insn_at_entry (gen_main_pool (fun->machine->base_reg));
11209 
11210   /* Annotate all constant pool references to let the scheduler know
11211      they implicitly use the base register.  */
11212   for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
11213     if (INSN_P (insn))
11214       {
11215 	annotate_constant_pool_refs (&PATTERN (insn));
11216 	df_insn_rescan (insn);
11217       }
11218   return 0;
11219 }
11220 
11221 } // anon namespace
11222 
11223 /* Calculate TARGET = REG + OFFSET as s390_emit_prologue would do it.
11224    - push too big immediates to the literal pool and annotate the refs
11225    - emit frame related notes for stack pointer changes.  */
11226 
11227 static rtx
11228 s390_prologue_plus_offset (rtx target, rtx reg, rtx offset, bool frame_related_p)
11229 {
11230   rtx insn;
11231   rtx orig_offset = offset;
11232 
11233   gcc_assert (REG_P (target));
11234   gcc_assert (REG_P (reg));
11235   gcc_assert (CONST_INT_P (offset));
11236 
11237   if (offset == const0_rtx)                               /* lr/lgr */
11238     {
11239       insn = emit_move_insn (target, reg);
11240     }
11241   else if (DISP_IN_RANGE (INTVAL (offset)))               /* la */
11242     {
11243       insn = emit_move_insn (target, gen_rtx_PLUS (Pmode, reg,
11244 						   offset));
11245     }
11246   else
11247     {
11248       if (!satisfies_constraint_K (offset)                /* ahi/aghi */
11249 	  && (!TARGET_EXTIMM
11250 	      || (!satisfies_constraint_Op (offset)       /* alfi/algfi */
11251 		  && !satisfies_constraint_On (offset)))) /* slfi/slgfi */
11252 	offset = force_const_mem (Pmode, offset);
11253 
11254       if (target != reg)
11255 	{
11256 	  insn = emit_move_insn (target, reg);
11257 	  RTX_FRAME_RELATED_P (insn) = frame_related_p ? 1 : 0;
11258 	}
11259 
11260       insn = emit_insn (gen_add2_insn (target, offset));
11261 
11262       if (!CONST_INT_P (offset))
11263 	{
11264 	  annotate_constant_pool_refs (&PATTERN (insn));
11265 
11266 	  if (frame_related_p)
11267 	    add_reg_note (insn, REG_FRAME_RELATED_EXPR,
11268 			  gen_rtx_SET (target,
11269 				       gen_rtx_PLUS (Pmode, target,
11270 						     orig_offset)));
11271 	}
11272     }
11273 
11274   RTX_FRAME_RELATED_P (insn) = frame_related_p ? 1 : 0;
11275 
11276   /* If this is a stack adjustment and we are generating a stack clash
11277      prologue, then add a REG_STACK_CHECK note to signal that this insn
11278      should be left alone.  */
11279   if (flag_stack_clash_protection && target == stack_pointer_rtx)
11280     add_reg_note (insn, REG_STACK_CHECK, const0_rtx);
11281 
11282   return insn;
11283 }
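
/* Rough guide to the cases above (illustrative): a zero offset becomes a
   plain register copy, a displacement-sized offset is folded into an la,
   and an offset that fits none of the immediate-add constraints is first
   forced into the literal pool and then added, with the intended stack
   pointer change recorded via a REG_FRAME_RELATED_EXPR note.  */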
11284 
11285 /* Emit a compare instruction with a volatile memory access as stack
11286    probe.  It does not waste store tags and does not clobber any
11287    registers apart from the condition code.  */
11288 static void
11289 s390_emit_stack_probe (rtx addr)
11290 {
11291   rtx tmp = gen_rtx_MEM (Pmode, addr);
11292   MEM_VOLATILE_P (tmp) = 1;
11293   s390_emit_compare (EQ, gen_rtx_REG (Pmode, 0), tmp);
11294   emit_insn (gen_blockage ());
11295 }
11296 
11297 /* Use a runtime loop if we have to emit more probes than this.  */
11298 #define MIN_UNROLL_PROBES 3
11299 
11300 /* Allocate SIZE bytes of stack space, using TEMP_REG as a temporary
11301    if necessary.  LAST_PROBE_OFFSET contains the offset of the closest
11302    probe relative to the stack pointer.
11303 
11304    Note that SIZE is negative.
11305 
11306    The return value is true if TEMP_REG has been clobbered.  */
11307 static bool
11308 allocate_stack_space (rtx size, HOST_WIDE_INT last_probe_offset,
11309 		      rtx temp_reg)
11310 {
11311   bool temp_reg_clobbered_p = false;
11312   HOST_WIDE_INT probe_interval
11313     = 1 << PARAM_VALUE (PARAM_STACK_CLASH_PROTECTION_PROBE_INTERVAL);
11314   HOST_WIDE_INT guard_size
11315     = 1 << PARAM_VALUE (PARAM_STACK_CLASH_PROTECTION_GUARD_SIZE);
11316 
11317   if (flag_stack_clash_protection)
11318     {
11319       if (last_probe_offset + -INTVAL (size) < guard_size)
11320 	dump_stack_clash_frame_info (NO_PROBE_SMALL_FRAME, true);
11321       else
11322 	{
11323 	  rtx offset = GEN_INT (probe_interval - UNITS_PER_LONG);
11324 	  HOST_WIDE_INT rounded_size = -INTVAL (size) & -probe_interval;
11325 	  HOST_WIDE_INT num_probes = rounded_size / probe_interval;
11326 	  HOST_WIDE_INT residual = -INTVAL (size) - rounded_size;
11327 
11328 	  if (num_probes < MIN_UNROLL_PROBES)
11329 	    {
11330 	      /* Emit unrolled probe statements.  */
11331 
11332 	      for (unsigned int i = 0; i < num_probes; i++)
11333 		{
11334 		  s390_prologue_plus_offset (stack_pointer_rtx,
11335 					     stack_pointer_rtx,
11336 					     GEN_INT (-probe_interval), true);
11337 		  s390_emit_stack_probe (gen_rtx_PLUS (Pmode,
11338 						       stack_pointer_rtx,
11339 						       offset));
11340 		}
11341 	      dump_stack_clash_frame_info (PROBE_INLINE, residual != 0);
11342 	    }
11343 	  else
11344 	    {
11345 	      /* Emit a loop probing the pages.  */
11346 
11347 	      rtx_code_label *loop_start_label = gen_label_rtx ();
11348 
11349 	      /* From now on temp_reg will be the CFA register.  */
11350 	      s390_prologue_plus_offset (temp_reg, stack_pointer_rtx,
11351 					 GEN_INT (-rounded_size), true);
11352 	      emit_label (loop_start_label);
11353 
11354 	      s390_prologue_plus_offset (stack_pointer_rtx,
11355 					 stack_pointer_rtx,
11356 					 GEN_INT (-probe_interval), false);
11357 	      s390_emit_stack_probe (gen_rtx_PLUS (Pmode,
11358 						   stack_pointer_rtx,
11359 						   offset));
11360 	      emit_cmp_and_jump_insns (stack_pointer_rtx, temp_reg,
11361 				       GT, NULL_RTX,
11362 				       Pmode, 1, loop_start_label);
11363 
11364 	      /* Without this make_edges ICEes.  */
11365 	      JUMP_LABEL (get_last_insn ()) = loop_start_label;
11366 	      LABEL_NUSES (loop_start_label) = 1;
11367 
11368 	      /* That's going to be a NOP since stack pointer and
11369 		 temp_reg are supposed to be the same here.  We just
11370 		 emit it to set the CFA reg back to r15.  */
11371 	      s390_prologue_plus_offset (stack_pointer_rtx, temp_reg,
11372 					 const0_rtx, true);
11373 	      temp_reg_clobbered_p = true;
11374 	      dump_stack_clash_frame_info (PROBE_LOOP, residual != 0);
11375 	    }
11376 
11377 	  /* Handle any residual allocation request.  */
11378 	  s390_prologue_plus_offset (stack_pointer_rtx,
11379 				     stack_pointer_rtx,
11380 				     GEN_INT (-residual), true);
11381 	  last_probe_offset += residual;
11382 	  if (last_probe_offset >= probe_interval)
11383 	    s390_emit_stack_probe (gen_rtx_PLUS (Pmode,
11384 						 stack_pointer_rtx,
11385 						 GEN_INT (residual
11386 							  - UNITS_PER_LONG)));
11387 
11388 	  return temp_reg_clobbered_p;
11389 	}
11390     }
11391 
11392   /* Subtract frame size from stack pointer.  */
11393   s390_prologue_plus_offset (stack_pointer_rtx,
11394 			     stack_pointer_rtx,
11395 			     size, true);
11396 
11397   return temp_reg_clobbered_p;
11398 }
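
/* Probing example (hypothetical parameters): with a 4 KiB probe interval
   and SIZE == -10000, rounded_size is 10000 & -4096 == 8192, giving
   num_probes == 2 and residual == 1808.  Since 2 < MIN_UNROLL_PROBES the
   two probes are emitted inline, each after a 4096-byte stack pointer
   decrement, followed by the 1808-byte residual adjustment.  */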
11399 
11400 /* Expand the prologue into a bunch of separate insns.  */
11401 
11402 void
11403 s390_emit_prologue (void)
11404 {
11405   rtx insn, addr;
11406   rtx temp_reg;
11407   int i;
11408   int offset;
11409   int next_fpr = 0;
11410 
11411   /* Choose best register to use for temp use within prologue.
11412      TPF with profiling must avoid register 14 - the tracing function
11413      needs the original contents of r14 to be preserved.  */
11414 
11415   if (!has_hard_reg_initial_val (Pmode, RETURN_REGNUM)
11416       && !crtl->is_leaf
11417       && !TARGET_TPF_PROFILING)
11418     temp_reg = gen_rtx_REG (Pmode, RETURN_REGNUM);
11419   else if (flag_split_stack && cfun->stdarg)
11420     temp_reg = gen_rtx_REG (Pmode, 12);
11421   else
11422     temp_reg = gen_rtx_REG (Pmode, 1);
11423 
11424   /* When probing for stack-clash mitigation, we have to track the distance
11425      between the stack pointer and closest known reference.
11426 
11427      Most of the time we have to make a worst case assumption.  The
11428      only exception is when TARGET_BACKCHAIN is active, in which case
11429      we know *sp (offset 0) was written.  */
11430   HOST_WIDE_INT probe_interval
11431     = 1 << PARAM_VALUE (PARAM_STACK_CLASH_PROTECTION_PROBE_INTERVAL);
11432   HOST_WIDE_INT last_probe_offset
11433     = (TARGET_BACKCHAIN
11434        ? (TARGET_PACKED_STACK ? STACK_POINTER_OFFSET - UNITS_PER_LONG : 0)
11435        : probe_interval - (STACK_BOUNDARY / UNITS_PER_WORD));
11436 
11437   s390_save_gprs_to_fprs ();
11438 
11439   /* Save call saved gprs.  */
11440   if (cfun_frame_layout.first_save_gpr != -1)
11441     {
11442       insn = save_gprs (stack_pointer_rtx,
11443 			cfun_frame_layout.gprs_offset +
11444 			UNITS_PER_LONG * (cfun_frame_layout.first_save_gpr
11445 					  - cfun_frame_layout.first_save_gpr_slot),
11446 			cfun_frame_layout.first_save_gpr,
11447 			cfun_frame_layout.last_save_gpr);
11448 
11449       /* This is not 100% correct.  If we have more than one register saved,
11450 	 then LAST_PROBE_OFFSET can move even closer to sp.  */
11451       last_probe_offset
11452 	= (cfun_frame_layout.gprs_offset +
11453 	   UNITS_PER_LONG * (cfun_frame_layout.first_save_gpr
11454 			     - cfun_frame_layout.first_save_gpr_slot));
11455 
11456       emit_insn (insn);
11457     }
11458 
11459   /* Dummy insn to mark literal pool slot.  */
11460 
11461   if (cfun->machine->base_reg)
11462     emit_insn (gen_main_pool (cfun->machine->base_reg));
11463 
11464   offset = cfun_frame_layout.f0_offset;
11465 
11466   /* Save f0 and f2.  */
11467   for (i = FPR0_REGNUM; i <= FPR0_REGNUM + 1; i++)
11468     {
11469       if (cfun_fpr_save_p (i))
11470 	{
11471 	  save_fpr (stack_pointer_rtx, offset, i);
11472 	  if (offset < last_probe_offset)
11473 	    last_probe_offset = offset;
11474 	  offset += 8;
11475 	}
11476       else if (!TARGET_PACKED_STACK || cfun->stdarg)
11477 	offset += 8;
11478     }
11479 
11480   /* Save f4 and f6.  */
11481   offset = cfun_frame_layout.f4_offset;
11482   for (i = FPR4_REGNUM; i <= FPR4_REGNUM + 1; i++)
11483     {
11484       if (cfun_fpr_save_p (i))
11485 	{
11486 	  insn = save_fpr (stack_pointer_rtx, offset, i);
11487 	  if (offset < last_probe_offset)
11488 	    last_probe_offset = offset;
11489 	  offset += 8;
11490 
11491 	  /* If f4 and f6 are call clobbered they are saved due to
11492 	     stdargs and therefore are not frame related.  */
11493 	  if (!call_really_used_regs[i])
11494 	    RTX_FRAME_RELATED_P (insn) = 1;
11495 	}
11496       else if (!TARGET_PACKED_STACK || call_really_used_regs[i])
11497 	offset += 8;
11498     }
11499 
11500   if (TARGET_PACKED_STACK
11501       && cfun_save_high_fprs_p
11502       && cfun_frame_layout.f8_offset + cfun_frame_layout.high_fprs * 8 > 0)
11503     {
11504       offset = (cfun_frame_layout.f8_offset
11505 		+ (cfun_frame_layout.high_fprs - 1) * 8);
11506 
11507       for (i = FPR15_REGNUM; i >= FPR8_REGNUM && offset >= 0; i--)
11508 	if (cfun_fpr_save_p (i))
11509 	  {
11510 	    insn = save_fpr (stack_pointer_rtx, offset, i);
11511 	    if (offset < last_probe_offset)
11512 	      last_probe_offset = offset;
11513 
11514 	    RTX_FRAME_RELATED_P (insn) = 1;
11515 	    offset -= 8;
11516 	  }
11517       if (offset >= cfun_frame_layout.f8_offset)
11518 	next_fpr = i;
11519     }
11520 
11521   if (!TARGET_PACKED_STACK)
11522     next_fpr = cfun_save_high_fprs_p ? FPR15_REGNUM : 0;
11523 
11524   if (flag_stack_usage_info)
11525     current_function_static_stack_size = cfun_frame_layout.frame_size;
11526 
11527   /* Decrement stack pointer.  */
11528 
11529   if (cfun_frame_layout.frame_size > 0)
11530     {
11531       rtx frame_off = GEN_INT (-cfun_frame_layout.frame_size);
11532       rtx_insn *stack_pointer_backup_loc;
11533       bool temp_reg_clobbered_p;
11534 
11535       if (s390_stack_size)
11536 	{
11537 	  HOST_WIDE_INT stack_guard;
11538 
11539 	  if (s390_stack_guard)
11540 	    stack_guard = s390_stack_guard;
11541 	  else
11542 	    {
11543               /* If no value for stack guard is provided the smallest power of 2
11544                  that is not smaller than the current frame size is chosen.  */
11545 	      stack_guard = 1;
11546 	      while (stack_guard < cfun_frame_layout.frame_size)
11547 		stack_guard <<= 1;
11548 	    }
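          /* For example (illustrative values): a frame size of 5000 bytes
             yields a computed stack_guard of 8192, the smallest power of 2
             that is not smaller than the frame size.  */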
11549 
11550 	  if (cfun_frame_layout.frame_size >= s390_stack_size)
11551 	    {
11552 	      warning (0, "frame size of function %qs is %wd"
11553 		       " bytes exceeding user provided stack limit of "
11554 		       "%d bytes.  "
11555 		       "An unconditional trap is added.",
11556 		       current_function_name(), cfun_frame_layout.frame_size,
11557 		       s390_stack_size);
11558 	      emit_insn (gen_trap ());
11559 	      emit_barrier ();
11560 	    }
11561 	  else
11562 	    {
11563 	      /* stack_guard has to be smaller than s390_stack_size.
11564 		 Otherwise we would emit an AND with zero which would
11565 		 not match the test under mask pattern.  */
11566 	      if (stack_guard >= s390_stack_size)
11567 		{
11568 		  warning (0, "frame size of function %qs is %wd"
11569 			   " bytes which is more than half the stack size. "
11570 			   "The dynamic check would not be reliable. "
11571 			   "No check emitted for this function.",
11572 			   current_function_name(),
11573 			   cfun_frame_layout.frame_size);
11574 		}
11575 	      else
11576 		{
11577 		  HOST_WIDE_INT stack_check_mask = ((s390_stack_size - 1)
11578 						    & ~(stack_guard - 1));
11579 
11580 		  rtx t = gen_rtx_AND (Pmode, stack_pointer_rtx,
11581 				       GEN_INT (stack_check_mask));
11582 		  if (TARGET_64BIT)
11583 		    emit_insn (gen_ctrapdi4 (gen_rtx_EQ (VOIDmode,
11584 							 t, const0_rtx),
11585 					     t, const0_rtx, const0_rtx));
11586 		  else
11587 		    emit_insn (gen_ctrapsi4 (gen_rtx_EQ (VOIDmode,
11588 							 t, const0_rtx),
11589 					     t, const0_rtx, const0_rtx));
11590 		}
11591 	    }
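          /* Worked example with assumed option values: -mstack-size=65536
             and -mstack-guard=4096 give a test mask of 0xf000 above, so the
             conditional trap fires once the stack pointer modulo the 64 KiB
             stack area drops below 4096 bytes, i.e. once the frame has
             grown into the guard area.  */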
11592   	}
11593 
11594       if (s390_warn_framesize > 0
11595 	  && cfun_frame_layout.frame_size >= s390_warn_framesize)
11596 	warning (0, "frame size of %qs is %wd bytes",
11597 		 current_function_name (), cfun_frame_layout.frame_size);
11598 
11599       if (s390_warn_dynamicstack_p && cfun->calls_alloca)
11600 	warning (0, "%qs uses dynamic stack allocation", current_function_name ());
11601 
11602       /* Save the location where we could backup the incoming stack
11603 	 pointer.  */
11604       stack_pointer_backup_loc = get_last_insn ();
11605 
11606       temp_reg_clobbered_p = allocate_stack_space (frame_off, last_probe_offset,
11607 						   temp_reg);
11608 
11609       if (TARGET_BACKCHAIN || next_fpr)
11610 	{
11611 	  if (temp_reg_clobbered_p)
11612 	    {
11613 	      /* allocate_stack_space had to make use of temp_reg and
11614 		 we need it to hold a backup of the incoming stack
11615 		 pointer.  Calculate back that value from the current
11616 		 stack pointer.  */
11617 	      s390_prologue_plus_offset (temp_reg, stack_pointer_rtx,
11618 					 GEN_INT (cfun_frame_layout.frame_size),
11619 					 false);
11620 	    }
11621 	  else
11622 	    {
11623               /* allocate_stack_space didn't actually require
11624 		 temp_reg.  Insert the stack pointer backup insn
11625 		 before the stack pointer decrement code - knowing now
11626 		 that the value will survive.  */
11627 	      emit_insn_after (gen_move_insn (temp_reg, stack_pointer_rtx),
11628 			       stack_pointer_backup_loc);
11629 	    }
11630 	}
11631 
11632       /* Set backchain.  */
11633 
11634       if (TARGET_BACKCHAIN)
11635 	{
11636 	  if (cfun_frame_layout.backchain_offset)
11637 	    addr = gen_rtx_MEM (Pmode,
11638 				plus_constant (Pmode, stack_pointer_rtx,
11639 				  cfun_frame_layout.backchain_offset));
11640 	  else
11641 	    addr = gen_rtx_MEM (Pmode, stack_pointer_rtx);
11642 	  set_mem_alias_set (addr, get_frame_alias_set ());
11643 	  insn = emit_insn (gen_move_insn (addr, temp_reg));
11644 	}
11645 
11646       /* If we support non-call exceptions (e.g. for Java),
11647 	 we need to make sure the backchain pointer is set up
11648 	 before any possibly trapping memory access.  */
11649       if (TARGET_BACKCHAIN && cfun->can_throw_non_call_exceptions)
11650 	{
11651 	  addr = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (VOIDmode));
11652 	  emit_clobber (addr);
11653 	}
11654     }
11655   else if (flag_stack_clash_protection)
11656     dump_stack_clash_frame_info (NO_PROBE_NO_FRAME, false);
11657 
11658   /* Save fprs 8 - 15 (64 bit ABI).  */
11659 
11660   if (cfun_save_high_fprs_p && next_fpr)
11661     {
11662       /* If the stack might be accessed through a different register
11663 	 we have to make sure that the stack pointer decrement is not
11664 	 moved below the use of the stack slots.  */
11665       s390_emit_stack_tie ();
11666 
11667       insn = emit_insn (gen_add2_insn (temp_reg,
11668 				       GEN_INT (cfun_frame_layout.f8_offset)));
11669 
11670       offset = 0;
11671 
11672       for (i = FPR8_REGNUM; i <= next_fpr; i++)
11673 	if (cfun_fpr_save_p (i))
11674 	  {
11675 	    rtx addr = plus_constant (Pmode, stack_pointer_rtx,
11676 				      cfun_frame_layout.frame_size
11677 				      + cfun_frame_layout.f8_offset
11678 				      + offset);
11679 
11680 	    insn = save_fpr (temp_reg, offset, i);
11681 	    offset += 8;
11682 	    RTX_FRAME_RELATED_P (insn) = 1;
11683 	    add_reg_note (insn, REG_FRAME_RELATED_EXPR,
11684 			  gen_rtx_SET (gen_rtx_MEM (DFmode, addr),
11685 				       gen_rtx_REG (DFmode, i)));
11686 	  }
11687     }
11688 
11689   /* Set frame pointer, if needed.  */
11690 
11691   if (frame_pointer_needed)
11692     {
11693       insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
11694       RTX_FRAME_RELATED_P (insn) = 1;
11695     }
11696 
11697   /* Set up got pointer, if needed.  */
11698 
11699   if (flag_pic && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
11700     {
11701       rtx_insn *insns = s390_load_got ();
11702 
11703       for (rtx_insn *insn = insns; insn; insn = NEXT_INSN (insn))
11704 	annotate_constant_pool_refs (&PATTERN (insn));
11705 
11706       emit_insn (insns);
11707     }
11708 
11709   if (TARGET_TPF_PROFILING)
11710     {
11711       /* Generate a BAS instruction to serve as a function
11712 	 entry intercept to facilitate the use of tracing
11713 	 algorithms located at the branch target.  */
11714       emit_insn (gen_prologue_tpf ());
11715 
11716       /* Emit a blockage here so that all code
11717 	 lies between the profiling mechanisms.  */
11718       emit_insn (gen_blockage ());
11719     }
11720 }
11721 
11722 /* Expand the epilogue into a bunch of separate insns.  */
11723 
11724 void
11725 s390_emit_epilogue (bool sibcall)
11726 {
11727   rtx frame_pointer, return_reg, cfa_restores = NULL_RTX;
11728   int area_bottom, area_top, offset = 0;
11729   int next_offset;
11730   int i;
11731 
11732   if (TARGET_TPF_PROFILING)
11733     {
11734 
11735       /* Generate a BAS instruction to serve as a function
11736          exit intercept to facilitate the use of tracing
11737          algorithms located at the branch target.  */
11738 
11739       /* Emit a blockage here so that all code
11740          lies between the profiling mechanisms.  */
11741       emit_insn (gen_blockage ());
11742 
11743       emit_insn (gen_epilogue_tpf ());
11744     }
11745 
11746   /* Check whether to use frame or stack pointer for restore.  */
11747 
11748   frame_pointer = (frame_pointer_needed
11749 		   ? hard_frame_pointer_rtx : stack_pointer_rtx);
11750 
11751   s390_frame_area (&area_bottom, &area_top);
11752 
11753   /* Check whether we can access the register save area.
11754      If not, increment the frame pointer as required.  */
11755 
11756   if (area_top <= area_bottom)
11757     {
11758       /* Nothing to restore.  */
11759     }
11760   else if (DISP_IN_RANGE (cfun_frame_layout.frame_size + area_bottom)
11761            && DISP_IN_RANGE (cfun_frame_layout.frame_size + area_top - 1))
11762     {
11763       /* Area is in range.  */
11764       offset = cfun_frame_layout.frame_size;
11765     }
11766   else
11767     {
11768       rtx insn, frame_off, cfa;
11769 
11770       offset = area_bottom < 0 ? -area_bottom : 0;
11771       frame_off = GEN_INT (cfun_frame_layout.frame_size - offset);
11772 
11773       cfa = gen_rtx_SET (frame_pointer,
11774 			 gen_rtx_PLUS (Pmode, frame_pointer, frame_off));
11775       if (DISP_IN_RANGE (INTVAL (frame_off)))
11776 	{
11777 	  insn = gen_rtx_SET (frame_pointer,
11778 			      gen_rtx_PLUS (Pmode, frame_pointer, frame_off));
11779 	  insn = emit_insn (insn);
11780 	}
11781       else
11782 	{
11783 	  if (!CONST_OK_FOR_K (INTVAL (frame_off)))
11784 	    frame_off = force_const_mem (Pmode, frame_off);
11785 
11786 	  insn = emit_insn (gen_add2_insn (frame_pointer, frame_off));
11787 	  annotate_constant_pool_refs (&PATTERN (insn));
11788 	}
11789       add_reg_note (insn, REG_CFA_ADJUST_CFA, cfa);
11790       RTX_FRAME_RELATED_P (insn) = 1;
11791     }
11792 
11793   /* Restore call saved fprs.  */
11794 
11795   if (TARGET_64BIT)
11796     {
11797       if (cfun_save_high_fprs_p)
11798 	{
11799 	  next_offset = cfun_frame_layout.f8_offset;
11800 	  for (i = FPR8_REGNUM; i <= FPR15_REGNUM; i++)
11801 	    {
11802 	      if (cfun_fpr_save_p (i))
11803 		{
11804 		  restore_fpr (frame_pointer,
11805 			       offset + next_offset, i);
11806 		  cfa_restores
11807 		    = alloc_reg_note (REG_CFA_RESTORE,
11808 				      gen_rtx_REG (DFmode, i), cfa_restores);
11809 		  next_offset += 8;
11810 		}
11811 	    }
11812 	}
11813 
11814     }
11815   else
11816     {
11817       next_offset = cfun_frame_layout.f4_offset;
11818       /* f4, f6 */
11819       for (i = FPR4_REGNUM; i <= FPR4_REGNUM + 1; i++)
11820 	{
11821 	  if (cfun_fpr_save_p (i))
11822 	    {
11823 	      restore_fpr (frame_pointer,
11824 			   offset + next_offset, i);
11825 	      cfa_restores
11826 		= alloc_reg_note (REG_CFA_RESTORE,
11827 				  gen_rtx_REG (DFmode, i), cfa_restores);
11828 	      next_offset += 8;
11829 	    }
11830 	  else if (!TARGET_PACKED_STACK)
11831 	    next_offset += 8;
11832 	}
11833 
11834     }
11835 
11836   /* Return register.  */
11837 
11838   return_reg = gen_rtx_REG (Pmode, RETURN_REGNUM);
11839 
11840   /* Restore call saved gprs.  */
11841 
11842   if (cfun_frame_layout.first_restore_gpr != -1)
11843     {
11844       rtx insn, addr;
11845       int i;
11846 
11847       /* Check for global register and save them
11848 	 to stack location from where they get restored.  */
11849 
11850       for (i = cfun_frame_layout.first_restore_gpr;
11851 	   i <= cfun_frame_layout.last_restore_gpr;
11852 	   i++)
11853 	{
11854 	  if (global_not_special_regno_p (i))
11855 	    {
11856 	      addr = plus_constant (Pmode, frame_pointer,
11857 				    offset + cfun_frame_layout.gprs_offset
11858 				    + (i - cfun_frame_layout.first_save_gpr_slot)
11859 				    * UNITS_PER_LONG);
11860 	      addr = gen_rtx_MEM (Pmode, addr);
11861 	      set_mem_alias_set (addr, get_frame_alias_set ());
11862 	      emit_move_insn (addr, gen_rtx_REG (Pmode, i));
11863 	    }
11864 	  else
11865 	    cfa_restores
11866 	      = alloc_reg_note (REG_CFA_RESTORE,
11867 				gen_rtx_REG (Pmode, i), cfa_restores);
11868 	}
11869 
11870       /* Fetch return address from stack before load multiple;
11871          this helps scheduling.
11872 
11873 	 Only do this if we already decided that r14 needs to be
11874 	 saved to a stack slot. (And not just because r14 happens to
11875 	 be in between two GPRs which need saving.)  Otherwise it
11876 	 would be difficult to take that decision back in
11877 	 s390_optimize_prologue.
11878 
11879 	 This optimization is only helpful on in-order machines.  */
11880       if (! sibcall
11881 	  && cfun_gpr_save_slot (RETURN_REGNUM) == SAVE_SLOT_STACK
11882 	  && s390_tune <= PROCESSOR_2097_Z10)
11883 	{
11884 	  int return_regnum = find_unused_clobbered_reg();
11885 	  if (!return_regnum
11886 	      || (TARGET_INDIRECT_BRANCH_NOBP_RET_OPTION
11887 		  && !TARGET_CPU_Z10
11888 		  && return_regnum == INDIRECT_BRANCH_THUNK_REGNUM))
11889 	    {
11890 	      gcc_assert (INDIRECT_BRANCH_THUNK_REGNUM != 4);
11891 	      return_regnum = 4;
11892 	    }
11893 	  return_reg = gen_rtx_REG (Pmode, return_regnum);
11894 
11895 	  addr = plus_constant (Pmode, frame_pointer,
11896 				offset + cfun_frame_layout.gprs_offset
11897 				+ (RETURN_REGNUM
11898 				   - cfun_frame_layout.first_save_gpr_slot)
11899 				* UNITS_PER_LONG);
11900 	  addr = gen_rtx_MEM (Pmode, addr);
11901 	  set_mem_alias_set (addr, get_frame_alias_set ());
11902 	  emit_move_insn (return_reg, addr);
11903 
11904 	  /* Once we did that optimization we have to make sure
11905 	     s390_optimize_prologue does not try to remove the store
11906 	     of r14 since we will not be able to find the load issued
11907 	     here.  */
11908 	  cfun_frame_layout.save_return_addr_p = true;
11909 	}
11910 
11911       insn = restore_gprs (frame_pointer,
11912 			   offset + cfun_frame_layout.gprs_offset
11913 			   + (cfun_frame_layout.first_restore_gpr
11914 			      - cfun_frame_layout.first_save_gpr_slot)
11915 			   * UNITS_PER_LONG,
11916 			   cfun_frame_layout.first_restore_gpr,
11917 			   cfun_frame_layout.last_restore_gpr);
11918       insn = emit_insn (insn);
11919       REG_NOTES (insn) = cfa_restores;
11920       add_reg_note (insn, REG_CFA_DEF_CFA,
11921 		    plus_constant (Pmode, stack_pointer_rtx,
11922 				   STACK_POINTER_OFFSET));
11923       RTX_FRAME_RELATED_P (insn) = 1;
11924     }
11925 
11926   s390_restore_gprs_from_fprs ();
11927 
11928   if (! sibcall)
11929     emit_jump_insn (gen_return_use (return_reg));
11930 }
11931 
11932 /* Implement TARGET_SET_UP_BY_PROLOGUE.  */
11933 
11934 static void
11935 s300_set_up_by_prologue (hard_reg_set_container *regs)
11936 {
11937   if (cfun->machine->base_reg
11938       && !call_really_used_regs[REGNO (cfun->machine->base_reg)])
11939     SET_HARD_REG_BIT (regs->set, REGNO (cfun->machine->base_reg));
11940 }
11941 
11942 /* -fsplit-stack support.  */
11943 
11944 /* A SYMBOL_REF for __morestack.  */
11945 static GTY(()) rtx morestack_ref;
11946 
11947 /* When using -fsplit-stack, the allocation routines set a field in
11948    the TCB to the bottom of the stack plus this much space, measured
11949    in bytes.  */
11950 
11951 #define SPLIT_STACK_AVAILABLE 1024
11952 
11953 /* Emit -fsplit-stack prologue, which goes before the regular function
11954    prologue.  */
11955 
11956 void
11957 s390_expand_split_stack_prologue (void)
11958 {
11959   rtx r1, guard, cc = NULL;
11960   rtx_insn *insn;
11961   /* Offset from thread pointer to __private_ss.  */
11962   int psso = TARGET_64BIT ? 0x38 : 0x20;
11963   /* Pointer size in bytes.  */
11964   /* Frame size and argument size - the two parameters to __morestack.  */
11965   HOST_WIDE_INT frame_size = cfun_frame_layout.frame_size;
11966   /* Align argument size to 8 bytes - simplifies __morestack code.  */
11967   HOST_WIDE_INT args_size = crtl->args.size >= 0
11968 			    ? ((crtl->args.size + 7) & ~7)
11969 			    : 0;
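  /* E.g. (illustrative): an incoming argument block of 13 bytes is rounded
     up to 16, and the unusual case of a negative size is clamped to 0.  */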
11970   /* Label to be called by __morestack.  */
11971   rtx_code_label *call_done = NULL;
11972   rtx_code_label *parm_base = NULL;
11973   rtx tmp;
11974 
11975   gcc_assert (flag_split_stack && reload_completed);
11976   if (!TARGET_CPU_ZARCH)
11977     {
11978       sorry ("CPUs older than z900 are not supported for -fsplit-stack");
11979       return;
11980     }
11981 
11982   r1 = gen_rtx_REG (Pmode, 1);
11983 
11984   /* If no stack frame will be allocated, don't do anything.  */
11985   if (!frame_size)
11986     {
11987       if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
11988 	{
11989 	  /* If va_start is used, just use r15.  */
11990 	  emit_move_insn (r1,
11991 			 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
11992 				       GEN_INT (STACK_POINTER_OFFSET)));
11993 
11994 	}
11995       return;
11996     }
11997 
11998   if (morestack_ref == NULL_RTX)
11999     {
12000       morestack_ref = gen_rtx_SYMBOL_REF (Pmode, "__morestack");
12001       SYMBOL_REF_FLAGS (morestack_ref) |= (SYMBOL_FLAG_LOCAL
12002 					   | SYMBOL_FLAG_FUNCTION);
12003     }
12004 
12005   if (CONST_OK_FOR_K (frame_size) || CONST_OK_FOR_Op (frame_size))
12006     {
12007       /* If frame_size will fit in an add instruction, do a stack space
12008 	 check, and only call __morestack if there's not enough space.  */
12009 
12010       /* Get thread pointer.  r1 is the only register we can always destroy - r0
12011 	 could contain a static chain (and cannot be used to address memory
12012 	 anyway), r2-r6 can contain parameters, and r6-r15 are callee-saved.  */
12013       emit_move_insn (r1, gen_rtx_REG (Pmode, TP_REGNUM));
12014       /* Aim at __private_ss.  */
12015       guard = gen_rtx_MEM (Pmode, plus_constant (Pmode, r1, psso));
12016 
12017       /* If less than 1 KiB is used, skip the addition and compare directly with
12018 	 __private_ss.  */
12019       if (frame_size > SPLIT_STACK_AVAILABLE)
12020 	{
12021 	  emit_move_insn (r1, guard);
12022 	  if (TARGET_64BIT)
12023 	    emit_insn (gen_adddi3 (r1, r1, GEN_INT (frame_size)));
12024 	  else
12025 	    emit_insn (gen_addsi3 (r1, r1, GEN_INT (frame_size)));
12026 	  guard = r1;
12027 	}
12028 
12029       /* Compare the (maybe adjusted) guard with the stack pointer.  */
12030       cc = s390_emit_compare (LT, stack_pointer_rtx, guard);
12031     }
12032 
12033   call_done = gen_label_rtx ();
12034   parm_base = gen_label_rtx ();
12035 
12036   /* Emit the parameter block.  */
12037   tmp = gen_split_stack_data (parm_base, call_done,
12038 			      GEN_INT (frame_size),
12039 			      GEN_INT (args_size));
12040   insn = emit_insn (tmp);
12041   add_reg_note (insn, REG_LABEL_OPERAND, call_done);
12042   LABEL_NUSES (call_done)++;
12043   add_reg_note (insn, REG_LABEL_OPERAND, parm_base);
12044   LABEL_NUSES (parm_base)++;
12045 
12046   /* %r1 = litbase.  */
12047   insn = emit_move_insn (r1, gen_rtx_LABEL_REF (VOIDmode, parm_base));
12048   add_reg_note (insn, REG_LABEL_OPERAND, parm_base);
12049   LABEL_NUSES (parm_base)++;
12050 
12051   /* Now, we need to call __morestack.  It has very special calling
12052      conventions: it preserves param/return/static chain registers for
12053      calling main function body, and looks for its own parameters at %r1. */
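  /* Illustrative summary of the sequence emitted above and below: the data
     block addressed by %r1 carries the frame size, the rounded argument
     size and the CALL_DONE label; if a guard comparison was emitted the
     call to __morestack is made conditional on it, otherwise it is done
     unconditionally, and in both cases execution resumes at CALL_DONE.  */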
12054 
12055   if (cc != NULL)
12056     {
12057       tmp = gen_split_stack_cond_call (morestack_ref, cc, call_done);
12058 
12059       insn = emit_jump_insn (tmp);
12060       JUMP_LABEL (insn) = call_done;
12061       LABEL_NUSES (call_done)++;
12062 
12063       /* Mark the jump as very unlikely to be taken.  */
12064       add_reg_br_prob_note (insn,
12065 		            profile_probability::very_unlikely ());
12066 
12067       if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
12068 	{
12069 	  /* If va_start is used, and __morestack was not called, just use
12070 	     r15.  */
12071 	  emit_move_insn (r1,
12072 			 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
12073 				       GEN_INT (STACK_POINTER_OFFSET)));
12074 	}
12075     }
12076   else
12077     {
12078       tmp = gen_split_stack_call (morestack_ref, call_done);
12079       insn = emit_jump_insn (tmp);
12080       JUMP_LABEL (insn) = call_done;
12081       LABEL_NUSES (call_done)++;
12082       emit_barrier ();
12083     }
12084 
12085   /* __morestack will call us here.  */
12086 
12087   emit_label (call_done);
12088 }
12089 
12090 /* We may have to tell the dataflow pass that the split stack prologue
12091    is initializing a register.  */
12092 
12093 static void
12094 s390_live_on_entry (bitmap regs)
12095 {
12096   if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
12097     {
12098       gcc_assert (flag_split_stack);
12099       bitmap_set_bit (regs, 1);
12100     }
12101 }
12102 
12103 /* Return true if the function can use simple_return to return outside
12104    of a shrink-wrapped region.  At present shrink-wrapping is supported
12105    in all cases.  */
12106 
12107 bool
12108 s390_can_use_simple_return_insn (void)
12109 {
12110   return true;
12111 }
12112 
12113 /* Return true if the epilogue is guaranteed to contain only a return
12114    instruction and if a direct return can therefore be used instead.
12115    One of the main advantages of using direct return instructions
12116    is that we can then use conditional returns.  */
12117 
12118 bool
12119 s390_can_use_return_insn (void)
12120 {
12121   int i;
12122 
12123   if (!reload_completed)
12124     return false;
12125 
12126   if (crtl->profile)
12127     return false;
12128 
12129   if (TARGET_TPF_PROFILING)
12130     return false;
12131 
12132   for (i = 0; i < 16; i++)
12133     if (cfun_gpr_save_slot (i) != SAVE_SLOT_NONE)
12134       return false;
12135 
12136   /* For 31 bit this is not covered by the frame_size check below
12137      since f4, f6 are saved in the register save area without needing
12138      additional stack space.  */
12139   if (!TARGET_64BIT
12140       && (cfun_fpr_save_p (FPR4_REGNUM) || cfun_fpr_save_p (FPR6_REGNUM)))
12141     return false;
12142 
12143   if (cfun->machine->base_reg
12144       && !call_really_used_regs[REGNO (cfun->machine->base_reg)])
12145     return false;
12146 
12147   return cfun_frame_layout.frame_size == 0;
12148 }
12149 
12150 /* The VX ABI differs for vararg functions.  Therefore we need the
12151    prototype of the callee to be available when passing vector type
12152    values.  */
12153 static const char *
12154 s390_invalid_arg_for_unprototyped_fn (const_tree typelist, const_tree funcdecl, const_tree val)
12155 {
12156   return ((TARGET_VX_ABI
12157 	   && typelist == 0
12158 	   && VECTOR_TYPE_P (TREE_TYPE (val))
12159 	   && (funcdecl == NULL_TREE
12160 	       || (TREE_CODE (funcdecl) == FUNCTION_DECL
12161 		   && DECL_BUILT_IN_CLASS (funcdecl) != BUILT_IN_MD)))
12162 	  ? N_("vector argument passed to unprototyped function")
12163 	  : NULL);
12164 }
12165 
12166 
12167 /* Return the size in bytes of a function argument of
12168    type TYPE and/or mode MODE.  At least one of TYPE or
12169    MODE must be specified.  */
12170 
12171 static int
12172 s390_function_arg_size (machine_mode mode, const_tree type)
12173 {
12174   if (type)
12175     return int_size_in_bytes (type);
12176 
12177   /* No type info available for some library calls ...  */
12178   if (mode != BLKmode)
12179     return GET_MODE_SIZE (mode);
12180 
12181   /* If we have neither type nor mode, abort */
12182   gcc_unreachable ();
12183 }
12184 
12185 /* Return true if a function argument of type TYPE and mode MODE
12186    is to be passed in a vector register, if available.  */
12187 
12188 bool
12189 s390_function_arg_vector (machine_mode mode, const_tree type)
12190 {
12191   if (!TARGET_VX_ABI)
12192     return false;
12193 
12194   if (s390_function_arg_size (mode, type) > 16)
12195     return false;
12196 
12197   /* No type info available for some library calls ...  */
12198   if (!type)
12199     return VECTOR_MODE_P (mode);
12200 
12201   /* The ABI says that record types with a single member are treated
12202      just like that member would be.  */
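  /* As an illustration (example types, not from this file):

       typedef int v4si __attribute__ ((vector_size (16)));
       struct wrapped { v4si x; };

     an argument of type 'struct wrapped' is treated like a plain v4si,
     whereas a struct with a second member or with trailing padding is
     not considered a vector argument by the loop below.  */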
12203   while (TREE_CODE (type) == RECORD_TYPE)
12204     {
12205       tree field, single = NULL_TREE;
12206 
12207       for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
12208 	{
12209 	  if (TREE_CODE (field) != FIELD_DECL)
12210 	    continue;
12211 
12212 	  if (single == NULL_TREE)
12213 	    single = TREE_TYPE (field);
12214 	  else
12215 	    return false;
12216 	}
12217 
12218       if (single == NULL_TREE)
12219 	return false;
12220       else
12221 	{
12222           /* If the field declaration adds extra bytes due to
12223              e.g. padding, this is not accepted as a vector type.  */
12224 	  if (int_size_in_bytes (single) <= 0
12225 	      || int_size_in_bytes (single) != int_size_in_bytes (type))
12226 	    return false;
12227 	  type = single;
12228 	}
12229     }
12230 
12231   return VECTOR_TYPE_P (type);
12232 }
12233 
12234 /* Return true if a function argument of type TYPE and mode MODE
12235    is to be passed in a floating-point register, if available.  */
12236 
12237 static bool
12238 s390_function_arg_float (machine_mode mode, const_tree type)
12239 {
12240   if (s390_function_arg_size (mode, type) > 8)
12241     return false;
12242 
12243   /* Soft-float changes the ABI: no floating-point registers are used.  */
12244   if (TARGET_SOFT_FLOAT)
12245     return false;
12246 
12247   /* No type info available for some library calls ...  */
12248   if (!type)
12249     return mode == SFmode || mode == DFmode || mode == SDmode || mode == DDmode;
12250 
12251   /* The ABI says that record types with a single member are treated
12252      just like that member would be.  */
12253   while (TREE_CODE (type) == RECORD_TYPE)
12254     {
12255       tree field, single = NULL_TREE;
12256 
12257       for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
12258 	{
12259 	  if (TREE_CODE (field) != FIELD_DECL)
12260 	    continue;
12261 
12262 	  if (single == NULL_TREE)
12263 	    single = TREE_TYPE (field);
12264 	  else
12265 	    return false;
12266 	}
12267 
12268       if (single == NULL_TREE)
12269 	return false;
12270       else
12271 	type = single;
12272     }
12273 
12274   return TREE_CODE (type) == REAL_TYPE;
12275 }
12276 
12277 /* Return true if a function argument of type TYPE and mode MODE
12278    is to be passed in an integer register, or a pair of integer
12279    registers, if available.  */
12280 
12281 static bool
12282 s390_function_arg_integer (machine_mode mode, const_tree type)
12283 {
12284   int size = s390_function_arg_size (mode, type);
12285   if (size > 8)
12286     return false;
12287 
12288   /* No type info available for some library calls ...  */
12289   if (!type)
12290     return GET_MODE_CLASS (mode) == MODE_INT
12291 	   || (TARGET_SOFT_FLOAT &&  SCALAR_FLOAT_MODE_P (mode));
12292 
12293   /* We accept small integral (and similar) types.  */
12294   if (INTEGRAL_TYPE_P (type)
12295       || POINTER_TYPE_P (type)
12296       || TREE_CODE (type) == NULLPTR_TYPE
12297       || TREE_CODE (type) == OFFSET_TYPE
12298       || (TARGET_SOFT_FLOAT && TREE_CODE (type) == REAL_TYPE))
12299     return true;
12300 
12301   /* We also accept structs of size 1, 2, 4, 8 that are not
12302      passed in floating-point registers.  */
12303   if (AGGREGATE_TYPE_P (type)
12304       && exact_log2 (size) >= 0
12305       && !s390_function_arg_float (mode, type))
12306     return true;
12307 
12308   return false;
12309 }
12310 
12311 /* Return 1 if a function argument of type TYPE and mode MODE
12312    is to be passed by reference.  The ABI specifies that only
12313    structures of size 1, 2, 4, or 8 bytes are passed by value,
12314    all other structures (and complex numbers) are passed by
12315    reference.  */
12316 
12317 static bool
12318 s390_pass_by_reference (cumulative_args_t ca ATTRIBUTE_UNUSED,
12319 			machine_mode mode, const_tree type,
12320 			bool named ATTRIBUTE_UNUSED)
12321 {
12322   int size = s390_function_arg_size (mode, type);
12323 
12324   if (s390_function_arg_vector (mode, type))
12325     return false;
12326 
12327   if (size > 8)
12328     return true;
12329 
12330   if (type)
12331     {
12332       if (AGGREGATE_TYPE_P (type) && exact_log2 (size) < 0)
12333         return true;
12334 
12335       if (TREE_CODE (type) == COMPLEX_TYPE
12336 	  || TREE_CODE (type) == VECTOR_TYPE)
12337 	return true;
12338     }
12339 
12340   return false;
12341 }
12342 
12343 /* Update the data in CUM to advance over an argument of mode MODE and
12344    data type TYPE.  (TYPE is null for libcalls where that information
12345    may not be available.).  The boolean NAMED specifies whether the
12346    argument is a named argument (as opposed to an unnamed argument
12347    matching an ellipsis).  */
12348 
12349 static void
12350 s390_function_arg_advance (cumulative_args_t cum_v, machine_mode mode,
12351 			   const_tree type, bool named)
12352 {
12353   CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
12354 
12355   if (s390_function_arg_vector (mode, type))
12356     {
12357       /* We are called for unnamed vector stdarg arguments which are
12358 	 passed on the stack.  In this case this hook does not have to
12359 	 do anything since stack arguments are tracked by common
12360 	 code.  */
12361       if (!named)
12362 	return;
12363       cum->vrs += 1;
12364     }
12365   else if (s390_function_arg_float (mode, type))
12366     {
12367       cum->fprs += 1;
12368     }
12369   else if (s390_function_arg_integer (mode, type))
12370     {
12371       int size = s390_function_arg_size (mode, type);
12372       cum->gprs += ((size + UNITS_PER_LONG - 1) / UNITS_PER_LONG);
12373     }
12374   else
12375     gcc_unreachable ();
12376 }
12377 
12378 /* Define where to put the arguments to a function.
12379    Value is zero to push the argument on the stack,
12380    or a hard register in which to store the argument.
12381 
12382    MODE is the argument's machine mode.
12383    TYPE is the data type of the argument (as a tree).
12384     This is null for libcalls where that information may
12385     not be available.
12386    CUM is a variable of type CUMULATIVE_ARGS which gives info about
12387     the preceding args and about the function being called.
12388    NAMED is nonzero if this argument is a named parameter
12389     (otherwise it is an extra parameter matching an ellipsis).
12390 
12391    On S/390, we use general purpose registers 2 through 6 to
12392    pass integer, pointer, and certain structure arguments, and
12393    floating point registers 0 and 2 (0, 2, 4, and 6 on 64-bit)
12394    to pass floating point arguments.  All remaining arguments
12395    are pushed to the stack.  */
12396 
12397 static rtx
12398 s390_function_arg (cumulative_args_t cum_v, machine_mode mode,
12399 		   const_tree type, bool named)
12400 {
12401   CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
12402 
12403   if (!named)
12404     s390_check_type_for_vector_abi (type, true, false);
12405 
12406   if (s390_function_arg_vector (mode, type))
12407     {
12408       /* Vector arguments being part of the ellipsis are passed on the
12409 	 stack.  */
12410       if (!named || (cum->vrs + 1 > VEC_ARG_NUM_REG))
12411 	return NULL_RTX;
12412 
12413       return gen_rtx_REG (mode, cum->vrs + FIRST_VEC_ARG_REGNO);
12414     }
12415   else if (s390_function_arg_float (mode, type))
12416     {
12417       if (cum->fprs + 1 > FP_ARG_NUM_REG)
12418 	return NULL_RTX;
12419       else
12420 	return gen_rtx_REG (mode, cum->fprs + 16);
12421     }
12422   else if (s390_function_arg_integer (mode, type))
12423     {
12424       int size = s390_function_arg_size (mode, type);
12425       int n_gprs = (size + UNITS_PER_LONG - 1) / UNITS_PER_LONG;
12426 
12427       if (cum->gprs + n_gprs > GP_ARG_NUM_REG)
12428 	return NULL_RTX;
12429       else if (n_gprs == 1 || UNITS_PER_WORD == UNITS_PER_LONG)
12430 	return gen_rtx_REG (mode, cum->gprs + 2);
12431       else if (n_gprs == 2)
12432 	{
12433 	  rtvec p = rtvec_alloc (2);
12434 
12435 	  RTVEC_ELT (p, 0)
12436 	    = gen_rtx_EXPR_LIST (SImode, gen_rtx_REG (SImode, cum->gprs + 2),
12437 					 const0_rtx);
12438 	  RTVEC_ELT (p, 1)
12439 	    = gen_rtx_EXPR_LIST (SImode, gen_rtx_REG (SImode, cum->gprs + 3),
12440 					 GEN_INT (4));
12441 
12442 	  return gen_rtx_PARALLEL (mode, p);
12443 	}
12444     }
12445 
12446   /* After the real arguments, expand_call calls us once again
12447      with a void_type_node type.  Whatever we return here is
12448      passed as operand 2 to the call expanders.
12449 
12450      We don't need this feature ...  */
12451   else if (type == void_type_node)
12452     return const0_rtx;
12453 
12454   gcc_unreachable ();
12455 }
12456 
12457 /* Implement TARGET_FUNCTION_ARG_BOUNDARY.  Vector arguments are
12458    left-justified when placed on the stack during parameter passing.  */
12459 
12460 static pad_direction
12461 s390_function_arg_padding (machine_mode mode, const_tree type)
12462 {
12463   if (s390_function_arg_vector (mode, type))
12464     return PAD_UPWARD;
12465 
12466   return default_function_arg_padding (mode, type);
12467 }
12468 
12469 /* Return true if return values of type TYPE should be returned
12470    in a memory buffer whose address is passed by the caller as
12471    hidden first argument.  */
12472 
12473 static bool
12474 s390_return_in_memory (const_tree type, const_tree fundecl ATTRIBUTE_UNUSED)
12475 {
12476   /* We accept small integral (and similar) types.  */
12477   if (INTEGRAL_TYPE_P (type)
12478       || POINTER_TYPE_P (type)
12479       || TREE_CODE (type) == OFFSET_TYPE
12480       || TREE_CODE (type) == REAL_TYPE)
12481     return int_size_in_bytes (type) > 8;
12482 
12483   /* vector types which fit into a VR.  */
12484   if (TARGET_VX_ABI
12485       && VECTOR_TYPE_P (type)
12486       && int_size_in_bytes (type) <= 16)
12487     return false;
12488 
12489   /* Aggregates and similar constructs are always returned
12490      in memory.  */
12491   if (AGGREGATE_TYPE_P (type)
12492       || TREE_CODE (type) == COMPLEX_TYPE
12493       || VECTOR_TYPE_P (type))
12494     return true;
12495 
12496   /* ??? We get called on all sorts of random stuff from
12497      aggregate_value_p.  We can't abort, but it's not clear
12498      what's safe to return.  Pretend it's a struct I guess.  */
12499   return true;
12500 }
12501 
12502 /* Function arguments and return values are promoted to word size.  */
12503 
12504 static machine_mode
12505 s390_promote_function_mode (const_tree type, machine_mode mode,
12506                             int *punsignedp,
12507                             const_tree fntype ATTRIBUTE_UNUSED,
12508                             int for_return ATTRIBUTE_UNUSED)
12509 {
12510   if (INTEGRAL_MODE_P (mode)
12511       && GET_MODE_SIZE (mode) < UNITS_PER_LONG)
12512     {
12513       if (type != NULL_TREE && POINTER_TYPE_P (type))
12514 	*punsignedp = POINTERS_EXTEND_UNSIGNED;
12515       return Pmode;
12516     }
12517 
12518   return mode;
12519 }
12520 
12521 /* Define where to return a (scalar) value of type RET_TYPE.
12522    If RET_TYPE is null, define where to return a (scalar)
12523    value of mode MODE from a libcall.  */
12524 
12525 static rtx
12526 s390_function_and_libcall_value (machine_mode mode,
12527 				 const_tree ret_type,
12528 				 const_tree fntype_or_decl,
12529 				 bool outgoing ATTRIBUTE_UNUSED)
12530 {
12531   /* For vector return types it is important to use the RET_TYPE
12532      argument whenever available since the middle-end might have
12533      changed the mode to a scalar mode.  */
12534   bool vector_ret_type_p = ((ret_type && VECTOR_TYPE_P (ret_type))
12535 			    || (!ret_type && VECTOR_MODE_P (mode)));
12536 
12537   /* For normal functions perform the promotion as
12538      promote_function_mode would do.  */
12539   if (ret_type)
12540     {
12541       int unsignedp = TYPE_UNSIGNED (ret_type);
12542       mode = promote_function_mode (ret_type, mode, &unsignedp,
12543 				    fntype_or_decl, 1);
12544     }
12545 
12546   gcc_assert (GET_MODE_CLASS (mode) == MODE_INT
12547 	      || SCALAR_FLOAT_MODE_P (mode)
12548 	      || (TARGET_VX_ABI && vector_ret_type_p));
12549   gcc_assert (GET_MODE_SIZE (mode) <= (TARGET_VX_ABI ? 16 : 8));
12550 
12551   if (TARGET_VX_ABI && vector_ret_type_p)
12552     return gen_rtx_REG (mode, FIRST_VEC_ARG_REGNO);
12553   else if (TARGET_HARD_FLOAT && SCALAR_FLOAT_MODE_P (mode))
12554     return gen_rtx_REG (mode, 16);
12555   else if (GET_MODE_SIZE (mode) <= UNITS_PER_LONG
12556 	   || UNITS_PER_LONG == UNITS_PER_WORD)
12557     return gen_rtx_REG (mode, 2);
12558   else if (GET_MODE_SIZE (mode) == 2 * UNITS_PER_LONG)
12559     {
12560       /* This case is triggered when returning a 64 bit value with
12561 	 -m31 -mzarch.  Although the value would fit into a single
12562 	 register it has to be forced into a 32 bit register pair in
12563 	 order to match the ABI.  */
12564       rtvec p = rtvec_alloc (2);
12565 
12566       RTVEC_ELT (p, 0)
12567 	= gen_rtx_EXPR_LIST (SImode, gen_rtx_REG (SImode, 2), const0_rtx);
12568       RTVEC_ELT (p, 1)
12569 	= gen_rtx_EXPR_LIST (SImode, gen_rtx_REG (SImode, 3), GEN_INT (4));
12570 
12571       return gen_rtx_PARALLEL (mode, p);
12572     }
12573 
12574   gcc_unreachable ();
12575 }
12576 
12577 /* Define where to return a scalar return value of type RET_TYPE.  */
12578 
12579 static rtx
12580 s390_function_value (const_tree ret_type, const_tree fn_decl_or_type,
12581 		     bool outgoing)
12582 {
12583   return s390_function_and_libcall_value (TYPE_MODE (ret_type), ret_type,
12584 					  fn_decl_or_type, outgoing);
12585 }
12586 
12587 /* Define where to return a scalar libcall return value of mode
12588    MODE.  */
12589 
12590 static rtx
12591 s390_libcall_value (machine_mode mode, const_rtx fun ATTRIBUTE_UNUSED)
12592 {
12593   return s390_function_and_libcall_value (mode, NULL_TREE,
12594 					  NULL_TREE, true);
12595 }
12596 
12597 
12598 /* Create and return the va_list datatype.
12599 
12600    On S/390, va_list is an array type equivalent to
12601 
12602       typedef struct __va_list_tag
12603         {
12604             long __gpr;
12605             long __fpr;
12606             void *__overflow_arg_area;
12607             void *__reg_save_area;
12608         } va_list[1];
12609 
12610    where __gpr and __fpr hold the number of general purpose
12611    or floating point arguments used up to now, respectively,
12612    __overflow_arg_area points to the stack location of the
12613    next argument passed on the stack, and __reg_save_area
12614    always points to the start of the register area in the
12615    call frame of the current function.  The function prologue
12616    saves all registers used for argument passing into this
12617    area if the function uses variable arguments.  */
12618 
12619 static tree
12620 s390_build_builtin_va_list (void)
12621 {
12622   tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
12623 
12624   record = lang_hooks.types.make_type (RECORD_TYPE);
12625 
12626   type_decl =
12627     build_decl (BUILTINS_LOCATION,
12628 		TYPE_DECL, get_identifier ("__va_list_tag"), record);
12629 
12630   f_gpr = build_decl (BUILTINS_LOCATION,
12631 		      FIELD_DECL, get_identifier ("__gpr"),
12632 		      long_integer_type_node);
12633   f_fpr = build_decl (BUILTINS_LOCATION,
12634 		      FIELD_DECL, get_identifier ("__fpr"),
12635 		      long_integer_type_node);
12636   f_ovf = build_decl (BUILTINS_LOCATION,
12637 		      FIELD_DECL, get_identifier ("__overflow_arg_area"),
12638 		      ptr_type_node);
12639   f_sav = build_decl (BUILTINS_LOCATION,
12640 		      FIELD_DECL, get_identifier ("__reg_save_area"),
12641 		      ptr_type_node);
12642 
12643   va_list_gpr_counter_field = f_gpr;
12644   va_list_fpr_counter_field = f_fpr;
12645 
12646   DECL_FIELD_CONTEXT (f_gpr) = record;
12647   DECL_FIELD_CONTEXT (f_fpr) = record;
12648   DECL_FIELD_CONTEXT (f_ovf) = record;
12649   DECL_FIELD_CONTEXT (f_sav) = record;
12650 
12651   TYPE_STUB_DECL (record) = type_decl;
12652   TYPE_NAME (record) = type_decl;
12653   TYPE_FIELDS (record) = f_gpr;
12654   DECL_CHAIN (f_gpr) = f_fpr;
12655   DECL_CHAIN (f_fpr) = f_ovf;
12656   DECL_CHAIN (f_ovf) = f_sav;
12657 
12658   layout_type (record);
12659 
12660   /* The correct type is an array type of one element.  */
12661   return build_array_type (record, build_index_type (size_zero_node));
12662 }
12663 
12664 /* Implement va_start by filling the va_list structure VALIST.
12665    STDARG_P is always true, and ignored.
12666    NEXTARG points to the first anonymous stack argument.
12667 
12668    The following global variables are used to initialize
12669    the va_list structure:
12670 
12671      crtl->args.info:
12672        holds number of gprs and fprs used for named arguments.
12673      crtl->args.arg_offset_rtx:
12674        holds the offset of the first anonymous stack argument
12675        (relative to the virtual arg pointer).  */
12676 
12677 static void
12678 s390_va_start (tree valist, rtx nextarg ATTRIBUTE_UNUSED)
12679 {
12680   HOST_WIDE_INT n_gpr, n_fpr;
12681   int off;
12682   tree f_gpr, f_fpr, f_ovf, f_sav;
12683   tree gpr, fpr, ovf, sav, t;
12684 
12685   f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
12686   f_fpr = DECL_CHAIN (f_gpr);
12687   f_ovf = DECL_CHAIN (f_fpr);
12688   f_sav = DECL_CHAIN (f_ovf);
12689 
12690   valist = build_simple_mem_ref (valist);
12691   gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
12692   fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
12693   ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
12694   sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
12695 
12696   /* Count number of gp and fp argument registers used.  */
12697 
12698   n_gpr = crtl->args.info.gprs;
12699   n_fpr = crtl->args.info.fprs;
12700 
12701   if (cfun->va_list_gpr_size)
12702     {
12703       t = build2 (MODIFY_EXPR, TREE_TYPE (gpr), gpr,
12704 		  build_int_cst (NULL_TREE, n_gpr));
12705       TREE_SIDE_EFFECTS (t) = 1;
12706       expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
12707     }
12708 
12709   if (cfun->va_list_fpr_size)
12710     {
12711       t = build2 (MODIFY_EXPR, TREE_TYPE (fpr), fpr,
12712 	          build_int_cst (NULL_TREE, n_fpr));
12713       TREE_SIDE_EFFECTS (t) = 1;
12714       expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
12715     }
12716 
12717   if (flag_split_stack
12718      && (lookup_attribute ("no_split_stack", DECL_ATTRIBUTES (cfun->decl))
12719          == NULL)
12720      && cfun->machine->split_stack_varargs_pointer == NULL_RTX)
12721     {
12722       rtx reg;
12723       rtx_insn *seq;
12724 
12725       reg = gen_reg_rtx (Pmode);
12726       cfun->machine->split_stack_varargs_pointer = reg;
12727 
12728       start_sequence ();
12729       emit_move_insn (reg, gen_rtx_REG (Pmode, 1));
12730       seq = get_insns ();
12731       end_sequence ();
12732 
12733       push_topmost_sequence ();
12734       emit_insn_after (seq, entry_of_function ());
12735       pop_topmost_sequence ();
12736     }
12737 
12738   /* Find the overflow area.
12739      FIXME: This currently is too pessimistic when the vector ABI is
12740      enabled.  In that case we *always* set up the overflow area
12741      pointer.  */
12742   if (n_gpr + cfun->va_list_gpr_size > GP_ARG_NUM_REG
12743       || n_fpr + cfun->va_list_fpr_size > FP_ARG_NUM_REG
12744       || TARGET_VX_ABI)
12745     {
12746       if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
12747         t = make_tree (TREE_TYPE (ovf), virtual_incoming_args_rtx);
12748       else
12749         t = make_tree (TREE_TYPE (ovf), cfun->machine->split_stack_varargs_pointer);
12750 
12751       off = INTVAL (crtl->args.arg_offset_rtx);
12752       off = off < 0 ? 0 : off;
12753       if (TARGET_DEBUG_ARG)
12754 	fprintf (stderr, "va_start: n_gpr = %d, n_fpr = %d off %d\n",
12755 		 (int)n_gpr, (int)n_fpr, off);
12756 
12757       t = fold_build_pointer_plus_hwi (t, off);
12758 
12759       t = build2 (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
12760       TREE_SIDE_EFFECTS (t) = 1;
12761       expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
12762     }
12763 
12764   /* Find the register save area.  */
12765   if ((cfun->va_list_gpr_size && n_gpr < GP_ARG_NUM_REG)
12766       || (cfun->va_list_fpr_size && n_fpr < FP_ARG_NUM_REG))
12767     {
12768       t = make_tree (TREE_TYPE (sav), return_address_pointer_rtx);
12769       t = fold_build_pointer_plus_hwi (t, -RETURN_REGNUM * UNITS_PER_LONG);
12770 
12771       t = build2 (MODIFY_EXPR, TREE_TYPE (sav), sav, t);
12772       TREE_SIDE_EFFECTS (t) = 1;
12773       expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
12774     }
12775 }
12776 
12777 /* Implement va_arg by updating the va_list structure
12778    VALIST as required to retrieve an argument of type
12779    TYPE, and returning that argument.
12780 
12781    Generates code equivalent to:
12782 
12783    if (integral value) {
12784      if (size  <= 4 && args.gpr < 5 ||
12785          size  > 4 && args.gpr < 4 )
12786        ret = args.reg_save_area[args.gpr+8]
12787      else
12788        ret = *args.overflow_arg_area++;
12789    } else if (vector value) {
12790        ret = *args.overflow_arg_area;
12791        args.overflow_arg_area += size / 8;
12792    } else if (float value) {
12793      if (args.fpr < 2)
12794        ret = args.reg_save_area[args.fpr+64]
12795      else
12796        ret = *args.overflow_arg_area++;
12797    } else if (aggregate value) {
12798      if (args.gpr < 5)
12799        ret = *args.reg_save_area[args.gpr]
12800      else
12801        ret = **args.overflow_arg_area++;
12802    } */
12803 
12804 static tree
12805 s390_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
12806 		      gimple_seq *post_p ATTRIBUTE_UNUSED)
12807 {
12808   tree f_gpr, f_fpr, f_ovf, f_sav;
12809   tree gpr, fpr, ovf, sav, reg, t, u;
12810   int indirect_p, size, n_reg, sav_ofs, sav_scale, max_reg;
12811   tree lab_false, lab_over = NULL_TREE;
12812   tree addr = create_tmp_var (ptr_type_node, "addr");
12813   bool left_align_p; /* How a value < UNITS_PER_LONG is aligned within
12814 			a stack slot.  */
12815 
12816   f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
12817   f_fpr = DECL_CHAIN (f_gpr);
12818   f_ovf = DECL_CHAIN (f_fpr);
12819   f_sav = DECL_CHAIN (f_ovf);
12820 
12821   gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
12822   fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
12823   sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
12824 
12825   /* The tree for args* cannot be shared between gpr/fpr and ovf since
12826      both appear on a lhs.  */
12827   valist = unshare_expr (valist);
12828   ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
12829 
12830   size = int_size_in_bytes (type);
12831 
12832   s390_check_type_for_vector_abi (type, true, false);
12833 
12834   if (pass_by_reference (NULL, TYPE_MODE (type), type, false))
12835     {
12836       if (TARGET_DEBUG_ARG)
12837 	{
12838 	  fprintf (stderr, "va_arg: aggregate type");
12839 	  debug_tree (type);
12840 	}
12841 
12842       /* Aggregates are passed by reference.  */
12843       indirect_p = 1;
12844       reg = gpr;
12845       n_reg = 1;
12846 
12847       /* Kernel stack layout on 31 bit: it is assumed here that no padding
12848          will be added by s390_frame_info because for va_args an even number
12849          of gprs always has to be saved (r15-r2 = 14 regs).  */
12850       sav_ofs = 2 * UNITS_PER_LONG;
12851       sav_scale = UNITS_PER_LONG;
12852       size = UNITS_PER_LONG;
12853       max_reg = GP_ARG_NUM_REG - n_reg;
12854       left_align_p = false;
12855     }
12856   else if (s390_function_arg_vector (TYPE_MODE (type), type))
12857     {
12858       if (TARGET_DEBUG_ARG)
12859 	{
12860 	  fprintf (stderr, "va_arg: vector type");
12861 	  debug_tree (type);
12862 	}
12863 
12864       indirect_p = 0;
12865       reg = NULL_TREE;
12866       n_reg = 0;
12867       sav_ofs = 0;
12868       sav_scale = 8;
12869       max_reg = 0;
12870       left_align_p = true;
12871     }
12872   else if (s390_function_arg_float (TYPE_MODE (type), type))
12873     {
12874       if (TARGET_DEBUG_ARG)
12875 	{
12876 	  fprintf (stderr, "va_arg: float type");
12877 	  debug_tree (type);
12878 	}
12879 
12880       /* FP args go in FP registers, if present.  */
12881       indirect_p = 0;
12882       reg = fpr;
12883       n_reg = 1;
12884       sav_ofs = 16 * UNITS_PER_LONG;
12885       sav_scale = 8;
12886       max_reg = FP_ARG_NUM_REG - n_reg;
12887       left_align_p = false;
12888     }
12889   else
12890     {
12891       if (TARGET_DEBUG_ARG)
12892 	{
12893 	  fprintf (stderr, "va_arg: other type");
12894 	  debug_tree (type);
12895 	}
12896 
12897       /* Otherwise into GP registers.  */
12898       indirect_p = 0;
12899       reg = gpr;
12900       n_reg = (size + UNITS_PER_LONG - 1) / UNITS_PER_LONG;
12901 
12902       /* Kernel stack layout on 31 bit: it is assumed here that no padding
12903          will be added by s390_frame_info because for va_args an even number
12904          of gprs always has to be saved (r15-r2 = 14 regs).  */
12905       sav_ofs = 2 * UNITS_PER_LONG;
12906 
12907       if (size < UNITS_PER_LONG)
12908 	sav_ofs += UNITS_PER_LONG - size;
12909 
12910       sav_scale = UNITS_PER_LONG;
12911       max_reg = GP_ARG_NUM_REG - n_reg;
12912       left_align_p = false;
12913     }
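  /* Summary of the register save area offsets chosen above for 64 bit
     (illustrative, derived from the sav_ofs/sav_scale values): GPR-class
     values are fetched from reg_save_area + 16 + gpr * 8, FPR values from
     reg_save_area + 128 + fpr * 8; values shorter than a register are
     right-aligned within their slot unless left_align_p is set.  */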
12914 
12915   /* Pull the value out of the saved registers ...  */
12916 
12917   if (reg != NULL_TREE)
12918     {
12919       /*
12920 	if (reg > ((typeof (reg))max_reg))
12921           goto lab_false;
12922 
12923         addr = sav + sav_ofs + reg * save_scale;
12924 
12925 	goto lab_over;
12926 
12927         lab_false:
12928       */
12929 
12930       lab_false = create_artificial_label (UNKNOWN_LOCATION);
12931       lab_over = create_artificial_label (UNKNOWN_LOCATION);
12932 
12933       t = fold_convert (TREE_TYPE (reg), size_int (max_reg));
12934       t = build2 (GT_EXPR, boolean_type_node, reg, t);
12935       u = build1 (GOTO_EXPR, void_type_node, lab_false);
12936       t = build3 (COND_EXPR, void_type_node, t, u, NULL_TREE);
12937       gimplify_and_add (t, pre_p);
12938 
12939       t = fold_build_pointer_plus_hwi (sav, sav_ofs);
12940       u = build2 (MULT_EXPR, TREE_TYPE (reg), reg,
12941 		  fold_convert (TREE_TYPE (reg), size_int (sav_scale)));
12942       t = fold_build_pointer_plus (t, u);
12943 
12944       gimplify_assign (addr, t, pre_p);
12945 
12946       gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));
12947 
12948       gimple_seq_add_stmt (pre_p, gimple_build_label (lab_false));
12949     }
12950 
12951   /* ... Otherwise out of the overflow area.  */
12952 
12953   t = ovf;
12954   if (size < UNITS_PER_LONG && !left_align_p)
12955     t = fold_build_pointer_plus_hwi (t, UNITS_PER_LONG - size);
12956 
12957   gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
12958 
12959   gimplify_assign (addr, t, pre_p);
12960 
12961   if (size < UNITS_PER_LONG && left_align_p)
12962     t = fold_build_pointer_plus_hwi (t, UNITS_PER_LONG);
12963   else
12964     t = fold_build_pointer_plus_hwi (t, size);
12965 
12966   gimplify_assign (ovf, t, pre_p);
12967 
12968   if (reg != NULL_TREE)
12969     gimple_seq_add_stmt (pre_p, gimple_build_label (lab_over));
12970 
12971 
12972   /* Increment register save count.  */
12973 
12974   if (n_reg > 0)
12975     {
12976       u = build2 (PREINCREMENT_EXPR, TREE_TYPE (reg), reg,
12977 		  fold_convert (TREE_TYPE (reg), size_int (n_reg)));
12978       gimplify_and_add (u, pre_p);
12979     }
12980 
12981   if (indirect_p)
12982     {
12983       t = build_pointer_type_for_mode (build_pointer_type (type),
12984 				       ptr_mode, true);
12985       addr = fold_convert (t, addr);
12986       addr = build_va_arg_indirect_ref (addr);
12987     }
12988   else
12989     {
12990       t = build_pointer_type_for_mode (type, ptr_mode, true);
12991       addr = fold_convert (t, addr);
12992     }
12993 
12994   return build_va_arg_indirect_ref (addr);
12995 }
12996 
12997 /* Emit rtl for the tbegin or tbegin_retry (RETRY != NULL_RTX)
12998    expanders.
12999    DEST  - Register location where CC will be stored.
13000    TDB   - Pointer to a 256 byte area in which to store the transaction
13001            diagnostic block.  NULL if TDB is not needed.
13002    RETRY - Retry count value.  If non-NULL a retry loop for CC2
13003            is emitted.
13004    CLOBBER_FPRS_P - If true clobbers for all FPRs are emitted as part
13005                     of the tbegin instruction pattern.  */
13006 
13007 void
13008 s390_expand_tbegin (rtx dest, rtx tdb, rtx retry, bool clobber_fprs_p)
13009 {
13010   rtx retry_plus_two = gen_reg_rtx (SImode);
13011   rtx retry_reg = gen_reg_rtx (SImode);
13012   rtx_code_label *retry_label = NULL;
13013 
13014   if (retry != NULL_RTX)
13015     {
13016       emit_move_insn (retry_reg, retry);
13017       emit_insn (gen_addsi3 (retry_plus_two, retry_reg, const2_rtx));
13018       emit_insn (gen_addsi3 (retry_reg, retry_reg, const1_rtx));
13019       retry_label = gen_label_rtx ();
13020       emit_label (retry_label);
13021     }
13022 
13023   if (clobber_fprs_p)
13024     {
13025       if (TARGET_VX)
13026 	emit_insn (gen_tbegin_1_z13 (gen_rtx_CONST_INT (VOIDmode, TBEGIN_MASK),
13027 				     tdb));
13028       else
13029 	emit_insn (gen_tbegin_1 (gen_rtx_CONST_INT (VOIDmode, TBEGIN_MASK),
13030 				 tdb));
13031     }
13032   else
13033     emit_insn (gen_tbegin_nofloat_1 (gen_rtx_CONST_INT (VOIDmode, TBEGIN_MASK),
13034 				     tdb));
13035 
13036   emit_move_insn (dest, gen_rtx_UNSPEC (SImode,
13037 					gen_rtvec (1, gen_rtx_REG (CCRAWmode,
13038 								   CC_REGNUM)),
13039 					UNSPEC_CC_TO_INT));
13040   if (retry != NULL_RTX)
13041     {
13042       const int CC0 = 1 << 3;
13043       const int CC1 = 1 << 2;
13044       const int CC3 = 1 << 0;
13045       rtx jump;
13046       rtx count = gen_reg_rtx (SImode);
13047       rtx_code_label *leave_label = gen_label_rtx ();
13048 
13049       /* Exit for success and permanent failures.  */
13050       jump = s390_emit_jump (leave_label,
13051 			     gen_rtx_EQ (VOIDmode,
13052 			       gen_rtx_REG (CCRAWmode, CC_REGNUM),
13053 			       gen_rtx_CONST_INT (VOIDmode, CC0 | CC1 | CC3)));
13054       LABEL_NUSES (leave_label) = 1;
13055 
13056       /* CC2 - transient failure. Perform retry with ppa.  */
13057       emit_move_insn (count, retry_plus_two);
13058       emit_insn (gen_subsi3 (count, count, retry_reg));
13059       emit_insn (gen_tx_assist (count));
13060       jump = emit_jump_insn (gen_doloop_si64 (retry_label,
13061 					      retry_reg,
13062 					      retry_reg));
13063       JUMP_LABEL (jump) = retry_label;
13064       LABEL_NUSES (retry_label) = 1;
13065       emit_label (leave_label);
13066     }
13067 }
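
/* Illustrative control flow of the retry variant emitted above (pseudo
   steps, not literal insns):

     retry_label:
       tbegin
       dest = current condition code
       branch to leave_label on CC0/CC1/CC3 (success or permanent failure)
       count = (retry + 2) - retry_reg      ; attempts made so far
       ppa (transaction abort assist) with count
       decrement retry_reg, branch back to retry_label while nonzero
     leave_label:                                                        */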
13068 
13069 
13070 /* Return the decl for the target specific builtin with the function
13071    code FCODE.  */
13072 
13073 static tree
13074 s390_builtin_decl (unsigned fcode, bool initialized_p ATTRIBUTE_UNUSED)
13075 {
13076   if (fcode >= S390_BUILTIN_MAX)
13077     return error_mark_node;
13078 
13079   return s390_builtin_decls[fcode];
13080 }
13081 
13082 /* We call mcount before the function prologue.  So a profiled leaf
13083    function should stay a leaf function.  */
13084 
13085 static bool
13086 s390_keep_leaf_when_profiled ()
13087 {
13088   return true;
13089 }
13090 
13091 /* Output assembly code for the trampoline template to
13092    stdio stream FILE.
13093 
13094    On S/390, we use gpr 1 internally in the trampoline code;
13095    gpr 0 is used to hold the static chain.  */
13096 
13097 static void
13098 s390_asm_trampoline_template (FILE *file)
13099 {
13100   rtx op[2];
13101   op[0] = gen_rtx_REG (Pmode, 0);
13102   op[1] = gen_rtx_REG (Pmode, 1);
13103 
13104   if (TARGET_64BIT)
13105     {
13106       output_asm_insn ("basr\t%1,0", op);         /* 2 byte */
13107       output_asm_insn ("lmg\t%0,%1,14(%1)", op);  /* 6 byte */
13108       output_asm_insn ("br\t%1", op);             /* 2 byte */
13109       ASM_OUTPUT_SKIP (file, (HOST_WIDE_INT)(TRAMPOLINE_SIZE - 10));
13110     }
13111   else
13112     {
13113       output_asm_insn ("basr\t%1,0", op);         /* 2 byte */
13114       output_asm_insn ("lm\t%0,%1,6(%1)", op);    /* 4 byte */
13115       output_asm_insn ("br\t%1", op);             /* 2 byte */
13116       ASM_OUTPUT_SKIP (file, (HOST_WIDE_INT)(TRAMPOLINE_SIZE - 8));
13117     }
13118 }
13119 
13120 /* Emit RTL insns to initialize the variable parts of a trampoline.
13121    FNADDR is an RTX for the address of the function's pure code.
13122    CXT is an RTX for the static chain value for the function.  */
13123 
13124 static void
13125 s390_trampoline_init (rtx m_tramp, tree fndecl, rtx cxt)
13126 {
13127   rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
13128   rtx mem;
13129 
13130   emit_block_move (m_tramp, assemble_trampoline_template (),
13131 		   GEN_INT (2 * UNITS_PER_LONG), BLOCK_OP_NORMAL);
13132 
13133   mem = adjust_address (m_tramp, Pmode, 2 * UNITS_PER_LONG);
13134   emit_move_insn (mem, cxt);
13135   mem = adjust_address (m_tramp, Pmode, 3 * UNITS_PER_LONG);
13136   emit_move_insn (mem, fnaddr);
13137 }
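/* Taken together with s390_asm_trampoline_template, the initialized
   trampoline has roughly this layout for TARGET_64BIT
   (UNITS_PER_LONG == 8, offsets in bytes):

      0  basr  %r1,0
      2  lmg   %r0,%r1,14(%r1)
      8  br    %r1
     10  padding
     16  static chain value       (CXT, stored above)
     24  target function address  (FNADDR, stored above)

   Since basr leaves the address of the lmg in %r1, the displacement 14
   resolves to offset 16.  The 31-bit variant is analogous, with 4-byte
   slots at offsets 8 and 12.  */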
13138 
13139 /* Output assembler code to FILE to increment profiler label # LABELNO
13140    for profiling a function entry.  */
13141 
13142 void
13143 s390_function_profiler (FILE *file, int labelno)
13144 {
13145   rtx op[7];
13146 
13147   char label[128];
13148   ASM_GENERATE_INTERNAL_LABEL (label, "LP", labelno);
13149 
13150   fprintf (file, "# function profiler \n");
13151 
13152   op[0] = gen_rtx_REG (Pmode, RETURN_REGNUM);
13153   op[1] = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
13154   op[1] = gen_rtx_MEM (Pmode, plus_constant (Pmode, op[1], UNITS_PER_LONG));
13155 
13156   op[2] = gen_rtx_REG (Pmode, 1);
13157   op[3] = gen_rtx_SYMBOL_REF (Pmode, label);
13158   SYMBOL_REF_FLAGS (op[3]) = SYMBOL_FLAG_LOCAL;
13159 
13160   op[4] = gen_rtx_SYMBOL_REF (Pmode, "_mcount");
13161   if (flag_pic)
13162     {
13163       op[4] = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op[4]), UNSPEC_PLT);
13164       op[4] = gen_rtx_CONST (Pmode, op[4]);
13165     }
13166 
13167   if (TARGET_64BIT)
13168     {
13169       output_asm_insn ("stg\t%0,%1", op);
13170       output_asm_insn ("larl\t%2,%3", op);
13171       output_asm_insn ("brasl\t%0,%4", op);
13172       output_asm_insn ("lg\t%0,%1", op);
13173     }
13174   else if (TARGET_CPU_ZARCH)
13175     {
13176       output_asm_insn ("st\t%0,%1", op);
13177       output_asm_insn ("larl\t%2,%3", op);
13178       output_asm_insn ("brasl\t%0,%4", op);
13179       output_asm_insn ("l\t%0,%1", op);
13180     }
13181   else if (!flag_pic)
13182     {
13183       op[6] = gen_label_rtx ();
13184 
13185       output_asm_insn ("st\t%0,%1", op);
13186       output_asm_insn ("bras\t%2,%l6", op);
13187       output_asm_insn (".long\t%4", op);
13188       output_asm_insn (".long\t%3", op);
13189       targetm.asm_out.internal_label (file, "L", CODE_LABEL_NUMBER (op[6]));
13190       output_asm_insn ("l\t%0,0(%2)", op);
13191       output_asm_insn ("l\t%2,4(%2)", op);
13192       output_asm_insn ("basr\t%0,%0", op);
13193       output_asm_insn ("l\t%0,%1", op);
13194     }
13195   else
13196     {
13197       op[5] = gen_label_rtx ();
13198       op[6] = gen_label_rtx ();
13199 
13200       output_asm_insn ("st\t%0,%1", op);
13201       output_asm_insn ("bras\t%2,%l6", op);
13202       targetm.asm_out.internal_label (file, "L", CODE_LABEL_NUMBER (op[5]));
13203       output_asm_insn (".long\t%4-%l5", op);
13204       output_asm_insn (".long\t%3-%l5", op);
13205       targetm.asm_out.internal_label (file, "L", CODE_LABEL_NUMBER (op[6]));
13206       output_asm_insn ("lr\t%0,%2", op);
13207       output_asm_insn ("a\t%0,0(%2)", op);
13208       output_asm_insn ("a\t%2,4(%2)", op);
13209       output_asm_insn ("basr\t%0,%0", op);
13210       output_asm_insn ("l\t%0,%1", op);
13211     }
13212 }
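/* For TARGET_64BIT the code emitted above is, schematically:

     stg   %r14,8(%r15)
     larl  %r1,.LPn
     brasl %r14,_mcount
     lg    %r14,8(%r15)

   i.e. the return address register is spilled at UNITS_PER_LONG above
   the stack pointer, %r1 is loaded with the address of the counter
   label (presumably picked up by _mcount), _mcount is called (through
   the PLT when generating PIC code), and %r14 is restored.  */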
13213 
13214 /* Encode symbol attributes (local vs. global, tls model) of a SYMBOL_REF
13215    into its SYMBOL_REF_FLAGS.  */
13216 
13217 static void
13218 s390_encode_section_info (tree decl, rtx rtl, int first)
13219 {
13220   default_encode_section_info (decl, rtl, first);
13221 
13222   if (TREE_CODE (decl) == VAR_DECL)
13223     {
13224       /* Store the alignment to be able to check if we can use
13225 	 a larl/load-relative instruction.  We only handle the cases
13226 	 that can go wrong (i.e. no FUNC_DECLs).  */
13227       if (DECL_ALIGN (decl) == 0 || DECL_ALIGN (decl) % 16)
13228 	SYMBOL_FLAG_SET_NOTALIGN2 (XEXP (rtl, 0));
13229       else if (DECL_ALIGN (decl) % 32)
13230 	SYMBOL_FLAG_SET_NOTALIGN4 (XEXP (rtl, 0));
13231       else if (DECL_ALIGN (decl) % 64)
13232 	SYMBOL_FLAG_SET_NOTALIGN8 (XEXP (rtl, 0));
13233     }
13234 
13235   /* Literal pool references don't have a decl so they are handled
13236      differently here.  We rely on the information in the MEM_ALIGN
13237      entry to decide upon the alignment.  */
13238   if (MEM_P (rtl)
13239       && GET_CODE (XEXP (rtl, 0)) == SYMBOL_REF
13240       && TREE_CONSTANT_POOL_ADDRESS_P (XEXP (rtl, 0)))
13241     {
13242       if (MEM_ALIGN (rtl) == 0 || MEM_ALIGN (rtl) % 16)
13243 	SYMBOL_FLAG_SET_NOTALIGN2 (XEXP (rtl, 0));
13244       else if (MEM_ALIGN (rtl) % 32)
13245 	SYMBOL_FLAG_SET_NOTALIGN4 (XEXP (rtl, 0));
13246       else if (MEM_ALIGN (rtl) % 64)
13247 	SYMBOL_FLAG_SET_NOTALIGN8 (XEXP (rtl, 0));
13248     }
13249 }
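/* Note: DECL_ALIGN and MEM_ALIGN are measured in bits, so the % 16,
   % 32 and % 64 checks above correspond to 2-, 4- and 8-byte
   alignment, i.e. the operand alignment required by larl and the
   load/store-relative-long instructions.  */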
13250 
13251 /* Output thunk to FILE that implements a C++ virtual function call (with
13252    multiple inheritance) to FUNCTION.  The thunk adjusts the this pointer
13253    by DELTA, and unless VCALL_OFFSET is zero, applies an additional adjustment
13254    stored at VCALL_OFFSET in the vtable whose address is located at offset 0
13255    relative to the resulting this pointer.  */
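/* In C-like pseudo code the emitted thunk performs (sketch only):

     this += DELTA;
     if (VCALL_OFFSET)
       this += *(long *) (*(void **) this + VCALL_OFFSET);
     goto *FUNCTION;

   The code below merely spells this out in assembly for the various
   constant ranges of DELTA and VCALL_OFFSET.  */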
13256 
13257 static void
13258 s390_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
13259 		      HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
13260 		      tree function)
13261 {
13262   rtx op[10];
13263   int nonlocal = 0;
13264 
13265   /* Make sure unwind info is emitted for the thunk if needed.  */
13266   final_start_function (emit_barrier (), file, 1);
13267 
13268   /* Operand 0 is the target function.  */
13269   op[0] = XEXP (DECL_RTL (function), 0);
13270   if (flag_pic && !SYMBOL_REF_LOCAL_P (op[0]))
13271     {
13272       nonlocal = 1;
13273       op[0] = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op[0]),
13274 			      TARGET_64BIT ? UNSPEC_PLT : UNSPEC_GOT);
13275       op[0] = gen_rtx_CONST (Pmode, op[0]);
13276     }
13277 
13278   /* Operand 1 is the 'this' pointer.  */
13279   if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
13280     op[1] = gen_rtx_REG (Pmode, 3);
13281   else
13282     op[1] = gen_rtx_REG (Pmode, 2);
13283 
13284   /* Operand 2 is the delta.  */
13285   op[2] = GEN_INT (delta);
13286 
13287   /* Operand 3 is the vcall_offset.  */
13288   op[3] = GEN_INT (vcall_offset);
13289 
13290   /* Operand 4 is the temporary register.  */
13291   op[4] = gen_rtx_REG (Pmode, 1);
13292 
13293   /* Operands 5 to 8 can be used as labels.  */
13294   op[5] = NULL_RTX;
13295   op[6] = NULL_RTX;
13296   op[7] = NULL_RTX;
13297   op[8] = NULL_RTX;
13298 
13299   /* Operand 9 can be used as a temporary register.  */
13300   op[9] = NULL_RTX;
13301 
13302   /* Generate code.  */
13303   if (TARGET_64BIT)
13304     {
13305       /* Setup literal pool pointer if required.  */
13306       if ((!DISP_IN_RANGE (delta)
13307 	   && !CONST_OK_FOR_K (delta)
13308 	   && !CONST_OK_FOR_Os (delta))
13309 	  || (!DISP_IN_RANGE (vcall_offset)
13310 	      && !CONST_OK_FOR_K (vcall_offset)
13311 	      && !CONST_OK_FOR_Os (vcall_offset)))
13312 	{
13313 	  op[5] = gen_label_rtx ();
13314 	  output_asm_insn ("larl\t%4,%5", op);
13315 	}
13316 
13317       /* Add DELTA to this pointer.  */
13318       if (delta)
13319 	{
13320 	  if (CONST_OK_FOR_J (delta))
13321 	    output_asm_insn ("la\t%1,%2(%1)", op);
13322 	  else if (DISP_IN_RANGE (delta))
13323 	    output_asm_insn ("lay\t%1,%2(%1)", op);
13324 	  else if (CONST_OK_FOR_K (delta))
13325 	    output_asm_insn ("aghi\t%1,%2", op);
13326  	  else if (CONST_OK_FOR_Os (delta))
13327  	    output_asm_insn ("agfi\t%1,%2", op);
13328 	  else
13329 	    {
13330 	      op[6] = gen_label_rtx ();
13331 	      output_asm_insn ("agf\t%1,%6-%5(%4)", op);
13332 	    }
13333 	}
13334 
13335       /* Perform vcall adjustment.  */
13336       if (vcall_offset)
13337 	{
13338 	  if (DISP_IN_RANGE (vcall_offset))
13339 	    {
13340 	      output_asm_insn ("lg\t%4,0(%1)", op);
13341 	      output_asm_insn ("ag\t%1,%3(%4)", op);
13342 	    }
13343 	  else if (CONST_OK_FOR_K (vcall_offset))
13344 	    {
13345 	      output_asm_insn ("lghi\t%4,%3", op);
13346 	      output_asm_insn ("ag\t%4,0(%1)", op);
13347 	      output_asm_insn ("ag\t%1,0(%4)", op);
13348 	    }
13349  	  else if (CONST_OK_FOR_Os (vcall_offset))
13350  	    {
13351  	      output_asm_insn ("lgfi\t%4,%3", op);
13352  	      output_asm_insn ("ag\t%4,0(%1)", op);
13353  	      output_asm_insn ("ag\t%1,0(%4)", op);
13354  	    }
13355 	  else
13356 	    {
13357 	      op[7] = gen_label_rtx ();
13358 	      output_asm_insn ("llgf\t%4,%7-%5(%4)", op);
13359 	      output_asm_insn ("ag\t%4,0(%1)", op);
13360 	      output_asm_insn ("ag\t%1,0(%4)", op);
13361 	    }
13362 	}
13363 
13364       /* Jump to target.  */
13365       output_asm_insn ("jg\t%0", op);
13366 
13367       /* Output literal pool if required.  */
13368       if (op[5])
13369 	{
13370 	  output_asm_insn (".align\t4", op);
13371 	  targetm.asm_out.internal_label (file, "L",
13372 					  CODE_LABEL_NUMBER (op[5]));
13373 	}
13374       if (op[6])
13375 	{
13376 	  targetm.asm_out.internal_label (file, "L",
13377 					  CODE_LABEL_NUMBER (op[6]));
13378 	  output_asm_insn (".long\t%2", op);
13379 	}
13380       if (op[7])
13381 	{
13382 	  targetm.asm_out.internal_label (file, "L",
13383 					  CODE_LABEL_NUMBER (op[7]));
13384 	  output_asm_insn (".long\t%3", op);
13385 	}
13386     }
13387   else
13388     {
13389       /* Setup base pointer if required.  */
13390       if (!vcall_offset
13391 	  || (!DISP_IN_RANGE (delta)
13392               && !CONST_OK_FOR_K (delta)
13393 	      && !CONST_OK_FOR_Os (delta))
13394 	  || (!DISP_IN_RANGE (delta)
13395               && !CONST_OK_FOR_K (vcall_offset)
13396 	      && !CONST_OK_FOR_Os (vcall_offset)))
13397 	{
13398 	  op[5] = gen_label_rtx ();
13399 	  output_asm_insn ("basr\t%4,0", op);
13400 	  targetm.asm_out.internal_label (file, "L",
13401 					  CODE_LABEL_NUMBER (op[5]));
13402 	}
13403 
13404       /* Add DELTA to this pointer.  */
13405       if (delta)
13406 	{
13407 	  if (CONST_OK_FOR_J (delta))
13408 	    output_asm_insn ("la\t%1,%2(%1)", op);
13409 	  else if (DISP_IN_RANGE (delta))
13410 	    output_asm_insn ("lay\t%1,%2(%1)", op);
13411 	  else if (CONST_OK_FOR_K (delta))
13412 	    output_asm_insn ("ahi\t%1,%2", op);
13413 	  else if (CONST_OK_FOR_Os (delta))
13414  	    output_asm_insn ("afi\t%1,%2", op);
13415 	  else
13416 	    {
13417 	      op[6] = gen_label_rtx ();
13418 	      output_asm_insn ("a\t%1,%6-%5(%4)", op);
13419 	    }
13420 	}
13421 
13422       /* Perform vcall adjustment.  */
13423       if (vcall_offset)
13424         {
13425 	  if (CONST_OK_FOR_J (vcall_offset))
13426 	    {
13427 	      output_asm_insn ("l\t%4,0(%1)", op);
13428 	      output_asm_insn ("a\t%1,%3(%4)", op);
13429 	    }
13430 	  else if (DISP_IN_RANGE (vcall_offset))
13431 	    {
13432 	      output_asm_insn ("l\t%4,0(%1)", op);
13433 	      output_asm_insn ("ay\t%1,%3(%4)", op);
13434 	    }
13435 	  else if (CONST_OK_FOR_K (vcall_offset))
13436 	    {
13437 	      output_asm_insn ("lhi\t%4,%3", op);
13438 	      output_asm_insn ("a\t%4,0(%1)", op);
13439 	      output_asm_insn ("a\t%1,0(%4)", op);
13440 	    }
13441 	  else if (CONST_OK_FOR_Os (vcall_offset))
13442  	    {
13443  	      output_asm_insn ("iilf\t%4,%3", op);
13444  	      output_asm_insn ("a\t%4,0(%1)", op);
13445  	      output_asm_insn ("a\t%1,0(%4)", op);
13446  	    }
13447 	  else
13448 	    {
13449 	      op[7] = gen_label_rtx ();
13450 	      output_asm_insn ("l\t%4,%7-%5(%4)", op);
13451 	      output_asm_insn ("a\t%4,0(%1)", op);
13452 	      output_asm_insn ("a\t%1,0(%4)", op);
13453 	    }
13454 
13455 	  /* We had to clobber the base pointer register.
13456 	     Re-setup the base pointer (with a different base).  */
13457 	  op[5] = gen_label_rtx ();
13458 	  output_asm_insn ("basr\t%4,0", op);
13459 	  targetm.asm_out.internal_label (file, "L",
13460 					  CODE_LABEL_NUMBER (op[5]));
13461 	}
13462 
13463       /* Jump to target.  */
13464       op[8] = gen_label_rtx ();
13465 
13466       if (!flag_pic)
13467 	output_asm_insn ("l\t%4,%8-%5(%4)", op);
13468       else if (!nonlocal)
13469 	output_asm_insn ("a\t%4,%8-%5(%4)", op);
13470       /* We cannot call through .plt, since .plt requires %r12 loaded.  */
13471       else if (flag_pic == 1)
13472 	{
13473 	  output_asm_insn ("a\t%4,%8-%5(%4)", op);
13474 	  output_asm_insn ("l\t%4,%0(%4)", op);
13475 	}
13476       else if (flag_pic == 2)
13477 	{
13478 	  op[9] = gen_rtx_REG (Pmode, 0);
13479 	  output_asm_insn ("l\t%9,%8-4-%5(%4)", op);
13480 	  output_asm_insn ("a\t%4,%8-%5(%4)", op);
13481 	  output_asm_insn ("ar\t%4,%9", op);
13482 	  output_asm_insn ("l\t%4,0(%4)", op);
13483 	}
13484 
13485       output_asm_insn ("br\t%4", op);
13486 
13487       /* Output literal pool.  */
13488       output_asm_insn (".align\t4", op);
13489 
13490       if (nonlocal && flag_pic == 2)
13491 	output_asm_insn (".long\t%0", op);
13492       if (nonlocal)
13493 	{
13494 	  op[0] = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
13495 	  SYMBOL_REF_FLAGS (op[0]) = SYMBOL_FLAG_LOCAL;
13496 	}
13497 
13498       targetm.asm_out.internal_label (file, "L", CODE_LABEL_NUMBER (op[8]));
13499       if (!flag_pic)
13500 	output_asm_insn (".long\t%0", op);
13501       else
13502 	output_asm_insn (".long\t%0-%5", op);
13503 
13504       if (op[6])
13505 	{
13506 	  targetm.asm_out.internal_label (file, "L",
13507 					  CODE_LABEL_NUMBER (op[6]));
13508 	  output_asm_insn (".long\t%2", op);
13509 	}
13510       if (op[7])
13511 	{
13512 	  targetm.asm_out.internal_label (file, "L",
13513 					  CODE_LABEL_NUMBER (op[7]));
13514 	  output_asm_insn (".long\t%3", op);
13515 	}
13516     }
13517   final_end_function ();
13518 }
13519 
13520 /* Output either an indirect jump or an indirect call
13521    (RETURN_ADDR_REGNO != INVALID_REGNUM) with target register REGNO
13522    using a branch trampoline disabling branch target prediction.  */
13523 
13524 void
13525 s390_indirect_branch_via_thunk (unsigned int regno,
13526 				unsigned int return_addr_regno,
13527 				rtx comparison_operator,
13528 				enum s390_indirect_branch_type type)
13529 {
13530   enum s390_indirect_branch_option option;
13531 
13532   if (type == s390_indirect_branch_type_return)
13533     {
13534       if (s390_return_addr_from_memory ())
13535 	option = s390_opt_function_return_mem;
13536       else
13537 	option = s390_opt_function_return_reg;
13538     }
13539   else if (type == s390_indirect_branch_type_jump)
13540     option = s390_opt_indirect_branch_jump;
13541   else if (type == s390_indirect_branch_type_call)
13542     option = s390_opt_indirect_branch_call;
13543   else
13544     gcc_unreachable ();
13545 
13546   if (TARGET_INDIRECT_BRANCH_TABLE)
13547     {
13548       char label[32];
13549 
13550       ASM_GENERATE_INTERNAL_LABEL (label,
13551 				   indirect_branch_table_label[option],
13552 				   indirect_branch_table_label_no[option]++);
13553       ASM_OUTPUT_LABEL (asm_out_file, label);
13554     }
13555 
13556   if (return_addr_regno != INVALID_REGNUM)
13557     {
13558       gcc_assert (comparison_operator == NULL_RTX);
13559       fprintf (asm_out_file, " \tbrasl\t%%r%d,", return_addr_regno);
13560     }
13561   else
13562     {
13563       fputs (" \tjg", asm_out_file);
13564       if (comparison_operator != NULL_RTX)
13565 	print_operand (asm_out_file, comparison_operator, 'C');
13566 
13567       fputs ("\t", asm_out_file);
13568     }
13569 
13570   if (TARGET_CPU_Z10)
13571     fprintf (asm_out_file,
13572 	     TARGET_INDIRECT_BRANCH_THUNK_NAME_EXRL "\n",
13573 	     regno);
13574   else
13575     fprintf (asm_out_file,
13576 	     TARGET_INDIRECT_BRANCH_THUNK_NAME_EX "\n",
13577 	     INDIRECT_BRANCH_THUNK_REGNUM, regno);
13578 
13579   if ((option == s390_opt_indirect_branch_jump
13580        && cfun->machine->indirect_branch_jump == indirect_branch_thunk)
13581       || (option == s390_opt_indirect_branch_call
13582 	  && cfun->machine->indirect_branch_call == indirect_branch_thunk)
13583       || (option == s390_opt_function_return_reg
13584 	  && cfun->machine->function_return_reg == indirect_branch_thunk)
13585       || (option == s390_opt_function_return_mem
13586 	  && cfun->machine->function_return_mem == indirect_branch_thunk))
13587     {
13588       if (TARGET_CPU_Z10)
13589 	indirect_branch_z10thunk_mask |= (1 << regno);
13590       else
13591 	indirect_branch_prez10thunk_mask |= (1 << regno);
13592     }
13593 }
13594 
13595 /* Output an inline thunk for indirect jumps.  EXECUTE_TARGET can
13596    either be an address register or a label pointing to the location
13597    of the jump instruction.  */
13598 
13599 void
13600 s390_indirect_branch_via_inline_thunk (rtx execute_target)
13601 {
13602   if (TARGET_INDIRECT_BRANCH_TABLE)
13603     {
13604       char label[32];
13605 
13606       ASM_GENERATE_INTERNAL_LABEL (label,
13607 				   indirect_branch_table_label[s390_opt_indirect_branch_jump],
13608 				   indirect_branch_table_label_no[s390_opt_indirect_branch_jump]++);
13609       ASM_OUTPUT_LABEL (asm_out_file, label);
13610     }
13611 
13612   if (!TARGET_ZARCH)
13613     fputs ("\t.machinemode zarch\n", asm_out_file);
13614 
13615   if (REG_P (execute_target))
13616     fprintf (asm_out_file, "\tex\t%%r0,0(%%r%d)\n", REGNO (execute_target));
13617   else
13618     output_asm_insn ("\texrl\t%%r0,%0", &execute_target);
13619 
13620   if (!TARGET_ZARCH)
13621     fputs ("\t.machinemode esa\n", asm_out_file);
13622 
13623   fputs ("0:\tj\t0b\n", asm_out_file);
13624 }
13625 
13626 static bool
13627 s390_valid_pointer_mode (scalar_int_mode mode)
13628 {
13629   return (mode == SImode || (TARGET_64BIT && mode == DImode));
13630 }
13631 
13632 /* Checks whether the given CALL_EXPR would pass an argument in a
13633    call-saved register.  This is used to decide whether sibling call
13634    optimization could be performed on the respective function
13635    call.  */
13636 
13637 static bool
13638 s390_call_saved_register_used (tree call_expr)
13639 {
13640   CUMULATIVE_ARGS cum_v;
13641   cumulative_args_t cum;
13642   tree parameter;
13643   machine_mode mode;
13644   tree type;
13645   rtx parm_rtx;
13646   int reg, i;
13647 
13648   INIT_CUMULATIVE_ARGS (cum_v, NULL, NULL, 0, 0);
13649   cum = pack_cumulative_args (&cum_v);
13650 
13651   for (i = 0; i < call_expr_nargs (call_expr); i++)
13652     {
13653       parameter = CALL_EXPR_ARG (call_expr, i);
13654       gcc_assert (parameter);
13655 
13656       /* For an undeclared variable passed as parameter we will get
13657 	 an ERROR_MARK node here.  */
13658       if (TREE_CODE (parameter) == ERROR_MARK)
13659 	return true;
13660 
13661       type = TREE_TYPE (parameter);
13662       gcc_assert (type);
13663 
13664       mode = TYPE_MODE (type);
13665       gcc_assert (mode);
13666 
13667       /* We assume that in the target function all parameters are
13668 	 named.  This only has an impact on vector argument register
13669 	 usage, none of which is call-saved.  */
13670       if (pass_by_reference (&cum_v, mode, type, true))
13671  	{
13672  	  mode = Pmode;
13673  	  type = build_pointer_type (type);
13674  	}
13675 
13676        parm_rtx = s390_function_arg (cum, mode, type, true);
13677 
13678        s390_function_arg_advance (cum, mode, type, true);
13679 
13680        if (!parm_rtx)
13681 	 continue;
13682 
13683        if (REG_P (parm_rtx))
13684   	 {
13685 	   for (reg = 0; reg < REG_NREGS (parm_rtx); reg++)
13686 	     if (!call_used_regs[reg + REGNO (parm_rtx)])
13687  	       return true;
13688 	 }
13689 
13690        if (GET_CODE (parm_rtx) == PARALLEL)
13691 	 {
13692 	   int i;
13693 
13694 	   for (i = 0; i < XVECLEN (parm_rtx, 0); i++)
13695 	     {
13696 	       rtx r = XEXP (XVECEXP (parm_rtx, 0, i), 0);
13697 
13698 	       gcc_assert (REG_P (r));
13699 
13700 	       for (reg = 0; reg < REG_NREGS (r); reg++)
13701 		 if (!call_used_regs[reg + REGNO (r)])
13702 		   return true;
13703 	     }
13704 	 }
13705 
13706     }
13707   return false;
13708 }
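/* Example: for a callee such as

     void callee (int a, int b, int c, int d, int e);

   the fifth argument is passed in %r6.  Since %r6 is call-saved in the
   s390 ABI, the check above returns true and the sibcall optimization
   is suppressed (see s390_function_ok_for_sibcall below).  */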
13709 
13710 /* Return true if the given call expression can be
13711    turned into a sibling call.
13712    DECL holds the declaration of the function to be called whereas
13713    EXP is the call expression itself.  */
13714 
13715 static bool
13716 s390_function_ok_for_sibcall (tree decl, tree exp)
13717 {
13718   /* The TPF epilogue uses register 1.  */
13719   if (TARGET_TPF_PROFILING)
13720     return false;
13721 
13722   /* The 31 bit PLT code uses register 12 (GOT pointer - caller saved)
13723      which would have to be restored before the sibcall.  */
13724   if (!TARGET_64BIT && flag_pic && decl && !targetm.binds_local_p (decl))
13725     return false;
13726 
13727   /* The thunks for indirect branches require r1 if no exrl is
13728      available.  r1 might not be available when doing a sibling
13729      call.  */
13730   if (TARGET_INDIRECT_BRANCH_NOBP_CALL
13731       && !TARGET_CPU_Z10
13732       && !decl)
13733     return false;
13734 
13735   /* Register 6 on s390 is available as an argument register but is
13736      call-saved (the callee must preserve it).  This makes functions needing
13737      this register for arguments not suitable for sibcalls.  */
13738   return !s390_call_saved_register_used (exp);
13739 }
13740 
13741 /* Return the fixed registers used for condition codes.  */
13742 
13743 static bool
13744 s390_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
13745 {
13746   *p1 = CC_REGNUM;
13747   *p2 = INVALID_REGNUM;
13748 
13749   return true;
13750 }
13751 
13752 /* This function is used by the call expanders of the machine description.
13753    It emits the call insn itself together with the necessary operations
13754    to adjust the target address and returns the emitted insn.
13755    ADDR_LOCATION is the target address rtx
13756    TLS_CALL the location of the thread-local symbol
13757    RESULT_REG the register where the result of the call should be stored
13758    RETADDR_REG the register where the return address should be stored
13759                If this parameter is NULL_RTX the call is considered
13760                to be a sibling call.  */
13761 
13762 rtx_insn *
13763 s390_emit_call (rtx addr_location, rtx tls_call, rtx result_reg,
13764 		rtx retaddr_reg)
13765 {
13766   bool plt_call = false;
13767   rtx_insn *insn;
13768   rtx vec[4] = { NULL_RTX };
13769   int elts = 0;
13770   rtx *call = &vec[0];
13771   rtx *clobber_ret_reg = &vec[1];
13772   rtx *use = &vec[2];
13773   rtx *clobber_thunk_reg = &vec[3];
13774   int i;
13775 
13776   /* Direct function calls need special treatment.  */
13777   if (GET_CODE (addr_location) == SYMBOL_REF)
13778     {
13779       /* When calling a global routine in PIC mode, we must
13780          replace the symbol itself with the PLT stub.  */
13781       if (flag_pic && !SYMBOL_REF_LOCAL_P (addr_location))
13782         {
13783 	  if (TARGET_64BIT || retaddr_reg != NULL_RTX)
13784 	    {
13785 	      addr_location = gen_rtx_UNSPEC (Pmode,
13786 					      gen_rtvec (1, addr_location),
13787 					      UNSPEC_PLT);
13788 	      addr_location = gen_rtx_CONST (Pmode, addr_location);
13789 	      plt_call = true;
13790 	    }
13791 	  else
13792 	    /* For -fpic code the PLT entries might use r12 which is
13793 	       call-saved.  Therefore we cannot do a sibcall when
13794 	       calling directly using a symbol ref.  When reaching
13795 	       this point we decided (in s390_function_ok_for_sibcall)
13796 	       to do a sibcall for a function pointer but one of the
13797 	       optimizers was able to get rid of the function pointer
13798 	       by propagating the symbol ref into the call.  This
13799 	       optimization is illegal for S/390 so we turn the direct
13800 	       call into an indirect call again.  */
13801 	    addr_location = force_reg (Pmode, addr_location);
13802         }
13803 
13804       /* Unless we can use the bras(l) insn, force the
13805          routine address into a register.  */
13806       if (!TARGET_SMALL_EXEC && !TARGET_CPU_ZARCH)
13807         {
13808 	  if (flag_pic)
13809 	    addr_location = legitimize_pic_address (addr_location, 0);
13810 	  else
13811 	    addr_location = force_reg (Pmode, addr_location);
13812 	}
13813     }
13814 
13815   /* If it is already an indirect call or the code above moved the
13816      SYMBOL_REF to somewhere else, make sure the address can be found in
13817      register 1.  */
13818   if (retaddr_reg == NULL_RTX
13819       && GET_CODE (addr_location) != SYMBOL_REF
13820       && !plt_call)
13821     {
13822       emit_move_insn (gen_rtx_REG (Pmode, SIBCALL_REGNUM), addr_location);
13823       addr_location = gen_rtx_REG (Pmode, SIBCALL_REGNUM);
13824     }
13825 
13826   if (TARGET_INDIRECT_BRANCH_NOBP_CALL
13827       && GET_CODE (addr_location) != SYMBOL_REF
13828       && !plt_call)
13829     {
13830       /* Indirect branch thunks require the target to be a single GPR.  */
13831       addr_location = force_reg (Pmode, addr_location);
13832 
13833       /* Without exrl the indirect branch thunks need an additional
13834 	 register for larl;ex.  */
13835       if (!TARGET_CPU_Z10)
13836 	{
13837 	  *clobber_thunk_reg = gen_rtx_REG (Pmode, INDIRECT_BRANCH_THUNK_REGNUM);
13838 	  *clobber_thunk_reg = gen_rtx_CLOBBER (VOIDmode, *clobber_thunk_reg);
13839 	}
13840     }
13841 
13842   addr_location = gen_rtx_MEM (QImode, addr_location);
13843   *call = gen_rtx_CALL (VOIDmode, addr_location, const0_rtx);
13844 
13845   if (result_reg != NULL_RTX)
13846     *call = gen_rtx_SET (result_reg, *call);
13847 
13848   if (retaddr_reg != NULL_RTX)
13849     {
13850       *clobber_ret_reg = gen_rtx_CLOBBER (VOIDmode, retaddr_reg);
13851 
13852       if (tls_call != NULL_RTX)
13853 	*use = gen_rtx_USE (VOIDmode, tls_call);
13854     }
13855 
13856 
13857   for (i = 0; i < 4; i++)
13858     if (vec[i] != NULL_RTX)
13859       elts++;
13860 
13861   if (elts > 1)
13862     {
13863       rtvec v;
13864       int e = 0;
13865 
13866       v = rtvec_alloc (elts);
13867       for (i = 0; i < 4; i++)
13868 	if (vec[i] != NULL_RTX)
13869 	  {
13870 	    RTVEC_ELT (v, e) = vec[i];
13871 	    e++;
13872 	  }
13873 
13874       *call = gen_rtx_PARALLEL (VOIDmode, v);
13875     }
13876 
13877   insn = emit_call_insn (*call);
13878 
13879   /* 31-bit PLT stubs and tls calls use the GOT register implicitly.  */
13880   if ((!TARGET_64BIT && plt_call) || tls_call != NULL_RTX)
13881     {
13882       /* s390_function_ok_for_sibcall should
13883 	 have denied sibcalls in this case.  */
13884       gcc_assert (retaddr_reg != NULL_RTX);
13885       use_reg (&CALL_INSN_FUNCTION_USAGE (insn), gen_rtx_REG (Pmode, 12));
13886     }
13887   return insn;
13888 }
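/* The insn emitted above therefore has the rough shape (sketch):

     (parallel [(set result_reg (call (mem:QI addr_location) 0))
		(clobber retaddr_reg)
		(use tls_call)
		(clobber thunk_reg)])

   where only the elements actually required are present; if a single
   element remains (e.g. a plain sibling call without a result), the
   PARALLEL wrapper is omitted.  */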
13889 
13890 /* Implement TARGET_CONDITIONAL_REGISTER_USAGE.  */
13891 
13892 static void
13893 s390_conditional_register_usage (void)
13894 {
13895   int i;
13896 
13897   if (flag_pic)
13898     {
13899       fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
13900       call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
13901     }
13902   if (TARGET_CPU_ZARCH)
13903     {
13904       fixed_regs[BASE_REGNUM] = 0;
13905       call_used_regs[BASE_REGNUM] = 0;
13906       fixed_regs[RETURN_REGNUM] = 0;
13907       call_used_regs[RETURN_REGNUM] = 0;
13908     }
13909   if (TARGET_64BIT)
13910     {
13911       for (i = FPR8_REGNUM; i <= FPR15_REGNUM; i++)
13912 	call_used_regs[i] = call_really_used_regs[i] = 0;
13913     }
13914   else
13915     {
13916       call_used_regs[FPR4_REGNUM] = call_really_used_regs[FPR4_REGNUM] = 0;
13917       call_used_regs[FPR6_REGNUM] = call_really_used_regs[FPR6_REGNUM] = 0;
13918     }
13919 
13920   if (TARGET_SOFT_FLOAT)
13921     {
13922       for (i = FPR0_REGNUM; i <= FPR15_REGNUM; i++)
13923 	call_used_regs[i] = fixed_regs[i] = 1;
13924     }
13925 
13926   /* Disable v16 - v31 for non-vector target.  */
13927   if (!TARGET_VX)
13928     {
13929       for (i = VR16_REGNUM; i <= VR31_REGNUM; i++)
13930 	fixed_regs[i] = call_used_regs[i] = call_really_used_regs[i] = 1;
13931     }
13932 }
13933 
13934 /* Corresponding function to eh_return expander.  */
13935 
13936 static GTY(()) rtx s390_tpf_eh_return_symbol;
13937 void
13938 s390_emit_tpf_eh_return (rtx target)
13939 {
13940   rtx_insn *insn;
13941   rtx reg, orig_ra;
13942 
13943   if (!s390_tpf_eh_return_symbol)
13944     s390_tpf_eh_return_symbol = gen_rtx_SYMBOL_REF (Pmode, "__tpf_eh_return");
13945 
13946   reg = gen_rtx_REG (Pmode, 2);
13947   orig_ra = gen_rtx_REG (Pmode, 3);
13948 
13949   emit_move_insn (reg, target);
13950   emit_move_insn (orig_ra, get_hard_reg_initial_val (Pmode, RETURN_REGNUM));
13951   insn = s390_emit_call (s390_tpf_eh_return_symbol, NULL_RTX, reg,
13952                                      gen_rtx_REG (Pmode, RETURN_REGNUM));
13953   use_reg (&CALL_INSN_FUNCTION_USAGE (insn), reg);
13954   use_reg (&CALL_INSN_FUNCTION_USAGE (insn), orig_ra);
13955 
13956   emit_move_insn (EH_RETURN_HANDLER_RTX, reg);
13957 }
13958 
13959 /* Rework the prologue/epilogue to avoid saving/restoring
13960    registers unnecessarily.  */
13961 
13962 static void
13963 s390_optimize_prologue (void)
13964 {
13965   rtx_insn *insn, *new_insn, *next_insn;
13966 
13967   /* Do a final recompute of the frame-related data.  */
13968   s390_optimize_register_info ();
13969 
13970   /* If all special registers are in fact used, there's nothing we
13971      can do, so no point in walking the insn list.  */
13972 
13973   if (cfun_frame_layout.first_save_gpr <= BASE_REGNUM
13974       && cfun_frame_layout.last_save_gpr >= BASE_REGNUM
13975       && (TARGET_CPU_ZARCH
13976           || (cfun_frame_layout.first_save_gpr <= RETURN_REGNUM
13977               && cfun_frame_layout.last_save_gpr >= RETURN_REGNUM)))
13978     return;
13979 
13980   /* Search for prologue/epilogue insns and replace them.  */
13981 
13982   for (insn = get_insns (); insn; insn = next_insn)
13983     {
13984       int first, last, off;
13985       rtx set, base, offset;
13986       rtx pat;
13987 
13988       next_insn = NEXT_INSN (insn);
13989 
13990       if (! NONJUMP_INSN_P (insn) || ! RTX_FRAME_RELATED_P (insn))
13991 	continue;
13992 
13993       pat = PATTERN (insn);
13994 
13995       /* Remove ldgr/lgdr instructions used for saving and restoring
13996 	 GPRs if possible.  */
13997       if (TARGET_Z10)
13998 	{
13999 	  rtx tmp_pat = pat;
14000 
14001 	  if (INSN_CODE (insn) == CODE_FOR_stack_restore_from_fpr)
14002 	    tmp_pat = XVECEXP (pat, 0, 0);
14003 
14004 	  if (GET_CODE (tmp_pat) == SET
14005 	      && GET_MODE (SET_SRC (tmp_pat)) == DImode
14006 	      && REG_P (SET_SRC (tmp_pat))
14007 	      && REG_P (SET_DEST (tmp_pat)))
14008 	    {
14009 	      int src_regno = REGNO (SET_SRC (tmp_pat));
14010 	      int dest_regno = REGNO (SET_DEST (tmp_pat));
14011 	      int gpr_regno;
14012 	      int fpr_regno;
14013 
14014 	      if (!((GENERAL_REGNO_P (src_regno)
14015 		     && FP_REGNO_P (dest_regno))
14016 		    || (FP_REGNO_P (src_regno)
14017 			&& GENERAL_REGNO_P (dest_regno))))
14018 		continue;
14019 
14020 	      gpr_regno = GENERAL_REGNO_P (src_regno) ? src_regno : dest_regno;
14021 	      fpr_regno = FP_REGNO_P (src_regno) ? src_regno : dest_regno;
14022 
14023 	      /* GPR must be call-saved, FPR must be call-clobbered.  */
14024 	      if (!call_really_used_regs[fpr_regno]
14025 		  || call_really_used_regs[gpr_regno])
14026 		continue;
14027 
14028 	      /* It must not happen that what we once saved in an FPR now
14029 		 needs a stack slot.  */
14030 	      gcc_assert (cfun_gpr_save_slot (gpr_regno) != SAVE_SLOT_STACK);
14031 
14032 	      if (cfun_gpr_save_slot (gpr_regno) == SAVE_SLOT_NONE)
14033 		{
14034 		  remove_insn (insn);
14035 		  continue;
14036 		}
14037 	    }
14038 	}
14039 
14040       if (GET_CODE (pat) == PARALLEL
14041 	  && store_multiple_operation (pat, VOIDmode))
14042 	{
14043 	  set = XVECEXP (pat, 0, 0);
14044 	  first = REGNO (SET_SRC (set));
14045 	  last = first + XVECLEN (pat, 0) - 1;
14046 	  offset = const0_rtx;
14047 	  base = eliminate_constant_term (XEXP (SET_DEST (set), 0), &offset);
14048 	  off = INTVAL (offset);
14049 
14050 	  if (GET_CODE (base) != REG || off < 0)
14051 	    continue;
14052 	  if (cfun_frame_layout.first_save_gpr != -1
14053 	      && (cfun_frame_layout.first_save_gpr < first
14054 		  || cfun_frame_layout.last_save_gpr > last))
14055 	    continue;
14056 	  if (REGNO (base) != STACK_POINTER_REGNUM
14057 	      && REGNO (base) != HARD_FRAME_POINTER_REGNUM)
14058 	    continue;
14059 	  if (first > BASE_REGNUM || last < BASE_REGNUM)
14060 	    continue;
14061 
14062 	  if (cfun_frame_layout.first_save_gpr != -1)
14063 	    {
14064 	      rtx s_pat = save_gprs (base,
14065 				     off + (cfun_frame_layout.first_save_gpr
14066 					    - first) * UNITS_PER_LONG,
14067 				     cfun_frame_layout.first_save_gpr,
14068 				     cfun_frame_layout.last_save_gpr);
14069 	      new_insn = emit_insn_before (s_pat, insn);
14070 	      INSN_ADDRESSES_NEW (new_insn, -1);
14071 	    }
14072 
14073 	  remove_insn (insn);
14074 	  continue;
14075 	}
14076 
14077       if (cfun_frame_layout.first_save_gpr == -1
14078 	  && GET_CODE (pat) == SET
14079 	  && GENERAL_REG_P (SET_SRC (pat))
14080 	  && GET_CODE (SET_DEST (pat)) == MEM)
14081 	{
14082 	  set = pat;
14083 	  first = REGNO (SET_SRC (set));
14084 	  offset = const0_rtx;
14085 	  base = eliminate_constant_term (XEXP (SET_DEST (set), 0), &offset);
14086 	  off = INTVAL (offset);
14087 
14088 	  if (GET_CODE (base) != REG || off < 0)
14089 	    continue;
14090 	  if (REGNO (base) != STACK_POINTER_REGNUM
14091 	      && REGNO (base) != HARD_FRAME_POINTER_REGNUM)
14092 	    continue;
14093 
14094 	  remove_insn (insn);
14095 	  continue;
14096 	}
14097 
14098       if (GET_CODE (pat) == PARALLEL
14099 	  && load_multiple_operation (pat, VOIDmode))
14100 	{
14101 	  set = XVECEXP (pat, 0, 0);
14102 	  first = REGNO (SET_DEST (set));
14103 	  last = first + XVECLEN (pat, 0) - 1;
14104 	  offset = const0_rtx;
14105 	  base = eliminate_constant_term (XEXP (SET_SRC (set), 0), &offset);
14106 	  off = INTVAL (offset);
14107 
14108 	  if (GET_CODE (base) != REG || off < 0)
14109 	    continue;
14110 
14111 	  if (cfun_frame_layout.first_restore_gpr != -1
14112 	      && (cfun_frame_layout.first_restore_gpr < first
14113 		  || cfun_frame_layout.last_restore_gpr > last))
14114 	    continue;
14115 	  if (REGNO (base) != STACK_POINTER_REGNUM
14116 	      && REGNO (base) != HARD_FRAME_POINTER_REGNUM)
14117 	    continue;
14118 	  if (first > BASE_REGNUM || last < BASE_REGNUM)
14119 	    continue;
14120 
14121 	  if (cfun_frame_layout.first_restore_gpr != -1)
14122 	    {
14123 	      rtx rpat = restore_gprs (base,
14124 				       off + (cfun_frame_layout.first_restore_gpr
14125 					      - first) * UNITS_PER_LONG,
14126 				       cfun_frame_layout.first_restore_gpr,
14127 				       cfun_frame_layout.last_restore_gpr);
14128 
14129 	      /* Remove REG_CFA_RESTOREs for registers that we no
14130 		 longer need to save.  */
14131 	      REG_NOTES (rpat) = REG_NOTES (insn);
14132 	      for (rtx *ptr = &REG_NOTES (rpat); *ptr; )
14133 		if (REG_NOTE_KIND (*ptr) == REG_CFA_RESTORE
14134 		    && ((int) REGNO (XEXP (*ptr, 0))
14135 			< cfun_frame_layout.first_restore_gpr))
14136 		  *ptr = XEXP (*ptr, 1);
14137 		else
14138 		  ptr = &XEXP (*ptr, 1);
14139 	      new_insn = emit_insn_before (rpat, insn);
14140 	      RTX_FRAME_RELATED_P (new_insn) = 1;
14141 	      INSN_ADDRESSES_NEW (new_insn, -1);
14142 	    }
14143 
14144 	  remove_insn (insn);
14145 	  continue;
14146 	}
14147 
14148       if (cfun_frame_layout.first_restore_gpr == -1
14149 	  && GET_CODE (pat) == SET
14150 	  && GENERAL_REG_P (SET_DEST (pat))
14151 	  && GET_CODE (SET_SRC (pat)) == MEM)
14152 	{
14153 	  set = pat;
14154 	  first = REGNO (SET_DEST (set));
14155 	  offset = const0_rtx;
14156 	  base = eliminate_constant_term (XEXP (SET_SRC (set), 0), &offset);
14157 	  off = INTVAL (offset);
14158 
14159 	  if (GET_CODE (base) != REG || off < 0)
14160 	    continue;
14161 
14162 	  if (REGNO (base) != STACK_POINTER_REGNUM
14163 	      && REGNO (base) != HARD_FRAME_POINTER_REGNUM)
14164 	    continue;
14165 
14166 	  remove_insn (insn);
14167 	  continue;
14168 	}
14169     }
14170 }
14171 
14172 /* On z10 and later the dynamic branch prediction must see the
14173    backward jump within a certain window.  If not, it falls back to
14174    the static prediction.  This function rearranges the loop backward
14175    branch in a way which makes the static prediction always correct.
14176    The function returns true if it added an instruction.  */
14177 static bool
14178 s390_fix_long_loop_prediction (rtx_insn *insn)
14179 {
14180   rtx set = single_set (insn);
14181   rtx code_label, label_ref;
14182   rtx_insn *uncond_jump;
14183   rtx_insn *cur_insn;
14184   rtx tmp;
14185   int distance;
14186 
14187   /* This will exclude branch on count and branch on index patterns
14188      since these are correctly statically predicted.  */
14189   if (!set
14190       || SET_DEST (set) != pc_rtx
14191       || GET_CODE (SET_SRC(set)) != IF_THEN_ELSE)
14192     return false;
14193 
14194   /* Skip conditional returns.  */
14195   if (ANY_RETURN_P (XEXP (SET_SRC (set), 1))
14196       && XEXP (SET_SRC (set), 2) == pc_rtx)
14197     return false;
14198 
14199   label_ref = (GET_CODE (XEXP (SET_SRC (set), 1)) == LABEL_REF ?
14200 	       XEXP (SET_SRC (set), 1) : XEXP (SET_SRC (set), 2));
14201 
14202   gcc_assert (GET_CODE (label_ref) == LABEL_REF);
14203 
14204   code_label = XEXP (label_ref, 0);
14205 
14206   if (INSN_ADDRESSES (INSN_UID (code_label)) == -1
14207       || INSN_ADDRESSES (INSN_UID (insn)) == -1
14208       || (INSN_ADDRESSES (INSN_UID (insn))
14209 	  - INSN_ADDRESSES (INSN_UID (code_label)) < PREDICT_DISTANCE))
14210     return false;
14211 
14212   for (distance = 0, cur_insn = PREV_INSN (insn);
14213        distance < PREDICT_DISTANCE - 6;
14214        distance += get_attr_length (cur_insn), cur_insn = PREV_INSN (cur_insn))
14215     if (!cur_insn || JUMP_P (cur_insn) || LABEL_P (cur_insn))
14216       return false;
14217 
14218   rtx_code_label *new_label = gen_label_rtx ();
14219   uncond_jump = emit_jump_insn_after (
14220 		  gen_rtx_SET (pc_rtx,
14221 			       gen_rtx_LABEL_REF (VOIDmode, code_label)),
14222 		  insn);
14223   emit_label_after (new_label, uncond_jump);
14224 
14225   tmp = XEXP (SET_SRC (set), 1);
14226   XEXP (SET_SRC (set), 1) = XEXP (SET_SRC (set), 2);
14227   XEXP (SET_SRC (set), 2) = tmp;
14228   INSN_CODE (insn) = -1;
14229 
14230   XEXP (label_ref, 0) = new_label;
14231   JUMP_LABEL (insn) = new_label;
14232   JUMP_LABEL (uncond_jump) = code_label;
14233 
14234   return true;
14235 }
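/* Schematically, s390_fix_long_loop_prediction turns

     ...loop body...
     jCOND  LOOP_HEAD          (long backward conditional branch)

   into

     ...loop body...
     j!COND NEW_LABEL          (short forward conditional branch)
     j      LOOP_HEAD          (unconditional backward branch)
   NEW_LABEL:

   so the backward branch is always taken and the static prediction of
   the now-forward conditional branch is correct for all but the final
   iteration.  */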
14236 
14237 /* Returns 1 if INSN reads the value of REG for purposes not related
14238    to addressing of memory, and 0 otherwise.  */
14239 static int
14240 s390_non_addr_reg_read_p (rtx reg, rtx_insn *insn)
14241 {
14242   return reg_referenced_p (reg, PATTERN (insn))
14243     && !reg_used_in_mem_p (REGNO (reg), PATTERN (insn));
14244 }
14245 
14246 /* Starting from INSN find_cond_jump looks downwards in the insn
14247    stream for a single jump insn which is the last user of the
14248    condition code set in INSN.  */
14249 static rtx_insn *
14250 find_cond_jump (rtx_insn *insn)
14251 {
14252   for (; insn; insn = NEXT_INSN (insn))
14253     {
14254       rtx ite, cc;
14255 
14256       if (LABEL_P (insn))
14257 	break;
14258 
14259       if (!JUMP_P (insn))
14260 	{
14261 	  if (reg_mentioned_p (gen_rtx_REG (CCmode, CC_REGNUM), insn))
14262 	    break;
14263 	  continue;
14264 	}
14265 
14266       /* This will be triggered by a return.  */
14267       if (GET_CODE (PATTERN (insn)) != SET)
14268 	break;
14269 
14270       gcc_assert (SET_DEST (PATTERN (insn)) == pc_rtx);
14271       ite = SET_SRC (PATTERN (insn));
14272 
14273       if (GET_CODE (ite) != IF_THEN_ELSE)
14274 	break;
14275 
14276       cc = XEXP (XEXP (ite, 0), 0);
14277       if (!REG_P (cc) || !CC_REGNO_P (REGNO (cc)))
14278 	break;
14279 
14280       if (find_reg_note (insn, REG_DEAD, cc))
14281 	return insn;
14282       break;
14283     }
14284 
14285   return NULL;
14286 }
14287 
14288 /* Swap the condition in COND and the operands in OP0 and OP1 so that
14289    the semantics does not change.  If NULL_RTX is passed as COND the
14290    function tries to find the conditional jump starting with INSN.  */
14291 static void
14292 s390_swap_cmp (rtx cond, rtx *op0, rtx *op1, rtx_insn *insn)
14293 {
14294   rtx tmp = *op0;
14295 
14296   if (cond == NULL_RTX)
14297     {
14298       rtx_insn *jump = find_cond_jump (NEXT_INSN (insn));
14299       rtx set = jump ? single_set (jump) : NULL_RTX;
14300 
14301       if (set == NULL_RTX)
14302 	return;
14303 
14304       cond = XEXP (SET_SRC (set), 0);
14305     }
14306 
14307   *op0 = *op1;
14308   *op1 = tmp;
14309   PUT_CODE (cond, swap_condition (GET_CODE (cond)));
14310 }
14311 
14312 /* On z10, instructions of the compare-and-branch family have the
14313    property to access the register occurring as second operand with
14314    its bits complemented.  If such a compare is grouped with a second
14315    instruction that accesses the same register non-complemented, and
14316    if that register's value is delivered via a bypass, then the
14317    pipeline recycles, thereby causing significant performance decline.
14318    This function locates such situations and exchanges the two
14319    operands of the compare.  The function returns true whenever it
14320    added an insn.  */
14321 static bool
14322 s390_z10_optimize_cmp (rtx_insn *insn)
14323 {
14324   rtx_insn *prev_insn, *next_insn;
14325   bool insn_added_p = false;
14326   rtx cond, *op0, *op1;
14327 
14328   if (GET_CODE (PATTERN (insn)) == PARALLEL)
14329     {
14330       /* Handle compare and branch and branch on count
14331 	 instructions.  */
14332       rtx pattern = single_set (insn);
14333 
14334       if (!pattern
14335 	  || SET_DEST (pattern) != pc_rtx
14336 	  || GET_CODE (SET_SRC (pattern)) != IF_THEN_ELSE)
14337 	return false;
14338 
14339       cond = XEXP (SET_SRC (pattern), 0);
14340       op0 = &XEXP (cond, 0);
14341       op1 = &XEXP (cond, 1);
14342     }
14343   else if (GET_CODE (PATTERN (insn)) == SET)
14344     {
14345       rtx src, dest;
14346 
14347       /* Handle normal compare instructions.  */
14348       src = SET_SRC (PATTERN (insn));
14349       dest = SET_DEST (PATTERN (insn));
14350 
14351       if (!REG_P (dest)
14352 	  || !CC_REGNO_P (REGNO (dest))
14353 	  || GET_CODE (src) != COMPARE)
14354 	return false;
14355 
14356       /* s390_swap_cmp will try to find the conditional
14357 	 jump when passing NULL_RTX as condition.  */
14358       cond = NULL_RTX;
14359       op0 = &XEXP (src, 0);
14360       op1 = &XEXP (src, 1);
14361     }
14362   else
14363     return false;
14364 
14365   if (!REG_P (*op0) || !REG_P (*op1))
14366     return false;
14367 
14368   if (GET_MODE_CLASS (GET_MODE (*op0)) != MODE_INT)
14369     return false;
14370 
14371   /* Swap the COMPARE arguments and its mask if there is a
14372      conflicting access in the previous insn.  */
14373   prev_insn = prev_active_insn (insn);
14374   if (prev_insn != NULL_RTX && INSN_P (prev_insn)
14375       && reg_referenced_p (*op1, PATTERN (prev_insn)))
14376     s390_swap_cmp (cond, op0, op1, insn);
14377 
14378   /* Check if there is a conflict with the next insn. If there
14379      was no conflict with the previous insn, then swap the
14380      COMPARE arguments and its mask.  If we already swapped
14381      the operands, or if swapping them would cause a conflict
14382      with the previous insn, issue a NOP after the COMPARE in
14383      order to separate the two instructions.  */
14384   next_insn = next_active_insn (insn);
14385   if (next_insn != NULL_RTX && INSN_P (next_insn)
14386       && s390_non_addr_reg_read_p (*op1, next_insn))
14387     {
14388       if (prev_insn != NULL_RTX && INSN_P (prev_insn)
14389 	  && s390_non_addr_reg_read_p (*op0, prev_insn))
14390 	{
14391 	  if (REGNO (*op1) == 0)
14392 	    emit_insn_after (gen_nop_lr1 (), insn);
14393 	  else
14394 	    emit_insn_after (gen_nop_lr0 (), insn);
14395 	  insn_added_p = true;
14396 	}
14397       else
14398 	s390_swap_cmp (cond, op0, op1, insn);
14399     }
14400   return insn_added_p;
14401 }
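/* Example of the situation handled above (sketch): with

     cgrj  %r0,%r1,...
     agr   %r2,%r1

   grouped together and %r1 delivered via a bypass, the second operand
   of the compare-and-branch is accessed complemented while agr reads
   it non-complemented, causing a pipeline recycle.  Swapping the
   compare operands (together with the condition), or inserting a
   register-copy nop, breaks the pattern.  */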
14402 
14403 /* Number of INSNs to be scanned backward in the last BB of the loop
14404    and forward in the first BB of the loop.  This usually should be a
14405    bit more than the number of INSNs which could go into one
14406    group.  */
14407 #define S390_OSC_SCAN_INSN_NUM 5
14408 
14409 /* Scan LOOP for static OSC collisions and return true if an osc_break
14410    should be issued for this loop.  */
14411 static bool
14412 s390_adjust_loop_scan_osc (struct loop* loop)
14413 
14414 {
14415   HARD_REG_SET modregs, newregs;
14416   rtx_insn *insn, *store_insn = NULL;
14417   rtx set;
14418   struct s390_address addr_store, addr_load;
14419   subrtx_iterator::array_type array;
14420   int insn_count;
14421 
14422   CLEAR_HARD_REG_SET (modregs);
14423 
14424   insn_count = 0;
14425   FOR_BB_INSNS_REVERSE (loop->latch, insn)
14426     {
14427       if (!INSN_P (insn) || INSN_CODE (insn) <= 0)
14428 	continue;
14429 
14430       insn_count++;
14431       if (insn_count > S390_OSC_SCAN_INSN_NUM)
14432 	return false;
14433 
14434       find_all_hard_reg_sets (insn, &newregs, true);
14435       IOR_HARD_REG_SET (modregs, newregs);
14436 
14437       set = single_set (insn);
14438       if (!set)
14439 	continue;
14440 
14441       if (MEM_P (SET_DEST (set))
14442 	  && s390_decompose_address (XEXP (SET_DEST (set), 0), &addr_store))
14443 	{
14444 	  store_insn = insn;
14445 	  break;
14446 	}
14447     }
14448 
14449   if (store_insn == NULL_RTX)
14450     return false;
14451 
14452   insn_count = 0;
14453   FOR_BB_INSNS (loop->header, insn)
14454     {
14455       if (!INSN_P (insn) || INSN_CODE (insn) <= 0)
14456 	continue;
14457 
14458       if (insn == store_insn)
14459 	return false;
14460 
14461       insn_count++;
14462       if (insn_count > S390_OSC_SCAN_INSN_NUM)
14463 	return false;
14464 
14465       find_all_hard_reg_sets (insn, &newregs, true);
14466       IOR_HARD_REG_SET (modregs, newregs);
14467 
14468       set = single_set (insn);
14469       if (!set)
14470 	continue;
14471 
14472       /* An intermediate store disrupts static OSC checking
14473 	 anyway.  */
14474       if (MEM_P (SET_DEST (set))
14475 	  && s390_decompose_address (XEXP (SET_DEST (set), 0), NULL))
14476 	return false;
14477 
14478       FOR_EACH_SUBRTX (iter, array, SET_SRC (set), NONCONST)
14479 	if (MEM_P (*iter)
14480 	    && s390_decompose_address (XEXP (*iter, 0), &addr_load)
14481 	    && rtx_equal_p (addr_load.base, addr_store.base)
14482 	    && rtx_equal_p (addr_load.indx, addr_store.indx)
14483 	    && rtx_equal_p (addr_load.disp, addr_store.disp))
14484 	  {
14485 	    if ((addr_load.base != NULL_RTX
14486 		 && TEST_HARD_REG_BIT (modregs, REGNO (addr_load.base)))
14487 		|| (addr_load.indx != NULL_RTX
14488 		    && TEST_HARD_REG_BIT (modregs, REGNO (addr_load.indx))))
14489 	      return true;
14490 	  }
14491     }
14492   return false;
14493 }
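/* Sketch of a loop the scan above flags: the header loads from the
   same address expression that the latch stores to, while the base or
   index register of that address is modified in the scanned window,
   e.g.

     loop:
       la   %r5,8(%r5)
       l    %r1,0(%r4,%r5)
       ...
       st   %r1,0(%r4,%r5)
       j    loop

   The store of one iteration and the load of the next use the same
   registers but different address values; emitting an osc_break
   before the backward branch (see s390_adjust_loops) presumably keeps
   the hardware's operand-store-compare logic from predicting a false
   store-to-load dependency.  */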
14494 
14495 /* Look for adjustments which can be done on simple innermost
14496    loops.  */
14497 static void
14498 s390_adjust_loops ()
14499 {
14500   struct loop *loop = NULL;
14501 
14502   df_analyze ();
14503   compute_bb_for_insn ();
14504 
14505   /* Find the loops.  */
14506   loop_optimizer_init (AVOID_CFG_MODIFICATIONS);
14507 
14508   FOR_EACH_LOOP (loop, LI_ONLY_INNERMOST)
14509     {
14510       if (dump_file)
14511 	{
14512 	  flow_loop_dump (loop, dump_file, NULL, 0);
14513 	  fprintf (dump_file, ";;  OSC loop scan Loop: ");
14514 	}
14515       if (loop->latch == NULL
14516 	  || pc_set (BB_END (loop->latch)) == NULL_RTX
14517 	  || !s390_adjust_loop_scan_osc (loop))
14518 	{
14519 	  if (dump_file)
14520 	    {
14521 	      if (loop->latch == NULL)
14522 		fprintf (dump_file, " muliple backward jumps\n");
14523 	      else
14524 		{
14525 		  fprintf (dump_file, " header insn: %d latch insn: %d ",
14526 			   INSN_UID (BB_HEAD (loop->header)),
14527 			   INSN_UID (BB_END (loop->latch)));
14528 		  if (pc_set (BB_END (loop->latch)) == NULL_RTX)
14529 		    fprintf (dump_file, " loop does not end with jump\n");
14530 		  else
14531 		    fprintf (dump_file, " not instrumented\n");
14532 		}
14533 	    }
14534 	}
14535       else
14536 	{
14537 	  rtx_insn *new_insn;
14538 
14539 	  if (dump_file)
14540 	    fprintf (dump_file, " adding OSC break insn: ");
14541 	  new_insn = emit_insn_before (gen_osc_break (),
14542 				       BB_END (loop->latch));
14543 	  INSN_ADDRESSES_NEW (new_insn, -1);
14544 	}
14545     }
14546 
14547   loop_optimizer_finalize ();
14548 
14549   df_finish_pass (false);
14550 }
14551 
14552 /* Perform machine-dependent processing.  */
14553 
14554 static void
14555 s390_reorg (void)
14556 {
14557   bool pool_overflow = false;
14558   int hw_before, hw_after;
14559 
14560   if (s390_tune == PROCESSOR_2964_Z13)
14561     s390_adjust_loops ();
14562 
14563   /* Make sure all splits have been performed; splits after
14564      machine_dependent_reorg might confuse insn length counts.  */
14565   split_all_insns_noflow ();
14566 
14567   /* Install the main literal pool and the associated base
14568      register load insns.
14569 
14570      In addition, there are two problematic situations we need
14571      to correct:
14572 
14573      - the literal pool might be > 4096 bytes in size, so that
14574        some of its elements cannot be directly accessed
14575 
14576      - a branch target might be > 64K away from the branch, so that
14577        it is not possible to use a PC-relative instruction.
14578 
14579      To fix those, we split the single literal pool into multiple
14580      pool chunks, reloading the pool base register at various
14581      points throughout the function to ensure it always points to
14582      the pool chunk the following code expects, and / or replace
14583      PC-relative branches by absolute branches.
14584 
14585      However, the two problems are interdependent: splitting the
14586      literal pool can move a branch further away from its target,
14587      causing the 64K limit to overflow, and on the other hand,
14588      replacing a PC-relative branch by an absolute branch means
14589      we need to put the branch target address into the literal
14590      pool, possibly causing it to overflow.
14591 
14592      So, we loop trying to fix up both problems until we manage
14593      to satisfy both conditions at the same time.  Note that the
14594      loop is guaranteed to terminate as every pass of the loop
14595      strictly decreases the total number of PC-relative branches
14596      in the function.  (This is not completely true as there
14597      might be branch-over-pool insns introduced by chunkify_start.
14598      Those never need to be split however.)  */
14599 
14600   for (;;)
14601     {
14602       struct constant_pool *pool = NULL;
14603 
14604       /* Collect the literal pool.  */
14605       if (!pool_overflow)
14606 	{
14607 	  pool = s390_mainpool_start ();
14608 	  if (!pool)
14609 	    pool_overflow = true;
14610 	}
14611 
14612       /* If literal pool overflowed, start to chunkify it.  */
14613       if (pool_overflow)
14614         pool = s390_chunkify_start ();
14615 
14616       /* Split out-of-range branches.  If this has created new
14617 	 literal pool entries, cancel current chunk list and
14618 	 recompute it.  zSeries machines have large branch
14619 	 instructions, so we never need to split a branch.  */
14620       if (!TARGET_CPU_ZARCH && s390_split_branches ())
14621         {
14622           if (pool_overflow)
14623             s390_chunkify_cancel (pool);
14624 	  else
14625             s390_mainpool_cancel (pool);
14626 
14627           continue;
14628         }
14629 
14630       /* If we made it up to here, both conditions are satisfied.
14631 	 Finish up literal pool related changes.  */
14632       if (pool_overflow)
14633 	s390_chunkify_finish (pool);
14634       else
14635 	s390_mainpool_finish (pool);
14636 
14637       /* We're done splitting branches.  */
14638       cfun->machine->split_branches_pending_p = false;
14639       break;
14640     }
14641 
14642   /* Generate out-of-pool execute target insns.  */
14643   if (TARGET_CPU_ZARCH)
14644     {
14645       rtx_insn *insn, *target;
14646       rtx label;
14647 
14648       for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
14649 	{
14650 	  label = s390_execute_label (insn);
14651 	  if (!label)
14652 	    continue;
14653 
14654 	  gcc_assert (label != const0_rtx);
14655 
14656 	  target = emit_label (XEXP (label, 0));
14657 	  INSN_ADDRESSES_NEW (target, -1);
14658 
14659 	  if (JUMP_P (insn))
14660 	    {
14661 	      target = emit_jump_insn (s390_execute_target (insn));
14662 	      /* This is important in order to keep a table jump
14663 		 pointing at the jump table label.  Only then is it
14664 		 recognized as a table jump.  */
14665 	      JUMP_LABEL (target) = JUMP_LABEL (insn);
14666 	    }
14667 	  else
14668 	    target = emit_insn (s390_execute_target (insn));
14669 	  INSN_ADDRESSES_NEW (target, -1);
14670 	}
14671     }
14672 
14673   /* Try to optimize prologue and epilogue further.  */
14674   s390_optimize_prologue ();
14675 
14676   /* Walk over the insns and do some >=z10 specific changes.  */
14677   if (s390_tune >= PROCESSOR_2097_Z10)
14678     {
14679       rtx_insn *insn;
14680       bool insn_added_p = false;
14681 
14682       /* The insn lengths and addresses have to be up to date for the
14683 	 following manipulations.  */
14684       shorten_branches (get_insns ());
14685 
14686       for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
14687 	{
14688 	  if (!INSN_P (insn) || INSN_CODE (insn) <= 0)
14689 	    continue;
14690 
14691 	  if (JUMP_P (insn))
14692 	    insn_added_p |= s390_fix_long_loop_prediction (insn);
14693 
14694 	  if ((GET_CODE (PATTERN (insn)) == PARALLEL
14695 	       || GET_CODE (PATTERN (insn)) == SET)
14696 	      && s390_tune == PROCESSOR_2097_Z10)
14697 	    insn_added_p |= s390_z10_optimize_cmp (insn);
14698 	}
14699 
14700       /* Adjust branches if we added new instructions.  */
14701       if (insn_added_p)
14702 	shorten_branches (get_insns ());
14703     }
14704 
14705   s390_function_num_hotpatch_hw (current_function_decl, &hw_before, &hw_after);
14706   if (hw_after > 0)
14707     {
14708       rtx_insn *insn;
14709 
14710       /* Insert NOPs for hotpatching. */
14711       for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
14712 	/* Emit NOPs
14713 	    1. inside the area covered by debug information to allow setting
14714 	       breakpoints at the NOPs,
14715 	    2. before any insn which results in an asm instruction,
14716 	    3. before in-function labels to avoid jumping to the NOPs, for
14717 	       example as part of a loop,
14718 	    4. before any barrier in case the function is completely empty
14719 	       (__builtin_unreachable ()) and has neither internal labels nor
14720 	       active insns.
14721 	*/
14722 	if (active_insn_p (insn) || BARRIER_P (insn) || LABEL_P (insn))
14723 	  break;
14724       /* Output a series of NOPs before the first active insn.  */
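      /* A small worked example (illustration only, derived from the loop
	 below): hw_after counts halfwords, so a request for 5 halfwords on
	 a zarch target is covered by one 6-byte NOP (3 halfwords) followed
	 by one 4-byte NOP (2 halfwords).  */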
14725       while (insn && hw_after > 0)
14726 	{
14727 	  if (hw_after >= 3 && TARGET_CPU_ZARCH)
14728 	    {
14729 	      emit_insn_before (gen_nop_6_byte (), insn);
14730 	      hw_after -= 3;
14731 	    }
14732 	  else if (hw_after >= 2)
14733 	    {
14734 	      emit_insn_before (gen_nop_4_byte (), insn);
14735 	      hw_after -= 2;
14736 	    }
14737 	  else
14738 	    {
14739 	      emit_insn_before (gen_nop_2_byte (), insn);
14740 	      hw_after -= 1;
14741 	    }
14742 	}
14743     }
14744 }
14745 
14746 /* Return true if INSN is a fp load insn writing register REGNO.  */
14747 static inline bool
14748 s390_fpload_toreg (rtx_insn *insn, unsigned int regno)
14749 {
14750   rtx set;
14751   enum attr_type flag = s390_safe_attr_type (insn);
14752 
14753   if (flag != TYPE_FLOADSF && flag != TYPE_FLOADDF)
14754     return false;
14755 
14756   set = single_set (insn);
14757 
14758   if (set == NULL_RTX)
14759     return false;
14760 
14761   if (!REG_P (SET_DEST (set)) || !MEM_P (SET_SRC (set)))
14762     return false;
14763 
14764   if (REGNO (SET_DEST (set)) != regno)
14765     return false;
14766 
14767   return true;
14768 }
14769 
14770 /* This value describes the distance to be avoided between an
14771    arithmetic fp instruction and an fp load writing the same register.
14772    A distance of Z10_EARLYLOAD_DISTANCE - 1 or Z10_EARLYLOAD_DISTANCE + 1
14773    is fine, but the exact value has to be avoided.  Otherwise the FP
14774    pipeline will throw an exception causing a major penalty.  */
14775 #define Z10_EARLYLOAD_DISTANCE 7
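/* Illustrative example (an assumption restating the rule above): an FP load
   into %f0 issued exactly Z10_EARLYLOAD_DISTANCE insns after an arithmetic
   FP instruction writing %f0 hits the penalty; issuing it one insn earlier
   or later is harmless.  */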
14776 
14777 /* Rearrange the ready list in order to avoid the situation described
14778    for Z10_EARLYLOAD_DISTANCE.  A problematic load instruction is
14779    moved to the very end of the ready list.  */
14780 static void
14781 s390_z10_prevent_earlyload_conflicts (rtx_insn **ready, int *nready_p)
14782 {
14783   unsigned int regno;
14784   int nready = *nready_p;
14785   rtx_insn *tmp;
14786   int i;
14787   rtx_insn *insn;
14788   rtx set;
14789   enum attr_type flag;
14790   int distance;
14791 
14792   /* Skip DISTANCE - 1 active insns.  */
14793   for (insn = last_scheduled_insn, distance = Z10_EARLYLOAD_DISTANCE - 1;
14794        distance > 0 && insn != NULL_RTX;
14795        distance--, insn = prev_active_insn (insn))
14796     if (CALL_P (insn) || JUMP_P (insn))
14797       return;
14798 
14799   if (insn == NULL_RTX)
14800     return;
14801 
14802   set = single_set (insn);
14803 
14804   if (set == NULL_RTX || !REG_P (SET_DEST (set))
14805       || GET_MODE_CLASS (GET_MODE (SET_DEST (set))) != MODE_FLOAT)
14806     return;
14807 
14808   flag = s390_safe_attr_type (insn);
14809 
14810   if (flag == TYPE_FLOADSF || flag == TYPE_FLOADDF)
14811     return;
14812 
14813   regno = REGNO (SET_DEST (set));
14814   i = nready - 1;
14815 
14816   while (!s390_fpload_toreg (ready[i], regno) && i > 0)
14817     i--;
14818 
14819   if (!i)
14820     return;
14821 
14822   tmp = ready[i];
14823   memmove (&ready[1], &ready[0], sizeof (rtx_insn *) * i);
14824   ready[0] = tmp;
14825 }
14826 
14827 /* Returns TRUE if BB is entered via a fallthru edge and all other
14828    incoming edges are less than unlikely.  */
14829 static bool
14830 s390_bb_fallthru_entry_likely (basic_block bb)
14831 {
14832   edge e, fallthru_edge;
14833   edge_iterator ei;
14834 
14835   if (!bb)
14836     return false;
14837 
14838   fallthru_edge = find_fallthru_edge (bb->preds);
14839   if (!fallthru_edge)
14840     return false;
14841 
14842   FOR_EACH_EDGE (e, ei, bb->preds)
14843     if (e != fallthru_edge
14844 	&& e->probability >= profile_probability::unlikely ())
14845       return false;
14846 
14847   return true;
14848 }
14849 
14850 /* The s390_sched_state variable tracks the state of the current or
14851    the last instruction group.
14852 
14853    0,1,2 number of instructions scheduled in the current group
14854    3     the last group is complete - normal insns
14855    4     the last group was a cracked/expanded insn */
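/* Informal note (assumption based on the state encoding above): on zEC12 and
   later the dispatch groups contain up to three instructions, so the counter
   runs 0 -> 1 -> 2 before the group is considered complete.  */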
14856 
14857 static int s390_sched_state = 0;
14858 
14859 #define S390_SCHED_STATE_NORMAL  3
14860 #define S390_SCHED_STATE_CRACKED 4
14861 
14862 #define S390_SCHED_ATTR_MASK_CRACKED    0x1
14863 #define S390_SCHED_ATTR_MASK_EXPANDED   0x2
14864 #define S390_SCHED_ATTR_MASK_ENDGROUP   0x4
14865 #define S390_SCHED_ATTR_MASK_GROUPALONE 0x8
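/* Rough meaning of the attribute masks above (informal summary, not taken
   from the processor documentation): "cracked" and "expanded" insns are
   split into several internal operations at decode time, "endgroup" insns
   close the current dispatch group, and "groupalone" insns occupy a
   dispatch group on their own.  */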
14866 
14867 static unsigned int
14868 s390_get_sched_attrmask (rtx_insn *insn)
14869 {
14870   unsigned int mask = 0;
14871 
14872   switch (s390_tune)
14873     {
14874     case PROCESSOR_2827_ZEC12:
14875       if (get_attr_zEC12_cracked (insn))
14876 	mask |= S390_SCHED_ATTR_MASK_CRACKED;
14877       if (get_attr_zEC12_expanded (insn))
14878 	mask |= S390_SCHED_ATTR_MASK_EXPANDED;
14879       if (get_attr_zEC12_endgroup (insn))
14880 	mask |= S390_SCHED_ATTR_MASK_ENDGROUP;
14881       if (get_attr_zEC12_groupalone (insn))
14882 	mask |= S390_SCHED_ATTR_MASK_GROUPALONE;
14883       break;
14884     case PROCESSOR_2964_Z13:
14885     case PROCESSOR_3906_Z14:
14886       if (get_attr_z13_cracked (insn))
14887 	mask |= S390_SCHED_ATTR_MASK_CRACKED;
14888       if (get_attr_z13_expanded (insn))
14889 	mask |= S390_SCHED_ATTR_MASK_EXPANDED;
14890       if (get_attr_z13_endgroup (insn))
14891 	mask |= S390_SCHED_ATTR_MASK_ENDGROUP;
14892       if (get_attr_z13_groupalone (insn))
14893 	mask |= S390_SCHED_ATTR_MASK_GROUPALONE;
14894       break;
14895     default:
14896       gcc_unreachable ();
14897     }
14898   return mask;
14899 }
14900 
14901 static unsigned int
14902 s390_get_unit_mask (rtx_insn *insn, int *units)
14903 {
14904   unsigned int mask = 0;
14905 
14906   switch (s390_tune)
14907     {
14908     case PROCESSOR_2964_Z13:
14909     case PROCESSOR_3906_Z14:
14910       *units = 3;
14911       if (get_attr_z13_unit_lsu (insn))
14912 	mask |= 1 << 0;
14913       if (get_attr_z13_unit_fxu (insn))
14914 	mask |= 1 << 1;
14915       if (get_attr_z13_unit_vfu (insn))
14916 	mask |= 1 << 2;
14917       break;
14918     default:
14919       gcc_unreachable ();
14920     }
14921   return mask;
14922 }
14923 
14924 /* Return the scheduling score for INSN.  The higher the score the
14925    better.  The score is calculated from the OOO scheduling attributes
14926    of INSN and the scheduling state s390_sched_state.  */
14927 static int
14928 s390_sched_score (rtx_insn *insn)
14929 {
14930   unsigned int mask = s390_get_sched_attrmask (insn);
14931   int score = 0;
14932 
14933   switch (s390_sched_state)
14934     {
14935     case 0:
14936       /* Try to put insns into the first slot which would otherwise
14937 	 break a group.  */
14938       if ((mask & S390_SCHED_ATTR_MASK_CRACKED) != 0
14939 	  || (mask & S390_SCHED_ATTR_MASK_EXPANDED) != 0)
14940 	score += 5;
14941       if ((mask & S390_SCHED_ATTR_MASK_GROUPALONE) != 0)
14942 	score += 10;
14943       /* fallthrough */
14944     case 1:
14945       /* Prefer not cracked insns while trying to put together a
14946 	 group.  */
14947       if ((mask & S390_SCHED_ATTR_MASK_CRACKED) == 0
14948 	  && (mask & S390_SCHED_ATTR_MASK_EXPANDED) == 0
14949 	  && (mask & S390_SCHED_ATTR_MASK_GROUPALONE) == 0)
14950 	score += 10;
14951       if ((mask & S390_SCHED_ATTR_MASK_ENDGROUP) == 0)
14952 	score += 5;
14953       break;
14954     case 2:
14955       /* Prefer not cracked insns while trying to put together a
14956 	 group.  */
14957       if ((mask & S390_SCHED_ATTR_MASK_CRACKED) == 0
14958 	  && (mask & S390_SCHED_ATTR_MASK_EXPANDED) == 0
14959 	  && (mask & S390_SCHED_ATTR_MASK_GROUPALONE) == 0)
14960 	score += 10;
14961       /* Prefer endgroup insns in the last slot.  */
14962       if ((mask & S390_SCHED_ATTR_MASK_ENDGROUP) != 0)
14963 	score += 10;
14964       break;
14965     case S390_SCHED_STATE_NORMAL:
14966       /* Prefer not cracked insns if the last was not cracked.  */
14967       if ((mask & S390_SCHED_ATTR_MASK_CRACKED) == 0
14968 	  && (mask & S390_SCHED_ATTR_MASK_EXPANDED) == 0)
14969 	score += 5;
14970       if ((mask & S390_SCHED_ATTR_MASK_GROUPALONE) != 0)
14971 	score += 10;
14972       break;
14973     case S390_SCHED_STATE_CRACKED:
14974       /* Try to keep cracked insns together to prevent them from
14975 	 interrupting groups.  */
14976       if ((mask & S390_SCHED_ATTR_MASK_CRACKED) != 0
14977 	  || (mask & S390_SCHED_ATTR_MASK_EXPANDED) != 0)
14978 	score += 5;
14979       break;
14980     }
14981 
14982   if (s390_tune >= PROCESSOR_2964_Z13)
14983     {
14984       int units, i;
14985       unsigned unit_mask, m = 1;
14986 
14987       unit_mask = s390_get_unit_mask (insn, &units);
14988       gcc_assert (units <= MAX_SCHED_UNITS);
14989 
14990       /* Add a score in range 0..MAX_SCHED_MIX_SCORE depending on how long
14991 	 ago the last insn of this unit type got scheduled.  This is
14992 	 supposed to help provide a proper instruction mix to the
14993 	 CPU.  */
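      /* For example (illustration only): a unit that has been idle for at
	 least MAX_SCHED_MIX_DISTANCE insns yields the full
	 MAX_SCHED_MIX_SCORE bonus, while a unit used by the previous insn
	 contributes nothing.  */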
14994       for (i = 0; i < units; i++, m <<= 1)
14995 	if (m & unit_mask)
14996 	  score += (last_scheduled_unit_distance[i] * MAX_SCHED_MIX_SCORE /
14997 		    MAX_SCHED_MIX_DISTANCE);
14998 
14999       unsigned latency = insn_default_latency (insn);
15000 
15001       int other_side = 1 - current_side;
15002 
15003       /* Try to delay long-running insns when side is busy.  */
15004       if (latency > LONGRUNNING_THRESHOLD)
15005 	{
15006 	  if (get_attr_z13_unit_fxu (insn) && fxu_longrunning[current_side]
15007 	      && fxu_longrunning[other_side] <= fxu_longrunning[current_side])
15008 	    score = MAX (0, score - 10);
15009 
15010 	  if (get_attr_z13_unit_vfu (insn) && vfu_longrunning[current_side]
15011 	      && vfu_longrunning[other_side] <= vfu_longrunning[current_side])
15012 	    score = MAX (0, score - 10);
15013 	}
15014     }
15015 
15016   return score;
15017 }
15018 
15019 /* This function is called via hook TARGET_SCHED_REORDER before
15020    issuing one insn from list READY which contains *NREADYP entries.
15021    For target z10 it reorders load instructions to avoid early load
15022    conflicts in the floating point pipeline.  */
15023 static int
15024 s390_sched_reorder (FILE *file, int verbose,
15025 		    rtx_insn **ready, int *nreadyp, int clock ATTRIBUTE_UNUSED)
15026 {
15027   if (s390_tune == PROCESSOR_2097_Z10
15028       && reload_completed
15029       && *nreadyp > 1)
15030     s390_z10_prevent_earlyload_conflicts (ready, nreadyp);
15031 
15032   if (s390_tune >= PROCESSOR_2827_ZEC12
15033       && reload_completed
15034       && *nreadyp > 1)
15035     {
15036       int i;
15037       int last_index = *nreadyp - 1;
15038       int max_index = -1;
15039       int max_score = -1;
15040       rtx_insn *tmp;
15041 
15042       /* Just move the insn with the highest score to the top (the
15043 	 end) of the list.  A full sort is not needed since a conflict
15044 	 in the hazard recognition cannot happen.  So the top insn in
15045 	 the ready list will always be taken.  */
15046       for (i = last_index; i >= 0; i--)
15047 	{
15048 	  int score;
15049 
15050 	  if (recog_memoized (ready[i]) < 0)
15051 	    continue;
15052 
15053 	  score = s390_sched_score (ready[i]);
15054 	  if (score > max_score)
15055 	    {
15056 	      max_score = score;
15057 	      max_index = i;
15058 	    }
15059 	}
15060 
15061       if (max_index != -1)
15062 	{
15063 	  if (max_index != last_index)
15064 	    {
15065 	      tmp = ready[max_index];
15066 	      ready[max_index] = ready[last_index];
15067 	      ready[last_index] = tmp;
15068 
15069 	      if (verbose > 5)
15070 		fprintf (file,
15071 			 ";;\t\tBACKEND: move insn %d to the top of list\n",
15072 			 INSN_UID (ready[last_index]));
15073 	    }
15074 	  else if (verbose > 5)
15075 	    fprintf (file,
15076 		     ";;\t\tBACKEND: best insn %d already on top\n",
15077 		     INSN_UID (ready[last_index]));
15078 	}
15079 
15080       if (verbose > 5)
15081 	{
15082 	  fprintf (file, "ready list ooo attributes - sched state: %d\n",
15083 		   s390_sched_state);
15084 
15085 	  for (i = last_index; i >= 0; i--)
15086 	    {
15087 	      unsigned int sched_mask;
15088 	      rtx_insn *insn = ready[i];
15089 
15090 	      if (recog_memoized (insn) < 0)
15091 		continue;
15092 
15093 	      sched_mask = s390_get_sched_attrmask (insn);
15094 	      fprintf (file, ";;\t\tBACKEND: insn %d score: %d: ",
15095 		       INSN_UID (insn),
15096 		       s390_sched_score (insn));
15097 #define PRINT_SCHED_ATTR(M, ATTR) fprintf (file, "%s ",\
15098 					   ((M) & sched_mask) ? #ATTR : "");
15099 	      PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_CRACKED, cracked);
15100 	      PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_EXPANDED, expanded);
15101 	      PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_ENDGROUP, endgroup);
15102 	      PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_GROUPALONE, groupalone);
15103 #undef PRINT_SCHED_ATTR
15104 	      if (s390_tune >= PROCESSOR_2964_Z13)
15105 		{
15106 		  unsigned int unit_mask, m = 1;
15107 		  int units, j;
15108 
15109 		  unit_mask  = s390_get_unit_mask (insn, &units);
15110 		  fprintf (file, "(units:");
15111 		  for (j = 0; j < units; j++, m <<= 1)
15112 		    if (m & unit_mask)
15113 		      fprintf (file, " u%d", j);
15114 		  fprintf (file, ")");
15115 		}
15116 	      fprintf (file, "\n");
15117 	    }
15118 	}
15119     }
15120 
15121   return s390_issue_rate ();
15122 }
15123 
15124 
15125 /* This function is called via hook TARGET_SCHED_VARIABLE_ISSUE after
15126    the scheduler has issued INSN.  It stores the last issued insn into
15127    last_scheduled_insn in order to make it available for
15128    s390_sched_reorder.  */
15129 static int
15130 s390_sched_variable_issue (FILE *file, int verbose, rtx_insn *insn, int more)
15131 {
15132   last_scheduled_insn = insn;
15133 
15134   bool starts_group = false;
15135 
15136   if (s390_tune >= PROCESSOR_2827_ZEC12
15137       && reload_completed
15138       && recog_memoized (insn) >= 0)
15139     {
15140       unsigned int mask = s390_get_sched_attrmask (insn);
15141 
15142       if ((mask & S390_SCHED_ATTR_MASK_CRACKED) != 0
15143 	  || (mask & S390_SCHED_ATTR_MASK_EXPANDED) != 0
15144 	  || (mask & S390_SCHED_ATTR_MASK_GROUPALONE) != 0)
15145 	starts_group = true;
15146 
15147       if ((mask & S390_SCHED_ATTR_MASK_CRACKED) != 0
15148 	  || (mask & S390_SCHED_ATTR_MASK_EXPANDED) != 0)
15149 	s390_sched_state = S390_SCHED_STATE_CRACKED;
15150       else if ((mask & S390_SCHED_ATTR_MASK_ENDGROUP) != 0
15151 	       || (mask & S390_SCHED_ATTR_MASK_GROUPALONE) != 0)
15152 	s390_sched_state = S390_SCHED_STATE_NORMAL;
15153       else
15154 	{
15155 	  /* Only normal insns are left (mask == 0).  */
15156 	  switch (s390_sched_state)
15157 	    {
15158 	    case 0:
15159 	      starts_group = true;
15160 	      /* fallthrough */
15161 	    case 1:
15162 	    case 2:
15163 	      s390_sched_state++;
15164 	      break;
15165 	    case S390_SCHED_STATE_NORMAL:
15166 	      starts_group = true;
15167 	      s390_sched_state = 1;
15168 	      break;
15169 	    case S390_SCHED_STATE_CRACKED:
15170 	      s390_sched_state = S390_SCHED_STATE_NORMAL;
15171 	      break;
15172 	    }
15173 	}
15174 
15175       if (s390_tune >= PROCESSOR_2964_Z13)
15176 	{
15177 	  int units, i;
15178 	  unsigned unit_mask, m = 1;
15179 
15180 	  unit_mask = s390_get_unit_mask (insn, &units);
15181 	  gcc_assert (units <= MAX_SCHED_UNITS);
15182 
15183 	  for (i = 0; i < units; i++, m <<= 1)
15184 	    if (m & unit_mask)
15185 	      last_scheduled_unit_distance[i] = 0;
15186 	    else if (last_scheduled_unit_distance[i] < MAX_SCHED_MIX_DISTANCE)
15187 	      last_scheduled_unit_distance[i]++;
15188 	}
15189 
15190       /* If this insn started a new group, the side flipped.  */
15191       if (starts_group)
15192 	current_side = current_side ? 0 : 1;
15193 
15194       for (int i = 0; i < 2; i++)
15195 	{
15196 	  if (fxu_longrunning[i] >= 1)
15197 	    fxu_longrunning[i] -= 1;
15198 	  if (vfu_longrunning[i] >= 1)
15199 	    vfu_longrunning[i] -= 1;
15200 	}
15201 
15202       unsigned latency = insn_default_latency (insn);
15203       if (latency > LONGRUNNING_THRESHOLD)
15204 	{
15205 	  if (get_attr_z13_unit_fxu (insn))
15206 	    fxu_longrunning[current_side] = latency * LATENCY_FACTOR;
15207 	  else
15208 	    vfu_longrunning[current_side] = latency * LATENCY_FACTOR;
15209 	}
15210 
15211       if (verbose > 5)
15212 	{
15213 	  unsigned int sched_mask;
15214 
15215 	  sched_mask = s390_get_sched_attrmask (insn);
15216 
15217 	  fprintf (file, ";;\t\tBACKEND: insn %d: ", INSN_UID (insn));
15218 #define PRINT_SCHED_ATTR(M, ATTR) fprintf (file, "%s ", ((M) & sched_mask) ? #ATTR : "");
15219 	  PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_CRACKED, cracked);
15220 	  PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_EXPANDED, expanded);
15221 	  PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_ENDGROUP, endgroup);
15222 	  PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_GROUPALONE, groupalone);
15223 #undef PRINT_SCHED_ATTR
15224 
15225 	  if (s390_tune >= PROCESSOR_2964_Z13)
15226 	    {
15227 	      unsigned int unit_mask, m = 1;
15228 	      int units, j;
15229 
15230 	      unit_mask  = s390_get_unit_mask (insn, &units);
15231 	      fprintf (file, "(units:");
15232 	      for (j = 0; j < units; j++, m <<= 1)
15233 		if (m & unit_mask)
15234 		  fprintf (file, " %d", j);
15235 	      fprintf (file, ")");
15236 	    }
15237 	  fprintf (file, " sched state: %d\n", s390_sched_state);
15238 
15239 	  if (s390_tune >= PROCESSOR_2964_Z13)
15240 	    {
15241 	      int units, j;
15242 
15243 	      s390_get_unit_mask (insn, &units);
15244 
15245 	      fprintf (file, ";;\t\tBACKEND: units unused for: ");
15246 	      for (j = 0; j < units; j++)
15247 		fprintf (file, "%d:%d ", j, last_scheduled_unit_distance[j]);
15248 	      fprintf (file, "\n");
15249 	    }
15250 	}
15251     }
15252 
15253   if (GET_CODE (PATTERN (insn)) != USE
15254       && GET_CODE (PATTERN (insn)) != CLOBBER)
15255     return more - 1;
15256   else
15257     return more;
15258 }
15259 
15260 static void
15261 s390_sched_init (FILE *file ATTRIBUTE_UNUSED,
15262 		 int verbose ATTRIBUTE_UNUSED,
15263 		 int max_ready ATTRIBUTE_UNUSED)
15264 {
15265   last_scheduled_insn = NULL;
15266   memset (last_scheduled_unit_distance, 0, MAX_SCHED_UNITS * sizeof (int));
15267 
15268   /* If the next basic block is most likely entered via a fallthru edge
15269      we keep the last sched state.  Otherwise we start a new group.
15270      The scheduler traverses basic blocks in "instruction stream" ordering
15271      so if we see a fallthru edge here, s390_sched_state will be that of
15272      its source block.
15273 
15274      current_sched_info->prev_head is the insn before the first insn of the
15275      block of insns to be scheduled.
15276      */
15277   rtx_insn *insn = current_sched_info->prev_head
15278     ? NEXT_INSN (current_sched_info->prev_head) : NULL;
15279   basic_block bb = insn ? BLOCK_FOR_INSN (insn) : NULL;
15280   if (s390_tune < PROCESSOR_2964_Z13 || !s390_bb_fallthru_entry_likely (bb))
15281     s390_sched_state = 0;
15282 }
15283 
15284 /* This target hook implementation for TARGET_LOOP_UNROLL_ADJUST calculates
15285    a new unroll count for LOOP when tuning for CPUs with a built-in
15286    stride prefetcher.
15287    The loop is analyzed for memory accesses by walking each rtx of the
15288    loop body.  Depending on the loop depth and the number of memory
15289    accesses, a new count <= nunroll is returned to improve the
15290    behavior of the hardware prefetch unit.  */
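/* For illustration (derived from the table in the function body, not a
   documented tuning rule): a loop of depth 1 containing four memory accesses
   would be unrolled at most MIN (nunroll, 28 / 4) = 7 times.  */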
15291 static unsigned
15292 s390_loop_unroll_adjust (unsigned nunroll, struct loop *loop)
15293 {
15294   basic_block *bbs;
15295   rtx_insn *insn;
15296   unsigned i;
15297   unsigned mem_count = 0;
15298 
15299   if (s390_tune < PROCESSOR_2097_Z10)
15300     return nunroll;
15301 
15302   /* Count the number of memory references within the loop body.  */
15303   bbs = get_loop_body (loop);
15304   subrtx_iterator::array_type array;
15305   for (i = 0; i < loop->num_nodes; i++)
15306     FOR_BB_INSNS (bbs[i], insn)
15307       if (INSN_P (insn) && INSN_CODE (insn) != -1)
15308 	FOR_EACH_SUBRTX (iter, array, PATTERN (insn), NONCONST)
15309 	  if (MEM_P (*iter))
15310 	    mem_count += 1;
15311   free (bbs);
15312 
15313   /* Prevent division by zero; with no memory references, nunroll does not need adjusting anyway.  */
15314   if (mem_count == 0)
15315     return nunroll;
15316 
15317   switch (loop_depth(loop))
15318     {
15319     case 1:
15320       return MIN (nunroll, 28 / mem_count);
15321     case 2:
15322       return MIN (nunroll, 22 / mem_count);
15323     default:
15324       return MIN (nunroll, 16 / mem_count);
15325     }
15326 }
15327 
15328 /* Restore the current options.  This is a hook function and also called
15329    internally.  */
15330 
15331 static void
15332 s390_function_specific_restore (struct gcc_options *opts,
15333 				struct cl_target_option *ptr ATTRIBUTE_UNUSED)
15334 {
15335   opts->x_s390_cost_pointer = (long)processor_table[opts->x_s390_tune].cost;
15336 }
15337 
15338 static void
15339 s390_option_override_internal (bool main_args_p,
15340 			       struct gcc_options *opts,
15341 			       const struct gcc_options *opts_set)
15342 {
15343   const char *prefix;
15344   const char *suffix;
15345 
15346   /* Set up prefix/suffix so the error messages refer to either the command
15347      line argument, or the attribute(target).  */
15348   if (main_args_p)
15349     {
15350       prefix = "-m";
15351       suffix = "";
15352     }
15353   else
15354     {
15355       prefix = "option(\"";
15356       suffix = "\")";
15357     }
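
  /* With these settings the deprecation warnings below read, e.g.,
     "-march=g5" for a command line option but "option(\"arch=g5\")" for an
     attribute or pragma (an illustration of the prefix/suffix mechanics,
     not an additional diagnostic).  */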
15358 
15359 
15360   /* Architecture mode defaults according to ABI.  */
15361   if (!(opts_set->x_target_flags & MASK_ZARCH))
15362     {
15363       if (TARGET_64BIT)
15364 	opts->x_target_flags |= MASK_ZARCH;
15365       else
15366 	opts->x_target_flags &= ~MASK_ZARCH;
15367     }
15368 
15369   /* Set the march default in case it hasn't been specified on cmdline.  */
15370   if (!opts_set->x_s390_arch)
15371     opts->x_s390_arch = PROCESSOR_2064_Z900;
15372   else if (opts->x_s390_arch == PROCESSOR_9672_G5
15373 	   || opts->x_s390_arch == PROCESSOR_9672_G6)
15374     warning (OPT_Wdeprecated, "%sarch=%s%s is deprecated and will be removed "
15375 	     "in future releases; use at least %sarch=z900%s",
15376 	     prefix, opts->x_s390_arch == PROCESSOR_9672_G5 ? "g5" : "g6",
15377 	     suffix, prefix, suffix);
15378 
15379   opts->x_s390_arch_flags = processor_flags_table[(int) opts->x_s390_arch];
15380 
15381   /* Determine processor to tune for.  */
15382   if (!opts_set->x_s390_tune)
15383     opts->x_s390_tune = opts->x_s390_arch;
15384   else if (opts->x_s390_tune == PROCESSOR_9672_G5
15385 	   || opts->x_s390_tune == PROCESSOR_9672_G6)
15386     warning (OPT_Wdeprecated, "%stune=%s%s is deprecated and will be removed "
15387 	     "in future releases; use at least %stune=z900%s",
15388 	     prefix, opts->x_s390_tune == PROCESSOR_9672_G5 ? "g5" : "g6",
15389 	     suffix, prefix, suffix);
15390 
15391   opts->x_s390_tune_flags = processor_flags_table[opts->x_s390_tune];
15392 
15393   /* Sanity checks.  */
15394   if (opts->x_s390_arch == PROCESSOR_NATIVE
15395       || opts->x_s390_tune == PROCESSOR_NATIVE)
15396     gcc_unreachable ();
15397   if (TARGET_ZARCH_P (opts->x_target_flags) && !TARGET_CPU_ZARCH_P (opts))
15398     error ("z/Architecture mode not supported on %s",
15399 	   processor_table[(int)opts->x_s390_arch].name);
15400   if (TARGET_64BIT && !TARGET_ZARCH_P (opts->x_target_flags))
15401     error ("64-bit ABI not supported in ESA/390 mode");
15402 
15403   if (opts->x_s390_indirect_branch == indirect_branch_thunk_inline
15404       || opts->x_s390_indirect_branch_call == indirect_branch_thunk_inline
15405       || opts->x_s390_function_return == indirect_branch_thunk_inline
15406       || opts->x_s390_function_return_reg == indirect_branch_thunk_inline
15407       || opts->x_s390_function_return_mem == indirect_branch_thunk_inline)
15408     error ("thunk-inline is only supported with -mindirect-branch-jump");
15409 
15410   if (opts->x_s390_indirect_branch != indirect_branch_keep)
15411     {
15412       if (!opts_set->x_s390_indirect_branch_call)
15413 	opts->x_s390_indirect_branch_call = opts->x_s390_indirect_branch;
15414 
15415       if (!opts_set->x_s390_indirect_branch_jump)
15416 	opts->x_s390_indirect_branch_jump = opts->x_s390_indirect_branch;
15417     }
15418 
15419   if (opts->x_s390_function_return != indirect_branch_keep)
15420     {
15421       if (!opts_set->x_s390_function_return_reg)
15422 	opts->x_s390_function_return_reg = opts->x_s390_function_return;
15423 
15424       if (!opts_set->x_s390_function_return_mem)
15425 	opts->x_s390_function_return_mem = opts->x_s390_function_return;
15426     }
15427 
15428   if (!TARGET_CPU_ZARCH)
15429     {
15430       if (opts->x_s390_indirect_branch_call != indirect_branch_keep
15431 	  || opts->x_s390_indirect_branch_jump != indirect_branch_keep)
15432 	error ("-mindirect-branch* options require -march=z900 or higher");
15433       if (opts->x_s390_function_return_reg != indirect_branch_keep
15434 	  || opts->x_s390_function_return_mem != indirect_branch_keep)
15435 	error ("-mfunction-return* options require -march=z900 or higher");
15436     }
15437 
15438 
15439   /* Enable hardware transactions if available and not explicitly
15440      disabled by user.  E.g. with -m31 -march=zEC12 -mzarch */
15441   if (!TARGET_OPT_HTM_P (opts_set->x_target_flags))
15442     {
15443       if (TARGET_CPU_HTM_P (opts) && TARGET_ZARCH_P (opts->x_target_flags))
15444 	opts->x_target_flags |= MASK_OPT_HTM;
15445       else
15446 	opts->x_target_flags &= ~MASK_OPT_HTM;
15447     }
15448 
15449   if (TARGET_OPT_VX_P (opts_set->x_target_flags))
15450     {
15451       if (TARGET_OPT_VX_P (opts->x_target_flags))
15452 	{
15453 	  if (!TARGET_CPU_VX_P (opts))
15454 	    error ("hardware vector support not available on %s",
15455 		   processor_table[(int)opts->x_s390_arch].name);
15456 	  if (TARGET_SOFT_FLOAT_P (opts->x_target_flags))
15457 	    error ("hardware vector support not available with -msoft-float");
15458 	}
15459     }
15460   else
15461     {
15462       if (TARGET_CPU_VX_P (opts))
15463 	/* Enable vector support if available and not explicitly disabled
15464 	   by user.  E.g. with -m31 -march=z13 -mzarch */
15465 	opts->x_target_flags |= MASK_OPT_VX;
15466       else
15467 	opts->x_target_flags &= ~MASK_OPT_VX;
15468     }
15469 
15470   /* Use hardware DFP if available and not explicitly disabled by
15471      user. E.g. with -m31 -march=z10 -mzarch   */
15472   if (!TARGET_HARD_DFP_P (opts_set->x_target_flags))
15473     {
15474       if (TARGET_DFP_P (opts))
15475 	opts->x_target_flags |= MASK_HARD_DFP;
15476       else
15477 	opts->x_target_flags &= ~MASK_HARD_DFP;
15478     }
15479 
15480   if (TARGET_HARD_DFP_P (opts->x_target_flags) && !TARGET_DFP_P (opts))
15481     {
15482       if (TARGET_HARD_DFP_P (opts_set->x_target_flags))
15483 	{
15484 	  if (!TARGET_CPU_DFP_P (opts))
15485 	    error ("hardware decimal floating point instructions"
15486 		   " not available on %s",
15487 		   processor_table[(int)opts->x_s390_arch].name);
15488 	  if (!TARGET_ZARCH_P (opts->x_target_flags))
15489 	    error ("hardware decimal floating point instructions"
15490 		   " not available in ESA/390 mode");
15491 	}
15492       else
15493 	opts->x_target_flags &= ~MASK_HARD_DFP;
15494     }
15495 
15496   if (TARGET_SOFT_FLOAT_P (opts_set->x_target_flags)
15497       && TARGET_SOFT_FLOAT_P (opts->x_target_flags))
15498     {
15499       if (TARGET_HARD_DFP_P (opts_set->x_target_flags)
15500 	  && TARGET_HARD_DFP_P (opts->x_target_flags))
15501 	error ("-mhard-dfp can%'t be used in conjunction with -msoft-float");
15502 
15503       opts->x_target_flags &= ~MASK_HARD_DFP;
15504     }
15505 
15506   if (TARGET_BACKCHAIN_P (opts->x_target_flags)
15507       && TARGET_PACKED_STACK_P (opts->x_target_flags)
15508       && TARGET_HARD_FLOAT_P (opts->x_target_flags))
15509     error ("-mbackchain -mpacked-stack -mhard-float are not supported "
15510 	   "in combination");
15511 
15512   if (opts->x_s390_stack_size)
15513     {
15514       if (opts->x_s390_stack_guard >= opts->x_s390_stack_size)
15515 	error ("stack size must be greater than the stack guard value");
15516       else if (opts->x_s390_stack_size > 1 << 16)
15517 	error ("stack size must not be greater than 64k");
15518     }
15519   else if (opts->x_s390_stack_guard)
15520     error ("-mstack-guard implies use of -mstack-size");
15521 
15522 #ifdef TARGET_DEFAULT_LONG_DOUBLE_128
15523   if (!TARGET_LONG_DOUBLE_128_P (opts_set->x_target_flags))
15524     opts->x_target_flags |= MASK_LONG_DOUBLE_128;
15525 #endif
15526 
15527   if (opts->x_s390_tune >= PROCESSOR_2097_Z10)
15528     {
15529       maybe_set_param_value (PARAM_MAX_UNROLLED_INSNS, 100,
15530 			     opts->x_param_values,
15531 			     opts_set->x_param_values);
15532       maybe_set_param_value (PARAM_MAX_UNROLL_TIMES, 32,
15533 			     opts->x_param_values,
15534 			     opts_set->x_param_values);
15535       maybe_set_param_value (PARAM_MAX_COMPLETELY_PEELED_INSNS, 2000,
15536 			     opts->x_param_values,
15537 			     opts_set->x_param_values);
15538       maybe_set_param_value (PARAM_MAX_COMPLETELY_PEEL_TIMES, 64,
15539 			     opts->x_param_values,
15540 			     opts_set->x_param_values);
15541     }
15542 
15543   maybe_set_param_value (PARAM_MAX_PENDING_LIST_LENGTH, 256,
15544 			 opts->x_param_values,
15545 			 opts_set->x_param_values);
15546   /* values for loop prefetching */
15547   maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE, 256,
15548 			 opts->x_param_values,
15549 			 opts_set->x_param_values);
15550   maybe_set_param_value (PARAM_L1_CACHE_SIZE, 128,
15551 			 opts->x_param_values,
15552 			 opts_set->x_param_values);
15553   /* s390 has more than 2 levels and the size is much larger.  Since
15554      we are always running virtualized assume that we only get a small
15555      part of the caches above l1.  */
15556   maybe_set_param_value (PARAM_L2_CACHE_SIZE, 1500,
15557 			 opts->x_param_values,
15558 			 opts_set->x_param_values);
15559   maybe_set_param_value (PARAM_PREFETCH_MIN_INSN_TO_MEM_RATIO, 2,
15560 			 opts->x_param_values,
15561 			 opts_set->x_param_values);
15562   maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES, 6,
15563 			 opts->x_param_values,
15564 			 opts_set->x_param_values);
15565 
15566   /* Use the alternative scheduling-pressure algorithm by default.  */
15567   maybe_set_param_value (PARAM_SCHED_PRESSURE_ALGORITHM, 2,
15568                          opts->x_param_values,
15569                          opts_set->x_param_values);
15570 
15571   maybe_set_param_value (PARAM_MIN_VECT_LOOP_BOUND, 2,
15572 			 opts->x_param_values,
15573 			 opts_set->x_param_values);
15574 
15575   /* Call target specific restore function to do post-init work.  At the moment,
15576      this just sets opts->x_s390_cost_pointer.  */
15577   s390_function_specific_restore (opts, NULL);
15578 }
15579 
15580 static void
15581 s390_option_override (void)
15582 {
15583   unsigned int i;
15584   cl_deferred_option *opt;
15585   vec<cl_deferred_option> *v =
15586     (vec<cl_deferred_option> *) s390_deferred_options;
15587 
15588   if (v)
15589     FOR_EACH_VEC_ELT (*v, i, opt)
15590       {
15591 	switch (opt->opt_index)
15592 	  {
15593 	  case OPT_mhotpatch_:
15594 	    {
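	      /* The deferred argument has the form "n,m": the number of
		 halfwords to reserve before and after the function label
		 (see s390_hotpatch_hw_before_label / _after_label below).  */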
15595 	      int val1;
15596 	      int val2;
15597 	      char *s = strtok (ASTRDUP (opt->arg), ",");
15598 	      char *t = strtok (NULL, "\0");
15599 
15600 	      if (t != NULL)
15601 		{
15602 		  val1 = integral_argument (s);
15603 		  val2 = integral_argument (t);
15604 		}
15605 	      else
15606 		{
15607 		  val1 = -1;
15608 		  val2 = -1;
15609 		}
15610 	      if (val1 == -1 || val2 == -1)
15611 		{
15612 		  /* argument is not a plain number */
15613 		  error ("arguments to %qs should be non-negative integers",
15614 			 "-mhotpatch=n,m");
15615 		  break;
15616 		}
15617 	      else if (val1 > s390_hotpatch_hw_max
15618 		       || val2 > s390_hotpatch_hw_max)
15619 		{
15620 		  error ("argument to %qs is too large (max. %d)",
15621 			 "-mhotpatch=n,m", s390_hotpatch_hw_max);
15622 		  break;
15623 		}
15624 	      s390_hotpatch_hw_before_label = val1;
15625 	      s390_hotpatch_hw_after_label = val2;
15626 	      break;
15627 	    }
15628 	  default:
15629 	    gcc_unreachable ();
15630 	  }
15631       }
15632 
15633   /* Set up function hooks.  */
15634   init_machine_status = s390_init_machine_status;
15635 
15636   s390_option_override_internal (true, &global_options, &global_options_set);
15637 
15638   /* Save the initial options in case the user does function specific
15639      options.  */
15640   target_option_default_node = build_target_option_node (&global_options);
15641   target_option_current_node = target_option_default_node;
15642 
15643   /* This cannot reside in s390_option_optimization_table since HAVE_prefetch
15644      requires the arch flags to be evaluated already.  Since prefetching
15645      is beneficial on s390, we enable it if available.  */
15646   if (flag_prefetch_loop_arrays < 0 && HAVE_prefetch && optimize >= 3)
15647     flag_prefetch_loop_arrays = 1;
15648 
15649   if (!s390_pic_data_is_text_relative && !flag_pic)
15650     error ("-mno-pic-data-is-text-relative cannot be used without -fpic/-fPIC");
15651 
15652   if (TARGET_TPF)
15653     {
15654       /* Don't emit DWARF3/4 unless specifically selected.  The TPF
15655 	 debuggers do not yet support DWARF 3/4.  */
15656       if (!global_options_set.x_dwarf_strict)
15657 	dwarf_strict = 1;
15658       if (!global_options_set.x_dwarf_version)
15659 	dwarf_version = 2;
15660     }
15661 
15662   /* Register a target-specific optimization-and-lowering pass
15663      to run immediately before prologue and epilogue generation.
15664 
15665      Registering the pass must be done at start up.  It's
15666      convenient to do it here.  */
15667   opt_pass *new_pass = new pass_s390_early_mach (g);
15668   struct register_pass_info insert_pass_s390_early_mach =
15669     {
15670       new_pass,			/* pass */
15671       "pro_and_epilogue",	/* reference_pass_name */
15672       1,			/* ref_pass_instance_number */
15673       PASS_POS_INSERT_BEFORE	/* po_op */
15674     };
15675   register_pass (&insert_pass_s390_early_mach);
15676 }
15677 
15678 #if S390_USE_TARGET_ATTRIBUTE
15679 /* Inner function to process the attribute((target(...))), take an argument and
15680    set the current options from the argument. If we have a list, recursively go
15681    over the list.  */
15682 
15683 static bool
15684 s390_valid_target_attribute_inner_p (tree args,
15685 				     struct gcc_options *opts,
15686 				     struct gcc_options *new_opts_set,
15687 				     bool force_pragma)
15688 {
15689   char *next_optstr;
15690   bool ret = true;
15691 
15692 #define S390_ATTRIB(S,O,A)  { S, sizeof (S)-1, O, A, 0 }
15693 #define S390_PRAGMA(S,O,A)  { S, sizeof (S)-1, O, A, 1 }
15694   static const struct
15695   {
15696     const char *string;
15697     size_t len;
15698     int opt;
15699     int has_arg;
15700     int only_as_pragma;
15701   } attrs[] = {
15702     /* enum options */
15703     S390_ATTRIB ("arch=", OPT_march_, 1),
15704     S390_ATTRIB ("tune=", OPT_mtune_, 1),
15705     /* uinteger options */
15706     S390_ATTRIB ("stack-guard=", OPT_mstack_guard_, 1),
15707     S390_ATTRIB ("stack-size=", OPT_mstack_size_, 1),
15708     S390_ATTRIB ("branch-cost=", OPT_mbranch_cost_, 1),
15709     S390_ATTRIB ("warn-framesize=", OPT_mwarn_framesize_, 1),
15710     /* flag options */
15711     S390_ATTRIB ("backchain", OPT_mbackchain, 0),
15712     S390_ATTRIB ("hard-dfp", OPT_mhard_dfp, 0),
15713     S390_ATTRIB ("hard-float", OPT_mhard_float, 0),
15714     S390_ATTRIB ("htm", OPT_mhtm, 0),
15715     S390_ATTRIB ("vx", OPT_mvx, 0),
15716     S390_ATTRIB ("packed-stack", OPT_mpacked_stack, 0),
15717     S390_ATTRIB ("small-exec", OPT_msmall_exec, 0),
15718     S390_ATTRIB ("soft-float", OPT_msoft_float, 0),
15719     S390_ATTRIB ("mvcle", OPT_mmvcle, 0),
15720     S390_PRAGMA ("zvector", OPT_mzvector, 0),
15721     /* boolean options */
15722     S390_ATTRIB ("warn-dynamicstack", OPT_mwarn_dynamicstack, 0),
15723   };
15724 #undef S390_ATTRIB
15725 #undef S390_PRAGMA
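
  /* As an illustration (assumed usage, not part of this function): the
     strings above are matched against the argument of
	__attribute__ ((target ("arch=z13,no-vx")))
     or the corresponding "#pragma GCC target", one comma-separated token at
     a time.  */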
15726 
15727   /* If this is a list, recurse to get the options.  */
15728   if (TREE_CODE (args) == TREE_LIST)
15729     {
15730       bool ret = true;
15731       int num_pragma_values;
15732       int i;
15733 
15734       /* Note: attribs.c:decl_attributes prepends the values from
15735 	 current_target_pragma to the list of target attributes.  To determine
15736 	 whether we're looking at a value of the attribute or the pragma we
15737 	 assume that the first [list_length (current_target_pragma)] values in
15738 	 the list are the values from the pragma.  */
15739       num_pragma_values = (!force_pragma && current_target_pragma != NULL)
15740 	? list_length (current_target_pragma) : 0;
15741       for (i = 0; args; args = TREE_CHAIN (args), i++)
15742 	{
15743 	  bool is_pragma;
15744 
15745 	  is_pragma = (force_pragma || i < num_pragma_values);
15746 	  if (TREE_VALUE (args)
15747 	      && !s390_valid_target_attribute_inner_p (TREE_VALUE (args),
15748 						       opts, new_opts_set,
15749 						       is_pragma))
15750 	    {
15751 	      ret = false;
15752 	    }
15753 	}
15754       return ret;
15755     }
15756 
15757   else if (TREE_CODE (args) != STRING_CST)
15758     {
15759       error ("attribute %<target%> argument not a string");
15760       return false;
15761     }
15762 
15763   /* Handle multiple arguments separated by commas.  */
15764   next_optstr = ASTRDUP (TREE_STRING_POINTER (args));
15765 
15766   while (next_optstr && *next_optstr != '\0')
15767     {
15768       char *p = next_optstr;
15769       char *orig_p = p;
15770       char *comma = strchr (next_optstr, ',');
15771       size_t len, opt_len;
15772       int opt;
15773       bool opt_set_p;
15774       char ch;
15775       unsigned i;
15776       int mask = 0;
15777       enum cl_var_type var_type;
15778       bool found;
15779 
15780       if (comma)
15781 	{
15782 	  *comma = '\0';
15783 	  len = comma - next_optstr;
15784 	  next_optstr = comma + 1;
15785 	}
15786       else
15787 	{
15788 	  len = strlen (p);
15789 	  next_optstr = NULL;
15790 	}
15791 
15792       /* Recognize no-xxx.  */
15793       if (len > 3 && p[0] == 'n' && p[1] == 'o' && p[2] == '-')
15794 	{
15795 	  opt_set_p = false;
15796 	  p += 3;
15797 	  len -= 3;
15798 	}
15799       else
15800 	opt_set_p = true;
15801 
15802       /* Find the option.  */
15803       ch = *p;
15804       found = false;
15805       for (i = 0; i < ARRAY_SIZE (attrs); i++)
15806 	{
15807 	  opt_len = attrs[i].len;
15808 	  if (ch == attrs[i].string[0]
15809 	      && ((attrs[i].has_arg) ? len > opt_len : len == opt_len)
15810 	      && memcmp (p, attrs[i].string, opt_len) == 0)
15811 	    {
15812 	      opt = attrs[i].opt;
15813 	      if (!opt_set_p && cl_options[opt].cl_reject_negative)
15814 		continue;
15815 	      mask = cl_options[opt].var_value;
15816 	      var_type = cl_options[opt].var_type;
15817 	      found = true;
15818 	      break;
15819 	    }
15820 	}
15821 
15822       /* Process the option.  */
15823       if (!found)
15824 	{
15825 	  error ("attribute(target(\"%s\")) is unknown", orig_p);
15826 	  return false;
15827 	}
15828       else if (attrs[i].only_as_pragma && !force_pragma)
15829 	{
15830 	  /* Value is not allowed for the target attribute.  */
15831 	  error ("value %qs is not supported by attribute %<target%>",
15832 		 attrs[i].string);
15833 	  return false;
15834 	}
15835 
15836       else if (var_type == CLVC_BIT_SET || var_type == CLVC_BIT_CLEAR)
15837 	{
15838 	  if (var_type == CLVC_BIT_CLEAR)
15839 	    opt_set_p = !opt_set_p;
15840 
15841 	  if (opt_set_p)
15842 	    opts->x_target_flags |= mask;
15843 	  else
15844 	    opts->x_target_flags &= ~mask;
15845 	  new_opts_set->x_target_flags |= mask;
15846 	}
15847 
15848       else if (cl_options[opt].var_type == CLVC_BOOLEAN)
15849 	{
15850 	  int value;
15851 
15852 	  if (cl_options[opt].cl_uinteger)
15853 	    {
15854 	      /* Unsigned integer argument.  Code based on the function
15855 		 decode_cmdline_option () in opts-common.c.  */
15856 	      value = integral_argument (p + opt_len);
15857 	    }
15858 	  else
15859 	    value = (opt_set_p) ? 1 : 0;
15860 
15861 	  if (value != -1)
15862 	    {
15863 	      struct cl_decoded_option decoded;
15864 
15865 	      /* Value range check; only implemented for numeric and boolean
15866 		 options at the moment.  */
15867 	      generate_option (opt, NULL, value, CL_TARGET, &decoded);
15868 	      s390_handle_option (opts, new_opts_set, &decoded, input_location);
15869 	      set_option (opts, new_opts_set, opt, value,
15870 			  p + opt_len, DK_UNSPECIFIED, input_location,
15871 			  global_dc);
15872 	    }
15873 	  else
15874 	    {
15875 	      error ("attribute(target(\"%s\")) is unknown", orig_p);
15876 	      ret = false;
15877 	    }
15878 	}
15879 
15880       else if (cl_options[opt].var_type == CLVC_ENUM)
15881 	{
15882 	  bool arg_ok;
15883 	  int value;
15884 
15885 	  arg_ok = opt_enum_arg_to_value (opt, p + opt_len, &value, CL_TARGET);
15886 	  if (arg_ok)
15887 	    set_option (opts, new_opts_set, opt, value,
15888 			p + opt_len, DK_UNSPECIFIED, input_location,
15889 			global_dc);
15890 	  else
15891 	    {
15892 	      error ("attribute(target(\"%s\")) is unknown", orig_p);
15893 	      ret = false;
15894 	    }
15895 	}
15896 
15897       else
15898 	gcc_unreachable ();
15899     }
15900   return ret;
15901 }
15902 
15903 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL.  */
15904 
15905 tree
15906 s390_valid_target_attribute_tree (tree args,
15907 				  struct gcc_options *opts,
15908 				  const struct gcc_options *opts_set,
15909 				  bool force_pragma)
15910 {
15911   tree t = NULL_TREE;
15912   struct gcc_options new_opts_set;
15913 
15914   memset (&new_opts_set, 0, sizeof (new_opts_set));
15915 
15916   /* Process each of the options on the chain.  */
15917   if (! s390_valid_target_attribute_inner_p (args, opts, &new_opts_set,
15918 					     force_pragma))
15919     return error_mark_node;
15920 
15921   /* If some option was set (even if it has not changed), rerun
15922      s390_option_override_internal, and then save the options away.  */
15923   if (new_opts_set.x_target_flags
15924       || new_opts_set.x_s390_arch
15925       || new_opts_set.x_s390_tune
15926       || new_opts_set.x_s390_stack_guard
15927       || new_opts_set.x_s390_stack_size
15928       || new_opts_set.x_s390_branch_cost
15929       || new_opts_set.x_s390_warn_framesize
15930       || new_opts_set.x_s390_warn_dynamicstack_p)
15931     {
15932       const unsigned char *src = (const unsigned char *)opts_set;
15933       unsigned char *dest = (unsigned char *)&new_opts_set;
15934       unsigned int i;
15935 
15936       /* Merge the original option flags into the new ones.  */
15937       for (i = 0; i < sizeof(*opts_set); i++)
15938 	dest[i] |= src[i];
15939 
15940       /* Do any overrides, such as arch=xxx, or tune=xxx support.  */
15941       s390_option_override_internal (false, opts, &new_opts_set);
15942       /* Save the current options unless we are validating options for
15943 	 #pragma.  */
15944       t = build_target_option_node (opts);
15945     }
15946   return t;
15947 }
15948 
15949 /* Hook to validate attribute((target("string"))).  */
15950 
15951 static bool
15952 s390_valid_target_attribute_p (tree fndecl,
15953 			       tree ARG_UNUSED (name),
15954 			       tree args,
15955 			       int ARG_UNUSED (flags))
15956 {
15957   struct gcc_options func_options;
15958   tree new_target, new_optimize;
15959   bool ret = true;
15960 
15961   /* attribute((target("default"))) does nothing, beyond
15962      affecting multi-versioning.  */
15963   if (TREE_VALUE (args)
15964       && TREE_CODE (TREE_VALUE (args)) == STRING_CST
15965       && TREE_CHAIN (args) == NULL_TREE
15966       && strcmp (TREE_STRING_POINTER (TREE_VALUE (args)), "default") == 0)
15967     return true;
15968 
15969   tree old_optimize = build_optimization_node (&global_options);
15970 
15971   /* Get the optimization options of the current function.  */
15972   tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
15973 
15974   if (!func_optimize)
15975     func_optimize = old_optimize;
15976 
15977   /* Init func_options.  */
15978   memset (&func_options, 0, sizeof (func_options));
15979   init_options_struct (&func_options, NULL);
15980   lang_hooks.init_options_struct (&func_options);
15981 
15982   cl_optimization_restore (&func_options, TREE_OPTIMIZATION (func_optimize));
15983 
15984   /* Initialize func_options to the default before its target options can
15985      be set.  */
15986   cl_target_option_restore (&func_options,
15987 			    TREE_TARGET_OPTION (target_option_default_node));
15988 
15989   new_target = s390_valid_target_attribute_tree (args, &func_options,
15990 						 &global_options_set,
15991 						 (args ==
15992 						  current_target_pragma));
15993   new_optimize = build_optimization_node (&func_options);
15994   if (new_target == error_mark_node)
15995     ret = false;
15996   else if (fndecl && new_target)
15997     {
15998       DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;
15999       if (old_optimize != new_optimize)
16000 	DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
16001     }
16002   return ret;
16003 }
16004 
16005 /* Hook to determine if one function can safely inline another.  */
16006 
16007 static bool
16008 s390_can_inline_p (tree caller, tree callee)
16009 {
16010   tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
16011   tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
16012 
16013   if (!callee_tree)
16014     callee_tree = target_option_default_node;
16015   if (!caller_tree)
16016     caller_tree = target_option_default_node;
16017   if (callee_tree == caller_tree)
16018     return true;
16019 
16020   struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
16021   struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
16022   bool ret = true;
16023 
16024   if ((caller_opts->x_target_flags & ~(MASK_SOFT_FLOAT | MASK_HARD_DFP))
16025       != (callee_opts->x_target_flags & ~(MASK_SOFT_FLOAT | MASK_HARD_DFP)))
16026     ret = false;
16027 
16028   /* Don't inline functions to be compiled for a more recent arch into a
16029      function for an older arch.  */
16030   else if (caller_opts->x_s390_arch < callee_opts->x_s390_arch)
16031     ret = false;
16032 
16033   /* Inlining a hard float function into a soft float function is only
16034      allowed if the hard float function doesn't actually make use of
16035      floating point.
16036 
16037      We are called from FEs for multi-versioning call optimization, so
16038      beware that ipa_fn_summaries may not be available.  */
16039   else if (((TARGET_SOFT_FLOAT_P (caller_opts->x_target_flags)
16040 	     && !TARGET_SOFT_FLOAT_P (callee_opts->x_target_flags))
16041 	    || (!TARGET_HARD_DFP_P (caller_opts->x_target_flags)
16042 		&& TARGET_HARD_DFP_P (callee_opts->x_target_flags)))
16043 	   && (! ipa_fn_summaries
16044 	       || ipa_fn_summaries->get
16045 	       (cgraph_node::get (callee))->fp_expressions))
16046     ret = false;
16047 
16048   return ret;
16049 }
16050 #endif
16051 
16052 /* Set VAL to correct enum value according to the indirect-branch or
16053    function-return attribute in ATTR.  */
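/* E.g. (illustrative): __attribute__ ((indirect_branch ("thunk-extern"))) on
   a function declaration makes the string comparison below pick
   indirect_branch_thunk_extern.  */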
16054 
16055 static inline void
16056 s390_indirect_branch_attrvalue (tree attr, enum indirect_branch *val)
16057 {
16058   const char *str = TREE_STRING_POINTER (TREE_VALUE (TREE_VALUE (attr)));
16059   if (strcmp (str, "keep") == 0)
16060     *val = indirect_branch_keep;
16061   else if (strcmp (str, "thunk") == 0)
16062     *val = indirect_branch_thunk;
16063   else if (strcmp (str, "thunk-inline") == 0)
16064     *val = indirect_branch_thunk_inline;
16065   else if (strcmp (str, "thunk-extern") == 0)
16066     *val = indirect_branch_thunk_extern;
16067 }
16068 
16069 /* Memorize the setting for -mindirect-branch* and -mfunction-return*
16070    from either the cmdline or the function attributes in
16071    cfun->machine.  */
16072 
16073 static void
16074 s390_indirect_branch_settings (tree fndecl)
16075 {
16076   tree attr;
16077 
16078   if (!fndecl)
16079     return;
16080 
16081   /* Initialize with the cmdline options and let the attributes
16082      override it.  */
16083   cfun->machine->indirect_branch_jump = s390_indirect_branch_jump;
16084   cfun->machine->indirect_branch_call = s390_indirect_branch_call;
16085 
16086   cfun->machine->function_return_reg = s390_function_return_reg;
16087   cfun->machine->function_return_mem = s390_function_return_mem;
16088 
16089   if ((attr = lookup_attribute ("indirect_branch",
16090 				DECL_ATTRIBUTES (fndecl))))
16091     {
16092       s390_indirect_branch_attrvalue (attr,
16093 				      &cfun->machine->indirect_branch_jump);
16094       s390_indirect_branch_attrvalue (attr,
16095 				      &cfun->machine->indirect_branch_call);
16096     }
16097 
16098   if ((attr = lookup_attribute ("indirect_branch_jump",
16099 				DECL_ATTRIBUTES (fndecl))))
16100     s390_indirect_branch_attrvalue (attr, &cfun->machine->indirect_branch_jump);
16101 
16102   if ((attr = lookup_attribute ("indirect_branch_call",
16103 				DECL_ATTRIBUTES (fndecl))))
16104     s390_indirect_branch_attrvalue (attr, &cfun->machine->indirect_branch_call);
16105 
16106   if ((attr = lookup_attribute ("function_return",
16107 				DECL_ATTRIBUTES (fndecl))))
16108     {
16109       s390_indirect_branch_attrvalue (attr,
16110 				      &cfun->machine->function_return_reg);
16111       s390_indirect_branch_attrvalue (attr,
16112 				      &cfun->machine->function_return_mem);
16113     }
16114 
16115   if ((attr = lookup_attribute ("function_return_reg",
16116 				DECL_ATTRIBUTES (fndecl))))
16117     s390_indirect_branch_attrvalue (attr, &cfun->machine->function_return_reg);
16118 
16119   if ((attr = lookup_attribute ("function_return_mem",
16120 				DECL_ATTRIBUTES (fndecl))))
16121     s390_indirect_branch_attrvalue (attr, &cfun->machine->function_return_mem);
16122 }
16123 
16124 #if S390_USE_TARGET_ATTRIBUTE
16125 /* Restore target globals from NEW_TREE and invalidate the
16126    s390_previous_fndecl cache.  */
16127 
16128 void
16129 s390_activate_target_options (tree new_tree)
16130 {
16131   cl_target_option_restore (&global_options, TREE_TARGET_OPTION (new_tree));
16132   if (TREE_TARGET_GLOBALS (new_tree))
16133     restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
16134   else if (new_tree == target_option_default_node)
16135     restore_target_globals (&default_target_globals);
16136   else
16137     TREE_TARGET_GLOBALS (new_tree) = save_target_globals_default_opts ();
16138   s390_previous_fndecl = NULL_TREE;
16139 }
16140 #endif
16141 
16142 /* Establish appropriate back-end context for processing the function
16143    FNDECL.  The argument might be NULL to indicate processing at top
16144    level, outside of any function scope.  */
16145 static void
16146 s390_set_current_function (tree fndecl)
16147 {
16148 #if S390_USE_TARGET_ATTRIBUTE
16149   /* Only change the context if the function changes.  This hook is called
16150      several times in the course of compiling a function, and we don't want to
16151      slow things down too much or call target_reinit when it isn't safe.  */
16152   if (fndecl == s390_previous_fndecl)
16153     {
16154       s390_indirect_branch_settings (fndecl);
16155       return;
16156     }
16157 
16158   tree old_tree;
16159   if (s390_previous_fndecl == NULL_TREE)
16160     old_tree = target_option_current_node;
16161   else if (DECL_FUNCTION_SPECIFIC_TARGET (s390_previous_fndecl))
16162     old_tree = DECL_FUNCTION_SPECIFIC_TARGET (s390_previous_fndecl);
16163   else
16164     old_tree = target_option_default_node;
16165 
16166   if (fndecl == NULL_TREE)
16167     {
16168       if (old_tree != target_option_current_node)
16169 	s390_activate_target_options (target_option_current_node);
16170       return;
16171     }
16172 
16173   tree new_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
16174   if (new_tree == NULL_TREE)
16175     new_tree = target_option_default_node;
16176 
16177   if (old_tree != new_tree)
16178     s390_activate_target_options (new_tree);
16179   s390_previous_fndecl = fndecl;
16180 #endif
16181   s390_indirect_branch_settings (fndecl);
16182 }
16183 
16184 /* Implement TARGET_USE_BY_PIECES_INFRASTRUCTURE_P.  */
16185 
16186 static bool
16187 s390_use_by_pieces_infrastructure_p (unsigned HOST_WIDE_INT size,
16188 				     unsigned int align ATTRIBUTE_UNUSED,
16189 				     enum by_pieces_operation op ATTRIBUTE_UNUSED,
16190 				     bool speed_p ATTRIBUTE_UNUSED)
16191 {
16192   return (size == 1 || size == 2
16193 	  || size == 4 || (TARGET_ZARCH && size == 8));
16194 }
16195 
16196 /* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV hook.  */
16197 
16198 static void
16199 s390_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
16200 {
16201   tree sfpc = s390_builtin_decls[S390_BUILTIN_s390_sfpc];
16202   tree efpc = s390_builtin_decls[S390_BUILTIN_s390_efpc];
16203   tree call_efpc = build_call_expr (efpc, 0);
16204   tree fenv_var = create_tmp_var_raw (unsigned_type_node);
16205 
16206 #define FPC_EXCEPTION_MASK	 HOST_WIDE_INT_UC (0xf8000000)
16207 #define FPC_FLAGS_MASK		 HOST_WIDE_INT_UC (0x00f80000)
16208 #define FPC_DXC_MASK		 HOST_WIDE_INT_UC (0x0000ff00)
16209 #define FPC_EXCEPTION_MASK_SHIFT HOST_WIDE_INT_UC (24)
16210 #define FPC_FLAGS_SHIFT		 HOST_WIDE_INT_UC (16)
16211 #define FPC_DXC_SHIFT		 HOST_WIDE_INT_UC (8)
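
  /* A sketch of the FPC layout these masks assume (see the z/Architecture
     Principles of Operation for the authoritative definition): the IEEE
     exception masks sit in the leftmost byte (0xf8000000), the IEEE flags in
     the second byte (0x00f80000), and the data-exception code (DXC) in the
     third byte (0x0000ff00).  The *_SHIFT values move the respective field
     down into the low-order byte.  */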

  /* Generates the equivalent of feholdexcept (&fenv_var)

     fenv_var = __builtin_s390_efpc ();
     __builtin_s390_sfpc (fenv_var & mask) */
  tree old_fpc = build2 (MODIFY_EXPR, unsigned_type_node, fenv_var, call_efpc);
  tree new_fpc =
    build2 (BIT_AND_EXPR, unsigned_type_node, fenv_var,
	    build_int_cst (unsigned_type_node,
			   ~(FPC_DXC_MASK | FPC_FLAGS_MASK |
			     FPC_EXCEPTION_MASK)));
  tree set_new_fpc = build_call_expr (sfpc, 1, new_fpc);
  *hold = build2 (COMPOUND_EXPR, void_type_node, old_fpc, set_new_fpc);

  /* Generates the equivalent of feclearexcept (FE_ALL_EXCEPT)

     __builtin_s390_sfpc (__builtin_s390_efpc () & mask) */
  new_fpc = build2 (BIT_AND_EXPR, unsigned_type_node, call_efpc,
		    build_int_cst (unsigned_type_node,
				   ~(FPC_DXC_MASK | FPC_FLAGS_MASK)));
  *clear = build_call_expr (sfpc, 1, new_fpc);

  /* Generates the equivalent of feupdateenv (fenv_var)

  old_fpc = __builtin_s390_efpc ();
  __builtin_s390_sfpc (fenv_var);
  __atomic_feraiseexcept ((old_fpc & FPC_FLAGS_MASK) >> FPC_FLAGS_SHIFT);  */

  old_fpc = create_tmp_var_raw (unsigned_type_node);
  tree store_old_fpc = build2 (MODIFY_EXPR, void_type_node,
			       old_fpc, call_efpc);

  set_new_fpc = build_call_expr (sfpc, 1, fenv_var);

  tree raise_old_except = build2 (BIT_AND_EXPR, unsigned_type_node, old_fpc,
				  build_int_cst (unsigned_type_node,
						 FPC_FLAGS_MASK));
  raise_old_except = build2 (RSHIFT_EXPR, unsigned_type_node, raise_old_except,
			     build_int_cst (unsigned_type_node,
					    FPC_FLAGS_SHIFT));
  tree atomic_feraiseexcept
    = builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT);
  raise_old_except = build_call_expr (atomic_feraiseexcept,
				      1, raise_old_except);

  *update = build2 (COMPOUND_EXPR, void_type_node,
		    build2 (COMPOUND_EXPR, void_type_node,
			    store_old_fpc, set_new_fpc),
		    raise_old_except);

#undef FPC_EXCEPTION_MASK
#undef FPC_FLAGS_MASK
#undef FPC_DXC_MASK
#undef FPC_EXCEPTION_MASK_SHIFT
#undef FPC_FLAGS_SHIFT
#undef FPC_DXC_SHIFT
}
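
/* Rough usage sketch (generic middle-end behavior, not s390 specific): for an
   atomic compound assignment such as "_Atomic float x; ... x += y;" the
   front end emits the *HOLD sequence before the compare-and-exchange loop,
   the *CLEAR sequence whenever an iteration fails and is retried, and the
   *UPDATE sequence once the exchange succeeds, so that only the exceptions of
   the final successful computation remain raised.  */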

/* Return the vector mode to be used for inner mode MODE when doing
   vectorization.  */
static machine_mode
s390_preferred_simd_mode (scalar_mode mode)
{
  if (TARGET_VXE)
    switch (mode)
      {
      case E_SFmode:
	return V4SFmode;
      default:;
      }

  if (TARGET_VX)
    switch (mode)
      {
      case E_DFmode:
	return V2DFmode;
      case E_DImode:
	return V2DImode;
      case E_SImode:
	return V4SImode;
      case E_HImode:
	return V8HImode;
      case E_QImode:
	return V16QImode;
      default:;
      }
  return word_mode;
}
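
/* For illustration (assuming -march=z13 for TARGET_VX and -march=z14 for
   TARGET_VXE): a loop over 32-bit "int" elements is vectorized using
   V4SImode, i.e. four lanes of a 128-bit vector register, while V4SFmode for
   "float" is only preferred once the vector enhancements facility is
   available.  */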

/* Our hardware does not require vectors to be strictly aligned.  */
static bool
s390_support_vector_misalignment (machine_mode mode ATTRIBUTE_UNUSED,
				  const_tree type ATTRIBUTE_UNUSED,
				  int misalignment ATTRIBUTE_UNUSED,
				  bool is_packed ATTRIBUTE_UNUSED)
{
  if (TARGET_VX)
    return true;

  return default_builtin_support_vector_misalignment (mode, type, misalignment,
						      is_packed);
}

/* The vector ABI requires vector types to be aligned on an 8 byte
   boundary (our stack alignment).  However, we allow this to be
   overridden by the user, although this definitely breaks the ABI.  */
static HOST_WIDE_INT
s390_vector_alignment (const_tree type)
{
  if (!TARGET_VX_ABI)
    return default_vector_alignment (type);

  if (TYPE_USER_ALIGN (type))
    return TYPE_ALIGN (type);

  return MIN (64, tree_to_shwi (TYPE_SIZE (type)));
}
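
/* Example (a sketch): under the vector ABI a 16-byte "vector signed int" has
   its default alignment capped at 64 bits (8 bytes, the stack alignment),
   whereas a user-specified __attribute__ ((aligned (16))) is honored, at the
   cost of deviating from the ABI as noted above.  */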

/* Implement TARGET_CONSTANT_ALIGNMENT.  Alignment on even addresses for
   LARL instruction.  */

static HOST_WIDE_INT
s390_constant_alignment (const_tree, HOST_WIDE_INT align)
{
  return MAX (align, 16);
}
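
/* LARL encodes its displacement in halfwords (PC-relative, scaled by 2), so
   it can only address even byte addresses; guaranteeing at least 16-bit
   alignment for constants keeps them directly reachable that way.  */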

#ifdef HAVE_AS_MACHINE_MACHINEMODE
/* Implement TARGET_ASM_FILE_START.  */
static void
s390_asm_file_start (void)
{
  default_file_start ();
  s390_asm_output_machine_for_arch (asm_out_file);
}
#endif

/* Implement TARGET_ASM_FILE_END.  */
static void
s390_asm_file_end (void)
{
#ifdef HAVE_AS_GNU_ATTRIBUTE
  varpool_node *vnode;
  cgraph_node *cnode;

  FOR_EACH_VARIABLE (vnode)
    if (TREE_PUBLIC (vnode->decl))
      s390_check_type_for_vector_abi (TREE_TYPE (vnode->decl), false, false);

  FOR_EACH_FUNCTION (cnode)
    if (TREE_PUBLIC (cnode->decl))
      s390_check_type_for_vector_abi (TREE_TYPE (cnode->decl), false, false);


  if (s390_vector_abi != 0)
    fprintf (asm_out_file, "\t.gnu_attribute 8, %d\n",
	     s390_vector_abi);
#endif
  file_end_indicate_exec_stack ();

  if (flag_split_stack)
    file_end_indicate_split_stack ();
}

/* Return true if TYPE is a vector bool type.  */
static inline bool
s390_vector_bool_type_p (const_tree type)
{
  return TYPE_VECTOR_OPAQUE (type);
}

/* Return the diagnostic message string if the binary operation OP is
   not permitted on TYPE1 and TYPE2, NULL otherwise.  */
static const char*
s390_invalid_binary_op (int op ATTRIBUTE_UNUSED, const_tree type1, const_tree type2)
{
  bool bool1_p, bool2_p;
  bool plusminus_p;
  bool muldiv_p;
  bool compare_p;
  machine_mode mode1, mode2;

  if (!TARGET_ZVECTOR)
    return NULL;

  if (!VECTOR_TYPE_P (type1) || !VECTOR_TYPE_P (type2))
    return NULL;

  bool1_p = s390_vector_bool_type_p (type1);
  bool2_p = s390_vector_bool_type_p (type2);

  /* Mixing signed and unsigned types is forbidden for all
     operators.  */
  if (!bool1_p && !bool2_p
      && TYPE_UNSIGNED (type1) != TYPE_UNSIGNED (type2))
    return N_("types differ in signedness");

  plusminus_p = (op == PLUS_EXPR || op == MINUS_EXPR);
  muldiv_p = (op == MULT_EXPR || op == RDIV_EXPR || op == TRUNC_DIV_EXPR
	      || op == CEIL_DIV_EXPR || op == FLOOR_DIV_EXPR
	      || op == ROUND_DIV_EXPR);
  compare_p = (op == LT_EXPR || op == LE_EXPR || op == GT_EXPR || op == GE_EXPR
	       || op == EQ_EXPR || op == NE_EXPR);

  if (bool1_p && bool2_p && (plusminus_p || muldiv_p))
    return N_("binary operator does not support two vector bool operands");

  if (bool1_p != bool2_p && (muldiv_p || compare_p))
    return N_("binary operator does not support vector bool operand");

  mode1 = TYPE_MODE (type1);
  mode2 = TYPE_MODE (type2);

  if (bool1_p != bool2_p && plusminus_p
      && (GET_MODE_CLASS (mode1) == MODE_VECTOR_FLOAT
	  || GET_MODE_CLASS (mode2) == MODE_VECTOR_FLOAT))
    return N_("binary operator does not support mixing vector "
	      "bool with floating point vector operands");

  return NULL;
}
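
/* Illustration (hypothetical user code, -mzvector): given
   "vector bool long long b; vector double d;", the expression "b + b" is
   rejected by the two-bool-operand check above, and "b + d" by the final
   check forbidding a vector bool operand mixed with a floating point vector
   operand.  */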

/* Implement TARGET_C_EXCESS_PRECISION.

   FIXME: For historical reasons, float_t and double_t are typedef'ed to
   double on s390, causing operations on float_t to operate in a higher
   precision than is necessary.  However, it is not the case that SFmode
   operations have implicit excess precision, and we generate more optimal
   code if we let the compiler know no implicit extra precision is added.

   That means when we are compiling with -fexcess-precision=fast, the value
   we set for FLT_EVAL_METHOD will be out of line with the actual precision of
   float_t (though they would be correct for -fexcess-precision=standard).

   A complete fix would modify glibc to remove the unnecessary typedef
   of float_t to double.  */

static enum flt_eval_method
s390_excess_precision (enum excess_precision_type type)
{
  switch (type)
    {
      case EXCESS_PRECISION_TYPE_IMPLICIT:
      case EXCESS_PRECISION_TYPE_FAST:
	/* The fastest type to promote to will always be the native type,
	   whether that occurs with implicit excess precision or
	   otherwise.  */
	return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT;
      case EXCESS_PRECISION_TYPE_STANDARD:
	/* Otherwise, when we are in a standards compliant mode, to
	   ensure consistency with the implementation in glibc, report that
	   float is evaluated to the range and precision of double.  */
	return FLT_EVAL_METHOD_PROMOTE_TO_DOUBLE;
      default:
	gcc_unreachable ();
    }
  return FLT_EVAL_METHOD_UNPREDICTABLE;
}

/* Implement the TARGET_ASAN_SHADOW_OFFSET hook.  */

static unsigned HOST_WIDE_INT
s390_asan_shadow_offset (void)
{
  return TARGET_64BIT ? HOST_WIDE_INT_1U << 52 : HOST_WIDE_INT_UC (0x20000000);
}
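
/* A sketch of how this offset is used (generic AddressSanitizer scheme):
   shadow_address = (address >> 3) + offset, so 64-bit code places the shadow
   memory at 1 << 52 while 31-bit code uses 0x20000000.  */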

#ifdef HAVE_GAS_HIDDEN
# define USE_HIDDEN_LINKONCE 1
#else
# define USE_HIDDEN_LINKONCE 0
#endif

/* Output an indirect branch trampoline for target register REGNO.  */

static void
s390_output_indirect_thunk_function (unsigned int regno, bool z10_p)
{
  tree decl;
  char thunk_label[32];
  int i;

  if (z10_p)
    sprintf (thunk_label, TARGET_INDIRECT_BRANCH_THUNK_NAME_EXRL, regno);
  else
    sprintf (thunk_label, TARGET_INDIRECT_BRANCH_THUNK_NAME_EX,
	     INDIRECT_BRANCH_THUNK_REGNUM, regno);

  decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
		     get_identifier (thunk_label),
		     build_function_type_list (void_type_node, NULL_TREE));
  DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
				   NULL_TREE, void_type_node);
  TREE_PUBLIC (decl) = 1;
  TREE_STATIC (decl) = 1;
  DECL_IGNORED_P (decl) = 1;

  if (USE_HIDDEN_LINKONCE)
    {
      cgraph_node::create (decl)->set_comdat_group (DECL_ASSEMBLER_NAME (decl));

      targetm.asm_out.unique_section (decl, 0);
      switch_to_section (get_named_section (decl, NULL, 0));

      targetm.asm_out.globalize_label (asm_out_file, thunk_label);
      fputs ("\t.hidden\t", asm_out_file);
      assemble_name (asm_out_file, thunk_label);
      putc ('\n', asm_out_file);
      ASM_DECLARE_FUNCTION_NAME (asm_out_file, thunk_label, decl);
    }
  else
    {
      switch_to_section (text_section);
      ASM_OUTPUT_LABEL (asm_out_file, thunk_label);
    }

  DECL_INITIAL (decl) = make_node (BLOCK);
  current_function_decl = decl;
  allocate_struct_function (decl, false);
  init_function_start (decl);
  cfun->is_thunk = true;
  first_function_block_is_cold = false;
  final_start_function (emit_barrier (), asm_out_file, 1);

  /* This makes CFI at least usable for indirect jumps.

     Stopping in the thunk: the backtrace will point to the thunk target
     as if it had been interrupted by a signal.  For a call this means
     that the call chain will be: caller->callee->thunk   */
  if (flag_asynchronous_unwind_tables && flag_dwarf2_cfi_asm)
    {
      fputs ("\t.cfi_signal_frame\n", asm_out_file);
      fprintf (asm_out_file, "\t.cfi_return_column %d\n", regno);
      for (i = 0; i < FPR15_REGNUM; i++)
	fprintf (asm_out_file, "\t.cfi_same_value %s\n", reg_names[i]);
    }

  if (z10_p)
    {
      /* exrl  0,1f  */

      /* We generate a thunk for z10 compiled code although z10 is
	 currently not enabled.  Tell the assembler to accept the
	 instruction.  */
      if (!TARGET_CPU_Z10)
	{
	  fputs ("\t.machine push\n", asm_out_file);
	  fputs ("\t.machine z10\n", asm_out_file);
	}
      /* We use exrl even if -mzarch hasn't been specified on the
	 command line so we have to tell the assembler to accept
	 it.  */
      if (!TARGET_ZARCH)
	fputs ("\t.machinemode zarch\n", asm_out_file);

      fputs ("\texrl\t0,1f\n", asm_out_file);

      if (!TARGET_ZARCH)
	fputs ("\t.machinemode esa\n", asm_out_file);

      if (!TARGET_CPU_Z10)
	fputs ("\t.machine pop\n", asm_out_file);
    }
  else if (TARGET_CPU_ZARCH)
    {
      /* larl %r1,1f  */
      fprintf (asm_out_file, "\tlarl\t%%r%d,1f\n",
	       INDIRECT_BRANCH_THUNK_REGNUM);

      /* ex 0,0(%r1)  */
      fprintf (asm_out_file, "\tex\t0,0(%%r%d)\n",
	       INDIRECT_BRANCH_THUNK_REGNUM);
    }
  else
    gcc_unreachable ();

  /* 0:    j 0b  */
  fputs ("0:\tj\t0b\n", asm_out_file);

  /* 1:    br <regno>  */
  fprintf (asm_out_file, "1:\tbr\t%%r%d\n", regno);

  final_end_function ();
  init_insn_lengths ();
  free_after_compilation (cfun);
  set_cfun (NULL);
  current_function_decl = NULL;
}
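
/* For reference, the z10 (exrl) flavor emitted above looks roughly like this
   for regno == 1 (the actual label is determined by the
   TARGET_INDIRECT_BRANCH_THUNK_NAME_* macros):

	thunk_label:
		exrl	0,1f
	0:	j	0b
	1:	br	%r1

   The execute-relative-long instruction performs the "br %r1" out of line;
   the "j 0b" loop is intended to catch speculative execution that falls
   through the exrl.  */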

/* Implement the asm.code_end target hook.  */

static void
s390_code_end (void)
{
  int i;

  for (i = 1; i < 16; i++)
    {
      if (indirect_branch_z10thunk_mask & (1 << i))
	s390_output_indirect_thunk_function (i, true);

      if (indirect_branch_prez10thunk_mask & (1 << i))
	s390_output_indirect_thunk_function (i, false);
    }

  if (TARGET_INDIRECT_BRANCH_TABLE)
    {
      int o;
      int i;

      for (o = 0; o < INDIRECT_BRANCH_NUM_OPTIONS; o++)
	{
	  if (indirect_branch_table_label_no[o] == 0)
	    continue;

	  switch_to_section (get_section (indirect_branch_table_name[o],
					  0,
					  NULL_TREE));
	  for (i = 0; i < indirect_branch_table_label_no[o]; i++)
	    {
	      char label_start[32];

	      ASM_GENERATE_INTERNAL_LABEL (label_start,
					   indirect_branch_table_label[o], i);

	      fputs ("\t.long\t", asm_out_file);
	      assemble_name_raw (asm_out_file, label_start);
	      fputs ("-.\n", asm_out_file);
	    }
	  switch_to_section (current_function_section ());
	}
    }
}

/* Implement the TARGET_CASE_VALUES_THRESHOLD target hook.  */

unsigned int
s390_case_values_threshold (void)
{
  /* Disabling branch prediction for indirect jumps makes jump tables
     much more expensive.  */
  if (TARGET_INDIRECT_BRANCH_NOBP_JUMP)
    return 20;

  return default_case_values_threshold ();
}
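
/* In effect (a sketch): when indirect jumps are emitted without branch
   prediction (see the -mindirect-branch* options), a switch statement needs
   at least 20 distinct case values before a jump table is considered;
   smaller switches fall back to compare-and-branch sequences.  */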

/* Initialize GCC target structure.  */

#undef  TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP "\t.word\t"
#undef  TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
#undef  TARGET_ASM_INTEGER
#define TARGET_ASM_INTEGER s390_assemble_integer

#undef  TARGET_ASM_OPEN_PAREN
#define TARGET_ASM_OPEN_PAREN ""

#undef  TARGET_ASM_CLOSE_PAREN
#define TARGET_ASM_CLOSE_PAREN ""

#undef TARGET_OPTION_OVERRIDE
#define TARGET_OPTION_OVERRIDE s390_option_override

#ifdef TARGET_THREAD_SSP_OFFSET
#undef TARGET_STACK_PROTECT_GUARD
#define TARGET_STACK_PROTECT_GUARD hook_tree_void_null
#endif

#undef TARGET_ENCODE_SECTION_INFO
#define TARGET_ENCODE_SECTION_INFO s390_encode_section_info

#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P s390_scalar_mode_supported_p

#ifdef HAVE_AS_TLS
#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS true
#endif
#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM s390_cannot_force_const_mem

#undef TARGET_DELEGITIMIZE_ADDRESS
#define TARGET_DELEGITIMIZE_ADDRESS s390_delegitimize_address

#undef TARGET_LEGITIMIZE_ADDRESS
#define TARGET_LEGITIMIZE_ADDRESS s390_legitimize_address

#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY s390_return_in_memory

#undef  TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS s390_init_builtins
#undef  TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN s390_expand_builtin
#undef  TARGET_BUILTIN_DECL
#define TARGET_BUILTIN_DECL s390_builtin_decl

#undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
#define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA s390_output_addr_const_extra

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK s390_output_mi_thunk
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true

#undef TARGET_C_EXCESS_PRECISION
#define TARGET_C_EXCESS_PRECISION s390_excess_precision

#undef  TARGET_SCHED_ADJUST_PRIORITY
#define TARGET_SCHED_ADJUST_PRIORITY s390_adjust_priority
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE s390_issue_rate
#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD s390_first_cycle_multipass_dfa_lookahead

#undef TARGET_SCHED_VARIABLE_ISSUE
#define TARGET_SCHED_VARIABLE_ISSUE s390_sched_variable_issue
#undef TARGET_SCHED_REORDER
#define TARGET_SCHED_REORDER s390_sched_reorder
#undef TARGET_SCHED_INIT
#define TARGET_SCHED_INIT s390_sched_init

#undef TARGET_CANNOT_COPY_INSN_P
#define TARGET_CANNOT_COPY_INSN_P s390_cannot_copy_insn_p
#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS s390_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST s390_address_cost
#undef TARGET_REGISTER_MOVE_COST
#define TARGET_REGISTER_MOVE_COST s390_register_move_cost
#undef TARGET_MEMORY_MOVE_COST
#define TARGET_MEMORY_MOVE_COST s390_memory_move_cost
#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
  s390_builtin_vectorization_cost

#undef TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG s390_reorg

#undef TARGET_VALID_POINTER_MODE
#define TARGET_VALID_POINTER_MODE s390_valid_pointer_mode

#undef TARGET_BUILD_BUILTIN_VA_LIST
#define TARGET_BUILD_BUILTIN_VA_LIST s390_build_builtin_va_list
#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START s390_va_start
#undef TARGET_ASAN_SHADOW_OFFSET
#define TARGET_ASAN_SHADOW_OFFSET s390_asan_shadow_offset
#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR s390_gimplify_va_arg

#undef TARGET_PROMOTE_FUNCTION_MODE
#define TARGET_PROMOTE_FUNCTION_MODE s390_promote_function_mode
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE s390_pass_by_reference

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL s390_function_ok_for_sibcall
#undef TARGET_FUNCTION_ARG
#define TARGET_FUNCTION_ARG s390_function_arg
#undef TARGET_FUNCTION_ARG_ADVANCE
#define TARGET_FUNCTION_ARG_ADVANCE s390_function_arg_advance
#undef TARGET_FUNCTION_ARG_PADDING
#define TARGET_FUNCTION_ARG_PADDING s390_function_arg_padding
#undef TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE s390_function_value
#undef TARGET_LIBCALL_VALUE
#define TARGET_LIBCALL_VALUE s390_libcall_value
#undef TARGET_STRICT_ARGUMENT_NAMING
#define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true

#undef TARGET_KEEP_LEAF_WHEN_PROFILED
#define TARGET_KEEP_LEAF_WHEN_PROFILED s390_keep_leaf_when_profiled

#undef TARGET_FIXED_CONDITION_CODE_REGS
#define TARGET_FIXED_CONDITION_CODE_REGS s390_fixed_condition_code_regs

#undef TARGET_CC_MODES_COMPATIBLE
#define TARGET_CC_MODES_COMPATIBLE s390_cc_modes_compatible

#undef TARGET_INVALID_WITHIN_DOLOOP
#define TARGET_INVALID_WITHIN_DOLOOP hook_constcharptr_const_rtx_insn_null

#ifdef HAVE_AS_TLS
#undef TARGET_ASM_OUTPUT_DWARF_DTPREL
#define TARGET_ASM_OUTPUT_DWARF_DTPREL s390_output_dwarf_dtprel
#endif

#undef TARGET_DWARF_FRAME_REG_MODE
#define TARGET_DWARF_FRAME_REG_MODE s390_dwarf_frame_reg_mode

#ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
#undef TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE s390_mangle_type
#endif

#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P s390_scalar_mode_supported_p

#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P s390_vector_mode_supported_p

#undef  TARGET_PREFERRED_RELOAD_CLASS
#define TARGET_PREFERRED_RELOAD_CLASS s390_preferred_reload_class

#undef TARGET_SECONDARY_RELOAD
#define TARGET_SECONDARY_RELOAD s390_secondary_reload
#undef TARGET_SECONDARY_MEMORY_NEEDED
#define TARGET_SECONDARY_MEMORY_NEEDED s390_secondary_memory_needed
#undef TARGET_SECONDARY_MEMORY_NEEDED_MODE
#define TARGET_SECONDARY_MEMORY_NEEDED_MODE s390_secondary_memory_needed_mode

#undef TARGET_LIBGCC_CMP_RETURN_MODE
#define TARGET_LIBGCC_CMP_RETURN_MODE s390_libgcc_cmp_return_mode

#undef TARGET_LIBGCC_SHIFT_COUNT_MODE
#define TARGET_LIBGCC_SHIFT_COUNT_MODE s390_libgcc_shift_count_mode

#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P s390_legitimate_address_p

#undef TARGET_LEGITIMATE_CONSTANT_P
#define TARGET_LEGITIMATE_CONSTANT_P s390_legitimate_constant_p

#undef TARGET_LRA_P
#define TARGET_LRA_P s390_lra_p

#undef TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE s390_can_eliminate

#undef TARGET_CONDITIONAL_REGISTER_USAGE
#define TARGET_CONDITIONAL_REGISTER_USAGE s390_conditional_register_usage

#undef TARGET_LOOP_UNROLL_ADJUST
#define TARGET_LOOP_UNROLL_ADJUST s390_loop_unroll_adjust

#undef TARGET_ASM_TRAMPOLINE_TEMPLATE
#define TARGET_ASM_TRAMPOLINE_TEMPLATE s390_asm_trampoline_template
#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT s390_trampoline_init

/* PR 79421 */
#undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
#define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 1

#undef TARGET_UNWIND_WORD_MODE
#define TARGET_UNWIND_WORD_MODE s390_unwind_word_mode

#undef TARGET_CANONICALIZE_COMPARISON
#define TARGET_CANONICALIZE_COMPARISON s390_canonicalize_comparison

#undef TARGET_HARD_REGNO_SCRATCH_OK
#define TARGET_HARD_REGNO_SCRATCH_OK s390_hard_regno_scratch_ok

#undef TARGET_HARD_REGNO_NREGS
#define TARGET_HARD_REGNO_NREGS s390_hard_regno_nregs
#undef TARGET_HARD_REGNO_MODE_OK
#define TARGET_HARD_REGNO_MODE_OK s390_hard_regno_mode_ok
#undef TARGET_MODES_TIEABLE_P
#define TARGET_MODES_TIEABLE_P s390_modes_tieable_p

#undef TARGET_HARD_REGNO_CALL_PART_CLOBBERED
#define TARGET_HARD_REGNO_CALL_PART_CLOBBERED \
  s390_hard_regno_call_part_clobbered

#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE s390_attribute_table

#undef TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P
#define TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P hook_bool_const_tree_true

#undef TARGET_SET_UP_BY_PROLOGUE
#define TARGET_SET_UP_BY_PROLOGUE s300_set_up_by_prologue

#undef TARGET_EXTRA_LIVE_ON_ENTRY
#define TARGET_EXTRA_LIVE_ON_ENTRY s390_live_on_entry

#undef TARGET_USE_BY_PIECES_INFRASTRUCTURE_P
#define TARGET_USE_BY_PIECES_INFRASTRUCTURE_P \
  s390_use_by_pieces_infrastructure_p

#undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
#define TARGET_ATOMIC_ASSIGN_EXPAND_FENV s390_atomic_assign_expand_fenv

#undef TARGET_INVALID_ARG_FOR_UNPROTOTYPED_FN
#define TARGET_INVALID_ARG_FOR_UNPROTOTYPED_FN s390_invalid_arg_for_unprototyped_fn

#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE s390_preferred_simd_mode

#undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
#define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT s390_support_vector_misalignment

#undef TARGET_VECTOR_ALIGNMENT
#define TARGET_VECTOR_ALIGNMENT s390_vector_alignment

#undef TARGET_INVALID_BINARY_OP
#define TARGET_INVALID_BINARY_OP s390_invalid_binary_op

#ifdef HAVE_AS_MACHINE_MACHINEMODE
#undef TARGET_ASM_FILE_START
#define TARGET_ASM_FILE_START s390_asm_file_start
#endif

#undef TARGET_ASM_FILE_END
#define TARGET_ASM_FILE_END s390_asm_file_end

#undef TARGET_SET_CURRENT_FUNCTION
#define TARGET_SET_CURRENT_FUNCTION s390_set_current_function

#if S390_USE_TARGET_ATTRIBUTE
#undef TARGET_OPTION_VALID_ATTRIBUTE_P
#define TARGET_OPTION_VALID_ATTRIBUTE_P s390_valid_target_attribute_p

#undef TARGET_CAN_INLINE_P
#define TARGET_CAN_INLINE_P s390_can_inline_p
#endif

#undef TARGET_OPTION_RESTORE
#define TARGET_OPTION_RESTORE s390_function_specific_restore

#undef TARGET_CAN_CHANGE_MODE_CLASS
#define TARGET_CAN_CHANGE_MODE_CLASS s390_can_change_mode_class

#undef TARGET_CONSTANT_ALIGNMENT
#define TARGET_CONSTANT_ALIGNMENT s390_constant_alignment

#undef TARGET_ASM_CODE_END
#define TARGET_ASM_CODE_END s390_code_end

#undef TARGET_CASE_VALUES_THRESHOLD
#define TARGET_CASE_VALUES_THRESHOLD s390_case_values_threshold

struct gcc_target targetm = TARGET_INITIALIZER;

#include "gt-s390.h"