1 /* Subroutines used for code generation on IBM S/390 and zSeries
2    Copyright (C) 1999-2019 Free Software Foundation, Inc.
3    Contributed by Hartmut Penner (hpenner@de.ibm.com) and
4                   Ulrich Weigand (uweigand@de.ibm.com) and
5                   Andreas Krebbel (Andreas.Krebbel@de.ibm.com).
6 
7 This file is part of GCC.
8 
9 GCC is free software; you can redistribute it and/or modify it under
10 the terms of the GNU General Public License as published by the Free
11 Software Foundation; either version 3, or (at your option) any later
12 version.
13 
14 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
15 WARRANTY; without even the implied warranty of MERCHANTABILITY or
16 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
17 for more details.
18 
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3.  If not see
21 <http://www.gnu.org/licenses/>.  */
22 
23 #define IN_TARGET_CODE 1
24 
25 #include "config.h"
26 #include "system.h"
27 #include "coretypes.h"
28 #include "backend.h"
29 #include "target.h"
30 #include "target-globals.h"
31 #include "rtl.h"
32 #include "tree.h"
33 #include "gimple.h"
34 #include "cfghooks.h"
35 #include "cfgloop.h"
36 #include "df.h"
37 #include "memmodel.h"
38 #include "tm_p.h"
39 #include "stringpool.h"
40 #include "attribs.h"
41 #include "expmed.h"
42 #include "optabs.h"
43 #include "regs.h"
44 #include "emit-rtl.h"
45 #include "recog.h"
46 #include "cgraph.h"
47 #include "diagnostic-core.h"
48 #include "diagnostic.h"
49 #include "alias.h"
50 #include "fold-const.h"
51 #include "print-tree.h"
52 #include "stor-layout.h"
53 #include "varasm.h"
54 #include "calls.h"
55 #include "conditions.h"
56 #include "output.h"
57 #include "insn-attr.h"
58 #include "flags.h"
59 #include "except.h"
60 #include "dojump.h"
61 #include "explow.h"
62 #include "stmt.h"
63 #include "expr.h"
64 #include "reload.h"
65 #include "cfgrtl.h"
66 #include "cfganal.h"
67 #include "lcm.h"
68 #include "cfgbuild.h"
69 #include "cfgcleanup.h"
70 #include "debug.h"
71 #include "langhooks.h"
72 #include "internal-fn.h"
73 #include "gimple-fold.h"
74 #include "tree-eh.h"
75 #include "gimplify.h"
76 #include "params.h"
77 #include "opts.h"
78 #include "tree-pass.h"
79 #include "context.h"
80 #include "builtins.h"
81 #include "rtl-iter.h"
82 #include "intl.h"
83 #include "tm-constrs.h"
84 #include "tree-vrp.h"
85 #include "symbol-summary.h"
86 #include "ipa-prop.h"
87 #include "ipa-fnsummary.h"
88 #include "sched-int.h"
89 
90 /* This file should be included last.  */
91 #include "target-def.h"
92 
93 static bool s390_hard_regno_mode_ok (unsigned int, machine_mode);
94 
95 /* Remember the last target of s390_set_current_function.  */
96 static GTY(()) tree s390_previous_fndecl;
97 
98 /* Define the specific costs for a given cpu.  */
99 
100 struct processor_costs
101 {
102   /* multiplication */
103   const int m;        /* cost of an M instruction.  */
104   const int mghi;     /* cost of an MGHI instruction.  */
105   const int mh;       /* cost of an MH instruction.  */
106   const int mhi;      /* cost of an MHI instruction.  */
107   const int ml;       /* cost of an ML instruction.  */
108   const int mr;       /* cost of an MR instruction.  */
109   const int ms;       /* cost of an MS instruction.  */
110   const int msg;      /* cost of an MSG instruction.  */
111   const int msgf;     /* cost of an MSGF instruction.  */
112   const int msgfr;    /* cost of an MSGFR instruction.  */
113   const int msgr;     /* cost of an MSGR instruction.  */
114   const int msr;      /* cost of an MSR instruction.  */
115   const int mult_df;  /* cost of multiplication in DFmode.  */
116   const int mxbr;
117   /* square root */
118   const int sqxbr;    /* cost of square root in TFmode.  */
119   const int sqdbr;    /* cost of square root in DFmode.  */
120   const int sqebr;    /* cost of square root in SFmode.  */
121   /* multiply and add */
122   const int madbr;    /* cost of multiply and add in DFmode.  */
123   const int maebr;    /* cost of multiply and add in SFmode.  */
124   /* division */
125   const int dxbr;
126   const int ddbr;
127   const int debr;
128   const int dlgr;
129   const int dlr;
130   const int dr;
131   const int dsgfr;
132   const int dsgr;
133 };
134 
135 #define s390_cost ((const struct processor_costs *)(s390_cost_pointer))
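
/* Illustration (not from the original sources): s390_cost_pointer is
   expected to be set up from processor_table below when the -mtune
   option is processed, so a typical cost query in the backend looks like

     if (GET_CODE (x) == MULT && mode == DFmode)
       *total = s390_cost->mult_df;

   i.e. the field names above mirror the mnemonics whose latencies they
   approximate.  */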
136 
137 static const
138 struct processor_costs z900_cost =
139 {
140   COSTS_N_INSNS (5),     /* M     */
141   COSTS_N_INSNS (10),    /* MGHI  */
142   COSTS_N_INSNS (5),     /* MH    */
143   COSTS_N_INSNS (4),     /* MHI   */
144   COSTS_N_INSNS (5),     /* ML    */
145   COSTS_N_INSNS (5),     /* MR    */
146   COSTS_N_INSNS (4),     /* MS    */
147   COSTS_N_INSNS (15),    /* MSG   */
148   COSTS_N_INSNS (7),     /* MSGF  */
149   COSTS_N_INSNS (7),     /* MSGFR */
150   COSTS_N_INSNS (10),    /* MSGR  */
151   COSTS_N_INSNS (4),     /* MSR   */
152   COSTS_N_INSNS (7),     /* multiplication in DFmode */
153   COSTS_N_INSNS (13),    /* MXBR */
154   COSTS_N_INSNS (136),   /* SQXBR */
155   COSTS_N_INSNS (44),    /* SQDBR */
156   COSTS_N_INSNS (35),    /* SQEBR */
157   COSTS_N_INSNS (18),    /* MADBR */
158   COSTS_N_INSNS (13),    /* MAEBR */
159   COSTS_N_INSNS (134),   /* DXBR */
160   COSTS_N_INSNS (30),    /* DDBR */
161   COSTS_N_INSNS (27),    /* DEBR */
162   COSTS_N_INSNS (220),   /* DLGR */
163   COSTS_N_INSNS (34),    /* DLR */
164   COSTS_N_INSNS (34),    /* DR */
165   COSTS_N_INSNS (32),    /* DSGFR */
166   COSTS_N_INSNS (32),    /* DSGR */
167 };
168 
169 static const
170 struct processor_costs z990_cost =
171 {
172   COSTS_N_INSNS (4),     /* M     */
173   COSTS_N_INSNS (2),     /* MGHI  */
174   COSTS_N_INSNS (2),     /* MH    */
175   COSTS_N_INSNS (2),     /* MHI   */
176   COSTS_N_INSNS (4),     /* ML    */
177   COSTS_N_INSNS (4),     /* MR    */
178   COSTS_N_INSNS (5),     /* MS    */
179   COSTS_N_INSNS (6),     /* MSG   */
180   COSTS_N_INSNS (4),     /* MSGF  */
181   COSTS_N_INSNS (4),     /* MSGFR */
182   COSTS_N_INSNS (4),     /* MSGR  */
183   COSTS_N_INSNS (4),     /* MSR   */
184   COSTS_N_INSNS (1),     /* multiplication in DFmode */
185   COSTS_N_INSNS (28),    /* MXBR */
186   COSTS_N_INSNS (130),   /* SQXBR */
187   COSTS_N_INSNS (66),    /* SQDBR */
188   COSTS_N_INSNS (38),    /* SQEBR */
189   COSTS_N_INSNS (1),     /* MADBR */
190   COSTS_N_INSNS (1),     /* MAEBR */
191   COSTS_N_INSNS (60),    /* DXBR */
192   COSTS_N_INSNS (40),    /* DDBR */
193   COSTS_N_INSNS (26),    /* DEBR */
194   COSTS_N_INSNS (176),   /* DLGR */
195   COSTS_N_INSNS (31),    /* DLR */
196   COSTS_N_INSNS (31),    /* DR */
197   COSTS_N_INSNS (31),    /* DSGFR */
198   COSTS_N_INSNS (31),    /* DSGR */
199 };
200 
201 static const
202 struct processor_costs z9_109_cost =
203 {
204   COSTS_N_INSNS (4),     /* M     */
205   COSTS_N_INSNS (2),     /* MGHI  */
206   COSTS_N_INSNS (2),     /* MH    */
207   COSTS_N_INSNS (2),     /* MHI   */
208   COSTS_N_INSNS (4),     /* ML    */
209   COSTS_N_INSNS (4),     /* MR    */
210   COSTS_N_INSNS (5),     /* MS    */
211   COSTS_N_INSNS (6),     /* MSG   */
212   COSTS_N_INSNS (4),     /* MSGF  */
213   COSTS_N_INSNS (4),     /* MSGFR */
214   COSTS_N_INSNS (4),     /* MSGR  */
215   COSTS_N_INSNS (4),     /* MSR   */
216   COSTS_N_INSNS (1),     /* multiplication in DFmode */
217   COSTS_N_INSNS (28),    /* MXBR */
218   COSTS_N_INSNS (130),   /* SQXBR */
219   COSTS_N_INSNS (66),    /* SQDBR */
220   COSTS_N_INSNS (38),    /* SQEBR */
221   COSTS_N_INSNS (1),     /* MADBR */
222   COSTS_N_INSNS (1),     /* MAEBR */
223   COSTS_N_INSNS (60),    /* DXBR */
224   COSTS_N_INSNS (40),    /* DDBR */
225   COSTS_N_INSNS (26),    /* DEBR */
226   COSTS_N_INSNS (30),    /* DLGR */
227   COSTS_N_INSNS (23),    /* DLR */
228   COSTS_N_INSNS (23),    /* DR */
229   COSTS_N_INSNS (24),    /* DSGFR */
230   COSTS_N_INSNS (24),    /* DSGR */
231 };
232 
233 static const
234 struct processor_costs z10_cost =
235 {
236   COSTS_N_INSNS (10),    /* M     */
237   COSTS_N_INSNS (10),    /* MGHI  */
238   COSTS_N_INSNS (10),    /* MH    */
239   COSTS_N_INSNS (10),    /* MHI   */
240   COSTS_N_INSNS (10),    /* ML    */
241   COSTS_N_INSNS (10),    /* MR    */
242   COSTS_N_INSNS (10),    /* MS    */
243   COSTS_N_INSNS (10),    /* MSG   */
244   COSTS_N_INSNS (10),    /* MSGF  */
245   COSTS_N_INSNS (10),    /* MSGFR */
246   COSTS_N_INSNS (10),    /* MSGR  */
247   COSTS_N_INSNS (10),    /* MSR   */
248   COSTS_N_INSNS (1),     /* multiplication in DFmode */
249   COSTS_N_INSNS (50),    /* MXBR */
250   COSTS_N_INSNS (120),   /* SQXBR */
251   COSTS_N_INSNS (52),    /* SQDBR */
252   COSTS_N_INSNS (38),    /* SQEBR */
253   COSTS_N_INSNS (1),     /* MADBR */
254   COSTS_N_INSNS (1),     /* MAEBR */
255   COSTS_N_INSNS (111),   /* DXBR */
256   COSTS_N_INSNS (39),    /* DDBR */
257   COSTS_N_INSNS (32),    /* DEBR */
258   COSTS_N_INSNS (160),   /* DLGR */
259   COSTS_N_INSNS (71),    /* DLR */
260   COSTS_N_INSNS (71),    /* DR */
261   COSTS_N_INSNS (71),    /* DSGFR */
262   COSTS_N_INSNS (71),    /* DSGR */
263 };
264 
265 static const
266 struct processor_costs z196_cost =
267 {
268   COSTS_N_INSNS (7),     /* M     */
269   COSTS_N_INSNS (5),     /* MGHI  */
270   COSTS_N_INSNS (5),     /* MH    */
271   COSTS_N_INSNS (5),     /* MHI   */
272   COSTS_N_INSNS (7),     /* ML    */
273   COSTS_N_INSNS (7),     /* MR    */
274   COSTS_N_INSNS (6),     /* MS    */
275   COSTS_N_INSNS (8),     /* MSG   */
276   COSTS_N_INSNS (6),     /* MSGF  */
277   COSTS_N_INSNS (6),     /* MSGFR */
278   COSTS_N_INSNS (8),     /* MSGR  */
279   COSTS_N_INSNS (6),     /* MSR   */
280   COSTS_N_INSNS (1),     /* multiplication in DFmode */
281   COSTS_N_INSNS (40),    /* MXBR B+40 */
282   COSTS_N_INSNS (100),   /* SQXBR B+100 */
283   COSTS_N_INSNS (42),    /* SQDBR B+42 */
284   COSTS_N_INSNS (28),    /* SQEBR B+28 */
285   COSTS_N_INSNS (1),     /* MADBR B */
286   COSTS_N_INSNS (1),     /* MAEBR B */
287   COSTS_N_INSNS (101),   /* DXBR B+101 */
288   COSTS_N_INSNS (29),    /* DDBR */
289   COSTS_N_INSNS (22),    /* DEBR */
290   COSTS_N_INSNS (160),   /* DLGR cracked */
291   COSTS_N_INSNS (160),   /* DLR cracked */
292   COSTS_N_INSNS (160),   /* DR expanded */
293   COSTS_N_INSNS (160),   /* DSGFR cracked */
294   COSTS_N_INSNS (160),   /* DSGR cracked */
295 };
296 
297 static const
298 struct processor_costs zEC12_cost =
299 {
300   COSTS_N_INSNS (7),     /* M     */
301   COSTS_N_INSNS (5),     /* MGHI  */
302   COSTS_N_INSNS (5),     /* MH    */
303   COSTS_N_INSNS (5),     /* MHI   */
304   COSTS_N_INSNS (7),     /* ML    */
305   COSTS_N_INSNS (7),     /* MR    */
306   COSTS_N_INSNS (6),     /* MS    */
307   COSTS_N_INSNS (8),     /* MSG   */
308   COSTS_N_INSNS (6),     /* MSGF  */
309   COSTS_N_INSNS (6),     /* MSGFR */
310   COSTS_N_INSNS (8),     /* MSGR  */
311   COSTS_N_INSNS (6),     /* MSR   */
312   COSTS_N_INSNS (1),     /* multiplication in DFmode */
313   COSTS_N_INSNS (40),    /* MXBR B+40 */
314   COSTS_N_INSNS (100),   /* SQXBR B+100 */
315   COSTS_N_INSNS (42),    /* SQDBR B+42 */
316   COSTS_N_INSNS (28),    /* SQEBR B+28 */
317   COSTS_N_INSNS (1),     /* MADBR B */
318   COSTS_N_INSNS (1),     /* MAEBR B */
319   COSTS_N_INSNS (131),   /* DXBR B+131 */
320   COSTS_N_INSNS (29),    /* DDBR */
321   COSTS_N_INSNS (22),    /* DEBR */
322   COSTS_N_INSNS (160),   /* DLGR cracked */
323   COSTS_N_INSNS (160),   /* DLR cracked */
324   COSTS_N_INSNS (160),   /* DR expanded */
325   COSTS_N_INSNS (160),   /* DSGFR cracked */
326   COSTS_N_INSNS (160),   /* DSGR cracked */
327 };
328 
329 const struct s390_processor processor_table[] =
330 {
331   { "z900",   "z900",   PROCESSOR_2064_Z900,   &z900_cost,   5  },
332   { "z990",   "z990",   PROCESSOR_2084_Z990,   &z990_cost,   6  },
333   { "z9-109", "z9-109", PROCESSOR_2094_Z9_109, &z9_109_cost, 7  },
334   { "z9-ec",  "z9-ec",  PROCESSOR_2094_Z9_EC,  &z9_109_cost, 7  },
335   { "z10",    "z10",    PROCESSOR_2097_Z10,    &z10_cost,    8  },
336   { "z196",   "z196",   PROCESSOR_2817_Z196,   &z196_cost,   9  },
337   { "zEC12",  "zEC12",  PROCESSOR_2827_ZEC12,  &zEC12_cost,  10 },
338   { "z13",    "z13",    PROCESSOR_2964_Z13,    &zEC12_cost,  11 },
339   { "z14",    "arch12", PROCESSOR_3906_Z14,    &zEC12_cost,  12 },
340   { "z15",    "arch13", PROCESSOR_8561_Z15,    &zEC12_cost,  13 },
341   { "native", "",       PROCESSOR_NATIVE,      NULL,         0  }
342 };
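
/* Illustrative note (assumption about how the table is consumed, not
   stated in this file): entries are selected by the PROCESSOR_* value
   chosen for -march=/-mtune=; the cost column feeds s390_cost_pointer
   and the trailing integer is the architecture level associated with
   the CPU name.  */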
343 
344 extern int reload_completed;
345 
346 /* Kept up to date using the SCHED_VARIABLE_ISSUE hook.  */
347 static rtx_insn *last_scheduled_insn;
348 #define NUM_SIDES 2
349 
350 #define MAX_SCHED_UNITS 4
351 static int last_scheduled_unit_distance[MAX_SCHED_UNITS][NUM_SIDES];
352 
353 /* Estimate of number of cycles a long-running insn occupies an
354    execution unit.  */
355 static int fxd_longrunning[NUM_SIDES];
356 static int fpd_longrunning[NUM_SIDES];
357 
358 /* The maximum score added for an instruction whose unit hasn't been
359    in use for MAX_SCHED_MIX_DISTANCE steps.  Increase this value to
360    give instruction mix scheduling more priority over instruction
361    grouping.  */
362 #define MAX_SCHED_MIX_SCORE      2
363 
364 /* The maximum distance up to which individual scores will be
365    calculated.  Everything beyond this gives MAX_SCHED_MIX_SCORE.
366    Increase this with the OOO window size of the machine.  */
367 #define MAX_SCHED_MIX_DISTANCE 70
368 
369 /* Structure used to hold the components of a S/390 memory
370    address.  A legitimate address on S/390 is of the general
371    form
372           base + index + displacement
373    where any of the components is optional.
374 
375    base and index are registers of the class ADDR_REGS,
376    displacement is an unsigned 12-bit immediate constant.  */
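
/* For illustration: in assembler syntax such an address is written
   D(X,B), e.g.

     l   %r1,100(%r3,%r4)     # base %r4, index %r3, displacement 100
     st  %r1,8(%r15)          # base only, displacement 8

   Long-displacement instructions (z990 and later) extend the
   displacement to a signed 20-bit value.  */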
377 
378 /* The maximum number of insns in backend-generated memset/memcpy/memcmp
379    loops.  This value is used in the unroll adjust hook to detect such
380    loops.  The current max is 9, coming from the memcmp loop.  */
381 #define BLOCK_MEM_OPS_LOOP_INSNS 9
382 
383 struct s390_address
384 {
385   rtx base;
386   rtx indx;
387   rtx disp;
388   bool pointer;
389   bool literal_pool;
390 };
391 
392 /* A few accessor macros for struct cfun->machine->s390_frame_layout.  */
393 
394 #define cfun_frame_layout (cfun->machine->frame_layout)
395 #define cfun_save_high_fprs_p (!!cfun_frame_layout.high_fprs)
396 #define cfun_save_arg_fprs_p (!!(TARGET_64BIT				\
397 				 ? cfun_frame_layout.fpr_bitmap & 0x0f	\
398 				 : cfun_frame_layout.fpr_bitmap & 0x03))
399 #define cfun_gprs_save_area_size ((cfun_frame_layout.last_save_gpr_slot - \
400   cfun_frame_layout.first_save_gpr_slot + 1) * UNITS_PER_LONG)
401 #define cfun_set_fpr_save(REGNO) (cfun->machine->frame_layout.fpr_bitmap |=    \
402   (1 << (REGNO - FPR0_REGNUM)))
403 #define cfun_fpr_save_p(REGNO) (!!(cfun->machine->frame_layout.fpr_bitmap &    \
404   (1 << (REGNO - FPR0_REGNUM))))
405 #define cfun_gpr_save_slot(REGNO) \
406   cfun->machine->frame_layout.gpr_save_slots[REGNO]
407 
408 /* Number of GPRs and FPRs used for argument passing.  */
409 #define GP_ARG_NUM_REG 5
410 #define FP_ARG_NUM_REG (TARGET_64BIT ? 4 : 2)
411 #define VEC_ARG_NUM_REG 8
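
/* For illustration, the register assignment behind these counts
   (s390 ELF ABI): GPR arguments go in %r2..%r6, FPR arguments in
   %f0,%f2,%f4,%f6 on 64-bit (%f0,%f2 on 31-bit), and with the vector
   ABI vector arguments go in %v24..%v31.  */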
412 
413 /* A couple of shortcuts.  */
414 #define CONST_OK_FOR_J(x) \
415 	CONST_OK_FOR_CONSTRAINT_P((x), 'J', "J")
416 #define CONST_OK_FOR_K(x) \
417 	CONST_OK_FOR_CONSTRAINT_P((x), 'K', "K")
418 #define CONST_OK_FOR_Os(x) \
419 	CONST_OK_FOR_CONSTRAINT_P((x), 'O', "Os")
420 #define CONST_OK_FOR_Op(x) \
421 	CONST_OK_FOR_CONSTRAINT_P((x), 'O', "Op")
422 #define CONST_OK_FOR_On(x) \
423 	CONST_OK_FOR_CONSTRAINT_P((x), 'O', "On")
424 
425 #define REGNO_PAIR_OK(REGNO, MODE)                               \
426   (s390_hard_regno_nregs ((REGNO), (MODE)) == 1 || !((REGNO) & 1))
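
/* Example (follows from the macro above): on 64-bit a TImode value
   occupies two GPRs, so REGNO_PAIR_OK (2, TImode) is true (even/odd
   pair) while REGNO_PAIR_OK (3, TImode) is false; modes that fit in a
   single register are always accepted.  */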
427 
428 /* That's the read ahead of the dynamic branch prediction unit in
429    bytes on a z10 (or higher) CPU.  */
430 #define PREDICT_DISTANCE (TARGET_Z10 ? 384 : 2048)
431 
432 /* Masks per jump target register indicating which thunks need to be
433    generated.  */
434 static GTY(()) int indirect_branch_prez10thunk_mask = 0;
435 static GTY(()) int indirect_branch_z10thunk_mask = 0;
436 
437 #define INDIRECT_BRANCH_NUM_OPTIONS 4
438 
439 enum s390_indirect_branch_option
440   {
441     s390_opt_indirect_branch_jump = 0,
442     s390_opt_indirect_branch_call,
443     s390_opt_function_return_reg,
444     s390_opt_function_return_mem
445   };
446 
447 static GTY(()) int indirect_branch_table_label_no[INDIRECT_BRANCH_NUM_OPTIONS] = { 0 };
448 const char *indirect_branch_table_label[INDIRECT_BRANCH_NUM_OPTIONS] = \
449   { "LJUMP", "LCALL", "LRETREG", "LRETMEM" };
450 const char *indirect_branch_table_name[INDIRECT_BRANCH_NUM_OPTIONS] =	\
451   { ".s390_indirect_jump", ".s390_indirect_call",
452     ".s390_return_reg", ".s390_return_mem" };
453 
454 bool
455 s390_return_addr_from_memory ()
456 {
457   return cfun_gpr_save_slot(RETURN_REGNUM) == SAVE_SLOT_STACK;
458 }
459 
460 /* Indicate which ABI has been used for passing vector args.
461    0 - no vector type arguments have been passed where the ABI is relevant
462    1 - the old ABI has been used
463    2 - a vector type argument has been passed either in a vector register
464        or on the stack by value  */
465 static int s390_vector_abi = 0;
466 
467 /* Set the vector ABI marker if TYPE is subject to the vector ABI
468    switch.  The vector ABI affects only vector data types.  There are
469    two aspects of the vector ABI relevant here:
470 
471    1. vectors >= 16 bytes have an alignment of 8 bytes with the new
472    ABI and natural alignment with the old.
473 
474    2. vectors <= 16 bytes are passed in VRs or by value on the stack
475    with the new ABI but by reference on the stack with the old.
476 
477    If ARG_P is true TYPE is used for a function argument or return
478    value.  The ABI marker then is set for all vector data types.  If
479    ARG_P is false only type 1 vectors are being checked.  */
480 
481 static void
482 s390_check_type_for_vector_abi (const_tree type, bool arg_p, bool in_struct_p)
483 {
484   static hash_set<const_tree> visited_types_hash;
485 
486   if (s390_vector_abi)
487     return;
488 
489   if (type == NULL_TREE || TREE_CODE (type) == ERROR_MARK)
490     return;
491 
492   if (visited_types_hash.contains (type))
493     return;
494 
495   visited_types_hash.add (type);
496 
497   if (VECTOR_TYPE_P (type))
498     {
499       int type_size = int_size_in_bytes (type);
500 
501       /* Outside arguments only the alignment is changing and this
502 	 only happens for vector types >= 16 bytes.  */
503       if (!arg_p && type_size < 16)
504 	return;
505 
506       /* In arguments vector types > 16 bytes are passed as before (GCC
507 	 never enforced the bigger alignment for arguments which was
508 	 required by the old vector ABI).  However, it might still be
509 	 ABI relevant due to the changed alignment if it is a struct
510 	 member.  */
511       if (arg_p && type_size > 16 && !in_struct_p)
512 	return;
513 
514       s390_vector_abi = TARGET_VX_ABI ? 2 : 1;
515     }
516   else if (POINTER_TYPE_P (type) || TREE_CODE (type) == ARRAY_TYPE)
517     {
518       /* ARRAY_TYPE: Since neither of the ABIs requires more than
519 	 natural alignment, there will never be ABI-dependent padding
520 	 in an array type.  That's why we do not set in_struct_p to
521 	 true here.  */
522       s390_check_type_for_vector_abi (TREE_TYPE (type), arg_p, in_struct_p);
523     }
524   else if (TREE_CODE (type) == FUNCTION_TYPE || TREE_CODE (type) == METHOD_TYPE)
525     {
526       tree arg_chain;
527 
528       /* Check the return type.  */
529       s390_check_type_for_vector_abi (TREE_TYPE (type), true, false);
530 
531       for (arg_chain = TYPE_ARG_TYPES (type);
532 	   arg_chain;
533 	   arg_chain = TREE_CHAIN (arg_chain))
534 	s390_check_type_for_vector_abi (TREE_VALUE (arg_chain), true, false);
535     }
536   else if (RECORD_OR_UNION_TYPE_P (type))
537     {
538       tree field;
539 
540       for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
541 	{
542 	  if (TREE_CODE (field) != FIELD_DECL)
543 	    continue;
544 
545 	  s390_check_type_for_vector_abi (TREE_TYPE (field), arg_p, true);
546 	}
547     }
548 }
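
/* Illustrative example (assumption about typical use, not from this
   file): a parameter of a type such as

     struct s { __attribute__ ((vector_size (32))) int v; };

   contains a vector >= 16 bytes whose alignment differs between the
   old and the new vector ABI, so passing it sets s390_vector_abi; the
   marker is presumably emitted later (e.g. as a .gnu_attribute) so that
   ABI-incompatible objects can be flagged at link time.  */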
549 
550 
551 /* System z builtins.  */
552 
553 #include "s390-builtins.h"
554 
555 const unsigned int bflags_builtin[S390_BUILTIN_MAX + 1] =
556   {
557 #undef B_DEF
558 #undef OB_DEF
559 #undef OB_DEF_VAR
560 #define B_DEF(NAME, PATTERN, ATTRS, BFLAGS, ...) BFLAGS,
561 #define OB_DEF(...)
562 #define OB_DEF_VAR(...)
563 #include "s390-builtins.def"
564     0
565   };
566 
567 const unsigned int opflags_builtin[S390_BUILTIN_MAX + 1] =
568   {
569 #undef B_DEF
570 #undef OB_DEF
571 #undef OB_DEF_VAR
572 #define B_DEF(NAME, PATTERN, ATTRS, BFLAGS, OPFLAGS, ...) OPFLAGS,
573 #define OB_DEF(...)
574 #define OB_DEF_VAR(...)
575 #include "s390-builtins.def"
576     0
577   };
578 
579 const unsigned int bflags_overloaded_builtin[S390_OVERLOADED_BUILTIN_MAX + 1] =
580   {
581 #undef B_DEF
582 #undef OB_DEF
583 #undef OB_DEF_VAR
584 #define B_DEF(...)
585 #define OB_DEF(NAME, FIRST_VAR_NAME, LAST_VAR_NAME, BFLAGS, ...) BFLAGS,
586 #define OB_DEF_VAR(...)
587 #include "s390-builtins.def"
588     0
589   };
590 
591 const unsigned int
592 bflags_overloaded_builtin_var[S390_OVERLOADED_BUILTIN_VAR_MAX + 1] =
593   {
594 #undef B_DEF
595 #undef OB_DEF
596 #undef OB_DEF_VAR
597 #define B_DEF(...)
598 #define OB_DEF(...)
599 #define OB_DEF_VAR(NAME, PATTERN, FLAGS, OPFLAGS, FNTYPE) FLAGS,
600 #include "s390-builtins.def"
601     0
602   };
603 
604 const unsigned int
605 opflags_overloaded_builtin_var[S390_OVERLOADED_BUILTIN_VAR_MAX + 1] =
606   {
607 #undef B_DEF
608 #undef OB_DEF
609 #undef OB_DEF_VAR
610 #define B_DEF(...)
611 #define OB_DEF(...)
612 #define OB_DEF_VAR(NAME, PATTERN, FLAGS, OPFLAGS, FNTYPE) OPFLAGS,
613 #include "s390-builtins.def"
614     0
615   };
616 
617 tree s390_builtin_types[BT_MAX];
618 tree s390_builtin_fn_types[BT_FN_MAX];
619 tree s390_builtin_decls[S390_BUILTIN_MAX +
620 			S390_OVERLOADED_BUILTIN_MAX +
621 			S390_OVERLOADED_BUILTIN_VAR_MAX];
622 
623 static enum insn_code const code_for_builtin[S390_BUILTIN_MAX + 1] = {
624 #undef B_DEF
625 #undef OB_DEF
626 #undef OB_DEF_VAR
627 #define B_DEF(NAME, PATTERN, ...) CODE_FOR_##PATTERN,
628 #define OB_DEF(...)
629 #define OB_DEF_VAR(...)
630 
631 #include "s390-builtins.def"
632   CODE_FOR_nothing
633 };
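
/* Sketch of how the B_DEF lists above expand (the entry shown is
   hypothetical, purely for illustration; the real ones live in
   s390-builtins.def):

     B_DEF (s390_foo, foo_pattern, 0, B_VX, O1_U4, BT_FN_INT_INT)

   would contribute B_VX to bflags_builtin[], O1_U4 to
   opflags_builtin[] and CODE_FOR_foo_pattern to code_for_builtin[],
   all indexed by S390_BUILTIN_s390_foo.  */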
634 
635 static void
636 s390_init_builtins (void)
637 {
638   /* These definitions are being used in s390-builtins.def.  */
639   tree returns_twice_attr = tree_cons (get_identifier ("returns_twice"),
640 				       NULL, NULL);
641   tree noreturn_attr = tree_cons (get_identifier ("noreturn"), NULL, NULL);
642   tree c_uint64_type_node;
643 
644   /* The uint64_type_node from tree.c is not compatible with the C99
645      uint64_t data type.  What we want is c_uint64_type_node from
646      c-common.c.  But since backend code is not supposed to interface
647      with the frontend we recreate it here.  */
648   if (TARGET_64BIT)
649     c_uint64_type_node = long_unsigned_type_node;
650   else
651     c_uint64_type_node = long_long_unsigned_type_node;
652 
653 #undef DEF_TYPE
654 #define DEF_TYPE(INDEX, NODE, CONST_P)			\
655   if (s390_builtin_types[INDEX] == NULL)		\
656     s390_builtin_types[INDEX] = (!CONST_P) ?		\
657       (NODE) : build_type_variant ((NODE), 1, 0);
658 
659 #undef DEF_POINTER_TYPE
660 #define DEF_POINTER_TYPE(INDEX, INDEX_BASE)				\
661   if (s390_builtin_types[INDEX] == NULL)				\
662     s390_builtin_types[INDEX] =						\
663       build_pointer_type (s390_builtin_types[INDEX_BASE]);
664 
665 #undef DEF_DISTINCT_TYPE
666 #define DEF_DISTINCT_TYPE(INDEX, INDEX_BASE)				\
667   if (s390_builtin_types[INDEX] == NULL)				\
668     s390_builtin_types[INDEX] =						\
669       build_distinct_type_copy (s390_builtin_types[INDEX_BASE]);
670 
671 #undef DEF_VECTOR_TYPE
672 #define DEF_VECTOR_TYPE(INDEX, INDEX_BASE, ELEMENTS)			\
673   if (s390_builtin_types[INDEX] == NULL)				\
674     s390_builtin_types[INDEX] =						\
675       build_vector_type (s390_builtin_types[INDEX_BASE], ELEMENTS);
676 
677 #undef DEF_OPAQUE_VECTOR_TYPE
678 #define DEF_OPAQUE_VECTOR_TYPE(INDEX, INDEX_BASE, ELEMENTS)		\
679   if (s390_builtin_types[INDEX] == NULL)				\
680     s390_builtin_types[INDEX] =						\
681       build_opaque_vector_type (s390_builtin_types[INDEX_BASE], ELEMENTS);
682 
683 #undef DEF_FN_TYPE
684 #define DEF_FN_TYPE(INDEX, args...)				\
685   if (s390_builtin_fn_types[INDEX] == NULL)			\
686     s390_builtin_fn_types[INDEX] =				\
687       build_function_type_list (args, NULL_TREE);
688 #undef DEF_OV_TYPE
689 #define DEF_OV_TYPE(...)
690 #include "s390-builtin-types.def"
691 
692 #undef B_DEF
693 #define B_DEF(NAME, PATTERN, ATTRS, BFLAGS, OPFLAGS, FNTYPE)		\
694   if (s390_builtin_decls[S390_BUILTIN_##NAME] == NULL)			\
695     s390_builtin_decls[S390_BUILTIN_##NAME] =				\
696       add_builtin_function ("__builtin_" #NAME,				\
697 			    s390_builtin_fn_types[FNTYPE],		\
698 			    S390_BUILTIN_##NAME,			\
699 			    BUILT_IN_MD,				\
700 			    NULL,					\
701 			    ATTRS);
702 #undef OB_DEF
703 #define OB_DEF(NAME, FIRST_VAR_NAME, LAST_VAR_NAME, BFLAGS, FNTYPE)	\
704   if (s390_builtin_decls[S390_OVERLOADED_BUILTIN_##NAME + S390_BUILTIN_MAX] \
705       == NULL)								\
706     s390_builtin_decls[S390_OVERLOADED_BUILTIN_##NAME + S390_BUILTIN_MAX] = \
707       add_builtin_function ("__builtin_" #NAME,				\
708 			    s390_builtin_fn_types[FNTYPE],		\
709 			    S390_OVERLOADED_BUILTIN_##NAME + S390_BUILTIN_MAX, \
710 			    BUILT_IN_MD,				\
711 			    NULL,					\
712 			    0);
713 #undef OB_DEF_VAR
714 #define OB_DEF_VAR(...)
715 #include "s390-builtins.def"
716 
717 }
718 
719 /* Return true if ARG is appropriate as argument number ARGNUM of
720    builtin DECL.  The operand flags from s390-builtins.def have to
721    be passed as OP_FLAGS.  */
722 bool
723 s390_const_operand_ok (tree arg, int argnum, int op_flags, tree decl)
724 {
725   if (O_UIMM_P (op_flags))
726     {
727       int bitwidths[] = { 1, 2, 3, 4, 5, 8, 12, 16, 32 };
728       int bitwidth = bitwidths[op_flags - O_U1];
729 
730       if (!tree_fits_uhwi_p (arg)
731 	  || tree_to_uhwi (arg) > (HOST_WIDE_INT_1U << bitwidth) - 1)
732 	{
733 	  error ("constant argument %d for builtin %qF is out of range "
734 		 "(0..%wu)", argnum, decl,
735 		 (HOST_WIDE_INT_1U << bitwidth) - 1);
736 	  return false;
737 	}
738     }
739 
740   if (O_SIMM_P (op_flags))
741     {
742       int bitwidths[] = { 2, 3, 4, 5, 8, 12, 16, 32 };
743       int bitwidth = bitwidths[op_flags - O_S2];
744 
745       if (!tree_fits_shwi_p (arg)
746 	  || tree_to_shwi (arg) < -(HOST_WIDE_INT_1 << (bitwidth - 1))
747 	  || tree_to_shwi (arg) > ((HOST_WIDE_INT_1 << (bitwidth - 1)) - 1))
748 	{
749 	  error ("constant argument %d for builtin %qF is out of range "
750 		 "(%wd..%wd)", argnum, decl,
751 		 -(HOST_WIDE_INT_1 << (bitwidth - 1)),
752 		 (HOST_WIDE_INT_1 << (bitwidth - 1)) - 1);
753 	  return false;
754 	}
755     }
756   return true;
757 }
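
/* Worked example (illustrative only): for an operand flagged as a
   4-bit unsigned immediate the accepted range is 0 .. (1 << 4) - 1,
   so a constant argument of 16 triggers the "out of range (0..15)"
   error above; an 8-bit signed immediate analogously accepts
   -128 .. 127.  */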
758 
759 /* Expand an expression EXP that calls a built-in function,
760    with result going to TARGET if that's convenient
761    (and in mode MODE if that's convenient).
762    SUBTARGET may be used as the target for computing one of EXP's operands.
763    IGNORE is nonzero if the value is to be ignored.  */
764 
765 static rtx
766 s390_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
767 		     machine_mode mode ATTRIBUTE_UNUSED,
768 		     int ignore ATTRIBUTE_UNUSED)
769 {
770 #define MAX_ARGS 6
771 
772   tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
773   unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
774   enum insn_code icode;
775   rtx op[MAX_ARGS], pat;
776   int arity;
777   bool nonvoid;
778   tree arg;
779   call_expr_arg_iterator iter;
780   unsigned int all_op_flags = opflags_for_builtin (fcode);
781   machine_mode last_vec_mode = VOIDmode;
782 
783   if (TARGET_DEBUG_ARG)
784     {
785       fprintf (stderr,
786 	       "s390_expand_builtin, code = %4d, %s, bflags = 0x%x\n",
787 	       (int)fcode, IDENTIFIER_POINTER (DECL_NAME (fndecl)),
788 	       bflags_for_builtin (fcode));
789     }
790 
791   if (S390_USE_TARGET_ATTRIBUTE)
792     {
793       unsigned int bflags;
794 
795       bflags = bflags_for_builtin (fcode);
796       if ((bflags & B_HTM) && !TARGET_HTM)
797 	{
798 	  error ("builtin %qF is not supported without %<-mhtm%> "
799 		 "(default with %<-march=zEC12%> and higher).", fndecl);
800 	  return const0_rtx;
801 	}
802       if (((bflags & B_VX) || (bflags & B_VXE)) && !TARGET_VX)
803 	{
804 	  error ("builtin %qF requires %<-mvx%> "
805 		 "(default with %<-march=z13%> and higher).", fndecl);
806 	  return const0_rtx;
807 	}
808 
809       if ((bflags & B_VXE) && !TARGET_VXE)
810 	{
811 	  error ("builtin %qF requires z14 or higher.", fndecl);
812 	  return const0_rtx;
813 	}
814 
815       if ((bflags & B_VXE2) && !TARGET_VXE2)
816 	{
817 	  error ("builtin %qF requires z15 or higher.", fndecl);
818 	  return const0_rtx;
819 	}
820     }
821   if (fcode >= S390_OVERLOADED_BUILTIN_VAR_OFFSET
822       && fcode < S390_ALL_BUILTIN_MAX)
823     {
824       gcc_unreachable ();
825     }
826   else if (fcode < S390_OVERLOADED_BUILTIN_OFFSET)
827     {
828       icode = code_for_builtin[fcode];
829       /* Set a flag in the machine specific cfun part in order to support
830 	 saving/restoring of FPRs.  */
831       if (fcode == S390_BUILTIN_tbegin || fcode == S390_BUILTIN_tbegin_retry)
832 	cfun->machine->tbegin_p = true;
833     }
834   else if (fcode < S390_OVERLOADED_BUILTIN_VAR_OFFSET)
835     {
836       error ("unresolved overloaded builtin");
837       return const0_rtx;
838     }
839   else
840     internal_error ("bad builtin fcode");
841 
842   if (icode == 0)
843     internal_error ("bad builtin icode");
844 
845   nonvoid = TREE_TYPE (TREE_TYPE (fndecl)) != void_type_node;
846 
847   if (nonvoid)
848     {
849       machine_mode tmode = insn_data[icode].operand[0].mode;
850       if (!target
851 	  || GET_MODE (target) != tmode
852 	  || !(*insn_data[icode].operand[0].predicate) (target, tmode))
853 	target = gen_reg_rtx (tmode);
854 
855       /* There are builtins (e.g. vec_promote) with no vector
856 	 arguments but an element selector.  So we have to also look
857 	 at the vector return type when emitting the modulo
858 	 operation.  */
859       if (VECTOR_MODE_P (insn_data[icode].operand[0].mode))
860 	last_vec_mode = insn_data[icode].operand[0].mode;
861     }
862 
863   arity = 0;
864   FOR_EACH_CALL_EXPR_ARG (arg, iter, exp)
865     {
866       rtx tmp_rtx;
867       const struct insn_operand_data *insn_op;
868       unsigned int op_flags = all_op_flags & ((1 << O_SHIFT) - 1);
869 
870       all_op_flags = all_op_flags >> O_SHIFT;
871 
872       if (arg == error_mark_node)
873 	return NULL_RTX;
874       if (arity >= MAX_ARGS)
875 	return NULL_RTX;
876 
877       if (O_IMM_P (op_flags)
878 	  && TREE_CODE (arg) != INTEGER_CST)
879 	{
880 	  error ("constant value required for builtin %qF argument %d",
881 		 fndecl, arity + 1);
882 	  return const0_rtx;
883 	}
884 
885       if (!s390_const_operand_ok (arg, arity + 1, op_flags, fndecl))
886 	return const0_rtx;
887 
888       insn_op = &insn_data[icode].operand[arity + nonvoid];
889       op[arity] = expand_expr (arg, NULL_RTX, insn_op->mode, EXPAND_NORMAL);
890 
891       /* expand_expr truncates constants to the target mode only if it
892 	 is "convenient".  However, our checks below rely on this
893 	 being done.  */
894       if (CONST_INT_P (op[arity])
895 	  && SCALAR_INT_MODE_P (insn_op->mode)
896 	  && GET_MODE (op[arity]) != insn_op->mode)
897 	op[arity] = GEN_INT (trunc_int_for_mode (INTVAL (op[arity]),
898 						 insn_op->mode));
899 
900       /* Wrap the expanded RTX for pointer types into a MEM expr with
901 	 the proper mode.  This allows us to use e.g. (match_operand
902 	 "memory_operand" ...) in the insn patterns instead of (mem
903 	 (match_operand "address_operand" ...)).  This is helpful for
904 	 patterns not just accepting MEMs.  */
905       if (POINTER_TYPE_P (TREE_TYPE (arg))
906 	  && insn_op->predicate != address_operand)
907 	op[arity] = gen_rtx_MEM (insn_op->mode, op[arity]);
908 
909       /* Expand the modulo operation required on element selectors.  */
910       if (op_flags == O_ELEM)
911 	{
912 	  gcc_assert (last_vec_mode != VOIDmode);
913 	  op[arity] = simplify_expand_binop (SImode, code_to_optab (AND),
914 					     op[arity],
915 					     GEN_INT (GET_MODE_NUNITS (last_vec_mode) - 1),
916 					     NULL_RTX, 1, OPTAB_DIRECT);
917 	}
918 
919       /* Record the vector mode used for an element selector.  This assumes:
920 	 1. There is no builtin with two different vector modes and an element selector
921 	 2. The element selector comes after the vector type it is referring to.
922 	 This is currently true for all the builtins, but FIXME: we
923 	 should check for that.  */
924       if (VECTOR_MODE_P (insn_op->mode))
925 	last_vec_mode = insn_op->mode;
926 
927       if (insn_op->predicate (op[arity], insn_op->mode))
928 	{
929 	  arity++;
930 	  continue;
931 	}
932 
933       /* A memory operand is rejected by the memory_operand predicate.
934 	 Try making the address legal by copying it into a register.  */
935       if (MEM_P (op[arity])
936 	  && insn_op->predicate == memory_operand
937 	  && (GET_MODE (XEXP (op[arity], 0)) == Pmode
938 	      || GET_MODE (XEXP (op[arity], 0)) == VOIDmode))
939 	{
940 	  op[arity] = replace_equiv_address (op[arity],
941 					     copy_to_mode_reg (Pmode,
942 					       XEXP (op[arity], 0)));
943 	}
944       /* Some of the builtins require different modes/types than the
945 	 pattern in order to implement a specific API.  Instead of
946 	 adding many expanders which do the mode change we do it here.
947 	 E.g. s390_vec_add_u128, which is required to have vector unsigned
948 	 char arguments, is mapped to addti3.  */
949       else if (insn_op->mode != VOIDmode
950 	       && GET_MODE (op[arity]) != VOIDmode
951 	       && GET_MODE (op[arity]) != insn_op->mode
952 	       && ((tmp_rtx = simplify_gen_subreg (insn_op->mode, op[arity],
953 						   GET_MODE (op[arity]), 0))
954 		   != NULL_RTX))
955 	{
956 	  op[arity] = tmp_rtx;
957 	}
958 
959       /* The predicate rejects the operand although the mode is fine.
960 	 Copy the operand to a register.  */
961       if (!insn_op->predicate (op[arity], insn_op->mode)
962 	  && (GET_MODE (op[arity]) == insn_op->mode
963 	      || GET_MODE (op[arity]) == VOIDmode
964 	      || (insn_op->predicate == address_operand
965 		  && GET_MODE (op[arity]) == Pmode)))
966 	{
967 	  /* An address_operand usually has VOIDmode in the expander
968 	     so we cannot use this.  */
969 	  machine_mode target_mode =
970 	    (insn_op->predicate == address_operand
971 	     ? (machine_mode) Pmode : insn_op->mode);
972 	  op[arity] = copy_to_mode_reg (target_mode, op[arity]);
973 	}
974 
975       if (!insn_op->predicate (op[arity], insn_op->mode))
976 	{
977 	  error ("invalid argument %d for builtin %qF", arity + 1, fndecl);
978 	  return const0_rtx;
979 	}
980       arity++;
981     }
982 
983   switch (arity)
984     {
985     case 0:
986       pat = GEN_FCN (icode) (target);
987       break;
988     case 1:
989       if (nonvoid)
990 	pat = GEN_FCN (icode) (target, op[0]);
991       else
992 	pat = GEN_FCN (icode) (op[0]);
993       break;
994     case 2:
995       if (nonvoid)
996 	pat = GEN_FCN (icode) (target, op[0], op[1]);
997       else
998 	pat = GEN_FCN (icode) (op[0], op[1]);
999       break;
1000     case 3:
1001       if (nonvoid)
1002 	pat = GEN_FCN (icode) (target, op[0], op[1], op[2]);
1003       else
1004 	pat = GEN_FCN (icode) (op[0], op[1], op[2]);
1005       break;
1006     case 4:
1007       if (nonvoid)
1008 	pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3]);
1009       else
1010 	pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
1011       break;
1012     case 5:
1013       if (nonvoid)
1014 	pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3], op[4]);
1015       else
1016 	pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3], op[4]);
1017       break;
1018     case 6:
1019       if (nonvoid)
1020 	pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3], op[4], op[5]);
1021       else
1022 	pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3], op[4], op[5]);
1023       break;
1024     default:
1025       gcc_unreachable ();
1026     }
1027   if (!pat)
1028     return NULL_RTX;
1029   emit_insn (pat);
1030 
1031   if (nonvoid)
1032     return target;
1033   else
1034     return const0_rtx;
1035 }
1036 
1037 
1038 static const int s390_hotpatch_hw_max = 1000000;
1039 static int s390_hotpatch_hw_before_label = 0;
1040 static int s390_hotpatch_hw_after_label = 0;
1041 
1042 /* Check whether the hotpatch attribute is applied to a function and, if
1043    it has arguments, whether the arguments are valid.  */
1044 
1045 static tree
1046 s390_handle_hotpatch_attribute (tree *node, tree name, tree args,
1047 				int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
1048 {
1049   tree expr;
1050   tree expr2;
1051   int err;
1052 
1053   if (TREE_CODE (*node) != FUNCTION_DECL)
1054     {
1055       warning (OPT_Wattributes, "%qE attribute only applies to functions",
1056 	       name);
1057       *no_add_attrs = true;
1058     }
1059   if (args != NULL && TREE_CHAIN (args) != NULL)
1060     {
1061       expr = TREE_VALUE (args);
1062       expr2 = TREE_VALUE (TREE_CHAIN (args));
1063     }
1064   if (args == NULL || TREE_CHAIN (args) == NULL)
1065     err = 1;
1066   else if (TREE_CODE (expr) != INTEGER_CST
1067 	   || !INTEGRAL_TYPE_P (TREE_TYPE (expr))
1068 	   || wi::gtu_p (wi::to_wide (expr), s390_hotpatch_hw_max))
1069     err = 1;
1070   else if (TREE_CODE (expr2) != INTEGER_CST
1071 	   || !INTEGRAL_TYPE_P (TREE_TYPE (expr2))
1072 	   || wi::gtu_p (wi::to_wide (expr2), s390_hotpatch_hw_max))
1073     err = 1;
1074   else
1075     err = 0;
1076   if (err)
1077     {
1078       error ("requested %qE attribute is not a comma separated pair of"
1079 	     " non-negative integer constants or too large (max. %d)", name,
1080 	     s390_hotpatch_hw_max);
1081       *no_add_attrs = true;
1082     }
1083 
1084   return NULL_TREE;
1085 }
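
/* Usage example (documented attribute syntax, repeated here only for
   illustration):

     void foo (void) __attribute__ ((hotpatch (1, 2)));

   requests one halfword of NOP padding before and two halfwords after
   the function label; both values are checked against
   s390_hotpatch_hw_max above.  */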
1086 
1087 /* Expand the s390_vector_bool type attribute.  */
1088 
1089 static tree
1090 s390_handle_vectorbool_attribute (tree *node, tree name ATTRIBUTE_UNUSED,
1091 				  tree args ATTRIBUTE_UNUSED,
1092 				  int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
1093 {
1094   tree type = *node, result = NULL_TREE;
1095   machine_mode mode;
1096 
1097   while (POINTER_TYPE_P (type)
1098 	 || TREE_CODE (type) == FUNCTION_TYPE
1099 	 || TREE_CODE (type) == METHOD_TYPE
1100 	 || TREE_CODE (type) == ARRAY_TYPE)
1101     type = TREE_TYPE (type);
1102 
1103   mode = TYPE_MODE (type);
1104   switch (mode)
1105     {
1106     case E_DImode: case E_V2DImode:
1107       result = s390_builtin_types[BT_BV2DI];
1108       break;
1109     case E_SImode: case E_V4SImode:
1110       result = s390_builtin_types[BT_BV4SI];
1111       break;
1112     case E_HImode: case E_V8HImode:
1113       result = s390_builtin_types[BT_BV8HI];
1114       break;
1115     case E_QImode: case E_V16QImode:
1116       result = s390_builtin_types[BT_BV16QI];
1117       break;
1118     default:
1119       break;
1120     }
1121 
1122   *no_add_attrs = true;  /* No need to hang on to the attribute.  */
1123 
1124   if (result)
1125     *node = lang_hooks.types.reconstruct_complex_type (*node, result);
1126 
1127   return NULL_TREE;
1128 }
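
/* Illustration (assumption about intended use): the attribute is applied
   internally, e.g. by the -mzvector "vector bool" extension, roughly as in

     typedef __attribute__ ((vector_size (16), s390_vector_bool)) int bv4si;

   which the handler above rewrites to the BT_BV4SI builtin type based
   on the SI/V4SI mode of the declaration.  */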
1129 
1130 /* Check syntax of function decl attributes having a string type value.  */
1131 
1132 static tree
1133 s390_handle_string_attribute (tree *node, tree name ATTRIBUTE_UNUSED,
1134 			      tree args ATTRIBUTE_UNUSED,
1135 			      int flags ATTRIBUTE_UNUSED,
1136 			      bool *no_add_attrs)
1137 {
1138   tree cst;
1139 
1140   if (TREE_CODE (*node) != FUNCTION_DECL)
1141     {
1142       warning (OPT_Wattributes, "%qE attribute only applies to functions",
1143 	       name);
1144       *no_add_attrs = true;
1145     }
1146 
1147   cst = TREE_VALUE (args);
1148 
1149   if (TREE_CODE (cst) != STRING_CST)
1150     {
1151       warning (OPT_Wattributes,
1152 	       "%qE attribute requires a string constant argument",
1153 	       name);
1154       *no_add_attrs = true;
1155     }
1156 
1157   if (is_attribute_p ("indirect_branch", name)
1158       || is_attribute_p ("indirect_branch_call", name)
1159       || is_attribute_p ("function_return", name)
1160       || is_attribute_p ("function_return_reg", name)
1161       || is_attribute_p ("function_return_mem", name))
1162     {
1163       if (strcmp (TREE_STRING_POINTER (cst), "keep") != 0
1164 	  && strcmp (TREE_STRING_POINTER (cst), "thunk") != 0
1165 	  && strcmp (TREE_STRING_POINTER (cst), "thunk-extern") != 0)
1166       {
1167 	warning (OPT_Wattributes,
1168 		 "argument to %qE attribute is not "
1169 		 "(keep|thunk|thunk-extern)", name);
1170 	*no_add_attrs = true;
1171       }
1172     }
1173 
1174   if (is_attribute_p ("indirect_branch_jump", name)
1175       && strcmp (TREE_STRING_POINTER (cst), "keep") != 0
1176       && strcmp (TREE_STRING_POINTER (cst), "thunk") != 0
1177       && strcmp (TREE_STRING_POINTER (cst), "thunk-inline") != 0
1178       && strcmp (TREE_STRING_POINTER (cst), "thunk-extern") != 0)
1179     {
1180       warning (OPT_Wattributes,
1181 	       "argument to %qE attribute is not "
1182 	       "(keep|thunk|thunk-inline|thunk-extern)", name);
1183       *no_add_attrs = true;
1184     }
1185 
1186   return NULL_TREE;
1187 }
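
/* Usage example (illustrative): these attributes mirror the
   -mindirect-branch/-mfunction-return options on a per-function basis,
   e.g.

     void foo (void) __attribute__ ((indirect_branch ("thunk")));
     void bar (void) __attribute__ ((function_return ("keep")));

   Any string outside the accepted keyword sets is diagnosed by the
   warnings above.  */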
1188 
1189 static const struct attribute_spec s390_attribute_table[] = {
1190   { "hotpatch", 2, 2, true, false, false, false,
1191     s390_handle_hotpatch_attribute, NULL },
1192   { "s390_vector_bool", 0, 0, false, true, false, true,
1193     s390_handle_vectorbool_attribute, NULL },
1194   { "indirect_branch", 1, 1, true, false, false, false,
1195     s390_handle_string_attribute, NULL },
1196   { "indirect_branch_jump", 1, 1, true, false, false, false,
1197     s390_handle_string_attribute, NULL },
1198   { "indirect_branch_call", 1, 1, true, false, false, false,
1199     s390_handle_string_attribute, NULL },
1200   { "function_return", 1, 1, true, false, false, false,
1201     s390_handle_string_attribute, NULL },
1202   { "function_return_reg", 1, 1, true, false, false, false,
1203     s390_handle_string_attribute, NULL },
1204   { "function_return_mem", 1, 1, true, false, false, false,
1205     s390_handle_string_attribute, NULL },
1206 
1207   /* End element.  */
1208   { NULL,        0, 0, false, false, false, false, NULL, NULL }
1209 };
1210 
1211 /* Return the alignment for LABEL.  We default to the -falign-labels
1212    value except for the literal pool base label.  */
1213 int
1214 s390_label_align (rtx_insn *label)
1215 {
1216   rtx_insn *prev_insn = prev_active_insn (label);
1217   rtx set, src;
1218 
1219   if (prev_insn == NULL_RTX)
1220     goto old;
1221 
1222   set = single_set (prev_insn);
1223 
1224   if (set == NULL_RTX)
1225     goto old;
1226 
1227   src = SET_SRC (set);
1228 
1229   /* Don't align literal pool base labels.  */
1230   if (GET_CODE (src) == UNSPEC
1231       && XINT (src, 1) == UNSPEC_MAIN_BASE)
1232     return 0;
1233 
1234  old:
1235   return align_labels.levels[0].log;
1236 }
1237 
1238 static GTY(()) rtx got_symbol;
1239 
1240 /* Return the GOT table symbol.  The symbol will be created when the
1241    function is invoked for the first time.  */
1242 
1243 static rtx
1244 s390_got_symbol (void)
1245 {
1246   if (!got_symbol)
1247     {
1248       got_symbol = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
1249       SYMBOL_REF_FLAGS (got_symbol) = SYMBOL_FLAG_LOCAL;
1250     }
1251 
1252   return got_symbol;
1253 }
1254 
1255 static scalar_int_mode
1256 s390_libgcc_cmp_return_mode (void)
1257 {
1258   return TARGET_64BIT ? DImode : SImode;
1259 }
1260 
1261 static scalar_int_mode
1262 s390_libgcc_shift_count_mode (void)
1263 {
1264   return TARGET_64BIT ? DImode : SImode;
1265 }
1266 
1267 static scalar_int_mode
1268 s390_unwind_word_mode (void)
1269 {
1270   return TARGET_64BIT ? DImode : SImode;
1271 }
1272 
1273 /* Return true if the back end supports mode MODE.  */
1274 static bool
1275 s390_scalar_mode_supported_p (scalar_mode mode)
1276 {
1277   /* In contrast to the default implementation reject TImode constants on 31bit
1278      TARGET_ZARCH for ABI compliance.  */
1279   if (!TARGET_64BIT && TARGET_ZARCH && mode == TImode)
1280     return false;
1281 
1282   if (DECIMAL_FLOAT_MODE_P (mode))
1283     return default_decimal_float_supported_p ();
1284 
1285   return default_scalar_mode_supported_p (mode);
1286 }
1287 
1288 /* Return true if the back end supports vector mode MODE.  */
1289 static bool
1290 s390_vector_mode_supported_p (machine_mode mode)
1291 {
1292   machine_mode inner;
1293 
1294   if (!VECTOR_MODE_P (mode)
1295       || !TARGET_VX
1296       || GET_MODE_SIZE (mode) > 16)
1297     return false;
1298 
1299   inner = GET_MODE_INNER (mode);
1300 
1301   switch (inner)
1302     {
1303     case E_QImode:
1304     case E_HImode:
1305     case E_SImode:
1306     case E_DImode:
1307     case E_TImode:
1308     case E_SFmode:
1309     case E_DFmode:
1310     case E_TFmode:
1311       return true;
1312     default:
1313       return false;
1314     }
1315 }
1316 
1317 /* Set the has_landing_pad_p flag in struct machine_function to VALUE.  */
1318 
1319 void
1320 s390_set_has_landing_pad_p (bool value)
1321 {
1322   cfun->machine->has_landing_pad_p = value;
1323 }
1324 
1325 /* If two condition code modes are compatible, return a condition code
1326    mode which is compatible with both.  Otherwise, return
1327    VOIDmode.  */
1328 
1329 static machine_mode
1330 s390_cc_modes_compatible (machine_mode m1, machine_mode m2)
1331 {
1332   if (m1 == m2)
1333     return m1;
1334 
1335   switch (m1)
1336     {
1337     case E_CCZmode:
1338       if (m2 == CCUmode || m2 == CCTmode || m2 == CCZ1mode
1339 	  || m2 == CCSmode || m2 == CCSRmode || m2 == CCURmode)
1340 	return m2;
1341       return VOIDmode;
1342 
1343     case E_CCSmode:
1344     case E_CCUmode:
1345     case E_CCTmode:
1346     case E_CCSRmode:
1347     case E_CCURmode:
1348     case E_CCZ1mode:
1349       if (m2 == CCZmode)
1350 	return m1;
1351 
1352       return VOIDmode;
1353 
1354     default:
1355       return VOIDmode;
1356     }
1357   return VOIDmode;
1358 }
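
/* Example (follows directly from the code above): CCZmode is the least
   constrained mode here, so s390_cc_modes_compatible (CCZmode, CCUmode)
   and s390_cc_modes_compatible (CCUmode, CCZmode) both return CCUmode,
   while two distinct non-CCZ modes such as CCSmode and CCUmode yield
   VOIDmode.  */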
1359 
1360 /* Return true if SET either doesn't set the CC register, or else
1361    the source and destination have matching CC modes and that
1362    CC mode is at least as constrained as REQ_MODE.  */
1363 
1364 static bool
1365 s390_match_ccmode_set (rtx set, machine_mode req_mode)
1366 {
1367   machine_mode set_mode;
1368 
1369   gcc_assert (GET_CODE (set) == SET);
1370 
1371   /* These modes are supposed to be used only in CC consumer
1372      patterns.  */
1373   gcc_assert (req_mode != CCVIALLmode && req_mode != CCVIANYmode
1374 	      && req_mode != CCVFALLmode && req_mode != CCVFANYmode);
1375 
1376   if (GET_CODE (SET_DEST (set)) != REG || !CC_REGNO_P (REGNO (SET_DEST (set))))
1377     return 1;
1378 
1379   set_mode = GET_MODE (SET_DEST (set));
1380   switch (set_mode)
1381     {
1382     case E_CCZ1mode:
1383     case E_CCSmode:
1384     case E_CCSRmode:
1385     case E_CCUmode:
1386     case E_CCURmode:
1387     case E_CCLmode:
1388     case E_CCL1mode:
1389     case E_CCL2mode:
1390     case E_CCL3mode:
1391     case E_CCT1mode:
1392     case E_CCT2mode:
1393     case E_CCT3mode:
1394     case E_CCVEQmode:
1395     case E_CCVIHmode:
1396     case E_CCVIHUmode:
1397     case E_CCVFHmode:
1398     case E_CCVFHEmode:
1399       if (req_mode != set_mode)
1400 	return 0;
1401       break;
1402 
1403     case E_CCZmode:
1404       if (req_mode != CCSmode && req_mode != CCUmode && req_mode != CCTmode
1405 	  && req_mode != CCSRmode && req_mode != CCURmode
1406 	  && req_mode != CCZ1mode)
1407 	return 0;
1408       break;
1409 
1410     case E_CCAPmode:
1411     case E_CCANmode:
1412       if (req_mode != CCAmode)
1413 	return 0;
1414       break;
1415 
1416     default:
1417       gcc_unreachable ();
1418     }
1419 
1420   return (GET_MODE (SET_SRC (set)) == set_mode);
1421 }
1422 
1423 /* Return true if every SET in INSN that sets the CC register
1424    has source and destination with matching CC modes and that
1425    CC mode is at least as constrained as REQ_MODE.
1426    If REQ_MODE is VOIDmode, always return false.  */
1427 
1428 bool
1429 s390_match_ccmode (rtx_insn *insn, machine_mode req_mode)
1430 {
1431   int i;
1432 
1433   /* s390_tm_ccmode returns VOIDmode to indicate failure.  */
1434   if (req_mode == VOIDmode)
1435     return false;
1436 
1437   if (GET_CODE (PATTERN (insn)) == SET)
1438     return s390_match_ccmode_set (PATTERN (insn), req_mode);
1439 
1440   if (GET_CODE (PATTERN (insn)) == PARALLEL)
1441       for (i = 0; i < XVECLEN (PATTERN (insn), 0); i++)
1442 	{
1443 	  rtx set = XVECEXP (PATTERN (insn), 0, i);
1444 	  if (GET_CODE (set) == SET)
1445 	    if (!s390_match_ccmode_set (set, req_mode))
1446 	      return false;
1447 	}
1448 
1449   return true;
1450 }
1451 
1452 /* If a test-under-mask instruction can be used to implement
1453    (compare (and ... OP1) OP2), return the CC mode required
1454    to do that.  Otherwise, return VOIDmode.
1455    MIXED is true if the instruction can distinguish between
1456    CC1 and CC2 for mixed selected bits (TMxx), it is false
1457    if the instruction cannot (TM).  */
1458 
1459 machine_mode
1460 s390_tm_ccmode (rtx op1, rtx op2, bool mixed)
1461 {
1462   int bit0, bit1;
1463 
1464   /* ??? Fixme: should work on CONST_WIDE_INT as well.  */
1465   if (GET_CODE (op1) != CONST_INT || GET_CODE (op2) != CONST_INT)
1466     return VOIDmode;
1467 
1468   /* Selected bits all zero: CC0.
1469      e.g.: int a; if ((a & (16 + 128)) == 0) */
1470   if (INTVAL (op2) == 0)
1471     return CCTmode;
1472 
1473   /* Selected bits all one: CC3.
1474      e.g.: int a; if ((a & (16 + 128)) == 16 + 128) */
1475   if (INTVAL (op2) == INTVAL (op1))
1476     return CCT3mode;
1477 
1478   /* Exactly two bits selected, mixed zeroes and ones: CC1 or CC2. e.g.:
1479      int a;
1480      if ((a & (16 + 128)) == 16)         -> CCT1
1481      if ((a & (16 + 128)) == 128)        -> CCT2  */
1482   if (mixed)
1483     {
1484       bit1 = exact_log2 (INTVAL (op2));
1485       bit0 = exact_log2 (INTVAL (op1) ^ INTVAL (op2));
1486       if (bit0 != -1 && bit1 != -1)
1487 	return bit0 > bit1 ? CCT1mode : CCT2mode;
1488     }
1489 
1490   return VOIDmode;
1491 }
1492 
1493 /* Given a comparison code OP (EQ, NE, etc.) and the operands
1494    OP0 and OP1 of a COMPARE, return the mode to be used for the
1495    comparison.  */
1496 
1497 machine_mode
1498 s390_select_ccmode (enum rtx_code code, rtx op0, rtx op1)
1499 {
1500   switch (code)
1501     {
1502       case EQ:
1503       case NE:
1504 	if ((GET_CODE (op0) == NEG || GET_CODE (op0) == ABS)
1505 	    && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)
1506 	  return CCAPmode;
1507 	if (GET_CODE (op0) == PLUS && GET_CODE (XEXP (op0, 1)) == CONST_INT
1508 	    && CONST_OK_FOR_K (INTVAL (XEXP (op0, 1))))
1509 	  return CCAPmode;
1510 	if ((GET_CODE (op0) == PLUS || GET_CODE (op0) == MINUS
1511 	     || GET_CODE (op1) == NEG)
1512 	    && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)
1513 	  return CCLmode;
1514 
1515 	if (GET_CODE (op0) == AND)
1516 	  {
1517 	    /* Check whether we can potentially do it via TM.  */
1518 	    machine_mode ccmode;
1519 	    ccmode = s390_tm_ccmode (XEXP (op0, 1), op1, 1);
1520 	    if (ccmode != VOIDmode)
1521 	      {
1522 		/* Relax CCTmode to CCZmode to allow fall-back to AND
1523 		   if that turns out to be beneficial.  */
1524 		return ccmode == CCTmode ? CCZmode : ccmode;
1525 	      }
1526 	  }
1527 
1528 	if (register_operand (op0, HImode)
1529 	    && GET_CODE (op1) == CONST_INT
1530 	    && (INTVAL (op1) == -1 || INTVAL (op1) == 65535))
1531 	  return CCT3mode;
1532 	if (register_operand (op0, QImode)
1533 	    && GET_CODE (op1) == CONST_INT
1534 	    && (INTVAL (op1) == -1 || INTVAL (op1) == 255))
1535 	  return CCT3mode;
1536 
1537 	return CCZmode;
1538 
1539       case LE:
1540       case LT:
1541       case GE:
1542       case GT:
1543 	/* The only overflow condition of NEG and ABS happens when
1544 	   INT_MIN is used as parameter, which stays negative. So
1545 	   we have an overflow from a positive value to a negative.
1546 	   Using CCAP mode the resulting cc can be used for comparisons.  */
1547 	if ((GET_CODE (op0) == NEG || GET_CODE (op0) == ABS)
1548 	    && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)
1549 	  return CCAPmode;
1550 
1551 	/* If constants are involved in an add instruction it is possible to use
1552 	   the resulting cc for comparisons with zero. Knowing the sign of the
1553 	   constant the overflow behavior gets predictable. e.g.:
1554 	     int a, b; if ((b = a + c) > 0)
1555 	   with c as a constant value: c < 0 -> CCAN and c >= 0 -> CCAP  */
1556 	if (GET_CODE (op0) == PLUS && GET_CODE (XEXP (op0, 1)) == CONST_INT
1557 	    && (CONST_OK_FOR_K (INTVAL (XEXP (op0, 1)))
1558 		|| (CONST_OK_FOR_CONSTRAINT_P (INTVAL (XEXP (op0, 1)), 'O', "Os")
1559 		    /* Avoid INT32_MIN on 32 bit.  */
1560 		    && (!TARGET_ZARCH || INTVAL (XEXP (op0, 1)) != -0x7fffffff - 1))))
1561 	  {
1562 	    if (INTVAL (XEXP((op0), 1)) < 0)
1563 	      return CCANmode;
1564 	    else
1565 	      return CCAPmode;
1566 	  }
1567 	/* Fall through.  */
1568       case UNORDERED:
1569       case ORDERED:
1570       case UNEQ:
1571       case UNLE:
1572       case UNLT:
1573       case UNGE:
1574       case UNGT:
1575       case LTGT:
1576 	if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
1577 	    && GET_CODE (op1) != CONST_INT)
1578 	  return CCSRmode;
1579 	return CCSmode;
1580 
1581       case LTU:
1582       case GEU:
1583 	if (GET_CODE (op0) == PLUS
1584 	    && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)
1585 	  return CCL1mode;
1586 
1587 	if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
1588 	    && GET_CODE (op1) != CONST_INT)
1589 	  return CCURmode;
1590 	return CCUmode;
1591 
1592       case LEU:
1593       case GTU:
1594 	if (GET_CODE (op0) == MINUS
1595 	    && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)
1596 	  return CCL2mode;
1597 
1598 	if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
1599 	    && GET_CODE (op1) != CONST_INT)
1600 	  return CCURmode;
1601 	return CCUmode;
1602 
1603       default:
1604 	gcc_unreachable ();
1605     }
1606 }
1607 
1608 /* Replace the comparison OP0 CODE OP1 by a semantically equivalent one
1609    that we can implement more efficiently.  */
1610 
1611 static void
1612 s390_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
1613 			      bool op0_preserve_value)
1614 {
1615   if (op0_preserve_value)
1616     return;
1617 
1618   /* Convert ZERO_EXTRACT back to AND to enable TM patterns.  */
1619   if ((*code == EQ || *code == NE)
1620       && *op1 == const0_rtx
1621       && GET_CODE (*op0) == ZERO_EXTRACT
1622       && GET_CODE (XEXP (*op0, 1)) == CONST_INT
1623       && GET_CODE (XEXP (*op0, 2)) == CONST_INT
1624       && SCALAR_INT_MODE_P (GET_MODE (XEXP (*op0, 0))))
1625     {
1626       rtx inner = XEXP (*op0, 0);
1627       HOST_WIDE_INT modesize = GET_MODE_BITSIZE (GET_MODE (inner));
1628       HOST_WIDE_INT len = INTVAL (XEXP (*op0, 1));
1629       HOST_WIDE_INT pos = INTVAL (XEXP (*op0, 2));
1630 
1631       if (len > 0 && len < modesize
1632 	  && pos >= 0 && pos + len <= modesize
1633 	  && modesize <= HOST_BITS_PER_WIDE_INT)
1634 	{
1635 	  unsigned HOST_WIDE_INT block;
1636 	  block = (HOST_WIDE_INT_1U << len) - 1;
1637 	  block <<= modesize - pos - len;
1638 
1639 	  *op0 = gen_rtx_AND (GET_MODE (inner), inner,
1640 			      gen_int_mode (block, GET_MODE (inner)));
1641 	}
1642     }
1643 
1644   /* Narrow AND of memory against immediate to enable TM.  */
1645   if ((*code == EQ || *code == NE)
1646       && *op1 == const0_rtx
1647       && GET_CODE (*op0) == AND
1648       && GET_CODE (XEXP (*op0, 1)) == CONST_INT
1649       && SCALAR_INT_MODE_P (GET_MODE (XEXP (*op0, 0))))
1650     {
1651       rtx inner = XEXP (*op0, 0);
1652       rtx mask = XEXP (*op0, 1);
1653 
1654       /* Ignore paradoxical SUBREGs if all extra bits are masked out.  */
1655       if (GET_CODE (inner) == SUBREG
1656 	  && SCALAR_INT_MODE_P (GET_MODE (SUBREG_REG (inner)))
1657 	  && (GET_MODE_SIZE (GET_MODE (inner))
1658 	      >= GET_MODE_SIZE (GET_MODE (SUBREG_REG (inner))))
1659 	  && ((INTVAL (mask)
1660 	       & GET_MODE_MASK (GET_MODE (inner))
1661 	       & ~GET_MODE_MASK (GET_MODE (SUBREG_REG (inner))))
1662 	      == 0))
1663 	inner = SUBREG_REG (inner);
1664 
1665       /* Do not change volatile MEMs.  */
1666       if (MEM_P (inner) && !MEM_VOLATILE_P (inner))
1667 	{
1668 	  int part = s390_single_part (XEXP (*op0, 1),
1669 				       GET_MODE (inner), QImode, 0);
1670 	  if (part >= 0)
1671 	    {
1672 	      mask = gen_int_mode (s390_extract_part (mask, QImode, 0), QImode);
1673 	      inner = adjust_address_nv (inner, QImode, part);
1674 	      *op0 = gen_rtx_AND (QImode, inner, mask);
1675 	    }
1676 	}
1677     }
1678 
1679   /* Narrow comparisons against 0xffff to HImode if possible.  */
1680   if ((*code == EQ || *code == NE)
1681       && GET_CODE (*op1) == CONST_INT
1682       && INTVAL (*op1) == 0xffff
1683       && SCALAR_INT_MODE_P (GET_MODE (*op0))
1684       && (nonzero_bits (*op0, GET_MODE (*op0))
1685 	  & ~HOST_WIDE_INT_UC (0xffff)) == 0)
1686     {
1687       *op0 = gen_lowpart (HImode, *op0);
1688       *op1 = constm1_rtx;
1689     }
1690 
1691   /* Remove redundant UNSPEC_STRCMPCC_TO_INT conversions if possible.  */
1692   if (GET_CODE (*op0) == UNSPEC
1693       && XINT (*op0, 1) == UNSPEC_STRCMPCC_TO_INT
1694       && XVECLEN (*op0, 0) == 1
1695       && GET_MODE (XVECEXP (*op0, 0, 0)) == CCUmode
1696       && GET_CODE (XVECEXP (*op0, 0, 0)) == REG
1697       && REGNO (XVECEXP (*op0, 0, 0)) == CC_REGNUM
1698       && *op1 == const0_rtx)
1699     {
1700       enum rtx_code new_code = UNKNOWN;
1701       switch (*code)
1702 	{
1703 	  case EQ: new_code = EQ;  break;
1704 	  case NE: new_code = NE;  break;
1705 	  case LT: new_code = GTU; break;
1706 	  case GT: new_code = LTU; break;
1707 	  case LE: new_code = GEU; break;
1708 	  case GE: new_code = LEU; break;
1709 	  default: break;
1710 	}
1711 
1712       if (new_code != UNKNOWN)
1713 	{
1714 	  *op0 = XVECEXP (*op0, 0, 0);
1715 	  *code = new_code;
1716 	}
1717     }
1718 
1719   /* Remove redundant UNSPEC_CC_TO_INT conversions if possible.  */
1720   if (GET_CODE (*op0) == UNSPEC
1721       && XINT (*op0, 1) == UNSPEC_CC_TO_INT
1722       && XVECLEN (*op0, 0) == 1
1723       && GET_CODE (XVECEXP (*op0, 0, 0)) == REG
1724       && REGNO (XVECEXP (*op0, 0, 0)) == CC_REGNUM
1725       && CONST_INT_P (*op1))
1726     {
1727       enum rtx_code new_code = UNKNOWN;
1728       switch (GET_MODE (XVECEXP (*op0, 0, 0)))
1729 	{
1730 	case E_CCZmode:
1731 	case E_CCRAWmode:
1732 	  switch (*code)
1733 	    {
1734 	    case EQ: new_code = EQ;  break;
1735 	    case NE: new_code = NE;  break;
1736 	    default: break;
1737 	    }
1738 	  break;
1739 	default: break;
1740 	}
1741 
1742       if (new_code != UNKNOWN)
1743 	{
1744 	  /* For CCRAWmode put the required cc mask into the second
1745 	     operand.  */
1746 	  if (GET_MODE (XVECEXP (*op0, 0, 0)) == CCRAWmode
1747 	      && INTVAL (*op1) >= 0 && INTVAL (*op1) <= 3)
1748 	    *op1 = gen_rtx_CONST_INT (VOIDmode, 1 << (3 - INTVAL (*op1)));
1749 	  *op0 = XVECEXP (*op0, 0, 0);
1750 	  *code = new_code;
1751 	}
1752     }
1753 
1754   /* Simplify cascaded EQ, NE with const0_rtx.  */
1755   if ((*code == NE || *code == EQ)
1756       && (GET_CODE (*op0) == EQ || GET_CODE (*op0) == NE)
1757       && GET_MODE (*op0) == SImode
1758       && GET_MODE (XEXP (*op0, 0)) == CCZ1mode
1759       && REG_P (XEXP (*op0, 0))
1760       && XEXP (*op0, 1) == const0_rtx
1761       && *op1 == const0_rtx)
1762     {
1763       if ((*code == EQ && GET_CODE (*op0) == NE)
1764 	  || (*code == NE && GET_CODE (*op0) == EQ))
1765 	*code = EQ;
1766       else
1767 	*code = NE;
1768       *op0 = XEXP (*op0, 0);
1769     }
1770 
1771   /* Prefer register over memory as first operand.  */
1772   if (MEM_P (*op0) && REG_P (*op1))
1773     {
1774       rtx tem = *op0; *op0 = *op1; *op1 = tem;
1775       *code = (int)swap_condition ((enum rtx_code)*code);
1776     }
1777 
1778   /* A comparison result is compared against zero.  Replace it with
1779      the (perhaps inverted) original comparison.
1780      This probably should be done by simplify_relational_operation.  */
1781   if ((*code == EQ || *code == NE)
1782       && *op1 == const0_rtx
1783       && COMPARISON_P (*op0)
1784       && CC_REG_P (XEXP (*op0, 0)))
1785     {
1786       enum rtx_code new_code;
1787 
1788       if (*code == EQ)
1789 	new_code = reversed_comparison_code_parts (GET_CODE (*op0),
1790 						   XEXP (*op0, 0),
1791 						   XEXP (*op0, 1), NULL);
1792       else
1793 	new_code = GET_CODE (*op0);
1794 
1795       if (new_code != UNKNOWN)
1796 	{
1797 	  *code = new_code;
1798 	  *op1 = XEXP (*op0, 1);
1799 	  *op0 = XEXP (*op0, 0);
1800 	}
1801     }
1802 
1803   /* ~a==b -> ~(a^b)==0   ~a!=b -> ~(a^b)!=0 */
1804   if (TARGET_Z15
1805       && (*code == EQ || *code == NE)
1806       && (GET_MODE (*op0) == DImode || GET_MODE (*op0) == SImode)
1807       && GET_CODE (*op0) == NOT)
1808     {
1809       machine_mode mode = GET_MODE (*op0);
1810       *op0 = gen_rtx_XOR (mode, XEXP (*op0, 0), *op1);
1811       *op0 = gen_rtx_NOT (mode, *op0);
1812       *op1 = const0_rtx;
1813     }
1814 
1815   /* a&b == -1 -> ~a|~b == 0    a|b == -1 -> ~a&~b == 0  */
1816   if (TARGET_Z15
1817       && (*code == EQ || *code == NE)
1818       && (GET_CODE (*op0) == AND || GET_CODE (*op0) == IOR)
1819       && (GET_MODE (*op0) == DImode || GET_MODE (*op0) == SImode)
1820       && CONST_INT_P (*op1)
1821       && *op1 == constm1_rtx)
1822     {
1823       machine_mode mode = GET_MODE (*op0);
1824       rtx op00 = gen_rtx_NOT (mode, XEXP (*op0, 0));
1825       rtx op01 = gen_rtx_NOT (mode, XEXP (*op0, 1));
1826 
1827       if (GET_CODE (*op0) == AND)
1828 	*op0 = gen_rtx_IOR (mode, op00, op01);
1829       else
1830 	*op0 = gen_rtx_AND (mode, op00, op01);
1831 
1832       *op1 = const0_rtx;
1833     }
1834 }
1835 
1836 
1837 /* Emit a compare instruction suitable to implement the comparison
1838    OP0 CODE OP1.  Return the correct condition RTL to be placed in
1839    the IF_THEN_ELSE of the conditional branch testing the result.  */
1840 
1841 rtx
1842 s390_emit_compare (enum rtx_code code, rtx op0, rtx op1)
1843 {
1844   machine_mode mode = s390_select_ccmode (code, op0, op1);
1845   rtx cc;
1846 
1847   if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC)
1848     {
1849       /* Do not output a redundant compare instruction if a
1850 	 compare_and_swap pattern already computed the result and the
1851 	 machine modes are compatible.  */
1852       gcc_assert (s390_cc_modes_compatible (GET_MODE (op0), mode)
1853 		  == GET_MODE (op0));
1854       cc = op0;
1855     }
1856   else
1857     {
1858       cc = gen_rtx_REG (mode, CC_REGNUM);
1859       emit_insn (gen_rtx_SET (cc, gen_rtx_COMPARE (mode, op0, op1)));
1860     }
1861 
1862   return gen_rtx_fmt_ee (code, VOIDmode, cc, const0_rtx);
1863 }
1864 
1865 /* If MEM is not a legitimate compare-and-swap memory operand, return a new
1866    MEM, whose address is a pseudo containing the original MEM's address.  */
1867 
1868 static rtx
1869 s390_legitimize_cs_operand (rtx mem)
1870 {
1871   rtx tmp;
1872 
1873   if (!contains_symbol_ref_p (mem))
1874     return mem;
1875   tmp = gen_reg_rtx (Pmode);
1876   emit_move_insn (tmp, copy_rtx (XEXP (mem, 0)));
1877   return change_address (mem, VOIDmode, tmp);
1878 }
1879 
1880 /* Emit a compare-and-swap instruction in the mode of MEM, setting MEM to
1881    NEW_RTX if its current value (copied into OLD) matches CMP.
1882    Return the correct condition RTL to be placed in the IF_THEN_ELSE of the
1883    conditional branch testing the result.  */
1884 
1885 static rtx
1886 s390_emit_compare_and_swap (enum rtx_code code, rtx old, rtx mem,
1887 			    rtx cmp, rtx new_rtx, machine_mode ccmode)
1888 {
1889   rtx cc;
1890 
1891   mem = s390_legitimize_cs_operand (mem);
1892   cc = gen_rtx_REG (ccmode, CC_REGNUM);
1893   switch (GET_MODE (mem))
1894     {
1895     case E_SImode:
1896       emit_insn (gen_atomic_compare_and_swapsi_internal (old, mem, cmp,
1897 							 new_rtx, cc));
1898       break;
1899     case E_DImode:
1900       emit_insn (gen_atomic_compare_and_swapdi_internal (old, mem, cmp,
1901 							 new_rtx, cc));
1902       break;
1903     case E_TImode:
1904       emit_insn (gen_atomic_compare_and_swapti_internal (old, mem, cmp,
1905 							 new_rtx, cc));
1906       break;
1907     case E_QImode:
1908     case E_HImode:
1909     default:
1910       gcc_unreachable ();
1911     }
1912   return s390_emit_compare (code, cc, const0_rtx);
1913 }
1914 
1915 /* Emit a jump instruction to TARGET and return it.  If COND is
1916    NULL_RTX, emit an unconditional jump, else a conditional jump under
1917    condition COND.  */
1918 
1919 rtx_insn *
1920 s390_emit_jump (rtx target, rtx cond)
1921 {
1922   rtx insn;
1923 
1924   target = gen_rtx_LABEL_REF (VOIDmode, target);
1925   if (cond)
1926     target = gen_rtx_IF_THEN_ELSE (VOIDmode, cond, target, pc_rtx);
1927 
1928   insn = gen_rtx_SET (pc_rtx, target);
1929   return emit_jump_insn (insn);
1930 }
1931 
1932 /* Return branch condition mask to implement a branch
1933    specified by CODE.  Return -1 for invalid comparisons.  */
1934 
1935 int
1936 s390_branch_condition_mask (rtx code)
1937 {
1938   const int CC0 = 1 << 3;
1939   const int CC1 = 1 << 2;
1940   const int CC2 = 1 << 1;
1941   const int CC3 = 1 << 0;
1942 
1943   gcc_assert (GET_CODE (XEXP (code, 0)) == REG);
1944   gcc_assert (REGNO (XEXP (code, 0)) == CC_REGNUM);
1945   gcc_assert (XEXP (code, 1) == const0_rtx
1946 	      || (GET_MODE (XEXP (code, 0)) == CCRAWmode
1947 		  && CONST_INT_P (XEXP (code, 1))));
1948 
1949 
1950   switch (GET_MODE (XEXP (code, 0)))
1951     {
1952     case E_CCZmode:
1953     case E_CCZ1mode:
1954       switch (GET_CODE (code))
1955 	{
1956 	case EQ:	return CC0;
1957 	case NE:	return CC1 | CC2 | CC3;
1958 	default:	return -1;
1959 	}
1960       break;
1961 
1962     case E_CCT1mode:
1963       switch (GET_CODE (code))
1964 	{
1965 	case EQ:	return CC1;
1966 	case NE:	return CC0 | CC2 | CC3;
1967 	default:	return -1;
1968 	}
1969       break;
1970 
1971     case E_CCT2mode:
1972       switch (GET_CODE (code))
1973 	{
1974 	case EQ:	return CC2;
1975 	case NE:	return CC0 | CC1 | CC3;
1976 	default:	return -1;
1977 	}
1978       break;
1979 
1980     case E_CCT3mode:
1981       switch (GET_CODE (code))
1982 	{
1983 	case EQ:	return CC3;
1984 	case NE:	return CC0 | CC1 | CC2;
1985 	default:	return -1;
1986 	}
1987       break;
1988 
1989     case E_CCLmode:
1990       switch (GET_CODE (code))
1991 	{
1992 	case EQ:	return CC0 | CC2;
1993 	case NE:	return CC1 | CC3;
1994 	default:	return -1;
1995 	}
1996       break;
1997 
1998     case E_CCL1mode:
1999       switch (GET_CODE (code))
2000 	{
2001 	case LTU:	return CC2 | CC3;  /* carry */
2002 	case GEU:	return CC0 | CC1;  /* no carry */
2003 	default:	return -1;
2004 	}
2005       break;
2006 
2007     case E_CCL2mode:
2008       switch (GET_CODE (code))
2009 	{
2010 	case GTU:	return CC0 | CC1;  /* borrow */
2011 	case LEU:	return CC2 | CC3;  /* no borrow */
2012 	default:	return -1;
2013 	}
2014       break;
2015 
2016     case E_CCL3mode:
2017       switch (GET_CODE (code))
2018 	{
2019 	case EQ:	return CC0 | CC2;
2020 	case NE:	return CC1 | CC3;
2021 	case LTU:	return CC1;
2022 	case GTU:	return CC3;
2023 	case LEU:	return CC1 | CC2;
2024 	case GEU:	return CC2 | CC3;
2025 	default:	return -1;
2026 	}
2027 
2028     case E_CCUmode:
2029       switch (GET_CODE (code))
2030 	{
2031 	case EQ:	return CC0;
2032 	case NE:	return CC1 | CC2 | CC3;
2033 	case LTU:	return CC1;
2034 	case GTU:	return CC2;
2035 	case LEU:	return CC0 | CC1;
2036 	case GEU:	return CC0 | CC2;
2037 	default:	return -1;
2038 	}
2039       break;
2040 
2041     case E_CCURmode:
2042       switch (GET_CODE (code))
2043 	{
2044 	case EQ:	return CC0;
2045 	case NE:	return CC2 | CC1 | CC3;
2046 	case LTU:	return CC2;
2047 	case GTU:	return CC1;
2048 	case LEU:	return CC0 | CC2;
2049 	case GEU:	return CC0 | CC1;
2050 	default:	return -1;
2051 	}
2052       break;
2053 
2054     case E_CCAPmode:
2055       switch (GET_CODE (code))
2056 	{
2057 	case EQ:	return CC0;
2058 	case NE:	return CC1 | CC2 | CC3;
2059 	case LT:	return CC1 | CC3;
2060 	case GT:	return CC2;
2061 	case LE:	return CC0 | CC1 | CC3;
2062 	case GE:	return CC0 | CC2;
2063 	default:	return -1;
2064 	}
2065       break;
2066 
2067     case E_CCANmode:
2068       switch (GET_CODE (code))
2069 	{
2070 	case EQ:	return CC0;
2071 	case NE:	return CC1 | CC2 | CC3;
2072 	case LT:	return CC1;
2073 	case GT:	return CC2 | CC3;
2074 	case LE:	return CC0 | CC1;
2075 	case GE:	return CC0 | CC2 | CC3;
2076 	default:	return -1;
2077 	}
2078       break;
2079 
2080     case E_CCSmode:
2081       switch (GET_CODE (code))
2082 	{
2083 	case EQ:	return CC0;
2084 	case NE:	return CC1 | CC2 | CC3;
2085 	case LT:	return CC1;
2086 	case GT:	return CC2;
2087 	case LE:	return CC0 | CC1;
2088 	case GE:	return CC0 | CC2;
2089 	case UNORDERED:	return CC3;
2090 	case ORDERED:	return CC0 | CC1 | CC2;
2091 	case UNEQ:	return CC0 | CC3;
2092 	case UNLT:	return CC1 | CC3;
2093 	case UNGT:	return CC2 | CC3;
2094 	case UNLE:	return CC0 | CC1 | CC3;
2095 	case UNGE:	return CC0 | CC2 | CC3;
2096 	case LTGT:	return CC1 | CC2;
2097 	default:	return -1;
2098 	}
2099       break;
2100 
2101     case E_CCSRmode:
2102       switch (GET_CODE (code))
2103 	{
2104 	case EQ:	return CC0;
2105 	case NE:	return CC2 | CC1 | CC3;
2106 	case LT:	return CC2;
2107 	case GT:	return CC1;
2108 	case LE:	return CC0 | CC2;
2109 	case GE:	return CC0 | CC1;
2110 	case UNORDERED:	return CC3;
2111 	case ORDERED:	return CC0 | CC2 | CC1;
2112 	case UNEQ:	return CC0 | CC3;
2113 	case UNLT:	return CC2 | CC3;
2114 	case UNGT:	return CC1 | CC3;
2115 	case UNLE:	return CC0 | CC2 | CC3;
2116 	case UNGE:	return CC0 | CC1 | CC3;
2117 	case LTGT:	return CC2 | CC1;
2118 	default:	return -1;
2119 	}
2120       break;
2121 
2122       /* Vector comparison modes.  */
2123       /* CC2 will never be set.  It however is part of the negated
2124 	 masks.  */
2125     case E_CCVIALLmode:
2126       switch (GET_CODE (code))
2127 	{
2128 	case EQ:
2129 	case GTU:
2130 	case GT:
2131 	case GE:        return CC0;
2132 	  /* The inverted modes are in fact *any* modes.  */
2133 	case NE:
2134 	case LEU:
2135 	case LE:
2136 	case LT:        return CC3 | CC1 | CC2;
2137 	default:        return -1;
2138 	}
2139 
2140     case E_CCVIANYmode:
2141       switch (GET_CODE (code))
2142 	{
2143 	case EQ:
2144 	case GTU:
2145 	case GT:
2146 	case GE:        return CC0 | CC1;
2147 	  /* The inverted modes are in fact *all* modes.  */
2148 	case NE:
2149 	case LEU:
2150 	case LE:
2151 	case LT:        return CC3 | CC2;
2152 	default:        return -1;
2153 	}
2154     case E_CCVFALLmode:
2155       switch (GET_CODE (code))
2156 	{
2157 	case EQ:
2158 	case GT:
2159 	case GE:        return CC0;
2160 	  /* The inverted modes are in fact *any* modes.  */
2161 	case NE:
2162 	case UNLE:
2163 	case UNLT:      return CC3 | CC1 | CC2;
2164 	default:        return -1;
2165 	}
2166 
2167     case E_CCVFANYmode:
2168       switch (GET_CODE (code))
2169 	{
2170 	case EQ:
2171 	case GT:
2172 	case GE:        return CC0 | CC1;
2173 	  /* The inverted modes are in fact *all* modes.  */
2174 	case NE:
2175 	case UNLE:
2176 	case UNLT:      return CC3 | CC2;
2177 	default:        return -1;
2178 	}
2179 
2180     case E_CCRAWmode:
2181       switch (GET_CODE (code))
2182 	{
2183 	case EQ:
2184 	  return INTVAL (XEXP (code, 1));
2185 	case NE:
2186 	  return (INTVAL (XEXP (code, 1))) ^ 0xf;
2187 	default:
2188 	  gcc_unreachable ();
2189 	}
2190 
2191     default:
2192       return -1;
2193     }
2194 }
2195 
2196 
2197 /* Return branch condition mask to implement a compare and branch
2198    specified by CODE.  Return -1 for invalid comparisons.  */
2199 
2200 int
2201 s390_compare_and_branch_condition_mask (rtx code)
2202 {
2203   const int CC0 = 1 << 3;
2204   const int CC1 = 1 << 2;
2205   const int CC2 = 1 << 1;
2206 
2207   switch (GET_CODE (code))
2208     {
2209     case EQ:
2210       return CC0;
2211     case NE:
2212       return CC1 | CC2;
2213     case LT:
2214     case LTU:
2215       return CC1;
2216     case GT:
2217     case GTU:
2218       return CC2;
2219     case LE:
2220     case LEU:
2221       return CC0 | CC1;
2222     case GE:
2223     case GEU:
2224       return CC0 | CC2;
2225     default:
2226       gcc_unreachable ();
2227     }
2228   return -1;
2229 }
2230 
2231 /* If INV is false, return assembler mnemonic string to implement
2232    a branch specified by CODE.  If INV is true, return mnemonic
2233    for the corresponding inverted branch.  */
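/* Illustrative example (added for clarity, not part of the original
   sources): for the condition (eq (reg:CCZ CC_REGNUM) (const_int 0)),
   s390_branch_condition_mask returns CC0 == 8, so the table below yields
   the mnemonic "e"; with INV set the mask becomes 8 ^ 15 == 7 and the
   mnemonic "ne".  */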
2234 
2235 static const char *
2236 s390_branch_condition_mnemonic (rtx code, int inv)
2237 {
2238   int mask;
2239 
2240   static const char *const mnemonic[16] =
2241     {
2242       NULL, "o", "h", "nle",
2243       "l", "nhe", "lh", "ne",
2244       "e", "nlh", "he", "nl",
2245       "le", "nh", "no", NULL
2246     };
2247 
2248   if (GET_CODE (XEXP (code, 0)) == REG
2249       && REGNO (XEXP (code, 0)) == CC_REGNUM
2250       && (XEXP (code, 1) == const0_rtx
2251 	  || (GET_MODE (XEXP (code, 0)) == CCRAWmode
2252 	      && CONST_INT_P (XEXP (code, 1)))))
2253     mask = s390_branch_condition_mask (code);
2254   else
2255     mask = s390_compare_and_branch_condition_mask (code);
2256 
2257   gcc_assert (mask >= 0);
2258 
2259   if (inv)
2260     mask ^= 15;
2261 
2262   gcc_assert (mask >= 1 && mask <= 14);
2263 
2264   return mnemonic[mask];
2265 }
2266 
2267 /* Return the part of op which has a value different from def.
2268    The size of the part is determined by mode.
2269    Use this function only if you already know that op really
2270    contains such a part.  */
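/* Illustrative example (hand-worked, added for clarity):
   s390_extract_part (GEN_INT (0x00ff0000), QImode, 0) scans the QImode
   chunks starting with the least significant one and returns 0xff, the
   single byte that differs from DEF.  */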
2271 
2272 unsigned HOST_WIDE_INT
2273 s390_extract_part (rtx op, machine_mode mode, int def)
2274 {
2275   unsigned HOST_WIDE_INT value = 0;
2276   int max_parts = HOST_BITS_PER_WIDE_INT / GET_MODE_BITSIZE (mode);
2277   int part_bits = GET_MODE_BITSIZE (mode);
2278   unsigned HOST_WIDE_INT part_mask = (HOST_WIDE_INT_1U << part_bits) - 1;
2279   int i;
2280 
2281   for (i = 0; i < max_parts; i++)
2282     {
2283       if (i == 0)
2284 	value = UINTVAL (op);
2285       else
2286 	value >>= part_bits;
2287 
2288       if ((value & part_mask) != (def & part_mask))
2289 	return value & part_mask;
2290     }
2291 
2292   gcc_unreachable ();
2293 }
2294 
2295 /* If OP is an integer constant of mode MODE with exactly one
2296    part of mode PART_MODE unequal to DEF, return the number of that
2297    part. Otherwise, return -1.  */
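/* Illustrative examples (hand-worked, added for clarity): parts are
   numbered starting with 0 at the most significant part, so
   s390_single_part (GEN_INT (0xffff), DImode, SImode, 0) returns 1 (only
   the low word is non-zero), a value with only the high word non-zero
   returns 0, and a value with both words non-zero returns -1.  */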
2298 
2299 int
2300 s390_single_part (rtx op,
2301 		  machine_mode mode,
2302 		  machine_mode part_mode,
2303 		  int def)
2304 {
2305   unsigned HOST_WIDE_INT value = 0;
2306   int n_parts = GET_MODE_SIZE (mode) / GET_MODE_SIZE (part_mode);
2307   unsigned HOST_WIDE_INT part_mask
2308     = (HOST_WIDE_INT_1U << GET_MODE_BITSIZE (part_mode)) - 1;
2309   int i, part = -1;
2310 
2311   if (GET_CODE (op) != CONST_INT)
2312     return -1;
2313 
2314   for (i = 0; i < n_parts; i++)
2315     {
2316       if (i == 0)
2317 	value = UINTVAL (op);
2318       else
2319 	value >>= GET_MODE_BITSIZE (part_mode);
2320 
2321       if ((value & part_mask) != (def & part_mask))
2322 	{
2323 	  if (part != -1)
2324 	    return -1;
2325 	  else
2326 	    part = i;
2327 	}
2328     }
2329   return part == -1 ? -1 : n_parts - 1 - part;
2330 }
2331 
2332 /* Return true if IN contains a contiguous bitfield in the lower SIZE
2333    bits and no other bits are set in (the lower SIZE bits of) IN.
2334 
2335    PSTART and PEND can be used to obtain the start and end
2336    position (inclusive) of the bitfield relative to 64
2337    bits. *PSTART / *PEND gives the position of the first/last bit
2338    of the bitfield counting from the highest order bit starting
2339    with zero.  */
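/* A small worked example (illustrative only, not from the original
   sources): for IN == 0xf0 and SIZE == 8 the ones occupy bits 4..7
   counting from the least significant bit, i.e. positions 56..59 counting
   from the most significant of 64 bits, so the function returns true with
   *PSTART == 56 and *PEND == 59.  */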
2340 
2341 bool
2342 s390_contiguous_bitmask_nowrap_p (unsigned HOST_WIDE_INT in, int size,
2343 				  int *pstart, int *pend)
2344 {
2345   int start;
2346   int end = -1;
2347   int lowbit = HOST_BITS_PER_WIDE_INT - 1;
2348   int highbit = HOST_BITS_PER_WIDE_INT - size;
2349   unsigned HOST_WIDE_INT bitmask = HOST_WIDE_INT_1U;
2350 
2351   gcc_assert (!!pstart == !!pend);
2352   for (start = lowbit; start >= highbit; bitmask <<= 1, start--)
2353     if (end == -1)
2354       {
2355 	/* Look for the rightmost bit of a contiguous range of ones.  */
2356 	if (bitmask & in)
2357 	  /* Found it.  */
2358 	  end = start;
2359       }
2360     else
2361       {
2362 	/* Look for the first zero bit after the range of ones.  */
2363 	if (! (bitmask & in))
2364 	  /* Found it.  */
2365 	  break;
2366       }
2367   /* We're one past the last one-bit.  */
2368   start++;
2369 
2370   if (end == -1)
2371     /* No one bits found.  */
2372     return false;
2373 
2374   if (start > highbit)
2375     {
2376       unsigned HOST_WIDE_INT mask;
2377 
2378       /* Calculate a mask for all bits beyond the contiguous bits.  */
2379       mask = ((~HOST_WIDE_INT_0U >> highbit)
2380 	      & (~HOST_WIDE_INT_0U << (lowbit - start + 1)));
2381       if (mask & in)
2382 	/* There are more bits set beyond the first range of one bits.  */
2383 	return false;
2384     }
2385 
2386   if (pstart)
2387     {
2388       *pstart = start;
2389       *pend = end;
2390     }
2391 
2392   return true;
2393 }
2394 
2395 /* Same as s390_contiguous_bitmask_nowrap_p but also returns true
2396    if ~IN contains a contiguous bitfield.  In that case, *END is <
2397    *START.
2398 
2399    If WRAP_P is true, a bitmask that wraps around is also tested.
2400 	   When a wraparound occurs, *START is greater than *END (if the
2401 	   pointers are non-null), and the uppermost (64 - SIZE) bits are thus
2402    part of the range.  If WRAP_P is false, no wraparound is
2403    tested.  */
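/* A wraparound example (hand-computed sketch, added for clarity): for
   IN == 0x81 and SIZE == 8 the non-wrapping test fails, but ~IN contains a
   contiguous range, so the function returns true with *START == 63 and
   *END == 56: the bitfield starts at the least significant bit
   (position 63), wraps through the uppermost (64 - SIZE) bits, and ends at
   position 56 (the 0x80 bit of the 8-bit field).  */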
2404 
2405 bool
2406 s390_contiguous_bitmask_p (unsigned HOST_WIDE_INT in, bool wrap_p,
2407 			   int size, int *start, int *end)
2408 {
2409   int bs = HOST_BITS_PER_WIDE_INT;
2410   bool b;
2411 
2412   gcc_assert (!!start == !!end);
2413   if ((in & ((~HOST_WIDE_INT_0U) >> (bs - size))) == 0)
2414     /* This cannot be expressed as a contiguous bitmask.  Exit early because
2415        the second call of s390_contiguous_bitmask_nowrap_p would accept this as
2416        a valid bitmask.  */
2417     return false;
2418   b = s390_contiguous_bitmask_nowrap_p (in, size, start, end);
2419   if (b)
2420     return true;
2421   if (! wrap_p)
2422     return false;
2423   b = s390_contiguous_bitmask_nowrap_p (~in, size, start, end);
2424   if (b && start)
2425     {
2426       int s = *start;
2427       int e = *end;
2428 
2429       gcc_assert (s >= 1);
2430       *start = ((e + 1) & (bs - 1));
2431       *end = ((s - 1 + bs) & (bs - 1));
2432     }
2433 
2434   return b;
2435 }
2436 
2437 /* Return true if OP contains the same contiguous bitfield in *all*
2438    its elements.  START and END can be used to obtain the start and
2439    end position of the bitfield.
2440 
2441    START/END give the position of the first/last bit of the bitfield
2442    counting from the lowest order bit starting with zero.  In order to
2443    use these values for S/390 instructions this has to be converted to
2444    "bits big endian" style.  */
2445 
2446 bool
2447 s390_contiguous_bitmask_vector_p (rtx op, int *start, int *end)
2448 {
2449   unsigned HOST_WIDE_INT mask;
2450   int size;
2451   rtx elt;
2452   bool b;
2453 
2454   gcc_assert (!!start == !!end);
2455   if (!const_vec_duplicate_p (op, &elt)
2456       || !CONST_INT_P (elt))
2457     return false;
2458 
2459   size = GET_MODE_UNIT_BITSIZE (GET_MODE (op));
2460 
2461   /* We cannot deal with V1TI/V1TF. This would require a vgmq.  */
2462   if (size > 64)
2463     return false;
2464 
2465   mask = UINTVAL (elt);
2466 
2467   b = s390_contiguous_bitmask_p (mask, true, size, start, end);
2468   if (b)
2469     {
2470       if (start)
2471 	{
2472 	  *start -= (HOST_BITS_PER_WIDE_INT - size);
2473 	  *end -= (HOST_BITS_PER_WIDE_INT - size);
2474 	}
2475       return true;
2476     }
2477   else
2478     return false;
2479 }
2480 
2481 /* Return true if C consists only of byte chunks being either 0 or
2482    0xff.  If MASK is !=NULL a byte mask is generated which is
2483    appropriate for the vector generate byte mask instruction.  */
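/* Illustrative example (added for clarity, not from the original sources):
   a V16QImode CONST_VECTOR whose sixteen elements are all 0xff satisfies
   the predicate and produces the byte mask 0xffff, i.e. the immediate a
   vector generate byte mask instruction would need to recreate the
   constant; an all-zero vector yields the mask 0.  */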
2484 
2485 bool
2486 s390_bytemask_vector_p (rtx op, unsigned *mask)
2487 {
2488   int i;
2489   unsigned tmp_mask = 0;
2490   int nunit, unit_size;
2491 
2492   if (!VECTOR_MODE_P (GET_MODE (op))
2493       || GET_CODE (op) != CONST_VECTOR
2494       || !CONST_INT_P (XVECEXP (op, 0, 0)))
2495     return false;
2496 
2497   nunit = GET_MODE_NUNITS (GET_MODE (op));
2498   unit_size = GET_MODE_UNIT_SIZE (GET_MODE (op));
2499 
2500   for (i = 0; i < nunit; i++)
2501     {
2502       unsigned HOST_WIDE_INT c;
2503       int j;
2504 
2505       if (!CONST_INT_P (XVECEXP (op, 0, i)))
2506 	return false;
2507 
2508       c = UINTVAL (XVECEXP (op, 0, i));
2509       for (j = 0; j < unit_size; j++)
2510 	{
2511 	  if ((c & 0xff) != 0 && (c & 0xff) != 0xff)
2512 	    return false;
2513 	  tmp_mask |= (c & 1) << ((nunit - 1 - i) * unit_size + j);
2514 	  c = c >> BITS_PER_UNIT;
2515 	}
2516     }
2517 
2518   if (mask != NULL)
2519     *mask = tmp_mask;
2520 
2521   return true;
2522 }
2523 
2524 /* Check whether a rotate of ROTL followed by an AND of CONTIG is
2525    equivalent to a shift followed by the AND.  In particular, CONTIG
2526    should not overlap the (rotated) bit 0/bit 63 gap.  Negative values
2527    for ROTL indicate a rotate to the right.  */
2528 
2529 bool
2530 s390_extzv_shift_ok (int bitsize, int rotl, unsigned HOST_WIDE_INT contig)
2531 {
2532   int start, end;
2533   bool ok;
2534 
2535   ok = s390_contiguous_bitmask_nowrap_p (contig, bitsize, &start, &end);
2536   gcc_assert (ok);
2537 
2538   if (rotl >= 0)
2539     return (64 - end >= rotl);
2540   else
2541     {
2542       /* Translate "- rotate right" in BITSIZE mode to "rotate left" in
2543 	 DImode.  */
2544       rotl = -rotl + (64 - bitsize);
2545       return (start >= rotl);
2546     }
2547 }
2548 
2549 /* Check whether we can (and want to) split a double-word
2550    move in mode MODE from SRC to DST into two single-word
2551    moves, moving the subword FIRST_SUBWORD first.  */
2552 
2553 bool
2554 s390_split_ok_p (rtx dst, rtx src, machine_mode mode, int first_subword)
2555 {
2556   /* Floating point and vector registers cannot be split.  */
2557   if (FP_REG_P (src) || FP_REG_P (dst) || VECTOR_REG_P (src) || VECTOR_REG_P (dst))
2558     return false;
2559 
2560   /* Non-offsettable memory references cannot be split.  */
2561   if ((GET_CODE (src) == MEM && !offsettable_memref_p (src))
2562       || (GET_CODE (dst) == MEM && !offsettable_memref_p (dst)))
2563     return false;
2564 
2565   /* Moving the first subword must not clobber a register
2566      needed to move the second subword.  */
2567   if (register_operand (dst, mode))
2568     {
2569       rtx subreg = operand_subword (dst, first_subword, 0, mode);
2570       if (reg_overlap_mentioned_p (subreg, src))
2571 	return false;
2572     }
2573 
2574   return true;
2575 }
2576 
2577 /* Return true if it can be proven that [MEM1, MEM1 + SIZE]
2578    and [MEM2, MEM2 + SIZE] do overlap and false
2579    otherwise.  */
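/* Illustrative example (added for clarity, not from the original sources):
   with MEM1 at address (reg R) and MEM2 at (plus (reg R) (const_int 8)),
   SIZE == 8 makes the two blocks merely adjacent and the function returns
   false, whereas SIZE == 16 makes them provably overlap and it returns
   true.  */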
2580 
2581 bool
2582 s390_overlap_p (rtx mem1, rtx mem2, HOST_WIDE_INT size)
2583 {
2584   rtx addr1, addr2, addr_delta;
2585   HOST_WIDE_INT delta;
2586 
2587   if (GET_CODE (mem1) != MEM || GET_CODE (mem2) != MEM)
2588     return true;
2589 
2590   if (size == 0)
2591     return false;
2592 
2593   addr1 = XEXP (mem1, 0);
2594   addr2 = XEXP (mem2, 0);
2595 
2596   addr_delta = simplify_binary_operation (MINUS, Pmode, addr2, addr1);
2597 
2598   /* This overlapping check is used by peepholes merging memory block operations.
2599      Overlapping operations would otherwise be recognized by the S/390 hardware
2600      and would fall back to a slower implementation. Allowing overlapping
2601      operations would lead to slow code but not to wrong code. Therefore we are
2602      somewhat optimistic if we cannot prove that the memory blocks are
2603      overlapping.
2604      That's why we return false here although this may accept operations on
2605      overlapping memory areas.  */
2606   if (!addr_delta || GET_CODE (addr_delta) != CONST_INT)
2607     return false;
2608 
2609   delta = INTVAL (addr_delta);
2610 
2611   if (delta == 0
2612       || (delta > 0 && delta < size)
2613       || (delta < 0 && -delta < size))
2614     return true;
2615 
2616   return false;
2617 }
2618 
2619 /* Check whether the address of memory reference MEM2 equals exactly
2620    the address of memory reference MEM1 plus DELTA.  Return true if
2621    we can prove this to be the case, false otherwise.  */
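/* Illustrative example (added for clarity): if MEM1's address is
   (plus (reg R) (const_int 4)) and MEM2's address is
   (plus (reg R) (const_int 12)), then s390_offset_p (mem1, mem2, GEN_INT (8))
   returns true, because the simplified address difference equals DELTA.  */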
2622 
2623 bool
2624 s390_offset_p (rtx mem1, rtx mem2, rtx delta)
2625 {
2626   rtx addr1, addr2, addr_delta;
2627 
2628   if (GET_CODE (mem1) != MEM || GET_CODE (mem2) != MEM)
2629     return false;
2630 
2631   addr1 = XEXP (mem1, 0);
2632   addr2 = XEXP (mem2, 0);
2633 
2634   addr_delta = simplify_binary_operation (MINUS, Pmode, addr2, addr1);
2635   if (!addr_delta || !rtx_equal_p (addr_delta, delta))
2636     return false;
2637 
2638   return true;
2639 }
2640 
2641 /* Expand logical operator CODE in mode MODE with operands OPERANDS.  */
2642 
2643 void
2644 s390_expand_logical_operator (enum rtx_code code, machine_mode mode,
2645 			      rtx *operands)
2646 {
2647   machine_mode wmode = mode;
2648   rtx dst = operands[0];
2649   rtx src1 = operands[1];
2650   rtx src2 = operands[2];
2651   rtx op, clob, tem;
2652 
2653   /* If we cannot handle the operation directly, use a temp register.  */
2654   if (!s390_logical_operator_ok_p (operands))
2655     dst = gen_reg_rtx (mode);
2656 
2657   /* QImode and HImode patterns make sense only if we have a destination
2658      in memory.  Otherwise perform the operation in SImode.  */
2659   if ((mode == QImode || mode == HImode) && GET_CODE (dst) != MEM)
2660     wmode = SImode;
2661 
2662   /* Widen operands if required.  */
2663   if (mode != wmode)
2664     {
2665       if (GET_CODE (dst) == SUBREG
2666 	  && (tem = simplify_subreg (wmode, dst, mode, 0)) != 0)
2667 	dst = tem;
2668       else if (REG_P (dst))
2669 	dst = gen_rtx_SUBREG (wmode, dst, 0);
2670       else
2671 	dst = gen_reg_rtx (wmode);
2672 
2673       if (GET_CODE (src1) == SUBREG
2674 	  && (tem = simplify_subreg (wmode, src1, mode, 0)) != 0)
2675 	src1 = tem;
2676       else if (GET_MODE (src1) != VOIDmode)
2677 	src1 = gen_rtx_SUBREG (wmode, force_reg (mode, src1), 0);
2678 
2679       if (GET_CODE (src2) == SUBREG
2680 	  && (tem = simplify_subreg (wmode, src2, mode, 0)) != 0)
2681 	src2 = tem;
2682       else if (GET_MODE (src2) != VOIDmode)
2683 	src2 = gen_rtx_SUBREG (wmode, force_reg (mode, src2), 0);
2684     }
2685 
2686   /* Emit the instruction.  */
2687   op = gen_rtx_SET (dst, gen_rtx_fmt_ee (code, wmode, src1, src2));
2688   clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, CC_REGNUM));
2689   emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
2690 
2691   /* Fix up the destination if needed.  */
2692   if (dst != operands[0])
2693     emit_move_insn (operands[0], gen_lowpart (mode, dst));
2694 }
2695 
2696 /* Check whether OPERANDS are OK for a logical operation (AND, IOR, XOR).  */
2697 
2698 bool
2699 s390_logical_operator_ok_p (rtx *operands)
2700 {
2701   /* If the destination operand is in memory, it needs to coincide
2702      with one of the source operands.  After reload, it has to be
2703      the first source operand.  */
2704   if (GET_CODE (operands[0]) == MEM)
2705     return rtx_equal_p (operands[0], operands[1])
2706 	   || (!reload_completed && rtx_equal_p (operands[0], operands[2]));
2707 
2708   return true;
2709 }
2710 
2711 /* Narrow logical operation CODE of memory operand MEMOP with immediate
2712    operand IMMOP to switch from SS to SI type instructions.  */
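/* Illustrative sketch (hand-worked, not from the original sources): for an
   SImode AND of a memory operand with the immediate 0xffffff00, DEF is -1,
   and the only QImode part differing from 0xff is the least significant
   byte (part 3 on this big-endian target).  *MEMOP is therefore narrowed
   to that byte and *IMMOP becomes 0x00, so an SI-format instruction such
   as "ni" can be used instead of an SS-format one.  */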
2713 
2714 void
2715 s390_narrow_logical_operator (enum rtx_code code, rtx *memop, rtx *immop)
2716 {
2717   int def = code == AND ? -1 : 0;
2718   HOST_WIDE_INT mask;
2719   int part;
2720 
2721   gcc_assert (GET_CODE (*memop) == MEM);
2722   gcc_assert (!MEM_VOLATILE_P (*memop));
2723 
2724   mask = s390_extract_part (*immop, QImode, def);
2725   part = s390_single_part (*immop, GET_MODE (*memop), QImode, def);
2726   gcc_assert (part >= 0);
2727 
2728   *memop = adjust_address (*memop, QImode, part);
2729   *immop = gen_int_mode (mask, QImode);
2730 }
2731 
2732 
2733 /* How to allocate a 'struct machine_function'.  */
2734 
2735 static struct machine_function *
2736 s390_init_machine_status (void)
2737 {
2738   return ggc_cleared_alloc<machine_function> ();
2739 }
2740 
2741 /* Map for smallest class containing reg regno.  */
2742 
2743 const enum reg_class regclass_map[FIRST_PSEUDO_REGISTER] =
2744 { GENERAL_REGS, ADDR_REGS, ADDR_REGS, ADDR_REGS,  /*  0 */
2745   ADDR_REGS,    ADDR_REGS, ADDR_REGS, ADDR_REGS,  /*  4 */
2746   ADDR_REGS,    ADDR_REGS, ADDR_REGS, ADDR_REGS,  /*  8 */
2747   ADDR_REGS,    ADDR_REGS, ADDR_REGS, ADDR_REGS,  /* 12 */
2748   FP_REGS,      FP_REGS,   FP_REGS,   FP_REGS,    /* 16 */
2749   FP_REGS,      FP_REGS,   FP_REGS,   FP_REGS,    /* 20 */
2750   FP_REGS,      FP_REGS,   FP_REGS,   FP_REGS,    /* 24 */
2751   FP_REGS,      FP_REGS,   FP_REGS,   FP_REGS,    /* 28 */
2752   ADDR_REGS,    CC_REGS,   ADDR_REGS, ADDR_REGS,  /* 32 */
2753   ACCESS_REGS,	ACCESS_REGS, VEC_REGS, VEC_REGS,  /* 36 */
2754   VEC_REGS, VEC_REGS, VEC_REGS, VEC_REGS,         /* 40 */
2755   VEC_REGS, VEC_REGS, VEC_REGS, VEC_REGS,         /* 44 */
2756   VEC_REGS, VEC_REGS, VEC_REGS, VEC_REGS,         /* 48 */
2757   VEC_REGS, VEC_REGS                              /* 52 */
2758 };
2759 
2760 /* Return attribute type of insn.  */
2761 
2762 static enum attr_type
2763 s390_safe_attr_type (rtx_insn *insn)
2764 {
2765   if (recog_memoized (insn) >= 0)
2766     return get_attr_type (insn);
2767   else
2768     return TYPE_NONE;
2769 }
2770 
2771 /* Return attribute relative_long of insn.  */
2772 
2773 static bool
2774 s390_safe_relative_long_p (rtx_insn *insn)
2775 {
2776   if (recog_memoized (insn) >= 0)
2777     return get_attr_relative_long (insn) == RELATIVE_LONG_YES;
2778   else
2779     return false;
2780 }
2781 
2782 /* Return true if DISP is a valid short displacement.  */
2783 
2784 static bool
2785 s390_short_displacement (rtx disp)
2786 {
2787   /* No displacement is OK.  */
2788   if (!disp)
2789     return true;
2790 
2791   /* Without the long displacement facility we don't need to
2792      distinguish between long and short displacement.  */
2793   if (!TARGET_LONG_DISPLACEMENT)
2794     return true;
2795 
2796   /* Integer displacement in range.  */
2797   if (GET_CODE (disp) == CONST_INT)
2798     return INTVAL (disp) >= 0 && INTVAL (disp) < 4096;
2799 
2800   /* GOT offset is not OK, the GOT can be large.  */
2801   if (GET_CODE (disp) == CONST
2802       && GET_CODE (XEXP (disp, 0)) == UNSPEC
2803       && (XINT (XEXP (disp, 0), 1) == UNSPEC_GOT
2804 	  || XINT (XEXP (disp, 0), 1) == UNSPEC_GOTNTPOFF))
2805     return false;
2806 
2807   /* All other symbolic constants are literal pool references,
2808      which are OK as the literal pool must be small.  */
2809   if (GET_CODE (disp) == CONST)
2810     return true;
2811 
2812   return false;
2813 }
2814 
2815 /* Attempts to split `ref', which should be UNSPEC_LTREF, into (base + `disp').
2816    If successful, also determines the
2817    following characteristics of `ref': `is_ptr' - whether it can be an
2818    LA argument, `is_base_ptr' - whether the resulting base is a well-known
2819    base register (stack/frame pointer, etc), `is_pool_ptr` - whether it is
2820    considered a literal pool pointer for purposes of avoiding two different
2821    literal pool pointers per insn during or after reload (`B' constraint).  */
2822 static bool
2823 s390_decompose_constant_pool_ref (rtx *ref, rtx *disp, bool *is_ptr,
2824 				  bool *is_base_ptr, bool *is_pool_ptr)
2825 {
2826   if (!*ref)
2827     return true;
2828 
2829   if (GET_CODE (*ref) == UNSPEC)
2830     switch (XINT (*ref, 1))
2831       {
2832       case UNSPEC_LTREF:
2833 	if (!*disp)
2834 	  *disp = gen_rtx_UNSPEC (Pmode,
2835 				  gen_rtvec (1, XVECEXP (*ref, 0, 0)),
2836 				  UNSPEC_LTREL_OFFSET);
2837 	else
2838 	  return false;
2839 
2840 	*ref = XVECEXP (*ref, 0, 1);
2841 	break;
2842 
2843       default:
2844 	return false;
2845       }
2846 
2847   if (!REG_P (*ref) || GET_MODE (*ref) != Pmode)
2848     return false;
2849 
2850   if (REGNO (*ref) == STACK_POINTER_REGNUM
2851       || REGNO (*ref) == FRAME_POINTER_REGNUM
2852       || ((reload_completed || reload_in_progress)
2853 	  && frame_pointer_needed
2854 	  && REGNO (*ref) == HARD_FRAME_POINTER_REGNUM)
2855       || REGNO (*ref) == ARG_POINTER_REGNUM
2856       || (flag_pic
2857 	  && REGNO (*ref) == PIC_OFFSET_TABLE_REGNUM))
2858     *is_ptr = *is_base_ptr = true;
2859 
2860   if ((reload_completed || reload_in_progress)
2861       && *ref == cfun->machine->base_reg)
2862     *is_ptr = *is_base_ptr = *is_pool_ptr = true;
2863 
2864   return true;
2865 }
2866 
2867 /* Decompose a RTL expression ADDR for a memory address into
2868    its components, returned in OUT.
2869 
2870    Returns false if ADDR is not a valid memory address, true
2871    otherwise.  If OUT is NULL, don't return the components,
2872    but check for validity only.
2873 
2874    Note: Only addresses in canonical form are recognized.
2875    LEGITIMIZE_ADDRESS should convert non-canonical forms to the
2876    canonical form so that they will be recognized.  */
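/* Illustrative example (added for clarity, not from the original sources):
   the canonical address (plus (plus (reg A) (reg B)) (const_int 40))
   decomposes into an index register, a base register and
   out->disp == (const_int 40), provided the displacement is in range; base
   and index may be swapped so that a pointer register ends up as the
   base.  */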
2877 
2878 static int
2879 s390_decompose_address (rtx addr, struct s390_address *out)
2880 {
2881   HOST_WIDE_INT offset = 0;
2882   rtx base = NULL_RTX;
2883   rtx indx = NULL_RTX;
2884   rtx disp = NULL_RTX;
2885   rtx orig_disp;
2886   bool pointer = false;
2887   bool base_ptr = false;
2888   bool indx_ptr = false;
2889   bool literal_pool = false;
2890 
2891   /* We may need to substitute the literal pool base register into the address
2892      below.  However, at this point we do not know which register is going to
2893      be used as base, so we substitute the arg pointer register.  This is going
2894      to be treated as holding a pointer below -- it shouldn't be used for any
2895      other purpose.  */
2896   rtx fake_pool_base = gen_rtx_REG (Pmode, ARG_POINTER_REGNUM);
2897 
2898   /* Decompose address into base + index + displacement.  */
2899 
2900   if (GET_CODE (addr) == REG || GET_CODE (addr) == UNSPEC)
2901     base = addr;
2902 
2903   else if (GET_CODE (addr) == PLUS)
2904     {
2905       rtx op0 = XEXP (addr, 0);
2906       rtx op1 = XEXP (addr, 1);
2907       enum rtx_code code0 = GET_CODE (op0);
2908       enum rtx_code code1 = GET_CODE (op1);
2909 
2910       if (code0 == REG || code0 == UNSPEC)
2911 	{
2912 	  if (code1 == REG || code1 == UNSPEC)
2913 	    {
2914 	      indx = op0;	/* index + base */
2915 	      base = op1;
2916 	    }
2917 
2918 	  else
2919 	    {
2920 	      base = op0;	/* base + displacement */
2921 	      disp = op1;
2922 	    }
2923 	}
2924 
2925       else if (code0 == PLUS)
2926 	{
2927 	  indx = XEXP (op0, 0);	/* index + base + disp */
2928 	  base = XEXP (op0, 1);
2929 	  disp = op1;
2930 	}
2931 
2932       else
2933 	{
2934 	  return false;
2935 	}
2936     }
2937 
2938   else
2939     disp = addr;		/* displacement */
2940 
2941   /* Extract integer part of displacement.  */
2942   orig_disp = disp;
2943   if (disp)
2944     {
2945       if (GET_CODE (disp) == CONST_INT)
2946 	{
2947 	  offset = INTVAL (disp);
2948 	  disp = NULL_RTX;
2949 	}
2950       else if (GET_CODE (disp) == CONST
2951 	       && GET_CODE (XEXP (disp, 0)) == PLUS
2952 	       && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
2953 	{
2954 	  offset = INTVAL (XEXP (XEXP (disp, 0), 1));
2955 	  disp = XEXP (XEXP (disp, 0), 0);
2956 	}
2957     }
2958 
2959   /* Strip off CONST here to avoid special case tests later.  */
2960   if (disp && GET_CODE (disp) == CONST)
2961     disp = XEXP (disp, 0);
2962 
2963   /* We can convert literal pool addresses to
2964      displacements by basing them off the base register.  */
2965   if (disp && GET_CODE (disp) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (disp))
2966     {
2967       if (base || indx)
2968 	return false;
2969 
2970       base = fake_pool_base, literal_pool = true;
2971 
2972       /* Mark up the displacement.  */
2973       disp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, disp),
2974 			     UNSPEC_LTREL_OFFSET);
2975     }
2976 
2977   /* Validate base register.  */
2978   if (!s390_decompose_constant_pool_ref (&base, &disp, &pointer, &base_ptr,
2979 					 &literal_pool))
2980     return false;
2981 
2982   /* Validate index register.  */
2983   if (!s390_decompose_constant_pool_ref (&indx, &disp, &pointer, &indx_ptr,
2984 					 &literal_pool))
2985     return false;
2986 
2987   /* Prefer to use pointer as base, not index.  */
2988   if (base && indx && !base_ptr
2989       && (indx_ptr || (!REG_POINTER (base) && REG_POINTER (indx))))
2990     {
2991       rtx tmp = base;
2992       base = indx;
2993       indx = tmp;
2994     }
2995 
2996   /* Validate displacement.  */
2997   if (!disp)
2998     {
2999       /* If virtual registers are involved, the displacement will change later
3000 	 anyway as the virtual registers get eliminated.  This could make a
3001 	 valid displacement invalid, but it is more likely to make an invalid
3002 	 displacement valid, because we sometimes access the register save area
3003 	 via negative offsets to one of those registers.
3004 	 Thus we don't check the displacement for validity here.  If after
3005 	 elimination the displacement turns out to be invalid after all,
3006 	 this is fixed up by reload in any case.  */
3007       /* LRA always keeps displacements up to date, and we need to know
3008 	 that the displacement is right during all of LRA, not only at the
3009 	 final elimination.  */
3010       if (lra_in_progress
3011 	  || (base != arg_pointer_rtx
3012 	      && indx != arg_pointer_rtx
3013 	      && base != return_address_pointer_rtx
3014 	      && indx != return_address_pointer_rtx
3015 	      && base != frame_pointer_rtx
3016 	      && indx != frame_pointer_rtx
3017 	      && base != virtual_stack_vars_rtx
3018 	      && indx != virtual_stack_vars_rtx))
3019 	if (!DISP_IN_RANGE (offset))
3020 	  return false;
3021     }
3022   else
3023     {
3024       /* All the special cases are pointers.  */
3025       pointer = true;
3026 
3027       /* In the small-PIC case, the linker converts @GOT
3028 	 and @GOTNTPOFF offsets to possible displacements.  */
3029       if (GET_CODE (disp) == UNSPEC
3030 	  && (XINT (disp, 1) == UNSPEC_GOT
3031 	      || XINT (disp, 1) == UNSPEC_GOTNTPOFF)
3032 	  && flag_pic == 1)
3033 	{
3034 	  ;
3035 	}
3036 
3037       /* Accept pool label offsets.  */
3038       else if (GET_CODE (disp) == UNSPEC
3039 	       && XINT (disp, 1) == UNSPEC_POOL_OFFSET)
3040 	;
3041 
3042       /* Accept literal pool references.  */
3043       else if (GET_CODE (disp) == UNSPEC
3044 	       && XINT (disp, 1) == UNSPEC_LTREL_OFFSET)
3045 	{
3046 	  /* In case CSE pulled a non-literal-pool reference out of
3047 	     the pool we have to reject the address.  This is
3048 	     especially important when loading the GOT pointer on
3049 	     non-zarch CPUs.  In this case the literal pool contains an lt
3050 	     relative offset to the _GLOBAL_OFFSET_TABLE_ label which
3051 	     will most likely exceed the displacement range.  */
3052 	  if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
3053 	      || !CONSTANT_POOL_ADDRESS_P (XVECEXP (disp, 0, 0)))
3054 	    return false;
3055 
3056 	  orig_disp = gen_rtx_CONST (Pmode, disp);
3057 	  if (offset)
3058 	    {
3059 	      /* If we have an offset, make sure it does not
3060 		 exceed the size of the constant pool entry.
3061 		 Otherwise we might generate an out-of-range
3062 		 displacement for the base register form.  */
3063 	      rtx sym = XVECEXP (disp, 0, 0);
3064 	      if (offset >= GET_MODE_SIZE (get_pool_mode (sym)))
3065 		return false;
3066 
3067 	      orig_disp = plus_constant (Pmode, orig_disp, offset);
3068 	    }
3069 	}
3070 
3071       else
3072 	return false;
3073     }
3074 
3075   if (!base && !indx)
3076     pointer = true;
3077 
3078   if (out)
3079     {
3080       out->base = base;
3081       out->indx = indx;
3082       out->disp = orig_disp;
3083       out->pointer = pointer;
3084       out->literal_pool = literal_pool;
3085     }
3086 
3087   return true;
3088 }
3089 
3090 /* Decompose a RTL expression OP for an address style operand into its
3091    components, and return the base register in BASE and the offset in
3092    OFFSET.  While OP looks like an address it is never supposed to be
3093    used as such.
3094 
3095    Return true if OP is a valid address operand, false if not.  */
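/* Illustrative example (added for clarity, not from the original sources):
   for OP == (plus (reg R) (const_int 5)) the function returns true with
   *BASE == (reg R) and *OFFSET == 5; a plain CONST_INT yields a NULL
   base.  */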
3096 
3097 bool
3098 s390_decompose_addrstyle_without_index (rtx op, rtx *base,
3099 					HOST_WIDE_INT *offset)
3100 {
3101   rtx off = NULL_RTX;
3102 
3103   /* We can have an integer constant, an address register,
3104      or a sum of the two.  */
3105   if (CONST_SCALAR_INT_P (op))
3106     {
3107       off = op;
3108       op = NULL_RTX;
3109     }
3110   if (op && GET_CODE (op) == PLUS && CONST_SCALAR_INT_P (XEXP (op, 1)))
3111     {
3112       off = XEXP (op, 1);
3113       op = XEXP (op, 0);
3114     }
3115   while (op && GET_CODE (op) == SUBREG)
3116     op = SUBREG_REG (op);
3117 
3118   if (op && GET_CODE (op) != REG)
3119     return false;
3120 
3121   if (offset)
3122     {
3123       if (off == NULL_RTX)
3124 	*offset = 0;
3125       else if (CONST_INT_P (off))
3126 	*offset = INTVAL (off);
3127       else if (CONST_WIDE_INT_P (off))
3128 	/* The offset will be truncated to 12 bits anyway, so just take
3129 	   the lowest-order chunk of the wide int.  */
3130 	*offset = CONST_WIDE_INT_ELT (off, 0);
3131       else
3132 	gcc_unreachable ();
3133     }
3134   if (base)
3135     *base = op;
3136 
3137    return true;
3138 }
3139 
3140 
3141 /* Return true if OP is a valid address without index.  */
3142 
3143 bool
3144 s390_legitimate_address_without_index_p (rtx op)
3145 {
3146   struct s390_address addr;
3147 
3148   if (!s390_decompose_address (XEXP (op, 0), &addr))
3149     return false;
3150   if (addr.indx)
3151     return false;
3152 
3153   return true;
3154 }
3155 
3156 
3157 /* Return TRUE if ADDR is an operand valid for a load/store relative
3158    instruction.  Be aware that the alignment of the operand needs to
3159    be checked separately.
3160    Valid addresses are single references or a sum of a reference and a
3161    constant integer. Return these parts in SYMREF and ADDEND.  You can
3162    pass NULL in SYMREF and/or ADDEND if you are not interested in these
3163    values.  */
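/* Illustrative example (added for clarity): for
   ADDR == (const (plus (symbol_ref "x") (const_int 8))), where "x" is a
   hypothetical symbol, the function returns true with
   *SYMREF == (symbol_ref "x") and *ADDEND == 8.  */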
3164 
3165 static bool
3166 s390_loadrelative_operand_p (rtx addr, rtx *symref, HOST_WIDE_INT *addend)
3167 {
3168   HOST_WIDE_INT tmpaddend = 0;
3169 
3170   if (GET_CODE (addr) == CONST)
3171     addr = XEXP (addr, 0);
3172 
3173   if (GET_CODE (addr) == PLUS)
3174     {
3175       if (!CONST_INT_P (XEXP (addr, 1)))
3176 	return false;
3177 
3178       tmpaddend = INTVAL (XEXP (addr, 1));
3179       addr = XEXP (addr, 0);
3180     }
3181 
3182   if (GET_CODE (addr) == SYMBOL_REF
3183       || (GET_CODE (addr) == UNSPEC
3184 	  && (XINT (addr, 1) == UNSPEC_GOTENT
3185 	      || XINT (addr, 1) == UNSPEC_PLT)))
3186     {
3187       if (symref)
3188 	*symref = addr;
3189       if (addend)
3190 	*addend = tmpaddend;
3191 
3192       return true;
3193     }
3194   return false;
3195 }
3196 
3197 /* Return true if the address in OP is valid for constraint letter C
3198    if wrapped in a MEM rtx.  Set LIT_POOL_OK to true if literal
3199    pool MEMs should be accepted.  Only the Q, R, S, T constraint
3200    letters are allowed for C.  */
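/* As a quick reference (restating the cases handled below): 'Q' means no
   index register and a short displacement, 'R' allows an index with a
   short displacement, 'S' means no index with a long displacement, and
   'T' allows both an index and a long displacement.  */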
3201 
3202 static int
3203 s390_check_qrst_address (char c, rtx op, bool lit_pool_ok)
3204 {
3205   rtx symref;
3206   struct s390_address addr;
3207   bool decomposed = false;
3208 
3209   if (!address_operand (op, GET_MODE (op)))
3210     return 0;
3211 
3212   /* This check makes sure that no symbolic addresses (except literal
3213      pool references) are accepted by the R or T constraints.  */
3214   if (s390_loadrelative_operand_p (op, &symref, NULL)
3215       && (!lit_pool_ok
3216           || !SYMBOL_REF_P (symref)
3217           || !CONSTANT_POOL_ADDRESS_P (symref)))
3218     return 0;
3219 
3220   /* Ensure literal pool references are only accepted if LIT_POOL_OK.  */
3221   if (!lit_pool_ok)
3222     {
3223       if (!s390_decompose_address (op, &addr))
3224 	return 0;
3225       if (addr.literal_pool)
3226 	return 0;
3227       decomposed = true;
3228     }
3229 
3230   /* With reload, we sometimes get intermediate address forms that are
3231      actually invalid as-is, but we need to accept them in the most
3232      generic cases below ('R' or 'T'), since reload will in fact fix
3233      them up.  LRA behaves differently here; we never see such forms,
3234      but on the other hand, we need to strictly reject every invalid
3235      address form.  After both reload and LRA invalid address forms
3236      must be rejected, because nothing will fix them up later.  Perform
3237      this check right up front.  */
3238   if (lra_in_progress || reload_completed)
3239     {
3240       if (!decomposed && !s390_decompose_address (op, &addr))
3241 	return 0;
3242       decomposed = true;
3243     }
3244 
3245   switch (c)
3246     {
3247     case 'Q': /* no index short displacement */
3248       if (!decomposed && !s390_decompose_address (op, &addr))
3249 	return 0;
3250       if (addr.indx)
3251 	return 0;
3252       if (!s390_short_displacement (addr.disp))
3253 	return 0;
3254       break;
3255 
3256     case 'R': /* with index short displacement */
3257       if (TARGET_LONG_DISPLACEMENT)
3258 	{
3259 	  if (!decomposed && !s390_decompose_address (op, &addr))
3260 	    return 0;
3261 	  if (!s390_short_displacement (addr.disp))
3262 	    return 0;
3263 	}
3264       /* Any invalid address here will be fixed up by reload,
3265 	 so accept it for the most generic constraint.  */
3266       break;
3267 
3268     case 'S': /* no index long displacement */
3269       if (!decomposed && !s390_decompose_address (op, &addr))
3270 	return 0;
3271       if (addr.indx)
3272 	return 0;
3273       break;
3274 
3275     case 'T': /* with index long displacement */
3276       /* Any invalid address here will be fixed up by reload,
3277 	 so accept it for the most generic constraint.  */
3278       break;
3279 
3280     default:
3281       return 0;
3282     }
3283   return 1;
3284 }
3285 
3286 
3287 /* Evaluates constraint strings described by the regular expression
3288    ([ABZ](Q|R|S|T))|Y and returns 1 if OP is a valid operand for
3289    the constraint given in STR, and 0 otherwise.  */
3290 
3291 int
3292 s390_mem_constraint (const char *str, rtx op)
3293 {
3294   char c = str[0];
3295 
3296   switch (c)
3297     {
3298     case 'A':
3299       /* Check for offsettable variants of memory constraints.  */
3300       if (!MEM_P (op) || MEM_VOLATILE_P (op))
3301 	return 0;
3302       if ((reload_completed || reload_in_progress)
3303 	  ? !offsettable_memref_p (op) : !offsettable_nonstrict_memref_p (op))
3304 	return 0;
3305       return s390_check_qrst_address (str[1], XEXP (op, 0), true);
3306     case 'B':
3307       /* Check for non-literal-pool variants of memory constraints.  */
3308       if (!MEM_P (op))
3309 	return 0;
3310       return s390_check_qrst_address (str[1], XEXP (op, 0), false);
3311     case 'Q':
3312     case 'R':
3313     case 'S':
3314     case 'T':
3315       if (GET_CODE (op) != MEM)
3316 	return 0;
3317       return s390_check_qrst_address (c, XEXP (op, 0), true);
3318     case 'Y':
3319       /* Simply check for the basic form of a shift count.  Reload will
3320 	 take care of making sure we have a proper base register.  */
3321       if (!s390_decompose_addrstyle_without_index (op, NULL, NULL))
3322 	return 0;
3323       break;
3324     case 'Z':
3325       return s390_check_qrst_address (str[1], op, true);
3326     default:
3327       return 0;
3328     }
3329   return 1;
3330 }
3331 
3332 
3333 /* Evaluates constraint strings starting with letter O.  Input
3334    parameter C is the letter following the "O" in the constraint
3335    string (i.e. its second letter).  Returns 1 if VALUE meets the
3336    respective constraint and 0 otherwise.  */
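/* Illustrative examples (hand-checked against the cases below, not
   exhaustive): "Os" accepts any VALUE that survives truncation to SImode
   unchanged, e.g. -1 or 0x7fffffff, while "Op" accepts values whose
   non-zero bits are confined to the low 32 bits, e.g. 0xffffffff.  */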
3337 
3338 int
3339 s390_O_constraint_str (const char c, HOST_WIDE_INT value)
3340 {
3341   if (!TARGET_EXTIMM)
3342     return 0;
3343 
3344   switch (c)
3345     {
3346     case 's':
3347       return trunc_int_for_mode (value, SImode) == value;
3348 
3349     case 'p':
3350       return value == 0
3351 	|| s390_single_part (GEN_INT (value), DImode, SImode, 0) == 1;
3352 
3353     case 'n':
3354       return s390_single_part (GEN_INT (value - 1), DImode, SImode, -1) == 1;
3355 
3356     default:
3357       gcc_unreachable ();
3358     }
3359 }
3360 
3361 
3362 /* Evaluates constraint strings starting with letter N.  Parameter STR
3363    contains the letters following letter "N" in the constraint string.
3364    Returns true if VALUE matches the constraint.  */
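/* For illustration (readings derived from the parsing below): the
   constraint "NxHD0", i.e. STR == "xHD0" here, accepts a DImode constant
   in which exactly one 16-bit (HImode) part is non-zero and all other
   parts are zero, no matter which part it is; replacing the leading 'x'
   by a digit additionally pins down which part may differ.  A trailing
   'F' instead of '0' requires the remaining parts to be all ones.  */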
3365 
3366 int
3367 s390_N_constraint_str (const char *str, HOST_WIDE_INT value)
3368 {
3369   machine_mode mode, part_mode;
3370   int def;
3371   int part, part_goal;
3372 
3373 
3374   if (str[0] == 'x')
3375     part_goal = -1;
3376   else
3377     part_goal = str[0] - '0';
3378 
3379   switch (str[1])
3380     {
3381     case 'Q':
3382       part_mode = QImode;
3383       break;
3384     case 'H':
3385       part_mode = HImode;
3386       break;
3387     case 'S':
3388       part_mode = SImode;
3389       break;
3390     default:
3391       return 0;
3392     }
3393 
3394   switch (str[2])
3395     {
3396     case 'H':
3397       mode = HImode;
3398       break;
3399     case 'S':
3400       mode = SImode;
3401       break;
3402     case 'D':
3403       mode = DImode;
3404       break;
3405     default:
3406       return 0;
3407     }
3408 
3409   switch (str[3])
3410     {
3411     case '0':
3412       def = 0;
3413       break;
3414     case 'F':
3415       def = -1;
3416       break;
3417     default:
3418       return 0;
3419     }
3420 
3421   if (GET_MODE_SIZE (mode) <= GET_MODE_SIZE (part_mode))
3422     return 0;
3423 
3424   part = s390_single_part (GEN_INT (value), mode, part_mode, def);
3425   if (part < 0)
3426     return 0;
3427   if (part_goal != -1 && part_goal != part)
3428     return 0;
3429 
3430   return 1;
3431 }
3432 
3433 
3434 /* Returns true if the input parameter VALUE is a float zero.  */
3435 
3436 int
3437 s390_float_const_zero_p (rtx value)
3438 {
3439   return (GET_MODE_CLASS (GET_MODE (value)) == MODE_FLOAT
3440 	  && value == CONST0_RTX (GET_MODE (value)));
3441 }
3442 
3443 /* Implement TARGET_REGISTER_MOVE_COST.  */
3444 
3445 static int
3446 s390_register_move_cost (machine_mode mode,
3447 			 reg_class_t from, reg_class_t to)
3448 {
3449   /* On s390, copy between fprs and gprs is expensive.  */
3450 
3451   /* It becomes somewhat faster when ldgr/lgdr are available.  */
3452   if (TARGET_Z10 && GET_MODE_SIZE (mode) == 8)
3453     {
3454       /* ldgr is single cycle. */
3455       if (reg_classes_intersect_p (from, GENERAL_REGS)
3456 	  && reg_classes_intersect_p (to, FP_REGS))
3457 	return 1;
3458       /* lgdr needs 3 cycles. */
3459       if (reg_classes_intersect_p (to, GENERAL_REGS)
3460 	  && reg_classes_intersect_p (from, FP_REGS))
3461 	return 3;
3462     }
3463 
3464   /* Otherwise copying is done via memory.  */
3465   if ((reg_classes_intersect_p (from, GENERAL_REGS)
3466        && reg_classes_intersect_p (to, FP_REGS))
3467       || (reg_classes_intersect_p (from, FP_REGS)
3468 	  && reg_classes_intersect_p (to, GENERAL_REGS)))
3469     return 10;
3470 
3471   /* We usually do not want to copy via CC.  */
3472   if (reg_classes_intersect_p (from, CC_REGS)
3473        || reg_classes_intersect_p (to, CC_REGS))
3474     return 5;
3475 
3476   return 1;
3477 }
3478 
3479 /* Implement TARGET_MEMORY_MOVE_COST.  */
3480 
3481 static int
3482 s390_memory_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
3483 		       reg_class_t rclass ATTRIBUTE_UNUSED,
3484 		       bool in ATTRIBUTE_UNUSED)
3485 {
3486   return 2;
3487 }
3488 
3489 /* Compute a (partial) cost for rtx X.  Return true if the complete
3490    cost has been computed, and false if subexpressions should be
3491    scanned.  In either case, *TOTAL contains the cost result.  The
3492    initial value of *TOTAL is the default value computed by
3493    rtx_cost.  It may be left unmodified.  OUTER_CODE contains the
3494    code of the superexpression of x.  */
3495 
3496 static bool
3497 s390_rtx_costs (rtx x, machine_mode mode, int outer_code,
3498 		int opno ATTRIBUTE_UNUSED,
3499 		int *total, bool speed ATTRIBUTE_UNUSED)
3500 {
3501   int code = GET_CODE (x);
3502   switch (code)
3503     {
3504     case CONST:
3505     case CONST_INT:
3506     case LABEL_REF:
3507     case SYMBOL_REF:
3508     case CONST_DOUBLE:
3509     case CONST_WIDE_INT:
3510     case MEM:
3511       *total = 0;
3512       return true;
3513 
3514     case SET:
3515       {
3516 	/* Without this a conditional move instruction would be
3517 	   accounted as 3 * COSTS_N_INSNS (set, if_then_else,
3518 	   comparison operator).  That's a bit pessimistic.  */
3519 
3520 	if (!TARGET_Z196 || GET_CODE (SET_SRC (x)) != IF_THEN_ELSE)
3521 	  return false;
3522 
3523 	rtx cond = XEXP (SET_SRC (x), 0);
3524 
3525 	if (!CC_REG_P (XEXP (cond, 0)) || !CONST_INT_P (XEXP (cond, 1)))
3526 	  return false;
3527 
3528 	/* It is going to be a load/store on condition.  Make it
3529 	   slightly more expensive than a normal load.  */
3530 	*total = COSTS_N_INSNS (1) + 1;
3531 
3532 	rtx dst = SET_DEST (x);
3533 	rtx then = XEXP (SET_SRC (x), 1);
3534 	rtx els = XEXP (SET_SRC (x), 2);
3535 
3536 	/* It is a real IF-THEN-ELSE.  An additional move will be
3537 	   needed to implement that.  */
3538 	if (!TARGET_Z15
3539 	    && reload_completed
3540 	    && !rtx_equal_p (dst, then)
3541 	    && !rtx_equal_p (dst, els))
3542 	  *total += COSTS_N_INSNS (1) / 2;
3543 
3544 	/* A minor penalty for constants we cannot directly handle.  */
3545 	if ((CONST_INT_P (then) || CONST_INT_P (els))
3546 	    && (!TARGET_Z13 || MEM_P (dst)
3547 		|| (CONST_INT_P (then) && !satisfies_constraint_K (then))
3548 		|| (CONST_INT_P (els) && !satisfies_constraint_K (els))))
3549 	  *total += COSTS_N_INSNS (1) / 2;
3550 
3551 	/* A store on condition can only handle register src operands.  */
3552 	if (MEM_P (dst) && (!REG_P (then) || !REG_P (els)))
3553 	  *total += COSTS_N_INSNS (1) / 2;
3554 
3555 	return true;
3556       }
3557     case IOR:
3558 
3559       /* nnrk, nngrk */
3560       if (TARGET_Z15
3561 	  && (mode == SImode || mode == DImode)
3562 	  && GET_CODE (XEXP (x, 0)) == NOT
3563 	  && GET_CODE (XEXP (x, 1)) == NOT)
3564 	{
3565 	  *total = COSTS_N_INSNS (1);
3566 	  if (!REG_P (XEXP (XEXP (x, 0), 0)))
3567 	    *total += 1;
3568 	  if (!REG_P (XEXP (XEXP (x, 1), 0)))
3569 	    *total += 1;
3570 	  return true;
3571 	}
3572 
3573       /* risbg */
3574       if (GET_CODE (XEXP (x, 0)) == AND
3575 	  && GET_CODE (XEXP (x, 1)) == ASHIFT
3576 	  && REG_P (XEXP (XEXP (x, 0), 0))
3577 	  && REG_P (XEXP (XEXP (x, 1), 0))
3578 	  && CONST_INT_P (XEXP (XEXP (x, 0), 1))
3579 	  && CONST_INT_P (XEXP (XEXP (x, 1), 1))
3580 	  && (UINTVAL (XEXP (XEXP (x, 0), 1)) ==
3581 	      (HOST_WIDE_INT_1U << UINTVAL (XEXP (XEXP (x, 1), 1))) - 1))
3582 	{
3583 	  *total = COSTS_N_INSNS (2);
3584 	  return true;
3585 	}
3586 
3587       /* ~AND on a 128 bit mode.  This can be done using a vector
3588 	 instruction.  */
3589       if (TARGET_VXE
3590 	  && GET_CODE (XEXP (x, 0)) == NOT
3591 	  && GET_CODE (XEXP (x, 1)) == NOT
3592 	  && REG_P (XEXP (XEXP (x, 0), 0))
3593 	  && REG_P (XEXP (XEXP (x, 1), 0))
3594 	  && GET_MODE_SIZE (GET_MODE (XEXP (XEXP (x, 0), 0))) == 16
3595 	  && s390_hard_regno_mode_ok (VR0_REGNUM,
3596 				      GET_MODE (XEXP (XEXP (x, 0), 0))))
3597 	{
3598 	  *total = COSTS_N_INSNS (1);
3599 	  return true;
3600 	}
3601 
3602       *total = COSTS_N_INSNS (1);
3603       return false;
3604 
3605     case AND:
3606       /* nork, nogrk */
3607       if (TARGET_Z15
3608 	  && (mode == SImode || mode == DImode)
3609 	  && GET_CODE (XEXP (x, 0)) == NOT
3610 	  && GET_CODE (XEXP (x, 1)) == NOT)
3611 	{
3612 	  *total = COSTS_N_INSNS (1);
3613 	  if (!REG_P (XEXP (XEXP (x, 0), 0)))
3614 	    *total += 1;
3615 	  if (!REG_P (XEXP (XEXP (x, 1), 0)))
3616 	    *total += 1;
3617 	  return true;
3618 	}
3619       /* fallthrough */
3620     case ASHIFT:
3621     case ASHIFTRT:
3622     case LSHIFTRT:
3623     case ROTATE:
3624     case ROTATERT:
3625     case XOR:
3626     case NEG:
3627     case NOT:
3628     case PLUS:
3629     case MINUS:
3630       *total = COSTS_N_INSNS (1);
3631       return false;
3632 
3633     case MULT:
3634       switch (mode)
3635 	{
3636 	case E_SImode:
3637 	  {
3638 	    rtx left = XEXP (x, 0);
3639 	    rtx right = XEXP (x, 1);
3640 	    if (GET_CODE (right) == CONST_INT
3641 		&& CONST_OK_FOR_K (INTVAL (right)))
3642 	      *total = s390_cost->mhi;
3643 	    else if (GET_CODE (left) == SIGN_EXTEND)
3644 	      *total = s390_cost->mh;
3645 	    else
3646 	      *total = s390_cost->ms;  /* msr, ms, msy */
3647 	    break;
3648 	  }
3649 	case E_DImode:
3650 	  {
3651 	    rtx left = XEXP (x, 0);
3652 	    rtx right = XEXP (x, 1);
3653 	    if (TARGET_ZARCH)
3654 	      {
3655 		if (GET_CODE (right) == CONST_INT
3656 		    && CONST_OK_FOR_K (INTVAL (right)))
3657 		  *total = s390_cost->mghi;
3658 		else if (GET_CODE (left) == SIGN_EXTEND)
3659 		  *total = s390_cost->msgf;
3660 		else
3661 		  *total = s390_cost->msg;  /* msgr, msg */
3662 	      }
3663 	    else /* TARGET_31BIT */
3664 	      {
3665 		if (GET_CODE (left) == SIGN_EXTEND
3666 		    && GET_CODE (right) == SIGN_EXTEND)
3667 		  /* mulsidi case: mr, m */
3668 		  *total = s390_cost->m;
3669 		else if (GET_CODE (left) == ZERO_EXTEND
3670 			 && GET_CODE (right) == ZERO_EXTEND)
3671 		  /* umulsidi case: ml, mlr */
3672 		  *total = s390_cost->ml;
3673 		else
3674 		  /* Complex calculation is required.  */
3675 		  *total = COSTS_N_INSNS (40);
3676 	      }
3677 	    break;
3678 	  }
3679 	case E_SFmode:
3680 	case E_DFmode:
3681 	  *total = s390_cost->mult_df;
3682 	  break;
3683 	case E_TFmode:
3684 	  *total = s390_cost->mxbr;
3685 	  break;
3686 	default:
3687 	  return false;
3688 	}
3689       return false;
3690 
3691     case FMA:
3692       switch (mode)
3693 	{
3694 	case E_DFmode:
3695 	  *total = s390_cost->madbr;
3696 	  break;
3697 	case E_SFmode:
3698 	  *total = s390_cost->maebr;
3699 	  break;
3700 	default:
3701 	  return false;
3702 	}
3703       /* A negation in the third argument is free: FMSUB.  */
3704       if (GET_CODE (XEXP (x, 2)) == NEG)
3705 	{
3706 	  *total += (rtx_cost (XEXP (x, 0), mode, FMA, 0, speed)
3707 		     + rtx_cost (XEXP (x, 1), mode, FMA, 1, speed)
3708 		     + rtx_cost (XEXP (XEXP (x, 2), 0), mode, FMA, 2, speed));
3709 	  return true;
3710 	}
3711       return false;
3712 
3713     case UDIV:
3714     case UMOD:
3715       if (mode == TImode)	       /* 128 bit division */
3716 	*total = s390_cost->dlgr;
3717       else if (mode == DImode)
3718 	{
3719 	  rtx right = XEXP (x, 1);
3720 	  if (GET_CODE (right) == ZERO_EXTEND) /* 64 by 32 bit division */
3721 	    *total = s390_cost->dlr;
3722 	  else				       /* 64 by 64 bit division */
3723 	    *total = s390_cost->dlgr;
3724 	}
3725       else if (mode == SImode)         /* 32 bit division */
3726 	*total = s390_cost->dlr;
3727       return false;
3728 
3729     case DIV:
3730     case MOD:
3731       if (mode == DImode)
3732 	{
3733 	  rtx right = XEXP (x, 1);
3734 	  if (GET_CODE (right) == ZERO_EXTEND) /* 64 by 32 bit division */
3735 	    if (TARGET_ZARCH)
3736 	      *total = s390_cost->dsgfr;
3737 	    else
3738 	      *total = s390_cost->dr;
3739 	  else				       /* 64 by 64 bit division */
3740 	    *total = s390_cost->dsgr;
3741 	}
3742       else if (mode == SImode)         /* 32 bit division */
3743 	*total = s390_cost->dlr;
3744       else if (mode == SFmode)
3745 	{
3746 	  *total = s390_cost->debr;
3747 	}
3748       else if (mode == DFmode)
3749 	{
3750 	  *total = s390_cost->ddbr;
3751 	}
3752       else if (mode == TFmode)
3753 	{
3754 	  *total = s390_cost->dxbr;
3755 	}
3756       return false;
3757 
3758     case SQRT:
3759       if (mode == SFmode)
3760 	*total = s390_cost->sqebr;
3761       else if (mode == DFmode)
3762 	*total = s390_cost->sqdbr;
3763       else /* TFmode */
3764 	*total = s390_cost->sqxbr;
3765       return false;
3766 
3767     case SIGN_EXTEND:
3768     case ZERO_EXTEND:
3769       if (outer_code == MULT || outer_code == DIV || outer_code == MOD
3770 	  || outer_code == PLUS || outer_code == MINUS
3771 	  || outer_code == COMPARE)
3772 	*total = 0;
3773       return false;
3774 
3775     case COMPARE:
3776       *total = COSTS_N_INSNS (1);
3777 
3778       /* nxrk, nxgrk ~(a^b)==0 */
3779       if (TARGET_Z15
3780 	  && GET_CODE (XEXP (x, 0)) == NOT
3781 	  && XEXP (x, 1) == const0_rtx
3782 	  && GET_CODE (XEXP (XEXP (x, 0), 0)) == XOR
3783 	  && (GET_MODE (XEXP (x, 0)) == SImode || GET_MODE (XEXP (x, 0)) == DImode)
3784 	  && mode == CCZmode)
3785 	{
3786 	  if (!REG_P (XEXP (XEXP (XEXP (x, 0), 0), 0)))
3787 	    *total += 1;
3788 	  if (!REG_P (XEXP (XEXP (XEXP (x, 0), 0), 1)))
3789 	    *total += 1;
3790 	  return true;
3791 	}
3792 
3793       /* nnrk, nngrk, nork, nogrk */
3794       if (TARGET_Z15
3795 	  && (GET_CODE (XEXP (x, 0)) == AND || GET_CODE (XEXP (x, 0)) == IOR)
3796 	  && XEXP (x, 1) == const0_rtx
3797 	  && (GET_MODE (XEXP (x, 0)) == SImode || GET_MODE (XEXP (x, 0)) == DImode)
3798 	  && GET_CODE (XEXP (XEXP (x, 0), 0)) == NOT
3799 	  && GET_CODE (XEXP (XEXP (x, 0), 1)) == NOT
3800 	  && mode == CCZmode)
3801 	{
3802 	  if (!REG_P (XEXP (XEXP (XEXP (x, 0), 0), 0)))
3803 	    *total += 1;
3804 	  if (!REG_P (XEXP (XEXP (XEXP (x, 0), 1), 0)))
3805 	    *total += 1;
3806 	  return true;
3807 	}
3808 
3809       if (GET_CODE (XEXP (x, 0)) == AND
3810 	  && GET_CODE (XEXP (x, 1)) == CONST_INT
3811 	  && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)
3812 	{
3813 	  rtx op0 = XEXP (XEXP (x, 0), 0);
3814 	  rtx op1 = XEXP (XEXP (x, 0), 1);
3815 	  rtx op2 = XEXP (x, 1);
3816 
3817 	  if (memory_operand (op0, GET_MODE (op0))
3818 	      && s390_tm_ccmode (op1, op2, 0) != VOIDmode)
3819 	    return true;
3820 	  if (register_operand (op0, GET_MODE (op0))
3821 	      && s390_tm_ccmode (op1, op2, 1) != VOIDmode)
3822 	    return true;
3823 	}
3824       return false;
3825 
3826     default:
3827       return false;
3828     }
3829 }
3830 
3831 /* Return the cost of an address rtx ADDR.  */
3832 
3833 static int
3834 s390_address_cost (rtx addr, machine_mode mode ATTRIBUTE_UNUSED,
3835 		   addr_space_t as ATTRIBUTE_UNUSED,
3836 		   bool speed ATTRIBUTE_UNUSED)
3837 {
3838   struct s390_address ad;
3839   if (!s390_decompose_address (addr, &ad))
3840     return 1000;
3841 
3842   return ad.indx? COSTS_N_INSNS (1) + 1 : COSTS_N_INSNS (1);
3843 }
3844 
3845 /* Implement targetm.vectorize.builtin_vectorization_cost.  */
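/* For example, with this cost model constructing a V4SI vector from four
   scalar elements (vec_construct) is costed as TYPE_VECTOR_SUBPARTS - 1
   = 3, a taken conditional branch as 3, and most other operations as 1;
   this is an illustrative reading of the switch below, not separate
   tuning data.  */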
3846 static int
3847 s390_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
3848 				 tree vectype,
3849 				 int misalign ATTRIBUTE_UNUSED)
3850 {
3851   switch (type_of_cost)
3852     {
3853       case scalar_stmt:
3854       case scalar_load:
3855       case scalar_store:
3856       case vector_stmt:
3857       case vector_load:
3858       case vector_store:
3859       case vector_gather_load:
3860       case vector_scatter_store:
3861       case vec_to_scalar:
3862       case scalar_to_vec:
3863       case cond_branch_not_taken:
3864       case vec_perm:
3865       case vec_promote_demote:
3866       case unaligned_load:
3867       case unaligned_store:
3868 	return 1;
3869 
3870       case cond_branch_taken:
3871 	return 3;
3872 
3873       case vec_construct:
3874 	return TYPE_VECTOR_SUBPARTS (vectype) - 1;
3875 
3876       default:
3877 	gcc_unreachable ();
3878     }
3879 }
3880 
3881 /* If OP is a SYMBOL_REF of a thread-local symbol, return its TLS mode,
3882    otherwise return 0.  */
3883 
3884 int
3885 tls_symbolic_operand (rtx op)
3886 {
3887   if (GET_CODE (op) != SYMBOL_REF)
3888     return 0;
3889   return SYMBOL_REF_TLS_MODEL (op);
3890 }
3891 
3892 /* Split DImode access register reference REG (on 64-bit) into its constituent
3893    low and high parts, and store them into LO and HI.  Note that gen_lowpart/
3894    gen_highpart cannot be used as they assume all registers are word-sized,
3895    while our access registers have only half that size.  */
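/* Example (assuming an even/odd access-register pair): for a DImode
   reference starting at access register a0, *HI becomes the SImode
   register a0 and *LO the SImode register a0 + 1, i.e. the high part
   stays in the even register and the low part in the odd one.  */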
3896 
3897 void
3898 s390_split_access_reg (rtx reg, rtx *lo, rtx *hi)
3899 {
3900   gcc_assert (TARGET_64BIT);
3901   gcc_assert (ACCESS_REG_P (reg));
3902   gcc_assert (GET_MODE (reg) == DImode);
3903   gcc_assert (!(REGNO (reg) & 1));
3904 
3905   *lo = gen_rtx_REG (SImode, REGNO (reg) + 1);
3906   *hi = gen_rtx_REG (SImode, REGNO (reg));
3907 }
3908 
3909 /* Return true if OP contains a symbol reference.  */
3910 
3911 bool
3912 symbolic_reference_mentioned_p (rtx op)
3913 {
3914   const char *fmt;
3915   int i;
3916 
3917   if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
3918     return 1;
3919 
3920   fmt = GET_RTX_FORMAT (GET_CODE (op));
3921   for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
3922     {
3923       if (fmt[i] == 'E')
3924 	{
3925 	  int j;
3926 
3927 	  for (j = XVECLEN (op, i) - 1; j >= 0; j--)
3928 	    if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
3929 	      return 1;
3930 	}
3931 
3932       else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
3933 	return 1;
3934     }
3935 
3936   return 0;
3937 }
3938 
3939 /* Return true if OP contains a reference to a thread-local symbol.  */
3940 
3941 bool
3942 tls_symbolic_reference_mentioned_p (rtx op)
3943 {
3944   const char *fmt;
3945   int i;
3946 
3947   if (GET_CODE (op) == SYMBOL_REF)
3948     return tls_symbolic_operand (op);
3949 
3950   fmt = GET_RTX_FORMAT (GET_CODE (op));
3951   for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
3952     {
3953       if (fmt[i] == 'E')
3954 	{
3955 	  int j;
3956 
3957 	  for (j = XVECLEN (op, i) - 1; j >= 0; j--)
3958 	    if (tls_symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
3959 	      return true;
3960 	}
3961 
3962       else if (fmt[i] == 'e' && tls_symbolic_reference_mentioned_p (XEXP (op, i)))
3963 	return true;
3964     }
3965 
3966   return false;
3967 }
3968 
3969 
3970 /* Return true if OP is a legitimate general operand when
3971    generating PIC code.  It is given that flag_pic is on
3972    and that OP satisfies CONSTANT_P.  */
3973 
3974 int
3975 legitimate_pic_operand_p (rtx op)
3976 {
3977   /* Accept all non-symbolic constants.  */
3978   if (!SYMBOLIC_CONST (op))
3979     return 1;
3980 
3981   /* Accept addresses that can be expressed relative to (pc).  */
3982   if (larl_operand (op, VOIDmode))
3983     return 1;
3984 
3985   /* Reject everything else; must be handled
3986      via emit_symbolic_move.  */
3987   return 0;
3988 }
3989 
3990 /* Returns true if the constant value OP is a legitimate general operand.
3991    It is given that OP satisfies CONSTANT_P.  */
3992 
3993 static bool
3994 s390_legitimate_constant_p (machine_mode mode, rtx op)
3995 {
3996   if (TARGET_VX && VECTOR_MODE_P (mode) && GET_CODE (op) == CONST_VECTOR)
3997     {
3998       if (GET_MODE_SIZE (mode) != 16)
3999 	return 0;
4000 
4001       if (!satisfies_constraint_j00 (op)
4002 	  && !satisfies_constraint_jm1 (op)
4003 	  && !satisfies_constraint_jKK (op)
4004 	  && !satisfies_constraint_jxx (op)
4005 	  && !satisfies_constraint_jyy (op))
4006 	return 0;
4007     }
4008 
4009   /* Accept all non-symbolic constants.  */
4010   if (!SYMBOLIC_CONST (op))
4011     return 1;
4012 
4013   /* Accept immediate LARL operands.  */
4014   if (larl_operand (op, mode))
4015     return 1;
4016 
4017   /* Thread-local symbols are never legal constants.  This is
4018      so that emit_call knows that computing such addresses
4019      might require a function call.  */
4020   if (TLS_SYMBOLIC_CONST (op))
4021     return 0;
4022 
4023   /* In the PIC case, symbolic constants must *not* be
4024      forced into the literal pool.  We accept them here,
4025      so that they will be handled by emit_symbolic_move.  */
4026   if (flag_pic)
4027     return 1;
4028 
4029   /* All remaining non-PIC symbolic constants are
4030      forced into the literal pool.  */
4031   return 0;
4032 }
4033 
4034 /* Determine if it's legal to put X into the constant pool.  This
4035    is not possible if X contains the address of a symbol that is
4036    not constant (TLS) or not known at final link time (PIC).  */
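/* For example, a plain CONST_INT or CONST_DOUBLE can always be pooled,
   whereas with -fpic a SYMBOL_REF or LABEL_REF cannot, since its final
   value is only known at link or load time (see the cases below); a
   'naked' TLS symbol reference is rejected unconditionally.  */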
4037 
4038 static bool
4039 s390_cannot_force_const_mem (machine_mode mode, rtx x)
4040 {
4041   switch (GET_CODE (x))
4042     {
4043     case CONST_INT:
4044     case CONST_DOUBLE:
4045     case CONST_WIDE_INT:
4046     case CONST_VECTOR:
4047       /* Accept all non-symbolic constants.  */
4048       return false;
4049 
4050     case LABEL_REF:
4051       /* Labels are OK iff we are non-PIC.  */
4052       return flag_pic != 0;
4053 
4054     case SYMBOL_REF:
4055       /* 'Naked' TLS symbol references are never OK,
4056 	 non-TLS symbols are OK iff we are non-PIC.  */
4057       if (tls_symbolic_operand (x))
4058 	return true;
4059       else
4060 	return flag_pic != 0;
4061 
4062     case CONST:
4063       return s390_cannot_force_const_mem (mode, XEXP (x, 0));
4064     case PLUS:
4065     case MINUS:
4066       return s390_cannot_force_const_mem (mode, XEXP (x, 0))
4067 	     || s390_cannot_force_const_mem (mode, XEXP (x, 1));
4068 
4069     case UNSPEC:
4070       switch (XINT (x, 1))
4071 	{
4072 	/* Only lt-relative or GOT-relative UNSPECs are OK.  */
4073 	case UNSPEC_LTREL_OFFSET:
4074 	case UNSPEC_GOT:
4075 	case UNSPEC_GOTOFF:
4076 	case UNSPEC_PLTOFF:
4077 	case UNSPEC_TLSGD:
4078 	case UNSPEC_TLSLDM:
4079 	case UNSPEC_NTPOFF:
4080 	case UNSPEC_DTPOFF:
4081 	case UNSPEC_GOTNTPOFF:
4082 	case UNSPEC_INDNTPOFF:
4083 	  return false;
4084 
4085 	/* If the literal pool shares the code section, execute template
4086 	   placeholders may be put into the pool as well.  */
4087 	case UNSPEC_INSN:
4088 	default:
4089 	  return true;
4090 	}
4091       break;
4092 
4093     default:
4094       gcc_unreachable ();
4095     }
4096 }
4097 
4098 /* Returns true if the constant value OP is a legitimate general
4099    operand during and after reload.  The difference to
4100    legitimate_constant_p is that this function will not accept
4101    a constant that would need to be forced to the literal pool
4102    before it can be used as operand.
4103    This function accepts all constants which can be loaded directly
4104    into a GPR.  */
4105 
4106 bool
4107 legitimate_reload_constant_p (rtx op)
4108 {
4109   /* Accept la(y) operands.  */
4110   if (GET_CODE (op) == CONST_INT
4111       && DISP_IN_RANGE (INTVAL (op)))
4112     return true;
4113 
4114   /* Accept l(g)hi/l(g)fi operands.  */
4115   if (GET_CODE (op) == CONST_INT
4116       && (CONST_OK_FOR_K (INTVAL (op)) || CONST_OK_FOR_Os (INTVAL (op))))
4117     return true;
4118 
4119   /* Accept lliXX operands.  */
4120   if (TARGET_ZARCH
4121       && GET_CODE (op) == CONST_INT
4122       && trunc_int_for_mode (INTVAL (op), word_mode) == INTVAL (op)
4123       && s390_single_part (op, word_mode, HImode, 0) >= 0)
4124   return true;
4125 
4126   if (TARGET_EXTIMM
4127       && GET_CODE (op) == CONST_INT
4128       && trunc_int_for_mode (INTVAL (op), word_mode) == INTVAL (op)
4129       && s390_single_part (op, word_mode, SImode, 0) >= 0)
4130     return true;
4131 
4132   /* Accept larl operands.  */
4133   if (larl_operand (op, VOIDmode))
4134     return true;
4135 
4136   /* Accept floating-point zero operands that fit into a single GPR.  */
4137   if (GET_CODE (op) == CONST_DOUBLE
4138       && s390_float_const_zero_p (op)
4139       && GET_MODE_SIZE (GET_MODE (op)) <= UNITS_PER_WORD)
4140     return true;
4141 
4142   /* Accept double-word operands that can be split.  */
4143   if (GET_CODE (op) == CONST_WIDE_INT
4144       || (GET_CODE (op) == CONST_INT
4145 	  && trunc_int_for_mode (INTVAL (op), word_mode) != INTVAL (op)))
4146     {
4147       machine_mode dword_mode = word_mode == SImode ? DImode : TImode;
4148       rtx hi = operand_subword (op, 0, 0, dword_mode);
4149       rtx lo = operand_subword (op, 1, 0, dword_mode);
4150       return legitimate_reload_constant_p (hi)
4151 	     && legitimate_reload_constant_p (lo);
4152     }
4153 
4154   /* Everything else cannot be handled without reload.  */
4155   return false;
4156 }
4157 
4158 /* Returns true if the constant value OP is a legitimate fp operand
4159    during and after reload.
4160    This function accepts all constants which can be loaded directly
4161    into an FPR.  */
4162 
4163 static bool
4164 legitimate_reload_fp_constant_p (rtx op)
4165 {
4166   /* Accept floating-point zero operands if the load zero instruction
4167      can be used.  Prior to z196 the load fp zero instruction caused a
4168      performance penalty if the result is used as BFP number.  */
4169   if (TARGET_Z196
4170       && GET_CODE (op) == CONST_DOUBLE
4171       && s390_float_const_zero_p (op))
4172     return true;
4173 
4174   return false;
4175 }
4176 
4177 /* Returns true if the constant value OP is a legitimate vector operand
4178    during and after reload.
4179    This function accepts all constants which can be loaded directly
4180    into a VR.  */
4181 
4182 static bool
4183 legitimate_reload_vector_constant_p (rtx op)
4184 {
4185   if (TARGET_VX && GET_MODE_SIZE (GET_MODE (op)) == 16
4186       && (satisfies_constraint_j00 (op)
4187 	  || satisfies_constraint_jm1 (op)
4188 	  || satisfies_constraint_jKK (op)
4189 	  || satisfies_constraint_jxx (op)
4190 	  || satisfies_constraint_jyy (op)))
4191     return true;
4192 
4193   return false;
4194 }
4195 
4196 /* Given an rtx OP being reloaded into a reg required to be in class RCLASS,
4197    return the class of reg to actually use.  */
4198 
4199 static reg_class_t
4200 s390_preferred_reload_class (rtx op, reg_class_t rclass)
4201 {
4202   switch (GET_CODE (op))
4203     {
4204       /* Constants we cannot reload into general registers
4205 	 must be forced into the literal pool.  */
4206       case CONST_VECTOR:
4207       case CONST_DOUBLE:
4208       case CONST_INT:
4209       case CONST_WIDE_INT:
4210 	if (reg_class_subset_p (GENERAL_REGS, rclass)
4211 	    && legitimate_reload_constant_p (op))
4212 	  return GENERAL_REGS;
4213 	else if (reg_class_subset_p (ADDR_REGS, rclass)
4214 		 && legitimate_reload_constant_p (op))
4215 	  return ADDR_REGS;
4216 	else if (reg_class_subset_p (FP_REGS, rclass)
4217 		 && legitimate_reload_fp_constant_p (op))
4218 	  return FP_REGS;
4219 	else if (reg_class_subset_p (VEC_REGS, rclass)
4220 		 && legitimate_reload_vector_constant_p (op))
4221 	  return VEC_REGS;
4222 
4223 	return NO_REGS;
4224 
4225       /* If a symbolic constant or a PLUS is reloaded,
4226 	 it is most likely being used as an address, so
4227 	 prefer ADDR_REGS.  If 'class' is not a superset
4228 	 of ADDR_REGS, e.g. FP_REGS, reject this reload.  */
4229       case CONST:
4230 	/* Symrefs cannot be pushed into the literal pool with -fPIC
4231 	   so we *MUST NOT* return NO_REGS for these cases
4232 	   (s390_cannot_force_const_mem will return true).
4233 
4234 	   On the other hand we MUST return NO_REGS for symrefs with
4235 	   invalid addend which might have been pushed to the literal
4236 	   pool (no -fPIC).  Usually we would expect them to be
4237 	   handled via secondary reload but this does not happen if
4238 	   they are used as literal pool slot replacement in reload
4239 	   inheritance (see emit_input_reload_insns).  */
4240 	if (GET_CODE (XEXP (op, 0)) == PLUS
4241 	    && GET_CODE (XEXP (XEXP(op, 0), 0)) == SYMBOL_REF
4242 	    && GET_CODE (XEXP (XEXP(op, 0), 1)) == CONST_INT)
4243 	  {
4244 	    if (flag_pic && reg_class_subset_p (ADDR_REGS, rclass))
4245 	      return ADDR_REGS;
4246 	    else
4247 	      return NO_REGS;
4248 	  }
4249 	/* fallthrough */
4250       case LABEL_REF:
4251       case SYMBOL_REF:
4252 	if (!legitimate_reload_constant_p (op))
4253 	  return NO_REGS;
4254 	/* fallthrough */
4255       case PLUS:
4256 	/* load address will be used.  */
4257 	if (reg_class_subset_p (ADDR_REGS, rclass))
4258 	  return ADDR_REGS;
4259 	else
4260 	  return NO_REGS;
4261 
4262       default:
4263 	break;
4264     }
4265 
4266   return rclass;
4267 }
4268 
4269 /* Return true if ADDR is SYMBOL_REF + addend with addend being a
4270    multiple of ALIGNMENT and the SYMBOL_REF being naturally
4271    aligned.  */
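/* For instance (hypothetical symbol): for ADDR == (const (plus (symbol_ref
   "foo") (const_int 6))) and ALIGNMENT == 4 the function returns false,
   because the addend 6 is not a multiple of 4; with ALIGNMENT == 2 it
   returns true provided "foo" is not marked SYMBOL_FLAG_NOTALIGN2.  */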
4272 
4273 bool
4274 s390_check_symref_alignment (rtx addr, HOST_WIDE_INT alignment)
4275 {
4276   HOST_WIDE_INT addend;
4277   rtx symref;
4278 
4279   /* The "required alignment" might be 0 (e.g. for certain structs
4280      accessed via BLKmode).  Early abort in this case, as well as when
4281      an alignment > 8 is required.  */
4282   if (alignment < 2 || alignment > 8)
4283     return false;
4284 
4285   if (!s390_loadrelative_operand_p (addr, &symref, &addend))
4286     return false;
4287 
4288   if (addend & (alignment - 1))
4289     return false;
4290 
4291   if (GET_CODE (symref) == SYMBOL_REF)
4292     {
4293       /* s390_encode_section_info is not called for anchors, since they don't
4294 	 have corresponding VAR_DECLs.  Therefore, we cannot rely on
4295 	 SYMBOL_FLAG_NOTALIGN{2,4,8}_P returning useful information.  */
4296       if (SYMBOL_REF_ANCHOR_P (symref))
4297 	{
4298 	  HOST_WIDE_INT block_offset = SYMBOL_REF_BLOCK_OFFSET (symref);
4299 	  unsigned int block_alignment = (SYMBOL_REF_BLOCK (symref)->alignment
4300 					  / BITS_PER_UNIT);
4301 
4302 	  gcc_assert (block_offset >= 0);
4303 	  return ((block_offset & (alignment - 1)) == 0
4304 		  && block_alignment >= alignment);
4305 	}
4306 
4307       /* We have load-relative instructions for 2-byte, 4-byte, and
4308 	 8-byte alignment so allow only these.  */
4309       switch (alignment)
4310 	{
4311 	case 8:	return !SYMBOL_FLAG_NOTALIGN8_P (symref);
4312 	case 4:	return !SYMBOL_FLAG_NOTALIGN4_P (symref);
4313 	case 2:	return !SYMBOL_FLAG_NOTALIGN2_P (symref);
4314 	default: return false;
4315 	}
4316     }
4317 
4318   if (GET_CODE (symref) == UNSPEC
4319       && alignment <= UNITS_PER_LONG)
4320     return true;
4321 
4322   return false;
4323 }
4324 
4325 /* ADDR is moved into REG using larl.  If ADDR isn't a valid larl
4326    operand, SCRATCH is used to load the even part of the address,
4327    and one is added afterwards.  */
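/* Sketch of the odd-addend case (hypothetical symbol): for
   ADDR == (const (plus (symbol_ref "foo") (const_int 5))) the sequence

     larl  %scratch, foo+4
     la    %reg, 1(%scratch)

   is emitted, since larl can only encode even offsets and la does not
   clobber the condition code.  */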
4328 
4329 void
4330 s390_reload_larl_operand (rtx reg, rtx addr, rtx scratch)
4331 {
4332   HOST_WIDE_INT addend;
4333   rtx symref;
4334 
4335   if (!s390_loadrelative_operand_p (addr, &symref, &addend))
4336     gcc_unreachable ();
4337 
4338   if (!(addend & 1))
4339     /* Easy case.  The addend is even so larl will do fine.  */
4340     emit_move_insn (reg, addr);
4341   else
4342     {
4343       /* We can leave the scratch register untouched if the target
4344 	 register is a valid base register.  */
4345       if (REGNO (reg) < FIRST_PSEUDO_REGISTER
4346 	  && REGNO_REG_CLASS (REGNO (reg)) == ADDR_REGS)
4347 	scratch = reg;
4348 
4349       gcc_assert (REGNO (scratch) < FIRST_PSEUDO_REGISTER);
4350       gcc_assert (REGNO_REG_CLASS (REGNO (scratch)) == ADDR_REGS);
4351 
4352       if (addend != 1)
4353 	emit_move_insn (scratch,
4354 			gen_rtx_CONST (Pmode,
4355 				       gen_rtx_PLUS (Pmode, symref,
4356 						     GEN_INT (addend - 1))));
4357       else
4358 	emit_move_insn (scratch, symref);
4359 
4360       /* Increment the address using la in order to avoid clobbering cc.  */
4361       s390_load_address (reg, gen_rtx_PLUS (Pmode, scratch, const1_rtx));
4362     }
4363 }
4364 
4365 /* Generate what is necessary to move between REG and MEM using
4366    SCRATCH.  The direction is given by TOMEM.  */
4367 
4368 void
4369 s390_reload_symref_address (rtx reg, rtx mem, rtx scratch, bool tomem)
4370 {
4371   /* Reload might have pulled a constant out of the literal pool.
4372      Force it back in.  */
4373   if (CONST_INT_P (mem) || GET_CODE (mem) == CONST_DOUBLE
4374       || GET_CODE (mem) == CONST_WIDE_INT
4375       || GET_CODE (mem) == CONST_VECTOR
4376       || GET_CODE (mem) == CONST)
4377     mem = force_const_mem (GET_MODE (reg), mem);
4378 
4379   gcc_assert (MEM_P (mem));
4380 
4381   /* For a load from memory we can leave the scratch register
4382      untouched if the target register is a valid base register.  */
4383   if (!tomem
4384       && REGNO (reg) < FIRST_PSEUDO_REGISTER
4385       && REGNO_REG_CLASS (REGNO (reg)) == ADDR_REGS
4386       && GET_MODE (reg) == GET_MODE (scratch))
4387     scratch = reg;
4388 
4389   /* Load address into scratch register.  Since we can't have a
4390      secondary reload for a secondary reload we have to cover the case
4391      where larl would need a secondary reload here as well.  */
4392   s390_reload_larl_operand (scratch, XEXP (mem, 0), scratch);
4393 
4394   /* Now we can use a standard load/store to do the move.  */
4395   if (tomem)
4396     emit_move_insn (replace_equiv_address (mem, scratch), reg);
4397   else
4398     emit_move_insn (reg, replace_equiv_address (mem, scratch));
4399 }
4400 
4401 /* Inform reload about cases where moving X with a mode MODE to a register in
4402    RCLASS requires an extra scratch or immediate register.  Return the class
4403    needed for the immediate register.  */
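/* Two illustrative situations (sketches of the cases handled below, not
   additional ones):

   - a V16QImode access to 4096(%r2): vl/vst only accept a short (12-bit
     unsigned) displacement, so a reload of the address into a scratch
     address register is requested via CODE_FOR_reload*_la_in/_la_out;

   - on z10, a larl operand such as foo+3 with an odd addend cannot be
     loaded directly, so CODE_FOR_reload*_larl_odd_addend_z10 is used.  */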
4404 
4405 static reg_class_t
4406 s390_secondary_reload (bool in_p, rtx x, reg_class_t rclass_i,
4407 		       machine_mode mode, secondary_reload_info *sri)
4408 {
4409   enum reg_class rclass = (enum reg_class) rclass_i;
4410 
4411   /* Intermediate register needed.  */
4412   if (reg_classes_intersect_p (CC_REGS, rclass))
4413     return GENERAL_REGS;
4414 
4415   if (TARGET_VX)
4416     {
4417       /* The vst/vl vector move instructions allow only for short
4418 	 displacements.  */
4419       if (MEM_P (x)
4420 	  && GET_CODE (XEXP (x, 0)) == PLUS
4421 	  && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
4422 	  && !SHORT_DISP_IN_RANGE(INTVAL (XEXP (XEXP (x, 0), 1)))
4423 	  && reg_class_subset_p (rclass, VEC_REGS)
4424 	  && (!reg_class_subset_p (rclass, FP_REGS)
4425 	      || (GET_MODE_SIZE (mode) > 8
4426 		  && s390_class_max_nregs (FP_REGS, mode) == 1)))
4427 	{
4428 	  if (in_p)
4429 	    sri->icode = (TARGET_64BIT ?
4430 			  CODE_FOR_reloaddi_la_in :
4431 			  CODE_FOR_reloadsi_la_in);
4432 	  else
4433 	    sri->icode = (TARGET_64BIT ?
4434 			  CODE_FOR_reloaddi_la_out :
4435 			  CODE_FOR_reloadsi_la_out);
4436 	}
4437     }
4438 
4439   if (TARGET_Z10)
4440     {
4441       HOST_WIDE_INT offset;
4442       rtx symref;
4443 
4444       /* On z10 several optimizer steps may generate larl operands with
4445 	 an odd addend.  */
4446       if (in_p
4447 	  && s390_loadrelative_operand_p (x, &symref, &offset)
4448 	  && mode == Pmode
4449 	  && !SYMBOL_FLAG_NOTALIGN2_P (symref)
4450 	  && (offset & 1) == 1)
4451 	sri->icode = ((mode == DImode) ? CODE_FOR_reloaddi_larl_odd_addend_z10
4452 		      : CODE_FOR_reloadsi_larl_odd_addend_z10);
4453 
4454       /* Handle all the (mem (symref)) accesses we cannot use the z10
4455 	 instructions for.  */
4456       if (MEM_P (x)
4457 	  && s390_loadrelative_operand_p (XEXP (x, 0), NULL, NULL)
4458 	  && (mode == QImode
4459 	      || !reg_class_subset_p (rclass, GENERAL_REGS)
4460 	      || GET_MODE_SIZE (mode) > UNITS_PER_WORD
4461 	      || !s390_check_symref_alignment (XEXP (x, 0),
4462 					       GET_MODE_SIZE (mode))))
4463 	{
4464 #define __SECONDARY_RELOAD_CASE(M,m)					\
4465 	  case E_##M##mode:						\
4466 	    if (TARGET_64BIT)						\
4467 	      sri->icode = in_p ? CODE_FOR_reload##m##di_toreg_z10 :	\
4468 				  CODE_FOR_reload##m##di_tomem_z10;	\
4469 	    else							\
4470 	      sri->icode = in_p ? CODE_FOR_reload##m##si_toreg_z10 :	\
4471 				  CODE_FOR_reload##m##si_tomem_z10;	\
4472 	  break;
4473 
4474 	  switch (GET_MODE (x))
4475 	    {
4476 	      __SECONDARY_RELOAD_CASE (QI, qi);
4477 	      __SECONDARY_RELOAD_CASE (HI, hi);
4478 	      __SECONDARY_RELOAD_CASE (SI, si);
4479 	      __SECONDARY_RELOAD_CASE (DI, di);
4480 	      __SECONDARY_RELOAD_CASE (TI, ti);
4481 	      __SECONDARY_RELOAD_CASE (SF, sf);
4482 	      __SECONDARY_RELOAD_CASE (DF, df);
4483 	      __SECONDARY_RELOAD_CASE (TF, tf);
4484 	      __SECONDARY_RELOAD_CASE (SD, sd);
4485 	      __SECONDARY_RELOAD_CASE (DD, dd);
4486 	      __SECONDARY_RELOAD_CASE (TD, td);
4487 	      __SECONDARY_RELOAD_CASE (V1QI, v1qi);
4488 	      __SECONDARY_RELOAD_CASE (V2QI, v2qi);
4489 	      __SECONDARY_RELOAD_CASE (V4QI, v4qi);
4490 	      __SECONDARY_RELOAD_CASE (V8QI, v8qi);
4491 	      __SECONDARY_RELOAD_CASE (V16QI, v16qi);
4492 	      __SECONDARY_RELOAD_CASE (V1HI, v1hi);
4493 	      __SECONDARY_RELOAD_CASE (V2HI, v2hi);
4494 	      __SECONDARY_RELOAD_CASE (V4HI, v4hi);
4495 	      __SECONDARY_RELOAD_CASE (V8HI, v8hi);
4496 	      __SECONDARY_RELOAD_CASE (V1SI, v1si);
4497 	      __SECONDARY_RELOAD_CASE (V2SI, v2si);
4498 	      __SECONDARY_RELOAD_CASE (V4SI, v4si);
4499 	      __SECONDARY_RELOAD_CASE (V1DI, v1di);
4500 	      __SECONDARY_RELOAD_CASE (V2DI, v2di);
4501 	      __SECONDARY_RELOAD_CASE (V1TI, v1ti);
4502 	      __SECONDARY_RELOAD_CASE (V1SF, v1sf);
4503 	      __SECONDARY_RELOAD_CASE (V2SF, v2sf);
4504 	      __SECONDARY_RELOAD_CASE (V4SF, v4sf);
4505 	      __SECONDARY_RELOAD_CASE (V1DF, v1df);
4506 	      __SECONDARY_RELOAD_CASE (V2DF, v2df);
4507 	      __SECONDARY_RELOAD_CASE (V1TF, v1tf);
4508 	    default:
4509 	      gcc_unreachable ();
4510 	    }
4511 #undef __SECONDARY_RELOAD_CASE
4512 	}
4513     }
4514 
4515   /* We need a scratch register when loading a PLUS expression which
4516      is not a legitimate operand of the LOAD ADDRESS instruction.  */
4517   /* LRA can deal with the transformation of a PLUS operand very well,
4518      so we don't need to prompt LRA in this case.  */
4519   if (! lra_in_progress && in_p && s390_plus_operand (x, mode))
4520     sri->icode = (TARGET_64BIT ?
4521 		  CODE_FOR_reloaddi_plus : CODE_FOR_reloadsi_plus);
4522 
4523   /* Performing a multiword move from or to memory we have to make sure the
4524      second chunk in memory is addressable without causing a displacement
4525      overflow.  If that would be the case we calculate the address in
4526      a scratch register.  */
4527   if (MEM_P (x)
4528       && GET_CODE (XEXP (x, 0)) == PLUS
4529       && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
4530       && !DISP_IN_RANGE (INTVAL (XEXP (XEXP (x, 0), 1))
4531 			 + GET_MODE_SIZE (mode) - 1))
4532     {
4533       /* For GENERAL_REGS a displacement overflow is no problem if it occurs
4534 	 in an s_operand address since we may fall back to lm/stm.  So we only
4535 	 have to care about overflows in the b+i+d case.  */
4536       if ((reg_classes_intersect_p (GENERAL_REGS, rclass)
4537 	   && s390_class_max_nregs (GENERAL_REGS, mode) > 1
4538 	   && GET_CODE (XEXP (XEXP (x, 0), 0)) == PLUS)
4539 	  /* For FP_REGS no lm/stm is available so this check is triggered
4540 	     for displacement overflows in b+i+d and b+d like addresses.  */
4541 	  || (reg_classes_intersect_p (FP_REGS, rclass)
4542 	      && s390_class_max_nregs (FP_REGS, mode) > 1))
4543 	{
4544 	  if (in_p)
4545 	    sri->icode = (TARGET_64BIT ?
4546 			  CODE_FOR_reloaddi_la_in :
4547 			  CODE_FOR_reloadsi_la_in);
4548 	  else
4549 	    sri->icode = (TARGET_64BIT ?
4550 			  CODE_FOR_reloaddi_la_out :
4551 			  CODE_FOR_reloadsi_la_out);
4552 	}
4553     }
4554 
4555   /* A scratch address register is needed when a symbolic constant is
4556      copied to r0 compiling with -fPIC.  In other cases the target
4557      register might be used as temporary (see legitimize_pic_address).  */
4558   if (in_p && SYMBOLIC_CONST (x) && flag_pic == 2 && rclass != ADDR_REGS)
4559     sri->icode = (TARGET_64BIT ?
4560 		  CODE_FOR_reloaddi_PIC_addr :
4561 		  CODE_FOR_reloadsi_PIC_addr);
4562 
4563   /* Either scratch or no register needed.  */
4564   return NO_REGS;
4565 }
4566 
4567 /* Implement TARGET_SECONDARY_MEMORY_NEEDED.
4568 
4569    We need secondary memory to move data between GPRs and FPRs.
4570 
4571    - With DFP the ldgr/lgdr instructions are available.  Due to the
4572      different alignment we cannot use them for SFmode.  For 31 bit a
4573      64 bit value in GPR would be a register pair so here we still
4574      need to go via memory.
4575 
4576    - With z13 we can do the SF/SImode moves with vlgvf.  Due to the
4577      overlapping of FPRs and VRs we still disallow TF/TD modes to be
4578      in full VRs so as before also on z13 we do these moves via
4579      memory.
4580 
4581      FIXME: Should we try splitting it into two vlgvg's/vlvg's instead?  */
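/* For example (assuming -m64 -march=z13 with DFP enabled): a DFmode or
   SImode copy between GPRs and FPRs/VRs needs no secondary memory,
   whereas a TFmode copy between GPRs and VRs is still routed through
   memory, matching the condition below.  */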
4582 
4583 static bool
4584 s390_secondary_memory_needed (machine_mode mode,
4585 			      reg_class_t class1, reg_class_t class2)
4586 {
4587   return (((reg_classes_intersect_p (class1, VEC_REGS)
4588 	    && reg_classes_intersect_p (class2, GENERAL_REGS))
4589 	   || (reg_classes_intersect_p (class1, GENERAL_REGS)
4590 	       && reg_classes_intersect_p (class2, VEC_REGS)))
4591 	  && (!TARGET_DFP || !TARGET_64BIT || GET_MODE_SIZE (mode) != 8)
4592 	  && (!TARGET_VX || (SCALAR_FLOAT_MODE_P (mode)
4593 			     && GET_MODE_SIZE (mode) > 8)));
4594 }
4595 
4596 /* Implement TARGET_SECONDARY_MEMORY_NEEDED_MODE.
4597 
4598    get_secondary_mem widens its argument to BITS_PER_WORD which loses on 64-bit
4599    because the movsi and movsf patterns don't handle r/f moves.  */
4600 
4601 static machine_mode
4602 s390_secondary_memory_needed_mode (machine_mode mode)
4603 {
4604   if (GET_MODE_BITSIZE (mode) < 32)
4605     return mode_for_size (32, GET_MODE_CLASS (mode), 0).require ();
4606   return mode;
4607 }
4608 
4609 /* Generate code to load SRC, which is PLUS that is not a
4610    legitimate operand for the LA instruction, into TARGET.
4611    SCRATCH may be used as scratch register.  */
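/* For instance (hypothetical registers): for SRC == (plus:DI (reg %f4)
   (reg %r3)) the floating-point register cannot appear in an address, so
   its value is first copied into SCRATCH and the LOAD ADDRESS pattern is
   then emitted on (plus SCRATCH %r3).  */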
4612 
4613 void
4614 s390_expand_plus_operand (rtx target, rtx src,
4615 			  rtx scratch)
4616 {
4617   rtx sum1, sum2;
4618   struct s390_address ad;
4619 
4620   /* src must be a PLUS; get its two operands.  */
4621   gcc_assert (GET_CODE (src) == PLUS);
4622   gcc_assert (GET_MODE (src) == Pmode);
4623 
4624   /* Check if any of the two operands is already scheduled
4625      for replacement by reload.  This can happen e.g. when
4626      float registers occur in an address.  */
4627   sum1 = find_replacement (&XEXP (src, 0));
4628   sum2 = find_replacement (&XEXP (src, 1));
4629   src = gen_rtx_PLUS (Pmode, sum1, sum2);
4630 
4631   /* If the address is already strictly valid, there's nothing to do.  */
4632   if (!s390_decompose_address (src, &ad)
4633       || (ad.base && !REGNO_OK_FOR_BASE_P (REGNO (ad.base)))
4634       || (ad.indx && !REGNO_OK_FOR_INDEX_P (REGNO (ad.indx))))
4635     {
4636       /* Otherwise, one of the operands cannot be an address register;
4637 	 we reload its value into the scratch register.  */
4638       if (true_regnum (sum1) < 1 || true_regnum (sum1) > 15)
4639 	{
4640 	  emit_move_insn (scratch, sum1);
4641 	  sum1 = scratch;
4642 	}
4643       if (true_regnum (sum2) < 1 || true_regnum (sum2) > 15)
4644 	{
4645 	  emit_move_insn (scratch, sum2);
4646 	  sum2 = scratch;
4647 	}
4648 
4649       /* According to the way these invalid addresses are generated
4650 	 in reload.c, it should never happen (at least on s390) that
4651 	 *neither* of the PLUS components, after find_replacements
4652 	 was applied, is an address register.  */
4653       if (sum1 == scratch && sum2 == scratch)
4654 	{
4655 	  debug_rtx (src);
4656 	  gcc_unreachable ();
4657 	}
4658 
4659       src = gen_rtx_PLUS (Pmode, sum1, sum2);
4660     }
4661 
4662   /* Emit the LOAD ADDRESS pattern.  Note that reload of PLUS
4663      is only ever performed on addresses, so we can mark the
4664      sum as legitimate for LA in any case.  */
4665   s390_load_address (target, src);
4666 }
4667 
4668 
4669 /* Return true if ADDR is a valid memory address.
4670    STRICT specifies whether strict register checking applies.  */
4671 
4672 static bool
4673 s390_legitimate_address_p (machine_mode mode, rtx addr, bool strict)
4674 {
4675   struct s390_address ad;
4676 
4677   if (TARGET_Z10
4678       && larl_operand (addr, VOIDmode)
4679       && (mode == VOIDmode
4680 	  || s390_check_symref_alignment (addr, GET_MODE_SIZE (mode))))
4681     return true;
4682 
4683   if (!s390_decompose_address (addr, &ad))
4684     return false;
4685 
4686   /* The vector memory instructions only support short displacements.
4687      Reject invalid displacements early to prevent plenty of lay
4688      instructions to be generated later which then cannot be merged
4689      properly.  */
4690   if (TARGET_VX
4691       && VECTOR_MODE_P (mode)
4692       && ad.disp != NULL_RTX
4693       && CONST_INT_P (ad.disp)
4694       && !SHORT_DISP_IN_RANGE (INTVAL (ad.disp)))
4695     return false;
4696 
4697   if (strict)
4698     {
4699       if (ad.base && !REGNO_OK_FOR_BASE_P (REGNO (ad.base)))
4700 	return false;
4701 
4702       if (ad.indx && !REGNO_OK_FOR_INDEX_P (REGNO (ad.indx)))
4703 	return false;
4704     }
4705   else
4706     {
4707       if (ad.base
4708 	  && !(REGNO (ad.base) >= FIRST_PSEUDO_REGISTER
4709 	       || REGNO_REG_CLASS (REGNO (ad.base)) == ADDR_REGS))
4710 	return false;
4711 
4712       if (ad.indx
4713 	  && !(REGNO (ad.indx) >= FIRST_PSEUDO_REGISTER
4714 	       || REGNO_REG_CLASS (REGNO (ad.indx)) == ADDR_REGS))
4715 	  return false;
4716     }
4717   return true;
4718 }
4719 
4720 /* Return true if OP is a valid operand for the LA instruction.
4721    In 31-bit, we need to prove that the result is used as an
4722    address, as LA performs only a 31-bit addition.  */
4723 
4724 bool
4725 legitimate_la_operand_p (rtx op)
4726 {
4727   struct s390_address addr;
4728   if (!s390_decompose_address (op, &addr))
4729     return false;
4730 
4731   return (TARGET_64BIT || addr.pointer);
4732 }
4733 
4734 /* Return true if it is valid *and* preferable to use LA to
4735    compute the sum of OP1 and OP2.  */
4736 
4737 bool
4738 preferred_la_operand_p (rtx op1, rtx op2)
4739 {
4740   struct s390_address addr;
4741 
4742   if (op2 != const0_rtx)
4743     op1 = gen_rtx_PLUS (Pmode, op1, op2);
4744 
4745   if (!s390_decompose_address (op1, &addr))
4746     return false;
4747   if (addr.base && !REGNO_OK_FOR_BASE_P (REGNO (addr.base)))
4748     return false;
4749   if (addr.indx && !REGNO_OK_FOR_INDEX_P (REGNO (addr.indx)))
4750     return false;
4751 
4752   /* Avoid LA instructions with index (and base) register on z196 or
4753      later; it is preferable to use regular add instructions when
4754      possible.  Starting with zEC12 the la with index register is
4755      "uncracked" again but still slower than a regular add.  */
4756   if (addr.indx && s390_tune >= PROCESSOR_2817_Z196)
4757     return false;
4758 
4759   if (!TARGET_64BIT && !addr.pointer)
4760     return false;
4761 
4762   if (addr.pointer)
4763     return true;
4764 
4765   if ((addr.base && REG_P (addr.base) && REG_POINTER (addr.base))
4766       || (addr.indx && REG_P (addr.indx) && REG_POINTER (addr.indx)))
4767     return true;
4768 
4769   return false;
4770 }
4771 
4772 /* Emit a forced load-address operation to load SRC into DST.
4773    This will use the LOAD ADDRESS instruction even in situations
4774    where legitimate_la_operand_p (SRC) returns false.  */
4775 
4776 void
4777 s390_load_address (rtx dst, rtx src)
4778 {
4779   if (TARGET_64BIT)
4780     emit_move_insn (dst, src);
4781   else
4782     emit_insn (gen_force_la_31 (dst, src));
4783 }
4784 
4785 /* Return true if it is ok to use SYMBOL_REF in a relative address.  */
4786 
4787 bool
4788 s390_rel_address_ok_p (rtx symbol_ref)
4789 {
4790   tree decl;
4791 
4792   if (symbol_ref == s390_got_symbol () || CONSTANT_POOL_ADDRESS_P (symbol_ref))
4793     return true;
4794 
4795   decl = SYMBOL_REF_DECL (symbol_ref);
4796 
4797   if (!flag_pic || SYMBOL_REF_LOCAL_P (symbol_ref))
4798     return (s390_pic_data_is_text_relative
4799 	    || (decl
4800 		&& TREE_CODE (decl) == FUNCTION_DECL));
4801 
4802   return false;
4803 }
4804 
4805 /* Return a legitimate reference for ORIG (an address) using the
4806    register REG.  If REG is 0, a new pseudo is generated.
4807 
4808    There are two types of references that must be handled:
4809 
4810    1. Global data references must load the address from the GOT, via
4811       the PIC reg.  An insn is emitted to do this load, and the reg is
4812       returned.
4813 
4814    2. Static data references, constant pool addresses, and code labels
4815       compute the address as an offset from the GOT, whose base is in
4816       the PIC reg.  Static data objects have SYMBOL_FLAG_LOCAL set to
4817       differentiate them from global data objects.  The returned
4818       address is the PIC reg + an unspec constant.
4819 
4820    TARGET_LEGITIMIZE_ADDRESS_P rejects symbolic references unless the PIC
4821    reg also appears in the address.  */
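/* A rough sketch of the code emitted below (64-bit examples; register
   names are purely illustrative):

     local symbol, even addend:      larl %r1, sym+addend
     global symbol, TARGET_Z10:      lgrl %r1, sym@GOTENT
     global symbol, -fPIC, pre-z10:  larl %r1, sym@GOTENT
                                     lg   %r1, 0(%r1)

   An addend on a global symbol is handled by the recursive calls at the
   end of the function.  */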
4822 
4823 rtx
4824 legitimize_pic_address (rtx orig, rtx reg)
4825 {
4826   rtx addr = orig;
4827   rtx addend = const0_rtx;
4828   rtx new_rtx = orig;
4829 
4830   gcc_assert (!TLS_SYMBOLIC_CONST (addr));
4831 
4832   if (GET_CODE (addr) == CONST)
4833     addr = XEXP (addr, 0);
4834 
4835   if (GET_CODE (addr) == PLUS)
4836     {
4837       addend = XEXP (addr, 1);
4838       addr = XEXP (addr, 0);
4839     }
4840 
4841   if ((GET_CODE (addr) == LABEL_REF
4842        || (SYMBOL_REF_P (addr) && s390_rel_address_ok_p (addr))
4843        || (GET_CODE (addr) == UNSPEC &&
4844 	   (XINT (addr, 1) == UNSPEC_GOTENT
4845 	    || XINT (addr, 1) == UNSPEC_PLT)))
4846       && GET_CODE (addend) == CONST_INT)
4847     {
4848       /* This can be locally addressed.  */
4849 
4850       /* larl_operand requires UNSPECs to be wrapped in a const rtx.  */
4851       rtx const_addr = (GET_CODE (addr) == UNSPEC ?
4852 			gen_rtx_CONST (Pmode, addr) : addr);
4853 
4854       if (larl_operand (const_addr, VOIDmode)
4855 	  && INTVAL (addend) < HOST_WIDE_INT_1 << 31
4856 	  && INTVAL (addend) >= -(HOST_WIDE_INT_1 << 31))
4857 	{
4858 	  if (INTVAL (addend) & 1)
4859 	    {
4860 	      /* LARL can't handle odd offsets, so emit a pair of LARL
4861 		 and LA.  */
4862 	      rtx temp = reg? reg : gen_reg_rtx (Pmode);
4863 
4864 	      if (!DISP_IN_RANGE (INTVAL (addend)))
4865 		{
4866 		  HOST_WIDE_INT even = INTVAL (addend) - 1;
4867 		  addr = gen_rtx_PLUS (Pmode, addr, GEN_INT (even));
4868 		  addr = gen_rtx_CONST (Pmode, addr);
4869 		  addend = const1_rtx;
4870 		}
4871 
4872 	      emit_move_insn (temp, addr);
4873 	      new_rtx = gen_rtx_PLUS (Pmode, temp, addend);
4874 
4875 	      if (reg != 0)
4876 		{
4877 		  s390_load_address (reg, new_rtx);
4878 		  new_rtx = reg;
4879 		}
4880 	    }
4881 	  else
4882 	    {
4883 	      /* If the offset is even, we can just use LARL.  This
4884 		 will happen automatically.  */
4885 	    }
4886 	}
4887       else
4888 	{
4889 	  /* No larl - Access local symbols relative to the GOT.  */
4890 
4891 	  rtx temp = reg? reg : gen_reg_rtx (Pmode);
4892 
4893 	  if (reload_in_progress || reload_completed)
4894 	    df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
4895 
4896 	  addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
4897 	  if (addend != const0_rtx)
4898 	    addr = gen_rtx_PLUS (Pmode, addr, addend);
4899 	  addr = gen_rtx_CONST (Pmode, addr);
4900 	  addr = force_const_mem (Pmode, addr);
4901 	  emit_move_insn (temp, addr);
4902 
4903 	  new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, temp);
4904 	  if (reg != 0)
4905 	    {
4906 	      s390_load_address (reg, new_rtx);
4907 	      new_rtx = reg;
4908 	    }
4909 	}
4910     }
4911   else if (GET_CODE (addr) == SYMBOL_REF && addend == const0_rtx)
4912     {
4913       /* A non-local symbol reference without addend.
4914 
4915 	 The symbol ref is wrapped into an UNSPEC to make sure the
4916 	 proper operand modifier (@GOT or @GOTENT) will be emitted.
4917 	 This will tell the linker to put the symbol into the GOT.
4918 
4919 	 Additionally the code dereferencing the GOT slot is emitted here.
4920 
4921 	 An addend to the symref needs to be added afterwards.
4922 	 legitimize_pic_address calls itself recursively to handle
4923 	 that case.  So no need to do it here.  */
4924 
4925       if (reg == 0)
4926 	reg = gen_reg_rtx (Pmode);
4927 
4928       if (TARGET_Z10)
4929 	{
4930 	  /* Use load relative if possible.
4931 	     lgrl <target>, sym@GOTENT  */
4932 	  new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTENT);
4933 	  new_rtx = gen_rtx_CONST (Pmode, new_rtx);
4934 	  new_rtx = gen_const_mem (GET_MODE (reg), new_rtx);
4935 
4936 	  emit_move_insn (reg, new_rtx);
4937 	  new_rtx = reg;
4938 	}
4939       else if (flag_pic == 1)
4940 	{
4941 	  /* Assume GOT offset is a valid displacement operand (< 4k
4942 	     or < 512k with z990).  This is handled the same way in
4943 	     both 31- and 64-bit code (@GOT).
4944 	     lg <target>, sym@GOT(r12)  */
4945 
4946 	  if (reload_in_progress || reload_completed)
4947 	    df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
4948 
4949 	  new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
4950 	  new_rtx = gen_rtx_CONST (Pmode, new_rtx);
4951 	  new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
4952 	  new_rtx = gen_const_mem (Pmode, new_rtx);
4953 	  emit_move_insn (reg, new_rtx);
4954 	  new_rtx = reg;
4955 	}
4956       else
4957 	{
4958 	  /* If the GOT offset might be >= 4k, we determine the position
4959 	     of the GOT entry via a PC-relative LARL (@GOTENT).
4960 	     larl temp, sym@GOTENT
4961 	     lg   <target>, 0(temp) */
4962 
4963 	  rtx temp = reg ? reg : gen_reg_rtx (Pmode);
4964 
4965 	  gcc_assert (REGNO (temp) >= FIRST_PSEUDO_REGISTER
4966 		      || REGNO_REG_CLASS (REGNO (temp)) == ADDR_REGS);
4967 
4968 	  new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTENT);
4969 	  new_rtx = gen_rtx_CONST (Pmode, new_rtx);
4970 	  emit_move_insn (temp, new_rtx);
4971 	  new_rtx = gen_const_mem (Pmode, temp);
4972 	  emit_move_insn (reg, new_rtx);
4973 
4974 	  new_rtx = reg;
4975 	}
4976     }
4977   else if (GET_CODE (addr) == UNSPEC && GET_CODE (addend) == CONST_INT)
4978     {
4979       gcc_assert (XVECLEN (addr, 0) == 1);
4980       switch (XINT (addr, 1))
4981 	{
4982 	  /* These address symbols (or PLT slots) relative to the GOT
4983 	     (not GOT slots!).  In general this will exceed the
4984 	     displacement range, so these values belong in the literal
4985 	     pool.  */
4986 	case UNSPEC_GOTOFF:
4987 	case UNSPEC_PLTOFF:
4988 	  new_rtx = force_const_mem (Pmode, orig);
4989 	  break;
4990 
4991 	  /* For -fPIC the GOT size might exceed the displacement
4992 	     range so make sure the value is in the literal pool.  */
4993 	case UNSPEC_GOT:
4994 	  if (flag_pic == 2)
4995 	    new_rtx = force_const_mem (Pmode, orig);
4996 	  break;
4997 
4998 	  /* For @GOTENT larl is used.  This is handled like local
4999 	     symbol refs.  */
5000 	case UNSPEC_GOTENT:
5001 	  gcc_unreachable ();
5002 	  break;
5003 
5004 	  /* For @PLT larl is used.  This is handled like local
5005 	     symbol refs.  */
5006 	case UNSPEC_PLT:
5007 	  gcc_unreachable ();
5008 	  break;
5009 
5010 	  /* Everything else cannot happen.  */
5011 	default:
5012 	  gcc_unreachable ();
5013 	}
5014     }
5015   else if (addend != const0_rtx)
5016     {
5017       /* Otherwise, compute the sum.  */
5018 
5019       rtx base = legitimize_pic_address (addr, reg);
5020       new_rtx  = legitimize_pic_address (addend,
5021 					 base == reg ? NULL_RTX : reg);
5022       if (GET_CODE (new_rtx) == CONST_INT)
5023 	new_rtx = plus_constant (Pmode, base, INTVAL (new_rtx));
5024       else
5025 	{
5026 	  if (GET_CODE (new_rtx) == PLUS && CONSTANT_P (XEXP (new_rtx, 1)))
5027 	    {
5028 	      base = gen_rtx_PLUS (Pmode, base, XEXP (new_rtx, 0));
5029 	      new_rtx = XEXP (new_rtx, 1);
5030 	    }
5031 	  new_rtx = gen_rtx_PLUS (Pmode, base, new_rtx);
5032 	}
5033 
5034       if (GET_CODE (new_rtx) == CONST)
5035 	new_rtx = XEXP (new_rtx, 0);
5036       new_rtx = force_operand (new_rtx, 0);
5037     }
5038 
5039   return new_rtx;
5040 }
5041 
5042 /* Load the thread pointer into a register.  */
5043 
5044 rtx
5045 s390_get_thread_pointer (void)
5046 {
5047   rtx tp = gen_reg_rtx (Pmode);
5048 
5049   emit_move_insn (tp, gen_rtx_REG (Pmode, TP_REGNUM));
5050   mark_reg_pointer (tp, BITS_PER_WORD);
5051 
5052   return tp;
5053 }
5054 
5055 /* Emit a tls call insn. The call target is the SYMBOL_REF stored
5056    in s390_tls_symbol which always refers to __tls_get_offset.
5057    The returned offset is written to RESULT_REG and a USE rtx is
5058    generated for TLS_CALL.  */
5059 
5060 static GTY(()) rtx s390_tls_symbol;
5061 
5062 static void
5063 s390_emit_tls_call_insn (rtx result_reg, rtx tls_call)
5064 {
5065   rtx insn;
5066 
5067   if (!flag_pic)
5068     emit_insn (s390_load_got ());
5069 
5070   if (!s390_tls_symbol)
5071     s390_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, "__tls_get_offset");
5072 
5073   insn = s390_emit_call (s390_tls_symbol, tls_call, result_reg,
5074 			 gen_rtx_REG (Pmode, RETURN_REGNUM));
5075 
5076   use_reg (&CALL_INSN_FUNCTION_USAGE (insn), result_reg);
5077   RTL_CONST_CALL_P (insn) = 1;
5078 }
5079 
5080 /* ADDR contains a thread-local SYMBOL_REF.  Generate code to compute
5081    this (thread-local) address.  REG may be used as temporary.  */
5082 
5083 static rtx
5084 legitimize_tls_address (rtx addr, rtx reg)
5085 {
5086   rtx new_rtx, tls_call, temp, base, r2;
5087   rtx_insn *insn;
5088 
5089   if (GET_CODE (addr) == SYMBOL_REF)
5090     switch (tls_symbolic_operand (addr))
5091       {
5092       case TLS_MODEL_GLOBAL_DYNAMIC:
5093 	start_sequence ();
5094 	r2 = gen_rtx_REG (Pmode, 2);
5095 	tls_call = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_TLSGD);
5096 	new_rtx = gen_rtx_CONST (Pmode, tls_call);
5097 	new_rtx = force_const_mem (Pmode, new_rtx);
5098 	emit_move_insn (r2, new_rtx);
5099 	s390_emit_tls_call_insn (r2, tls_call);
5100 	insn = get_insns ();
5101 	end_sequence ();
5102 
5103 	new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_NTPOFF);
5104 	temp = gen_reg_rtx (Pmode);
5105 	emit_libcall_block (insn, temp, r2, new_rtx);
5106 
5107 	new_rtx = gen_rtx_PLUS (Pmode, s390_get_thread_pointer (), temp);
5108 	if (reg != 0)
5109 	  {
5110 	    s390_load_address (reg, new_rtx);
5111 	    new_rtx = reg;
5112 	  }
5113 	break;
5114 
5115       case TLS_MODEL_LOCAL_DYNAMIC:
5116 	start_sequence ();
5117 	r2 = gen_rtx_REG (Pmode, 2);
5118 	tls_call = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TLSLDM);
5119 	new_rtx = gen_rtx_CONST (Pmode, tls_call);
5120 	new_rtx = force_const_mem (Pmode, new_rtx);
5121 	emit_move_insn (r2, new_rtx);
5122 	s390_emit_tls_call_insn (r2, tls_call);
5123 	insn = get_insns ();
5124 	end_sequence ();
5125 
5126 	new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TLSLDM_NTPOFF);
5127 	temp = gen_reg_rtx (Pmode);
5128 	emit_libcall_block (insn, temp, r2, new_rtx);
5129 
5130 	new_rtx = gen_rtx_PLUS (Pmode, s390_get_thread_pointer (), temp);
5131 	base = gen_reg_rtx (Pmode);
5132 	s390_load_address (base, new_rtx);
5133 
5134 	new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_DTPOFF);
5135 	new_rtx = gen_rtx_CONST (Pmode, new_rtx);
5136 	new_rtx = force_const_mem (Pmode, new_rtx);
5137 	temp = gen_reg_rtx (Pmode);
5138 	emit_move_insn (temp, new_rtx);
5139 
5140 	new_rtx = gen_rtx_PLUS (Pmode, base, temp);
5141 	if (reg != 0)
5142 	  {
5143 	    s390_load_address (reg, new_rtx);
5144 	    new_rtx = reg;
5145 	  }
5146 	break;
5147 
5148       case TLS_MODEL_INITIAL_EXEC:
5149 	if (flag_pic == 1)
5150 	  {
5151 	    /* Assume GOT offset < 4k.  This is handled the same way
5152 	       in both 31- and 64-bit code.  */
5153 
5154 	    if (reload_in_progress || reload_completed)
5155 	      df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
5156 
5157 	    new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTNTPOFF);
5158 	    new_rtx = gen_rtx_CONST (Pmode, new_rtx);
5159 	    new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
5160 	    new_rtx = gen_const_mem (Pmode, new_rtx);
5161 	    temp = gen_reg_rtx (Pmode);
5162 	    emit_move_insn (temp, new_rtx);
5163 	  }
5164 	else
5165 	  {
5166 	    /* If the GOT offset might be >= 4k, we determine the position
5167 	       of the GOT entry via a PC-relative LARL.  */
5168 
5169 	    new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_INDNTPOFF);
5170 	    new_rtx = gen_rtx_CONST (Pmode, new_rtx);
5171 	    temp = gen_reg_rtx (Pmode);
5172 	    emit_move_insn (temp, new_rtx);
5173 
5174 	    new_rtx = gen_const_mem (Pmode, temp);
5175 	    temp = gen_reg_rtx (Pmode);
5176 	    emit_move_insn (temp, new_rtx);
5177 	  }
5178 
5179 	new_rtx = gen_rtx_PLUS (Pmode, s390_get_thread_pointer (), temp);
5180 	if (reg != 0)
5181 	  {
5182 	    s390_load_address (reg, new_rtx);
5183 	    new_rtx = reg;
5184 	  }
5185 	break;
5186 
5187       case TLS_MODEL_LOCAL_EXEC:
5188 	new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_NTPOFF);
5189 	new_rtx = gen_rtx_CONST (Pmode, new_rtx);
5190 	new_rtx = force_const_mem (Pmode, new_rtx);
5191 	temp = gen_reg_rtx (Pmode);
5192 	emit_move_insn (temp, new_rtx);
5193 
5194 	new_rtx = gen_rtx_PLUS (Pmode, s390_get_thread_pointer (), temp);
5195 	if (reg != 0)
5196 	  {
5197 	    s390_load_address (reg, new_rtx);
5198 	    new_rtx = reg;
5199 	  }
5200 	break;
5201 
5202       default:
5203 	gcc_unreachable ();
5204       }
5205 
5206   else if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == UNSPEC)
5207     {
5208       switch (XINT (XEXP (addr, 0), 1))
5209 	{
5210 	case UNSPEC_INDNTPOFF:
5211 	  new_rtx = addr;
5212 	  break;
5213 
5214 	default:
5215 	  gcc_unreachable ();
5216 	}
5217     }
5218 
5219   else if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS
5220 	   && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT)
5221     {
5222       new_rtx = XEXP (XEXP (addr, 0), 0);
5223       if (GET_CODE (new_rtx) != SYMBOL_REF)
5224 	new_rtx = gen_rtx_CONST (Pmode, new_rtx);
5225 
5226       new_rtx = legitimize_tls_address (new_rtx, reg);
5227       new_rtx = plus_constant (Pmode, new_rtx,
5228 			       INTVAL (XEXP (XEXP (addr, 0), 1)));
5229       new_rtx = force_operand (new_rtx, 0);
5230     }
5231 
5232   else
5233     gcc_unreachable ();  /* for now ... */
5234 
5235   return new_rtx;
5236 }
5237 
5238 /* Emit insns making the address in operands[1] valid for a standard
5239    move to operands[0].  operands[1] is replaced by an address which
5240    should be used instead of the former RTX to emit the move
5241    pattern.  */
5242 
5243 void
5244 emit_symbolic_move (rtx *operands)
5245 {
5246   rtx temp = !can_create_pseudo_p () ? operands[0] : gen_reg_rtx (Pmode);
5247 
5248   if (GET_CODE (operands[0]) == MEM)
5249     operands[1] = force_reg (Pmode, operands[1]);
5250   else if (TLS_SYMBOLIC_CONST (operands[1]))
5251     operands[1] = legitimize_tls_address (operands[1], temp);
5252   else if (flag_pic)
5253     operands[1] = legitimize_pic_address (operands[1], temp);
5254 }
5255 
5256 /* Try machine-dependent ways of modifying an illegitimate address X
5257    to be legitimate.  If we find one, return the new, valid address.
5258 
5259    OLDX is the address as it was before break_out_memory_refs was called.
5260    In some cases it is useful to look at this to decide what needs to be done.
5261 
5262    MODE is the mode of the operand pointed to by X.
5263 
5264    When -fpic is used, special handling is needed for symbolic references.
5265    See comments by legitimize_pic_address for details.  */
5266 
5267 static rtx
5268 s390_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
5269 			 machine_mode mode ATTRIBUTE_UNUSED)
5270 {
5271   rtx constant_term = const0_rtx;
5272 
5273   if (TLS_SYMBOLIC_CONST (x))
5274     {
5275       x = legitimize_tls_address (x, 0);
5276 
5277       if (s390_legitimate_address_p (mode, x, FALSE))
5278 	return x;
5279     }
5280   else if (GET_CODE (x) == PLUS
5281 	   && (TLS_SYMBOLIC_CONST (XEXP (x, 0))
5282 	       || TLS_SYMBOLIC_CONST (XEXP (x, 1))))
5283     {
5284       return x;
5285     }
5286   else if (flag_pic)
5287     {
5288       if (SYMBOLIC_CONST (x)
5289 	  || (GET_CODE (x) == PLUS
5290 	      && (SYMBOLIC_CONST (XEXP (x, 0))
5291 		  || SYMBOLIC_CONST (XEXP (x, 1)))))
5292 	  x = legitimize_pic_address (x, 0);
5293 
5294       if (s390_legitimate_address_p (mode, x, FALSE))
5295 	return x;
5296     }
5297 
5298   x = eliminate_constant_term (x, &constant_term);
5299 
5300   /* Optimize loading of large displacements by splitting them
5301      into a multiple of 4K and the rest; this allows the
5302      former to be CSE'd if possible.
5303 
5304      Don't do this if the displacement is added to a register
5305      pointing into the stack frame, as the offsets will
5306      change later anyway.  */
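  /* For example, a constant term of 0x12345 is split into an upper part
     0x12000 (kept in a register and thus CSE-able) and a lower part 0x345
     that fits the short displacement range.  */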
5307 
5308   if (GET_CODE (constant_term) == CONST_INT
5309       && !TARGET_LONG_DISPLACEMENT
5310       && !DISP_IN_RANGE (INTVAL (constant_term))
5311       && !(REG_P (x) && REGNO_PTR_FRAME_P (REGNO (x))))
5312     {
5313       HOST_WIDE_INT lower = INTVAL (constant_term) & 0xfff;
5314       HOST_WIDE_INT upper = INTVAL (constant_term) ^ lower;
5315 
5316       rtx temp = gen_reg_rtx (Pmode);
5317       rtx val  = force_operand (GEN_INT (upper), temp);
5318       if (val != temp)
5319 	emit_move_insn (temp, val);
5320 
5321       x = gen_rtx_PLUS (Pmode, x, temp);
5322       constant_term = GEN_INT (lower);
5323     }
5324 
5325   if (GET_CODE (x) == PLUS)
5326     {
5327       if (GET_CODE (XEXP (x, 0)) == REG)
5328 	{
5329 	  rtx temp = gen_reg_rtx (Pmode);
5330 	  rtx val  = force_operand (XEXP (x, 1), temp);
5331 	  if (val != temp)
5332 	    emit_move_insn (temp, val);
5333 
5334 	  x = gen_rtx_PLUS (Pmode, XEXP (x, 0), temp);
5335 	}
5336 
5337       else if (GET_CODE (XEXP (x, 1)) == REG)
5338 	{
5339 	  rtx temp = gen_reg_rtx (Pmode);
5340 	  rtx val  = force_operand (XEXP (x, 0), temp);
5341 	  if (val != temp)
5342 	    emit_move_insn (temp, val);
5343 
5344 	  x = gen_rtx_PLUS (Pmode, temp, XEXP (x, 1));
5345 	}
5346     }
5347 
5348   if (constant_term != const0_rtx)
5349     x = gen_rtx_PLUS (Pmode, x, constant_term);
5350 
5351   return x;
5352 }
5353 
5354 /* Try a machine-dependent way of reloading an illegitimate address AD
5355    operand.  If we find one, push the reload and return the new address.
5356 
5357    MODE is the mode of the enclosing MEM.  OPNUM is the operand number
5358    and TYPE is the reload type of the current reload.  */
5359 
5360 rtx
5361 legitimize_reload_address (rtx ad, machine_mode mode ATTRIBUTE_UNUSED,
5362 			   int opnum, int type)
5363 {
5364   if (!optimize || TARGET_LONG_DISPLACEMENT)
5365     return NULL_RTX;
5366 
5367   if (GET_CODE (ad) == PLUS)
5368     {
5369       rtx tem = simplify_binary_operation (PLUS, Pmode,
5370 					   XEXP (ad, 0), XEXP (ad, 1));
5371       if (tem)
5372 	ad = tem;
5373     }
5374 
5375   if (GET_CODE (ad) == PLUS
5376       && GET_CODE (XEXP (ad, 0)) == REG
5377       && GET_CODE (XEXP (ad, 1)) == CONST_INT
5378       && !DISP_IN_RANGE (INTVAL (XEXP (ad, 1))))
5379     {
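      /* Split the out-of-range displacement into a high part, which is
	 reloaded into a register below, and a low 12 bit part that stays
	 in range as the displacement of the new address.  */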
5380       HOST_WIDE_INT lower = INTVAL (XEXP (ad, 1)) & 0xfff;
5381       HOST_WIDE_INT upper = INTVAL (XEXP (ad, 1)) ^ lower;
5382       rtx cst, tem, new_rtx;
5383 
5384       cst = GEN_INT (upper);
5385       if (!legitimate_reload_constant_p (cst))
5386 	cst = force_const_mem (Pmode, cst);
5387 
5388       tem = gen_rtx_PLUS (Pmode, XEXP (ad, 0), cst);
5389       new_rtx = gen_rtx_PLUS (Pmode, tem, GEN_INT (lower));
5390 
5391       push_reload (XEXP (tem, 1), 0, &XEXP (tem, 1), 0,
5392 		   BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
5393 		   opnum, (enum reload_type) type);
5394       return new_rtx;
5395     }
5396 
5397   return NULL_RTX;
5398 }
5399 
5400 /* Emit code to move LEN bytes from SRC to DST.  */
5401 
5402 bool
5403 s390_expand_movmem (rtx dst, rtx src, rtx len)
5404 {
5405   /* When tuning for z10 or higher we rely on the Glibc functions to
5406      do the right thing.  Only for constant lengths below 64k do we
5407      generate inline code.  */
5408   if (s390_tune >= PROCESSOR_2097_Z10
5409       && (GET_CODE (len) != CONST_INT || INTVAL (len) > (1<<16)))
5410     return false;
5411 
5412   /* Expand memcpy for constant length operands without a loop if it
5413      is shorter that way.
5414 
5415      With a constant length argument a
5416      memcpy loop (without pfd) is 36 bytes -> 6 * mvc  */
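  /* An mvc is a 6 byte instruction copying up to 256 bytes, so up to six
     mvcs (matching the 256 * 6 limit below) take no more code than the
     36 byte loop.  */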
5417   if (GET_CODE (len) == CONST_INT
5418       && INTVAL (len) >= 0
5419       && INTVAL (len) <= 256 * 6
5420       && (!TARGET_MVCLE || INTVAL (len) <= 256))
5421     {
5422       HOST_WIDE_INT o, l;
5423 
5424       for (l = INTVAL (len), o = 0; l > 0; l -= 256, o += 256)
5425 	{
5426 	  rtx newdst = adjust_address (dst, BLKmode, o);
5427 	  rtx newsrc = adjust_address (src, BLKmode, o);
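	  /* The length operand of mvc encodes length - 1, hence 255 for a
	     full 256 byte block.  */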
5428 	  emit_insn (gen_movmem_short (newdst, newsrc,
5429 				       GEN_INT (l > 256 ? 255 : l - 1)));
5430 	}
5431     }
5432 
5433   else if (TARGET_MVCLE)
5434     {
5435       emit_insn (gen_movmem_long (dst, src, convert_to_mode (Pmode, len, 1)));
5436     }
5437 
5438   else
5439     {
5440       rtx dst_addr, src_addr, count, blocks, temp;
5441       rtx_code_label *loop_start_label = gen_label_rtx ();
5442       rtx_code_label *loop_end_label = gen_label_rtx ();
5443       rtx_code_label *end_label = gen_label_rtx ();
5444       machine_mode mode;
5445 
5446       mode = GET_MODE (len);
5447       if (mode == VOIDmode)
5448 	mode = Pmode;
5449 
5450       dst_addr = gen_reg_rtx (Pmode);
5451       src_addr = gen_reg_rtx (Pmode);
5452       count = gen_reg_rtx (mode);
5453       blocks = gen_reg_rtx (mode);
5454 
5455       convert_move (count, len, 1);
5456       emit_cmp_and_jump_insns (count, const0_rtx,
5457 			       EQ, NULL_RTX, mode, 1, end_label);
5458 
5459       emit_move_insn (dst_addr, force_operand (XEXP (dst, 0), NULL_RTX));
5460       emit_move_insn (src_addr, force_operand (XEXP (src, 0), NULL_RTX));
5461       dst = change_address (dst, VOIDmode, dst_addr);
5462       src = change_address (src, VOIDmode, src_addr);
5463 
5464       temp = expand_binop (mode, add_optab, count, constm1_rtx, count, 1,
5465 			   OPTAB_DIRECT);
5466       if (temp != count)
5467 	emit_move_insn (count, temp);
5468 
5469       temp = expand_binop (mode, lshr_optab, count, GEN_INT (8), blocks, 1,
5470 			   OPTAB_DIRECT);
5471       if (temp != blocks)
5472 	emit_move_insn (blocks, temp);
5473 
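      /* BLOCKS now holds (len - 1) / 256: the number of full 256 byte
	 mvcs emitted by the loop below.  The final mvc after the loop
	 handles the remaining 1 to 256 bytes.  */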
5474       emit_cmp_and_jump_insns (blocks, const0_rtx,
5475 			       EQ, NULL_RTX, mode, 1, loop_end_label);
5476 
5477       emit_label (loop_start_label);
5478 
5479       if (TARGET_Z10
5480 	  && (GET_CODE (len) != CONST_INT || INTVAL (len) > 768))
5481 	{
5482 	  rtx prefetch;
5483 
5484 	  /* Issue a read prefetch for the +3 cache line.  */
5485 	  prefetch = gen_prefetch (gen_rtx_PLUS (Pmode, src_addr, GEN_INT (768)),
5486 				   const0_rtx, const0_rtx);
5487 	  PREFETCH_SCHEDULE_BARRIER_P (prefetch) = true;
5488 	  emit_insn (prefetch);
5489 
5490 	  /* Issue a write prefetch for the +3 cache line.  */
5491 	  prefetch = gen_prefetch (gen_rtx_PLUS (Pmode, dst_addr, GEN_INT (768)),
5492 				   const1_rtx, const0_rtx);
5493 	  PREFETCH_SCHEDULE_BARRIER_P (prefetch) = true;
5494 	  emit_insn (prefetch);
5495 	}
5496 
5497       emit_insn (gen_movmem_short (dst, src, GEN_INT (255)));
5498       s390_load_address (dst_addr,
5499 			 gen_rtx_PLUS (Pmode, dst_addr, GEN_INT (256)));
5500       s390_load_address (src_addr,
5501 			 gen_rtx_PLUS (Pmode, src_addr, GEN_INT (256)));
5502 
5503       temp = expand_binop (mode, add_optab, blocks, constm1_rtx, blocks, 1,
5504 			   OPTAB_DIRECT);
5505       if (temp != blocks)
5506 	emit_move_insn (blocks, temp);
5507 
5508       emit_cmp_and_jump_insns (blocks, const0_rtx,
5509 			       EQ, NULL_RTX, mode, 1, loop_end_label);
5510 
5511       emit_jump (loop_start_label);
5512       emit_label (loop_end_label);
5513 
5514       emit_insn (gen_movmem_short (dst, src,
5515 				   convert_to_mode (Pmode, count, 1)));
5516       emit_label (end_label);
5517     }
5518   return true;
5519 }
5520 
5521 /* Emit code to set LEN bytes at DST to VAL.
5522    Make use of clrmem if VAL is zero.  */
5523 
5524 void
5525 s390_expand_setmem (rtx dst, rtx len, rtx val)
5526 {
5527   if (GET_CODE (len) == CONST_INT && INTVAL (len) <= 0)
5528     return;
5529 
5530   gcc_assert (GET_CODE (val) == CONST_INT || GET_MODE (val) == QImode);
5531 
5532   /* Expand setmem/clrmem for a constant length operand without a
5533      loop if it will be shorter that way.
5534      clrmem loop (with PFD)    is 30 bytes -> 5 * xc
5535      clrmem loop (without PFD) is 24 bytes -> 4 * xc
5536      setmem loop (with PFD)    is 38 bytes -> ~4 * (mvi/stc + mvc)
5537      setmem loop (without PFD) is 32 bytes -> ~4 * (mvi/stc + mvc) */
5538   if (GET_CODE (len) == CONST_INT
5539       && ((val == const0_rtx
5540 	   && (INTVAL (len) <= 256 * 4
5541 	       || (INTVAL (len) <= 256 * 5 && TARGET_SETMEM_PFD (val, len))))
5542 	  || (val != const0_rtx && INTVAL (len) <= 257 * 4))
5543       && (!TARGET_MVCLE || INTVAL (len) <= 256))
5544     {
5545       HOST_WIDE_INT o, l;
5546 
5547       if (val == const0_rtx)
5548 	/* clrmem: emit 256 byte blockwise XCs.  */
5549 	for (l = INTVAL (len), o = 0; l > 0; l -= 256, o += 256)
5550 	  {
5551 	    rtx newdst = adjust_address (dst, BLKmode, o);
5552 	    emit_insn (gen_clrmem_short (newdst,
5553 					 GEN_INT (l > 256 ? 255 : l - 1)));
5554 	  }
5555       else
5556 	/* setmem: emit 1(mvi) + 256(mvc) byte blockwise memsets by
5557 	   setting first byte to val and using a 256 byte mvc with one
5558 	   byte overlap to propagate the byte.  */
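	/* For example, a 300 byte memset stores VAL at dst[0], propagates
	   it with a 256 byte mvc from dst[0..255] to dst[1..256], then
	   stores VAL at dst[257] and propagates it over the remaining 42
	   bytes the same way.  */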
5559 	for (l = INTVAL (len), o = 0; l > 0; l -= 257, o += 257)
5560 	  {
5561 	    rtx newdst = adjust_address (dst, BLKmode, o);
5562 	    emit_move_insn (adjust_address (dst, QImode, o), val);
5563 	    if (l > 1)
5564 	      {
5565 		rtx newdstp1 = adjust_address (dst, BLKmode, o + 1);
5566 		emit_insn (gen_movmem_short (newdstp1, newdst,
5567 					     GEN_INT (l > 257 ? 255 : l - 2)));
5568 	      }
5569 	  }
5570     }
5571 
5572   else if (TARGET_MVCLE)
5573     {
5574       val = force_not_mem (convert_modes (Pmode, QImode, val, 1));
5575       if (TARGET_64BIT)
5576 	emit_insn (gen_setmem_long_di (dst, convert_to_mode (Pmode, len, 1),
5577 				       val));
5578       else
5579 	emit_insn (gen_setmem_long_si (dst, convert_to_mode (Pmode, len, 1),
5580 				       val));
5581     }
5582 
5583   else
5584     {
5585       rtx dst_addr, count, blocks, temp, dstp1 = NULL_RTX;
5586       rtx_code_label *loop_start_label = gen_label_rtx ();
5587       rtx_code_label *onebyte_end_label = gen_label_rtx ();
5588       rtx_code_label *zerobyte_end_label = gen_label_rtx ();
5589       rtx_code_label *restbyte_end_label = gen_label_rtx ();
5590       machine_mode mode;
5591 
5592       mode = GET_MODE (len);
5593       if (mode == VOIDmode)
5594 	mode = Pmode;
5595 
5596       dst_addr = gen_reg_rtx (Pmode);
5597       count = gen_reg_rtx (mode);
5598       blocks = gen_reg_rtx (mode);
5599 
5600       convert_move (count, len, 1);
5601       emit_cmp_and_jump_insns (count, const0_rtx,
5602 			       EQ, NULL_RTX, mode, 1, zerobyte_end_label,
5603 			       profile_probability::very_unlikely ());
5604 
5605       /* We need to make a copy of the target address since memset is
5606 	 supposed to return it unmodified.  We have to make it here
5607 	 already since the new reg is used at onebyte_end_label.  */
5608       emit_move_insn (dst_addr, force_operand (XEXP (dst, 0), NULL_RTX));
5609       dst = change_address (dst, VOIDmode, dst_addr);
5610 
5611       if (val != const0_rtx)
5612 	{
5613 	  /* When using the overlapping mvc the original target
5614 	     address is only accessed as a single byte entity (even by
5615 	     the mvc reading this value).  */
5616 	  set_mem_size (dst, 1);
5617 	  dstp1 = adjust_address (dst, VOIDmode, 1);
5618 	  emit_cmp_and_jump_insns (count,
5619 				   const1_rtx, EQ, NULL_RTX, mode, 1,
5620 				   onebyte_end_label,
5621 				   profile_probability::very_unlikely ());
5622 	}
5623 
5624       /* There is one unconditional (mvi+mvc)/xc after the loop
5625 	 dealing with the rest of the bytes.  Subtracting two (mvi+mvc)
5626 	 or one (xc) here leaves that number of bytes to be handled by
5627 	 it.  */
5628       temp = expand_binop (mode, add_optab, count,
5629 			   val == const0_rtx ? constm1_rtx : GEN_INT (-2),
5630 			   count, 1, OPTAB_DIRECT);
5631       if (temp != count)
5632 	emit_move_insn (count, temp);
5633 
5634       temp = expand_binop (mode, lshr_optab, count, GEN_INT (8), blocks, 1,
5635 			   OPTAB_DIRECT);
5636       if (temp != blocks)
5637 	emit_move_insn (blocks, temp);
5638 
5639       emit_cmp_and_jump_insns (blocks, const0_rtx,
5640 			       EQ, NULL_RTX, mode, 1, restbyte_end_label);
5641 
5642       emit_jump (loop_start_label);
5643 
5644       if (val != const0_rtx)
5645 	{
5646 	  /* The 1 byte != 0 special case.  Not handled efficiently
5647 	     since we require two jumps for that.  However, this
5648 	     should be very rare.  */
5649 	  emit_label (onebyte_end_label);
5650 	  emit_move_insn (adjust_address (dst, QImode, 0), val);
5651 	  emit_jump (zerobyte_end_label);
5652 	}
5653 
5654       emit_label (loop_start_label);
5655 
5656       if (TARGET_SETMEM_PFD (val, len))
5657 	{
5658 	  /* Issue a write prefetch.  */
5659 	  rtx distance = GEN_INT (TARGET_SETMEM_PREFETCH_DISTANCE);
5660 	  rtx prefetch = gen_prefetch (gen_rtx_PLUS (Pmode, dst_addr, distance),
5661 				       const1_rtx, const0_rtx);
5662 	  emit_insn (prefetch);
5663 	  PREFETCH_SCHEDULE_BARRIER_P (prefetch) = true;
5664 	}
5665 
5666       if (val == const0_rtx)
5667 	emit_insn (gen_clrmem_short (dst, GEN_INT (255)));
5668       else
5669 	{
5670 	  /* Set the first byte in the block to the value and use an
5671 	     overlapping mvc for the block.  */
5672 	  emit_move_insn (adjust_address (dst, QImode, 0), val);
5673 	  emit_insn (gen_movmem_short (dstp1, dst, GEN_INT (254)));
5674 	}
5675       s390_load_address (dst_addr,
5676 			 gen_rtx_PLUS (Pmode, dst_addr, GEN_INT (256)));
5677 
5678       temp = expand_binop (mode, add_optab, blocks, constm1_rtx, blocks, 1,
5679 			   OPTAB_DIRECT);
5680       if (temp != blocks)
5681 	emit_move_insn (blocks, temp);
5682 
5683       emit_cmp_and_jump_insns (blocks, const0_rtx,
5684 			       NE, NULL_RTX, mode, 1, loop_start_label);
5685 
5686       emit_label (restbyte_end_label);
5687 
5688       if (val == const0_rtx)
5689 	emit_insn (gen_clrmem_short (dst, convert_to_mode (Pmode, count, 1)));
5690       else
5691 	{
5692 	  /* Set the first byte in the block to the value and use an
5693 	     overlapping mvc for the block.  */
5694 	  emit_move_insn (adjust_address (dst, QImode, 0), val);
5695 	  /* execute only uses the lowest 8 bits of count, which is
5696 	     exactly what we need here.  */
5697 	  emit_insn (gen_movmem_short (dstp1, dst,
5698 				       convert_to_mode (Pmode, count, 1)));
5699 	}
5700 
5701       emit_label (zerobyte_end_label);
5702     }
5703 }
5704 
5705 /* Emit code to compare LEN bytes at OP0 with those at OP1,
5706    and return the result in TARGET.  */
5707 
5708 bool
5709 s390_expand_cmpmem (rtx target, rtx op0, rtx op1, rtx len)
5710 {
5711   rtx ccreg = gen_rtx_REG (CCUmode, CC_REGNUM);
5712   rtx tmp;
5713 
5714   /* When tuning for z10 or higher we rely on the Glibc functions to
5715      do the right thing.  Only for constant lengths below 64k do we
5716      generate inline code.  */
5717   if (s390_tune >= PROCESSOR_2097_Z10
5718       && (GET_CODE (len) != CONST_INT || INTVAL (len) > (1<<16)))
5719     return false;
5720 
5721   /* As the result of CMPINT is inverted compared to what we need,
5722      we have to swap the operands.  */
5723   tmp = op0; op0 = op1; op1 = tmp;
5724 
5725   if (GET_CODE (len) == CONST_INT && INTVAL (len) >= 0 && INTVAL (len) <= 256)
5726     {
5727       if (INTVAL (len) > 0)
5728 	{
5729 	  emit_insn (gen_cmpmem_short (op0, op1, GEN_INT (INTVAL (len) - 1)));
5730 	  emit_insn (gen_cmpint (target, ccreg));
5731 	}
5732       else
5733 	emit_move_insn (target, const0_rtx);
5734     }
5735   else if (TARGET_MVCLE)
5736     {
5737       emit_insn (gen_cmpmem_long (op0, op1, convert_to_mode (Pmode, len, 1)));
5738       emit_insn (gen_cmpint (target, ccreg));
5739     }
5740   else
5741     {
5742       rtx addr0, addr1, count, blocks, temp;
5743       rtx_code_label *loop_start_label = gen_label_rtx ();
5744       rtx_code_label *loop_end_label = gen_label_rtx ();
5745       rtx_code_label *end_label = gen_label_rtx ();
5746       machine_mode mode;
5747 
5748       mode = GET_MODE (len);
5749       if (mode == VOIDmode)
5750 	mode = Pmode;
5751 
5752       addr0 = gen_reg_rtx (Pmode);
5753       addr1 = gen_reg_rtx (Pmode);
5754       count = gen_reg_rtx (mode);
5755       blocks = gen_reg_rtx (mode);
5756 
5757       convert_move (count, len, 1);
5758       emit_cmp_and_jump_insns (count, const0_rtx,
5759 			       EQ, NULL_RTX, mode, 1, end_label);
5760 
5761       emit_move_insn (addr0, force_operand (XEXP (op0, 0), NULL_RTX));
5762       emit_move_insn (addr1, force_operand (XEXP (op1, 0), NULL_RTX));
5763       op0 = change_address (op0, VOIDmode, addr0);
5764       op1 = change_address (op1, VOIDmode, addr1);
5765 
5766       temp = expand_binop (mode, add_optab, count, constm1_rtx, count, 1,
5767 			   OPTAB_DIRECT);
5768       if (temp != count)
5769 	emit_move_insn (count, temp);
5770 
5771       temp = expand_binop (mode, lshr_optab, count, GEN_INT (8), blocks, 1,
5772 			   OPTAB_DIRECT);
5773       if (temp != blocks)
5774 	emit_move_insn (blocks, temp);
5775 
5776       emit_cmp_and_jump_insns (blocks, const0_rtx,
5777 			       EQ, NULL_RTX, mode, 1, loop_end_label);
5778 
5779       emit_label (loop_start_label);
5780 
5781       if (TARGET_Z10
5782 	  && (GET_CODE (len) != CONST_INT || INTVAL (len) > 512))
5783 	{
5784 	  rtx prefetch;
5785 
5786 	  /* Issue a read prefetch for the +2 cache line of operand 1.  */
5787 	  prefetch = gen_prefetch (gen_rtx_PLUS (Pmode, addr0, GEN_INT (512)),
5788 				   const0_rtx, const0_rtx);
5789 	  emit_insn (prefetch);
5790 	  PREFETCH_SCHEDULE_BARRIER_P (prefetch) = true;
5791 
5792 	  /* Issue a read prefetch for the +2 cache line of operand 2.  */
5793 	  prefetch = gen_prefetch (gen_rtx_PLUS (Pmode, addr1, GEN_INT (512)),
5794 				   const0_rtx, const0_rtx);
5795 	  emit_insn (prefetch);
5796 	  PREFETCH_SCHEDULE_BARRIER_P (prefetch) = true;
5797 	}
5798 
5799       emit_insn (gen_cmpmem_short (op0, op1, GEN_INT (255)));
5800       temp = gen_rtx_NE (VOIDmode, ccreg, const0_rtx);
5801       temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
5802 			gen_rtx_LABEL_REF (VOIDmode, end_label), pc_rtx);
5803       temp = gen_rtx_SET (pc_rtx, temp);
5804       emit_jump_insn (temp);
5805 
5806       s390_load_address (addr0,
5807 			 gen_rtx_PLUS (Pmode, addr0, GEN_INT (256)));
5808       s390_load_address (addr1,
5809 			 gen_rtx_PLUS (Pmode, addr1, GEN_INT (256)));
5810 
5811       temp = expand_binop (mode, add_optab, blocks, constm1_rtx, blocks, 1,
5812 			   OPTAB_DIRECT);
5813       if (temp != blocks)
5814 	emit_move_insn (blocks, temp);
5815 
5816       emit_cmp_and_jump_insns (blocks, const0_rtx,
5817 			       EQ, NULL_RTX, mode, 1, loop_end_label);
5818 
5819       emit_jump (loop_start_label);
5820       emit_label (loop_end_label);
5821 
5822       emit_insn (gen_cmpmem_short (op0, op1,
5823 				   convert_to_mode (Pmode, count, 1)));
5824       emit_label (end_label);
5825 
5826       emit_insn (gen_cmpint (target, ccreg));
5827     }
5828   return true;
5829 }
5830 
5831 /* Emit a conditional jump to LABEL for condition code mask MASK using
5832    comparison operator COMPARISON.  Return the emitted jump insn.  */
5833 
5834 static rtx_insn *
5835 s390_emit_ccraw_jump (HOST_WIDE_INT mask, enum rtx_code comparison, rtx label)
5836 {
5837   rtx temp;
5838 
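  /* MASK is a 4 bit condition code mask; reject the degenerate
     never-true (0) and always-true (15) masks.  */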
5839   gcc_assert (comparison == EQ || comparison == NE);
5840   gcc_assert (mask > 0 && mask < 15);
5841 
5842   temp = gen_rtx_fmt_ee (comparison, VOIDmode,
5843 			 gen_rtx_REG (CCRAWmode, CC_REGNUM), GEN_INT (mask));
5844   temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
5845 			       gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
5846   temp = gen_rtx_SET (pc_rtx, temp);
5847   return emit_jump_insn (temp);
5848 }
5849 
5850 /* Emit the instructions to implement strlen of STRING and store the
5851    result in TARGET.  The string has the known ALIGNMENT.  This
5852    version uses vector instructions and is therefore not appropriate
5853    for targets prior to z13.  */
5854 
5855 void
5856 s390_expand_vec_strlen (rtx target, rtx string, rtx alignment)
5857 {
5858   rtx highest_index_to_load_reg = gen_reg_rtx (Pmode);
5859   rtx str_reg = gen_reg_rtx (V16QImode);
5860   rtx str_addr_base_reg = gen_reg_rtx (Pmode);
5861   rtx str_idx_reg = gen_reg_rtx (Pmode);
5862   rtx result_reg = gen_reg_rtx (V16QImode);
5863   rtx is_aligned_label = gen_label_rtx ();
5864   rtx into_loop_label = NULL_RTX;
5865   rtx loop_start_label = gen_label_rtx ();
5866   rtx temp;
5867   rtx len = gen_reg_rtx (QImode);
5868   rtx cond;
5869 
5870   s390_load_address (str_addr_base_reg, XEXP (string, 0));
5871   emit_move_insn (str_idx_reg, const0_rtx);
5872 
5873   if (INTVAL (alignment) < 16)
5874     {
5875       /* Check whether the address happens to be aligned properly so
5876 	 we can jump directly to the aligned loop.  */
5877       emit_cmp_and_jump_insns (gen_rtx_AND (Pmode,
5878 					    str_addr_base_reg, GEN_INT (15)),
5879 			       const0_rtx, EQ, NULL_RTX,
5880 			       Pmode, 1, is_aligned_label);
5881 
5882       temp = gen_reg_rtx (Pmode);
5883       temp = expand_binop (Pmode, and_optab, str_addr_base_reg,
5884 			   GEN_INT (15), temp, 1, OPTAB_DIRECT);
5885       gcc_assert (REG_P (temp));
5886       highest_index_to_load_reg =
5887 	expand_binop (Pmode, sub_optab, GEN_INT (15), temp,
5888 		      highest_index_to_load_reg, 1, OPTAB_DIRECT);
5889       gcc_assert (REG_P (highest_index_to_load_reg));
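      /* vll loads highest_index_to_load_reg + 1 bytes, i.e. only up to
	 the next 16 byte boundary, so this first (unaligned) load cannot
	 cross into the following page.  */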
5890       emit_insn (gen_vllv16qi (str_reg,
5891 		   convert_to_mode (SImode, highest_index_to_load_reg, 1),
5892 		   gen_rtx_MEM (BLKmode, str_addr_base_reg)));
5893 
5894       into_loop_label = gen_label_rtx ();
5895       s390_emit_jump (into_loop_label, NULL_RTX);
5896       emit_barrier ();
5897     }
5898 
5899   emit_label (is_aligned_label);
5900   LABEL_NUSES (is_aligned_label) = INTVAL (alignment) < 16 ? 2 : 1;
5901 
5902   /* From this point on we only perform 16 byte aligned
5903      loads.  */
5904   emit_move_insn (highest_index_to_load_reg, GEN_INT (15));
5905 
5906   emit_label (loop_start_label);
5907   LABEL_NUSES (loop_start_label) = 1;
5908 
5909   /* Load 16 bytes of the string into VR.  */
5910   emit_move_insn (str_reg,
5911 		  gen_rtx_MEM (V16QImode,
5912 			       gen_rtx_PLUS (Pmode, str_idx_reg,
5913 					     str_addr_base_reg)));
5914   if (into_loop_label != NULL_RTX)
5915     {
5916       emit_label (into_loop_label);
5917       LABEL_NUSES (into_loop_label) = 1;
5918     }
5919 
5920   /* Increment string index by 16 bytes.  */
5921   expand_binop (Pmode, add_optab, str_idx_reg, GEN_INT (16),
5922 		str_idx_reg, 1, OPTAB_DIRECT);
5923 
5924   emit_insn (gen_vec_vfenesv16qi (result_reg, str_reg, str_reg,
5925 				  GEN_INT (VSTRING_FLAG_ZS | VSTRING_FLAG_CS)));
5926 
5927   add_int_reg_note (s390_emit_ccraw_jump (8, NE, loop_start_label),
5928 		    REG_BR_PROB,
5929 		    profile_probability::very_likely ().to_reg_br_prob_note ());
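  /* Byte element 7 of the vfene result holds the index of the first zero
     byte, or 16 if no zero byte was found among the 16 bytes loaded.  */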
5930   emit_insn (gen_vec_extractv16qiqi (len, result_reg, GEN_INT (7)));
5931 
5932   /* If the string pointer wasn't aligned we have loaded less than 16
5933      bytes and the remaining bytes got filled with zeros (by vll).
5934      Now we have to check whether the resulting index lies within the
5935      bytes actually part of the string.  */
5936 
5937   cond = s390_emit_compare (GT, convert_to_mode (Pmode, len, 1),
5938 			    highest_index_to_load_reg);
5939   s390_load_address (highest_index_to_load_reg,
5940 		     gen_rtx_PLUS (Pmode, highest_index_to_load_reg,
5941 				   const1_rtx));
5942   if (TARGET_64BIT)
5943     emit_insn (gen_movdicc (str_idx_reg, cond,
5944 			    highest_index_to_load_reg, str_idx_reg));
5945   else
5946     emit_insn (gen_movsicc (str_idx_reg, cond,
5947 			    highest_index_to_load_reg, str_idx_reg));
5948 
5949   add_reg_br_prob_note (s390_emit_jump (is_aligned_label, cond),
5950 			profile_probability::very_unlikely ());
5951 
5952   expand_binop (Pmode, add_optab, str_idx_reg,
5953 		GEN_INT (-16), str_idx_reg, 1, OPTAB_DIRECT);
5954   /* FIXME: len is already zero extended - so avoid the llgcr emitted
5955      here.  */
5956   temp = expand_binop (Pmode, add_optab, str_idx_reg,
5957 		       convert_to_mode (Pmode, len, 1),
5958 		       target, 1, OPTAB_DIRECT);
5959   if (temp != target)
5960     emit_move_insn (target, temp);
5961 }
5962 
5963 void
5964 s390_expand_vec_movstr (rtx result, rtx dst, rtx src)
5965 {
5966   rtx temp = gen_reg_rtx (Pmode);
5967   rtx src_addr = XEXP (src, 0);
5968   rtx dst_addr = XEXP (dst, 0);
5969   rtx src_addr_reg = gen_reg_rtx (Pmode);
5970   rtx dst_addr_reg = gen_reg_rtx (Pmode);
5971   rtx offset = gen_reg_rtx (Pmode);
5972   rtx vsrc = gen_reg_rtx (V16QImode);
5973   rtx vpos = gen_reg_rtx (V16QImode);
5974   rtx loadlen = gen_reg_rtx (SImode);
5975   rtx gpos_qi = gen_reg_rtx (QImode);
5976   rtx gpos = gen_reg_rtx (SImode);
5977   rtx done_label = gen_label_rtx ();
5978   rtx loop_label = gen_label_rtx ();
5979   rtx exit_label = gen_label_rtx ();
5980   rtx full_label = gen_label_rtx ();
5981 
5982   /* Perform a quick check for a string ending within the first (up to)
5983      16 bytes and exit early if successful.  */
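  /* The boundary code 6 passed to vlbb/lcbb limits the load to the
     current 4K block, so this speculative load cannot cross a page
     boundary; lcbb tells how many bytes were actually loaded.  */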
5984 
5985   emit_insn (gen_vlbb (vsrc, src, GEN_INT (6)));
5986   emit_insn (gen_lcbb (loadlen, src_addr, GEN_INT (6)));
5987   emit_insn (gen_vfenezv16qi (vpos, vsrc, vsrc));
5988   emit_insn (gen_vec_extractv16qiqi (gpos_qi, vpos, GEN_INT (7)));
5989   emit_move_insn (gpos, gen_rtx_SUBREG (SImode, gpos_qi, 0));
5990   /* gpos is the byte index if a zero was found and 16 otherwise.
5991      So if it is lower than the number of loaded bytes we have a hit.  */
5992   emit_cmp_and_jump_insns (gpos, loadlen, GE, NULL_RTX, SImode, 1,
5993 			   full_label);
5994   emit_insn (gen_vstlv16qi (vsrc, gpos, dst));
5995 
5996   force_expand_binop (Pmode, add_optab, dst_addr, gpos, result,
5997 		      1, OPTAB_DIRECT);
5998   emit_jump (exit_label);
5999   emit_barrier ();
6000 
6001   emit_label (full_label);
6002   LABEL_NUSES (full_label) = 1;
6003 
6004   /* Calculate `offset' so that src + offset points to the last byte
6005      before 16 byte alignment.  */
6006 
6007   /* temp = src_addr & 0xf */
6008   force_expand_binop (Pmode, and_optab, src_addr, GEN_INT (15), temp,
6009 		      1, OPTAB_DIRECT);
6010 
6011   /* offset = 0xf - temp */
6012   emit_move_insn (offset, GEN_INT (15));
6013   force_expand_binop (Pmode, sub_optab, offset, temp, offset,
6014 		      1, OPTAB_DIRECT);
6015 
6016   /* Store `offset' bytes in the destination string.  The quick check
6017      has loaded at least `offset' bytes into vsrc.  */
6018 
6019   emit_insn (gen_vstlv16qi (vsrc, gen_lowpart (SImode, offset), dst));
6020 
6021   /* Advance to the next byte to be loaded.  */
6022   force_expand_binop (Pmode, add_optab, offset, const1_rtx, offset,
6023 		      1, OPTAB_DIRECT);
6024 
6025   /* Make sure the addresses are single regs which can be used as a
6026      base.  */
6027   emit_move_insn (src_addr_reg, src_addr);
6028   emit_move_insn (dst_addr_reg, dst_addr);
6029 
6030   /* MAIN LOOP */
6031 
6032   emit_label (loop_label);
6033   LABEL_NUSES (loop_label) = 1;
6034 
6035   emit_move_insn (vsrc,
6036 		  gen_rtx_MEM (V16QImode,
6037 			       gen_rtx_PLUS (Pmode, src_addr_reg, offset)));
6038 
6039   emit_insn (gen_vec_vfenesv16qi (vpos, vsrc, vsrc,
6040 				  GEN_INT (VSTRING_FLAG_ZS | VSTRING_FLAG_CS)));
6041   add_int_reg_note (s390_emit_ccraw_jump (8, EQ, done_label),
6042 		    REG_BR_PROB, profile_probability::very_unlikely ()
6043 				  .to_reg_br_prob_note ());
6044 
6045   emit_move_insn (gen_rtx_MEM (V16QImode,
6046 			       gen_rtx_PLUS (Pmode, dst_addr_reg, offset)),
6047 		  vsrc);
6048   /* offset += 16 */
6049   force_expand_binop (Pmode, add_optab, offset, GEN_INT (16),
6050 		      offset,  1, OPTAB_DIRECT);
6051 
6052   emit_jump (loop_label);
6053   emit_barrier ();
6054 
6055   /* REGULAR EXIT */
6056 
6057   /* We are done.  Add the offset of the zero character to the dst_addr
6058      pointer to get the result.  */
6059 
6060   emit_label (done_label);
6061   LABEL_NUSES (done_label) = 1;
6062 
6063   force_expand_binop (Pmode, add_optab, dst_addr_reg, offset, dst_addr_reg,
6064 		      1, OPTAB_DIRECT);
6065 
6066   emit_insn (gen_vec_extractv16qiqi (gpos_qi, vpos, GEN_INT (7)));
6067   emit_move_insn (gpos, gen_rtx_SUBREG (SImode, gpos_qi, 0));
6068 
6069   emit_insn (gen_vstlv16qi (vsrc, gpos, gen_rtx_MEM (BLKmode, dst_addr_reg)));
6070 
6071   force_expand_binop (Pmode, add_optab, dst_addr_reg, gpos, result,
6072 		      1, OPTAB_DIRECT);
6073 
6074   /* EARLY EXIT */
6075 
6076   emit_label (exit_label);
6077   LABEL_NUSES (exit_label) = 1;
6078 }
6079 
6080 
6081 /* Expand conditional increment or decrement using alc/slb instructions.
6082    Should generate code setting DST to either SRC or SRC + INCREMENT,
6083    depending on the result of the comparison CMP_OP0 CMP_CODE CMP_OP1.
6084    Returns true if successful, false otherwise.
6085 
6086    That makes it possible to implement some if-constructs without jumps e.g.:
6087    (borrow = CC0 | CC1 and carry = CC2 | CC3)
6088    unsigned int a, b, c;
6089    if (a < b)  c++; -> CCU  b > a  -> CC2;    c += carry;
6090    if (a < b)  c--; -> CCL3 a - b  -> borrow; c -= borrow;
6091    if (a <= b) c++; -> CCL3 b - a  -> borrow; c += carry;
6092    if (a <= b) c--; -> CCU  a <= b -> borrow; c -= borrow;
6093 
6094    Checks for EQ and NE with a nonzero value need an additional xor e.g.:
6095    if (a == b) c++; -> CCL3 a ^= b; 0 - a  -> borrow;    c += carry;
6096    if (a == b) c--; -> CCU  a ^= b; a <= 0 -> CC0 | CC1; c -= borrow;
6097    if (a != b) c++; -> CCU  a ^= b; a > 0  -> CC2;       c += carry;
6098    if (a != b) c--; -> CCL3 a ^= b; 0 - a  -> borrow;    c -= borrow; */
6099 
6100 bool
6101 s390_expand_addcc (enum rtx_code cmp_code, rtx cmp_op0, rtx cmp_op1,
6102 		   rtx dst, rtx src, rtx increment)
6103 {
6104   machine_mode cmp_mode;
6105   machine_mode cc_mode;
6106   rtx op_res;
6107   rtx insn;
6108   rtvec p;
6109   int ret;
6110 
6111   if ((GET_MODE (cmp_op0) == SImode || GET_MODE (cmp_op0) == VOIDmode)
6112       && (GET_MODE (cmp_op1) == SImode || GET_MODE (cmp_op1) == VOIDmode))
6113     cmp_mode = SImode;
6114   else if ((GET_MODE (cmp_op0) == DImode || GET_MODE (cmp_op0) == VOIDmode)
6115 	   && (GET_MODE (cmp_op1) == DImode || GET_MODE (cmp_op1) == VOIDmode))
6116     cmp_mode = DImode;
6117   else
6118     return false;
6119 
6120   /* Try ADD LOGICAL WITH CARRY.  */
6121   if (increment == const1_rtx)
6122     {
6123       /* Determine CC mode to use.  */
6124       if (cmp_code == EQ || cmp_code == NE)
6125 	{
6126 	  if (cmp_op1 != const0_rtx)
6127 	    {
6128 	      cmp_op0 = expand_simple_binop (cmp_mode, XOR, cmp_op0, cmp_op1,
6129 					     NULL_RTX, 0, OPTAB_WIDEN);
6130 	      cmp_op1 = const0_rtx;
6131 	    }
6132 
6133 	  cmp_code = cmp_code == EQ ? LEU : GTU;
6134 	}
6135 
6136       if (cmp_code == LTU || cmp_code == LEU)
6137 	{
6138 	  rtx tem = cmp_op0;
6139 	  cmp_op0 = cmp_op1;
6140 	  cmp_op1 = tem;
6141 	  cmp_code = swap_condition (cmp_code);
6142 	}
6143 
6144       switch (cmp_code)
6145 	{
6146 	  case GTU:
6147 	    cc_mode = CCUmode;
6148 	    break;
6149 
6150 	  case GEU:
6151 	    cc_mode = CCL3mode;
6152 	    break;
6153 
6154 	  default:
6155 	    return false;
6156 	}
6157 
6158       /* Emit comparison instruction pattern. */
6159       if (!register_operand (cmp_op0, cmp_mode))
6160 	cmp_op0 = force_reg (cmp_mode, cmp_op0);
6161 
6162       insn = gen_rtx_SET (gen_rtx_REG (cc_mode, CC_REGNUM),
6163 			  gen_rtx_COMPARE (cc_mode, cmp_op0, cmp_op1));
6164       /* We use insn_invalid_p here to add clobbers if required.  */
6165       ret = insn_invalid_p (emit_insn (insn), false);
6166       gcc_assert (!ret);
6167 
6168       /* Emit ALC instruction pattern.  */
6169       op_res = gen_rtx_fmt_ee (cmp_code, GET_MODE (dst),
6170 			       gen_rtx_REG (cc_mode, CC_REGNUM),
6171 			       const0_rtx);
6172 
6173       if (src != const0_rtx)
6174 	{
6175 	  if (!register_operand (src, GET_MODE (dst)))
6176 	    src = force_reg (GET_MODE (dst), src);
6177 
6178 	  op_res = gen_rtx_PLUS (GET_MODE (dst), op_res, src);
6179 	  op_res = gen_rtx_PLUS (GET_MODE (dst), op_res, const0_rtx);
6180 	}
6181 
6182       p = rtvec_alloc (2);
6183       RTVEC_ELT (p, 0) =
6184 	gen_rtx_SET (dst, op_res);
6185       RTVEC_ELT (p, 1) =
6186 	gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, CC_REGNUM));
6187       emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
6188 
6189       return true;
6190     }
6191 
6192   /* Try SUBTRACT LOGICAL WITH BORROW.  */
6193   if (increment == constm1_rtx)
6194     {
6195       /* Determine CC mode to use.  */
6196       if (cmp_code == EQ || cmp_code == NE)
6197 	{
6198 	  if (cmp_op1 != const0_rtx)
6199 	    {
6200 	      cmp_op0 = expand_simple_binop (cmp_mode, XOR, cmp_op0, cmp_op1,
6201 					     NULL_RTX, 0, OPTAB_WIDEN);
6202 	      cmp_op1 = const0_rtx;
6203 	    }
6204 
6205 	  cmp_code = cmp_code == EQ ? LEU : GTU;
6206 	}
6207 
6208       if (cmp_code == GTU || cmp_code == GEU)
6209 	{
6210 	  rtx tem = cmp_op0;
6211 	  cmp_op0 = cmp_op1;
6212 	  cmp_op1 = tem;
6213 	  cmp_code = swap_condition (cmp_code);
6214 	}
6215 
6216       switch (cmp_code)
6217 	{
6218 	  case LEU:
6219 	    cc_mode = CCUmode;
6220 	    break;
6221 
6222 	  case LTU:
6223 	    cc_mode = CCL3mode;
6224 	    break;
6225 
6226 	  default:
6227 	    return false;
6228 	}
6229 
6230       /* Emit comparison instruction pattern. */
6231       if (!register_operand (cmp_op0, cmp_mode))
6232 	cmp_op0 = force_reg (cmp_mode, cmp_op0);
6233 
6234       insn = gen_rtx_SET (gen_rtx_REG (cc_mode, CC_REGNUM),
6235 			  gen_rtx_COMPARE (cc_mode, cmp_op0, cmp_op1));
6236       /* We use insn_invalid_p here to add clobbers if required.  */
6237       ret = insn_invalid_p (emit_insn (insn), false);
6238       gcc_assert (!ret);
6239 
6240       /* Emit SLB instruction pattern.  */
6241       if (!register_operand (src, GET_MODE (dst)))
6242 	src = force_reg (GET_MODE (dst), src);
6243 
6244       op_res = gen_rtx_MINUS (GET_MODE (dst),
6245 			      gen_rtx_MINUS (GET_MODE (dst), src, const0_rtx),
6246 			      gen_rtx_fmt_ee (cmp_code, GET_MODE (dst),
6247 					      gen_rtx_REG (cc_mode, CC_REGNUM),
6248 					      const0_rtx));
6249       p = rtvec_alloc (2);
6250       RTVEC_ELT (p, 0) =
6251 	gen_rtx_SET (dst, op_res);
6252       RTVEC_ELT (p, 1) =
6253 	gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, CC_REGNUM));
6254       emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
6255 
6256       return true;
6257     }
6258 
6259   return false;
6260 }
6261 
6262 /* Expand code for the insv template. Return true if successful.  */
6263 
6264 bool
6265 s390_expand_insv (rtx dest, rtx op1, rtx op2, rtx src)
6266 {
6267   int bitsize = INTVAL (op1);
6268   int bitpos = INTVAL (op2);
6269   machine_mode mode = GET_MODE (dest);
6270   machine_mode smode;
6271   int smode_bsize, mode_bsize;
6272   rtx op, clobber;
6273 
6274   if (bitsize + bitpos > GET_MODE_BITSIZE (mode))
6275     return false;
6276 
6277   /* Generate INSERT IMMEDIATE (IILL et al).  */
6278   /* (set (ze (reg)) (const_int)).  */
6279   if (TARGET_ZARCH
6280       && register_operand (dest, word_mode)
6281       && (bitpos % 16) == 0
6282       && (bitsize % 16) == 0
6283       && const_int_operand (src, VOIDmode))
6284     {
6285       HOST_WIDE_INT val = INTVAL (src);
6286       int regpos = bitpos + bitsize;
6287 
6288       while (regpos > bitpos)
6289 	{
6290 	  machine_mode putmode;
6291 	  int putsize;
6292 
6293 	  if (TARGET_EXTIMM && (regpos % 32 == 0) && (regpos >= bitpos + 32))
6294 	    putmode = SImode;
6295 	  else
6296 	    putmode = HImode;
6297 
6298 	  putsize = GET_MODE_BITSIZE (putmode);
6299 	  regpos -= putsize;
6300 	  emit_move_insn (gen_rtx_ZERO_EXTRACT (word_mode, dest,
6301 						GEN_INT (putsize),
6302 						GEN_INT (regpos)),
6303 			  gen_int_mode (val, putmode));
6304 	  val >>= putsize;
6305 	}
6306       gcc_assert (regpos == bitpos);
6307       return true;
6308     }
6309 
6310   smode = smallest_int_mode_for_size (bitsize);
6311   smode_bsize = GET_MODE_BITSIZE (smode);
6312   mode_bsize = GET_MODE_BITSIZE (mode);
6313 
6314   /* Generate STORE CHARACTERS UNDER MASK (STCM et al).  */
6315   if (bitpos == 0
6316       && (bitsize % BITS_PER_UNIT) == 0
6317       && MEM_P (dest)
6318       && (register_operand (src, word_mode)
6319 	  || const_int_operand (src, VOIDmode)))
6320     {
6321       /* Emit standard pattern if possible.  */
6322       if (smode_bsize == bitsize)
6323 	{
6324 	  emit_move_insn (adjust_address (dest, smode, 0),
6325 			  gen_lowpart (smode, src));
6326 	  return true;
6327 	}
6328 
6329       /* (set (ze (mem)) (const_int)).  */
6330       else if (const_int_operand (src, VOIDmode))
6331 	{
6332 	  int size = bitsize / BITS_PER_UNIT;
6333 	  rtx src_mem = adjust_address (force_const_mem (word_mode, src),
6334 					BLKmode,
6335 					UNITS_PER_WORD - size);
6336 
6337 	  dest = adjust_address (dest, BLKmode, 0);
6338 	  set_mem_size (dest, size);
6339 	  s390_expand_movmem (dest, src_mem, GEN_INT (size));
6340 	  return true;
6341 	}
6342 
6343       /* (set (ze (mem)) (reg)).  */
6344       else if (register_operand (src, word_mode))
6345 	{
6346 	  if (bitsize <= 32)
6347 	    emit_move_insn (gen_rtx_ZERO_EXTRACT (word_mode, dest, op1,
6348 						  const0_rtx), src);
6349 	  else
6350 	    {
6351 	      /* Emit st,stcmh sequence.  */
6352 	      int stcmh_width = bitsize - 32;
6353 	      int size = stcmh_width / BITS_PER_UNIT;
6354 
6355 	      emit_move_insn (adjust_address (dest, SImode, size),
6356 			      gen_lowpart (SImode, src));
6357 	      set_mem_size (dest, size);
6358 	      emit_move_insn (gen_rtx_ZERO_EXTRACT (word_mode, dest,
6359 						    GEN_INT (stcmh_width),
6360 						    const0_rtx),
6361 			      gen_rtx_LSHIFTRT (word_mode, src, GEN_INT (32)));
6362 	    }
6363 	  return true;
6364 	}
6365     }
6366 
6367   /* Generate INSERT CHARACTERS UNDER MASK (IC, ICM et al).  */
6368   if ((bitpos % BITS_PER_UNIT) == 0
6369       && (bitsize % BITS_PER_UNIT) == 0
6370       && (bitpos & 32) == ((bitpos + bitsize - 1) & 32)
6371       && MEM_P (src)
6372       && (mode == DImode || mode == SImode)
6373       && register_operand (dest, mode))
6374     {
6375       /* Emit a strict_low_part pattern if possible.  */
6376       if (smode_bsize == bitsize && bitpos == mode_bsize - smode_bsize)
6377 	{
6378 	  op = gen_rtx_STRICT_LOW_PART (VOIDmode, gen_lowpart (smode, dest));
6379 	  op = gen_rtx_SET (op, gen_lowpart (smode, src));
6380 	  clobber = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, CC_REGNUM));
6381 	  emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clobber)));
6382 	  return true;
6383 	}
6384 
6385       /* ??? There are more powerful versions of ICM that are not
6386 	 completely represented in the md file.  */
6387     }
6388 
6389   /* For z10, generate ROTATE THEN INSERT SELECTED BITS (RISBG et al).  */
6390   if (TARGET_Z10 && (mode == DImode || mode == SImode))
6391     {
6392       machine_mode mode_s = GET_MODE (src);
6393 
6394       if (CONSTANT_P (src))
6395 	{
6396 	  /* For constant zero values the representation with AND
6397 	     appears to be folded in more situations than the (set
6398 	     (zero_extract) ...).
6399 	     We only do this when the start and end of the bitfield
6400 	     remain in the same SImode chunk.  That way nihf or nilf
6401 	     can be used.
6402 	     The AND patterns might still generate a risbg for this.  */
6403 	  if (src == const0_rtx && bitpos / 32  == (bitpos + bitsize - 1) / 32)
6404 	    return false;
6405 	  else
6406 	    src = force_reg (mode, src);
6407 	}
6408       else if (mode_s != mode)
6409 	{
6410 	  gcc_assert (GET_MODE_BITSIZE (mode_s) >= bitsize);
6411 	  src = force_reg (mode_s, src);
6412 	  src = gen_lowpart (mode, src);
6413 	}
6414 
6415       op = gen_rtx_ZERO_EXTRACT (mode, dest, op1, op2);
6416       op = gen_rtx_SET (op, src);
6417 
6418       if (!TARGET_ZEC12)
6419 	{
6420 	  clobber = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, CC_REGNUM));
6421 	  op = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clobber));
6422 	}
6423       emit_insn (op);
6424 
6425       return true;
6426     }
6427 
6428   return false;
6429 }
6430 
6431 /* A subroutine of s390_expand_cs_hqi and s390_expand_atomic which returns a
6432    register that holds VAL of mode MODE shifted by COUNT bits.  */
6433 
6434 static inline rtx
6435 s390_expand_mask_and_shift (rtx val, machine_mode mode, rtx count)
6436 {
6437   val = expand_simple_binop (SImode, AND, val, GEN_INT (GET_MODE_MASK (mode)),
6438 			     NULL_RTX, 1, OPTAB_DIRECT);
6439   return expand_simple_binop (SImode, ASHIFT, val, count,
6440 			      NULL_RTX, 1, OPTAB_DIRECT);
6441 }
6442 
6443 /* Generate a vector comparison COND of CMP_OP1 and CMP_OP2 and store
6444    the result in TARGET.  */
6445 
6446 void
6447 s390_expand_vec_compare (rtx target, enum rtx_code cond,
6448 			 rtx cmp_op1, rtx cmp_op2)
6449 {
6450   machine_mode mode = GET_MODE (target);
6451   bool neg_p = false, swap_p = false;
6452   rtx tmp;
6453 
6454   if (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_VECTOR_FLOAT)
6455     {
6456       switch (cond)
6457 	{
6458 	  /* NE a != b -> !(a == b) */
6459 	case NE:   cond = EQ; neg_p = true;                break;
6460 	  /* UNGT a u> b -> !(b >= a) */
6461 	case UNGT: cond = GE; neg_p = true; swap_p = true; break;
6462 	  /* UNGE a u>= b -> !(b > a) */
6463 	case UNGE: cond = GT; neg_p = true; swap_p = true; break;
6464 	  /* LE: a <= b -> b >= a */
6465 	case LE:   cond = GE;               swap_p = true; break;
6466 	  /* UNLE: a u<= b -> !(a > b) */
6467 	case UNLE: cond = GT; neg_p = true;                break;
6468 	  /* LT: a < b -> b > a */
6469 	case LT:   cond = GT;               swap_p = true; break;
6470 	  /* UNLT: a u< b -> !(a >= b) */
6471 	case UNLT: cond = GE; neg_p = true;                break;
6472 	case UNEQ:
6473 	  emit_insn (gen_vec_cmpuneq (target, cmp_op1, cmp_op2));
6474 	  return;
6475 	case LTGT:
6476 	  emit_insn (gen_vec_cmpltgt (target, cmp_op1, cmp_op2));
6477 	  return;
6478 	case ORDERED:
6479 	  emit_insn (gen_vec_ordered (target, cmp_op1, cmp_op2));
6480 	  return;
6481 	case UNORDERED:
6482 	  emit_insn (gen_vec_unordered (target, cmp_op1, cmp_op2));
6483 	  return;
6484 	default: break;
6485 	}
6486     }
6487   else
6488     {
6489       switch (cond)
6490 	{
6491 	  /* NE: a != b -> !(a == b) */
6492 	case NE:  cond = EQ;  neg_p = true;                break;
6493 	  /* GE: a >= b -> !(b > a) */
6494 	case GE:  cond = GT;  neg_p = true; swap_p = true; break;
6495 	  /* GEU: a >= b -> !(b > a) */
6496 	case GEU: cond = GTU; neg_p = true; swap_p = true; break;
6497 	  /* LE: a <= b -> !(a > b) */
6498 	case LE:  cond = GT;  neg_p = true;                break;
6499 	  /* LEU: a <= b -> !(a > b) */
6500 	case LEU: cond = GTU; neg_p = true;                break;
6501 	  /* LT: a < b -> b > a */
6502 	case LT:  cond = GT;                swap_p = true; break;
6503 	  /* LTU: a < b -> b > a */
6504 	case LTU: cond = GTU;               swap_p = true; break;
6505 	default: break;
6506 	}
6507     }
6508 
6509   if (swap_p)
6510     {
6511       tmp = cmp_op1; cmp_op1 = cmp_op2; cmp_op2 = tmp;
6512     }
6513 
6514   emit_insn (gen_rtx_SET (target, gen_rtx_fmt_ee (cond,
6515 						  mode,
6516 						  cmp_op1, cmp_op2)));
6517   if (neg_p)
6518     emit_insn (gen_rtx_SET (target, gen_rtx_NOT (mode, target)));
6519 }
6520 
6521 /* Expand the comparison CODE of CMP1 and CMP2 and copy 1 or 0 into
6522    TARGET if either all (ALL_P is true) or any (ALL_P is false) of the
6523    elements in CMP1 and CMP2 fulfill the comparison.
6524    This function is only used to emit patterns for the vx builtins and
6525    therefore only handles comparison codes required by the
6526    builtins.  */
6527 void
6528 s390_expand_vec_compare_cc (rtx target, enum rtx_code code,
6529 			    rtx cmp1, rtx cmp2, bool all_p)
6530 {
6531   machine_mode cc_producer_mode, cc_consumer_mode, scratch_mode;
6532   rtx tmp_reg = gen_reg_rtx (SImode);
6533   bool swap_p = false;
6534 
6535   if (GET_MODE_CLASS (GET_MODE (cmp1)) == MODE_VECTOR_INT)
6536     {
6537       switch (code)
6538 	{
6539 	case EQ:
6540 	case NE:
6541 	  cc_producer_mode = CCVEQmode;
6542 	  break;
6543 	case GE:
6544 	case LT:
6545 	  code = swap_condition (code);
6546 	  swap_p = true;
6547 	  /* fallthrough */
6548 	case GT:
6549 	case LE:
6550 	  cc_producer_mode = CCVIHmode;
6551 	  break;
6552 	case GEU:
6553 	case LTU:
6554 	  code = swap_condition (code);
6555 	  swap_p = true;
6556 	  /* fallthrough */
6557 	case GTU:
6558 	case LEU:
6559 	  cc_producer_mode = CCVIHUmode;
6560 	  break;
6561 	default:
6562 	  gcc_unreachable ();
6563 	}
6564 
6565       scratch_mode = GET_MODE (cmp1);
6566       /* These codes represent inverted CC interpretations.  Inverting
6567 	 an ALL CC mode results in an ANY CC mode and the other way
6568 	 around.  Invert the all_p flag here to compensate for
6569 	 that.  */
6570       if (code == NE || code == LE || code == LEU)
6571 	all_p = !all_p;
6572 
6573       cc_consumer_mode = all_p ? CCVIALLmode : CCVIANYmode;
6574     }
6575   else if (GET_MODE_CLASS (GET_MODE (cmp1)) == MODE_VECTOR_FLOAT)
6576     {
6577       bool inv_p = false;
6578 
6579       switch (code)
6580 	{
6581 	case EQ:   cc_producer_mode = CCVEQmode;  break;
6582 	case NE:   cc_producer_mode = CCVEQmode;  inv_p = true; break;
6583 	case GT:   cc_producer_mode = CCVFHmode;  break;
6584 	case GE:   cc_producer_mode = CCVFHEmode; break;
6585 	case UNLE: cc_producer_mode = CCVFHmode;  inv_p = true; break;
6586 	case UNLT: cc_producer_mode = CCVFHEmode; inv_p = true; break;
6587 	case LT:   cc_producer_mode = CCVFHmode;  code = GT; swap_p = true; break;
6588 	case LE:   cc_producer_mode = CCVFHEmode; code = GE; swap_p = true; break;
6589 	default: gcc_unreachable ();
6590 	}
6591       scratch_mode = mode_for_int_vector (GET_MODE (cmp1)).require ();
6592 
6593       if (inv_p)
6594 	all_p = !all_p;
6595 
6596       cc_consumer_mode = all_p ? CCVFALLmode : CCVFANYmode;
6597     }
6598   else
6599     gcc_unreachable ();
6600 
6601   if (swap_p)
6602     {
6603       rtx tmp = cmp2;
6604       cmp2 = cmp1;
6605       cmp1 = tmp;
6606     }
6607 
6608   emit_insn (gen_rtx_PARALLEL (VOIDmode,
6609 	       gen_rtvec (2, gen_rtx_SET (
6610 			       gen_rtx_REG (cc_producer_mode, CC_REGNUM),
6611 			       gen_rtx_COMPARE (cc_producer_mode, cmp1, cmp2)),
6612 			  gen_rtx_CLOBBER (VOIDmode,
6613 					   gen_rtx_SCRATCH (scratch_mode)))));
6614   emit_move_insn (target, const0_rtx);
6615   emit_move_insn (tmp_reg, const1_rtx);
6616 
6617   emit_move_insn (target,
6618 		  gen_rtx_IF_THEN_ELSE (SImode,
6619 		    gen_rtx_fmt_ee (code, VOIDmode,
6620 				    gen_rtx_REG (cc_consumer_mode, CC_REGNUM),
6621 				    const0_rtx),
6622 					tmp_reg, target));
6623 }
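
/* Example (informal): for an integer "all elements not equal"
   comparison (code NE, ALL_P true) there is no direct NE vector
   compare, so the producer stays in CCVEQmode and the ALL/ANY sense
   is flipped instead; the consumer then uses CCVIANYmode, testing
   "not any element equal".  */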
6624 
6625 /* Invert the comparison CODE applied to a CC mode.  This is only safe
6626    if we know whether the result was created by a floating point
6627    compare or not.  For the CCV modes this is encoded as part of the
6628    mode.  */
6629 enum rtx_code
6630 s390_reverse_condition (machine_mode mode, enum rtx_code code)
6631 {
6632   /* Reversal of FP compares takes care of this -- an ordered compare
6633      becomes an unordered compare and vice versa.  */
6634   if (mode == CCVFALLmode || mode == CCVFANYmode)
6635     return reverse_condition_maybe_unordered (code);
6636   else if (mode == CCVIALLmode || mode == CCVIANYmode)
6637     return reverse_condition (code);
6638   else
6639     gcc_unreachable ();
6640 }
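
/* For example, reversing GT yields LE for the integer vector CC modes
   (CCVIALL/CCVIANY) but UNLE for the floating point ones
   (CCVFALL/CCVFANY), since the reversed FP condition must also hold
   for unordered (NaN) operands.  */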
6641 
6642 /* Generate a vector comparison expression loading either elements of
6643    THEN or ELS into TARGET depending on the comparison COND of CMP_OP1
6644    and CMP_OP2.  */
6645 
6646 void
6647 s390_expand_vcond (rtx target, rtx then, rtx els,
6648 		   enum rtx_code cond, rtx cmp_op1, rtx cmp_op2)
6649 {
6650   rtx tmp;
6651   machine_mode result_mode;
6652   rtx result_target;
6653 
6654   machine_mode target_mode = GET_MODE (target);
6655   machine_mode cmp_mode = GET_MODE (cmp_op1);
6656   rtx op = (cond == LT) ? els : then;
6657 
6658   /* Try to optimize x < 0 ? -1 : 0 into (signed) x >> 31
6659      and x < 0 ? 1 : 0 into (unsigned) x >> 31.  Likewise
6660      for short and byte (x >> 15 and x >> 7 respectively).  */
6661   if ((cond == LT || cond == GE)
6662       && target_mode == cmp_mode
6663       && cmp_op2 == CONST0_RTX (cmp_mode)
6664       && op == CONST0_RTX (target_mode)
6665       && s390_vector_mode_supported_p (target_mode)
6666       && GET_MODE_CLASS (target_mode) == MODE_VECTOR_INT)
6667     {
6668       rtx negop = (cond == LT) ? then : els;
6669 
6670       int shift = GET_MODE_BITSIZE (GET_MODE_INNER (target_mode)) - 1;
6671 
6672       /* if x < 0 ? 1 : 0 or if x >= 0 ? 0 : 1 */
6673       if (negop == CONST1_RTX (target_mode))
6674 	{
6675 	  rtx res = expand_simple_binop (cmp_mode, LSHIFTRT, cmp_op1,
6676 					 GEN_INT (shift), target,
6677 					 1, OPTAB_DIRECT);
6678 	  if (res != target)
6679 	    emit_move_insn (target, res);
6680 	  return;
6681 	}
6682 
6683       /* if x < 0 ? -1 : 0 or if x >= 0 ? 0 : -1 */
6684       else if (all_ones_operand (negop, target_mode))
6685 	{
6686 	  rtx res = expand_simple_binop (cmp_mode, ASHIFTRT, cmp_op1,
6687 					 GEN_INT (shift), target,
6688 					 0, OPTAB_DIRECT);
6689 	  if (res != target)
6690 	    emit_move_insn (target, res);
6691 	  return;
6692 	}
6693     }
6694 
6695   /* We always use an integral type vector to hold the comparison
6696      result.  */
6697   result_mode = mode_for_int_vector (cmp_mode).require ();
6698   result_target = gen_reg_rtx (result_mode);
6699 
6700   /* We allow vector immediates as comparison operands that
6701      can be handled by the optimization above but not by the
6702      following code.  Hence, force them into registers here.  */
6703   if (!REG_P (cmp_op1))
6704     cmp_op1 = force_reg (GET_MODE (cmp_op1), cmp_op1);
6705 
6706   if (!REG_P (cmp_op2))
6707     cmp_op2 = force_reg (GET_MODE (cmp_op2), cmp_op2);
6708 
6709   s390_expand_vec_compare (result_target, cond,
6710 			   cmp_op1, cmp_op2);
6711 
6712   /* If the results are supposed to be either -1 or 0 we are done
6713      since this is what our compare instructions generate anyway.  */
6714   if (all_ones_operand (then, GET_MODE (then))
6715       && const0_operand (els, GET_MODE (els)))
6716     {
6717       emit_move_insn (target, gen_rtx_SUBREG (target_mode,
6718 					      result_target, 0));
6719       return;
6720     }
6721 
6722   /* Otherwise we will do a vsel afterwards.  */
6723   /* This gets triggered e.g.
6724      with gcc.c-torture/compile/pr53410-1.c */
6725   if (!REG_P (then))
6726     then = force_reg (target_mode, then);
6727 
6728   if (!REG_P (els))
6729     els = force_reg (target_mode, els);
6730 
6731   tmp = gen_rtx_fmt_ee (EQ, VOIDmode,
6732 			result_target,
6733 			CONST0_RTX (result_mode));
6734 
6735   /* We compared the result against zero above so we have to swap then
6736      and els here.  */
6737   tmp = gen_rtx_IF_THEN_ELSE (target_mode, tmp, els, then);
6738 
6739   gcc_assert (target_mode == GET_MODE (then));
6740   emit_insn (gen_rtx_SET (target, tmp));
6741 }
6742 
6743 /* Emit the RTX necessary to initialize the vector TARGET with values
6744    in VALS.  */
6745 void
6746 s390_expand_vec_init (rtx target, rtx vals)
6747 {
6748   machine_mode mode = GET_MODE (target);
6749   machine_mode inner_mode = GET_MODE_INNER (mode);
6750   int n_elts = GET_MODE_NUNITS (mode);
6751   bool all_same = true, all_regs = true, all_const_int = true;
6752   rtx x;
6753   int i;
6754 
6755   for (i = 0; i < n_elts; ++i)
6756     {
6757       x = XVECEXP (vals, 0, i);
6758 
6759       if (!CONST_INT_P (x))
6760 	all_const_int = false;
6761 
6762       if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
6763 	all_same = false;
6764 
6765       if (!REG_P (x))
6766 	all_regs = false;
6767     }
6768 
6769   /* Use vector gen mask or vector gen byte mask if possible.  */
6770   if (all_same && all_const_int
6771       && (XVECEXP (vals, 0, 0) == const0_rtx
6772 	  || s390_contiguous_bitmask_vector_p (XVECEXP (vals, 0, 0),
6773 					       NULL, NULL)
6774 	  || s390_bytemask_vector_p (XVECEXP (vals, 0, 0), NULL)))
6775     {
6776       emit_insn (gen_rtx_SET (target,
6777 			      gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0))));
6778       return;
6779     }
6780 
6781   /* Use vector replicate instructions.  vlrep/vrepi/vrep  */
6782   if (all_same)
6783     {
6784       rtx elem = XVECEXP (vals, 0, 0);
6785 
6786       /* vec_splats accepts general_operand as source.  */
6787       if (!general_operand (elem, GET_MODE (elem)))
6788 	elem = force_reg (inner_mode, elem);
6789 
6790       emit_insn (gen_rtx_SET (target, gen_rtx_VEC_DUPLICATE (mode, elem)));
6791       return;
6792     }
6793 
6794   if (all_regs
6795       && REG_P (target)
6796       && n_elts == 2
6797       && GET_MODE_SIZE (inner_mode) == 8)
6798     {
6799       /* Use vector load pair.  */
6800       emit_insn (gen_rtx_SET (target,
6801 			      gen_rtx_VEC_CONCAT (mode,
6802 						  XVECEXP (vals, 0, 0),
6803 						  XVECEXP (vals, 0, 1))));
6804       return;
6805     }
6806 
6807   /* Use vector load logical element and zero.  */
6808   if (TARGET_VXE && (mode == V4SImode || mode == V4SFmode))
6809     {
6810       bool found = true;
6811 
6812       x = XVECEXP (vals, 0, 0);
6813       if (memory_operand (x, inner_mode))
6814 	{
6815 	  for (i = 1; i < n_elts; ++i)
6816 	    found = found && XVECEXP (vals, 0, i) == const0_rtx;
6817 
6818 	  if (found)
6819 	    {
6820 	      machine_mode half_mode = (inner_mode == SFmode
6821 					? V2SFmode : V2SImode);
6822 	      emit_insn (gen_rtx_SET (target,
6823 			      gen_rtx_VEC_CONCAT (mode,
6824 						  gen_rtx_VEC_CONCAT (half_mode,
6825 								      x,
6826 								      const0_rtx),
6827 						  gen_rtx_VEC_CONCAT (half_mode,
6828 								      const0_rtx,
6829 								      const0_rtx))));
6830 	      return;
6831 	    }
6832 	}
6833     }
6834 
6835   /* We are about to set the vector elements one by one.  Zero out the
6836      full register first in order to help the data flow framework
6837      detect it as a full VR set.  */
6838   emit_insn (gen_rtx_SET (target, CONST0_RTX (mode)));
6839 
6840   /* Unfortunately the vec_init expander is not allowed to fail.  So
6841      we have to implement the fallback ourselves.  */
6842   for (i = 0; i < n_elts; i++)
6843     {
6844       rtx elem = XVECEXP (vals, 0, i);
6845       if (!general_operand (elem, GET_MODE (elem)))
6846 	elem = force_reg (inner_mode, elem);
6847 
6848       emit_insn (gen_rtx_SET (target,
6849 			      gen_rtx_UNSPEC (mode,
6850 					      gen_rtvec (3, elem,
6851 							 GEN_INT (i), target),
6852 					      UNSPEC_VEC_SET)));
6853     }
6854 }
6855 
6856 /* Structure to hold the initial parameters for a compare_and_swap operation
6857    in HImode and QImode.  */
6858 
6859 struct alignment_context
6860 {
6861   rtx memsi;	  /* SI aligned memory location.  */
6862   rtx shift;	  /* Bit offset with regard to lsb.  */
6863   rtx modemask;	  /* Mask of the HQImode shifted by SHIFT bits.  */
6864   rtx modemaski;  /* ~modemask */
6865   bool aligned;	  /* True if memory is aligned, false else.  */
6866 };
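
/* For example, for an SImode-aligned HImode access the halfword
   occupies the two most significant bytes of the containing word, so
   SHIFT ends up as 16 (bits), MODEMASK as 0xffff0000 and MODEMASKI as
   0x0000ffff; see init_alignment_context below.  */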
6867 
6868 /* A subroutine of s390_expand_cs_hqi and s390_expand_atomic to initialize
6869    structure AC for transparent simplification if the memory alignment is known
6870    to be at least 32bit.  MEM is the memory location for the actual operation
6871    and MODE its mode.  */
6872 
6873 static void
6874 init_alignment_context (struct alignment_context *ac, rtx mem,
6875 			machine_mode mode)
6876 {
6877   ac->shift = GEN_INT (GET_MODE_SIZE (SImode) - GET_MODE_SIZE (mode));
6878   ac->aligned = (MEM_ALIGN (mem) >= GET_MODE_BITSIZE (SImode));
6879 
6880   if (ac->aligned)
6881     ac->memsi = adjust_address (mem, SImode, 0); /* Memory is aligned.  */
6882   else
6883     {
6884       /* Alignment is unknown.  */
6885       rtx byteoffset, addr, align;
6886 
6887       /* Force the address into a register.  */
6888       addr = force_reg (Pmode, XEXP (mem, 0));
6889 
6890       /* Align it to SImode.  */
6891       align = expand_simple_binop (Pmode, AND, addr,
6892 				   GEN_INT (-GET_MODE_SIZE (SImode)),
6893 				   NULL_RTX, 1, OPTAB_DIRECT);
6894       /* Generate MEM.  */
6895       ac->memsi = gen_rtx_MEM (SImode, align);
6896       MEM_VOLATILE_P (ac->memsi) = MEM_VOLATILE_P (mem);
6897       set_mem_alias_set (ac->memsi, ALIAS_SET_MEMORY_BARRIER);
6898       set_mem_align (ac->memsi, GET_MODE_BITSIZE (SImode));
6899 
6900       /* Calculate shiftcount.  */
6901       byteoffset = expand_simple_binop (Pmode, AND, addr,
6902 					GEN_INT (GET_MODE_SIZE (SImode) - 1),
6903 					NULL_RTX, 1, OPTAB_DIRECT);
6904       /* As we already have some offset, evaluate the remaining distance.  */
6905       ac->shift = expand_simple_binop (SImode, MINUS, ac->shift, byteoffset,
6906 				      NULL_RTX, 1, OPTAB_DIRECT);
6907     }
6908 
6909   /* Shift is the byte count, but we need the bitcount.  */
6910   ac->shift = expand_simple_binop (SImode, ASHIFT, ac->shift, GEN_INT (3),
6911 				   NULL_RTX, 1, OPTAB_DIRECT);
6912 
6913   /* Calculate masks.  */
6914   ac->modemask = expand_simple_binop (SImode, ASHIFT,
6915 				      GEN_INT (GET_MODE_MASK (mode)),
6916 				      ac->shift, NULL_RTX, 1, OPTAB_DIRECT);
6917   ac->modemaski = expand_simple_unop (SImode, NOT, ac->modemask,
6918 				      NULL_RTX, 1);
6919 }
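
/* As an illustration of the unaligned path: a QImode value at byte
   offset 1 within its SImode word gets BYTEOFFSET 1, hence a SHIFT of
   (4 - 1) - 1 = 2 bytes = 16 bits, MODEMASK 0x00ff0000 and MODEMASKI
   0xff00ffff, selecting bits 16..23 of the big-endian word.  */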
6920 
6921 /* A subroutine of s390_expand_cs_hqi.  Insert INS into VAL.  If possible,
6922    use a single insv insn into SEQ2.  Otherwise, put prep insns in SEQ1 and
6923    perform the merge in SEQ2.  */
6924 
6925 static rtx
6926 s390_two_part_insv (struct alignment_context *ac, rtx *seq1, rtx *seq2,
6927 		    machine_mode mode, rtx val, rtx ins)
6928 {
6929   rtx tmp;
6930 
6931   if (ac->aligned)
6932     {
6933       start_sequence ();
6934       tmp = copy_to_mode_reg (SImode, val);
6935       if (s390_expand_insv (tmp, GEN_INT (GET_MODE_BITSIZE (mode)),
6936 			    const0_rtx, ins))
6937 	{
6938 	  *seq1 = NULL;
6939 	  *seq2 = get_insns ();
6940 	  end_sequence ();
6941 	  return tmp;
6942 	}
6943       end_sequence ();
6944     }
6945 
6946   /* Failed to use insv.  Generate a two part shift and mask.  */
6947   start_sequence ();
6948   tmp = s390_expand_mask_and_shift (ins, mode, ac->shift);
6949   *seq1 = get_insns ();
6950   end_sequence ();
6951 
6952   start_sequence ();
6953   tmp = expand_simple_binop (SImode, IOR, tmp, val, NULL_RTX, 1, OPTAB_DIRECT);
6954   *seq2 = get_insns ();
6955   end_sequence ();
6956 
6957   return tmp;
6958 }
6959 
6960 /* Expand an atomic compare and swap operation for HImode and QImode.  MEM is
6961    the memory location, CMP the old value to compare MEM with and NEW_RTX the
6962    value to set if CMP == MEM.  */
6963 
6964 static void
6965 s390_expand_cs_hqi (machine_mode mode, rtx btarget, rtx vtarget, rtx mem,
6966 		    rtx cmp, rtx new_rtx, bool is_weak)
6967 {
6968   struct alignment_context ac;
6969   rtx cmpv, newv, val, cc, seq0, seq1, seq2, seq3;
6970   rtx res = gen_reg_rtx (SImode);
6971   rtx_code_label *csloop = NULL, *csend = NULL;
6972 
6973   gcc_assert (MEM_P (mem));
6974 
6975   init_alignment_context (&ac, mem, mode);
6976 
6977   /* Load full word.  Subsequent loads are performed by CS.  */
6978   val = expand_simple_binop (SImode, AND, ac.memsi, ac.modemaski,
6979 			     NULL_RTX, 1, OPTAB_DIRECT);
6980 
6981   /* Prepare insertions of cmp and new_rtx into the loaded value.  When
6982      possible, we try to use insv to make this happen efficiently.  If
6983      that fails we'll generate code both inside and outside the loop.  */
6984   cmpv = s390_two_part_insv (&ac, &seq0, &seq2, mode, val, cmp);
6985   newv = s390_two_part_insv (&ac, &seq1, &seq3, mode, val, new_rtx);
6986 
6987   if (seq0)
6988     emit_insn (seq0);
6989   if (seq1)
6990     emit_insn (seq1);
6991 
6992   /* Start CS loop.  */
6993   if (!is_weak)
6994     {
6995       /* Begin assuming success.  */
6996       emit_move_insn (btarget, const1_rtx);
6997 
6998       csloop = gen_label_rtx ();
6999       csend = gen_label_rtx ();
7000       emit_label (csloop);
7001     }
7002 
7003   /* val = "<mem>00..0<mem>"
7004    * cmp = "00..0<cmp>00..0"
7005    * new = "00..0<new>00..0"
7006    */
7007 
7008   emit_insn (seq2);
7009   emit_insn (seq3);
7010 
7011   cc = s390_emit_compare_and_swap (EQ, res, ac.memsi, cmpv, newv, CCZ1mode);
7012   if (is_weak)
7013     emit_insn (gen_cstorecc4 (btarget, cc, XEXP (cc, 0), XEXP (cc, 1)));
7014   else
7015     {
7016       rtx tmp;
7017 
7018       /* Jump to end if we're done (likely?).  */
7019       s390_emit_jump (csend, cc);
7020 
7021       /* Check for changes outside mode, and loop internally if so.
7022 	 Arrange the moves so that the compare is adjacent to the
7023 	 branch so that we can generate CRJ.  */
7024       tmp = copy_to_reg (val);
7025       force_expand_binop (SImode, and_optab, res, ac.modemaski, val,
7026 			  1, OPTAB_DIRECT);
7027       cc = s390_emit_compare (NE, val, tmp);
7028       s390_emit_jump (csloop, cc);
7029 
7030       /* Failed.  */
7031       emit_move_insn (btarget, const0_rtx);
7032       emit_label (csend);
7033     }
7034 
7035   /* Return the correct part of the bitfield.  */
7036   convert_move (vtarget, expand_simple_binop (SImode, LSHIFTRT, res, ac.shift,
7037 					      NULL_RTX, 1, OPTAB_DIRECT), 1);
7038 }
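
/* A typical trigger for this expander (sketch, assuming a 16-bit
   type):

     _Bool
     cas_short (short *p, short *old, short newval)
     {
       return __atomic_compare_exchange_n (p, old, newval, 0,
					   __ATOMIC_SEQ_CST,
					   __ATOMIC_SEQ_CST);
     }

   Since CS/CSG only operate on word/doubleword quantities, the
   halfword access is emulated with the word-sized CS loop emitted
   above.  */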
7039 
7040 /* Variant of s390_expand_cs for SI, DI and TI modes.  */
7041 static void
7042 s390_expand_cs_tdsi (machine_mode mode, rtx btarget, rtx vtarget, rtx mem,
7043 		     rtx cmp, rtx new_rtx, bool is_weak)
7044 {
7045   rtx output = vtarget;
7046   rtx_code_label *skip_cs_label = NULL;
7047   bool do_const_opt = false;
7048 
7049   if (!register_operand (output, mode))
7050     output = gen_reg_rtx (mode);
7051 
7052   /* If IS_WEAK is true and the INPUT value is a constant, compare the memory
7053      with the constant first and skip the compare_and_swap because it's very
7054      expensive and likely to fail anyway.
7055      Note 1: This is done only for IS_WEAK.  C11 allows optimizations that may
7056      cause spurious failures in that case.
7057      Note 2: It may be useful to do this also for non-constant INPUT.
7058      Note 3: Currently only targets with "load on condition" are supported
7059      (z196 and newer).  */
7060 
7061   if (TARGET_Z196
7062       && (mode == SImode || mode == DImode))
7063     do_const_opt = (is_weak && CONST_INT_P (cmp));
7064 
7065   if (do_const_opt)
7066     {
7067       rtx cc = gen_rtx_REG (CCZmode, CC_REGNUM);
7068 
7069       skip_cs_label = gen_label_rtx ();
7070       emit_move_insn (btarget, const0_rtx);
7071       if (CONST_INT_P (cmp) && INTVAL (cmp) == 0)
7072 	{
7073 	  rtvec lt = rtvec_alloc (2);
7074 
7075 	  /* Load-and-test + conditional jump.  */
7076 	  RTVEC_ELT (lt, 0)
7077 	    = gen_rtx_SET (cc, gen_rtx_COMPARE (CCZmode, mem, cmp));
7078 	  RTVEC_ELT (lt, 1) = gen_rtx_SET (output, mem);
7079 	  emit_insn (gen_rtx_PARALLEL (VOIDmode, lt));
7080 	}
7081       else
7082 	{
7083 	  emit_move_insn (output, mem);
7084 	  emit_insn (gen_rtx_SET (cc, gen_rtx_COMPARE (CCZmode, output, cmp)));
7085 	}
7086       s390_emit_jump (skip_cs_label, gen_rtx_NE (VOIDmode, cc, const0_rtx));
7087       add_reg_br_prob_note (get_last_insn (),
7088 			    profile_probability::very_unlikely ());
7089       /* If the jump is not taken, OUTPUT is the expected value.  */
7090       cmp = output;
7091       /* Reload newval to a register manually, *after* the compare and jump
7092 	 above.  Otherwise Reload might place it before the jump.  */
7093     }
7094   else
7095     cmp = force_reg (mode, cmp);
7096   new_rtx = force_reg (mode, new_rtx);
7097   s390_emit_compare_and_swap (EQ, output, mem, cmp, new_rtx,
7098 			      (do_const_opt) ? CCZmode : CCZ1mode);
7099   if (skip_cs_label != NULL)
7100     emit_label (skip_cs_label);
7101 
7102   /* We deliberately accept non-register operands in the predicate
7103      to ensure the write back to the output operand happens *before*
7104      the store-flags code below.  This makes it easier for combine
7105      to merge the store-flags code with a potential test-and-branch
7106      pattern following (immediately!) afterwards.  */
7107   if (output != vtarget)
7108     emit_move_insn (vtarget, output);
7109 
7110   if (do_const_opt)
7111     {
7112       rtx cc, cond, ite;
7113 
7114       /* Do not use gen_cstorecc4 here because it writes either 1 or 0, but
7115 	 btarget has already been initialized with 0 above.  */
7116       cc = gen_rtx_REG (CCZmode, CC_REGNUM);
7117       cond = gen_rtx_EQ (VOIDmode, cc, const0_rtx);
7118       ite = gen_rtx_IF_THEN_ELSE (SImode, cond, const1_rtx, btarget);
7119       emit_insn (gen_rtx_SET (btarget, ite));
7120     }
7121   else
7122     {
7123       rtx cc, cond;
7124 
7125       cc = gen_rtx_REG (CCZ1mode, CC_REGNUM);
7126       cond = gen_rtx_EQ (SImode, cc, const0_rtx);
7127       emit_insn (gen_cstorecc4 (btarget, cond, cc, const0_rtx));
7128     }
7129 }
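
/* Sketch of the DO_CONST_OPT path: a weak compare-and-swap whose
   expected value is seen to be the constant 0, e.g.

     long expected = 0;
     __atomic_compare_exchange_n (p, &expected, 1, 1,
				  __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST);

   on z196 or newer first load-and-tests the memory word and branches
   around the CS if it is already nonzero, since a weak CAS is allowed
   to fail spuriously anyway.  */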
7130 
7131 /* Expand an atomic compare and swap operation.  MEM is the memory location,
7132    CMP the old value to compare MEM with and NEW_RTX the value to set if
7133    CMP == MEM.  */
7134 
7135 void
7136 s390_expand_cs (machine_mode mode, rtx btarget, rtx vtarget, rtx mem,
7137 		rtx cmp, rtx new_rtx, bool is_weak)
7138 {
7139   switch (mode)
7140     {
7141     case E_TImode:
7142     case E_DImode:
7143     case E_SImode:
7144       s390_expand_cs_tdsi (mode, btarget, vtarget, mem, cmp, new_rtx, is_weak);
7145       break;
7146     case E_HImode:
7147     case E_QImode:
7148       s390_expand_cs_hqi (mode, btarget, vtarget, mem, cmp, new_rtx, is_weak);
7149       break;
7150     default:
7151       gcc_unreachable ();
7152     }
7153 }
7154 
7155 /* Expand an atomic_exchange operation simulated with a compare-and-swap loop.
7156    The memory location MEM is set to INPUT.  OUTPUT is set to the previous value
7157    of MEM.  */
7158 
7159 void
7160 s390_expand_atomic_exchange_tdsi (rtx output, rtx mem, rtx input)
7161 {
7162   machine_mode mode = GET_MODE (mem);
7163   rtx_code_label *csloop;
7164 
7165   if (TARGET_Z196
7166       && (mode == DImode || mode == SImode)
7167       && CONST_INT_P (input) && INTVAL (input) == 0)
7168     {
7169       emit_move_insn (output, const0_rtx);
7170       if (mode == DImode)
7171 	emit_insn (gen_atomic_fetch_anddi (output, mem, const0_rtx, input));
7172       else
7173 	emit_insn (gen_atomic_fetch_andsi (output, mem, const0_rtx, input));
7174       return;
7175     }
7176 
7177   input = force_reg (mode, input);
7178   emit_move_insn (output, mem);
7179   csloop = gen_label_rtx ();
7180   emit_label (csloop);
7181   s390_emit_jump (csloop, s390_emit_compare_and_swap (NE, output, mem, output,
7182 						      input, CCZ1mode));
7183 }
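
/* For instance, __atomic_exchange_n (p, 0, __ATOMIC_SEQ_CST) on an
   int or long is handled by the z196 special case above: an atomic
   fetch-and-AND with 0 both returns the old value and clears the
   memory word, so no CS loop is needed.  */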
7184 
7185 /* Expand an atomic operation CODE of mode MODE.  MEM is the memory location
7186    and VAL the value to play with.  If AFTER is true then store the value
7187    MEM holds after the operation, if AFTER is false then store the value MEM
7188    holds before the operation.  If TARGET is zero then discard that value, else
7189    store it to TARGET.  */
7190 
7191 void
7192 s390_expand_atomic (machine_mode mode, enum rtx_code code,
7193 		    rtx target, rtx mem, rtx val, bool after)
7194 {
7195   struct alignment_context ac;
7196   rtx cmp;
7197   rtx new_rtx = gen_reg_rtx (SImode);
7198   rtx orig = gen_reg_rtx (SImode);
7199   rtx_code_label *csloop = gen_label_rtx ();
7200 
7201   gcc_assert (!target || register_operand (target, VOIDmode));
7202   gcc_assert (MEM_P (mem));
7203 
7204   init_alignment_context (&ac, mem, mode);
7205 
7206   /* Shift val to the correct bit positions.
7207      Preserve "icm", but prevent "ex icm".  */
7208   if (!(ac.aligned && code == SET && MEM_P (val)))
7209     val = s390_expand_mask_and_shift (val, mode, ac.shift);
7210 
7211   /* Further preparation insns.  */
7212   if (code == PLUS || code == MINUS)
7213     emit_move_insn (orig, val);
7214   else if (code == MULT || code == AND) /* val = "11..1<val>11..1" */
7215     val = expand_simple_binop (SImode, XOR, val, ac.modemaski,
7216 			       NULL_RTX, 1, OPTAB_DIRECT);
7217 
7218   /* Load full word.  Subsequent loads are performed by CS.  */
7219   cmp = force_reg (SImode, ac.memsi);
7220 
7221   /* Start CS loop.  */
7222   emit_label (csloop);
7223   emit_move_insn (new_rtx, cmp);
7224 
7225   /* Patch new with val at correct position.  */
7226   switch (code)
7227     {
7228     case PLUS:
7229     case MINUS:
7230       val = expand_simple_binop (SImode, code, new_rtx, orig,
7231 				 NULL_RTX, 1, OPTAB_DIRECT);
7232       val = expand_simple_binop (SImode, AND, val, ac.modemask,
7233 				 NULL_RTX, 1, OPTAB_DIRECT);
7234       /* FALLTHRU */
7235     case SET:
7236       if (ac.aligned && MEM_P (val))
7237 	store_bit_field (new_rtx, GET_MODE_BITSIZE (mode), 0,
7238 			 0, 0, SImode, val, false);
7239       else
7240 	{
7241 	  new_rtx = expand_simple_binop (SImode, AND, new_rtx, ac.modemaski,
7242 				     NULL_RTX, 1, OPTAB_DIRECT);
7243 	  new_rtx = expand_simple_binop (SImode, IOR, new_rtx, val,
7244 				     NULL_RTX, 1, OPTAB_DIRECT);
7245 	}
7246       break;
7247     case AND:
7248     case IOR:
7249     case XOR:
7250       new_rtx = expand_simple_binop (SImode, code, new_rtx, val,
7251 				 NULL_RTX, 1, OPTAB_DIRECT);
7252       break;
7253     case MULT: /* NAND */
7254       new_rtx = expand_simple_binop (SImode, AND, new_rtx, val,
7255 				 NULL_RTX, 1, OPTAB_DIRECT);
7256       new_rtx = expand_simple_binop (SImode, XOR, new_rtx, ac.modemask,
7257 				 NULL_RTX, 1, OPTAB_DIRECT);
7258       break;
7259     default:
7260       gcc_unreachable ();
7261     }
7262 
7263   s390_emit_jump (csloop, s390_emit_compare_and_swap (NE, cmp,
7264 						      ac.memsi, cmp, new_rtx,
7265 						      CCZ1mode));
7266 
7267   /* Return the correct part of the bitfield.  */
7268   if (target)
7269     convert_move (target, expand_simple_binop (SImode, LSHIFTRT,
7270 					       after ? new_rtx : cmp, ac.shift,
7271 					       NULL_RTX, 1, OPTAB_DIRECT), 1);
7272 }
7273 
7274 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
7275    We need to emit DTP-relative relocations.  */
7276 
7277 static void s390_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
7278 
7279 static void
7280 s390_output_dwarf_dtprel (FILE *file, int size, rtx x)
7281 {
7282   switch (size)
7283     {
7284     case 4:
7285       fputs ("\t.long\t", file);
7286       break;
7287     case 8:
7288       fputs ("\t.quad\t", file);
7289       break;
7290     default:
7291       gcc_unreachable ();
7292     }
7293   output_addr_const (file, x);
7294   fputs ("@DTPOFF", file);
7295 }
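
/* E.g. for SIZE == 8 and the TLS symbol foo this emits
   "\t.quad\tfoo@DTPOFF".  */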
7296 
7297 /* Return the proper mode for REGNO being represented in the dwarf
7298    unwind table.  */
7299 machine_mode
7300 s390_dwarf_frame_reg_mode (int regno)
7301 {
7302   machine_mode save_mode = default_dwarf_frame_reg_mode (regno);
7303 
7304   /* Make sure not to return DImode for any GPR with -m31 -mzarch.  */
7305   if (GENERAL_REGNO_P (regno))
7306     save_mode = Pmode;
7307 
7308   /* The rightmost 64 bits of vector registers are call-clobbered.  */
7309   if (GET_MODE_SIZE (save_mode) > 8)
7310     save_mode = DImode;
7311 
7312   return save_mode;
7313 }
7314 
7315 #ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
7316 /* Implement TARGET_MANGLE_TYPE.  */
7317 
7318 static const char *
7319 s390_mangle_type (const_tree type)
7320 {
7321   type = TYPE_MAIN_VARIANT (type);
7322 
7323   if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
7324       && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
7325     return NULL;
7326 
7327   if (type == s390_builtin_types[BT_BV16QI]) return "U6__boolc";
7328   if (type == s390_builtin_types[BT_BV8HI]) return "U6__bools";
7329   if (type == s390_builtin_types[BT_BV4SI]) return "U6__booli";
7330   if (type == s390_builtin_types[BT_BV2DI]) return "U6__booll";
7331 
7332   if (TYPE_MAIN_VARIANT (type) == long_double_type_node
7333       && TARGET_LONG_DOUBLE_128)
7334     return "g";
7335 
7336   /* For all other types, use normal C++ mangling.  */
7337   return NULL;
7338 }
7339 #endif
7340 
7341 /* In the name of slightly smaller debug output, and to cater to
7342    general assembler lossage, recognize various UNSPEC sequences
7343    and turn them back into a direct symbol reference.  */
7344 
7345 static rtx
7346 s390_delegitimize_address (rtx orig_x)
7347 {
7348   rtx x, y;
7349 
7350   orig_x = delegitimize_mem_from_attrs (orig_x);
7351   x = orig_x;
7352 
7353   /* Extract the symbol ref from:
7354      (plus:SI (reg:SI 12 %r12)
7355 	      (const:SI (unspec:SI [(symbol_ref/f:SI ("*.LC0"))]
7356 				    UNSPEC_GOTOFF/PLTOFF)))
7357      and
7358      (plus:SI (reg:SI 12 %r12)
7359 	      (const:SI (plus:SI (unspec:SI [(symbol_ref:SI ("L"))]
7360 					     UNSPEC_GOTOFF/PLTOFF)
7361 				 (const_int 4 [0x4]))))  */
7362   if (GET_CODE (x) == PLUS
7363       && REG_P (XEXP (x, 0))
7364       && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM
7365       && GET_CODE (XEXP (x, 1)) == CONST)
7366     {
7367       HOST_WIDE_INT offset = 0;
7368 
7369       /* The const operand.  */
7370       y = XEXP (XEXP (x, 1), 0);
7371 
7372       if (GET_CODE (y) == PLUS
7373 	  && GET_CODE (XEXP (y, 1)) == CONST_INT)
7374 	{
7375 	  offset = INTVAL (XEXP (y, 1));
7376 	  y = XEXP (y, 0);
7377 	}
7378 
7379       if (GET_CODE (y) == UNSPEC
7380 	  && (XINT (y, 1) == UNSPEC_GOTOFF
7381 	      || XINT (y, 1) == UNSPEC_PLTOFF))
7382 	return plus_constant (Pmode, XVECEXP (y, 0, 0), offset);
7383     }
7384 
7385   if (GET_CODE (x) != MEM)
7386     return orig_x;
7387 
7388   x = XEXP (x, 0);
7389   if (GET_CODE (x) == PLUS
7390       && GET_CODE (XEXP (x, 1)) == CONST
7391       && GET_CODE (XEXP (x, 0)) == REG
7392       && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
7393     {
7394       y = XEXP (XEXP (x, 1), 0);
7395       if (GET_CODE (y) == UNSPEC
7396 	  && XINT (y, 1) == UNSPEC_GOT)
7397 	y = XVECEXP (y, 0, 0);
7398       else
7399 	return orig_x;
7400     }
7401   else if (GET_CODE (x) == CONST)
7402     {
7403       /* Extract the symbol ref from:
7404 	 (mem:QI (const:DI (unspec:DI [(symbol_ref:DI ("foo"))]
7405 				       UNSPEC_PLT/GOTENT)))  */
7406 
7407       y = XEXP (x, 0);
7408       if (GET_CODE (y) == UNSPEC
7409 	  && (XINT (y, 1) == UNSPEC_GOTENT
7410 	      || XINT (y, 1) == UNSPEC_PLT))
7411 	y = XVECEXP (y, 0, 0);
7412       else
7413 	return orig_x;
7414     }
7415   else
7416     return orig_x;
7417 
7418   if (GET_MODE (orig_x) != Pmode)
7419     {
7420       if (GET_MODE (orig_x) == BLKmode)
7421 	return orig_x;
7422       y = lowpart_subreg (GET_MODE (orig_x), y, Pmode);
7423       if (y == NULL_RTX)
7424 	return orig_x;
7425     }
7426   return y;
7427 }
7428 
7429 /* Output operand OP to stdio stream FILE.
7430    OP is an address (register + offset) which is not used to address data;
7431    instead the rightmost bits are interpreted as the value.  */
7432 
7433 static void
7434 print_addrstyle_operand (FILE *file, rtx op)
7435 {
7436   HOST_WIDE_INT offset;
7437   rtx base;
7438 
7439   /* Extract base register and offset.  */
7440   if (!s390_decompose_addrstyle_without_index (op, &base, &offset))
7441     gcc_unreachable ();
7442 
7443   /* Sanity check.  */
7444   if (base)
7445     {
7446       gcc_assert (GET_CODE (base) == REG);
7447       gcc_assert (REGNO (base) < FIRST_PSEUDO_REGISTER);
7448       gcc_assert (REGNO_REG_CLASS (REGNO (base)) == ADDR_REGS);
7449     }
7450 
7451   /* Offsets are constricted to twelve bits.  */
7452   fprintf (file, HOST_WIDE_INT_PRINT_DEC, offset & ((1 << 12) - 1));
7453   if (base)
7454     fprintf (file, "(%s)", reg_names[REGNO (base)]);
7455 }
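
/* E.g. a shift count given as (plus (reg %r2) (const_int 7)) is
   printed as "7(%r2)", while a plain (const_int 3) is printed as
   just "3".  */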
7456 
7457 /* Assigns the number of NOP halfwords to be emitted before and after the
7458    function label to *HW_BEFORE and *HW_AFTER.  Both pointers must not be NULL.
7459    If hotpatching is disabled for the function, the values are set to zero.
7460 */
7461 
7462 static void
7463 s390_function_num_hotpatch_hw (tree decl,
7464 			       int *hw_before,
7465 			       int *hw_after)
7466 {
7467   tree attr;
7468 
7469   attr = lookup_attribute ("hotpatch", DECL_ATTRIBUTES (decl));
7470 
7471   /* Handle the arguments of the hotpatch attribute.  The values
7472      specified via attribute might override the cmdline argument
7473      values.  */
7474   if (attr)
7475     {
7476       tree args = TREE_VALUE (attr);
7477 
7478       *hw_before = TREE_INT_CST_LOW (TREE_VALUE (args));
7479       *hw_after = TREE_INT_CST_LOW (TREE_VALUE (TREE_CHAIN (args)));
7480     }
7481   else
7482     {
7483       /* Use the values specified by the cmdline arguments.  */
7484       *hw_before = s390_hotpatch_hw_before_label;
7485       *hw_after = s390_hotpatch_hw_after_label;
7486     }
7487 }
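
/* E.g. a function declared with __attribute__ ((hotpatch (1, 2)))
   gets *HW_BEFORE = 1 and *HW_AFTER = 2, overriding any -mhotpatch=
   command line values for this function.  */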
7488 
7489 /* Write the current .machine and .machinemode specification to the assembler
7490    file.  */
7491 
7492 #ifdef HAVE_AS_MACHINE_MACHINEMODE
7493 static void
7494 s390_asm_output_machine_for_arch (FILE *asm_out_file)
7495 {
7496   fprintf (asm_out_file, "\t.machinemode %s\n",
7497 	   (TARGET_ZARCH) ? "zarch" : "esa");
7498   fprintf (asm_out_file, "\t.machine \"%s",
7499 	   processor_table[s390_arch].binutils_name);
7500   if (S390_USE_ARCHITECTURE_MODIFIERS)
7501     {
7502       int cpu_flags;
7503 
7504       cpu_flags = processor_flags_table[(int) s390_arch];
7505       if (TARGET_HTM && !(cpu_flags & PF_TX))
7506 	fprintf (asm_out_file, "+htm");
7507       else if (!TARGET_HTM && (cpu_flags & PF_TX))
7508 	fprintf (asm_out_file, "+nohtm");
7509       if (TARGET_VX && !(cpu_flags & PF_VX))
7510 	fprintf (asm_out_file, "+vx");
7511       else if (!TARGET_VX && (cpu_flags & PF_VX))
7512 	fprintf (asm_out_file, "+novx");
7513     }
7514   fprintf (asm_out_file, "\"\n");
7515 }
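
/* The result is a pair of directives along the lines of

	.machinemode zarch
	.machine "z13"

   where the .machine string may additionally carry "+htm"/"+nohtm"
   and "+vx"/"+novx" modifiers if S390_USE_ARCHITECTURE_MODIFIERS
   allows them.  */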
7516 
7517 /* Write an extra function header before the very start of the function.  */
7518 
7519 void
7520 s390_asm_output_function_prefix (FILE *asm_out_file,
7521 				 const char *fnname ATTRIBUTE_UNUSED)
7522 {
7523   if (DECL_FUNCTION_SPECIFIC_TARGET (current_function_decl) == NULL)
7524     return;
7525   /* Since only the function specific options are saved but not the indications
7526      which options are set, it's too much work here to figure out which options
7527      have actually changed.  Thus, generate .machine and .machinemode whenever a
7528      function has the target attribute or pragma.  */
7529   fprintf (asm_out_file, "\t.machinemode push\n");
7530   fprintf (asm_out_file, "\t.machine push\n");
7531   s390_asm_output_machine_for_arch (asm_out_file);
7532 }
7533 
7534 /* Write an extra function footer after the very end of the function.  */
7535 
7536 void
7537 s390_asm_declare_function_size (FILE *asm_out_file,
7538 				const char *fnname, tree decl)
7539 {
7540   if (!flag_inhibit_size_directive)
7541     ASM_OUTPUT_MEASURED_SIZE (asm_out_file, fnname);
7542   if (DECL_FUNCTION_SPECIFIC_TARGET (decl) == NULL)
7543     return;
7544   fprintf (asm_out_file, "\t.machine pop\n");
7545   fprintf (asm_out_file, "\t.machinemode pop\n");
7546 }
7547 #endif
7548 
7549 /* Write the extra assembler code needed to declare a function properly.  */
7550 
7551 void
7552 s390_asm_output_function_label (FILE *asm_out_file, const char *fname,
7553 				tree decl)
7554 {
7555   int hw_before, hw_after;
7556 
7557   s390_function_num_hotpatch_hw (decl, &hw_before, &hw_after);
7558   if (hw_before > 0)
7559     {
7560       unsigned int function_alignment;
7561       int i;
7562 
7563       /* Add a trampoline code area before the function label and initialize it
7564 	 with two-byte nop instructions.  This area can be overwritten with code
7565 	 that jumps to a patched version of the function.  */
7566       asm_fprintf (asm_out_file, "\tnopr\t%%r0"
7567 		   "\t# pre-label NOPs for hotpatch (%d halfwords)\n",
7568 		   hw_before);
7569       for (i = 1; i < hw_before; i++)
7570 	fputs ("\tnopr\t%r0\n", asm_out_file);
7571 
7572       /* Note:  The function label must be aligned so that (a) the bytes of the
7573 	 following nop do not cross a cacheline boundary, and (b) a jump address
7574 	 (eight bytes for 64 bit targets, 4 bytes for 32 bit targets) can be
7575 	 stored directly before the label without crossing a cacheline
7576 	 boundary.  All this is necessary to make sure the trampoline code can
7577 	 be changed atomically.
7578 	 This alignment is done automatically using the FUNCTION_BOUNDARY, but
7579 	 if there are NOPs before the function label, the alignment is placed
7580 	 before them.  So it is necessary to duplicate the alignment after the
7581 	 NOPs.  */
7582       function_alignment = MAX (8, DECL_ALIGN (decl) / BITS_PER_UNIT);
7583       if (! DECL_USER_ALIGN (decl))
7584 	function_alignment
7585 	  = MAX (function_alignment,
7586 		 (unsigned int) align_functions.levels[0].get_value ());
7587       fputs ("\t# alignment for hotpatch\n", asm_out_file);
7588       ASM_OUTPUT_ALIGN (asm_out_file, align_functions.levels[0].log);
7589     }
7590 
7591   if (S390_USE_TARGET_ATTRIBUTE && TARGET_DEBUG_ARG)
7592     {
7593       asm_fprintf (asm_out_file, "\t# fn:%s ar%d\n", fname, s390_arch);
7594       asm_fprintf (asm_out_file, "\t# fn:%s tu%d\n", fname, s390_tune);
7595       asm_fprintf (asm_out_file, "\t# fn:%s sg%d\n", fname, s390_stack_guard);
7596       asm_fprintf (asm_out_file, "\t# fn:%s ss%d\n", fname, s390_stack_size);
7597       asm_fprintf (asm_out_file, "\t# fn:%s bc%d\n", fname, s390_branch_cost);
7598       asm_fprintf (asm_out_file, "\t# fn:%s wf%d\n", fname,
7599 		   s390_warn_framesize);
7600       asm_fprintf (asm_out_file, "\t# fn:%s ba%d\n", fname, TARGET_BACKCHAIN);
7601       asm_fprintf (asm_out_file, "\t# fn:%s hd%d\n", fname, TARGET_HARD_DFP);
7602       asm_fprintf (asm_out_file, "\t# fn:%s hf%d\n", fname, !TARGET_SOFT_FLOAT);
7603       asm_fprintf (asm_out_file, "\t# fn:%s ht%d\n", fname, TARGET_OPT_HTM);
7604       asm_fprintf (asm_out_file, "\t# fn:%s vx%d\n", fname, TARGET_OPT_VX);
7605       asm_fprintf (asm_out_file, "\t# fn:%s ps%d\n", fname,
7606 		   TARGET_PACKED_STACK);
7607       asm_fprintf (asm_out_file, "\t# fn:%s se%d\n", fname, TARGET_SMALL_EXEC);
7608       asm_fprintf (asm_out_file, "\t# fn:%s mv%d\n", fname, TARGET_MVCLE);
7609       asm_fprintf (asm_out_file, "\t# fn:%s zv%d\n", fname, TARGET_ZVECTOR);
7610       asm_fprintf (asm_out_file, "\t# fn:%s wd%d\n", fname,
7611 		   s390_warn_dynamicstack_p);
7612     }
7613   ASM_OUTPUT_LABEL (asm_out_file, fname);
7614   if (hw_after > 0)
7615     asm_fprintf (asm_out_file,
7616 		 "\t# post-label NOPs for hotpatch (%d halfwords)\n",
7617 		 hw_after);
7618 }
7619 
7620 /* Output machine-dependent UNSPECs occurring in address constant X
7621    in assembler syntax to stdio stream FILE.  Returns true if the
7622    constant X could be recognized, false otherwise.  */
7623 
7624 static bool
7625 s390_output_addr_const_extra (FILE *file, rtx x)
7626 {
7627   if (GET_CODE (x) == UNSPEC && XVECLEN (x, 0) == 1)
7628     switch (XINT (x, 1))
7629       {
7630       case UNSPEC_GOTENT:
7631 	output_addr_const (file, XVECEXP (x, 0, 0));
7632 	fprintf (file, "@GOTENT");
7633 	return true;
7634       case UNSPEC_GOT:
7635 	output_addr_const (file, XVECEXP (x, 0, 0));
7636 	fprintf (file, "@GOT");
7637 	return true;
7638       case UNSPEC_GOTOFF:
7639 	output_addr_const (file, XVECEXP (x, 0, 0));
7640 	fprintf (file, "@GOTOFF");
7641 	return true;
7642       case UNSPEC_PLT:
7643 	output_addr_const (file, XVECEXP (x, 0, 0));
7644 	fprintf (file, "@PLT");
7645 	return true;
7646       case UNSPEC_PLTOFF:
7647 	output_addr_const (file, XVECEXP (x, 0, 0));
7648 	fprintf (file, "@PLTOFF");
7649 	return true;
7650       case UNSPEC_TLSGD:
7651 	output_addr_const (file, XVECEXP (x, 0, 0));
7652 	fprintf (file, "@TLSGD");
7653 	return true;
7654       case UNSPEC_TLSLDM:
7655 	assemble_name (file, get_some_local_dynamic_name ());
7656 	fprintf (file, "@TLSLDM");
7657 	return true;
7658       case UNSPEC_DTPOFF:
7659 	output_addr_const (file, XVECEXP (x, 0, 0));
7660 	fprintf (file, "@DTPOFF");
7661 	return true;
7662       case UNSPEC_NTPOFF:
7663 	output_addr_const (file, XVECEXP (x, 0, 0));
7664 	fprintf (file, "@NTPOFF");
7665 	return true;
7666       case UNSPEC_GOTNTPOFF:
7667 	output_addr_const (file, XVECEXP (x, 0, 0));
7668 	fprintf (file, "@GOTNTPOFF");
7669 	return true;
7670       case UNSPEC_INDNTPOFF:
7671 	output_addr_const (file, XVECEXP (x, 0, 0));
7672 	fprintf (file, "@INDNTPOFF");
7673 	return true;
7674       }
7675 
7676   if (GET_CODE (x) == UNSPEC && XVECLEN (x, 0) == 2)
7677     switch (XINT (x, 1))
7678       {
7679       case UNSPEC_POOL_OFFSET:
7680 	x = gen_rtx_MINUS (GET_MODE (x), XVECEXP (x, 0, 0), XVECEXP (x, 0, 1));
7681 	output_addr_const (file, x);
7682 	return true;
7683       }
7684   return false;
7685 }
7686 
7687 /* Output address operand ADDR in assembler syntax to
7688    stdio stream FILE.  */
7689 
7690 void
7691 print_operand_address (FILE *file, rtx addr)
7692 {
7693   struct s390_address ad;
7694   memset (&ad, 0, sizeof (s390_address));
7695 
7696   if (s390_loadrelative_operand_p (addr, NULL, NULL))
7697     {
7698       if (!TARGET_Z10)
7699 	{
7700 	  output_operand_lossage ("symbolic memory references are "
7701 				  "only supported on z10 or later");
7702 	  return;
7703 	}
7704       output_addr_const (file, addr);
7705       return;
7706     }
7707 
7708   if (!s390_decompose_address (addr, &ad)
7709       || (ad.base && !REGNO_OK_FOR_BASE_P (REGNO (ad.base)))
7710       || (ad.indx && !REGNO_OK_FOR_INDEX_P (REGNO (ad.indx))))
7711     output_operand_lossage ("cannot decompose address");
7712 
7713   if (ad.disp)
7714     output_addr_const (file, ad.disp);
7715   else
7716     fprintf (file, "0");
7717 
7718   if (ad.base && ad.indx)
7719     fprintf (file, "(%s,%s)", reg_names[REGNO (ad.indx)],
7720 			      reg_names[REGNO (ad.base)]);
7721   else if (ad.base)
7722     fprintf (file, "(%s)", reg_names[REGNO (ad.base)]);
7723 }
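
/* E.g. a D(X,B) address with displacement 8, index %r4 and base %r3
   is printed as "8(%r4,%r3)"; without an index it degenerates to
   "8(%r3)".  */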
7724 
7725 /* Output operand X in assembler syntax to stdio stream FILE.
7726    CODE specified the format flag.  The following format flags
7727    are recognized:
7728 
7729     'A': On z14 or higher: If operand is a mem print the alignment
7730 	 hint usable with vl/vst prefixed by a comma.
7731     'C': print opcode suffix for branch condition.
7732     'D': print opcode suffix for inverse branch condition.
7733     'E': print opcode suffix for branch on index instruction.
7734     'G': print the size of the operand in bytes.
7735     'J': print tls_load/tls_gdcall/tls_ldcall suffix
7736     'M': print the second word of a TImode operand.
7737     'N': print the second word of a DImode operand.
7738     'O': print only the displacement of a memory reference or address.
7739     'R': print only the base register of a memory reference or address.
7740     'S': print S-type memory reference (base+displacement).
7741     'Y': print address style operand without index (e.g. shift count or setmem
7742 	 operand).
7743 
7744     'b': print integer X as if it's an unsigned byte.
7745     'c': print integer X as if it's a signed byte.
7746     'e': "end" contiguous bitmask X in either DImode or vector inner mode.
7747     'f': "end" contiguous bitmask X in SImode.
7748     'h': print integer X as if it's a signed halfword.
7749     'i': print the first nonzero HImode part of X.
7750     'j': print the first HImode part unequal to -1 of X.
7751     'k': print the first nonzero SImode part of X.
7752     'm': print the first SImode part unequal to -1 of X.
7753     'o': print integer X as if it's an unsigned 32bit word.
7754     's': "start" of contiguous bitmask X in either DImode or vector inner mode.
7755     't': CONST_INT: "start" of contiguous bitmask X in SImode.
7756 	 CONST_VECTOR: Generate a bitmask for vgbm instruction.
7757     'x': print integer X as if it's an unsigned halfword.
7758     'v': print register number as vector register (v1 instead of f1).
7759 */
7760 
7761 void
7762 print_operand (FILE *file, rtx x, int code)
7763 {
7764   HOST_WIDE_INT ival;
7765 
7766   switch (code)
7767     {
7768     case 'A':
7769       if (TARGET_VECTOR_LOADSTORE_ALIGNMENT_HINTS && MEM_P (x))
7770 	{
7771 	  if (MEM_ALIGN (x) >= 128)
7772 	    fprintf (file, ",4");
7773 	  else if (MEM_ALIGN (x) == 64)
7774 	    fprintf (file, ",3");
7775 	}
7776       return;
7777     case 'C':
7778       fprintf (file, s390_branch_condition_mnemonic (x, FALSE));
7779       return;
7780 
7781     case 'D':
7782       fprintf (file, s390_branch_condition_mnemonic (x, TRUE));
7783       return;
7784 
7785     case 'E':
7786       if (GET_CODE (x) == LE)
7787 	fprintf (file, "l");
7788       else if (GET_CODE (x) == GT)
7789 	fprintf (file, "h");
7790       else
7791 	output_operand_lossage ("invalid comparison operator "
7792 				"for 'E' output modifier");
7793       return;
7794 
7795     case 'J':
7796       if (GET_CODE (x) == SYMBOL_REF)
7797 	{
7798 	  fprintf (file, "%s", ":tls_load:");
7799 	  output_addr_const (file, x);
7800 	}
7801       else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLSGD)
7802 	{
7803 	  fprintf (file, "%s", ":tls_gdcall:");
7804 	  output_addr_const (file, XVECEXP (x, 0, 0));
7805 	}
7806       else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLSLDM)
7807 	{
7808 	  fprintf (file, "%s", ":tls_ldcall:");
7809 	  const char *name = get_some_local_dynamic_name ();
7810 	  gcc_assert (name);
7811 	  assemble_name (file, name);
7812 	}
7813       else
7814 	output_operand_lossage ("invalid reference for 'J' output modifier");
7815       return;
7816 
7817     case 'G':
7818       fprintf (file, "%u", GET_MODE_SIZE (GET_MODE (x)));
7819       return;
7820 
7821     case 'O':
7822       {
7823 	struct s390_address ad;
7824 	int ret;
7825 
7826 	ret = s390_decompose_address (MEM_P (x) ? XEXP (x, 0) : x, &ad);
7827 
7828 	if (!ret
7829 	    || (ad.base && !REGNO_OK_FOR_BASE_P (REGNO (ad.base)))
7830 	    || ad.indx)
7831 	  {
7832 	    output_operand_lossage ("invalid address for 'O' output modifier");
7833 	    return;
7834 	  }
7835 
7836 	if (ad.disp)
7837 	  output_addr_const (file, ad.disp);
7838 	else
7839 	  fprintf (file, "0");
7840       }
7841       return;
7842 
7843     case 'R':
7844       {
7845 	struct s390_address ad;
7846 	int ret;
7847 
7848 	ret = s390_decompose_address (MEM_P (x) ? XEXP (x, 0) : x, &ad);
7849 
7850 	if (!ret
7851 	    || (ad.base && !REGNO_OK_FOR_BASE_P (REGNO (ad.base)))
7852 	    || ad.indx)
7853 	  {
7854 	    output_operand_lossage ("invalid address for 'R' output modifier");
7855 	    return;
7856 	  }
7857 
7858 	if (ad.base)
7859 	  fprintf (file, "%s", reg_names[REGNO (ad.base)]);
7860 	else
7861 	  fprintf (file, "0");
7862       }
7863       return;
7864 
7865     case 'S':
7866       {
7867 	struct s390_address ad;
7868 	int ret;
7869 
7870 	if (!MEM_P (x))
7871 	  {
7872 	    output_operand_lossage ("memory reference expected for "
7873 				    "'S' output modifier");
7874 	    return;
7875 	  }
7876 	ret = s390_decompose_address (XEXP (x, 0), &ad);
7877 
7878 	if (!ret
7879 	    || (ad.base && !REGNO_OK_FOR_BASE_P (REGNO (ad.base)))
7880 	    || ad.indx)
7881 	  {
7882 	    output_operand_lossage ("invalid address for 'S' output modifier");
7883 	    return;
7884 	  }
7885 
7886 	if (ad.disp)
7887 	  output_addr_const (file, ad.disp);
7888 	else
7889 	  fprintf (file, "0");
7890 
7891 	if (ad.base)
7892 	  fprintf (file, "(%s)", reg_names[REGNO (ad.base)]);
7893       }
7894       return;
7895 
7896     case 'N':
7897       if (GET_CODE (x) == REG)
7898 	x = gen_rtx_REG (GET_MODE (x), REGNO (x) + 1);
7899       else if (GET_CODE (x) == MEM)
7900 	x = change_address (x, VOIDmode,
7901 			    plus_constant (Pmode, XEXP (x, 0), 4));
7902       else
7903 	output_operand_lossage ("register or memory expression expected "
7904 				"for 'N' output modifier");
7905       break;
7906 
7907     case 'M':
7908       if (GET_CODE (x) == REG)
7909 	x = gen_rtx_REG (GET_MODE (x), REGNO (x) + 1);
7910       else if (GET_CODE (x) == MEM)
7911 	x = change_address (x, VOIDmode,
7912 			    plus_constant (Pmode, XEXP (x, 0), 8));
7913       else
7914 	output_operand_lossage ("register or memory expression expected "
7915 				"for 'M' output modifier");
7916       break;
7917 
7918     case 'Y':
7919       print_addrstyle_operand (file, x);
7920       return;
7921     }
7922 
7923   switch (GET_CODE (x))
7924     {
7925     case REG:
7926       /* Print FP regs as fx instead of vx when they are accessed
7927 	 through non-vector mode.  */
7928       if (code == 'v'
7929 	  || VECTOR_NOFP_REG_P (x)
7930 	  || (FP_REG_P (x) && VECTOR_MODE_P (GET_MODE (x)))
7931 	  || (VECTOR_REG_P (x)
7932 	      && (GET_MODE_SIZE (GET_MODE (x)) /
7933 		  s390_class_max_nregs (FP_REGS, GET_MODE (x))) > 8))
7934 	fprintf (file, "%%v%s", reg_names[REGNO (x)] + 2);
7935       else
7936 	fprintf (file, "%s", reg_names[REGNO (x)]);
7937       break;
7938 
7939     case MEM:
7940       output_address (GET_MODE (x), XEXP (x, 0));
7941       break;
7942 
7943     case CONST:
7944     case CODE_LABEL:
7945     case LABEL_REF:
7946     case SYMBOL_REF:
7947       output_addr_const (file, x);
7948       break;
7949 
7950     case CONST_INT:
7951       ival = INTVAL (x);
7952       switch (code)
7953 	{
7954 	case 0:
7955 	  break;
7956 	case 'b':
7957 	  ival &= 0xff;
7958 	  break;
7959 	case 'c':
7960 	  ival = ((ival & 0xff) ^ 0x80) - 0x80;
7961 	  break;
7962 	case 'x':
7963 	  ival &= 0xffff;
7964 	  break;
7965 	case 'h':
7966 	  ival = ((ival & 0xffff) ^ 0x8000) - 0x8000;
7967 	  break;
7968 	case 'i':
7969 	  ival = s390_extract_part (x, HImode, 0);
7970 	  break;
7971 	case 'j':
7972 	  ival = s390_extract_part (x, HImode, -1);
7973 	  break;
7974 	case 'k':
7975 	  ival = s390_extract_part (x, SImode, 0);
7976 	  break;
7977 	case 'm':
7978 	  ival = s390_extract_part (x, SImode, -1);
7979 	  break;
7980 	case 'o':
7981 	  ival &= 0xffffffff;
7982 	  break;
7983 	case 'e': case 'f':
7984 	case 's': case 't':
7985 	  {
7986 	    int start, end;
7987 	    int len;
7988 	    bool ok;
7989 
7990 	    len = (code == 's' || code == 'e' ? 64 : 32);
7991 	    ok = s390_contiguous_bitmask_p (ival, true, len, &start, &end);
7992 	    gcc_assert (ok);
7993 	    if (code == 's' || code == 't')
7994 	      ival = start;
7995 	    else
7996 	      ival = end;
7997 	  }
7998 	  break;
7999 	default:
8000 	  output_operand_lossage ("invalid constant for output modifier '%c'", code);
8001 	}
8002       fprintf (file, HOST_WIDE_INT_PRINT_DEC, ival);
8003       break;
8004 
8005     case CONST_WIDE_INT:
8006       if (code == 'b')
8007 	fprintf (file, HOST_WIDE_INT_PRINT_DEC,
8008 		 CONST_WIDE_INT_ELT (x, 0) & 0xff);
8009       else if (code == 'x')
8010 	fprintf (file, HOST_WIDE_INT_PRINT_DEC,
8011 		 CONST_WIDE_INT_ELT (x, 0) & 0xffff);
8012       else if (code == 'h')
8013 	fprintf (file, HOST_WIDE_INT_PRINT_DEC,
8014 		 ((CONST_WIDE_INT_ELT (x, 0) & 0xffff) ^ 0x8000) - 0x8000);
8015       else
8016 	{
8017 	  if (code == 0)
8018 	    output_operand_lossage ("invalid constant - try using "
8019 				    "an output modifier");
8020 	  else
8021 	    output_operand_lossage ("invalid constant for output modifier '%c'",
8022 				    code);
8023 	}
8024       break;
8025     case CONST_VECTOR:
8026       switch (code)
8027 	{
8028 	case 'h':
8029 	  gcc_assert (const_vec_duplicate_p (x));
8030 	  fprintf (file, HOST_WIDE_INT_PRINT_DEC,
8031 		   ((INTVAL (XVECEXP (x, 0, 0)) & 0xffff) ^ 0x8000) - 0x8000);
8032 	  break;
8033 	case 'e':
8034 	case 's':
8035 	  {
8036 	    int start, end;
8037 	    bool ok;
8038 
8039 	    ok = s390_contiguous_bitmask_vector_p (x, &start, &end);
8040 	    gcc_assert (ok);
8041 	    ival = (code == 's') ? start : end;
8042 	    fprintf (file, HOST_WIDE_INT_PRINT_DEC, ival);
8043 	  }
8044 	  break;
8045 	case 't':
8046 	  {
8047 	    unsigned mask;
8048 	    bool ok = s390_bytemask_vector_p (x, &mask);
8049 	    gcc_assert (ok);
8050 	    fprintf (file, "%u", mask);
8051 	  }
8052 	  break;
8053 
8054 	default:
8055 	  output_operand_lossage ("invalid constant vector for output "
8056 				  "modifier '%c'", code);
8057 	}
8058       break;
8059 
8060     default:
8061       if (code == 0)
8062 	output_operand_lossage ("invalid expression - try using "
8063 				"an output modifier");
8064       else
8065 	output_operand_lossage ("invalid expression for output "
8066 				"modifier '%c'", code);
8067       break;
8068     }
8069 }
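
/* Two examples for the CONST_INT modifiers: 'x' applied to
   (const_int -1) prints 65535, whereas 'h' applied to the same
   operand prints -1, since the value is sign extended from 16
   bits.  */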
8070 
8071 /* Target hook for assembling integer objects.  We need to define it
8072    here to work around a bug in some versions of GAS, which couldn't
8073    handle values smaller than INT_MIN when printed in decimal.  */
8074 
8075 static bool
8076 s390_assemble_integer (rtx x, unsigned int size, int aligned_p)
8077 {
8078   if (size == 8 && aligned_p
8079       && GET_CODE (x) == CONST_INT && INTVAL (x) < INT_MIN)
8080     {
8081       fprintf (asm_out_file, "\t.quad\t" HOST_WIDE_INT_PRINT_HEX "\n",
8082 	       INTVAL (x));
8083       return true;
8084     }
8085   return default_assemble_integer (x, size, aligned_p);
8086 }
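
/* E.g. (const_int -2147483649), which is smaller than INT_MIN, is
   emitted as "\t.quad\t0xffffffff7fffffff" rather than being passed
   to the generic decimal output.  */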
8087 
8088 /* Returns true if register REGNO is used for forming
8089    a memory address in expression X.  */
8090 
8091 static bool
8092 reg_used_in_mem_p (int regno, rtx x)
8093 {
8094   enum rtx_code code = GET_CODE (x);
8095   int i, j;
8096   const char *fmt;
8097 
8098   if (code == MEM)
8099     {
8100       if (refers_to_regno_p (regno, XEXP (x, 0)))
8101 	return true;
8102     }
8103   else if (code == SET
8104 	   && GET_CODE (SET_DEST (x)) == PC)
8105     {
8106       if (refers_to_regno_p (regno, SET_SRC (x)))
8107 	return true;
8108     }
8109 
8110   fmt = GET_RTX_FORMAT (code);
8111   for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
8112     {
8113       if (fmt[i] == 'e'
8114 	  && reg_used_in_mem_p (regno, XEXP (x, i)))
8115 	return true;
8116 
8117       else if (fmt[i] == 'E')
8118 	for (j = 0; j < XVECLEN (x, i); j++)
8119 	  if (reg_used_in_mem_p (regno, XVECEXP (x, i, j)))
8120 	    return true;
8121     }
8122   return false;
8123 }
8124 
8125 /* Returns true if expression DEP_RTX sets an address register
8126    used by instruction INSN to address memory.  */
8127 
8128 static bool
8129 addr_generation_dependency_p (rtx dep_rtx, rtx_insn *insn)
8130 {
8131   rtx target, pat;
8132 
8133   if (NONJUMP_INSN_P (dep_rtx))
8134     dep_rtx = PATTERN (dep_rtx);
8135 
8136   if (GET_CODE (dep_rtx) == SET)
8137     {
8138       target = SET_DEST (dep_rtx);
8139       if (GET_CODE (target) == STRICT_LOW_PART)
8140 	target = XEXP (target, 0);
8141       while (GET_CODE (target) == SUBREG)
8142 	target = SUBREG_REG (target);
8143 
8144       if (GET_CODE (target) == REG)
8145 	{
8146 	  int regno = REGNO (target);
8147 
8148 	  if (s390_safe_attr_type (insn) == TYPE_LA)
8149 	    {
8150 	      pat = PATTERN (insn);
8151 	      if (GET_CODE (pat) == PARALLEL)
8152 		{
8153 		  gcc_assert (XVECLEN (pat, 0) == 2);
8154 		  pat = XVECEXP (pat, 0, 0);
8155 		}
8156 	      gcc_assert (GET_CODE (pat) == SET);
8157 	      return refers_to_regno_p (regno, SET_SRC (pat));
8158 	    }
8159 	  else if (get_attr_atype (insn) == ATYPE_AGEN)
8160 	    return reg_used_in_mem_p (regno, PATTERN (insn));
8161 	}
8162     }
8163   return false;
8164 }
8165 
8166 /* Return 1, if dep_insn sets register used in insn in the agen unit.  */
8167 
8168 int
8169 s390_agen_dep_p (rtx_insn *dep_insn, rtx_insn *insn)
8170 {
8171   rtx dep_rtx = PATTERN (dep_insn);
8172   int i;
8173 
8174   if (GET_CODE (dep_rtx) == SET
8175       && addr_generation_dependency_p (dep_rtx, insn))
8176     return 1;
8177   else if (GET_CODE (dep_rtx) == PARALLEL)
8178     {
8179       for (i = 0; i < XVECLEN (dep_rtx, 0); i++)
8180 	{
8181 	  if (addr_generation_dependency_p (XVECEXP (dep_rtx, 0, i), insn))
8182 	    return 1;
8183 	}
8184     }
8185   return 0;
8186 }
8187 
8188 
8189 /* A C statement (sans semicolon) to update the integer scheduling priority
8190    INSN_PRIORITY (INSN).  Increase the priority to execute the INSN earlier,
8191    reduce the priority to execute INSN later.  Do not define this macro if
8192    you do not need to adjust the scheduling priorities of insns.
8193 
8194    A STD instruction should be scheduled earlier,
8195    in order to use the bypass.  */
8196 static int
8197 s390_adjust_priority (rtx_insn *insn, int priority)
8198 {
8199   if (! INSN_P (insn))
8200     return priority;
8201 
8202   if (s390_tune <= PROCESSOR_2064_Z900)
8203     return priority;
8204 
8205   switch (s390_safe_attr_type (insn))
8206     {
8207       case TYPE_FSTOREDF:
8208       case TYPE_FSTORESF:
8209 	priority = priority << 3;
8210 	break;
8211       case TYPE_STORE:
8212       case TYPE_STM:
8213 	priority = priority << 1;
8214 	break;
8215       default:
8216 	break;
8217     }
8218   return priority;
8219 }
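
/* For illustration: when tuning for a CPU newer than z900, an
   FSTOREDF/FSTORESF insn with an initial priority of 10 ends up with
   priority 80 (10 << 3) and a plain STORE/STM with 20 (10 << 1), so
   the scheduler tends to issue those stores earlier and the bypass
   mentioned above can be used.  */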
8220 
8221 
8222 /* The number of instructions that can be issued per cycle.  */
8223 
8224 static int
8225 s390_issue_rate (void)
8226 {
8227   switch (s390_tune)
8228     {
8229     case PROCESSOR_2084_Z990:
8230     case PROCESSOR_2094_Z9_109:
8231     case PROCESSOR_2094_Z9_EC:
8232     case PROCESSOR_2817_Z196:
8233       return 3;
8234     case PROCESSOR_2097_Z10:
8235       return 2;
8236     case PROCESSOR_2064_Z900:
8237       /* Starting with EC12 we use the sched_reorder hook to take care
8238 	 of instruction dispatch constraints.  The algorithm only
8239 	 picks the best instruction and assumes only a single
8240 	 instruction gets issued per cycle.  */
8241     case PROCESSOR_2827_ZEC12:
8242     case PROCESSOR_2964_Z13:
8243     case PROCESSOR_3906_Z14:
8244     default:
8245       return 1;
8246     }
8247 }
8248 
8249 static int
8250 s390_first_cycle_multipass_dfa_lookahead (void)
8251 {
8252   return 4;
8253 }
8254 
8255 static void
8256 annotate_constant_pool_refs_1 (rtx *x)
8257 {
8258   int i, j;
8259   const char *fmt;
8260 
8261   gcc_assert (GET_CODE (*x) != SYMBOL_REF
8262 	      || !CONSTANT_POOL_ADDRESS_P (*x));
8263 
8264   /* Literal pool references can only occur inside a MEM ...  */
8265   if (GET_CODE (*x) == MEM)
8266     {
8267       rtx memref = XEXP (*x, 0);
8268 
8269       if (GET_CODE (memref) == SYMBOL_REF
8270 	  && CONSTANT_POOL_ADDRESS_P (memref))
8271 	{
8272 	  rtx base = cfun->machine->base_reg;
8273 	  rtx addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, memref, base),
8274 				     UNSPEC_LTREF);
8275 
8276 	  *x = replace_equiv_address (*x, addr);
8277 	  return;
8278 	}
8279 
8280       if (GET_CODE (memref) == CONST
8281 	  && GET_CODE (XEXP (memref, 0)) == PLUS
8282 	  && GET_CODE (XEXP (XEXP (memref, 0), 1)) == CONST_INT
8283 	  && GET_CODE (XEXP (XEXP (memref, 0), 0)) == SYMBOL_REF
8284 	  && CONSTANT_POOL_ADDRESS_P (XEXP (XEXP (memref, 0), 0)))
8285 	{
8286 	  HOST_WIDE_INT off = INTVAL (XEXP (XEXP (memref, 0), 1));
8287 	  rtx sym = XEXP (XEXP (memref, 0), 0);
8288 	  rtx base = cfun->machine->base_reg;
8289 	  rtx addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, sym, base),
8290 				     UNSPEC_LTREF);
8291 
8292 	  *x = replace_equiv_address (*x, plus_constant (Pmode, addr, off));
8293 	  return;
8294 	}
8295     }
8296 
8297   /* ... or a load-address type pattern.  */
8298   if (GET_CODE (*x) == SET)
8299     {
8300       rtx addrref = SET_SRC (*x);
8301 
8302       if (GET_CODE (addrref) == SYMBOL_REF
8303 	  && CONSTANT_POOL_ADDRESS_P (addrref))
8304 	{
8305 	  rtx base = cfun->machine->base_reg;
8306 	  rtx addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, addrref, base),
8307 				     UNSPEC_LTREF);
8308 
8309 	  SET_SRC (*x) = addr;
8310 	  return;
8311 	}
8312 
8313       if (GET_CODE (addrref) == CONST
8314 	  && GET_CODE (XEXP (addrref, 0)) == PLUS
8315 	  && GET_CODE (XEXP (XEXP (addrref, 0), 1)) == CONST_INT
8316 	  && GET_CODE (XEXP (XEXP (addrref, 0), 0)) == SYMBOL_REF
8317 	  && CONSTANT_POOL_ADDRESS_P (XEXP (XEXP (addrref, 0), 0)))
8318 	{
8319 	  HOST_WIDE_INT off = INTVAL (XEXP (XEXP (addrref, 0), 1));
8320 	  rtx sym = XEXP (XEXP (addrref, 0), 0);
8321 	  rtx base = cfun->machine->base_reg;
8322 	  rtx addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, sym, base),
8323 				     UNSPEC_LTREF);
8324 
8325 	  SET_SRC (*x) = plus_constant (Pmode, addr, off);
8326 	  return;
8327 	}
8328     }
8329 
8330   fmt = GET_RTX_FORMAT (GET_CODE (*x));
8331   for (i = GET_RTX_LENGTH (GET_CODE (*x)) - 1; i >= 0; i--)
8332     {
8333       if (fmt[i] == 'e')
8334 	{
8335 	  annotate_constant_pool_refs_1 (&XEXP (*x, i));
8336 	}
8337       else if (fmt[i] == 'E')
8338 	{
8339 	  for (j = 0; j < XVECLEN (*x, i); j++)
8340 	    annotate_constant_pool_refs_1 (&XVECEXP (*x, i, j));
8341 	}
8342     }
8343 }
8344 
8345 /* Annotate every literal pool reference in INSN by an UNSPEC_LTREF expression.
8346    Fix up MEMs as required.
8347    Skip insns which support relative addressing, because they do not use a base
8348    register.  */
8349 
8350 static void
8351 annotate_constant_pool_refs (rtx_insn *insn)
8352 {
8353   if (s390_safe_relative_long_p (insn))
8354     return;
8355   annotate_constant_pool_refs_1 (&PATTERN (insn));
8356 }
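
/* Sketch of the annotation done above (RTL shapes are illustrative):

     (mem (symbol_ref C))                              ; C in the pool
   becomes
     (mem (unspec [(symbol_ref C) (base)] UNSPEC_LTREF))

   and a CONST of SYMBOL_REF plus offset is rewritten the same way,
   with the offset re-applied around the UNSPEC via plus_constant.  */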
8357 
8358 static void
8359 find_constant_pool_ref_1 (rtx x, rtx *ref)
8360 {
8361   int i, j;
8362   const char *fmt;
8363 
8364   /* Ignore POOL_ENTRY insns.  */
8365   if (GET_CODE (x) == UNSPEC_VOLATILE
8366       && XINT (x, 1) == UNSPECV_POOL_ENTRY)
8367     return;
8368 
8369   gcc_assert (GET_CODE (x) != SYMBOL_REF
8370 	      || !CONSTANT_POOL_ADDRESS_P (x));
8371 
8372   if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_LTREF)
8373     {
8374       rtx sym = XVECEXP (x, 0, 0);
8375       gcc_assert (GET_CODE (sym) == SYMBOL_REF
8376 		  && CONSTANT_POOL_ADDRESS_P (sym));
8377 
8378       if (*ref == NULL_RTX)
8379 	*ref = sym;
8380       else
8381 	gcc_assert (*ref == sym);
8382 
8383       return;
8384     }
8385 
8386   fmt = GET_RTX_FORMAT (GET_CODE (x));
8387   for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
8388     {
8389       if (fmt[i] == 'e')
8390 	{
8391 	  find_constant_pool_ref_1 (XEXP (x, i), ref);
8392 	}
8393       else if (fmt[i] == 'E')
8394 	{
8395 	  for (j = 0; j < XVECLEN (x, i); j++)
8396 	    find_constant_pool_ref_1 (XVECEXP (x, i, j), ref);
8397 	}
8398     }
8399 }
8400 
8401 /* Find an annotated literal pool symbol referenced in INSN,
8402    and store it at REF.  Will abort if INSN contains references to
8403    more than one such pool symbol; multiple references to the same
8404    symbol are allowed, however.
8405 
8406    The rtx pointed to by REF must be initialized to NULL_RTX
8407    by the caller before calling this routine.
8408 
8409    Skip insns which support relative addressing, because they do not use a base
8410    register.  */
8411 
8412 static void
8413 find_constant_pool_ref (rtx_insn *insn, rtx *ref)
8414 {
8415   if (s390_safe_relative_long_p (insn))
8416     return;
8417   find_constant_pool_ref_1 (PATTERN (insn), ref);
8418 }
8419 
8420 static void
8421 replace_constant_pool_ref_1 (rtx *x, rtx ref, rtx offset)
8422 {
8423   int i, j;
8424   const char *fmt;
8425 
8426   gcc_assert (*x != ref);
8427 
8428   if (GET_CODE (*x) == UNSPEC
8429       && XINT (*x, 1) == UNSPEC_LTREF
8430       && XVECEXP (*x, 0, 0) == ref)
8431     {
8432       *x = gen_rtx_PLUS (Pmode, XVECEXP (*x, 0, 1), offset);
8433       return;
8434     }
8435 
8436   if (GET_CODE (*x) == PLUS
8437       && GET_CODE (XEXP (*x, 1)) == CONST_INT
8438       && GET_CODE (XEXP (*x, 0)) == UNSPEC
8439       && XINT (XEXP (*x, 0), 1) == UNSPEC_LTREF
8440       && XVECEXP (XEXP (*x, 0), 0, 0) == ref)
8441     {
8442       rtx addr = gen_rtx_PLUS (Pmode, XVECEXP (XEXP (*x, 0), 0, 1), offset);
8443       *x = plus_constant (Pmode, addr, INTVAL (XEXP (*x, 1)));
8444       return;
8445     }
8446 
8447   fmt = GET_RTX_FORMAT (GET_CODE (*x));
8448   for (i = GET_RTX_LENGTH (GET_CODE (*x)) - 1; i >= 0; i--)
8449     {
8450       if (fmt[i] == 'e')
8451 	{
8452 	  replace_constant_pool_ref_1 (&XEXP (*x, i), ref, offset);
8453 	}
8454       else if (fmt[i] == 'E')
8455 	{
8456 	  for (j = 0; j < XVECLEN (*x, i); j++)
8457 	    replace_constant_pool_ref_1 (&XVECEXP (*x, i, j), ref, offset);
8458 	}
8459     }
8460 }
8461 
8462 /* Replace every reference to the annotated literal pool
8463    symbol REF in INSN by its base plus OFFSET.
8464    Skip insns which support relative addressing, because they do not use a base
8465    register.  */
8466 
8467 static void
8468 replace_constant_pool_ref (rtx_insn *insn, rtx ref, rtx offset)
8469 {
8470   if (s390_safe_relative_long_p (insn))
8471     return;
8472   replace_constant_pool_ref_1 (&PATTERN (insn), ref, offset);
8473 }
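
/* Sketch of the replacement done above (illustrative RTL): once the
   pool has been dumped and OFFSET is known,

     (unspec [(symbol_ref C) (base)] UNSPEC_LTREF)
   becomes
     (plus (base) (offset))

   i.e. a plain base + displacement address into the emitted pool.  */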
8474 
8475 /* We keep a list of constants which we have to add to internal
8476    constant tables in the middle of large functions.  */
8477 
8478 #define NR_C_MODES 32
8479 machine_mode constant_modes[NR_C_MODES] =
8480 {
8481   TFmode, TImode, TDmode,
8482   V16QImode, V8HImode, V4SImode, V2DImode, V1TImode,
8483   V4SFmode, V2DFmode, V1TFmode,
8484   DFmode, DImode, DDmode,
8485   V8QImode, V4HImode, V2SImode, V1DImode, V2SFmode, V1DFmode,
8486   SFmode, SImode, SDmode,
8487   V4QImode, V2HImode, V1SImode,  V1SFmode,
8488   HImode,
8489   V2QImode, V1HImode,
8490   QImode,
8491   V1QImode
8492 };
8493 
8494 struct constant
8495 {
8496   struct constant *next;
8497   rtx value;
8498   rtx_code_label *label;
8499 };
8500 
8501 struct constant_pool
8502 {
8503   struct constant_pool *next;
8504   rtx_insn *first_insn;
8505   rtx_insn *pool_insn;
8506   bitmap insns;
8507   rtx_insn *emit_pool_after;
8508 
8509   struct constant *constants[NR_C_MODES];
8510   struct constant *execute;
8511   rtx_code_label *label;
8512   int size;
8513 };
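
/* A minimal usage sketch of these structures (illustrative only; the
   real drivers are the mainpool/chunkify routines below, and
   s390_dump_pool additionally expects a pool placeholder insn to have
   been emitted):

     struct constant_pool *pool = s390_alloc_pool ();
     s390_add_constant (pool, val, mode);
     ...
     rtx off = s390_find_constant (pool, val, mode);
     s390_dump_pool (pool, false);
     s390_free_pool (pool);

   Constants are kept in per-mode lists indexed through constant_modes
   so that they can be emitted in decreasing alignment order.  */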
8514 
8515 /* Allocate new constant_pool structure.  */
8516 
8517 static struct constant_pool *
8518 s390_alloc_pool (void)
8519 {
8520   struct constant_pool *pool;
8521   int i;
8522 
8523   pool = (struct constant_pool *) xmalloc (sizeof *pool);
8524   pool->next = NULL;
8525   for (i = 0; i < NR_C_MODES; i++)
8526     pool->constants[i] = NULL;
8527 
8528   pool->execute = NULL;
8529   pool->label = gen_label_rtx ();
8530   pool->first_insn = NULL;
8531   pool->pool_insn = NULL;
8532   pool->insns = BITMAP_ALLOC (NULL);
8533   pool->size = 0;
8534   pool->emit_pool_after = NULL;
8535 
8536   return pool;
8537 }
8538 
8539 /* Create new constant pool covering instructions starting at INSN
8540    and chain it to the end of POOL_LIST.  */
8541 
8542 static struct constant_pool *
8543 s390_start_pool (struct constant_pool **pool_list, rtx_insn *insn)
8544 {
8545   struct constant_pool *pool, **prev;
8546 
8547   pool = s390_alloc_pool ();
8548   pool->first_insn = insn;
8549 
8550   for (prev = pool_list; *prev; prev = &(*prev)->next)
8551     ;
8552   *prev = pool;
8553 
8554   return pool;
8555 }
8556 
8557 /* End range of instructions covered by POOL at INSN and emit
8558    placeholder insn representing the pool.  */
8559 
8560 static void
8561 s390_end_pool (struct constant_pool *pool, rtx_insn *insn)
8562 {
8563   rtx pool_size = GEN_INT (pool->size + 8 /* alignment slop */);
8564 
8565   if (!insn)
8566     insn = get_last_insn ();
8567 
8568   pool->pool_insn = emit_insn_after (gen_pool (pool_size), insn);
8569   INSN_ADDRESSES_NEW (pool->pool_insn, -1);
8570 }
8571 
8572 /* Add INSN to the list of insns covered by POOL.  */
8573 
8574 static void
8575 s390_add_pool_insn (struct constant_pool *pool, rtx insn)
8576 {
8577   bitmap_set_bit (pool->insns, INSN_UID (insn));
8578 }
8579 
8580 /* Return pool out of POOL_LIST that covers INSN.  */
8581 
8582 static struct constant_pool *
8583 s390_find_pool (struct constant_pool *pool_list, rtx insn)
8584 {
8585   struct constant_pool *pool;
8586 
8587   for (pool = pool_list; pool; pool = pool->next)
8588     if (bitmap_bit_p (pool->insns, INSN_UID (insn)))
8589       break;
8590 
8591   return pool;
8592 }
8593 
8594 /* Add constant VAL of mode MODE to the constant pool POOL.  */
8595 
8596 static void
8597 s390_add_constant (struct constant_pool *pool, rtx val, machine_mode mode)
8598 {
8599   struct constant *c;
8600   int i;
8601 
8602   for (i = 0; i < NR_C_MODES; i++)
8603     if (constant_modes[i] == mode)
8604       break;
8605   gcc_assert (i != NR_C_MODES);
8606 
8607   for (c = pool->constants[i]; c != NULL; c = c->next)
8608     if (rtx_equal_p (val, c->value))
8609       break;
8610 
8611   if (c == NULL)
8612     {
8613       c = (struct constant *) xmalloc (sizeof *c);
8614       c->value = val;
8615       c->label = gen_label_rtx ();
8616       c->next = pool->constants[i];
8617       pool->constants[i] = c;
8618       pool->size += GET_MODE_SIZE (mode);
8619     }
8620 }
8621 
8622 /* Return an rtx that represents the offset of X from the start of
8623    pool POOL.  */
8624 
8625 static rtx
8626 s390_pool_offset (struct constant_pool *pool, rtx x)
8627 {
8628   rtx label;
8629 
8630   label = gen_rtx_LABEL_REF (GET_MODE (x), pool->label);
8631   x = gen_rtx_UNSPEC (GET_MODE (x), gen_rtvec (2, x, label),
8632 		      UNSPEC_POOL_OFFSET);
8633   return gen_rtx_CONST (GET_MODE (x), x);
8634 }
8635 
8636 /* Find constant VAL of mode MODE in the constant pool POOL.
8637    Return an RTX describing the distance from the start of
8638    the pool to the location of the new constant.  */
8639 
8640 static rtx
8641 s390_find_constant (struct constant_pool *pool, rtx val,
8642 		    machine_mode mode)
8643 {
8644   struct constant *c;
8645   int i;
8646 
8647   for (i = 0; i < NR_C_MODES; i++)
8648     if (constant_modes[i] == mode)
8649       break;
8650   gcc_assert (i != NR_C_MODES);
8651 
8652   for (c = pool->constants[i]; c != NULL; c = c->next)
8653     if (rtx_equal_p (val, c->value))
8654       break;
8655 
8656   gcc_assert (c);
8657 
8658   return s390_pool_offset (pool, gen_rtx_LABEL_REF (Pmode, c->label));
8659 }
8660 
8661 /* Check whether INSN is an execute.  Return the label_ref to its
8662    execute target template if so, NULL_RTX otherwise.  */
8663 
8664 static rtx
8665 s390_execute_label (rtx insn)
8666 {
8667   if (INSN_P (insn)
8668       && GET_CODE (PATTERN (insn)) == PARALLEL
8669       && GET_CODE (XVECEXP (PATTERN (insn), 0, 0)) == UNSPEC
8670       && (XINT (XVECEXP (PATTERN (insn), 0, 0), 1) == UNSPEC_EXECUTE
8671 	  || XINT (XVECEXP (PATTERN (insn), 0, 0), 1) == UNSPEC_EXECUTE_JUMP))
8672     {
8673       if (XINT (XVECEXP (PATTERN (insn), 0, 0), 1) == UNSPEC_EXECUTE)
8674 	return XVECEXP (XVECEXP (PATTERN (insn), 0, 0), 0, 2);
8675       else
8676 	{
8677 	  gcc_assert (JUMP_P (insn));
8678 	  /* For jump insns as execute target:
8679 	     - There is one operand less in the parallel (the
8680 	       modification register of the execute is always 0).
8681 	     - The execute target label is wrapped into an
8682 	       if_then_else in order to hide it from jump analysis.  */
8683 	  return XEXP (XVECEXP (XVECEXP (PATTERN (insn), 0, 0), 0, 0), 0);
8684 	}
8685     }
8686 
8687   return NULL_RTX;
8688 }
8689 
8690 /* Find execute target for INSN in the constant pool POOL.
8691    Return an RTX describing the distance from the start of
8692    the pool to the location of the execute target.  */
8693 
8694 static rtx
8695 s390_find_execute (struct constant_pool *pool, rtx insn)
8696 {
8697   struct constant *c;
8698 
8699   for (c = pool->execute; c != NULL; c = c->next)
8700     if (INSN_UID (insn) == INSN_UID (c->value))
8701       break;
8702 
8703   gcc_assert (c);
8704 
8705   return s390_pool_offset (pool, gen_rtx_LABEL_REF (Pmode, c->label));
8706 }
8707 
8708 /* For an execute INSN, extract the execute target template.  */
8709 
8710 static rtx
8711 s390_execute_target (rtx insn)
8712 {
8713   rtx pattern = PATTERN (insn);
8714   gcc_assert (s390_execute_label (insn));
8715 
8716   if (XVECLEN (pattern, 0) == 2)
8717     {
8718       pattern = copy_rtx (XVECEXP (pattern, 0, 1));
8719     }
8720   else
8721     {
8722       rtvec vec = rtvec_alloc (XVECLEN (pattern, 0) - 1);
8723       int i;
8724 
8725       for (i = 0; i < XVECLEN (pattern, 0) - 1; i++)
8726 	RTVEC_ELT (vec, i) = copy_rtx (XVECEXP (pattern, 0, i + 1));
8727 
8728       pattern = gen_rtx_PARALLEL (VOIDmode, vec);
8729     }
8730 
8731   return pattern;
8732 }
8733 
8734 /* Indicate that INSN cannot be duplicated.  This is the case for
8735    execute insns that carry a unique label.  */
8736 
8737 static bool
8738 s390_cannot_copy_insn_p (rtx_insn *insn)
8739 {
8740   rtx label = s390_execute_label (insn);
8741   return label && label != const0_rtx;
8742 }
8743 
8744 /* Dump out the constants in POOL.  If REMOTE_LABEL is true,
8745    do not emit the pool base label.  */
8746 
8747 static void
8748 s390_dump_pool (struct constant_pool *pool, bool remote_label)
8749 {
8750   struct constant *c;
8751   rtx_insn *insn = pool->pool_insn;
8752   int i;
8753 
8754   /* Switch to rodata section.  */
8755   insn = emit_insn_after (gen_pool_section_start (), insn);
8756   INSN_ADDRESSES_NEW (insn, -1);
8757 
8758   /* Ensure minimum pool alignment.  */
8759   insn = emit_insn_after (gen_pool_align (GEN_INT (8)), insn);
8760   INSN_ADDRESSES_NEW (insn, -1);
8761 
8762   /* Emit pool base label.  */
8763   if (!remote_label)
8764     {
8765       insn = emit_label_after (pool->label, insn);
8766       INSN_ADDRESSES_NEW (insn, -1);
8767     }
8768 
8769   /* Dump constants in descending alignment requirement order,
8770      ensuring proper alignment for every constant.  */
8771   for (i = 0; i < NR_C_MODES; i++)
8772     for (c = pool->constants[i]; c; c = c->next)
8773       {
8774 	/* Convert UNSPEC_LTREL_OFFSET unspecs to pool-relative references.  */
8775 	rtx value = copy_rtx (c->value);
8776 	if (GET_CODE (value) == CONST
8777 	    && GET_CODE (XEXP (value, 0)) == UNSPEC
8778 	    && XINT (XEXP (value, 0), 1) == UNSPEC_LTREL_OFFSET
8779 	    && XVECLEN (XEXP (value, 0), 0) == 1)
8780 	  value = s390_pool_offset (pool, XVECEXP (XEXP (value, 0), 0, 0));
8781 
8782 	insn = emit_label_after (c->label, insn);
8783 	INSN_ADDRESSES_NEW (insn, -1);
8784 
8785 	value = gen_rtx_UNSPEC_VOLATILE (constant_modes[i],
8786 					 gen_rtvec (1, value),
8787 					 UNSPECV_POOL_ENTRY);
8788 	insn = emit_insn_after (value, insn);
8789 	INSN_ADDRESSES_NEW (insn, -1);
8790       }
8791 
8792   /* Ensure minimum alignment for instructions.  */
8793   insn = emit_insn_after (gen_pool_align (GEN_INT (2)), insn);
8794   INSN_ADDRESSES_NEW (insn, -1);
8795 
8796   /* Output in-pool execute template insns.  */
8797   for (c = pool->execute; c; c = c->next)
8798     {
8799       insn = emit_label_after (c->label, insn);
8800       INSN_ADDRESSES_NEW (insn, -1);
8801 
8802       insn = emit_insn_after (s390_execute_target (c->value), insn);
8803       INSN_ADDRESSES_NEW (insn, -1);
8804     }
8805 
8806   /* Switch back to previous section.  */
8807   insn = emit_insn_after (gen_pool_section_end (), insn);
8808   INSN_ADDRESSES_NEW (insn, -1);
8809 
8810   insn = emit_barrier_after (insn);
8811   INSN_ADDRESSES_NEW (insn, -1);
8812 
8813   /* Remove placeholder insn.  */
8814   remove_insn (pool->pool_insn);
8815 }
8816 
8817 /* Free all memory used by POOL.  */
8818 
8819 static void
8820 s390_free_pool (struct constant_pool *pool)
8821 {
8822   struct constant *c, *next;
8823   int i;
8824 
8825   for (i = 0; i < NR_C_MODES; i++)
8826     for (c = pool->constants[i]; c; c = next)
8827       {
8828 	next = c->next;
8829 	free (c);
8830       }
8831 
8832   for (c = pool->execute; c; c = next)
8833     {
8834       next = c->next;
8835       free (c);
8836     }
8837 
8838   BITMAP_FREE (pool->insns);
8839   free (pool);
8840 }
8841 
8842 
8843 /* Collect main literal pool.  Return NULL on overflow.  */
8844 
8845 static struct constant_pool *
8846 s390_mainpool_start (void)
8847 {
8848   struct constant_pool *pool;
8849   rtx_insn *insn;
8850 
8851   pool = s390_alloc_pool ();
8852 
8853   for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
8854     {
8855       if (NONJUMP_INSN_P (insn)
8856 	  && GET_CODE (PATTERN (insn)) == SET
8857 	  && GET_CODE (SET_SRC (PATTERN (insn))) == UNSPEC_VOLATILE
8858 	  && XINT (SET_SRC (PATTERN (insn)), 1) == UNSPECV_MAIN_POOL)
8859 	{
8860 	  /* There might be two main_pool instructions if base_reg
8861 	     is call-clobbered; one for shrink-wrapped code and one
8862 	     for the rest.  We want to keep the first.  */
8863 	  if (pool->pool_insn)
8864 	    {
8865 	      insn = PREV_INSN (insn);
8866 	      delete_insn (NEXT_INSN (insn));
8867 	      continue;
8868 	    }
8869 	  pool->pool_insn = insn;
8870 	}
8871 
8872       if (NONJUMP_INSN_P (insn) || CALL_P (insn))
8873 	{
8874 	  rtx pool_ref = NULL_RTX;
8875 	  find_constant_pool_ref (insn, &pool_ref);
8876 	  if (pool_ref)
8877 	    {
8878 	      rtx constant = get_pool_constant (pool_ref);
8879 	      machine_mode mode = get_pool_mode (pool_ref);
8880 	      s390_add_constant (pool, constant, mode);
8881 	    }
8882 	}
8883 
8884       /* If hot/cold partitioning is enabled we have to make sure that
8885 	 the literal pool is emitted in the same section where the
8886 	 initialization of the literal pool base pointer takes place.
8887 	 emit_pool_after is only used in the non-overflow case on
8888 	 non-Z CPUs where we can emit the literal pool at the end of the
8889 	 function body within the text section.  */
8890       if (NOTE_P (insn)
8891 	  && NOTE_KIND (insn) == NOTE_INSN_SWITCH_TEXT_SECTIONS
8892 	  && !pool->emit_pool_after)
8893 	pool->emit_pool_after = PREV_INSN (insn);
8894     }
8895 
8896   gcc_assert (pool->pool_insn || pool->size == 0);
8897 
8898   if (pool->size >= 4096)
8899     {
8900       /* We're going to chunkify the pool, so remove the main
8901 	 pool placeholder insn.  */
8902       remove_insn (pool->pool_insn);
8903 
8904       s390_free_pool (pool);
8905       pool = NULL;
8906     }
8907 
8908   /* If the function ends with the section where the literal pool
8909      should be emitted, set the marker to its end.  */
8910   if (pool && !pool->emit_pool_after)
8911     pool->emit_pool_after = get_last_insn ();
8912 
8913   return pool;
8914 }
8915 
8916 /* POOL holds the main literal pool as collected by s390_mainpool_start.
8917    Modify the current function to output the pool constants as well as
8918    the pool register setup instruction.  */
8919 
8920 static void
8921 s390_mainpool_finish (struct constant_pool *pool)
8922 {
8923   rtx base_reg = cfun->machine->base_reg;
8924   rtx set;
8925   rtx_insn *insn;
8926 
8927   /* If the pool is empty, we're done.  */
8928   if (pool->size == 0)
8929     {
8930       /* We don't actually need a base register after all.  */
8931       cfun->machine->base_reg = NULL_RTX;
8932 
8933       if (pool->pool_insn)
8934 	remove_insn (pool->pool_insn);
8935       s390_free_pool (pool);
8936       return;
8937     }
8938 
8939   /* We need correct insn addresses.  */
8940   shorten_branches (get_insns ());
8941 
8942   /* Use a LARL to load the pool register.  The pool is
8943      located in the .rodata section, so we emit it after the function.  */
8944   set = gen_main_base_64 (base_reg, pool->label);
8945   insn = emit_insn_after (set, pool->pool_insn);
8946   INSN_ADDRESSES_NEW (insn, -1);
8947   remove_insn (pool->pool_insn);
8948 
8949   insn = get_last_insn ();
8950   pool->pool_insn = emit_insn_after (gen_pool (const0_rtx), insn);
8951   INSN_ADDRESSES_NEW (pool->pool_insn, -1);
8952 
8953   s390_dump_pool (pool, 0);
8954 
8955   /* Replace all literal pool references.  */
8956 
8957   for (rtx_insn *insn = get_insns (); insn; insn = NEXT_INSN (insn))
8958     {
8959       if (NONJUMP_INSN_P (insn) || CALL_P (insn))
8960 	{
8961 	  rtx addr, pool_ref = NULL_RTX;
8962 	  find_constant_pool_ref (insn, &pool_ref);
8963 	  if (pool_ref)
8964 	    {
8965 	      if (s390_execute_label (insn))
8966 		addr = s390_find_execute (pool, insn);
8967 	      else
8968 		addr = s390_find_constant (pool, get_pool_constant (pool_ref),
8969 						 get_pool_mode (pool_ref));
8970 
8971 	      replace_constant_pool_ref (insn, pool_ref, addr);
8972 	      INSN_CODE (insn) = -1;
8973 	    }
8974 	}
8975     }
8976 
8977 
8978   /* Free the pool.  */
8979   s390_free_pool (pool);
8980 }
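
/* Taken together, the expected driver flow for the main pool is
   roughly (an illustrative sketch; the actual caller is the machine
   dependent reorg pass, which is not shown here):

     struct constant_pool *pool = s390_mainpool_start ();
     if (pool)
       s390_mainpool_finish (pool);
     else
       {
         // Overflow (>= 4096 bytes): fall back to chunkified pools.
         struct constant_pool *pool_list = s390_chunkify_start ();
         s390_chunkify_finish (pool_list);
       }
*/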
8981 
8982 /* Chunkify the literal pool.  */
8983 
8984 #define S390_POOL_CHUNK_MIN	0xc00
8985 #define S390_POOL_CHUNK_MAX	0xe00
8986 
8987 static struct constant_pool *
8988 s390_chunkify_start (void)
8989 {
8990   struct constant_pool *curr_pool = NULL, *pool_list = NULL;
8991   bitmap far_labels;
8992   rtx_insn *insn;
8993 
8994   /* We need correct insn addresses.  */
8995 
8996   shorten_branches (get_insns ());
8997 
8998   /* Scan all insns and move literals to pool chunks.  */
8999 
9000   for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
9001     {
9002       if (NONJUMP_INSN_P (insn) || CALL_P (insn))
9003 	{
9004 	  rtx pool_ref = NULL_RTX;
9005 	  find_constant_pool_ref (insn, &pool_ref);
9006 	  if (pool_ref)
9007 	    {
9008 	      rtx constant = get_pool_constant (pool_ref);
9009 	      machine_mode mode = get_pool_mode (pool_ref);
9010 
9011 	      if (!curr_pool)
9012 		curr_pool = s390_start_pool (&pool_list, insn);
9013 
9014 	      s390_add_constant (curr_pool, constant, mode);
9015 	      s390_add_pool_insn (curr_pool, insn);
9016 	    }
9017 	}
9018 
9019       if (JUMP_P (insn) || JUMP_TABLE_DATA_P (insn) || LABEL_P (insn))
9020 	{
9021 	  if (curr_pool)
9022 	    s390_add_pool_insn (curr_pool, insn);
9023 	}
9024 
9025       if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_VAR_LOCATION)
9026 	continue;
9027 
9028       if (!curr_pool
9029 	  || INSN_ADDRESSES_SIZE () <= (size_t) INSN_UID (insn)
9030 	  || INSN_ADDRESSES (INSN_UID (insn)) == -1)
9031 	continue;
9032 
9033       if (curr_pool->size < S390_POOL_CHUNK_MAX)
9034 	continue;
9035 
9036       s390_end_pool (curr_pool, NULL);
9037       curr_pool = NULL;
9038     }
9039 
9040   if (curr_pool)
9041     s390_end_pool (curr_pool, NULL);
9042 
9043   /* Find all labels that are branched into
9044      from an insn belonging to a different chunk.  */
9045 
9046   far_labels = BITMAP_ALLOC (NULL);
9047 
9048   for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
9049     {
9050       rtx_jump_table_data *table;
9051 
9052       /* Labels marked with LABEL_PRESERVE_P can be target
9053 	 of non-local jumps, so we have to mark them.
9054 	 The same holds for named labels.
9055 
9056 	 Don't do that, however, if it is the label before
9057 	 a jump table.  */
9058 
9059       if (LABEL_P (insn)
9060 	  && (LABEL_PRESERVE_P (insn) || LABEL_NAME (insn)))
9061 	{
9062 	  rtx_insn *vec_insn = NEXT_INSN (insn);
9063 	  if (! vec_insn || ! JUMP_TABLE_DATA_P (vec_insn))
9064 	    bitmap_set_bit (far_labels, CODE_LABEL_NUMBER (insn));
9065 	}
9066       /* Check potential targets in a table jump (casesi_jump).  */
9067       else if (tablejump_p (insn, NULL, &table))
9068 	{
9069 	  rtx vec_pat = PATTERN (table);
9070 	  int i, diff_p = GET_CODE (vec_pat) == ADDR_DIFF_VEC;
9071 
9072 	  for (i = 0; i < XVECLEN (vec_pat, diff_p); i++)
9073 	    {
9074 	      rtx label = XEXP (XVECEXP (vec_pat, diff_p, i), 0);
9075 
9076 	      if (s390_find_pool (pool_list, label)
9077 		  != s390_find_pool (pool_list, insn))
9078 		bitmap_set_bit (far_labels, CODE_LABEL_NUMBER (label));
9079 	    }
9080 	}
9081       /* If we have a direct jump (conditional or unconditional),
9082 	 check all potential targets.  */
9083       else if (JUMP_P (insn))
9084 	{
9085 	  rtx pat = PATTERN (insn);
9086 
9087 	  if (GET_CODE (pat) == PARALLEL)
9088 	    pat = XVECEXP (pat, 0, 0);
9089 
9090 	  if (GET_CODE (pat) == SET)
9091 	    {
9092 	      rtx label = JUMP_LABEL (insn);
9093 	      if (label && !ANY_RETURN_P (label))
9094 		{
9095 		  if (s390_find_pool (pool_list, label)
9096 		      != s390_find_pool (pool_list, insn))
9097 		    bitmap_set_bit (far_labels, CODE_LABEL_NUMBER (label));
9098 		}
9099 	    }
9100 	}
9101     }
9102 
9103   /* Insert base register reload insns before every pool.  */
9104 
9105   for (curr_pool = pool_list; curr_pool; curr_pool = curr_pool->next)
9106     {
9107       rtx new_insn = gen_reload_base_64 (cfun->machine->base_reg,
9108 					 curr_pool->label);
9109       rtx_insn *insn = curr_pool->first_insn;
9110       INSN_ADDRESSES_NEW (emit_insn_before (new_insn, insn), -1);
9111     }
9112 
9113   /* Insert base register reload insns at every far label.  */
9114 
9115   for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
9116     if (LABEL_P (insn)
9117 	&& bitmap_bit_p (far_labels, CODE_LABEL_NUMBER (insn)))
9118       {
9119 	struct constant_pool *pool = s390_find_pool (pool_list, insn);
9120 	if (pool)
9121 	  {
9122 	    rtx new_insn = gen_reload_base_64 (cfun->machine->base_reg,
9123 					       pool->label);
9124 	    INSN_ADDRESSES_NEW (emit_insn_after (new_insn, insn), -1);
9125 	  }
9126       }
9127 
9128 
9129   BITMAP_FREE (far_labels);
9130 
9131 
9132   /* Recompute insn addresses.  */
9133 
9134   init_insn_lengths ();
9135   shorten_branches (get_insns ());
9136 
9137   return pool_list;
9138 }
9139 
9140 /* POOL_LIST is a chunk list as prepared by s390_chunkify_start.
9141    After we have decided to use this list, finish implementing
9142    all changes to the current function as required.  */
9143 
9144 static void
9145 s390_chunkify_finish (struct constant_pool *pool_list)
9146 {
9147   struct constant_pool *curr_pool = NULL;
9148   rtx_insn *insn;
9149 
9150 
9151   /* Replace all literal pool references.  */
9152 
9153   for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
9154     {
9155       curr_pool = s390_find_pool (pool_list, insn);
9156       if (!curr_pool)
9157 	continue;
9158 
9159       if (NONJUMP_INSN_P (insn) || CALL_P (insn))
9160 	{
9161 	  rtx addr, pool_ref = NULL_RTX;
9162 	  find_constant_pool_ref (insn, &pool_ref);
9163 	  if (pool_ref)
9164 	    {
9165 	      if (s390_execute_label (insn))
9166 		addr = s390_find_execute (curr_pool, insn);
9167 	      else
9168 		addr = s390_find_constant (curr_pool,
9169 					   get_pool_constant (pool_ref),
9170 					   get_pool_mode (pool_ref));
9171 
9172 	      replace_constant_pool_ref (insn, pool_ref, addr);
9173 	      INSN_CODE (insn) = -1;
9174 	    }
9175 	}
9176     }
9177 
9178   /* Dump out all literal pools.  */
9179 
9180   for (curr_pool = pool_list; curr_pool; curr_pool = curr_pool->next)
9181     s390_dump_pool (curr_pool, 0);
9182 
9183   /* Free pool list.  */
9184 
9185   while (pool_list)
9186     {
9187       struct constant_pool *next = pool_list->next;
9188       s390_free_pool (pool_list);
9189       pool_list = next;
9190     }
9191 }
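
/* Illustrative summary of the chunkify scheme implemented above: the
   constants are spread over several pools of at most
   S390_POOL_CHUNK_MAX bytes, a reload of the literal pool base
   register (gen_reload_base_64 of the chunk's label) is inserted in
   front of every chunk and after every label reachable from a
   different chunk, and every annotated literal reference is finally
   rewritten to base + offset within its own chunk.  */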
9192 
9193 /* Output the constant pool entry EXP in mode MODE with alignment ALIGN.  */
9194 
9195 void
9196 s390_output_pool_entry (rtx exp, machine_mode mode, unsigned int align)
9197 {
9198   switch (GET_MODE_CLASS (mode))
9199     {
9200     case MODE_FLOAT:
9201     case MODE_DECIMAL_FLOAT:
9202       gcc_assert (GET_CODE (exp) == CONST_DOUBLE);
9203 
9204       assemble_real (*CONST_DOUBLE_REAL_VALUE (exp),
9205 		     as_a <scalar_float_mode> (mode), align);
9206       break;
9207 
9208     case MODE_INT:
9209       assemble_integer (exp, GET_MODE_SIZE (mode), align, 1);
9210       mark_symbol_refs_as_used (exp);
9211       break;
9212 
9213     case MODE_VECTOR_INT:
9214     case MODE_VECTOR_FLOAT:
9215       {
9216 	int i;
9217 	machine_mode inner_mode;
9218 	gcc_assert (GET_CODE (exp) == CONST_VECTOR);
9219 
9220 	inner_mode = GET_MODE_INNER (GET_MODE (exp));
9221 	for (i = 0; i < XVECLEN (exp, 0); i++)
9222 	  s390_output_pool_entry (XVECEXP (exp, 0, i),
9223 				  inner_mode,
9224 				  i == 0
9225 				  ? align
9226 				  : GET_MODE_BITSIZE (inner_mode));
9227       }
9228       break;
9229 
9230     default:
9231       gcc_unreachable ();
9232     }
9233 }
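
/* For example (illustrative): a V4SImode CONST_VECTOR pool entry is
   emitted as four consecutive SImode integers; the first element uses
   the alignment passed in by the caller, the remaining elements use
   GET_MODE_BITSIZE (SImode).  */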
9234 
9235 
9236 /* Return an RTL expression representing the value of the return address
9237    for the frame COUNT steps up from the current frame.  FRAME is the
9238    frame pointer of that frame.  */
9239 
9240 rtx
9241 s390_return_addr_rtx (int count, rtx frame ATTRIBUTE_UNUSED)
9242 {
9243   int offset;
9244   rtx addr;
9245 
9246   /* Without backchain, we fail for all but the current frame.  */
9247 
9248   if (!TARGET_BACKCHAIN && count > 0)
9249     return NULL_RTX;
9250 
9251   /* For the current frame, we need to make sure the initial
9252      value of RETURN_REGNUM is actually saved.  */
9253 
9254   if (count == 0)
9255     return get_hard_reg_initial_val (Pmode, RETURN_REGNUM);
9256 
9257   if (TARGET_PACKED_STACK)
9258     offset = -2 * UNITS_PER_LONG;
9259   else
9260     offset = RETURN_REGNUM * UNITS_PER_LONG;
9261 
9262   addr = plus_constant (Pmode, frame, offset);
9263   addr = memory_address (Pmode, addr);
9264   return gen_rtx_MEM (Pmode, addr);
9265 }
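
/* Example (illustrative): for __builtin_return_address (1) on a
   64-bit target with -mbackchain, FRAME points to the caller's frame
   and the slot read is at offset 14 * UNITS_PER_LONG (the r14 save
   slot), or at -2 * UNITS_PER_LONG when -mpacked-stack is in
   effect.  */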
9266 
9267 /* Return an RTL expression representing the back chain stored in
9268    the current stack frame.  */
9269 
9270 rtx
9271 s390_back_chain_rtx (void)
9272 {
9273   rtx chain;
9274 
9275   gcc_assert (TARGET_BACKCHAIN);
9276 
9277   if (TARGET_PACKED_STACK)
9278     chain = plus_constant (Pmode, stack_pointer_rtx,
9279 			   STACK_POINTER_OFFSET - UNITS_PER_LONG);
9280   else
9281     chain = stack_pointer_rtx;
9282 
9283   chain = gen_rtx_MEM (Pmode, chain);
9284   return chain;
9285 }
9286 
9287 /* Find first call clobbered register unused in a function.
9288    This could be used as base register in a leaf function
9289    or for holding the return address before epilogue.  */
9290 
9291 static int
9292 find_unused_clobbered_reg (void)
9293 {
9294   int i;
9295   for (i = 0; i < 6; i++)
9296     if (!df_regs_ever_live_p (i))
9297       return i;
9298   return 0;
9299 }
9300 
9301 
9302 /* Helper function for s390_regs_ever_clobbered.  Sets the fields in DATA for all
9303    clobbered hard regs in SETREG.  */
9304 
9305 static void
9306 s390_reg_clobbered_rtx (rtx setreg, const_rtx set_insn ATTRIBUTE_UNUSED, void *data)
9307 {
9308   char *regs_ever_clobbered = (char *)data;
9309   unsigned int i, regno;
9310   machine_mode mode = GET_MODE (setreg);
9311 
9312   if (GET_CODE (setreg) == SUBREG)
9313     {
9314       rtx inner = SUBREG_REG (setreg);
9315       if (!GENERAL_REG_P (inner) && !FP_REG_P (inner))
9316 	return;
9317       regno = subreg_regno (setreg);
9318     }
9319   else if (GENERAL_REG_P (setreg) || FP_REG_P (setreg))
9320     regno = REGNO (setreg);
9321   else
9322     return;
9323 
9324   for (i = regno;
9325        i < end_hard_regno (mode, regno);
9326        i++)
9327     regs_ever_clobbered[i] = 1;
9328 }
9329 
9330 /* Walks through all basic blocks of the current function looking
9331    for clobbered hard regs using s390_reg_clobbered_rtx.  The fields
9332    of the passed char array REGS_EVER_CLOBBERED are set to one for
9333    each of those regs.  */
9334 
9335 static void
9336 s390_regs_ever_clobbered (char regs_ever_clobbered[])
9337 {
9338   basic_block cur_bb;
9339   rtx_insn *cur_insn;
9340   unsigned int i;
9341 
9342   memset (regs_ever_clobbered, 0, 32);
9343 
9344   /* For non-leaf functions we have to consider all call clobbered regs to be
9345      clobbered.  */
9346   if (!crtl->is_leaf)
9347     {
9348       for (i = 0; i < 32; i++)
9349 	regs_ever_clobbered[i] = call_really_used_regs[i];
9350     }
9351 
9352   /* Make the "magic" eh_return registers live if necessary.  For regs_ever_live
9353      this work is done by liveness analysis (mark_regs_live_at_end).
9354      Special care is needed for functions containing landing pads.  Landing pads
9355      may use the eh registers, but the code which sets these registers is not
9356      contained in that function.  Hence s390_regs_ever_clobbered is not able to
9357      deal with this automatically.  */
9358   if (crtl->calls_eh_return || cfun->machine->has_landing_pad_p)
9359     for (i = 0; EH_RETURN_DATA_REGNO (i) != INVALID_REGNUM ; i++)
9360       if (crtl->calls_eh_return
9361 	  || (cfun->machine->has_landing_pad_p
9362 	      && df_regs_ever_live_p (EH_RETURN_DATA_REGNO (i))))
9363 	regs_ever_clobbered[EH_RETURN_DATA_REGNO (i)] = 1;
9364 
9365   /* For nonlocal gotos all call-saved registers have to be saved.
9366      This flag is also set for the unwinding code in libgcc.
9367      See expand_builtin_unwind_init.  For regs_ever_live this is done by
9368      reload.  */
9369   if (crtl->saves_all_registers)
9370     for (i = 0; i < 32; i++)
9371       if (!call_really_used_regs[i])
9372 	regs_ever_clobbered[i] = 1;
9373 
9374   FOR_EACH_BB_FN (cur_bb, cfun)
9375     {
9376       FOR_BB_INSNS (cur_bb, cur_insn)
9377 	{
9378 	  rtx pat;
9379 
9380 	  if (!INSN_P (cur_insn))
9381 	    continue;
9382 
9383 	  pat = PATTERN (cur_insn);
9384 
9385 	  /* Ignore GPR restore insns.  */
9386 	  if (epilogue_completed && RTX_FRAME_RELATED_P (cur_insn))
9387 	    {
9388 	      if (GET_CODE (pat) == SET
9389 		  && GENERAL_REG_P (SET_DEST (pat)))
9390 		{
9391 		  /* lgdr  */
9392 		  if (GET_MODE (SET_SRC (pat)) == DImode
9393 		      && FP_REG_P (SET_SRC (pat)))
9394 		    continue;
9395 
9396 		  /* l / lg  */
9397 		  if (GET_CODE (SET_SRC (pat)) == MEM)
9398 		    continue;
9399 		}
9400 
9401 	      /* lm / lmg */
9402 	      if (GET_CODE (pat) == PARALLEL
9403 		  && load_multiple_operation (pat, VOIDmode))
9404 		continue;
9405 	    }
9406 
9407 	  note_stores (pat,
9408 		       s390_reg_clobbered_rtx,
9409 		       regs_ever_clobbered);
9410 	}
9411     }
9412 }
9413 
9414 /* Determine the frame area which actually has to be accessed
9415    in the function epilogue. The values are stored at the
9416    given pointers AREA_BOTTOM (address of the lowest used stack
9417    address) and AREA_TOP (address of the first item which does
9418    not belong to the stack frame).  */
9419 
9420 static void
9421 s390_frame_area (int *area_bottom, int *area_top)
9422 {
9423   int b, t;
9424 
9425   b = INT_MAX;
9426   t = INT_MIN;
9427 
9428   if (cfun_frame_layout.first_restore_gpr != -1)
9429     {
9430       b = (cfun_frame_layout.gprs_offset
9431 	   + cfun_frame_layout.first_restore_gpr * UNITS_PER_LONG);
9432       t = b + (cfun_frame_layout.last_restore_gpr
9433 	       - cfun_frame_layout.first_restore_gpr + 1) * UNITS_PER_LONG;
9434     }
9435 
9436   if (TARGET_64BIT && cfun_save_high_fprs_p)
9437     {
9438       b = MIN (b, cfun_frame_layout.f8_offset);
9439       t = MAX (t, (cfun_frame_layout.f8_offset
9440 		   + cfun_frame_layout.high_fprs * 8));
9441     }
9442 
9443   if (!TARGET_64BIT)
9444     {
9445       if (cfun_fpr_save_p (FPR4_REGNUM))
9446 	{
9447 	  b = MIN (b, cfun_frame_layout.f4_offset);
9448 	  t = MAX (t, cfun_frame_layout.f4_offset + 8);
9449 	}
9450       if (cfun_fpr_save_p (FPR6_REGNUM))
9451 	{
9452 	  b = MIN (b, cfun_frame_layout.f4_offset + 8);
9453 	  t = MAX (t, cfun_frame_layout.f4_offset + 16);
9454 	}
9455     }
9456   *area_bottom = b;
9457   *area_top = t;
9458 }
9459 /* Update gpr_save_slots in the frame layout trying to make use of
9460    FPRs as GPR save slots.
9461    This is a helper routine of s390_register_info.  */
9462 
9463 static void
9464 s390_register_info_gprtofpr ()
9465 {
9466   int save_reg_slot = FPR0_REGNUM;
9467   int i, j;
9468 
9469   if (!TARGET_Z10 || !TARGET_HARD_FLOAT || !crtl->is_leaf)
9470     return;
9471 
9472   /* builtin_eh_return needs to be able to modify the return address
9473      on the stack.  It could also adjust the FPR save slot instead but
9474      is it worth the trouble?!  */
9475   if (crtl->calls_eh_return)
9476     return;
9477 
9478   for (i = 15; i >= 6; i--)
9479     {
9480       if (cfun_gpr_save_slot (i) == SAVE_SLOT_NONE)
9481 	continue;
9482 
9483       /* Advance to the next FP register which can be used as a
9484 	 GPR save slot.  */
9485       while ((!call_really_used_regs[save_reg_slot]
9486 	      || df_regs_ever_live_p (save_reg_slot)
9487 	      || cfun_fpr_save_p (save_reg_slot))
9488 	     && FP_REGNO_P (save_reg_slot))
9489 	save_reg_slot++;
9490       if (!FP_REGNO_P (save_reg_slot))
9491 	{
9492 	  /* We only want to use ldgr/lgdr if we can get rid of
9493 	     stm/lm entirely.  So undo the gpr slot allocation in
9494 	     case we ran out of FPR save slots.  */
9495 	  for (j = 6; j <= 15; j++)
9496 	    if (FP_REGNO_P (cfun_gpr_save_slot (j)))
9497 	      cfun_gpr_save_slot (j) = SAVE_SLOT_STACK;
9498 	  break;
9499 	}
9500       cfun_gpr_save_slot (i) = save_reg_slot++;
9501     }
9502 }
9503 
9504 /* Set the bits in fpr_bitmap for FPRs which need to be saved due to
9505    stdarg.
9506    This is a helper routine for s390_register_info.  */
9507 
9508 static void
9509 s390_register_info_stdarg_fpr ()
9510 {
9511   int i;
9512   int min_fpr;
9513   int max_fpr;
9514 
9515   /* Save the FP argument regs for stdarg. f0, f2 for 31 bit and
9516      f0-f4 for 64 bit.  */
9517   if (!cfun->stdarg
9518       || !TARGET_HARD_FLOAT
9519       || !cfun->va_list_fpr_size
9520       || crtl->args.info.fprs >= FP_ARG_NUM_REG)
9521     return;
9522 
9523   min_fpr = crtl->args.info.fprs;
9524   max_fpr = min_fpr + cfun->va_list_fpr_size - 1;
9525   if (max_fpr >= FP_ARG_NUM_REG)
9526     max_fpr = FP_ARG_NUM_REG - 1;
9527 
9528   /* FPR argument regs start at f0.  */
9529   min_fpr += FPR0_REGNUM;
9530   max_fpr += FPR0_REGNUM;
9531 
9532   for (i = min_fpr; i <= max_fpr; i++)
9533     cfun_set_fpr_save (i);
9534 }
9535 
9536 /* Reserve the GPR save slots for GPRs which need to be saved due to
9537    stdarg.
9538    This is a helper routine for s390_register_info.  */
9539 
9540 static void
9541 s390_register_info_stdarg_gpr ()
9542 {
9543   int i;
9544   int min_gpr;
9545   int max_gpr;
9546 
9547   if (!cfun->stdarg
9548       || !cfun->va_list_gpr_size
9549       || crtl->args.info.gprs >= GP_ARG_NUM_REG)
9550     return;
9551 
9552   min_gpr = crtl->args.info.gprs;
9553   max_gpr = min_gpr + cfun->va_list_gpr_size - 1;
9554   if (max_gpr >= GP_ARG_NUM_REG)
9555     max_gpr = GP_ARG_NUM_REG - 1;
9556 
9557   /* GPR argument regs start at r2.  */
9558   min_gpr += GPR2_REGNUM;
9559   max_gpr += GPR2_REGNUM;
9560 
9561   /* If r6 was supposed to be saved into an FPR and now needs to go to
9562      the stack for vararg we have to adjust the restore range to make
9563      sure that the restore is done from stack as well.  */
9564   if (FP_REGNO_P (cfun_gpr_save_slot (GPR6_REGNUM))
9565       && min_gpr <= GPR6_REGNUM
9566       && max_gpr >= GPR6_REGNUM)
9567     {
9568       if (cfun_frame_layout.first_restore_gpr == -1
9569 	  || cfun_frame_layout.first_restore_gpr > GPR6_REGNUM)
9570 	cfun_frame_layout.first_restore_gpr = GPR6_REGNUM;
9571       if (cfun_frame_layout.last_restore_gpr == -1
9572 	  || cfun_frame_layout.last_restore_gpr < GPR6_REGNUM)
9573 	cfun_frame_layout.last_restore_gpr = GPR6_REGNUM;
9574     }
9575 
9576   if (cfun_frame_layout.first_save_gpr == -1
9577       || cfun_frame_layout.first_save_gpr > min_gpr)
9578     cfun_frame_layout.first_save_gpr = min_gpr;
9579 
9580   if (cfun_frame_layout.last_save_gpr == -1
9581       || cfun_frame_layout.last_save_gpr < max_gpr)
9582     cfun_frame_layout.last_save_gpr = max_gpr;
9583 
9584   for (i = min_gpr; i <= max_gpr; i++)
9585     cfun_gpr_save_slot (i) = SAVE_SLOT_STACK;
9586 }
9587 
9588 /* Calculate the save and restore ranges for stm(g) and lm(g) in the
9589    prologue and epilogue.  */
9590 
9591 static void
9592 s390_register_info_set_ranges ()
9593 {
9594   int i, j;
9595 
9596   /* Find the first and the last save slot supposed to use the stack
9597      to set the restore range.
9598      Vararg regs might be marked as save to stack but only the
9599      call-saved regs really need restoring (i.e. r6).  This code
9600      assumes that the vararg regs have not yet been recorded in
9601      cfun_gpr_save_slot.  */
9602   for (i = 0; i < 16 && cfun_gpr_save_slot (i) != SAVE_SLOT_STACK; i++);
9603   for (j = 15; j > i && cfun_gpr_save_slot (j) != SAVE_SLOT_STACK; j--);
9604   cfun_frame_layout.first_restore_gpr = (i == 16) ? -1 : i;
9605   cfun_frame_layout.last_restore_gpr = (i == 16) ? -1 : j;
9606   cfun_frame_layout.first_save_gpr = (i == 16) ? -1 : i;
9607   cfun_frame_layout.last_save_gpr = (i == 16) ? -1 : j;
9608 }
9609 
9610 /* The GPR and FPR save slots in cfun->machine->frame_layout are set
9611    for registers which need to be saved in function prologue.
9612    This function can be used until the insns emitted for save/restore
9613    of the regs are visible in the RTL stream.  */
9614 
9615 static void
9616 s390_register_info ()
9617 {
9618   int i;
9619   char clobbered_regs[32];
9620 
9621   gcc_assert (!epilogue_completed);
9622 
9623   if (reload_completed)
9624     /* After reload we rely on our own routine to determine which
9625        registers need saving.  */
9626     s390_regs_ever_clobbered (clobbered_regs);
9627   else
9628     /* During reload we use regs_ever_live as a base since reload
9629        does changes in there which we otherwise would not be aware
9630        of.  */
9631     for (i = 0; i < 32; i++)
9632       clobbered_regs[i] = df_regs_ever_live_p (i);
9633 
9634   for (i = 0; i < 32; i++)
9635     clobbered_regs[i] = clobbered_regs[i] && !global_regs[i];
9636 
9637   /* Mark the call-saved FPRs which need to be saved.
9638      This needs to be done before checking the special GPRs since the
9639      stack pointer usage depends on whether high FPRs have to be saved
9640      or not.  */
9641   cfun_frame_layout.fpr_bitmap = 0;
9642   cfun_frame_layout.high_fprs = 0;
9643   for (i = FPR0_REGNUM; i <= FPR15_REGNUM; i++)
9644     if (clobbered_regs[i] && !call_really_used_regs[i])
9645       {
9646 	cfun_set_fpr_save (i);
9647 	if (i >= FPR8_REGNUM)
9648 	  cfun_frame_layout.high_fprs++;
9649       }
9650 
9651   /* Register 12 is used for GOT address, but also as temp in prologue
9652      for split-stack stdarg functions (unless r14 is available).  */
9653   clobbered_regs[12]
9654     |= ((flag_pic && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
9655 	|| (flag_split_stack && cfun->stdarg
9656 	    && (crtl->is_leaf || TARGET_TPF_PROFILING
9657 		|| has_hard_reg_initial_val (Pmode, RETURN_REGNUM))));
9658 
9659   clobbered_regs[BASE_REGNUM]
9660     |= (cfun->machine->base_reg
9661 	&& REGNO (cfun->machine->base_reg) == BASE_REGNUM);
9662 
9663   clobbered_regs[HARD_FRAME_POINTER_REGNUM]
9664     |= !!frame_pointer_needed;
9665 
9666   /* On pre z900 machines this might take until machine dependent
9667      reorg to decide.
9668      save_return_addr_p will only be set on non-zarch machines so
9669      there is no risk that r14 goes into an FPR instead of a stack
9670      slot.  */
9671   clobbered_regs[RETURN_REGNUM]
9672     |= (!crtl->is_leaf
9673 	|| TARGET_TPF_PROFILING
9674 	|| cfun_frame_layout.save_return_addr_p
9675 	|| crtl->calls_eh_return);
9676 
9677   clobbered_regs[STACK_POINTER_REGNUM]
9678     |= (!crtl->is_leaf
9679 	|| TARGET_TPF_PROFILING
9680 	|| cfun_save_high_fprs_p
9681 	|| get_frame_size () > 0
9682 	|| (reload_completed && cfun_frame_layout.frame_size > 0)
9683 	|| cfun->calls_alloca);
9684 
9685   memset (cfun_frame_layout.gpr_save_slots, SAVE_SLOT_NONE, 16);
9686 
9687   for (i = 6; i < 16; i++)
9688     if (clobbered_regs[i])
9689       cfun_gpr_save_slot (i) = SAVE_SLOT_STACK;
9690 
9691   s390_register_info_stdarg_fpr ();
9692   s390_register_info_gprtofpr ();
9693   s390_register_info_set_ranges ();
9694   /* stdarg functions might need to save GPRs 2 to 6.  This might
9695      override the GPR->FPR save decision made by
9696      s390_register_info_gprtofpr for r6 since vararg regs must go to
9697      the stack.  */
9698   s390_register_info_stdarg_gpr ();
9699 }
9700 
9701 /* Return true if REGNO is a global register, but not one
9702    of the special ones that need to be saved/restored in any case.  */
9703 
9704 static inline bool
9705 global_not_special_regno_p (int regno)
9706 {
9707   return (global_regs[regno]
9708 	  /* These registers are special and need to be
9709 	     restored in any case.  */
9710 	  && !(regno == STACK_POINTER_REGNUM
9711 	       || regno == RETURN_REGNUM
9712 	       || regno == BASE_REGNUM
9713 	       || (flag_pic && regno == (int)PIC_OFFSET_TABLE_REGNUM)));
9714 }
9715 
9716 /* This function is called by s390_optimize_prologue in order to get
9717    rid of unnecessary GPR save/restore instructions.  The register info
9718    for the GPRs is re-computed and the ranges are re-calculated.  */
9719 
9720 static void
9721 s390_optimize_register_info ()
9722 {
9723   char clobbered_regs[32];
9724   int i;
9725 
9726   gcc_assert (epilogue_completed);
9727 
9728   s390_regs_ever_clobbered (clobbered_regs);
9729 
9730   /* Global registers do not need to be saved and restored unless it
9731      is one of our special regs.  (r12, r13, r14, or r15).  */
9732   for (i = 0; i < 32; i++)
9733     clobbered_regs[i] = clobbered_regs[i] && !global_not_special_regno_p (i);
9734 
9735   /* There is still special treatment needed for cases invisible to
9736      s390_regs_ever_clobbered.  */
9737   clobbered_regs[RETURN_REGNUM]
9738     |= (TARGET_TPF_PROFILING
9739 	/* When expanding builtin_return_addr in ESA mode we do not
9740 	   know whether r14 will later be needed as scratch reg when
9741 	   doing branch splitting.  So the builtin always accesses the
9742 	   r14 save slot and we need to stick to the save/restore
9743 	   decision for r14 even if it turns out that it didn't get
9744 	   clobbered.  */
9745 	|| cfun_frame_layout.save_return_addr_p
9746 	|| crtl->calls_eh_return);
9747 
9748   memset (cfun_frame_layout.gpr_save_slots, SAVE_SLOT_NONE, 6);
9749 
9750   for (i = 6; i < 16; i++)
9751     if (!clobbered_regs[i])
9752       cfun_gpr_save_slot (i) = SAVE_SLOT_NONE;
9753 
9754   s390_register_info_set_ranges ();
9755   s390_register_info_stdarg_gpr ();
9756 }
9757 
9758 /* Fill cfun->machine with info about frame of current function.  */
9759 
9760 static void
9761 s390_frame_info (void)
9762 {
9763   HOST_WIDE_INT lowest_offset;
9764 
9765   cfun_frame_layout.first_save_gpr_slot = cfun_frame_layout.first_save_gpr;
9766   cfun_frame_layout.last_save_gpr_slot = cfun_frame_layout.last_save_gpr;
9767 
9768   /* The va_arg builtin uses a constant distance of 16 *
9769      UNITS_PER_LONG (r0-r15) to reach the FPRs from the reg_save_area
9770      pointer.  So even if we are going to save the stack pointer in an
9771      FPR we need the stack space in order to keep the offsets
9772      correct.  */
9773   if (cfun->stdarg && cfun_save_arg_fprs_p)
9774     {
9775       cfun_frame_layout.last_save_gpr_slot = STACK_POINTER_REGNUM;
9776 
9777       if (cfun_frame_layout.first_save_gpr_slot == -1)
9778 	cfun_frame_layout.first_save_gpr_slot = STACK_POINTER_REGNUM;
9779     }
9780 
9781   cfun_frame_layout.frame_size = get_frame_size ();
9782   if (!TARGET_64BIT && cfun_frame_layout.frame_size > 0x7fff0000)
9783     fatal_error (input_location,
9784 		 "total size of local variables exceeds architecture limit");
9785 
9786   if (!TARGET_PACKED_STACK)
9787     {
9788       /* Fixed stack layout.  */
9789       cfun_frame_layout.backchain_offset = 0;
9790       cfun_frame_layout.f0_offset = 16 * UNITS_PER_LONG;
9791       cfun_frame_layout.f4_offset = cfun_frame_layout.f0_offset + 2 * 8;
9792       cfun_frame_layout.f8_offset = -cfun_frame_layout.high_fprs * 8;
9793       cfun_frame_layout.gprs_offset = (cfun_frame_layout.first_save_gpr_slot
9794 				       * UNITS_PER_LONG);
9795     }
9796   else if (TARGET_BACKCHAIN)
9797     {
9798       /* Kernel stack layout - packed stack, backchain, no float  */
9799       gcc_assert (TARGET_SOFT_FLOAT);
9800       cfun_frame_layout.backchain_offset = (STACK_POINTER_OFFSET
9801 					    - UNITS_PER_LONG);
9802 
9803       /* The distance between the backchain and the return address
9804 	 save slot must not change.  So we always need a slot for the
9805 	 stack pointer which resides in between.  */
9806       cfun_frame_layout.last_save_gpr_slot = STACK_POINTER_REGNUM;
9807 
9808       cfun_frame_layout.gprs_offset
9809 	= cfun_frame_layout.backchain_offset - cfun_gprs_save_area_size;
9810 
9811       /* FPRs will not be saved.  Nevertheless pick sane values to
9812 	 keep area calculations valid.  */
9813       cfun_frame_layout.f0_offset =
9814 	cfun_frame_layout.f4_offset =
9815 	cfun_frame_layout.f8_offset = cfun_frame_layout.gprs_offset;
9816     }
9817   else
9818     {
9819       int num_fprs;
9820 
9821       /* Packed stack layout without backchain.  */
9822 
9823       /* With stdarg FPRs need their dedicated slots.  */
9824       num_fprs = (TARGET_64BIT && cfun->stdarg ? 2
9825 		  : (cfun_fpr_save_p (FPR4_REGNUM) +
9826 		     cfun_fpr_save_p (FPR6_REGNUM)));
9827       cfun_frame_layout.f4_offset = STACK_POINTER_OFFSET - 8 * num_fprs;
9828 
9829       num_fprs = (cfun->stdarg ? 2
9830 		  : (cfun_fpr_save_p (FPR0_REGNUM)
9831 		     + cfun_fpr_save_p (FPR2_REGNUM)));
9832       cfun_frame_layout.f0_offset = cfun_frame_layout.f4_offset - 8 * num_fprs;
9833 
9834       cfun_frame_layout.gprs_offset
9835 	= cfun_frame_layout.f0_offset - cfun_gprs_save_area_size;
9836 
9837       cfun_frame_layout.f8_offset = (cfun_frame_layout.gprs_offset
9838 				     - cfun_frame_layout.high_fprs * 8);
9839     }
9840 
9841   if (cfun_save_high_fprs_p)
9842     cfun_frame_layout.frame_size += cfun_frame_layout.high_fprs * 8;
9843 
9844   if (!crtl->is_leaf)
9845     cfun_frame_layout.frame_size += crtl->outgoing_args_size;
9846 
9847   /* In the following cases we have to allocate a STACK_POINTER_OFFSET
9848      sized area at the bottom of the stack.  This is required also for
9849      leaf functions.  When GCC generates a local stack reference it
9850      will always add STACK_POINTER_OFFSET to all these references.  */
9851   if (crtl->is_leaf
9852       && !TARGET_TPF_PROFILING
9853       && cfun_frame_layout.frame_size == 0
9854       && !cfun->calls_alloca)
9855     return;
9856 
9857   /* Calculate the number of bytes we have used in our own register
9858      save area.  With the packed stack layout we can re-use the
9859      remaining bytes for normal stack elements.  */
9860 
9861   if (TARGET_PACKED_STACK)
9862     lowest_offset = MIN (MIN (cfun_frame_layout.f0_offset,
9863 			      cfun_frame_layout.f4_offset),
9864 			 cfun_frame_layout.gprs_offset);
9865   else
9866     lowest_offset = 0;
9867 
9868   if (TARGET_BACKCHAIN)
9869     lowest_offset = MIN (lowest_offset, cfun_frame_layout.backchain_offset);
9870 
9871   cfun_frame_layout.frame_size += STACK_POINTER_OFFSET - lowest_offset;
9872 
9873   /* Under 31 bit, if an odd number of GPRs has to be saved, we have to
9874      adjust the frame size to sustain 8-byte alignment of stack
9875      frames.  */
9876   cfun_frame_layout.frame_size = ((cfun_frame_layout.frame_size +
9877 				   STACK_BOUNDARY / BITS_PER_UNIT - 1)
9878 				  & ~(STACK_BOUNDARY / BITS_PER_UNIT - 1));
9879 }
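
/* A rough picture (illustrative) of the default, non-packed 64-bit
   register save area implied by the offsets computed above, relative
   to the incoming stack pointer and assuming r6 is the first saved
   GPR:

     offset   0 : back chain          (backchain_offset)
     offset  48 : r6 save slot        (first_save_gpr_slot * 8)
     ...
     offset 120 : r15 save slot
     offset 128 : f0, f2              (16 * UNITS_PER_LONG)
     offset 144 : f4, f6              (f0_offset + 2 * 8)

   f8-f15, if they have to be saved, go into additionally allocated
   frame space instead (f8_offset is negative).  */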
9880 
9881 /* Generate frame layout.  Fills in register and frame data for the current
9882    function in cfun->machine.  This routine can be called multiple times;
9883    it will re-do the complete frame layout every time.  */
9884 
9885 static void
9886 s390_init_frame_layout (void)
9887 {
9888   HOST_WIDE_INT frame_size;
9889   int base_used;
9890 
9891   /* After LRA the frame layout is supposed to be read-only and should
9892      not be re-computed.  */
9893   if (reload_completed)
9894     return;
9895 
9896   do
9897     {
9898       frame_size = cfun_frame_layout.frame_size;
9899 
9900       /* Try to predict whether we'll need the base register.  */
9901       base_used = crtl->uses_const_pool
9902 		  || (!DISP_IN_RANGE (frame_size)
9903 		      && !CONST_OK_FOR_K (frame_size));
9904 
9905       /* Decide which register to use as literal pool base.  In small
9906 	 leaf functions, try to use an unused call-clobbered register
9907 	 as base register to avoid save/restore overhead.  */
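      /* Illustrative example: in a small leaf function where r5 is live
	 but r4 is not, the loop below picks r4; if r2..r5 are all live,
	 the dedicated BASE_REGNUM is used instead.  */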
9908       if (!base_used)
9909 	cfun->machine->base_reg = NULL_RTX;
9910       else
9911 	{
9912 	  int br = 0;
9913 
9914 	  if (crtl->is_leaf)
9915 	    /* Prefer r5 (most likely to be free).  */
9916 	    for (br = 5; br >= 2 && df_regs_ever_live_p (br); br--)
9917 	      ;
9918 	  cfun->machine->base_reg =
9919 	    gen_rtx_REG (Pmode, (br >= 2) ? br : BASE_REGNUM);
9920 	}
9921 
9922       s390_register_info ();
9923       s390_frame_info ();
9924     }
9925   while (frame_size != cfun_frame_layout.frame_size);
9926 }
9927 
9928 /* Remove the FPR clobbers from a tbegin insn if it can be proven that
9929    the TX is nonescaping.  A transaction is considered escaping if
9930    there is at least one path from tbegin returning CC0 to the
9931    function exit block without a tend.
9932 
9933    The check so far has some limitations:
9934    - only single tbegin/tend BBs are supported
9935    - the first cond jump after tbegin must separate the CC0 path from ~CC0
9936    - when CC is copied to a GPR and the CC0 check is done with the GPR
9937      this is not supported
9938 */
9939 
9940 static void
9941 s390_optimize_nonescaping_tx (void)
9942 {
9943   const unsigned int CC0 = 1 << 3;
9944   basic_block tbegin_bb = NULL;
9945   basic_block tend_bb = NULL;
9946   basic_block bb;
9947   rtx_insn *insn;
9948   bool result = true;
9949   int bb_index;
9950   rtx_insn *tbegin_insn = NULL;
9951 
9952   if (!cfun->machine->tbegin_p)
9953     return;
9954 
9955   for (bb_index = 0; bb_index < n_basic_blocks_for_fn (cfun); bb_index++)
9956     {
9957       bb = BASIC_BLOCK_FOR_FN (cfun, bb_index);
9958 
9959       if (!bb)
9960 	continue;
9961 
9962       FOR_BB_INSNS (bb, insn)
9963 	{
9964 	  rtx ite, cc, pat, target;
9965 	  unsigned HOST_WIDE_INT mask;
9966 
9967 	  if (!INSN_P (insn) || INSN_CODE (insn) <= 0)
9968 	    continue;
9969 
9970 	  pat = PATTERN (insn);
9971 
9972 	  if (GET_CODE (pat) == PARALLEL)
9973 	    pat = XVECEXP (pat, 0, 0);
9974 
9975 	  if (GET_CODE (pat) != SET
9976 	      || GET_CODE (SET_SRC (pat)) != UNSPEC_VOLATILE)
9977 	    continue;
9978 
9979 	  if (XINT (SET_SRC (pat), 1) == UNSPECV_TBEGIN)
9980 	    {
9981 	      rtx_insn *tmp;
9982 
9983 	      tbegin_insn = insn;
9984 
9985 	      /* Just return if the tbegin doesn't have clobbers.  */
9986 	      if (GET_CODE (PATTERN (insn)) != PARALLEL)
9987 		return;
9988 
9989 	      if (tbegin_bb != NULL)
9990 		return;
9991 
9992 	      /* Find the next conditional jump.  */
9993 	      for (tmp = NEXT_INSN (insn);
9994 		   tmp != NULL_RTX;
9995 		   tmp = NEXT_INSN (tmp))
9996 		{
9997 		  if (reg_set_p (gen_rtx_REG (CCmode, CC_REGNUM), tmp))
9998 		    return;
9999 		  if (!JUMP_P (tmp))
10000 		    continue;
10001 
10002 		  ite = SET_SRC (PATTERN (tmp));
10003 		  if (GET_CODE (ite) != IF_THEN_ELSE)
10004 		    continue;
10005 
10006 		  cc = XEXP (XEXP (ite, 0), 0);
10007 		  if (!REG_P (cc) || !CC_REGNO_P (REGNO (cc))
10008 		      || GET_MODE (cc) != CCRAWmode
10009 		      || GET_CODE (XEXP (XEXP (ite, 0), 1)) != CONST_INT)
10010 		    return;
10011 
10012 		  if (bb->succs->length () != 2)
10013 		    return;
10014 
10015 		  mask = INTVAL (XEXP (XEXP (ite, 0), 1));
10016 		  if (GET_CODE (XEXP (ite, 0)) == NE)
10017 		    mask ^= 0xf;
10018 
10019 		  if (mask == CC0)
10020 		    target = XEXP (ite, 1);
10021 		  else if (mask == (CC0 ^ 0xf))
10022 		    target = XEXP (ite, 2);
10023 		  else
10024 		    return;
10025 
10026 		  {
10027 		    edge_iterator ei;
10028 		    edge e1, e2;
10029 
10030 		    ei = ei_start (bb->succs);
10031 		    e1 = ei_safe_edge (ei);
10032 		    ei_next (&ei);
10033 		    e2 = ei_safe_edge (ei);
10034 
10035 		    if (e2->flags & EDGE_FALLTHRU)
10036 		      {
10037 			e2 = e1;
10038 			e1 = ei_safe_edge (ei);
10039 		      }
10040 
10041 		    if (!(e1->flags & EDGE_FALLTHRU))
10042 		      return;
10043 
10044 		    tbegin_bb = (target == pc_rtx) ? e1->dest : e2->dest;
10045 		  }
10046 		  if (tmp == BB_END (bb))
10047 		    break;
10048 		}
10049 	    }
10050 
10051 	  if (XINT (SET_SRC (pat), 1) == UNSPECV_TEND)
10052 	    {
10053 	      if (tend_bb != NULL)
10054 		return;
10055 	      tend_bb = bb;
10056 	    }
10057 	}
10058     }
10059 
10060   /* Either we successfully remove the FPR clobbers here or we are not
10061      able to do anything for this TX.  Neither case qualifies for
10062      another look.  */
10063   cfun->machine->tbegin_p = false;
10064 
10065   if (tbegin_bb == NULL || tend_bb == NULL)
10066     return;
10067 
10068   calculate_dominance_info (CDI_POST_DOMINATORS);
10069   result = dominated_by_p (CDI_POST_DOMINATORS, tbegin_bb, tend_bb);
10070   free_dominance_info (CDI_POST_DOMINATORS);
10071 
10072   if (!result)
10073     return;
10074 
10075   PATTERN (tbegin_insn) = gen_rtx_PARALLEL (VOIDmode,
10076 			    gen_rtvec (2,
10077 				       XVECEXP (PATTERN (tbegin_insn), 0, 0),
10078 				       XVECEXP (PATTERN (tbegin_insn), 0, 1)));
10079   INSN_CODE (tbegin_insn) = -1;
10080   df_insn_rescan (tbegin_insn);
10081 
10082   return;
10083 }
10084 
10085 /* Implement TARGET_HARD_REGNO_NREGS.  Because all registers in a class
10086    have the same size, this is equivalent to CLASS_MAX_NREGS.  */
10087 
10088 static unsigned int
10089 s390_hard_regno_nregs (unsigned int regno, machine_mode mode)
10090 {
10091   return s390_class_max_nregs (REGNO_REG_CLASS (regno), mode);
10092 }
10093 
10094 /* Implement TARGET_HARD_REGNO_MODE_OK.
10095 
10096    Integer modes <= word size fit into any GPR.
10097    Integer modes > word size fit into successive GPRs, starting with
10098    an even-numbered register.
10099    SImode and DImode fit into FPRs as well.
10100 
10101    Floating point modes <= word size fit into any FPR or GPR.
10102    Floating point modes > word size (i.e. DFmode on 32-bit) fit
10103    into any FPR, or an even-odd GPR pair.
10104    TFmode fits only into an even-odd FPR pair.
10105 
10106    Complex floating point modes fit either into two FPRs, or into
10107    successive GPRs (again starting with an even number).
10108    TCmode fits only into two successive even-odd FPR pairs.
10109 
10110    Condition code modes fit only into the CC register.  */
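
/* Illustrative example: on 31-bit, DImode is wider than a word and thus
   needs an even/odd GPR pair, so (regno 6, DImode) is OK while
   (regno 7, DImode) is not.  */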
10111 
10112 static bool
10113 s390_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
10114 {
10115   if (!TARGET_VX && VECTOR_NOFP_REGNO_P (regno))
10116     return false;
10117 
10118   switch (REGNO_REG_CLASS (regno))
10119     {
10120     case VEC_REGS:
10121       return ((GET_MODE_CLASS (mode) == MODE_INT
10122 	       && s390_class_max_nregs (VEC_REGS, mode) == 1)
10123 	      || mode == DFmode
10124 	      || (TARGET_VXE && mode == SFmode)
10125 	      || s390_vector_mode_supported_p (mode));
10126       break;
10127     case FP_REGS:
10128       if (TARGET_VX
10129 	  && ((GET_MODE_CLASS (mode) == MODE_INT
10130 	       && s390_class_max_nregs (FP_REGS, mode) == 1)
10131 	      || mode == DFmode
10132 	      || s390_vector_mode_supported_p (mode)))
10133 	return true;
10134 
10135       if (REGNO_PAIR_OK (regno, mode))
10136 	{
10137 	  if (mode == SImode || mode == DImode)
10138 	    return true;
10139 
10140 	  if (FLOAT_MODE_P (mode) && GET_MODE_CLASS (mode) != MODE_VECTOR_FLOAT)
10141 	    return true;
10142 	}
10143       break;
10144     case ADDR_REGS:
10145       if (FRAME_REGNO_P (regno) && mode == Pmode)
10146 	return true;
10147 
10148       /* fallthrough */
10149     case GENERAL_REGS:
10150       if (REGNO_PAIR_OK (regno, mode))
10151 	{
10152 	  if (TARGET_ZARCH
10153 	      || (mode != TFmode && mode != TCmode && mode != TDmode))
10154 	    return true;
10155 	}
10156       break;
10157     case CC_REGS:
10158       if (GET_MODE_CLASS (mode) == MODE_CC)
10159 	return true;
10160       break;
10161     case ACCESS_REGS:
10162       if (REGNO_PAIR_OK (regno, mode))
10163 	{
10164 	  if (mode == SImode || mode == Pmode)
10165 	    return true;
10166 	}
10167       break;
10168     default:
10169       return false;
10170     }
10171 
10172   return false;
10173 }
10174 
10175 /* Implement TARGET_MODES_TIEABLE_P.  */
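
/* Illustrative behaviour (derived from the test below): SFmode and DFmode
   tie with each other, and all remaining modes tie with each other, but the
   two groups do not mix, e.g. (SFmode, SImode) yields false.  */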
10176 
10177 static bool
10178 s390_modes_tieable_p (machine_mode mode1, machine_mode mode2)
10179 {
10180   return ((mode1 == SFmode || mode1 == DFmode)
10181 	  == (mode2 == SFmode || mode2 == DFmode));
10182 }
10183 
10184 /* Return nonzero if register OLD_REG can be renamed to register NEW_REG.  */
10185 
10186 bool
10187 s390_hard_regno_rename_ok (unsigned int old_reg, unsigned int new_reg)
10188 {
10189    /* Once we've decided upon a register to use as base register, it must
10190       no longer be used for any other purpose.  */
10191   if (cfun->machine->base_reg)
10192     if (REGNO (cfun->machine->base_reg) == old_reg
10193 	|| REGNO (cfun->machine->base_reg) == new_reg)
10194       return false;
10195 
10196   /* Prevent regrename from using call-saved regs which haven't
10197      actually been saved.  This is necessary since regrename assumes
10198      the backend save/restore decisions are based on
10199      df_regs_ever_live.  Since we have our own routine we have to tell
10200      regrename manually about it.  */
10201   if (GENERAL_REGNO_P (new_reg)
10202       && !call_really_used_regs[new_reg]
10203       && cfun_gpr_save_slot (new_reg) == SAVE_SLOT_NONE)
10204     return false;
10205 
10206   return true;
10207 }
10208 
10209 /* Return nonzero if register REGNO can be used as a scratch register
10210    in peephole2.  */
10211 
10212 static bool
10213 s390_hard_regno_scratch_ok (unsigned int regno)
10214 {
10215   /* See s390_hard_regno_rename_ok.  */
10216   if (GENERAL_REGNO_P (regno)
10217       && !call_really_used_regs[regno]
10218       && cfun_gpr_save_slot (regno) == SAVE_SLOT_NONE)
10219     return false;
10220 
10221   return true;
10222 }
10223 
10224 /* Implement TARGET_HARD_REGNO_CALL_PART_CLOBBERED.  When generating
10225    code that runs in z/Architecture mode, but conforms to the 31-bit
10226    ABI, GPRs can hold 8 bytes; the ABI guarantees only that the lower 4
10227    bytes are saved across calls, however.  */
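
/* Illustrative example (assumed configuration): with -m31 -mzarch, a DImode
   value in r7 keeps only its low 4 bytes across a call, so the hook below
   returns true for (r7, DImode).  */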
10228 
10229 static bool
10230 s390_hard_regno_call_part_clobbered (rtx_insn *insn ATTRIBUTE_UNUSED,
10231 				     unsigned int regno, machine_mode mode)
10232 {
10233   if (!TARGET_64BIT
10234       && TARGET_ZARCH
10235       && GET_MODE_SIZE (mode) > 4
10236       && ((regno >= 6 && regno <= 15) || regno == 32))
10237     return true;
10238 
10239   if (TARGET_VX
10240       && GET_MODE_SIZE (mode) > 8
10241       && (((TARGET_64BIT && regno >= 24 && regno <= 31))
10242 	  || (!TARGET_64BIT && (regno == 18 || regno == 19))))
10243     return true;
10244 
10245   return false;
10246 }
10247 
10248 /* Maximum number of registers to represent a value of mode MODE
10249    in a register of class RCLASS.  */
10250 
10251 int
10252 s390_class_max_nregs (enum reg_class rclass, machine_mode mode)
10253 {
10254   int reg_size;
10255   bool reg_pair_required_p = false;
10256 
10257   switch (rclass)
10258     {
10259     case FP_REGS:
10260     case VEC_REGS:
10261       reg_size = TARGET_VX ? 16 : 8;
10262 
10263       /* TF and TD modes would fit into a VR but we put them into a
10264 	 register pair since we do not have 128bit FP instructions on
10265 	 full VRs.  */
10266       if (TARGET_VX
10267 	  && SCALAR_FLOAT_MODE_P (mode)
10268 	  && GET_MODE_SIZE (mode) >= 16)
10269 	reg_pair_required_p = true;
10270 
10271       /* Even if complex types would fit into a single FPR/VR we force
10272 	 them into a register pair to deal with the parts more easily.
10273 	 (FIXME: What about complex ints?)  */
10274       if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
10275 	reg_pair_required_p = true;
10276       break;
10277     case ACCESS_REGS:
10278       reg_size = 4;
10279       break;
10280     default:
10281       reg_size = UNITS_PER_WORD;
10282       break;
10283     }
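
  /* Worked example (illustrative): TFmode is 16 bytes.  With TARGET_VX
     (reg_size 16) the pair case below yields 2 * ((16/2 + 15) / 16) == 2
     registers; without VX (reg_size 8) it is (16 + 7) / 8 == 2 as well.  */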
10284 
10285   if (reg_pair_required_p)
10286     return 2 * ((GET_MODE_SIZE (mode) / 2 + reg_size - 1) / reg_size);
10287 
10288   return (GET_MODE_SIZE (mode) + reg_size - 1) / reg_size;
10289 }
10290 
10291 /* Implement TARGET_CAN_CHANGE_MODE_CLASS.  */
10292 
10293 static bool
10294 s390_can_change_mode_class (machine_mode from_mode,
10295 			    machine_mode to_mode,
10296 			    reg_class_t rclass)
10297 {
10298   machine_mode small_mode;
10299   machine_mode big_mode;
10300 
10301   /* V1TF and TF have different representations in vector
10302      registers.  */
10303   if (reg_classes_intersect_p (VEC_REGS, rclass)
10304       && ((from_mode == V1TFmode && to_mode == TFmode)
10305 	  || (from_mode == TFmode && to_mode == V1TFmode)))
10306     return false;
10307 
10308   if (GET_MODE_SIZE (from_mode) == GET_MODE_SIZE (to_mode))
10309     return true;
10310 
10311   if (GET_MODE_SIZE (from_mode) < GET_MODE_SIZE (to_mode))
10312     {
10313       small_mode = from_mode;
10314       big_mode = to_mode;
10315     }
10316   else
10317     {
10318       small_mode = to_mode;
10319       big_mode = from_mode;
10320     }
10321 
10322   /* Values residing in VRs are little-endian style.  All modes are
10323      placed left-aligned in a VR.  This means that we cannot allow
10324      switching between modes with differing sizes.  Also, even if the
10325      vector facility is available, we still place TFmode values in VR
10326      register pairs, since the only instructions we have operating on
10327      TFmode deal with register pairs.  Therefore we have to allow DFmode
10328      subregs of TFmode to enable the TFmode splitters.  */
10329   if (reg_classes_intersect_p (VEC_REGS, rclass)
10330       && (GET_MODE_SIZE (small_mode) < 8
10331 	  || s390_class_max_nregs (VEC_REGS, big_mode) == 1))
10332     return false;
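
  /* Illustrative consequences of the check above (with the vector facility
     assumed): an SImode <-> DImode change is rejected for VEC_REGS because
     the smaller mode is below 8 bytes, while DFmode <-> TFmode is allowed
     since TFmode occupies a register pair rather than a single VR.  */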
10333 
10334   /* Likewise for access registers, since they have only half the
10335      word size on 64-bit.  */
10336   if (reg_classes_intersect_p (ACCESS_REGS, rclass))
10337     return false;
10338 
10339   return true;
10340 }
10341 
10342 /* Return true if we use LRA instead of reload pass.  */
10343 static bool
10344 s390_lra_p (void)
10345 {
10346   return s390_lra_flag;
10347 }
10348 
10349 /* Return true if register FROM can be eliminated via register TO.  */
10350 
10351 static bool
10352 s390_can_eliminate (const int from, const int to)
10353 {
10354   /* We have not marked the base register as fixed.
10355      Instead, we have an elimination rule BASE_REGNUM -> BASE_REGNUM.
10356      If a function requires the base register, we say here that this
10357      elimination cannot be performed.  This will cause reload to free
10358      up the base register (as if it were fixed).  On the other hand,
10359      if the current function does *not* require the base register, we
10360      say here the elimination succeeds, which in turn allows reload
10361      to allocate the base register for any other purpose.  */
10362   if (from == BASE_REGNUM && to == BASE_REGNUM)
10363     {
10364       s390_init_frame_layout ();
10365       return cfun->machine->base_reg == NULL_RTX;
10366     }
10367 
10368   /* Everything else must point into the stack frame.  */
10369   gcc_assert (to == STACK_POINTER_REGNUM
10370 	      || to == HARD_FRAME_POINTER_REGNUM);
10371 
10372   gcc_assert (from == FRAME_POINTER_REGNUM
10373 	      || from == ARG_POINTER_REGNUM
10374 	      || from == RETURN_ADDRESS_POINTER_REGNUM);
10375 
10376   /* Make sure we actually saved the return address.  */
10377   if (from == RETURN_ADDRESS_POINTER_REGNUM)
10378     if (!crtl->calls_eh_return
10379 	&& !cfun->stdarg
10380 	&& !cfun_frame_layout.save_return_addr_p)
10381       return false;
10382 
10383   return true;
10384 }
10385 
10386 /* Return offset between register FROM and TO initially after prolog.  */
10387 
10388 HOST_WIDE_INT
10389 s390_initial_elimination_offset (int from, int to)
10390 {
10391   HOST_WIDE_INT offset;
10392 
10393   /* ??? Why are we called for non-eliminable pairs?  */
10394   if (!s390_can_eliminate (from, to))
10395     return 0;
10396 
10397   switch (from)
10398     {
10399     case FRAME_POINTER_REGNUM:
10400       offset = (get_frame_size()
10401 		+ STACK_POINTER_OFFSET
10402 		+ crtl->outgoing_args_size);
10403       break;
10404 
10405     case ARG_POINTER_REGNUM:
10406       s390_init_frame_layout ();
10407       offset = cfun_frame_layout.frame_size + STACK_POINTER_OFFSET;
10408       break;
10409 
10410     case RETURN_ADDRESS_POINTER_REGNUM:
10411       s390_init_frame_layout ();
10412 
10413       if (cfun_frame_layout.first_save_gpr_slot == -1)
10414 	{
10415 	  /* If it turns out that for stdarg nothing went into the reg
10416 	     save area we also do not need the return address
10417 	     pointer.  */
10418 	  if (cfun->stdarg && !cfun_save_arg_fprs_p)
10419 	    return 0;
10420 
10421 	  gcc_unreachable ();
10422 	}
10423 
10424       /* In order to make the following work it is not necessary for
10425 	 r14 to have a save slot.  It is sufficient if one other GPR
10426 	 got one.  Since the GPRs are always stored without gaps we
10427 	 are able to calculate where the r14 save slot would
10428 	 reside.  */
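      /* Worked example (illustrative, 64-bit): if the first GPR slot saved
	 is r13 at gprs_offset, r14's slot lies (14 - 13) * 8 bytes above
	 it, even if r14 itself was not saved.  */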
10429       offset = (cfun_frame_layout.frame_size + cfun_frame_layout.gprs_offset +
10430 		(RETURN_REGNUM - cfun_frame_layout.first_save_gpr_slot) *
10431 		UNITS_PER_LONG);
10432       break;
10433 
10434     case BASE_REGNUM:
10435       offset = 0;
10436       break;
10437 
10438     default:
10439       gcc_unreachable ();
10440     }
10441 
10442   return offset;
10443 }
10444 
10445 /* Emit insn to save fpr REGNUM at offset OFFSET relative
10446    to register BASE.  Return generated insn.  */
10447 
10448 static rtx
10449 save_fpr (rtx base, int offset, int regnum)
10450 {
10451   rtx addr;
10452   addr = gen_rtx_MEM (DFmode, plus_constant (Pmode, base, offset));
10453 
10454   if (regnum >= 16 && regnum <= (16 + FP_ARG_NUM_REG))
10455     set_mem_alias_set (addr, get_varargs_alias_set ());
10456   else
10457     set_mem_alias_set (addr, get_frame_alias_set ());
10458 
10459   return emit_move_insn (addr, gen_rtx_REG (DFmode, regnum));
10460 }
10461 
10462 /* Emit insn to restore fpr REGNUM from offset OFFSET relative
10463    to register BASE.  Return generated insn.  */
10464 
10465 static rtx
10466 restore_fpr (rtx base, int offset, int regnum)
10467 {
10468   rtx addr;
10469   addr = gen_rtx_MEM (DFmode, plus_constant (Pmode, base, offset));
10470   set_mem_alias_set (addr, get_frame_alias_set ());
10471 
10472   return emit_move_insn (gen_rtx_REG (DFmode, regnum), addr);
10473 }
10474 
10475 /* Generate insn to save registers FIRST to LAST into
10476    the register save area located at offset OFFSET
10477    relative to register BASE.  */
10478 
10479 static rtx
10480 save_gprs (rtx base, int offset, int first, int last)
10481 {
10482   rtx addr, insn, note;
10483   int i;
10484 
10485   addr = plus_constant (Pmode, base, offset);
10486   addr = gen_rtx_MEM (Pmode, addr);
10487 
10488   set_mem_alias_set (addr, get_frame_alias_set ());
10489 
10490   /* Special-case single register.  */
10491   if (first == last)
10492     {
10493       if (TARGET_64BIT)
10494 	insn = gen_movdi (addr, gen_rtx_REG (Pmode, first));
10495       else
10496 	insn = gen_movsi (addr, gen_rtx_REG (Pmode, first));
10497 
10498       if (!global_not_special_regno_p (first))
10499 	RTX_FRAME_RELATED_P (insn) = 1;
10500       return insn;
10501     }
10502 
10503 
10504   insn = gen_store_multiple (addr,
10505 			     gen_rtx_REG (Pmode, first),
10506 			     GEN_INT (last - first + 1));
10507 
10508   if (first <= 6 && cfun->stdarg)
10509     for (i = 0; i < XVECLEN (PATTERN (insn), 0); i++)
10510       {
10511 	rtx mem = XEXP (XVECEXP (PATTERN (insn), 0, i), 0);
10512 
10513 	if (first + i <= 6)
10514 	  set_mem_alias_set (mem, get_varargs_alias_set ());
10515       }
10516 
10517   /* We need to set the FRAME_RELATED flag on all SETs
10518      inside the store-multiple pattern.
10519 
10520      However, we must not emit DWARF records for registers 2..5
10521      if they are stored for use by variable arguments ...
10522 
10523      ??? Unfortunately, it is not enough to simply not set the
10524      FRAME_RELATED flags for those SETs, because the first SET
10525      of the PARALLEL is always treated as if it had the flag
10526      set, even if it does not.  Therefore we emit a new pattern
10527      without those registers as REG_FRAME_RELATED_EXPR note.  */
10528 
10529   if (first >= 6 && !global_not_special_regno_p (first))
10530     {
10531       rtx pat = PATTERN (insn);
10532 
10533       for (i = 0; i < XVECLEN (pat, 0); i++)
10534 	if (GET_CODE (XVECEXP (pat, 0, i)) == SET
10535 	    && !global_not_special_regno_p (REGNO (SET_SRC (XVECEXP (pat,
10536 								     0, i)))))
10537 	  RTX_FRAME_RELATED_P (XVECEXP (pat, 0, i)) = 1;
10538 
10539       RTX_FRAME_RELATED_P (insn) = 1;
10540     }
10541   else if (last >= 6)
10542     {
10543       int start;
10544 
10545       for (start = first >= 6 ? first : 6; start <= last; start++)
10546 	if (!global_not_special_regno_p (start))
10547 	  break;
10548 
10549       if (start > last)
10550 	return insn;
10551 
10552       addr = plus_constant (Pmode, base,
10553 			    offset + (start - first) * UNITS_PER_LONG);
10554 
10555       if (start == last)
10556 	{
10557 	  if (TARGET_64BIT)
10558 	    note = gen_movdi (gen_rtx_MEM (Pmode, addr),
10559 			      gen_rtx_REG (Pmode, start));
10560 	  else
10561 	    note = gen_movsi (gen_rtx_MEM (Pmode, addr),
10562 			      gen_rtx_REG (Pmode, start));
10563 	  note = PATTERN (note);
10564 
10565 	  add_reg_note (insn, REG_FRAME_RELATED_EXPR, note);
10566 	  RTX_FRAME_RELATED_P (insn) = 1;
10567 
10568 	  return insn;
10569 	}
10570 
10571       note = gen_store_multiple (gen_rtx_MEM (Pmode, addr),
10572 				 gen_rtx_REG (Pmode, start),
10573 				 GEN_INT (last - start + 1));
10574       note = PATTERN (note);
10575 
10576       add_reg_note (insn, REG_FRAME_RELATED_EXPR, note);
10577 
10578       for (i = 0; i < XVECLEN (note, 0); i++)
10579 	if (GET_CODE (XVECEXP (note, 0, i)) == SET
10580 	    && !global_not_special_regno_p (REGNO (SET_SRC (XVECEXP (note,
10581 								     0, i)))))
10582 	  RTX_FRAME_RELATED_P (XVECEXP (note, 0, i)) = 1;
10583 
10584       RTX_FRAME_RELATED_P (insn) = 1;
10585     }
10586 
10587   return insn;
10588 }
10589 
10590 /* Generate insn to restore registers FIRST to LAST from
10591    the register save area located at offset OFFSET
10592    relative to register BASE.  */
10593 
10594 static rtx
10595 restore_gprs (rtx base, int offset, int first, int last)
10596 {
10597   rtx addr, insn;
10598 
10599   addr = plus_constant (Pmode, base, offset);
10600   addr = gen_rtx_MEM (Pmode, addr);
10601   set_mem_alias_set (addr, get_frame_alias_set ());
10602 
10603   /* Special-case single register.  */
10604   if (first == last)
10605     {
10606       if (TARGET_64BIT)
10607 	insn = gen_movdi (gen_rtx_REG (Pmode, first), addr);
10608       else
10609 	insn = gen_movsi (gen_rtx_REG (Pmode, first), addr);
10610 
10611       RTX_FRAME_RELATED_P (insn) = 1;
10612       return insn;
10613     }
10614 
10615   insn = gen_load_multiple (gen_rtx_REG (Pmode, first),
10616 			    addr,
10617 			    GEN_INT (last - first + 1));
10618   RTX_FRAME_RELATED_P (insn) = 1;
10619   return insn;
10620 }
10621 
10622 /* Return insn sequence to load the GOT register.  */
10623 
10624 rtx_insn *
10625 s390_load_got (void)
10626 {
10627   rtx_insn *insns;
10628 
10629   /* We cannot use pic_offset_table_rtx here since this function is
10630      also used for non-PIC code when __tls_get_offset is called; in
10631      that case neither PIC_OFFSET_TABLE_REGNUM nor pic_offset_table_rtx
10632      is usable.  */
10633   rtx got_rtx = gen_rtx_REG (Pmode, 12);
10634 
10635   start_sequence ();
10636 
10637   emit_move_insn (got_rtx, s390_got_symbol ());
10638 
10639   insns = get_insns ();
10640   end_sequence ();
10641   return insns;
10642 }
10643 
10644 /* This ties together stack memory (MEM with an alias set of frame_alias_set)
10645    and the change to the stack pointer.  */
10646 
10647 static void
10648 s390_emit_stack_tie (void)
10649 {
10650   rtx mem = gen_frame_mem (BLKmode,
10651 			   gen_rtx_REG (Pmode, STACK_POINTER_REGNUM));
10652 
10653   emit_insn (gen_stack_tie (mem));
10654 }
10655 
10656 /* Copy GPRS into FPR save slots.  */
10657 
10658 static void
10659 s390_save_gprs_to_fprs (void)
10660 {
10661   int i;
10662 
10663   if (!TARGET_Z10 || !TARGET_HARD_FLOAT || !crtl->is_leaf)
10664     return;
10665 
10666   for (i = 6; i < 16; i++)
10667     {
10668       if (FP_REGNO_P (cfun_gpr_save_slot (i)))
10669 	{
10670 	  rtx_insn *insn =
10671 	    emit_move_insn (gen_rtx_REG (DImode, cfun_gpr_save_slot (i)),
10672 			    gen_rtx_REG (DImode, i));
10673 	  RTX_FRAME_RELATED_P (insn) = 1;
10674 	  /* This prevents dwarf2cfi from interpreting the set.  Otherwise
10675 	     it might emit def_cfa_register notes that make an FPR the
10676 	     new CFA.  */
10677 	  add_reg_note (insn, REG_CFA_REGISTER, copy_rtx (PATTERN (insn)));
10678 	}
10679     }
10680 }
10681 
10682 /* Restore GPRs from FPR save slots.  */
10683 
10684 static void
10685 s390_restore_gprs_from_fprs (void)
10686 {
10687   int i;
10688 
10689   if (!TARGET_Z10 || !TARGET_HARD_FLOAT || !crtl->is_leaf)
10690     return;
10691 
10692   /* Restore the GPRs starting with the stack pointer.  That way the
10693      stack pointer already has its original value when it comes to
10694      restoring the hard frame pointer.  So we can set the cfa reg back
10695      to the stack pointer.  */
10696   for (i = STACK_POINTER_REGNUM; i >= 6; i--)
10697     {
10698       rtx_insn *insn;
10699 
10700       if (!FP_REGNO_P (cfun_gpr_save_slot (i)))
10701 	continue;
10702 
10703       rtx fpr = gen_rtx_REG (DImode, cfun_gpr_save_slot (i));
10704 
10705       if (i == STACK_POINTER_REGNUM)
10706 	insn = emit_insn (gen_stack_restore_from_fpr (fpr));
10707       else
10708 	insn = emit_move_insn (gen_rtx_REG (DImode, i), fpr);
10709 
10710       df_set_regs_ever_live (i, true);
10711       add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DImode, i));
10712 
10713       /* If either the stack pointer or the frame pointer gets restored,
10714 	 set the CFA value to its value at function start.  Doing this
10715 	 for the frame pointer results in .cfi_def_cfa_register 15,
10716 	 which is ok since, if the stack pointer was modified, it has
10717 	 already been restored.  */
10718       if (i == STACK_POINTER_REGNUM || i == HARD_FRAME_POINTER_REGNUM)
10719 	add_reg_note (insn, REG_CFA_DEF_CFA,
10720 		      plus_constant (Pmode, stack_pointer_rtx,
10721 				     STACK_POINTER_OFFSET));
10722       RTX_FRAME_RELATED_P (insn) = 1;
10723     }
10724 }
10725 
10726 
10727 /* A pass run immediately before shrink-wrapping and prologue and epilogue
10728    generation.  */
10729 
10730 namespace {
10731 
10732 const pass_data pass_data_s390_early_mach =
10733 {
10734   RTL_PASS, /* type */
10735   "early_mach", /* name */
10736   OPTGROUP_NONE, /* optinfo_flags */
10737   TV_MACH_DEP, /* tv_id */
10738   0, /* properties_required */
10739   0, /* properties_provided */
10740   0, /* properties_destroyed */
10741   0, /* todo_flags_start */
10742   ( TODO_df_verify | TODO_df_finish ), /* todo_flags_finish */
10743 };
10744 
10745 class pass_s390_early_mach : public rtl_opt_pass
10746 {
10747 public:
10748   pass_s390_early_mach (gcc::context *ctxt)
10749     : rtl_opt_pass (pass_data_s390_early_mach, ctxt)
10750   {}
10751 
10752   /* opt_pass methods: */
10753   virtual unsigned int execute (function *);
10754 
10755 }; // class pass_s390_early_mach
10756 
10757 unsigned int
10758 pass_s390_early_mach::execute (function *fun)
10759 {
10760   rtx_insn *insn;
10761 
10762   /* Try to get rid of the FPR clobbers.  */
10763   s390_optimize_nonescaping_tx ();
10764 
10765   /* Re-compute register info.  */
10766   s390_register_info ();
10767 
10768   /* If we're using a base register, ensure that it is always valid for
10769      the first non-prologue instruction.  */
10770   if (fun->machine->base_reg)
10771     emit_insn_at_entry (gen_main_pool (fun->machine->base_reg));
10772 
10773   /* Annotate all constant pool references to let the scheduler know
10774      they implicitly use the base register.  */
10775   for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
10776     if (INSN_P (insn))
10777       {
10778 	annotate_constant_pool_refs (insn);
10779 	df_insn_rescan (insn);
10780       }
10781   return 0;
10782 }
10783 
10784 } // anon namespace
10785 
10786 rtl_opt_pass *
10787 make_pass_s390_early_mach (gcc::context *ctxt)
10788 {
10789   return new pass_s390_early_mach (ctxt);
10790 }
10791 
10792 /* Calculate TARGET = REG + OFFSET as s390_emit_prologue would do it.
10793    - push too big immediates to the literal pool and annotate the refs
10794    - emit frame related notes for stack pointer changes.  */
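
/* Roughly (illustrative summary of the cases below): an offset of zero
   becomes a plain register copy, an offset within displacement range an
   address load, an offset fitting the add-immediate forms a direct add, and
   anything larger may first be pushed into the literal pool.  */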
10795 
10796 static rtx
10797 s390_prologue_plus_offset (rtx target, rtx reg, rtx offset, bool frame_related_p)
10798 {
10799   rtx_insn *insn;
10800   rtx orig_offset = offset;
10801 
10802   gcc_assert (REG_P (target));
10803   gcc_assert (REG_P (reg));
10804   gcc_assert (CONST_INT_P (offset));
10805 
10806   if (offset == const0_rtx)                               /* lr/lgr */
10807     {
10808       insn = emit_move_insn (target, reg);
10809     }
10810   else if (DISP_IN_RANGE (INTVAL (offset)))               /* la */
10811     {
10812       insn = emit_move_insn (target, gen_rtx_PLUS (Pmode, reg,
10813 						   offset));
10814     }
10815   else
10816     {
10817       if (!satisfies_constraint_K (offset)                /* ahi/aghi */
10818 	  && (!TARGET_EXTIMM
10819 	      || (!satisfies_constraint_Op (offset)       /* alfi/algfi */
10820 		  && !satisfies_constraint_On (offset)))) /* slfi/slgfi */
10821 	offset = force_const_mem (Pmode, offset);
10822 
10823       if (target != reg)
10824 	{
10825 	  insn = emit_move_insn (target, reg);
10826 	  RTX_FRAME_RELATED_P (insn) = frame_related_p ? 1 : 0;
10827 	}
10828 
10829       insn = emit_insn (gen_add2_insn (target, offset));
10830 
10831       if (!CONST_INT_P (offset))
10832 	{
10833 	  annotate_constant_pool_refs (insn);
10834 
10835 	  if (frame_related_p)
10836 	    add_reg_note (insn, REG_FRAME_RELATED_EXPR,
10837 			  gen_rtx_SET (target,
10838 				       gen_rtx_PLUS (Pmode, target,
10839 						     orig_offset)));
10840 	}
10841     }
10842 
10843   RTX_FRAME_RELATED_P (insn) = frame_related_p ? 1 : 0;
10844 
10845   /* If this is a stack adjustment and we are generating a stack clash
10846      prologue, then add a REG_STACK_CHECK note to signal that this insn
10847      should be left alone.  */
10848   if (flag_stack_clash_protection && target == stack_pointer_rtx)
10849     add_reg_note (insn, REG_STACK_CHECK, const0_rtx);
10850 
10851   return insn;
10852 }
10853 
10854 /* Emit a compare instruction with a volatile memory access as stack
10855    probe.  It does not waste store tags and does not clobber any
10856    registers apart from the condition code.  */
10857 static void
10858 s390_emit_stack_probe (rtx addr)
10859 {
10860   rtx tmp = gen_rtx_MEM (Pmode, addr);
10861   MEM_VOLATILE_P (tmp) = 1;
10862   s390_emit_compare (EQ, gen_rtx_REG (Pmode, 0), tmp);
10863   emit_insn (gen_blockage ());
10864 }
10865 
10866 /* Use a runtime loop if we have to emit more probes than this.  */
10867 #define MIN_UNROLL_PROBES 3
10868 
10869 /* Allocate SIZE bytes of stack space, using TEMP_REG as a temporary
10870    if necessary.  LAST_PROBE_OFFSET contains the offset of the closest
10871    probe relative to the stack pointer.
10872 
10873    Note that SIZE is negative.
10874 
10875    The return value is true if TEMP_REG has been clobbered.  */
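/* Worked example (values assumed): with a 4 KiB probe interval and
   SIZE == -10 KiB, rounded_size is 8 KiB, giving two unrolled probes
   (fewer than MIN_UNROLL_PROBES), followed by a 2 KiB residual
   allocation.  */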
10876 static bool
10877 allocate_stack_space (rtx size, HOST_WIDE_INT last_probe_offset,
10878 		      rtx temp_reg)
10879 {
10880   bool temp_reg_clobbered_p = false;
10881   HOST_WIDE_INT probe_interval
10882     = 1 << PARAM_VALUE (PARAM_STACK_CLASH_PROTECTION_PROBE_INTERVAL);
10883   HOST_WIDE_INT guard_size
10884     = 1 << PARAM_VALUE (PARAM_STACK_CLASH_PROTECTION_GUARD_SIZE);
10885 
10886   if (flag_stack_clash_protection)
10887     {
10888       if (last_probe_offset + -INTVAL (size) < guard_size)
10889 	dump_stack_clash_frame_info (NO_PROBE_SMALL_FRAME, true);
10890       else
10891 	{
10892 	  rtx offset = GEN_INT (probe_interval - UNITS_PER_LONG);
10893 	  HOST_WIDE_INT rounded_size = -INTVAL (size) & -probe_interval;
10894 	  HOST_WIDE_INT num_probes = rounded_size / probe_interval;
10895 	  HOST_WIDE_INT residual = -INTVAL (size) - rounded_size;
10896 
10897 	  if (num_probes < MIN_UNROLL_PROBES)
10898 	    {
10899 	      /* Emit unrolled probe statements.  */
10900 
10901 	      for (unsigned int i = 0; i < num_probes; i++)
10902 		{
10903 		  s390_prologue_plus_offset (stack_pointer_rtx,
10904 					     stack_pointer_rtx,
10905 					     GEN_INT (-probe_interval), true);
10906 		  s390_emit_stack_probe (gen_rtx_PLUS (Pmode,
10907 						       stack_pointer_rtx,
10908 						       offset));
10909 		}
10910 	      dump_stack_clash_frame_info (PROBE_INLINE, residual != 0);
10911 	    }
10912 	  else
10913 	    {
10914 	      /* Emit a loop probing the pages.  */
10915 
10916 	      rtx_code_label *loop_start_label = gen_label_rtx ();
10917 
10918 	      /* From now on temp_reg will be the CFA register.  */
10919 	      s390_prologue_plus_offset (temp_reg, stack_pointer_rtx,
10920 					 GEN_INT (-rounded_size), true);
10921 	      emit_label (loop_start_label);
10922 
10923 	      s390_prologue_plus_offset (stack_pointer_rtx,
10924 					 stack_pointer_rtx,
10925 					 GEN_INT (-probe_interval), false);
10926 	      s390_emit_stack_probe (gen_rtx_PLUS (Pmode,
10927 						   stack_pointer_rtx,
10928 						   offset));
10929 	      emit_cmp_and_jump_insns (stack_pointer_rtx, temp_reg,
10930 				       GT, NULL_RTX,
10931 				       Pmode, 1, loop_start_label);
10932 
10933 	      /* Without this make_edges ICEes.  */
10934 	      JUMP_LABEL (get_last_insn ()) = loop_start_label;
10935 	      LABEL_NUSES (loop_start_label) = 1;
10936 
10937 	      /* That's going to be a NOP since stack pointer and
10938 		 temp_reg are supposed to be the same here.  We just
10939 		 emit it to set the CFA reg back to r15.  */
10940 	      s390_prologue_plus_offset (stack_pointer_rtx, temp_reg,
10941 					 const0_rtx, true);
10942 	      temp_reg_clobbered_p = true;
10943 	      dump_stack_clash_frame_info (PROBE_LOOP, residual != 0);
10944 	    }
10945 
10946 	  /* Handle any residual allocation request.  */
10947 	  s390_prologue_plus_offset (stack_pointer_rtx,
10948 				     stack_pointer_rtx,
10949 				     GEN_INT (-residual), true);
10950 	  last_probe_offset += residual;
10951 	  if (last_probe_offset >= probe_interval)
10952 	    s390_emit_stack_probe (gen_rtx_PLUS (Pmode,
10953 						 stack_pointer_rtx,
10954 						 GEN_INT (residual
10955 							  - UNITS_PER_LONG)));
10956 
10957 	  return temp_reg_clobbered_p;
10958 	}
10959     }
10960 
10961   /* Subtract frame size from stack pointer.  */
10962   s390_prologue_plus_offset (stack_pointer_rtx,
10963 			     stack_pointer_rtx,
10964 			     size, true);
10965 
10966   return temp_reg_clobbered_p;
10967 }
10968 
10969 /* Expand the prologue into a bunch of separate insns.  */
10970 
10971 void
10972 s390_emit_prologue (void)
10973 {
10974   rtx insn, addr;
10975   rtx temp_reg;
10976   int i;
10977   int offset;
10978   int next_fpr = 0;
10979 
10980   /* Choose best register to use for temp use within prologue.
10981      TPF with profiling must avoid the register 14 - the tracing function
10982      needs the original contents of r14 to be preserved.  */
10983 
10984   if (!has_hard_reg_initial_val (Pmode, RETURN_REGNUM)
10985       && !crtl->is_leaf
10986       && !TARGET_TPF_PROFILING)
10987     temp_reg = gen_rtx_REG (Pmode, RETURN_REGNUM);
10988   else if (flag_split_stack && cfun->stdarg)
10989     temp_reg = gen_rtx_REG (Pmode, 12);
10990   else
10991     temp_reg = gen_rtx_REG (Pmode, 1);
10992 
10993   /* When probing for stack-clash mitigation, we have to track the distance
10994      between the stack pointer and closest known reference.
10995 
10996      Most of the time we have to make a worst case assumption.  The
10997      only exception is when TARGET_BACKCHAIN is active, in which case
10998      we know *sp (offset 0) was written.  */
10999   HOST_WIDE_INT probe_interval
11000     = 1 << PARAM_VALUE (PARAM_STACK_CLASH_PROTECTION_PROBE_INTERVAL);
11001   HOST_WIDE_INT last_probe_offset
11002     = (TARGET_BACKCHAIN
11003        ? (TARGET_PACKED_STACK ? STACK_POINTER_OFFSET - UNITS_PER_LONG : 0)
11004        : probe_interval - (STACK_BOUNDARY / UNITS_PER_WORD));
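  /* E.g. (illustrative, 64-bit, 4 KiB probe interval): without backchain the
     worst-case distance assumed above is 4096 - 8 == 4088 bytes.  */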
11005 
11006   s390_save_gprs_to_fprs ();
11007 
11008   /* Save call saved gprs.  */
11009   if (cfun_frame_layout.first_save_gpr != -1)
11010     {
11011       insn = save_gprs (stack_pointer_rtx,
11012 			cfun_frame_layout.gprs_offset +
11013 			UNITS_PER_LONG * (cfun_frame_layout.first_save_gpr
11014 					  - cfun_frame_layout.first_save_gpr_slot),
11015 			cfun_frame_layout.first_save_gpr,
11016 			cfun_frame_layout.last_save_gpr);
11017 
11018       /* This is not 100% correct.  If we have more than one register saved,
11019 	 then LAST_PROBE_OFFSET can move even closer to sp.  */
11020       last_probe_offset
11021 	= (cfun_frame_layout.gprs_offset +
11022 	   UNITS_PER_LONG * (cfun_frame_layout.first_save_gpr
11023 			     - cfun_frame_layout.first_save_gpr_slot));
11024 
11025       emit_insn (insn);
11026     }
11027 
11028   /* Dummy insn to mark literal pool slot.  */
11029 
11030   if (cfun->machine->base_reg)
11031     emit_insn (gen_main_pool (cfun->machine->base_reg));
11032 
11033   offset = cfun_frame_layout.f0_offset;
11034 
11035   /* Save f0 and f2.  */
11036   for (i = FPR0_REGNUM; i <= FPR0_REGNUM + 1; i++)
11037     {
11038       if (cfun_fpr_save_p (i))
11039 	{
11040 	  save_fpr (stack_pointer_rtx, offset, i);
11041 	  if (offset < last_probe_offset)
11042 	    last_probe_offset = offset;
11043 	  offset += 8;
11044 	}
11045       else if (!TARGET_PACKED_STACK || cfun->stdarg)
11046 	offset += 8;
11047     }
11048 
11049   /* Save f4 and f6.  */
11050   offset = cfun_frame_layout.f4_offset;
11051   for (i = FPR4_REGNUM; i <= FPR4_REGNUM + 1; i++)
11052     {
11053       if (cfun_fpr_save_p (i))
11054 	{
11055 	  insn = save_fpr (stack_pointer_rtx, offset, i);
11056 	  if (offset < last_probe_offset)
11057 	    last_probe_offset = offset;
11058 	  offset += 8;
11059 
11060 	  /* If f4 and f6 are call clobbered they are saved due to
11061 	     stdargs and therefore are not frame related.  */
11062 	  if (!call_really_used_regs[i])
11063 	    RTX_FRAME_RELATED_P (insn) = 1;
11064 	}
11065       else if (!TARGET_PACKED_STACK || call_really_used_regs[i])
11066 	offset += 8;
11067     }
11068 
11069   if (TARGET_PACKED_STACK
11070       && cfun_save_high_fprs_p
11071       && cfun_frame_layout.f8_offset + cfun_frame_layout.high_fprs * 8 > 0)
11072     {
11073       offset = (cfun_frame_layout.f8_offset
11074 		+ (cfun_frame_layout.high_fprs - 1) * 8);
11075 
11076       for (i = FPR15_REGNUM; i >= FPR8_REGNUM && offset >= 0; i--)
11077 	if (cfun_fpr_save_p (i))
11078 	  {
11079 	    insn = save_fpr (stack_pointer_rtx, offset, i);
11080 	    if (offset < last_probe_offset)
11081 	      last_probe_offset = offset;
11082 
11083 	    RTX_FRAME_RELATED_P (insn) = 1;
11084 	    offset -= 8;
11085 	  }
11086       if (offset >= cfun_frame_layout.f8_offset)
11087 	next_fpr = i;
11088     }
11089 
11090   if (!TARGET_PACKED_STACK)
11091     next_fpr = cfun_save_high_fprs_p ? FPR15_REGNUM : 0;
11092 
11093   if (flag_stack_usage_info)
11094     current_function_static_stack_size = cfun_frame_layout.frame_size;
11095 
11096   /* Decrement stack pointer.  */
11097 
11098   if (cfun_frame_layout.frame_size > 0)
11099     {
11100       rtx frame_off = GEN_INT (-cfun_frame_layout.frame_size);
11101       rtx_insn *stack_pointer_backup_loc;
11102       bool temp_reg_clobbered_p;
11103 
11104       if (s390_stack_size)
11105 	{
11106 	  HOST_WIDE_INT stack_guard;
11107 
11108 	  if (s390_stack_guard)
11109 	    stack_guard = s390_stack_guard;
11110 	  else
11111 	    {
11112 	      /* If no value for the stack guard is provided, the smallest power
11113 		 of 2 that is at least the current frame size is chosen.  */
11114 	      stack_guard = 1;
11115 	      while (stack_guard < cfun_frame_layout.frame_size)
11116 		stack_guard <<= 1;
11117 	    }
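	  /* E.g. (illustrative): a frame size of 1200 bytes yields a
	     computed stack guard of 2048 bytes here.  */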
11118 
11119 	  if (cfun_frame_layout.frame_size >= s390_stack_size)
11120 	    {
11121 	      warning (0, "frame size of function %qs is %wd"
11122 		       " bytes exceeding user provided stack limit of "
11123 		       "%d bytes.  "
11124 		       "An unconditional trap is added.",
11125 		       current_function_name(), cfun_frame_layout.frame_size,
11126 		       s390_stack_size);
11127 	      emit_insn (gen_trap ());
11128 	      emit_barrier ();
11129 	    }
11130 	  else
11131 	    {
11132 	      /* stack_guard has to be smaller than s390_stack_size.
11133 		 Otherwise we would emit an AND with zero which would
11134 		 not match the test under mask pattern.  */
11135 	      if (stack_guard >= s390_stack_size)
11136 		{
11137 		  warning (0, "frame size of function %qs is %wd"
11138 			   " bytes which is more than half the stack size. "
11139 			   "The dynamic check would not be reliable. "
11140 			   "No check emitted for this function.",
11141 			   current_function_name(),
11142 			   cfun_frame_layout.frame_size);
11143 		}
11144 	      else
11145 		{
11146 		  HOST_WIDE_INT stack_check_mask = ((s390_stack_size - 1)
11147 						    & ~(stack_guard - 1));
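		  /* Worked example (values assumed): s390_stack_size == 65536
		     and stack_guard == 4096 give a mask of 0xf000; the
		     conditional trap below fires when SP & 0xf000 is zero.  */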
11148 
11149 		  rtx t = gen_rtx_AND (Pmode, stack_pointer_rtx,
11150 				       GEN_INT (stack_check_mask));
11151 		  if (TARGET_64BIT)
11152 		    emit_insn (gen_ctrapdi4 (gen_rtx_EQ (VOIDmode,
11153 							 t, const0_rtx),
11154 					     t, const0_rtx, const0_rtx));
11155 		  else
11156 		    emit_insn (gen_ctrapsi4 (gen_rtx_EQ (VOIDmode,
11157 							 t, const0_rtx),
11158 					     t, const0_rtx, const0_rtx));
11159 		}
11160 	    }
11161 	}
11162 
11163       if (s390_warn_framesize > 0
11164 	  && cfun_frame_layout.frame_size >= s390_warn_framesize)
11165 	warning (0, "frame size of %qs is %wd bytes",
11166 		 current_function_name (), cfun_frame_layout.frame_size);
11167 
11168       if (s390_warn_dynamicstack_p && cfun->calls_alloca)
11169 	warning (0, "%qs uses dynamic stack allocation", current_function_name ());
11170 
11171       /* Save the location where we could backup the incoming stack
11172 	 pointer.  */
11173       stack_pointer_backup_loc = get_last_insn ();
11174 
11175       temp_reg_clobbered_p = allocate_stack_space (frame_off, last_probe_offset,
11176 						   temp_reg);
11177 
11178       if (TARGET_BACKCHAIN || next_fpr)
11179 	{
11180 	  if (temp_reg_clobbered_p)
11181 	    {
11182 	      /* allocate_stack_space had to make use of temp_reg and
11183 		 we need it to hold a backup of the incoming stack
11184 		 pointer.  Recompute that value from the current
11185 		 stack pointer.  */
11186 	      s390_prologue_plus_offset (temp_reg, stack_pointer_rtx,
11187 					 GEN_INT (cfun_frame_layout.frame_size),
11188 					 false);
11189 	    }
11190 	  else
11191 	    {
11192 	      /* allocate_stack_space didn't actually require
11193 		 temp_reg.  Insert the stack pointer backup insn
11194 		 before the stack pointer decrement code, knowing now
11195 		 that the value will survive.  */
11196 	      emit_insn_after (gen_move_insn (temp_reg, stack_pointer_rtx),
11197 			       stack_pointer_backup_loc);
11198 	    }
11199 	}
11200 
11201       /* Set backchain.  */
11202 
11203       if (TARGET_BACKCHAIN)
11204 	{
11205 	  if (cfun_frame_layout.backchain_offset)
11206 	    addr = gen_rtx_MEM (Pmode,
11207 				plus_constant (Pmode, stack_pointer_rtx,
11208 				  cfun_frame_layout.backchain_offset));
11209 	  else
11210 	    addr = gen_rtx_MEM (Pmode, stack_pointer_rtx);
11211 	  set_mem_alias_set (addr, get_frame_alias_set ());
11212 	  insn = emit_insn (gen_move_insn (addr, temp_reg));
11213 	}
11214 
11215       /* If we support non-call exceptions (e.g. for Java),
11216 	 we need to make sure the backchain pointer is set up
11217 	 before any possibly trapping memory access.  */
11218       if (TARGET_BACKCHAIN && cfun->can_throw_non_call_exceptions)
11219 	{
11220 	  addr = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (VOIDmode));
11221 	  emit_clobber (addr);
11222 	}
11223     }
11224   else if (flag_stack_clash_protection)
11225     dump_stack_clash_frame_info (NO_PROBE_NO_FRAME, false);
11226 
11227   /* Save fprs 8 - 15 (64 bit ABI).  */
11228 
11229   if (cfun_save_high_fprs_p && next_fpr)
11230     {
11231       /* If the stack might be accessed through a different register
11232 	 we have to make sure that the stack pointer decrement is not
11233 	 moved below the use of the stack slots.  */
11234       s390_emit_stack_tie ();
11235 
11236       insn = emit_insn (gen_add2_insn (temp_reg,
11237 				       GEN_INT (cfun_frame_layout.f8_offset)));
11238 
11239       offset = 0;
11240 
11241       for (i = FPR8_REGNUM; i <= next_fpr; i++)
11242 	if (cfun_fpr_save_p (i))
11243 	  {
11244 	    rtx addr = plus_constant (Pmode, stack_pointer_rtx,
11245 				      cfun_frame_layout.frame_size
11246 				      + cfun_frame_layout.f8_offset
11247 				      + offset);
11248 
11249 	    insn = save_fpr (temp_reg, offset, i);
11250 	    offset += 8;
11251 	    RTX_FRAME_RELATED_P (insn) = 1;
11252 	    add_reg_note (insn, REG_FRAME_RELATED_EXPR,
11253 			  gen_rtx_SET (gen_rtx_MEM (DFmode, addr),
11254 				       gen_rtx_REG (DFmode, i)));
11255 	  }
11256     }
11257 
11258   /* Set frame pointer, if needed.  */
11259 
11260   if (frame_pointer_needed)
11261     {
11262       insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
11263       RTX_FRAME_RELATED_P (insn) = 1;
11264     }
11265 
11266   /* Set up got pointer, if needed.  */
11267 
11268   if (flag_pic && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
11269     {
11270       rtx_insn *insns = s390_load_got ();
11271 
11272       for (rtx_insn *insn = insns; insn; insn = NEXT_INSN (insn))
11273 	annotate_constant_pool_refs (insn);
11274 
11275       emit_insn (insns);
11276     }
11277 
11278   if (TARGET_TPF_PROFILING)
11279     {
11280       /* Generate a BAS instruction to serve as a function
11281 	 entry intercept to facilitate the use of tracing
11282 	 algorithms located at the branch target.  */
11283       emit_insn (gen_prologue_tpf ());
11284 
11285       /* Emit a blockage here so that all code
11286 	 lies between the profiling mechanisms.  */
11287       emit_insn (gen_blockage ());
11288     }
11289 }
11290 
11291 /* Expand the epilogue into a bunch of separate insns.  */
11292 
11293 void
11294 s390_emit_epilogue (bool sibcall)
11295 {
11296   rtx frame_pointer, return_reg = NULL_RTX, cfa_restores = NULL_RTX;
11297   int area_bottom, area_top, offset = 0;
11298   int next_offset;
11299   int i;
11300 
11301   if (TARGET_TPF_PROFILING)
11302     {
11303 
11304       /* Generate a BAS instruction to serve as a function
11305 	 entry intercept to facilitate the use of tracing
11306 	 algorithms located at the branch target.  */
11307 
11308       /* Emit a blockage here so that all code
11309 	 lies between the profiling mechanisms.  */
11310       emit_insn (gen_blockage ());
11311 
11312       emit_insn (gen_epilogue_tpf ());
11313     }
11314 
11315   /* Check whether to use frame or stack pointer for restore.  */
11316 
11317   frame_pointer = (frame_pointer_needed
11318 		   ? hard_frame_pointer_rtx : stack_pointer_rtx);
11319 
11320   s390_frame_area (&area_bottom, &area_top);
11321 
11322   /* Check whether we can access the register save area.
11323      If not, increment the frame pointer as required.  */
11324 
11325   if (area_top <= area_bottom)
11326     {
11327       /* Nothing to restore.  */
11328     }
11329   else if (DISP_IN_RANGE (cfun_frame_layout.frame_size + area_bottom)
11330 	   && DISP_IN_RANGE (cfun_frame_layout.frame_size + area_top - 1))
11331     {
11332       /* Area is in range.  */
11333       offset = cfun_frame_layout.frame_size;
11334     }
11335   else
11336     {
11337       rtx_insn *insn;
11338       rtx frame_off, cfa;
11339 
11340       offset = area_bottom < 0 ? -area_bottom : 0;
11341       frame_off = GEN_INT (cfun_frame_layout.frame_size - offset);
11342 
11343       cfa = gen_rtx_SET (frame_pointer,
11344 			 gen_rtx_PLUS (Pmode, frame_pointer, frame_off));
11345       if (DISP_IN_RANGE (INTVAL (frame_off)))
11346 	{
11347 	  rtx set;
11348 
11349 	  set = gen_rtx_SET (frame_pointer,
11350 			     gen_rtx_PLUS (Pmode, frame_pointer, frame_off));
11351 	  insn = emit_insn (set);
11352 	}
11353       else
11354 	{
11355 	  if (!CONST_OK_FOR_K (INTVAL (frame_off)))
11356 	    frame_off = force_const_mem (Pmode, frame_off);
11357 
11358 	  insn = emit_insn (gen_add2_insn (frame_pointer, frame_off));
11359 	  annotate_constant_pool_refs (insn);
11360 	}
11361       add_reg_note (insn, REG_CFA_ADJUST_CFA, cfa);
11362       RTX_FRAME_RELATED_P (insn) = 1;
11363     }
11364 
11365   /* Restore call saved fprs.  */
11366 
11367   if (TARGET_64BIT)
11368     {
11369       if (cfun_save_high_fprs_p)
11370 	{
11371 	  next_offset = cfun_frame_layout.f8_offset;
11372 	  for (i = FPR8_REGNUM; i <= FPR15_REGNUM; i++)
11373 	    {
11374 	      if (cfun_fpr_save_p (i))
11375 		{
11376 		  restore_fpr (frame_pointer,
11377 			       offset + next_offset, i);
11378 		  cfa_restores
11379 		    = alloc_reg_note (REG_CFA_RESTORE,
11380 				      gen_rtx_REG (DFmode, i), cfa_restores);
11381 		  next_offset += 8;
11382 		}
11383 	    }
11384 	}
11385 
11386     }
11387   else
11388     {
11389       next_offset = cfun_frame_layout.f4_offset;
11390       /* f4, f6 */
11391       for (i = FPR4_REGNUM; i <= FPR4_REGNUM + 1; i++)
11392 	{
11393 	  if (cfun_fpr_save_p (i))
11394 	    {
11395 	      restore_fpr (frame_pointer,
11396 			   offset + next_offset, i);
11397 	      cfa_restores
11398 		= alloc_reg_note (REG_CFA_RESTORE,
11399 				  gen_rtx_REG (DFmode, i), cfa_restores);
11400 	      next_offset += 8;
11401 	    }
11402 	  else if (!TARGET_PACKED_STACK)
11403 	    next_offset += 8;
11404 	}
11405 
11406     }
11407 
11408   /* Restore call saved gprs.  */
11409 
11410   if (cfun_frame_layout.first_restore_gpr != -1)
11411     {
11412       rtx insn, addr;
11413       int i;
11414 
11415       /* Check for global registers and save them
11416 	 to the stack locations from which they get restored.  */
11417 
11418       for (i = cfun_frame_layout.first_restore_gpr;
11419 	   i <= cfun_frame_layout.last_restore_gpr;
11420 	   i++)
11421 	{
11422 	  if (global_not_special_regno_p (i))
11423 	    {
11424 	      addr = plus_constant (Pmode, frame_pointer,
11425 				    offset + cfun_frame_layout.gprs_offset
11426 				    + (i - cfun_frame_layout.first_save_gpr_slot)
11427 				    * UNITS_PER_LONG);
11428 	      addr = gen_rtx_MEM (Pmode, addr);
11429 	      set_mem_alias_set (addr, get_frame_alias_set ());
11430 	      emit_move_insn (addr, gen_rtx_REG (Pmode, i));
11431 	    }
11432 	  else
11433 	    cfa_restores
11434 	      = alloc_reg_note (REG_CFA_RESTORE,
11435 				gen_rtx_REG (Pmode, i), cfa_restores);
11436 	}
11437 
11438       /* Fetch the return address from the stack before the load multiple;
11439 	 this helps scheduling.
11440 
11441 	 Only do this if we already decided that r14 needs to be
11442 	 saved to a stack slot. (And not just because r14 happens to
11443 	 be in between two GPRs which need saving.)  Otherwise it
11444 	 would be difficult to take that decision back in
11445 	 s390_optimize_prologue.
11446 
11447 	 This optimization is only helpful on in-order machines.  */
11448       if (! sibcall
11449 	  && cfun_gpr_save_slot (RETURN_REGNUM) == SAVE_SLOT_STACK
11450 	  && s390_tune <= PROCESSOR_2097_Z10)
11451 	{
11452 	  int return_regnum = find_unused_clobbered_reg();
11453 	  if (!return_regnum
11454 	      || (TARGET_INDIRECT_BRANCH_NOBP_RET_OPTION
11455 		  && !TARGET_CPU_Z10
11456 		  && return_regnum == INDIRECT_BRANCH_THUNK_REGNUM))
11457 	    {
11458 	      gcc_assert (INDIRECT_BRANCH_THUNK_REGNUM != 4);
11459 	      return_regnum = 4;
11460 	    }
11461 	  return_reg = gen_rtx_REG (Pmode, return_regnum);
11462 
11463 	  addr = plus_constant (Pmode, frame_pointer,
11464 				offset + cfun_frame_layout.gprs_offset
11465 				+ (RETURN_REGNUM
11466 				   - cfun_frame_layout.first_save_gpr_slot)
11467 				* UNITS_PER_LONG);
11468 	  addr = gen_rtx_MEM (Pmode, addr);
11469 	  set_mem_alias_set (addr, get_frame_alias_set ());
11470 	  emit_move_insn (return_reg, addr);
11471 
11472 	  /* Once we did that optimization we have to make sure
11473 	     s390_optimize_prologue does not try to remove the store
11474 	     of r14 since we will not be able to find the load issued
11475 	     here.  */
11476 	  cfun_frame_layout.save_return_addr_p = true;
11477 	}
11478 
11479       insn = restore_gprs (frame_pointer,
11480 			   offset + cfun_frame_layout.gprs_offset
11481 			   + (cfun_frame_layout.first_restore_gpr
11482 			      - cfun_frame_layout.first_save_gpr_slot)
11483 			   * UNITS_PER_LONG,
11484 			   cfun_frame_layout.first_restore_gpr,
11485 			   cfun_frame_layout.last_restore_gpr);
11486       insn = emit_insn (insn);
11487       REG_NOTES (insn) = cfa_restores;
11488       add_reg_note (insn, REG_CFA_DEF_CFA,
11489 		    plus_constant (Pmode, stack_pointer_rtx,
11490 				   STACK_POINTER_OFFSET));
11491       RTX_FRAME_RELATED_P (insn) = 1;
11492     }
11493 
11494   s390_restore_gprs_from_fprs ();
11495 
11496   if (! sibcall)
11497     {
11498       if (!return_reg && !s390_can_use_return_insn ())
11499         /* We planned to emit (return), but we are not allowed to.  */
11500         return_reg = gen_rtx_REG (Pmode, RETURN_REGNUM);
11501 
11502       if (return_reg)
11503         /* Emit (return) and (use).  */
11504         emit_jump_insn (gen_return_use (return_reg));
11505       else
11506         /* The fact that RETURN_REGNUM is used is already reflected by
11507            EPILOGUE_USES.  Emit plain (return).  */
11508         emit_jump_insn (gen_return ());
11509     }
11510 }
11511 
11512 /* Implement TARGET_SET_UP_BY_PROLOGUE.  */
11513 
11514 static void
11515 s300_set_up_by_prologue (hard_reg_set_container *regs)
11516 {
11517   if (cfun->machine->base_reg
11518       && !call_really_used_regs[REGNO (cfun->machine->base_reg)])
11519     SET_HARD_REG_BIT (regs->set, REGNO (cfun->machine->base_reg));
11520 }
11521 
11522 /* -fsplit-stack support.  */
11523 
11524 /* A SYMBOL_REF for __morestack.  */
11525 static GTY(()) rtx morestack_ref;
11526 
11527 /* When using -fsplit-stack, the allocation routines set a field in
11528    the TCB to the bottom of the stack plus this much space, measured
11529    in bytes.  */
11530 
11531 #define SPLIT_STACK_AVAILABLE 1024
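
/* Rough sketch (pseudo C, for illustration only) of the check that
   s390_expand_split_stack_prologue emits below when FRAME_SIZE fits an
   add-immediate instruction:

     guard = tcb->__private_ss;
     if (frame_size > SPLIT_STACK_AVAILABLE)
       guard += frame_size;
     if (%r15 < guard)                        not enough stack left
       __morestack (frame_size, args_size);   allocate a new segment

   Frames of up to SPLIT_STACK_AVAILABLE bytes rely on the slack the
   allocation routines leave below __private_ss and skip the addition;
   frames too large for an add immediate call __morestack
   unconditionally.  */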
11532 
11533 /* Emit -fsplit-stack prologue, which goes before the regular function
11534    prologue.  */
11535 
11536 void
11537 s390_expand_split_stack_prologue (void)
11538 {
11539   rtx r1, guard, cc = NULL;
11540   rtx_insn *insn;
11541   /* Offset from thread pointer to __private_ss.  */
11542   int psso = TARGET_64BIT ? 0x38 : 0x20;
11543   /* Pointer size in bytes.  */
11544   /* Frame size and argument size - the two parameters to __morestack.  */
11545   HOST_WIDE_INT frame_size = cfun_frame_layout.frame_size;
11546   /* Align argument size to 8 bytes - simplifies __morestack code.  */
11547   HOST_WIDE_INT args_size = crtl->args.size >= 0
11548 			    ? ((crtl->args.size + 7) & ~7)
11549 			    : 0;
11550   /* Label to be called by __morestack.  */
11551   rtx_code_label *call_done = NULL;
11552   rtx_code_label *parm_base = NULL;
11553   rtx tmp;
11554 
11555   gcc_assert (flag_split_stack && reload_completed);
11556 
11557   r1 = gen_rtx_REG (Pmode, 1);
11558 
11559   /* If no stack frame will be allocated, don't do anything.  */
11560   if (!frame_size)
11561     {
11562       if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
11563 	{
11564 	  /* If va_start is used, just use r15.  */
11565 	  emit_move_insn (r1,
11566 			 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
11567 				       GEN_INT (STACK_POINTER_OFFSET)));
11568 
11569 	}
11570       return;
11571     }
11572 
11573   if (morestack_ref == NULL_RTX)
11574     {
11575       morestack_ref = gen_rtx_SYMBOL_REF (Pmode, "__morestack");
11576       SYMBOL_REF_FLAGS (morestack_ref) |= (SYMBOL_FLAG_LOCAL
11577 					   | SYMBOL_FLAG_FUNCTION);
11578     }
11579 
11580   if (CONST_OK_FOR_K (frame_size) || CONST_OK_FOR_Op (frame_size))
11581     {
11582       /* If frame_size will fit in an add instruction, do a stack space
11583 	 check, and only call __morestack if there's not enough space.  */
11584 
11585       /* Get thread pointer.  r1 is the only register we can always destroy - r0
11586 	 could contain a static chain (and cannot be used to address memory
11587 	 anyway), r2-r6 can contain parameters, and r6-r15 are callee-saved.  */
11588       emit_move_insn (r1, gen_rtx_REG (Pmode, TP_REGNUM));
11589       /* Aim at __private_ss.  */
11590       guard = gen_rtx_MEM (Pmode, plus_constant (Pmode, r1, psso));
11591 
11592       /* If less than 1 KiB is used, skip the addition and compare
11593 	 directly with __private_ss.  */
11594       if (frame_size > SPLIT_STACK_AVAILABLE)
11595 	{
11596 	  emit_move_insn (r1, guard);
11597 	  if (TARGET_64BIT)
11598 	    emit_insn (gen_adddi3 (r1, r1, GEN_INT (frame_size)));
11599 	  else
11600 	    emit_insn (gen_addsi3 (r1, r1, GEN_INT (frame_size)));
11601 	  guard = r1;
11602 	}
11603 
11604       /* Compare the (maybe adjusted) guard with the stack pointer.  */
11605       cc = s390_emit_compare (LT, stack_pointer_rtx, guard);
11606     }
11607 
11608   call_done = gen_label_rtx ();
11609   parm_base = gen_label_rtx ();
11610 
11611   /* Emit the parameter block.  */
11612   tmp = gen_split_stack_data (parm_base, call_done,
11613 			      GEN_INT (frame_size),
11614 			      GEN_INT (args_size));
11615   insn = emit_insn (tmp);
11616   add_reg_note (insn, REG_LABEL_OPERAND, call_done);
11617   LABEL_NUSES (call_done)++;
11618   add_reg_note (insn, REG_LABEL_OPERAND, parm_base);
11619   LABEL_NUSES (parm_base)++;
11620 
11621   /* %r1 = litbase.  */
11622   insn = emit_move_insn (r1, gen_rtx_LABEL_REF (VOIDmode, parm_base));
11623   add_reg_note (insn, REG_LABEL_OPERAND, parm_base);
11624   LABEL_NUSES (parm_base)++;
11625 
11626   /* Now, we need to call __morestack.  It has very special calling
11627      conventions: it preserves param/return/static chain registers for
11628      calling main function body, and looks for its own parameters at %r1. */
11629 
11630   if (cc != NULL)
11631     {
11632       tmp = gen_split_stack_cond_call (morestack_ref, cc, call_done);
11633 
11634       insn = emit_jump_insn (tmp);
11635       JUMP_LABEL (insn) = call_done;
11636       LABEL_NUSES (call_done)++;
11637 
11638       /* Mark the jump as very unlikely to be taken.  */
11639       add_reg_br_prob_note (insn,
11640 			    profile_probability::very_unlikely ());
11641 
11642       if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
11643 	{
11644 	  /* If va_start is used, and __morestack was not called, just use
11645 	     r15.  */
11646 	  emit_move_insn (r1,
11647 			 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
11648 				       GEN_INT (STACK_POINTER_OFFSET)));
11649 	}
11650     }
11651   else
11652     {
11653       tmp = gen_split_stack_call (morestack_ref, call_done);
11654       insn = emit_jump_insn (tmp);
11655       JUMP_LABEL (insn) = call_done;
11656       LABEL_NUSES (call_done)++;
11657       emit_barrier ();
11658     }
11659 
11660   /* __morestack will call us here.  */
11661 
11662   emit_label (call_done);
11663 }
11664 
11665 /* We may have to tell the dataflow pass that the split stack prologue
11666    is initializing a register.  */
11667 
11668 static void
11669 s390_live_on_entry (bitmap regs)
11670 {
11671   if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
11672     {
11673       gcc_assert (flag_split_stack);
11674       bitmap_set_bit (regs, 1);
11675     }
11676 }
11677 
11678 /* Return true if the function can use simple_return to return outside
11679    of a shrink-wrapped region.  At present shrink-wrapping is supported
11680    in all cases.  */
11681 
11682 bool
11683 s390_can_use_simple_return_insn (void)
11684 {
11685   return true;
11686 }
11687 
11688 /* Return true if the epilogue is guaranteed to contain only a return
11689    instruction and if a direct return can therefore be used instead.
11690    One of the main advantages of using direct return instructions
11691    is that we can then use conditional returns.  */
11692 
11693 bool
11694 s390_can_use_return_insn (void)
11695 {
11696   int i;
11697 
11698   if (!reload_completed)
11699     return false;
11700 
11701   if (crtl->profile)
11702     return false;
11703 
11704   if (TARGET_TPF_PROFILING)
11705     return false;
11706 
11707   for (i = 0; i < 16; i++)
11708     if (cfun_gpr_save_slot (i) != SAVE_SLOT_NONE)
11709       return false;
11710 
11711   /* For 31 bit this is not covered by the frame_size check below
11712      since f4, f6 are saved in the register save area without needing
11713      additional stack space.  */
11714   if (!TARGET_64BIT
11715       && (cfun_fpr_save_p (FPR4_REGNUM) || cfun_fpr_save_p (FPR6_REGNUM)))
11716     return false;
11717 
11718   if (cfun->machine->base_reg
11719       && !call_really_used_regs[REGNO (cfun->machine->base_reg)])
11720     return false;
11721 
11722   return cfun_frame_layout.frame_size == 0;
11723 }
11724 
11725 /* The VX ABI differs for vararg functions.  Therefore we need the
11726    prototype of the callee to be available when passing vector type
11727    values.  */
11728 static const char *
11729 s390_invalid_arg_for_unprototyped_fn (const_tree typelist, const_tree funcdecl, const_tree val)
11730 {
11731   return ((TARGET_VX_ABI
11732 	   && typelist == 0
11733 	   && VECTOR_TYPE_P (TREE_TYPE (val))
11734 	   && (funcdecl == NULL_TREE
11735 	       || (TREE_CODE (funcdecl) == FUNCTION_DECL
11736 		   && DECL_BUILT_IN_CLASS (funcdecl) != BUILT_IN_MD)))
11737 	  ? N_("vector argument passed to unprototyped function")
11738 	  : NULL);
11739 }
11740 
11741 
11742 /* Return the size in bytes of a function argument of
11743    type TYPE and/or mode MODE.  At least one of TYPE or
11744    MODE must be specified.  */
11745 
11746 static int
11747 s390_function_arg_size (machine_mode mode, const_tree type)
11748 {
11749   if (type)
11750     return int_size_in_bytes (type);
11751 
11752   /* No type info available for some library calls ...  */
11753   if (mode != BLKmode)
11754     return GET_MODE_SIZE (mode);
11755 
11756   /* If we have neither type nor mode, abort.  */
11757   gcc_unreachable ();
11758 }
11759 
11760 /* Return true if a function argument of type TYPE and mode MODE
11761    is to be passed in a vector register, if available.  */
11762 
11763 bool
11764 s390_function_arg_vector (machine_mode mode, const_tree type)
11765 {
11766   if (!TARGET_VX_ABI)
11767     return false;
11768 
11769   if (s390_function_arg_size (mode, type) > 16)
11770     return false;
11771 
11772   /* No type info available for some library calls ...  */
11773   if (!type)
11774     return VECTOR_MODE_P (mode);
11775 
11776   /* The ABI says that record types with a single member are treated
11777      just like that member would be.  */
11778   while (TREE_CODE (type) == RECORD_TYPE)
11779     {
11780       tree field, single = NULL_TREE;
11781 
11782       for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
11783 	{
11784 	  if (TREE_CODE (field) != FIELD_DECL)
11785 	    continue;
11786 
11787 	  if (single == NULL_TREE)
11788 	    single = TREE_TYPE (field);
11789 	  else
11790 	    return false;
11791 	}
11792 
11793       if (single == NULL_TREE)
11794 	return false;
11795       else
11796 	{
11797 	  /* If the field declaration adds extra bytes due to
11798 	     e.g. padding, this is not accepted as a vector type.  */
11799 	  if (int_size_in_bytes (single) <= 0
11800 	      || int_size_in_bytes (single) != int_size_in_bytes (type))
11801 	    return false;
11802 	  type = single;
11803 	}
11804     }
11805 
11806   return VECTOR_TYPE_P (type);
11807 }
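
/* For illustration (assuming the VX ABI is in effect; the types below are
   made up for the example):

     typedef int v4si __attribute__ ((vector_size (16)));
     struct wrap { v4si v; };         single member -> treated like v4si,
                                      i.e. passed in a vector register
     struct pair { v4si v; int i; };  two members -> not a vector argument
                                      (passed by reference instead)  */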
11808 
11809 /* Return true if a function argument of type TYPE and mode MODE
11810    is to be passed in a floating-point register, if available.  */
11811 
11812 static bool
11813 s390_function_arg_float (machine_mode mode, const_tree type)
11814 {
11815   if (s390_function_arg_size (mode, type) > 8)
11816     return false;
11817 
11818   /* Soft-float changes the ABI: no floating-point registers are used.  */
11819   if (TARGET_SOFT_FLOAT)
11820     return false;
11821 
11822   /* No type info available for some library calls ...  */
11823   if (!type)
11824     return mode == SFmode || mode == DFmode || mode == SDmode || mode == DDmode;
11825 
11826   /* The ABI says that record types with a single member are treated
11827      just like that member would be.  */
11828   while (TREE_CODE (type) == RECORD_TYPE)
11829     {
11830       tree field, single = NULL_TREE;
11831 
11832       for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
11833 	{
11834 	  if (TREE_CODE (field) != FIELD_DECL)
11835 	    continue;
11836 
11837 	  if (single == NULL_TREE)
11838 	    single = TREE_TYPE (field);
11839 	  else
11840 	    return false;
11841 	}
11842 
11843       if (single == NULL_TREE)
11844 	return false;
11845       else
11846 	type = single;
11847     }
11848 
11849   return TREE_CODE (type) == REAL_TYPE;
11850 }
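
/* For illustration (hard-float ABI assumed): both of these arguments are
   treated as floating point and go into an FP argument register if one is
   still available:

     void f (double d);
     struct wd { double d; };
     void g (struct wd w);  */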
11851 
11852 /* Return true if a function argument of type TYPE and mode MODE
11853    is to be passed in an integer register, or a pair of integer
11854    registers, if available.  */
11855 
11856 static bool
11857 s390_function_arg_integer (machine_mode mode, const_tree type)
11858 {
11859   int size = s390_function_arg_size (mode, type);
11860   if (size > 8)
11861     return false;
11862 
11863   /* No type info available for some library calls ...  */
11864   if (!type)
11865     return GET_MODE_CLASS (mode) == MODE_INT
11866 	   || (TARGET_SOFT_FLOAT &&  SCALAR_FLOAT_MODE_P (mode));
11867 
11868   /* We accept small integral (and similar) types.  */
11869   if (INTEGRAL_TYPE_P (type)
11870       || POINTER_TYPE_P (type)
11871       || TREE_CODE (type) == NULLPTR_TYPE
11872       || TREE_CODE (type) == OFFSET_TYPE
11873       || (TARGET_SOFT_FLOAT && TREE_CODE (type) == REAL_TYPE))
11874     return true;
11875 
11876   /* We also accept structs of size 1, 2, 4, 8 that are not
11877      passed in floating-point registers.  */
11878   if (AGGREGATE_TYPE_P (type)
11879       && exact_log2 (size) >= 0
11880       && !s390_function_arg_float (mode, type))
11881     return true;
11882 
11883   return false;
11884 }
11885 
11886 /* Return 1 if a function argument of type TYPE and mode MODE
11887    is to be passed by reference.  The ABI specifies that only
11888    structures of size 1, 2, 4, or 8 bytes are passed by value,
11889    all other structures (and complex numbers) are passed by
11890    reference.  */
11891 
11892 static bool
11893 s390_pass_by_reference (cumulative_args_t ca ATTRIBUTE_UNUSED,
11894 			machine_mode mode, const_tree type,
11895 			bool named ATTRIBUTE_UNUSED)
11896 {
11897   int size = s390_function_arg_size (mode, type);
11898 
11899   if (s390_function_arg_vector (mode, type))
11900     return false;
11901 
11902   if (size > 8)
11903     return true;
11904 
11905   if (type)
11906     {
11907       if (AGGREGATE_TYPE_P (type) && exact_log2 (size) < 0)
11908 	return true;
11909 
11910       if (TREE_CODE (type) == COMPLEX_TYPE
11911 	  || TREE_CODE (type) == VECTOR_TYPE)
11912 	return true;
11913     }
11914 
11915   return false;
11916 }
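
/* Some illustrative consequences of the rule above (example types only):

     struct s1  { char c; };        1 byte   -> by value (GPR)
     struct s3  { char c[3]; };     3 bytes  -> by reference
     struct s8  { int a, b; };      8 bytes  -> by value (GPR, or GPR pair
                                                on 31 bit)
     struct s12 { int a, b, c; };   12 bytes -> by reference
     _Complex double                         -> always by reference  */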
11917 
11918 /* Update the data in CUM to advance over an argument of mode MODE and
11919    data type TYPE.  (TYPE is null for libcalls where that information
11920    may not be available.)  The boolean NAMED specifies whether the
11921    argument is a named argument (as opposed to an unnamed argument
11922    matching an ellipsis).  */
11923 
11924 static void
11925 s390_function_arg_advance (cumulative_args_t cum_v, machine_mode mode,
11926 			   const_tree type, bool named)
11927 {
11928   CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
11929 
11930   if (s390_function_arg_vector (mode, type))
11931     {
11932       /* We are called for unnamed vector stdarg arguments which are
11933 	 passed on the stack.  In this case this hook does not have to
11934 	 do anything since stack arguments are tracked by common
11935 	 code.  */
11936       if (!named)
11937 	return;
11938       cum->vrs += 1;
11939     }
11940   else if (s390_function_arg_float (mode, type))
11941     {
11942       cum->fprs += 1;
11943     }
11944   else if (s390_function_arg_integer (mode, type))
11945     {
11946       int size = s390_function_arg_size (mode, type);
11947       cum->gprs += ((size + UNITS_PER_LONG - 1) / UNITS_PER_LONG);
11948     }
11949   else
11950     gcc_unreachable ();
11951 }
11952 
11953 /* Define where to put the arguments to a function.
11954    Value is zero to push the argument on the stack,
11955    or a hard register in which to store the argument.
11956 
11957    MODE is the argument's machine mode.
11958    TYPE is the data type of the argument (as a tree).
11959     This is null for libcalls where that information may
11960     not be available.
11961    CUM is a variable of type CUMULATIVE_ARGS which gives info about
11962     the preceding args and about the function being called.
11963    NAMED is nonzero if this argument is a named parameter
11964     (otherwise it is an extra parameter matching an ellipsis).
11965 
11966    On S/390, we use general purpose registers 2 through 6 to
11967    pass integer, pointer, and certain structure arguments, and
11968    floating point registers 0 and 2 (0, 2, 4, and 6 on 64-bit)
11969    to pass floating point arguments.  All remaining arguments
11970    are pushed to the stack.  */
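
/* For illustration, on 64 bit a call such as

     void f (int a, double x, long b, double y, void *p);

   assigns a -> %r2, x -> %f0, b -> %r3, y -> %f2, p -> %r4; the GPR and
   FPR argument slots are counted independently of each other.  */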
11971 
11972 static rtx
11973 s390_function_arg (cumulative_args_t cum_v, machine_mode mode,
11974 		   const_tree type, bool named)
11975 {
11976   CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
11977 
11978   if (!named)
11979     s390_check_type_for_vector_abi (type, true, false);
11980 
11981   if (s390_function_arg_vector (mode, type))
11982     {
11983       /* Vector arguments being part of the ellipsis are passed on the
11984 	 stack.  */
11985       if (!named || (cum->vrs + 1 > VEC_ARG_NUM_REG))
11986 	return NULL_RTX;
11987 
11988       return gen_rtx_REG (mode, cum->vrs + FIRST_VEC_ARG_REGNO);
11989     }
11990   else if (s390_function_arg_float (mode, type))
11991     {
11992       if (cum->fprs + 1 > FP_ARG_NUM_REG)
11993 	return NULL_RTX;
11994       else
11995 	return gen_rtx_REG (mode, cum->fprs + 16);
11996     }
11997   else if (s390_function_arg_integer (mode, type))
11998     {
11999       int size = s390_function_arg_size (mode, type);
12000       int n_gprs = (size + UNITS_PER_LONG - 1) / UNITS_PER_LONG;
12001 
12002       if (cum->gprs + n_gprs > GP_ARG_NUM_REG)
12003 	return NULL_RTX;
12004       else if (n_gprs == 1 || UNITS_PER_WORD == UNITS_PER_LONG)
12005 	return gen_rtx_REG (mode, cum->gprs + 2);
12006       else if (n_gprs == 2)
12007 	{
12008 	  rtvec p = rtvec_alloc (2);
12009 
12010 	  RTVEC_ELT (p, 0)
12011 	    = gen_rtx_EXPR_LIST (SImode, gen_rtx_REG (SImode, cum->gprs + 2),
12012 					 const0_rtx);
12013 	  RTVEC_ELT (p, 1)
12014 	    = gen_rtx_EXPR_LIST (SImode, gen_rtx_REG (SImode, cum->gprs + 3),
12015 					 GEN_INT (4));
12016 
12017 	  return gen_rtx_PARALLEL (mode, p);
12018 	}
12019     }
12020 
12021   /* After the real arguments, expand_call calls us once again
12022      with a void_type_node type.  Whatever we return here is
12023      passed as operand 2 to the call expanders.
12024 
12025      We don't need this feature ...  */
12026   else if (type == void_type_node)
12027     return const0_rtx;
12028 
12029   gcc_unreachable ();
12030 }
12031 
12032 /* Implement TARGET_FUNCTION_ARG_BOUNDARY.  Vector arguments are
12033    left-justified when placed on the stack during parameter passing.  */
12034 
12035 static pad_direction
12036 s390_function_arg_padding (machine_mode mode, const_tree type)
12037 {
12038   if (s390_function_arg_vector (mode, type))
12039     return PAD_UPWARD;
12040 
12041   return default_function_arg_padding (mode, type);
12042 }
12043 
12044 /* Return true if return values of type TYPE should be returned
12045    in a memory buffer whose address is passed by the caller as
12046    hidden first argument.  */
12047 
12048 static bool
12049 s390_return_in_memory (const_tree type, const_tree fundecl ATTRIBUTE_UNUSED)
12050 {
12051   /* We accept small integral (and similar) types.  */
12052   if (INTEGRAL_TYPE_P (type)
12053       || POINTER_TYPE_P (type)
12054       || TREE_CODE (type) == OFFSET_TYPE
12055       || TREE_CODE (type) == REAL_TYPE)
12056     return int_size_in_bytes (type) > 8;
12057 
12058   /* vector types which fit into a VR.  */
12059   if (TARGET_VX_ABI
12060       && VECTOR_TYPE_P (type)
12061       && int_size_in_bytes (type) <= 16)
12062     return false;
12063 
12064   /* Aggregates and similar constructs are always returned
12065      in memory.  */
12066   if (AGGREGATE_TYPE_P (type)
12067       || TREE_CODE (type) == COMPLEX_TYPE
12068       || VECTOR_TYPE_P (type))
12069     return true;
12070 
12071   /* ??? We get called on all sorts of random stuff from
12072      aggregate_value_p.  We can't abort, but it's not clear
12073      what's safe to return.  Pretend it's a struct I guess.  */
12074   return true;
12075 }
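
/* Illustrative examples of the rules above:

     long long          -> returned in a register (no hidden argument)
     struct { int x; }  -> returned in memory via a hidden pointer argument
     vector int         -> returned in a vector register under the VX ABI  */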
12076 
12077 /* Function arguments and return values are promoted to word size.  */
12078 
12079 static machine_mode
12080 s390_promote_function_mode (const_tree type, machine_mode mode,
12081 			    int *punsignedp,
12082 			    const_tree fntype ATTRIBUTE_UNUSED,
12083 			    int for_return ATTRIBUTE_UNUSED)
12084 {
12085   if (INTEGRAL_MODE_P (mode)
12086       && GET_MODE_SIZE (mode) < UNITS_PER_LONG)
12087     {
12088       if (type != NULL_TREE && POINTER_TYPE_P (type))
12089 	*punsignedp = POINTERS_EXTEND_UNSIGNED;
12090       return Pmode;
12091     }
12092 
12093   return mode;
12094 }
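
/* E.g. on 64 bit a 'short' or 'int' argument or return value is widened
   to DImode (Pmode) here before a register is assigned to it.  */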
12095 
12096 /* Define where to return a (scalar) value of type RET_TYPE.
12097    If RET_TYPE is null, define where to return a (scalar)
12098    value of mode MODE from a libcall.  */
12099 
12100 static rtx
12101 s390_function_and_libcall_value (machine_mode mode,
12102 				 const_tree ret_type,
12103 				 const_tree fntype_or_decl,
12104 				 bool outgoing ATTRIBUTE_UNUSED)
12105 {
12106   /* For vector return types it is important to use the RET_TYPE
12107      argument whenever available since the middle-end might have
12108      changed the mode to a scalar mode.  */
12109   bool vector_ret_type_p = ((ret_type && VECTOR_TYPE_P (ret_type))
12110 			    || (!ret_type && VECTOR_MODE_P (mode)));
12111 
12112   /* For normal functions perform the promotion as
12113      promote_function_mode would do.  */
12114   if (ret_type)
12115     {
12116       int unsignedp = TYPE_UNSIGNED (ret_type);
12117       mode = promote_function_mode (ret_type, mode, &unsignedp,
12118 				    fntype_or_decl, 1);
12119     }
12120 
12121   gcc_assert (GET_MODE_CLASS (mode) == MODE_INT
12122 	      || SCALAR_FLOAT_MODE_P (mode)
12123 	      || (TARGET_VX_ABI && vector_ret_type_p));
12124   gcc_assert (GET_MODE_SIZE (mode) <= (TARGET_VX_ABI ? 16 : 8));
12125 
12126   if (TARGET_VX_ABI && vector_ret_type_p)
12127     return gen_rtx_REG (mode, FIRST_VEC_ARG_REGNO);
12128   else if (TARGET_HARD_FLOAT && SCALAR_FLOAT_MODE_P (mode))
12129     return gen_rtx_REG (mode, 16);
12130   else if (GET_MODE_SIZE (mode) <= UNITS_PER_LONG
12131 	   || UNITS_PER_LONG == UNITS_PER_WORD)
12132     return gen_rtx_REG (mode, 2);
12133   else if (GET_MODE_SIZE (mode) == 2 * UNITS_PER_LONG)
12134     {
12135       /* This case is triggered when returning a 64 bit value with
12136 	 -m31 -mzarch.  Although the value would fit into a single
12137 	 register it has to be forced into a 32 bit register pair in
12138 	 order to match the ABI.  */
12139       rtvec p = rtvec_alloc (2);
12140 
12141       RTVEC_ELT (p, 0)
12142 	= gen_rtx_EXPR_LIST (SImode, gen_rtx_REG (SImode, 2), const0_rtx);
12143       RTVEC_ELT (p, 1)
12144 	= gen_rtx_EXPR_LIST (SImode, gen_rtx_REG (SImode, 3), GEN_INT (4));
12145 
12146       return gen_rtx_PARALLEL (mode, p);
12147     }
12148 
12149   gcc_unreachable ();
12150 }
12151 
12152 /* Define where to return a scalar return value of type RET_TYPE.  */
12153 
12154 static rtx
12155 s390_function_value (const_tree ret_type, const_tree fn_decl_or_type,
12156 		     bool outgoing)
12157 {
12158   return s390_function_and_libcall_value (TYPE_MODE (ret_type), ret_type,
12159 					  fn_decl_or_type, outgoing);
12160 }
12161 
12162 /* Define where to return a scalar libcall return value of mode
12163    MODE.  */
12164 
12165 static rtx
12166 s390_libcall_value (machine_mode mode, const_rtx fun ATTRIBUTE_UNUSED)
12167 {
12168   return s390_function_and_libcall_value (mode, NULL_TREE,
12169 					  NULL_TREE, true);
12170 }
12171 
12172 
12173 /* Create and return the va_list datatype.
12174 
12175    On S/390, va_list is an array type equivalent to
12176 
12177       typedef struct __va_list_tag
12178 	{
12179 	    long __gpr;
12180 	    long __fpr;
12181 	    void *__overflow_arg_area;
12182 	    void *__reg_save_area;
12183 	} va_list[1];
12184 
12185    where __gpr and __fpr hold the number of general purpose
12186    or floating point arguments used up to now, respectively,
12187    __overflow_arg_area points to the stack location of the
12188    next argument passed on the stack, and __reg_save_area
12189    always points to the start of the register area in the
12190    call frame of the current function.  The function prologue
12191    saves all registers used for argument passing into this
12192    area if the function uses variable arguments.  */
12193 
12194 static tree
12195 s390_build_builtin_va_list (void)
12196 {
12197   tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
12198 
12199   record = lang_hooks.types.make_type (RECORD_TYPE);
12200 
12201   type_decl =
12202     build_decl (BUILTINS_LOCATION,
12203 		TYPE_DECL, get_identifier ("__va_list_tag"), record);
12204 
12205   f_gpr = build_decl (BUILTINS_LOCATION,
12206 		      FIELD_DECL, get_identifier ("__gpr"),
12207 		      long_integer_type_node);
12208   f_fpr = build_decl (BUILTINS_LOCATION,
12209 		      FIELD_DECL, get_identifier ("__fpr"),
12210 		      long_integer_type_node);
12211   f_ovf = build_decl (BUILTINS_LOCATION,
12212 		      FIELD_DECL, get_identifier ("__overflow_arg_area"),
12213 		      ptr_type_node);
12214   f_sav = build_decl (BUILTINS_LOCATION,
12215 		      FIELD_DECL, get_identifier ("__reg_save_area"),
12216 		      ptr_type_node);
12217 
12218   va_list_gpr_counter_field = f_gpr;
12219   va_list_fpr_counter_field = f_fpr;
12220 
12221   DECL_FIELD_CONTEXT (f_gpr) = record;
12222   DECL_FIELD_CONTEXT (f_fpr) = record;
12223   DECL_FIELD_CONTEXT (f_ovf) = record;
12224   DECL_FIELD_CONTEXT (f_sav) = record;
12225 
12226   TYPE_STUB_DECL (record) = type_decl;
12227   TYPE_NAME (record) = type_decl;
12228   TYPE_FIELDS (record) = f_gpr;
12229   DECL_CHAIN (f_gpr) = f_fpr;
12230   DECL_CHAIN (f_fpr) = f_ovf;
12231   DECL_CHAIN (f_ovf) = f_sav;
12232 
12233   layout_type (record);
12234 
12235   /* The correct type is an array type of one element.  */
12236   return build_array_type (record, build_index_type (size_zero_node));
12237 }
12238 
12239 /* Implement va_start by filling the va_list structure VALIST.
12240    STDARG_P is always true, and ignored.
12241    NEXTARG points to the first anonymous stack argument.
12242 
12243    The following global variables are used to initialize
12244    the va_list structure:
12245 
12246      crtl->args.info:
12247        holds number of gprs and fprs used for named arguments.
12248      crtl->args.arg_offset_rtx:
12249        holds the offset of the first anonymous stack argument
12250        (relative to the virtual arg pointer).  */
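
/* For illustration, with a prototype such as

     void log_msg (const char *fmt, ...);

   only FMT is named and occupies one GPR, so va_start sets __gpr to 1 and
   __fpr to 0; va_arg later fetches the anonymous arguments either from the
   register save area or via __overflow_arg_area.  (Example only.)  */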
12251 
12252 static void
12253 s390_va_start (tree valist, rtx nextarg ATTRIBUTE_UNUSED)
12254 {
12255   HOST_WIDE_INT n_gpr, n_fpr;
12256   int off;
12257   tree f_gpr, f_fpr, f_ovf, f_sav;
12258   tree gpr, fpr, ovf, sav, t;
12259 
12260   f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
12261   f_fpr = DECL_CHAIN (f_gpr);
12262   f_ovf = DECL_CHAIN (f_fpr);
12263   f_sav = DECL_CHAIN (f_ovf);
12264 
12265   valist = build_simple_mem_ref (valist);
12266   gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
12267   fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
12268   ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
12269   sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
12270 
12271   /* Count number of gp and fp argument registers used.  */
12272 
12273   n_gpr = crtl->args.info.gprs;
12274   n_fpr = crtl->args.info.fprs;
12275 
12276   if (cfun->va_list_gpr_size)
12277     {
12278       t = build2 (MODIFY_EXPR, TREE_TYPE (gpr), gpr,
12279 		  build_int_cst (NULL_TREE, n_gpr));
12280       TREE_SIDE_EFFECTS (t) = 1;
12281       expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
12282     }
12283 
12284   if (cfun->va_list_fpr_size)
12285     {
12286       t = build2 (MODIFY_EXPR, TREE_TYPE (fpr), fpr,
12287 		  build_int_cst (NULL_TREE, n_fpr));
12288       TREE_SIDE_EFFECTS (t) = 1;
12289       expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
12290     }
12291 
12292   if (flag_split_stack
12293      && (lookup_attribute ("no_split_stack", DECL_ATTRIBUTES (cfun->decl))
12294 	 == NULL)
12295      && cfun->machine->split_stack_varargs_pointer == NULL_RTX)
12296     {
12297       rtx reg;
12298       rtx_insn *seq;
12299 
12300       reg = gen_reg_rtx (Pmode);
12301       cfun->machine->split_stack_varargs_pointer = reg;
12302 
12303       start_sequence ();
12304       emit_move_insn (reg, gen_rtx_REG (Pmode, 1));
12305       seq = get_insns ();
12306       end_sequence ();
12307 
12308       push_topmost_sequence ();
12309       emit_insn_after (seq, entry_of_function ());
12310       pop_topmost_sequence ();
12311     }
12312 
12313   /* Find the overflow area.
12314      FIXME: This currently is too pessimistic when the vector ABI is
12315      enabled.  In that case we *always* set up the overflow area
12316      pointer.  */
12317   if (n_gpr + cfun->va_list_gpr_size > GP_ARG_NUM_REG
12318       || n_fpr + cfun->va_list_fpr_size > FP_ARG_NUM_REG
12319       || TARGET_VX_ABI)
12320     {
12321       if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
12322 	t = make_tree (TREE_TYPE (ovf), virtual_incoming_args_rtx);
12323       else
12324 	t = make_tree (TREE_TYPE (ovf), cfun->machine->split_stack_varargs_pointer);
12325 
12326       off = INTVAL (crtl->args.arg_offset_rtx);
12327       off = off < 0 ? 0 : off;
12328       if (TARGET_DEBUG_ARG)
12329 	fprintf (stderr, "va_start: n_gpr = %d, n_fpr = %d off %d\n",
12330 		 (int)n_gpr, (int)n_fpr, off);
12331 
12332       t = fold_build_pointer_plus_hwi (t, off);
12333 
12334       t = build2 (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
12335       TREE_SIDE_EFFECTS (t) = 1;
12336       expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
12337     }
12338 
12339   /* Find the register save area.  */
12340   if ((cfun->va_list_gpr_size && n_gpr < GP_ARG_NUM_REG)
12341       || (cfun->va_list_fpr_size && n_fpr < FP_ARG_NUM_REG))
12342     {
12343       t = make_tree (TREE_TYPE (sav), return_address_pointer_rtx);
12344       t = fold_build_pointer_plus_hwi (t, -RETURN_REGNUM * UNITS_PER_LONG);
12345 
12346       t = build2 (MODIFY_EXPR, TREE_TYPE (sav), sav, t);
12347       TREE_SIDE_EFFECTS (t) = 1;
12348       expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
12349     }
12350 }
12351 
12352 /* Implement va_arg by updating the va_list structure
12353    VALIST as required to retrieve an argument of type
12354    TYPE, and returning that argument.
12355 
12356    Generates code equivalent to:
12357 
12358    if (integral value) {
12359      if (size  <= 4 && args.gpr < 5 ||
12360 	 size  > 4 && args.gpr < 4 )
12361        ret = args.reg_save_area[args.gpr+8]
12362      else
12363        ret = *args.overflow_arg_area++;
12364    } else if (vector value) {
12365        ret = *args.overflow_arg_area;
12366        args.overflow_arg_area += size / 8;
12367    } else if (float value) {
12368      if (args.fgpr < 2)
12369        ret = args.reg_save_area[args.fpr+64]
12370      else
12371        ret = *args.overflow_arg_area++;
12372    } else if (aggregate value) {
12373      if (args.gpr < 5)
12374        ret = *args.reg_save_area[args.gpr]
12375      else
12376        ret = **args.overflow_arg_area++;
12377    } */
12378 
12379 static tree
12380 s390_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
12381 		      gimple_seq *post_p ATTRIBUTE_UNUSED)
12382 {
12383   tree f_gpr, f_fpr, f_ovf, f_sav;
12384   tree gpr, fpr, ovf, sav, reg, t, u;
12385   int indirect_p, size, n_reg, sav_ofs, sav_scale, max_reg;
12386   tree lab_false, lab_over = NULL_TREE;
12387   tree addr = create_tmp_var (ptr_type_node, "addr");
12388   bool left_align_p; /* How a value < UNITS_PER_LONG is aligned within
12389 			a stack slot.  */
12390 
12391   f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
12392   f_fpr = DECL_CHAIN (f_gpr);
12393   f_ovf = DECL_CHAIN (f_fpr);
12394   f_sav = DECL_CHAIN (f_ovf);
12395 
12396   gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
12397   fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
12398   sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
12399 
12400   /* The tree for args* cannot be shared between gpr/fpr and ovf since
12401      both appear on a lhs.  */
12402   valist = unshare_expr (valist);
12403   ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
12404 
12405   size = int_size_in_bytes (type);
12406 
12407   s390_check_type_for_vector_abi (type, true, false);
12408 
12409   if (pass_by_reference (NULL, TYPE_MODE (type), type, false))
12410     {
12411       if (TARGET_DEBUG_ARG)
12412 	{
12413 	  fprintf (stderr, "va_arg: aggregate type");
12414 	  debug_tree (type);
12415 	}
12416 
12417       /* Aggregates are passed by reference.  */
12418       indirect_p = 1;
12419       reg = gpr;
12420       n_reg = 1;
12421 
12422       /* Kernel stack layout on 31 bit: it is assumed here that no padding
12423 	 will be added by s390_frame_info because for va_args an even number
12424 	 of GPRs always has to be saved (r15-r2 = 14 regs).  */
12425       sav_ofs = 2 * UNITS_PER_LONG;
12426       sav_scale = UNITS_PER_LONG;
12427       size = UNITS_PER_LONG;
12428       max_reg = GP_ARG_NUM_REG - n_reg;
12429       left_align_p = false;
12430     }
12431   else if (s390_function_arg_vector (TYPE_MODE (type), type))
12432     {
12433       if (TARGET_DEBUG_ARG)
12434 	{
12435 	  fprintf (stderr, "va_arg: vector type");
12436 	  debug_tree (type);
12437 	}
12438 
12439       indirect_p = 0;
12440       reg = NULL_TREE;
12441       n_reg = 0;
12442       sav_ofs = 0;
12443       sav_scale = 8;
12444       max_reg = 0;
12445       left_align_p = true;
12446     }
12447   else if (s390_function_arg_float (TYPE_MODE (type), type))
12448     {
12449       if (TARGET_DEBUG_ARG)
12450 	{
12451 	  fprintf (stderr, "va_arg: float type");
12452 	  debug_tree (type);
12453 	}
12454 
12455       /* FP args go in FP registers, if present.  */
12456       indirect_p = 0;
12457       reg = fpr;
12458       n_reg = 1;
12459       sav_ofs = 16 * UNITS_PER_LONG;
12460       sav_scale = 8;
12461       max_reg = FP_ARG_NUM_REG - n_reg;
12462       left_align_p = false;
12463     }
12464   else
12465     {
12466       if (TARGET_DEBUG_ARG)
12467 	{
12468 	  fprintf (stderr, "va_arg: other type");
12469 	  debug_tree (type);
12470 	}
12471 
12472       /* Otherwise into GP registers.  */
12473       indirect_p = 0;
12474       reg = gpr;
12475       n_reg = (size + UNITS_PER_LONG - 1) / UNITS_PER_LONG;
12476 
12477       /* Kernel stack layout on 31 bit: it is assumed here that no padding
12478 	 will be added by s390_frame_info because for va_args an even number
12479 	 of GPRs always has to be saved (r15-r2 = 14 regs).  */
12480       sav_ofs = 2 * UNITS_PER_LONG;
12481 
12482       if (size < UNITS_PER_LONG)
12483 	sav_ofs += UNITS_PER_LONG - size;
12484 
12485       sav_scale = UNITS_PER_LONG;
12486       max_reg = GP_ARG_NUM_REG - n_reg;
12487       left_align_p = false;
12488     }
12489 
12490   /* Pull the value out of the saved registers ...  */
12491 
12492   if (reg != NULL_TREE)
12493     {
12494       /*
12495 	if (reg > ((typeof (reg))max_reg))
12496 	  goto lab_false;
12497 
12498 	addr = sav + sav_ofs + reg * sav_scale;
12499 
12500 	goto lab_over;
12501 
12502 	lab_false:
12503       */
12504 
12505       lab_false = create_artificial_label (UNKNOWN_LOCATION);
12506       lab_over = create_artificial_label (UNKNOWN_LOCATION);
12507 
12508       t = fold_convert (TREE_TYPE (reg), size_int (max_reg));
12509       t = build2 (GT_EXPR, boolean_type_node, reg, t);
12510       u = build1 (GOTO_EXPR, void_type_node, lab_false);
12511       t = build3 (COND_EXPR, void_type_node, t, u, NULL_TREE);
12512       gimplify_and_add (t, pre_p);
12513 
12514       t = fold_build_pointer_plus_hwi (sav, sav_ofs);
12515       u = build2 (MULT_EXPR, TREE_TYPE (reg), reg,
12516 		  fold_convert (TREE_TYPE (reg), size_int (sav_scale)));
12517       t = fold_build_pointer_plus (t, u);
12518 
12519       gimplify_assign (addr, t, pre_p);
12520 
12521       gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));
12522 
12523       gimple_seq_add_stmt (pre_p, gimple_build_label (lab_false));
12524     }
12525 
12526   /* ... Otherwise out of the overflow area.  */
12527 
12528   t = ovf;
12529   if (size < UNITS_PER_LONG && !left_align_p)
12530     t = fold_build_pointer_plus_hwi (t, UNITS_PER_LONG - size);
12531 
12532   gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
12533 
12534   gimplify_assign (addr, t, pre_p);
12535 
12536   if (size < UNITS_PER_LONG && left_align_p)
12537     t = fold_build_pointer_plus_hwi (t, UNITS_PER_LONG);
12538   else
12539     t = fold_build_pointer_plus_hwi (t, size);
12540 
12541   gimplify_assign (ovf, t, pre_p);
12542 
12543   if (reg != NULL_TREE)
12544     gimple_seq_add_stmt (pre_p, gimple_build_label (lab_over));
12545 
12546 
12547   /* Increment register save count.  */
12548 
12549   if (n_reg > 0)
12550     {
12551       u = build2 (PREINCREMENT_EXPR, TREE_TYPE (reg), reg,
12552 		  fold_convert (TREE_TYPE (reg), size_int (n_reg)));
12553       gimplify_and_add (u, pre_p);
12554     }
12555 
12556   if (indirect_p)
12557     {
12558       t = build_pointer_type_for_mode (build_pointer_type (type),
12559 				       ptr_mode, true);
12560       addr = fold_convert (t, addr);
12561       addr = build_va_arg_indirect_ref (addr);
12562     }
12563   else
12564     {
12565       t = build_pointer_type_for_mode (type, ptr_mode, true);
12566       addr = fold_convert (t, addr);
12567     }
12568 
12569   return build_va_arg_indirect_ref (addr);
12570 }
12571 
12572 /* Emit rtl for the tbegin or tbegin_retry (RETRY != NULL_RTX)
12573    expanders.
12574    DEST  - Register location where CC will be stored.
12575    TDB   - Pointer to a 256 byte area where to store the transaction
12576 	   diagnostic block.  NULL if TDB is not needed.
12577    RETRY - Retry count value.  If non-NULL a retry loop for CC2
12578 	   is emitted.
12579    CLOBBER_FPRS_P - If true clobbers for all FPRs are emitted as part
12580 		    of the tbegin instruction pattern.  */
12581 
12582 void
12583 s390_expand_tbegin (rtx dest, rtx tdb, rtx retry, bool clobber_fprs_p)
12584 {
12585   rtx retry_plus_two = gen_reg_rtx (SImode);
12586   rtx retry_reg = gen_reg_rtx (SImode);
12587   rtx_code_label *retry_label = NULL;
12588 
12589   if (retry != NULL_RTX)
12590     {
12591       emit_move_insn (retry_reg, retry);
12592       emit_insn (gen_addsi3 (retry_plus_two, retry_reg, const2_rtx));
12593       emit_insn (gen_addsi3 (retry_reg, retry_reg, const1_rtx));
12594       retry_label = gen_label_rtx ();
12595       emit_label (retry_label);
12596     }
12597 
12598   if (clobber_fprs_p)
12599     {
12600       if (TARGET_VX)
12601 	emit_insn (gen_tbegin_1_z13 (gen_rtx_CONST_INT (VOIDmode, TBEGIN_MASK),
12602 				     tdb));
12603       else
12604 	emit_insn (gen_tbegin_1 (gen_rtx_CONST_INT (VOIDmode, TBEGIN_MASK),
12605 				 tdb));
12606     }
12607   else
12608     emit_insn (gen_tbegin_nofloat_1 (gen_rtx_CONST_INT (VOIDmode, TBEGIN_MASK),
12609 				     tdb));
12610 
12611   emit_move_insn (dest, gen_rtx_UNSPEC (SImode,
12612 					gen_rtvec (1, gen_rtx_REG (CCRAWmode,
12613 								   CC_REGNUM)),
12614 					UNSPEC_CC_TO_INT));
12615   if (retry != NULL_RTX)
12616     {
12617       const int CC0 = 1 << 3;
12618       const int CC1 = 1 << 2;
12619       const int CC3 = 1 << 0;
12620       rtx jump;
12621       rtx count = gen_reg_rtx (SImode);
12622       rtx_code_label *leave_label = gen_label_rtx ();
12623 
12624       /* Exit for success and permanent failures.  */
12625       jump = s390_emit_jump (leave_label,
12626 			     gen_rtx_EQ (VOIDmode,
12627 			       gen_rtx_REG (CCRAWmode, CC_REGNUM),
12628 			       gen_rtx_CONST_INT (VOIDmode, CC0 | CC1 | CC3)));
12629       LABEL_NUSES (leave_label) = 1;
12630 
12631       /* CC2 - transient failure. Perform retry with ppa.  */
12632       emit_move_insn (count, retry_plus_two);
12633       emit_insn (gen_subsi3 (count, count, retry_reg));
12634       emit_insn (gen_tx_assist (count));
12635       jump = emit_jump_insn (gen_doloop_si64 (retry_label,
12636 					      retry_reg,
12637 					      retry_reg));
12638       JUMP_LABEL (jump) = retry_label;
12639       LABEL_NUSES (retry_label) = 1;
12640       emit_label (leave_label);
12641     }
12642 }
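
/* For illustration, the RTL emitted above for tbegin_retry roughly
   corresponds to (pseudo C, label names made up):

     retry_reg = retry + 1;
   again:
     cc = tbegin (tdb);                    DEST receives the condition code
     if (cc != 2)                          CC0/CC1: success, CC3: persistent
       goto leave;                         failure - no point in retrying
     ppa ((retry + 2) - retry_reg);        transaction-abort assist
     if (--retry_reg != 0)
       goto again;
   leave:
     ;  */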
12643 
12644 
12645 /* Return the decl for the target specific builtin with the function
12646    code FCODE.  */
12647 
12648 static tree
12649 s390_builtin_decl (unsigned fcode, bool initialized_p ATTRIBUTE_UNUSED)
12650 {
12651   if (fcode >= S390_BUILTIN_MAX)
12652     return error_mark_node;
12653 
12654   return s390_builtin_decls[fcode];
12655 }
12656 
12657 /* We call mcount before the function prologue.  So a profiled leaf
12658    function should stay a leaf function.  */
12659 
12660 static bool
12661 s390_keep_leaf_when_profiled ()
12662 {
12663   return true;
12664 }
12665 
12666 /* Output assembly code for the trampoline template to
12667    stdio stream FILE.
12668 
12669    On S/390, we use gpr 1 internally in the trampoline code;
12670    gpr 0 is used to hold the static chain.  */
12671 
12672 static void
12673 s390_asm_trampoline_template (FILE *file)
12674 {
12675   rtx op[2];
12676   op[0] = gen_rtx_REG (Pmode, 0);
12677   op[1] = gen_rtx_REG (Pmode, 1);
12678 
12679   if (TARGET_64BIT)
12680     {
12681       output_asm_insn ("basr\t%1,0", op);         /* 2 byte */
12682       output_asm_insn ("lmg\t%0,%1,14(%1)", op);  /* 6 byte */
12683       output_asm_insn ("br\t%1", op);             /* 2 byte */
12684       ASM_OUTPUT_SKIP (file, (HOST_WIDE_INT)(TRAMPOLINE_SIZE - 10));
12685     }
12686   else
12687     {
12688       output_asm_insn ("basr\t%1,0", op);         /* 2 byte */
12689       output_asm_insn ("lm\t%0,%1,6(%1)", op);    /* 4 byte */
12690       output_asm_insn ("br\t%1", op);             /* 2 byte */
12691       ASM_OUTPUT_SKIP (file, (HOST_WIDE_INT)(TRAMPOLINE_SIZE - 8));
12692     }
12693 }
12694 
12695 /* Emit RTL insns to initialize the variable parts of a trampoline.
12696    FNADDR is an RTX for the address of the function's pure code.
12697    CXT is an RTX for the static chain value for the function.  */
12698 
12699 static void
12700 s390_trampoline_init (rtx m_tramp, tree fndecl, rtx cxt)
12701 {
12702   rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
12703   rtx mem;
12704 
12705   emit_block_move (m_tramp, assemble_trampoline_template (),
12706 		   GEN_INT (2 * UNITS_PER_LONG), BLOCK_OP_NORMAL);
12707 
12708   mem = adjust_address (m_tramp, Pmode, 2 * UNITS_PER_LONG);
12709   emit_move_insn (mem, cxt);
12710   mem = adjust_address (m_tramp, Pmode, 3 * UNITS_PER_LONG);
12711   emit_move_insn (mem, fnaddr);
12712 }
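
/* Resulting trampoline layout on 64 bit (illustrative):

     offset  0:  basr %r1,0               %r1 = address of next insn (off. 2)
     offset  2:  lmg  %r0,%r1,14(%r1)     loads the two words at offset 16
     offset  8:  br   %r1                 jump to the target function
     offset 10:  (padding)
     offset 16:  static chain value       -> ends up in %r0
     offset 24:  target function address  -> ends up in %r1  */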
12713 
12714 static void
12715 output_asm_nops (const char *user, int hw)
12716 {
12717   asm_fprintf (asm_out_file, "\t# NOPs for %s (%d halfwords)\n", user, hw);
12718   while (hw > 0)
12719     {
12720       if (hw >= 3)
12721 	{
12722 	  output_asm_insn ("brcl\t0,0", NULL);
12723 	  hw -= 3;
12724 	}
12725       else if (hw >= 2)
12726 	{
12727 	  output_asm_insn ("bc\t0,0", NULL);
12728 	  hw -= 2;
12729 	}
12730       else
12731 	{
12732 	  output_asm_insn ("bcr\t0,0", NULL);
12733 	  hw -= 1;
12734 	}
12735     }
12736 }
12737 
12738 /* Output assembler code to FILE to increment profiler label # LABELNO
12739    for profiling a function entry.  */
12740 
12741 void
12742 s390_function_profiler (FILE *file, int labelno)
12743 {
12744   rtx op[8];
12745 
12746   char label[128];
12747   ASM_GENERATE_INTERNAL_LABEL (label, "LP", labelno);
12748 
12749   fprintf (file, "# function profiler \n");
12750 
12751   op[0] = gen_rtx_REG (Pmode, RETURN_REGNUM);
12752   op[1] = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
12753   op[1] = gen_rtx_MEM (Pmode, plus_constant (Pmode, op[1], UNITS_PER_LONG));
12754   op[7] = GEN_INT (UNITS_PER_LONG);
12755 
12756   op[2] = gen_rtx_REG (Pmode, 1);
12757   op[3] = gen_rtx_SYMBOL_REF (Pmode, label);
12758   SYMBOL_REF_FLAGS (op[3]) = SYMBOL_FLAG_LOCAL;
12759 
12760   op[4] = gen_rtx_SYMBOL_REF (Pmode, flag_fentry ? "__fentry__" : "_mcount");
12761   if (flag_pic)
12762     {
12763       op[4] = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op[4]), UNSPEC_PLT);
12764       op[4] = gen_rtx_CONST (Pmode, op[4]);
12765     }
12766 
12767   if (flag_record_mcount)
12768     fprintf (file, "1:\n");
12769 
12770   if (flag_fentry)
12771     {
12772       if (flag_nop_mcount)
12773 	output_asm_nops ("-mnop-mcount", /* brasl */ 3);
12774       else if (cfun->static_chain_decl)
12775 	warning (OPT_Wcannot_profile, "nested functions cannot be profiled "
12776 		 "with %<-mfentry%> on s390");
12777       else
12778 	output_asm_insn ("brasl\t0,%4", op);
12779     }
12780   else if (TARGET_64BIT)
12781     {
12782       if (flag_nop_mcount)
12783 	output_asm_nops ("-mnop-mcount", /* stg */ 3 + /* larl */ 3 +
12784 			 /* brasl */ 3 + /* lg */ 3);
12785       else
12786 	{
12787 	  output_asm_insn ("stg\t%0,%1", op);
12788 	  if (flag_dwarf2_cfi_asm)
12789 	    output_asm_insn (".cfi_rel_offset\t%0,%7", op);
12790 	  output_asm_insn ("larl\t%2,%3", op);
12791 	  output_asm_insn ("brasl\t%0,%4", op);
12792 	  output_asm_insn ("lg\t%0,%1", op);
12793 	  if (flag_dwarf2_cfi_asm)
12794 	    output_asm_insn (".cfi_restore\t%0", op);
12795 	}
12796     }
12797   else
12798     {
12799       if (flag_nop_mcount)
12800 	output_asm_nops ("-mnop-mcount", /* st */ 2 + /* larl */ 3 +
12801 			 /* brasl */ 3 + /* l */ 2);
12802       else
12803 	{
12804 	  output_asm_insn ("st\t%0,%1", op);
12805 	  if (flag_dwarf2_cfi_asm)
12806 	    output_asm_insn (".cfi_rel_offset\t%0,%7", op);
12807 	  output_asm_insn ("larl\t%2,%3", op);
12808 	  output_asm_insn ("brasl\t%0,%4", op);
12809 	  output_asm_insn ("l\t%0,%1", op);
12810 	  if (flag_dwarf2_cfi_asm)
12811 	    output_asm_insn (".cfi_restore\t%0", op);
12812 	}
12813     }
12814 
12815   if (flag_record_mcount)
12816     {
12817       fprintf (file, "\t.section __mcount_loc, \"a\",@progbits\n");
12818       fprintf (file, "\t.%s 1b\n", TARGET_64BIT ? "quad" : "long");
12819       fprintf (file, "\t.previous\n");
12820     }
12821 }
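
/* For illustration, without -mfentry and -mnop-mcount the 64-bit sequence
   emitted above is roughly (CFI directives omitted):

     stg   %r14,8(%r15)        save the return address
     larl  %r1,.LPn            address of this function's profiler label
     brasl %r14,_mcount        (or _mcount@PLT when compiling PIC)
     lg    %r14,8(%r15)        restore the return address  */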
12822 
12823 /* Encode symbol attributes (local vs. global, tls model) of a SYMBOL_REF
12824    into its SYMBOL_REF_FLAGS.  */
12825 
12826 static void
12827 s390_encode_section_info (tree decl, rtx rtl, int first)
12828 {
12829   default_encode_section_info (decl, rtl, first);
12830 
12831   if (TREE_CODE (decl) == VAR_DECL)
12832     {
12833       /* Store the alignment to be able to check if we can use
12834 	 a larl/load-relative instruction.  We only handle the cases
12835 	 that can go wrong (i.e. no FUNC_DECLs).  */
12836       if (DECL_ALIGN (decl) == 0 || DECL_ALIGN (decl) % 16)
12837 	SYMBOL_FLAG_SET_NOTALIGN2 (XEXP (rtl, 0));
12838       else if (DECL_ALIGN (decl) % 32)
12839 	SYMBOL_FLAG_SET_NOTALIGN4 (XEXP (rtl, 0));
12840       else if (DECL_ALIGN (decl) % 64)
12841 	SYMBOL_FLAG_SET_NOTALIGN8 (XEXP (rtl, 0));
12842     }
12843 
12844   /* Literal pool references don't have a decl so they are handled
12845      differently here.  We rely on the information in the MEM_ALIGN
12846      entry to decide upon the alignment.  */
12847   if (MEM_P (rtl)
12848       && GET_CODE (XEXP (rtl, 0)) == SYMBOL_REF
12849       && TREE_CONSTANT_POOL_ADDRESS_P (XEXP (rtl, 0)))
12850     {
12851       if (MEM_ALIGN (rtl) == 0 || MEM_ALIGN (rtl) % 16)
12852 	SYMBOL_FLAG_SET_NOTALIGN2 (XEXP (rtl, 0));
12853       else if (MEM_ALIGN (rtl) % 32)
12854 	SYMBOL_FLAG_SET_NOTALIGN4 (XEXP (rtl, 0));
12855       else if (MEM_ALIGN (rtl) % 64)
12856 	SYMBOL_FLAG_SET_NOTALIGN8 (XEXP (rtl, 0));
12857     }
12858 }
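
/* E.g. a global declared as

     char c __attribute__ ((aligned (1)));

   gets the NOTALIGN2 flag set (via SYMBOL_FLAG_SET_NOTALIGN2 above),
   telling the backend not to address it with larl / load-relative
   instructions, which require at least 2-byte alignment.  */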
12859 
12860 /* Output thunk to FILE that implements a C++ virtual function call (with
12861    multiple inheritance) to FUNCTION.  The thunk adjusts the this pointer
12862    by DELTA, and unless VCALL_OFFSET is zero, applies an additional adjustment
12863    stored at VCALL_OFFSET in the vtable whose address is located at offset 0
12864    relative to the resulting this pointer.  */
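
/* In pseudo C the generated thunk performs

     this += delta;
     if (vcall_offset)
       this += *(long *) (*(void **) this + vcall_offset);
     goto *function;

   (illustrative only; the real code below is emitted as assembly).  */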
12865 
12866 static void
12867 s390_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
12868 		      HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
12869 		      tree function)
12870 {
12871   rtx op[10];
12872   int nonlocal = 0;
12873 
12874   /* Make sure unwind info is emitted for the thunk if needed.  */
12875   final_start_function (emit_barrier (), file, 1);
12876 
12877   /* Operand 0 is the target function.  */
12878   op[0] = XEXP (DECL_RTL (function), 0);
12879   if (flag_pic && !SYMBOL_REF_LOCAL_P (op[0]))
12880     {
12881       nonlocal = 1;
12882       op[0] = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op[0]),
12883 			      TARGET_64BIT ? UNSPEC_PLT : UNSPEC_GOT);
12884       op[0] = gen_rtx_CONST (Pmode, op[0]);
12885     }
12886 
12887   /* Operand 1 is the 'this' pointer.  */
12888   if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
12889     op[1] = gen_rtx_REG (Pmode, 3);
12890   else
12891     op[1] = gen_rtx_REG (Pmode, 2);
12892 
12893   /* Operand 2 is the delta.  */
12894   op[2] = GEN_INT (delta);
12895 
12896   /* Operand 3 is the vcall_offset.  */
12897   op[3] = GEN_INT (vcall_offset);
12898 
12899   /* Operand 4 is the temporary register.  */
12900   op[4] = gen_rtx_REG (Pmode, 1);
12901 
12902   /* Operands 5 to 8 can be used as labels.  */
12903   op[5] = NULL_RTX;
12904   op[6] = NULL_RTX;
12905   op[7] = NULL_RTX;
12906   op[8] = NULL_RTX;
12907 
12908   /* Operand 9 can be used for temporary register.  */
12909   op[9] = NULL_RTX;
12910 
12911   /* Generate code.  */
12912   if (TARGET_64BIT)
12913     {
12914       /* Setup literal pool pointer if required.  */
12915       if ((!DISP_IN_RANGE (delta)
12916 	   && !CONST_OK_FOR_K (delta)
12917 	   && !CONST_OK_FOR_Os (delta))
12918 	  || (!DISP_IN_RANGE (vcall_offset)
12919 	      && !CONST_OK_FOR_K (vcall_offset)
12920 	      && !CONST_OK_FOR_Os (vcall_offset)))
12921 	{
12922 	  op[5] = gen_label_rtx ();
12923 	  output_asm_insn ("larl\t%4,%5", op);
12924 	}
12925 
12926       /* Add DELTA to this pointer.  */
12927       if (delta)
12928 	{
12929 	  if (CONST_OK_FOR_J (delta))
12930 	    output_asm_insn ("la\t%1,%2(%1)", op);
12931 	  else if (DISP_IN_RANGE (delta))
12932 	    output_asm_insn ("lay\t%1,%2(%1)", op);
12933 	  else if (CONST_OK_FOR_K (delta))
12934 	    output_asm_insn ("aghi\t%1,%2", op);
12935 	  else if (CONST_OK_FOR_Os (delta))
12936 	    output_asm_insn ("agfi\t%1,%2", op);
12937 	  else
12938 	    {
12939 	      op[6] = gen_label_rtx ();
12940 	      output_asm_insn ("agf\t%1,%6-%5(%4)", op);
12941 	    }
12942 	}
12943 
12944       /* Perform vcall adjustment.  */
12945       if (vcall_offset)
12946 	{
12947 	  if (DISP_IN_RANGE (vcall_offset))
12948 	    {
12949 	      output_asm_insn ("lg\t%4,0(%1)", op);
12950 	      output_asm_insn ("ag\t%1,%3(%4)", op);
12951 	    }
12952 	  else if (CONST_OK_FOR_K (vcall_offset))
12953 	    {
12954 	      output_asm_insn ("lghi\t%4,%3", op);
12955 	      output_asm_insn ("ag\t%4,0(%1)", op);
12956 	      output_asm_insn ("ag\t%1,0(%4)", op);
12957 	    }
12958 	  else if (CONST_OK_FOR_Os (vcall_offset))
12959 	    {
12960 	      output_asm_insn ("lgfi\t%4,%3", op);
12961 	      output_asm_insn ("ag\t%4,0(%1)", op);
12962 	      output_asm_insn ("ag\t%1,0(%4)", op);
12963 	    }
12964 	  else
12965 	    {
12966 	      op[7] = gen_label_rtx ();
12967 	      output_asm_insn ("llgf\t%4,%7-%5(%4)", op);
12968 	      output_asm_insn ("ag\t%4,0(%1)", op);
12969 	      output_asm_insn ("ag\t%1,0(%4)", op);
12970 	    }
12971 	}
12972 
12973       /* Jump to target.  */
12974       output_asm_insn ("jg\t%0", op);
12975 
12976       /* Output literal pool if required.  */
12977       if (op[5])
12978 	{
12979 	  output_asm_insn (".align\t4", op);
12980 	  targetm.asm_out.internal_label (file, "L",
12981 					  CODE_LABEL_NUMBER (op[5]));
12982 	}
12983       if (op[6])
12984 	{
12985 	  targetm.asm_out.internal_label (file, "L",
12986 					  CODE_LABEL_NUMBER (op[6]));
12987 	  output_asm_insn (".long\t%2", op);
12988 	}
12989       if (op[7])
12990 	{
12991 	  targetm.asm_out.internal_label (file, "L",
12992 					  CODE_LABEL_NUMBER (op[7]));
12993 	  output_asm_insn (".long\t%3", op);
12994 	}
12995     }
12996   else
12997     {
12998       /* Setup base pointer if required.  */
12999       if (!vcall_offset
13000 	  || (!DISP_IN_RANGE (delta)
13001 	      && !CONST_OK_FOR_K (delta)
13002 	      && !CONST_OK_FOR_Os (delta))
13003 	  || (!DISP_IN_RANGE (delta)
13004 	      && !CONST_OK_FOR_K (vcall_offset)
13005 	      && !CONST_OK_FOR_Os (vcall_offset)))
13006 	{
13007 	  op[5] = gen_label_rtx ();
13008 	  output_asm_insn ("basr\t%4,0", op);
13009 	  targetm.asm_out.internal_label (file, "L",
13010 					  CODE_LABEL_NUMBER (op[5]));
13011 	}
13012 
13013       /* Add DELTA to this pointer.  */
13014       if (delta)
13015 	{
13016 	  if (CONST_OK_FOR_J (delta))
13017 	    output_asm_insn ("la\t%1,%2(%1)", op);
13018 	  else if (DISP_IN_RANGE (delta))
13019 	    output_asm_insn ("lay\t%1,%2(%1)", op);
13020 	  else if (CONST_OK_FOR_K (delta))
13021 	    output_asm_insn ("ahi\t%1,%2", op);
13022 	  else if (CONST_OK_FOR_Os (delta))
13023 	    output_asm_insn ("afi\t%1,%2", op);
13024 	  else
13025 	    {
13026 	      op[6] = gen_label_rtx ();
13027 	      output_asm_insn ("a\t%1,%6-%5(%4)", op);
13028 	    }
13029 	}
13030 
13031       /* Perform vcall adjustment.  */
13032       if (vcall_offset)
13033 	{
13034 	  if (CONST_OK_FOR_J (vcall_offset))
13035 	    {
13036 	      output_asm_insn ("l\t%4,0(%1)", op);
13037 	      output_asm_insn ("a\t%1,%3(%4)", op);
13038 	    }
13039 	  else if (DISP_IN_RANGE (vcall_offset))
13040 	    {
13041 	      output_asm_insn ("l\t%4,0(%1)", op);
13042 	      output_asm_insn ("ay\t%1,%3(%4)", op);
13043 	    }
13044 	  else if (CONST_OK_FOR_K (vcall_offset))
13045 	    {
13046 	      output_asm_insn ("lhi\t%4,%3", op);
13047 	      output_asm_insn ("a\t%4,0(%1)", op);
13048 	      output_asm_insn ("a\t%1,0(%4)", op);
13049 	    }
13050 	  else if (CONST_OK_FOR_Os (vcall_offset))
13051 	    {
13052 	      output_asm_insn ("iilf\t%4,%3", op);
13053 	      output_asm_insn ("a\t%4,0(%1)", op);
13054 	      output_asm_insn ("a\t%1,0(%4)", op);
13055 	    }
13056 	  else
13057 	    {
13058 	      op[7] = gen_label_rtx ();
13059 	      output_asm_insn ("l\t%4,%7-%5(%4)", op);
13060 	      output_asm_insn ("a\t%4,0(%1)", op);
13061 	      output_asm_insn ("a\t%1,0(%4)", op);
13062 	    }
13063 
13064 	  /* We had to clobber the base pointer register.
13065 	     Re-setup the base pointer (with a different base).  */
13066 	  op[5] = gen_label_rtx ();
13067 	  output_asm_insn ("basr\t%4,0", op);
13068 	  targetm.asm_out.internal_label (file, "L",
13069 					  CODE_LABEL_NUMBER (op[5]));
13070 	}
13071 
13072       /* Jump to target.  */
13073       op[8] = gen_label_rtx ();
13074 
13075       if (!flag_pic)
13076 	output_asm_insn ("l\t%4,%8-%5(%4)", op);
13077       else if (!nonlocal)
13078 	output_asm_insn ("a\t%4,%8-%5(%4)", op);
13079       /* We cannot call through .plt, since .plt requires %r12 loaded.  */
13080       else if (flag_pic == 1)
13081 	{
13082 	  output_asm_insn ("a\t%4,%8-%5(%4)", op);
13083 	  output_asm_insn ("l\t%4,%0(%4)", op);
13084 	}
13085       else if (flag_pic == 2)
13086 	{
13087 	  op[9] = gen_rtx_REG (Pmode, 0);
13088 	  output_asm_insn ("l\t%9,%8-4-%5(%4)", op);
13089 	  output_asm_insn ("a\t%4,%8-%5(%4)", op);
13090 	  output_asm_insn ("ar\t%4,%9", op);
13091 	  output_asm_insn ("l\t%4,0(%4)", op);
13092 	}
13093 
13094       output_asm_insn ("br\t%4", op);
13095 
13096       /* Output literal pool.  */
13097       output_asm_insn (".align\t4", op);
13098 
13099       if (nonlocal && flag_pic == 2)
13100 	output_asm_insn (".long\t%0", op);
13101       if (nonlocal)
13102 	{
13103 	  op[0] = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
13104 	  SYMBOL_REF_FLAGS (op[0]) = SYMBOL_FLAG_LOCAL;
13105 	}
13106 
13107       targetm.asm_out.internal_label (file, "L", CODE_LABEL_NUMBER (op[8]));
13108       if (!flag_pic)
13109 	output_asm_insn (".long\t%0", op);
13110       else
13111 	output_asm_insn (".long\t%0-%5", op);
13112 
13113       if (op[6])
13114 	{
13115 	  targetm.asm_out.internal_label (file, "L",
13116 					  CODE_LABEL_NUMBER (op[6]));
13117 	  output_asm_insn (".long\t%2", op);
13118 	}
13119       if (op[7])
13120 	{
13121 	  targetm.asm_out.internal_label (file, "L",
13122 					  CODE_LABEL_NUMBER (op[7]));
13123 	  output_asm_insn (".long\t%3", op);
13124 	}
13125     }
13126   final_end_function ();
13127 }
13128 
13129 /* Output either an indirect jump or an indirect call
13130    (RETURN_ADDR_REGNO != INVALID_REGNUM) with target register REGNO
13131    using a branch trampoline disabling branch target prediction.  */
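/* For illustration only (a sketch; the actual thunk symbols come from
   TARGET_INDIRECT_BRANCH_THUNK_NAME_EXRL/_EX and encode the target
   register, plus INDIRECT_BRANCH_THUNK_REGNUM for the non-exrl case):

     indirect call through %r1, return address in %r14, exrl available:
	brasl	%r14,<exrl-thunk-for-r1>

     plain indirect jump through %r1, exrl not available:
	jg	<ex-thunk-for-r1>  */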
13132 
13133 void
13134 s390_indirect_branch_via_thunk (unsigned int regno,
13135 				unsigned int return_addr_regno,
13136 				rtx comparison_operator,
13137 				enum s390_indirect_branch_type type)
13138 {
13139   enum s390_indirect_branch_option option;
13140 
13141   if (type == s390_indirect_branch_type_return)
13142     {
13143       if (s390_return_addr_from_memory ())
13144 	option = s390_opt_function_return_mem;
13145       else
13146 	option = s390_opt_function_return_reg;
13147     }
13148   else if (type == s390_indirect_branch_type_jump)
13149     option = s390_opt_indirect_branch_jump;
13150   else if (type == s390_indirect_branch_type_call)
13151     option = s390_opt_indirect_branch_call;
13152   else
13153     gcc_unreachable ();
13154 
13155   if (TARGET_INDIRECT_BRANCH_TABLE)
13156     {
13157       char label[32];
13158 
13159       ASM_GENERATE_INTERNAL_LABEL (label,
13160 				   indirect_branch_table_label[option],
13161 				   indirect_branch_table_label_no[option]++);
13162       ASM_OUTPUT_LABEL (asm_out_file, label);
13163     }
13164 
13165   if (return_addr_regno != INVALID_REGNUM)
13166     {
13167       gcc_assert (comparison_operator == NULL_RTX);
13168       fprintf (asm_out_file, " \tbrasl\t%%r%d,", return_addr_regno);
13169     }
13170   else
13171     {
13172       fputs (" \tjg", asm_out_file);
13173       if (comparison_operator != NULL_RTX)
13174 	print_operand (asm_out_file, comparison_operator, 'C');
13175 
13176       fputs ("\t", asm_out_file);
13177     }
13178 
13179   if (TARGET_CPU_Z10)
13180     fprintf (asm_out_file,
13181 	     TARGET_INDIRECT_BRANCH_THUNK_NAME_EXRL "\n",
13182 	     regno);
13183   else
13184     fprintf (asm_out_file,
13185 	     TARGET_INDIRECT_BRANCH_THUNK_NAME_EX "\n",
13186 	     INDIRECT_BRANCH_THUNK_REGNUM, regno);
13187 
13188   if ((option == s390_opt_indirect_branch_jump
13189        && cfun->machine->indirect_branch_jump == indirect_branch_thunk)
13190       || (option == s390_opt_indirect_branch_call
13191 	  && cfun->machine->indirect_branch_call == indirect_branch_thunk)
13192       || (option == s390_opt_function_return_reg
13193 	  && cfun->machine->function_return_reg == indirect_branch_thunk)
13194       || (option == s390_opt_function_return_mem
13195 	  && cfun->machine->function_return_mem == indirect_branch_thunk))
13196     {
13197       if (TARGET_CPU_Z10)
13198 	indirect_branch_z10thunk_mask |= (1 << regno);
13199       else
13200 	indirect_branch_prez10thunk_mask |= (1 << regno);
13201     }
13202 }
13203 
13204 /* Output an inline thunk for indirect jumps.  EXECUTE_TARGET can
13205    either be an address register or a label pointing to the location
13206    of the jump instruction.  */
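/* Roughly, the emitted sequence looks like this (sketch only; the
   first insn is an exrl if EXECUTE_TARGET is a label, otherwise an ex
   through the given address register):

	exrl	%r0,<label of the original branch insn>
   0:	j	0b

   The trailing self-branch is never reached architecturally; it
   presumably serves to pen in any speculative fall-through.  */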
13207 
13208 void
13209 s390_indirect_branch_via_inline_thunk (rtx execute_target)
13210 {
13211   if (TARGET_INDIRECT_BRANCH_TABLE)
13212     {
13213       char label[32];
13214 
13215       ASM_GENERATE_INTERNAL_LABEL (label,
13216 				   indirect_branch_table_label[s390_opt_indirect_branch_jump],
13217 				   indirect_branch_table_label_no[s390_opt_indirect_branch_jump]++);
13218       ASM_OUTPUT_LABEL (asm_out_file, label);
13219     }
13220 
13221   if (!TARGET_ZARCH)
13222     fputs ("\t.machinemode zarch\n", asm_out_file);
13223 
13224   if (REG_P (execute_target))
13225     fprintf (asm_out_file, "\tex\t%%r0,0(%%r%d)\n", REGNO (execute_target));
13226   else
13227     output_asm_insn ("\texrl\t%%r0,%0", &execute_target);
13228 
13229   if (!TARGET_ZARCH)
13230     fputs ("\t.machinemode esa\n", asm_out_file);
13231 
13232   fputs ("0:\tj\t0b\n", asm_out_file);
13233 }
13234 
13235 static bool
13236 s390_valid_pointer_mode (scalar_int_mode mode)
13237 {
13238   return (mode == SImode || (TARGET_64BIT && mode == DImode));
13239 }
13240 
13241 /* Checks whether the given CALL_EXPR would use a call-saved
13242    register.  This is used to decide whether sibling call
13243    optimization could be performed on the respective function
13244    call.  */
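/* A minimal example of what this catches (a sketch, assuming the usual
   s390 ELF convention of passing the first integer arguments in
   %r2..%r6, with %r6 being call-saved):

     extern int callee (int, int, int, int, int);

     int
     caller (int a, int b, int c, int d, int e)
     {
       return callee (a, b, c, d, e);    <- fifth argument lands in %r6
     }

   Since the call needs %r6, it must not be turned into a sibling
   call.  */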
13245 
13246 static bool
13247 s390_call_saved_register_used (tree call_expr)
13248 {
13249   CUMULATIVE_ARGS cum_v;
13250   cumulative_args_t cum;
13251   tree parameter;
13252   machine_mode mode;
13253   tree type;
13254   rtx parm_rtx;
13255   int reg, i;
13256 
13257   INIT_CUMULATIVE_ARGS (cum_v, NULL, NULL, 0, 0);
13258   cum = pack_cumulative_args (&cum_v);
13259 
13260   for (i = 0; i < call_expr_nargs (call_expr); i++)
13261     {
13262       parameter = CALL_EXPR_ARG (call_expr, i);
13263       gcc_assert (parameter);
13264 
13265       /* For an undeclared variable passed as parameter we will get
13266 	 an ERROR_MARK node here.  */
13267       if (TREE_CODE (parameter) == ERROR_MARK)
13268 	return true;
13269 
13270       type = TREE_TYPE (parameter);
13271       gcc_assert (type);
13272 
13273       mode = TYPE_MODE (type);
13274       gcc_assert (mode);
13275 
13276       /* We assume that in the target function all parameters are
13277 	 named.  This only has an impact on vector argument register
13278 	 usage none of which is call-saved.  */
13279       if (pass_by_reference (&cum_v, mode, type, true))
13280 	{
13281 	  mode = Pmode;
13282 	  type = build_pointer_type (type);
13283 	}
13284 
13285        parm_rtx = s390_function_arg (cum, mode, type, true);
13286 
13287        s390_function_arg_advance (cum, mode, type, true);
13288 
13289        if (!parm_rtx)
13290 	 continue;
13291 
13292        if (REG_P (parm_rtx))
13293 	 {
13294 	   for (reg = 0; reg < REG_NREGS (parm_rtx); reg++)
13295 	     if (!call_used_regs[reg + REGNO (parm_rtx)])
13296 	       return true;
13297 	 }
13298 
13299        if (GET_CODE (parm_rtx) == PARALLEL)
13300 	 {
13301 	   int i;
13302 
13303 	   for (i = 0; i < XVECLEN (parm_rtx, 0); i++)
13304 	     {
13305 	       rtx r = XEXP (XVECEXP (parm_rtx, 0, i), 0);
13306 
13307 	       gcc_assert (REG_P (r));
13308 
13309 	       for (reg = 0; reg < REG_NREGS (r); reg++)
13310 		 if (!call_used_regs[reg + REGNO (r)])
13311 		   return true;
13312 	     }
13313 	 }
13314 
13315     }
13316   return false;
13317 }
13318 
13319 /* Return true if the given call expression can be
13320    turned into a sibling call.
13321    DECL holds the declaration of the function to be called whereas
13322    EXP is the call expression itself.  */
13323 
13324 static bool
13325 s390_function_ok_for_sibcall (tree decl, tree exp)
13326 {
13327   /* The TPF epilogue uses register 1.  */
13328   if (TARGET_TPF_PROFILING)
13329     return false;
13330 
13331   /* The 31 bit PLT code uses register 12 (GOT pointer - caller saved)
13332      which would have to be restored before the sibcall.  */
13333   if (!TARGET_64BIT && flag_pic && decl && !targetm.binds_local_p (decl))
13334     return false;
13335 
13336   /* The thunks for indirect branches require r1 if no exrl is
13337      available.  r1 might not be available when doing a sibling
13338      call.  */
13339   if (TARGET_INDIRECT_BRANCH_NOBP_CALL
13340       && !TARGET_CPU_Z10
13341       && !decl)
13342     return false;
13343 
13344   /* Register 6 on s390 is available as an argument register but unfortunately
13345      call-saved.  This makes functions needing this register for arguments
13346      not suitable for sibcalls.  */
13347   return !s390_call_saved_register_used (exp);
13348 }
13349 
13350 /* Return the fixed registers used for condition codes.  */
13351 
13352 static bool
13353 s390_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
13354 {
13355   *p1 = CC_REGNUM;
13356   *p2 = INVALID_REGNUM;
13357 
13358   return true;
13359 }
13360 
13361 /* This function is used by the call expanders of the machine description.
13362    It emits the call insn itself together with the necessary operations
13363    to adjust the target address and returns the emitted insn.
13364    ADDR_LOCATION is the target address rtx
13365    TLS_CALL the location of the thread-local symbol
13366    RESULT_REG the register where the result of the call should be stored
13367    RETADDR_REG the register where the return address should be stored
13368 	       If this parameter is NULL_RTX the call is considered
13369 	       to be a sibling call.  */
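/* The emitted pattern has roughly the following shape (a sketch; which
   elements are present depends on the operands passed in):

     (parallel
       [(set (reg result) (call (mem:QI addr) (const_int 0)))
	(clobber (reg retaddr))
	(use tls_call)])  */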
13370 
13371 rtx_insn *
13372 s390_emit_call (rtx addr_location, rtx tls_call, rtx result_reg,
13373 		rtx retaddr_reg)
13374 {
13375   bool plt_call = false;
13376   rtx_insn *insn;
13377   rtx vec[4] = { NULL_RTX };
13378   int elts = 0;
13379   rtx *call = &vec[0];
13380   rtx *clobber_ret_reg = &vec[1];
13381   rtx *use = &vec[2];
13382   rtx *clobber_thunk_reg = &vec[3];
13383   int i;
13384 
13385   /* Direct function calls need special treatment.  */
13386   if (GET_CODE (addr_location) == SYMBOL_REF)
13387     {
13388       /* When calling a global routine in PIC mode, we must
13389 	 replace the symbol itself with the PLT stub.  */
13390       if (flag_pic && !SYMBOL_REF_LOCAL_P (addr_location))
13391 	{
13392 	  if (TARGET_64BIT || retaddr_reg != NULL_RTX)
13393 	    {
13394 	      addr_location = gen_rtx_UNSPEC (Pmode,
13395 					      gen_rtvec (1, addr_location),
13396 					      UNSPEC_PLT);
13397 	      addr_location = gen_rtx_CONST (Pmode, addr_location);
13398 	      plt_call = true;
13399 	    }
13400 	  else
13401 	    /* For -fpic code the PLT entries might use r12 which is
13402 	       call-saved.  Therefore we cannot do a sibcall when
13403 	       calling directly using a symbol ref.  When reaching
13404 	       this point we decided (in s390_function_ok_for_sibcall)
13405 	       to do a sibcall for a function pointer but one of the
13406 	       optimizers was able to get rid of the function pointer
13407 	       by propagating the symbol ref into the call.  This
13408 	       optimization is illegal for S/390 so we turn the direct
13409 	       call into an indirect call again.  */
13410 	    addr_location = force_reg (Pmode, addr_location);
13411 	}
13412     }
13413 
13414   /* If it is already an indirect call or the code above moved the
13415      SYMBOL_REF to somewhere else make sure the address can be found in
13416      register 1.  */
13417   if (retaddr_reg == NULL_RTX
13418       && GET_CODE (addr_location) != SYMBOL_REF
13419       && !plt_call)
13420     {
13421       emit_move_insn (gen_rtx_REG (Pmode, SIBCALL_REGNUM), addr_location);
13422       addr_location = gen_rtx_REG (Pmode, SIBCALL_REGNUM);
13423     }
13424 
13425   if (TARGET_INDIRECT_BRANCH_NOBP_CALL
13426       && GET_CODE (addr_location) != SYMBOL_REF
13427       && !plt_call)
13428     {
13429       /* Indirect branch thunks require the target to be a single GPR.  */
13430       addr_location = force_reg (Pmode, addr_location);
13431 
13432       /* Without exrl the indirect branch thunks need an additional
13433 	 register for larl;ex.  */
13434       if (!TARGET_CPU_Z10)
13435 	{
13436 	  *clobber_thunk_reg = gen_rtx_REG (Pmode, INDIRECT_BRANCH_THUNK_REGNUM);
13437 	  *clobber_thunk_reg = gen_rtx_CLOBBER (VOIDmode, *clobber_thunk_reg);
13438 	}
13439     }
13440 
13441   addr_location = gen_rtx_MEM (QImode, addr_location);
13442   *call = gen_rtx_CALL (VOIDmode, addr_location, const0_rtx);
13443 
13444   if (result_reg != NULL_RTX)
13445     *call = gen_rtx_SET (result_reg, *call);
13446 
13447   if (retaddr_reg != NULL_RTX)
13448     {
13449       *clobber_ret_reg = gen_rtx_CLOBBER (VOIDmode, retaddr_reg);
13450 
13451       if (tls_call != NULL_RTX)
13452 	*use = gen_rtx_USE (VOIDmode, tls_call);
13453     }
13454 
13455 
13456   for (i = 0; i < 4; i++)
13457     if (vec[i] != NULL_RTX)
13458       elts++;
13459 
13460   if (elts > 1)
13461     {
13462       rtvec v;
13463       int e = 0;
13464 
13465       v = rtvec_alloc (elts);
13466       for (i = 0; i < 4; i++)
13467 	if (vec[i] != NULL_RTX)
13468 	  {
13469 	    RTVEC_ELT (v, e) = vec[i];
13470 	    e++;
13471 	  }
13472 
13473       *call = gen_rtx_PARALLEL (VOIDmode, v);
13474     }
13475 
13476   insn = emit_call_insn (*call);
13477 
13478   /* 31-bit PLT stubs and tls calls use the GOT register implicitly.  */
13479   if ((!TARGET_64BIT && plt_call) || tls_call != NULL_RTX)
13480     {
13481       /* s390_function_ok_for_sibcall should
13482 	 have denied sibcalls in this case.  */
13483       gcc_assert (retaddr_reg != NULL_RTX);
13484       use_reg (&CALL_INSN_FUNCTION_USAGE (insn), gen_rtx_REG (Pmode, 12));
13485     }
13486   return insn;
13487 }
13488 
13489 /* Implement TARGET_CONDITIONAL_REGISTER_USAGE.  */
13490 
13491 static void
13492 s390_conditional_register_usage (void)
13493 {
13494   int i;
13495 
13496   if (flag_pic)
13497     {
13498       fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
13499       call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
13500     }
13501   fixed_regs[BASE_REGNUM] = 0;
13502   call_used_regs[BASE_REGNUM] = 0;
13503   fixed_regs[RETURN_REGNUM] = 0;
13504   call_used_regs[RETURN_REGNUM] = 0;
13505   if (TARGET_64BIT)
13506     {
13507       for (i = FPR8_REGNUM; i <= FPR15_REGNUM; i++)
13508 	call_used_regs[i] = call_really_used_regs[i] = 0;
13509     }
13510   else
13511     {
13512       call_used_regs[FPR4_REGNUM] = call_really_used_regs[FPR4_REGNUM] = 0;
13513       call_used_regs[FPR6_REGNUM] = call_really_used_regs[FPR6_REGNUM] = 0;
13514     }
13515 
13516   if (TARGET_SOFT_FLOAT)
13517     {
13518       for (i = FPR0_REGNUM; i <= FPR15_REGNUM; i++)
13519 	call_used_regs[i] = fixed_regs[i] = 1;
13520     }
13521 
13522   /* Disable v16 - v31 for non-vector target.  */
13523   if (!TARGET_VX)
13524     {
13525       for (i = VR16_REGNUM; i <= VR31_REGNUM; i++)
13526 	fixed_regs[i] = call_used_regs[i] = call_really_used_regs[i] = 1;
13527     }
13528 }
13529 
13530 /* Corresponding function to eh_return expander.  */
13531 
13532 static GTY(()) rtx s390_tpf_eh_return_symbol;
13533 void
13534 s390_emit_tpf_eh_return (rtx target)
13535 {
13536   rtx_insn *insn;
13537   rtx reg, orig_ra;
13538 
13539   if (!s390_tpf_eh_return_symbol)
13540     s390_tpf_eh_return_symbol = gen_rtx_SYMBOL_REF (Pmode, "__tpf_eh_return");
13541 
13542   reg = gen_rtx_REG (Pmode, 2);
13543   orig_ra = gen_rtx_REG (Pmode, 3);
13544 
13545   emit_move_insn (reg, target);
13546   emit_move_insn (orig_ra, get_hard_reg_initial_val (Pmode, RETURN_REGNUM));
13547   insn = s390_emit_call (s390_tpf_eh_return_symbol, NULL_RTX, reg,
13548 				     gen_rtx_REG (Pmode, RETURN_REGNUM));
13549   use_reg (&CALL_INSN_FUNCTION_USAGE (insn), reg);
13550   use_reg (&CALL_INSN_FUNCTION_USAGE (insn), orig_ra);
13551 
13552   emit_move_insn (EH_RETURN_HANDLER_RTX, reg);
13553 }
13554 
13555 /* Rework the prologue/epilogue to avoid saving/restoring
13556    registers unnecessarily.  */
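/* For instance (sketch only), if the prologue conservatively emitted

	stmg	%r6,%r15,48(%r15)

   but the final register usage only requires %r14 and %r15 to be
   saved, the store multiple is narrowed to

	stmg	%r14,%r15,112(%r15)

   and the corresponding load multiple in the epilogue is adjusted
   accordingly.  */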
13557 
13558 static void
13559 s390_optimize_prologue (void)
13560 {
13561   rtx_insn *insn, *new_insn, *next_insn;
13562 
13563   /* Do a final recompute of the frame-related data.  */
13564   s390_optimize_register_info ();
13565 
13566   /* If all special registers are in fact used, there's nothing we
13567      can do, so no point in walking the insn list.  */
13568 
13569   if (cfun_frame_layout.first_save_gpr <= BASE_REGNUM
13570       && cfun_frame_layout.last_save_gpr >= BASE_REGNUM)
13571     return;
13572 
13573   /* Search for prologue/epilogue insns and replace them.  */
13574   for (insn = get_insns (); insn; insn = next_insn)
13575     {
13576       int first, last, off;
13577       rtx set, base, offset;
13578       rtx pat;
13579 
13580       next_insn = NEXT_INSN (insn);
13581 
13582       if (! NONJUMP_INSN_P (insn) || ! RTX_FRAME_RELATED_P (insn))
13583 	continue;
13584 
13585       pat = PATTERN (insn);
13586 
13587       /* Remove ldgr/lgdr instructions used for saving and restoring
13588 	 GPRs if possible.  */
13589       if (TARGET_Z10)
13590 	{
13591 	  rtx tmp_pat = pat;
13592 
13593 	  if (INSN_CODE (insn) == CODE_FOR_stack_restore_from_fpr)
13594 	    tmp_pat = XVECEXP (pat, 0, 0);
13595 
13596 	  if (GET_CODE (tmp_pat) == SET
13597 	      && GET_MODE (SET_SRC (tmp_pat)) == DImode
13598 	      && REG_P (SET_SRC (tmp_pat))
13599 	      && REG_P (SET_DEST (tmp_pat)))
13600 	    {
13601 	      int src_regno = REGNO (SET_SRC (tmp_pat));
13602 	      int dest_regno = REGNO (SET_DEST (tmp_pat));
13603 	      int gpr_regno;
13604 	      int fpr_regno;
13605 
13606 	      if (!((GENERAL_REGNO_P (src_regno)
13607 		     && FP_REGNO_P (dest_regno))
13608 		    || (FP_REGNO_P (src_regno)
13609 			&& GENERAL_REGNO_P (dest_regno))))
13610 		continue;
13611 
13612 	      gpr_regno = GENERAL_REGNO_P (src_regno) ? src_regno : dest_regno;
13613 	      fpr_regno = FP_REGNO_P (src_regno) ? src_regno : dest_regno;
13614 
13615 	      /* GPR must be call-saved, FPR must be call-clobbered.  */
13616 	      if (!call_really_used_regs[fpr_regno]
13617 		  || call_really_used_regs[gpr_regno])
13618 		continue;
13619 
13620 	      /* It must not happen that what we once saved in an FPR now
13621 		 needs a stack slot.  */
13622 	      gcc_assert (cfun_gpr_save_slot (gpr_regno) != SAVE_SLOT_STACK);
13623 
13624 	      if (cfun_gpr_save_slot (gpr_regno) == SAVE_SLOT_NONE)
13625 		{
13626 		  remove_insn (insn);
13627 		  continue;
13628 		}
13629 	    }
13630 	}
13631 
13632       if (GET_CODE (pat) == PARALLEL
13633 	  && store_multiple_operation (pat, VOIDmode))
13634 	{
13635 	  set = XVECEXP (pat, 0, 0);
13636 	  first = REGNO (SET_SRC (set));
13637 	  last = first + XVECLEN (pat, 0) - 1;
13638 	  offset = const0_rtx;
13639 	  base = eliminate_constant_term (XEXP (SET_DEST (set), 0), &offset);
13640 	  off = INTVAL (offset);
13641 
13642 	  if (GET_CODE (base) != REG || off < 0)
13643 	    continue;
13644 	  if (cfun_frame_layout.first_save_gpr != -1
13645 	      && (cfun_frame_layout.first_save_gpr < first
13646 		  || cfun_frame_layout.last_save_gpr > last))
13647 	    continue;
13648 	  if (REGNO (base) != STACK_POINTER_REGNUM
13649 	      && REGNO (base) != HARD_FRAME_POINTER_REGNUM)
13650 	    continue;
13651 	  if (first > BASE_REGNUM || last < BASE_REGNUM)
13652 	    continue;
13653 
13654 	  if (cfun_frame_layout.first_save_gpr != -1)
13655 	    {
13656 	      rtx s_pat = save_gprs (base,
13657 				     off + (cfun_frame_layout.first_save_gpr
13658 					    - first) * UNITS_PER_LONG,
13659 				     cfun_frame_layout.first_save_gpr,
13660 				     cfun_frame_layout.last_save_gpr);
13661 	      new_insn = emit_insn_before (s_pat, insn);
13662 	      INSN_ADDRESSES_NEW (new_insn, -1);
13663 	    }
13664 
13665 	  remove_insn (insn);
13666 	  continue;
13667 	}
13668 
13669       if (cfun_frame_layout.first_save_gpr == -1
13670 	  && GET_CODE (pat) == SET
13671 	  && GENERAL_REG_P (SET_SRC (pat))
13672 	  && GET_CODE (SET_DEST (pat)) == MEM)
13673 	{
13674 	  set = pat;
13675 	  first = REGNO (SET_SRC (set));
13676 	  offset = const0_rtx;
13677 	  base = eliminate_constant_term (XEXP (SET_DEST (set), 0), &offset);
13678 	  off = INTVAL (offset);
13679 
13680 	  if (GET_CODE (base) != REG || off < 0)
13681 	    continue;
13682 	  if (REGNO (base) != STACK_POINTER_REGNUM
13683 	      && REGNO (base) != HARD_FRAME_POINTER_REGNUM)
13684 	    continue;
13685 
13686 	  remove_insn (insn);
13687 	  continue;
13688 	}
13689 
13690       if (GET_CODE (pat) == PARALLEL
13691 	  && load_multiple_operation (pat, VOIDmode))
13692 	{
13693 	  set = XVECEXP (pat, 0, 0);
13694 	  first = REGNO (SET_DEST (set));
13695 	  last = first + XVECLEN (pat, 0) - 1;
13696 	  offset = const0_rtx;
13697 	  base = eliminate_constant_term (XEXP (SET_SRC (set), 0), &offset);
13698 	  off = INTVAL (offset);
13699 
13700 	  if (GET_CODE (base) != REG || off < 0)
13701 	    continue;
13702 
13703 	  if (cfun_frame_layout.first_restore_gpr != -1
13704 	      && (cfun_frame_layout.first_restore_gpr < first
13705 		  || cfun_frame_layout.last_restore_gpr > last))
13706 	    continue;
13707 	  if (REGNO (base) != STACK_POINTER_REGNUM
13708 	      && REGNO (base) != HARD_FRAME_POINTER_REGNUM)
13709 	    continue;
13710 	  if (first > BASE_REGNUM || last < BASE_REGNUM)
13711 	    continue;
13712 
13713 	  if (cfun_frame_layout.first_restore_gpr != -1)
13714 	    {
13715 	      rtx rpat = restore_gprs (base,
13716 				       off + (cfun_frame_layout.first_restore_gpr
13717 					      - first) * UNITS_PER_LONG,
13718 				       cfun_frame_layout.first_restore_gpr,
13719 				       cfun_frame_layout.last_restore_gpr);
13720 
13721 	      /* Remove REG_CFA_RESTOREs for registers that we no
13722 		 longer need to save.  */
13723 	      REG_NOTES (rpat) = REG_NOTES (insn);
13724 	      for (rtx *ptr = &REG_NOTES (rpat); *ptr; )
13725 		if (REG_NOTE_KIND (*ptr) == REG_CFA_RESTORE
13726 		    && ((int) REGNO (XEXP (*ptr, 0))
13727 			< cfun_frame_layout.first_restore_gpr))
13728 		  *ptr = XEXP (*ptr, 1);
13729 		else
13730 		  ptr = &XEXP (*ptr, 1);
13731 	      new_insn = emit_insn_before (rpat, insn);
13732 	      RTX_FRAME_RELATED_P (new_insn) = 1;
13733 	      INSN_ADDRESSES_NEW (new_insn, -1);
13734 	    }
13735 
13736 	  remove_insn (insn);
13737 	  continue;
13738 	}
13739 
13740       if (cfun_frame_layout.first_restore_gpr == -1
13741 	  && GET_CODE (pat) == SET
13742 	  && GENERAL_REG_P (SET_DEST (pat))
13743 	  && GET_CODE (SET_SRC (pat)) == MEM)
13744 	{
13745 	  set = pat;
13746 	  first = REGNO (SET_DEST (set));
13747 	  offset = const0_rtx;
13748 	  base = eliminate_constant_term (XEXP (SET_SRC (set), 0), &offset);
13749 	  off = INTVAL (offset);
13750 
13751 	  if (GET_CODE (base) != REG || off < 0)
13752 	    continue;
13753 
13754 	  if (REGNO (base) != STACK_POINTER_REGNUM
13755 	      && REGNO (base) != HARD_FRAME_POINTER_REGNUM)
13756 	    continue;
13757 
13758 	  remove_insn (insn);
13759 	  continue;
13760 	}
13761     }
13762 }
13763 
13764 /* On z10 and later the dynamic branch prediction must see the
13765    backward jump within a certain window.  If not, it falls back to
13766    the static prediction.  This function rearranges the loop backward
13767    branch in a way which makes the static prediction always correct.
13768    The function returns true if it added an instruction.  */
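/* Sketch of the transformation (labels are illustrative).  Before:

	<conditional branch>	.Lloop_head

   After:

	<inverted conditional branch>	.Lskip
	j	.Lloop_head
   .Lskip:

   The backward branch is now unconditional and thus trivially
   predicted, while the loop exit becomes a short forward branch.  */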
13769 static bool
13770 s390_fix_long_loop_prediction (rtx_insn *insn)
13771 {
13772   rtx set = single_set (insn);
13773   rtx code_label, label_ref;
13774   rtx_insn *uncond_jump;
13775   rtx_insn *cur_insn;
13776   rtx tmp;
13777   int distance;
13778 
13779   /* This will exclude branch on count and branch on index patterns
13780      since these are correctly statically predicted.  */
13781   if (!set
13782       || SET_DEST (set) != pc_rtx
13783       || GET_CODE (SET_SRC(set)) != IF_THEN_ELSE)
13784     return false;
13785 
13786   /* Skip conditional returns.  */
13787   if (ANY_RETURN_P (XEXP (SET_SRC (set), 1))
13788       && XEXP (SET_SRC (set), 2) == pc_rtx)
13789     return false;
13790 
13791   label_ref = (GET_CODE (XEXP (SET_SRC (set), 1)) == LABEL_REF ?
13792 	       XEXP (SET_SRC (set), 1) : XEXP (SET_SRC (set), 2));
13793 
13794   gcc_assert (GET_CODE (label_ref) == LABEL_REF);
13795 
13796   code_label = XEXP (label_ref, 0);
13797 
13798   if (INSN_ADDRESSES (INSN_UID (code_label)) == -1
13799       || INSN_ADDRESSES (INSN_UID (insn)) == -1
13800       || (INSN_ADDRESSES (INSN_UID (insn))
13801 	  - INSN_ADDRESSES (INSN_UID (code_label)) < PREDICT_DISTANCE))
13802     return false;
13803 
13804   for (distance = 0, cur_insn = PREV_INSN (insn);
13805        distance < PREDICT_DISTANCE - 6;
13806        distance += get_attr_length (cur_insn), cur_insn = PREV_INSN (cur_insn))
13807     if (!cur_insn || JUMP_P (cur_insn) || LABEL_P (cur_insn))
13808       return false;
13809 
13810   rtx_code_label *new_label = gen_label_rtx ();
13811   uncond_jump = emit_jump_insn_after (
13812 		  gen_rtx_SET (pc_rtx,
13813 			       gen_rtx_LABEL_REF (VOIDmode, code_label)),
13814 		  insn);
13815   emit_label_after (new_label, uncond_jump);
13816 
13817   tmp = XEXP (SET_SRC (set), 1);
13818   XEXP (SET_SRC (set), 1) = XEXP (SET_SRC (set), 2);
13819   XEXP (SET_SRC (set), 2) = tmp;
13820   INSN_CODE (insn) = -1;
13821 
13822   XEXP (label_ref, 0) = new_label;
13823   JUMP_LABEL (insn) = new_label;
13824   JUMP_LABEL (uncond_jump) = code_label;
13825 
13826   return true;
13827 }
13828 
13829 /* Returns 1 if INSN reads the value of REG for purposes not related
13830    to addressing of memory, and 0 otherwise.  */
13831 static int
13832 s390_non_addr_reg_read_p (rtx reg, rtx_insn *insn)
13833 {
13834   return reg_referenced_p (reg, PATTERN (insn))
13835     && !reg_used_in_mem_p (REGNO (reg), PATTERN (insn));
13836 }
13837 
13838 /* Starting from INSN find_cond_jump looks downwards in the insn
13839    stream for a single jump insn which is the last user of the
13840    condition code set in INSN.  */
13841 static rtx_insn *
13842 find_cond_jump (rtx_insn *insn)
13843 {
13844   for (; insn; insn = NEXT_INSN (insn))
13845     {
13846       rtx ite, cc;
13847 
13848       if (LABEL_P (insn))
13849 	break;
13850 
13851       if (!JUMP_P (insn))
13852 	{
13853 	  if (reg_mentioned_p (gen_rtx_REG (CCmode, CC_REGNUM), insn))
13854 	    break;
13855 	  continue;
13856 	}
13857 
13858       /* This will be triggered by a return.  */
13859       if (GET_CODE (PATTERN (insn)) != SET)
13860 	break;
13861 
13862       gcc_assert (SET_DEST (PATTERN (insn)) == pc_rtx);
13863       ite = SET_SRC (PATTERN (insn));
13864 
13865       if (GET_CODE (ite) != IF_THEN_ELSE)
13866 	break;
13867 
13868       cc = XEXP (XEXP (ite, 0), 0);
13869       if (!REG_P (cc) || !CC_REGNO_P (REGNO (cc)))
13870 	break;
13871 
13872       if (find_reg_note (insn, REG_DEAD, cc))
13873 	return insn;
13874       break;
13875     }
13876 
13877   return NULL;
13878 }
13879 
13880 /* Swap the condition in COND and the operands in OP0 and OP1 so that
13881    the semantics do not change.  If NULL_RTX is passed as COND the
13882    function tries to find the conditional jump starting with INSN.  */
13883 static void
13884 s390_swap_cmp (rtx cond, rtx *op0, rtx *op1, rtx_insn *insn)
13885 {
13886   rtx tmp = *op0;
13887 
13888   if (cond == NULL_RTX)
13889     {
13890       rtx_insn *jump = find_cond_jump (NEXT_INSN (insn));
13891       rtx set = jump ? single_set (jump) : NULL_RTX;
13892 
13893       if (set == NULL_RTX)
13894 	return;
13895 
13896       cond = XEXP (SET_SRC (set), 0);
13897     }
13898 
13899   *op0 = *op1;
13900   *op1 = tmp;
13901   PUT_CODE (cond, swap_condition (GET_CODE (cond)));
13902 }
13903 
13904 /* On z10, instructions of the compare-and-branch family have the
13905    property of accessing the register occurring as second operand with
13906    its bits complemented.  If such a compare is grouped with a second
13907    instruction that accesses the same register non-complemented, and
13908    if that register's value is delivered via a bypass, then the
13909    pipeline recycles, thereby causing significant performance decline.
13910    This function locates such situations and exchanges the two
13911    operands of the compare.  The function returns true whenever it
13912    added an insn.  */
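/* Illustrative example (registers and mnemonics chosen arbitrarily):
   with a group such as

	lr	%r3,%r5
	crj	%r2,%r3,...

   the compare operands (and the condition mask) are swapped so that
   %r3 becomes the first operand; if swapping would merely move the
   conflict to the other neighboring insn, a register-to-register NOP
   is emitted after the compare instead.  */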
13913 static bool
13914 s390_z10_optimize_cmp (rtx_insn *insn)
13915 {
13916   rtx_insn *prev_insn, *next_insn;
13917   bool insn_added_p = false;
13918   rtx cond, *op0, *op1;
13919 
13920   if (GET_CODE (PATTERN (insn)) == PARALLEL)
13921     {
13922       /* Handle compare and branch and branch on count
13923 	 instructions.  */
13924       rtx pattern = single_set (insn);
13925 
13926       if (!pattern
13927 	  || SET_DEST (pattern) != pc_rtx
13928 	  || GET_CODE (SET_SRC (pattern)) != IF_THEN_ELSE)
13929 	return false;
13930 
13931       cond = XEXP (SET_SRC (pattern), 0);
13932       op0 = &XEXP (cond, 0);
13933       op1 = &XEXP (cond, 1);
13934     }
13935   else if (GET_CODE (PATTERN (insn)) == SET)
13936     {
13937       rtx src, dest;
13938 
13939       /* Handle normal compare instructions.  */
13940       src = SET_SRC (PATTERN (insn));
13941       dest = SET_DEST (PATTERN (insn));
13942 
13943       if (!REG_P (dest)
13944 	  || !CC_REGNO_P (REGNO (dest))
13945 	  || GET_CODE (src) != COMPARE)
13946 	return false;
13947 
13948       /* s390_swap_cmp will try to find the conditional
13949 	 jump when passing NULL_RTX as condition.  */
13950       cond = NULL_RTX;
13951       op0 = &XEXP (src, 0);
13952       op1 = &XEXP (src, 1);
13953     }
13954   else
13955     return false;
13956 
13957   if (!REG_P (*op0) || !REG_P (*op1))
13958     return false;
13959 
13960   if (GET_MODE_CLASS (GET_MODE (*op0)) != MODE_INT)
13961     return false;
13962 
13963   /* Swap the COMPARE arguments and its mask if there is a
13964      conflicting access in the previous insn.  */
13965   prev_insn = prev_active_insn (insn);
13966   if (prev_insn != NULL_RTX && INSN_P (prev_insn)
13967       && reg_referenced_p (*op1, PATTERN (prev_insn)))
13968     s390_swap_cmp (cond, op0, op1, insn);
13969 
13970   /* Check if there is a conflict with the next insn. If there
13971      was no conflict with the previous insn, then swap the
13972      COMPARE arguments and its mask.  If we already swapped
13973      the operands, or if swapping them would cause a conflict
13974      with the previous insn, issue a NOP after the COMPARE in
13975      order to separate the two instructions.  */
13976   next_insn = next_active_insn (insn);
13977   if (next_insn != NULL_RTX && INSN_P (next_insn)
13978       && s390_non_addr_reg_read_p (*op1, next_insn))
13979     {
13980       if (prev_insn != NULL_RTX && INSN_P (prev_insn)
13981 	  && s390_non_addr_reg_read_p (*op0, prev_insn))
13982 	{
13983 	  if (REGNO (*op1) == 0)
13984 	    emit_insn_after (gen_nop_lr1 (), insn);
13985 	  else
13986 	    emit_insn_after (gen_nop_lr0 (), insn);
13987 	  insn_added_p = true;
13988 	}
13989       else
13990 	s390_swap_cmp (cond, op0, op1, insn);
13991     }
13992   return insn_added_p;
13993 }
13994 
13995 /* Number of INSNs to be scanned backward in the last BB of the loop
13996    and forward in the first BB of the loop.  This usually should be a
13997    bit more than the number of INSNs which could go into one
13998    group.  */
13999 #define S390_OSC_SCAN_INSN_NUM 5
14000 
14001 /* Scan LOOP for static OSC collisions and return true if an osc_break
14002    should be issued for this loop.  */
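/* Shape of a loop the scan below is looking for (a sketch, registers
   arbitrary): a store in the latch and a load in the header using the
   very same base/index/displacement, while one of the address
   registers is modified inside the loop, e.g.

     .Lhead:	l	%r1,0(%r4,%r9)
		la	%r4,8(%r4)
		...
		st	%r1,0(%r4,%r9)
		j	.Lhead  */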
14003 static bool
14004 s390_adjust_loop_scan_osc (struct loop* loop)
14005 
14006 {
14007   HARD_REG_SET modregs, newregs;
14008   rtx_insn *insn, *store_insn = NULL;
14009   rtx set;
14010   struct s390_address addr_store, addr_load;
14011   subrtx_iterator::array_type array;
14012   int insn_count;
14013 
14014   CLEAR_HARD_REG_SET (modregs);
14015 
14016   insn_count = 0;
14017   FOR_BB_INSNS_REVERSE (loop->latch, insn)
14018     {
14019       if (!INSN_P (insn) || INSN_CODE (insn) <= 0)
14020 	continue;
14021 
14022       insn_count++;
14023       if (insn_count > S390_OSC_SCAN_INSN_NUM)
14024 	return false;
14025 
14026       find_all_hard_reg_sets (insn, &newregs, true);
14027       IOR_HARD_REG_SET (modregs, newregs);
14028 
14029       set = single_set (insn);
14030       if (!set)
14031 	continue;
14032 
14033       if (MEM_P (SET_DEST (set))
14034 	  && s390_decompose_address (XEXP (SET_DEST (set), 0), &addr_store))
14035 	{
14036 	  store_insn = insn;
14037 	  break;
14038 	}
14039     }
14040 
14041   if (store_insn == NULL_RTX)
14042     return false;
14043 
14044   insn_count = 0;
14045   FOR_BB_INSNS (loop->header, insn)
14046     {
14047       if (!INSN_P (insn) || INSN_CODE (insn) <= 0)
14048 	continue;
14049 
14050       if (insn == store_insn)
14051 	return false;
14052 
14053       insn_count++;
14054       if (insn_count > S390_OSC_SCAN_INSN_NUM)
14055 	return false;
14056 
14057       find_all_hard_reg_sets (insn, &newregs, true);
14058       IOR_HARD_REG_SET (modregs, newregs);
14059 
14060       set = single_set (insn);
14061       if (!set)
14062 	continue;
14063 
14064       /* An intermediate store disrupts static OSC checking
14065 	 anyway.  */
14066       if (MEM_P (SET_DEST (set))
14067 	  && s390_decompose_address (XEXP (SET_DEST (set), 0), NULL))
14068 	return false;
14069 
14070       FOR_EACH_SUBRTX (iter, array, SET_SRC (set), NONCONST)
14071 	if (MEM_P (*iter)
14072 	    && s390_decompose_address (XEXP (*iter, 0), &addr_load)
14073 	    && rtx_equal_p (addr_load.base, addr_store.base)
14074 	    && rtx_equal_p (addr_load.indx, addr_store.indx)
14075 	    && rtx_equal_p (addr_load.disp, addr_store.disp))
14076 	  {
14077 	    if ((addr_load.base != NULL_RTX
14078 		 && TEST_HARD_REG_BIT (modregs, REGNO (addr_load.base)))
14079 		|| (addr_load.indx != NULL_RTX
14080 		    && TEST_HARD_REG_BIT (modregs, REGNO (addr_load.indx))))
14081 	      return true;
14082 	  }
14083     }
14084   return false;
14085 }
14086 
14087 /* Look for adjustments which can be done on simple innermost
14088    loops.  */
14089 static void
14090 s390_adjust_loops ()
14091 {
14092   struct loop *loop = NULL;
14093 
14094   df_analyze ();
14095   compute_bb_for_insn ();
14096 
14097   /* Find the loops.  */
14098   loop_optimizer_init (AVOID_CFG_MODIFICATIONS);
14099 
14100   FOR_EACH_LOOP (loop, LI_ONLY_INNERMOST)
14101     {
14102       if (dump_file)
14103 	{
14104 	  flow_loop_dump (loop, dump_file, NULL, 0);
14105 	  fprintf (dump_file, ";;  OSC loop scan Loop: ");
14106 	}
14107       if (loop->latch == NULL
14108 	  || pc_set (BB_END (loop->latch)) == NULL_RTX
14109 	  || !s390_adjust_loop_scan_osc (loop))
14110 	{
14111 	  if (dump_file)
14112 	    {
14113 	      if (loop->latch == NULL)
14114 		fprintf (dump_file, " multiple backward jumps\n");
14115 	      else
14116 		{
14117 		  fprintf (dump_file, " header insn: %d latch insn: %d ",
14118 			   INSN_UID (BB_HEAD (loop->header)),
14119 			   INSN_UID (BB_END (loop->latch)));
14120 		  if (pc_set (BB_END (loop->latch)) == NULL_RTX)
14121 		    fprintf (dump_file, " loop does not end with jump\n");
14122 		  else
14123 		    fprintf (dump_file, " not instrumented\n");
14124 		}
14125 	    }
14126 	}
14127       else
14128 	{
14129 	  rtx_insn *new_insn;
14130 
14131 	  if (dump_file)
14132 	    fprintf (dump_file, " adding OSC break insn: ");
14133 	  new_insn = emit_insn_before (gen_osc_break (),
14134 				       BB_END (loop->latch));
14135 	  INSN_ADDRESSES_NEW (new_insn, -1);
14136 	}
14137     }
14138 
14139   loop_optimizer_finalize ();
14140 
14141   df_finish_pass (false);
14142 }
14143 
14144 /* Perform machine-dependent processing.  */
14145 
14146 static void
14147 s390_reorg (void)
14148 {
14149   struct constant_pool *pool;
14150   rtx_insn *insn;
14151   int hw_before, hw_after;
14152 
14153   if (s390_tune == PROCESSOR_2964_Z13)
14154     s390_adjust_loops ();
14155 
14156   /* Make sure all splits have been performed; splits after
14157      machine_dependent_reorg might confuse insn length counts.  */
14158   split_all_insns_noflow ();
14159 
14160   /* Install the main literal pool and the associated base
14161      register load insns.  The literal pool might be > 4096 bytes in
14162      size, so that some of its elements cannot be directly accessed.
14163 
14164      To fix this, we split the single literal pool into multiple
14165      pool chunks, reloading the pool base register at various
14166      points throughout the function to ensure it always points to
14167      the pool chunk the following code expects.  */
14168 
14169   /* Collect the literal pool.  */
14170   pool = s390_mainpool_start ();
14171   if (pool)
14172     {
14173       /* Finish up literal pool related changes.  */
14174       s390_mainpool_finish (pool);
14175     }
14176   else
14177     {
14178       /* If literal pool overflowed, chunkify it.  */
14179       pool = s390_chunkify_start ();
14180       s390_chunkify_finish (pool);
14181     }
14182 
14183   /* Generate out-of-pool execute target insns.  */
14184   for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
14185     {
14186       rtx label;
14187       rtx_insn *target;
14188 
14189       label = s390_execute_label (insn);
14190       if (!label)
14191 	continue;
14192 
14193       gcc_assert (label != const0_rtx);
14194 
14195       target = emit_label (XEXP (label, 0));
14196       INSN_ADDRESSES_NEW (target, -1);
14197 
14198       if (JUMP_P (insn))
14199 	{
14200 	  target = emit_jump_insn (s390_execute_target (insn));
14201 	  /* This is important in order to keep a table jump
14202 	     pointing at the jump table label.  Only then is it
14203 	     recognized as a table jump.  */
14204 	  JUMP_LABEL (target) = JUMP_LABEL (insn);
14205 	}
14206       else
14207 	target = emit_insn (s390_execute_target (insn));
14208       INSN_ADDRESSES_NEW (target, -1);
14209     }
14210 
14211   /* Try to optimize prologue and epilogue further.  */
14212   s390_optimize_prologue ();
14213 
14214   /* Walk over the insns and do some >=z10 specific changes.  */
14215   if (s390_tune >= PROCESSOR_2097_Z10)
14216     {
14217       rtx_insn *insn;
14218       bool insn_added_p = false;
14219 
14220       /* The insn lengths and addresses have to be up to date for the
14221 	 following manipulations.  */
14222       shorten_branches (get_insns ());
14223 
14224       for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
14225 	{
14226 	  if (!INSN_P (insn) || INSN_CODE (insn) <= 0)
14227 	    continue;
14228 
14229 	  if (JUMP_P (insn))
14230 	    insn_added_p |= s390_fix_long_loop_prediction (insn);
14231 
14232 	  if ((GET_CODE (PATTERN (insn)) == PARALLEL
14233 	       || GET_CODE (PATTERN (insn)) == SET)
14234 	      && s390_tune == PROCESSOR_2097_Z10)
14235 	    insn_added_p |= s390_z10_optimize_cmp (insn);
14236 	}
14237 
14238       /* Adjust branches if we added new instructions.  */
14239       if (insn_added_p)
14240 	shorten_branches (get_insns ());
14241     }
14242 
14243   s390_function_num_hotpatch_hw (current_function_decl, &hw_before, &hw_after);
14244   if (hw_after > 0)
14245     {
14246       rtx_insn *insn;
14247 
14248       /* Insert NOPs for hotpatching. */
14249       for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
14250 	/* Emit NOPs
14251 	    1. inside the area covered by debug information to allow setting
14252 	       breakpoints at the NOPs,
14253 	    2. before any insn which results in an asm instruction,
14254 	    3. before in-function labels to avoid jumping to the NOPs, for
14255 	       example as part of a loop,
14256 	    4. before any barrier in case the function is completely empty
14257 	       (__builtin_unreachable ()) and has neither internal labels nor
14258 	       active insns.
14259 	*/
14260 	if (active_insn_p (insn) || BARRIER_P (insn) || LABEL_P (insn))
14261 	  break;
14262       /* Output a series of NOPs before the first active insn.  */
14263       while (insn && hw_after > 0)
14264 	{
14265 	  if (hw_after >= 3)
14266 	    {
14267 	      emit_insn_before (gen_nop_6_byte (), insn);
14268 	      hw_after -= 3;
14269 	    }
14270 	  else if (hw_after >= 2)
14271 	    {
14272 	      emit_insn_before (gen_nop_4_byte (), insn);
14273 	      hw_after -= 2;
14274 	    }
14275 	  else
14276 	    {
14277 	      emit_insn_before (gen_nop_2_byte (), insn);
14278 	      hw_after -= 1;
14279 	    }
14280 	}
14281     }
14282 }
14283 
14284 /* Return true if INSN is a fp load insn writing register REGNO.  */
14285 static inline bool
14286 s390_fpload_toreg (rtx_insn *insn, unsigned int regno)
14287 {
14288   rtx set;
14289   enum attr_type flag = s390_safe_attr_type (insn);
14290 
14291   if (flag != TYPE_FLOADSF && flag != TYPE_FLOADDF)
14292     return false;
14293 
14294   set = single_set (insn);
14295 
14296   if (set == NULL_RTX)
14297     return false;
14298 
14299   if (!REG_P (SET_DEST (set)) || !MEM_P (SET_SRC (set)))
14300     return false;
14301 
14302   if (REGNO (SET_DEST (set)) != regno)
14303     return false;
14304 
14305   return true;
14306 }
14307 
14308 /* This value describes the distance to be avoided between an
14309    arithmetic fp instruction and an fp load writing the same register.
14310    Z10_EARLYLOAD_DISTANCE - 1 as well as Z10_EARLYLOAD_DISTANCE + 1 is
14311    fine but the exact value has to be avoided. Otherwise the FP
14312    pipeline will throw an exception causing a major penalty.  */
14313 #define Z10_EARLYLOAD_DISTANCE 7
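/* Worked example (illustrative): with Z10_EARLYLOAD_DISTANCE == 7 an
   FP load writing %f2 should not be issued exactly seven insns after
   an arithmetic FP instruction writing %f2, while a distance of six
   or eight insns is harmless.  */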
14314 
14315 /* Rearrange the ready list in order to avoid the situation described
14316    for Z10_EARLYLOAD_DISTANCE.  A problematic load instruction is
14317    moved to the very end of the ready list.  */
14318 static void
14319 s390_z10_prevent_earlyload_conflicts (rtx_insn **ready, int *nready_p)
14320 {
14321   unsigned int regno;
14322   int nready = *nready_p;
14323   rtx_insn *tmp;
14324   int i;
14325   rtx_insn *insn;
14326   rtx set;
14327   enum attr_type flag;
14328   int distance;
14329 
14330   /* Skip DISTANCE - 1 active insns.  */
14331   for (insn = last_scheduled_insn, distance = Z10_EARLYLOAD_DISTANCE - 1;
14332        distance > 0 && insn != NULL_RTX;
14333        distance--, insn = prev_active_insn (insn))
14334     if (CALL_P (insn) || JUMP_P (insn))
14335       return;
14336 
14337   if (insn == NULL_RTX)
14338     return;
14339 
14340   set = single_set (insn);
14341 
14342   if (set == NULL_RTX || !REG_P (SET_DEST (set))
14343       || GET_MODE_CLASS (GET_MODE (SET_DEST (set))) != MODE_FLOAT)
14344     return;
14345 
14346   flag = s390_safe_attr_type (insn);
14347 
14348   if (flag == TYPE_FLOADSF || flag == TYPE_FLOADDF)
14349     return;
14350 
14351   regno = REGNO (SET_DEST (set));
14352   i = nready - 1;
14353 
14354   while (!s390_fpload_toreg (ready[i], regno) && i > 0)
14355     i--;
14356 
14357   if (!i)
14358     return;
14359 
14360   tmp = ready[i];
14361   memmove (&ready[1], &ready[0], sizeof (rtx_insn *) * i);
14362   ready[0] = tmp;
14363 }
14364 
14365 /* Returns TRUE if BB is entered via a fallthru edge and all other
14366    incoming edges are less than likely.  */
14367 static bool
14368 s390_bb_fallthru_entry_likely (basic_block bb)
14369 {
14370   edge e, fallthru_edge;
14371   edge_iterator ei;
14372 
14373   if (!bb)
14374     return false;
14375 
14376   fallthru_edge = find_fallthru_edge (bb->preds);
14377   if (!fallthru_edge)
14378     return false;
14379 
14380   FOR_EACH_EDGE (e, ei, bb->preds)
14381     if (e != fallthru_edge
14382 	&& e->probability >= profile_probability::likely ())
14383       return false;
14384 
14385   return true;
14386 }
14387 
14388 struct s390_sched_state
14389 {
14390   /* Number of insns in the group.  */
14391   int group_state;
14392   /* Execution side of the group.  */
14393   int side;
14394   /* Group can only hold two insns.  */
14395   bool group_of_two;
14396 } s390_sched_state;
14397 
14398 static struct s390_sched_state sched_state = {0, 1, false};
14399 
14400 #define S390_SCHED_ATTR_MASK_CRACKED    0x1
14401 #define S390_SCHED_ATTR_MASK_EXPANDED   0x2
14402 #define S390_SCHED_ATTR_MASK_ENDGROUP   0x4
14403 #define S390_SCHED_ATTR_MASK_GROUPALONE 0x8
14404 #define S390_SCHED_ATTR_MASK_GROUPOFTWO 0x10
14405 
14406 static unsigned int
14407 s390_get_sched_attrmask (rtx_insn *insn)
14408 {
14409   unsigned int mask = 0;
14410 
14411   switch (s390_tune)
14412     {
14413     case PROCESSOR_2827_ZEC12:
14414       if (get_attr_zEC12_cracked (insn))
14415 	mask |= S390_SCHED_ATTR_MASK_CRACKED;
14416       if (get_attr_zEC12_expanded (insn))
14417 	mask |= S390_SCHED_ATTR_MASK_EXPANDED;
14418       if (get_attr_zEC12_endgroup (insn))
14419 	mask |= S390_SCHED_ATTR_MASK_ENDGROUP;
14420       if (get_attr_zEC12_groupalone (insn))
14421 	mask |= S390_SCHED_ATTR_MASK_GROUPALONE;
14422       break;
14423     case PROCESSOR_2964_Z13:
14424       if (get_attr_z13_cracked (insn))
14425 	mask |= S390_SCHED_ATTR_MASK_CRACKED;
14426       if (get_attr_z13_expanded (insn))
14427 	mask |= S390_SCHED_ATTR_MASK_EXPANDED;
14428       if (get_attr_z13_endgroup (insn))
14429 	mask |= S390_SCHED_ATTR_MASK_ENDGROUP;
14430       if (get_attr_z13_groupalone (insn))
14431 	mask |= S390_SCHED_ATTR_MASK_GROUPALONE;
14432       if (get_attr_z13_groupoftwo (insn))
14433 	mask |= S390_SCHED_ATTR_MASK_GROUPOFTWO;
14434       break;
14435     case PROCESSOR_3906_Z14:
14436       if (get_attr_z14_cracked (insn))
14437 	mask |= S390_SCHED_ATTR_MASK_CRACKED;
14438       if (get_attr_z14_expanded (insn))
14439 	mask |= S390_SCHED_ATTR_MASK_EXPANDED;
14440       if (get_attr_z14_endgroup (insn))
14441 	mask |= S390_SCHED_ATTR_MASK_ENDGROUP;
14442       if (get_attr_z14_groupalone (insn))
14443 	mask |= S390_SCHED_ATTR_MASK_GROUPALONE;
14444       if (get_attr_z14_groupoftwo (insn))
14445 	mask |= S390_SCHED_ATTR_MASK_GROUPOFTWO;
14446       break;
14447     case PROCESSOR_8561_Z15:
14448       if (get_attr_z15_cracked (insn))
14449 	mask |= S390_SCHED_ATTR_MASK_CRACKED;
14450       if (get_attr_z15_expanded (insn))
14451 	mask |= S390_SCHED_ATTR_MASK_EXPANDED;
14452       if (get_attr_z15_endgroup (insn))
14453 	mask |= S390_SCHED_ATTR_MASK_ENDGROUP;
14454       if (get_attr_z15_groupalone (insn))
14455 	mask |= S390_SCHED_ATTR_MASK_GROUPALONE;
14456       if (get_attr_z15_groupoftwo (insn))
14457 	mask |= S390_SCHED_ATTR_MASK_GROUPOFTWO;
14458       break;
14459     default:
14460       gcc_unreachable ();
14461     }
14462   return mask;
14463 }
14464 
14465 static unsigned int
14466 s390_get_unit_mask (rtx_insn *insn, int *units)
14467 {
14468   unsigned int mask = 0;
14469 
14470   switch (s390_tune)
14471     {
14472     case PROCESSOR_2964_Z13:
14473       *units = 4;
14474       if (get_attr_z13_unit_lsu (insn))
14475 	mask |= 1 << 0;
14476       if (get_attr_z13_unit_fxa (insn))
14477 	mask |= 1 << 1;
14478       if (get_attr_z13_unit_fxb (insn))
14479 	mask |= 1 << 2;
14480       if (get_attr_z13_unit_vfu (insn))
14481 	mask |= 1 << 3;
14482       break;
14483     case PROCESSOR_3906_Z14:
14484       *units = 4;
14485       if (get_attr_z14_unit_lsu (insn))
14486 	mask |= 1 << 0;
14487       if (get_attr_z14_unit_fxa (insn))
14488 	mask |= 1 << 1;
14489       if (get_attr_z14_unit_fxb (insn))
14490 	mask |= 1 << 2;
14491       if (get_attr_z14_unit_vfu (insn))
14492 	mask |= 1 << 3;
14493       break;
14494     case PROCESSOR_8561_Z15:
14495       *units = 4;
14496       if (get_attr_z15_unit_lsu (insn))
14497 	mask |= 1 << 0;
14498       if (get_attr_z15_unit_fxa (insn))
14499 	mask |= 1 << 1;
14500       if (get_attr_z15_unit_fxb (insn))
14501 	mask |= 1 << 2;
14502       if (get_attr_z15_unit_vfu (insn))
14503 	mask |= 1 << 3;
14504       break;
14505     default:
14506       gcc_unreachable ();
14507     }
14508   return mask;
14509 }
14510 
14511 static bool
14512 s390_is_fpd (rtx_insn *insn)
14513 {
14514   if (insn == NULL_RTX)
14515     return false;
14516 
14517   return get_attr_z13_unit_fpd (insn) || get_attr_z14_unit_fpd (insn)
14518     || get_attr_z15_unit_fpd (insn);
14519 }
14520 
14521 static bool
14522 s390_is_fxd (rtx_insn *insn)
14523 {
14524   if (insn == NULL_RTX)
14525     return false;
14526 
14527   return get_attr_z13_unit_fxd (insn) || get_attr_z14_unit_fxd (insn)
14528     || get_attr_z15_unit_fxd (insn);
14529 }
14530 
14531 /* Returns TRUE if INSN is a long-running instruction.  */
14532 static bool
14533 s390_is_longrunning (rtx_insn *insn)
14534 {
14535   if (insn == NULL_RTX)
14536     return false;
14537 
14538   return s390_is_fxd (insn) || s390_is_fpd (insn);
14539 }
14540 
14541 
14542 /* Return the scheduling score for INSN.  The higher the score the
14543    better.  The score is calculated from the OOO scheduling attributes
14544    of INSN and the scheduling state sched_state.  */
14545 static int
14546 s390_sched_score (rtx_insn *insn)
14547 {
14548   unsigned int mask = s390_get_sched_attrmask (insn);
14549   int score = 0;
14550 
14551   switch (sched_state.group_state)
14552     {
14553     case 0:
14554       /* Try to put insns into the first slot which would otherwise
14555 	 break a group.  */
14556       if ((mask & S390_SCHED_ATTR_MASK_CRACKED) != 0
14557 	  || (mask & S390_SCHED_ATTR_MASK_EXPANDED) != 0)
14558 	score += 5;
14559       if ((mask & S390_SCHED_ATTR_MASK_GROUPALONE) != 0)
14560 	score += 10;
14561       break;
14562     case 1:
14563       /* Prefer not cracked insns while trying to put together a
14564 	 group.  */
14565       if ((mask & S390_SCHED_ATTR_MASK_CRACKED) == 0
14566 	  && (mask & S390_SCHED_ATTR_MASK_EXPANDED) == 0
14567 	  && (mask & S390_SCHED_ATTR_MASK_GROUPALONE) == 0)
14568 	score += 10;
14569       if ((mask & S390_SCHED_ATTR_MASK_ENDGROUP) == 0)
14570 	score += 5;
14571       /* If we are in a group of two already, try to schedule another
14572 	 group-of-two insn to avoid shortening another group.  */
14573       if (sched_state.group_of_two
14574 	  && (mask & S390_SCHED_ATTR_MASK_GROUPOFTWO) != 0)
14575 	score += 15;
14576       break;
14577     case 2:
14578       /* Prefer not cracked insns while trying to put together a
14579 	 group.  */
14580       if ((mask & S390_SCHED_ATTR_MASK_CRACKED) == 0
14581 	  && (mask & S390_SCHED_ATTR_MASK_EXPANDED) == 0
14582 	  && (mask & S390_SCHED_ATTR_MASK_GROUPALONE) == 0)
14583 	score += 10;
14584       /* Prefer endgroup insns in the last slot.  */
14585       if ((mask & S390_SCHED_ATTR_MASK_ENDGROUP) != 0)
14586 	score += 10;
14587       /* Try to avoid group-of-two insns in the last slot as they will
14588 	 shorten this group as well as the next one.  */
14589       if ((mask & S390_SCHED_ATTR_MASK_GROUPOFTWO) != 0)
14590 	score = MAX (0, score - 15);
14591       break;
14592     }
14593 
14594   if (s390_tune >= PROCESSOR_2964_Z13)
14595     {
14596       int units, i;
14597       unsigned unit_mask, m = 1;
14598 
14599       unit_mask = s390_get_unit_mask (insn, &units);
14600       gcc_assert (units <= MAX_SCHED_UNITS);
14601 
14602       /* Add a score in range 0..MAX_SCHED_MIX_SCORE depending on how long
14603 	 ago the last insn of this unit type got scheduled.  This is
14604 	 supposed to help providing a proper instruction mix to the
14605 	 CPU.  */
14606       for (i = 0; i < units; i++, m <<= 1)
14607 	if (m & unit_mask)
14608 	  score += (last_scheduled_unit_distance[i][sched_state.side]
14609 	      * MAX_SCHED_MIX_SCORE / MAX_SCHED_MIX_DISTANCE);
14610 
14611       int other_side = 1 - sched_state.side;
14612 
14613       /* Try to delay long-running insns when side is busy.  */
14614       if (s390_is_longrunning (insn))
14615 	{
14616 	  if (s390_is_fxd (insn))
14617 	    {
14618 	      if (fxd_longrunning[sched_state.side]
14619 		  && fxd_longrunning[other_side]
14620 		  <= fxd_longrunning[sched_state.side])
14621 		score = MAX (0, score - 10);
14622 
14623 	      else if (fxd_longrunning[other_side]
14624 		  >= fxd_longrunning[sched_state.side])
14625 		score += 10;
14626 	    }
14627 
14628 	  if (s390_is_fpd (insn))
14629 	    {
14630 	      if (fpd_longrunning[sched_state.side]
14631 		  && fpd_longrunning[other_side]
14632 		  <= fpd_longrunning[sched_state.side])
14633 		score = MAX (0, score - 10);
14634 
14635 	      else if (fpd_longrunning[other_side]
14636 		  >= fpd_longrunning[sched_state.side])
14637 		score += 10;
14638 	    }
14639 	}
14640     }
14641 
14642   return score;
14643 }
14644 
14645 /* This function is called via hook TARGET_SCHED_REORDER before
14646    issuing one insn from list READY which contains *NREADYP entries.
14647    For target z10 it reorders load instructions to avoid early load
14648    conflicts in the floating point pipeline.  */
14649 static int
14650 s390_sched_reorder (FILE *file, int verbose,
14651 		    rtx_insn **ready, int *nreadyp, int clock ATTRIBUTE_UNUSED)
14652 {
14653   if (s390_tune == PROCESSOR_2097_Z10
14654       && reload_completed
14655       && *nreadyp > 1)
14656     s390_z10_prevent_earlyload_conflicts (ready, nreadyp);
14657 
14658   if (s390_tune >= PROCESSOR_2827_ZEC12
14659       && reload_completed
14660       && *nreadyp > 1)
14661     {
14662       int i;
14663       int last_index = *nreadyp - 1;
14664       int max_index = -1;
14665       int max_score = -1;
14666       rtx_insn *tmp;
14667 
14668       /* Just move the insn with the highest score to the top (the
14669 	 end) of the list.  A full sort is not needed since a conflict
14670 	 in the hazard recognition cannot happen.  So the top insn in
14671 	 the ready list will always be taken.  */
14672       for (i = last_index; i >= 0; i--)
14673 	{
14674 	  int score;
14675 
14676 	  if (recog_memoized (ready[i]) < 0)
14677 	    continue;
14678 
14679 	  score = s390_sched_score (ready[i]);
14680 	  if (score > max_score)
14681 	    {
14682 	      max_score = score;
14683 	      max_index = i;
14684 	    }
14685 	}
14686 
14687       if (max_index != -1)
14688 	{
14689 	  if (max_index != last_index)
14690 	    {
14691 	      tmp = ready[max_index];
14692 	      ready[max_index] = ready[last_index];
14693 	      ready[last_index] = tmp;
14694 
14695 	      if (verbose > 5)
14696 		fprintf (file,
14697 			 ";;\t\tBACKEND: move insn %d to the top of list\n",
14698 			 INSN_UID (ready[last_index]));
14699 	    }
14700 	  else if (verbose > 5)
14701 	    fprintf (file,
14702 		     ";;\t\tBACKEND: best insn %d already on top\n",
14703 		     INSN_UID (ready[last_index]));
14704 	}
14705 
14706       if (verbose > 5)
14707 	{
14708 	  fprintf (file, "ready list ooo attributes - sched state: %d\n",
14709 		   sched_state.group_state);
14710 
14711 	  for (i = last_index; i >= 0; i--)
14712 	    {
14713 	      unsigned int sched_mask;
14714 	      rtx_insn *insn = ready[i];
14715 
14716 	      if (recog_memoized (insn) < 0)
14717 		continue;
14718 
14719 	      sched_mask = s390_get_sched_attrmask (insn);
14720 	      fprintf (file, ";;\t\tBACKEND: insn %d score: %d: ",
14721 		       INSN_UID (insn),
14722 		       s390_sched_score (insn));
14723 #define PRINT_SCHED_ATTR(M, ATTR) fprintf (file, "%s ",\
14724 					   ((M) & sched_mask) ? #ATTR : "");
14725 	      PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_CRACKED, cracked);
14726 	      PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_EXPANDED, expanded);
14727 	      PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_ENDGROUP, endgroup);
14728 	      PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_GROUPALONE, groupalone);
14729 #undef PRINT_SCHED_ATTR
14730 	      if (s390_tune >= PROCESSOR_2964_Z13)
14731 		{
14732 		  unsigned int unit_mask, m = 1;
14733 		  int units, j;
14734 
14735 		  unit_mask  = s390_get_unit_mask (insn, &units);
14736 		  fprintf (file, "(units:");
14737 		  for (j = 0; j < units; j++, m <<= 1)
14738 		    if (m & unit_mask)
14739 		      fprintf (file, " u%d", j);
14740 		  fprintf (file, ")");
14741 		}
14742 	      fprintf (file, "\n");
14743 	    }
14744 	}
14745     }
14746 
14747   return s390_issue_rate ();
14748 }
14749 
14750 
14751 /* This function is called via hook TARGET_SCHED_VARIABLE_ISSUE after
14752    the scheduler has issued INSN.  It stores the last issued insn into
14753    last_scheduled_insn in order to make it available for
14754    s390_sched_reorder.  */
14755 static int
14756 s390_sched_variable_issue (FILE *file, int verbose, rtx_insn *insn, int more)
14757 {
14758   last_scheduled_insn = insn;
14759 
14760   bool ends_group = false;
14761 
14762   if (s390_tune >= PROCESSOR_2827_ZEC12
14763       && reload_completed
14764       && recog_memoized (insn) >= 0)
14765     {
14766       unsigned int mask = s390_get_sched_attrmask (insn);
14767 
14768       if ((mask & S390_SCHED_ATTR_MASK_GROUPOFTWO) != 0)
14769 	sched_state.group_of_two = true;
14770 
14771       /* If this is a group-of-two insn, we actually ended the last group
14772 	 and this insn is the first one of the new group.  */
14773       if (sched_state.group_state == 2 && sched_state.group_of_two)
14774 	{
14775 	  sched_state.side = sched_state.side ? 0 : 1;
14776 	  sched_state.group_state = 0;
14777 	}
14778 
14779       /* Longrunning and side bookkeeping.  */
14780       for (int i = 0; i < 2; i++)
14781 	{
14782 	  fxd_longrunning[i] = MAX (0, fxd_longrunning[i] - 1);
14783 	  fpd_longrunning[i] = MAX (0, fpd_longrunning[i] - 1);
14784 	}
14785 
14786       unsigned latency = insn_default_latency (insn);
14787       if (s390_is_longrunning (insn))
14788 	{
14789 	  if (s390_is_fxd (insn))
14790 	    fxd_longrunning[sched_state.side] = latency;
14791 	  else
14792 	    fpd_longrunning[sched_state.side] = latency;
14793 	}
14794 
14795       if (s390_tune >= PROCESSOR_2964_Z13)
14796 	{
14797 	  int units, i;
14798 	  unsigned unit_mask, m = 1;
14799 
14800 	  unit_mask = s390_get_unit_mask (insn, &units);
14801 	  gcc_assert (units <= MAX_SCHED_UNITS);
14802 
14803 	  for (i = 0; i < units; i++, m <<= 1)
14804 	    if (m & unit_mask)
14805 	      last_scheduled_unit_distance[i][sched_state.side] = 0;
14806 	    else if (last_scheduled_unit_distance[i][sched_state.side]
14807 		< MAX_SCHED_MIX_DISTANCE)
14808 	      last_scheduled_unit_distance[i][sched_state.side]++;
14809 	}
14810 
14811       if ((mask & S390_SCHED_ATTR_MASK_CRACKED) != 0
14812 	  || (mask & S390_SCHED_ATTR_MASK_EXPANDED) != 0
14813 	  || (mask & S390_SCHED_ATTR_MASK_GROUPALONE) != 0
14814 	  || (mask & S390_SCHED_ATTR_MASK_ENDGROUP) != 0)
14815 	{
14816 	  sched_state.group_state = 0;
14817 	  ends_group = true;
14818 	}
14819       else
14820 	{
14821 	  switch (sched_state.group_state)
14822 	    {
14823 	    case 0:
14824 	      sched_state.group_state++;
14825 	      break;
14826 	    case 1:
14827 	      sched_state.group_state++;
14828 	      if (sched_state.group_of_two)
14829 		{
14830 		  sched_state.group_state = 0;
14831 		  ends_group = true;
14832 		}
14833 	      break;
14834 	    case 2:
14835 	      sched_state.group_state++;
14836 	      ends_group = true;
14837 	      break;
14838 	    }
14839 	}
14840 
14841       if (verbose > 5)
14842 	{
14843 	  unsigned int sched_mask;
14844 
14845 	  sched_mask = s390_get_sched_attrmask (insn);
14846 
14847 	  fprintf (file, ";;\t\tBACKEND: insn %d: ", INSN_UID (insn));
14848 #define PRINT_SCHED_ATTR(M, ATTR) fprintf (file, "%s ", ((M) & sched_mask) ? #ATTR : "");
14849 	  PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_CRACKED, cracked);
14850 	  PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_EXPANDED, expanded);
14851 	  PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_ENDGROUP, endgroup);
14852 	  PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_GROUPALONE, groupalone);
14853 #undef PRINT_SCHED_ATTR
14854 
14855 	  if (s390_tune >= PROCESSOR_2964_Z13)
14856 	    {
14857 	      unsigned int unit_mask, m = 1;
14858 	      int units, j;
14859 
14860 	      unit_mask  = s390_get_unit_mask (insn, &units);
14861 	      fprintf (file, "(units:");
14862 	      for (j = 0; j < units; j++, m <<= 1)
14863 		if (m & unit_mask)
14864 		  fprintf (file, " %d", j);
14865 	      fprintf (file, ")");
14866 	    }
14867 	  fprintf (file, " sched state: %d\n", sched_state.group_state);
14868 
14869 	  if (s390_tune >= PROCESSOR_2964_Z13)
14870 	    {
14871 	      int units, j;
14872 
14873 	      s390_get_unit_mask (insn, &units);
14874 
14875 	      fprintf (file, ";;\t\tBACKEND: units on this side unused for: ");
14876 	      for (j = 0; j < units; j++)
14877 		fprintf (file, "%d:%d ", j,
14878 		    last_scheduled_unit_distance[j][sched_state.side]);
14879 	      fprintf (file, "\n");
14880 	    }
14881 	}
14882 
14883       /* If this insn ended a group, the next will be on the other side.  */
14884       if (ends_group)
14885 	{
14886 	  sched_state.group_state = 0;
14887 	  sched_state.side = sched_state.side ? 0 : 1;
14888 	  sched_state.group_of_two = false;
14889 	}
14890     }
14891 
14892   if (GET_CODE (PATTERN (insn)) != USE
14893       && GET_CODE (PATTERN (insn)) != CLOBBER)
14894     return more - 1;
14895   else
14896     return more;
14897 }
14898 
14899 static void
14900 s390_sched_init (FILE *file ATTRIBUTE_UNUSED,
14901 		 int verbose ATTRIBUTE_UNUSED,
14902 		 int max_ready ATTRIBUTE_UNUSED)
14903 {
14904   /* If the next basic block is most likely entered via a fallthru edge
14905      we keep the last sched state.  Otherwise we start a new group.
14906      The scheduler traverses basic blocks in "instruction stream" ordering
14907      so if we see a fallthru edge here, sched_state will be that of its
14908      source block.
14909 
14910      current_sched_info->prev_head is the insn before the first insn of the
14911      block of insns to be scheduled.
14912      */
14913   rtx_insn *insn = current_sched_info->prev_head
14914     ? NEXT_INSN (current_sched_info->prev_head) : NULL;
14915   basic_block bb = insn ? BLOCK_FOR_INSN (insn) : NULL;
14916   if (s390_tune < PROCESSOR_2964_Z13 || !s390_bb_fallthru_entry_likely (bb))
14917     {
14918       last_scheduled_insn = NULL;
14919       memset (last_scheduled_unit_distance, 0,
14920 	  MAX_SCHED_UNITS * NUM_SIDES * sizeof (int));
14921       sched_state.group_state = 0;
14922       sched_state.group_of_two = false;
14923     }
14924 }
14925 
14926 /* This target hook implementation for TARGET_LOOP_UNROLL_ADJUST calculates
14927    how often the loop LOOP should be unrolled when tuning for CPUs with a
14928    built-in stride prefetcher.
14929    The loop is analyzed for memory accesses by iterating over all RTXes of
14930    its body.  Depending on the loop depth and the number of memory accesses,
14931    an adjusted unroll factor <= NUNROLL is returned to improve the behavior
14932    of the hardware prefetch unit.  */
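/* A small worked example of the heuristic below (numbers are purely
   illustrative): a loop at depth 1 whose body contains 7 counted memory
   references would be limited to MIN (nunroll, 28 / 7), i.e. at most 4
   unrolled copies, while the same body at depth 3 or deeper would be
   limited to MIN (nunroll, 16 / 7), i.e. 2 copies.  */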
14933 static unsigned
14934 s390_loop_unroll_adjust (unsigned nunroll, struct loop *loop)
14935 {
14936   basic_block *bbs;
14937   rtx_insn *insn;
14938   unsigned i;
14939   unsigned mem_count = 0;
14940 
14941   if (s390_tune < PROCESSOR_2097_Z10)
14942     return nunroll;
14943 
14944   /* Count the number of memory references within the loop body.  */
14945   bbs = get_loop_body (loop);
14946   subrtx_iterator::array_type array;
14947   for (i = 0; i < loop->num_nodes; i++)
14948     FOR_BB_INSNS (bbs[i], insn)
14949       if (INSN_P (insn) && INSN_CODE (insn) != -1)
14950 	{
14951 	  rtx set;
14952 
14953 	  /* The runtime of small loops with memory block operations
14954 	     will be determined by the memory operation.  Unrolling
14955 	     doesn't help here.  Measurements to confirm this were
14956 	     only done on recent CPU levels.  So better not change
14957 	     anything for older CPUs.  */
14958 	  if (s390_tune >= PROCESSOR_2964_Z13
14959 	      && loop->ninsns <= BLOCK_MEM_OPS_LOOP_INSNS
14960 	      && ((set = single_set (insn)) != NULL_RTX)
14961 	      && ((GET_MODE (SET_DEST (set)) == BLKmode
14962 		   && (GET_MODE (SET_SRC (set)) == BLKmode
14963 		       || SET_SRC (set) == const0_rtx))
14964 		  || (GET_CODE (SET_SRC (set)) == COMPARE
14965 		      && GET_MODE (XEXP (SET_SRC (set), 0)) == BLKmode
14966 		      && GET_MODE (XEXP (SET_SRC (set), 1)) == BLKmode)))
14967 	    return 1;
14968 
14969 	  FOR_EACH_SUBRTX (iter, array, PATTERN (insn), NONCONST)
14970 	    if (MEM_P (*iter))
14971 	      mem_count += 1;
14972 	}
14973   free (bbs);
14974 
14975   /* Prevent division by zero; there is no need to adjust nunroll in this case.  */
14976   if (mem_count == 0)
14977     return nunroll;
14978 
14979   switch (loop_depth(loop))
14980     {
14981     case 1:
14982       return MIN (nunroll, 28 / mem_count);
14983     case 2:
14984       return MIN (nunroll, 22 / mem_count);
14985     default:
14986       return MIN (nunroll, 16 / mem_count);
14987     }
14988 }
14989 
14990 /* Restore the current options.  This is a hook function and also called
14991    internally.  */
14992 
14993 static void
14994 s390_function_specific_restore (struct gcc_options *opts,
14995 				struct cl_target_option *ptr ATTRIBUTE_UNUSED)
14996 {
14997   opts->x_s390_cost_pointer = (long)processor_table[opts->x_s390_tune].cost;
14998 }
14999 
15000 static void
15001 s390_default_align (struct gcc_options *opts)
15002 {
15003   /* Set the default function alignment to 16 in order to get rid of
15004      some unwanted performance effects. */
15005   if (opts->x_flag_align_functions && !opts->x_str_align_functions
15006       && opts->x_s390_tune >= PROCESSOR_2964_Z13)
15007     opts->x_str_align_functions = "16";
15008 }
15009 
15010 static void
15011 s390_override_options_after_change (void)
15012 {
15013   s390_default_align (&global_options);
15014 }
15015 
15016 static void
15017 s390_option_override_internal (struct gcc_options *opts,
15018 			       const struct gcc_options *opts_set)
15019 {
15020   /* Architecture mode defaults according to ABI.  */
15021   if (!(opts_set->x_target_flags & MASK_ZARCH))
15022     {
15023       if (TARGET_64BIT)
15024 	opts->x_target_flags |= MASK_ZARCH;
15025       else
15026 	opts->x_target_flags &= ~MASK_ZARCH;
15027     }
15028 
15029   /* Set the march default in case it hasn't been specified on cmdline.  */
15030   if (!opts_set->x_s390_arch)
15031     opts->x_s390_arch = PROCESSOR_2064_Z900;
15032 
15033   opts->x_s390_arch_flags = processor_flags_table[(int) opts->x_s390_arch];
15034 
15035   /* Determine processor to tune for.  */
15036   if (!opts_set->x_s390_tune)
15037     opts->x_s390_tune = opts->x_s390_arch;
15038 
15039   opts->x_s390_tune_flags = processor_flags_table[opts->x_s390_tune];
15040 
15041   /* Sanity checks.  */
15042   if (opts->x_s390_arch == PROCESSOR_NATIVE
15043       || opts->x_s390_tune == PROCESSOR_NATIVE)
15044     gcc_unreachable ();
15045   if (TARGET_64BIT && !TARGET_ZARCH_P (opts->x_target_flags))
15046     error ("64-bit ABI not supported in ESA/390 mode");
15047 
15048   if (opts->x_s390_indirect_branch == indirect_branch_thunk_inline
15049       || opts->x_s390_indirect_branch_call == indirect_branch_thunk_inline
15050       || opts->x_s390_function_return == indirect_branch_thunk_inline
15051       || opts->x_s390_function_return_reg == indirect_branch_thunk_inline
15052       || opts->x_s390_function_return_mem == indirect_branch_thunk_inline)
15053     error ("thunk-inline is only supported with %<-mindirect-branch-jump%>");
15054 
15055   if (opts->x_s390_indirect_branch != indirect_branch_keep)
15056     {
15057       if (!opts_set->x_s390_indirect_branch_call)
15058 	opts->x_s390_indirect_branch_call = opts->x_s390_indirect_branch;
15059 
15060       if (!opts_set->x_s390_indirect_branch_jump)
15061 	opts->x_s390_indirect_branch_jump = opts->x_s390_indirect_branch;
15062     }
15063 
15064   if (opts->x_s390_function_return != indirect_branch_keep)
15065     {
15066       if (!opts_set->x_s390_function_return_reg)
15067 	opts->x_s390_function_return_reg = opts->x_s390_function_return;
15068 
15069       if (!opts_set->x_s390_function_return_mem)
15070 	opts->x_s390_function_return_mem = opts->x_s390_function_return;
15071     }
15072 
15073   /* Enable hardware transactions if available and not explicitly
15074      disabled by user.  E.g. with -m31 -march=zEC12 -mzarch */
15075   if (!TARGET_OPT_HTM_P (opts_set->x_target_flags))
15076     {
15077       if (TARGET_CPU_HTM_P (opts) && TARGET_ZARCH_P (opts->x_target_flags))
15078 	opts->x_target_flags |= MASK_OPT_HTM;
15079       else
15080 	opts->x_target_flags &= ~MASK_OPT_HTM;
15081     }
15082 
15083   if (TARGET_OPT_VX_P (opts_set->x_target_flags))
15084     {
15085       if (TARGET_OPT_VX_P (opts->x_target_flags))
15086 	{
15087 	  if (!TARGET_CPU_VX_P (opts))
15088 	    error ("hardware vector support not available on %s",
15089 		   processor_table[(int)opts->x_s390_arch].name);
15090 	  if (TARGET_SOFT_FLOAT_P (opts->x_target_flags))
15091 	    error ("hardware vector support not available with "
15092 		   "%<-msoft-float%>");
15093 	}
15094     }
15095   else
15096     {
15097       if (TARGET_CPU_VX_P (opts))
15098 	/* Enable vector support if available and not explicitly disabled
15099 	   by user.  E.g. with -m31 -march=z13 -mzarch */
15100 	opts->x_target_flags |= MASK_OPT_VX;
15101       else
15102 	opts->x_target_flags &= ~MASK_OPT_VX;
15103     }
15104 
15105   /* Use hardware DFP if available and not explicitly disabled by
15106      user. E.g. with -m31 -march=z10 -mzarch   */
15107   if (!TARGET_HARD_DFP_P (opts_set->x_target_flags))
15108     {
15109       if (TARGET_DFP_P (opts))
15110 	opts->x_target_flags |= MASK_HARD_DFP;
15111       else
15112 	opts->x_target_flags &= ~MASK_HARD_DFP;
15113     }
15114 
15115   if (TARGET_HARD_DFP_P (opts->x_target_flags) && !TARGET_DFP_P (opts))
15116     {
15117       if (TARGET_HARD_DFP_P (opts_set->x_target_flags))
15118 	{
15119 	  if (!TARGET_CPU_DFP_P (opts))
15120 	    error ("hardware decimal floating point instructions"
15121 		   " not available on %s",
15122 		   processor_table[(int)opts->x_s390_arch].name);
15123 	  if (!TARGET_ZARCH_P (opts->x_target_flags))
15124 	    error ("hardware decimal floating point instructions"
15125 		   " not available in ESA/390 mode");
15126 	}
15127       else
15128 	opts->x_target_flags &= ~MASK_HARD_DFP;
15129     }
15130 
15131   if (TARGET_SOFT_FLOAT_P (opts_set->x_target_flags)
15132       && TARGET_SOFT_FLOAT_P (opts->x_target_flags))
15133     {
15134       if (TARGET_HARD_DFP_P (opts_set->x_target_flags)
15135 	  && TARGET_HARD_DFP_P (opts->x_target_flags))
15136 	error ("%<-mhard-dfp%> can%'t be used in conjunction with "
15137 	       "%<-msoft-float%>");
15138 
15139       opts->x_target_flags &= ~MASK_HARD_DFP;
15140     }
15141 
15142   if (TARGET_BACKCHAIN_P (opts->x_target_flags)
15143       && TARGET_PACKED_STACK_P (opts->x_target_flags)
15144       && TARGET_HARD_FLOAT_P (opts->x_target_flags))
15145     error ("%<-mbackchain%> %<-mpacked-stack%> %<-mhard-float%> are not "
15146 	   "supported in combination");
15147 
15148   if (opts->x_s390_stack_size)
15149     {
15150       if (opts->x_s390_stack_guard >= opts->x_s390_stack_size)
15151 	error ("stack size must be greater than the stack guard value");
15152       else if (opts->x_s390_stack_size > 1 << 16)
15153 	error ("stack size must not be greater than 64k");
15154     }
15155   else if (opts->x_s390_stack_guard)
15156     error ("%<-mstack-guard%> implies use of %<-mstack-size%>");
15157 
15158   /* Our implementation of the stack probe requires the probe interval
15159      to be used as displacement in an address operand.  The maximum
15160      probe interval currently is 64k.  This would exceed short
15161      displacements.  Trim that value down to 4k if that happens.  This
15162      might result in too many probes being generated only on the
15163      oldest supported machine level z900.  */
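  /* For illustration: the parameter holds the exponent of a power of two,
     so e.g. a user-specified value of 16 would request a 64k probe
     interval, which does not fit a short displacement; the code below
     then resets the parameter to 12, i.e. a 4k interval.  */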
15164   if (!DISP_IN_RANGE ((1 << PARAM_VALUE (PARAM_STACK_CLASH_PROTECTION_PROBE_INTERVAL))))
15165     set_param_value ("stack-clash-protection-probe-interval", 12,
15166 		     opts->x_param_values,
15167 		     opts_set->x_param_values);
15168 
15169 #ifdef TARGET_DEFAULT_LONG_DOUBLE_128
15170   if (!TARGET_LONG_DOUBLE_128_P (opts_set->x_target_flags))
15171     opts->x_target_flags |= MASK_LONG_DOUBLE_128;
15172 #endif
15173 
15174   if (opts->x_s390_tune >= PROCESSOR_2097_Z10)
15175     {
15176       maybe_set_param_value (PARAM_MAX_UNROLLED_INSNS, 100,
15177 			     opts->x_param_values,
15178 			     opts_set->x_param_values);
15179       maybe_set_param_value (PARAM_MAX_UNROLL_TIMES, 32,
15180 			     opts->x_param_values,
15181 			     opts_set->x_param_values);
15182       maybe_set_param_value (PARAM_MAX_COMPLETELY_PEELED_INSNS, 2000,
15183 			     opts->x_param_values,
15184 			     opts_set->x_param_values);
15185       maybe_set_param_value (PARAM_MAX_COMPLETELY_PEEL_TIMES, 64,
15186 			     opts->x_param_values,
15187 			     opts_set->x_param_values);
15188     }
15189 
15190   maybe_set_param_value (PARAM_MAX_PENDING_LIST_LENGTH, 256,
15191 			 opts->x_param_values,
15192 			 opts_set->x_param_values);
15193   /* values for loop prefetching */
15194   maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE, 256,
15195 			 opts->x_param_values,
15196 			 opts_set->x_param_values);
15197   maybe_set_param_value (PARAM_L1_CACHE_SIZE, 128,
15198 			 opts->x_param_values,
15199 			 opts_set->x_param_values);
15200   /* s390 has more than 2 cache levels and their sizes are much larger.
15201      Since we are always running virtualized, assume that we only get a
15202      small part of the caches above L1.  */
15203   maybe_set_param_value (PARAM_L2_CACHE_SIZE, 1500,
15204 			 opts->x_param_values,
15205 			 opts_set->x_param_values);
15206   maybe_set_param_value (PARAM_PREFETCH_MIN_INSN_TO_MEM_RATIO, 2,
15207 			 opts->x_param_values,
15208 			 opts_set->x_param_values);
15209   maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES, 6,
15210 			 opts->x_param_values,
15211 			 opts_set->x_param_values);
15212 
15213   /* Use the alternative scheduling-pressure algorithm by default.  */
15214   maybe_set_param_value (PARAM_SCHED_PRESSURE_ALGORITHM, 2,
15215 			 opts->x_param_values,
15216 			 opts_set->x_param_values);
15217 
15218   maybe_set_param_value (PARAM_MIN_VECT_LOOP_BOUND, 2,
15219 			 opts->x_param_values,
15220 			 opts_set->x_param_values);
15221 
15222   /* Use aggressive inlining parameters.  */
15223   if (opts->x_s390_tune >= PROCESSOR_2964_Z13)
15224     {
15225       maybe_set_param_value (PARAM_INLINE_MIN_SPEEDUP, 2,
15226 			     opts->x_param_values,
15227 			     opts_set->x_param_values);
15228 
15229       maybe_set_param_value (PARAM_MAX_INLINE_INSNS_AUTO, 80,
15230 			     opts->x_param_values,
15231 			     opts_set->x_param_values);
15232     }
15233 
15234   /* Set the default alignment.  */
15235   s390_default_align (opts);
15236 
15237   /* Call target specific restore function to do post-init work.  At the moment,
15238      this just sets opts->x_s390_cost_pointer.  */
15239   s390_function_specific_restore (opts, NULL);
15240 
15241   /* Check whether -mfentry is supported.  It cannot be used in 31-bit mode,
15242      because 31-bit PLT stubs assume that %r12 contains the GOT address, which
15243      is not the case when the code runs before the prologue.  */
15244   if (opts->x_flag_fentry && !TARGET_64BIT)
15245     error ("%<-mfentry%> is supported only for 64-bit CPUs");
15246 }
15247 
15248 static void
15249 s390_option_override (void)
15250 {
15251   unsigned int i;
15252   cl_deferred_option *opt;
15253   vec<cl_deferred_option> *v =
15254     (vec<cl_deferred_option> *) s390_deferred_options;
15255 
15256   if (v)
15257     FOR_EACH_VEC_ELT (*v, i, opt)
15258       {
15259 	switch (opt->opt_index)
15260 	  {
15261 	  case OPT_mhotpatch_:
15262 	    {
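	      /* Parse the two comma-separated values of -mhotpatch=n,m.
		 E.g. a (purely illustrative) -mhotpatch=12,4 stores 12 in
		 s390_hotpatch_hw_before_label and 4 in
		 s390_hotpatch_hw_after_label.  */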
15263 	      int val1;
15264 	      int val2;
15265 	      char *s = strtok (ASTRDUP (opt->arg), ",");
15266 	      char *t = strtok (NULL, "\0");
15267 
15268 	      if (t != NULL)
15269 		{
15270 		  val1 = integral_argument (s);
15271 		  val2 = integral_argument (t);
15272 		}
15273 	      else
15274 		{
15275 		  val1 = -1;
15276 		  val2 = -1;
15277 		}
15278 	      if (val1 == -1 || val2 == -1)
15279 		{
15280 		  /* argument is not a plain number */
15281 		  error ("arguments to %qs should be non-negative integers",
15282 			 "-mhotpatch=n,m");
15283 		  break;
15284 		}
15285 	      else if (val1 > s390_hotpatch_hw_max
15286 		       || val2 > s390_hotpatch_hw_max)
15287 		{
15288 		  error ("argument to %qs is too large (max. %d)",
15289 			 "-mhotpatch=n,m", s390_hotpatch_hw_max);
15290 		  break;
15291 		}
15292 	      s390_hotpatch_hw_before_label = val1;
15293 	      s390_hotpatch_hw_after_label = val2;
15294 	      break;
15295 	    }
15296 	  default:
15297 	    gcc_unreachable ();
15298 	  }
15299       }
15300 
15301   /* Set up function hooks.  */
15302   init_machine_status = s390_init_machine_status;
15303 
15304   s390_option_override_internal (&global_options, &global_options_set);
15305 
15306   /* Save the initial options in case the user does function specific
15307      options.  */
15308   target_option_default_node = build_target_option_node (&global_options);
15309   target_option_current_node = target_option_default_node;
15310 
15311   /* This cannot reside in s390_option_optimization_table since HAVE_prefetch
15312      requires the arch flags to be evaluated already.  Since prefetching
15313      is beneficial on s390, we enable it if available.  */
15314   if (flag_prefetch_loop_arrays < 0 && HAVE_prefetch && optimize >= 3)
15315     flag_prefetch_loop_arrays = 1;
15316 
15317   if (!s390_pic_data_is_text_relative && !flag_pic)
15318     error ("%<-mno-pic-data-is-text-relative%> cannot be used without "
15319 	   "%<-fpic%>/%<-fPIC%>");
15320 
15321   if (TARGET_TPF)
15322     {
15323       /* Don't emit DWARF3/4 unless specifically selected.  The TPF
15324 	 debuggers do not yet support DWARF 3/4.  */
15325       if (!global_options_set.x_dwarf_strict)
15326 	dwarf_strict = 1;
15327       if (!global_options_set.x_dwarf_version)
15328 	dwarf_version = 2;
15329     }
15330 }
15331 
15332 #if S390_USE_TARGET_ATTRIBUTE
15333 /* Inner function to process the attribute((target(...))), take an argument and
15334    set the current options from the argument. If we have a list, recursively go
15335    over the list.  */
15336 
15337 static bool
15338 s390_valid_target_attribute_inner_p (tree args,
15339 				     struct gcc_options *opts,
15340 				     struct gcc_options *new_opts_set,
15341 				     bool force_pragma)
15342 {
15343   char *next_optstr;
15344   bool ret = true;
15345 
15346 #define S390_ATTRIB(S,O,A)  { S, sizeof (S)-1, O, A, 0 }
15347 #define S390_PRAGMA(S,O,A)  { S, sizeof (S)-1, O, A, 1 }
15348   static const struct
15349   {
15350     const char *string;
15351     size_t len;
15352     int opt;
15353     int has_arg;
15354     int only_as_pragma;
15355   } attrs[] = {
15356     /* enum options */
15357     S390_ATTRIB ("arch=", OPT_march_, 1),
15358     S390_ATTRIB ("tune=", OPT_mtune_, 1),
15359     /* uinteger options */
15360     S390_ATTRIB ("stack-guard=", OPT_mstack_guard_, 1),
15361     S390_ATTRIB ("stack-size=", OPT_mstack_size_, 1),
15362     S390_ATTRIB ("branch-cost=", OPT_mbranch_cost_, 1),
15363     S390_ATTRIB ("warn-framesize=", OPT_mwarn_framesize_, 1),
15364     /* flag options */
15365     S390_ATTRIB ("backchain", OPT_mbackchain, 0),
15366     S390_ATTRIB ("hard-dfp", OPT_mhard_dfp, 0),
15367     S390_ATTRIB ("hard-float", OPT_mhard_float, 0),
15368     S390_ATTRIB ("htm", OPT_mhtm, 0),
15369     S390_ATTRIB ("vx", OPT_mvx, 0),
15370     S390_ATTRIB ("packed-stack", OPT_mpacked_stack, 0),
15371     S390_ATTRIB ("small-exec", OPT_msmall_exec, 0),
15372     S390_ATTRIB ("soft-float", OPT_msoft_float, 0),
15373     S390_ATTRIB ("mvcle", OPT_mmvcle, 0),
15374     S390_PRAGMA ("zvector", OPT_mzvector, 0),
15375     /* boolean options */
15376     S390_ATTRIB ("warn-dynamicstack", OPT_mwarn_dynamicstack, 0),
15377   };
15378 #undef S390_ATTRIB
15379 #undef S390_PRAGMA
15380 
15381   /* If this is a list, recurse to get the options.  */
15382   if (TREE_CODE (args) == TREE_LIST)
15383     {
15384       bool ret = true;
15385       int num_pragma_values;
15386       int i;
15387 
15388       /* Note: attribs.c:decl_attributes prepends the values from
15389 	 current_target_pragma to the list of target attributes.  To determine
15390 	 whether we're looking at a value of the attribute or the pragma we
15391 	 assume that the first [list_length (current_target_pragma)] values in
15392 	 the list are the values from the pragma.  */
15393       num_pragma_values = (!force_pragma && current_target_pragma != NULL)
15394 	? list_length (current_target_pragma) : 0;
15395       for (i = 0; args; args = TREE_CHAIN (args), i++)
15396 	{
15397 	  bool is_pragma;
15398 
15399 	  is_pragma = (force_pragma || i < num_pragma_values);
15400 	  if (TREE_VALUE (args)
15401 	      && !s390_valid_target_attribute_inner_p (TREE_VALUE (args),
15402 						       opts, new_opts_set,
15403 						       is_pragma))
15404 	    {
15405 	      ret = false;
15406 	    }
15407 	}
15408       return ret;
15409     }
15410 
15411   else if (TREE_CODE (args) != STRING_CST)
15412     {
15413       error ("attribute %<target%> argument not a string");
15414       return false;
15415     }
15416 
15417   /* Handle multiple arguments separated by commas.  */
15418   next_optstr = ASTRDUP (TREE_STRING_POINTER (args));
15419 
15420   while (next_optstr && *next_optstr != '\0')
15421     {
15422       char *p = next_optstr;
15423       char *orig_p = p;
15424       char *comma = strchr (next_optstr, ',');
15425       size_t len, opt_len;
15426       int opt;
15427       bool opt_set_p;
15428       char ch;
15429       unsigned i;
15430       int mask = 0;
15431       enum cl_var_type var_type;
15432       bool found;
15433 
15434       if (comma)
15435 	{
15436 	  *comma = '\0';
15437 	  len = comma - next_optstr;
15438 	  next_optstr = comma + 1;
15439 	}
15440       else
15441 	{
15442 	  len = strlen (p);
15443 	  next_optstr = NULL;
15444 	}
15445 
15446       /* Recognize no-xxx.  */
15447       if (len > 3 && p[0] == 'n' && p[1] == 'o' && p[2] == '-')
15448 	{
15449 	  opt_set_p = false;
15450 	  p += 3;
15451 	  len -= 3;
15452 	}
15453       else
15454 	opt_set_p = true;
15455 
15456       /* Find the option.  */
15457       ch = *p;
15458       found = false;
15459       for (i = 0; i < ARRAY_SIZE (attrs); i++)
15460 	{
15461 	  opt_len = attrs[i].len;
15462 	  if (ch == attrs[i].string[0]
15463 	      && ((attrs[i].has_arg) ? len > opt_len : len == opt_len)
15464 	      && memcmp (p, attrs[i].string, opt_len) == 0)
15465 	    {
15466 	      opt = attrs[i].opt;
15467 	      if (!opt_set_p && cl_options[opt].cl_reject_negative)
15468 		continue;
15469 	      mask = cl_options[opt].var_value;
15470 	      var_type = cl_options[opt].var_type;
15471 	      found = true;
15472 	      break;
15473 	    }
15474 	}
15475 
15476       /* Process the option.  */
15477       if (!found)
15478 	{
15479 	  error ("attribute(target(\"%s\")) is unknown", orig_p);
15480 	  return false;
15481 	}
15482       else if (attrs[i].only_as_pragma && !force_pragma)
15483 	{
15484 	  /* Value is not allowed for the target attribute.  */
15485 	  error ("value %qs is not supported by attribute %<target%>",
15486 		 attrs[i].string);
15487 	  return false;
15488 	}
15489 
15490       else if (var_type == CLVC_BIT_SET || var_type == CLVC_BIT_CLEAR)
15491 	{
15492 	  if (var_type == CLVC_BIT_CLEAR)
15493 	    opt_set_p = !opt_set_p;
15494 
15495 	  if (opt_set_p)
15496 	    opts->x_target_flags |= mask;
15497 	  else
15498 	    opts->x_target_flags &= ~mask;
15499 	  new_opts_set->x_target_flags |= mask;
15500 	}
15501 
15502       else if (cl_options[opt].var_type == CLVC_BOOLEAN)
15503 	{
15504 	  int value;
15505 
15506 	  if (cl_options[opt].cl_uinteger)
15507 	    {
15508 	      /* Unsigned integer argument.  Code based on the function
15509 		 decode_cmdline_option () in opts-common.c.  */
15510 	      value = integral_argument (p + opt_len);
15511 	    }
15512 	  else
15513 	    value = (opt_set_p) ? 1 : 0;
15514 
15515 	  if (value != -1)
15516 	    {
15517 	      struct cl_decoded_option decoded;
15518 
15519 	      /* Value range check; only implemented for numeric and boolean
15520 		 options at the moment.  */
15521 	      generate_option (opt, NULL, value, CL_TARGET, &decoded);
15522 	      s390_handle_option (opts, new_opts_set, &decoded, input_location);
15523 	      set_option (opts, new_opts_set, opt, value,
15524 			  p + opt_len, DK_UNSPECIFIED, input_location,
15525 			  global_dc);
15526 	    }
15527 	  else
15528 	    {
15529 	      error ("attribute(target(\"%s\")) is unknown", orig_p);
15530 	      ret = false;
15531 	    }
15532 	}
15533 
15534       else if (cl_options[opt].var_type == CLVC_ENUM)
15535 	{
15536 	  bool arg_ok;
15537 	  int value;
15538 
15539 	  arg_ok = opt_enum_arg_to_value (opt, p + opt_len, &value, CL_TARGET);
15540 	  if (arg_ok)
15541 	    set_option (opts, new_opts_set, opt, value,
15542 			p + opt_len, DK_UNSPECIFIED, input_location,
15543 			global_dc);
15544 	  else
15545 	    {
15546 	      error ("attribute(target(\"%s\")) is unknown", orig_p);
15547 	      ret = false;
15548 	    }
15549 	}
15550 
15551       else
15552 	gcc_unreachable ();
15553     }
15554   return ret;
15555 }
15556 
15557 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL.  */
15558 
15559 tree
15560 s390_valid_target_attribute_tree (tree args,
15561 				  struct gcc_options *opts,
15562 				  const struct gcc_options *opts_set,
15563 				  bool force_pragma)
15564 {
15565   tree t = NULL_TREE;
15566   struct gcc_options new_opts_set;
15567 
15568   memset (&new_opts_set, 0, sizeof (new_opts_set));
15569 
15570   /* Process each of the options on the chain.  */
15571   if (! s390_valid_target_attribute_inner_p (args, opts, &new_opts_set,
15572 					     force_pragma))
15573     return error_mark_node;
15574 
15575   /* If some option was set (even if it has not changed), rerun
15576      s390_option_override_internal, and then save the options away.  */
15577   if (new_opts_set.x_target_flags
15578       || new_opts_set.x_s390_arch
15579       || new_opts_set.x_s390_tune
15580       || new_opts_set.x_s390_stack_guard
15581       || new_opts_set.x_s390_stack_size
15582       || new_opts_set.x_s390_branch_cost
15583       || new_opts_set.x_s390_warn_framesize
15584       || new_opts_set.x_s390_warn_dynamicstack_p)
15585     {
15586       const unsigned char *src = (const unsigned char *)opts_set;
15587       unsigned char *dest = (unsigned char *)&new_opts_set;
15588       unsigned int i;
15589 
15590       /* Merge the original option flags into the new ones.  */
15591       for (i = 0; i < sizeof(*opts_set); i++)
15592 	dest[i] |= src[i];
15593 
15594       /* Do any overrides, such as arch=xxx, or tune=xxx support.  */
15595       s390_option_override_internal (opts, &new_opts_set);
15596       /* Save the current options unless we are validating options for
15597 	 #pragma.  */
15598       t = build_target_option_node (opts);
15599     }
15600   return t;
15601 }
15602 
15603 /* Hook to validate attribute((target("string"))).  */
15604 
15605 static bool
15606 s390_valid_target_attribute_p (tree fndecl,
15607 			       tree ARG_UNUSED (name),
15608 			       tree args,
15609 			       int ARG_UNUSED (flags))
15610 {
15611   struct gcc_options func_options;
15612   tree new_target, new_optimize;
15613   bool ret = true;
15614 
15615   /* attribute((target("default"))) does nothing, beyond
15616      affecting multi-versioning.  */
15617   if (TREE_VALUE (args)
15618       && TREE_CODE (TREE_VALUE (args)) == STRING_CST
15619       && TREE_CHAIN (args) == NULL_TREE
15620       && strcmp (TREE_STRING_POINTER (TREE_VALUE (args)), "default") == 0)
15621     return true;
15622 
15623   tree old_optimize = build_optimization_node (&global_options);
15624 
15625   /* Get the optimization options of the current function.  */
15626   tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
15627 
15628   if (!func_optimize)
15629     func_optimize = old_optimize;
15630 
15631   /* Init func_options.  */
15632   memset (&func_options, 0, sizeof (func_options));
15633   init_options_struct (&func_options, NULL);
15634   lang_hooks.init_options_struct (&func_options);
15635 
15636   cl_optimization_restore (&func_options, TREE_OPTIMIZATION (func_optimize));
15637 
15638   /* Initialize func_options to the default before its target options can
15639      be set.  */
15640   cl_target_option_restore (&func_options,
15641 			    TREE_TARGET_OPTION (target_option_default_node));
15642 
15643   new_target = s390_valid_target_attribute_tree (args, &func_options,
15644 						 &global_options_set,
15645 						 (args ==
15646 						  current_target_pragma));
15647   new_optimize = build_optimization_node (&func_options);
15648   if (new_target == error_mark_node)
15649     ret = false;
15650   else if (fndecl && new_target)
15651     {
15652       DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;
15653       if (old_optimize != new_optimize)
15654 	DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
15655     }
15656   return ret;
15657 }
15658 
15659 /* Hook to determine if one function can safely inline another.  */
15660 
15661 static bool
15662 s390_can_inline_p (tree caller, tree callee)
15663 {
15664   tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
15665   tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
15666 
15667   if (!callee_tree)
15668     callee_tree = target_option_default_node;
15669   if (!caller_tree)
15670     caller_tree = target_option_default_node;
15671   if (callee_tree == caller_tree)
15672     return true;
15673 
15674   struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
15675   struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
15676   bool ret = true;
15677 
15678   if ((caller_opts->x_target_flags & ~(MASK_SOFT_FLOAT | MASK_HARD_DFP))
15679       != (callee_opts->x_target_flags & ~(MASK_SOFT_FLOAT | MASK_HARD_DFP)))
15680     ret = false;
15681 
15682   /* Don't inline functions to be compiled for a more recent arch into a
15683      function for an older arch.  */
15684   else if (caller_opts->x_s390_arch < callee_opts->x_s390_arch)
15685     ret = false;
15686 
15687   /* Inlining a hard float function into a soft float function is only
15688      allowed if the hard float function doesn't actually make use of
15689      floating point.
15690 
15691      We are called from FEs for multi-versioning call optimization, so
15692      be aware that ipa_fn_summaries may not be available.  */
15693   else if (((TARGET_SOFT_FLOAT_P (caller_opts->x_target_flags)
15694 	     && !TARGET_SOFT_FLOAT_P (callee_opts->x_target_flags))
15695 	    || (!TARGET_HARD_DFP_P (caller_opts->x_target_flags)
15696 		&& TARGET_HARD_DFP_P (callee_opts->x_target_flags)))
15697 	   && (! ipa_fn_summaries
15698 	       || ipa_fn_summaries->get
15699 	       (cgraph_node::get (callee))->fp_expressions))
15700     ret = false;
15701 
15702   return ret;
15703 }
15704 #endif
15705 
15706 /* Set VAL to correct enum value according to the indirect-branch or
15707    function-return attribute in ATTR.  */
15708 
15709 static inline void
15710 s390_indirect_branch_attrvalue (tree attr, enum indirect_branch *val)
15711 {
15712   const char *str = TREE_STRING_POINTER (TREE_VALUE (TREE_VALUE (attr)));
15713   if (strcmp (str, "keep") == 0)
15714     *val = indirect_branch_keep;
15715   else if (strcmp (str, "thunk") == 0)
15716     *val = indirect_branch_thunk;
15717   else if (strcmp (str, "thunk-inline") == 0)
15718     *val = indirect_branch_thunk_inline;
15719   else if (strcmp (str, "thunk-extern") == 0)
15720     *val = indirect_branch_thunk_extern;
15721 }
15722 
15723 /* Memorize the setting for -mindirect-branch* and -mfunction-return*
15724    from either the cmdline or the function attributes in
15725    cfun->machine.  */
15726 
15727 static void
15728 s390_indirect_branch_settings (tree fndecl)
15729 {
15730   tree attr;
15731 
15732   if (!fndecl)
15733     return;
15734 
15735   /* Initialize with the cmdline options and let the attributes
15736      override it.  */
15737   cfun->machine->indirect_branch_jump = s390_indirect_branch_jump;
15738   cfun->machine->indirect_branch_call = s390_indirect_branch_call;
15739 
15740   cfun->machine->function_return_reg = s390_function_return_reg;
15741   cfun->machine->function_return_mem = s390_function_return_mem;
15742 
15743   if ((attr = lookup_attribute ("indirect_branch",
15744 				DECL_ATTRIBUTES (fndecl))))
15745     {
15746       s390_indirect_branch_attrvalue (attr,
15747 				      &cfun->machine->indirect_branch_jump);
15748       s390_indirect_branch_attrvalue (attr,
15749 				      &cfun->machine->indirect_branch_call);
15750     }
15751 
15752   if ((attr = lookup_attribute ("indirect_branch_jump",
15753 				DECL_ATTRIBUTES (fndecl))))
15754     s390_indirect_branch_attrvalue (attr, &cfun->machine->indirect_branch_jump);
15755 
15756   if ((attr = lookup_attribute ("indirect_branch_call",
15757 				DECL_ATTRIBUTES (fndecl))))
15758     s390_indirect_branch_attrvalue (attr, &cfun->machine->indirect_branch_call);
15759 
15760   if ((attr = lookup_attribute ("function_return",
15761 				DECL_ATTRIBUTES (fndecl))))
15762     {
15763       s390_indirect_branch_attrvalue (attr,
15764 				      &cfun->machine->function_return_reg);
15765       s390_indirect_branch_attrvalue (attr,
15766 				      &cfun->machine->function_return_mem);
15767     }
15768 
15769   if ((attr = lookup_attribute ("function_return_reg",
15770 				DECL_ATTRIBUTES (fndecl))))
15771     s390_indirect_branch_attrvalue (attr, &cfun->machine->function_return_reg);
15772 
15773   if ((attr = lookup_attribute ("function_return_mem",
15774 				DECL_ATTRIBUTES (fndecl))))
15775     s390_indirect_branch_attrvalue (attr, &cfun->machine->function_return_mem);
15776 }
15777 
15778 #if S390_USE_TARGET_ATTRIBUTE
15779 /* Restore target globals from NEW_TREE and invalidate the s390_previous_fndecl
15780    cache.  */
15781 
15782 void
15783 s390_activate_target_options (tree new_tree)
15784 {
15785   cl_target_option_restore (&global_options, TREE_TARGET_OPTION (new_tree));
15786   if (TREE_TARGET_GLOBALS (new_tree))
15787     restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
15788   else if (new_tree == target_option_default_node)
15789     restore_target_globals (&default_target_globals);
15790   else
15791     TREE_TARGET_GLOBALS (new_tree) = save_target_globals_default_opts ();
15792   s390_previous_fndecl = NULL_TREE;
15793 }
15794 #endif
15795 
15796 /* Establish appropriate back-end context for processing the function
15797    FNDECL.  The argument might be NULL to indicate processing at top
15798    level, outside of any function scope.  */
15799 static void
15800 s390_set_current_function (tree fndecl)
15801 {
15802 #if S390_USE_TARGET_ATTRIBUTE
15803   /* Only change the context if the function changes.  This hook is called
15804      several times in the course of compiling a function, and we don't want to
15805      slow things down too much or call target_reinit when it isn't safe.  */
15806   if (fndecl == s390_previous_fndecl)
15807     {
15808       s390_indirect_branch_settings (fndecl);
15809       return;
15810     }
15811 
15812   tree old_tree;
15813   if (s390_previous_fndecl == NULL_TREE)
15814     old_tree = target_option_current_node;
15815   else if (DECL_FUNCTION_SPECIFIC_TARGET (s390_previous_fndecl))
15816     old_tree = DECL_FUNCTION_SPECIFIC_TARGET (s390_previous_fndecl);
15817   else
15818     old_tree = target_option_default_node;
15819 
15820   if (fndecl == NULL_TREE)
15821     {
15822       if (old_tree != target_option_current_node)
15823 	s390_activate_target_options (target_option_current_node);
15824       return;
15825     }
15826 
15827   tree new_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
15828   if (new_tree == NULL_TREE)
15829     new_tree = target_option_default_node;
15830 
15831   if (old_tree != new_tree)
15832     s390_activate_target_options (new_tree);
15833   s390_previous_fndecl = fndecl;
15834 #endif
15835   s390_indirect_branch_settings (fndecl);
15836 }
15837 
15838 /* Implement TARGET_USE_BY_PIECES_INFRASTRUCTURE_P.  */
15839 
15840 static bool
15841 s390_use_by_pieces_infrastructure_p (unsigned HOST_WIDE_INT size,
15842 				     unsigned int align ATTRIBUTE_UNUSED,
15843 				     enum by_pieces_operation op ATTRIBUTE_UNUSED,
15844 				     bool speed_p ATTRIBUTE_UNUSED)
15845 {
15846   return (size == 1 || size == 2
15847 	  || size == 4 || (TARGET_ZARCH && size == 8));
15848 }
15849 
15850 /* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV hook.  */
15851 
15852 static void
15853 s390_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
15854 {
15855   tree sfpc = s390_builtin_decls[S390_BUILTIN_s390_sfpc];
15856   tree efpc = s390_builtin_decls[S390_BUILTIN_s390_efpc];
15857   tree call_efpc = build_call_expr (efpc, 0);
15858   tree fenv_var = create_tmp_var_raw (unsigned_type_node);
15859 
15860 #define FPC_EXCEPTION_MASK	 HOST_WIDE_INT_UC (0xf8000000)
15861 #define FPC_FLAGS_MASK		 HOST_WIDE_INT_UC (0x00f80000)
15862 #define FPC_DXC_MASK		 HOST_WIDE_INT_UC (0x0000ff00)
15863 #define FPC_EXCEPTION_MASK_SHIFT HOST_WIDE_INT_UC (24)
15864 #define FPC_FLAGS_SHIFT		 HOST_WIDE_INT_UC (16)
15865 #define FPC_DXC_SHIFT		 HOST_WIDE_INT_UC (8)
15866 
15867   /* Generates the equivalent of feholdexcept (&fenv_var)
15868 
15869      fenv_var = __builtin_s390_efpc ();
15870      __builtin_s390_sfpc (fenv_var & mask) */
15871   tree old_fpc = build4 (TARGET_EXPR, unsigned_type_node, fenv_var, call_efpc,
15872 			 NULL_TREE, NULL_TREE);
15873   tree new_fpc
15874     = build2 (BIT_AND_EXPR, unsigned_type_node, fenv_var,
15875 	      build_int_cst (unsigned_type_node,
15876 			     ~(FPC_DXC_MASK | FPC_FLAGS_MASK
15877 			       | FPC_EXCEPTION_MASK)));
15878   tree set_new_fpc = build_call_expr (sfpc, 1, new_fpc);
15879   *hold = build2 (COMPOUND_EXPR, void_type_node, old_fpc, set_new_fpc);
15880 
15881   /* Generates the equivalent of feclearexcept (FE_ALL_EXCEPT)
15882 
15883      __builtin_s390_sfpc (__builtin_s390_efpc () & mask) */
15884   new_fpc = build2 (BIT_AND_EXPR, unsigned_type_node, call_efpc,
15885 		    build_int_cst (unsigned_type_node,
15886 				   ~(FPC_DXC_MASK | FPC_FLAGS_MASK)));
15887   *clear = build_call_expr (sfpc, 1, new_fpc);
15888 
15889   /* Generates the equivalent of feupdateenv (fenv_var)
15890 
15891   old_fpc = __builtin_s390_efpc ();
15892   __builtin_s390_sfpc (fenv_var);
15893   __atomic_feraiseexcept ((old_fpc & FPC_FLAGS_MASK) >> FPC_FLAGS_SHIFT);  */
15894 
15895   old_fpc = create_tmp_var_raw (unsigned_type_node);
15896   tree store_old_fpc = build4 (TARGET_EXPR, void_type_node, old_fpc, call_efpc,
15897 			       NULL_TREE, NULL_TREE);
15898 
15899   set_new_fpc = build_call_expr (sfpc, 1, fenv_var);
15900 
15901   tree raise_old_except = build2 (BIT_AND_EXPR, unsigned_type_node, old_fpc,
15902 				  build_int_cst (unsigned_type_node,
15903 						 FPC_FLAGS_MASK));
15904   raise_old_except = build2 (RSHIFT_EXPR, unsigned_type_node, raise_old_except,
15905 			     build_int_cst (unsigned_type_node,
15906 					    FPC_FLAGS_SHIFT));
15907   tree atomic_feraiseexcept
15908     = builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT);
15909   raise_old_except = build_call_expr (atomic_feraiseexcept,
15910 				      1, raise_old_except);
15911 
15912   *update = build2 (COMPOUND_EXPR, void_type_node,
15913 		    build2 (COMPOUND_EXPR, void_type_node,
15914 			    store_old_fpc, set_new_fpc),
15915 		    raise_old_except);
15916 
15917 #undef FPC_EXCEPTION_MASK
15918 #undef FPC_FLAGS_MASK
15919 #undef FPC_DXC_MASK
15920 #undef FPC_EXCEPTION_MASK_SHIFT
15921 #undef FPC_FLAGS_SHIFT
15922 #undef FPC_DXC_SHIFT
15923 }
15924 
15925 /* Return the vector mode to be used for inner mode MODE when doing
15926    vectorization.  */
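/* For example, with TARGET_VX an inner mode of SImode maps to V4SImode
   (four 32-bit elements in a 128-bit vector register) and DFmode maps to
   V2DFmode; SFmode additionally maps to V4SFmode once TARGET_VXE is
   available.  Anything not handled below falls back to word_mode.  */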
15927 static machine_mode
15928 s390_preferred_simd_mode (scalar_mode mode)
15929 {
15930   if (TARGET_VXE)
15931     switch (mode)
15932       {
15933       case E_SFmode:
15934 	return V4SFmode;
15935       default:;
15936       }
15937 
15938   if (TARGET_VX)
15939     switch (mode)
15940       {
15941       case E_DFmode:
15942 	return V2DFmode;
15943       case E_DImode:
15944 	return V2DImode;
15945       case E_SImode:
15946 	return V4SImode;
15947       case E_HImode:
15948 	return V8HImode;
15949       case E_QImode:
15950 	return V16QImode;
15951       default:;
15952       }
15953   return word_mode;
15954 }
15955 
15956 /* Our hardware does not require vectors to be strictly aligned.  */
15957 static bool
15958 s390_support_vector_misalignment (machine_mode mode ATTRIBUTE_UNUSED,
15959 				  const_tree type ATTRIBUTE_UNUSED,
15960 				  int misalignment ATTRIBUTE_UNUSED,
15961 				  bool is_packed ATTRIBUTE_UNUSED)
15962 {
15963   if (TARGET_VX)
15964     return true;
15965 
15966   return default_builtin_support_vector_misalignment (mode, type, misalignment,
15967 						      is_packed);
15968 }
15969 
15970 /* The vector ABI requires vector types to be aligned on an 8 byte
15971    boundary (our stack alignment).  However, we allow this to be
15972    overridden by the user, although this definitely breaks the ABI.  */
15973 static HOST_WIDE_INT
15974 s390_vector_alignment (const_tree type)
15975 {
15976   tree size = TYPE_SIZE (type);
15977 
15978   if (!TARGET_VX_ABI)
15979     return default_vector_alignment (type);
15980 
15981   if (TYPE_USER_ALIGN (type))
15982     return TYPE_ALIGN (type);
15983 
15984   if (tree_fits_uhwi_p (size)
15985       && tree_to_uhwi (size) < BIGGEST_ALIGNMENT)
15986     return tree_to_uhwi (size);
15987 
15988   return BIGGEST_ALIGNMENT;
15989 }
15990 
15991 /* Implement TARGET_CONSTANT_ALIGNMENT.  Alignment on even addresses for
15992    LARL instruction.  */
15993 
15994 static HOST_WIDE_INT
15995 s390_constant_alignment (const_tree, HOST_WIDE_INT align)
15996 {
15997   return MAX (align, 16);
15998 }
15999 
16000 #ifdef HAVE_AS_MACHINE_MACHINEMODE
16001 /* Implement TARGET_ASM_FILE_START.  */
16002 static void
16003 s390_asm_file_start (void)
16004 {
16005   default_file_start ();
16006   s390_asm_output_machine_for_arch (asm_out_file);
16007 }
16008 #endif
16009 
16010 /* Implement TARGET_ASM_FILE_END.  */
16011 static void
16012 s390_asm_file_end (void)
16013 {
16014 #ifdef HAVE_AS_GNU_ATTRIBUTE
16015   varpool_node *vnode;
16016   cgraph_node *cnode;
16017 
16018   FOR_EACH_VARIABLE (vnode)
16019     if (TREE_PUBLIC (vnode->decl))
16020       s390_check_type_for_vector_abi (TREE_TYPE (vnode->decl), false, false);
16021 
16022   FOR_EACH_FUNCTION (cnode)
16023     if (TREE_PUBLIC (cnode->decl))
16024       s390_check_type_for_vector_abi (TREE_TYPE (cnode->decl), false, false);
16025 
16026 
16027   if (s390_vector_abi != 0)
16028     fprintf (asm_out_file, "\t.gnu_attribute 8, %d\n",
16029 	     s390_vector_abi);
16030 #endif
16031   file_end_indicate_exec_stack ();
16032 
16033   if (flag_split_stack)
16034     file_end_indicate_split_stack ();
16035 }
16036 
16037 /* Return true if TYPE is a vector bool type.  */
16038 static inline bool
16039 s390_vector_bool_type_p (const_tree type)
16040 {
16041   return TYPE_VECTOR_OPAQUE (type);
16042 }
16043 
16044 /* Return the diagnostic message string if the binary operation OP is
16045    not permitted on TYPE1 and TYPE2, NULL otherwise.  */
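/* For example, under -mzvector a mixed-signedness addition such as

     vector signed int a;
     vector unsigned int b;
     ... a + b ...

   is rejected below with "types differ in signedness", and adding two
   vector bool operands is rejected as unsupported for plus/minus
   (illustrative source snippet, not taken from a testcase).  */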
16046 static const char*
16047 s390_invalid_binary_op (int op ATTRIBUTE_UNUSED, const_tree type1, const_tree type2)
16048 {
16049   bool bool1_p, bool2_p;
16050   bool plusminus_p;
16051   bool muldiv_p;
16052   bool compare_p;
16053   machine_mode mode1, mode2;
16054 
16055   if (!TARGET_ZVECTOR)
16056     return NULL;
16057 
16058   if (!VECTOR_TYPE_P (type1) || !VECTOR_TYPE_P (type2))
16059     return NULL;
16060 
16061   bool1_p = s390_vector_bool_type_p (type1);
16062   bool2_p = s390_vector_bool_type_p (type2);
16063 
16064   /* Mixing signed and unsigned types is forbidden for all
16065      operators.  */
16066   if (!bool1_p && !bool2_p
16067       && TYPE_UNSIGNED (type1) != TYPE_UNSIGNED (type2))
16068     return N_("types differ in signedness");
16069 
16070   plusminus_p = (op == PLUS_EXPR || op == MINUS_EXPR);
16071   muldiv_p = (op == MULT_EXPR || op == RDIV_EXPR || op == TRUNC_DIV_EXPR
16072 	      || op == CEIL_DIV_EXPR || op == FLOOR_DIV_EXPR
16073 	      || op == ROUND_DIV_EXPR);
16074   compare_p = (op == LT_EXPR || op == LE_EXPR || op == GT_EXPR || op == GE_EXPR
16075 	       || op == EQ_EXPR || op == NE_EXPR);
16076 
16077   if (bool1_p && bool2_p && (plusminus_p || muldiv_p))
16078     return N_("binary operator does not support two vector bool operands");
16079 
16080   if (bool1_p != bool2_p && (muldiv_p || compare_p))
16081     return N_("binary operator does not support vector bool operand");
16082 
16083   mode1 = TYPE_MODE (type1);
16084   mode2 = TYPE_MODE (type2);
16085 
16086   if (bool1_p != bool2_p && plusminus_p
16087       && (GET_MODE_CLASS (mode1) == MODE_VECTOR_FLOAT
16088 	  || GET_MODE_CLASS (mode2) == MODE_VECTOR_FLOAT))
16089     return N_("binary operator does not support mixing vector "
16090 	      "bool with floating point vector operands");
16091 
16092   return NULL;
16093 }
16094 
16095 /* Implement TARGET_C_EXCESS_PRECISION.
16096 
16097    FIXME: For historical reasons, float_t and double_t are typedef'ed to
16098    double on s390, causing operations on float_t to operate in a higher
16099    precision than is necessary.  However, it is not the case that SFmode
16100    operations have implicit excess precision, and we generate more optimal
16101    code if we let the compiler know no implicit extra precision is added.
16102 
16103    That means when we are compiling with -fexcess-precision=fast, the value
16104    we set for FLT_EVAL_METHOD will be out of line with the actual precision of
16105    float_t (though they would be correct for -fexcess-precision=standard).
16106 
16107    A complete fix would modify glibc to remove the unnecessary typedef
16108    of float_t to double.  */
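/* Concretely (illustrative snippet only):

     float a, b;
     float_t t = a * b;

   With -fexcess-precision=standard the product is evaluated to double
   range and precision as reported above, while -fexcess-precision=fast
   keeps the operation in SFmode and only converts the result for the
   assignment to the double-typed float_t.  */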
16109 
16110 static enum flt_eval_method
16111 s390_excess_precision (enum excess_precision_type type)
16112 {
16113   switch (type)
16114     {
16115       case EXCESS_PRECISION_TYPE_IMPLICIT:
16116       case EXCESS_PRECISION_TYPE_FAST:
16117 	/* The fastest type to promote to will always be the native type,
16118 	   whether that occurs with implicit excess precision or
16119 	   otherwise.  */
16120 	return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT;
16121       case EXCESS_PRECISION_TYPE_STANDARD:
16122 	/* Otherwise, when we are in a standards compliant mode, to
16123 	   ensure consistency with the implementation in glibc, report that
16124 	   float is evaluated to the range and precision of double.  */
16125 	return FLT_EVAL_METHOD_PROMOTE_TO_DOUBLE;
16126       default:
16127 	gcc_unreachable ();
16128     }
16129   return FLT_EVAL_METHOD_UNPREDICTABLE;
16130 }
16131 
16132 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook.  */
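/* Background sketch (the generic AddressSanitizer mapping, not an
   s390-specific guarantee): a shadow byte lives at roughly
     shadow = (addr >> 3) + offset
   where OFFSET is the value returned here, i.e. 1 << 52 for 64-bit and
   0x20000000 for 31-bit code.  */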
16133 
16134 static unsigned HOST_WIDE_INT
16135 s390_asan_shadow_offset (void)
16136 {
16137   return TARGET_64BIT ? HOST_WIDE_INT_1U << 52 : HOST_WIDE_INT_UC (0x20000000);
16138 }
16139 
16140 #ifdef HAVE_GAS_HIDDEN
16141 # define USE_HIDDEN_LINKONCE 1
16142 #else
16143 # define USE_HIDDEN_LINKONCE 0
16144 #endif
16145 
16146 /* Output an indirect branch trampoline for target register REGNO.  */
16147 
16148 static void
16149 s390_output_indirect_thunk_function (unsigned int regno, bool z10_p)
16150 {
16151   tree decl;
16152   char thunk_label[32];
16153   int i;
16154 
16155   if (z10_p)
16156     sprintf (thunk_label, TARGET_INDIRECT_BRANCH_THUNK_NAME_EXRL, regno);
16157   else
16158     sprintf (thunk_label, TARGET_INDIRECT_BRANCH_THUNK_NAME_EX,
16159 	     INDIRECT_BRANCH_THUNK_REGNUM, regno);
16160 
16161   decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
16162 		     get_identifier (thunk_label),
16163 		     build_function_type_list (void_type_node, NULL_TREE));
16164   DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
16165 				   NULL_TREE, void_type_node);
16166   TREE_PUBLIC (decl) = 1;
16167   TREE_STATIC (decl) = 1;
16168   DECL_IGNORED_P (decl) = 1;
16169 
16170   if (USE_HIDDEN_LINKONCE)
16171     {
16172       cgraph_node::create (decl)->set_comdat_group (DECL_ASSEMBLER_NAME (decl));
16173 
16174       targetm.asm_out.unique_section (decl, 0);
16175       switch_to_section (get_named_section (decl, NULL, 0));
16176 
16177       targetm.asm_out.globalize_label (asm_out_file, thunk_label);
16178       fputs ("\t.hidden\t", asm_out_file);
16179       assemble_name (asm_out_file, thunk_label);
16180       putc ('\n', asm_out_file);
16181       ASM_DECLARE_FUNCTION_NAME (asm_out_file, thunk_label, decl);
16182     }
16183   else
16184     {
16185       switch_to_section (text_section);
16186       ASM_OUTPUT_LABEL (asm_out_file, thunk_label);
16187     }
16188 
16189   DECL_INITIAL (decl) = make_node (BLOCK);
16190   current_function_decl = decl;
16191   allocate_struct_function (decl, false);
16192   init_function_start (decl);
16193   cfun->is_thunk = true;
16194   first_function_block_is_cold = false;
16195   final_start_function (emit_barrier (), asm_out_file, 1);
16196 
16197   /* This makes CFI at least usable for indirect jumps.
16198 
16199      Stopping in the thunk: backtrace will point to the thunk target
16200      as if it was interrupted by a signal.  For a call this means that
16201      the call chain will be: caller->callee->thunk   */
16202   if (flag_asynchronous_unwind_tables && flag_dwarf2_cfi_asm)
16203     {
16204       fputs ("\t.cfi_signal_frame\n", asm_out_file);
16205       fprintf (asm_out_file, "\t.cfi_return_column %d\n", regno);
16206       for (i = 0; i < FPR15_REGNUM; i++)
16207 	fprintf (asm_out_file, "\t.cfi_same_value %s\n", reg_names[i]);
16208     }
16209 
16210   if (z10_p)
16211     {
16212       /* exrl  0,1f  */
16213 
16214       /* A z10-style thunk may be requested even though z10 is not the
16215 	 currently enabled CPU level.  Tell the assembler to accept the
16216 	 instruction anyway.  */
16217       if (!TARGET_CPU_Z10)
16218 	{
16219 	  fputs ("\t.machine push\n", asm_out_file);
16220 	  fputs ("\t.machine z10\n", asm_out_file);
16221 	}
16222       /* We use exrl even if -mzarch hasn't been specified on the
16223 	 command line so we have to tell the assembler to accept
16224 	 it.  */
16225       if (!TARGET_ZARCH)
16226 	fputs ("\t.machinemode zarch\n", asm_out_file);
16227 
16228       fputs ("\texrl\t0,1f\n", asm_out_file);
16229 
16230       if (!TARGET_ZARCH)
16231 	fputs ("\t.machinemode esa\n", asm_out_file);
16232 
16233       if (!TARGET_CPU_Z10)
16234 	fputs ("\t.machine pop\n", asm_out_file);
16235     }
16236   else
16237     {
16238       /* larl %r1,1f  */
16239       fprintf (asm_out_file, "\tlarl\t%%r%d,1f\n",
16240 	       INDIRECT_BRANCH_THUNK_REGNUM);
16241 
16242       /* ex 0,0(%r1)  */
16243       fprintf (asm_out_file, "\tex\t0,0(%%r%d)\n",
16244 	       INDIRECT_BRANCH_THUNK_REGNUM);
16245     }
16246 
16247   /* 0:    j 0b  */
16248   fputs ("0:\tj\t0b\n", asm_out_file);
16249 
16250   /* 1:    br <regno>  */
16251   fprintf (asm_out_file, "1:\tbr\t%%r%d\n", regno);
16252 
16253   final_end_function ();
16254   init_insn_lengths ();
16255   free_after_compilation (cfun);
16256   set_cfun (NULL);
16257   current_function_decl = NULL;
16258 }
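/* For illustration, with REGNO 1 the z10 variant of the thunk emitted
   above boils down to

	exrl	0,1f
     0:	j	0b
     1:	br	%r1

   while the pre-z10 variant uses "larl %r1,1f" followed by
   "ex 0,0(%r1)" instead of the exrl.  The register number 1 is just an
   example; one thunk is emitted per requested target register.  */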
16259 
16260 /* Implement the asm.code_end target hook.  */
16261 
16262 static void
16263 s390_code_end (void)
16264 {
16265   int i;
16266 
16267   for (i = 1; i < 16; i++)
16268     {
16269       if (indirect_branch_z10thunk_mask & (1 << i))
16270 	s390_output_indirect_thunk_function (i, true);
16271 
16272       if (indirect_branch_prez10thunk_mask & (1 << i))
16273 	s390_output_indirect_thunk_function (i, false);
16274     }
16275 
16276   if (TARGET_INDIRECT_BRANCH_TABLE)
16277     {
16278       int o;
16279       int i;
16280 
16281       for (o = 0; o < INDIRECT_BRANCH_NUM_OPTIONS; o++)
16282 	{
16283 	  if (indirect_branch_table_label_no[o] == 0)
16284 	    continue;
16285 
16286 	  switch_to_section (get_section (indirect_branch_table_name[o],
16287 					  0,
16288 					  NULL_TREE));
16289 	  for (i = 0; i < indirect_branch_table_label_no[o]; i++)
16290 	    {
16291 	      char label_start[32];
16292 
16293 	      ASM_GENERATE_INTERNAL_LABEL (label_start,
16294 					   indirect_branch_table_label[o], i);
16295 
16296 	      fputs ("\t.long\t", asm_out_file);
16297 	      assemble_name_raw (asm_out_file, label_start);
16298 	      fputs ("-.\n", asm_out_file);
16299 	    }
16300 	  switch_to_section (current_function_section ());
16301 	}
16302     }
16303 }
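/* Each table emitted above is a sequence of self-relative 32-bit
   entries, one ".long <label>-." per recorded branch site, where
   <label> is generated via ASM_GENERATE_INTERNAL_LABEL from the
   per-option prefix in indirect_branch_table_label.  */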
16304 
16305 /* Implement the TARGET_CASE_VALUES_THRESHOLD target hook.  */
16306 
16307 unsigned int
16308 s390_case_values_threshold (void)
16309 {
16310   /* Disabling branch prediction for indirect jumps makes jump tables
16311      much more expensive.  */
16312   if (TARGET_INDIRECT_BRANCH_NOBP_JUMP)
16313     return 20;
16314 
16315   return default_case_values_threshold ();
16316 }
16317 
16318 /* Evaluate the insns between HEAD and TAIL and install any
16319    back-end specific dependencies between them.
16320 
16321    Establish an ANTI dependency between r11 and r15 restores from FPRs
16322    to prevent the instructions scheduler from reordering them since
16323    this would break CFI.  No further handling in the sched_reorder
16324    hook is required since the r11 and r15 restore will never appear in
16325    the same ready list with that change.  */
16326 void
16327 s390_sched_dependencies_evaluation (rtx_insn *head, rtx_insn *tail)
16328 {
16329   if (!frame_pointer_needed || !epilogue_completed)
16330     return;
16331 
16332   while (head != tail && DEBUG_INSN_P (head))
16333     head = NEXT_INSN (head);
16334 
16335   rtx_insn *r15_restore = NULL, *r11_restore = NULL;
16336 
16337   for (rtx_insn *insn = tail; insn != head; insn = PREV_INSN (insn))
16338     {
16339       rtx set = single_set (insn);
16340       if (!INSN_P (insn)
16341 	  || !RTX_FRAME_RELATED_P (insn)
16342 	  || set == NULL_RTX
16343 	  || !REG_P (SET_DEST (set))
16344 	  || !FP_REG_P (SET_SRC (set)))
16345 	continue;
16346 
16347       if (REGNO (SET_DEST (set)) == HARD_FRAME_POINTER_REGNUM)
16348 	r11_restore = insn;
16349 
16350       if (REGNO (SET_DEST (set)) == STACK_POINTER_REGNUM)
16351 	r15_restore = insn;
16352     }
16353 
16354   if (r11_restore == NULL || r15_restore == NULL)
16355     return;
16356   add_dependence (r11_restore, r15_restore, REG_DEP_ANTI);
16357 }
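/* A minimal sketch of the situation handled above (the FPRs chosen
   here are only examples): when call-saved FPRs are used as save slots
   for GPRs, the epilogue may contain something like

	lgdr	%r11,%f10
	lgdr	%r15,%f8

   both of which are frame-related and carry CFI notes.  The ANTI
   dependency added above keeps the scheduler from reordering the two
   restores, which would break that CFI.  */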
16358 
16359 
16360 
16361 /* Initialize GCC target structure.  */
16362 
16363 #undef  TARGET_ASM_ALIGNED_HI_OP
16364 #define TARGET_ASM_ALIGNED_HI_OP "\t.word\t"
16365 #undef  TARGET_ASM_ALIGNED_DI_OP
16366 #define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
16367 #undef  TARGET_ASM_INTEGER
16368 #define TARGET_ASM_INTEGER s390_assemble_integer
16369 
16370 #undef  TARGET_ASM_OPEN_PAREN
16371 #define TARGET_ASM_OPEN_PAREN ""
16372 
16373 #undef  TARGET_ASM_CLOSE_PAREN
16374 #define TARGET_ASM_CLOSE_PAREN ""
16375 
16376 #undef TARGET_OPTION_OVERRIDE
16377 #define TARGET_OPTION_OVERRIDE s390_option_override
16378 
16379 #ifdef TARGET_THREAD_SSP_OFFSET
16380 #undef TARGET_STACK_PROTECT_GUARD
16381 #define TARGET_STACK_PROTECT_GUARD hook_tree_void_null
16382 #endif
16383 
16384 #undef	TARGET_ENCODE_SECTION_INFO
16385 #define TARGET_ENCODE_SECTION_INFO s390_encode_section_info
16386 
16387 #undef TARGET_SCALAR_MODE_SUPPORTED_P
16388 #define TARGET_SCALAR_MODE_SUPPORTED_P s390_scalar_mode_supported_p
16389 
16390 #ifdef HAVE_AS_TLS
16391 #undef TARGET_HAVE_TLS
16392 #define TARGET_HAVE_TLS true
16393 #endif
16394 #undef TARGET_CANNOT_FORCE_CONST_MEM
16395 #define TARGET_CANNOT_FORCE_CONST_MEM s390_cannot_force_const_mem
16396 
16397 #undef TARGET_DELEGITIMIZE_ADDRESS
16398 #define TARGET_DELEGITIMIZE_ADDRESS s390_delegitimize_address
16399 
16400 #undef TARGET_LEGITIMIZE_ADDRESS
16401 #define TARGET_LEGITIMIZE_ADDRESS s390_legitimize_address
16402 
16403 #undef TARGET_RETURN_IN_MEMORY
16404 #define TARGET_RETURN_IN_MEMORY s390_return_in_memory
16405 
16406 #undef  TARGET_INIT_BUILTINS
16407 #define TARGET_INIT_BUILTINS s390_init_builtins
16408 #undef  TARGET_EXPAND_BUILTIN
16409 #define TARGET_EXPAND_BUILTIN s390_expand_builtin
16410 #undef  TARGET_BUILTIN_DECL
16411 #define TARGET_BUILTIN_DECL s390_builtin_decl
16412 
16413 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
16414 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA s390_output_addr_const_extra
16415 
16416 #undef TARGET_ASM_OUTPUT_MI_THUNK
16417 #define TARGET_ASM_OUTPUT_MI_THUNK s390_output_mi_thunk
16418 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
16419 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true
16420 
16421 #undef TARGET_C_EXCESS_PRECISION
16422 #define TARGET_C_EXCESS_PRECISION s390_excess_precision
16423 
16424 #undef  TARGET_SCHED_ADJUST_PRIORITY
16425 #define TARGET_SCHED_ADJUST_PRIORITY s390_adjust_priority
16426 #undef TARGET_SCHED_ISSUE_RATE
16427 #define TARGET_SCHED_ISSUE_RATE s390_issue_rate
16428 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
16429 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD s390_first_cycle_multipass_dfa_lookahead
16430 
16431 #undef TARGET_SCHED_VARIABLE_ISSUE
16432 #define TARGET_SCHED_VARIABLE_ISSUE s390_sched_variable_issue
16433 #undef TARGET_SCHED_REORDER
16434 #define TARGET_SCHED_REORDER s390_sched_reorder
16435 #undef TARGET_SCHED_INIT
16436 #define TARGET_SCHED_INIT s390_sched_init
16437 
16438 #undef TARGET_CANNOT_COPY_INSN_P
16439 #define TARGET_CANNOT_COPY_INSN_P s390_cannot_copy_insn_p
16440 #undef TARGET_RTX_COSTS
16441 #define TARGET_RTX_COSTS s390_rtx_costs
16442 #undef TARGET_ADDRESS_COST
16443 #define TARGET_ADDRESS_COST s390_address_cost
16444 #undef TARGET_REGISTER_MOVE_COST
16445 #define TARGET_REGISTER_MOVE_COST s390_register_move_cost
16446 #undef TARGET_MEMORY_MOVE_COST
16447 #define TARGET_MEMORY_MOVE_COST s390_memory_move_cost
16448 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
16449 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
16450   s390_builtin_vectorization_cost
16451 
16452 #undef TARGET_MACHINE_DEPENDENT_REORG
16453 #define TARGET_MACHINE_DEPENDENT_REORG s390_reorg
16454 
16455 #undef TARGET_VALID_POINTER_MODE
16456 #define TARGET_VALID_POINTER_MODE s390_valid_pointer_mode
16457 
16458 #undef TARGET_BUILD_BUILTIN_VA_LIST
16459 #define TARGET_BUILD_BUILTIN_VA_LIST s390_build_builtin_va_list
16460 #undef TARGET_EXPAND_BUILTIN_VA_START
16461 #define TARGET_EXPAND_BUILTIN_VA_START s390_va_start
16462 #undef TARGET_ASAN_SHADOW_OFFSET
16463 #define TARGET_ASAN_SHADOW_OFFSET s390_asan_shadow_offset
16464 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
16465 #define TARGET_GIMPLIFY_VA_ARG_EXPR s390_gimplify_va_arg
16466 
16467 #undef TARGET_PROMOTE_FUNCTION_MODE
16468 #define TARGET_PROMOTE_FUNCTION_MODE s390_promote_function_mode
16469 #undef TARGET_PASS_BY_REFERENCE
16470 #define TARGET_PASS_BY_REFERENCE s390_pass_by_reference
16471 
16472 #undef  TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
16473 #define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE s390_override_options_after_change
16474 
16475 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
16476 #define TARGET_FUNCTION_OK_FOR_SIBCALL s390_function_ok_for_sibcall
16477 #undef TARGET_FUNCTION_ARG
16478 #define TARGET_FUNCTION_ARG s390_function_arg
16479 #undef TARGET_FUNCTION_ARG_ADVANCE
16480 #define TARGET_FUNCTION_ARG_ADVANCE s390_function_arg_advance
16481 #undef TARGET_FUNCTION_ARG_PADDING
16482 #define TARGET_FUNCTION_ARG_PADDING s390_function_arg_padding
16483 #undef TARGET_FUNCTION_VALUE
16484 #define TARGET_FUNCTION_VALUE s390_function_value
16485 #undef TARGET_LIBCALL_VALUE
16486 #define TARGET_LIBCALL_VALUE s390_libcall_value
16487 #undef TARGET_STRICT_ARGUMENT_NAMING
16488 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
16489 
16490 #undef TARGET_KEEP_LEAF_WHEN_PROFILED
16491 #define TARGET_KEEP_LEAF_WHEN_PROFILED s390_keep_leaf_when_profiled
16492 
16493 #undef TARGET_FIXED_CONDITION_CODE_REGS
16494 #define TARGET_FIXED_CONDITION_CODE_REGS s390_fixed_condition_code_regs
16495 
16496 #undef TARGET_CC_MODES_COMPATIBLE
16497 #define TARGET_CC_MODES_COMPATIBLE s390_cc_modes_compatible
16498 
16499 #undef TARGET_INVALID_WITHIN_DOLOOP
16500 #define TARGET_INVALID_WITHIN_DOLOOP hook_constcharptr_const_rtx_insn_null
16501 
16502 #ifdef HAVE_AS_TLS
16503 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
16504 #define TARGET_ASM_OUTPUT_DWARF_DTPREL s390_output_dwarf_dtprel
16505 #endif
16506 
16507 #undef TARGET_DWARF_FRAME_REG_MODE
16508 #define TARGET_DWARF_FRAME_REG_MODE s390_dwarf_frame_reg_mode
16509 
16510 #ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
16511 #undef TARGET_MANGLE_TYPE
16512 #define TARGET_MANGLE_TYPE s390_mangle_type
16513 #endif
16514 
16515 #undef TARGET_SCALAR_MODE_SUPPORTED_P
16516 #define TARGET_SCALAR_MODE_SUPPORTED_P s390_scalar_mode_supported_p
16517 
16518 #undef TARGET_VECTOR_MODE_SUPPORTED_P
16519 #define TARGET_VECTOR_MODE_SUPPORTED_P s390_vector_mode_supported_p
16520 
16521 #undef  TARGET_PREFERRED_RELOAD_CLASS
16522 #define TARGET_PREFERRED_RELOAD_CLASS s390_preferred_reload_class
16523 
16524 #undef TARGET_SECONDARY_RELOAD
16525 #define TARGET_SECONDARY_RELOAD s390_secondary_reload
16526 #undef TARGET_SECONDARY_MEMORY_NEEDED
16527 #define TARGET_SECONDARY_MEMORY_NEEDED s390_secondary_memory_needed
16528 #undef TARGET_SECONDARY_MEMORY_NEEDED_MODE
16529 #define TARGET_SECONDARY_MEMORY_NEEDED_MODE s390_secondary_memory_needed_mode
16530 
16531 #undef TARGET_LIBGCC_CMP_RETURN_MODE
16532 #define TARGET_LIBGCC_CMP_RETURN_MODE s390_libgcc_cmp_return_mode
16533 
16534 #undef TARGET_LIBGCC_SHIFT_COUNT_MODE
16535 #define TARGET_LIBGCC_SHIFT_COUNT_MODE s390_libgcc_shift_count_mode
16536 
16537 #undef TARGET_LEGITIMATE_ADDRESS_P
16538 #define TARGET_LEGITIMATE_ADDRESS_P s390_legitimate_address_p
16539 
16540 #undef TARGET_LEGITIMATE_CONSTANT_P
16541 #define TARGET_LEGITIMATE_CONSTANT_P s390_legitimate_constant_p
16542 
16543 #undef TARGET_LRA_P
16544 #define TARGET_LRA_P s390_lra_p
16545 
16546 #undef TARGET_CAN_ELIMINATE
16547 #define TARGET_CAN_ELIMINATE s390_can_eliminate
16548 
16549 #undef TARGET_CONDITIONAL_REGISTER_USAGE
16550 #define TARGET_CONDITIONAL_REGISTER_USAGE s390_conditional_register_usage
16551 
16552 #undef TARGET_LOOP_UNROLL_ADJUST
16553 #define TARGET_LOOP_UNROLL_ADJUST s390_loop_unroll_adjust
16554 
16555 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
16556 #define TARGET_ASM_TRAMPOLINE_TEMPLATE s390_asm_trampoline_template
16557 #undef TARGET_TRAMPOLINE_INIT
16558 #define TARGET_TRAMPOLINE_INIT s390_trampoline_init
16559 
16560 /* PR 79421 */
16561 #undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
16562 #define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 1
16563 
16564 #undef TARGET_UNWIND_WORD_MODE
16565 #define TARGET_UNWIND_WORD_MODE s390_unwind_word_mode
16566 
16567 #undef TARGET_CANONICALIZE_COMPARISON
16568 #define TARGET_CANONICALIZE_COMPARISON s390_canonicalize_comparison
16569 
16570 #undef TARGET_HARD_REGNO_SCRATCH_OK
16571 #define TARGET_HARD_REGNO_SCRATCH_OK s390_hard_regno_scratch_ok
16572 
16573 #undef TARGET_HARD_REGNO_NREGS
16574 #define TARGET_HARD_REGNO_NREGS s390_hard_regno_nregs
16575 #undef TARGET_HARD_REGNO_MODE_OK
16576 #define TARGET_HARD_REGNO_MODE_OK s390_hard_regno_mode_ok
16577 #undef TARGET_MODES_TIEABLE_P
16578 #define TARGET_MODES_TIEABLE_P s390_modes_tieable_p
16579 
16580 #undef TARGET_HARD_REGNO_CALL_PART_CLOBBERED
16581 #define TARGET_HARD_REGNO_CALL_PART_CLOBBERED \
16582   s390_hard_regno_call_part_clobbered
16583 
16584 #undef TARGET_ATTRIBUTE_TABLE
16585 #define TARGET_ATTRIBUTE_TABLE s390_attribute_table
16586 
16587 #undef TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P
16588 #define TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P hook_bool_const_tree_true
16589 
16590 #undef TARGET_SET_UP_BY_PROLOGUE
16591 #define TARGET_SET_UP_BY_PROLOGUE s300_set_up_by_prologue
16592 
16593 #undef TARGET_EXTRA_LIVE_ON_ENTRY
16594 #define TARGET_EXTRA_LIVE_ON_ENTRY s390_live_on_entry
16595 
16596 #undef TARGET_USE_BY_PIECES_INFRASTRUCTURE_P
16597 #define TARGET_USE_BY_PIECES_INFRASTRUCTURE_P \
16598   s390_use_by_pieces_infrastructure_p
16599 
16600 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
16601 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV s390_atomic_assign_expand_fenv
16602 
16603 #undef TARGET_INVALID_ARG_FOR_UNPROTOTYPED_FN
16604 #define TARGET_INVALID_ARG_FOR_UNPROTOTYPED_FN s390_invalid_arg_for_unprototyped_fn
16605 
16606 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
16607 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE s390_preferred_simd_mode
16608 
16609 #undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
16610 #define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT s390_support_vector_misalignment
16611 
16612 #undef TARGET_VECTOR_ALIGNMENT
16613 #define TARGET_VECTOR_ALIGNMENT s390_vector_alignment
16614 
16615 #undef TARGET_INVALID_BINARY_OP
16616 #define TARGET_INVALID_BINARY_OP s390_invalid_binary_op
16617 
16618 #ifdef HAVE_AS_MACHINE_MACHINEMODE
16619 #undef TARGET_ASM_FILE_START
16620 #define TARGET_ASM_FILE_START s390_asm_file_start
16621 #endif
16622 
16623 #undef TARGET_ASM_FILE_END
16624 #define TARGET_ASM_FILE_END s390_asm_file_end
16625 
16626 #undef TARGET_SET_CURRENT_FUNCTION
16627 #define TARGET_SET_CURRENT_FUNCTION s390_set_current_function
16628 
16629 #if S390_USE_TARGET_ATTRIBUTE
16630 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
16631 #define TARGET_OPTION_VALID_ATTRIBUTE_P s390_valid_target_attribute_p
16632 
16633 #undef TARGET_CAN_INLINE_P
16634 #define TARGET_CAN_INLINE_P s390_can_inline_p
16635 #endif
16636 
16637 #undef TARGET_OPTION_RESTORE
16638 #define TARGET_OPTION_RESTORE s390_function_specific_restore
16639 
16640 #undef TARGET_CAN_CHANGE_MODE_CLASS
16641 #define TARGET_CAN_CHANGE_MODE_CLASS s390_can_change_mode_class
16642 
16643 #undef TARGET_CONSTANT_ALIGNMENT
16644 #define TARGET_CONSTANT_ALIGNMENT s390_constant_alignment
16645 
16646 #undef TARGET_ASM_CODE_END
16647 #define TARGET_ASM_CODE_END s390_code_end
16648 
16649 #undef TARGET_CASE_VALUES_THRESHOLD
16650 #define TARGET_CASE_VALUES_THRESHOLD s390_case_values_threshold
16651 
16652 #undef TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK
16653 #define TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK \
16654   s390_sched_dependencies_evaluation
16655 
16656 
16657 /* Use only short displacement, since long displacement is not available for
16658    the floating point instructions.  */
16659 #undef TARGET_MAX_ANCHOR_OFFSET
16660 #define TARGET_MAX_ANCHOR_OFFSET 0xfff
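/* 0xfff is the largest unsigned 12-bit value, i.e. the biggest
   displacement the short-displacement memory formats can encode
   (0 .. 4095 bytes); for example "ld %f0,4095(%r1)" still fits,
   while a displacement of 4096 would require a long-displacement
   form.  */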
16661 
16662 struct gcc_target targetm = TARGET_INITIALIZER;
16663 
16664 #include "gt-s390.h"
16665