1 /* Subroutines used for code generation on IBM S/390 and zSeries
2    Copyright (C) 1999-2018 Free Software Foundation, Inc.
3    Contributed by Hartmut Penner (hpenner@de.ibm.com) and
4                   Ulrich Weigand (uweigand@de.ibm.com) and
5                   Andreas Krebbel (Andreas.Krebbel@de.ibm.com).
6 
7 This file is part of GCC.
8 
9 GCC is free software; you can redistribute it and/or modify it under
10 the terms of the GNU General Public License as published by the Free
11 Software Foundation; either version 3, or (at your option) any later
12 version.
13 
14 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
15 WARRANTY; without even the implied warranty of MERCHANTABILITY or
16 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
17 for more details.
18 
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3.  If not see
21 <http://www.gnu.org/licenses/>.  */
22 
23 #define IN_TARGET_CODE 1
24 
25 #include "config.h"
26 #include "system.h"
27 #include "coretypes.h"
28 #include "backend.h"
29 #include "target.h"
30 #include "target-globals.h"
31 #include "rtl.h"
32 #include "tree.h"
33 #include "gimple.h"
34 #include "cfghooks.h"
35 #include "cfgloop.h"
36 #include "df.h"
37 #include "memmodel.h"
38 #include "tm_p.h"
39 #include "stringpool.h"
40 #include "attribs.h"
41 #include "expmed.h"
42 #include "optabs.h"
43 #include "regs.h"
44 #include "emit-rtl.h"
45 #include "recog.h"
46 #include "cgraph.h"
47 #include "diagnostic-core.h"
48 #include "diagnostic.h"
49 #include "alias.h"
50 #include "fold-const.h"
51 #include "print-tree.h"
52 #include "stor-layout.h"
53 #include "varasm.h"
54 #include "calls.h"
55 #include "conditions.h"
56 #include "output.h"
57 #include "insn-attr.h"
58 #include "flags.h"
59 #include "except.h"
60 #include "dojump.h"
61 #include "explow.h"
62 #include "stmt.h"
63 #include "expr.h"
64 #include "reload.h"
65 #include "cfgrtl.h"
66 #include "cfganal.h"
67 #include "lcm.h"
68 #include "cfgbuild.h"
69 #include "cfgcleanup.h"
70 #include "debug.h"
71 #include "langhooks.h"
72 #include "internal-fn.h"
73 #include "gimple-fold.h"
74 #include "tree-eh.h"
75 #include "gimplify.h"
76 #include "params.h"
77 #include "opts.h"
78 #include "tree-pass.h"
79 #include "context.h"
80 #include "builtins.h"
81 #include "rtl-iter.h"
82 #include "intl.h"
83 #include "tm-constrs.h"
84 #include "tree-vrp.h"
85 #include "symbol-summary.h"
86 #include "ipa-prop.h"
87 #include "ipa-fnsummary.h"
88 #include "sched-int.h"
89 
90 /* This file should be included last.  */
91 #include "target-def.h"
92 
93 static bool s390_hard_regno_mode_ok (unsigned int, machine_mode);
94 
95 /* Remember the last target of s390_set_current_function.  */
96 static GTY(()) tree s390_previous_fndecl;
97 
98 /* Define the specific costs for a given cpu.  */
99 
100 struct processor_costs
101 {
102   /* multiplication */
103   const int m;        /* cost of an M instruction.  */
104   const int mghi;     /* cost of an MGHI instruction.  */
105   const int mh;       /* cost of an MH instruction.  */
106   const int mhi;      /* cost of an MHI instruction.  */
107   const int ml;       /* cost of an ML instruction.  */
108   const int mr;       /* cost of an MR instruction.  */
109   const int ms;       /* cost of an MS instruction.  */
110   const int msg;      /* cost of an MSG instruction.  */
111   const int msgf;     /* cost of an MSGF instruction.  */
112   const int msgfr;    /* cost of an MSGFR instruction.  */
113   const int msgr;     /* cost of an MSGR instruction.  */
114   const int msr;      /* cost of an MSR instruction.  */
115   const int mult_df;  /* cost of multiplication in DFmode.  */
116   const int mxbr;
117   /* square root */
118   const int sqxbr;    /* cost of square root in TFmode.  */
119   const int sqdbr;    /* cost of square root in DFmode.  */
120   const int sqebr;    /* cost of square root in SFmode.  */
121   /* multiply and add */
122   const int madbr;    /* cost of multiply and add in DFmode.  */
123   const int maebr;    /* cost of multiply and add in SFmode.  */
124   /* division */
125   const int dxbr;
126   const int ddbr;
127   const int debr;
128   const int dlgr;
129   const int dlr;
130   const int dr;
131   const int dsgfr;
132   const int dsgr;
133 };
134 
135 #define s390_cost ((const struct processor_costs *)(s390_cost_pointer))
136 
137 static const
138 struct processor_costs z900_cost =
139 {
140   COSTS_N_INSNS (5),     /* M     */
141   COSTS_N_INSNS (10),    /* MGHI  */
142   COSTS_N_INSNS (5),     /* MH    */
143   COSTS_N_INSNS (4),     /* MHI   */
144   COSTS_N_INSNS (5),     /* ML    */
145   COSTS_N_INSNS (5),     /* MR    */
146   COSTS_N_INSNS (4),     /* MS    */
147   COSTS_N_INSNS (15),    /* MSG   */
148   COSTS_N_INSNS (7),     /* MSGF  */
149   COSTS_N_INSNS (7),     /* MSGFR */
150   COSTS_N_INSNS (10),    /* MSGR  */
151   COSTS_N_INSNS (4),     /* MSR   */
152   COSTS_N_INSNS (7),     /* multiplication in DFmode */
153   COSTS_N_INSNS (13),    /* MXBR */
154   COSTS_N_INSNS (136),   /* SQXBR */
155   COSTS_N_INSNS (44),    /* SQDBR */
156   COSTS_N_INSNS (35),    /* SQEBR */
157   COSTS_N_INSNS (18),    /* MADBR */
158   COSTS_N_INSNS (13),    /* MAEBR */
159   COSTS_N_INSNS (134),   /* DXBR */
160   COSTS_N_INSNS (30),    /* DDBR */
161   COSTS_N_INSNS (27),    /* DEBR */
162   COSTS_N_INSNS (220),   /* DLGR */
163   COSTS_N_INSNS (34),    /* DLR */
164   COSTS_N_INSNS (34),    /* DR */
165   COSTS_N_INSNS (32),    /* DSGFR */
166   COSTS_N_INSNS (32),    /* DSGR */
167 };
168 
169 static const
170 struct processor_costs z990_cost =
171 {
172   COSTS_N_INSNS (4),     /* M     */
173   COSTS_N_INSNS (2),     /* MGHI  */
174   COSTS_N_INSNS (2),     /* MH    */
175   COSTS_N_INSNS (2),     /* MHI   */
176   COSTS_N_INSNS (4),     /* ML    */
177   COSTS_N_INSNS (4),     /* MR    */
178   COSTS_N_INSNS (5),     /* MS    */
179   COSTS_N_INSNS (6),     /* MSG   */
180   COSTS_N_INSNS (4),     /* MSGF  */
181   COSTS_N_INSNS (4),     /* MSGFR */
182   COSTS_N_INSNS (4),     /* MSGR  */
183   COSTS_N_INSNS (4),     /* MSR   */
184   COSTS_N_INSNS (1),     /* multiplication in DFmode */
185   COSTS_N_INSNS (28),    /* MXBR */
186   COSTS_N_INSNS (130),   /* SQXBR */
187   COSTS_N_INSNS (66),    /* SQDBR */
188   COSTS_N_INSNS (38),    /* SQEBR */
189   COSTS_N_INSNS (1),     /* MADBR */
190   COSTS_N_INSNS (1),     /* MAEBR */
191   COSTS_N_INSNS (60),    /* DXBR */
192   COSTS_N_INSNS (40),    /* DDBR */
193   COSTS_N_INSNS (26),    /* DEBR */
194   COSTS_N_INSNS (176),   /* DLGR */
195   COSTS_N_INSNS (31),    /* DLR */
196   COSTS_N_INSNS (31),    /* DR */
197   COSTS_N_INSNS (31),    /* DSGFR */
198   COSTS_N_INSNS (31),    /* DSGR */
199 };
200 
201 static const
202 struct processor_costs z9_109_cost =
203 {
204   COSTS_N_INSNS (4),     /* M     */
205   COSTS_N_INSNS (2),     /* MGHI  */
206   COSTS_N_INSNS (2),     /* MH    */
207   COSTS_N_INSNS (2),     /* MHI   */
208   COSTS_N_INSNS (4),     /* ML    */
209   COSTS_N_INSNS (4),     /* MR    */
210   COSTS_N_INSNS (5),     /* MS    */
211   COSTS_N_INSNS (6),     /* MSG   */
212   COSTS_N_INSNS (4),     /* MSGF  */
213   COSTS_N_INSNS (4),     /* MSGFR */
214   COSTS_N_INSNS (4),     /* MSGR  */
215   COSTS_N_INSNS (4),     /* MSR   */
216   COSTS_N_INSNS (1),     /* multiplication in DFmode */
217   COSTS_N_INSNS (28),    /* MXBR */
218   COSTS_N_INSNS (130),   /* SQXBR */
219   COSTS_N_INSNS (66),    /* SQDBR */
220   COSTS_N_INSNS (38),    /* SQEBR */
221   COSTS_N_INSNS (1),     /* MADBR */
222   COSTS_N_INSNS (1),     /* MAEBR */
223   COSTS_N_INSNS (60),    /* DXBR */
224   COSTS_N_INSNS (40),    /* DDBR */
225   COSTS_N_INSNS (26),    /* DEBR */
226   COSTS_N_INSNS (30),    /* DLGR */
227   COSTS_N_INSNS (23),    /* DLR */
228   COSTS_N_INSNS (23),    /* DR */
229   COSTS_N_INSNS (24),    /* DSGFR */
230   COSTS_N_INSNS (24),    /* DSGR */
231 };
232 
233 static const
234 struct processor_costs z10_cost =
235 {
236   COSTS_N_INSNS (10),    /* M     */
237   COSTS_N_INSNS (10),    /* MGHI  */
238   COSTS_N_INSNS (10),    /* MH    */
239   COSTS_N_INSNS (10),    /* MHI   */
240   COSTS_N_INSNS (10),    /* ML    */
241   COSTS_N_INSNS (10),    /* MR    */
242   COSTS_N_INSNS (10),    /* MS    */
243   COSTS_N_INSNS (10),    /* MSG   */
244   COSTS_N_INSNS (10),    /* MSGF  */
245   COSTS_N_INSNS (10),    /* MSGFR */
246   COSTS_N_INSNS (10),    /* MSGR  */
247   COSTS_N_INSNS (10),    /* MSR   */
248   COSTS_N_INSNS (1) ,    /* multiplication in DFmode */
249   COSTS_N_INSNS (50),    /* MXBR */
250   COSTS_N_INSNS (120),   /* SQXBR */
251   COSTS_N_INSNS (52),    /* SQDBR */
252   COSTS_N_INSNS (38),    /* SQEBR */
253   COSTS_N_INSNS (1),     /* MADBR */
254   COSTS_N_INSNS (1),     /* MAEBR */
255   COSTS_N_INSNS (111),   /* DXBR */
256   COSTS_N_INSNS (39),    /* DDBR */
257   COSTS_N_INSNS (32),    /* DEBR */
258   COSTS_N_INSNS (160),   /* DLGR */
259   COSTS_N_INSNS (71),    /* DLR */
260   COSTS_N_INSNS (71),    /* DR */
261   COSTS_N_INSNS (71),    /* DSGFR */
262   COSTS_N_INSNS (71),    /* DSGR */
263 };
264 
265 static const
266 struct processor_costs z196_cost =
267 {
268   COSTS_N_INSNS (7),     /* M     */
269   COSTS_N_INSNS (5),     /* MGHI  */
270   COSTS_N_INSNS (5),     /* MH    */
271   COSTS_N_INSNS (5),     /* MHI   */
272   COSTS_N_INSNS (7),     /* ML    */
273   COSTS_N_INSNS (7),     /* MR    */
274   COSTS_N_INSNS (6),     /* MS    */
275   COSTS_N_INSNS (8),     /* MSG   */
276   COSTS_N_INSNS (6),     /* MSGF  */
277   COSTS_N_INSNS (6),     /* MSGFR */
278   COSTS_N_INSNS (8),     /* MSGR  */
279   COSTS_N_INSNS (6),     /* MSR   */
280   COSTS_N_INSNS (1) ,    /* multiplication in DFmode */
281   COSTS_N_INSNS (40),    /* MXBR B+40 */
282   COSTS_N_INSNS (100),   /* SQXBR B+100 */
283   COSTS_N_INSNS (42),    /* SQDBR B+42 */
284   COSTS_N_INSNS (28),    /* SQEBR B+28 */
285   COSTS_N_INSNS (1),     /* MADBR B */
286   COSTS_N_INSNS (1),     /* MAEBR B */
287   COSTS_N_INSNS (101),   /* DXBR B+101 */
288   COSTS_N_INSNS (29),    /* DDBR */
289   COSTS_N_INSNS (22),    /* DEBR */
290   COSTS_N_INSNS (160),   /* DLGR cracked */
291   COSTS_N_INSNS (160),   /* DLR cracked */
292   COSTS_N_INSNS (160),   /* DR expanded */
293   COSTS_N_INSNS (160),   /* DSGFR cracked */
294   COSTS_N_INSNS (160),   /* DSGR cracked */
295 };
296 
297 static const
298 struct processor_costs zEC12_cost =
299 {
300   COSTS_N_INSNS (7),     /* M     */
301   COSTS_N_INSNS (5),     /* MGHI  */
302   COSTS_N_INSNS (5),     /* MH    */
303   COSTS_N_INSNS (5),     /* MHI   */
304   COSTS_N_INSNS (7),     /* ML    */
305   COSTS_N_INSNS (7),     /* MR    */
306   COSTS_N_INSNS (6),     /* MS    */
307   COSTS_N_INSNS (8),     /* MSG   */
308   COSTS_N_INSNS (6),     /* MSGF  */
309   COSTS_N_INSNS (6),     /* MSGFR */
310   COSTS_N_INSNS (8),     /* MSGR  */
311   COSTS_N_INSNS (6),     /* MSR   */
312   COSTS_N_INSNS (1) ,    /* multiplication in DFmode */
313   COSTS_N_INSNS (40),    /* MXBR B+40 */
314   COSTS_N_INSNS (100),   /* SQXBR B+100 */
315   COSTS_N_INSNS (42),    /* SQDBR B+42 */
316   COSTS_N_INSNS (28),    /* SQEBR B+28 */
317   COSTS_N_INSNS (1),     /* MADBR B */
318   COSTS_N_INSNS (1),     /* MAEBR B */
319   COSTS_N_INSNS (131),   /* DXBR B+131 */
320   COSTS_N_INSNS (29),    /* DDBR */
321   COSTS_N_INSNS (22),    /* DEBR */
322   COSTS_N_INSNS (160),   /* DLGR cracked */
323   COSTS_N_INSNS (160),   /* DLR cracked */
324   COSTS_N_INSNS (160),   /* DR expanded */
325   COSTS_N_INSNS (160),   /* DSGFR cracked */
326   COSTS_N_INSNS (160),   /* DSGR cracked */
327 };
328 
329 static struct
330 {
331   /* The preferred name to be used in user visible output.  */
332   const char *const name;
333   /* CPU name as it should be passed to Binutils via .machine  */
334   const char *const binutils_name;
335   const enum processor_type processor;
336   const struct processor_costs *cost;
337 }
338 const processor_table[] =
339 {
340   { "g5",     "g5",     PROCESSOR_9672_G5,     &z900_cost },
341   { "g6",     "g6",     PROCESSOR_9672_G6,     &z900_cost },
342   { "z900",   "z900",   PROCESSOR_2064_Z900,   &z900_cost },
343   { "z990",   "z990",   PROCESSOR_2084_Z990,   &z990_cost },
344   { "z9-109", "z9-109", PROCESSOR_2094_Z9_109, &z9_109_cost },
345   { "z9-ec",  "z9-ec",  PROCESSOR_2094_Z9_EC,  &z9_109_cost },
346   { "z10",    "z10",    PROCESSOR_2097_Z10,    &z10_cost },
347   { "z196",   "z196",   PROCESSOR_2817_Z196,   &z196_cost },
348   { "zEC12",  "zEC12",  PROCESSOR_2827_ZEC12,  &zEC12_cost },
349   { "z13",    "z13",    PROCESSOR_2964_Z13,    &zEC12_cost },
350   { "z14",    "arch12", PROCESSOR_3906_Z14,    &zEC12_cost },
351   { "native", "",       PROCESSOR_NATIVE,      NULL }
352 };
353 
354 extern int reload_completed;
355 
356 /* Kept up to date using the SCHED_VARIABLE_ISSUE hook.  */
357 static rtx_insn *last_scheduled_insn;
358 #define MAX_SCHED_UNITS 3
359 static int last_scheduled_unit_distance[MAX_SCHED_UNITS];
360 
361 #define NUM_SIDES 2
362 static int current_side = 1;
363 #define LONGRUNNING_THRESHOLD 5
364 
365 /* Estimate of the number of cycles a long-running insn occupies an
366    execution unit.  */
367 static unsigned fxu_longrunning[NUM_SIDES];
368 static unsigned vfu_longrunning[NUM_SIDES];
369 
370 /* Factor to scale latencies by, determined by measurements.  */
371 #define LATENCY_FACTOR 4
372 
373 /* The maximum score added for an instruction whose unit hasn't been
374    in use for MAX_SCHED_MIX_DISTANCE steps.  Increase this value to
375    give instruction mix scheduling more priority over instruction
376    grouping.  */
377 #define MAX_SCHED_MIX_SCORE      8
378 
379 /* The maximum distance up to which individual scores will be
380    calculated.  Everything beyond this gives MAX_SCHED_MIX_SCORE.
381    Increase this with the OOO window size of the machine.  */
382 #define MAX_SCHED_MIX_DISTANCE 100
383 
384 /* Structure used to hold the components of a S/390 memory
385    address.  A legitimate address on S/390 is of the general
386    form
387           base + index + displacement
388    where any of the components is optional.
389 
390    base and index are registers of the class ADDR_REGS,
391    displacement is an unsigned 12-bit immediate constant.  */
392 
393 struct s390_address
394 {
395   rtx base;
396   rtx indx;
397   rtx disp;
398   bool pointer;
399   bool literal_pool;
400 };
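
/* Illustrative note (not part of the original comment): an assembler
   operand such as 8(%r3,%r2) in the D(X,B) format decomposes into
   disp = 8, indx = %r3 and base = %r2; any of the three components
   may be missing.  */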
401 
402 /* A few accessor macros for struct cfun->machine->s390_frame_layout.  */
403 
404 #define cfun_frame_layout (cfun->machine->frame_layout)
405 #define cfun_save_high_fprs_p (!!cfun_frame_layout.high_fprs)
406 #define cfun_save_arg_fprs_p (!!(TARGET_64BIT				\
407 				 ? cfun_frame_layout.fpr_bitmap & 0x0f	\
408 				 : cfun_frame_layout.fpr_bitmap & 0x03))
409 #define cfun_gprs_save_area_size ((cfun_frame_layout.last_save_gpr_slot - \
410   cfun_frame_layout.first_save_gpr_slot + 1) * UNITS_PER_LONG)
411 #define cfun_set_fpr_save(REGNO) (cfun->machine->frame_layout.fpr_bitmap |=    \
412   (1 << (REGNO - FPR0_REGNUM)))
413 #define cfun_fpr_save_p(REGNO) (!!(cfun->machine->frame_layout.fpr_bitmap &    \
414   (1 << (REGNO - FPR0_REGNUM))))
415 #define cfun_gpr_save_slot(REGNO) \
416   cfun->machine->frame_layout.gpr_save_slots[REGNO]
417 
418 /* Number of GPRs and FPRs used for argument passing.  */
419 #define GP_ARG_NUM_REG 5
420 #define FP_ARG_NUM_REG (TARGET_64BIT ? 4 : 2)
421 #define VEC_ARG_NUM_REG 8
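
/* Hedged note (an assumption about the s390x ELF ABI, not stated in
   this file): the five argument GPRs are r2..r6, the FPR argument
   registers are f0/f2/f4/f6 (only f0/f2 on 31 bit), and the eight
   vector argument registers are v24..v31.  */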
422 
423 /* A couple of shortcuts.  */
424 #define CONST_OK_FOR_J(x) \
425 	CONST_OK_FOR_CONSTRAINT_P((x), 'J', "J")
426 #define CONST_OK_FOR_K(x) \
427 	CONST_OK_FOR_CONSTRAINT_P((x), 'K', "K")
428 #define CONST_OK_FOR_Os(x) \
429         CONST_OK_FOR_CONSTRAINT_P((x), 'O', "Os")
430 #define CONST_OK_FOR_Op(x) \
431         CONST_OK_FOR_CONSTRAINT_P((x), 'O', "Op")
432 #define CONST_OK_FOR_On(x) \
433         CONST_OK_FOR_CONSTRAINT_P((x), 'O', "On")
434 
435 #define REGNO_PAIR_OK(REGNO, MODE)                               \
436   (s390_hard_regno_nregs ((REGNO), (MODE)) == 1 || !((REGNO) & 1))
437 
438 /* That's the read ahead of the dynamic branch prediction unit in
439    bytes on a z10 (or higher) CPU.  */
440 #define PREDICT_DISTANCE (TARGET_Z10 ? 384 : 2048)
441 
442 /* Masks per jump target register indicating which thunks need to be
443    generated.  */
444 static GTY(()) int indirect_branch_prez10thunk_mask = 0;
445 static GTY(()) int indirect_branch_z10thunk_mask = 0;
446 
447 #define INDIRECT_BRANCH_NUM_OPTIONS 4
448 
449 enum s390_indirect_branch_option
450   {
451     s390_opt_indirect_branch_jump = 0,
452     s390_opt_indirect_branch_call,
453     s390_opt_function_return_reg,
454     s390_opt_function_return_mem
455   };
456 
457 static GTY(()) int indirect_branch_table_label_no[INDIRECT_BRANCH_NUM_OPTIONS] = { 0 };
458 const char *indirect_branch_table_label[INDIRECT_BRANCH_NUM_OPTIONS] = \
459   { "LJUMP", "LCALL", "LRETREG", "LRETMEM" };
460 const char *indirect_branch_table_name[INDIRECT_BRANCH_NUM_OPTIONS] =	\
461   { ".s390_indirect_jump", ".s390_indirect_call",
462     ".s390_return_reg", ".s390_return_mem" };
463 
464 bool
465 s390_return_addr_from_memory ()
466 {
467   return cfun_gpr_save_slot(RETURN_REGNUM) == SAVE_SLOT_STACK;
468 }
469 
470 /* Indicate which ABI has been used for passing vector args.
471    0 - no vector type arguments have been passed where the ABI is relevant
472    1 - the old ABI has been used
473    2 - a vector type argument has been passed either in a vector register
474        or on the stack by value  */
475 static int s390_vector_abi = 0;
476 
477 /* Set the vector ABI marker if TYPE is subject to the vector ABI
478    switch.  The vector ABI affects only vector data types.  There are
479    two aspects of the vector ABI relevant here:
480 
481    1. vectors >= 16 bytes have an alignment of 8 bytes with the new
482    ABI and natural alignment with the old.
483 
484    2. vectors <= 16 bytes are passed in VRs or by value on the stack
485    with the new ABI but by reference on the stack with the old.
486 
487    If ARG_P is true TYPE is used for a function argument or return
488    value.  The ABI marker then is set for all vector data types.  If
489    ARG_P is false only type 1 vectors are being checked.  */
490 
491 static void
492 s390_check_type_for_vector_abi (const_tree type, bool arg_p, bool in_struct_p)
493 {
494   static hash_set<const_tree> visited_types_hash;
495 
496   if (s390_vector_abi)
497     return;
498 
499   if (type == NULL_TREE || TREE_CODE (type) == ERROR_MARK)
500     return;
501 
502   if (visited_types_hash.contains (type))
503     return;
504 
505   visited_types_hash.add (type);
506 
507   if (VECTOR_TYPE_P (type))
508     {
509       int type_size = int_size_in_bytes (type);
510 
511       /* Outside of arguments only the alignment changes, and this
512 	 only happens for vector types >= 16 bytes.  */
513       if (!arg_p && type_size < 16)
514 	return;
515 
516       /* In arguments vector types > 16 bytes are passed as before (GCC
517 	 never enforced the bigger alignment for arguments which was
518 	 required by the old vector ABI).  However, it might still be
519 	 ABI relevant due to the changed alignment if it is a struct
520 	 member.  */
521       if (arg_p && type_size > 16 && !in_struct_p)
522 	return;
523 
524       s390_vector_abi = TARGET_VX_ABI ? 2 : 1;
525     }
526   else if (POINTER_TYPE_P (type) || TREE_CODE (type) == ARRAY_TYPE)
527     {
528       /* ARRAY_TYPE: Since neither of the ABIs requires more than
529 	 natural alignment here, there will never be ABI-dependent padding
530 	 in an array type.  That's why we do not set in_struct_p to
531 	 true here.  */
532       s390_check_type_for_vector_abi (TREE_TYPE (type), arg_p, in_struct_p);
533     }
534   else if (TREE_CODE (type) == FUNCTION_TYPE || TREE_CODE (type) == METHOD_TYPE)
535     {
536       tree arg_chain;
537 
538       /* Check the return type.  */
539       s390_check_type_for_vector_abi (TREE_TYPE (type), true, false);
540 
541       for (arg_chain = TYPE_ARG_TYPES (type);
542 	   arg_chain;
543 	   arg_chain = TREE_CHAIN (arg_chain))
544 	s390_check_type_for_vector_abi (TREE_VALUE (arg_chain), true, false);
545     }
546   else if (RECORD_OR_UNION_TYPE_P (type))
547     {
548       tree field;
549 
550       for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
551 	{
552 	  if (TREE_CODE (field) != FIELD_DECL)
553 	    continue;
554 
555 	  s390_check_type_for_vector_abi (TREE_TYPE (field), arg_p, true);
556 	}
557     }
558 }
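
/* Hedged example (user-level code, not from this file): given

     typedef int v4si_example __attribute__ ((vector_size (16)));
     void foo (v4si_example x);

   checking foo's function type with s390_check_type_for_vector_abi
   sets s390_vector_abi to 2 if the new vector ABI (TARGET_VX_ABI) is
   in effect and to 1 otherwise, because a 16-byte vector argument is
   passed differently under the two ABIs.  */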
559 
560 
561 /* System z builtins.  */
562 
563 #include "s390-builtins.h"
564 
565 const unsigned int bflags_builtin[S390_BUILTIN_MAX + 1] =
566   {
567 #undef B_DEF
568 #undef OB_DEF
569 #undef OB_DEF_VAR
570 #define B_DEF(NAME, PATTERN, ATTRS, BFLAGS, ...) BFLAGS,
571 #define OB_DEF(...)
572 #define OB_DEF_VAR(...)
573 #include "s390-builtins.def"
574     0
575   };
576 
577 const unsigned int opflags_builtin[S390_BUILTIN_MAX + 1] =
578   {
579 #undef B_DEF
580 #undef OB_DEF
581 #undef OB_DEF_VAR
582 #define B_DEF(NAME, PATTERN, ATTRS, BFLAGS, OPFLAGS, ...) OPFLAGS,
583 #define OB_DEF(...)
584 #define OB_DEF_VAR(...)
585 #include "s390-builtins.def"
586     0
587   };
588 
589 const unsigned int bflags_overloaded_builtin[S390_OVERLOADED_BUILTIN_MAX + 1] =
590   {
591 #undef B_DEF
592 #undef OB_DEF
593 #undef OB_DEF_VAR
594 #define B_DEF(...)
595 #define OB_DEF(NAME, FIRST_VAR_NAME, LAST_VAR_NAME, BFLAGS, ...) BFLAGS,
596 #define OB_DEF_VAR(...)
597 #include "s390-builtins.def"
598     0
599   };
600 
601 const unsigned int
602 bflags_overloaded_builtin_var[S390_OVERLOADED_BUILTIN_VAR_MAX + 1] =
603   {
604 #undef B_DEF
605 #undef OB_DEF
606 #undef OB_DEF_VAR
607 #define B_DEF(...)
608 #define OB_DEF(...)
609 #define OB_DEF_VAR(NAME, PATTERN, FLAGS, OPFLAGS, FNTYPE) FLAGS,
610 #include "s390-builtins.def"
611     0
612   };
613 
614 const unsigned int
615 opflags_overloaded_builtin_var[S390_OVERLOADED_BUILTIN_VAR_MAX + 1] =
616   {
617 #undef B_DEF
618 #undef OB_DEF
619 #undef OB_DEF_VAR
620 #define B_DEF(...)
621 #define OB_DEF(...)
622 #define OB_DEF_VAR(NAME, PATTERN, FLAGS, OPFLAGS, FNTYPE) OPFLAGS,
623 #include "s390-builtins.def"
624     0
625   };
626 
627 tree s390_builtin_types[BT_MAX];
628 tree s390_builtin_fn_types[BT_FN_MAX];
629 tree s390_builtin_decls[S390_BUILTIN_MAX +
630 			S390_OVERLOADED_BUILTIN_MAX +
631 			S390_OVERLOADED_BUILTIN_VAR_MAX];
632 
633 static enum insn_code const code_for_builtin[S390_BUILTIN_MAX + 1] = {
634 #undef B_DEF
635 #undef OB_DEF
636 #undef OB_DEF_VAR
637 #define B_DEF(NAME, PATTERN, ...) CODE_FOR_##PATTERN,
638 #define OB_DEF(...)
639 #define OB_DEF_VAR(...)
640 
641 #include "s390-builtins.def"
642   CODE_FOR_nothing
643 };
644 
645 static void
646 s390_init_builtins (void)
647 {
648   /* These definitions are being used in s390-builtins.def.  */
649   tree returns_twice_attr = tree_cons (get_identifier ("returns_twice"),
650 				       NULL, NULL);
651   tree noreturn_attr = tree_cons (get_identifier ("noreturn"), NULL, NULL);
652   tree c_uint64_type_node;
653 
654   /* The uint64_type_node from tree.c is not compatible with the C99
655      uint64_t data type.  What we want is c_uint64_type_node from
656      c-common.c.  But since backend code is not supposed to interface
657      with the frontend we recreate it here.  */
658   if (TARGET_64BIT)
659     c_uint64_type_node = long_unsigned_type_node;
660   else
661     c_uint64_type_node = long_long_unsigned_type_node;
662 
663 #undef DEF_TYPE
664 #define DEF_TYPE(INDEX, NODE, CONST_P)			\
665   if (s390_builtin_types[INDEX] == NULL)		\
666     s390_builtin_types[INDEX] = (!CONST_P) ?		\
667       (NODE) : build_type_variant ((NODE), 1, 0);
668 
669 #undef DEF_POINTER_TYPE
670 #define DEF_POINTER_TYPE(INDEX, INDEX_BASE)				\
671   if (s390_builtin_types[INDEX] == NULL)				\
672     s390_builtin_types[INDEX] =						\
673       build_pointer_type (s390_builtin_types[INDEX_BASE]);
674 
675 #undef DEF_DISTINCT_TYPE
676 #define DEF_DISTINCT_TYPE(INDEX, INDEX_BASE)				\
677   if (s390_builtin_types[INDEX] == NULL)				\
678     s390_builtin_types[INDEX] =						\
679       build_distinct_type_copy (s390_builtin_types[INDEX_BASE]);
680 
681 #undef DEF_VECTOR_TYPE
682 #define DEF_VECTOR_TYPE(INDEX, INDEX_BASE, ELEMENTS)			\
683   if (s390_builtin_types[INDEX] == NULL)				\
684     s390_builtin_types[INDEX] =						\
685       build_vector_type (s390_builtin_types[INDEX_BASE], ELEMENTS);
686 
687 #undef DEF_OPAQUE_VECTOR_TYPE
688 #define DEF_OPAQUE_VECTOR_TYPE(INDEX, INDEX_BASE, ELEMENTS)		\
689   if (s390_builtin_types[INDEX] == NULL)				\
690     s390_builtin_types[INDEX] =						\
691       build_opaque_vector_type (s390_builtin_types[INDEX_BASE], ELEMENTS);
692 
693 #undef DEF_FN_TYPE
694 #define DEF_FN_TYPE(INDEX, args...)				\
695   if (s390_builtin_fn_types[INDEX] == NULL)			\
696     s390_builtin_fn_types[INDEX] =				\
697       build_function_type_list (args, NULL_TREE);
698 #undef DEF_OV_TYPE
699 #define DEF_OV_TYPE(...)
700 #include "s390-builtin-types.def"
701 
702 #undef B_DEF
703 #define B_DEF(NAME, PATTERN, ATTRS, BFLAGS, OPFLAGS, FNTYPE)		\
704   if (s390_builtin_decls[S390_BUILTIN_##NAME] == NULL)			\
705     s390_builtin_decls[S390_BUILTIN_##NAME] =				\
706       add_builtin_function ("__builtin_" #NAME,				\
707 			    s390_builtin_fn_types[FNTYPE],		\
708 			    S390_BUILTIN_##NAME,			\
709 			    BUILT_IN_MD,				\
710 			    NULL,					\
711 			    ATTRS);
712 #undef OB_DEF
713 #define OB_DEF(NAME, FIRST_VAR_NAME, LAST_VAR_NAME, BFLAGS, FNTYPE)	\
714   if (s390_builtin_decls[S390_OVERLOADED_BUILTIN_##NAME + S390_BUILTIN_MAX] \
715       == NULL)								\
716     s390_builtin_decls[S390_OVERLOADED_BUILTIN_##NAME + S390_BUILTIN_MAX] = \
717       add_builtin_function ("__builtin_" #NAME,				\
718 			    s390_builtin_fn_types[FNTYPE],		\
719 			    S390_OVERLOADED_BUILTIN_##NAME + S390_BUILTIN_MAX, \
720 			    BUILT_IN_MD,				\
721 			    NULL,					\
722 			    0);
723 #undef OB_DEF_VAR
724 #define OB_DEF_VAR(...)
725 #include "s390-builtins.def"
726 
727 }
728 
729 /* Return true if ARG is appropriate as argument number ARGNUM of
730    builtin DECL.  The operand flags from s390-builtins.def have to be
731    passed as OP_FLAGS.  */
732 bool
733 s390_const_operand_ok (tree arg, int argnum, int op_flags, tree decl)
734 {
735   if (O_UIMM_P (op_flags))
736     {
737       int bitwidths[] = { 1, 2, 3, 4, 5, 8, 12, 16, 32 };
738       int bitwidth = bitwidths[op_flags - O_U1];
739 
740       if (!tree_fits_uhwi_p (arg)
741 	  || tree_to_uhwi (arg) > (HOST_WIDE_INT_1U << bitwidth) - 1)
742 	{
743 	  error ("constant argument %d for builtin %qF is out of range (0.."
744 		HOST_WIDE_INT_PRINT_UNSIGNED ")",
745 		argnum, decl,
746 		(HOST_WIDE_INT_1U << bitwidth) - 1);
747 	  return false;
748 	}
749     }
750 
751   if (O_SIMM_P (op_flags))
752     {
753       int bitwidths[] = { 2, 3, 4, 5, 8, 12, 16, 32 };
754       int bitwidth = bitwidths[op_flags - O_S2];
755 
756       if (!tree_fits_shwi_p (arg)
757 	  || tree_to_shwi (arg) < -(HOST_WIDE_INT_1 << (bitwidth - 1))
758 	  || tree_to_shwi (arg) > ((HOST_WIDE_INT_1 << (bitwidth - 1)) - 1))
759 	{
760 	  error ("constant argument %d for builtin %qF is out of range ("
761 		HOST_WIDE_INT_PRINT_DEC ".."
762 		HOST_WIDE_INT_PRINT_DEC ")",
763 		argnum, decl,
764 		-(HOST_WIDE_INT_1 << (bitwidth - 1)),
765 		(HOST_WIDE_INT_1 << (bitwidth - 1)) - 1);
766 	  return false;
767 	}
768     }
769   return true;
770 }
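
/* Hedged example of the checks above (assuming the O_U and O_S
   enumerators follow the order of the bitwidth tables): an O_U4
   operand must be a constant in the range 0..15, an O_S8 operand must
   lie in -128..127; any other value triggers the out-of-range error
   and the function returns false.  */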
771 
772 /* Expand an expression EXP that calls a built-in function,
773    with result going to TARGET if that's convenient
774    (and in mode MODE if that's convenient).
775    SUBTARGET may be used as the target for computing one of EXP's operands.
776    IGNORE is nonzero if the value is to be ignored.  */
777 
778 static rtx
779 s390_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
780 		     machine_mode mode ATTRIBUTE_UNUSED,
781 		     int ignore ATTRIBUTE_UNUSED)
782 {
783 #define MAX_ARGS 6
784 
785   tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
786   unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
787   enum insn_code icode;
788   rtx op[MAX_ARGS], pat;
789   int arity;
790   bool nonvoid;
791   tree arg;
792   call_expr_arg_iterator iter;
793   unsigned int all_op_flags = opflags_for_builtin (fcode);
794   machine_mode last_vec_mode = VOIDmode;
795 
796   if (TARGET_DEBUG_ARG)
797     {
798       fprintf (stderr,
799 	       "s390_expand_builtin, code = %4d, %s, bflags = 0x%x\n",
800 	       (int)fcode, IDENTIFIER_POINTER (DECL_NAME (fndecl)),
801 	       bflags_for_builtin (fcode));
802     }
803 
804   if (S390_USE_TARGET_ATTRIBUTE)
805     {
806       unsigned int bflags;
807 
808       bflags = bflags_for_builtin (fcode);
809       if ((bflags & B_HTM) && !TARGET_HTM)
810 	{
811 	  error ("builtin %qF is not supported without -mhtm "
812 		 "(default with -march=zEC12 and higher).", fndecl);
813 	  return const0_rtx;
814 	}
815       if (((bflags & B_VX) || (bflags & B_VXE)) && !TARGET_VX)
816 	{
817 	  error ("builtin %qF requires -mvx "
818 		 "(default with -march=z13 and higher).", fndecl);
819 	  return const0_rtx;
820 	}
821 
822       if ((bflags & B_VXE) && !TARGET_VXE)
823 	{
824 	  error ("builtin %qF requires z14 or higher.", fndecl);
825 	  return const0_rtx;
826 	}
827     }
828   if (fcode >= S390_OVERLOADED_BUILTIN_VAR_OFFSET
829       && fcode < S390_ALL_BUILTIN_MAX)
830     {
831       gcc_unreachable ();
832     }
833   else if (fcode < S390_OVERLOADED_BUILTIN_OFFSET)
834     {
835       icode = code_for_builtin[fcode];
836       /* Set a flag in the machine specific cfun part in order to support
837 	 saving/restoring of FPRs.  */
838       if (fcode == S390_BUILTIN_tbegin || fcode == S390_BUILTIN_tbegin_retry)
839 	cfun->machine->tbegin_p = true;
840     }
841   else if (fcode < S390_OVERLOADED_BUILTIN_VAR_OFFSET)
842     {
843       error ("unresolved overloaded builtin");
844       return const0_rtx;
845     }
846   else
847     internal_error ("bad builtin fcode");
848 
849   if (icode == 0)
850     internal_error ("bad builtin icode");
851 
852   nonvoid = TREE_TYPE (TREE_TYPE (fndecl)) != void_type_node;
853 
854   if (nonvoid)
855     {
856       machine_mode tmode = insn_data[icode].operand[0].mode;
857       if (!target
858 	  || GET_MODE (target) != tmode
859 	  || !(*insn_data[icode].operand[0].predicate) (target, tmode))
860 	target = gen_reg_rtx (tmode);
861 
862       /* There are builtins (e.g. vec_promote) with no vector
863 	 arguments but an element selector.  So we have to also look
864 	 at the vector return type when emitting the modulo
865 	 operation.  */
866       if (VECTOR_MODE_P (insn_data[icode].operand[0].mode))
867 	last_vec_mode = insn_data[icode].operand[0].mode;
868     }
869 
870   arity = 0;
871   FOR_EACH_CALL_EXPR_ARG (arg, iter, exp)
872     {
873       rtx tmp_rtx;
874       const struct insn_operand_data *insn_op;
875       unsigned int op_flags = all_op_flags & ((1 << O_SHIFT) - 1);
876 
877       all_op_flags = all_op_flags >> O_SHIFT;
878 
879       if (arg == error_mark_node)
880 	return NULL_RTX;
881       if (arity >= MAX_ARGS)
882 	return NULL_RTX;
883 
884       if (O_IMM_P (op_flags)
885 	  && TREE_CODE (arg) != INTEGER_CST)
886 	{
887 	  error ("constant value required for builtin %qF argument %d",
888 		 fndecl, arity + 1);
889 	  return const0_rtx;
890 	}
891 
892       if (!s390_const_operand_ok (arg, arity + 1, op_flags, fndecl))
893 	return const0_rtx;
894 
895       insn_op = &insn_data[icode].operand[arity + nonvoid];
896       op[arity] = expand_expr (arg, NULL_RTX, insn_op->mode, EXPAND_NORMAL);
897 
898       /* expand_expr truncates constants to the target mode only if it
899 	 is "convenient".  However, our checks below rely on this
900 	 being done.  */
901       if (CONST_INT_P (op[arity])
902 	  && SCALAR_INT_MODE_P (insn_op->mode)
903 	  && GET_MODE (op[arity]) != insn_op->mode)
904 	op[arity] = GEN_INT (trunc_int_for_mode (INTVAL (op[arity]),
905 						 insn_op->mode));
906 
907       /* Wrap the expanded RTX for pointer types into a MEM expr with
908 	 the proper mode.  This allows us to use e.g. (match_operand
909 	 "memory_operand" ...) in the insn patterns instead of (mem
910 	 (match_operand "address_operand" ...)).  This is helpful for
911 	 patterns not just accepting MEMs.  */
912       if (POINTER_TYPE_P (TREE_TYPE (arg))
913 	  && insn_op->predicate != address_operand)
914 	op[arity] = gen_rtx_MEM (insn_op->mode, op[arity]);
915 
916       /* Expand the modulo operation required on element selectors.  */
917       if (op_flags == O_ELEM)
918 	{
919 	  gcc_assert (last_vec_mode != VOIDmode);
920 	  op[arity] = simplify_expand_binop (SImode, code_to_optab (AND),
921 					     op[arity],
922 					     GEN_INT (GET_MODE_NUNITS (last_vec_mode) - 1),
923 					     NULL_RTX, 1, OPTAB_DIRECT);
924 	}
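
      /* Hedged illustration of the AND emitted above: for a V4SI
	 last_vec_mode there are 4 elements, so an element selector of
	 5 is reduced to 5 & 3 == 1, i.e. selectors wrap around the
	 vector length instead of being rejected.  */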
925 
926       /* Record the vector mode used for an element selector.  This assumes:
927 	 1. There is no builtin with two different vector modes and an element selector.
928 	 2. The element selector comes after the vector type it is referring to.
929 	 This is currently true for all the builtins, but FIXME: we
930 	 should check for that.  */
931       if (VECTOR_MODE_P (insn_op->mode))
932 	last_vec_mode = insn_op->mode;
933 
934       if (insn_op->predicate (op[arity], insn_op->mode))
935 	{
936 	  arity++;
937 	  continue;
938 	}
939 
940       if (MEM_P (op[arity])
941 	  && insn_op->predicate == memory_operand
942 	  && (GET_MODE (XEXP (op[arity], 0)) == Pmode
943 	      || GET_MODE (XEXP (op[arity], 0)) == VOIDmode))
944 	{
945 	  op[arity] = replace_equiv_address (op[arity],
946 					     copy_to_mode_reg (Pmode,
947 					       XEXP (op[arity], 0)));
948 	}
949       /* Some of the builtins require different modes/types than the
950 	 pattern in order to implement a specific API.  Instead of
951 	 adding many expanders which do the mode change we do it here.
952 	 E.g. s390_vec_add_u128 required to have vector unsigned char
953 	 arguments is mapped to addti3.  */
954       else if (insn_op->mode != VOIDmode
955 	       && GET_MODE (op[arity]) != VOIDmode
956 	       && GET_MODE (op[arity]) != insn_op->mode
957 	       && ((tmp_rtx = simplify_gen_subreg (insn_op->mode, op[arity],
958 						   GET_MODE (op[arity]), 0))
959 		   != NULL_RTX))
960 	{
961 	  op[arity] = tmp_rtx;
962 	}
963       else if (GET_MODE (op[arity]) == insn_op->mode
964 	       || GET_MODE (op[arity]) == VOIDmode
965 	       || (insn_op->predicate == address_operand
966 		   && GET_MODE (op[arity]) == Pmode))
967 	{
968 	  /* An address_operand usually has VOIDmode in the expander
969 	     so we cannot use this.  */
970 	  machine_mode target_mode =
971 	    (insn_op->predicate == address_operand
972 	     ? (machine_mode) Pmode : insn_op->mode);
973 	  op[arity] = copy_to_mode_reg (target_mode, op[arity]);
974 	}
975 
976       if (!insn_op->predicate (op[arity], insn_op->mode))
977 	{
978 	  error ("invalid argument %d for builtin %qF", arity + 1, fndecl);
979 	  return const0_rtx;
980 	}
981       arity++;
982     }
983 
984   switch (arity)
985     {
986     case 0:
987       pat = GEN_FCN (icode) (target);
988       break;
989     case 1:
990       if (nonvoid)
991         pat = GEN_FCN (icode) (target, op[0]);
992       else
993 	pat = GEN_FCN (icode) (op[0]);
994       break;
995     case 2:
996       if (nonvoid)
997 	pat = GEN_FCN (icode) (target, op[0], op[1]);
998       else
999 	pat = GEN_FCN (icode) (op[0], op[1]);
1000       break;
1001     case 3:
1002       if (nonvoid)
1003 	pat = GEN_FCN (icode) (target, op[0], op[1], op[2]);
1004       else
1005 	pat = GEN_FCN (icode) (op[0], op[1], op[2]);
1006       break;
1007     case 4:
1008       if (nonvoid)
1009 	pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3]);
1010       else
1011 	pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
1012       break;
1013     case 5:
1014       if (nonvoid)
1015 	pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3], op[4]);
1016       else
1017 	pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3], op[4]);
1018       break;
1019     case 6:
1020       if (nonvoid)
1021 	pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3], op[4], op[5]);
1022       else
1023 	pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3], op[4], op[5]);
1024       break;
1025     default:
1026       gcc_unreachable ();
1027     }
1028   if (!pat)
1029     return NULL_RTX;
1030   emit_insn (pat);
1031 
1032   if (nonvoid)
1033     return target;
1034   else
1035     return const0_rtx;
1036 }
1037 
1038 
1039 static const int s390_hotpatch_hw_max = 1000000;
1040 static int s390_hotpatch_hw_before_label = 0;
1041 static int s390_hotpatch_hw_after_label = 0;
1042 
1043 /* Check whether the hotpatch attribute is applied to a function and, if it has
1044    an argument, the argument is valid.  */
1045 
1046 static tree
1047 s390_handle_hotpatch_attribute (tree *node, tree name, tree args,
1048 				int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
1049 {
1050   tree expr;
1051   tree expr2;
1052   int err;
1053 
1054   if (TREE_CODE (*node) != FUNCTION_DECL)
1055     {
1056       warning (OPT_Wattributes, "%qE attribute only applies to functions",
1057 	       name);
1058       *no_add_attrs = true;
1059     }
1060   if (args != NULL && TREE_CHAIN (args) != NULL)
1061     {
1062       expr = TREE_VALUE (args);
1063       expr2 = TREE_VALUE (TREE_CHAIN (args));
1064     }
1065   if (args == NULL || TREE_CHAIN (args) == NULL)
1066     err = 1;
1067   else if (TREE_CODE (expr) != INTEGER_CST
1068 	   || !INTEGRAL_TYPE_P (TREE_TYPE (expr))
1069 	   || wi::gtu_p (wi::to_wide (expr), s390_hotpatch_hw_max))
1070     err = 1;
1071   else if (TREE_CODE (expr2) != INTEGER_CST
1072 	   || !INTEGRAL_TYPE_P (TREE_TYPE (expr2))
1073 	   || wi::gtu_p (wi::to_wide (expr2), s390_hotpatch_hw_max))
1074     err = 1;
1075   else
1076     err = 0;
1077   if (err)
1078     {
1079       error ("requested %qE attribute is not a comma separated pair of"
1080 	     " non-negative integer constants or too large (max. %d)", name,
1081 	     s390_hotpatch_hw_max);
1082       *no_add_attrs = true;
1083     }
1084 
1085   return NULL_TREE;
1086 }
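
/* Hedged usage example (user-level code, not from this file): the
   attribute validated above is written as e.g.

     void foo (void) __attribute__ ((hotpatch (1, 2)));

   i.e. a comma separated pair of non-negative integer constants, each
   at most s390_hotpatch_hw_max, giving the number of halfwords of
   padding emitted before and after the function label.  */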
1087 
1088 /* Expand the s390_vector_bool type attribute.  */
1089 
1090 static tree
1091 s390_handle_vectorbool_attribute (tree *node, tree name ATTRIBUTE_UNUSED,
1092 				  tree args ATTRIBUTE_UNUSED,
1093 				  int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
1094 {
1095   tree type = *node, result = NULL_TREE;
1096   machine_mode mode;
1097 
1098   while (POINTER_TYPE_P (type)
1099 	 || TREE_CODE (type) == FUNCTION_TYPE
1100 	 || TREE_CODE (type) == METHOD_TYPE
1101 	 || TREE_CODE (type) == ARRAY_TYPE)
1102     type = TREE_TYPE (type);
1103 
1104   mode = TYPE_MODE (type);
1105   switch (mode)
1106     {
1107     case E_DImode: case E_V2DImode:
1108       result = s390_builtin_types[BT_BV2DI];
1109       break;
1110     case E_SImode: case E_V4SImode:
1111       result = s390_builtin_types[BT_BV4SI];
1112       break;
1113     case E_HImode: case E_V8HImode:
1114       result = s390_builtin_types[BT_BV8HI];
1115       break;
1116     case E_QImode: case E_V16QImode:
1117       result = s390_builtin_types[BT_BV16QI];
1118       break;
1119     default:
1120       break;
1121     }
1122 
1123   *no_add_attrs = true;  /* No need to hang on to the attribute.  */
1124 
1125   if (result)
1126     *node = lang_hooks.types.reconstruct_complex_type (*node, result);
1127 
1128   return NULL_TREE;
1129 }
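
/* Hedged example of the mode mapping above (the typedef is
   hypothetical, not taken from this file): for a type whose mode is
   V4SImode or scalar SImode, e.g.

     typedef __attribute__ ((s390_vector_bool)) int v4bi_hypothetical;

   the attribute handler rewrites the type to the BT_BV4SI boolean
   vector type; the other switch cases map DI/HI/QI component modes to
   their boolean vector counterparts in the same way.  */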
1130 
1131 /* Check syntax of function decl attributes having a string type value.  */
1132 
1133 static tree
1134 s390_handle_string_attribute (tree *node, tree name ATTRIBUTE_UNUSED,
1135 			      tree args ATTRIBUTE_UNUSED,
1136 			      int flags ATTRIBUTE_UNUSED,
1137 			      bool *no_add_attrs)
1138 {
1139   tree cst;
1140 
1141   if (TREE_CODE (*node) != FUNCTION_DECL)
1142     {
1143       warning (OPT_Wattributes, "%qE attribute only applies to functions",
1144 	       name);
1145       *no_add_attrs = true;
1146     }
1147 
1148   cst = TREE_VALUE (args);
1149 
1150   if (TREE_CODE (cst) != STRING_CST)
1151     {
1152       warning (OPT_Wattributes,
1153 	       "%qE attribute requires a string constant argument",
1154 	       name);
1155       *no_add_attrs = true;
1156     }
1157 
1158   if (is_attribute_p ("indirect_branch", name)
1159       || is_attribute_p ("indirect_branch_call", name)
1160       || is_attribute_p ("function_return", name)
1161       || is_attribute_p ("function_return_reg", name)
1162       || is_attribute_p ("function_return_mem", name))
1163     {
1164       if (strcmp (TREE_STRING_POINTER (cst), "keep") != 0
1165 	  && strcmp (TREE_STRING_POINTER (cst), "thunk") != 0
1166 	  && strcmp (TREE_STRING_POINTER (cst), "thunk-extern") != 0)
1167       {
1168 	warning (OPT_Wattributes,
1169 		 "argument to %qE attribute is not "
1170 		 "(keep|thunk|thunk-extern)", name);
1171 	*no_add_attrs = true;
1172       }
1173     }
1174 
1175   if (is_attribute_p ("indirect_branch_jump", name)
1176       && strcmp (TREE_STRING_POINTER (cst), "keep") != 0
1177       && strcmp (TREE_STRING_POINTER (cst), "thunk") != 0
1178       && strcmp (TREE_STRING_POINTER (cst), "thunk-inline") != 0
1179       && strcmp (TREE_STRING_POINTER (cst), "thunk-extern") != 0)
1180     {
1181       warning (OPT_Wattributes,
1182 	       "argument to %qE attribute is not "
1183 	       "(keep|thunk|thunk-inline|thunk-extern)", name);
1184       *no_add_attrs = true;
1185     }
1186 
1187   return NULL_TREE;
1188 }
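
/* Hedged usage examples (user-level code, not from this file): the
   string attributes validated above are written as e.g.

     void foo (void) __attribute__ ((indirect_branch ("thunk")));
     void bar (void) __attribute__ ((function_return ("keep")));

   where only the values accepted above (keep, thunk, thunk-extern, and
   additionally thunk-inline for indirect_branch_jump) avoid the
   warning.  */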
1189 
1190 static const struct attribute_spec s390_attribute_table[] = {
1191   { "hotpatch", 2, 2, true, false, false, false,
1192     s390_handle_hotpatch_attribute, NULL },
1193   { "s390_vector_bool", 0, 0, false, true, false, true,
1194     s390_handle_vectorbool_attribute, NULL },
1195   { "indirect_branch", 1, 1, true, false, false, false,
1196     s390_handle_string_attribute, NULL },
1197   { "indirect_branch_jump", 1, 1, true, false, false, false,
1198     s390_handle_string_attribute, NULL },
1199   { "indirect_branch_call", 1, 1, true, false, false, false,
1200     s390_handle_string_attribute, NULL },
1201   { "function_return", 1, 1, true, false, false, false,
1202     s390_handle_string_attribute, NULL },
1203   { "function_return_reg", 1, 1, true, false, false, false,
1204     s390_handle_string_attribute, NULL },
1205   { "function_return_mem", 1, 1, true, false, false, false,
1206     s390_handle_string_attribute, NULL },
1207 
1208   /* End element.  */
1209   { NULL,        0, 0, false, false, false, false, NULL, NULL }
1210 };
1211 
1212 /* Return the alignment for LABEL.  We default to the -falign-labels
1213    value except for the literal pool base label.  */
1214 int
1215 s390_label_align (rtx_insn *label)
1216 {
1217   rtx_insn *prev_insn = prev_active_insn (label);
1218   rtx set, src;
1219 
1220   if (prev_insn == NULL_RTX)
1221     goto old;
1222 
1223   set = single_set (prev_insn);
1224 
1225   if (set == NULL_RTX)
1226     goto old;
1227 
1228   src = SET_SRC (set);
1229 
1230   /* Don't align literal pool base labels.  */
1231   if (GET_CODE (src) == UNSPEC
1232       && XINT (src, 1) == UNSPEC_MAIN_BASE)
1233     return 0;
1234 
1235  old:
1236   return align_labels_log;
1237 }
1238 
1239 static GTY(()) rtx got_symbol;
1240 
1241 /* Return the GOT table symbol.  The symbol will be created when the
1242    function is invoked for the first time.  */
1243 
1244 static rtx
1245 s390_got_symbol (void)
1246 {
1247   if (!got_symbol)
1248     {
1249       got_symbol = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
1250       SYMBOL_REF_FLAGS (got_symbol) = SYMBOL_FLAG_LOCAL;
1251     }
1252 
1253   return got_symbol;
1254 }
1255 
1256 static scalar_int_mode
1257 s390_libgcc_cmp_return_mode (void)
1258 {
1259   return TARGET_64BIT ? DImode : SImode;
1260 }
1261 
1262 static scalar_int_mode
1263 s390_libgcc_shift_count_mode (void)
1264 {
1265   return TARGET_64BIT ? DImode : SImode;
1266 }
1267 
1268 static scalar_int_mode
1269 s390_unwind_word_mode (void)
1270 {
1271   return TARGET_64BIT ? DImode : SImode;
1272 }
1273 
1274 /* Return true if the back end supports mode MODE.  */
1275 static bool
1276 s390_scalar_mode_supported_p (scalar_mode mode)
1277 {
1278   /* In contrast to the default implementation, reject TImode constants on 31-bit
1279      TARGET_ZARCH for ABI compliance.  */
1280   if (!TARGET_64BIT && TARGET_ZARCH && mode == TImode)
1281     return false;
1282 
1283   if (DECIMAL_FLOAT_MODE_P (mode))
1284     return default_decimal_float_supported_p ();
1285 
1286   return default_scalar_mode_supported_p (mode);
1287 }
1288 
1289 /* Return true if the back end supports vector mode MODE.  */
1290 static bool
1291 s390_vector_mode_supported_p (machine_mode mode)
1292 {
1293   machine_mode inner;
1294 
1295   if (!VECTOR_MODE_P (mode)
1296       || !TARGET_VX
1297       || GET_MODE_SIZE (mode) > 16)
1298     return false;
1299 
1300   inner = GET_MODE_INNER (mode);
1301 
1302   switch (inner)
1303     {
1304     case E_QImode:
1305     case E_HImode:
1306     case E_SImode:
1307     case E_DImode:
1308     case E_TImode:
1309     case E_SFmode:
1310     case E_DFmode:
1311     case E_TFmode:
1312       return true;
1313     default:
1314       return false;
1315     }
1316 }
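
/* Hedged illustration of the checks above: with TARGET_VX enabled,
   16-byte modes such as V16QImode, V4SImode or V2DFmode are reported
   as supported, while a 32-byte mode like V32QImode, or any vector
   mode without VX, is rejected.  */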
1317 
1318 /* Set the has_landing_pad_p flag in struct machine_function to VALUE.  */
1319 
1320 void
1321 s390_set_has_landing_pad_p (bool value)
1322 {
1323   cfun->machine->has_landing_pad_p = value;
1324 }
1325 
1326 /* If two condition code modes are compatible, return a condition code
1327    mode which is compatible with both.  Otherwise, return
1328    VOIDmode.  */
1329 
1330 static machine_mode
1331 s390_cc_modes_compatible (machine_mode m1, machine_mode m2)
1332 {
1333   if (m1 == m2)
1334     return m1;
1335 
1336   switch (m1)
1337     {
1338     case E_CCZmode:
1339       if (m2 == CCUmode || m2 == CCTmode || m2 == CCZ1mode
1340 	  || m2 == CCSmode || m2 == CCSRmode || m2 == CCURmode)
1341         return m2;
1342       return VOIDmode;
1343 
1344     case E_CCSmode:
1345     case E_CCUmode:
1346     case E_CCTmode:
1347     case E_CCSRmode:
1348     case E_CCURmode:
1349     case E_CCZ1mode:
1350       if (m2 == CCZmode)
1351 	return m1;
1352 
1353       return VOIDmode;
1354 
1355     default:
1356       return VOIDmode;
1357     }
1358   return VOIDmode;
1359 }
1360 
1361 /* Return true if SET either doesn't set the CC register, or else
1362    the source and destination have matching CC modes and that
1363    CC mode is at least as constrained as REQ_MODE.  */
1364 
1365 static bool
1366 s390_match_ccmode_set (rtx set, machine_mode req_mode)
1367 {
1368   machine_mode set_mode;
1369 
1370   gcc_assert (GET_CODE (set) == SET);
1371 
1372   /* These modes are supposed to be used only in CC consumer
1373      patterns.  */
1374   gcc_assert (req_mode != CCVIALLmode && req_mode != CCVIANYmode
1375 	      && req_mode != CCVFALLmode && req_mode != CCVFANYmode);
1376 
1377   if (GET_CODE (SET_DEST (set)) != REG || !CC_REGNO_P (REGNO (SET_DEST (set))))
1378     return 1;
1379 
1380   set_mode = GET_MODE (SET_DEST (set));
1381   switch (set_mode)
1382     {
1383     case E_CCZ1mode:
1384     case E_CCSmode:
1385     case E_CCSRmode:
1386     case E_CCUmode:
1387     case E_CCURmode:
1388     case E_CCLmode:
1389     case E_CCL1mode:
1390     case E_CCL2mode:
1391     case E_CCL3mode:
1392     case E_CCT1mode:
1393     case E_CCT2mode:
1394     case E_CCT3mode:
1395     case E_CCVEQmode:
1396     case E_CCVIHmode:
1397     case E_CCVIHUmode:
1398     case E_CCVFHmode:
1399     case E_CCVFHEmode:
1400       if (req_mode != set_mode)
1401         return 0;
1402       break;
1403 
1404     case E_CCZmode:
1405       if (req_mode != CCSmode && req_mode != CCUmode && req_mode != CCTmode
1406 	  && req_mode != CCSRmode && req_mode != CCURmode
1407 	  && req_mode != CCZ1mode)
1408         return 0;
1409       break;
1410 
1411     case E_CCAPmode:
1412     case E_CCANmode:
1413       if (req_mode != CCAmode)
1414         return 0;
1415       break;
1416 
1417     default:
1418       gcc_unreachable ();
1419     }
1420 
1421   return (GET_MODE (SET_SRC (set)) == set_mode);
1422 }
1423 
1424 /* Return true if every SET in INSN that sets the CC register
1425    has source and destination with matching CC modes and that
1426    CC mode is at least as constrained as REQ_MODE.
1427    If REQ_MODE is VOIDmode, always return false.  */
1428 
1429 bool
1430 s390_match_ccmode (rtx_insn *insn, machine_mode req_mode)
1431 {
1432   int i;
1433 
1434   /* s390_tm_ccmode returns VOIDmode to indicate failure.  */
1435   if (req_mode == VOIDmode)
1436     return false;
1437 
1438   if (GET_CODE (PATTERN (insn)) == SET)
1439     return s390_match_ccmode_set (PATTERN (insn), req_mode);
1440 
1441   if (GET_CODE (PATTERN (insn)) == PARALLEL)
1442       for (i = 0; i < XVECLEN (PATTERN (insn), 0); i++)
1443         {
1444           rtx set = XVECEXP (PATTERN (insn), 0, i);
1445           if (GET_CODE (set) == SET)
1446             if (!s390_match_ccmode_set (set, req_mode))
1447               return false;
1448         }
1449 
1450   return true;
1451 }
1452 
1453 /* If a test-under-mask instruction can be used to implement
1454    (compare (and ... OP1) OP2), return the CC mode required
1455    to do that.  Otherwise, return VOIDmode.
1456    MIXED is true if the instruction can distinguish between
1457    CC1 and CC2 for mixed selected bits (TMxx), it is false
1458    if the instruction cannot (TM).  */
1459 
1460 machine_mode
1461 s390_tm_ccmode (rtx op1, rtx op2, bool mixed)
1462 {
1463   int bit0, bit1;
1464 
1465   /* ??? Fixme: should work on CONST_WIDE_INT as well.  */
1466   if (GET_CODE (op1) != CONST_INT || GET_CODE (op2) != CONST_INT)
1467     return VOIDmode;
1468 
1469   /* Selected bits all zero: CC0.
1470      e.g.: int a; if ((a & (16 + 128)) == 0) */
1471   if (INTVAL (op2) == 0)
1472     return CCTmode;
1473 
1474   /* Selected bits all one: CC3.
1475      e.g.: int a; if ((a & (16 + 128)) == 16 + 128) */
1476   if (INTVAL (op2) == INTVAL (op1))
1477     return CCT3mode;
1478 
1479   /* Exactly two bits selected, mixed zeroes and ones: CC1 or CC2. e.g.:
1480      int a;
1481      if ((a & (16 + 128)) == 16)         -> CCT1
1482      if ((a & (16 + 128)) == 128)        -> CCT2  */
1483   if (mixed)
1484     {
1485       bit1 = exact_log2 (INTVAL (op2));
1486       bit0 = exact_log2 (INTVAL (op1) ^ INTVAL (op2));
1487       if (bit0 != -1 && bit1 != -1)
1488         return bit0 > bit1 ? CCT1mode : CCT2mode;
1489     }
1490 
1491   return VOIDmode;
1492 }
1493 
1494 /* Given a comparison code OP (EQ, NE, etc.) and the operands
1495    OP0 and OP1 of a COMPARE, return the mode to be used for the
1496    comparison.  */
1497 
1498 machine_mode
1499 s390_select_ccmode (enum rtx_code code, rtx op0, rtx op1)
1500 {
1501   switch (code)
1502     {
1503       case EQ:
1504       case NE:
1505 	if ((GET_CODE (op0) == NEG || GET_CODE (op0) == ABS)
1506 	    && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)
1507 	  return CCAPmode;
1508 	if (GET_CODE (op0) == PLUS && GET_CODE (XEXP (op0, 1)) == CONST_INT
1509 	    && CONST_OK_FOR_K (INTVAL (XEXP (op0, 1))))
1510 	  return CCAPmode;
1511 	if ((GET_CODE (op0) == PLUS || GET_CODE (op0) == MINUS
1512 	     || GET_CODE (op1) == NEG)
1513 	    && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)
1514 	  return CCLmode;
1515 
1516 	if (GET_CODE (op0) == AND)
1517 	  {
1518 	    /* Check whether we can potentially do it via TM.  */
1519 	    machine_mode ccmode;
1520 	    ccmode = s390_tm_ccmode (XEXP (op0, 1), op1, 1);
1521 	    if (ccmode != VOIDmode)
1522 	      {
1523 		/* Relax CCTmode to CCZmode to allow fall-back to AND
1524 		   if that turns out to be beneficial.  */
1525 	        return ccmode == CCTmode ? CCZmode : ccmode;
1526 	      }
1527 	  }
1528 
1529 	if (register_operand (op0, HImode)
1530 	    && GET_CODE (op1) == CONST_INT
1531 	    && (INTVAL (op1) == -1 || INTVAL (op1) == 65535))
1532 	  return CCT3mode;
1533 	if (register_operand (op0, QImode)
1534 	    && GET_CODE (op1) == CONST_INT
1535 	    && (INTVAL (op1) == -1 || INTVAL (op1) == 255))
1536 	  return CCT3mode;
1537 
1538 	return CCZmode;
1539 
1540       case LE:
1541       case LT:
1542       case GE:
1543       case GT:
1544 	/* The only overflow condition of NEG and ABS happens when
1545 	   INT_MIN is used as parameter, which stays negative. So
1546 	   we have an overflow from a positive value to a negative.
1547 	   Using CCAP mode the resulting cc can be used for comparisons.  */
1548 	if ((GET_CODE (op0) == NEG || GET_CODE (op0) == ABS)
1549 	    && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)
1550 	  return CCAPmode;
1551 
1552  	/* If constants are involved in an add instruction it is possible to use
1553  	   the resulting cc for comparisons with zero. Knowing the sign of the
1554 	   constant the overflow behavior gets predictable. e.g.:
1555  	     int a, b; if ((b = a + c) > 0)
1556  	   with c as a constant value: c < 0 -> CCAN and c >= 0 -> CCAP  */
1557 	if (GET_CODE (op0) == PLUS && GET_CODE (XEXP (op0, 1)) == CONST_INT
1558 	    && (CONST_OK_FOR_K (INTVAL (XEXP (op0, 1)))
1559 		|| (CONST_OK_FOR_CONSTRAINT_P (INTVAL (XEXP (op0, 1)), 'O', "Os")
1560 		    /* Avoid INT32_MIN on 32 bit.  */
1561 		    && (!TARGET_ZARCH || INTVAL (XEXP (op0, 1)) != -0x7fffffff - 1))))
1562 	  {
1563 	    if (INTVAL (XEXP((op0), 1)) < 0)
1564 	      return CCANmode;
1565 	    else
1566 	      return CCAPmode;
1567 	  }
1568 	/* Fall through.  */
1569       case UNORDERED:
1570       case ORDERED:
1571       case UNEQ:
1572       case UNLE:
1573       case UNLT:
1574       case UNGE:
1575       case UNGT:
1576       case LTGT:
1577 	if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
1578 	    && GET_CODE (op1) != CONST_INT)
1579 	  return CCSRmode;
1580 	return CCSmode;
1581 
1582       case LTU:
1583       case GEU:
1584 	if (GET_CODE (op0) == PLUS
1585 	    && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)
1586 	  return CCL1mode;
1587 
1588 	if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
1589 	    && GET_CODE (op1) != CONST_INT)
1590 	  return CCURmode;
1591 	return CCUmode;
1592 
1593       case LEU:
1594       case GTU:
1595 	if (GET_CODE (op0) == MINUS
1596 	    && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)
1597 	  return CCL2mode;
1598 
1599 	if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
1600 	    && GET_CODE (op1) != CONST_INT)
1601 	  return CCURmode;
1602 	return CCUmode;
1603 
1604       default:
1605 	gcc_unreachable ();
1606     }
1607 }
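
/* Hedged example of the mode selection above: for an EQ comparison of
   (and (reg) (const_int 0x90)) against zero, s390_tm_ccmode returns
   CCTmode (all selected bits zero means CC0), which is then relaxed to
   CCZmode so that a fall-back to a plain AND remains possible.  */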
1608 
1609 /* Replace the comparison OP0 CODE OP1 by a semantically equivalent one
1610    that we can implement more efficiently.  */
1611 
1612 static void
1613 s390_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
1614 			      bool op0_preserve_value)
1615 {
1616   if (op0_preserve_value)
1617     return;
1618 
1619   /* Convert ZERO_EXTRACT back to AND to enable TM patterns.  */
1620   if ((*code == EQ || *code == NE)
1621       && *op1 == const0_rtx
1622       && GET_CODE (*op0) == ZERO_EXTRACT
1623       && GET_CODE (XEXP (*op0, 1)) == CONST_INT
1624       && GET_CODE (XEXP (*op0, 2)) == CONST_INT
1625       && SCALAR_INT_MODE_P (GET_MODE (XEXP (*op0, 0))))
1626     {
1627       rtx inner = XEXP (*op0, 0);
1628       HOST_WIDE_INT modesize = GET_MODE_BITSIZE (GET_MODE (inner));
1629       HOST_WIDE_INT len = INTVAL (XEXP (*op0, 1));
1630       HOST_WIDE_INT pos = INTVAL (XEXP (*op0, 2));
1631 
1632       if (len > 0 && len < modesize
1633 	  && pos >= 0 && pos + len <= modesize
1634 	  && modesize <= HOST_BITS_PER_WIDE_INT)
1635 	{
1636 	  unsigned HOST_WIDE_INT block;
1637 	  block = (HOST_WIDE_INT_1U << len) - 1;
1638 	  block <<= modesize - pos - len;
1639 
1640 	  *op0 = gen_rtx_AND (GET_MODE (inner), inner,
1641 			      gen_int_mode (block, GET_MODE (inner)));
1642 	}
1643     }
1644 
1645   /* Narrow AND of memory against immediate to enable TM.  */
1646   if ((*code == EQ || *code == NE)
1647       && *op1 == const0_rtx
1648       && GET_CODE (*op0) == AND
1649       && GET_CODE (XEXP (*op0, 1)) == CONST_INT
1650       && SCALAR_INT_MODE_P (GET_MODE (XEXP (*op0, 0))))
1651     {
1652       rtx inner = XEXP (*op0, 0);
1653       rtx mask = XEXP (*op0, 1);
1654 
1655       /* Ignore paradoxical SUBREGs if all extra bits are masked out.  */
1656       if (GET_CODE (inner) == SUBREG
1657 	  && SCALAR_INT_MODE_P (GET_MODE (SUBREG_REG (inner)))
1658 	  && (GET_MODE_SIZE (GET_MODE (inner))
1659 	      >= GET_MODE_SIZE (GET_MODE (SUBREG_REG (inner))))
1660 	  && ((INTVAL (mask)
1661                & GET_MODE_MASK (GET_MODE (inner))
1662                & ~GET_MODE_MASK (GET_MODE (SUBREG_REG (inner))))
1663 	      == 0))
1664 	inner = SUBREG_REG (inner);
1665 
1666       /* Do not change volatile MEMs.  */
1667       if (MEM_P (inner) && !MEM_VOLATILE_P (inner))
1668 	{
1669 	  int part = s390_single_part (XEXP (*op0, 1),
1670 				       GET_MODE (inner), QImode, 0);
1671 	  if (part >= 0)
1672 	    {
1673 	      mask = gen_int_mode (s390_extract_part (mask, QImode, 0), QImode);
1674 	      inner = adjust_address_nv (inner, QImode, part);
1675 	      *op0 = gen_rtx_AND (QImode, inner, mask);
1676 	    }
1677 	}
1678     }
1679 
1680   /* Narrow comparisons against 0xffff to HImode if possible.  */
1681   if ((*code == EQ || *code == NE)
1682       && GET_CODE (*op1) == CONST_INT
1683       && INTVAL (*op1) == 0xffff
1684       && SCALAR_INT_MODE_P (GET_MODE (*op0))
1685       && (nonzero_bits (*op0, GET_MODE (*op0))
1686 	  & ~HOST_WIDE_INT_UC (0xffff)) == 0)
1687     {
1688       *op0 = gen_lowpart (HImode, *op0);
1689       *op1 = constm1_rtx;
1690     }
1691 
1692   /* Remove redundant UNSPEC_STRCMPCC_TO_INT conversions if possible.  */
1693   if (GET_CODE (*op0) == UNSPEC
1694       && XINT (*op0, 1) == UNSPEC_STRCMPCC_TO_INT
1695       && XVECLEN (*op0, 0) == 1
1696       && GET_MODE (XVECEXP (*op0, 0, 0)) == CCUmode
1697       && GET_CODE (XVECEXP (*op0, 0, 0)) == REG
1698       && REGNO (XVECEXP (*op0, 0, 0)) == CC_REGNUM
1699       && *op1 == const0_rtx)
1700     {
1701       enum rtx_code new_code = UNKNOWN;
1702       switch (*code)
1703 	{
1704 	  case EQ: new_code = EQ;  break;
1705 	  case NE: new_code = NE;  break;
1706 	  case LT: new_code = GTU; break;
1707 	  case GT: new_code = LTU; break;
1708 	  case LE: new_code = GEU; break;
1709 	  case GE: new_code = LEU; break;
1710 	  default: break;
1711 	}
1712 
1713       if (new_code != UNKNOWN)
1714 	{
1715 	  *op0 = XVECEXP (*op0, 0, 0);
1716 	  *code = new_code;
1717 	}
1718     }
1719 
1720   /* Remove redundant UNSPEC_CC_TO_INT conversions if possible.  */
1721   if (GET_CODE (*op0) == UNSPEC
1722       && XINT (*op0, 1) == UNSPEC_CC_TO_INT
1723       && XVECLEN (*op0, 0) == 1
1724       && GET_CODE (XVECEXP (*op0, 0, 0)) == REG
1725       && REGNO (XVECEXP (*op0, 0, 0)) == CC_REGNUM
1726       && CONST_INT_P (*op1))
1727     {
1728       enum rtx_code new_code = UNKNOWN;
1729       switch (GET_MODE (XVECEXP (*op0, 0, 0)))
1730 	{
1731 	case E_CCZmode:
1732 	case E_CCRAWmode:
1733 	  switch (*code)
1734 	    {
1735 	    case EQ: new_code = EQ;  break;
1736 	    case NE: new_code = NE;  break;
1737 	    default: break;
1738 	    }
1739 	  break;
1740 	default: break;
1741 	}
1742 
1743       if (new_code != UNKNOWN)
1744 	{
1745 	  /* For CCRAWmode put the required cc mask into the second
1746 	     operand.  */
1747 	  if (GET_MODE (XVECEXP (*op0, 0, 0)) == CCRAWmode
1748 	      && INTVAL (*op1) >= 0 && INTVAL (*op1) <= 3)
1749 	    *op1 = gen_rtx_CONST_INT (VOIDmode, 1 << (3 - INTVAL (*op1)));
1750 	  *op0 = XVECEXP (*op0, 0, 0);
1751 	  *code = new_code;
1752 	}
1753     }
1754 
1755   /* Simplify cascaded EQ, NE with const0_rtx.  */
1756   if ((*code == NE || *code == EQ)
1757       && (GET_CODE (*op0) == EQ || GET_CODE (*op0) == NE)
1758       && GET_MODE (*op0) == SImode
1759       && GET_MODE (XEXP (*op0, 0)) == CCZ1mode
1760       && REG_P (XEXP (*op0, 0))
1761       && XEXP (*op0, 1) == const0_rtx
1762       && *op1 == const0_rtx)
1763     {
1764       if ((*code == EQ && GET_CODE (*op0) == NE)
1765           || (*code == NE && GET_CODE (*op0) == EQ))
1766 	*code = EQ;
1767       else
1768 	*code = NE;
1769       *op0 = XEXP (*op0, 0);
1770     }
1771 
1772   /* Prefer register over memory as first operand.  */
1773   if (MEM_P (*op0) && REG_P (*op1))
1774     {
1775       rtx tem = *op0; *op0 = *op1; *op1 = tem;
1776       *code = (int)swap_condition ((enum rtx_code)*code);
1777     }
1778 
1779   /* A comparison result is compared against zero.  Replace it with
1780      the (perhaps inverted) original comparison.
1781      This probably should be done by simplify_relational_operation.  */
1782   if ((*code == EQ || *code == NE)
1783       && *op1 == const0_rtx
1784       && COMPARISON_P (*op0)
1785       && CC_REG_P (XEXP (*op0, 0)))
1786     {
1787       enum rtx_code new_code;
1788 
1789       if (*code == EQ)
1790 	new_code = reversed_comparison_code_parts (GET_CODE (*op0),
1791 						   XEXP (*op0, 0),
1792 						   XEXP (*op0, 1), NULL);
1793       else
1794 	new_code = GET_CODE (*op0);
1795 
1796       if (new_code != UNKNOWN)
1797 	{
1798 	  *code = new_code;
1799 	  *op1 = XEXP (*op0, 1);
1800 	  *op0 = XEXP (*op0, 0);
1801 	}
1802     }
1803 }
1804 
1805 
1806 /* Emit a compare instruction suitable to implement the comparison
1807    OP0 CODE OP1.  Return the correct condition RTL to be placed in
1808    the IF_THEN_ELSE of the conditional branch testing the result.  */
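/* A minimal usage sketch (the rtxes a, b and label are placeholders):
   to branch to label when a is greater than b one could emit

     rtx cond = s390_emit_compare (GT, a, b);
     s390_emit_jump (label, cond);

   which first places the comparison result into the CC register and
   then emits a conditional jump testing it.  */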
1809 
1810 rtx
1811 s390_emit_compare (enum rtx_code code, rtx op0, rtx op1)
1812 {
1813   machine_mode mode = s390_select_ccmode (code, op0, op1);
1814   rtx cc;
1815 
1816   if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC)
1817     {
1818       /* Do not output a redundant compare instruction if a
1819 	 compare_and_swap pattern already computed the result and the
1820 	 machine modes are compatible.  */
1821       gcc_assert (s390_cc_modes_compatible (GET_MODE (op0), mode)
1822 		  == GET_MODE (op0));
1823       cc = op0;
1824     }
1825   else
1826     {
1827       cc = gen_rtx_REG (mode, CC_REGNUM);
1828       emit_insn (gen_rtx_SET (cc, gen_rtx_COMPARE (mode, op0, op1)));
1829     }
1830 
1831   return gen_rtx_fmt_ee (code, VOIDmode, cc, const0_rtx);
1832 }
1833 
1834 /* Emit a compare and swap instruction (SImode, DImode or TImode) that
1835    sets MEM to NEW_RTX if the current contents of MEM match CMP.
1836    Return the correct condition RTL to be placed in the IF_THEN_ELSE of the
1837    conditional branch testing the result.  */
1838 
1839 static rtx
1840 s390_emit_compare_and_swap (enum rtx_code code, rtx old, rtx mem,
1841 			    rtx cmp, rtx new_rtx, machine_mode ccmode)
1842 {
1843   rtx cc;
1844 
1845   cc = gen_rtx_REG (ccmode, CC_REGNUM);
1846   switch (GET_MODE (mem))
1847     {
1848     case E_SImode:
1849       emit_insn (gen_atomic_compare_and_swapsi_internal (old, mem, cmp,
1850 							 new_rtx, cc));
1851       break;
1852     case E_DImode:
1853       emit_insn (gen_atomic_compare_and_swapdi_internal (old, mem, cmp,
1854 							 new_rtx, cc));
1855       break;
1856     case E_TImode:
1857       emit_insn (gen_atomic_compare_and_swapti_internal (old, mem, cmp,
1858 							 new_rtx, cc));
1859       break;
1860     case E_QImode:
1861     case E_HImode:
1862     default:
1863       gcc_unreachable ();
1864     }
1865   return s390_emit_compare (code, cc, const0_rtx);
1866 }
1867 
1868 /* Emit a jump instruction to TARGET and return it.  If COND is
1869    NULL_RTX, emit an unconditional jump, else a conditional jump under
1870    condition COND.  */
1871 
1872 rtx_insn *
1873 s390_emit_jump (rtx target, rtx cond)
1874 {
1875   rtx insn;
1876 
1877   target = gen_rtx_LABEL_REF (VOIDmode, target);
1878   if (cond)
1879     target = gen_rtx_IF_THEN_ELSE (VOIDmode, cond, target, pc_rtx);
1880 
1881   insn = gen_rtx_SET (pc_rtx, target);
1882   return emit_jump_insn (insn);
1883 }
1884 
1885 /* Return branch condition mask to implement a branch
1886    specified by CODE.  Return -1 for invalid comparisons.  */
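/* For illustration: each of the four S/390 condition codes corresponds
   to one bit of the 4-bit branch mask (CC0 is the most significant
   bit).  An (eq (reg CC) (const_int 0)) comparison in CCZmode, for
   instance, yields the mask CC0 == 8, while NE yields
   CC1 | CC2 | CC3 == 7.  */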
1887 
1888 int
1889 s390_branch_condition_mask (rtx code)
1890 {
1891   const int CC0 = 1 << 3;
1892   const int CC1 = 1 << 2;
1893   const int CC2 = 1 << 1;
1894   const int CC3 = 1 << 0;
1895 
1896   gcc_assert (GET_CODE (XEXP (code, 0)) == REG);
1897   gcc_assert (REGNO (XEXP (code, 0)) == CC_REGNUM);
1898   gcc_assert (XEXP (code, 1) == const0_rtx
1899 	      || (GET_MODE (XEXP (code, 0)) == CCRAWmode
1900 		  && CONST_INT_P (XEXP (code, 1))));
1901 
1902 
1903   switch (GET_MODE (XEXP (code, 0)))
1904     {
1905     case E_CCZmode:
1906     case E_CCZ1mode:
1907       switch (GET_CODE (code))
1908         {
1909         case EQ:	return CC0;
1910 	case NE:	return CC1 | CC2 | CC3;
1911 	default:	return -1;
1912         }
1913       break;
1914 
1915     case E_CCT1mode:
1916       switch (GET_CODE (code))
1917         {
1918         case EQ:	return CC1;
1919 	case NE:	return CC0 | CC2 | CC3;
1920 	default:	return -1;
1921         }
1922       break;
1923 
1924     case E_CCT2mode:
1925       switch (GET_CODE (code))
1926         {
1927         case EQ:	return CC2;
1928 	case NE:	return CC0 | CC1 | CC3;
1929 	default:	return -1;
1930         }
1931       break;
1932 
1933     case E_CCT3mode:
1934       switch (GET_CODE (code))
1935         {
1936         case EQ:	return CC3;
1937 	case NE:	return CC0 | CC1 | CC2;
1938 	default:	return -1;
1939         }
1940       break;
1941 
1942     case E_CCLmode:
1943       switch (GET_CODE (code))
1944         {
1945         case EQ:	return CC0 | CC2;
1946 	case NE:	return CC1 | CC3;
1947 	default:	return -1;
1948         }
1949       break;
1950 
1951     case E_CCL1mode:
1952       switch (GET_CODE (code))
1953         {
1954 	case LTU:	return CC2 | CC3;  /* carry */
1955 	case GEU:	return CC0 | CC1;  /* no carry */
1956 	default:	return -1;
1957         }
1958       break;
1959 
1960     case E_CCL2mode:
1961       switch (GET_CODE (code))
1962         {
1963 	case GTU:	return CC0 | CC1;  /* borrow */
1964 	case LEU:	return CC2 | CC3;  /* no borrow */
1965 	default:	return -1;
1966         }
1967       break;
1968 
1969     case E_CCL3mode:
1970       switch (GET_CODE (code))
1971 	{
1972 	case EQ:	return CC0 | CC2;
1973 	case NE:	return CC1 | CC3;
1974 	case LTU:	return CC1;
1975 	case GTU:	return CC3;
1976 	case LEU:	return CC1 | CC2;
1977 	case GEU:	return CC2 | CC3;
1978 	default:	return -1;
1979 	}
1980 
1981     case E_CCUmode:
1982       switch (GET_CODE (code))
1983         {
1984         case EQ:	return CC0;
1985         case NE:	return CC1 | CC2 | CC3;
1986         case LTU:	return CC1;
1987         case GTU:	return CC2;
1988         case LEU:	return CC0 | CC1;
1989         case GEU:	return CC0 | CC2;
1990 	default:	return -1;
1991         }
1992       break;
1993 
1994     case E_CCURmode:
1995       switch (GET_CODE (code))
1996         {
1997         case EQ:	return CC0;
1998         case NE:	return CC2 | CC1 | CC3;
1999         case LTU:	return CC2;
2000         case GTU:	return CC1;
2001         case LEU:	return CC0 | CC2;
2002         case GEU:	return CC0 | CC1;
2003 	default:	return -1;
2004         }
2005       break;
2006 
2007     case E_CCAPmode:
2008       switch (GET_CODE (code))
2009         {
2010         case EQ:	return CC0;
2011         case NE:	return CC1 | CC2 | CC3;
2012         case LT:	return CC1 | CC3;
2013         case GT:	return CC2;
2014         case LE:	return CC0 | CC1 | CC3;
2015         case GE:	return CC0 | CC2;
2016 	default:	return -1;
2017         }
2018       break;
2019 
2020     case E_CCANmode:
2021       switch (GET_CODE (code))
2022         {
2023         case EQ:	return CC0;
2024         case NE:	return CC1 | CC2 | CC3;
2025         case LT:	return CC1;
2026         case GT:	return CC2 | CC3;
2027         case LE:	return CC0 | CC1;
2028         case GE:	return CC0 | CC2 | CC3;
2029 	default:	return -1;
2030         }
2031       break;
2032 
2033     case E_CCSmode:
2034       switch (GET_CODE (code))
2035         {
2036         case EQ:	return CC0;
2037         case NE:	return CC1 | CC2 | CC3;
2038         case LT:	return CC1;
2039         case GT:	return CC2;
2040         case LE:	return CC0 | CC1;
2041         case GE:	return CC0 | CC2;
2042 	case UNORDERED:	return CC3;
2043 	case ORDERED:	return CC0 | CC1 | CC2;
2044 	case UNEQ:	return CC0 | CC3;
2045         case UNLT:	return CC1 | CC3;
2046         case UNGT:	return CC2 | CC3;
2047         case UNLE:	return CC0 | CC1 | CC3;
2048         case UNGE:	return CC0 | CC2 | CC3;
2049 	case LTGT:	return CC1 | CC2;
2050 	default:	return -1;
2051         }
2052       break;
2053 
2054     case E_CCSRmode:
2055       switch (GET_CODE (code))
2056         {
2057         case EQ:	return CC0;
2058         case NE:	return CC2 | CC1 | CC3;
2059         case LT:	return CC2;
2060         case GT:	return CC1;
2061         case LE:	return CC0 | CC2;
2062         case GE:	return CC0 | CC1;
2063 	case UNORDERED:	return CC3;
2064 	case ORDERED:	return CC0 | CC2 | CC1;
2065 	case UNEQ:	return CC0 | CC3;
2066         case UNLT:	return CC2 | CC3;
2067         case UNGT:	return CC1 | CC3;
2068         case UNLE:	return CC0 | CC2 | CC3;
2069         case UNGE:	return CC0 | CC1 | CC3;
2070 	case LTGT:	return CC2 | CC1;
2071 	default:	return -1;
2072         }
2073       break;
2074 
2075       /* Vector comparison modes.  */
2076       /* CC2 will never be set.  It is, however, part of the negated
2077 	 masks.  */
2078     case E_CCVIALLmode:
2079       switch (GET_CODE (code))
2080 	{
2081 	case EQ:
2082 	case GTU:
2083 	case GT:
2084 	case GE:        return CC0;
2085 	  /* The inverted modes are in fact *any* modes.  */
2086 	case NE:
2087 	case LEU:
2088 	case LE:
2089 	case LT:        return CC3 | CC1 | CC2;
2090 	default:        return -1;
2091 	}
2092 
2093     case E_CCVIANYmode:
2094       switch (GET_CODE (code))
2095 	{
2096 	case EQ:
2097 	case GTU:
2098 	case GT:
2099 	case GE:        return CC0 | CC1;
2100 	  /* The inverted modes are in fact *all* modes.  */
2101 	case NE:
2102 	case LEU:
2103 	case LE:
2104 	case LT:        return CC3 | CC2;
2105 	default:        return -1;
2106 	}
2107     case E_CCVFALLmode:
2108       switch (GET_CODE (code))
2109 	{
2110 	case EQ:
2111 	case GT:
2112 	case GE:        return CC0;
2113 	  /* The inverted modes are in fact *any* modes.  */
2114 	case NE:
2115 	case UNLE:
2116 	case UNLT:      return CC3 | CC1 | CC2;
2117 	default:        return -1;
2118 	}
2119 
2120     case E_CCVFANYmode:
2121       switch (GET_CODE (code))
2122 	{
2123 	case EQ:
2124 	case GT:
2125 	case GE:        return CC0 | CC1;
2126 	  /* The inverted modes are in fact *all* modes.  */
2127 	case NE:
2128 	case UNLE:
2129 	case UNLT:      return CC3 | CC2;
2130 	default:        return -1;
2131 	}
2132 
2133     case E_CCRAWmode:
2134       switch (GET_CODE (code))
2135 	{
2136 	case EQ:
2137 	  return INTVAL (XEXP (code, 1));
2138 	case NE:
2139 	  return (INTVAL (XEXP (code, 1))) ^ 0xf;
2140 	default:
2141 	  gcc_unreachable ();
2142 	}
2143 
2144     default:
2145       return -1;
2146     }
2147 }
2148 
2149 
2150 /* Return branch condition mask to implement a compare and branch
2151    specified by CODE.  Return -1 for invalid comparisons.  */
2152 
2153 int
2154 s390_compare_and_branch_condition_mask (rtx code)
2155 {
2156   const int CC0 = 1 << 3;
2157   const int CC1 = 1 << 2;
2158   const int CC2 = 1 << 1;
2159 
2160   switch (GET_CODE (code))
2161     {
2162     case EQ:
2163       return CC0;
2164     case NE:
2165       return CC1 | CC2;
2166     case LT:
2167     case LTU:
2168       return CC1;
2169     case GT:
2170     case GTU:
2171       return CC2;
2172     case LE:
2173     case LEU:
2174       return CC0 | CC1;
2175     case GE:
2176     case GEU:
2177       return CC0 | CC2;
2178     default:
2179       gcc_unreachable ();
2180     }
2181   return -1;
2182 }
2183 
2184 /* If INV is false, return assembler mnemonic string to implement
2185    a branch specified by CODE.  If INV is true, return mnemonic
2186    for the corresponding inverted branch.  */
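/* E.g. for (gt (reg CC) (const_int 0)) in CCSmode the mask is CC2 == 2,
   so the returned mnemonic is "h"; with INV set the mask becomes 13 and
   the mnemonic "nh" (a sketch based on the mask and mnemonic tables
   used below).  */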
2187 
2188 static const char *
2189 s390_branch_condition_mnemonic (rtx code, int inv)
2190 {
2191   int mask;
2192 
2193   static const char *const mnemonic[16] =
2194     {
2195       NULL, "o", "h", "nle",
2196       "l", "nhe", "lh", "ne",
2197       "e", "nlh", "he", "nl",
2198       "le", "nh", "no", NULL
2199     };
2200 
2201   if (GET_CODE (XEXP (code, 0)) == REG
2202       && REGNO (XEXP (code, 0)) == CC_REGNUM
2203       && (XEXP (code, 1) == const0_rtx
2204 	  || (GET_MODE (XEXP (code, 0)) == CCRAWmode
2205 	      && CONST_INT_P (XEXP (code, 1)))))
2206     mask = s390_branch_condition_mask (code);
2207   else
2208     mask = s390_compare_and_branch_condition_mask (code);
2209 
2210   gcc_assert (mask >= 0);
2211 
2212   if (inv)
2213     mask ^= 15;
2214 
2215   gcc_assert (mask >= 1 && mask <= 14);
2216 
2217   return mnemonic[mask];
2218 }
2219 
2220 /* Return the part of op which has a value different from def.
2221    The size of the part is determined by mode.
2222    Use this function only if you already know that op really
2223    contains such a part.  */
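/* For illustration: s390_extract_part (GEN_INT (0x00ff0000), HImode, 0)
   scans the HImode chunks from the least significant one upwards and
   returns 0x00ff, the first (and here only) chunk differing from 0.  */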
2224 
2225 unsigned HOST_WIDE_INT
2226 s390_extract_part (rtx op, machine_mode mode, int def)
2227 {
2228   unsigned HOST_WIDE_INT value = 0;
2229   int max_parts = HOST_BITS_PER_WIDE_INT / GET_MODE_BITSIZE (mode);
2230   int part_bits = GET_MODE_BITSIZE (mode);
2231   unsigned HOST_WIDE_INT part_mask = (HOST_WIDE_INT_1U << part_bits) - 1;
2232   int i;
2233 
2234   for (i = 0; i < max_parts; i++)
2235     {
2236       if (i == 0)
2237 	value = UINTVAL (op);
2238       else
2239 	value >>= part_bits;
2240 
2241       if ((value & part_mask) != (def & part_mask))
2242 	return value & part_mask;
2243     }
2244 
2245   gcc_unreachable ();
2246 }
2247 
2248 /* If OP is an integer constant of mode MODE with exactly one
2249    part of mode PART_MODE unequal to DEF, return the number of that
2250    part. Otherwise, return -1.  */
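/* For illustration: s390_single_part (GEN_INT (0x00ff0000), SImode,
   HImode, 0) returns 0, i.e. only the most significant HImode part of
   the SImode value differs from 0; parts are numbered starting from the
   most significant one.  */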
2251 
2252 int
2253 s390_single_part (rtx op,
2254 		  machine_mode mode,
2255 		  machine_mode part_mode,
2256 		  int def)
2257 {
2258   unsigned HOST_WIDE_INT value = 0;
2259   int n_parts = GET_MODE_SIZE (mode) / GET_MODE_SIZE (part_mode);
2260   unsigned HOST_WIDE_INT part_mask
2261     = (HOST_WIDE_INT_1U << GET_MODE_BITSIZE (part_mode)) - 1;
2262   int i, part = -1;
2263 
2264   if (GET_CODE (op) != CONST_INT)
2265     return -1;
2266 
2267   for (i = 0; i < n_parts; i++)
2268     {
2269       if (i == 0)
2270 	value = UINTVAL (op);
2271       else
2272 	value >>= GET_MODE_BITSIZE (part_mode);
2273 
2274       if ((value & part_mask) != (def & part_mask))
2275 	{
2276 	  if (part != -1)
2277 	    return -1;
2278 	  else
2279 	    part = i;
2280 	}
2281     }
2282   return part == -1 ? -1 : n_parts - 1 - part;
2283 }
2284 
2285 /* Return true if IN contains a contiguous bitfield in the lower SIZE
2286    bits and no other bits are set in (the lower SIZE bits of) IN.
2287 
2288    PSTART and PEND can be used to obtain the start and end
2289    position (inclusive) of the bitfield relative to 64
2290    bits. *PSTART / *PEND gives the position of the first/last bit
2291    of the bitfield counting from the highest order bit starting
2292    with zero.  */
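/* For illustration: for IN = 0x0ff0 and SIZE = 16 the function returns
   true with *PSTART = 52 and *PEND = 59, since relative to 64 bits the
   contiguous field occupies bits 52 to 59 counted from the most
   significant bit.  */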
2293 
2294 bool
2295 s390_contiguous_bitmask_nowrap_p (unsigned HOST_WIDE_INT in, int size,
2296 				  int *pstart, int *pend)
2297 {
2298   int start;
2299   int end = -1;
2300   int lowbit = HOST_BITS_PER_WIDE_INT - 1;
2301   int highbit = HOST_BITS_PER_WIDE_INT - size;
2302   unsigned HOST_WIDE_INT bitmask = HOST_WIDE_INT_1U;
2303 
2304   gcc_assert (!!pstart == !!pend);
2305   for (start = lowbit; start >= highbit; bitmask <<= 1, start--)
2306     if (end == -1)
2307       {
2308 	/* Look for the rightmost bit of a contiguous range of ones.  */
2309 	if (bitmask & in)
2310 	  /* Found it.  */
2311 	  end = start;
2312       }
2313     else
2314       {
2315 	/* Look for the first zero bit after the range of ones.  */
2316 	if (! (bitmask & in))
2317 	  /* Found it.  */
2318 	  break;
2319       }
2320   /* We're one past the last one-bit.  */
2321   start++;
2322 
2323   if (end == -1)
2324     /* No one bits found.  */
2325     return false;
2326 
2327   if (start > highbit)
2328     {
2329       unsigned HOST_WIDE_INT mask;
2330 
2331       /* Calculate a mask for all bits beyond the contiguous bits.  */
2332       mask = ((~HOST_WIDE_INT_0U >> highbit)
2333 	      & (~HOST_WIDE_INT_0U << (lowbit - start + 1)));
2334       if (mask & in)
2335 	/* There are more bits set beyond the first range of one bits.  */
2336 	return false;
2337     }
2338 
2339   if (pstart)
2340     {
2341       *pstart = start;
2342       *pend = end;
2343     }
2344 
2345   return true;
2346 }
2347 
2348 /* Same as s390_contiguous_bitmask_nowrap_p but also returns true
2349    if ~IN contains a contiguous bitfield.  In that case, *END is <
2350    *START.
2351 
2352    If WRAP_P is true, a bitmask that wraps around is also tested.
2353    When a wraparound occurs *START is greater than *END (in
2354    non-null pointers), and the uppermost (64 - SIZE) bits are thus
2355    part of the range.  If WRAP_P is false, no wraparound is
2356    tested.  */
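/* For illustration: IN = 0x8000000000000001 with SIZE = 64 is rejected
   by s390_contiguous_bitmask_nowrap_p, but with WRAP_P it is accepted
   here as a wrapping mask, returning *START = 63 and *END = 0.  */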
2357 
2358 bool
2359 s390_contiguous_bitmask_p (unsigned HOST_WIDE_INT in, bool wrap_p,
2360 			   int size, int *start, int *end)
2361 {
2362   int bs = HOST_BITS_PER_WIDE_INT;
2363   bool b;
2364 
2365   gcc_assert (!!start == !!end);
2366   if ((in & ((~HOST_WIDE_INT_0U) >> (bs - size))) == 0)
2367     /* This cannot be expressed as a contiguous bitmask.  Exit early because
2368        the second call of s390_contiguous_bitmask_nowrap_p would accept this as
2369        a valid bitmask.  */
2370     return false;
2371   b = s390_contiguous_bitmask_nowrap_p (in, size, start, end);
2372   if (b)
2373     return true;
2374   if (! wrap_p)
2375     return false;
2376   b = s390_contiguous_bitmask_nowrap_p (~in, size, start, end);
2377   if (b && start)
2378     {
2379       int s = *start;
2380       int e = *end;
2381 
2382       gcc_assert (s >= 1);
2383       *start = ((e + 1) & (bs - 1));
2384       *end = ((s - 1 + bs) & (bs - 1));
2385     }
2386 
2387   return b;
2388 }
2389 
2390 /* Return true if OP contains the same contiguous bitfield in *all*
2391    its elements.  START and END can be used to obtain the start and
2392    end position of the bitfield.
2393 
2394    START/END give the position of the first/last bit of the bitfield
2395    counting from the lowest order bit starting with zero.  In order to
2396    use these values for S/390 instructions this has to be converted to
2397    "bits big endian" style.  */
2398 
2399 bool
2400 s390_contiguous_bitmask_vector_p (rtx op, int *start, int *end)
2401 {
2402   unsigned HOST_WIDE_INT mask;
2403   int size;
2404   rtx elt;
2405   bool b;
2406 
2407   gcc_assert (!!start == !!end);
2408   if (!const_vec_duplicate_p (op, &elt)
2409       || !CONST_INT_P (elt))
2410     return false;
2411 
2412   size = GET_MODE_UNIT_BITSIZE (GET_MODE (op));
2413 
2414   /* We cannot deal with V1TI/V1TF. This would require a vgmq.  */
2415   if (size > 64)
2416     return false;
2417 
2418   mask = UINTVAL (elt);
2419 
2420   b = s390_contiguous_bitmask_p (mask, true, size, start, end);
2421   if (b)
2422     {
2423       if (start)
2424 	{
2425 	  *start -= (HOST_BITS_PER_WIDE_INT - size);
2426 	  *end -= (HOST_BITS_PER_WIDE_INT - size);
2427 	}
2428       return true;
2429     }
2430   else
2431     return false;
2432 }
2433 
2434 /* Return true if OP consists only of byte chunks being either 0 or
2435    0xff.  If MASK is != NULL a byte mask is generated which is
2436    appropriate for the vector generate byte mask instruction.  */
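/* For illustration: a V4SImode constant vector { 0xffffffff, 0, 0, 0 }
   satisfies this predicate; the generated byte mask is 0xf000, i.e. the
   four mask bits corresponding to the bytes of the first element.  */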
2437 
2438 bool
2439 s390_bytemask_vector_p (rtx op, unsigned *mask)
2440 {
2441   int i;
2442   unsigned tmp_mask = 0;
2443   int nunit, unit_size;
2444 
2445   if (!VECTOR_MODE_P (GET_MODE (op))
2446       || GET_CODE (op) != CONST_VECTOR
2447       || !CONST_INT_P (XVECEXP (op, 0, 0)))
2448     return false;
2449 
2450   nunit = GET_MODE_NUNITS (GET_MODE (op));
2451   unit_size = GET_MODE_UNIT_SIZE (GET_MODE (op));
2452 
2453   for (i = 0; i < nunit; i++)
2454     {
2455       unsigned HOST_WIDE_INT c;
2456       int j;
2457 
2458       if (!CONST_INT_P (XVECEXP (op, 0, i)))
2459 	return false;
2460 
2461       c = UINTVAL (XVECEXP (op, 0, i));
2462       for (j = 0; j < unit_size; j++)
2463 	{
2464 	  if ((c & 0xff) != 0 && (c & 0xff) != 0xff)
2465 	    return false;
2466 	  tmp_mask |= (c & 1) << ((nunit - 1 - i) * unit_size + j);
2467 	  c = c >> BITS_PER_UNIT;
2468 	}
2469     }
2470 
2471   if (mask != NULL)
2472     *mask = tmp_mask;
2473 
2474   return true;
2475 }
2476 
2477 /* Check whether a rotate of ROTL followed by an AND of CONTIG is
2478    equivalent to a shift followed by the AND.  In particular, CONTIG
2479    should not overlap the (rotated) bit 0/bit 63 gap.  Negative values
2480    for ROTL indicate a rotate to the right.  */
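/* For illustration: with BITSIZE = 64, a rotate left by 8 followed by
   an AND with CONTIG = 0xff00 is equivalent to a shift (the selected
   bits originate from bits 0-7), so the function returns true; for
   CONTIG = 0xff the selected bits would come from bits 56-63 across the
   bit 63/bit 0 gap, so it returns false.  */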
2481 
2482 bool
2483 s390_extzv_shift_ok (int bitsize, int rotl, unsigned HOST_WIDE_INT contig)
2484 {
2485   int start, end;
2486   bool ok;
2487 
2488   ok = s390_contiguous_bitmask_nowrap_p (contig, bitsize, &start, &end);
2489   gcc_assert (ok);
2490 
2491   if (rotl >= 0)
2492     return (64 - end >= rotl);
2493   else
2494     {
2495       /* Translate "- rotate right" in BITSIZE mode to "rotate left" in
2496 	 DImode.  */
2497       rotl = -rotl + (64 - bitsize);
2498       return (start >= rotl);
2499     }
2500 }
2501 
2502 /* Check whether we can (and want to) split a double-word
2503    move in mode MODE from SRC to DST into two single-word
2504    moves, moving the subword FIRST_SUBWORD first.  */
2505 
2506 bool
2507 s390_split_ok_p (rtx dst, rtx src, machine_mode mode, int first_subword)
2508 {
2509   /* Floating point and vector registers cannot be split.  */
2510   if (FP_REG_P (src) || FP_REG_P (dst) || VECTOR_REG_P (src) || VECTOR_REG_P (dst))
2511     return false;
2512 
2513   /* Non-offsettable memory references cannot be split.  */
2514   if ((GET_CODE (src) == MEM && !offsettable_memref_p (src))
2515       || (GET_CODE (dst) == MEM && !offsettable_memref_p (dst)))
2516     return false;
2517 
2518   /* Moving the first subword must not clobber a register
2519      needed to move the second subword.  */
2520   if (register_operand (dst, mode))
2521     {
2522       rtx subreg = operand_subword (dst, first_subword, 0, mode);
2523       if (reg_overlap_mentioned_p (subreg, src))
2524         return false;
2525     }
2526 
2527   return true;
2528 }
2529 
2530 /* Return true if it can be proven that [MEM1, MEM1 + SIZE]
2531    and [MEM2, MEM2 + SIZE] overlap, and false
2532    otherwise.  */
2533 
2534 bool
2535 s390_overlap_p (rtx mem1, rtx mem2, HOST_WIDE_INT size)
2536 {
2537   rtx addr1, addr2, addr_delta;
2538   HOST_WIDE_INT delta;
2539 
2540   if (GET_CODE (mem1) != MEM || GET_CODE (mem2) != MEM)
2541     return true;
2542 
2543   if (size == 0)
2544     return false;
2545 
2546   addr1 = XEXP (mem1, 0);
2547   addr2 = XEXP (mem2, 0);
2548 
2549   addr_delta = simplify_binary_operation (MINUS, Pmode, addr2, addr1);
2550 
2551   /* This overlapping check is used by peepholes merging memory block operations.
2552      Overlapping operations would otherwise be recognized by the S/390 hardware
2553      and would fall back to a slower implementation. Allowing overlapping
2554      operations would lead to slow code but not to wrong code. Therefore we are
2555      somewhat optimistic if we cannot prove that the memory blocks are
2556      overlapping.
2557      That's why we return false here although this may accept operations on
2558      overlapping memory areas.  */
2559   if (!addr_delta || GET_CODE (addr_delta) != CONST_INT)
2560     return false;
2561 
2562   delta = INTVAL (addr_delta);
2563 
2564   if (delta == 0
2565       || (delta > 0 && delta < size)
2566       || (delta < 0 && -delta < size))
2567     return true;
2568 
2569   return false;
2570 }
2571 
2572 /* Check whether the address of memory reference MEM2 equals exactly
2573    the address of memory reference MEM1 plus DELTA.  Return true if
2574    we can prove this to be the case, false otherwise.  */
2575 
2576 bool
2577 s390_offset_p (rtx mem1, rtx mem2, rtx delta)
2578 {
2579   rtx addr1, addr2, addr_delta;
2580 
2581   if (GET_CODE (mem1) != MEM || GET_CODE (mem2) != MEM)
2582     return false;
2583 
2584   addr1 = XEXP (mem1, 0);
2585   addr2 = XEXP (mem2, 0);
2586 
2587   addr_delta = simplify_binary_operation (MINUS, Pmode, addr2, addr1);
2588   if (!addr_delta || !rtx_equal_p (addr_delta, delta))
2589     return false;
2590 
2591   return true;
2592 }
2593 
2594 /* Expand logical operator CODE in mode MODE with operands OPERANDS.  */
2595 
2596 void
2597 s390_expand_logical_operator (enum rtx_code code, machine_mode mode,
2598 			      rtx *operands)
2599 {
2600   machine_mode wmode = mode;
2601   rtx dst = operands[0];
2602   rtx src1 = operands[1];
2603   rtx src2 = operands[2];
2604   rtx op, clob, tem;
2605 
2606   /* If we cannot handle the operation directly, use a temp register.  */
2607   if (!s390_logical_operator_ok_p (operands))
2608     dst = gen_reg_rtx (mode);
2609 
2610   /* QImode and HImode patterns make sense only if we have a destination
2611      in memory.  Otherwise perform the operation in SImode.  */
2612   if ((mode == QImode || mode == HImode) && GET_CODE (dst) != MEM)
2613     wmode = SImode;
2614 
2615   /* Widen operands if required.  */
2616   if (mode != wmode)
2617     {
2618       if (GET_CODE (dst) == SUBREG
2619 	  && (tem = simplify_subreg (wmode, dst, mode, 0)) != 0)
2620 	dst = tem;
2621       else if (REG_P (dst))
2622 	dst = gen_rtx_SUBREG (wmode, dst, 0);
2623       else
2624         dst = gen_reg_rtx (wmode);
2625 
2626       if (GET_CODE (src1) == SUBREG
2627 	  && (tem = simplify_subreg (wmode, src1, mode, 0)) != 0)
2628 	src1 = tem;
2629       else if (GET_MODE (src1) != VOIDmode)
2630 	src1 = gen_rtx_SUBREG (wmode, force_reg (mode, src1), 0);
2631 
2632       if (GET_CODE (src2) == SUBREG
2633 	  && (tem = simplify_subreg (wmode, src2, mode, 0)) != 0)
2634 	src2 = tem;
2635       else if (GET_MODE (src2) != VOIDmode)
2636 	src2 = gen_rtx_SUBREG (wmode, force_reg (mode, src2), 0);
2637     }
2638 
2639   /* Emit the instruction.  */
2640   op = gen_rtx_SET (dst, gen_rtx_fmt_ee (code, wmode, src1, src2));
2641   clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, CC_REGNUM));
2642   emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
2643 
2644   /* Fix up the destination if needed.  */
2645   if (dst != operands[0])
2646     emit_move_insn (operands[0], gen_lowpart (mode, dst));
2647 }
2648 
2649 /* Check whether OPERANDS are OK for a logical operation (AND, IOR, XOR).  */
2650 
2651 bool
2652 s390_logical_operator_ok_p (rtx *operands)
2653 {
2654   /* If the destination operand is in memory, it needs to coincide
2655      with one of the source operands.  After reload, it has to be
2656      the first source operand.  */
2657   if (GET_CODE (operands[0]) == MEM)
2658     return rtx_equal_p (operands[0], operands[1])
2659 	   || (!reload_completed && rtx_equal_p (operands[0], operands[2]));
2660 
2661   return true;
2662 }
2663 
2664 /* Narrow logical operation CODE of memory operand MEMOP with immediate
2665    operand IMMOP to switch from SS to SI type instructions.  */
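/* For illustration: an SImode AND of a memory operand with the constant
   0xffffff00 is narrowed here to a QImode AND of the byte at offset 3
   (the least significant byte on this big-endian target) with the mask
   0x00.  */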
2666 
2667 void
2668 s390_narrow_logical_operator (enum rtx_code code, rtx *memop, rtx *immop)
2669 {
2670   int def = code == AND ? -1 : 0;
2671   HOST_WIDE_INT mask;
2672   int part;
2673 
2674   gcc_assert (GET_CODE (*memop) == MEM);
2675   gcc_assert (!MEM_VOLATILE_P (*memop));
2676 
2677   mask = s390_extract_part (*immop, QImode, def);
2678   part = s390_single_part (*immop, GET_MODE (*memop), QImode, def);
2679   gcc_assert (part >= 0);
2680 
2681   *memop = adjust_address (*memop, QImode, part);
2682   *immop = gen_int_mode (mask, QImode);
2683 }
2684 
2685 
2686 /* How to allocate a 'struct machine_function'.  */
2687 
2688 static struct machine_function *
2689 s390_init_machine_status (void)
2690 {
2691   return ggc_cleared_alloc<machine_function> ();
2692 }
2693 
2694 /* Map for smallest class containing reg regno.  */
2695 
2696 const enum reg_class regclass_map[FIRST_PSEUDO_REGISTER] =
2697 { GENERAL_REGS, ADDR_REGS, ADDR_REGS, ADDR_REGS,  /*  0 */
2698   ADDR_REGS,    ADDR_REGS, ADDR_REGS, ADDR_REGS,  /*  4 */
2699   ADDR_REGS,    ADDR_REGS, ADDR_REGS, ADDR_REGS,  /*  8 */
2700   ADDR_REGS,    ADDR_REGS, ADDR_REGS, ADDR_REGS,  /* 12 */
2701   FP_REGS,      FP_REGS,   FP_REGS,   FP_REGS,    /* 16 */
2702   FP_REGS,      FP_REGS,   FP_REGS,   FP_REGS,    /* 20 */
2703   FP_REGS,      FP_REGS,   FP_REGS,   FP_REGS,    /* 24 */
2704   FP_REGS,      FP_REGS,   FP_REGS,   FP_REGS,    /* 28 */
2705   ADDR_REGS,    CC_REGS,   ADDR_REGS, ADDR_REGS,  /* 32 */
2706   ACCESS_REGS,	ACCESS_REGS, VEC_REGS, VEC_REGS,  /* 36 */
2707   VEC_REGS, VEC_REGS, VEC_REGS, VEC_REGS,         /* 40 */
2708   VEC_REGS, VEC_REGS, VEC_REGS, VEC_REGS,         /* 44 */
2709   VEC_REGS, VEC_REGS, VEC_REGS, VEC_REGS,         /* 48 */
2710   VEC_REGS, VEC_REGS                              /* 52 */
2711 };
2712 
2713 /* Return attribute type of insn.  */
2714 
2715 static enum attr_type
2716 s390_safe_attr_type (rtx_insn *insn)
2717 {
2718   if (recog_memoized (insn) >= 0)
2719     return get_attr_type (insn);
2720   else
2721     return TYPE_NONE;
2722 }
2723 
2724 /* Return true if DISP is a valid short displacement.  */
2725 
2726 static bool
2727 s390_short_displacement (rtx disp)
2728 {
2729   /* No displacement is OK.  */
2730   if (!disp)
2731     return true;
2732 
2733   /* Without the long displacement facility we don't need to
2734      distinguish between long and short displacements.  */
2735   if (!TARGET_LONG_DISPLACEMENT)
2736     return true;
2737 
2738   /* Integer displacement in range.  */
2739   if (GET_CODE (disp) == CONST_INT)
2740     return INTVAL (disp) >= 0 && INTVAL (disp) < 4096;
2741 
2742   /* GOT offset is not OK, the GOT can be large.  */
2743   if (GET_CODE (disp) == CONST
2744       && GET_CODE (XEXP (disp, 0)) == UNSPEC
2745       && (XINT (XEXP (disp, 0), 1) == UNSPEC_GOT
2746           || XINT (XEXP (disp, 0), 1) == UNSPEC_GOTNTPOFF))
2747     return false;
2748 
2749   /* All other symbolic constants are literal pool references,
2750      which are OK as the literal pool must be small.  */
2751   if (GET_CODE (disp) == CONST)
2752     return true;
2753 
2754   return false;
2755 }
2756 
2757 /* Decompose a RTL expression ADDR for a memory address into
2758    its components, returned in OUT.
2759 
2760    Returns false if ADDR is not a valid memory address, true
2761    otherwise.  If OUT is NULL, don't return the components,
2762    but check for validity only.
2763 
2764    Note: Only addresses in canonical form are recognized.
2765    LEGITIMIZE_ADDRESS should convert non-canonical forms to the
2766    canonical form so that they will be recognized.  */
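/* For illustration: an address of the form
     (plus (plus (reg r1) (reg r2)) (const_int 16))
   is split into index r1, base r2 and displacement 16, matching the
   S/390 index + base + displacement addressing form (the base/index
   roles may still be swapped below if only the index is known to be a
   pointer).  */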
2767 
2768 static int
2769 s390_decompose_address (rtx addr, struct s390_address *out)
2770 {
2771   HOST_WIDE_INT offset = 0;
2772   rtx base = NULL_RTX;
2773   rtx indx = NULL_RTX;
2774   rtx disp = NULL_RTX;
2775   rtx orig_disp;
2776   bool pointer = false;
2777   bool base_ptr = false;
2778   bool indx_ptr = false;
2779   bool literal_pool = false;
2780 
2781   /* We may need to substitute the literal pool base register into the address
2782      below.  However, at this point we do not know which register is going to
2783      be used as base, so we substitute the arg pointer register.  This is going
2784      to be treated as holding a pointer below -- it shouldn't be used for any
2785      other purpose.  */
2786   rtx fake_pool_base = gen_rtx_REG (Pmode, ARG_POINTER_REGNUM);
2787 
2788   /* Decompose address into base + index + displacement.  */
2789 
2790   if (GET_CODE (addr) == REG || GET_CODE (addr) == UNSPEC)
2791     base = addr;
2792 
2793   else if (GET_CODE (addr) == PLUS)
2794     {
2795       rtx op0 = XEXP (addr, 0);
2796       rtx op1 = XEXP (addr, 1);
2797       enum rtx_code code0 = GET_CODE (op0);
2798       enum rtx_code code1 = GET_CODE (op1);
2799 
2800       if (code0 == REG || code0 == UNSPEC)
2801 	{
2802 	  if (code1 == REG || code1 == UNSPEC)
2803 	    {
2804 	      indx = op0;	/* index + base */
2805 	      base = op1;
2806 	    }
2807 
2808 	  else
2809 	    {
2810 	      base = op0;	/* base + displacement */
2811 	      disp = op1;
2812 	    }
2813 	}
2814 
2815       else if (code0 == PLUS)
2816 	{
2817 	  indx = XEXP (op0, 0);	/* index + base + disp */
2818 	  base = XEXP (op0, 1);
2819 	  disp = op1;
2820 	}
2821 
2822       else
2823 	{
2824 	  return false;
2825 	}
2826     }
2827 
2828   else
2829     disp = addr;		/* displacement */
2830 
2831   /* Extract integer part of displacement.  */
2832   orig_disp = disp;
2833   if (disp)
2834     {
2835       if (GET_CODE (disp) == CONST_INT)
2836 	{
2837 	  offset = INTVAL (disp);
2838 	  disp = NULL_RTX;
2839 	}
2840       else if (GET_CODE (disp) == CONST
2841 	       && GET_CODE (XEXP (disp, 0)) == PLUS
2842 	       && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
2843 	{
2844 	  offset = INTVAL (XEXP (XEXP (disp, 0), 1));
2845 	  disp = XEXP (XEXP (disp, 0), 0);
2846 	}
2847     }
2848 
2849   /* Strip off CONST here to avoid special case tests later.  */
2850   if (disp && GET_CODE (disp) == CONST)
2851     disp = XEXP (disp, 0);
2852 
2853   /* We can convert literal pool addresses to
2854      displacements by basing them off the base register.  */
2855   if (disp && GET_CODE (disp) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (disp))
2856     {
2857       if (base || indx)
2858 	return false;
2859 
2860       base = fake_pool_base, literal_pool = true;
2861 
2862       /* Mark up the displacement.  */
2863       disp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, disp),
2864 			     UNSPEC_LTREL_OFFSET);
2865     }
2866 
2867   /* Validate base register.  */
2868   if (base)
2869     {
2870       if (GET_CODE (base) == UNSPEC)
2871 	switch (XINT (base, 1))
2872 	  {
2873 	  case UNSPEC_LTREF:
2874 	    if (!disp)
2875 	      disp = gen_rtx_UNSPEC (Pmode,
2876 				     gen_rtvec (1, XVECEXP (base, 0, 0)),
2877 				     UNSPEC_LTREL_OFFSET);
2878 	    else
2879 	      return false;
2880 
2881 	    base = XVECEXP (base, 0, 1);
2882 	    break;
2883 
2884 	  case UNSPEC_LTREL_BASE:
2885 	    if (XVECLEN (base, 0) == 1)
2886 	      base = fake_pool_base, literal_pool = true;
2887 	    else
2888 	      base = XVECEXP (base, 0, 1);
2889 	    break;
2890 
2891 	  default:
2892 	    return false;
2893 	  }
2894 
2895       if (!REG_P (base) || GET_MODE (base) != Pmode)
2896 	return false;
2897 
2898       if (REGNO (base) == STACK_POINTER_REGNUM
2899 	  || REGNO (base) == FRAME_POINTER_REGNUM
2900 	  || ((reload_completed || reload_in_progress)
2901 	      && frame_pointer_needed
2902 	      && REGNO (base) == HARD_FRAME_POINTER_REGNUM)
2903 	  || REGNO (base) == ARG_POINTER_REGNUM
2904           || (flag_pic
2905               && REGNO (base) == PIC_OFFSET_TABLE_REGNUM))
2906         pointer = base_ptr = true;
2907 
2908       if ((reload_completed || reload_in_progress)
2909 	  && base == cfun->machine->base_reg)
2910         pointer = base_ptr = literal_pool = true;
2911     }
2912 
2913   /* Validate index register.  */
2914   if (indx)
2915     {
2916       if (GET_CODE (indx) == UNSPEC)
2917 	switch (XINT (indx, 1))
2918 	  {
2919 	  case UNSPEC_LTREF:
2920 	    if (!disp)
2921 	      disp = gen_rtx_UNSPEC (Pmode,
2922 				     gen_rtvec (1, XVECEXP (indx, 0, 0)),
2923 				     UNSPEC_LTREL_OFFSET);
2924 	    else
2925 	      return false;
2926 
2927 	    indx = XVECEXP (indx, 0, 1);
2928 	    break;
2929 
2930 	  case UNSPEC_LTREL_BASE:
2931 	    if (XVECLEN (indx, 0) == 1)
2932 	      indx = fake_pool_base, literal_pool = true;
2933 	    else
2934 	      indx = XVECEXP (indx, 0, 1);
2935 	    break;
2936 
2937 	  default:
2938 	    return false;
2939 	  }
2940 
2941       if (!REG_P (indx) || GET_MODE (indx) != Pmode)
2942 	return false;
2943 
2944       if (REGNO (indx) == STACK_POINTER_REGNUM
2945 	  || REGNO (indx) == FRAME_POINTER_REGNUM
2946 	  || ((reload_completed || reload_in_progress)
2947 	      && frame_pointer_needed
2948 	      && REGNO (indx) == HARD_FRAME_POINTER_REGNUM)
2949 	  || REGNO (indx) == ARG_POINTER_REGNUM
2950           || (flag_pic
2951               && REGNO (indx) == PIC_OFFSET_TABLE_REGNUM))
2952         pointer = indx_ptr = true;
2953 
2954       if ((reload_completed || reload_in_progress)
2955 	  && indx == cfun->machine->base_reg)
2956         pointer = indx_ptr = literal_pool = true;
2957     }
2958 
2959   /* Prefer to use pointer as base, not index.  */
2960   if (base && indx && !base_ptr
2961       && (indx_ptr || (!REG_POINTER (base) && REG_POINTER (indx))))
2962     {
2963       rtx tmp = base;
2964       base = indx;
2965       indx = tmp;
2966     }
2967 
2968   /* Validate displacement.  */
2969   if (!disp)
2970     {
2971       /* If virtual registers are involved, the displacement will change later
2972 	 anyway as the virtual registers get eliminated.  This could make a
2973 	 valid displacement invalid, but it is more likely to make an invalid
2974 	 displacement valid, because we sometimes access the register save area
2975 	 via negative offsets to one of those registers.
2976 	 Thus we don't check the displacement for validity here.  If after
2977 	 elimination the displacement turns out to be invalid after all,
2978 	 this is fixed up by reload in any case.  */
2979       /* LRA always keeps the displacements up to date, and we need to
2980 	 know that the displacement is valid during all of LRA, not only
2981 	 at the final elimination.  */
2982       if (lra_in_progress
2983 	  || (base != arg_pointer_rtx
2984 	      && indx != arg_pointer_rtx
2985 	      && base != return_address_pointer_rtx
2986 	      && indx != return_address_pointer_rtx
2987 	      && base != frame_pointer_rtx
2988 	      && indx != frame_pointer_rtx
2989 	      && base != virtual_stack_vars_rtx
2990 	      && indx != virtual_stack_vars_rtx))
2991 	if (!DISP_IN_RANGE (offset))
2992 	  return false;
2993     }
2994   else
2995     {
2996       /* All the special cases are pointers.  */
2997       pointer = true;
2998 
2999       /* In the small-PIC case, the linker converts @GOT
3000          and @GOTNTPOFF offsets to possible displacements.  */
3001       if (GET_CODE (disp) == UNSPEC
3002           && (XINT (disp, 1) == UNSPEC_GOT
3003 	      || XINT (disp, 1) == UNSPEC_GOTNTPOFF)
3004 	  && flag_pic == 1)
3005         {
3006 	  ;
3007         }
3008 
3009       /* Accept pool label offsets.  */
3010       else if (GET_CODE (disp) == UNSPEC
3011 	       && XINT (disp, 1) == UNSPEC_POOL_OFFSET)
3012 	;
3013 
3014       /* Accept literal pool references.  */
3015       else if (GET_CODE (disp) == UNSPEC
3016 	       && XINT (disp, 1) == UNSPEC_LTREL_OFFSET)
3017         {
3018 	  /* In case CSE pulled a non literal pool reference out of
3019 	     the pool we have to reject the address.  This is
3020 	     especially important when loading the GOT pointer on non
3021 	     zarch CPUs.  In this case the literal pool contains an lt
3022 	     relative offset to the _GLOBAL_OFFSET_TABLE_ label which
3023 	     will most likely exceed the displacement.  */
3024 	  if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
3025 	      || !CONSTANT_POOL_ADDRESS_P (XVECEXP (disp, 0, 0)))
3026 	    return false;
3027 
3028 	  orig_disp = gen_rtx_CONST (Pmode, disp);
3029 	  if (offset)
3030 	    {
3031 	      /* If we have an offset, make sure it does not
3032 		 exceed the size of the constant pool entry.  */
3033 	      rtx sym = XVECEXP (disp, 0, 0);
3034 	      if (offset >= GET_MODE_SIZE (get_pool_mode (sym)))
3035 		return false;
3036 
3037               orig_disp = plus_constant (Pmode, orig_disp, offset);
3038 	    }
3039         }
3040 
3041       else
3042 	return false;
3043     }
3044 
3045   if (!base && !indx)
3046     pointer = true;
3047 
3048   if (out)
3049     {
3050       out->base = base;
3051       out->indx = indx;
3052       out->disp = orig_disp;
3053       out->pointer = pointer;
3054       out->literal_pool = literal_pool;
3055     }
3056 
3057   return true;
3058 }
3059 
3060 /* Decompose a RTL expression OP for an address style operand into its
3061    components, and return the base register in BASE and the offset in
3062    OFFSET.  While OP looks like an address it is never supposed to be
3063    used as such.
3064 
3065    Return true if OP is a valid address operand, false if not.  */
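/* For illustration: a shift count operand such as
     (plus (reg r3) (const_int 7))
   is decomposed into *BASE = r3 and *OFFSET = 7; a plain constant or a
   plain register is accepted as well.  */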
3066 
3067 bool
3068 s390_decompose_addrstyle_without_index (rtx op, rtx *base,
3069 					HOST_WIDE_INT *offset)
3070 {
3071   rtx off = NULL_RTX;
3072 
3073   /* We can have an integer constant, an address register,
3074      or a sum of the two.  */
3075   if (CONST_SCALAR_INT_P (op))
3076     {
3077       off = op;
3078       op = NULL_RTX;
3079     }
3080   if (op && GET_CODE (op) == PLUS && CONST_SCALAR_INT_P (XEXP (op, 1)))
3081     {
3082       off = XEXP (op, 1);
3083       op = XEXP (op, 0);
3084     }
3085   while (op && GET_CODE (op) == SUBREG)
3086     op = SUBREG_REG (op);
3087 
3088   if (op && GET_CODE (op) != REG)
3089     return false;
3090 
3091   if (offset)
3092     {
3093       if (off == NULL_RTX)
3094 	*offset = 0;
3095       else if (CONST_INT_P (off))
3096 	*offset = INTVAL (off);
3097       else if (CONST_WIDE_INT_P (off))
3098 	/* The offset will anyway be cut down to 12 bits so take just
3099 	   the lowest order chunk of the wide int.  */
3100 	*offset = CONST_WIDE_INT_ELT (off, 0);
3101       else
3102 	gcc_unreachable ();
3103     }
3104   if (base)
3105     *base = op;
3106 
3107    return true;
3108 }
3109 
3110 
3111 /* Return true if OP is a valid address without index.  */
3112 
3113 bool
3114 s390_legitimate_address_without_index_p (rtx op)
3115 {
3116   struct s390_address addr;
3117 
3118   if (!s390_decompose_address (XEXP (op, 0), &addr))
3119     return false;
3120   if (addr.indx)
3121     return false;
3122 
3123   return true;
3124 }
3125 
3126 
3127 /* Return TRUE if ADDR is an operand valid for a load/store relative
3128    instruction.  Be aware that the alignment of the operand needs to
3129    be checked separately.
3130    Valid addresses are single references or a sum of a reference and a
3131    constant integer. Return these parts in SYMREF and ADDEND.  You can
3132    pass NULL in SYMREF and/or ADDEND if you are not interested in these
3133    values.  Literal pool references are *not* considered symbol
3134    references.  */
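/* For illustration: (const (plus (symbol_ref "foo") (const_int 8)))
   is accepted with *SYMREF = (symbol_ref "foo") and *ADDEND = 8,
   whereas a literal pool SYMBOL_REF is rejected.  ("foo" is just a
   placeholder symbol.)  */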
3135 
3136 static bool
3137 s390_loadrelative_operand_p (rtx addr, rtx *symref, HOST_WIDE_INT *addend)
3138 {
3139   HOST_WIDE_INT tmpaddend = 0;
3140 
3141   if (GET_CODE (addr) == CONST)
3142     addr = XEXP (addr, 0);
3143 
3144   if (GET_CODE (addr) == PLUS)
3145     {
3146       if (!CONST_INT_P (XEXP (addr, 1)))
3147 	return false;
3148 
3149       tmpaddend = INTVAL (XEXP (addr, 1));
3150       addr = XEXP (addr, 0);
3151     }
3152 
3153   if ((GET_CODE (addr) == SYMBOL_REF && !CONSTANT_POOL_ADDRESS_P (addr))
3154       || (GET_CODE (addr) == UNSPEC
3155 	  && (XINT (addr, 1) == UNSPEC_GOTENT
3156 	      || (TARGET_CPU_ZARCH && XINT (addr, 1) == UNSPEC_PLT))))
3157     {
3158       if (symref)
3159 	*symref = addr;
3160       if (addend)
3161 	*addend = tmpaddend;
3162 
3163       return true;
3164     }
3165   return false;
3166 }
3167 
3168 /* Return true if the address in OP is valid for constraint letter C
3169    if wrapped in a MEM rtx.  Set LIT_POOL_OK to true if literal
3170    pool MEMs should be accepted.  Only the Q, R, S, T constraint
3171    letters are allowed for C.  */
3172 
3173 static int
3174 s390_check_qrst_address (char c, rtx op, bool lit_pool_ok)
3175 {
3176   struct s390_address addr;
3177   bool decomposed = false;
3178 
3179   if (!address_operand (op, GET_MODE (op)))
3180     return 0;
3181 
3182   /* This check makes sure that no symbolic addresses (except literal
3183      pool references) are accepted by the R or T constraints.  */
3184   if (s390_loadrelative_operand_p (op, NULL, NULL))
3185     return 0;
3186 
3187   /* Ensure literal pool references are only accepted if LIT_POOL_OK.  */
3188   if (!lit_pool_ok)
3189     {
3190       if (!s390_decompose_address (op, &addr))
3191 	return 0;
3192       if (addr.literal_pool)
3193 	return 0;
3194       decomposed = true;
3195     }
3196 
3197   /* With reload, we sometimes get intermediate address forms that are
3198      actually invalid as-is, but we need to accept them in the most
3199      generic cases below ('R' or 'T'), since reload will in fact fix
3200      them up.  LRA behaves differently here; we never see such forms,
3201      but on the other hand, we need to strictly reject every invalid
3202      address form.  Perform this check right up front.  */
3203   if (lra_in_progress)
3204     {
3205       if (!decomposed && !s390_decompose_address (op, &addr))
3206 	return 0;
3207       decomposed = true;
3208     }
3209 
3210   switch (c)
3211     {
3212     case 'Q': /* no index short displacement */
3213       if (!decomposed && !s390_decompose_address (op, &addr))
3214 	return 0;
3215       if (addr.indx)
3216 	return 0;
3217       if (!s390_short_displacement (addr.disp))
3218 	return 0;
3219       break;
3220 
3221     case 'R': /* with index short displacement */
3222       if (TARGET_LONG_DISPLACEMENT)
3223 	{
3224 	  if (!decomposed && !s390_decompose_address (op, &addr))
3225 	    return 0;
3226 	  if (!s390_short_displacement (addr.disp))
3227 	    return 0;
3228 	}
3229       /* Any invalid address here will be fixed up by reload,
3230 	 so accept it for the most generic constraint.  */
3231       break;
3232 
3233     case 'S': /* no index long displacement */
3234       if (!decomposed && !s390_decompose_address (op, &addr))
3235 	return 0;
3236       if (addr.indx)
3237 	return 0;
3238       break;
3239 
3240     case 'T': /* with index long displacement */
3241       /* Any invalid address here will be fixed up by reload,
3242 	 so accept it for the most generic constraint.  */
3243       break;
3244 
3245     default:
3246       return 0;
3247     }
3248   return 1;
3249 }
3250 
3251 
3252 /* Evaluates constraint strings described by the regular expression
3253    ([A|B|Z](Q|R|S|T))|Y and returns 1 if OP is a valid operand for
3254    the constraint given in STR, or 0 otherwise.  */
3255 
3256 int
3257 s390_mem_constraint (const char *str, rtx op)
3258 {
3259   char c = str[0];
3260 
3261   switch (c)
3262     {
3263     case 'A':
3264       /* Check for offsettable variants of memory constraints.  */
3265       if (!MEM_P (op) || MEM_VOLATILE_P (op))
3266 	return 0;
3267       if ((reload_completed || reload_in_progress)
3268 	  ? !offsettable_memref_p (op) : !offsettable_nonstrict_memref_p (op))
3269 	return 0;
3270       return s390_check_qrst_address (str[1], XEXP (op, 0), true);
3271     case 'B':
3272       /* Check for non-literal-pool variants of memory constraints.  */
3273       if (!MEM_P (op))
3274 	return 0;
3275       return s390_check_qrst_address (str[1], XEXP (op, 0), false);
3276     case 'Q':
3277     case 'R':
3278     case 'S':
3279     case 'T':
3280       if (GET_CODE (op) != MEM)
3281 	return 0;
3282       return s390_check_qrst_address (c, XEXP (op, 0), true);
3283     case 'Y':
3284       /* Simply check for the basic form of a shift count.  Reload will
3285 	 take care of making sure we have a proper base register.  */
3286       if (!s390_decompose_addrstyle_without_index (op, NULL, NULL))
3287 	return 0;
3288       break;
3289     case 'Z':
3290       return s390_check_qrst_address (str[1], op, true);
3291     default:
3292       return 0;
3293     }
3294   return 1;
3295 }
3296 
3297 
3298 /* Evaluates constraint strings starting with letter O.  Input
3299    parameter C is the second letter following the "O" in the constraint
3300    parameter C is the letter following the "O" in the constraint
3301    otherwise.  */
3302 
3303 int
3304 s390_O_constraint_str (const char c, HOST_WIDE_INT value)
3305 {
3306   if (!TARGET_EXTIMM)
3307     return 0;
3308 
3309   switch (c)
3310     {
3311     case 's':
3312       return trunc_int_for_mode (value, SImode) == value;
3313 
3314     case 'p':
3315       return value == 0
3316 	|| s390_single_part (GEN_INT (value), DImode, SImode, 0) == 1;
3317 
3318     case 'n':
3319       return s390_single_part (GEN_INT (value - 1), DImode, SImode, -1) == 1;
3320 
3321     default:
3322       gcc_unreachable ();
3323     }
3324 }
3325 
3326 
3327 /* Evaluates constraint strings starting with letter N.  Parameter STR
3328    contains the letters following letter "N" in the constraint string.
3329    Returns true if VALUE matches the constraint.  */
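/* For illustration (a sketch of the encoding): for the constraint
   string "0HS0" the value 0x00ff0000 matches, because exactly one
   HImode part of the SImode value differs from 0 and that part is
   part number 0 (the most significant one).  */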
3330 
3331 int
3332 s390_N_constraint_str (const char *str, HOST_WIDE_INT value)
3333 {
3334   machine_mode mode, part_mode;
3335   int def;
3336   int part, part_goal;
3337 
3338 
3339   if (str[0] == 'x')
3340     part_goal = -1;
3341   else
3342     part_goal = str[0] - '0';
3343 
3344   switch (str[1])
3345     {
3346     case 'Q':
3347       part_mode = QImode;
3348       break;
3349     case 'H':
3350       part_mode = HImode;
3351       break;
3352     case 'S':
3353       part_mode = SImode;
3354       break;
3355     default:
3356       return 0;
3357     }
3358 
3359   switch (str[2])
3360     {
3361     case 'H':
3362       mode = HImode;
3363       break;
3364     case 'S':
3365       mode = SImode;
3366       break;
3367     case 'D':
3368       mode = DImode;
3369       break;
3370     default:
3371       return 0;
3372     }
3373 
3374   switch (str[3])
3375     {
3376     case '0':
3377       def = 0;
3378       break;
3379     case 'F':
3380       def = -1;
3381       break;
3382     default:
3383       return 0;
3384     }
3385 
3386   if (GET_MODE_SIZE (mode) <= GET_MODE_SIZE (part_mode))
3387     return 0;
3388 
3389   part = s390_single_part (GEN_INT (value), mode, part_mode, def);
3390   if (part < 0)
3391     return 0;
3392   if (part_goal != -1 && part_goal != part)
3393     return 0;
3394 
3395   return 1;
3396 }
3397 
3398 
3399 /* Returns true if the input parameter VALUE is a float zero.  */
3400 
3401 int
3402 s390_float_const_zero_p (rtx value)
3403 {
3404   return (GET_MODE_CLASS (GET_MODE (value)) == MODE_FLOAT
3405 	  && value == CONST0_RTX (GET_MODE (value)));
3406 }
3407 
3408 /* Implement TARGET_REGISTER_MOVE_COST.  */
3409 
3410 static int
3411 s390_register_move_cost (machine_mode mode,
3412                          reg_class_t from, reg_class_t to)
3413 {
3414   /* On s390, copy between fprs and gprs is expensive.  */
3415 
3416   /* Copying becomes somewhat faster when ldgr/lgdr can be used.  */
3417   if (TARGET_Z10 && GET_MODE_SIZE (mode) == 8)
3418     {
3419       /* ldgr is single cycle. */
3420       if (reg_classes_intersect_p (from, GENERAL_REGS)
3421 	  && reg_classes_intersect_p (to, FP_REGS))
3422 	return 1;
3423       /* lgdr needs 3 cycles. */
3424       if (reg_classes_intersect_p (to, GENERAL_REGS)
3425 	  && reg_classes_intersect_p (from, FP_REGS))
3426 	return 3;
3427     }
3428 
3429   /* Otherwise copying is done via memory.  */
3430   if ((reg_classes_intersect_p (from, GENERAL_REGS)
3431        && reg_classes_intersect_p (to, FP_REGS))
3432       || (reg_classes_intersect_p (from, FP_REGS)
3433 	  && reg_classes_intersect_p (to, GENERAL_REGS)))
3434     return 10;
3435 
3436   return 1;
3437 }
3438 
3439 /* Implement TARGET_MEMORY_MOVE_COST.  */
3440 
3441 static int
3442 s390_memory_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
3443 		       reg_class_t rclass ATTRIBUTE_UNUSED,
3444 		       bool in ATTRIBUTE_UNUSED)
3445 {
3446   return 2;
3447 }
3448 
3449 /* Compute a (partial) cost for rtx X.  Return true if the complete
3450    cost has been computed, and false if subexpressions should be
3451    scanned.  In either case, *TOTAL contains the cost result.  The
3452    initial value of *TOTAL is the default value computed by
3453    rtx_cost.  It may be left unmodified.  OUTER_CODE contains the
3454    code of the superexpression of x.  */
3455 
3456 static bool
3457 s390_rtx_costs (rtx x, machine_mode mode, int outer_code,
3458 		int opno ATTRIBUTE_UNUSED,
3459 		int *total, bool speed ATTRIBUTE_UNUSED)
3460 {
3461   int code = GET_CODE (x);
3462   switch (code)
3463     {
3464     case CONST:
3465     case CONST_INT:
3466     case LABEL_REF:
3467     case SYMBOL_REF:
3468     case CONST_DOUBLE:
3469     case CONST_WIDE_INT:
3470     case MEM:
3471       *total = 0;
3472       return true;
3473 
3474     case SET:
3475       {
3476 	/* Without this a conditional move instruction would be
3477 	   accounted as 3 * COSTS_N_INSNS (set, if_then_else,
3478 	   comparison operator).  That's a bit pessimistic.  */
3479 
3480 	if (!TARGET_Z196 || GET_CODE (SET_SRC (x)) != IF_THEN_ELSE)
3481 	  return false;
3482 
3483 	rtx cond = XEXP (SET_SRC (x), 0);
3484 
3485 	if (!CC_REG_P (XEXP (cond, 0)) || !CONST_INT_P (XEXP (cond, 1)))
3486 	  return false;
3487 
3488 	/* It is going to be a load/store on condition.  Make it
3489 	   slightly more expensive than a normal load.  */
3490 	*total = COSTS_N_INSNS (1) + 1;
3491 
3492 	rtx dst = SET_DEST (x);
3493 	rtx then = XEXP (SET_SRC (x), 1);
3494 	rtx els = XEXP (SET_SRC (x), 2);
3495 
3496 	/* It is a real IF-THEN-ELSE.  An additional move will be
3497 	   needed to implement that.  */
3498 	if (reload_completed
3499 	    && !rtx_equal_p (dst, then)
3500 	    && !rtx_equal_p (dst, els))
3501 	  *total += COSTS_N_INSNS (1) / 2;
3502 
3503 	/* A minor penalty for constants we cannot directly handle.  */
3504 	if ((CONST_INT_P (then) || CONST_INT_P (els))
3505 	    && (!TARGET_Z13 || MEM_P (dst)
3506 		|| (CONST_INT_P (then) && !satisfies_constraint_K (then))
3507 		|| (CONST_INT_P (els) && !satisfies_constraint_K (els))))
3508 	  *total += COSTS_N_INSNS (1) / 2;
3509 
3510 	/* A store on condition can only handle register src operands.  */
3511 	if (MEM_P (dst) && (!REG_P (then) || !REG_P (els)))
3512 	  *total += COSTS_N_INSNS (1) / 2;
3513 
3514 	return true;
3515       }
3516     case IOR:
3517       /* risbg */
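      /* The shape matched below is (ior (and a (2^n - 1)) (ashift b n)):
	 the low N bits are taken from A and the remaining bits from B
	 shifted into place, which corresponds to a rotate-and-insert
	 style sequence.  */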
3518       if (GET_CODE (XEXP (x, 0)) == AND
3519 	  && GET_CODE (XEXP (x, 1)) == ASHIFT
3520 	  && REG_P (XEXP (XEXP (x, 0), 0))
3521 	  && REG_P (XEXP (XEXP (x, 1), 0))
3522 	  && CONST_INT_P (XEXP (XEXP (x, 0), 1))
3523 	  && CONST_INT_P (XEXP (XEXP (x, 1), 1))
3524 	  && (UINTVAL (XEXP (XEXP (x, 0), 1)) ==
3525 	      (HOST_WIDE_INT_1U << UINTVAL (XEXP (XEXP (x, 1), 1))) - 1))
3526 	{
3527 	  *total = COSTS_N_INSNS (2);
3528 	  return true;
3529 	}
3530 
3531       /* ~AND on a 128 bit mode.  This can be done using a vector
3532 	 instruction.  */
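      /* By De Morgan, (~a) | (~b) equals ~(a & b), so the whole IOR can
	 collapse into a single vector not-and operation.  */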
3533       if (TARGET_VXE
3534 	  && GET_CODE (XEXP (x, 0)) == NOT
3535 	  && GET_CODE (XEXP (x, 1)) == NOT
3536 	  && REG_P (XEXP (XEXP (x, 0), 0))
3537 	  && REG_P (XEXP (XEXP (x, 1), 0))
3538 	  && GET_MODE_SIZE (GET_MODE (XEXP (XEXP (x, 0), 0))) == 16
3539 	  && s390_hard_regno_mode_ok (VR0_REGNUM,
3540 				      GET_MODE (XEXP (XEXP (x, 0), 0))))
3541 	{
3542 	  *total = COSTS_N_INSNS (1);
3543 	  return true;
3544 	}
3545       /* fallthrough */
3546     case ASHIFT:
3547     case ASHIFTRT:
3548     case LSHIFTRT:
3549     case ROTATE:
3550     case ROTATERT:
3551     case AND:
3552     case XOR:
3553     case NEG:
3554     case NOT:
3555       *total = COSTS_N_INSNS (1);
3556       return false;
3557 
3558     case PLUS:
3559     case MINUS:
3560       *total = COSTS_N_INSNS (1);
3561       return false;
3562 
3563     case MULT:
3564       switch (mode)
3565 	{
3566 	case E_SImode:
3567 	  {
3568 	    rtx left = XEXP (x, 0);
3569 	    rtx right = XEXP (x, 1);
3570 	    if (GET_CODE (right) == CONST_INT
3571 		&& CONST_OK_FOR_K (INTVAL (right)))
3572 	      *total = s390_cost->mhi;
3573 	    else if (GET_CODE (left) == SIGN_EXTEND)
3574 	      *total = s390_cost->mh;
3575 	    else
3576 	      *total = s390_cost->ms;  /* msr, ms, msy */
3577 	    break;
3578 	  }
3579 	case E_DImode:
3580 	  {
3581 	    rtx left = XEXP (x, 0);
3582 	    rtx right = XEXP (x, 1);
3583 	    if (TARGET_ZARCH)
3584 	      {
3585 		if (GET_CODE (right) == CONST_INT
3586 		    && CONST_OK_FOR_K (INTVAL (right)))
3587 		  *total = s390_cost->mghi;
3588 		else if (GET_CODE (left) == SIGN_EXTEND)
3589 		  *total = s390_cost->msgf;
3590 		else
3591 		  *total = s390_cost->msg;  /* msgr, msg */
3592 	      }
3593 	    else /* TARGET_31BIT */
3594 	      {
3595 		if (GET_CODE (left) == SIGN_EXTEND
3596 		    && GET_CODE (right) == SIGN_EXTEND)
3597 		  /* mulsidi case: mr, m */
3598 		  *total = s390_cost->m;
3599 		else if (GET_CODE (left) == ZERO_EXTEND
3600 			 && GET_CODE (right) == ZERO_EXTEND
3601 			 && TARGET_CPU_ZARCH)
3602 		  /* umulsidi case: ml, mlr */
3603 		  *total = s390_cost->ml;
3604 		else
3605 		  /* Complex calculation is required.  */
3606 		  *total = COSTS_N_INSNS (40);
3607 	      }
3608 	    break;
3609 	  }
3610 	case E_SFmode:
3611 	case E_DFmode:
3612 	  *total = s390_cost->mult_df;
3613 	  break;
3614 	case E_TFmode:
3615 	  *total = s390_cost->mxbr;
3616 	  break;
3617 	default:
3618 	  return false;
3619 	}
3620       return false;
3621 
3622     case FMA:
3623       switch (mode)
3624 	{
3625 	case E_DFmode:
3626 	  *total = s390_cost->madbr;
3627 	  break;
3628 	case E_SFmode:
3629 	  *total = s390_cost->maebr;
3630 	  break;
3631 	default:
3632 	  return false;
3633 	}
3634       /* A negated third argument is free: FMSUB.  */
3635       if (GET_CODE (XEXP (x, 2)) == NEG)
3636 	{
3637 	  *total += (rtx_cost (XEXP (x, 0), mode, FMA, 0, speed)
3638 		     + rtx_cost (XEXP (x, 1), mode, FMA, 1, speed)
3639 		     + rtx_cost (XEXP (XEXP (x, 2), 0), mode, FMA, 2, speed));
3640 	  return true;
3641 	}
3642       return false;
3643 
3644     case UDIV:
3645     case UMOD:
3646       if (mode == TImode) 	       /* 128 bit division */
3647 	*total = s390_cost->dlgr;
3648       else if (mode == DImode)
3649 	{
3650 	  rtx right = XEXP (x, 1);
3651 	  if (GET_CODE (right) == ZERO_EXTEND) /* 64 by 32 bit division */
3652 	    *total = s390_cost->dlr;
3653 	  else 	                               /* 64 by 64 bit division */
3654 	    *total = s390_cost->dlgr;
3655 	}
3656       else if (mode == SImode)         /* 32 bit division */
3657 	*total = s390_cost->dlr;
3658       return false;
3659 
3660     case DIV:
3661     case MOD:
3662       if (mode == DImode)
3663 	{
3664 	  rtx right = XEXP (x, 1);
3665 	  if (GET_CODE (right) == ZERO_EXTEND) /* 64 by 32 bit division */
3666 	    if (TARGET_ZARCH)
3667 	      *total = s390_cost->dsgfr;
3668 	    else
3669 	      *total = s390_cost->dr;
3670 	  else 	                               /* 64 by 64 bit division */
3671 	    *total = s390_cost->dsgr;
3672 	}
3673       else if (mode == SImode)         /* 32 bit division */
3674 	*total = s390_cost->dlr;
3675       else if (mode == SFmode)
3676 	{
3677 	  *total = s390_cost->debr;
3678 	}
3679       else if (mode == DFmode)
3680 	{
3681 	  *total = s390_cost->ddbr;
3682 	}
3683       else if (mode == TFmode)
3684 	{
3685 	  *total = s390_cost->dxbr;
3686 	}
3687       return false;
3688 
3689     case SQRT:
3690       if (mode == SFmode)
3691 	*total = s390_cost->sqebr;
3692       else if (mode == DFmode)
3693 	*total = s390_cost->sqdbr;
3694       else /* TFmode */
3695 	*total = s390_cost->sqxbr;
3696       return false;
3697 
3698     case SIGN_EXTEND:
3699     case ZERO_EXTEND:
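      /* Extensions that feed a multiply, divide, add/subtract or compare
	 are normally folded into the instruction itself (see e.g. the
	 mh/msgf/dsgfr cases above), so treat them as free in those
	 contexts.  */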
3700       if (outer_code == MULT || outer_code == DIV || outer_code == MOD
3701 	  || outer_code == PLUS || outer_code == MINUS
3702 	  || outer_code == COMPARE)
3703 	*total = 0;
3704       return false;
3705 
3706     case COMPARE:
3707       *total = COSTS_N_INSNS (1);
3708       if (GET_CODE (XEXP (x, 0)) == AND
3709 	  && GET_CODE (XEXP (x, 1)) == CONST_INT
3710 	  && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)
3711 	{
3712 	  rtx op0 = XEXP (XEXP (x, 0), 0);
3713 	  rtx op1 = XEXP (XEXP (x, 0), 1);
3714 	  rtx op2 = XEXP (x, 1);
3715 
3716 	  if (memory_operand (op0, GET_MODE (op0))
3717 	      && s390_tm_ccmode (op1, op2, 0) != VOIDmode)
3718 	    return true;
3719 	  if (register_operand (op0, GET_MODE (op0))
3720 	      && s390_tm_ccmode (op1, op2, 1) != VOIDmode)
3721 	    return true;
3722 	}
3723       return false;
3724 
3725     default:
3726       return false;
3727     }
3728 }
3729 
3730 /* Return the cost of an address rtx ADDR.  */
3731 
3732 static int
3733 s390_address_cost (rtx addr, machine_mode mode ATTRIBUTE_UNUSED,
3734 		   addr_space_t as ATTRIBUTE_UNUSED,
3735 		   bool speed ATTRIBUTE_UNUSED)
3736 {
3737   struct s390_address ad;
3738   if (!s390_decompose_address (addr, &ad))
3739     return 1000;
3740 
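  /* An address using an index register is treated as marginally more
     expensive than a plain base + displacement address.  */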
3741   return ad.indx ? COSTS_N_INSNS (1) + 1 : COSTS_N_INSNS (1);
3742 }
3743 
3744 /* Implement targetm.vectorize.builtin_vectorization_cost.  */
3745 static int
3746 s390_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
3747 				 tree vectype,
3748 				 int misalign ATTRIBUTE_UNUSED)
3749 {
3750   switch (type_of_cost)
3751     {
3752       case scalar_stmt:
3753       case scalar_load:
3754       case scalar_store:
3755       case vector_stmt:
3756       case vector_load:
3757       case vector_store:
3758       case vector_gather_load:
3759       case vector_scatter_store:
3760       case vec_to_scalar:
3761       case scalar_to_vec:
3762       case cond_branch_not_taken:
3763       case vec_perm:
3764       case vec_promote_demote:
3765       case unaligned_load:
3766       case unaligned_store:
3767 	return 1;
3768 
3769       case cond_branch_taken:
3770 	return 3;
3771 
3772       case vec_construct:
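	/* Building a vector from N scalar elements is assumed to take
	   one operation per element beyond the first.  */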
3773 	return TYPE_VECTOR_SUBPARTS (vectype) - 1;
3774 
3775       default:
3776 	gcc_unreachable ();
3777     }
3778 }
3779 
3780 /* If OP is a SYMBOL_REF of a thread-local symbol, return its TLS mode,
3781    otherwise return 0.  */
3782 
3783 int
3784 tls_symbolic_operand (rtx op)
3785 {
3786   if (GET_CODE (op) != SYMBOL_REF)
3787     return 0;
3788   return SYMBOL_REF_TLS_MODEL (op);
3789 }
3790 
3791 /* Split DImode access register reference REG (on 64-bit) into its constituent
3792    low and high parts, and store them into LO and HI.  Note that gen_lowpart/
3793    gen_highpart cannot be used as they assume all registers are word-sized,
3794    while our access registers have only half that size.  */
3795 
3796 void
3797 s390_split_access_reg (rtx reg, rtx *lo, rtx *hi)
3798 {
3799   gcc_assert (TARGET_64BIT);
3800   gcc_assert (ACCESS_REG_P (reg));
3801   gcc_assert (GET_MODE (reg) == DImode);
3802   gcc_assert (!(REGNO (reg) & 1));
3803 
3804   *lo = gen_rtx_REG (SImode, REGNO (reg) + 1);
3805   *hi = gen_rtx_REG (SImode, REGNO (reg));
3806 }
3807 
3808 /* Return true if OP contains a symbol reference.  */
3809 
3810 bool
3811 symbolic_reference_mentioned_p (rtx op)
3812 {
3813   const char *fmt;
3814   int i;
3815 
3816   if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
3817     return 1;
3818 
3819   fmt = GET_RTX_FORMAT (GET_CODE (op));
3820   for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
3821     {
3822       if (fmt[i] == 'E')
3823 	{
3824 	  int j;
3825 
3826 	  for (j = XVECLEN (op, i) - 1; j >= 0; j--)
3827 	    if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
3828 	      return 1;
3829 	}
3830 
3831       else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
3832 	return 1;
3833     }
3834 
3835   return 0;
3836 }
3837 
3838 /* Return true if OP contains a reference to a thread-local symbol.  */
3839 
3840 bool
3841 tls_symbolic_reference_mentioned_p (rtx op)
3842 {
3843   const char *fmt;
3844   int i;
3845 
3846   if (GET_CODE (op) == SYMBOL_REF)
3847     return tls_symbolic_operand (op);
3848 
3849   fmt = GET_RTX_FORMAT (GET_CODE (op));
3850   for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
3851     {
3852       if (fmt[i] == 'E')
3853 	{
3854 	  int j;
3855 
3856 	  for (j = XVECLEN (op, i) - 1; j >= 0; j--)
3857 	    if (tls_symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
3858 	      return true;
3859 	}
3860 
3861       else if (fmt[i] == 'e' && tls_symbolic_reference_mentioned_p (XEXP (op, i)))
3862 	return true;
3863     }
3864 
3865   return false;
3866 }
3867 
3868 
3869 /* Return true if OP is a legitimate general operand when
3870    generating PIC code.  It is given that flag_pic is on
3871    and that OP satisfies CONSTANT_P.  */
3872 
3873 int
3874 legitimate_pic_operand_p (rtx op)
3875 {
3876   /* Accept all non-symbolic constants.  */
3877   if (!SYMBOLIC_CONST (op))
3878     return 1;
3879 
3880   /* Reject everything else; must be handled
3881      via emit_symbolic_move.  */
3882   return 0;
3883 }
3884 
3885 /* Returns true if the constant value OP is a legitimate general operand.
3886    It is given that OP satisfies CONSTANT_P.  */
3887 
3888 static bool
3889 s390_legitimate_constant_p (machine_mode mode, rtx op)
3890 {
3891   if (TARGET_VX && VECTOR_MODE_P (mode) && GET_CODE (op) == CONST_VECTOR)
3892     {
3893       if (GET_MODE_SIZE (mode) != 16)
3894 	return 0;
3895 
3896       if (!satisfies_constraint_j00 (op)
3897 	  && !satisfies_constraint_jm1 (op)
3898 	  && !satisfies_constraint_jKK (op)
3899 	  && !satisfies_constraint_jxx (op)
3900 	  && !satisfies_constraint_jyy (op))
3901 	return 0;
3902     }
3903 
3904   /* Accept all non-symbolic constants.  */
3905   if (!SYMBOLIC_CONST (op))
3906     return 1;
3907 
3908   /* Accept immediate LARL operands.  */
3909   if (TARGET_CPU_ZARCH && larl_operand (op, mode))
3910     return 1;
3911 
3912   /* Thread-local symbols are never legal constants.  This is
3913      so that emit_call knows that computing such addresses
3914      might require a function call.  */
3915   if (TLS_SYMBOLIC_CONST (op))
3916     return 0;
3917 
3918   /* In the PIC case, symbolic constants must *not* be
3919      forced into the literal pool.  We accept them here,
3920      so that they will be handled by emit_symbolic_move.  */
3921   if (flag_pic)
3922     return 1;
3923 
3924   /* All remaining non-PIC symbolic constants are
3925      forced into the literal pool.  */
3926   return 0;
3927 }
3928 
3929 /* Determine if it's legal to put X into the constant pool.  This
3930    is not possible if X contains the address of a symbol that is
3931    not constant (TLS) or not known at final link time (PIC).  */
3932 
3933 static bool
3934 s390_cannot_force_const_mem (machine_mode mode, rtx x)
3935 {
3936   switch (GET_CODE (x))
3937     {
3938     case CONST_INT:
3939     case CONST_DOUBLE:
3940     case CONST_WIDE_INT:
3941     case CONST_VECTOR:
3942       /* Accept all non-symbolic constants.  */
3943       return false;
3944 
3945     case LABEL_REF:
3946       /* Labels are OK iff we are non-PIC.  */
3947       return flag_pic != 0;
3948 
3949     case SYMBOL_REF:
3950       /* 'Naked' TLS symbol references are never OK,
3951          non-TLS symbols are OK iff we are non-PIC.  */
3952       if (tls_symbolic_operand (x))
3953 	return true;
3954       else
3955 	return flag_pic != 0;
3956 
3957     case CONST:
3958       return s390_cannot_force_const_mem (mode, XEXP (x, 0));
3959     case PLUS:
3960     case MINUS:
3961       return s390_cannot_force_const_mem (mode, XEXP (x, 0))
3962 	     || s390_cannot_force_const_mem (mode, XEXP (x, 1));
3963 
3964     case UNSPEC:
3965       switch (XINT (x, 1))
3966 	{
3967 	/* Only lt-relative or GOT-relative UNSPECs are OK.  */
3968 	case UNSPEC_LTREL_OFFSET:
3969 	case UNSPEC_GOT:
3970 	case UNSPEC_GOTOFF:
3971 	case UNSPEC_PLTOFF:
3972 	case UNSPEC_TLSGD:
3973 	case UNSPEC_TLSLDM:
3974 	case UNSPEC_NTPOFF:
3975 	case UNSPEC_DTPOFF:
3976 	case UNSPEC_GOTNTPOFF:
3977 	case UNSPEC_INDNTPOFF:
3978 	  return false;
3979 
3980 	/* If the literal pool shares the code section, execute template
3981 	   placeholders may be put into the pool as well.  */
3982 	case UNSPEC_INSN:
3983 	  return TARGET_CPU_ZARCH;
3984 
3985 	default:
3986 	  return true;
3987 	}
3988       break;
3989 
3990     default:
3991       gcc_unreachable ();
3992     }
3993 }
3994 
3995 /* Returns true if the constant value OP is a legitimate general
3996    operand during and after reload.  The difference to
3997    legitimate_constant_p is that this function will not accept
3998    a constant that would need to be forced to the literal pool
3999    before it can be used as operand.
4000    This function accepts all constants which can be loaded directly
4001    into a GPR.  */
4002 
4003 bool
4004 legitimate_reload_constant_p (rtx op)
4005 {
4006   /* Accept la(y) operands.  */
4007   if (GET_CODE (op) == CONST_INT
4008       && DISP_IN_RANGE (INTVAL (op)))
4009     return true;
4010 
4011   /* Accept l(g)hi/l(g)fi operands.  */
4012   if (GET_CODE (op) == CONST_INT
4013       && (CONST_OK_FOR_K (INTVAL (op)) || CONST_OK_FOR_Os (INTVAL (op))))
4014     return true;
4015 
4016   /* Accept lliXX operands.  */
4017   if (TARGET_ZARCH
4018       && GET_CODE (op) == CONST_INT
4019       && trunc_int_for_mode (INTVAL (op), word_mode) == INTVAL (op)
4020       && s390_single_part (op, word_mode, HImode, 0) >= 0)
4021   return true;
4022 
4023   if (TARGET_EXTIMM
4024       && GET_CODE (op) == CONST_INT
4025       && trunc_int_for_mode (INTVAL (op), word_mode) == INTVAL (op)
4026       && s390_single_part (op, word_mode, SImode, 0) >= 0)
4027     return true;
4028 
4029   /* Accept larl operands.  */
4030   if (TARGET_CPU_ZARCH
4031       && larl_operand (op, VOIDmode))
4032     return true;
4033 
4034   /* Accept floating-point zero operands that fit into a single GPR.  */
4035   if (GET_CODE (op) == CONST_DOUBLE
4036       && s390_float_const_zero_p (op)
4037       && GET_MODE_SIZE (GET_MODE (op)) <= UNITS_PER_WORD)
4038     return true;
4039 
4040   /* Accept double-word operands that can be split.  */
4041   if (GET_CODE (op) == CONST_WIDE_INT
4042       || (GET_CODE (op) == CONST_INT
4043 	  && trunc_int_for_mode (INTVAL (op), word_mode) != INTVAL (op)))
4044     {
4045       machine_mode dword_mode = word_mode == SImode ? DImode : TImode;
4046       rtx hi = operand_subword (op, 0, 0, dword_mode);
4047       rtx lo = operand_subword (op, 1, 0, dword_mode);
4048       return legitimate_reload_constant_p (hi)
4049 	     && legitimate_reload_constant_p (lo);
4050     }
4051 
4052   /* Everything else cannot be handled without reload.  */
4053   return false;
4054 }
4055 
4056 /* Returns true if the constant value OP is a legitimate fp operand
4057    during and after reload.
4058    This function accepts all constants which can be loaded directly
4059    into an FPR.  */
4060 
4061 static bool
4062 legitimate_reload_fp_constant_p (rtx op)
4063 {
4064   /* Accept floating-point zero operands if the load zero instruction
4065      can be used.  Prior to z196 the load fp zero instruction caused a
4066      performance penalty if the result is used as BFP number.  */
4067   if (TARGET_Z196
4068       && GET_CODE (op) == CONST_DOUBLE
4069       && s390_float_const_zero_p (op))
4070     return true;
4071 
4072   return false;
4073 }
4074 
4075 /* Returns true if the constant value OP is a legitimate vector operand
4076    during and after reload.
4077    This function accepts all constants which can be loaded directly
4078    into a VR.  */
4079 
4080 static bool
4081 legitimate_reload_vector_constant_p (rtx op)
4082 {
4083   if (TARGET_VX && GET_MODE_SIZE (GET_MODE (op)) == 16
4084       && (satisfies_constraint_j00 (op)
4085 	  || satisfies_constraint_jm1 (op)
4086 	  || satisfies_constraint_jKK (op)
4087 	  || satisfies_constraint_jxx (op)
4088 	  || satisfies_constraint_jyy (op)))
4089     return true;
4090 
4091   return false;
4092 }
4093 
4094 /* Given an rtx OP being reloaded into a reg required to be in class RCLASS,
4095    return the class of reg to actually use.  */
4096 
4097 static reg_class_t
4098 s390_preferred_reload_class (rtx op, reg_class_t rclass)
4099 {
4100   switch (GET_CODE (op))
4101     {
4102       /* Constants we cannot reload into general registers
4103 	 must be forced into the literal pool.  */
4104       case CONST_VECTOR:
4105       case CONST_DOUBLE:
4106       case CONST_INT:
4107       case CONST_WIDE_INT:
4108 	if (reg_class_subset_p (GENERAL_REGS, rclass)
4109 	    && legitimate_reload_constant_p (op))
4110 	  return GENERAL_REGS;
4111 	else if (reg_class_subset_p (ADDR_REGS, rclass)
4112 		 && legitimate_reload_constant_p (op))
4113 	  return ADDR_REGS;
4114 	else if (reg_class_subset_p (FP_REGS, rclass)
4115 		 && legitimate_reload_fp_constant_p (op))
4116 	  return FP_REGS;
4117 	else if (reg_class_subset_p (VEC_REGS, rclass)
4118 		 && legitimate_reload_vector_constant_p (op))
4119 	  return VEC_REGS;
4120 
4121 	return NO_REGS;
4122 
4123       /* If a symbolic constant or a PLUS is reloaded,
4124 	 it is most likely being used as an address, so
4125 	 prefer ADDR_REGS.  If 'class' is not a superset
4126 	 of ADDR_REGS, e.g. FP_REGS, reject this reload.  */
4127       case CONST:
4128 	/* Symrefs cannot be pushed into the literal pool with -fPIC
4129 	   so we *MUST NOT* return NO_REGS for these cases
4130 	   (s390_cannot_force_const_mem will return true).
4131 
4132 	   On the other hand we MUST return NO_REGS for symrefs with
4133 	   invalid addend which might have been pushed to the literal
4134 	   pool (no -fPIC).  Usually we would expect them to be
4135 	   handled via secondary reload but this does not happen if
4136 	   they are used as literal pool slot replacement in reload
4137 	   inheritance (see emit_input_reload_insns).  */
4138 	if (TARGET_CPU_ZARCH
4139 	    && GET_CODE (XEXP (op, 0)) == PLUS
4140 	    && GET_CODE (XEXP (XEXP(op, 0), 0)) == SYMBOL_REF
4141 	    && GET_CODE (XEXP (XEXP(op, 0), 1)) == CONST_INT)
4142 	  {
4143 	    if (flag_pic && reg_class_subset_p (ADDR_REGS, rclass))
4144 	      return ADDR_REGS;
4145 	    else
4146 	      return NO_REGS;
4147 	  }
4148 	/* fallthrough */
4149       case LABEL_REF:
4150       case SYMBOL_REF:
4151 	if (!legitimate_reload_constant_p (op))
4152           return NO_REGS;
4153 	/* fallthrough */
4154       case PLUS:
4155 	/* load address will be used.  */
4156 	if (reg_class_subset_p (ADDR_REGS, rclass))
4157 	  return ADDR_REGS;
4158 	else
4159 	  return NO_REGS;
4160 
4161       default:
4162 	break;
4163     }
4164 
4165   return rclass;
4166 }
4167 
4168 /* Return true if ADDR is SYMBOL_REF + addend with addend being a
4169    multiple of ALIGNMENT and the SYMBOL_REF being naturally
4170    aligned.  */
4171 
4172 bool
4173 s390_check_symref_alignment (rtx addr, HOST_WIDE_INT alignment)
4174 {
4175   HOST_WIDE_INT addend;
4176   rtx symref;
4177 
4178   /* The "required alignment" might be 0 (e.g. for certain structs
4179      accessed via BLKmode).  Early abort in this case, as well as when
4180      an alignment > 8 is required.  */
4181   if (alignment < 2 || alignment > 8)
4182     return false;
4183 
4184   if (!s390_loadrelative_operand_p (addr, &symref, &addend))
4185     return false;
4186 
4187   if (addend & (alignment - 1))
4188     return false;
4189 
4190   if (GET_CODE (symref) == SYMBOL_REF)
4191     {
4192       /* We have load-relative instructions for 2-byte, 4-byte, and
4193          8-byte alignment so allow only these.  */
4194       switch (alignment)
4195 	{
4196 	case 8:	return !SYMBOL_FLAG_NOTALIGN8_P (symref);
4197 	case 4:	return !SYMBOL_FLAG_NOTALIGN4_P (symref);
4198 	case 2:	return !SYMBOL_FLAG_NOTALIGN2_P (symref);
4199 	default: return false;
4200 	}
4201     }
4202 
4203   if (GET_CODE (symref) == UNSPEC
4204       && alignment <= UNITS_PER_LONG)
4205     return true;
4206 
4207   return false;
4208 }
4209 
4210 /* ADDR is moved into REG using larl.  If ADDR isn't a valid larl
4211    operand, SCRATCH is used to reload the even part of the address,
4212    and one is then added.  */
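/* For instance, for ADDR = sym + 1 the code below ends up emitting
   roughly:  larl SCRATCH,sym ; la REG,1(SCRATCH).  */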
4213 
4214 void
4215 s390_reload_larl_operand (rtx reg, rtx addr, rtx scratch)
4216 {
4217   HOST_WIDE_INT addend;
4218   rtx symref;
4219 
4220   if (!s390_loadrelative_operand_p (addr, &symref, &addend))
4221     gcc_unreachable ();
4222 
4223   if (!(addend & 1))
4224     /* Easy case.  The addend is even so larl will do fine.  */
4225     emit_move_insn (reg, addr);
4226   else
4227     {
4228       /* We can leave the scratch register untouched if the target
4229 	 register is a valid base register.  */
4230       if (REGNO (reg) < FIRST_PSEUDO_REGISTER
4231 	  && REGNO_REG_CLASS (REGNO (reg)) == ADDR_REGS)
4232 	scratch = reg;
4233 
4234       gcc_assert (REGNO (scratch) < FIRST_PSEUDO_REGISTER);
4235       gcc_assert (REGNO_REG_CLASS (REGNO (scratch)) == ADDR_REGS);
4236 
4237       if (addend != 1)
4238 	emit_move_insn (scratch,
4239 			gen_rtx_CONST (Pmode,
4240 				       gen_rtx_PLUS (Pmode, symref,
4241 						     GEN_INT (addend - 1))));
4242       else
4243 	emit_move_insn (scratch, symref);
4244 
4245       /* Increment the address using la in order to avoid clobbering cc.  */
4246       s390_load_address (reg, gen_rtx_PLUS (Pmode, scratch, const1_rtx));
4247     }
4248 }
4249 
4250 /* Generate what is necessary to move between REG and MEM using
4251    SCRATCH.  The direction is given by TOMEM.  */
4252 
4253 void
4254 s390_reload_symref_address (rtx reg, rtx mem, rtx scratch, bool tomem)
4255 {
4256   /* Reload might have pulled a constant out of the literal pool.
4257      Force it back in.  */
4258   if (CONST_INT_P (mem) || GET_CODE (mem) == CONST_DOUBLE
4259       || GET_CODE (mem) == CONST_WIDE_INT
4260       || GET_CODE (mem) == CONST_VECTOR
4261       || GET_CODE (mem) == CONST)
4262     mem = force_const_mem (GET_MODE (reg), mem);
4263 
4264   gcc_assert (MEM_P (mem));
4265 
4266   /* For a load from memory we can leave the scratch register
4267      untouched if the target register is a valid base register.  */
4268   if (!tomem
4269       && REGNO (reg) < FIRST_PSEUDO_REGISTER
4270       && REGNO_REG_CLASS (REGNO (reg)) == ADDR_REGS
4271       && GET_MODE (reg) == GET_MODE (scratch))
4272     scratch = reg;
4273 
4274   /* Load address into scratch register.  Since we can't have a
4275      secondary reload for a secondary reload we have to cover the case
4276      where larl would need a secondary reload here as well.  */
4277   s390_reload_larl_operand (scratch, XEXP (mem, 0), scratch);
4278 
4279   /* Now we can use a standard load/store to do the move.  */
4280   if (tomem)
4281     emit_move_insn (replace_equiv_address (mem, scratch), reg);
4282   else
4283     emit_move_insn (reg, replace_equiv_address (mem, scratch));
4284 }
4285 
4286 /* Inform reload about cases where moving X with a mode MODE to a register in
4287    RCLASS requires an extra scratch or immediate register.  Return the class
4288    needed for the immediate register.  */
4289 
4290 static reg_class_t
4291 s390_secondary_reload (bool in_p, rtx x, reg_class_t rclass_i,
4292 		       machine_mode mode, secondary_reload_info *sri)
4293 {
4294   enum reg_class rclass = (enum reg_class) rclass_i;
4295 
4296   /* Intermediate register needed.  */
4297   if (reg_classes_intersect_p (CC_REGS, rclass))
4298     return GENERAL_REGS;
4299 
4300   if (TARGET_VX)
4301     {
4302       /* The vst/vl vector move instructions allow only for short
4303 	 displacements.  */
4304       if (MEM_P (x)
4305 	  && GET_CODE (XEXP (x, 0)) == PLUS
4306 	  && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
4307 	  && !SHORT_DISP_IN_RANGE(INTVAL (XEXP (XEXP (x, 0), 1)))
4308 	  && reg_class_subset_p (rclass, VEC_REGS)
4309 	  && (!reg_class_subset_p (rclass, FP_REGS)
4310 	      || (GET_MODE_SIZE (mode) > 8
4311 		  && s390_class_max_nregs (FP_REGS, mode) == 1)))
4312 	{
4313 	  if (in_p)
4314 	    sri->icode = (TARGET_64BIT ?
4315 			  CODE_FOR_reloaddi_la_in :
4316 			  CODE_FOR_reloadsi_la_in);
4317 	  else
4318 	    sri->icode = (TARGET_64BIT ?
4319 			  CODE_FOR_reloaddi_la_out :
4320 			  CODE_FOR_reloadsi_la_out);
4321 	}
4322     }
4323 
4324   if (TARGET_Z10)
4325     {
4326       HOST_WIDE_INT offset;
4327       rtx symref;
4328 
4329       /* On z10 several optimizer steps may generate larl operands with
4330 	 an odd addend.  */
4331       if (in_p
4332 	  && s390_loadrelative_operand_p (x, &symref, &offset)
4333 	  && mode == Pmode
4334 	  && !SYMBOL_FLAG_NOTALIGN2_P (symref)
4335 	  && (offset & 1) == 1)
4336 	sri->icode = ((mode == DImode) ? CODE_FOR_reloaddi_larl_odd_addend_z10
4337 		      : CODE_FOR_reloadsi_larl_odd_addend_z10);
4338 
4339       /* Handle all the (mem (symref)) accesses we cannot use the z10
4340 	 instructions for.  */
4341       if (MEM_P (x)
4342 	  && s390_loadrelative_operand_p (XEXP (x, 0), NULL, NULL)
4343 	  && (mode == QImode
4344 	      || !reg_class_subset_p (rclass, GENERAL_REGS)
4345 	      || GET_MODE_SIZE (mode) > UNITS_PER_WORD
4346 	      || !s390_check_symref_alignment (XEXP (x, 0),
4347 					       GET_MODE_SIZE (mode))))
4348 	{
4349 #define __SECONDARY_RELOAD_CASE(M,m)					\
4350 	  case E_##M##mode:						\
4351 	    if (TARGET_64BIT)						\
4352 	      sri->icode = in_p ? CODE_FOR_reload##m##di_toreg_z10 :	\
4353                                   CODE_FOR_reload##m##di_tomem_z10;	\
4354 	    else							\
4355   	      sri->icode = in_p ? CODE_FOR_reload##m##si_toreg_z10 :	\
4356                                   CODE_FOR_reload##m##si_tomem_z10;	\
4357 	  break;
4358 
4359 	  switch (GET_MODE (x))
4360 	    {
4361 	      __SECONDARY_RELOAD_CASE (QI, qi);
4362 	      __SECONDARY_RELOAD_CASE (HI, hi);
4363 	      __SECONDARY_RELOAD_CASE (SI, si);
4364 	      __SECONDARY_RELOAD_CASE (DI, di);
4365 	      __SECONDARY_RELOAD_CASE (TI, ti);
4366 	      __SECONDARY_RELOAD_CASE (SF, sf);
4367 	      __SECONDARY_RELOAD_CASE (DF, df);
4368 	      __SECONDARY_RELOAD_CASE (TF, tf);
4369 	      __SECONDARY_RELOAD_CASE (SD, sd);
4370 	      __SECONDARY_RELOAD_CASE (DD, dd);
4371 	      __SECONDARY_RELOAD_CASE (TD, td);
4372 	      __SECONDARY_RELOAD_CASE (V1QI, v1qi);
4373 	      __SECONDARY_RELOAD_CASE (V2QI, v2qi);
4374 	      __SECONDARY_RELOAD_CASE (V4QI, v4qi);
4375 	      __SECONDARY_RELOAD_CASE (V8QI, v8qi);
4376 	      __SECONDARY_RELOAD_CASE (V16QI, v16qi);
4377 	      __SECONDARY_RELOAD_CASE (V1HI, v1hi);
4378 	      __SECONDARY_RELOAD_CASE (V2HI, v2hi);
4379 	      __SECONDARY_RELOAD_CASE (V4HI, v4hi);
4380 	      __SECONDARY_RELOAD_CASE (V8HI, v8hi);
4381 	      __SECONDARY_RELOAD_CASE (V1SI, v1si);
4382 	      __SECONDARY_RELOAD_CASE (V2SI, v2si);
4383 	      __SECONDARY_RELOAD_CASE (V4SI, v4si);
4384 	      __SECONDARY_RELOAD_CASE (V1DI, v1di);
4385 	      __SECONDARY_RELOAD_CASE (V2DI, v2di);
4386 	      __SECONDARY_RELOAD_CASE (V1TI, v1ti);
4387 	      __SECONDARY_RELOAD_CASE (V1SF, v1sf);
4388 	      __SECONDARY_RELOAD_CASE (V2SF, v2sf);
4389 	      __SECONDARY_RELOAD_CASE (V4SF, v4sf);
4390 	      __SECONDARY_RELOAD_CASE (V1DF, v1df);
4391 	      __SECONDARY_RELOAD_CASE (V2DF, v2df);
4392 	      __SECONDARY_RELOAD_CASE (V1TF, v1tf);
4393 	    default:
4394 	      gcc_unreachable ();
4395 	    }
4396 #undef __SECONDARY_RELOAD_CASE
4397 	}
4398     }
4399 
4400   /* We need a scratch register when loading a PLUS expression which
4401      is not a legitimate operand of the LOAD ADDRESS instruction.  */
4402   /* LRA can deal with the transformation of a PLUS operand on its own,
4403      so we don't need to prompt LRA in this case.  */
4404   if (! lra_in_progress && in_p && s390_plus_operand (x, mode))
4405     sri->icode = (TARGET_64BIT ?
4406 		  CODE_FOR_reloaddi_plus : CODE_FOR_reloadsi_plus);
4407 
4408   /* Performing a multiword move from or to memory we have to make sure the
4409      second chunk in memory is addressable without causing a displacement
4410      overflow.  If that would be the case we calculate the address in
4411      a scratch register.  */
4412   if (MEM_P (x)
4413       && GET_CODE (XEXP (x, 0)) == PLUS
4414       && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
4415       && !DISP_IN_RANGE (INTVAL (XEXP (XEXP (x, 0), 1))
4416 			 + GET_MODE_SIZE (mode) - 1))
4417     {
4418       /* For GENERAL_REGS a displacement overflow is no problem if occurring
4419 	 in an s_operand address since we may fall back to lm/stm.  So we only
4420 	 have to care about overflows in the b+i+d case.  */
4421       if ((reg_classes_intersect_p (GENERAL_REGS, rclass)
4422 	   && s390_class_max_nregs (GENERAL_REGS, mode) > 1
4423 	   && GET_CODE (XEXP (XEXP (x, 0), 0)) == PLUS)
4424 	  /* For FP_REGS no lm/stm is available so this check is triggered
4425 	     for displacement overflows in b+i+d and b+d like addresses.  */
4426 	  || (reg_classes_intersect_p (FP_REGS, rclass)
4427 	      && s390_class_max_nregs (FP_REGS, mode) > 1))
4428 	{
4429 	  if (in_p)
4430 	    sri->icode = (TARGET_64BIT ?
4431 			  CODE_FOR_reloaddi_la_in :
4432 			  CODE_FOR_reloadsi_la_in);
4433 	  else
4434 	    sri->icode = (TARGET_64BIT ?
4435 			  CODE_FOR_reloaddi_la_out :
4436 			  CODE_FOR_reloadsi_la_out);
4437 	}
4438     }
4439 
4440   /* A scratch address register is needed when a symbolic constant is
4441      copied to r0 compiling with -fPIC.  In other cases the target
4442      register might be used as temporary (see legitimize_pic_address).  */
4443   if (in_p && SYMBOLIC_CONST (x) && flag_pic == 2 && rclass != ADDR_REGS)
4444     sri->icode = (TARGET_64BIT ?
4445 		  CODE_FOR_reloaddi_PIC_addr :
4446 		  CODE_FOR_reloadsi_PIC_addr);
4447 
4448   /* Either scratch or no register needed.  */
4449   return NO_REGS;
4450 }
4451 
4452 /* Implement TARGET_SECONDARY_MEMORY_NEEDED.
4453 
4454    We need secondary memory to move data between GPRs and FPRs.
4455 
4456    - With DFP the ldgr/lgdr instructions are available.  Due to the
4457      different alignment we cannot use them for SFmode.  For 31 bit a
4458      64 bit value in GPR would be a register pair so here we still
4459      need to go via memory.
4460 
4461    - With z13 we can do the SF/SImode moves with vlgvf.  Due to the
4462      overlapping of FPRs and VRs we still disallow TF/TD modes to be
4463      in full VRs so as before also on z13 we do these moves via
4464      memory.
4465 
4466      FIXME: Should we try splitting it into two vlgvg's/vlvg's instead?  */
4467 
4468 static bool
4469 s390_secondary_memory_needed (machine_mode mode,
4470 			      reg_class_t class1, reg_class_t class2)
4471 {
4472   return (((reg_classes_intersect_p (class1, VEC_REGS)
4473 	    && reg_classes_intersect_p (class2, GENERAL_REGS))
4474 	   || (reg_classes_intersect_p (class1, GENERAL_REGS)
4475 	       && reg_classes_intersect_p (class2, VEC_REGS)))
4476 	  && (!TARGET_DFP || !TARGET_64BIT || GET_MODE_SIZE (mode) != 8)
4477 	  && (!TARGET_VX || (SCALAR_FLOAT_MODE_P (mode)
4478 			     && GET_MODE_SIZE (mode) > 8)));
4479 }
4480 
4481 /* Implement TARGET_SECONDARY_MEMORY_NEEDED_MODE.
4482 
4483    get_secondary_mem widens its argument to BITS_PER_WORD which loses on 64bit
4484    because the movsi and movsf patterns don't handle r/f moves.  */
4485 
4486 static machine_mode
4487 s390_secondary_memory_needed_mode (machine_mode mode)
4488 {
4489   if (GET_MODE_BITSIZE (mode) < 32)
4490     return mode_for_size (32, GET_MODE_CLASS (mode), 0).require ();
4491   return mode;
4492 }
4493 
4494 /* Generate code to load SRC, which is a PLUS that is not a
4495    legitimate operand for the LA instruction, into TARGET.
4496    SCRATCH may be used as scratch register.  */
4497 
4498 void
4499 s390_expand_plus_operand (rtx target, rtx src,
4500 			  rtx scratch)
4501 {
4502   rtx sum1, sum2;
4503   struct s390_address ad;
4504 
4505   /* src must be a PLUS; get its two operands.  */
4506   gcc_assert (GET_CODE (src) == PLUS);
4507   gcc_assert (GET_MODE (src) == Pmode);
4508 
4509   /* Check if any of the two operands is already scheduled
4510      for replacement by reload.  This can happen e.g. when
4511      float registers occur in an address.  */
4512   sum1 = find_replacement (&XEXP (src, 0));
4513   sum2 = find_replacement (&XEXP (src, 1));
4514   src = gen_rtx_PLUS (Pmode, sum1, sum2);
4515 
4516   /* If the address is already strictly valid, there's nothing to do.  */
4517   if (!s390_decompose_address (src, &ad)
4518       || (ad.base && !REGNO_OK_FOR_BASE_P (REGNO (ad.base)))
4519       || (ad.indx && !REGNO_OK_FOR_INDEX_P (REGNO (ad.indx))))
4520     {
4521       /* Otherwise, one of the operands cannot be an address register;
4522          we reload its value into the scratch register.  */
4523       if (true_regnum (sum1) < 1 || true_regnum (sum1) > 15)
4524 	{
4525 	  emit_move_insn (scratch, sum1);
4526 	  sum1 = scratch;
4527 	}
4528       if (true_regnum (sum2) < 1 || true_regnum (sum2) > 15)
4529 	{
4530 	  emit_move_insn (scratch, sum2);
4531 	  sum2 = scratch;
4532 	}
4533 
4534       /* According to the way these invalid addresses are generated
4535          in reload.c, it should never happen (at least on s390) that
4536          *neither* of the PLUS components, after find_replacements
4537          was applied, is an address register.  */
4538       if (sum1 == scratch && sum2 == scratch)
4539 	{
4540 	  debug_rtx (src);
4541 	  gcc_unreachable ();
4542 	}
4543 
4544       src = gen_rtx_PLUS (Pmode, sum1, sum2);
4545     }
4546 
4547   /* Emit the LOAD ADDRESS pattern.  Note that reload of PLUS
4548      is only ever performed on addresses, so we can mark the
4549      sum as legitimate for LA in any case.  */
4550   s390_load_address (target, src);
4551 }
4552 
4553 
4554 /* Return true if ADDR is a valid memory address.
4555    STRICT specifies whether strict register checking applies.  */
4556 
4557 static bool
4558 s390_legitimate_address_p (machine_mode mode, rtx addr, bool strict)
4559 {
4560   struct s390_address ad;
4561 
4562   if (TARGET_Z10
4563       && larl_operand (addr, VOIDmode)
4564       && (mode == VOIDmode
4565 	  || s390_check_symref_alignment (addr, GET_MODE_SIZE (mode))))
4566     return true;
4567 
4568   if (!s390_decompose_address (addr, &ad))
4569     return false;
4570 
4571   if (strict)
4572     {
4573       if (ad.base && !REGNO_OK_FOR_BASE_P (REGNO (ad.base)))
4574 	return false;
4575 
4576       if (ad.indx && !REGNO_OK_FOR_INDEX_P (REGNO (ad.indx)))
4577 	return false;
4578     }
4579   else
4580     {
4581       if (ad.base
4582 	  && !(REGNO (ad.base) >= FIRST_PSEUDO_REGISTER
4583 	       || REGNO_REG_CLASS (REGNO (ad.base)) == ADDR_REGS))
4584 	return false;
4585 
4586       if (ad.indx
4587 	  && !(REGNO (ad.indx) >= FIRST_PSEUDO_REGISTER
4588 	       || REGNO_REG_CLASS (REGNO (ad.indx)) == ADDR_REGS))
4589 	  return false;
4590     }
4591   return true;
4592 }
4593 
4594 /* Return true if OP is a valid operand for the LA instruction.
4595    In 31-bit, we need to prove that the result is used as an
4596    address, as LA performs only a 31-bit addition.  */
4597 
4598 bool
4599 legitimate_la_operand_p (rtx op)
4600 {
4601   struct s390_address addr;
4602   if (!s390_decompose_address (op, &addr))
4603     return false;
4604 
4605   return (TARGET_64BIT || addr.pointer);
4606 }
4607 
4608 /* Return true if it is valid *and* preferable to use LA to
4609    compute the sum of OP1 and OP2.  */
4610 
4611 bool
4612 preferred_la_operand_p (rtx op1, rtx op2)
4613 {
4614   struct s390_address addr;
4615 
4616   if (op2 != const0_rtx)
4617     op1 = gen_rtx_PLUS (Pmode, op1, op2);
4618 
4619   if (!s390_decompose_address (op1, &addr))
4620     return false;
4621   if (addr.base && !REGNO_OK_FOR_BASE_P (REGNO (addr.base)))
4622     return false;
4623   if (addr.indx && !REGNO_OK_FOR_INDEX_P (REGNO (addr.indx)))
4624     return false;
4625 
4626   /* Avoid LA instructions with index register on z196; it is
4627      preferable to use regular add instructions when possible.
4628      Starting with zEC12 the la with index register is "uncracked"
4629      again.  */
4630   if (addr.indx && s390_tune == PROCESSOR_2817_Z196)
4631     return false;
4632 
4633   if (!TARGET_64BIT && !addr.pointer)
4634     return false;
4635 
4636   if (addr.pointer)
4637     return true;
4638 
4639   if ((addr.base && REG_P (addr.base) && REG_POINTER (addr.base))
4640       || (addr.indx && REG_P (addr.indx) && REG_POINTER (addr.indx)))
4641     return true;
4642 
4643   return false;
4644 }
4645 
4646 /* Emit a forced load-address operation to load SRC into DST.
4647    This will use the LOAD ADDRESS instruction even in situations
4648    where legitimate_la_operand_p (SRC) returns false.  */
4649 
4650 void
4651 s390_load_address (rtx dst, rtx src)
4652 {
4653   if (TARGET_64BIT)
4654     emit_move_insn (dst, src);
4655   else
4656     emit_insn (gen_force_la_31 (dst, src));
4657 }
4658 
4659 /* Return true if it is OK to use SYMBOL_REF in a relative address.  */
4660 
4661 bool
4662 s390_rel_address_ok_p (rtx symbol_ref)
4663 {
4664   tree decl;
4665 
4666   if (symbol_ref == s390_got_symbol () || CONSTANT_POOL_ADDRESS_P (symbol_ref))
4667     return true;
4668 
4669   decl = SYMBOL_REF_DECL (symbol_ref);
4670 
4671   if (!flag_pic || SYMBOL_REF_LOCAL_P (symbol_ref))
4672     return (s390_pic_data_is_text_relative
4673 	    || (decl
4674 		&& TREE_CODE (decl) == FUNCTION_DECL));
4675 
4676   return false;
4677 }
4678 
4679 /* Return a legitimate reference for ORIG (an address) using the
4680    register REG.  If REG is 0, a new pseudo is generated.
4681 
4682    There are two types of references that must be handled:
4683 
4684    1. Global data references must load the address from the GOT, via
4685       the PIC reg.  An insn is emitted to do this load, and the reg is
4686       returned.
4687 
4688    2. Static data references, constant pool addresses, and code labels
4689       compute the address as an offset from the GOT, whose base is in
4690       the PIC reg.  Static data objects have SYMBOL_FLAG_LOCAL set to
4691       differentiate them from global data objects.  The returned
4692       address is the PIC reg + an unspec constant.
4693 
4694    TARGET_LEGITIMIZE_ADDRESS_P rejects symbolic references unless the PIC
4695    reg also appears in the address.  */
4696 
4697 rtx
4698 legitimize_pic_address (rtx orig, rtx reg)
4699 {
4700   rtx addr = orig;
4701   rtx addend = const0_rtx;
4702   rtx new_rtx = orig;
4703 
4704   gcc_assert (!TLS_SYMBOLIC_CONST (addr));
4705 
4706   if (GET_CODE (addr) == CONST)
4707     addr = XEXP (addr, 0);
4708 
4709   if (GET_CODE (addr) == PLUS)
4710     {
4711       addend = XEXP (addr, 1);
4712       addr = XEXP (addr, 0);
4713     }
4714 
4715   if ((GET_CODE (addr) == LABEL_REF
4716        || (SYMBOL_REF_P (addr) && s390_rel_address_ok_p (addr))
4717        || (GET_CODE (addr) == UNSPEC &&
4718 	   (XINT (addr, 1) == UNSPEC_GOTENT
4719 	    || (TARGET_CPU_ZARCH && XINT (addr, 1) == UNSPEC_PLT))))
4720       && GET_CODE (addend) == CONST_INT)
4721     {
4722       /* This can be locally addressed.  */
4723 
4724       /* larl_operand requires UNSPECs to be wrapped in a const rtx.  */
4725       rtx const_addr = (GET_CODE (addr) == UNSPEC ?
4726 			gen_rtx_CONST (Pmode, addr) : addr);
4727 
4728       if (TARGET_CPU_ZARCH
4729 	  && larl_operand (const_addr, VOIDmode)
4730 	  && INTVAL (addend) < HOST_WIDE_INT_1 << 31
4731 	  && INTVAL (addend) >= -(HOST_WIDE_INT_1 << 31))
4732 	{
4733 	  if (INTVAL (addend) & 1)
4734 	    {
4735 	      /* LARL can't handle odd offsets, so emit a pair of LARL
4736 		 and LA.  */
4737 	      rtx temp = reg? reg : gen_reg_rtx (Pmode);
4738 
4739 	      if (!DISP_IN_RANGE (INTVAL (addend)))
4740 		{
4741 		  HOST_WIDE_INT even = INTVAL (addend) - 1;
4742 		  addr = gen_rtx_PLUS (Pmode, addr, GEN_INT (even));
4743 		  addr = gen_rtx_CONST (Pmode, addr);
4744 		  addend = const1_rtx;
4745 		}
4746 
4747 	      emit_move_insn (temp, addr);
4748 	      new_rtx = gen_rtx_PLUS (Pmode, temp, addend);
4749 
4750 	      if (reg != 0)
4751 		{
4752 		  s390_load_address (reg, new_rtx);
4753 		  new_rtx = reg;
4754 		}
4755 	    }
4756 	  else
4757 	    {
4758 	      /* If the offset is even, we can just use LARL.  This
4759 		 will happen automatically.  */
4760 	    }
4761 	}
4762       else
4763 	{
4764 	  /* No larl - Access local symbols relative to the GOT.  */
4765 
4766 	  rtx temp = reg? reg : gen_reg_rtx (Pmode);
4767 
4768 	  if (reload_in_progress || reload_completed)
4769 	    df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
4770 
4771 	  addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
4772 	  if (addend != const0_rtx)
4773 	    addr = gen_rtx_PLUS (Pmode, addr, addend);
4774 	  addr = gen_rtx_CONST (Pmode, addr);
4775 	  addr = force_const_mem (Pmode, addr);
4776 	  emit_move_insn (temp, addr);
4777 
4778 	  new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, temp);
4779 	  if (reg != 0)
4780 	    {
4781 	      s390_load_address (reg, new_rtx);
4782 	      new_rtx = reg;
4783 	    }
4784 	}
4785     }
4786   else if (GET_CODE (addr) == SYMBOL_REF && addend == const0_rtx)
4787     {
4788       /* A non-local symbol reference without addend.
4789 
4790 	 The symbol ref is wrapped into an UNSPEC to make sure the
4791 	 proper operand modifier (@GOT or @GOTENT) will be emitted.
4792 	 This will tell the linker to put the symbol into the GOT.
4793 
4794 	 Additionally the code dereferencing the GOT slot is emitted here.
4795 
4796 	 An addend to the symref needs to be added afterwards.
4797 	 legitimize_pic_address calls itself recursively to handle
4798 	 that case.  So no need to do it here.  */
4799 
4800       if (reg == 0)
4801         reg = gen_reg_rtx (Pmode);
4802 
4803       if (TARGET_Z10)
4804 	{
4805 	  /* Use load relative if possible.
4806 	     lgrl <target>, sym@GOTENT  */
4807 	  new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTENT);
4808 	  new_rtx = gen_rtx_CONST (Pmode, new_rtx);
4809 	  new_rtx = gen_const_mem (GET_MODE (reg), new_rtx);
4810 
4811 	  emit_move_insn (reg, new_rtx);
4812 	  new_rtx = reg;
4813 	}
4814       else if (flag_pic == 1)
4815         {
4816           /* Assume GOT offset is a valid displacement operand (< 4k
4817              or < 512k with z990).  This is handled the same way in
4818              both 31- and 64-bit code (@GOT).
4819              lg <target>, sym@GOT(r12)  */
4820 
4821 	  if (reload_in_progress || reload_completed)
4822 	    df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
4823 
4824           new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
4825           new_rtx = gen_rtx_CONST (Pmode, new_rtx);
4826           new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
4827           new_rtx = gen_const_mem (Pmode, new_rtx);
4828           emit_move_insn (reg, new_rtx);
4829           new_rtx = reg;
4830         }
4831       else if (TARGET_CPU_ZARCH)
4832         {
4833           /* If the GOT offset might be >= 4k, we determine the position
4834              of the GOT entry via a PC-relative LARL (@GOTENT).
4835 	     larl temp, sym@GOTENT
4836              lg   <target>, 0(temp) */
4837 
4838           rtx temp = reg ? reg : gen_reg_rtx (Pmode);
4839 
4840 	  gcc_assert (REGNO (temp) >= FIRST_PSEUDO_REGISTER
4841 		      || REGNO_REG_CLASS (REGNO (temp)) == ADDR_REGS);
4842 
4843           new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTENT);
4844           new_rtx = gen_rtx_CONST (Pmode, new_rtx);
4845 	  emit_move_insn (temp, new_rtx);
4846 
4847 	  new_rtx = gen_const_mem (Pmode, temp);
4848           emit_move_insn (reg, new_rtx);
4849 
4850           new_rtx = reg;
4851         }
4852       else
4853         {
4854           /* If the GOT offset might be >= 4k, we have to load it
4855              from the literal pool (@GOT).
4856 
4857 	     lg temp, lit-litbase(r13)
4858              lg <target>, 0(temp)
4859 	     lit:  .long sym@GOT  */
4860 
4861           rtx temp = reg ? reg : gen_reg_rtx (Pmode);
4862 
4863 	  gcc_assert (REGNO (temp) >= FIRST_PSEUDO_REGISTER
4864 		      || REGNO_REG_CLASS (REGNO (temp)) == ADDR_REGS);
4865 
4866 	  if (reload_in_progress || reload_completed)
4867 	    df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
4868 
4869           addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
4870           addr = gen_rtx_CONST (Pmode, addr);
4871           addr = force_const_mem (Pmode, addr);
4872           emit_move_insn (temp, addr);
4873 
4874           new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, temp);
4875           new_rtx = gen_const_mem (Pmode, new_rtx);
4876           emit_move_insn (reg, new_rtx);
4877           new_rtx = reg;
4878         }
4879     }
4880   else if (GET_CODE (addr) == UNSPEC && GET_CODE (addend) == CONST_INT)
4881     {
4882       gcc_assert (XVECLEN (addr, 0) == 1);
4883       switch (XINT (addr, 1))
4884 	{
4885 	  /* These UNSPECs address symbols (or PLT slots) relative to the
4886 	     GOT (not GOT slots!).  In general this will exceed the
4887 	     displacement range, so these values belong in the literal
4888 	     pool.  */
4889 	case UNSPEC_GOTOFF:
4890 	case UNSPEC_PLTOFF:
4891 	  new_rtx = force_const_mem (Pmode, orig);
4892 	  break;
4893 
4894 	  /* For -fPIC the GOT size might exceed the displacement
4895 	     range so make sure the value is in the literal pool.  */
4896 	case UNSPEC_GOT:
4897 	  if (flag_pic == 2)
4898 	    new_rtx = force_const_mem (Pmode, orig);
4899 	  break;
4900 
4901 	  /* For @GOTENT larl is used.  This is handled like local
4902 	     symbol refs.  */
4903 	case UNSPEC_GOTENT:
4904 	  gcc_unreachable ();
4905 	  break;
4906 
4907 	  /* @PLT is OK as-is on 64-bit, but must be converted to
4908 	     GOT-relative @PLTOFF on 31-bit.  */
4909 	case UNSPEC_PLT:
4910 	  if (!TARGET_CPU_ZARCH)
4911 	    {
4912 	      rtx temp = reg? reg : gen_reg_rtx (Pmode);
4913 
4914 	      if (reload_in_progress || reload_completed)
4915 		df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
4916 
4917 	      addr = XVECEXP (addr, 0, 0);
4918 	      addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr),
4919 				     UNSPEC_PLTOFF);
4920 	      if (addend != const0_rtx)
4921 		addr = gen_rtx_PLUS (Pmode, addr, addend);
4922 	      addr = gen_rtx_CONST (Pmode, addr);
4923 	      addr = force_const_mem (Pmode, addr);
4924 	      emit_move_insn (temp, addr);
4925 
4926 	      new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, temp);
4927 	      if (reg != 0)
4928 		{
4929 		  s390_load_address (reg, new_rtx);
4930 		  new_rtx = reg;
4931 		}
4932 	    }
4933 	  else
4934 	    /* On 64 bit larl can be used.  This case is handled like
4935 	       local symbol refs.  */
4936 	    gcc_unreachable ();
4937 	  break;
4938 
4939 	  /* Everything else cannot happen.  */
4940 	default:
4941 	  gcc_unreachable ();
4942 	}
4943     }
4944   else if (addend != const0_rtx)
4945     {
4946       /* Otherwise, compute the sum.  */
4947 
4948       rtx base = legitimize_pic_address (addr, reg);
4949       new_rtx  = legitimize_pic_address (addend,
4950 					 base == reg ? NULL_RTX : reg);
4951       if (GET_CODE (new_rtx) == CONST_INT)
4952 	new_rtx = plus_constant (Pmode, base, INTVAL (new_rtx));
4953       else
4954 	{
4955 	  if (GET_CODE (new_rtx) == PLUS && CONSTANT_P (XEXP (new_rtx, 1)))
4956 	    {
4957 	      base = gen_rtx_PLUS (Pmode, base, XEXP (new_rtx, 0));
4958 	      new_rtx = XEXP (new_rtx, 1);
4959 	    }
4960 	  new_rtx = gen_rtx_PLUS (Pmode, base, new_rtx);
4961 	}
4962 
4963       if (GET_CODE (new_rtx) == CONST)
4964 	new_rtx = XEXP (new_rtx, 0);
4965       new_rtx = force_operand (new_rtx, 0);
4966     }
4967 
4968   return new_rtx;
4969 }
4970 
4971 /* Load the thread pointer into a register.  */
4972 
4973 rtx
4974 s390_get_thread_pointer (void)
4975 {
4976   rtx tp = gen_reg_rtx (Pmode);
4977 
4978   emit_move_insn (tp, gen_rtx_REG (Pmode, TP_REGNUM));
4979   mark_reg_pointer (tp, BITS_PER_WORD);
4980 
4981   return tp;
4982 }
4983 
4984 /* Emit a TLS call insn.  The call target is the SYMBOL_REF stored
4985    in s390_tls_symbol which always refers to __tls_get_offset.
4986    The returned offset is written to RESULT_REG and a USE rtx is
4987    generated for TLS_CALL.  */
4988 
4989 static GTY(()) rtx s390_tls_symbol;
4990 
4991 static void
4992 s390_emit_tls_call_insn (rtx result_reg, rtx tls_call)
4993 {
4994   rtx insn;
4995 
4996   if (!flag_pic)
4997     emit_insn (s390_load_got ());
4998 
4999   if (!s390_tls_symbol)
5000     s390_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, "__tls_get_offset");
5001 
5002   insn = s390_emit_call (s390_tls_symbol, tls_call, result_reg,
5003 			 gen_rtx_REG (Pmode, RETURN_REGNUM));
5004 
5005   use_reg (&CALL_INSN_FUNCTION_USAGE (insn), result_reg);
5006   RTL_CONST_CALL_P (insn) = 1;
5007 }
5008 
5009 /* ADDR contains a thread-local SYMBOL_REF.  Generate code to compute
5010    this (thread-local) address.  REG may be used as temporary.  */
5011 
5012 static rtx
5013 legitimize_tls_address (rtx addr, rtx reg)
5014 {
5015   rtx new_rtx, tls_call, temp, base, r2;
5016   rtx_insn *insn;
5017 
5018   if (GET_CODE (addr) == SYMBOL_REF)
5019     switch (tls_symbolic_operand (addr))
5020       {
5021       case TLS_MODEL_GLOBAL_DYNAMIC:
5022 	start_sequence ();
5023 	r2 = gen_rtx_REG (Pmode, 2);
5024 	tls_call = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_TLSGD);
5025 	new_rtx = gen_rtx_CONST (Pmode, tls_call);
5026 	new_rtx = force_const_mem (Pmode, new_rtx);
5027 	emit_move_insn (r2, new_rtx);
5028 	s390_emit_tls_call_insn (r2, tls_call);
5029 	insn = get_insns ();
5030 	end_sequence ();
5031 
5032 	new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_NTPOFF);
5033 	temp = gen_reg_rtx (Pmode);
5034 	emit_libcall_block (insn, temp, r2, new_rtx);
5035 
5036 	new_rtx = gen_rtx_PLUS (Pmode, s390_get_thread_pointer (), temp);
5037 	if (reg != 0)
5038 	  {
5039 	    s390_load_address (reg, new_rtx);
5040 	    new_rtx = reg;
5041 	  }
5042 	break;
5043 
5044       case TLS_MODEL_LOCAL_DYNAMIC:
5045 	start_sequence ();
5046 	r2 = gen_rtx_REG (Pmode, 2);
5047 	tls_call = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TLSLDM);
5048 	new_rtx = gen_rtx_CONST (Pmode, tls_call);
5049 	new_rtx = force_const_mem (Pmode, new_rtx);
5050 	emit_move_insn (r2, new_rtx);
5051 	s390_emit_tls_call_insn (r2, tls_call);
5052 	insn = get_insns ();
5053 	end_sequence ();
5054 
5055 	new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TLSLDM_NTPOFF);
5056 	temp = gen_reg_rtx (Pmode);
5057 	emit_libcall_block (insn, temp, r2, new_rtx);
5058 
5059 	new_rtx = gen_rtx_PLUS (Pmode, s390_get_thread_pointer (), temp);
5060 	base = gen_reg_rtx (Pmode);
5061 	s390_load_address (base, new_rtx);
5062 
5063 	new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_DTPOFF);
5064 	new_rtx = gen_rtx_CONST (Pmode, new_rtx);
5065 	new_rtx = force_const_mem (Pmode, new_rtx);
5066 	temp = gen_reg_rtx (Pmode);
5067 	emit_move_insn (temp, new_rtx);
5068 
5069 	new_rtx = gen_rtx_PLUS (Pmode, base, temp);
5070 	if (reg != 0)
5071 	  {
5072 	    s390_load_address (reg, new_rtx);
5073 	    new_rtx = reg;
5074 	  }
5075 	break;
5076 
5077       case TLS_MODEL_INITIAL_EXEC:
5078 	if (flag_pic == 1)
5079 	  {
5080 	    /* Assume GOT offset < 4k.  This is handled the same way
5081 	       in both 31- and 64-bit code.  */
5082 
5083 	    if (reload_in_progress || reload_completed)
5084 	      df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
5085 
5086 	    new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTNTPOFF);
5087 	    new_rtx = gen_rtx_CONST (Pmode, new_rtx);
5088 	    new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
5089 	    new_rtx = gen_const_mem (Pmode, new_rtx);
5090 	    temp = gen_reg_rtx (Pmode);
5091 	    emit_move_insn (temp, new_rtx);
5092 	  }
5093 	else if (TARGET_CPU_ZARCH)
5094 	  {
5095 	    /* If the GOT offset might be >= 4k, we determine the position
5096 	       of the GOT entry via a PC-relative LARL.  */
5097 
5098 	    new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_INDNTPOFF);
5099 	    new_rtx = gen_rtx_CONST (Pmode, new_rtx);
5100 	    temp = gen_reg_rtx (Pmode);
5101 	    emit_move_insn (temp, new_rtx);
5102 
5103 	    new_rtx = gen_const_mem (Pmode, temp);
5104 	    temp = gen_reg_rtx (Pmode);
5105 	    emit_move_insn (temp, new_rtx);
5106 	  }
5107 	else if (flag_pic)
5108 	  {
5109 	    /* If the GOT offset might be >= 4k, we have to load it
5110 	       from the literal pool.  */
5111 
5112 	    if (reload_in_progress || reload_completed)
5113 	      df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
5114 
5115 	    new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTNTPOFF);
5116 	    new_rtx = gen_rtx_CONST (Pmode, new_rtx);
5117 	    new_rtx = force_const_mem (Pmode, new_rtx);
5118 	    temp = gen_reg_rtx (Pmode);
5119 	    emit_move_insn (temp, new_rtx);
5120 
5121             new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, temp);
5122 	    new_rtx = gen_const_mem (Pmode, new_rtx);
5123 
5124 	    new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, new_rtx, addr), UNSPEC_TLS_LOAD);
5125 	    temp = gen_reg_rtx (Pmode);
5126 	    emit_insn (gen_rtx_SET (temp, new_rtx));
5127 	  }
5128 	else
5129 	  {
5130 	    /* In position-dependent code, load the absolute address of
5131 	       the GOT entry from the literal pool.  */
5132 
5133 	    new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_INDNTPOFF);
5134 	    new_rtx = gen_rtx_CONST (Pmode, new_rtx);
5135 	    new_rtx = force_const_mem (Pmode, new_rtx);
5136 	    temp = gen_reg_rtx (Pmode);
5137 	    emit_move_insn (temp, new_rtx);
5138 
5139 	    new_rtx = temp;
5140 	    new_rtx = gen_const_mem (Pmode, new_rtx);
5141 	    new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, new_rtx, addr), UNSPEC_TLS_LOAD);
5142 	    temp = gen_reg_rtx (Pmode);
5143 	    emit_insn (gen_rtx_SET (temp, new_rtx));
5144 	  }
5145 
5146 	new_rtx = gen_rtx_PLUS (Pmode, s390_get_thread_pointer (), temp);
5147 	if (reg != 0)
5148 	  {
5149 	    s390_load_address (reg, new_rtx);
5150 	    new_rtx = reg;
5151 	  }
5152 	break;
5153 
5154       case TLS_MODEL_LOCAL_EXEC:
5155 	new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_NTPOFF);
5156 	new_rtx = gen_rtx_CONST (Pmode, new_rtx);
5157 	new_rtx = force_const_mem (Pmode, new_rtx);
5158         temp = gen_reg_rtx (Pmode);
5159 	emit_move_insn (temp, new_rtx);
5160 
5161 	new_rtx = gen_rtx_PLUS (Pmode, s390_get_thread_pointer (), temp);
5162 	if (reg != 0)
5163 	  {
5164 	    s390_load_address (reg, new_rtx);
5165 	    new_rtx = reg;
5166 	  }
5167 	break;
5168 
5169       default:
5170 	gcc_unreachable ();
5171       }
5172 
5173   else if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == UNSPEC)
5174     {
5175       switch (XINT (XEXP (addr, 0), 1))
5176 	{
5177 	case UNSPEC_INDNTPOFF:
5178 	  gcc_assert (TARGET_CPU_ZARCH);
5179 	  new_rtx = addr;
5180 	  break;
5181 
5182 	default:
5183 	  gcc_unreachable ();
5184 	}
5185     }
5186 
5187   else if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS
5188 	   && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT)
5189     {
5190       new_rtx = XEXP (XEXP (addr, 0), 0);
5191       if (GET_CODE (new_rtx) != SYMBOL_REF)
5192 	new_rtx = gen_rtx_CONST (Pmode, new_rtx);
5193 
5194       new_rtx = legitimize_tls_address (new_rtx, reg);
5195       new_rtx = plus_constant (Pmode, new_rtx,
5196 			       INTVAL (XEXP (XEXP (addr, 0), 1)));
5197       new_rtx = force_operand (new_rtx, 0);
5198     }
5199 
5200   else
5201     gcc_unreachable ();  /* for now ... */
5202 
5203   return new_rtx;
5204 }
5205 
5206 /* Emit insns making the address in operands[1] valid for a standard
5207    move to operands[0].  operands[1] is replaced by an address which
5208    should be used instead of the former RTX to emit the move
5209    pattern.  */
5210 
5211 void
5212 emit_symbolic_move (rtx *operands)
5213 {
5214   rtx temp = !can_create_pseudo_p () ? operands[0] : gen_reg_rtx (Pmode);
5215 
5216   if (GET_CODE (operands[0]) == MEM)
5217     operands[1] = force_reg (Pmode, operands[1]);
5218   else if (TLS_SYMBOLIC_CONST (operands[1]))
5219     operands[1] = legitimize_tls_address (operands[1], temp);
5220   else if (flag_pic)
5221     operands[1] = legitimize_pic_address (operands[1], temp);
5222 }
5223 
5224 /* Try machine-dependent ways of modifying an illegitimate address X
5225    to be legitimate.  If we find one, return the new, valid address.
5226 
5227    OLDX is the address as it was before break_out_memory_refs was called.
5228    In some cases it is useful to look at this to decide what needs to be done.
5229 
5230    MODE is the mode of the operand pointed to by X.
5231 
5232    When -fpic is used, special handling is needed for symbolic references.
5233    See comments by legitimize_pic_address for details.  */
5234 
5235 static rtx
5236 s390_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
5237 			 machine_mode mode ATTRIBUTE_UNUSED)
5238 {
5239   rtx constant_term = const0_rtx;
5240 
5241   if (TLS_SYMBOLIC_CONST (x))
5242     {
5243       x = legitimize_tls_address (x, 0);
5244 
5245       if (s390_legitimate_address_p (mode, x, FALSE))
5246 	return x;
5247     }
5248   else if (GET_CODE (x) == PLUS
5249 	   && (TLS_SYMBOLIC_CONST (XEXP (x, 0))
5250 	       || TLS_SYMBOLIC_CONST (XEXP (x, 1))))
5251     {
5252       return x;
5253     }
5254   else if (flag_pic)
5255     {
5256       if (SYMBOLIC_CONST (x)
5257           || (GET_CODE (x) == PLUS
5258               && (SYMBOLIC_CONST (XEXP (x, 0))
5259                   || SYMBOLIC_CONST (XEXP (x, 1)))))
5260 	  x = legitimize_pic_address (x, 0);
5261 
5262       if (s390_legitimate_address_p (mode, x, FALSE))
5263 	return x;
5264     }
5265 
5266   x = eliminate_constant_term (x, &constant_term);
5267 
5268   /* Optimize loading of large displacements by splitting them
5269      into the multiple of 4K and the rest; this allows the
5270      former to be CSE'd if possible.
5271 
5272      Don't do this if the displacement is added to a register
5273      pointing into the stack frame, as the offsets will
5274      change later anyway.  */
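  /* For example (on a machine without long displacements), a
     displacement of 0x12345 is split into upper = 0x12000, which is
     loaded into a register and can be CSE'd, and lower = 0x345, which
     fits the 12-bit displacement field:
       lower = 0x12345 & 0xfff;   -> 0x345
       upper = 0x12345 ^ lower;   -> 0x12000  */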
5275 
5276   if (GET_CODE (constant_term) == CONST_INT
5277       && !TARGET_LONG_DISPLACEMENT
5278       && !DISP_IN_RANGE (INTVAL (constant_term))
5279       && !(REG_P (x) && REGNO_PTR_FRAME_P (REGNO (x))))
5280     {
5281       HOST_WIDE_INT lower = INTVAL (constant_term) & 0xfff;
5282       HOST_WIDE_INT upper = INTVAL (constant_term) ^ lower;
5283 
5284       rtx temp = gen_reg_rtx (Pmode);
5285       rtx val  = force_operand (GEN_INT (upper), temp);
5286       if (val != temp)
5287 	emit_move_insn (temp, val);
5288 
5289       x = gen_rtx_PLUS (Pmode, x, temp);
5290       constant_term = GEN_INT (lower);
5291     }
5292 
5293   if (GET_CODE (x) == PLUS)
5294     {
5295       if (GET_CODE (XEXP (x, 0)) == REG)
5296 	{
5297 	  rtx temp = gen_reg_rtx (Pmode);
5298 	  rtx val  = force_operand (XEXP (x, 1), temp);
5299 	  if (val != temp)
5300 	    emit_move_insn (temp, val);
5301 
5302 	  x = gen_rtx_PLUS (Pmode, XEXP (x, 0), temp);
5303 	}
5304 
5305       else if (GET_CODE (XEXP (x, 1)) == REG)
5306 	{
5307 	  rtx temp = gen_reg_rtx (Pmode);
5308 	  rtx val  = force_operand (XEXP (x, 0), temp);
5309 	  if (val != temp)
5310 	    emit_move_insn (temp, val);
5311 
5312 	  x = gen_rtx_PLUS (Pmode, temp, XEXP (x, 1));
5313 	}
5314     }
5315 
5316   if (constant_term != const0_rtx)
5317     x = gen_rtx_PLUS (Pmode, x, constant_term);
5318 
5319   return x;
5320 }
5321 
5322 /* Try a machine-dependent way of reloading an illegitimate address AD
5323    operand.  If we find one, push the reload and return the new address.
5324 
5325    MODE is the mode of the enclosing MEM.  OPNUM is the operand number
5326    and TYPE is the reload type of the current reload.  */
5327 
5328 rtx
5329 legitimize_reload_address (rtx ad, machine_mode mode ATTRIBUTE_UNUSED,
5330 			   int opnum, int type)
5331 {
5332   if (!optimize || TARGET_LONG_DISPLACEMENT)
5333     return NULL_RTX;
5334 
5335   if (GET_CODE (ad) == PLUS)
5336     {
5337       rtx tem = simplify_binary_operation (PLUS, Pmode,
5338 					   XEXP (ad, 0), XEXP (ad, 1));
5339       if (tem)
5340 	ad = tem;
5341     }
5342 
5343   if (GET_CODE (ad) == PLUS
5344       && GET_CODE (XEXP (ad, 0)) == REG
5345       && GET_CODE (XEXP (ad, 1)) == CONST_INT
5346       && !DISP_IN_RANGE (INTVAL (XEXP (ad, 1))))
5347     {
5348       HOST_WIDE_INT lower = INTVAL (XEXP (ad, 1)) & 0xfff;
5349       HOST_WIDE_INT upper = INTVAL (XEXP (ad, 1)) ^ lower;
5350       rtx cst, tem, new_rtx;
5351 
5352       cst = GEN_INT (upper);
5353       if (!legitimate_reload_constant_p (cst))
5354 	cst = force_const_mem (Pmode, cst);
5355 
5356       tem = gen_rtx_PLUS (Pmode, XEXP (ad, 0), cst);
5357       new_rtx = gen_rtx_PLUS (Pmode, tem, GEN_INT (lower));
5358 
5359       push_reload (XEXP (tem, 1), 0, &XEXP (tem, 1), 0,
5360 		   BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
5361 		   opnum, (enum reload_type) type);
5362       return new_rtx;
5363     }
5364 
5365   return NULL_RTX;
5366 }
5367 
5368 /* Emit code to move LEN bytes from SRC to DST.  */
5369 
5370 bool
5371 s390_expand_movmem (rtx dst, rtx src, rtx len)
5372 {
5373   /* When tuning for z10 or higher we rely on the Glibc functions to
5374      do the right thing.  Only for constant lengths below 64k do we
5375      generate inline code.  */
5376   if (s390_tune >= PROCESSOR_2097_Z10
5377       && (GET_CODE (len) != CONST_INT || INTVAL (len) > (1<<16)))
5378     return false;
5379 
5380   /* Expand memcpy for constant length operands without a loop if it
5381      is shorter that way.
5382 
5383      With a constant length argument a
5384      memcpy loop (without pfd) is 36 bytes -> 6 * mvc  */
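  /* For example, a constant len of 700 is expanded into three mvc
     instructions copying 256, 256 and 188 bytes at offsets 0, 256 and
     512 (the length operand of mvc encodes length - 1).  */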
5385   if (GET_CODE (len) == CONST_INT
5386       && INTVAL (len) >= 0
5387       && INTVAL (len) <= 256 * 6
5388       && (!TARGET_MVCLE || INTVAL (len) <= 256))
5389     {
5390       HOST_WIDE_INT o, l;
5391 
5392       for (l = INTVAL (len), o = 0; l > 0; l -= 256, o += 256)
5393 	{
5394 	  rtx newdst = adjust_address (dst, BLKmode, o);
5395 	  rtx newsrc = adjust_address (src, BLKmode, o);
5396 	  emit_insn (gen_movmem_short (newdst, newsrc,
5397 				       GEN_INT (l > 256 ? 255 : l - 1)));
5398 	}
5399     }
5400 
5401   else if (TARGET_MVCLE)
5402     {
5403       emit_insn (gen_movmem_long (dst, src, convert_to_mode (Pmode, len, 1)));
5404     }
5405 
5406   else
5407     {
5408       rtx dst_addr, src_addr, count, blocks, temp;
5409       rtx_code_label *loop_start_label = gen_label_rtx ();
5410       rtx_code_label *loop_end_label = gen_label_rtx ();
5411       rtx_code_label *end_label = gen_label_rtx ();
5412       machine_mode mode;
5413 
5414       mode = GET_MODE (len);
5415       if (mode == VOIDmode)
5416         mode = Pmode;
5417 
5418       dst_addr = gen_reg_rtx (Pmode);
5419       src_addr = gen_reg_rtx (Pmode);
5420       count = gen_reg_rtx (mode);
5421       blocks = gen_reg_rtx (mode);
5422 
5423       convert_move (count, len, 1);
5424       emit_cmp_and_jump_insns (count, const0_rtx,
5425 			       EQ, NULL_RTX, mode, 1, end_label);
5426 
5427       emit_move_insn (dst_addr, force_operand (XEXP (dst, 0), NULL_RTX));
5428       emit_move_insn (src_addr, force_operand (XEXP (src, 0), NULL_RTX));
5429       dst = change_address (dst, VOIDmode, dst_addr);
5430       src = change_address (src, VOIDmode, src_addr);
5431 
5432       temp = expand_binop (mode, add_optab, count, constm1_rtx, count, 1,
5433 			   OPTAB_DIRECT);
5434       if (temp != count)
5435         emit_move_insn (count, temp);
5436 
5437       temp = expand_binop (mode, lshr_optab, count, GEN_INT (8), blocks, 1,
5438 			   OPTAB_DIRECT);
5439       if (temp != blocks)
5440         emit_move_insn (blocks, temp);
5441 
5442       emit_cmp_and_jump_insns (blocks, const0_rtx,
5443 			       EQ, NULL_RTX, mode, 1, loop_end_label);
5444 
5445       emit_label (loop_start_label);
5446 
5447       if (TARGET_Z10
5448 	  && (GET_CODE (len) != CONST_INT || INTVAL (len) > 768))
5449 	{
5450 	  rtx prefetch;
5451 
5452 	  /* Issue a read prefetch for the +3 cache line.  */
5453 	  prefetch = gen_prefetch (gen_rtx_PLUS (Pmode, src_addr, GEN_INT (768)),
5454 				   const0_rtx, const0_rtx);
5455 	  PREFETCH_SCHEDULE_BARRIER_P (prefetch) = true;
5456 	  emit_insn (prefetch);
5457 
5458 	  /* Issue a write prefetch for the +3 cache line.  */
5459 	  prefetch = gen_prefetch (gen_rtx_PLUS (Pmode, dst_addr, GEN_INT (768)),
5460 				   const1_rtx, const0_rtx);
5461 	  PREFETCH_SCHEDULE_BARRIER_P (prefetch) = true;
5462 	  emit_insn (prefetch);
5463 	}
5464 
5465       emit_insn (gen_movmem_short (dst, src, GEN_INT (255)));
5466       s390_load_address (dst_addr,
5467 			 gen_rtx_PLUS (Pmode, dst_addr, GEN_INT (256)));
5468       s390_load_address (src_addr,
5469 			 gen_rtx_PLUS (Pmode, src_addr, GEN_INT (256)));
5470 
5471       temp = expand_binop (mode, add_optab, blocks, constm1_rtx, blocks, 1,
5472 			   OPTAB_DIRECT);
5473       if (temp != blocks)
5474         emit_move_insn (blocks, temp);
5475 
5476       emit_cmp_and_jump_insns (blocks, const0_rtx,
5477 			       EQ, NULL_RTX, mode, 1, loop_end_label);
5478 
5479       emit_jump (loop_start_label);
5480       emit_label (loop_end_label);
5481 
5482       emit_insn (gen_movmem_short (dst, src,
5483 				   convert_to_mode (Pmode, count, 1)));
5484       emit_label (end_label);
5485     }
5486   return true;
5487 }
5488 
5489 /* Emit code to set LEN bytes at DST to VAL.
5490    Make use of clrmem if VAL is zero.  */
5491 
5492 void
5493 s390_expand_setmem (rtx dst, rtx len, rtx val)
5494 {
5495   if (GET_CODE (len) == CONST_INT && INTVAL (len) <= 0)
5496     return;
5497 
5498   gcc_assert (GET_CODE (val) == CONST_INT || GET_MODE (val) == QImode);
5499 
5500   /* Expand setmem/clrmem for a constant length operand without a
5501      loop if it will be shorter that way.
5502      With a constant length and without pfd argument a
5503      clrmem loop is 32 bytes -> 5.3 * xc
5504      setmem loop is 36 bytes -> 3.6 * (mvi/stc + mvc) */
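  /* (xc and mvc are 6-byte SS instructions while mvi/stc are 4 bytes,
     which is where the 32 / 6 = 5.3 and 36 / 10 = 3.6 ratios above
     come from.)  */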
5505   if (GET_CODE (len) == CONST_INT
5506       && ((INTVAL (len) <= 256 * 5 && val == const0_rtx)
5507 	  || INTVAL (len) <= 257 * 3)
5508       && (!TARGET_MVCLE || INTVAL (len) <= 256))
5509     {
5510       HOST_WIDE_INT o, l;
5511 
5512       if (val == const0_rtx)
5513 	/* clrmem: emit 256 byte blockwise XCs.  */
5514 	for (l = INTVAL (len), o = 0; l > 0; l -= 256, o += 256)
5515 	  {
5516 	    rtx newdst = adjust_address (dst, BLKmode, o);
5517 	    emit_insn (gen_clrmem_short (newdst,
5518 					 GEN_INT (l > 256 ? 255 : l - 1)));
5519 	  }
5520       else
5521 	/* setmem: emit 1(mvi) + 256(mvc) byte blockwise memsets by
5522 	   setting first byte to val and using a 256 byte mvc with one
5523 	   byte overlap to propagate the byte.  */
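	/* For example, setting 100 bytes to 0xaa emits an mvi storing
	   0xaa at dst[0] followed by an mvc copying 99 bytes from dst
	   to dst + 1; since mvc copies byte by byte left to right,
	   every byte it reads has just been written, propagating
	   0xaa through the block.  */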
5524 	for (l = INTVAL (len), o = 0; l > 0; l -= 257, o += 257)
5525 	  {
5526 	    rtx newdst = adjust_address (dst, BLKmode, o);
5527 	    emit_move_insn (adjust_address (dst, QImode, o), val);
5528 	    if (l > 1)
5529 	      {
5530 		rtx newdstp1 = adjust_address (dst, BLKmode, o + 1);
5531 		emit_insn (gen_movmem_short (newdstp1, newdst,
5532 					     GEN_INT (l > 257 ? 255 : l - 2)));
5533 	      }
5534 	  }
5535     }
5536 
5537   else if (TARGET_MVCLE)
5538     {
5539       val = force_not_mem (convert_modes (Pmode, QImode, val, 1));
5540       if (TARGET_64BIT)
5541 	emit_insn (gen_setmem_long_di (dst, convert_to_mode (Pmode, len, 1),
5542 				       val));
5543       else
5544 	emit_insn (gen_setmem_long_si (dst, convert_to_mode (Pmode, len, 1),
5545 				       val));
5546     }
5547 
5548   else
5549     {
5550       rtx dst_addr, count, blocks, temp, dstp1 = NULL_RTX;
5551       rtx_code_label *loop_start_label = gen_label_rtx ();
5552       rtx_code_label *onebyte_end_label = gen_label_rtx ();
5553       rtx_code_label *zerobyte_end_label = gen_label_rtx ();
5554       rtx_code_label *restbyte_end_label = gen_label_rtx ();
5555       machine_mode mode;
5556 
5557       mode = GET_MODE (len);
5558       if (mode == VOIDmode)
5559 	mode = Pmode;
5560 
5561       dst_addr = gen_reg_rtx (Pmode);
5562       count = gen_reg_rtx (mode);
5563       blocks = gen_reg_rtx (mode);
5564 
5565       convert_move (count, len, 1);
5566       emit_cmp_and_jump_insns (count, const0_rtx,
5567 			       EQ, NULL_RTX, mode, 1, zerobyte_end_label,
5568 			       profile_probability::very_unlikely ());
5569 
5570       /* We need to make a copy of the target address since memset is
5571 	 supposed to return it unmodified.  We have to make it here
5572 	 already since the new reg is used at onebyte_end_label.  */
5573       emit_move_insn (dst_addr, force_operand (XEXP (dst, 0), NULL_RTX));
5574       dst = change_address (dst, VOIDmode, dst_addr);
5575 
5576       if (val != const0_rtx)
5577 	{
5578 	  /* When using the overlapping mvc the original target
5579 	     address is only accessed as single byte entity (even by
5580 	     the mvc reading this value).  */
5581 	  set_mem_size (dst, 1);
5582 	  dstp1 = adjust_address (dst, VOIDmode, 1);
5583 	  emit_cmp_and_jump_insns (count,
5584 				   const1_rtx, EQ, NULL_RTX, mode, 1,
5585 				   onebyte_end_label,
5586 				   profile_probability::very_unlikely ());
5587 	}
5588 
5589       /* There is one unconditional (mvi+mvc)/xc after the loop
5590 	 dealing with the rest of the bytes, subtracting two (mvi+mvc)
5591 	 or one (xc) here leaves this number of bytes to be handled by
5592 	 it.  */
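      /* For example, with val != 0 and a length of 300: count becomes
	 298, blocks = 298 >> 8 = 1, so the loop handles one 256-byte
	 block and the tail code below sets the remaining 44 bytes
	 (one mvi plus a 43-byte mvc driven by the low 8 bits of
	 count).  */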
5593       temp = expand_binop (mode, add_optab, count,
5594 			   val == const0_rtx ? constm1_rtx : GEN_INT (-2),
5595 			   count, 1, OPTAB_DIRECT);
5596       if (temp != count)
5597 	emit_move_insn (count, temp);
5598 
5599       temp = expand_binop (mode, lshr_optab, count, GEN_INT (8), blocks, 1,
5600 			   OPTAB_DIRECT);
5601       if (temp != blocks)
5602 	emit_move_insn (blocks, temp);
5603 
5604       emit_cmp_and_jump_insns (blocks, const0_rtx,
5605 			       EQ, NULL_RTX, mode, 1, restbyte_end_label);
5606 
5607       emit_jump (loop_start_label);
5608 
5609       if (val != const0_rtx)
5610 	{
5611 	  /* The 1 byte != 0 special case.  Not handled efficiently
5612 	     since we require two jumps for that.  However, this
5613 	     should be very rare.  */
5614 	  emit_label (onebyte_end_label);
5615 	  emit_move_insn (adjust_address (dst, QImode, 0), val);
5616 	  emit_jump (zerobyte_end_label);
5617 	}
5618 
5619       emit_label (loop_start_label);
5620 
5621       if (TARGET_Z10
5622 	  && (GET_CODE (len) != CONST_INT || INTVAL (len) > 1024))
5623 	{
5624 	  /* Issue a write prefetch for the +4 cache line.  */
5625 	  rtx prefetch = gen_prefetch (gen_rtx_PLUS (Pmode, dst_addr,
5626 						     GEN_INT (1024)),
5627 				       const1_rtx, const0_rtx);
5628 	  emit_insn (prefetch);
5629 	  PREFETCH_SCHEDULE_BARRIER_P (prefetch) = true;
5630 	}
5631 
5632       if (val == const0_rtx)
5633 	emit_insn (gen_clrmem_short (dst, GEN_INT (255)));
5634       else
5635 	{
5636 	  /* Set the first byte in the block to the value and use an
5637 	     overlapping mvc for the block.  */
5638 	  emit_move_insn (adjust_address (dst, QImode, 0), val);
5639 	  emit_insn (gen_movmem_short (dstp1, dst, GEN_INT (254)));
5640 	}
5641       s390_load_address (dst_addr,
5642 			 gen_rtx_PLUS (Pmode, dst_addr, GEN_INT (256)));
5643 
5644       temp = expand_binop (mode, add_optab, blocks, constm1_rtx, blocks, 1,
5645 			   OPTAB_DIRECT);
5646       if (temp != blocks)
5647 	emit_move_insn (blocks, temp);
5648 
5649       emit_cmp_and_jump_insns (blocks, const0_rtx,
5650 			       NE, NULL_RTX, mode, 1, loop_start_label);
5651 
5652       emit_label (restbyte_end_label);
5653 
5654       if (val == const0_rtx)
5655 	emit_insn (gen_clrmem_short (dst, convert_to_mode (Pmode, count, 1)));
5656       else
5657 	{
5658 	  /* Set the first byte in the block to the value and use an
5659 	     overlapping mvc for the block.  */
5660 	  emit_move_insn (adjust_address (dst, QImode, 0), val);
5661 	  /* execute only uses the lowest 8 bits of count, which is
5662 	     exactly what we need here.  */
5663 	  emit_insn (gen_movmem_short (dstp1, dst,
5664 				       convert_to_mode (Pmode, count, 1)));
5665 	}
5666 
5667       emit_label (zerobyte_end_label);
5668     }
5669 }
5670 
5671 /* Emit code to compare LEN bytes at OP0 with those at OP1,
5672    and return the result in TARGET.  */
5673 
5674 bool
5675 s390_expand_cmpmem (rtx target, rtx op0, rtx op1, rtx len)
5676 {
5677   rtx ccreg = gen_rtx_REG (CCUmode, CC_REGNUM);
5678   rtx tmp;
5679 
5680   /* When tuning for z10 or higher we rely on the Glibc functions to
5681      do the right thing.  Only for constant lengths below 64k do we
5682      generate inline code.  */
5683   if (s390_tune >= PROCESSOR_2097_Z10
5684       && (GET_CODE (len) != CONST_INT || INTVAL (len) > (1<<16)))
5685     return false;
5686 
5687   /* As the result of CMPINT is inverted compared to what we need,
5688      we have to swap the operands.  */
5689   tmp = op0; op0 = op1; op1 = tmp;
5690 
5691   if (GET_CODE (len) == CONST_INT && INTVAL (len) >= 0 && INTVAL (len) <= 256)
5692     {
5693       if (INTVAL (len) > 0)
5694         {
5695           emit_insn (gen_cmpmem_short (op0, op1, GEN_INT (INTVAL (len) - 1)));
5696           emit_insn (gen_cmpint (target, ccreg));
5697         }
5698       else
5699         emit_move_insn (target, const0_rtx);
5700     }
5701   else if (TARGET_MVCLE)
5702     {
5703       emit_insn (gen_cmpmem_long (op0, op1, convert_to_mode (Pmode, len, 1)));
5704       emit_insn (gen_cmpint (target, ccreg));
5705     }
5706   else
5707     {
5708       rtx addr0, addr1, count, blocks, temp;
5709       rtx_code_label *loop_start_label = gen_label_rtx ();
5710       rtx_code_label *loop_end_label = gen_label_rtx ();
5711       rtx_code_label *end_label = gen_label_rtx ();
5712       machine_mode mode;
5713 
5714       mode = GET_MODE (len);
5715       if (mode == VOIDmode)
5716         mode = Pmode;
5717 
5718       addr0 = gen_reg_rtx (Pmode);
5719       addr1 = gen_reg_rtx (Pmode);
5720       count = gen_reg_rtx (mode);
5721       blocks = gen_reg_rtx (mode);
5722 
5723       convert_move (count, len, 1);
5724       emit_cmp_and_jump_insns (count, const0_rtx,
5725 			       EQ, NULL_RTX, mode, 1, end_label);
5726 
5727       emit_move_insn (addr0, force_operand (XEXP (op0, 0), NULL_RTX));
5728       emit_move_insn (addr1, force_operand (XEXP (op1, 0), NULL_RTX));
5729       op0 = change_address (op0, VOIDmode, addr0);
5730       op1 = change_address (op1, VOIDmode, addr1);
5731 
5732       temp = expand_binop (mode, add_optab, count, constm1_rtx, count, 1,
5733 			   OPTAB_DIRECT);
5734       if (temp != count)
5735         emit_move_insn (count, temp);
5736 
5737       temp = expand_binop (mode, lshr_optab, count, GEN_INT (8), blocks, 1,
5738 			   OPTAB_DIRECT);
5739       if (temp != blocks)
5740         emit_move_insn (blocks, temp);
5741 
5742       emit_cmp_and_jump_insns (blocks, const0_rtx,
5743 			       EQ, NULL_RTX, mode, 1, loop_end_label);
5744 
5745       emit_label (loop_start_label);
5746 
5747       if (TARGET_Z10
5748 	  && (GET_CODE (len) != CONST_INT || INTVAL (len) > 512))
5749 	{
5750 	  rtx prefetch;
5751 
5752 	  /* Issue a read prefetch for the +2 cache line of operand 1.  */
5753 	  prefetch = gen_prefetch (gen_rtx_PLUS (Pmode, addr0, GEN_INT (512)),
5754 				   const0_rtx, const0_rtx);
5755 	  emit_insn (prefetch);
5756 	  PREFETCH_SCHEDULE_BARRIER_P (prefetch) = true;
5757 
5758 	  /* Issue a read prefetch for the +2 cache line of operand 2.  */
5759 	  prefetch = gen_prefetch (gen_rtx_PLUS (Pmode, addr1, GEN_INT (512)),
5760 				   const0_rtx, const0_rtx);
5761 	  emit_insn (prefetch);
5762 	  PREFETCH_SCHEDULE_BARRIER_P (prefetch) = true;
5763 	}
5764 
5765       emit_insn (gen_cmpmem_short (op0, op1, GEN_INT (255)));
5766       temp = gen_rtx_NE (VOIDmode, ccreg, const0_rtx);
5767       temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
5768 			gen_rtx_LABEL_REF (VOIDmode, end_label), pc_rtx);
5769       temp = gen_rtx_SET (pc_rtx, temp);
5770       emit_jump_insn (temp);
5771 
5772       s390_load_address (addr0,
5773 			 gen_rtx_PLUS (Pmode, addr0, GEN_INT (256)));
5774       s390_load_address (addr1,
5775 			 gen_rtx_PLUS (Pmode, addr1, GEN_INT (256)));
5776 
5777       temp = expand_binop (mode, add_optab, blocks, constm1_rtx, blocks, 1,
5778 			   OPTAB_DIRECT);
5779       if (temp != blocks)
5780         emit_move_insn (blocks, temp);
5781 
5782       emit_cmp_and_jump_insns (blocks, const0_rtx,
5783 			       EQ, NULL_RTX, mode, 1, loop_end_label);
5784 
5785       emit_jump (loop_start_label);
5786       emit_label (loop_end_label);
5787 
5788       emit_insn (gen_cmpmem_short (op0, op1,
5789 				   convert_to_mode (Pmode, count, 1)));
5790       emit_label (end_label);
5791 
5792       emit_insn (gen_cmpint (target, ccreg));
5793     }
5794   return true;
5795 }
5796 
5797 /* Emit a conditional jump to LABEL for condition code mask MASK using
5798    comparison operator COMPARISON.  Return the emitted jump insn.  */
5799 
5800 static rtx_insn *
5801 s390_emit_ccraw_jump (HOST_WIDE_INT mask, enum rtx_code comparison, rtx label)
5802 {
5803   rtx temp;
5804 
5805   gcc_assert (comparison == EQ || comparison == NE);
5806   gcc_assert (mask > 0 && mask < 15);
5807 
5808   temp = gen_rtx_fmt_ee (comparison, VOIDmode,
5809 			 gen_rtx_REG (CCRAWmode, CC_REGNUM), GEN_INT (mask));
5810   temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
5811 			       gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
5812   temp = gen_rtx_SET (pc_rtx, temp);
5813   return emit_jump_insn (temp);
5814 }
5815 
5816 /* Emit the instructions to implement strlen of STRING and store the
5817    result in TARGET.  The string has the known ALIGNMENT.  This
5818    version uses vector instructions and is therefore not appropriate
5819    for targets prior to z13.  */
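/* A rough sketch of the aligned loop emitted below (the unaligned
   prologue instead uses vll so that no bytes beyond the string's
   first 16-byte block are touched):

     len = 0;
     do
       {
	 v   = 16-byte load from STRING + len;
	 idx = index of first zero byte in v, or 16 if none (vfene);
	 len += 16;
       }
     while (idx == 16);
     result = len - 16 + idx;  */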
5820 
5821 void
5822 s390_expand_vec_strlen (rtx target, rtx string, rtx alignment)
5823 {
5824   rtx highest_index_to_load_reg = gen_reg_rtx (Pmode);
5825   rtx str_reg = gen_reg_rtx (V16QImode);
5826   rtx str_addr_base_reg = gen_reg_rtx (Pmode);
5827   rtx str_idx_reg = gen_reg_rtx (Pmode);
5828   rtx result_reg = gen_reg_rtx (V16QImode);
5829   rtx is_aligned_label = gen_label_rtx ();
5830   rtx into_loop_label = NULL_RTX;
5831   rtx loop_start_label = gen_label_rtx ();
5832   rtx temp;
5833   rtx len = gen_reg_rtx (QImode);
5834   rtx cond;
5835 
5836   s390_load_address (str_addr_base_reg, XEXP (string, 0));
5837   emit_move_insn (str_idx_reg, const0_rtx);
5838 
5839   if (INTVAL (alignment) < 16)
5840     {
5841       /* Check whether the address happens to be aligned properly and,
5842 	 if so, jump directly to the aligned loop.  */
5843       emit_cmp_and_jump_insns (gen_rtx_AND (Pmode,
5844 					    str_addr_base_reg, GEN_INT (15)),
5845 			       const0_rtx, EQ, NULL_RTX,
5846 			       Pmode, 1, is_aligned_label);
5847 
5848       temp = gen_reg_rtx (Pmode);
5849       temp = expand_binop (Pmode, and_optab, str_addr_base_reg,
5850 			   GEN_INT (15), temp, 1, OPTAB_DIRECT);
5851       gcc_assert (REG_P (temp));
5852       highest_index_to_load_reg =
5853 	expand_binop (Pmode, sub_optab, GEN_INT (15), temp,
5854 		      highest_index_to_load_reg, 1, OPTAB_DIRECT);
5855       gcc_assert (REG_P (highest_index_to_load_reg));
5856       emit_insn (gen_vllv16qi (str_reg,
5857 		   convert_to_mode (SImode, highest_index_to_load_reg, 1),
5858 		   gen_rtx_MEM (BLKmode, str_addr_base_reg)));
5859 
5860       into_loop_label = gen_label_rtx ();
5861       s390_emit_jump (into_loop_label, NULL_RTX);
5862       emit_barrier ();
5863     }
5864 
5865   emit_label (is_aligned_label);
5866   LABEL_NUSES (is_aligned_label) = INTVAL (alignment) < 16 ? 2 : 1;
5867 
5868   /* From this point on we only perform 16-byte aligned
5869      loads.  */
5870   emit_move_insn (highest_index_to_load_reg, GEN_INT (15));
5871 
5872   emit_label (loop_start_label);
5873   LABEL_NUSES (loop_start_label) = 1;
5874 
5875   /* Load 16 bytes of the string into VR.  */
5876   emit_move_insn (str_reg,
5877 		  gen_rtx_MEM (V16QImode,
5878 			       gen_rtx_PLUS (Pmode, str_idx_reg,
5879 					     str_addr_base_reg)));
5880   if (into_loop_label != NULL_RTX)
5881     {
5882       emit_label (into_loop_label);
5883       LABEL_NUSES (into_loop_label) = 1;
5884     }
5885 
5886   /* Increment string index by 16 bytes.  */
5887   expand_binop (Pmode, add_optab, str_idx_reg, GEN_INT (16),
5888 		str_idx_reg, 1, OPTAB_DIRECT);
5889 
5890   emit_insn (gen_vec_vfenesv16qi (result_reg, str_reg, str_reg,
5891 				  GEN_INT (VSTRING_FLAG_ZS | VSTRING_FLAG_CS)));
5892 
5893   add_int_reg_note (s390_emit_ccraw_jump (8, NE, loop_start_label),
5894 		    REG_BR_PROB,
5895 		    profile_probability::very_likely ().to_reg_br_prob_note ());
5896   emit_insn (gen_vec_extractv16qiqi (len, result_reg, GEN_INT (7)));
5897 
5898   /* If the string pointer wasn't aligned we have loaded fewer than 16
5899      bytes and the remaining bytes got filled with zeros (by vll).
5900      Now we have to check whether the resulting index lies within the
5901      bytes actually part of the string.  */
5902 
5903   cond = s390_emit_compare (GT, convert_to_mode (Pmode, len, 1),
5904 			    highest_index_to_load_reg);
5905   s390_load_address (highest_index_to_load_reg,
5906 		     gen_rtx_PLUS (Pmode, highest_index_to_load_reg,
5907 				   const1_rtx));
5908   if (TARGET_64BIT)
5909     emit_insn (gen_movdicc (str_idx_reg, cond,
5910 			    highest_index_to_load_reg, str_idx_reg));
5911   else
5912     emit_insn (gen_movsicc (str_idx_reg, cond,
5913 			    highest_index_to_load_reg, str_idx_reg));
5914 
5915   add_reg_br_prob_note (s390_emit_jump (is_aligned_label, cond),
5916 		        profile_probability::very_unlikely ());
5917 
5918   expand_binop (Pmode, add_optab, str_idx_reg,
5919 		GEN_INT (-16), str_idx_reg, 1, OPTAB_DIRECT);
5920   /* FIXME: len is already zero extended - so avoid the llgcr emitted
5921      here.  */
5922   temp = expand_binop (Pmode, add_optab, str_idx_reg,
5923 		       convert_to_mode (Pmode, len, 1),
5924 		       target, 1, OPTAB_DIRECT);
5925   if (temp != target)
5926     emit_move_insn (target, temp);
5927 }
5928 
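/* Expand a vector-based movstr: copy the zero-terminated string at SRC
   to DST and set RESULT to the address of the terminating zero byte
   within the destination.  */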
5929 void
5930 s390_expand_vec_movstr (rtx result, rtx dst, rtx src)
5931 {
5932   rtx temp = gen_reg_rtx (Pmode);
5933   rtx src_addr = XEXP (src, 0);
5934   rtx dst_addr = XEXP (dst, 0);
5935   rtx src_addr_reg = gen_reg_rtx (Pmode);
5936   rtx dst_addr_reg = gen_reg_rtx (Pmode);
5937   rtx offset = gen_reg_rtx (Pmode);
5938   rtx vsrc = gen_reg_rtx (V16QImode);
5939   rtx vpos = gen_reg_rtx (V16QImode);
5940   rtx loadlen = gen_reg_rtx (SImode);
5941   rtx gpos_qi = gen_reg_rtx(QImode);
5942   rtx gpos = gen_reg_rtx (SImode);
5943   rtx done_label = gen_label_rtx ();
5944   rtx loop_label = gen_label_rtx ();
5945   rtx exit_label = gen_label_rtx ();
5946   rtx full_label = gen_label_rtx ();
5947 
5948   /* Perform a quick check for the string ending within the first (up
5949      to) 16 bytes and exit early if successful.  */
5950 
5951   emit_insn (gen_vlbb (vsrc, src, GEN_INT (6)));
5952   emit_insn (gen_lcbb (loadlen, src_addr, GEN_INT (6)));
5953   emit_insn (gen_vfenezv16qi (vpos, vsrc, vsrc));
5954   emit_insn (gen_vec_extractv16qiqi (gpos_qi, vpos, GEN_INT (7)));
5955   emit_move_insn (gpos, gen_rtx_SUBREG (SImode, gpos_qi, 0));
5956   /* gpos is the byte index if a zero was found and 16 otherwise.
5957      So if it is lower than the loaded bytes we have a hit.  */
5958   emit_cmp_and_jump_insns (gpos, loadlen, GE, NULL_RTX, SImode, 1,
5959 			   full_label);
5960   emit_insn (gen_vstlv16qi (vsrc, gpos, dst));
5961 
5962   force_expand_binop (Pmode, add_optab, dst_addr, gpos, result,
5963 		      1, OPTAB_DIRECT);
5964   emit_jump (exit_label);
5965   emit_barrier ();
5966 
5967   emit_label (full_label);
5968   LABEL_NUSES (full_label) = 1;
5969 
5970   /* Calculate `offset' so that src + offset points to the last byte
5971      before the next 16-byte boundary.  */
5972 
5973   /* temp = src_addr & 0xf */
5974   force_expand_binop (Pmode, and_optab, src_addr, GEN_INT (15), temp,
5975 		      1, OPTAB_DIRECT);
5976 
5977   /* offset = 0xf - temp */
5978   emit_move_insn (offset, GEN_INT (15));
5979   force_expand_binop (Pmode, sub_optab, offset, temp, offset,
5980 		      1, OPTAB_DIRECT);
5981 
5982   /* Store `offset' bytes in the destination string.  The quick check
5983      has loaded at least `offset' bytes into vsrc.  */
5984 
5985   emit_insn (gen_vstlv16qi (vsrc, gen_lowpart (SImode, offset), dst));
5986 
5987   /* Advance to the next byte to be loaded.  */
5988   force_expand_binop (Pmode, add_optab, offset, const1_rtx, offset,
5989 		      1, OPTAB_DIRECT);
5990 
5991   /* Make sure the addresses are single regs which can be used as a
5992      base.  */
5993   emit_move_insn (src_addr_reg, src_addr);
5994   emit_move_insn (dst_addr_reg, dst_addr);
5995 
5996   /* MAIN LOOP */
5997 
5998   emit_label (loop_label);
5999   LABEL_NUSES (loop_label) = 1;
6000 
6001   emit_move_insn (vsrc,
6002 		  gen_rtx_MEM (V16QImode,
6003 			       gen_rtx_PLUS (Pmode, src_addr_reg, offset)));
6004 
6005   emit_insn (gen_vec_vfenesv16qi (vpos, vsrc, vsrc,
6006 				  GEN_INT (VSTRING_FLAG_ZS | VSTRING_FLAG_CS)));
6007   add_int_reg_note (s390_emit_ccraw_jump (8, EQ, done_label),
6008 		    REG_BR_PROB, profile_probability::very_unlikely ()
6009 				  .to_reg_br_prob_note ());
6010 
6011   emit_move_insn (gen_rtx_MEM (V16QImode,
6012 			       gen_rtx_PLUS (Pmode, dst_addr_reg, offset)),
6013 		  vsrc);
6014   /* offset += 16 */
6015   force_expand_binop (Pmode, add_optab, offset, GEN_INT (16),
6016 		      offset,  1, OPTAB_DIRECT);
6017 
6018   emit_jump (loop_label);
6019   emit_barrier ();
6020 
6021   /* REGULAR EXIT */
6022 
6023   /* We are done.  Add the offset of the zero character to the dst_addr
6024      pointer to get the result.  */
6025 
6026   emit_label (done_label);
6027   LABEL_NUSES (done_label) = 1;
6028 
6029   force_expand_binop (Pmode, add_optab, dst_addr_reg, offset, dst_addr_reg,
6030 		      1, OPTAB_DIRECT);
6031 
6032   emit_insn (gen_vec_extractv16qiqi (gpos_qi, vpos, GEN_INT (7)));
6033   emit_move_insn (gpos, gen_rtx_SUBREG (SImode, gpos_qi, 0));
6034 
6035   emit_insn (gen_vstlv16qi (vsrc, gpos, gen_rtx_MEM (BLKmode, dst_addr_reg)));
6036 
6037   force_expand_binop (Pmode, add_optab, dst_addr_reg, gpos, result,
6038 		      1, OPTAB_DIRECT);
6039 
6040   /* EARLY EXIT */
6041 
6042   emit_label (exit_label);
6043   LABEL_NUSES (exit_label) = 1;
6044 }
6045 
6046 
6047 /* Expand conditional increment or decrement using alc/slb instructions.
6048    Should generate code setting DST to either SRC or SRC + INCREMENT,
6049    depending on the result of the comparison CMP_OP0 CMP_CODE CMP_OP1.
6050    Returns true if successful, false otherwise.
6051 
6052    That makes it possible to implement some if-constructs without jumps e.g.:
6053    (borrow = CC0 | CC1 and carry = CC2 | CC3)
6054    unsigned int a, b, c;
6055    if (a < b)  c++; -> CCU  b > a  -> CC2;    c += carry;
6056    if (a < b)  c--; -> CCL3 a - b  -> borrow; c -= borrow;
6057    if (a <= b) c++; -> CCL3 b - a  -> borrow; c += carry;
6058    if (a <= b) c--; -> CCU  a <= b -> borrow; c -= borrow;
6059 
6060    Checks for EQ and NE with a nonzero value need an additional xor e.g.:
6061    if (a == b) c++; -> CCL3 a ^= b; 0 - a  -> borrow;    c += carry;
6062    if (a == b) c--; -> CCU  a ^= b; a <= 0 -> CC0 | CC1; c -= borrow;
6063    if (a != b) c++; -> CCU  a ^= b; a > 0  -> CC2;       c += carry;
6064    if (a != b) c--; -> CCL3 a ^= b; 0 - a  -> borrow;    c -= borrow; */
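/* E.g. for "if (a < b) c++;" with unsigned operands this emits, roughly,
   a compare logical (leaving the carry in the CC) followed by an add
   logical with carry that adds 0 plus the carry to c -- the branchless
   form of c += (a < b).  */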
6065 
6066 bool
6067 s390_expand_addcc (enum rtx_code cmp_code, rtx cmp_op0, rtx cmp_op1,
6068 		   rtx dst, rtx src, rtx increment)
6069 {
6070   machine_mode cmp_mode;
6071   machine_mode cc_mode;
6072   rtx op_res;
6073   rtx insn;
6074   rtvec p;
6075   int ret;
6076 
6077   if ((GET_MODE (cmp_op0) == SImode || GET_MODE (cmp_op0) == VOIDmode)
6078       && (GET_MODE (cmp_op1) == SImode || GET_MODE (cmp_op1) == VOIDmode))
6079     cmp_mode = SImode;
6080   else if ((GET_MODE (cmp_op0) == DImode || GET_MODE (cmp_op0) == VOIDmode)
6081 	   && (GET_MODE (cmp_op1) == DImode || GET_MODE (cmp_op1) == VOIDmode))
6082     cmp_mode = DImode;
6083   else
6084     return false;
6085 
6086   /* Try ADD LOGICAL WITH CARRY.  */
6087   if (increment == const1_rtx)
6088     {
6089       /* Determine CC mode to use.  */
6090       if (cmp_code == EQ || cmp_code == NE)
6091 	{
6092 	  if (cmp_op1 != const0_rtx)
6093 	    {
6094 	      cmp_op0 = expand_simple_binop (cmp_mode, XOR, cmp_op0, cmp_op1,
6095 					     NULL_RTX, 0, OPTAB_WIDEN);
6096 	      cmp_op1 = const0_rtx;
6097 	    }
6098 
6099 	  cmp_code = cmp_code == EQ ? LEU : GTU;
6100 	}
6101 
6102       if (cmp_code == LTU || cmp_code == LEU)
6103 	{
6104 	  rtx tem = cmp_op0;
6105 	  cmp_op0 = cmp_op1;
6106 	  cmp_op1 = tem;
6107 	  cmp_code = swap_condition (cmp_code);
6108 	}
6109 
6110       switch (cmp_code)
6111 	{
6112 	  case GTU:
6113 	    cc_mode = CCUmode;
6114 	    break;
6115 
6116 	  case GEU:
6117 	    cc_mode = CCL3mode;
6118 	    break;
6119 
6120 	  default:
6121 	    return false;
6122 	}
6123 
6124       /* Emit comparison instruction pattern. */
6125       if (!register_operand (cmp_op0, cmp_mode))
6126 	cmp_op0 = force_reg (cmp_mode, cmp_op0);
6127 
6128       insn = gen_rtx_SET (gen_rtx_REG (cc_mode, CC_REGNUM),
6129 			  gen_rtx_COMPARE (cc_mode, cmp_op0, cmp_op1));
6130       /* We use insn_invalid_p here to add clobbers if required.  */
6131       ret = insn_invalid_p (emit_insn (insn), false);
6132       gcc_assert (!ret);
6133 
6134       /* Emit ALC instruction pattern.  */
6135       op_res = gen_rtx_fmt_ee (cmp_code, GET_MODE (dst),
6136 			       gen_rtx_REG (cc_mode, CC_REGNUM),
6137 			       const0_rtx);
6138 
6139       if (src != const0_rtx)
6140 	{
6141 	  if (!register_operand (src, GET_MODE (dst)))
6142 	    src = force_reg (GET_MODE (dst), src);
6143 
6144 	  op_res = gen_rtx_PLUS (GET_MODE (dst), op_res, src);
6145 	  op_res = gen_rtx_PLUS (GET_MODE (dst), op_res, const0_rtx);
6146 	}
6147 
6148       p = rtvec_alloc (2);
6149       RTVEC_ELT (p, 0) =
6150         gen_rtx_SET (dst, op_res);
6151       RTVEC_ELT (p, 1) =
6152 	gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, CC_REGNUM));
6153       emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
6154 
6155       return true;
6156     }
6157 
6158   /* Try SUBTRACT LOGICAL WITH BORROW.  */
6159   if (increment == constm1_rtx)
6160     {
6161       /* Determine CC mode to use.  */
6162       if (cmp_code == EQ || cmp_code == NE)
6163 	{
6164 	  if (cmp_op1 != const0_rtx)
6165 	    {
6166 	      cmp_op0 = expand_simple_binop (cmp_mode, XOR, cmp_op0, cmp_op1,
6167 					     NULL_RTX, 0, OPTAB_WIDEN);
6168 	      cmp_op1 = const0_rtx;
6169 	    }
6170 
6171 	  cmp_code = cmp_code == EQ ? LEU : GTU;
6172 	}
6173 
6174       if (cmp_code == GTU || cmp_code == GEU)
6175 	{
6176 	  rtx tem = cmp_op0;
6177 	  cmp_op0 = cmp_op1;
6178 	  cmp_op1 = tem;
6179 	  cmp_code = swap_condition (cmp_code);
6180 	}
6181 
6182       switch (cmp_code)
6183 	{
6184 	  case LEU:
6185 	    cc_mode = CCUmode;
6186 	    break;
6187 
6188 	  case LTU:
6189 	    cc_mode = CCL3mode;
6190 	    break;
6191 
6192 	  default:
6193 	    return false;
6194 	}
6195 
6196       /* Emit comparison instruction pattern. */
6197       if (!register_operand (cmp_op0, cmp_mode))
6198 	cmp_op0 = force_reg (cmp_mode, cmp_op0);
6199 
6200       insn = gen_rtx_SET (gen_rtx_REG (cc_mode, CC_REGNUM),
6201 			  gen_rtx_COMPARE (cc_mode, cmp_op0, cmp_op1));
6202       /* We use insn_invalid_p here to add clobbers if required.  */
6203       ret = insn_invalid_p (emit_insn (insn), false);
6204       gcc_assert (!ret);
6205 
6206       /* Emit SLB instruction pattern.  */
6207       if (!register_operand (src, GET_MODE (dst)))
6208 	src = force_reg (GET_MODE (dst), src);
6209 
6210       op_res = gen_rtx_MINUS (GET_MODE (dst),
6211 			      gen_rtx_MINUS (GET_MODE (dst), src, const0_rtx),
6212 			      gen_rtx_fmt_ee (cmp_code, GET_MODE (dst),
6213 					      gen_rtx_REG (cc_mode, CC_REGNUM),
6214 					      const0_rtx));
6215       p = rtvec_alloc (2);
6216       RTVEC_ELT (p, 0) =
6217         gen_rtx_SET (dst, op_res);
6218       RTVEC_ELT (p, 1) =
6219 	gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, CC_REGNUM));
6220       emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
6221 
6222       return true;
6223     }
6224 
6225   return false;
6226 }
6227 
6228 /* Expand code for the insv template. Return true if successful.  */
6229 
6230 bool
6231 s390_expand_insv (rtx dest, rtx op1, rtx op2, rtx src)
6232 {
6233   int bitsize = INTVAL (op1);
6234   int bitpos = INTVAL (op2);
6235   machine_mode mode = GET_MODE (dest);
6236   machine_mode smode;
6237   int smode_bsize, mode_bsize;
6238   rtx op, clobber;
6239 
6240   if (bitsize + bitpos > GET_MODE_BITSIZE (mode))
6241     return false;
6242 
6243   /* Generate INSERT IMMEDIATE (IILL et al).  */
6244   /* (set (ze (reg)) (const_int)).  */
6245   if (TARGET_ZARCH
6246       && register_operand (dest, word_mode)
6247       && (bitpos % 16) == 0
6248       && (bitsize % 16) == 0
6249       && const_int_operand (src, VOIDmode))
6250     {
6251       HOST_WIDE_INT val = INTVAL (src);
6252       int regpos = bitpos + bitsize;
6253 
6254       while (regpos > bitpos)
6255 	{
6256 	  machine_mode putmode;
6257 	  int putsize;
6258 
6259 	  if (TARGET_EXTIMM && (regpos % 32 == 0) && (regpos >= bitpos + 32))
6260 	    putmode = SImode;
6261 	  else
6262 	    putmode = HImode;
6263 
6264 	  putsize = GET_MODE_BITSIZE (putmode);
6265 	  regpos -= putsize;
6266 	  emit_move_insn (gen_rtx_ZERO_EXTRACT (word_mode, dest,
6267 						GEN_INT (putsize),
6268 						GEN_INT (regpos)),
6269 			  gen_int_mode (val, putmode));
6270 	  val >>= putsize;
6271 	}
6272       gcc_assert (regpos == bitpos);
6273       return true;
6274     }
6275 
6276   smode = smallest_int_mode_for_size (bitsize);
6277   smode_bsize = GET_MODE_BITSIZE (smode);
6278   mode_bsize = GET_MODE_BITSIZE (mode);
6279 
6280   /* Generate STORE CHARACTERS UNDER MASK (STCM et al).  */
6281   if (bitpos == 0
6282       && (bitsize % BITS_PER_UNIT) == 0
6283       && MEM_P (dest)
6284       && (register_operand (src, word_mode)
6285 	  || const_int_operand (src, VOIDmode)))
6286     {
6287       /* Emit standard pattern if possible.  */
6288       if (smode_bsize == bitsize)
6289 	{
6290 	  emit_move_insn (adjust_address (dest, smode, 0),
6291 			  gen_lowpart (smode, src));
6292 	  return true;
6293 	}
6294 
6295       /* (set (ze (mem)) (const_int)).  */
6296       else if (const_int_operand (src, VOIDmode))
6297 	{
6298 	  int size = bitsize / BITS_PER_UNIT;
6299 	  rtx src_mem = adjust_address (force_const_mem (word_mode, src),
6300 					BLKmode,
6301 					UNITS_PER_WORD - size);
6302 
6303 	  dest = adjust_address (dest, BLKmode, 0);
6304 	  set_mem_size (dest, size);
6305 	  s390_expand_movmem (dest, src_mem, GEN_INT (size));
6306 	  return true;
6307 	}
6308 
6309       /* (set (ze (mem)) (reg)).  */
6310       else if (register_operand (src, word_mode))
6311 	{
6312 	  if (bitsize <= 32)
6313 	    emit_move_insn (gen_rtx_ZERO_EXTRACT (word_mode, dest, op1,
6314 						  const0_rtx), src);
6315 	  else
6316 	    {
6317 	      /* Emit st,stcmh sequence.  */
6318 	      int stcmh_width = bitsize - 32;
6319 	      int size = stcmh_width / BITS_PER_UNIT;
6320 
6321 	      emit_move_insn (adjust_address (dest, SImode, size),
6322 			      gen_lowpart (SImode, src));
6323 	      set_mem_size (dest, size);
6324 	      emit_move_insn (gen_rtx_ZERO_EXTRACT (word_mode, dest,
6325 						    GEN_INT (stcmh_width),
6326 						    const0_rtx),
6327 			      gen_rtx_LSHIFTRT (word_mode, src, GEN_INT (32)));
6328 	    }
6329 	  return true;
6330 	}
6331     }
6332 
6333   /* Generate INSERT CHARACTERS UNDER MASK (IC, ICM et al).  */
6334   if ((bitpos % BITS_PER_UNIT) == 0
6335       && (bitsize % BITS_PER_UNIT) == 0
6336       && (bitpos & 32) == ((bitpos + bitsize - 1) & 32)
6337       && MEM_P (src)
6338       && (mode == DImode || mode == SImode)
6339       && register_operand (dest, mode))
6340     {
6341       /* Emit a strict_low_part pattern if possible.  */
6342       if (smode_bsize == bitsize && bitpos == mode_bsize - smode_bsize)
6343 	{
6344 	  op = gen_rtx_STRICT_LOW_PART (VOIDmode, gen_lowpart (smode, dest));
6345 	  op = gen_rtx_SET (op, gen_lowpart (smode, src));
6346 	  clobber = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, CC_REGNUM));
6347 	  emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clobber)));
6348 	  return true;
6349 	}
6350 
6351       /* ??? There are more powerful versions of ICM that are not
6352 	 completely represented in the md file.  */
6353     }
6354 
6355   /* For z10, generate ROTATE THEN INSERT SELECTED BITS (RISBG et al).  */
6356   if (TARGET_Z10 && (mode == DImode || mode == SImode))
6357     {
6358       machine_mode mode_s = GET_MODE (src);
6359 
6360       if (CONSTANT_P (src))
6361 	{
6362 	  /* For constant zero values the representation with AND
6363 	     appears to be folded in more situations than the (set
6364 	     (zero_extract) ...).
6365 	     We only do this when the start and end of the bitfield
6366 	     remain in the same SImode chunk.  That way nihf or nilf
6367 	     can be used.
6368 	     The AND patterns might still generate a risbg for this.  */
6369 	  if (src == const0_rtx && bitpos / 32  == (bitpos + bitsize - 1) / 32)
6370 	    return false;
6371 	  else
6372 	    src = force_reg (mode, src);
6373 	}
6374       else if (mode_s != mode)
6375 	{
6376 	  gcc_assert (GET_MODE_BITSIZE (mode_s) >= bitsize);
6377 	  src = force_reg (mode_s, src);
6378 	  src = gen_lowpart (mode, src);
6379 	}
6380 
6381       op = gen_rtx_ZERO_EXTRACT (mode, dest, op1, op2);
6382       op = gen_rtx_SET (op, src);
6383 
6384       if (!TARGET_ZEC12)
6385 	{
6386 	  clobber = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, CC_REGNUM));
6387 	  op = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clobber));
6388 	}
6389       emit_insn (op);
6390 
6391       return true;
6392     }
6393 
6394   return false;
6395 }
6396 
6397 /* A subroutine of s390_expand_cs_hqi and s390_expand_atomic which returns a
6398    register that holds VAL of mode MODE shifted by COUNT bits.  */
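/* For example, for VAL 0x1234, MODE QImode and COUNT 8 this yields a
   register holding (0x1234 & 0xff) << 8 == 0x3400.  */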
6399 
6400 static inline rtx
6401 s390_expand_mask_and_shift (rtx val, machine_mode mode, rtx count)
6402 {
6403   val = expand_simple_binop (SImode, AND, val, GEN_INT (GET_MODE_MASK (mode)),
6404 			     NULL_RTX, 1, OPTAB_DIRECT);
6405   return expand_simple_binop (SImode, ASHIFT, val, count,
6406 			      NULL_RTX, 1, OPTAB_DIRECT);
6407 }
6408 
6409 /* Generate a vector comparison COND of CMP_OP1 and CMP_OP2 and store
6410    the result in TARGET.  */
6411 
6412 void
6413 s390_expand_vec_compare (rtx target, enum rtx_code cond,
6414 			 rtx cmp_op1, rtx cmp_op2)
6415 {
6416   machine_mode mode = GET_MODE (target);
6417   bool neg_p = false, swap_p = false;
6418   rtx tmp;
6419 
6420   if (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_VECTOR_FLOAT)
6421     {
6422       switch (cond)
6423 	{
6424 	  /* NE a != b -> !(a == b) */
6425 	case NE:   cond = EQ; neg_p = true;                break;
6426 	  /* UNGT a u> b -> !(b >= a) */
6427 	case UNGT: cond = GE; neg_p = true; swap_p = true; break;
6428 	  /* UNGE a u>= b -> !(b > a) */
6429 	case UNGE: cond = GT; neg_p = true; swap_p = true; break;
6430 	  /* LE: a <= b -> b >= a */
6431 	case LE:   cond = GE;               swap_p = true; break;
6432 	  /* UNLE: a u<= b -> !(a > b) */
6433 	case UNLE: cond = GT; neg_p = true;                break;
6434 	  /* LT: a < b -> b > a */
6435 	case LT:   cond = GT;               swap_p = true; break;
6436 	  /* UNLT: a u< b -> !(a >= b) */
6437 	case UNLT: cond = GE; neg_p = true;                break;
6438 	case UNEQ:
6439 	  emit_insn (gen_vec_cmpuneq (target, cmp_op1, cmp_op2));
6440 	  return;
6441 	case LTGT:
6442 	  emit_insn (gen_vec_cmpltgt (target, cmp_op1, cmp_op2));
6443 	  return;
6444 	case ORDERED:
6445 	  emit_insn (gen_vec_ordered (target, cmp_op1, cmp_op2));
6446 	  return;
6447 	case UNORDERED:
6448 	  emit_insn (gen_vec_unordered (target, cmp_op1, cmp_op2));
6449 	  return;
6450 	default: break;
6451 	}
6452     }
6453   else
6454     {
6455       switch (cond)
6456 	{
6457 	  /* NE: a != b -> !(a == b) */
6458 	case NE:  cond = EQ;  neg_p = true;                break;
6459 	  /* GE: a >= b -> !(b > a) */
6460 	case GE:  cond = GT;  neg_p = true; swap_p = true; break;
6461 	  /* GEU: a >= b -> !(b > a) */
6462 	case GEU: cond = GTU; neg_p = true; swap_p = true; break;
6463 	  /* LE: a <= b -> !(a > b) */
6464 	case LE:  cond = GT;  neg_p = true;                break;
6465 	  /* LEU: a <= b -> !(a > b) */
6466 	case LEU: cond = GTU; neg_p = true;                break;
6467 	  /* LT: a < b -> b > a */
6468 	case LT:  cond = GT;                swap_p = true; break;
6469 	  /* LTU: a < b -> b > a */
6470 	case LTU: cond = GTU;               swap_p = true; break;
6471 	default: break;
6472 	}
6473     }
6474 
6475   if (swap_p)
6476     {
6477       tmp = cmp_op1; cmp_op1 = cmp_op2; cmp_op2 = tmp;
6478     }
6479 
6480   emit_insn (gen_rtx_SET (target, gen_rtx_fmt_ee (cond,
6481 						  mode,
6482 						  cmp_op1, cmp_op2)));
6483   if (neg_p)
6484     emit_insn (gen_rtx_SET (target, gen_rtx_NOT (mode, target)));
6485 }
6486 
6487 /* Expand the comparison CODE of CMP1 and CMP2 and copy 1 or 0 into
6488    TARGET if either all (ALL_P is true) or any (ALL_P is false) of the
6489    elements in CMP1 and CMP2 fulfill the comparison.
6490    This function is only used to emit patterns for the vx builtins and
6491    therefore only handles comparison codes required by the
6492    builtins.  */
6493 void
6494 s390_expand_vec_compare_cc (rtx target, enum rtx_code code,
6495 			    rtx cmp1, rtx cmp2, bool all_p)
6496 {
6497   machine_mode cc_producer_mode, cc_consumer_mode, scratch_mode;
6498   rtx tmp_reg = gen_reg_rtx (SImode);
6499   bool swap_p = false;
6500 
6501   if (GET_MODE_CLASS (GET_MODE (cmp1)) == MODE_VECTOR_INT)
6502     {
6503       switch (code)
6504 	{
6505 	case EQ:
6506 	case NE:
6507 	  cc_producer_mode = CCVEQmode;
6508 	  break;
6509 	case GE:
6510 	case LT:
6511 	  code = swap_condition (code);
6512 	  swap_p = true;
6513 	  /* fallthrough */
6514 	case GT:
6515 	case LE:
6516 	  cc_producer_mode = CCVIHmode;
6517 	  break;
6518 	case GEU:
6519 	case LTU:
6520 	  code = swap_condition (code);
6521 	  swap_p = true;
6522 	  /* fallthrough */
6523 	case GTU:
6524 	case LEU:
6525 	  cc_producer_mode = CCVIHUmode;
6526 	  break;
6527 	default:
6528 	  gcc_unreachable ();
6529 	}
6530 
6531       scratch_mode = GET_MODE (cmp1);
6532       /* These codes represent inverted CC interpretations.  Inverting
6533 	 an ALL CC mode results in an ANY CC mode and the other way
6534 	 around.  Invert the all_p flag here to compensate for
6535 	 that.  */
6536       if (code == NE || code == LE || code == LEU)
6537 	all_p = !all_p;
6538 
6539       cc_consumer_mode = all_p ? CCVIALLmode : CCVIANYmode;
6540     }
6541   else if (GET_MODE_CLASS (GET_MODE (cmp1)) == MODE_VECTOR_FLOAT)
6542     {
6543       bool inv_p = false;
6544 
6545       switch (code)
6546 	{
6547 	case EQ:   cc_producer_mode = CCVEQmode;  break;
6548 	case NE:   cc_producer_mode = CCVEQmode;  inv_p = true; break;
6549 	case GT:   cc_producer_mode = CCVFHmode;  break;
6550 	case GE:   cc_producer_mode = CCVFHEmode; break;
6551 	case UNLE: cc_producer_mode = CCVFHmode;  inv_p = true; break;
6552 	case UNLT: cc_producer_mode = CCVFHEmode; inv_p = true; break;
6553 	case LT:   cc_producer_mode = CCVFHmode;  code = GT; swap_p = true; break;
6554 	case LE:   cc_producer_mode = CCVFHEmode; code = GE; swap_p = true; break;
6555 	default: gcc_unreachable ();
6556 	}
6557       scratch_mode = mode_for_int_vector (GET_MODE (cmp1)).require ();
6558 
6559       if (inv_p)
6560 	all_p = !all_p;
6561 
6562       cc_consumer_mode = all_p ? CCVFALLmode : CCVFANYmode;
6563     }
6564   else
6565     gcc_unreachable ();
6566 
6567   if (swap_p)
6568     {
6569       rtx tmp = cmp2;
6570       cmp2 = cmp1;
6571       cmp1 = tmp;
6572     }
6573 
6574   emit_insn (gen_rtx_PARALLEL (VOIDmode,
6575 	       gen_rtvec (2, gen_rtx_SET (
6576 			       gen_rtx_REG (cc_producer_mode, CC_REGNUM),
6577 			       gen_rtx_COMPARE (cc_producer_mode, cmp1, cmp2)),
6578 			  gen_rtx_CLOBBER (VOIDmode,
6579 					   gen_rtx_SCRATCH (scratch_mode)))));
6580   emit_move_insn (target, const0_rtx);
6581   emit_move_insn (tmp_reg, const1_rtx);
6582 
6583   emit_move_insn (target,
6584 		  gen_rtx_IF_THEN_ELSE (SImode,
6585 		    gen_rtx_fmt_ee (code, VOIDmode,
6586 				    gen_rtx_REG (cc_consumer_mode, CC_REGNUM),
6587 				    const0_rtx),
6588 					tmp_reg, target));
6589 }
6590 
6591 /* Invert the comparison CODE applied to a CC mode.  This is only safe
6592    if we know whether the result was created by a floating point
6593    compare or not.  For the CCV modes this is encoded as part of the
6594    mode.  */
6595 enum rtx_code
6596 s390_reverse_condition (machine_mode mode, enum rtx_code code)
6597 {
6598   /* Reversal of FP compares takes the unordered case into account --
6599      an ordered compare becomes an unordered compare and vice versa.  */
6600   if (mode == CCVFALLmode || mode == CCVFANYmode)
6601     return reverse_condition_maybe_unordered (code);
6602   else if (mode == CCVIALLmode || mode == CCVIANYmode)
6603     return reverse_condition (code);
6604   else
6605     gcc_unreachable ();
6606 }
6607 
6608 /* Generate a vector comparison expression loading either elements of
6609    THEN or ELS into TARGET depending on the comparison COND of CMP_OP1
6610    and CMP_OP2.  */
6611 
6612 void
6613 s390_expand_vcond (rtx target, rtx then, rtx els,
6614 		   enum rtx_code cond, rtx cmp_op1, rtx cmp_op2)
6615 {
6616   rtx tmp;
6617   machine_mode result_mode;
6618   rtx result_target;
6619 
6620   machine_mode target_mode = GET_MODE (target);
6621   machine_mode cmp_mode = GET_MODE (cmp_op1);
6622   rtx op = (cond == LT) ? els : then;
6623 
6624   /* Try to optimize x < 0 ? -1 : 0 into (signed) x >> 31
6625      and x < 0 ? 1 : 0 into (unsigned) x >> 31.  Likewise
6626      for short and byte (x >> 15 and x >> 7 respectively).  */
6627   if ((cond == LT || cond == GE)
6628       && target_mode == cmp_mode
6629       && cmp_op2 == CONST0_RTX (cmp_mode)
6630       && op == CONST0_RTX (target_mode)
6631       && s390_vector_mode_supported_p (target_mode)
6632       && GET_MODE_CLASS (target_mode) == MODE_VECTOR_INT)
6633     {
6634       rtx negop = (cond == LT) ? then : els;
6635 
6636       int shift = GET_MODE_BITSIZE (GET_MODE_INNER (target_mode)) - 1;
6637 
6638       /* if x < 0 ? 1 : 0 or if x >= 0 ? 0 : 1 */
6639       if (negop == CONST1_RTX (target_mode))
6640 	{
6641 	  rtx res = expand_simple_binop (cmp_mode, LSHIFTRT, cmp_op1,
6642 					 GEN_INT (shift), target,
6643 					 1, OPTAB_DIRECT);
6644 	  if (res != target)
6645 	    emit_move_insn (target, res);
6646 	  return;
6647 	}
6648 
6649       /* if x < 0 ? -1 : 0 or if x >= 0 ? 0 : -1 */
6650       else if (all_ones_operand (negop, target_mode))
6651 	{
6652 	  rtx res = expand_simple_binop (cmp_mode, ASHIFTRT, cmp_op1,
6653 					 GEN_INT (shift), target,
6654 					 0, OPTAB_DIRECT);
6655 	  if (res != target)
6656 	    emit_move_insn (target, res);
6657 	  return;
6658 	}
6659     }
6660 
6661   /* We always use an integral type vector to hold the comparison
6662      result.  */
6663   result_mode = mode_for_int_vector (cmp_mode).require ();
6664   result_target = gen_reg_rtx (result_mode);
6665 
6666   /* We allow vector immediates as comparison operands that
6667      can be handled by the optimization above but not by the
6668      following code.  Hence, force them into registers here.  */
6669   if (!REG_P (cmp_op1))
6670     cmp_op1 = force_reg (GET_MODE (cmp_op1), cmp_op1);
6671 
6672   if (!REG_P (cmp_op2))
6673     cmp_op2 = force_reg (GET_MODE (cmp_op2), cmp_op2);
6674 
6675   s390_expand_vec_compare (result_target, cond,
6676 			   cmp_op1, cmp_op2);
6677 
6678   /* If the results are supposed to be either -1 or 0 we are done
6679      since this is what our compare instructions generate anyway.  */
6680   if (all_ones_operand (then, GET_MODE (then))
6681       && const0_operand (els, GET_MODE (els)))
6682     {
6683       emit_move_insn (target, gen_rtx_SUBREG (target_mode,
6684 					      result_target, 0));
6685       return;
6686     }
6687 
6688   /* Otherwise we will do a vsel afterwards.  */
6689   /* This gets triggered e.g.
6690      with gcc.c-torture/compile/pr53410-1.c */
6691   if (!REG_P (then))
6692     then = force_reg (target_mode, then);
6693 
6694   if (!REG_P (els))
6695     els = force_reg (target_mode, els);
6696 
6697   tmp = gen_rtx_fmt_ee (EQ, VOIDmode,
6698 			result_target,
6699 			CONST0_RTX (result_mode));
6700 
6701   /* We compared the result against zero above so we have to swap then
6702      and els here.  */
6703   tmp = gen_rtx_IF_THEN_ELSE (target_mode, tmp, els, then);
6704 
6705   gcc_assert (target_mode == GET_MODE (then));
6706   emit_insn (gen_rtx_SET (target, tmp));
6707 }
6708 
6709 /* Emit the RTX necessary to initialize the vector TARGET with values
6710    in VALS.  */
6711 void
6712 s390_expand_vec_init (rtx target, rtx vals)
6713 {
6714   machine_mode mode = GET_MODE (target);
6715   machine_mode inner_mode = GET_MODE_INNER (mode);
6716   int n_elts = GET_MODE_NUNITS (mode);
6717   bool all_same = true, all_regs = true, all_const_int = true;
6718   rtx x;
6719   int i;
6720 
6721   for (i = 0; i < n_elts; ++i)
6722     {
6723       x = XVECEXP (vals, 0, i);
6724 
6725       if (!CONST_INT_P (x))
6726 	all_const_int = false;
6727 
6728       if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
6729 	all_same = false;
6730 
6731       if (!REG_P (x))
6732 	all_regs = false;
6733     }
6734 
6735   /* Use vector gen mask or vector gen byte mask if possible.  */
6736   if (all_same && all_const_int
6737       && (XVECEXP (vals, 0, 0) == const0_rtx
6738 	  || s390_contiguous_bitmask_vector_p (XVECEXP (vals, 0, 0),
6739 					       NULL, NULL)
6740 	  || s390_bytemask_vector_p (XVECEXP (vals, 0, 0), NULL)))
6741     {
6742       emit_insn (gen_rtx_SET (target,
6743 			      gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0))));
6744       return;
6745     }
6746 
6747   /* Use vector replicate instructions.  vlrep/vrepi/vrep  */
6748   if (all_same)
6749     {
6750       rtx elem = XVECEXP (vals, 0, 0);
6751 
6752       /* vec_splats accepts general_operand as source.  */
6753       if (!general_operand (elem, GET_MODE (elem)))
6754 	elem = force_reg (inner_mode, elem);
6755 
6756       emit_insn (gen_rtx_SET (target, gen_rtx_VEC_DUPLICATE (mode, elem)));
6757       return;
6758     }
6759 
6760   if (all_regs
6761       && REG_P (target)
6762       && n_elts == 2
6763       && GET_MODE_SIZE (inner_mode) == 8)
6764     {
6765       /* Use vector load pair.  */
6766       emit_insn (gen_rtx_SET (target,
6767 			      gen_rtx_VEC_CONCAT (mode,
6768 						  XVECEXP (vals, 0, 0),
6769 						  XVECEXP (vals, 0, 1))));
6770       return;
6771     }
6772 
6773   /* Use vector load logical element and zero.  */
6774   if (TARGET_VXE && (mode == V4SImode || mode == V4SFmode))
6775     {
6776       bool found = true;
6777 
6778       x = XVECEXP (vals, 0, 0);
6779       if (memory_operand (x, inner_mode))
6780 	{
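	  /* The first element comes from memory; a single load-and-zero
	     can only be used if all remaining elements are zero.  */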
6781 	  for (i = 1; i < n_elts; ++i)
6782 	    found = found && XVECEXP (vals, 0, i) == const0_rtx;
6783 
6784 	  if (found)
6785 	    {
6786 	      machine_mode half_mode = (inner_mode == SFmode
6787 					? V2SFmode : V2SImode);
6788 	      emit_insn (gen_rtx_SET (target,
6789 			      gen_rtx_VEC_CONCAT (mode,
6790 						  gen_rtx_VEC_CONCAT (half_mode,
6791 								      x,
6792 								      const0_rtx),
6793 						  gen_rtx_VEC_CONCAT (half_mode,
6794 								      const0_rtx,
6795 								      const0_rtx))));
6796 	      return;
6797 	    }
6798 	}
6799     }
6800 
6801   /* We are about to set the vector elements one by one.  Zero out the
6802      full register first in order to help the data flow framework to
6803      detect it as full VR set.  */
6804   emit_insn (gen_rtx_SET (target, CONST0_RTX (mode)));
6805 
6806   /* Unfortunately the vec_init expander is not allowed to fail.  So
6807      we have to implement the fallback ourselves.  */
6808   for (i = 0; i < n_elts; i++)
6809     {
6810       rtx elem = XVECEXP (vals, 0, i);
6811       if (!general_operand (elem, GET_MODE (elem)))
6812 	elem = force_reg (inner_mode, elem);
6813 
6814       emit_insn (gen_rtx_SET (target,
6815 			      gen_rtx_UNSPEC (mode,
6816 					      gen_rtvec (3, elem,
6817 							 GEN_INT (i), target),
6818 					      UNSPEC_VEC_SET)));
6819     }
6820 }
6821 
6822 /* Structure to hold the initial parameters for a compare_and_swap operation
6823    in HImode and QImode.  */
6824 
6825 struct alignment_context
6826 {
6827   rtx memsi;	  /* SI aligned memory location.  */
6828   rtx shift;	  /* Bit offset with regard to lsb.  */
6829   rtx modemask;	  /* Mask of the HQImode shifted by SHIFT bits.  */
6830   rtx modemaski;  /* ~modemask */
6831   bool aligned;	  /* True if memory is aligned, false else.  */
6832 };
6833 
6834 /* A subroutine of s390_expand_cs_hqi and s390_expand_atomic to initialize
6835    structure AC for transparent simplifying, if the memory alignment is known
6836    to be at least 32bit.  MEM is the memory location for the actual operation
6837    and MODE its mode.  */
6838 
6839 static void
6840 init_alignment_context (struct alignment_context *ac, rtx mem,
6841 			machine_mode mode)
6842 {
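  /* Start with the byte distance of the value from the least significant
     bit of a big-endian SImode word; for unaligned accesses this gets
     adjusted by the byte offset below.  */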
6843   ac->shift = GEN_INT (GET_MODE_SIZE (SImode) - GET_MODE_SIZE (mode));
6844   ac->aligned = (MEM_ALIGN (mem) >= GET_MODE_BITSIZE (SImode));
6845 
6846   if (ac->aligned)
6847     ac->memsi = adjust_address (mem, SImode, 0); /* Memory is aligned.  */
6848   else
6849     {
6850       /* Alignment is unknown.  */
6851       rtx byteoffset, addr, align;
6852 
6853       /* Force the address into a register.  */
6854       addr = force_reg (Pmode, XEXP (mem, 0));
6855 
6856       /* Align it to SImode.  */
6857       align = expand_simple_binop (Pmode, AND, addr,
6858 				   GEN_INT (-GET_MODE_SIZE (SImode)),
6859 				   NULL_RTX, 1, OPTAB_DIRECT);
6860       /* Generate MEM.  */
6861       ac->memsi = gen_rtx_MEM (SImode, align);
6862       MEM_VOLATILE_P (ac->memsi) = MEM_VOLATILE_P (mem);
6863       set_mem_alias_set (ac->memsi, ALIAS_SET_MEMORY_BARRIER);
6864       set_mem_align (ac->memsi, GET_MODE_BITSIZE (SImode));
6865 
6866       /* Calculate shiftcount.  */
6867       byteoffset = expand_simple_binop (Pmode, AND, addr,
6868 					GEN_INT (GET_MODE_SIZE (SImode) - 1),
6869 					NULL_RTX, 1, OPTAB_DIRECT);
6870       /* As we already have some offset, evaluate the remaining distance.  */
6871       ac->shift = expand_simple_binop (SImode, MINUS, ac->shift, byteoffset,
6872 				      NULL_RTX, 1, OPTAB_DIRECT);
6873     }
6874 
6875   /* Shift is the byte count, but we need the bitcount.  */
6876   ac->shift = expand_simple_binop (SImode, ASHIFT, ac->shift, GEN_INT (3),
6877 				   NULL_RTX, 1, OPTAB_DIRECT);
6878 
6879   /* Calculate masks.  */
6880   ac->modemask = expand_simple_binop (SImode, ASHIFT,
6881 				      GEN_INT (GET_MODE_MASK (mode)),
6882 				      ac->shift, NULL_RTX, 1, OPTAB_DIRECT);
6883   ac->modemaski = expand_simple_unop (SImode, NOT, ac->modemask,
6884 				      NULL_RTX, 1);
6885 }
6886 
6887 /* A subroutine of s390_expand_cs_hqi.  Insert INS into VAL.  If possible,
6888    use a single insv insn into SEQ2.  Otherwise, put prep insns in SEQ1 and
6889    perform the merge in SEQ2.  */
6890 
6891 static rtx
6892 s390_two_part_insv (struct alignment_context *ac, rtx *seq1, rtx *seq2,
6893 		    machine_mode mode, rtx val, rtx ins)
6894 {
6895   rtx tmp;
6896 
6897   if (ac->aligned)
6898     {
6899       start_sequence ();
6900       tmp = copy_to_mode_reg (SImode, val);
6901       if (s390_expand_insv (tmp, GEN_INT (GET_MODE_BITSIZE (mode)),
6902 			    const0_rtx, ins))
6903 	{
6904 	  *seq1 = NULL;
6905 	  *seq2 = get_insns ();
6906 	  end_sequence ();
6907 	  return tmp;
6908 	}
6909       end_sequence ();
6910     }
6911 
6912   /* Failed to use insv.  Generate a two part shift and mask.  */
6913   start_sequence ();
6914   tmp = s390_expand_mask_and_shift (ins, mode, ac->shift);
6915   *seq1 = get_insns ();
6916   end_sequence ();
6917 
6918   start_sequence ();
6919   tmp = expand_simple_binop (SImode, IOR, tmp, val, NULL_RTX, 1, OPTAB_DIRECT);
6920   *seq2 = get_insns ();
6921   end_sequence ();
6922 
6923   return tmp;
6924 }
6925 
6926 /* Expand an atomic compare and swap operation for HImode and QImode.  MEM is
6927    the memory location, CMP the old value to compare MEM with and NEW_RTX the
6928    value to set if CMP == MEM.  */
6929 
6930 static void
6931 s390_expand_cs_hqi (machine_mode mode, rtx btarget, rtx vtarget, rtx mem,
6932 		    rtx cmp, rtx new_rtx, bool is_weak)
6933 {
6934   struct alignment_context ac;
6935   rtx cmpv, newv, val, cc, seq0, seq1, seq2, seq3;
6936   rtx res = gen_reg_rtx (SImode);
6937   rtx_code_label *csloop = NULL, *csend = NULL;
6938 
6939   gcc_assert (MEM_P (mem));
6940 
6941   init_alignment_context (&ac, mem, mode);
6942 
6943   /* Load full word.  Subsequent loads are performed by CS.  */
6944   val = expand_simple_binop (SImode, AND, ac.memsi, ac.modemaski,
6945 			     NULL_RTX, 1, OPTAB_DIRECT);
6946 
6947   /* Prepare insertions of cmp and new_rtx into the loaded value.  When
6948      possible, we try to use insv to make this happen efficiently.  If
6949      that fails we'll generate code both inside and outside the loop.  */
6950   cmpv = s390_two_part_insv (&ac, &seq0, &seq2, mode, val, cmp);
6951   newv = s390_two_part_insv (&ac, &seq1, &seq3, mode, val, new_rtx);
6952 
6953   if (seq0)
6954     emit_insn (seq0);
6955   if (seq1)
6956     emit_insn (seq1);
6957 
6958   /* Start CS loop.  */
6959   if (!is_weak)
6960     {
6961       /* Begin assuming success.  */
6962       emit_move_insn (btarget, const1_rtx);
6963 
6964       csloop = gen_label_rtx ();
6965       csend = gen_label_rtx ();
6966       emit_label (csloop);
6967     }
6968 
6969   /* val = "<mem>00..0<mem>"
6970    * cmp = "00..0<cmp>00..0"
6971    * new = "00..0<new>00..0"
6972    */
6973 
6974   emit_insn (seq2);
6975   emit_insn (seq3);
6976 
6977   cc = s390_emit_compare_and_swap (EQ, res, ac.memsi, cmpv, newv, CCZ1mode);
6978   if (is_weak)
6979     emit_insn (gen_cstorecc4 (btarget, cc, XEXP (cc, 0), XEXP (cc, 1)));
6980   else
6981     {
6982       rtx tmp;
6983 
6984       /* Jump to end if we're done (likely?).  */
6985       s390_emit_jump (csend, cc);
6986 
6987       /* Check for changes outside mode, and loop internally if so.
6988 	 Arrange the moves so that the compare is adjacent to the
6989 	 branch so that we can generate CRJ.  */
6990       tmp = copy_to_reg (val);
6991       force_expand_binop (SImode, and_optab, res, ac.modemaski, val,
6992 			  1, OPTAB_DIRECT);
6993       cc = s390_emit_compare (NE, val, tmp);
6994       s390_emit_jump (csloop, cc);
6995 
6996       /* Failed.  */
6997       emit_move_insn (btarget, const0_rtx);
6998       emit_label (csend);
6999     }
7000 
7001   /* Return the correct part of the bitfield.  */
7002   convert_move (vtarget, expand_simple_binop (SImode, LSHIFTRT, res, ac.shift,
7003 					      NULL_RTX, 1, OPTAB_DIRECT), 1);
7004 }
7005 
7006 /* Variant of s390_expand_cs for SI, DI and TI modes.  */
7007 static void
7008 s390_expand_cs_tdsi (machine_mode mode, rtx btarget, rtx vtarget, rtx mem,
7009 		     rtx cmp, rtx new_rtx, bool is_weak)
7010 {
7011   rtx output = vtarget;
7012   rtx_code_label *skip_cs_label = NULL;
7013   bool do_const_opt = false;
7014 
7015   if (!register_operand (output, mode))
7016     output = gen_reg_rtx (mode);
7017 
7018   /* If IS_WEAK is true and the INPUT value is a constant, compare the memory
7019      with the constant first and skip the compare_and_swap because it's very
7020      expensive and likely to fail anyway.
7021      Note 1: This is done only for IS_WEAK.  C11 allows optimizations that may
7022      cause spurious failures in that case.
7023      Note 2: It may be useful to do this also for non-constant INPUT.
7024      Note 3: Currently only targets with "load on condition" are supported
7025      (z196 and newer).  */
7026 
7027   if (TARGET_Z196
7028       && (mode == SImode || mode == DImode))
7029     do_const_opt = (is_weak && CONST_INT_P (cmp));
7030 
7031   if (do_const_opt)
7032     {
7033       rtx cc = gen_rtx_REG (CCZmode, CC_REGNUM);
7034 
7035       skip_cs_label = gen_label_rtx ();
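      /* Start out assuming failure; BTARGET is set to 1 below only if the
	 compare-and-swap succeeds.  */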
7036       emit_move_insn (btarget, const0_rtx);
7037       if (CONST_INT_P (cmp) && INTVAL (cmp) == 0)
7038 	{
7039 	  rtvec lt = rtvec_alloc (2);
7040 
7041 	  /* Load-and-test + conditional jump.  */
7042 	  RTVEC_ELT (lt, 0)
7043 	    = gen_rtx_SET (cc, gen_rtx_COMPARE (CCZmode, mem, cmp));
7044 	  RTVEC_ELT (lt, 1) = gen_rtx_SET (output, mem);
7045 	  emit_insn (gen_rtx_PARALLEL (VOIDmode, lt));
7046 	}
7047       else
7048 	{
7049 	  emit_move_insn (output, mem);
7050 	  emit_insn (gen_rtx_SET (cc, gen_rtx_COMPARE (CCZmode, output, cmp)));
7051 	}
7052       s390_emit_jump (skip_cs_label, gen_rtx_NE (VOIDmode, cc, const0_rtx));
7053       add_reg_br_prob_note (get_last_insn (),
7054 		            profile_probability::very_unlikely ());
7055       /* If the jump is not taken, OUTPUT is the expected value.  */
7056       cmp = output;
7057       /* Reload newval to a register manually, *after* the compare and jump
7058 	 above.  Otherwise Reload might place it before the jump.  */
7059     }
7060   else
7061     cmp = force_reg (mode, cmp);
7062   new_rtx = force_reg (mode, new_rtx);
7063   s390_emit_compare_and_swap (EQ, output, mem, cmp, new_rtx,
7064 			      (do_const_opt) ? CCZmode : CCZ1mode);
7065   if (skip_cs_label != NULL)
7066     emit_label (skip_cs_label);
7067 
7068   /* We deliberately accept non-register operands in the predicate
7069      to ensure the write back to the output operand happens *before*
7070      the store-flags code below.  This makes it easier for combine
7071      to merge the store-flags code with a potential test-and-branch
7072      pattern following (immediately!) afterwards.  */
7073   if (output != vtarget)
7074     emit_move_insn (vtarget, output);
7075 
7076   if (do_const_opt)
7077     {
7078       rtx cc, cond, ite;
7079 
7080       /* Do not use gen_cstorecc4 here because it writes either 1 or 0, but
7081 	 btarget has already been initialized with 0 above.  */
7082       cc = gen_rtx_REG (CCZmode, CC_REGNUM);
7083       cond = gen_rtx_EQ (VOIDmode, cc, const0_rtx);
7084       ite = gen_rtx_IF_THEN_ELSE (SImode, cond, const1_rtx, btarget);
7085       emit_insn (gen_rtx_SET (btarget, ite));
7086     }
7087   else
7088     {
7089       rtx cc, cond;
7090 
7091       cc = gen_rtx_REG (CCZ1mode, CC_REGNUM);
7092       cond = gen_rtx_EQ (SImode, cc, const0_rtx);
7093       emit_insn (gen_cstorecc4 (btarget, cond, cc, const0_rtx));
7094     }
7095 }
7096 
7097 /* Expand an atomic compare and swap operation.  MEM is the memory location,
7098    CMP the old value to compare MEM with and NEW_RTX the value to set if
7099    CMP == MEM.  */
7100 
7101 void
7102 s390_expand_cs (machine_mode mode, rtx btarget, rtx vtarget, rtx mem,
7103 		rtx cmp, rtx new_rtx, bool is_weak)
7104 {
7105   switch (mode)
7106     {
7107     case E_TImode:
7108     case E_DImode:
7109     case E_SImode:
7110       s390_expand_cs_tdsi (mode, btarget, vtarget, mem, cmp, new_rtx, is_weak);
7111       break;
7112     case E_HImode:
7113     case E_QImode:
7114       s390_expand_cs_hqi (mode, btarget, vtarget, mem, cmp, new_rtx, is_weak);
7115       break;
7116     default:
7117       gcc_unreachable ();
7118     }
7119 }
7120 
7121 /* Expand an atomic_exchange operation simulated with a compare-and-swap loop.
7122    The memory location MEM is set to INPUT.  OUTPUT is set to the previous value
7123    of MEM.  */
7124 
7125 void
7126 s390_expand_atomic_exchange_tdsi (rtx output, rtx mem, rtx input)
7127 {
7128   machine_mode mode = GET_MODE (mem);
7129   rtx_code_label *csloop;
7130 
7131   if (TARGET_Z196
7132       && (mode == DImode || mode == SImode)
7133       && CONST_INT_P (input) && INTVAL (input) == 0)
7134     {
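      /* Exchanging with zero can be done with a single atomic
	 fetch-and-AND with zero instead of a compare-and-swap loop.  */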
7135       emit_move_insn (output, const0_rtx);
7136       if (mode == DImode)
7137 	emit_insn (gen_atomic_fetch_anddi (output, mem, const0_rtx, input));
7138       else
7139 	emit_insn (gen_atomic_fetch_andsi (output, mem, const0_rtx, input));
7140       return;
7141     }
7142 
7143   input = force_reg (mode, input);
7144   emit_move_insn (output, mem);
7145   csloop = gen_label_rtx ();
7146   emit_label (csloop);
7147   s390_emit_jump (csloop, s390_emit_compare_and_swap (NE, output, mem, output,
7148 						      input, CCZ1mode));
7149 }
7150 
7151 /* Expand an atomic operation CODE of mode MODE.  MEM is the memory location
7152    and VAL the value to play with.  If AFTER is true then store the value
7153    MEM holds after the operation, if AFTER is false then store the value MEM
7154    holds before the operation.  If TARGET is zero then discard that value, else
7155    store it to TARGET.  */
7156 
7157 void
7158 s390_expand_atomic (machine_mode mode, enum rtx_code code,
7159 		    rtx target, rtx mem, rtx val, bool after)
7160 {
7161   struct alignment_context ac;
7162   rtx cmp;
7163   rtx new_rtx = gen_reg_rtx (SImode);
7164   rtx orig = gen_reg_rtx (SImode);
7165   rtx_code_label *csloop = gen_label_rtx ();
7166 
7167   gcc_assert (!target || register_operand (target, VOIDmode));
7168   gcc_assert (MEM_P (mem));
7169 
7170   init_alignment_context (&ac, mem, mode);
7171 
7172   /* Shift val to the correct bit positions.
7173      Preserve "icm", but prevent "ex icm".  */
7174   if (!(ac.aligned && code == SET && MEM_P (val)))
7175     val = s390_expand_mask_and_shift (val, mode, ac.shift);
7176 
7177   /* Further preparation insns.  */
7178   if (code == PLUS || code == MINUS)
7179     emit_move_insn (orig, val);
7180   else if (code == MULT || code == AND) /* val = "11..1<val>11..1" */
7181     val = expand_simple_binop (SImode, XOR, val, ac.modemaski,
7182 			       NULL_RTX, 1, OPTAB_DIRECT);
7183 
7184   /* Load full word.  Subsequent loads are performed by CS.  */
7185   cmp = force_reg (SImode, ac.memsi);
7186 
7187   /* Start CS loop.  */
7188   emit_label (csloop);
7189   emit_move_insn (new_rtx, cmp);
7190 
7191   /* Patch new with val at correct position.  */
7192   switch (code)
7193     {
7194     case PLUS:
7195     case MINUS:
7196       val = expand_simple_binop (SImode, code, new_rtx, orig,
7197 				 NULL_RTX, 1, OPTAB_DIRECT);
7198       val = expand_simple_binop (SImode, AND, val, ac.modemask,
7199 				 NULL_RTX, 1, OPTAB_DIRECT);
7200       /* FALLTHRU */
7201     case SET:
7202       if (ac.aligned && MEM_P (val))
7203 	store_bit_field (new_rtx, GET_MODE_BITSIZE (mode), 0,
7204 			 0, 0, SImode, val, false);
7205       else
7206 	{
7207 	  new_rtx = expand_simple_binop (SImode, AND, new_rtx, ac.modemaski,
7208 				     NULL_RTX, 1, OPTAB_DIRECT);
7209 	  new_rtx = expand_simple_binop (SImode, IOR, new_rtx, val,
7210 				     NULL_RTX, 1, OPTAB_DIRECT);
7211 	}
7212       break;
7213     case AND:
7214     case IOR:
7215     case XOR:
7216       new_rtx = expand_simple_binop (SImode, code, new_rtx, val,
7217 				 NULL_RTX, 1, OPTAB_DIRECT);
7218       break;
7219     case MULT: /* NAND */
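      /* Compute the AND first; XORing with the mode mask afterwards
	 inverts only the bits inside the accessed field.  */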
7220       new_rtx = expand_simple_binop (SImode, AND, new_rtx, val,
7221 				 NULL_RTX, 1, OPTAB_DIRECT);
7222       new_rtx = expand_simple_binop (SImode, XOR, new_rtx, ac.modemask,
7223 				 NULL_RTX, 1, OPTAB_DIRECT);
7224       break;
7225     default:
7226       gcc_unreachable ();
7227     }
7228 
7229   s390_emit_jump (csloop, s390_emit_compare_and_swap (NE, cmp,
7230 						      ac.memsi, cmp, new_rtx,
7231 						      CCZ1mode));
7232 
7233   /* Return the correct part of the bitfield.  */
7234   if (target)
7235     convert_move (target, expand_simple_binop (SImode, LSHIFTRT,
7236 					       after ? new_rtx : cmp, ac.shift,
7237 					       NULL_RTX, 1, OPTAB_DIRECT), 1);
7238 }
7239 
7240 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
7241    We need to emit DTP-relative relocations.  */
7242 
7243 static void s390_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
7244 
7245 static void
7246 s390_output_dwarf_dtprel (FILE *file, int size, rtx x)
7247 {
7248   switch (size)
7249     {
7250     case 4:
7251       fputs ("\t.long\t", file);
7252       break;
7253     case 8:
7254       fputs ("\t.quad\t", file);
7255       break;
7256     default:
7257       gcc_unreachable ();
7258     }
7259   output_addr_const (file, x);
7260   fputs ("@DTPOFF", file);
7261 }
7262 
7263 /* Return the proper mode for REGNO being represented in the dwarf
7264    unwind table.  */
7265 machine_mode
7266 s390_dwarf_frame_reg_mode (int regno)
7267 {
7268   machine_mode save_mode = default_dwarf_frame_reg_mode (regno);
7269 
7270   /* Make sure not to return DImode for any GPR with -m31 -mzarch.  */
7271   if (GENERAL_REGNO_P (regno))
7272     save_mode = Pmode;
7273 
7274   /* The rightmost 64 bits of vector registers are call-clobbered.  */
7275   if (GET_MODE_SIZE (save_mode) > 8)
7276     save_mode = DImode;
7277 
7278   return save_mode;
7279 }
7280 
7281 #ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
7282 /* Implement TARGET_MANGLE_TYPE.  */
7283 
7284 static const char *
7285 s390_mangle_type (const_tree type)
7286 {
7287   type = TYPE_MAIN_VARIANT (type);
7288 
7289   if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
7290       && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
7291     return NULL;
7292 
7293   if (type == s390_builtin_types[BT_BV16QI]) return "U6__boolc";
7294   if (type == s390_builtin_types[BT_BV8HI]) return "U6__bools";
7295   if (type == s390_builtin_types[BT_BV4SI]) return "U6__booli";
7296   if (type == s390_builtin_types[BT_BV2DI]) return "U6__booll";
7297 
7298   if (TYPE_MAIN_VARIANT (type) == long_double_type_node
7299       && TARGET_LONG_DOUBLE_128)
7300     return "g";
7301 
7302   /* For all other types, use normal C++ mangling.  */
7303   return NULL;
7304 }
7305 #endif
7306 
7307 /* In the name of slightly smaller debug output, and to cater to
7308    general assembler lossage, recognize various UNSPEC sequences
7309    and turn them back into a direct symbol reference.  */
7310 
7311 static rtx
7312 s390_delegitimize_address (rtx orig_x)
7313 {
7314   rtx x, y;
7315 
7316   orig_x = delegitimize_mem_from_attrs (orig_x);
7317   x = orig_x;
7318 
7319   /* Extract the symbol ref from:
7320      (plus:SI (reg:SI 12 %r12)
7321               (const:SI (unspec:SI [(symbol_ref/f:SI ("*.LC0"))]
7322 	                            UNSPEC_GOTOFF/PLTOFF)))
7323      and
7324      (plus:SI (reg:SI 12 %r12)
7325               (const:SI (plus:SI (unspec:SI [(symbol_ref:SI ("L"))]
7326                                              UNSPEC_GOTOFF/PLTOFF)
7327 				 (const_int 4 [0x4]))))  */
7328   if (GET_CODE (x) == PLUS
7329       && REG_P (XEXP (x, 0))
7330       && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM
7331       && GET_CODE (XEXP (x, 1)) == CONST)
7332     {
7333       HOST_WIDE_INT offset = 0;
7334 
7335       /* The const operand.  */
7336       y = XEXP (XEXP (x, 1), 0);
7337 
7338       if (GET_CODE (y) == PLUS
7339 	  && GET_CODE (XEXP (y, 1)) == CONST_INT)
7340 	{
7341 	  offset = INTVAL (XEXP (y, 1));
7342 	  y = XEXP (y, 0);
7343 	}
7344 
7345       if (GET_CODE (y) == UNSPEC
7346 	  && (XINT (y, 1) == UNSPEC_GOTOFF
7347 	      || XINT (y, 1) == UNSPEC_PLTOFF))
7348 	return plus_constant (Pmode, XVECEXP (y, 0, 0), offset);
7349     }
7350 
7351   if (GET_CODE (x) != MEM)
7352     return orig_x;
7353 
7354   x = XEXP (x, 0);
7355   if (GET_CODE (x) == PLUS
7356       && GET_CODE (XEXP (x, 1)) == CONST
7357       && GET_CODE (XEXP (x, 0)) == REG
7358       && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
7359     {
7360       y = XEXP (XEXP (x, 1), 0);
7361       if (GET_CODE (y) == UNSPEC
7362 	  && XINT (y, 1) == UNSPEC_GOT)
7363 	y = XVECEXP (y, 0, 0);
7364       else
7365 	return orig_x;
7366     }
7367   else if (GET_CODE (x) == CONST)
7368     {
7369       /* Extract the symbol ref from:
7370 	 (mem:QI (const:DI (unspec:DI [(symbol_ref:DI ("foo"))]
7371 	                               UNSPEC_PLT/GOTENT)))  */
7372 
7373       y = XEXP (x, 0);
7374       if (GET_CODE (y) == UNSPEC
7375 	  && (XINT (y, 1) == UNSPEC_GOTENT
7376 	      || XINT (y, 1) == UNSPEC_PLT))
7377 	y = XVECEXP (y, 0, 0);
7378       else
7379 	return orig_x;
7380     }
7381   else
7382     return orig_x;
7383 
7384   if (GET_MODE (orig_x) != Pmode)
7385     {
7386       if (GET_MODE (orig_x) == BLKmode)
7387 	return orig_x;
7388       y = lowpart_subreg (GET_MODE (orig_x), y, Pmode);
7389       if (y == NULL_RTX)
7390 	return orig_x;
7391     }
7392   return y;
7393 }
7394 
7395 /* Output operand OP to stdio stream FILE.
7396    OP is an address (register + offset) which is not used to address data;
7397    instead the rightmost bits are interpreted as the value.  */
7398 
7399 static void
7400 print_addrstyle_operand (FILE *file, rtx op)
7401 {
7402   HOST_WIDE_INT offset;
7403   rtx base;
7404 
7405   /* Extract base register and offset.  */
7406   if (!s390_decompose_addrstyle_without_index (op, &base, &offset))
7407     gcc_unreachable ();
7408 
7409   /* Sanity check.  */
7410   if (base)
7411     {
7412       gcc_assert (GET_CODE (base) == REG);
7413       gcc_assert (REGNO (base) < FIRST_PSEUDO_REGISTER);
7414       gcc_assert (REGNO_REG_CLASS (REGNO (base)) == ADDR_REGS);
7415     }
7416 
7417   /* Offsets are constricted to twelve bits.  */
7418   fprintf (file, HOST_WIDE_INT_PRINT_DEC, offset & ((1 << 12) - 1));
7419   if (base)
7420     fprintf (file, "(%s)", reg_names[REGNO (base)]);
7421 }
7422 
7423 /* Assigns the number of NOP halfwords to be emitted before and after the
7424    function label to *HW_BEFORE and *HW_AFTER.  Both pointers must not be NULL.
7425    If hotpatching is disabled for the function, the values are set to zero.
7426 */
7427 
7428 static void
7429 s390_function_num_hotpatch_hw (tree decl,
7430 			       int *hw_before,
7431 			       int *hw_after)
7432 {
7433   tree attr;
7434 
7435   attr = lookup_attribute ("hotpatch", DECL_ATTRIBUTES (decl));
7436 
7437   /* Handle the arguments of the hotpatch attribute.  The values
7438      specified via attribute might override the cmdline argument
7439      values.  */
7440   if (attr)
7441     {
7442       tree args = TREE_VALUE (attr);
7443 
7444       *hw_before = TREE_INT_CST_LOW (TREE_VALUE (args));
7445       *hw_after = TREE_INT_CST_LOW (TREE_VALUE (TREE_CHAIN (args)));
7446     }
7447   else
7448     {
7449       /* Use the values specified by the cmdline arguments.  */
7450       *hw_before = s390_hotpatch_hw_before_label;
7451       *hw_after = s390_hotpatch_hw_after_label;
7452     }
7453 }
7454 
7455 /* Write the current .machine and .machinemode specification to the assembler
7456    file.  */
7457 
7458 #ifdef HAVE_AS_MACHINE_MACHINEMODE
7459 static void
7460 s390_asm_output_machine_for_arch (FILE *asm_out_file)
7461 {
7462   fprintf (asm_out_file, "\t.machinemode %s\n",
7463 	   (TARGET_ZARCH) ? "zarch" : "esa");
7464   fprintf (asm_out_file, "\t.machine \"%s",
7465 	   processor_table[s390_arch].binutils_name);
7466   if (S390_USE_ARCHITECTURE_MODIFIERS)
7467     {
7468       int cpu_flags;
7469 
7470       cpu_flags = processor_flags_table[(int) s390_arch];
7471       if (TARGET_HTM && !(cpu_flags & PF_TX))
7472 	fprintf (asm_out_file, "+htm");
7473       else if (!TARGET_HTM && (cpu_flags & PF_TX))
7474 	fprintf (asm_out_file, "+nohtm");
7475       if (TARGET_VX && !(cpu_flags & PF_VX))
7476 	fprintf (asm_out_file, "+vx");
7477       else if (!TARGET_VX && (cpu_flags & PF_VX))
7478 	fprintf (asm_out_file, "+novx");
7479     }
7480   fprintf (asm_out_file, "\"\n");
7481 }
7482 
7483 /* Write an extra function header before the very start of the function.  */
7484 
7485 void
7486 s390_asm_output_function_prefix (FILE *asm_out_file,
7487 				 const char *fnname ATTRIBUTE_UNUSED)
7488 {
7489   if (DECL_FUNCTION_SPECIFIC_TARGET (current_function_decl) == NULL)
7490     return;
7491   /* Since only the function specific options are saved but not the indications
7492      which options are set, it's too much work here to figure out which options
7493      have actually changed.  Thus, generate .machine and .machinemode whenever a
7494      function has the target attribute or pragma.  */
7495   fprintf (asm_out_file, "\t.machinemode push\n");
7496   fprintf (asm_out_file, "\t.machine push\n");
7497   s390_asm_output_machine_for_arch (asm_out_file);
7498 }
7499 
7500 /* Write an extra function footer after the very end of the function.  */
7501 
7502 void
7503 s390_asm_declare_function_size (FILE *asm_out_file,
7504 				const char *fnname, tree decl)
7505 {
7506   if (!flag_inhibit_size_directive)
7507     ASM_OUTPUT_MEASURED_SIZE (asm_out_file, fnname);
7508   if (DECL_FUNCTION_SPECIFIC_TARGET (decl) == NULL)
7509     return;
7510   fprintf (asm_out_file, "\t.machine pop\n");
7511   fprintf (asm_out_file, "\t.machinemode pop\n");
7512 }
7513 #endif
7514 
7515 /* Write the extra assembler code needed to declare a function properly.  */
7516 
7517 void
7518 s390_asm_output_function_label (FILE *asm_out_file, const char *fname,
7519 				tree decl)
7520 {
7521   int hw_before, hw_after;
7522 
7523   s390_function_num_hotpatch_hw (decl, &hw_before, &hw_after);
7524   if (hw_before > 0)
7525     {
7526       unsigned int function_alignment;
7527       int i;
7528 
7529       /* Add a trampoline code area before the function label and initialize it
7530 	 with two-byte nop instructions.  This area can be overwritten with code
7531 	 that jumps to a patched version of the function.  */
7532       asm_fprintf (asm_out_file, "\tnopr\t%%r0"
7533 		   "\t# pre-label NOPs for hotpatch (%d halfwords)\n",
7534 		   hw_before);
7535       for (i = 1; i < hw_before; i++)
7536 	fputs ("\tnopr\t%r0\n", asm_out_file);
7537 
7538       /* Note:  The function label must be aligned so that (a) the bytes of the
7539 	 following nop do not cross a cacheline boundary, and (b) a jump address
7540 	 (eight bytes for 64 bit targets, 4 bytes for 32 bit targets) can be
7541 	 stored directly before the label without crossing a cacheline
7542 	 boundary.  All this is necessary to make sure the trampoline code can
7543 	 be changed atomically.
7544 	 This alignment is done automatically using the FUNCTION_BOUNDARY, but
7545 	 if there are NOPs before the function label, the alignment is placed
7546 	 before them.  So it is necessary to duplicate the alignment after the
7547 	 NOPs.  */
7548       function_alignment = MAX (8, DECL_ALIGN (decl) / BITS_PER_UNIT);
7549       if (! DECL_USER_ALIGN (decl))
7550 	function_alignment = MAX (function_alignment,
7551 				  (unsigned int) align_functions);
7552       fputs ("\t# alignment for hotpatch\n", asm_out_file);
7553       ASM_OUTPUT_ALIGN (asm_out_file, floor_log2 (function_alignment));
7554     }
7555 
7556   if (S390_USE_TARGET_ATTRIBUTE && TARGET_DEBUG_ARG)
7557     {
7558       asm_fprintf (asm_out_file, "\t# fn:%s ar%d\n", fname, s390_arch);
7559       asm_fprintf (asm_out_file, "\t# fn:%s tu%d\n", fname, s390_tune);
7560       asm_fprintf (asm_out_file, "\t# fn:%s sg%d\n", fname, s390_stack_guard);
7561       asm_fprintf (asm_out_file, "\t# fn:%s ss%d\n", fname, s390_stack_size);
7562       asm_fprintf (asm_out_file, "\t# fn:%s bc%d\n", fname, s390_branch_cost);
7563       asm_fprintf (asm_out_file, "\t# fn:%s wf%d\n", fname,
7564 		   s390_warn_framesize);
7565       asm_fprintf (asm_out_file, "\t# fn:%s ba%d\n", fname, TARGET_BACKCHAIN);
7566       asm_fprintf (asm_out_file, "\t# fn:%s hd%d\n", fname, TARGET_HARD_DFP);
7567       asm_fprintf (asm_out_file, "\t# fn:%s hf%d\n", fname, !TARGET_SOFT_FLOAT);
7568       asm_fprintf (asm_out_file, "\t# fn:%s ht%d\n", fname, TARGET_OPT_HTM);
7569       asm_fprintf (asm_out_file, "\t# fn:%s vx%d\n", fname, TARGET_OPT_VX);
7570       asm_fprintf (asm_out_file, "\t# fn:%s ps%d\n", fname,
7571 		   TARGET_PACKED_STACK);
7572       asm_fprintf (asm_out_file, "\t# fn:%s se%d\n", fname, TARGET_SMALL_EXEC);
7573       asm_fprintf (asm_out_file, "\t# fn:%s mv%d\n", fname, TARGET_MVCLE);
7574       asm_fprintf (asm_out_file, "\t# fn:%s zv%d\n", fname, TARGET_ZVECTOR);
7575       asm_fprintf (asm_out_file, "\t# fn:%s wd%d\n", fname,
7576 		   s390_warn_dynamicstack_p);
7577     }
7578   ASM_OUTPUT_LABEL (asm_out_file, fname);
7579   if (hw_after > 0)
7580     asm_fprintf (asm_out_file,
7581 		 "\t# post-label NOPs for hotpatch (%d halfwords)\n",
7582 		 hw_after);
7583 }
7584 
7585 /* Output machine-dependent UNSPECs occurring in address constant X
7586    in assembler syntax to stdio stream FILE.  Returns true if the
7587    constant X could be recognized, false otherwise.  */
7588 
7589 static bool
7590 s390_output_addr_const_extra (FILE *file, rtx x)
7591 {
7592   if (GET_CODE (x) == UNSPEC && XVECLEN (x, 0) == 1)
7593     switch (XINT (x, 1))
7594       {
7595       case UNSPEC_GOTENT:
7596 	output_addr_const (file, XVECEXP (x, 0, 0));
7597 	fprintf (file, "@GOTENT");
7598 	return true;
7599       case UNSPEC_GOT:
7600 	output_addr_const (file, XVECEXP (x, 0, 0));
7601 	fprintf (file, "@GOT");
7602 	return true;
7603       case UNSPEC_GOTOFF:
7604 	output_addr_const (file, XVECEXP (x, 0, 0));
7605 	fprintf (file, "@GOTOFF");
7606 	return true;
7607       case UNSPEC_PLT:
7608 	output_addr_const (file, XVECEXP (x, 0, 0));
7609 	fprintf (file, "@PLT");
7610 	return true;
7611       case UNSPEC_PLTOFF:
7612 	output_addr_const (file, XVECEXP (x, 0, 0));
7613 	fprintf (file, "@PLTOFF");
7614 	return true;
7615       case UNSPEC_TLSGD:
7616 	output_addr_const (file, XVECEXP (x, 0, 0));
7617 	fprintf (file, "@TLSGD");
7618 	return true;
7619       case UNSPEC_TLSLDM:
7620 	assemble_name (file, get_some_local_dynamic_name ());
7621 	fprintf (file, "@TLSLDM");
7622 	return true;
7623       case UNSPEC_DTPOFF:
7624 	output_addr_const (file, XVECEXP (x, 0, 0));
7625 	fprintf (file, "@DTPOFF");
7626 	return true;
7627       case UNSPEC_NTPOFF:
7628 	output_addr_const (file, XVECEXP (x, 0, 0));
7629 	fprintf (file, "@NTPOFF");
7630 	return true;
7631       case UNSPEC_GOTNTPOFF:
7632 	output_addr_const (file, XVECEXP (x, 0, 0));
7633 	fprintf (file, "@GOTNTPOFF");
7634 	return true;
7635       case UNSPEC_INDNTPOFF:
7636 	output_addr_const (file, XVECEXP (x, 0, 0));
7637 	fprintf (file, "@INDNTPOFF");
7638 	return true;
7639       }
7640 
7641   if (GET_CODE (x) == UNSPEC && XVECLEN (x, 0) == 2)
7642     switch (XINT (x, 1))
7643       {
7644       case UNSPEC_POOL_OFFSET:
7645 	x = gen_rtx_MINUS (GET_MODE (x), XVECEXP (x, 0, 0), XVECEXP (x, 0, 1));
7646 	output_addr_const (file, x);
7647 	return true;
7648       }
7649   return false;
7650 }
7651 
7652 /* Output address operand ADDR in assembler syntax to
7653    stdio stream FILE.  */
7654 
7655 void
7656 print_operand_address (FILE *file, rtx addr)
7657 {
7658   struct s390_address ad;
7659   memset (&ad, 0, sizeof (s390_address));
7660 
7661   if (s390_loadrelative_operand_p (addr, NULL, NULL))
7662     {
7663       if (!TARGET_Z10)
7664 	{
7665 	  output_operand_lossage ("symbolic memory references are "
7666 				  "only supported on z10 or later");
7667 	  return;
7668 	}
7669       output_addr_const (file, addr);
7670       return;
7671     }
7672 
7673   if (!s390_decompose_address (addr, &ad)
7674       || (ad.base && !REGNO_OK_FOR_BASE_P (REGNO (ad.base)))
7675       || (ad.indx && !REGNO_OK_FOR_INDEX_P (REGNO (ad.indx))))
7676     output_operand_lossage ("cannot decompose address");
7677 
7678   if (ad.disp)
7679     output_addr_const (file, ad.disp);
7680   else
7681     fprintf (file, "0");
7682 
7683   if (ad.base && ad.indx)
7684     fprintf (file, "(%s,%s)", reg_names[REGNO (ad.indx)],
7685                               reg_names[REGNO (ad.base)]);
7686   else if (ad.base)
7687     fprintf (file, "(%s)", reg_names[REGNO (ad.base)]);
7688 }
7689 
7690 /* Output operand X in assembler syntax to stdio stream FILE.
7691    CODE specified the format flag.  The following format flags
7692    are recognized:
7693 
7694     'C': print opcode suffix for branch condition.
7695     'D': print opcode suffix for inverse branch condition.
7696     'E': print opcode suffix for branch on index instruction.
7697     'G': print the size of the operand in bytes.
7698     'J': print tls_load/tls_gdcall/tls_ldcall suffix
7699     'M': print the second word of a TImode operand.
7700     'N': print the second word of a DImode operand.
7701     'O': print only the displacement of a memory reference or address.
7702     'R': print only the base register of a memory reference or address.
7703     'S': print S-type memory reference (base+displacement).
7704     'Y': print address style operand without index (e.g. shift count or setmem
7705 	 operand).
7706 
7707     'b': print integer X as if it's an unsigned byte.
7708     'c': print integer X as if it's a signed byte.
7709     'e': "end" contiguous bitmask X in either DImode or vector inner mode.
7710     'f': "end" contiguous bitmask X in SImode.
7711     'h': print integer X as if it's a signed halfword.
7712     'i': print the first nonzero HImode part of X.
7713     'j': print the first HImode part unequal to -1 of X.
7714     'k': print the first nonzero SImode part of X.
7715     'm': print the first SImode part unequal to -1 of X.
7716     'o': print integer X as if it's an unsigned 32bit word.
7717     's': "start" of contiguous bitmask X in either DImode or vector inner mode.
7718     't': CONST_INT: "start" of contiguous bitmask X in SImode.
7719          CONST_VECTOR: Generate a bitmask for vgbm instruction.
7720     'x': print integer X as if it's an unsigned halfword.
7721     'v': print register number as vector register (v1 instead of f1).
7722 */
7723 
7724 void
7725 print_operand (FILE *file, rtx x, int code)
7726 {
7727   HOST_WIDE_INT ival;
7728 
7729   switch (code)
7730     {
7731     case 'C':
7732       fprintf (file, s390_branch_condition_mnemonic (x, FALSE));
7733       return;
7734 
7735     case 'D':
7736       fprintf (file, s390_branch_condition_mnemonic (x, TRUE));
7737       return;
7738 
7739     case 'E':
7740       if (GET_CODE (x) == LE)
7741 	fprintf (file, "l");
7742       else if (GET_CODE (x) == GT)
7743 	fprintf (file, "h");
7744       else
7745 	output_operand_lossage ("invalid comparison operator "
7746 				"for 'E' output modifier");
7747       return;
7748 
7749     case 'J':
7750       if (GET_CODE (x) == SYMBOL_REF)
7751 	{
7752 	  fprintf (file, "%s", ":tls_load:");
7753 	  output_addr_const (file, x);
7754 	}
7755       else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLSGD)
7756 	{
7757 	  fprintf (file, "%s", ":tls_gdcall:");
7758 	  output_addr_const (file, XVECEXP (x, 0, 0));
7759 	}
7760       else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLSLDM)
7761 	{
7762 	  fprintf (file, "%s", ":tls_ldcall:");
7763 	  const char *name = get_some_local_dynamic_name ();
7764 	  gcc_assert (name);
7765 	  assemble_name (file, name);
7766 	}
7767       else
7768 	output_operand_lossage ("invalid reference for 'J' output modifier");
7769       return;
7770 
7771     case 'G':
7772       fprintf (file, "%u", GET_MODE_SIZE (GET_MODE (x)));
7773       return;
7774 
7775     case 'O':
7776       {
7777         struct s390_address ad;
7778 	int ret;
7779 
7780 	ret = s390_decompose_address (MEM_P (x) ? XEXP (x, 0) : x, &ad);
7781 
7782 	if (!ret
7783 	    || (ad.base && !REGNO_OK_FOR_BASE_P (REGNO (ad.base)))
7784 	    || ad.indx)
7785 	  {
7786 	    output_operand_lossage ("invalid address for 'O' output modifier");
7787 	    return;
7788 	  }
7789 
7790         if (ad.disp)
7791           output_addr_const (file, ad.disp);
7792         else
7793           fprintf (file, "0");
7794       }
7795       return;
7796 
7797     case 'R':
7798       {
7799         struct s390_address ad;
7800 	int ret;
7801 
7802 	ret = s390_decompose_address (MEM_P (x) ? XEXP (x, 0) : x, &ad);
7803 
7804 	if (!ret
7805 	    || (ad.base && !REGNO_OK_FOR_BASE_P (REGNO (ad.base)))
7806 	    || ad.indx)
7807 	  {
7808 	    output_operand_lossage ("invalid address for 'R' output modifier");
7809 	    return;
7810 	  }
7811 
7812         if (ad.base)
7813           fprintf (file, "%s", reg_names[REGNO (ad.base)]);
7814         else
7815           fprintf (file, "0");
7816       }
7817       return;
7818 
7819     case 'S':
7820       {
7821 	struct s390_address ad;
7822 	int ret;
7823 
7824 	if (!MEM_P (x))
7825 	  {
7826 	    output_operand_lossage ("memory reference expected for "
7827 				    "'S' output modifier");
7828 	    return;
7829 	  }
7830 	ret = s390_decompose_address (XEXP (x, 0), &ad);
7831 
7832 	if (!ret
7833 	    || (ad.base && !REGNO_OK_FOR_BASE_P (REGNO (ad.base)))
7834 	    || ad.indx)
7835 	  {
7836 	    output_operand_lossage ("invalid address for 'S' output modifier");
7837 	    return;
7838 	  }
7839 
7840 	if (ad.disp)
7841 	  output_addr_const (file, ad.disp);
7842 	else
7843 	  fprintf (file, "0");
7844 
7845 	if (ad.base)
7846 	  fprintf (file, "(%s)", reg_names[REGNO (ad.base)]);
7847       }
7848       return;
7849 
7850     case 'N':
7851       if (GET_CODE (x) == REG)
7852 	x = gen_rtx_REG (GET_MODE (x), REGNO (x) + 1);
7853       else if (GET_CODE (x) == MEM)
7854 	x = change_address (x, VOIDmode,
7855 			    plus_constant (Pmode, XEXP (x, 0), 4));
7856       else
7857 	output_operand_lossage ("register or memory expression expected "
7858 				"for 'N' output modifier");
7859       break;
7860 
7861     case 'M':
7862       if (GET_CODE (x) == REG)
7863 	x = gen_rtx_REG (GET_MODE (x), REGNO (x) + 1);
7864       else if (GET_CODE (x) == MEM)
7865 	x = change_address (x, VOIDmode,
7866 			    plus_constant (Pmode, XEXP (x, 0), 8));
7867       else
7868 	output_operand_lossage ("register or memory expression expected "
7869 				"for 'M' output modifier");
7870       break;
7871 
7872     case 'Y':
7873       print_addrstyle_operand (file, x);
7874       return;
7875     }
7876 
7877   switch (GET_CODE (x))
7878     {
7879     case REG:
7880       /* Print FP regs as fx instead of vx when they are accessed
7881 	 through non-vector mode.  */
7882       if (code == 'v'
7883 	  || VECTOR_NOFP_REG_P (x)
7884 	  || (FP_REG_P (x) && VECTOR_MODE_P (GET_MODE (x)))
7885 	  || (VECTOR_REG_P (x)
7886 	      && (GET_MODE_SIZE (GET_MODE (x)) /
7887 		  s390_class_max_nregs (FP_REGS, GET_MODE (x))) > 8))
7888 	fprintf (file, "%%v%s", reg_names[REGNO (x)] + 2);
7889       else
7890 	fprintf (file, "%s", reg_names[REGNO (x)]);
7891       break;
7892 
7893     case MEM:
7894       output_address (GET_MODE (x), XEXP (x, 0));
7895       break;
7896 
7897     case CONST:
7898     case CODE_LABEL:
7899     case LABEL_REF:
7900     case SYMBOL_REF:
7901       output_addr_const (file, x);
7902       break;
7903 
7904     case CONST_INT:
7905       ival = INTVAL (x);
7906       switch (code)
7907 	{
7908 	case 0:
7909 	  break;
7910 	case 'b':
7911 	  ival &= 0xff;
7912 	  break;
7913 	case 'c':
7914 	  ival = ((ival & 0xff) ^ 0x80) - 0x80;
7915 	  break;
7916 	case 'x':
7917 	  ival &= 0xffff;
7918 	  break;
7919 	case 'h':
7920 	  ival = ((ival & 0xffff) ^ 0x8000) - 0x8000;
7921 	  break;
7922 	case 'i':
7923 	  ival = s390_extract_part (x, HImode, 0);
7924 	  break;
7925 	case 'j':
7926 	  ival = s390_extract_part (x, HImode, -1);
7927 	  break;
7928 	case 'k':
7929 	  ival = s390_extract_part (x, SImode, 0);
7930 	  break;
7931 	case 'm':
7932 	  ival = s390_extract_part (x, SImode, -1);
7933 	  break;
7934 	case 'o':
7935 	  ival &= 0xffffffff;
7936 	  break;
7937 	case 'e': case 'f':
7938 	case 's': case 't':
7939 	  {
7940 	    int start, end;
7941 	    int len;
7942 	    bool ok;
7943 
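	    /* 's' and 'e' operate on 64-bit (DImode) masks, 'f' and 't'
	       on 32-bit (SImode) masks.  */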
7944 	    len = (code == 's' || code == 'e' ? 64 : 32);
7945 	    ok = s390_contiguous_bitmask_p (ival, true, len, &start, &end);
7946 	    gcc_assert (ok);
7947 	    if (code == 's' || code == 't')
7948 	      ival = start;
7949 	    else
7950 	      ival = end;
7951 	  }
7952 	  break;
7953 	default:
7954 	  output_operand_lossage ("invalid constant for output modifier '%c'", code);
7955 	}
7956       fprintf (file, HOST_WIDE_INT_PRINT_DEC, ival);
7957       break;
7958 
7959     case CONST_WIDE_INT:
7960       if (code == 'b')
7961         fprintf (file, HOST_WIDE_INT_PRINT_DEC,
7962 		 CONST_WIDE_INT_ELT (x, 0) & 0xff);
7963       else if (code == 'x')
7964         fprintf (file, HOST_WIDE_INT_PRINT_DEC,
7965 		 CONST_WIDE_INT_ELT (x, 0) & 0xffff);
7966       else if (code == 'h')
7967         fprintf (file, HOST_WIDE_INT_PRINT_DEC,
7968 		 ((CONST_WIDE_INT_ELT (x, 0) & 0xffff) ^ 0x8000) - 0x8000);
7969       else
7970 	{
7971 	  if (code == 0)
7972 	    output_operand_lossage ("invalid constant - try using "
7973 				    "an output modifier");
7974 	  else
7975 	    output_operand_lossage ("invalid constant for output modifier '%c'",
7976 				    code);
7977 	}
7978       break;
7979     case CONST_VECTOR:
7980       switch (code)
7981 	{
7982 	case 'h':
7983 	  gcc_assert (const_vec_duplicate_p (x));
7984 	  fprintf (file, HOST_WIDE_INT_PRINT_DEC,
7985 		   ((INTVAL (XVECEXP (x, 0, 0)) & 0xffff) ^ 0x8000) - 0x8000);
7986 	  break;
7987 	case 'e':
7988 	case 's':
7989 	  {
7990 	    int start, end;
7991 	    bool ok;
7992 
7993 	    ok = s390_contiguous_bitmask_vector_p (x, &start, &end);
7994 	    gcc_assert (ok);
7995 	    ival = (code == 's') ? start : end;
7996 	    fprintf (file, HOST_WIDE_INT_PRINT_DEC, ival);
7997 	  }
7998 	  break;
7999 	case 't':
8000 	  {
8001 	    unsigned mask;
8002 	    bool ok = s390_bytemask_vector_p (x, &mask);
8003 	    gcc_assert (ok);
8004 	    fprintf (file, "%u", mask);
8005 	  }
8006 	  break;
8007 
8008 	default:
8009 	  output_operand_lossage ("invalid constant vector for output "
8010 				  "modifier '%c'", code);
8011 	}
8012       break;
8013 
8014     default:
8015       if (code == 0)
8016 	output_operand_lossage ("invalid expression - try using "
8017 				"an output modifier");
8018       else
8019 	output_operand_lossage ("invalid expression for output "
8020 				"modifier '%c'", code);
8021       break;
8022     }
8023 }
8024 
8025 /* Target hook for assembling integer objects.  We need to define it
8026    here to work around a bug in some versions of GAS, which couldn't
8027    handle values smaller than INT_MIN when printed in decimal.  */
8028 
8029 static bool
8030 s390_assemble_integer (rtx x, unsigned int size, int aligned_p)
8031 {
8032   if (size == 8 && aligned_p
8033       && GET_CODE (x) == CONST_INT && INTVAL (x) < INT_MIN)
8034     {
8035       fprintf (asm_out_file, "\t.quad\t" HOST_WIDE_INT_PRINT_HEX "\n",
8036 	       INTVAL (x));
8037       return true;
8038     }
8039   return default_assemble_integer (x, size, aligned_p);
8040 }
8041 
8042 /* Returns true if register REGNO is used for forming
8043    a memory address in expression X.  */
8044 
8045 static bool
8046 reg_used_in_mem_p (int regno, rtx x)
8047 {
8048   enum rtx_code code = GET_CODE (x);
8049   int i, j;
8050   const char *fmt;
8051 
8052   if (code == MEM)
8053     {
8054       if (refers_to_regno_p (regno, XEXP (x, 0)))
8055 	return true;
8056     }
8057   else if (code == SET
8058 	   && GET_CODE (SET_DEST (x)) == PC)
8059     {
8060       if (refers_to_regno_p (regno, SET_SRC (x)))
8061 	return true;
8062     }
8063 
8064   fmt = GET_RTX_FORMAT (code);
8065   for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
8066     {
8067       if (fmt[i] == 'e'
8068 	  && reg_used_in_mem_p (regno, XEXP (x, i)))
8069 	return true;
8070 
8071       else if (fmt[i] == 'E')
8072 	for (j = 0; j < XVECLEN (x, i); j++)
8073 	  if (reg_used_in_mem_p (regno, XVECEXP (x, i, j)))
8074 	    return true;
8075     }
8076   return false;
8077 }
8078 
8079 /* Returns true if expression DEP_RTX sets an address register
8080    used by instruction INSN to address memory.  */
8081 
8082 static bool
8083 addr_generation_dependency_p (rtx dep_rtx, rtx_insn *insn)
8084 {
8085   rtx target, pat;
8086 
8087   if (NONJUMP_INSN_P (dep_rtx))
8088     dep_rtx = PATTERN (dep_rtx);
8089 
8090   if (GET_CODE (dep_rtx) == SET)
8091     {
8092       target = SET_DEST (dep_rtx);
8093       if (GET_CODE (target) == STRICT_LOW_PART)
8094 	target = XEXP (target, 0);
8095       while (GET_CODE (target) == SUBREG)
8096 	target = SUBREG_REG (target);
8097 
8098       if (GET_CODE (target) == REG)
8099 	{
8100 	  int regno = REGNO (target);
8101 
8102 	  if (s390_safe_attr_type (insn) == TYPE_LA)
8103 	    {
8104 	      pat = PATTERN (insn);
8105 	      if (GET_CODE (pat) == PARALLEL)
8106 		{
8107 		  gcc_assert (XVECLEN (pat, 0) == 2);
8108 		  pat = XVECEXP (pat, 0, 0);
8109 		}
8110 	      gcc_assert (GET_CODE (pat) == SET);
8111 	      return refers_to_regno_p (regno, SET_SRC (pat));
8112 	    }
8113 	  else if (get_attr_atype (insn) == ATYPE_AGEN)
8114 	    return reg_used_in_mem_p (regno, PATTERN (insn));
8115 	}
8116     }
8117   return false;
8118 }
8119 
8120 /* Return 1 if dep_insn sets a register used in insn in the agen unit.  */
8121 
8122 int
8123 s390_agen_dep_p (rtx_insn *dep_insn, rtx_insn *insn)
8124 {
8125   rtx dep_rtx = PATTERN (dep_insn);
8126   int i;
8127 
8128   if (GET_CODE (dep_rtx) == SET
8129       && addr_generation_dependency_p (dep_rtx, insn))
8130     return 1;
8131   else if (GET_CODE (dep_rtx) == PARALLEL)
8132     {
8133       for (i = 0; i < XVECLEN (dep_rtx, 0); i++)
8134 	{
8135 	  if (addr_generation_dependency_p (XVECEXP (dep_rtx, 0, i), insn))
8136 	    return 1;
8137 	}
8138     }
8139   return 0;
8140 }
8141 
8142 
8143 /* A C statement (sans semicolon) to update the integer scheduling priority
8144    INSN_PRIORITY (INSN).  Increase the priority to execute the INSN earlier,
8145    reduce the priority to execute INSN later.  Do not define this macro if
8146    you do not need to adjust the scheduling priorities of insns.
8147 
8148    A STD instruction should be scheduled earlier,
8149    in order to use the bypass.  */
8150 static int
8151 s390_adjust_priority (rtx_insn *insn, int priority)
8152 {
8153   if (! INSN_P (insn))
8154     return priority;
8155 
8156   if (s390_tune <= PROCESSOR_2064_Z900)
8157     return priority;
8158 
8159   switch (s390_safe_attr_type (insn))
8160     {
8161       case TYPE_FSTOREDF:
8162       case TYPE_FSTORESF:
8163 	priority = priority << 3;
8164 	break;
8165       case TYPE_STORE:
8166       case TYPE_STM:
8167 	priority = priority << 1;
8168 	break;
8169       default:
8170         break;
8171     }
8172   return priority;
8173 }
8174 
8175 
8176 /* The number of instructions that can be issued per cycle.  */
8177 
8178 static int
8179 s390_issue_rate (void)
8180 {
8181   switch (s390_tune)
8182     {
8183     case PROCESSOR_2084_Z990:
8184     case PROCESSOR_2094_Z9_109:
8185     case PROCESSOR_2094_Z9_EC:
8186     case PROCESSOR_2817_Z196:
8187       return 3;
8188     case PROCESSOR_2097_Z10:
8189       return 2;
8190     case PROCESSOR_9672_G5:
8191     case PROCESSOR_9672_G6:
8192     case PROCESSOR_2064_Z900:
8193       /* Starting with EC12 we use the sched_reorder hook to take care
8194 	 of instruction dispatch constraints.  The algorithm only
8195 	 picks the best instruction and assumes only a single
8196 	 instruction gets issued per cycle.  */
8197     case PROCESSOR_2827_ZEC12:
8198     case PROCESSOR_2964_Z13:
8199     case PROCESSOR_3906_Z14:
8200     default:
8201       return 1;
8202     }
8203 }
8204 
8205 static int
8206 s390_first_cycle_multipass_dfa_lookahead (void)
8207 {
8208   return 4;
8209 }
8210 
8211 /* Annotate every literal pool reference in X by an UNSPEC_LTREF expression.
8212    Fix up MEMs as required.  */
8213 
8214 static void
8215 annotate_constant_pool_refs (rtx *x)
8216 {
8217   int i, j;
8218   const char *fmt;
8219 
8220   gcc_assert (GET_CODE (*x) != SYMBOL_REF
8221 	      || !CONSTANT_POOL_ADDRESS_P (*x));
8222 
8223   /* Literal pool references can only occur inside a MEM ...  */
8224   if (GET_CODE (*x) == MEM)
8225     {
8226       rtx memref = XEXP (*x, 0);
8227 
8228       if (GET_CODE (memref) == SYMBOL_REF
8229 	  && CONSTANT_POOL_ADDRESS_P (memref))
8230 	{
8231 	  rtx base = cfun->machine->base_reg;
8232 	  rtx addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, memref, base),
8233 				     UNSPEC_LTREF);
8234 
8235 	  *x = replace_equiv_address (*x, addr);
8236 	  return;
8237 	}
8238 
8239       if (GET_CODE (memref) == CONST
8240 	  && GET_CODE (XEXP (memref, 0)) == PLUS
8241 	  && GET_CODE (XEXP (XEXP (memref, 0), 1)) == CONST_INT
8242 	  && GET_CODE (XEXP (XEXP (memref, 0), 0)) == SYMBOL_REF
8243 	  && CONSTANT_POOL_ADDRESS_P (XEXP (XEXP (memref, 0), 0)))
8244 	{
8245 	  HOST_WIDE_INT off = INTVAL (XEXP (XEXP (memref, 0), 1));
8246 	  rtx sym = XEXP (XEXP (memref, 0), 0);
8247 	  rtx base = cfun->machine->base_reg;
8248 	  rtx addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, sym, base),
8249 				     UNSPEC_LTREF);
8250 
8251 	  *x = replace_equiv_address (*x, plus_constant (Pmode, addr, off));
8252 	  return;
8253 	}
8254     }
8255 
8256   /* ... or a load-address type pattern.  */
8257   if (GET_CODE (*x) == SET)
8258     {
8259       rtx addrref = SET_SRC (*x);
8260 
8261       if (GET_CODE (addrref) == SYMBOL_REF
8262 	  && CONSTANT_POOL_ADDRESS_P (addrref))
8263 	{
8264 	  rtx base = cfun->machine->base_reg;
8265 	  rtx addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, addrref, base),
8266 				     UNSPEC_LTREF);
8267 
8268 	  SET_SRC (*x) = addr;
8269 	  return;
8270 	}
8271 
8272       if (GET_CODE (addrref) == CONST
8273 	  && GET_CODE (XEXP (addrref, 0)) == PLUS
8274 	  && GET_CODE (XEXP (XEXP (addrref, 0), 1)) == CONST_INT
8275 	  && GET_CODE (XEXP (XEXP (addrref, 0), 0)) == SYMBOL_REF
8276 	  && CONSTANT_POOL_ADDRESS_P (XEXP (XEXP (addrref, 0), 0)))
8277 	{
8278 	  HOST_WIDE_INT off = INTVAL (XEXP (XEXP (addrref, 0), 1));
8279 	  rtx sym = XEXP (XEXP (addrref, 0), 0);
8280 	  rtx base = cfun->machine->base_reg;
8281 	  rtx addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, sym, base),
8282 				     UNSPEC_LTREF);
8283 
8284 	  SET_SRC (*x) = plus_constant (Pmode, addr, off);
8285 	  return;
8286 	}
8287     }
8288 
8289   /* Annotate LTREL_BASE as well.  */
8290   if (GET_CODE (*x) == UNSPEC
8291       && XINT (*x, 1) == UNSPEC_LTREL_BASE)
8292     {
8293       rtx base = cfun->machine->base_reg;
8294       *x = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, XVECEXP (*x, 0, 0), base),
8295 				  UNSPEC_LTREL_BASE);
8296       return;
8297     }
8298 
8299   fmt = GET_RTX_FORMAT (GET_CODE (*x));
8300   for (i = GET_RTX_LENGTH (GET_CODE (*x)) - 1; i >= 0; i--)
8301     {
8302       if (fmt[i] == 'e')
8303         {
8304           annotate_constant_pool_refs (&XEXP (*x, i));
8305         }
8306       else if (fmt[i] == 'E')
8307         {
8308           for (j = 0; j < XVECLEN (*x, i); j++)
8309             annotate_constant_pool_refs (&XVECEXP (*x, i, j));
8310         }
8311     }
8312 }
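
/* The overall effect of annotate_constant_pool_refs is a rewrite like

     (mem (symbol_ref <in literal pool>))
     --> (mem (unspec [(symbol_ref ...) (base_reg)] UNSPEC_LTREF))

   (plus an optional constant offset), making the use of the literal
   pool base register explicit in the RTL.  */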
8313 
8314 /* Split all branches that exceed the maximum distance.
8315    Returns true if this created a new literal pool entry.  */
8316 
8317 static int
8318 s390_split_branches (void)
8319 {
8320   rtx temp_reg = gen_rtx_REG (Pmode, RETURN_REGNUM);
8321   int new_literal = 0, ret;
8322   rtx_insn *insn;
8323   rtx pat, target;
8324   rtx *label;
8325 
8326   /* We need correct insn addresses.  */
8327 
8328   shorten_branches (get_insns ());
8329 
8330   /* Find all branches that exceed 64KB, and split them.  */
8331 
8332   for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
8333     {
8334       if (! JUMP_P (insn) || tablejump_p (insn, NULL, NULL))
8335 	continue;
8336 
8337       pat = PATTERN (insn);
8338       if (GET_CODE (pat) == PARALLEL)
8339 	pat = XVECEXP (pat, 0, 0);
8340       if (GET_CODE (pat) != SET || SET_DEST (pat) != pc_rtx)
8341 	continue;
8342 
8343       if (GET_CODE (SET_SRC (pat)) == LABEL_REF)
8344 	{
8345 	  label = &SET_SRC (pat);
8346 	}
8347       else if (GET_CODE (SET_SRC (pat)) == IF_THEN_ELSE)
8348 	{
8349 	  if (GET_CODE (XEXP (SET_SRC (pat), 1)) == LABEL_REF)
8350 	    label = &XEXP (SET_SRC (pat), 1);
8351           else if (GET_CODE (XEXP (SET_SRC (pat), 2)) == LABEL_REF)
8352             label = &XEXP (SET_SRC (pat), 2);
8353 	  else
8354 	    continue;
8355         }
8356       else
8357 	continue;
8358 
8359       if (get_attr_length (insn) <= 4)
8360 	continue;
8361 
8362       /* We are going to use the return register as scratch register,
8363 	 make sure it will be saved/restored by the prologue/epilogue.  */
8364       cfun_frame_layout.save_return_addr_p = 1;
8365 
8366       if (!flag_pic)
8367 	{
8368 	  new_literal = 1;
8369 	  rtx mem = force_const_mem (Pmode, *label);
8370 	  rtx_insn *set_insn = emit_insn_before (gen_rtx_SET (temp_reg, mem),
8371 						 insn);
8372 	  INSN_ADDRESSES_NEW (set_insn, -1);
8373 	  annotate_constant_pool_refs (&PATTERN (set_insn));
8374 
8375 	  target = temp_reg;
8376 	}
8377       else
8378 	{
8379 	  new_literal = 1;
8380 	  target = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, *label),
8381 				   UNSPEC_LTREL_OFFSET);
8382 	  target = gen_rtx_CONST (Pmode, target);
8383 	  target = force_const_mem (Pmode, target);
8384 	  rtx_insn *set_insn = emit_insn_before (gen_rtx_SET (temp_reg, target),
8385 						 insn);
8386 	  INSN_ADDRESSES_NEW (set_insn, -1);
8387 	  annotate_constant_pool_refs (&PATTERN (set_insn));
8388 
8389           target = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, XEXP (target, 0),
8390 							cfun->machine->base_reg),
8391 				   UNSPEC_LTREL_BASE);
8392 	  target = gen_rtx_PLUS (Pmode, temp_reg, target);
8393 	}
8394 
8395       ret = validate_change (insn, label, target, 0);
8396       gcc_assert (ret);
8397     }
8398 
8399   return new_literal;
8400 }
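
/* For a too-distant branch the code above loads the target address
   into the return register from the literal pool and redirects the
   branch through that register: non-PIC code uses the label address
   directly as a pool constant, while PIC code stores the label's
   pool-relative offset (UNSPEC_LTREL_OFFSET) in the pool and adds the
   pool base address (UNSPEC_LTREL_BASE) at run time.  */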
8401 
8402 
8403 /* Find an annotated literal pool symbol referenced in RTX X,
8404    and store it at REF.  Will abort if X contains references to
8405    more than one such pool symbol; multiple references to the same
8406    symbol are allowed, however.
8407 
8408    The rtx pointed to by REF must be initialized to NULL_RTX
8409    by the caller before calling this routine.  */
8410 
8411 static void
8412 find_constant_pool_ref (rtx x, rtx *ref)
8413 {
8414   int i, j;
8415   const char *fmt;
8416 
8417   /* Ignore LTREL_BASE references.  */
8418   if (GET_CODE (x) == UNSPEC
8419       && XINT (x, 1) == UNSPEC_LTREL_BASE)
8420     return;
8421   /* Likewise POOL_ENTRY insns.  */
8422   if (GET_CODE (x) == UNSPEC_VOLATILE
8423       && XINT (x, 1) == UNSPECV_POOL_ENTRY)
8424     return;
8425 
8426   gcc_assert (GET_CODE (x) != SYMBOL_REF
8427               || !CONSTANT_POOL_ADDRESS_P (x));
8428 
8429   if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_LTREF)
8430     {
8431       rtx sym = XVECEXP (x, 0, 0);
8432       gcc_assert (GET_CODE (sym) == SYMBOL_REF
8433 	          && CONSTANT_POOL_ADDRESS_P (sym));
8434 
8435       if (*ref == NULL_RTX)
8436 	*ref = sym;
8437       else
8438 	gcc_assert (*ref == sym);
8439 
8440       return;
8441     }
8442 
8443   fmt = GET_RTX_FORMAT (GET_CODE (x));
8444   for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
8445     {
8446       if (fmt[i] == 'e')
8447         {
8448           find_constant_pool_ref (XEXP (x, i), ref);
8449         }
8450       else if (fmt[i] == 'E')
8451         {
8452           for (j = 0; j < XVECLEN (x, i); j++)
8453             find_constant_pool_ref (XVECEXP (x, i, j), ref);
8454         }
8455     }
8456 }
8457 
8458 /* Replace every reference to the annotated literal pool
8459    symbol REF in X by its base plus OFFSET.  */
8460 
8461 static void
8462 replace_constant_pool_ref (rtx *x, rtx ref, rtx offset)
8463 {
8464   int i, j;
8465   const char *fmt;
8466 
8467   gcc_assert (*x != ref);
8468 
8469   if (GET_CODE (*x) == UNSPEC
8470       && XINT (*x, 1) == UNSPEC_LTREF
8471       && XVECEXP (*x, 0, 0) == ref)
8472     {
8473       *x = gen_rtx_PLUS (Pmode, XVECEXP (*x, 0, 1), offset);
8474       return;
8475     }
8476 
8477   if (GET_CODE (*x) == PLUS
8478       && GET_CODE (XEXP (*x, 1)) == CONST_INT
8479       && GET_CODE (XEXP (*x, 0)) == UNSPEC
8480       && XINT (XEXP (*x, 0), 1) == UNSPEC_LTREF
8481       && XVECEXP (XEXP (*x, 0), 0, 0) == ref)
8482     {
8483       rtx addr = gen_rtx_PLUS (Pmode, XVECEXP (XEXP (*x, 0), 0, 1), offset);
8484       *x = plus_constant (Pmode, addr, INTVAL (XEXP (*x, 1)));
8485       return;
8486     }
8487 
8488   fmt = GET_RTX_FORMAT (GET_CODE (*x));
8489   for (i = GET_RTX_LENGTH (GET_CODE (*x)) - 1; i >= 0; i--)
8490     {
8491       if (fmt[i] == 'e')
8492         {
8493           replace_constant_pool_ref (&XEXP (*x, i), ref, offset);
8494         }
8495       else if (fmt[i] == 'E')
8496         {
8497           for (j = 0; j < XVECLEN (*x, i); j++)
8498             replace_constant_pool_ref (&XVECEXP (*x, i, j), ref, offset);
8499         }
8500     }
8501 }
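
/* Example of the rewrite performed here:

     (mem (unspec [(symbol_ref C) (reg base)] UNSPEC_LTREF))
     --> (mem (plus (reg base) OFFSET))

   where OFFSET is the rtx passed in, describing the distance of C from
   the start of its literal pool.  */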
8502 
8503 /* Check whether X contains an UNSPEC_LTREL_BASE.
8504    Return its constant pool symbol if found, NULL_RTX otherwise.  */
8505 
8506 static rtx
8507 find_ltrel_base (rtx x)
8508 {
8509   int i, j;
8510   const char *fmt;
8511 
8512   if (GET_CODE (x) == UNSPEC
8513       && XINT (x, 1) == UNSPEC_LTREL_BASE)
8514     return XVECEXP (x, 0, 0);
8515 
8516   fmt = GET_RTX_FORMAT (GET_CODE (x));
8517   for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
8518     {
8519       if (fmt[i] == 'e')
8520         {
8521           rtx fnd = find_ltrel_base (XEXP (x, i));
8522 	  if (fnd)
8523 	    return fnd;
8524         }
8525       else if (fmt[i] == 'E')
8526         {
8527           for (j = 0; j < XVECLEN (x, i); j++)
8528 	    {
8529               rtx fnd = find_ltrel_base (XVECEXP (x, i, j));
8530 	      if (fnd)
8531 		return fnd;
8532 	    }
8533         }
8534     }
8535 
8536   return NULL_RTX;
8537 }
8538 
8539 /* Replace any occurrence of UNSPEC_LTREL_BASE in X with its base.  */
8540 
8541 static void
8542 replace_ltrel_base (rtx *x)
8543 {
8544   int i, j;
8545   const char *fmt;
8546 
8547   if (GET_CODE (*x) == UNSPEC
8548       && XINT (*x, 1) == UNSPEC_LTREL_BASE)
8549     {
8550       *x = XVECEXP (*x, 0, 1);
8551       return;
8552     }
8553 
8554   fmt = GET_RTX_FORMAT (GET_CODE (*x));
8555   for (i = GET_RTX_LENGTH (GET_CODE (*x)) - 1; i >= 0; i--)
8556     {
8557       if (fmt[i] == 'e')
8558         {
8559           replace_ltrel_base (&XEXP (*x, i));
8560         }
8561       else if (fmt[i] == 'E')
8562         {
8563           for (j = 0; j < XVECLEN (*x, i); j++)
8564             replace_ltrel_base (&XVECEXP (*x, i, j));
8565         }
8566     }
8567 }
8568 
8569 
8570 /* We keep a list of constants which we have to add to internal
8571    constant tables in the middle of large functions.  */
8572 
8573 #define NR_C_MODES 32
8574 machine_mode constant_modes[NR_C_MODES] =
8575 {
8576   TFmode, TImode, TDmode,
8577   V16QImode, V8HImode, V4SImode, V2DImode, V1TImode,
8578   V4SFmode, V2DFmode, V1TFmode,
8579   DFmode, DImode, DDmode,
8580   V8QImode, V4HImode, V2SImode, V1DImode, V2SFmode, V1DFmode,
8581   SFmode, SImode, SDmode,
8582   V4QImode, V2HImode, V1SImode,  V1SFmode,
8583   HImode,
8584   V2QImode, V1HImode,
8585   QImode,
8586   V1QImode
8587 };
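
/* The modes above are ordered by decreasing size (and hence decreasing
   alignment requirement).  s390_dump_pool walks this array in order, so
   larger constants are emitted first and the alignment requirement
   never increases while the pool is being output.  */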
8588 
8589 struct constant
8590 {
8591   struct constant *next;
8592   rtx value;
8593   rtx_code_label *label;
8594 };
8595 
8596 struct constant_pool
8597 {
8598   struct constant_pool *next;
8599   rtx_insn *first_insn;
8600   rtx_insn *pool_insn;
8601   bitmap insns;
8602   rtx_insn *emit_pool_after;
8603 
8604   struct constant *constants[NR_C_MODES];
8605   struct constant *execute;
8606   rtx_code_label *label;
8607   int size;
8608 };
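
/* Each constant_pool describes one pool chunk: the insns it covers
   (first_insn and the insns bitmap), the placeholder insn marking where
   the pool will be dumped (pool_insn), the label of its base address,
   the constants collected per mode, any execute target templates, and
   the accumulated size in bytes.  */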
8609 
8610 /* Allocate new constant_pool structure.  */
8611 
8612 static struct constant_pool *
8613 s390_alloc_pool (void)
8614 {
8615   struct constant_pool *pool;
8616   int i;
8617 
8618   pool = (struct constant_pool *) xmalloc (sizeof *pool);
8619   pool->next = NULL;
8620   for (i = 0; i < NR_C_MODES; i++)
8621     pool->constants[i] = NULL;
8622 
8623   pool->execute = NULL;
8624   pool->label = gen_label_rtx ();
8625   pool->first_insn = NULL;
8626   pool->pool_insn = NULL;
8627   pool->insns = BITMAP_ALLOC (NULL);
8628   pool->size = 0;
8629   pool->emit_pool_after = NULL;
8630 
8631   return pool;
8632 }
8633 
8634 /* Create new constant pool covering instructions starting at INSN
8635    and chain it to the end of POOL_LIST.  */
8636 
8637 static struct constant_pool *
8638 s390_start_pool (struct constant_pool **pool_list, rtx_insn *insn)
8639 {
8640   struct constant_pool *pool, **prev;
8641 
8642   pool = s390_alloc_pool ();
8643   pool->first_insn = insn;
8644 
8645   for (prev = pool_list; *prev; prev = &(*prev)->next)
8646     ;
8647   *prev = pool;
8648 
8649   return pool;
8650 }
8651 
8652 /* End range of instructions covered by POOL at INSN and emit
8653    placeholder insn representing the pool.  */
8654 
8655 static void
8656 s390_end_pool (struct constant_pool *pool, rtx_insn *insn)
8657 {
8658   rtx pool_size = GEN_INT (pool->size + 8 /* alignment slop */);
8659 
8660   if (!insn)
8661     insn = get_last_insn ();
8662 
8663   pool->pool_insn = emit_insn_after (gen_pool (pool_size), insn);
8664   INSN_ADDRESSES_NEW (pool->pool_insn, -1);
8665 }
8666 
8667 /* Add INSN to the list of insns covered by POOL.  */
8668 
8669 static void
8670 s390_add_pool_insn (struct constant_pool *pool, rtx insn)
8671 {
8672   bitmap_set_bit (pool->insns, INSN_UID (insn));
8673 }
8674 
8675 /* Return pool out of POOL_LIST that covers INSN.  */
8676 
8677 static struct constant_pool *
8678 s390_find_pool (struct constant_pool *pool_list, rtx insn)
8679 {
8680   struct constant_pool *pool;
8681 
8682   for (pool = pool_list; pool; pool = pool->next)
8683     if (bitmap_bit_p (pool->insns, INSN_UID (insn)))
8684       break;
8685 
8686   return pool;
8687 }
8688 
8689 /* Add constant VAL of mode MODE to the constant pool POOL.  */
8690 
8691 static void
8692 s390_add_constant (struct constant_pool *pool, rtx val, machine_mode mode)
8693 {
8694   struct constant *c;
8695   int i;
8696 
8697   for (i = 0; i < NR_C_MODES; i++)
8698     if (constant_modes[i] == mode)
8699       break;
8700   gcc_assert (i != NR_C_MODES);
8701 
8702   for (c = pool->constants[i]; c != NULL; c = c->next)
8703     if (rtx_equal_p (val, c->value))
8704       break;
8705 
8706   if (c == NULL)
8707     {
8708       c = (struct constant *) xmalloc (sizeof *c);
8709       c->value = val;
8710       c->label = gen_label_rtx ();
8711       c->next = pool->constants[i];
8712       pool->constants[i] = c;
8713       pool->size += GET_MODE_SIZE (mode);
8714     }
8715 }
8716 
8717 /* Return an rtx that represents the offset of X from the start of
8718    pool POOL.  */
8719 
8720 static rtx
8721 s390_pool_offset (struct constant_pool *pool, rtx x)
8722 {
8723   rtx label;
8724 
8725   label = gen_rtx_LABEL_REF (GET_MODE (x), pool->label);
8726   x = gen_rtx_UNSPEC (GET_MODE (x), gen_rtvec (2, x, label),
8727 		      UNSPEC_POOL_OFFSET);
8728   return gen_rtx_CONST (GET_MODE (x), x);
8729 }
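
/* The result built here has the shape

     (const (unspec [(X) (label_ref POOL_LABEL)] UNSPEC_POOL_OFFSET))

   i.e. a relocatable expression standing for "address of X minus
   address of the pool base label", which is later resolved into the
   displacement of X within its pool.  */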
8730 
8731 /* Find constant VAL of mode MODE in the constant pool POOL.
8732    Return an RTX describing the distance from the start of
8733    the pool to the location of the new constant.  */
8734 
8735 static rtx
8736 s390_find_constant (struct constant_pool *pool, rtx val,
8737 		    machine_mode mode)
8738 {
8739   struct constant *c;
8740   int i;
8741 
8742   for (i = 0; i < NR_C_MODES; i++)
8743     if (constant_modes[i] == mode)
8744       break;
8745   gcc_assert (i != NR_C_MODES);
8746 
8747   for (c = pool->constants[i]; c != NULL; c = c->next)
8748     if (rtx_equal_p (val, c->value))
8749       break;
8750 
8751   gcc_assert (c);
8752 
8753   return s390_pool_offset (pool, gen_rtx_LABEL_REF (Pmode, c->label));
8754 }
8755 
8756 /* Check whether INSN is an execute.  Return the label_ref to its
8757    execute target template if so, NULL_RTX otherwise.  */
8758 
8759 static rtx
8760 s390_execute_label (rtx insn)
8761 {
8762   if (INSN_P (insn)
8763       && GET_CODE (PATTERN (insn)) == PARALLEL
8764       && GET_CODE (XVECEXP (PATTERN (insn), 0, 0)) == UNSPEC
8765       && (XINT (XVECEXP (PATTERN (insn), 0, 0), 1) == UNSPEC_EXECUTE
8766 	  || XINT (XVECEXP (PATTERN (insn), 0, 0), 1) == UNSPEC_EXECUTE_JUMP))
8767     {
8768       if (XINT (XVECEXP (PATTERN (insn), 0, 0), 1) == UNSPEC_EXECUTE)
8769 	return XVECEXP (XVECEXP (PATTERN (insn), 0, 0), 0, 2);
8770       else
8771 	{
8772 	  gcc_assert (JUMP_P (insn));
8773 	  /* For jump insns as execute target:
8774 	     - There is one operand less in the parallel (the
8775 	       modification register of the execute is always 0).
8776 	     - The execute target label is wrapped into an
8777 	       if_then_else in order to hide it from jump analysis.  */
8778 	  return XEXP (XVECEXP (XVECEXP (PATTERN (insn), 0, 0), 0, 0), 0);
8779 	}
8780     }
8781 
8782   return NULL_RTX;
8783 }
8784 
8785 /* Add execute target for INSN to the constant pool POOL.  */
8786 
8787 static void
8788 s390_add_execute (struct constant_pool *pool, rtx insn)
8789 {
8790   struct constant *c;
8791 
8792   for (c = pool->execute; c != NULL; c = c->next)
8793     if (INSN_UID (insn) == INSN_UID (c->value))
8794       break;
8795 
8796   if (c == NULL)
8797     {
8798       c = (struct constant *) xmalloc (sizeof *c);
8799       c->value = insn;
8800       c->label = gen_label_rtx ();
8801       c->next = pool->execute;
8802       pool->execute = c;
8803       pool->size += 6;
8804     }
8805 }
8806 
8807 /* Find execute target for INSN in the constant pool POOL.
8808    Return an RTX describing the distance from the start of
8809    the pool to the location of the execute target.  */
8810 
8811 static rtx
8812 s390_find_execute (struct constant_pool *pool, rtx insn)
8813 {
8814   struct constant *c;
8815 
8816   for (c = pool->execute; c != NULL; c = c->next)
8817     if (INSN_UID (insn) == INSN_UID (c->value))
8818       break;
8819 
8820   gcc_assert (c);
8821 
8822   return s390_pool_offset (pool, gen_rtx_LABEL_REF (Pmode, c->label));
8823 }
8824 
8825 /* For an execute INSN, extract the execute target template.  */
8826 
8827 static rtx
8828 s390_execute_target (rtx insn)
8829 {
8830   rtx pattern = PATTERN (insn);
8831   gcc_assert (s390_execute_label (insn));
8832 
8833   if (XVECLEN (pattern, 0) == 2)
8834     {
8835       pattern = copy_rtx (XVECEXP (pattern, 0, 1));
8836     }
8837   else
8838     {
8839       rtvec vec = rtvec_alloc (XVECLEN (pattern, 0) - 1);
8840       int i;
8841 
8842       for (i = 0; i < XVECLEN (pattern, 0) - 1; i++)
8843 	RTVEC_ELT (vec, i) = copy_rtx (XVECEXP (pattern, 0, i + 1));
8844 
8845       pattern = gen_rtx_PARALLEL (VOIDmode, vec);
8846     }
8847 
8848   return pattern;
8849 }
8850 
8851 /* Indicate that INSN cannot be duplicated.  This is the case for
8852    execute insns that carry a unique label.  */
8853 
8854 static bool
8855 s390_cannot_copy_insn_p (rtx_insn *insn)
8856 {
8857   rtx label = s390_execute_label (insn);
8858   return label && label != const0_rtx;
8859 }
8860 
8861 /* Dump out the constants in POOL.  If REMOTE_LABEL is true,
8862    do not emit the pool base label.  */
8863 
8864 static void
8865 s390_dump_pool (struct constant_pool *pool, bool remote_label)
8866 {
8867   struct constant *c;
8868   rtx_insn *insn = pool->pool_insn;
8869   int i;
8870 
8871   /* Switch to rodata section.  */
8872   if (TARGET_CPU_ZARCH)
8873     {
8874       insn = emit_insn_after (gen_pool_section_start (), insn);
8875       INSN_ADDRESSES_NEW (insn, -1);
8876     }
8877 
8878   /* Ensure minimum pool alignment.  */
8879   if (TARGET_CPU_ZARCH)
8880     insn = emit_insn_after (gen_pool_align (GEN_INT (8)), insn);
8881   else
8882     insn = emit_insn_after (gen_pool_align (GEN_INT (4)), insn);
8883   INSN_ADDRESSES_NEW (insn, -1);
8884 
8885   /* Emit pool base label.  */
8886   if (!remote_label)
8887     {
8888       insn = emit_label_after (pool->label, insn);
8889       INSN_ADDRESSES_NEW (insn, -1);
8890     }
8891 
8892   /* Dump constants in descending alignment requirement order,
8893      ensuring proper alignment for every constant.  */
8894   for (i = 0; i < NR_C_MODES; i++)
8895     for (c = pool->constants[i]; c; c = c->next)
8896       {
8897 	/* Convert UNSPEC_LTREL_OFFSET unspecs to pool-relative references.  */
8898 	rtx value = copy_rtx (c->value);
8899 	if (GET_CODE (value) == CONST
8900 	    && GET_CODE (XEXP (value, 0)) == UNSPEC
8901 	    && XINT (XEXP (value, 0), 1) == UNSPEC_LTREL_OFFSET
8902 	    && XVECLEN (XEXP (value, 0), 0) == 1)
8903 	  value = s390_pool_offset (pool, XVECEXP (XEXP (value, 0), 0, 0));
8904 
8905 	insn = emit_label_after (c->label, insn);
8906 	INSN_ADDRESSES_NEW (insn, -1);
8907 
8908 	value = gen_rtx_UNSPEC_VOLATILE (constant_modes[i],
8909 					 gen_rtvec (1, value),
8910 					 UNSPECV_POOL_ENTRY);
8911 	insn = emit_insn_after (value, insn);
8912 	INSN_ADDRESSES_NEW (insn, -1);
8913       }
8914 
8915   /* Ensure minimum alignment for instructions.  */
8916   insn = emit_insn_after (gen_pool_align (GEN_INT (2)), insn);
8917   INSN_ADDRESSES_NEW (insn, -1);
8918 
8919   /* Output in-pool execute template insns.  */
8920   for (c = pool->execute; c; c = c->next)
8921     {
8922       insn = emit_label_after (c->label, insn);
8923       INSN_ADDRESSES_NEW (insn, -1);
8924 
8925       insn = emit_insn_after (s390_execute_target (c->value), insn);
8926       INSN_ADDRESSES_NEW (insn, -1);
8927     }
8928 
8929   /* Switch back to previous section.  */
8930   if (TARGET_CPU_ZARCH)
8931     {
8932       insn = emit_insn_after (gen_pool_section_end (), insn);
8933       INSN_ADDRESSES_NEW (insn, -1);
8934     }
8935 
8936   insn = emit_barrier_after (insn);
8937   INSN_ADDRESSES_NEW (insn, -1);
8938 
8939   /* Remove placeholder insn.  */
8940   remove_insn (pool->pool_insn);
8941 }
8942 
8943 /* Free all memory used by POOL.  */
8944 
8945 static void
8946 s390_free_pool (struct constant_pool *pool)
8947 {
8948   struct constant *c, *next;
8949   int i;
8950 
8951   for (i = 0; i < NR_C_MODES; i++)
8952     for (c = pool->constants[i]; c; c = next)
8953       {
8954 	next = c->next;
8955 	free (c);
8956       }
8957 
8958   for (c = pool->execute; c; c = next)
8959     {
8960       next = c->next;
8961       free (c);
8962     }
8963 
8964   BITMAP_FREE (pool->insns);
8965   free (pool);
8966 }
8967 
8968 
8969 /* Collect main literal pool.  Return NULL on overflow.  */
8970 
8971 static struct constant_pool *
8972 s390_mainpool_start (void)
8973 {
8974   struct constant_pool *pool;
8975   rtx_insn *insn;
8976 
8977   pool = s390_alloc_pool ();
8978 
8979   for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
8980     {
8981       if (NONJUMP_INSN_P (insn)
8982 	  && GET_CODE (PATTERN (insn)) == SET
8983 	  && GET_CODE (SET_SRC (PATTERN (insn))) == UNSPEC_VOLATILE
8984 	  && XINT (SET_SRC (PATTERN (insn)), 1) == UNSPECV_MAIN_POOL)
8985 	{
8986 	  /* There might be two main_pool instructions if base_reg
8987 	     is call-clobbered; one for shrink-wrapped code and one
8988 	     for the rest.  We want to keep the first.  */
8989 	  if (pool->pool_insn)
8990 	    {
8991 	      insn = PREV_INSN (insn);
8992 	      delete_insn (NEXT_INSN (insn));
8993 	      continue;
8994 	    }
8995 	  pool->pool_insn = insn;
8996 	}
8997 
8998       if (!TARGET_CPU_ZARCH && s390_execute_label (insn))
8999 	{
9000 	  s390_add_execute (pool, insn);
9001 	}
9002       else if (NONJUMP_INSN_P (insn) || CALL_P (insn))
9003 	{
9004 	  rtx pool_ref = NULL_RTX;
9005 	  find_constant_pool_ref (PATTERN (insn), &pool_ref);
9006 	  if (pool_ref)
9007 	    {
9008 	      rtx constant = get_pool_constant (pool_ref);
9009 	      machine_mode mode = get_pool_mode (pool_ref);
9010 	      s390_add_constant (pool, constant, mode);
9011 	    }
9012 	}
9013 
9014       /* If hot/cold partitioning is enabled we have to make sure that
9015 	 the literal pool is emitted in the same section where the
9016 	 initialization of the literal pool base pointer takes place.
9017 	 emit_pool_after is only used in the non-overflow case on
9018 	 non-z/Architecture CPUs, where we can emit the literal pool at
9019 	 the end of the function body within the text section.  */
9020       if (NOTE_P (insn)
9021 	  && NOTE_KIND (insn) == NOTE_INSN_SWITCH_TEXT_SECTIONS
9022 	  && !pool->emit_pool_after)
9023 	pool->emit_pool_after = PREV_INSN (insn);
9024     }
9025 
9026   gcc_assert (pool->pool_insn || pool->size == 0);
9027 
9028   if (pool->size >= 4096)
9029     {
9030       /* We're going to chunkify the pool, so remove the main
9031 	 pool placeholder insn.  */
9032       remove_insn (pool->pool_insn);
9033 
9034       s390_free_pool (pool);
9035       pool = NULL;
9036     }
9037 
9038   /* If the function ends with the section where the literal pool
9039      should be emitted, set the marker to its end.  */
9040   if (pool && !pool->emit_pool_after)
9041     pool->emit_pool_after = get_last_insn ();
9042 
9043   return pool;
9044 }
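
/* The 4096-byte limit above mirrors the classic S/390 base-plus-
   displacement addressing format, whose displacement field is a 12-bit
   unsigned value (0..4095).  A pool addressed off a single base
   register therefore has to stay below 4 KB; larger pools are split
   into chunks by s390_chunkify_start instead.  */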
9045 
9046 /* POOL holds the main literal pool as collected by s390_mainpool_start.
9047    Modify the current function to output the pool constants as well as
9048    the pool register setup instruction.  */
9049 
9050 static void
9051 s390_mainpool_finish (struct constant_pool *pool)
9052 {
9053   rtx base_reg = cfun->machine->base_reg;
9054 
9055   /* If the pool is empty, we're done.  */
9056   if (pool->size == 0)
9057     {
9058       /* We don't actually need a base register after all.  */
9059       cfun->machine->base_reg = NULL_RTX;
9060 
9061       if (pool->pool_insn)
9062 	remove_insn (pool->pool_insn);
9063       s390_free_pool (pool);
9064       return;
9065     }
9066 
9067   /* We need correct insn addresses.  */
9068   shorten_branches (get_insns ());
9069 
9070   /* On zSeries, we use a LARL to load the pool register.  The pool is
9071      located in the .rodata section, so we emit it after the function.  */
9072   if (TARGET_CPU_ZARCH)
9073     {
9074       rtx set = gen_main_base_64 (base_reg, pool->label);
9075       rtx_insn *insn = emit_insn_after (set, pool->pool_insn);
9076       INSN_ADDRESSES_NEW (insn, -1);
9077       remove_insn (pool->pool_insn);
9078 
9079       insn = get_last_insn ();
9080       pool->pool_insn = emit_insn_after (gen_pool (const0_rtx), insn);
9081       INSN_ADDRESSES_NEW (pool->pool_insn, -1);
9082 
9083       s390_dump_pool (pool, 0);
9084     }
9085 
9086   /* On S/390, if the total size of the function's code plus literal pool
9087      does not exceed 4096 bytes, we use BASR to set up a function base
9088      pointer, and emit the literal pool at the end of the function.  */
9089   else if (INSN_ADDRESSES (INSN_UID (pool->emit_pool_after))
9090 	   + pool->size + 8 /* alignment slop */ < 4096)
9091     {
9092       rtx set = gen_main_base_31_small (base_reg, pool->label);
9093       rtx_insn *insn = emit_insn_after (set, pool->pool_insn);
9094       INSN_ADDRESSES_NEW (insn, -1);
9095       remove_insn (pool->pool_insn);
9096 
9097       insn = emit_label_after (pool->label, insn);
9098       INSN_ADDRESSES_NEW (insn, -1);
9099 
9100       /* emit_pool_after will be set by s390_mainpool_start to the
9101 	 last insn of the section where the literal pool should be
9102 	 emitted.  */
9103       insn = pool->emit_pool_after;
9104 
9105       pool->pool_insn = emit_insn_after (gen_pool (const0_rtx), insn);
9106       INSN_ADDRESSES_NEW (pool->pool_insn, -1);
9107 
9108       s390_dump_pool (pool, 1);
9109     }
9110 
9111   /* Otherwise, we emit an inline literal pool and use BASR to branch
9112      over it, setting up the pool register at the same time.  */
9113   else
9114     {
9115       rtx_code_label *pool_end = gen_label_rtx ();
9116 
9117       rtx pat = gen_main_base_31_large (base_reg, pool->label, pool_end);
9118       rtx_insn *insn = emit_jump_insn_after (pat, pool->pool_insn);
9119       JUMP_LABEL (insn) = pool_end;
9120       INSN_ADDRESSES_NEW (insn, -1);
9121       remove_insn (pool->pool_insn);
9122 
9123       insn = emit_label_after (pool->label, insn);
9124       INSN_ADDRESSES_NEW (insn, -1);
9125 
9126       pool->pool_insn = emit_insn_after (gen_pool (const0_rtx), insn);
9127       INSN_ADDRESSES_NEW (pool->pool_insn, -1);
9128 
9129       insn = emit_label_after (pool_end, pool->pool_insn);
9130       INSN_ADDRESSES_NEW (insn, -1);
9131 
9132       s390_dump_pool (pool, 1);
9133     }
9134 
9135 
9136   /* Replace all literal pool references.  */
9137 
9138   for (rtx_insn *insn = get_insns (); insn; insn = NEXT_INSN (insn))
9139     {
9140       if (INSN_P (insn))
9141 	replace_ltrel_base (&PATTERN (insn));
9142 
9143       if (NONJUMP_INSN_P (insn) || CALL_P (insn))
9144         {
9145           rtx addr, pool_ref = NULL_RTX;
9146           find_constant_pool_ref (PATTERN (insn), &pool_ref);
9147           if (pool_ref)
9148             {
9149 	      if (s390_execute_label (insn))
9150 		addr = s390_find_execute (pool, insn);
9151 	      else
9152 		addr = s390_find_constant (pool, get_pool_constant (pool_ref),
9153 						 get_pool_mode (pool_ref));
9154 
9155               replace_constant_pool_ref (&PATTERN (insn), pool_ref, addr);
9156               INSN_CODE (insn) = -1;
9157             }
9158         }
9159     }
9160 
9161 
9162   /* Free the pool.  */
9163   s390_free_pool (pool);
9164 }
9165 
9166 /* POOL holds the main literal pool as collected by s390_mainpool_start.
9167    We have decided we cannot use this pool, so revert all changes
9168    to the current function that were done by s390_mainpool_start.  */
9169 static void
9170 s390_mainpool_cancel (struct constant_pool *pool)
9171 {
9172   /* We didn't actually change the instruction stream, so simply
9173      free the pool memory.  */
9174   s390_free_pool (pool);
9175 }
9176 
9177 
9178 /* Chunkify the literal pool.  */
9179 
9180 #define S390_POOL_CHUNK_MIN	0xc00
9181 #define S390_POOL_CHUNK_MAX	0xe00
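
/* The chunk bounds stay well below the 4096-byte displacement limit:
   a chunk becomes a candidate for being closed once it reaches 0xc00
   (3072) bytes and is closed forcibly beyond 0xe00 (3584) bytes,
   presumably to leave headroom for alignment padding and for constants
   that are still added while a suitable emission point is sought.  */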
9182 
9183 static struct constant_pool *
9184 s390_chunkify_start (void)
9185 {
9186   struct constant_pool *curr_pool = NULL, *pool_list = NULL;
9187   int extra_size = 0;
9188   bitmap far_labels;
9189   rtx pending_ltrel = NULL_RTX;
9190   rtx_insn *insn;
9191 
9192   rtx (*gen_reload_base) (rtx, rtx) =
9193     TARGET_CPU_ZARCH? gen_reload_base_64 : gen_reload_base_31;
9194 
9195 
9196   /* We need correct insn addresses.  */
9197 
9198   shorten_branches (get_insns ());
9199 
9200   /* Scan all insns and move literals to pool chunks.  */
9201 
9202   for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
9203     {
9204       bool section_switch_p = false;
9205 
9206       /* Check for pending LTREL_BASE.  */
9207       if (INSN_P (insn))
9208 	{
9209 	  rtx ltrel_base = find_ltrel_base (PATTERN (insn));
9210 	  if (ltrel_base)
9211 	    {
9212 	      gcc_assert (ltrel_base == pending_ltrel);
9213 	      pending_ltrel = NULL_RTX;
9214 	    }
9215 	}
9216 
9217       if (!TARGET_CPU_ZARCH && s390_execute_label (insn))
9218 	{
9219 	  if (!curr_pool)
9220 	    curr_pool = s390_start_pool (&pool_list, insn);
9221 
9222 	  s390_add_execute (curr_pool, insn);
9223 	  s390_add_pool_insn (curr_pool, insn);
9224 	}
9225       else if (NONJUMP_INSN_P (insn) || CALL_P (insn))
9226 	{
9227 	  rtx pool_ref = NULL_RTX;
9228 	  find_constant_pool_ref (PATTERN (insn), &pool_ref);
9229 	  if (pool_ref)
9230 	    {
9231 	      rtx constant = get_pool_constant (pool_ref);
9232 	      machine_mode mode = get_pool_mode (pool_ref);
9233 
9234 	      if (!curr_pool)
9235 		curr_pool = s390_start_pool (&pool_list, insn);
9236 
9237 	      s390_add_constant (curr_pool, constant, mode);
9238 	      s390_add_pool_insn (curr_pool, insn);
9239 
9240 	      /* Don't split the pool chunk between a LTREL_OFFSET load
9241 		 and the corresponding LTREL_BASE.  */
9242 	      if (GET_CODE (constant) == CONST
9243 		  && GET_CODE (XEXP (constant, 0)) == UNSPEC
9244 		  && XINT (XEXP (constant, 0), 1) == UNSPEC_LTREL_OFFSET)
9245 		{
9246 		  gcc_assert (!pending_ltrel);
9247 		  pending_ltrel = pool_ref;
9248 		}
9249 	    }
9250 	}
9251 
9252       if (JUMP_P (insn) || JUMP_TABLE_DATA_P (insn) || LABEL_P (insn))
9253 	{
9254 	  if (curr_pool)
9255 	    s390_add_pool_insn (curr_pool, insn);
9256 	  /* An LTREL_BASE must follow within the same basic block.  */
9257 	  gcc_assert (!pending_ltrel);
9258 	}
9259 
9260       if (NOTE_P (insn))
9261 	switch (NOTE_KIND (insn))
9262 	  {
9263 	  case NOTE_INSN_SWITCH_TEXT_SECTIONS:
9264 	    section_switch_p = true;
9265 	    break;
9266 	  case NOTE_INSN_VAR_LOCATION:
9267 	    continue;
9268 	  default:
9269 	    break;
9270 	  }
9271 
9272       if (!curr_pool
9273 	  || INSN_ADDRESSES_SIZE () <= (size_t) INSN_UID (insn)
9274           || INSN_ADDRESSES (INSN_UID (insn)) == -1)
9275 	continue;
9276 
9277       if (TARGET_CPU_ZARCH)
9278 	{
9279 	  if (curr_pool->size < S390_POOL_CHUNK_MAX)
9280 	    continue;
9281 
9282 	  s390_end_pool (curr_pool, NULL);
9283 	  curr_pool = NULL;
9284 	}
9285       else
9286 	{
9287           int chunk_size = INSN_ADDRESSES (INSN_UID (insn))
9288 			   - INSN_ADDRESSES (INSN_UID (curr_pool->first_insn))
9289 			 + extra_size;
9290 
9291 	  /* We will later have to insert base register reload insns.
9292 	     Those will have an effect on code size, which we need to
9293 	     consider here.  This calculation makes rather pessimistic
9294 	     worst-case assumptions.  */
9295 	  if (LABEL_P (insn))
9296 	    extra_size += 6;
9297 
9298 	  if (chunk_size < S390_POOL_CHUNK_MIN
9299 	      && curr_pool->size < S390_POOL_CHUNK_MIN
9300 	      && !section_switch_p)
9301 	    continue;
9302 
9303 	  /* Pool chunks can only be inserted after BARRIERs ...  */
9304 	  if (BARRIER_P (insn))
9305 	    {
9306 	      s390_end_pool (curr_pool, insn);
9307 	      curr_pool = NULL;
9308 	      extra_size = 0;
9309 	    }
9310 
9311 	  /* ... so if we don't find one in time, create one.  */
9312           else if (chunk_size > S390_POOL_CHUNK_MAX
9313 	           || curr_pool->size > S390_POOL_CHUNK_MAX
9314 		   || section_switch_p)
9315 	    {
9316 	      rtx_insn *label, *jump, *barrier, *next, *prev;
9317 
9318 	      if (!section_switch_p)
9319 		{
9320 		  /* We can insert the barrier only after a 'real' insn.  */
9321 		  if (! NONJUMP_INSN_P (insn) && ! CALL_P (insn))
9322 		    continue;
9323 		  if (get_attr_length (insn) == 0)
9324 		    continue;
9325 		  /* Don't separate LTREL_BASE from the corresponding
9326 		     LTREL_OFFSET load.  */
9327 		  if (pending_ltrel)
9328 		    continue;
9329 		  next = insn;
9330 		  do
9331 		    {
9332 		      insn = next;
9333 		      next = NEXT_INSN (insn);
9334 		    }
9335 		  while (next
9336 			 && NOTE_P (next)
9337 			 && NOTE_KIND (next) == NOTE_INSN_VAR_LOCATION);
9338 		}
9339 	      else
9340 		{
9341 		  gcc_assert (!pending_ltrel);
9342 
9343 		  /* The old pool has to end before the section switch
9344 		     note in order to make it part of the current
9345 		     section.  */
9346 		  insn = PREV_INSN (insn);
9347 		}
9348 
9349 	      label = gen_label_rtx ();
9350 	      prev = insn;
9351 	      if (prev && NOTE_P (prev))
9352 		prev = prev_nonnote_insn (prev);
9353 	      if (prev)
9354 		jump = emit_jump_insn_after_setloc (gen_jump (label), insn,
9355 						    INSN_LOCATION (prev));
9356 	      else
9357 		jump = emit_jump_insn_after_noloc (gen_jump (label), insn);
9358 	      barrier = emit_barrier_after (jump);
9359 	      insn = emit_label_after (label, barrier);
9360 	      JUMP_LABEL (jump) = label;
9361 	      LABEL_NUSES (label) = 1;
9362 
9363 	      INSN_ADDRESSES_NEW (jump, -1);
9364 	      INSN_ADDRESSES_NEW (barrier, -1);
9365 	      INSN_ADDRESSES_NEW (insn, -1);
9366 
9367 	      s390_end_pool (curr_pool, barrier);
9368 	      curr_pool = NULL;
9369 	      extra_size = 0;
9370 	    }
9371 	}
9372     }
9373 
9374   if (curr_pool)
9375     s390_end_pool (curr_pool, NULL);
9376   gcc_assert (!pending_ltrel);
9377 
9378   /* Find all labels that are branched into
9379      from an insn belonging to a different chunk.  */
9380 
9381   far_labels = BITMAP_ALLOC (NULL);
9382 
9383   for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
9384     {
9385       rtx_jump_table_data *table;
9386 
9387       /* Labels marked with LABEL_PRESERVE_P can be target
9388 	 of non-local jumps, so we have to mark them.
9389 	 The same holds for named labels.
9390 
9391 	 Don't do that, however, if it is the label before
9392 	 a jump table.  */
9393 
9394       if (LABEL_P (insn)
9395 	  && (LABEL_PRESERVE_P (insn) || LABEL_NAME (insn)))
9396 	{
9397 	  rtx_insn *vec_insn = NEXT_INSN (insn);
9398 	  if (! vec_insn || ! JUMP_TABLE_DATA_P (vec_insn))
9399 	    bitmap_set_bit (far_labels, CODE_LABEL_NUMBER (insn));
9400 	}
9401       /* Check potential targets in a table jump (casesi_jump).  */
9402       else if (tablejump_p (insn, NULL, &table))
9403 	{
9404 	  rtx vec_pat = PATTERN (table);
9405 	  int i, diff_p = GET_CODE (vec_pat) == ADDR_DIFF_VEC;
9406 
9407 	  for (i = 0; i < XVECLEN (vec_pat, diff_p); i++)
9408 	    {
9409 	      rtx label = XEXP (XVECEXP (vec_pat, diff_p, i), 0);
9410 
9411 	      if (s390_find_pool (pool_list, label)
9412 		  != s390_find_pool (pool_list, insn))
9413 		bitmap_set_bit (far_labels, CODE_LABEL_NUMBER (label));
9414 	    }
9415 	}
9416       /* If we have a direct jump (conditional or unconditional),
9417 	 check all potential targets.  */
9418       else if (JUMP_P (insn))
9419 	{
9420 	  rtx pat = PATTERN (insn);
9421 
9422 	  if (GET_CODE (pat) == PARALLEL)
9423 	    pat = XVECEXP (pat, 0, 0);
9424 
9425 	  if (GET_CODE (pat) == SET)
9426 	    {
9427 	      rtx label = JUMP_LABEL (insn);
9428 	      if (label && !ANY_RETURN_P (label))
9429 		{
9430 		  if (s390_find_pool (pool_list, label)
9431 		      != s390_find_pool (pool_list, insn))
9432 		    bitmap_set_bit (far_labels, CODE_LABEL_NUMBER (label));
9433 		}
9434 	    }
9435 	}
9436     }
9437 
9438   /* Insert base register reload insns before every pool.  */
9439 
9440   for (curr_pool = pool_list; curr_pool; curr_pool = curr_pool->next)
9441     {
9442       rtx new_insn = gen_reload_base (cfun->machine->base_reg,
9443 				      curr_pool->label);
9444       rtx_insn *insn = curr_pool->first_insn;
9445       INSN_ADDRESSES_NEW (emit_insn_before (new_insn, insn), -1);
9446     }
9447 
9448   /* Insert base register reload insns at every far label.  */
9449 
9450   for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
9451     if (LABEL_P (insn)
9452         && bitmap_bit_p (far_labels, CODE_LABEL_NUMBER (insn)))
9453       {
9454 	struct constant_pool *pool = s390_find_pool (pool_list, insn);
9455 	if (pool)
9456 	  {
9457 	    rtx new_insn = gen_reload_base (cfun->machine->base_reg,
9458 					    pool->label);
9459 	    INSN_ADDRESSES_NEW (emit_insn_after (new_insn, insn), -1);
9460 	  }
9461       }
9462 
9463 
9464   BITMAP_FREE (far_labels);
9465 
9466 
9467   /* Recompute insn addresses.  */
9468 
9469   init_insn_lengths ();
9470   shorten_branches (get_insns ());
9471 
9472   return pool_list;
9473 }
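
/* To summarize the chunking pass: literals are collected into pool
   chunks while scanning the insn stream; a chunk is closed at a
   convenient BARRIER, or a jump around the pool is created if no
   barrier shows up in time; base register reload insns are inserted
   before every chunk and at every label that can be reached from a
   different chunk; finally the insn addresses are recomputed.  */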
9474 
9475 /* POOL_LIST is a chunk list as prepared by s390_chunkify_start.
9476    After we have decided to use this list, finish implementing
9477    all changes to the current function as required.  */
9478 
9479 static void
9480 s390_chunkify_finish (struct constant_pool *pool_list)
9481 {
9482   struct constant_pool *curr_pool = NULL;
9483   rtx_insn *insn;
9484 
9485 
9486   /* Replace all literal pool references.  */
9487 
9488   for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
9489     {
9490       if (INSN_P (insn))
9491 	replace_ltrel_base (&PATTERN (insn));
9492 
9493       curr_pool = s390_find_pool (pool_list, insn);
9494       if (!curr_pool)
9495 	continue;
9496 
9497       if (NONJUMP_INSN_P (insn) || CALL_P (insn))
9498         {
9499           rtx addr, pool_ref = NULL_RTX;
9500           find_constant_pool_ref (PATTERN (insn), &pool_ref);
9501           if (pool_ref)
9502             {
9503 	      if (s390_execute_label (insn))
9504 		addr = s390_find_execute (curr_pool, insn);
9505 	      else
9506 		addr = s390_find_constant (curr_pool,
9507 					   get_pool_constant (pool_ref),
9508 					   get_pool_mode (pool_ref));
9509 
9510               replace_constant_pool_ref (&PATTERN (insn), pool_ref, addr);
9511               INSN_CODE (insn) = -1;
9512             }
9513         }
9514     }
9515 
9516   /* Dump out all literal pools.  */
9517 
9518   for (curr_pool = pool_list; curr_pool; curr_pool = curr_pool->next)
9519     s390_dump_pool (curr_pool, 0);
9520 
9521   /* Free pool list.  */
9522 
9523   while (pool_list)
9524     {
9525       struct constant_pool *next = pool_list->next;
9526       s390_free_pool (pool_list);
9527       pool_list = next;
9528     }
9529 }
9530 
9531 /* POOL_LIST is a chunk list as prepared by s390_chunkify_start.
9532    We have decided we cannot use this list, so revert all changes
9533    to the current function that were done by s390_chunkify_start.  */
9534 
9535 static void
9536 s390_chunkify_cancel (struct constant_pool *pool_list)
9537 {
9538   struct constant_pool *curr_pool = NULL;
9539   rtx_insn *insn;
9540 
9541   /* Remove all pool placeholder insns.  */
9542 
9543   for (curr_pool = pool_list; curr_pool; curr_pool = curr_pool->next)
9544     {
9545       /* Did we insert an extra barrier?  Remove it.  */
9546       rtx_insn *barrier = PREV_INSN (curr_pool->pool_insn);
9547       rtx_insn *jump = barrier? PREV_INSN (barrier) : NULL;
9548       rtx_insn *label = NEXT_INSN (curr_pool->pool_insn);
9549 
9550       if (jump && JUMP_P (jump)
9551 	  && barrier && BARRIER_P (barrier)
9552 	  && label && LABEL_P (label)
9553 	  && GET_CODE (PATTERN (jump)) == SET
9554 	  && SET_DEST (PATTERN (jump)) == pc_rtx
9555 	  && GET_CODE (SET_SRC (PATTERN (jump))) == LABEL_REF
9556 	  && XEXP (SET_SRC (PATTERN (jump)), 0) == label)
9557 	{
9558 	  remove_insn (jump);
9559 	  remove_insn (barrier);
9560 	  remove_insn (label);
9561 	}
9562 
9563       remove_insn (curr_pool->pool_insn);
9564     }
9565 
9566   /* Remove all base register reload insns.  */
9567 
9568   for (insn = get_insns (); insn; )
9569     {
9570       rtx_insn *next_insn = NEXT_INSN (insn);
9571 
9572       if (NONJUMP_INSN_P (insn)
9573 	  && GET_CODE (PATTERN (insn)) == SET
9574 	  && GET_CODE (SET_SRC (PATTERN (insn))) == UNSPEC
9575 	  && XINT (SET_SRC (PATTERN (insn)), 1) == UNSPEC_RELOAD_BASE)
9576 	remove_insn (insn);
9577 
9578       insn = next_insn;
9579     }
9580 
9581   /* Free pool list.  */
9582 
9583   while (pool_list)
9584     {
9585       struct constant_pool *next = pool_list->next;
9586       s390_free_pool (pool_list);
9587       pool_list = next;
9588     }
9589 }
9590 
9591 /* Output the constant pool entry EXP in mode MODE with alignment ALIGN.  */
9592 
9593 void
9594 s390_output_pool_entry (rtx exp, machine_mode mode, unsigned int align)
9595 {
9596   switch (GET_MODE_CLASS (mode))
9597     {
9598     case MODE_FLOAT:
9599     case MODE_DECIMAL_FLOAT:
9600       gcc_assert (GET_CODE (exp) == CONST_DOUBLE);
9601 
9602       assemble_real (*CONST_DOUBLE_REAL_VALUE (exp),
9603 		     as_a <scalar_float_mode> (mode), align);
9604       break;
9605 
9606     case MODE_INT:
9607       assemble_integer (exp, GET_MODE_SIZE (mode), align, 1);
9608       mark_symbol_refs_as_used (exp);
9609       break;
9610 
9611     case MODE_VECTOR_INT:
9612     case MODE_VECTOR_FLOAT:
9613       {
9614 	int i;
9615 	machine_mode inner_mode;
9616 	gcc_assert (GET_CODE (exp) == CONST_VECTOR);
9617 
9618 	inner_mode = GET_MODE_INNER (GET_MODE (exp));
9619 	for (i = 0; i < XVECLEN (exp, 0); i++)
9620 	  s390_output_pool_entry (XVECEXP (exp, 0, i),
9621 				  inner_mode,
9622 				  i == 0
9623 				  ? align
9624 				  : GET_MODE_BITSIZE (inner_mode));
9625       }
9626       break;
9627 
9628     default:
9629       gcc_unreachable ();
9630     }
9631 }
9632 
9633 
9634 /* Return an RTL expression representing the value of the return address
9635    for the frame COUNT steps up from the current frame.  FRAME is the
9636    frame pointer of that frame.  */
9637 
9638 rtx
9639 s390_return_addr_rtx (int count, rtx frame ATTRIBUTE_UNUSED)
9640 {
9641   int offset;
9642   rtx addr;
9643 
9644   /* Without backchain, we fail for all but the current frame.  */
9645 
9646   if (!TARGET_BACKCHAIN && count > 0)
9647     return NULL_RTX;
9648 
9649   /* For the current frame, we need to make sure the initial
9650      value of RETURN_REGNUM is actually saved.  */
9651 
9652   if (count == 0)
9653     {
9654       /* On non-z architectures branch splitting could overwrite r14.  */
9655       if (TARGET_CPU_ZARCH)
9656 	return get_hard_reg_initial_val (Pmode, RETURN_REGNUM);
9657       else
9658 	{
9659 	  cfun_frame_layout.save_return_addr_p = true;
9660 	  return gen_rtx_MEM (Pmode, return_address_pointer_rtx);
9661 	}
9662     }
9663 
9664   if (TARGET_PACKED_STACK)
9665     offset = -2 * UNITS_PER_LONG;
9666   else
9667     offset = RETURN_REGNUM * UNITS_PER_LONG;
9668 
9669   addr = plus_constant (Pmode, frame, offset);
9670   addr = memory_address (Pmode, addr);
9671   return gen_rtx_MEM (Pmode, addr);
9672 }
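
/* In the default (non-packed) stack layout the slot of register r14 is
   read at offset RETURN_REGNUM * UNITS_PER_LONG from FRAME, i.e.
   14 * 8 = 112 bytes for 64-bit code; with -mpacked-stack the slot
   sits at -2 * UNITS_PER_LONG instead.  */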
9673 
9674 /* Return an RTL expression representing the back chain stored in
9675    the current stack frame.  */
9676 
9677 rtx
9678 s390_back_chain_rtx (void)
9679 {
9680   rtx chain;
9681 
9682   gcc_assert (TARGET_BACKCHAIN);
9683 
9684   if (TARGET_PACKED_STACK)
9685     chain = plus_constant (Pmode, stack_pointer_rtx,
9686 			   STACK_POINTER_OFFSET - UNITS_PER_LONG);
9687   else
9688     chain = stack_pointer_rtx;
9689 
9690   chain = gen_rtx_MEM (Pmode, chain);
9691   return chain;
9692 }
9693 
9694 /* Find the first call-clobbered register unused in a function.
9695    This could be used as base register in a leaf function
9696    or for holding the return address before the epilogue.  */
9697 
9698 static int
9699 find_unused_clobbered_reg (void)
9700 {
9701   int i;
9702   for (i = 0; i < 6; i++)
9703     if (!df_regs_ever_live_p (i))
9704       return i;
9705   return 0;
9706 }
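
/* Only r0..r5 are considered here: these are the call-clobbered GPRs
   not otherwise reserved, so one of them can serve as a scratch base
   register or return-address holder in a leaf function.  If all six
   are live, r0 is returned as a fallback.  */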
9707 
9708 
9709 /* Helper function for s390_regs_ever_clobbered.  Sets the fields in DATA for all
9710    clobbered hard regs in SETREG.  */
9711 
9712 static void
9713 s390_reg_clobbered_rtx (rtx setreg, const_rtx set_insn ATTRIBUTE_UNUSED, void *data)
9714 {
9715   char *regs_ever_clobbered = (char *)data;
9716   unsigned int i, regno;
9717   machine_mode mode = GET_MODE (setreg);
9718 
9719   if (GET_CODE (setreg) == SUBREG)
9720     {
9721       rtx inner = SUBREG_REG (setreg);
9722       if (!GENERAL_REG_P (inner) && !FP_REG_P (inner))
9723 	return;
9724       regno = subreg_regno (setreg);
9725     }
9726   else if (GENERAL_REG_P (setreg) || FP_REG_P (setreg))
9727     regno = REGNO (setreg);
9728   else
9729     return;
9730 
9731   for (i = regno;
9732        i < end_hard_regno (mode, regno);
9733        i++)
9734     regs_ever_clobbered[i] = 1;
9735 }
9736 
9737 /* Walks through all basic blocks of the current function looking
9738    for clobbered hard regs using s390_reg_clobbered_rtx.  The fields
9739    of the passed char array REGS_EVER_CLOBBERED are set to one for
9740    each of those regs.  */
9741 
9742 static void
9743 s390_regs_ever_clobbered (char regs_ever_clobbered[])
9744 {
9745   basic_block cur_bb;
9746   rtx_insn *cur_insn;
9747   unsigned int i;
9748 
9749   memset (regs_ever_clobbered, 0, 32);
9750 
9751   /* For non-leaf functions we have to consider all call clobbered regs to be
9752      clobbered.  */
9753   if (!crtl->is_leaf)
9754     {
9755       for (i = 0; i < 32; i++)
9756 	regs_ever_clobbered[i] = call_really_used_regs[i];
9757     }
9758 
9759   /* Make the "magic" eh_return registers live if necessary.  For regs_ever_live
9760      this work is done by liveness analysis (mark_regs_live_at_end).
9761      Special care is needed for functions containing landing pads.  Landing pads
9762      may use the eh registers, but the code which sets these registers is not
9763      contained in that function.  Hence s390_regs_ever_clobbered is not able to
9764      deal with this automatically.  */
9765   if (crtl->calls_eh_return || cfun->machine->has_landing_pad_p)
9766     for (i = 0; EH_RETURN_DATA_REGNO (i) != INVALID_REGNUM ; i++)
9767       if (crtl->calls_eh_return
9768 	  || (cfun->machine->has_landing_pad_p
9769 	      && df_regs_ever_live_p (EH_RETURN_DATA_REGNO (i))))
9770 	regs_ever_clobbered[EH_RETURN_DATA_REGNO (i)] = 1;
9771 
9772   /* For nonlocal gotos all call-saved registers have to be saved.
9773      This flag is also set for the unwinding code in libgcc.
9774      See expand_builtin_unwind_init.  For regs_ever_live this is done by
9775      reload.  */
9776   if (crtl->saves_all_registers)
9777     for (i = 0; i < 32; i++)
9778       if (!call_really_used_regs[i])
9779 	regs_ever_clobbered[i] = 1;
9780 
9781   FOR_EACH_BB_FN (cur_bb, cfun)
9782     {
9783       FOR_BB_INSNS (cur_bb, cur_insn)
9784 	{
9785 	  rtx pat;
9786 
9787 	  if (!INSN_P (cur_insn))
9788 	    continue;
9789 
9790 	  pat = PATTERN (cur_insn);
9791 
9792 	  /* Ignore GPR restore insns.  */
9793 	  if (epilogue_completed && RTX_FRAME_RELATED_P (cur_insn))
9794 	    {
9795 	      if (GET_CODE (pat) == SET
9796 		  && GENERAL_REG_P (SET_DEST (pat)))
9797 		{
9798 		  /* lgdr  */
9799 		  if (GET_MODE (SET_SRC (pat)) == DImode
9800 		      && FP_REG_P (SET_SRC (pat)))
9801 		    continue;
9802 
9803 		  /* l / lg  */
9804 		  if (GET_CODE (SET_SRC (pat)) == MEM)
9805 		    continue;
9806 		}
9807 
9808 	      /* lm / lmg */
9809 	      if (GET_CODE (pat) == PARALLEL
9810 		  && load_multiple_operation (pat, VOIDmode))
9811 		continue;
9812 	    }
9813 
9814 	  note_stores (pat,
9815 		       s390_reg_clobbered_rtx,
9816 		       regs_ever_clobbered);
9817 	}
9818     }
9819 }
9820 
9821 /* Determine the frame area which actually has to be accessed
9822    in the function epilogue. The values are stored at the
9823    given pointers AREA_BOTTOM (address of the lowest used stack
9824    address) and AREA_TOP (address of the first item which does
9825    not belong to the stack frame).  */
9826 
9827 static void
9828 s390_frame_area (int *area_bottom, int *area_top)
9829 {
9830   int b, t;
9831 
9832   b = INT_MAX;
9833   t = INT_MIN;
9834 
9835   if (cfun_frame_layout.first_restore_gpr != -1)
9836     {
9837       b = (cfun_frame_layout.gprs_offset
9838 	   + cfun_frame_layout.first_restore_gpr * UNITS_PER_LONG);
9839       t = b + (cfun_frame_layout.last_restore_gpr
9840 	       - cfun_frame_layout.first_restore_gpr + 1) * UNITS_PER_LONG;
9841     }
9842 
9843   if (TARGET_64BIT && cfun_save_high_fprs_p)
9844     {
9845       b = MIN (b, cfun_frame_layout.f8_offset);
9846       t = MAX (t, (cfun_frame_layout.f8_offset
9847 		   + cfun_frame_layout.high_fprs * 8));
9848     }
9849 
9850   if (!TARGET_64BIT)
9851     {
9852       if (cfun_fpr_save_p (FPR4_REGNUM))
9853 	{
9854 	  b = MIN (b, cfun_frame_layout.f4_offset);
9855 	  t = MAX (t, cfun_frame_layout.f4_offset + 8);
9856 	}
9857       if (cfun_fpr_save_p (FPR6_REGNUM))
9858 	{
9859 	  b = MIN (b, cfun_frame_layout.f4_offset + 8);
9860 	  t = MAX (t, cfun_frame_layout.f4_offset + 16);
9861 	}
9862     }
9863   *area_bottom = b;
9864   *area_top = t;
9865 }
9866 /* Update gpr_save_slots in the frame layout trying to make use of
9867    FPRs as GPR save slots.
9868    This is a helper routine of s390_register_info.  */
9869 
9870 static void
9871 s390_register_info_gprtofpr ()
9872 {
9873   int save_reg_slot = FPR0_REGNUM;
9874   int i, j;
9875 
9876   if (!TARGET_Z10 || !TARGET_HARD_FLOAT || !crtl->is_leaf)
9877     return;
9878 
9879   /* builtin_eh_return needs to be able to modify the return address
9880      on the stack.  It could also adjust the FPR save slot instead but
9881      is it worth the trouble?!  */
9882   if (crtl->calls_eh_return)
9883     return;
9884 
9885   for (i = 15; i >= 6; i--)
9886     {
9887       if (cfun_gpr_save_slot (i) == SAVE_SLOT_NONE)
9888 	continue;
9889 
9890       /* Advance to the next FP register which can be used as a
9891 	 GPR save slot.  */
9892       while ((!call_really_used_regs[save_reg_slot]
9893 	      || df_regs_ever_live_p (save_reg_slot)
9894 	      || cfun_fpr_save_p (save_reg_slot))
9895 	     && FP_REGNO_P (save_reg_slot))
9896 	save_reg_slot++;
9897       if (!FP_REGNO_P (save_reg_slot))
9898 	{
9899 	  /* We only want to use ldgr/lgdr if we can get rid of
9900 	     stm/lm entirely.  So undo the gpr slot allocation in
9901 	     case we ran out of FPR save slots.  */
9902 	  for (j = 6; j <= 15; j++)
9903 	    if (FP_REGNO_P (cfun_gpr_save_slot (j)))
9904 	      cfun_gpr_save_slot (j) = SAVE_SLOT_STACK;
9905 	  break;
9906 	}
9907       cfun_gpr_save_slot (i) = save_reg_slot++;
9908     }
9909 }
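
/* In effect, on z10 or newer a leaf function may keep the call-saved
   GPRs it clobbers in otherwise unused call-clobbered FPRs (via
   ldgr/lgdr moves) instead of on the stack; the transformation is only
   applied if every GPR needing a slot gets an FPR, since a partial
   conversion would not remove the stm/lm sequence.  */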
9910 
9911 /* Set the bits in fpr_bitmap for FPRs which need to be saved due to
9912    stdarg.
9913    This is a helper routine for s390_register_info.  */
9914 
9915 static void
9916 s390_register_info_stdarg_fpr ()
9917 {
9918   int i;
9919   int min_fpr;
9920   int max_fpr;
9921 
9922   /* Save the FP argument regs for stdarg. f0, f2 for 31 bit and
9923      f0, f2, f4, f6 for 64 bit.  */
9924   if (!cfun->stdarg
9925       || !TARGET_HARD_FLOAT
9926       || !cfun->va_list_fpr_size
9927       || crtl->args.info.fprs >= FP_ARG_NUM_REG)
9928     return;
9929 
9930   min_fpr = crtl->args.info.fprs;
9931   max_fpr = min_fpr + cfun->va_list_fpr_size - 1;
9932   if (max_fpr >= FP_ARG_NUM_REG)
9933     max_fpr = FP_ARG_NUM_REG - 1;
9934 
9935   /* FPR argument regs start at f0.  */
9936   min_fpr += FPR0_REGNUM;
9937   max_fpr += FPR0_REGNUM;
9938 
9939   for (i = min_fpr; i <= max_fpr; i++)
9940     cfun_set_fpr_save (i);
9941 }
9942 
9943 /* Reserve the GPR save slots for GPRs which need to be saved due to
9944    stdarg.
9945    This is a helper routine for s390_register_info.  */
9946 
9947 static void
9948 s390_register_info_stdarg_gpr ()
9949 {
9950   int i;
9951   int min_gpr;
9952   int max_gpr;
9953 
9954   if (!cfun->stdarg
9955       || !cfun->va_list_gpr_size
9956       || crtl->args.info.gprs >= GP_ARG_NUM_REG)
9957     return;
9958 
9959   min_gpr = crtl->args.info.gprs;
9960   max_gpr = min_gpr + cfun->va_list_gpr_size - 1;
9961   if (max_gpr >= GP_ARG_NUM_REG)
9962     max_gpr = GP_ARG_NUM_REG - 1;
9963 
9964   /* GPR argument regs start at r2.  */
9965   min_gpr += GPR2_REGNUM;
9966   max_gpr += GPR2_REGNUM;
9967 
9968   /* If r6 was supposed to be saved into an FPR and now needs to go to
9969      the stack for vararg we have to adjust the restore range to make
9970      sure that the restore is done from stack as well.  */
9971   if (FP_REGNO_P (cfun_gpr_save_slot (GPR6_REGNUM))
9972       && min_gpr <= GPR6_REGNUM
9973       && max_gpr >= GPR6_REGNUM)
9974     {
9975       if (cfun_frame_layout.first_restore_gpr == -1
9976 	  || cfun_frame_layout.first_restore_gpr > GPR6_REGNUM)
9977 	cfun_frame_layout.first_restore_gpr = GPR6_REGNUM;
9978       if (cfun_frame_layout.last_restore_gpr == -1
9979 	  || cfun_frame_layout.last_restore_gpr < GPR6_REGNUM)
9980 	cfun_frame_layout.last_restore_gpr = GPR6_REGNUM;
9981     }
9982 
9983   if (cfun_frame_layout.first_save_gpr == -1
9984       || cfun_frame_layout.first_save_gpr > min_gpr)
9985     cfun_frame_layout.first_save_gpr = min_gpr;
9986 
9987   if (cfun_frame_layout.last_save_gpr == -1
9988       || cfun_frame_layout.last_save_gpr < max_gpr)
9989     cfun_frame_layout.last_save_gpr = max_gpr;
9990 
9991   for (i = min_gpr; i <= max_gpr; i++)
9992     cfun_gpr_save_slot (i) = SAVE_SLOT_STACK;
9993 }
9994 
9995 /* Calculate the save and restore ranges for stm(g) and lm(g) in the
9996    prologue and epilogue.  */
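/* For example (illustrative case): if only r11 ... r15 end up with
   SAVE_SLOT_STACK, the first/last save and restore GPRs all become 11
   and 15, allowing a single stmg/lmg pair in prologue and epilogue.  */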
9997 
9998 static void
9999 s390_register_info_set_ranges ()
10000 {
10001   int i, j;
10002 
10003   /* Find the first and the last save slot supposed to use the stack
10004      to set the restore range.
10005      Vararg regs might be marked to be saved to the stack, but only
10006      the call-saved regs (i.e. r6) really need restoring.  This code
10007      assumes that the vararg regs have not yet been recorded in
10008      cfun_gpr_save_slot.  */
10009   for (i = 0; i < 16 && cfun_gpr_save_slot (i) != SAVE_SLOT_STACK; i++);
10010   for (j = 15; j > i && cfun_gpr_save_slot (j) != SAVE_SLOT_STACK; j--);
10011   cfun_frame_layout.first_restore_gpr = (i == 16) ? -1 : i;
10012   cfun_frame_layout.last_restore_gpr = (i == 16) ? -1 : j;
10013   cfun_frame_layout.first_save_gpr = (i == 16) ? -1 : i;
10014   cfun_frame_layout.last_save_gpr = (i == 16) ? -1 : j;
10015 }
10016 
10017 /* The GPR and FPR save slots in cfun->machine->frame_layout are set
10018    for registers which need to be saved in function prologue.
10019    This function can only be used before the insns emitted for
10020    save/restore of the regs become visible in the RTL stream.  */
10021 
10022 static void
10023 s390_register_info ()
10024 {
10025   int i;
10026   char clobbered_regs[32];
10027 
10028   gcc_assert (!epilogue_completed);
10029 
10030   if (reload_completed)
10031     /* After reload we rely on our own routine to determine which
10032        registers need saving.  */
10033     s390_regs_ever_clobbered (clobbered_regs);
10034   else
10035     /* During reload we use regs_ever_live as a base since reload
10036        does changes in there which we otherwise would not be aware
10037        of.  */
10038     for (i = 0; i < 32; i++)
10039       clobbered_regs[i] = df_regs_ever_live_p (i);
10040 
10041   for (i = 0; i < 32; i++)
10042     clobbered_regs[i] = clobbered_regs[i] && !global_regs[i];
10043 
10044   /* Mark the call-saved FPRs which need to be saved.
10045      This needs to be done before checking the special GPRs since the
10046      stack pointer usage depends on whether high FPRs have to be saved
10047      or not.  */
10048   cfun_frame_layout.fpr_bitmap = 0;
10049   cfun_frame_layout.high_fprs = 0;
10050   for (i = FPR0_REGNUM; i <= FPR15_REGNUM; i++)
10051     if (clobbered_regs[i] && !call_really_used_regs[i])
10052       {
10053 	cfun_set_fpr_save (i);
10054 	if (i >= FPR8_REGNUM)
10055 	  cfun_frame_layout.high_fprs++;
10056       }
10057 
10058   /* Register 12 is used for GOT address, but also as temp in prologue
10059      for split-stack stdarg functions (unless r14 is available).  */
10060   clobbered_regs[12]
10061     |= ((flag_pic && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
10062 	|| (flag_split_stack && cfun->stdarg
10063 	    && (crtl->is_leaf || TARGET_TPF_PROFILING
10064 		|| has_hard_reg_initial_val (Pmode, RETURN_REGNUM))));
10065 
10066   clobbered_regs[BASE_REGNUM]
10067     |= (cfun->machine->base_reg
10068 	&& REGNO (cfun->machine->base_reg) == BASE_REGNUM);
10069 
10070   clobbered_regs[HARD_FRAME_POINTER_REGNUM]
10071     |= !!frame_pointer_needed;
10072 
10073   /* On pre-z900 machines this decision might not be made until the
10074      machine dependent reorg pass.
10075      save_return_addr_p will only be set on non-zarch machines so
10076      there is no risk that r14 goes into an FPR instead of a stack
10077      slot.  */
10078   clobbered_regs[RETURN_REGNUM]
10079     |= (!crtl->is_leaf
10080 	|| TARGET_TPF_PROFILING
10081 	|| cfun->machine->split_branches_pending_p
10082 	|| cfun_frame_layout.save_return_addr_p
10083 	|| crtl->calls_eh_return);
10084 
10085   clobbered_regs[STACK_POINTER_REGNUM]
10086     |= (!crtl->is_leaf
10087 	|| TARGET_TPF_PROFILING
10088 	|| cfun_save_high_fprs_p
10089 	|| get_frame_size () > 0
10090 	|| (reload_completed && cfun_frame_layout.frame_size > 0)
10091 	|| cfun->calls_alloca);
10092 
10093   memset (cfun_frame_layout.gpr_save_slots, SAVE_SLOT_NONE, 16);
10094 
10095   for (i = 6; i < 16; i++)
10096     if (clobbered_regs[i])
10097       cfun_gpr_save_slot (i) = SAVE_SLOT_STACK;
10098 
10099   s390_register_info_stdarg_fpr ();
10100   s390_register_info_gprtofpr ();
10101   s390_register_info_set_ranges ();
10102   /* stdarg functions might need to save GPRs 2 to 6.  This might
10103      override the GPR->FPR save decision made by
10104      s390_register_info_gprtofpr for r6 since vararg regs must go to
10105      the stack.  */
10106   s390_register_info_stdarg_gpr ();
10107 }
10108 
10109 /* This function is called by s390_optimize_prologue in order to get
10110    rid of unnecessary GPR save/restore instructions.  The register info
10111    for the GPRs is re-computed and the ranges are re-calculated.  */
10112 
10113 static void
10114 s390_optimize_register_info ()
10115 {
10116   char clobbered_regs[32];
10117   int i;
10118 
10119   gcc_assert (epilogue_completed);
10120   gcc_assert (!cfun->machine->split_branches_pending_p);
10121 
10122   s390_regs_ever_clobbered (clobbered_regs);
10123 
10124   for (i = 0; i < 32; i++)
10125     clobbered_regs[i] = clobbered_regs[i] && !global_regs[i];
10126 
10127   /* There is still special treatment needed for cases invisible to
10128      s390_regs_ever_clobbered.  */
10129   clobbered_regs[RETURN_REGNUM]
10130     |= (TARGET_TPF_PROFILING
10131 	/* When expanding builtin_return_addr in ESA mode we do not
10132 	   know whether r14 will later be needed as scratch reg when
10133 	   doing branch splitting.  So the builtin always accesses the
10134 	   r14 save slot and we need to stick to the save/restore
10135 	   decision for r14 even if it turns out that it didn't get
10136 	   clobbered.  */
10137 	|| cfun_frame_layout.save_return_addr_p
10138 	|| crtl->calls_eh_return);
10139 
10140   memset (cfun_frame_layout.gpr_save_slots, SAVE_SLOT_NONE, 6);
10141 
10142   for (i = 6; i < 16; i++)
10143     if (!clobbered_regs[i])
10144       cfun_gpr_save_slot (i) = SAVE_SLOT_NONE;
10145 
10146   s390_register_info_set_ranges ();
10147   s390_register_info_stdarg_gpr ();
10148 }
10149 
10150 /* Fill cfun->machine with info about frame of current function.  */
10151 
10152 static void
10153 s390_frame_info (void)
10154 {
10155   HOST_WIDE_INT lowest_offset;
10156 
10157   cfun_frame_layout.first_save_gpr_slot = cfun_frame_layout.first_save_gpr;
10158   cfun_frame_layout.last_save_gpr_slot = cfun_frame_layout.last_save_gpr;
10159 
10160   /* The va_arg builtin uses a constant distance of 16 *
10161      UNITS_PER_LONG (r0-r15) to reach the FPRs from the reg_save_area
10162      pointer.  So even if we are going to save the stack pointer in an
10163      FPR we need the stack space in order to keep the offsets
10164      correct.  */
10165   if (cfun->stdarg && cfun_save_arg_fprs_p)
10166     {
10167       cfun_frame_layout.last_save_gpr_slot = STACK_POINTER_REGNUM;
10168 
10169       if (cfun_frame_layout.first_save_gpr_slot == -1)
10170 	cfun_frame_layout.first_save_gpr_slot = STACK_POINTER_REGNUM;
10171     }
10172 
10173   cfun_frame_layout.frame_size = get_frame_size ();
10174   if (!TARGET_64BIT && cfun_frame_layout.frame_size > 0x7fff0000)
10175     fatal_error (input_location,
10176 		 "total size of local variables exceeds architecture limit");
10177 
10178   if (!TARGET_PACKED_STACK)
10179     {
10180       /* Fixed stack layout.  */
10181       cfun_frame_layout.backchain_offset = 0;
10182       cfun_frame_layout.f0_offset = 16 * UNITS_PER_LONG;
10183       cfun_frame_layout.f4_offset = cfun_frame_layout.f0_offset + 2 * 8;
10184       cfun_frame_layout.f8_offset = -cfun_frame_layout.high_fprs * 8;
10185       cfun_frame_layout.gprs_offset = (cfun_frame_layout.first_save_gpr_slot
10186 				       * UNITS_PER_LONG);
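      /* Illustrative numbers only: on 64 bit (UNITS_PER_LONG == 8) this
	 gives f0_offset == 128 and f4_offset == 144, and with
	 first_save_gpr_slot == 6 a gprs_offset of 48.  */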
10187     }
10188   else if (TARGET_BACKCHAIN)
10189     {
10190       /* Kernel stack layout - packed stack, backchain, no float  */
10191       gcc_assert (TARGET_SOFT_FLOAT);
10192       cfun_frame_layout.backchain_offset = (STACK_POINTER_OFFSET
10193 					    - UNITS_PER_LONG);
10194 
10195       /* The distance between the backchain and the return address
10196 	 save slot must not change.  So we always need a slot for the
10197 	 stack pointer which resides in between.  */
10198       cfun_frame_layout.last_save_gpr_slot = STACK_POINTER_REGNUM;
10199 
10200       cfun_frame_layout.gprs_offset
10201 	= cfun_frame_layout.backchain_offset - cfun_gprs_save_area_size;
10202 
10203       /* FPRs will not be saved.  Nevertheless pick sane values to
10204 	 keep area calculations valid.  */
10205       cfun_frame_layout.f0_offset =
10206 	cfun_frame_layout.f4_offset =
10207 	cfun_frame_layout.f8_offset = cfun_frame_layout.gprs_offset;
10208     }
10209   else
10210     {
10211       int num_fprs;
10212 
10213       /* Packed stack layout without backchain.  */
10214 
10215       /* With stdarg FPRs need their dedicated slots.  */
10216       num_fprs = (TARGET_64BIT && cfun->stdarg ? 2
10217 		  : (cfun_fpr_save_p (FPR4_REGNUM) +
10218 		     cfun_fpr_save_p (FPR6_REGNUM)));
10219       cfun_frame_layout.f4_offset = STACK_POINTER_OFFSET - 8 * num_fprs;
10220 
10221       num_fprs = (cfun->stdarg ? 2
10222 		  : (cfun_fpr_save_p (FPR0_REGNUM)
10223 		     + cfun_fpr_save_p (FPR2_REGNUM)));
10224       cfun_frame_layout.f0_offset = cfun_frame_layout.f4_offset - 8 * num_fprs;
10225 
10226       cfun_frame_layout.gprs_offset
10227 	= cfun_frame_layout.f0_offset - cfun_gprs_save_area_size;
10228 
10229       cfun_frame_layout.f8_offset = (cfun_frame_layout.gprs_offset
10230 				     - cfun_frame_layout.high_fprs * 8);
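      /* Illustrative numbers only: with STACK_POINTER_OFFSET == 160, a
	 64-bit stdarg function gets f4_offset == 144 and f0_offset == 128;
	 a (hypothetical) 40 byte GPR save area then puts gprs_offset at
	 88 and f8_offset at 88 - high_fprs * 8.  */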
10231     }
10232 
10233   if (cfun_save_high_fprs_p)
10234     cfun_frame_layout.frame_size += cfun_frame_layout.high_fprs * 8;
10235 
10236   if (!crtl->is_leaf)
10237     cfun_frame_layout.frame_size += crtl->outgoing_args_size;
10238 
10239   /* In the following cases we have to allocate a STACK_POINTER_OFFSET
10240      sized area at the bottom of the stack.  This is required also for
10241      leaf functions.  When GCC generates a local stack reference it
10242      will always add STACK_POINTER_OFFSET to all these references.  */
10243   if (crtl->is_leaf
10244       && !TARGET_TPF_PROFILING
10245       && cfun_frame_layout.frame_size == 0
10246       && !cfun->calls_alloca)
10247     return;
10248 
10249   /* Calculate the number of bytes we have used in our own register
10250      save area.  With the packed stack layout we can re-use the
10251      remaining bytes for normal stack elements.  */
10252 
10253   if (TARGET_PACKED_STACK)
10254     lowest_offset = MIN (MIN (cfun_frame_layout.f0_offset,
10255 			      cfun_frame_layout.f4_offset),
10256 			 cfun_frame_layout.gprs_offset);
10257   else
10258     lowest_offset = 0;
10259 
10260   if (TARGET_BACKCHAIN)
10261     lowest_offset = MIN (lowest_offset, cfun_frame_layout.backchain_offset);
10262 
10263   cfun_frame_layout.frame_size += STACK_POINTER_OFFSET - lowest_offset;
10264 
10265   /* If an odd number of GPRs has to be saved under 31 bit, we have
10266      to adjust the frame size to sustain 8-byte alignment of stack
10267      frames.  */
10268   cfun_frame_layout.frame_size = ((cfun_frame_layout.frame_size +
10269 				   STACK_BOUNDARY / BITS_PER_UNIT - 1)
10270 				  & ~(STACK_BOUNDARY / BITS_PER_UNIT - 1));
10271 }
10272 
10273 /* Generate frame layout.  Fills in register and frame data for the current
10274    function in cfun->machine.  This routine can be called multiple times;
10275    it will re-do the complete frame layout every time.  */
10276 
10277 static void
10278 s390_init_frame_layout (void)
10279 {
10280   HOST_WIDE_INT frame_size;
10281   int base_used;
10282 
10283   /* After LRA the frame layout is supposed to be read-only and should
10284      not be re-computed.  */
10285   if (reload_completed)
10286     return;
10287 
10288   /* On S/390 machines, we may need to perform branch splitting, which
10289      will require both base and return address register.  We have no
10290      choice but to assume we're going to need them until right at the
10291      end of the machine dependent reorg phase.  */
10292   if (!TARGET_CPU_ZARCH)
10293     cfun->machine->split_branches_pending_p = true;
10294 
10295   do
10296     {
10297       frame_size = cfun_frame_layout.frame_size;
10298 
10299       /* Try to predict whether we'll need the base register.  */
10300       base_used = cfun->machine->split_branches_pending_p
10301 		  || crtl->uses_const_pool
10302 		  || (!DISP_IN_RANGE (frame_size)
10303 		      && !CONST_OK_FOR_K (frame_size));
10304 
10305       /* Decide which register to use as literal pool base.  In small
10306 	 leaf functions, try to use an unused call-clobbered register
10307 	 as base register to avoid save/restore overhead.  */
10308       if (!base_used)
10309 	cfun->machine->base_reg = NULL_RTX;
10310       else
10311 	{
10312 	  int br = 0;
10313 
10314 	  if (crtl->is_leaf)
10315 	    /* Prefer r5 (most likely to be free).  */
10316 	    for (br = 5; br >= 2 && df_regs_ever_live_p (br); br--)
10317 	      ;
10318 	  cfun->machine->base_reg =
10319 	    gen_rtx_REG (Pmode, (br >= 2) ? br : BASE_REGNUM);
10320 	}
10321 
10322       s390_register_info ();
10323       s390_frame_info ();
10324     }
10325   while (frame_size != cfun_frame_layout.frame_size);
10326 }
10327 
10328 /* Remove the FPR clobbers from a tbegin insn if it can be proven that
10329    the TX is nonescaping.  A transaction is considered escaping if
10330    there is at least one path from tbegin returning CC0 to the
10331    function exit block without a tend.
10332 
10333    The check so far has some limitations:
10334    - only single tbegin/tend BBs are supported
10335    - the first cond jump after tbegin must separate the CC0 path from ~CC0
10336    - when CC is copied to a GPR and the CC0 check is done with the GPR
10337      this is not supported
10338 */
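/* A hedged sketch of a transaction this can handle at the source level
   (critical_work is a placeholder):

     if (__builtin_tbegin (0) == 0)    <- CC0 means "transaction started"
       critical_work ();
     __builtin_tend ();

   Here every path from the CC0 successor of the tbegin reaches the tend,
   so the tend block post-dominates the tbegin block and the FPR clobbers
   of the tbegin can be dropped.  */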
10339 
10340 static void
10341 s390_optimize_nonescaping_tx (void)
10342 {
10343   const unsigned int CC0 = 1 << 3;
10344   basic_block tbegin_bb = NULL;
10345   basic_block tend_bb = NULL;
10346   basic_block bb;
10347   rtx_insn *insn;
10348   bool result = true;
10349   int bb_index;
10350   rtx_insn *tbegin_insn = NULL;
10351 
10352   if (!cfun->machine->tbegin_p)
10353     return;
10354 
10355   for (bb_index = 0; bb_index < n_basic_blocks_for_fn (cfun); bb_index++)
10356     {
10357       bb = BASIC_BLOCK_FOR_FN (cfun, bb_index);
10358 
10359       if (!bb)
10360 	continue;
10361 
10362       FOR_BB_INSNS (bb, insn)
10363 	{
10364 	  rtx ite, cc, pat, target;
10365 	  unsigned HOST_WIDE_INT mask;
10366 
10367 	  if (!INSN_P (insn) || INSN_CODE (insn) <= 0)
10368 	    continue;
10369 
10370 	  pat = PATTERN (insn);
10371 
10372 	  if (GET_CODE (pat) == PARALLEL)
10373 	    pat = XVECEXP (pat, 0, 0);
10374 
10375 	  if (GET_CODE (pat) != SET
10376 	      || GET_CODE (SET_SRC (pat)) != UNSPEC_VOLATILE)
10377 	    continue;
10378 
10379 	  if (XINT (SET_SRC (pat), 1) == UNSPECV_TBEGIN)
10380 	    {
10381 	      rtx_insn *tmp;
10382 
10383 	      tbegin_insn = insn;
10384 
10385 	      /* Just return if the tbegin doesn't have clobbers.  */
10386 	      if (GET_CODE (PATTERN (insn)) != PARALLEL)
10387 		return;
10388 
10389 	      if (tbegin_bb != NULL)
10390 		return;
10391 
10392 	      /* Find the next conditional jump.  */
10393 	      for (tmp = NEXT_INSN (insn);
10394 		   tmp != NULL_RTX;
10395 		   tmp = NEXT_INSN (tmp))
10396 		{
10397 		  if (reg_set_p (gen_rtx_REG (CCmode, CC_REGNUM), tmp))
10398 		    return;
10399 		  if (!JUMP_P (tmp))
10400 		    continue;
10401 
10402 		  ite = SET_SRC (PATTERN (tmp));
10403 		  if (GET_CODE (ite) != IF_THEN_ELSE)
10404 		    continue;
10405 
10406 		  cc = XEXP (XEXP (ite, 0), 0);
10407 		  if (!REG_P (cc) || !CC_REGNO_P (REGNO (cc))
10408 		      || GET_MODE (cc) != CCRAWmode
10409 		      || GET_CODE (XEXP (XEXP (ite, 0), 1)) != CONST_INT)
10410 		    return;
10411 
10412 		  if (bb->succs->length () != 2)
10413 		    return;
10414 
10415 		  mask = INTVAL (XEXP (XEXP (ite, 0), 1));
10416 		  if (GET_CODE (XEXP (ite, 0)) == NE)
10417 		    mask ^= 0xf;
10418 
10419 		  if (mask == CC0)
10420 		    target = XEXP (ite, 1);
10421 		  else if (mask == (CC0 ^ 0xf))
10422 		    target = XEXP (ite, 2);
10423 		  else
10424 		    return;
10425 
10426 		  {
10427 		    edge_iterator ei;
10428 		    edge e1, e2;
10429 
10430 		    ei = ei_start (bb->succs);
10431 		    e1 = ei_safe_edge (ei);
10432 		    ei_next (&ei);
10433 		    e2 = ei_safe_edge (ei);
10434 
10435 		    if (e2->flags & EDGE_FALLTHRU)
10436 		      {
10437 			e2 = e1;
10438 			e1 = ei_safe_edge (ei);
10439 		      }
10440 
10441 		    if (!(e1->flags & EDGE_FALLTHRU))
10442 		      return;
10443 
10444 		    tbegin_bb = (target == pc_rtx) ? e1->dest : e2->dest;
10445 		  }
10446 		  if (tmp == BB_END (bb))
10447 		    break;
10448 		}
10449 	    }
10450 
10451 	  if (XINT (SET_SRC (pat), 1) == UNSPECV_TEND)
10452 	    {
10453 	      if (tend_bb != NULL)
10454 		return;
10455 	      tend_bb = bb;
10456 	    }
10457 	}
10458     }
10459 
10460   /* Either we successfully remove the FPR clobbers here or we are not
10461      able to do anything for this TX.  Both cases don't qualify for
10462      another look.  */
10463   cfun->machine->tbegin_p = false;
10464 
10465   if (tbegin_bb == NULL || tend_bb == NULL)
10466     return;
10467 
10468   calculate_dominance_info (CDI_POST_DOMINATORS);
10469   result = dominated_by_p (CDI_POST_DOMINATORS, tbegin_bb, tend_bb);
10470   free_dominance_info (CDI_POST_DOMINATORS);
10471 
10472   if (!result)
10473     return;
10474 
10475   PATTERN (tbegin_insn) = gen_rtx_PARALLEL (VOIDmode,
10476 			    gen_rtvec (2,
10477 				       XVECEXP (PATTERN (tbegin_insn), 0, 0),
10478 				       XVECEXP (PATTERN (tbegin_insn), 0, 1)));
10479   INSN_CODE (tbegin_insn) = -1;
10480   df_insn_rescan (tbegin_insn);
10481 
10482   return;
10483 }
10484 
10485 /* Implement TARGET_HARD_REGNO_NREGS.  Because all registers in a class
10486    have the same size, this is equivalent to CLASS_MAX_NREGS.  */
10487 
10488 static unsigned int
10489 s390_hard_regno_nregs (unsigned int regno, machine_mode mode)
10490 {
10491   return s390_class_max_nregs (REGNO_REG_CLASS (regno), mode);
10492 }
10493 
10494 /* Implement TARGET_HARD_REGNO_MODE_OK.
10495 
10496    Integer modes <= word size fit into any GPR.
10497    Integer modes > word size fit into successive GPRs, starting with
10498    an even-numbered register.
10499    SImode and DImode fit into FPRs as well.
10500 
10501    Floating point modes <= word size fit into any FPR or GPR.
10502    Floating point modes > word size (i.e. DFmode on 32-bit) fit
10503    into any FPR, or an even-odd GPR pair.
10504    TFmode fits only into an even-odd FPR pair.
10505 
10506    Complex floating point modes fit either into two FPRs, or into
10507    successive GPRs (again starting with an even number).
10508    TCmode fits only into two successive even-odd FPR pairs.
10509 
10510    Condition code modes fit only into the CC register.  */
10511 
10512 static bool
10513 s390_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
10514 {
10515   if (!TARGET_VX && VECTOR_NOFP_REGNO_P (regno))
10516     return false;
10517 
10518   switch (REGNO_REG_CLASS (regno))
10519     {
10520     case VEC_REGS:
10521       return ((GET_MODE_CLASS (mode) == MODE_INT
10522 	       && s390_class_max_nregs (VEC_REGS, mode) == 1)
10523 	      || mode == DFmode
10524 	      || (TARGET_VXE && mode == SFmode)
10525 	      || s390_vector_mode_supported_p (mode));
10526       break;
10527     case FP_REGS:
10528       if (TARGET_VX
10529 	  && ((GET_MODE_CLASS (mode) == MODE_INT
10530 	       && s390_class_max_nregs (FP_REGS, mode) == 1)
10531 	      || mode == DFmode
10532 	      || s390_vector_mode_supported_p (mode)))
10533 	return true;
10534 
10535       if (REGNO_PAIR_OK (regno, mode))
10536 	{
10537 	  if (mode == SImode || mode == DImode)
10538 	    return true;
10539 
10540 	  if (FLOAT_MODE_P (mode) && GET_MODE_CLASS (mode) != MODE_VECTOR_FLOAT)
10541 	    return true;
10542 	}
10543       break;
10544     case ADDR_REGS:
10545       if (FRAME_REGNO_P (regno) && mode == Pmode)
10546 	return true;
10547 
10548       /* fallthrough */
10549     case GENERAL_REGS:
10550       if (REGNO_PAIR_OK (regno, mode))
10551 	{
10552 	  if (TARGET_ZARCH
10553 	      || (mode != TFmode && mode != TCmode && mode != TDmode))
10554 	    return true;
10555 	}
10556       break;
10557     case CC_REGS:
10558       if (GET_MODE_CLASS (mode) == MODE_CC)
10559 	return true;
10560       break;
10561     case ACCESS_REGS:
10562       if (REGNO_PAIR_OK (regno, mode))
10563 	{
10564 	  if (mode == SImode || mode == Pmode)
10565 	    return true;
10566 	}
10567       break;
10568     default:
10569       return false;
10570     }
10571 
10572   return false;
10573 }
10574 
10575 /* Implement TARGET_MODES_TIEABLE_P.  */
10576 
10577 static bool
10578 s390_modes_tieable_p (machine_mode mode1, machine_mode mode2)
10579 {
10580   return ((mode1 == SFmode || mode1 == DFmode)
10581 	  == (mode2 == SFmode || mode2 == DFmode));
10582 }
10583 
10584 /* Return nonzero if register OLD_REG can be renamed to register NEW_REG.  */
10585 
10586 bool
10587 s390_hard_regno_rename_ok (unsigned int old_reg, unsigned int new_reg)
10588 {
10589    /* Once we've decided upon a register to use as base register, it must
10590       no longer be used for any other purpose.  */
10591   if (cfun->machine->base_reg)
10592     if (REGNO (cfun->machine->base_reg) == old_reg
10593 	|| REGNO (cfun->machine->base_reg) == new_reg)
10594       return false;
10595 
10596   /* Prevent regrename from using call-saved regs which haven't
10597      actually been saved.  This is necessary since regrename assumes
10598      the backend save/restore decisions are based on
10599      df_regs_ever_live.  Since we have our own routine we have to tell
10600      regrename manually about it.  */
10601   if (GENERAL_REGNO_P (new_reg)
10602       && !call_really_used_regs[new_reg]
10603       && cfun_gpr_save_slot (new_reg) == SAVE_SLOT_NONE)
10604     return false;
10605 
10606   return true;
10607 }
10608 
10609 /* Return nonzero if register REGNO can be used as a scratch register
10610    in peephole2.  */
10611 
10612 static bool
10613 s390_hard_regno_scratch_ok (unsigned int regno)
10614 {
10615   /* See s390_hard_regno_rename_ok.  */
10616   if (GENERAL_REGNO_P (regno)
10617       && !call_really_used_regs[regno]
10618       && cfun_gpr_save_slot (regno) == SAVE_SLOT_NONE)
10619     return false;
10620 
10621   return true;
10622 }
10623 
10624 /* Implement TARGET_HARD_REGNO_CALL_PART_CLOBBERED.  When generating
10625    code that runs in z/Architecture mode, but conforms to the 31-bit
10626    ABI, GPRs can hold 8 bytes; the ABI guarantees only that the lower 4
10627    bytes are saved across calls, however.  */
10628 
10629 static bool
10630 s390_hard_regno_call_part_clobbered (unsigned int regno, machine_mode mode)
10631 {
10632   if (!TARGET_64BIT
10633       && TARGET_ZARCH
10634       && GET_MODE_SIZE (mode) > 4
10635       && ((regno >= 6 && regno <= 15) || regno == 32))
10636     return true;
10637 
10638   if (TARGET_VX
10639       && GET_MODE_SIZE (mode) > 8
10640       && (((TARGET_64BIT && regno >= 24 && regno <= 31))
10641 	  || (!TARGET_64BIT && (regno == 18 || regno == 19))))
10642     return true;
10643 
10644   return false;
10645 }
10646 
10647 /* Maximum number of registers to represent a value of mode MODE
10648    in a register of class RCLASS.  */
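/* A hedged illustration: DFmode in GENERAL_REGS needs 2 GPRs on 31 bit
   (UNITS_PER_WORD == 4) but only 1 on 64 bit, while TFmode in FP_REGS
   always needs an FPR pair, i.e. 2 registers, with or without the
   vector facility.  */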
10649 
10650 int
10651 s390_class_max_nregs (enum reg_class rclass, machine_mode mode)
10652 {
10653   int reg_size;
10654   bool reg_pair_required_p = false;
10655 
10656   switch (rclass)
10657     {
10658     case FP_REGS:
10659     case VEC_REGS:
10660       reg_size = TARGET_VX ? 16 : 8;
10661 
10662       /* TF and TD modes would fit into a VR but we put them into a
10663 	 register pair since we do not have 128bit FP instructions on
10664 	 full VRs.  */
10665       if (TARGET_VX
10666 	  && SCALAR_FLOAT_MODE_P (mode)
10667 	  && GET_MODE_SIZE (mode) >= 16)
10668 	reg_pair_required_p = true;
10669 
10670       /* Even if complex types would fit into a single FPR/VR we force
10671 	 them into a register pair to deal with the parts more easily.
10672 	 (FIXME: What about complex ints?)  */
10673       if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
10674 	reg_pair_required_p = true;
10675       break;
10676     case ACCESS_REGS:
10677       reg_size = 4;
10678       break;
10679     default:
10680       reg_size = UNITS_PER_WORD;
10681       break;
10682     }
10683 
10684   if (reg_pair_required_p)
10685     return 2 * ((GET_MODE_SIZE (mode) / 2 + reg_size - 1) / reg_size);
10686 
10687   return (GET_MODE_SIZE (mode) + reg_size - 1) / reg_size;
10688 }
10689 
10690 /* Implement TARGET_CAN_CHANGE_MODE_CLASS.  */
10691 
10692 static bool
10693 s390_can_change_mode_class (machine_mode from_mode,
10694 			    machine_mode to_mode,
10695 			    reg_class_t rclass)
10696 {
10697   machine_mode small_mode;
10698   machine_mode big_mode;
10699 
10700   /* V1TF and TF have different representations in vector
10701      registers.  */
10702   if (reg_classes_intersect_p (VEC_REGS, rclass)
10703       && ((from_mode == V1TFmode && to_mode == TFmode)
10704 	  || (from_mode == TFmode && to_mode == V1TFmode)))
10705     return false;
10706 
10707   if (GET_MODE_SIZE (from_mode) == GET_MODE_SIZE (to_mode))
10708     return true;
10709 
10710   if (GET_MODE_SIZE (from_mode) < GET_MODE_SIZE (to_mode))
10711     {
10712       small_mode = from_mode;
10713       big_mode = to_mode;
10714     }
10715   else
10716     {
10717       small_mode = to_mode;
10718       big_mode = from_mode;
10719     }
10720 
10721   /* Values residing in VRs are little-endian style.  All modes are
10722      placed left-aligned in a VR.  This means that we cannot allow
10723      switching between modes with differing sizes.  Also if the vector
10724      facility is available we still place TFmode values in VR register
10725      pairs, since the only instructions we have operating on TFmodes
10726      only deal with register pairs.  Therefore we have to allow DFmode
10727      subregs of TFmodes to enable the TFmode splitters.  */
10728   if (reg_classes_intersect_p (VEC_REGS, rclass)
10729       && (GET_MODE_SIZE (small_mode) < 8
10730 	  || s390_class_max_nregs (VEC_REGS, big_mode) == 1))
10731     return false;
10732 
10733   /* Likewise for access registers, since they have only half the
10734      word size on 64-bit.  */
10735   if (reg_classes_intersect_p (ACCESS_REGS, rclass))
10736     return false;
10737 
10738   return true;
10739 }
10740 
10741 /* Return true if we use LRA instead of reload pass.  */
10742 static bool
10743 s390_lra_p (void)
10744 {
10745   return s390_lra_flag;
10746 }
10747 
10748 /* Return true if register FROM can be eliminated via register TO.  */
10749 
10750 static bool
10751 s390_can_eliminate (const int from, const int to)
10752 {
10753   /* On zSeries machines, we have not marked the base register as fixed.
10754      Instead, we have an elimination rule BASE_REGNUM -> BASE_REGNUM.
10755      If a function requires the base register, we say here that this
10756      elimination cannot be performed.  This will cause reload to free
10757      up the base register (as if it were fixed).  On the other hand,
10758      if the current function does *not* require the base register, we
10759      say here the elimination succeeds, which in turn allows reload
10760      to allocate the base register for any other purpose.  */
10761   if (from == BASE_REGNUM && to == BASE_REGNUM)
10762     {
10763       if (TARGET_CPU_ZARCH)
10764 	{
10765 	  s390_init_frame_layout ();
10766 	  return cfun->machine->base_reg == NULL_RTX;
10767 	}
10768 
10769       return false;
10770     }
10771 
10772   /* Everything else must point into the stack frame.  */
10773   gcc_assert (to == STACK_POINTER_REGNUM
10774 	      || to == HARD_FRAME_POINTER_REGNUM);
10775 
10776   gcc_assert (from == FRAME_POINTER_REGNUM
10777 	      || from == ARG_POINTER_REGNUM
10778 	      || from == RETURN_ADDRESS_POINTER_REGNUM);
10779 
10780   /* Make sure we actually saved the return address.  */
10781   if (from == RETURN_ADDRESS_POINTER_REGNUM)
10782     if (!crtl->calls_eh_return
10783 	&& !cfun->stdarg
10784 	&& !cfun_frame_layout.save_return_addr_p)
10785       return false;
10786 
10787   return true;
10788 }
10789 
10790 /* Return the offset between register FROM and TO initially after the prologue.  */
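/* A hedged example: on 64 bit (STACK_POINTER_OFFSET == 160) a function
   with a total frame size of 344 bytes gets an ARG_POINTER_REGNUM ->
   STACK_POINTER_REGNUM elimination offset of 504.  */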
10791 
10792 HOST_WIDE_INT
10793 s390_initial_elimination_offset (int from, int to)
10794 {
10795   HOST_WIDE_INT offset;
10796 
10797   /* ??? Why are we called for non-eliminable pairs?  */
10798   if (!s390_can_eliminate (from, to))
10799     return 0;
10800 
10801   switch (from)
10802     {
10803     case FRAME_POINTER_REGNUM:
10804       offset = (get_frame_size()
10805 		+ STACK_POINTER_OFFSET
10806 		+ crtl->outgoing_args_size);
10807       break;
10808 
10809     case ARG_POINTER_REGNUM:
10810       s390_init_frame_layout ();
10811       offset = cfun_frame_layout.frame_size + STACK_POINTER_OFFSET;
10812       break;
10813 
10814     case RETURN_ADDRESS_POINTER_REGNUM:
10815       s390_init_frame_layout ();
10816 
10817       if (cfun_frame_layout.first_save_gpr_slot == -1)
10818 	{
10819 	  /* If it turns out that for stdarg nothing went into the reg
10820 	     save area we also do not need the return address
10821 	     pointer.  */
10822 	  if (cfun->stdarg && !cfun_save_arg_fprs_p)
10823 	    return 0;
10824 
10825 	  gcc_unreachable ();
10826 	}
10827 
10828       /* In order to make the following work it is not necessary for
10829 	 r14 to have a save slot.  It is sufficient if one other GPR
10830 	 got one.  Since the GPRs are always stored without gaps we
10831 	 are able to calculate where the r14 save slot would
10832 	 reside.  */
10833       offset = (cfun_frame_layout.frame_size + cfun_frame_layout.gprs_offset +
10834 		(RETURN_REGNUM - cfun_frame_layout.first_save_gpr_slot) *
10835 		UNITS_PER_LONG);
10836       break;
10837 
10838     case BASE_REGNUM:
10839       offset = 0;
10840       break;
10841 
10842     default:
10843       gcc_unreachable ();
10844     }
10845 
10846   return offset;
10847 }
10848 
10849 /* Emit insn to save fpr REGNUM at offset OFFSET relative
10850    to register BASE.  Return generated insn.  */
10851 
10852 static rtx
10853 save_fpr (rtx base, int offset, int regnum)
10854 {
10855   rtx addr;
10856   addr = gen_rtx_MEM (DFmode, plus_constant (Pmode, base, offset));
10857 
10858   if (regnum >= 16 && regnum <= (16 + FP_ARG_NUM_REG))
10859     set_mem_alias_set (addr, get_varargs_alias_set ());
10860   else
10861     set_mem_alias_set (addr, get_frame_alias_set ());
10862 
10863   return emit_move_insn (addr, gen_rtx_REG (DFmode, regnum));
10864 }
10865 
10866 /* Emit insn to restore fpr REGNUM from offset OFFSET relative
10867    to register BASE.  Return generated insn.  */
10868 
10869 static rtx
10870 restore_fpr (rtx base, int offset, int regnum)
10871 {
10872   rtx addr;
10873   addr = gen_rtx_MEM (DFmode, plus_constant (Pmode, base, offset));
10874   set_mem_alias_set (addr, get_frame_alias_set ());
10875 
10876   return emit_move_insn (gen_rtx_REG (DFmode, regnum), addr);
10877 }
10878 
10879 /* Return true if REGNO is a global register, but not one
10880    of the special ones that need to be saved/restored anyway.  */
10881 
10882 static inline bool
10883 global_not_special_regno_p (int regno)
10884 {
10885   return (global_regs[regno]
10886 	  /* These registers are special and need to be
10887 	     restored in any case.  */
10888 	  && !(regno == STACK_POINTER_REGNUM
10889 	       || regno == RETURN_REGNUM
10890 	       || regno == BASE_REGNUM
10891 	       || (flag_pic && regno == (int)PIC_OFFSET_TABLE_REGNUM)));
10892 }
10893 
10894 /* Generate insn to save registers FIRST to LAST into
10895    the register save area located at offset OFFSET
10896    relative to register BASE.  */
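/* A sketch of typical use (the offset is illustrative): on 64 bit,
   save_gprs (stack_pointer_rtx, 48, 6, 15) yields a single
   store-multiple, i.e. an stmg %r6,%r15,48(%r15).  */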
10897 
10898 static rtx
10899 save_gprs (rtx base, int offset, int first, int last)
10900 {
10901   rtx addr, insn, note;
10902   int i;
10903 
10904   addr = plus_constant (Pmode, base, offset);
10905   addr = gen_rtx_MEM (Pmode, addr);
10906 
10907   set_mem_alias_set (addr, get_frame_alias_set ());
10908 
10909   /* Special-case single register.  */
10910   if (first == last)
10911     {
10912       if (TARGET_64BIT)
10913         insn = gen_movdi (addr, gen_rtx_REG (Pmode, first));
10914       else
10915         insn = gen_movsi (addr, gen_rtx_REG (Pmode, first));
10916 
10917       if (!global_not_special_regno_p (first))
10918 	RTX_FRAME_RELATED_P (insn) = 1;
10919       return insn;
10920     }
10921 
10922 
10923   insn = gen_store_multiple (addr,
10924 			     gen_rtx_REG (Pmode, first),
10925 			     GEN_INT (last - first + 1));
10926 
10927   if (first <= 6 && cfun->stdarg)
10928     for (i = 0; i < XVECLEN (PATTERN (insn), 0); i++)
10929       {
10930 	rtx mem = XEXP (XVECEXP (PATTERN (insn), 0, i), 0);
10931 
10932 	if (first + i <= 6)
10933 	  set_mem_alias_set (mem, get_varargs_alias_set ());
10934       }
10935 
10936   /* We need to set the FRAME_RELATED flag on all SETs
10937      inside the store-multiple pattern.
10938 
10939      However, we must not emit DWARF records for registers 2..5
10940      if they are stored for use by variable arguments ...
10941 
10942      ??? Unfortunately, it is not enough to simply not set the
10943      FRAME_RELATED flags for those SETs, because the first SET
10944      of the PARALLEL is always treated as if it had the flag
10945      set, even if it does not.  Therefore we emit a new pattern
10946      without those registers as REG_FRAME_RELATED_EXPR note.  */
10947 
10948   if (first >= 6 && !global_not_special_regno_p (first))
10949     {
10950       rtx pat = PATTERN (insn);
10951 
10952       for (i = 0; i < XVECLEN (pat, 0); i++)
10953 	if (GET_CODE (XVECEXP (pat, 0, i)) == SET
10954 	    && !global_not_special_regno_p (REGNO (SET_SRC (XVECEXP (pat,
10955 								     0, i)))))
10956 	  RTX_FRAME_RELATED_P (XVECEXP (pat, 0, i)) = 1;
10957 
10958       RTX_FRAME_RELATED_P (insn) = 1;
10959     }
10960   else if (last >= 6)
10961     {
10962       int start;
10963 
10964       for (start = first >= 6 ? first : 6; start <= last; start++)
10965 	if (!global_not_special_regno_p (start))
10966 	  break;
10967 
10968       if (start > last)
10969 	return insn;
10970 
10971       addr = plus_constant (Pmode, base,
10972 			    offset + (start - first) * UNITS_PER_LONG);
10973 
10974       if (start == last)
10975 	{
10976 	  if (TARGET_64BIT)
10977 	    note = gen_movdi (gen_rtx_MEM (Pmode, addr),
10978 			      gen_rtx_REG (Pmode, start));
10979 	  else
10980 	    note = gen_movsi (gen_rtx_MEM (Pmode, addr),
10981 			      gen_rtx_REG (Pmode, start));
10982 	  note = PATTERN (note);
10983 
10984 	  add_reg_note (insn, REG_FRAME_RELATED_EXPR, note);
10985 	  RTX_FRAME_RELATED_P (insn) = 1;
10986 
10987 	  return insn;
10988 	}
10989 
10990       note = gen_store_multiple (gen_rtx_MEM (Pmode, addr),
10991 				 gen_rtx_REG (Pmode, start),
10992 				 GEN_INT (last - start + 1));
10993       note = PATTERN (note);
10994 
10995       add_reg_note (insn, REG_FRAME_RELATED_EXPR, note);
10996 
10997       for (i = 0; i < XVECLEN (note, 0); i++)
10998 	if (GET_CODE (XVECEXP (note, 0, i)) == SET
10999 	    && !global_not_special_regno_p (REGNO (SET_SRC (XVECEXP (note,
11000 								     0, i)))))
11001 	  RTX_FRAME_RELATED_P (XVECEXP (note, 0, i)) = 1;
11002 
11003       RTX_FRAME_RELATED_P (insn) = 1;
11004     }
11005 
11006   return insn;
11007 }
11008 
11009 /* Generate insn to restore registers FIRST to LAST from
11010    the register save area located at offset OFFSET
11011    relative to register BASE.  */
11012 
11013 static rtx
11014 restore_gprs (rtx base, int offset, int first, int last)
11015 {
11016   rtx addr, insn;
11017 
11018   addr = plus_constant (Pmode, base, offset);
11019   addr = gen_rtx_MEM (Pmode, addr);
11020   set_mem_alias_set (addr, get_frame_alias_set ());
11021 
11022   /* Special-case single register.  */
11023   if (first == last)
11024     {
11025       if (TARGET_64BIT)
11026         insn = gen_movdi (gen_rtx_REG (Pmode, first), addr);
11027       else
11028         insn = gen_movsi (gen_rtx_REG (Pmode, first), addr);
11029 
11030       RTX_FRAME_RELATED_P (insn) = 1;
11031       return insn;
11032     }
11033 
11034   insn = gen_load_multiple (gen_rtx_REG (Pmode, first),
11035 			    addr,
11036 			    GEN_INT (last - first + 1));
11037   RTX_FRAME_RELATED_P (insn) = 1;
11038   return insn;
11039 }
11040 
11041 /* Return insn sequence to load the GOT register.  */
11042 
11043 rtx_insn *
11044 s390_load_got (void)
11045 {
11046   rtx_insn *insns;
11047 
11048   /* We cannot use pic_offset_table_rtx here since this function is
11049      also used for non-PIC code when __tls_get_offset is called, and
11050      in that case neither PIC_OFFSET_TABLE_REGNUM nor
11051      pic_offset_table_rtx is usable.  */
11052   rtx got_rtx = gen_rtx_REG (Pmode, 12);
11053 
11054   start_sequence ();
11055 
11056   if (TARGET_CPU_ZARCH)
11057     {
11058       emit_move_insn (got_rtx, s390_got_symbol ());
11059     }
11060   else
11061     {
11062       rtx offset;
11063 
11064       offset = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, s390_got_symbol ()),
11065 			       UNSPEC_LTREL_OFFSET);
11066       offset = gen_rtx_CONST (Pmode, offset);
11067       offset = force_const_mem (Pmode, offset);
11068 
11069       emit_move_insn (got_rtx, offset);
11070 
11071       offset = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (offset, 0)),
11072 			       UNSPEC_LTREL_BASE);
11073       offset = gen_rtx_PLUS (Pmode, got_rtx, offset);
11074 
11075       emit_move_insn (got_rtx, offset);
11076     }
11077 
11078   insns = get_insns ();
11079   end_sequence ();
11080   return insns;
11081 }
11082 
11083 /* This ties together stack memory (MEM with an alias set of frame_alias_set)
11084    and the change to the stack pointer.  */
11085 
11086 static void
11087 s390_emit_stack_tie (void)
11088 {
11089   rtx mem = gen_frame_mem (BLKmode,
11090 			   gen_rtx_REG (Pmode, STACK_POINTER_REGNUM));
11091 
11092   emit_insn (gen_stack_tie (mem));
11093 }
11094 
11095 /* Copy GPRS into FPR save slots.  */
11096 
11097 static void
11098 s390_save_gprs_to_fprs (void)
11099 {
11100   int i;
11101 
11102   if (!TARGET_Z10 || !TARGET_HARD_FLOAT || !crtl->is_leaf)
11103     return;
11104 
11105   for (i = 6; i < 16; i++)
11106     {
11107       if (FP_REGNO_P (cfun_gpr_save_slot (i)))
11108 	{
11109 	  rtx_insn *insn =
11110 	    emit_move_insn (gen_rtx_REG (DImode, cfun_gpr_save_slot (i)),
11111 			    gen_rtx_REG (DImode, i));
11112 	  RTX_FRAME_RELATED_P (insn) = 1;
11113 	  /* This prevents dwarf2cfi from interpreting the set.  Otherwise
11114 	     it might emit def_cfa_register notes that set an FPR as the
11115 	     new CFA.  */
11116 	  add_reg_note (insn, REG_CFA_REGISTER, copy_rtx (PATTERN (insn)));
11117 	}
11118     }
11119 }
11120 
11121 /* Restore GPRs from FPR save slots.  */
11122 
11123 static void
11124 s390_restore_gprs_from_fprs (void)
11125 {
11126   int i;
11127 
11128   if (!TARGET_Z10 || !TARGET_HARD_FLOAT || !crtl->is_leaf)
11129     return;
11130 
11131   for (i = 6; i < 16; i++)
11132     {
11133       rtx_insn *insn;
11134 
11135       if (!FP_REGNO_P (cfun_gpr_save_slot (i)))
11136 	continue;
11137 
11138       rtx fpr = gen_rtx_REG (DImode, cfun_gpr_save_slot (i));
11139 
11140       if (i == STACK_POINTER_REGNUM)
11141 	insn = emit_insn (gen_stack_restore_from_fpr (fpr));
11142       else
11143 	insn = emit_move_insn (gen_rtx_REG (DImode, i), fpr);
11144 
11145       df_set_regs_ever_live (i, true);
11146       add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DImode, i));
11147       if (i == STACK_POINTER_REGNUM)
11148 	add_reg_note (insn, REG_CFA_DEF_CFA,
11149 		      plus_constant (Pmode, stack_pointer_rtx,
11150 				     STACK_POINTER_OFFSET));
11151       RTX_FRAME_RELATED_P (insn) = 1;
11152     }
11153 }
11154 
11155 
11156 /* A pass run immediately before shrink-wrapping and prologue and epilogue
11157    generation.  */
11158 
11159 namespace {
11160 
11161 const pass_data pass_data_s390_early_mach =
11162 {
11163   RTL_PASS, /* type */
11164   "early_mach", /* name */
11165   OPTGROUP_NONE, /* optinfo_flags */
11166   TV_MACH_DEP, /* tv_id */
11167   0, /* properties_required */
11168   0, /* properties_provided */
11169   0, /* properties_destroyed */
11170   0, /* todo_flags_start */
11171   ( TODO_df_verify | TODO_df_finish ), /* todo_flags_finish */
11172 };
11173 
11174 class pass_s390_early_mach : public rtl_opt_pass
11175 {
11176 public:
11177   pass_s390_early_mach (gcc::context *ctxt)
11178     : rtl_opt_pass (pass_data_s390_early_mach, ctxt)
11179   {}
11180 
11181   /* opt_pass methods: */
11182   virtual unsigned int execute (function *);
11183 
11184 }; // class pass_s390_early_mach
11185 
11186 unsigned int
11187 pass_s390_early_mach::execute (function *fun)
11188 {
11189   rtx_insn *insn;
11190 
11191   /* Try to get rid of the FPR clobbers.  */
11192   s390_optimize_nonescaping_tx ();
11193 
11194   /* Re-compute register info.  */
11195   s390_register_info ();
11196 
11197   /* If we're using a base register, ensure that it is always valid for
11198      the first non-prologue instruction.  */
11199   if (fun->machine->base_reg)
11200     emit_insn_at_entry (gen_main_pool (fun->machine->base_reg));
11201 
11202   /* Annotate all constant pool references to let the scheduler know
11203      they implicitly use the base register.  */
11204   for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
11205     if (INSN_P (insn))
11206       {
11207 	annotate_constant_pool_refs (&PATTERN (insn));
11208 	df_insn_rescan (insn);
11209       }
11210   return 0;
11211 }
11212 
11213 } // anon namespace
11214 
11215 /* Calculate TARGET = REG + OFFSET as s390_emit_prologue would do it.
11216    - push immediates that are too big to the literal pool and annotate the refs
11217    - emit frame related notes for stack pointer changes.  */
11218 
11219 static rtx
11220 s390_prologue_plus_offset (rtx target, rtx reg, rtx offset, bool frame_related_p)
11221 {
11222   rtx insn;
11223   rtx orig_offset = offset;
11224 
11225   gcc_assert (REG_P (target));
11226   gcc_assert (REG_P (reg));
11227   gcc_assert (CONST_INT_P (offset));
11228 
11229   if (offset == const0_rtx)                               /* lr/lgr */
11230     {
11231       insn = emit_move_insn (target, reg);
11232     }
11233   else if (DISP_IN_RANGE (INTVAL (offset)))               /* la */
11234     {
11235       insn = emit_move_insn (target, gen_rtx_PLUS (Pmode, reg,
11236 						   offset));
11237     }
11238   else
11239     {
11240       if (!satisfies_constraint_K (offset)                /* ahi/aghi */
11241 	  && (!TARGET_EXTIMM
11242 	      || (!satisfies_constraint_Op (offset)       /* alfi/algfi */
11243 		  && !satisfies_constraint_On (offset)))) /* slfi/slgfi */
11244 	offset = force_const_mem (Pmode, offset);
11245 
11246       if (target != reg)
11247 	{
11248 	  insn = emit_move_insn (target, reg);
11249 	  RTX_FRAME_RELATED_P (insn) = frame_related_p ? 1 : 0;
11250 	}
11251 
11252       insn = emit_insn (gen_add2_insn (target, offset));
11253 
11254       if (!CONST_INT_P (offset))
11255 	{
11256 	  annotate_constant_pool_refs (&PATTERN (insn));
11257 
11258 	  if (frame_related_p)
11259 	    add_reg_note (insn, REG_FRAME_RELATED_EXPR,
11260 			  gen_rtx_SET (target,
11261 				       gen_rtx_PLUS (Pmode, target,
11262 						     orig_offset)));
11263 	}
11264     }
11265 
11266   RTX_FRAME_RELATED_P (insn) = frame_related_p ? 1 : 0;
11267 
11268   /* If this is a stack adjustment and we are generating a stack clash
11269      prologue, then add a REG_STACK_CHECK note to signal that this insn
11270      should be left alone.  */
11271   if (flag_stack_clash_protection && target == stack_pointer_rtx)
11272     add_reg_note (insn, REG_STACK_CHECK, const0_rtx);
11273 
11274   return insn;
11275 }
11276 
11277 /* Emit a compare instruction with a volatile memory access as stack
11278    probe.  It does not waste store tags and does not clobber any
11279    registers apart from the condition code.  */
11280 static void
11281 s390_emit_stack_probe (rtx addr)
11282 {
11283   rtx tmp = gen_rtx_MEM (Pmode, addr);
11284   MEM_VOLATILE_P (tmp) = 1;
11285   s390_emit_compare (EQ, gen_rtx_REG (Pmode, 0), tmp);
11286   emit_insn (gen_blockage ());
11287 }
11288 
11289 /* Use a runtime loop if we have to emit more probes than this.  */
11290 #define MIN_UNROLL_PROBES 3
11291 
11292 /* Allocate SIZE bytes of stack space, using TEMP_REG as a temporary
11293    if necessary.  LAST_PROBE_OFFSET contains the offset of the closest
11294    probe relative to the stack pointer.
11295 
11296    Note that SIZE is negative.
11297 
11298    The return value is true if TEMP_REG has been clobbered.  */
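/* Illustrative numbers, assuming a 4 KiB probe interval: for
   SIZE == -10000, rounded_size is 8192 and num_probes is 2, which is
   below MIN_UNROLL_PROBES, so two inline probes are emitted and the
   residual allocation of 1808 bytes is handled afterwards.  */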
11299 static bool
11300 allocate_stack_space (rtx size, HOST_WIDE_INT last_probe_offset,
11301 		      rtx temp_reg)
11302 {
11303   bool temp_reg_clobbered_p = false;
11304   HOST_WIDE_INT probe_interval
11305     = 1 << PARAM_VALUE (PARAM_STACK_CLASH_PROTECTION_PROBE_INTERVAL);
11306   HOST_WIDE_INT guard_size
11307     = 1 << PARAM_VALUE (PARAM_STACK_CLASH_PROTECTION_GUARD_SIZE);
11308 
11309   if (flag_stack_clash_protection)
11310     {
11311       if (last_probe_offset + -INTVAL (size) < guard_size)
11312 	dump_stack_clash_frame_info (NO_PROBE_SMALL_FRAME, true);
11313       else
11314 	{
11315 	  rtx offset = GEN_INT (probe_interval - UNITS_PER_LONG);
11316 	  HOST_WIDE_INT rounded_size = -INTVAL (size) & -probe_interval;
11317 	  HOST_WIDE_INT num_probes = rounded_size / probe_interval;
11318 	  HOST_WIDE_INT residual = -INTVAL (size) - rounded_size;
11319 
11320 	  if (num_probes < MIN_UNROLL_PROBES)
11321 	    {
11322 	      /* Emit unrolled probe statements.  */
11323 
11324 	      for (unsigned int i = 0; i < num_probes; i++)
11325 		{
11326 		  s390_prologue_plus_offset (stack_pointer_rtx,
11327 					     stack_pointer_rtx,
11328 					     GEN_INT (-probe_interval), true);
11329 		  s390_emit_stack_probe (gen_rtx_PLUS (Pmode,
11330 						       stack_pointer_rtx,
11331 						       offset));
11332 		}
11333 	      dump_stack_clash_frame_info (PROBE_INLINE, residual != 0);
11334 	    }
11335 	  else
11336 	    {
11337 	      /* Emit a loop probing the pages.  */
11338 
11339 	      rtx_code_label *loop_start_label = gen_label_rtx ();
11340 
11341 	      /* From now on temp_reg will be the CFA register.  */
11342 	      s390_prologue_plus_offset (temp_reg, stack_pointer_rtx,
11343 					 GEN_INT (-rounded_size), true);
11344 	      emit_label (loop_start_label);
11345 
11346 	      s390_prologue_plus_offset (stack_pointer_rtx,
11347 					 stack_pointer_rtx,
11348 					 GEN_INT (-probe_interval), false);
11349 	      s390_emit_stack_probe (gen_rtx_PLUS (Pmode,
11350 						   stack_pointer_rtx,
11351 						   offset));
11352 	      emit_cmp_and_jump_insns (stack_pointer_rtx, temp_reg,
11353 				       GT, NULL_RTX,
11354 				       Pmode, 1, loop_start_label);
11355 
11356 	      /* Without this, make_edges ICEs.  */
11357 	      JUMP_LABEL (get_last_insn ()) = loop_start_label;
11358 	      LABEL_NUSES (loop_start_label) = 1;
11359 
11360 	      /* That's going to be a NOP since stack pointer and
11361 		 temp_reg are supposed to be the same here.  We just
11362 		 emit it to set the CFA reg back to r15.  */
11363 	      s390_prologue_plus_offset (stack_pointer_rtx, temp_reg,
11364 					 const0_rtx, true);
11365 	      temp_reg_clobbered_p = true;
11366 	      dump_stack_clash_frame_info (PROBE_LOOP, residual != 0);
11367 	    }
11368 
11369 	  /* Handle any residual allocation request.  */
11370 	  s390_prologue_plus_offset (stack_pointer_rtx,
11371 				     stack_pointer_rtx,
11372 				     GEN_INT (-residual), true);
11373 	  last_probe_offset += residual;
11374 	  if (last_probe_offset >= probe_interval)
11375 	    s390_emit_stack_probe (gen_rtx_PLUS (Pmode,
11376 						 stack_pointer_rtx,
11377 						 GEN_INT (residual
11378 							  - UNITS_PER_LONG)));
11379 
11380 	  return temp_reg_clobbered_p;
11381 	}
11382     }
11383 
11384   /* Subtract frame size from stack pointer.  */
11385   s390_prologue_plus_offset (stack_pointer_rtx,
11386 			     stack_pointer_rtx,
11387 			     size, true);
11388 
11389   return temp_reg_clobbered_p;
11390 }
11391 
11392 /* Expand the prologue into a bunch of separate insns.  */
11393 
11394 void
11395 s390_emit_prologue (void)
11396 {
11397   rtx insn, addr;
11398   rtx temp_reg;
11399   int i;
11400   int offset;
11401   int next_fpr = 0;
11402 
11403   /* Choose the best register to use as a temporary within the prologue.
11404      TPF with profiling must avoid register 14 - the tracing function
11405      needs the original contents of r14 to be preserved.  */
11406 
11407   if (!has_hard_reg_initial_val (Pmode, RETURN_REGNUM)
11408       && !crtl->is_leaf
11409       && !TARGET_TPF_PROFILING)
11410     temp_reg = gen_rtx_REG (Pmode, RETURN_REGNUM);
11411   else if (flag_split_stack && cfun->stdarg)
11412     temp_reg = gen_rtx_REG (Pmode, 12);
11413   else
11414     temp_reg = gen_rtx_REG (Pmode, 1);
11415 
11416   /* When probing for stack-clash mitigation, we have to track the distance
11417      between the stack pointer and closest known reference.
11418 
11419      Most of the time we have to make a worst case assumption.  The
11420      only exception is when TARGET_BACKCHAIN is active, in which case
11421      we know *sp (offset 0) was written.  */
11422   HOST_WIDE_INT probe_interval
11423     = 1 << PARAM_VALUE (PARAM_STACK_CLASH_PROTECTION_PROBE_INTERVAL);
11424   HOST_WIDE_INT last_probe_offset
11425     = (TARGET_BACKCHAIN
11426        ? (TARGET_PACKED_STACK ? STACK_POINTER_OFFSET - UNITS_PER_LONG : 0)
11427        : probe_interval - (STACK_BOUNDARY / UNITS_PER_WORD));
11428 
11429   s390_save_gprs_to_fprs ();
11430 
11431   /* Save call saved gprs.  */
11432   if (cfun_frame_layout.first_save_gpr != -1)
11433     {
11434       insn = save_gprs (stack_pointer_rtx,
11435 			cfun_frame_layout.gprs_offset +
11436 			UNITS_PER_LONG * (cfun_frame_layout.first_save_gpr
11437 					  - cfun_frame_layout.first_save_gpr_slot),
11438 			cfun_frame_layout.first_save_gpr,
11439 			cfun_frame_layout.last_save_gpr);
11440 
11441       /* This is not 100% correct.  If we have more than one register saved,
11442 	 then LAST_PROBE_OFFSET can move even closer to sp.  */
11443       last_probe_offset
11444 	= (cfun_frame_layout.gprs_offset +
11445 	   UNITS_PER_LONG * (cfun_frame_layout.first_save_gpr
11446 			     - cfun_frame_layout.first_save_gpr_slot));
11447 
11448       emit_insn (insn);
11449     }
11450 
11451   /* Dummy insn to mark literal pool slot.  */
11452 
11453   if (cfun->machine->base_reg)
11454     emit_insn (gen_main_pool (cfun->machine->base_reg));
11455 
11456   offset = cfun_frame_layout.f0_offset;
11457 
11458   /* Save f0 and f2.  */
11459   for (i = FPR0_REGNUM; i <= FPR0_REGNUM + 1; i++)
11460     {
11461       if (cfun_fpr_save_p (i))
11462 	{
11463 	  save_fpr (stack_pointer_rtx, offset, i);
11464 	  if (offset < last_probe_offset)
11465 	    last_probe_offset = offset;
11466 	  offset += 8;
11467 	}
11468       else if (!TARGET_PACKED_STACK || cfun->stdarg)
11469 	offset += 8;
11470     }
11471 
11472   /* Save f4 and f6.  */
11473   offset = cfun_frame_layout.f4_offset;
11474   for (i = FPR4_REGNUM; i <= FPR4_REGNUM + 1; i++)
11475     {
11476       if (cfun_fpr_save_p (i))
11477 	{
11478 	  insn = save_fpr (stack_pointer_rtx, offset, i);
11479 	  if (offset < last_probe_offset)
11480 	    last_probe_offset = offset;
11481 	  offset += 8;
11482 
11483 	  /* If f4 and f6 are call clobbered they are saved due to
11484 	     stdargs and therefore are not frame related.  */
11485 	  if (!call_really_used_regs[i])
11486 	    RTX_FRAME_RELATED_P (insn) = 1;
11487 	}
11488       else if (!TARGET_PACKED_STACK || call_really_used_regs[i])
11489 	offset += 8;
11490     }
11491 
11492   if (TARGET_PACKED_STACK
11493       && cfun_save_high_fprs_p
11494       && cfun_frame_layout.f8_offset + cfun_frame_layout.high_fprs * 8 > 0)
11495     {
11496       offset = (cfun_frame_layout.f8_offset
11497 		+ (cfun_frame_layout.high_fprs - 1) * 8);
11498 
11499       for (i = FPR15_REGNUM; i >= FPR8_REGNUM && offset >= 0; i--)
11500 	if (cfun_fpr_save_p (i))
11501 	  {
11502 	    insn = save_fpr (stack_pointer_rtx, offset, i);
11503 	    if (offset < last_probe_offset)
11504 	      last_probe_offset = offset;
11505 
11506 	    RTX_FRAME_RELATED_P (insn) = 1;
11507 	    offset -= 8;
11508 	  }
11509       if (offset >= cfun_frame_layout.f8_offset)
11510 	next_fpr = i;
11511     }
11512 
11513   if (!TARGET_PACKED_STACK)
11514     next_fpr = cfun_save_high_fprs_p ? FPR15_REGNUM : 0;
11515 
11516   if (flag_stack_usage_info)
11517     current_function_static_stack_size = cfun_frame_layout.frame_size;
11518 
11519   /* Decrement stack pointer.  */
11520 
11521   if (cfun_frame_layout.frame_size > 0)
11522     {
11523       rtx frame_off = GEN_INT (-cfun_frame_layout.frame_size);
11524       rtx_insn *stack_pointer_backup_loc;
11525       bool temp_reg_clobbered_p;
11526 
11527       if (s390_stack_size)
11528 	{
11529 	  HOST_WIDE_INT stack_guard;
11530 
11531 	  if (s390_stack_guard)
11532 	    stack_guard = s390_stack_guard;
11533 	  else
11534 	    {
11535 	      /* If no stack guard value is provided, the smallest power of 2
11536 		 not smaller than the current frame size is chosen.  */
11537 	      stack_guard = 1;
11538 	      while (stack_guard < cfun_frame_layout.frame_size)
11539 		stack_guard <<= 1;
11540 	    }
11541 
11542 	  if (cfun_frame_layout.frame_size >= s390_stack_size)
11543 	    {
11544 	      warning (0, "frame size of function %qs is %wd"
11545 		       " bytes exceeding user provided stack limit of "
11546 		       "%d bytes.  "
11547 		       "An unconditional trap is added.",
11548 		       current_function_name(), cfun_frame_layout.frame_size,
11549 		       s390_stack_size);
11550 	      emit_insn (gen_trap ());
11551 	      emit_barrier ();
11552 	    }
11553 	  else
11554 	    {
11555 	      /* stack_guard has to be smaller than s390_stack_size.
11556 		 Otherwise we would emit an AND with zero which would
11557 		 not match the test under mask pattern.  */
11558 	      if (stack_guard >= s390_stack_size)
11559 		{
11560 		  warning (0, "frame size of function %qs is %wd"
11561 			   " bytes which is more than half the stack size. "
11562 			   "The dynamic check would not be reliable. "
11563 			   "No check emitted for this function.",
11564 			   current_function_name(),
11565 			   cfun_frame_layout.frame_size);
11566 		}
11567 	      else
11568 		{
11569 		  HOST_WIDE_INT stack_check_mask = ((s390_stack_size - 1)
11570 						    & ~(stack_guard - 1));
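		  /* Illustrative example (values not taken from this code):
		     with -mstack-size=65536 and -mstack-guard=4096 the mask
		     is 0xffff & ~0xfff = 0xf000, so the conditional trap
		     below fires once the stack pointer drops into the lowest
		     guard-sized slice of a stack-size aligned region.  */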
11571 
11572 		  rtx t = gen_rtx_AND (Pmode, stack_pointer_rtx,
11573 				       GEN_INT (stack_check_mask));
11574 		  if (TARGET_64BIT)
11575 		    emit_insn (gen_ctrapdi4 (gen_rtx_EQ (VOIDmode,
11576 							 t, const0_rtx),
11577 					     t, const0_rtx, const0_rtx));
11578 		  else
11579 		    emit_insn (gen_ctrapsi4 (gen_rtx_EQ (VOIDmode,
11580 							 t, const0_rtx),
11581 					     t, const0_rtx, const0_rtx));
11582 		}
11583 	    }
11584   	}
11585 
11586       if (s390_warn_framesize > 0
11587 	  && cfun_frame_layout.frame_size >= s390_warn_framesize)
11588 	warning (0, "frame size of %qs is %wd bytes",
11589 		 current_function_name (), cfun_frame_layout.frame_size);
11590 
11591       if (s390_warn_dynamicstack_p && cfun->calls_alloca)
11592 	warning (0, "%qs uses dynamic stack allocation", current_function_name ());
11593 
11594       /* Save the location where we could backup the incoming stack
11595 	 pointer.  */
11596       stack_pointer_backup_loc = get_last_insn ();
11597 
11598       temp_reg_clobbered_p = allocate_stack_space (frame_off, last_probe_offset,
11599 						   temp_reg);
11600 
11601       if (TARGET_BACKCHAIN || next_fpr)
11602 	{
11603 	  if (temp_reg_clobbered_p)
11604 	    {
11605 	      /* allocate_stack_space had to make use of temp_reg and
11606 		 we need it to hold a backup of the incoming stack
11607 		 pointer.  Calculate back that value from the current
11608 		 stack pointer.  */
11609 	      s390_prologue_plus_offset (temp_reg, stack_pointer_rtx,
11610 					 GEN_INT (cfun_frame_layout.frame_size),
11611 					 false);
11612 	    }
11613 	  else
11614 	    {
11615 	      /* allocate_stack_space didn't actually require
11616 		 temp_reg.  Insert the stack pointer backup insn
11617 		 before the stack pointer decrement code - knowing now
11618 		 that the value will survive.  */
11619 	      emit_insn_after (gen_move_insn (temp_reg, stack_pointer_rtx),
11620 			       stack_pointer_backup_loc);
11621 	    }
11622 	}
11623 
11624       /* Set backchain.  */
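      /* The backchain word receives the incoming stack pointer, still
	 available in temp_reg at this point, so that unwinders and
	 debuggers can walk the chain of stack frames.  */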
11625 
11626       if (TARGET_BACKCHAIN)
11627 	{
11628 	  if (cfun_frame_layout.backchain_offset)
11629 	    addr = gen_rtx_MEM (Pmode,
11630 				plus_constant (Pmode, stack_pointer_rtx,
11631 				  cfun_frame_layout.backchain_offset));
11632 	  else
11633 	    addr = gen_rtx_MEM (Pmode, stack_pointer_rtx);
11634 	  set_mem_alias_set (addr, get_frame_alias_set ());
11635 	  insn = emit_insn (gen_move_insn (addr, temp_reg));
11636 	}
11637 
11638       /* If we support non-call exceptions (e.g. for Java),
11639 	 we need to make sure the backchain pointer is set up
11640 	 before any possibly trapping memory access.  */
11641       if (TARGET_BACKCHAIN && cfun->can_throw_non_call_exceptions)
11642 	{
11643 	  addr = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (VOIDmode));
11644 	  emit_clobber (addr);
11645 	}
11646     }
11647   else if (flag_stack_clash_protection)
11648     dump_stack_clash_frame_info (NO_PROBE_NO_FRAME, false);
11649 
11650   /* Save fprs 8 - 15 (64 bit ABI).  */
11651 
11652   if (cfun_save_high_fprs_p && next_fpr)
11653     {
11654       /* If the stack might be accessed through a different register
11655 	 we have to make sure that the stack pointer decrement is not
11656 	 moved below the use of the stack slots.  */
11657       s390_emit_stack_tie ();
11658 
11659       insn = emit_insn (gen_add2_insn (temp_reg,
11660 				       GEN_INT (cfun_frame_layout.f8_offset)));
11661 
11662       offset = 0;
11663 
11664       for (i = FPR8_REGNUM; i <= next_fpr; i++)
11665 	if (cfun_fpr_save_p (i))
11666 	  {
11667 	    rtx addr = plus_constant (Pmode, stack_pointer_rtx,
11668 				      cfun_frame_layout.frame_size
11669 				      + cfun_frame_layout.f8_offset
11670 				      + offset);
11671 
11672 	    insn = save_fpr (temp_reg, offset, i);
11673 	    offset += 8;
11674 	    RTX_FRAME_RELATED_P (insn) = 1;
11675 	    add_reg_note (insn, REG_FRAME_RELATED_EXPR,
11676 			  gen_rtx_SET (gen_rtx_MEM (DFmode, addr),
11677 				       gen_rtx_REG (DFmode, i)));
11678 	  }
11679     }
11680 
11681   /* Set frame pointer, if needed.  */
11682 
11683   if (frame_pointer_needed)
11684     {
11685       insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
11686       RTX_FRAME_RELATED_P (insn) = 1;
11687     }
11688 
11689   /* Set up got pointer, if needed.  */
11690 
11691   if (flag_pic && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
11692     {
11693       rtx_insn *insns = s390_load_got ();
11694 
11695       for (rtx_insn *insn = insns; insn; insn = NEXT_INSN (insn))
11696 	annotate_constant_pool_refs (&PATTERN (insn));
11697 
11698       emit_insn (insns);
11699     }
11700 
11701   if (TARGET_TPF_PROFILING)
11702     {
11703       /* Generate a BAS instruction to serve as a function
11704 	 entry intercept to facilitate the use of tracing
11705 	 algorithms located at the branch target.  */
11706       emit_insn (gen_prologue_tpf ());
11707 
11708       /* Emit a blockage here so that all code
11709 	 lies between the profiling mechanisms.  */
11710       emit_insn (gen_blockage ());
11711     }
11712 }
11713 
11714 /* Expand the epilogue into a bunch of separate insns.  */
11715 
11716 void
11717 s390_emit_epilogue (bool sibcall)
11718 {
11719   rtx frame_pointer, return_reg, cfa_restores = NULL_RTX;
11720   int area_bottom, area_top, offset = 0;
11721   int next_offset;
11722   int i;
11723 
11724   if (TARGET_TPF_PROFILING)
11725     {
11726 
11727       /* Generate a BAS instruction to serve as a function
11728 	 exit intercept to facilitate the use of tracing
11729 	 algorithms located at the branch target.  */
11730 
11731       /* Emit a blockage here so that all code
11732          lies between the profiling mechanisms.  */
11733       emit_insn (gen_blockage ());
11734 
11735       emit_insn (gen_epilogue_tpf ());
11736     }
11737 
11738   /* Check whether to use frame or stack pointer for restore.  */
11739 
11740   frame_pointer = (frame_pointer_needed
11741 		   ? hard_frame_pointer_rtx : stack_pointer_rtx);
11742 
11743   s390_frame_area (&area_bottom, &area_top);
11744 
11745   /* Check whether we can access the register save area.
11746      If not, increment the frame pointer as required.  */
11747 
11748   if (area_top <= area_bottom)
11749     {
11750       /* Nothing to restore.  */
11751     }
11752   else if (DISP_IN_RANGE (cfun_frame_layout.frame_size + area_bottom)
11753            && DISP_IN_RANGE (cfun_frame_layout.frame_size + area_top - 1))
11754     {
11755       /* Area is in range.  */
11756       offset = cfun_frame_layout.frame_size;
11757     }
11758   else
11759     {
11760       rtx insn, frame_off, cfa;
11761 
11762       offset = area_bottom < 0 ? -area_bottom : 0;
11763       frame_off = GEN_INT (cfun_frame_layout.frame_size - offset);
11764 
11765       cfa = gen_rtx_SET (frame_pointer,
11766 			 gen_rtx_PLUS (Pmode, frame_pointer, frame_off));
11767       if (DISP_IN_RANGE (INTVAL (frame_off)))
11768 	{
11769 	  insn = gen_rtx_SET (frame_pointer,
11770 			      gen_rtx_PLUS (Pmode, frame_pointer, frame_off));
11771 	  insn = emit_insn (insn);
11772 	}
11773       else
11774 	{
11775 	  if (!CONST_OK_FOR_K (INTVAL (frame_off)))
11776 	    frame_off = force_const_mem (Pmode, frame_off);
11777 
11778 	  insn = emit_insn (gen_add2_insn (frame_pointer, frame_off));
11779 	  annotate_constant_pool_refs (&PATTERN (insn));
11780 	}
11781       add_reg_note (insn, REG_CFA_ADJUST_CFA, cfa);
11782       RTX_FRAME_RELATED_P (insn) = 1;
11783     }
11784 
11785   /* Restore call saved fprs.  */
11786 
11787   if (TARGET_64BIT)
11788     {
11789       if (cfun_save_high_fprs_p)
11790 	{
11791 	  next_offset = cfun_frame_layout.f8_offset;
11792 	  for (i = FPR8_REGNUM; i <= FPR15_REGNUM; i++)
11793 	    {
11794 	      if (cfun_fpr_save_p (i))
11795 		{
11796 		  restore_fpr (frame_pointer,
11797 			       offset + next_offset, i);
11798 		  cfa_restores
11799 		    = alloc_reg_note (REG_CFA_RESTORE,
11800 				      gen_rtx_REG (DFmode, i), cfa_restores);
11801 		  next_offset += 8;
11802 		}
11803 	    }
11804 	}
11805 
11806     }
11807   else
11808     {
11809       next_offset = cfun_frame_layout.f4_offset;
11810       /* f4, f6 */
11811       for (i = FPR4_REGNUM; i <= FPR4_REGNUM + 1; i++)
11812 	{
11813 	  if (cfun_fpr_save_p (i))
11814 	    {
11815 	      restore_fpr (frame_pointer,
11816 			   offset + next_offset, i);
11817 	      cfa_restores
11818 		= alloc_reg_note (REG_CFA_RESTORE,
11819 				  gen_rtx_REG (DFmode, i), cfa_restores);
11820 	      next_offset += 8;
11821 	    }
11822 	  else if (!TARGET_PACKED_STACK)
11823 	    next_offset += 8;
11824 	}
11825 
11826     }
11827 
11828   /* Return register.  */
11829 
11830   return_reg = gen_rtx_REG (Pmode, RETURN_REGNUM);
11831 
11832   /* Restore call saved gprs.  */
11833 
11834   if (cfun_frame_layout.first_restore_gpr != -1)
11835     {
11836       rtx insn, addr;
11837       int i;
11838 
11839       /* Check for global registers and save them
11840 	 to the stack location from where they get restored.  */
11841 
11842       for (i = cfun_frame_layout.first_restore_gpr;
11843 	   i <= cfun_frame_layout.last_restore_gpr;
11844 	   i++)
11845 	{
11846 	  if (global_not_special_regno_p (i))
11847 	    {
11848 	      addr = plus_constant (Pmode, frame_pointer,
11849 				    offset + cfun_frame_layout.gprs_offset
11850 				    + (i - cfun_frame_layout.first_save_gpr_slot)
11851 				    * UNITS_PER_LONG);
11852 	      addr = gen_rtx_MEM (Pmode, addr);
11853 	      set_mem_alias_set (addr, get_frame_alias_set ());
11854 	      emit_move_insn (addr, gen_rtx_REG (Pmode, i));
11855 	    }
11856 	  else
11857 	    cfa_restores
11858 	      = alloc_reg_note (REG_CFA_RESTORE,
11859 				gen_rtx_REG (Pmode, i), cfa_restores);
11860 	}
11861 
11862       /* Fetch the return address from the stack before the load multiple;
11863 	 this helps scheduling.
11864 
11865 	 Only do this if we already decided that r14 needs to be
11866 	 saved to a stack slot. (And not just because r14 happens to
11867 	 be in between two GPRs which need saving.)  Otherwise it
11868 	 would be difficult to take that decision back in
11869 	 s390_optimize_prologue.
11870 
11871 	 This optimization is only helpful on in-order machines.  */
11872       if (! sibcall
11873 	  && cfun_gpr_save_slot (RETURN_REGNUM) == SAVE_SLOT_STACK
11874 	  && s390_tune <= PROCESSOR_2097_Z10)
11875 	{
11876 	  int return_regnum = find_unused_clobbered_reg();
11877 	  if (!return_regnum
11878 	      || (TARGET_INDIRECT_BRANCH_NOBP_RET_OPTION
11879 		  && !TARGET_CPU_Z10
11880 		  && return_regnum == INDIRECT_BRANCH_THUNK_REGNUM))
11881 	    {
11882 	      gcc_assert (INDIRECT_BRANCH_THUNK_REGNUM != 4);
11883 	      return_regnum = 4;
11884 	    }
11885 	  return_reg = gen_rtx_REG (Pmode, return_regnum);
11886 
11887 	  addr = plus_constant (Pmode, frame_pointer,
11888 				offset + cfun_frame_layout.gprs_offset
11889 				+ (RETURN_REGNUM
11890 				   - cfun_frame_layout.first_save_gpr_slot)
11891 				* UNITS_PER_LONG);
11892 	  addr = gen_rtx_MEM (Pmode, addr);
11893 	  set_mem_alias_set (addr, get_frame_alias_set ());
11894 	  emit_move_insn (return_reg, addr);
11895 
11896 	  /* Once we did that optimization we have to make sure
11897 	     s390_optimize_prologue does not try to remove the store
11898 	     of r14 since we will not be able to find the load issued
11899 	     here.  */
11900 	  cfun_frame_layout.save_return_addr_p = true;
11901 	}
11902 
11903       insn = restore_gprs (frame_pointer,
11904 			   offset + cfun_frame_layout.gprs_offset
11905 			   + (cfun_frame_layout.first_restore_gpr
11906 			      - cfun_frame_layout.first_save_gpr_slot)
11907 			   * UNITS_PER_LONG,
11908 			   cfun_frame_layout.first_restore_gpr,
11909 			   cfun_frame_layout.last_restore_gpr);
11910       insn = emit_insn (insn);
11911       REG_NOTES (insn) = cfa_restores;
11912       add_reg_note (insn, REG_CFA_DEF_CFA,
11913 		    plus_constant (Pmode, stack_pointer_rtx,
11914 				   STACK_POINTER_OFFSET));
11915       RTX_FRAME_RELATED_P (insn) = 1;
11916     }
11917 
11918   s390_restore_gprs_from_fprs ();
11919 
11920   if (! sibcall)
11921     emit_jump_insn (gen_return_use (return_reg));
11922 }
11923 
11924 /* Implement TARGET_SET_UP_BY_PROLOGUE.  */
11925 
11926 static void
11927 s300_set_up_by_prologue (hard_reg_set_container *regs)
11928 {
11929   if (cfun->machine->base_reg
11930       && !call_really_used_regs[REGNO (cfun->machine->base_reg)])
11931     SET_HARD_REG_BIT (regs->set, REGNO (cfun->machine->base_reg));
11932 }
11933 
11934 /* -fsplit-stack support.  */
11935 
11936 /* A SYMBOL_REF for __morestack.  */
11937 static GTY(()) rtx morestack_ref;
11938 
11939 /* When using -fsplit-stack, the allocation routines set a field in
11940    the TCB to the bottom of the stack plus this much space, measured
11941    in bytes.  */
11942 
11943 #define SPLIT_STACK_AVAILABLE 1024
11944 
11945 /* Emit -fsplit-stack prologue, which goes before the regular function
11946    prologue.  */
11947 
11948 void
11949 s390_expand_split_stack_prologue (void)
11950 {
11951   rtx r1, guard, cc = NULL;
11952   rtx_insn *insn;
11953   /* Offset from thread pointer to __private_ss.  */
11954   int psso = TARGET_64BIT ? 0x38 : 0x20;
11955   /* Pointer size in bytes.  */
11956   /* Frame size and argument size - the two parameters to __morestack.  */
11957   HOST_WIDE_INT frame_size = cfun_frame_layout.frame_size;
11958   /* Align argument size to 8 bytes - simplifies __morestack code.  */
11959   HOST_WIDE_INT args_size = crtl->args.size >= 0
11960 			    ? ((crtl->args.size + 7) & ~7)
11961 			    : 0;
11962   /* Label to be called by __morestack.  */
11963   rtx_code_label *call_done = NULL;
11964   rtx_code_label *parm_base = NULL;
11965   rtx tmp;
11966 
11967   gcc_assert (flag_split_stack && reload_completed);
11968   if (!TARGET_CPU_ZARCH)
11969     {
11970       sorry ("CPUs older than z900 are not supported for -fsplit-stack");
11971       return;
11972     }
11973 
11974   r1 = gen_rtx_REG (Pmode, 1);
11975 
11976   /* If no stack frame will be allocated, don't do anything.  */
11977   if (!frame_size)
11978     {
11979       if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
11980 	{
11981 	  /* If va_start is used, just use r15.  */
11982 	  emit_move_insn (r1,
11983 			 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
11984 				       GEN_INT (STACK_POINTER_OFFSET)));
11985 
11986 	}
11987       return;
11988     }
11989 
11990   if (morestack_ref == NULL_RTX)
11991     {
11992       morestack_ref = gen_rtx_SYMBOL_REF (Pmode, "__morestack");
11993       SYMBOL_REF_FLAGS (morestack_ref) |= (SYMBOL_FLAG_LOCAL
11994 					   | SYMBOL_FLAG_FUNCTION);
11995     }
11996 
11997   if (CONST_OK_FOR_K (frame_size) || CONST_OK_FOR_Op (frame_size))
11998     {
11999       /* If frame_size will fit in an add instruction, do a stack space
12000 	 check, and only call __morestack if there's not enough space.  */
12001 
12002       /* Get thread pointer.  r1 is the only register we can always destroy - r0
12003 	 could contain a static chain (and cannot be used to address memory
12004 	 anyway), r2-r6 can contain parameters, and r6-r15 are callee-saved.  */
12005       emit_move_insn (r1, gen_rtx_REG (Pmode, TP_REGNUM));
12006       /* Aim at __private_ss.  */
12007       guard = gen_rtx_MEM (Pmode, plus_constant (Pmode, r1, psso));
12008 
12009       /* If less than 1kiB is used, skip the addition and compare directly with
12010 	 __private_ss.  */
12011       if (frame_size > SPLIT_STACK_AVAILABLE)
12012 	{
12013 	  emit_move_insn (r1, guard);
12014 	  if (TARGET_64BIT)
12015 	    emit_insn (gen_adddi3 (r1, r1, GEN_INT (frame_size)));
12016 	  else
12017 	    emit_insn (gen_addsi3 (r1, r1, GEN_INT (frame_size)));
12018 	  guard = r1;
12019 	}
12020 
12021       /* Compare the (maybe adjusted) guard with the stack pointer.  */
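      /* The stack grows downwards, so running short of space shows up as
	 the stack pointer being below the (possibly frame-size adjusted)
	 __private_ss value; only in that case is __morestack called.  */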
12022       cc = s390_emit_compare (LT, stack_pointer_rtx, guard);
12023     }
12024 
12025   call_done = gen_label_rtx ();
12026   parm_base = gen_label_rtx ();
12027 
12028   /* Emit the parameter block.  */
12029   tmp = gen_split_stack_data (parm_base, call_done,
12030 			      GEN_INT (frame_size),
12031 			      GEN_INT (args_size));
12032   insn = emit_insn (tmp);
12033   add_reg_note (insn, REG_LABEL_OPERAND, call_done);
12034   LABEL_NUSES (call_done)++;
12035   add_reg_note (insn, REG_LABEL_OPERAND, parm_base);
12036   LABEL_NUSES (parm_base)++;
12037 
12038   /* %r1 = litbase.  */
12039   insn = emit_move_insn (r1, gen_rtx_LABEL_REF (VOIDmode, parm_base));
12040   add_reg_note (insn, REG_LABEL_OPERAND, parm_base);
12041   LABEL_NUSES (parm_base)++;
12042 
12043   /* Now, we need to call __morestack.  It has very special calling
12044      conventions: it preserves param/return/static chain registers for
12045      calling main function body, and looks for its own parameters at %r1. */
12046 
12047   if (cc != NULL)
12048     {
12049       tmp = gen_split_stack_cond_call (morestack_ref, cc, call_done);
12050 
12051       insn = emit_jump_insn (tmp);
12052       JUMP_LABEL (insn) = call_done;
12053       LABEL_NUSES (call_done)++;
12054 
12055       /* Mark the jump as very unlikely to be taken.  */
12056       add_reg_br_prob_note (insn,
12057 		            profile_probability::very_unlikely ());
12058 
12059       if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
12060 	{
12061 	  /* If va_start is used, and __morestack was not called, just use
12062 	     r15.  */
12063 	  emit_move_insn (r1,
12064 			 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
12065 				       GEN_INT (STACK_POINTER_OFFSET)));
12066 	}
12067     }
12068   else
12069     {
12070       tmp = gen_split_stack_call (morestack_ref, call_done);
12071       insn = emit_jump_insn (tmp);
12072       JUMP_LABEL (insn) = call_done;
12073       LABEL_NUSES (call_done)++;
12074       emit_barrier ();
12075     }
12076 
12077   /* __morestack will call us here.  */
12078 
12079   emit_label (call_done);
12080 }
12081 
12082 /* We may have to tell the dataflow pass that the split stack prologue
12083    is initializing a register.  */
12084 
12085 static void
12086 s390_live_on_entry (bitmap regs)
12087 {
12088   if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
12089     {
12090       gcc_assert (flag_split_stack);
12091       bitmap_set_bit (regs, 1);
12092     }
12093 }
12094 
12095 /* Return true if the function can use simple_return to return outside
12096    of a shrink-wrapped region.  At present shrink-wrapping is supported
12097    in all cases.  */
12098 
12099 bool
12100 s390_can_use_simple_return_insn (void)
12101 {
12102   return true;
12103 }
12104 
12105 /* Return true if the epilogue is guaranteed to contain only a return
12106    instruction and if a direct return can therefore be used instead.
12107    One of the main advantages of using direct return instructions
12108    is that we can then use conditional returns.  */
12109 
12110 bool
12111 s390_can_use_return_insn (void)
12112 {
12113   int i;
12114 
12115   if (!reload_completed)
12116     return false;
12117 
12118   if (crtl->profile)
12119     return false;
12120 
12121   if (TARGET_TPF_PROFILING)
12122     return false;
12123 
12124   for (i = 0; i < 16; i++)
12125     if (cfun_gpr_save_slot (i) != SAVE_SLOT_NONE)
12126       return false;
12127 
12128   /* For 31 bit this is not covered by the frame_size check below
12129      since f4, f6 are saved in the register save area without needing
12130      additional stack space.  */
12131   if (!TARGET_64BIT
12132       && (cfun_fpr_save_p (FPR4_REGNUM) || cfun_fpr_save_p (FPR6_REGNUM)))
12133     return false;
12134 
12135   if (cfun->machine->base_reg
12136       && !call_really_used_regs[REGNO (cfun->machine->base_reg)])
12137     return false;
12138 
12139   return cfun_frame_layout.frame_size == 0;
12140 }
12141 
12142 /* The VX ABI differs for vararg functions.  Therefore we need the
12143    prototype of the callee to be available when passing vector type
12144    values.  */
12145 static const char *
12146 s390_invalid_arg_for_unprototyped_fn (const_tree typelist, const_tree funcdecl, const_tree val)
12147 {
12148   return ((TARGET_VX_ABI
12149 	   && typelist == 0
12150 	   && VECTOR_TYPE_P (TREE_TYPE (val))
12151 	   && (funcdecl == NULL_TREE
12152 	       || (TREE_CODE (funcdecl) == FUNCTION_DECL
12153 		   && DECL_BUILT_IN_CLASS (funcdecl) != BUILT_IN_MD)))
12154 	  ? N_("vector argument passed to unprototyped function")
12155 	  : NULL);
12156 }
12157 
12158 
12159 /* Return the size in bytes of a function argument of
12160    type TYPE and/or mode MODE.  At least one of TYPE or
12161    MODE must be specified.  */
12162 
12163 static int
12164 s390_function_arg_size (machine_mode mode, const_tree type)
12165 {
12166   if (type)
12167     return int_size_in_bytes (type);
12168 
12169   /* No type info available for some library calls ...  */
12170   if (mode != BLKmode)
12171     return GET_MODE_SIZE (mode);
12172 
12173   /* If we have neither type nor mode, abort.  */
12174   gcc_unreachable ();
12175 }
12176 
12177 /* Return true if a function argument of type TYPE and mode MODE
12178    is to be passed in a vector register, if available.  */
12179 
12180 bool
12181 s390_function_arg_vector (machine_mode mode, const_tree type)
12182 {
12183   if (!TARGET_VX_ABI)
12184     return false;
12185 
12186   if (s390_function_arg_size (mode, type) > 16)
12187     return false;
12188 
12189   /* No type info available for some library calls ...  */
12190   if (!type)
12191     return VECTOR_MODE_P (mode);
12192 
12193   /* The ABI says that record types with a single member are treated
12194      just like that member would be.  */
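  /* Illustrative example (not from the ABI text): a single-member wrapper
     such as struct s { __vector int v; } is passed exactly like a bare
     __vector int.  */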
12195   while (TREE_CODE (type) == RECORD_TYPE)
12196     {
12197       tree field, single = NULL_TREE;
12198 
12199       for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
12200 	{
12201 	  if (TREE_CODE (field) != FIELD_DECL)
12202 	    continue;
12203 
12204 	  if (single == NULL_TREE)
12205 	    single = TREE_TYPE (field);
12206 	  else
12207 	    return false;
12208 	}
12209 
12210       if (single == NULL_TREE)
12211 	return false;
12212       else
12213 	{
12214 	  /* If the field declaration adds extra bytes due to
12215 	     e.g. padding, this is not accepted as a vector type.  */
12216 	  if (int_size_in_bytes (single) <= 0
12217 	      || int_size_in_bytes (single) != int_size_in_bytes (type))
12218 	    return false;
12219 	  type = single;
12220 	}
12221     }
12222 
12223   return VECTOR_TYPE_P (type);
12224 }
12225 
12226 /* Return true if a function argument of type TYPE and mode MODE
12227    is to be passed in a floating-point register, if available.  */
12228 
12229 static bool
12230 s390_function_arg_float (machine_mode mode, const_tree type)
12231 {
12232   if (s390_function_arg_size (mode, type) > 8)
12233     return false;
12234 
12235   /* Soft-float changes the ABI: no floating-point registers are used.  */
12236   if (TARGET_SOFT_FLOAT)
12237     return false;
12238 
12239   /* No type info available for some library calls ...  */
12240   if (!type)
12241     return mode == SFmode || mode == DFmode || mode == SDmode || mode == DDmode;
12242 
12243   /* The ABI says that record types with a single member are treated
12244      just like that member would be.  */
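  /* For instance (illustrative only), struct s { double d; } is passed
     like a plain double.  */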
12245   while (TREE_CODE (type) == RECORD_TYPE)
12246     {
12247       tree field, single = NULL_TREE;
12248 
12249       for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
12250 	{
12251 	  if (TREE_CODE (field) != FIELD_DECL)
12252 	    continue;
12253 
12254 	  if (single == NULL_TREE)
12255 	    single = TREE_TYPE (field);
12256 	  else
12257 	    return false;
12258 	}
12259 
12260       if (single == NULL_TREE)
12261 	return false;
12262       else
12263 	type = single;
12264     }
12265 
12266   return TREE_CODE (type) == REAL_TYPE;
12267 }
12268 
12269 /* Return true if a function argument of type TYPE and mode MODE
12270    is to be passed in an integer register, or a pair of integer
12271    registers, if available.  */
12272 
12273 static bool
12274 s390_function_arg_integer (machine_mode mode, const_tree type)
12275 {
12276   int size = s390_function_arg_size (mode, type);
12277   if (size > 8)
12278     return false;
12279 
12280   /* No type info available for some library calls ...  */
12281   if (!type)
12282     return GET_MODE_CLASS (mode) == MODE_INT
12283 	   || (TARGET_SOFT_FLOAT &&  SCALAR_FLOAT_MODE_P (mode));
12284 
12285   /* We accept small integral (and similar) types.  */
12286   if (INTEGRAL_TYPE_P (type)
12287       || POINTER_TYPE_P (type)
12288       || TREE_CODE (type) == NULLPTR_TYPE
12289       || TREE_CODE (type) == OFFSET_TYPE
12290       || (TARGET_SOFT_FLOAT && TREE_CODE (type) == REAL_TYPE))
12291     return true;
12292 
12293   /* We also accept structs of size 1, 2, 4, 8 that are not
12294      passed in floating-point registers.  */
12295   if (AGGREGATE_TYPE_P (type)
12296       && exact_log2 (size) >= 0
12297       && !s390_function_arg_float (mode, type))
12298     return true;
12299 
12300   return false;
12301 }
12302 
12303 /* Return 1 if a function argument of type TYPE and mode MODE
12304    is to be passed by reference.  The ABI specifies that only
12305    structures of size 1, 2, 4, or 8 bytes are passed by value,
12306    all other structures (and complex numbers) are passed by
12307    reference.  */
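/* For example (illustrative only): an 8 byte struct is passed by value
   in a register or stack slot, while a 12 byte struct is passed by
   reference, i.e. only its address is handed over.  */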
12308 
12309 static bool
12310 s390_pass_by_reference (cumulative_args_t ca ATTRIBUTE_UNUSED,
12311 			machine_mode mode, const_tree type,
12312 			bool named ATTRIBUTE_UNUSED)
12313 {
12314   int size = s390_function_arg_size (mode, type);
12315 
12316   if (s390_function_arg_vector (mode, type))
12317     return false;
12318 
12319   if (size > 8)
12320     return true;
12321 
12322   if (type)
12323     {
12324       if (AGGREGATE_TYPE_P (type) && exact_log2 (size) < 0)
12325         return true;
12326 
12327       if (TREE_CODE (type) == COMPLEX_TYPE
12328 	  || TREE_CODE (type) == VECTOR_TYPE)
12329 	return true;
12330     }
12331 
12332   return false;
12333 }
12334 
12335 /* Update the data in CUM to advance over an argument of mode MODE and
12336    data type TYPE.  (TYPE is null for libcalls where that information
12337    may not be available.).  The boolean NAMED specifies whether the
12338    argument is a named argument (as opposed to an unnamed argument
12339    matching an ellipsis).  */
12340 
12341 static void
12342 s390_function_arg_advance (cumulative_args_t cum_v, machine_mode mode,
12343 			   const_tree type, bool named)
12344 {
12345   CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
12346 
12347   if (s390_function_arg_vector (mode, type))
12348     {
12349       /* We are called for unnamed vector stdarg arguments which are
12350 	 passed on the stack.  In this case this hook does not have to
12351 	 do anything since stack arguments are tracked by common
12352 	 code.  */
12353       if (!named)
12354 	return;
12355       cum->vrs += 1;
12356     }
12357   else if (s390_function_arg_float (mode, type))
12358     {
12359       cum->fprs += 1;
12360     }
12361   else if (s390_function_arg_integer (mode, type))
12362     {
12363       int size = s390_function_arg_size (mode, type);
12364       cum->gprs += ((size + UNITS_PER_LONG - 1) / UNITS_PER_LONG);
12365     }
12366   else
12367     gcc_unreachable ();
12368 }
12369 
12370 /* Define where to put the arguments to a function.
12371    Value is zero to push the argument on the stack,
12372    or a hard register in which to store the argument.
12373 
12374    MODE is the argument's machine mode.
12375    TYPE is the data type of the argument (as a tree).
12376     This is null for libcalls where that information may
12377     not be available.
12378    CUM is a variable of type CUMULATIVE_ARGS which gives info about
12379     the preceding args and about the function being called.
12380    NAMED is nonzero if this argument is a named parameter
12381     (otherwise it is an extra parameter matching an ellipsis).
12382 
12383    On S/390, we use general purpose registers 2 through 6 to
12384    pass integer, pointer, and certain structure arguments, and
12385    floating point registers 0 and 2 (0, 2, 4, and 6 on 64-bit)
12386    to pass floating point arguments.  All remaining arguments
12387    are pushed to the stack.  */
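/* For instance, on a 64-bit target (illustrative only): for a call
   f (int, double, long) the int is passed in %r2, the double in %f0 and
   the long in %r3.  */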
12388 
12389 static rtx
12390 s390_function_arg (cumulative_args_t cum_v, machine_mode mode,
12391 		   const_tree type, bool named)
12392 {
12393   CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
12394 
12395   if (!named)
12396     s390_check_type_for_vector_abi (type, true, false);
12397 
12398   if (s390_function_arg_vector (mode, type))
12399     {
12400       /* Vector arguments being part of the ellipsis are passed on the
12401 	 stack.  */
12402       if (!named || (cum->vrs + 1 > VEC_ARG_NUM_REG))
12403 	return NULL_RTX;
12404 
12405       return gen_rtx_REG (mode, cum->vrs + FIRST_VEC_ARG_REGNO);
12406     }
12407   else if (s390_function_arg_float (mode, type))
12408     {
12409       if (cum->fprs + 1 > FP_ARG_NUM_REG)
12410 	return NULL_RTX;
12411       else
12412 	return gen_rtx_REG (mode, cum->fprs + 16);
12413     }
12414   else if (s390_function_arg_integer (mode, type))
12415     {
12416       int size = s390_function_arg_size (mode, type);
12417       int n_gprs = (size + UNITS_PER_LONG - 1) / UNITS_PER_LONG;
12418 
12419       if (cum->gprs + n_gprs > GP_ARG_NUM_REG)
12420 	return NULL_RTX;
12421       else if (n_gprs == 1 || UNITS_PER_WORD == UNITS_PER_LONG)
12422 	return gen_rtx_REG (mode, cum->gprs + 2);
12423       else if (n_gprs == 2)
12424 	{
12425 	  rtvec p = rtvec_alloc (2);
12426 
12427 	  RTVEC_ELT (p, 0)
12428 	    = gen_rtx_EXPR_LIST (SImode, gen_rtx_REG (SImode, cum->gprs + 2),
12429 					 const0_rtx);
12430 	  RTVEC_ELT (p, 1)
12431 	    = gen_rtx_EXPR_LIST (SImode, gen_rtx_REG (SImode, cum->gprs + 3),
12432 					 GEN_INT (4));
12433 
12434 	  return gen_rtx_PARALLEL (mode, p);
12435 	}
12436     }
12437 
12438   /* After the real arguments, expand_call calls us once again
12439      with a void_type_node type.  Whatever we return here is
12440      passed as operand 2 to the call expanders.
12441 
12442      We don't need this feature ...  */
12443   else if (type == void_type_node)
12444     return const0_rtx;
12445 
12446   gcc_unreachable ();
12447 }
12448 
12449 /* Implement TARGET_FUNCTION_ARG_BOUNDARY.  Vector arguments are
12450    left-justified when placed on the stack during parameter passing.  */
12451 
12452 static pad_direction
12453 s390_function_arg_padding (machine_mode mode, const_tree type)
12454 {
12455   if (s390_function_arg_vector (mode, type))
12456     return PAD_UPWARD;
12457 
12458   return default_function_arg_padding (mode, type);
12459 }
12460 
12461 /* Return true if return values of type TYPE should be returned
12462    in a memory buffer whose address is passed by the caller as
12463    hidden first argument.  */
12464 
12465 static bool
12466 s390_return_in_memory (const_tree type, const_tree fundecl ATTRIBUTE_UNUSED)
12467 {
12468   /* We accept small integral (and similar) types.  */
12469   if (INTEGRAL_TYPE_P (type)
12470       || POINTER_TYPE_P (type)
12471       || TREE_CODE (type) == OFFSET_TYPE
12472       || TREE_CODE (type) == REAL_TYPE)
12473     return int_size_in_bytes (type) > 8;
12474 
12475   /* vector types which fit into a VR.  */
12476   if (TARGET_VX_ABI
12477       && VECTOR_TYPE_P (type)
12478       && int_size_in_bytes (type) <= 16)
12479     return false;
12480 
12481   /* Aggregates and similar constructs are always returned
12482      in memory.  */
12483   if (AGGREGATE_TYPE_P (type)
12484       || TREE_CODE (type) == COMPLEX_TYPE
12485       || VECTOR_TYPE_P (type))
12486     return true;
12487 
12488   /* ??? We get called on all sorts of random stuff from
12489      aggregate_value_p.  We can't abort, but it's not clear
12490      what's safe to return.  Pretend it's a struct I guess.  */
12491   return true;
12492 }
12493 
12494 /* Function arguments and return values are promoted to word size.  */
12495 
12496 static machine_mode
12497 s390_promote_function_mode (const_tree type, machine_mode mode,
12498                             int *punsignedp,
12499                             const_tree fntype ATTRIBUTE_UNUSED,
12500                             int for_return ATTRIBUTE_UNUSED)
12501 {
12502   if (INTEGRAL_MODE_P (mode)
12503       && GET_MODE_SIZE (mode) < UNITS_PER_LONG)
12504     {
12505       if (type != NULL_TREE && POINTER_TYPE_P (type))
12506 	*punsignedp = POINTERS_EXTEND_UNSIGNED;
12507       return Pmode;
12508     }
12509 
12510   return mode;
12511 }
12512 
12513 /* Define where to return a (scalar) value of type RET_TYPE.
12514    If RET_TYPE is null, define where to return a (scalar)
12515    value of mode MODE from a libcall.  */
12516 
12517 static rtx
12518 s390_function_and_libcall_value (machine_mode mode,
12519 				 const_tree ret_type,
12520 				 const_tree fntype_or_decl,
12521 				 bool outgoing ATTRIBUTE_UNUSED)
12522 {
12523   /* For vector return types it is important to use the RET_TYPE
12524      argument whenever available since the middle-end might have
12525      changed the mode to a scalar mode.  */
12526   bool vector_ret_type_p = ((ret_type && VECTOR_TYPE_P (ret_type))
12527 			    || (!ret_type && VECTOR_MODE_P (mode)));
12528 
12529   /* For normal functions perform the promotion as
12530      promote_function_mode would do.  */
12531   if (ret_type)
12532     {
12533       int unsignedp = TYPE_UNSIGNED (ret_type);
12534       mode = promote_function_mode (ret_type, mode, &unsignedp,
12535 				    fntype_or_decl, 1);
12536     }
12537 
12538   gcc_assert (GET_MODE_CLASS (mode) == MODE_INT
12539 	      || SCALAR_FLOAT_MODE_P (mode)
12540 	      || (TARGET_VX_ABI && vector_ret_type_p));
12541   gcc_assert (GET_MODE_SIZE (mode) <= (TARGET_VX_ABI ? 16 : 8));
12542 
12543   if (TARGET_VX_ABI && vector_ret_type_p)
12544     return gen_rtx_REG (mode, FIRST_VEC_ARG_REGNO);
12545   else if (TARGET_HARD_FLOAT && SCALAR_FLOAT_MODE_P (mode))
12546     return gen_rtx_REG (mode, 16);
12547   else if (GET_MODE_SIZE (mode) <= UNITS_PER_LONG
12548 	   || UNITS_PER_LONG == UNITS_PER_WORD)
12549     return gen_rtx_REG (mode, 2);
12550   else if (GET_MODE_SIZE (mode) == 2 * UNITS_PER_LONG)
12551     {
12552       /* This case is triggered when returning a 64 bit value with
12553 	 -m31 -mzarch.  Although the value would fit into a single
12554 	 register it has to be forced into a 32 bit register pair in
12555 	 order to match the ABI.  */
12556       rtvec p = rtvec_alloc (2);
12557 
12558       RTVEC_ELT (p, 0)
12559 	= gen_rtx_EXPR_LIST (SImode, gen_rtx_REG (SImode, 2), const0_rtx);
12560       RTVEC_ELT (p, 1)
12561 	= gen_rtx_EXPR_LIST (SImode, gen_rtx_REG (SImode, 3), GEN_INT (4));
12562 
12563       return gen_rtx_PARALLEL (mode, p);
12564     }
12565 
12566   gcc_unreachable ();
12567 }
12568 
12569 /* Define where to return a scalar return value of type RET_TYPE.  */
12570 
12571 static rtx
12572 s390_function_value (const_tree ret_type, const_tree fn_decl_or_type,
12573 		     bool outgoing)
12574 {
12575   return s390_function_and_libcall_value (TYPE_MODE (ret_type), ret_type,
12576 					  fn_decl_or_type, outgoing);
12577 }
12578 
12579 /* Define where to return a scalar libcall return value of mode
12580    MODE.  */
12581 
12582 static rtx
12583 s390_libcall_value (machine_mode mode, const_rtx fun ATTRIBUTE_UNUSED)
12584 {
12585   return s390_function_and_libcall_value (mode, NULL_TREE,
12586 					  NULL_TREE, true);
12587 }
12588 
12589 
12590 /* Create and return the va_list datatype.
12591 
12592    On S/390, va_list is an array type equivalent to
12593 
12594       typedef struct __va_list_tag
12595         {
12596             long __gpr;
12597             long __fpr;
12598             void *__overflow_arg_area;
12599             void *__reg_save_area;
12600         } va_list[1];
12601 
12602    where __gpr and __fpr hold the number of general purpose
12603    or floating point arguments used up to now, respectively,
12604    __overflow_arg_area points to the stack location of the
12605    next argument passed on the stack, and __reg_save_area
12606    always points to the start of the register area in the
12607    call frame of the current function.  The function prologue
12608    saves all registers used for argument passing into this
12609    area if the function uses variable arguments.  */
12610 
12611 static tree
12612 s390_build_builtin_va_list (void)
12613 {
12614   tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
12615 
12616   record = lang_hooks.types.make_type (RECORD_TYPE);
12617 
12618   type_decl =
12619     build_decl (BUILTINS_LOCATION,
12620 		TYPE_DECL, get_identifier ("__va_list_tag"), record);
12621 
12622   f_gpr = build_decl (BUILTINS_LOCATION,
12623 		      FIELD_DECL, get_identifier ("__gpr"),
12624 		      long_integer_type_node);
12625   f_fpr = build_decl (BUILTINS_LOCATION,
12626 		      FIELD_DECL, get_identifier ("__fpr"),
12627 		      long_integer_type_node);
12628   f_ovf = build_decl (BUILTINS_LOCATION,
12629 		      FIELD_DECL, get_identifier ("__overflow_arg_area"),
12630 		      ptr_type_node);
12631   f_sav = build_decl (BUILTINS_LOCATION,
12632 		      FIELD_DECL, get_identifier ("__reg_save_area"),
12633 		      ptr_type_node);
12634 
12635   va_list_gpr_counter_field = f_gpr;
12636   va_list_fpr_counter_field = f_fpr;
12637 
12638   DECL_FIELD_CONTEXT (f_gpr) = record;
12639   DECL_FIELD_CONTEXT (f_fpr) = record;
12640   DECL_FIELD_CONTEXT (f_ovf) = record;
12641   DECL_FIELD_CONTEXT (f_sav) = record;
12642 
12643   TYPE_STUB_DECL (record) = type_decl;
12644   TYPE_NAME (record) = type_decl;
12645   TYPE_FIELDS (record) = f_gpr;
12646   DECL_CHAIN (f_gpr) = f_fpr;
12647   DECL_CHAIN (f_fpr) = f_ovf;
12648   DECL_CHAIN (f_ovf) = f_sav;
12649 
12650   layout_type (record);
12651 
12652   /* The correct type is an array type of one element.  */
12653   return build_array_type (record, build_index_type (size_zero_node));
12654 }
12655 
12656 /* Implement va_start by filling the va_list structure VALIST.
12657    STDARG_P is always true, and ignored.
12658    NEXTARG points to the first anonymous stack argument.
12659 
12660    The following global variables are used to initialize
12661    the va_list structure:
12662 
12663      crtl->args.info:
12664        holds number of gprs and fprs used for named arguments.
12665      crtl->args.arg_offset_rtx:
12666        holds the offset of the first anonymous stack argument
12667        (relative to the virtual arg pointer).  */
12668 
12669 static void
12670 s390_va_start (tree valist, rtx nextarg ATTRIBUTE_UNUSED)
12671 {
12672   HOST_WIDE_INT n_gpr, n_fpr;
12673   int off;
12674   tree f_gpr, f_fpr, f_ovf, f_sav;
12675   tree gpr, fpr, ovf, sav, t;
12676 
12677   f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
12678   f_fpr = DECL_CHAIN (f_gpr);
12679   f_ovf = DECL_CHAIN (f_fpr);
12680   f_sav = DECL_CHAIN (f_ovf);
12681 
12682   valist = build_simple_mem_ref (valist);
12683   gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
12684   fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
12685   ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
12686   sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
12687 
12688   /* Count number of gp and fp argument registers used.  */
12689 
12690   n_gpr = crtl->args.info.gprs;
12691   n_fpr = crtl->args.info.fprs;
12692 
12693   if (cfun->va_list_gpr_size)
12694     {
12695       t = build2 (MODIFY_EXPR, TREE_TYPE (gpr), gpr,
12696 		  build_int_cst (NULL_TREE, n_gpr));
12697       TREE_SIDE_EFFECTS (t) = 1;
12698       expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
12699     }
12700 
12701   if (cfun->va_list_fpr_size)
12702     {
12703       t = build2 (MODIFY_EXPR, TREE_TYPE (fpr), fpr,
12704 	          build_int_cst (NULL_TREE, n_fpr));
12705       TREE_SIDE_EFFECTS (t) = 1;
12706       expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
12707     }
12708 
12709   if (flag_split_stack
12710      && (lookup_attribute ("no_split_stack", DECL_ATTRIBUTES (cfun->decl))
12711          == NULL)
12712      && cfun->machine->split_stack_varargs_pointer == NULL_RTX)
12713     {
12714       rtx reg;
12715       rtx_insn *seq;
12716 
12717       reg = gen_reg_rtx (Pmode);
12718       cfun->machine->split_stack_varargs_pointer = reg;
12719 
12720       start_sequence ();
12721       emit_move_insn (reg, gen_rtx_REG (Pmode, 1));
12722       seq = get_insns ();
12723       end_sequence ();
12724 
12725       push_topmost_sequence ();
12726       emit_insn_after (seq, entry_of_function ());
12727       pop_topmost_sequence ();
12728     }
12729 
12730   /* Find the overflow area.
12731      FIXME: This currently is too pessimistic when the vector ABI is
12732      enabled.  In that case we *always* set up the overflow area
12733      pointer.  */
12734   if (n_gpr + cfun->va_list_gpr_size > GP_ARG_NUM_REG
12735       || n_fpr + cfun->va_list_fpr_size > FP_ARG_NUM_REG
12736       || TARGET_VX_ABI)
12737     {
12738       if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
12739         t = make_tree (TREE_TYPE (ovf), virtual_incoming_args_rtx);
12740       else
12741         t = make_tree (TREE_TYPE (ovf), cfun->machine->split_stack_varargs_pointer);
12742 
12743       off = INTVAL (crtl->args.arg_offset_rtx);
12744       off = off < 0 ? 0 : off;
12745       if (TARGET_DEBUG_ARG)
12746 	fprintf (stderr, "va_start: n_gpr = %d, n_fpr = %d off %d\n",
12747 		 (int)n_gpr, (int)n_fpr, off);
12748 
12749       t = fold_build_pointer_plus_hwi (t, off);
12750 
12751       t = build2 (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
12752       TREE_SIDE_EFFECTS (t) = 1;
12753       expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
12754     }
12755 
12756   /* Find the register save area.  */
12757   if ((cfun->va_list_gpr_size && n_gpr < GP_ARG_NUM_REG)
12758       || (cfun->va_list_fpr_size && n_fpr < FP_ARG_NUM_REG))
12759     {
12760       t = make_tree (TREE_TYPE (sav), return_address_pointer_rtx);
12761       t = fold_build_pointer_plus_hwi (t, -RETURN_REGNUM * UNITS_PER_LONG);
12762 
12763       t = build2 (MODIFY_EXPR, TREE_TYPE (sav), sav, t);
12764       TREE_SIDE_EFFECTS (t) = 1;
12765       expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
12766     }
12767 }
12768 
12769 /* Implement va_arg by updating the va_list structure
12770    VALIST as required to retrieve an argument of type
12771    TYPE, and returning that argument.
12772 
12773    Generates code equivalent to:
12774 
12775    if (integral value) {
12776      if (size  <= 4 && args.gpr < 5 ||
12777          size  > 4 && args.gpr < 4 )
12778        ret = args.reg_save_area[args.gpr+8]
12779      else
12780        ret = *args.overflow_arg_area++;
12781    } else if (vector value) {
12782        ret = *args.overflow_arg_area;
12783        args.overflow_arg_area += size / 8;
12784    } else if (float value) {
12785      if (args.fpr < 2)
12786        ret = args.reg_save_area[args.fpr+64]
12787      else
12788        ret = *args.overflow_arg_area++;
12789    } else if (aggregate value) {
12790      if (args.gpr < 5)
12791        ret = *args.reg_save_area[args.gpr]
12792      else
12793        ret = **args.overflow_arg_area++;
12794    } */
12795 
12796 static tree
12797 s390_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
12798 		      gimple_seq *post_p ATTRIBUTE_UNUSED)
12799 {
12800   tree f_gpr, f_fpr, f_ovf, f_sav;
12801   tree gpr, fpr, ovf, sav, reg, t, u;
12802   int indirect_p, size, n_reg, sav_ofs, sav_scale, max_reg;
12803   tree lab_false, lab_over = NULL_TREE;
12804   tree addr = create_tmp_var (ptr_type_node, "addr");
12805   bool left_align_p; /* How a value < UNITS_PER_LONG is aligned within
12806 			a stack slot.  */
12807 
12808   f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
12809   f_fpr = DECL_CHAIN (f_gpr);
12810   f_ovf = DECL_CHAIN (f_fpr);
12811   f_sav = DECL_CHAIN (f_ovf);
12812 
12813   gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
12814   fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
12815   sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
12816 
12817   /* The tree for args* cannot be shared between gpr/fpr and ovf since
12818      both appear on a lhs.  */
12819   valist = unshare_expr (valist);
12820   ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
12821 
12822   size = int_size_in_bytes (type);
12823 
12824   s390_check_type_for_vector_abi (type, true, false);
12825 
12826   if (pass_by_reference (NULL, TYPE_MODE (type), type, false))
12827     {
12828       if (TARGET_DEBUG_ARG)
12829 	{
12830 	  fprintf (stderr, "va_arg: aggregate type");
12831 	  debug_tree (type);
12832 	}
12833 
12834       /* Aggregates are passed by reference.  */
12835       indirect_p = 1;
12836       reg = gpr;
12837       n_reg = 1;
12838 
12839       /* kernel stack layout on 31 bit: It is assumed here that no padding
12840 	 will be added by s390_frame_info because for va_args an even number
12841 	 of gprs always has to be saved (r15-r2 = 14 regs).  */
12842       sav_ofs = 2 * UNITS_PER_LONG;
12843       sav_scale = UNITS_PER_LONG;
12844       size = UNITS_PER_LONG;
12845       max_reg = GP_ARG_NUM_REG - n_reg;
12846       left_align_p = false;
12847     }
12848   else if (s390_function_arg_vector (TYPE_MODE (type), type))
12849     {
12850       if (TARGET_DEBUG_ARG)
12851 	{
12852 	  fprintf (stderr, "va_arg: vector type");
12853 	  debug_tree (type);
12854 	}
12855 
12856       indirect_p = 0;
12857       reg = NULL_TREE;
12858       n_reg = 0;
12859       sav_ofs = 0;
12860       sav_scale = 8;
12861       max_reg = 0;
12862       left_align_p = true;
12863     }
12864   else if (s390_function_arg_float (TYPE_MODE (type), type))
12865     {
12866       if (TARGET_DEBUG_ARG)
12867 	{
12868 	  fprintf (stderr, "va_arg: float type");
12869 	  debug_tree (type);
12870 	}
12871 
12872       /* FP args go in FP registers, if present.  */
12873       indirect_p = 0;
12874       reg = fpr;
12875       n_reg = 1;
12876       sav_ofs = 16 * UNITS_PER_LONG;
12877       sav_scale = 8;
12878       max_reg = FP_ARG_NUM_REG - n_reg;
12879       left_align_p = false;
12880     }
12881   else
12882     {
12883       if (TARGET_DEBUG_ARG)
12884 	{
12885 	  fprintf (stderr, "va_arg: other type");
12886 	  debug_tree (type);
12887 	}
12888 
12889       /* Otherwise into GP registers.  */
12890       indirect_p = 0;
12891       reg = gpr;
12892       n_reg = (size + UNITS_PER_LONG - 1) / UNITS_PER_LONG;
12893 
12894       /* kernel stack layout on 31 bit: It is assumed here that no padding
12895 	 will be added by s390_frame_info because for va_args an even number
12896 	 of gprs always has to be saved (r15-r2 = 14 regs).  */
12897       sav_ofs = 2 * UNITS_PER_LONG;
12898 
12899       if (size < UNITS_PER_LONG)
12900 	sav_ofs += UNITS_PER_LONG - size;
12901 
12902       sav_scale = UNITS_PER_LONG;
12903       max_reg = GP_ARG_NUM_REG - n_reg;
12904       left_align_p = false;
12905     }
12906 
12907   /* Pull the value out of the saved registers ...  */
12908 
12909   if (reg != NULL_TREE)
12910     {
12911       /*
12912 	if (reg > ((typeof (reg))max_reg))
12913           goto lab_false;
12914 
12915         addr = sav + sav_ofs + reg * save_scale;
12916 
12917 	goto lab_over;
12918 
12919         lab_false:
12920       */
12921 
12922       lab_false = create_artificial_label (UNKNOWN_LOCATION);
12923       lab_over = create_artificial_label (UNKNOWN_LOCATION);
12924 
12925       t = fold_convert (TREE_TYPE (reg), size_int (max_reg));
12926       t = build2 (GT_EXPR, boolean_type_node, reg, t);
12927       u = build1 (GOTO_EXPR, void_type_node, lab_false);
12928       t = build3 (COND_EXPR, void_type_node, t, u, NULL_TREE);
12929       gimplify_and_add (t, pre_p);
12930 
12931       t = fold_build_pointer_plus_hwi (sav, sav_ofs);
12932       u = build2 (MULT_EXPR, TREE_TYPE (reg), reg,
12933 		  fold_convert (TREE_TYPE (reg), size_int (sav_scale)));
12934       t = fold_build_pointer_plus (t, u);
12935 
12936       gimplify_assign (addr, t, pre_p);
12937 
12938       gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));
12939 
12940       gimple_seq_add_stmt (pre_p, gimple_build_label (lab_false));
12941     }
12942 
12943   /* ... Otherwise out of the overflow area.  */
12944 
12945   t = ovf;
12946   if (size < UNITS_PER_LONG && !left_align_p)
12947     t = fold_build_pointer_plus_hwi (t, UNITS_PER_LONG - size);
12948 
12949   gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
12950 
12951   gimplify_assign (addr, t, pre_p);
12952 
12953   if (size < UNITS_PER_LONG && left_align_p)
12954     t = fold_build_pointer_plus_hwi (t, UNITS_PER_LONG);
12955   else
12956     t = fold_build_pointer_plus_hwi (t, size);
12957 
12958   gimplify_assign (ovf, t, pre_p);
12959 
12960   if (reg != NULL_TREE)
12961     gimple_seq_add_stmt (pre_p, gimple_build_label (lab_over));
12962 
12963 
12964   /* Increment register save count.  */
12965 
12966   if (n_reg > 0)
12967     {
12968       u = build2 (PREINCREMENT_EXPR, TREE_TYPE (reg), reg,
12969 		  fold_convert (TREE_TYPE (reg), size_int (n_reg)));
12970       gimplify_and_add (u, pre_p);
12971     }
12972 
12973   if (indirect_p)
12974     {
12975       t = build_pointer_type_for_mode (build_pointer_type (type),
12976 				       ptr_mode, true);
12977       addr = fold_convert (t, addr);
12978       addr = build_va_arg_indirect_ref (addr);
12979     }
12980   else
12981     {
12982       t = build_pointer_type_for_mode (type, ptr_mode, true);
12983       addr = fold_convert (t, addr);
12984     }
12985 
12986   return build_va_arg_indirect_ref (addr);
12987 }
12988 
12989 /* Emit rtl for the tbegin or tbegin_retry (RETRY != NULL_RTX)
12990    expanders.
12991    DEST  - Register location where CC will be stored.
12992    TDB   - Pointer to a 256 byte area where to store the transaction
12993            diagnostic block.  NULL if TDB is not needed.
12994    RETRY - Retry count value.  If non-NULL a retry loop for CC2
12995            is emitted.
12996    CLOBBER_FPRS_P - If true clobbers for all FPRs are emitted as part
12997                     of the tbegin instruction pattern.  */
12998 
12999 void
13000 s390_expand_tbegin (rtx dest, rtx tdb, rtx retry, bool clobber_fprs_p)
13001 {
13002   rtx retry_plus_two = gen_reg_rtx (SImode);
13003   rtx retry_reg = gen_reg_rtx (SImode);
13004   rtx_code_label *retry_label = NULL;
13005 
13006   if (retry != NULL_RTX)
13007     {
13008       emit_move_insn (retry_reg, retry);
13009       emit_insn (gen_addsi3 (retry_plus_two, retry_reg, const2_rtx));
13010       emit_insn (gen_addsi3 (retry_reg, retry_reg, const1_rtx));
13011       retry_label = gen_label_rtx ();
13012       emit_label (retry_label);
13013     }
13014 
13015   if (clobber_fprs_p)
13016     {
13017       if (TARGET_VX)
13018 	emit_insn (gen_tbegin_1_z13 (gen_rtx_CONST_INT (VOIDmode, TBEGIN_MASK),
13019 				     tdb));
13020       else
13021 	emit_insn (gen_tbegin_1 (gen_rtx_CONST_INT (VOIDmode, TBEGIN_MASK),
13022 				 tdb));
13023     }
13024   else
13025     emit_insn (gen_tbegin_nofloat_1 (gen_rtx_CONST_INT (VOIDmode, TBEGIN_MASK),
13026 				     tdb));
13027 
13028   emit_move_insn (dest, gen_rtx_UNSPEC (SImode,
13029 					gen_rtvec (1, gen_rtx_REG (CCRAWmode,
13030 								   CC_REGNUM)),
13031 					UNSPEC_CC_TO_INT));
13032   if (retry != NULL_RTX)
13033     {
13034       const int CC0 = 1 << 3;
13035       const int CC1 = 1 << 2;
13036       const int CC3 = 1 << 0;
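      /* These constants form a branch-mask style selection of condition
	 codes with CC0 as the most significant of the four bits;
	 CC0 | CC1 | CC3 thus selects every outcome except CC2, the
	 transient abort.  */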
13037       rtx jump;
13038       rtx count = gen_reg_rtx (SImode);
13039       rtx_code_label *leave_label = gen_label_rtx ();
13040 
13041       /* Exit for success and permanent failures.  */
13042       jump = s390_emit_jump (leave_label,
13043 			     gen_rtx_EQ (VOIDmode,
13044 			       gen_rtx_REG (CCRAWmode, CC_REGNUM),
13045 			       gen_rtx_CONST_INT (VOIDmode, CC0 | CC1 | CC3)));
13046       LABEL_NUSES (leave_label) = 1;
13047 
13048       /* CC2 - transient failure. Perform retry with ppa.  */
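      /* COUNT = (RETRY + 2) - RETRY_REG, i.e. how often the transaction
	 has been aborted so far (1 on the first retry); it is handed to
	 the PPA transaction-abort assist below as a hint.  */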
13049       emit_move_insn (count, retry_plus_two);
13050       emit_insn (gen_subsi3 (count, count, retry_reg));
13051       emit_insn (gen_tx_assist (count));
13052       jump = emit_jump_insn (gen_doloop_si64 (retry_label,
13053 					      retry_reg,
13054 					      retry_reg));
13055       JUMP_LABEL (jump) = retry_label;
13056       LABEL_NUSES (retry_label) = 1;
13057       emit_label (leave_label);
13058     }
13059 }
13060 
13061 
13062 /* Return the decl for the target specific builtin with the function
13063    code FCODE.  */
13064 
13065 static tree
13066 s390_builtin_decl (unsigned fcode, bool initialized_p ATTRIBUTE_UNUSED)
13067 {
13068   if (fcode >= S390_BUILTIN_MAX)
13069     return error_mark_node;
13070 
13071   return s390_builtin_decls[fcode];
13072 }
13073 
13074 /* We call mcount before the function prologue.  So a profiled leaf
13075    function should stay a leaf function.  */
13076 
13077 static bool
13078 s390_keep_leaf_when_profiled ()
13079 {
13080   return true;
13081 }
13082 
13083 /* Output assembly code for the trampoline template to
13084    stdio stream FILE.
13085 
13086    On S/390, we use gpr 1 internally in the trampoline code;
13087    gpr 0 is used to hold the static chain.  */
13088 
13089 static void
13090 s390_asm_trampoline_template (FILE *file)
13091 {
13092   rtx op[2];
13093   op[0] = gen_rtx_REG (Pmode, 0);
13094   op[1] = gen_rtx_REG (Pmode, 1);
13095 
13096   if (TARGET_64BIT)
13097     {
13098       output_asm_insn ("basr\t%1,0", op);         /* 2 byte */
13099       output_asm_insn ("lmg\t%0,%1,14(%1)", op);  /* 6 byte */
13100       output_asm_insn ("br\t%1", op);             /* 2 byte */
13101       ASM_OUTPUT_SKIP (file, (HOST_WIDE_INT)(TRAMPOLINE_SIZE - 10));
13102     }
13103   else
13104     {
13105       output_asm_insn ("basr\t%1,0", op);         /* 2 byte */
13106       output_asm_insn ("lm\t%0,%1,6(%1)", op);    /* 4 byte */
13107       output_asm_insn ("br\t%1", op);             /* 2 byte */
13108       ASM_OUTPUT_SKIP (file, (HOST_WIDE_INT)(TRAMPOLINE_SIZE - 8));
13109     }
13110 }
13111 
13112 /* Emit RTL insns to initialize the variable parts of a trampoline.
13113    FNADDR is an RTX for the address of the function's pure code.
13114    CXT is an RTX for the static chain value for the function.  */
13115 
13116 static void
13117 s390_trampoline_init (rtx m_tramp, tree fndecl, rtx cxt)
13118 {
13119   rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
13120   rtx mem;
13121 
13122   emit_block_move (m_tramp, assemble_trampoline_template (),
13123 		   GEN_INT (2 * UNITS_PER_LONG), BLOCK_OP_NORMAL);
13124 
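  /* The template's lmg/lm loads the static chain into %r0 and the target
     address into %r1 from just behind the code, so store CXT and FNADDR
     at offsets 2 and 3 * UNITS_PER_LONG respectively.  */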
13125   mem = adjust_address (m_tramp, Pmode, 2 * UNITS_PER_LONG);
13126   emit_move_insn (mem, cxt);
13127   mem = adjust_address (m_tramp, Pmode, 3 * UNITS_PER_LONG);
13128   emit_move_insn (mem, fnaddr);
13129 }
13130 
13131 /* Output assembler code to FILE to increment profiler label # LABELNO
13132    for profiling a function entry.  */
13133 
13134 void
13135 s390_function_profiler (FILE *file, int labelno)
13136 {
13137   rtx op[7];
13138 
13139   char label[128];
13140   ASM_GENERATE_INTERNAL_LABEL (label, "LP", labelno);
13141 
13142   fprintf (file, "# function profiler \n");
13143 
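  /* op[0] = return address register, op[1] = its save slot on the stack,
     op[2] = scratch register, op[3] = the internal label generated for
     LABELNO, op[4] = the _mcount symbol (via PLT when generating PIC).  */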
13144   op[0] = gen_rtx_REG (Pmode, RETURN_REGNUM);
13145   op[1] = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
13146   op[1] = gen_rtx_MEM (Pmode, plus_constant (Pmode, op[1], UNITS_PER_LONG));
13147 
13148   op[2] = gen_rtx_REG (Pmode, 1);
13149   op[3] = gen_rtx_SYMBOL_REF (Pmode, label);
13150   SYMBOL_REF_FLAGS (op[3]) = SYMBOL_FLAG_LOCAL;
13151 
13152   op[4] = gen_rtx_SYMBOL_REF (Pmode, "_mcount");
13153   if (flag_pic)
13154     {
13155       op[4] = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op[4]), UNSPEC_PLT);
13156       op[4] = gen_rtx_CONST (Pmode, op[4]);
13157     }
13158 
13159   if (TARGET_64BIT)
13160     {
13161       output_asm_insn ("stg\t%0,%1", op);
13162       output_asm_insn ("larl\t%2,%3", op);
13163       output_asm_insn ("brasl\t%0,%4", op);
13164       output_asm_insn ("lg\t%0,%1", op);
13165     }
13166   else if (TARGET_CPU_ZARCH)
13167     {
13168       output_asm_insn ("st\t%0,%1", op);
13169       output_asm_insn ("larl\t%2,%3", op);
13170       output_asm_insn ("brasl\t%0,%4", op);
13171       output_asm_insn ("l\t%0,%1", op);
13172     }
13173   else if (!flag_pic)
13174     {
13175       op[6] = gen_label_rtx ();
13176 
13177       output_asm_insn ("st\t%0,%1", op);
13178       output_asm_insn ("bras\t%2,%l6", op);
13179       output_asm_insn (".long\t%4", op);
13180       output_asm_insn (".long\t%3", op);
13181       targetm.asm_out.internal_label (file, "L", CODE_LABEL_NUMBER (op[6]));
13182       output_asm_insn ("l\t%0,0(%2)", op);
13183       output_asm_insn ("l\t%2,4(%2)", op);
13184       output_asm_insn ("basr\t%0,%0", op);
13185       output_asm_insn ("l\t%0,%1", op);
13186     }
13187   else
13188     {
13189       op[5] = gen_label_rtx ();
13190       op[6] = gen_label_rtx ();
13191 
13192       output_asm_insn ("st\t%0,%1", op);
13193       output_asm_insn ("bras\t%2,%l6", op);
13194       targetm.asm_out.internal_label (file, "L", CODE_LABEL_NUMBER (op[5]));
13195       output_asm_insn (".long\t%4-%l5", op);
13196       output_asm_insn (".long\t%3-%l5", op);
13197       targetm.asm_out.internal_label (file, "L", CODE_LABEL_NUMBER (op[6]));
13198       output_asm_insn ("lr\t%0,%2", op);
13199       output_asm_insn ("a\t%0,0(%2)", op);
13200       output_asm_insn ("a\t%2,4(%2)", op);
13201       output_asm_insn ("basr\t%0,%0", op);
13202       output_asm_insn ("l\t%0,%1", op);
13203     }
13204 }
13205 
13206 /* Encode symbol attributes (local vs. global, tls model) of a SYMBOL_REF
13207    into its SYMBOL_REF_FLAGS.  */
13208 
13209 static void
13210 s390_encode_section_info (tree decl, rtx rtl, int first)
13211 {
13212   default_encode_section_info (decl, rtl, first);
13213 
13214   if (TREE_CODE (decl) == VAR_DECL)
13215     {
13216       /* Store the alignment to be able to check if we can use
13217 	 a larl/load-relative instruction.  We only handle the cases
13218 	 that can go wrong (i.e. no FUNC_DECLs).  */
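      /* DECL_ALIGN is measured in bits: a value not divisible by 16
         means the symbol may not even be 2-byte aligned.  */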
13219       if (DECL_ALIGN (decl) == 0 || DECL_ALIGN (decl) % 16)
13220 	SYMBOL_FLAG_SET_NOTALIGN2 (XEXP (rtl, 0));
13221       else if (DECL_ALIGN (decl) % 32)
13222 	SYMBOL_FLAG_SET_NOTALIGN4 (XEXP (rtl, 0));
13223       else if (DECL_ALIGN (decl) % 64)
13224 	SYMBOL_FLAG_SET_NOTALIGN8 (XEXP (rtl, 0));
13225     }
13226 
13227   /* Literal pool references don't have a decl so they are handled
13228      differently here.  We rely on the information in the MEM_ALIGN
13229      entry to decide upon the alignment.  */
13230   if (MEM_P (rtl)
13231       && GET_CODE (XEXP (rtl, 0)) == SYMBOL_REF
13232       && TREE_CONSTANT_POOL_ADDRESS_P (XEXP (rtl, 0)))
13233     {
13234       if (MEM_ALIGN (rtl) == 0 || MEM_ALIGN (rtl) % 16)
13235 	SYMBOL_FLAG_SET_NOTALIGN2 (XEXP (rtl, 0));
13236       else if (MEM_ALIGN (rtl) % 32)
13237 	SYMBOL_FLAG_SET_NOTALIGN4 (XEXP (rtl, 0));
13238       else if (MEM_ALIGN (rtl) % 64)
13239 	SYMBOL_FLAG_SET_NOTALIGN8 (XEXP (rtl, 0));
13240     }
13241 }
13242 
13243 /* Output thunk to FILE that implements a C++ virtual function call (with
13244    multiple inheritance) to FUNCTION.  The thunk adjusts the this pointer
13245    by DELTA, and unless VCALL_OFFSET is zero, applies an additional adjustment
13246    stored at VCALL_OFFSET in the vtable whose address is located at offset 0
13247    relative to the resulting this pointer.  */
13248 
13249 static void
13250 s390_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
13251 		      HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
13252 		      tree function)
13253 {
13254   rtx op[10];
13255   int nonlocal = 0;
13256 
13257   /* Make sure unwind info is emitted for the thunk if needed.  */
13258   final_start_function (emit_barrier (), file, 1);
13259 
13260   /* Operand 0 is the target function.  */
13261   op[0] = XEXP (DECL_RTL (function), 0);
13262   if (flag_pic && !SYMBOL_REF_LOCAL_P (op[0]))
13263     {
13264       nonlocal = 1;
13265       op[0] = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op[0]),
13266 			      TARGET_64BIT ? UNSPEC_PLT : UNSPEC_GOT);
13267       op[0] = gen_rtx_CONST (Pmode, op[0]);
13268     }
13269 
13270   /* Operand 1 is the 'this' pointer.  */
13271   if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
13272     op[1] = gen_rtx_REG (Pmode, 3);
13273   else
13274     op[1] = gen_rtx_REG (Pmode, 2);
13275 
13276   /* Operand 2 is the delta.  */
13277   op[2] = GEN_INT (delta);
13278 
13279   /* Operand 3 is the vcall_offset.  */
13280   op[3] = GEN_INT (vcall_offset);
13281 
13282   /* Operand 4 is the temporary register.  */
13283   op[4] = gen_rtx_REG (Pmode, 1);
13284 
13285   /* Operands 5 to 8 can be used as labels.  */
13286   op[5] = NULL_RTX;
13287   op[6] = NULL_RTX;
13288   op[7] = NULL_RTX;
13289   op[8] = NULL_RTX;
13290 
13291   /* Operand 9 can be used for temporary register.  */
13292   op[9] = NULL_RTX;
13293 
13294   /* Generate code.  */
13295   if (TARGET_64BIT)
13296     {
13297       /* Setup literal pool pointer if required.  */
13298       if ((!DISP_IN_RANGE (delta)
13299 	   && !CONST_OK_FOR_K (delta)
13300 	   && !CONST_OK_FOR_Os (delta))
13301 	  || (!DISP_IN_RANGE (vcall_offset)
13302 	      && !CONST_OK_FOR_K (vcall_offset)
13303 	      && !CONST_OK_FOR_Os (vcall_offset)))
13304 	{
13305 	  op[5] = gen_label_rtx ();
13306 	  output_asm_insn ("larl\t%4,%5", op);
13307 	}
13308 
13309       /* Add DELTA to this pointer.  */
13310       if (delta)
13311 	{
13312 	  if (CONST_OK_FOR_J (delta))
13313 	    output_asm_insn ("la\t%1,%2(%1)", op);
13314 	  else if (DISP_IN_RANGE (delta))
13315 	    output_asm_insn ("lay\t%1,%2(%1)", op);
13316 	  else if (CONST_OK_FOR_K (delta))
13317 	    output_asm_insn ("aghi\t%1,%2", op);
13318  	  else if (CONST_OK_FOR_Os (delta))
13319  	    output_asm_insn ("agfi\t%1,%2", op);
13320 	  else
13321 	    {
13322 	      op[6] = gen_label_rtx ();
13323 	      output_asm_insn ("agf\t%1,%6-%5(%4)", op);
13324 	    }
13325 	}
13326 
13327       /* Perform vcall adjustment.  */
13328       if (vcall_offset)
13329 	{
13330 	  if (DISP_IN_RANGE (vcall_offset))
13331 	    {
13332 	      output_asm_insn ("lg\t%4,0(%1)", op);
13333 	      output_asm_insn ("ag\t%1,%3(%4)", op);
13334 	    }
13335 	  else if (CONST_OK_FOR_K (vcall_offset))
13336 	    {
13337 	      output_asm_insn ("lghi\t%4,%3", op);
13338 	      output_asm_insn ("ag\t%4,0(%1)", op);
13339 	      output_asm_insn ("ag\t%1,0(%4)", op);
13340 	    }
13341  	  else if (CONST_OK_FOR_Os (vcall_offset))
13342  	    {
13343  	      output_asm_insn ("lgfi\t%4,%3", op);
13344  	      output_asm_insn ("ag\t%4,0(%1)", op);
13345  	      output_asm_insn ("ag\t%1,0(%4)", op);
13346  	    }
13347 	  else
13348 	    {
13349 	      op[7] = gen_label_rtx ();
13350 	      output_asm_insn ("llgf\t%4,%7-%5(%4)", op);
13351 	      output_asm_insn ("ag\t%4,0(%1)", op);
13352 	      output_asm_insn ("ag\t%1,0(%4)", op);
13353 	    }
13354 	}
13355 
13356       /* Jump to target.  */
13357       output_asm_insn ("jg\t%0", op);
13358 
13359       /* Output literal pool if required.  */
13360       if (op[5])
13361 	{
13362 	  output_asm_insn (".align\t4", op);
13363 	  targetm.asm_out.internal_label (file, "L",
13364 					  CODE_LABEL_NUMBER (op[5]));
13365 	}
13366       if (op[6])
13367 	{
13368 	  targetm.asm_out.internal_label (file, "L",
13369 					  CODE_LABEL_NUMBER (op[6]));
13370 	  output_asm_insn (".long\t%2", op);
13371 	}
13372       if (op[7])
13373 	{
13374 	  targetm.asm_out.internal_label (file, "L",
13375 					  CODE_LABEL_NUMBER (op[7]));
13376 	  output_asm_insn (".long\t%3", op);
13377 	}
13378     }
13379   else
13380     {
13381       /* Setup base pointer if required.  */
13382       if (!vcall_offset
13383 	  || (!DISP_IN_RANGE (delta)
13384               && !CONST_OK_FOR_K (delta)
13385 	      && !CONST_OK_FOR_Os (delta))
13386 	  || (!DISP_IN_RANGE (delta)
13387               && !CONST_OK_FOR_K (vcall_offset)
13388 	      && !CONST_OK_FOR_Os (vcall_offset)))
13389 	{
13390 	  op[5] = gen_label_rtx ();
13391 	  output_asm_insn ("basr\t%4,0", op);
13392 	  targetm.asm_out.internal_label (file, "L",
13393 					  CODE_LABEL_NUMBER (op[5]));
13394 	}
13395 
13396       /* Add DELTA to this pointer.  */
13397       if (delta)
13398 	{
13399 	  if (CONST_OK_FOR_J (delta))
13400 	    output_asm_insn ("la\t%1,%2(%1)", op);
13401 	  else if (DISP_IN_RANGE (delta))
13402 	    output_asm_insn ("lay\t%1,%2(%1)", op);
13403 	  else if (CONST_OK_FOR_K (delta))
13404 	    output_asm_insn ("ahi\t%1,%2", op);
13405 	  else if (CONST_OK_FOR_Os (delta))
13406  	    output_asm_insn ("afi\t%1,%2", op);
13407 	  else
13408 	    {
13409 	      op[6] = gen_label_rtx ();
13410 	      output_asm_insn ("a\t%1,%6-%5(%4)", op);
13411 	    }
13412 	}
13413 
13414       /* Perform vcall adjustment.  */
13415       if (vcall_offset)
13416         {
13417 	  if (CONST_OK_FOR_J (vcall_offset))
13418 	    {
13419 	      output_asm_insn ("l\t%4,0(%1)", op);
13420 	      output_asm_insn ("a\t%1,%3(%4)", op);
13421 	    }
13422 	  else if (DISP_IN_RANGE (vcall_offset))
13423 	    {
13424 	      output_asm_insn ("l\t%4,0(%1)", op);
13425 	      output_asm_insn ("ay\t%1,%3(%4)", op);
13426 	    }
13427 	  else if (CONST_OK_FOR_K (vcall_offset))
13428 	    {
13429 	      output_asm_insn ("lhi\t%4,%3", op);
13430 	      output_asm_insn ("a\t%4,0(%1)", op);
13431 	      output_asm_insn ("a\t%1,0(%4)", op);
13432 	    }
13433 	  else if (CONST_OK_FOR_Os (vcall_offset))
13434  	    {
13435  	      output_asm_insn ("iilf\t%4,%3", op);
13436  	      output_asm_insn ("a\t%4,0(%1)", op);
13437  	      output_asm_insn ("a\t%1,0(%4)", op);
13438  	    }
13439 	  else
13440 	    {
13441 	      op[7] = gen_label_rtx ();
13442 	      output_asm_insn ("l\t%4,%7-%5(%4)", op);
13443 	      output_asm_insn ("a\t%4,0(%1)", op);
13444 	      output_asm_insn ("a\t%1,0(%4)", op);
13445 	    }
13446 
13447 	  /* We had to clobber the base pointer register.
13448 	     Re-setup the base pointer (with a different base).  */
13449 	  op[5] = gen_label_rtx ();
13450 	  output_asm_insn ("basr\t%4,0", op);
13451 	  targetm.asm_out.internal_label (file, "L",
13452 					  CODE_LABEL_NUMBER (op[5]));
13453 	}
13454 
13455       /* Jump to target.  */
13456       op[8] = gen_label_rtx ();
13457 
13458       if (!flag_pic)
13459 	output_asm_insn ("l\t%4,%8-%5(%4)", op);
13460       else if (!nonlocal)
13461 	output_asm_insn ("a\t%4,%8-%5(%4)", op);
13462       /* We cannot call through .plt, since .plt requires %r12 loaded.  */
13463       else if (flag_pic == 1)
13464 	{
13465 	  output_asm_insn ("a\t%4,%8-%5(%4)", op);
13466 	  output_asm_insn ("l\t%4,%0(%4)", op);
13467 	}
13468       else if (flag_pic == 2)
13469 	{
13470 	  op[9] = gen_rtx_REG (Pmode, 0);
13471 	  output_asm_insn ("l\t%9,%8-4-%5(%4)", op);
13472 	  output_asm_insn ("a\t%4,%8-%5(%4)", op);
13473 	  output_asm_insn ("ar\t%4,%9", op);
13474 	  output_asm_insn ("l\t%4,0(%4)", op);
13475 	}
13476 
13477       output_asm_insn ("br\t%4", op);
13478 
13479       /* Output literal pool.  */
13480       output_asm_insn (".align\t4", op);
13481 
13482       if (nonlocal && flag_pic == 2)
13483 	output_asm_insn (".long\t%0", op);
13484       if (nonlocal)
13485 	{
13486 	  op[0] = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
13487 	  SYMBOL_REF_FLAGS (op[0]) = SYMBOL_FLAG_LOCAL;
13488 	}
13489 
13490       targetm.asm_out.internal_label (file, "L", CODE_LABEL_NUMBER (op[8]));
13491       if (!flag_pic)
13492 	output_asm_insn (".long\t%0", op);
13493       else
13494 	output_asm_insn (".long\t%0-%5", op);
13495 
13496       if (op[6])
13497 	{
13498 	  targetm.asm_out.internal_label (file, "L",
13499 					  CODE_LABEL_NUMBER (op[6]));
13500 	  output_asm_insn (".long\t%2", op);
13501 	}
13502       if (op[7])
13503 	{
13504 	  targetm.asm_out.internal_label (file, "L",
13505 					  CODE_LABEL_NUMBER (op[7]));
13506 	  output_asm_insn (".long\t%3", op);
13507 	}
13508     }
13509   final_end_function ();
13510 }
13511 
13512 /* Output either an indirect jump or an indirect call
13513    (RETURN_ADDR_REGNO != INVALID_REGNUM) with target register REGNO
13514    using a branch trampoline disabling branch target prediction.  */
13515 
13516 void
13517 s390_indirect_branch_via_thunk (unsigned int regno,
13518 				unsigned int return_addr_regno,
13519 				rtx comparison_operator,
13520 				enum s390_indirect_branch_type type)
13521 {
13522   enum s390_indirect_branch_option option;
13523 
13524   if (type == s390_indirect_branch_type_return)
13525     {
13526       if (s390_return_addr_from_memory ())
13527 	option = s390_opt_function_return_mem;
13528       else
13529 	option = s390_opt_function_return_reg;
13530     }
13531   else if (type == s390_indirect_branch_type_jump)
13532     option = s390_opt_indirect_branch_jump;
13533   else if (type == s390_indirect_branch_type_call)
13534     option = s390_opt_indirect_branch_call;
13535   else
13536     gcc_unreachable ();
13537 
13538   if (TARGET_INDIRECT_BRANCH_TABLE)
13539     {
13540       char label[32];
13541 
13542       ASM_GENERATE_INTERNAL_LABEL (label,
13543 				   indirect_branch_table_label[option],
13544 				   indirect_branch_table_label_no[option]++);
13545       ASM_OUTPUT_LABEL (asm_out_file, label);
13546     }
13547 
13548   if (return_addr_regno != INVALID_REGNUM)
13549     {
13550       gcc_assert (comparison_operator == NULL_RTX);
13551       fprintf (asm_out_file, " \tbrasl\t%%r%d,", return_addr_regno);
13552     }
13553   else
13554     {
13555       fputs (" \tjg", asm_out_file);
13556       if (comparison_operator != NULL_RTX)
13557 	print_operand (asm_out_file, comparison_operator, 'C');
13558 
13559       fputs ("\t", asm_out_file);
13560     }
13561 
13562   if (TARGET_CPU_Z10)
13563     fprintf (asm_out_file,
13564 	     TARGET_INDIRECT_BRANCH_THUNK_NAME_EXRL "\n",
13565 	     regno);
13566   else
13567     fprintf (asm_out_file,
13568 	     TARGET_INDIRECT_BRANCH_THUNK_NAME_EX "\n",
13569 	     INDIRECT_BRANCH_THUNK_REGNUM, regno);
13570 
13571   if ((option == s390_opt_indirect_branch_jump
13572        && cfun->machine->indirect_branch_jump == indirect_branch_thunk)
13573       || (option == s390_opt_indirect_branch_call
13574 	  && cfun->machine->indirect_branch_call == indirect_branch_thunk)
13575       || (option == s390_opt_function_return_reg
13576 	  && cfun->machine->function_return_reg == indirect_branch_thunk)
13577       || (option == s390_opt_function_return_mem
13578 	  && cfun->machine->function_return_mem == indirect_branch_thunk))
13579     {
13580       if (TARGET_CPU_Z10)
13581 	indirect_branch_z10thunk_mask |= (1 << regno);
13582       else
13583 	indirect_branch_prez10thunk_mask |= (1 << regno);
13584     }
13585 }
13586 
13587 /* Output an inline thunk for indirect jumps.  EXECUTE_TARGET can
13588    either be an address register or a label pointing to the location
13589    of the jump instruction.  */
13590 
13591 void
13592 s390_indirect_branch_via_inline_thunk (rtx execute_target)
13593 {
13594   if (TARGET_INDIRECT_BRANCH_TABLE)
13595     {
13596       char label[32];
13597 
13598       ASM_GENERATE_INTERNAL_LABEL (label,
13599 				   indirect_branch_table_label[s390_opt_indirect_branch_jump],
13600 				   indirect_branch_table_label_no[s390_opt_indirect_branch_jump]++);
13601       ASM_OUTPUT_LABEL (asm_out_file, label);
13602     }
13603 
13604   if (!TARGET_ZARCH)
13605     fputs ("\t.machinemode zarch\n", asm_out_file);
13606 
13607   if (REG_P (execute_target))
13608     fprintf (asm_out_file, "\tex\t%%r0,0(%%r%d)\n", REGNO (execute_target));
13609   else
13610     output_asm_insn ("\texrl\t%%r0,%0", &execute_target);
13611 
13612   if (!TARGET_ZARCH)
13613     fputs ("\t.machinemode esa\n", asm_out_file);
13614 
13615   fputs ("0:\tj\t0b\n", asm_out_file);
13616 }
13617 
13618 static bool
13619 s390_valid_pointer_mode (scalar_int_mode mode)
13620 {
13621   return (mode == SImode || (TARGET_64BIT && mode == DImode));
13622 }
13623 
13624 /* Checks whether the given CALL_EXPR would use a call-saved
13625    register.  This is used to decide whether sibling call
13626    optimization could be performed on the respective function
13627    call.  */
13628 
13629 static bool
13630 s390_call_saved_register_used (tree call_expr)
13631 {
13632   CUMULATIVE_ARGS cum_v;
13633   cumulative_args_t cum;
13634   tree parameter;
13635   machine_mode mode;
13636   tree type;
13637   rtx parm_rtx;
13638   int reg, i;
13639 
13640   INIT_CUMULATIVE_ARGS (cum_v, NULL, NULL, 0, 0);
13641   cum = pack_cumulative_args (&cum_v);
13642 
13643   for (i = 0; i < call_expr_nargs (call_expr); i++)
13644     {
13645       parameter = CALL_EXPR_ARG (call_expr, i);
13646       gcc_assert (parameter);
13647 
13648       /* For an undeclared variable passed as parameter we will get
13649 	 an ERROR_MARK node here.  */
13650       if (TREE_CODE (parameter) == ERROR_MARK)
13651 	return true;
13652 
13653       type = TREE_TYPE (parameter);
13654       gcc_assert (type);
13655 
13656       mode = TYPE_MODE (type);
13657       gcc_assert (mode);
13658 
13659       /* We assume that in the target function all parameters are
13660 	 named.  This only has an impact on vector argument register
13661 	 usage, none of which is call-saved.  */
13662       if (pass_by_reference (&cum_v, mode, type, true))
13663  	{
13664  	  mode = Pmode;
13665  	  type = build_pointer_type (type);
13666  	}
13667 
13668        parm_rtx = s390_function_arg (cum, mode, type, true);
13669 
13670        s390_function_arg_advance (cum, mode, type, true);
13671 
13672        if (!parm_rtx)
13673 	 continue;
13674 
13675        if (REG_P (parm_rtx))
13676   	 {
13677 	   for (reg = 0; reg < REG_NREGS (parm_rtx); reg++)
13678 	     if (!call_used_regs[reg + REGNO (parm_rtx)])
13679  	       return true;
13680 	 }
13681 
13682        if (GET_CODE (parm_rtx) == PARALLEL)
13683 	 {
13684 	   int i;
13685 
13686 	   for (i = 0; i < XVECLEN (parm_rtx, 0); i++)
13687 	     {
13688 	       rtx r = XEXP (XVECEXP (parm_rtx, 0, i), 0);
13689 
13690 	       gcc_assert (REG_P (r));
13691 
13692 	       for (reg = 0; reg < REG_NREGS (r); reg++)
13693 		 if (!call_used_regs[reg + REGNO (r)])
13694 		   return true;
13695 	     }
13696 	 }
13697 
13698     }
13699   return false;
13700 }
13701 
13702 /* Return true if the given call expression can be
13703    turned into a sibling call.
13704    DECL holds the declaration of the function to be called whereas
13705    EXP is the call expression itself.  */
13706 
13707 static bool
13708 s390_function_ok_for_sibcall (tree decl, tree exp)
13709 {
13710   /* The TPF epilogue uses register 1.  */
13711   if (TARGET_TPF_PROFILING)
13712     return false;
13713 
13714   /* The 31 bit PLT code uses register 12 (GOT pointer - caller saved)
13715      which would have to be restored before the sibcall.  */
13716   if (!TARGET_64BIT && flag_pic && decl && !targetm.binds_local_p (decl))
13717     return false;
13718 
13719   /* The thunks for indirect branches require r1 if no exrl is
13720      available.  r1 might not be available when doing a sibling
13721      call.  */
13722   if (TARGET_INDIRECT_BRANCH_NOBP_CALL
13723       && !TARGET_CPU_Z10
13724       && !decl)
13725     return false;
13726 
13727   /* Register 6 on s390 is available as an argument register but is
13728      unfortunately call-saved.  This makes functions needing this register
13729      for arguments not suitable for sibcalls.  */
13730   return !s390_call_saved_register_used (exp);
13731 }
13732 
13733 /* Return the fixed registers used for condition codes.  */
13734 
13735 static bool
13736 s390_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
13737 {
13738   *p1 = CC_REGNUM;
13739   *p2 = INVALID_REGNUM;
13740 
13741   return true;
13742 }
13743 
13744 /* This function is used by the call expanders of the machine description.
13745    It emits the call insn itself together with the necessary operations
13746    to adjust the target address and returns the emitted insn.
13747    ADDR_LOCATION is the target address rtx
13748    TLS_CALL the location of the thread-local symbol
13749    RESULT_REG the register where the result of the call should be stored
13750    RETADDR_REG the register where the return address should be stored
13751                If this parameter is NULL_RTX the call is considered
13752                to be a sibling call.  */
13753 
13754 rtx_insn *
13755 s390_emit_call (rtx addr_location, rtx tls_call, rtx result_reg,
13756 		rtx retaddr_reg)
13757 {
13758   bool plt_call = false;
13759   rtx_insn *insn;
13760   rtx vec[4] = { NULL_RTX };
13761   int elts = 0;
13762   rtx *call = &vec[0];
13763   rtx *clobber_ret_reg = &vec[1];
13764   rtx *use = &vec[2];
13765   rtx *clobber_thunk_reg = &vec[3];
13766   int i;
13767 
13768   /* Direct function calls need special treatment.  */
13769   if (GET_CODE (addr_location) == SYMBOL_REF)
13770     {
13771       /* When calling a global routine in PIC mode, we must
13772          replace the symbol itself with the PLT stub.  */
13773       if (flag_pic && !SYMBOL_REF_LOCAL_P (addr_location))
13774         {
13775 	  if (TARGET_64BIT || retaddr_reg != NULL_RTX)
13776 	    {
13777 	      addr_location = gen_rtx_UNSPEC (Pmode,
13778 					      gen_rtvec (1, addr_location),
13779 					      UNSPEC_PLT);
13780 	      addr_location = gen_rtx_CONST (Pmode, addr_location);
13781 	      plt_call = true;
13782 	    }
13783 	  else
13784 	    /* For -fpic code the PLT entries might use r12 which is
13785 	       call-saved.  Therefore we cannot do a sibcall when
13786 	       calling directly using a symbol ref.  When reaching
13787 	       this point we decided (in s390_function_ok_for_sibcall)
13788 	       to do a sibcall for a function pointer but one of the
13789 	       optimizers was able to get rid of the function pointer
13790 	       by propagating the symbol ref into the call.  This
13791 	       optimization is illegal for S/390 so we turn the direct
13792 	       call into an indirect call again.  */
13793 	    addr_location = force_reg (Pmode, addr_location);
13794         }
13795 
13796       /* Unless we can use the bras(l) insn, force the
13797          routine address into a register.  */
13798       if (!TARGET_SMALL_EXEC && !TARGET_CPU_ZARCH)
13799         {
13800 	  if (flag_pic)
13801 	    addr_location = legitimize_pic_address (addr_location, 0);
13802 	  else
13803 	    addr_location = force_reg (Pmode, addr_location);
13804 	}
13805     }
13806 
13807   /* If it is already an indirect call or the code above moved the
13808      SYMBOL_REF to somewhere else, make sure the address can be found in
13809      register 1.  */
13810   if (retaddr_reg == NULL_RTX
13811       && GET_CODE (addr_location) != SYMBOL_REF
13812       && !plt_call)
13813     {
13814       emit_move_insn (gen_rtx_REG (Pmode, SIBCALL_REGNUM), addr_location);
13815       addr_location = gen_rtx_REG (Pmode, SIBCALL_REGNUM);
13816     }
13817 
13818   if (TARGET_INDIRECT_BRANCH_NOBP_CALL
13819       && GET_CODE (addr_location) != SYMBOL_REF
13820       && !plt_call)
13821     {
13822       /* Indirect branch thunks require the target to be a single GPR.  */
13823       addr_location = force_reg (Pmode, addr_location);
13824 
13825       /* Without exrl the indirect branch thunks need an additional
13826 	 register for larl;ex */
13827       if (!TARGET_CPU_Z10)
13828 	{
13829 	  *clobber_thunk_reg = gen_rtx_REG (Pmode, INDIRECT_BRANCH_THUNK_REGNUM);
13830 	  *clobber_thunk_reg = gen_rtx_CLOBBER (VOIDmode, *clobber_thunk_reg);
13831 	}
13832     }
13833 
13834   addr_location = gen_rtx_MEM (QImode, addr_location);
13835   *call = gen_rtx_CALL (VOIDmode, addr_location, const0_rtx);
13836 
13837   if (result_reg != NULL_RTX)
13838     *call = gen_rtx_SET (result_reg, *call);
13839 
13840   if (retaddr_reg != NULL_RTX)
13841     {
13842       *clobber_ret_reg = gen_rtx_CLOBBER (VOIDmode, retaddr_reg);
13843 
13844       if (tls_call != NULL_RTX)
13845 	*use = gen_rtx_USE (VOIDmode, tls_call);
13846     }
13847 
13848 
13849   for (i = 0; i < 4; i++)
13850     if (vec[i] != NULL_RTX)
13851       elts++;
13852 
13853   if (elts > 1)
13854     {
13855       rtvec v;
13856       int e = 0;
13857 
13858       v = rtvec_alloc (elts);
13859       for (i = 0; i < 4; i++)
13860 	if (vec[i] != NULL_RTX)
13861 	  {
13862 	    RTVEC_ELT (v, e) = vec[i];
13863 	    e++;
13864 	  }
13865 
13866       *call = gen_rtx_PARALLEL (VOIDmode, v);
13867     }
13868 
13869   insn = emit_call_insn (*call);
13870 
13871   /* 31-bit PLT stubs and tls calls use the GOT register implicitly.  */
13872   if ((!TARGET_64BIT && plt_call) || tls_call != NULL_RTX)
13873     {
13874       /* s390_function_ok_for_sibcall should
13875 	 have denied sibcalls in this case.  */
13876       gcc_assert (retaddr_reg != NULL_RTX);
13877       use_reg (&CALL_INSN_FUNCTION_USAGE (insn), gen_rtx_REG (Pmode, 12));
13878     }
13879   return insn;
13880 }
13881 
13882 /* Implement TARGET_CONDITIONAL_REGISTER_USAGE.  */
13883 
13884 static void
13885 s390_conditional_register_usage (void)
13886 {
13887   int i;
13888 
13889   if (flag_pic)
13890     {
13891       fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
13892       call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
13893     }
13894   if (TARGET_CPU_ZARCH)
13895     {
13896       fixed_regs[BASE_REGNUM] = 0;
13897       call_used_regs[BASE_REGNUM] = 0;
13898       fixed_regs[RETURN_REGNUM] = 0;
13899       call_used_regs[RETURN_REGNUM] = 0;
13900     }
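  /* In the 64-bit ABI f8-f15 are call-saved, whereas the 31-bit ABI only
     preserves f4 and f6 across calls.  */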
13901   if (TARGET_64BIT)
13902     {
13903       for (i = FPR8_REGNUM; i <= FPR15_REGNUM; i++)
13904 	call_used_regs[i] = call_really_used_regs[i] = 0;
13905     }
13906   else
13907     {
13908       call_used_regs[FPR4_REGNUM] = call_really_used_regs[FPR4_REGNUM] = 0;
13909       call_used_regs[FPR6_REGNUM] = call_really_used_regs[FPR6_REGNUM] = 0;
13910     }
13911 
13912   if (TARGET_SOFT_FLOAT)
13913     {
13914       for (i = FPR0_REGNUM; i <= FPR15_REGNUM; i++)
13915 	call_used_regs[i] = fixed_regs[i] = 1;
13916     }
13917 
13918   /* Disable v16 - v31 for non-vector target.  */
13919   if (!TARGET_VX)
13920     {
13921       for (i = VR16_REGNUM; i <= VR31_REGNUM; i++)
13922 	fixed_regs[i] = call_used_regs[i] = call_really_used_regs[i] = 1;
13923     }
13924 }
13925 
13926 /* Corresponding function to eh_return expander.  */
13927 
13928 static GTY(()) rtx s390_tpf_eh_return_symbol;
13929 void
13930 s390_emit_tpf_eh_return (rtx target)
13931 {
13932   rtx_insn *insn;
13933   rtx reg, orig_ra;
13934 
13935   if (!s390_tpf_eh_return_symbol)
13936     s390_tpf_eh_return_symbol = gen_rtx_SYMBOL_REF (Pmode, "__tpf_eh_return");
13937 
13938   reg = gen_rtx_REG (Pmode, 2);
13939   orig_ra = gen_rtx_REG (Pmode, 3);
13940 
13941   emit_move_insn (reg, target);
13942   emit_move_insn (orig_ra, get_hard_reg_initial_val (Pmode, RETURN_REGNUM));
13943   insn = s390_emit_call (s390_tpf_eh_return_symbol, NULL_RTX, reg,
13944                                      gen_rtx_REG (Pmode, RETURN_REGNUM));
13945   use_reg (&CALL_INSN_FUNCTION_USAGE (insn), reg);
13946   use_reg (&CALL_INSN_FUNCTION_USAGE (insn), orig_ra);
13947 
13948   emit_move_insn (EH_RETURN_HANDLER_RTX, reg);
13949 }
13950 
13951 /* Rework the prologue/epilogue to avoid saving/restoring
13952    registers unnecessarily.  */
13953 
13954 static void
13955 s390_optimize_prologue (void)
13956 {
13957   rtx_insn *insn, *new_insn, *next_insn;
13958 
13959   /* Do a final recompute of the frame-related data.  */
13960   s390_optimize_register_info ();
13961 
13962   /* If all special registers are in fact used, there's nothing we
13963      can do, so no point in walking the insn list.  */
13964 
13965   if (cfun_frame_layout.first_save_gpr <= BASE_REGNUM
13966       && cfun_frame_layout.last_save_gpr >= BASE_REGNUM
13967       && (TARGET_CPU_ZARCH
13968           || (cfun_frame_layout.first_save_gpr <= RETURN_REGNUM
13969               && cfun_frame_layout.last_save_gpr >= RETURN_REGNUM)))
13970     return;
13971 
13972   /* Search for prologue/epilogue insns and replace them.  */
13973 
13974   for (insn = get_insns (); insn; insn = next_insn)
13975     {
13976       int first, last, off;
13977       rtx set, base, offset;
13978       rtx pat;
13979 
13980       next_insn = NEXT_INSN (insn);
13981 
13982       if (! NONJUMP_INSN_P (insn) || ! RTX_FRAME_RELATED_P (insn))
13983 	continue;
13984 
13985       pat = PATTERN (insn);
13986 
13987       /* Remove ldgr/lgdr instructions used for saving and restoring
13988 	 GPRs if possible.  */
13989       if (TARGET_Z10)
13990 	{
13991 	  rtx tmp_pat = pat;
13992 
13993 	  if (INSN_CODE (insn) == CODE_FOR_stack_restore_from_fpr)
13994 	    tmp_pat = XVECEXP (pat, 0, 0);
13995 
13996 	  if (GET_CODE (tmp_pat) == SET
13997 	      && GET_MODE (SET_SRC (tmp_pat)) == DImode
13998 	      && REG_P (SET_SRC (tmp_pat))
13999 	      && REG_P (SET_DEST (tmp_pat)))
14000 	    {
14001 	      int src_regno = REGNO (SET_SRC (tmp_pat));
14002 	      int dest_regno = REGNO (SET_DEST (tmp_pat));
14003 	      int gpr_regno;
14004 	      int fpr_regno;
14005 
14006 	      if (!((GENERAL_REGNO_P (src_regno)
14007 		     && FP_REGNO_P (dest_regno))
14008 		    || (FP_REGNO_P (src_regno)
14009 			&& GENERAL_REGNO_P (dest_regno))))
14010 		continue;
14011 
14012 	      gpr_regno = GENERAL_REGNO_P (src_regno) ? src_regno : dest_regno;
14013 	      fpr_regno = FP_REGNO_P (src_regno) ? src_regno : dest_regno;
14014 
14015 	      /* GPR must be call-saved, FPR must be call-clobbered.  */
14016 	      if (!call_really_used_regs[fpr_regno]
14017 		  || call_really_used_regs[gpr_regno])
14018 		continue;
14019 
14020 	      /* It must not happen that what we once saved in an FPR now
14021 		 needs a stack slot.  */
14022 	      gcc_assert (cfun_gpr_save_slot (gpr_regno) != SAVE_SLOT_STACK);
14023 
14024 	      if (cfun_gpr_save_slot (gpr_regno) == SAVE_SLOT_NONE)
14025 		{
14026 		  remove_insn (insn);
14027 		  continue;
14028 		}
14029 	    }
14030 	}
14031 
14032       if (GET_CODE (pat) == PARALLEL
14033 	  && store_multiple_operation (pat, VOIDmode))
14034 	{
14035 	  set = XVECEXP (pat, 0, 0);
14036 	  first = REGNO (SET_SRC (set));
14037 	  last = first + XVECLEN (pat, 0) - 1;
14038 	  offset = const0_rtx;
14039 	  base = eliminate_constant_term (XEXP (SET_DEST (set), 0), &offset);
14040 	  off = INTVAL (offset);
14041 
14042 	  if (GET_CODE (base) != REG || off < 0)
14043 	    continue;
14044 	  if (cfun_frame_layout.first_save_gpr != -1
14045 	      && (cfun_frame_layout.first_save_gpr < first
14046 		  || cfun_frame_layout.last_save_gpr > last))
14047 	    continue;
14048 	  if (REGNO (base) != STACK_POINTER_REGNUM
14049 	      && REGNO (base) != HARD_FRAME_POINTER_REGNUM)
14050 	    continue;
14051 	  if (first > BASE_REGNUM || last < BASE_REGNUM)
14052 	    continue;
14053 
14054 	  if (cfun_frame_layout.first_save_gpr != -1)
14055 	    {
14056 	      rtx s_pat = save_gprs (base,
14057 				     off + (cfun_frame_layout.first_save_gpr
14058 					    - first) * UNITS_PER_LONG,
14059 				     cfun_frame_layout.first_save_gpr,
14060 				     cfun_frame_layout.last_save_gpr);
14061 	      new_insn = emit_insn_before (s_pat, insn);
14062 	      INSN_ADDRESSES_NEW (new_insn, -1);
14063 	    }
14064 
14065 	  remove_insn (insn);
14066 	  continue;
14067 	}
14068 
14069       if (cfun_frame_layout.first_save_gpr == -1
14070 	  && GET_CODE (pat) == SET
14071 	  && GENERAL_REG_P (SET_SRC (pat))
14072 	  && GET_CODE (SET_DEST (pat)) == MEM)
14073 	{
14074 	  set = pat;
14075 	  first = REGNO (SET_SRC (set));
14076 	  offset = const0_rtx;
14077 	  base = eliminate_constant_term (XEXP (SET_DEST (set), 0), &offset);
14078 	  off = INTVAL (offset);
14079 
14080 	  if (GET_CODE (base) != REG || off < 0)
14081 	    continue;
14082 	  if (REGNO (base) != STACK_POINTER_REGNUM
14083 	      && REGNO (base) != HARD_FRAME_POINTER_REGNUM)
14084 	    continue;
14085 
14086 	  remove_insn (insn);
14087 	  continue;
14088 	}
14089 
14090       if (GET_CODE (pat) == PARALLEL
14091 	  && load_multiple_operation (pat, VOIDmode))
14092 	{
14093 	  set = XVECEXP (pat, 0, 0);
14094 	  first = REGNO (SET_DEST (set));
14095 	  last = first + XVECLEN (pat, 0) - 1;
14096 	  offset = const0_rtx;
14097 	  base = eliminate_constant_term (XEXP (SET_SRC (set), 0), &offset);
14098 	  off = INTVAL (offset);
14099 
14100 	  if (GET_CODE (base) != REG || off < 0)
14101 	    continue;
14102 
14103 	  if (cfun_frame_layout.first_restore_gpr != -1
14104 	      && (cfun_frame_layout.first_restore_gpr < first
14105 		  || cfun_frame_layout.last_restore_gpr > last))
14106 	    continue;
14107 	  if (REGNO (base) != STACK_POINTER_REGNUM
14108 	      && REGNO (base) != HARD_FRAME_POINTER_REGNUM)
14109 	    continue;
14110 	  if (first > BASE_REGNUM || last < BASE_REGNUM)
14111 	    continue;
14112 
14113 	  if (cfun_frame_layout.first_restore_gpr != -1)
14114 	    {
14115 	      rtx rpat = restore_gprs (base,
14116 				       off + (cfun_frame_layout.first_restore_gpr
14117 					      - first) * UNITS_PER_LONG,
14118 				       cfun_frame_layout.first_restore_gpr,
14119 				       cfun_frame_layout.last_restore_gpr);
14120 
14121 	      /* Remove REG_CFA_RESTOREs for registers that we no
14122 		 longer need to save.  */
14123 	      REG_NOTES (rpat) = REG_NOTES (insn);
14124 	      for (rtx *ptr = &REG_NOTES (rpat); *ptr; )
14125 		if (REG_NOTE_KIND (*ptr) == REG_CFA_RESTORE
14126 		    && ((int) REGNO (XEXP (*ptr, 0))
14127 			< cfun_frame_layout.first_restore_gpr))
14128 		  *ptr = XEXP (*ptr, 1);
14129 		else
14130 		  ptr = &XEXP (*ptr, 1);
14131 	      new_insn = emit_insn_before (rpat, insn);
14132 	      RTX_FRAME_RELATED_P (new_insn) = 1;
14133 	      INSN_ADDRESSES_NEW (new_insn, -1);
14134 	    }
14135 
14136 	  remove_insn (insn);
14137 	  continue;
14138 	}
14139 
14140       if (cfun_frame_layout.first_restore_gpr == -1
14141 	  && GET_CODE (pat) == SET
14142 	  && GENERAL_REG_P (SET_DEST (pat))
14143 	  && GET_CODE (SET_SRC (pat)) == MEM)
14144 	{
14145 	  set = pat;
14146 	  first = REGNO (SET_DEST (set));
14147 	  offset = const0_rtx;
14148 	  base = eliminate_constant_term (XEXP (SET_SRC (set), 0), &offset);
14149 	  off = INTVAL (offset);
14150 
14151 	  if (GET_CODE (base) != REG || off < 0)
14152 	    continue;
14153 
14154 	  if (REGNO (base) != STACK_POINTER_REGNUM
14155 	      && REGNO (base) != HARD_FRAME_POINTER_REGNUM)
14156 	    continue;
14157 
14158 	  remove_insn (insn);
14159 	  continue;
14160 	}
14161     }
14162 }
14163 
14164 /* On z10 and later the dynamic branch prediction must see the
14165    backward jump within a certain window.  If not, it falls back to
14166    the static prediction.  This function rearranges the loop backward
14167    branch in a way which makes the static prediction always correct.
14168    The function returns true if it added an instruction.  */
14169 static bool
14170 s390_fix_long_loop_prediction (rtx_insn *insn)
14171 {
14172   rtx set = single_set (insn);
14173   rtx code_label, label_ref;
14174   rtx_insn *uncond_jump;
14175   rtx_insn *cur_insn;
14176   rtx tmp;
14177   int distance;
14178 
14179   /* This will exclude branch on count and branch on index patterns
14180      since these are correctly statically predicted.  */
14181   if (!set
14182       || SET_DEST (set) != pc_rtx
14183       || GET_CODE (SET_SRC(set)) != IF_THEN_ELSE)
14184     return false;
14185 
14186   /* Skip conditional returns.  */
14187   if (ANY_RETURN_P (XEXP (SET_SRC (set), 1))
14188       && XEXP (SET_SRC (set), 2) == pc_rtx)
14189     return false;
14190 
14191   label_ref = (GET_CODE (XEXP (SET_SRC (set), 1)) == LABEL_REF ?
14192 	       XEXP (SET_SRC (set), 1) : XEXP (SET_SRC (set), 2));
14193 
14194   gcc_assert (GET_CODE (label_ref) == LABEL_REF);
14195 
14196   code_label = XEXP (label_ref, 0);
14197 
14198   if (INSN_ADDRESSES (INSN_UID (code_label)) == -1
14199       || INSN_ADDRESSES (INSN_UID (insn)) == -1
14200       || (INSN_ADDRESSES (INSN_UID (insn))
14201 	  - INSN_ADDRESSES (INSN_UID (code_label)) < PREDICT_DISTANCE))
14202     return false;
14203 
14204   for (distance = 0, cur_insn = PREV_INSN (insn);
14205        distance < PREDICT_DISTANCE - 6;
14206        distance += get_attr_length (cur_insn), cur_insn = PREV_INSN (cur_insn))
14207     if (!cur_insn || JUMP_P (cur_insn) || LABEL_P (cur_insn))
14208       return false;
14209 
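  /* Reverse the branch: emit an unconditional jump to the original target
     right after INSN and a fresh label behind it, then swap the
     IF_THEN_ELSE arms so the conditional branch becomes a short forward
     jump to that new label.  */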
14210   rtx_code_label *new_label = gen_label_rtx ();
14211   uncond_jump = emit_jump_insn_after (
14212 		  gen_rtx_SET (pc_rtx,
14213 			       gen_rtx_LABEL_REF (VOIDmode, code_label)),
14214 		  insn);
14215   emit_label_after (new_label, uncond_jump);
14216 
14217   tmp = XEXP (SET_SRC (set), 1);
14218   XEXP (SET_SRC (set), 1) = XEXP (SET_SRC (set), 2);
14219   XEXP (SET_SRC (set), 2) = tmp;
14220   INSN_CODE (insn) = -1;
14221 
14222   XEXP (label_ref, 0) = new_label;
14223   JUMP_LABEL (insn) = new_label;
14224   JUMP_LABEL (uncond_jump) = code_label;
14225 
14226   return true;
14227 }
14228 
14229 /* Returns 1 if INSN reads the value of REG for purposes not related
14230    to addressing of memory, and 0 otherwise.  */
14231 static int
14232 s390_non_addr_reg_read_p (rtx reg, rtx_insn *insn)
14233 {
14234   return reg_referenced_p (reg, PATTERN (insn))
14235     && !reg_used_in_mem_p (REGNO (reg), PATTERN (insn));
14236 }
14237 
14238 /* Starting from INSN find_cond_jump looks downwards in the insn
14239    stream for a single jump insn which is the last user of the
14240    condition code set in INSN.  */
14241 static rtx_insn *
14242 find_cond_jump (rtx_insn *insn)
14243 {
14244   for (; insn; insn = NEXT_INSN (insn))
14245     {
14246       rtx ite, cc;
14247 
14248       if (LABEL_P (insn))
14249 	break;
14250 
14251       if (!JUMP_P (insn))
14252 	{
14253 	  if (reg_mentioned_p (gen_rtx_REG (CCmode, CC_REGNUM), insn))
14254 	    break;
14255 	  continue;
14256 	}
14257 
14258       /* This will be triggered by a return.  */
14259       if (GET_CODE (PATTERN (insn)) != SET)
14260 	break;
14261 
14262       gcc_assert (SET_DEST (PATTERN (insn)) == pc_rtx);
14263       ite = SET_SRC (PATTERN (insn));
14264 
14265       if (GET_CODE (ite) != IF_THEN_ELSE)
14266 	break;
14267 
14268       cc = XEXP (XEXP (ite, 0), 0);
14269       if (!REG_P (cc) || !CC_REGNO_P (REGNO (cc)))
14270 	break;
14271 
14272       if (find_reg_note (insn, REG_DEAD, cc))
14273 	return insn;
14274       break;
14275     }
14276 
14277   return NULL;
14278 }
14279 
14280 /* Swap the condition in COND and the operands in OP0 and OP1 so that
14281    the semantics does not change.  If NULL_RTX is passed as COND the
14282    function tries to find the conditional jump starting with INSN.  */
14283 static void
14284 s390_swap_cmp (rtx cond, rtx *op0, rtx *op1, rtx_insn *insn)
14285 {
14286   rtx tmp = *op0;
14287 
14288   if (cond == NULL_RTX)
14289     {
14290       rtx_insn *jump = find_cond_jump (NEXT_INSN (insn));
14291       rtx set = jump ? single_set (jump) : NULL_RTX;
14292 
14293       if (set == NULL_RTX)
14294 	return;
14295 
14296       cond = XEXP (SET_SRC (set), 0);
14297     }
14298 
14299   *op0 = *op1;
14300   *op1 = tmp;
14301   PUT_CODE (cond, swap_condition (GET_CODE (cond)));
14302 }
14303 
14304 /* On z10, instructions of the compare-and-branch family have the
14305    property to access the register occurring as second operand with
14306    its bits complemented.  If such a compare is grouped with a second
14307    instruction that accesses the same register non-complemented, and
14308    if that register's value is delivered via a bypass, then the
14309    pipeline recycles, thereby causing significant performance decline.
14310    This function locates such situations and exchanges the two
14311    operands of the compare.  The function returns true whenever it
14312    added an insn.  */
14313 static bool
14314 s390_z10_optimize_cmp (rtx_insn *insn)
14315 {
14316   rtx_insn *prev_insn, *next_insn;
14317   bool insn_added_p = false;
14318   rtx cond, *op0, *op1;
14319 
14320   if (GET_CODE (PATTERN (insn)) == PARALLEL)
14321     {
14322       /* Handle compare and branch and branch on count
14323 	 instructions.  */
14324       rtx pattern = single_set (insn);
14325 
14326       if (!pattern
14327 	  || SET_DEST (pattern) != pc_rtx
14328 	  || GET_CODE (SET_SRC (pattern)) != IF_THEN_ELSE)
14329 	return false;
14330 
14331       cond = XEXP (SET_SRC (pattern), 0);
14332       op0 = &XEXP (cond, 0);
14333       op1 = &XEXP (cond, 1);
14334     }
14335   else if (GET_CODE (PATTERN (insn)) == SET)
14336     {
14337       rtx src, dest;
14338 
14339       /* Handle normal compare instructions.  */
14340       src = SET_SRC (PATTERN (insn));
14341       dest = SET_DEST (PATTERN (insn));
14342 
14343       if (!REG_P (dest)
14344 	  || !CC_REGNO_P (REGNO (dest))
14345 	  || GET_CODE (src) != COMPARE)
14346 	return false;
14347 
14348       /* s390_swap_cmp will try to find the conditional
14349 	 jump when passing NULL_RTX as condition.  */
14350       cond = NULL_RTX;
14351       op0 = &XEXP (src, 0);
14352       op1 = &XEXP (src, 1);
14353     }
14354   else
14355     return false;
14356 
14357   if (!REG_P (*op0) || !REG_P (*op1))
14358     return false;
14359 
14360   if (GET_MODE_CLASS (GET_MODE (*op0)) != MODE_INT)
14361     return false;
14362 
14363   /* Swap the COMPARE arguments and its mask if there is a
14364      conflicting access in the previous insn.  */
14365   prev_insn = prev_active_insn (insn);
14366   if (prev_insn != NULL_RTX && INSN_P (prev_insn)
14367       && reg_referenced_p (*op1, PATTERN (prev_insn)))
14368     s390_swap_cmp (cond, op0, op1, insn);
14369 
14370   /* Check if there is a conflict with the next insn. If there
14371      was no conflict with the previous insn, then swap the
14372      COMPARE arguments and its mask.  If we already swapped
14373      the operands, or if swapping them would cause a conflict
14374      with the previous insn, issue a NOP after the COMPARE in
14375      order to separate the two instructions.  */
14376   next_insn = next_active_insn (insn);
14377   if (next_insn != NULL_RTX && INSN_P (next_insn)
14378       && s390_non_addr_reg_read_p (*op1, next_insn))
14379     {
14380       if (prev_insn != NULL_RTX && INSN_P (prev_insn)
14381 	  && s390_non_addr_reg_read_p (*op0, prev_insn))
14382 	{
14383 	  if (REGNO (*op1) == 0)
14384 	    emit_insn_after (gen_nop_lr1 (), insn);
14385 	  else
14386 	    emit_insn_after (gen_nop_lr0 (), insn);
14387 	  insn_added_p = true;
14388 	}
14389       else
14390 	s390_swap_cmp (cond, op0, op1, insn);
14391     }
14392   return insn_added_p;
14393 }
14394 
14395 /* Number of INSNs to be scanned backward in the last BB of the loop
14396    and forward in the first BB of the loop.  This usually should be a
14397    bit more than the number of INSNs which could go into one
14398    group.  */
14399 #define S390_OSC_SCAN_INSN_NUM 5
14400 
14401 /* Scan LOOP for static OSC collisions and return true if an osc_break
14402    should be issued for this loop.  */
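/* "OSC" presumably stands for the operand-store-compare hazard: a load in
   the loop header reading the same address that a store at the loop latch
   has just written, which is what the scan below looks for.  */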
14403 static bool
14404 s390_adjust_loop_scan_osc (struct loop* loop)
14405 
14406 {
14407   HARD_REG_SET modregs, newregs;
14408   rtx_insn *insn, *store_insn = NULL;
14409   rtx set;
14410   struct s390_address addr_store, addr_load;
14411   subrtx_iterator::array_type array;
14412   int insn_count;
14413 
14414   CLEAR_HARD_REG_SET (modregs);
14415 
14416   insn_count = 0;
14417   FOR_BB_INSNS_REVERSE (loop->latch, insn)
14418     {
14419       if (!INSN_P (insn) || INSN_CODE (insn) <= 0)
14420 	continue;
14421 
14422       insn_count++;
14423       if (insn_count > S390_OSC_SCAN_INSN_NUM)
14424 	return false;
14425 
14426       find_all_hard_reg_sets (insn, &newregs, true);
14427       IOR_HARD_REG_SET (modregs, newregs);
14428 
14429       set = single_set (insn);
14430       if (!set)
14431 	continue;
14432 
14433       if (MEM_P (SET_DEST (set))
14434 	  && s390_decompose_address (XEXP (SET_DEST (set), 0), &addr_store))
14435 	{
14436 	  store_insn = insn;
14437 	  break;
14438 	}
14439     }
14440 
14441   if (store_insn == NULL_RTX)
14442     return false;
14443 
14444   insn_count = 0;
14445   FOR_BB_INSNS (loop->header, insn)
14446     {
14447       if (!INSN_P (insn) || INSN_CODE (insn) <= 0)
14448 	continue;
14449 
14450       if (insn == store_insn)
14451 	return false;
14452 
14453       insn_count++;
14454       if (insn_count > S390_OSC_SCAN_INSN_NUM)
14455 	return false;
14456 
14457       find_all_hard_reg_sets (insn, &newregs, true);
14458       IOR_HARD_REG_SET (modregs, newregs);
14459 
14460       set = single_set (insn);
14461       if (!set)
14462 	continue;
14463 
14464       /* An intermediate store disrupts static OSC checking
14465 	 anyway.  */
14466       if (MEM_P (SET_DEST (set))
14467 	  && s390_decompose_address (XEXP (SET_DEST (set), 0), NULL))
14468 	return false;
14469 
14470       FOR_EACH_SUBRTX (iter, array, SET_SRC (set), NONCONST)
14471 	if (MEM_P (*iter)
14472 	    && s390_decompose_address (XEXP (*iter, 0), &addr_load)
14473 	    && rtx_equal_p (addr_load.base, addr_store.base)
14474 	    && rtx_equal_p (addr_load.indx, addr_store.indx)
14475 	    && rtx_equal_p (addr_load.disp, addr_store.disp))
14476 	  {
14477 	    if ((addr_load.base != NULL_RTX
14478 		 && TEST_HARD_REG_BIT (modregs, REGNO (addr_load.base)))
14479 		|| (addr_load.indx != NULL_RTX
14480 		    && TEST_HARD_REG_BIT (modregs, REGNO (addr_load.indx))))
14481 	      return true;
14482 	  }
14483     }
14484   return false;
14485 }
14486 
14487 /* Look for adjustments which can be done on simple innermost
14488    loops.  */
14489 static void
14490 s390_adjust_loops ()
14491 {
14492   struct loop *loop = NULL;
14493 
14494   df_analyze ();
14495   compute_bb_for_insn ();
14496 
14497   /* Find the loops.  */
14498   loop_optimizer_init (AVOID_CFG_MODIFICATIONS);
14499 
14500   FOR_EACH_LOOP (loop, LI_ONLY_INNERMOST)
14501     {
14502       if (dump_file)
14503 	{
14504 	  flow_loop_dump (loop, dump_file, NULL, 0);
14505 	  fprintf (dump_file, ";;  OSC loop scan Loop: ");
14506 	}
14507       if (loop->latch == NULL
14508 	  || pc_set (BB_END (loop->latch)) == NULL_RTX
14509 	  || !s390_adjust_loop_scan_osc (loop))
14510 	{
14511 	  if (dump_file)
14512 	    {
14513 	      if (loop->latch == NULL)
14514 		fprintf (dump_file, " multiple backward jumps\n");
14515 	      else
14516 		{
14517 		  fprintf (dump_file, " header insn: %d latch insn: %d ",
14518 			   INSN_UID (BB_HEAD (loop->header)),
14519 			   INSN_UID (BB_END (loop->latch)));
14520 		  if (pc_set (BB_END (loop->latch)) == NULL_RTX)
14521 		    fprintf (dump_file, " loop does not end with jump\n");
14522 		  else
14523 		    fprintf (dump_file, " not instrumented\n");
14524 		}
14525 	    }
14526 	}
14527       else
14528 	{
14529 	  rtx_insn *new_insn;
14530 
14531 	  if (dump_file)
14532 	    fprintf (dump_file, " adding OSC break insn: ");
14533 	  new_insn = emit_insn_before (gen_osc_break (),
14534 				       BB_END (loop->latch));
14535 	  INSN_ADDRESSES_NEW (new_insn, -1);
14536 	}
14537     }
14538 
14539   loop_optimizer_finalize ();
14540 
14541   df_finish_pass (false);
14542 }
14543 
14544 /* Perform machine-dependent processing.  */
14545 
14546 static void
14547 s390_reorg (void)
14548 {
14549   bool pool_overflow = false;
14550   int hw_before, hw_after;
14551 
14552   if (s390_tune == PROCESSOR_2964_Z13)
14553     s390_adjust_loops ();
14554 
14555   /* Make sure all splits have been performed; splits after
14556      machine_dependent_reorg might confuse insn length counts.  */
14557   split_all_insns_noflow ();
14558 
14559   /* Install the main literal pool and the associated base
14560      register load insns.
14561 
14562      In addition, there are two problematic situations we need
14563      to correct:
14564 
14565      - the literal pool might be > 4096 bytes in size, so that
14566        some of its elements cannot be directly accessed
14567 
14568      - a branch target might be > 64K away from the branch, so that
14569        it is not possible to use a PC-relative instruction.
14570 
14571      To fix those, we split the single literal pool into multiple
14572      pool chunks, reloading the pool base register at various
14573      points throughout the function to ensure it always points to
14574      the pool chunk the following code expects, and / or replace
14575      PC-relative branches by absolute branches.
14576 
14577      However, the two problems are interdependent: splitting the
14578      literal pool can move a branch further away from its target,
14579      causing the 64K limit to overflow, and on the other hand,
14580      replacing a PC-relative branch by an absolute branch means
14581      we need to put the branch target address into the literal
14582      pool, possibly causing it to overflow.
14583 
14584      So, we loop trying to fix up both problems until we manage
14585      to satisfy both conditions at the same time.  Note that the
14586      loop is guaranteed to terminate as every pass of the loop
14587      strictly decreases the total number of PC-relative branches
14588      in the function.  (This is not completely true as there
14589      might be branch-over-pool insns introduced by chunkify_start.
14590      Those never need to be split however.)  */
14591 
14592   for (;;)
14593     {
14594       struct constant_pool *pool = NULL;
14595 
14596       /* Collect the literal pool.  */
14597       if (!pool_overflow)
14598 	{
14599 	  pool = s390_mainpool_start ();
14600 	  if (!pool)
14601 	    pool_overflow = true;
14602 	}
14603 
14604       /* If literal pool overflowed, start to chunkify it.  */
14605       if (pool_overflow)
14606         pool = s390_chunkify_start ();
14607 
14608       /* Split out-of-range branches.  If this has created new
14609 	 literal pool entries, cancel current chunk list and
14610 	 recompute it.  zSeries machines have large branch
14611 	 instructions, so we never need to split a branch.  */
14612       if (!TARGET_CPU_ZARCH && s390_split_branches ())
14613         {
14614           if (pool_overflow)
14615             s390_chunkify_cancel (pool);
14616 	  else
14617             s390_mainpool_cancel (pool);
14618 
14619           continue;
14620         }
14621 
14622       /* If we made it up to here, both conditions are satisfied.
14623 	 Finish up literal pool related changes.  */
14624       if (pool_overflow)
14625 	s390_chunkify_finish (pool);
14626       else
14627 	s390_mainpool_finish (pool);
14628 
14629       /* We're done splitting branches.  */
14630       cfun->machine->split_branches_pending_p = false;
14631       break;
14632     }
14633 
14634   /* Generate out-of-pool execute target insns.  */
14635   if (TARGET_CPU_ZARCH)
14636     {
14637       rtx_insn *insn, *target;
14638       rtx label;
14639 
14640       for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
14641 	{
14642 	  label = s390_execute_label (insn);
14643 	  if (!label)
14644 	    continue;
14645 
14646 	  gcc_assert (label != const0_rtx);
14647 
14648 	  target = emit_label (XEXP (label, 0));
14649 	  INSN_ADDRESSES_NEW (target, -1);
14650 
14651 	  if (JUMP_P (insn))
14652 	    {
14653 	      target = emit_jump_insn (s390_execute_target (insn));
14654 	      /* This is important in order to keep a table jump
14655 		 pointing at the jump table label.  Only this makes it
14656 		 be recognized as a table jump.  */
14657 	      JUMP_LABEL (target) = JUMP_LABEL (insn);
14658 	    }
14659 	  else
14660 	    target = emit_insn (s390_execute_target (insn));
14661 	  INSN_ADDRESSES_NEW (target, -1);
14662 	}
14663     }
14664 
14665   /* Try to optimize prologue and epilogue further.  */
14666   s390_optimize_prologue ();
14667 
14668   /* Walk over the insns and do some >=z10 specific changes.  */
14669   if (s390_tune >= PROCESSOR_2097_Z10)
14670     {
14671       rtx_insn *insn;
14672       bool insn_added_p = false;
14673 
14674       /* The insn lengths and addresses have to be up to date for the
14675 	 following manipulations.  */
14676       shorten_branches (get_insns ());
14677 
14678       for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
14679 	{
14680 	  if (!INSN_P (insn) || INSN_CODE (insn) <= 0)
14681 	    continue;
14682 
14683 	  if (JUMP_P (insn))
14684 	    insn_added_p |= s390_fix_long_loop_prediction (insn);
14685 
14686 	  if ((GET_CODE (PATTERN (insn)) == PARALLEL
14687 	       || GET_CODE (PATTERN (insn)) == SET)
14688 	      && s390_tune == PROCESSOR_2097_Z10)
14689 	    insn_added_p |= s390_z10_optimize_cmp (insn);
14690 	}
14691 
14692       /* Adjust branches if we added new instructions.  */
14693       if (insn_added_p)
14694 	shorten_branches (get_insns ());
14695     }
14696 
14697   s390_function_num_hotpatch_hw (current_function_decl, &hw_before, &hw_after);
14698   if (hw_after > 0)
14699     {
14700       rtx_insn *insn;
14701 
14702       /* Insert NOPs for hotpatching. */
14703       for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
14704 	/* Emit NOPs
14705 	    1. inside the area covered by debug information to allow setting
14706 	       breakpoints at the NOPs,
14707 	    2. before any insn which results in an asm instruction,
14708 	    3. before in-function labels to avoid jumping to the NOPs, for
14709 	       example as part of a loop,
14710 	    4. before any barrier in case the function is completely empty
14711 	       (__builtin_unreachable ()) and has neither internal labels nor
14712 	       active insns.
14713 	*/
14714 	if (active_insn_p (insn) || BARRIER_P (insn) || LABEL_P (insn))
14715 	  break;
14716       /* Output a series of NOPs before the first active insn.  */
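      /* HW_AFTER counts halfwords still to be covered; the loop below
	 greedily uses the largest NOP available.  E.g. hw_after == 5 on a
	 zarch target emits one 6-byte NOP followed by one 4-byte NOP.  */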
14717       while (insn && hw_after > 0)
14718 	{
14719 	  if (hw_after >= 3 && TARGET_CPU_ZARCH)
14720 	    {
14721 	      emit_insn_before (gen_nop_6_byte (), insn);
14722 	      hw_after -= 3;
14723 	    }
14724 	  else if (hw_after >= 2)
14725 	    {
14726 	      emit_insn_before (gen_nop_4_byte (), insn);
14727 	      hw_after -= 2;
14728 	    }
14729 	  else
14730 	    {
14731 	      emit_insn_before (gen_nop_2_byte (), insn);
14732 	      hw_after -= 1;
14733 	    }
14734 	}
14735     }
14736 }
14737 
14738 /* Return true if INSN is a fp load insn writing register REGNO.  */
14739 static inline bool
14740 s390_fpload_toreg (rtx_insn *insn, unsigned int regno)
14741 {
14742   rtx set;
14743   enum attr_type flag = s390_safe_attr_type (insn);
14744 
14745   if (flag != TYPE_FLOADSF && flag != TYPE_FLOADDF)
14746     return false;
14747 
14748   set = single_set (insn);
14749 
14750   if (set == NULL_RTX)
14751     return false;
14752 
14753   if (!REG_P (SET_DEST (set)) || !MEM_P (SET_SRC (set)))
14754     return false;
14755 
14756   if (REGNO (SET_DEST (set)) != regno)
14757     return false;
14758 
14759   return true;
14760 }
14761 
14762 /* This value describes the distance to be avoided between an
14763    arithmetic fp instruction and an fp load writing the same register.
14764    A distance of Z10_EARLYLOAD_DISTANCE - 1 or Z10_EARLYLOAD_DISTANCE + 1
14765    is fine, but exactly this value has to be avoided.  Otherwise the FP
14766    pipeline will throw an exception causing a major penalty.  */
14767 #define Z10_EARLYLOAD_DISTANCE 7
14768 
14769 /* Rearrange the ready list in order to avoid the situation described
14770    for Z10_EARLYLOAD_DISTANCE.  A problematic load instruction is
14771    moved to the very end of the ready list.  */
14772 static void
14773 s390_z10_prevent_earlyload_conflicts (rtx_insn **ready, int *nready_p)
14774 {
14775   unsigned int regno;
14776   int nready = *nready_p;
14777   rtx_insn *tmp;
14778   int i;
14779   rtx_insn *insn;
14780   rtx set;
14781   enum attr_type flag;
14782   int distance;
14783 
14784   /* Skip DISTANCE - 1 active insns.  */
14785   for (insn = last_scheduled_insn, distance = Z10_EARLYLOAD_DISTANCE - 1;
14786        distance > 0 && insn != NULL_RTX;
14787        distance--, insn = prev_active_insn (insn))
14788     if (CALL_P (insn) || JUMP_P (insn))
14789       return;
14790 
14791   if (insn == NULL_RTX)
14792     return;
14793 
14794   set = single_set (insn);
14795 
14796   if (set == NULL_RTX || !REG_P (SET_DEST (set))
14797       || GET_MODE_CLASS (GET_MODE (SET_DEST (set))) != MODE_FLOAT)
14798     return;
14799 
14800   flag = s390_safe_attr_type (insn);
14801 
14802   if (flag == TYPE_FLOADSF || flag == TYPE_FLOADDF)
14803     return;
14804 
14805   regno = REGNO (SET_DEST (set));
14806   i = nready - 1;
14807 
14808   while (!s390_fpload_toreg (ready[i], regno) && i > 0)
14809     i--;
14810 
14811   if (!i)
14812     return;
14813 
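  /* Move the problematic load from slot I down to slot 0, the bottom of
     the ready list and hence the insn issued last, by shifting the
     entries below it up by one slot.  */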
14814   tmp = ready[i];
14815   memmove (&ready[1], &ready[0], sizeof (rtx_insn *) * i);
14816   ready[0] = tmp;
14817 }
14818 
14819 /* Returns TRUE if BB is entered via a fallthru edge and all other
14820    incoming edges are less likely than `unlikely'.  */
14821 static bool
14822 s390_bb_fallthru_entry_likely (basic_block bb)
14823 {
14824   edge e, fallthru_edge;
14825   edge_iterator ei;
14826 
14827   if (!bb)
14828     return false;
14829 
14830   fallthru_edge = find_fallthru_edge (bb->preds);
14831   if (!fallthru_edge)
14832     return false;
14833 
14834   FOR_EACH_EDGE (e, ei, bb->preds)
14835     if (e != fallthru_edge
14836 	&& e->probability >= profile_probability::unlikely ())
14837       return false;
14838 
14839   return true;
14840 }
14841 
14842 /* The s390_sched_state variable tracks the state of the current or
14843    the last instruction group.
14844 
14845    0,1,2 number of instructions scheduled in the current group
14846    3     the last group is complete - normal insns
14847    4     the last group was a cracked/expanded insn */
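/* For example, three consecutive normal insns move the state
   0 -> 1 -> 2 -> S390_SCHED_STATE_NORMAL, whereas a cracked or expanded
   insn always moves it to S390_SCHED_STATE_CRACKED (see
   s390_sched_variable_issue).  */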
14848 
14849 static int s390_sched_state = 0;
14850 
14851 #define S390_SCHED_STATE_NORMAL  3
14852 #define S390_SCHED_STATE_CRACKED 4
14853 
14854 #define S390_SCHED_ATTR_MASK_CRACKED    0x1
14855 #define S390_SCHED_ATTR_MASK_EXPANDED   0x2
14856 #define S390_SCHED_ATTR_MASK_ENDGROUP   0x4
14857 #define S390_SCHED_ATTR_MASK_GROUPALONE 0x8
14858 
14859 static unsigned int
14860 s390_get_sched_attrmask (rtx_insn *insn)
14861 {
14862   unsigned int mask = 0;
14863 
14864   switch (s390_tune)
14865     {
14866     case PROCESSOR_2827_ZEC12:
14867       if (get_attr_zEC12_cracked (insn))
14868 	mask |= S390_SCHED_ATTR_MASK_CRACKED;
14869       if (get_attr_zEC12_expanded (insn))
14870 	mask |= S390_SCHED_ATTR_MASK_EXPANDED;
14871       if (get_attr_zEC12_endgroup (insn))
14872 	mask |= S390_SCHED_ATTR_MASK_ENDGROUP;
14873       if (get_attr_zEC12_groupalone (insn))
14874 	mask |= S390_SCHED_ATTR_MASK_GROUPALONE;
14875       break;
14876     case PROCESSOR_2964_Z13:
14877     case PROCESSOR_3906_Z14:
14878       if (get_attr_z13_cracked (insn))
14879 	mask |= S390_SCHED_ATTR_MASK_CRACKED;
14880       if (get_attr_z13_expanded (insn))
14881 	mask |= S390_SCHED_ATTR_MASK_EXPANDED;
14882       if (get_attr_z13_endgroup (insn))
14883 	mask |= S390_SCHED_ATTR_MASK_ENDGROUP;
14884       if (get_attr_z13_groupalone (insn))
14885 	mask |= S390_SCHED_ATTR_MASK_GROUPALONE;
14886       break;
14887     default:
14888       gcc_unreachable ();
14889     }
14890   return mask;
14891 }
14892 
14893 static unsigned int
14894 s390_get_unit_mask (rtx_insn *insn, int *units)
14895 {
14896   unsigned int mask = 0;
14897 
14898   switch (s390_tune)
14899     {
14900     case PROCESSOR_2964_Z13:
14901     case PROCESSOR_3906_Z14:
14902       *units = 3;
14903       if (get_attr_z13_unit_lsu (insn))
14904 	mask |= 1 << 0;
14905       if (get_attr_z13_unit_fxu (insn))
14906 	mask |= 1 << 1;
14907       if (get_attr_z13_unit_vfu (insn))
14908 	mask |= 1 << 2;
14909       break;
14910     default:
14911       gcc_unreachable ();
14912     }
14913   return mask;
14914 }
14915 
14916 /* Return the scheduling score for INSN.  The higher the score the
14917    better.  The score is calculated from the OOO scheduling attributes
14918    of INSN and the scheduling state s390_sched_state.  */
14919 static int
14920 s390_sched_score (rtx_insn *insn)
14921 {
14922   unsigned int mask = s390_get_sched_attrmask (insn);
14923   int score = 0;
14924 
14925   switch (s390_sched_state)
14926     {
14927     case 0:
14928       /* Try to put insns into the first slot which would otherwise
14929 	 break a group.  */
14930       if ((mask & S390_SCHED_ATTR_MASK_CRACKED) != 0
14931 	  || (mask & S390_SCHED_ATTR_MASK_EXPANDED) != 0)
14932 	score += 5;
14933       if ((mask & S390_SCHED_ATTR_MASK_GROUPALONE) != 0)
14934 	score += 10;
14935       /* fallthrough */
14936     case 1:
14937       /* Prefer not cracked insns while trying to put together a
14938 	 group.  */
14939       if ((mask & S390_SCHED_ATTR_MASK_CRACKED) == 0
14940 	  && (mask & S390_SCHED_ATTR_MASK_EXPANDED) == 0
14941 	  && (mask & S390_SCHED_ATTR_MASK_GROUPALONE) == 0)
14942 	score += 10;
14943       if ((mask & S390_SCHED_ATTR_MASK_ENDGROUP) == 0)
14944 	score += 5;
14945       break;
14946     case 2:
14947       /* Prefer not cracked insns while trying to put together a
14948 	 group.  */
14949       if ((mask & S390_SCHED_ATTR_MASK_CRACKED) == 0
14950 	  && (mask & S390_SCHED_ATTR_MASK_EXPANDED) == 0
14951 	  && (mask & S390_SCHED_ATTR_MASK_GROUPALONE) == 0)
14952 	score += 10;
14953       /* Prefer endgroup insns in the last slot.  */
14954       if ((mask & S390_SCHED_ATTR_MASK_ENDGROUP) != 0)
14955 	score += 10;
14956       break;
14957     case S390_SCHED_STATE_NORMAL:
14958       /* Prefer not cracked insns if the last was not cracked.  */
14959       if ((mask & S390_SCHED_ATTR_MASK_CRACKED) == 0
14960 	  && (mask & S390_SCHED_ATTR_MASK_EXPANDED) == 0)
14961 	score += 5;
14962       if ((mask & S390_SCHED_ATTR_MASK_GROUPALONE) != 0)
14963 	score += 10;
14964       break;
14965     case S390_SCHED_STATE_CRACKED:
14966       /* Try to keep cracked insns together to prevent them from
14967 	 interrupting groups.  */
14968       if ((mask & S390_SCHED_ATTR_MASK_CRACKED) != 0
14969 	  || (mask & S390_SCHED_ATTR_MASK_EXPANDED) != 0)
14970 	score += 5;
14971       break;
14972     }
14973 
14974   if (s390_tune >= PROCESSOR_2964_Z13)
14975     {
14976       int units, i;
14977       unsigned unit_mask, m = 1;
14978 
14979       unit_mask = s390_get_unit_mask (insn, &units);
14980       gcc_assert (units <= MAX_SCHED_UNITS);
14981 
14982       /* Add a score in range 0..MAX_SCHED_MIX_SCORE depending on how long
14983 	 ago the last insn of this unit type got scheduled.  This is
14984 	 supposed to help provide a proper instruction mix to the
14985 	 CPU.  */
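      /* For example, an insn whose unit has been idle for
	 MAX_SCHED_MIX_DISTANCE or more insns gets the full
	 MAX_SCHED_MIX_SCORE bonus; the per-unit distance counters are
	 capped at MAX_SCHED_MIX_DISTANCE in s390_sched_variable_issue.  */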
14986       for (i = 0; i < units; i++, m <<= 1)
14987 	if (m & unit_mask)
14988 	  score += (last_scheduled_unit_distance[i] * MAX_SCHED_MIX_SCORE /
14989 		    MAX_SCHED_MIX_DISTANCE);
14990 
14991       unsigned latency = insn_default_latency (insn);
14992 
14993       int other_side = 1 - current_side;
14994 
14995       /* Try to delay long-running insns when side is busy.  */
14996       if (latency > LONGRUNNING_THRESHOLD)
14997 	{
14998 	  if (get_attr_z13_unit_fxu (insn) && fxu_longrunning[current_side]
14999 	      && fxu_longrunning[other_side] <= fxu_longrunning[current_side])
15000 	    score = MAX (0, score - 10);
15001 
15002 	  if (get_attr_z13_unit_vfu (insn) && vfu_longrunning[current_side]
15003 	      && vfu_longrunning[other_side] <= vfu_longrunning[current_side])
15004 	    score = MAX (0, score - 10);
15005 	}
15006     }
15007 
15008   return score;
15009 }
15010 
15011 /* This function is called via hook TARGET_SCHED_REORDER before
15012    issuing one insn from list READY which contains *NREADYP entries.
15013    For target z10 it reorders load instructions to avoid early load
15014    conflicts in the floating point pipeline.  */
15015 static int
15016 s390_sched_reorder (FILE *file, int verbose,
15017 		    rtx_insn **ready, int *nreadyp, int clock ATTRIBUTE_UNUSED)
15018 {
15019   if (s390_tune == PROCESSOR_2097_Z10
15020       && reload_completed
15021       && *nreadyp > 1)
15022     s390_z10_prevent_earlyload_conflicts (ready, nreadyp);
15023 
15024   if (s390_tune >= PROCESSOR_2827_ZEC12
15025       && reload_completed
15026       && *nreadyp > 1)
15027     {
15028       int i;
15029       int last_index = *nreadyp - 1;
15030       int max_index = -1;
15031       int max_score = -1;
15032       rtx_insn *tmp;
15033 
15034       /* Just move the insn with the highest score to the top (the
15035 	 end) of the list.  A full sort is not needed since a conflict
15036 	 in the hazard recognition cannot happen.  So the top insn in
15037 	 the ready list will always be taken.  */
15038       for (i = last_index; i >= 0; i--)
15039 	{
15040 	  int score;
15041 
15042 	  if (recog_memoized (ready[i]) < 0)
15043 	    continue;
15044 
15045 	  score = s390_sched_score (ready[i]);
15046 	  if (score > max_score)
15047 	    {
15048 	      max_score = score;
15049 	      max_index = i;
15050 	    }
15051 	}
15052 
15053       if (max_index != -1)
15054 	{
15055 	  if (max_index != last_index)
15056 	    {
15057 	      tmp = ready[max_index];
15058 	      ready[max_index] = ready[last_index];
15059 	      ready[last_index] = tmp;
15060 
15061 	      if (verbose > 5)
15062 		fprintf (file,
15063 			 ";;\t\tBACKEND: move insn %d to the top of list\n",
15064 			 INSN_UID (ready[last_index]));
15065 	    }
15066 	  else if (verbose > 5)
15067 	    fprintf (file,
15068 		     ";;\t\tBACKEND: best insn %d already on top\n",
15069 		     INSN_UID (ready[last_index]));
15070 	}
15071 
15072       if (verbose > 5)
15073 	{
15074 	  fprintf (file, "ready list ooo attributes - sched state: %d\n",
15075 		   s390_sched_state);
15076 
15077 	  for (i = last_index; i >= 0; i--)
15078 	    {
15079 	      unsigned int sched_mask;
15080 	      rtx_insn *insn = ready[i];
15081 
15082 	      if (recog_memoized (insn) < 0)
15083 		continue;
15084 
15085 	      sched_mask = s390_get_sched_attrmask (insn);
15086 	      fprintf (file, ";;\t\tBACKEND: insn %d score: %d: ",
15087 		       INSN_UID (insn),
15088 		       s390_sched_score (insn));
15089 #define PRINT_SCHED_ATTR(M, ATTR) fprintf (file, "%s ",\
15090 					   ((M) & sched_mask) ? #ATTR : "");
15091 	      PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_CRACKED, cracked);
15092 	      PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_EXPANDED, expanded);
15093 	      PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_ENDGROUP, endgroup);
15094 	      PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_GROUPALONE, groupalone);
15095 #undef PRINT_SCHED_ATTR
15096 	      if (s390_tune >= PROCESSOR_2964_Z13)
15097 		{
15098 		  unsigned int unit_mask, m = 1;
15099 		  int units, j;
15100 
15101 		  unit_mask  = s390_get_unit_mask (insn, &units);
15102 		  fprintf (file, "(units:");
15103 		  for (j = 0; j < units; j++, m <<= 1)
15104 		    if (m & unit_mask)
15105 		      fprintf (file, " u%d", j);
15106 		  fprintf (file, ")");
15107 		}
15108 	      fprintf (file, "\n");
15109 	    }
15110 	}
15111     }
15112 
15113   return s390_issue_rate ();
15114 }
15115 
15116 
15117 /* This function is called via hook TARGET_SCHED_VARIABLE_ISSUE after
15118    the scheduler has issued INSN.  It stores the last issued insn into
15119    last_scheduled_insn in order to make it available for
15120    s390_sched_reorder.  */
15121 static int
15122 s390_sched_variable_issue (FILE *file, int verbose, rtx_insn *insn, int more)
15123 {
15124   last_scheduled_insn = insn;
15125 
15126   bool starts_group = false;
15127 
15128   if (s390_tune >= PROCESSOR_2827_ZEC12
15129       && reload_completed
15130       && recog_memoized (insn) >= 0)
15131     {
15132       unsigned int mask = s390_get_sched_attrmask (insn);
15133 
15134       if ((mask & S390_SCHED_ATTR_MASK_CRACKED) != 0
15135 	  || (mask & S390_SCHED_ATTR_MASK_EXPANDED) != 0
15136 	  || (mask & S390_SCHED_ATTR_MASK_GROUPALONE) != 0)
15137 	starts_group = true;
15138 
15139       if ((mask & S390_SCHED_ATTR_MASK_CRACKED) != 0
15140 	  || (mask & S390_SCHED_ATTR_MASK_EXPANDED) != 0)
15141 	s390_sched_state = S390_SCHED_STATE_CRACKED;
15142       else if ((mask & S390_SCHED_ATTR_MASK_ENDGROUP) != 0
15143 	       || (mask & S390_SCHED_ATTR_MASK_GROUPALONE) != 0)
15144 	s390_sched_state = S390_SCHED_STATE_NORMAL;
15145       else
15146 	{
15147 	  /* Only normal insns are left (mask == 0).  */
15148 	  switch (s390_sched_state)
15149 	    {
15150 	    case 0:
15151 	      starts_group = true;
15152 	      /* fallthrough */
15153 	    case 1:
15154 	    case 2:
15155 	      s390_sched_state++;
15156 	      break;
15157 	    case S390_SCHED_STATE_NORMAL:
15158 	      starts_group = true;
15159 	      s390_sched_state = 1;
15160 	      break;
15161 	    case S390_SCHED_STATE_CRACKED:
15162 	      s390_sched_state = S390_SCHED_STATE_NORMAL;
15163 	      break;
15164 	    }
15165 	}
15166 
15167       if (s390_tune >= PROCESSOR_2964_Z13)
15168 	{
15169 	  int units, i;
15170 	  unsigned unit_mask, m = 1;
15171 
15172 	  unit_mask = s390_get_unit_mask (insn, &units);
15173 	  gcc_assert (units <= MAX_SCHED_UNITS);
15174 
15175 	  for (i = 0; i < units; i++, m <<= 1)
15176 	    if (m & unit_mask)
15177 	      last_scheduled_unit_distance[i] = 0;
15178 	    else if (last_scheduled_unit_distance[i] < MAX_SCHED_MIX_DISTANCE)
15179 	      last_scheduled_unit_distance[i]++;
15180 	}
15181 
15182       /* If this insn started a new group, the side flipped.  */
15183       if (starts_group)
15184 	current_side = current_side ? 0 : 1;
15185 
15186       for (int i = 0; i < 2; i++)
15187 	{
15188 	  if (fxu_longrunning[i] >= 1)
15189 	    fxu_longrunning[i] -= 1;
15190 	  if (vfu_longrunning[i] >= 1)
15191 	    vfu_longrunning[i] -= 1;
15192 	}
15193 
15194       unsigned latency = insn_default_latency (insn);
15195       if (latency > LONGRUNNING_THRESHOLD)
15196 	{
15197 	  if (get_attr_z13_unit_fxu (insn))
15198 	    fxu_longrunning[current_side] = latency * LATENCY_FACTOR;
15199 	  else
15200 	    vfu_longrunning[current_side] = latency * LATENCY_FACTOR;
15201 	}
15202 
15203       if (verbose > 5)
15204 	{
15205 	  unsigned int sched_mask;
15206 
15207 	  sched_mask = s390_get_sched_attrmask (insn);
15208 
15209 	  fprintf (file, ";;\t\tBACKEND: insn %d: ", INSN_UID (insn));
15210 #define PRINT_SCHED_ATTR(M, ATTR) fprintf (file, "%s ", ((M) & sched_mask) ? #ATTR : "");
15211 	  PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_CRACKED, cracked);
15212 	  PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_EXPANDED, expanded);
15213 	  PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_ENDGROUP, endgroup);
15214 	  PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_GROUPALONE, groupalone);
15215 #undef PRINT_SCHED_ATTR
15216 
15217 	  if (s390_tune >= PROCESSOR_2964_Z13)
15218 	    {
15219 	      unsigned int unit_mask, m = 1;
15220 	      int units, j;
15221 
15222 	      unit_mask  = s390_get_unit_mask (insn, &units);
15223 	      fprintf (file, "(units:");
15224 	      for (j = 0; j < units; j++, m <<= 1)
15225 		if (m & unit_mask)
15226 		  fprintf (file, " %d", j);
15227 	      fprintf (file, ")");
15228 	    }
15229 	  fprintf (file, " sched state: %d\n", s390_sched_state);
15230 
15231 	  if (s390_tune >= PROCESSOR_2964_Z13)
15232 	    {
15233 	      int units, j;
15234 
15235 	      s390_get_unit_mask (insn, &units);
15236 
15237 	      fprintf (file, ";;\t\tBACKEND: units unused for: ");
15238 	      for (j = 0; j < units; j++)
15239 		fprintf (file, "%d:%d ", j, last_scheduled_unit_distance[j]);
15240 	      fprintf (file, "\n");
15241 	    }
15242 	}
15243     }
15244 
15245   if (GET_CODE (PATTERN (insn)) != USE
15246       && GET_CODE (PATTERN (insn)) != CLOBBER)
15247     return more - 1;
15248   else
15249     return more;
15250 }
15251 
15252 static void
15253 s390_sched_init (FILE *file ATTRIBUTE_UNUSED,
15254 		 int verbose ATTRIBUTE_UNUSED,
15255 		 int max_ready ATTRIBUTE_UNUSED)
15256 {
15257   last_scheduled_insn = NULL;
15258   memset (last_scheduled_unit_distance, 0, MAX_SCHED_UNITS * sizeof (int));
15259 
15260   /* If the next basic block is most likely entered via a fallthru edge
15261      we keep the last sched state.  Otherwise we start a new group.
15262      The scheduler traverses basic blocks in "instruction stream" ordering,
15263      so if we see a fallthru edge here, s390_sched_state will be that of
15264      its source block.
15265 
15266      current_sched_info->prev_head is the insn before the first insn of the
15267      block of insns to be scheduled.
15268      */
15269   rtx_insn *insn = current_sched_info->prev_head
15270     ? NEXT_INSN (current_sched_info->prev_head) : NULL;
15271   basic_block bb = insn ? BLOCK_FOR_INSN (insn) : NULL;
15272   if (s390_tune < PROCESSOR_2964_Z13 || !s390_bb_fallthru_entry_likely (bb))
15273     s390_sched_state = 0;
15274 }
15275 
15276 /* This target hook implementation for TARGET_LOOP_UNROLL_ADJUST calculates
15277    the number of times struct loop *loop should be unrolled when tuning for
15278    cpus with a built-in stride prefetcher.
15279    The loop body is analyzed and its memory references are counted.
15280    Depending on the loop depth and the number of memory accesses a new
15281    value <= nunroll is returned in order to improve the behavior of the
15282    hardware prefetch unit.  */
15283 static unsigned
15284 s390_loop_unroll_adjust (unsigned nunroll, struct loop *loop)
15285 {
15286   basic_block *bbs;
15287   rtx_insn *insn;
15288   unsigned i;
15289   unsigned mem_count = 0;
15290 
15291   if (s390_tune < PROCESSOR_2097_Z10)
15292     return nunroll;
15293 
15294   /* Count the number of memory references within the loop body.  */
15295   bbs = get_loop_body (loop);
15296   subrtx_iterator::array_type array;
15297   for (i = 0; i < loop->num_nodes; i++)
15298     FOR_BB_INSNS (bbs[i], insn)
15299       if (INSN_P (insn) && INSN_CODE (insn) != -1)
15300 	FOR_EACH_SUBRTX (iter, array, PATTERN (insn), NONCONST)
15301 	  if (MEM_P (*iter))
15302 	    mem_count += 1;
15303   free (bbs);
15304 
15305   /* Prevent division by zero; nunroll need not be adjusted in this case anyway.  */
15306   if (mem_count == 0)
15307     return nunroll;
15308 
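  /* The divisors below roughly bound the number of memory accesses in the
     unrolled loop body: e.g. a depth-1 loop with 7 memory references is
     unrolled at most 28 / 7 = 4 times.  */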
15309   switch (loop_depth (loop))
15310     {
15311     case 1:
15312       return MIN (nunroll, 28 / mem_count);
15313     case 2:
15314       return MIN (nunroll, 22 / mem_count);
15315     default:
15316       return MIN (nunroll, 16 / mem_count);
15317     }
15318 }
15319 
15320 /* Restore the current options.  This is a hook function and also called
15321    internally.  */
15322 
15323 static void
15324 s390_function_specific_restore (struct gcc_options *opts,
15325 				struct cl_target_option *ptr ATTRIBUTE_UNUSED)
15326 {
15327   opts->x_s390_cost_pointer = (long)processor_table[opts->x_s390_tune].cost;
15328 }
15329 
15330 static void
15331 s390_option_override_internal (bool main_args_p,
15332 			       struct gcc_options *opts,
15333 			       const struct gcc_options *opts_set)
15334 {
15335   const char *prefix;
15336   const char *suffix;
15337 
15338   /* Set up prefix/suffix so the error messages refer to either the command
15339      line argument, or the attribute(target).  */
15340   if (main_args_p)
15341     {
15342       prefix = "-m";
15343       suffix = "";
15344     }
15345   else
15346     {
15347       prefix = "option(\"";
15348       suffix = "\")";
15349     }
15350 
15351 
15352   /* Architecture mode defaults according to ABI.  */
15353   if (!(opts_set->x_target_flags & MASK_ZARCH))
15354     {
15355       if (TARGET_64BIT)
15356 	opts->x_target_flags |= MASK_ZARCH;
15357       else
15358 	opts->x_target_flags &= ~MASK_ZARCH;
15359     }
15360 
15361   /* Set the march default in case it hasn't been specified on cmdline.  */
15362   if (!opts_set->x_s390_arch)
15363     opts->x_s390_arch = PROCESSOR_2064_Z900;
15364   else if (opts->x_s390_arch == PROCESSOR_9672_G5
15365 	   || opts->x_s390_arch == PROCESSOR_9672_G6)
15366     warning (OPT_Wdeprecated, "%sarch=%s%s is deprecated and will be removed "
15367 	     "in future releases; use at least %sarch=z900%s",
15368 	     prefix, opts->x_s390_arch == PROCESSOR_9672_G5 ? "g5" : "g6",
15369 	     suffix, prefix, suffix);
15370 
15371   opts->x_s390_arch_flags = processor_flags_table[(int) opts->x_s390_arch];
15372 
15373   /* Determine processor to tune for.  */
15374   if (!opts_set->x_s390_tune)
15375     opts->x_s390_tune = opts->x_s390_arch;
15376   else if (opts->x_s390_tune == PROCESSOR_9672_G5
15377 	   || opts->x_s390_tune == PROCESSOR_9672_G6)
15378     warning (OPT_Wdeprecated, "%stune=%s%s is deprecated and will be removed "
15379 	     "in future releases; use at least %stune=z900%s",
15380 	     prefix, opts->x_s390_tune == PROCESSOR_9672_G5 ? "g5" : "g6",
15381 	     suffix, prefix, suffix);
15382 
15383   opts->x_s390_tune_flags = processor_flags_table[opts->x_s390_tune];
15384 
15385   /* Sanity checks.  */
15386   if (opts->x_s390_arch == PROCESSOR_NATIVE
15387       || opts->x_s390_tune == PROCESSOR_NATIVE)
15388     gcc_unreachable ();
15389   if (TARGET_ZARCH_P (opts->x_target_flags) && !TARGET_CPU_ZARCH_P (opts))
15390     error ("z/Architecture mode not supported on %s",
15391 	   processor_table[(int)opts->x_s390_arch].name);
15392   if (TARGET_64BIT && !TARGET_ZARCH_P (opts->x_target_flags))
15393     error ("64-bit ABI not supported in ESA/390 mode");
15394 
15395   if (opts->x_s390_indirect_branch == indirect_branch_thunk_inline
15396       || opts->x_s390_indirect_branch_call == indirect_branch_thunk_inline
15397       || opts->x_s390_function_return == indirect_branch_thunk_inline
15398       || opts->x_s390_function_return_reg == indirect_branch_thunk_inline
15399       || opts->x_s390_function_return_mem == indirect_branch_thunk_inline)
15400     error ("thunk-inline is only supported with -mindirect-branch-jump");
15401 
15402   if (opts->x_s390_indirect_branch != indirect_branch_keep)
15403     {
15404       if (!opts_set->x_s390_indirect_branch_call)
15405 	opts->x_s390_indirect_branch_call = opts->x_s390_indirect_branch;
15406 
15407       if (!opts_set->x_s390_indirect_branch_jump)
15408 	opts->x_s390_indirect_branch_jump = opts->x_s390_indirect_branch;
15409     }
15410 
15411   if (opts->x_s390_function_return != indirect_branch_keep)
15412     {
15413       if (!opts_set->x_s390_function_return_reg)
15414 	opts->x_s390_function_return_reg = opts->x_s390_function_return;
15415 
15416       if (!opts_set->x_s390_function_return_mem)
15417 	opts->x_s390_function_return_mem = opts->x_s390_function_return;
15418     }
15419 
15420   if (!TARGET_CPU_ZARCH)
15421     {
15422       if (opts->x_s390_indirect_branch_call != indirect_branch_keep
15423 	  || opts->x_s390_indirect_branch_jump != indirect_branch_keep)
15424 	error ("-mindirect-branch* options require -march=z900 or higher");
15425       if (opts->x_s390_function_return_reg != indirect_branch_keep
15426 	  || opts->x_s390_function_return_mem != indirect_branch_keep)
15427 	error ("-mfunction-return* options require -march=z900 or higher");
15428     }
15429 
15430 
15431   /* Enable hardware transactions if available and not explicitly
15432      disabled by user.  E.g. with -m31 -march=zEC12 -mzarch */
15433   if (!TARGET_OPT_HTM_P (opts_set->x_target_flags))
15434     {
15435       if (TARGET_CPU_HTM_P (opts) && TARGET_ZARCH_P (opts->x_target_flags))
15436 	opts->x_target_flags |= MASK_OPT_HTM;
15437       else
15438 	opts->x_target_flags &= ~MASK_OPT_HTM;
15439     }
15440 
15441   if (TARGET_OPT_VX_P (opts_set->x_target_flags))
15442     {
15443       if (TARGET_OPT_VX_P (opts->x_target_flags))
15444 	{
15445 	  if (!TARGET_CPU_VX_P (opts))
15446 	    error ("hardware vector support not available on %s",
15447 		   processor_table[(int)opts->x_s390_arch].name);
15448 	  if (TARGET_SOFT_FLOAT_P (opts->x_target_flags))
15449 	    error ("hardware vector support not available with -msoft-float");
15450 	}
15451     }
15452   else
15453     {
15454       if (TARGET_CPU_VX_P (opts))
15455 	/* Enable vector support if available and not explicitly disabled
15456 	   by user.  E.g. with -m31 -march=z13 -mzarch */
15457 	opts->x_target_flags |= MASK_OPT_VX;
15458       else
15459 	opts->x_target_flags &= ~MASK_OPT_VX;
15460     }
15461 
15462   /* Use hardware DFP if available and not explicitly disabled by
15463      user. E.g. with -m31 -march=z10 -mzarch   */
15464   if (!TARGET_HARD_DFP_P (opts_set->x_target_flags))
15465     {
15466       if (TARGET_DFP_P (opts))
15467 	opts->x_target_flags |= MASK_HARD_DFP;
15468       else
15469 	opts->x_target_flags &= ~MASK_HARD_DFP;
15470     }
15471 
15472   if (TARGET_HARD_DFP_P (opts->x_target_flags) && !TARGET_DFP_P (opts))
15473     {
15474       if (TARGET_HARD_DFP_P (opts_set->x_target_flags))
15475 	{
15476 	  if (!TARGET_CPU_DFP_P (opts))
15477 	    error ("hardware decimal floating point instructions"
15478 		   " not available on %s",
15479 		   processor_table[(int)opts->x_s390_arch].name);
15480 	  if (!TARGET_ZARCH_P (opts->x_target_flags))
15481 	    error ("hardware decimal floating point instructions"
15482 		   " not available in ESA/390 mode");
15483 	}
15484       else
15485 	opts->x_target_flags &= ~MASK_HARD_DFP;
15486     }
15487 
15488   if (TARGET_SOFT_FLOAT_P (opts_set->x_target_flags)
15489       && TARGET_SOFT_FLOAT_P (opts->x_target_flags))
15490     {
15491       if (TARGET_HARD_DFP_P (opts_set->x_target_flags)
15492 	  && TARGET_HARD_DFP_P (opts->x_target_flags))
15493 	error ("-mhard-dfp can%'t be used in conjunction with -msoft-float");
15494 
15495       opts->x_target_flags &= ~MASK_HARD_DFP;
15496     }
15497 
15498   if (TARGET_BACKCHAIN_P (opts->x_target_flags)
15499       && TARGET_PACKED_STACK_P (opts->x_target_flags)
15500       && TARGET_HARD_FLOAT_P (opts->x_target_flags))
15501     error ("-mbackchain -mpacked-stack -mhard-float are not supported "
15502 	   "in combination");
15503 
15504   if (opts->x_s390_stack_size)
15505     {
15506       if (opts->x_s390_stack_guard >= opts->x_s390_stack_size)
15507 	error ("stack size must be greater than the stack guard value");
15508       else if (opts->x_s390_stack_size > 1 << 16)
15509 	error ("stack size must not be greater than 64k");
15510     }
15511   else if (opts->x_s390_stack_guard)
15512     error ("-mstack-guard implies use of -mstack-size");
15513 
15514 #ifdef TARGET_DEFAULT_LONG_DOUBLE_128
15515   if (!TARGET_LONG_DOUBLE_128_P (opts_set->x_target_flags))
15516     opts->x_target_flags |= MASK_LONG_DOUBLE_128;
15517 #endif
15518 
15519   if (opts->x_s390_tune >= PROCESSOR_2097_Z10)
15520     {
15521       maybe_set_param_value (PARAM_MAX_UNROLLED_INSNS, 100,
15522 			     opts->x_param_values,
15523 			     opts_set->x_param_values);
15524       maybe_set_param_value (PARAM_MAX_UNROLL_TIMES, 32,
15525 			     opts->x_param_values,
15526 			     opts_set->x_param_values);
15527       maybe_set_param_value (PARAM_MAX_COMPLETELY_PEELED_INSNS, 2000,
15528 			     opts->x_param_values,
15529 			     opts_set->x_param_values);
15530       maybe_set_param_value (PARAM_MAX_COMPLETELY_PEEL_TIMES, 64,
15531 			     opts->x_param_values,
15532 			     opts_set->x_param_values);
15533     }
15534 
15535   maybe_set_param_value (PARAM_MAX_PENDING_LIST_LENGTH, 256,
15536 			 opts->x_param_values,
15537 			 opts_set->x_param_values);
15538   /* Values for loop prefetching.  */
15539   maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE, 256,
15540 			 opts->x_param_values,
15541 			 opts_set->x_param_values);
15542   maybe_set_param_value (PARAM_L1_CACHE_SIZE, 128,
15543 			 opts->x_param_values,
15544 			 opts_set->x_param_values);
15545   /* s390 has more than 2 cache levels and the sizes are much larger.
15546      Since we are always running virtualized, assume that we only get a
15547      small part of the caches above L1.  */
15548   maybe_set_param_value (PARAM_L2_CACHE_SIZE, 1500,
15549 			 opts->x_param_values,
15550 			 opts_set->x_param_values);
15551   maybe_set_param_value (PARAM_PREFETCH_MIN_INSN_TO_MEM_RATIO, 2,
15552 			 opts->x_param_values,
15553 			 opts_set->x_param_values);
15554   maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES, 6,
15555 			 opts->x_param_values,
15556 			 opts_set->x_param_values);
15557 
15558   /* Use the alternative scheduling-pressure algorithm by default.  */
15559   maybe_set_param_value (PARAM_SCHED_PRESSURE_ALGORITHM, 2,
15560                          opts->x_param_values,
15561                          opts_set->x_param_values);
15562 
15563   maybe_set_param_value (PARAM_MIN_VECT_LOOP_BOUND, 2,
15564 			 opts->x_param_values,
15565 			 opts_set->x_param_values);
15566 
15567   /* Call target specific restore function to do post-init work.  At the moment,
15568      this just sets opts->x_s390_cost_pointer.  */
15569   s390_function_specific_restore (opts, NULL);
15570 }
15571 
15572 static void
15573 s390_option_override (void)
15574 {
15575   unsigned int i;
15576   cl_deferred_option *opt;
15577   vec<cl_deferred_option> *v =
15578     (vec<cl_deferred_option> *) s390_deferred_options;
15579 
15580   if (v)
15581     FOR_EACH_VEC_ELT (*v, i, opt)
15582       {
15583 	switch (opt->opt_index)
15584 	  {
15585 	  case OPT_mhotpatch_:
15586 	    {
15587 	      int val1;
15588 	      int val2;
15589 	      char *s = strtok (ASTRDUP (opt->arg), ",");
15590 	      char *t = strtok (NULL, "\0");
15591 
15592 	      if (t != NULL)
15593 		{
15594 		  val1 = integral_argument (s);
15595 		  val2 = integral_argument (t);
15596 		}
15597 	      else
15598 		{
15599 		  val1 = -1;
15600 		  val2 = -1;
15601 		}
15602 	      if (val1 == -1 || val2 == -1)
15603 		{
15604 		  /* The argument is not a plain number.  */
15605 		  error ("arguments to %qs should be non-negative integers",
15606 			 "-mhotpatch=n,m");
15607 		  break;
15608 		}
15609 	      else if (val1 > s390_hotpatch_hw_max
15610 		       || val2 > s390_hotpatch_hw_max)
15611 		{
15612 		  error ("argument to %qs is too large (max. %d)",
15613 			 "-mhotpatch=n,m", s390_hotpatch_hw_max);
15614 		  break;
15615 		}
15616 	      s390_hotpatch_hw_before_label = val1;
15617 	      s390_hotpatch_hw_after_label = val2;
15618 	      break;
15619 	    }
15620 	  default:
15621 	    gcc_unreachable ();
15622 	  }
15623       }
15624 
15625   /* Set up function hooks.  */
15626   init_machine_status = s390_init_machine_status;
15627 
15628   s390_option_override_internal (true, &global_options, &global_options_set);
15629 
15630   /* Save the initial options in case the user does function specific
15631      options.  */
15632   target_option_default_node = build_target_option_node (&global_options);
15633   target_option_current_node = target_option_default_node;
15634 
15635   /* This cannot reside in s390_option_optimization_table since HAVE_prefetch
15636      requires the arch flags to be evaluated already.  Since prefetching
15637      is beneficial on s390, we enable it if available.  */
15638   if (flag_prefetch_loop_arrays < 0 && HAVE_prefetch && optimize >= 3)
15639     flag_prefetch_loop_arrays = 1;
15640 
15641   if (!s390_pic_data_is_text_relative && !flag_pic)
15642     error ("-mno-pic-data-is-text-relative cannot be used without -fpic/-fPIC");
15643 
15644   if (TARGET_TPF)
15645     {
15646       /* Don't emit DWARF3/4 unless specifically selected.  The TPF
15647 	 debuggers do not yet support DWARF 3/4.  */
15648       if (!global_options_set.x_dwarf_strict)
15649 	dwarf_strict = 1;
15650       if (!global_options_set.x_dwarf_version)
15651 	dwarf_version = 2;
15652     }
15653 
15654   /* Register a target-specific optimization-and-lowering pass
15655      to run immediately before prologue and epilogue generation.
15656 
15657      Registering the pass must be done at start up.  It's
15658      convenient to do it here.  */
15659   opt_pass *new_pass = new pass_s390_early_mach (g);
15660   struct register_pass_info insert_pass_s390_early_mach =
15661     {
15662       new_pass,			/* pass */
15663       "pro_and_epilogue",	/* reference_pass_name */
15664       1,			/* ref_pass_instance_number */
15665       PASS_POS_INSERT_BEFORE	/* po_op */
15666     };
15667   register_pass (&insert_pass_s390_early_mach);
15668 }
15669 
15670 #if S390_USE_TARGET_ATTRIBUTE
15671 /* Inner function to process the attribute((target(...))): take an argument
15672    and set the current options from it.  If we have a list, recursively go
15673    over the list.  */
15674 
15675 static bool
15676 s390_valid_target_attribute_inner_p (tree args,
15677 				     struct gcc_options *opts,
15678 				     struct gcc_options *new_opts_set,
15679 				     bool force_pragma)
15680 {
15681   char *next_optstr;
15682   bool ret = true;
15683 
15684 #define S390_ATTRIB(S,O,A)  { S, sizeof (S)-1, O, A, 0 }
15685 #define S390_PRAGMA(S,O,A)  { S, sizeof (S)-1, O, A, 1 }
15686   static const struct
15687   {
15688     const char *string;
15689     size_t len;
15690     int opt;
15691     int has_arg;
15692     int only_as_pragma;
15693   } attrs[] = {
15694     /* enum options */
15695     S390_ATTRIB ("arch=", OPT_march_, 1),
15696     S390_ATTRIB ("tune=", OPT_mtune_, 1),
15697     /* uinteger options */
15698     S390_ATTRIB ("stack-guard=", OPT_mstack_guard_, 1),
15699     S390_ATTRIB ("stack-size=", OPT_mstack_size_, 1),
15700     S390_ATTRIB ("branch-cost=", OPT_mbranch_cost_, 1),
15701     S390_ATTRIB ("warn-framesize=", OPT_mwarn_framesize_, 1),
15702     /* flag options */
15703     S390_ATTRIB ("backchain", OPT_mbackchain, 0),
15704     S390_ATTRIB ("hard-dfp", OPT_mhard_dfp, 0),
15705     S390_ATTRIB ("hard-float", OPT_mhard_float, 0),
15706     S390_ATTRIB ("htm", OPT_mhtm, 0),
15707     S390_ATTRIB ("vx", OPT_mvx, 0),
15708     S390_ATTRIB ("packed-stack", OPT_mpacked_stack, 0),
15709     S390_ATTRIB ("small-exec", OPT_msmall_exec, 0),
15710     S390_ATTRIB ("soft-float", OPT_msoft_float, 0),
15711     S390_ATTRIB ("mvcle", OPT_mmvcle, 0),
15712     S390_PRAGMA ("zvector", OPT_mzvector, 0),
15713     /* boolean options */
15714     S390_ATTRIB ("warn-dynamicstack", OPT_mwarn_dynamicstack, 0),
15715   };
15716 #undef S390_ATTRIB
15717 #undef S390_PRAGMA
15718 
15719   /* If this is a list, recurse to get the options.  */
15720   if (TREE_CODE (args) == TREE_LIST)
15721     {
15722       bool ret = true;
15723       int num_pragma_values;
15724       int i;
15725 
15726       /* Note: attribs.c:decl_attributes prepends the values from
15727 	 current_target_pragma to the list of target attributes.  To determine
15728 	 whether we're looking at a value of the attribute or the pragma we
15729 	 assume that the first [list_length (current_target_pragma)] values in
15730 	 the list are the values from the pragma.  */
15731       num_pragma_values = (!force_pragma && current_target_pragma != NULL)
15732 	? list_length (current_target_pragma) : 0;
15733       for (i = 0; args; args = TREE_CHAIN (args), i++)
15734 	{
15735 	  bool is_pragma;
15736 
15737 	  is_pragma = (force_pragma || i < num_pragma_values);
15738 	  if (TREE_VALUE (args)
15739 	      && !s390_valid_target_attribute_inner_p (TREE_VALUE (args),
15740 						       opts, new_opts_set,
15741 						       is_pragma))
15742 	    {
15743 	      ret = false;
15744 	    }
15745 	}
15746       return ret;
15747     }
15748 
15749   else if (TREE_CODE (args) != STRING_CST)
15750     {
15751       error ("attribute %<target%> argument not a string");
15752       return false;
15753     }
15754 
15755   /* Handle multiple arguments separated by commas.  */
15756   next_optstr = ASTRDUP (TREE_STRING_POINTER (args));
15757 
15758   while (next_optstr && *next_optstr != '\0')
15759     {
15760       char *p = next_optstr;
15761       char *orig_p = p;
15762       char *comma = strchr (next_optstr, ',');
15763       size_t len, opt_len;
15764       int opt;
15765       bool opt_set_p;
15766       char ch;
15767       unsigned i;
15768       int mask = 0;
15769       enum cl_var_type var_type;
15770       bool found;
15771 
15772       if (comma)
15773 	{
15774 	  *comma = '\0';
15775 	  len = comma - next_optstr;
15776 	  next_optstr = comma + 1;
15777 	}
15778       else
15779 	{
15780 	  len = strlen (p);
15781 	  next_optstr = NULL;
15782 	}
15783 
15784       /* Recognize no-xxx.  */
15785       if (len > 3 && p[0] == 'n' && p[1] == 'o' && p[2] == '-')
15786 	{
15787 	  opt_set_p = false;
15788 	  p += 3;
15789 	  len -= 3;
15790 	}
15791       else
15792 	opt_set_p = true;
15793 
15794       /* Find the option.  */
15795       ch = *p;
15796       found = false;
15797       for (i = 0; i < ARRAY_SIZE (attrs); i++)
15798 	{
15799 	  opt_len = attrs[i].len;
15800 	  if (ch == attrs[i].string[0]
15801 	      && ((attrs[i].has_arg) ? len > opt_len : len == opt_len)
15802 	      && memcmp (p, attrs[i].string, opt_len) == 0)
15803 	    {
15804 	      opt = attrs[i].opt;
15805 	      if (!opt_set_p && cl_options[opt].cl_reject_negative)
15806 		continue;
15807 	      mask = cl_options[opt].var_value;
15808 	      var_type = cl_options[opt].var_type;
15809 	      found = true;
15810 	      break;
15811 	    }
15812 	}
15813 
15814       /* Process the option.  */
15815       if (!found)
15816 	{
15817 	  error ("attribute(target(\"%s\")) is unknown", orig_p);
15818 	  return false;
15819 	}
15820       else if (attrs[i].only_as_pragma && !force_pragma)
15821 	{
15822 	  /* Value is not allowed for the target attribute.  */
15823 	  error ("value %qs is not supported by attribute %<target%>",
15824 		 attrs[i].string);
15825 	  return false;
15826 	}
15827 
15828       else if (var_type == CLVC_BIT_SET || var_type == CLVC_BIT_CLEAR)
15829 	{
15830 	  if (var_type == CLVC_BIT_CLEAR)
15831 	    opt_set_p = !opt_set_p;
15832 
15833 	  if (opt_set_p)
15834 	    opts->x_target_flags |= mask;
15835 	  else
15836 	    opts->x_target_flags &= ~mask;
15837 	  new_opts_set->x_target_flags |= mask;
15838 	}
15839 
15840       else if (cl_options[opt].var_type == CLVC_BOOLEAN)
15841 	{
15842 	  int value;
15843 
15844 	  if (cl_options[opt].cl_uinteger)
15845 	    {
15846 	      /* Unsigned integer argument.  Code based on the function
15847 		 decode_cmdline_option () in opts-common.c.  */
15848 	      value = integral_argument (p + opt_len);
15849 	    }
15850 	  else
15851 	    value = (opt_set_p) ? 1 : 0;
15852 
15853 	  if (value != -1)
15854 	    {
15855 	      struct cl_decoded_option decoded;
15856 
15857 	      /* Value range check; only implemented for numeric and boolean
15858 		 options at the moment.  */
15859 	      generate_option (opt, NULL, value, CL_TARGET, &decoded);
15860 	      s390_handle_option (opts, new_opts_set, &decoded, input_location);
15861 	      set_option (opts, new_opts_set, opt, value,
15862 			  p + opt_len, DK_UNSPECIFIED, input_location,
15863 			  global_dc);
15864 	    }
15865 	  else
15866 	    {
15867 	      error ("attribute(target(\"%s\")) is unknown", orig_p);
15868 	      ret = false;
15869 	    }
15870 	}
15871 
15872       else if (cl_options[opt].var_type == CLVC_ENUM)
15873 	{
15874 	  bool arg_ok;
15875 	  int value;
15876 
15877 	  arg_ok = opt_enum_arg_to_value (opt, p + opt_len, &value, CL_TARGET);
15878 	  if (arg_ok)
15879 	    set_option (opts, new_opts_set, opt, value,
15880 			p + opt_len, DK_UNSPECIFIED, input_location,
15881 			global_dc);
15882 	  else
15883 	    {
15884 	      error ("attribute(target(\"%s\")) is unknown", orig_p);
15885 	      ret = false;
15886 	    }
15887 	}
15888 
15889       else
15890 	gcc_unreachable ();
15891     }
15892   return ret;
15893 }
15894 
15895 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL.  */
15896 
15897 tree
15898 s390_valid_target_attribute_tree (tree args,
15899 				  struct gcc_options *opts,
15900 				  const struct gcc_options *opts_set,
15901 				  bool force_pragma)
15902 {
15903   tree t = NULL_TREE;
15904   struct gcc_options new_opts_set;
15905 
15906   memset (&new_opts_set, 0, sizeof (new_opts_set));
15907 
15908   /* Process each of the options on the chain.  */
15909   if (! s390_valid_target_attribute_inner_p (args, opts, &new_opts_set,
15910 					     force_pragma))
15911     return error_mark_node;
15912 
15913   /* If some option was set (even if it has not changed), rerun
15914      s390_option_override_internal, and then save the options away.  */
15915   if (new_opts_set.x_target_flags
15916       || new_opts_set.x_s390_arch
15917       || new_opts_set.x_s390_tune
15918       || new_opts_set.x_s390_stack_guard
15919       || new_opts_set.x_s390_stack_size
15920       || new_opts_set.x_s390_branch_cost
15921       || new_opts_set.x_s390_warn_framesize
15922       || new_opts_set.x_s390_warn_dynamicstack_p)
15923     {
15924       const unsigned char *src = (const unsigned char *)opts_set;
15925       unsigned char *dest = (unsigned char *)&new_opts_set;
15926       unsigned int i;
15927 
15928       /* Merge the original option flags into the new ones.  */
15929       for (i = 0; i < sizeof(*opts_set); i++)
15930 	dest[i] |= src[i];
15931 
15932       /* Do any overrides, such as arch=xxx, or tune=xxx support.  */
15933       s390_option_override_internal (false, opts, &new_opts_set);
15934       /* Save the current options unless we are validating options for
15935 	 #pragma.  */
15936       t = build_target_option_node (opts);
15937     }
15938   return t;
15939 }
15940 
15941 /* Hook to validate attribute((target("string"))).  */
15942 
15943 static bool
15944 s390_valid_target_attribute_p (tree fndecl,
15945 			       tree ARG_UNUSED (name),
15946 			       tree args,
15947 			       int ARG_UNUSED (flags))
15948 {
15949   struct gcc_options func_options;
15950   tree new_target, new_optimize;
15951   bool ret = true;
15952 
15953   /* attribute((target("default"))) does nothing, beyond
15954      affecting multi-versioning.  */
15955   if (TREE_VALUE (args)
15956       && TREE_CODE (TREE_VALUE (args)) == STRING_CST
15957       && TREE_CHAIN (args) == NULL_TREE
15958       && strcmp (TREE_STRING_POINTER (TREE_VALUE (args)), "default") == 0)
15959     return true;
15960 
15961   tree old_optimize = build_optimization_node (&global_options);
15962 
15963   /* Get the optimization options of the current function.  */
15964   tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
15965 
15966   if (!func_optimize)
15967     func_optimize = old_optimize;
15968 
15969   /* Init func_options.  */
15970   memset (&func_options, 0, sizeof (func_options));
15971   init_options_struct (&func_options, NULL);
15972   lang_hooks.init_options_struct (&func_options);
15973 
15974   cl_optimization_restore (&func_options, TREE_OPTIMIZATION (func_optimize));
15975 
15976   /* Initialize func_options to the default before its target options can
15977      be set.  */
15978   cl_target_option_restore (&func_options,
15979 			    TREE_TARGET_OPTION (target_option_default_node));
15980 
15981   new_target = s390_valid_target_attribute_tree (args, &func_options,
15982 						 &global_options_set,
15983 						 (args ==
15984 						  current_target_pragma));
15985   new_optimize = build_optimization_node (&func_options);
15986   if (new_target == error_mark_node)
15987     ret = false;
15988   else if (fndecl && new_target)
15989     {
15990       DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;
15991       if (old_optimize != new_optimize)
15992 	DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
15993     }
15994   return ret;
15995 }
15996 
15997 /* Hook to determine if one function can safely inline another.  */
15998 
15999 static bool
16000 s390_can_inline_p (tree caller, tree callee)
16001 {
16002   tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
16003   tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
16004 
16005   if (!callee_tree)
16006     callee_tree = target_option_default_node;
16007   if (!caller_tree)
16008     caller_tree = target_option_default_node;
16009   if (callee_tree == caller_tree)
16010     return true;
16011 
16012   struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
16013   struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
16014   bool ret = true;
16015 
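  /* All target flags except -msoft-float and -mhard-dfp must match for
     inlining; differences in those two are handled by the more specific
     checks below.  */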
16016   if ((caller_opts->x_target_flags & ~(MASK_SOFT_FLOAT | MASK_HARD_DFP))
16017       != (callee_opts->x_target_flags & ~(MASK_SOFT_FLOAT | MASK_HARD_DFP)))
16018     ret = false;
16019 
16020   /* Don't inline functions to be compiled for a more recent arch into a
16021      function for an older arch.  */
16022   else if (caller_opts->x_s390_arch < callee_opts->x_s390_arch)
16023     ret = false;
16024 
16025   /* Inlining a hard float function into a soft float function is only
16026      allowed if the hard float function doesn't actually make use of
16027      floating point.
16028 
16029      We are called from FEs for multi-versioning call optimization, so
16030      beware that ipa_fn_summaries may not be available.  */
16031   else if (((TARGET_SOFT_FLOAT_P (caller_opts->x_target_flags)
16032 	     && !TARGET_SOFT_FLOAT_P (callee_opts->x_target_flags))
16033 	    || (!TARGET_HARD_DFP_P (caller_opts->x_target_flags)
16034 		&& TARGET_HARD_DFP_P (callee_opts->x_target_flags)))
16035 	   && (! ipa_fn_summaries
16036 	       || ipa_fn_summaries->get
16037 	       (cgraph_node::get (callee))->fp_expressions))
16038     ret = false;
16039 
16040   return ret;
16041 }
16042 
16043 /* Set VAL to correct enum value according to the indirect-branch or
16044    function-return attribute in ATTR.  */
16045 
16046 static inline void
16047 s390_indirect_branch_attrvalue (tree attr, enum indirect_branch *val)
16048 {
16049   const char *str = TREE_STRING_POINTER (TREE_VALUE (TREE_VALUE (attr)));
16050   if (strcmp (str, "keep") == 0)
16051     *val = indirect_branch_keep;
16052   else if (strcmp (str, "thunk") == 0)
16053     *val = indirect_branch_thunk;
16054   else if (strcmp (str, "thunk-inline") == 0)
16055     *val = indirect_branch_thunk_inline;
16056   else if (strcmp (str, "thunk-extern") == 0)
16057     *val = indirect_branch_thunk_extern;
16058 }
16059 
16060 /* Memorize the setting for -mindirect-branch* and -mfunction-return*
16061    from either the cmdline or the function attributes in
16062    cfun->machine.  */
16063 
16064 static void
16065 s390_indirect_branch_settings (tree fndecl)
16066 {
16067   tree attr;
16068 
16069   if (!fndecl)
16070     return;
16071 
16072   /* Initialize with the cmdline options and let the attributes
16073      override them.  */
16074   cfun->machine->indirect_branch_jump = s390_indirect_branch_jump;
16075   cfun->machine->indirect_branch_call = s390_indirect_branch_call;
16076 
16077   cfun->machine->function_return_reg = s390_function_return_reg;
16078   cfun->machine->function_return_mem = s390_function_return_mem;
16079 
16080   if ((attr = lookup_attribute ("indirect_branch",
16081 				DECL_ATTRIBUTES (fndecl))))
16082     {
16083       s390_indirect_branch_attrvalue (attr,
16084 				      &cfun->machine->indirect_branch_jump);
16085       s390_indirect_branch_attrvalue (attr,
16086 				      &cfun->machine->indirect_branch_call);
16087     }
16088 
16089   if ((attr = lookup_attribute ("indirect_branch_jump",
16090 				DECL_ATTRIBUTES (fndecl))))
16091     s390_indirect_branch_attrvalue (attr, &cfun->machine->indirect_branch_jump);
16092 
16093   if ((attr = lookup_attribute ("indirect_branch_call",
16094 				DECL_ATTRIBUTES (fndecl))))
16095     s390_indirect_branch_attrvalue (attr, &cfun->machine->indirect_branch_call);
16096 
16097   if ((attr = lookup_attribute ("function_return",
16098 				DECL_ATTRIBUTES (fndecl))))
16099     {
16100       s390_indirect_branch_attrvalue (attr,
16101 				      &cfun->machine->function_return_reg);
16102       s390_indirect_branch_attrvalue (attr,
16103 				      &cfun->machine->function_return_mem);
16104     }
16105 
16106   if ((attr = lookup_attribute ("function_return_reg",
16107 				DECL_ATTRIBUTES (fndecl))))
16108     s390_indirect_branch_attrvalue (attr, &cfun->machine->function_return_reg);
16109 
16110   if ((attr = lookup_attribute ("function_return_mem",
16111 				DECL_ATTRIBUTES (fndecl))))
16112     s390_indirect_branch_attrvalue (attr, &cfun->machine->function_return_mem);
16113 }
16114 
16115 /* Restore targets globals from NEW_TREE and invalidate s390_previous_fndecl
16116    cache.  */
16117 
16118 void
16119 s390_activate_target_options (tree new_tree)
16120 {
16121   cl_target_option_restore (&global_options, TREE_TARGET_OPTION (new_tree));
16122   if (TREE_TARGET_GLOBALS (new_tree))
16123     restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
16124   else if (new_tree == target_option_default_node)
16125     restore_target_globals (&default_target_globals);
16126   else
16127     TREE_TARGET_GLOBALS (new_tree) = save_target_globals_default_opts ();
16128   s390_previous_fndecl = NULL_TREE;
16129 }
16130 
16131 /* Establish appropriate back-end context for processing the function
16132    FNDECL.  The argument might be NULL to indicate processing at top
16133    level, outside of any function scope.  */
16134 static void
16135 s390_set_current_function (tree fndecl)
16136 {
16137   /* Only change the context if the function changes.  This hook is called
16138      several times in the course of compiling a function, and we don't want to
16139      slow things down too much or call target_reinit when it isn't safe.  */
16140   if (fndecl == s390_previous_fndecl)
16141     {
16142       s390_indirect_branch_settings (fndecl);
16143       return;
16144     }
16145 
16146   tree old_tree;
16147   if (s390_previous_fndecl == NULL_TREE)
16148     old_tree = target_option_current_node;
16149   else if (DECL_FUNCTION_SPECIFIC_TARGET (s390_previous_fndecl))
16150     old_tree = DECL_FUNCTION_SPECIFIC_TARGET (s390_previous_fndecl);
16151   else
16152     old_tree = target_option_default_node;
16153 
16154   if (fndecl == NULL_TREE)
16155     {
16156       if (old_tree != target_option_current_node)
16157 	s390_activate_target_options (target_option_current_node);
16158       return;
16159     }
16160 
16161   tree new_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
16162   if (new_tree == NULL_TREE)
16163     new_tree = target_option_default_node;
16164 
16165   if (old_tree != new_tree)
16166     s390_activate_target_options (new_tree);
16167   s390_previous_fndecl = fndecl;
16168 
16169   s390_indirect_branch_settings (fndecl);
16170 }
16171 #endif
16172 
16173 /* Implement TARGET_USE_BY_PIECES_INFRASTRUCTURE_P.  */
16174 
16175 static bool
16176 s390_use_by_pieces_infrastructure_p (unsigned HOST_WIDE_INT size,
16177 				     unsigned int align ATTRIBUTE_UNUSED,
16178 				     enum by_pieces_operation op ATTRIBUTE_UNUSED,
16179 				     bool speed_p ATTRIBUTE_UNUSED)
16180 {
16181   return (size == 1 || size == 2
16182 	  || size == 4 || (TARGET_ZARCH && size == 8));
16183 }
16184 
16185 /* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV hook.  */
16186 
16187 static void
16188 s390_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
16189 {
16190   tree sfpc = s390_builtin_decls[S390_BUILTIN_s390_sfpc];
16191   tree efpc = s390_builtin_decls[S390_BUILTIN_s390_efpc];
16192   tree call_efpc = build_call_expr (efpc, 0);
16193   tree fenv_var = create_tmp_var_raw (unsigned_type_node);
16194 
16195 #define FPC_EXCEPTION_MASK	 HOST_WIDE_INT_UC (0xf8000000)
16196 #define FPC_FLAGS_MASK		 HOST_WIDE_INT_UC (0x00f80000)
16197 #define FPC_DXC_MASK		 HOST_WIDE_INT_UC (0x0000ff00)
16198 #define FPC_EXCEPTION_MASK_SHIFT HOST_WIDE_INT_UC (24)
16199 #define FPC_FLAGS_SHIFT		 HOST_WIDE_INT_UC (16)
16200 #define FPC_DXC_SHIFT		 HOST_WIDE_INT_UC (8)
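/* As encoded in the masks above, the IEEE exception-mask bits live in the
   top byte of the 32-bit FPC word, the IEEE flag bits in the second byte,
   and the data-exception code (DXC) in the third byte.  */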
16201 
16202   /* Generates the equivalent of feholdexcept (&fenv_var)
16203 
16204      fenv_var = __builtin_s390_efpc ();
16205      __builtin_s390_sfpc (fenv_var & mask) */
16206   tree old_fpc = build2 (MODIFY_EXPR, unsigned_type_node, fenv_var, call_efpc);
16207   tree new_fpc =
16208     build2 (BIT_AND_EXPR, unsigned_type_node, fenv_var,
16209 	    build_int_cst (unsigned_type_node,
16210 			   ~(FPC_DXC_MASK | FPC_FLAGS_MASK |
16211 			     FPC_EXCEPTION_MASK)));
16212   tree set_new_fpc = build_call_expr (sfpc, 1, new_fpc);
16213   *hold = build2 (COMPOUND_EXPR, void_type_node, old_fpc, set_new_fpc);
16214 
16215   /* Generates the equivalent of feclearexcept (FE_ALL_EXCEPT)
16216 
16217      __builtin_s390_sfpc (__builtin_s390_efpc () & mask) */
16218   new_fpc = build2 (BIT_AND_EXPR, unsigned_type_node, call_efpc,
16219 		    build_int_cst (unsigned_type_node,
16220 				   ~(FPC_DXC_MASK | FPC_FLAGS_MASK)));
16221   *clear = build_call_expr (sfpc, 1, new_fpc);
16222 
16223   /* Generates the equivalent of feupdateenv (fenv_var)
16224 
16225   old_fpc = __builtin_s390_efpc ();
16226   __builtin_s390_sfpc (fenv_var);
16227   __atomic_feraiseexcept ((old_fpc & FPC_FLAGS_MASK) >> FPC_FLAGS_SHIFT);  */
16228 
16229   old_fpc = create_tmp_var_raw (unsigned_type_node);
16230   tree store_old_fpc = build2 (MODIFY_EXPR, void_type_node,
16231 			       old_fpc, call_efpc);
16232 
16233   set_new_fpc = build_call_expr (sfpc, 1, fenv_var);
16234 
16235   tree raise_old_except = build2 (BIT_AND_EXPR, unsigned_type_node, old_fpc,
16236 				  build_int_cst (unsigned_type_node,
16237 						 FPC_FLAGS_MASK));
16238   raise_old_except = build2 (RSHIFT_EXPR, unsigned_type_node, raise_old_except,
16239 			     build_int_cst (unsigned_type_node,
16240 					    FPC_FLAGS_SHIFT));
16241   tree atomic_feraiseexcept
16242     = builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT);
16243   raise_old_except = build_call_expr (atomic_feraiseexcept,
16244 				      1, raise_old_except);
16245 
16246   *update = build2 (COMPOUND_EXPR, void_type_node,
16247 		    build2 (COMPOUND_EXPR, void_type_node,
16248 			    store_old_fpc, set_new_fpc),
16249 		    raise_old_except);
16250 
16251 #undef FPC_EXCEPTION_MASK
16252 #undef FPC_FLAGS_MASK
16253 #undef FPC_DXC_MASK
16254 #undef FPC_EXCEPTION_MASK_SHIFT
16255 #undef FPC_FLAGS_SHIFT
16256 #undef FPC_DXC_SHIFT
16257 }
16258 
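/* Usage sketch (hypothetical user code): this hook is invoked when
   expanding an atomic compound assignment on a floating-point object,
   e.g.

     _Atomic double acc;
     void add (double x) { acc += x; }

   The FPC is saved and its trap, flag and DXC bits cleared before the
   compare-and-swap loop (*hold), the flags are cleared again when the
   loop retries (*clear), and the saved FPC is restored with any newly
   set flags re-raised once the store succeeds (*update).  */
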
16259 /* Return the vector mode to be used for inner mode MODE when doing
16260    vectorization.  */
16261 static machine_mode
16262 s390_preferred_simd_mode (scalar_mode mode)
16263 {
16264   if (TARGET_VXE)
16265     switch (mode)
16266       {
16267       case E_SFmode:
16268 	return V4SFmode;
16269       default:;
16270       }
16271 
16272   if (TARGET_VX)
16273     switch (mode)
16274       {
16275       case E_DFmode:
16276 	return V2DFmode;
16277       case E_DImode:
16278 	return V2DImode;
16279       case E_SImode:
16280 	return V4SImode;
16281       case E_HImode:
16282 	return V8HImode;
16283       case E_QImode:
16284 	return V16QImode;
16285       default:;
16286       }
16287   return word_mode;
16288 }
16289 
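/* For example (sketch), a loop such as

     void add_arrays (int *restrict a, int *restrict b, int n)
     {
       for (int i = 0; i < n; i++)
	 a[i] += b[i];
     }

   is vectorized using V4SImode once TARGET_VX is available, while an
   equivalent float loop only gets V4SFmode with TARGET_VXE; for other
   inner modes word_mode is returned, i.e. no preferred vector mode.  */
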
16290 /* Our hardware does not require vectors to be strictly aligned.  */
16291 static bool
16292 s390_support_vector_misalignment (machine_mode mode ATTRIBUTE_UNUSED,
16293 				  const_tree type ATTRIBUTE_UNUSED,
16294 				  int misalignment ATTRIBUTE_UNUSED,
16295 				  bool is_packed ATTRIBUTE_UNUSED)
16296 {
16297   if (TARGET_VX)
16298     return true;
16299 
16300   return default_builtin_support_vector_misalignment (mode, type, misalignment,
16301 						      is_packed);
16302 }
16303 
16304 /* The vector ABI requires vector types to be aligned on an 8 byte
16305    boundary (our stack alignment).  However, we allow this to be
16306    overridden by the user, even though this definitely breaks the ABI.  */
16307 static HOST_WIDE_INT
16308 s390_vector_alignment (const_tree type)
16309 {
16310   if (!TARGET_VX_ABI)
16311     return default_vector_alignment (type);
16312 
16313   if (TYPE_USER_ALIGN (type))
16314     return TYPE_ALIGN (type);
16315 
16316   return MIN (64, tree_to_shwi (TYPE_SIZE (type)));
16317 }
16318 
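/* Illustration (hypothetical typedefs): under the vector ABI

     typedef int v4si __attribute__ ((vector_size (16)));

   is aligned to only 8 bytes (MIN (64, 128) bits), while

     typedef int v4si16 __attribute__ ((vector_size (16), aligned (16)));

   keeps the user-requested 16-byte alignment and thereby deviates from
   the ABI as noted above.  */
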
16319 /* Implement TARGET_CONSTANT_ALIGNMENT.  Alignment on even addresses for
16320    LARL instruction.  */
16321 
16322 static HOST_WIDE_INT
16323 s390_constant_alignment (const_tree, HOST_WIDE_INT align)
16324 {
16325   return MAX (align, 16);
16326 }
16327 
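/* E.g. the literal emitted for

     const char *greeting (void) { return "hello"; }

   gets at least 2-byte alignment here, since LARL can only form even
   (halfword-aligned) addresses.  (Sketch for illustration only.)  */
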
16328 #ifdef HAVE_AS_MACHINE_MACHINEMODE
16329 /* Implement TARGET_ASM_FILE_START.  */
16330 static void
16331 s390_asm_file_start (void)
16332 {
16333   default_file_start ();
16334   s390_asm_output_machine_for_arch (asm_out_file);
16335 }
16336 #endif
16337 
16338 /* Implement TARGET_ASM_FILE_END.  */
16339 static void
16340 s390_asm_file_end (void)
16341 {
16342 #ifdef HAVE_AS_GNU_ATTRIBUTE
16343   varpool_node *vnode;
16344   cgraph_node *cnode;
16345 
16346   FOR_EACH_VARIABLE (vnode)
16347     if (TREE_PUBLIC (vnode->decl))
16348       s390_check_type_for_vector_abi (TREE_TYPE (vnode->decl), false, false);
16349 
16350   FOR_EACH_FUNCTION (cnode)
16351     if (TREE_PUBLIC (cnode->decl))
16352       s390_check_type_for_vector_abi (TREE_TYPE (cnode->decl), false, false);
16353 
16354 
16355   if (s390_vector_abi != 0)
16356     fprintf (asm_out_file, "\t.gnu_attribute 8, %d\n",
16357 	     s390_vector_abi);
16358 #endif
16359   file_end_indicate_exec_stack ();
16360 
16361   if (flag_split_stack)
16362     file_end_indicate_split_stack ();
16363 }
16364 
16365 /* Return true if TYPE is a vector bool type.  */
16366 static inline bool
16367 s390_vector_bool_type_p (const_tree type)
16368 {
16369   return TYPE_VECTOR_OPAQUE (type);
16370 }
16371 
16372 /* Return the diagnostic message string if the binary operation OP is
16373    not permitted on TYPE1 and TYPE2, NULL otherwise.  */
16374 static const char*
16375 s390_invalid_binary_op (int op ATTRIBUTE_UNUSED, const_tree type1, const_tree type2)
16376 {
16377   bool bool1_p, bool2_p;
16378   bool plusminus_p;
16379   bool muldiv_p;
16380   bool compare_p;
16381   machine_mode mode1, mode2;
16382 
16383   if (!TARGET_ZVECTOR)
16384     return NULL;
16385 
16386   if (!VECTOR_TYPE_P (type1) || !VECTOR_TYPE_P (type2))
16387     return NULL;
16388 
16389   bool1_p = s390_vector_bool_type_p (type1);
16390   bool2_p = s390_vector_bool_type_p (type2);
16391 
16392   /* Mixing signed and unsigned types is forbidden for all
16393      operators.  */
16394   if (!bool1_p && !bool2_p
16395       && TYPE_UNSIGNED (type1) != TYPE_UNSIGNED (type2))
16396     return N_("types differ in signedness");
16397 
16398   plusminus_p = (op == PLUS_EXPR || op == MINUS_EXPR);
16399   muldiv_p = (op == MULT_EXPR || op == RDIV_EXPR || op == TRUNC_DIV_EXPR
16400 	      || op == CEIL_DIV_EXPR || op == FLOOR_DIV_EXPR
16401 	      || op == ROUND_DIV_EXPR);
16402   compare_p = (op == LT_EXPR || op == LE_EXPR || op == GT_EXPR || op == GE_EXPR
16403 	       || op == EQ_EXPR || op == NE_EXPR);
16404 
16405   if (bool1_p && bool2_p && (plusminus_p || muldiv_p))
16406     return N_("binary operator does not support two vector bool operands");
16407 
16408   if (bool1_p != bool2_p && (muldiv_p || compare_p))
16409     return N_("binary operator does not support vector bool operand");
16410 
16411   mode1 = TYPE_MODE (type1);
16412   mode2 = TYPE_MODE (type2);
16413 
16414   if (bool1_p != bool2_p && plusminus_p
16415       && (GET_MODE_CLASS (mode1) == MODE_VECTOR_FLOAT
16416 	  || GET_MODE_CLASS (mode2) == MODE_VECTOR_FLOAT))
16417     return N_("binary operator does not support mixing vector "
16418 	      "bool with floating point vector operands");
16419 
16420   return NULL;
16421 }
16422 
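/* Example of code rejected by the checks above (hypothetical, with the
   zvector language extension enabled):

     vector bool int b;
     vector float f;
     f = b + f;

   is rejected with "binary operator does not support mixing vector
   bool with floating point vector operands", b * b is rejected as two
   vector bool operands, and mixing signed with unsigned element types
   is rejected for any operator.  */
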
16423 /* Implement TARGET_C_EXCESS_PRECISION.
16424 
16425    FIXME: For historical reasons, float_t and double_t are typedef'ed to
16426    double on s390, causing operations on float_t to operate in a higher
16427    precision than is necessary.  However, it is not the case that SFmode
16428    operations have implicit excess precision, and we generate more optimal
16429    code if we let the compiler know no implicit extra precision is added.
16430 
16431    That means when we are compiling with -fexcess-precision=fast, the value
16432    we set for FLT_EVAL_METHOD will be out of line with the actual precision of
16433    float_t (though they would be correct for -fexcess-precision=standard).
16434 
16435    A complete fix would modify glibc to remove the unnecessary typedef
16436    of float_t to double.  */
16437 
16438 static enum flt_eval_method
16439 s390_excess_precision (enum excess_precision_type type)
16440 {
16441   switch (type)
16442     {
16443       case EXCESS_PRECISION_TYPE_IMPLICIT:
16444       case EXCESS_PRECISION_TYPE_FAST:
16445 	/* The fastest type to promote to will always be the native type,
16446 	   whether that occurs with implicit excess precision or
16447 	   otherwise.  */
16448 	return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT;
16449       case EXCESS_PRECISION_TYPE_STANDARD:
16450 	/* Otherwise, when we are in a standards compliant mode, to
16451 	   ensure consistency with the implementation in glibc, report that
16452 	   float is evaluated to the range and precision of double.  */
16453 	return FLT_EVAL_METHOD_PROMOTE_TO_DOUBLE;
16454       default:
16455 	gcc_unreachable ();
16456     }
16457   return FLT_EVAL_METHOD_UNPREDICTABLE;
16458 }
16459 
16460 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook.  */
16461 
16462 static unsigned HOST_WIDE_INT
16463 s390_asan_shadow_offset (void)
16464 {
16465   return TARGET_64BIT ? HOST_WIDE_INT_1U << 52 : HOST_WIDE_INT_UC (0x20000000);
16466 }
16467 
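/* With the usual ASan mapping the shadow byte for an address lives at
   (addr >> 3) + offset, so for 64-bit code the lookup is roughly

     char *shadow = (char *) ((addr >> 3) + ((unsigned long) 1 << 52));

   while 31-bit code uses the fixed 0x20000000 offset.  (Illustrative
   sketch of the generic scheme, not code from this file.)  */
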
16468 #ifdef HAVE_GAS_HIDDEN
16469 # define USE_HIDDEN_LINKONCE 1
16470 #else
16471 # define USE_HIDDEN_LINKONCE 0
16472 #endif
16473 
16474 /* Output an indirect branch trampoline for target register REGNO.  */
16475 
16476 static void
16477 s390_output_indirect_thunk_function (unsigned int regno, bool z10_p)
16478 {
16479   tree decl;
16480   char thunk_label[32];
16481   int i;
16482 
16483   if (z10_p)
16484     sprintf (thunk_label, TARGET_INDIRECT_BRANCH_THUNK_NAME_EXRL, regno);
16485   else
16486     sprintf (thunk_label, TARGET_INDIRECT_BRANCH_THUNK_NAME_EX,
16487 	     INDIRECT_BRANCH_THUNK_REGNUM, regno);
16488 
16489   decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
16490 		     get_identifier (thunk_label),
16491 		     build_function_type_list (void_type_node, NULL_TREE));
16492   DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
16493 				   NULL_TREE, void_type_node);
16494   TREE_PUBLIC (decl) = 1;
16495   TREE_STATIC (decl) = 1;
16496   DECL_IGNORED_P (decl) = 1;
16497 
16498   if (USE_HIDDEN_LINKONCE)
16499     {
16500       cgraph_node::create (decl)->set_comdat_group (DECL_ASSEMBLER_NAME (decl));
16501 
16502       targetm.asm_out.unique_section (decl, 0);
16503       switch_to_section (get_named_section (decl, NULL, 0));
16504 
16505       targetm.asm_out.globalize_label (asm_out_file, thunk_label);
16506       fputs ("\t.hidden\t", asm_out_file);
16507       assemble_name (asm_out_file, thunk_label);
16508       putc ('\n', asm_out_file);
16509       ASM_DECLARE_FUNCTION_NAME (asm_out_file, thunk_label, decl);
16510     }
16511   else
16512     {
16513       switch_to_section (text_section);
16514       ASM_OUTPUT_LABEL (asm_out_file, thunk_label);
16515     }
16516 
16517   DECL_INITIAL (decl) = make_node (BLOCK);
16518   current_function_decl = decl;
16519   allocate_struct_function (decl, false);
16520   init_function_start (decl);
16521   cfun->is_thunk = true;
16522   first_function_block_is_cold = false;
16523   final_start_function (emit_barrier (), asm_out_file, 1);
16524 
16525   /* This makes CFI at least usable for indirect jumps.
16526 
16527      Stopping in the thunk: backtrace will point to the thunk target
16528      as if it was interrupted by a signal.  For a call this means that
16529      the call chain will be: caller->callee->thunk   */
16530   if (flag_asynchronous_unwind_tables && flag_dwarf2_cfi_asm)
16531     {
16532       fputs ("\t.cfi_signal_frame\n", asm_out_file);
16533       fprintf (asm_out_file, "\t.cfi_return_column %d\n", regno);
16534       for (i = 0; i < FPR15_REGNUM; i++)
16535 	fprintf (asm_out_file, "\t.cfi_same_value %s\n", reg_names[i]);
16536     }
16537 
16538   if (z10_p)
16539     {
16540       /* exrl  0,1f  */
16541 
16542       /* The thunk uses the z10 instruction exrl even if z10 is not
16543 	 enabled for the current compilation.  Tell the assembler to
16544 	 accept the instruction.  */
16545       if (!TARGET_CPU_Z10)
16546 	{
16547 	  fputs ("\t.machine push\n", asm_out_file);
16548 	  fputs ("\t.machine z10\n", asm_out_file);
16549 	}
16550       /* We use exrl even if -mzarch hasn't been specified on the
16551 	 command line so we have to tell the assembler to accept
16552 	 it.  */
16553       if (!TARGET_ZARCH)
16554 	fputs ("\t.machinemode zarch\n", asm_out_file);
16555 
16556       fputs ("\texrl\t0,1f\n", asm_out_file);
16557 
16558       if (!TARGET_ZARCH)
16559 	fputs ("\t.machinemode esa\n", asm_out_file);
16560 
16561       if (!TARGET_CPU_Z10)
16562 	fputs ("\t.machine pop\n", asm_out_file);
16563     }
16564   else if (TARGET_CPU_ZARCH)
16565     {
16566       /* larl %r1,1f  */
16567       fprintf (asm_out_file, "\tlarl\t%%r%d,1f\n",
16568 	       INDIRECT_BRANCH_THUNK_REGNUM);
16569 
16570       /* ex 0,0(%r1)  */
16571       fprintf (asm_out_file, "\tex\t0,0(%%r%d)\n",
16572 	       INDIRECT_BRANCH_THUNK_REGNUM);
16573     }
16574   else
16575     gcc_unreachable ();
16576 
16577   /* 0:    j 0b  */
16578   fputs ("0:\tj\t0b\n", asm_out_file);
16579 
16580   /* 1:    br <regno>  */
16581   fprintf (asm_out_file, "1:\tbr\t%%r%d\n", regno);
16582 
16583   final_end_function ();
16584   init_insn_lengths ();
16585   free_after_compilation (cfun);
16586   set_cfun (NULL);
16587   current_function_decl = NULL;
16588 }
16589 
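/* For reference, the z10 flavour of the thunk emitted above for %r1
   looks roughly like

	exrl	0,1f
   0:	j	0b
   1:	br	%r1

   The BR is only ever reached via the execute instruction, which avoids
   the branch prediction a plain BR would use; the tight loop at label 0
   merely catches any fall-through.  */
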
16590 /* Implement the asm.code_end target hook.  */
16591 
16592 static void
16593 s390_code_end (void)
16594 {
16595   int i;
16596 
16597   for (i = 1; i < 16; i++)
16598     {
16599       if (indirect_branch_z10thunk_mask & (1 << i))
16600 	s390_output_indirect_thunk_function (i, true);
16601 
16602       if (indirect_branch_prez10thunk_mask & (1 << i))
16603 	s390_output_indirect_thunk_function (i, false);
16604     }
16605 
16606   if (TARGET_INDIRECT_BRANCH_TABLE)
16607     {
16608       int o;
16609       int i;
16610 
16611       for (o = 0; o < INDIRECT_BRANCH_NUM_OPTIONS; o++)
16612 	{
16613 	  if (indirect_branch_table_label_no[o] == 0)
16614 	    continue;
16615 
16616 	  switch_to_section (get_section (indirect_branch_table_name[o],
16617 					  0,
16618 					  NULL_TREE));
16619 	  for (i = 0; i < indirect_branch_table_label_no[o]; i++)
16620 	    {
16621 	      char label_start[32];
16622 
16623 	      ASM_GENERATE_INTERNAL_LABEL (label_start,
16624 					   indirect_branch_table_label[o], i);
16625 
16626 	      fputs ("\t.long\t", asm_out_file);
16627 	      assemble_name_raw (asm_out_file, label_start);
16628 	      fputs ("-.\n", asm_out_file);
16629 	    }
16630 	  switch_to_section (current_function_section ());
16631 	}
16632     }
16633 }
16634 
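/* The tables emitted above consist of 32-bit self-relative entries,
   e.g. (with hypothetical section and label names)

	.section .s390_indirect_jump
	.long	.LJUMP0-.
	.long	.LJUMP1-.

   allowing post-link tools to locate every converted indirect jump,
   call or return; the real names come from indirect_branch_table_name
   and indirect_branch_table_label.  */
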
16635 /* Implement the TARGET_CASE_VALUES_THRESHOLD target hook.  */
16636 
16637 unsigned int
16638 s390_case_values_threshold (void)
16639 {
16640   /* Disabling branch prediction for indirect jumps makes jump tables
16641      much more expensive.  */
16642   if (TARGET_INDIRECT_BRANCH_NOBP_JUMP)
16643     return 20;
16644 
16645   return default_case_values_threshold ();
16646 }
16647 
16648 /* Initialize GCC target structure.  */
16649 
16650 #undef  TARGET_ASM_ALIGNED_HI_OP
16651 #define TARGET_ASM_ALIGNED_HI_OP "\t.word\t"
16652 #undef  TARGET_ASM_ALIGNED_DI_OP
16653 #define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
16654 #undef  TARGET_ASM_INTEGER
16655 #define TARGET_ASM_INTEGER s390_assemble_integer
16656 
16657 #undef  TARGET_ASM_OPEN_PAREN
16658 #define TARGET_ASM_OPEN_PAREN ""
16659 
16660 #undef  TARGET_ASM_CLOSE_PAREN
16661 #define TARGET_ASM_CLOSE_PAREN ""
16662 
16663 #undef TARGET_OPTION_OVERRIDE
16664 #define TARGET_OPTION_OVERRIDE s390_option_override
16665 
16666 #ifdef TARGET_THREAD_SSP_OFFSET
16667 #undef TARGET_STACK_PROTECT_GUARD
16668 #define TARGET_STACK_PROTECT_GUARD hook_tree_void_null
16669 #endif
16670 
16671 #undef	TARGET_ENCODE_SECTION_INFO
16672 #define TARGET_ENCODE_SECTION_INFO s390_encode_section_info
16673 
16674 #undef TARGET_SCALAR_MODE_SUPPORTED_P
16675 #define TARGET_SCALAR_MODE_SUPPORTED_P s390_scalar_mode_supported_p
16676 
16677 #ifdef HAVE_AS_TLS
16678 #undef TARGET_HAVE_TLS
16679 #define TARGET_HAVE_TLS true
16680 #endif
16681 #undef TARGET_CANNOT_FORCE_CONST_MEM
16682 #define TARGET_CANNOT_FORCE_CONST_MEM s390_cannot_force_const_mem
16683 
16684 #undef TARGET_DELEGITIMIZE_ADDRESS
16685 #define TARGET_DELEGITIMIZE_ADDRESS s390_delegitimize_address
16686 
16687 #undef TARGET_LEGITIMIZE_ADDRESS
16688 #define TARGET_LEGITIMIZE_ADDRESS s390_legitimize_address
16689 
16690 #undef TARGET_RETURN_IN_MEMORY
16691 #define TARGET_RETURN_IN_MEMORY s390_return_in_memory
16692 
16693 #undef  TARGET_INIT_BUILTINS
16694 #define TARGET_INIT_BUILTINS s390_init_builtins
16695 #undef  TARGET_EXPAND_BUILTIN
16696 #define TARGET_EXPAND_BUILTIN s390_expand_builtin
16697 #undef  TARGET_BUILTIN_DECL
16698 #define TARGET_BUILTIN_DECL s390_builtin_decl
16699 
16700 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
16701 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA s390_output_addr_const_extra
16702 
16703 #undef TARGET_ASM_OUTPUT_MI_THUNK
16704 #define TARGET_ASM_OUTPUT_MI_THUNK s390_output_mi_thunk
16705 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
16706 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true
16707 
16708 #undef TARGET_C_EXCESS_PRECISION
16709 #define TARGET_C_EXCESS_PRECISION s390_excess_precision
16710 
16711 #undef  TARGET_SCHED_ADJUST_PRIORITY
16712 #define TARGET_SCHED_ADJUST_PRIORITY s390_adjust_priority
16713 #undef TARGET_SCHED_ISSUE_RATE
16714 #define TARGET_SCHED_ISSUE_RATE s390_issue_rate
16715 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
16716 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD s390_first_cycle_multipass_dfa_lookahead
16717 
16718 #undef TARGET_SCHED_VARIABLE_ISSUE
16719 #define TARGET_SCHED_VARIABLE_ISSUE s390_sched_variable_issue
16720 #undef TARGET_SCHED_REORDER
16721 #define TARGET_SCHED_REORDER s390_sched_reorder
16722 #undef TARGET_SCHED_INIT
16723 #define TARGET_SCHED_INIT s390_sched_init
16724 
16725 #undef TARGET_CANNOT_COPY_INSN_P
16726 #define TARGET_CANNOT_COPY_INSN_P s390_cannot_copy_insn_p
16727 #undef TARGET_RTX_COSTS
16728 #define TARGET_RTX_COSTS s390_rtx_costs
16729 #undef TARGET_ADDRESS_COST
16730 #define TARGET_ADDRESS_COST s390_address_cost
16731 #undef TARGET_REGISTER_MOVE_COST
16732 #define TARGET_REGISTER_MOVE_COST s390_register_move_cost
16733 #undef TARGET_MEMORY_MOVE_COST
16734 #define TARGET_MEMORY_MOVE_COST s390_memory_move_cost
16735 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
16736 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
16737   s390_builtin_vectorization_cost
16738 
16739 #undef TARGET_MACHINE_DEPENDENT_REORG
16740 #define TARGET_MACHINE_DEPENDENT_REORG s390_reorg
16741 
16742 #undef TARGET_VALID_POINTER_MODE
16743 #define TARGET_VALID_POINTER_MODE s390_valid_pointer_mode
16744 
16745 #undef TARGET_BUILD_BUILTIN_VA_LIST
16746 #define TARGET_BUILD_BUILTIN_VA_LIST s390_build_builtin_va_list
16747 #undef TARGET_EXPAND_BUILTIN_VA_START
16748 #define TARGET_EXPAND_BUILTIN_VA_START s390_va_start
16749 #undef TARGET_ASAN_SHADOW_OFFSET
16750 #define TARGET_ASAN_SHADOW_OFFSET s390_asan_shadow_offset
16751 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
16752 #define TARGET_GIMPLIFY_VA_ARG_EXPR s390_gimplify_va_arg
16753 
16754 #undef TARGET_PROMOTE_FUNCTION_MODE
16755 #define TARGET_PROMOTE_FUNCTION_MODE s390_promote_function_mode
16756 #undef TARGET_PASS_BY_REFERENCE
16757 #define TARGET_PASS_BY_REFERENCE s390_pass_by_reference
16758 
16759 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
16760 #define TARGET_FUNCTION_OK_FOR_SIBCALL s390_function_ok_for_sibcall
16761 #undef TARGET_FUNCTION_ARG
16762 #define TARGET_FUNCTION_ARG s390_function_arg
16763 #undef TARGET_FUNCTION_ARG_ADVANCE
16764 #define TARGET_FUNCTION_ARG_ADVANCE s390_function_arg_advance
16765 #undef TARGET_FUNCTION_ARG_PADDING
16766 #define TARGET_FUNCTION_ARG_PADDING s390_function_arg_padding
16767 #undef TARGET_FUNCTION_VALUE
16768 #define TARGET_FUNCTION_VALUE s390_function_value
16769 #undef TARGET_LIBCALL_VALUE
16770 #define TARGET_LIBCALL_VALUE s390_libcall_value
16771 #undef TARGET_STRICT_ARGUMENT_NAMING
16772 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
16773 
16774 #undef TARGET_KEEP_LEAF_WHEN_PROFILED
16775 #define TARGET_KEEP_LEAF_WHEN_PROFILED s390_keep_leaf_when_profiled
16776 
16777 #undef TARGET_FIXED_CONDITION_CODE_REGS
16778 #define TARGET_FIXED_CONDITION_CODE_REGS s390_fixed_condition_code_regs
16779 
16780 #undef TARGET_CC_MODES_COMPATIBLE
16781 #define TARGET_CC_MODES_COMPATIBLE s390_cc_modes_compatible
16782 
16783 #undef TARGET_INVALID_WITHIN_DOLOOP
16784 #define TARGET_INVALID_WITHIN_DOLOOP hook_constcharptr_const_rtx_insn_null
16785 
16786 #ifdef HAVE_AS_TLS
16787 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
16788 #define TARGET_ASM_OUTPUT_DWARF_DTPREL s390_output_dwarf_dtprel
16789 #endif
16790 
16791 #undef TARGET_DWARF_FRAME_REG_MODE
16792 #define TARGET_DWARF_FRAME_REG_MODE s390_dwarf_frame_reg_mode
16793 
16794 #ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
16795 #undef TARGET_MANGLE_TYPE
16796 #define TARGET_MANGLE_TYPE s390_mangle_type
16797 #endif
16798 
16799 #undef TARGET_SCALAR_MODE_SUPPORTED_P
16800 #define TARGET_SCALAR_MODE_SUPPORTED_P s390_scalar_mode_supported_p
16801 
16802 #undef TARGET_VECTOR_MODE_SUPPORTED_P
16803 #define TARGET_VECTOR_MODE_SUPPORTED_P s390_vector_mode_supported_p
16804 
16805 #undef  TARGET_PREFERRED_RELOAD_CLASS
16806 #define TARGET_PREFERRED_RELOAD_CLASS s390_preferred_reload_class
16807 
16808 #undef TARGET_SECONDARY_RELOAD
16809 #define TARGET_SECONDARY_RELOAD s390_secondary_reload
16810 #undef TARGET_SECONDARY_MEMORY_NEEDED
16811 #define TARGET_SECONDARY_MEMORY_NEEDED s390_secondary_memory_needed
16812 #undef TARGET_SECONDARY_MEMORY_NEEDED_MODE
16813 #define TARGET_SECONDARY_MEMORY_NEEDED_MODE s390_secondary_memory_needed_mode
16814 
16815 #undef TARGET_LIBGCC_CMP_RETURN_MODE
16816 #define TARGET_LIBGCC_CMP_RETURN_MODE s390_libgcc_cmp_return_mode
16817 
16818 #undef TARGET_LIBGCC_SHIFT_COUNT_MODE
16819 #define TARGET_LIBGCC_SHIFT_COUNT_MODE s390_libgcc_shift_count_mode
16820 
16821 #undef TARGET_LEGITIMATE_ADDRESS_P
16822 #define TARGET_LEGITIMATE_ADDRESS_P s390_legitimate_address_p
16823 
16824 #undef TARGET_LEGITIMATE_CONSTANT_P
16825 #define TARGET_LEGITIMATE_CONSTANT_P s390_legitimate_constant_p
16826 
16827 #undef TARGET_LRA_P
16828 #define TARGET_LRA_P s390_lra_p
16829 
16830 #undef TARGET_CAN_ELIMINATE
16831 #define TARGET_CAN_ELIMINATE s390_can_eliminate
16832 
16833 #undef TARGET_CONDITIONAL_REGISTER_USAGE
16834 #define TARGET_CONDITIONAL_REGISTER_USAGE s390_conditional_register_usage
16835 
16836 #undef TARGET_LOOP_UNROLL_ADJUST
16837 #define TARGET_LOOP_UNROLL_ADJUST s390_loop_unroll_adjust
16838 
16839 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
16840 #define TARGET_ASM_TRAMPOLINE_TEMPLATE s390_asm_trampoline_template
16841 #undef TARGET_TRAMPOLINE_INIT
16842 #define TARGET_TRAMPOLINE_INIT s390_trampoline_init
16843 
16844 /* PR 79421 */
16845 #undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
16846 #define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 1
16847 
16848 #undef TARGET_UNWIND_WORD_MODE
16849 #define TARGET_UNWIND_WORD_MODE s390_unwind_word_mode
16850 
16851 #undef TARGET_CANONICALIZE_COMPARISON
16852 #define TARGET_CANONICALIZE_COMPARISON s390_canonicalize_comparison
16853 
16854 #undef TARGET_HARD_REGNO_SCRATCH_OK
16855 #define TARGET_HARD_REGNO_SCRATCH_OK s390_hard_regno_scratch_ok
16856 
16857 #undef TARGET_HARD_REGNO_NREGS
16858 #define TARGET_HARD_REGNO_NREGS s390_hard_regno_nregs
16859 #undef TARGET_HARD_REGNO_MODE_OK
16860 #define TARGET_HARD_REGNO_MODE_OK s390_hard_regno_mode_ok
16861 #undef TARGET_MODES_TIEABLE_P
16862 #define TARGET_MODES_TIEABLE_P s390_modes_tieable_p
16863 
16864 #undef TARGET_HARD_REGNO_CALL_PART_CLOBBERED
16865 #define TARGET_HARD_REGNO_CALL_PART_CLOBBERED \
16866   s390_hard_regno_call_part_clobbered
16867 
16868 #undef TARGET_ATTRIBUTE_TABLE
16869 #define TARGET_ATTRIBUTE_TABLE s390_attribute_table
16870 
16871 #undef TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P
16872 #define TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P hook_bool_const_tree_true
16873 
16874 #undef TARGET_SET_UP_BY_PROLOGUE
16875 #define TARGET_SET_UP_BY_PROLOGUE s300_set_up_by_prologue
16876 
16877 #undef TARGET_EXTRA_LIVE_ON_ENTRY
16878 #define TARGET_EXTRA_LIVE_ON_ENTRY s390_live_on_entry
16879 
16880 #undef TARGET_USE_BY_PIECES_INFRASTRUCTURE_P
16881 #define TARGET_USE_BY_PIECES_INFRASTRUCTURE_P \
16882   s390_use_by_pieces_infrastructure_p
16883 
16884 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
16885 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV s390_atomic_assign_expand_fenv
16886 
16887 #undef TARGET_INVALID_ARG_FOR_UNPROTOTYPED_FN
16888 #define TARGET_INVALID_ARG_FOR_UNPROTOTYPED_FN s390_invalid_arg_for_unprototyped_fn
16889 
16890 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
16891 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE s390_preferred_simd_mode
16892 
16893 #undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
16894 #define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT s390_support_vector_misalignment
16895 
16896 #undef TARGET_VECTOR_ALIGNMENT
16897 #define TARGET_VECTOR_ALIGNMENT s390_vector_alignment
16898 
16899 #undef TARGET_INVALID_BINARY_OP
16900 #define TARGET_INVALID_BINARY_OP s390_invalid_binary_op
16901 
16902 #ifdef HAVE_AS_MACHINE_MACHINEMODE
16903 #undef TARGET_ASM_FILE_START
16904 #define TARGET_ASM_FILE_START s390_asm_file_start
16905 #endif
16906 
16907 #undef TARGET_ASM_FILE_END
16908 #define TARGET_ASM_FILE_END s390_asm_file_end
16909 
16910 #if S390_USE_TARGET_ATTRIBUTE
16911 #undef TARGET_SET_CURRENT_FUNCTION
16912 #define TARGET_SET_CURRENT_FUNCTION s390_set_current_function
16913 
16914 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
16915 #define TARGET_OPTION_VALID_ATTRIBUTE_P s390_valid_target_attribute_p
16916 
16917 #undef TARGET_CAN_INLINE_P
16918 #define TARGET_CAN_INLINE_P s390_can_inline_p
16919 #endif
16920 
16921 #undef TARGET_OPTION_RESTORE
16922 #define TARGET_OPTION_RESTORE s390_function_specific_restore
16923 
16924 #undef TARGET_CAN_CHANGE_MODE_CLASS
16925 #define TARGET_CAN_CHANGE_MODE_CLASS s390_can_change_mode_class
16926 
16927 #undef TARGET_CONSTANT_ALIGNMENT
16928 #define TARGET_CONSTANT_ALIGNMENT s390_constant_alignment
16929 
16930 #undef TARGET_ASM_CODE_END
16931 #define TARGET_ASM_CODE_END s390_code_end
16932 
16933 #undef TARGET_CASE_VALUES_THRESHOLD
16934 #define TARGET_CASE_VALUES_THRESHOLD s390_case_values_threshold
16935 
16936 struct gcc_target targetm = TARGET_INITIALIZER;
16937 
16938 #include "gt-s390.h"
16939