1 /* Subroutines used for code generation on IBM S/390 and zSeries
2    Copyright (C) 1999-2020 Free Software Foundation, Inc.
3    Contributed by Hartmut Penner (hpenner@de.ibm.com) and
4                   Ulrich Weigand (uweigand@de.ibm.com) and
5                   Andreas Krebbel (Andreas.Krebbel@de.ibm.com).
6 
7 This file is part of GCC.
8 
9 GCC is free software; you can redistribute it and/or modify it under
10 the terms of the GNU General Public License as published by the Free
11 Software Foundation; either version 3, or (at your option) any later
12 version.
13 
14 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
15 WARRANTY; without even the implied warranty of MERCHANTABILITY or
16 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
17 for more details.
18 
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3.  If not see
21 <http://www.gnu.org/licenses/>.  */
22 
23 #define IN_TARGET_CODE 1
24 
25 #include "config.h"
26 #include "system.h"
27 #include "coretypes.h"
28 #include "backend.h"
29 #include "target.h"
30 #include "target-globals.h"
31 #include "rtl.h"
32 #include "tree.h"
33 #include "gimple.h"
34 #include "cfghooks.h"
35 #include "cfgloop.h"
36 #include "df.h"
37 #include "memmodel.h"
38 #include "tm_p.h"
39 #include "stringpool.h"
40 #include "attribs.h"
41 #include "expmed.h"
42 #include "optabs.h"
43 #include "regs.h"
44 #include "emit-rtl.h"
45 #include "recog.h"
46 #include "cgraph.h"
47 #include "diagnostic-core.h"
48 #include "diagnostic.h"
49 #include "alias.h"
50 #include "fold-const.h"
51 #include "print-tree.h"
52 #include "stor-layout.h"
53 #include "varasm.h"
54 #include "calls.h"
55 #include "conditions.h"
56 #include "output.h"
57 #include "insn-attr.h"
58 #include "flags.h"
59 #include "except.h"
60 #include "dojump.h"
61 #include "explow.h"
62 #include "stmt.h"
63 #include "expr.h"
64 #include "reload.h"
65 #include "cfgrtl.h"
66 #include "cfganal.h"
67 #include "lcm.h"
68 #include "cfgbuild.h"
69 #include "cfgcleanup.h"
70 #include "debug.h"
71 #include "langhooks.h"
72 #include "internal-fn.h"
73 #include "gimple-fold.h"
74 #include "tree-eh.h"
75 #include "gimplify.h"
76 #include "opts.h"
77 #include "tree-pass.h"
78 #include "context.h"
79 #include "builtins.h"
80 #include "rtl-iter.h"
81 #include "intl.h"
82 #include "tm-constrs.h"
83 #include "tree-vrp.h"
84 #include "symbol-summary.h"
85 #include "ipa-prop.h"
86 #include "ipa-fnsummary.h"
87 #include "sched-int.h"
88 
89 /* This file should be included last.  */
90 #include "target-def.h"
91 
92 static bool s390_hard_regno_mode_ok (unsigned int, machine_mode);
93 
94 /* Remember the last target of s390_set_current_function.  */
95 static GTY(()) tree s390_previous_fndecl;
96 
97 /* Define the specific costs for a given cpu.  */
98 
99 struct processor_costs
100 {
101   /* multiplication */
102   const int m;        /* cost of an M instruction.  */
103   const int mghi;     /* cost of an MGHI instruction.  */
104   const int mh;       /* cost of an MH instruction.  */
105   const int mhi;      /* cost of an MHI instruction.  */
106   const int ml;       /* cost of an ML instruction.  */
107   const int mr;       /* cost of an MR instruction.  */
108   const int ms;       /* cost of an MS instruction.  */
109   const int msg;      /* cost of an MSG instruction.  */
110   const int msgf;     /* cost of an MSGF instruction.  */
111   const int msgfr;    /* cost of an MSGFR instruction.  */
112   const int msgr;     /* cost of an MSGR instruction.  */
113   const int msr;      /* cost of an MSR instruction.  */
114   const int mult_df;  /* cost of multiplication in DFmode.  */
115   const int mxbr;
116   /* square root */
117   const int sqxbr;    /* cost of square root in TFmode.  */
118   const int sqdbr;    /* cost of square root in DFmode.  */
119   const int sqebr;    /* cost of square root in SFmode.  */
120   /* multiply and add */
121   const int madbr;    /* cost of multiply and add in DFmode.  */
122   const int maebr;    /* cost of multiply and add in SFmode.  */
123   /* division */
124   const int dxbr;
125   const int ddbr;
126   const int debr;
127   const int dlgr;
128   const int dlr;
129   const int dr;
130   const int dsgfr;
131   const int dsgr;
132 };
133 
134 #define s390_cost ((const struct processor_costs *)(s390_cost_pointer))
135 
136 static const
137 struct processor_costs z900_cost =
138 {
139   COSTS_N_INSNS (5),     /* M     */
140   COSTS_N_INSNS (10),    /* MGHI  */
141   COSTS_N_INSNS (5),     /* MH    */
142   COSTS_N_INSNS (4),     /* MHI   */
143   COSTS_N_INSNS (5),     /* ML    */
144   COSTS_N_INSNS (5),     /* MR    */
145   COSTS_N_INSNS (4),     /* MS    */
146   COSTS_N_INSNS (15),    /* MSG   */
147   COSTS_N_INSNS (7),     /* MSGF  */
148   COSTS_N_INSNS (7),     /* MSGFR */
149   COSTS_N_INSNS (10),    /* MSGR  */
150   COSTS_N_INSNS (4),     /* MSR   */
151   COSTS_N_INSNS (7),     /* multiplication in DFmode */
152   COSTS_N_INSNS (13),    /* MXBR */
153   COSTS_N_INSNS (136),   /* SQXBR */
154   COSTS_N_INSNS (44),    /* SQDBR */
155   COSTS_N_INSNS (35),    /* SQEBR */
156   COSTS_N_INSNS (18),    /* MADBR */
157   COSTS_N_INSNS (13),    /* MAEBR */
158   COSTS_N_INSNS (134),   /* DXBR */
159   COSTS_N_INSNS (30),    /* DDBR */
160   COSTS_N_INSNS (27),    /* DEBR */
161   COSTS_N_INSNS (220),   /* DLGR */
162   COSTS_N_INSNS (34),    /* DLR */
163   COSTS_N_INSNS (34),    /* DR */
164   COSTS_N_INSNS (32),    /* DSGFR */
165   COSTS_N_INSNS (32),    /* DSGR */
166 };
167 
168 static const
169 struct processor_costs z990_cost =
170 {
171   COSTS_N_INSNS (4),     /* M     */
172   COSTS_N_INSNS (2),     /* MGHI  */
173   COSTS_N_INSNS (2),     /* MH    */
174   COSTS_N_INSNS (2),     /* MHI   */
175   COSTS_N_INSNS (4),     /* ML    */
176   COSTS_N_INSNS (4),     /* MR    */
177   COSTS_N_INSNS (5),     /* MS    */
178   COSTS_N_INSNS (6),     /* MSG   */
179   COSTS_N_INSNS (4),     /* MSGF  */
180   COSTS_N_INSNS (4),     /* MSGFR */
181   COSTS_N_INSNS (4),     /* MSGR  */
182   COSTS_N_INSNS (4),     /* MSR   */
183   COSTS_N_INSNS (1),     /* multiplication in DFmode */
184   COSTS_N_INSNS (28),    /* MXBR */
185   COSTS_N_INSNS (130),   /* SQXBR */
186   COSTS_N_INSNS (66),    /* SQDBR */
187   COSTS_N_INSNS (38),    /* SQEBR */
188   COSTS_N_INSNS (1),     /* MADBR */
189   COSTS_N_INSNS (1),     /* MAEBR */
190   COSTS_N_INSNS (60),    /* DXBR */
191   COSTS_N_INSNS (40),    /* DDBR */
192   COSTS_N_INSNS (26),    /* DEBR */
193   COSTS_N_INSNS (176),   /* DLGR */
194   COSTS_N_INSNS (31),    /* DLR */
195   COSTS_N_INSNS (31),    /* DR */
196   COSTS_N_INSNS (31),    /* DSGFR */
197   COSTS_N_INSNS (31),    /* DSGR */
198 };
199 
200 static const
201 struct processor_costs z9_109_cost =
202 {
203   COSTS_N_INSNS (4),     /* M     */
204   COSTS_N_INSNS (2),     /* MGHI  */
205   COSTS_N_INSNS (2),     /* MH    */
206   COSTS_N_INSNS (2),     /* MHI   */
207   COSTS_N_INSNS (4),     /* ML    */
208   COSTS_N_INSNS (4),     /* MR    */
209   COSTS_N_INSNS (5),     /* MS    */
210   COSTS_N_INSNS (6),     /* MSG   */
211   COSTS_N_INSNS (4),     /* MSGF  */
212   COSTS_N_INSNS (4),     /* MSGFR */
213   COSTS_N_INSNS (4),     /* MSGR  */
214   COSTS_N_INSNS (4),     /* MSR   */
215   COSTS_N_INSNS (1),     /* multiplication in DFmode */
216   COSTS_N_INSNS (28),    /* MXBR */
217   COSTS_N_INSNS (130),   /* SQXBR */
218   COSTS_N_INSNS (66),    /* SQDBR */
219   COSTS_N_INSNS (38),    /* SQEBR */
220   COSTS_N_INSNS (1),     /* MADBR */
221   COSTS_N_INSNS (1),     /* MAEBR */
222   COSTS_N_INSNS (60),    /* DXBR */
223   COSTS_N_INSNS (40),    /* DDBR */
224   COSTS_N_INSNS (26),    /* DEBR */
225   COSTS_N_INSNS (30),    /* DLGR */
226   COSTS_N_INSNS (23),    /* DLR */
227   COSTS_N_INSNS (23),    /* DR */
228   COSTS_N_INSNS (24),    /* DSGFR */
229   COSTS_N_INSNS (24),    /* DSGR */
230 };
231 
232 static const
233 struct processor_costs z10_cost =
234 {
235   COSTS_N_INSNS (10),    /* M     */
236   COSTS_N_INSNS (10),    /* MGHI  */
237   COSTS_N_INSNS (10),    /* MH    */
238   COSTS_N_INSNS (10),    /* MHI   */
239   COSTS_N_INSNS (10),    /* ML    */
240   COSTS_N_INSNS (10),    /* MR    */
241   COSTS_N_INSNS (10),    /* MS    */
242   COSTS_N_INSNS (10),    /* MSG   */
243   COSTS_N_INSNS (10),    /* MSGF  */
244   COSTS_N_INSNS (10),    /* MSGFR */
245   COSTS_N_INSNS (10),    /* MSGR  */
246   COSTS_N_INSNS (10),    /* MSR   */
247   COSTS_N_INSNS (1) ,    /* multiplication in DFmode */
248   COSTS_N_INSNS (50),    /* MXBR */
249   COSTS_N_INSNS (120),   /* SQXBR */
250   COSTS_N_INSNS (52),    /* SQDBR */
251   COSTS_N_INSNS (38),    /* SQEBR */
252   COSTS_N_INSNS (1),     /* MADBR */
253   COSTS_N_INSNS (1),     /* MAEBR */
254   COSTS_N_INSNS (111),   /* DXBR */
255   COSTS_N_INSNS (39),    /* DDBR */
256   COSTS_N_INSNS (32),    /* DEBR */
257   COSTS_N_INSNS (160),   /* DLGR */
258   COSTS_N_INSNS (71),    /* DLR */
259   COSTS_N_INSNS (71),    /* DR */
260   COSTS_N_INSNS (71),    /* DSGFR */
261   COSTS_N_INSNS (71),    /* DSGR */
262 };
263 
264 static const
265 struct processor_costs z196_cost =
266 {
267   COSTS_N_INSNS (7),     /* M     */
268   COSTS_N_INSNS (5),     /* MGHI  */
269   COSTS_N_INSNS (5),     /* MH    */
270   COSTS_N_INSNS (5),     /* MHI   */
271   COSTS_N_INSNS (7),     /* ML    */
272   COSTS_N_INSNS (7),     /* MR    */
273   COSTS_N_INSNS (6),     /* MS    */
274   COSTS_N_INSNS (8),     /* MSG   */
275   COSTS_N_INSNS (6),     /* MSGF  */
276   COSTS_N_INSNS (6),     /* MSGFR */
277   COSTS_N_INSNS (8),     /* MSGR  */
278   COSTS_N_INSNS (6),     /* MSR   */
279   COSTS_N_INSNS (1) ,    /* multiplication in DFmode */
280   COSTS_N_INSNS (40),    /* MXBR B+40 */
281   COSTS_N_INSNS (100),   /* SQXBR B+100 */
282   COSTS_N_INSNS (42),    /* SQDBR B+42 */
283   COSTS_N_INSNS (28),    /* SQEBR B+28 */
284   COSTS_N_INSNS (1),     /* MADBR B */
285   COSTS_N_INSNS (1),     /* MAEBR B */
286   COSTS_N_INSNS (101),   /* DXBR B+101 */
287   COSTS_N_INSNS (29),    /* DDBR */
288   COSTS_N_INSNS (22),    /* DEBR */
289   COSTS_N_INSNS (160),   /* DLGR cracked */
290   COSTS_N_INSNS (160),   /* DLR cracked */
291   COSTS_N_INSNS (160),   /* DR expanded */
292   COSTS_N_INSNS (160),   /* DSGFR cracked */
293   COSTS_N_INSNS (160),   /* DSGR cracked */
294 };
295 
296 static const
297 struct processor_costs zEC12_cost =
298 {
299   COSTS_N_INSNS (7),     /* M     */
300   COSTS_N_INSNS (5),     /* MGHI  */
301   COSTS_N_INSNS (5),     /* MH    */
302   COSTS_N_INSNS (5),     /* MHI   */
303   COSTS_N_INSNS (7),     /* ML    */
304   COSTS_N_INSNS (7),     /* MR    */
305   COSTS_N_INSNS (6),     /* MS    */
306   COSTS_N_INSNS (8),     /* MSG   */
307   COSTS_N_INSNS (6),     /* MSGF  */
308   COSTS_N_INSNS (6),     /* MSGFR */
309   COSTS_N_INSNS (8),     /* MSGR  */
310   COSTS_N_INSNS (6),     /* MSR   */
311   COSTS_N_INSNS (1) ,    /* multiplication in DFmode */
312   COSTS_N_INSNS (40),    /* MXBR B+40 */
313   COSTS_N_INSNS (100),   /* SQXBR B+100 */
314   COSTS_N_INSNS (42),    /* SQDBR B+42 */
315   COSTS_N_INSNS (28),    /* SQEBR B+28 */
316   COSTS_N_INSNS (1),     /* MADBR B */
317   COSTS_N_INSNS (1),     /* MAEBR B */
318   COSTS_N_INSNS (131),   /* DXBR B+131 */
319   COSTS_N_INSNS (29),    /* DDBR */
320   COSTS_N_INSNS (22),    /* DEBR */
321   COSTS_N_INSNS (160),   /* DLGR cracked */
322   COSTS_N_INSNS (160),   /* DLR cracked */
323   COSTS_N_INSNS (160),   /* DR expanded */
324   COSTS_N_INSNS (160),   /* DSGFR cracked */
325   COSTS_N_INSNS (160),   /* DSGR cracked */
326 };
327 
328 const struct s390_processor processor_table[] =
329 {
330   { "z900",   "z900",   PROCESSOR_2064_Z900,   &z900_cost,   5  },
331   { "z990",   "z990",   PROCESSOR_2084_Z990,   &z990_cost,   6  },
332   { "z9-109", "z9-109", PROCESSOR_2094_Z9_109, &z9_109_cost, 7  },
333   { "z9-ec",  "z9-ec",  PROCESSOR_2094_Z9_EC,  &z9_109_cost, 7  },
334   { "z10",    "z10",    PROCESSOR_2097_Z10,    &z10_cost,    8  },
335   { "z196",   "z196",   PROCESSOR_2817_Z196,   &z196_cost,   9  },
336   { "zEC12",  "zEC12",  PROCESSOR_2827_ZEC12,  &zEC12_cost,  10 },
337   { "z13",    "z13",    PROCESSOR_2964_Z13,    &zEC12_cost,  11 },
338   { "z14",    "arch12", PROCESSOR_3906_Z14,    &zEC12_cost,  12 },
339   { "z15",    "arch13", PROCESSOR_8561_Z15,    &zEC12_cost,  13 },
340   { "native", "",       PROCESSOR_NATIVE,      NULL,         0  }
341 };
342 
343 extern int reload_completed;
344 
345 /* Kept up to date using the SCHED_VARIABLE_ISSUE hook.  */
346 static rtx_insn *last_scheduled_insn;
347 #define NUM_SIDES 2
348 
349 #define MAX_SCHED_UNITS 4
350 static int last_scheduled_unit_distance[MAX_SCHED_UNITS][NUM_SIDES];
351 
352 /* Estimate of the number of cycles for which a long-running insn
353    occupies an execution unit.  */
354 static int fxd_longrunning[NUM_SIDES];
355 static int fpd_longrunning[NUM_SIDES];
356 
357 /* The maximum score added for an instruction whose unit hasn't been
358    in use for MAX_SCHED_MIX_DISTANCE steps.  Increase this value to
359    give instruction mix scheduling more priority over instruction
360    grouping.  */
361 #define MAX_SCHED_MIX_SCORE      2
362 
363 /* The maximum distance up to which individual scores will be
364    calculated.  Everything beyond this gives MAX_SCHED_MIX_SCORE.
365    Increase this with the OOO window size of the machine.  */
366 #define MAX_SCHED_MIX_DISTANCE 70
367 
368 /* Structure used to hold the components of a S/390 memory
369    address.  A legitimate address on S/390 is of the general
370    form
371           base + index + displacement
372    where any of the components is optional.
373 
374    base and index are registers of the class ADDR_REGS,
375    displacement is an unsigned 12-bit immediate constant.  */
376 
377 /* The max number of insns of backend-generated memset/memcpy/memcmp
378    loops.  This value is used in the unroll adjust hook to detect such
379    loops.  Current max is 9 coming from the memcmp loop.  */
380 #define BLOCK_MEM_OPS_LOOP_INSNS 9
381 
382 struct s390_address
383 {
384   rtx base;
385   rtx indx;
386   rtx disp;
387   bool pointer;
388   bool literal_pool;
389 };
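/* Illustrative example (not referenced by the code, assembly operand made up
   for the sketch): for an RX-format access such as "l %r1,8(%r3,%r2)" the
   address operand would decompose into base = %r2, indx = %r3 and
   disp = (const_int 8), with pointer and literal_pool left false.  */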
390 
391 /* A few accessor macros for struct cfun->machine->s390_frame_layout.  */
392 
393 #define cfun_frame_layout (cfun->machine->frame_layout)
394 #define cfun_save_high_fprs_p (!!cfun_frame_layout.high_fprs)
395 #define cfun_save_arg_fprs_p (!!(TARGET_64BIT				\
396 				 ? cfun_frame_layout.fpr_bitmap & 0x0f	\
397 				 : cfun_frame_layout.fpr_bitmap & 0x03))
398 #define cfun_gprs_save_area_size ((cfun_frame_layout.last_save_gpr_slot - \
399   cfun_frame_layout.first_save_gpr_slot + 1) * UNITS_PER_LONG)
400 #define cfun_set_fpr_save(REGNO) (cfun->machine->frame_layout.fpr_bitmap |=    \
401   (1 << (REGNO - FPR0_REGNUM)))
402 #define cfun_fpr_save_p(REGNO) (!!(cfun->machine->frame_layout.fpr_bitmap &    \
403   (1 << (REGNO - FPR0_REGNUM))))
404 #define cfun_gpr_save_slot(REGNO) \
405   cfun->machine->frame_layout.gpr_save_slots[REGNO]
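/* Illustrative example: cfun_set_fpr_save (FPR0_REGNUM + 3) sets bit
   (1 << 3) of frame_layout.fpr_bitmap, so cfun_fpr_save_p (FPR0_REGNUM + 3)
   becomes true and, on 64-bit, cfun_save_arg_fprs_p is true as well, since
   the low four bits cover the FPRs used for argument passing.  */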
406 
407 /* Number of GPRs and FPRs used for argument passing.  */
408 #define GP_ARG_NUM_REG 5
409 #define FP_ARG_NUM_REG (TARGET_64BIT? 4 : 2)
410 #define VEC_ARG_NUM_REG 8
411 
412 /* A couple of shortcuts.  */
413 #define CONST_OK_FOR_J(x) \
414 	CONST_OK_FOR_CONSTRAINT_P((x), 'J', "J")
415 #define CONST_OK_FOR_K(x) \
416 	CONST_OK_FOR_CONSTRAINT_P((x), 'K', "K")
417 #define CONST_OK_FOR_Os(x) \
418 	CONST_OK_FOR_CONSTRAINT_P((x), 'O', "Os")
419 #define CONST_OK_FOR_Op(x) \
420 	CONST_OK_FOR_CONSTRAINT_P((x), 'O', "Op")
421 #define CONST_OK_FOR_On(x) \
422 	CONST_OK_FOR_CONSTRAINT_P((x), 'O', "On")
423 
424 #define REGNO_PAIR_OK(REGNO, MODE)                               \
425   (s390_hard_regno_nregs ((REGNO), (MODE)) == 1 || !((REGNO) & 1))
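/* E.g. a mode that occupies two GPRs (such as TImode on a 64-bit target) is
   accepted by REGNO_PAIR_OK only for an even REGNO, while single-register
   modes are accepted for any REGNO.  */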
426 
427 /* That's the read-ahead of the dynamic branch prediction unit in
428    bytes on a z10 (or higher) CPU.  */
429 #define PREDICT_DISTANCE (TARGET_Z10 ? 384 : 2048)
430 
431 /* Masks per jump target register indicating which thunks need to be
432    generated.  */
433 static GTY(()) int indirect_branch_prez10thunk_mask = 0;
434 static GTY(()) int indirect_branch_z10thunk_mask = 0;
435 
436 #define INDIRECT_BRANCH_NUM_OPTIONS 4
437 
438 enum s390_indirect_branch_option
439   {
440     s390_opt_indirect_branch_jump = 0,
441     s390_opt_indirect_branch_call,
442     s390_opt_function_return_reg,
443     s390_opt_function_return_mem
444   };
445 
446 static GTY(()) int indirect_branch_table_label_no[INDIRECT_BRANCH_NUM_OPTIONS] = { 0 };
447 const char *indirect_branch_table_label[INDIRECT_BRANCH_NUM_OPTIONS] = \
448   { "LJUMP", "LCALL", "LRETREG", "LRETMEM" };
449 const char *indirect_branch_table_name[INDIRECT_BRANCH_NUM_OPTIONS] =	\
450   { ".s390_indirect_jump", ".s390_indirect_call",
451     ".s390_return_reg", ".s390_return_mem" };
452 
453 bool
454 s390_return_addr_from_memory ()
455 {
456   return cfun_gpr_save_slot(RETURN_REGNUM) == SAVE_SLOT_STACK;
457 }
458 
459 /* Indicate which ABI has been used for passing vector args.
460    0 - no vector type arguments have been passed where the ABI is relevant
461    1 - the old ABI has been used
462    2 - a vector type argument has been passed either in a vector register
463        or on the stack by value  */
464 static int s390_vector_abi = 0;
465 
466 /* Set the vector ABI marker if TYPE is subject to the vector ABI
467    switch.  The vector ABI affects only vector data types.  There are
468    two aspects of the vector ABI relevant here:
469 
470    1. vectors >= 16 bytes have an alignment of 8 bytes with the new
471    ABI and natural alignment with the old.
472 
473    2. vectors <= 16 bytes are passed in VRs or by value on the stack
474    with the new ABI but by reference on the stack with the old.
475 
476    If ARG_P is true TYPE is used for a function argument or return
477    value.  The ABI marker then is set for all vector data types.  If
478    ARG_P is false only type 1 vectors are being checked.  */
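/* Illustrative example (hypothetical declaration): given
     typedef int v4si __attribute__ ((vector_size (16)));
     void foo (v4si x);
   checking the argument type of foo here finds a 16-byte vector argument, so
   s390_vector_abi is set to 2 when the new vector ABI (TARGET_VX_ABI) is in
   effect and to 1 otherwise.  */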
479 
480 static void
481 s390_check_type_for_vector_abi (const_tree type, bool arg_p, bool in_struct_p)
482 {
483   static hash_set<const_tree> visited_types_hash;
484 
485   if (s390_vector_abi)
486     return;
487 
488   if (type == NULL_TREE || TREE_CODE (type) == ERROR_MARK)
489     return;
490 
491   if (visited_types_hash.contains (type))
492     return;
493 
494   visited_types_hash.add (type);
495 
496   if (VECTOR_TYPE_P (type))
497     {
498       int type_size = int_size_in_bytes (type);
499 
500       /* Outside of arguments only the alignment changes, and this
501 	 only happens for vector types >= 16 bytes.  */
502       if (!arg_p && type_size < 16)
503 	return;
504 
505       /* In arguments vector types > 16 bytes are passed as before (GCC
506 	 never enforced the bigger alignment for arguments which was
507 	 required by the old vector ABI).  However, it might still be
508 	 ABI relevant due to the changed alignment if it is a struct
509 	 member.  */
510       if (arg_p && type_size > 16 && !in_struct_p)
511 	return;
512 
513       s390_vector_abi = TARGET_VX_ABI ? 2 : 1;
514     }
515   else if (POINTER_TYPE_P (type) || TREE_CODE (type) == ARRAY_TYPE)
516     {
517       /* ARRAY_TYPE: Since neither of the ABIs imposes more than
518 	 natural alignment, there will never be ABI-dependent padding
519 	 in an array type.  That's why we do not set in_struct_p to
520 	 true here.  */
521       s390_check_type_for_vector_abi (TREE_TYPE (type), arg_p, in_struct_p);
522     }
523   else if (TREE_CODE (type) == FUNCTION_TYPE || TREE_CODE (type) == METHOD_TYPE)
524     {
525       tree arg_chain;
526 
527       /* Check the return type.  */
528       s390_check_type_for_vector_abi (TREE_TYPE (type), true, false);
529 
530       for (arg_chain = TYPE_ARG_TYPES (type);
531 	   arg_chain;
532 	   arg_chain = TREE_CHAIN (arg_chain))
533 	s390_check_type_for_vector_abi (TREE_VALUE (arg_chain), true, false);
534     }
535   else if (RECORD_OR_UNION_TYPE_P (type))
536     {
537       tree field;
538 
539       for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
540 	{
541 	  if (TREE_CODE (field) != FIELD_DECL)
542 	    continue;
543 
544 	  s390_check_type_for_vector_abi (TREE_TYPE (field), arg_p, true);
545 	}
546     }
547 }
548 
549 
550 /* System z builtins.  */
551 
552 #include "s390-builtins.h"
553 
554 const unsigned int bflags_builtin[S390_BUILTIN_MAX + 1] =
555   {
556 #undef B_DEF
557 #undef OB_DEF
558 #undef OB_DEF_VAR
559 #define B_DEF(NAME, PATTERN, ATTRS, BFLAGS, ...) BFLAGS,
560 #define OB_DEF(...)
561 #define OB_DEF_VAR(...)
562 #include "s390-builtins.def"
563     0
564   };
565 
566 const unsigned int opflags_builtin[S390_BUILTIN_MAX + 1] =
567   {
568 #undef B_DEF
569 #undef OB_DEF
570 #undef OB_DEF_VAR
571 #define B_DEF(NAME, PATTERN, ATTRS, BFLAGS, OPFLAGS, ...) OPFLAGS,
572 #define OB_DEF(...)
573 #define OB_DEF_VAR(...)
574 #include "s390-builtins.def"
575     0
576   };
577 
578 const unsigned int bflags_overloaded_builtin[S390_OVERLOADED_BUILTIN_MAX + 1] =
579   {
580 #undef B_DEF
581 #undef OB_DEF
582 #undef OB_DEF_VAR
583 #define B_DEF(...)
584 #define OB_DEF(NAME, FIRST_VAR_NAME, LAST_VAR_NAME, BFLAGS, ...) BFLAGS,
585 #define OB_DEF_VAR(...)
586 #include "s390-builtins.def"
587     0
588   };
589 
590 const unsigned int
591 bflags_overloaded_builtin_var[S390_OVERLOADED_BUILTIN_VAR_MAX + 1] =
592   {
593 #undef B_DEF
594 #undef OB_DEF
595 #undef OB_DEF_VAR
596 #define B_DEF(...)
597 #define OB_DEF(...)
598 #define OB_DEF_VAR(NAME, PATTERN, FLAGS, OPFLAGS, FNTYPE) FLAGS,
599 #include "s390-builtins.def"
600     0
601   };
602 
603 const unsigned int
604 opflags_overloaded_builtin_var[S390_OVERLOADED_BUILTIN_VAR_MAX + 1] =
605   {
606 #undef B_DEF
607 #undef OB_DEF
608 #undef OB_DEF_VAR
609 #define B_DEF(...)
610 #define OB_DEF(...)
611 #define OB_DEF_VAR(NAME, PATTERN, FLAGS, OPFLAGS, FNTYPE) OPFLAGS,
612 #include "s390-builtins.def"
613     0
614   };
615 
616 tree s390_builtin_types[BT_MAX];
617 tree s390_builtin_fn_types[BT_FN_MAX];
618 tree s390_builtin_decls[S390_BUILTIN_MAX +
619 			S390_OVERLOADED_BUILTIN_MAX +
620 			S390_OVERLOADED_BUILTIN_VAR_MAX];
621 
622 static enum insn_code const code_for_builtin[S390_BUILTIN_MAX + 1] = {
623 #undef B_DEF
624 #undef OB_DEF
625 #undef OB_DEF_VAR
626 #define B_DEF(NAME, PATTERN, ...) CODE_FOR_##PATTERN,
627 #define OB_DEF(...)
628 #define OB_DEF_VAR(...)
629 
630 #include "s390-builtins.def"
631   CODE_FOR_nothing
632 };
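/* Sketch of how the .def machinery above is meant to work (hypothetical
   entry, not part of s390-builtins.def): a line like
     B_DEF (s390_example, example_pattern, 0, B_VX, O1_U4, BT_FN_INT_INT)
   would contribute B_VX to bflags_builtin[], O1_U4 to opflags_builtin[] and
   CODE_FOR_example_pattern to code_for_builtin[], all indexed by
   S390_BUILTIN_s390_example (the entry's names are assumptions here).  */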
633 
634 static void
635 s390_init_builtins (void)
636 {
637   /* These definitions are being used in s390-builtins.def.  */
638   tree returns_twice_attr = tree_cons (get_identifier ("returns_twice"),
639 				       NULL, NULL);
640   tree noreturn_attr = tree_cons (get_identifier ("noreturn"), NULL, NULL);
641   tree c_uint64_type_node;
642 
643   /* The uint64_type_node from tree.c is not compatible with the C99
644      uint64_t data type.  What we want is c_uint64_type_node from
645      c-common.c.  But since backend code is not supposed to interface
646      with the frontend we recreate it here.  */
647   if (TARGET_64BIT)
648     c_uint64_type_node = long_unsigned_type_node;
649   else
650     c_uint64_type_node = long_long_unsigned_type_node;
651 
652 #undef DEF_TYPE
653 #define DEF_TYPE(INDEX, NODE, CONST_P)			\
654   if (s390_builtin_types[INDEX] == NULL)		\
655     s390_builtin_types[INDEX] = (!CONST_P) ?		\
656       (NODE) : build_type_variant ((NODE), 1, 0);
657 
658 #undef DEF_POINTER_TYPE
659 #define DEF_POINTER_TYPE(INDEX, INDEX_BASE)				\
660   if (s390_builtin_types[INDEX] == NULL)				\
661     s390_builtin_types[INDEX] =						\
662       build_pointer_type (s390_builtin_types[INDEX_BASE]);
663 
664 #undef DEF_DISTINCT_TYPE
665 #define DEF_DISTINCT_TYPE(INDEX, INDEX_BASE)				\
666   if (s390_builtin_types[INDEX] == NULL)				\
667     s390_builtin_types[INDEX] =						\
668       build_distinct_type_copy (s390_builtin_types[INDEX_BASE]);
669 
670 #undef DEF_VECTOR_TYPE
671 #define DEF_VECTOR_TYPE(INDEX, INDEX_BASE, ELEMENTS)			\
672   if (s390_builtin_types[INDEX] == NULL)				\
673     s390_builtin_types[INDEX] =						\
674       build_vector_type (s390_builtin_types[INDEX_BASE], ELEMENTS);
675 
676 #undef DEF_OPAQUE_VECTOR_TYPE
677 #define DEF_OPAQUE_VECTOR_TYPE(INDEX, INDEX_BASE, ELEMENTS)		\
678   if (s390_builtin_types[INDEX] == NULL)				\
679     s390_builtin_types[INDEX] =						\
680       build_opaque_vector_type (s390_builtin_types[INDEX_BASE], ELEMENTS);
681 
682 #undef DEF_FN_TYPE
683 #define DEF_FN_TYPE(INDEX, args...)				\
684   if (s390_builtin_fn_types[INDEX] == NULL)			\
685     s390_builtin_fn_types[INDEX] =				\
686       build_function_type_list (args, NULL_TREE);
687 #undef DEF_OV_TYPE
688 #define DEF_OV_TYPE(...)
689 #include "s390-builtin-types.def"
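/* Illustrative sketch of what the DEF_* expansion above produces: a
   hypothetical line
     DEF_VECTOR_TYPE (BT_EXAMPLE_V4SI, BT_INT, 4)
   in s390-builtin-types.def would lazily initialize
   s390_builtin_types[BT_EXAMPLE_V4SI] to
   build_vector_type (s390_builtin_types[BT_INT], 4), i.e. a 4 x int vector
   type (BT_EXAMPLE_V4SI and BT_INT are assumed names for the example).  */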
690 
691 #undef B_DEF
692 #define B_DEF(NAME, PATTERN, ATTRS, BFLAGS, OPFLAGS, FNTYPE)		\
693   if (s390_builtin_decls[S390_BUILTIN_##NAME] == NULL)			\
694     s390_builtin_decls[S390_BUILTIN_##NAME] =				\
695       add_builtin_function ("__builtin_" #NAME,				\
696 			    s390_builtin_fn_types[FNTYPE],		\
697 			    S390_BUILTIN_##NAME,			\
698 			    BUILT_IN_MD,				\
699 			    NULL,					\
700 			    ATTRS);
701 #undef OB_DEF
702 #define OB_DEF(NAME, FIRST_VAR_NAME, LAST_VAR_NAME, BFLAGS, FNTYPE)	\
703   if (s390_builtin_decls[S390_OVERLOADED_BUILTIN_##NAME + S390_BUILTIN_MAX] \
704       == NULL)								\
705     s390_builtin_decls[S390_OVERLOADED_BUILTIN_##NAME + S390_BUILTIN_MAX] = \
706       add_builtin_function ("__builtin_" #NAME,				\
707 			    s390_builtin_fn_types[FNTYPE],		\
708 			    S390_OVERLOADED_BUILTIN_##NAME + S390_BUILTIN_MAX, \
709 			    BUILT_IN_MD,				\
710 			    NULL,					\
711 			    0);
712 #undef OB_DEF_VAR
713 #define OB_DEF_VAR(...)
714 #include "s390-builtins.def"
715 
716 }
717 
718 /* Return true if ARG is appropriate as argument number ARGNUM of
719    builtin DECL.  The operand flags from s390-builtins.def have to be
720    passed as OP_FLAGS.  */
721 bool
722 s390_const_operand_ok (tree arg, int argnum, int op_flags, tree decl)
723 {
724   if (O_UIMM_P (op_flags))
725     {
726       int bitwidths[] = { 1, 2, 3, 4, 5, 8, 12, 16, 32 };
727       int bitwidth = bitwidths[op_flags - O_U1];
728 
729       if (!tree_fits_uhwi_p (arg)
730 	  || tree_to_uhwi (arg) > (HOST_WIDE_INT_1U << bitwidth) - 1)
731 	{
732 	  error ("constant argument %d for builtin %qF is out of range "
733 		 "(0..%wu)", argnum, decl,
734 		 (HOST_WIDE_INT_1U << bitwidth) - 1);
735 	  return false;
736 	}
737     }
738 
739   if (O_SIMM_P (op_flags))
740     {
741       int bitwidths[] = { 2, 3, 4, 5, 8, 12, 16, 32 };
742       int bitwidth = bitwidths[op_flags - O_S2];
743 
744       if (!tree_fits_shwi_p (arg)
745 	  || tree_to_shwi (arg) < -(HOST_WIDE_INT_1 << (bitwidth - 1))
746 	  || tree_to_shwi (arg) > ((HOST_WIDE_INT_1 << (bitwidth - 1)) - 1))
747 	{
748 	  error ("constant argument %d for builtin %qF is out of range "
749 		 "(%wd..%wd)", argnum, decl,
750 		 -(HOST_WIDE_INT_1 << (bitwidth - 1)),
751 		 (HOST_WIDE_INT_1 << (bitwidth - 1)) - 1);
752 	  return false;
753 	}
754     }
755   return true;
756 }
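/* For example (assuming the O_U4/O_S8 flag names from s390-builtins.h): an
   operand declared O_U4 is only accepted for constants in the range 0..15,
   an O_S8 operand only for -128..127; anything outside the range triggers
   one of the error () calls above and the expander bails out.  */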
757 
758 /* Expand an expression EXP that calls a built-in function,
759    with result going to TARGET if that's convenient
760    (and in mode MODE if that's convenient).
761    SUBTARGET may be used as the target for computing one of EXP's operands.
762    IGNORE is nonzero if the value is to be ignored.  */
763 
764 static rtx
765 s390_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
766 		     machine_mode mode ATTRIBUTE_UNUSED,
767 		     int ignore ATTRIBUTE_UNUSED)
768 {
769 #define MAX_ARGS 6
770 
771   tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
772   unsigned int fcode = DECL_MD_FUNCTION_CODE (fndecl);
773   enum insn_code icode;
774   rtx op[MAX_ARGS], pat;
775   int arity;
776   bool nonvoid;
777   tree arg;
778   call_expr_arg_iterator iter;
779   unsigned int all_op_flags = opflags_for_builtin (fcode);
780   machine_mode last_vec_mode = VOIDmode;
781 
782   if (TARGET_DEBUG_ARG)
783     {
784       fprintf (stderr,
785 	       "s390_expand_builtin, code = %4d, %s, bflags = 0x%x\n",
786 	       (int)fcode, IDENTIFIER_POINTER (DECL_NAME (fndecl)),
787 	       bflags_for_builtin (fcode));
788     }
789 
790   if (S390_USE_TARGET_ATTRIBUTE)
791     {
792       unsigned int bflags;
793 
794       bflags = bflags_for_builtin (fcode);
795       if ((bflags & B_HTM) && !TARGET_HTM)
796 	{
797 	  error ("builtin %qF is not supported without %<-mhtm%> "
798 		 "(default with %<-march=zEC12%> and higher).", fndecl);
799 	  return const0_rtx;
800 	}
801       if (((bflags & B_VX) || (bflags & B_VXE)) && !TARGET_VX)
802 	{
803 	  error ("builtin %qF requires %<-mvx%> "
804 		 "(default with %<-march=z13%> and higher).", fndecl);
805 	  return const0_rtx;
806 	}
807 
808       if ((bflags & B_VXE) && !TARGET_VXE)
809 	{
810 	  error ("Builtin %qF requires z14 or higher.", fndecl);
811 	  return const0_rtx;
812 	}
813 
814       if ((bflags & B_VXE2) && !TARGET_VXE2)
815 	{
816 	  error ("Builtin %qF requires z15 or higher.", fndecl);
817 	  return const0_rtx;
818 	}
819     }
820   if (fcode >= S390_OVERLOADED_BUILTIN_VAR_OFFSET
821       && fcode < S390_ALL_BUILTIN_MAX)
822     {
823       gcc_unreachable ();
824     }
825   else if (fcode < S390_OVERLOADED_BUILTIN_OFFSET)
826     {
827       icode = code_for_builtin[fcode];
828       /* Set a flag in the machine specific cfun part in order to support
829 	 saving/restoring of FPRs.  */
830       if (fcode == S390_BUILTIN_tbegin || fcode == S390_BUILTIN_tbegin_retry)
831 	cfun->machine->tbegin_p = true;
832     }
833   else if (fcode < S390_OVERLOADED_BUILTIN_VAR_OFFSET)
834     {
835       error ("unresolved overloaded builtin");
836       return const0_rtx;
837     }
838   else
839     internal_error ("bad builtin fcode");
840 
841   if (icode == 0)
842     internal_error ("bad builtin icode");
843 
844   nonvoid = TREE_TYPE (TREE_TYPE (fndecl)) != void_type_node;
845 
846   if (nonvoid)
847     {
848       machine_mode tmode = insn_data[icode].operand[0].mode;
849       if (!target
850 	  || GET_MODE (target) != tmode
851 	  || !(*insn_data[icode].operand[0].predicate) (target, tmode))
852 	target = gen_reg_rtx (tmode);
853 
854       /* There are builtins (e.g. vec_promote) with no vector
855 	 arguments but an element selector.  So we have to also look
856 	 at the vector return type when emitting the modulo
857 	 operation.  */
858       if (VECTOR_MODE_P (insn_data[icode].operand[0].mode))
859 	last_vec_mode = insn_data[icode].operand[0].mode;
860     }
861 
862   arity = 0;
863   FOR_EACH_CALL_EXPR_ARG (arg, iter, exp)
864     {
865       rtx tmp_rtx;
866       const struct insn_operand_data *insn_op;
867       unsigned int op_flags = all_op_flags & ((1 << O_SHIFT) - 1);
868 
869       all_op_flags = all_op_flags >> O_SHIFT;
870 
871       if (arg == error_mark_node)
872 	return NULL_RTX;
873       if (arity >= MAX_ARGS)
874 	return NULL_RTX;
875 
876       if (O_IMM_P (op_flags)
877 	  && TREE_CODE (arg) != INTEGER_CST)
878 	{
879 	  error ("constant value required for builtin %qF argument %d",
880 		 fndecl, arity + 1);
881 	  return const0_rtx;
882 	}
883 
884       if (!s390_const_operand_ok (arg, arity + 1, op_flags, fndecl))
885 	return const0_rtx;
886 
887       insn_op = &insn_data[icode].operand[arity + nonvoid];
888       op[arity] = expand_expr (arg, NULL_RTX, insn_op->mode, EXPAND_NORMAL);
889 
890       /* expand_expr truncates constants to the target mode only if it
891 	 is "convenient".  However, our checks below rely on this
892 	 being done.  */
893       if (CONST_INT_P (op[arity])
894 	  && SCALAR_INT_MODE_P (insn_op->mode)
895 	  && GET_MODE (op[arity]) != insn_op->mode)
896 	op[arity] = GEN_INT (trunc_int_for_mode (INTVAL (op[arity]),
897 						 insn_op->mode));
898 
899       /* Wrap the expanded RTX for pointer types into a MEM expr with
900 	 the proper mode.  This allows us to use e.g. (match_operand
901 	 "memory_operand"..) in the insn patterns instead of (mem
902 	 (match_operand "address_operand")).  This is helpful for
903 	 patterns not just accepting MEMs.  */
904       if (POINTER_TYPE_P (TREE_TYPE (arg))
905 	  && insn_op->predicate != address_operand)
906 	op[arity] = gen_rtx_MEM (insn_op->mode, op[arity]);
907 
908       /* Expand the modulo operation required on element selectors.  */
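      /* E.g. (illustrative) with last_vec_mode == V4SImode the selector is
	 ANDed with GET_MODE_NUNITS (V4SImode) - 1 == 3 below, i.e. reduced
	 modulo the number of vector elements.  */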
909       if (op_flags == O_ELEM)
910 	{
911 	  gcc_assert (last_vec_mode != VOIDmode);
912 	  op[arity] = simplify_expand_binop (SImode, code_to_optab (AND),
913 					     op[arity],
914 					     GEN_INT (GET_MODE_NUNITS (last_vec_mode) - 1),
915 					     NULL_RTX, 1, OPTAB_DIRECT);
916 	}
917 
918       /* Record the vector mode used for an element selector.  This assumes:
919 	 1. There is no builtin with two different vector modes and an element selector
920 	 2. The element selector comes after the vector type it is referring to.
921 	 This is currently true for all the builtins, but FIXME: we
922 	 should better check for that.  */
923       if (VECTOR_MODE_P (insn_op->mode))
924 	last_vec_mode = insn_op->mode;
925 
926       if (insn_op->predicate (op[arity], insn_op->mode))
927 	{
928 	  arity++;
929 	  continue;
930 	}
931 
932       /* A memory operand is rejected by the memory_operand predicate.
933 	 Try making the address legal by copying it into a register.  */
934       if (MEM_P (op[arity])
935 	  && insn_op->predicate == memory_operand
936 	  && (GET_MODE (XEXP (op[arity], 0)) == Pmode
937 	      || GET_MODE (XEXP (op[arity], 0)) == VOIDmode))
938 	{
939 	  op[arity] = replace_equiv_address (op[arity],
940 					     copy_to_mode_reg (Pmode,
941 					       XEXP (op[arity], 0)));
942 	}
943       /* Some of the builtins require different modes/types than the
944 	 pattern in order to implement a specific API.  Instead of
945 	 adding many expanders which do the mode change we do it here.
946 	 E.g. s390_vec_add_u128, which is required to have vector unsigned
947 	 char arguments, is mapped to addti3.  */
948       else if (insn_op->mode != VOIDmode
949 	       && GET_MODE (op[arity]) != VOIDmode
950 	       && GET_MODE (op[arity]) != insn_op->mode
951 	       && ((tmp_rtx = simplify_gen_subreg (insn_op->mode, op[arity],
952 						   GET_MODE (op[arity]), 0))
953 		   != NULL_RTX))
954 	{
955 	  op[arity] = tmp_rtx;
956 	}
957 
958       /* The predicate rejects the operand although the mode is fine.
959 	 Copy the operand to register.  */
960       if (!insn_op->predicate (op[arity], insn_op->mode)
961 	  && (GET_MODE (op[arity]) == insn_op->mode
962 	      || GET_MODE (op[arity]) == VOIDmode
963 	      || (insn_op->predicate == address_operand
964 		  && GET_MODE (op[arity]) == Pmode)))
965 	{
966 	  /* An address_operand usually has VOIDmode in the expander
967 	     so we cannot use this.  */
968 	  machine_mode target_mode =
969 	    (insn_op->predicate == address_operand
970 	     ? (machine_mode) Pmode : insn_op->mode);
971 	  op[arity] = copy_to_mode_reg (target_mode, op[arity]);
972 	}
973 
974       if (!insn_op->predicate (op[arity], insn_op->mode))
975 	{
976 	  error ("invalid argument %d for builtin %qF", arity + 1, fndecl);
977 	  return const0_rtx;
978 	}
979       arity++;
980     }
981 
982   switch (arity)
983     {
984     case 0:
985       pat = GEN_FCN (icode) (target);
986       break;
987     case 1:
988       if (nonvoid)
989 	pat = GEN_FCN (icode) (target, op[0]);
990       else
991 	pat = GEN_FCN (icode) (op[0]);
992       break;
993     case 2:
994       if (nonvoid)
995 	pat = GEN_FCN (icode) (target, op[0], op[1]);
996       else
997 	pat = GEN_FCN (icode) (op[0], op[1]);
998       break;
999     case 3:
1000       if (nonvoid)
1001 	pat = GEN_FCN (icode) (target, op[0], op[1], op[2]);
1002       else
1003 	pat = GEN_FCN (icode) (op[0], op[1], op[2]);
1004       break;
1005     case 4:
1006       if (nonvoid)
1007 	pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3]);
1008       else
1009 	pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
1010       break;
1011     case 5:
1012       if (nonvoid)
1013 	pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3], op[4]);
1014       else
1015 	pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3], op[4]);
1016       break;
1017     case 6:
1018       if (nonvoid)
1019 	pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3], op[4], op[5]);
1020       else
1021 	pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3], op[4], op[5]);
1022       break;
1023     default:
1024       gcc_unreachable ();
1025     }
1026   if (!pat)
1027     return NULL_RTX;
1028   emit_insn (pat);
1029 
1030   if (nonvoid)
1031     return target;
1032   else
1033     return const0_rtx;
1034 }
1035 
1036 
1037 static const int s390_hotpatch_hw_max = 1000000;
1038 static int s390_hotpatch_hw_before_label = 0;
1039 static int s390_hotpatch_hw_after_label = 0;
1040 
1041 /* Check whether the hotpatch attribute is applied to a function and, if it has
1042    arguments, whether the arguments are valid.  */
1043 
1044 static tree
1045 s390_handle_hotpatch_attribute (tree *node, tree name, tree args,
1046 				int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
1047 {
1048   tree expr;
1049   tree expr2;
1050   int err;
1051 
1052   if (TREE_CODE (*node) != FUNCTION_DECL)
1053     {
1054       warning (OPT_Wattributes, "%qE attribute only applies to functions",
1055 	       name);
1056       *no_add_attrs = true;
1057     }
1058   if (args != NULL && TREE_CHAIN (args) != NULL)
1059     {
1060       expr = TREE_VALUE (args);
1061       expr2 = TREE_VALUE (TREE_CHAIN (args));
1062     }
1063   if (args == NULL || TREE_CHAIN (args) == NULL)
1064     err = 1;
1065   else if (TREE_CODE (expr) != INTEGER_CST
1066 	   || !INTEGRAL_TYPE_P (TREE_TYPE (expr))
1067 	   || wi::gtu_p (wi::to_wide (expr), s390_hotpatch_hw_max))
1068     err = 1;
1069   else if (TREE_CODE (expr2) != INTEGER_CST
1070 	   || !INTEGRAL_TYPE_P (TREE_TYPE (expr2))
1071 	   || wi::gtu_p (wi::to_wide (expr2), s390_hotpatch_hw_max))
1072     err = 1;
1073   else
1074     err = 0;
1075   if (err)
1076     {
1077       error ("requested %qE attribute is not a comma separated pair of"
1078 	     " non-negative integer constants or too large (max. %d)", name,
1079 	     s390_hotpatch_hw_max);
1080       *no_add_attrs = true;
1081     }
1082 
1083   return NULL_TREE;
1084 }
1085 
1086 /* Expand the s390_vector_bool type attribute.  */
1087 
1088 static tree
1089 s390_handle_vectorbool_attribute (tree *node, tree name ATTRIBUTE_UNUSED,
1090 				  tree args ATTRIBUTE_UNUSED,
1091 				  int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
1092 {
1093   tree type = *node, result = NULL_TREE;
1094   machine_mode mode;
1095 
1096   while (POINTER_TYPE_P (type)
1097 	 || TREE_CODE (type) == FUNCTION_TYPE
1098 	 || TREE_CODE (type) == METHOD_TYPE
1099 	 || TREE_CODE (type) == ARRAY_TYPE)
1100     type = TREE_TYPE (type);
1101 
1102   mode = TYPE_MODE (type);
1103   switch (mode)
1104     {
1105     case E_DImode: case E_V2DImode:
1106       result = s390_builtin_types[BT_BV2DI];
1107       break;
1108     case E_SImode: case E_V4SImode:
1109       result = s390_builtin_types[BT_BV4SI];
1110       break;
1111     case E_HImode: case E_V8HImode:
1112       result = s390_builtin_types[BT_BV8HI];
1113       break;
1114     case E_QImode: case E_V16QImode:
1115       result = s390_builtin_types[BT_BV16QI];
1116       break;
1117     default:
1118       break;
1119     }
1120 
1121   *no_add_attrs = true;  /* No need to hang on to the attribute.  */
1122 
1123   if (result)
1124     *node = lang_hooks.types.reconstruct_complex_type (*node, result);
1125 
1126   return NULL_TREE;
1127 }
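/* Sketch of the intended use (hedged; the actual consumers live in header
   files such as vecintrin.h): applying the attribute to a 32-bit integer
   type, e.g. "__attribute__ ((s390_vector_bool)) unsigned int", hits the
   E_SImode case above and rewrites the type to BT_BV4SI, the 4 x 32-bit
   boolean vector type.  */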
1128 
1129 /* Check syntax of function decl attributes having a string type value.  */
1130 
1131 static tree
1132 s390_handle_string_attribute (tree *node, tree name ATTRIBUTE_UNUSED,
1133 			      tree args ATTRIBUTE_UNUSED,
1134 			      int flags ATTRIBUTE_UNUSED,
1135 			      bool *no_add_attrs)
1136 {
1137   tree cst;
1138 
1139   if (TREE_CODE (*node) != FUNCTION_DECL)
1140     {
1141       warning (OPT_Wattributes, "%qE attribute only applies to functions",
1142 	       name);
1143       *no_add_attrs = true;
1144     }
1145 
1146   cst = TREE_VALUE (args);
1147 
1148   if (TREE_CODE (cst) != STRING_CST)
1149     {
1150       warning (OPT_Wattributes,
1151 	       "%qE attribute requires a string constant argument",
1152 	       name);
1153       *no_add_attrs = true;
1154     }
1155 
1156   if (is_attribute_p ("indirect_branch", name)
1157       || is_attribute_p ("indirect_branch_call", name)
1158       || is_attribute_p ("function_return", name)
1159       || is_attribute_p ("function_return_reg", name)
1160       || is_attribute_p ("function_return_mem", name))
1161     {
1162       if (strcmp (TREE_STRING_POINTER (cst), "keep") != 0
1163 	  && strcmp (TREE_STRING_POINTER (cst), "thunk") != 0
1164 	  && strcmp (TREE_STRING_POINTER (cst), "thunk-extern") != 0)
1165       {
1166 	warning (OPT_Wattributes,
1167 		 "argument to %qE attribute is not "
1168 		 "(keep|thunk|thunk-extern)", name);
1169 	*no_add_attrs = true;
1170       }
1171     }
1172 
1173   if (is_attribute_p ("indirect_branch_jump", name)
1174       && strcmp (TREE_STRING_POINTER (cst), "keep") != 0
1175       && strcmp (TREE_STRING_POINTER (cst), "thunk") != 0
1176       && strcmp (TREE_STRING_POINTER (cst), "thunk-inline") != 0
1177       && strcmp (TREE_STRING_POINTER (cst), "thunk-extern") != 0)
1178     {
1179       warning (OPT_Wattributes,
1180 	       "argument to %qE attribute is not "
1181 	       "(keep|thunk|thunk-inline|thunk-extern)", name);
1182       *no_add_attrs = true;
1183     }
1184 
1185   return NULL_TREE;
1186 }
1187 
1188 static const struct attribute_spec s390_attribute_table[] = {
1189   { "hotpatch", 2, 2, true, false, false, false,
1190     s390_handle_hotpatch_attribute, NULL },
1191   { "s390_vector_bool", 0, 0, false, true, false, true,
1192     s390_handle_vectorbool_attribute, NULL },
1193   { "indirect_branch", 1, 1, true, false, false, false,
1194     s390_handle_string_attribute, NULL },
1195   { "indirect_branch_jump", 1, 1, true, false, false, false,
1196     s390_handle_string_attribute, NULL },
1197   { "indirect_branch_call", 1, 1, true, false, false, false,
1198     s390_handle_string_attribute, NULL },
1199   { "function_return", 1, 1, true, false, false, false,
1200     s390_handle_string_attribute, NULL },
1201   { "function_return_reg", 1, 1, true, false, false, false,
1202     s390_handle_string_attribute, NULL },
1203   { "function_return_mem", 1, 1, true, false, false, false,
1204     s390_handle_string_attribute, NULL },
1205 
1206   /* End element.  */
1207   { NULL,        0, 0, false, false, false, false, NULL, NULL }
1208 };
1209 
1210 /* Return the alignment for LABEL.  We default to the -falign-labels
1211    value except for the literal pool base label.  */
1212 int
1213 s390_label_align (rtx_insn *label)
1214 {
1215   rtx_insn *prev_insn = prev_active_insn (label);
1216   rtx set, src;
1217 
1218   if (prev_insn == NULL_RTX)
1219     goto old;
1220 
1221   set = single_set (prev_insn);
1222 
1223   if (set == NULL_RTX)
1224     goto old;
1225 
1226   src = SET_SRC (set);
1227 
1228   /* Don't align literal pool base labels.  */
1229   if (GET_CODE (src) == UNSPEC
1230       && XINT (src, 1) == UNSPEC_MAIN_BASE)
1231     return 0;
1232 
1233  old:
1234   return align_labels.levels[0].log;
1235 }
1236 
1237 static GTY(()) rtx got_symbol;
1238 
1239 /* Return the GOT table symbol.  The symbol will be created when the
1240    function is invoked for the first time.  */
1241 
1242 static rtx
1243 s390_got_symbol (void)
1244 {
1245   if (!got_symbol)
1246     {
1247       got_symbol = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
1248       SYMBOL_REF_FLAGS (got_symbol) = SYMBOL_FLAG_LOCAL;
1249     }
1250 
1251   return got_symbol;
1252 }
1253 
1254 static scalar_int_mode
1255 s390_libgcc_cmp_return_mode (void)
1256 {
1257   return TARGET_64BIT ? DImode : SImode;
1258 }
1259 
1260 static scalar_int_mode
1261 s390_libgcc_shift_count_mode (void)
1262 {
1263   return TARGET_64BIT ? DImode : SImode;
1264 }
1265 
1266 static scalar_int_mode
1267 s390_unwind_word_mode (void)
1268 {
1269   return TARGET_64BIT ? DImode : SImode;
1270 }
1271 
1272 /* Return true if the back end supports mode MODE.  */
1273 static bool
1274 s390_scalar_mode_supported_p (scalar_mode mode)
1275 {
1276   /* In contrast to the default implementation, reject TImode constants on
1277      31-bit TARGET_ZARCH for ABI compliance.  */
1278   if (!TARGET_64BIT && TARGET_ZARCH && mode == TImode)
1279     return false;
1280 
1281   if (DECIMAL_FLOAT_MODE_P (mode))
1282     return default_decimal_float_supported_p ();
1283 
1284   return default_scalar_mode_supported_p (mode);
1285 }
1286 
1287 /* Return true if the back end supports vector mode MODE.  */
1288 static bool
1289 s390_vector_mode_supported_p (machine_mode mode)
1290 {
1291   machine_mode inner;
1292 
1293   if (!VECTOR_MODE_P (mode)
1294       || !TARGET_VX
1295       || GET_MODE_SIZE (mode) > 16)
1296     return false;
1297 
1298   inner = GET_MODE_INNER (mode);
1299 
1300   switch (inner)
1301     {
1302     case E_QImode:
1303     case E_HImode:
1304     case E_SImode:
1305     case E_DImode:
1306     case E_TImode:
1307     case E_SFmode:
1308     case E_DFmode:
1309     case E_TFmode:
1310       return true;
1311     default:
1312       return false;
1313     }
1314 }
1315 
1316 /* Set the has_landing_pad_p flag in struct machine_function to VALUE.  */
1317 
1318 void
1319 s390_set_has_landing_pad_p (bool value)
1320 {
1321   cfun->machine->has_landing_pad_p = value;
1322 }
1323 
1324 /* If two condition code modes are compatible, return a condition code
1325    mode which is compatible with both.  Otherwise, return
1326    VOIDmode.  */
1327 
1328 static machine_mode
1329 s390_cc_modes_compatible (machine_mode m1, machine_mode m2)
1330 {
1331   if (m1 == m2)
1332     return m1;
1333 
1334   switch (m1)
1335     {
1336     case E_CCZmode:
1337       if (m2 == CCUmode || m2 == CCTmode || m2 == CCZ1mode
1338 	  || m2 == CCSmode || m2 == CCSRmode || m2 == CCURmode)
1339 	return m2;
1340       return VOIDmode;
1341 
1342     case E_CCSmode:
1343     case E_CCUmode:
1344     case E_CCTmode:
1345     case E_CCSRmode:
1346     case E_CCURmode:
1347     case E_CCZ1mode:
1348       if (m2 == CCZmode)
1349 	return m1;
1350 
1351       return VOIDmode;
1352 
1353     default:
1354       return VOIDmode;
1355     }
1356   return VOIDmode;
1357 }
1358 
1359 /* Return true if SET either doesn't set the CC register, or else
1360    the source and destination have matching CC modes and that
1361    CC mode is at least as constrained as REQ_MODE.  */
1362 
1363 static bool
1364 s390_match_ccmode_set (rtx set, machine_mode req_mode)
1365 {
1366   machine_mode set_mode;
1367 
1368   gcc_assert (GET_CODE (set) == SET);
1369 
1370   /* These modes are supposed to be used only in CC consumer
1371      patterns.  */
1372   gcc_assert (req_mode != CCVIALLmode && req_mode != CCVIANYmode
1373 	      && req_mode != CCVFALLmode && req_mode != CCVFANYmode);
1374 
1375   if (GET_CODE (SET_DEST (set)) != REG || !CC_REGNO_P (REGNO (SET_DEST (set))))
1376     return 1;
1377 
1378   set_mode = GET_MODE (SET_DEST (set));
1379   switch (set_mode)
1380     {
1381     case E_CCZ1mode:
1382     case E_CCSmode:
1383     case E_CCSRmode:
1384     case E_CCSFPSmode:
1385     case E_CCUmode:
1386     case E_CCURmode:
1387     case E_CCOmode:
1388     case E_CCLmode:
1389     case E_CCL1mode:
1390     case E_CCL2mode:
1391     case E_CCL3mode:
1392     case E_CCT1mode:
1393     case E_CCT2mode:
1394     case E_CCT3mode:
1395     case E_CCVEQmode:
1396     case E_CCVIHmode:
1397     case E_CCVIHUmode:
1398     case E_CCVFHmode:
1399     case E_CCVFHEmode:
1400       if (req_mode != set_mode)
1401 	return 0;
1402       break;
1403 
1404     case E_CCZmode:
1405       if (req_mode != CCSmode && req_mode != CCUmode && req_mode != CCTmode
1406 	  && req_mode != CCSRmode && req_mode != CCURmode
1407 	  && req_mode != CCZ1mode)
1408 	return 0;
1409       break;
1410 
1411     case E_CCAPmode:
1412     case E_CCANmode:
1413       if (req_mode != CCAmode)
1414 	return 0;
1415       break;
1416 
1417     default:
1418       gcc_unreachable ();
1419     }
1420 
1421   return (GET_MODE (SET_SRC (set)) == set_mode);
1422 }
1423 
1424 /* Return true if every SET in INSN that sets the CC register
1425    has source and destination with matching CC modes and that
1426    CC mode is at least as constrained as REQ_MODE.
1427    If REQ_MODE is VOIDmode, always return false.  */
1428 
1429 bool
1430 s390_match_ccmode (rtx_insn *insn, machine_mode req_mode)
1431 {
1432   int i;
1433 
1434   /* s390_tm_ccmode returns VOIDmode to indicate failure.  */
1435   if (req_mode == VOIDmode)
1436     return false;
1437 
1438   if (GET_CODE (PATTERN (insn)) == SET)
1439     return s390_match_ccmode_set (PATTERN (insn), req_mode);
1440 
1441   if (GET_CODE (PATTERN (insn)) == PARALLEL)
1442       for (i = 0; i < XVECLEN (PATTERN (insn), 0); i++)
1443 	{
1444 	  rtx set = XVECEXP (PATTERN (insn), 0, i);
1445 	  if (GET_CODE (set) == SET)
1446 	    if (!s390_match_ccmode_set (set, req_mode))
1447 	      return false;
1448 	}
1449 
1450   return true;
1451 }
1452 
1453 /* If a test-under-mask instruction can be used to implement
1454    (compare (and ... OP1) OP2), return the CC mode required
1455    to do that.  Otherwise, return VOIDmode.
1456    MIXED is true if the instruction can distinguish between
1457    CC1 and CC2 for mixed selected bits (TMxx); it is false
1458    if the instruction cannot (TM).  */
1459 
1460 machine_mode
1461 s390_tm_ccmode (rtx op1, rtx op2, bool mixed)
1462 {
1463   int bit0, bit1;
1464 
1465   /* ??? Fixme: should work on CONST_WIDE_INT as well.  */
1466   if (GET_CODE (op1) != CONST_INT || GET_CODE (op2) != CONST_INT)
1467     return VOIDmode;
1468 
1469   /* Selected bits all zero: CC0.
1470      e.g.: int a; if ((a & (16 + 128)) == 0) */
1471   if (INTVAL (op2) == 0)
1472     return CCTmode;
1473 
1474   /* Selected bits all one: CC3.
1475      e.g.: int a; if ((a & (16 + 128)) == 16 + 128) */
1476   if (INTVAL (op2) == INTVAL (op1))
1477     return CCT3mode;
1478 
1479   /* Exactly two bits selected, mixed zeroes and ones: CC1 or CC2. e.g.:
1480      int a;
1481      if ((a & (16 + 128)) == 16)         -> CCT1
1482      if ((a & (16 + 128)) == 128)        -> CCT2  */
1483   if (mixed)
1484     {
1485       bit1 = exact_log2 (INTVAL (op2));
1486       bit0 = exact_log2 (INTVAL (op1) ^ INTVAL (op2));
1487       if (bit0 != -1 && bit1 != -1)
1488 	return bit0 > bit1 ? CCT1mode : CCT2mode;
1489     }
1490 
1491   return VOIDmode;
1492 }
1493 
1494 /* Given a comparison code OP (EQ, NE, etc.) and the operands
1495    OP0 and OP1 of a COMPARE, return the mode to be used for the
1496    comparison.  */
1497 
1498 machine_mode
1499 s390_select_ccmode (enum rtx_code code, rtx op0, rtx op1)
1500 {
1501   switch (code)
1502     {
1503       case EQ:
1504       case NE:
1505 	if ((GET_CODE (op0) == NEG || GET_CODE (op0) == ABS)
1506 	    && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)
1507 	  return CCAPmode;
1508 	if (GET_CODE (op0) == PLUS && GET_CODE (XEXP (op0, 1)) == CONST_INT
1509 	    && CONST_OK_FOR_K (INTVAL (XEXP (op0, 1))))
1510 	  return CCAPmode;
1511 	if ((GET_CODE (op0) == PLUS || GET_CODE (op0) == MINUS
1512 	     || GET_CODE (op1) == NEG)
1513 	    && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)
1514 	  return CCLmode;
1515 
1516 	if (GET_CODE (op0) == AND)
1517 	  {
1518 	    /* Check whether we can potentially do it via TM.  */
1519 	    machine_mode ccmode;
1520 	    ccmode = s390_tm_ccmode (XEXP (op0, 1), op1, 1);
1521 	    if (ccmode != VOIDmode)
1522 	      {
1523 		/* Relax CCTmode to CCZmode to allow fall-back to AND
1524 		   if that turns out to be beneficial.  */
1525 		return ccmode == CCTmode ? CCZmode : ccmode;
1526 	      }
1527 	  }
1528 
1529 	if (register_operand (op0, HImode)
1530 	    && GET_CODE (op1) == CONST_INT
1531 	    && (INTVAL (op1) == -1 || INTVAL (op1) == 65535))
1532 	  return CCT3mode;
1533 	if (register_operand (op0, QImode)
1534 	    && GET_CODE (op1) == CONST_INT
1535 	    && (INTVAL (op1) == -1 || INTVAL (op1) == 255))
1536 	  return CCT3mode;
1537 
1538 	return CCZmode;
1539 
1540       case LE:
1541       case LT:
1542       case GE:
1543       case GT:
1544 	/* The only overflow condition of NEG and ABS happens when
1545 	   INT_MIN is used as parameter, in which case the result stays
1546 	   negative.  So we have an overflow from a positive value to a negative.
1547 	   Using CCAP mode the resulting cc can be used for comparisons.  */
1548 	if ((GET_CODE (op0) == NEG || GET_CODE (op0) == ABS)
1549 	    && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)
1550 	  return CCAPmode;
1551 
1552 	/* If constants are involved in an add instruction it is possible to use
1553 	   the resulting cc for comparisons with zero. Knowing the sign of the
1554 	   constant the overflow behavior gets predictable. e.g.:
1555 	     int a, b; if ((b = a + c) > 0)
1556 	   with c as a constant value: c < 0 -> CCAN and c >= 0 -> CCAP  */
1557 	if (GET_CODE (op0) == PLUS && GET_CODE (XEXP (op0, 1)) == CONST_INT
1558 	    && (CONST_OK_FOR_K (INTVAL (XEXP (op0, 1)))
1559 		|| (CONST_OK_FOR_CONSTRAINT_P (INTVAL (XEXP (op0, 1)), 'O', "Os")
1560 		    /* Avoid INT32_MIN on 32 bit.  */
1561 		    && (!TARGET_ZARCH || INTVAL (XEXP (op0, 1)) != -0x7fffffff - 1))))
1562 	  {
1563 	    if (INTVAL (XEXP((op0), 1)) < 0)
1564 	      return CCANmode;
1565 	    else
1566 	      return CCAPmode;
1567 	  }
1568 
1569 	/* Fall through.  */
1570       case LTGT:
1571 	if (HONOR_NANS (op0) || HONOR_NANS (op1))
1572 	  return CCSFPSmode;
1573 
1574 	/* Fall through.  */
1575       case UNORDERED:
1576       case ORDERED:
1577       case UNEQ:
1578       case UNLE:
1579       case UNLT:
1580       case UNGE:
1581       case UNGT:
1582 	if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
1583 	    && GET_CODE (op1) != CONST_INT)
1584 	  return CCSRmode;
1585 	return CCSmode;
1586 
1587       case LTU:
1588       case GEU:
1589 	if (GET_CODE (op0) == PLUS
1590 	    && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)
1591 	  return CCL1mode;
1592 
1593 	if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
1594 	    && GET_CODE (op1) != CONST_INT)
1595 	  return CCURmode;
1596 	return CCUmode;
1597 
1598       case LEU:
1599       case GTU:
1600 	if (GET_CODE (op0) == MINUS
1601 	    && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)
1602 	  return CCL2mode;
1603 
1604 	if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
1605 	    && GET_CODE (op1) != CONST_INT)
1606 	  return CCURmode;
1607 	return CCUmode;
1608 
1609       default:
1610 	gcc_unreachable ();
1611     }
1612 }
1613 
1614 /* Replace the comparison OP0 CODE OP1 by a semantically equivalent one
1615    that we can implement more efficiently.  */
1616 
1617 static void
1618 s390_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
1619 			      bool op0_preserve_value)
1620 {
1621   if (op0_preserve_value)
1622     return;
1623 
1624   /* Convert ZERO_EXTRACT back to AND to enable TM patterns.  */
1625   if ((*code == EQ || *code == NE)
1626       && *op1 == const0_rtx
1627       && GET_CODE (*op0) == ZERO_EXTRACT
1628       && GET_CODE (XEXP (*op0, 1)) == CONST_INT
1629       && GET_CODE (XEXP (*op0, 2)) == CONST_INT
1630       && SCALAR_INT_MODE_P (GET_MODE (XEXP (*op0, 0))))
1631     {
1632       rtx inner = XEXP (*op0, 0);
1633       HOST_WIDE_INT modesize = GET_MODE_BITSIZE (GET_MODE (inner));
1634       HOST_WIDE_INT len = INTVAL (XEXP (*op0, 1));
1635       HOST_WIDE_INT pos = INTVAL (XEXP (*op0, 2));
1636 
1637       if (len > 0 && len < modesize
1638 	  && pos >= 0 && pos + len <= modesize
1639 	  && modesize <= HOST_BITS_PER_WIDE_INT)
1640 	{
1641 	  unsigned HOST_WIDE_INT block;
1642 	  block = (HOST_WIDE_INT_1U << len) - 1;
1643 	  block <<= modesize - pos - len;
1644 
1645 	  *op0 = gen_rtx_AND (GET_MODE (inner), inner,
1646 			      gen_int_mode (block, GET_MODE (inner)));
1647 	}
1648     }
1649 
1650   /* Narrow AND of memory against immediate to enable TM.  */
1651   if ((*code == EQ || *code == NE)
1652       && *op1 == const0_rtx
1653       && GET_CODE (*op0) == AND
1654       && GET_CODE (XEXP (*op0, 1)) == CONST_INT
1655       && SCALAR_INT_MODE_P (GET_MODE (XEXP (*op0, 0))))
1656     {
1657       rtx inner = XEXP (*op0, 0);
1658       rtx mask = XEXP (*op0, 1);
1659 
1660       /* Ignore paradoxical SUBREGs if all extra bits are masked out.  */
1661       if (GET_CODE (inner) == SUBREG
1662 	  && SCALAR_INT_MODE_P (GET_MODE (SUBREG_REG (inner)))
1663 	  && (GET_MODE_SIZE (GET_MODE (inner))
1664 	      >= GET_MODE_SIZE (GET_MODE (SUBREG_REG (inner))))
1665 	  && ((INTVAL (mask)
1666 	       & GET_MODE_MASK (GET_MODE (inner))
1667 	       & ~GET_MODE_MASK (GET_MODE (SUBREG_REG (inner))))
1668 	      == 0))
1669 	inner = SUBREG_REG (inner);
1670 
1671       /* Do not change volatile MEMs.  */
1672       if (MEM_P (inner) && !MEM_VOLATILE_P (inner))
1673 	{
1674 	  int part = s390_single_part (XEXP (*op0, 1),
1675 				       GET_MODE (inner), QImode, 0);
1676 	  if (part >= 0)
1677 	    {
1678 	      mask = gen_int_mode (s390_extract_part (mask, QImode, 0), QImode);
1679 	      inner = adjust_address_nv (inner, QImode, part);
1680 	      *op0 = gen_rtx_AND (QImode, inner, mask);
1681 	    }
1682 	}
1683     }
1684 
1685   /* Narrow comparisons against 0xffff to HImode if possible.  */
1686   if ((*code == EQ || *code == NE)
1687       && GET_CODE (*op1) == CONST_INT
1688       && INTVAL (*op1) == 0xffff
1689       && SCALAR_INT_MODE_P (GET_MODE (*op0))
1690       && (nonzero_bits (*op0, GET_MODE (*op0))
1691 	  & ~HOST_WIDE_INT_UC (0xffff)) == 0)
1692     {
1693       *op0 = gen_lowpart (HImode, *op0);
1694       *op1 = constm1_rtx;
1695     }
1696 
1697   /* Remove redundant UNSPEC_STRCMPCC_TO_INT conversions if possible.  */
1698   if (GET_CODE (*op0) == UNSPEC
1699       && XINT (*op0, 1) == UNSPEC_STRCMPCC_TO_INT
1700       && XVECLEN (*op0, 0) == 1
1701       && GET_MODE (XVECEXP (*op0, 0, 0)) == CCUmode
1702       && GET_CODE (XVECEXP (*op0, 0, 0)) == REG
1703       && REGNO (XVECEXP (*op0, 0, 0)) == CC_REGNUM
1704       && *op1 == const0_rtx)
1705     {
1706       enum rtx_code new_code = UNKNOWN;
1707       switch (*code)
1708 	{
1709 	  case EQ: new_code = EQ;  break;
1710 	  case NE: new_code = NE;  break;
1711 	  case LT: new_code = GTU; break;
1712 	  case GT: new_code = LTU; break;
1713 	  case LE: new_code = GEU; break;
1714 	  case GE: new_code = LEU; break;
1715 	  default: break;
1716 	}
1717 
1718       if (new_code != UNKNOWN)
1719 	{
1720 	  *op0 = XVECEXP (*op0, 0, 0);
1721 	  *code = new_code;
1722 	}
1723     }
1724 
1725   /* Remove redundant UNSPEC_CC_TO_INT conversions if possible.  */
1726   if (GET_CODE (*op0) == UNSPEC
1727       && XINT (*op0, 1) == UNSPEC_CC_TO_INT
1728       && XVECLEN (*op0, 0) == 1
1729       && GET_CODE (XVECEXP (*op0, 0, 0)) == REG
1730       && REGNO (XVECEXP (*op0, 0, 0)) == CC_REGNUM
1731       && CONST_INT_P (*op1))
1732     {
1733       enum rtx_code new_code = UNKNOWN;
1734       switch (GET_MODE (XVECEXP (*op0, 0, 0)))
1735 	{
1736 	case E_CCZmode:
1737 	case E_CCRAWmode:
1738 	  switch (*code)
1739 	    {
1740 	    case EQ: new_code = EQ;  break;
1741 	    case NE: new_code = NE;  break;
1742 	    default: break;
1743 	    }
1744 	  break;
1745 	default: break;
1746 	}
1747 
1748       if (new_code != UNKNOWN)
1749 	{
1750 	  /* For CCRAWmode put the required cc mask into the second
1751 	     operand.  */
1752 	  if (GET_MODE (XVECEXP (*op0, 0, 0)) == CCRAWmode
1753 	      && INTVAL (*op1) >= 0 && INTVAL (*op1) <= 3)
1754 	    *op1 = gen_rtx_CONST_INT (VOIDmode, 1 << (3 - INTVAL (*op1)));
1755 	  *op0 = XVECEXP (*op0, 0, 0);
1756 	  *code = new_code;
1757 	}
1758     }
1759 
1760   /* Simplify cascaded EQ, NE with const0_rtx.  */
1761   if ((*code == NE || *code == EQ)
1762       && (GET_CODE (*op0) == EQ || GET_CODE (*op0) == NE)
1763       && GET_MODE (*op0) == SImode
1764       && GET_MODE (XEXP (*op0, 0)) == CCZ1mode
1765       && REG_P (XEXP (*op0, 0))
1766       && XEXP (*op0, 1) == const0_rtx
1767       && *op1 == const0_rtx)
1768     {
1769       if ((*code == EQ && GET_CODE (*op0) == NE)
1770 	  || (*code == NE && GET_CODE (*op0) == EQ))
1771 	*code = EQ;
1772       else
1773 	*code = NE;
1774       *op0 = XEXP (*op0, 0);
1775     }
1776 
1777   /* Prefer register over memory as first operand.  */
1778   if (MEM_P (*op0) && REG_P (*op1))
1779     {
1780       rtx tem = *op0; *op0 = *op1; *op1 = tem;
1781       *code = (int)swap_condition ((enum rtx_code)*code);
1782     }
1783 
1784   /* A comparison result is compared against zero.  Replace it with
1785      the (perhaps inverted) original comparison.
1786      This probably should be done by simplify_relational_operation.  */
1787   if ((*code == EQ || *code == NE)
1788       && *op1 == const0_rtx
1789       && COMPARISON_P (*op0)
1790       && CC_REG_P (XEXP (*op0, 0)))
1791     {
1792       enum rtx_code new_code;
1793 
1794       if (*code == EQ)
1795 	new_code = reversed_comparison_code_parts (GET_CODE (*op0),
1796 						   XEXP (*op0, 0),
1797 						   XEXP (*op0, 1), NULL);
1798       else
1799 	new_code = GET_CODE (*op0);
1800 
1801       if (new_code != UNKNOWN)
1802 	{
1803 	  *code = new_code;
1804 	  *op1 = XEXP (*op0, 1);
1805 	  *op0 = XEXP (*op0, 0);
1806 	}
1807     }
1808 
1809   /* ~a==b -> ~(a^b)==0   ~a!=b -> ~(a^b)!=0 */
1810   if (TARGET_Z15
1811       && (*code == EQ || *code == NE)
1812       && (GET_MODE (*op0) == DImode || GET_MODE (*op0) == SImode)
1813       && GET_CODE (*op0) == NOT)
1814     {
1815       machine_mode mode = GET_MODE (*op0);
1816       *op0 = gen_rtx_XOR (mode, XEXP (*op0, 0), *op1);
1817       *op0 = gen_rtx_NOT (mode, *op0);
1818       *op1 = const0_rtx;
1819     }
1820 
1821   /* a&b == -1 -> ~a|~b == 0    a|b == -1 -> ~a&~b == 0  */
1822   if (TARGET_Z15
1823       && (*code == EQ || *code == NE)
1824       && (GET_CODE (*op0) == AND || GET_CODE (*op0) == IOR)
1825       && (GET_MODE (*op0) == DImode || GET_MODE (*op0) == SImode)
1826       && CONST_INT_P (*op1)
1827       && *op1 == constm1_rtx)
1828     {
1829       machine_mode mode = GET_MODE (*op0);
1830       rtx op00 = gen_rtx_NOT (mode, XEXP (*op0, 0));
1831       rtx op01 = gen_rtx_NOT (mode, XEXP (*op0, 1));
1832 
1833       if (GET_CODE (*op0) == AND)
1834 	*op0 = gen_rtx_IOR (mode, op00, op01);
1835       else
1836 	*op0 = gen_rtx_AND (mode, op00, op01);
1837 
1838       *op1 = const0_rtx;
1839     }
1840 }
1841 
1842 
1843 /* Emit a compare instruction suitable to implement the comparison
1844    OP0 CODE OP1.  Return the correct condition RTL to be placed in
1845    the IF_THEN_ELSE of the conditional branch testing the result.  */
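/* Illustrative use (hypothetical caller; the variable names are made up):
   to branch to LABEL when A < B one would emit roughly

     rtx cond = s390_emit_compare (LT, a, b);
     s390_emit_jump (label, cond);

   which yields a condition of the form (lt (reg:CCS CC_REGNUM) (const_int 0)).  */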
1846 
1847 rtx
1848 s390_emit_compare (enum rtx_code code, rtx op0, rtx op1)
1849 {
1850   machine_mode mode = s390_select_ccmode (code, op0, op1);
1851   rtx cc;
1852 
1853   if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC)
1854     {
1855       /* Do not output a redundant compare instruction if a
1856 	 compare_and_swap pattern already computed the result and the
1857 	 machine modes are compatible.  */
1858       gcc_assert (s390_cc_modes_compatible (GET_MODE (op0), mode)
1859 		  == GET_MODE (op0));
1860       cc = op0;
1861     }
1862   else
1863     {
1864       cc = gen_rtx_REG (mode, CC_REGNUM);
1865       emit_insn (gen_rtx_SET (cc, gen_rtx_COMPARE (mode, op0, op1)));
1866     }
1867 
1868   return gen_rtx_fmt_ee (code, VOIDmode, cc, const0_rtx);
1869 }
1870 
1871 /* If MEM is not a legitimate compare-and-swap memory operand, return a new
1872    MEM, whose address is a pseudo containing the original MEM's address.  */
1873 
1874 static rtx
1875 s390_legitimize_cs_operand (rtx mem)
1876 {
1877   rtx tmp;
1878 
1879   if (!contains_symbol_ref_p (mem))
1880     return mem;
1881   tmp = gen_reg_rtx (Pmode);
1882   emit_move_insn (tmp, copy_rtx (XEXP (mem, 0)));
1883   return change_address (mem, VOIDmode, tmp);
1884 }
1885 
1886 /* Emit a SImode compare and swap instruction setting MEM to NEW_RTX if OLD
1887    matches CMP.
1888    Return the correct condition RTL to be placed in the IF_THEN_ELSE of the
1889    conditional branch testing the result.  */
1890 
1891 static rtx
1892 s390_emit_compare_and_swap (enum rtx_code code, rtx old, rtx mem,
1893 			    rtx cmp, rtx new_rtx, machine_mode ccmode)
1894 {
1895   rtx cc;
1896 
1897   mem = s390_legitimize_cs_operand (mem);
1898   cc = gen_rtx_REG (ccmode, CC_REGNUM);
1899   switch (GET_MODE (mem))
1900     {
1901     case E_SImode:
1902       emit_insn (gen_atomic_compare_and_swapsi_internal (old, mem, cmp,
1903 							 new_rtx, cc));
1904       break;
1905     case E_DImode:
1906       emit_insn (gen_atomic_compare_and_swapdi_internal (old, mem, cmp,
1907 							 new_rtx, cc));
1908       break;
1909     case E_TImode:
1910       emit_insn (gen_atomic_compare_and_swapti_internal (old, mem, cmp,
1911 							 new_rtx, cc));
1912       break;
1913     case E_QImode:
1914     case E_HImode:
1915     default:
1916       gcc_unreachable ();
1917     }
1918   return s390_emit_compare (code, cc, const0_rtx);
1919 }
1920 
1921 /* Emit a jump instruction to TARGET and return it.  If COND is
1922    NULL_RTX, emit an unconditional jump, else a conditional jump under
1923    condition COND.  */
1924 
1925 rtx_insn *
1926 s390_emit_jump (rtx target, rtx cond)
1927 {
1928   rtx insn;
1929 
1930   target = gen_rtx_LABEL_REF (VOIDmode, target);
1931   if (cond)
1932     target = gen_rtx_IF_THEN_ELSE (VOIDmode, cond, target, pc_rtx);
1933 
1934   insn = gen_rtx_SET (pc_rtx, target);
1935   return emit_jump_insn (insn);
1936 }
1937 
1938 /* Return branch condition mask to implement a branch
1939    specified by CODE.  Return -1 for invalid comparisons.  */
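/* For illustration: an EQ test of the CC register in CCZmode maps to
   CC0 only, i.e. mask 8 (0b1000), while NE maps to CC1 | CC2 | CC3,
   i.e. mask 7 (0b0111).  */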
1940 
1941 int
1942 s390_branch_condition_mask (rtx code)
1943 {
1944   const int CC0 = 1 << 3;
1945   const int CC1 = 1 << 2;
1946   const int CC2 = 1 << 1;
1947   const int CC3 = 1 << 0;
1948 
1949   gcc_assert (GET_CODE (XEXP (code, 0)) == REG);
1950   gcc_assert (REGNO (XEXP (code, 0)) == CC_REGNUM);
1951   gcc_assert (XEXP (code, 1) == const0_rtx
1952 	      || (GET_MODE (XEXP (code, 0)) == CCRAWmode
1953 		  && CONST_INT_P (XEXP (code, 1))));
1954 
1955 
1956   switch (GET_MODE (XEXP (code, 0)))
1957     {
1958     case E_CCZmode:
1959     case E_CCZ1mode:
1960       switch (GET_CODE (code))
1961 	{
1962 	case EQ:	return CC0;
1963 	case NE:	return CC1 | CC2 | CC3;
1964 	default:	return -1;
1965 	}
1966       break;
1967 
1968     case E_CCT1mode:
1969       switch (GET_CODE (code))
1970 	{
1971 	case EQ:	return CC1;
1972 	case NE:	return CC0 | CC2 | CC3;
1973 	default:	return -1;
1974 	}
1975       break;
1976 
1977     case E_CCT2mode:
1978       switch (GET_CODE (code))
1979 	{
1980 	case EQ:	return CC2;
1981 	case NE:	return CC0 | CC1 | CC3;
1982 	default:	return -1;
1983 	}
1984       break;
1985 
1986     case E_CCT3mode:
1987       switch (GET_CODE (code))
1988 	{
1989 	case EQ:	return CC3;
1990 	case NE:	return CC0 | CC1 | CC2;
1991 	default:	return -1;
1992 	}
1993       break;
1994 
1995     case E_CCLmode:
1996       switch (GET_CODE (code))
1997 	{
1998 	case EQ:	return CC0 | CC2;
1999 	case NE:	return CC1 | CC3;
2000 	default:	return -1;
2001 	}
2002       break;
2003 
2004     case E_CCL1mode:
2005       switch (GET_CODE (code))
2006 	{
2007 	case LTU:	return CC2 | CC3;  /* carry */
2008 	case GEU:	return CC0 | CC1;  /* no carry */
2009 	default:	return -1;
2010 	}
2011       break;
2012 
2013     case E_CCL2mode:
2014       switch (GET_CODE (code))
2015 	{
2016 	case GTU:	return CC0 | CC1;  /* borrow */
2017 	case LEU:	return CC2 | CC3;  /* no borrow */
2018 	default:	return -1;
2019 	}
2020       break;
2021 
2022     case E_CCL3mode:
2023       switch (GET_CODE (code))
2024 	{
2025 	case EQ:	return CC0 | CC2;
2026 	case NE:	return CC1 | CC3;
2027 	case LTU:	return CC1;
2028 	case GTU:	return CC3;
2029 	case LEU:	return CC1 | CC2;
2030 	case GEU:	return CC2 | CC3;
2031 	default:	return -1;
2032 	}
2033 
2034     case E_CCUmode:
2035       switch (GET_CODE (code))
2036 	{
2037 	case EQ:	return CC0;
2038 	case NE:	return CC1 | CC2 | CC3;
2039 	case LTU:	return CC1;
2040 	case GTU:	return CC2;
2041 	case LEU:	return CC0 | CC1;
2042 	case GEU:	return CC0 | CC2;
2043 	default:	return -1;
2044 	}
2045       break;
2046 
2047     case E_CCURmode:
2048       switch (GET_CODE (code))
2049 	{
2050 	case EQ:	return CC0;
2051 	case NE:	return CC2 | CC1 | CC3;
2052 	case LTU:	return CC2;
2053 	case GTU:	return CC1;
2054 	case LEU:	return CC0 | CC2;
2055 	case GEU:	return CC0 | CC1;
2056 	default:	return -1;
2057 	}
2058       break;
2059 
2060     case E_CCAPmode:
2061       switch (GET_CODE (code))
2062 	{
2063 	case EQ:	return CC0;
2064 	case NE:	return CC1 | CC2 | CC3;
2065 	case LT:	return CC1 | CC3;
2066 	case GT:	return CC2;
2067 	case LE:	return CC0 | CC1 | CC3;
2068 	case GE:	return CC0 | CC2;
2069 	default:	return -1;
2070 	}
2071       break;
2072 
2073     case E_CCANmode:
2074       switch (GET_CODE (code))
2075 	{
2076 	case EQ:	return CC0;
2077 	case NE:	return CC1 | CC2 | CC3;
2078 	case LT:	return CC1;
2079 	case GT:	return CC2 | CC3;
2080 	case LE:	return CC0 | CC1;
2081 	case GE:	return CC0 | CC2 | CC3;
2082 	default:	return -1;
2083 	}
2084       break;
2085 
2086     case E_CCOmode:
2087       switch (GET_CODE (code))
2088 	{
2089 	case EQ:	return CC0 | CC1 | CC2;
2090 	case NE:	return CC3;
2091 	default:	return -1;
2092 	}
2093       break;
2094 
2095     case E_CCSmode:
2096     case E_CCSFPSmode:
2097       switch (GET_CODE (code))
2098 	{
2099 	case EQ:	return CC0;
2100 	case NE:	return CC1 | CC2 | CC3;
2101 	case LT:	return CC1;
2102 	case GT:	return CC2;
2103 	case LE:	return CC0 | CC1;
2104 	case GE:	return CC0 | CC2;
2105 	case UNORDERED:	return CC3;
2106 	case ORDERED:	return CC0 | CC1 | CC2;
2107 	case UNEQ:	return CC0 | CC3;
2108 	case UNLT:	return CC1 | CC3;
2109 	case UNGT:	return CC2 | CC3;
2110 	case UNLE:	return CC0 | CC1 | CC3;
2111 	case UNGE:	return CC0 | CC2 | CC3;
2112 	case LTGT:	return CC1 | CC2;
2113 	default:	return -1;
2114 	}
2115       break;
2116 
2117     case E_CCSRmode:
2118       switch (GET_CODE (code))
2119 	{
2120 	case EQ:	return CC0;
2121 	case NE:	return CC2 | CC1 | CC3;
2122 	case LT:	return CC2;
2123 	case GT:	return CC1;
2124 	case LE:	return CC0 | CC2;
2125 	case GE:	return CC0 | CC1;
2126 	case UNORDERED:	return CC3;
2127 	case ORDERED:	return CC0 | CC2 | CC1;
2128 	case UNEQ:	return CC0 | CC3;
2129 	case UNLT:	return CC2 | CC3;
2130 	case UNGT:	return CC1 | CC3;
2131 	case UNLE:	return CC0 | CC2 | CC3;
2132 	case UNGE:	return CC0 | CC1 | CC3;
2133 	case LTGT:	return CC2 | CC1;
2134 	default:	return -1;
2135 	}
2136       break;
2137 
2138       /* Vector comparison modes.  */
2139       /* CC2 will never be set.  It however is part of the negated
2140 	 masks.  */
2141     case E_CCVIALLmode:
2142       switch (GET_CODE (code))
2143 	{
2144 	case EQ:
2145 	case GTU:
2146 	case GT:
2147 	case GE:        return CC0;
2148 	  /* The inverted modes are in fact *any* modes.  */
2149 	case NE:
2150 	case LEU:
2151 	case LE:
2152 	case LT:        return CC3 | CC1 | CC2;
2153 	default:        return -1;
2154 	}
2155 
2156     case E_CCVIANYmode:
2157       switch (GET_CODE (code))
2158 	{
2159 	case EQ:
2160 	case GTU:
2161 	case GT:
2162 	case GE:        return CC0 | CC1;
2163 	  /* The inverted modes are in fact *all* modes.  */
2164 	case NE:
2165 	case LEU:
2166 	case LE:
2167 	case LT:        return CC3 | CC2;
2168 	default:        return -1;
2169 	}
2170     case E_CCVFALLmode:
2171       switch (GET_CODE (code))
2172 	{
2173 	case EQ:
2174 	case GT:
2175 	case GE:        return CC0;
2176 	  /* The inverted modes are in fact *any* modes.  */
2177 	case NE:
2178 	case UNLE:
2179 	case UNLT:      return CC3 | CC1 | CC2;
2180 	default:        return -1;
2181 	}
2182 
2183     case E_CCVFANYmode:
2184       switch (GET_CODE (code))
2185 	{
2186 	case EQ:
2187 	case GT:
2188 	case GE:        return CC0 | CC1;
2189 	  /* The inverted modes are in fact *all* modes.  */
2190 	case NE:
2191 	case UNLE:
2192 	case UNLT:      return CC3 | CC2;
2193 	default:        return -1;
2194 	}
2195 
2196     case E_CCRAWmode:
2197       switch (GET_CODE (code))
2198 	{
2199 	case EQ:
2200 	  return INTVAL (XEXP (code, 1));
2201 	case NE:
2202 	  return (INTVAL (XEXP (code, 1))) ^ 0xf;
2203 	default:
2204 	  gcc_unreachable ();
2205 	}
2206 
2207     default:
2208       return -1;
2209     }
2210 }
2211 
2212 
2213 /* Return branch condition mask to implement a compare and branch
2214    specified by CODE.  Return -1 for invalid comparisons.  */
2215 
2216 int
2217 s390_compare_and_branch_condition_mask (rtx code)
2218 {
2219   const int CC0 = 1 << 3;
2220   const int CC1 = 1 << 2;
2221   const int CC2 = 1 << 1;
2222 
2223   switch (GET_CODE (code))
2224     {
2225     case EQ:
2226       return CC0;
2227     case NE:
2228       return CC1 | CC2;
2229     case LT:
2230     case LTU:
2231       return CC1;
2232     case GT:
2233     case GTU:
2234       return CC2;
2235     case LE:
2236     case LEU:
2237       return CC0 | CC1;
2238     case GE:
2239     case GEU:
2240       return CC0 | CC2;
2241     default:
2242       gcc_unreachable ();
2243     }
2244   return -1;
2245 }
2246 
2247 /* If INV is false, return assembler mnemonic string to implement
2248    a branch specified by CODE.  If INV is true, return mnemonic
2249    for the corresponding inverted branch.  */
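/* For example (illustrative): NE in CCZmode has mask 7, giving the "ne"
   mnemonic; with INV set the mask becomes 8 and the mnemonic is "e".  */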
2250 
2251 static const char *
2252 s390_branch_condition_mnemonic (rtx code, int inv)
2253 {
2254   int mask;
2255 
2256   static const char *const mnemonic[16] =
2257     {
2258       NULL, "o", "h", "nle",
2259       "l", "nhe", "lh", "ne",
2260       "e", "nlh", "he", "nl",
2261       "le", "nh", "no", NULL
2262     };
2263 
2264   if (GET_CODE (XEXP (code, 0)) == REG
2265       && REGNO (XEXP (code, 0)) == CC_REGNUM
2266       && (XEXP (code, 1) == const0_rtx
2267 	  || (GET_MODE (XEXP (code, 0)) == CCRAWmode
2268 	      && CONST_INT_P (XEXP (code, 1)))))
2269     mask = s390_branch_condition_mask (code);
2270   else
2271     mask = s390_compare_and_branch_condition_mask (code);
2272 
2273   gcc_assert (mask >= 0);
2274 
2275   if (inv)
2276     mask ^= 15;
2277 
2278   gcc_assert (mask >= 1 && mask <= 14);
2279 
2280   return mnemonic[mask];
2281 }
2282 
2283 /* Return the part of op which has a value different from def.
2284    The size of the part is determined by mode.
2285    Use this function only if you already know that op really
2286    contains such a part.  */
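/* Illustrative example: for OP = 0x0000ff00, MODE = QImode and DEF = 0,
   the QImode parts are scanned from the least significant byte upwards;
   the first part differing from DEF is 0xff, which is returned.  */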
2287 
2288 unsigned HOST_WIDE_INT
2289 s390_extract_part (rtx op, machine_mode mode, int def)
2290 {
2291   unsigned HOST_WIDE_INT value = 0;
2292   int max_parts = HOST_BITS_PER_WIDE_INT / GET_MODE_BITSIZE (mode);
2293   int part_bits = GET_MODE_BITSIZE (mode);
2294   unsigned HOST_WIDE_INT part_mask = (HOST_WIDE_INT_1U << part_bits) - 1;
2295   int i;
2296 
2297   for (i = 0; i < max_parts; i++)
2298     {
2299       if (i == 0)
2300 	value = UINTVAL (op);
2301       else
2302 	value >>= part_bits;
2303 
2304       if ((value & part_mask) != (def & part_mask))
2305 	return value & part_mask;
2306     }
2307 
2308   gcc_unreachable ();
2309 }
2310 
2311 /* If OP is an integer constant of mode MODE with exactly one
2312    part of mode PART_MODE unequal to DEF, return the number of that
2313    part. Otherwise, return -1.  */
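/* Illustrative example: for OP = 0x00000000ffff0000, MODE = DImode,
   PART_MODE = HImode and DEF = 0, exactly one HImode part differs from
   DEF; counting parts from the most significant one, its number is 2,
   which is returned.  */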
2314 
2315 int
2316 s390_single_part (rtx op,
2317 		  machine_mode mode,
2318 		  machine_mode part_mode,
2319 		  int def)
2320 {
2321   unsigned HOST_WIDE_INT value = 0;
2322   int n_parts = GET_MODE_SIZE (mode) / GET_MODE_SIZE (part_mode);
2323   unsigned HOST_WIDE_INT part_mask
2324     = (HOST_WIDE_INT_1U << GET_MODE_BITSIZE (part_mode)) - 1;
2325   int i, part = -1;
2326 
2327   if (GET_CODE (op) != CONST_INT)
2328     return -1;
2329 
2330   for (i = 0; i < n_parts; i++)
2331     {
2332       if (i == 0)
2333 	value = UINTVAL (op);
2334       else
2335 	value >>= GET_MODE_BITSIZE (part_mode);
2336 
2337       if ((value & part_mask) != (def & part_mask))
2338 	{
2339 	  if (part != -1)
2340 	    return -1;
2341 	  else
2342 	    part = i;
2343 	}
2344     }
2345   return part == -1 ? -1 : n_parts - 1 - part;
2346 }
2347 
2348 /* Return true if IN contains a contiguous bitfield in the lower SIZE
2349    bits and no other bits are set in (the lower SIZE bits of) IN.
2350 
2351    PSTART and PEND can be used to obtain the start and end
2352    position (inclusive) of the bitfield relative to 64
2353    bits. *PSTART / *PEND gives the position of the first/last bit
2354    of the bitfield counting from the highest order bit starting
2355    with zero.  */
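/* Illustrative example: for IN = 0x000000000000ff00 and SIZE = 64 the
   ones occupy the bits with values 2^8 .. 2^15.  Counted from the
   highest order bit, this yields *PSTART = 48 and *PEND = 55.  */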
2356 
2357 bool
2358 s390_contiguous_bitmask_nowrap_p (unsigned HOST_WIDE_INT in, int size,
2359 				  int *pstart, int *pend)
2360 {
2361   int start;
2362   int end = -1;
2363   int lowbit = HOST_BITS_PER_WIDE_INT - 1;
2364   int highbit = HOST_BITS_PER_WIDE_INT - size;
2365   unsigned HOST_WIDE_INT bitmask = HOST_WIDE_INT_1U;
2366 
2367   gcc_assert (!!pstart == !!pend);
2368   for (start = lowbit; start >= highbit; bitmask <<= 1, start--)
2369     if (end == -1)
2370       {
2371 	/* Look for the rightmost bit of a contiguous range of ones.  */
2372 	if (bitmask & in)
2373 	  /* Found it.  */
2374 	  end = start;
2375       }
2376     else
2377       {
2378 	/* Look for the first zero bit after the range of ones.  */
2379 	if (! (bitmask & in))
2380 	  /* Found it.  */
2381 	  break;
2382       }
2383   /* We're one past the last one-bit.  */
2384   start++;
2385 
2386   if (end == -1)
2387     /* No one bits found.  */
2388     return false;
2389 
2390   if (start > highbit)
2391     {
2392       unsigned HOST_WIDE_INT mask;
2393 
2394       /* Calculate a mask for all bits beyond the contiguous bits.  */
2395       mask = ((~HOST_WIDE_INT_0U >> highbit)
2396 	      & (~HOST_WIDE_INT_0U << (lowbit - start + 1)));
2397       if (mask & in)
2398 	/* There are more bits set beyond the first range of one bits.  */
2399 	return false;
2400     }
2401 
2402   if (pstart)
2403     {
2404       *pstart = start;
2405       *pend = end;
2406     }
2407 
2408   return true;
2409 }
2410 
2411 /* Same as s390_contiguous_bitmask_nowrap_p but also returns true
2412    if ~IN contains a contiguous bitfield.  In that case, *END is <
2413    *START.
2414 
2415    If WRAP_P is true, a bitmask that wraps around is also tested.
2416    When a wraparound occurs *START is greater than *END (in
2417    non-null pointers), and the uppermost (64 - SIZE) bits are thus
2418    part of the range.  If WRAP_P is false, no wraparound is
2419    tested.  */
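/* Illustrative wraparound example: IN = 0xc000000000000003 with
   WRAP_P = true and SIZE = 64 is accepted because ~IN is contiguous;
   the function returns *START = 62 and *END = 1, i.e. *START > *END.  */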
2420 
2421 bool
2422 s390_contiguous_bitmask_p (unsigned HOST_WIDE_INT in, bool wrap_p,
2423 			   int size, int *start, int *end)
2424 {
2425   int bs = HOST_BITS_PER_WIDE_INT;
2426   bool b;
2427 
2428   gcc_assert (!!start == !!end);
2429   if ((in & ((~HOST_WIDE_INT_0U) >> (bs - size))) == 0)
2430     /* This cannot be expressed as a contiguous bitmask.  Exit early because
2431        the second call of s390_contiguous_bitmask_nowrap_p would accept this as
2432        a valid bitmask.  */
2433     return false;
2434   b = s390_contiguous_bitmask_nowrap_p (in, size, start, end);
2435   if (b)
2436     return true;
2437   if (! wrap_p)
2438     return false;
2439   b = s390_contiguous_bitmask_nowrap_p (~in, size, start, end);
2440   if (b && start)
2441     {
2442       int s = *start;
2443       int e = *end;
2444 
2445       gcc_assert (s >= 1);
2446       *start = ((e + 1) & (bs - 1));
2447       *end = ((s - 1 + bs) & (bs - 1));
2448     }
2449 
2450   return b;
2451 }
2452 
2453 /* Return true if OP contains the same contiguous bitfield in *all*
2454    its elements.  START and END can be used to obtain the start and
2455    end position of the bitfield.
2456 
2457    START/END give the position of the first/last bit of the bitfield
2458    counting from the lowest order bit starting with zero.  In order to
2459    use these values for S/390 instructions this has to be converted to
2460    "bits big endian" style.  */
2461 
2462 bool
2463 s390_contiguous_bitmask_vector_p (rtx op, int *start, int *end)
2464 {
2465   unsigned HOST_WIDE_INT mask;
2466   int size;
2467   rtx elt;
2468   bool b;
2469 
2470   gcc_assert (!!start == !!end);
2471   if (!const_vec_duplicate_p (op, &elt)
2472       || !CONST_INT_P (elt))
2473     return false;
2474 
2475   size = GET_MODE_UNIT_BITSIZE (GET_MODE (op));
2476 
2477   /* We cannot deal with V1TI/V1TF. This would require a vgmq.  */
2478   if (size > 64)
2479     return false;
2480 
2481   mask = UINTVAL (elt);
2482 
2483   b = s390_contiguous_bitmask_p (mask, true, size, start, end);
2484   if (b)
2485     {
2486       if (start)
2487 	{
2488 	  *start -= (HOST_BITS_PER_WIDE_INT - size);
2489 	  *end -= (HOST_BITS_PER_WIDE_INT - size);
2490 	}
2491       return true;
2492     }
2493   else
2494     return false;
2495 }
2496 
2497 /* Return true if OP consists only of byte chunks that are either 0 or
2498    0xff.  If MASK is != NULL a byte mask is generated which is
2499    appropriate for the vector generate byte mask instruction.  */
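/* Illustrative example: for a V16QI constant whose first and last byte
   are 0xff and all other bytes are 0, the generated byte mask is
   0x8001 (element 0 maps to the most significant mask bit).  */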
2500 
2501 bool
2502 s390_bytemask_vector_p (rtx op, unsigned *mask)
2503 {
2504   int i;
2505   unsigned tmp_mask = 0;
2506   int nunit, unit_size;
2507 
2508   if (!VECTOR_MODE_P (GET_MODE (op))
2509       || GET_CODE (op) != CONST_VECTOR
2510       || !CONST_INT_P (XVECEXP (op, 0, 0)))
2511     return false;
2512 
2513   nunit = GET_MODE_NUNITS (GET_MODE (op));
2514   unit_size = GET_MODE_UNIT_SIZE (GET_MODE (op));
2515 
2516   for (i = 0; i < nunit; i++)
2517     {
2518       unsigned HOST_WIDE_INT c;
2519       int j;
2520 
2521       if (!CONST_INT_P (XVECEXP (op, 0, i)))
2522 	return false;
2523 
2524       c = UINTVAL (XVECEXP (op, 0, i));
2525       for (j = 0; j < unit_size; j++)
2526 	{
2527 	  if ((c & 0xff) != 0 && (c & 0xff) != 0xff)
2528 	    return false;
2529 	  tmp_mask |= (c & 1) << ((nunit - 1 - i) * unit_size + j);
2530 	  c = c >> BITS_PER_UNIT;
2531 	}
2532     }
2533 
2534   if (mask != NULL)
2535     *mask = tmp_mask;
2536 
2537   return true;
2538 }
2539 
2540 /* Check whether a rotate of ROTL followed by an AND of CONTIG is
2541    equivalent to a shift followed by the AND.  In particular, CONTIG
2542    should not overlap the (rotated) bit 0/bit 63 gap.  Negative values
2543    for ROTL indicate a rotate to the right.  */
2544 
2545 bool
2546 s390_extzv_shift_ok (int bitsize, int rotl, unsigned HOST_WIDE_INT contig)
2547 {
2548   int start, end;
2549   bool ok;
2550 
2551   ok = s390_contiguous_bitmask_nowrap_p (contig, bitsize, &start, &end);
2552   gcc_assert (ok);
2553 
2554   if (rotl >= 0)
2555     return (64 - end >= rotl);
2556   else
2557     {
2558       /* Translate "- rotate right" in BITSIZE mode to "rotate left" in
2559 	 DImode.  */
2560       rotl = -rotl + (64 - bitsize);
2561       return (start >= rotl);
2562     }
2563 }
2564 
2565 /* Check whether we can (and want to) split a double-word
2566    move in mode MODE from SRC to DST into two single-word
2567    moves, moving the subword FIRST_SUBWORD first.  */
2568 
2569 bool
2570 s390_split_ok_p (rtx dst, rtx src, machine_mode mode, int first_subword)
2571 {
2572   /* Floating point and vector registers cannot be split.  */
2573   if (FP_REG_P (src) || FP_REG_P (dst) || VECTOR_REG_P (src) || VECTOR_REG_P (dst))
2574     return false;
2575 
2576   /* Non-offsettable memory references cannot be split.  */
2577   if ((GET_CODE (src) == MEM && !offsettable_memref_p (src))
2578       || (GET_CODE (dst) == MEM && !offsettable_memref_p (dst)))
2579     return false;
2580 
2581   /* Moving the first subword must not clobber a register
2582      needed to move the second subword.  */
2583   if (register_operand (dst, mode))
2584     {
2585       rtx subreg = operand_subword (dst, first_subword, 0, mode);
2586       if (reg_overlap_mentioned_p (subreg, src))
2587 	return false;
2588     }
2589 
2590   return true;
2591 }
2592 
2593 /* Return true if it can be proven that [MEM1, MEM1 + SIZE]
2594    and [MEM2, MEM2 + SIZE] do overlap and false
2595    otherwise.  */
2596 
2597 bool
2598 s390_overlap_p (rtx mem1, rtx mem2, HOST_WIDE_INT size)
2599 {
2600   rtx addr1, addr2, addr_delta;
2601   HOST_WIDE_INT delta;
2602 
2603   if (GET_CODE (mem1) != MEM || GET_CODE (mem2) != MEM)
2604     return true;
2605 
2606   if (size == 0)
2607     return false;
2608 
2609   addr1 = XEXP (mem1, 0);
2610   addr2 = XEXP (mem2, 0);
2611 
2612   addr_delta = simplify_binary_operation (MINUS, Pmode, addr2, addr1);
2613 
2614   /* This overlapping check is used by peepholes merging memory block operations.
2615      Overlapping operations would otherwise be recognized by the S/390 hardware
2616      and would fall back to a slower implementation. Allowing overlapping
2617      operations would lead to slow code but not to wrong code. Therefore we are
2618      somewhat optimistic if we cannot prove that the memory blocks are
2619      overlapping.
2620      That's why we return false here although this may accept operations on
2621      overlapping memory areas.  */
2622   if (!addr_delta || GET_CODE (addr_delta) != CONST_INT)
2623     return false;
2624 
2625   delta = INTVAL (addr_delta);
2626 
2627   if (delta == 0
2628       || (delta > 0 && delta < size)
2629       || (delta < 0 && -delta < size))
2630     return true;
2631 
2632   return false;
2633 }
2634 
2635 /* Check whether the address of memory reference MEM2 equals exactly
2636    the address of memory reference MEM1 plus DELTA.  Return true if
2637    we can prove this to be the case, false otherwise.  */
2638 
2639 bool
2640 s390_offset_p (rtx mem1, rtx mem2, rtx delta)
2641 {
2642   rtx addr1, addr2, addr_delta;
2643 
2644   if (GET_CODE (mem1) != MEM || GET_CODE (mem2) != MEM)
2645     return false;
2646 
2647   addr1 = XEXP (mem1, 0);
2648   addr2 = XEXP (mem2, 0);
2649 
2650   addr_delta = simplify_binary_operation (MINUS, Pmode, addr2, addr1);
2651   if (!addr_delta || !rtx_equal_p (addr_delta, delta))
2652     return false;
2653 
2654   return true;
2655 }
2656 
2657 /* Expand logical operator CODE in mode MODE with operands OPERANDS.  */
2658 
2659 void
2660 s390_expand_logical_operator (enum rtx_code code, machine_mode mode,
2661 			      rtx *operands)
2662 {
2663   machine_mode wmode = mode;
2664   rtx dst = operands[0];
2665   rtx src1 = operands[1];
2666   rtx src2 = operands[2];
2667   rtx op, clob, tem;
2668 
2669   /* If we cannot handle the operation directly, use a temp register.  */
2670   if (!s390_logical_operator_ok_p (operands))
2671     dst = gen_reg_rtx (mode);
2672 
2673   /* QImode and HImode patterns make sense only if we have a destination
2674      in memory.  Otherwise perform the operation in SImode.  */
2675   if ((mode == QImode || mode == HImode) && GET_CODE (dst) != MEM)
2676     wmode = SImode;
2677 
2678   /* Widen operands if required.  */
2679   if (mode != wmode)
2680     {
2681       if (GET_CODE (dst) == SUBREG
2682 	  && (tem = simplify_subreg (wmode, dst, mode, 0)) != 0)
2683 	dst = tem;
2684       else if (REG_P (dst))
2685 	dst = gen_rtx_SUBREG (wmode, dst, 0);
2686       else
2687 	dst = gen_reg_rtx (wmode);
2688 
2689       if (GET_CODE (src1) == SUBREG
2690 	  && (tem = simplify_subreg (wmode, src1, mode, 0)) != 0)
2691 	src1 = tem;
2692       else if (GET_MODE (src1) != VOIDmode)
2693 	src1 = gen_rtx_SUBREG (wmode, force_reg (mode, src1), 0);
2694 
2695       if (GET_CODE (src2) == SUBREG
2696 	  && (tem = simplify_subreg (wmode, src2, mode, 0)) != 0)
2697 	src2 = tem;
2698       else if (GET_MODE (src2) != VOIDmode)
2699 	src2 = gen_rtx_SUBREG (wmode, force_reg (mode, src2), 0);
2700     }
2701 
2702   /* Emit the instruction.  */
2703   op = gen_rtx_SET (dst, gen_rtx_fmt_ee (code, wmode, src1, src2));
2704   clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, CC_REGNUM));
2705   emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
2706 
2707   /* Fix up the destination if needed.  */
2708   if (dst != operands[0])
2709     emit_move_insn (operands[0], gen_lowpart (mode, dst));
2710 }
2711 
2712 /* Check whether OPERANDS are OK for a logical operation (AND, IOR, XOR).  */
2713 
2714 bool
2715 s390_logical_operator_ok_p (rtx *operands)
2716 {
2717   /* If the destination operand is in memory, it needs to coincide
2718      with one of the source operands.  After reload, it has to be
2719      the first source operand.  */
2720   if (GET_CODE (operands[0]) == MEM)
2721     return rtx_equal_p (operands[0], operands[1])
2722 	   || (!reload_completed && rtx_equal_p (operands[0], operands[2]));
2723 
2724   return true;
2725 }
2726 
2727 /* Narrow logical operation CODE of memory operand MEMOP with immediate
2728    operand IMMOP to switch from SS to SI type instructions.  */
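/* Illustrative example: an AND of an SImode memory operand with the
   immediate 0xffffff00 only changes its least significant byte, so it
   can be rewritten as a QImode AND of that byte with mask 0x00.  */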
2729 
2730 void
2731 s390_narrow_logical_operator (enum rtx_code code, rtx *memop, rtx *immop)
2732 {
2733   int def = code == AND ? -1 : 0;
2734   HOST_WIDE_INT mask;
2735   int part;
2736 
2737   gcc_assert (GET_CODE (*memop) == MEM);
2738   gcc_assert (!MEM_VOLATILE_P (*memop));
2739 
2740   mask = s390_extract_part (*immop, QImode, def);
2741   part = s390_single_part (*immop, GET_MODE (*memop), QImode, def);
2742   gcc_assert (part >= 0);
2743 
2744   *memop = adjust_address (*memop, QImode, part);
2745   *immop = gen_int_mode (mask, QImode);
2746 }
2747 
2748 
2749 /* How to allocate a 'struct machine_function'.  */
2750 
2751 static struct machine_function *
2752 s390_init_machine_status (void)
2753 {
2754   return ggc_cleared_alloc<machine_function> ();
2755 }
2756 
2757 /* Map for smallest class containing reg regno.  */
2758 
2759 const enum reg_class regclass_map[FIRST_PSEUDO_REGISTER] =
2760 { GENERAL_REGS, ADDR_REGS, ADDR_REGS, ADDR_REGS,  /*  0 */
2761   ADDR_REGS,    ADDR_REGS, ADDR_REGS, ADDR_REGS,  /*  4 */
2762   ADDR_REGS,    ADDR_REGS, ADDR_REGS, ADDR_REGS,  /*  8 */
2763   ADDR_REGS,    ADDR_REGS, ADDR_REGS, ADDR_REGS,  /* 12 */
2764   FP_REGS,      FP_REGS,   FP_REGS,   FP_REGS,    /* 16 */
2765   FP_REGS,      FP_REGS,   FP_REGS,   FP_REGS,    /* 20 */
2766   FP_REGS,      FP_REGS,   FP_REGS,   FP_REGS,    /* 24 */
2767   FP_REGS,      FP_REGS,   FP_REGS,   FP_REGS,    /* 28 */
2768   ADDR_REGS,    CC_REGS,   ADDR_REGS, ADDR_REGS,  /* 32 */
2769   ACCESS_REGS,	ACCESS_REGS, VEC_REGS, VEC_REGS,  /* 36 */
2770   VEC_REGS, VEC_REGS, VEC_REGS, VEC_REGS,         /* 40 */
2771   VEC_REGS, VEC_REGS, VEC_REGS, VEC_REGS,         /* 44 */
2772   VEC_REGS, VEC_REGS, VEC_REGS, VEC_REGS,         /* 48 */
2773   VEC_REGS, VEC_REGS                              /* 52 */
2774 };
2775 
2776 /* Return attribute type of insn.  */
2777 
2778 static enum attr_type
2779 s390_safe_attr_type (rtx_insn *insn)
2780 {
2781   if (recog_memoized (insn) >= 0)
2782     return get_attr_type (insn);
2783   else
2784     return TYPE_NONE;
2785 }
2786 
2787 /* Return attribute relative_long of insn.  */
2788 
2789 static bool
2790 s390_safe_relative_long_p (rtx_insn *insn)
2791 {
2792   if (recog_memoized (insn) >= 0)
2793     return get_attr_relative_long (insn) == RELATIVE_LONG_YES;
2794   else
2795     return false;
2796 }
2797 
2798 /* Return true if DISP is a valid short displacement.  */
2799 
2800 static bool
2801 s390_short_displacement (rtx disp)
2802 {
2803   /* No displacement is OK.  */
2804   if (!disp)
2805     return true;
2806 
2807   /* Without the long displacement facility we don't need to
2808      distinguish between long and short displacements.  */
2809   if (!TARGET_LONG_DISPLACEMENT)
2810     return true;
2811 
2812   /* Integer displacement in range.  */
2813   if (GET_CODE (disp) == CONST_INT)
2814     return INTVAL (disp) >= 0 && INTVAL (disp) < 4096;
2815 
2816   /* GOT offset is not OK, the GOT can be large.  */
2817   if (GET_CODE (disp) == CONST
2818       && GET_CODE (XEXP (disp, 0)) == UNSPEC
2819       && (XINT (XEXP (disp, 0), 1) == UNSPEC_GOT
2820 	  || XINT (XEXP (disp, 0), 1) == UNSPEC_GOTNTPOFF))
2821     return false;
2822 
2823   /* All other symbolic constants are literal pool references,
2824      which are OK as the literal pool must be small.  */
2825   if (GET_CODE (disp) == CONST)
2826     return true;
2827 
2828   return false;
2829 }
2830 
2831 /* Attempts to split `ref', which should be UNSPEC_LTREF, into (base + `disp').
2832    If successful, also determines the
2833    following characteristics of `ref': `is_ptr' - whether it can be an
2834    LA argument, `is_base_ptr' - whether the resulting base is a well-known
2835    base register (stack/frame pointer, etc), `is_pool_ptr` - whether it is
2836    considered a literal pool pointer for purposes of avoiding two different
2837    literal pool pointers per insn during or after reload (`B' constraint).  */
2838 static bool
2839 s390_decompose_constant_pool_ref (rtx *ref, rtx *disp, bool *is_ptr,
2840 				  bool *is_base_ptr, bool *is_pool_ptr)
2841 {
2842   if (!*ref)
2843     return true;
2844 
2845   if (GET_CODE (*ref) == UNSPEC)
2846     switch (XINT (*ref, 1))
2847       {
2848       case UNSPEC_LTREF:
2849 	if (!*disp)
2850 	  *disp = gen_rtx_UNSPEC (Pmode,
2851 				  gen_rtvec (1, XVECEXP (*ref, 0, 0)),
2852 				  UNSPEC_LTREL_OFFSET);
2853 	else
2854 	  return false;
2855 
2856 	*ref = XVECEXP (*ref, 0, 1);
2857 	break;
2858 
2859       default:
2860 	return false;
2861       }
2862 
2863   if (!REG_P (*ref) || GET_MODE (*ref) != Pmode)
2864     return false;
2865 
2866   if (REGNO (*ref) == STACK_POINTER_REGNUM
2867       || REGNO (*ref) == FRAME_POINTER_REGNUM
2868       || ((reload_completed || reload_in_progress)
2869 	  && frame_pointer_needed
2870 	  && REGNO (*ref) == HARD_FRAME_POINTER_REGNUM)
2871       || REGNO (*ref) == ARG_POINTER_REGNUM
2872       || (flag_pic
2873 	  && REGNO (*ref) == PIC_OFFSET_TABLE_REGNUM))
2874     *is_ptr = *is_base_ptr = true;
2875 
2876   if ((reload_completed || reload_in_progress)
2877       && *ref == cfun->machine->base_reg)
2878     *is_ptr = *is_base_ptr = *is_pool_ptr = true;
2879 
2880   return true;
2881 }
2882 
2883 /* Decompose a RTL expression ADDR for a memory address into
2884    its components, returned in OUT.
2885 
2886    Returns false if ADDR is not a valid memory address, true
2887    otherwise.  If OUT is NULL, don't return the components,
2888    but check for validity only.
2889 
2890    Note: Only addresses in canonical form are recognized.
2891    LEGITIMIZE_ADDRESS should convert non-canonical forms to the
2892    canonical form so that they will be recognized.  */
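/* Illustrative examples (before the base/index preference swap below,
   and assuming %r1/%r2 hold valid addresses):
     (plus (reg %r2) (const_int 4000))
       -> base = %r2, indx = NULL, displacement 4000
     (plus (plus (reg %r1) (reg %r2)) (const_int 8))
       -> indx = %r1, base = %r2, displacement 8  */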
2893 
2894 static int
2895 s390_decompose_address (rtx addr, struct s390_address *out)
2896 {
2897   HOST_WIDE_INT offset = 0;
2898   rtx base = NULL_RTX;
2899   rtx indx = NULL_RTX;
2900   rtx disp = NULL_RTX;
2901   rtx orig_disp;
2902   bool pointer = false;
2903   bool base_ptr = false;
2904   bool indx_ptr = false;
2905   bool literal_pool = false;
2906 
2907   /* We may need to substitute the literal pool base register into the address
2908      below.  However, at this point we do not know which register is going to
2909      be used as base, so we substitute the arg pointer register.  This is going
2910      to be treated as holding a pointer below -- it shouldn't be used for any
2911      other purpose.  */
2912   rtx fake_pool_base = gen_rtx_REG (Pmode, ARG_POINTER_REGNUM);
2913 
2914   /* Decompose address into base + index + displacement.  */
2915 
2916   if (GET_CODE (addr) == REG || GET_CODE (addr) == UNSPEC)
2917     base = addr;
2918 
2919   else if (GET_CODE (addr) == PLUS)
2920     {
2921       rtx op0 = XEXP (addr, 0);
2922       rtx op1 = XEXP (addr, 1);
2923       enum rtx_code code0 = GET_CODE (op0);
2924       enum rtx_code code1 = GET_CODE (op1);
2925 
2926       if (code0 == REG || code0 == UNSPEC)
2927 	{
2928 	  if (code1 == REG || code1 == UNSPEC)
2929 	    {
2930 	      indx = op0;	/* index + base */
2931 	      base = op1;
2932 	    }
2933 
2934 	  else
2935 	    {
2936 	      base = op0;	/* base + displacement */
2937 	      disp = op1;
2938 	    }
2939 	}
2940 
2941       else if (code0 == PLUS)
2942 	{
2943 	  indx = XEXP (op0, 0);	/* index + base + disp */
2944 	  base = XEXP (op0, 1);
2945 	  disp = op1;
2946 	}
2947 
2948       else
2949 	{
2950 	  return false;
2951 	}
2952     }
2953 
2954   else
2955     disp = addr;		/* displacement */
2956 
2957   /* Extract integer part of displacement.  */
2958   orig_disp = disp;
2959   if (disp)
2960     {
2961       if (GET_CODE (disp) == CONST_INT)
2962 	{
2963 	  offset = INTVAL (disp);
2964 	  disp = NULL_RTX;
2965 	}
2966       else if (GET_CODE (disp) == CONST
2967 	       && GET_CODE (XEXP (disp, 0)) == PLUS
2968 	       && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
2969 	{
2970 	  offset = INTVAL (XEXP (XEXP (disp, 0), 1));
2971 	  disp = XEXP (XEXP (disp, 0), 0);
2972 	}
2973     }
2974 
2975   /* Strip off CONST here to avoid special case tests later.  */
2976   if (disp && GET_CODE (disp) == CONST)
2977     disp = XEXP (disp, 0);
2978 
2979   /* We can convert literal pool addresses to
2980      displacements by basing them off the base register.  */
2981   if (disp && GET_CODE (disp) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (disp))
2982     {
2983       if (base || indx)
2984 	return false;
2985 
2986       base = fake_pool_base, literal_pool = true;
2987 
2988       /* Mark up the displacement.  */
2989       disp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, disp),
2990 			     UNSPEC_LTREL_OFFSET);
2991     }
2992 
2993   /* Validate base register.  */
2994   if (!s390_decompose_constant_pool_ref (&base, &disp, &pointer, &base_ptr,
2995 					 &literal_pool))
2996     return false;
2997 
2998   /* Validate index register.  */
2999   if (!s390_decompose_constant_pool_ref (&indx, &disp, &pointer, &indx_ptr,
3000 					 &literal_pool))
3001     return false;
3002 
3003   /* Prefer to use pointer as base, not index.  */
3004   if (base && indx && !base_ptr
3005       && (indx_ptr || (!REG_POINTER (base) && REG_POINTER (indx))))
3006     {
3007       rtx tmp = base;
3008       base = indx;
3009       indx = tmp;
3010     }
3011 
3012   /* Validate displacement.  */
3013   if (!disp)
3014     {
3015       /* If virtual registers are involved, the displacement will change later
3016 	 anyway as the virtual registers get eliminated.  This could make a
3017 	 valid displacement invalid, but it is more likely to make an invalid
3018 	 displacement valid, because we sometimes access the register save area
3019 	 via negative offsets to one of those registers.
3020 	 Thus we don't check the displacement for validity here.  If after
3021 	 elimination the displacement turns out to be invalid after all,
3022 	 this is fixed up by reload in any case.  */
3023       /* LRA always keeps the displacements up to date, and we need to
3024 	 know that the displacement is right during all of LRA, not only
3025 	 at the final elimination.  */
3026       if (lra_in_progress
3027 	  || (base != arg_pointer_rtx
3028 	      && indx != arg_pointer_rtx
3029 	      && base != return_address_pointer_rtx
3030 	      && indx != return_address_pointer_rtx
3031 	      && base != frame_pointer_rtx
3032 	      && indx != frame_pointer_rtx
3033 	      && base != virtual_stack_vars_rtx
3034 	      && indx != virtual_stack_vars_rtx))
3035 	if (!DISP_IN_RANGE (offset))
3036 	  return false;
3037     }
3038   else
3039     {
3040       /* All the special cases are pointers.  */
3041       pointer = true;
3042 
3043       /* In the small-PIC case, the linker converts @GOT
3044 	 and @GOTNTPOFF offsets to possible displacements.  */
3045       if (GET_CODE (disp) == UNSPEC
3046 	  && (XINT (disp, 1) == UNSPEC_GOT
3047 	      || XINT (disp, 1) == UNSPEC_GOTNTPOFF)
3048 	  && flag_pic == 1)
3049 	{
3050 	  ;
3051 	}
3052 
3053       /* Accept pool label offsets.  */
3054       else if (GET_CODE (disp) == UNSPEC
3055 	       && XINT (disp, 1) == UNSPEC_POOL_OFFSET)
3056 	;
3057 
3058       /* Accept literal pool references.  */
3059       else if (GET_CODE (disp) == UNSPEC
3060 	       && XINT (disp, 1) == UNSPEC_LTREL_OFFSET)
3061 	{
3062 	  /* In case CSE pulled a non literal pool reference out of
3063 	     the pool we have to reject the address.  This is
3064 	     especially important when loading the GOT pointer on non
3065 	     zarch CPUs.  In this case the literal pool contains an lt
3066 	     relative offset to the _GLOBAL_OFFSET_TABLE_ label which
3067 	     will most likely exceed the displacement.  */
3068 	  if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
3069 	      || !CONSTANT_POOL_ADDRESS_P (XVECEXP (disp, 0, 0)))
3070 	    return false;
3071 
3072 	  orig_disp = gen_rtx_CONST (Pmode, disp);
3073 	  if (offset)
3074 	    {
3075 	      /* If we have an offset, make sure it does not
3076 		 exceed the size of the constant pool entry.
3077 		 Otherwise we might generate an out-of-range
3078 		 displacement for the base register form.  */
3079 	      rtx sym = XVECEXP (disp, 0, 0);
3080 	      if (offset >= GET_MODE_SIZE (get_pool_mode (sym)))
3081 		return false;
3082 
3083 	      orig_disp = plus_constant (Pmode, orig_disp, offset);
3084 	    }
3085 	}
3086 
3087       else
3088 	return false;
3089     }
3090 
3091   if (!base && !indx)
3092     pointer = true;
3093 
3094   if (out)
3095     {
3096       out->base = base;
3097       out->indx = indx;
3098       out->disp = orig_disp;
3099       out->pointer = pointer;
3100       out->literal_pool = literal_pool;
3101     }
3102 
3103   return true;
3104 }
3105 
3106 /* Decompose a RTL expression OP for an address style operand into its
3107    components, and return the base register in BASE and the offset in
3108    OFFSET.  While OP looks like an address it is never supposed to be
3109    used as such.
3110 
3111    Return true if OP is a valid address operand, false if not.  */
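/* Illustrative example: for OP = (plus (reg %r3) (const_int 7)) the
   function returns true with *BASE = %r3 and *OFFSET = 7; a plain
   (const_int 7) yields *BASE = NULL_RTX and *OFFSET = 7.  */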
3112 
3113 bool
3114 s390_decompose_addrstyle_without_index (rtx op, rtx *base,
3115 					HOST_WIDE_INT *offset)
3116 {
3117   rtx off = NULL_RTX;
3118 
3119   /* We can have an integer constant, an address register,
3120      or a sum of the two.  */
3121   if (CONST_SCALAR_INT_P (op))
3122     {
3123       off = op;
3124       op = NULL_RTX;
3125     }
3126   if (op && GET_CODE (op) == PLUS && CONST_SCALAR_INT_P (XEXP (op, 1)))
3127     {
3128       off = XEXP (op, 1);
3129       op = XEXP (op, 0);
3130     }
3131   while (op && GET_CODE (op) == SUBREG)
3132     op = SUBREG_REG (op);
3133 
3134   if (op && GET_CODE (op) != REG)
3135     return false;
3136 
3137   if (offset)
3138     {
3139       if (off == NULL_RTX)
3140 	*offset = 0;
3141       else if (CONST_INT_P (off))
3142 	*offset = INTVAL (off);
3143       else if (CONST_WIDE_INT_P (off))
3144 	/* The offset will anyway be cut down to 12 bits so take just
3145 	   the lowest order chunk of the wide int.  */
3146 	*offset = CONST_WIDE_INT_ELT (off, 0);
3147       else
3148 	gcc_unreachable ();
3149     }
3150   if (base)
3151     *base = op;
3152 
3153    return true;
3154 }
3155 
3156 /*  Check that OP is a valid shift count operand.
3157     It should be of the following structure:
3158       (subreg (and (plus (reg imm_op)) 2^k-1) 7)
3159     where the subreg, the and, and the plus are optional.
3160 
3161     If IMPLICIT_MASK is > 0 and OP contains an
3162       (AND ... immediate)
3163     it is checked whether IMPLICIT_MASK and the immediate match.
3164     Otherwise, no checking is performed.
3165   */
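/* Illustrative example: (and:SI (reg:SI 60) (const_int 63)) is accepted;
   with IMPLICIT_MASK = 63 the immediate matches the mask, so the check
   succeeds and the remaining (reg:SI 60) decomposes as a plain base.  */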
3166 bool
3167 s390_valid_shift_count (rtx op, HOST_WIDE_INT implicit_mask)
3168 {
3169   /* Strip subreg.  */
3170   while (GET_CODE (op) == SUBREG && subreg_lowpart_p (op))
3171     op = XEXP (op, 0);
3172 
3173   /* Check for an and with proper constant.  */
3174   if (GET_CODE (op) == AND)
3175   {
3176     rtx op1 = XEXP (op, 0);
3177     rtx imm = XEXP (op, 1);
3178 
3179     if (GET_CODE (op1) == SUBREG && subreg_lowpart_p (op1))
3180       op1 = XEXP (op1, 0);
3181 
3182     if (!(register_operand (op1, GET_MODE (op1)) || GET_CODE (op1) == PLUS))
3183       return false;
3184 
3185     if (!immediate_operand (imm, GET_MODE (imm)))
3186       return false;
3187 
3188     HOST_WIDE_INT val = INTVAL (imm);
3189     if (implicit_mask > 0
3190 	&& (val & implicit_mask) != implicit_mask)
3191       return false;
3192 
3193     op = op1;
3194   }
3195 
3196   /* Check the rest.  */
3197   return s390_decompose_addrstyle_without_index (op, NULL, NULL);
3198 }
3199 
3200 /* Return true if OP is a valid address without index.  */
3201 
3202 bool
3203 s390_legitimate_address_without_index_p (rtx op)
3204 {
3205   struct s390_address addr;
3206 
3207   if (!s390_decompose_address (XEXP (op, 0), &addr))
3208     return false;
3209   if (addr.indx)
3210     return false;
3211 
3212   return true;
3213 }
3214 
3215 
3216 /* Return TRUE if ADDR is an operand valid for a load/store relative
3217    instruction.  Be aware that the alignment of the operand needs to
3218    be checked separately.
3219    Valid addresses are single references or a sum of a reference and a
3220    constant integer. Return these parts in SYMREF and ADDEND.  You can
3221    pass NULL in REF and/or ADDEND if you are not interested in these
3222    values.  */
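/* Illustrative example: (const (plus (symbol_ref "x") (const_int 8)))
   is accepted with *SYMREF = (symbol_ref "x") and *ADDEND = 8.  */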
3223 
3224 static bool
3225 s390_loadrelative_operand_p (rtx addr, rtx *symref, HOST_WIDE_INT *addend)
3226 {
3227   HOST_WIDE_INT tmpaddend = 0;
3228 
3229   if (GET_CODE (addr) == CONST)
3230     addr = XEXP (addr, 0);
3231 
3232   if (GET_CODE (addr) == PLUS)
3233     {
3234       if (!CONST_INT_P (XEXP (addr, 1)))
3235 	return false;
3236 
3237       tmpaddend = INTVAL (XEXP (addr, 1));
3238       addr = XEXP (addr, 0);
3239     }
3240 
3241   if (GET_CODE (addr) == SYMBOL_REF
3242       || (GET_CODE (addr) == UNSPEC
3243 	  && (XINT (addr, 1) == UNSPEC_GOTENT
3244 	      || XINT (addr, 1) == UNSPEC_PLT)))
3245     {
3246       if (symref)
3247 	*symref = addr;
3248       if (addend)
3249 	*addend = tmpaddend;
3250 
3251       return true;
3252     }
3253   return false;
3254 }
3255 
3256 /* Return true if the address in OP is valid for constraint letter C
3257    if wrapped in a MEM rtx.  Set LIT_POOL_OK to true if it literal
3258    pool MEMs should be accepted.  Only the Q, R, S, T constraint
3259    letters are allowed for C.  */
3260 
3261 static int
3262 s390_check_qrst_address (char c, rtx op, bool lit_pool_ok)
3263 {
3264   rtx symref;
3265   struct s390_address addr;
3266   bool decomposed = false;
3267 
3268   if (!address_operand (op, GET_MODE (op)))
3269     return 0;
3270 
3271   /* This check makes sure that no symbolic address (except literal
3272      pool references) is accepted by the R or T constraints.  */
3273   if (s390_loadrelative_operand_p (op, &symref, NULL)
3274       && (!lit_pool_ok
3275           || !SYMBOL_REF_P (symref)
3276           || !CONSTANT_POOL_ADDRESS_P (symref)))
3277     return 0;
3278 
3279   /* Ensure literal pool references are only accepted if LIT_POOL_OK.  */
3280   if (!lit_pool_ok)
3281     {
3282       if (!s390_decompose_address (op, &addr))
3283 	return 0;
3284       if (addr.literal_pool)
3285 	return 0;
3286       decomposed = true;
3287     }
3288 
3289   /* With reload, we sometimes get intermediate address forms that are
3290      actually invalid as-is, but we need to accept them in the most
3291      generic cases below ('R' or 'T'), since reload will in fact fix
3292      them up.  LRA behaves differently here; we never see such forms,
3293      but on the other hand, we need to strictly reject every invalid
3294      address form.  After both reload and LRA invalid address forms
3295      must be rejected, because nothing will fix them up later.  Perform
3296      this check right up front.  */
3297   if (lra_in_progress || reload_completed)
3298     {
3299       if (!decomposed && !s390_decompose_address (op, &addr))
3300 	return 0;
3301       decomposed = true;
3302     }
3303 
3304   switch (c)
3305     {
3306     case 'Q': /* no index short displacement */
3307       if (!decomposed && !s390_decompose_address (op, &addr))
3308 	return 0;
3309       if (addr.indx)
3310 	return 0;
3311       if (!s390_short_displacement (addr.disp))
3312 	return 0;
3313       break;
3314 
3315     case 'R': /* with index short displacement */
3316       if (TARGET_LONG_DISPLACEMENT)
3317 	{
3318 	  if (!decomposed && !s390_decompose_address (op, &addr))
3319 	    return 0;
3320 	  if (!s390_short_displacement (addr.disp))
3321 	    return 0;
3322 	}
3323       /* Any invalid address here will be fixed up by reload,
3324 	 so accept it for the most generic constraint.  */
3325       break;
3326 
3327     case 'S': /* no index long displacement */
3328       if (!decomposed && !s390_decompose_address (op, &addr))
3329 	return 0;
3330       if (addr.indx)
3331 	return 0;
3332       break;
3333 
3334     case 'T': /* with index long displacement */
3335       /* Any invalid address here will be fixed up by reload,
3336 	 so accept it for the most generic constraint.  */
3337       break;
3338 
3339     default:
3340       return 0;
3341     }
3342   return 1;
3343 }
3344 
3345 
3346 /* Evaluates constraint strings described by the regular expression
3347    ([A|B|Z](Q|R|S|T))|Y and returns 1 if OP is a valid operand for
3348    the constraint given in STR, and 0 otherwise.  */
3349 
3350 int
3351 s390_mem_constraint (const char *str, rtx op)
3352 {
3353   char c = str[0];
3354 
3355   switch (c)
3356     {
3357     case 'A':
3358       /* Check for offsettable variants of memory constraints.  */
3359       if (!MEM_P (op) || MEM_VOLATILE_P (op))
3360 	return 0;
3361       if ((reload_completed || reload_in_progress)
3362 	  ? !offsettable_memref_p (op) : !offsettable_nonstrict_memref_p (op))
3363 	return 0;
3364       return s390_check_qrst_address (str[1], XEXP (op, 0), true);
3365     case 'B':
3366       /* Check for non-literal-pool variants of memory constraints.  */
3367       if (!MEM_P (op))
3368 	return 0;
3369       return s390_check_qrst_address (str[1], XEXP (op, 0), false);
3370     case 'Q':
3371     case 'R':
3372     case 'S':
3373     case 'T':
3374       if (GET_CODE (op) != MEM)
3375 	return 0;
3376       return s390_check_qrst_address (c, XEXP (op, 0), true);
3377     case 'Y':
3378       /* Simply check for the basic form of a shift count.  Reload will
3379 	 take care of making sure we have a proper base register.  */
3380       if (!s390_decompose_addrstyle_without_index (op, NULL, NULL))
3381 	return 0;
3382       break;
3383     case 'Z':
3384       return s390_check_qrst_address (str[1], op, true);
3385     default:
3386       return 0;
3387     }
3388   return 1;
3389 }
3390 
3391 
3392 /* Evaluates constraint strings starting with letter O.  Input
3393    parameter C is the letter following the "O" in the constraint
3394    string. Returns 1 if VALUE meets the respective constraint and 0
3395    otherwise.  */
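/* 's' corresponds to a signed extended immediate (-2^31 .. 2^31 - 1),
   'p' to a positive extended immediate (0 .. 2^32 - 1), and
   'n' to a negative extended immediate (-2^32 + 1 .. -1).  */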
3396 
3397 int
3398 s390_O_constraint_str (const char c, HOST_WIDE_INT value)
3399 {
3400   if (!TARGET_EXTIMM)
3401     return 0;
3402 
3403   switch (c)
3404     {
3405     case 's':
3406       return trunc_int_for_mode (value, SImode) == value;
3407 
3408     case 'p':
3409       return value == 0
3410 	|| s390_single_part (GEN_INT (value), DImode, SImode, 0) == 1;
3411 
3412     case 'n':
3413       return s390_single_part (GEN_INT (value - 1), DImode, SImode, -1) == 1;
3414 
3415     default:
3416       gcc_unreachable ();
3417     }
3418 }
3419 
3420 
3421 /* Evaluates constraint strings starting with letter N.  Parameter STR
3422    contains the letters following letter "N" in the constraint string.
3423    Returns true if VALUE matches the constraint.  */
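/* STR encodes, in order: the index of the single part that may differ
   ('x' for any part), the mode of one part ('Q', 'H' or 'S'), the mode
   of the full constant ('H', 'S' or 'D'), and the value required for
   all other parts ('0' for zero, 'F' for all bits set).  */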
3424 
3425 int
3426 s390_N_constraint_str (const char *str, HOST_WIDE_INT value)
3427 {
3428   machine_mode mode, part_mode;
3429   int def;
3430   int part, part_goal;
3431 
3432 
3433   if (str[0] == 'x')
3434     part_goal = -1;
3435   else
3436     part_goal = str[0] - '0';
3437 
3438   switch (str[1])
3439     {
3440     case 'Q':
3441       part_mode = QImode;
3442       break;
3443     case 'H':
3444       part_mode = HImode;
3445       break;
3446     case 'S':
3447       part_mode = SImode;
3448       break;
3449     default:
3450       return 0;
3451     }
3452 
3453   switch (str[2])
3454     {
3455     case 'H':
3456       mode = HImode;
3457       break;
3458     case 'S':
3459       mode = SImode;
3460       break;
3461     case 'D':
3462       mode = DImode;
3463       break;
3464     default:
3465       return 0;
3466     }
3467 
3468   switch (str[3])
3469     {
3470     case '0':
3471       def = 0;
3472       break;
3473     case 'F':
3474       def = -1;
3475       break;
3476     default:
3477       return 0;
3478     }
3479 
3480   if (GET_MODE_SIZE (mode) <= GET_MODE_SIZE (part_mode))
3481     return 0;
3482 
3483   part = s390_single_part (GEN_INT (value), mode, part_mode, def);
3484   if (part < 0)
3485     return 0;
3486   if (part_goal != -1 && part_goal != part)
3487     return 0;
3488 
3489   return 1;
3490 }
3491 
3492 
3493 /* Returns true if the input parameter VALUE is a float zero.  */
3494 
3495 int
3496 s390_float_const_zero_p (rtx value)
3497 {
3498   return (GET_MODE_CLASS (GET_MODE (value)) == MODE_FLOAT
3499 	  && value == CONST0_RTX (GET_MODE (value)));
3500 }
3501 
3502 /* Implement TARGET_REGISTER_MOVE_COST.  */
3503 
3504 static int
3505 s390_register_move_cost (machine_mode mode,
3506 			 reg_class_t from, reg_class_t to)
3507 {
3508   /* On s390, copy between fprs and gprs is expensive.  */
3509 
3510   /* Copying becomes somewhat faster once ldgr/lgdr are available.  */
3511   if (TARGET_Z10 && GET_MODE_SIZE (mode) == 8)
3512     {
3513       /* ldgr is single cycle. */
3514       if (reg_classes_intersect_p (from, GENERAL_REGS)
3515 	  && reg_classes_intersect_p (to, FP_REGS))
3516 	return 1;
3517       /* lgdr needs 3 cycles. */
3518       if (reg_classes_intersect_p (to, GENERAL_REGS)
3519 	  && reg_classes_intersect_p (from, FP_REGS))
3520 	return 3;
3521     }
3522 
3523   /* Otherwise copying is done via memory.  */
3524   if ((reg_classes_intersect_p (from, GENERAL_REGS)
3525        && reg_classes_intersect_p (to, FP_REGS))
3526       || (reg_classes_intersect_p (from, FP_REGS)
3527 	  && reg_classes_intersect_p (to, GENERAL_REGS)))
3528     return 10;
3529 
3530   /* We usually do not want to copy via CC.  */
3531   if (reg_classes_intersect_p (from, CC_REGS)
3532        || reg_classes_intersect_p (to, CC_REGS))
3533     return 5;
3534 
3535   return 1;
3536 }
3537 
3538 /* Implement TARGET_MEMORY_MOVE_COST.  */
3539 
3540 static int
3541 s390_memory_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
3542 		       reg_class_t rclass ATTRIBUTE_UNUSED,
3543 		       bool in ATTRIBUTE_UNUSED)
3544 {
3545   return 2;
3546 }
3547 
3548 /* Compute a (partial) cost for rtx X.  Return true if the complete
3549    cost has been computed, and false if subexpressions should be
3550    scanned.  In either case, *TOTAL contains the cost result.  The
3551    initial value of *TOTAL is the default value computed by
3552    rtx_cost.  It may be left unmodified.  OUTER_CODE contains the
3553    code of the superexpression of x.  */
3554 
3555 static bool
3556 s390_rtx_costs (rtx x, machine_mode mode, int outer_code,
3557 		int opno ATTRIBUTE_UNUSED,
3558 		int *total, bool speed ATTRIBUTE_UNUSED)
3559 {
3560   int code = GET_CODE (x);
3561   switch (code)
3562     {
3563     case CONST:
3564     case CONST_INT:
3565     case LABEL_REF:
3566     case SYMBOL_REF:
3567     case CONST_DOUBLE:
3568     case CONST_WIDE_INT:
3569     case MEM:
3570       *total = 0;
3571       return true;
3572 
3573     case SET:
3574       {
3575 	/* Without this a conditional move instruction would be
3576 	   accounted as 3 * COSTS_N_INSNS (set, if_then_else,
3577 	   comparison operator).  That's a bit pessimistic.  */
3578 
3579 	if (!TARGET_Z196 || GET_CODE (SET_SRC (x)) != IF_THEN_ELSE)
3580 	  return false;
3581 
3582 	rtx cond = XEXP (SET_SRC (x), 0);
3583 
3584 	if (!CC_REG_P (XEXP (cond, 0)) || !CONST_INT_P (XEXP (cond, 1)))
3585 	  return false;
3586 
3587 	/* It is going to be a load/store on condition.  Make it
3588 	   slightly more expensive than a normal load.  */
3589 	*total = COSTS_N_INSNS (1) + 1;
3590 
3591 	rtx dst = SET_DEST (x);
3592 	rtx then = XEXP (SET_SRC (x), 1);
3593 	rtx els = XEXP (SET_SRC (x), 2);
3594 
3595 	/* It is a real IF-THEN-ELSE.  An additional move will be
3596 	   needed to implement that.  */
3597 	if (!TARGET_Z15
3598 	    && reload_completed
3599 	    && !rtx_equal_p (dst, then)
3600 	    && !rtx_equal_p (dst, els))
3601 	  *total += COSTS_N_INSNS (1) / 2;
3602 
3603 	/* A minor penalty for constants we cannot directly handle.  */
3604 	if ((CONST_INT_P (then) || CONST_INT_P (els))
3605 	    && (!TARGET_Z13 || MEM_P (dst)
3606 		|| (CONST_INT_P (then) && !satisfies_constraint_K (then))
3607 		|| (CONST_INT_P (els) && !satisfies_constraint_K (els))))
3608 	  *total += COSTS_N_INSNS (1) / 2;
3609 
3610 	/* A store on condition can only handle register src operands.  */
3611 	if (MEM_P (dst) && (!REG_P (then) || !REG_P (els)))
3612 	  *total += COSTS_N_INSNS (1) / 2;
3613 
3614 	return true;
3615       }
3616     case IOR:
3617 
3618       /* nnrk, nngrk */
3619       if (TARGET_Z15
3620 	  && (mode == SImode || mode == DImode)
3621 	  && GET_CODE (XEXP (x, 0)) == NOT
3622 	  && GET_CODE (XEXP (x, 1)) == NOT)
3623 	{
3624 	  *total = COSTS_N_INSNS (1);
3625 	  if (!REG_P (XEXP (XEXP (x, 0), 0)))
3626 	    *total += 1;
3627 	  if (!REG_P (XEXP (XEXP (x, 1), 0)))
3628 	    *total += 1;
3629 	  return true;
3630 	}
3631 
3632       /* risbg */
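      /* This matches (a & ((1 << n) - 1)) | (b << n), a bit-field
	 combination that can be handled by the risbg pattern.  */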
3633       if (GET_CODE (XEXP (x, 0)) == AND
3634 	  && GET_CODE (XEXP (x, 1)) == ASHIFT
3635 	  && REG_P (XEXP (XEXP (x, 0), 0))
3636 	  && REG_P (XEXP (XEXP (x, 1), 0))
3637 	  && CONST_INT_P (XEXP (XEXP (x, 0), 1))
3638 	  && CONST_INT_P (XEXP (XEXP (x, 1), 1))
3639 	  && (UINTVAL (XEXP (XEXP (x, 0), 1)) ==
3640 	      (HOST_WIDE_INT_1U << UINTVAL (XEXP (XEXP (x, 1), 1))) - 1))
3641 	{
3642 	  *total = COSTS_N_INSNS (2);
3643 	  return true;
3644 	}
3645 
3646       /* ~AND on a 128 bit mode.  This can be done using a vector
3647 	 instruction.  */
3648       if (TARGET_VXE
3649 	  && GET_CODE (XEXP (x, 0)) == NOT
3650 	  && GET_CODE (XEXP (x, 1)) == NOT
3651 	  && REG_P (XEXP (XEXP (x, 0), 0))
3652 	  && REG_P (XEXP (XEXP (x, 1), 0))
3653 	  && GET_MODE_SIZE (GET_MODE (XEXP (XEXP (x, 0), 0))) == 16
3654 	  && s390_hard_regno_mode_ok (VR0_REGNUM,
3655 				      GET_MODE (XEXP (XEXP (x, 0), 0))))
3656 	{
3657 	  *total = COSTS_N_INSNS (1);
3658 	  return true;
3659 	}
3660 
3661       *total = COSTS_N_INSNS (1);
3662       return false;
3663 
3664     case AND:
3665       /* nork, nogrk */
3666       if (TARGET_Z15
3667 	  && (mode == SImode || mode == DImode)
3668 	  && GET_CODE (XEXP (x, 0)) == NOT
3669 	  && GET_CODE (XEXP (x, 1)) == NOT)
3670 	{
3671 	  *total = COSTS_N_INSNS (1);
3672 	  if (!REG_P (XEXP (XEXP (x, 0), 0)))
3673 	    *total += 1;
3674 	  if (!REG_P (XEXP (XEXP (x, 1), 0)))
3675 	    *total += 1;
3676 	  return true;
3677 	}
3678       /* fallthrough */
3679     case ASHIFT:
3680     case ASHIFTRT:
3681     case LSHIFTRT:
3682     case ROTATE:
3683     case ROTATERT:
3684     case XOR:
3685     case NEG:
3686     case NOT:
3687     case PLUS:
3688     case MINUS:
3689       *total = COSTS_N_INSNS (1);
3690       return false;
3691 
3692     case MULT:
3693       switch (mode)
3694 	{
3695 	case E_SImode:
3696 	  {
3697 	    rtx left = XEXP (x, 0);
3698 	    rtx right = XEXP (x, 1);
3699 	    if (GET_CODE (right) == CONST_INT
3700 		&& CONST_OK_FOR_K (INTVAL (right)))
3701 	      *total = s390_cost->mhi;
3702 	    else if (GET_CODE (left) == SIGN_EXTEND)
3703 	      *total = s390_cost->mh;
3704 	    else
3705 	      *total = s390_cost->ms;  /* msr, ms, msy */
3706 	    break;
3707 	  }
3708 	case E_DImode:
3709 	  {
3710 	    rtx left = XEXP (x, 0);
3711 	    rtx right = XEXP (x, 1);
3712 	    if (TARGET_ZARCH)
3713 	      {
3714 		if (GET_CODE (right) == CONST_INT
3715 		    && CONST_OK_FOR_K (INTVAL (right)))
3716 		  *total = s390_cost->mghi;
3717 		else if (GET_CODE (left) == SIGN_EXTEND)
3718 		  *total = s390_cost->msgf;
3719 		else
3720 		  *total = s390_cost->msg;  /* msgr, msg */
3721 	      }
3722 	    else /* TARGET_31BIT */
3723 	      {
3724 		if (GET_CODE (left) == SIGN_EXTEND
3725 		    && GET_CODE (right) == SIGN_EXTEND)
3726 		  /* mulsidi case: mr, m */
3727 		  *total = s390_cost->m;
3728 		else if (GET_CODE (left) == ZERO_EXTEND
3729 			 && GET_CODE (right) == ZERO_EXTEND)
3730 		  /* umulsidi case: ml, mlr */
3731 		  *total = s390_cost->ml;
3732 		else
3733 		  /* Complex calculation is required.  */
3734 		  *total = COSTS_N_INSNS (40);
3735 	      }
3736 	    break;
3737 	  }
3738 	case E_SFmode:
3739 	case E_DFmode:
3740 	  *total = s390_cost->mult_df;
3741 	  break;
3742 	case E_TFmode:
3743 	  *total = s390_cost->mxbr;
3744 	  break;
3745 	default:
3746 	  return false;
3747 	}
3748       return false;
3749 
3750     case FMA:
3751       switch (mode)
3752 	{
3753 	case E_DFmode:
3754 	  *total = s390_cost->madbr;
3755 	  break;
3756 	case E_SFmode:
3757 	  *total = s390_cost->maebr;
3758 	  break;
3759 	default:
3760 	  return false;
3761 	}
3762       /* A negation of the third argument is free: FMSUB.  */
3763       if (GET_CODE (XEXP (x, 2)) == NEG)
3764 	{
3765 	  *total += (rtx_cost (XEXP (x, 0), mode, FMA, 0, speed)
3766 		     + rtx_cost (XEXP (x, 1), mode, FMA, 1, speed)
3767 		     + rtx_cost (XEXP (XEXP (x, 2), 0), mode, FMA, 2, speed));
3768 	  return true;
3769 	}
3770       return false;
3771 
3772     case UDIV:
3773     case UMOD:
3774       if (mode == TImode)	       /* 128 bit division */
3775 	*total = s390_cost->dlgr;
3776       else if (mode == DImode)
3777 	{
3778 	  rtx right = XEXP (x, 1);
3779 	  if (GET_CODE (right) == ZERO_EXTEND) /* 64 by 32 bit division */
3780 	    *total = s390_cost->dlr;
3781 	  else				       /* 64 by 64 bit division */
3782 	    *total = s390_cost->dlgr;
3783 	}
3784       else if (mode == SImode)         /* 32 bit division */
3785 	*total = s390_cost->dlr;
3786       return false;
3787 
3788     case DIV:
3789     case MOD:
3790       if (mode == DImode)
3791 	{
3792 	  rtx right = XEXP (x, 1);
3793 	  if (GET_CODE (right) == ZERO_EXTEND) /* 64 by 32 bit division */
3794 	    if (TARGET_ZARCH)
3795 	      *total = s390_cost->dsgfr;
3796 	    else
3797 	      *total = s390_cost->dr;
3798 	  else				       /* 64 by 64 bit division */
3799 	    *total = s390_cost->dsgr;
3800 	}
3801       else if (mode == SImode)         /* 32 bit division */
3802 	*total = s390_cost->dlr;
3803       else if (mode == SFmode)
3804 	{
3805 	  *total = s390_cost->debr;
3806 	}
3807       else if (mode == DFmode)
3808 	{
3809 	  *total = s390_cost->ddbr;
3810 	}
3811       else if (mode == TFmode)
3812 	{
3813 	  *total = s390_cost->dxbr;
3814 	}
3815       return false;
3816 
3817     case SQRT:
3818       if (mode == SFmode)
3819 	*total = s390_cost->sqebr;
3820       else if (mode == DFmode)
3821 	*total = s390_cost->sqdbr;
3822       else /* TFmode */
3823 	*total = s390_cost->sqxbr;
3824       return false;
3825 
3826     case SIGN_EXTEND:
3827     case ZERO_EXTEND:
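      /* The extension is usually folded into the operation itself
	 (e.g. mh, msgf, dsgf, agf), so do not count it separately in
	 these contexts.  */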
3828       if (outer_code == MULT || outer_code == DIV || outer_code == MOD
3829 	  || outer_code == PLUS || outer_code == MINUS
3830 	  || outer_code == COMPARE)
3831 	*total = 0;
3832       return false;
3833 
3834     case COMPARE:
3835       *total = COSTS_N_INSNS (1);
3836 
3837       /* nxrk, nxgrk ~(a^b)==0 */
3838       if (TARGET_Z15
3839 	  && GET_CODE (XEXP (x, 0)) == NOT
3840 	  && XEXP (x, 1) == const0_rtx
3841 	  && GET_CODE (XEXP (XEXP (x, 0), 0)) == XOR
3842 	  && (GET_MODE (XEXP (x, 0)) == SImode || GET_MODE (XEXP (x, 0)) == DImode)
3843 	  && mode == CCZmode)
3844 	{
3845 	  if (!REG_P (XEXP (XEXP (XEXP (x, 0), 0), 0)))
3846 	    *total += 1;
3847 	  if (!REG_P (XEXP (XEXP (XEXP (x, 0), 0), 1)))
3848 	    *total += 1;
3849 	  return true;
3850 	}
3851 
3852       /* nnrk, nngrk, nork, nogrk */
3853       if (TARGET_Z15
3854 	  && (GET_CODE (XEXP (x, 0)) == AND || GET_CODE (XEXP (x, 0)) == IOR)
3855 	  && XEXP (x, 1) == const0_rtx
3856 	  && (GET_MODE (XEXP (x, 0)) == SImode || GET_MODE (XEXP (x, 0)) == DImode)
3857 	  && GET_CODE (XEXP (XEXP (x, 0), 0)) == NOT
3858 	  && GET_CODE (XEXP (XEXP (x, 0), 1)) == NOT
3859 	  && mode == CCZmode)
3860 	{
3861 	  if (!REG_P (XEXP (XEXP (XEXP (x, 0), 0), 0)))
3862 	    *total += 1;
3863 	  if (!REG_P (XEXP (XEXP (XEXP (x, 0), 1), 0)))
3864 	    *total += 1;
3865 	  return true;
3866 	}
3867 
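      /* A comparison of (reg-or-mem & constant-mask) against a constant
	 can be carried out with a TEST UNDER MASK instruction.  */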
3868       if (GET_CODE (XEXP (x, 0)) == AND
3869 	  && GET_CODE (XEXP (x, 1)) == CONST_INT
3870 	  && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)
3871 	{
3872 	  rtx op0 = XEXP (XEXP (x, 0), 0);
3873 	  rtx op1 = XEXP (XEXP (x, 0), 1);
3874 	  rtx op2 = XEXP (x, 1);
3875 
3876 	  if (memory_operand (op0, GET_MODE (op0))
3877 	      && s390_tm_ccmode (op1, op2, 0) != VOIDmode)
3878 	    return true;
3879 	  if (register_operand (op0, GET_MODE (op0))
3880 	      && s390_tm_ccmode (op1, op2, 1) != VOIDmode)
3881 	    return true;
3882 	}
3883       return false;
3884 
3885     default:
3886       return false;
3887     }
3888 }
3889 
3890 /* Return the cost of an address rtx ADDR.  */
3891 
3892 static int
3893 s390_address_cost (rtx addr, machine_mode mode ATTRIBUTE_UNUSED,
3894 		   addr_space_t as ATTRIBUTE_UNUSED,
3895 		   bool speed ATTRIBUTE_UNUSED)
3896 {
3897   struct s390_address ad;
3898   if (!s390_decompose_address (addr, &ad))
3899     return 1000;
3900 
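  /* Addresses with an index register are slightly more expensive than
     plain base + displacement addresses.  */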
3901   return ad.indx ? COSTS_N_INSNS (1) + 1 : COSTS_N_INSNS (1);
3902 }
3903 
3904 /* Implement targetm.vectorize.builtin_vectorization_cost.  */
3905 static int
3906 s390_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
3907 				 tree vectype,
3908 				 int misalign ATTRIBUTE_UNUSED)
3909 {
3910   switch (type_of_cost)
3911     {
3912       case scalar_stmt:
3913       case scalar_load:
3914       case scalar_store:
3915       case vector_stmt:
3916       case vector_load:
3917       case vector_store:
3918       case vector_gather_load:
3919       case vector_scatter_store:
3920       case vec_to_scalar:
3921       case scalar_to_vec:
3922       case cond_branch_not_taken:
3923       case vec_perm:
3924       case vec_promote_demote:
3925       case unaligned_load:
3926       case unaligned_store:
3927 	return 1;
3928 
3929       case cond_branch_taken:
3930 	return 3;
3931 
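      /* Building a vector from scalar elements takes roughly one
	 element insertion per element beyond the first.  */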
3932       case vec_construct:
3933 	return TYPE_VECTOR_SUBPARTS (vectype) - 1;
3934 
3935       default:
3936 	gcc_unreachable ();
3937     }
3938 }
3939 
3940 /* If OP is a SYMBOL_REF of a thread-local symbol, return its TLS mode,
3941    otherwise return 0.  */
3942 
3943 int
3944 tls_symbolic_operand (rtx op)
3945 {
3946   if (GET_CODE (op) != SYMBOL_REF)
3947     return 0;
3948   return SYMBOL_REF_TLS_MODEL (op);
3949 }
3950 
3951 /* Split DImode access register reference REG (on 64-bit) into its constituent
3952    low and high parts, and store them into LO and HI.  Note that gen_lowpart/
3953    gen_highpart cannot be used as they assume all registers are word-sized,
3954    while our access registers have only half that size.  */
3955 
3956 void
3957 s390_split_access_reg (rtx reg, rtx *lo, rtx *hi)
3958 {
3959   gcc_assert (TARGET_64BIT);
3960   gcc_assert (ACCESS_REG_P (reg));
3961   gcc_assert (GET_MODE (reg) == DImode);
3962   gcc_assert (!(REGNO (reg) & 1));
3963 
3964   *lo = gen_rtx_REG (SImode, REGNO (reg) + 1);
3965   *hi = gen_rtx_REG (SImode, REGNO (reg));
3966 }
3967 
3968 /* Return true if OP contains a symbol reference.  */
3969 
3970 bool
3971 symbolic_reference_mentioned_p (rtx op)
3972 {
3973   const char *fmt;
3974   int i;
3975 
3976   if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
3977     return 1;
3978 
3979   fmt = GET_RTX_FORMAT (GET_CODE (op));
3980   for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
3981     {
3982       if (fmt[i] == 'E')
3983 	{
3984 	  int j;
3985 
3986 	  for (j = XVECLEN (op, i) - 1; j >= 0; j--)
3987 	    if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
3988 	      return 1;
3989 	}
3990 
3991       else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
3992 	return 1;
3993     }
3994 
3995   return 0;
3996 }
3997 
3998 /* Return true if OP contains a reference to a thread-local symbol.  */
3999 
4000 bool
4001 tls_symbolic_reference_mentioned_p (rtx op)
4002 {
4003   const char *fmt;
4004   int i;
4005 
4006   if (GET_CODE (op) == SYMBOL_REF)
4007     return tls_symbolic_operand (op);
4008 
4009   fmt = GET_RTX_FORMAT (GET_CODE (op));
4010   for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
4011     {
4012       if (fmt[i] == 'E')
4013 	{
4014 	  int j;
4015 
4016 	  for (j = XVECLEN (op, i) - 1; j >= 0; j--)
4017 	    if (tls_symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
4018 	      return true;
4019 	}
4020 
4021       else if (fmt[i] == 'e' && tls_symbolic_reference_mentioned_p (XEXP (op, i)))
4022 	return true;
4023     }
4024 
4025   return false;
4026 }
4027 
4028 
4029 /* Return true if OP is a legitimate general operand when
4030    generating PIC code.  It is given that flag_pic is on
4031    and that OP satisfies CONSTANT_P.  */
4032 
4033 int
4034 legitimate_pic_operand_p (rtx op)
4035 {
4036   /* Accept all non-symbolic constants.  */
4037   if (!SYMBOLIC_CONST (op))
4038     return 1;
4039 
4040   /* Accept addresses that can be expressed relative to (pc).  */
4041   if (larl_operand (op, VOIDmode))
4042     return 1;
4043 
4044   /* Reject everything else; must be handled
4045      via emit_symbolic_move.  */
4046   return 0;
4047 }
4048 
4049 /* Returns true if the constant value OP is a legitimate general operand.
4050    It is given that OP satisfies CONSTANT_P.  */
4051 
4052 static bool
4053 s390_legitimate_constant_p (machine_mode mode, rtx op)
4054 {
4055   if (TARGET_VX && VECTOR_MODE_P (mode) && GET_CODE (op) == CONST_VECTOR)
4056     {
4057       if (GET_MODE_SIZE (mode) != 16)
4058 	return 0;
4059 
4060       if (!satisfies_constraint_j00 (op)
4061 	  && !satisfies_constraint_jm1 (op)
4062 	  && !satisfies_constraint_jKK (op)
4063 	  && !satisfies_constraint_jxx (op)
4064 	  && !satisfies_constraint_jyy (op))
4065 	return 0;
4066     }
4067 
4068   /* Accept all non-symbolic constants.  */
4069   if (!SYMBOLIC_CONST (op))
4070     return 1;
4071 
4072   /* Accept immediate LARL operands.  */
4073   if (larl_operand (op, mode))
4074     return 1;
4075 
4076   /* Thread-local symbols are never legal constants.  This is
4077      so that emit_call knows that computing such addresses
4078      might require a function call.  */
4079   if (TLS_SYMBOLIC_CONST (op))
4080     return 0;
4081 
4082   /* In the PIC case, symbolic constants must *not* be
4083      forced into the literal pool.  We accept them here,
4084      so that they will be handled by emit_symbolic_move.  */
4085   if (flag_pic)
4086     return 1;
4087 
4088   /* All remaining non-PIC symbolic constants are
4089      forced into the literal pool.  */
4090   return 0;
4091 }
4092 
4093 /* Determine if it's legal to put X into the constant pool.  This
4094    is not possible if X contains the address of a symbol that is
4095    not constant (TLS) or not known at final link time (PIC).  */
4096 
4097 static bool
4098 s390_cannot_force_const_mem (machine_mode mode, rtx x)
4099 {
4100   switch (GET_CODE (x))
4101     {
4102     case CONST_INT:
4103     case CONST_DOUBLE:
4104     case CONST_WIDE_INT:
4105     case CONST_VECTOR:
4106       /* Accept all non-symbolic constants.  */
4107       return false;
4108 
4109     case LABEL_REF:
4110       /* Labels are OK iff we are non-PIC.  */
4111       return flag_pic != 0;
4112 
4113     case SYMBOL_REF:
4114       /* 'Naked' TLS symbol references are never OK,
4115 	 non-TLS symbols are OK iff we are non-PIC.  */
4116       if (tls_symbolic_operand (x))
4117 	return true;
4118       else
4119 	return flag_pic != 0;
4120 
4121     case CONST:
4122       return s390_cannot_force_const_mem (mode, XEXP (x, 0));
4123     case PLUS:
4124     case MINUS:
4125       return s390_cannot_force_const_mem (mode, XEXP (x, 0))
4126 	     || s390_cannot_force_const_mem (mode, XEXP (x, 1));
4127 
4128     case UNSPEC:
4129       switch (XINT (x, 1))
4130 	{
4131 	/* Only lt-relative or GOT-relative UNSPECs are OK.  */
4132 	case UNSPEC_LTREL_OFFSET:
4133 	case UNSPEC_GOT:
4134 	case UNSPEC_GOTOFF:
4135 	case UNSPEC_PLTOFF:
4136 	case UNSPEC_TLSGD:
4137 	case UNSPEC_TLSLDM:
4138 	case UNSPEC_NTPOFF:
4139 	case UNSPEC_DTPOFF:
4140 	case UNSPEC_GOTNTPOFF:
4141 	case UNSPEC_INDNTPOFF:
4142 	  return false;
4143 
4144 	/* Execute template placeholders could only go into the pool
4145 	   if the literal pool shared the code section.  */
4146 	case UNSPEC_INSN:
4147 	default:
4148 	  return true;
4149 	}
4150       break;
4151 
4152     default:
4153       gcc_unreachable ();
4154     }
4155 }
4156 
4157 /* Returns true if the constant value OP is a legitimate general
4158    operand during and after reload.  The difference to
4159    legitimate_constant_p is that this function will not accept
4160    a constant that would need to be forced to the literal pool
4161    before it can be used as operand.
4162    This function accepts all constants which can be loaded directly
4163    into a GPR.  */
4164 
4165 bool
4166 legitimate_reload_constant_p (rtx op)
4167 {
4168   /* Accept la(y) operands.  */
4169   if (GET_CODE (op) == CONST_INT
4170       && DISP_IN_RANGE (INTVAL (op)))
4171     return true;
4172 
4173   /* Accept l(g)hi/l(g)fi operands.  */
4174   if (GET_CODE (op) == CONST_INT
4175       && (CONST_OK_FOR_K (INTVAL (op)) || CONST_OK_FOR_Os (INTVAL (op))))
4176     return true;
4177 
4178   /* Accept lliXX operands.  */
4179   if (TARGET_ZARCH
4180       && GET_CODE (op) == CONST_INT
4181       && trunc_int_for_mode (INTVAL (op), word_mode) == INTVAL (op)
4182       && s390_single_part (op, word_mode, HImode, 0) >= 0)
4183   return true;
4184 
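  /* Accept constants with a single nonzero 32-bit part if the
     extended-immediate facility is available (e.g. llihf/llilf).  */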
4185   if (TARGET_EXTIMM
4186       && GET_CODE (op) == CONST_INT
4187       && trunc_int_for_mode (INTVAL (op), word_mode) == INTVAL (op)
4188       && s390_single_part (op, word_mode, SImode, 0) >= 0)
4189     return true;
4190 
4191   /* Accept larl operands.  */
4192   if (larl_operand (op, VOIDmode))
4193     return true;
4194 
4195   /* Accept floating-point zero operands that fit into a single GPR.  */
4196   if (GET_CODE (op) == CONST_DOUBLE
4197       && s390_float_const_zero_p (op)
4198       && GET_MODE_SIZE (GET_MODE (op)) <= UNITS_PER_WORD)
4199     return true;
4200 
4201   /* Accept double-word operands that can be split.  */
4202   if (GET_CODE (op) == CONST_WIDE_INT
4203       || (GET_CODE (op) == CONST_INT
4204 	  && trunc_int_for_mode (INTVAL (op), word_mode) != INTVAL (op)))
4205     {
4206       machine_mode dword_mode = word_mode == SImode ? DImode : TImode;
4207       rtx hi = operand_subword (op, 0, 0, dword_mode);
4208       rtx lo = operand_subword (op, 1, 0, dword_mode);
4209       return legitimate_reload_constant_p (hi)
4210 	     && legitimate_reload_constant_p (lo);
4211     }
4212 
4213   /* Everything else cannot be handled without reload.  */
4214   return false;
4215 }
4216 
4217 /* Returns true if the constant value OP is a legitimate fp operand
4218    during and after reload.
4219    This function accepts all constants which can be loaded directly
4220    into an FPR.  */
4221 
4222 static bool
4223 legitimate_reload_fp_constant_p (rtx op)
4224 {
4225   /* Accept floating-point zero operands if the load zero instruction
4226      can be used.  Prior to z196 the load fp zero instruction caused a
4227      performance penalty if the result is used as a BFP number.  */
4228   if (TARGET_Z196
4229       && GET_CODE (op) == CONST_DOUBLE
4230       && s390_float_const_zero_p (op))
4231     return true;
4232 
4233   return false;
4234 }
4235 
4236 /* Returns true if the constant value OP is a legitimate vector operand
4237    during and after reload.
4238    This function accepts all constants which can be loaded directly
4239    into a VR.  */
4240 
4241 static bool
4242 legitimate_reload_vector_constant_p (rtx op)
4243 {
4244   if (TARGET_VX && GET_MODE_SIZE (GET_MODE (op)) == 16
4245       && (satisfies_constraint_j00 (op)
4246 	  || satisfies_constraint_jm1 (op)
4247 	  || satisfies_constraint_jKK (op)
4248 	  || satisfies_constraint_jxx (op)
4249 	  || satisfies_constraint_jyy (op)))
4250     return true;
4251 
4252   return false;
4253 }
4254 
4255 /* Given an rtx OP being reloaded into a reg required to be in class RCLASS,
4256    return the class of reg to actually use.  */
4257 
4258 static reg_class_t
4259 s390_preferred_reload_class (rtx op, reg_class_t rclass)
4260 {
4261   switch (GET_CODE (op))
4262     {
4263       /* Constants we cannot reload into general registers
4264 	 must be forced into the literal pool.  */
4265       case CONST_VECTOR:
4266       case CONST_DOUBLE:
4267       case CONST_INT:
4268       case CONST_WIDE_INT:
4269 	if (reg_class_subset_p (GENERAL_REGS, rclass)
4270 	    && legitimate_reload_constant_p (op))
4271 	  return GENERAL_REGS;
4272 	else if (reg_class_subset_p (ADDR_REGS, rclass)
4273 		 && legitimate_reload_constant_p (op))
4274 	  return ADDR_REGS;
4275 	else if (reg_class_subset_p (FP_REGS, rclass)
4276 		 && legitimate_reload_fp_constant_p (op))
4277 	  return FP_REGS;
4278 	else if (reg_class_subset_p (VEC_REGS, rclass)
4279 		 && legitimate_reload_vector_constant_p (op))
4280 	  return VEC_REGS;
4281 
4282 	return NO_REGS;
4283 
4284       /* If a symbolic constant or a PLUS is reloaded,
4285 	 it is most likely being used as an address, so
4286 	 prefer ADDR_REGS.  If 'class' is not a superset
4287 	 of ADDR_REGS, e.g. FP_REGS, reject this reload.  */
4288       case CONST:
4289 	/* Symrefs cannot be pushed into the literal pool with -fPIC
4290 	   so we *MUST NOT* return NO_REGS for these cases
4291 	   (s390_cannot_force_const_mem will return true).
4292 
4293 	   On the other hand we MUST return NO_REGS for symrefs with
4294 	   invalid addend which might have been pushed to the literal
4295 	   pool (no -fPIC).  Usually we would expect them to be
4296 	   handled via secondary reload but this does not happen if
4297 	   they are used as literal pool slot replacement in reload
4298 	   inheritance (see emit_input_reload_insns).  */
4299 	if (GET_CODE (XEXP (op, 0)) == PLUS
4300 	    && GET_CODE (XEXP (XEXP(op, 0), 0)) == SYMBOL_REF
4301 	    && GET_CODE (XEXP (XEXP(op, 0), 1)) == CONST_INT)
4302 	  {
4303 	    if (flag_pic && reg_class_subset_p (ADDR_REGS, rclass))
4304 	      return ADDR_REGS;
4305 	    else
4306 	      return NO_REGS;
4307 	  }
4308 	/* fallthrough */
4309       case LABEL_REF:
4310       case SYMBOL_REF:
4311 	if (!legitimate_reload_constant_p (op))
4312 	  return NO_REGS;
4313 	/* fallthrough */
4314       case PLUS:
4315 	/* load address will be used.  */
4316 	if (reg_class_subset_p (ADDR_REGS, rclass))
4317 	  return ADDR_REGS;
4318 	else
4319 	  return NO_REGS;
4320 
4321       default:
4322 	break;
4323     }
4324 
4325   return rclass;
4326 }
4327 
4328 /* Return true if ADDR is SYMBOL_REF + addend with addend being a
4329    multiple of ALIGNMENT and the SYMBOL_REF being naturally
4330    aligned.  */
4331 
4332 bool
4333 s390_check_symref_alignment (rtx addr, HOST_WIDE_INT alignment)
4334 {
4335   HOST_WIDE_INT addend;
4336   rtx symref;
4337 
4338   /* The "required alignment" might be 0 (e.g. for certain structs
4339      accessed via BLKmode).  Early abort in this case, as well as when
4340      an alignment > 8 is required.  */
4341   if (alignment < 2 || alignment > 8)
4342     return false;
4343 
4344   if (!s390_loadrelative_operand_p (addr, &symref, &addend))
4345     return false;
4346 
4347   if (addend & (alignment - 1))
4348     return false;
4349 
4350   if (GET_CODE (symref) == SYMBOL_REF)
4351     {
4352       /* s390_encode_section_info is not called for anchors, since they don't
4353 	 have corresponding VAR_DECLs.  Therefore, we cannot rely on
4354 	 SYMBOL_FLAG_NOTALIGN{2,4,8}_P returning useful information.  */
4355       if (SYMBOL_REF_ANCHOR_P (symref))
4356 	{
4357 	  HOST_WIDE_INT block_offset = SYMBOL_REF_BLOCK_OFFSET (symref);
4358 	  unsigned int block_alignment = (SYMBOL_REF_BLOCK (symref)->alignment
4359 					  / BITS_PER_UNIT);
4360 
4361 	  gcc_assert (block_offset >= 0);
4362 	  return ((block_offset & (alignment - 1)) == 0
4363 		  && block_alignment >= alignment);
4364 	}
4365 
4366       /* We have load-relative instructions for 2-byte, 4-byte, and
4367 	 8-byte alignment so allow only these.  */
4368       switch (alignment)
4369 	{
4370 	case 8:	return !SYMBOL_FLAG_NOTALIGN8_P (symref);
4371 	case 4:	return !SYMBOL_FLAG_NOTALIGN4_P (symref);
4372 	case 2:	return !SYMBOL_FLAG_NOTALIGN2_P (symref);
4373 	default: return false;
4374 	}
4375     }
4376 
4377   if (GET_CODE (symref) == UNSPEC
4378       && alignment <= UNITS_PER_LONG)
4379     return true;
4380 
4381   return false;
4382 }
4383 
4384 /* ADDR is moved into REG using larl.  If ADDR isn't a valid larl
4385    operand, SCRATCH is used to load the even part of the address;
4386    the remaining 1 is then added via LA.  */
4387 
4388 void
4389 s390_reload_larl_operand (rtx reg, rtx addr, rtx scratch)
4390 {
4391   HOST_WIDE_INT addend;
4392   rtx symref;
4393 
4394   if (!s390_loadrelative_operand_p (addr, &symref, &addend))
4395     gcc_unreachable ();
4396 
4397   if (!(addend & 1))
4398     /* Easy case.  The addend is even so larl will do fine.  */
4399     emit_move_insn (reg, addr);
4400   else
4401     {
4402       /* We can leave the scratch register untouched if the target
4403 	 register is a valid base register.  */
4404       if (REGNO (reg) < FIRST_PSEUDO_REGISTER
4405 	  && REGNO_REG_CLASS (REGNO (reg)) == ADDR_REGS)
4406 	scratch = reg;
4407 
4408       gcc_assert (REGNO (scratch) < FIRST_PSEUDO_REGISTER);
4409       gcc_assert (REGNO_REG_CLASS (REGNO (scratch)) == ADDR_REGS);
4410 
4411       if (addend != 1)
4412 	emit_move_insn (scratch,
4413 			gen_rtx_CONST (Pmode,
4414 				       gen_rtx_PLUS (Pmode, symref,
4415 						     GEN_INT (addend - 1))));
4416       else
4417 	emit_move_insn (scratch, symref);
4418 
4419       /* Increment the address using la in order to avoid clobbering cc.  */
4420       s390_load_address (reg, gen_rtx_PLUS (Pmode, scratch, const1_rtx));
4421     }
4422 }
4423 
4424 /* Generate what is necessary to move between REG and MEM using
4425    SCRATCH.  The direction is given by TOMEM.  */
4426 
4427 void
4428 s390_reload_symref_address (rtx reg, rtx mem, rtx scratch, bool tomem)
4429 {
4430   /* Reload might have pulled a constant out of the literal pool.
4431      Force it back in.  */
4432   if (CONST_INT_P (mem) || GET_CODE (mem) == CONST_DOUBLE
4433       || GET_CODE (mem) == CONST_WIDE_INT
4434       || GET_CODE (mem) == CONST_VECTOR
4435       || GET_CODE (mem) == CONST)
4436     mem = force_const_mem (GET_MODE (reg), mem);
4437 
4438   gcc_assert (MEM_P (mem));
4439 
4440   /* For a load from memory we can leave the scratch register
4441      untouched if the target register is a valid base register.  */
4442   if (!tomem
4443       && REGNO (reg) < FIRST_PSEUDO_REGISTER
4444       && REGNO_REG_CLASS (REGNO (reg)) == ADDR_REGS
4445       && GET_MODE (reg) == GET_MODE (scratch))
4446     scratch = reg;
4447 
4448   /* Load address into scratch register.  Since we can't have a
4449      secondary reload for a secondary reload we have to cover the case
4450      where larl would need a secondary reload here as well.  */
4451   s390_reload_larl_operand (scratch, XEXP (mem, 0), scratch);
4452 
4453   /* Now we can use a standard load/store to do the move.  */
4454   if (tomem)
4455     emit_move_insn (replace_equiv_address (mem, scratch), reg);
4456   else
4457     emit_move_insn (reg, replace_equiv_address (mem, scratch));
4458 }
4459 
4460 /* Inform reload about cases where moving X with a mode MODE to a register in
4461    RCLASS requires an extra scratch or immediate register.  Return the class
4462    needed for the immediate register.  */
4463 
4464 static reg_class_t
4465 s390_secondary_reload (bool in_p, rtx x, reg_class_t rclass_i,
4466 		       machine_mode mode, secondary_reload_info *sri)
4467 {
4468   enum reg_class rclass = (enum reg_class) rclass_i;
4469 
4470   /* Intermediate register needed.  */
4471   if (reg_classes_intersect_p (CC_REGS, rclass))
4472     return GENERAL_REGS;
4473 
4474   if (TARGET_VX)
4475     {
4476       /* The vst/vl vector move instructions allow only for short
4477 	 displacements.  */
4478       if (MEM_P (x)
4479 	  && GET_CODE (XEXP (x, 0)) == PLUS
4480 	  && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
4481 	  && !SHORT_DISP_IN_RANGE(INTVAL (XEXP (XEXP (x, 0), 1)))
4482 	  && reg_class_subset_p (rclass, VEC_REGS)
4483 	  && (!reg_class_subset_p (rclass, FP_REGS)
4484 	      || (GET_MODE_SIZE (mode) > 8
4485 		  && s390_class_max_nregs (FP_REGS, mode) == 1)))
4486 	{
4487 	  if (in_p)
4488 	    sri->icode = (TARGET_64BIT ?
4489 			  CODE_FOR_reloaddi_la_in :
4490 			  CODE_FOR_reloadsi_la_in);
4491 	  else
4492 	    sri->icode = (TARGET_64BIT ?
4493 			  CODE_FOR_reloaddi_la_out :
4494 			  CODE_FOR_reloadsi_la_out);
4495 	}
4496     }
4497 
4498   if (TARGET_Z10)
4499     {
4500       HOST_WIDE_INT offset;
4501       rtx symref;
4502 
4503       /* On z10 several optimizer steps may generate larl operands with
4504 	 an odd addend.  */
4505       if (in_p
4506 	  && s390_loadrelative_operand_p (x, &symref, &offset)
4507 	  && mode == Pmode
4508 	  && !SYMBOL_FLAG_NOTALIGN2_P (symref)
4509 	  && (offset & 1) == 1)
4510 	sri->icode = ((mode == DImode) ? CODE_FOR_reloaddi_larl_odd_addend_z10
4511 		      : CODE_FOR_reloadsi_larl_odd_addend_z10);
4512 
4513       /* Handle all the (mem (symref)) accesses we cannot use the z10
4514 	 instructions for.  */
4515       if (MEM_P (x)
4516 	  && s390_loadrelative_operand_p (XEXP (x, 0), NULL, NULL)
4517 	  && (mode == QImode
4518 	      || !reg_class_subset_p (rclass, GENERAL_REGS)
4519 	      || GET_MODE_SIZE (mode) > UNITS_PER_WORD
4520 	      || !s390_check_symref_alignment (XEXP (x, 0),
4521 					       GET_MODE_SIZE (mode))))
4522 	{
4523 #define __SECONDARY_RELOAD_CASE(M,m)					\
4524 	  case E_##M##mode:						\
4525 	    if (TARGET_64BIT)						\
4526 	      sri->icode = in_p ? CODE_FOR_reload##m##di_toreg_z10 :	\
4527 				  CODE_FOR_reload##m##di_tomem_z10;	\
4528 	    else							\
4529 	      sri->icode = in_p ? CODE_FOR_reload##m##si_toreg_z10 :	\
4530 				  CODE_FOR_reload##m##si_tomem_z10;	\
4531 	  break;
4532 
4533 	  switch (GET_MODE (x))
4534 	    {
4535 	      __SECONDARY_RELOAD_CASE (QI, qi);
4536 	      __SECONDARY_RELOAD_CASE (HI, hi);
4537 	      __SECONDARY_RELOAD_CASE (SI, si);
4538 	      __SECONDARY_RELOAD_CASE (DI, di);
4539 	      __SECONDARY_RELOAD_CASE (TI, ti);
4540 	      __SECONDARY_RELOAD_CASE (SF, sf);
4541 	      __SECONDARY_RELOAD_CASE (DF, df);
4542 	      __SECONDARY_RELOAD_CASE (TF, tf);
4543 	      __SECONDARY_RELOAD_CASE (SD, sd);
4544 	      __SECONDARY_RELOAD_CASE (DD, dd);
4545 	      __SECONDARY_RELOAD_CASE (TD, td);
4546 	      __SECONDARY_RELOAD_CASE (V1QI, v1qi);
4547 	      __SECONDARY_RELOAD_CASE (V2QI, v2qi);
4548 	      __SECONDARY_RELOAD_CASE (V4QI, v4qi);
4549 	      __SECONDARY_RELOAD_CASE (V8QI, v8qi);
4550 	      __SECONDARY_RELOAD_CASE (V16QI, v16qi);
4551 	      __SECONDARY_RELOAD_CASE (V1HI, v1hi);
4552 	      __SECONDARY_RELOAD_CASE (V2HI, v2hi);
4553 	      __SECONDARY_RELOAD_CASE (V4HI, v4hi);
4554 	      __SECONDARY_RELOAD_CASE (V8HI, v8hi);
4555 	      __SECONDARY_RELOAD_CASE (V1SI, v1si);
4556 	      __SECONDARY_RELOAD_CASE (V2SI, v2si);
4557 	      __SECONDARY_RELOAD_CASE (V4SI, v4si);
4558 	      __SECONDARY_RELOAD_CASE (V1DI, v1di);
4559 	      __SECONDARY_RELOAD_CASE (V2DI, v2di);
4560 	      __SECONDARY_RELOAD_CASE (V1TI, v1ti);
4561 	      __SECONDARY_RELOAD_CASE (V1SF, v1sf);
4562 	      __SECONDARY_RELOAD_CASE (V2SF, v2sf);
4563 	      __SECONDARY_RELOAD_CASE (V4SF, v4sf);
4564 	      __SECONDARY_RELOAD_CASE (V1DF, v1df);
4565 	      __SECONDARY_RELOAD_CASE (V2DF, v2df);
4566 	      __SECONDARY_RELOAD_CASE (V1TF, v1tf);
4567 	    default:
4568 	      gcc_unreachable ();
4569 	    }
4570 #undef __SECONDARY_RELOAD_CASE
4571 	}
4572     }
4573 
4574   /* We need a scratch register when loading a PLUS expression which
4575      is not a legitimate operand of the LOAD ADDRESS instruction.  */
4576   /* LRA can deal with transformation of plus op very well -- so we
4577      don't need to prompt LRA in this case.  */
4578   if (! lra_in_progress && in_p && s390_plus_operand (x, mode))
4579     sri->icode = (TARGET_64BIT ?
4580 		  CODE_FOR_reloaddi_plus : CODE_FOR_reloadsi_plus);
4581 
4582   /* When performing a multiword move from or to memory we have to make sure the
4583      second chunk in memory is addressable without causing a displacement
4584      overflow.  If that would be the case we calculate the address in
4585      a scratch register.  */
4586   if (MEM_P (x)
4587       && GET_CODE (XEXP (x, 0)) == PLUS
4588       && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
4589       && !DISP_IN_RANGE (INTVAL (XEXP (XEXP (x, 0), 1))
4590 			 + GET_MODE_SIZE (mode) - 1))
4591     {
4592       /* For GENERAL_REGS a displacement overflow is no problem if it occurs
4593 	 in an s_operand address, since we may fall back to lm/stm.  So we
4594 	 only have to care about overflows in the b+i+d case.  */
4595       if ((reg_classes_intersect_p (GENERAL_REGS, rclass)
4596 	   && s390_class_max_nregs (GENERAL_REGS, mode) > 1
4597 	   && GET_CODE (XEXP (XEXP (x, 0), 0)) == PLUS)
4598 	  /* For FP_REGS no lm/stm is available so this check is triggered
4599 	     for displacement overflows in b+i+d and b+d like addresses.  */
4600 	  || (reg_classes_intersect_p (FP_REGS, rclass)
4601 	      && s390_class_max_nregs (FP_REGS, mode) > 1))
4602 	{
4603 	  if (in_p)
4604 	    sri->icode = (TARGET_64BIT ?
4605 			  CODE_FOR_reloaddi_la_in :
4606 			  CODE_FOR_reloadsi_la_in);
4607 	  else
4608 	    sri->icode = (TARGET_64BIT ?
4609 			  CODE_FOR_reloaddi_la_out :
4610 			  CODE_FOR_reloadsi_la_out);
4611 	}
4612     }
4613 
4614   /* A scratch address register is needed when a symbolic constant is
4615      copied to r0 compiling with -fPIC.  In other cases the target
4616      register might be used as temporary (see legitimize_pic_address).  */
4617   if (in_p && SYMBOLIC_CONST (x) && flag_pic == 2 && rclass != ADDR_REGS)
4618     sri->icode = (TARGET_64BIT ?
4619 		  CODE_FOR_reloaddi_PIC_addr :
4620 		  CODE_FOR_reloadsi_PIC_addr);
4621 
4622   /* Either scratch or no register needed.  */
4623   return NO_REGS;
4624 }
4625 
4626 /* Implement TARGET_SECONDARY_MEMORY_NEEDED.
4627 
4628    We need secondary memory to move data between GPRs and FPRs.
4629 
4630    - With DFP the ldgr/lgdr instructions are available.  Due to the
4631      different alignment we cannot use them for SFmode.  For 31 bit a
4632      64 bit value in GPR would be a register pair so here we still
4633      need to go via memory.
4634 
4635    - With z13 we can do the SF/SImode moves with vlgvf.  Due to the
4636      overlapping of FPRs and VRs we still disallow TF/TD modes to be
4637      in full VRs, so as before these moves still go via memory on
4638      z13 as well.
4639 
4640      FIXME: Should we try splitting it into two vlgvg's/vlvg's instead?  */
4641 
4642 static bool
4643 s390_secondary_memory_needed (machine_mode mode,
4644 			      reg_class_t class1, reg_class_t class2)
4645 {
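  /* GPR <-> FPR/VR moves go via memory unless they can use ldgr/lgdr
     (64-bit DFP targets, 8-byte values, not TPF) or the vector element
     instructions (VX targets, everything except scalar FP values wider
     than 8 bytes).  */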
4646   return (((reg_classes_intersect_p (class1, VEC_REGS)
4647 	    && reg_classes_intersect_p (class2, GENERAL_REGS))
4648 	   || (reg_classes_intersect_p (class1, GENERAL_REGS)
4649 	       && reg_classes_intersect_p (class2, VEC_REGS)))
4650 	  && (TARGET_TPF || !TARGET_DFP || !TARGET_64BIT
4651 	      || GET_MODE_SIZE (mode) != 8)
4652 	  && (!TARGET_VX || (SCALAR_FLOAT_MODE_P (mode)
4653 			     && GET_MODE_SIZE (mode) > 8)));
4654 }
4655 
4656 /* Implement TARGET_SECONDARY_MEMORY_NEEDED_MODE.
4657 
4658    get_secondary_mem widens its argument to BITS_PER_WORD which loses on 64bit
4659    because the movsi and movsf patterns don't handle r/f moves.  */
4660 
4661 static machine_mode
4662 s390_secondary_memory_needed_mode (machine_mode mode)
4663 {
4664   if (GET_MODE_BITSIZE (mode) < 32)
4665     return mode_for_size (32, GET_MODE_CLASS (mode), 0).require ();
4666   return mode;
4667 }
4668 
4669 /* Generate code to load SRC, which is PLUS that is not a
4670    legitimate operand for the LA instruction, into TARGET.
4671    SCRATCH may be used as scratch register.  */
4672 
4673 void
4674 s390_expand_plus_operand (rtx target, rtx src,
4675 			  rtx scratch)
4676 {
4677   rtx sum1, sum2;
4678   struct s390_address ad;
4679 
4680   /* src must be a PLUS; get its two operands.  */
4681   gcc_assert (GET_CODE (src) == PLUS);
4682   gcc_assert (GET_MODE (src) == Pmode);
4683 
4684   /* Check if any of the two operands is already scheduled
4685      for replacement by reload.  This can happen e.g. when
4686      float registers occur in an address.  */
4687   sum1 = find_replacement (&XEXP (src, 0));
4688   sum2 = find_replacement (&XEXP (src, 1));
4689   src = gen_rtx_PLUS (Pmode, sum1, sum2);
4690 
4691   /* If the address is already strictly valid, there's nothing to do.  */
4692   if (!s390_decompose_address (src, &ad)
4693       || (ad.base && !REGNO_OK_FOR_BASE_P (REGNO (ad.base)))
4694       || (ad.indx && !REGNO_OK_FOR_INDEX_P (REGNO (ad.indx))))
4695     {
4696       /* Otherwise, one of the operands cannot be an address register;
4697 	 we reload its value into the scratch register.  */
4698       if (true_regnum (sum1) < 1 || true_regnum (sum1) > 15)
4699 	{
4700 	  emit_move_insn (scratch, sum1);
4701 	  sum1 = scratch;
4702 	}
4703       if (true_regnum (sum2) < 1 || true_regnum (sum2) > 15)
4704 	{
4705 	  emit_move_insn (scratch, sum2);
4706 	  sum2 = scratch;
4707 	}
4708 
4709       /* According to the way these invalid addresses are generated
4710 	 in reload.c, it should never happen (at least on s390) that
4711 	 *neither* of the PLUS components, after find_replacements
4712 	 was applied, is an address register.  */
4713       if (sum1 == scratch && sum2 == scratch)
4714 	{
4715 	  debug_rtx (src);
4716 	  gcc_unreachable ();
4717 	}
4718 
4719       src = gen_rtx_PLUS (Pmode, sum1, sum2);
4720     }
4721 
4722   /* Emit the LOAD ADDRESS pattern.  Note that reload of PLUS
4723      is only ever performed on addresses, so we can mark the
4724      sum as legitimate for LA in any case.  */
4725   s390_load_address (target, src);
4726 }
4727 
4728 
4729 /* Return true if ADDR is a valid memory address.
4730    STRICT specifies whether strict register checking applies.  */
4731 
4732 static bool
4733 s390_legitimate_address_p (machine_mode mode, rtx addr, bool strict)
4734 {
4735   struct s390_address ad;
4736 
4737   if (TARGET_Z10
4738       && larl_operand (addr, VOIDmode)
4739       && (mode == VOIDmode
4740 	  || s390_check_symref_alignment (addr, GET_MODE_SIZE (mode))))
4741     return true;
4742 
4743   if (!s390_decompose_address (addr, &ad))
4744     return false;
4745 
4746   /* The vector memory instructions only support short displacements.
4747      Reject invalid displacements early to prevent plenty of lay
4748      instructions from being generated later, which then cannot be
4749      merged properly.  */
4750   if (TARGET_VX
4751       && VECTOR_MODE_P (mode)
4752       && ad.disp != NULL_RTX
4753       && CONST_INT_P (ad.disp)
4754       && !SHORT_DISP_IN_RANGE (INTVAL (ad.disp)))
4755     return false;
4756 
4757   if (strict)
4758     {
4759       if (ad.base && !REGNO_OK_FOR_BASE_P (REGNO (ad.base)))
4760 	return false;
4761 
4762       if (ad.indx && !REGNO_OK_FOR_INDEX_P (REGNO (ad.indx)))
4763 	return false;
4764     }
4765   else
4766     {
4767       if (ad.base
4768 	  && !(REGNO (ad.base) >= FIRST_PSEUDO_REGISTER
4769 	       || REGNO_REG_CLASS (REGNO (ad.base)) == ADDR_REGS))
4770 	return false;
4771 
4772       if (ad.indx
4773 	  && !(REGNO (ad.indx) >= FIRST_PSEUDO_REGISTER
4774 	       || REGNO_REG_CLASS (REGNO (ad.indx)) == ADDR_REGS))
4775 	  return false;
4776     }
4777   return true;
4778 }
4779 
4780 /* Return true if OP is a valid operand for the LA instruction.
4781    In 31-bit, we need to prove that the result is used as an
4782    address, as LA performs only a 31-bit addition.  */
4783 
4784 bool
4785 legitimate_la_operand_p (rtx op)
4786 {
4787   struct s390_address addr;
4788   if (!s390_decompose_address (op, &addr))
4789     return false;
4790 
4791   return (TARGET_64BIT || addr.pointer);
4792 }
4793 
4794 /* Return true if it is valid *and* preferable to use LA to
4795    compute the sum of OP1 and OP2.  */
4796 
4797 bool
4798 preferred_la_operand_p (rtx op1, rtx op2)
4799 {
4800   struct s390_address addr;
4801 
4802   if (op2 != const0_rtx)
4803     op1 = gen_rtx_PLUS (Pmode, op1, op2);
4804 
4805   if (!s390_decompose_address (op1, &addr))
4806     return false;
4807   if (addr.base && !REGNO_OK_FOR_BASE_P (REGNO (addr.base)))
4808     return false;
4809   if (addr.indx && !REGNO_OK_FOR_INDEX_P (REGNO (addr.indx)))
4810     return false;
4811 
4812   /* Avoid LA instructions with index (and base) register on z196 or
4813      later; it is preferable to use regular add instructions when
4814      possible.  Starting with zEC12 the la with index register is
4815      "uncracked" again but still slower than a regular add.  */
4816   if (addr.indx && s390_tune >= PROCESSOR_2817_Z196)
4817     return false;
4818 
4819   if (!TARGET_64BIT && !addr.pointer)
4820     return false;
4821 
4822   if (addr.pointer)
4823     return true;
4824 
4825   if ((addr.base && REG_P (addr.base) && REG_POINTER (addr.base))
4826       || (addr.indx && REG_P (addr.indx) && REG_POINTER (addr.indx)))
4827     return true;
4828 
4829   return false;
4830 }
4831 
4832 /* Emit a forced load-address operation to load SRC into DST.
4833    This will use the LOAD ADDRESS instruction even in situations
4834    where legitimate_la_operand_p (SRC) returns false.  */
4835 
4836 void
4837 s390_load_address (rtx dst, rtx src)
4838 {
4839   if (TARGET_64BIT)
4840     emit_move_insn (dst, src);
4841   else
4842     emit_insn (gen_force_la_31 (dst, src));
4843 }
4844 
4845 /* Return true if it is ok to use SYMBOL_REF in a relative address.  */
4846 
4847 bool
4848 s390_rel_address_ok_p (rtx symbol_ref)
4849 {
4850   tree decl;
4851 
4852   if (symbol_ref == s390_got_symbol () || CONSTANT_POOL_ADDRESS_P (symbol_ref))
4853     return true;
4854 
4855   decl = SYMBOL_REF_DECL (symbol_ref);
4856 
4857   if (!flag_pic || SYMBOL_REF_LOCAL_P (symbol_ref))
4858     return (s390_pic_data_is_text_relative
4859 	    || (decl
4860 		&& TREE_CODE (decl) == FUNCTION_DECL));
4861 
4862   return false;
4863 }
4864 
4865 /* Return a legitimate reference for ORIG (an address) using the
4866    register REG.  If REG is 0, a new pseudo is generated.
4867 
4868    There are two types of references that must be handled:
4869 
4870    1. Global data references must load the address from the GOT, via
4871       the PIC reg.  An insn is emitted to do this load, and the reg is
4872       returned.
4873 
4874    2. Static data references, constant pool addresses, and code labels
4875       compute the address as an offset from the GOT, whose base is in
4876       the PIC reg.  Static data objects have SYMBOL_FLAG_LOCAL set to
4877       differentiate them from global data objects.  The returned
4878       address is the PIC reg + an unspec constant.
4879 
4880    TARGET_LEGITIMIZE_ADDRESS_P rejects symbolic references unless the PIC
4881    reg also appears in the address.  */
4882 
4883 rtx
4884 legitimize_pic_address (rtx orig, rtx reg)
4885 {
4886   rtx addr = orig;
4887   rtx addend = const0_rtx;
4888   rtx new_rtx = orig;
4889 
4890   gcc_assert (!TLS_SYMBOLIC_CONST (addr));
4891 
4892   if (GET_CODE (addr) == CONST)
4893     addr = XEXP (addr, 0);
4894 
4895   if (GET_CODE (addr) == PLUS)
4896     {
4897       addend = XEXP (addr, 1);
4898       addr = XEXP (addr, 0);
4899     }
4900 
4901   if ((GET_CODE (addr) == LABEL_REF
4902        || (SYMBOL_REF_P (addr) && s390_rel_address_ok_p (addr))
4903        || (GET_CODE (addr) == UNSPEC &&
4904 	   (XINT (addr, 1) == UNSPEC_GOTENT
4905 	    || XINT (addr, 1) == UNSPEC_PLT)))
4906       && GET_CODE (addend) == CONST_INT)
4907     {
4908       /* This can be locally addressed.  */
4909 
4910       /* larl_operand requires UNSPECs to be wrapped in a const rtx.  */
4911       rtx const_addr = (GET_CODE (addr) == UNSPEC ?
4912 			gen_rtx_CONST (Pmode, addr) : addr);
4913 
4914       if (larl_operand (const_addr, VOIDmode)
4915 	  && INTVAL (addend) < HOST_WIDE_INT_1 << 31
4916 	  && INTVAL (addend) >= -(HOST_WIDE_INT_1 << 31))
4917 	{
4918 	  if (INTVAL (addend) & 1)
4919 	    {
4920 	      /* LARL can't handle odd offsets, so emit a pair of LARL
4921 		 and LA.  */
4922 	      rtx temp = reg? reg : gen_reg_rtx (Pmode);
4923 
4924 	      if (!DISP_IN_RANGE (INTVAL (addend)))
4925 		{
4926 		  HOST_WIDE_INT even = INTVAL (addend) - 1;
4927 		  addr = gen_rtx_PLUS (Pmode, addr, GEN_INT (even));
4928 		  addr = gen_rtx_CONST (Pmode, addr);
4929 		  addend = const1_rtx;
4930 		}
4931 
4932 	      emit_move_insn (temp, addr);
4933 	      new_rtx = gen_rtx_PLUS (Pmode, temp, addend);
4934 
4935 	      if (reg != 0)
4936 		{
4937 		  s390_load_address (reg, new_rtx);
4938 		  new_rtx = reg;
4939 		}
4940 	    }
4941 	  else
4942 	    {
4943 	      /* If the offset is even, we can just use LARL.  This
4944 		 will happen automatically.  */
4945 	    }
4946 	}
4947       else
4948 	{
4949 	  /* No larl - Access local symbols relative to the GOT.  */
4950 
4951 	  rtx temp = reg? reg : gen_reg_rtx (Pmode);
4952 
4953 	  if (reload_in_progress || reload_completed)
4954 	    df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
4955 
4956 	  addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
4957 	  if (addend != const0_rtx)
4958 	    addr = gen_rtx_PLUS (Pmode, addr, addend);
4959 	  addr = gen_rtx_CONST (Pmode, addr);
4960 	  addr = force_const_mem (Pmode, addr);
4961 	  emit_move_insn (temp, addr);
4962 
4963 	  new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, temp);
4964 	  if (reg != 0)
4965 	    {
4966 	      s390_load_address (reg, new_rtx);
4967 	      new_rtx = reg;
4968 	    }
4969 	}
4970     }
4971   else if (GET_CODE (addr) == SYMBOL_REF && addend == const0_rtx)
4972     {
4973       /* A non-local symbol reference without addend.
4974 
4975 	 The symbol ref is wrapped into an UNSPEC to make sure the
4976 	 proper operand modifier (@GOT or @GOTENT) will be emitted.
4977 	 This will tell the linker to put the symbol into the GOT.
4978 
4979 	 Additionally the code dereferencing the GOT slot is emitted here.
4980 
4981 	 An addend to the symref needs to be added afterwards.
4982 	 legitimize_pic_address calls itself recursively to handle
4983 	 that case.  So no need to do it here.  */
4984 
4985       if (reg == 0)
4986 	reg = gen_reg_rtx (Pmode);
4987 
4988       if (TARGET_Z10)
4989 	{
4990 	  /* Use load relative if possible.
4991 	     lgrl <target>, sym@GOTENT  */
4992 	  new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTENT);
4993 	  new_rtx = gen_rtx_CONST (Pmode, new_rtx);
4994 	  new_rtx = gen_const_mem (GET_MODE (reg), new_rtx);
4995 
4996 	  emit_move_insn (reg, new_rtx);
4997 	  new_rtx = reg;
4998 	}
4999       else if (flag_pic == 1)
5000 	{
5001 	  /* Assume GOT offset is a valid displacement operand (< 4k
5002 	     or < 512k with z990).  This is handled the same way in
5003 	     both 31- and 64-bit code (@GOT).
5004 	     lg <target>, sym@GOT(r12)  */
5005 
5006 	  if (reload_in_progress || reload_completed)
5007 	    df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
5008 
5009 	  new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
5010 	  new_rtx = gen_rtx_CONST (Pmode, new_rtx);
5011 	  new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
5012 	  new_rtx = gen_const_mem (Pmode, new_rtx);
5013 	  emit_move_insn (reg, new_rtx);
5014 	  new_rtx = reg;
5015 	}
5016       else
5017 	{
5018 	  /* If the GOT offset might be >= 4k, we determine the position
5019 	     of the GOT entry via a PC-relative LARL (@GOTENT).
5020 	     larl temp, sym@GOTENT
5021 	     lg   <target>, 0(temp) */
5022 
5023 	  rtx temp = reg ? reg : gen_reg_rtx (Pmode);
5024 
5025 	  gcc_assert (REGNO (temp) >= FIRST_PSEUDO_REGISTER
5026 		      || REGNO_REG_CLASS (REGNO (temp)) == ADDR_REGS);
5027 
5028 	  new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTENT);
5029 	  new_rtx = gen_rtx_CONST (Pmode, new_rtx);
5030 	  emit_move_insn (temp, new_rtx);
5031 	  new_rtx = gen_const_mem (Pmode, temp);
5032 	  emit_move_insn (reg, new_rtx);
5033 
5034 	  new_rtx = reg;
5035 	}
5036     }
5037   else if (GET_CODE (addr) == UNSPEC && GET_CODE (addend) == CONST_INT)
5038     {
5039       gcc_assert (XVECLEN (addr, 0) == 1);
5040       switch (XINT (addr, 1))
5041 	{
5042 	  /* These are addresses of symbols (or PLT slots) relative to the
5043 	     GOT (not GOT slots!).  In general this will exceed the
5044 	     displacement range, so these values belong in the literal
5045 	     pool.  */
5046 	case UNSPEC_GOTOFF:
5047 	case UNSPEC_PLTOFF:
5048 	  new_rtx = force_const_mem (Pmode, orig);
5049 	  break;
5050 
5051 	  /* For -fPIC the GOT size might exceed the displacement
5052 	     range so make sure the value is in the literal pool.  */
5053 	case UNSPEC_GOT:
5054 	  if (flag_pic == 2)
5055 	    new_rtx = force_const_mem (Pmode, orig);
5056 	  break;
5057 
5058 	  /* For @GOTENT larl is used.  This is handled like local
5059 	     symbol refs.  */
5060 	case UNSPEC_GOTENT:
5061 	  gcc_unreachable ();
5062 	  break;
5063 
5064 	  /* For @PLT larl is used.  This is handled like local
5065 	     symbol refs.  */
5066 	case UNSPEC_PLT:
5067 	  gcc_unreachable ();
5068 	  break;
5069 
5070 	  /* Everything else cannot happen.  */
5071 	default:
5072 	  gcc_unreachable ();
5073 	}
5074     }
5075   else if (addend != const0_rtx)
5076     {
5077       /* Otherwise, compute the sum.  */
5078 
5079       rtx base = legitimize_pic_address (addr, reg);
5080       new_rtx  = legitimize_pic_address (addend,
5081 					 base == reg ? NULL_RTX : reg);
5082       if (GET_CODE (new_rtx) == CONST_INT)
5083 	new_rtx = plus_constant (Pmode, base, INTVAL (new_rtx));
5084       else
5085 	{
5086 	  if (GET_CODE (new_rtx) == PLUS && CONSTANT_P (XEXP (new_rtx, 1)))
5087 	    {
5088 	      base = gen_rtx_PLUS (Pmode, base, XEXP (new_rtx, 0));
5089 	      new_rtx = XEXP (new_rtx, 1);
5090 	    }
5091 	  new_rtx = gen_rtx_PLUS (Pmode, base, new_rtx);
5092 	}
5093 
5094       if (GET_CODE (new_rtx) == CONST)
5095 	new_rtx = XEXP (new_rtx, 0);
5096       new_rtx = force_operand (new_rtx, 0);
5097     }
5098 
5099   return new_rtx;
5100 }
5101 
5102 /* Load the thread pointer into a register.  */
5103 
5104 rtx
5105 s390_get_thread_pointer (void)
5106 {
5107   rtx tp = gen_reg_rtx (Pmode);
5108 
5109   emit_insn (gen_get_thread_pointer (Pmode, tp));
5110 
5111   mark_reg_pointer (tp, BITS_PER_WORD);
5112 
5113   return tp;
5114 }
5115 
5116 /* Emit a tls call insn. The call target is the SYMBOL_REF stored
5117    in s390_tls_symbol which always refers to __tls_get_offset.
5118    The returned offset is written to RESULT_REG and a USE rtx is
5119    generated for TLS_CALL.  */
5120 
5121 static GTY(()) rtx s390_tls_symbol;
5122 
5123 static void
5124 s390_emit_tls_call_insn (rtx result_reg, rtx tls_call)
5125 {
5126   rtx insn;
5127 
5128   if (!flag_pic)
5129     emit_insn (s390_load_got ());
5130 
5131   if (!s390_tls_symbol)
5132     s390_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, "__tls_get_offset");
5133 
5134   insn = s390_emit_call (s390_tls_symbol, tls_call, result_reg,
5135 			 gen_rtx_REG (Pmode, RETURN_REGNUM));
5136 
5137   use_reg (&CALL_INSN_FUNCTION_USAGE (insn), result_reg);
5138   RTL_CONST_CALL_P (insn) = 1;
5139 }
5140 
5141 /* ADDR contains a thread-local SYMBOL_REF.  Generate code to compute
5142    this (thread-local) address.  REG may be used as temporary.  */
5143 
5144 static rtx
5145 legitimize_tls_address (rtx addr, rtx reg)
5146 {
5147   rtx new_rtx, tls_call, temp, base, r2;
5148   rtx_insn *insn;
5149 
5150   if (GET_CODE (addr) == SYMBOL_REF)
5151     switch (tls_symbolic_operand (addr))
5152       {
5153       case TLS_MODEL_GLOBAL_DYNAMIC:
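	/* Global-dynamic model: load the x@TLSGD constant from the literal
	   pool into %r2, call __tls_get_offset, and add the thread pointer
	   to the returned offset.  */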
5154 	start_sequence ();
5155 	r2 = gen_rtx_REG (Pmode, 2);
5156 	tls_call = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_TLSGD);
5157 	new_rtx = gen_rtx_CONST (Pmode, tls_call);
5158 	new_rtx = force_const_mem (Pmode, new_rtx);
5159 	emit_move_insn (r2, new_rtx);
5160 	s390_emit_tls_call_insn (r2, tls_call);
5161 	insn = get_insns ();
5162 	end_sequence ();
5163 
5164 	new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_NTPOFF);
5165 	temp = gen_reg_rtx (Pmode);
5166 	emit_libcall_block (insn, temp, r2, new_rtx);
5167 
5168 	new_rtx = gen_rtx_PLUS (Pmode, s390_get_thread_pointer (), temp);
5169 	if (reg != 0)
5170 	  {
5171 	    s390_load_address (reg, new_rtx);
5172 	    new_rtx = reg;
5173 	  }
5174 	break;
5175 
5176       case TLS_MODEL_LOCAL_DYNAMIC:
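	/* Local-dynamic model: call __tls_get_offset once with the module's
	   TLSLDM constant, then add the per-variable @DTPOFF offset from the
	   literal pool and the thread pointer.  */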
5177 	start_sequence ();
5178 	r2 = gen_rtx_REG (Pmode, 2);
5179 	tls_call = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TLSLDM);
5180 	new_rtx = gen_rtx_CONST (Pmode, tls_call);
5181 	new_rtx = force_const_mem (Pmode, new_rtx);
5182 	emit_move_insn (r2, new_rtx);
5183 	s390_emit_tls_call_insn (r2, tls_call);
5184 	insn = get_insns ();
5185 	end_sequence ();
5186 
5187 	new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TLSLDM_NTPOFF);
5188 	temp = gen_reg_rtx (Pmode);
5189 	emit_libcall_block (insn, temp, r2, new_rtx);
5190 
5191 	new_rtx = gen_rtx_PLUS (Pmode, s390_get_thread_pointer (), temp);
5192 	base = gen_reg_rtx (Pmode);
5193 	s390_load_address (base, new_rtx);
5194 
5195 	new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_DTPOFF);
5196 	new_rtx = gen_rtx_CONST (Pmode, new_rtx);
5197 	new_rtx = force_const_mem (Pmode, new_rtx);
5198 	temp = gen_reg_rtx (Pmode);
5199 	emit_move_insn (temp, new_rtx);
5200 
5201 	new_rtx = gen_rtx_PLUS (Pmode, base, temp);
5202 	if (reg != 0)
5203 	  {
5204 	    s390_load_address (reg, new_rtx);
5205 	    new_rtx = reg;
5206 	  }
5207 	break;
5208 
5209       case TLS_MODEL_INITIAL_EXEC:
5210 	if (flag_pic == 1)
5211 	  {
5212 	    /* Assume GOT offset < 4k.  This is handled the same way
5213 	       in both 31- and 64-bit code.  */
5214 
5215 	    if (reload_in_progress || reload_completed)
5216 	      df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
5217 
5218 	    new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTNTPOFF);
5219 	    new_rtx = gen_rtx_CONST (Pmode, new_rtx);
5220 	    new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
5221 	    new_rtx = gen_const_mem (Pmode, new_rtx);
5222 	    temp = gen_reg_rtx (Pmode);
5223 	    emit_move_insn (temp, new_rtx);
5224 	  }
5225 	else
5226 	  {
5227 	    /* If the GOT offset might be >= 4k, we determine the position
5228 	       of the GOT entry via a PC-relative LARL.  */
5229 
5230 	    new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_INDNTPOFF);
5231 	    new_rtx = gen_rtx_CONST (Pmode, new_rtx);
5232 	    temp = gen_reg_rtx (Pmode);
5233 	    emit_move_insn (temp, new_rtx);
5234 
5235 	    new_rtx = gen_const_mem (Pmode, temp);
5236 	    temp = gen_reg_rtx (Pmode);
5237 	    emit_move_insn (temp, new_rtx);
5238 	  }
5239 
5240 	new_rtx = gen_rtx_PLUS (Pmode, s390_get_thread_pointer (), temp);
5241 	if (reg != 0)
5242 	  {
5243 	    s390_load_address (reg, new_rtx);
5244 	    new_rtx = reg;
5245 	  }
5246 	break;
5247 
5248       case TLS_MODEL_LOCAL_EXEC:
5249 	new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_NTPOFF);
5250 	new_rtx = gen_rtx_CONST (Pmode, new_rtx);
5251 	new_rtx = force_const_mem (Pmode, new_rtx);
5252 	temp = gen_reg_rtx (Pmode);
5253 	emit_move_insn (temp, new_rtx);
5254 
5255 	new_rtx = gen_rtx_PLUS (Pmode, s390_get_thread_pointer (), temp);
5256 	if (reg != 0)
5257 	  {
5258 	    s390_load_address (reg, new_rtx);
5259 	    new_rtx = reg;
5260 	  }
5261 	break;
5262 
5263       default:
5264 	gcc_unreachable ();
5265       }
5266 
5267   else if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == UNSPEC)
5268     {
5269       switch (XINT (XEXP (addr, 0), 1))
5270 	{
5271 	case UNSPEC_INDNTPOFF:
5272 	  new_rtx = addr;
5273 	  break;
5274 
5275 	default:
5276 	  gcc_unreachable ();
5277 	}
5278     }
5279 
5280   else if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS
5281 	   && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT)
5282     {
5283       new_rtx = XEXP (XEXP (addr, 0), 0);
5284       if (GET_CODE (new_rtx) != SYMBOL_REF)
5285 	new_rtx = gen_rtx_CONST (Pmode, new_rtx);
5286 
5287       new_rtx = legitimize_tls_address (new_rtx, reg);
5288       new_rtx = plus_constant (Pmode, new_rtx,
5289 			       INTVAL (XEXP (XEXP (addr, 0), 1)));
5290       new_rtx = force_operand (new_rtx, 0);
5291     }
5292 
5293   else
5294     gcc_unreachable ();  /* for now ... */
5295 
5296   return new_rtx;
5297 }
5298 
5299 /* Emit insns making the address in operands[1] valid for a standard
5300    move to operands[0].  operands[1] is replaced by an address which
5301    should be used instead of the former RTX to emit the move
5302    pattern.  */
5303 
5304 void
5305 emit_symbolic_move (rtx *operands)
5306 {
5307   rtx temp = !can_create_pseudo_p () ? operands[0] : gen_reg_rtx (Pmode);
5308 
5309   if (GET_CODE (operands[0]) == MEM)
5310     operands[1] = force_reg (Pmode, operands[1]);
5311   else if (TLS_SYMBOLIC_CONST (operands[1]))
5312     operands[1] = legitimize_tls_address (operands[1], temp);
5313   else if (flag_pic)
5314     operands[1] = legitimize_pic_address (operands[1], temp);
5315 }
5316 
5317 /* Try machine-dependent ways of modifying an illegitimate address X
5318    to be legitimate.  If we find one, return the new, valid address.
5319 
5320    OLDX is the address as it was before break_out_memory_refs was called.
5321    In some cases it is useful to look at this to decide what needs to be done.
5322 
5323    MODE is the mode of the operand pointed to by X.
5324 
5325    When -fpic is used, special handling is needed for symbolic references.
5326    See comments by legitimize_pic_address for details.  */
5327 
5328 static rtx
5329 s390_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
5330 			 machine_mode mode ATTRIBUTE_UNUSED)
5331 {
5332   rtx constant_term = const0_rtx;
5333 
5334   if (TLS_SYMBOLIC_CONST (x))
5335     {
5336       x = legitimize_tls_address (x, 0);
5337 
5338       if (s390_legitimate_address_p (mode, x, FALSE))
5339 	return x;
5340     }
5341   else if (GET_CODE (x) == PLUS
5342 	   && (TLS_SYMBOLIC_CONST (XEXP (x, 0))
5343 	       || TLS_SYMBOLIC_CONST (XEXP (x, 1))))
5344     {
5345       return x;
5346     }
5347   else if (flag_pic)
5348     {
5349       if (SYMBOLIC_CONST (x)
5350 	  || (GET_CODE (x) == PLUS
5351 	      && (SYMBOLIC_CONST (XEXP (x, 0))
5352 		  || SYMBOLIC_CONST (XEXP (x, 1)))))
5353 	  x = legitimize_pic_address (x, 0);
5354 
5355       if (s390_legitimate_address_p (mode, x, FALSE))
5356 	return x;
5357     }
5358 
5359   x = eliminate_constant_term (x, &constant_term);
5360 
5361   /* Optimize loading of large displacements by splitting them
5362      into the multiple of 4K and the rest; this allows the
5363      former to be CSE'd if possible.
5364 
5365      Don't do this if the displacement is added to a register
5366      pointing into the stack frame, as the offsets will
5367      change later anyway.  */
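  /* For instance, an offset of 0x12345 is split into 0x12000, which is
     loaded into a register and available for CSE, plus 0x345, which
     fits the 12-bit displacement field.  */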
5368 
5369   if (GET_CODE (constant_term) == CONST_INT
5370       && !TARGET_LONG_DISPLACEMENT
5371       && !DISP_IN_RANGE (INTVAL (constant_term))
5372       && !(REG_P (x) && REGNO_PTR_FRAME_P (REGNO (x))))
5373     {
5374       HOST_WIDE_INT lower = INTVAL (constant_term) & 0xfff;
5375       HOST_WIDE_INT upper = INTVAL (constant_term) ^ lower;
5376 
5377       rtx temp = gen_reg_rtx (Pmode);
5378       rtx val  = force_operand (GEN_INT (upper), temp);
5379       if (val != temp)
5380 	emit_move_insn (temp, val);
5381 
5382       x = gen_rtx_PLUS (Pmode, x, temp);
5383       constant_term = GEN_INT (lower);
5384     }
5385 
5386   if (GET_CODE (x) == PLUS)
5387     {
5388       if (GET_CODE (XEXP (x, 0)) == REG)
5389 	{
5390 	  rtx temp = gen_reg_rtx (Pmode);
5391 	  rtx val  = force_operand (XEXP (x, 1), temp);
5392 	  if (val != temp)
5393 	    emit_move_insn (temp, val);
5394 
5395 	  x = gen_rtx_PLUS (Pmode, XEXP (x, 0), temp);
5396 	}
5397 
5398       else if (GET_CODE (XEXP (x, 1)) == REG)
5399 	{
5400 	  rtx temp = gen_reg_rtx (Pmode);
5401 	  rtx val  = force_operand (XEXP (x, 0), temp);
5402 	  if (val != temp)
5403 	    emit_move_insn (temp, val);
5404 
5405 	  x = gen_rtx_PLUS (Pmode, temp, XEXP (x, 1));
5406 	}
5407     }
5408 
5409   if (constant_term != const0_rtx)
5410     x = gen_rtx_PLUS (Pmode, x, constant_term);
5411 
5412   return x;
5413 }
5414 
5415 /* Try a machine-dependent way of reloading an illegitimate address AD
5416    operand.  If we find one, push the reload and return the new address.
5417 
5418    MODE is the mode of the enclosing MEM.  OPNUM is the operand number
5419    and TYPE is the reload type of the current reload.  */
5420 
5421 rtx
5422 legitimize_reload_address (rtx ad, machine_mode mode ATTRIBUTE_UNUSED,
5423 			   int opnum, int type)
5424 {
5425   if (!optimize || TARGET_LONG_DISPLACEMENT)
5426     return NULL_RTX;
5427 
5428   if (GET_CODE (ad) == PLUS)
5429     {
5430       rtx tem = simplify_binary_operation (PLUS, Pmode,
5431 					   XEXP (ad, 0), XEXP (ad, 1));
5432       if (tem)
5433 	ad = tem;
5434     }
5435 
5436   if (GET_CODE (ad) == PLUS
5437       && GET_CODE (XEXP (ad, 0)) == REG
5438       && GET_CODE (XEXP (ad, 1)) == CONST_INT
5439       && !DISP_IN_RANGE (INTVAL (XEXP (ad, 1))))
5440     {
5441       HOST_WIDE_INT lower = INTVAL (XEXP (ad, 1)) & 0xfff;
5442       HOST_WIDE_INT upper = INTVAL (XEXP (ad, 1)) ^ lower;
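      /* E.g. an offset of 100000 is split into UPPER = 98304, which is
	 reloaded into a register, and LOWER = 1696, which fits the
	 12-bit displacement field.  */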
5443       rtx cst, tem, new_rtx;
5444 
5445       cst = GEN_INT (upper);
5446       if (!legitimate_reload_constant_p (cst))
5447 	cst = force_const_mem (Pmode, cst);
5448 
5449       tem = gen_rtx_PLUS (Pmode, XEXP (ad, 0), cst);
5450       new_rtx = gen_rtx_PLUS (Pmode, tem, GEN_INT (lower));
5451 
5452       push_reload (XEXP (tem, 1), 0, &XEXP (tem, 1), 0,
5453 		   BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
5454 		   opnum, (enum reload_type) type);
5455       return new_rtx;
5456     }
5457 
5458   return NULL_RTX;
5459 }
5460 
5461 /* Emit code to move LEN bytes from SRC to DST.  */
5462 
5463 bool
5464 s390_expand_cpymem (rtx dst, rtx src, rtx len)
5465 {
5466   /* When tuning for z10 or higher we rely on the Glibc functions to
5467      do the right thing.  Inline code is generated only for constant
5468      lengths of at most 64k.  */
5469   if (s390_tune >= PROCESSOR_2097_Z10
5470       && (GET_CODE (len) != CONST_INT || INTVAL (len) > (1<<16)))
5471     return false;
5472 
5473   /* Expand memcpy for constant length operands without a loop if it
5474      is shorter that way.
5475 
5476      With a constant length argument a
5477      memcpy loop (without pfd) is 36 bytes -> 6 * mvc  */
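  /* Each MVC is 6 bytes long and moves at most 256 bytes, so for constant
     lengths up to 6 * 256 a straight-line sequence of MVCs is never larger
     than the loop.  */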
5478   if (GET_CODE (len) == CONST_INT
5479       && INTVAL (len) >= 0
5480       && INTVAL (len) <= 256 * 6
5481       && (!TARGET_MVCLE || INTVAL (len) <= 256))
5482     {
5483       HOST_WIDE_INT o, l;
5484 
5485       for (l = INTVAL (len), o = 0; l > 0; l -= 256, o += 256)
5486 	{
5487 	  rtx newdst = adjust_address (dst, BLKmode, o);
5488 	  rtx newsrc = adjust_address (src, BLKmode, o);
5489 	  emit_insn (gen_cpymem_short (newdst, newsrc,
5490 				       GEN_INT (l > 256 ? 255 : l - 1)));
5491 	}
5492     }
5493 
5494   else if (TARGET_MVCLE)
5495     {
5496       emit_insn (gen_cpymem_long (dst, src, convert_to_mode (Pmode, len, 1)));
5497     }
5498 
5499   else
5500     {
5501       rtx dst_addr, src_addr, count, blocks, temp;
5502       rtx_code_label *loop_start_label = gen_label_rtx ();
5503       rtx_code_label *loop_end_label = gen_label_rtx ();
5504       rtx_code_label *end_label = gen_label_rtx ();
5505       machine_mode mode;
5506 
5507       mode = GET_MODE (len);
5508       if (mode == VOIDmode)
5509 	mode = Pmode;
5510 
5511       dst_addr = gen_reg_rtx (Pmode);
5512       src_addr = gen_reg_rtx (Pmode);
5513       count = gen_reg_rtx (mode);
5514       blocks = gen_reg_rtx (mode);
5515 
5516       convert_move (count, len, 1);
5517       emit_cmp_and_jump_insns (count, const0_rtx,
5518 			       EQ, NULL_RTX, mode, 1, end_label);
5519 
5520       emit_move_insn (dst_addr, force_operand (XEXP (dst, 0), NULL_RTX));
5521       emit_move_insn (src_addr, force_operand (XEXP (src, 0), NULL_RTX));
5522       dst = change_address (dst, VOIDmode, dst_addr);
5523       src = change_address (src, VOIDmode, src_addr);
5524 
5525       temp = expand_binop (mode, add_optab, count, constm1_rtx, count, 1,
5526 			   OPTAB_DIRECT);
5527       if (temp != count)
5528 	emit_move_insn (count, temp);
5529 
5530       temp = expand_binop (mode, lshr_optab, count, GEN_INT (8), blocks, 1,
5531 			   OPTAB_DIRECT);
5532       if (temp != blocks)
5533 	emit_move_insn (blocks, temp);
5534 
5535       emit_cmp_and_jump_insns (blocks, const0_rtx,
5536 			       EQ, NULL_RTX, mode, 1, loop_end_label);
5537 
5538       emit_label (loop_start_label);
5539 
5540       if (TARGET_Z10
5541 	  && (GET_CODE (len) != CONST_INT || INTVAL (len) > 768))
5542 	{
5543 	  rtx prefetch;
5544 
5545 	  /* Issue a read prefetch for the +3 cache line.  */
5546 	  prefetch = gen_prefetch (gen_rtx_PLUS (Pmode, src_addr, GEN_INT (768)),
5547 				   const0_rtx, const0_rtx);
5548 	  PREFETCH_SCHEDULE_BARRIER_P (prefetch) = true;
5549 	  emit_insn (prefetch);
5550 
5551 	  /* Issue a write prefetch for the +3 cache line.  */
5552 	  prefetch = gen_prefetch (gen_rtx_PLUS (Pmode, dst_addr, GEN_INT (768)),
5553 				   const1_rtx, const0_rtx);
5554 	  PREFETCH_SCHEDULE_BARRIER_P (prefetch) = true;
5555 	  emit_insn (prefetch);
5556 	}
5557 
5558       emit_insn (gen_cpymem_short (dst, src, GEN_INT (255)));
5559       s390_load_address (dst_addr,
5560 			 gen_rtx_PLUS (Pmode, dst_addr, GEN_INT (256)));
5561       s390_load_address (src_addr,
5562 			 gen_rtx_PLUS (Pmode, src_addr, GEN_INT (256)));
5563 
5564       temp = expand_binop (mode, add_optab, blocks, constm1_rtx, blocks, 1,
5565 			   OPTAB_DIRECT);
5566       if (temp != blocks)
5567 	emit_move_insn (blocks, temp);
5568 
5569       emit_cmp_and_jump_insns (blocks, const0_rtx,
5570 			       EQ, NULL_RTX, mode, 1, loop_end_label);
5571 
5572       emit_jump (loop_start_label);
5573       emit_label (loop_end_label);
5574 
5575       emit_insn (gen_cpymem_short (dst, src,
5576 				   convert_to_mode (Pmode, count, 1)));
5577       emit_label (end_label);
5578     }
5579   return true;
5580 }
5581 
5582 /* Emit code to set LEN bytes at DST to VAL.
5583    Make use of clrmem if VAL is zero.  */
5584 
5585 void
5586 s390_expand_setmem (rtx dst, rtx len, rtx val)
5587 {
5588   if (GET_CODE (len) == CONST_INT && INTVAL (len) <= 0)
5589     return;
5590 
5591   gcc_assert (GET_CODE (val) == CONST_INT || GET_MODE (val) == QImode);
5592 
5593   /* Expand setmem/clrmem for a constant length operand without a
5594      loop if it will be shorter that way.
5595      clrmem loop (with PFD)    is 30 bytes -> 5 * xc
5596      clrmem loop (without PFD) is 24 bytes -> 4 * xc
5597      setmem loop (with PFD)    is 38 bytes -> ~4 * (mvi/stc + mvc)
5598      setmem loop (without PFD) is 32 bytes -> ~4 * (mvi/stc + mvc) */
5599   if (GET_CODE (len) == CONST_INT
5600       && ((val == const0_rtx
5601 	   && (INTVAL (len) <= 256 * 4
5602 	       || (INTVAL (len) <= 256 * 5 && TARGET_SETMEM_PFD(val,len))))
5603 	  || (val != const0_rtx && INTVAL (len) <= 257 * 4))
5604       && (!TARGET_MVCLE || INTVAL (len) <= 256))
5605     {
5606       HOST_WIDE_INT o, l;
5607 
5608       if (val == const0_rtx)
5609 	/* clrmem: emit 256 byte blockwise XCs.  */
5610 	for (l = INTVAL (len), o = 0; l > 0; l -= 256, o += 256)
5611 	  {
5612 	    rtx newdst = adjust_address (dst, BLKmode, o);
5613 	    emit_insn (gen_clrmem_short (newdst,
5614 					 GEN_INT (l > 256 ? 255 : l - 1)));
5615 	  }
5616       else
5617 	/* setmem: emit 1(mvi) + 256(mvc) byte blockwise memsets by
5618 	   setting first byte to val and using a 256 byte mvc with one
5619 	   byte overlap to propagate the byte.  */
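	/* MVC processes its operands one byte at a time from left to right,
	   so a destructive one-byte overlap (mvc 1(255,%r1),0(%r1))
	   replicates the byte just stored at offset 0 across the block.  */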
5620 	for (l = INTVAL (len), o = 0; l > 0; l -= 257, o += 257)
5621 	  {
5622 	    rtx newdst = adjust_address (dst, BLKmode, o);
5623 	    emit_move_insn (adjust_address (dst, QImode, o), val);
5624 	    if (l > 1)
5625 	      {
5626 		rtx newdstp1 = adjust_address (dst, BLKmode, o + 1);
5627 		emit_insn (gen_cpymem_short (newdstp1, newdst,
5628 					     GEN_INT (l > 257 ? 255 : l - 2)));
5629 	      }
5630 	  }
5631     }
5632 
5633   else if (TARGET_MVCLE)
5634     {
5635       val = force_not_mem (convert_modes (Pmode, QImode, val, 1));
5636       if (TARGET_64BIT)
5637 	emit_insn (gen_setmem_long_di (dst, convert_to_mode (Pmode, len, 1),
5638 				       val));
5639       else
5640 	emit_insn (gen_setmem_long_si (dst, convert_to_mode (Pmode, len, 1),
5641 				       val));
5642     }
5643 
5644   else
5645     {
5646       rtx dst_addr, count, blocks, temp, dstp1 = NULL_RTX;
5647       rtx_code_label *loop_start_label = gen_label_rtx ();
5648       rtx_code_label *onebyte_end_label = gen_label_rtx ();
5649       rtx_code_label *zerobyte_end_label = gen_label_rtx ();
5650       rtx_code_label *restbyte_end_label = gen_label_rtx ();
5651       machine_mode mode;
5652 
5653       mode = GET_MODE (len);
5654       if (mode == VOIDmode)
5655 	mode = Pmode;
5656 
5657       dst_addr = gen_reg_rtx (Pmode);
5658       count = gen_reg_rtx (mode);
5659       blocks = gen_reg_rtx (mode);
5660 
5661       convert_move (count, len, 1);
5662       emit_cmp_and_jump_insns (count, const0_rtx,
5663 			       EQ, NULL_RTX, mode, 1, zerobyte_end_label,
5664 			       profile_probability::very_unlikely ());
5665 
5666       /* We need to make a copy of the target address since memset is
5667 	 supposed to return it unmodified.  We have to make it here
5668 	 already since the new reg is used at onebyte_end_label.  */
5669       emit_move_insn (dst_addr, force_operand (XEXP (dst, 0), NULL_RTX));
5670       dst = change_address (dst, VOIDmode, dst_addr);
5671 
5672       if (val != const0_rtx)
5673 	{
5674 	  /* When using the overlapping mvc the original target
5675 	     address is only accessed as a single-byte entity (even by
5676 	     the mvc reading this value).  */
5677 	  set_mem_size (dst, 1);
5678 	  dstp1 = adjust_address (dst, VOIDmode, 1);
5679 	  emit_cmp_and_jump_insns (count,
5680 				   const1_rtx, EQ, NULL_RTX, mode, 1,
5681 				   onebyte_end_label,
5682 				   profile_probability::very_unlikely ());
5683 	}
5684 
5685       /* There is one unconditional (mvi+mvc)/xc after the loop
5686 	 dealing with the rest of the bytes; subtracting two (mvi+mvc)
5687 	 or one (xc) here leaves that number of bytes to be handled by
5688 	 it.  */
5689       temp = expand_binop (mode, add_optab, count,
5690 			   val == const0_rtx ? constm1_rtx : GEN_INT (-2),
5691 			   count, 1, OPTAB_DIRECT);
5692       if (temp != count)
5693 	emit_move_insn (count, temp);
5694 
5695       temp = expand_binop (mode, lshr_optab, count, GEN_INT (8), blocks, 1,
5696 			   OPTAB_DIRECT);
5697       if (temp != blocks)
5698 	emit_move_insn (blocks, temp);
5699 
5700       emit_cmp_and_jump_insns (blocks, const0_rtx,
5701 			       EQ, NULL_RTX, mode, 1, restbyte_end_label);
5702 
5703       emit_jump (loop_start_label);
5704 
5705       if (val != const0_rtx)
5706 	{
5707 	  /* The 1 byte != 0 special case.  Not handled efficiently
5708 	     since we require two jumps for that.  However, this
5709 	     should be very rare.  */
5710 	  emit_label (onebyte_end_label);
5711 	  emit_move_insn (adjust_address (dst, QImode, 0), val);
5712 	  emit_jump (zerobyte_end_label);
5713 	}
5714 
5715       emit_label (loop_start_label);
5716 
5717       if (TARGET_SETMEM_PFD (val, len))
5718 	{
5719 	  /* Issue a write prefetch.  */
5720 	  rtx distance = GEN_INT (TARGET_SETMEM_PREFETCH_DISTANCE);
5721 	  rtx prefetch = gen_prefetch (gen_rtx_PLUS (Pmode, dst_addr, distance),
5722 				       const1_rtx, const0_rtx);
5723 	  emit_insn (prefetch);
5724 	  PREFETCH_SCHEDULE_BARRIER_P (prefetch) = true;
5725 	}
5726 
5727       if (val == const0_rtx)
5728 	emit_insn (gen_clrmem_short (dst, GEN_INT (255)));
5729       else
5730 	{
5731 	  /* Set the first byte in the block to the value and use an
5732 	     overlapping mvc for the block.  */
5733 	  emit_move_insn (adjust_address (dst, QImode, 0), val);
5734 	  emit_insn (gen_cpymem_short (dstp1, dst, GEN_INT (254)));
5735 	}
5736       s390_load_address (dst_addr,
5737 			 gen_rtx_PLUS (Pmode, dst_addr, GEN_INT (256)));
5738 
5739       temp = expand_binop (mode, add_optab, blocks, constm1_rtx, blocks, 1,
5740 			   OPTAB_DIRECT);
5741       if (temp != blocks)
5742 	emit_move_insn (blocks, temp);
5743 
5744       emit_cmp_and_jump_insns (blocks, const0_rtx,
5745 			       NE, NULL_RTX, mode, 1, loop_start_label);
5746 
5747       emit_label (restbyte_end_label);
5748 
5749       if (val == const0_rtx)
5750 	emit_insn (gen_clrmem_short (dst, convert_to_mode (Pmode, count, 1)));
5751       else
5752 	{
5753 	  /* Set the first byte in the block to the value and use an
5754 	     overlapping mvc for the block.  */
5755 	  emit_move_insn (adjust_address (dst, QImode, 0), val);
5756 	  /* execute only uses the lowest 8 bits of count, which is
5757 	     exactly what we need here.  */
5758 	  emit_insn (gen_cpymem_short (dstp1, dst,
5759 				       convert_to_mode (Pmode, count, 1)));
5760 	}
5761 
5762       emit_label (zerobyte_end_label);
5763     }
5764 }
5765 
5766 /* Emit code to compare LEN bytes at OP0 with those at OP1,
5767    and return the result in TARGET.  */
5768 
5769 bool
5770 s390_expand_cmpmem (rtx target, rtx op0, rtx op1, rtx len)
5771 {
5772   rtx ccreg = gen_rtx_REG (CCUmode, CC_REGNUM);
5773   rtx tmp;
5774 
5775   /* When tuning for z10 or higher we rely on the Glibc functions to
5776      do the right thing.  Inline code is generated only for constant
5777      lengths of at most 64k.  */
5778   if (s390_tune >= PROCESSOR_2097_Z10
5779       && (GET_CODE (len) != CONST_INT || INTVAL (len) > (1<<16)))
5780     return false;
5781 
5782   /* As the result of CMPINT is inverted compared to what we need,
5783      we have to swap the operands.  */
5784   tmp = op0; op0 = op1; op1 = tmp;
5785 
5786   if (GET_CODE (len) == CONST_INT && INTVAL (len) >= 0 && INTVAL (len) <= 256)
5787     {
5788       if (INTVAL (len) > 0)
5789 	{
5790 	  emit_insn (gen_cmpmem_short (op0, op1, GEN_INT (INTVAL (len) - 1)));
5791 	  emit_insn (gen_cmpint (target, ccreg));
5792 	}
5793       else
5794 	emit_move_insn (target, const0_rtx);
5795     }
5796   else if (TARGET_MVCLE)
5797     {
5798       emit_insn (gen_cmpmem_long (op0, op1, convert_to_mode (Pmode, len, 1)));
5799       emit_insn (gen_cmpint (target, ccreg));
5800     }
5801   else
5802     {
5803       rtx addr0, addr1, count, blocks, temp;
5804       rtx_code_label *loop_start_label = gen_label_rtx ();
5805       rtx_code_label *loop_end_label = gen_label_rtx ();
5806       rtx_code_label *end_label = gen_label_rtx ();
5807       machine_mode mode;
5808 
5809       mode = GET_MODE (len);
5810       if (mode == VOIDmode)
5811 	mode = Pmode;
5812 
5813       addr0 = gen_reg_rtx (Pmode);
5814       addr1 = gen_reg_rtx (Pmode);
5815       count = gen_reg_rtx (mode);
5816       blocks = gen_reg_rtx (mode);
5817 
5818       convert_move (count, len, 1);
5819       emit_cmp_and_jump_insns (count, const0_rtx,
5820 			       EQ, NULL_RTX, mode, 1, end_label);
5821 
5822       emit_move_insn (addr0, force_operand (XEXP (op0, 0), NULL_RTX));
5823       emit_move_insn (addr1, force_operand (XEXP (op1, 0), NULL_RTX));
5824       op0 = change_address (op0, VOIDmode, addr0);
5825       op1 = change_address (op1, VOIDmode, addr1);
5826 
5827       temp = expand_binop (mode, add_optab, count, constm1_rtx, count, 1,
5828 			   OPTAB_DIRECT);
5829       if (temp != count)
5830 	emit_move_insn (count, temp);
5831 
5832       temp = expand_binop (mode, lshr_optab, count, GEN_INT (8), blocks, 1,
5833 			   OPTAB_DIRECT);
5834       if (temp != blocks)
5835 	emit_move_insn (blocks, temp);
5836 
5837       emit_cmp_and_jump_insns (blocks, const0_rtx,
5838 			       EQ, NULL_RTX, mode, 1, loop_end_label);
5839 
5840       emit_label (loop_start_label);
5841 
5842       if (TARGET_Z10
5843 	  && (GET_CODE (len) != CONST_INT || INTVAL (len) > 512))
5844 	{
5845 	  rtx prefetch;
5846 
5847 	  /* Issue a read prefetch for the +2 cache line of operand 1.  */
5848 	  prefetch = gen_prefetch (gen_rtx_PLUS (Pmode, addr0, GEN_INT (512)),
5849 				   const0_rtx, const0_rtx);
5850 	  emit_insn (prefetch);
5851 	  PREFETCH_SCHEDULE_BARRIER_P (prefetch) = true;
5852 
5853 	  /* Issue a read prefetch for the +2 cache line of operand 2.  */
5854 	  prefetch = gen_prefetch (gen_rtx_PLUS (Pmode, addr1, GEN_INT (512)),
5855 				   const0_rtx, const0_rtx);
5856 	  emit_insn (prefetch);
5857 	  PREFETCH_SCHEDULE_BARRIER_P (prefetch) = true;
5858 	}
5859 
5860       emit_insn (gen_cmpmem_short (op0, op1, GEN_INT (255)));
5861       temp = gen_rtx_NE (VOIDmode, ccreg, const0_rtx);
5862       temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
5863 			gen_rtx_LABEL_REF (VOIDmode, end_label), pc_rtx);
5864       temp = gen_rtx_SET (pc_rtx, temp);
5865       emit_jump_insn (temp);
5866 
5867       s390_load_address (addr0,
5868 			 gen_rtx_PLUS (Pmode, addr0, GEN_INT (256)));
5869       s390_load_address (addr1,
5870 			 gen_rtx_PLUS (Pmode, addr1, GEN_INT (256)));
5871 
5872       temp = expand_binop (mode, add_optab, blocks, constm1_rtx, blocks, 1,
5873 			   OPTAB_DIRECT);
5874       if (temp != blocks)
5875 	emit_move_insn (blocks, temp);
5876 
5877       emit_cmp_and_jump_insns (blocks, const0_rtx,
5878 			       EQ, NULL_RTX, mode, 1, loop_end_label);
5879 
5880       emit_jump (loop_start_label);
5881       emit_label (loop_end_label);
5882 
5883       emit_insn (gen_cmpmem_short (op0, op1,
5884 				   convert_to_mode (Pmode, count, 1)));
5885       emit_label (end_label);
5886 
5887       emit_insn (gen_cmpint (target, ccreg));
5888     }
5889   return true;
5890 }
5891 
5892 /* Emit a conditional jump to LABEL for condition code mask MASK using
5893    comparison operator COMPARISON.  Return the emitted jump insn.  */
5894 
5895 static rtx_insn *
5896 s390_emit_ccraw_jump (HOST_WIDE_INT mask, enum rtx_code comparison, rtx label)
5897 {
5898   rtx temp;
5899 
5900   gcc_assert (comparison == EQ || comparison == NE);
5901   gcc_assert (mask > 0 && mask < 15);
5902 
5903   temp = gen_rtx_fmt_ee (comparison, VOIDmode,
5904 			 gen_rtx_REG (CCRAWmode, CC_REGNUM), GEN_INT (mask));
5905   temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
5906 			       gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
5907   temp = gen_rtx_SET (pc_rtx, temp);
5908   return emit_jump_insn (temp);
5909 }
5910 
5911 /* Emit the instructions to implement strlen of STRING and store the
5912    result in TARGET.  The string has the known ALIGNMENT.  This
5913    version uses vector instructions and is therefore not appropriate
5914    for targets prior to z13.  */
5915 
5916 void
5917 s390_expand_vec_strlen (rtx target, rtx string, rtx alignment)
5918 {
5919   rtx highest_index_to_load_reg = gen_reg_rtx (Pmode);
5920   rtx str_reg = gen_reg_rtx (V16QImode);
5921   rtx str_addr_base_reg = gen_reg_rtx (Pmode);
5922   rtx str_idx_reg = gen_reg_rtx (Pmode);
5923   rtx result_reg = gen_reg_rtx (V16QImode);
5924   rtx is_aligned_label = gen_label_rtx ();
5925   rtx into_loop_label = NULL_RTX;
5926   rtx loop_start_label = gen_label_rtx ();
5927   rtx temp;
5928   rtx len = gen_reg_rtx (QImode);
5929   rtx cond;
5930 
5931   s390_load_address (str_addr_base_reg, XEXP (string, 0));
5932   emit_move_insn (str_idx_reg, const0_rtx);
5933 
5934   if (INTVAL (alignment) < 16)
5935     {
5936       /* Check whether the address happens to be aligned properly and,
5937 	 if so, jump directly to the aligned loop.  */
5938       emit_cmp_and_jump_insns (gen_rtx_AND (Pmode,
5939 					    str_addr_base_reg, GEN_INT (15)),
5940 			       const0_rtx, EQ, NULL_RTX,
5941 			       Pmode, 1, is_aligned_label);
5942 
5943       temp = gen_reg_rtx (Pmode);
5944       temp = expand_binop (Pmode, and_optab, str_addr_base_reg,
5945 			   GEN_INT (15), temp, 1, OPTAB_DIRECT);
5946       gcc_assert (REG_P (temp));
5947       highest_index_to_load_reg =
5948 	expand_binop (Pmode, sub_optab, GEN_INT (15), temp,
5949 		      highest_index_to_load_reg, 1, OPTAB_DIRECT);
5950       gcc_assert (REG_P (highest_index_to_load_reg));
5951       emit_insn (gen_vllv16qi (str_reg,
5952 		   convert_to_mode (SImode, highest_index_to_load_reg, 1),
5953 		   gen_rtx_MEM (BLKmode, str_addr_base_reg)));
5954 
5955       into_loop_label = gen_label_rtx ();
5956       s390_emit_jump (into_loop_label, NULL_RTX);
5957       emit_barrier ();
5958     }
5959 
5960   emit_label (is_aligned_label);
5961   LABEL_NUSES (is_aligned_label) = INTVAL (alignment) < 16 ? 2 : 1;
5962 
5963   /* When reaching this point we are only performing 16-byte aligned
5964      loads.  */
5965   emit_move_insn (highest_index_to_load_reg, GEN_INT (15));
5966 
5967   emit_label (loop_start_label);
5968   LABEL_NUSES (loop_start_label) = 1;
5969 
5970   /* Load 16 bytes of the string into VR.  */
5971   emit_move_insn (str_reg,
5972 		  gen_rtx_MEM (V16QImode,
5973 			       gen_rtx_PLUS (Pmode, str_idx_reg,
5974 					     str_addr_base_reg)));
5975   if (into_loop_label != NULL_RTX)
5976     {
5977       emit_label (into_loop_label);
5978       LABEL_NUSES (into_loop_label) = 1;
5979     }
5980 
5981   /* Increment string index by 16 bytes.  */
5982   expand_binop (Pmode, add_optab, str_idx_reg, GEN_INT (16),
5983 		str_idx_reg, 1, OPTAB_DIRECT);
5984 
5985   emit_insn (gen_vec_vfenesv16qi (result_reg, str_reg, str_reg,
5986 				  GEN_INT (VSTRING_FLAG_ZS | VSTRING_FLAG_CS)));
5987 
5988   add_int_reg_note (s390_emit_ccraw_jump (8, NE, loop_start_label),
5989 		    REG_BR_PROB,
5990 		    profile_probability::very_likely ().to_reg_br_prob_note ());
5991   emit_insn (gen_vec_extractv16qiqi (len, result_reg, GEN_INT (7)));
5992 
5993   /* If the string pointer wasn't aligned we have loaded less than 16
5994      bytes and the remaining bytes got filled with zeros (by vll).
5995      Now we have to check whether the resulting index lies within the
5996      bytes actually part of the string.  */
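  /* E.g. if the string starts 6 bytes before a 16-byte boundary, vll
     loaded only 6 bytes (indices 0 .. 5); a reported index of 6 or more
     then points at one of the zero fill bytes rather than at a string
     byte, and we continue with the aligned loop.  */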
5997 
5998   cond = s390_emit_compare (GT, convert_to_mode (Pmode, len, 1),
5999 			    highest_index_to_load_reg);
6000   s390_load_address (highest_index_to_load_reg,
6001 		     gen_rtx_PLUS (Pmode, highest_index_to_load_reg,
6002 				   const1_rtx));
6003   if (TARGET_64BIT)
6004     emit_insn (gen_movdicc (str_idx_reg, cond,
6005 			    highest_index_to_load_reg, str_idx_reg));
6006   else
6007     emit_insn (gen_movsicc (str_idx_reg, cond,
6008 			    highest_index_to_load_reg, str_idx_reg));
6009 
6010   add_reg_br_prob_note (s390_emit_jump (is_aligned_label, cond),
6011 			profile_probability::very_unlikely ());
6012 
6013   expand_binop (Pmode, add_optab, str_idx_reg,
6014 		GEN_INT (-16), str_idx_reg, 1, OPTAB_DIRECT);
6015   /* FIXME: len is already zero extended - so avoid the llgcr emitted
6016      here.  */
6017   temp = expand_binop (Pmode, add_optab, str_idx_reg,
6018 		       convert_to_mode (Pmode, len, 1),
6019 		       target, 1, OPTAB_DIRECT);
6020   if (temp != target)
6021     emit_move_insn (target, temp);
6022 }
6023 
6024 void
6025 s390_expand_vec_movstr (rtx result, rtx dst, rtx src)
6026 {
6027   rtx temp = gen_reg_rtx (Pmode);
6028   rtx src_addr = XEXP (src, 0);
6029   rtx dst_addr = XEXP (dst, 0);
6030   rtx src_addr_reg = gen_reg_rtx (Pmode);
6031   rtx dst_addr_reg = gen_reg_rtx (Pmode);
6032   rtx offset = gen_reg_rtx (Pmode);
6033   rtx vsrc = gen_reg_rtx (V16QImode);
6034   rtx vpos = gen_reg_rtx (V16QImode);
6035   rtx loadlen = gen_reg_rtx (SImode);
6036   rtx gpos_qi = gen_reg_rtx(QImode);
6037   rtx gpos = gen_reg_rtx (SImode);
6038   rtx done_label = gen_label_rtx ();
6039   rtx loop_label = gen_label_rtx ();
6040   rtx exit_label = gen_label_rtx ();
6041   rtx full_label = gen_label_rtx ();
6042 
6043   /* Perform a quick check for a string ending within the first (up to)
6044      16 bytes and exit early if successful.  */
6045 
6046   emit_insn (gen_vlbb (vsrc, src, GEN_INT (6)));
6047   emit_insn (gen_lcbb (loadlen, src_addr, GEN_INT (6)));
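  /* vlbb/lcbb only load up to the next block boundary, so loadlen is the
     number of bytes actually loaded into vsrc (at most 16).  */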
6048   emit_insn (gen_vfenezv16qi (vpos, vsrc, vsrc));
6049   emit_insn (gen_vec_extractv16qiqi (gpos_qi, vpos, GEN_INT (7)));
6050   emit_move_insn (gpos, gen_rtx_SUBREG (SImode, gpos_qi, 0));
6051   /* gpos is the byte index if a zero was found and 16 otherwise.
6052      So if it is lower than the loaded bytes we have a hit.  */
6053   emit_cmp_and_jump_insns (gpos, loadlen, GE, NULL_RTX, SImode, 1,
6054 			   full_label);
6055   emit_insn (gen_vstlv16qi (vsrc, gpos, dst));
6056 
6057   force_expand_binop (Pmode, add_optab, dst_addr, gpos, result,
6058 		      1, OPTAB_DIRECT);
6059   emit_jump (exit_label);
6060   emit_barrier ();
6061 
6062   emit_label (full_label);
6063   LABEL_NUSES (full_label) = 1;
6064 
6065   /* Calculate `offset' so that src + offset points to the last byte
6066      before 16 byte alignment.  */
6067 
6068   /* temp = src_addr & 0xf */
6069   force_expand_binop (Pmode, and_optab, src_addr, GEN_INT (15), temp,
6070 		      1, OPTAB_DIRECT);
6071 
6072   /* offset = 0xf - temp */
6073   emit_move_insn (offset, GEN_INT (15));
6074   force_expand_binop (Pmode, sub_optab, offset, temp, offset,
6075 		      1, OPTAB_DIRECT);
6076 
6077   /* Store `offset' bytes in the destination string.  The quick check
6078      has loaded at least `offset' bytes into vsrc.  */
6079 
6080   emit_insn (gen_vstlv16qi (vsrc, gen_lowpart (SImode, offset), dst));
6081 
6082   /* Advance to the next byte to be loaded.  */
6083   force_expand_binop (Pmode, add_optab, offset, const1_rtx, offset,
6084 		      1, OPTAB_DIRECT);
6085 
6086   /* Make sure the addresses are single regs which can be used as a
6087      base.  */
6088   emit_move_insn (src_addr_reg, src_addr);
6089   emit_move_insn (dst_addr_reg, dst_addr);
6090 
6091   /* MAIN LOOP */
6092 
6093   emit_label (loop_label);
6094   LABEL_NUSES (loop_label) = 1;
6095 
6096   emit_move_insn (vsrc,
6097 		  gen_rtx_MEM (V16QImode,
6098 			       gen_rtx_PLUS (Pmode, src_addr_reg, offset)));
6099 
6100   emit_insn (gen_vec_vfenesv16qi (vpos, vsrc, vsrc,
6101 				  GEN_INT (VSTRING_FLAG_ZS | VSTRING_FLAG_CS)));
6102   add_int_reg_note (s390_emit_ccraw_jump (8, EQ, done_label),
6103 		    REG_BR_PROB, profile_probability::very_unlikely ()
6104 				  .to_reg_br_prob_note ());
6105 
6106   emit_move_insn (gen_rtx_MEM (V16QImode,
6107 			       gen_rtx_PLUS (Pmode, dst_addr_reg, offset)),
6108 		  vsrc);
6109   /* offset += 16 */
6110   force_expand_binop (Pmode, add_optab, offset, GEN_INT (16),
6111 		      offset,  1, OPTAB_DIRECT);
6112 
6113   emit_jump (loop_label);
6114   emit_barrier ();
6115 
6116   /* REGULAR EXIT */
6117 
6118   /* We are done.  Add the offset of the zero character to the dst_addr
6119      pointer to get the result.  */
6120 
6121   emit_label (done_label);
6122   LABEL_NUSES (done_label) = 1;
6123 
6124   force_expand_binop (Pmode, add_optab, dst_addr_reg, offset, dst_addr_reg,
6125 		      1, OPTAB_DIRECT);
6126 
6127   emit_insn (gen_vec_extractv16qiqi (gpos_qi, vpos, GEN_INT (7)));
6128   emit_move_insn (gpos, gen_rtx_SUBREG (SImode, gpos_qi, 0));
6129 
6130   emit_insn (gen_vstlv16qi (vsrc, gpos, gen_rtx_MEM (BLKmode, dst_addr_reg)));
6131 
6132   force_expand_binop (Pmode, add_optab, dst_addr_reg, gpos, result,
6133 		      1, OPTAB_DIRECT);
6134 
6135   /* EARLY EXIT */
6136 
6137   emit_label (exit_label);
6138   LABEL_NUSES (exit_label) = 1;
6139 }
6140 
6141 
6142 /* Expand conditional increment or decrement using alc/slb instructions.
6143    Should generate code setting DST to either SRC or SRC + INCREMENT,
6144    depending on the result of the comparison CMP_OP0 CMP_CODE CMP_OP1.
6145    Returns true if successful, false otherwise.
6146 
6147    That makes it possible to implement some if-constructs without jumps e.g.:
6148    (borrow = CC0 | CC1 and carry = CC2 | CC3)
6149    unsigned int a, b, c;
6150    if (a < b)  c++; -> CCU  b > a  -> CC2;    c += carry;
6151    if (a < b)  c--; -> CCL3 a - b  -> borrow; c -= borrow;
6152    if (a <= b) c++; -> CCL3 b - a  -> borrow; c += carry;
6153    if (a <= b) c--; -> CCU  a <= b -> borrow; c -= borrow;
6154 
6155    Checks for EQ and NE with a nonzero value need an additional xor e.g.:
6156    if (a == b) c++; -> CCL3 a ^= b; 0 - a  -> borrow;    c += carry;
6157    if (a == b) c--; -> CCU  a ^= b; a <= 0 -> CC0 | CC1; c -= borrow;
6158    if (a != b) c++; -> CCU  a ^= b; a > 0  -> CC2;       c += carry;
6159    if (a != b) c--; -> CCL3 a ^= b; 0 - a  -> borrow;    c -= borrow; */
6160 
6161 bool
6162 s390_expand_addcc (enum rtx_code cmp_code, rtx cmp_op0, rtx cmp_op1,
6163 		   rtx dst, rtx src, rtx increment)
6164 {
6165   machine_mode cmp_mode;
6166   machine_mode cc_mode;
6167   rtx op_res;
6168   rtx insn;
6169   rtvec p;
6170   int ret;
6171 
6172   if ((GET_MODE (cmp_op0) == SImode || GET_MODE (cmp_op0) == VOIDmode)
6173       && (GET_MODE (cmp_op1) == SImode || GET_MODE (cmp_op1) == VOIDmode))
6174     cmp_mode = SImode;
6175   else if ((GET_MODE (cmp_op0) == DImode || GET_MODE (cmp_op0) == VOIDmode)
6176 	   && (GET_MODE (cmp_op1) == DImode || GET_MODE (cmp_op1) == VOIDmode))
6177     cmp_mode = DImode;
6178   else
6179     return false;
6180 
6181   /* Try ADD LOGICAL WITH CARRY.  */
6182   if (increment == const1_rtx)
6183     {
6184       /* Determine CC mode to use.  */
6185       if (cmp_code == EQ || cmp_code == NE)
6186 	{
6187 	  if (cmp_op1 != const0_rtx)
6188 	    {
6189 	      cmp_op0 = expand_simple_binop (cmp_mode, XOR, cmp_op0, cmp_op1,
6190 					     NULL_RTX, 0, OPTAB_WIDEN);
6191 	      cmp_op1 = const0_rtx;
6192 	    }
6193 
6194 	  cmp_code = cmp_code == EQ ? LEU : GTU;
6195 	}
6196 
6197       if (cmp_code == LTU || cmp_code == LEU)
6198 	{
6199 	  rtx tem = cmp_op0;
6200 	  cmp_op0 = cmp_op1;
6201 	  cmp_op1 = tem;
6202 	  cmp_code = swap_condition (cmp_code);
6203 	}
6204 
6205       switch (cmp_code)
6206 	{
6207 	  case GTU:
6208 	    cc_mode = CCUmode;
6209 	    break;
6210 
6211 	  case GEU:
6212 	    cc_mode = CCL3mode;
6213 	    break;
6214 
6215 	  default:
6216 	    return false;
6217 	}
6218 
6219       /* Emit comparison instruction pattern. */
6220       if (!register_operand (cmp_op0, cmp_mode))
6221 	cmp_op0 = force_reg (cmp_mode, cmp_op0);
6222 
6223       insn = gen_rtx_SET (gen_rtx_REG (cc_mode, CC_REGNUM),
6224 			  gen_rtx_COMPARE (cc_mode, cmp_op0, cmp_op1));
6225       /* We use insn_invalid_p here to add clobbers if required.  */
6226       ret = insn_invalid_p (emit_insn (insn), false);
6227       gcc_assert (!ret);
6228 
6229       /* Emit ALC instruction pattern.  */
6230       op_res = gen_rtx_fmt_ee (cmp_code, GET_MODE (dst),
6231 			       gen_rtx_REG (cc_mode, CC_REGNUM),
6232 			       const0_rtx);
6233 
6234       if (src != const0_rtx)
6235 	{
6236 	  if (!register_operand (src, GET_MODE (dst)))
6237 	    src = force_reg (GET_MODE (dst), src);
6238 
6239 	  op_res = gen_rtx_PLUS (GET_MODE (dst), op_res, src);
6240 	  op_res = gen_rtx_PLUS (GET_MODE (dst), op_res, const0_rtx);
6241 	}
6242 
6243       p = rtvec_alloc (2);
6244       RTVEC_ELT (p, 0) =
6245 	gen_rtx_SET (dst, op_res);
6246       RTVEC_ELT (p, 1) =
6247 	gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, CC_REGNUM));
6248       emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
6249 
6250       return true;
6251     }
6252 
6253   /* Try SUBTRACT LOGICAL WITH BORROW.  */
6254   if (increment == constm1_rtx)
6255     {
6256       /* Determine CC mode to use.  */
6257       if (cmp_code == EQ || cmp_code == NE)
6258 	{
6259 	  if (cmp_op1 != const0_rtx)
6260 	    {
6261 	      cmp_op0 = expand_simple_binop (cmp_mode, XOR, cmp_op0, cmp_op1,
6262 					     NULL_RTX, 0, OPTAB_WIDEN);
6263 	      cmp_op1 = const0_rtx;
6264 	    }
6265 
6266 	  cmp_code = cmp_code == EQ ? LEU : GTU;
6267 	}
6268 
6269       if (cmp_code == GTU || cmp_code == GEU)
6270 	{
6271 	  rtx tem = cmp_op0;
6272 	  cmp_op0 = cmp_op1;
6273 	  cmp_op1 = tem;
6274 	  cmp_code = swap_condition (cmp_code);
6275 	}
6276 
6277       switch (cmp_code)
6278 	{
6279 	  case LEU:
6280 	    cc_mode = CCUmode;
6281 	    break;
6282 
6283 	  case LTU:
6284 	    cc_mode = CCL3mode;
6285 	    break;
6286 
6287 	  default:
6288 	    return false;
6289 	}
6290 
6291       /* Emit comparison instruction pattern. */
6292       if (!register_operand (cmp_op0, cmp_mode))
6293 	cmp_op0 = force_reg (cmp_mode, cmp_op0);
6294 
6295       insn = gen_rtx_SET (gen_rtx_REG (cc_mode, CC_REGNUM),
6296 			  gen_rtx_COMPARE (cc_mode, cmp_op0, cmp_op1));
6297       /* We use insn_invalid_p here to add clobbers if required.  */
6298       ret = insn_invalid_p (emit_insn (insn), false);
6299       gcc_assert (!ret);
6300 
6301       /* Emit SLB instruction pattern.  */
6302       if (!register_operand (src, GET_MODE (dst)))
6303 	src = force_reg (GET_MODE (dst), src);
6304 
6305       op_res = gen_rtx_MINUS (GET_MODE (dst),
6306 			      gen_rtx_MINUS (GET_MODE (dst), src, const0_rtx),
6307 			      gen_rtx_fmt_ee (cmp_code, GET_MODE (dst),
6308 					      gen_rtx_REG (cc_mode, CC_REGNUM),
6309 					      const0_rtx));
6310       p = rtvec_alloc (2);
6311       RTVEC_ELT (p, 0) =
6312 	gen_rtx_SET (dst, op_res);
6313       RTVEC_ELT (p, 1) =
6314 	gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, CC_REGNUM));
6315       emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
6316 
6317       return true;
6318     }
6319 
6320   return false;
6321 }
6322 
6323 /* Expand code for the insv template. Return true if successful.  */
6324 
6325 bool
6326 s390_expand_insv (rtx dest, rtx op1, rtx op2, rtx src)
6327 {
6328   int bitsize = INTVAL (op1);
6329   int bitpos = INTVAL (op2);
6330   machine_mode mode = GET_MODE (dest);
6331   machine_mode smode;
6332   int smode_bsize, mode_bsize;
6333   rtx op, clobber;
6334 
6335   if (bitsize + bitpos > GET_MODE_BITSIZE (mode))
6336     return false;
6337 
6338   /* Generate INSERT IMMEDIATE (IILL et al).  */
6339   /* (set (ze (reg)) (const_int)).  */
6340   if (TARGET_ZARCH
6341       && register_operand (dest, word_mode)
6342       && (bitpos % 16) == 0
6343       && (bitsize % 16) == 0
6344       && const_int_operand (src, VOIDmode))
6345     {
6346       HOST_WIDE_INT val = INTVAL (src);
6347       int regpos = bitpos + bitsize;
6348 
6349       while (regpos > bitpos)
6350 	{
6351 	  machine_mode putmode;
6352 	  int putsize;
6353 
6354 	  if (TARGET_EXTIMM && (regpos % 32 == 0) && (regpos >= bitpos + 32))
6355 	    putmode = SImode;
6356 	  else
6357 	    putmode = HImode;
6358 
6359 	  putsize = GET_MODE_BITSIZE (putmode);
6360 	  regpos -= putsize;
6361 	  emit_move_insn (gen_rtx_ZERO_EXTRACT (word_mode, dest,
6362 						GEN_INT (putsize),
6363 						GEN_INT (regpos)),
6364 			  gen_int_mode (val, putmode));
6365 	  val >>= putsize;
6366 	}
6367       gcc_assert (regpos == bitpos);
6368       return true;
6369     }
6370 
6371   smode = smallest_int_mode_for_size (bitsize);
6372   smode_bsize = GET_MODE_BITSIZE (smode);
6373   mode_bsize = GET_MODE_BITSIZE (mode);
6374 
6375   /* Generate STORE CHARACTERS UNDER MASK (STCM et al).  */
6376   if (bitpos == 0
6377       && (bitsize % BITS_PER_UNIT) == 0
6378       && MEM_P (dest)
6379       && (register_operand (src, word_mode)
6380 	  || const_int_operand (src, VOIDmode)))
6381     {
6382       /* Emit standard pattern if possible.  */
6383       if (smode_bsize == bitsize)
6384 	{
6385 	  emit_move_insn (adjust_address (dest, smode, 0),
6386 			  gen_lowpart (smode, src));
6387 	  return true;
6388 	}
6389 
6390       /* (set (ze (mem)) (const_int)).  */
6391       else if (const_int_operand (src, VOIDmode))
6392 	{
6393 	  int size = bitsize / BITS_PER_UNIT;
6394 	  rtx src_mem = adjust_address (force_const_mem (word_mode, src),
6395 					BLKmode,
6396 					UNITS_PER_WORD - size);
6397 
6398 	  dest = adjust_address (dest, BLKmode, 0);
6399 	  set_mem_size (dest, size);
6400 	  s390_expand_cpymem (dest, src_mem, GEN_INT (size));
6401 	  return true;
6402 	}
6403 
6404       /* (set (ze (mem)) (reg)).  */
6405       else if (register_operand (src, word_mode))
6406 	{
6407 	  if (bitsize <= 32)
6408 	    emit_move_insn (gen_rtx_ZERO_EXTRACT (word_mode, dest, op1,
6409 						  const0_rtx), src);
6410 	  else
6411 	    {
6412 	      /* Emit st,stcmh sequence.  */
6413 	      int stcmh_width = bitsize - 32;
6414 	      int size = stcmh_width / BITS_PER_UNIT;
6415 
6416 	      emit_move_insn (adjust_address (dest, SImode, size),
6417 			      gen_lowpart (SImode, src));
6418 	      set_mem_size (dest, size);
6419 	      emit_move_insn (gen_rtx_ZERO_EXTRACT (word_mode, dest,
6420 						    GEN_INT (stcmh_width),
6421 						    const0_rtx),
6422 			      gen_rtx_LSHIFTRT (word_mode, src, GEN_INT (32)));
6423 	    }
6424 	  return true;
6425 	}
6426     }
6427 
6428   /* Generate INSERT CHARACTERS UNDER MASK (IC, ICM et al).  */
6429   if ((bitpos % BITS_PER_UNIT) == 0
6430       && (bitsize % BITS_PER_UNIT) == 0
6431       && (bitpos & 32) == ((bitpos + bitsize - 1) & 32)
6432       && MEM_P (src)
6433       && (mode == DImode || mode == SImode)
6434       && register_operand (dest, mode))
6435     {
6436       /* Emit a strict_low_part pattern if possible.  */
6437       if (smode_bsize == bitsize && bitpos == mode_bsize - smode_bsize)
6438 	{
6439 	  op = gen_rtx_STRICT_LOW_PART (VOIDmode, gen_lowpart (smode, dest));
6440 	  op = gen_rtx_SET (op, gen_lowpart (smode, src));
6441 	  clobber = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, CC_REGNUM));
6442 	  emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clobber)));
6443 	  return true;
6444 	}
6445 
6446       /* ??? There are more powerful versions of ICM that are not
6447 	 completely represented in the md file.  */
6448     }
6449 
6450   /* For z10, generate ROTATE THEN INSERT SELECTED BITS (RISBG et al).  */
6451   if (TARGET_Z10 && (mode == DImode || mode == SImode))
6452     {
6453       machine_mode mode_s = GET_MODE (src);
6454 
6455       if (CONSTANT_P (src))
6456 	{
6457 	  /* For constant zero values the representation with AND
6458 	     appears to be folded in more situations than the (set
6459 	     (zero_extract) ...).
6460 	     We only do this when the start and end of the bitfield
6461 	     remain in the same SImode chunk.  That way nihf or nilf
6462 	     can be used.
6463 	     The AND patterns might still generate a risbg for this.  */
6464 	  if (src == const0_rtx && bitpos / 32  == (bitpos + bitsize - 1) / 32)
6465 	    return false;
6466 	  else
6467 	    src = force_reg (mode, src);
6468 	}
6469       else if (mode_s != mode)
6470 	{
6471 	  gcc_assert (GET_MODE_BITSIZE (mode_s) >= bitsize);
6472 	  src = force_reg (mode_s, src);
6473 	  src = gen_lowpart (mode, src);
6474 	}
6475 
6476       op = gen_rtx_ZERO_EXTRACT (mode, dest, op1, op2),
6477       op = gen_rtx_SET (op, src);
6478 
6479       if (!TARGET_ZEC12)
6480 	{
6481 	  clobber = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, CC_REGNUM));
6482 	  op = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clobber));
6483 	}
6484       emit_insn (op);
6485 
6486       return true;
6487     }
6488 
6489   return false;
6490 }
6491 
6492 /* A subroutine of s390_expand_cs_hqi and s390_expand_atomic which returns a
6493    register that holds VAL of mode MODE shifted by COUNT bits.  */
6494 
6495 static inline rtx
6496 s390_expand_mask_and_shift (rtx val, machine_mode mode, rtx count)
6497 {
6498   val = expand_simple_binop (SImode, AND, val, GEN_INT (GET_MODE_MASK (mode)),
6499 			     NULL_RTX, 1, OPTAB_DIRECT);
6500   return expand_simple_binop (SImode, ASHIFT, val, count,
6501 			      NULL_RTX, 1, OPTAB_DIRECT);
6502 }
6503 
6504 /* Generate a vector comparison COND of CMP_OP1 and CMP_OP2 and store
6505    the result in TARGET.  */
6506 
6507 void
6508 s390_expand_vec_compare (rtx target, enum rtx_code cond,
6509 			 rtx cmp_op1, rtx cmp_op2)
6510 {
6511   machine_mode mode = GET_MODE (target);
6512   bool neg_p = false, swap_p = false;
6513   rtx tmp;
6514 
6515   if (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_VECTOR_FLOAT)
6516     {
6517       switch (cond)
6518 	{
6519 	  /* NE a != b -> !(a == b) */
6520 	case NE:   cond = EQ; neg_p = true;                break;
6521 	case UNGT:
6522 	  emit_insn (gen_vec_cmpungt (target, cmp_op1, cmp_op2));
6523 	  return;
6524 	case UNGE:
6525 	  emit_insn (gen_vec_cmpunge (target, cmp_op1, cmp_op2));
6526 	  return;
6527 	case LE:   cond = GE;               swap_p = true; break;
6528 	  /* UNLE: (a u<= b) -> (b u>= a).  */
6529 	case UNLE:
6530 	  emit_insn (gen_vec_cmpunge (target, cmp_op2, cmp_op1));
6531 	  return;
6532 	  /* LT: a < b -> b > a */
6533 	case LT:   cond = GT;               swap_p = true; break;
6534 	  /* UNLT: (a u< b) -> (b u> a).  */
6535 	case UNLT:
6536 	  emit_insn (gen_vec_cmpungt (target, cmp_op2, cmp_op1));
6537 	  return;
6538 	case UNEQ:
6539 	  emit_insn (gen_vec_cmpuneq (target, cmp_op1, cmp_op2));
6540 	  return;
6541 	case LTGT:
6542 	  emit_insn (gen_vec_cmpltgt (target, cmp_op1, cmp_op2));
6543 	  return;
6544 	case ORDERED:
6545 	  emit_insn (gen_vec_cmpordered (target, cmp_op1, cmp_op2));
6546 	  return;
6547 	case UNORDERED:
6548 	  emit_insn (gen_vec_cmpunordered (target, cmp_op1, cmp_op2));
6549 	  return;
6550 	default: break;
6551 	}
6552     }
6553   else
6554     {
6555       switch (cond)
6556 	{
6557 	  /* NE: a != b -> !(a == b) */
6558 	case NE:  cond = EQ;  neg_p = true;                break;
6559 	  /* GE: a >= b -> !(b > a) */
6560 	case GE:  cond = GT;  neg_p = true; swap_p = true; break;
6561 	  /* GEU: a >= b -> !(b > a) */
6562 	case GEU: cond = GTU; neg_p = true; swap_p = true; break;
6563 	  /* LE: a <= b -> !(a > b) */
6564 	case LE:  cond = GT;  neg_p = true;                break;
6565 	  /* LEU: a <= b -> !(a > b) */
6566 	case LEU: cond = GTU; neg_p = true;                break;
6567 	  /* LT: a < b -> b > a */
6568 	case LT:  cond = GT;                swap_p = true; break;
6569 	  /* LTU: a < b -> b > a */
6570 	case LTU: cond = GTU;               swap_p = true; break;
6571 	default: break;
6572 	}
6573     }
6574 
6575   if (swap_p)
6576     {
6577       tmp = cmp_op1; cmp_op1 = cmp_op2; cmp_op2 = tmp;
6578     }
6579 
6580   emit_insn (gen_rtx_SET (target, gen_rtx_fmt_ee (cond,
6581 						  mode,
6582 						  cmp_op1, cmp_op2)));
6583   if (neg_p)
6584     emit_insn (gen_rtx_SET (target, gen_rtx_NOT (mode, target)));
6585 }
6586 
6587 /* Expand the comparison CODE of CMP1 and CMP2 and copy 1 or 0 into
6588    TARGET if either all (ALL_P is true) or any (ALL_P is false) of the
6589    elements in CMP1 and CMP2 fulfill the comparison.
6590    This function is only used to emit patterns for the vx builtins and
6591    therefore only handles comparison codes required by the
6592    builtins.  */
6593 void
6594 s390_expand_vec_compare_cc (rtx target, enum rtx_code code,
6595 			    rtx cmp1, rtx cmp2, bool all_p)
6596 {
6597   machine_mode cc_producer_mode, cc_consumer_mode, scratch_mode;
6598   rtx tmp_reg = gen_reg_rtx (SImode);
6599   bool swap_p = false;
6600 
6601   if (GET_MODE_CLASS (GET_MODE (cmp1)) == MODE_VECTOR_INT)
6602     {
6603       switch (code)
6604 	{
6605 	case EQ:
6606 	case NE:
6607 	  cc_producer_mode = CCVEQmode;
6608 	  break;
6609 	case GE:
6610 	case LT:
6611 	  code = swap_condition (code);
6612 	  swap_p = true;
6613 	  /* fallthrough */
6614 	case GT:
6615 	case LE:
6616 	  cc_producer_mode = CCVIHmode;
6617 	  break;
6618 	case GEU:
6619 	case LTU:
6620 	  code = swap_condition (code);
6621 	  swap_p = true;
6622 	  /* fallthrough */
6623 	case GTU:
6624 	case LEU:
6625 	  cc_producer_mode = CCVIHUmode;
6626 	  break;
6627 	default:
6628 	  gcc_unreachable ();
6629 	}
6630 
6631       scratch_mode = GET_MODE (cmp1);
6632       /* These codes represent inverted CC interpretations.  Inverting
6633 	 an ALL CC mode results in an ANY CC mode and the other way
6634 	 around.  Invert the all_p flag here to compensate for
6635 	 that.  */
6636       if (code == NE || code == LE || code == LEU)
6637 	all_p = !all_p;
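      /* For example: an "all elements a != b" test produces its CC via the
	 vector compare-equal pattern (CCVEQmode) and is then consumed as
	 "not (any element a == b)", which is why ALL_P is flipped here.  */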
6638 
6639       cc_consumer_mode = all_p ? CCVIALLmode : CCVIANYmode;
6640     }
6641   else if (GET_MODE_CLASS (GET_MODE (cmp1)) == MODE_VECTOR_FLOAT)
6642     {
6643       bool inv_p = false;
6644 
6645       switch (code)
6646 	{
6647 	case EQ:   cc_producer_mode = CCVEQmode;  break;
6648 	case NE:   cc_producer_mode = CCVEQmode;  inv_p = true; break;
6649 	case GT:   cc_producer_mode = CCVFHmode;  break;
6650 	case GE:   cc_producer_mode = CCVFHEmode; break;
6651 	case UNLE: cc_producer_mode = CCVFHmode;  inv_p = true; break;
6652 	case UNLT: cc_producer_mode = CCVFHEmode; inv_p = true; break;
6653 	case LT:   cc_producer_mode = CCVFHmode;  code = GT; swap_p = true; break;
6654 	case LE:   cc_producer_mode = CCVFHEmode; code = GE; swap_p = true; break;
6655 	default: gcc_unreachable ();
6656 	}
6657       scratch_mode = related_int_vector_mode (GET_MODE (cmp1)).require ();
6658 
6659       if (inv_p)
6660 	all_p = !all_p;
6661 
6662       cc_consumer_mode = all_p ? CCVFALLmode : CCVFANYmode;
6663     }
6664   else
6665     gcc_unreachable ();
6666 
6667   if (swap_p)
6668     {
6669       rtx tmp = cmp2;
6670       cmp2 = cmp1;
6671       cmp1 = tmp;
6672     }
6673 
6674   emit_insn (gen_rtx_PARALLEL (VOIDmode,
6675 	       gen_rtvec (2, gen_rtx_SET (
6676 			       gen_rtx_REG (cc_producer_mode, CC_REGNUM),
6677 			       gen_rtx_COMPARE (cc_producer_mode, cmp1, cmp2)),
6678 			  gen_rtx_CLOBBER (VOIDmode,
6679 					   gen_rtx_SCRATCH (scratch_mode)))));
6680   emit_move_insn (target, const0_rtx);
6681   emit_move_insn (tmp_reg, const1_rtx);
6682 
6683   emit_move_insn (target,
6684 		  gen_rtx_IF_THEN_ELSE (SImode,
6685 		    gen_rtx_fmt_ee (code, VOIDmode,
6686 				    gen_rtx_REG (cc_consumer_mode, CC_REGNUM),
6687 				    const0_rtx),
6688 					tmp_reg, target));
6689 }
6690 
6691 /* Invert the comparison CODE applied to a CC mode.  This is only safe
6692    if we know whether the result was created by a floating point
6693    compare or not.  For the CCV modes this is encoded as part of the
6694    mode.  */
6695 enum rtx_code
6696 s390_reverse_condition (machine_mode mode, enum rtx_code code)
6697 {
6698   /* Reversal of FP compares needs care -- an ordered compare
6699      becomes an unordered compare and vice versa.  */
6700   if (mode == CCVFALLmode || mode == CCVFANYmode || mode == CCSFPSmode)
6701     return reverse_condition_maybe_unordered (code);
6702   else if (mode == CCVIALLmode || mode == CCVIANYmode)
6703     return reverse_condition (code);
6704   else
6705     gcc_unreachable ();
6706 }
6707 
6708 /* Generate a vector comparison expression loading either elements of
6709    THEN or ELS into TARGET depending on the comparison COND of CMP_OP1
6710    and CMP_OP2.  */
6711 
6712 void
6713 s390_expand_vcond (rtx target, rtx then, rtx els,
6714 		   enum rtx_code cond, rtx cmp_op1, rtx cmp_op2)
6715 {
6716   rtx tmp;
6717   machine_mode result_mode;
6718   rtx result_target;
6719 
6720   machine_mode target_mode = GET_MODE (target);
6721   machine_mode cmp_mode = GET_MODE (cmp_op1);
6722   rtx op = (cond == LT) ? els : then;
6723 
6724   /* Try to optimize x < 0 ? -1 : 0 into (signed) x >> 31
6725      and x < 0 ? 1 : 0 into (unsigned) x >> 31.  Likewise
6726      for short and byte (x >> 15 and x >> 7 respectively).  */
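  /* Worked example (illustrative): for V4SImode elements SHIFT below is 31,
     so "x < 0 ? -1 : 0" becomes an arithmetic shift right by 31 and
     "x < 0 ? 1 : 0" becomes a logical shift right by 31, per element.  */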
6727   if ((cond == LT || cond == GE)
6728       && target_mode == cmp_mode
6729       && cmp_op2 == CONST0_RTX (cmp_mode)
6730       && op == CONST0_RTX (target_mode)
6731       && s390_vector_mode_supported_p (target_mode)
6732       && GET_MODE_CLASS (target_mode) == MODE_VECTOR_INT)
6733     {
6734       rtx negop = (cond == LT) ? then : els;
6735 
6736       int shift = GET_MODE_BITSIZE (GET_MODE_INNER (target_mode)) - 1;
6737 
6738       /* if x < 0 ? 1 : 0 or if x >= 0 ? 0 : 1 */
6739       if (negop == CONST1_RTX (target_mode))
6740 	{
6741 	  rtx res = expand_simple_binop (cmp_mode, LSHIFTRT, cmp_op1,
6742 					 GEN_INT (shift), target,
6743 					 1, OPTAB_DIRECT);
6744 	  if (res != target)
6745 	    emit_move_insn (target, res);
6746 	  return;
6747 	}
6748 
6749       /* if x < 0 ? -1 : 0 or if x >= 0 ? 0 : -1 */
6750       else if (all_ones_operand (negop, target_mode))
6751 	{
6752 	  rtx res = expand_simple_binop (cmp_mode, ASHIFTRT, cmp_op1,
6753 					 GEN_INT (shift), target,
6754 					 0, OPTAB_DIRECT);
6755 	  if (res != target)
6756 	    emit_move_insn (target, res);
6757 	  return;
6758 	}
6759     }
6760 
6761   /* We always use an integral type vector to hold the comparison
6762      result.  */
6763   result_mode = related_int_vector_mode (cmp_mode).require ();
6764   result_target = gen_reg_rtx (result_mode);
6765 
6766   /* We allow vector immediates as comparison operands that
6767      can be handled by the optimization above but not by the
6768      following code.  Hence, force them into registers here.  */
6769   if (!REG_P (cmp_op1))
6770     cmp_op1 = force_reg (GET_MODE (cmp_op1), cmp_op1);
6771 
6772   if (!REG_P (cmp_op2))
6773     cmp_op2 = force_reg (GET_MODE (cmp_op2), cmp_op2);
6774 
6775   s390_expand_vec_compare (result_target, cond,
6776 			   cmp_op1, cmp_op2);
6777 
6778   /* If the results are supposed to be either -1 or 0 we are done
6779      since this is what our compare instructions generate anyway.  */
6780   if (all_ones_operand (then, GET_MODE (then))
6781       && const0_operand (els, GET_MODE (els)))
6782     {
6783       emit_move_insn (target, gen_rtx_SUBREG (target_mode,
6784 					      result_target, 0));
6785       return;
6786     }
6787 
6788   /* Otherwise we will do a vsel afterwards.  */
6789   /* This gets triggered e.g.
6790      with gcc.c-torture/compile/pr53410-1.c */
6791   if (!REG_P (then))
6792     then = force_reg (target_mode, then);
6793 
6794   if (!REG_P (els))
6795     els = force_reg (target_mode, els);
6796 
6797   tmp = gen_rtx_fmt_ee (EQ, VOIDmode,
6798 			result_target,
6799 			CONST0_RTX (result_mode));
6800 
6801   /* We compared the result against zero above so we have to swap then
6802      and els here.  */
6803   tmp = gen_rtx_IF_THEN_ELSE (target_mode, tmp, els, then);
6804 
6805   gcc_assert (target_mode == GET_MODE (then));
6806   emit_insn (gen_rtx_SET (target, tmp));
6807 }
6808 
6809 /* Emit the RTX necessary to initialize the vector TARGET with values
6810    in VALS.  */
6811 void
6812 s390_expand_vec_init (rtx target, rtx vals)
6813 {
6814   machine_mode mode = GET_MODE (target);
6815   machine_mode inner_mode = GET_MODE_INNER (mode);
6816   int n_elts = GET_MODE_NUNITS (mode);
6817   bool all_same = true, all_regs = true, all_const_int = true;
6818   rtx x;
6819   int i;
6820 
6821   for (i = 0; i < n_elts; ++i)
6822     {
6823       x = XVECEXP (vals, 0, i);
6824 
6825       if (!CONST_INT_P (x))
6826 	all_const_int = false;
6827 
6828       if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
6829 	all_same = false;
6830 
6831       if (!REG_P (x))
6832 	all_regs = false;
6833     }
6834 
6835   /* Use vector gen mask or vector gen byte mask if possible.  */
6836   if (all_same && all_const_int
6837       && (XVECEXP (vals, 0, 0) == const0_rtx
6838 	  || s390_contiguous_bitmask_vector_p (XVECEXP (vals, 0, 0),
6839 					       NULL, NULL)
6840 	  || s390_bytemask_vector_p (XVECEXP (vals, 0, 0), NULL)))
6841     {
6842       emit_insn (gen_rtx_SET (target,
6843 			      gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0))));
6844       return;
6845     }
6846 
6847   /* Use vector replicate instructions.  vlrep/vrepi/vrep  */
6848   if (all_same)
6849     {
6850       rtx elem = XVECEXP (vals, 0, 0);
6851 
6852       /* vec_splats accepts general_operand as source.  */
6853       if (!general_operand (elem, GET_MODE (elem)))
6854 	elem = force_reg (inner_mode, elem);
6855 
6856       emit_insn (gen_rtx_SET (target, gen_rtx_VEC_DUPLICATE (mode, elem)));
6857       return;
6858     }
6859 
6860   if (all_regs
6861       && REG_P (target)
6862       && n_elts == 2
6863       && GET_MODE_SIZE (inner_mode) == 8)
6864     {
6865       /* Use vector load pair.  */
6866       emit_insn (gen_rtx_SET (target,
6867 			      gen_rtx_VEC_CONCAT (mode,
6868 						  XVECEXP (vals, 0, 0),
6869 						  XVECEXP (vals, 0, 1))));
6870       return;
6871     }
6872 
6873   /* Use vector load logical element and zero.  */
6874   if (TARGET_VXE && (mode == V4SImode || mode == V4SFmode))
6875     {
6876       bool found = true;
6877 
6878       x = XVECEXP (vals, 0, 0);
6879       if (memory_operand (x, inner_mode))
6880 	{
6881 	  for (i = 1; i < n_elts; ++i)
6882 	    found = found && XVECEXP (vals, 0, i) == const0_rtx;
6883 
6884 	  if (found)
6885 	    {
6886 	      machine_mode half_mode = (inner_mode == SFmode
6887 					? V2SFmode : V2SImode);
6888 	      emit_insn (gen_rtx_SET (target,
6889 			      gen_rtx_VEC_CONCAT (mode,
6890 						  gen_rtx_VEC_CONCAT (half_mode,
6891 								      x,
6892 								      const0_rtx),
6893 						  gen_rtx_VEC_CONCAT (half_mode,
6894 								      const0_rtx,
6895 								      const0_rtx))));
6896 	      return;
6897 	    }
6898 	}
6899     }
6900 
6901   /* We are about to set the vector elements one by one.  Zero out the
6902      full register first in order to help the data flow framework to
6903      detect it as a full VR set.  */
6904   emit_insn (gen_rtx_SET (target, CONST0_RTX (mode)));
6905 
6906   /* Unfortunately the vec_init expander is not allowed to fail.  So
6907      we have to implement the fallback ourselves.  */
6908   for (i = 0; i < n_elts; i++)
6909     {
6910       rtx elem = XVECEXP (vals, 0, i);
6911       if (!general_operand (elem, GET_MODE (elem)))
6912 	elem = force_reg (inner_mode, elem);
6913 
6914       emit_insn (gen_rtx_SET (target,
6915 			      gen_rtx_UNSPEC (mode,
6916 					      gen_rtvec (3, elem,
6917 							 GEN_INT (i), target),
6918 					      UNSPEC_VEC_SET)));
6919     }
6920 }
6921 
6922 /* Structure to hold the initial parameters for a compare_and_swap operation
6923    in HImode and QImode.  */
6924 
6925 struct alignment_context
6926 {
6927   rtx memsi;	  /* SI aligned memory location.  */
6928   rtx shift;	  /* Bit offset with regard to lsb.  */
6929   rtx modemask;	  /* Mask of the HQImode shifted by SHIFT bits.  */
6930   rtx modemaski;  /* ~modemask */
6931   bool aligned;	  /* True if memory is aligned, false otherwise.  */
6932 };
6933 
6934 /* A subroutine of s390_expand_cs_hqi and s390_expand_atomic to initialize
6935    structure AC for transparent simplification, if the memory alignment is known
6936    to be at least 32 bits.  MEM is the memory location for the actual operation
6937    and MODE its mode.  */
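/* Worked example (illustrative): for a QImode MEM whose address is one byte
   past an SImode boundary, the code below computes
     shift    = ((4 - 1) - 1) * 8 = 16
     modemask = 0xff << 16 = 0x00ff0000
   i.e. exactly the bits of that byte within the big-endian SImode word.  */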
6938 
6939 static void
6940 init_alignment_context (struct alignment_context *ac, rtx mem,
6941 			machine_mode mode)
6942 {
6943   ac->shift = GEN_INT (GET_MODE_SIZE (SImode) - GET_MODE_SIZE (mode));
6944   ac->aligned = (MEM_ALIGN (mem) >= GET_MODE_BITSIZE (SImode));
6945 
6946   if (ac->aligned)
6947     ac->memsi = adjust_address (mem, SImode, 0); /* Memory is aligned.  */
6948   else
6949     {
6950       /* Alignment is unknown.  */
6951       rtx byteoffset, addr, align;
6952 
6953       /* Force the address into a register.  */
6954       addr = force_reg (Pmode, XEXP (mem, 0));
6955 
6956       /* Align it to SImode.  */
6957       align = expand_simple_binop (Pmode, AND, addr,
6958 				   GEN_INT (-GET_MODE_SIZE (SImode)),
6959 				   NULL_RTX, 1, OPTAB_DIRECT);
6960       /* Generate MEM.  */
6961       ac->memsi = gen_rtx_MEM (SImode, align);
6962       MEM_VOLATILE_P (ac->memsi) = MEM_VOLATILE_P (mem);
6963       set_mem_alias_set (ac->memsi, ALIAS_SET_MEMORY_BARRIER);
6964       set_mem_align (ac->memsi, GET_MODE_BITSIZE (SImode));
6965 
6966       /* Calculate shiftcount.  */
6967       byteoffset = expand_simple_binop (Pmode, AND, addr,
6968 					GEN_INT (GET_MODE_SIZE (SImode) - 1),
6969 					NULL_RTX, 1, OPTAB_DIRECT);
6970       /* As we already have some offset, evaluate the remaining distance.  */
6971       ac->shift = expand_simple_binop (SImode, MINUS, ac->shift, byteoffset,
6972 				      NULL_RTX, 1, OPTAB_DIRECT);
6973     }
6974 
6975   /* Shift is the byte count, but we need the bitcount.  */
6976   ac->shift = expand_simple_binop (SImode, ASHIFT, ac->shift, GEN_INT (3),
6977 				   NULL_RTX, 1, OPTAB_DIRECT);
6978 
6979   /* Calculate masks.  */
6980   ac->modemask = expand_simple_binop (SImode, ASHIFT,
6981 				      GEN_INT (GET_MODE_MASK (mode)),
6982 				      ac->shift, NULL_RTX, 1, OPTAB_DIRECT);
6983   ac->modemaski = expand_simple_unop (SImode, NOT, ac->modemask,
6984 				      NULL_RTX, 1);
6985 }
6986 
6987 /* A subroutine of s390_expand_cs_hqi.  Insert INS into VAL.  If possible,
6988    use a single insv insn into SEQ2.  Otherwise, put prep insns in SEQ1 and
6989    perform the merge in SEQ2.  */
6990 
6991 static rtx
6992 s390_two_part_insv (struct alignment_context *ac, rtx *seq1, rtx *seq2,
6993 		    machine_mode mode, rtx val, rtx ins)
6994 {
6995   rtx tmp;
6996 
6997   if (ac->aligned)
6998     {
6999       start_sequence ();
7000       tmp = copy_to_mode_reg (SImode, val);
7001       if (s390_expand_insv (tmp, GEN_INT (GET_MODE_BITSIZE (mode)),
7002 			    const0_rtx, ins))
7003 	{
7004 	  *seq1 = NULL;
7005 	  *seq2 = get_insns ();
7006 	  end_sequence ();
7007 	  return tmp;
7008 	}
7009       end_sequence ();
7010     }
7011 
7012   /* Failed to use insv.  Generate a two part shift and mask.  */
7013   start_sequence ();
7014   tmp = s390_expand_mask_and_shift (ins, mode, ac->shift);
7015   *seq1 = get_insns ();
7016   end_sequence ();
7017 
7018   start_sequence ();
7019   tmp = expand_simple_binop (SImode, IOR, tmp, val, NULL_RTX, 1, OPTAB_DIRECT);
7020   *seq2 = get_insns ();
7021   end_sequence ();
7022 
7023   return tmp;
7024 }
7025 
7026 /* Expand an atomic compare and swap operation for HImode and QImode.  MEM is
7027    the memory location, CMP the old value to compare MEM with and NEW_RTX the
7028    value to set if CMP == MEM.  */
7029 
7030 static void
7031 s390_expand_cs_hqi (machine_mode mode, rtx btarget, rtx vtarget, rtx mem,
7032 		    rtx cmp, rtx new_rtx, bool is_weak)
7033 {
7034   struct alignment_context ac;
7035   rtx cmpv, newv, val, cc, seq0, seq1, seq2, seq3;
7036   rtx res = gen_reg_rtx (SImode);
7037   rtx_code_label *csloop = NULL, *csend = NULL;
7038 
7039   gcc_assert (MEM_P (mem));
7040 
7041   init_alignment_context (&ac, mem, mode);
7042 
7043   /* Load full word.  Subsequent loads are performed by CS.  */
7044   val = expand_simple_binop (SImode, AND, ac.memsi, ac.modemaski,
7045 			     NULL_RTX, 1, OPTAB_DIRECT);
7046 
7047   /* Prepare insertions of cmp and new_rtx into the loaded value.  When
7048      possible, we try to use insv to make this happen efficiently.  If
7049      that fails we'll generate code both inside and outside the loop.  */
7050   cmpv = s390_two_part_insv (&ac, &seq0, &seq2, mode, val, cmp);
7051   newv = s390_two_part_insv (&ac, &seq1, &seq3, mode, val, new_rtx);
7052 
7053   if (seq0)
7054     emit_insn (seq0);
7055   if (seq1)
7056     emit_insn (seq1);
7057 
7058   /* Start CS loop.  */
7059   if (!is_weak)
7060     {
7061       /* Begin assuming success.  */
7062       emit_move_insn (btarget, const1_rtx);
7063 
7064       csloop = gen_label_rtx ();
7065       csend = gen_label_rtx ();
7066       emit_label (csloop);
7067     }
7068 
7069   /* val = "<mem>00..0<mem>"
7070    * cmp = "00..0<cmp>00..0"
7071    * new = "00..0<new>00..0"
7072    */
7073 
7074   emit_insn (seq2);
7075   emit_insn (seq3);
7076 
7077   cc = s390_emit_compare_and_swap (EQ, res, ac.memsi, cmpv, newv, CCZ1mode);
7078   if (is_weak)
7079     emit_insn (gen_cstorecc4 (btarget, cc, XEXP (cc, 0), XEXP (cc, 1)));
7080   else
7081     {
7082       rtx tmp;
7083 
7084       /* Jump to end if we're done (likely?).  */
7085       s390_emit_jump (csend, cc);
7086 
7087       /* Check for changes outside mode, and loop internal if so.
7088 	 Arrange the moves so that the compare is adjacent to the
7089 	 branch so that we can generate CRJ.  */
7090       tmp = copy_to_reg (val);
7091       force_expand_binop (SImode, and_optab, res, ac.modemaski, val,
7092 			  1, OPTAB_DIRECT);
7093       cc = s390_emit_compare (NE, val, tmp);
7094       s390_emit_jump (csloop, cc);
7095 
7096       /* Failed.  */
7097       emit_move_insn (btarget, const0_rtx);
7098       emit_label (csend);
7099     }
7100 
7101   /* Return the correct part of the bitfield.  */
7102   convert_move (vtarget, expand_simple_binop (SImode, LSHIFTRT, res, ac.shift,
7103 					      NULL_RTX, 1, OPTAB_DIRECT), 1);
7104 }
7105 
7106 /* Variant of s390_expand_cs for SI, DI and TI modes.  */
7107 static void
7108 s390_expand_cs_tdsi (machine_mode mode, rtx btarget, rtx vtarget, rtx mem,
7109 		     rtx cmp, rtx new_rtx, bool is_weak)
7110 {
7111   rtx output = vtarget;
7112   rtx_code_label *skip_cs_label = NULL;
7113   bool do_const_opt = false;
7114 
7115   if (!register_operand (output, mode))
7116     output = gen_reg_rtx (mode);
7117 
7118   /* If IS_WEAK is true and the INPUT value is a constant, compare the memory
7119      with the constant first and skip the compare_and_swap because it's very
7120      expensive and likely to fail anyway.
7121      Note 1: This is done only for IS_WEAK.  C11 allows optimizations that may
7122      cause spurious failures in that case.
7123      Note 2: It may be useful to do this also for non-constant INPUT.
7124      Note 3: Currently only targets with "load on condition" are supported
7125      (z196 and newer).  */
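  /* Illustration (hypothetical use, not from this file): this shortcut
     typically triggers for a weak __atomic_compare_exchange_n call whose
     expected value folds to a compile-time constant.  */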
7126 
7127   if (TARGET_Z196
7128       && (mode == SImode || mode == DImode))
7129     do_const_opt = (is_weak && CONST_INT_P (cmp));
7130 
7131   if (do_const_opt)
7132     {
7133       rtx cc = gen_rtx_REG (CCZmode, CC_REGNUM);
7134 
7135       skip_cs_label = gen_label_rtx ();
7136       emit_move_insn (btarget, const0_rtx);
7137       if (CONST_INT_P (cmp) && INTVAL (cmp) == 0)
7138 	{
7139 	  rtvec lt = rtvec_alloc (2);
7140 
7141 	  /* Load-and-test + conditional jump.  */
7142 	  RTVEC_ELT (lt, 0)
7143 	    = gen_rtx_SET (cc, gen_rtx_COMPARE (CCZmode, mem, cmp));
7144 	  RTVEC_ELT (lt, 1) = gen_rtx_SET (output, mem);
7145 	  emit_insn (gen_rtx_PARALLEL (VOIDmode, lt));
7146 	}
7147       else
7148 	{
7149 	  emit_move_insn (output, mem);
7150 	  emit_insn (gen_rtx_SET (cc, gen_rtx_COMPARE (CCZmode, output, cmp)));
7151 	}
7152       s390_emit_jump (skip_cs_label, gen_rtx_NE (VOIDmode, cc, const0_rtx));
7153       add_reg_br_prob_note (get_last_insn (),
7154 			    profile_probability::very_unlikely ());
7155       /* If the jump is not taken, OUTPUT is the expected value.  */
7156       cmp = output;
7157       /* Reload newval to a register manually, *after* the compare and jump
7158 	 above.  Otherwise Reload might place it before the jump.  */
7159     }
7160   else
7161     cmp = force_reg (mode, cmp);
7162   new_rtx = force_reg (mode, new_rtx);
7163   s390_emit_compare_and_swap (EQ, output, mem, cmp, new_rtx,
7164 			      (do_const_opt) ? CCZmode : CCZ1mode);
7165   if (skip_cs_label != NULL)
7166     emit_label (skip_cs_label);
7167 
7168   /* We deliberately accept non-register operands in the predicate
7169      to ensure the write back to the output operand happens *before*
7170      the store-flags code below.  This makes it easier for combine
7171      to merge the store-flags code with a potential test-and-branch
7172      pattern following (immediately!) afterwards.  */
7173   if (output != vtarget)
7174     emit_move_insn (vtarget, output);
7175 
7176   if (do_const_opt)
7177     {
7178       rtx cc, cond, ite;
7179 
7180       /* Do not use gen_cstorecc4 here because it writes either 1 or 0, but
7181 	 btarget has already been initialized with 0 above.  */
7182       cc = gen_rtx_REG (CCZmode, CC_REGNUM);
7183       cond = gen_rtx_EQ (VOIDmode, cc, const0_rtx);
7184       ite = gen_rtx_IF_THEN_ELSE (SImode, cond, const1_rtx, btarget);
7185       emit_insn (gen_rtx_SET (btarget, ite));
7186     }
7187   else
7188     {
7189       rtx cc, cond;
7190 
7191       cc = gen_rtx_REG (CCZ1mode, CC_REGNUM);
7192       cond = gen_rtx_EQ (SImode, cc, const0_rtx);
7193       emit_insn (gen_cstorecc4 (btarget, cond, cc, const0_rtx));
7194     }
7195 }
7196 
7197 /* Expand an atomic compare and swap operation.  MEM is the memory location,
7198    CMP the old value to compare MEM with and NEW_RTX the value to set if
7199    CMP == MEM.  */
7200 
7201 void
7202 s390_expand_cs (machine_mode mode, rtx btarget, rtx vtarget, rtx mem,
7203 		rtx cmp, rtx new_rtx, bool is_weak)
7204 {
7205   switch (mode)
7206     {
7207     case E_TImode:
7208     case E_DImode:
7209     case E_SImode:
7210       s390_expand_cs_tdsi (mode, btarget, vtarget, mem, cmp, new_rtx, is_weak);
7211       break;
7212     case E_HImode:
7213     case E_QImode:
7214       s390_expand_cs_hqi (mode, btarget, vtarget, mem, cmp, new_rtx, is_weak);
7215       break;
7216     default:
7217       gcc_unreachable ();
7218     }
7219 }
7220 
7221 /* Expand an atomic_exchange operation simulated with a compare-and-swap loop.
7222    The memory location MEM is set to INPUT.  OUTPUT is set to the previous value
7223    of MEM.  */
7224 
7225 void
7226 s390_expand_atomic_exchange_tdsi (rtx output, rtx mem, rtx input)
7227 {
7228   machine_mode mode = GET_MODE (mem);
7229   rtx_code_label *csloop;
7230 
7231   if (TARGET_Z196
7232       && (mode == DImode || mode == SImode)
7233       && CONST_INT_P (input) && INTVAL (input) == 0)
7234     {
7235       emit_move_insn (output, const0_rtx);
7236       if (mode == DImode)
7237 	emit_insn (gen_atomic_fetch_anddi (output, mem, const0_rtx, input));
7238       else
7239 	emit_insn (gen_atomic_fetch_andsi (output, mem, const0_rtx, input));
7240       return;
7241     }
7242 
7243   input = force_reg (mode, input);
7244   emit_move_insn (output, mem);
7245   csloop = gen_label_rtx ();
7246   emit_label (csloop);
7247   s390_emit_jump (csloop, s390_emit_compare_and_swap (NE, output, mem, output,
7248 						      input, CCZ1mode));
7249 }
7250 
7251 /* Expand an atomic operation CODE of mode MODE.  MEM is the memory location
7252    and VAL the value to play with.  If AFTER is true then store the value
7253    MEM holds after the operation, if AFTER is false then store the value MEM
7254    holds before the operation.  If TARGET is zero then discard that value, else
7255    store it to TARGET.  */
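/* Note for readers: as can be seen in the MULT case below, CODE == MULT is
   used to encode a NAND operation since there is no dedicated NAND rtx
   code.  */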
7256 
7257 void
7258 s390_expand_atomic (machine_mode mode, enum rtx_code code,
7259 		    rtx target, rtx mem, rtx val, bool after)
7260 {
7261   struct alignment_context ac;
7262   rtx cmp;
7263   rtx new_rtx = gen_reg_rtx (SImode);
7264   rtx orig = gen_reg_rtx (SImode);
7265   rtx_code_label *csloop = gen_label_rtx ();
7266 
7267   gcc_assert (!target || register_operand (target, VOIDmode));
7268   gcc_assert (MEM_P (mem));
7269 
7270   init_alignment_context (&ac, mem, mode);
7271 
7272   /* Shift val to the correct bit positions.
7273      Preserve "icm", but prevent "ex icm".  */
7274   if (!(ac.aligned && code == SET && MEM_P (val)))
7275     val = s390_expand_mask_and_shift (val, mode, ac.shift);
7276 
7277   /* Further preparation insns.  */
7278   if (code == PLUS || code == MINUS)
7279     emit_move_insn (orig, val);
7280   else if (code == MULT || code == AND) /* val = "11..1<val>11..1" */
7281     val = expand_simple_binop (SImode, XOR, val, ac.modemaski,
7282 			       NULL_RTX, 1, OPTAB_DIRECT);
7283 
7284   /* Load full word.  Subsequent loads are performed by CS.  */
7285   cmp = force_reg (SImode, ac.memsi);
7286 
7287   /* Start CS loop.  */
7288   emit_label (csloop);
7289   emit_move_insn (new_rtx, cmp);
7290 
7291   /* Patch new with val at correct position.  */
7292   switch (code)
7293     {
7294     case PLUS:
7295     case MINUS:
7296       val = expand_simple_binop (SImode, code, new_rtx, orig,
7297 				 NULL_RTX, 1, OPTAB_DIRECT);
7298       val = expand_simple_binop (SImode, AND, val, ac.modemask,
7299 				 NULL_RTX, 1, OPTAB_DIRECT);
7300       /* FALLTHRU */
7301     case SET:
7302       if (ac.aligned && MEM_P (val))
7303 	store_bit_field (new_rtx, GET_MODE_BITSIZE (mode), 0,
7304 			 0, 0, SImode, val, false);
7305       else
7306 	{
7307 	  new_rtx = expand_simple_binop (SImode, AND, new_rtx, ac.modemaski,
7308 				     NULL_RTX, 1, OPTAB_DIRECT);
7309 	  new_rtx = expand_simple_binop (SImode, IOR, new_rtx, val,
7310 				     NULL_RTX, 1, OPTAB_DIRECT);
7311 	}
7312       break;
7313     case AND:
7314     case IOR:
7315     case XOR:
7316       new_rtx = expand_simple_binop (SImode, code, new_rtx, val,
7317 				 NULL_RTX, 1, OPTAB_DIRECT);
7318       break;
7319     case MULT: /* NAND */
7320       new_rtx = expand_simple_binop (SImode, AND, new_rtx, val,
7321 				 NULL_RTX, 1, OPTAB_DIRECT);
7322       new_rtx = expand_simple_binop (SImode, XOR, new_rtx, ac.modemask,
7323 				 NULL_RTX, 1, OPTAB_DIRECT);
7324       break;
7325     default:
7326       gcc_unreachable ();
7327     }
7328 
7329   s390_emit_jump (csloop, s390_emit_compare_and_swap (NE, cmp,
7330 						      ac.memsi, cmp, new_rtx,
7331 						      CCZ1mode));
7332 
7333   /* Return the correct part of the bitfield.  */
7334   if (target)
7335     convert_move (target, expand_simple_binop (SImode, LSHIFTRT,
7336 					       after ? new_rtx : cmp, ac.shift,
7337 					       NULL_RTX, 1, OPTAB_DIRECT), 1);
7338 }
7339 
7340 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
7341    We need to emit DTP-relative relocations.  */
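/* For example, a call with SIZE == 8 and X == (symbol_ref "foo") emits
   "\t.quad\tfoo@DTPOFF".  */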
7342 
7343 static void s390_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
7344 
7345 static void
7346 s390_output_dwarf_dtprel (FILE *file, int size, rtx x)
7347 {
7348   switch (size)
7349     {
7350     case 4:
7351       fputs ("\t.long\t", file);
7352       break;
7353     case 8:
7354       fputs ("\t.quad\t", file);
7355       break;
7356     default:
7357       gcc_unreachable ();
7358     }
7359   output_addr_const (file, x);
7360   fputs ("@DTPOFF", file);
7361 }
7362 
7363 /* Return the proper mode for REGNO being represented in the dwarf
7364    unwind table.  */
7365 machine_mode
7366 s390_dwarf_frame_reg_mode (int regno)
7367 {
7368   machine_mode save_mode = default_dwarf_frame_reg_mode (regno);
7369 
7370   /* Make sure not to return DImode for any GPR with -m31 -mzarch.  */
7371   if (GENERAL_REGNO_P (regno))
7372     save_mode = Pmode;
7373 
7374   /* The rightmost 64 bits of vector registers are call-clobbered.  */
7375   if (GET_MODE_SIZE (save_mode) > 8)
7376     save_mode = DImode;
7377 
7378   return save_mode;
7379 }
7380 
7381 #ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
7382 /* Implement TARGET_MANGLE_TYPE.  */
7383 
7384 static const char *
7385 s390_mangle_type (const_tree type)
7386 {
7387   type = TYPE_MAIN_VARIANT (type);
7388 
7389   if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
7390       && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
7391     return NULL;
7392 
7393   if (type == s390_builtin_types[BT_BV16QI]) return "U6__boolc";
7394   if (type == s390_builtin_types[BT_BV8HI]) return "U6__bools";
7395   if (type == s390_builtin_types[BT_BV4SI]) return "U6__booli";
7396   if (type == s390_builtin_types[BT_BV2DI]) return "U6__booll";
7397 
7398   if (TYPE_MAIN_VARIANT (type) == long_double_type_node
7399       && TARGET_LONG_DOUBLE_128)
7400     return "g";
7401 
7402   /* For all other types, use normal C++ mangling.  */
7403   return NULL;
7404 }
7405 #endif
7406 
7407 /* In the name of slightly smaller debug output, and to cater to
7408    general assembler lossage, recognize various UNSPEC sequences
7409    and turn them back into a direct symbol reference.  */
7410 
7411 static rtx
7412 s390_delegitimize_address (rtx orig_x)
7413 {
7414   rtx x, y;
7415 
7416   orig_x = delegitimize_mem_from_attrs (orig_x);
7417   x = orig_x;
7418 
7419   /* Extract the symbol ref from:
7420      (plus:SI (reg:SI 12 %r12)
7421 	      (const:SI (unspec:SI [(symbol_ref/f:SI ("*.LC0"))]
7422 				    UNSPEC_GOTOFF/PLTOFF)))
7423      and
7424      (plus:SI (reg:SI 12 %r12)
7425 	      (const:SI (plus:SI (unspec:SI [(symbol_ref:SI ("L"))]
7426 					     UNSPEC_GOTOFF/PLTOFF)
7427 				 (const_int 4 [0x4]))))  */
7428   if (GET_CODE (x) == PLUS
7429       && REG_P (XEXP (x, 0))
7430       && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM
7431       && GET_CODE (XEXP (x, 1)) == CONST)
7432     {
7433       HOST_WIDE_INT offset = 0;
7434 
7435       /* The const operand.  */
7436       y = XEXP (XEXP (x, 1), 0);
7437 
7438       if (GET_CODE (y) == PLUS
7439 	  && GET_CODE (XEXP (y, 1)) == CONST_INT)
7440 	{
7441 	  offset = INTVAL (XEXP (y, 1));
7442 	  y = XEXP (y, 0);
7443 	}
7444 
7445       if (GET_CODE (y) == UNSPEC
7446 	  && (XINT (y, 1) == UNSPEC_GOTOFF
7447 	      || XINT (y, 1) == UNSPEC_PLTOFF))
7448 	return plus_constant (Pmode, XVECEXP (y, 0, 0), offset);
7449     }
7450 
7451   if (GET_CODE (x) != MEM)
7452     return orig_x;
7453 
7454   x = XEXP (x, 0);
7455   if (GET_CODE (x) == PLUS
7456       && GET_CODE (XEXP (x, 1)) == CONST
7457       && GET_CODE (XEXP (x, 0)) == REG
7458       && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
7459     {
7460       y = XEXP (XEXP (x, 1), 0);
7461       if (GET_CODE (y) == UNSPEC
7462 	  && XINT (y, 1) == UNSPEC_GOT)
7463 	y = XVECEXP (y, 0, 0);
7464       else
7465 	return orig_x;
7466     }
7467   else if (GET_CODE (x) == CONST)
7468     {
7469       /* Extract the symbol ref from:
7470 	 (mem:QI (const:DI (unspec:DI [(symbol_ref:DI ("foo"))]
7471 				       UNSPEC_PLT/GOTENT)))  */
7472 
7473       y = XEXP (x, 0);
7474       if (GET_CODE (y) == UNSPEC
7475 	  && (XINT (y, 1) == UNSPEC_GOTENT
7476 	      || XINT (y, 1) == UNSPEC_PLT))
7477 	y = XVECEXP (y, 0, 0);
7478       else
7479 	return orig_x;
7480     }
7481   else
7482     return orig_x;
7483 
7484   if (GET_MODE (orig_x) != Pmode)
7485     {
7486       if (GET_MODE (orig_x) == BLKmode)
7487 	return orig_x;
7488       y = lowpart_subreg (GET_MODE (orig_x), y, Pmode);
7489       if (y == NULL_RTX)
7490 	return orig_x;
7491     }
7492   return y;
7493 }
7494 
7495 /* Output operand OP to stdio stream FILE.
7496    OP is an address (register + offset) which is not used to address data;
7497    instead the rightmost bits are interpreted as the value.  */
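/* For example (illustrative operands), (plus (reg %r3) (const_int 7)) is
   printed as "7(%r3)", and a plain (const_int 7) as just "7".  */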
7498 
7499 static void
7500 print_addrstyle_operand (FILE *file, rtx op)
7501 {
7502   HOST_WIDE_INT offset;
7503   rtx base;
7504 
7505   /* Extract base register and offset.  */
7506   if (!s390_decompose_addrstyle_without_index (op, &base, &offset))
7507     gcc_unreachable ();
7508 
7509   /* Sanity check.  */
7510   if (base)
7511     {
7512       gcc_assert (GET_CODE (base) == REG);
7513       gcc_assert (REGNO (base) < FIRST_PSEUDO_REGISTER);
7514       gcc_assert (REGNO_REG_CLASS (REGNO (base)) == ADDR_REGS);
7515     }
7516 
7517   /* Offsets are restricted to twelve bits.  */
7518   fprintf (file, HOST_WIDE_INT_PRINT_DEC, offset & ((1 << 12) - 1));
7519   if (base)
7520     fprintf (file, "(%s)", reg_names[REGNO (base)]);
7521 }
7522 
7523 /* Print the shift count operand OP to FILE.
7524    OP is an address-style operand in a form which
7525    s390_valid_shift_count permits.  Subregs and no-op
7526    and-masking of the operand are stripped.  */
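/* For example (illustrative), an operand like
   (and:SI (subreg:SI (reg:DI ...)) (const_int 63)) is stripped down to the
   register below and then printed via print_addrstyle_operand.  */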
7527 
7528 static void
7529 print_shift_count_operand (FILE *file, rtx op)
7530 {
7531   /* No checking of the and mask required here.  */
7532   if (!s390_valid_shift_count (op, 0))
7533     gcc_unreachable ();
7534 
7535   while (op && GET_CODE (op) == SUBREG)
7536     op = SUBREG_REG (op);
7537 
7538   if (GET_CODE (op) == AND)
7539     op = XEXP (op, 0);
7540 
7541   print_addrstyle_operand (file, op);
7542 }
7543 
7544 /* Assigns the number of NOP halfwords to be emitted before and after the
7545    function label to *HW_BEFORE and *HW_AFTER.  Both pointers must not be NULL.
7546    If hotpatching is disabled for the function, the values are set to zero.
7547 */
7548 
7549 static void
7550 s390_function_num_hotpatch_hw (tree decl,
7551 			       int *hw_before,
7552 			       int *hw_after)
7553 {
7554   tree attr;
7555 
7556   attr = lookup_attribute ("hotpatch", DECL_ATTRIBUTES (decl));
7557 
7558   /* Handle the arguments of the hotpatch attribute.  The values
7559      specified via attribute might override the cmdline argument
7560      values.  */
7561   if (attr)
7562     {
7563       tree args = TREE_VALUE (attr);
7564 
7565       *hw_before = TREE_INT_CST_LOW (TREE_VALUE (args));
7566       *hw_after = TREE_INT_CST_LOW (TREE_VALUE (TREE_CHAIN (args)));
7567     }
7568   else
7569     {
7570       /* Use the values specified by the cmdline arguments.  */
7571       *hw_before = s390_hotpatch_hw_before_label;
7572       *hw_after = s390_hotpatch_hw_after_label;
7573     }
7574 }
7575 
7576 /* Write the current .machine and .machinemode specification to the assembler
7577    file.  */
7578 
7579 #ifdef HAVE_AS_MACHINE_MACHINEMODE
7580 static void
7581 s390_asm_output_machine_for_arch (FILE *asm_out_file)
7582 {
7583   fprintf (asm_out_file, "\t.machinemode %s\n",
7584 	   (TARGET_ZARCH) ? "zarch" : "esa");
7585   fprintf (asm_out_file, "\t.machine \"%s",
7586 	   processor_table[s390_arch].binutils_name);
7587   if (S390_USE_ARCHITECTURE_MODIFIERS)
7588     {
7589       int cpu_flags;
7590 
7591       cpu_flags = processor_flags_table[(int) s390_arch];
7592       if (TARGET_HTM && !(cpu_flags & PF_TX))
7593 	fprintf (asm_out_file, "+htm");
7594       else if (!TARGET_HTM && (cpu_flags & PF_TX))
7595 	fprintf (asm_out_file, "+nohtm");
7596       if (TARGET_VX && !(cpu_flags & PF_VX))
7597 	fprintf (asm_out_file, "+vx");
7598       else if (!TARGET_VX && (cpu_flags & PF_VX))
7599 	fprintf (asm_out_file, "+novx");
7600     }
7601   fprintf (asm_out_file, "\"\n");
7602 }
7603 
7604 /* Write an extra function header before the very start of the function.  */
7605 
7606 void
7607 s390_asm_output_function_prefix (FILE *asm_out_file,
7608 				 const char *fnname ATTRIBUTE_UNUSED)
7609 {
7610   if (DECL_FUNCTION_SPECIFIC_TARGET (current_function_decl) == NULL)
7611     return;
7612   /* Since only the function specific options are saved but not the indications
7613      which options are set, it's too much work here to figure out which options
7614      have actually changed.  Thus, generate .machine and .machinemode whenever a
7615      function has the target attribute or pragma.  */
7616   fprintf (asm_out_file, "\t.machinemode push\n");
7617   fprintf (asm_out_file, "\t.machine push\n");
7618   s390_asm_output_machine_for_arch (asm_out_file);
7619 }
7620 
7621 /* Write an extra function footer after the very end of the function.  */
7622 
7623 void
7624 s390_asm_declare_function_size (FILE *asm_out_file,
7625 				const char *fnname, tree decl)
7626 {
7627   if (!flag_inhibit_size_directive)
7628     ASM_OUTPUT_MEASURED_SIZE (asm_out_file, fnname);
7629   if (DECL_FUNCTION_SPECIFIC_TARGET (decl) == NULL)
7630     return;
7631   fprintf (asm_out_file, "\t.machine pop\n");
7632   fprintf (asm_out_file, "\t.machinemode pop\n");
7633 }
7634 #endif
7635 
7636 /* Write the extra assembler code needed to declare a function properly.  */
7637 
7638 void
7639 s390_asm_output_function_label (FILE *asm_out_file, const char *fname,
7640 				tree decl)
7641 {
7642   int hw_before, hw_after;
7643 
7644   s390_function_num_hotpatch_hw (decl, &hw_before, &hw_after);
7645   if (hw_before > 0)
7646     {
7647       unsigned int function_alignment;
7648       int i;
7649 
7650       /* Add a trampoline code area before the function label and initialize it
7651 	 with two-byte nop instructions.  This area can be overwritten with code
7652 	 that jumps to a patched version of the function.  */
7653       asm_fprintf (asm_out_file, "\tnopr\t%%r0"
7654 		   "\t# pre-label NOPs for hotpatch (%d halfwords)\n",
7655 		   hw_before);
7656       for (i = 1; i < hw_before; i++)
7657 	fputs ("\tnopr\t%r0\n", asm_out_file);
7658 
7659       /* Note:  The function label must be aligned so that (a) the bytes of the
7660 	 following nop do not cross a cacheline boundary, and (b) a jump address
7661 	 (eight bytes for 64 bit targets, 4 bytes for 32 bit targets) can be
7662 	 stored directly before the label without crossing a cacheline
7663 	 boundary.  All this is necessary to make sure the trampoline code can
7664 	 be changed atomically.
7665 	 This alignment is done automatically using the FUNCTION_BOUNDARY, but
7666 	 if there are NOPs before the function label, the alignment is placed
7667 	 before them.  So it is necessary to duplicate the alignment after the
7668 	 NOPs.  */
7669       function_alignment = MAX (8, DECL_ALIGN (decl) / BITS_PER_UNIT);
7670       if (! DECL_USER_ALIGN (decl))
7671 	function_alignment
7672 	  = MAX (function_alignment,
7673 		 (unsigned int) align_functions.levels[0].get_value ());
7674       fputs ("\t# alignment for hotpatch\n", asm_out_file);
7675       ASM_OUTPUT_ALIGN (asm_out_file, align_functions.levels[0].log);
7676     }
7677 
7678   if (S390_USE_TARGET_ATTRIBUTE && TARGET_DEBUG_ARG)
7679     {
7680       asm_fprintf (asm_out_file, "\t# fn:%s ar%d\n", fname, s390_arch);
7681       asm_fprintf (asm_out_file, "\t# fn:%s tu%d\n", fname, s390_tune);
7682       asm_fprintf (asm_out_file, "\t# fn:%s sg%d\n", fname, s390_stack_guard);
7683       asm_fprintf (asm_out_file, "\t# fn:%s ss%d\n", fname, s390_stack_size);
7684       asm_fprintf (asm_out_file, "\t# fn:%s bc%d\n", fname, s390_branch_cost);
7685       asm_fprintf (asm_out_file, "\t# fn:%s wf%d\n", fname,
7686 		   s390_warn_framesize);
7687       asm_fprintf (asm_out_file, "\t# fn:%s ba%d\n", fname, TARGET_BACKCHAIN);
7688       asm_fprintf (asm_out_file, "\t# fn:%s hd%d\n", fname, TARGET_HARD_DFP);
7689       asm_fprintf (asm_out_file, "\t# fn:%s hf%d\n", fname, !TARGET_SOFT_FLOAT);
7690       asm_fprintf (asm_out_file, "\t# fn:%s ht%d\n", fname, TARGET_OPT_HTM);
7691       asm_fprintf (asm_out_file, "\t# fn:%s vx%d\n", fname, TARGET_OPT_VX);
7692       asm_fprintf (asm_out_file, "\t# fn:%s ps%d\n", fname,
7693 		   TARGET_PACKED_STACK);
7694       asm_fprintf (asm_out_file, "\t# fn:%s se%d\n", fname, TARGET_SMALL_EXEC);
7695       asm_fprintf (asm_out_file, "\t# fn:%s mv%d\n", fname, TARGET_MVCLE);
7696       asm_fprintf (asm_out_file, "\t# fn:%s zv%d\n", fname, TARGET_ZVECTOR);
7697       asm_fprintf (asm_out_file, "\t# fn:%s wd%d\n", fname,
7698 		   s390_warn_dynamicstack_p);
7699     }
7700   ASM_OUTPUT_LABEL (asm_out_file, fname);
7701   if (hw_after > 0)
7702     asm_fprintf (asm_out_file,
7703 		 "\t# post-label NOPs for hotpatch (%d halfwords)\n",
7704 		 hw_after);
7705 }
7706 
7707 /* Output machine-dependent UNSPECs occurring in address constant X
7708    in assembler syntax to stdio stream FILE.  Returns true if the
7709    constant X could be recognized, false otherwise.  */
7710 
7711 static bool
7712 s390_output_addr_const_extra (FILE *file, rtx x)
7713 {
7714   if (GET_CODE (x) == UNSPEC && XVECLEN (x, 0) == 1)
7715     switch (XINT (x, 1))
7716       {
7717       case UNSPEC_GOTENT:
7718 	output_addr_const (file, XVECEXP (x, 0, 0));
7719 	fprintf (file, "@GOTENT");
7720 	return true;
7721       case UNSPEC_GOT:
7722 	output_addr_const (file, XVECEXP (x, 0, 0));
7723 	fprintf (file, "@GOT");
7724 	return true;
7725       case UNSPEC_GOTOFF:
7726 	output_addr_const (file, XVECEXP (x, 0, 0));
7727 	fprintf (file, "@GOTOFF");
7728 	return true;
7729       case UNSPEC_PLT:
7730 	output_addr_const (file, XVECEXP (x, 0, 0));
7731 	fprintf (file, "@PLT");
7732 	return true;
7733       case UNSPEC_PLTOFF:
7734 	output_addr_const (file, XVECEXP (x, 0, 0));
7735 	fprintf (file, "@PLTOFF");
7736 	return true;
7737       case UNSPEC_TLSGD:
7738 	output_addr_const (file, XVECEXP (x, 0, 0));
7739 	fprintf (file, "@TLSGD");
7740 	return true;
7741       case UNSPEC_TLSLDM:
7742 	assemble_name (file, get_some_local_dynamic_name ());
7743 	fprintf (file, "@TLSLDM");
7744 	return true;
7745       case UNSPEC_DTPOFF:
7746 	output_addr_const (file, XVECEXP (x, 0, 0));
7747 	fprintf (file, "@DTPOFF");
7748 	return true;
7749       case UNSPEC_NTPOFF:
7750 	output_addr_const (file, XVECEXP (x, 0, 0));
7751 	fprintf (file, "@NTPOFF");
7752 	return true;
7753       case UNSPEC_GOTNTPOFF:
7754 	output_addr_const (file, XVECEXP (x, 0, 0));
7755 	fprintf (file, "@GOTNTPOFF");
7756 	return true;
7757       case UNSPEC_INDNTPOFF:
7758 	output_addr_const (file, XVECEXP (x, 0, 0));
7759 	fprintf (file, "@INDNTPOFF");
7760 	return true;
7761       }
7762 
7763   if (GET_CODE (x) == UNSPEC && XVECLEN (x, 0) == 2)
7764     switch (XINT (x, 1))
7765       {
7766       case UNSPEC_POOL_OFFSET:
7767 	x = gen_rtx_MINUS (GET_MODE (x), XVECEXP (x, 0, 0), XVECEXP (x, 0, 1));
7768 	output_addr_const (file, x);
7769 	return true;
7770       }
7771   return false;
7772 }
7773 
7774 /* Output address operand ADDR in assembler syntax to
7775    stdio stream FILE.  */
7776 
7777 void
7778 print_operand_address (FILE *file, rtx addr)
7779 {
7780   struct s390_address ad;
7781   memset (&ad, 0, sizeof (s390_address));
7782 
7783   if (s390_loadrelative_operand_p (addr, NULL, NULL))
7784     {
7785       if (!TARGET_Z10)
7786 	{
7787 	  output_operand_lossage ("symbolic memory references are "
7788 				  "only supported on z10 or later");
7789 	  return;
7790 	}
7791       output_addr_const (file, addr);
7792       return;
7793     }
7794 
7795   if (!s390_decompose_address (addr, &ad)
7796       || (ad.base && !REGNO_OK_FOR_BASE_P (REGNO (ad.base)))
7797       || (ad.indx && !REGNO_OK_FOR_INDEX_P (REGNO (ad.indx))))
7798     output_operand_lossage ("cannot decompose address");
7799 
7800   if (ad.disp)
7801     output_addr_const (file, ad.disp);
7802   else
7803     fprintf (file, "0");
7804 
7805   if (ad.base && ad.indx)
7806     fprintf (file, "(%s,%s)", reg_names[REGNO (ad.indx)],
7807 			      reg_names[REGNO (ad.base)]);
7808   else if (ad.base)
7809     fprintf (file, "(%s)", reg_names[REGNO (ad.base)]);
7810 }
7811 
7812 /* Output operand X in assembler syntax to stdio stream FILE.
7813    CODE specified the format flag.  The following format flags
7814    are recognized:
7815 
7816     'A': On z14 or higher: If the operand is a MEM, print the alignment
7817 	 hint usable with vl/vst, prefixed by a comma.
7818     'C': print opcode suffix for branch condition.
7819     'D': print opcode suffix for inverse branch condition.
7820     'E': print opcode suffix for branch on index instruction.
7821     'G': print the size of the operand in bytes.
7822     'J': print tls_load/tls_gdcall/tls_ldcall suffix
7823     'M': print the second word of a TImode operand.
7824     'N': print the second word of a DImode operand.
7825     'O': print only the displacement of a memory reference or address.
7826     'R': print only the base register of a memory reference or address.
7827     'S': print S-type memory reference (base+displacement).
7828     'Y': print address style operand without index (e.g. shift count or setmem
7829 	 operand).
7830 
7831     'b': print integer X as if it's an unsigned byte.
7832     'c': print integer X as if it's a signed byte.
7833     'e': "end" contiguous bitmask X in either DImode or vector inner mode.
7834     'f': "end" contiguous bitmask X in SImode.
7835     'h': print integer X as if it's a signed halfword.
7836     'i': print the first nonzero HImode part of X.
7837     'j': print the first HImode part unequal to -1 of X.
7838     'k': print the first nonzero SImode part of X.
7839     'm': print the first SImode part unequal to -1 of X.
7840     'o': print integer X as if it's an unsigned 32bit word.
7841     's': "start" of contiguous bitmask X in either DImode or vector inner mode.
7842     't': CONST_INT: "start" of contiguous bitmask X in SImode.
7843 	 CONST_VECTOR: Generate a bitmask for vgbm instruction.
7844     'x': print integer X as if it's an unsigned halfword.
7845     'v': print register number as vector register (v1 instead of f1).
7846 */
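/* Worked example (illustrative constant): for X == (const_int 0xabcd) the
   'x' modifier prints 43981 (the unsigned halfword) while 'h' prints -21555
   (the sign-extended halfword).  */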
7847 
7848 void
7849 print_operand (FILE *file, rtx x, int code)
7850 {
7851   HOST_WIDE_INT ival;
7852 
7853   switch (code)
7854     {
7855     case 'A':
7856       if (TARGET_VECTOR_LOADSTORE_ALIGNMENT_HINTS && MEM_P (x))
7857 	{
7858 	  if (MEM_ALIGN (x) >= 128)
7859 	    fprintf (file, ",4");
7860 	  else if (MEM_ALIGN (x) == 64)
7861 	    fprintf (file, ",3");
7862 	}
7863       return;
7864     case 'C':
7865       fprintf (file, s390_branch_condition_mnemonic (x, FALSE));
7866       return;
7867 
7868     case 'D':
7869       fprintf (file, s390_branch_condition_mnemonic (x, TRUE));
7870       return;
7871 
7872     case 'E':
7873       if (GET_CODE (x) == LE)
7874 	fprintf (file, "l");
7875       else if (GET_CODE (x) == GT)
7876 	fprintf (file, "h");
7877       else
7878 	output_operand_lossage ("invalid comparison operator "
7879 				"for 'E' output modifier");
7880       return;
7881 
7882     case 'J':
7883       if (GET_CODE (x) == SYMBOL_REF)
7884 	{
7885 	  fprintf (file, "%s", ":tls_load:");
7886 	  output_addr_const (file, x);
7887 	}
7888       else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLSGD)
7889 	{
7890 	  fprintf (file, "%s", ":tls_gdcall:");
7891 	  output_addr_const (file, XVECEXP (x, 0, 0));
7892 	}
7893       else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLSLDM)
7894 	{
7895 	  fprintf (file, "%s", ":tls_ldcall:");
7896 	  const char *name = get_some_local_dynamic_name ();
7897 	  gcc_assert (name);
7898 	  assemble_name (file, name);
7899 	}
7900       else
7901 	output_operand_lossage ("invalid reference for 'J' output modifier");
7902       return;
7903 
7904     case 'G':
7905       fprintf (file, "%u", GET_MODE_SIZE (GET_MODE (x)));
7906       return;
7907 
7908     case 'O':
7909       {
7910 	struct s390_address ad;
7911 	int ret;
7912 
7913 	ret = s390_decompose_address (MEM_P (x) ? XEXP (x, 0) : x, &ad);
7914 
7915 	if (!ret
7916 	    || (ad.base && !REGNO_OK_FOR_BASE_P (REGNO (ad.base)))
7917 	    || ad.indx)
7918 	  {
7919 	    output_operand_lossage ("invalid address for 'O' output modifier");
7920 	    return;
7921 	  }
7922 
7923 	if (ad.disp)
7924 	  output_addr_const (file, ad.disp);
7925 	else
7926 	  fprintf (file, "0");
7927       }
7928       return;
7929 
7930     case 'R':
7931       {
7932 	struct s390_address ad;
7933 	int ret;
7934 
7935 	ret = s390_decompose_address (MEM_P (x) ? XEXP (x, 0) : x, &ad);
7936 
7937 	if (!ret
7938 	    || (ad.base && !REGNO_OK_FOR_BASE_P (REGNO (ad.base)))
7939 	    || ad.indx)
7940 	  {
7941 	    output_operand_lossage ("invalid address for 'R' output modifier");
7942 	    return;
7943 	  }
7944 
7945 	if (ad.base)
7946 	  fprintf (file, "%s", reg_names[REGNO (ad.base)]);
7947 	else
7948 	  fprintf (file, "0");
7949       }
7950       return;
7951 
7952     case 'S':
7953       {
7954 	struct s390_address ad;
7955 	int ret;
7956 
7957 	if (!MEM_P (x))
7958 	  {
7959 	    output_operand_lossage ("memory reference expected for "
7960 				    "'S' output modifier");
7961 	    return;
7962 	  }
7963 	ret = s390_decompose_address (XEXP (x, 0), &ad);
7964 
7965 	if (!ret
7966 	    || (ad.base && !REGNO_OK_FOR_BASE_P (REGNO (ad.base)))
7967 	    || ad.indx)
7968 	  {
7969 	    output_operand_lossage ("invalid address for 'S' output modifier");
7970 	    return;
7971 	  }
7972 
7973 	if (ad.disp)
7974 	  output_addr_const (file, ad.disp);
7975 	else
7976 	  fprintf (file, "0");
7977 
7978 	if (ad.base)
7979 	  fprintf (file, "(%s)", reg_names[REGNO (ad.base)]);
7980       }
7981       return;
7982 
7983     case 'N':
7984       if (GET_CODE (x) == REG)
7985 	x = gen_rtx_REG (GET_MODE (x), REGNO (x) + 1);
7986       else if (GET_CODE (x) == MEM)
7987 	x = change_address (x, VOIDmode,
7988 			    plus_constant (Pmode, XEXP (x, 0), 4));
7989       else
7990 	output_operand_lossage ("register or memory expression expected "
7991 				"for 'N' output modifier");
7992       break;
7993 
7994     case 'M':
7995       if (GET_CODE (x) == REG)
7996 	x = gen_rtx_REG (GET_MODE (x), REGNO (x) + 1);
7997       else if (GET_CODE (x) == MEM)
7998 	x = change_address (x, VOIDmode,
7999 			    plus_constant (Pmode, XEXP (x, 0), 8));
8000       else
8001 	output_operand_lossage ("register or memory expression expected "
8002 				"for 'M' output modifier");
8003       break;
8004 
8005     case 'Y':
8006       print_shift_count_operand (file, x);
8007       return;
8008     }
8009 
8010   switch (GET_CODE (x))
8011     {
8012     case REG:
8013       /* Print FP regs as fx instead of vx when they are accessed
8014 	 through non-vector mode.  */
8015       if (code == 'v'
8016 	  || VECTOR_NOFP_REG_P (x)
8017 	  || (FP_REG_P (x) && VECTOR_MODE_P (GET_MODE (x)))
8018 	  || (VECTOR_REG_P (x)
8019 	      && (GET_MODE_SIZE (GET_MODE (x)) /
8020 		  s390_class_max_nregs (FP_REGS, GET_MODE (x))) > 8))
8021 	fprintf (file, "%%v%s", reg_names[REGNO (x)] + 2);
8022       else
8023 	fprintf (file, "%s", reg_names[REGNO (x)]);
8024       break;
8025 
8026     case MEM:
8027       output_address (GET_MODE (x), XEXP (x, 0));
8028       break;
8029 
8030     case CONST:
8031     case CODE_LABEL:
8032     case LABEL_REF:
8033     case SYMBOL_REF:
8034       output_addr_const (file, x);
8035       break;
8036 
8037     case CONST_INT:
8038       ival = INTVAL (x);
8039       switch (code)
8040 	{
8041 	case 0:
8042 	  break;
8043 	case 'b':
8044 	  ival &= 0xff;
8045 	  break;
8046 	case 'c':
8047 	  ival = ((ival & 0xff) ^ 0x80) - 0x80;
8048 	  break;
8049 	case 'x':
8050 	  ival &= 0xffff;
8051 	  break;
8052 	case 'h':
8053 	  ival = ((ival & 0xffff) ^ 0x8000) - 0x8000;
8054 	  break;
8055 	case 'i':
8056 	  ival = s390_extract_part (x, HImode, 0);
8057 	  break;
8058 	case 'j':
8059 	  ival = s390_extract_part (x, HImode, -1);
8060 	  break;
8061 	case 'k':
8062 	  ival = s390_extract_part (x, SImode, 0);
8063 	  break;
8064 	case 'm':
8065 	  ival = s390_extract_part (x, SImode, -1);
8066 	  break;
8067 	case 'o':
8068 	  ival &= 0xffffffff;
8069 	  break;
8070 	case 'e': case 'f':
8071 	case 's': case 't':
8072 	  {
8073 	    int start, end;
8074 	    int len;
8075 	    bool ok;
8076 
8077 	    len = (code == 's' || code == 'e' ? 64 : 32);
8078 	    ok = s390_contiguous_bitmask_p (ival, true, len, &start, &end);
8079 	    gcc_assert (ok);
8080 	    if (code == 's' || code == 't')
8081 	      ival = start;
8082 	    else
8083 	      ival = end;
8084 	  }
8085 	  break;
8086 	default:
8087 	  output_operand_lossage ("invalid constant for output modifier '%c'", code);
8088 	}
8089       fprintf (file, HOST_WIDE_INT_PRINT_DEC, ival);
8090       break;
8091 
8092     case CONST_WIDE_INT:
8093       if (code == 'b')
8094 	fprintf (file, HOST_WIDE_INT_PRINT_DEC,
8095 		 CONST_WIDE_INT_ELT (x, 0) & 0xff);
8096       else if (code == 'x')
8097 	fprintf (file, HOST_WIDE_INT_PRINT_DEC,
8098 		 CONST_WIDE_INT_ELT (x, 0) & 0xffff);
8099       else if (code == 'h')
8100 	fprintf (file, HOST_WIDE_INT_PRINT_DEC,
8101 		 ((CONST_WIDE_INT_ELT (x, 0) & 0xffff) ^ 0x8000) - 0x8000);
8102       else
8103 	{
8104 	  if (code == 0)
8105 	    output_operand_lossage ("invalid constant - try using "
8106 				    "an output modifier");
8107 	  else
8108 	    output_operand_lossage ("invalid constant for output modifier '%c'",
8109 				    code);
8110 	}
8111       break;
8112     case CONST_VECTOR:
8113       switch (code)
8114 	{
8115 	case 'h':
8116 	  gcc_assert (const_vec_duplicate_p (x));
8117 	  fprintf (file, HOST_WIDE_INT_PRINT_DEC,
8118 		   ((INTVAL (XVECEXP (x, 0, 0)) & 0xffff) ^ 0x8000) - 0x8000);
8119 	  break;
8120 	case 'e':
8121 	case 's':
8122 	  {
8123 	    int start, end;
8124 	    bool ok;
8125 
8126 	    ok = s390_contiguous_bitmask_vector_p (x, &start, &end);
8127 	    gcc_assert (ok);
8128 	    ival = (code == 's') ? start : end;
8129 	    fprintf (file, HOST_WIDE_INT_PRINT_DEC, ival);
8130 	  }
8131 	  break;
8132 	case 't':
8133 	  {
8134 	    unsigned mask;
8135 	    bool ok = s390_bytemask_vector_p (x, &mask);
8136 	    gcc_assert (ok);
8137 	    fprintf (file, "%u", mask);
8138 	  }
8139 	  break;
8140 
8141 	default:
8142 	  output_operand_lossage ("invalid constant vector for output "
8143 				  "modifier '%c'", code);
8144 	}
8145       break;
8146 
8147     default:
8148       if (code == 0)
8149 	output_operand_lossage ("invalid expression - try using "
8150 				"an output modifier");
8151       else
8152 	output_operand_lossage ("invalid expression for output "
8153 				"modifier '%c'", code);
8154       break;
8155     }
8156 }
8157 
8158 /* Target hook for assembling integer objects.  We need to define it
8159    here to work around a bug in some versions of GAS, which couldn't
8160    handle values smaller than INT_MIN when printed in decimal.  */
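/* For example, an aligned 8-byte (const_int -2147483649) is emitted as
   "\t.quad\t0xffffffff7fffffff" instead of the decimal form.  */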
8161 
8162 static bool
8163 s390_assemble_integer (rtx x, unsigned int size, int aligned_p)
8164 {
8165   if (size == 8 && aligned_p
8166       && GET_CODE (x) == CONST_INT && INTVAL (x) < INT_MIN)
8167     {
8168       fprintf (asm_out_file, "\t.quad\t" HOST_WIDE_INT_PRINT_HEX "\n",
8169 	       INTVAL (x));
8170       return true;
8171     }
8172   return default_assemble_integer (x, size, aligned_p);
8173 }
8174 
8175 /* Returns true if register REGNO is used for forming
8176    a memory address in expression X.  */
8177 
8178 static bool
8179 reg_used_in_mem_p (int regno, rtx x)
8180 {
8181   enum rtx_code code = GET_CODE (x);
8182   int i, j;
8183   const char *fmt;
8184 
8185   if (code == MEM)
8186     {
8187       if (refers_to_regno_p (regno, XEXP (x, 0)))
8188 	return true;
8189     }
8190   else if (code == SET
8191 	   && GET_CODE (SET_DEST (x)) == PC)
8192     {
8193       if (refers_to_regno_p (regno, SET_SRC (x)))
8194 	return true;
8195     }
8196 
8197   fmt = GET_RTX_FORMAT (code);
8198   for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
8199     {
8200       if (fmt[i] == 'e'
8201 	  && reg_used_in_mem_p (regno, XEXP (x, i)))
8202 	return true;
8203 
8204       else if (fmt[i] == 'E')
8205 	for (j = 0; j < XVECLEN (x, i); j++)
8206 	  if (reg_used_in_mem_p (regno, XVECEXP (x, i, j)))
8207 	    return true;
8208     }
8209   return false;
8210 }
8211 
8212 /* Returns true if expression DEP_RTX sets an address register
8213    used by instruction INSN to address memory.  */
8214 
8215 static bool
8216 addr_generation_dependency_p (rtx dep_rtx, rtx_insn *insn)
8217 {
8218   rtx target, pat;
8219 
8220   if (NONJUMP_INSN_P (dep_rtx))
8221     dep_rtx = PATTERN (dep_rtx);
8222 
8223   if (GET_CODE (dep_rtx) == SET)
8224     {
8225       target = SET_DEST (dep_rtx);
8226       if (GET_CODE (target) == STRICT_LOW_PART)
8227 	target = XEXP (target, 0);
8228       while (GET_CODE (target) == SUBREG)
8229 	target = SUBREG_REG (target);
8230 
8231       if (GET_CODE (target) == REG)
8232 	{
8233 	  int regno = REGNO (target);
8234 
8235 	  if (s390_safe_attr_type (insn) == TYPE_LA)
8236 	    {
8237 	      pat = PATTERN (insn);
8238 	      if (GET_CODE (pat) == PARALLEL)
8239 		{
8240 		  gcc_assert (XVECLEN (pat, 0) == 2);
8241 		  pat = XVECEXP (pat, 0, 0);
8242 		}
8243 	      gcc_assert (GET_CODE (pat) == SET);
8244 	      return refers_to_regno_p (regno, SET_SRC (pat));
8245 	    }
8246 	  else if (get_attr_atype (insn) == ATYPE_AGEN)
8247 	    return reg_used_in_mem_p (regno, PATTERN (insn));
8248 	}
8249     }
8250   return false;
8251 }
8252 
8253 /* Return 1 if DEP_INSN sets a register used by INSN in the agen unit.  */
8254 
8255 int
8256 s390_agen_dep_p (rtx_insn *dep_insn, rtx_insn *insn)
8257 {
8258   rtx dep_rtx = PATTERN (dep_insn);
8259   int i;
8260 
8261   if (GET_CODE (dep_rtx) == SET
8262       && addr_generation_dependency_p (dep_rtx, insn))
8263     return 1;
8264   else if (GET_CODE (dep_rtx) == PARALLEL)
8265     {
8266       for (i = 0; i < XVECLEN (dep_rtx, 0); i++)
8267 	{
8268 	  if (addr_generation_dependency_p (XVECEXP (dep_rtx, 0, i), insn))
8269 	    return 1;
8270 	}
8271     }
8272   return 0;
8273 }
8274 
8275 
8276 /* A C statement (sans semicolon) to update the integer scheduling priority
8277    INSN_PRIORITY (INSN).  Increase the priority to execute the INSN earlier,
8278    reduce the priority to execute INSN later.  Do not define this macro if
8279    you do not need to adjust the scheduling priorities of insns.
8280 
8281    A STD instruction should be scheduled earlier,
8282    in order to use the bypass.  */
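/* For instance, with the shifts below an FP store of priority 4 is raised
   to 32 and a GPR store or store-multiple to 8, so they are picked ahead
   of other ready insns of originally equal priority.  */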
8283 static int
8284 s390_adjust_priority (rtx_insn *insn, int priority)
8285 {
8286   if (! INSN_P (insn))
8287     return priority;
8288 
8289   if (s390_tune <= PROCESSOR_2064_Z900)
8290     return priority;
8291 
8292   switch (s390_safe_attr_type (insn))
8293     {
8294       case TYPE_FSTOREDF:
8295       case TYPE_FSTORESF:
8296 	priority = priority << 3;
8297 	break;
8298       case TYPE_STORE:
8299       case TYPE_STM:
8300 	priority = priority << 1;
8301 	break;
8302       default:
8303 	break;
8304     }
8305   return priority;
8306 }
8307 
8308 
8309 /* The number of instructions that can be issued per cycle.  */
8310 
8311 static int
8312 s390_issue_rate (void)
8313 {
8314   switch (s390_tune)
8315     {
8316     case PROCESSOR_2084_Z990:
8317     case PROCESSOR_2094_Z9_109:
8318     case PROCESSOR_2094_Z9_EC:
8319     case PROCESSOR_2817_Z196:
8320       return 3;
8321     case PROCESSOR_2097_Z10:
8322       return 2;
8323     case PROCESSOR_2064_Z900:
8324       /* Starting with EC12 we use the sched_reorder hook to take care
8325 	 of instruction dispatch constraints.  The algorithm only
8326 	 picks the best instruction and assumes only a single
8327 	 instruction gets issued per cycle.  */
8328     case PROCESSOR_2827_ZEC12:
8329     case PROCESSOR_2964_Z13:
8330     case PROCESSOR_3906_Z14:
8331     default:
8332       return 1;
8333     }
8334 }
8335 
8336 static int
8337 s390_first_cycle_multipass_dfa_lookahead (void)
8338 {
8339   return 4;
8340 }
8341 
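/* Helper for annotate_constant_pool_refs.  Recursively walk *X and rewrite
   every literal pool SYMBOL_REF that occurs inside a MEM address or as the
   source of a load-address SET into an UNSPEC_LTREF of the symbol and the
   function's literal pool base register, preserving any constant offset.  */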
8342 static void
8343 annotate_constant_pool_refs_1 (rtx *x)
8344 {
8345   int i, j;
8346   const char *fmt;
8347 
8348   gcc_assert (GET_CODE (*x) != SYMBOL_REF
8349 	      || !CONSTANT_POOL_ADDRESS_P (*x));
8350 
8351   /* Literal pool references can only occur inside a MEM ...  */
8352   if (GET_CODE (*x) == MEM)
8353     {
8354       rtx memref = XEXP (*x, 0);
8355 
8356       if (GET_CODE (memref) == SYMBOL_REF
8357 	  && CONSTANT_POOL_ADDRESS_P (memref))
8358 	{
8359 	  rtx base = cfun->machine->base_reg;
8360 	  rtx addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, memref, base),
8361 				     UNSPEC_LTREF);
8362 
8363 	  *x = replace_equiv_address (*x, addr);
8364 	  return;
8365 	}
8366 
8367       if (GET_CODE (memref) == CONST
8368 	  && GET_CODE (XEXP (memref, 0)) == PLUS
8369 	  && GET_CODE (XEXP (XEXP (memref, 0), 1)) == CONST_INT
8370 	  && GET_CODE (XEXP (XEXP (memref, 0), 0)) == SYMBOL_REF
8371 	  && CONSTANT_POOL_ADDRESS_P (XEXP (XEXP (memref, 0), 0)))
8372 	{
8373 	  HOST_WIDE_INT off = INTVAL (XEXP (XEXP (memref, 0), 1));
8374 	  rtx sym = XEXP (XEXP (memref, 0), 0);
8375 	  rtx base = cfun->machine->base_reg;
8376 	  rtx addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, sym, base),
8377 				     UNSPEC_LTREF);
8378 
8379 	  *x = replace_equiv_address (*x, plus_constant (Pmode, addr, off));
8380 	  return;
8381 	}
8382     }
8383 
8384   /* ... or a load-address type pattern.  */
8385   if (GET_CODE (*x) == SET)
8386     {
8387       rtx addrref = SET_SRC (*x);
8388 
8389       if (GET_CODE (addrref) == SYMBOL_REF
8390 	  && CONSTANT_POOL_ADDRESS_P (addrref))
8391 	{
8392 	  rtx base = cfun->machine->base_reg;
8393 	  rtx addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, addrref, base),
8394 				     UNSPEC_LTREF);
8395 
8396 	  SET_SRC (*x) = addr;
8397 	  return;
8398 	}
8399 
8400       if (GET_CODE (addrref) == CONST
8401 	  && GET_CODE (XEXP (addrref, 0)) == PLUS
8402 	  && GET_CODE (XEXP (XEXP (addrref, 0), 1)) == CONST_INT
8403 	  && GET_CODE (XEXP (XEXP (addrref, 0), 0)) == SYMBOL_REF
8404 	  && CONSTANT_POOL_ADDRESS_P (XEXP (XEXP (addrref, 0), 0)))
8405 	{
8406 	  HOST_WIDE_INT off = INTVAL (XEXP (XEXP (addrref, 0), 1));
8407 	  rtx sym = XEXP (XEXP (addrref, 0), 0);
8408 	  rtx base = cfun->machine->base_reg;
8409 	  rtx addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, sym, base),
8410 				     UNSPEC_LTREF);
8411 
8412 	  SET_SRC (*x) = plus_constant (Pmode, addr, off);
8413 	  return;
8414 	}
8415     }
8416 
8417   fmt = GET_RTX_FORMAT (GET_CODE (*x));
8418   for (i = GET_RTX_LENGTH (GET_CODE (*x)) - 1; i >= 0; i--)
8419     {
8420       if (fmt[i] == 'e')
8421 	{
8422 	  annotate_constant_pool_refs_1 (&XEXP (*x, i));
8423 	}
8424       else if (fmt[i] == 'E')
8425 	{
8426 	  for (j = 0; j < XVECLEN (*x, i); j++)
8427 	    annotate_constant_pool_refs_1 (&XVECEXP (*x, i, j));
8428 	}
8429     }
8430 }
8431 
8432 /* Annotate every literal pool reference in INSN by an UNSPEC_LTREF expression.
8433    Fix up MEMs as required.
8434    Skip insns which support relative addressing, because they do not use a base
8435    register.  */
8436 
8437 static void
8438 annotate_constant_pool_refs (rtx_insn *insn)
8439 {
8440   if (s390_safe_relative_long_p (insn))
8441     return;
8442   annotate_constant_pool_refs_1 (&PATTERN (insn));
8443 }
8444 
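/* Helper for find_constant_pool_ref.  Recursively scan X for UNSPEC_LTREF
   expressions and record the referenced literal pool symbol in *REF,
   asserting that all references within X are to the same symbol.  */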
8445 static void
8446 find_constant_pool_ref_1 (rtx x, rtx *ref)
8447 {
8448   int i, j;
8449   const char *fmt;
8450 
8451   /* Ignore POOL_ENTRY insns.  */
8452   if (GET_CODE (x) == UNSPEC_VOLATILE
8453       && XINT (x, 1) == UNSPECV_POOL_ENTRY)
8454     return;
8455 
8456   gcc_assert (GET_CODE (x) != SYMBOL_REF
8457 	      || !CONSTANT_POOL_ADDRESS_P (x));
8458 
8459   if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_LTREF)
8460     {
8461       rtx sym = XVECEXP (x, 0, 0);
8462       gcc_assert (GET_CODE (sym) == SYMBOL_REF
8463 		  && CONSTANT_POOL_ADDRESS_P (sym));
8464 
8465       if (*ref == NULL_RTX)
8466 	*ref = sym;
8467       else
8468 	gcc_assert (*ref == sym);
8469 
8470       return;
8471     }
8472 
8473   fmt = GET_RTX_FORMAT (GET_CODE (x));
8474   for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
8475     {
8476       if (fmt[i] == 'e')
8477 	{
8478 	  find_constant_pool_ref_1 (XEXP (x, i), ref);
8479 	}
8480       else if (fmt[i] == 'E')
8481 	{
8482 	  for (j = 0; j < XVECLEN (x, i); j++)
8483 	    find_constant_pool_ref_1 (XVECEXP (x, i, j), ref);
8484 	}
8485     }
8486 }
8487 
8488 /* Find an annotated literal pool symbol referenced in INSN,
8489    and store it at REF.  Will abort if INSN contains references to
8490    more than one such pool symbol; multiple references to the same
8491    symbol are allowed, however.
8492 
8493    The rtx pointed to by REF must be initialized to NULL_RTX
8494    by the caller before calling this routine.
8495 
8496    Skip insns which support relative addressing, because they do not use a base
8497    register.  */
8498 
8499 static void
8500 find_constant_pool_ref (rtx_insn *insn, rtx *ref)
8501 {
8502   if (s390_safe_relative_long_p (insn))
8503     return;
8504   find_constant_pool_ref_1 (PATTERN (insn), ref);
8505 }
8506 
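/* Helper for replace_constant_pool_ref.  Recursively rewrite every
   UNSPEC_LTREF of REF within *X into its base register plus OFFSET,
   keeping any additional constant displacement.  */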
8507 static void
8508 replace_constant_pool_ref_1 (rtx *x, rtx ref, rtx offset)
8509 {
8510   int i, j;
8511   const char *fmt;
8512 
8513   gcc_assert (*x != ref);
8514 
8515   if (GET_CODE (*x) == UNSPEC
8516       && XINT (*x, 1) == UNSPEC_LTREF
8517       && XVECEXP (*x, 0, 0) == ref)
8518     {
8519       *x = gen_rtx_PLUS (Pmode, XVECEXP (*x, 0, 1), offset);
8520       return;
8521     }
8522 
8523   if (GET_CODE (*x) == PLUS
8524       && GET_CODE (XEXP (*x, 1)) == CONST_INT
8525       && GET_CODE (XEXP (*x, 0)) == UNSPEC
8526       && XINT (XEXP (*x, 0), 1) == UNSPEC_LTREF
8527       && XVECEXP (XEXP (*x, 0), 0, 0) == ref)
8528     {
8529       rtx addr = gen_rtx_PLUS (Pmode, XVECEXP (XEXP (*x, 0), 0, 1), offset);
8530       *x = plus_constant (Pmode, addr, INTVAL (XEXP (*x, 1)));
8531       return;
8532     }
8533 
8534   fmt = GET_RTX_FORMAT (GET_CODE (*x));
8535   for (i = GET_RTX_LENGTH (GET_CODE (*x)) - 1; i >= 0; i--)
8536     {
8537       if (fmt[i] == 'e')
8538 	{
8539 	  replace_constant_pool_ref_1 (&XEXP (*x, i), ref, offset);
8540 	}
8541       else if (fmt[i] == 'E')
8542 	{
8543 	  for (j = 0; j < XVECLEN (*x, i); j++)
8544 	    replace_constant_pool_ref_1 (&XVECEXP (*x, i, j), ref, offset);
8545 	}
8546     }
8547 }
8548 
8549 /* Replace every reference to the annotated literal pool
8550    symbol REF in INSN by its base plus OFFSET.
8551    Skip insns which support relative addressing, because they do not use a base
8552    register.  */
8553 
8554 static void
8555 replace_constant_pool_ref (rtx_insn *insn, rtx ref, rtx offset)
8556 {
8557   if (s390_safe_relative_long_p (insn))
8558     return;
8559   replace_constant_pool_ref_1 (&PATTERN (insn), ref, offset);
8560 }
8561 
8562 /* We keep a list of constants which we have to add to internal
8563    constant tables in the middle of large functions.  */
8564 
8565 #define NR_C_MODES 32
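/* The modes below are ordered from largest to smallest alignment
   requirement; s390_dump_pool walks this array front to back, so keeping
   this order ensures every constant it emits ends up properly aligned.  */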
8566 machine_mode constant_modes[NR_C_MODES] =
8567 {
8568   TFmode, TImode, TDmode,
8569   V16QImode, V8HImode, V4SImode, V2DImode, V1TImode,
8570   V4SFmode, V2DFmode, V1TFmode,
8571   DFmode, DImode, DDmode,
8572   V8QImode, V4HImode, V2SImode, V1DImode, V2SFmode, V1DFmode,
8573   SFmode, SImode, SDmode,
8574   V4QImode, V2HImode, V1SImode,  V1SFmode,
8575   HImode,
8576   V2QImode, V1HImode,
8577   QImode,
8578   V1QImode
8579 };
8580 
8581 struct constant
8582 {
8583   struct constant *next;
8584   rtx value;
8585   rtx_code_label *label;
8586 };
8587 
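/* One (chunk of the) literal pool: the placeholder POOL_INSN emitted into
   the insn stream, the insns covered by this chunk (FIRST_INSN, INSNS,
   EMIT_POOL_AFTER), per-mode lists of CONSTANTS, in-pool EXECUTE target
   templates, the pool base LABEL and the total SIZE in bytes.  */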
8588 struct constant_pool
8589 {
8590   struct constant_pool *next;
8591   rtx_insn *first_insn;
8592   rtx_insn *pool_insn;
8593   bitmap insns;
8594   rtx_insn *emit_pool_after;
8595 
8596   struct constant *constants[NR_C_MODES];
8597   struct constant *execute;
8598   rtx_code_label *label;
8599   int size;
8600 };
8601 
8602 /* Allocate new constant_pool structure.  */
8603 
8604 static struct constant_pool *
8605 s390_alloc_pool (void)
8606 {
8607   struct constant_pool *pool;
8608   int i;
8609 
8610   pool = (struct constant_pool *) xmalloc (sizeof *pool);
8611   pool->next = NULL;
8612   for (i = 0; i < NR_C_MODES; i++)
8613     pool->constants[i] = NULL;
8614 
8615   pool->execute = NULL;
8616   pool->label = gen_label_rtx ();
8617   pool->first_insn = NULL;
8618   pool->pool_insn = NULL;
8619   pool->insns = BITMAP_ALLOC (NULL);
8620   pool->size = 0;
8621   pool->emit_pool_after = NULL;
8622 
8623   return pool;
8624 }
8625 
8626 /* Create new constant pool covering instructions starting at INSN
8627    and chain it to the end of POOL_LIST.  */
8628 
8629 static struct constant_pool *
8630 s390_start_pool (struct constant_pool **pool_list, rtx_insn *insn)
8631 {
8632   struct constant_pool *pool, **prev;
8633 
8634   pool = s390_alloc_pool ();
8635   pool->first_insn = insn;
8636 
8637   for (prev = pool_list; *prev; prev = &(*prev)->next)
8638     ;
8639   *prev = pool;
8640 
8641   return pool;
8642 }
8643 
8644 /* End range of instructions covered by POOL at INSN and emit
8645    placeholder insn representing the pool.  */
8646 
8647 static void
8648 s390_end_pool (struct constant_pool *pool, rtx_insn *insn)
8649 {
8650   rtx pool_size = GEN_INT (pool->size + 8 /* alignment slop */);
8651 
8652   if (!insn)
8653     insn = get_last_insn ();
8654 
8655   pool->pool_insn = emit_insn_after (gen_pool (pool_size), insn);
8656   INSN_ADDRESSES_NEW (pool->pool_insn, -1);
8657 }
8658 
8659 /* Add INSN to the list of insns covered by POOL.  */
8660 
8661 static void
8662 s390_add_pool_insn (struct constant_pool *pool, rtx insn)
8663 {
8664   bitmap_set_bit (pool->insns, INSN_UID (insn));
8665 }
8666 
8667 /* Return pool out of POOL_LIST that covers INSN.  */
8668 
8669 static struct constant_pool *
8670 s390_find_pool (struct constant_pool *pool_list, rtx insn)
8671 {
8672   struct constant_pool *pool;
8673 
8674   for (pool = pool_list; pool; pool = pool->next)
8675     if (bitmap_bit_p (pool->insns, INSN_UID (insn)))
8676       break;
8677 
8678   return pool;
8679 }
8680 
8681 /* Add constant VAL of mode MODE to the constant pool POOL.  */
8682 
8683 static void
8684 s390_add_constant (struct constant_pool *pool, rtx val, machine_mode mode)
8685 {
8686   struct constant *c;
8687   int i;
8688 
8689   for (i = 0; i < NR_C_MODES; i++)
8690     if (constant_modes[i] == mode)
8691       break;
8692   gcc_assert (i != NR_C_MODES);
8693 
8694   for (c = pool->constants[i]; c != NULL; c = c->next)
8695     if (rtx_equal_p (val, c->value))
8696       break;
8697 
8698   if (c == NULL)
8699     {
8700       c = (struct constant *) xmalloc (sizeof *c);
8701       c->value = val;
8702       c->label = gen_label_rtx ();
8703       c->next = pool->constants[i];
8704       pool->constants[i] = c;
8705       pool->size += GET_MODE_SIZE (mode);
8706     }
8707 }
8708 
8709 /* Return an rtx that represents the offset of X from the start of
8710    pool POOL.  */
8711 
8712 static rtx
8713 s390_pool_offset (struct constant_pool *pool, rtx x)
8714 {
8715   rtx label;
8716 
8717   label = gen_rtx_LABEL_REF (GET_MODE (x), pool->label);
8718   x = gen_rtx_UNSPEC (GET_MODE (x), gen_rtvec (2, x, label),
8719 		      UNSPEC_POOL_OFFSET);
8720   return gen_rtx_CONST (GET_MODE (x), x);
8721 }
8722 
8723 /* Find constant VAL of mode MODE in the constant pool POOL.
8724    Return an RTX describing the distance from the start of
8725    the pool to the location of the new constant.  */
8726 
8727 static rtx
8728 s390_find_constant (struct constant_pool *pool, rtx val,
8729 		    machine_mode mode)
8730 {
8731   struct constant *c;
8732   int i;
8733 
8734   for (i = 0; i < NR_C_MODES; i++)
8735     if (constant_modes[i] == mode)
8736       break;
8737   gcc_assert (i != NR_C_MODES);
8738 
8739   for (c = pool->constants[i]; c != NULL; c = c->next)
8740     if (rtx_equal_p (val, c->value))
8741       break;
8742 
8743   gcc_assert (c);
8744 
8745   return s390_pool_offset (pool, gen_rtx_LABEL_REF (Pmode, c->label));
8746 }
8747 
8748 /* Check whether INSN is an execute.  Return the label_ref to its
8749    execute target template if so, NULL_RTX otherwise.  */
8750 
8751 static rtx
8752 s390_execute_label (rtx insn)
8753 {
8754   if (INSN_P (insn)
8755       && GET_CODE (PATTERN (insn)) == PARALLEL
8756       && GET_CODE (XVECEXP (PATTERN (insn), 0, 0)) == UNSPEC
8757       && (XINT (XVECEXP (PATTERN (insn), 0, 0), 1) == UNSPEC_EXECUTE
8758 	  || XINT (XVECEXP (PATTERN (insn), 0, 0), 1) == UNSPEC_EXECUTE_JUMP))
8759     {
8760       if (XINT (XVECEXP (PATTERN (insn), 0, 0), 1) == UNSPEC_EXECUTE)
8761 	return XVECEXP (XVECEXP (PATTERN (insn), 0, 0), 0, 2);
8762       else
8763 	{
8764 	  gcc_assert (JUMP_P (insn));
8765 	  /* For jump insns as execute target:
8766 	     - There is one operand less in the parallel (the
8767 	       modification register of the execute is always 0).
8768 	     - The execute target label is wrapped into an
8769 	       if_then_else in order to hide it from jump analysis.  */
8770 	  return XEXP (XVECEXP (XVECEXP (PATTERN (insn), 0, 0), 0, 0), 0);
8771 	}
8772     }
8773 
8774   return NULL_RTX;
8775 }
8776 
8777 /* Find execute target for INSN in the constant pool POOL.
8778    Return an RTX describing the distance from the start of
8779    the pool to the location of the execute target.  */
8780 
8781 static rtx
8782 s390_find_execute (struct constant_pool *pool, rtx insn)
8783 {
8784   struct constant *c;
8785 
8786   for (c = pool->execute; c != NULL; c = c->next)
8787     if (INSN_UID (insn) == INSN_UID (c->value))
8788       break;
8789 
8790   gcc_assert (c);
8791 
8792   return s390_pool_offset (pool, gen_rtx_LABEL_REF (Pmode, c->label));
8793 }
8794 
8795 /* For an execute INSN, extract the execute target template.  */
8796 
8797 static rtx
8798 s390_execute_target (rtx insn)
8799 {
8800   rtx pattern = PATTERN (insn);
8801   gcc_assert (s390_execute_label (insn));
8802 
8803   if (XVECLEN (pattern, 0) == 2)
8804     {
8805       pattern = copy_rtx (XVECEXP (pattern, 0, 1));
8806     }
8807   else
8808     {
8809       rtvec vec = rtvec_alloc (XVECLEN (pattern, 0) - 1);
8810       int i;
8811 
8812       for (i = 0; i < XVECLEN (pattern, 0) - 1; i++)
8813 	RTVEC_ELT (vec, i) = copy_rtx (XVECEXP (pattern, 0, i + 1));
8814 
8815       pattern = gen_rtx_PARALLEL (VOIDmode, vec);
8816     }
8817 
8818   return pattern;
8819 }
8820 
8821 /* Indicate that INSN cannot be duplicated.  This is the case for
8822    execute insns that carry a unique label.  */
8823 
8824 static bool
8825 s390_cannot_copy_insn_p (rtx_insn *insn)
8826 {
8827   rtx label = s390_execute_label (insn);
8828   return label && label != const0_rtx;
8829 }
8830 
8831 /* Dump out the constants in POOL.  If REMOTE_LABEL is true,
8832    do not emit the pool base label.  */
8833 
8834 static void
8835 s390_dump_pool (struct constant_pool *pool, bool remote_label)
8836 {
8837   struct constant *c;
8838   rtx_insn *insn = pool->pool_insn;
8839   int i;
8840 
8841   /* Switch to rodata section.  */
8842   insn = emit_insn_after (gen_pool_section_start (), insn);
8843   INSN_ADDRESSES_NEW (insn, -1);
8844 
8845   /* Ensure minimum pool alignment.  */
8846   insn = emit_insn_after (gen_pool_align (GEN_INT (8)), insn);
8847   INSN_ADDRESSES_NEW (insn, -1);
8848 
8849   /* Emit pool base label.  */
8850   if (!remote_label)
8851     {
8852       insn = emit_label_after (pool->label, insn);
8853       INSN_ADDRESSES_NEW (insn, -1);
8854     }
8855 
8856   /* Dump constants in descending alignment requirement order,
8857      ensuring proper alignment for every constant.  */
8858   for (i = 0; i < NR_C_MODES; i++)
8859     for (c = pool->constants[i]; c; c = c->next)
8860       {
8861 	/* Convert UNSPEC_LTREL_OFFSET unspecs to pool-relative references.  */
8862 	rtx value = copy_rtx (c->value);
8863 	if (GET_CODE (value) == CONST
8864 	    && GET_CODE (XEXP (value, 0)) == UNSPEC
8865 	    && XINT (XEXP (value, 0), 1) == UNSPEC_LTREL_OFFSET
8866 	    && XVECLEN (XEXP (value, 0), 0) == 1)
8867 	  value = s390_pool_offset (pool, XVECEXP (XEXP (value, 0), 0, 0));
8868 
8869 	insn = emit_label_after (c->label, insn);
8870 	INSN_ADDRESSES_NEW (insn, -1);
8871 
8872 	value = gen_rtx_UNSPEC_VOLATILE (constant_modes[i],
8873 					 gen_rtvec (1, value),
8874 					 UNSPECV_POOL_ENTRY);
8875 	insn = emit_insn_after (value, insn);
8876 	INSN_ADDRESSES_NEW (insn, -1);
8877       }
8878 
8879   /* Ensure minimum alignment for instructions.  */
8880   insn = emit_insn_after (gen_pool_align (GEN_INT (2)), insn);
8881   INSN_ADDRESSES_NEW (insn, -1);
8882 
8883   /* Output in-pool execute template insns.  */
8884   for (c = pool->execute; c; c = c->next)
8885     {
8886       insn = emit_label_after (c->label, insn);
8887       INSN_ADDRESSES_NEW (insn, -1);
8888 
8889       insn = emit_insn_after (s390_execute_target (c->value), insn);
8890       INSN_ADDRESSES_NEW (insn, -1);
8891     }
8892 
8893   /* Switch back to previous section.  */
8894   insn = emit_insn_after (gen_pool_section_end (), insn);
8895   INSN_ADDRESSES_NEW (insn, -1);
8896 
8897   insn = emit_barrier_after (insn);
8898   INSN_ADDRESSES_NEW (insn, -1);
8899 
8900   /* Remove placeholder insn.  */
8901   remove_insn (pool->pool_insn);
8902 }
8903 
8904 /* Free all memory used by POOL.  */
8905 
8906 static void
8907 s390_free_pool (struct constant_pool *pool)
8908 {
8909   struct constant *c, *next;
8910   int i;
8911 
8912   for (i = 0; i < NR_C_MODES; i++)
8913     for (c = pool->constants[i]; c; c = next)
8914       {
8915 	next = c->next;
8916 	free (c);
8917       }
8918 
8919   for (c = pool->execute; c; c = next)
8920     {
8921       next = c->next;
8922       free (c);
8923     }
8924 
8925   BITMAP_FREE (pool->insns);
8926   free (pool);
8927 }
8928 
8929 
8930 /* Collect main literal pool.  Return NULL on overflow.  */
8931 
8932 static struct constant_pool *
8933 s390_mainpool_start (void)
8934 {
8935   struct constant_pool *pool;
8936   rtx_insn *insn;
8937 
8938   pool = s390_alloc_pool ();
8939 
8940   for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
8941     {
8942       if (NONJUMP_INSN_P (insn)
8943 	  && GET_CODE (PATTERN (insn)) == SET
8944 	  && GET_CODE (SET_SRC (PATTERN (insn))) == UNSPEC_VOLATILE
8945 	  && XINT (SET_SRC (PATTERN (insn)), 1) == UNSPECV_MAIN_POOL)
8946 	{
8947 	  /* There might be two main_pool instructions if base_reg
8948 	     is call-clobbered; one for shrink-wrapped code and one
8949 	     for the rest.  We want to keep the first.  */
8950 	  if (pool->pool_insn)
8951 	    {
8952 	      insn = PREV_INSN (insn);
8953 	      delete_insn (NEXT_INSN (insn));
8954 	      continue;
8955 	    }
8956 	  pool->pool_insn = insn;
8957 	}
8958 
8959       if (NONJUMP_INSN_P (insn) || CALL_P (insn))
8960 	{
8961 	  rtx pool_ref = NULL_RTX;
8962 	  find_constant_pool_ref (insn, &pool_ref);
8963 	  if (pool_ref)
8964 	    {
8965 	      rtx constant = get_pool_constant (pool_ref);
8966 	      machine_mode mode = get_pool_mode (pool_ref);
8967 	      s390_add_constant (pool, constant, mode);
8968 	    }
8969 	}
8970 
8971       /* If hot/cold partitioning is enabled we have to make sure that
8972 	 the literal pool is emitted in the same section where the
8973 	 initialization of the literal pool base pointer takes place.
8974 	 emit_pool_after is only used in the non-overflow case on
8975 	 non-Z CPUs where we can emit the literal pool at the end of the
8976 	 function body within the text section.  */
8977       if (NOTE_P (insn)
8978 	  && NOTE_KIND (insn) == NOTE_INSN_SWITCH_TEXT_SECTIONS
8979 	  && !pool->emit_pool_after)
8980 	pool->emit_pool_after = PREV_INSN (insn);
8981     }
8982 
8983   gcc_assert (pool->pool_insn || pool->size == 0);
8984 
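  /* A base + 12-bit displacement address only reaches 4096 bytes, so a
     pool of that size or more cannot be addressed from a single base
     register and has to be chunkified instead.  */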
8985   if (pool->size >= 4096)
8986     {
8987       /* We're going to chunkify the pool, so remove the main
8988 	 pool placeholder insn.  */
8989       remove_insn (pool->pool_insn);
8990 
8991       s390_free_pool (pool);
8992       pool = NULL;
8993     }
8994 
8995   /* If the function ends with the section where the literal pool
8996      should be emitted, set the marker to its end.  */
8997   if (pool && !pool->emit_pool_after)
8998     pool->emit_pool_after = get_last_insn ();
8999 
9000   return pool;
9001 }
9002 
9003 /* POOL holds the main literal pool as collected by s390_mainpool_start.
9004    Modify the current function to output the pool constants as well as
9005    the pool register setup instruction.  */
9006 
9007 static void
9008 s390_mainpool_finish (struct constant_pool *pool)
9009 {
9010   rtx base_reg = cfun->machine->base_reg;
9011   rtx set;
9012   rtx_insn *insn;
9013 
9014   /* If the pool is empty, we're done.  */
9015   if (pool->size == 0)
9016     {
9017       /* We don't actually need a base register after all.  */
9018       cfun->machine->base_reg = NULL_RTX;
9019 
9020       if (pool->pool_insn)
9021 	remove_insn (pool->pool_insn);
9022       s390_free_pool (pool);
9023       return;
9024     }
9025 
9026   /* We need correct insn addresses.  */
9027   shorten_branches (get_insns ());
9028 
9029   /* Use a LARL to load the pool register.  The pool is
9030      located in the .rodata section, so we emit it after the function.  */
9031   set = gen_main_base_64 (base_reg, pool->label);
9032   insn = emit_insn_after (set, pool->pool_insn);
9033   INSN_ADDRESSES_NEW (insn, -1);
9034   remove_insn (pool->pool_insn);
9035 
9036   insn = get_last_insn ();
9037   pool->pool_insn = emit_insn_after (gen_pool (const0_rtx), insn);
9038   INSN_ADDRESSES_NEW (pool->pool_insn, -1);
9039 
9040   s390_dump_pool (pool, 0);
9041 
9042   /* Replace all literal pool references.  */
9043 
9044   for (rtx_insn *insn = get_insns (); insn; insn = NEXT_INSN (insn))
9045     {
9046       if (NONJUMP_INSN_P (insn) || CALL_P (insn))
9047 	{
9048 	  rtx addr, pool_ref = NULL_RTX;
9049 	  find_constant_pool_ref (insn, &pool_ref);
9050 	  if (pool_ref)
9051 	    {
9052 	      if (s390_execute_label (insn))
9053 		addr = s390_find_execute (pool, insn);
9054 	      else
9055 		addr = s390_find_constant (pool, get_pool_constant (pool_ref),
9056 						 get_pool_mode (pool_ref));
9057 
9058 	      replace_constant_pool_ref (insn, pool_ref, addr);
9059 	      INSN_CODE (insn) = -1;
9060 	    }
9061 	}
9062     }
9063 
9064 
9065   /* Free the pool.  */
9066   s390_free_pool (pool);
9067 }
9068 
9069 /* Chunkify the literal pool.  */
9070 
9071 #define S390_POOL_CHUNK_MIN	0xc00
9072 #define S390_POOL_CHUNK_MAX	0xe00
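/* Per-chunk size limits in bytes, kept well below the 4 KB displacement
   range so that every constant of a chunk stays addressable from the
   chunk's base label.  */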
9073 
9074 static struct constant_pool *
9075 s390_chunkify_start (void)
9076 {
9077   struct constant_pool *curr_pool = NULL, *pool_list = NULL;
9078   bitmap far_labels;
9079   rtx_insn *insn;
9080 
9081   /* We need correct insn addresses.  */
9082 
9083   shorten_branches (get_insns ());
9084 
9085   /* Scan all insns and move literals to pool chunks.  */
9086 
9087   for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
9088     {
9089       if (NONJUMP_INSN_P (insn) || CALL_P (insn))
9090 	{
9091 	  rtx pool_ref = NULL_RTX;
9092 	  find_constant_pool_ref (insn, &pool_ref);
9093 	  if (pool_ref)
9094 	    {
9095 	      rtx constant = get_pool_constant (pool_ref);
9096 	      machine_mode mode = get_pool_mode (pool_ref);
9097 
9098 	      if (!curr_pool)
9099 		curr_pool = s390_start_pool (&pool_list, insn);
9100 
9101 	      s390_add_constant (curr_pool, constant, mode);
9102 	      s390_add_pool_insn (curr_pool, insn);
9103 	    }
9104 	}
9105 
9106       if (JUMP_P (insn) || JUMP_TABLE_DATA_P (insn) || LABEL_P (insn))
9107 	{
9108 	  if (curr_pool)
9109 	    s390_add_pool_insn (curr_pool, insn);
9110 	}
9111 
9112       if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_VAR_LOCATION)
9113 	continue;
9114 
9115       if (!curr_pool
9116 	  || INSN_ADDRESSES_SIZE () <= (size_t) INSN_UID (insn)
9117 	  || INSN_ADDRESSES (INSN_UID (insn)) == -1)
9118 	continue;
9119 
9120       if (curr_pool->size < S390_POOL_CHUNK_MAX)
9121 	continue;
9122 
9123       s390_end_pool (curr_pool, NULL);
9124       curr_pool = NULL;
9125     }
9126 
9127   if (curr_pool)
9128     s390_end_pool (curr_pool, NULL);
9129 
9130   /* Find all labels that are branched into
9131      from an insn belonging to a different chunk.  */
9132 
9133   far_labels = BITMAP_ALLOC (NULL);
9134 
9135   for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
9136     {
9137       rtx_jump_table_data *table;
9138 
9139       /* Labels marked with LABEL_PRESERVE_P can be target
9140 	 of non-local jumps, so we have to mark them.
9141 	 The same holds for named labels.
9142 
9143 	 Don't do that, however, if it is the label before
9144 	 a jump table.  */
9145 
9146       if (LABEL_P (insn)
9147 	  && (LABEL_PRESERVE_P (insn) || LABEL_NAME (insn)))
9148 	{
9149 	  rtx_insn *vec_insn = NEXT_INSN (insn);
9150 	  if (! vec_insn || ! JUMP_TABLE_DATA_P (vec_insn))
9151 	    bitmap_set_bit (far_labels, CODE_LABEL_NUMBER (insn));
9152 	}
9153       /* Check potential targets in a table jump (casesi_jump).  */
9154       else if (tablejump_p (insn, NULL, &table))
9155 	{
9156 	  rtx vec_pat = PATTERN (table);
9157 	  int i, diff_p = GET_CODE (vec_pat) == ADDR_DIFF_VEC;
9158 
9159 	  for (i = 0; i < XVECLEN (vec_pat, diff_p); i++)
9160 	    {
9161 	      rtx label = XEXP (XVECEXP (vec_pat, diff_p, i), 0);
9162 
9163 	      if (s390_find_pool (pool_list, label)
9164 		  != s390_find_pool (pool_list, insn))
9165 		bitmap_set_bit (far_labels, CODE_LABEL_NUMBER (label));
9166 	    }
9167 	}
9168       /* If we have a direct jump (conditional or unconditional),
9169 	 check all potential targets.  */
9170       else if (JUMP_P (insn))
9171 	{
9172 	  rtx pat = PATTERN (insn);
9173 
9174 	  if (GET_CODE (pat) == PARALLEL)
9175 	    pat = XVECEXP (pat, 0, 0);
9176 
9177 	  if (GET_CODE (pat) == SET)
9178 	    {
9179 	      rtx label = JUMP_LABEL (insn);
9180 	      if (label && !ANY_RETURN_P (label))
9181 		{
9182 		  if (s390_find_pool (pool_list, label)
9183 		      != s390_find_pool (pool_list, insn))
9184 		    bitmap_set_bit (far_labels, CODE_LABEL_NUMBER (label));
9185 		}
9186 	    }
9187 	}
9188     }
9189 
9190   /* Insert base register reload insns before every pool.  */
9191 
9192   for (curr_pool = pool_list; curr_pool; curr_pool = curr_pool->next)
9193     {
9194       rtx new_insn = gen_reload_base_64 (cfun->machine->base_reg,
9195 					 curr_pool->label);
9196       rtx_insn *insn = curr_pool->first_insn;
9197       INSN_ADDRESSES_NEW (emit_insn_before (new_insn, insn), -1);
9198     }
9199 
9200   /* Insert base register reload insns at every far label.  */
9201 
9202   for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
9203     if (LABEL_P (insn)
9204 	&& bitmap_bit_p (far_labels, CODE_LABEL_NUMBER (insn)))
9205       {
9206 	struct constant_pool *pool = s390_find_pool (pool_list, insn);
9207 	if (pool)
9208 	  {
9209 	    rtx new_insn = gen_reload_base_64 (cfun->machine->base_reg,
9210 					       pool->label);
9211 	    INSN_ADDRESSES_NEW (emit_insn_after (new_insn, insn), -1);
9212 	  }
9213       }
9214 
9215 
9216   BITMAP_FREE (far_labels);
9217 
9218 
9219   /* Recompute insn addresses.  */
9220 
9221   init_insn_lengths ();
9222   shorten_branches (get_insns ());
9223 
9224   return pool_list;
9225 }
9226 
9227 /* POOL_LIST is a chunk list as prepared by s390_chunkify_start.
9228    After we have decided to use this list, finish implementing
9229    all changes to the current function as required.  */
9230 
9231 static void
9232 s390_chunkify_finish (struct constant_pool *pool_list)
9233 {
9234   struct constant_pool *curr_pool = NULL;
9235   rtx_insn *insn;
9236 
9237 
9238   /* Replace all literal pool references.  */
9239 
9240   for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
9241     {
9242       curr_pool = s390_find_pool (pool_list, insn);
9243       if (!curr_pool)
9244 	continue;
9245 
9246       if (NONJUMP_INSN_P (insn) || CALL_P (insn))
9247 	{
9248 	  rtx addr, pool_ref = NULL_RTX;
9249 	  find_constant_pool_ref (insn, &pool_ref);
9250 	  if (pool_ref)
9251 	    {
9252 	      if (s390_execute_label (insn))
9253 		addr = s390_find_execute (curr_pool, insn);
9254 	      else
9255 		addr = s390_find_constant (curr_pool,
9256 					   get_pool_constant (pool_ref),
9257 					   get_pool_mode (pool_ref));
9258 
9259 	      replace_constant_pool_ref (insn, pool_ref, addr);
9260 	      INSN_CODE (insn) = -1;
9261 	    }
9262 	}
9263     }
9264 
9265   /* Dump out all literal pools.  */
9266 
9267   for (curr_pool = pool_list; curr_pool; curr_pool = curr_pool->next)
9268     s390_dump_pool (curr_pool, 0);
9269 
9270   /* Free pool list.  */
9271 
9272   while (pool_list)
9273     {
9274       struct constant_pool *next = pool_list->next;
9275       s390_free_pool (pool_list);
9276       pool_list = next;
9277     }
9278 }
9279 
9280 /* Output the constant pool entry EXP in mode MODE with alignment ALIGN.  */
9281 
9282 void
9283 s390_output_pool_entry (rtx exp, machine_mode mode, unsigned int align)
9284 {
9285   switch (GET_MODE_CLASS (mode))
9286     {
9287     case MODE_FLOAT:
9288     case MODE_DECIMAL_FLOAT:
9289       gcc_assert (GET_CODE (exp) == CONST_DOUBLE);
9290 
9291       assemble_real (*CONST_DOUBLE_REAL_VALUE (exp),
9292 		     as_a <scalar_float_mode> (mode), align);
9293       break;
9294 
9295     case MODE_INT:
9296       assemble_integer (exp, GET_MODE_SIZE (mode), align, 1);
9297       mark_symbol_refs_as_used (exp);
9298       break;
9299 
9300     case MODE_VECTOR_INT:
9301     case MODE_VECTOR_FLOAT:
9302       {
9303 	int i;
9304 	machine_mode inner_mode;
9305 	gcc_assert (GET_CODE (exp) == CONST_VECTOR);
9306 
9307 	inner_mode = GET_MODE_INNER (GET_MODE (exp));
9308 	for (i = 0; i < XVECLEN (exp, 0); i++)
9309 	  s390_output_pool_entry (XVECEXP (exp, 0, i),
9310 				  inner_mode,
9311 				  i == 0
9312 				  ? align
9313 				  : GET_MODE_BITSIZE (inner_mode));
9314       }
9315       break;
9316 
9317     default:
9318       gcc_unreachable ();
9319     }
9320 }
9321 
9322 
9323 /* Return an RTL expression representing the value of the return address
9324    for the frame COUNT steps up from the current frame.  FRAME is the
9325    frame pointer of that frame.  */
9326 
9327 rtx
9328 s390_return_addr_rtx (int count, rtx frame ATTRIBUTE_UNUSED)
9329 {
9330   int offset;
9331   rtx addr;
9332 
9333   /* Without backchain, we fail for all but the current frame.  */
9334 
9335   if (!TARGET_BACKCHAIN && count > 0)
9336     return NULL_RTX;
9337 
9338   /* For the current frame, we need to make sure the initial
9339      value of RETURN_REGNUM is actually saved.  */
9340 
9341   if (count == 0)
9342     return get_hard_reg_initial_val (Pmode, RETURN_REGNUM);
9343 
9344   if (TARGET_PACKED_STACK)
9345     offset = -2 * UNITS_PER_LONG;
9346   else
9347     offset = RETURN_REGNUM * UNITS_PER_LONG;
9348 
9349   addr = plus_constant (Pmode, frame, offset);
9350   addr = memory_address (Pmode, addr);
9351   return gen_rtx_MEM (Pmode, addr);
9352 }
9353 
9354 /* Return an RTL expression representing the back chain stored in
9355    the current stack frame.  */
9356 
9357 rtx
9358 s390_back_chain_rtx (void)
9359 {
9360   rtx chain;
9361 
9362   gcc_assert (TARGET_BACKCHAIN);
9363 
9364   if (TARGET_PACKED_STACK)
9365     chain = plus_constant (Pmode, stack_pointer_rtx,
9366 			   STACK_POINTER_OFFSET - UNITS_PER_LONG);
9367   else
9368     chain = stack_pointer_rtx;
9369 
9370   chain = gen_rtx_MEM (Pmode, chain);
9371   return chain;
9372 }
9373 
9374 /* Find first call clobbered register unused in a function.
9375    This could be used as base register in a leaf function
9376    or for holding the return address before epilogue.  */
9377 
9378 static int
9379 find_unused_clobbered_reg (void)
9380 {
9381   int i;
9382   for (i = 0; i < 6; i++)
9383     if (!df_regs_ever_live_p (i))
9384       return i;
9385   return 0;
9386 }
9387 
9388 
9389 /* Helper function for s390_regs_ever_clobbered.  Sets the fields in DATA for all
9390    clobbered hard regs in SETREG.  */
9391 
9392 static void
9393 s390_reg_clobbered_rtx (rtx setreg, const_rtx set_insn ATTRIBUTE_UNUSED, void *data)
9394 {
9395   char *regs_ever_clobbered = (char *)data;
9396   unsigned int i, regno;
9397   machine_mode mode = GET_MODE (setreg);
9398 
9399   if (GET_CODE (setreg) == SUBREG)
9400     {
9401       rtx inner = SUBREG_REG (setreg);
9402       if (!GENERAL_REG_P (inner) && !FP_REG_P (inner))
9403 	return;
9404       regno = subreg_regno (setreg);
9405     }
9406   else if (GENERAL_REG_P (setreg) || FP_REG_P (setreg))
9407     regno = REGNO (setreg);
9408   else
9409     return;
9410 
9411   for (i = regno;
9412        i < end_hard_regno (mode, regno);
9413        i++)
9414     regs_ever_clobbered[i] = 1;
9415 }
9416 
9417 /* Walks through all basic blocks of the current function looking
9418    for clobbered hard regs using s390_reg_clobbered_rtx.  The elements
9419    of the passed char array REGS_EVER_CLOBBERED are set to one for
9420    each of those regs.  */
9421 
9422 static void
9423 s390_regs_ever_clobbered (char regs_ever_clobbered[])
9424 {
9425   basic_block cur_bb;
9426   rtx_insn *cur_insn;
9427   unsigned int i;
9428 
9429   memset (regs_ever_clobbered, 0, 32);
9430 
9431   /* For non-leaf functions we have to consider all call clobbered regs to be
9432      clobbered.  */
9433   if (!crtl->is_leaf)
9434     {
9435       for (i = 0; i < 32; i++)
9436 	regs_ever_clobbered[i] = call_used_regs[i];
9437     }
9438 
9439   /* Make the "magic" eh_return registers live if necessary.  For regs_ever_live
9440      this work is done by liveness analysis (mark_regs_live_at_end).
9441      Special care is needed for functions containing landing pads.  Landing pads
9442      may use the eh registers, but the code which sets these registers is not
9443      contained in that function.  Hence s390_regs_ever_clobbered is not able to
9444      deal with this automatically.  */
9445   if (crtl->calls_eh_return || cfun->machine->has_landing_pad_p)
9446     for (i = 0; EH_RETURN_DATA_REGNO (i) != INVALID_REGNUM ; i++)
9447       if (crtl->calls_eh_return
9448 	  || (cfun->machine->has_landing_pad_p
9449 	      && df_regs_ever_live_p (EH_RETURN_DATA_REGNO (i))))
9450 	regs_ever_clobbered[EH_RETURN_DATA_REGNO (i)] = 1;
9451 
9452   /* For nonlocal gotos all call-saved registers have to be saved.
9453      This flag is also set for the unwinding code in libgcc.
9454      See expand_builtin_unwind_init.  For regs_ever_live this is done by
9455      reload.  */
9456   if (crtl->saves_all_registers)
9457     for (i = 0; i < 32; i++)
9458       if (!call_used_regs[i])
9459 	regs_ever_clobbered[i] = 1;
9460 
9461   FOR_EACH_BB_FN (cur_bb, cfun)
9462     {
9463       FOR_BB_INSNS (cur_bb, cur_insn)
9464 	{
9465 	  rtx pat;
9466 
9467 	  if (!INSN_P (cur_insn))
9468 	    continue;
9469 
9470 	  pat = PATTERN (cur_insn);
9471 
9472 	  /* Ignore GPR restore insns.  */
9473 	  if (epilogue_completed && RTX_FRAME_RELATED_P (cur_insn))
9474 	    {
9475 	      if (GET_CODE (pat) == SET
9476 		  && GENERAL_REG_P (SET_DEST (pat)))
9477 		{
9478 		  /* lgdr  */
9479 		  if (GET_MODE (SET_SRC (pat)) == DImode
9480 		      && FP_REG_P (SET_SRC (pat)))
9481 		    continue;
9482 
9483 		  /* l / lg  */
9484 		  if (GET_CODE (SET_SRC (pat)) == MEM)
9485 		    continue;
9486 		}
9487 
9488 	      /* lm / lmg */
9489 	      if (GET_CODE (pat) == PARALLEL
9490 		  && load_multiple_operation (pat, VOIDmode))
9491 		continue;
9492 	    }
9493 
9494 	  note_stores (cur_insn,
9495 		       s390_reg_clobbered_rtx,
9496 		       regs_ever_clobbered);
9497 	}
9498     }
9499 }
9500 
9501 /* Determine the frame area which actually has to be accessed
9502    in the function epilogue. The values are stored at the
9503    given pointers AREA_BOTTOM (address of the lowest used stack
9504    address) and AREA_TOP (address of the first item which does
9505    not belong to the stack frame).  */
9506 
9507 static void
9508 s390_frame_area (int *area_bottom, int *area_top)
9509 {
9510   int b, t;
9511 
9512   b = INT_MAX;
9513   t = INT_MIN;
9514 
9515   if (cfun_frame_layout.first_restore_gpr != -1)
9516     {
9517       b = (cfun_frame_layout.gprs_offset
9518 	   + cfun_frame_layout.first_restore_gpr * UNITS_PER_LONG);
9519       t = b + (cfun_frame_layout.last_restore_gpr
9520 	       - cfun_frame_layout.first_restore_gpr + 1) * UNITS_PER_LONG;
9521     }
9522 
9523   if (TARGET_64BIT && cfun_save_high_fprs_p)
9524     {
9525       b = MIN (b, cfun_frame_layout.f8_offset);
9526       t = MAX (t, (cfun_frame_layout.f8_offset
9527 		   + cfun_frame_layout.high_fprs * 8));
9528     }
9529 
9530   if (!TARGET_64BIT)
9531     {
9532       if (cfun_fpr_save_p (FPR4_REGNUM))
9533 	{
9534 	  b = MIN (b, cfun_frame_layout.f4_offset);
9535 	  t = MAX (t, cfun_frame_layout.f4_offset + 8);
9536 	}
9537       if (cfun_fpr_save_p (FPR6_REGNUM))
9538 	{
9539 	  b = MIN (b, cfun_frame_layout.f4_offset + 8);
9540 	  t = MAX (t, cfun_frame_layout.f4_offset + 16);
9541 	}
9542     }
9543   *area_bottom = b;
9544   *area_top = t;
9545 }

9546 /* Update gpr_save_slots in the frame layout trying to make use of
9547    FPRs as GPR save slots.
9548    This is a helper routine of s390_register_info.  */
9549 
9550 static void
9551 s390_register_info_gprtofpr ()
9552 {
9553   int save_reg_slot = FPR0_REGNUM;
9554   int i, j;
9555 
9556   if (TARGET_TPF || !TARGET_Z10 || !TARGET_HARD_FLOAT || !crtl->is_leaf)
9557     return;
9558 
9559   /* builtin_eh_return needs to be able to modify the return address
9560      on the stack.  It could also adjust the FPR save slot instead but
9561      is it worth the trouble?!  */
9562   if (crtl->calls_eh_return)
9563     return;
9564 
9565   for (i = 15; i >= 6; i--)
9566     {
9567       if (cfun_gpr_save_slot (i) == SAVE_SLOT_NONE)
9568 	continue;
9569 
9570       /* Advance to the next FP register which can be used as a
9571 	 GPR save slot.  */
9572       while ((!call_used_regs[save_reg_slot]
9573 	      || df_regs_ever_live_p (save_reg_slot)
9574 	      || cfun_fpr_save_p (save_reg_slot))
9575 	     && FP_REGNO_P (save_reg_slot))
9576 	save_reg_slot++;
9577       if (!FP_REGNO_P (save_reg_slot))
9578 	{
9579 	  /* We only want to use ldgr/lgdr if we can get rid of
9580 	     stm/lm entirely.  So undo the gpr slot allocation in
9581 	     case we ran out of FPR save slots.  */
9582 	  for (j = 6; j <= 15; j++)
9583 	    if (FP_REGNO_P (cfun_gpr_save_slot (j)))
9584 	      cfun_gpr_save_slot (j) = SAVE_SLOT_STACK;
9585 	  break;
9586 	}
9587       cfun_gpr_save_slot (i) = save_reg_slot++;
9588     }
9589 }
9590 
9591 /* Set the bits in fpr_bitmap for FPRs which need to be saved due to
9592    stdarg.
9593    This is a helper routine for s390_register_info.  */
9594 
9595 static void
9596 s390_register_info_stdarg_fpr ()
9597 {
9598   int i;
9599   int min_fpr;
9600   int max_fpr;
9601 
9602   /* Save the FP argument regs for stdarg. f0, f2 for 31 bit and
9603      f0-f4 for 64 bit.  */
9604   if (!cfun->stdarg
9605       || !TARGET_HARD_FLOAT
9606       || !cfun->va_list_fpr_size
9607       || crtl->args.info.fprs >= FP_ARG_NUM_REG)
9608     return;
9609 
9610   min_fpr = crtl->args.info.fprs;
9611   max_fpr = min_fpr + cfun->va_list_fpr_size - 1;
9612   if (max_fpr >= FP_ARG_NUM_REG)
9613     max_fpr = FP_ARG_NUM_REG - 1;
9614 
9615   /* FPR argument regs start at f0.  */
9616   min_fpr += FPR0_REGNUM;
9617   max_fpr += FPR0_REGNUM;
9618 
9619   for (i = min_fpr; i <= max_fpr; i++)
9620     cfun_set_fpr_save (i);
9621 }
9622 
9623 /* Reserve the GPR save slots for GPRs which need to be saved due to
9624    stdarg.
9625    This is a helper routine for s390_register_info.  */
9626 
9627 static void
9628 s390_register_info_stdarg_gpr ()
9629 {
9630   int i;
9631   int min_gpr;
9632   int max_gpr;
9633 
9634   if (!cfun->stdarg
9635       || !cfun->va_list_gpr_size
9636       || crtl->args.info.gprs >= GP_ARG_NUM_REG)
9637     return;
9638 
9639   min_gpr = crtl->args.info.gprs;
9640   max_gpr = min_gpr + cfun->va_list_gpr_size - 1;
9641   if (max_gpr >= GP_ARG_NUM_REG)
9642     max_gpr = GP_ARG_NUM_REG - 1;
9643 
9644   /* GPR argument regs start at r2.  */
9645   min_gpr += GPR2_REGNUM;
9646   max_gpr += GPR2_REGNUM;
9647 
9648   /* If r6 was supposed to be saved into an FPR and now needs to go to
9649      the stack for vararg we have to adjust the restore range to make
9650      sure that the restore is done from stack as well.  */
9651   if (FP_REGNO_P (cfun_gpr_save_slot (GPR6_REGNUM))
9652       && min_gpr <= GPR6_REGNUM
9653       && max_gpr >= GPR6_REGNUM)
9654     {
9655       if (cfun_frame_layout.first_restore_gpr == -1
9656 	  || cfun_frame_layout.first_restore_gpr > GPR6_REGNUM)
9657 	cfun_frame_layout.first_restore_gpr = GPR6_REGNUM;
9658       if (cfun_frame_layout.last_restore_gpr == -1
9659 	  || cfun_frame_layout.last_restore_gpr < GPR6_REGNUM)
9660 	cfun_frame_layout.last_restore_gpr = GPR6_REGNUM;
9661     }
9662 
9663   if (cfun_frame_layout.first_save_gpr == -1
9664       || cfun_frame_layout.first_save_gpr > min_gpr)
9665     cfun_frame_layout.first_save_gpr = min_gpr;
9666 
9667   if (cfun_frame_layout.last_save_gpr == -1
9668       || cfun_frame_layout.last_save_gpr < max_gpr)
9669     cfun_frame_layout.last_save_gpr = max_gpr;
9670 
9671   for (i = min_gpr; i <= max_gpr; i++)
9672     cfun_gpr_save_slot (i) = SAVE_SLOT_STACK;
9673 }
9674 
9675 /* Calculate the save and restore ranges for stm(g) and lm(g) in the
9676    prologue and epilogue.  */
9677 
9678 static void
9679 s390_register_info_set_ranges ()
9680 {
9681   int i, j;
9682 
9683   /* Find the first and the last save slot supposed to use the stack
9684      to set the restore range.
9685      Vararg regs might be marked as save to stack but only the
9686      call-saved regs really need restoring (i.e. r6).  This code
9687      assumes that the vararg regs have not yet been recorded in
9688      cfun_gpr_save_slot.  */
9689   for (i = 0; i < 16 && cfun_gpr_save_slot (i) != SAVE_SLOT_STACK; i++);
9690   for (j = 15; j > i && cfun_gpr_save_slot (j) != SAVE_SLOT_STACK; j--);
9691   cfun_frame_layout.first_restore_gpr = (i == 16) ? -1 : i;
9692   cfun_frame_layout.last_restore_gpr = (i == 16) ? -1 : j;
9693   cfun_frame_layout.first_save_gpr = (i == 16) ? -1 : i;
9694   cfun_frame_layout.last_save_gpr = (i == 16) ? -1 : j;
9695 }
9696 
9697 /* The GPR and FPR save slots in cfun->machine->frame_layout are set
9698    for registers which need to be saved in function prologue.
9699    This function can be used until the insns emitted for save/restore
9700    of the regs are visible in the RTL stream.  */
9701 
9702 static void
9703 s390_register_info ()
9704 {
9705   int i;
9706   char clobbered_regs[32];
9707 
9708   gcc_assert (!epilogue_completed);
9709 
9710   if (reload_completed)
9711     /* After reload we rely on our own routine to determine which
9712        registers need saving.  */
9713     s390_regs_ever_clobbered (clobbered_regs);
9714   else
9715     /* During reload we use regs_ever_live as a base since reload
9716        does changes in there which we otherwise would not be aware
9717        of.  */
9718     for (i = 0; i < 32; i++)
9719       clobbered_regs[i] = df_regs_ever_live_p (i);
9720 
9721   for (i = 0; i < 32; i++)
9722     clobbered_regs[i] = clobbered_regs[i] && !global_regs[i];
9723 
9724   /* Mark the call-saved FPRs which need to be saved.
9725      This needs to be done before checking the special GPRs since the
9726      stack pointer usage depends on whether high FPRs have to be saved
9727      or not.  */
9728   cfun_frame_layout.fpr_bitmap = 0;
9729   cfun_frame_layout.high_fprs = 0;
9730   for (i = FPR0_REGNUM; i <= FPR15_REGNUM; i++)
9731     if (clobbered_regs[i] && !call_used_regs[i])
9732       {
9733 	cfun_set_fpr_save (i);
9734 	if (i >= FPR8_REGNUM)
9735 	  cfun_frame_layout.high_fprs++;
9736       }
9737 
9738   /* Register 12 is used for GOT address, but also as temp in prologue
9739      for split-stack stdarg functions (unless r14 is available).  */
9740   clobbered_regs[12]
9741     |= ((flag_pic && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
9742 	|| (flag_split_stack && cfun->stdarg
9743 	    && (crtl->is_leaf || TARGET_TPF_PROFILING
9744 		|| has_hard_reg_initial_val (Pmode, RETURN_REGNUM))));
9745 
9746   clobbered_regs[BASE_REGNUM]
9747     |= (cfun->machine->base_reg
9748 	&& REGNO (cfun->machine->base_reg) == BASE_REGNUM);
9749 
9750   clobbered_regs[HARD_FRAME_POINTER_REGNUM]
9751     |= !!frame_pointer_needed;
9752 
9753   /* On pre-z900 machines this might take until machine-dependent
9754      reorg to decide.
9755      save_return_addr_p will only be set on non-zarch machines so
9756      there is no risk that r14 goes into an FPR instead of a stack
9757      slot.  */
9758   clobbered_regs[RETURN_REGNUM]
9759     |= (!crtl->is_leaf
9760 	|| TARGET_TPF_PROFILING
9761 	|| cfun_frame_layout.save_return_addr_p
9762 	|| crtl->calls_eh_return);
9763 
9764   clobbered_regs[STACK_POINTER_REGNUM]
9765     |= (!crtl->is_leaf
9766 	|| TARGET_TPF_PROFILING
9767 	|| cfun_save_high_fprs_p
9768 	|| get_frame_size () > 0
9769 	|| (reload_completed && cfun_frame_layout.frame_size > 0)
9770 	|| cfun->calls_alloca);
9771 
9772   memset (cfun_frame_layout.gpr_save_slots, SAVE_SLOT_NONE, 16);
9773 
9774   for (i = 6; i < 16; i++)
9775     if (clobbered_regs[i])
9776       cfun_gpr_save_slot (i) = SAVE_SLOT_STACK;
9777 
9778   s390_register_info_stdarg_fpr ();
9779   s390_register_info_gprtofpr ();
9780   s390_register_info_set_ranges ();
9781   /* stdarg functions might need to save GPRs 2 to 6.  This might
9782      override the GPR->FPR save decision made by
9783      s390_register_info_gprtofpr for r6 since vararg regs must go to
9784      the stack.  */
9785   s390_register_info_stdarg_gpr ();
9786 }
9787 
9788 /* Return true if REGNO is a global register, but not one
9789    of the special ones that need to be saved/restored anyway.  */
9790 
9791 static inline bool
9792 global_not_special_regno_p (int regno)
9793 {
9794   return (global_regs[regno]
9795 	  /* These registers are special and need to be
9796 	     restored in any case.  */
9797 	  && !(regno == STACK_POINTER_REGNUM
9798 	       || regno == RETURN_REGNUM
9799 	       || regno == BASE_REGNUM
9800 	       || (flag_pic && regno == (int)PIC_OFFSET_TABLE_REGNUM)));
9801 }
9802 
9803 /* This function is called by s390_optimize_prologue in order to get
9804    rid of unnecessary GPR save/restore instructions.  The register info
9805    for the GPRs is re-computed and the ranges are re-calculated.  */
9806 
9807 static void
9808 s390_optimize_register_info ()
9809 {
9810   char clobbered_regs[32];
9811   int i;
9812 
9813   gcc_assert (epilogue_completed);
9814 
9815   s390_regs_ever_clobbered (clobbered_regs);
9816 
9817   /* Global registers do not need to be saved and restored unless it
9818      is one of our special regs (r12, r13, r14, or r15).  */
9819   for (i = 0; i < 32; i++)
9820     clobbered_regs[i] = clobbered_regs[i] && !global_not_special_regno_p (i);
9821 
9822   /* There is still special treatment needed for cases invisible to
9823      s390_regs_ever_clobbered.  */
9824   clobbered_regs[RETURN_REGNUM]
9825     |= (TARGET_TPF_PROFILING
9826 	/* When expanding builtin_return_addr in ESA mode we do not
9827 	   know whether r14 will later be needed as scratch reg when
9828 	   doing branch splitting.  So the builtin always accesses the
9829 	   r14 save slot and we need to stick to the save/restore
9830 	   decision for r14 even if it turns out that it didn't get
9831 	   clobbered.  */
9832 	|| cfun_frame_layout.save_return_addr_p
9833 	|| crtl->calls_eh_return);
9834 
9835   memset (cfun_frame_layout.gpr_save_slots, SAVE_SLOT_NONE, 6);
9836 
9837   for (i = 6; i < 16; i++)
9838     if (!clobbered_regs[i])
9839       cfun_gpr_save_slot (i) = SAVE_SLOT_NONE;
9840 
9841   s390_register_info_set_ranges ();
9842   s390_register_info_stdarg_gpr ();
9843 }
9844 
9845 /* Fill cfun->machine with info about frame of current function.  */
9846 
9847 static void
9848 s390_frame_info (void)
9849 {
9850   HOST_WIDE_INT lowest_offset;
9851 
9852   cfun_frame_layout.first_save_gpr_slot = cfun_frame_layout.first_save_gpr;
9853   cfun_frame_layout.last_save_gpr_slot = cfun_frame_layout.last_save_gpr;
9854 
9855   /* The va_arg builtin uses a constant distance of 16 *
9856      UNITS_PER_LONG (r0-r15) to reach the FPRs from the reg_save_area
9857      pointer.  So even if we are going to save the stack pointer in an
9858      FPR we need the stack space in order to keep the offsets
9859      correct.  */
9860   if (cfun->stdarg && cfun_save_arg_fprs_p)
9861     {
9862       cfun_frame_layout.last_save_gpr_slot = STACK_POINTER_REGNUM;
9863 
9864       if (cfun_frame_layout.first_save_gpr_slot == -1)
9865 	cfun_frame_layout.first_save_gpr_slot = STACK_POINTER_REGNUM;
9866     }
9867 
9868   cfun_frame_layout.frame_size = get_frame_size ();
9869   if (!TARGET_64BIT && cfun_frame_layout.frame_size > 0x7fff0000)
9870     fatal_error (input_location,
9871 		 "total size of local variables exceeds architecture limit");
9872 
9873   if (!TARGET_PACKED_STACK)
9874     {
9875       /* Fixed stack layout.  */
9876       cfun_frame_layout.backchain_offset = 0;
9877       cfun_frame_layout.f0_offset = 16 * UNITS_PER_LONG;
9878       cfun_frame_layout.f4_offset = cfun_frame_layout.f0_offset + 2 * 8;
9879       cfun_frame_layout.f8_offset = -cfun_frame_layout.high_fprs * 8;
9880       cfun_frame_layout.gprs_offset = (cfun_frame_layout.first_save_gpr_slot
9881 				       * UNITS_PER_LONG);
9882     }
9883   else if (TARGET_BACKCHAIN)
9884     {
9885       /* Kernel stack layout - packed stack, backchain, no float  */
9886       gcc_assert (TARGET_SOFT_FLOAT);
9887       cfun_frame_layout.backchain_offset = (STACK_POINTER_OFFSET
9888 					    - UNITS_PER_LONG);
9889 
9890       /* The distance between the backchain and the return address
9891 	 save slot must not change.  So we always need a slot for the
9892 	 stack pointer which resides in between.  */
9893       cfun_frame_layout.last_save_gpr_slot = STACK_POINTER_REGNUM;
9894 
9895       cfun_frame_layout.gprs_offset
9896 	= cfun_frame_layout.backchain_offset - cfun_gprs_save_area_size;
9897 
9898       /* FPRs will not be saved.  Nevertheless pick sane values to
9899 	 keep area calculations valid.  */
9900       cfun_frame_layout.f0_offset =
9901 	cfun_frame_layout.f4_offset =
9902 	cfun_frame_layout.f8_offset = cfun_frame_layout.gprs_offset;
9903     }
9904   else
9905     {
9906       int num_fprs;
9907 
9908       /* Packed stack layout without backchain.  */
9909 
9910       /* With stdarg FPRs need their dedicated slots.  */
9911       num_fprs = (TARGET_64BIT && cfun->stdarg ? 2
9912 		  : (cfun_fpr_save_p (FPR4_REGNUM) +
9913 		     cfun_fpr_save_p (FPR6_REGNUM)));
9914       cfun_frame_layout.f4_offset = STACK_POINTER_OFFSET - 8 * num_fprs;
9915 
9916       num_fprs = (cfun->stdarg ? 2
9917 		  : (cfun_fpr_save_p (FPR0_REGNUM)
9918 		     + cfun_fpr_save_p (FPR2_REGNUM)));
9919       cfun_frame_layout.f0_offset = cfun_frame_layout.f4_offset - 8 * num_fprs;
9920 
9921       cfun_frame_layout.gprs_offset
9922 	= cfun_frame_layout.f0_offset - cfun_gprs_save_area_size;
9923 
9924       cfun_frame_layout.f8_offset = (cfun_frame_layout.gprs_offset
9925 				     - cfun_frame_layout.high_fprs * 8);
9926     }
9927 
9928   if (cfun_save_high_fprs_p)
9929     cfun_frame_layout.frame_size += cfun_frame_layout.high_fprs * 8;
9930 
9931   if (!crtl->is_leaf)
9932     cfun_frame_layout.frame_size += crtl->outgoing_args_size;
9933 
9934   /* In the following cases we have to allocate a STACK_POINTER_OFFSET
9935      sized area at the bottom of the stack.  This is required also for
9936      leaf functions.  When GCC generates a local stack reference it
9937      will always add STACK_POINTER_OFFSET to all these references.  */
9938   if (crtl->is_leaf
9939       && !TARGET_TPF_PROFILING
9940       && cfun_frame_layout.frame_size == 0
9941       && !cfun->calls_alloca)
9942     return;
9943 
9944   /* Calculate the number of bytes we have used in our own register
9945      save area.  With the packed stack layout we can re-use the
9946      remaining bytes for normal stack elements.  */
9947 
9948   if (TARGET_PACKED_STACK)
9949     lowest_offset = MIN (MIN (cfun_frame_layout.f0_offset,
9950 			      cfun_frame_layout.f4_offset),
9951 			 cfun_frame_layout.gprs_offset);
9952   else
9953     lowest_offset = 0;
9954 
9955   if (TARGET_BACKCHAIN)
9956     lowest_offset = MIN (lowest_offset, cfun_frame_layout.backchain_offset);
9957 
9958   cfun_frame_layout.frame_size += STACK_POINTER_OFFSET - lowest_offset;
9959 
9960   /* If, under 31 bit, an odd number of GPRs has to be saved, we have
9961      to adjust the frame size to maintain 8-byte alignment of stack
9962      frames.  */
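  /* For example, with an 8-byte stack boundary a raw frame size of 92
     bytes is rounded up to (92 + 7) & ~7 == 96 bytes.  */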
9963   cfun_frame_layout.frame_size = ((cfun_frame_layout.frame_size +
9964 				   STACK_BOUNDARY / BITS_PER_UNIT - 1)
9965 				  & ~(STACK_BOUNDARY / BITS_PER_UNIT - 1));
9966 }
9967 
9968 /* Generate frame layout.  Fills in register and frame data for the current
9969    function in cfun->machine.  This routine can be called multiple times;
9970    it will re-do the complete frame layout every time.  */
9971 
9972 static void
9973 s390_init_frame_layout (void)
9974 {
9975   HOST_WIDE_INT frame_size;
9976   int base_used;
9977 
9978   /* After LRA the frame layout is supposed to be read-only and should
9979      not be re-computed.  */
9980   if (reload_completed)
9981     return;
9982 
9983   do
9984     {
9985       frame_size = cfun_frame_layout.frame_size;
9986 
9987       /* Try to predict whether we'll need the base register.  */
9988       base_used = crtl->uses_const_pool
9989 		  || (!DISP_IN_RANGE (frame_size)
9990 		      && !CONST_OK_FOR_K (frame_size));
9991 
9992       /* Decide which register to use as literal pool base.  In small
9993 	 leaf functions, try to use an unused call-clobbered register
9994 	 as base register to avoid save/restore overhead.  */
9995       if (!base_used)
9996 	cfun->machine->base_reg = NULL_RTX;
9997       else
9998 	{
9999 	  int br = 0;
10000 
10001 	  if (crtl->is_leaf)
10002 	    /* Prefer r5 (most likely to be free).  */
10003 	    for (br = 5; br >= 2 && df_regs_ever_live_p (br); br--)
10004 	      ;
10005 	  cfun->machine->base_reg =
10006 	    gen_rtx_REG (Pmode, (br >= 2) ? br : BASE_REGNUM);
10007 	}
10008 
10009       s390_register_info ();
10010       s390_frame_info ();
10011     }
10012   while (frame_size != cfun_frame_layout.frame_size);
10013 }
10014 
10015 /* Remove the FPR clobbers from a tbegin insn if it can be proven that
10016    the TX is nonescaping.  A transaction is considered escaping if
10017    there is at least one path from tbegin returning CC0 to the
10018    function exit block without a tend.
10019 
10020    The check so far has some limitations:
10021    - only single tbegin/tend BBs are supported
10022    - the first cond jump after tbegin must separate the CC0 path from ~CC0
10023    - when CC is copied to a GPR and the CC0 check is done with the GPR
10024      this is not supported
10025 */
10026 
10027 static void
10028 s390_optimize_nonescaping_tx (void)
10029 {
10030   const unsigned int CC0 = 1 << 3;
10031   basic_block tbegin_bb = NULL;
10032   basic_block tend_bb = NULL;
10033   basic_block bb;
10034   rtx_insn *insn;
10035   bool result = true;
10036   int bb_index;
10037   rtx_insn *tbegin_insn = NULL;
10038 
10039   if (!cfun->machine->tbegin_p)
10040     return;
10041 
10042   for (bb_index = 0; bb_index < n_basic_blocks_for_fn (cfun); bb_index++)
10043     {
10044       bb = BASIC_BLOCK_FOR_FN (cfun, bb_index);
10045 
10046       if (!bb)
10047 	continue;
10048 
10049       FOR_BB_INSNS (bb, insn)
10050 	{
10051 	  rtx ite, cc, pat, target;
10052 	  unsigned HOST_WIDE_INT mask;
10053 
10054 	  if (!INSN_P (insn) || INSN_CODE (insn) <= 0)
10055 	    continue;
10056 
10057 	  pat = PATTERN (insn);
10058 
10059 	  if (GET_CODE (pat) == PARALLEL)
10060 	    pat = XVECEXP (pat, 0, 0);
10061 
10062 	  if (GET_CODE (pat) != SET
10063 	      || GET_CODE (SET_SRC (pat)) != UNSPEC_VOLATILE)
10064 	    continue;
10065 
10066 	  if (XINT (SET_SRC (pat), 1) == UNSPECV_TBEGIN)
10067 	    {
10068 	      rtx_insn *tmp;
10069 
10070 	      tbegin_insn = insn;
10071 
10072 	      /* Just return if the tbegin doesn't have clobbers.  */
10073 	      if (GET_CODE (PATTERN (insn)) != PARALLEL)
10074 		return;
10075 
10076 	      if (tbegin_bb != NULL)
10077 		return;
10078 
10079 	      /* Find the next conditional jump.  */
10080 	      for (tmp = NEXT_INSN (insn);
10081 		   tmp != NULL_RTX;
10082 		   tmp = NEXT_INSN (tmp))
10083 		{
10084 		  if (reg_set_p (gen_rtx_REG (CCmode, CC_REGNUM), tmp))
10085 		    return;
10086 		  if (!JUMP_P (tmp))
10087 		    continue;
10088 
10089 		  ite = SET_SRC (PATTERN (tmp));
10090 		  if (GET_CODE (ite) != IF_THEN_ELSE)
10091 		    continue;
10092 
10093 		  cc = XEXP (XEXP (ite, 0), 0);
10094 		  if (!REG_P (cc) || !CC_REGNO_P (REGNO (cc))
10095 		      || GET_MODE (cc) != CCRAWmode
10096 		      || GET_CODE (XEXP (XEXP (ite, 0), 1)) != CONST_INT)
10097 		    return;
10098 
10099 		  if (bb->succs->length () != 2)
10100 		    return;
10101 
10102 		  mask = INTVAL (XEXP (XEXP (ite, 0), 1));
10103 		  if (GET_CODE (XEXP (ite, 0)) == NE)
10104 		    mask ^= 0xf;
10105 
10106 		  if (mask == CC0)
10107 		    target = XEXP (ite, 1);
10108 		  else if (mask == (CC0 ^ 0xf))
10109 		    target = XEXP (ite, 2);
10110 		  else
10111 		    return;
10112 
10113 		  {
10114 		    edge_iterator ei;
10115 		    edge e1, e2;
10116 
10117 		    ei = ei_start (bb->succs);
10118 		    e1 = ei_safe_edge (ei);
10119 		    ei_next (&ei);
10120 		    e2 = ei_safe_edge (ei);
10121 
10122 		    if (e2->flags & EDGE_FALLTHRU)
10123 		      {
10124 			e2 = e1;
10125 			e1 = ei_safe_edge (ei);
10126 		      }
10127 
10128 		    if (!(e1->flags & EDGE_FALLTHRU))
10129 		      return;
10130 
10131 		    tbegin_bb = (target == pc_rtx) ? e1->dest : e2->dest;
10132 		  }
10133 		  if (tmp == BB_END (bb))
10134 		    break;
10135 		}
10136 	    }
10137 
10138 	  if (XINT (SET_SRC (pat), 1) == UNSPECV_TEND)
10139 	    {
10140 	      if (tend_bb != NULL)
10141 		return;
10142 	      tend_bb = bb;
10143 	    }
10144 	}
10145     }
10146 
10147   /* Either we successfully remove the FPR clobbers here or we are not
10148      able to do anything for this TX.  Neither case qualifies for
10149      another look.  */
10150   cfun->machine->tbegin_p = false;
10151 
10152   if (tbegin_bb == NULL || tend_bb == NULL)
10153     return;
10154 
10155   calculate_dominance_info (CDI_POST_DOMINATORS);
10156   result = dominated_by_p (CDI_POST_DOMINATORS, tbegin_bb, tend_bb);
10157   free_dominance_info (CDI_POST_DOMINATORS);
10158 
10159   if (!result)
10160     return;
10161 
10162   PATTERN (tbegin_insn) = gen_rtx_PARALLEL (VOIDmode,
10163 			    gen_rtvec (2,
10164 				       XVECEXP (PATTERN (tbegin_insn), 0, 0),
10165 				       XVECEXP (PATTERN (tbegin_insn), 0, 1)));
10166   INSN_CODE (tbegin_insn) = -1;
10167   df_insn_rescan (tbegin_insn);
10168 
10169   return;
10170 }
10171 
10172 /* Implement TARGET_HARD_REGNO_NREGS.  Because all registers in a class
10173    have the same size, this is equivalent to CLASS_MAX_NREGS.  */
10174 
10175 static unsigned int
10176 s390_hard_regno_nregs (unsigned int regno, machine_mode mode)
10177 {
10178   return s390_class_max_nregs (REGNO_REG_CLASS (regno), mode);
10179 }
10180 
10181 /* Implement TARGET_HARD_REGNO_MODE_OK.
10182 
10183    Integer modes <= word size fit into any GPR.
10184    Integer modes > word size fit into successive GPRs, starting with
10185    an even-numbered register.
10186    SImode and DImode fit into FPRs as well.
10187 
10188    Floating point modes <= word size fit into any FPR or GPR.
10189    Floating point modes > word size (i.e. DFmode on 32-bit) fit
10190    into any FPR, or an even-odd GPR pair.
10191    TFmode fits only into an even-odd FPR pair.
10192 
10193    Complex floating point modes fit either into two FPRs, or into
10194    successive GPRs (again starting with an even number).
10195    TCmode fits only into two successive even-odd FPR pairs.
10196 
10197    Condition code modes fit only into the CC register.  */
10198 
10199 static bool
10200 s390_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
10201 {
10202   if (!TARGET_VX && VECTOR_NOFP_REGNO_P (regno))
10203     return false;
10204 
10205   switch (REGNO_REG_CLASS (regno))
10206     {
10207     case VEC_REGS:
10208       return ((GET_MODE_CLASS (mode) == MODE_INT
10209 	       && s390_class_max_nregs (VEC_REGS, mode) == 1)
10210 	      || mode == DFmode
10211 	      || (TARGET_VXE && mode == SFmode)
10212 	      || s390_vector_mode_supported_p (mode));
10213       break;
10214     case FP_REGS:
10215       if (TARGET_VX
10216 	  && ((GET_MODE_CLASS (mode) == MODE_INT
10217 	       && s390_class_max_nregs (FP_REGS, mode) == 1)
10218 	      || mode == DFmode
10219 	      || s390_vector_mode_supported_p (mode)))
10220 	return true;
10221 
10222       if (REGNO_PAIR_OK (regno, mode))
10223 	{
10224 	  if (mode == SImode || mode == DImode)
10225 	    return true;
10226 
10227 	  if (FLOAT_MODE_P (mode) && GET_MODE_CLASS (mode) != MODE_VECTOR_FLOAT)
10228 	    return true;
10229 	}
10230       break;
10231     case ADDR_REGS:
10232       if (FRAME_REGNO_P (regno) && mode == Pmode)
10233 	return true;
10234 
10235       /* fallthrough */
10236     case GENERAL_REGS:
10237       if (REGNO_PAIR_OK (regno, mode))
10238 	{
10239 	  if (TARGET_ZARCH
10240 	      || (mode != TFmode && mode != TCmode && mode != TDmode))
10241 	    return true;
10242 	}
10243       break;
10244     case CC_REGS:
10245       if (GET_MODE_CLASS (mode) == MODE_CC)
10246 	return true;
10247       break;
10248     case ACCESS_REGS:
10249       if (REGNO_PAIR_OK (regno, mode))
10250 	{
10251 	  if (mode == SImode || mode == Pmode)
10252 	    return true;
10253 	}
10254       break;
10255     default:
10256       return false;
10257     }
10258 
10259   return false;
10260 }
10261 
10262 /* Implement TARGET_MODES_TIEABLE_P.  */
10263 
10264 static bool
10265 s390_modes_tieable_p (machine_mode mode1, machine_mode mode2)
10266 {
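  /* Two modes are tieable only if both are in the { SFmode, DFmode }
     group or both are outside it.  */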
10267   return ((mode1 == SFmode || mode1 == DFmode)
10268 	  == (mode2 == SFmode || mode2 == DFmode));
10269 }
10270 
10271 /* Return nonzero if register OLD_REG can be renamed to register NEW_REG.  */
10272 
10273 bool
10274 s390_hard_regno_rename_ok (unsigned int old_reg, unsigned int new_reg)
10275 {
10276    /* Once we've decided upon a register to use as base register, it must
10277       no longer be used for any other purpose.  */
10278   if (cfun->machine->base_reg)
10279     if (REGNO (cfun->machine->base_reg) == old_reg
10280 	|| REGNO (cfun->machine->base_reg) == new_reg)
10281       return false;
10282 
10283   /* Prevent regrename from using call-saved regs which haven't
10284      actually been saved.  This is necessary since regrename assumes
10285      the backend save/restore decisions are based on
10286      df_regs_ever_live.  Since we have our own routine we have to tell
10287      regrename manually about it.  */
10288   if (GENERAL_REGNO_P (new_reg)
10289       && !call_used_regs[new_reg]
10290       && cfun_gpr_save_slot (new_reg) == SAVE_SLOT_NONE)
10291     return false;
10292 
10293   return true;
10294 }
10295 
10296 /* Return nonzero if register REGNO can be used as a scratch register
10297    in peephole2.  */
10298 
10299 static bool
10300 s390_hard_regno_scratch_ok (unsigned int regno)
10301 {
10302   /* See s390_hard_regno_rename_ok.  */
10303   if (GENERAL_REGNO_P (regno)
10304       && !call_used_regs[regno]
10305       && cfun_gpr_save_slot (regno) == SAVE_SLOT_NONE)
10306     return false;
10307 
10308   return true;
10309 }
10310 
10311 /* Implement TARGET_HARD_REGNO_CALL_PART_CLOBBERED.  When generating
10312    code that runs in z/Architecture mode, but conforms to the 31-bit
10313    ABI, GPRs can hold 8 bytes; the ABI guarantees only that the lower 4
10314    bytes are saved across calls, however.  */
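/* E.g. in that configuration only the low 4 bytes of a DImode value held
   in a call-saved GPR (r6..r15) survive a call, so such a register counts
   as partially clobbered for modes wider than 4 bytes.  */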
10315 
10316 static bool
10317 s390_hard_regno_call_part_clobbered (unsigned int, unsigned int regno,
10318 				     machine_mode mode)
10319 {
10320   if (!TARGET_64BIT
10321       && TARGET_ZARCH
10322       && GET_MODE_SIZE (mode) > 4
10323       && ((regno >= 6 && regno <= 15) || regno == 32))
10324     return true;
10325 
10326   if (TARGET_VX
10327       && GET_MODE_SIZE (mode) > 8
10328       && (((TARGET_64BIT && regno >= 24 && regno <= 31))
10329 	  || (!TARGET_64BIT && (regno == 18 || regno == 19))))
10330     return true;
10331 
10332   return false;
10333 }
10334 
10335 /* Maximum number of registers to represent a value of mode MODE
10336    in a register of class RCLASS.  */
10337 
10338 int
10339 s390_class_max_nregs (enum reg_class rclass, machine_mode mode)
10340 {
10341   int reg_size;
10342   bool reg_pair_required_p = false;
10343 
10344   switch (rclass)
10345     {
10346     case FP_REGS:
10347     case VEC_REGS:
10348       reg_size = TARGET_VX ? 16 : 8;
10349 
10350       /* TF and TD modes would fit into a VR but we put them into a
10351 	 register pair since we do not have 128bit FP instructions on
10352 	 full VRs.  */
10353       if (TARGET_VX
10354 	  && SCALAR_FLOAT_MODE_P (mode)
10355 	  && GET_MODE_SIZE (mode) >= 16)
10356 	reg_pair_required_p = true;
10357 
10358       /* Even if complex types would fit into a single FPR/VR we force
10359 	 them into a register pair to deal with the parts more easily.
10360 	 (FIXME: What about complex ints?)  */
10361       if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
10362 	reg_pair_required_p = true;
10363       break;
10364     case ACCESS_REGS:
10365       reg_size = 4;
10366       break;
10367     default:
10368       reg_size = UNITS_PER_WORD;
10369       break;
10370     }
10371 
10372   if (reg_pair_required_p)
10373     return 2 * ((GET_MODE_SIZE (mode) / 2 + reg_size - 1) / reg_size);
10374 
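  /* E.g. with reg_size == 8 a 16-byte mode needs (16 + 8 - 1) / 8 == 2
     registers.  */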
10375   return (GET_MODE_SIZE (mode) + reg_size - 1) / reg_size;
10376 }
10377 
10378 /* Implement TARGET_CAN_CHANGE_MODE_CLASS.  */
10379 
10380 static bool
10381 s390_can_change_mode_class (machine_mode from_mode,
10382 			    machine_mode to_mode,
10383 			    reg_class_t rclass)
10384 {
10385   machine_mode small_mode;
10386   machine_mode big_mode;
10387 
10388   /* V1TF and TF have different representations in vector
10389      registers.  */
10390   if (reg_classes_intersect_p (VEC_REGS, rclass)
10391       && ((from_mode == V1TFmode && to_mode == TFmode)
10392 	  || (from_mode == TFmode && to_mode == V1TFmode)))
10393     return false;
10394 
10395   if (GET_MODE_SIZE (from_mode) == GET_MODE_SIZE (to_mode))
10396     return true;
10397 
10398   if (GET_MODE_SIZE (from_mode) < GET_MODE_SIZE (to_mode))
10399     {
10400       small_mode = from_mode;
10401       big_mode = to_mode;
10402     }
10403   else
10404     {
10405       small_mode = to_mode;
10406       big_mode = from_mode;
10407     }
10408 
10409   /* Values residing in VRs are little-endian style.  All modes are
10410      placed left-aligned in a VR.  This means that we cannot allow
10411      switching between modes with differing sizes.  Also if the vector
10412      facility is available we still place TFmode values in VR register
10413      pairs, since the only instructions we have operating on TFmodes
10414      only deal with register pairs.  Therefore we have to allow DFmode
10415      subregs of TFmodes to enable the TFmode splitters.  */
10416   if (reg_classes_intersect_p (VEC_REGS, rclass)
10417       && (GET_MODE_SIZE (small_mode) < 8
10418 	  || s390_class_max_nregs (VEC_REGS, big_mode) == 1))
10419     return false;
10420 
10421   /* Likewise for access registers, since they have only half the
10422      word size on 64-bit.  */
10423   if (reg_classes_intersect_p (ACCESS_REGS, rclass))
10424     return false;
10425 
10426   return true;
10427 }
10428 
10429 /* Return true if we use LRA instead of reload pass.  */
10430 static bool
10431 s390_lra_p (void)
10432 {
10433   return s390_lra_flag;
10434 }
10435 
10436 /* Return true if register FROM can be eliminated via register TO.  */
10437 
10438 static bool
10439 s390_can_eliminate (const int from, const int to)
10440 {
10441   /* We have not marked the base register as fixed.
10442      Instead, we have an elimination rule BASE_REGNUM -> BASE_REGNUM.
10443      If a function requires the base register, we say here that this
10444      elimination cannot be performed.  This will cause reload to free
10445      up the base register (as if it were fixed).  On the other hand,
10446      if the current function does *not* require the base register, we
10447      say here the elimination succeeds, which in turn allows reload
10448      to allocate the base register for any other purpose.  */
10449   if (from == BASE_REGNUM && to == BASE_REGNUM)
10450     {
10451       s390_init_frame_layout ();
10452       return cfun->machine->base_reg == NULL_RTX;
10453     }
10454 
10455   /* Everything else must point into the stack frame.  */
10456   gcc_assert (to == STACK_POINTER_REGNUM
10457 	      || to == HARD_FRAME_POINTER_REGNUM);
10458 
10459   gcc_assert (from == FRAME_POINTER_REGNUM
10460 	      || from == ARG_POINTER_REGNUM
10461 	      || from == RETURN_ADDRESS_POINTER_REGNUM);
10462 
10463   /* Make sure we actually saved the return address.  */
10464   if (from == RETURN_ADDRESS_POINTER_REGNUM)
10465     if (!crtl->calls_eh_return
10466 	&& !cfun->stdarg
10467 	&& !cfun_frame_layout.save_return_addr_p)
10468       return false;
10469 
10470   return true;
10471 }
10472 
10473 /* Return offset between register FROM and TO initially after prolog.  */
10474 
10475 HOST_WIDE_INT
10476 s390_initial_elimination_offset (int from, int to)
10477 {
10478   HOST_WIDE_INT offset;
10479 
10480   /* ??? Why are we called for non-eliminable pairs?  */
10481   if (!s390_can_eliminate (from, to))
10482     return 0;
10483 
10484   switch (from)
10485     {
10486     case FRAME_POINTER_REGNUM:
10487       offset = (get_frame_size()
10488 		+ STACK_POINTER_OFFSET
10489 		+ crtl->outgoing_args_size);
10490       break;
10491 
10492     case ARG_POINTER_REGNUM:
10493       s390_init_frame_layout ();
10494       offset = cfun_frame_layout.frame_size + STACK_POINTER_OFFSET;
10495       break;
10496 
10497     case RETURN_ADDRESS_POINTER_REGNUM:
10498       s390_init_frame_layout ();
10499 
10500       if (cfun_frame_layout.first_save_gpr_slot == -1)
10501 	{
10502 	  /* If it turns out that for stdarg nothing went into the reg
10503 	     save area we also do not need the return address
10504 	     pointer.  */
10505 	  if (cfun->stdarg && !cfun_save_arg_fprs_p)
10506 	    return 0;
10507 
10508 	  gcc_unreachable ();
10509 	}
10510 
10511       /* In order to make the following work it is not necessary for
10512 	 r14 to have a save slot.  It is sufficient if one other GPR
10513 	 got one.  Since the GPRs are always stored without gaps we
10514 	 are able to calculate where the r14 save slot would
10515 	 reside.  */
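      /* E.g. if the first saved GPR slot belongs to r13, the r14 save
	 slot is assumed to lie (14 - 13) * UNITS_PER_LONG bytes into the
	 GPR save area.  */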
10516       offset = (cfun_frame_layout.frame_size + cfun_frame_layout.gprs_offset +
10517 		(RETURN_REGNUM - cfun_frame_layout.first_save_gpr_slot) *
10518 		UNITS_PER_LONG);
10519       break;
10520 
10521     case BASE_REGNUM:
10522       offset = 0;
10523       break;
10524 
10525     default:
10526       gcc_unreachable ();
10527     }
10528 
10529   return offset;
10530 }
10531 
10532 /* Emit insn to save fpr REGNUM at offset OFFSET relative
10533    to register BASE.  Return generated insn.  */
10534 
10535 static rtx
10536 save_fpr (rtx base, int offset, int regnum)
10537 {
10538   rtx addr;
10539   addr = gen_rtx_MEM (DFmode, plus_constant (Pmode, base, offset));
10540 
10541   if (regnum >= 16 && regnum <= (16 + FP_ARG_NUM_REG))
10542     set_mem_alias_set (addr, get_varargs_alias_set ());
10543   else
10544     set_mem_alias_set (addr, get_frame_alias_set ());
10545 
10546   return emit_move_insn (addr, gen_rtx_REG (DFmode, regnum));
10547 }
10548 
10549 /* Emit insn to restore fpr REGNUM from offset OFFSET relative
10550    to register BASE.  Return generated insn.  */
10551 
10552 static rtx
10553 restore_fpr (rtx base, int offset, int regnum)
10554 {
10555   rtx addr;
10556   addr = gen_rtx_MEM (DFmode, plus_constant (Pmode, base, offset));
10557   set_mem_alias_set (addr, get_frame_alias_set ());
10558 
10559   return emit_move_insn (gen_rtx_REG (DFmode, regnum), addr);
10560 }
10561 
10562 /* Generate insn to save registers FIRST to LAST into
10563    the register save area located at offset OFFSET
10564    relative to register BASE.  */
10565 
10566 static rtx
10567 save_gprs (rtx base, int offset, int first, int last)
10568 {
10569   rtx addr, insn, note;
10570   int i;
10571 
10572   addr = plus_constant (Pmode, base, offset);
10573   addr = gen_rtx_MEM (Pmode, addr);
10574 
10575   set_mem_alias_set (addr, get_frame_alias_set ());
10576 
10577   /* Special-case single register.  */
10578   if (first == last)
10579     {
10580       if (TARGET_64BIT)
10581 	insn = gen_movdi (addr, gen_rtx_REG (Pmode, first));
10582       else
10583 	insn = gen_movsi (addr, gen_rtx_REG (Pmode, first));
10584 
10585       if (!global_not_special_regno_p (first))
10586 	RTX_FRAME_RELATED_P (insn) = 1;
10587       return insn;
10588     }
10589 
10590 
10591   insn = gen_store_multiple (addr,
10592 			     gen_rtx_REG (Pmode, first),
10593 			     GEN_INT (last - first + 1));
10594 
10595   if (first <= 6 && cfun->stdarg)
10596     for (i = 0; i < XVECLEN (PATTERN (insn), 0); i++)
10597       {
10598 	rtx mem = XEXP (XVECEXP (PATTERN (insn), 0, i), 0);
10599 
10600 	if (first + i <= 6)
10601 	  set_mem_alias_set (mem, get_varargs_alias_set ());
10602       }
10603 
10604   /* We need to set the FRAME_RELATED flag on all SETs
10605      inside the store-multiple pattern.
10606 
10607      However, we must not emit DWARF records for registers 2..5
10608      if they are stored for use by variable arguments ...
10609 
10610      ??? Unfortunately, it is not enough to simply not set the
10611      FRAME_RELATED flags for those SETs, because the first SET
10612      of the PARALLEL is always treated as if it had the flag
10613      set, even if it does not.  Therefore we emit a new pattern
10614      without those registers as REG_FRAME_RELATED_EXPR note.  */
10615 
10616   if (first >= 6 && !global_not_special_regno_p (first))
10617     {
10618       rtx pat = PATTERN (insn);
10619 
10620       for (i = 0; i < XVECLEN (pat, 0); i++)
10621 	if (GET_CODE (XVECEXP (pat, 0, i)) == SET
10622 	    && !global_not_special_regno_p (REGNO (SET_SRC (XVECEXP (pat,
10623 								     0, i)))))
10624 	  RTX_FRAME_RELATED_P (XVECEXP (pat, 0, i)) = 1;
10625 
10626       RTX_FRAME_RELATED_P (insn) = 1;
10627     }
10628   else if (last >= 6)
10629     {
10630       int start;
10631 
10632       for (start = first >= 6 ? first : 6; start <= last; start++)
10633 	if (!global_not_special_regno_p (start))
10634 	  break;
10635 
10636       if (start > last)
10637 	return insn;
10638 
10639       addr = plus_constant (Pmode, base,
10640 			    offset + (start - first) * UNITS_PER_LONG);
10641 
10642       if (start == last)
10643 	{
10644 	  if (TARGET_64BIT)
10645 	    note = gen_movdi (gen_rtx_MEM (Pmode, addr),
10646 			      gen_rtx_REG (Pmode, start));
10647 	  else
10648 	    note = gen_movsi (gen_rtx_MEM (Pmode, addr),
10649 			      gen_rtx_REG (Pmode, start));
10650 	  note = PATTERN (note);
10651 
10652 	  add_reg_note (insn, REG_FRAME_RELATED_EXPR, note);
10653 	  RTX_FRAME_RELATED_P (insn) = 1;
10654 
10655 	  return insn;
10656 	}
10657 
10658       note = gen_store_multiple (gen_rtx_MEM (Pmode, addr),
10659 				 gen_rtx_REG (Pmode, start),
10660 				 GEN_INT (last - start + 1));
10661       note = PATTERN (note);
10662 
10663       add_reg_note (insn, REG_FRAME_RELATED_EXPR, note);
10664 
10665       for (i = 0; i < XVECLEN (note, 0); i++)
10666 	if (GET_CODE (XVECEXP (note, 0, i)) == SET
10667 	    && !global_not_special_regno_p (REGNO (SET_SRC (XVECEXP (note,
10668 								     0, i)))))
10669 	  RTX_FRAME_RELATED_P (XVECEXP (note, 0, i)) = 1;
10670 
10671       RTX_FRAME_RELATED_P (insn) = 1;
10672     }
10673 
10674   return insn;
10675 }
10676 
10677 /* Generate insn to restore registers FIRST to LAST from
10678    the register save area located at offset OFFSET
10679    relative to register BASE.  */
10680 
10681 static rtx
10682 restore_gprs (rtx base, int offset, int first, int last)
10683 {
10684   rtx addr, insn;
10685 
10686   addr = plus_constant (Pmode, base, offset);
10687   addr = gen_rtx_MEM (Pmode, addr);
10688   set_mem_alias_set (addr, get_frame_alias_set ());
10689 
10690   /* Special-case single register.  */
10691   if (first == last)
10692     {
10693       if (TARGET_64BIT)
10694 	insn = gen_movdi (gen_rtx_REG (Pmode, first), addr);
10695       else
10696 	insn = gen_movsi (gen_rtx_REG (Pmode, first), addr);
10697 
10698       RTX_FRAME_RELATED_P (insn) = 1;
10699       return insn;
10700     }
10701 
10702   insn = gen_load_multiple (gen_rtx_REG (Pmode, first),
10703 			    addr,
10704 			    GEN_INT (last - first + 1));
10705   RTX_FRAME_RELATED_P (insn) = 1;
10706   return insn;
10707 }
10708 
10709 /* Return insn sequence to load the GOT register.  */
10710 
10711 rtx_insn *
10712 s390_load_got (void)
10713 {
10714   rtx_insn *insns;
10715 
10716   /* We cannot use pic_offset_table_rtx here since we use this
10717      function also for non-pic if __tls_get_offset is called and in
10718      that case PIC_OFFSET_TABLE_REGNUM as well as pic_offset_table_rtx
10719      aren't usable.  */
10720   rtx got_rtx = gen_rtx_REG (Pmode, 12);
10721 
10722   start_sequence ();
10723 
10724   emit_move_insn (got_rtx, s390_got_symbol ());
10725 
10726   insns = get_insns ();
10727   end_sequence ();
10728   return insns;
10729 }
10730 
10731 /* This ties together stack memory (MEM with an alias set of frame_alias_set)
10732    and the change to the stack pointer.  */
10733 
10734 static void
10735 s390_emit_stack_tie (void)
10736 {
10737   rtx mem = gen_frame_mem (BLKmode,
10738 			   gen_rtx_REG (Pmode, STACK_POINTER_REGNUM));
10739 
10740   emit_insn (gen_stack_tie (mem));
10741 }
10742 
10743 /* Copy GPRS into FPR save slots.  */
10744 
10745 static void
10746 s390_save_gprs_to_fprs (void)
10747 {
10748   int i;
10749 
10750   if (!TARGET_Z10 || !TARGET_HARD_FLOAT || !crtl->is_leaf)
10751     return;
10752 
10753   for (i = 6; i < 16; i++)
10754     {
10755       if (FP_REGNO_P (cfun_gpr_save_slot (i)))
10756 	{
10757 	  rtx_insn *insn =
10758 	    emit_move_insn (gen_rtx_REG (DImode, cfun_gpr_save_slot (i)),
10759 			    gen_rtx_REG (DImode, i));
10760 	  RTX_FRAME_RELATED_P (insn) = 1;
10761 	  /* This prevents dwarf2cfi from interpreting the set.  Doing
10762 	     so it might emit def_cfa_register infos setting an FPR as
10763 	     new CFA.  */
10764 	  add_reg_note (insn, REG_CFA_REGISTER, copy_rtx (PATTERN (insn)));
10765 	}
10766     }
10767 }
10768 
10769 /* Restore GPRs from FPR save slots.  */
10770 
10771 static void
10772 s390_restore_gprs_from_fprs (void)
10773 {
10774   int i;
10775 
10776   if (!TARGET_Z10 || !TARGET_HARD_FLOAT || !crtl->is_leaf)
10777     return;
10778 
10779   /* Restore the GPRs starting with the stack pointer.  That way the
10780      stack pointer already has its original value when it comes to
10781      restoring the hard frame pointer.  So we can set the cfa reg back
10782      to the stack pointer.  */
10783   for (i = STACK_POINTER_REGNUM; i >= 6; i--)
10784     {
10785       rtx_insn *insn;
10786 
10787       if (!FP_REGNO_P (cfun_gpr_save_slot (i)))
10788 	continue;
10789 
10790       rtx fpr = gen_rtx_REG (DImode, cfun_gpr_save_slot (i));
10791 
10792       if (i == STACK_POINTER_REGNUM)
10793 	insn = emit_insn (gen_stack_restore_from_fpr (fpr));
10794       else
10795 	insn = emit_move_insn (gen_rtx_REG (DImode, i), fpr);
10796 
10797       df_set_regs_ever_live (i, true);
10798       add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DImode, i));
10799 
10800       /* If either the stack pointer or the frame pointer get restored
10801 	 set the CFA value to its value at function start.  Doing this
10802 	 for the frame pointer results in .cfi_def_cfa_register 15
10803 	 which is ok since if the stack pointer got modified it has
10804 	 been restored already.  */
10805       if (i == STACK_POINTER_REGNUM || i == HARD_FRAME_POINTER_REGNUM)
10806 	add_reg_note (insn, REG_CFA_DEF_CFA,
10807 		      plus_constant (Pmode, stack_pointer_rtx,
10808 				     STACK_POINTER_OFFSET));
10809       RTX_FRAME_RELATED_P (insn) = 1;
10810     }
10811 }
10812 
10813 
10814 /* A pass run immediately before shrink-wrapping and prologue and epilogue
10815    generation.  */
10816 
10817 namespace {
10818 
10819 const pass_data pass_data_s390_early_mach =
10820 {
10821   RTL_PASS, /* type */
10822   "early_mach", /* name */
10823   OPTGROUP_NONE, /* optinfo_flags */
10824   TV_MACH_DEP, /* tv_id */
10825   0, /* properties_required */
10826   0, /* properties_provided */
10827   0, /* properties_destroyed */
10828   0, /* todo_flags_start */
10829   ( TODO_df_verify | TODO_df_finish ), /* todo_flags_finish */
10830 };
10831 
10832 class pass_s390_early_mach : public rtl_opt_pass
10833 {
10834 public:
10835   pass_s390_early_mach (gcc::context *ctxt)
10836     : rtl_opt_pass (pass_data_s390_early_mach, ctxt)
10837   {}
10838 
10839   /* opt_pass methods: */
10840   virtual unsigned int execute (function *);
10841 
10842 }; // class pass_s390_early_mach
10843 
10844 unsigned int
10845 pass_s390_early_mach::execute (function *fun)
10846 {
10847   rtx_insn *insn;
10848 
10849   /* Try to get rid of the FPR clobbers.  */
10850   s390_optimize_nonescaping_tx ();
10851 
10852   /* Re-compute register info.  */
10853   s390_register_info ();
10854 
10855   /* If we're using a base register, ensure that it is always valid for
10856      the first non-prologue instruction.  */
10857   if (fun->machine->base_reg)
10858     emit_insn_at_entry (gen_main_pool (fun->machine->base_reg));
10859 
10860   /* Annotate all constant pool references to let the scheduler know
10861      they implicitly use the base register.  */
10862   for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
10863     if (INSN_P (insn))
10864       {
10865 	annotate_constant_pool_refs (insn);
10866 	df_insn_rescan (insn);
10867       }
10868   return 0;
10869 }
10870 
10871 } // anon namespace
10872 
10873 rtl_opt_pass *
10874 make_pass_s390_early_mach (gcc::context *ctxt)
10875 {
10876   return new pass_s390_early_mach (ctxt);
10877 }
10878 
10879 /* Calculate TARGET = REG + OFFSET as s390_emit_prologue would do it.
10880    - push too big immediates to the literal pool and annotate the refs
10881    - emit frame related notes for stack pointer changes.  */
10882 
10883 static rtx
10884 s390_prologue_plus_offset (rtx target, rtx reg, rtx offset, bool frame_related_p)
10885 {
10886   rtx_insn *insn;
10887   rtx orig_offset = offset;
10888 
10889   gcc_assert (REG_P (target));
10890   gcc_assert (REG_P (reg));
10891   gcc_assert (CONST_INT_P (offset));
10892 
10893   if (offset == const0_rtx)                               /* lr/lgr */
10894     {
10895       insn = emit_move_insn (target, reg);
10896     }
10897   else if (DISP_IN_RANGE (INTVAL (offset)))               /* la */
10898     {
10899       insn = emit_move_insn (target, gen_rtx_PLUS (Pmode, reg,
10900 						   offset));
10901     }
10902   else
10903     {
10904       if (!satisfies_constraint_K (offset)                /* ahi/aghi */
10905 	  && (!TARGET_EXTIMM
10906 	      || (!satisfies_constraint_Op (offset)       /* alfi/algfi */
10907 		  && !satisfies_constraint_On (offset)))) /* slfi/slgfi */
10908 	offset = force_const_mem (Pmode, offset);
10909 
10910       if (target != reg)
10911 	{
10912 	  insn = emit_move_insn (target, reg);
10913 	  RTX_FRAME_RELATED_P (insn) = frame_related_p ? 1 : 0;
10914 	}
10915 
10916       insn = emit_insn (gen_add2_insn (target, offset));
10917 
10918       if (!CONST_INT_P (offset))
10919 	{
10920 	  annotate_constant_pool_refs (insn);
10921 
10922 	  if (frame_related_p)
10923 	    add_reg_note (insn, REG_FRAME_RELATED_EXPR,
10924 			  gen_rtx_SET (target,
10925 				       gen_rtx_PLUS (Pmode, target,
10926 						     orig_offset)));
10927 	}
10928     }
10929 
10930   RTX_FRAME_RELATED_P (insn) = frame_related_p ? 1 : 0;
10931 
10932   /* If this is a stack adjustment and we are generating a stack clash
10933      prologue, then add a REG_STACK_CHECK note to signal that this insn
10934      should be left alone.  */
10935   if (flag_stack_clash_protection && target == stack_pointer_rtx)
10936     add_reg_note (insn, REG_STACK_CHECK, const0_rtx);
10937 
10938   return insn;
10939 }
10940 
10941 /* Emit a compare instruction with a volatile memory access as stack
10942    probe.  It does not waste store tags and does not clobber any
10943    registers apart from the condition code.  */
10944 static void
10945 s390_emit_stack_probe (rtx addr)
10946 {
10947   rtx tmp = gen_rtx_MEM (Pmode, addr);
10948   MEM_VOLATILE_P (tmp) = 1;
10949   s390_emit_compare (EQ, gen_rtx_REG (Pmode, 0), tmp);
10950   emit_insn (gen_blockage ());
10951 }
10952 
10953 /* Use a runtime loop if we have to emit more probes than this.  */
10954 #define MIN_UNROLL_PROBES 3
10955 
10956 /* Allocate SIZE bytes of stack space, using TEMP_REG as a temporary
10957    if necessary.  LAST_PROBE_OFFSET contains the offset of the closest
10958    probe relative to the stack pointer.
10959 
10960    Note that SIZE is negative.
10961 
10962    The return value is true if TEMP_REG has been clobbered.  */
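/* Worked example (assuming the default 4 KiB probe interval): a 10 KiB
   allocation gives rounded_size == 8 KiB, i.e. two probes, plus a 2 KiB
   residual allocation.  */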
10963 static bool
10964 allocate_stack_space (rtx size, HOST_WIDE_INT last_probe_offset,
10965 		      rtx temp_reg)
10966 {
10967   bool temp_reg_clobbered_p = false;
10968   HOST_WIDE_INT probe_interval
10969     = 1 << param_stack_clash_protection_probe_interval;
10970   HOST_WIDE_INT guard_size
10971     = 1 << param_stack_clash_protection_guard_size;
10972 
10973   if (flag_stack_clash_protection)
10974     {
10975       if (last_probe_offset + -INTVAL (size) < guard_size)
10976 	dump_stack_clash_frame_info (NO_PROBE_SMALL_FRAME, true);
10977       else
10978 	{
10979 	  rtx offset = GEN_INT (probe_interval - UNITS_PER_LONG);
10980 	  HOST_WIDE_INT rounded_size = -INTVAL (size) & -probe_interval;
10981 	  HOST_WIDE_INT num_probes = rounded_size / probe_interval;
10982 	  HOST_WIDE_INT residual = -INTVAL (size) - rounded_size;
10983 
10984 	  if (num_probes < MIN_UNROLL_PROBES)
10985 	    {
10986 	      /* Emit unrolled probe statements.  */
10987 
10988 	      for (unsigned int i = 0; i < num_probes; i++)
10989 		{
10990 		  s390_prologue_plus_offset (stack_pointer_rtx,
10991 					     stack_pointer_rtx,
10992 					     GEN_INT (-probe_interval), true);
10993 		  s390_emit_stack_probe (gen_rtx_PLUS (Pmode,
10994 						       stack_pointer_rtx,
10995 						       offset));
10996 		}
10997 	      dump_stack_clash_frame_info (PROBE_INLINE, residual != 0);
10998 	    }
10999 	  else
11000 	    {
11001 	      /* Emit a loop probing the pages.  */
11002 
11003 	      rtx_code_label *loop_start_label = gen_label_rtx ();
11004 
11005 	      /* From now on temp_reg will be the CFA register.  */
11006 	      s390_prologue_plus_offset (temp_reg, stack_pointer_rtx,
11007 					 GEN_INT (-rounded_size), true);
11008 	      emit_label (loop_start_label);
11009 
11010 	      s390_prologue_plus_offset (stack_pointer_rtx,
11011 					 stack_pointer_rtx,
11012 					 GEN_INT (-probe_interval), false);
11013 	      s390_emit_stack_probe (gen_rtx_PLUS (Pmode,
11014 						   stack_pointer_rtx,
11015 						   offset));
11016 	      emit_cmp_and_jump_insns (stack_pointer_rtx, temp_reg,
11017 				       GT, NULL_RTX,
11018 				       Pmode, 1, loop_start_label);
11019 
11020 	      /* Without this make_edges ICEs.  */
11021 	      JUMP_LABEL (get_last_insn ()) = loop_start_label;
11022 	      LABEL_NUSES (loop_start_label) = 1;
11023 
11024 	      /* That's going to be a NOP since stack pointer and
11025 		 temp_reg are supposed to be the same here.  We just
11026 		 emit it to set the CFA reg back to r15.  */
11027 	      s390_prologue_plus_offset (stack_pointer_rtx, temp_reg,
11028 					 const0_rtx, true);
11029 	      temp_reg_clobbered_p = true;
11030 	      dump_stack_clash_frame_info (PROBE_LOOP, residual != 0);
11031 	    }
11032 
11033 	  /* Handle any residual allocation request.  */
11034 	  s390_prologue_plus_offset (stack_pointer_rtx,
11035 				     stack_pointer_rtx,
11036 				     GEN_INT (-residual), true);
11037 	  last_probe_offset += residual;
11038 	  if (last_probe_offset >= probe_interval)
11039 	    s390_emit_stack_probe (gen_rtx_PLUS (Pmode,
11040 						 stack_pointer_rtx,
11041 						 GEN_INT (residual
11042 							  - UNITS_PER_LONG)));
11043 
11044 	  return temp_reg_clobbered_p;
11045 	}
11046     }
11047 
11048   /* Subtract frame size from stack pointer.  */
11049   s390_prologue_plus_offset (stack_pointer_rtx,
11050 			     stack_pointer_rtx,
11051 			     size, true);
11052 
11053   return temp_reg_clobbered_p;
11054 }
11055 
11056 /* Expand the prologue into a bunch of separate insns.  */
11057 
11058 void
11059 s390_emit_prologue (void)
11060 {
11061   rtx insn, addr;
11062   rtx temp_reg;
11063   int i;
11064   int offset;
11065   int next_fpr = 0;
11066 
11067   /* Choose best register to use for temp use within prologue.
11068      TPF with profiling must avoid the register 14 - the tracing function
11069      needs the original contents of r14 to be preserved.  */
11070 
11071   if (!has_hard_reg_initial_val (Pmode, RETURN_REGNUM)
11072       && !crtl->is_leaf
11073       && !TARGET_TPF_PROFILING)
11074     temp_reg = gen_rtx_REG (Pmode, RETURN_REGNUM);
11075   else if (flag_split_stack && cfun->stdarg)
11076     temp_reg = gen_rtx_REG (Pmode, 12);
11077   else
11078     temp_reg = gen_rtx_REG (Pmode, 1);
11079 
11080   /* When probing for stack-clash mitigation, we have to track the distance
11081      between the stack pointer and closest known reference.
11082 
11083      Most of the time we have to make a worst case assumption.  The
11084      only exception is when TARGET_BACKCHAIN is active, in which case
11085      we know *sp (offset 0) was written.  */
11086   HOST_WIDE_INT probe_interval
11087     = 1 << param_stack_clash_protection_probe_interval;
11088   HOST_WIDE_INT last_probe_offset
11089     = (TARGET_BACKCHAIN
11090        ? (TARGET_PACKED_STACK ? STACK_POINTER_OFFSET - UNITS_PER_LONG : 0)
11091        : probe_interval - (STACK_BOUNDARY / UNITS_PER_WORD));
11092 
11093   s390_save_gprs_to_fprs ();
11094 
11095   /* Save call saved gprs.  */
11096   if (cfun_frame_layout.first_save_gpr != -1)
11097     {
11098       insn = save_gprs (stack_pointer_rtx,
11099 			cfun_frame_layout.gprs_offset +
11100 			UNITS_PER_LONG * (cfun_frame_layout.first_save_gpr
11101 					  - cfun_frame_layout.first_save_gpr_slot),
11102 			cfun_frame_layout.first_save_gpr,
11103 			cfun_frame_layout.last_save_gpr);
11104 
11105       /* This is not 100% correct.  If we have more than one register saved,
11106 	 then LAST_PROBE_OFFSET can move even closer to sp.  */
11107       last_probe_offset
11108 	= (cfun_frame_layout.gprs_offset +
11109 	   UNITS_PER_LONG * (cfun_frame_layout.first_save_gpr
11110 			     - cfun_frame_layout.first_save_gpr_slot));
11111 
11112       emit_insn (insn);
11113     }
11114 
11115   /* Dummy insn to mark literal pool slot.  */
11116 
11117   if (cfun->machine->base_reg)
11118     emit_insn (gen_main_pool (cfun->machine->base_reg));
11119 
11120   offset = cfun_frame_layout.f0_offset;
11121 
11122   /* Save f0 and f2.  */
11123   for (i = FPR0_REGNUM; i <= FPR0_REGNUM + 1; i++)
11124     {
11125       if (cfun_fpr_save_p (i))
11126 	{
11127 	  save_fpr (stack_pointer_rtx, offset, i);
11128 	  if (offset < last_probe_offset)
11129 	    last_probe_offset = offset;
11130 	  offset += 8;
11131 	}
11132       else if (!TARGET_PACKED_STACK || cfun->stdarg)
11133 	offset += 8;
11134     }
11135 
11136   /* Save f4 and f6.  */
11137   offset = cfun_frame_layout.f4_offset;
11138   for (i = FPR4_REGNUM; i <= FPR4_REGNUM + 1; i++)
11139     {
11140       if (cfun_fpr_save_p (i))
11141 	{
11142 	  insn = save_fpr (stack_pointer_rtx, offset, i);
11143 	  if (offset < last_probe_offset)
11144 	    last_probe_offset = offset;
11145 	  offset += 8;
11146 
11147 	  /* If f4 and f6 are call clobbered they are saved due to
11148 	     stdargs and therefore are not frame related.  */
11149 	  if (!call_used_regs[i])
11150 	    RTX_FRAME_RELATED_P (insn) = 1;
11151 	}
11152       else if (!TARGET_PACKED_STACK || call_used_regs[i])
11153 	offset += 8;
11154     }
11155 
11156   if (TARGET_PACKED_STACK
11157       && cfun_save_high_fprs_p
11158       && cfun_frame_layout.f8_offset + cfun_frame_layout.high_fprs * 8 > 0)
11159     {
11160       offset = (cfun_frame_layout.f8_offset
11161 		+ (cfun_frame_layout.high_fprs - 1) * 8);
11162 
11163       for (i = FPR15_REGNUM; i >= FPR8_REGNUM && offset >= 0; i--)
11164 	if (cfun_fpr_save_p (i))
11165 	  {
11166 	    insn = save_fpr (stack_pointer_rtx, offset, i);
11167 	    if (offset < last_probe_offset)
11168 	      last_probe_offset = offset;
11169 
11170 	    RTX_FRAME_RELATED_P (insn) = 1;
11171 	    offset -= 8;
11172 	  }
11173       if (offset >= cfun_frame_layout.f8_offset)
11174 	next_fpr = i;
11175     }
11176 
11177   if (!TARGET_PACKED_STACK)
11178     next_fpr = cfun_save_high_fprs_p ? FPR15_REGNUM : 0;
11179 
11180   if (flag_stack_usage_info)
11181     current_function_static_stack_size = cfun_frame_layout.frame_size;
11182 
11183   /* Decrement stack pointer.  */
11184 
11185   if (cfun_frame_layout.frame_size > 0)
11186     {
11187       rtx frame_off = GEN_INT (-cfun_frame_layout.frame_size);
11188       rtx_insn *stack_pointer_backup_loc;
11189       bool temp_reg_clobbered_p;
11190 
11191       if (s390_stack_size)
11192 	{
11193 	  HOST_WIDE_INT stack_guard;
11194 
11195 	  if (s390_stack_guard)
11196 	    stack_guard = s390_stack_guard;
11197 	  else
11198 	    {
11199 	      /* If no value for stack guard is provided the smallest power of 2
11200 		 larger than the current frame size is chosen.  */
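	      /* E.g. a frame size of 5000 bytes yields a stack guard of
		 8192 bytes.  */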
11201 	      stack_guard = 1;
11202 	      while (stack_guard < cfun_frame_layout.frame_size)
11203 		stack_guard <<= 1;
11204 	    }
11205 
11206 	  if (cfun_frame_layout.frame_size >= s390_stack_size)
11207 	    {
11208 	      warning (0, "frame size of function %qs is %wd"
11209 		       " bytes exceeding user provided stack limit of "
11210 		       "%d bytes.  "
11211 		       "An unconditional trap is added.",
11212 		       current_function_name(), cfun_frame_layout.frame_size,
11213 		       s390_stack_size);
11214 	      emit_insn (gen_trap ());
11215 	      emit_barrier ();
11216 	    }
11217 	  else
11218 	    {
11219 	      /* stack_guard has to be smaller than s390_stack_size.
11220 		 Otherwise we would emit an AND with zero which would
11221 		 not match the test under mask pattern.  */
11222 	      if (stack_guard >= s390_stack_size)
11223 		{
11224 		  warning (0, "frame size of function %qs is %wd"
11225 			   " bytes which is more than half the stack size. "
11226 			   "The dynamic check would not be reliable. "
11227 			   "No check emitted for this function.",
11228 			   current_function_name(),
11229 			   cfun_frame_layout.frame_size);
11230 		}
11231 	      else
11232 		{
11233 		  HOST_WIDE_INT stack_check_mask = ((s390_stack_size - 1)
11234 						    & ~(stack_guard - 1));
11235 
11236 		  rtx t = gen_rtx_AND (Pmode, stack_pointer_rtx,
11237 				       GEN_INT (stack_check_mask));
11238 		  if (TARGET_64BIT)
11239 		    emit_insn (gen_ctrapdi4 (gen_rtx_EQ (VOIDmode,
11240 							 t, const0_rtx),
11241 					     t, const0_rtx, const0_rtx));
11242 		  else
11243 		    emit_insn (gen_ctrapsi4 (gen_rtx_EQ (VOIDmode,
11244 							 t, const0_rtx),
11245 					     t, const0_rtx, const0_rtx));
11246 		}
11247 	    }
11248 	}
11249 
11250       if (s390_warn_framesize > 0
11251 	  && cfun_frame_layout.frame_size >= s390_warn_framesize)
11252 	warning (0, "frame size of %qs is %wd bytes",
11253 		 current_function_name (), cfun_frame_layout.frame_size);
11254 
11255       if (s390_warn_dynamicstack_p && cfun->calls_alloca)
11256 	warning (0, "%qs uses dynamic stack allocation", current_function_name ());
11257 
11258       /* Save the location where we could backup the incoming stack
11259 	 pointer.  */
11260       stack_pointer_backup_loc = get_last_insn ();
11261 
11262       temp_reg_clobbered_p = allocate_stack_space (frame_off, last_probe_offset,
11263 						   temp_reg);
11264 
11265       if (TARGET_BACKCHAIN || next_fpr)
11266 	{
11267 	  if (temp_reg_clobbered_p)
11268 	    {
11269 	      /* allocate_stack_space had to make use of temp_reg and
11270 		 we need it to hold a backup of the incoming stack
11271 		 pointer.  Calculate back that value from the current
11272 		 stack pointer.  */
11273 	      s390_prologue_plus_offset (temp_reg, stack_pointer_rtx,
11274 					 GEN_INT (cfun_frame_layout.frame_size),
11275 					 false);
11276 	    }
11277 	  else
11278 	    {
11279 	      /* allocate_stack_space didn't actually require
11280 		 temp_reg.  Insert the stack pointer backup insn
11281 		 before the stack pointer decrement code - knowing now
11282 		 that the value will survive.  */
11283 	      emit_insn_after (gen_move_insn (temp_reg, stack_pointer_rtx),
11284 			       stack_pointer_backup_loc);
11285 	    }
11286 	}
11287 
11288       /* Set backchain.  */
11289 
11290       if (TARGET_BACKCHAIN)
11291 	{
11292 	  if (cfun_frame_layout.backchain_offset)
11293 	    addr = gen_rtx_MEM (Pmode,
11294 				plus_constant (Pmode, stack_pointer_rtx,
11295 				  cfun_frame_layout.backchain_offset));
11296 	  else
11297 	    addr = gen_rtx_MEM (Pmode, stack_pointer_rtx);
11298 	  set_mem_alias_set (addr, get_frame_alias_set ());
11299 	  insn = emit_insn (gen_move_insn (addr, temp_reg));
11300 	}
11301 
11302       /* If we support non-call exceptions (e.g. for Java),
11303 	 we need to make sure the backchain pointer is set up
11304 	 before any possibly trapping memory access.  */
11305       if (TARGET_BACKCHAIN && cfun->can_throw_non_call_exceptions)
11306 	{
11307 	  addr = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (VOIDmode));
11308 	  emit_clobber (addr);
11309 	}
11310     }
11311   else if (flag_stack_clash_protection)
11312     dump_stack_clash_frame_info (NO_PROBE_NO_FRAME, false);
11313 
11314   /* Save fprs 8 - 15 (64 bit ABI).  */
11315 
11316   if (cfun_save_high_fprs_p && next_fpr)
11317     {
11318       /* If the stack might be accessed through a different register
11319 	 we have to make sure that the stack pointer decrement is not
11320 	 moved below the use of the stack slots.  */
11321       s390_emit_stack_tie ();
11322 
11323       insn = emit_insn (gen_add2_insn (temp_reg,
11324 				       GEN_INT (cfun_frame_layout.f8_offset)));
11325 
11326       offset = 0;
11327 
11328       for (i = FPR8_REGNUM; i <= next_fpr; i++)
11329 	if (cfun_fpr_save_p (i))
11330 	  {
11331 	    rtx addr = plus_constant (Pmode, stack_pointer_rtx,
11332 				      cfun_frame_layout.frame_size
11333 				      + cfun_frame_layout.f8_offset
11334 				      + offset);
11335 
11336 	    insn = save_fpr (temp_reg, offset, i);
11337 	    offset += 8;
11338 	    RTX_FRAME_RELATED_P (insn) = 1;
11339 	    add_reg_note (insn, REG_FRAME_RELATED_EXPR,
11340 			  gen_rtx_SET (gen_rtx_MEM (DFmode, addr),
11341 				       gen_rtx_REG (DFmode, i)));
11342 	  }
11343     }
11344 
11345   /* Set frame pointer, if needed.  */
11346 
11347   if (frame_pointer_needed)
11348     {
11349       insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
11350       RTX_FRAME_RELATED_P (insn) = 1;
11351     }
11352 
11353   /* Set up got pointer, if needed.  */
11354 
11355   if (flag_pic && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
11356     {
11357       rtx_insn *insns = s390_load_got ();
11358 
11359       for (rtx_insn *insn = insns; insn; insn = NEXT_INSN (insn))
11360 	annotate_constant_pool_refs (insn);
11361 
11362       emit_insn (insns);
11363     }
11364 
11365 #if TARGET_TPF != 0
11366   if (TARGET_TPF_PROFILING)
11367     {
11368       /* Generate a BAS instruction to serve as a function entry
11369 	 intercept to facilitate the use of tracing algorithms located
11370 	 at the branch target.  */
11371       emit_insn (gen_prologue_tpf (
11372 		   GEN_INT (s390_tpf_trace_hook_prologue_check),
11373 		   GEN_INT (s390_tpf_trace_hook_prologue_target)));
11374 
11375       /* Emit a blockage here so that all code lies between the
11376 	 profiling mechanisms.  */
11377       emit_insn (gen_blockage ());
11378     }
11379 #endif
11380 }
11381 
11382 /* Expand the epilogue into a bunch of separate insns.  */
11383 
11384 void
11385 s390_emit_epilogue (bool sibcall)
11386 {
11387   rtx frame_pointer, return_reg = NULL_RTX, cfa_restores = NULL_RTX;
11388   int area_bottom, area_top, offset = 0;
11389   int next_offset;
11390   int i;
11391 
11392 #if TARGET_TPF != 0
11393   if (TARGET_TPF_PROFILING)
11394     {
11395       /* Generate a BAS instruction to serve as a function entry
11396 	 intercept to facilitate the use of tracing algorithms located
11397 	 at the branch target.  */
11398 
11399       /* Emit a blockage here so that all code lies between the
11400 	 profiling mechanisms.  */
11401       emit_insn (gen_blockage ());
11402 
11403       emit_insn (gen_epilogue_tpf (
11404 		   GEN_INT (s390_tpf_trace_hook_epilogue_check),
11405 		   GEN_INT (s390_tpf_trace_hook_epilogue_target)));
11406     }
11407 #endif
11408 
11409   /* Check whether to use frame or stack pointer for restore.  */
11410 
11411   frame_pointer = (frame_pointer_needed
11412 		   ? hard_frame_pointer_rtx : stack_pointer_rtx);
11413 
11414   s390_frame_area (&area_bottom, &area_top);
11415 
11416   /* Check whether we can access the register save area.
11417      If not, increment the frame pointer as required.  */
11418 
11419   if (area_top <= area_bottom)
11420     {
11421       /* Nothing to restore.  */
11422     }
11423   else if (DISP_IN_RANGE (cfun_frame_layout.frame_size + area_bottom)
11424 	   && DISP_IN_RANGE (cfun_frame_layout.frame_size + area_top - 1))
11425     {
11426       /* Area is in range.  */
11427       offset = cfun_frame_layout.frame_size;
11428     }
11429   else
11430     {
11431       rtx_insn *insn;
11432       rtx frame_off, cfa;
11433 
11434       offset = area_bottom < 0 ? -area_bottom : 0;
11435       frame_off = GEN_INT (cfun_frame_layout.frame_size - offset);
11436 
11437       cfa = gen_rtx_SET (frame_pointer,
11438 			 gen_rtx_PLUS (Pmode, frame_pointer, frame_off));
11439       if (DISP_IN_RANGE (INTVAL (frame_off)))
11440 	{
11441 	  rtx set;
11442 
11443 	  set = gen_rtx_SET (frame_pointer,
11444 			     gen_rtx_PLUS (Pmode, frame_pointer, frame_off));
11445 	  insn = emit_insn (set);
11446 	}
11447       else
11448 	{
11449 	  if (!CONST_OK_FOR_K (INTVAL (frame_off)))
11450 	    frame_off = force_const_mem (Pmode, frame_off);
11451 
11452 	  insn = emit_insn (gen_add2_insn (frame_pointer, frame_off));
11453 	  annotate_constant_pool_refs (insn);
11454 	}
11455       add_reg_note (insn, REG_CFA_ADJUST_CFA, cfa);
11456       RTX_FRAME_RELATED_P (insn) = 1;
11457     }
11458 
11459   /* Restore call saved fprs.  */
11460 
11461   if (TARGET_64BIT)
11462     {
11463       if (cfun_save_high_fprs_p)
11464 	{
11465 	  next_offset = cfun_frame_layout.f8_offset;
11466 	  for (i = FPR8_REGNUM; i <= FPR15_REGNUM; i++)
11467 	    {
11468 	      if (cfun_fpr_save_p (i))
11469 		{
11470 		  restore_fpr (frame_pointer,
11471 			       offset + next_offset, i);
11472 		  cfa_restores
11473 		    = alloc_reg_note (REG_CFA_RESTORE,
11474 				      gen_rtx_REG (DFmode, i), cfa_restores);
11475 		  next_offset += 8;
11476 		}
11477 	    }
11478 	}
11479 
11480     }
11481   else
11482     {
11483       next_offset = cfun_frame_layout.f4_offset;
11484       /* f4, f6 */
11485       for (i = FPR4_REGNUM; i <= FPR4_REGNUM + 1; i++)
11486 	{
11487 	  if (cfun_fpr_save_p (i))
11488 	    {
11489 	      restore_fpr (frame_pointer,
11490 			   offset + next_offset, i);
11491 	      cfa_restores
11492 		= alloc_reg_note (REG_CFA_RESTORE,
11493 				  gen_rtx_REG (DFmode, i), cfa_restores);
11494 	      next_offset += 8;
11495 	    }
11496 	  else if (!TARGET_PACKED_STACK)
11497 	    next_offset += 8;
11498 	}
11499 
11500     }
11501 
11502   /* Restore call saved gprs.  */
11503 
11504   if (cfun_frame_layout.first_restore_gpr != -1)
11505     {
11506       rtx insn, addr;
11507       int i;
11508 
11509       /* Check for global register and save them
11510 	 to stack location from where they get restored.  */
11511 
11512       for (i = cfun_frame_layout.first_restore_gpr;
11513 	   i <= cfun_frame_layout.last_restore_gpr;
11514 	   i++)
11515 	{
11516 	  if (global_not_special_regno_p (i))
11517 	    {
11518 	      addr = plus_constant (Pmode, frame_pointer,
11519 				    offset + cfun_frame_layout.gprs_offset
11520 				    + (i - cfun_frame_layout.first_save_gpr_slot)
11521 				    * UNITS_PER_LONG);
11522 	      addr = gen_rtx_MEM (Pmode, addr);
11523 	      set_mem_alias_set (addr, get_frame_alias_set ());
11524 	      emit_move_insn (addr, gen_rtx_REG (Pmode, i));
11525 	    }
11526 	  else
11527 	    cfa_restores
11528 	      = alloc_reg_note (REG_CFA_RESTORE,
11529 				gen_rtx_REG (Pmode, i), cfa_restores);
11530 	}
11531 
11532       /* Fetch return address from stack before load multiple;
11533 	 this helps scheduling.
11534 
11535 	 Only do this if we already decided that r14 needs to be
11536 	 saved to a stack slot. (And not just because r14 happens to
11537 	 be in between two GPRs which need saving.)  Otherwise it
11538 	 would be difficult to take that decision back in
11539 	 s390_optimize_prologue.
11540 
11541 	 This optimization is only helpful on in-order machines.  */
11542       if (! sibcall
11543 	  && cfun_gpr_save_slot (RETURN_REGNUM) == SAVE_SLOT_STACK
11544 	  && s390_tune <= PROCESSOR_2097_Z10)
11545 	{
11546 	  int return_regnum = find_unused_clobbered_reg();
11547 	  if (!return_regnum
11548 	      || (TARGET_INDIRECT_BRANCH_NOBP_RET_OPTION
11549 		  && !TARGET_CPU_Z10
11550 		  && return_regnum == INDIRECT_BRANCH_THUNK_REGNUM))
11551 	    {
11552 	      gcc_assert (INDIRECT_BRANCH_THUNK_REGNUM != 4);
11553 	      return_regnum = 4;
11554 	    }
11555 	  return_reg = gen_rtx_REG (Pmode, return_regnum);
11556 
11557 	  addr = plus_constant (Pmode, frame_pointer,
11558 				offset + cfun_frame_layout.gprs_offset
11559 				+ (RETURN_REGNUM
11560 				   - cfun_frame_layout.first_save_gpr_slot)
11561 				* UNITS_PER_LONG);
11562 	  addr = gen_rtx_MEM (Pmode, addr);
11563 	  set_mem_alias_set (addr, get_frame_alias_set ());
11564 	  emit_move_insn (return_reg, addr);
11565 
11566 	  /* Once we did that optimization we have to make sure
11567 	     s390_optimize_prologue does not try to remove the store
11568 	     of r14 since we will not be able to find the load issued
11569 	     here.  */
11570 	  cfun_frame_layout.save_return_addr_p = true;
11571 	}
11572 
11573       insn = restore_gprs (frame_pointer,
11574 			   offset + cfun_frame_layout.gprs_offset
11575 			   + (cfun_frame_layout.first_restore_gpr
11576 			      - cfun_frame_layout.first_save_gpr_slot)
11577 			   * UNITS_PER_LONG,
11578 			   cfun_frame_layout.first_restore_gpr,
11579 			   cfun_frame_layout.last_restore_gpr);
11580       insn = emit_insn (insn);
11581       REG_NOTES (insn) = cfa_restores;
11582       add_reg_note (insn, REG_CFA_DEF_CFA,
11583 		    plus_constant (Pmode, stack_pointer_rtx,
11584 				   STACK_POINTER_OFFSET));
11585       RTX_FRAME_RELATED_P (insn) = 1;
11586     }
11587 
11588   s390_restore_gprs_from_fprs ();
11589 
11590   if (! sibcall)
11591     {
11592       if (!return_reg && !s390_can_use_return_insn ())
11593         /* We planned to emit (return), but we are not allowed to.  */
11594         return_reg = gen_rtx_REG (Pmode, RETURN_REGNUM);
11595 
11596       if (return_reg)
11597         /* Emit (return) and (use).  */
11598         emit_jump_insn (gen_return_use (return_reg));
11599       else
11600         /* The fact that RETURN_REGNUM is used is already reflected by
11601            EPILOGUE_USES.  Emit plain (return).  */
11602         emit_jump_insn (gen_return ());
11603     }
11604 }
11605 
11606 /* Implement TARGET_SET_UP_BY_PROLOGUE.  */
11607 
11608 static void
11609 s300_set_up_by_prologue (hard_reg_set_container *regs)
11610 {
11611   if (cfun->machine->base_reg
11612       && !call_used_regs[REGNO (cfun->machine->base_reg)])
11613     SET_HARD_REG_BIT (regs->set, REGNO (cfun->machine->base_reg));
11614 }
11615 
11616 /* -fsplit-stack support.  */
11617 
11618 /* A SYMBOL_REF for __morestack.  */
11619 static GTY(()) rtx morestack_ref;
11620 
11621 /* When using -fsplit-stack, the allocation routines set a field in
11622    the TCB to the bottom of the stack plus this much space, measured
11623    in bytes.  */
11624 
11625 #define SPLIT_STACK_AVAILABLE 1024
11626 
11627 /* Emit the parmblock for __morestack into .rodata section.  It
11628    consists of 3 pointer size entries:
11629    - frame size
11630    - size of stack arguments
11631    - offset between parm block and __morestack return label  */
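/* For example, on 64-bit the block emitted below looks roughly like

       .align	8
   .L<parm_block>:
       .quad	<frame size>
       .quad	<stack argument size>
       .quad	.L<call_done>-.L<parm_block>

   while 31-bit code uses ".align 4" and ".long" entries instead.  */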
11632 
11633 void
11634 s390_output_split_stack_data (rtx parm_block, rtx call_done,
11635 			      rtx frame_size, rtx args_size)
11636 {
11637   rtx ops[] = { parm_block, call_done };
11638 
11639   switch_to_section (targetm.asm_out.function_rodata_section
11640 		     (current_function_decl));
11641 
11642   if (TARGET_64BIT)
11643     output_asm_insn (".align\t8", NULL);
11644   else
11645     output_asm_insn (".align\t4", NULL);
11646 
11647   (*targetm.asm_out.internal_label) (asm_out_file, "L",
11648 				     CODE_LABEL_NUMBER (parm_block));
11649   if (TARGET_64BIT)
11650     {
11651       output_asm_insn (".quad\t%0", &frame_size);
11652       output_asm_insn (".quad\t%0", &args_size);
11653       output_asm_insn (".quad\t%1-%0", ops);
11654     }
11655   else
11656     {
11657       output_asm_insn (".long\t%0", &frame_size);
11658       output_asm_insn (".long\t%0", &args_size);
11659       output_asm_insn (".long\t%1-%0", ops);
11660     }
11661 
11662   switch_to_section (current_function_section ());
11663 }
11664 
11665 /* Emit -fsplit-stack prologue, which goes before the regular function
11666    prologue.  */
11667 
11668 void
11669 s390_expand_split_stack_prologue (void)
11670 {
11671   rtx r1, guard, cc = NULL;
11672   rtx_insn *insn;
11673   /* Offset from thread pointer to __private_ss.  */
11674   int psso = TARGET_64BIT ? 0x38 : 0x20;
11675   /* Pointer size in bytes.  */
11676   /* Frame size and argument size - the two parameters to __morestack.  */
11677   HOST_WIDE_INT frame_size = cfun_frame_layout.frame_size;
11678   /* Align argument size to 8 bytes - simplifies __morestack code.  */
11679   HOST_WIDE_INT args_size = crtl->args.size >= 0
11680 			    ? ((crtl->args.size + 7) & ~7)
11681 			    : 0;
11682   /* Label to be called by __morestack.  */
11683   rtx_code_label *call_done = NULL;
11684   rtx_code_label *parm_base = NULL;
11685   rtx tmp;
11686 
11687   gcc_assert (flag_split_stack && reload_completed);
11688 
11689   r1 = gen_rtx_REG (Pmode, 1);
11690 
11691   /* If no stack frame will be allocated, don't do anything.  */
11692   if (!frame_size)
11693     {
11694       if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
11695 	{
11696 	  /* If va_start is used, just use r15.  */
11697 	  emit_move_insn (r1,
11698 			 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
11699 				       GEN_INT (STACK_POINTER_OFFSET)));
11700 
11701 	}
11702       return;
11703     }
11704 
11705   if (morestack_ref == NULL_RTX)
11706     {
11707       morestack_ref = gen_rtx_SYMBOL_REF (Pmode, "__morestack");
11708       SYMBOL_REF_FLAGS (morestack_ref) |= (SYMBOL_FLAG_LOCAL
11709 					   | SYMBOL_FLAG_FUNCTION);
11710     }
11711 
11712   if (CONST_OK_FOR_K (frame_size) || CONST_OK_FOR_Op (frame_size))
11713     {
11714       /* If frame_size will fit in an add instruction, do a stack space
11715 	 check, and only call __morestack if there's not enough space.  */
11716 
11717       /* Get thread pointer.  r1 is the only register we can always destroy - r0
11718 	 could contain a static chain (and cannot be used to address memory
11719 	 anyway), r2-r6 can contain parameters, and r6-r15 are callee-saved.  */
11720       emit_insn (gen_get_thread_pointer (Pmode, r1));
11721       /* Aim at __private_ss.  */
11722       guard = gen_rtx_MEM (Pmode, plus_constant (Pmode, r1, psso));
11723 
11724       /* If less than 1 KiB is used, skip the addition and compare
11725 	 directly with __private_ss.  */
11726       if (frame_size > SPLIT_STACK_AVAILABLE)
11727 	{
11728 	  emit_move_insn (r1, guard);
11729 	  if (TARGET_64BIT)
11730 	    emit_insn (gen_adddi3 (r1, r1, GEN_INT (frame_size)));
11731 	  else
11732 	    emit_insn (gen_addsi3 (r1, r1, GEN_INT (frame_size)));
11733 	  guard = r1;
11734 	}
11735 
11736       /* Compare the (maybe adjusted) guard with the stack pointer.  */
11737       cc = s390_emit_compare (LT, stack_pointer_rtx, guard);
11738     }
11739 
11740   call_done = gen_label_rtx ();
11741   parm_base = gen_label_rtx ();
11742   LABEL_NUSES (parm_base)++;
11743   LABEL_NUSES (call_done)++;
11744 
11745   /* %r1 = litbase.  */
11746   insn = emit_move_insn (r1, gen_rtx_LABEL_REF (VOIDmode, parm_base));
11747   add_reg_note (insn, REG_LABEL_OPERAND, parm_base);
11748   LABEL_NUSES (parm_base)++;
11749 
11750   /* Now, we need to call __morestack.  It has very special calling
11751      conventions: it preserves param/return/static chain registers for
11752      calling main function body, and looks for its own parameters at %r1. */
11753   if (cc != NULL)
11754     tmp = gen_split_stack_cond_call (Pmode,
11755 				     morestack_ref,
11756 				     parm_base,
11757 				     call_done,
11758 				     GEN_INT (frame_size),
11759 				     GEN_INT (args_size),
11760 				     cc);
11761   else
11762     tmp = gen_split_stack_call (Pmode,
11763 				morestack_ref,
11764 				parm_base,
11765 				call_done,
11766 				GEN_INT (frame_size),
11767 				GEN_INT (args_size));
11768 
11769   insn = emit_jump_insn (tmp);
11770   JUMP_LABEL (insn) = call_done;
11771   add_reg_note (insn, REG_LABEL_OPERAND, parm_base);
11772   add_reg_note (insn, REG_LABEL_OPERAND, call_done);
11773 
11774   if (cc != NULL)
11775     {
11776       /* Mark the jump as very unlikely to be taken.  */
11777       add_reg_br_prob_note (insn,
11778 			    profile_probability::very_unlikely ());
11779 
11780       if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
11781 	{
11782 	  /* If va_start is used, and __morestack was not called, just use
11783 	     r15.  */
11784 	  emit_move_insn (r1,
11785 			 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
11786 				       GEN_INT (STACK_POINTER_OFFSET)));
11787 	}
11788     }
11789   else
11790     {
11791       emit_barrier ();
11792     }
11793 
11794   /* __morestack will call us here.  */
11795 
11796   emit_label (call_done);
11797 }
11798 
11799 /* We may have to tell the dataflow pass that the split stack prologue
11800    is initializing a register.  */
11801 
11802 static void
11803 s390_live_on_entry (bitmap regs)
11804 {
11805   if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
11806     {
11807       gcc_assert (flag_split_stack);
11808       bitmap_set_bit (regs, 1);
11809     }
11810 }
11811 
11812 /* Return true if the function can use simple_return to return outside
11813    of a shrink-wrapped region.  At present shrink-wrapping is supported
11814    in all cases.  */
11815 
11816 bool
11817 s390_can_use_simple_return_insn (void)
11818 {
11819   return true;
11820 }
11821 
11822 /* Return true if the epilogue is guaranteed to contain only a return
11823    instruction and if a direct return can therefore be used instead.
11824    One of the main advantages of using direct return instructions
11825    is that we can then use conditional returns.  */
11826 
11827 bool
11828 s390_can_use_return_insn (void)
11829 {
11830   int i;
11831 
11832   if (!reload_completed)
11833     return false;
11834 
11835   if (crtl->profile)
11836     return false;
11837 
11838   if (TARGET_TPF_PROFILING)
11839     return false;
11840 
11841   for (i = 0; i < 16; i++)
11842     if (cfun_gpr_save_slot (i) != SAVE_SLOT_NONE)
11843       return false;
11844 
11845   /* For 31 bit this is not covered by the frame_size check below
11846      since f4, f6 are saved in the register save area without needing
11847      additional stack space.  */
11848   if (!TARGET_64BIT
11849       && (cfun_fpr_save_p (FPR4_REGNUM) || cfun_fpr_save_p (FPR6_REGNUM)))
11850     return false;
11851 
11852   if (cfun->machine->base_reg
11853       && !call_used_regs[REGNO (cfun->machine->base_reg)])
11854     return false;
11855 
11856   return cfun_frame_layout.frame_size == 0;
11857 }
11858 
11859 /* The VX ABI differs for vararg functions.  Therefore we need the
11860    prototype of the callee to be available when passing vector type
11861    values.  */
11862 static const char *
11863 s390_invalid_arg_for_unprototyped_fn (const_tree typelist, const_tree funcdecl, const_tree val)
11864 {
11865   return ((TARGET_VX_ABI
11866 	   && typelist == 0
11867 	   && VECTOR_TYPE_P (TREE_TYPE (val))
11868 	   && (funcdecl == NULL_TREE
11869 	       || (TREE_CODE (funcdecl) == FUNCTION_DECL
11870 		   && DECL_BUILT_IN_CLASS (funcdecl) != BUILT_IN_MD)))
11871 	  ? N_("vector argument passed to unprototyped function")
11872 	  : NULL);
11873 }
11874 
11875 
11876 /* Return the size in bytes of a function argument of
11877    type TYPE and/or mode MODE.  At least one of TYPE or
11878    MODE must be specified.  */
11879 
11880 static int
11881 s390_function_arg_size (machine_mode mode, const_tree type)
11882 {
11883   if (type)
11884     return int_size_in_bytes (type);
11885 
11886   /* No type info available for some library calls ...  */
11887   if (mode != BLKmode)
11888     return GET_MODE_SIZE (mode);
11889 
11890   /* If we have neither type nor mode, abort.  */
11891   gcc_unreachable ();
11892 }
11893 
11894 /* Return true if a function argument of type TYPE and mode MODE
11895    is to be passed in a vector register, if available.  */
11896 
11897 bool
11898 s390_function_arg_vector (machine_mode mode, const_tree type)
11899 {
11900   if (!TARGET_VX_ABI)
11901     return false;
11902 
11903   if (s390_function_arg_size (mode, type) > 16)
11904     return false;
11905 
11906   /* No type info available for some library calls ...  */
11907   if (!type)
11908     return VECTOR_MODE_P (mode);
11909 
11910   /* The ABI says that record types with a single member are treated
11911      just like that member would be.  */
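  /* For example, a struct whose only member is a 16-byte vector is
     passed like that vector itself, whereas a wrapper whose size
     differs from its single member (e.g. because of padding) is
     rejected below.  */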
11912   int empty_base_seen = 0;
11913   const_tree orig_type = type;
11914   while (TREE_CODE (type) == RECORD_TYPE)
11915     {
11916       tree field, single = NULL_TREE;
11917 
11918       for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
11919 	{
11920 	  if (TREE_CODE (field) != FIELD_DECL)
11921 	    continue;
11922 
11923 	  if (DECL_FIELD_ABI_IGNORED (field))
11924 	    {
11925 	      if (lookup_attribute ("no_unique_address",
11926 				    DECL_ATTRIBUTES (field)))
11927 		empty_base_seen |= 2;
11928 	      else
11929 		empty_base_seen |= 1;
11930 	      continue;
11931 	    }
11932 
11933 	  if (single == NULL_TREE)
11934 	    single = TREE_TYPE (field);
11935 	  else
11936 	    return false;
11937 	}
11938 
11939       if (single == NULL_TREE)
11940 	return false;
11941       else
11942 	{
11943 	  /* If the field declaration adds extra bytes, e.g. due to
11944 	     padding, this is not accepted as a vector type.  */
11945 	  if (int_size_in_bytes (single) <= 0
11946 	      || int_size_in_bytes (single) != int_size_in_bytes (type))
11947 	    return false;
11948 	  type = single;
11949 	}
11950     }
11951 
11952   if (!VECTOR_TYPE_P (type))
11953     return false;
11954 
11955   if (warn_psabi && empty_base_seen)
11956     {
11957       static unsigned last_reported_type_uid;
11958       unsigned uid = TYPE_UID (TYPE_MAIN_VARIANT (orig_type));
11959       if (uid != last_reported_type_uid)
11960 	{
11961 	  const char *url = CHANGES_ROOT_URL "gcc-10/changes.html#empty_base";
11962 	  last_reported_type_uid = uid;
11963 	  if (empty_base_seen & 1)
11964 	    inform (input_location,
11965 		    "parameter passing for argument of type %qT when C++17 "
11966 		    "is enabled changed to match C++14 %{in GCC 10.1%}",
11967 		    orig_type, url);
11968 	  else
11969 	    inform (input_location,
11970 		    "parameter passing for argument of type %qT with "
11971 		    "%<[[no_unique_address]]%> members changed "
11972 		    "%{in GCC 10.1%}", orig_type, url);
11973 	}
11974     }
11975   return true;
11976 }
11977 
11978 /* Return true if a function argument of type TYPE and mode MODE
11979    is to be passed in a floating-point register, if available.  */
11980 
11981 static bool
11982 s390_function_arg_float (machine_mode mode, const_tree type)
11983 {
11984   if (s390_function_arg_size (mode, type) > 8)
11985     return false;
11986 
11987   /* Soft-float changes the ABI: no floating-point registers are used.  */
11988   if (TARGET_SOFT_FLOAT)
11989     return false;
11990 
11991   /* No type info available for some library calls ...  */
11992   if (!type)
11993     return mode == SFmode || mode == DFmode || mode == SDmode || mode == DDmode;
11994 
11995   /* The ABI says that record types with a single member are treated
11996      just like that member would be.  */
11997   int empty_base_seen = 0;
11998   const_tree orig_type = type;
11999   while (TREE_CODE (type) == RECORD_TYPE)
12000     {
12001       tree field, single = NULL_TREE;
12002 
12003       for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
12004 	{
12005 	  if (TREE_CODE (field) != FIELD_DECL)
12006 	    continue;
12007 	  if (DECL_FIELD_ABI_IGNORED (field))
12008 	    {
12009 	      if (lookup_attribute ("no_unique_address",
12010 				    DECL_ATTRIBUTES (field)))
12011 		empty_base_seen |= 2;
12012 	      else
12013 		empty_base_seen |= 1;
12014 	      continue;
12015 	    }
12016 
12017 	  if (single == NULL_TREE)
12018 	    single = TREE_TYPE (field);
12019 	  else
12020 	    return false;
12021 	}
12022 
12023       if (single == NULL_TREE)
12024 	return false;
12025       else
12026 	type = single;
12027     }
12028 
12029   if (TREE_CODE (type) != REAL_TYPE)
12030     return false;
12031 
12032   if (warn_psabi && empty_base_seen)
12033     {
12034       static unsigned last_reported_type_uid;
12035       unsigned uid = TYPE_UID (TYPE_MAIN_VARIANT (orig_type));
12036       if (uid != last_reported_type_uid)
12037 	{
12038 	  const char *url = CHANGES_ROOT_URL "gcc-10/changes.html#empty_base";
12039 	  last_reported_type_uid = uid;
12040 	  if (empty_base_seen & 1)
12041 	    inform (input_location,
12042 		    "parameter passing for argument of type %qT when C++17 "
12043 		    "is enabled changed to match C++14 %{in GCC 10.1%}",
12044 		    orig_type, url);
12045 	  else
12046 	    inform (input_location,
12047 		    "parameter passing for argument of type %qT with "
12048 		    "%<[[no_unique_address]]%> members changed "
12049 		    "%{in GCC 10.1%}", orig_type, url);
12050 	}
12051     }
12052 
12053   return true;
12054 }
12055 
12056 /* Return true if a function argument of type TYPE and mode MODE
12057    is to be passed in an integer register, or a pair of integer
12058    registers, if available.  */
12059 
12060 static bool
12061 s390_function_arg_integer (machine_mode mode, const_tree type)
12062 {
12063   int size = s390_function_arg_size (mode, type);
12064   if (size > 8)
12065     return false;
12066 
12067   /* No type info available for some library calls ...  */
12068   if (!type)
12069     return GET_MODE_CLASS (mode) == MODE_INT
12070 	   || (TARGET_SOFT_FLOAT &&  SCALAR_FLOAT_MODE_P (mode));
12071 
12072   /* We accept small integral (and similar) types.  */
12073   if (INTEGRAL_TYPE_P (type)
12074       || POINTER_TYPE_P (type)
12075       || TREE_CODE (type) == NULLPTR_TYPE
12076       || TREE_CODE (type) == OFFSET_TYPE
12077       || (TARGET_SOFT_FLOAT && TREE_CODE (type) == REAL_TYPE))
12078     return true;
12079 
12080   /* We also accept structs of size 1, 2, 4, 8 that are not
12081      passed in floating-point registers.  */
12082   if (AGGREGATE_TYPE_P (type)
12083       && exact_log2 (size) >= 0
12084       && !s390_function_arg_float (mode, type))
12085     return true;
12086 
12087   return false;
12088 }
12089 
12090 /* Return true if a function argument ARG is to be passed by reference.
12091    The ABI specifies that only structures of size 1, 2, 4, or 8 bytes
12092    are passed by value, all other structures (and complex numbers) are
12093    passed by reference.  */
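/* Illustratively: "struct { int a; }" (size 4) is passed by value,
   while "struct { char c[3]; }" (size 3, not a power of two) and
   "_Complex double" are passed by reference, as is anything larger
   than 8 bytes that does not qualify for a vector register.  */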
12094 
12095 static bool
12096 s390_pass_by_reference (cumulative_args_t, const function_arg_info &arg)
12097 {
12098   int size = s390_function_arg_size (arg.mode, arg.type);
12099 
12100   if (s390_function_arg_vector (arg.mode, arg.type))
12101     return false;
12102 
12103   if (size > 8)
12104     return true;
12105 
12106   if (tree type = arg.type)
12107     {
12108       if (AGGREGATE_TYPE_P (type) && exact_log2 (size) < 0)
12109 	return true;
12110 
12111       if (TREE_CODE (type) == COMPLEX_TYPE
12112 	  || TREE_CODE (type) == VECTOR_TYPE)
12113 	return true;
12114     }
12115 
12116   return false;
12117 }
12118 
12119 /* Update the data in CUM to advance over argument ARG.  */
12120 
12121 static void
12122 s390_function_arg_advance (cumulative_args_t cum_v,
12123 			   const function_arg_info &arg)
12124 {
12125   CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
12126 
12127   if (s390_function_arg_vector (arg.mode, arg.type))
12128     {
12129       /* We are called for unnamed vector stdarg arguments which are
12130 	 passed on the stack.  In this case this hook does not have to
12131 	 do anything since stack arguments are tracked by common
12132 	 code.  */
12133       if (!arg.named)
12134 	return;
12135       cum->vrs += 1;
12136     }
12137   else if (s390_function_arg_float (arg.mode, arg.type))
12138     {
12139       cum->fprs += 1;
12140     }
12141   else if (s390_function_arg_integer (arg.mode, arg.type))
12142     {
12143       int size = s390_function_arg_size (arg.mode, arg.type);
12144       cum->gprs += ((size + UNITS_PER_LONG - 1) / UNITS_PER_LONG);
12145     }
12146   else
12147     gcc_unreachable ();
12148 }
12149 
12150 /* Define where to put the arguments to a function.
12151    Value is zero to push the argument on the stack,
12152    or a hard register in which to store the argument.
12153 
12154    CUM is a variable of type CUMULATIVE_ARGS which gives info about
12155     the preceding args and about the function being called.
12156    ARG is a description of the argument.
12157 
12158    On S/390, we use general purpose registers 2 through 6 to
12159    pass integer, pointer, and certain structure arguments, and
12160    floating point registers 0 and 2 (0, 2, 4, and 6 on 64-bit)
12161    to pass floating point arguments.  All remaining arguments
12162    are pushed to the stack.  */
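/* As an illustrative example, for "f (int a, double b, int c)" on
   64-bit, A is passed in %r2, B in %f0 and C in %r3, because the GPR
   and FPR counters in CUM advance independently of each other.  */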
12163 
12164 static rtx
12165 s390_function_arg (cumulative_args_t cum_v, const function_arg_info &arg)
12166 {
12167   CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
12168 
12169   if (!arg.named)
12170     s390_check_type_for_vector_abi (arg.type, true, false);
12171 
12172   if (s390_function_arg_vector (arg.mode, arg.type))
12173     {
12174       /* Vector arguments that are part of the ellipsis are passed on
12175 	 the stack.  */
12176       if (!arg.named || (cum->vrs + 1 > VEC_ARG_NUM_REG))
12177 	return NULL_RTX;
12178 
12179       return gen_rtx_REG (arg.mode, cum->vrs + FIRST_VEC_ARG_REGNO);
12180     }
12181   else if (s390_function_arg_float (arg.mode, arg.type))
12182     {
12183       if (cum->fprs + 1 > FP_ARG_NUM_REG)
12184 	return NULL_RTX;
12185       else
12186 	return gen_rtx_REG (arg.mode, cum->fprs + 16);
12187     }
12188   else if (s390_function_arg_integer (arg.mode, arg.type))
12189     {
12190       int size = s390_function_arg_size (arg.mode, arg.type);
12191       int n_gprs = (size + UNITS_PER_LONG - 1) / UNITS_PER_LONG;
12192 
12193       if (cum->gprs + n_gprs > GP_ARG_NUM_REG)
12194 	return NULL_RTX;
12195       else if (n_gprs == 1 || UNITS_PER_WORD == UNITS_PER_LONG)
12196 	return gen_rtx_REG (arg.mode, cum->gprs + 2);
12197       else if (n_gprs == 2)
12198 	{
12199 	  rtvec p = rtvec_alloc (2);
12200 
12201 	  RTVEC_ELT (p, 0)
12202 	    = gen_rtx_EXPR_LIST (SImode, gen_rtx_REG (SImode, cum->gprs + 2),
12203 					 const0_rtx);
12204 	  RTVEC_ELT (p, 1)
12205 	    = gen_rtx_EXPR_LIST (SImode, gen_rtx_REG (SImode, cum->gprs + 3),
12206 					 GEN_INT (4));
12207 
12208 	  return gen_rtx_PARALLEL (arg.mode, p);
12209 	}
12210     }
12211 
12212   /* After the real arguments, expand_call calls us once again with an
12213      end marker.  Whatever we return here is passed as operand 2 to the
12214      call expanders.
12215 
12216      We don't need this feature ...  */
12217   else if (arg.end_marker_p ())
12218     return const0_rtx;
12219 
12220   gcc_unreachable ();
12221 }
12222 
12223 /* Implement TARGET_FUNCTION_ARG_BOUNDARY.  Vector arguments are
12224    left-justified when placed on the stack during parameter passing.  */
12225 
12226 static pad_direction
12227 s390_function_arg_padding (machine_mode mode, const_tree type)
12228 {
12229   if (s390_function_arg_vector (mode, type))
12230     return PAD_UPWARD;
12231 
12232   return default_function_arg_padding (mode, type);
12233 }
12234 
12235 /* Return true if return values of type TYPE should be returned
12236    in a memory buffer whose address is passed by the caller as
12237    hidden first argument.  */
12238 
12239 static bool
12240 s390_return_in_memory (const_tree type, const_tree fundecl ATTRIBUTE_UNUSED)
12241 {
12242   /* We accept small integral (and similar) types.  */
12243   if (INTEGRAL_TYPE_P (type)
12244       || POINTER_TYPE_P (type)
12245       || TREE_CODE (type) == OFFSET_TYPE
12246       || TREE_CODE (type) == REAL_TYPE)
12247     return int_size_in_bytes (type) > 8;
12248 
12249   /* vector types which fit into a VR.  */
12250   if (TARGET_VX_ABI
12251       && VECTOR_TYPE_P (type)
12252       && int_size_in_bytes (type) <= 16)
12253     return false;
12254 
12255   /* Aggregates and similar constructs are always returned
12256      in memory.  */
12257   if (AGGREGATE_TYPE_P (type)
12258       || TREE_CODE (type) == COMPLEX_TYPE
12259       || VECTOR_TYPE_P (type))
12260     return true;
12261 
12262   /* ??? We get called on all sorts of random stuff from
12263      aggregate_value_p.  We can't abort, but it's not clear
12264      what's safe to return.  Pretend it's a struct I guess.  */
12265   return true;
12266 }
12267 
12268 /* Function arguments and return values are promoted to word size.  */
12269 
12270 static machine_mode
12271 s390_promote_function_mode (const_tree type, machine_mode mode,
12272 			    int *punsignedp,
12273 			    const_tree fntype ATTRIBUTE_UNUSED,
12274 			    int for_return ATTRIBUTE_UNUSED)
12275 {
12276   if (INTEGRAL_MODE_P (mode)
12277       && GET_MODE_SIZE (mode) < UNITS_PER_LONG)
12278     {
12279       if (type != NULL_TREE && POINTER_TYPE_P (type))
12280 	*punsignedp = POINTERS_EXTEND_UNSIGNED;
12281       return Pmode;
12282     }
12283 
12284   return mode;
12285 }
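
/* For example, with this hook an "int" (SImode) argument or return
   value on a 64-bit target is widened to a full Pmode (DImode)
   register before being passed or returned.  */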
12286 
12287 /* Define where to return a (scalar) value of type RET_TYPE.
12288    If RET_TYPE is null, define where to return a (scalar)
12289    value of mode MODE from a libcall.  */
12290 
12291 static rtx
12292 s390_function_and_libcall_value (machine_mode mode,
12293 				 const_tree ret_type,
12294 				 const_tree fntype_or_decl,
12295 				 bool outgoing ATTRIBUTE_UNUSED)
12296 {
12297   /* For vector return types it is important to use the RET_TYPE
12298      argument whenever available since the middle-end might have
12299      changed the mode to a scalar mode.  */
12300   bool vector_ret_type_p = ((ret_type && VECTOR_TYPE_P (ret_type))
12301 			    || (!ret_type && VECTOR_MODE_P (mode)));
12302 
12303   /* For normal functions perform the promotion as
12304      promote_function_mode would do.  */
12305   if (ret_type)
12306     {
12307       int unsignedp = TYPE_UNSIGNED (ret_type);
12308       mode = promote_function_mode (ret_type, mode, &unsignedp,
12309 				    fntype_or_decl, 1);
12310     }
12311 
12312   gcc_assert (GET_MODE_CLASS (mode) == MODE_INT
12313 	      || SCALAR_FLOAT_MODE_P (mode)
12314 	      || (TARGET_VX_ABI && vector_ret_type_p));
12315   gcc_assert (GET_MODE_SIZE (mode) <= (TARGET_VX_ABI ? 16 : 8));
12316 
12317   if (TARGET_VX_ABI && vector_ret_type_p)
12318     return gen_rtx_REG (mode, FIRST_VEC_ARG_REGNO);
12319   else if (TARGET_HARD_FLOAT && SCALAR_FLOAT_MODE_P (mode))
12320     return gen_rtx_REG (mode, 16);
12321   else if (GET_MODE_SIZE (mode) <= UNITS_PER_LONG
12322 	   || UNITS_PER_LONG == UNITS_PER_WORD)
12323     return gen_rtx_REG (mode, 2);
12324   else if (GET_MODE_SIZE (mode) == 2 * UNITS_PER_LONG)
12325     {
12326       /* This case is triggered when returning a 64 bit value with
12327 	 -m31 -mzarch.  Although the value would fit into a single
12328 	 register it has to be forced into a 32 bit register pair in
12329 	 order to match the ABI.  */
12330       rtvec p = rtvec_alloc (2);
12331 
12332       RTVEC_ELT (p, 0)
12333 	= gen_rtx_EXPR_LIST (SImode, gen_rtx_REG (SImode, 2), const0_rtx);
12334       RTVEC_ELT (p, 1)
12335 	= gen_rtx_EXPR_LIST (SImode, gen_rtx_REG (SImode, 3), GEN_INT (4));
12336 
12337       return gen_rtx_PARALLEL (mode, p);
12338     }
12339 
12340   gcc_unreachable ();
12341 }
12342 
12343 /* Define where to return a scalar return value of type RET_TYPE.  */
12344 
12345 static rtx
12346 s390_function_value (const_tree ret_type, const_tree fn_decl_or_type,
12347 		     bool outgoing)
12348 {
12349   return s390_function_and_libcall_value (TYPE_MODE (ret_type), ret_type,
12350 					  fn_decl_or_type, outgoing);
12351 }
12352 
12353 /* Define where to return a scalar libcall return value of mode
12354    MODE.  */
12355 
12356 static rtx
12357 s390_libcall_value (machine_mode mode, const_rtx fun ATTRIBUTE_UNUSED)
12358 {
12359   return s390_function_and_libcall_value (mode, NULL_TREE,
12360 					  NULL_TREE, true);
12361 }
12362 
12363 
12364 /* Create and return the va_list datatype.
12365 
12366    On S/390, va_list is an array type equivalent to
12367 
12368       typedef struct __va_list_tag
12369 	{
12370 	    long __gpr;
12371 	    long __fpr;
12372 	    void *__overflow_arg_area;
12373 	    void *__reg_save_area;
12374 	} va_list[1];
12375 
12376    where __gpr and __fpr hold the number of general purpose
12377    or floating point arguments used up to now, respectively,
12378    __overflow_arg_area points to the stack location of the
12379    next argument passed on the stack, and __reg_save_area
12380    always points to the start of the register area in the
12381    call frame of the current function.  The function prologue
12382    saves all registers used for argument passing into this
12383    area if the function uses variable arguments.  */
12384 
12385 static tree
12386 s390_build_builtin_va_list (void)
12387 {
12388   tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
12389 
12390   record = lang_hooks.types.make_type (RECORD_TYPE);
12391 
12392   type_decl =
12393     build_decl (BUILTINS_LOCATION,
12394 		TYPE_DECL, get_identifier ("__va_list_tag"), record);
12395 
12396   f_gpr = build_decl (BUILTINS_LOCATION,
12397 		      FIELD_DECL, get_identifier ("__gpr"),
12398 		      long_integer_type_node);
12399   f_fpr = build_decl (BUILTINS_LOCATION,
12400 		      FIELD_DECL, get_identifier ("__fpr"),
12401 		      long_integer_type_node);
12402   f_ovf = build_decl (BUILTINS_LOCATION,
12403 		      FIELD_DECL, get_identifier ("__overflow_arg_area"),
12404 		      ptr_type_node);
12405   f_sav = build_decl (BUILTINS_LOCATION,
12406 		      FIELD_DECL, get_identifier ("__reg_save_area"),
12407 		      ptr_type_node);
12408 
12409   va_list_gpr_counter_field = f_gpr;
12410   va_list_fpr_counter_field = f_fpr;
12411 
12412   DECL_FIELD_CONTEXT (f_gpr) = record;
12413   DECL_FIELD_CONTEXT (f_fpr) = record;
12414   DECL_FIELD_CONTEXT (f_ovf) = record;
12415   DECL_FIELD_CONTEXT (f_sav) = record;
12416 
12417   TYPE_STUB_DECL (record) = type_decl;
12418   TYPE_NAME (record) = type_decl;
12419   TYPE_FIELDS (record) = f_gpr;
12420   DECL_CHAIN (f_gpr) = f_fpr;
12421   DECL_CHAIN (f_fpr) = f_ovf;
12422   DECL_CHAIN (f_ovf) = f_sav;
12423 
12424   layout_type (record);
12425 
12426   /* The correct type is an array type of one element.  */
12427   return build_array_type (record, build_index_type (size_zero_node));
12428 }
12429 
12430 /* Implement va_start by filling the va_list structure VALIST.
12431    STDARG_P is always true, and ignored.
12432    NEXTARG points to the first anonymous stack argument.
12433 
12434    The following global variables are used to initialize
12435    the va_list structure:
12436 
12437      crtl->args.info:
12438        holds number of gprs and fprs used for named arguments.
12439      crtl->args.arg_offset_rtx:
12440        holds the offset of the first anonymous stack argument
12441        (relative to the virtual arg pointer).  */
12442 
12443 static void
12444 s390_va_start (tree valist, rtx nextarg ATTRIBUTE_UNUSED)
12445 {
12446   HOST_WIDE_INT n_gpr, n_fpr;
12447   int off;
12448   tree f_gpr, f_fpr, f_ovf, f_sav;
12449   tree gpr, fpr, ovf, sav, t;
12450 
12451   f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
12452   f_fpr = DECL_CHAIN (f_gpr);
12453   f_ovf = DECL_CHAIN (f_fpr);
12454   f_sav = DECL_CHAIN (f_ovf);
12455 
12456   valist = build_simple_mem_ref (valist);
12457   gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
12458   fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
12459   ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
12460   sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
12461 
12462   /* Count number of gp and fp argument registers used.  */
12463 
12464   n_gpr = crtl->args.info.gprs;
12465   n_fpr = crtl->args.info.fprs;
12466 
12467   if (cfun->va_list_gpr_size)
12468     {
12469       t = build2 (MODIFY_EXPR, TREE_TYPE (gpr), gpr,
12470 		  build_int_cst (NULL_TREE, n_gpr));
12471       TREE_SIDE_EFFECTS (t) = 1;
12472       expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
12473     }
12474 
12475   if (cfun->va_list_fpr_size)
12476     {
12477       t = build2 (MODIFY_EXPR, TREE_TYPE (fpr), fpr,
12478 		  build_int_cst (NULL_TREE, n_fpr));
12479       TREE_SIDE_EFFECTS (t) = 1;
12480       expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
12481     }
12482 
12483   if (flag_split_stack
12484      && (lookup_attribute ("no_split_stack", DECL_ATTRIBUTES (cfun->decl))
12485 	 == NULL)
12486      && cfun->machine->split_stack_varargs_pointer == NULL_RTX)
12487     {
12488       rtx reg;
12489       rtx_insn *seq;
12490 
12491       reg = gen_reg_rtx (Pmode);
12492       cfun->machine->split_stack_varargs_pointer = reg;
12493 
12494       start_sequence ();
12495       emit_move_insn (reg, gen_rtx_REG (Pmode, 1));
12496       seq = get_insns ();
12497       end_sequence ();
12498 
12499       push_topmost_sequence ();
12500       emit_insn_after (seq, entry_of_function ());
12501       pop_topmost_sequence ();
12502     }
12503 
12504   /* Find the overflow area.
12505      FIXME: This currently is too pessimistic when the vector ABI is
12506      enabled.  In that case we *always* set up the overflow area
12507      pointer.  */
12508   if (n_gpr + cfun->va_list_gpr_size > GP_ARG_NUM_REG
12509       || n_fpr + cfun->va_list_fpr_size > FP_ARG_NUM_REG
12510       || TARGET_VX_ABI)
12511     {
12512       if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
12513 	t = make_tree (TREE_TYPE (ovf), virtual_incoming_args_rtx);
12514       else
12515 	t = make_tree (TREE_TYPE (ovf), cfun->machine->split_stack_varargs_pointer);
12516 
12517       off = INTVAL (crtl->args.arg_offset_rtx);
12518       off = off < 0 ? 0 : off;
12519       if (TARGET_DEBUG_ARG)
12520 	fprintf (stderr, "va_start: n_gpr = %d, n_fpr = %d off %d\n",
12521 		 (int)n_gpr, (int)n_fpr, off);
12522 
12523       t = fold_build_pointer_plus_hwi (t, off);
12524 
12525       t = build2 (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
12526       TREE_SIDE_EFFECTS (t) = 1;
12527       expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
12528     }
12529 
12530   /* Find the register save area.  */
12531   if ((cfun->va_list_gpr_size && n_gpr < GP_ARG_NUM_REG)
12532       || (cfun->va_list_fpr_size && n_fpr < FP_ARG_NUM_REG))
12533     {
12534       t = make_tree (TREE_TYPE (sav), return_address_pointer_rtx);
12535       t = fold_build_pointer_plus_hwi (t, -RETURN_REGNUM * UNITS_PER_LONG);
12536 
12537       t = build2 (MODIFY_EXPR, TREE_TYPE (sav), sav, t);
12538       TREE_SIDE_EFFECTS (t) = 1;
12539       expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
12540     }
12541 }
12542 
12543 /* Implement va_arg by updating the va_list structure
12544    VALIST as required to retrieve an argument of type
12545    TYPE, and returning that argument.
12546 
12547    Generates code equivalent to:
12548 
12549    if (integral value) {
12550      if (size  <= 4 && args.gpr < 5 ||
12551 	 size  > 4 && args.gpr < 4 )
12552        ret = args.reg_save_area[args.gpr+8]
12553      else
12554        ret = *args.overflow_arg_area++;
12555    } else if (vector value) {
12556        ret = *args.overflow_arg_area;
12557        args.overflow_arg_area += size / 8;
12558    } else if (float value) {
12559      if (args.fpr < 2)
12560        ret = args.reg_save_area[args.fpr+64]
12561      else
12562        ret = *args.overflow_arg_area++;
12563    } else if (aggregate value) {
12564      if (args.gpr < 5)
12565        ret = *args.reg_save_area[args.gpr]
12566      else
12567        ret = **args.overflow_arg_area++;
12568    } */
12569 
12570 static tree
12571 s390_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
12572 		      gimple_seq *post_p ATTRIBUTE_UNUSED)
12573 {
12574   tree f_gpr, f_fpr, f_ovf, f_sav;
12575   tree gpr, fpr, ovf, sav, reg, t, u;
12576   int indirect_p, size, n_reg, sav_ofs, sav_scale, max_reg;
12577   tree lab_false, lab_over = NULL_TREE;
12578   tree addr = create_tmp_var (ptr_type_node, "addr");
12579   bool left_align_p; /* How a value < UNITS_PER_LONG is aligned within
12580 			a stack slot.  */
12581 
12582   f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
12583   f_fpr = DECL_CHAIN (f_gpr);
12584   f_ovf = DECL_CHAIN (f_fpr);
12585   f_sav = DECL_CHAIN (f_ovf);
12586 
12587   gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
12588   fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
12589   sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
12590 
12591   /* The tree for args* cannot be shared between gpr/fpr and ovf since
12592      both appear on a lhs.  */
12593   valist = unshare_expr (valist);
12594   ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
12595 
12596   size = int_size_in_bytes (type);
12597 
12598   s390_check_type_for_vector_abi (type, true, false);
12599 
12600   if (pass_va_arg_by_reference (type))
12601     {
12602       if (TARGET_DEBUG_ARG)
12603 	{
12604 	  fprintf (stderr, "va_arg: aggregate type");
12605 	  debug_tree (type);
12606 	}
12607 
12608       /* Aggregates are passed by reference.  */
12609       indirect_p = 1;
12610       reg = gpr;
12611       n_reg = 1;
12612 
12613       /* Kernel stack layout on 31 bit: it is assumed here that no padding
12614 	 will be added by s390_frame_info because for va_args an even
12615 	 number of gprs always has to be saved (r15-r2 = 14 regs).  */
12616       sav_ofs = 2 * UNITS_PER_LONG;
12617       sav_scale = UNITS_PER_LONG;
12618       size = UNITS_PER_LONG;
12619       max_reg = GP_ARG_NUM_REG - n_reg;
12620       left_align_p = false;
12621     }
12622   else if (s390_function_arg_vector (TYPE_MODE (type), type))
12623     {
12624       if (TARGET_DEBUG_ARG)
12625 	{
12626 	  fprintf (stderr, "va_arg: vector type");
12627 	  debug_tree (type);
12628 	}
12629 
12630       indirect_p = 0;
12631       reg = NULL_TREE;
12632       n_reg = 0;
12633       sav_ofs = 0;
12634       sav_scale = 8;
12635       max_reg = 0;
12636       left_align_p = true;
12637     }
12638   else if (s390_function_arg_float (TYPE_MODE (type), type))
12639     {
12640       if (TARGET_DEBUG_ARG)
12641 	{
12642 	  fprintf (stderr, "va_arg: float type");
12643 	  debug_tree (type);
12644 	}
12645 
12646       /* FP args go in FP registers, if present.  */
12647       indirect_p = 0;
12648       reg = fpr;
12649       n_reg = 1;
12650       sav_ofs = 16 * UNITS_PER_LONG;
12651       sav_scale = 8;
12652       max_reg = FP_ARG_NUM_REG - n_reg;
12653       left_align_p = false;
12654     }
12655   else
12656     {
12657       if (TARGET_DEBUG_ARG)
12658 	{
12659 	  fprintf (stderr, "va_arg: other type");
12660 	  debug_tree (type);
12661 	}
12662 
12663       /* Otherwise into GP registers.  */
12664       indirect_p = 0;
12665       reg = gpr;
12666       n_reg = (size + UNITS_PER_LONG - 1) / UNITS_PER_LONG;
12667 
12668       /* Kernel stack layout on 31 bit: it is assumed here that no padding
12669 	 will be added by s390_frame_info because for va_args an even
12670 	 number of gprs always has to be saved (r15-r2 = 14 regs).  */
12671       sav_ofs = 2 * UNITS_PER_LONG;
12672 
12673       if (size < UNITS_PER_LONG)
12674 	sav_ofs += UNITS_PER_LONG - size;
12675 
12676       sav_scale = UNITS_PER_LONG;
12677       max_reg = GP_ARG_NUM_REG - n_reg;
12678       left_align_p = false;
12679     }
12680 
12681   /* Pull the value out of the saved registers ...  */
12682 
12683   if (reg != NULL_TREE)
12684     {
12685       /*
12686 	if (reg > ((typeof (reg))max_reg))
12687 	  goto lab_false;
12688 
12689 	addr = sav + sav_ofs + reg * sav_scale;
12690 
12691 	goto lab_over;
12692 
12693 	lab_false:
12694       */
12695 
12696       lab_false = create_artificial_label (UNKNOWN_LOCATION);
12697       lab_over = create_artificial_label (UNKNOWN_LOCATION);
12698 
12699       t = fold_convert (TREE_TYPE (reg), size_int (max_reg));
12700       t = build2 (GT_EXPR, boolean_type_node, reg, t);
12701       u = build1 (GOTO_EXPR, void_type_node, lab_false);
12702       t = build3 (COND_EXPR, void_type_node, t, u, NULL_TREE);
12703       gimplify_and_add (t, pre_p);
12704 
12705       t = fold_build_pointer_plus_hwi (sav, sav_ofs);
12706       u = build2 (MULT_EXPR, TREE_TYPE (reg), reg,
12707 		  fold_convert (TREE_TYPE (reg), size_int (sav_scale)));
12708       t = fold_build_pointer_plus (t, u);
12709 
12710       gimplify_assign (addr, t, pre_p);
12711 
12712       gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));
12713 
12714       gimple_seq_add_stmt (pre_p, gimple_build_label (lab_false));
12715     }
12716 
12717   /* ... Otherwise out of the overflow area.  */
12718 
12719   t = ovf;
12720   if (size < UNITS_PER_LONG && !left_align_p)
12721     t = fold_build_pointer_plus_hwi (t, UNITS_PER_LONG - size);
12722 
12723   gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
12724 
12725   gimplify_assign (addr, t, pre_p);
12726 
12727   if (size < UNITS_PER_LONG && left_align_p)
12728     t = fold_build_pointer_plus_hwi (t, UNITS_PER_LONG);
12729   else
12730     t = fold_build_pointer_plus_hwi (t, size);
12731 
12732   gimplify_assign (ovf, t, pre_p);
12733 
12734   if (reg != NULL_TREE)
12735     gimple_seq_add_stmt (pre_p, gimple_build_label (lab_over));
12736 
12737 
12738   /* Increment register save count.  */
12739 
12740   if (n_reg > 0)
12741     {
12742       u = build2 (PREINCREMENT_EXPR, TREE_TYPE (reg), reg,
12743 		  fold_convert (TREE_TYPE (reg), size_int (n_reg)));
12744       gimplify_and_add (u, pre_p);
12745     }
12746 
12747   if (indirect_p)
12748     {
12749       t = build_pointer_type_for_mode (build_pointer_type (type),
12750 				       ptr_mode, true);
12751       addr = fold_convert (t, addr);
12752       addr = build_va_arg_indirect_ref (addr);
12753     }
12754   else
12755     {
12756       t = build_pointer_type_for_mode (type, ptr_mode, true);
12757       addr = fold_convert (t, addr);
12758     }
12759 
12760   return build_va_arg_indirect_ref (addr);
12761 }
12762 
12763 /* Emit rtl for the tbegin or tbegin_retry (RETRY != NULL_RTX)
12764    expanders.
12765    DEST  - Register location where CC will be stored.
12766    TDB   - Pointer to a 256 byte area in which to store the transaction
12767 	   diagnostic block.  NULL if TDB is not needed.
12768    RETRY - Retry count value.  If non-NULL, a retry loop for CC2
12769 	   is emitted.
12770    CLOBBER_FPRS_P - If true clobbers for all FPRs are emitted as part
12771 		    of the tbegin instruction pattern.  */
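/* Sketch of the flow emitted below when RETRY is given (illustrative):

     retry_reg = RETRY + 1;
   retry_label:
     tbegin; DEST = CC;
     if (CC is 0, 1 or 3) goto leave;          success or permanent failure
     ppa (RETRY + 2 - retry_reg);              assist count 1, 2, ...
     if (--retry_reg != 0) goto retry_label;   CC2: transient failure
   leave:  */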
12772 
12773 void
12774 s390_expand_tbegin (rtx dest, rtx tdb, rtx retry, bool clobber_fprs_p)
12775 {
12776   rtx retry_plus_two = gen_reg_rtx (SImode);
12777   rtx retry_reg = gen_reg_rtx (SImode);
12778   rtx_code_label *retry_label = NULL;
12779 
12780   if (retry != NULL_RTX)
12781     {
12782       emit_move_insn (retry_reg, retry);
12783       emit_insn (gen_addsi3 (retry_plus_two, retry_reg, const2_rtx));
12784       emit_insn (gen_addsi3 (retry_reg, retry_reg, const1_rtx));
12785       retry_label = gen_label_rtx ();
12786       emit_label (retry_label);
12787     }
12788 
12789   if (clobber_fprs_p)
12790     {
12791       if (TARGET_VX)
12792 	emit_insn (gen_tbegin_1_z13 (gen_rtx_CONST_INT (VOIDmode, TBEGIN_MASK),
12793 				     tdb));
12794       else
12795 	emit_insn (gen_tbegin_1 (gen_rtx_CONST_INT (VOIDmode, TBEGIN_MASK),
12796 				 tdb));
12797     }
12798   else
12799     emit_insn (gen_tbegin_nofloat_1 (gen_rtx_CONST_INT (VOIDmode, TBEGIN_MASK),
12800 				     tdb));
12801 
12802   emit_move_insn (dest, gen_rtx_UNSPEC (SImode,
12803 					gen_rtvec (1, gen_rtx_REG (CCRAWmode,
12804 								   CC_REGNUM)),
12805 					UNSPEC_CC_TO_INT));
12806   if (retry != NULL_RTX)
12807     {
12808       const int CC0 = 1 << 3;
12809       const int CC1 = 1 << 2;
12810       const int CC3 = 1 << 0;
12811       rtx jump;
12812       rtx count = gen_reg_rtx (SImode);
12813       rtx_code_label *leave_label = gen_label_rtx ();
12814 
12815       /* Exit for success and permanent failures.  */
12816       jump = s390_emit_jump (leave_label,
12817 			     gen_rtx_EQ (VOIDmode,
12818 			       gen_rtx_REG (CCRAWmode, CC_REGNUM),
12819 			       gen_rtx_CONST_INT (VOIDmode, CC0 | CC1 | CC3)));
12820       LABEL_NUSES (leave_label) = 1;
12821 
12822       /* CC2 - transient failure. Perform retry with ppa.  */
12823       emit_move_insn (count, retry_plus_two);
12824       emit_insn (gen_subsi3 (count, count, retry_reg));
12825       emit_insn (gen_tx_assist (count));
12826       jump = emit_jump_insn (gen_doloop_si64 (retry_label,
12827 					      retry_reg,
12828 					      retry_reg));
12829       JUMP_LABEL (jump) = retry_label;
12830       LABEL_NUSES (retry_label) = 1;
12831       emit_label (leave_label);
12832     }
12833 }
12834 
12835 
12836 /* Return the decl for the target specific builtin with the function
12837    code FCODE.  */
12838 
12839 static tree
12840 s390_builtin_decl (unsigned fcode, bool initialized_p ATTRIBUTE_UNUSED)
12841 {
12842   if (fcode >= S390_BUILTIN_MAX)
12843     return error_mark_node;
12844 
12845   return s390_builtin_decls[fcode];
12846 }
12847 
12848 /* We call mcount before the function prologue.  So a profiled leaf
12849    function should stay a leaf function.  */
12850 
12851 static bool
12852 s390_keep_leaf_when_profiled ()
12853 {
12854   return true;
12855 }
12856 
12857 /* Output assembly code for the trampoline template to
12858    stdio stream FILE.
12859 
12860    On S/390, we use gpr 1 internally in the trampoline code;
12861    gpr 0 is used to hold the static chain.  */
12862 
12863 static void
12864 s390_asm_trampoline_template (FILE *file)
12865 {
12866   rtx op[2];
12867   op[0] = gen_rtx_REG (Pmode, 0);
12868   op[1] = gen_rtx_REG (Pmode, 1);
12869 
12870   if (TARGET_64BIT)
12871     {
12872       output_asm_insn ("basr\t%1,0", op);         /* 2 byte */
12873       output_asm_insn ("lmg\t%0,%1,14(%1)", op);  /* 6 byte */
12874       output_asm_insn ("br\t%1", op);             /* 2 byte */
12875       ASM_OUTPUT_SKIP (file, (HOST_WIDE_INT)(TRAMPOLINE_SIZE - 10));
12876     }
12877   else
12878     {
12879       output_asm_insn ("basr\t%1,0", op);         /* 2 byte */
12880       output_asm_insn ("lm\t%0,%1,6(%1)", op);    /* 4 byte */
12881       output_asm_insn ("br\t%1", op);             /* 2 byte */
12882       ASM_OUTPUT_SKIP (file, (HOST_WIDE_INT)(TRAMPOLINE_SIZE - 8));
12883     }
12884 }
12885 
12886 /* Emit RTL insns to initialize the variable parts of a trampoline.
12887    FNADDR is an RTX for the address of the function's pure code.
12888    CXT is an RTX for the static chain value for the function.  */
12889 
12890 static void
12891 s390_trampoline_init (rtx m_tramp, tree fndecl, rtx cxt)
12892 {
12893   rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
12894   rtx mem;
12895 
12896   emit_block_move (m_tramp, assemble_trampoline_template (),
12897 		   GEN_INT (2 * UNITS_PER_LONG), BLOCK_OP_NORMAL);
12898 
12899   mem = adjust_address (m_tramp, Pmode, 2 * UNITS_PER_LONG);
12900   emit_move_insn (mem, cxt);
12901   mem = adjust_address (m_tramp, Pmode, 3 * UNITS_PER_LONG);
12902   emit_move_insn (mem, fnaddr);
12903 }
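
/* With the template emitted by s390_asm_trampoline_template and the
   stores above, and assuming UNITS_PER_LONG == 8, the initialized
   64-bit trampoline looks roughly like

     offset  0:  basr %r1,0            puts address of offset 2 in %r1
     offset  2:  lmg  %r0,%r1,14(%r1)  loads the two words at offset 16
     offset  8:  br   %r1
     offset 16:  static chain value    ends up in %r0
     offset 24:  target function addr  ends up in %r1  */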
12904 
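/* Emit HW halfwords worth of NOPs, annotated with USER (the option
   that requested them).  For example, a request for 7 halfwords is
   emitted as two "brcl 0,0" (3 halfwords each) plus one "bcr 0,0"
   (1 halfword).  */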
12905 static void
12906 output_asm_nops (const char *user, int hw)
12907 {
12908   asm_fprintf (asm_out_file, "\t# NOPs for %s (%d halfwords)\n", user, hw);
12909   while (hw > 0)
12910     {
12911       if (hw >= 3)
12912 	{
12913 	  output_asm_insn ("brcl\t0,0", NULL);
12914 	  hw -= 3;
12915 	}
12916       else if (hw >= 2)
12917 	{
12918 	  output_asm_insn ("bc\t0,0", NULL);
12919 	  hw -= 2;
12920 	}
12921       else
12922 	{
12923 	  output_asm_insn ("bcr\t0,0", NULL);
12924 	  hw -= 1;
12925 	}
12926     }
12927 }
12928 
12929 /* Output assembler code to FILE to increment profiler label # LABELNO
12930    for profiling a function entry.  */
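/* For the common 64-bit, non-fentry case the sequence emitted below is
   roughly

     stg   %r14,<slot>(%r15)
     larl  %r1,.LP<labelno>
     brasl %r14,_mcount
     lg    %r14,<slot>(%r15)

   i.e. the return-address register is spilled around the call and %r1
   is loaded with the address of the local label .LP<labelno>.  */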
12931 
12932 void
12933 s390_function_profiler (FILE *file, int labelno)
12934 {
12935   rtx op[8];
12936 
12937   char label[128];
12938   ASM_GENERATE_INTERNAL_LABEL (label, "LP", labelno);
12939 
12940   fprintf (file, "# function profiler \n");
12941 
12942   op[0] = gen_rtx_REG (Pmode, RETURN_REGNUM);
12943   op[1] = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
12944   op[1] = gen_rtx_MEM (Pmode, plus_constant (Pmode, op[1], UNITS_PER_LONG));
12945   op[7] = GEN_INT (UNITS_PER_LONG);
12946 
12947   op[2] = gen_rtx_REG (Pmode, 1);
12948   op[3] = gen_rtx_SYMBOL_REF (Pmode, label);
12949   SYMBOL_REF_FLAGS (op[3]) = SYMBOL_FLAG_LOCAL;
12950 
12951   op[4] = gen_rtx_SYMBOL_REF (Pmode, flag_fentry ? "__fentry__" : "_mcount");
12952   if (flag_pic)
12953     {
12954       op[4] = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op[4]), UNSPEC_PLT);
12955       op[4] = gen_rtx_CONST (Pmode, op[4]);
12956     }
12957 
12958   if (flag_record_mcount)
12959     fprintf (file, "1:\n");
12960 
12961   if (flag_fentry)
12962     {
12963       if (flag_nop_mcount)
12964 	output_asm_nops ("-mnop-mcount", /* brasl */ 3);
12965       else if (cfun->static_chain_decl)
12966 	warning (OPT_Wcannot_profile, "nested functions cannot be profiled "
12967 		 "with %<-mfentry%> on s390");
12968       else
12969 	output_asm_insn ("brasl\t0,%4", op);
12970     }
12971   else if (TARGET_64BIT)
12972     {
12973       if (flag_nop_mcount)
12974 	output_asm_nops ("-mnop-mcount", /* stg */ 3 + /* larl */ 3 +
12975 			 /* brasl */ 3 + /* lg */ 3);
12976       else
12977 	{
12978 	  output_asm_insn ("stg\t%0,%1", op);
12979 	  if (flag_dwarf2_cfi_asm)
12980 	    output_asm_insn (".cfi_rel_offset\t%0,%7", op);
12981 	  output_asm_insn ("larl\t%2,%3", op);
12982 	  output_asm_insn ("brasl\t%0,%4", op);
12983 	  output_asm_insn ("lg\t%0,%1", op);
12984 	  if (flag_dwarf2_cfi_asm)
12985 	    output_asm_insn (".cfi_restore\t%0", op);
12986 	}
12987     }
12988   else
12989     {
12990       if (flag_nop_mcount)
12991 	output_asm_nops ("-mnop-mcount", /* st */ 2 + /* larl */ 3 +
12992 			 /* brasl */ 3 + /* l */ 2);
12993       else
12994 	{
12995 	  output_asm_insn ("st\t%0,%1", op);
12996 	  if (flag_dwarf2_cfi_asm)
12997 	    output_asm_insn (".cfi_rel_offset\t%0,%7", op);
12998 	  output_asm_insn ("larl\t%2,%3", op);
12999 	  output_asm_insn ("brasl\t%0,%4", op);
13000 	  output_asm_insn ("l\t%0,%1", op);
13001 	  if (flag_dwarf2_cfi_asm)
13002 	    output_asm_insn (".cfi_restore\t%0", op);
13003 	}
13004     }
13005 
13006   if (flag_record_mcount)
13007     {
13008       fprintf (file, "\t.section __mcount_loc, \"a\",@progbits\n");
13009       fprintf (file, "\t.%s 1b\n", TARGET_64BIT ? "quad" : "long");
13010       fprintf (file, "\t.previous\n");
13011     }
13012 }
13013 
13014 /* Encode symbol attributes (local vs. global, tls model) of a SYMBOL_REF
13015    into its SYMBOL_REF_FLAGS.  */
13016 
13017 static void
13018 s390_encode_section_info (tree decl, rtx rtl, int first)
13019 {
13020   default_encode_section_info (decl, rtl, first);
13021 
13022   if (TREE_CODE (decl) == VAR_DECL)
13023     {
13024       /* Store the alignment to be able to check if we can use
13025 	 a larl/load-relative instruction.  We only handle the cases
13026 	 that can go wrong (i.e. no FUNC_DECLs).  */
13027       if (DECL_ALIGN (decl) == 0 || DECL_ALIGN (decl) % 16)
13028 	SYMBOL_FLAG_SET_NOTALIGN2 (XEXP (rtl, 0));
13029       else if (DECL_ALIGN (decl) % 32)
13030 	SYMBOL_FLAG_SET_NOTALIGN4 (XEXP (rtl, 0));
13031       else if (DECL_ALIGN (decl) % 64)
13032 	SYMBOL_FLAG_SET_NOTALIGN8 (XEXP (rtl, 0));
13033     }
13034 
13035   /* Literal pool references don't have a decl so they are handled
13036      differently here.  We rely on the information in the MEM_ALIGN
13037      entry to decide upon the alignment.  */
13038   if (MEM_P (rtl)
13039       && GET_CODE (XEXP (rtl, 0)) == SYMBOL_REF
13040       && TREE_CONSTANT_POOL_ADDRESS_P (XEXP (rtl, 0)))
13041     {
13042       if (MEM_ALIGN (rtl) == 0 || MEM_ALIGN (rtl) % 16)
13043 	SYMBOL_FLAG_SET_NOTALIGN2 (XEXP (rtl, 0));
13044       else if (MEM_ALIGN (rtl) % 32)
13045 	SYMBOL_FLAG_SET_NOTALIGN4 (XEXP (rtl, 0));
13046       else if (MEM_ALIGN (rtl) % 64)
13047 	SYMBOL_FLAG_SET_NOTALIGN8 (XEXP (rtl, 0));
13048     }
13049 }
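
/* For instance, a VAR_DECL with a guaranteed alignment of only 4 bytes
   (DECL_ALIGN == 32) passes the 2- and 4-byte checks above but is
   marked via SYMBOL_FLAG_SET_NOTALIGN8, because 8-byte load-relative
   accesses must not assume that alignment.  */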
13050 
13051 /* Output thunk to FILE that implements a C++ virtual function call (with
13052    multiple inheritance) to FUNCTION.  The thunk adjusts the this pointer
13053    by DELTA, and unless VCALL_OFFSET is zero, applies an additional adjustment
13054    stored at VCALL_OFFSET in the vtable whose address is located at offset 0
13055    relative to the resulting this pointer.  */
13056 
13057 static void
13058 s390_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
13059 		      HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
13060 		      tree function)
13061 {
13062   const char *fnname = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (thunk));
13063   rtx op[10];
13064   int nonlocal = 0;
13065 
13066   assemble_start_function (thunk, fnname);
13067   /* Make sure unwind info is emitted for the thunk if needed.  */
13068   final_start_function (emit_barrier (), file, 1);
13069 
13070   /* Operand 0 is the target function.  */
13071   op[0] = XEXP (DECL_RTL (function), 0);
13072   if (flag_pic && !SYMBOL_REF_LOCAL_P (op[0]))
13073     {
13074       nonlocal = 1;
13075       op[0] = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op[0]),
13076 			      TARGET_64BIT ? UNSPEC_PLT : UNSPEC_GOT);
13077       op[0] = gen_rtx_CONST (Pmode, op[0]);
13078     }
13079 
13080   /* Operand 1 is the 'this' pointer.  */
13081   if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
13082     op[1] = gen_rtx_REG (Pmode, 3);
13083   else
13084     op[1] = gen_rtx_REG (Pmode, 2);
13085 
13086   /* Operand 2 is the delta.  */
13087   op[2] = GEN_INT (delta);
13088 
13089   /* Operand 3 is the vcall_offset.  */
13090   op[3] = GEN_INT (vcall_offset);
13091 
13092   /* Operand 4 is the temporary register.  */
13093   op[4] = gen_rtx_REG (Pmode, 1);
13094 
13095   /* Operands 5 to 8 can be used as labels.  */
13096   op[5] = NULL_RTX;
13097   op[6] = NULL_RTX;
13098   op[7] = NULL_RTX;
13099   op[8] = NULL_RTX;
13100 
13101   /* Operand 9 can be used as a temporary register.  */
13102   op[9] = NULL_RTX;
13103 
13104   /* Generate code.  */
13105   if (TARGET_64BIT)
13106     {
13107       /* Setup literal pool pointer if required.  */
13108       if ((!DISP_IN_RANGE (delta)
13109 	   && !CONST_OK_FOR_K (delta)
13110 	   && !CONST_OK_FOR_Os (delta))
13111 	  || (!DISP_IN_RANGE (vcall_offset)
13112 	      && !CONST_OK_FOR_K (vcall_offset)
13113 	      && !CONST_OK_FOR_Os (vcall_offset)))
13114 	{
13115 	  op[5] = gen_label_rtx ();
13116 	  output_asm_insn ("larl\t%4,%5", op);
13117 	}
13118 
13119       /* Add DELTA to this pointer.  */
13120       if (delta)
13121 	{
13122 	  if (CONST_OK_FOR_J (delta))
13123 	    output_asm_insn ("la\t%1,%2(%1)", op);
13124 	  else if (DISP_IN_RANGE (delta))
13125 	    output_asm_insn ("lay\t%1,%2(%1)", op);
13126 	  else if (CONST_OK_FOR_K (delta))
13127 	    output_asm_insn ("aghi\t%1,%2", op);
13128 	  else if (CONST_OK_FOR_Os (delta))
13129 	    output_asm_insn ("agfi\t%1,%2", op);
13130 	  else
13131 	    {
13132 	      op[6] = gen_label_rtx ();
13133 	      output_asm_insn ("agf\t%1,%6-%5(%4)", op);
13134 	    }
13135 	}
13136 
13137       /* Perform vcall adjustment.  */
13138       if (vcall_offset)
13139 	{
13140 	  if (DISP_IN_RANGE (vcall_offset))
13141 	    {
13142 	      output_asm_insn ("lg\t%4,0(%1)", op);
13143 	      output_asm_insn ("ag\t%1,%3(%4)", op);
13144 	    }
13145 	  else if (CONST_OK_FOR_K (vcall_offset))
13146 	    {
13147 	      output_asm_insn ("lghi\t%4,%3", op);
13148 	      output_asm_insn ("ag\t%4,0(%1)", op);
13149 	      output_asm_insn ("ag\t%1,0(%4)", op);
13150 	    }
13151 	  else if (CONST_OK_FOR_Os (vcall_offset))
13152 	    {
13153 	      output_asm_insn ("lgfi\t%4,%3", op);
13154 	      output_asm_insn ("ag\t%4,0(%1)", op);
13155 	      output_asm_insn ("ag\t%1,0(%4)", op);
13156 	    }
13157 	  else
13158 	    {
13159 	      op[7] = gen_label_rtx ();
13160 	      output_asm_insn ("llgf\t%4,%7-%5(%4)", op);
13161 	      output_asm_insn ("ag\t%4,0(%1)", op);
13162 	      output_asm_insn ("ag\t%1,0(%4)", op);
13163 	    }
13164 	}
13165 
13166       /* Jump to target.  */
13167       output_asm_insn ("jg\t%0", op);
13168 
13169       /* Output literal pool if required.  */
13170       if (op[5])
13171 	{
13172 	  output_asm_insn (".align\t4", op);
13173 	  targetm.asm_out.internal_label (file, "L",
13174 					  CODE_LABEL_NUMBER (op[5]));
13175 	}
13176       if (op[6])
13177 	{
13178 	  targetm.asm_out.internal_label (file, "L",
13179 					  CODE_LABEL_NUMBER (op[6]));
13180 	  output_asm_insn (".long\t%2", op);
13181 	}
13182       if (op[7])
13183 	{
13184 	  targetm.asm_out.internal_label (file, "L",
13185 					  CODE_LABEL_NUMBER (op[7]));
13186 	  output_asm_insn (".long\t%3", op);
13187 	}
13188     }
13189   else
13190     {
13191       /* Setup base pointer if required.  */
13192       if (!vcall_offset
13193 	  || (!DISP_IN_RANGE (delta)
13194 	      && !CONST_OK_FOR_K (delta)
13195 	      && !CONST_OK_FOR_Os (delta))
13196 	  || (!DISP_IN_RANGE (delta)
13197 	      && !CONST_OK_FOR_K (vcall_offset)
13198 	      && !CONST_OK_FOR_Os (vcall_offset)))
13199 	{
13200 	  op[5] = gen_label_rtx ();
13201 	  output_asm_insn ("basr\t%4,0", op);
13202 	  targetm.asm_out.internal_label (file, "L",
13203 					  CODE_LABEL_NUMBER (op[5]));
13204 	}
13205 
13206       /* Add DELTA to this pointer.  */
13207       if (delta)
13208 	{
13209 	  if (CONST_OK_FOR_J (delta))
13210 	    output_asm_insn ("la\t%1,%2(%1)", op);
13211 	  else if (DISP_IN_RANGE (delta))
13212 	    output_asm_insn ("lay\t%1,%2(%1)", op);
13213 	  else if (CONST_OK_FOR_K (delta))
13214 	    output_asm_insn ("ahi\t%1,%2", op);
13215 	  else if (CONST_OK_FOR_Os (delta))
13216 	    output_asm_insn ("afi\t%1,%2", op);
13217 	  else
13218 	    {
13219 	      op[6] = gen_label_rtx ();
13220 	      output_asm_insn ("a\t%1,%6-%5(%4)", op);
13221 	    }
13222 	}
13223 
13224       /* Perform vcall adjustment.  */
13225       if (vcall_offset)
13226 	{
13227 	  if (CONST_OK_FOR_J (vcall_offset))
13228 	    {
13229 	      output_asm_insn ("l\t%4,0(%1)", op);
13230 	      output_asm_insn ("a\t%1,%3(%4)", op);
13231 	    }
13232 	  else if (DISP_IN_RANGE (vcall_offset))
13233 	    {
13234 	      output_asm_insn ("l\t%4,0(%1)", op);
13235 	      output_asm_insn ("ay\t%1,%3(%4)", op);
13236 	    }
13237 	  else if (CONST_OK_FOR_K (vcall_offset))
13238 	    {
13239 	      output_asm_insn ("lhi\t%4,%3", op);
13240 	      output_asm_insn ("a\t%4,0(%1)", op);
13241 	      output_asm_insn ("a\t%1,0(%4)", op);
13242 	    }
13243 	  else if (CONST_OK_FOR_Os (vcall_offset))
13244 	    {
13245 	      output_asm_insn ("iilf\t%4,%3", op);
13246 	      output_asm_insn ("a\t%4,0(%1)", op);
13247 	      output_asm_insn ("a\t%1,0(%4)", op);
13248 	    }
13249 	  else
13250 	    {
13251 	      op[7] = gen_label_rtx ();
13252 	      output_asm_insn ("l\t%4,%7-%5(%4)", op);
13253 	      output_asm_insn ("a\t%4,0(%1)", op);
13254 	      output_asm_insn ("a\t%1,0(%4)", op);
13255 	    }
13256 
13257 	  /* We had to clobber the base pointer register.
13258 	     Re-setup the base pointer (with a different base).  */
13259 	  op[5] = gen_label_rtx ();
13260 	  output_asm_insn ("basr\t%4,0", op);
13261 	  targetm.asm_out.internal_label (file, "L",
13262 					  CODE_LABEL_NUMBER (op[5]));
13263 	}
13264 
13265       /* Jump to target.  */
13266       op[8] = gen_label_rtx ();
13267 
13268       if (!flag_pic)
13269 	output_asm_insn ("l\t%4,%8-%5(%4)", op);
13270       else if (!nonlocal)
13271 	output_asm_insn ("a\t%4,%8-%5(%4)", op);
13272       /* We cannot call through .plt, since .plt requires %r12 loaded.  */
13273       else if (flag_pic == 1)
13274 	{
13275 	  output_asm_insn ("a\t%4,%8-%5(%4)", op);
13276 	  output_asm_insn ("l\t%4,%0(%4)", op);
13277 	}
13278       else if (flag_pic == 2)
13279 	{
13280 	  op[9] = gen_rtx_REG (Pmode, 0);
13281 	  output_asm_insn ("l\t%9,%8-4-%5(%4)", op);
13282 	  output_asm_insn ("a\t%4,%8-%5(%4)", op);
13283 	  output_asm_insn ("ar\t%4,%9", op);
13284 	  output_asm_insn ("l\t%4,0(%4)", op);
13285 	}
13286 
13287       output_asm_insn ("br\t%4", op);
13288 
13289       /* Output literal pool.  */
13290       output_asm_insn (".align\t4", op);
13291 
13292       if (nonlocal && flag_pic == 2)
13293 	output_asm_insn (".long\t%0", op);
13294       if (nonlocal)
13295 	{
13296 	  op[0] = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
13297 	  SYMBOL_REF_FLAGS (op[0]) = SYMBOL_FLAG_LOCAL;
13298 	}
13299 
13300       targetm.asm_out.internal_label (file, "L", CODE_LABEL_NUMBER (op[8]));
13301       if (!flag_pic)
13302 	output_asm_insn (".long\t%0", op);
13303       else
13304 	output_asm_insn (".long\t%0-%5", op);
13305 
13306       if (op[6])
13307 	{
13308 	  targetm.asm_out.internal_label (file, "L",
13309 					  CODE_LABEL_NUMBER (op[6]));
13310 	  output_asm_insn (".long\t%2", op);
13311 	}
13312       if (op[7])
13313 	{
13314 	  targetm.asm_out.internal_label (file, "L",
13315 					  CODE_LABEL_NUMBER (op[7]));
13316 	  output_asm_insn (".long\t%3", op);
13317 	}
13318     }
13319   final_end_function ();
13320   assemble_end_function (thunk, fnname);
13321 }
13322 
13323 /* Output either an indirect jump or an indirect call
13324    (RETURN_ADDR_REGNO != INVALID_REGNUM) with target register REGNO
13325    using a branch trampoline disabling branch target prediction.  */
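/* For example (sketch; the exact thunk symbol names come from the
   TARGET_INDIRECT_BRANCH_THUNK_NAME_* macros), an indirect jump via %r1
   on z10 or newer ends up as a direct branch such as

	jg	__s390_indirect_jump_r1

   i.e. the register branch is performed inside a thunk that the branch
   target predictor cannot be trained on.  */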
13326 
13327 void
13328 s390_indirect_branch_via_thunk (unsigned int regno,
13329 				unsigned int return_addr_regno,
13330 				rtx comparison_operator,
13331 				enum s390_indirect_branch_type type)
13332 {
13333   enum s390_indirect_branch_option option;
13334 
13335   if (type == s390_indirect_branch_type_return)
13336     {
13337       if (s390_return_addr_from_memory ())
13338 	option = s390_opt_function_return_mem;
13339       else
13340 	option = s390_opt_function_return_reg;
13341     }
13342   else if (type == s390_indirect_branch_type_jump)
13343     option = s390_opt_indirect_branch_jump;
13344   else if (type == s390_indirect_branch_type_call)
13345     option = s390_opt_indirect_branch_call;
13346   else
13347     gcc_unreachable ();
13348 
13349   if (TARGET_INDIRECT_BRANCH_TABLE)
13350     {
13351       char label[32];
13352 
13353       ASM_GENERATE_INTERNAL_LABEL (label,
13354 				   indirect_branch_table_label[option],
13355 				   indirect_branch_table_label_no[option]++);
13356       ASM_OUTPUT_LABEL (asm_out_file, label);
13357     }
13358 
13359   if (return_addr_regno != INVALID_REGNUM)
13360     {
13361       gcc_assert (comparison_operator == NULL_RTX);
13362       fprintf (asm_out_file, " \tbrasl\t%%r%d,", return_addr_regno);
13363     }
13364   else
13365     {
13366       fputs (" \tjg", asm_out_file);
13367       if (comparison_operator != NULL_RTX)
13368 	print_operand (asm_out_file, comparison_operator, 'C');
13369 
13370       fputs ("\t", asm_out_file);
13371     }
13372 
13373   if (TARGET_CPU_Z10)
13374     fprintf (asm_out_file,
13375 	     TARGET_INDIRECT_BRANCH_THUNK_NAME_EXRL "\n",
13376 	     regno);
13377   else
13378     fprintf (asm_out_file,
13379 	     TARGET_INDIRECT_BRANCH_THUNK_NAME_EX "\n",
13380 	     INDIRECT_BRANCH_THUNK_REGNUM, regno);
13381 
13382   if ((option == s390_opt_indirect_branch_jump
13383        && cfun->machine->indirect_branch_jump == indirect_branch_thunk)
13384       || (option == s390_opt_indirect_branch_call
13385 	  && cfun->machine->indirect_branch_call == indirect_branch_thunk)
13386       || (option == s390_opt_function_return_reg
13387 	  && cfun->machine->function_return_reg == indirect_branch_thunk)
13388       || (option == s390_opt_function_return_mem
13389 	  && cfun->machine->function_return_mem == indirect_branch_thunk))
13390     {
13391       if (TARGET_CPU_Z10)
13392 	indirect_branch_z10thunk_mask |= (1 << regno);
13393       else
13394 	indirect_branch_prez10thunk_mask |= (1 << regno);
13395     }
13396 }
13397 
13398 /* Output an inline thunk for indirect jumps.  EXECUTE_TARGET can
13399    either be an address register or a label pointing to the location
13400    of the jump instruction.  */
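/* Sketch of the emitted sequence: the EX/EXRL below executes the branch
   instruction located at EXECUTE_TARGET, while the fall-through path is
   the tight self-loop "0: j 0b", so execution that ignores the execute
   instruction cannot run past this point.  */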
13401 
13402 void
13403 s390_indirect_branch_via_inline_thunk (rtx execute_target)
13404 {
13405   if (TARGET_INDIRECT_BRANCH_TABLE)
13406     {
13407       char label[32];
13408 
13409       ASM_GENERATE_INTERNAL_LABEL (label,
13410 				   indirect_branch_table_label[s390_opt_indirect_branch_jump],
13411 				   indirect_branch_table_label_no[s390_opt_indirect_branch_jump]++);
13412       ASM_OUTPUT_LABEL (asm_out_file, label);
13413     }
13414 
13415   if (!TARGET_ZARCH)
13416     fputs ("\t.machinemode zarch\n", asm_out_file);
13417 
13418   if (REG_P (execute_target))
13419     fprintf (asm_out_file, "\tex\t%%r0,0(%%r%d)\n", REGNO (execute_target));
13420   else
13421     output_asm_insn ("\texrl\t%%r0,%0", &execute_target);
13422 
13423   if (!TARGET_ZARCH)
13424     fputs ("\t.machinemode esa\n", asm_out_file);
13425 
13426   fputs ("0:\tj\t0b\n", asm_out_file);
13427 }
13428 
13429 static bool
13430 s390_valid_pointer_mode (scalar_int_mode mode)
13431 {
13432   return (mode == SImode || (TARGET_64BIT && mode == DImode));
13433 }
13434 
13435 /* Checks whether the given CALL_EXPR would use a call-saved
13436    register.  This is used to decide whether sibling call
13437    optimization could be performed on the respective function
13438    call.  */
13439 
13440 static bool
13441 s390_call_saved_register_used (tree call_expr)
13442 {
13443   CUMULATIVE_ARGS cum_v;
13444   cumulative_args_t cum;
13445   tree parameter;
13446   rtx parm_rtx;
13447   int reg, i;
13448 
13449   INIT_CUMULATIVE_ARGS (cum_v, NULL, NULL, 0, 0);
13450   cum = pack_cumulative_args (&cum_v);
13451 
13452   for (i = 0; i < call_expr_nargs (call_expr); i++)
13453     {
13454       parameter = CALL_EXPR_ARG (call_expr, i);
13455       gcc_assert (parameter);
13456 
13457       /* For an undeclared variable passed as parameter we will get
13458 	 an ERROR_MARK node here.  */
13459       if (TREE_CODE (parameter) == ERROR_MARK)
13460 	return true;
13461 
13462       /* We assume that in the target function all parameters are
13463 	 named.  This only has an impact on vector argument register
13464 	 usage, none of which is call-saved.  */
13465       function_arg_info arg (TREE_TYPE (parameter), /*named=*/true);
13466       apply_pass_by_reference_rules (&cum_v, arg);
13467 
13468        parm_rtx = s390_function_arg (cum, arg);
13469 
13470        s390_function_arg_advance (cum, arg);
13471 
13472        if (!parm_rtx)
13473 	 continue;
13474 
13475        if (REG_P (parm_rtx))
13476 	 {
13477 	   for (reg = 0; reg < REG_NREGS (parm_rtx); reg++)
13478 	     if (!call_used_or_fixed_reg_p (reg + REGNO (parm_rtx)))
13479 	       return true;
13480 	 }
13481 
13482        if (GET_CODE (parm_rtx) == PARALLEL)
13483 	 {
13484 	   int i;
13485 
13486 	   for (i = 0; i < XVECLEN (parm_rtx, 0); i++)
13487 	     {
13488 	       rtx r = XEXP (XVECEXP (parm_rtx, 0, i), 0);
13489 
13490 	       gcc_assert (REG_P (r));
13491 
13492 	       for (reg = 0; reg < REG_NREGS (r); reg++)
13493 		 if (!call_used_or_fixed_reg_p (reg + REGNO (r)))
13494 		   return true;
13495 	     }
13496 	 }
13497 
13498     }
13499   return false;
13500 }
13501 
13502 /* Return true if the given call expression can be
13503    turned into a sibling call.
13504    DECL holds the declaration of the function to be called whereas
13505    EXP is the call expression itself.  */
13506 
13507 static bool
13508 s390_function_ok_for_sibcall (tree decl, tree exp)
13509 {
13510   /* The TPF epilogue uses register 1.  */
13511   if (TARGET_TPF_PROFILING)
13512     return false;
13513 
13514   /* The 31 bit PLT code uses register 12 (GOT pointer - call-saved)
13515      which would have to be restored before the sibcall.  */
13516   if (!TARGET_64BIT && flag_pic && decl && !targetm.binds_local_p (decl))
13517     return false;
13518 
13519   /* The thunks for indirect branches require r1 if no exrl is
13520      available.  r1 might not be available when doing a sibling
13521      call.  */
13522   if (TARGET_INDIRECT_BRANCH_NOBP_CALL
13523       && !TARGET_CPU_Z10
13524       && !decl)
13525     return false;
13526 
13527   /* Register 6 on s390 is available as an argument register but is
13528      unfortunately call-saved.  This makes functions needing this register
13529      for arguments not suitable for sibcalls.  */
13530   return !s390_call_saved_register_used (exp);
13531 }
13532 
13533 /* Return the fixed registers used for condition codes.  */
13534 
13535 static bool
13536 s390_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
13537 {
13538   *p1 = CC_REGNUM;
13539   *p2 = INVALID_REGNUM;
13540 
13541   return true;
13542 }
13543 
13544 /* This function is used by the call expanders of the machine description.
13545    It emits the call insn itself together with the necessary operations
13546    to adjust the target address and returns the emitted insn.
13547    ADDR_LOCATION is the target address rtx
13548    TLS_CALL the location of the thread-local symbol
13549    RESULT_REG the register where the result of the call should be stored
13550    RETADDR_REG the register where the return address should be stored
13551 	       If this parameter is NULL_RTX the call is considered
13552 	       to be a sibling call.  */
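/* As an illustration, a normal call with a return value ends up as
   roughly

     (parallel [(set RESULT_REG (call (mem ADDR) 0))
		(clobber RETADDR_REG)])

   whereas a sibling call omits the clobber and emits just the CALL.  */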
13553 
13554 rtx_insn *
13555 s390_emit_call (rtx addr_location, rtx tls_call, rtx result_reg,
13556 		rtx retaddr_reg)
13557 {
13558   bool plt_call = false;
13559   rtx_insn *insn;
13560   rtx vec[4] = { NULL_RTX };
13561   int elts = 0;
13562   rtx *call = &vec[0];
13563   rtx *clobber_ret_reg = &vec[1];
13564   rtx *use = &vec[2];
13565   rtx *clobber_thunk_reg = &vec[3];
13566   int i;
13567 
13568   /* Direct function calls need special treatment.  */
13569   if (GET_CODE (addr_location) == SYMBOL_REF)
13570     {
13571       /* When calling a global routine in PIC mode, we must
13572 	 replace the symbol itself with the PLT stub.  */
13573       if (flag_pic && !SYMBOL_REF_LOCAL_P (addr_location))
13574 	{
13575 	  if (TARGET_64BIT || retaddr_reg != NULL_RTX)
13576 	    {
13577 	      addr_location = gen_rtx_UNSPEC (Pmode,
13578 					      gen_rtvec (1, addr_location),
13579 					      UNSPEC_PLT);
13580 	      addr_location = gen_rtx_CONST (Pmode, addr_location);
13581 	      plt_call = true;
13582 	    }
13583 	  else
13584 	    /* For -fpic code the PLT entries might use r12 which is
13585 	       call-saved.  Therefore we cannot do a sibcall when
13586 	       calling directly using a symbol ref.  When reaching
13587 	       this point we decided (in s390_function_ok_for_sibcall)
13588 	       to do a sibcall for a function pointer but one of the
13589 	       optimizers was able to get rid of the function pointer
13590 	       by propagating the symbol ref into the call.  This
13591 	       optimization is illegal for S/390 so we turn the direct
13592 	       call into an indirect call again.  */
13593 	    addr_location = force_reg (Pmode, addr_location);
13594 	}
13595     }
13596 
13597   /* If it is already an indirect call or the code above moved the
13598      SYMBOL_REF to somewhere else, make sure the address can be found in
13599      register 1.  */
13600   if (retaddr_reg == NULL_RTX
13601       && GET_CODE (addr_location) != SYMBOL_REF
13602       && !plt_call)
13603     {
13604       emit_move_insn (gen_rtx_REG (Pmode, SIBCALL_REGNUM), addr_location);
13605       addr_location = gen_rtx_REG (Pmode, SIBCALL_REGNUM);
13606     }
13607 
13608   if (TARGET_INDIRECT_BRANCH_NOBP_CALL
13609       && GET_CODE (addr_location) != SYMBOL_REF
13610       && !plt_call)
13611     {
13612       /* Indirect branch thunks require the target to be a single GPR.  */
13613       addr_location = force_reg (Pmode, addr_location);
13614 
13615       /* Without exrl the indirect branch thunks need an additional
13616 	 register for larl;ex.  */
13617       if (!TARGET_CPU_Z10)
13618 	{
13619 	  *clobber_thunk_reg = gen_rtx_REG (Pmode, INDIRECT_BRANCH_THUNK_REGNUM);
13620 	  *clobber_thunk_reg = gen_rtx_CLOBBER (VOIDmode, *clobber_thunk_reg);
13621 	}
13622     }
13623 
13624   addr_location = gen_rtx_MEM (QImode, addr_location);
13625   *call = gen_rtx_CALL (VOIDmode, addr_location, const0_rtx);
13626 
13627   if (result_reg != NULL_RTX)
13628     *call = gen_rtx_SET (result_reg, *call);
13629 
13630   if (retaddr_reg != NULL_RTX)
13631     {
13632       *clobber_ret_reg = gen_rtx_CLOBBER (VOIDmode, retaddr_reg);
13633 
13634       if (tls_call != NULL_RTX)
13635 	*use = gen_rtx_USE (VOIDmode, tls_call);
13636     }
13637 
13638 
13639   for (i = 0; i < 4; i++)
13640     if (vec[i] != NULL_RTX)
13641       elts++;
13642 
13643   if (elts > 1)
13644     {
13645       rtvec v;
13646       int e = 0;
13647 
13648       v = rtvec_alloc (elts);
13649       for (i = 0; i < 4; i++)
13650 	if (vec[i] != NULL_RTX)
13651 	  {
13652 	    RTVEC_ELT (v, e) = vec[i];
13653 	    e++;
13654 	  }
13655 
13656       *call = gen_rtx_PARALLEL (VOIDmode, v);
13657     }
13658 
13659   insn = emit_call_insn (*call);
13660 
13661   /* 31-bit PLT stubs and tls calls use the GOT register implicitly.  */
13662   if ((!TARGET_64BIT && plt_call) || tls_call != NULL_RTX)
13663     {
13664       /* s390_function_ok_for_sibcall should
13665 	 have denied sibcalls in this case.  */
13666       gcc_assert (retaddr_reg != NULL_RTX);
13667       use_reg (&CALL_INSN_FUNCTION_USAGE (insn), gen_rtx_REG (Pmode, 12));
13668     }
13669   return insn;
13670 }
13671 
13672 /* Implement TARGET_CONDITIONAL_REGISTER_USAGE.  */
13673 
13674 static void
13675 s390_conditional_register_usage (void)
13676 {
13677   int i;
13678 
13679   if (flag_pic)
13680     fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
13681   fixed_regs[BASE_REGNUM] = 0;
13682   fixed_regs[RETURN_REGNUM] = 0;
13683   if (TARGET_64BIT)
13684     {
13685       for (i = FPR8_REGNUM; i <= FPR15_REGNUM; i++)
13686 	call_used_regs[i] = 0;
13687     }
13688   else
13689     {
13690       call_used_regs[FPR4_REGNUM] = 0;
13691       call_used_regs[FPR6_REGNUM] = 0;
13692     }
13693 
13694   if (TARGET_SOFT_FLOAT)
13695     {
13696       for (i = FPR0_REGNUM; i <= FPR15_REGNUM; i++)
13697 	fixed_regs[i] = 1;
13698     }
13699 
13700   /* Disable v16 - v31 for non-vector target.  */
13701   if (!TARGET_VX)
13702     {
13703       for (i = VR16_REGNUM; i <= VR31_REGNUM; i++)
13704 	fixed_regs[i] = call_used_regs[i] = 1;
13705     }
13706 }
13707 
13708 /* Corresponding function to eh_return expander.  */
13709 
13710 static GTY(()) rtx s390_tpf_eh_return_symbol;
13711 void
13712 s390_emit_tpf_eh_return (rtx target)
13713 {
13714   rtx_insn *insn;
13715   rtx reg, orig_ra;
13716 
13717   if (!s390_tpf_eh_return_symbol)
13718     s390_tpf_eh_return_symbol = gen_rtx_SYMBOL_REF (Pmode, "__tpf_eh_return");
13719 
13720   reg = gen_rtx_REG (Pmode, 2);
13721   orig_ra = gen_rtx_REG (Pmode, 3);
13722 
13723   emit_move_insn (reg, target);
13724   emit_move_insn (orig_ra, get_hard_reg_initial_val (Pmode, RETURN_REGNUM));
13725   insn = s390_emit_call (s390_tpf_eh_return_symbol, NULL_RTX, reg,
13726 				     gen_rtx_REG (Pmode, RETURN_REGNUM));
13727   use_reg (&CALL_INSN_FUNCTION_USAGE (insn), reg);
13728   use_reg (&CALL_INSN_FUNCTION_USAGE (insn), orig_ra);
13729 
13730   emit_move_insn (EH_RETURN_HANDLER_RTX, reg);
13731 }
13732 
13733 /* Rework the prologue/epilogue to avoid saving/restoring
13734    registers unnecessarily.  */
13735 
13736 static void
13737 s390_optimize_prologue (void)
13738 {
13739   rtx_insn *insn, *new_insn, *next_insn;
13740 
13741   /* Do a final recompute of the frame-related data.  */
13742   s390_optimize_register_info ();
13743 
13744   /* If all special registers are in fact used, there's nothing we
13745      can do, so no point in walking the insn list.  */
13746 
13747   if (cfun_frame_layout.first_save_gpr <= BASE_REGNUM
13748       && cfun_frame_layout.last_save_gpr >= BASE_REGNUM)
13749     return;
13750 
13751   /* Search for prologue/epilogue insns and replace them.  */
13752   for (insn = get_insns (); insn; insn = next_insn)
13753     {
13754       int first, last, off;
13755       rtx set, base, offset;
13756       rtx pat;
13757 
13758       next_insn = NEXT_INSN (insn);
13759 
13760       if (! NONJUMP_INSN_P (insn) || ! RTX_FRAME_RELATED_P (insn))
13761 	continue;
13762 
13763       pat = PATTERN (insn);
13764 
13765       /* Remove ldgr/lgdr instructions used for saving and restoring
13766 	 GPRs if possible.  */
13767       if (TARGET_Z10)
13768 	{
13769 	  rtx tmp_pat = pat;
13770 
13771 	  if (INSN_CODE (insn) == CODE_FOR_stack_restore_from_fpr)
13772 	    tmp_pat = XVECEXP (pat, 0, 0);
13773 
13774 	  if (GET_CODE (tmp_pat) == SET
13775 	      && GET_MODE (SET_SRC (tmp_pat)) == DImode
13776 	      && REG_P (SET_SRC (tmp_pat))
13777 	      && REG_P (SET_DEST (tmp_pat)))
13778 	    {
13779 	      int src_regno = REGNO (SET_SRC (tmp_pat));
13780 	      int dest_regno = REGNO (SET_DEST (tmp_pat));
13781 	      int gpr_regno;
13782 	      int fpr_regno;
13783 
13784 	      if (!((GENERAL_REGNO_P (src_regno)
13785 		     && FP_REGNO_P (dest_regno))
13786 		    || (FP_REGNO_P (src_regno)
13787 			&& GENERAL_REGNO_P (dest_regno))))
13788 		continue;
13789 
13790 	      gpr_regno = GENERAL_REGNO_P (src_regno) ? src_regno : dest_regno;
13791 	      fpr_regno = FP_REGNO_P (src_regno) ? src_regno : dest_regno;
13792 
13793 	      /* GPR must be call-saved, FPR must be call-clobbered.  */
13794 	      if (!call_used_regs[fpr_regno]
13795 		  || call_used_regs[gpr_regno])
13796 		continue;
13797 
13798 	      /* It must not happen that what we once saved in an FPR now
13799 		 needs a stack slot.  */
13800 	      gcc_assert (cfun_gpr_save_slot (gpr_regno) != SAVE_SLOT_STACK);
13801 
13802 	      if (cfun_gpr_save_slot (gpr_regno) == SAVE_SLOT_NONE)
13803 		{
13804 		  remove_insn (insn);
13805 		  continue;
13806 		}
13807 	    }
13808 	}
13809 
13810       if (GET_CODE (pat) == PARALLEL
13811 	  && store_multiple_operation (pat, VOIDmode))
13812 	{
13813 	  set = XVECEXP (pat, 0, 0);
13814 	  first = REGNO (SET_SRC (set));
13815 	  last = first + XVECLEN (pat, 0) - 1;
13816 	  offset = const0_rtx;
13817 	  base = eliminate_constant_term (XEXP (SET_DEST (set), 0), &offset);
13818 	  off = INTVAL (offset);
13819 
13820 	  if (GET_CODE (base) != REG || off < 0)
13821 	    continue;
13822 	  if (cfun_frame_layout.first_save_gpr != -1
13823 	      && (cfun_frame_layout.first_save_gpr < first
13824 		  || cfun_frame_layout.last_save_gpr > last))
13825 	    continue;
13826 	  if (REGNO (base) != STACK_POINTER_REGNUM
13827 	      && REGNO (base) != HARD_FRAME_POINTER_REGNUM)
13828 	    continue;
13829 	  if (first > BASE_REGNUM || last < BASE_REGNUM)
13830 	    continue;
13831 
13832 	  if (cfun_frame_layout.first_save_gpr != -1)
13833 	    {
13834 	      rtx s_pat = save_gprs (base,
13835 				     off + (cfun_frame_layout.first_save_gpr
13836 					    - first) * UNITS_PER_LONG,
13837 				     cfun_frame_layout.first_save_gpr,
13838 				     cfun_frame_layout.last_save_gpr);
13839 	      new_insn = emit_insn_before (s_pat, insn);
13840 	      INSN_ADDRESSES_NEW (new_insn, -1);
13841 	    }
13842 
13843 	  remove_insn (insn);
13844 	  continue;
13845 	}
13846 
13847       if (cfun_frame_layout.first_save_gpr == -1
13848 	  && GET_CODE (pat) == SET
13849 	  && GENERAL_REG_P (SET_SRC (pat))
13850 	  && GET_CODE (SET_DEST (pat)) == MEM)
13851 	{
13852 	  set = pat;
13853 	  first = REGNO (SET_SRC (set));
13854 	  offset = const0_rtx;
13855 	  base = eliminate_constant_term (XEXP (SET_DEST (set), 0), &offset);
13856 	  off = INTVAL (offset);
13857 
13858 	  if (GET_CODE (base) != REG || off < 0)
13859 	    continue;
13860 	  if (REGNO (base) != STACK_POINTER_REGNUM
13861 	      && REGNO (base) != HARD_FRAME_POINTER_REGNUM)
13862 	    continue;
13863 
13864 	  remove_insn (insn);
13865 	  continue;
13866 	}
13867 
13868       if (GET_CODE (pat) == PARALLEL
13869 	  && load_multiple_operation (pat, VOIDmode))
13870 	{
13871 	  set = XVECEXP (pat, 0, 0);
13872 	  first = REGNO (SET_DEST (set));
13873 	  last = first + XVECLEN (pat, 0) - 1;
13874 	  offset = const0_rtx;
13875 	  base = eliminate_constant_term (XEXP (SET_SRC (set), 0), &offset);
13876 	  off = INTVAL (offset);
13877 
13878 	  if (GET_CODE (base) != REG || off < 0)
13879 	    continue;
13880 
13881 	  if (cfun_frame_layout.first_restore_gpr != -1
13882 	      && (cfun_frame_layout.first_restore_gpr < first
13883 		  || cfun_frame_layout.last_restore_gpr > last))
13884 	    continue;
13885 	  if (REGNO (base) != STACK_POINTER_REGNUM
13886 	      && REGNO (base) != HARD_FRAME_POINTER_REGNUM)
13887 	    continue;
13888 	  if (first > BASE_REGNUM || last < BASE_REGNUM)
13889 	    continue;
13890 
13891 	  if (cfun_frame_layout.first_restore_gpr != -1)
13892 	    {
13893 	      rtx rpat = restore_gprs (base,
13894 				       off + (cfun_frame_layout.first_restore_gpr
13895 					      - first) * UNITS_PER_LONG,
13896 				       cfun_frame_layout.first_restore_gpr,
13897 				       cfun_frame_layout.last_restore_gpr);
13898 
13899 	      /* Remove REG_CFA_RESTOREs for registers that we no
13900 		 longer need to save.  */
13901 	      REG_NOTES (rpat) = REG_NOTES (insn);
13902 	      for (rtx *ptr = &REG_NOTES (rpat); *ptr; )
13903 		if (REG_NOTE_KIND (*ptr) == REG_CFA_RESTORE
13904 		    && ((int) REGNO (XEXP (*ptr, 0))
13905 			< cfun_frame_layout.first_restore_gpr))
13906 		  *ptr = XEXP (*ptr, 1);
13907 		else
13908 		  ptr = &XEXP (*ptr, 1);
13909 	      new_insn = emit_insn_before (rpat, insn);
13910 	      RTX_FRAME_RELATED_P (new_insn) = 1;
13911 	      INSN_ADDRESSES_NEW (new_insn, -1);
13912 	    }
13913 
13914 	  remove_insn (insn);
13915 	  continue;
13916 	}
13917 
13918       if (cfun_frame_layout.first_restore_gpr == -1
13919 	  && GET_CODE (pat) == SET
13920 	  && GENERAL_REG_P (SET_DEST (pat))
13921 	  && GET_CODE (SET_SRC (pat)) == MEM)
13922 	{
13923 	  set = pat;
13924 	  first = REGNO (SET_DEST (set));
13925 	  offset = const0_rtx;
13926 	  base = eliminate_constant_term (XEXP (SET_SRC (set), 0), &offset);
13927 	  off = INTVAL (offset);
13928 
13929 	  if (GET_CODE (base) != REG || off < 0)
13930 	    continue;
13931 
13932 	  if (REGNO (base) != STACK_POINTER_REGNUM
13933 	      && REGNO (base) != HARD_FRAME_POINTER_REGNUM)
13934 	    continue;
13935 
13936 	  remove_insn (insn);
13937 	  continue;
13938 	}
13939     }
13940 }
13941 
13942 /* On z10 and later the dynamic branch prediction must see the
13943    backward jump within a certain window.  If not, it falls back to
13944    the static prediction.  This function rearranges the loop backward
13945    branch in a way which makes the static prediction always correct.
13946    The function returns true if it added an instruction.  */
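/* A rough sketch of the rewrite performed below:

     before:			after:
	...			   ...
	jCC	loop_head	   jNCC	new_label
				   j	loop_head
				new_label:

   i.e. the conditional backward branch becomes a short forward branch
   over an unconditional backward jump, a shape the static predictor
   always gets right.  */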
13947 static bool
13948 s390_fix_long_loop_prediction (rtx_insn *insn)
13949 {
13950   rtx set = single_set (insn);
13951   rtx code_label, label_ref;
13952   rtx_insn *uncond_jump;
13953   rtx_insn *cur_insn;
13954   rtx tmp;
13955   int distance;
13956 
13957   /* This will exclude branch on count and branch on index patterns
13958      since these are correctly statically predicted.  */
13959   if (!set
13960       || SET_DEST (set) != pc_rtx
13961       || GET_CODE (SET_SRC(set)) != IF_THEN_ELSE)
13962     return false;
13963 
13964   /* Skip conditional returns.  */
13965   if (ANY_RETURN_P (XEXP (SET_SRC (set), 1))
13966       && XEXP (SET_SRC (set), 2) == pc_rtx)
13967     return false;
13968 
13969   label_ref = (GET_CODE (XEXP (SET_SRC (set), 1)) == LABEL_REF ?
13970 	       XEXP (SET_SRC (set), 1) : XEXP (SET_SRC (set), 2));
13971 
13972   gcc_assert (GET_CODE (label_ref) == LABEL_REF);
13973 
13974   code_label = XEXP (label_ref, 0);
13975 
13976   if (INSN_ADDRESSES (INSN_UID (code_label)) == -1
13977       || INSN_ADDRESSES (INSN_UID (insn)) == -1
13978       || (INSN_ADDRESSES (INSN_UID (insn))
13979 	  - INSN_ADDRESSES (INSN_UID (code_label)) < PREDICT_DISTANCE))
13980     return false;
13981 
13982   for (distance = 0, cur_insn = PREV_INSN (insn);
13983        distance < PREDICT_DISTANCE - 6;
13984        distance += get_attr_length (cur_insn), cur_insn = PREV_INSN (cur_insn))
13985     if (!cur_insn || JUMP_P (cur_insn) || LABEL_P (cur_insn))
13986       return false;
13987 
13988   rtx_code_label *new_label = gen_label_rtx ();
13989   uncond_jump = emit_jump_insn_after (
13990 		  gen_rtx_SET (pc_rtx,
13991 			       gen_rtx_LABEL_REF (VOIDmode, code_label)),
13992 		  insn);
13993   emit_label_after (new_label, uncond_jump);
13994 
13995   tmp = XEXP (SET_SRC (set), 1);
13996   XEXP (SET_SRC (set), 1) = XEXP (SET_SRC (set), 2);
13997   XEXP (SET_SRC (set), 2) = tmp;
13998   INSN_CODE (insn) = -1;
13999 
14000   XEXP (label_ref, 0) = new_label;
14001   JUMP_LABEL (insn) = new_label;
14002   JUMP_LABEL (uncond_jump) = code_label;
14003 
14004   return true;
14005 }
14006 
14007 /* Returns 1 if INSN reads the value of REG for purposes not related
14008    to addressing of memory, and 0 otherwise.  */
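/* For instance, for "a %r1,0(%r2)" this returns 1 when REG is %r1 (read
   as data) but 0 when REG is %r2 (used only to form the memory
   address).  */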
14009 static int
14010 s390_non_addr_reg_read_p (rtx reg, rtx_insn *insn)
14011 {
14012   return reg_referenced_p (reg, PATTERN (insn))
14013     && !reg_used_in_mem_p (REGNO (reg), PATTERN (insn));
14014 }
14015 
14016 /* Starting from INSN find_cond_jump looks downwards in the insn
14017    stream for a single jump insn which is the last user of the
14018    condition code set in INSN.  */
14019 static rtx_insn *
14020 find_cond_jump (rtx_insn *insn)
14021 {
14022   for (; insn; insn = NEXT_INSN (insn))
14023     {
14024       rtx ite, cc;
14025 
14026       if (LABEL_P (insn))
14027 	break;
14028 
14029       if (!JUMP_P (insn))
14030 	{
14031 	  if (reg_mentioned_p (gen_rtx_REG (CCmode, CC_REGNUM), insn))
14032 	    break;
14033 	  continue;
14034 	}
14035 
14036       /* This will be triggered by a return.  */
14037       if (GET_CODE (PATTERN (insn)) != SET)
14038 	break;
14039 
14040       gcc_assert (SET_DEST (PATTERN (insn)) == pc_rtx);
14041       ite = SET_SRC (PATTERN (insn));
14042 
14043       if (GET_CODE (ite) != IF_THEN_ELSE)
14044 	break;
14045 
14046       cc = XEXP (XEXP (ite, 0), 0);
14047       if (!REG_P (cc) || !CC_REGNO_P (REGNO (cc)))
14048 	break;
14049 
14050       if (find_reg_note (insn, REG_DEAD, cc))
14051 	return insn;
14052       break;
14053     }
14054 
14055   return NULL;
14056 }
14057 
14058 /* Swap the condition in COND and the operands in OP0 and OP1 so that
14059    the semantics does not change.  If NULL_RTX is passed as COND the
14060    function tries to find the conditional jump starting with INSN.  */
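/* E.g. a compare feeding "branch if OP0 > OP1" is rewritten so that the
   operands are exchanged and the condition becomes "branch if OP1 < OP0",
   which leaves the overall semantics unchanged.  */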
14061 static void
14062 s390_swap_cmp (rtx cond, rtx *op0, rtx *op1, rtx_insn *insn)
14063 {
14064   rtx tmp = *op0;
14065 
14066   if (cond == NULL_RTX)
14067     {
14068       rtx_insn *jump = find_cond_jump (NEXT_INSN (insn));
14069       rtx set = jump ? single_set (jump) : NULL_RTX;
14070 
14071       if (set == NULL_RTX)
14072 	return;
14073 
14074       cond = XEXP (SET_SRC (set), 0);
14075     }
14076 
14077   *op0 = *op1;
14078   *op1 = tmp;
14079   PUT_CODE (cond, swap_condition (GET_CODE (cond)));
14080 }
14081 
14082 /* On z10, instructions of the compare-and-branch family have the
14083    property of accessing the register occurring as second operand with
14084    its bits complemented.  If such a compare is grouped with a second
14085    instruction that accesses the same register non-complemented, and
14086    if that register's value is delivered via a bypass, then the
14087    pipeline recycles, thereby causing significant performance decline.
14088    This function locates such situations and exchanges the two
14089    operands of the compare.  The function returns true whenever it
14090    added an insn.  */
14091 static bool
14092 s390_z10_optimize_cmp (rtx_insn *insn)
14093 {
14094   rtx_insn *prev_insn, *next_insn;
14095   bool insn_added_p = false;
14096   rtx cond, *op0, *op1;
14097 
14098   if (GET_CODE (PATTERN (insn)) == PARALLEL)
14099     {
14100       /* Handle compare and branch and branch on count
14101 	 instructions.  */
14102       rtx pattern = single_set (insn);
14103 
14104       if (!pattern
14105 	  || SET_DEST (pattern) != pc_rtx
14106 	  || GET_CODE (SET_SRC (pattern)) != IF_THEN_ELSE)
14107 	return false;
14108 
14109       cond = XEXP (SET_SRC (pattern), 0);
14110       op0 = &XEXP (cond, 0);
14111       op1 = &XEXP (cond, 1);
14112     }
14113   else if (GET_CODE (PATTERN (insn)) == SET)
14114     {
14115       rtx src, dest;
14116 
14117       /* Handle normal compare instructions.  */
14118       src = SET_SRC (PATTERN (insn));
14119       dest = SET_DEST (PATTERN (insn));
14120 
14121       if (!REG_P (dest)
14122 	  || !CC_REGNO_P (REGNO (dest))
14123 	  || GET_CODE (src) != COMPARE)
14124 	return false;
14125 
14126       /* s390_swap_cmp will try to find the conditional
14127 	 jump when passing NULL_RTX as condition.  */
14128       cond = NULL_RTX;
14129       op0 = &XEXP (src, 0);
14130       op1 = &XEXP (src, 1);
14131     }
14132   else
14133     return false;
14134 
14135   if (!REG_P (*op0) || !REG_P (*op1))
14136     return false;
14137 
14138   if (GET_MODE_CLASS (GET_MODE (*op0)) != MODE_INT)
14139     return false;
14140 
14141   /* Swap the COMPARE arguments and its mask if there is a
14142      conflicting access in the previous insn.  */
14143   prev_insn = prev_active_insn (insn);
14144   if (prev_insn != NULL_RTX && INSN_P (prev_insn)
14145       && reg_referenced_p (*op1, PATTERN (prev_insn)))
14146     s390_swap_cmp (cond, op0, op1, insn);
14147 
14148   /* Check if there is a conflict with the next insn. If there
14149      was no conflict with the previous insn, then swap the
14150      COMPARE arguments and its mask.  If we already swapped
14151      the operands, or if swapping them would cause a conflict
14152      with the previous insn, issue a NOP after the COMPARE in
14153 	 order to separate the two instructions.  */
14154   next_insn = next_active_insn (insn);
14155   if (next_insn != NULL_RTX && INSN_P (next_insn)
14156       && s390_non_addr_reg_read_p (*op1, next_insn))
14157     {
14158       if (prev_insn != NULL_RTX && INSN_P (prev_insn)
14159 	  && s390_non_addr_reg_read_p (*op0, prev_insn))
14160 	{
14161 	  if (REGNO (*op1) == 0)
14162 	    emit_insn_after (gen_nop_lr1 (), insn);
14163 	  else
14164 	    emit_insn_after (gen_nop_lr0 (), insn);
14165 	  insn_added_p = true;
14166 	}
14167       else
14168 	s390_swap_cmp (cond, op0, op1, insn);
14169     }
14170   return insn_added_p;
14171 }
14172 
14173 /* Number of INSNs to be scanned backward in the last BB of the loop
14174    and forward in the first BB of the loop.  This usually should be a
14175    bit more than the number of INSNs which could go into one
14176    group.  */
14177 #define S390_OSC_SCAN_INSN_NUM 5
14178 
14179 /* Scan LOOP for static OSC collisions and return true if an osc_break
14180    should be issued for this loop.  */
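/* Informally, an operand-store-compare (OSC) situation is a store at the
   end of one iteration that is closely followed by a load from the same
   address at the top of the next iteration, e.g. (illustrative only):

     loop:	lg	%r1,0(%r2)
		...
		stg	%r1,0(%r2)
		brctg	%r3,loop

   The scan below looks for such a store in the latch block and a load
   from a syntactically identical address in the header block whose base
   or index register was modified in between.  */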
14181 static bool
14182 s390_adjust_loop_scan_osc (struct loop* loop)
14183 
14184 {
14185   HARD_REG_SET modregs, newregs;
14186   rtx_insn *insn, *store_insn = NULL;
14187   rtx set;
14188   struct s390_address addr_store, addr_load;
14189   subrtx_iterator::array_type array;
14190   int insn_count;
14191 
14192   CLEAR_HARD_REG_SET (modregs);
14193 
14194   insn_count = 0;
14195   FOR_BB_INSNS_REVERSE (loop->latch, insn)
14196     {
14197       if (!INSN_P (insn) || INSN_CODE (insn) <= 0)
14198 	continue;
14199 
14200       insn_count++;
14201       if (insn_count > S390_OSC_SCAN_INSN_NUM)
14202 	return false;
14203 
14204       find_all_hard_reg_sets (insn, &newregs, true);
14205       modregs |= newregs;
14206 
14207       set = single_set (insn);
14208       if (!set)
14209 	continue;
14210 
14211       if (MEM_P (SET_DEST (set))
14212 	  && s390_decompose_address (XEXP (SET_DEST (set), 0), &addr_store))
14213 	{
14214 	  store_insn = insn;
14215 	  break;
14216 	}
14217     }
14218 
14219   if (store_insn == NULL_RTX)
14220     return false;
14221 
14222   insn_count = 0;
14223   FOR_BB_INSNS (loop->header, insn)
14224     {
14225       if (!INSN_P (insn) || INSN_CODE (insn) <= 0)
14226 	continue;
14227 
14228       if (insn == store_insn)
14229 	return false;
14230 
14231       insn_count++;
14232       if (insn_count > S390_OSC_SCAN_INSN_NUM)
14233 	return false;
14234 
14235       find_all_hard_reg_sets (insn, &newregs, true);
14236       modregs |= newregs;
14237 
14238       set = single_set (insn);
14239       if (!set)
14240 	continue;
14241 
14242       /* An intermediate store disrupts static OSC checking
14243 	 anyway.  */
14244       if (MEM_P (SET_DEST (set))
14245 	  && s390_decompose_address (XEXP (SET_DEST (set), 0), NULL))
14246 	return false;
14247 
14248       FOR_EACH_SUBRTX (iter, array, SET_SRC (set), NONCONST)
14249 	if (MEM_P (*iter)
14250 	    && s390_decompose_address (XEXP (*iter, 0), &addr_load)
14251 	    && rtx_equal_p (addr_load.base, addr_store.base)
14252 	    && rtx_equal_p (addr_load.indx, addr_store.indx)
14253 	    && rtx_equal_p (addr_load.disp, addr_store.disp))
14254 	  {
14255 	    if ((addr_load.base != NULL_RTX
14256 		 && TEST_HARD_REG_BIT (modregs, REGNO (addr_load.base)))
14257 		|| (addr_load.indx != NULL_RTX
14258 		    && TEST_HARD_REG_BIT (modregs, REGNO (addr_load.indx))))
14259 	      return true;
14260 	  }
14261     }
14262   return false;
14263 }
14264 
14265 /* Look for adjustments which can be done on simple innermost
14266    loops.  */
14267 static void
14268 s390_adjust_loops ()
14269 {
14270   struct loop *loop = NULL;
14271 
14272   df_analyze ();
14273   compute_bb_for_insn ();
14274 
14275   /* Find the loops.  */
14276   loop_optimizer_init (AVOID_CFG_MODIFICATIONS);
14277 
14278   FOR_EACH_LOOP (loop, LI_ONLY_INNERMOST)
14279     {
14280       if (dump_file)
14281 	{
14282 	  flow_loop_dump (loop, dump_file, NULL, 0);
14283 	  fprintf (dump_file, ";;  OSC loop scan Loop: ");
14284 	}
14285       if (loop->latch == NULL
14286 	  || pc_set (BB_END (loop->latch)) == NULL_RTX
14287 	  || !s390_adjust_loop_scan_osc (loop))
14288 	{
14289 	  if (dump_file)
14290 	    {
14291 	      if (loop->latch == NULL)
14292 		fprintf (dump_file, " multiple backward jumps\n");
14293 	      else
14294 		{
14295 		  fprintf (dump_file, " header insn: %d latch insn: %d ",
14296 			   INSN_UID (BB_HEAD (loop->header)),
14297 			   INSN_UID (BB_END (loop->latch)));
14298 		  if (pc_set (BB_END (loop->latch)) == NULL_RTX)
14299 		    fprintf (dump_file, " loop does not end with jump\n");
14300 		  else
14301 		    fprintf (dump_file, " not instrumented\n");
14302 		}
14303 	    }
14304 	}
14305       else
14306 	{
14307 	  rtx_insn *new_insn;
14308 
14309 	  if (dump_file)
14310 	    fprintf (dump_file, " adding OSC break insn: ");
14311 	  new_insn = emit_insn_before (gen_osc_break (),
14312 				       BB_END (loop->latch));
14313 	  INSN_ADDRESSES_NEW (new_insn, -1);
14314 	}
14315     }
14316 
14317   loop_optimizer_finalize ();
14318 
14319   df_finish_pass (false);
14320 }
14321 
14322 /* Perform machine-dependent processing.  */
14323 
14324 static void
14325 s390_reorg (void)
14326 {
14327   struct constant_pool *pool;
14328   rtx_insn *insn;
14329   int hw_before, hw_after;
14330 
14331   if (s390_tune == PROCESSOR_2964_Z13)
14332     s390_adjust_loops ();
14333 
14334   /* Make sure all splits have been performed; splits after
14335      machine_dependent_reorg might confuse insn length counts.  */
14336   split_all_insns_noflow ();
14337 
14338   /* Install the main literal pool and the associated base
14339      register load insns.  The literal pool might be > 4096 bytes in
14340      size, so that some of its elements cannot be directly accessed.
14341 
14342      To fix this, we split the single literal pool into multiple
14343      pool chunks, reloading the pool base register at various
14344      points throughout the function to ensure it always points to
14345      the pool chunk the following code expects.  */
14346 
14347   /* Collect the literal pool.  */
14348   pool = s390_mainpool_start ();
14349   if (pool)
14350     {
14351       /* Finish up literal pool related changes.  */
14352       s390_mainpool_finish (pool);
14353     }
14354   else
14355     {
14356       /* If literal pool overflowed, chunkify it.  */
14357       pool = s390_chunkify_start ();
14358       s390_chunkify_finish (pool);
14359     }
14360 
14361   /* Generate out-of-pool execute target insns.  */
14362   for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
14363     {
14364       rtx label;
14365       rtx_insn *target;
14366 
14367       label = s390_execute_label (insn);
14368       if (!label)
14369 	continue;
14370 
14371       gcc_assert (label != const0_rtx);
14372 
14373       target = emit_label (XEXP (label, 0));
14374       INSN_ADDRESSES_NEW (target, -1);
14375 
14376       if (JUMP_P (insn))
14377 	{
14378 	  target = emit_jump_insn (s390_execute_target (insn));
14379 	  /* This is important in order to keep a table jump
14380 	     pointing at the jump table label.  Only then is it
14381 	     recognized as a table jump.  */
14382 	  JUMP_LABEL (target) = JUMP_LABEL (insn);
14383 	}
14384       else
14385 	target = emit_insn (s390_execute_target (insn));
14386       INSN_ADDRESSES_NEW (target, -1);
14387     }
14388 
14389   /* Try to optimize prologue and epilogue further.  */
14390   s390_optimize_prologue ();
14391 
14392   /* Walk over the insns and do some >=z10 specific changes.  */
14393   if (s390_tune >= PROCESSOR_2097_Z10)
14394     {
14395       rtx_insn *insn;
14396       bool insn_added_p = false;
14397 
14398       /* The insn lengths and addresses have to be up to date for the
14399 	 following manipulations.  */
14400       shorten_branches (get_insns ());
14401 
14402       for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
14403 	{
14404 	  if (!INSN_P (insn) || INSN_CODE (insn) <= 0)
14405 	    continue;
14406 
14407 	  if (JUMP_P (insn))
14408 	    insn_added_p |= s390_fix_long_loop_prediction (insn);
14409 
14410 	  if ((GET_CODE (PATTERN (insn)) == PARALLEL
14411 	       || GET_CODE (PATTERN (insn)) == SET)
14412 	      && s390_tune == PROCESSOR_2097_Z10)
14413 	    insn_added_p |= s390_z10_optimize_cmp (insn);
14414 	}
14415 
14416       /* Adjust branches if we added new instructions.  */
14417       if (insn_added_p)
14418 	shorten_branches (get_insns ());
14419     }
14420 
14421   s390_function_num_hotpatch_hw (current_function_decl, &hw_before, &hw_after);
14422   if (hw_after > 0)
14423     {
14424       rtx_insn *insn;
14425 
14426       /* Insert NOPs for hotpatching. */
14427       for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
14428 	/* Emit NOPs
14429 	    1. inside the area covered by debug information to allow setting
14430 	       breakpoints at the NOPs,
14431 	    2. before any insn which results in an asm instruction,
14432 	    3. before in-function labels to avoid jumping to the NOPs, for
14433 	       example as part of a loop,
14434 	    4. before any barrier in case the function is completely empty
14435 	       (__builtin_unreachable ()) and has neither internal labels nor
14436 	       active insns.
14437 	*/
14438 	if (active_insn_p (insn) || BARRIER_P (insn) || LABEL_P (insn))
14439 	  break;
14440       /* Output a series of NOPs before the first active insn.  */
14441       while (insn && hw_after > 0)
14442 	{
14443 	  if (hw_after >= 3)
14444 	    {
14445 	      emit_insn_before (gen_nop_6_byte (), insn);
14446 	      hw_after -= 3;
14447 	    }
14448 	  else if (hw_after >= 2)
14449 	    {
14450 	      emit_insn_before (gen_nop_4_byte (), insn);
14451 	      hw_after -= 2;
14452 	    }
14453 	  else
14454 	    {
14455 	      emit_insn_before (gen_nop_2_byte (), insn);
14456 	      hw_after -= 1;
14457 	    }
14458 	}
14459     }
14460 }
14461 
14462 /* Return true if INSN is a fp load insn writing register REGNO.  */
14463 static inline bool
14464 s390_fpload_toreg (rtx_insn *insn, unsigned int regno)
14465 {
14466   rtx set;
14467   enum attr_type flag = s390_safe_attr_type (insn);
14468 
14469   if (flag != TYPE_FLOADSF && flag != TYPE_FLOADDF)
14470     return false;
14471 
14472   set = single_set (insn);
14473 
14474   if (set == NULL_RTX)
14475     return false;
14476 
14477   if (!REG_P (SET_DEST (set)) || !MEM_P (SET_SRC (set)))
14478     return false;
14479 
14480   if (REGNO (SET_DEST (set)) != regno)
14481     return false;
14482 
14483   return true;
14484 }
14485 
14486 /* This value describes the distance to be avoided between an
14487    arithmetic fp instruction and an fp load writing the same register.
14488    Z10_EARLYLOAD_DISTANCE - 1 as well as Z10_EARLYLOAD_DISTANCE + 1 is
14489    fine but the exact value has to be avoided. Otherwise the FP
14490    pipeline will throw an exception causing a major penalty.  */
14491 #define Z10_EARLYLOAD_DISTANCE 7
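/* Informal example: if the insn issued Z10_EARLYLOAD_DISTANCE slots before
   the one about to be issued is an arithmetic fp instruction writing, say,
   %f0, then issuing a load of %f0 now would hit exactly that distance, so
   the function below defers such a load within the ready list.  */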
14492 
14493 /* Rearrange the ready list in order to avoid the situation described
14494    for Z10_EARLYLOAD_DISTANCE.  A problematic load instruction is
14495    moved to the very end of the ready list.  */
14496 static void
14497 s390_z10_prevent_earlyload_conflicts (rtx_insn **ready, int *nready_p)
14498 {
14499   unsigned int regno;
14500   int nready = *nready_p;
14501   rtx_insn *tmp;
14502   int i;
14503   rtx_insn *insn;
14504   rtx set;
14505   enum attr_type flag;
14506   int distance;
14507 
14508   /* Skip DISTANCE - 1 active insns.  */
14509   for (insn = last_scheduled_insn, distance = Z10_EARLYLOAD_DISTANCE - 1;
14510        distance > 0 && insn != NULL_RTX;
14511        distance--, insn = prev_active_insn (insn))
14512     if (CALL_P (insn) || JUMP_P (insn))
14513       return;
14514 
14515   if (insn == NULL_RTX)
14516     return;
14517 
14518   set = single_set (insn);
14519 
14520   if (set == NULL_RTX || !REG_P (SET_DEST (set))
14521       || GET_MODE_CLASS (GET_MODE (SET_DEST (set))) != MODE_FLOAT)
14522     return;
14523 
14524   flag = s390_safe_attr_type (insn);
14525 
14526   if (flag == TYPE_FLOADSF || flag == TYPE_FLOADDF)
14527     return;
14528 
14529   regno = REGNO (SET_DEST (set));
14530   i = nready - 1;
14531 
14532   while (!s390_fpload_toreg (ready[i], regno) && i > 0)
14533     i--;
14534 
14535   if (!i)
14536     return;
14537 
14538   tmp = ready[i];
14539   memmove (&ready[1], &ready[0], sizeof (rtx_insn *) * i);
14540   ready[0] = tmp;
14541 }
14542 
14543 /* Returns TRUE if BB is entered via a fallthru edge and all other
14544    incoming edges are less than likely.  */
14545 static bool
14546 s390_bb_fallthru_entry_likely (basic_block bb)
14547 {
14548   edge e, fallthru_edge;
14549   edge_iterator ei;
14550 
14551   if (!bb)
14552     return false;
14553 
14554   fallthru_edge = find_fallthru_edge (bb->preds);
14555   if (!fallthru_edge)
14556     return false;
14557 
14558   FOR_EACH_EDGE (e, ei, bb->preds)
14559     if (e != fallthru_edge
14560 	&& e->probability >= profile_probability::likely ())
14561       return false;
14562 
14563   return true;
14564 }
14565 
14566 struct s390_sched_state
14567 {
14568   /* Number of insns in the group.  */
14569   int group_state;
14570   /* Execution side of the group.  */
14571   int side;
14572   /* Group can only hold two insns.  */
14573   bool group_of_two;
14574 } s390_sched_state;
14575 
14576 static struct s390_sched_state sched_state = {0, 1, false};
14577 
14578 #define S390_SCHED_ATTR_MASK_CRACKED    0x1
14579 #define S390_SCHED_ATTR_MASK_EXPANDED   0x2
14580 #define S390_SCHED_ATTR_MASK_ENDGROUP   0x4
14581 #define S390_SCHED_ATTR_MASK_GROUPALONE 0x8
14582 #define S390_SCHED_ATTR_MASK_GROUPOFTWO 0x10
14583 
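/* Return a mask of the S390_SCHED_ATTR_MASK_* bits that the scheduling
   attributes of INSN set for the current s390_tune target.  */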
14584 static unsigned int
14585 s390_get_sched_attrmask (rtx_insn *insn)
14586 {
14587   unsigned int mask = 0;
14588 
14589   switch (s390_tune)
14590     {
14591     case PROCESSOR_2827_ZEC12:
14592       if (get_attr_zEC12_cracked (insn))
14593 	mask |= S390_SCHED_ATTR_MASK_CRACKED;
14594       if (get_attr_zEC12_expanded (insn))
14595 	mask |= S390_SCHED_ATTR_MASK_EXPANDED;
14596       if (get_attr_zEC12_endgroup (insn))
14597 	mask |= S390_SCHED_ATTR_MASK_ENDGROUP;
14598       if (get_attr_zEC12_groupalone (insn))
14599 	mask |= S390_SCHED_ATTR_MASK_GROUPALONE;
14600       break;
14601     case PROCESSOR_2964_Z13:
14602       if (get_attr_z13_cracked (insn))
14603 	mask |= S390_SCHED_ATTR_MASK_CRACKED;
14604       if (get_attr_z13_expanded (insn))
14605 	mask |= S390_SCHED_ATTR_MASK_EXPANDED;
14606       if (get_attr_z13_endgroup (insn))
14607 	mask |= S390_SCHED_ATTR_MASK_ENDGROUP;
14608       if (get_attr_z13_groupalone (insn))
14609 	mask |= S390_SCHED_ATTR_MASK_GROUPALONE;
14610       if (get_attr_z13_groupoftwo (insn))
14611 	mask |= S390_SCHED_ATTR_MASK_GROUPOFTWO;
14612       break;
14613     case PROCESSOR_3906_Z14:
14614       if (get_attr_z14_cracked (insn))
14615 	mask |= S390_SCHED_ATTR_MASK_CRACKED;
14616       if (get_attr_z14_expanded (insn))
14617 	mask |= S390_SCHED_ATTR_MASK_EXPANDED;
14618       if (get_attr_z14_endgroup (insn))
14619 	mask |= S390_SCHED_ATTR_MASK_ENDGROUP;
14620       if (get_attr_z14_groupalone (insn))
14621 	mask |= S390_SCHED_ATTR_MASK_GROUPALONE;
14622       if (get_attr_z14_groupoftwo (insn))
14623 	mask |= S390_SCHED_ATTR_MASK_GROUPOFTWO;
14624       break;
14625     case PROCESSOR_8561_Z15:
14626       if (get_attr_z15_cracked (insn))
14627 	mask |= S390_SCHED_ATTR_MASK_CRACKED;
14628       if (get_attr_z15_expanded (insn))
14629 	mask |= S390_SCHED_ATTR_MASK_EXPANDED;
14630       if (get_attr_z15_endgroup (insn))
14631 	mask |= S390_SCHED_ATTR_MASK_ENDGROUP;
14632       if (get_attr_z15_groupalone (insn))
14633 	mask |= S390_SCHED_ATTR_MASK_GROUPALONE;
14634       if (get_attr_z15_groupoftwo (insn))
14635 	mask |= S390_SCHED_ATTR_MASK_GROUPOFTWO;
14636       break;
14637     default:
14638       gcc_unreachable ();
14639     }
14640   return mask;
14641 }
14642 
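/* Return a mask of the execution units used by INSN (bit 0 = LSU,
   bit 1 = FXA, bit 2 = FXB, bit 3 = VFU); *UNITS is set to the number
   of units of the current s390_tune target.  */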
14643 static unsigned int
14644 s390_get_unit_mask (rtx_insn *insn, int *units)
14645 {
14646   unsigned int mask = 0;
14647 
14648   switch (s390_tune)
14649     {
14650     case PROCESSOR_2964_Z13:
14651       *units = 4;
14652       if (get_attr_z13_unit_lsu (insn))
14653 	mask |= 1 << 0;
14654       if (get_attr_z13_unit_fxa (insn))
14655 	mask |= 1 << 1;
14656       if (get_attr_z13_unit_fxb (insn))
14657 	mask |= 1 << 2;
14658       if (get_attr_z13_unit_vfu (insn))
14659 	mask |= 1 << 3;
14660       break;
14661     case PROCESSOR_3906_Z14:
14662       *units = 4;
14663       if (get_attr_z14_unit_lsu (insn))
14664 	mask |= 1 << 0;
14665       if (get_attr_z14_unit_fxa (insn))
14666 	mask |= 1 << 1;
14667       if (get_attr_z14_unit_fxb (insn))
14668 	mask |= 1 << 2;
14669       if (get_attr_z14_unit_vfu (insn))
14670 	mask |= 1 << 3;
14671       break;
14672     case PROCESSOR_8561_Z15:
14673       *units = 4;
14674       if (get_attr_z15_unit_lsu (insn))
14675 	mask |= 1 << 0;
14676       if (get_attr_z15_unit_fxa (insn))
14677 	mask |= 1 << 1;
14678       if (get_attr_z15_unit_fxb (insn))
14679 	mask |= 1 << 2;
14680       if (get_attr_z15_unit_vfu (insn))
14681 	mask |= 1 << 3;
14682       break;
14683     default:
14684       gcc_unreachable ();
14685     }
14686   return mask;
14687 }
14688 
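/* Return true if INSN executes on the long-running FPD unit
   (z13 and later).  */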
14689 static bool
14690 s390_is_fpd (rtx_insn *insn)
14691 {
14692   if (insn == NULL_RTX)
14693     return false;
14694 
14695   return get_attr_z13_unit_fpd (insn) || get_attr_z14_unit_fpd (insn)
14696     || get_attr_z15_unit_fpd (insn);
14697 }
14698 
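/* Return true if INSN executes on the long-running FXD unit
   (z13 and later).  */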
14699 static bool
14700 s390_is_fxd (rtx_insn *insn)
14701 {
14702   if (insn == NULL_RTX)
14703     return false;
14704 
14705   return get_attr_z13_unit_fxd (insn) || get_attr_z14_unit_fxd (insn)
14706     || get_attr_z15_unit_fxd (insn);
14707 }
14708 
14709 /* Returns TRUE if INSN is a long-running instruction.  */
14710 static bool
14711 s390_is_longrunning (rtx_insn *insn)
14712 {
14713   if (insn == NULL_RTX)
14714     return false;
14715 
14716   return s390_is_fxd (insn) || s390_is_fpd (insn);
14717 }
14718 
14719 
14720 /* Return the scheduling score for INSN.  The higher the score the
14721    better.  The score is calculated from the OOO scheduling attributes
14722    of INSN and the scheduling state sched_state.  */
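/* Informal note: group_state values 0, 1 and 2 below correspond to the
   first, second and third slot of a dispatch group, which on these CPUs
   holds at most three insns.  */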
14723 static int
14724 s390_sched_score (rtx_insn *insn)
14725 {
14726   unsigned int mask = s390_get_sched_attrmask (insn);
14727   int score = 0;
14728 
14729   switch (sched_state.group_state)
14730     {
14731     case 0:
14732       /* Try to put insns into the first slot which would otherwise
14733 	 break a group.  */
14734       if ((mask & S390_SCHED_ATTR_MASK_CRACKED) != 0
14735 	  || (mask & S390_SCHED_ATTR_MASK_EXPANDED) != 0)
14736 	score += 5;
14737       if ((mask & S390_SCHED_ATTR_MASK_GROUPALONE) != 0)
14738 	score += 10;
14739       break;
14740     case 1:
14741       /* Prefer not cracked insns while trying to put together a
14742 	 group.  */
14743       if ((mask & S390_SCHED_ATTR_MASK_CRACKED) == 0
14744 	  && (mask & S390_SCHED_ATTR_MASK_EXPANDED) == 0
14745 	  && (mask & S390_SCHED_ATTR_MASK_GROUPALONE) == 0)
14746 	score += 10;
14747       if ((mask & S390_SCHED_ATTR_MASK_ENDGROUP) == 0)
14748 	score += 5;
14749       /* If we are in a group of two already, try to schedule another
14750 	 group-of-two insn to avoid shortening another group.  */
14751       if (sched_state.group_of_two
14752 	  && (mask & S390_SCHED_ATTR_MASK_GROUPOFTWO) != 0)
14753 	score += 15;
14754       break;
14755     case 2:
14756       /* Prefer not cracked insns while trying to put together a
14757 	 group.  */
14758       if ((mask & S390_SCHED_ATTR_MASK_CRACKED) == 0
14759 	  && (mask & S390_SCHED_ATTR_MASK_EXPANDED) == 0
14760 	  && (mask & S390_SCHED_ATTR_MASK_GROUPALONE) == 0)
14761 	score += 10;
14762       /* Prefer endgroup insns in the last slot.  */
14763       if ((mask & S390_SCHED_ATTR_MASK_ENDGROUP) != 0)
14764 	score += 10;
14765       /* Try to avoid group-of-two insns in the last slot as they will
14766 	 shorten this group as well as the next one.  */
14767       if ((mask & S390_SCHED_ATTR_MASK_GROUPOFTWO) != 0)
14768 	score = MAX (0, score - 15);
14769       break;
14770     }
14771 
14772   if (s390_tune >= PROCESSOR_2964_Z13)
14773     {
14774       int units, i;
14775       unsigned unit_mask, m = 1;
14776 
14777       unit_mask = s390_get_unit_mask (insn, &units);
14778       gcc_assert (units <= MAX_SCHED_UNITS);
14779 
14780       /* Add a score in range 0..MAX_SCHED_MIX_SCORE depending on how long
14781 	 ago the last insn of this unit type got scheduled.  This is
14782 	 supposed to help provide a proper instruction mix to the
14783 	 CPU.  */
14784       for (i = 0; i < units; i++, m <<= 1)
14785 	if (m & unit_mask)
14786 	  score += (last_scheduled_unit_distance[i][sched_state.side]
14787 	      * MAX_SCHED_MIX_SCORE / MAX_SCHED_MIX_DISTANCE);
14788 
14789       int other_side = 1 - sched_state.side;
14790 
14791       /* Try to delay long-running insns when side is busy.  */
14792       if (s390_is_longrunning (insn))
14793 	{
14794 	  if (s390_is_fxd (insn))
14795 	    {
14796 	      if (fxd_longrunning[sched_state.side]
14797 		  && fxd_longrunning[other_side]
14798 		  <= fxd_longrunning[sched_state.side])
14799 		score = MAX (0, score - 10);
14800 
14801 	      else if (fxd_longrunning[other_side]
14802 		  >= fxd_longrunning[sched_state.side])
14803 		score += 10;
14804 	    }
14805 
14806 	  if (s390_is_fpd (insn))
14807 	    {
14808 	      if (fpd_longrunning[sched_state.side]
14809 		  && fpd_longrunning[other_side]
14810 		  <= fpd_longrunning[sched_state.side])
14811 		score = MAX (0, score - 10);
14812 
14813 	      else if (fpd_longrunning[other_side]
14814 		  >= fpd_longrunning[sched_state.side])
14815 		score += 10;
14816 	    }
14817 	}
14818     }
14819 
14820   return score;
14821 }
14822 
14823 /* This function is called via hook TARGET_SCHED_REORDER before
14824    issuing one insn from list READY which contains *NREADYP entries.
14825    For target z10 it reorders load instructions to avoid early load
14826    conflicts in the floating point pipeline.  */
14827 static int
14828 s390_sched_reorder (FILE *file, int verbose,
14829 		    rtx_insn **ready, int *nreadyp, int clock ATTRIBUTE_UNUSED)
14830 {
14831   if (s390_tune == PROCESSOR_2097_Z10
14832       && reload_completed
14833       && *nreadyp > 1)
14834     s390_z10_prevent_earlyload_conflicts (ready, nreadyp);
14835 
14836   if (s390_tune >= PROCESSOR_2827_ZEC12
14837       && reload_completed
14838       && *nreadyp > 1)
14839     {
14840       int i;
14841       int last_index = *nreadyp - 1;
14842       int max_index = -1;
14843       int max_score = -1;
14844       rtx_insn *tmp;
14845 
14846       /* Just move the insn with the highest score to the top (the
14847 	 end) of the list.  A full sort is not needed since a conflict
14848 	 in the hazard recognition cannot happen.  So the top insn in
14849 	 the ready list will always be taken.  */
14850       for (i = last_index; i >= 0; i--)
14851 	{
14852 	  int score;
14853 
14854 	  if (recog_memoized (ready[i]) < 0)
14855 	    continue;
14856 
14857 	  score = s390_sched_score (ready[i]);
14858 	  if (score > max_score)
14859 	    {
14860 	      max_score = score;
14861 	      max_index = i;
14862 	    }
14863 	}
14864 
14865       if (max_index != -1)
14866 	{
14867 	  if (max_index != last_index)
14868 	    {
14869 	      tmp = ready[max_index];
14870 	      ready[max_index] = ready[last_index];
14871 	      ready[last_index] = tmp;
14872 
14873 	      if (verbose > 5)
14874 		fprintf (file,
14875 			 ";;\t\tBACKEND: move insn %d to the top of list\n",
14876 			 INSN_UID (ready[last_index]));
14877 	    }
14878 	  else if (verbose > 5)
14879 	    fprintf (file,
14880 		     ";;\t\tBACKEND: best insn %d already on top\n",
14881 		     INSN_UID (ready[last_index]));
14882 	}
14883 
14884       if (verbose > 5)
14885 	{
14886 	  fprintf (file, "ready list ooo attributes - sched state: %d\n",
14887 		   sched_state.group_state);
14888 
14889 	  for (i = last_index; i >= 0; i--)
14890 	    {
14891 	      unsigned int sched_mask;
14892 	      rtx_insn *insn = ready[i];
14893 
14894 	      if (recog_memoized (insn) < 0)
14895 		continue;
14896 
14897 	      sched_mask = s390_get_sched_attrmask (insn);
14898 	      fprintf (file, ";;\t\tBACKEND: insn %d score: %d: ",
14899 		       INSN_UID (insn),
14900 		       s390_sched_score (insn));
14901 #define PRINT_SCHED_ATTR(M, ATTR) fprintf (file, "%s ",\
14902 					   ((M) & sched_mask) ? #ATTR : "");
14903 	      PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_CRACKED, cracked);
14904 	      PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_EXPANDED, expanded);
14905 	      PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_ENDGROUP, endgroup);
14906 	      PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_GROUPALONE, groupalone);
14907 #undef PRINT_SCHED_ATTR
14908 	      if (s390_tune >= PROCESSOR_2964_Z13)
14909 		{
14910 		  unsigned int unit_mask, m = 1;
14911 		  int units, j;
14912 
14913 		  unit_mask  = s390_get_unit_mask (insn, &units);
14914 		  fprintf (file, "(units:");
14915 		  for (j = 0; j < units; j++, m <<= 1)
14916 		    if (m & unit_mask)
14917 		      fprintf (file, " u%d", j);
14918 		  fprintf (file, ")");
14919 		}
14920 	      fprintf (file, "\n");
14921 	    }
14922 	}
14923     }
14924 
14925   return s390_issue_rate ();
14926 }
14927 
14928 
14929 /* This function is called via hook TARGET_SCHED_VARIABLE_ISSUE after
14930    the scheduler has issued INSN.  It stores the last issued insn into
14931    last_scheduled_insn in order to make it available for
14932    s390_sched_reorder and updates the OOO grouping and unit bookkeeping.  */
14933 static int
14934 s390_sched_variable_issue (FILE *file, int verbose, rtx_insn *insn, int more)
14935 {
14936   last_scheduled_insn = insn;
14937 
14938   bool ends_group = false;
14939 
14940   if (s390_tune >= PROCESSOR_2827_ZEC12
14941       && reload_completed
14942       && recog_memoized (insn) >= 0)
14943     {
14944       unsigned int mask = s390_get_sched_attrmask (insn);
14945 
14946       if ((mask & S390_SCHED_ATTR_MASK_GROUPOFTWO) != 0)
14947 	sched_state.group_of_two = true;
14948 
14949       /* If this is a group-of-two insn, we actually ended the last group
14950 	 and this insn is the first one of the new group.  */
14951       if (sched_state.group_state == 2 && sched_state.group_of_two)
14952 	{
14953 	  sched_state.side = sched_state.side ? 0 : 1;
14954 	  sched_state.group_state = 0;
14955 	}
14956 
14957       /* Longrunning and side bookkeeping.  */
14958       for (int i = 0; i < 2; i++)
14959 	{
14960 	  fxd_longrunning[i] = MAX (0, fxd_longrunning[i] - 1);
14961 	  fpd_longrunning[i] = MAX (0, fpd_longrunning[i] - 1);
14962 	}
14963 
14964       unsigned latency = insn_default_latency (insn);
14965       if (s390_is_longrunning (insn))
14966 	{
14967 	  if (s390_is_fxd (insn))
14968 	    fxd_longrunning[sched_state.side] = latency;
14969 	  else
14970 	    fpd_longrunning[sched_state.side] = latency;
14971 	}
14972 
14973       if (s390_tune >= PROCESSOR_2964_Z13)
14974 	{
14975 	  int units, i;
14976 	  unsigned unit_mask, m = 1;
14977 
14978 	  unit_mask = s390_get_unit_mask (insn, &units);
14979 	  gcc_assert (units <= MAX_SCHED_UNITS);
14980 
14981 	  for (i = 0; i < units; i++, m <<= 1)
14982 	    if (m & unit_mask)
14983 	      last_scheduled_unit_distance[i][sched_state.side] = 0;
14984 	    else if (last_scheduled_unit_distance[i][sched_state.side]
14985 		< MAX_SCHED_MIX_DISTANCE)
14986 	      last_scheduled_unit_distance[i][sched_state.side]++;
14987 	}
14988 
14989       if ((mask & S390_SCHED_ATTR_MASK_CRACKED) != 0
14990 	  || (mask & S390_SCHED_ATTR_MASK_EXPANDED) != 0
14991 	  || (mask & S390_SCHED_ATTR_MASK_GROUPALONE) != 0
14992 	  || (mask & S390_SCHED_ATTR_MASK_ENDGROUP) != 0)
14993 	{
14994 	  sched_state.group_state = 0;
14995 	  ends_group = true;
14996 	}
14997       else
14998 	{
14999 	  switch (sched_state.group_state)
15000 	    {
15001 	    case 0:
15002 	      sched_state.group_state++;
15003 	      break;
15004 	    case 1:
15005 	      sched_state.group_state++;
15006 	      if (sched_state.group_of_two)
15007 		{
15008 		  sched_state.group_state = 0;
15009 		  ends_group = true;
15010 		}
15011 	      break;
15012 	    case 2:
15013 	      sched_state.group_state++;
15014 	      ends_group = true;
15015 	      break;
15016 	    }
15017 	}
15018 
15019       if (verbose > 5)
15020 	{
15021 	  unsigned int sched_mask;
15022 
15023 	  sched_mask = s390_get_sched_attrmask (insn);
15024 
15025 	  fprintf (file, ";;\t\tBACKEND: insn %d: ", INSN_UID (insn));
15026 #define PRINT_SCHED_ATTR(M, ATTR) fprintf (file, "%s ", ((M) & sched_mask) ? #ATTR : "");
15027 	  PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_CRACKED, cracked);
15028 	  PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_EXPANDED, expanded);
15029 	  PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_ENDGROUP, endgroup);
15030 	  PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_GROUPALONE, groupalone);
15031 #undef PRINT_SCHED_ATTR
15032 
15033 	  if (s390_tune >= PROCESSOR_2964_Z13)
15034 	    {
15035 	      unsigned int unit_mask, m = 1;
15036 	      int units, j;
15037 
15038 	      unit_mask  = s390_get_unit_mask (insn, &units);
15039 	      fprintf (file, "(units:");
15040 	      for (j = 0; j < units; j++, m <<= 1)
15041 		if (m & unit_mask)
15042 		  fprintf (file, " %d", j);
15043 	      fprintf (file, ")");
15044 	    }
15045 	  fprintf (file, " sched state: %d\n", sched_state.group_state);
15046 
15047 	  if (s390_tune >= PROCESSOR_2964_Z13)
15048 	    {
15049 	      int units, j;
15050 
15051 	      s390_get_unit_mask (insn, &units);
15052 
15053 	      fprintf (file, ";;\t\tBACKEND: units on this side unused for: ");
15054 	      for (j = 0; j < units; j++)
15055 		fprintf (file, "%d:%d ", j,
15056 		    last_scheduled_unit_distance[j][sched_state.side]);
15057 	      fprintf (file, "\n");
15058 	    }
15059 	}
15060 
15061       /* If this insn ended a group, the next will be on the other side.  */
15062       if (ends_group)
15063 	{
15064 	  sched_state.group_state = 0;
15065 	  sched_state.side = sched_state.side ? 0 : 1;
15066 	  sched_state.group_of_two = false;
15067 	}
15068     }
15069 
15070   if (GET_CODE (PATTERN (insn)) != USE
15071       && GET_CODE (PATTERN (insn)) != CLOBBER)
15072     return more - 1;
15073   else
15074     return more;
15075 }
15076 
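/* This function is called via hook TARGET_SCHED_INIT when scheduling of a
   new block of insns starts.  It resets the OOO scheduling state unless the
   block is most likely entered via a fallthru edge from the previously
   scheduled block (see the comment below).  */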
15077 static void
15078 s390_sched_init (FILE *file ATTRIBUTE_UNUSED,
15079 		 int verbose ATTRIBUTE_UNUSED,
15080 		 int max_ready ATTRIBUTE_UNUSED)
15081 {
15082   /* If the next basic block is most likely entered via a fallthru edge
15083      we keep the last sched state.  Otherwise we start a new group.
15084      The scheduler traverses basic blocks in "instruction stream" ordering
15085      so if we see a fallthru edge here, sched_state will still be that
15086      of its source block.
15087 
15088      current_sched_info->prev_head is the insn before the first insn of the
15089      block of insns to be scheduled.
15090      */
15091   rtx_insn *insn = current_sched_info->prev_head
15092     ? NEXT_INSN (current_sched_info->prev_head) : NULL;
15093   basic_block bb = insn ? BLOCK_FOR_INSN (insn) : NULL;
15094   if (s390_tune < PROCESSOR_2964_Z13 || !s390_bb_fallthru_entry_likely (bb))
15095     {
15096       last_scheduled_insn = NULL;
15097       memset (last_scheduled_unit_distance, 0,
15098 	  MAX_SCHED_UNITS * NUM_SIDES * sizeof (int));
15099       sched_state.group_state = 0;
15100       sched_state.group_of_two = false;
15101     }
15102 }
15103 
15104 /* This target hook implementation for TARGET_LOOP_UNROLL_ADJUST calculates
15105    how often struct loop *LOOP should be unrolled when tuning for CPUs
15106    with a built-in stride prefetcher.
15107    The loop body is scanned and the memory accesses of the loop are
15108    counted.  Depending on the loop_depth and the number of memory
15109    accesses a new number <= nunroll is returned to improve the
15110    behavior of the hardware prefetch unit.  */
15111 static unsigned
15112 s390_loop_unroll_adjust (unsigned nunroll, struct loop *loop)
15113 {
15114   basic_block *bbs;
15115   rtx_insn *insn;
15116   unsigned i;
15117   unsigned mem_count = 0;
15118 
15119   if (s390_tune < PROCESSOR_2097_Z10)
15120     return nunroll;
15121 
15122   /* Count the number of memory references within the loop body.  */
15123   bbs = get_loop_body (loop);
15124   subrtx_iterator::array_type array;
15125   for (i = 0; i < loop->num_nodes; i++)
15126     FOR_BB_INSNS (bbs[i], insn)
15127       if (INSN_P (insn) && INSN_CODE (insn) != -1)
15128 	{
15129 	  rtx set;
15130 
15131 	  /* The runtime of small loops with memory block operations
15132 	     will be determined by the memory operation.  Doing
15133 	     unrolling doesn't help here.  Measurements to confirm
15134 	     this were only done on recent CPU levels.  So better do
15135 	     not change anything for older CPUs.  */
15136 	  if (s390_tune >= PROCESSOR_2964_Z13
15137 	      && loop->ninsns <= BLOCK_MEM_OPS_LOOP_INSNS
15138 	      && ((set = single_set (insn)) != NULL_RTX)
15139 	      && ((GET_MODE (SET_DEST (set)) == BLKmode
15140 		   && (GET_MODE (SET_SRC (set)) == BLKmode
15141 		       || SET_SRC (set) == const0_rtx))
15142 		  || (GET_CODE (SET_SRC (set)) == COMPARE
15143 		      && GET_MODE (XEXP (SET_SRC (set), 0)) == BLKmode
15144 		      && GET_MODE (XEXP (SET_SRC (set), 1)) == BLKmode)))
15145 	    return 1;
15146 
15147 	  FOR_EACH_SUBRTX (iter, array, PATTERN (insn), NONCONST)
15148 	    if (MEM_P (*iter))
15149 	      mem_count += 1;
15150 	}
15151   free (bbs);
15152 
15153   /* Prevent division by zero, and we do not need to adjust nunroll in this case.  */
15154   if (mem_count == 0)
15155     return nunroll;
15156 
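  /* The divisors below are heuristic; e.g. a loop of depth 1 containing
     7 memory accesses would be unrolled at most MIN (nunroll, 28 / 7)
     = MIN (nunroll, 4) times.  */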
15157   switch (loop_depth (loop))
15158     {
15159     case 1:
15160       return MIN (nunroll, 28 / mem_count);
15161     case 2:
15162       return MIN (nunroll, 22 / mem_count);
15163     default:
15164       return MIN (nunroll, 16 / mem_count);
15165     }
15166 }
15167 
15168 /* Restore the current options.  This is a hook function and also called
15169    internally.  */
15170 
15171 static void
15172 s390_function_specific_restore (struct gcc_options *opts,
15173 				struct cl_target_option *ptr ATTRIBUTE_UNUSED)
15174 {
15175   opts->x_s390_cost_pointer = (long)processor_table[opts->x_s390_tune].cost;
15176 }
15177 
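/* Set the default function alignment in OPTS unless the user has specified
   one explicitly.  */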
15178 static void
15179 s390_default_align (struct gcc_options *opts)
15180 {
15181   /* Set the default function alignment to 16 in order to get rid of
15182      some unwanted performance effects. */
15183   if (opts->x_flag_align_functions && !opts->x_str_align_functions
15184       && opts->x_s390_tune >= PROCESSOR_2964_Z13)
15185     opts->x_str_align_functions = "16";
15186 }
15187 
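/* Implement TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE: re-establish defaults that
   depend on other options after those options have changed, e.g. via the
   optimize attribute or pragma.  */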
15188 static void
15189 s390_override_options_after_change (void)
15190 {
15191   s390_default_align (&global_options);
15192 }
15193 
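/* Validate the option combination in OPTS/OPTS_SET and establish derived
   defaults.  Called from s390_option_override as well as when re-evaluating
   options for the target attribute/pragma.  */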
15194 static void
15195 s390_option_override_internal (struct gcc_options *opts,
15196 			       const struct gcc_options *opts_set)
15197 {
15198   /* Architecture mode defaults according to ABI.  */
15199   if (!(opts_set->x_target_flags & MASK_ZARCH))
15200     {
15201       if (TARGET_64BIT)
15202 	opts->x_target_flags |= MASK_ZARCH;
15203       else
15204 	opts->x_target_flags &= ~MASK_ZARCH;
15205     }
15206 
15207   /* Set the march default in case it hasn't been specified on cmdline.  */
15208   if (!opts_set->x_s390_arch)
15209     opts->x_s390_arch = PROCESSOR_2064_Z900;
15210 
15211   opts->x_s390_arch_flags = processor_flags_table[(int) opts->x_s390_arch];
15212 
15213   /* Determine processor to tune for.  */
15214   if (!opts_set->x_s390_tune)
15215     opts->x_s390_tune = opts->x_s390_arch;
15216 
15217   opts->x_s390_tune_flags = processor_flags_table[opts->x_s390_tune];
15218 
15219   /* Sanity checks.  */
15220   if (opts->x_s390_arch == PROCESSOR_NATIVE
15221       || opts->x_s390_tune == PROCESSOR_NATIVE)
15222     gcc_unreachable ();
15223   if (TARGET_64BIT && !TARGET_ZARCH_P (opts->x_target_flags))
15224     error ("64-bit ABI not supported in ESA/390 mode");
15225 
15226   if (opts->x_s390_indirect_branch == indirect_branch_thunk_inline
15227       || opts->x_s390_indirect_branch_call == indirect_branch_thunk_inline
15228       || opts->x_s390_function_return == indirect_branch_thunk_inline
15229       || opts->x_s390_function_return_reg == indirect_branch_thunk_inline
15230       || opts->x_s390_function_return_mem == indirect_branch_thunk_inline)
15231     error ("thunk-inline is only supported with %<-mindirect-branch-jump%>");
15232 
15233   if (opts->x_s390_indirect_branch != indirect_branch_keep)
15234     {
15235       if (!opts_set->x_s390_indirect_branch_call)
15236 	opts->x_s390_indirect_branch_call = opts->x_s390_indirect_branch;
15237 
15238       if (!opts_set->x_s390_indirect_branch_jump)
15239 	opts->x_s390_indirect_branch_jump = opts->x_s390_indirect_branch;
15240     }
15241 
15242   if (opts->x_s390_function_return != indirect_branch_keep)
15243     {
15244       if (!opts_set->x_s390_function_return_reg)
15245 	opts->x_s390_function_return_reg = opts->x_s390_function_return;
15246 
15247       if (!opts_set->x_s390_function_return_mem)
15248 	opts->x_s390_function_return_mem = opts->x_s390_function_return;
15249     }
15250 
15251   /* Enable hardware transactions if available and not explicitly
15252      disabled by user.  E.g. with -m31 -march=zEC12 -mzarch */
15253   if (!TARGET_OPT_HTM_P (opts_set->x_target_flags))
15254     {
15255       if (TARGET_CPU_HTM_P (opts) && TARGET_ZARCH_P (opts->x_target_flags))
15256 	opts->x_target_flags |= MASK_OPT_HTM;
15257       else
15258 	opts->x_target_flags &= ~MASK_OPT_HTM;
15259     }
15260 
15261   if (TARGET_OPT_VX_P (opts_set->x_target_flags))
15262     {
15263       if (TARGET_OPT_VX_P (opts->x_target_flags))
15264 	{
15265 	  if (!TARGET_CPU_VX_P (opts))
15266 	    error ("hardware vector support not available on %s",
15267 		   processor_table[(int)opts->x_s390_arch].name);
15268 	  if (TARGET_SOFT_FLOAT_P (opts->x_target_flags))
15269 	    error ("hardware vector support not available with "
15270 		   "%<-msoft-float%>");
15271 	}
15272     }
15273   else
15274     {
15275       if (TARGET_CPU_VX_P (opts))
15276 	/* Enable vector support if available and not explicitly disabled
15277 	   by user.  E.g. with -m31 -march=z13 -mzarch */
15278 	opts->x_target_flags |= MASK_OPT_VX;
15279       else
15280 	opts->x_target_flags &= ~MASK_OPT_VX;
15281     }
15282 
15283   /* Use hardware DFP if available and not explicitly disabled by
15284      user. E.g. with -m31 -march=z10 -mzarch   */
15285   if (!TARGET_HARD_DFP_P (opts_set->x_target_flags))
15286     {
15287       if (TARGET_DFP_P (opts))
15288 	opts->x_target_flags |= MASK_HARD_DFP;
15289       else
15290 	opts->x_target_flags &= ~MASK_HARD_DFP;
15291     }
15292 
15293   if (TARGET_HARD_DFP_P (opts->x_target_flags) && !TARGET_DFP_P (opts))
15294     {
15295       if (TARGET_HARD_DFP_P (opts_set->x_target_flags))
15296 	{
15297 	  if (!TARGET_CPU_DFP_P (opts))
15298 	    error ("hardware decimal floating point instructions"
15299 		   " not available on %s",
15300 		   processor_table[(int)opts->x_s390_arch].name);
15301 	  if (!TARGET_ZARCH_P (opts->x_target_flags))
15302 	    error ("hardware decimal floating point instructions"
15303 		   " not available in ESA/390 mode");
15304 	}
15305       else
15306 	opts->x_target_flags &= ~MASK_HARD_DFP;
15307     }
15308 
15309   if (TARGET_SOFT_FLOAT_P (opts_set->x_target_flags)
15310       && TARGET_SOFT_FLOAT_P (opts->x_target_flags))
15311     {
15312       if (TARGET_HARD_DFP_P (opts_set->x_target_flags)
15313 	  && TARGET_HARD_DFP_P (opts->x_target_flags))
15314 	error ("%<-mhard-dfp%> can%'t be used in conjunction with "
15315 	       "%<-msoft-float%>");
15316 
15317       opts->x_target_flags &= ~MASK_HARD_DFP;
15318     }
15319 
15320   if (TARGET_BACKCHAIN_P (opts->x_target_flags)
15321       && TARGET_PACKED_STACK_P (opts->x_target_flags)
15322       && TARGET_HARD_FLOAT_P (opts->x_target_flags))
15323     error ("%<-mbackchain%> %<-mpacked-stack%> %<-mhard-float%> are not "
15324 	   "supported in combination");
15325 
15326   if (opts->x_s390_stack_size)
15327     {
15328       if (opts->x_s390_stack_guard >= opts->x_s390_stack_size)
15329 	error ("stack size must be greater than the stack guard value");
15330       else if (opts->x_s390_stack_size > 1 << 16)
15331 	error ("stack size must not be greater than 64k");
15332     }
15333   else if (opts->x_s390_stack_guard)
15334     error ("%<-mstack-guard%> implies use of %<-mstack-size%>");
15335 
15336   /* Our implementation of the stack probe requires the probe interval
15337      to be used as displacement in an address operand.  The maximum
15338      probe interval currently is 64k.  This would exceed short
15339      displacements.  Trim that value down to 4k if that happens.  This
15340    might result in too many probes being generated, but only on the
15341    oldest supported machine level, z900.  */
15342   if (!DISP_IN_RANGE ((1 << param_stack_clash_protection_probe_interval)))
15343     param_stack_clash_protection_probe_interval = 12;
15344 
15345 #if TARGET_TPF != 0
15346   if (!CONST_OK_FOR_J (opts->x_s390_tpf_trace_hook_prologue_check))
15347     error ("-mtpf-trace-hook-prologue-check requires integer in range 0..4095");
15348 
15349   if (!CONST_OK_FOR_J (opts->x_s390_tpf_trace_hook_prologue_target))
15350     error ("-mtpf-trace-hook-prologue-target requires integer in range 0..4095");
15351 
15352   if (!CONST_OK_FOR_J (opts->x_s390_tpf_trace_hook_epilogue_check))
15353     error ("-mtpf-trace-hook-epilogue-check requires integer in range 0..4095");
15354 
15355   if (!CONST_OK_FOR_J (opts->x_s390_tpf_trace_hook_epilogue_target))
15356     error ("-mtpf-trace-hook-epilogue-target requires integer in range 0..4095");
15357 
15358   if (s390_tpf_trace_skip)
15359     {
15360       opts->x_s390_tpf_trace_hook_prologue_target = TPF_TRACE_PROLOGUE_SKIP_TARGET;
15361       opts->x_s390_tpf_trace_hook_epilogue_target = TPF_TRACE_EPILOGUE_SKIP_TARGET;
15362     }
15363 #endif
15364 
15365 #ifdef TARGET_DEFAULT_LONG_DOUBLE_128
15366   if (!TARGET_LONG_DOUBLE_128_P (opts_set->x_target_flags))
15367     opts->x_target_flags |= MASK_LONG_DOUBLE_128;
15368 #endif
15369 
15370   if (opts->x_s390_tune >= PROCESSOR_2097_Z10)
15371     {
15372       SET_OPTION_IF_UNSET (opts, opts_set, param_max_unrolled_insns,
15373 			   100);
15374       SET_OPTION_IF_UNSET (opts, opts_set, param_max_unroll_times, 32);
15375       SET_OPTION_IF_UNSET (opts, opts_set, param_max_completely_peeled_insns,
15376 			   2000);
15377       SET_OPTION_IF_UNSET (opts, opts_set, param_max_completely_peel_times,
15378 			   64);
15379     }
15380 
15381   SET_OPTION_IF_UNSET (opts, opts_set, param_max_pending_list_length,
15382 		       256);
15383   /* Values for loop prefetching.  */
15384   SET_OPTION_IF_UNSET (opts, opts_set, param_l1_cache_line_size, 256);
15385   SET_OPTION_IF_UNSET (opts, opts_set, param_l1_cache_size, 128);
15386   /* s390 has more than 2 cache levels and the sizes are much larger.  Since
15387      we are always running virtualized, assume that we only get a small
15388      part of the caches above l1.  */
15389   SET_OPTION_IF_UNSET (opts, opts_set, param_l2_cache_size, 1500);
15390   SET_OPTION_IF_UNSET (opts, opts_set,
15391 		       param_prefetch_min_insn_to_mem_ratio, 2);
15392   SET_OPTION_IF_UNSET (opts, opts_set, param_simultaneous_prefetches, 6);
15393 
15394   /* Use the alternative scheduling-pressure algorithm by default.  */
15395   SET_OPTION_IF_UNSET (opts, opts_set, param_sched_pressure_algorithm, 2);
15396   SET_OPTION_IF_UNSET (opts, opts_set, param_min_vect_loop_bound, 2);
15397 
15398   /* Use aggressive inlining parameters.  */
15399   if (opts->x_s390_tune >= PROCESSOR_2964_Z13)
15400     {
15401       SET_OPTION_IF_UNSET (opts, opts_set, param_inline_min_speedup, 2);
15402       SET_OPTION_IF_UNSET (opts, opts_set, param_max_inline_insns_auto, 80);
15403     }
15404 
15405   /* Set the default alignment.  */
15406   s390_default_align (opts);
15407 
15408   /* Call target specific restore function to do post-init work.  At the moment,
15409      this just sets opts->x_s390_cost_pointer.  */
15410   s390_function_specific_restore (opts, NULL);
15411 
15412   /* Check whether -mfentry is supported.  It cannot be used in 31-bit mode,
15413      because 31-bit PLT stubs assume that %r12 contains the GOT address, which
15414      is not the case when the code runs before the prologue.  */
15415   if (opts->x_flag_fentry && !TARGET_64BIT)
15416     error ("%<-mfentry%> is supported only for 64-bit CPUs");
15417 }
15418 
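/* Implement TARGET_OPTION_OVERRIDE: process deferred options such as
   -mhotpatch and run s390_option_override_internal on the global options.  */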
15419 static void
15420 s390_option_override (void)
15421 {
15422   unsigned int i;
15423   cl_deferred_option *opt;
15424   vec<cl_deferred_option> *v =
15425     (vec<cl_deferred_option> *) s390_deferred_options;
15426 
15427   if (v)
15428     FOR_EACH_VEC_ELT (*v, i, opt)
15429       {
15430 	switch (opt->opt_index)
15431 	  {
15432 	  case OPT_mhotpatch_:
15433 	    {
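	      /* The argument has the form n,m giving the number of halfwords
		 of hotpatch space to reserve before and after the function
		 label, e.g. -mhotpatch=12,2.  */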
15434 	      int val1;
15435 	      int val2;
15436 	      char *s = strtok (ASTRDUP (opt->arg), ",");
15437 	      char *t = strtok (NULL, "\0");
15438 
15439 	      if (t != NULL)
15440 		{
15441 		  val1 = integral_argument (s);
15442 		  val2 = integral_argument (t);
15443 		}
15444 	      else
15445 		{
15446 		  val1 = -1;
15447 		  val2 = -1;
15448 		}
15449 	      if (val1 == -1 || val2 == -1)
15450 		{
15451 		  /* argument is not a plain number */
15452 		  error ("arguments to %qs should be non-negative integers",
15453 			 "-mhotpatch=n,m");
15454 		  break;
15455 		}
15456 	      else if (val1 > s390_hotpatch_hw_max
15457 		       || val2 > s390_hotpatch_hw_max)
15458 		{
15459 		  error ("argument to %qs is too large (max. %d)",
15460 			 "-mhotpatch=n,m", s390_hotpatch_hw_max);
15461 		  break;
15462 		}
15463 	      s390_hotpatch_hw_before_label = val1;
15464 	      s390_hotpatch_hw_after_label = val2;
15465 	      break;
15466 	    }
15467 	  default:
15468 	    gcc_unreachable ();
15469 	  }
15470       }
15471 
15472   /* Set up function hooks.  */
15473   init_machine_status = s390_init_machine_status;
15474 
15475   s390_option_override_internal (&global_options, &global_options_set);
15476 
15477   /* Save the initial options in case the user does function specific
15478      options.  */
15479   target_option_default_node = build_target_option_node (&global_options);
15480   target_option_current_node = target_option_default_node;
15481 
15482   /* This cannot reside in s390_option_optimization_table since HAVE_prefetch
15483      requires the arch flags to be evaluated already.  Since prefetching
15484      is beneficial on s390, we enable it if available.  */
15485   if (flag_prefetch_loop_arrays < 0 && HAVE_prefetch && optimize >= 3)
15486     flag_prefetch_loop_arrays = 1;
15487 
15488   if (!s390_pic_data_is_text_relative && !flag_pic)
15489     error ("%<-mno-pic-data-is-text-relative%> cannot be used without "
15490 	   "%<-fpic%>/%<-fPIC%>");
15491 
15492   if (TARGET_TPF)
15493     {
15494       /* Don't emit DWARF3/4 unless specifically selected.  The TPF
15495 	 debuggers do not yet support DWARF 3/4.  */
15496       if (!global_options_set.x_dwarf_strict)
15497 	dwarf_strict = 1;
15498       if (!global_options_set.x_dwarf_version)
15499 	dwarf_version = 2;
15500     }
15501 }
15502 
15503 #if S390_USE_TARGET_ATTRIBUTE
15504 /* Inner function to process the attribute((target(...))): take an argument
15505    and set the current options from that argument.  If we have a list,
15506    recursively go over the list.  */
15507 
15508 static bool
15509 s390_valid_target_attribute_inner_p (tree args,
15510 				     struct gcc_options *opts,
15511 				     struct gcc_options *new_opts_set,
15512 				     bool force_pragma)
15513 {
15514   char *next_optstr;
15515   bool ret = true;
15516 
15517 #define S390_ATTRIB(S,O,A)  { S, sizeof (S)-1, O, A, 0 }
15518 #define S390_PRAGMA(S,O,A)  { S, sizeof (S)-1, O, A, 1 }
15519   static const struct
15520   {
15521     const char *string;
15522     size_t len;
15523     int opt;
15524     int has_arg;
15525     int only_as_pragma;
15526   } attrs[] = {
15527     /* enum options */
15528     S390_ATTRIB ("arch=", OPT_march_, 1),
15529     S390_ATTRIB ("tune=", OPT_mtune_, 1),
15530     /* uinteger options */
15531     S390_ATTRIB ("stack-guard=", OPT_mstack_guard_, 1),
15532     S390_ATTRIB ("stack-size=", OPT_mstack_size_, 1),
15533     S390_ATTRIB ("branch-cost=", OPT_mbranch_cost_, 1),
15534     S390_ATTRIB ("warn-framesize=", OPT_mwarn_framesize_, 1),
15535     /* flag options */
15536     S390_ATTRIB ("backchain", OPT_mbackchain, 0),
15537     S390_ATTRIB ("hard-dfp", OPT_mhard_dfp, 0),
15538     S390_ATTRIB ("hard-float", OPT_mhard_float, 0),
15539     S390_ATTRIB ("htm", OPT_mhtm, 0),
15540     S390_ATTRIB ("vx", OPT_mvx, 0),
15541     S390_ATTRIB ("packed-stack", OPT_mpacked_stack, 0),
15542     S390_ATTRIB ("small-exec", OPT_msmall_exec, 0),
15543     S390_ATTRIB ("soft-float", OPT_msoft_float, 0),
15544     S390_ATTRIB ("mvcle", OPT_mmvcle, 0),
15545     S390_PRAGMA ("zvector", OPT_mzvector, 0),
15546     /* boolean options */
15547     S390_ATTRIB ("warn-dynamicstack", OPT_mwarn_dynamicstack, 0),
15548   };
15549 #undef S390_ATTRIB
15550 #undef S390_PRAGMA
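  /* For example, __attribute__ ((target ("arch=z13,no-htm"))) arrives here
     either as a single string or as a TREE_LIST of strings; each comma
     separated entry is matched against the table above.  */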
15551 
15552   /* If this is a list, recurse to get the options.  */
15553   if (TREE_CODE (args) == TREE_LIST)
15554     {
15555       bool ret = true;
15556       int num_pragma_values;
15557       int i;
15558 
15559       /* Note: attribs.c:decl_attributes prepends the values from
15560 	 current_target_pragma to the list of target attributes.  To determine
15561 	 whether we're looking at a value of the attribute or the pragma we
15562 	 assume that the first [list_length (current_target_pragma)] values in
15563 	 the list are the values from the pragma.  */
15564       num_pragma_values = (!force_pragma && current_target_pragma != NULL)
15565 	? list_length (current_target_pragma) : 0;
15566       for (i = 0; args; args = TREE_CHAIN (args), i++)
15567 	{
15568 	  bool is_pragma;
15569 
15570 	  is_pragma = (force_pragma || i < num_pragma_values);
15571 	  if (TREE_VALUE (args)
15572 	      && !s390_valid_target_attribute_inner_p (TREE_VALUE (args),
15573 						       opts, new_opts_set,
15574 						       is_pragma))
15575 	    {
15576 	      ret = false;
15577 	    }
15578 	}
15579       return ret;
15580     }
15581 
15582   else if (TREE_CODE (args) != STRING_CST)
15583     {
15584       error ("attribute %<target%> argument not a string");
15585       return false;
15586     }
15587 
15588   /* Handle multiple arguments separated by commas.  */
15589   next_optstr = ASTRDUP (TREE_STRING_POINTER (args));
15590 
15591   while (next_optstr && *next_optstr != '\0')
15592     {
15593       char *p = next_optstr;
15594       char *orig_p = p;
15595       char *comma = strchr (next_optstr, ',');
15596       size_t len, opt_len;
15597       int opt;
15598       bool opt_set_p;
15599       char ch;
15600       unsigned i;
15601       int mask = 0;
15602       enum cl_var_type var_type;
15603       bool found;
15604 
15605       if (comma)
15606 	{
15607 	  *comma = '\0';
15608 	  len = comma - next_optstr;
15609 	  next_optstr = comma + 1;
15610 	}
15611       else
15612 	{
15613 	  len = strlen (p);
15614 	  next_optstr = NULL;
15615 	}
15616 
15617       /* Recognize no-xxx.  */
15618       if (len > 3 && p[0] == 'n' && p[1] == 'o' && p[2] == '-')
15619 	{
15620 	  opt_set_p = false;
15621 	  p += 3;
15622 	  len -= 3;
15623 	}
15624       else
15625 	opt_set_p = true;
15626 
15627       /* Find the option.  */
15628       ch = *p;
15629       found = false;
15630       for (i = 0; i < ARRAY_SIZE (attrs); i++)
15631 	{
15632 	  opt_len = attrs[i].len;
15633 	  if (ch == attrs[i].string[0]
15634 	      && ((attrs[i].has_arg) ? len > opt_len : len == opt_len)
15635 	      && memcmp (p, attrs[i].string, opt_len) == 0)
15636 	    {
15637 	      opt = attrs[i].opt;
15638 	      if (!opt_set_p && cl_options[opt].cl_reject_negative)
15639 		continue;
15640 	      mask = cl_options[opt].var_value;
15641 	      var_type = cl_options[opt].var_type;
15642 	      found = true;
15643 	      break;
15644 	    }
15645 	}
15646 
15647       /* Process the option.  */
15648       if (!found)
15649 	{
15650 	  error ("attribute(target(\"%s\")) is unknown", orig_p);
15651 	  return false;
15652 	}
15653       else if (attrs[i].only_as_pragma && !force_pragma)
15654 	{
15655 	  /* Value is not allowed for the target attribute.  */
15656 	  error ("value %qs is not supported by attribute %<target%>",
15657 		 attrs[i].string);
15658 	  return false;
15659 	}
15660 
15661       else if (var_type == CLVC_BIT_SET || var_type == CLVC_BIT_CLEAR)
15662 	{
15663 	  if (var_type == CLVC_BIT_CLEAR)
15664 	    opt_set_p = !opt_set_p;
15665 
15666 	  if (opt_set_p)
15667 	    opts->x_target_flags |= mask;
15668 	  else
15669 	    opts->x_target_flags &= ~mask;
15670 	  new_opts_set->x_target_flags |= mask;
15671 	}
15672 
15673       else if (cl_options[opt].var_type == CLVC_BOOLEAN)
15674 	{
15675 	  int value;
15676 
15677 	  if (cl_options[opt].cl_uinteger)
15678 	    {
15679 	      /* Unsigned integer argument.  Code based on the function
15680 		 decode_cmdline_option () in opts-common.c.  */
15681 	      value = integral_argument (p + opt_len);
15682 	    }
15683 	  else
15684 	    value = (opt_set_p) ? 1 : 0;
15685 
15686 	  if (value != -1)
15687 	    {
15688 	      struct cl_decoded_option decoded;
15689 
15690 	      /* Value range check; only implemented for numeric and boolean
15691 		 options at the moment.  */
15692 	      generate_option (opt, NULL, value, CL_TARGET, &decoded);
15693 	      s390_handle_option (opts, new_opts_set, &decoded, input_location);
15694 	      set_option (opts, new_opts_set, opt, value,
15695 			  p + opt_len, DK_UNSPECIFIED, input_location,
15696 			  global_dc);
15697 	    }
15698 	  else
15699 	    {
15700 	      error ("attribute(target(\"%s\")) is unknown", orig_p);
15701 	      ret = false;
15702 	    }
15703 	}
15704 
15705       else if (cl_options[opt].var_type == CLVC_ENUM)
15706 	{
15707 	  bool arg_ok;
15708 	  int value;
15709 
15710 	  arg_ok = opt_enum_arg_to_value (opt, p + opt_len, &value, CL_TARGET);
15711 	  if (arg_ok)
15712 	    set_option (opts, new_opts_set, opt, value,
15713 			p + opt_len, DK_UNSPECIFIED, input_location,
15714 			global_dc);
15715 	  else
15716 	    {
15717 	      error ("attribute(target(\"%s\")) is unknown", orig_p);
15718 	      ret = false;
15719 	    }
15720 	}
15721 
15722       else
15723 	gcc_unreachable ();
15724     }
15725   return ret;
15726 }
15727 
15728 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL.  */
15729 
15730 tree
15731 s390_valid_target_attribute_tree (tree args,
15732 				  struct gcc_options *opts,
15733 				  const struct gcc_options *opts_set,
15734 				  bool force_pragma)
15735 {
15736   tree t = NULL_TREE;
15737   struct gcc_options new_opts_set;
15738 
15739   memset (&new_opts_set, 0, sizeof (new_opts_set));
15740 
15741   /* Process each of the options on the chain.  */
15742   if (! s390_valid_target_attribute_inner_p (args, opts, &new_opts_set,
15743 					     force_pragma))
15744     return error_mark_node;
15745 
15746   /* If some option was set (even if it has not changed), rerun
15747      s390_option_override_internal, and then save the options away.  */
15748   if (new_opts_set.x_target_flags
15749       || new_opts_set.x_s390_arch
15750       || new_opts_set.x_s390_tune
15751       || new_opts_set.x_s390_stack_guard
15752       || new_opts_set.x_s390_stack_size
15753       || new_opts_set.x_s390_branch_cost
15754       || new_opts_set.x_s390_warn_framesize
15755       || new_opts_set.x_s390_warn_dynamicstack_p)
15756     {
15757       const unsigned char *src = (const unsigned char *)opts_set;
15758       unsigned char *dest = (unsigned char *)&new_opts_set;
15759       unsigned int i;
15760 
15761       /* Merge the original option flags into the new ones.  */
15762       for (i = 0; i < sizeof(*opts_set); i++)
15763 	dest[i] |= src[i];
15764 
15765       /* Do any overrides, such as arch=xxx, or tune=xxx support.  */
15766       s390_option_override_internal (opts, &new_opts_set);
15767       /* Save the current options unless we are validating options for
15768 	 #pragma.  */
15769       t = build_target_option_node (opts);
15770     }
15771   return t;
15772 }
15773 
15774 /* Hook to validate attribute((target("string"))).  */
15775 
15776 static bool
15777 s390_valid_target_attribute_p (tree fndecl,
15778 			       tree ARG_UNUSED (name),
15779 			       tree args,
15780 			       int ARG_UNUSED (flags))
15781 {
15782   struct gcc_options func_options;
15783   tree new_target, new_optimize;
15784   bool ret = true;
15785 
15786   /* attribute((target("default"))) does nothing, beyond
15787      affecting multi-versioning.  */
15788   if (TREE_VALUE (args)
15789       && TREE_CODE (TREE_VALUE (args)) == STRING_CST
15790       && TREE_CHAIN (args) == NULL_TREE
15791       && strcmp (TREE_STRING_POINTER (TREE_VALUE (args)), "default") == 0)
15792     return true;
15793 
15794   tree old_optimize = build_optimization_node (&global_options);
15795 
15796   /* Get the optimization options of the current function.  */
15797   tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
15798 
15799   if (!func_optimize)
15800     func_optimize = old_optimize;
15801 
15802   /* Init func_options.  */
15803   memset (&func_options, 0, sizeof (func_options));
15804   init_options_struct (&func_options, NULL);
15805   lang_hooks.init_options_struct (&func_options);
15806 
15807   cl_optimization_restore (&func_options, TREE_OPTIMIZATION (func_optimize));
15808 
15809   /* Initialize func_options to the default before its target options can
15810      be set.  */
15811   cl_target_option_restore (&func_options,
15812 			    TREE_TARGET_OPTION (target_option_default_node));
15813 
15814   new_target = s390_valid_target_attribute_tree (args, &func_options,
15815 						 &global_options_set,
15816 						 (args ==
15817 						  current_target_pragma));
15818   new_optimize = build_optimization_node (&func_options);
15819   if (new_target == error_mark_node)
15820     ret = false;
15821   else if (fndecl && new_target)
15822     {
15823       DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;
15824       if (old_optimize != new_optimize)
15825 	DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
15826     }
15827   return ret;
15828 }
15829 
15830 /* Hook to determine if one function can safely inline another.  */
15831 
15832 static bool
15833 s390_can_inline_p (tree caller, tree callee)
15834 {
15835   tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
15836   tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
15837 
15838   if (!callee_tree)
15839     callee_tree = target_option_default_node;
15840   if (!caller_tree)
15841     caller_tree = target_option_default_node;
15842   if (callee_tree == caller_tree)
15843     return true;
15844 
15845   struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
15846   struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
15847   bool ret = true;
15848 
15849   if ((caller_opts->x_target_flags & ~(MASK_SOFT_FLOAT | MASK_HARD_DFP))
15850       != (callee_opts->x_target_flags & ~(MASK_SOFT_FLOAT | MASK_HARD_DFP)))
15851     ret = false;
15852 
15853   /* Don't inline functions to be compiled for a more recent arch into a
15854      function for an older arch.  */
15855   else if (caller_opts->x_s390_arch < callee_opts->x_s390_arch)
15856     ret = false;
15857 
15858   /* Inlining a hard float function into a soft float function is only
15859      allowed if the hard float function doesn't actually make use of
15860      floating point.
15861 
15862      We are called from FEs for multi-versioning call optimization, so
15863      beware that ipa_fn_summaries may not be available.  */
15864   else if (((TARGET_SOFT_FLOAT_P (caller_opts->x_target_flags)
15865 	     && !TARGET_SOFT_FLOAT_P (callee_opts->x_target_flags))
15866 	    || (!TARGET_HARD_DFP_P (caller_opts->x_target_flags)
15867 		&& TARGET_HARD_DFP_P (callee_opts->x_target_flags)))
15868 	   && (! ipa_fn_summaries
15869 	       || ipa_fn_summaries->get
15870 	       (cgraph_node::get (callee))->fp_expressions))
15871     ret = false;
15872 
15873   return ret;
15874 }
15875 #endif
15876 
15877 /* Set VAL to correct enum value according to the indirect-branch or
15878    function-return attribute in ATTR.  */
15879 
15880 static inline void
15881 s390_indirect_branch_attrvalue (tree attr, enum indirect_branch *val)
15882 {
15883   const char *str = TREE_STRING_POINTER (TREE_VALUE (TREE_VALUE (attr)));
15884   if (strcmp (str, "keep") == 0)
15885     *val = indirect_branch_keep;
15886   else if (strcmp (str, "thunk") == 0)
15887     *val = indirect_branch_thunk;
15888   else if (strcmp (str, "thunk-inline") == 0)
15889     *val = indirect_branch_thunk_inline;
15890   else if (strcmp (str, "thunk-extern") == 0)
15891     *val = indirect_branch_thunk_extern;
15892 }
15893 
15894 /* Memorize the setting for -mindirect-branch* and -mfunction-return*
15895    from either the cmdline or the function attributes in
15896    cfun->machine.  */
15897 
15898 static void
15899 s390_indirect_branch_settings (tree fndecl)
15900 {
15901   tree attr;
15902 
15903   if (!fndecl)
15904     return;
15905 
15906   /* Initialize with the cmdline options and let the attributes
15907      override it.  */
15908   cfun->machine->indirect_branch_jump = s390_indirect_branch_jump;
15909   cfun->machine->indirect_branch_call = s390_indirect_branch_call;
15910 
15911   cfun->machine->function_return_reg = s390_function_return_reg;
15912   cfun->machine->function_return_mem = s390_function_return_mem;
15913 
15914   if ((attr = lookup_attribute ("indirect_branch",
15915 				DECL_ATTRIBUTES (fndecl))))
15916     {
15917       s390_indirect_branch_attrvalue (attr,
15918 				      &cfun->machine->indirect_branch_jump);
15919       s390_indirect_branch_attrvalue (attr,
15920 				      &cfun->machine->indirect_branch_call);
15921     }
15922 
15923   if ((attr = lookup_attribute ("indirect_branch_jump",
15924 				DECL_ATTRIBUTES (fndecl))))
15925     s390_indirect_branch_attrvalue (attr, &cfun->machine->indirect_branch_jump);
15926 
15927   if ((attr = lookup_attribute ("indirect_branch_call",
15928 				DECL_ATTRIBUTES (fndecl))))
15929     s390_indirect_branch_attrvalue (attr, &cfun->machine->indirect_branch_call);
15930 
15931   if ((attr = lookup_attribute ("function_return",
15932 				DECL_ATTRIBUTES (fndecl))))
15933     {
15934       s390_indirect_branch_attrvalue (attr,
15935 				      &cfun->machine->function_return_reg);
15936       s390_indirect_branch_attrvalue (attr,
15937 				      &cfun->machine->function_return_mem);
15938     }
15939 
15940   if ((attr = lookup_attribute ("function_return_reg",
15941 				DECL_ATTRIBUTES (fndecl))))
15942     s390_indirect_branch_attrvalue (attr, &cfun->machine->function_return_reg);
15943 
15944   if ((attr = lookup_attribute ("function_return_mem",
15945 				DECL_ATTRIBUTES (fndecl))))
15946     s390_indirect_branch_attrvalue (attr, &cfun->machine->function_return_mem);
15947 }
15948 
15949 #if S390_USE_TARGET_ATTRIBUTE
15950 /* Restore targets globals from NEW_TREE and invalidate s390_previous_fndecl
15951    cache.  */
15952 
15953 void
15954 s390_activate_target_options (tree new_tree)
15955 {
15956   cl_target_option_restore (&global_options, TREE_TARGET_OPTION (new_tree));
15957   if (TREE_TARGET_GLOBALS (new_tree))
15958     restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
15959   else if (new_tree == target_option_default_node)
15960     restore_target_globals (&default_target_globals);
15961   else
15962     TREE_TARGET_GLOBALS (new_tree) = save_target_globals_default_opts ();
15963   s390_previous_fndecl = NULL_TREE;
15964 }
15965 #endif
15966 
15967 /* Establish appropriate back-end context for processing the function
15968    FNDECL.  The argument might be NULL to indicate processing at top
15969    level, outside of any function scope.  */
15970 static void
15971 s390_set_current_function (tree fndecl)
15972 {
15973 #if S390_USE_TARGET_ATTRIBUTE
15974   /* Only change the context if the function changes.  This hook is called
15975      several times in the course of compiling a function, and we don't want to
15976      slow things down too much or call target_reinit when it isn't safe.  */
15977   if (fndecl == s390_previous_fndecl)
15978     {
15979       s390_indirect_branch_settings (fndecl);
15980       return;
15981     }
15982 
15983   tree old_tree;
15984   if (s390_previous_fndecl == NULL_TREE)
15985     old_tree = target_option_current_node;
15986   else if (DECL_FUNCTION_SPECIFIC_TARGET (s390_previous_fndecl))
15987     old_tree = DECL_FUNCTION_SPECIFIC_TARGET (s390_previous_fndecl);
15988   else
15989     old_tree = target_option_default_node;
15990 
15991   if (fndecl == NULL_TREE)
15992     {
15993       if (old_tree != target_option_current_node)
15994 	s390_activate_target_options (target_option_current_node);
15995       return;
15996     }
15997 
15998   tree new_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
15999   if (new_tree == NULL_TREE)
16000     new_tree = target_option_default_node;
16001 
16002   if (old_tree != new_tree)
16003     s390_activate_target_options (new_tree);
16004   s390_previous_fndecl = fndecl;
16005 #endif
16006   s390_indirect_branch_settings (fndecl);
16007 }
16008 
16009 /* Implement TARGET_USE_BY_PIECES_INFRASTRUCTURE_P.  */
16010 
16011 static bool
16012 s390_use_by_pieces_infrastructure_p (unsigned HOST_WIDE_INT size,
16013 				     unsigned int align ATTRIBUTE_UNUSED,
16014 				     enum by_pieces_operation op ATTRIBUTE_UNUSED,
16015 				     bool speed_p ATTRIBUTE_UNUSED)
16016 {
16017   return (size == 1 || size == 2
16018 	  || size == 4 || (TARGET_ZARCH && size == 8));
16019 }
16020 
16021 /* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV hook.  */
16022 
16023 static void
16024 s390_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
16025 {
16026   tree sfpc = s390_builtin_decls[S390_BUILTIN_s390_sfpc];
16027   tree efpc = s390_builtin_decls[S390_BUILTIN_s390_efpc];
16028   tree call_efpc = build_call_expr (efpc, 0);
16029   tree fenv_var = create_tmp_var_raw (unsigned_type_node);
16030 
16031 #define FPC_EXCEPTION_MASK	 HOST_WIDE_INT_UC (0xf8000000)
16032 #define FPC_FLAGS_MASK		 HOST_WIDE_INT_UC (0x00f80000)
16033 #define FPC_DXC_MASK		 HOST_WIDE_INT_UC (0x0000ff00)
16034 #define FPC_EXCEPTION_MASK_SHIFT HOST_WIDE_INT_UC (24)
16035 #define FPC_FLAGS_SHIFT		 HOST_WIDE_INT_UC (16)
16036 #define FPC_DXC_SHIFT		 HOST_WIDE_INT_UC (8)
16037 
16038   /* Generates the equivalent of feholdexcept (&fenv_var)
16039 
16040      fenv_var = __builtin_s390_efpc ();
16041      __builtin_s390_sfpc (fenv_var & mask) */
16042   tree old_fpc = build2 (MODIFY_EXPR, unsigned_type_node, fenv_var, call_efpc);
16043   tree new_fpc =
16044     build2 (BIT_AND_EXPR, unsigned_type_node, fenv_var,
16045 	    build_int_cst (unsigned_type_node,
16046 			   ~(FPC_DXC_MASK | FPC_FLAGS_MASK |
16047 			     FPC_EXCEPTION_MASK)));
16048   tree set_new_fpc = build_call_expr (sfpc, 1, new_fpc);
16049   *hold = build2 (COMPOUND_EXPR, void_type_node, old_fpc, set_new_fpc);
16050 
16051   /* Generates the equivalent of feclearexcept (FE_ALL_EXCEPT)
16052 
16053      __builtin_s390_sfpc (__builtin_s390_efpc () & mask) */
16054   new_fpc = build2 (BIT_AND_EXPR, unsigned_type_node, call_efpc,
16055 		    build_int_cst (unsigned_type_node,
16056 				   ~(FPC_DXC_MASK | FPC_FLAGS_MASK)));
16057   *clear = build_call_expr (sfpc, 1, new_fpc);
16058 
16059   /* Generates the equivalent of feupdateenv (fenv_var)
16060 
16061   old_fpc = __builtin_s390_efpc ();
16062   __builtin_s390_sfpc (fenv_var);
16063   __atomic_feraiseexcept ((old_fpc & FPC_FLAGS_MASK) >> FPC_FLAGS_SHIFT);  */
16064 
16065   old_fpc = create_tmp_var_raw (unsigned_type_node);
16066   tree store_old_fpc = build2 (MODIFY_EXPR, void_type_node,
16067 			       old_fpc, call_efpc);
16068 
16069   set_new_fpc = build_call_expr (sfpc, 1, fenv_var);
16070 
16071   tree raise_old_except = build2 (BIT_AND_EXPR, unsigned_type_node, old_fpc,
16072 				  build_int_cst (unsigned_type_node,
16073 						 FPC_FLAGS_MASK));
16074   raise_old_except = build2 (RSHIFT_EXPR, unsigned_type_node, raise_old_except,
16075 			     build_int_cst (unsigned_type_node,
16076 					    FPC_FLAGS_SHIFT));
16077   tree atomic_feraiseexcept
16078     = builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT);
16079   raise_old_except = build_call_expr (atomic_feraiseexcept,
16080 				      1, raise_old_except);
16081 
16082   *update = build2 (COMPOUND_EXPR, void_type_node,
16083 		    build2 (COMPOUND_EXPR, void_type_node,
16084 			    store_old_fpc, set_new_fpc),
16085 		    raise_old_except);
16086 
16087 #undef FPC_EXCEPTION_MASK
16088 #undef FPC_FLAGS_MASK
16089 #undef FPC_DXC_MASK
16090 #undef FPC_EXCEPTION_MASK_SHIFT
16091 #undef FPC_FLAGS_SHIFT
16092 #undef FPC_DXC_SHIFT
16093 }
16094 
16095 /* Return the vector mode to be used for inner mode MODE when doing
16096    vectorization.  */
16097 static machine_mode
16098 s390_preferred_simd_mode (scalar_mode mode)
16099 {
16100   if (TARGET_VXE)
16101     switch (mode)
16102       {
16103       case E_SFmode:
16104 	return V4SFmode;
16105       default:;
16106       }
16107 
16108   if (TARGET_VX)
16109     switch (mode)
16110       {
16111       case E_DFmode:
16112 	return V2DFmode;
16113       case E_DImode:
16114 	return V2DImode;
16115       case E_SImode:
16116 	return V4SImode;
16117       case E_HImode:
16118 	return V8HImode;
16119       case E_QImode:
16120 	return V16QImode;
16121       default:;
16122       }
16123   return word_mode;
16124 }
16125 
16126 /* Our hardware does not require vectors to be strictly aligned.  */
16127 static bool
16128 s390_support_vector_misalignment (machine_mode mode ATTRIBUTE_UNUSED,
16129 				  const_tree type ATTRIBUTE_UNUSED,
16130 				  int misalignment ATTRIBUTE_UNUSED,
16131 				  bool is_packed ATTRIBUTE_UNUSED)
16132 {
16133   if (TARGET_VX)
16134     return true;
16135 
16136   return default_builtin_support_vector_misalignment (mode, type, misalignment,
16137 						      is_packed);
16138 }
16139 
16140 /* The vector ABI requires vector types to be aligned on an 8 byte
16141    boundary (our stack alignment).  However, we allow this to be
16142    overridden by the user, even though this definitely breaks the ABI.  */
16143 static HOST_WIDE_INT
16144 s390_vector_alignment (const_tree type)
16145 {
16146   tree size = TYPE_SIZE (type);
16147 
16148   if (!TARGET_VX_ABI)
16149     return default_vector_alignment (type);
16150 
16151   if (TYPE_USER_ALIGN (type))
16152     return TYPE_ALIGN (type);
16153 
16154   if (tree_fits_uhwi_p (size)
16155       && tree_to_uhwi (size) < BIGGEST_ALIGNMENT)
16156     return tree_to_uhwi (size);
16157 
16158   return BIGGEST_ALIGNMENT;
16159 }
16160 
16161 /* Implement TARGET_CONSTANT_ALIGNMENT.  Alignment on even addresses for
16162    LARL instruction.  */
16163 
16164 static HOST_WIDE_INT
16165 s390_constant_alignment (const_tree, HOST_WIDE_INT align)
16166 {
16167   return MAX (align, 16);
16168 }
16169 
16170 #ifdef HAVE_AS_MACHINE_MACHINEMODE
16171 /* Implement TARGET_ASM_FILE_START.  */
16172 static void
16173 s390_asm_file_start (void)
16174 {
16175   default_file_start ();
16176   s390_asm_output_machine_for_arch (asm_out_file);
16177 }
16178 #endif
16179 
16180 /* Implement TARGET_ASM_FILE_END.  */
16181 static void
16182 s390_asm_file_end (void)
16183 {
16184 #ifdef HAVE_AS_GNU_ATTRIBUTE
16185   varpool_node *vnode;
16186   cgraph_node *cnode;
16187 
16188   FOR_EACH_VARIABLE (vnode)
16189     if (TREE_PUBLIC (vnode->decl))
16190       s390_check_type_for_vector_abi (TREE_TYPE (vnode->decl), false, false);
16191 
16192   FOR_EACH_FUNCTION (cnode)
16193     if (TREE_PUBLIC (cnode->decl))
16194       s390_check_type_for_vector_abi (TREE_TYPE (cnode->decl), false, false);
16195 
16196 
16197   if (s390_vector_abi != 0)
16198     fprintf (asm_out_file, "\t.gnu_attribute 8, %d\n",
16199 	     s390_vector_abi);
16200 #endif
16201   file_end_indicate_exec_stack ();
16202 
16203   if (flag_split_stack)
16204     file_end_indicate_split_stack ();
16205 }
16206 
16207 /* Return true if TYPE is a vector bool type.  */
16208 static inline bool
16209 s390_vector_bool_type_p (const_tree type)
16210 {
16211   return TYPE_VECTOR_OPAQUE (type);
16212 }
16213 
16214 /* Return the diagnostic message string if the binary operation OP is
16215    not permitted on TYPE1 and TYPE2, NULL otherwise.  */
16216 static const char*
16217 s390_invalid_binary_op (int op ATTRIBUTE_UNUSED, const_tree type1, const_tree type2)
16218 {
16219   bool bool1_p, bool2_p;
16220   bool plusminus_p;
16221   bool muldiv_p;
16222   bool compare_p;
16223   machine_mode mode1, mode2;
16224 
16225   if (!TARGET_ZVECTOR)
16226     return NULL;
16227 
16228   if (!VECTOR_TYPE_P (type1) || !VECTOR_TYPE_P (type2))
16229     return NULL;
16230 
16231   bool1_p = s390_vector_bool_type_p (type1);
16232   bool2_p = s390_vector_bool_type_p (type2);
16233 
16234   /* Mixing signed and unsigned types is forbidden for all
16235      operators.  */
16236   if (!bool1_p && !bool2_p
16237       && TYPE_UNSIGNED (type1) != TYPE_UNSIGNED (type2))
16238     return N_("types differ in signedness");
16239 
16240   plusminus_p = (op == PLUS_EXPR || op == MINUS_EXPR);
16241   muldiv_p = (op == MULT_EXPR || op == RDIV_EXPR || op == TRUNC_DIV_EXPR
16242 	      || op == CEIL_DIV_EXPR || op == FLOOR_DIV_EXPR
16243 	      || op == ROUND_DIV_EXPR);
16244   compare_p = (op == LT_EXPR || op == LE_EXPR || op == GT_EXPR || op == GE_EXPR
16245 	       || op == EQ_EXPR || op == NE_EXPR);
16246 
16247   if (bool1_p && bool2_p && (plusminus_p || muldiv_p))
16248     return N_("binary operator does not support two vector bool operands");
16249 
16250   if (bool1_p != bool2_p && (muldiv_p || compare_p))
16251     return N_("binary operator does not support vector bool operand");
16252 
16253   mode1 = TYPE_MODE (type1);
16254   mode2 = TYPE_MODE (type2);
16255 
16256   if (bool1_p != bool2_p && plusminus_p
16257       && (GET_MODE_CLASS (mode1) == MODE_VECTOR_FLOAT
16258 	  || GET_MODE_CLASS (mode2) == MODE_VECTOR_FLOAT))
16259     return N_("binary operator does not support mixing vector "
16260 	      "bool with floating point vector operands");
16261 
16262   return NULL;
16263 }
16264 
16265 /* Implement TARGET_C_EXCESS_PRECISION.
16266 
16267    FIXME: For historical reasons, float_t and double_t are typedef'ed to
16268    double on s390, causing operations on float_t to operate in a higher
16269    precision than is necessary.  However, it is not the case that SFmode
16270    operations have implicit excess precision, and we generate more optimal
16271    code if we let the compiler know no implicit extra precision is added.
16272 
16273    That means when we are compiling with -fexcess-precision=fast, the value
16274    we set for FLT_EVAL_METHOD will be out of line with the actual precision of
16275    float_t (though they would be correct for -fexcess-precision=standard).
16276 
16277    A complete fix would modify glibc to remove the unnecessary typedef
16278    of float_t to double.  */
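/* Concretely, with -fexcess-precision=standard the multiplication in

     float a, b;
     float c = a * b;

   is evaluated in double (FLT_EVAL_METHOD_PROMOTE_TO_DOUBLE), matching
   glibc's float_t/double_t typedefs, while with -fexcess-precision=fast
   it stays in single precision.  */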
16279 
16280 static enum flt_eval_method
16281 s390_excess_precision (enum excess_precision_type type)
16282 {
16283   switch (type)
16284     {
16285       case EXCESS_PRECISION_TYPE_IMPLICIT:
16286       case EXCESS_PRECISION_TYPE_FAST:
16287 	/* The fastest type to promote to will always be the native type,
16288 	   whether that occurs with implicit excess precision or
16289 	   otherwise.  */
16290 	return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT;
16291       case EXCESS_PRECISION_TYPE_STANDARD:
16292 	/* Otherwise, when we are in a standards compliant mode, to
16293 	   ensure consistency with the implementation in glibc, report that
16294 	   float is evaluated to the range and precision of double.  */
16295 	return FLT_EVAL_METHOD_PROMOTE_TO_DOUBLE;
16296       default:
16297 	gcc_unreachable ();
16298     }
16299   return FLT_EVAL_METHOD_UNPREDICTABLE;
16300 }
16301 
16302 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook.  */
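/* Sketch of how the offset is consumed, assuming the standard ASan
   shadow mapping with a scale of 3 (8 application bytes per shadow byte):

     shadow = (addr >> 3) + s390_asan_shadow_offset ();

   i.e. the shadow region starts at 1 << 52 for 64-bit code and at
   0x20000000 for 31-bit code.  */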
16303 
16304 static unsigned HOST_WIDE_INT
16305 s390_asan_shadow_offset (void)
16306 {
16307   return TARGET_64BIT ? HOST_WIDE_INT_1U << 52 : HOST_WIDE_INT_UC (0x20000000);
16308 }
16309 
16310 #ifdef HAVE_GAS_HIDDEN
16311 # define USE_HIDDEN_LINKONCE 1
16312 #else
16313 # define USE_HIDDEN_LINKONCE 0
16314 #endif
16315 
16316 /* Output an indirect branch trampoline for target register REGNO.  */
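/* Roughly, the z10-style (exrl) thunk emitted below for target register
   %r1 looks like this (the label name is taken from
   TARGET_INDIRECT_BRANCH_THUNK_NAME_EXRL; the one shown is illustrative):

     __s390_indirect_jump_r1:
             exrl    0,1f
     0:      j       0b
     1:      br      %r1

   The br is only reached as the execute target of the exrl; speculative
   fall-through is caught by the 0b loop.  */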
16317 
16318 static void
16319 s390_output_indirect_thunk_function (unsigned int regno, bool z10_p)
16320 {
16321   tree decl;
16322   char thunk_label[32];
16323   int i;
16324 
16325   if (z10_p)
16326     sprintf (thunk_label, TARGET_INDIRECT_BRANCH_THUNK_NAME_EXRL, regno);
16327   else
16328     sprintf (thunk_label, TARGET_INDIRECT_BRANCH_THUNK_NAME_EX,
16329 	     INDIRECT_BRANCH_THUNK_REGNUM, regno);
16330 
16331   decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
16332 		     get_identifier (thunk_label),
16333 		     build_function_type_list (void_type_node, NULL_TREE));
16334   DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
16335 				   NULL_TREE, void_type_node);
16336   TREE_PUBLIC (decl) = 1;
16337   TREE_STATIC (decl) = 1;
16338   DECL_IGNORED_P (decl) = 1;
16339 
16340   if (USE_HIDDEN_LINKONCE)
16341     {
16342       cgraph_node::create (decl)->set_comdat_group (DECL_ASSEMBLER_NAME (decl));
16343 
16344       targetm.asm_out.unique_section (decl, 0);
16345       switch_to_section (get_named_section (decl, NULL, 0));
16346 
16347       targetm.asm_out.globalize_label (asm_out_file, thunk_label);
16348       fputs ("\t.hidden\t", asm_out_file);
16349       assemble_name (asm_out_file, thunk_label);
16350       putc ('\n', asm_out_file);
16351       ASM_DECLARE_FUNCTION_NAME (asm_out_file, thunk_label, decl);
16352     }
16353   else
16354     {
16355       switch_to_section (text_section);
16356       ASM_OUTPUT_LABEL (asm_out_file, thunk_label);
16357     }
16358 
16359   DECL_INITIAL (decl) = make_node (BLOCK);
16360   current_function_decl = decl;
16361   allocate_struct_function (decl, false);
16362   init_function_start (decl);
16363   cfun->is_thunk = true;
16364   first_function_block_is_cold = false;
16365   final_start_function (emit_barrier (), asm_out_file, 1);
16366 
16367   /* This makes CFI at least usable for indirect jumps.
16368 
16369      Stopping in the thunk: backtrace will point to the thunk target
16370      as if it had been interrupted by a signal.  For a call this means that
16371      the call chain will be: caller->callee->thunk   */
16372   if (flag_asynchronous_unwind_tables && flag_dwarf2_cfi_asm)
16373     {
16374       fputs ("\t.cfi_signal_frame\n", asm_out_file);
16375       fprintf (asm_out_file, "\t.cfi_return_column %d\n", regno);
16376       for (i = 0; i < FPR15_REGNUM; i++)
16377 	fprintf (asm_out_file, "\t.cfi_same_value %s\n", reg_names[i]);
16378     }
16379 
16380   if (z10_p)
16381     {
16382       /* exrl  0,1f  */
16383 
16384       /* We generate a thunk for z10 compiled code although z10 is
16385 	 currently not enabled.  Tell the assembler to accept the
16386 	 instruction.  */
16387       if (!TARGET_CPU_Z10)
16388 	{
16389 	  fputs ("\t.machine push\n", asm_out_file);
16390 	  fputs ("\t.machine z10\n", asm_out_file);
16391 	}
16392       /* We use exrl even if -mzarch hasn't been specified on the
16393 	 command line so we have to tell the assembler to accept
16394 	 it.  */
16395       if (!TARGET_ZARCH)
16396 	fputs ("\t.machinemode zarch\n", asm_out_file);
16397 
16398       fputs ("\texrl\t0,1f\n", asm_out_file);
16399 
16400       if (!TARGET_ZARCH)
16401 	fputs ("\t.machinemode esa\n", asm_out_file);
16402 
16403       if (!TARGET_CPU_Z10)
16404 	fputs ("\t.machine pop\n", asm_out_file);
16405     }
16406   else
16407     {
16408       /* larl %r1,1f  */
16409       fprintf (asm_out_file, "\tlarl\t%%r%d,1f\n",
16410 	       INDIRECT_BRANCH_THUNK_REGNUM);
16411 
16412       /* ex 0,0(%r1)  */
16413       fprintf (asm_out_file, "\tex\t0,0(%%r%d)\n",
16414 	       INDIRECT_BRANCH_THUNK_REGNUM);
16415     }
16416 
16417   /* 0:    j 0b  */
16418   fputs ("0:\tj\t0b\n", asm_out_file);
16419 
16420   /* 1:    br <regno>  */
16421   fprintf (asm_out_file, "1:\tbr\t%%r%d\n", regno);
16422 
16423   final_end_function ();
16424   init_insn_lengths ();
16425   free_after_compilation (cfun);
16426   set_cfun (NULL);
16427   current_function_decl = NULL;
16428 }
16429 
16430 /* Implement the asm.code_end target hook.  */
16431 
16432 static void
16433 s390_code_end (void)
16434 {
16435   int i;
16436 
16437   for (i = 1; i < 16; i++)
16438     {
16439       if (indirect_branch_z10thunk_mask & (1 << i))
16440 	s390_output_indirect_thunk_function (i, true);
16441 
16442       if (indirect_branch_prez10thunk_mask & (1 << i))
16443 	s390_output_indirect_thunk_function (i, false);
16444     }
16445 
16446   if (TARGET_INDIRECT_BRANCH_TABLE)
16447     {
16448       int o;
16449       int i;
16450 
16451       for (o = 0; o < INDIRECT_BRANCH_NUM_OPTIONS; o++)
16452 	{
16453 	  if (indirect_branch_table_label_no[o] == 0)
16454 	    continue;
16455 
16456 	  switch_to_section (get_section (indirect_branch_table_name[o],
16457 					  0,
16458 					  NULL_TREE));
16459 	  for (i = 0; i < indirect_branch_table_label_no[o]; i++)
16460 	    {
16461 	      char label_start[32];
16462 
16463 	      ASM_GENERATE_INTERNAL_LABEL (label_start,
16464 					   indirect_branch_table_label[o], i);
16465 
16466 	      fputs ("\t.long\t", asm_out_file);
16467 	      assemble_name_raw (asm_out_file, label_start);
16468 	      fputs ("-.\n", asm_out_file);
16469 	    }
16470 	  switch_to_section (current_function_section ());
16471 	}
16472     }
16473 }
16474 
16475 /* Implement the TARGET_CASE_VALUES_THRESHOLD target hook.  */
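/* For comparison, default_case_values_threshold is only a handful of
   cases (4 or 5, depending on whether a casesi pattern exists), so the
   value of 20 below strongly biases switch expansion towards
   compare-and-branch sequences when indirect-branch prediction is
   disabled.  */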
16476 
16477 unsigned int
16478 s390_case_values_threshold (void)
16479 {
16480   /* Disabling branch prediction for indirect jumps makes jump tables
16481      much more expensive.  */
16482   if (TARGET_INDIRECT_BRANCH_NOBP_JUMP)
16483     return 20;
16484 
16485   return default_case_values_threshold ();
16486 }
16487 
16488 /* Evaluate the insns between HEAD and TAIL and install back-end
16489    specific dependencies.
16490 
16491    Establish an ANTI dependency between r11 and r15 restores from FPRs
16492    to prevent the instructions scheduler from reordering them since
16493    this would break CFI.  No further handling in the sched_reorder
16494    hook is required since the r11 and r15 restore will never appear in
16495    the same ready list with that change.  */
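/* A sketch of the epilogue situation this guards against (the FPR
   numbers are chosen for illustration only):

     lgdr    %r11,%f8     frame pointer restored from an FPR
     lgdr    %r15,%f10    stack pointer restored from an FPR

   Without the ANTI dependency the scheduler could swap the two restores
   and thereby invalidate the CFI attached to them.  */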
16496 void
16497 s390_sched_dependencies_evaluation (rtx_insn *head, rtx_insn *tail)
16498 {
16499   if (!frame_pointer_needed || !epilogue_completed)
16500     return;
16501 
16502   while (head != tail && DEBUG_INSN_P (head))
16503     head = NEXT_INSN (head);
16504 
16505   rtx_insn *r15_restore = NULL, *r11_restore = NULL;
16506 
16507   for (rtx_insn *insn = tail; insn != head; insn = PREV_INSN (insn))
16508     {
16509       rtx set = single_set (insn);
16510       if (!INSN_P (insn)
16511 	  || !RTX_FRAME_RELATED_P (insn)
16512 	  || set == NULL_RTX
16513 	  || !REG_P (SET_DEST (set))
16514 	  || !FP_REG_P (SET_SRC (set)))
16515 	continue;
16516 
16517       if (REGNO (SET_DEST (set)) == HARD_FRAME_POINTER_REGNUM)
16518 	r11_restore = insn;
16519 
16520       if (REGNO (SET_DEST (set)) == STACK_POINTER_REGNUM)
16521 	r15_restore = insn;
16522     }
16523 
16524   if (r11_restore == NULL || r15_restore == NULL)
16525     return;
16526   add_dependence (r11_restore, r15_restore, REG_DEP_ANTI);
16527 }
16528 
16529 /* Implement TARGET_SHIFT_TRUNCATION_MASK for integer shifts.  */
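/* The hardware uses only the low 6 bits of a shift count, for 32-bit as
   well as 64-bit shifts, hence the mask of 63 for both SImode and DImode.
   E.g. an SImode shift by 67 acts like a shift by 3 (67 & 63); it is not
   reduced modulo 32.  */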
16530 
16531 static unsigned HOST_WIDE_INT
16532 s390_shift_truncation_mask (machine_mode mode)
16533 {
16534   return mode == DImode || mode == SImode ? 63 : 0;
16535 }
16536 
16537 /* Initialize GCC target structure.  */
16538 
16539 #undef  TARGET_ASM_ALIGNED_HI_OP
16540 #define TARGET_ASM_ALIGNED_HI_OP "\t.word\t"
16541 #undef  TARGET_ASM_ALIGNED_DI_OP
16542 #define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
16543 #undef  TARGET_ASM_INTEGER
16544 #define TARGET_ASM_INTEGER s390_assemble_integer
16545 
16546 #undef  TARGET_ASM_OPEN_PAREN
16547 #define TARGET_ASM_OPEN_PAREN ""
16548 
16549 #undef  TARGET_ASM_CLOSE_PAREN
16550 #define TARGET_ASM_CLOSE_PAREN ""
16551 
16552 #undef TARGET_OPTION_OVERRIDE
16553 #define TARGET_OPTION_OVERRIDE s390_option_override
16554 
16555 #ifdef TARGET_THREAD_SSP_OFFSET
16556 #undef TARGET_STACK_PROTECT_GUARD
16557 #define TARGET_STACK_PROTECT_GUARD hook_tree_void_null
16558 #endif
16559 
16560 #undef	TARGET_ENCODE_SECTION_INFO
16561 #define TARGET_ENCODE_SECTION_INFO s390_encode_section_info
16562 
16563 #undef TARGET_SCALAR_MODE_SUPPORTED_P
16564 #define TARGET_SCALAR_MODE_SUPPORTED_P s390_scalar_mode_supported_p
16565 
16566 #ifdef HAVE_AS_TLS
16567 #undef TARGET_HAVE_TLS
16568 #define TARGET_HAVE_TLS true
16569 #endif
16570 #undef TARGET_CANNOT_FORCE_CONST_MEM
16571 #define TARGET_CANNOT_FORCE_CONST_MEM s390_cannot_force_const_mem
16572 
16573 #undef TARGET_DELEGITIMIZE_ADDRESS
16574 #define TARGET_DELEGITIMIZE_ADDRESS s390_delegitimize_address
16575 
16576 #undef TARGET_LEGITIMIZE_ADDRESS
16577 #define TARGET_LEGITIMIZE_ADDRESS s390_legitimize_address
16578 
16579 #undef TARGET_RETURN_IN_MEMORY
16580 #define TARGET_RETURN_IN_MEMORY s390_return_in_memory
16581 
16582 #undef  TARGET_INIT_BUILTINS
16583 #define TARGET_INIT_BUILTINS s390_init_builtins
16584 #undef  TARGET_EXPAND_BUILTIN
16585 #define TARGET_EXPAND_BUILTIN s390_expand_builtin
16586 #undef  TARGET_BUILTIN_DECL
16587 #define TARGET_BUILTIN_DECL s390_builtin_decl
16588 
16589 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
16590 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA s390_output_addr_const_extra
16591 
16592 #undef TARGET_ASM_OUTPUT_MI_THUNK
16593 #define TARGET_ASM_OUTPUT_MI_THUNK s390_output_mi_thunk
16594 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
16595 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true
16596 
16597 #undef TARGET_C_EXCESS_PRECISION
16598 #define TARGET_C_EXCESS_PRECISION s390_excess_precision
16599 
16600 #undef  TARGET_SCHED_ADJUST_PRIORITY
16601 #define TARGET_SCHED_ADJUST_PRIORITY s390_adjust_priority
16602 #undef TARGET_SCHED_ISSUE_RATE
16603 #define TARGET_SCHED_ISSUE_RATE s390_issue_rate
16604 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
16605 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD s390_first_cycle_multipass_dfa_lookahead
16606 
16607 #undef TARGET_SCHED_VARIABLE_ISSUE
16608 #define TARGET_SCHED_VARIABLE_ISSUE s390_sched_variable_issue
16609 #undef TARGET_SCHED_REORDER
16610 #define TARGET_SCHED_REORDER s390_sched_reorder
16611 #undef TARGET_SCHED_INIT
16612 #define TARGET_SCHED_INIT s390_sched_init
16613 
16614 #undef TARGET_CANNOT_COPY_INSN_P
16615 #define TARGET_CANNOT_COPY_INSN_P s390_cannot_copy_insn_p
16616 #undef TARGET_RTX_COSTS
16617 #define TARGET_RTX_COSTS s390_rtx_costs
16618 #undef TARGET_ADDRESS_COST
16619 #define TARGET_ADDRESS_COST s390_address_cost
16620 #undef TARGET_REGISTER_MOVE_COST
16621 #define TARGET_REGISTER_MOVE_COST s390_register_move_cost
16622 #undef TARGET_MEMORY_MOVE_COST
16623 #define TARGET_MEMORY_MOVE_COST s390_memory_move_cost
16624 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
16625 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
16626   s390_builtin_vectorization_cost
16627 
16628 #undef TARGET_MACHINE_DEPENDENT_REORG
16629 #define TARGET_MACHINE_DEPENDENT_REORG s390_reorg
16630 
16631 #undef TARGET_VALID_POINTER_MODE
16632 #define TARGET_VALID_POINTER_MODE s390_valid_pointer_mode
16633 
16634 #undef TARGET_BUILD_BUILTIN_VA_LIST
16635 #define TARGET_BUILD_BUILTIN_VA_LIST s390_build_builtin_va_list
16636 #undef TARGET_EXPAND_BUILTIN_VA_START
16637 #define TARGET_EXPAND_BUILTIN_VA_START s390_va_start
16638 #undef TARGET_ASAN_SHADOW_OFFSET
16639 #define TARGET_ASAN_SHADOW_OFFSET s390_asan_shadow_offset
16640 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
16641 #define TARGET_GIMPLIFY_VA_ARG_EXPR s390_gimplify_va_arg
16642 
16643 #undef TARGET_PROMOTE_FUNCTION_MODE
16644 #define TARGET_PROMOTE_FUNCTION_MODE s390_promote_function_mode
16645 #undef TARGET_PASS_BY_REFERENCE
16646 #define TARGET_PASS_BY_REFERENCE s390_pass_by_reference
16647 
16648 #undef  TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
16649 #define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE s390_override_options_after_change
16650 
16651 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
16652 #define TARGET_FUNCTION_OK_FOR_SIBCALL s390_function_ok_for_sibcall
16653 #undef TARGET_FUNCTION_ARG
16654 #define TARGET_FUNCTION_ARG s390_function_arg
16655 #undef TARGET_FUNCTION_ARG_ADVANCE
16656 #define TARGET_FUNCTION_ARG_ADVANCE s390_function_arg_advance
16657 #undef TARGET_FUNCTION_ARG_PADDING
16658 #define TARGET_FUNCTION_ARG_PADDING s390_function_arg_padding
16659 #undef TARGET_FUNCTION_VALUE
16660 #define TARGET_FUNCTION_VALUE s390_function_value
16661 #undef TARGET_LIBCALL_VALUE
16662 #define TARGET_LIBCALL_VALUE s390_libcall_value
16663 #undef TARGET_STRICT_ARGUMENT_NAMING
16664 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
16665 
16666 #undef TARGET_KEEP_LEAF_WHEN_PROFILED
16667 #define TARGET_KEEP_LEAF_WHEN_PROFILED s390_keep_leaf_when_profiled
16668 
16669 #undef TARGET_FIXED_CONDITION_CODE_REGS
16670 #define TARGET_FIXED_CONDITION_CODE_REGS s390_fixed_condition_code_regs
16671 
16672 #undef TARGET_CC_MODES_COMPATIBLE
16673 #define TARGET_CC_MODES_COMPATIBLE s390_cc_modes_compatible
16674 
16675 #undef TARGET_INVALID_WITHIN_DOLOOP
16676 #define TARGET_INVALID_WITHIN_DOLOOP hook_constcharptr_const_rtx_insn_null
16677 
16678 #ifdef HAVE_AS_TLS
16679 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
16680 #define TARGET_ASM_OUTPUT_DWARF_DTPREL s390_output_dwarf_dtprel
16681 #endif
16682 
16683 #undef TARGET_DWARF_FRAME_REG_MODE
16684 #define TARGET_DWARF_FRAME_REG_MODE s390_dwarf_frame_reg_mode
16685 
16686 #ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
16687 #undef TARGET_MANGLE_TYPE
16688 #define TARGET_MANGLE_TYPE s390_mangle_type
16689 #endif
16690 
16691 #undef TARGET_SCALAR_MODE_SUPPORTED_P
16692 #define TARGET_SCALAR_MODE_SUPPORTED_P s390_scalar_mode_supported_p
16693 
16694 #undef TARGET_VECTOR_MODE_SUPPORTED_P
16695 #define TARGET_VECTOR_MODE_SUPPORTED_P s390_vector_mode_supported_p
16696 
16697 #undef  TARGET_PREFERRED_RELOAD_CLASS
16698 #define TARGET_PREFERRED_RELOAD_CLASS s390_preferred_reload_class
16699 
16700 #undef TARGET_SECONDARY_RELOAD
16701 #define TARGET_SECONDARY_RELOAD s390_secondary_reload
16702 #undef TARGET_SECONDARY_MEMORY_NEEDED
16703 #define TARGET_SECONDARY_MEMORY_NEEDED s390_secondary_memory_needed
16704 #undef TARGET_SECONDARY_MEMORY_NEEDED_MODE
16705 #define TARGET_SECONDARY_MEMORY_NEEDED_MODE s390_secondary_memory_needed_mode
16706 
16707 #undef TARGET_LIBGCC_CMP_RETURN_MODE
16708 #define TARGET_LIBGCC_CMP_RETURN_MODE s390_libgcc_cmp_return_mode
16709 
16710 #undef TARGET_LIBGCC_SHIFT_COUNT_MODE
16711 #define TARGET_LIBGCC_SHIFT_COUNT_MODE s390_libgcc_shift_count_mode
16712 
16713 #undef TARGET_LEGITIMATE_ADDRESS_P
16714 #define TARGET_LEGITIMATE_ADDRESS_P s390_legitimate_address_p
16715 
16716 #undef TARGET_LEGITIMATE_CONSTANT_P
16717 #define TARGET_LEGITIMATE_CONSTANT_P s390_legitimate_constant_p
16718 
16719 #undef TARGET_LRA_P
16720 #define TARGET_LRA_P s390_lra_p
16721 
16722 #undef TARGET_CAN_ELIMINATE
16723 #define TARGET_CAN_ELIMINATE s390_can_eliminate
16724 
16725 #undef TARGET_CONDITIONAL_REGISTER_USAGE
16726 #define TARGET_CONDITIONAL_REGISTER_USAGE s390_conditional_register_usage
16727 
16728 #undef TARGET_LOOP_UNROLL_ADJUST
16729 #define TARGET_LOOP_UNROLL_ADJUST s390_loop_unroll_adjust
16730 
16731 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
16732 #define TARGET_ASM_TRAMPOLINE_TEMPLATE s390_asm_trampoline_template
16733 #undef TARGET_TRAMPOLINE_INIT
16734 #define TARGET_TRAMPOLINE_INIT s390_trampoline_init
16735 
16736 /* PR 79421 */
16737 #undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
16738 #define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 1
16739 
16740 #undef TARGET_UNWIND_WORD_MODE
16741 #define TARGET_UNWIND_WORD_MODE s390_unwind_word_mode
16742 
16743 #undef TARGET_CANONICALIZE_COMPARISON
16744 #define TARGET_CANONICALIZE_COMPARISON s390_canonicalize_comparison
16745 
16746 #undef TARGET_HARD_REGNO_SCRATCH_OK
16747 #define TARGET_HARD_REGNO_SCRATCH_OK s390_hard_regno_scratch_ok
16748 
16749 #undef TARGET_HARD_REGNO_NREGS
16750 #define TARGET_HARD_REGNO_NREGS s390_hard_regno_nregs
16751 #undef TARGET_HARD_REGNO_MODE_OK
16752 #define TARGET_HARD_REGNO_MODE_OK s390_hard_regno_mode_ok
16753 #undef TARGET_MODES_TIEABLE_P
16754 #define TARGET_MODES_TIEABLE_P s390_modes_tieable_p
16755 
16756 #undef TARGET_HARD_REGNO_CALL_PART_CLOBBERED
16757 #define TARGET_HARD_REGNO_CALL_PART_CLOBBERED \
16758   s390_hard_regno_call_part_clobbered
16759 
16760 #undef TARGET_ATTRIBUTE_TABLE
16761 #define TARGET_ATTRIBUTE_TABLE s390_attribute_table
16762 
16763 #undef TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P
16764 #define TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P hook_bool_const_tree_true
16765 
16766 #undef TARGET_SET_UP_BY_PROLOGUE
16767 #define TARGET_SET_UP_BY_PROLOGUE s300_set_up_by_prologue
16768 
16769 #undef TARGET_EXTRA_LIVE_ON_ENTRY
16770 #define TARGET_EXTRA_LIVE_ON_ENTRY s390_live_on_entry
16771 
16772 #undef TARGET_USE_BY_PIECES_INFRASTRUCTURE_P
16773 #define TARGET_USE_BY_PIECES_INFRASTRUCTURE_P \
16774   s390_use_by_pieces_infrastructure_p
16775 
16776 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
16777 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV s390_atomic_assign_expand_fenv
16778 
16779 #undef TARGET_INVALID_ARG_FOR_UNPROTOTYPED_FN
16780 #define TARGET_INVALID_ARG_FOR_UNPROTOTYPED_FN s390_invalid_arg_for_unprototyped_fn
16781 
16782 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
16783 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE s390_preferred_simd_mode
16784 
16785 #undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
16786 #define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT s390_support_vector_misalignment
16787 
16788 #undef TARGET_VECTOR_ALIGNMENT
16789 #define TARGET_VECTOR_ALIGNMENT s390_vector_alignment
16790 
16791 #undef TARGET_INVALID_BINARY_OP
16792 #define TARGET_INVALID_BINARY_OP s390_invalid_binary_op
16793 
16794 #ifdef HAVE_AS_MACHINE_MACHINEMODE
16795 #undef TARGET_ASM_FILE_START
16796 #define TARGET_ASM_FILE_START s390_asm_file_start
16797 #endif
16798 
16799 #undef TARGET_ASM_FILE_END
16800 #define TARGET_ASM_FILE_END s390_asm_file_end
16801 
16802 #undef TARGET_SET_CURRENT_FUNCTION
16803 #define TARGET_SET_CURRENT_FUNCTION s390_set_current_function
16804 
16805 #if S390_USE_TARGET_ATTRIBUTE
16806 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
16807 #define TARGET_OPTION_VALID_ATTRIBUTE_P s390_valid_target_attribute_p
16808 
16809 #undef TARGET_CAN_INLINE_P
16810 #define TARGET_CAN_INLINE_P s390_can_inline_p
16811 #endif
16812 
16813 #undef TARGET_OPTION_RESTORE
16814 #define TARGET_OPTION_RESTORE s390_function_specific_restore
16815 
16816 #undef TARGET_CAN_CHANGE_MODE_CLASS
16817 #define TARGET_CAN_CHANGE_MODE_CLASS s390_can_change_mode_class
16818 
16819 #undef TARGET_CONSTANT_ALIGNMENT
16820 #define TARGET_CONSTANT_ALIGNMENT s390_constant_alignment
16821 
16822 #undef TARGET_ASM_CODE_END
16823 #define TARGET_ASM_CODE_END s390_code_end
16824 
16825 #undef TARGET_CASE_VALUES_THRESHOLD
16826 #define TARGET_CASE_VALUES_THRESHOLD s390_case_values_threshold
16827 
16828 #undef TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK
16829 #define TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK \
16830   s390_sched_dependencies_evaluation
16831 
16832 #undef TARGET_SHIFT_TRUNCATION_MASK
16833 #define TARGET_SHIFT_TRUNCATION_MASK s390_shift_truncation_mask
16834 
16835 /* Use only short displacement, since long displacement is not available for
16836    the floating point instructions.  */
16837 #undef TARGET_MAX_ANCHOR_OFFSET
16838 #define TARGET_MAX_ANCHOR_OFFSET 0xfff
16839 
16840 struct gcc_target targetm = TARGET_INITIALIZER;
16841 
16842 #include "gt-s390.h"
16843