// SPDX-License-Identifier: GPL-3.0-or-later
/* Subroutines used for code generation on IBM RS/6000.
   Copyright (C) 1991-2022 Free Software Foundation, Inc.
   Contributed by Richard Kenner (kenner@vlsi1.ultra.nyu.edu)

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published
   by the Free Software Foundation; either version 3, or (at your
   option) any later version.

   GCC is distributed in the hope that it will be useful, but WITHOUT
   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
   License for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING3.  If not see
   <http://www.gnu.org/licenses/>.  */

#define IN_TARGET_CODE 1

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "rtl.h"
#include "tree.h"
#include "memmodel.h"
#include "gimple.h"
#include "cfghooks.h"
#include "cfgloop.h"
#include "df.h"
#include "tm_p.h"
#include "stringpool.h"
#include "expmed.h"
#include "optabs.h"
#include "regs.h"
#include "ira.h"
#include "recog.h"
#include "cgraph.h"
#include "diagnostic-core.h"
#include "insn-attr.h"
#include "flags.h"
#include "alias.h"
#include "fold-const.h"
#include "attribs.h"
#include "stor-layout.h"
#include "calls.h"
#include "print-tree.h"
#include "varasm.h"
#include "explow.h"
#include "expr.h"
#include "output.h"
#include "common/common-target.h"
#include "langhooks.h"
#include "reload.h"
#include "sched-int.h"
#include "gimplify.h"
#include "gimple-fold.h"
#include "gimple-iterator.h"
#include "gimple-walk.h"
#include "ssa.h"
#include "tree-vectorizer.h"
#include "tree-ssa-propagate.h"
#include "intl.h"
#include "tm-constrs.h"
#include "target-globals.h"
#include "builtins.h"
#include "tree-vector-builder.h"
#include "context.h"
#include "tree-pass.h"
#include "symbol-summary.h"
#include "ipa-prop.h"
#include "ipa-fnsummary.h"
#include "except.h"
#if TARGET_XCOFF
#include "xcoffout.h"  /* get declarations of xcoff_*_section_name */
#endif
#include "case-cfn-macros.h"
#include "ppc-auxv.h"
#include "rs6000-internal.h"
#include "opts.h"

/* This file should be included last.  */
#include "target-def.h"

extern tree rs6000_builtin_mask_for_load (void);
extern tree rs6000_builtin_md_vectorized_function (tree, tree, tree);
extern tree rs6000_builtin_reciprocal (tree);

/* Set -mabi=ieeelongdouble on some old targets.  In the future, power server
   systems will also set long double to be IEEE 128-bit.  AIX and Darwin
   explicitly redefine TARGET_IEEEQUAD and TARGET_IEEEQUAD_DEFAULT to 0, so
   those systems will not pick up this default.  This needs to be after all
   of the include files, so that POWERPC_LINUX and POWERPC_FREEBSD are
   properly defined.  */
#ifndef TARGET_IEEEQUAD_DEFAULT
#if !defined (POWERPC_LINUX) && !defined (POWERPC_FREEBSD)
#define TARGET_IEEEQUAD_DEFAULT 1
#else
#define TARGET_IEEEQUAD_DEFAULT 0
#endif
#endif

/* Don't enable PC-relative addressing if the target does not support it.  */
#ifndef PCREL_SUPPORTED_BY_OS
#define PCREL_SUPPORTED_BY_OS	0
#endif

#ifdef USING_ELFOS_H
/* Counter for labels which are to be placed in .fixup.  */
int fixuplabelno = 0;
#endif

/* Whether to use variant of AIX ABI for PowerPC64 Linux.  */
int dot_symbols;

/* Specify the machine mode that pointers have.  After generation of rtl, the
   compiler makes no further distinction between pointers and any other objects
   of this machine mode.  */
scalar_int_mode rs6000_pmode;

/* Track use of r13 in 64-bit AIX TLS.  */
static bool xcoff_tls_exec_model_detected = false;

/* Width in bits of a pointer.  */
unsigned rs6000_pointer_size;

#ifdef HAVE_AS_GNU_ATTRIBUTE
# ifndef HAVE_LD_PPC_GNU_ATTR_LONG_DOUBLE
# define HAVE_LD_PPC_GNU_ATTR_LONG_DOUBLE 0
# endif
/* Flag whether floating point values have been passed/returned.
   Note that this doesn't say whether fprs are used, since the
   Tag_GNU_Power_ABI_FP .gnu.attributes value this flag controls
   should be set for soft-float values passed in gprs and ieee128
   values passed in vsx registers.  */
bool rs6000_passes_float = false;
bool rs6000_passes_long_double = false;
/* Flag whether vector values have been passed/returned.  */
bool rs6000_passes_vector = false;
/* Flag whether small (<= 8 byte) structures have been returned.  */
bool rs6000_returns_struct = false;
#endif

/* Value is TRUE if register/mode pair is acceptable.  */
static bool rs6000_hard_regno_mode_ok_p
  [NUM_MACHINE_MODES][FIRST_PSEUDO_REGISTER];

/* Maximum number of registers needed for a given register class and mode.  */
unsigned char rs6000_class_max_nregs[NUM_MACHINE_MODES][LIM_REG_CLASSES];

/* How many registers are needed for a given register and mode.  */
unsigned char rs6000_hard_regno_nregs[NUM_MACHINE_MODES][FIRST_PSEUDO_REGISTER];

/* Map register number to register class.  */
enum reg_class rs6000_regno_regclass[FIRST_PSEUDO_REGISTER];

static int dbg_cost_ctrl;

/* Flag to say the TOC is initialized.  */
int toc_initialized, need_toc_init;
char toc_label_name[10];

/* Cached value of rs6000_variable_issue.  This is cached in the
   rs6000_variable_issue hook and returned from rs6000_sched_reorder2.  */
static short cached_can_issue_more;

static GTY(()) section *read_only_data_section;
static GTY(()) section *private_data_section;
static GTY(()) section *tls_data_section;
static GTY(()) section *tls_private_data_section;
static GTY(()) section *read_only_private_data_section;
static GTY(()) section *sdata2_section;

section *toc_section = 0;

/* Describe the vector unit used for modes.  */
enum rs6000_vector rs6000_vector_unit[NUM_MACHINE_MODES];
enum rs6000_vector rs6000_vector_mem[NUM_MACHINE_MODES];

/* Register classes for various constraints that are based on the target
   switches.  */
enum reg_class rs6000_constraints[RS6000_CONSTRAINT_MAX];

/* Describe the alignment of a vector.  */
int rs6000_vector_align[NUM_MACHINE_MODES];

/* What modes to automatically generate reciprocal divide estimate (fre) and
   reciprocal sqrt (frsqrte) for.  */
unsigned char rs6000_recip_bits[MAX_MACHINE_MODE];

/* Masks to determine which reciprocal estimate instructions to generate
   automatically.  */
enum rs6000_recip_mask {
  RECIP_SF_DIV		= 0x001,	/* Use divide estimate */
  RECIP_DF_DIV		= 0x002,
  RECIP_V4SF_DIV	= 0x004,
  RECIP_V2DF_DIV	= 0x008,

  RECIP_SF_RSQRT	= 0x010,	/* Use reciprocal sqrt estimate.  */
  RECIP_DF_RSQRT	= 0x020,
  RECIP_V4SF_RSQRT	= 0x040,
  RECIP_V2DF_RSQRT	= 0x080,

  /* Various combinations of flags for -mrecip=xxx.  */
  RECIP_NONE		= 0,
  RECIP_ALL		= (RECIP_SF_DIV | RECIP_DF_DIV | RECIP_V4SF_DIV
			   | RECIP_V2DF_DIV | RECIP_SF_RSQRT | RECIP_DF_RSQRT
			   | RECIP_V4SF_RSQRT | RECIP_V2DF_RSQRT),

  RECIP_HIGH_PRECISION	= RECIP_ALL,

  /* On low precision machines like the power5, don't enable double precision
     reciprocal square root estimate, since it isn't accurate enough.  */
  RECIP_LOW_PRECISION	= (RECIP_ALL & ~(RECIP_DF_RSQRT | RECIP_V2DF_RSQRT))
};

/* -mrecip options.  */
static struct
{
  const char *string;		/* option name */
  unsigned int mask;		/* mask bits to set */
} recip_options[] = {
  { "all",	 RECIP_ALL },
  { "none",	 RECIP_NONE },
  { "div",	 (RECIP_SF_DIV | RECIP_DF_DIV | RECIP_V4SF_DIV
		  | RECIP_V2DF_DIV) },
  { "divf",	 (RECIP_SF_DIV | RECIP_V4SF_DIV) },
  { "divd",	 (RECIP_DF_DIV | RECIP_V2DF_DIV) },
  { "rsqrt",	 (RECIP_SF_RSQRT | RECIP_DF_RSQRT | RECIP_V4SF_RSQRT
		  | RECIP_V2DF_RSQRT) },
  { "rsqrtf",	 (RECIP_SF_RSQRT | RECIP_V4SF_RSQRT) },
  { "rsqrtd",	 (RECIP_DF_RSQRT | RECIP_V2DF_RSQRT) },
};
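
/* For example (illustrative), -mrecip=divd,rsqrtf would combine the "divd"
   and "rsqrtf" entries above into
   (RECIP_DF_DIV | RECIP_V2DF_DIV | RECIP_SF_RSQRT | RECIP_V4SF_RSQRT),
   i.e. double precision divide estimates plus single precision reciprocal
   square root estimates.  */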

/* On PowerPC, we have a limited number of target clones that we care about,
   which means we can use an array to hold the options, rather than having more
   elaborate data structures to identify each possible variation.  Order the
   clones from the default to the highest ISA.  */
enum {
  CLONE_DEFAULT		= 0,		/* default clone.  */
  CLONE_ISA_2_05,			/* ISA 2.05 (power6).  */
  CLONE_ISA_2_06,			/* ISA 2.06 (power7).  */
  CLONE_ISA_2_07,			/* ISA 2.07 (power8).  */
  CLONE_ISA_3_00,			/* ISA 3.0 (power9).  */
  CLONE_ISA_3_1,			/* ISA 3.1 (power10).  */
  CLONE_MAX
};

/* Map compiler ISA bits into HWCAP names.  */
struct clone_map {
  HOST_WIDE_INT isa_mask;	/* rs6000_isa mask */
  const char *name;		/* name to use in __builtin_cpu_supports.  */
};

static const struct clone_map rs6000_clone_map[CLONE_MAX] = {
  { 0,				"" },		/* Default options.  */
  { OPTION_MASK_CMPB,		"arch_2_05" },	/* ISA 2.05 (power6).  */
  { OPTION_MASK_POPCNTD,	"arch_2_06" },	/* ISA 2.06 (power7).  */
  { OPTION_MASK_P8_VECTOR,	"arch_2_07" },	/* ISA 2.07 (power8).  */
  { OPTION_MASK_P9_VECTOR,	"arch_3_00" },	/* ISA 3.0 (power9).  */
  { OPTION_MASK_POWER10,	"arch_3_1" },	/* ISA 3.1 (power10).  */
};
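
/* For example (illustrative), when a function compiled with the
   target_clones attribute has a power9 variant, that clone is selected at
   run time when __builtin_cpu_supports ("arch_3_00") is true, per the
   table above.  */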


/* Newer LIBCs explicitly export this symbol to declare that they provide
   the AT_PLATFORM and AT_HWCAP/AT_HWCAP2 values in the TCB.  We emit a
   reference to this symbol whenever we expand a CPU builtin, so that
   we never link against an old LIBC.  */
const char *tcb_verification_symbol = "__parse_hwcap_and_convert_at_platform";

/* True if we have expanded a CPU builtin.  */
bool cpu_builtin_p = false;

/* Pointer to function (in rs6000-c.cc) that can define or undefine target
   macros that have changed.  Languages that don't support the preprocessor
   don't link in rs6000-c.cc, so we can't call it directly.  */
void (*rs6000_target_modify_macros_ptr) (bool, HOST_WIDE_INT, HOST_WIDE_INT);

/* Simplify register classes into simpler classifications.  We assume
   GPR_REG_TYPE - FPR_REG_TYPE are ordered so that we can use a simple range
   check for standard register classes (gpr/floating/altivec/vsx) and
   floating/vector classes (float/altivec/vsx).  */

enum rs6000_reg_type {
  NO_REG_TYPE,
  PSEUDO_REG_TYPE,
  GPR_REG_TYPE,
  VSX_REG_TYPE,
  ALTIVEC_REG_TYPE,
  FPR_REG_TYPE,
  SPR_REG_TYPE,
  CR_REG_TYPE
};

/* Map register class to register type.  */
static enum rs6000_reg_type reg_class_to_reg_type[N_REG_CLASSES];

/* First/last register type for the 'normal' register types (i.e. general
   purpose, floating point, altivec, and VSX registers).  */
#define IS_STD_REG_TYPE(RTYPE) IN_RANGE(RTYPE, GPR_REG_TYPE, FPR_REG_TYPE)

#define IS_FP_VECT_REG_TYPE(RTYPE) IN_RANGE(RTYPE, VSX_REG_TYPE, FPR_REG_TYPE)
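
/* For example, IS_STD_REG_TYPE (ALTIVEC_REG_TYPE) is true because
   ALTIVEC_REG_TYPE lies between GPR_REG_TYPE and FPR_REG_TYPE in the
   enumeration above, while IS_STD_REG_TYPE (CR_REG_TYPE) is false.  */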


/* Register classes we care about in secondary reload or when checking for a
   legitimate address.  We only need to worry about GPR, FPR, and Altivec
   registers here, along with an ANY field that is the OR of the 3 register
   classes.  */

enum rs6000_reload_reg_type {
  RELOAD_REG_GPR,			/* General purpose registers.  */
  RELOAD_REG_FPR,			/* Traditional floating point regs.  */
  RELOAD_REG_VMX,			/* Altivec (VMX) registers.  */
  RELOAD_REG_ANY,			/* OR of GPR, FPR, Altivec masks.  */
  N_RELOAD_REG
};

/* For setting up register classes, loop through the 3 register classes mapping
   into real registers, and skip the ANY class, which is just an OR of the
   bits.  */
#define FIRST_RELOAD_REG_CLASS	RELOAD_REG_GPR
#define LAST_RELOAD_REG_CLASS	RELOAD_REG_VMX

/* Map reload register type to a register in the register class.  */
struct reload_reg_map_type {
  const char *name;			/* Register class name.  */
  int reg;				/* Register in the register class.  */
};

static const struct reload_reg_map_type reload_reg_map[N_RELOAD_REG] = {
  { "Gpr",	FIRST_GPR_REGNO },	/* RELOAD_REG_GPR.  */
  { "Fpr",	FIRST_FPR_REGNO },	/* RELOAD_REG_FPR.  */
  { "VMX",	FIRST_ALTIVEC_REGNO },	/* RELOAD_REG_VMX.  */
  { "Any",	-1 },			/* RELOAD_REG_ANY.  */
};

/* Mask bits for each register class, indexed per mode.  Historically the
   compiler has been more restrictive about which types can do PRE_MODIFY
   instead of PRE_INC and PRE_DEC, so keep track of separate bits for these
   two.  */
typedef unsigned char addr_mask_type;

#define RELOAD_REG_VALID	0x01	/* Mode valid in register.  */
#define RELOAD_REG_MULTIPLE	0x02	/* Mode takes multiple registers.  */
#define RELOAD_REG_INDEXED	0x04	/* Reg+reg addressing.  */
#define RELOAD_REG_OFFSET	0x08	/* Reg+offset addressing.  */
#define RELOAD_REG_PRE_INCDEC	0x10	/* PRE_INC/PRE_DEC valid.  */
#define RELOAD_REG_PRE_MODIFY	0x20	/* PRE_MODIFY valid.  */
#define RELOAD_REG_AND_M16	0x40	/* AND -16 addressing.  */
#define RELOAD_REG_QUAD_OFFSET	0x80	/* quad offset is limited.  */

/* Register type masks based on the type of valid addressing modes.  */
struct rs6000_reg_addr {
  enum insn_code reload_load;		/* INSN to reload for loading.  */
  enum insn_code reload_store;		/* INSN to reload for storing.  */
  enum insn_code reload_fpr_gpr;	/* INSN to move from FPR to GPR.  */
  enum insn_code reload_gpr_vsx;	/* INSN to move from GPR to VSX.  */
  enum insn_code reload_vsx_gpr;	/* INSN to move from VSX to GPR.  */
  addr_mask_type addr_mask[(int)N_RELOAD_REG]; /* Valid address masks.  */
  bool scalar_in_vmx_p;			/* Scalar value can go in VMX.  */
};

static struct rs6000_reg_addr reg_addr[NUM_MACHINE_MODES];
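
/* For example (illustrative), a mode that is valid in GPRs with both
   reg+reg and reg+offset addresses would have
   (RELOAD_REG_VALID | RELOAD_REG_INDEXED | RELOAD_REG_OFFSET) set in
   reg_addr[mode].addr_mask[RELOAD_REG_GPR].  */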

/* Helper function to say whether a mode supports PRE_INC or PRE_DEC.  */
static inline bool
mode_supports_pre_incdec_p (machine_mode mode)
{
  return ((reg_addr[mode].addr_mask[RELOAD_REG_ANY] & RELOAD_REG_PRE_INCDEC)
	  != 0);
}

/* Helper function to say whether a mode supports PRE_MODIFY.  */
static inline bool
mode_supports_pre_modify_p (machine_mode mode)
{
  return ((reg_addr[mode].addr_mask[RELOAD_REG_ANY] & RELOAD_REG_PRE_MODIFY)
	  != 0);
}

/* Return true if we have D-form addressing in altivec registers.  */
static inline bool
mode_supports_vmx_dform (machine_mode mode)
{
  return ((reg_addr[mode].addr_mask[RELOAD_REG_VMX] & RELOAD_REG_OFFSET) != 0);
}

/* Return true if we have D-form addressing in VSX registers.  This addressing
   is more limited than normal d-form addressing in that the offset must be
   aligned on a 16-byte boundary.  */
static inline bool
mode_supports_dq_form (machine_mode mode)
{
  return ((reg_addr[mode].addr_mask[RELOAD_REG_ANY] & RELOAD_REG_QUAD_OFFSET)
	  != 0);
}

/* Given that there exists at least one variable that is set (produced)
   by OUT_INSN and read (consumed) by IN_INSN, return true iff
   IN_INSN represents one or more memory store operations and none of
   the variables set by OUT_INSN is used by IN_INSN as the address of a
   store operation.  If either IN_INSN or OUT_INSN does not represent
   a "single" RTL SET expression (as loosely defined by the
   implementation of the single_set function) or a PARALLEL with only
   SETs, CLOBBERs, and USEs inside, this function returns false.

   This rs6000-specific version of store_data_bypass_p checks for
   certain conditions that result in assertion failures (and internal
   compiler errors) in the generic store_data_bypass_p function and
   returns false rather than calling store_data_bypass_p if one of the
   problematic conditions is detected.  */

int
rs6000_store_data_bypass_p (rtx_insn *out_insn, rtx_insn *in_insn)
{
  rtx out_set, in_set;
  rtx out_pat, in_pat;
  rtx out_exp, in_exp;
  int i, j;

  in_set = single_set (in_insn);
  if (in_set)
    {
      if (MEM_P (SET_DEST (in_set)))
	{
	  out_set = single_set (out_insn);
	  if (!out_set)
	    {
	      out_pat = PATTERN (out_insn);
	      if (GET_CODE (out_pat) == PARALLEL)
		{
		  for (i = 0; i < XVECLEN (out_pat, 0); i++)
		    {
		      out_exp = XVECEXP (out_pat, 0, i);
		      if ((GET_CODE (out_exp) == CLOBBER)
			  || (GET_CODE (out_exp) == USE))
			continue;
		      else if (GET_CODE (out_exp) != SET)
			return false;
		    }
		}
	    }
	}
    }
  else
    {
      in_pat = PATTERN (in_insn);
      if (GET_CODE (in_pat) != PARALLEL)
	return false;

      for (i = 0; i < XVECLEN (in_pat, 0); i++)
	{
	  in_exp = XVECEXP (in_pat, 0, i);
	  if ((GET_CODE (in_exp) == CLOBBER) || (GET_CODE (in_exp) == USE))
	    continue;
	  else if (GET_CODE (in_exp) != SET)
	    return false;

	  if (MEM_P (SET_DEST (in_exp)))
	    {
	      out_set = single_set (out_insn);
	      if (!out_set)
		{
		  out_pat = PATTERN (out_insn);
		  if (GET_CODE (out_pat) != PARALLEL)
		    return false;
		  for (j = 0; j < XVECLEN (out_pat, 0); j++)
		    {
		      out_exp = XVECEXP (out_pat, 0, j);
		      if ((GET_CODE (out_exp) == CLOBBER)
			  || (GET_CODE (out_exp) == USE))
			continue;
		      else if (GET_CODE (out_exp) != SET)
			return false;
		    }
		}
	    }
	}
    }
  return store_data_bypass_p (out_insn, in_insn);
}
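
/* For example (illustrative, not taken from the machine description), a
   store-with-update insn whose pattern is

	(parallel [(set (mem:DI (plus:DI (reg:DI 3) (const_int 8)))
			(reg:DI 4))
		   (set (reg:DI 3) (plus:DI (reg:DI 3) (const_int 8)))])

   contains only SETs, so the checks above fall through to the generic
   store_data_bypass_p.  */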


/* Processor costs (relative to an add) */

const struct processor_costs *rs6000_cost;

/* Instruction size costs on 32-bit processors.  */
static const
struct processor_costs size32_cost = {
  COSTS_N_INSNS (1),    /* mulsi */
  COSTS_N_INSNS (1),    /* mulsi_const */
  COSTS_N_INSNS (1),    /* mulsi_const9 */
  COSTS_N_INSNS (1),    /* muldi */
  COSTS_N_INSNS (1),    /* divsi */
  COSTS_N_INSNS (1),    /* divdi */
  COSTS_N_INSNS (1),    /* fp */
  COSTS_N_INSNS (1),    /* dmul */
  COSTS_N_INSNS (1),    /* sdiv */
  COSTS_N_INSNS (1),    /* ddiv */
  32,			/* cache line size */
  0,			/* l1 cache */
  0,			/* l2 cache */
  0,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction size costs on 64-bit processors.  */
static const
struct processor_costs size64_cost = {
  COSTS_N_INSNS (1),    /* mulsi */
  COSTS_N_INSNS (1),    /* mulsi_const */
  COSTS_N_INSNS (1),    /* mulsi_const9 */
  COSTS_N_INSNS (1),    /* muldi */
  COSTS_N_INSNS (1),    /* divsi */
  COSTS_N_INSNS (1),    /* divdi */
  COSTS_N_INSNS (1),    /* fp */
  COSTS_N_INSNS (1),    /* dmul */
  COSTS_N_INSNS (1),    /* sdiv */
  COSTS_N_INSNS (1),    /* ddiv */
  128,			/* cache line size */
  0,			/* l1 cache */
  0,			/* l2 cache */
  0,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on RS64A processors.  */
static const
struct processor_costs rs64a_cost = {
  COSTS_N_INSNS (20),   /* mulsi */
  COSTS_N_INSNS (12),   /* mulsi_const */
  COSTS_N_INSNS (8),    /* mulsi_const9 */
  COSTS_N_INSNS (34),   /* muldi */
  COSTS_N_INSNS (65),   /* divsi */
  COSTS_N_INSNS (67),   /* divdi */
  COSTS_N_INSNS (4),    /* fp */
  COSTS_N_INSNS (4),    /* dmul */
  COSTS_N_INSNS (31),   /* sdiv */
  COSTS_N_INSNS (31),   /* ddiv */
  128,			/* cache line size */
  128,			/* l1 cache */
  2048,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on MPCCORE processors.  */
static const
struct processor_costs mpccore_cost = {
  COSTS_N_INSNS (2),    /* mulsi */
  COSTS_N_INSNS (2),    /* mulsi_const */
  COSTS_N_INSNS (2),    /* mulsi_const9 */
  COSTS_N_INSNS (2),    /* muldi */
  COSTS_N_INSNS (6),    /* divsi */
  COSTS_N_INSNS (6),    /* divdi */
  COSTS_N_INSNS (4),    /* fp */
  COSTS_N_INSNS (5),    /* dmul */
  COSTS_N_INSNS (10),   /* sdiv */
  COSTS_N_INSNS (17),   /* ddiv */
  32,			/* cache line size */
  4,			/* l1 cache */
  16,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC403 processors.  */
static const
struct processor_costs ppc403_cost = {
  COSTS_N_INSNS (4),    /* mulsi */
  COSTS_N_INSNS (4),    /* mulsi_const */
  COSTS_N_INSNS (4),    /* mulsi_const9 */
  COSTS_N_INSNS (4),    /* muldi */
  COSTS_N_INSNS (33),   /* divsi */
  COSTS_N_INSNS (33),   /* divdi */
  COSTS_N_INSNS (11),   /* fp */
  COSTS_N_INSNS (11),   /* dmul */
  COSTS_N_INSNS (11),   /* sdiv */
  COSTS_N_INSNS (11),   /* ddiv */
  32,			/* cache line size */
  4,			/* l1 cache */
  16,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC405 processors.  */
static const
struct processor_costs ppc405_cost = {
  COSTS_N_INSNS (5),    /* mulsi */
  COSTS_N_INSNS (4),    /* mulsi_const */
  COSTS_N_INSNS (3),    /* mulsi_const9 */
  COSTS_N_INSNS (5),    /* muldi */
  COSTS_N_INSNS (35),   /* divsi */
  COSTS_N_INSNS (35),   /* divdi */
  COSTS_N_INSNS (11),   /* fp */
  COSTS_N_INSNS (11),   /* dmul */
  COSTS_N_INSNS (11),   /* sdiv */
  COSTS_N_INSNS (11),   /* ddiv */
  32,			/* cache line size */
  16,			/* l1 cache */
  128,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC440 processors.  */
static const
struct processor_costs ppc440_cost = {
  COSTS_N_INSNS (3),    /* mulsi */
  COSTS_N_INSNS (2),    /* mulsi_const */
  COSTS_N_INSNS (2),    /* mulsi_const9 */
  COSTS_N_INSNS (3),    /* muldi */
  COSTS_N_INSNS (34),   /* divsi */
  COSTS_N_INSNS (34),   /* divdi */
  COSTS_N_INSNS (5),    /* fp */
  COSTS_N_INSNS (5),    /* dmul */
  COSTS_N_INSNS (19),   /* sdiv */
  COSTS_N_INSNS (33),   /* ddiv */
  32,			/* cache line size */
  32,			/* l1 cache */
  256,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC476 processors.  */
static const
struct processor_costs ppc476_cost = {
  COSTS_N_INSNS (4),    /* mulsi */
  COSTS_N_INSNS (4),    /* mulsi_const */
  COSTS_N_INSNS (4),    /* mulsi_const9 */
  COSTS_N_INSNS (4),    /* muldi */
  COSTS_N_INSNS (11),   /* divsi */
  COSTS_N_INSNS (11),   /* divdi */
  COSTS_N_INSNS (6),    /* fp */
  COSTS_N_INSNS (6),    /* dmul */
  COSTS_N_INSNS (19),   /* sdiv */
  COSTS_N_INSNS (33),   /* ddiv */
  32,			/* l1 cache line size */
  32,			/* l1 cache */
  512,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC601 processors.  */
static const
struct processor_costs ppc601_cost = {
  COSTS_N_INSNS (5),    /* mulsi */
  COSTS_N_INSNS (5),    /* mulsi_const */
  COSTS_N_INSNS (5),    /* mulsi_const9 */
  COSTS_N_INSNS (5),    /* muldi */
  COSTS_N_INSNS (36),   /* divsi */
  COSTS_N_INSNS (36),   /* divdi */
  COSTS_N_INSNS (4),    /* fp */
  COSTS_N_INSNS (5),    /* dmul */
  COSTS_N_INSNS (17),   /* sdiv */
  COSTS_N_INSNS (31),   /* ddiv */
  32,			/* cache line size */
  32,			/* l1 cache */
  256,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC603 processors.  */
static const
struct processor_costs ppc603_cost = {
  COSTS_N_INSNS (5),    /* mulsi */
  COSTS_N_INSNS (3),    /* mulsi_const */
  COSTS_N_INSNS (2),    /* mulsi_const9 */
  COSTS_N_INSNS (5),    /* muldi */
  COSTS_N_INSNS (37),   /* divsi */
  COSTS_N_INSNS (37),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (4),    /* dmul */
  COSTS_N_INSNS (18),   /* sdiv */
  COSTS_N_INSNS (33),   /* ddiv */
  32,			/* cache line size */
  8,			/* l1 cache */
  64,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC604 processors.  */
static const
struct processor_costs ppc604_cost = {
  COSTS_N_INSNS (4),    /* mulsi */
  COSTS_N_INSNS (4),    /* mulsi_const */
  COSTS_N_INSNS (4),    /* mulsi_const9 */
  COSTS_N_INSNS (4),    /* muldi */
  COSTS_N_INSNS (20),   /* divsi */
  COSTS_N_INSNS (20),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (3),    /* dmul */
  COSTS_N_INSNS (18),   /* sdiv */
  COSTS_N_INSNS (32),   /* ddiv */
  32,			/* cache line size */
  16,			/* l1 cache */
  512,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC604e processors.  */
static const
struct processor_costs ppc604e_cost = {
  COSTS_N_INSNS (2),    /* mulsi */
  COSTS_N_INSNS (2),    /* mulsi_const */
  COSTS_N_INSNS (2),    /* mulsi_const9 */
  COSTS_N_INSNS (2),    /* muldi */
  COSTS_N_INSNS (20),   /* divsi */
  COSTS_N_INSNS (20),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (3),    /* dmul */
  COSTS_N_INSNS (18),   /* sdiv */
  COSTS_N_INSNS (32),   /* ddiv */
  32,			/* cache line size */
  32,			/* l1 cache */
  1024,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC620 processors.  */
static const
struct processor_costs ppc620_cost = {
  COSTS_N_INSNS (5),    /* mulsi */
  COSTS_N_INSNS (4),    /* mulsi_const */
  COSTS_N_INSNS (3),    /* mulsi_const9 */
  COSTS_N_INSNS (7),    /* muldi */
  COSTS_N_INSNS (21),   /* divsi */
  COSTS_N_INSNS (37),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (3),    /* dmul */
  COSTS_N_INSNS (18),   /* sdiv */
  COSTS_N_INSNS (32),   /* ddiv */
  128,			/* cache line size */
  32,			/* l1 cache */
  1024,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC630 processors.  */
static const
struct processor_costs ppc630_cost = {
  COSTS_N_INSNS (5),    /* mulsi */
  COSTS_N_INSNS (4),    /* mulsi_const */
  COSTS_N_INSNS (3),    /* mulsi_const9 */
  COSTS_N_INSNS (7),    /* muldi */
  COSTS_N_INSNS (21),   /* divsi */
  COSTS_N_INSNS (37),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (3),    /* dmul */
  COSTS_N_INSNS (17),   /* sdiv */
  COSTS_N_INSNS (21),   /* ddiv */
  128,			/* cache line size */
  64,			/* l1 cache */
  1024,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on Cell processor.  */
/* COSTS_N_INSNS (1) ~ one add.  */
static const
struct processor_costs ppccell_cost = {
  COSTS_N_INSNS (9/2)+2,    /* mulsi */
  COSTS_N_INSNS (6/2),    /* mulsi_const */
  COSTS_N_INSNS (6/2),    /* mulsi_const9 */
  COSTS_N_INSNS (15/2)+2,   /* muldi */
  COSTS_N_INSNS (38/2),   /* divsi */
  COSTS_N_INSNS (70/2),   /* divdi */
  COSTS_N_INSNS (10/2),   /* fp */
  COSTS_N_INSNS (10/2),   /* dmul */
  COSTS_N_INSNS (74/2),   /* sdiv */
  COSTS_N_INSNS (74/2),   /* ddiv */
  128,			/* cache line size */
  32,			/* l1 cache */
  512,			/* l2 cache */
  6,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC750 and PPC7400 processors.  */
static const
struct processor_costs ppc750_cost = {
  COSTS_N_INSNS (5),    /* mulsi */
  COSTS_N_INSNS (3),    /* mulsi_const */
  COSTS_N_INSNS (2),    /* mulsi_const9 */
  COSTS_N_INSNS (5),    /* muldi */
  COSTS_N_INSNS (17),   /* divsi */
  COSTS_N_INSNS (17),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (3),    /* dmul */
  COSTS_N_INSNS (17),   /* sdiv */
  COSTS_N_INSNS (31),   /* ddiv */
  32,			/* cache line size */
  32,			/* l1 cache */
  512,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC7450 processors.  */
static const
struct processor_costs ppc7450_cost = {
  COSTS_N_INSNS (4),    /* mulsi */
  COSTS_N_INSNS (3),    /* mulsi_const */
  COSTS_N_INSNS (3),    /* mulsi_const9 */
  COSTS_N_INSNS (4),    /* muldi */
  COSTS_N_INSNS (23),   /* divsi */
  COSTS_N_INSNS (23),   /* divdi */
  COSTS_N_INSNS (5),    /* fp */
  COSTS_N_INSNS (5),    /* dmul */
  COSTS_N_INSNS (21),   /* sdiv */
  COSTS_N_INSNS (35),   /* ddiv */
  32,			/* cache line size */
  32,			/* l1 cache */
  1024,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC8540 processors.  */
static const
struct processor_costs ppc8540_cost = {
  COSTS_N_INSNS (4),    /* mulsi */
  COSTS_N_INSNS (4),    /* mulsi_const */
  COSTS_N_INSNS (4),    /* mulsi_const9 */
  COSTS_N_INSNS (4),    /* muldi */
  COSTS_N_INSNS (19),   /* divsi */
  COSTS_N_INSNS (19),   /* divdi */
  COSTS_N_INSNS (4),    /* fp */
  COSTS_N_INSNS (4),    /* dmul */
  COSTS_N_INSNS (29),   /* sdiv */
  COSTS_N_INSNS (29),   /* ddiv */
  32,			/* cache line size */
  32,			/* l1 cache */
  256,			/* l2 cache */
  1,			/* prefetch streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on E300C2 and E300C3 cores.  */
static const
struct processor_costs ppce300c2c3_cost = {
  COSTS_N_INSNS (4),    /* mulsi */
  COSTS_N_INSNS (4),    /* mulsi_const */
  COSTS_N_INSNS (4),    /* mulsi_const9 */
  COSTS_N_INSNS (4),    /* muldi */
  COSTS_N_INSNS (19),   /* divsi */
  COSTS_N_INSNS (19),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (4),    /* dmul */
  COSTS_N_INSNS (18),   /* sdiv */
  COSTS_N_INSNS (33),   /* ddiv */
  32,			/* cache line size */
  16,			/* l1 cache */
  16,			/* l2 cache */
  1,			/* prefetch streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPCE500MC processors.  */
static const
struct processor_costs ppce500mc_cost = {
  COSTS_N_INSNS (4),    /* mulsi */
  COSTS_N_INSNS (4),    /* mulsi_const */
  COSTS_N_INSNS (4),    /* mulsi_const9 */
  COSTS_N_INSNS (4),    /* muldi */
  COSTS_N_INSNS (14),   /* divsi */
  COSTS_N_INSNS (14),   /* divdi */
  COSTS_N_INSNS (8),    /* fp */
  COSTS_N_INSNS (10),   /* dmul */
  COSTS_N_INSNS (36),   /* sdiv */
  COSTS_N_INSNS (66),   /* ddiv */
  64,			/* cache line size */
  32,			/* l1 cache */
  128,			/* l2 cache */
  1,			/* prefetch streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPCE500MC64 processors.  */
static const
struct processor_costs ppce500mc64_cost = {
  COSTS_N_INSNS (4),    /* mulsi */
  COSTS_N_INSNS (4),    /* mulsi_const */
  COSTS_N_INSNS (4),    /* mulsi_const9 */
  COSTS_N_INSNS (4),    /* muldi */
  COSTS_N_INSNS (14),   /* divsi */
  COSTS_N_INSNS (14),   /* divdi */
  COSTS_N_INSNS (4),    /* fp */
  COSTS_N_INSNS (10),   /* dmul */
  COSTS_N_INSNS (36),   /* sdiv */
  COSTS_N_INSNS (66),   /* ddiv */
  64,			/* cache line size */
  32,			/* l1 cache */
  128,			/* l2 cache */
  1,			/* prefetch streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPCE5500 processors.  */
static const
struct processor_costs ppce5500_cost = {
  COSTS_N_INSNS (5),    /* mulsi */
  COSTS_N_INSNS (5),    /* mulsi_const */
  COSTS_N_INSNS (4),    /* mulsi_const9 */
  COSTS_N_INSNS (5),    /* muldi */
  COSTS_N_INSNS (14),   /* divsi */
  COSTS_N_INSNS (14),   /* divdi */
  COSTS_N_INSNS (7),    /* fp */
  COSTS_N_INSNS (10),   /* dmul */
  COSTS_N_INSNS (36),   /* sdiv */
  COSTS_N_INSNS (66),   /* ddiv */
  64,			/* cache line size */
  32,			/* l1 cache */
  128,			/* l2 cache */
  1,			/* prefetch streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPCE6500 processors.  */
static const
struct processor_costs ppce6500_cost = {
  COSTS_N_INSNS (5),    /* mulsi */
  COSTS_N_INSNS (5),    /* mulsi_const */
  COSTS_N_INSNS (4),    /* mulsi_const9 */
  COSTS_N_INSNS (5),    /* muldi */
  COSTS_N_INSNS (14),   /* divsi */
  COSTS_N_INSNS (14),   /* divdi */
  COSTS_N_INSNS (7),    /* fp */
  COSTS_N_INSNS (10),   /* dmul */
  COSTS_N_INSNS (36),   /* sdiv */
  COSTS_N_INSNS (66),   /* ddiv */
  64,			/* cache line size */
  32,			/* l1 cache */
  128,			/* l2 cache */
  1,			/* prefetch streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on AppliedMicro Titan processors.  */
static const
struct processor_costs titan_cost = {
  COSTS_N_INSNS (5),    /* mulsi */
  COSTS_N_INSNS (5),    /* mulsi_const */
  COSTS_N_INSNS (5),    /* mulsi_const9 */
  COSTS_N_INSNS (5),    /* muldi */
  COSTS_N_INSNS (18),   /* divsi */
  COSTS_N_INSNS (18),   /* divdi */
  COSTS_N_INSNS (10),   /* fp */
  COSTS_N_INSNS (10),   /* dmul */
  COSTS_N_INSNS (46),   /* sdiv */
  COSTS_N_INSNS (72),   /* ddiv */
  32,			/* cache line size */
  32,			/* l1 cache */
  512,			/* l2 cache */
  1,			/* prefetch streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on POWER4 and POWER5 processors.  */
static const
struct processor_costs power4_cost = {
  COSTS_N_INSNS (3),    /* mulsi */
  COSTS_N_INSNS (2),    /* mulsi_const */
  COSTS_N_INSNS (2),    /* mulsi_const9 */
  COSTS_N_INSNS (4),    /* muldi */
  COSTS_N_INSNS (18),   /* divsi */
  COSTS_N_INSNS (34),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (3),    /* dmul */
  COSTS_N_INSNS (17),   /* sdiv */
  COSTS_N_INSNS (17),   /* ddiv */
  128,			/* cache line size */
  32,			/* l1 cache */
  1024,			/* l2 cache */
  8,			/* prefetch streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on POWER6 processors.  */
static const
struct processor_costs power6_cost = {
  COSTS_N_INSNS (8),    /* mulsi */
  COSTS_N_INSNS (8),    /* mulsi_const */
  COSTS_N_INSNS (8),    /* mulsi_const9 */
  COSTS_N_INSNS (8),    /* muldi */
  COSTS_N_INSNS (22),   /* divsi */
  COSTS_N_INSNS (28),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (3),    /* dmul */
  COSTS_N_INSNS (13),   /* sdiv */
  COSTS_N_INSNS (16),   /* ddiv */
  128,			/* cache line size */
  64,			/* l1 cache */
  2048,			/* l2 cache */
  16,			/* prefetch streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on POWER7 processors.  */
static const
struct processor_costs power7_cost = {
  COSTS_N_INSNS (2),	/* mulsi */
  COSTS_N_INSNS (2),	/* mulsi_const */
  COSTS_N_INSNS (2),	/* mulsi_const9 */
  COSTS_N_INSNS (2),	/* muldi */
  COSTS_N_INSNS (18),	/* divsi */
  COSTS_N_INSNS (34),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (13),	/* sdiv */
  COSTS_N_INSNS (16),	/* ddiv */
  128,			/* cache line size */
  32,			/* l1 cache */
  256,			/* l2 cache */
  12,			/* prefetch streams */
  COSTS_N_INSNS (3),	/* SF->DF convert */
};

/* Instruction costs on POWER8 processors.  */
static const
struct processor_costs power8_cost = {
  COSTS_N_INSNS (3),	/* mulsi */
  COSTS_N_INSNS (3),	/* mulsi_const */
  COSTS_N_INSNS (3),	/* mulsi_const9 */
  COSTS_N_INSNS (3),	/* muldi */
  COSTS_N_INSNS (19),	/* divsi */
  COSTS_N_INSNS (35),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (14),	/* sdiv */
  COSTS_N_INSNS (17),	/* ddiv */
  128,			/* cache line size */
  32,			/* l1 cache */
  512,			/* l2 cache */
  12,			/* prefetch streams */
  COSTS_N_INSNS (3),	/* SF->DF convert */
};

/* Instruction costs on POWER9 processors.  */
static const
struct processor_costs power9_cost = {
  COSTS_N_INSNS (3),	/* mulsi */
  COSTS_N_INSNS (3),	/* mulsi_const */
  COSTS_N_INSNS (3),	/* mulsi_const9 */
  COSTS_N_INSNS (3),	/* muldi */
  COSTS_N_INSNS (8),	/* divsi */
  COSTS_N_INSNS (12),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (13),	/* sdiv */
  COSTS_N_INSNS (18),	/* ddiv */
  128,			/* cache line size */
  32,			/* l1 cache */
  512,			/* l2 cache */
  8,			/* prefetch streams */
  COSTS_N_INSNS (3),	/* SF->DF convert */
};

/* Instruction costs on POWER10 processors.  */
static const
struct processor_costs power10_cost = {
  COSTS_N_INSNS (2),	/* mulsi */
  COSTS_N_INSNS (2),	/* mulsi_const */
  COSTS_N_INSNS (2),	/* mulsi_const9 */
  COSTS_N_INSNS (2),	/* muldi */
  COSTS_N_INSNS (6),	/* divsi */
  COSTS_N_INSNS (6),	/* divdi */
  COSTS_N_INSNS (2),	/* fp */
  COSTS_N_INSNS (2),	/* dmul */
  COSTS_N_INSNS (11),	/* sdiv */
  COSTS_N_INSNS (13),	/* ddiv */
  128,			/* cache line size */
  32,			/* l1 cache */
  512,			/* l2 cache */
  16,			/* prefetch streams */
  COSTS_N_INSNS (2),	/* SF->DF convert */
};

/* Instruction costs on POWER A2 processors.  */
static const
struct processor_costs ppca2_cost = {
  COSTS_N_INSNS (16),    /* mulsi */
  COSTS_N_INSNS (16),    /* mulsi_const */
  COSTS_N_INSNS (16),    /* mulsi_const9 */
  COSTS_N_INSNS (16),   /* muldi */
  COSTS_N_INSNS (22),   /* divsi */
  COSTS_N_INSNS (28),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (3),    /* dmul */
  COSTS_N_INSNS (59),   /* sdiv */
  COSTS_N_INSNS (72),   /* ddiv */
  64,			/* cache line size */
  16,			/* l1 cache */
  2048,			/* l2 cache */
  16,			/* prefetch streams */
  0,			/* SF->DF convert */
};

/* Support for -mveclibabi=<xxx> to control which vector library to use.  */
static tree (*rs6000_veclib_handler) (combined_fn, tree, tree);


static bool rs6000_debug_legitimate_address_p (machine_mode, rtx, bool);
static tree rs6000_handle_longcall_attribute (tree *, tree, tree, int, bool *);
static tree rs6000_handle_altivec_attribute (tree *, tree, tree, int, bool *);
static tree rs6000_handle_struct_attribute (tree *, tree, tree, int, bool *);
static tree rs6000_builtin_vectorized_libmass (combined_fn, tree, tree);
static void rs6000_emit_set_long_const (rtx, HOST_WIDE_INT);
static int rs6000_memory_move_cost (machine_mode, reg_class_t, bool);
static bool rs6000_debug_rtx_costs (rtx, machine_mode, int, int, int *, bool);
static int rs6000_debug_address_cost (rtx, machine_mode, addr_space_t,
				      bool);
static int rs6000_debug_adjust_cost (rtx_insn *, int, rtx_insn *, int,
				     unsigned int);
static bool is_microcoded_insn (rtx_insn *);
static bool is_nonpipeline_insn (rtx_insn *);
static bool is_cracked_insn (rtx_insn *);
static bool is_load_insn (rtx, rtx *);
static bool is_store_insn (rtx, rtx *);
static bool set_to_load_agen (rtx_insn *, rtx_insn *);
static bool insn_terminates_group_p (rtx_insn *, enum group_termination);
static bool insn_must_be_first_in_group (rtx_insn *);
static bool insn_must_be_last_in_group (rtx_insn *);
bool easy_vector_constant (rtx, machine_mode);
static rtx rs6000_debug_legitimize_address (rtx, rtx, machine_mode);
static rtx rs6000_legitimize_tls_address (rtx, enum tls_model);
#if TARGET_MACHO
static tree get_prev_label (tree);
#endif
static bool rs6000_mode_dependent_address (const_rtx);
static bool rs6000_debug_mode_dependent_address (const_rtx);
static bool rs6000_offsettable_memref_p (rtx, machine_mode, bool);
static enum reg_class rs6000_secondary_reload_class (enum reg_class,
						     machine_mode, rtx);
static enum reg_class rs6000_debug_secondary_reload_class (enum reg_class,
							   machine_mode,
							   rtx);
static enum reg_class rs6000_preferred_reload_class (rtx, enum reg_class);
static enum reg_class rs6000_debug_preferred_reload_class (rtx,
							   enum reg_class);
static bool rs6000_debug_secondary_memory_needed (machine_mode,
						  reg_class_t,
						  reg_class_t);
static bool rs6000_debug_can_change_mode_class (machine_mode,
						machine_mode,
						reg_class_t);

static bool (*rs6000_mode_dependent_address_ptr) (const_rtx)
  = rs6000_mode_dependent_address;

enum reg_class (*rs6000_secondary_reload_class_ptr) (enum reg_class,
						     machine_mode, rtx)
  = rs6000_secondary_reload_class;

enum reg_class (*rs6000_preferred_reload_class_ptr) (rtx, enum reg_class)
  = rs6000_preferred_reload_class;

const int INSN_NOT_AVAILABLE = -1;

static void rs6000_print_isa_options (FILE *, int, const char *,
				      HOST_WIDE_INT);
static void rs6000_print_builtin_options (FILE *, int, const char *,
					  HOST_WIDE_INT);
static HOST_WIDE_INT rs6000_disable_incompatible_switches (void);

static enum rs6000_reg_type register_to_reg_type (rtx, bool *);
static bool rs6000_secondary_reload_move (enum rs6000_reg_type,
					  enum rs6000_reg_type,
					  machine_mode,
					  secondary_reload_info *,
					  bool);
rtl_opt_pass *make_pass_analyze_swaps (gcc::context*);

/* Hash table stuff for keeping track of TOC entries.  */

struct GTY((for_user)) toc_hash_struct
{
  /* `key' will satisfy CONSTANT_P; in fact, it will satisfy
     ASM_OUTPUT_SPECIAL_POOL_ENTRY_P.  */
  rtx key;
  machine_mode key_mode;
  int labelno;
};

struct toc_hasher : ggc_ptr_hash<toc_hash_struct>
{
  static hashval_t hash (toc_hash_struct *);
  static bool equal (toc_hash_struct *, toc_hash_struct *);
};

static GTY (()) hash_table<toc_hasher> *toc_hash_table;



/* Default register names.  */
char rs6000_reg_names[][8] =
{
  /* GPRs */
      "0",  "1",  "2",  "3",  "4",  "5",  "6",  "7",
      "8",  "9", "10", "11", "12", "13", "14", "15",
     "16", "17", "18", "19", "20", "21", "22", "23",
     "24", "25", "26", "27", "28", "29", "30", "31",
  /* FPRs */
      "0",  "1",  "2",  "3",  "4",  "5",  "6",  "7",
      "8",  "9", "10", "11", "12", "13", "14", "15",
     "16", "17", "18", "19", "20", "21", "22", "23",
     "24", "25", "26", "27", "28", "29", "30", "31",
  /* VRs */
      "0",  "1",  "2",  "3",  "4",  "5",  "6",  "7",
      "8",  "9", "10", "11", "12", "13", "14", "15",
     "16", "17", "18", "19", "20", "21", "22", "23",
     "24", "25", "26", "27", "28", "29", "30", "31",
  /* lr ctr ca ap */
     "lr", "ctr", "ca", "ap",
  /* cr0..cr7 */
      "0",  "1",  "2",  "3",  "4",  "5",  "6",  "7",
  /* vrsave vscr sfp */
      "vrsave", "vscr", "sfp",
};

#ifdef TARGET_REGNAMES
static const char alt_reg_names[][8] =
{
  /* GPRs */
   "%r0",  "%r1",  "%r2",  "%r3",  "%r4",  "%r5",  "%r6",  "%r7",
   "%r8",  "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15",
  "%r16", "%r17", "%r18", "%r19", "%r20", "%r21", "%r22", "%r23",
  "%r24", "%r25", "%r26", "%r27", "%r28", "%r29", "%r30", "%r31",
  /* FPRs */
   "%f0",  "%f1",  "%f2",  "%f3",  "%f4",  "%f5",  "%f6",  "%f7",
   "%f8",  "%f9", "%f10", "%f11", "%f12", "%f13", "%f14", "%f15",
  "%f16", "%f17", "%f18", "%f19", "%f20", "%f21", "%f22", "%f23",
  "%f24", "%f25", "%f26", "%f27", "%f28", "%f29", "%f30", "%f31",
  /* VRs */
   "%v0",  "%v1",  "%v2",  "%v3",  "%v4",  "%v5",  "%v6",  "%v7",
   "%v8",  "%v9", "%v10", "%v11", "%v12", "%v13", "%v14", "%v15",
  "%v16", "%v17", "%v18", "%v19", "%v20", "%v21", "%v22", "%v23",
  "%v24", "%v25", "%v26", "%v27", "%v28", "%v29", "%v30", "%v31",
  /* lr ctr ca ap */
    "lr",  "ctr",   "ca",   "ap",
  /* cr0..cr7 */
  "%cr0",  "%cr1", "%cr2", "%cr3", "%cr4", "%cr5", "%cr6", "%cr7",
  /* vrsave vscr sfp */
  "vrsave", "vscr", "sfp",
};
#endif

/* Table of valid machine attributes.  */

static const struct attribute_spec rs6000_attribute_table[] =
{
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req,
       affects_type_identity, handler, exclude } */
  { "altivec",   1, 1, false, true,  false, false,
    rs6000_handle_altivec_attribute, NULL },
  { "longcall",  0, 0, false, true,  true,  false,
    rs6000_handle_longcall_attribute, NULL },
  { "shortcall", 0, 0, false, true,  true,  false,
    rs6000_handle_longcall_attribute, NULL },
  { "ms_struct", 0, 0, false, false, false, false,
    rs6000_handle_struct_attribute, NULL },
  { "gcc_struct", 0, 0, false, false, false, false,
    rs6000_handle_struct_attribute, NULL },
#ifdef SUBTARGET_ATTRIBUTE_TABLE
  SUBTARGET_ATTRIBUTE_TABLE,
#endif
  { NULL,        0, 0, false, false, false, false, NULL, NULL }
};

#ifndef TARGET_PROFILE_KERNEL
#define TARGET_PROFILE_KERNEL 0
#endif

/* Initialize the GCC target structure.  */
#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE rs6000_attribute_table
#undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
#define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES rs6000_set_default_type_attributes
#undef TARGET_ATTRIBUTE_TAKES_IDENTIFIER_P
#define TARGET_ATTRIBUTE_TAKES_IDENTIFIER_P rs6000_attribute_takes_identifier_p

#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP DOUBLE_INT_ASM_OP

/* Default unaligned ops are only provided for ELF.  Find the ops needed
   for non-ELF systems.  */
#ifndef OBJECT_FORMAT_ELF
#if TARGET_XCOFF
/* For XCOFF.  rs6000_assemble_integer will handle unaligned DIs on
   64-bit targets.  */
#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP "\t.vbyte\t2,"
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP "\t.vbyte\t4,"
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP "\t.vbyte\t8,"
#else
/* For Darwin.  */
#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP "\t.short\t"
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP "\t.long\t"
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP "\t.quad\t"
#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
#endif
#endif

/* This hook deals with fixups for relocatable code and DI-mode objects
   in 64-bit code.  */
#undef TARGET_ASM_INTEGER
#define TARGET_ASM_INTEGER rs6000_assemble_integer

#if defined (HAVE_GAS_HIDDEN) && !TARGET_MACHO
#undef TARGET_ASM_ASSEMBLE_VISIBILITY
#define TARGET_ASM_ASSEMBLE_VISIBILITY rs6000_assemble_visibility
#endif

#undef TARGET_ASM_PRINT_PATCHABLE_FUNCTION_ENTRY
#define TARGET_ASM_PRINT_PATCHABLE_FUNCTION_ENTRY \
  rs6000_print_patchable_function_entry

#undef TARGET_SET_UP_BY_PROLOGUE
#define TARGET_SET_UP_BY_PROLOGUE rs6000_set_up_by_prologue

#undef TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS
#define TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS rs6000_get_separate_components
#undef TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB
#define TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB rs6000_components_for_bb
#undef TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS
#define TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS rs6000_disqualify_components
#undef TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS
#define TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS rs6000_emit_prologue_components
#undef TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS
#define TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS rs6000_emit_epilogue_components
#undef TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS
#define TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS rs6000_set_handled_components

#undef TARGET_EXTRA_LIVE_ON_ENTRY
#define TARGET_EXTRA_LIVE_ON_ENTRY rs6000_live_on_entry

#undef TARGET_INTERNAL_ARG_POINTER
#define TARGET_INTERNAL_ARG_POINTER rs6000_internal_arg_pointer

#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS HAVE_AS_TLS

#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM rs6000_cannot_force_const_mem

#undef TARGET_DELEGITIMIZE_ADDRESS
#define TARGET_DELEGITIMIZE_ADDRESS rs6000_delegitimize_address

#undef TARGET_CONST_NOT_OK_FOR_DEBUG_P
#define TARGET_CONST_NOT_OK_FOR_DEBUG_P rs6000_const_not_ok_for_debug_p

#undef TARGET_LEGITIMATE_COMBINED_INSN
#define TARGET_LEGITIMATE_COMBINED_INSN rs6000_legitimate_combined_insn

#undef TARGET_ASM_FUNCTION_PROLOGUE
#define TARGET_ASM_FUNCTION_PROLOGUE rs6000_output_function_prologue
#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE rs6000_output_function_epilogue

#undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
#define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA rs6000_output_addr_const_extra

#undef  TARGET_ASM_GENERATE_PIC_ADDR_DIFF_VEC
#define TARGET_ASM_GENERATE_PIC_ADDR_DIFF_VEC rs6000_gen_pic_addr_diff_vec

#undef TARGET_LEGITIMIZE_ADDRESS
#define TARGET_LEGITIMIZE_ADDRESS rs6000_legitimize_address

#undef  TARGET_SCHED_VARIABLE_ISSUE
#define TARGET_SCHED_VARIABLE_ISSUE rs6000_variable_issue

#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE rs6000_issue_rate
#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST rs6000_adjust_cost
#undef TARGET_SCHED_ADJUST_PRIORITY
#define TARGET_SCHED_ADJUST_PRIORITY rs6000_adjust_priority
#undef TARGET_SCHED_IS_COSTLY_DEPENDENCE
#define TARGET_SCHED_IS_COSTLY_DEPENDENCE rs6000_is_costly_dependence
#undef TARGET_SCHED_INIT
#define TARGET_SCHED_INIT rs6000_sched_init
#undef TARGET_SCHED_FINISH
#define TARGET_SCHED_FINISH rs6000_sched_finish
#undef TARGET_SCHED_REORDER
#define TARGET_SCHED_REORDER rs6000_sched_reorder
#undef TARGET_SCHED_REORDER2
#define TARGET_SCHED_REORDER2 rs6000_sched_reorder2

#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD rs6000_use_sched_lookahead

#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD rs6000_use_sched_lookahead_guard

#undef TARGET_SCHED_ALLOC_SCHED_CONTEXT
#define TARGET_SCHED_ALLOC_SCHED_CONTEXT rs6000_alloc_sched_context
#undef TARGET_SCHED_INIT_SCHED_CONTEXT
#define TARGET_SCHED_INIT_SCHED_CONTEXT rs6000_init_sched_context
#undef TARGET_SCHED_SET_SCHED_CONTEXT
#define TARGET_SCHED_SET_SCHED_CONTEXT rs6000_set_sched_context
#undef TARGET_SCHED_FREE_SCHED_CONTEXT
#define TARGET_SCHED_FREE_SCHED_CONTEXT rs6000_free_sched_context

#undef TARGET_SCHED_CAN_SPECULATE_INSN
#define TARGET_SCHED_CAN_SPECULATE_INSN rs6000_sched_can_speculate_insn

#undef TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD
#define TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD rs6000_builtin_mask_for_load
#undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
#define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT		\
  rs6000_builtin_support_vector_misalignment
#undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
#define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE rs6000_vector_alignment_reachable
#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
  rs6000_builtin_vectorization_cost
#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE \
  rs6000_preferred_simd_mode
#undef TARGET_VECTORIZE_CREATE_COSTS
#define TARGET_VECTORIZE_CREATE_COSTS rs6000_vectorize_create_costs

#undef TARGET_LOOP_UNROLL_ADJUST
#define TARGET_LOOP_UNROLL_ADJUST rs6000_loop_unroll_adjust

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS rs6000_init_builtins
#undef TARGET_BUILTIN_DECL
#define TARGET_BUILTIN_DECL rs6000_builtin_decl

#undef TARGET_FOLD_BUILTIN
#define TARGET_FOLD_BUILTIN rs6000_fold_builtin
#undef TARGET_GIMPLE_FOLD_BUILTIN
#define TARGET_GIMPLE_FOLD_BUILTIN rs6000_gimple_fold_builtin

#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN rs6000_expand_builtin

#undef TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE rs6000_mangle_type

#undef TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS rs6000_init_libfuncs

#if TARGET_MACHO
#undef TARGET_BINDS_LOCAL_P
#define TARGET_BINDS_LOCAL_P darwin_binds_local_p
#endif

#undef TARGET_MS_BITFIELD_LAYOUT_P
#define TARGET_MS_BITFIELD_LAYOUT_P rs6000_ms_bitfield_layout_p

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK rs6000_output_mi_thunk

#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL rs6000_function_ok_for_sibcall

#undef TARGET_REGISTER_MOVE_COST
#define TARGET_REGISTER_MOVE_COST rs6000_register_move_cost
#undef TARGET_MEMORY_MOVE_COST
#define TARGET_MEMORY_MOVE_COST rs6000_memory_move_cost
#undef TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS
#define TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS \
  rs6000_ira_change_pseudo_allocno_class
#undef TARGET_CANNOT_COPY_INSN_P
#define TARGET_CANNOT_COPY_INSN_P rs6000_cannot_copy_insn_p
#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS rs6000_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST hook_int_rtx_mode_as_bool_0
#undef TARGET_INSN_COST
#define TARGET_INSN_COST rs6000_insn_cost

#undef TARGET_INIT_DWARF_REG_SIZES_EXTRA
#define TARGET_INIT_DWARF_REG_SIZES_EXTRA rs6000_init_dwarf_reg_sizes_extra

#undef TARGET_PROMOTE_FUNCTION_MODE
#define TARGET_PROMOTE_FUNCTION_MODE rs6000_promote_function_mode

#undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
#define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE rs6000_override_options_after_change

#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY rs6000_return_in_memory

#undef TARGET_RETURN_IN_MSB
#define TARGET_RETURN_IN_MSB rs6000_return_in_msb

#undef TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS setup_incoming_varargs

/* Always strict argument naming on rs6000.  */
#undef TARGET_STRICT_ARGUMENT_NAMING
#define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
#undef TARGET_PRETEND_OUTGOING_VARARGS_NAMED
#define TARGET_PRETEND_OUTGOING_VARARGS_NAMED hook_bool_CUMULATIVE_ARGS_true
#undef TARGET_SPLIT_COMPLEX_ARG
#define TARGET_SPLIT_COMPLEX_ARG hook_bool_const_tree_true
#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK rs6000_must_pass_in_stack
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE rs6000_pass_by_reference
#undef TARGET_ARG_PARTIAL_BYTES
#define TARGET_ARG_PARTIAL_BYTES rs6000_arg_partial_bytes
#undef TARGET_FUNCTION_ARG_ADVANCE
#define TARGET_FUNCTION_ARG_ADVANCE rs6000_function_arg_advance
#undef TARGET_FUNCTION_ARG
#define TARGET_FUNCTION_ARG rs6000_function_arg
#undef TARGET_FUNCTION_ARG_PADDING
#define TARGET_FUNCTION_ARG_PADDING rs6000_function_arg_padding
#undef TARGET_FUNCTION_ARG_BOUNDARY
#define TARGET_FUNCTION_ARG_BOUNDARY rs6000_function_arg_boundary

#undef TARGET_BUILD_BUILTIN_VA_LIST
#define TARGET_BUILD_BUILTIN_VA_LIST rs6000_build_builtin_va_list

#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START rs6000_va_start

#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR rs6000_gimplify_va_arg

#undef TARGET_EH_RETURN_FILTER_MODE
#define TARGET_EH_RETURN_FILTER_MODE rs6000_eh_return_filter_mode

#undef TARGET_TRANSLATE_MODE_ATTRIBUTE
#define TARGET_TRANSLATE_MODE_ATTRIBUTE rs6000_translate_mode_attribute

#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P rs6000_scalar_mode_supported_p

#undef TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P
#define TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P \
  rs6000_libgcc_floating_mode_supported_p

#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P rs6000_vector_mode_supported_p

#undef TARGET_FLOATN_MODE
#define TARGET_FLOATN_MODE rs6000_floatn_mode

#undef TARGET_INVALID_ARG_FOR_UNPROTOTYPED_FN
#define TARGET_INVALID_ARG_FOR_UNPROTOTYPED_FN invalid_arg_for_unprototyped_fn

#undef TARGET_MD_ASM_ADJUST
#define TARGET_MD_ASM_ADJUST rs6000_md_asm_adjust

#undef TARGET_OPTION_OVERRIDE
#define TARGET_OPTION_OVERRIDE rs6000_option_override

#undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
#define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
  rs6000_builtin_vectorized_function

#undef TARGET_VECTORIZE_BUILTIN_MD_VECTORIZED_FUNCTION
#define TARGET_VECTORIZE_BUILTIN_MD_VECTORIZED_FUNCTION \
  rs6000_builtin_md_vectorized_function

#undef TARGET_STACK_PROTECT_GUARD
#define TARGET_STACK_PROTECT_GUARD rs6000_init_stack_protect_guard

#if !TARGET_MACHO
#undef TARGET_STACK_PROTECT_FAIL
#define TARGET_STACK_PROTECT_FAIL rs6000_stack_protect_fail
1587 #endif
1588 
1589 #ifdef HAVE_AS_TLS
1590 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
1591 #define TARGET_ASM_OUTPUT_DWARF_DTPREL rs6000_output_dwarf_dtprel
1592 #endif
1593 
1594 /* Use a 32-bit anchor range.  This leads to sequences like:
1595 
1596 	addis	tmp,anchor,high
1597 	add	dest,tmp,low
1598 
1599    where tmp itself acts as an anchor, and can be shared between
1600    accesses to the same 64k page.  */
1601 #undef TARGET_MIN_ANCHOR_OFFSET
1602 #define TARGET_MIN_ANCHOR_OFFSET -0x7fffffff - 1
1603 #undef TARGET_MAX_ANCHOR_OFFSET
1604 #define TARGET_MAX_ANCHOR_OFFSET 0x7fffffff
1605 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
1606 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P rs6000_use_blocks_for_constant_p
1607 #undef TARGET_USE_BLOCKS_FOR_DECL_P
1608 #define TARGET_USE_BLOCKS_FOR_DECL_P rs6000_use_blocks_for_decl_p
1609 
1610 #undef TARGET_BUILTIN_RECIPROCAL
1611 #define TARGET_BUILTIN_RECIPROCAL rs6000_builtin_reciprocal
1612 
1613 #undef TARGET_SECONDARY_RELOAD
1614 #define TARGET_SECONDARY_RELOAD rs6000_secondary_reload
1615 #undef TARGET_SECONDARY_MEMORY_NEEDED
1616 #define TARGET_SECONDARY_MEMORY_NEEDED rs6000_secondary_memory_needed
1617 #undef TARGET_SECONDARY_MEMORY_NEEDED_MODE
1618 #define TARGET_SECONDARY_MEMORY_NEEDED_MODE rs6000_secondary_memory_needed_mode
1619 
1620 #undef TARGET_LEGITIMATE_ADDRESS_P
1621 #define TARGET_LEGITIMATE_ADDRESS_P rs6000_legitimate_address_p
1622 
1623 #undef TARGET_MODE_DEPENDENT_ADDRESS_P
1624 #define TARGET_MODE_DEPENDENT_ADDRESS_P rs6000_mode_dependent_address_p
1625 
1626 #undef TARGET_COMPUTE_PRESSURE_CLASSES
1627 #define TARGET_COMPUTE_PRESSURE_CLASSES rs6000_compute_pressure_classes
1628 
1629 #undef TARGET_CAN_ELIMINATE
1630 #define TARGET_CAN_ELIMINATE rs6000_can_eliminate
1631 
1632 #undef TARGET_CONDITIONAL_REGISTER_USAGE
1633 #define TARGET_CONDITIONAL_REGISTER_USAGE rs6000_conditional_register_usage
1634 
1635 #undef TARGET_SCHED_REASSOCIATION_WIDTH
1636 #define TARGET_SCHED_REASSOCIATION_WIDTH rs6000_reassociation_width
1637 
1638 #undef TARGET_TRAMPOLINE_INIT
1639 #define TARGET_TRAMPOLINE_INIT rs6000_trampoline_init
1640 
1641 #undef TARGET_FUNCTION_VALUE
1642 #define TARGET_FUNCTION_VALUE rs6000_function_value
1643 
1644 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
1645 #define TARGET_OPTION_VALID_ATTRIBUTE_P rs6000_valid_attribute_p
1646 
1647 #undef TARGET_OPTION_SAVE
1648 #define TARGET_OPTION_SAVE rs6000_function_specific_save
1649 
1650 #undef TARGET_OPTION_RESTORE
1651 #define TARGET_OPTION_RESTORE rs6000_function_specific_restore
1652 
1653 #undef TARGET_OPTION_PRINT
1654 #define TARGET_OPTION_PRINT rs6000_function_specific_print
1655 
1656 #undef TARGET_CAN_INLINE_P
1657 #define TARGET_CAN_INLINE_P rs6000_can_inline_p
1658 
1659 #undef TARGET_SET_CURRENT_FUNCTION
1660 #define TARGET_SET_CURRENT_FUNCTION rs6000_set_current_function
1661 
1662 #undef TARGET_LEGITIMATE_CONSTANT_P
1663 #define TARGET_LEGITIMATE_CONSTANT_P rs6000_legitimate_constant_p
1664 
1665 #undef TARGET_VECTORIZE_VEC_PERM_CONST
1666 #define TARGET_VECTORIZE_VEC_PERM_CONST rs6000_vectorize_vec_perm_const
1667 
1668 #undef TARGET_CAN_USE_DOLOOP_P
1669 #define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
1670 
1671 #undef TARGET_PREDICT_DOLOOP_P
1672 #define TARGET_PREDICT_DOLOOP_P rs6000_predict_doloop_p
1673 
1674 #undef TARGET_HAVE_COUNT_REG_DECR_P
1675 #define TARGET_HAVE_COUNT_REG_DECR_P true
1676 
1677 /* 1000000000 is infinite cost in IVOPTs.  */
1678 #undef TARGET_DOLOOP_COST_FOR_GENERIC
1679 #define TARGET_DOLOOP_COST_FOR_GENERIC 1000000000
1680 
1681 #undef TARGET_DOLOOP_COST_FOR_ADDRESS
1682 #define TARGET_DOLOOP_COST_FOR_ADDRESS 1000000000
1683 
1684 #undef TARGET_PREFERRED_DOLOOP_MODE
1685 #define TARGET_PREFERRED_DOLOOP_MODE rs6000_preferred_doloop_mode
1686 
1687 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
1688 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV rs6000_atomic_assign_expand_fenv
1689 
1690 #undef TARGET_LIBGCC_CMP_RETURN_MODE
1691 #define TARGET_LIBGCC_CMP_RETURN_MODE rs6000_abi_word_mode
1692 #undef TARGET_LIBGCC_SHIFT_COUNT_MODE
1693 #define TARGET_LIBGCC_SHIFT_COUNT_MODE rs6000_abi_word_mode
1694 #undef TARGET_UNWIND_WORD_MODE
1695 #define TARGET_UNWIND_WORD_MODE rs6000_abi_word_mode
1696 
1697 #undef TARGET_OFFLOAD_OPTIONS
1698 #define TARGET_OFFLOAD_OPTIONS rs6000_offload_options
1699 
1700 #undef TARGET_C_MODE_FOR_SUFFIX
1701 #define TARGET_C_MODE_FOR_SUFFIX rs6000_c_mode_for_suffix
1702 
1703 #undef TARGET_INVALID_BINARY_OP
1704 #define TARGET_INVALID_BINARY_OP rs6000_invalid_binary_op
1705 
1706 #undef TARGET_OPTAB_SUPPORTED_P
1707 #define TARGET_OPTAB_SUPPORTED_P rs6000_optab_supported_p
1708 
1709 #undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
1710 #define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 1
1711 
1712 #undef TARGET_COMPARE_VERSION_PRIORITY
1713 #define TARGET_COMPARE_VERSION_PRIORITY rs6000_compare_version_priority
1714 
1715 #undef TARGET_GENERATE_VERSION_DISPATCHER_BODY
1716 #define TARGET_GENERATE_VERSION_DISPATCHER_BODY				\
1717   rs6000_generate_version_dispatcher_body
1718 
1719 #undef TARGET_GET_FUNCTION_VERSIONS_DISPATCHER
1720 #define TARGET_GET_FUNCTION_VERSIONS_DISPATCHER				\
1721   rs6000_get_function_versions_dispatcher
1722 
1723 #undef TARGET_OPTION_FUNCTION_VERSIONS
1724 #define TARGET_OPTION_FUNCTION_VERSIONS common_function_versions
1725 
1726 #undef TARGET_HARD_REGNO_NREGS
1727 #define TARGET_HARD_REGNO_NREGS rs6000_hard_regno_nregs_hook
1728 #undef TARGET_HARD_REGNO_MODE_OK
1729 #define TARGET_HARD_REGNO_MODE_OK rs6000_hard_regno_mode_ok
1730 
1731 #undef TARGET_MODES_TIEABLE_P
1732 #define TARGET_MODES_TIEABLE_P rs6000_modes_tieable_p
1733 
1734 #undef TARGET_HARD_REGNO_CALL_PART_CLOBBERED
1735 #define TARGET_HARD_REGNO_CALL_PART_CLOBBERED \
1736   rs6000_hard_regno_call_part_clobbered
1737 
1738 #undef TARGET_SLOW_UNALIGNED_ACCESS
1739 #define TARGET_SLOW_UNALIGNED_ACCESS rs6000_slow_unaligned_access
1740 
1741 #undef TARGET_CAN_CHANGE_MODE_CLASS
1742 #define TARGET_CAN_CHANGE_MODE_CLASS rs6000_can_change_mode_class
1743 
1744 #undef TARGET_CONSTANT_ALIGNMENT
1745 #define TARGET_CONSTANT_ALIGNMENT rs6000_constant_alignment
1746 
1747 #undef TARGET_STARTING_FRAME_OFFSET
1748 #define TARGET_STARTING_FRAME_OFFSET rs6000_starting_frame_offset
1749 
1750 #undef TARGET_SETJMP_PRESERVES_NONVOLATILE_REGS_P
1751 #define TARGET_SETJMP_PRESERVES_NONVOLATILE_REGS_P hook_bool_void_true
1752 
1753 #undef TARGET_MANGLE_DECL_ASSEMBLER_NAME
1754 #define TARGET_MANGLE_DECL_ASSEMBLER_NAME rs6000_mangle_decl_assembler_name
1755 
1756 #undef TARGET_CANNOT_SUBSTITUTE_MEM_EQUIV_P
1757 #define TARGET_CANNOT_SUBSTITUTE_MEM_EQUIV_P \
1758   rs6000_cannot_substitute_mem_equiv_p
1759 
1760 #undef TARGET_INVALID_CONVERSION
1761 #define TARGET_INVALID_CONVERSION rs6000_invalid_conversion
1762 
1763 #undef TARGET_NEED_IPA_FN_TARGET_INFO
1764 #define TARGET_NEED_IPA_FN_TARGET_INFO rs6000_need_ipa_fn_target_info
1765 
1766 #undef TARGET_UPDATE_IPA_FN_TARGET_INFO
1767 #define TARGET_UPDATE_IPA_FN_TARGET_INFO rs6000_update_ipa_fn_target_info
1768 
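/* Editorial note: each TARGET_* macro above first #undefs the default set in
   target-def.h and then points the hook at the rs6000 implementation.
   "target-def.h" collapses all of these macros into the single initializer
   used to build the target vector, conceptually:

       struct gcc_target targetm = TARGET_INITIALIZER;

   so overriding a hook is just a matter of redefining its macro before that
   initializer is expanded.  */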
1769 
1770 /* Processor table.  */
1771 struct rs6000_ptt
1772 {
1773   const char *const name;		/* Canonical processor name.  */
1774   const enum processor_type processor;	/* Processor type enum value.  */
1775   const HOST_WIDE_INT target_enable;	/* Target flags to enable.  */
1776 };
1777 
1778 static struct rs6000_ptt const processor_target_table[] =
1779 {
1780 #define RS6000_CPU(NAME, CPU, FLAGS) { NAME, CPU, FLAGS },
1781 #include "rs6000-cpus.def"
1782 #undef RS6000_CPU
1783 };
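/* Editorial sketch: the X-macro include above stamps out one table row per
   RS6000_CPU entry in rs6000-cpus.def.  A hypothetical entry such as

       RS6000_CPU ("power9", PROCESSOR_POWER9, POWERPC_MASKS)

   would expand to the initializer

       { "power9", PROCESSOR_POWER9, POWERPC_MASKS },

   so the table tracks rs6000-cpus.def without manual duplication.  */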
1784 
1785 /* Look up a processor name for -mcpu=xxx and -mtune=xxx.  Return -1 if the
1786    name is invalid.  */
1787 
1788 static int
1789 rs6000_cpu_name_lookup (const char *name)
1790 {
1791   size_t i;
1792 
1793   if (name != NULL)
1794     {
1795       for (i = 0; i < ARRAY_SIZE (processor_target_table); i++)
1796 	if (! strcmp (name, processor_target_table[i].name))
1797 	  return (int)i;
1798     }
1799 
1800   return -1;
1801 }
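/* Usage sketch (editorial, not in the original source): how an option
   handler might consume the lookup result.  */
#if 0
  int idx = rs6000_cpu_name_lookup ("power9");
  if (idx < 0)
    error ("unknown value for %<-mcpu%>");	/* invalid name => -1.  */
  else
    rs6000_cpu = processor_target_table[idx].processor;
#endif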
1802 
1803 
1804 /* Return number of consecutive hard regs needed starting at reg REGNO
1805    to hold something of mode MODE.
1806    This is ordinarily the length in words of a value of mode MODE
1807    but can be less for certain modes in special long registers.
1808 
1809    POWER and PowerPC GPRs hold 32 bits worth;
1810    PowerPC64 GPRs and FPRs hold 64 bits worth.  */
1811 
1812 static int
1813 rs6000_hard_regno_nregs_internal (int regno, machine_mode mode)
1814 {
1815   unsigned HOST_WIDE_INT reg_size;
1816 
1817   /* 128-bit floating point usually takes 2 registers, unless it is IEEE
1818      128-bit floating point that can go in vector registers, which has VSX
1819      memory addressing.  */
1820   if (FP_REGNO_P (regno))
1821     reg_size = (VECTOR_MEM_VSX_P (mode) || VECTOR_ALIGNMENT_P (mode)
1822 		? UNITS_PER_VSX_WORD
1823 		: UNITS_PER_FP_WORD);
1824 
1825   else if (ALTIVEC_REGNO_P (regno))
1826     reg_size = UNITS_PER_ALTIVEC_WORD;
1827 
1828   else
1829     reg_size = UNITS_PER_WORD;
1830 
1831   return (GET_MODE_SIZE (mode) + reg_size - 1) / reg_size;
1832 }
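/* Worked example (editorial): the rounding-up division above means that
   V4SImode (16 bytes) needs (16 + 8 - 1) / 8 = 2 GPRs on a 64-bit target,
   but only (16 + 16 - 1) / 16 = 1 Altivec register, assuming the usual
   UNITS_PER_WORD == 8 and UNITS_PER_ALTIVEC_WORD == 16.  */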
1833 
1834 /* Value is 1 if hard register REGNO can hold a value of machine-mode
1835    MODE.  */
1836 static int
1837 rs6000_hard_regno_mode_ok_uncached (int regno, machine_mode mode)
1838 {
1839   int last_regno = regno + rs6000_hard_regno_nregs[mode][regno] - 1;
1840 
1841   if (COMPLEX_MODE_P (mode))
1842     mode = GET_MODE_INNER (mode);
1843 
1844   /* Vector pair modes need even/odd VSX register pairs.  Only allow vector
1845      registers.  */
1846   if (mode == OOmode)
1847     return (TARGET_MMA && VSX_REGNO_P (regno) && (regno & 1) == 0);
1848 
1849   /* MMA accumulator modes need FPR registers divisible by 4.  */
1850   if (mode == XOmode)
1851     return (TARGET_MMA && FP_REGNO_P (regno) && (regno & 3) == 0);
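  /* Editorial example: the two checks above mean an OOmode pair may start at
     any even VSX register (vs0, vs2, ...), while an XOmode accumulator must
     start at an FPR whose number is a multiple of 4 (f0, f4, f8, ...).  */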
1852 
1853   /* PTImode can only go in GPRs.  Quad word memory operations require
1854      even/odd register pairs, and we use PTImode where we need to deal with
1855      quad word memory operations.  Don't allow quad words in the argument or
1856      frame pointer registers, just registers 0..31.  */
1857   if (mode == PTImode)
1858     return (IN_RANGE (regno, FIRST_GPR_REGNO, LAST_GPR_REGNO)
1859 	    && IN_RANGE (last_regno, FIRST_GPR_REGNO, LAST_GPR_REGNO)
1860 	    && ((regno & 1) == 0));
1861 
1862   /* The VSX registers that overlap the FPRs are wider than the FPRs are on
1863      non-VSX implementations.  Don't allow an item to be split between a FP
1864      register and an Altivec register.  Allow TImode in all VSX registers if
1865      the user asked for it.  */
1866   if (TARGET_VSX && VSX_REGNO_P (regno)
1867       && (VECTOR_MEM_VSX_P (mode)
1868 	  || VECTOR_ALIGNMENT_P (mode)
1869 	  || reg_addr[mode].scalar_in_vmx_p
1870 	  || mode == TImode
1871 	  || (TARGET_VADDUQM && mode == V1TImode)))
1872     {
1873       if (FP_REGNO_P (regno))
1874 	return FP_REGNO_P (last_regno);
1875 
1876       if (ALTIVEC_REGNO_P (regno))
1877 	{
1878 	  if (GET_MODE_SIZE (mode) < 16 && !reg_addr[mode].scalar_in_vmx_p)
1879 	    return 0;
1880 
1881 	  return ALTIVEC_REGNO_P (last_regno);
1882 	}
1883     }
1884 
1885   /* The GPRs can hold any mode, but values bigger than one register
1886      cannot go past R31.  */
1887   if (INT_REGNO_P (regno))
1888     return INT_REGNO_P (last_regno);
1889 
1890   /* The float registers (except for VSX vector modes) can only hold floating
1891      modes and DImode.  */
1892   if (FP_REGNO_P (regno))
1893     {
1894       if (VECTOR_ALIGNMENT_P (mode))
1895 	return false;
1896 
1897       if (SCALAR_FLOAT_MODE_P (mode)
1898 	  && (mode != TDmode || (regno % 2) == 0)
1899 	  && FP_REGNO_P (last_regno))
1900 	return 1;
1901 
1902       if (GET_MODE_CLASS (mode) == MODE_INT)
1903 	{
1904 	  if (GET_MODE_SIZE (mode) == UNITS_PER_FP_WORD)
1905 	    return 1;
1906 
1907 	  if (TARGET_P8_VECTOR && (mode == SImode))
1908 	    return 1;
1909 
1910 	  if (TARGET_P9_VECTOR && (mode == QImode || mode == HImode))
1911 	    return 1;
1912 	}
1913 
1914       return 0;
1915     }
1916 
1917   /* The CR register can only hold CC modes.  */
1918   if (CR_REGNO_P (regno))
1919     return GET_MODE_CLASS (mode) == MODE_CC;
1920 
1921   if (CA_REGNO_P (regno))
1922     return mode == Pmode || mode == SImode;
1923 
1924   /* AltiVec vectors go only in AltiVec registers.  */
1925   if (ALTIVEC_REGNO_P (regno))
1926     return (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode)
1927 	    || mode == V1TImode);
1928 
1929   /* We cannot put non-VSX TImode or PTImode anywhere except the general
1930      registers, and the value must fit within the register set.  */
1931 
1932   return GET_MODE_SIZE (mode) <= UNITS_PER_WORD;
1933 }
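/* Editorial example: the checks above allow TDmode (16-byte decimal float)
   only in an even/odd FPR pair, so a value may live at f0 or f2 but the
   (regno % 2) test rejects f1; similarly PTImode may start at r4 but never
   at r5.  */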
1934 
1935 /* Implement TARGET_HARD_REGNO_NREGS.  */
1936 
1937 static unsigned int
1938 rs6000_hard_regno_nregs_hook (unsigned int regno, machine_mode mode)
1939 {
1940   return rs6000_hard_regno_nregs[mode][regno];
1941 }
1942 
1943 /* Implement TARGET_HARD_REGNO_MODE_OK.  */
1944 
1945 static bool
1946 rs6000_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
1947 {
1948   return rs6000_hard_regno_mode_ok_p[mode][regno];
1949 }
1950 
1951 /* Implement TARGET_MODES_TIEABLE_P.
1952 
1953    PTImode cannot tie with other modes because PTImode is restricted to even
1954    GPR registers, and TImode can go in any GPR as well as VSX registers (PR
1955    57744).
1956 
1957    Similarly, don't allow OOmode (vector pair, restricted to even VSX
1958    registers) or XOmode (vector quad, restricted to FPR registers divisible
1959    by 4) to tie with other modes.
1960 
1961    Altivec/VSX vector tests were moved ahead of scalar float mode, so that IEEE
1962    128-bit floating point on VSX systems ties with other vectors.  */
1963 
1964 static bool
1965 rs6000_modes_tieable_p (machine_mode mode1, machine_mode mode2)
1966 {
1967   if (mode1 == PTImode || mode1 == OOmode || mode1 == XOmode
1968       || mode2 == PTImode || mode2 == OOmode || mode2 == XOmode)
1969     return mode1 == mode2;
1970 
1971   if (ALTIVEC_OR_VSX_VECTOR_MODE (mode1))
1972     return ALTIVEC_OR_VSX_VECTOR_MODE (mode2);
1973   if (ALTIVEC_OR_VSX_VECTOR_MODE (mode2))
1974     return false;
1975 
1976   if (SCALAR_FLOAT_MODE_P (mode1))
1977     return SCALAR_FLOAT_MODE_P (mode2);
1978   if (SCALAR_FLOAT_MODE_P (mode2))
1979     return false;
1980 
1981   if (GET_MODE_CLASS (mode1) == MODE_CC)
1982     return GET_MODE_CLASS (mode2) == MODE_CC;
1983   if (GET_MODE_CLASS (mode2) == MODE_CC)
1984     return false;
1985 
1986   return true;
1987 }
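/* Editorial example: under these rules SFmode and DFmode tie (both scalar
   float), V4SImode and V2DFmode tie (both Altivec/VSX vectors), but TImode
   never ties with PTImode because of the early-out at the top.  */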
1988 
1989 /* Implement TARGET_HARD_REGNO_CALL_PART_CLOBBERED.  */
1990 
1991 static bool
1992 rs6000_hard_regno_call_part_clobbered (unsigned int, unsigned int regno,
1993 				       machine_mode mode)
1994 {
1995   if (TARGET_32BIT
1996       && TARGET_POWERPC64
1997       && GET_MODE_SIZE (mode) > 4
1998       && INT_REGNO_P (regno))
1999     return true;
2000 
2001   if (TARGET_VSX
2002       && FP_REGNO_P (regno)
2003       && GET_MODE_SIZE (mode) > 8
2004       && !FLOAT128_2REG_P (mode))
2005     return true;
2006 
2007   return false;
2008 }
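/* Editorial example: with -m32 -mpowerpc64, only the low 32 bits of a GPR
   survive a call, so a DImode value in a GPR is partially clobbered.
   Likewise under VSX only the FPR half (64 bits) of a register overlapping
   the FPRs is preserved, so a V2DFmode value there is partially clobbered
   as well.  */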
2009 
2010 /* Print interesting facts about registers.  */
2011 static void
2012 rs6000_debug_reg_print (int first_regno, int last_regno, const char *reg_name)
2013 {
2014   int r, m;
2015 
2016   for (r = first_regno; r <= last_regno; ++r)
2017     {
2018       const char *comma = "";
2019       int len;
2020 
2021       if (first_regno == last_regno)
2022 	fprintf (stderr, "%s:\t", reg_name);
2023       else
2024 	fprintf (stderr, "%s%d:\t", reg_name, r - first_regno);
2025 
2026       len = 8;
2027       for (m = 0; m < NUM_MACHINE_MODES; ++m)
2028 	if (rs6000_hard_regno_mode_ok_p[m][r] && rs6000_hard_regno_nregs[m][r])
2029 	  {
2030 	    if (len > 70)
2031 	      {
2032 		fprintf (stderr, ",\n\t");
2033 		len = 8;
2034 		comma = "";
2035 	      }
2036 
2037 	    if (rs6000_hard_regno_nregs[m][r] > 1)
2038 	      len += fprintf (stderr, "%s%s/%d", comma, GET_MODE_NAME (m),
2039 			     rs6000_hard_regno_nregs[m][r]);
2040 	    else
2041 	      len += fprintf (stderr, "%s%s", comma, GET_MODE_NAME (m));
2042 
2043 	    comma = ", ";
2044 	  }
2045 
2046       if (call_used_or_fixed_reg_p (r))
2047 	{
2048 	  if (len > 70)
2049 	    {
2050 	      fprintf (stderr, ",\n\t");
2051 	      len = 8;
2052 	      comma = "";
2053 	    }
2054 
2055 	  len += fprintf (stderr, "%s%s", comma, "call-used");
2056 	  comma = ", ";
2057 	}
2058 
2059       if (fixed_regs[r])
2060 	{
2061 	  if (len > 70)
2062 	    {
2063 	      fprintf (stderr, ",\n\t");
2064 	      len = 8;
2065 	      comma = "";
2066 	    }
2067 
2068 	  len += fprintf (stderr, "%s%s", comma, "fixed");
2069 	  comma = ", ";
2070 	}
2071 
2072       if (len > 70)
2073 	{
2074 	  fprintf (stderr, ",\n\t");
2075 	  comma = "";
2076 	}
2077 
2078       len += fprintf (stderr, "%sreg-class = %s", comma,
2079 		      reg_class_names[(int)rs6000_regno_regclass[r]]);
2080       comma = ", ";
2081 
2082       if (len > 70)
2083 	{
2084 	  fprintf (stderr, ",\n\t");
2085 	  comma = "";
2086 	}
2087 
2088       fprintf (stderr, "%sregno = %d\n", comma, r);
2089     }
2090 }
2091 
2092 static const char *
2093 rs6000_debug_vector_unit (enum rs6000_vector v)
2094 {
2095   const char *ret;
2096 
2097   switch (v)
2098     {
2099     case VECTOR_NONE:	   ret = "none";      break;
2100     case VECTOR_ALTIVEC:   ret = "altivec";   break;
2101     case VECTOR_VSX:	   ret = "vsx";       break;
2102     case VECTOR_P8_VECTOR: ret = "p8_vector"; break;
2103     default:		   ret = "unknown";   break;
2104     }
2105 
2106   return ret;
2107 }
2108 
2109 /* Inner function printing just the address mask for a particular reload
2110    register class.  */
2111 DEBUG_FUNCTION char *
2112 rs6000_debug_addr_mask (addr_mask_type mask, bool keep_spaces)
2113 {
2114   static char ret[8];
2115   char *p = ret;
2116 
2117   if ((mask & RELOAD_REG_VALID) != 0)
2118     *p++ = 'v';
2119   else if (keep_spaces)
2120     *p++ = ' ';
2121 
2122   if ((mask & RELOAD_REG_MULTIPLE) != 0)
2123     *p++ = 'm';
2124   else if (keep_spaces)
2125     *p++ = ' ';
2126 
2127   if ((mask & RELOAD_REG_INDEXED) != 0)
2128     *p++ = 'i';
2129   else if (keep_spaces)
2130     *p++ = ' ';
2131 
2132   if ((mask & RELOAD_REG_QUAD_OFFSET) != 0)
2133     *p++ = 'O';
2134   else if ((mask & RELOAD_REG_OFFSET) != 0)
2135     *p++ = 'o';
2136   else if (keep_spaces)
2137     *p++ = ' ';
2138 
2139   if ((mask & RELOAD_REG_PRE_INCDEC) != 0)
2140     *p++ = '+';
2141   else if (keep_spaces)
2142     *p++ = ' ';
2143 
2144   if ((mask & RELOAD_REG_PRE_MODIFY) != 0)
2145     *p++ = '+';
2146   else if (keep_spaces)
2147     *p++ = ' ';
2148 
2149   if ((mask & RELOAD_REG_AND_M16) != 0)
2150     *p++ = '&';
2151   else if (keep_spaces)
2152     *p++ = ' ';
2153 
2154   *p = '\0';
2155 
2156   return ret;
2157 }
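/* Editorial example: a mask of RELOAD_REG_VALID | RELOAD_REG_INDEXED
   | RELOAD_REG_OFFSET comes back as "vio" (or "v io   " with keep_spaces),
   which is how the per-mode lines printed below encode each reload register
   class.  */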
2158 
2159 /* Print the address masks in a human-readable fashion.  */
2160 DEBUG_FUNCTION void
2161 rs6000_debug_print_mode (ssize_t m)
2162 {
2163   ssize_t rc;
2164   int spaces = 0;
2165 
2166   fprintf (stderr, "Mode: %-5s", GET_MODE_NAME (m));
2167   for (rc = 0; rc < N_RELOAD_REG; rc++)
2168     fprintf (stderr, " %s: %s", reload_reg_map[rc].name,
2169 	     rs6000_debug_addr_mask (reg_addr[m].addr_mask[rc], true));
2170 
2171   if ((reg_addr[m].reload_store != CODE_FOR_nothing)
2172       || (reg_addr[m].reload_load != CODE_FOR_nothing))
2173     {
2174       fprintf (stderr, "%*s  Reload=%c%c", spaces, "",
2175 	       (reg_addr[m].reload_store != CODE_FOR_nothing) ? 's' : '*',
2176 	       (reg_addr[m].reload_load != CODE_FOR_nothing) ? 'l' : '*');
2177       spaces = 0;
2178     }
2179   else
2180     spaces += strlen ("  Reload=sl");
2181 
2182   if (reg_addr[m].scalar_in_vmx_p)
2183     {
2184       fprintf (stderr, "%*s  Upper=y", spaces, "");
2185       spaces = 0;
2186     }
2187   else
2188     spaces += strlen ("  Upper=y");
2189 
2190   if (rs6000_vector_unit[m] != VECTOR_NONE
2191       || rs6000_vector_mem[m] != VECTOR_NONE)
2192     {
2193       fprintf (stderr, "%*s  vector: arith=%-10s mem=%s",
2194 	       spaces, "",
2195 	       rs6000_debug_vector_unit (rs6000_vector_unit[m]),
2196 	       rs6000_debug_vector_unit (rs6000_vector_mem[m]));
2197     }
2198 
2199   fputs ("\n", stderr);
2200 }
2201 
2202 #define DEBUG_FMT_ID "%-32s= "
2203 #define DEBUG_FMT_D   DEBUG_FMT_ID "%d\n"
2204 #define DEBUG_FMT_WX  DEBUG_FMT_ID "%#.12" HOST_WIDE_INT_PRINT "x: "
2205 #define DEBUG_FMT_S   DEBUG_FMT_ID "%s\n"
2206 
2207 /* Print various interesting information with -mdebug=reg.  */
2208 static void
2209 rs6000_debug_reg_global (void)
2210 {
2211   static const char *const tf[2] = { "false", "true" };
2212   const char *nl = (const char *)0;
2213   int m;
2214   size_t m1, m2, v;
2215   char costly_num[20];
2216   char nop_num[20];
2217   char flags_buffer[40];
2218   const char *costly_str;
2219   const char *nop_str;
2220   const char *trace_str;
2221   const char *abi_str;
2222   const char *cmodel_str;
2223   struct cl_target_option cl_opts;
2224 
2225   /* Modes we want tieable information on.  */
2226   static const machine_mode print_tieable_modes[] = {
2227     QImode,
2228     HImode,
2229     SImode,
2230     DImode,
2231     TImode,
2232     PTImode,
2233     SFmode,
2234     DFmode,
2235     TFmode,
2236     IFmode,
2237     KFmode,
2238     SDmode,
2239     DDmode,
2240     TDmode,
2241     V2SImode,
2242     V2SFmode,
2243     V16QImode,
2244     V8HImode,
2245     V4SImode,
2246     V2DImode,
2247     V1TImode,
2248     V32QImode,
2249     V16HImode,
2250     V8SImode,
2251     V4DImode,
2252     V2TImode,
2253     V4SFmode,
2254     V2DFmode,
2255     V8SFmode,
2256     V4DFmode,
2257     OOmode,
2258     XOmode,
2259     CCmode,
2260     CCUNSmode,
2261     CCEQmode,
2262     CCFPmode,
2263   };
2264 
2265   /* Virtual regs we are interested in.  */
2266   static const struct {
2267     int regno;			/* register number.  */
2268     const char *name;		/* register name.  */
2269   } virtual_regs[] = {
2270     { STACK_POINTER_REGNUM,			"stack pointer:" },
2271     { TOC_REGNUM,				"toc:          " },
2272     { STATIC_CHAIN_REGNUM,			"static chain: " },
2273     { RS6000_PIC_OFFSET_TABLE_REGNUM,		"pic offset:   " },
2274     { HARD_FRAME_POINTER_REGNUM,		"hard frame:   " },
2275     { ARG_POINTER_REGNUM,			"arg pointer:  " },
2276     { FRAME_POINTER_REGNUM,			"frame pointer:" },
2277     { FIRST_PSEUDO_REGISTER,			"first pseudo: " },
2278     { FIRST_VIRTUAL_REGISTER,			"first virtual:" },
2279     { VIRTUAL_INCOMING_ARGS_REGNUM,		"incoming_args:" },
2280     { VIRTUAL_STACK_VARS_REGNUM,		"stack_vars:   " },
2281     { VIRTUAL_STACK_DYNAMIC_REGNUM,		"stack_dynamic:" },
2282     { VIRTUAL_OUTGOING_ARGS_REGNUM,		"outgoing_args:" },
2283     { VIRTUAL_CFA_REGNUM,			"cfa (frame):  " },
2284     { VIRTUAL_PREFERRED_STACK_BOUNDARY_REGNUM,	"stack boundary:" },
2285     { LAST_VIRTUAL_REGISTER,			"last virtual: " },
2286   };
2287 
2288   fputs ("\nHard register information:\n", stderr);
2289   rs6000_debug_reg_print (FIRST_GPR_REGNO, LAST_GPR_REGNO, "gr");
2290   rs6000_debug_reg_print (FIRST_FPR_REGNO, LAST_FPR_REGNO, "fp");
2291   rs6000_debug_reg_print (FIRST_ALTIVEC_REGNO,
2292 			  LAST_ALTIVEC_REGNO,
2293 			  "vs");
2294   rs6000_debug_reg_print (LR_REGNO, LR_REGNO, "lr");
2295   rs6000_debug_reg_print (CTR_REGNO, CTR_REGNO, "ctr");
2296   rs6000_debug_reg_print (CR0_REGNO, CR7_REGNO, "cr");
2297   rs6000_debug_reg_print (CA_REGNO, CA_REGNO, "ca");
2298   rs6000_debug_reg_print (VRSAVE_REGNO, VRSAVE_REGNO, "vrsave");
2299   rs6000_debug_reg_print (VSCR_REGNO, VSCR_REGNO, "vscr");
2300 
2301   fputs ("\nVirtual/stack/frame registers:\n", stderr);
2302   for (v = 0; v < ARRAY_SIZE (virtual_regs); v++)
2303     fprintf (stderr, "%s regno = %3d\n", virtual_regs[v].name, virtual_regs[v].regno);
2304 
2305   fprintf (stderr,
2306 	   "\n"
2307 	   "d  reg_class = %s\n"
2308 	   "f  reg_class = %s\n"
2309 	   "v  reg_class = %s\n"
2310 	   "wa reg_class = %s\n"
2311 	   "we reg_class = %s\n"
2312 	   "wr reg_class = %s\n"
2313 	   "wx reg_class = %s\n"
2314 	   "wA reg_class = %s\n"
2315 	   "\n",
2316 	   reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_d]],
2317 	   reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_f]],
2318 	   reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_v]],
2319 	   reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wa]],
2320 	   reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_we]],
2321 	   reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wr]],
2322 	   reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wx]],
2323 	   reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wA]]);
2324 
2325   nl = "\n";
2326   for (m = 0; m < NUM_MACHINE_MODES; ++m)
2327     rs6000_debug_print_mode (m);
2328 
2329   fputs ("\n", stderr);
2330 
2331   for (m1 = 0; m1 < ARRAY_SIZE (print_tieable_modes); m1++)
2332     {
2333       machine_mode mode1 = print_tieable_modes[m1];
2334       bool first_time = true;
2335 
2336       nl = (const char *)0;
2337       for (m2 = 0; m2 < ARRAY_SIZE (print_tieable_modes); m2++)
2338 	{
2339 	  machine_mode mode2 = print_tieable_modes[m2];
2340 	  if (mode1 != mode2 && rs6000_modes_tieable_p (mode1, mode2))
2341 	    {
2342 	      if (first_time)
2343 		{
2344 		  fprintf (stderr, "Tieable modes %s:", GET_MODE_NAME (mode1));
2345 		  nl = "\n";
2346 		  first_time = false;
2347 		}
2348 
2349 	      fprintf (stderr, " %s", GET_MODE_NAME (mode2));
2350 	    }
2351 	}
2352 
2353       if (!first_time)
2354 	fputs ("\n", stderr);
2355     }
2356 
2357   if (nl)
2358     fputs (nl, stderr);
2359 
2360   if (rs6000_recip_control)
2361     {
2362       fprintf (stderr, "\nReciprocal mask = 0x%x\n", rs6000_recip_control);
2363 
2364       for (m = 0; m < NUM_MACHINE_MODES; ++m)
2365 	if (rs6000_recip_bits[m])
2366 	  {
2367 	    fprintf (stderr,
2368 		     "Reciprocal estimate mode: %-5s divide: %s rsqrt: %s\n",
2369 		     GET_MODE_NAME (m),
2370 		     (RS6000_RECIP_AUTO_RE_P (m)
2371 		      ? "auto"
2372 		      : (RS6000_RECIP_HAVE_RE_P (m) ? "have" : "none")),
2373 		     (RS6000_RECIP_AUTO_RSQRTE_P (m)
2374 		      ? "auto"
2375 		      : (RS6000_RECIP_HAVE_RSQRTE_P (m) ? "have" : "none")));
2376 	  }
2377 
2378       fputs ("\n", stderr);
2379     }
2380 
2381   if (rs6000_cpu_index >= 0)
2382     {
2383       const char *name = processor_target_table[rs6000_cpu_index].name;
2384       HOST_WIDE_INT flags
2385 	= processor_target_table[rs6000_cpu_index].target_enable;
2386 
2387       sprintf (flags_buffer, "-mcpu=%s flags", name);
2388       rs6000_print_isa_options (stderr, 0, flags_buffer, flags);
2389     }
2390   else
2391     fprintf (stderr, DEBUG_FMT_S, "cpu", "<none>");
2392 
2393   if (rs6000_tune_index >= 0)
2394     {
2395       const char *name = processor_target_table[rs6000_tune_index].name;
2396       HOST_WIDE_INT flags
2397 	= processor_target_table[rs6000_tune_index].target_enable;
2398 
2399       sprintf (flags_buffer, "-mtune=%s flags", name);
2400       rs6000_print_isa_options (stderr, 0, flags_buffer, flags);
2401     }
2402   else
2403     fprintf (stderr, DEBUG_FMT_S, "tune", "<none>");
2404 
2405   cl_target_option_save (&cl_opts, &global_options, &global_options_set);
2406   rs6000_print_isa_options (stderr, 0, "rs6000_isa_flags",
2407 			    rs6000_isa_flags);
2408 
2409   rs6000_print_isa_options (stderr, 0, "rs6000_isa_flags_explicit",
2410 			    rs6000_isa_flags_explicit);
2411 
2412   rs6000_print_builtin_options (stderr, 0, "rs6000_builtin_mask",
2413 				rs6000_builtin_mask);
2414 
2415   rs6000_print_isa_options (stderr, 0, "TARGET_DEFAULT", TARGET_DEFAULT);
2416 
2417   fprintf (stderr, DEBUG_FMT_S, "--with-cpu default",
2418 	   OPTION_TARGET_CPU_DEFAULT ? OPTION_TARGET_CPU_DEFAULT : "<none>");
2419 
2420   switch (rs6000_sched_costly_dep)
2421     {
2422     case max_dep_latency:
2423       costly_str = "max_dep_latency";
2424       break;
2425 
2426     case no_dep_costly:
2427       costly_str = "no_dep_costly";
2428       break;
2429 
2430     case all_deps_costly:
2431       costly_str = "all_deps_costly";
2432       break;
2433 
2434     case true_store_to_load_dep_costly:
2435       costly_str = "true_store_to_load_dep_costly";
2436       break;
2437 
2438     case store_to_load_dep_costly:
2439       costly_str = "store_to_load_dep_costly";
2440       break;
2441 
2442     default:
2443       costly_str = costly_num;
2444       sprintf (costly_num, "%d", (int)rs6000_sched_costly_dep);
2445       break;
2446     }
2447 
2448   fprintf (stderr, DEBUG_FMT_S, "sched_costly_dep", costly_str);
2449 
2450   switch (rs6000_sched_insert_nops)
2451     {
2452     case sched_finish_regroup_exact:
2453       nop_str = "sched_finish_regroup_exact";
2454       break;
2455 
2456     case sched_finish_pad_groups:
2457       nop_str = "sched_finish_pad_groups";
2458       break;
2459 
2460     case sched_finish_none:
2461       nop_str = "sched_finish_none";
2462       break;
2463 
2464     default:
2465       nop_str = nop_num;
2466       sprintf (nop_num, "%d", (int)rs6000_sched_insert_nops);
2467       break;
2468     }
2469 
2470   fprintf (stderr, DEBUG_FMT_S, "sched_insert_nops", nop_str);
2471 
2472   switch (rs6000_sdata)
2473     {
2474     default:
2475     case SDATA_NONE:
2476       break;
2477 
2478     case SDATA_DATA:
2479       fprintf (stderr, DEBUG_FMT_S, "sdata", "data");
2480       break;
2481 
2482     case SDATA_SYSV:
2483       fprintf (stderr, DEBUG_FMT_S, "sdata", "sysv");
2484       break;
2485 
2486     case SDATA_EABI:
2487       fprintf (stderr, DEBUG_FMT_S, "sdata", "eabi");
2488       break;
2489 
2490     }
2491 
2492   switch (rs6000_traceback)
2493     {
2494     case traceback_default:	trace_str = "default";	break;
2495     case traceback_none:	trace_str = "none";	break;
2496     case traceback_part:	trace_str = "part";	break;
2497     case traceback_full:	trace_str = "full";	break;
2498     default:			trace_str = "unknown";	break;
2499     }
2500 
2501   fprintf (stderr, DEBUG_FMT_S, "traceback", trace_str);
2502 
2503   switch (rs6000_current_cmodel)
2504     {
2505     case CMODEL_SMALL:	cmodel_str = "small";	break;
2506     case CMODEL_MEDIUM:	cmodel_str = "medium";	break;
2507     case CMODEL_LARGE:	cmodel_str = "large";	break;
2508     default:		cmodel_str = "unknown";	break;
2509     }
2510 
2511   fprintf (stderr, DEBUG_FMT_S, "cmodel", cmodel_str);
2512 
2513   switch (rs6000_current_abi)
2514     {
2515     case ABI_NONE:	abi_str = "none";	break;
2516     case ABI_AIX:	abi_str = "aix";	break;
2517     case ABI_ELFv2:	abi_str = "ELFv2";	break;
2518     case ABI_V4:	abi_str = "V4";		break;
2519     case ABI_DARWIN:	abi_str = "darwin";	break;
2520     default:		abi_str = "unknown";	break;
2521     }
2522 
2523   fprintf (stderr, DEBUG_FMT_S, "abi", abi_str);
2524 
2525   if (rs6000_altivec_abi)
2526     fprintf (stderr, DEBUG_FMT_S, "altivec_abi", "true");
2527 
2528   if (rs6000_aix_extabi)
2529     fprintf (stderr, DEBUG_FMT_S, "AIX vec-extabi", "true");
2530 
2531   if (rs6000_darwin64_abi)
2532     fprintf (stderr, DEBUG_FMT_S, "darwin64_abi", "true");
2533 
2534   fprintf (stderr, DEBUG_FMT_S, "soft_float",
2535 	   (TARGET_SOFT_FLOAT ? "true" : "false"));
2536 
2537   if (TARGET_LINK_STACK)
2538     fprintf (stderr, DEBUG_FMT_S, "link_stack", "true");
2539 
2540   if (TARGET_P8_FUSION)
2541     {
2542       char options[80];
2543 
2544       strcpy (options, "power8");
2545       if (TARGET_P8_FUSION_SIGN)
2546 	strcat (options, ", sign");
2547 
2548       fprintf (stderr, DEBUG_FMT_S, "fusion", options);
2549     }
2550 
2551   fprintf (stderr, DEBUG_FMT_S, "plt-format",
2552 	   TARGET_SECURE_PLT ? "secure" : "bss");
2553   fprintf (stderr, DEBUG_FMT_S, "struct-return",
2554 	   aix_struct_return ? "aix" : "sysv");
2555   fprintf (stderr, DEBUG_FMT_S, "always_hint", tf[!!rs6000_always_hint]);
2556   fprintf (stderr, DEBUG_FMT_S, "sched_groups", tf[!!rs6000_sched_groups]);
2557   fprintf (stderr, DEBUG_FMT_S, "align_branch",
2558 	   tf[!!rs6000_align_branch_targets]);
2559   fprintf (stderr, DEBUG_FMT_D, "tls_size", rs6000_tls_size);
2560   fprintf (stderr, DEBUG_FMT_D, "long_double_size",
2561 	   rs6000_long_double_type_size);
2562   if (rs6000_long_double_type_size > 64)
2563     {
2564       fprintf (stderr, DEBUG_FMT_S, "long double type",
2565 	       TARGET_IEEEQUAD ? "IEEE" : "IBM");
2566       fprintf (stderr, DEBUG_FMT_S, "default long double type",
2567 	       TARGET_IEEEQUAD_DEFAULT ? "IEEE" : "IBM");
2568     }
2569   fprintf (stderr, DEBUG_FMT_D, "sched_restricted_insns_priority",
2570 	   (int)rs6000_sched_restricted_insns_priority);
2571   fprintf (stderr, DEBUG_FMT_D, "Number of standard builtins",
2572 	   (int)END_BUILTINS);
2573 
2574   fprintf (stderr, DEBUG_FMT_D, "Enable float128 on VSX",
2575 	   (int)TARGET_FLOAT128_ENABLE_TYPE);
2576 
2577   if (TARGET_VSX)
2578     fprintf (stderr, DEBUG_FMT_D, "VSX easy 64-bit scalar element",
2579 	     (int)VECTOR_ELEMENT_SCALAR_64BIT);
2580 
2581   if (TARGET_DIRECT_MOVE_128)
2582     fprintf (stderr, DEBUG_FMT_D, "VSX easy 64-bit mfvsrld element",
2583 	     (int)VECTOR_ELEMENT_MFVSRLD_64BIT);
2584 }
2585 
2586 
2587 /* Update the addr mask bits in reg_addr to help the secondary reload and
2588    legitimate address support figure out the appropriate addressing to
2589    use.  */
2590 
2591 static void
2592 rs6000_setup_reg_addr_masks (void)
2593 {
2594   ssize_t rc, reg, m, nregs;
2595   addr_mask_type any_addr_mask, addr_mask;
2596 
2597   for (m = 0; m < NUM_MACHINE_MODES; ++m)
2598     {
2599       machine_mode m2 = (machine_mode) m;
2600       bool complex_p = false;
2601       bool small_int_p = (m2 == QImode || m2 == HImode || m2 == SImode);
2602       size_t msize;
2603 
2604       if (COMPLEX_MODE_P (m2))
2605 	{
2606 	  complex_p = true;
2607 	  m2 = GET_MODE_INNER (m2);
2608 	}
2609 
2610       msize = GET_MODE_SIZE (m2);
2611 
2612       /* SDmode is special in that we want to access it only via REG+REG
2613 	 addressing on power7 and above, since we want to use the LFIWZX and
2614 	 STFIWZX instructions to load it.  */
2615       bool indexed_only_p = (m == SDmode && TARGET_NO_SDMODE_STACK);
2616 
2617       any_addr_mask = 0;
2618       for (rc = FIRST_RELOAD_REG_CLASS; rc <= LAST_RELOAD_REG_CLASS; rc++)
2619 	{
2620 	  addr_mask = 0;
2621 	  reg = reload_reg_map[rc].reg;
2622 
2623 	  /* Can mode values go in the GPR/FPR/Altivec registers?  */
2624 	  if (reg >= 0 && rs6000_hard_regno_mode_ok_p[m][reg])
2625 	    {
2626 	      bool small_int_vsx_p = (small_int_p
2627 				      && (rc == RELOAD_REG_FPR
2628 					  || rc == RELOAD_REG_VMX));
2629 
2630 	      nregs = rs6000_hard_regno_nregs[m][reg];
2631 	      addr_mask |= RELOAD_REG_VALID;
2632 
2633 	      /* Indicate if the mode takes more than 1 physical register.  If
2634 		 it takes a single register, indicate it can do REG+REG
2635 		 addressing.  Small integers in VSX registers can only do
2636 		 REG+REG addressing.  */
2637 	      if (small_int_vsx_p)
2638 		addr_mask |= RELOAD_REG_INDEXED;
2639 	      else if (nregs > 1 || m == BLKmode || complex_p)
2640 		addr_mask |= RELOAD_REG_MULTIPLE;
2641 	      else
2642 		addr_mask |= RELOAD_REG_INDEXED;
2643 
2644 	      /* Figure out if we can do PRE_INC, PRE_DEC, or PRE_MODIFY
2645 		 addressing.  If we allow scalars into Altivec registers,
2646 		 don't allow PRE_INC, PRE_DEC, or PRE_MODIFY.
2647 
2648 		 For VSX systems, we don't allow update addressing for
2649 		 DFmode/SFmode if those registers can go in both the
2650 		 traditional floating point registers and Altivec registers.
2651 		 The load/store instructions for the Altivec registers do not
2652 		 have update forms.  If we allowed update addressing, it seems
2653 		 to break IV-OPT code using floating point if the index type is
2654 		 int instead of long (PR target/81550 and target/84042).  */
2655 
2656 	      if (TARGET_UPDATE
2657 		  && (rc == RELOAD_REG_GPR || rc == RELOAD_REG_FPR)
2658 		  && msize <= 8
2659 		  && !VECTOR_MODE_P (m2)
2660 		  && !VECTOR_ALIGNMENT_P (m2)
2661 		  && !complex_p
2662 		  && (m != E_DFmode || !TARGET_VSX)
2663 		  && (m != E_SFmode || !TARGET_P8_VECTOR)
2664 		  && !small_int_vsx_p)
2665 		{
2666 		  addr_mask |= RELOAD_REG_PRE_INCDEC;
2667 
2668 		  /* PRE_MODIFY is more restricted than PRE_INC/PRE_DEC in that
2669 		     we don't allow PRE_MODIFY for some multi-register
2670 		     operations.  */
2671 		  switch (m)
2672 		    {
2673 		    default:
2674 		      addr_mask |= RELOAD_REG_PRE_MODIFY;
2675 		      break;
2676 
2677 		    case E_DImode:
2678 		      if (TARGET_POWERPC64)
2679 			addr_mask |= RELOAD_REG_PRE_MODIFY;
2680 		      break;
2681 
2682 		    case E_DFmode:
2683 		    case E_DDmode:
2684 		      if (TARGET_HARD_FLOAT)
2685 			addr_mask |= RELOAD_REG_PRE_MODIFY;
2686 		      break;
2687 		    }
2688 		}
2689 	    }
2690 
2691 	  /* GPR and FPR registers can do REG+OFFSET addressing, except
2692 	     possibly for SDmode.  ISA 3.0 (i.e. power9) adds D-form addressing
2693 	     for 64-bit scalars and 32-bit SFmode to altivec registers.  */
2694 	  if ((addr_mask != 0) && !indexed_only_p
2695 	      && msize <= 8
2696 	      && (rc == RELOAD_REG_GPR
2697 		  || ((msize == 8 || m2 == SFmode)
2698 		      && (rc == RELOAD_REG_FPR
2699 			  || (rc == RELOAD_REG_VMX && TARGET_P9_VECTOR)))))
2700 	    addr_mask |= RELOAD_REG_OFFSET;
2701 
2702 	  /* VSX registers can do REG+OFFSET addressing if ISA 3.0
2703 	     instructions are enabled.  The offset for 128-bit VSX registers is
2704 	     only 12 bits.  While GPRs can handle the full offset range, VSX
2705 	     registers can only handle the restricted range.  */
2706 	  else if ((addr_mask != 0) && !indexed_only_p
2707 		   && msize == 16 && TARGET_P9_VECTOR
2708 		   && (ALTIVEC_OR_VSX_VECTOR_MODE (m2)
2709 		       || (m2 == TImode && TARGET_VSX)))
2710 	    {
2711 	      addr_mask |= RELOAD_REG_OFFSET;
2712 	      if (rc == RELOAD_REG_FPR || rc == RELOAD_REG_VMX)
2713 		addr_mask |= RELOAD_REG_QUAD_OFFSET;
2714 	    }
2715 
2716 	  /* Vector pairs can do both indexed and offset loads if the
2717 	     instructions are enabled, otherwise they can only do offset loads,
2718 	     since the access will be split into two vector moves.  Vector
2719 	     quads can only do offset loads.  */
2720 	  else if ((addr_mask != 0) && TARGET_MMA
2721 		   && (m2 == OOmode || m2 == XOmode))
2722 	    {
2723 	      addr_mask |= RELOAD_REG_OFFSET;
2724 	      if (rc == RELOAD_REG_FPR || rc == RELOAD_REG_VMX)
2725 		{
2726 		  addr_mask |= RELOAD_REG_QUAD_OFFSET;
2727 		  if (m2 == OOmode)
2728 		    addr_mask |= RELOAD_REG_INDEXED;
2729 		}
2730 	    }
2731 
2732 	  /* VMX registers can do (REG & -16) and ((REG+REG) & -16)
2733 	     addressing on 128-bit types.  */
2734 	  if (rc == RELOAD_REG_VMX && msize == 16
2735 	      && (addr_mask & RELOAD_REG_VALID) != 0)
2736 	    addr_mask |= RELOAD_REG_AND_M16;
2737 
2738 	  reg_addr[m].addr_mask[rc] = addr_mask;
2739 	  any_addr_mask |= addr_mask;
2740 	}
2741 
2742       reg_addr[m].addr_mask[RELOAD_REG_ANY] = any_addr_mask;
2743     }
2744 }
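/* Worked example (editorial): for SImode on a power9-style target, the GPR
   class ends up with the mask "v io++ " (valid, indexed, offset and both
   update forms), while the FPR/VMX classes get only "v i    " because
   small_int_vsx_p restricts small integers in VSX registers to REG+REG
   addressing, with no offset or update forms.  */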
2745 
2746 
2747 /* Initialize the various global tables that are based on register size.  */
2748 static void
2749 rs6000_init_hard_regno_mode_ok (bool global_init_p)
2750 {
2751   ssize_t r, m, c;
2752   int align64;
2753   int align32;
2754 
2755   /* Precalculate REGNO_REG_CLASS.  */
2756   rs6000_regno_regclass[0] = GENERAL_REGS;
2757   for (r = 1; r < 32; ++r)
2758     rs6000_regno_regclass[r] = BASE_REGS;
2759 
2760   for (r = 32; r < 64; ++r)
2761     rs6000_regno_regclass[r] = FLOAT_REGS;
2762 
2763   for (r = 64; HARD_REGISTER_NUM_P (r); ++r)
2764     rs6000_regno_regclass[r] = NO_REGS;
2765 
2766   for (r = FIRST_ALTIVEC_REGNO; r <= LAST_ALTIVEC_REGNO; ++r)
2767     rs6000_regno_regclass[r] = ALTIVEC_REGS;
2768 
2769   rs6000_regno_regclass[CR0_REGNO] = CR0_REGS;
2770   for (r = CR1_REGNO; r <= CR7_REGNO; ++r)
2771     rs6000_regno_regclass[r] = CR_REGS;
2772 
2773   rs6000_regno_regclass[LR_REGNO] = LINK_REGS;
2774   rs6000_regno_regclass[CTR_REGNO] = CTR_REGS;
2775   rs6000_regno_regclass[CA_REGNO] = NO_REGS;
2776   rs6000_regno_regclass[VRSAVE_REGNO] = VRSAVE_REGS;
2777   rs6000_regno_regclass[VSCR_REGNO] = VRSAVE_REGS;
2778   rs6000_regno_regclass[ARG_POINTER_REGNUM] = BASE_REGS;
2779   rs6000_regno_regclass[FRAME_POINTER_REGNUM] = BASE_REGS;
2780 
2781   /* Precalculate register class to simpler reload register class.  We don't
2782      need all of the register classes that are combinations of different
2783      classes, just the simple ones that have constraint letters.  */
2784   for (c = 0; c < N_REG_CLASSES; c++)
2785     reg_class_to_reg_type[c] = NO_REG_TYPE;
2786 
2787   reg_class_to_reg_type[(int)GENERAL_REGS] = GPR_REG_TYPE;
2788   reg_class_to_reg_type[(int)BASE_REGS] = GPR_REG_TYPE;
2789   reg_class_to_reg_type[(int)VSX_REGS] = VSX_REG_TYPE;
2790   reg_class_to_reg_type[(int)VRSAVE_REGS] = SPR_REG_TYPE;
2791   reg_class_to_reg_type[(int)VSCR_REGS] = SPR_REG_TYPE;
2792   reg_class_to_reg_type[(int)LINK_REGS] = SPR_REG_TYPE;
2793   reg_class_to_reg_type[(int)CTR_REGS] = SPR_REG_TYPE;
2794   reg_class_to_reg_type[(int)LINK_OR_CTR_REGS] = SPR_REG_TYPE;
2795   reg_class_to_reg_type[(int)CR_REGS] = CR_REG_TYPE;
2796   reg_class_to_reg_type[(int)CR0_REGS] = CR_REG_TYPE;
2797 
2798   if (TARGET_VSX)
2799     {
2800       reg_class_to_reg_type[(int)FLOAT_REGS] = VSX_REG_TYPE;
2801       reg_class_to_reg_type[(int)ALTIVEC_REGS] = VSX_REG_TYPE;
2802     }
2803   else
2804     {
2805       reg_class_to_reg_type[(int)FLOAT_REGS] = FPR_REG_TYPE;
2806       reg_class_to_reg_type[(int)ALTIVEC_REGS] = ALTIVEC_REG_TYPE;
2807     }
2808 
2809   /* Precalculate the valid memory formats as well as the vector information;
2810      this must be set up before the rs6000_hard_regno_nregs_internal calls
2811      below.  */
2812   gcc_assert ((int)VECTOR_NONE == 0);
2813   memset ((void *) &rs6000_vector_unit[0], '\0', sizeof (rs6000_vector_unit));
2814   memset ((void *) &rs6000_vector_mem[0], '\0', sizeof (rs6000_vector_mem));
2815 
2816   gcc_assert ((int)CODE_FOR_nothing == 0);
2817   memset ((void *) &reg_addr[0], '\0', sizeof (reg_addr));
2818 
2819   gcc_assert ((int)NO_REGS == 0);
2820   memset ((void *) &rs6000_constraints[0], '\0', sizeof (rs6000_constraints));
2821 
2822   /* The VSX hardware allows native alignment for vectors; TARGET_VSX_ALIGN_128
2823      controls whether the compiler assumes native alignment or forces 128-bit.  */
2824   if (TARGET_VSX && !TARGET_VSX_ALIGN_128)
2825     {
2826       align64 = 64;
2827       align32 = 32;
2828     }
2829   else
2830     {
2831       align64 = 128;
2832       align32 = 128;
2833     }
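  /* Editorial note: the align values are in bits, so with native alignment
     V2DFmode vectors only need 8-byte (64-bit) alignment and V4SFmode only
     4-byte (32-bit), matching their element size; otherwise everything
     stays at the traditional 16-byte (128-bit) Altivec alignment.  */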
2834 
2835   /* KF mode (IEEE 128-bit in VSX registers).  We do not have arithmetic, so
2836      only set the memory modes.  Include TFmode if -mabi=ieeelongdouble.  */
2837   if (TARGET_FLOAT128_TYPE)
2838     {
2839       rs6000_vector_mem[KFmode] = VECTOR_VSX;
2840       rs6000_vector_align[KFmode] = 128;
2841 
2842       if (FLOAT128_IEEE_P (TFmode))
2843 	{
2844 	  rs6000_vector_mem[TFmode] = VECTOR_VSX;
2845 	  rs6000_vector_align[TFmode] = 128;
2846 	}
2847     }
2848 
2849   /* V2DF mode, VSX only.  */
2850   if (TARGET_VSX)
2851     {
2852       rs6000_vector_unit[V2DFmode] = VECTOR_VSX;
2853       rs6000_vector_mem[V2DFmode] = VECTOR_VSX;
2854       rs6000_vector_align[V2DFmode] = align64;
2855     }
2856 
2857   /* V4SF mode, either VSX or Altivec.  */
2858   if (TARGET_VSX)
2859     {
2860       rs6000_vector_unit[V4SFmode] = VECTOR_VSX;
2861       rs6000_vector_mem[V4SFmode] = VECTOR_VSX;
2862       rs6000_vector_align[V4SFmode] = align32;
2863     }
2864   else if (TARGET_ALTIVEC)
2865     {
2866       rs6000_vector_unit[V4SFmode] = VECTOR_ALTIVEC;
2867       rs6000_vector_mem[V4SFmode] = VECTOR_ALTIVEC;
2868       rs6000_vector_align[V4SFmode] = align32;
2869     }
2870 
2871   /* V16QImode, V8HImode and V4SImode are Altivec-only, but may use VSX loads
2872      and stores.  */
2873   if (TARGET_ALTIVEC)
2874     {
2875       rs6000_vector_unit[V4SImode] = VECTOR_ALTIVEC;
2876       rs6000_vector_unit[V8HImode] = VECTOR_ALTIVEC;
2877       rs6000_vector_unit[V16QImode] = VECTOR_ALTIVEC;
2878       rs6000_vector_align[V4SImode] = align32;
2879       rs6000_vector_align[V8HImode] = align32;
2880       rs6000_vector_align[V16QImode] = align32;
2881 
2882       if (TARGET_VSX)
2883 	{
2884 	  rs6000_vector_mem[V4SImode] = VECTOR_VSX;
2885 	  rs6000_vector_mem[V8HImode] = VECTOR_VSX;
2886 	  rs6000_vector_mem[V16QImode] = VECTOR_VSX;
2887 	}
2888       else
2889 	{
2890 	  rs6000_vector_mem[V4SImode] = VECTOR_ALTIVEC;
2891 	  rs6000_vector_mem[V8HImode] = VECTOR_ALTIVEC;
2892 	  rs6000_vector_mem[V16QImode] = VECTOR_ALTIVEC;
2893 	}
2894     }
2895 
2896   /* V2DImode: full support depends on the ISA 2.07 vector unit.  Allow it
2897      under VSX to do insert/splat/extract.  Altivec lacks 64-bit integers.  */
2898   if (TARGET_VSX)
2899     {
2900       rs6000_vector_mem[V2DImode] = VECTOR_VSX;
2901       rs6000_vector_unit[V2DImode]
2902 	= (TARGET_P8_VECTOR) ? VECTOR_P8_VECTOR : VECTOR_NONE;
2903       rs6000_vector_align[V2DImode] = align64;
2904 
2905       rs6000_vector_mem[V1TImode] = VECTOR_VSX;
2906       rs6000_vector_unit[V1TImode]
2907 	= (TARGET_P8_VECTOR) ? VECTOR_P8_VECTOR : VECTOR_NONE;
2908       rs6000_vector_align[V1TImode] = 128;
2909     }
2910 
2911   /* DFmode, see if we want to use the VSX unit.  Memory is handled
2912      differently, so don't set rs6000_vector_mem.  */
2913   if (TARGET_VSX)
2914     {
2915       rs6000_vector_unit[DFmode] = VECTOR_VSX;
2916       rs6000_vector_align[DFmode] = 64;
2917     }
2918 
2919   /* SFmode, see if we want to use the VSX unit.  */
2920   if (TARGET_P8_VECTOR)
2921     {
2922       rs6000_vector_unit[SFmode] = VECTOR_VSX;
2923       rs6000_vector_align[SFmode] = 32;
2924     }
2925 
2926   /* Allow TImode in VSX registers and set the VSX memory macros.  */
2927   if (TARGET_VSX)
2928     {
2929       rs6000_vector_mem[TImode] = VECTOR_VSX;
2930       rs6000_vector_align[TImode] = align64;
2931     }
2932 
2933   /* Add support for vector pairs and vector quad registers.  */
2934   if (TARGET_MMA)
2935     {
2936       rs6000_vector_unit[OOmode] = VECTOR_NONE;
2937       rs6000_vector_mem[OOmode] = VECTOR_VSX;
2938       rs6000_vector_align[OOmode] = 256;
2939 
2940       rs6000_vector_unit[XOmode] = VECTOR_NONE;
2941       rs6000_vector_mem[XOmode] = VECTOR_VSX;
2942       rs6000_vector_align[XOmode] = 512;
2943     }
2944 
2945   /* Register class constraints for the constraints that depend on compile
2946      switches. When the VSX code was added, different constraints were added
2947      based on the type (DFmode, V2DFmode, V4SFmode).  For the vector types, all
2948      of the VSX registers are used.  The register classes for scalar floating
2949      point types are set based on whether we allow that type into the upper
2950      (Altivec) registers.  GCC has register classes to target the Altivec
2951      registers for load/store operations, to select using a VSX memory
2952      operation instead of the traditional floating point operation.  The
2953      constraints are:
2954 
2955 	d  - Register class to use with traditional DFmode instructions.
2956 	f  - Register class to use with traditional SFmode instructions.
2957 	v  - Altivec register.
2958 	wa - Any VSX register.
2959 	wc - Reserved to represent individual CR bits (used in LLVM).
2960 	wn - always NO_REGS.
2961 	wr - GPR if 64-bit mode is permitted.
2962 	wx - Float register if we can do 32-bit int stores.  */
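
/* Editorial example (hypothetical pattern, not from the rs6000 .md files):
   a machine-description alternative can then select on these letters, e.g.

       (match_operand:DF 0 "gpc_reg_operand" "=d,wa")

   accepts a traditional FPR in the first alternative and any VSX register
   in the second, but only when the corresponding rs6000_constraints[]
   entry below has been set to a non-empty class.  */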
2963 
2964   if (TARGET_HARD_FLOAT)
2965     {
2966       rs6000_constraints[RS6000_CONSTRAINT_f] = FLOAT_REGS;	/* SFmode  */
2967       rs6000_constraints[RS6000_CONSTRAINT_d] = FLOAT_REGS;	/* DFmode  */
2968     }
2969 
2970   if (TARGET_VSX)
2971     rs6000_constraints[RS6000_CONSTRAINT_wa] = VSX_REGS;
2972 
2973   /* Add conditional constraints based on various options, to allow us to
2974      collapse multiple insn patterns.  */
2975   if (TARGET_ALTIVEC)
2976     rs6000_constraints[RS6000_CONSTRAINT_v] = ALTIVEC_REGS;
2977 
2978   if (TARGET_POWERPC64)
2979     {
2980       rs6000_constraints[RS6000_CONSTRAINT_wr] = GENERAL_REGS;
2981       rs6000_constraints[RS6000_CONSTRAINT_wA] = BASE_REGS;
2982     }
2983 
2984   if (TARGET_STFIWX)
2985     rs6000_constraints[RS6000_CONSTRAINT_wx] = FLOAT_REGS;	/* DImode  */
2986 
2987   /* Support for new direct moves (ISA 3.0 + 64-bit).  */
2988   if (TARGET_DIRECT_MOVE_128)
2989     rs6000_constraints[RS6000_CONSTRAINT_we] = VSX_REGS;
2990 
2991   /* Set up the reload helper and direct move functions.  */
2992   if (TARGET_VSX || TARGET_ALTIVEC)
2993     {
2994       if (TARGET_64BIT)
2995 	{
	  reg_addr[V16QImode].reload_store = CODE_FOR_reload_v16qi_di_store;
	  reg_addr[V16QImode].reload_load  = CODE_FOR_reload_v16qi_di_load;
	  reg_addr[V8HImode].reload_store  = CODE_FOR_reload_v8hi_di_store;
	  reg_addr[V8HImode].reload_load   = CODE_FOR_reload_v8hi_di_load;
	  reg_addr[V4SImode].reload_store  = CODE_FOR_reload_v4si_di_store;
	  reg_addr[V4SImode].reload_load   = CODE_FOR_reload_v4si_di_load;
	  reg_addr[V2DImode].reload_store  = CODE_FOR_reload_v2di_di_store;
	  reg_addr[V2DImode].reload_load   = CODE_FOR_reload_v2di_di_load;
	  reg_addr[V1TImode].reload_store  = CODE_FOR_reload_v1ti_di_store;
	  reg_addr[V1TImode].reload_load   = CODE_FOR_reload_v1ti_di_load;
	  reg_addr[V4SFmode].reload_store  = CODE_FOR_reload_v4sf_di_store;
	  reg_addr[V4SFmode].reload_load   = CODE_FOR_reload_v4sf_di_load;
	  reg_addr[V2DFmode].reload_store  = CODE_FOR_reload_v2df_di_store;
	  reg_addr[V2DFmode].reload_load   = CODE_FOR_reload_v2df_di_load;
	  reg_addr[DFmode].reload_store    = CODE_FOR_reload_df_di_store;
	  reg_addr[DFmode].reload_load     = CODE_FOR_reload_df_di_load;
	  reg_addr[DDmode].reload_store    = CODE_FOR_reload_dd_di_store;
	  reg_addr[DDmode].reload_load     = CODE_FOR_reload_dd_di_load;
	  reg_addr[SFmode].reload_store    = CODE_FOR_reload_sf_di_store;
	  reg_addr[SFmode].reload_load     = CODE_FOR_reload_sf_di_load;

	  if (FLOAT128_VECTOR_P (KFmode))
	    {
	      reg_addr[KFmode].reload_store = CODE_FOR_reload_kf_di_store;
	      reg_addr[KFmode].reload_load  = CODE_FOR_reload_kf_di_load;
	    }

	  if (FLOAT128_VECTOR_P (TFmode))
	    {
	      reg_addr[TFmode].reload_store = CODE_FOR_reload_tf_di_store;
	      reg_addr[TFmode].reload_load  = CODE_FOR_reload_tf_di_load;
	    }

	  /* Only provide a reload handler for SDmode if lfiwzx/stfiwx are
	     available.  */
	  if (TARGET_NO_SDMODE_STACK)
	    {
	      reg_addr[SDmode].reload_store = CODE_FOR_reload_sd_di_store;
	      reg_addr[SDmode].reload_load  = CODE_FOR_reload_sd_di_load;
	    }

	  if (TARGET_VSX)
	    {
	      reg_addr[TImode].reload_store  = CODE_FOR_reload_ti_di_store;
	      reg_addr[TImode].reload_load   = CODE_FOR_reload_ti_di_load;
	    }

	  if (TARGET_DIRECT_MOVE && !TARGET_DIRECT_MOVE_128)
	    {
	      reg_addr[TImode].reload_gpr_vsx    = CODE_FOR_reload_gpr_from_vsxti;
	      reg_addr[V1TImode].reload_gpr_vsx  = CODE_FOR_reload_gpr_from_vsxv1ti;
	      reg_addr[V2DFmode].reload_gpr_vsx  = CODE_FOR_reload_gpr_from_vsxv2df;
	      reg_addr[V2DImode].reload_gpr_vsx  = CODE_FOR_reload_gpr_from_vsxv2di;
	      reg_addr[V4SFmode].reload_gpr_vsx  = CODE_FOR_reload_gpr_from_vsxv4sf;
	      reg_addr[V4SImode].reload_gpr_vsx  = CODE_FOR_reload_gpr_from_vsxv4si;
	      reg_addr[V8HImode].reload_gpr_vsx  = CODE_FOR_reload_gpr_from_vsxv8hi;
	      reg_addr[V16QImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv16qi;
	      reg_addr[SFmode].reload_gpr_vsx    = CODE_FOR_reload_gpr_from_vsxsf;

	      reg_addr[TImode].reload_vsx_gpr    = CODE_FOR_reload_vsx_from_gprti;
	      reg_addr[V1TImode].reload_vsx_gpr  = CODE_FOR_reload_vsx_from_gprv1ti;
	      reg_addr[V2DFmode].reload_vsx_gpr  = CODE_FOR_reload_vsx_from_gprv2df;
	      reg_addr[V2DImode].reload_vsx_gpr  = CODE_FOR_reload_vsx_from_gprv2di;
	      reg_addr[V4SFmode].reload_vsx_gpr  = CODE_FOR_reload_vsx_from_gprv4sf;
	      reg_addr[V4SImode].reload_vsx_gpr  = CODE_FOR_reload_vsx_from_gprv4si;
	      reg_addr[V8HImode].reload_vsx_gpr  = CODE_FOR_reload_vsx_from_gprv8hi;
	      reg_addr[V16QImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv16qi;
	      reg_addr[SFmode].reload_vsx_gpr    = CODE_FOR_reload_vsx_from_gprsf;

	      if (FLOAT128_VECTOR_P (KFmode))
		{
		  reg_addr[KFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxkf;
		  reg_addr[KFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprkf;
		}

	      if (FLOAT128_VECTOR_P (TFmode))
		{
		  reg_addr[TFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxtf;
		  reg_addr[TFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprtf;
		}

	      if (TARGET_MMA)
		{
		  reg_addr[OOmode].reload_store = CODE_FOR_reload_oo_di_store;
		  reg_addr[OOmode].reload_load = CODE_FOR_reload_oo_di_load;
		  reg_addr[XOmode].reload_store = CODE_FOR_reload_xo_di_store;
		  reg_addr[XOmode].reload_load = CODE_FOR_reload_xo_di_load;
		}
	    }
	}
      else
	{
	  reg_addr[V16QImode].reload_store = CODE_FOR_reload_v16qi_si_store;
	  reg_addr[V16QImode].reload_load  = CODE_FOR_reload_v16qi_si_load;
	  reg_addr[V8HImode].reload_store  = CODE_FOR_reload_v8hi_si_store;
	  reg_addr[V8HImode].reload_load   = CODE_FOR_reload_v8hi_si_load;
	  reg_addr[V4SImode].reload_store  = CODE_FOR_reload_v4si_si_store;
	  reg_addr[V4SImode].reload_load   = CODE_FOR_reload_v4si_si_load;
	  reg_addr[V2DImode].reload_store  = CODE_FOR_reload_v2di_si_store;
	  reg_addr[V2DImode].reload_load   = CODE_FOR_reload_v2di_si_load;
	  reg_addr[V1TImode].reload_store  = CODE_FOR_reload_v1ti_si_store;
	  reg_addr[V1TImode].reload_load   = CODE_FOR_reload_v1ti_si_load;
	  reg_addr[V4SFmode].reload_store  = CODE_FOR_reload_v4sf_si_store;
	  reg_addr[V4SFmode].reload_load   = CODE_FOR_reload_v4sf_si_load;
	  reg_addr[V2DFmode].reload_store  = CODE_FOR_reload_v2df_si_store;
	  reg_addr[V2DFmode].reload_load   = CODE_FOR_reload_v2df_si_load;
	  reg_addr[DFmode].reload_store    = CODE_FOR_reload_df_si_store;
	  reg_addr[DFmode].reload_load     = CODE_FOR_reload_df_si_load;
	  reg_addr[DDmode].reload_store    = CODE_FOR_reload_dd_si_store;
	  reg_addr[DDmode].reload_load     = CODE_FOR_reload_dd_si_load;
	  reg_addr[SFmode].reload_store    = CODE_FOR_reload_sf_si_store;
	  reg_addr[SFmode].reload_load     = CODE_FOR_reload_sf_si_load;

	  if (FLOAT128_VECTOR_P (KFmode))
	    {
	      reg_addr[KFmode].reload_store = CODE_FOR_reload_kf_si_store;
	      reg_addr[KFmode].reload_load  = CODE_FOR_reload_kf_si_load;
	    }

	  if (FLOAT128_IEEE_P (TFmode))
	    {
	      reg_addr[TFmode].reload_store = CODE_FOR_reload_tf_si_store;
	      reg_addr[TFmode].reload_load  = CODE_FOR_reload_tf_si_load;
	    }

	  /* Only provide a reload handler for SDmode if lfiwzx/stfiwx are
	     available.  */
	  if (TARGET_NO_SDMODE_STACK)
	    {
	      reg_addr[SDmode].reload_store = CODE_FOR_reload_sd_si_store;
	      reg_addr[SDmode].reload_load  = CODE_FOR_reload_sd_si_load;
	    }

	  if (TARGET_VSX)
	    {
	      reg_addr[TImode].reload_store  = CODE_FOR_reload_ti_si_store;
	      reg_addr[TImode].reload_load   = CODE_FOR_reload_ti_si_load;
	    }

	  if (TARGET_DIRECT_MOVE)
	    {
	      reg_addr[DImode].reload_fpr_gpr = CODE_FOR_reload_fpr_from_gprdi;
	      reg_addr[DDmode].reload_fpr_gpr = CODE_FOR_reload_fpr_from_gprdd;
	      reg_addr[DFmode].reload_fpr_gpr = CODE_FOR_reload_fpr_from_gprdf;
	    }
	}

      reg_addr[DFmode].scalar_in_vmx_p = true;
      reg_addr[DImode].scalar_in_vmx_p = true;

      if (TARGET_P8_VECTOR)
	{
	  reg_addr[SFmode].scalar_in_vmx_p = true;
	  reg_addr[SImode].scalar_in_vmx_p = true;

	  if (TARGET_P9_VECTOR)
	    {
	      reg_addr[HImode].scalar_in_vmx_p = true;
	      reg_addr[QImode].scalar_in_vmx_p = true;
	    }
	}
    }

  /* Precalculate HARD_REGNO_NREGS.  */
  for (r = 0; HARD_REGISTER_NUM_P (r); ++r)
    for (m = 0; m < NUM_MACHINE_MODES; ++m)
      rs6000_hard_regno_nregs[m][r]
	= rs6000_hard_regno_nregs_internal (r, (machine_mode) m);

  /* Precalculate TARGET_HARD_REGNO_MODE_OK.  */
  for (r = 0; HARD_REGISTER_NUM_P (r); ++r)
    for (m = 0; m < NUM_MACHINE_MODES; ++m)
      rs6000_hard_regno_mode_ok_p[m][r]
	= rs6000_hard_regno_mode_ok_uncached (r, (machine_mode) m);

  /* Precalculate CLASS_MAX_NREGS sizes.  */
  for (c = 0; c < LIM_REG_CLASSES; ++c)
    {
      int reg_size;

      if (TARGET_VSX && VSX_REG_CLASS_P (c))
	reg_size = UNITS_PER_VSX_WORD;

      else if (c == ALTIVEC_REGS)
	reg_size = UNITS_PER_ALTIVEC_WORD;

      else if (c == FLOAT_REGS)
	reg_size = UNITS_PER_FP_WORD;

      else
	reg_size = UNITS_PER_WORD;

      for (m = 0; m < NUM_MACHINE_MODES; ++m)
	{
	  machine_mode m2 = (machine_mode)m;
	  int reg_size2 = reg_size;

	  /* TDmode and IBM 128-bit floating point always take 2 registers,
	     even in VSX.  */
	  if (TARGET_VSX && VSX_REG_CLASS_P (c) && FLOAT128_2REG_P (m))
	    reg_size2 = UNITS_PER_FP_WORD;

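	  /* Round up to whole registers; e.g. a 16-byte vector mode held in
	     4-byte GPRs needs (16 + 4 - 1) / 4 = 4 registers.  */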
	  rs6000_class_max_nregs[m][c]
	    = (GET_MODE_SIZE (m2) + reg_size2 - 1) / reg_size2;
	}
    }

  /* Calculate which modes should automatically generate code using the
     reciprocal divide and square root estimate instructions.  In the future,
     possibly generate these instructions even if the user did not specify
     -mrecip.  The older machines' double-precision reciprocal square root
     estimate is not accurate enough.  */
  memset (rs6000_recip_bits, 0, sizeof (rs6000_recip_bits));
  if (TARGET_FRES)
    rs6000_recip_bits[SFmode] = RS6000_RECIP_MASK_HAVE_RE;
  if (TARGET_FRE)
    rs6000_recip_bits[DFmode] = RS6000_RECIP_MASK_HAVE_RE;
  if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode))
    rs6000_recip_bits[V4SFmode] = RS6000_RECIP_MASK_HAVE_RE;
  if (VECTOR_UNIT_VSX_P (V2DFmode))
    rs6000_recip_bits[V2DFmode] = RS6000_RECIP_MASK_HAVE_RE;

  if (TARGET_FRSQRTES)
    rs6000_recip_bits[SFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;
  if (TARGET_FRSQRTE)
    rs6000_recip_bits[DFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;
  if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode))
    rs6000_recip_bits[V4SFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;
  if (VECTOR_UNIT_VSX_P (V2DFmode))
    rs6000_recip_bits[V2DFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;

  if (rs6000_recip_control)
    {
      if (!flag_finite_math_only)
	warning (0, "%qs requires %qs or %qs", "-mrecip",
		 "-ffinite-math-only", "-ffast-math");
      if (flag_trapping_math)
	warning (0, "%qs requires %qs or %qs", "-mrecip",
		 "-fno-trapping-math", "-ffast-math");
      if (!flag_reciprocal_math)
	warning (0, "%qs requires %qs or %qs", "-mrecip", "-freciprocal-math",
		 "-ffast-math");
      if (flag_finite_math_only && !flag_trapping_math && flag_reciprocal_math)
	{
	  if (RS6000_RECIP_HAVE_RE_P (SFmode)
	      && (rs6000_recip_control & RECIP_SF_DIV) != 0)
	    rs6000_recip_bits[SFmode] |= RS6000_RECIP_MASK_AUTO_RE;

	  if (RS6000_RECIP_HAVE_RE_P (DFmode)
	      && (rs6000_recip_control & RECIP_DF_DIV) != 0)
	    rs6000_recip_bits[DFmode] |= RS6000_RECIP_MASK_AUTO_RE;

	  if (RS6000_RECIP_HAVE_RE_P (V4SFmode)
	      && (rs6000_recip_control & RECIP_V4SF_DIV) != 0)
	    rs6000_recip_bits[V4SFmode] |= RS6000_RECIP_MASK_AUTO_RE;

	  if (RS6000_RECIP_HAVE_RE_P (V2DFmode)
	      && (rs6000_recip_control & RECIP_V2DF_DIV) != 0)
	    rs6000_recip_bits[V2DFmode] |= RS6000_RECIP_MASK_AUTO_RE;

	  if (RS6000_RECIP_HAVE_RSQRTE_P (SFmode)
	      && (rs6000_recip_control & RECIP_SF_RSQRT) != 0)
	    rs6000_recip_bits[SFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;

	  if (RS6000_RECIP_HAVE_RSQRTE_P (DFmode)
	      && (rs6000_recip_control & RECIP_DF_RSQRT) != 0)
	    rs6000_recip_bits[DFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;

	  if (RS6000_RECIP_HAVE_RSQRTE_P (V4SFmode)
	      && (rs6000_recip_control & RECIP_V4SF_RSQRT) != 0)
	    rs6000_recip_bits[V4SFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;

	  if (RS6000_RECIP_HAVE_RSQRTE_P (V2DFmode)
	      && (rs6000_recip_control & RECIP_V2DF_RSQRT) != 0)
	    rs6000_recip_bits[V2DFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;
	}
    }

  /* Update the addr mask bits in reg_addr to help secondary reload and the
     legitimate address support figure out the appropriate addressing to
     use.  */
  rs6000_setup_reg_addr_masks ();

  if (global_init_p || TARGET_DEBUG_TARGET)
    {
      if (TARGET_DEBUG_REG)
	rs6000_debug_reg_global ();

      if (TARGET_DEBUG_COST || TARGET_DEBUG_REG)
	fprintf (stderr,
		 "SImode variable mult cost       = %d\n"
		 "SImode constant mult cost       = %d\n"
		 "SImode short constant mult cost = %d\n"
		 "DImode multiplication cost      = %d\n"
		 "SImode division cost            = %d\n"
		 "DImode division cost            = %d\n"
		 "Simple fp operation cost        = %d\n"
		 "DFmode multiplication cost      = %d\n"
		 "SFmode division cost            = %d\n"
		 "DFmode division cost            = %d\n"
		 "cache line size                 = %d\n"
		 "l1 cache size                   = %d\n"
		 "l2 cache size                   = %d\n"
		 "simultaneous prefetches         = %d\n"
		 "\n",
		 rs6000_cost->mulsi,
		 rs6000_cost->mulsi_const,
		 rs6000_cost->mulsi_const9,
		 rs6000_cost->muldi,
		 rs6000_cost->divsi,
		 rs6000_cost->divdi,
		 rs6000_cost->fp,
		 rs6000_cost->dmul,
		 rs6000_cost->sdiv,
		 rs6000_cost->ddiv,
		 rs6000_cost->cache_line_size,
		 rs6000_cost->l1_cache_size,
		 rs6000_cost->l2_cache_size,
		 rs6000_cost->simultaneous_prefetches);
    }
}

#if TARGET_MACHO
/* The Darwin version of SUBTARGET_OVERRIDE_OPTIONS.  */

static void
darwin_rs6000_override_options (void)
{
  /* The Darwin ABI always includes AltiVec, can't be (validly) turned
     off.  */
  rs6000_altivec_abi = 1;
  TARGET_ALTIVEC_VRSAVE = 1;
  rs6000_current_abi = ABI_DARWIN;

  if (DEFAULT_ABI == ABI_DARWIN
      && TARGET_64BIT)
      darwin_one_byte_bool = 1;

  if (TARGET_64BIT && ! TARGET_POWERPC64)
    {
      rs6000_isa_flags |= OPTION_MASK_POWERPC64;
      warning (0, "%qs requires PowerPC64 architecture, enabling", "-m64");
    }

  /* The linkers [ld64] that support 64-bit do not need the JBSR longcall
     optimisation, and will not work with the most generic case (where the
     symbol is undefined external, but there is no symbol stub).  */
  if (TARGET_64BIT)
    rs6000_default_long_calls = 0;

  /* ld_classic is (so far) still used for kernel (static) code, and supports
     the JBSR longcall / branch islands.  */
  if (flag_mkernel)
    {
      rs6000_default_long_calls = 1;

      /* Allow a kext author to do -mkernel -mhard-float.  */
      if (! (rs6000_isa_flags_explicit & OPTION_MASK_SOFT_FLOAT))
        rs6000_isa_flags |= OPTION_MASK_SOFT_FLOAT;
    }

  /* Make -m64 imply -maltivec.  Darwin's 64-bit ABI includes
     Altivec.  */
  if (!flag_mkernel && !flag_apple_kext
      && TARGET_64BIT
      && ! (rs6000_isa_flags_explicit & OPTION_MASK_ALTIVEC))
    rs6000_isa_flags |= OPTION_MASK_ALTIVEC;

  /* Unless the user (not the configurer) has explicitly overridden it with
     -mcpu=G3 or -mno-altivec, 10.5+ targets default to G4 unless targeting
     the kernel.  */
  if (!flag_mkernel
      && !flag_apple_kext
      && strverscmp (darwin_macosx_version_min, "10.5") >= 0
      && ! (rs6000_isa_flags_explicit & OPTION_MASK_ALTIVEC)
      && ! OPTION_SET_P (rs6000_cpu_index))
    {
      rs6000_isa_flags |= OPTION_MASK_ALTIVEC;
    }
}
#endif

/* If not otherwise specified by a target, make 'long double' equivalent to
   'double'.  */

#ifndef RS6000_DEFAULT_LONG_DOUBLE_SIZE
#define RS6000_DEFAULT_LONG_DOUBLE_SIZE 64
#endif

/* Return the builtin mask of the various options used that could affect which
   builtins were used.  In the past we used target_flags, but we've run out of
   bits, and some options are no longer in target_flags.  */

HOST_WIDE_INT
rs6000_builtin_mask_calculate (void)
{
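  /* For example, a target with AltiVec and VSX but no newer extensions would
     return RS6000_BTM_ALTIVEC | RS6000_BTM_VSX, plus whatever scalar FP and
     64-bit bits apply.  */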
  return (((TARGET_ALTIVEC)		    ? RS6000_BTM_ALTIVEC   : 0)
	  | ((TARGET_CMPB)		    ? RS6000_BTM_CMPB	   : 0)
	  | ((TARGET_VSX)		    ? RS6000_BTM_VSX	   : 0)
	  | ((TARGET_FRE)		    ? RS6000_BTM_FRE	   : 0)
	  | ((TARGET_FRES)		    ? RS6000_BTM_FRES	   : 0)
	  | ((TARGET_FRSQRTE)		    ? RS6000_BTM_FRSQRTE   : 0)
	  | ((TARGET_FRSQRTES)		    ? RS6000_BTM_FRSQRTES  : 0)
	  | ((TARGET_POPCNTD)		    ? RS6000_BTM_POPCNTD   : 0)
	  | ((rs6000_cpu == PROCESSOR_CELL) ? RS6000_BTM_CELL      : 0)
	  | ((TARGET_P8_VECTOR)		    ? RS6000_BTM_P8_VECTOR : 0)
	  | ((TARGET_P9_VECTOR)		    ? RS6000_BTM_P9_VECTOR : 0)
	  | ((TARGET_P9_MISC)		    ? RS6000_BTM_P9_MISC   : 0)
	  | ((TARGET_MODULO)		    ? RS6000_BTM_MODULO    : 0)
	  | ((TARGET_64BIT)		    ? RS6000_BTM_64BIT     : 0)
	  | ((TARGET_POWERPC64)		    ? RS6000_BTM_POWERPC64 : 0)
	  | ((TARGET_CRYPTO)		    ? RS6000_BTM_CRYPTO	   : 0)
	  | ((TARGET_HTM)		    ? RS6000_BTM_HTM	   : 0)
	  | ((TARGET_DFP)		    ? RS6000_BTM_DFP	   : 0)
	  | ((TARGET_HARD_FLOAT)	    ? RS6000_BTM_HARD_FLOAT : 0)
	  | ((TARGET_LONG_DOUBLE_128
	      && TARGET_HARD_FLOAT
	      && !TARGET_IEEEQUAD)	    ? RS6000_BTM_LDBL128   : 0)
	  | ((TARGET_FLOAT128_TYPE)	    ? RS6000_BTM_FLOAT128  : 0)
	  | ((TARGET_FLOAT128_HW)	    ? RS6000_BTM_FLOAT128_HW : 0)
	  | ((TARGET_MMA)		    ? RS6000_BTM_MMA	   : 0)
	  | ((TARGET_POWER10)               ? RS6000_BTM_P10       : 0));
}

/* Implement TARGET_MD_ASM_ADJUST.  All asm statements are considered
   to clobber the XER[CA] bit because clobbering that bit without telling
   the compiler worked just fine with versions of GCC before GCC 5, and
   breaking a lot of older code in ways that are hard to track down is
   not such a great idea.  */

static rtx_insn *
rs6000_md_asm_adjust (vec<rtx> & /*outputs*/, vec<rtx> & /*inputs*/,
		      vec<machine_mode> & /*input_modes*/,
		      vec<const char *> & /*constraints*/, vec<rtx> &clobbers,
		      HARD_REG_SET &clobbered_regs, location_t /*loc*/)
{
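  /* Record the carry bit (XER[CA]) both in the asm's clobber list and in the
     set of clobbered hard registers.  */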
  clobbers.safe_push (gen_rtx_REG (SImode, CA_REGNO));
  SET_HARD_REG_BIT (clobbered_regs, CA_REGNO);
  return NULL;
}

/* This target function is similar to the hook TARGET_OPTION_OVERRIDE
   but is called when the optimize level is changed via an attribute or
   pragma or when it is reset at the end of the code affected by the
   attribute or pragma.  It is not called at the beginning of compilation
   when TARGET_OPTION_OVERRIDE is called so if you want to perform these
   actions then, you should have TARGET_OPTION_OVERRIDE call
   TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE.  */

static void
rs6000_override_options_after_change (void)
{
  /* Explicit -funroll-loops turns -munroll-only-small-loops off, and
     turns -frename-registers on.  */
  if ((OPTION_SET_P (flag_unroll_loops) && flag_unroll_loops)
       || (OPTION_SET_P (flag_unroll_all_loops)
	   && flag_unroll_all_loops))
    {
      if (!OPTION_SET_P (unroll_only_small_loops))
	unroll_only_small_loops = 0;
      if (!OPTION_SET_P (flag_rename_registers))
	flag_rename_registers = 1;
      if (!OPTION_SET_P (flag_cunroll_grow_size))
	flag_cunroll_grow_size = 1;
    }
  else if (!OPTION_SET_P (flag_cunroll_grow_size))
    flag_cunroll_grow_size = flag_peel_loops || optimize >= 3;

  /* If we are inserting ROP-protect instructions, disable shrink wrap.  */
  if (rs6000_rop_protect)
    flag_shrink_wrap = 0;
}

#ifdef TARGET_USES_LINUX64_OPT
static void
rs6000_linux64_override_options ()
{
  if (!OPTION_SET_P (rs6000_alignment_flags))
    rs6000_alignment_flags = MASK_ALIGN_NATURAL;
  if (rs6000_isa_flags & OPTION_MASK_64BIT)
    {
      if (DEFAULT_ABI != ABI_AIX)
	{
	  rs6000_current_abi = ABI_AIX;
	  error (INVALID_64BIT, "call");
	}
      dot_symbols = !strcmp (rs6000_abi_name, "aixdesc");
      if (ELFv2_ABI_CHECK)
	{
	  rs6000_current_abi = ABI_ELFv2;
	  if (dot_symbols)
	    error ("%<-mcall-aixdesc%> incompatible with %<-mabi=elfv2%>");
	}
      if (rs6000_isa_flags & OPTION_MASK_RELOCATABLE)
	{
	  rs6000_isa_flags &= ~OPTION_MASK_RELOCATABLE;
	  error (INVALID_64BIT, "relocatable");
	}
      if (rs6000_isa_flags & OPTION_MASK_EABI)
	{
	  rs6000_isa_flags &= ~OPTION_MASK_EABI;
	  error (INVALID_64BIT, "eabi");
	}
      if (TARGET_PROTOTYPE)
	{
	  target_prototype = 0;
	  error (INVALID_64BIT, "prototype");
	}
      if ((rs6000_isa_flags & OPTION_MASK_POWERPC64) == 0)
	{
	  rs6000_isa_flags |= OPTION_MASK_POWERPC64;
	  error ("%<-m64%> requires a PowerPC64 cpu");
	}
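      /* 64-bit ELF defaults to the medium code model; the PC-relative
	 support in rs6000_option_override_internal relies on this.  */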
      if (!OPTION_SET_P (rs6000_current_cmodel))
	SET_CMODEL (CMODEL_MEDIUM);
      if ((rs6000_isa_flags_explicit & OPTION_MASK_MINIMAL_TOC) != 0)
	{
	  if (OPTION_SET_P (rs6000_current_cmodel)
	      && rs6000_current_cmodel != CMODEL_SMALL)
	    error ("%<-mcmodel%> incompatible with other toc options");
	  if (TARGET_MINIMAL_TOC)
	    SET_CMODEL (CMODEL_SMALL);
	  else if (TARGET_PCREL
		   || (PCREL_SUPPORTED_BY_OS
		       && (rs6000_isa_flags_explicit & OPTION_MASK_PCREL) == 0))
	    /* Ignore -mno-minimal-toc.  */
	    ;
	  else
	    SET_CMODEL (CMODEL_SMALL);
	}
      if (rs6000_current_cmodel != CMODEL_SMALL)
	{
	  if (!OPTION_SET_P (TARGET_NO_FP_IN_TOC))
	    TARGET_NO_FP_IN_TOC = rs6000_current_cmodel == CMODEL_MEDIUM;
	  if (!OPTION_SET_P (TARGET_NO_SUM_IN_TOC))
	    TARGET_NO_SUM_IN_TOC = 0;
	}
      if (TARGET_PLTSEQ && DEFAULT_ABI != ABI_ELFv2)
	{
	  if (OPTION_SET_P (rs6000_pltseq))
	    warning (0, "%qs unsupported for this ABI",
		     "-mpltseq");
	  rs6000_pltseq = false;
	}
    }
  else if (TARGET_64BIT)
    error (INVALID_32BIT, "32");
  else
    {
      if (TARGET_PROFILE_KERNEL)
	{
	  profile_kernel = 0;
	  error (INVALID_32BIT, "profile-kernel");
	}
      if (OPTION_SET_P (rs6000_current_cmodel))
	{
	  SET_CMODEL (CMODEL_SMALL);
	  error (INVALID_32BIT, "cmodel");
	}
    }
}
#endif

/* Return true if we are using GLIBC, and it supports IEEE 128-bit long double.
   This support is only present in little-endian GLIBC 2.32 or newer.  */
static bool
glibc_supports_ieee_128bit (void)
{
#ifdef OPTION_GLIBC
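  /* The glibc version is encoded as major * 1000 + minor, so glibc 2.32
     is 2032.  */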
  if (OPTION_GLIBC && !BYTES_BIG_ENDIAN
      && ((TARGET_GLIBC_MAJOR * 1000) + TARGET_GLIBC_MINOR) >= 2032)
    return true;
#endif /* OPTION_GLIBC.  */

  return false;
}

/* Override command line options.

   Combine build-specific configuration information with options
   specified on the command line to set various state variables which
   influence code generation, optimization, and expansion of built-in
   functions.  Assure that command-line configuration preferences are
   compatible with each other and with the build configuration; issue
   warnings while adjusting configuration or error messages while
   rejecting configuration.

   Upon entry to this function:

     This function is called once at the beginning of
     compilation, and then again at the start and end of compiling
     each section of code that has a different configuration, as
     indicated, for example, by adding the

       __attribute__((__target__("cpu=power9")))

     qualifier to a function definition or, for example, by bracketing
     code between

       #pragma GCC target("altivec")

     and

       #pragma GCC reset_options

     directives.  Parameter global_init_p is true for the initial
     invocation, which initializes global variables, and false for all
     subsequent invocations.


     Various global state information is assumed to be valid.  This
     includes OPTION_TARGET_CPU_DEFAULT, representing the name of the
     default CPU specified at build configure time, TARGET_DEFAULT,
     representing the default set of option flags for the default
     target, and OPTION_SET_P (rs6000_isa_flags), representing
     which options were requested on the command line.

   Upon return from this function:

     rs6000_isa_flags_explicit has a non-zero bit for each flag that
     was set by name on the command line.  Additionally, if certain
     attributes are automatically enabled or disabled by this function
     in order to assure compatibility between options and
     configuration, the flags associated with those attributes are
     also set.  By setting these "explicit bits", we avoid the risk
     that other code might accidentally overwrite these particular
     attributes with "default values".

     The various bits of rs6000_isa_flags are set to indicate the
     target options that have been selected for the most current
     compilation efforts.  This has the effect of also turning on the
     associated TARGET_XXX values since these are macros which are
     generally defined to test the corresponding bit of the
     rs6000_isa_flags variable.

     The variable rs6000_builtin_mask is set to represent the target
     options for the most current compilation efforts, consistent with
     the current contents of rs6000_isa_flags.  This variable controls
     expansion of built-in functions.

     Various other global variables and fields of global structures
     (over 50 in all) are initialized to reflect the desired options
     for the most current compilation efforts.  */

static bool
rs6000_option_override_internal (bool global_init_p)
{
  bool ret = true;

  HOST_WIDE_INT set_masks;
  HOST_WIDE_INT ignore_masks;
  int cpu_index = -1;
  int tune_index;
  struct cl_target_option *main_target_opt
    = ((global_init_p || target_option_default_node == NULL)
       ? NULL : TREE_TARGET_OPTION (target_option_default_node));

  /* Print defaults.  */
  if ((TARGET_DEBUG_REG || TARGET_DEBUG_TARGET) && global_init_p)
    rs6000_print_isa_options (stderr, 0, "TARGET_DEFAULT", TARGET_DEFAULT);

  /* Remember the explicit arguments.  */
  if (global_init_p)
    rs6000_isa_flags_explicit = OPTION_SET_P (rs6000_isa_flags);

  /* On 64-bit Darwin, power alignment is ABI-incompatible with some C
     library functions, so warn about it. The flag may be useful for
     performance studies from time to time though, so don't disable it
     entirely.  */
  if (OPTION_SET_P (rs6000_alignment_flags)
      && rs6000_alignment_flags == MASK_ALIGN_POWER
      && DEFAULT_ABI == ABI_DARWIN
      && TARGET_64BIT)
    warning (0, "%qs is not supported for 64-bit Darwin;"
	     " it is incompatible with the installed C and C++ libraries",
	     "-malign-power");

  /* Numerous experiments show that IRA-based loop pressure calculation
     works better for RTL loop invariant motion on targets with enough
     (>= 32) registers.  It is an expensive optimization, so it is on only
     for peak performance.  */
  if (optimize >= 3 && global_init_p
      && !OPTION_SET_P (flag_ira_loop_pressure))
    flag_ira_loop_pressure = 1;

  /* -fsanitize=address needs to turn on -fasynchronous-unwind-tables in order
     for tracebacks to be complete, but not if -fasynchronous-unwind-tables
     was already specified on the command line.  */
  if (flag_sanitize & SANITIZE_USER_ADDRESS
      && !OPTION_SET_P (flag_asynchronous_unwind_tables))
    flag_asynchronous_unwind_tables = 1;

  /* -fvariable-expansion-in-unroller is a win for POWER whenever the
     loop unroller is active.  It is only checked during unrolling, so
     we can just set it on by default.  */
  if (!OPTION_SET_P (flag_variable_expansion_in_unroller))
    flag_variable_expansion_in_unroller = 1;

  /* Set the pointer size.  */
  if (TARGET_64BIT)
    {
      rs6000_pmode = DImode;
      rs6000_pointer_size = 64;
    }
  else
    {
      rs6000_pmode = SImode;
      rs6000_pointer_size = 32;
    }

  /* Some OSs don't support saving the high part of 64-bit registers on context
     switch.  Other OSs don't support saving Altivec registers.  On those OSs,
     we don't touch the OPTION_MASK_POWERPC64 or OPTION_MASK_ALTIVEC settings;
     if the user wants either, the user must explicitly specify them and we
     won't interfere with the user's specification.  */

  set_masks = POWERPC_MASKS;
#ifdef OS_MISSING_POWERPC64
  if (OS_MISSING_POWERPC64)
    set_masks &= ~OPTION_MASK_POWERPC64;
#endif
#ifdef OS_MISSING_ALTIVEC
  if (OS_MISSING_ALTIVEC)
    set_masks &= ~(OPTION_MASK_ALTIVEC | OPTION_MASK_VSX
		   | OTHER_VSX_VECTOR_MASKS);
#endif

  /* Don't let the processor default override options that were given
     explicitly.  */
  set_masks &= ~rs6000_isa_flags_explicit;
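  /* For example, with -mcpu=power9 -mno-vsx, OPTION_MASK_VSX is explicit and
     has been removed from set_masks, so the power9 table entry below cannot
     turn VSX back on.  */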

  /* Process the -mcpu=<xxx> and -mtune=<xxx> arguments.  If the user changed
     the cpu in a target attribute or pragma, but did not specify a tuning
     option, use the cpu for the tuning option rather than the option specified
     with -mtune on the command line.  Process a '--with-cpu' configuration
     request as an implicit --cpu.  */
  if (rs6000_cpu_index >= 0)
    cpu_index = rs6000_cpu_index;
  else if (main_target_opt != NULL && main_target_opt->x_rs6000_cpu_index >= 0)
    cpu_index = main_target_opt->x_rs6000_cpu_index;
  else if (OPTION_TARGET_CPU_DEFAULT)
    cpu_index = rs6000_cpu_name_lookup (OPTION_TARGET_CPU_DEFAULT);

  /* If we have a cpu, either through an explicit -mcpu=<xxx> or if the
     compiler was configured with --with-cpu=<xxx>, replace all of the ISA bits
     with those from the cpu, except for options that were explicitly set.  If
     we don't have a cpu, do not override the target bits set in
     TARGET_DEFAULT.  */
  if (cpu_index >= 0)
    {
      rs6000_cpu_index = cpu_index;
      rs6000_isa_flags &= ~set_masks;
      rs6000_isa_flags |= (processor_target_table[cpu_index].target_enable
			   & set_masks);
    }
  else
    {
      /* If no -mcpu=<xxx>, inherit any default options that were cleared via
	 POWERPC_MASKS.  Originally, TARGET_DEFAULT was used to initialize
	 target_flags via the TARGET_DEFAULT_TARGET_FLAGS hook.  When we switched
	 to using rs6000_isa_flags, we need to do the initialization here.

	 If there is a TARGET_DEFAULT, use that.  Otherwise fall back to using
	 -mcpu=powerpc, -mcpu=powerpc64, or -mcpu=powerpc64le defaults.  */
      HOST_WIDE_INT flags;
      if (TARGET_DEFAULT)
	flags = TARGET_DEFAULT;
      else
	{
	  /* PowerPC 64-bit LE requires at least ISA 2.07.  */
	  const char *default_cpu = (!TARGET_POWERPC64
				     ? "powerpc"
				     : (BYTES_BIG_ENDIAN
					? "powerpc64"
					: "powerpc64le"));
	  int default_cpu_index = rs6000_cpu_name_lookup (default_cpu);
	  flags = processor_target_table[default_cpu_index].target_enable;
	}
      rs6000_isa_flags |= (flags & ~rs6000_isa_flags_explicit);
    }

  if (rs6000_tune_index >= 0)
    tune_index = rs6000_tune_index;
  else if (cpu_index >= 0)
    rs6000_tune_index = tune_index = cpu_index;
  else
    {
      size_t i;
      enum processor_type tune_proc
	= (TARGET_POWERPC64 ? PROCESSOR_DEFAULT64 : PROCESSOR_DEFAULT);

      tune_index = -1;
      for (i = 0; i < ARRAY_SIZE (processor_target_table); i++)
	if (processor_target_table[i].processor == tune_proc)
	  {
	    tune_index = i;
	    break;
	  }
    }

  if (cpu_index >= 0)
    rs6000_cpu = processor_target_table[cpu_index].processor;
  else
    rs6000_cpu = TARGET_POWERPC64 ? PROCESSOR_DEFAULT64 : PROCESSOR_DEFAULT;

  gcc_assert (tune_index >= 0);
  rs6000_tune = processor_target_table[tune_index].processor;

  if (rs6000_cpu == PROCESSOR_PPCE300C2 || rs6000_cpu == PROCESSOR_PPCE300C3
      || rs6000_cpu == PROCESSOR_PPCE500MC || rs6000_cpu == PROCESSOR_PPCE500MC64
      || rs6000_cpu == PROCESSOR_PPCE5500)
    {
      if (TARGET_ALTIVEC)
	error ("AltiVec not supported in this target");
    }

  /* If we are optimizing big endian systems for space, use the load/store
     multiple instructions.  */
  if (BYTES_BIG_ENDIAN && optimize_size)
    rs6000_isa_flags |= ~rs6000_isa_flags_explicit & OPTION_MASK_MULTIPLE;

  /* Don't allow -mmultiple on little endian systems unless the cpu is a 750,
     because the hardware doesn't support the instructions used in little
     endian mode, and they cause an alignment trap.  The 750 does not cause an
     alignment trap (except when the target is unaligned).  */

  if (!BYTES_BIG_ENDIAN && rs6000_cpu != PROCESSOR_PPC750 && TARGET_MULTIPLE)
    {
      rs6000_isa_flags &= ~OPTION_MASK_MULTIPLE;
      if ((rs6000_isa_flags_explicit & OPTION_MASK_MULTIPLE) != 0)
	warning (0, "%qs is not supported on little endian systems",
		 "-mmultiple");
    }

  /* If little-endian, default to -mstrict-align on older processors.
     Testing for direct_move matches power8 and later.  */
  if (!BYTES_BIG_ENDIAN
      && !(processor_target_table[tune_index].target_enable
	   & OPTION_MASK_DIRECT_MOVE))
    rs6000_isa_flags |= ~rs6000_isa_flags_explicit & OPTION_MASK_STRICT_ALIGN;

  /* Add some warnings for VSX.  */
  if (TARGET_VSX)
    {
      const char *msg = NULL;
      if (!TARGET_HARD_FLOAT)
	{
	  if (rs6000_isa_flags_explicit & OPTION_MASK_VSX)
	    msg = N_("%<-mvsx%> requires hardware floating point");
	  else
	    {
	      rs6000_isa_flags &= ~ OPTION_MASK_VSX;
	      rs6000_isa_flags_explicit |= OPTION_MASK_VSX;
	    }
	}
      else if (TARGET_AVOID_XFORM > 0)
	msg = N_("%<-mvsx%> needs indexed addressing");
      else if (!TARGET_ALTIVEC && (rs6000_isa_flags_explicit
				   & OPTION_MASK_ALTIVEC))
        {
	  if (rs6000_isa_flags_explicit & OPTION_MASK_VSX)
	    msg = N_("%<-mvsx%> and %<-mno-altivec%> are incompatible");
	  else
	    msg = N_("%<-mno-altivec%> disables vsx");
        }

      if (msg)
	{
	  warning (0, msg);
	  rs6000_isa_flags &= ~ OPTION_MASK_VSX;
	  rs6000_isa_flags_explicit |= OPTION_MASK_VSX;
	}
    }

  /* If hard-float/altivec/vsx were explicitly turned off then don't allow
     the -mcpu setting to enable options that conflict. */
  if ((!TARGET_HARD_FLOAT || !TARGET_ALTIVEC || !TARGET_VSX)
      && (rs6000_isa_flags_explicit & (OPTION_MASK_SOFT_FLOAT
				       | OPTION_MASK_ALTIVEC
				       | OPTION_MASK_VSX)) != 0)
    rs6000_isa_flags &= ~((OPTION_MASK_P8_VECTOR | OPTION_MASK_CRYPTO
			   | OPTION_MASK_DIRECT_MOVE)
		         & ~rs6000_isa_flags_explicit);

  if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
    rs6000_print_isa_options (stderr, 0, "before defaults", rs6000_isa_flags);

#ifdef XCOFF_DEBUGGING_INFO
  /* For AIX default to 64-bit DWARF.  */
  if (!OPTION_SET_P (dwarf_offset_size))
    dwarf_offset_size = POINTER_SIZE_UNITS;
#endif

  /* Handle explicit -mno-{altivec,vsx,power8-vector,power9-vector} and turn
     off all of the options that depend on those flags.  */
  ignore_masks = rs6000_disable_incompatible_switches ();

  /* For the newer switches (vsx, dfp, etc.) set some of the older options,
     unless the user explicitly used the -mno-<option> to disable the code.  */
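  /* For example, -mmodulo by itself enables all of ISA_3_0_MASKS_SERVER
     (which includes the earlier server ISA masks), minus any bits the user
     explicitly disabled.  */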
  if (TARGET_P9_VECTOR || TARGET_MODULO || TARGET_P9_MISC)
    rs6000_isa_flags |= (ISA_3_0_MASKS_SERVER & ~ignore_masks);
  else if (TARGET_P9_MINMAX)
    {
      if (cpu_index >= 0)
	{
	  if (cpu_index == PROCESSOR_POWER9)
	    {
	      /* legacy behavior: allow -mcpu=power9 with certain
		 capabilities explicitly disabled.  */
	      rs6000_isa_flags |= (ISA_3_0_MASKS_SERVER & ~ignore_masks);
	    }
	  else
	    error ("power9 target option is incompatible with %<%s=<xxx>%> "
		   "for <xxx> less than power9", "-mcpu");
	}
      else if ((ISA_3_0_MASKS_SERVER & rs6000_isa_flags_explicit)
	       != (ISA_3_0_MASKS_SERVER & rs6000_isa_flags
		   & rs6000_isa_flags_explicit))
	/* Enforce that none of the ISA_3_0_MASKS_SERVER flags
	   were explicitly cleared.  */
	error ("%qs incompatible with explicitly disabled options",
	       "-mpower9-minmax");
      else
	rs6000_isa_flags |= ISA_3_0_MASKS_SERVER;
    }
  else if (TARGET_P8_VECTOR || TARGET_DIRECT_MOVE || TARGET_CRYPTO)
    rs6000_isa_flags |= (ISA_2_7_MASKS_SERVER & ~ignore_masks);
  else if (TARGET_VSX)
    rs6000_isa_flags |= (ISA_2_6_MASKS_SERVER & ~ignore_masks);
  else if (TARGET_POPCNTD)
    rs6000_isa_flags |= (ISA_2_6_MASKS_EMBEDDED & ~ignore_masks);
  else if (TARGET_DFP)
    rs6000_isa_flags |= (ISA_2_5_MASKS_SERVER & ~ignore_masks);
  else if (TARGET_CMPB)
    rs6000_isa_flags |= (ISA_2_5_MASKS_EMBEDDED & ~ignore_masks);
  else if (TARGET_FPRND)
    rs6000_isa_flags |= (ISA_2_4_MASKS & ~ignore_masks);
  else if (TARGET_POPCNTB)
    rs6000_isa_flags |= (ISA_2_2_MASKS & ~ignore_masks);
  else if (TARGET_ALTIVEC)
    rs6000_isa_flags |= (OPTION_MASK_PPC_GFXOPT & ~ignore_masks);

  /* Disable VSX and Altivec silently if the user switched cpus to power7 in a
     target attribute or pragma which automatically enables both options,
     unless the altivec ABI was set.  This is set by default for 64-bit, but
     not for 32-bit.  Don't move this before the above code using ignore_masks,
     since it can reset the cleared VSX/ALTIVEC flag again.  */
  if (main_target_opt && !main_target_opt->x_rs6000_altivec_abi)
    rs6000_isa_flags &= ~((OPTION_MASK_VSX | OPTION_MASK_ALTIVEC)
			  & ~rs6000_isa_flags_explicit);

  if (TARGET_CRYPTO && !TARGET_ALTIVEC)
    {
      if (rs6000_isa_flags_explicit & OPTION_MASK_CRYPTO)
	error ("%qs requires %qs", "-mcrypto", "-maltivec");
      rs6000_isa_flags &= ~OPTION_MASK_CRYPTO;
    }

  if (!TARGET_FPRND && TARGET_VSX)
    {
      if (rs6000_isa_flags_explicit & OPTION_MASK_FPRND)
	/* TARGET_VSX = 1 implies Power 7 and newer.  */
	error ("%qs requires %qs", "-mvsx", "-mfprnd");
      rs6000_isa_flags &= ~OPTION_MASK_FPRND;
    }

  if (TARGET_DIRECT_MOVE && !TARGET_VSX)
    {
      if (rs6000_isa_flags_explicit & OPTION_MASK_DIRECT_MOVE)
	error ("%qs requires %qs", "-mdirect-move", "-mvsx");
      rs6000_isa_flags &= ~OPTION_MASK_DIRECT_MOVE;
    }

  if (TARGET_P8_VECTOR && !TARGET_ALTIVEC)
    {
      if (rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR)
	error ("%qs requires %qs", "-mpower8-vector", "-maltivec");
      rs6000_isa_flags &= ~OPTION_MASK_P8_VECTOR;
    }

  if (TARGET_P8_VECTOR && !TARGET_VSX)
    {
      if ((rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR)
	  && (rs6000_isa_flags_explicit & OPTION_MASK_VSX))
	error ("%qs requires %qs", "-mpower8-vector", "-mvsx");
      else if ((rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR) == 0)
	{
	  rs6000_isa_flags &= ~OPTION_MASK_P8_VECTOR;
	  if (rs6000_isa_flags_explicit & OPTION_MASK_VSX)
	    rs6000_isa_flags_explicit |= OPTION_MASK_P8_VECTOR;
	}
      else
	{
	  /* OPTION_MASK_P8_VECTOR is explicit, and OPTION_MASK_VSX is
	     not explicit.  */
	  rs6000_isa_flags |= OPTION_MASK_VSX;
	  rs6000_isa_flags_explicit |= OPTION_MASK_VSX;
	}
    }

  if (TARGET_DFP && !TARGET_HARD_FLOAT)
    {
      if (rs6000_isa_flags_explicit & OPTION_MASK_DFP)
	error ("%qs requires %qs", "-mhard-dfp", "-mhard-float");
      rs6000_isa_flags &= ~OPTION_MASK_DFP;
    }

  /* The quad memory instructions only work in 64-bit mode.  In 32-bit mode,
     silently turn off quad memory mode.  */
  if ((TARGET_QUAD_MEMORY || TARGET_QUAD_MEMORY_ATOMIC) && !TARGET_POWERPC64)
    {
      if ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY) != 0)
	warning (0, N_("%<-mquad-memory%> requires 64-bit mode"));

      if ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY_ATOMIC) != 0)
	warning (0, N_("%<-mquad-memory-atomic%> requires 64-bit mode"));

      rs6000_isa_flags &= ~(OPTION_MASK_QUAD_MEMORY
			    | OPTION_MASK_QUAD_MEMORY_ATOMIC);
    }

  /* Non-atomic quad memory load/store are disabled for little endian, since
     the words are reversed, but atomic operations can still be done by
     swapping the words.  */
  if (TARGET_QUAD_MEMORY && !WORDS_BIG_ENDIAN)
    {
      if ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY) != 0)
	warning (0, N_("%<-mquad-memory%> is not available in little endian "
		       "mode"));

      rs6000_isa_flags &= ~OPTION_MASK_QUAD_MEMORY;
    }

  /* Assume if the user asked for normal quad memory instructions, they want
     the atomic versions as well, unless they explicitly told us not to use
     quad word atomic instructions.  */
  if (TARGET_QUAD_MEMORY
      && !TARGET_QUAD_MEMORY_ATOMIC
      && ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY_ATOMIC) == 0))
    rs6000_isa_flags |= OPTION_MASK_QUAD_MEMORY_ATOMIC;

  /* If we can shrink-wrap the TOC register save separately, then use
     -msave-toc-indirect unless explicitly disabled.  */
  if ((rs6000_isa_flags_explicit & OPTION_MASK_SAVE_TOC_INDIRECT) == 0
      && flag_shrink_wrap_separate
      && optimize_function_for_speed_p (cfun))
    rs6000_isa_flags |= OPTION_MASK_SAVE_TOC_INDIRECT;

  /* Enable power8 fusion if we are tuning for power8, even if we aren't
     generating power8 instructions.  Power9 does not optimize power8 fusion
     cases.  */
  if (!(rs6000_isa_flags_explicit & OPTION_MASK_P8_FUSION))
    {
      if (processor_target_table[tune_index].processor == PROCESSOR_POWER8)
	rs6000_isa_flags |= OPTION_MASK_P8_FUSION;
      else
	rs6000_isa_flags &= ~OPTION_MASK_P8_FUSION;
    }

  /* Setting additional fusion flags turns on base fusion.  */
  if (!TARGET_P8_FUSION && TARGET_P8_FUSION_SIGN)
    {
      if (rs6000_isa_flags_explicit & OPTION_MASK_P8_FUSION)
	{
	  if (TARGET_P8_FUSION_SIGN)
	    error ("%qs requires %qs", "-mpower8-fusion-sign",
		   "-mpower8-fusion");

	  rs6000_isa_flags &= ~OPTION_MASK_P8_FUSION;
	}
      else
	rs6000_isa_flags |= OPTION_MASK_P8_FUSION;
    }

  /* Power8 does not fuse sign extended loads with the addis.  If we are
     optimizing at high levels for speed, convert a sign extended load into a
     zero extending load, and an explicit sign extension.  */
  if (TARGET_P8_FUSION
      && !(rs6000_isa_flags_explicit & OPTION_MASK_P8_FUSION_SIGN)
      && optimize_function_for_speed_p (cfun)
      && optimize >= 3)
    rs6000_isa_flags |= OPTION_MASK_P8_FUSION_SIGN;

  /* ISA 3.0 vector instructions include ISA 2.07.  */
  if (TARGET_P9_VECTOR && !TARGET_P8_VECTOR)
    {
      /* We prefer to not mention undocumented options in
	 error messages.  However, if users have managed to select
	 power9-vector without selecting power8-vector, they
	 already know about undocumented flags.  */
      if ((rs6000_isa_flags_explicit & OPTION_MASK_P9_VECTOR) &&
	  (rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR))
	error ("%qs requires %qs", "-mpower9-vector", "-mpower8-vector");
      else if ((rs6000_isa_flags_explicit & OPTION_MASK_P9_VECTOR) == 0)
	{
	  rs6000_isa_flags &= ~OPTION_MASK_P9_VECTOR;
	  if (rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR)
	    rs6000_isa_flags_explicit |= OPTION_MASK_P9_VECTOR;
	}
      else
	{
	  /* OPTION_MASK_P9_VECTOR is explicit and
	     OPTION_MASK_P8_VECTOR is not explicit.  */
	  rs6000_isa_flags |= OPTION_MASK_P8_VECTOR;
	  rs6000_isa_flags_explicit |= OPTION_MASK_P8_VECTOR;
	}
    }

  /* Explicitly set -mallow-movmisalign on if we have full ISA 2.07 support.
     If we only have ISA 2.06 support, and the user did not specify the
     switch, leave it set to -1 so the movmisalign patterns are enabled, but
     we don't enable the full vectorization support.  */
  if (TARGET_ALLOW_MOVMISALIGN == -1 && TARGET_P8_VECTOR && TARGET_DIRECT_MOVE)
    TARGET_ALLOW_MOVMISALIGN = 1;

  else if (TARGET_ALLOW_MOVMISALIGN && !TARGET_VSX)
    {
      if (TARGET_ALLOW_MOVMISALIGN > 0
	  && OPTION_SET_P (TARGET_ALLOW_MOVMISALIGN))
	error ("%qs requires %qs", "-mallow-movmisalign", "-mvsx");

      TARGET_ALLOW_MOVMISALIGN = 0;
    }

  /* Determine when unaligned vector accesses are permitted, and when
     they are preferred over masked Altivec loads.  Note that if
     TARGET_ALLOW_MOVMISALIGN has been disabled by the user, then
     TARGET_EFFICIENT_UNALIGNED_VSX must be as well.  The converse is
     not true.  */
  if (TARGET_EFFICIENT_UNALIGNED_VSX)
    {
      if (!TARGET_VSX)
	{
	  if (rs6000_isa_flags_explicit & OPTION_MASK_EFFICIENT_UNALIGNED_VSX)
	    error ("%qs requires %qs", "-mefficient-unaligned-vsx", "-mvsx");

	  rs6000_isa_flags &= ~OPTION_MASK_EFFICIENT_UNALIGNED_VSX;
	}

      else if (!TARGET_ALLOW_MOVMISALIGN)
	{
	  if (rs6000_isa_flags_explicit & OPTION_MASK_EFFICIENT_UNALIGNED_VSX)
	    error ("%qs requires %qs", "-mefficient-unaligned-vsx",
		   "-mallow-movmisalign");

	  rs6000_isa_flags &= ~OPTION_MASK_EFFICIENT_UNALIGNED_VSX;
	}
    }

  if (!(rs6000_isa_flags_explicit & OPTION_MASK_BLOCK_OPS_UNALIGNED_VSX))
    {
      if (TARGET_EFFICIENT_UNALIGNED_VSX)
	rs6000_isa_flags |= OPTION_MASK_BLOCK_OPS_UNALIGNED_VSX;
      else
	rs6000_isa_flags &= ~OPTION_MASK_BLOCK_OPS_UNALIGNED_VSX;
    }

  /* Use long double size to select the appropriate long double.  We use
     TYPE_PRECISION to differentiate the 3 different long double types.  We map
     128 into the precision used for TFmode.  */
  int default_long_double_size = (RS6000_DEFAULT_LONG_DOUBLE_SIZE == 64
				  ? 64
				  : FLOAT_PRECISION_TFmode);

  /* Set long double size before the IEEE 128-bit tests.  */
  if (!OPTION_SET_P (rs6000_long_double_type_size))
    {
      if (main_target_opt != NULL
	  && (main_target_opt->x_rs6000_long_double_type_size
	      != default_long_double_size))
	error ("target attribute or pragma changes %<long double%> size");
      else
	rs6000_long_double_type_size = default_long_double_size;
    }
  else if (rs6000_long_double_type_size == FLOAT_PRECISION_TFmode)
    ; /* The option value can be seen when cl_target_option_restore is called.  */
  else if (rs6000_long_double_type_size == 128)
    rs6000_long_double_type_size = FLOAT_PRECISION_TFmode;

  /* Set -mabi=ieeelongdouble on some old targets.  In the future, power server
     systems will also set long double to be IEEE 128-bit.  AIX and Darwin
     explicitly redefine TARGET_IEEEQUAD and TARGET_IEEEQUAD_DEFAULT to 0, so
     those systems will not pick up this default.  Warn if the user changes the
     default unless -Wno-psabi.  */
  if (!OPTION_SET_P (rs6000_ieeequad))
    rs6000_ieeequad = TARGET_IEEEQUAD_DEFAULT;

  else if (TARGET_LONG_DOUBLE_128)
    {
      if (global_options.x_rs6000_ieeequad
	  && (!TARGET_POPCNTD || !TARGET_VSX))
	error ("%qs requires full ISA 2.06 support", "-mabi=ieeelongdouble");

      if (rs6000_ieeequad != TARGET_IEEEQUAD_DEFAULT)
	{
	  /* Determine if the user can change the default long double type at
	     compilation time.  You need GLIBC 2.32 or newer to be able to
	     change the long double type.  Only issue one warning.  */
	  static bool warned_change_long_double;

	  if (!warned_change_long_double && !glibc_supports_ieee_128bit ())
	    {
	      warned_change_long_double = true;
	      if (TARGET_IEEEQUAD)
		warning (OPT_Wpsabi, "Using IEEE extended precision "
			 "%<long double%>");
	      else
		warning (OPT_Wpsabi, "Using IBM extended precision "
			 "%<long double%>");
	    }
	}
    }

  /* Enable the default support for IEEE 128-bit floating point on Linux VSX
     systems.  In GCC 7, we would enable the IEEE 128-bit floating point
     infrastructure (-mfloat128-type) but not enable the actual __float128 type
     unless the user used the explicit -mfloat128.  In GCC 8, we enable both
     the keyword as well as the type.  */
  TARGET_FLOAT128_TYPE = TARGET_FLOAT128_ENABLE_TYPE && TARGET_VSX;

  /* IEEE 128-bit floating point requires VSX support.  */
  if (TARGET_FLOAT128_KEYWORD)
    {
      if (!TARGET_VSX)
	{
	  if ((rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_KEYWORD) != 0)
	    error ("%qs requires VSX support", "-mfloat128");

	  TARGET_FLOAT128_TYPE = 0;
	  rs6000_isa_flags &= ~(OPTION_MASK_FLOAT128_KEYWORD
				| OPTION_MASK_FLOAT128_HW);
	}
      else if (!TARGET_FLOAT128_TYPE)
	{
	  TARGET_FLOAT128_TYPE = 1;
	  warning (0, "The %<-mfloat128%> option may not be fully supported");
	}
    }

  /* Enable the __float128 keyword under Linux by default.  */
  if (TARGET_FLOAT128_TYPE && !TARGET_FLOAT128_KEYWORD
      && (rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_KEYWORD) == 0)
    rs6000_isa_flags |= OPTION_MASK_FLOAT128_KEYWORD;

  /* If we are supporting the float128 type and have full ISA 3.0 support,
     enable -mfloat128-hardware by default.  However, don't enable the
     __float128 keyword if it was explicitly turned off.  64-bit mode is needed
     because sometimes the compiler wants to put things in an integer
     container, and if we don't have __int128 support, it is impossible.  */
  if (TARGET_FLOAT128_TYPE && !TARGET_FLOAT128_HW && TARGET_64BIT
      && (rs6000_isa_flags & ISA_3_0_MASKS_IEEE) == ISA_3_0_MASKS_IEEE
      && !(rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_HW))
    rs6000_isa_flags |= OPTION_MASK_FLOAT128_HW;

  if (TARGET_FLOAT128_HW
      && (rs6000_isa_flags & ISA_3_0_MASKS_IEEE) != ISA_3_0_MASKS_IEEE)
    {
      if ((rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_HW) != 0)
	error ("%qs requires full ISA 3.0 support", "%<-mfloat128-hardware%>");

      rs6000_isa_flags &= ~OPTION_MASK_FLOAT128_HW;
    }

  if (TARGET_FLOAT128_HW && !TARGET_64BIT)
    {
      if ((rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_HW) != 0)
	error ("%qs requires %qs", "%<-mfloat128-hardware%>", "-m64");

      rs6000_isa_flags &= ~OPTION_MASK_FLOAT128_HW;
    }

  /* Enable -mprefixed by default on power10 systems.  */
  if (TARGET_POWER10 && (rs6000_isa_flags_explicit & OPTION_MASK_PREFIXED) == 0)
    rs6000_isa_flags |= OPTION_MASK_PREFIXED;

  /* -mprefixed requires -mcpu=power10 (or later).  */
  else if (TARGET_PREFIXED && !TARGET_POWER10)
    {
      if ((rs6000_isa_flags_explicit & OPTION_MASK_PREFIXED) != 0)
	error ("%qs requires %qs", "-mprefixed", "-mcpu=power10");

      rs6000_isa_flags &= ~OPTION_MASK_PREFIXED;
    }

  /* -mpcrel requires prefixed load/store addressing.  */
  if (TARGET_PCREL && !TARGET_PREFIXED)
    {
      if ((rs6000_isa_flags_explicit & OPTION_MASK_PCREL) != 0)
	error ("%qs requires %qs", "-mpcrel", "-mprefixed");

      rs6000_isa_flags &= ~OPTION_MASK_PCREL;
    }

  /* Print the options after updating the defaults.  */
  if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
    rs6000_print_isa_options (stderr, 0, "after defaults", rs6000_isa_flags);

  /* E500mc does "better" if we inline more aggressively.  Respect the
     user's opinion, though.  */
  if (rs6000_block_move_inline_limit == 0
      && (rs6000_tune == PROCESSOR_PPCE500MC
	  || rs6000_tune == PROCESSOR_PPCE500MC64
	  || rs6000_tune == PROCESSOR_PPCE5500
	  || rs6000_tune == PROCESSOR_PPCE6500))
    rs6000_block_move_inline_limit = 128;

  /* store_one_arg depends on expand_block_move to handle at least the
     size of reg_parm_stack_space.  */
  if (rs6000_block_move_inline_limit < (TARGET_POWERPC64 ? 64 : 32))
    rs6000_block_move_inline_limit = (TARGET_POWERPC64 ? 64 : 32);

  if (global_init_p)
    {
      /* If the appropriate debug option is enabled, replace the target hooks
	 with debug versions that call the real version and then print
	 debugging information.  */
      if (TARGET_DEBUG_COST)
	{
	  targetm.rtx_costs = rs6000_debug_rtx_costs;
	  targetm.address_cost = rs6000_debug_address_cost;
	  targetm.sched.adjust_cost = rs6000_debug_adjust_cost;
	}

      if (TARGET_DEBUG_ADDR)
	{
	  targetm.legitimate_address_p = rs6000_debug_legitimate_address_p;
	  targetm.legitimize_address = rs6000_debug_legitimize_address;
	  rs6000_secondary_reload_class_ptr
	    = rs6000_debug_secondary_reload_class;
	  targetm.secondary_memory_needed
	    = rs6000_debug_secondary_memory_needed;
	  targetm.can_change_mode_class
	    = rs6000_debug_can_change_mode_class;
	  rs6000_preferred_reload_class_ptr
	    = rs6000_debug_preferred_reload_class;
	  rs6000_mode_dependent_address_ptr
	    = rs6000_debug_mode_dependent_address;
	}

      if (rs6000_veclibabi_name)
	{
	  if (strcmp (rs6000_veclibabi_name, "mass") == 0)
	    rs6000_veclib_handler = rs6000_builtin_vectorized_libmass;
	  else
	    {
	      error ("unknown vectorization library ABI type in "
		     "%<-mveclibabi=%s%>", rs6000_veclibabi_name);
	      ret = false;
	    }
	}
    }

  /* Enable Altivec ABI for AIX -maltivec.  */
  if (TARGET_XCOFF
      && (TARGET_ALTIVEC || TARGET_VSX)
      && !OPTION_SET_P (rs6000_altivec_abi))
    {
      if (main_target_opt != NULL && !main_target_opt->x_rs6000_altivec_abi)
	error ("target attribute or pragma changes AltiVec ABI");
      else
	rs6000_altivec_abi = 1;
    }

  /* The AltiVec ABI is the default for PowerPC-64 GNU/Linux.  For
     PowerPC-32 GNU/Linux, -maltivec implies the AltiVec ABI.  It can
     be explicitly overridden in either case.  */
  if (TARGET_ELF)
    {
      if (!OPTION_SET_P (rs6000_altivec_abi)
	  && (TARGET_64BIT || TARGET_ALTIVEC || TARGET_VSX))
	{
	  if (main_target_opt != NULL &&
	      !main_target_opt->x_rs6000_altivec_abi)
	    error ("target attribute or pragma changes AltiVec ABI");
	  else
	    rs6000_altivec_abi = 1;
	}
    }

  /* Set the Darwin64 ABI as default for 64-bit Darwin.
     So far, the only darwin64 targets are also MACH-O.  */
  if (TARGET_MACHO
      && DEFAULT_ABI == ABI_DARWIN
      && TARGET_64BIT)
    {
      if (main_target_opt != NULL && !main_target_opt->x_rs6000_darwin64_abi)
	error ("target attribute or pragma changes darwin64 ABI");
      else
	{
	  rs6000_darwin64_abi = 1;
	  /* Default to natural alignment, for better performance.  */
	  rs6000_alignment_flags = MASK_ALIGN_NATURAL;
	}
    }

  /* Place FP constants in the constant pool instead of TOC
     if section anchors enabled.  */
  if (flag_section_anchors
      && !OPTION_SET_P (TARGET_NO_FP_IN_TOC))
    TARGET_NO_FP_IN_TOC = 1;

  if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
    rs6000_print_isa_options (stderr, 0, "before subtarget", rs6000_isa_flags);

#ifdef SUBTARGET_OVERRIDE_OPTIONS
  SUBTARGET_OVERRIDE_OPTIONS;
#endif
#ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
  SUBSUBTARGET_OVERRIDE_OPTIONS;
#endif
#ifdef SUB3TARGET_OVERRIDE_OPTIONS
  SUB3TARGET_OVERRIDE_OPTIONS;
#endif

  /* If the ABI has support for PC-relative relocations, enable it by default.
     This test depends on the sub-target tests above setting the code model to
     medium for ELF v2 systems.  */
  if (PCREL_SUPPORTED_BY_OS
      && (rs6000_isa_flags_explicit & OPTION_MASK_PCREL) == 0)
    rs6000_isa_flags |= OPTION_MASK_PCREL;

  /* -mpcrel requires -mcmodel=medium, but we can't check TARGET_CMODEL until
      after the subtarget override options are done.  */
  else if (TARGET_PCREL && TARGET_CMODEL != CMODEL_MEDIUM)
    {
      if ((rs6000_isa_flags_explicit & OPTION_MASK_PCREL) != 0)
	error ("%qs requires %qs", "-mpcrel", "-mcmodel=medium");

      rs6000_isa_flags &= ~OPTION_MASK_PCREL;
    }

  /* Enable -mmma by default on power10 systems.  */
  if (TARGET_POWER10 && (rs6000_isa_flags_explicit & OPTION_MASK_MMA) == 0)
    rs6000_isa_flags |= OPTION_MASK_MMA;

  if (TARGET_POWER10
      && (rs6000_isa_flags_explicit & OPTION_MASK_P10_FUSION) == 0)
    rs6000_isa_flags |= OPTION_MASK_P10_FUSION;

  /* Turn off vector pair/mma options on non-power10 systems.  */
  else if (!TARGET_POWER10 && TARGET_MMA)
    {
      if ((rs6000_isa_flags_explicit & OPTION_MASK_MMA) != 0)
	error ("%qs requires %qs", "-mmma", "-mcpu=power10");

      rs6000_isa_flags &= ~OPTION_MASK_MMA;
    }

4443   /* ISA 3.1 requires SIMD support for MMA, and our implementation of it
4444      (such as "*movoo") uses vector pair accesses, which need VSX
4445      registers, so make MMA require VSX support here.  */
4446   if (TARGET_MMA && !TARGET_VSX)
4447     {
4448       if ((rs6000_isa_flags_explicit & OPTION_MASK_MMA) != 0)
4449 	error ("%qs requires %qs", "-mmma", "-mvsx");
4450       rs6000_isa_flags &= ~OPTION_MASK_MMA;
4451     }
4452 
4453   if (!TARGET_PCREL && TARGET_PCREL_OPT)
4454     rs6000_isa_flags &= ~OPTION_MASK_PCREL_OPT;
4455 
4456   if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
4457     rs6000_print_isa_options (stderr, 0, "after subtarget", rs6000_isa_flags);
4458 
4459   rs6000_always_hint = (rs6000_tune != PROCESSOR_POWER4
4460 			&& rs6000_tune != PROCESSOR_POWER5
4461 			&& rs6000_tune != PROCESSOR_POWER6
4462 			&& rs6000_tune != PROCESSOR_POWER7
4463 			&& rs6000_tune != PROCESSOR_POWER8
4464 			&& rs6000_tune != PROCESSOR_POWER9
4465 			&& rs6000_tune != PROCESSOR_POWER10
4466 			&& rs6000_tune != PROCESSOR_PPCA2
4467 			&& rs6000_tune != PROCESSOR_CELL
4468 			&& rs6000_tune != PROCESSOR_PPC476);
4469   rs6000_sched_groups = (rs6000_tune == PROCESSOR_POWER4
4470 			 || rs6000_tune == PROCESSOR_POWER5
4471 			 || rs6000_tune == PROCESSOR_POWER7
4472 			 || rs6000_tune == PROCESSOR_POWER8);
4473   rs6000_align_branch_targets = (rs6000_tune == PROCESSOR_POWER4
4474 				 || rs6000_tune == PROCESSOR_POWER5
4475 				 || rs6000_tune == PROCESSOR_POWER6
4476 				 || rs6000_tune == PROCESSOR_POWER7
4477 				 || rs6000_tune == PROCESSOR_POWER8
4478 				 || rs6000_tune == PROCESSOR_POWER9
4479 				 || rs6000_tune == PROCESSOR_POWER10
4480 				 || rs6000_tune == PROCESSOR_PPCE500MC
4481 				 || rs6000_tune == PROCESSOR_PPCE500MC64
4482 				 || rs6000_tune == PROCESSOR_PPCE5500
4483 				 || rs6000_tune == PROCESSOR_PPCE6500);
4484 
4485   /* Allow debug switches to override the above settings.  These are set to -1
4486      in rs6000.opt to indicate the user hasn't directly set the switch.  */
4487   if (TARGET_ALWAYS_HINT >= 0)
4488     rs6000_always_hint = TARGET_ALWAYS_HINT;
4489 
4490   if (TARGET_SCHED_GROUPS >= 0)
4491     rs6000_sched_groups = TARGET_SCHED_GROUPS;
4492 
4493   if (TARGET_ALIGN_BRANCH_TARGETS >= 0)
4494     rs6000_align_branch_targets = TARGET_ALIGN_BRANCH_TARGETS;
4495 
4496   rs6000_sched_restricted_insns_priority
4497     = (rs6000_sched_groups ? 1 : 0);
4498 
4499   /* Handle -msched-costly-dep option.  */
4500   rs6000_sched_costly_dep
4501     = (rs6000_sched_groups ? true_store_to_load_dep_costly : no_dep_costly);
4502 
4503   if (rs6000_sched_costly_dep_str)
4504     {
4505       if (! strcmp (rs6000_sched_costly_dep_str, "no"))
4506 	rs6000_sched_costly_dep = no_dep_costly;
4507       else if (! strcmp (rs6000_sched_costly_dep_str, "all"))
4508 	rs6000_sched_costly_dep = all_deps_costly;
4509       else if (! strcmp (rs6000_sched_costly_dep_str, "true_store_to_load"))
4510 	rs6000_sched_costly_dep = true_store_to_load_dep_costly;
4511       else if (! strcmp (rs6000_sched_costly_dep_str, "store_to_load"))
4512 	rs6000_sched_costly_dep = store_to_load_dep_costly;
4513       else
4514 	rs6000_sched_costly_dep = ((enum rs6000_dependence_cost)
4515 				   atoi (rs6000_sched_costly_dep_str));
4516     }
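
  /* For illustration (numeric value hypothetical):
     -msched-costly-dep=store_to_load marks every store-to-load dependence
     as costly, while a bare number such as -msched-costly-dep=20, handled
     by the atoi fallback above, treats a dependence as costly only once
     its cost reaches that number.  */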
4517 
4518   /* Handle -minsert-sched-nops option.  */
4519   rs6000_sched_insert_nops
4520     = (rs6000_sched_groups ? sched_finish_regroup_exact : sched_finish_none);
4521 
4522   if (rs6000_sched_insert_nops_str)
4523     {
4524       if (! strcmp (rs6000_sched_insert_nops_str, "no"))
4525 	rs6000_sched_insert_nops = sched_finish_none;
4526       else if (! strcmp (rs6000_sched_insert_nops_str, "pad"))
4527 	rs6000_sched_insert_nops = sched_finish_pad_groups;
4528       else if (! strcmp (rs6000_sched_insert_nops_str, "regroup_exact"))
4529 	rs6000_sched_insert_nops = sched_finish_regroup_exact;
4530       else
4531 	rs6000_sched_insert_nops = ((enum rs6000_nop_insertion)
4532 				    atoi (rs6000_sched_insert_nops_str));
4533     }
4534 
4535   /* Handle the stack protector.  */
4536   if (!OPTION_SET_P (rs6000_stack_protector_guard))
4537 #ifdef TARGET_THREAD_SSP_OFFSET
4538     rs6000_stack_protector_guard = SSP_TLS;
4539 #else
4540     rs6000_stack_protector_guard = SSP_GLOBAL;
4541 #endif
4542 
4543 #ifdef TARGET_THREAD_SSP_OFFSET
4544   rs6000_stack_protector_guard_offset = TARGET_THREAD_SSP_OFFSET;
4545   rs6000_stack_protector_guard_reg = TARGET_64BIT ? 13 : 2;
4546 #endif
4547 
4548   if (OPTION_SET_P (rs6000_stack_protector_guard_offset_str))
4549     {
4550       char *endp;
4551       const char *str = rs6000_stack_protector_guard_offset_str;
4552 
4553       errno = 0;
4554       long offset = strtol (str, &endp, 0);
4555       if (!*str || *endp || errno)
4556 	error ("%qs is not a valid number in %qs", str,
4557 	       "-mstack-protector-guard-offset=");
4558 
4559       if (!IN_RANGE (offset, -0x8000, 0x7fff)
4560 	  || (TARGET_64BIT && (offset & 3)))
4561 	error ("%qs is not a valid offset in %qs", str,
4562 	       "-mstack-protector-guard-offset=");
4563 
4564       rs6000_stack_protector_guard_offset = offset;
4565     }
4566 
4567   if (OPTION_SET_P (rs6000_stack_protector_guard_reg_str))
4568     {
4569       const char *str = rs6000_stack_protector_guard_reg_str;
4570       int reg = decode_reg_name (str);
4571 
4572       if (!IN_RANGE (reg, 1, 31))
4573 	error ("%qs is not a valid base register in %qs", str,
4574 	       "-mstack-protector-guard-reg=");
4575 
4576       rs6000_stack_protector_guard_reg = reg;
4577     }
4578 
4579   if (rs6000_stack_protector_guard == SSP_TLS
4580       && !IN_RANGE (rs6000_stack_protector_guard_reg, 1, 31))
4581     error ("%qs needs a valid base register", "-mstack-protector-guard=tls");
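
  /* A usage sketch (offset value hypothetical): on a 64-bit target with TLS,
       -mstack-protector-guard=tls
       -mstack-protector-guard-reg=r13
       -mstack-protector-guard-offset=0x700
     makes the canary load use 0x700(r13).  The offset must fit in a signed
     16-bit displacement and, for 64-bit, be a multiple of 4 to suit the
     DS-form ld instruction, which is what the checks above enforce.  */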
4582 
4583   if (global_init_p)
4584     {
4585 #ifdef TARGET_REGNAMES
4586       /* If the user desires alternate register names, copy in the
4587 	 alternate names now.  */
4588       if (TARGET_REGNAMES)
4589 	memcpy (rs6000_reg_names, alt_reg_names, sizeof (rs6000_reg_names));
4590 #endif
4591 
4592       /* Set aix_struct_return last, after the ABI is determined.
4593 	 If -maix-struct-return or -msvr4-struct-return was explicitly
4594 	 used, don't override with the ABI default.  */
4595       if (!OPTION_SET_P (aix_struct_return))
4596 	aix_struct_return = (DEFAULT_ABI != ABI_V4 || DRAFT_V4_STRUCT_RET);
4597 
4598 #if 0
4599       /* IBM XL compiler defaults to unsigned bitfields.  */
4600       if (TARGET_XL_COMPAT)
4601 	flag_signed_bitfields = 0;
4602 #endif
4603 
4604       if (TARGET_LONG_DOUBLE_128 && !TARGET_IEEEQUAD)
4605 	REAL_MODE_FORMAT (TFmode) = &ibm_extended_format;
4606 
4607       ASM_GENERATE_INTERNAL_LABEL (toc_label_name, "LCTOC", 1);
4608 
4609       /* We can only guarantee the availability of DI pseudo-ops when
4610 	 assembling for 64-bit targets.  */
4611       if (!TARGET_64BIT)
4612 	{
4613 	  targetm.asm_out.aligned_op.di = NULL;
4614 	  targetm.asm_out.unaligned_op.di = NULL;
4615 	}
4616 
4617 
4618       /* Set branch target alignment, if not optimizing for size.  */
4619       if (!optimize_size)
4620 	{
4621 	  /* Cell wants 8-byte alignment for dual issue.  Titan wants 8-byte
4622 	     alignment to avoid misprediction by the branch predictor.  */
4623 	  if (rs6000_tune == PROCESSOR_TITAN
4624 	      || rs6000_tune == PROCESSOR_CELL)
4625 	    {
4626 	      if (flag_align_functions && !str_align_functions)
4627 		str_align_functions = "8";
4628 	      if (flag_align_jumps && !str_align_jumps)
4629 		str_align_jumps = "8";
4630 	      if (flag_align_loops && !str_align_loops)
4631 		str_align_loops = "8";
4632 	    }
4633 	  if (rs6000_align_branch_targets)
4634 	    {
4635 	      if (flag_align_functions && !str_align_functions)
4636 		str_align_functions = "16";
4637 	      if (flag_align_jumps && !str_align_jumps)
4638 		str_align_jumps = "16";
4639 	      if (flag_align_loops && !str_align_loops)
4640 		{
4641 		  can_override_loop_align = 1;
4642 		  str_align_loops = "16";
4643 		}
4644 	    }
4645 	}
4646 
4647       /* Arrange to save and restore machine status around nested functions.  */
4648       init_machine_status = rs6000_init_machine_status;
4649 
4650       /* We should always be splitting complex arguments, but we can't break
4651 	 Linux and Darwin ABIs at the moment.  For now, only AIX is fixed.  */
4652       if (DEFAULT_ABI == ABI_V4 || DEFAULT_ABI == ABI_DARWIN)
4653 	targetm.calls.split_complex_arg = NULL;
4654 
4655       /* The AIX and ELFv1 ABIs define standard function descriptors.  */
4656       if (DEFAULT_ABI == ABI_AIX)
4657 	targetm.calls.custom_function_descriptors = 0;
4658     }
4659 
4660   /* Initialize rs6000_cost with the appropriate target costs.  */
4661   if (optimize_size)
4662     rs6000_cost = TARGET_POWERPC64 ? &size64_cost : &size32_cost;
4663   else
4664     switch (rs6000_tune)
4665       {
4666       case PROCESSOR_RS64A:
4667 	rs6000_cost = &rs64a_cost;
4668 	break;
4669 
4670       case PROCESSOR_MPCCORE:
4671 	rs6000_cost = &mpccore_cost;
4672 	break;
4673 
4674       case PROCESSOR_PPC403:
4675 	rs6000_cost = &ppc403_cost;
4676 	break;
4677 
4678       case PROCESSOR_PPC405:
4679 	rs6000_cost = &ppc405_cost;
4680 	break;
4681 
4682       case PROCESSOR_PPC440:
4683 	rs6000_cost = &ppc440_cost;
4684 	break;
4685 
4686       case PROCESSOR_PPC476:
4687 	rs6000_cost = &ppc476_cost;
4688 	break;
4689 
4690       case PROCESSOR_PPC601:
4691 	rs6000_cost = &ppc601_cost;
4692 	break;
4693 
4694       case PROCESSOR_PPC603:
4695 	rs6000_cost = &ppc603_cost;
4696 	break;
4697 
4698       case PROCESSOR_PPC604:
4699 	rs6000_cost = &ppc604_cost;
4700 	break;
4701 
4702       case PROCESSOR_PPC604e:
4703 	rs6000_cost = &ppc604e_cost;
4704 	break;
4705 
4706       case PROCESSOR_PPC620:
4707 	rs6000_cost = &ppc620_cost;
4708 	break;
4709 
4710       case PROCESSOR_PPC630:
4711 	rs6000_cost = &ppc630_cost;
4712 	break;
4713 
4714       case PROCESSOR_CELL:
4715 	rs6000_cost = &ppccell_cost;
4716 	break;
4717 
4718       case PROCESSOR_PPC750:
4719       case PROCESSOR_PPC7400:
4720 	rs6000_cost = &ppc750_cost;
4721 	break;
4722 
4723       case PROCESSOR_PPC7450:
4724 	rs6000_cost = &ppc7450_cost;
4725 	break;
4726 
4727       case PROCESSOR_PPC8540:
4728       case PROCESSOR_PPC8548:
4729 	rs6000_cost = &ppc8540_cost;
4730 	break;
4731 
4732       case PROCESSOR_PPCE300C2:
4733       case PROCESSOR_PPCE300C3:
4734 	rs6000_cost = &ppce300c2c3_cost;
4735 	break;
4736 
4737       case PROCESSOR_PPCE500MC:
4738 	rs6000_cost = &ppce500mc_cost;
4739 	break;
4740 
4741       case PROCESSOR_PPCE500MC64:
4742 	rs6000_cost = &ppce500mc64_cost;
4743 	break;
4744 
4745       case PROCESSOR_PPCE5500:
4746 	rs6000_cost = &ppce5500_cost;
4747 	break;
4748 
4749       case PROCESSOR_PPCE6500:
4750 	rs6000_cost = &ppce6500_cost;
4751 	break;
4752 
4753       case PROCESSOR_TITAN:
4754 	rs6000_cost = &titan_cost;
4755 	break;
4756 
4757       case PROCESSOR_POWER4:
4758       case PROCESSOR_POWER5:
4759 	rs6000_cost = &power4_cost;
4760 	break;
4761 
4762       case PROCESSOR_POWER6:
4763 	rs6000_cost = &power6_cost;
4764 	break;
4765 
4766       case PROCESSOR_POWER7:
4767 	rs6000_cost = &power7_cost;
4768 	break;
4769 
4770       case PROCESSOR_POWER8:
4771 	rs6000_cost = &power8_cost;
4772 	break;
4773 
4774       case PROCESSOR_POWER9:
4775 	rs6000_cost = &power9_cost;
4776 	break;
4777 
4778       case PROCESSOR_POWER10:
4779 	rs6000_cost = &power10_cost;
4780 	break;
4781 
4782       case PROCESSOR_PPCA2:
4783 	rs6000_cost = &ppca2_cost;
4784 	break;
4785 
4786       default:
4787 	gcc_unreachable ();
4788       }
4789 
4790   if (global_init_p)
4791     {
4792       SET_OPTION_IF_UNSET (&global_options, &global_options_set,
4793 			   param_simultaneous_prefetches,
4794 			   rs6000_cost->simultaneous_prefetches);
4795       SET_OPTION_IF_UNSET (&global_options, &global_options_set,
4796 			   param_l1_cache_size,
4797 			   rs6000_cost->l1_cache_size);
4798       SET_OPTION_IF_UNSET (&global_options, &global_options_set,
4799 			   param_l1_cache_line_size,
4800 			   rs6000_cost->cache_line_size);
4801       SET_OPTION_IF_UNSET (&global_options, &global_options_set,
4802 			   param_l2_cache_size,
4803 			   rs6000_cost->l2_cache_size);
4804 
4805       /* Increase loop peeling limits based on performance analysis. */
4806       SET_OPTION_IF_UNSET (&global_options, &global_options_set,
4807 			   param_max_peeled_insns, 400);
4808       SET_OPTION_IF_UNSET (&global_options, &global_options_set,
4809 			   param_max_completely_peeled_insns, 400);
4810 
4811       /* The lxvl/stxvl instructions don't perform well before Power10.  */
4812       if (TARGET_POWER10)
4813 	SET_OPTION_IF_UNSET (&global_options, &global_options_set,
4814 			     param_vect_partial_vector_usage, 1);
4815       else
4816 	SET_OPTION_IF_UNSET (&global_options, &global_options_set,
4817 			     param_vect_partial_vector_usage, 0);
4818 
4819       /* Use the 'model' -fsched-pressure algorithm by default.  */
4820       SET_OPTION_IF_UNSET (&global_options, &global_options_set,
4821 			   param_sched_pressure_algorithm,
4822 			   SCHED_PRESSURE_MODEL);
4823 
4824       /* If using typedef char *va_list, signal that
4825 	 __builtin_va_start (&ap, 0) can be optimized to
4826 	 ap = __builtin_next_arg (0).  */
4827       if (DEFAULT_ABI != ABI_V4)
4828 	targetm.expand_builtin_va_start = NULL;
4829     }
4830 
4831   rs6000_override_options_after_change ();
4832 
4833   /* If not explicitly specified via option, decide whether to generate indexed
4834      load/store instructions.  A value of -1 indicates that the
4835      initial value of this variable has not been overwritten. During
4836      compilation, TARGET_AVOID_XFORM is either 0 or 1. */
4837   if (TARGET_AVOID_XFORM == -1)
4838     /* Avoid indexed addressing when targeting Power6 in order to avoid the
4839      DERAT mispredict penalty.  However the LVE and STVE altivec instructions
4840      need indexed accesses and the type used is the scalar type of the element
4841      being loaded or stored.  */
4842     TARGET_AVOID_XFORM = (rs6000_tune == PROCESSOR_POWER6 && TARGET_CMPB
4843 			  && !TARGET_ALTIVEC);
4844 
4845   /* Set the -mrecip options.  */
4846   if (rs6000_recip_name)
4847     {
4848       char *p = ASTRDUP (rs6000_recip_name);
4849       char *q;
4850       unsigned int mask, i;
4851       bool invert;
4852 
4853       while ((q = strtok (p, ",")) != NULL)
4854 	{
4855 	  p = NULL;
4856 	  if (*q == '!')
4857 	    {
4858 	      invert = true;
4859 	      q++;
4860 	    }
4861 	  else
4862 	    invert = false;
4863 
4864 	  if (!strcmp (q, "default"))
4865 	    mask = ((TARGET_RECIP_PRECISION)
4866 		    ? RECIP_HIGH_PRECISION : RECIP_LOW_PRECISION);
4867 	  else
4868 	    {
4869 	      for (i = 0; i < ARRAY_SIZE (recip_options); i++)
4870 		if (!strcmp (q, recip_options[i].string))
4871 		  {
4872 		    mask = recip_options[i].mask;
4873 		    break;
4874 		  }
4875 
4876 	      if (i == ARRAY_SIZE (recip_options))
4877 		{
4878 		  error ("unknown option for %<%s=%s%>", "-mrecip", q);
4879 		  invert = false;
4880 		  mask = 0;
4881 		  ret = false;
4882 		}
4883 	    }
4884 
4885 	  if (invert)
4886 	    rs6000_recip_control &= ~mask;
4887 	  else
4888 	    rs6000_recip_control |= mask;
4889 	}
4890     }
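
  /* Example (assuming "rsqrtd" is among the recip_options entries):
     -mrecip=default,!rsqrtd first sets the default mask for the available
     precision, then clears the double-precision rsqrt estimate bit.  */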
4891 
4892   /* Set the builtin mask of the various options used that could affect which
4893      builtins were used.  In the past we used target_flags, but we've run out
4894      of bits, and some options are no longer in target_flags.  */
4895   rs6000_builtin_mask = rs6000_builtin_mask_calculate ();
4896   if (TARGET_DEBUG_BUILTIN || TARGET_DEBUG_TARGET)
4897     rs6000_print_builtin_options (stderr, 0, "builtin mask",
4898 				  rs6000_builtin_mask);
4899 
4900   /* Initialize all of the registers.  */
4901   rs6000_init_hard_regno_mode_ok (global_init_p);
4902 
4903   /* Save the initial options in case the user uses function-specific options.  */
4904   if (global_init_p)
4905     target_option_default_node = target_option_current_node
4906       = build_target_option_node (&global_options, &global_options_set);
4907 
4908   /* If not explicitly specified via option, decide whether to generate the
4909      extra blr's required to preserve the link stack on some cpus (eg, 476).  */
4910   if (TARGET_LINK_STACK == -1)
4911     SET_TARGET_LINK_STACK (rs6000_tune == PROCESSOR_PPC476 && flag_pic);
4912 
4913   /* Deprecate use of -mno-speculate-indirect-jumps.  */
4914   if (!rs6000_speculate_indirect_jumps)
4915     warning (0, "%qs is deprecated and not recommended in any circumstances",
4916 	     "-mno-speculate-indirect-jumps");
4917 
4918   return ret;
4919 }
4920 
4921 /* Implement TARGET_OPTION_OVERRIDE.  On the RS/6000 this is used to
4922    define the target cpu type.  */
4923 
4924 static void
4925 rs6000_option_override (void)
4926 {
4927   (void) rs6000_option_override_internal (true);
4928 }
4929 
4930 
4931 /* Implement LOOP_ALIGN. */
4932 align_flags
4933 rs6000_loop_align (rtx label)
4934 {
4935   basic_block bb;
4936   int ninsns;
4937 
4938   /* Don't override loop alignment if -falign-loops was specified. */
4939   if (!can_override_loop_align)
4940     return align_loops;
4941 
4942   bb = BLOCK_FOR_INSN (label);
4943   ninsns = num_loop_insns(bb->loop_father);
4944 
4945   /* Align small loops to 32 bytes to fit in an icache sector; otherwise return the default.  */
4946   if (ninsns > 4 && ninsns <= 8
4947       && (rs6000_tune == PROCESSOR_POWER4
4948 	  || rs6000_tune == PROCESSOR_POWER5
4949 	  || rs6000_tune == PROCESSOR_POWER6
4950 	  || rs6000_tune == PROCESSOR_POWER7
4951 	  || rs6000_tune == PROCESSOR_POWER8))
4952     return align_flags (5);
4953   else
4954     return align_loops;
4955 }
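
/* Illustration: align_flags (5) requests 2**5 == 32-byte alignment, one
   icache sector on the processors listed above, so e.g. a 6-insn loop on
   Power8 is aligned to 32 bytes while a 12-insn loop keeps align_loops.  */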
4956 
4957 /* Return true iff a data reference of TYPE can reach vector alignment (16)
4958    after some number of iterations.  This routine does not determine
4959    how many iterations are required to reach the desired alignment.  */
4960 
4961 static bool
4962 rs6000_vector_alignment_reachable (const_tree type ATTRIBUTE_UNUSED, bool is_packed)
4963 {
4964   if (is_packed)
4965     return false;
4966 
4967   if (TARGET_32BIT)
4968     {
4969       if (rs6000_alignment_flags == MASK_ALIGN_NATURAL)
4970         return true;
4971 
4972       if (rs6000_alignment_flags ==  MASK_ALIGN_POWER)
4973         return true;
4974 
4975       return false;
4976     }
4977   else
4978     {
4979       if (TARGET_MACHO)
4980         return false;
4981 
4982       /* Assume that all other types are naturally aligned.  CHECKME!  */
4983       return true;
4984     }
4985 }
4986 
4987 /* Return true if the vector misalignment factor is supported by the
4988    target.  */
4989 static bool
4990 rs6000_builtin_support_vector_misalignment (machine_mode mode,
4991 					    const_tree type,
4992 					    int misalignment,
4993 					    bool is_packed)
4994 {
4995   if (TARGET_VSX)
4996     {
4997       if (TARGET_EFFICIENT_UNALIGNED_VSX)
4998 	return true;
4999 
5000       /* Return false if the movmisalign pattern is not supported for this mode.  */
5001       if (optab_handler (movmisalign_optab, mode) == CODE_FOR_nothing)
5002         return false;
5003 
5004       if (misalignment == -1)
5005 	{
5006 	  /* Misalignment factor is unknown at compile time but we know
5007 	     it's word aligned.  */
5008 	  if (rs6000_vector_alignment_reachable (type, is_packed))
5009             {
5010               int element_size = TREE_INT_CST_LOW (TYPE_SIZE (type));
5011 
5012               if (element_size == 64 || element_size == 32)
5013                 return true;
5014             }
5015 
5016 	  return false;
5017 	}
5018 
5019       /* VSX supports word-aligned vectors.  */
5020       if (misalignment % 4 == 0)
5021 	return true;
5022     }
5023   return false;
5024 }
5025 
5026 /* Implement targetm.vectorize.builtin_vectorization_cost.  */
5027 static int
5028 rs6000_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
5029                                    tree vectype, int misalign)
5030 {
5031   unsigned elements;
5032   tree elem_type;
5033 
5034   switch (type_of_cost)
5035     {
5036       case scalar_stmt:
5037       case scalar_store:
5038       case vector_stmt:
5039       case vector_store:
5040       case vec_to_scalar:
5041       case scalar_to_vec:
5042       case cond_branch_not_taken:
5043         return 1;
5044       case scalar_load:
5045       case vector_load:
5046 	/* Like rs6000_insn_cost, make load insns cost a bit more.  */
5047 	return 2;
5048 
5049       case vec_perm:
5050 	/* Power7 has only one permute unit, make it a bit expensive.  */
5051 	if (TARGET_VSX && rs6000_tune == PROCESSOR_POWER7)
5052 	  return 3;
5053 	else
5054 	  return 1;
5055 
5056       case vec_promote_demote:
5057 	/* Power7 has only one permute/pack unit, make it a bit expensive.  */
5058 	if (TARGET_VSX && rs6000_tune == PROCESSOR_POWER7)
5059 	  return 4;
5060 	else
5061 	  return 1;
5062 
5063       case cond_branch_taken:
5064         return 3;
5065 
5066       case unaligned_load:
5067       case vector_gather_load:
5068 	/* Like rs6000_insn_cost, make load insns cost a bit more.  */
5069 	if (TARGET_EFFICIENT_UNALIGNED_VSX)
5070 	  return 2;
5071 
5072 	if (TARGET_VSX && TARGET_ALLOW_MOVMISALIGN)
5073 	  {
5074 	    elements = TYPE_VECTOR_SUBPARTS (vectype);
5075 	    /* See PR102767: V1TI is treated the same, for consistency.  */
5076 	    if (elements == 2 || elements == 1)
5077 	      /* Double word aligned.  */
5078 	      return 4;
5079 
5080 	    if (elements == 4)
5081 	      {
5082 		switch (misalign)
5083 		  {
5084 		  case 8:
5085 		    /* Double word aligned.  */
5086 		    return 4;
5087 
5088 		  case -1:
5089 		    /* Unknown misalignment.  */
5090 		  case 4:
5091 		  case 12:
5092 		    /* Word aligned.  */
5093 		    return 33;
5094 
5095 		  default:
5096 		    gcc_unreachable ();
5097 		  }
5098 	      }
5099 	  }
5100 
5101 	if (TARGET_ALTIVEC)
5102 	  /* Misaligned loads are not supported.  */
5103 	  gcc_unreachable ();
5104 
5105 	/* Like rs6000_insn_cost, make load insns cost a bit more.  */
5106 	return 4;
5107 
5108       case unaligned_store:
5109       case vector_scatter_store:
5110 	if (TARGET_EFFICIENT_UNALIGNED_VSX)
5111 	  return 1;
5112 
5113 	if (TARGET_VSX && TARGET_ALLOW_MOVMISALIGN)
5114 	  {
5115 	    elements = TYPE_VECTOR_SUBPARTS (vectype);
5116 	    /* See PR102767: V1TI is treated the same, for consistency.  */
5117 	    if (elements == 2 || elements == 1)
5118 	      /* Double word aligned.  */
5119 	      return 2;
5120 
5121 	    if (elements == 4)
5122 	      {
5123 		switch (misalign)
5124 		  {
5125 		  case 8:
5126 		    /* Double word aligned.  */
5127 		    return 2;
5128 
5129 		  case -1:
5130 		    /* Unknown misalignment.  */
5131 		  case 4:
5132 		  case 12:
5133 		    /* Word aligned.  */
5134 		    return 23;
5135 
5136 		  default:
5137 		    gcc_unreachable ();
5138 		  }
5139 	      }
5140 	  }
5141 
5142 	if (TARGET_ALTIVEC)
5143 	  /* Misaligned stores are not supported.  */
5144 	  gcc_unreachable ();
5145 
5146 	return 2;
5147 
5148       case vec_construct:
5149 	/* This is a rough approximation assuming non-constant elements
5150 	   constructed into a vector via element insertion.  FIXME:
5151 	   vec_construct is not granular enough for uniformly good
5152 	   decisions.  If the initialization is a splat, this is
5153 	   cheaper than we estimate.  Improve this someday.  */
5154 	elem_type = TREE_TYPE (vectype);
5155 	/* 32-bit vectors loaded into registers are stored as double
5156 	   precision, so we need 2 permutes, 2 converts, and 1 merge
5157 	   to construct a vector of short floats from them.  */
5158 	if (SCALAR_FLOAT_TYPE_P (elem_type)
5159 	    && TYPE_PRECISION (elem_type) == 32)
5160 	  return 5;
5161 	/* On POWER9, integer vector types are built up in GPRs and then
5162 	   use a direct move (2 cycles).  For POWER8 this is even worse,
5163 	   as we need two direct moves and a merge, and the direct moves
5164 	   are five cycles.  */
5165 	else if (INTEGRAL_TYPE_P (elem_type))
5166 	  {
5167 	    if (TARGET_P9_VECTOR)
5168 	      return TYPE_VECTOR_SUBPARTS (vectype) - 1 + 2;
5169 	    else
5170 	      return TYPE_VECTOR_SUBPARTS (vectype) - 1 + 5;
5171 	  }
5172 	else
5173 	  /* V2DFmode doesn't need a direct move.  */
5174 	  return 2;
5175 
5176       default:
5177         gcc_unreachable ();
5178     }
5179 }
5180 
5181 /* Implement targetm.vectorize.preferred_simd_mode.  */
5182 
5183 static machine_mode
5184 rs6000_preferred_simd_mode (scalar_mode mode)
5185 {
5186   opt_machine_mode vmode = mode_for_vector (mode, 16 / GET_MODE_SIZE (mode));
5187 
5188   if (vmode.exists () && !VECTOR_MEM_NONE_P (vmode.require ()))
5189     return vmode.require ();
5190 
5191   return word_mode;
5192 }
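
/* For example, SFmode asks for mode_for_vector (SFmode, 4) == V4SFmode and
   DFmode for V2DFmode; when the corresponding vector unit is absent,
   returning word_mode tells the vectorizer to stay scalar.  */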
5193 
5194 class rs6000_cost_data : public vector_costs
5195 {
5196 public:
5197   using vector_costs::vector_costs;
5198 
5199   unsigned int add_stmt_cost (int count, vect_cost_for_stmt kind,
5200 			      stmt_vec_info stmt_info, slp_tree, tree vectype,
5201 			      int misalign,
5202 			      vect_cost_model_location where) override;
5203   void finish_cost (const vector_costs *) override;
5204 
5205 protected:
5206   void update_target_cost_per_stmt (vect_cost_for_stmt, stmt_vec_info,
5207 				    vect_cost_model_location, unsigned int);
5208   void density_test (loop_vec_info);
5209   void adjust_vect_cost_per_loop (loop_vec_info);
5210 
5211   /* Total number of vectorized stmts (loop only).  */
5212   unsigned m_nstmts = 0;
5213   /* Total number of loads (loop only).  */
5214   unsigned m_nloads = 0;
5215   /* Possible extra penalized cost on vector construction (loop only).  */
5216   unsigned m_extra_ctor_cost = 0;
5217   /* For each vectorized loop, this var holds TRUE iff a non-memory vector
5218      instruction is needed by the vectorization.  */
5219   bool m_vect_nonmem = false;
5220 };
5221 
5222 /* Test for likely overcommitment of vector hardware resources.  If a
5223    loop iteration is relatively large, and too large a percentage of
5224    instructions in the loop are vectorized, the cost model may not
5225    adequately reflect delays from unavailable vector resources.
5226    Penalize the loop body cost for this case.  */
5227 
5228 void
5229 rs6000_cost_data::density_test (loop_vec_info loop_vinfo)
5230 {
5231   /* This density test only cares about the cost of the vector version of
5232      the loop, so return immediately if we are costing the scalar version
5233      (namely computing the single scalar iteration cost).  */
5234   if (m_costing_for_scalar)
5235     return;
5236 
5237   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
5238   basic_block *bbs = get_loop_body (loop);
5239   int nbbs = loop->num_nodes;
5240   int vec_cost = m_costs[vect_body], not_vec_cost = 0;
5241 
5242   for (int i = 0; i < nbbs; i++)
5243     {
5244       basic_block bb = bbs[i];
5245       gimple_stmt_iterator gsi;
5246 
5247       for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
5248 	{
5249 	  gimple *stmt = gsi_stmt (gsi);
5250 	  if (is_gimple_debug (stmt))
5251 	    continue;
5252 
5253 	  stmt_vec_info stmt_info = loop_vinfo->lookup_stmt (stmt);
5254 
5255 	  if (!STMT_VINFO_RELEVANT_P (stmt_info)
5256 	      && !STMT_VINFO_IN_PATTERN_P (stmt_info))
5257 	    not_vec_cost++;
5258 	}
5259     }
5260 
5261   free (bbs);
5262   int density_pct = (vec_cost * 100) / (vec_cost + not_vec_cost);
5263 
5264   if (density_pct > rs6000_density_pct_threshold
5265       && vec_cost + not_vec_cost > rs6000_density_size_threshold)
5266     {
5267       m_costs[vect_body] = vec_cost * (100 + rs6000_density_penalty) / 100;
5268       if (dump_enabled_p ())
5269 	dump_printf_loc (MSG_NOTE, vect_location,
5270 			 "density %d%%, cost %d exceeds threshold, penalizing "
5271 			 "loop body cost by %u%%\n", density_pct,
5272 			 vec_cost + not_vec_cost, rs6000_density_penalty);
5273     }
5274 
5275   /* Check whether we need to penalize the body cost to account
5276      for excess strided or elementwise loads.  */
5277   if (m_extra_ctor_cost > 0)
5278     {
5279       gcc_assert (m_nloads <= m_nstmts);
5280       unsigned int load_pct = (m_nloads * 100) / m_nstmts;
5281 
5282       /* Performance is likely to be bound by latency and execution
5283 	 resources when many scalar loads (strided or elementwise) are
5284 	 assembled into a vector and both conditions below hold:
5285 	   1. there are many loads, so it is easy to end up with long
5286 	      waits for the load units;
5287 	   2. loads are a big proportion of all vectorized statements,
5288 	      so it is hard to schedule other statements to spread among
5289 	      the loads.
5290 	 One typical case is the innermost loop of the hotspot of SPEC2017
5291 	 503.bwaves_r without loop interchange.  */
5292       if (m_nloads > (unsigned int) rs6000_density_load_num_threshold
5293 	  && load_pct > (unsigned int) rs6000_density_load_pct_threshold)
5294 	{
5295 	  m_costs[vect_body] += m_extra_ctor_cost;
5296 	  if (dump_enabled_p ())
5297 	    dump_printf_loc (MSG_NOTE, vect_location,
5298 			     "Found %u loads and "
5299 			     "load pct. %u%% exceed "
5300 			     "the threshold, "
5301 			     "penalizing loop body "
5302 			     "cost by extra cost %u "
5303 			     "for ctor.\n",
5304 			     m_nloads, load_pct,
5305 			     m_extra_ctor_cost);
5306 	}
5307     }
5308 }
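
/* A worked illustration of density_test with made-up numbers: if the
   vectorized body costs 90 and the remaining scalar statements cost 10,
   density_pct is (90 * 100) / (90 + 10) == 90; with a hypothetical density
   threshold of 85, a size threshold below 100 and a penalty of 10, the
   body cost becomes 90 * (100 + 10) / 100 == 99.  */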
5309 
5310 /* Implement targetm.vectorize.create_costs.  */
5311 
5312 static vector_costs *
5313 rs6000_vectorize_create_costs (vec_info *vinfo, bool costing_for_scalar)
5314 {
5315   return new rs6000_cost_data (vinfo, costing_for_scalar);
5316 }
5317 
5318 /* Adjust vectorization cost after calling rs6000_builtin_vectorization_cost.
5319    For some statements, we would like to further fine-tune the cost on top of
5320    the rs6000_builtin_vectorization_cost handling, which doesn't have any
5321    information on statement operation codes etc.  One typical case here is
5322    COND_EXPR: it costs the same as a simple FXU instruction when evaluated
5323    for the scalar cost, but it should be priced higher since it is transformed
5324    into either compare + branch or compare + isel instructions.  */
5325 
5326 static unsigned
5327 rs6000_adjust_vect_cost_per_stmt (enum vect_cost_for_stmt kind,
5328 				  struct _stmt_vec_info *stmt_info)
5329 {
5330   if (kind == scalar_stmt && stmt_info && stmt_info->stmt
5331       && gimple_code (stmt_info->stmt) == GIMPLE_ASSIGN)
5332     {
5333       tree_code subcode = gimple_assign_rhs_code (stmt_info->stmt);
5334       if (subcode == COND_EXPR)
5335 	return 2;
5336     }
5337 
5338   return 0;
5339 }
5340 
5341 /* Helper function for add_stmt_cost.  Check each statement cost
5342    entry, gather information and update the target_cost fields
5343    accordingly.  */
5344 void
5345 rs6000_cost_data::update_target_cost_per_stmt (vect_cost_for_stmt kind,
5346 					       stmt_vec_info stmt_info,
5347 					       vect_cost_model_location where,
5348 					       unsigned int orig_count)
5349 {
5350 
5351   /* Check whether we're doing something other than just a copy loop.
5352      Not all such loops may be profitably vectorized; see
5353      rs6000_finish_cost.  */
5354   if (kind == vec_to_scalar
5355       || kind == vec_perm
5356       || kind == vec_promote_demote
5357       || kind == vec_construct
5358       || kind == scalar_to_vec
5359       || (where == vect_body && kind == vector_stmt))
5360     m_vect_nonmem = true;
5361 
5362   /* Gather some information when we are costing the vectorized instruction
5363      for the statements located in a loop body.  */
5364   if (!m_costing_for_scalar
5365       && is_a<loop_vec_info> (m_vinfo)
5366       && where == vect_body)
5367     {
5368       m_nstmts += orig_count;
5369 
5370       if (kind == scalar_load || kind == vector_load
5371 	  || kind == unaligned_load || kind == vector_gather_load)
5372 	m_nloads += orig_count;
5373 
5374       /* Power processors do not currently have instructions for strided
5375 	 and elementwise loads, and instead we must generate multiple
5376 	 scalar loads.  This leads to undercounting of the cost.  We
5377 	 account for this by scaling the construction cost by the number
5378 	 of elements involved, and saving this as extra cost that we may
5379 	 or may not need to apply.  When finalizing the cost of the loop,
5380 	 the extra penalty is applied when the load density heuristics
5381 	 are satisfied.  */
5382       if (kind == vec_construct && stmt_info
5383 	  && STMT_VINFO_TYPE (stmt_info) == load_vec_info_type
5384 	  && (STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) == VMAT_ELEMENTWISE
5385 	      || STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) == VMAT_STRIDED_SLP))
5386 	{
5387 	  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
5388 	  unsigned int nunits = vect_nunits_for_cost (vectype);
5389 	  /* As PR103702 shows, it's possible that the vectorizer wants to
5390 	     cost only one unit here; no penalization is needed for that,
5391 	     so simply return early.  */
5392 	  if (nunits == 1)
5393 	    return;
5394 	  /* The i386 port adopts nunits * stmt_cost as the penalized cost
5395 	     for this kind of penalization; we used to follow it but found
5396 	     it could result in an unreliable body cost, especially for
5397 	     V16QI/V8HI modes.  To improve on that, we use this new
5398 	     heuristic: for each scalar load, the penalized cost is 2 when
5399 	     there are 2 nunits and 1 otherwise.  It has little supporting
5400 	     theory, being mainly concluded from broad performance
5401 	     evaluations on Power8, Power9 and Power10.  One possibly
5402 	     related point: vector construction for more units uses more
5403 	     insns, which have more chances of being scheduled well (even
5404 	     run in parallel when enough units are available at the time),
5405 	     so it seems reasonable not to penalize them as much.  */
5407 	  unsigned int adjusted_cost = (nunits == 2) ? 2 : 1;
5408 	  unsigned int extra_cost = nunits * adjusted_cost;
5409 	  m_extra_ctor_cost += extra_cost;
5410 	}
5411     }
5412 }
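
/* Hypothetical example of the extra constructor cost above: a strided load
   built into a V2DI vector has nunits == 2, so each vec_construct adds
   2 * 2 == 4 to m_extra_ctor_cost, whereas a V16QI construction adds
   16 * 1 == 16; the penalty only lands if density_test's load heuristics
   later fire.  */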
5413 
5414 unsigned
5415 rs6000_cost_data::add_stmt_cost (int count, vect_cost_for_stmt kind,
5416 				 stmt_vec_info stmt_info, slp_tree,
5417 				 tree vectype, int misalign,
5418 				 vect_cost_model_location where)
5419 {
5420   unsigned retval = 0;
5421 
5422   if (flag_vect_cost_model)
5423     {
5424       int stmt_cost = rs6000_builtin_vectorization_cost (kind, vectype,
5425 							 misalign);
5426       stmt_cost += rs6000_adjust_vect_cost_per_stmt (kind, stmt_info);
5427       /* Statements in an inner loop relative to the loop being
5428 	 vectorized are weighted more heavily.  The value here is
5429 	 arbitrary and could potentially be improved with analysis.  */
5430       unsigned int orig_count = count;
5431       retval = adjust_cost_for_freq (stmt_info, where, count * stmt_cost);
5432       m_costs[where] += retval;
5433 
5434       update_target_cost_per_stmt (kind, stmt_info, where, orig_count);
5435     }
5436 
5437   return retval;
5438 }
5439 
5440 /* For target-specific vectorization costs which can't be handled per stmt,
5441    check the requisite conditions and adjust the vectorization cost
5442    accordingly if they are satisfied.  One typical example is to model the
5443    shift cost for vectors with length by counting the number of required
5444    lengths when LOOP_VINFO_FULLY_WITH_LENGTH_P holds.  */
5445 
5446 void
5447 rs6000_cost_data::adjust_vect_cost_per_loop (loop_vec_info loop_vinfo)
5448 {
5449   if (LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo))
5450     {
5451       rgroup_controls *rgc;
5452       unsigned int num_vectors_m1;
5453       unsigned int shift_cnt = 0;
5454       FOR_EACH_VEC_ELT (LOOP_VINFO_LENS (loop_vinfo), num_vectors_m1, rgc)
5455 	if (rgc->type)
5456 	  /* Each length needs one shift to fill into bits 0-7.  */
5457 	  shift_cnt += num_vectors_m1 + 1;
5458 
5459       add_stmt_cost (shift_cnt, scalar_stmt, NULL, NULL,
5460 		     NULL_TREE, 0, vect_body);
5461     }
5462 }
5463 
5464 void
5465 rs6000_cost_data::finish_cost (const vector_costs *scalar_costs)
5466 {
5467   if (loop_vec_info loop_vinfo = dyn_cast<loop_vec_info> (m_vinfo))
5468     {
5469       adjust_vect_cost_per_loop (loop_vinfo);
5470       density_test (loop_vinfo);
5471 
5472       /* Don't vectorize minimum-vectorization-factor, simple copy loops
5473 	 that require versioning for any reason.  The vectorization is at
5474 	 best a wash inside the loop, and the versioning checks make
5475 	 profitability highly unlikely and potentially quite harmful.  */
5476       if (!m_vect_nonmem
5477 	  && LOOP_VINFO_VECT_FACTOR (loop_vinfo) == 2
5478 	  && LOOP_REQUIRES_VERSIONING (loop_vinfo))
5479 	m_costs[vect_body] += 10000;
5480     }
5481 
5482   vector_costs::finish_cost (scalar_costs);
5483 }
5484 
5485 /* Implement targetm.loop_unroll_adjust.  */
5486 
5487 static unsigned
5488 rs6000_loop_unroll_adjust (unsigned nunroll, struct loop *loop)
5489 {
5490    if (unroll_only_small_loops)
5491     {
5492       /* TODO: These are hardcoded values right now.  We probably should use
5493 	 a PARAM here.  */
5494       if (loop->ninsns <= 6)
5495 	return MIN (4, nunroll);
5496       if (loop->ninsns <= 10)
5497 	return MIN (2, nunroll);
5498 
5499       return 0;
5500     }
5501 
5502   return nunroll;
5503 }
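
/* For instance, when unroll_only_small_loops is in effect, a 5-insn loop
   is unrolled at most 4 times and a 9-insn loop at most twice, regardless
   of how large NUNROLL was, while an 11-insn loop is not unrolled at all.  */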
5504 
5505 /* Returns a function decl for a vectorized version of the builtin function
5506    with builtin function code FN and the result vector type TYPE, or NULL_TREE
5507    if it is not available.
5508 
5509    Implement targetm.vectorize.builtin_vectorized_function.  */
5510 
5511 static tree
5512 rs6000_builtin_vectorized_function (unsigned int fn, tree type_out,
5513 				    tree type_in)
5514 {
5515   machine_mode in_mode, out_mode;
5516   int in_n, out_n;
5517 
5518   if (TARGET_DEBUG_BUILTIN)
5519     fprintf (stderr, "rs6000_builtin_vectorized_function (%s, %s, %s)\n",
5520 	     combined_fn_name (combined_fn (fn)),
5521 	     GET_MODE_NAME (TYPE_MODE (type_out)),
5522 	     GET_MODE_NAME (TYPE_MODE (type_in)));
5523 
5524   /* TODO: Should this be gcc_assert?  */
5525   if (TREE_CODE (type_out) != VECTOR_TYPE
5526       || TREE_CODE (type_in) != VECTOR_TYPE)
5527     return NULL_TREE;
5528 
5529   out_mode = TYPE_MODE (TREE_TYPE (type_out));
5530   out_n = TYPE_VECTOR_SUBPARTS (type_out);
5531   in_mode = TYPE_MODE (TREE_TYPE (type_in));
5532   in_n = TYPE_VECTOR_SUBPARTS (type_in);
5533 
5534   switch (fn)
5535     {
5536     CASE_CFN_COPYSIGN:
5537       if (VECTOR_UNIT_VSX_P (V2DFmode)
5538 	  && out_mode == DFmode && out_n == 2
5539 	  && in_mode == DFmode && in_n == 2)
5540 	return rs6000_builtin_decls[RS6000_BIF_CPSGNDP];
5541       if (VECTOR_UNIT_VSX_P (V4SFmode)
5542 	  && out_mode == SFmode && out_n == 4
5543 	  && in_mode == SFmode && in_n == 4)
5544 	return rs6000_builtin_decls[RS6000_BIF_CPSGNSP];
5545       if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
5546 	  && out_mode == SFmode && out_n == 4
5547 	  && in_mode == SFmode && in_n == 4)
5548 	return rs6000_builtin_decls[RS6000_BIF_COPYSIGN_V4SF];
5549       break;
5550     CASE_CFN_CEIL:
5551       if (VECTOR_UNIT_VSX_P (V2DFmode)
5552 	  && out_mode == DFmode && out_n == 2
5553 	  && in_mode == DFmode && in_n == 2)
5554 	return rs6000_builtin_decls[RS6000_BIF_XVRDPIP];
5555       if (VECTOR_UNIT_VSX_P (V4SFmode)
5556 	  && out_mode == SFmode && out_n == 4
5557 	  && in_mode == SFmode && in_n == 4)
5558 	return rs6000_builtin_decls[RS6000_BIF_XVRSPIP];
5559       if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
5560 	  && out_mode == SFmode && out_n == 4
5561 	  && in_mode == SFmode && in_n == 4)
5562 	return rs6000_builtin_decls[RS6000_BIF_VRFIP];
5563       break;
5564     CASE_CFN_FLOOR:
5565       if (VECTOR_UNIT_VSX_P (V2DFmode)
5566 	  && out_mode == DFmode && out_n == 2
5567 	  && in_mode == DFmode && in_n == 2)
5568 	return rs6000_builtin_decls[RS6000_BIF_XVRDPIM];
5569       if (VECTOR_UNIT_VSX_P (V4SFmode)
5570 	  && out_mode == SFmode && out_n == 4
5571 	  && in_mode == SFmode && in_n == 4)
5572 	return rs6000_builtin_decls[RS6000_BIF_XVRSPIM];
5573       if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
5574 	  && out_mode == SFmode && out_n == 4
5575 	  && in_mode == SFmode && in_n == 4)
5576 	return rs6000_builtin_decls[RS6000_BIF_VRFIM];
5577       break;
5578     CASE_CFN_FMA:
5579       if (VECTOR_UNIT_VSX_P (V2DFmode)
5580 	  && out_mode == DFmode && out_n == 2
5581 	  && in_mode == DFmode && in_n == 2)
5582 	return rs6000_builtin_decls[RS6000_BIF_XVMADDDP];
5583       if (VECTOR_UNIT_VSX_P (V4SFmode)
5584 	  && out_mode == SFmode && out_n == 4
5585 	  && in_mode == SFmode && in_n == 4)
5586 	return rs6000_builtin_decls[RS6000_BIF_XVMADDSP];
5587       if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
5588 	  && out_mode == SFmode && out_n == 4
5589 	  && in_mode == SFmode && in_n == 4)
5590 	return rs6000_builtin_decls[RS6000_BIF_VMADDFP];
5591       break;
5592     CASE_CFN_TRUNC:
5593       if (VECTOR_UNIT_VSX_P (V2DFmode)
5594 	  && out_mode == DFmode && out_n == 2
5595 	  && in_mode == DFmode && in_n == 2)
5596 	return rs6000_builtin_decls[RS6000_BIF_XVRDPIZ];
5597       if (VECTOR_UNIT_VSX_P (V4SFmode)
5598 	  && out_mode == SFmode && out_n == 4
5599 	  && in_mode == SFmode && in_n == 4)
5600 	return rs6000_builtin_decls[RS6000_BIF_XVRSPIZ];
5601       if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
5602 	  && out_mode == SFmode && out_n == 4
5603 	  && in_mode == SFmode && in_n == 4)
5604 	return rs6000_builtin_decls[RS6000_BIF_VRFIZ];
5605       break;
5606     CASE_CFN_NEARBYINT:
5607       if (VECTOR_UNIT_VSX_P (V2DFmode)
5608 	  && flag_unsafe_math_optimizations
5609 	  && out_mode == DFmode && out_n == 2
5610 	  && in_mode == DFmode && in_n == 2)
5611 	return rs6000_builtin_decls[RS6000_BIF_XVRDPI];
5612       if (VECTOR_UNIT_VSX_P (V4SFmode)
5613 	  && flag_unsafe_math_optimizations
5614 	  && out_mode == SFmode && out_n == 4
5615 	  && in_mode == SFmode && in_n == 4)
5616 	return rs6000_builtin_decls[RS6000_BIF_XVRSPI];
5617       break;
5618     CASE_CFN_RINT:
5619       if (VECTOR_UNIT_VSX_P (V2DFmode)
5620 	  && !flag_trapping_math
5621 	  && out_mode == DFmode && out_n == 2
5622 	  && in_mode == DFmode && in_n == 2)
5623 	return rs6000_builtin_decls[RS6000_BIF_XVRDPIC];
5624       if (VECTOR_UNIT_VSX_P (V4SFmode)
5625 	  && !flag_trapping_math
5626 	  && out_mode == SFmode && out_n == 4
5627 	  && in_mode == SFmode && in_n == 4)
5628 	return rs6000_builtin_decls[RS6000_BIF_XVRSPIC];
5629       break;
5630     default:
5631       break;
5632     }
5633 
5634   /* Generate calls to libmass if appropriate.  */
5635   if (rs6000_veclib_handler)
5636     return rs6000_veclib_handler (combined_fn (fn), type_out, type_in);
5637 
5638   return NULL_TREE;
5639 }
5640 
5641 /* Handler for the Mathematical Acceleration Subsystem (mass) interface to a
5642    library with vectorized intrinsics.  */
5643 
5644 static tree
5645 rs6000_builtin_vectorized_libmass (combined_fn fn, tree type_out,
5646 				   tree type_in)
5647 {
5648   char name[32];
5649   const char *suffix = NULL;
5650   tree fntype, new_fndecl, bdecl = NULL_TREE;
5651   int n_args = 1;
5652   const char *bname;
5653   machine_mode el_mode, in_mode;
5654   int n, in_n;
5655 
5656   /* Libmass is suitable only for unsafe math, as it does not correctly
5657      support parts of IEEE (such as denormals) with the required precision.
5658      Only support it if we have VSX, so the simd d2 or f4 functions can be
5659      used.  XXX: Add variable length support.  */
5660   if (!flag_unsafe_math_optimizations || !TARGET_VSX)
5661     return NULL_TREE;
5662 
5663   el_mode = TYPE_MODE (TREE_TYPE (type_out));
5664   n = TYPE_VECTOR_SUBPARTS (type_out);
5665   in_mode = TYPE_MODE (TREE_TYPE (type_in));
5666   in_n = TYPE_VECTOR_SUBPARTS (type_in);
5667   if (el_mode != in_mode
5668       || n != in_n)
5669     return NULL_TREE;
5670 
5671   switch (fn)
5672     {
5673     CASE_CFN_ATAN2:
5674     CASE_CFN_HYPOT:
5675     CASE_CFN_POW:
5676       n_args = 2;
5677       gcc_fallthrough ();
5678 
5679     CASE_CFN_ACOS:
5680     CASE_CFN_ACOSH:
5681     CASE_CFN_ASIN:
5682     CASE_CFN_ASINH:
5683     CASE_CFN_ATAN:
5684     CASE_CFN_ATANH:
5685     CASE_CFN_CBRT:
5686     CASE_CFN_COS:
5687     CASE_CFN_COSH:
5688     CASE_CFN_ERF:
5689     CASE_CFN_ERFC:
5690     CASE_CFN_EXP2:
5691     CASE_CFN_EXP:
5692     CASE_CFN_EXPM1:
5693     CASE_CFN_LGAMMA:
5694     CASE_CFN_LOG10:
5695     CASE_CFN_LOG1P:
5696     CASE_CFN_LOG2:
5697     CASE_CFN_LOG:
5698     CASE_CFN_SIN:
5699     CASE_CFN_SINH:
5700     CASE_CFN_SQRT:
5701     CASE_CFN_TAN:
5702     CASE_CFN_TANH:
5703       if (el_mode == DFmode && n == 2)
5704 	{
5705 	  bdecl = mathfn_built_in (double_type_node, fn);
5706 	  suffix = "d2";				/* pow -> powd2 */
5707 	}
5708       else if (el_mode == SFmode && n == 4)
5709 	{
5710 	  bdecl = mathfn_built_in (float_type_node, fn);
5711 	  suffix = "4";					/* powf -> powf4 */
5712 	}
5713       else
5714 	return NULL_TREE;
5715       if (!bdecl)
5716 	return NULL_TREE;
5717       break;
5718 
5719     default:
5720       return NULL_TREE;
5721     }
5722 
5723   gcc_assert (suffix != NULL);
5724   bname = IDENTIFIER_POINTER (DECL_NAME (bdecl));
5725   if (!bname)
5726     return NULL_TREE;
5727 
5728   strcpy (name, bname + strlen ("__builtin_"));
5729   strcat (name, suffix);
5730 
5731   if (n_args == 1)
5732     fntype = build_function_type_list (type_out, type_in, NULL);
5733   else if (n_args == 2)
5734     fntype = build_function_type_list (type_out, type_in, type_in, NULL);
5735   else
5736     gcc_unreachable ();
5737 
5738   /* Build a function declaration for the vectorized function.  */
5739   new_fndecl = build_decl (BUILTINS_LOCATION,
5740 			   FUNCTION_DECL, get_identifier (name), fntype);
5741   TREE_PUBLIC (new_fndecl) = 1;
5742   DECL_EXTERNAL (new_fndecl) = 1;
5743   DECL_IS_NOVOPS (new_fndecl) = 1;
5744   TREE_READONLY (new_fndecl) = 1;
5745 
5746   return new_fndecl;
5747 }
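
/* Usage sketch: a V2DFmode pow is renamed from "__builtin_pow" to "powd2"
   and a V4SFmode powf from "__builtin_powf" to "powf4", matching the MASS
   simd library naming assumed by the suffix handling above.  */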
5748 
5749 
5750 /* Default CPU string for rs6000*_file_start functions.  */
5751 static const char *rs6000_default_cpu;
5752 
5753 #ifdef USING_ELFOS_H
5754 const char *rs6000_machine;
5755 
5756 const char *
5757 rs6000_machine_from_flags (void)
5758 {
5759   /* e300 and e500 */
5760   if (rs6000_cpu == PROCESSOR_PPCE300C2 || rs6000_cpu == PROCESSOR_PPCE300C3)
5761     return "e300";
5762   if (rs6000_cpu == PROCESSOR_PPC8540 || rs6000_cpu == PROCESSOR_PPC8548)
5763     return "e500";
5764   if (rs6000_cpu == PROCESSOR_PPCE500MC)
5765     return "e500mc";
5766   if (rs6000_cpu == PROCESSOR_PPCE500MC64)
5767     return "e500mc64";
5768   if (rs6000_cpu == PROCESSOR_PPCE5500)
5769     return "e5500";
5770   if (rs6000_cpu == PROCESSOR_PPCE6500)
5771     return "e6500";
5772 
5773   /* 400 series */
5774   if (rs6000_cpu == PROCESSOR_PPC403)
5775     return "\"403\"";
5776   if (rs6000_cpu == PROCESSOR_PPC405)
5777     return "\"405\"";
5778   if (rs6000_cpu == PROCESSOR_PPC440)
5779     return "\"440\"";
5780   if (rs6000_cpu == PROCESSOR_PPC476)
5781     return "\"476\"";
5782 
5783   /* A2 */
5784   if (rs6000_cpu == PROCESSOR_PPCA2)
5785     return "a2";
5786 
5787   /* Cell BE */
5788   if (rs6000_cpu == PROCESSOR_CELL)
5789     return "cell";
5790 
5791   /* Titan */
5792   if (rs6000_cpu == PROCESSOR_TITAN)
5793     return "titan";
5794 
5795   /* 500 series and 800 series */
5796   if (rs6000_cpu == PROCESSOR_MPCCORE)
5797     return "\"821\"";
5798 
5799 #if 0
5800   /* This (and the ppc64 case below) is disabled here (for now at least)
5801      because PROCESSOR_POWERPC, PROCESSOR_POWERPC64, and PROCESSOR_COMMON
5802      are #define'd as some of these.  Untangling that is a job for later.  */
5803 
5804   /* 600 series and 700 series, "classic" */
5805   if (rs6000_cpu == PROCESSOR_PPC601 || rs6000_cpu == PROCESSOR_PPC603
5806       || rs6000_cpu == PROCESSOR_PPC604 || rs6000_cpu == PROCESSOR_PPC604e
5807       || rs6000_cpu == PROCESSOR_PPC750)
5808     return "ppc";
5809 #endif
5810 
5811   /* Classic with AltiVec, "G4" */
5812   if (rs6000_cpu == PROCESSOR_PPC7400 || rs6000_cpu == PROCESSOR_PPC7450)
5813     return "\"7450\"";
5814 
5815 #if 0
5816   /* The older 64-bit CPUs */
5817   if (rs6000_cpu == PROCESSOR_PPC620 || rs6000_cpu == PROCESSOR_PPC630
5818       || rs6000_cpu == PROCESSOR_RS64A)
5819     return "ppc64";
5820 #endif
5821 
5822   HOST_WIDE_INT flags = rs6000_isa_flags;
5823 
5824   /* Disable the flags that should never influence the .machine selection.  */
5825   flags &= ~(OPTION_MASK_PPC_GFXOPT | OPTION_MASK_PPC_GPOPT | OPTION_MASK_ISEL);
5826 
5827   if ((flags & (ISA_3_1_MASKS_SERVER & ~ISA_3_0_MASKS_SERVER)) != 0)
5828     return "power10";
5829   if ((flags & (ISA_3_0_MASKS_SERVER & ~ISA_2_7_MASKS_SERVER)) != 0)
5830     return "power9";
5831   if ((flags & (ISA_2_7_MASKS_SERVER & ~ISA_2_6_MASKS_SERVER)) != 0)
5832     return "power8";
5833   if ((flags & (ISA_2_6_MASKS_SERVER & ~ISA_2_5_MASKS_SERVER)) != 0)
5834     return "power7";
5835   if ((flags & (ISA_2_5_MASKS_SERVER & ~ISA_2_4_MASKS)) != 0)
5836     return "power6";
5837   if ((flags & (ISA_2_4_MASKS & ~ISA_2_1_MASKS)) != 0)
5838     return "power5";
5839   if ((flags & ISA_2_1_MASKS) != 0)
5840     return "power4";
5841   if ((flags & OPTION_MASK_POWERPC64) != 0)
5842     return "ppc64";
5843   return "ppc";
5844 }
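
/* For example, -mcpu=power9 sets ISA 3.0 flag bits that are not in the
   power8 (ISA 2.7) mask, so this returns "power9" and the assembler is
   handed ".machine power9".  */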
5845 
5846 void
5847 emit_asm_machine (void)
5848 {
5849   fprintf (asm_out_file, "\t.machine %s\n", rs6000_machine);
5850 }
5851 #endif
5852 
5853 /* Do anything needed at the start of the asm file.  */
5854 
5855 static void
5856 rs6000_file_start (void)
5857 {
5858   char buffer[80];
5859   const char *start = buffer;
5860   FILE *file = asm_out_file;
5861 
5862   rs6000_default_cpu = TARGET_CPU_DEFAULT;
5863 
5864   default_file_start ();
5865 
5866   if (flag_verbose_asm)
5867     {
5868       sprintf (buffer, "\n%s rs6000/powerpc options:", ASM_COMMENT_START);
5869 
5870       if (rs6000_default_cpu != 0 && rs6000_default_cpu[0] != '\0')
5871 	{
5872 	  fprintf (file, "%s --with-cpu=%s", start, rs6000_default_cpu);
5873 	  start = "";
5874 	}
5875 
5876       if (OPTION_SET_P (rs6000_cpu_index))
5877 	{
5878 	  fprintf (file, "%s -mcpu=%s", start,
5879 		   processor_target_table[rs6000_cpu_index].name);
5880 	  start = "";
5881 	}
5882 
5883       if (OPTION_SET_P (rs6000_tune_index))
5884 	{
5885 	  fprintf (file, "%s -mtune=%s", start,
5886 		   processor_target_table[rs6000_tune_index].name);
5887 	  start = "";
5888 	}
5889 
5890       if (PPC405_ERRATUM77)
5891 	{
5892 	  fprintf (file, "%s PPC405CR_ERRATUM77", start);
5893 	  start = "";
5894 	}
5895 
5896 #ifdef USING_ELFOS_H
5897       switch (rs6000_sdata)
5898 	{
5899 	case SDATA_NONE: fprintf (file, "%s -msdata=none", start); start = ""; break;
5900 	case SDATA_DATA: fprintf (file, "%s -msdata=data", start); start = ""; break;
5901 	case SDATA_SYSV: fprintf (file, "%s -msdata=sysv", start); start = ""; break;
5902 	case SDATA_EABI: fprintf (file, "%s -msdata=eabi", start); start = ""; break;
5903 	}
5904 
5905       if (rs6000_sdata && g_switch_value)
5906 	{
5907 	  fprintf (file, "%s -G %d", start,
5908 		   g_switch_value);
5909 	  start = "";
5910 	}
5911 #endif
5912 
5913       if (*start == '\0')
5914 	putc ('\n', file);
5915     }
5916 
5917 #ifdef USING_ELFOS_H
5918   rs6000_machine = rs6000_machine_from_flags ();
5919   emit_asm_machine ();
5920 #endif
5921 
5922   if (DEFAULT_ABI == ABI_ELFv2)
5923     fprintf (file, "\t.abiversion 2\n");
5924 }
5925 
5926 
5927 /* Return nonzero if this function is known to have a null epilogue.  */
5928 
5929 int
5930 direct_return (void)
5931 {
5932   if (reload_completed)
5933     {
5934       rs6000_stack_t *info = rs6000_stack_info ();
5935 
5936       if (info->first_gp_reg_save == 32
5937 	  && info->first_fp_reg_save == 64
5938 	  && info->first_altivec_reg_save == LAST_ALTIVEC_REGNO + 1
5939 	  && ! info->lr_save_p
5940 	  && ! info->cr_save_p
5941 	  && info->vrsave_size == 0
5942 	  && ! info->push_p)
5943 	return 1;
5944     }
5945 
5946   return 0;
5947 }
5948 
5949 /* Helper for num_insns_constant.  Calculate number of instructions to
5950    load VALUE to a single gpr using combinations of addi, addis, ori,
5951    oris, sldi and rldimi instructions.  */
5952 
5953 static int
5954 num_insns_constant_gpr (HOST_WIDE_INT value)
5955 {
5956   /* signed constant loadable with addi */
5957   if (SIGNED_INTEGER_16BIT_P (value))
5958     return 1;
5959 
5960   /* constant loadable with addis */
5961   else if ((value & 0xffff) == 0
5962 	   && (value >> 31 == -1 || value >> 31 == 0))
5963     return 1;
5964 
5965   /* PADDI can support up to 34-bit signed integers.  */
5966   else if (TARGET_PREFIXED && SIGNED_INTEGER_34BIT_P (value))
5967     return 1;
5968 
5969   else if (TARGET_POWERPC64)
5970     {
5971       HOST_WIDE_INT low  = ((value & 0xffffffff) ^ 0x80000000) - 0x80000000;
5972       HOST_WIDE_INT high = value >> 31;
5973 
5974       if (high == 0 || high == -1)
5975 	return 2;
5976 
5977       high >>= 1;
5978 
5979       if (low == 0 || low == high)
5980 	return num_insns_constant_gpr (high) + 1;
5981       else if (high == 0)
5982 	return num_insns_constant_gpr (low) + 1;
5983       else
5984 	return (num_insns_constant_gpr (high)
5985 		+ num_insns_constant_gpr (low) + 1);
5986     }
5987 
5988   else
5989     return 2;
5990 }
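
/* Worked example: 0x12345678 needs lis + ori (2 insns).  On a 64-bit
   target without prefixed insns, 0x1234567800000000 has LOW == 0, so it
   costs num_insns_constant_gpr (0x12345678) + 1 for the shift, i.e. 3
   insns (lis, ori, sldi).  */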
5991 
5992 /* Helper for num_insns_constant.  Allow constants formed by the
5993    num_insns_constant_gpr sequences, plus li -1, rldicl/rldicr/rlwinm,
5994    and handle modes that require multiple gprs.  */
5995 
5996 static int
5997 num_insns_constant_multi (HOST_WIDE_INT value, machine_mode mode)
5998 {
5999   int nregs = (GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
6000   int total = 0;
6001   while (nregs-- > 0)
6002     {
6003       HOST_WIDE_INT low = sext_hwi (value, BITS_PER_WORD);
6004       int insns = num_insns_constant_gpr (low);
6005       if (insns > 2
6006 	  /* We won't get more than 2 from num_insns_constant_gpr
6007 	     except when TARGET_POWERPC64 and mode is DImode or
6008 	     wider, so the register mode must be DImode.  */
6009 	  && rs6000_is_valid_and_mask (GEN_INT (low), DImode))
6010 	insns = 2;
6011       total += insns;
6012       /* If BITS_PER_WORD is the number of bits in HOST_WIDE_INT, doing
6013 	 the shift all at once would be undefined behavior.  */
6014       value >>= (BITS_PER_WORD - 1);
6015       value >>= 1;
6016     }
6017   return total;
6018 }
6019 
6020 /* Return the number of instructions it takes to form a constant in as
6021    many gprs as are needed for MODE.  */
6022 
6023 int
6024 num_insns_constant (rtx op, machine_mode mode)
6025 {
6026   HOST_WIDE_INT val;
6027 
6028   switch (GET_CODE (op))
6029     {
6030     case CONST_INT:
6031       val = INTVAL (op);
6032       break;
6033 
6034     case CONST_WIDE_INT:
6035       {
6036 	int insns = 0;
6037 	for (int i = 0; i < CONST_WIDE_INT_NUNITS (op); i++)
6038 	  insns += num_insns_constant_multi (CONST_WIDE_INT_ELT (op, i),
6039 					     DImode);
6040 	return insns;
6041       }
6042 
6043     case CONST_DOUBLE:
6044       {
6045 	const struct real_value *rv = CONST_DOUBLE_REAL_VALUE (op);
6046 
6047 	if (mode == SFmode || mode == SDmode)
6048 	  {
6049 	    long l;
6050 
6051 	    if (mode == SDmode)
6052 	      REAL_VALUE_TO_TARGET_DECIMAL32 (*rv, l);
6053 	    else
6054 	      REAL_VALUE_TO_TARGET_SINGLE (*rv, l);
6055 	    /* See the first define_split in rs6000.md handling a
6056 	       const_double_operand.  */
6057 	    val = l;
6058 	    mode = SImode;
6059 	  }
6060 	else if (mode == DFmode || mode == DDmode)
6061 	  {
6062 	    long l[2];
6063 
6064 	    if (mode == DDmode)
6065 	      REAL_VALUE_TO_TARGET_DECIMAL64 (*rv, l);
6066 	    else
6067 	      REAL_VALUE_TO_TARGET_DOUBLE (*rv, l);
6068 
6069 	    /* See the second (32-bit) and third (64-bit) define_split
6070 	       in rs6000.md handling a const_double_operand.  */
6071 	    val = (unsigned HOST_WIDE_INT) l[WORDS_BIG_ENDIAN ? 0 : 1] << 32;
6072 	    val |= l[WORDS_BIG_ENDIAN ? 1 : 0] & 0xffffffffUL;
6073 	    mode = DImode;
6074 	  }
6075 	else if (mode == TFmode || mode == TDmode
6076 		 || mode == KFmode || mode == IFmode)
6077 	  {
6078 	    long l[4];
6079 	    int insns;
6080 
6081 	    if (mode == TDmode)
6082 	      REAL_VALUE_TO_TARGET_DECIMAL128 (*rv, l);
6083 	    else
6084 	      REAL_VALUE_TO_TARGET_LONG_DOUBLE (*rv, l);
6085 
6086 	    val = (unsigned HOST_WIDE_INT) l[WORDS_BIG_ENDIAN ? 0 : 3] << 32;
6087 	    val |= l[WORDS_BIG_ENDIAN ? 1 : 2] & 0xffffffffUL;
6088 	    insns = num_insns_constant_multi (val, DImode);
6089 	    val = (unsigned HOST_WIDE_INT) l[WORDS_BIG_ENDIAN ? 2 : 1] << 32;
6090 	    val |= l[WORDS_BIG_ENDIAN ? 3 : 0] & 0xffffffffUL;
6091 	    insns += num_insns_constant_multi (val, DImode);
6092 	    return insns;
6093 	  }
6094 	else
6095 	  gcc_unreachable ();
6096       }
6097       break;
6098 
6099     default:
6100       gcc_unreachable ();
6101     }
6102 
6103   return num_insns_constant_multi (val, mode);
6104 }
6105 
6106 /* Interpret element ELT of the CONST_VECTOR OP as an integer value.
6107    If the mode of OP is MODE_VECTOR_INT, this simply returns the
6108    corresponding element of the vector, but for V4SFmode, the
6109    corresponding "float" is interpreted as an SImode integer.  */
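/* For example, a V4SFmode element holding 1.0f is returned as
   0x3f800000, its IEEE single precision bit pattern.  */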
6110 
6111 HOST_WIDE_INT
6112 const_vector_elt_as_int (rtx op, unsigned int elt)
6113 {
6114   rtx tmp;
6115 
6116   /* We can't handle V2DImode and V2DFmode vector constants here yet.  */
6117   gcc_assert (GET_MODE (op) != V2DImode
6118 	      && GET_MODE (op) != V2DFmode);
6119 
6120   tmp = CONST_VECTOR_ELT (op, elt);
6121   if (GET_MODE (op) == V4SFmode)
6122     tmp = gen_lowpart (SImode, tmp);
6123   return INTVAL (tmp);
6124 }
6125 
6126 /* Return true if OP can be synthesized with a particular vspltisb, vspltish
6127    or vspltisw instruction.  OP is a CONST_VECTOR.  Which instruction is used
6128    depends on STEP and COPIES, one of which will be 1.  If COPIES > 1,
6129    all items are set to the same value and contain COPIES replicas of the
6130    vsplt's operand; if STEP > 1, one in STEP elements is set to the vsplt's
6131    operand and the others are set to the value of the operand's msb.  */
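/* For example (big-endian element order): an all-5 V8HImode vector
   matches with STEP == 1 and COPIES == 1 (vspltish 5); a V4SImode vector
   with 0x00050005 in every word matches with COPIES == 2, each word
   being two replicas of the halfword 5 (still vspltish 5); and a
   V16QImode vector that is 0 except for a 5 in every fourth byte matches
   with STEP == 4, since it is really vspltisw 5 viewed as bytes.  */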
6132 
6133 static bool
6134 vspltis_constant (rtx op, unsigned step, unsigned copies)
6135 {
6136   machine_mode mode = GET_MODE (op);
6137   machine_mode inner = GET_MODE_INNER (mode);
6138 
6139   unsigned i;
6140   unsigned nunits;
6141   unsigned bitsize;
6142   unsigned mask;
6143 
6144   HOST_WIDE_INT val;
6145   HOST_WIDE_INT splat_val;
6146   HOST_WIDE_INT msb_val;
6147 
6148   if (mode == V2DImode || mode == V2DFmode || mode == V1TImode)
6149     return false;
6150 
6151   nunits = GET_MODE_NUNITS (mode);
6152   bitsize = GET_MODE_BITSIZE (inner);
6153   mask = GET_MODE_MASK (inner);
6154 
6155   val = const_vector_elt_as_int (op, BYTES_BIG_ENDIAN ? nunits - 1 : 0);
6156   splat_val = val;
6157   msb_val = val >= 0 ? 0 : -1;
6158 
6159   if (val == 0 && step > 1)
6160     {
6161       /* Special case for loading the most significant bit with STEP > 1.
6162 	 In that case, match 0 in all but every STEP-th element, which must
6163 	 match EASY_VECTOR_MSB.  */
6164       for (i = 1; i < nunits; ++i)
6165 	{
6166 	  unsigned elt = BYTES_BIG_ENDIAN ? nunits - 1 - i : i;
6167 	  HOST_WIDE_INT elt_val = const_vector_elt_as_int (op, elt);
6168 	  if ((i & (step - 1)) == step - 1)
6169 	    {
6170 	      if (!EASY_VECTOR_MSB (elt_val, inner))
6171 		break;
6172 	    }
6173 	  else if (elt_val)
6174 	    break;
6175 	}
6176       if (i == nunits)
6177 	return true;
6178     }
6179 
6180   /* Construct the value to be splatted, if possible.  If not, return 0.  */
6181   for (i = 2; i <= copies; i *= 2)
6182     {
6183       HOST_WIDE_INT small_val;
6184       bitsize /= 2;
6185       small_val = splat_val >> bitsize;
6186       mask >>= bitsize;
6187       if (splat_val != ((HOST_WIDE_INT)
6188           ((unsigned HOST_WIDE_INT) small_val << bitsize)
6189           | (small_val & mask)))
6190 	return false;
6191       splat_val = small_val;
6192       inner = smallest_int_mode_for_size (bitsize);
6193     }
6194 
6195   /* Check if SPLAT_VAL can really be the operand of a vspltis[bhw].  */
6196   if (EASY_VECTOR_15 (splat_val))
6197     ;
6198 
6199   /* Also check if we can splat, and then add the result to itself.  Do so if
6200      the value is positive, or if the splat instruction is using OP's mode;
6201      for splat_val < 0, the splat and the add should use the same mode.  */
6202   else if (EASY_VECTOR_15_ADD_SELF (splat_val)
6203            && (splat_val >= 0 || (step == 1 && copies == 1)))
6204     ;
6205 
6206   /* Also check if we are loading up the most significant bit, which can be
6207      done by loading up -1 and shifting the value left by -1.  Only do this
6208      for step 1 here; for larger steps it is done earlier.  */
6209   else if (EASY_VECTOR_MSB (splat_val, inner) && step == 1)
6210     ;
6211 
6212   else
6213     return false;
6214 
6215   /* Check if VAL is present in every STEP-th element, and the
6216      other elements are filled with its most significant bit.  */
6217   for (i = 1; i < nunits; ++i)
6218     {
6219       HOST_WIDE_INT desired_val;
6220       unsigned elt = BYTES_BIG_ENDIAN ? nunits - 1 - i : i;
6221       if ((i & (step - 1)) == 0)
6222 	desired_val = val;
6223       else
6224 	desired_val = msb_val;
6225 
6226       if (desired_val != const_vector_elt_as_int (op, elt))
6227 	return false;
6228     }
6229 
6230   return true;
6231 }
6232 
6233 /* Like vspltis_constant, but allow the value to be shifted left with a VSLDOI
6234    instruction, filling in the bottom elements with 0 or -1.
6235 
6236    Return 0 if the constant cannot be generated with VSLDOI.  Return positive
6237    for the number of zeroes to shift in, or negative for the number of 0xff
6238    bytes to shift in.
6239 
6240    OP is a CONST_VECTOR.  */
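/* For example (big endian), the V4SImode vector { 5, 5, 5, 0 } returns 4:
   vspltisw 5 followed by a VSLDOI shifting in 4 zero bytes.  Likewise
   { 5, 5, -1, -1 } returns -8, shifting in 8 bytes of 0xff.  */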
6241 
6242 int
6243 vspltis_shifted (rtx op)
6244 {
6245   machine_mode mode = GET_MODE (op);
6246   machine_mode inner = GET_MODE_INNER (mode);
6247 
6248   unsigned i, j;
6249   unsigned nunits;
6250   unsigned mask;
6251 
6252   HOST_WIDE_INT val;
6253 
6254   if (mode != V16QImode && mode != V8HImode && mode != V4SImode)
6255     return 0;
6256 
6257   /* We need to create pseudo registers to do the shift, so don't recognize
6258      shift vector constants after reload.  Don't match it even before RA
6259      after split1 is done, because there won't be further splitting pass
6260      before RA to do the splitting.  */
6261   if (!can_create_pseudo_p ()
6262       || (cfun->curr_properties & PROP_rtl_split_insns))
6263     return false;
6264 
6265   nunits = GET_MODE_NUNITS (mode);
6266   mask = GET_MODE_MASK (inner);
6267 
6268   val = const_vector_elt_as_int (op, BYTES_BIG_ENDIAN ? 0 : nunits - 1);
6269 
6270   /* Check if the value can really be the operand of a vspltis[bhw].  */
6271   if (EASY_VECTOR_15 (val))
6272     ;
6273 
6274   /* Also check if we are loading up the most significant bit which can be done
6275      by loading up -1 and shifting the value left by -1.  */
6276   else if (EASY_VECTOR_MSB (val, inner))
6277     ;
6278 
6279   else
6280     return 0;
6281 
6282   /* Check if VAL is present in every STEP-th element until we find elements
6283      that are 0 or all 1 bits.  */
6284   for (i = 1; i < nunits; ++i)
6285     {
6286       unsigned elt = BYTES_BIG_ENDIAN ? i : nunits - 1 - i;
6287       HOST_WIDE_INT elt_val = const_vector_elt_as_int (op, elt);
6288 
6289       /* If the value isn't the splat value, check for the remaining elements
6290 	 being 0/-1.  */
6291       if (val != elt_val)
6292 	{
6293 	  if (elt_val == 0)
6294 	    {
6295 	      for (j = i+1; j < nunits; ++j)
6296 		{
6297 		  unsigned elt2 = BYTES_BIG_ENDIAN ? j : nunits - 1 - j;
6298 		  if (const_vector_elt_as_int (op, elt2) != 0)
6299 		    return 0;
6300 		}
6301 
6302 	      return (nunits - i) * GET_MODE_SIZE (inner);
6303 	    }
6304 
6305 	  else if ((elt_val & mask) == mask)
6306 	    {
6307 	      for (j = i+1; j < nunits; ++j)
6308 		{
6309 		  unsigned elt2 = BYTES_BIG_ENDIAN ? j : nunits - 1 - j;
6310 		  if ((const_vector_elt_as_int (op, elt2) & mask) != mask)
6311 		    return 0;
6312 		}
6313 
6314 	      return -((nunits - i) * GET_MODE_SIZE (inner));
6315 	    }
6316 
6317 	  else
6318 	    return 0;
6319 	}
6320     }
6321 
6322   /* If all elements are equal, we don't need to do VSLDOI.  */
6323   return 0;
6324 }
6325 
6326 
6327 /* Return non-zero (element mode byte size) if OP is of the given MODE
6328    and can be synthesized with a vspltisb, vspltish or vspltisw.  */
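/* For example, an all-5 V4SImode vector returns 4 (vspltisw 5), an all-5
   V8HImode vector returns 2 (vspltish 5), and an all-5 V16QImode vector
   returns 1 (vspltisb 5).  */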
6329 
6330 int
6331 easy_altivec_constant (rtx op, machine_mode mode)
6332 {
6333   unsigned step, copies;
6334 
6335   if (mode == VOIDmode)
6336     mode = GET_MODE (op);
6337   else if (mode != GET_MODE (op))
6338     return 0;
6339 
6340   /* V2DI/V2DF was added with VSX.  Only allow 0 and all 1's as easy
6341      constants.  */
6342   if (mode == V2DFmode)
6343     return zero_constant (op, mode) ? 8 : 0;
6344 
6345   else if (mode == V2DImode)
6346     {
6347       if (!CONST_INT_P (CONST_VECTOR_ELT (op, 0))
6348 	  || !CONST_INT_P (CONST_VECTOR_ELT (op, 1)))
6349 	return 0;
6350 
6351       if (zero_constant (op, mode))
6352 	return 8;
6353 
6354       if (INTVAL (CONST_VECTOR_ELT (op, 0)) == -1
6355 	  && INTVAL (CONST_VECTOR_ELT (op, 1)) == -1)
6356 	return 8;
6357 
6358       return 0;
6359     }
6360 
6361   /* V1TImode is a special container for TImode.  Ignore for now.  */
6362   else if (mode == V1TImode)
6363     return 0;
6364 
6365   /* Start with a vspltisw.  */
6366   step = GET_MODE_NUNITS (mode) / 4;
6367   copies = 1;
6368 
6369   if (vspltis_constant (op, step, copies))
6370     return 4;
6371 
6372   /* Then try with a vspltish.  */
6373   if (step == 1)
6374     copies <<= 1;
6375   else
6376     step >>= 1;
6377 
6378   if (vspltis_constant (op, step, copies))
6379     return 2;
6380 
6381   /* And finally a vspltisb.  */
6382   if (step == 1)
6383     copies <<= 1;
6384   else
6385     step >>= 1;
6386 
6387   if (vspltis_constant (op, step, copies))
6388     return 1;
6389 
6390   if (vspltis_shifted (op) != 0)
6391     return GET_MODE_SIZE (GET_MODE_INNER (mode));
6392 
6393   return 0;
6394 }
6395 
6396 /* Generate a VEC_DUPLICATE representing a vspltis[bhw] instruction whose
6397    result is OP.  Abort if it is not possible.  */
6398 
6399 rtx
6400 gen_easy_altivec_constant (rtx op)
6401 {
6402   machine_mode mode = GET_MODE (op);
6403   int nunits = GET_MODE_NUNITS (mode);
6404   rtx val = CONST_VECTOR_ELT (op, BYTES_BIG_ENDIAN ? nunits - 1 : 0);
6405   unsigned step = nunits / 4;
6406   unsigned copies = 1;
6407 
6408   /* Start with a vspltisw.  */
6409   if (vspltis_constant (op, step, copies))
6410     return gen_rtx_VEC_DUPLICATE (V4SImode, gen_lowpart (SImode, val));
6411 
6412   /* Then try with a vspltish.  */
6413   if (step == 1)
6414     copies <<= 1;
6415   else
6416     step >>= 1;
6417 
6418   if (vspltis_constant (op, step, copies))
6419     return gen_rtx_VEC_DUPLICATE (V8HImode, gen_lowpart (HImode, val));
6420 
6421   /* And finally a vspltisb.  */
6422   if (step == 1)
6423     copies <<= 1;
6424   else
6425     step >>= 1;
6426 
6427   if (vspltis_constant (op, step, copies))
6428     return gen_rtx_VEC_DUPLICATE (V16QImode, gen_lowpart (QImode, val));
6429 
6430   gcc_unreachable ();
6431 }
6432 
6433 /* Return true if OP is of the given MODE and can be synthesized with ISA 3.0
6434    instructions (xxspltib, vupkhsb/vextsb2w/vextb2d).
6435 
6436    Store the number of instructions needed (1 or 2) through
6437    NUM_INSNS_PTR.
6438 
6439    Store the constant that is being split through CONSTANT_PTR.  */
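/* For example, an all-0x42 V16QImode vector needs 1 insn (xxspltib 66);
   an all-100 V4SImode vector needs 2 (xxspltib 100 plus a sign extend to
   words), unless a single prefixed XXSPLTIW can do it; and an all-5
   V4SImode vector returns false, since vspltisw 5 is preferred.  */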
6440 
6441 bool
6442 xxspltib_constant_p (rtx op,
6443 		     machine_mode mode,
6444 		     int *num_insns_ptr,
6445 		     int *constant_ptr)
6446 {
6447   size_t nunits = GET_MODE_NUNITS (mode);
6448   size_t i;
6449   HOST_WIDE_INT value;
6450   rtx element;
6451 
6452   /* Set the returned values to out of bound values.  */
6453   *num_insns_ptr = -1;
6454   *constant_ptr = 256;
6455 
6456   if (!TARGET_P9_VECTOR)
6457     return false;
6458 
6459   if (mode == VOIDmode)
6460     mode = GET_MODE (op);
6461 
6462   else if (mode != GET_MODE (op) && GET_MODE (op) != VOIDmode)
6463     return false;
6464 
6465   /* Handle (vec_duplicate <constant>).  */
6466   if (GET_CODE (op) == VEC_DUPLICATE)
6467     {
6468       if (mode != V16QImode && mode != V8HImode && mode != V4SImode
6469 	  && mode != V2DImode)
6470 	return false;
6471 
6472       element = XEXP (op, 0);
6473       if (!CONST_INT_P (element))
6474 	return false;
6475 
6476       value = INTVAL (element);
6477       if (!IN_RANGE (value, -128, 127))
6478 	return false;
6479     }
6480 
6481   /* Handle (const_vector [...]).  */
6482   else if (GET_CODE (op) == CONST_VECTOR)
6483     {
6484       if (mode != V16QImode && mode != V8HImode && mode != V4SImode
6485 	  && mode != V2DImode)
6486 	return false;
6487 
6488       element = CONST_VECTOR_ELT (op, 0);
6489       if (!CONST_INT_P (element))
6490 	return false;
6491 
6492       value = INTVAL (element);
6493       if (!IN_RANGE (value, -128, 127))
6494 	return false;
6495 
6496       for (i = 1; i < nunits; i++)
6497 	{
6498 	  element = CONST_VECTOR_ELT (op, i);
6499 	  if (!CONST_INT_P (element))
6500 	    return false;
6501 
6502 	  if (value != INTVAL (element))
6503 	    return false;
6504 	}
6505     }
6506 
6507   /* Handle integer constants being loaded into the upper part of the VSX
6508      register as a scalar.  If the value isn't 0/-1, only allow it if the mode
6509      can go in Altivec registers.  Prefer VSPLTISW/VUPKHSW over XXSPLTIB.  */
6510   else if (CONST_INT_P (op))
6511     {
6512       if (!SCALAR_INT_MODE_P (mode))
6513 	return false;
6514 
6515       value = INTVAL (op);
6516       if (!IN_RANGE (value, -128, 127))
6517 	return false;
6518 
6519       if (!IN_RANGE (value, -1, 0))
6520 	{
6521 	  if (!(reg_addr[mode].addr_mask[RELOAD_REG_VMX] & RELOAD_REG_VALID))
6522 	    return false;
6523 
6524 	  if (EASY_VECTOR_15 (value))
6525 	    return false;
6526 	}
6527     }
6528 
6529   else
6530     return false;
6531 
6532   /* See if we could generate vspltisw/vspltish directly instead of xxspltib +
6533      sign extend.  Special case 0/-1 to allow getting any VSX register instead
6534      of an Altivec register.  */
6535   if ((mode == V4SImode || mode == V8HImode) && !IN_RANGE (value, -1, 0)
6536       && EASY_VECTOR_15 (value))
6537     return false;
6538 
6539   /* Return # of instructions and the constant byte for XXSPLTIB.  */
6540   if (mode == V16QImode)
6541     *num_insns_ptr = 1;
6542 
6543   else if (IN_RANGE (value, -1, 0))
6544     *num_insns_ptr = 1;
6545 
6546   /* Do not generate XXSPLTIB and a sign extend operation if we can generate a
6547      single XXSPLTIW or XXSPLTIDP instruction.  */
6548   else if (vsx_prefixed_constant (op, mode))
6549     return false;
6550 
6551   /* Return XXSPLTIB followed by a sign extend operation to convert the
6552      constant to V8HImode or V4SImode.  */
6553   else
6554     *num_insns_ptr = 2;
6555 
6556   *constant_ptr = (int) value;
6557   return true;
6558 }
6559 
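/* Output the assembler template to move the easy vector constant
   OPERANDS[1] into register OPERANDS[0], returning "#" when the move
   must be split into multiple instructions.  */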
6560 const char *
6561 output_vec_const_move (rtx *operands)
6562 {
6563   int shift;
6564   machine_mode mode;
6565   rtx dest, vec;
6566 
6567   dest = operands[0];
6568   vec = operands[1];
6569   mode = GET_MODE (dest);
6570 
6571   if (TARGET_VSX)
6572     {
6573       bool dest_vmx_p = ALTIVEC_REGNO_P (REGNO (dest));
6574       int xxspltib_value = 256;
6575       int num_insns = -1;
6576 
6577       if (zero_constant (vec, mode))
6578 	{
6579 	  if (TARGET_P9_VECTOR)
6580 	    return "xxspltib %x0,0";
6581 
6582 	  else if (dest_vmx_p)
6583 	    return "vspltisw %0,0";
6584 
6585 	  else
6586 	    return "xxlxor %x0,%x0,%x0";
6587 	}
6588 
6589       if (all_ones_constant (vec, mode))
6590 	{
6591 	  if (TARGET_P9_VECTOR)
6592 	    return "xxspltib %x0,255";
6593 
6594 	  else if (dest_vmx_p)
6595 	    return "vspltisw %0,-1";
6596 
6597 	  else if (TARGET_P8_VECTOR)
6598 	    return "xxlorc %x0,%x0,%x0";
6599 
6600 	  else
6601 	    gcc_unreachable ();
6602 	}
6603 
6604       vec_const_128bit_type vsx_const;
6605       if (TARGET_POWER10 && vec_const_128bit_to_bytes (vec, mode, &vsx_const))
6606 	{
6607 	  unsigned imm = constant_generates_lxvkq (&vsx_const);
6608 	  if (imm)
6609 	    {
6610 	      operands[2] = GEN_INT (imm);
6611 	      return "lxvkq %x0,%2";
6612 	    }
6613 
6614 	  imm = constant_generates_xxspltiw (&vsx_const);
6615 	  if (imm)
6616 	    {
6617 	      operands[2] = GEN_INT (imm);
6618 	      return "xxspltiw %x0,%2";
6619 	    }
6620 
6621 	  imm = constant_generates_xxspltidp (&vsx_const);
6622 	  if (imm)
6623 	    {
6624 	      operands[2] = GEN_INT (imm);
6625 	      return "xxspltidp %x0,%2";
6626 	    }
6627 	}
6628 
6629       if (TARGET_P9_VECTOR
6630 	  && xxspltib_constant_p (vec, mode, &num_insns, &xxspltib_value))
6631 	{
6632 	  if (num_insns == 1)
6633 	    {
6634 	      operands[2] = GEN_INT (xxspltib_value & 0xff);
6635 	      return "xxspltib %x0,%2";
6636 	    }
6637 
6638 	  return "#";
6639 	}
6640     }
6641 
6642   if (TARGET_ALTIVEC)
6643     {
6644       rtx splat_vec;
6645 
6646       gcc_assert (ALTIVEC_REGNO_P (REGNO (dest)));
6647       if (zero_constant (vec, mode))
6648 	return "vspltisw %0,0";
6649 
6650       if (all_ones_constant (vec, mode))
6651 	return "vspltisw %0,-1";
6652 
6653       /* Do we need to construct a value using VSLDOI?  */
6654       shift = vspltis_shifted (vec);
6655       if (shift != 0)
6656 	return "#";
6657 
6658       splat_vec = gen_easy_altivec_constant (vec);
6659       gcc_assert (GET_CODE (splat_vec) == VEC_DUPLICATE);
6660       operands[1] = XEXP (splat_vec, 0);
6661       if (!EASY_VECTOR_15 (INTVAL (operands[1])))
6662 	return "#";
6663 
6664       switch (GET_MODE (splat_vec))
6665 	{
6666 	case E_V4SImode:
6667 	  return "vspltisw %0,%1";
6668 
6669 	case E_V8HImode:
6670 	  return "vspltish %0,%1";
6671 
6672 	case E_V16QImode:
6673 	  return "vspltisb %0,%1";
6674 
6675 	default:
6676 	  gcc_unreachable ();
6677 	}
6678     }
6679 
6680   gcc_unreachable ();
6681 }
6682 
6683 /* Initialize vector TARGET to VALS.  */
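/* The strategies below, roughly in the order tried: all-constant inputs
   become a zero move, an easy splat, or a constant pool load; V2DF/V2DI
   use xxpermdi or lxvdsx; V4SI and V4SF have direct-move and P9 splat
   paths; splatted V16QI/V8HI use a direct move plus vspltb/vsplth; a
   single variable element is handled by initializing the rest and then
   using vec_set; otherwise the vector is assembled with direct moves and
   merges, or built in a stack temporary.  */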
6684 
6685 void
6686 rs6000_expand_vector_init (rtx target, rtx vals)
6687 {
6688   machine_mode mode = GET_MODE (target);
6689   machine_mode inner_mode = GET_MODE_INNER (mode);
6690   unsigned int n_elts = GET_MODE_NUNITS (mode);
6691   int n_var = 0, one_var = -1;
6692   bool all_same = true, all_const_zero = true;
6693   rtx x, mem;
6694   unsigned int i;
6695 
6696   for (i = 0; i < n_elts; ++i)
6697     {
6698       x = XVECEXP (vals, 0, i);
6699       if (!(CONST_SCALAR_INT_P (x) || CONST_DOUBLE_P (x) || CONST_FIXED_P (x)))
6700 	++n_var, one_var = i;
6701       else if (x != CONST0_RTX (inner_mode))
6702 	all_const_zero = false;
6703 
6704       if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
6705 	all_same = false;
6706     }
6707 
6708   if (n_var == 0)
6709     {
6710       rtx const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
6711       bool int_vector_p = (GET_MODE_CLASS (mode) == MODE_VECTOR_INT);
6712       if ((int_vector_p || TARGET_VSX) && all_const_zero)
6713 	{
6714 	  /* Zero register.  */
6715 	  emit_move_insn (target, CONST0_RTX (mode));
6716 	  return;
6717 	}
6718       else if (int_vector_p && easy_vector_constant (const_vec, mode))
6719 	{
6720 	  /* Splat immediate.  */
6721 	  emit_insn (gen_rtx_SET (target, const_vec));
6722 	  return;
6723 	}
6724       else
6725 	{
6726 	  /* Load from constant pool.  */
6727 	  emit_move_insn (target, const_vec);
6728 	  return;
6729 	}
6730     }
6731 
6732   /* Double word values on VSX can use xxpermdi or lxvdsx.  */
6733   if (VECTOR_MEM_VSX_P (mode) && (mode == V2DFmode || mode == V2DImode))
6734     {
6735       rtx op[2];
6736       size_t i;
6737       size_t num_elements = all_same ? 1 : 2;
6738       for (i = 0; i < num_elements; i++)
6739 	{
6740 	  op[i] = XVECEXP (vals, 0, i);
6741 	  /* Just in case there is a SUBREG with a smaller mode, do a
6742 	     conversion.  */
6743 	  if (GET_MODE (op[i]) != inner_mode)
6744 	    {
6745 	      rtx tmp = gen_reg_rtx (inner_mode);
6746 	      convert_move (tmp, op[i], 0);
6747 	      op[i] = tmp;
6748 	    }
6749 	  /* Allow load with splat double word.  */
6750 	  else if (MEM_P (op[i]))
6751 	    {
6752 	      if (!all_same)
6753 		op[i] = force_reg (inner_mode, op[i]);
6754 	    }
6755 	  else if (!REG_P (op[i]))
6756 	    op[i] = force_reg (inner_mode, op[i]);
6757 	}
6758 
6759       if (all_same)
6760 	{
6761 	  if (mode == V2DFmode)
6762 	    emit_insn (gen_vsx_splat_v2df (target, op[0]));
6763 	  else
6764 	    emit_insn (gen_vsx_splat_v2di (target, op[0]));
6765 	}
6766       else
6767 	{
6768 	  if (mode == V2DFmode)
6769 	    emit_insn (gen_vsx_concat_v2df (target, op[0], op[1]));
6770 	  else
6771 	    emit_insn (gen_vsx_concat_v2di (target, op[0], op[1]));
6772 	}
6773       return;
6774     }
6775 
6776   /* Special case initializing vector int if we are on 64-bit systems with
6777      direct move, using the ISA 3.0 splat instructions when available.  */
6778   if (mode == V4SImode  && VECTOR_MEM_VSX_P (V4SImode)
6779       && TARGET_DIRECT_MOVE_64BIT)
6780     {
6781       if (all_same)
6782 	{
6783 	  rtx element0 = XVECEXP (vals, 0, 0);
6784 	  if (MEM_P (element0))
6785 	    element0 = rs6000_force_indexed_or_indirect_mem (element0);
6786 	  else
6787 	    element0 = force_reg (SImode, element0);
6788 
6789 	  if (TARGET_P9_VECTOR)
6790 	    emit_insn (gen_vsx_splat_v4si (target, element0));
6791 	  else
6792 	    {
6793 	      rtx tmp = gen_reg_rtx (DImode);
6794 	      emit_insn (gen_zero_extendsidi2 (tmp, element0));
6795 	      emit_insn (gen_vsx_splat_v4si_di (target, tmp));
6796 	    }
6797 	  return;
6798 	}
6799       else
6800 	{
6801 	  rtx elements[4];
6802 	  size_t i;
6803 
6804 	  for (i = 0; i < 4; i++)
6805 	    elements[i] = force_reg (SImode, XVECEXP (vals, 0, i));
6806 
6807 	  emit_insn (gen_vsx_init_v4si (target, elements[0], elements[1],
6808 					elements[2], elements[3]));
6809 	  return;
6810 	}
6811     }
6812 
6813   /* With single precision floating point on VSX, we know that internally
6814      single precision is actually represented as a double.  Either make two
6815      V2DF vectors and convert them to single precision, or do one conversion
6816      and splat the result to the other elements.  */
6817   if (mode == V4SFmode && VECTOR_MEM_VSX_P (V4SFmode))
6818     {
6819       if (all_same)
6820 	{
6821 	  rtx element0 = XVECEXP (vals, 0, 0);
6822 
6823 	  if (TARGET_P9_VECTOR)
6824 	    {
6825 	      if (MEM_P (element0))
6826 		element0 = rs6000_force_indexed_or_indirect_mem (element0);
6827 
6828 	      emit_insn (gen_vsx_splat_v4sf (target, element0));
6829 	    }
6830 
6831 	  else
6832 	    {
6833 	      rtx freg = gen_reg_rtx (V4SFmode);
6834 	      rtx sreg = force_reg (SFmode, element0);
6835 	      rtx cvt  = (TARGET_XSCVDPSPN
6836 			  ? gen_vsx_xscvdpspn_scalar (freg, sreg)
6837 			  : gen_vsx_xscvdpsp_scalar (freg, sreg));
6838 
6839 	      emit_insn (cvt);
6840 	      emit_insn (gen_vsx_xxspltw_v4sf_direct (target, freg,
6841 						      const0_rtx));
6842 	    }
6843 	}
6844       else
6845 	{
6846 	  if (TARGET_P8_VECTOR && TARGET_POWERPC64)
6847 	    {
6848 	      rtx tmp_sf[4];
6849 	      rtx tmp_si[4];
6850 	      rtx tmp_di[4];
6851 	      rtx mrg_di[4];
6852 	      for (i = 0; i < 4; i++)
6853 		{
6854 		  tmp_si[i] = gen_reg_rtx (SImode);
6855 		  tmp_di[i] = gen_reg_rtx (DImode);
6856 		  mrg_di[i] = gen_reg_rtx (DImode);
6857 		  tmp_sf[i] = force_reg (SFmode, XVECEXP (vals, 0, i));
6858 		  emit_insn (gen_movsi_from_sf (tmp_si[i], tmp_sf[i]));
6859 		  emit_insn (gen_zero_extendsidi2 (tmp_di[i], tmp_si[i]));
6860 		}
6861 
6862 	      if (!BYTES_BIG_ENDIAN)
6863 		{
6864 		  std::swap (tmp_di[0], tmp_di[1]);
6865 		  std::swap (tmp_di[2], tmp_di[3]);
6866 		}
6867 
6868 	      emit_insn (gen_ashldi3 (mrg_di[0], tmp_di[0], GEN_INT (32)));
6869 	      emit_insn (gen_iordi3 (mrg_di[1], mrg_di[0], tmp_di[1]));
6870 	      emit_insn (gen_ashldi3 (mrg_di[2], tmp_di[2], GEN_INT (32)));
6871 	      emit_insn (gen_iordi3 (mrg_di[3], mrg_di[2], tmp_di[3]));
6872 
6873 	      rtx tmp_v2di = gen_reg_rtx (V2DImode);
6874 	      emit_insn (gen_vsx_concat_v2di (tmp_v2di, mrg_di[1], mrg_di[3]));
6875 	      emit_move_insn (target, gen_lowpart (V4SFmode, tmp_v2di));
6876 	    }
6877 	  else
6878 	    {
6879 	      rtx dbl_even = gen_reg_rtx (V2DFmode);
6880 	      rtx dbl_odd  = gen_reg_rtx (V2DFmode);
6881 	      rtx flt_even = gen_reg_rtx (V4SFmode);
6882 	      rtx flt_odd  = gen_reg_rtx (V4SFmode);
6883 	      rtx op0 = force_reg (SFmode, XVECEXP (vals, 0, 0));
6884 	      rtx op1 = force_reg (SFmode, XVECEXP (vals, 0, 1));
6885 	      rtx op2 = force_reg (SFmode, XVECEXP (vals, 0, 2));
6886 	      rtx op3 = force_reg (SFmode, XVECEXP (vals, 0, 3));
6887 
6888 	      emit_insn (gen_vsx_concat_v2sf (dbl_even, op0, op1));
6889 	      emit_insn (gen_vsx_concat_v2sf (dbl_odd, op2, op3));
6890 	      emit_insn (gen_vsx_xvcvdpsp (flt_even, dbl_even));
6891 	      emit_insn (gen_vsx_xvcvdpsp (flt_odd, dbl_odd));
6892 	      rs6000_expand_extract_even (target, flt_even, flt_odd);
6893 	    }
6894 	}
6895       return;
6896     }
6897 
6898   /* Special case initializing vector short/char that are splats if we are on
6899      64-bit systems with direct move.  */
6900   if (all_same && TARGET_DIRECT_MOVE_64BIT
6901       && (mode == V16QImode || mode == V8HImode))
6902     {
6903       rtx op0 = XVECEXP (vals, 0, 0);
6904       rtx di_tmp = gen_reg_rtx (DImode);
6905 
6906       if (!REG_P (op0))
6907 	op0 = force_reg (GET_MODE_INNER (mode), op0);
6908 
6909       if (mode == V16QImode)
6910 	{
6911 	  emit_insn (gen_zero_extendqidi2 (di_tmp, op0));
6912 	  emit_insn (gen_vsx_vspltb_di (target, di_tmp));
6913 	  return;
6914 	}
6915 
6916       if (mode == V8HImode)
6917 	{
6918 	  emit_insn (gen_zero_extendhidi2 (di_tmp, op0));
6919 	  emit_insn (gen_vsx_vsplth_di (target, di_tmp));
6920 	  return;
6921 	}
6922     }
6923 
6924   /* Store value to stack temp.  Load vector element.  Splat.  However, splat
6925      of 64-bit items is not supported on Altivec.  */
6926   if (all_same && GET_MODE_SIZE (inner_mode) <= 4)
6927     {
6928       mem = assign_stack_temp (mode, GET_MODE_SIZE (inner_mode));
6929       emit_move_insn (adjust_address_nv (mem, inner_mode, 0),
6930 		      XVECEXP (vals, 0, 0));
6931       x = gen_rtx_UNSPEC (VOIDmode,
6932 			  gen_rtvec (1, const0_rtx), UNSPEC_LVE);
6933       emit_insn (gen_rtx_PARALLEL (VOIDmode,
6934 				   gen_rtvec (2,
6935 					      gen_rtx_SET (target, mem),
6936 					      x)));
6937       x = gen_rtx_VEC_SELECT (inner_mode, target,
6938 			      gen_rtx_PARALLEL (VOIDmode,
6939 						gen_rtvec (1, const0_rtx)));
6940       emit_insn (gen_rtx_SET (target, gen_rtx_VEC_DUPLICATE (mode, x)));
6941       return;
6942     }
6943 
6944   /* One field is non-constant.  Load constant then overwrite
6945      varying field.  */
6946   if (n_var == 1)
6947     {
6948       rtx copy = copy_rtx (vals);
6949 
6950       /* Load constant part of vector, substitute neighboring value for
6951 	 varying element.  */
6952       XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts);
6953       rs6000_expand_vector_init (target, copy);
6954 
6955       /* Insert variable.  */
6956       rs6000_expand_vector_set (target, XVECEXP (vals, 0, one_var),
6957 				GEN_INT (one_var));
6958       return;
6959     }
6960 
6961   if (TARGET_DIRECT_MOVE && (mode == V16QImode || mode == V8HImode))
6962     {
6963       rtx op[16];
6964       /* Force the values into word_mode registers.  */
6965       for (i = 0; i < n_elts; i++)
6966 	{
6967 	  rtx tmp = force_reg (inner_mode, XVECEXP (vals, 0, i));
6968 	  machine_mode tmode = TARGET_POWERPC64 ? DImode : SImode;
6969 	  op[i] = simplify_gen_subreg (tmode, tmp, inner_mode, 0);
6970 	}
6971 
6972       /* As an example for the construction below, take unsigned char on a
6973 	 64-bit big-endian target; the input values are: A, B, C, D, ..., O, P.  */
6974 
6975       if (TARGET_DIRECT_MOVE_128)
6976 	{
6977 	  /* Move to VSX register with vec_concat, each has 2 values.
6978 	     eg: vr1[0] = { xxxxxxxA, xxxxxxxB };
6979 		 vr1[1] = { xxxxxxxC, xxxxxxxD };
6980 		 ...
6981 		 vr1[7] = { xxxxxxxO, xxxxxxxP };  */
6982 	  rtx vr1[8];
6983 	  for (i = 0; i < n_elts / 2; i++)
6984 	    {
6985 	      vr1[i] = gen_reg_rtx (V2DImode);
6986 	      emit_insn (gen_vsx_concat_v2di (vr1[i], op[i * 2],
6987 					      op[i * 2 + 1]));
6988 	    }
6989 
6990 	  /* Pack vectors with 2 values into vectors with 4 values.
6991 	     eg: vr2[0] = { xxxAxxxB, xxxCxxxD };
6992 		 vr2[1] = { xxxExxxF, xxxGxxxH };
6993 		 vr2[2] = { xxxIxxxJ, xxxKxxxL };
6994 		 vr2[3] = { xxxMxxxN, xxxOxxxP };  */
6995 	  rtx vr2[4];
6996 	  for (i = 0; i < n_elts / 4; i++)
6997 	    {
6998 	      vr2[i] = gen_reg_rtx (V4SImode);
6999 	      emit_insn (gen_altivec_vpkudum (vr2[i], vr1[i * 2],
7000 					      vr1[i * 2 + 1]));
7001 	    }
7002 
7003 	  /* Pack vectors with 4 values into vectors with 8 values.
7004 	     eg: vr3[0] = { xAxBxCxD, xExFxGxH };
7005 		 vr3[1] = { xIxJxKxL, xMxNxOxP };  */
7006 	  rtx vr3[2];
7007 	  for (i = 0; i < n_elts / 8; i++)
7008 	    {
7009 	      vr3[i] = gen_reg_rtx (V8HImode);
7010 	      emit_insn (gen_altivec_vpkuwum (vr3[i], vr2[i * 2],
7011 					      vr2[i * 2 + 1]));
7012 	    }
7013 
7014 	  /* For V8HImode the result is complete; set TARGET and return.  */
7015 	  if (mode == V8HImode)
7016 	    {
7017 	      emit_insn (gen_rtx_SET (target, vr3[0]));
7018 	      return;
7019 	    }
7020 
7021 	  /* Pack vectors with 8 values into 16 values.  */
7022 	  rtx res = gen_reg_rtx (V16QImode);
7023 	  emit_insn (gen_altivec_vpkuhum (res, vr3[0], vr3[1]));
7024 	  emit_insn (gen_rtx_SET (target, res));
7025 	}
7026       else
7027 	{
7028 	  rtx (*merge_v16qi) (rtx, rtx, rtx) = NULL;
7029 	  rtx (*merge_v8hi) (rtx, rtx, rtx) = NULL;
7030 	  rtx (*merge_v4si) (rtx, rtx, rtx) = NULL;
7031 	  rtx perm_idx;
7032 
7033 	  /* Set up some common gen routines and values.  */
7034 	  if (BYTES_BIG_ENDIAN)
7035 	    {
7036 	      if (mode == V16QImode)
7037 		{
7038 		  merge_v16qi = gen_altivec_vmrghb;
7039 		  merge_v8hi = gen_altivec_vmrglh;
7040 		}
7041 	      else
7042 		merge_v8hi = gen_altivec_vmrghh;
7043 
7044 	      merge_v4si = gen_altivec_vmrglw;
7045 	      perm_idx = GEN_INT (3);
7046 	    }
7047 	  else
7048 	    {
7049 	      if (mode == V16QImode)
7050 		{
7051 		  merge_v16qi = gen_altivec_vmrglb;
7052 		  merge_v8hi = gen_altivec_vmrghh;
7053 		}
7054 	      else
7055 		merge_v8hi = gen_altivec_vmrglh;
7056 
7057 	      merge_v4si = gen_altivec_vmrghw;
7058 	      perm_idx = GEN_INT (0);
7059 	    }
7060 
7061 	  /* Move to VSX register with direct move.
7062 	     eg: vr_qi[0] = { xxxxxxxA, xxxxxxxx };
7063 		 vr_qi[1] = { xxxxxxxB, xxxxxxxx };
7064 		 ...
7065 		 vr_qi[15] = { xxxxxxxP, xxxxxxxx };  */
7066 	  rtx vr_qi[16];
7067 	  for (i = 0; i < n_elts; i++)
7068 	    {
7069 	      vr_qi[i] = gen_reg_rtx (V16QImode);
7070 	      if (TARGET_POWERPC64)
7071 		emit_insn (gen_p8_mtvsrd_v16qidi2 (vr_qi[i], op[i]));
7072 	      else
7073 		emit_insn (gen_p8_mtvsrwz_v16qisi2 (vr_qi[i], op[i]));
7074 	    }
7075 
7076 	  /* Merge/move to vector short.
7077 	     eg: vr_hi[0] = { xxxxxxxx, xxxxxxAB };
7078 		 vr_hi[1] = { xxxxxxxx, xxxxxxCD };
7079 		 ...
7080 		 vr_hi[7] = { xxxxxxxx, xxxxxxOP };  */
7081 	  rtx vr_hi[8];
7082 	  for (i = 0; i < 8; i++)
7083 	    {
7084 	      rtx tmp = vr_qi[i];
7085 	      if (mode == V16QImode)
7086 		{
7087 		  tmp = gen_reg_rtx (V16QImode);
7088 		  emit_insn (merge_v16qi (tmp, vr_qi[2 * i], vr_qi[2 * i + 1]));
7089 		}
7090 	      vr_hi[i] = gen_reg_rtx (V8HImode);
7091 	      emit_move_insn (vr_hi[i], gen_lowpart (V8HImode, tmp));
7092 	    }
7093 
7094 	  /* Merge vector short to vector int.
7095 	     eg: vr_si[0] = { xxxxxxxx, xxxxABCD };
7096 		 vr_si[1] = { xxxxxxxx, xxxxEFGH };
7097 		 ...
7098 		 vr_si[3] = { xxxxxxxx, xxxxMNOP };  */
7099 	  rtx vr_si[4];
7100 	  for (i = 0; i < 4; i++)
7101 	    {
7102 	      rtx tmp = gen_reg_rtx (V8HImode);
7103 	      emit_insn (merge_v8hi (tmp, vr_hi[2 * i], vr_hi[2 * i + 1]));
7104 	      vr_si[i] = gen_reg_rtx (V4SImode);
7105 	      emit_move_insn (vr_si[i], gen_lowpart (V4SImode, tmp));
7106 	    }
7107 
7108 	  /* Merge vector int to vector long.
7109 	     eg: vr_di[0] = { xxxxxxxx, ABCDEFGH };
7110 		 vr_di[1] = { xxxxxxxx, IJKLMNOP };  */
7111 	  rtx vr_di[2];
7112 	  for (i = 0; i < 2; i++)
7113 	    {
7114 	      rtx tmp = gen_reg_rtx (V4SImode);
7115 	      emit_insn (merge_v4si (tmp, vr_si[2 * i], vr_si[2 * i + 1]));
7116 	      vr_di[i] = gen_reg_rtx (V2DImode);
7117 	      emit_move_insn (vr_di[i], gen_lowpart (V2DImode, tmp));
7118 	    }
7119 
7120 	  rtx res = gen_reg_rtx (V2DImode);
7121 	  emit_insn (gen_vsx_xxpermdi_v2di (res, vr_di[0], vr_di[1], perm_idx));
7122 	  emit_insn (gen_rtx_SET (target, gen_lowpart (mode, res)));
7123 	}
7124 
7125       return;
7126     }
7127 
7128   /* Construct the vector in memory one field at a time
7129      and load the whole vector.  */
7130   mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
7131   for (i = 0; i < n_elts; i++)
7132     emit_move_insn (adjust_address_nv (mem, inner_mode,
7133 				    i * GET_MODE_SIZE (inner_mode)),
7134 		    XVECEXP (vals, 0, i));
7135   emit_move_insn (target, mem);
7136 }
7137 
7138 /* Insert VAL into element IDX of TARGET; VAL matches the vector element
7139    size, and IDX is a variable index counted in elements.  P9 and above.  */
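/* The approach: use lvsl/lvsr on IDX * WIDTH to build permutation
   control vectors, rotate the element at IDX down to element 0, insert
   VAL there via the fixed-index path, then rotate back with the reversed
   control vector.  */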
7140 
7141 static void
7142 rs6000_expand_vector_set_var_p9 (rtx target, rtx val, rtx idx)
7143 {
7144   machine_mode mode = GET_MODE (target);
7145 
7146   gcc_assert (VECTOR_MEM_VSX_P (mode) && !CONST_INT_P (idx));
7147 
7148   machine_mode inner_mode = GET_MODE (val);
7149 
7150   int width = GET_MODE_SIZE (inner_mode);
7151 
7152   gcc_assert (width >= 1 && width <= 8);
7153 
7154   int shift = exact_log2 (width);
7155 
7156   machine_mode idx_mode = GET_MODE (idx);
7157 
7158   machine_mode shift_mode;
7159   /* Gen function pointers for shifting left and for generating the
7160      permutation control vectors.  */
7161   rtx (*gen_ashl) (rtx, rtx, rtx);
7162   rtx (*gen_pcvr1) (rtx, rtx);
7163   rtx (*gen_pcvr2) (rtx, rtx);
7164 
7165   if (TARGET_POWERPC64)
7166     {
7167       shift_mode = DImode;
7168       gen_ashl = gen_ashldi3;
7169       gen_pcvr1 = BYTES_BIG_ENDIAN ? gen_altivec_lvsl_reg_di
7170 				   : gen_altivec_lvsr_reg_di;
7171       gen_pcvr2 = BYTES_BIG_ENDIAN ? gen_altivec_lvsr_reg_di
7172 				   : gen_altivec_lvsl_reg_di;
7173     }
7174   else
7175     {
7176       shift_mode = SImode;
7177       gen_ashl = gen_ashlsi3;
7178       gen_pcvr1 = BYTES_BIG_ENDIAN ? gen_altivec_lvsl_reg_si
7179 				   : gen_altivec_lvsr_reg_si;
7180       gen_pcvr2 = BYTES_BIG_ENDIAN ? gen_altivec_lvsr_reg_si
7181 				   : gen_altivec_lvsl_reg_si;
7182     }
7183   /* Generate the IDX for permute shift, width is the vector element size.
7184      idx = idx * width.  */
7185   rtx tmp = gen_reg_rtx (shift_mode);
7186   idx = convert_modes (shift_mode, idx_mode, idx, 1);
7187 
7188   emit_insn (gen_ashl (tmp, idx, GEN_INT (shift)));
7189 
7190   /* Generate a permutation control vector used to rotate the element at
7191      the insertion position down to element zero of the target vector.  lvsl
7192      is used for big endian while lvsr is used for little endian:
7193      lvs[lr]    v1,0,idx.  */
7194   rtx pcvr1 = gen_reg_rtx (V16QImode);
7195   emit_insn (gen_pcvr1 (pcvr1, tmp));
7196 
7197   rtx sub_target = simplify_gen_subreg (V16QImode, target, mode, 0);
7198   rtx perm1 = gen_altivec_vperm_v8hiv16qi (sub_target, sub_target, sub_target,
7199 					   pcvr1);
7200   emit_insn (perm1);
7201 
7202   /* Insert val into element 0 of target vector.  */
7203   rs6000_expand_vector_set (target, val, const0_rtx);
7204 
7205   /* Rotate back with a reversed permutation control vector generated from:
7206      lvs[rl]   v2,0,idx.  */
7207   rtx pcvr2 = gen_reg_rtx (V16QImode);
7208   emit_insn (gen_pcvr2 (pcvr2, tmp));
7209 
7210   rtx perm2 = gen_altivec_vperm_v8hiv16qi (sub_target, sub_target, sub_target,
7211 					   pcvr2);
7212   emit_insn (perm2);
7213 }
7214 
7215 /* Insert VAL into element IDX of TARGET; VAL matches the vector element
7216    size, and IDX is a variable index counted in elements.  P7 and P8.  */
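/* Without the P9 insert instructions, the approach is: build a mask that
   selects one element, rotate both the mask and the widened VAL into
   position with lvsl + vperm, and combine them with the old vector
   contents using xxsel.  */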
7217 
7218 static void
7219 rs6000_expand_vector_set_var_p7 (rtx target, rtx val, rtx idx)
7220 {
7221   machine_mode mode = GET_MODE (target);
7222 
7223   gcc_assert (VECTOR_MEM_VSX_P (mode) && !CONST_INT_P (idx));
7224 
7225   machine_mode inner_mode = GET_MODE (val);
7226   HOST_WIDE_INT mode_mask = GET_MODE_MASK (inner_mode);
7227 
7228   int width = GET_MODE_SIZE (inner_mode);
7229   gcc_assert (width >= 1 && width <= 4);
7230 
7231   int shift = exact_log2 (width);
7232 
7233   machine_mode idx_mode = GET_MODE (idx);
7234 
7235   machine_mode shift_mode;
7236   rtx (*gen_ashl)(rtx, rtx, rtx);
7237   rtx (*gen_add)(rtx, rtx, rtx);
7238   rtx (*gen_sub)(rtx, rtx, rtx);
7239   rtx (*gen_lvsl)(rtx, rtx);
7240 
7241   if (TARGET_POWERPC64)
7242     {
7243       shift_mode = DImode;
7244       gen_ashl = gen_ashldi3;
7245       gen_add = gen_adddi3;
7246       gen_sub = gen_subdi3;
7247       gen_lvsl = gen_altivec_lvsl_reg_di;
7248     }
7249   else
7250     {
7251       shift_mode = SImode;
7252       gen_ashl = gen_ashlsi3;
7253       gen_add = gen_addsi3;
7254       gen_sub = gen_subsi3;
7255       gen_lvsl = gen_altivec_lvsl_reg_si;
7256     }
7257 
7258   /*  idx = idx * width.  */
7259   rtx tmp = gen_reg_rtx (shift_mode);
7260   idx = convert_modes (shift_mode, idx_mode, idx, 1);
7261 
7262   emit_insn (gen_ashl (tmp, idx, GEN_INT (shift)));
7263 
7264   /*  For LE:  idx = idx + 8.  For BE:  idx = 24 - width - idx.  */
7265   if (!BYTES_BIG_ENDIAN)
7266     emit_insn (gen_add (tmp, tmp, GEN_INT (8)));
7267   else
7268     emit_insn (gen_sub (tmp, GEN_INT (24 - width), tmp));
7269 
7270   /*  lxv vs33, mask.
7271       DImode: 0xffffffffffffffff0000000000000000
7272       SImode: 0x00000000ffffffff0000000000000000
7273       HImode: 0x000000000000ffff0000000000000000.
7274       QImode: 0x00000000000000ff0000000000000000.  */
7275   rtx mask = gen_reg_rtx (V16QImode);
7276   rtx mask_v2di = gen_reg_rtx (V2DImode);
7277   rtvec v = rtvec_alloc (2);
7278   if (!BYTES_BIG_ENDIAN)
7279     {
7280       RTVEC_ELT (v, 0) = gen_rtx_CONST_INT (DImode, 0);
7281       RTVEC_ELT (v, 1) = gen_rtx_CONST_INT (DImode, mode_mask);
7282     }
7283   else
7284     {
7285       RTVEC_ELT (v, 0) = gen_rtx_CONST_INT (DImode, mode_mask);
7286       RTVEC_ELT (v, 1) = gen_rtx_CONST_INT (DImode, 0);
7287     }
7288   emit_insn (gen_vec_initv2didi (mask_v2di, gen_rtx_PARALLEL (V2DImode, v)));
7289   rtx sub_mask = simplify_gen_subreg (V16QImode, mask_v2di, V2DImode, 0);
7290   emit_insn (gen_rtx_SET (mask, sub_mask));
7291 
7292   /*  mtvsrd[wz] f0,tmp_val.  */
7293   rtx tmp_val = gen_reg_rtx (SImode);
7294   if (inner_mode == E_SFmode)
7295     if (TARGET_DIRECT_MOVE_64BIT)
7296       emit_insn (gen_movsi_from_sf (tmp_val, val));
7297     else
7298       {
7299 	rtx stack = rs6000_allocate_stack_temp (SFmode, false, true);
7300 	emit_insn (gen_movsf_hardfloat (stack, val));
7301 	rtx stack2 = copy_rtx (stack);
7302 	PUT_MODE (stack2, SImode);
7303 	emit_move_insn (tmp_val, stack2);
7304       }
7305   else
7306     tmp_val = force_reg (SImode, val);
7307 
7308   rtx val_v16qi = gen_reg_rtx (V16QImode);
7309   rtx val_v2di = gen_reg_rtx (V2DImode);
7310   rtvec vec_val = rtvec_alloc (2);
7311   if (!BYTES_BIG_ENDIAN)
7312   {
7313     RTVEC_ELT (vec_val, 0) = gen_rtx_CONST_INT (DImode, 0);
7314     RTVEC_ELT (vec_val, 1) = tmp_val;
7315   }
7316   else
7317   {
7318     RTVEC_ELT (vec_val, 0) = tmp_val;
7319     RTVEC_ELT (vec_val, 1) = gen_rtx_CONST_INT (DImode, 0);
7320   }
7321   emit_insn (
7322     gen_vec_initv2didi (val_v2di, gen_rtx_PARALLEL (V2DImode, vec_val)));
7323   rtx sub_val = simplify_gen_subreg (V16QImode, val_v2di, V2DImode, 0);
7324   emit_insn (gen_rtx_SET (val_v16qi, sub_val));
7325 
7326   /*  lvsl    13,0,idx.  */
7327   rtx pcv = gen_reg_rtx (V16QImode);
7328   emit_insn (gen_lvsl (pcv, tmp));
7329 
7330   /*  vperm 1,1,1,13.  */
7331   /*  vperm 0,0,0,13.  */
7332   rtx val_perm = gen_reg_rtx (V16QImode);
7333   rtx mask_perm = gen_reg_rtx (V16QImode);
7334   emit_insn (gen_altivec_vperm_v8hiv16qi (val_perm, val_v16qi, val_v16qi, pcv));
7335   emit_insn (gen_altivec_vperm_v8hiv16qi (mask_perm, mask, mask, pcv));
7336 
7337   rtx target_v16qi = simplify_gen_subreg (V16QImode, target, mode, 0);
7338 
7339   /*  xxsel 34,34,32,33.  */
7340   emit_insn (
7341     gen_vector_select_v16qi (target_v16qi, target_v16qi, val_perm, mask_perm));
7342 }
7343 
7344 /* Set field ELT_RTX of TARGET to VAL.  */
7345 
7346 void
7347 rs6000_expand_vector_set (rtx target, rtx val, rtx elt_rtx)
7348 {
7349   machine_mode mode = GET_MODE (target);
7350   machine_mode inner_mode = GET_MODE_INNER (mode);
7351   rtx reg = gen_reg_rtx (mode);
7352   rtx mask, mem, x;
7353   int width = GET_MODE_SIZE (inner_mode);
7354   int i;
7355 
7356   val = force_reg (GET_MODE (val), val);
7357 
7358   if (VECTOR_MEM_VSX_P (mode))
7359     {
7360       if (!CONST_INT_P (elt_rtx))
7361 	{
7362 	  /* For V2DI/V2DF, could leverage the P9 version to generate xxpermdi
7363 	     when elt_rtx is variable.  */
7364 	  if ((TARGET_P9_VECTOR && TARGET_POWERPC64) || width == 8)
7365 	    {
7366 	      rs6000_expand_vector_set_var_p9 (target, val, elt_rtx);
7367 	      return;
7368 	    }
7369 	  else if (TARGET_VSX)
7370 	    {
7371 	      rs6000_expand_vector_set_var_p7 (target, val, elt_rtx);
7372 	      return;
7373 	    }
7374 	  else
7375 	    gcc_assert (CONST_INT_P (elt_rtx));
7376 	}
7377 
7378       rtx insn = NULL_RTX;
7379 
7380       if (mode == V2DFmode)
7381 	insn = gen_vsx_set_v2df (target, target, val, elt_rtx);
7382 
7383       else if (mode == V2DImode)
7384 	insn = gen_vsx_set_v2di (target, target, val, elt_rtx);
7385 
7386       else if (TARGET_P9_VECTOR && TARGET_POWERPC64)
7387 	{
7388 	  if (mode == V4SImode)
7389 	    insn = gen_vsx_set_v4si_p9 (target, target, val, elt_rtx);
7390 	  else if (mode == V8HImode)
7391 	    insn = gen_vsx_set_v8hi_p9 (target, target, val, elt_rtx);
7392 	  else if (mode == V16QImode)
7393 	    insn = gen_vsx_set_v16qi_p9 (target, target, val, elt_rtx);
7394 	  else if (mode == V4SFmode)
7395 	    insn = gen_vsx_set_v4sf_p9 (target, target, val, elt_rtx);
7396 	}
7397 
7398       if (insn)
7399 	{
7400 	  emit_insn (insn);
7401 	  return;
7402 	}
7403     }
7404 
7405   /* Simplify setting single element vectors like V1TImode.  */
7406   if (GET_MODE_SIZE (mode) == GET_MODE_SIZE (inner_mode)
7407       && INTVAL (elt_rtx) == 0)
7408     {
7409       emit_move_insn (target, gen_lowpart (mode, val));
7410       return;
7411     }
7412 
7413   /* Load single variable value.  */
7414   mem = assign_stack_temp (mode, GET_MODE_SIZE (inner_mode));
7415   emit_move_insn (adjust_address_nv (mem, inner_mode, 0), val);
7416   x = gen_rtx_UNSPEC (VOIDmode,
7417 		      gen_rtvec (1, const0_rtx), UNSPEC_LVE);
7418   emit_insn (gen_rtx_PARALLEL (VOIDmode,
7419 			       gen_rtvec (2,
7420 					  gen_rtx_SET (reg, mem),
7421 					  x)));
7422 
7423   /* Linear sequence.  */
7424   mask = gen_rtx_PARALLEL (V16QImode, rtvec_alloc (16));
7425   for (i = 0; i < 16; ++i)
7426     XVECEXP (mask, 0, i) = GEN_INT (i);
7427 
7428   /* Set permute mask to insert element into target.  */
7429   for (i = 0; i < width; ++i)
7430     XVECEXP (mask, 0, INTVAL (elt_rtx) * width + i) = GEN_INT (i + 0x10);
7431   x = gen_rtx_CONST_VECTOR (V16QImode, XVEC (mask, 0));
7432 
7433   if (BYTES_BIG_ENDIAN)
7434     x = gen_rtx_UNSPEC (mode,
7435 			gen_rtvec (3, target, reg,
7436 				   force_reg (V16QImode, x)),
7437 			UNSPEC_VPERM);
7438   else
7439     {
7440       if (TARGET_P9_VECTOR)
7441 	x = gen_rtx_UNSPEC (mode,
7442 			    gen_rtvec (3, reg, target,
7443 				       force_reg (V16QImode, x)),
7444 			    UNSPEC_VPERMR);
7445       else
7446 	{
7447 	  /* Invert selector.  We prefer to generate VNAND on P8 so
7448 	     that future fusion opportunities can kick in, but must
7449 	     generate VNOR elsewhere.  */
7450 	  rtx notx = gen_rtx_NOT (V16QImode, force_reg (V16QImode, x));
7451 	  rtx iorx = (TARGET_P8_VECTOR
7452 		      ? gen_rtx_IOR (V16QImode, notx, notx)
7453 		      : gen_rtx_AND (V16QImode, notx, notx));
7454 	  rtx tmp = gen_reg_rtx (V16QImode);
7455 	  emit_insn (gen_rtx_SET (tmp, iorx));
7456 
7457 	  /* Permute with operands reversed and adjusted selector.  */
7458 	  x = gen_rtx_UNSPEC (mode, gen_rtvec (3, reg, target, tmp),
7459 			      UNSPEC_VPERM);
7460 	}
7461     }
7462 
7463   emit_insn (gen_rtx_SET (target, x));
7464 }
7465 
7466 /* Extract field ELT from VEC into TARGET.  */
7467 
7468 void
7469 rs6000_expand_vector_extract (rtx target, rtx vec, rtx elt)
7470 {
7471   machine_mode mode = GET_MODE (vec);
7472   machine_mode inner_mode = GET_MODE_INNER (mode);
7473   rtx mem;
7474 
7475   if (VECTOR_MEM_VSX_P (mode) && CONST_INT_P (elt))
7476     {
7477       switch (mode)
7478 	{
7479 	default:
7480 	  break;
7481 	case E_V1TImode:
7482 	  emit_move_insn (target, gen_lowpart (TImode, vec));
7483 	  return;
7484 	case E_V2DFmode:
7485 	  emit_insn (gen_vsx_extract_v2df (target, vec, elt));
7486 	  return;
7487 	case E_V2DImode:
7488 	  emit_insn (gen_vsx_extract_v2di (target, vec, elt));
7489 	  return;
7490 	case E_V4SFmode:
7491 	  emit_insn (gen_vsx_extract_v4sf (target, vec, elt));
7492 	  return;
7493 	case E_V16QImode:
7494 	  if (TARGET_DIRECT_MOVE_64BIT)
7495 	    {
7496 	      emit_insn (gen_vsx_extract_v16qi (target, vec, elt));
7497 	      return;
7498 	    }
7499 	  else
7500 	    break;
7501 	case E_V8HImode:
7502 	  if (TARGET_DIRECT_MOVE_64BIT)
7503 	    {
7504 	      emit_insn (gen_vsx_extract_v8hi (target, vec, elt));
7505 	      return;
7506 	    }
7507 	  else
7508 	    break;
7509 	case E_V4SImode:
7510 	  if (TARGET_DIRECT_MOVE_64BIT)
7511 	    {
7512 	      emit_insn (gen_vsx_extract_v4si (target, vec, elt));
7513 	      return;
7514 	    }
7515 	  break;
7516 	}
7517     }
7518   else if (VECTOR_MEM_VSX_P (mode) && !CONST_INT_P (elt)
7519 	   && TARGET_DIRECT_MOVE_64BIT)
7520     {
7521       if (GET_MODE (elt) != DImode)
7522 	{
7523 	  rtx tmp = gen_reg_rtx (DImode);
7524 	  convert_move (tmp, elt, 0);
7525 	  elt = tmp;
7526 	}
7527       else if (!REG_P (elt))
7528 	elt = force_reg (DImode, elt);
7529 
7530       switch (mode)
7531 	{
7532 	case E_V1TImode:
7533 	  emit_move_insn (target, gen_lowpart (TImode, vec));
7534 	  return;
7535 
7536 	case E_V2DFmode:
7537 	  emit_insn (gen_vsx_extract_v2df_var (target, vec, elt));
7538 	  return;
7539 
7540 	case E_V2DImode:
7541 	  emit_insn (gen_vsx_extract_v2di_var (target, vec, elt));
7542 	  return;
7543 
7544 	case E_V4SFmode:
7545 	  emit_insn (gen_vsx_extract_v4sf_var (target, vec, elt));
7546 	  return;
7547 
7548 	case E_V4SImode:
7549 	  emit_insn (gen_vsx_extract_v4si_var (target, vec, elt));
7550 	  return;
7551 
7552 	case E_V8HImode:
7553 	  emit_insn (gen_vsx_extract_v8hi_var (target, vec, elt));
7554 	  return;
7555 
7556 	case E_V16QImode:
7557 	  emit_insn (gen_vsx_extract_v16qi_var (target, vec, elt));
7558 	  return;
7559 
7560 	default:
7561 	  gcc_unreachable ();
7562 	}
7563     }
7564 
7565   /* Allocate mode-sized buffer.  */
7566   mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
7567 
7568   emit_move_insn (mem, vec);
7569   if (CONST_INT_P (elt))
7570     {
7571       int modulo_elt = INTVAL (elt) % GET_MODE_NUNITS (mode);
7572 
7573       /* Add offset to field within buffer matching vector element.  */
7574       mem = adjust_address_nv (mem, inner_mode,
7575 			       modulo_elt * GET_MODE_SIZE (inner_mode));
7576       emit_move_insn (target, adjust_address_nv (mem, inner_mode, 0));
7577     }
7578   else
7579     {
7580       unsigned int ele_size = GET_MODE_SIZE (inner_mode);
7581       rtx num_ele_m1 = GEN_INT (GET_MODE_NUNITS (mode) - 1);
7582       rtx new_addr = gen_reg_rtx (Pmode);
7583 
7584       elt = gen_rtx_AND (Pmode, elt, num_ele_m1);
7585       if (ele_size > 1)
7586 	elt = gen_rtx_MULT (Pmode, elt, GEN_INT (ele_size));
7587       new_addr = gen_rtx_PLUS (Pmode, XEXP (mem, 0), elt);
7588       new_addr = change_address (mem, inner_mode, new_addr);
7589       emit_move_insn (target, new_addr);
7590     }
7591 }
7592 
7593 /* Return the offset within a memory object (MEM) of a vector type to a given
7594    element within the vector (ELEMENT) with an element size (SCALAR_SIZE).  If
7595    the element is constant, we return a constant integer.
7596 
7597    Otherwise, we use a base register temporary to calculate the offset after
7598    masking it to fit within the bounds of the vector and scaling it.  The
7599    masking is required by the 64-bit ELF version 2 ABI for the vec_extract
7600    built-in function.  */
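/* For example, element 3 of a V4SImode vector yields the constant 12,
   while a variable element number is ANDed with 3 and shifted left by 2
   in BASE_TMP.  */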
7601 
7602 static rtx
7603 get_vector_offset (rtx mem, rtx element, rtx base_tmp, unsigned scalar_size)
7604 {
7605   if (CONST_INT_P (element))
7606     return GEN_INT (INTVAL (element) * scalar_size);
7607 
7608   /* All insns should use the 'Q' constraint (address is a single register) if
7609      the element number is not a constant.  */
7610   gcc_assert (satisfies_constraint_Q (mem));
7611 
7612   /* Mask the element to make sure the element number is between 0 and the
7613      maximum number of elements - 1 so that we don't generate an address
7614      outside the vector.  */
7615   rtx num_ele_m1 = GEN_INT (GET_MODE_NUNITS (GET_MODE (mem)) - 1);
7616   rtx and_op = gen_rtx_AND (Pmode, element, num_ele_m1);
7617   emit_insn (gen_rtx_SET (base_tmp, and_op));
7618 
7619   /* Shift the element to get the byte offset from the element number.  */
7620   int shift = exact_log2 (scalar_size);
7621   gcc_assert (shift >= 0);
7622 
7623   if (shift > 0)
7624     {
7625       rtx shift_op = gen_rtx_ASHIFT (Pmode, base_tmp, GEN_INT (shift));
7626       emit_insn (gen_rtx_SET (base_tmp, shift_op));
7627     }
7628 
7629   return base_tmp;
7630 }
7631 
7632 /* Helper function to update a PC-relative vector address (ADDR) so that it
7633    points to a scalar field within the vector at a constant byte offset
7634    (ELEMENT_OFFSET).  If the resulting address is not valid, we can use the
7635    base register temporary (BASE_TMP) to form the address.  */
7636 
7637 static rtx
7638 adjust_vec_address_pcrel (rtx addr, rtx element_offset, rtx base_tmp)
7639 {
7640   rtx new_addr = NULL;
7641 
7642   gcc_assert (CONST_INT_P (element_offset));
7643 
7644   if (GET_CODE (addr) == CONST)
7645     addr = XEXP (addr, 0);
7646 
7647   if (GET_CODE (addr) == PLUS)
7648     {
7649       rtx op0 = XEXP (addr, 0);
7650       rtx op1 = XEXP (addr, 1);
7651 
7652       if (CONST_INT_P (op1))
7653 	{
7654 	  HOST_WIDE_INT offset
7655 	    = INTVAL (XEXP (addr, 1)) + INTVAL (element_offset);
7656 
7657 	  if (offset == 0)
7658 	    new_addr = op0;
7659 
7660 	  else
7661 	    {
7662 	      rtx plus = gen_rtx_PLUS (Pmode, op0, GEN_INT (offset));
7663 	      new_addr = gen_rtx_CONST (Pmode, plus);
7664 	    }
7665 	}
7666 
7667       else
7668 	{
7669 	  emit_move_insn (base_tmp, addr);
7670 	  new_addr = gen_rtx_PLUS (Pmode, base_tmp, element_offset);
7671 	}
7672     }
7673 
7674   else if (SYMBOL_REF_P (addr) || LABEL_REF_P (addr))
7675     {
7676       rtx plus = gen_rtx_PLUS (Pmode, addr, element_offset);
7677       new_addr = gen_rtx_CONST (Pmode, plus);
7678     }
7679 
7680   else
7681     gcc_unreachable ();
7682 
7683   return new_addr;
7684 }
7685 
7686 /* Adjust a memory address (MEM) of a vector type to point to a scalar field
7687    within the vector (ELEMENT) with a mode (SCALAR_MODE).  Use a base register
7688    temporary (BASE_TMP) to fixup the address.  Return the new memory address
7689    that is valid for reads or writes to a given register (SCALAR_REG).
7690 
7691    This function is expected to be called after reload is completed when we are
7692    splitting insns.  The temporary BASE_TMP might be set multiple times with
7693    this code.  */
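/* For example, extracting element 2 of a V4SImode vector addressed as
   <reg + 16> folds to a <reg + 24> scalar access.  With a variable
   element the address must be a single register (the 'Q' constraint),
   and the byte offset is computed into BASE_TMP, giving
   <reg + base_tmp>.  */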
7694 
7695 rtx
7696 rs6000_adjust_vec_address (rtx scalar_reg,
7697 			   rtx mem,
7698 			   rtx element,
7699 			   rtx base_tmp,
7700 			   machine_mode scalar_mode)
7701 {
7702   unsigned scalar_size = GET_MODE_SIZE (scalar_mode);
7703   rtx addr = XEXP (mem, 0);
7704   rtx new_addr;
7705 
7706   gcc_assert (!reg_mentioned_p (base_tmp, addr));
7707   gcc_assert (!reg_mentioned_p (base_tmp, element));
7708 
7709   /* Vector addresses should not have PRE_INC, PRE_DEC, or PRE_MODIFY.  */
7710   gcc_assert (GET_RTX_CLASS (GET_CODE (addr)) != RTX_AUTOINC);
7711 
7712   /* Calculate what we need to add to the address to get the element
7713      address.  */
7714   rtx element_offset = get_vector_offset (mem, element, base_tmp, scalar_size);
7715 
7716   /* Create the new address pointing to the element within the vector.  If we
7717      are adding 0, we don't have to change the address.  */
7718   if (element_offset == const0_rtx)
7719     new_addr = addr;
7720 
7721   /* A simple indirect address can be converted into a reg + offset
7722      address.  */
7723   else if (REG_P (addr) || SUBREG_P (addr))
7724     new_addr = gen_rtx_PLUS (Pmode, addr, element_offset);
7725 
7726   /* For references to local static variables, fold a constant offset into the
7727      address.  */
7728   else if (pcrel_local_address (addr, Pmode) && CONST_INT_P (element_offset))
7729     new_addr = adjust_vec_address_pcrel (addr, element_offset, base_tmp);
7730 
7731   /* Optimize D-FORM addresses with constant offset with a constant element, to
7732      include the element offset in the address directly.  */
7733   else if (GET_CODE (addr) == PLUS)
7734     {
7735       rtx op0 = XEXP (addr, 0);
7736       rtx op1 = XEXP (addr, 1);
7737 
7738       gcc_assert (REG_P (op0) || SUBREG_P (op0));
7739       if (CONST_INT_P (op1) && CONST_INT_P (element_offset))
7740 	{
7741 	  /* op0 should never be r0, because r0+offset is not valid.  But it
7742 	     doesn't hurt to make sure it is not r0.  */
7743 	  gcc_assert (reg_or_subregno (op0) != 0);
7744 
7745 	  /* D-FORM address with constant element number.  */
7746 	  HOST_WIDE_INT offset = INTVAL (op1) + INTVAL (element_offset);
7747 	  rtx offset_rtx = GEN_INT (offset);
7748 	  new_addr = gen_rtx_PLUS (Pmode, op0, offset_rtx);
7749 	}
7750       else
7751 	{
7752 	  /* If we don't have a D-FORM address with a constant element number,
7753 	     add the two elements in the current address.  Then add the offset.
7754 
7755 	     Previously, we tried to add the offset to OP1 and change the
7756 	     address to an X-FORM format adding OP0 and BASE_TMP, but it became
7757 	     complicated because we had to verify that op1 was not GPR0 and we
7758 	     had a constant element offset (due to the way ADDI is defined).
7759 	     By doing the add of OP0 and OP1 first, and then adding in the
7760 	     offset, it has the benefit that if D-FORM instructions are
7761 	     allowed, the offset is part of the memory access to the vector
7762 	     element. */
7763 	  emit_insn (gen_rtx_SET (base_tmp, gen_rtx_PLUS (Pmode, op0, op1)));
7764 	  new_addr = gen_rtx_PLUS (Pmode, base_tmp, element_offset);
7765 	}
7766     }
7767 
7768   else
7769     {
7770       emit_move_insn (base_tmp, addr);
7771       new_addr = gen_rtx_PLUS (Pmode, base_tmp, element_offset);
7772     }
7773 
7774     /* If the address isn't valid, move the address into the temporary base
7775        register.  Some reasons it could not be valid include:
7776 
7777        The address offset overflowed the 16 or 34 bit offset size;
7778        We need to use a DS-FORM load, and the bottom 2 bits are non-zero;
7779        We need to use a DQ-FORM load, and the bottom 4 bits are non-zero;
7780        Only X_FORM loads can be done, and the address is D_FORM.  */
7781 
7782   enum insn_form iform
7783     = address_to_insn_form (new_addr, scalar_mode,
7784 			    reg_to_non_prefixed (scalar_reg, scalar_mode));
7785 
7786   if (iform == INSN_FORM_BAD)
7787     {
7788       emit_move_insn (base_tmp, new_addr);
7789       new_addr = base_tmp;
7790     }
7791 
7792   return change_address (mem, scalar_mode, new_addr);
7793 }
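
/* For example, extracting element 3 of a V4SImode vector stored at
   (plus (reg R) (const_int 16)) gives ELEMENT_OFFSET 12, and the D-form case
   above folds this to (plus (reg R) (const_int 28)), so the scalar can be
   read with a single offset-form memory access.  */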

/* Split a variable vec_extract operation into the component instructions.  */

void
rs6000_split_vec_extract_var (rtx dest, rtx src, rtx element, rtx tmp_gpr,
			      rtx tmp_altivec)
{
  machine_mode mode = GET_MODE (src);
  machine_mode scalar_mode = GET_MODE_INNER (GET_MODE (src));
  unsigned scalar_size = GET_MODE_SIZE (scalar_mode);
  int byte_shift = exact_log2 (scalar_size);

  gcc_assert (byte_shift >= 0);

  /* If we are given a memory address, optimize to load just the element.  We
     don't have to adjust the vector element number on little endian
     systems.  */
  if (MEM_P (src))
    {
      emit_move_insn (dest,
		      rs6000_adjust_vec_address (dest, src, element, tmp_gpr,
						 scalar_mode));
      return;
    }

  else if (REG_P (src) || SUBREG_P (src))
    {
      int num_elements = GET_MODE_NUNITS (mode);
      int bits_in_element = mode_to_bits (GET_MODE_INNER (mode));
      int bit_shift = 7 - exact_log2 (num_elements);
      rtx element2;
      unsigned int dest_regno = reg_or_subregno (dest);
      unsigned int src_regno = reg_or_subregno (src);
      unsigned int element_regno = reg_or_subregno (element);

      gcc_assert (REG_P (tmp_gpr));

      /* See if we want to generate VEXTU{B,H,W}{L,R}X if the destination is in
	 a general purpose register.  */
      if (TARGET_P9_VECTOR
	  && (mode == V16QImode || mode == V8HImode || mode == V4SImode)
	  && INT_REGNO_P (dest_regno)
	  && ALTIVEC_REGNO_P (src_regno)
	  && INT_REGNO_P (element_regno))
	{
	  rtx dest_si = gen_rtx_REG (SImode, dest_regno);
	  rtx element_si = gen_rtx_REG (SImode, element_regno);

	  if (mode == V16QImode)
	    emit_insn (BYTES_BIG_ENDIAN
		       ? gen_vextublx (dest_si, element_si, src)
		       : gen_vextubrx (dest_si, element_si, src));

	  else if (mode == V8HImode)
	    {
	      rtx tmp_gpr_si = gen_rtx_REG (SImode, REGNO (tmp_gpr));
	      emit_insn (gen_ashlsi3 (tmp_gpr_si, element_si, const1_rtx));
	      emit_insn (BYTES_BIG_ENDIAN
			 ? gen_vextuhlx (dest_si, tmp_gpr_si, src)
			 : gen_vextuhrx (dest_si, tmp_gpr_si, src));
	    }

	  else
	    {
	      rtx tmp_gpr_si = gen_rtx_REG (SImode, REGNO (tmp_gpr));
	      emit_insn (gen_ashlsi3 (tmp_gpr_si, element_si, const2_rtx));
	      emit_insn (BYTES_BIG_ENDIAN
			 ? gen_vextuwlx (dest_si, tmp_gpr_si, src)
			 : gen_vextuwrx (dest_si, tmp_gpr_si, src));
	    }

	  return;
	}

      gcc_assert (REG_P (tmp_altivec));

      /* For little endian, adjust element ordering.  For V2DI/V2DF, we can use
	 an XOR, otherwise we need to subtract.  The shift amount is so VSLO
	 will shift the element into the upper position (adding 3 to convert a
	 byte shift into a bit shift).  */
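      /* For example, for V2DImode on a little endian system, element 1
	 already lives in the high doubleword, so ELEMENT2 becomes 1 ^ 1 = 0
	 and VSLO shifts by zero bytes, while element 0 becomes 1, selecting
	 an 8 byte (64 in the bit-shift encoding) shift below.  */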
      if (scalar_size == 8)
	{
	  if (!BYTES_BIG_ENDIAN)
	    {
	      emit_insn (gen_xordi3 (tmp_gpr, element, const1_rtx));
	      element2 = tmp_gpr;
	    }
	  else
	    element2 = element;

	  /* Generate RLDIC directly to shift left 6 bits and retrieve 1
	     bit.  */
	  emit_insn (gen_rtx_SET (tmp_gpr,
				  gen_rtx_AND (DImode,
					       gen_rtx_ASHIFT (DImode,
							       element2,
							       GEN_INT (6)),
					       GEN_INT (64))));
	}
      else
	{
	  if (!BYTES_BIG_ENDIAN)
	    {
	      rtx num_ele_m1 = GEN_INT (num_elements - 1);

	      emit_insn (gen_anddi3 (tmp_gpr, element, num_ele_m1));
	      emit_insn (gen_subdi3 (tmp_gpr, num_ele_m1, tmp_gpr));
	      element2 = tmp_gpr;
	    }
	  else
	    element2 = element;

	  emit_insn (gen_ashldi3 (tmp_gpr, element2, GEN_INT (bit_shift)));
	}
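
      /* For example, for V4SImode on a little endian system with ELEMENT 2,
	 the AND/SUB sequence computes 3 - 2 = 1 (the big endian element
	 number), and shifting by BIT_SHIFT = 5 yields 32, i.e. a 4 byte
	 VSLO shift.  */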

      /* Get the value into the lower byte of the Altivec register where VSLO
	 expects it.  */
      if (TARGET_P9_VECTOR)
	emit_insn (gen_vsx_splat_v2di (tmp_altivec, tmp_gpr));
      else if (can_create_pseudo_p ())
	emit_insn (gen_vsx_concat_v2di (tmp_altivec, tmp_gpr, tmp_gpr));
      else
	{
	  rtx tmp_di = gen_rtx_REG (DImode, REGNO (tmp_altivec));
	  emit_move_insn (tmp_di, tmp_gpr);
	  emit_insn (gen_vsx_concat_v2di (tmp_altivec, tmp_di, tmp_di));
	}

      /* Do the VSLO to get the value into the final location.  */
      switch (mode)
	{
	case E_V2DFmode:
	  emit_insn (gen_vsx_vslo_v2df (dest, src, tmp_altivec));
	  return;

	case E_V2DImode:
	  emit_insn (gen_vsx_vslo_v2di (dest, src, tmp_altivec));
	  return;

	case E_V4SFmode:
	  {
	    rtx tmp_altivec_di = gen_rtx_REG (DImode, REGNO (tmp_altivec));
	    rtx tmp_altivec_v4sf = gen_rtx_REG (V4SFmode, REGNO (tmp_altivec));
	    rtx src_v2di = gen_rtx_REG (V2DImode, REGNO (src));
	    emit_insn (gen_vsx_vslo_v2di (tmp_altivec_di, src_v2di,
					  tmp_altivec));

	    emit_insn (gen_vsx_xscvspdp_scalar2 (dest, tmp_altivec_v4sf));
	    return;
	  }

	case E_V4SImode:
	case E_V8HImode:
	case E_V16QImode:
	  {
	    rtx tmp_altivec_di = gen_rtx_REG (DImode, REGNO (tmp_altivec));
	    rtx src_v2di = gen_rtx_REG (V2DImode, REGNO (src));
	    rtx tmp_gpr_di = gen_rtx_REG (DImode, REGNO (dest));
	    emit_insn (gen_vsx_vslo_v2di (tmp_altivec_di, src_v2di,
					  tmp_altivec));
	    emit_move_insn (tmp_gpr_di, tmp_altivec_di);
	    emit_insn (gen_lshrdi3 (tmp_gpr_di, tmp_gpr_di,
				    GEN_INT (64 - bits_in_element)));
	    return;
	  }

	default:
	  gcc_unreachable ();
	}

      return;
    }
  else
    gcc_unreachable ();
}

/* Return alignment of TYPE.  Existing alignment is ALIGN.  HOW
   selects whether the alignment is abi mandated, optional, or
   both abi and optional alignment.  */

unsigned int
rs6000_data_alignment (tree type, unsigned int align, enum data_align how)
{
  if (how != align_opt)
    {
      if (TREE_CODE (type) == VECTOR_TYPE && align < 128)
	align = 128;
    }

  if (how != align_abi)
    {
      if (TREE_CODE (type) == ARRAY_TYPE
	  && TYPE_MODE (TREE_TYPE (type)) == QImode)
	{
	  if (align < BITS_PER_WORD)
	    align = BITS_PER_WORD;
	}
    }

  return align;
}

/* Implement TARGET_SLOW_UNALIGNED_ACCESS.  Altivec vector memory
   instructions simply ignore the low bits; VSX memory instructions
   are aligned to 4 or 8 bytes.  */

static bool
rs6000_slow_unaligned_access (machine_mode mode, unsigned int align)
{
  return (STRICT_ALIGNMENT
	  || (!TARGET_EFFICIENT_UNALIGNED_VSX
	      && ((SCALAR_FLOAT_MODE_NOT_VECTOR_P (mode) && align < 32)
		  || ((VECTOR_MODE_P (mode) || VECTOR_ALIGNMENT_P (mode))
		      && (int) align < VECTOR_ALIGN (mode)))));
}
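
/* For example, assuming !STRICT_ALIGNMENT and !TARGET_EFFICIENT_UNALIGNED_VSX,
   a V4SImode access with only 64-bit alignment reports as slow because the
   alignment is below VECTOR_ALIGN (V4SImode), while a DImode integer access
   at the same alignment does not.  */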

/* AIX word-aligns FP doubles but doubleword-aligns 64-bit ints.  */

unsigned int
rs6000_special_adjust_field_align (tree type, unsigned int computed)
{
  if (computed <= 32 || TYPE_PACKED (type))
    return computed;

  /* Strip initial arrays.  */
  while (TREE_CODE (type) == ARRAY_TYPE)
    type = TREE_TYPE (type);

  /* If RECORD or UNION, recursively find the first field.  */
  while (AGGREGATE_TYPE_P (type))
    {
      tree field = TYPE_FIELDS (type);

      /* Skip all non-field decls.  */
      while (field != NULL
	     && (TREE_CODE (field) != FIELD_DECL
		 || DECL_FIELD_ABI_IGNORED (field)))
	field = DECL_CHAIN (field);

      if (! field)
	break;

      /* A packed field does not contribute any extra alignment.  */
      if (DECL_PACKED (field))
	return computed;

      type = TREE_TYPE (field);

      /* Strip arrays.  */
      while (TREE_CODE (type) == ARRAY_TYPE)
	type = TREE_TYPE (type);
    }

  if (! AGGREGATE_TYPE_P (type) && type != error_mark_node
      && (TYPE_MODE (type) == DFmode || TYPE_MODE (type) == DCmode))
    computed = MIN (computed, 32);

  return computed;
}
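
/* For example, a field of type double (natural alignment 64) that is not
   packed has its alignment capped to 32 bits here, which is the AIX power
   alignment rule this function implements.  */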

/* AIX increases natural record alignment to doubleword if the innermost first
   field is an FP double while the FP fields remain word aligned.
   Only called if TYPE initially is a RECORD or UNION.  */

unsigned int
rs6000_special_round_type_align (tree type, unsigned int computed,
				 unsigned int specified)
{
  unsigned int align = MAX (computed, specified);

  if (TYPE_PACKED (type) || align >= 64)
    return align;

  /* If RECORD or UNION, recursively find the first field.  */
  do
    {
      tree field = TYPE_FIELDS (type);

      /* Skip all non-field decls.  */
      while (field != NULL
	     && (TREE_CODE (field) != FIELD_DECL
		 || DECL_FIELD_ABI_IGNORED (field)))
	field = DECL_CHAIN (field);

      if (! field)
	break;

      /* A packed field does not contribute any extra alignment.  */
      if (DECL_PACKED (field))
	return align;

      type = TREE_TYPE (field);

      /* Strip arrays.  */
      while (TREE_CODE (type) == ARRAY_TYPE)
	type = TREE_TYPE (type);
    } while (AGGREGATE_TYPE_P (type));

  if (! AGGREGATE_TYPE_P (type) && type != error_mark_node
      && (TYPE_MODE (type) == DFmode || TYPE_MODE (type) == DCmode))
    align = MAX (align, 64);

  return align;
}
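
/* For example, for struct { double d; int i; } the member alignment computed
   with the rule above is 32 bits, but because the first field is a double the
   record alignment is raised to 64 bits; the double member itself stays word
   aligned.  */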

/* Darwin increases record alignment to the natural alignment of
   the first field.  */

unsigned int
darwin_rs6000_special_round_type_align (tree type, unsigned int computed,
					unsigned int specified)
{
  unsigned int align = MAX (computed, specified);

  if (TYPE_PACKED (type))
    return align;

  /* Find the first field, looking down into aggregates.  */
  do {
    tree field = TYPE_FIELDS (type);
    /* Skip all non-field decls.  */
    while (field != NULL
	   && (TREE_CODE (field) != FIELD_DECL
	       || DECL_FIELD_ABI_IGNORED (field)))
      field = DECL_CHAIN (field);
    if (! field)
      break;
    /* A packed field does not contribute any extra alignment.  */
    if (DECL_PACKED (field))
      return align;
    type = TREE_TYPE (field);
    while (TREE_CODE (type) == ARRAY_TYPE)
      type = TREE_TYPE (type);
  } while (AGGREGATE_TYPE_P (type));

  if (! AGGREGATE_TYPE_P (type) && type != error_mark_node)
    align = MAX (align, TYPE_ALIGN (type));

  return align;
}

/* Return 1 for an operand in small memory on V.4/eabi.  */

int
small_data_operand (rtx op ATTRIBUTE_UNUSED,
		    machine_mode mode ATTRIBUTE_UNUSED)
{
#if TARGET_ELF
  rtx sym_ref;

  if (rs6000_sdata == SDATA_NONE || rs6000_sdata == SDATA_DATA)
    return 0;

  if (DEFAULT_ABI != ABI_V4)
    return 0;

  if (SYMBOL_REF_P (op))
    sym_ref = op;

  else if (GET_CODE (op) != CONST
	   || GET_CODE (XEXP (op, 0)) != PLUS
	   || !SYMBOL_REF_P (XEXP (XEXP (op, 0), 0))
	   || !CONST_INT_P (XEXP (XEXP (op, 0), 1)))
    return 0;

  else
    {
      rtx sum = XEXP (op, 0);
      HOST_WIDE_INT summand;

      /* We have to be careful here, because it is the referenced address
	 that must be 32k from _SDA_BASE_, not just the symbol.  */
      summand = INTVAL (XEXP (sum, 1));
      if (summand < 0 || summand > g_switch_value)
	return 0;

      sym_ref = XEXP (sum, 0);
    }

  return SYMBOL_REF_SMALL_P (sym_ref);
#else
  return 0;
#endif
}

/* Return true if either operand is a general purpose register.  */

bool
gpr_or_gpr_p (rtx op0, rtx op1)
{
  return ((REG_P (op0) && INT_REGNO_P (REGNO (op0)))
	  || (REG_P (op1) && INT_REGNO_P (REGNO (op1))));
}

/* Return true if this is a move direct operation between GPR registers and
   floating point/VSX registers.  */

bool
direct_move_p (rtx op0, rtx op1)
{
  if (!REG_P (op0) || !REG_P (op1))
    return false;

  if (!TARGET_DIRECT_MOVE)
    return false;

  int regno0 = REGNO (op0);
  int regno1 = REGNO (op1);
  if (!HARD_REGISTER_NUM_P (regno0) || !HARD_REGISTER_NUM_P (regno1))
    return false;

  if (INT_REGNO_P (regno0) && VSX_REGNO_P (regno1))
    return true;

  if (VSX_REGNO_P (regno0) && INT_REGNO_P (regno1))
    return true;

  return false;
}

/* Return true if ADDR is an acceptable address for a quad memory operation of
   mode MODE (either LQ/STQ for general purpose registers, or LXV/STXV for
   vector registers under ISA 3.0).  STRICT is true if strict register
   checking is requested.  */

bool
quad_address_p (rtx addr, machine_mode mode, bool strict)
{
  rtx op0, op1;

  if (GET_MODE_SIZE (mode) < 16)
    return false;

  if (legitimate_indirect_address_p (addr, strict))
    return true;

  if (VECTOR_MODE_P (mode) && !mode_supports_dq_form (mode))
    return false;

  /* Is this a valid prefixed address?  If the bottom four bits of the offset
     are non-zero, we could use a prefixed instruction (which does not have the
     DQ-form constraint that the traditional instruction had) instead of
     forcing the unaligned offset to a GPR.  */
  if (address_is_prefixed (addr, mode, NON_PREFIXED_DQ))
    return true;

  if (GET_CODE (addr) != PLUS)
    return false;

  op0 = XEXP (addr, 0);
  if (!REG_P (op0) || !INT_REG_OK_FOR_BASE_P (op0, strict))
    return false;

  op1 = XEXP (addr, 1);
  if (!CONST_INT_P (op1))
    return false;

  return quad_address_offset_p (INTVAL (op1));
}

/* Return true if this is a load or store quad operation.  This function does
   not handle the atomic quad memory instructions.  */

bool
quad_load_store_p (rtx op0, rtx op1)
{
  bool ret;

  if (!TARGET_QUAD_MEMORY)
    ret = false;

  else if (REG_P (op0) && MEM_P (op1))
    ret = (quad_int_reg_operand (op0, GET_MODE (op0))
	   && quad_memory_operand (op1, GET_MODE (op1))
	   && !reg_overlap_mentioned_p (op0, op1));

  else if (MEM_P (op0) && REG_P (op1))
    ret = (quad_memory_operand (op0, GET_MODE (op0))
	   && quad_int_reg_operand (op1, GET_MODE (op1)));

  else
    ret = false;

  if (TARGET_DEBUG_ADDR)
    {
      fprintf (stderr, "\n========== quad_load_store, return %s\n",
	       ret ? "true" : "false");
      debug_rtx (gen_rtx_SET (op0, op1));
    }

  return ret;
}

/* Given an address, return a constant offset term if one exists.  */

static rtx
address_offset (rtx op)
{
  if (GET_CODE (op) == PRE_INC
      || GET_CODE (op) == PRE_DEC)
    op = XEXP (op, 0);
  else if (GET_CODE (op) == PRE_MODIFY
	   || GET_CODE (op) == LO_SUM)
    op = XEXP (op, 1);

  if (GET_CODE (op) == CONST)
    op = XEXP (op, 0);

  if (GET_CODE (op) == PLUS)
    op = XEXP (op, 1);

  if (CONST_INT_P (op))
    return op;

  return NULL_RTX;
}
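
/* For example, address_offset returns (const_int 8) for
   (plus (reg) (const_int 8)), returns the inner (const_int 4) for
   (lo_sum (reg) (const (plus (symbol_ref) (const_int 4)))), and returns
   NULL_RTX for a plain (reg) or a (plus (reg) (reg)) address.  */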

/* This tests that a lo_sum {constant, symbol, symbol+offset} is valid for
   the mode.  If we can't find (or don't know) the alignment of the symbol
   we assume (optimistically) that it's sufficiently aligned [??? maybe we
   should be pessimistic].  Offsets are validated in the same way as for
   reg + offset.  */
static bool
darwin_rs6000_legitimate_lo_sum_const_p (rtx x, machine_mode mode)
{
  /* We should not get here with this.  */
  gcc_checking_assert (! mode_supports_dq_form (mode));

  if (GET_CODE (x) == CONST)
    x = XEXP (x, 0);

  /* If we are building PIC code, then any symbol must be wrapped in an
     UNSPEC_MACHOPIC_OFFSET so that it will get the picbase subtracted.  */
  bool machopic_offs_p = false;
  if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_MACHOPIC_OFFSET)
    {
      x = XVECEXP (x, 0, 0);
      machopic_offs_p = true;
    }

  rtx sym = NULL_RTX;
  unsigned HOST_WIDE_INT offset = 0;

  if (GET_CODE (x) == PLUS)
    {
      sym = XEXP (x, 0);
      if (! SYMBOL_REF_P (sym))
	return false;
      if (!CONST_INT_P (XEXP (x, 1)))
	return false;
      offset = INTVAL (XEXP (x, 1));
    }
  else if (SYMBOL_REF_P (x))
    sym = x;
  else if (CONST_INT_P (x))
    offset = INTVAL (x);
  else if (GET_CODE (x) == LABEL_REF)
    offset = 0; // We assume code labels are Pmode aligned.
  else
    return false; // Not sure what we have here.

  /* If we don't know the alignment of the thing to which the symbol refers,
     we assume optimistically it is "enough".
     ??? Maybe we should be pessimistic instead.  */
  unsigned align = 0;

  if (sym)
    {
      tree decl = SYMBOL_REF_DECL (sym);
      /* As noted above, PIC code cannot use a bare SYMBOL_REF.  */
      if (TARGET_MACHO && flag_pic && !machopic_offs_p)
	return false;
#if TARGET_MACHO
      if (MACHO_SYMBOL_INDIRECTION_P (sym))
	/* The decl in an indirection symbol is the original one, which might
	   be less aligned than the indirection.  Our indirections are always
	   pointer-aligned.  */
	;
      else
#endif
      if (decl && DECL_ALIGN (decl))
	align = DECL_ALIGN_UNIT (decl);
    }

  unsigned int extra = 0;
  switch (mode)
    {
    case E_DFmode:
    case E_DDmode:
    case E_DImode:
      /* If we are using VSX scalar loads, restrict ourselves to reg+reg
	 addressing.  */
      if (VECTOR_MEM_VSX_P (mode))
	return false;

      if (!TARGET_POWERPC64)
	extra = 4;
      else if ((offset & 3) || (align & 3))
	return false;
      break;

    case E_TFmode:
    case E_IFmode:
    case E_KFmode:
    case E_TDmode:
    case E_TImode:
    case E_PTImode:
      extra = 8;
      if (!TARGET_POWERPC64)
	extra = 12;
      else if ((offset & 3) || (align & 3))
	return false;
      break;

    default:
      break;
    }

  /* We only care if the access(es) would cause a change to the high part.  */
  offset = ((offset & 0xffff) ^ 0x8000) - 0x8000;
  return SIGNED_16BIT_OFFSET_EXTRA_P (offset, extra);
}
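
/* Note that ((offset & 0xffff) ^ 0x8000) - 0x8000 sign-extends the low 16
   bits of OFFSET; for example 0x1fffc becomes -4.  Only the part of the
   offset that the low-part relocation contributes can change the high part,
   so only that part needs to be range checked.  */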

/* Return true if the MEM operand is a memory operand suitable for use
   with a (full width, possibly multiple) gpr load/store.  On
   powerpc64 this means the offset must be divisible by 4.
   Implements 'Y' constraint.

   Accept direct, indexed, offset, lo_sum and tocref.  Since this is
   a constraint function we know the operand has satisfied a suitable
   memory predicate.

   Offsetting a lo_sum should not be allowed, except where we know by
   alignment that a 32k boundary is not crossed.  Note that by
   "offsetting" here we mean a further offset to access parts of the
   MEM.  It's fine to have a lo_sum where the inner address is offset
   from a sym, since the same sym+offset will appear in the high part
   of the address calculation.  */

bool
mem_operand_gpr (rtx op, machine_mode mode)
{
  unsigned HOST_WIDE_INT offset;
  int extra;
  rtx addr = XEXP (op, 0);

  /* PR85755: Allow PRE_INC and PRE_DEC addresses.  */
  if (TARGET_UPDATE
      && (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
      && mode_supports_pre_incdec_p (mode)
      && legitimate_indirect_address_p (XEXP (addr, 0), false))
    return true;

  /* Allow prefixed instructions if supported.  If the bottom two bits of the
     offset are non-zero, we could use a prefixed instruction (which does not
     have the DS-form constraint that the traditional instruction had) instead
     of forcing the unaligned offset to a GPR.  */
  if (address_is_prefixed (addr, mode, NON_PREFIXED_DS))
    return true;

  /* We need to look through Mach-O PIC unspecs to determine if a lo_sum is
     really OK.  Doing this early avoids teaching all the other machinery
     about them.  */
  if (TARGET_MACHO && GET_CODE (addr) == LO_SUM)
    return darwin_rs6000_legitimate_lo_sum_const_p (XEXP (addr, 1), mode);

  /* Only allow offsettable addresses.  See PRs 83969 and 84279.  */
  if (!rs6000_offsettable_memref_p (op, mode, false))
    return false;

  op = address_offset (addr);
  if (op == NULL_RTX)
    return true;

  offset = INTVAL (op);
  if (TARGET_POWERPC64 && (offset & 3) != 0)
    return false;

  extra = GET_MODE_SIZE (mode) - UNITS_PER_WORD;
  if (extra < 0)
    extra = 0;

  if (GET_CODE (addr) == LO_SUM)
    /* For lo_sum addresses, we must allow any offset except one that
       causes a wrap, so test only the low 16 bits.  */
    offset = ((offset & 0xffff) ^ 0x8000) - 0x8000;

  return SIGNED_16BIT_OFFSET_EXTRA_P (offset, extra);
}

/* As above, but for DS-FORM VSX insns.  Unlike mem_operand_gpr,
   enforce an offset divisible by 4 even for 32-bit.  */

bool
mem_operand_ds_form (rtx op, machine_mode mode)
{
  unsigned HOST_WIDE_INT offset;
  int extra;
  rtx addr = XEXP (op, 0);

  /* Allow prefixed instructions if supported.  If the bottom two bits of the
     offset are non-zero, we could use a prefixed instruction (which does not
     have the DS-form constraint that the traditional instruction had) instead
     of forcing the unaligned offset to a GPR.  */
  if (address_is_prefixed (addr, mode, NON_PREFIXED_DS))
    return true;

  if (!offsettable_address_p (false, mode, addr))
    return false;

  op = address_offset (addr);
  if (op == NULL_RTX)
    return true;

  offset = INTVAL (op);
  if ((offset & 3) != 0)
    return false;

  extra = GET_MODE_SIZE (mode) - UNITS_PER_WORD;
  if (extra < 0)
    extra = 0;

  if (GET_CODE (addr) == LO_SUM)
    /* For lo_sum addresses, we must allow any offset except one that
       causes a wrap, so test only the low 16 bits.  */
    offset = ((offset & 0xffff) ^ 0x8000) - 0x8000;

  return SIGNED_16BIT_OFFSET_EXTRA_P (offset, extra);
}

/* Subroutines of rs6000_legitimize_address and rs6000_legitimate_address_p.  */

static bool
reg_offset_addressing_ok_p (machine_mode mode)
{
  switch (mode)
    {
    case E_V16QImode:
    case E_V8HImode:
    case E_V4SFmode:
    case E_V4SImode:
    case E_V2DFmode:
    case E_V2DImode:
    case E_V1TImode:
    case E_TImode:
    case E_TFmode:
    case E_KFmode:
      /* AltiVec/VSX vector modes.  Only reg+reg addressing was valid until the
	 ISA 3.0 vector d-form addressing mode was added.  While TImode is not
	 a vector mode, if we want to use the VSX registers to move it around,
	 we need to restrict ourselves to reg+reg addressing.  Similarly for
	 IEEE 128-bit floating point that is passed in a single vector
	 register.  */
      if (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode))
	return mode_supports_dq_form (mode);
      break;

      /* The vector pair/quad types support offset addressing if the
	 underlying vectors support offset addressing.  */
    case E_OOmode:
    case E_XOmode:
      return TARGET_MMA;

    case E_SDmode:
      /* If we can do direct load/stores of SDmode, restrict it to reg+reg
	 addressing for the LFIWZX and STFIWX instructions.  */
      if (TARGET_NO_SDMODE_STACK)
	return false;
      break;

    default:
      break;
    }

  return true;
}

static bool
virtual_stack_registers_memory_p (rtx op)
{
  int regnum;

  if (REG_P (op))
    regnum = REGNO (op);

  else if (GET_CODE (op) == PLUS
	   && REG_P (XEXP (op, 0))
	   && CONST_INT_P (XEXP (op, 1)))
    regnum = REGNO (XEXP (op, 0));

  else
    return false;

  return (regnum >= FIRST_VIRTUAL_REGISTER
	  && regnum <= LAST_VIRTUAL_POINTER_REGISTER);
}

/* Return true if a MODE-sized memory access to OP plus OFFSET
   is known not to straddle a 32k boundary.  This function is used
   to determine whether -mcmodel=medium code can use TOC pointer
   relative addressing for OP.  This means the alignment of the TOC
   pointer must also be taken into account, and unfortunately that is
   only 8 bytes.  */

#ifndef POWERPC64_TOC_POINTER_ALIGNMENT
#define POWERPC64_TOC_POINTER_ALIGNMENT 8
#endif

static bool
offsettable_ok_by_alignment (rtx op, HOST_WIDE_INT offset,
			     machine_mode mode)
{
  tree decl;
  unsigned HOST_WIDE_INT dsize, dalign, lsb, mask;

  if (!SYMBOL_REF_P (op))
    return false;

  /* ISA 3.0 vector d-form addressing is restricted, don't allow
     SYMBOL_REF.  */
  if (mode_supports_dq_form (mode))
    return false;

  dsize = GET_MODE_SIZE (mode);
  decl = SYMBOL_REF_DECL (op);
  if (!decl)
    {
      if (dsize == 0)
	return false;

      /* -fsection-anchors loses the original SYMBOL_REF_DECL when
	 replacing memory addresses with an anchor plus offset.  We
	 could find the decl by rummaging around in the block->objects
	 VEC for the given offset but that seems like too much work.  */
      dalign = BITS_PER_UNIT;
      if (SYMBOL_REF_HAS_BLOCK_INFO_P (op)
	  && SYMBOL_REF_ANCHOR_P (op)
	  && SYMBOL_REF_BLOCK (op) != NULL)
	{
	  struct object_block *block = SYMBOL_REF_BLOCK (op);

	  dalign = block->alignment;
	  offset += SYMBOL_REF_BLOCK_OFFSET (op);
	}
      else if (CONSTANT_POOL_ADDRESS_P (op))
	{
	  /* It would be nice to have get_pool_align ().  */
	  machine_mode cmode = get_pool_mode (op);

	  dalign = GET_MODE_ALIGNMENT (cmode);
	}
    }
  else if (DECL_P (decl))
    {
      dalign = DECL_ALIGN (decl);

      if (dsize == 0)
	{
	  /* Allow BLKmode when the entire object is known to not
	     cross a 32k boundary.  */
	  if (!DECL_SIZE_UNIT (decl))
	    return false;

	  if (!tree_fits_uhwi_p (DECL_SIZE_UNIT (decl)))
	    return false;

	  dsize = tree_to_uhwi (DECL_SIZE_UNIT (decl));
	  if (dsize > 32768)
	    return false;

	  dalign /= BITS_PER_UNIT;
	  if (dalign > POWERPC64_TOC_POINTER_ALIGNMENT)
	    dalign = POWERPC64_TOC_POINTER_ALIGNMENT;
	  return dalign >= dsize;
	}
    }
  else
    gcc_unreachable ();

  /* Find how many bits of the alignment we know for this access.  */
  dalign /= BITS_PER_UNIT;
  if (dalign > POWERPC64_TOC_POINTER_ALIGNMENT)
    dalign = POWERPC64_TOC_POINTER_ALIGNMENT;
  mask = dalign - 1;
  lsb = offset & -offset;
  mask &= lsb - 1;
  dalign = mask + 1;

  return dalign >= dsize;
}
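
/* As a worked example of the mask arithmetic above: with a DECL alignment of
   8 bytes and OFFSET = 20, LSB is 20 & -20 = 4, so the access is only known
   to be 4-byte aligned (DALIGN becomes 4) and any DSIZE larger than 4 is
   rejected.  */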

static bool
constant_pool_expr_p (rtx op)
{
  rtx base, offset;

  split_const (op, &base, &offset);
  return (SYMBOL_REF_P (base)
	  && CONSTANT_POOL_ADDRESS_P (base)
	  && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (base), Pmode));
}

/* Create a TOC reference for symbol_ref SYMBOL.  If LARGETOC_REG is non-null,
   use that as the register to put the HIGH value into if register allocation
   is already done.  */

rtx
create_TOC_reference (rtx symbol, rtx largetoc_reg)
{
  rtx tocrel, tocreg, hi;

  gcc_assert (TARGET_TOC);

  if (TARGET_DEBUG_ADDR)
    {
      if (SYMBOL_REF_P (symbol))
	fprintf (stderr, "\ncreate_TOC_reference, (symbol_ref %s)\n",
		 XSTR (symbol, 0));
      else
	{
	  fprintf (stderr, "\ncreate_TOC_reference, code %s:\n",
		   GET_RTX_NAME (GET_CODE (symbol)));
	  debug_rtx (symbol);
	}
    }

  if (!can_create_pseudo_p ())
    df_set_regs_ever_live (TOC_REGISTER, true);

  tocreg = gen_rtx_REG (Pmode, TOC_REGISTER);
  tocrel = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, symbol, tocreg), UNSPEC_TOCREL);
  if (TARGET_CMODEL == CMODEL_SMALL || can_create_pseudo_p ())
    return tocrel;

  hi = gen_rtx_HIGH (Pmode, copy_rtx (tocrel));
  if (largetoc_reg != NULL)
    {
      emit_move_insn (largetoc_reg, hi);
      hi = largetoc_reg;
    }
  return gen_rtx_LO_SUM (Pmode, hi, tocrel);
}
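
/* Sketch of the RTL produced (illustrative): for -mcmodel=small, or before
   register allocation, the result is the bare
   (unspec [(symbol_ref) (reg <toc>)] UNSPEC_TOCREL); for the medium/large
   code models after reload it is (lo_sum <hi> (unspec ...)) where <hi> holds
   the HIGH part, matching a two-instruction address computation.  */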

/* These are only used to pass through from print_operand/print_operand_address
   to rs6000_output_addr_const_extra over the intervening function
   output_addr_const which is not target code.  */
static const_rtx tocrel_base_oac, tocrel_offset_oac;

/* Return true if OP is a toc pointer relative address (the output
   of create_TOC_reference).  If STRICT, do not match non-split
   -mcmodel=large/medium toc pointer relative addresses.  If the pointers
   are non-NULL, place base and offset pieces in TOCREL_BASE_RET and
   TOCREL_OFFSET_RET respectively.  */

bool
toc_relative_expr_p (const_rtx op, bool strict, const_rtx *tocrel_base_ret,
		     const_rtx *tocrel_offset_ret)
{
  if (!TARGET_TOC)
    return false;

  if (TARGET_CMODEL != CMODEL_SMALL)
    {
      /* When strict ensure we have everything tidy.  */
      if (strict
	  && !(GET_CODE (op) == LO_SUM
	       && REG_P (XEXP (op, 0))
	       && INT_REG_OK_FOR_BASE_P (XEXP (op, 0), strict)))
	return false;

      /* When not strict, allow non-split TOC addresses and also allow
	 (lo_sum (high ..)) TOC addresses created during reload.  */
      if (GET_CODE (op) == LO_SUM)
	op = XEXP (op, 1);
    }

  const_rtx tocrel_base = op;
  const_rtx tocrel_offset = const0_rtx;

  if (GET_CODE (op) == PLUS && add_cint_operand (XEXP (op, 1), GET_MODE (op)))
    {
      tocrel_base = XEXP (op, 0);
      tocrel_offset = XEXP (op, 1);
    }

  if (tocrel_base_ret)
    *tocrel_base_ret = tocrel_base;
  if (tocrel_offset_ret)
    *tocrel_offset_ret = tocrel_offset;

  return (GET_CODE (tocrel_base) == UNSPEC
	  && XINT (tocrel_base, 1) == UNSPEC_TOCREL
	  && REG_P (XVECEXP (tocrel_base, 0, 1))
	  && REGNO (XVECEXP (tocrel_base, 0, 1)) == TOC_REGISTER);
}

/* Return true if X is a constant pool address, and also for cmodel=medium
   if X is a toc-relative address known to be offsettable within MODE.  */

bool
legitimate_constant_pool_address_p (const_rtx x, machine_mode mode,
				    bool strict)
{
  const_rtx tocrel_base, tocrel_offset;
  return (toc_relative_expr_p (x, strict, &tocrel_base, &tocrel_offset)
	  && (TARGET_CMODEL != CMODEL_MEDIUM
	      || constant_pool_expr_p (XVECEXP (tocrel_base, 0, 0))
	      || mode == QImode
	      || offsettable_ok_by_alignment (XVECEXP (tocrel_base, 0, 0),
					      INTVAL (tocrel_offset), mode)));
}

static bool
legitimate_small_data_p (machine_mode mode, rtx x)
{
  return (DEFAULT_ABI == ABI_V4
	  && !flag_pic && !TARGET_TOC
	  && (SYMBOL_REF_P (x) || GET_CODE (x) == CONST)
	  && small_data_operand (x, mode));
}

bool
rs6000_legitimate_offset_address_p (machine_mode mode, rtx x,
				    bool strict, bool worst_case)
{
  unsigned HOST_WIDE_INT offset;
  unsigned int extra;

  if (GET_CODE (x) != PLUS)
    return false;
  if (!REG_P (XEXP (x, 0)))
    return false;
  if (!INT_REG_OK_FOR_BASE_P (XEXP (x, 0), strict))
    return false;
  if (mode_supports_dq_form (mode))
    return quad_address_p (x, mode, strict);
  if (!reg_offset_addressing_ok_p (mode))
    return virtual_stack_registers_memory_p (x);
  if (legitimate_constant_pool_address_p (x, mode, strict || lra_in_progress))
    return true;
  if (!CONST_INT_P (XEXP (x, 1)))
    return false;

  offset = INTVAL (XEXP (x, 1));
  extra = 0;
  switch (mode)
    {
    case E_DFmode:
    case E_DDmode:
    case E_DImode:
      /* If we are using VSX scalar loads, restrict ourselves to reg+reg
	 addressing.  */
      if (VECTOR_MEM_VSX_P (mode))
	return false;

      if (!worst_case)
	break;
      if (!TARGET_POWERPC64)
	extra = 4;
      else if (offset & 3)
	return false;
      break;

    case E_TFmode:
    case E_IFmode:
    case E_KFmode:
    case E_TDmode:
    case E_TImode:
    case E_PTImode:
      extra = 8;
      if (!worst_case)
	break;
      if (!TARGET_POWERPC64)
	extra = 12;
      else if (offset & 3)
	return false;
      break;

    default:
      break;
    }

  if (TARGET_PREFIXED)
    return SIGNED_34BIT_OFFSET_EXTRA_P (offset, extra);
  else
    return SIGNED_16BIT_OFFSET_EXTRA_P (offset, extra);
}
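
/* For example, for TFmode on 32-bit with WORST_CASE, EXTRA is 12 because the
   value may be accessed as four word-sized loads; assuming
   SIGNED_16BIT_OFFSET_EXTRA_P requires both OFFSET and OFFSET + EXTRA to fit
   in a signed 16-bit value, an offset of 0x7ff8 is rejected since
   0x7ff8 + 12 overflows that range.  */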

bool
legitimate_indexed_address_p (rtx x, int strict)
{
  rtx op0, op1;

  if (GET_CODE (x) != PLUS)
    return false;

  op0 = XEXP (x, 0);
  op1 = XEXP (x, 1);

  return (REG_P (op0) && REG_P (op1)
	  && ((INT_REG_OK_FOR_BASE_P (op0, strict)
	       && INT_REG_OK_FOR_INDEX_P (op1, strict))
	      || (INT_REG_OK_FOR_BASE_P (op1, strict)
		  && INT_REG_OK_FOR_INDEX_P (op0, strict))));
}

bool
avoiding_indexed_address_p (machine_mode mode)
{
  unsigned int msize = GET_MODE_SIZE (mode);

  /* Avoid indexed addressing for modes that have non-indexed load/store
     instruction forms.  On power10, vector pairs have an indexed
     form, but vector quads don't.  */
  if (msize > 16)
    return msize != 32;

  return (TARGET_AVOID_XFORM && VECTOR_MEM_NONE_P (mode));
}

bool
legitimate_indirect_address_p (rtx x, int strict)
{
  return REG_P (x) && INT_REG_OK_FOR_BASE_P (x, strict);
}

bool
macho_lo_sum_memory_operand (rtx x, machine_mode mode)
{
  if (!TARGET_MACHO || !flag_pic
      || mode != SImode || !MEM_P (x))
    return false;
  x = XEXP (x, 0);

  if (GET_CODE (x) != LO_SUM)
    return false;
  if (!REG_P (XEXP (x, 0)))
    return false;
  if (!INT_REG_OK_FOR_BASE_P (XEXP (x, 0), 0))
    return false;
  x = XEXP (x, 1);

  return CONSTANT_P (x);
}

static bool
legitimate_lo_sum_address_p (machine_mode mode, rtx x, int strict)
{
  if (GET_CODE (x) != LO_SUM)
    return false;
  if (!REG_P (XEXP (x, 0)))
    return false;
  if (!INT_REG_OK_FOR_BASE_P (XEXP (x, 0), strict))
    return false;
  /* Quad word addresses are restricted, and we can't use LO_SUM.  */
  if (mode_supports_dq_form (mode))
    return false;
  x = XEXP (x, 1);

  if (TARGET_ELF)
    {
      bool large_toc_ok;

      if (DEFAULT_ABI == ABI_V4 && flag_pic)
	return false;
      /* LRA doesn't use LEGITIMIZE_RELOAD_ADDRESS as it usually calls
	 push_reload from reload pass code.  LEGITIMIZE_RELOAD_ADDRESS
	 recognizes some LO_SUM addresses as valid although this
	 function says opposite.  In most cases, LRA through different
	 transformations can generate correct code for address reloads.
	 It cannot manage only some LO_SUM cases.  So we need to add
	 code here saying that some addresses are still valid.  */
      large_toc_ok = (lra_in_progress && TARGET_CMODEL != CMODEL_SMALL
		      && small_toc_ref (x, VOIDmode));
      if (TARGET_TOC && ! large_toc_ok)
	return false;
      if (GET_MODE_NUNITS (mode) != 1)
	return false;
      if (GET_MODE_SIZE (mode) > UNITS_PER_WORD
	  && !(/* ??? Assume floating point reg based on mode?  */
	       TARGET_HARD_FLOAT && (mode == DFmode || mode == DDmode)))
	return false;

      return CONSTANT_P (x) || large_toc_ok;
    }
  else if (TARGET_MACHO)
    {
      if (GET_MODE_NUNITS (mode) != 1)
	return false;
      if (GET_MODE_SIZE (mode) > UNITS_PER_WORD
	  && !(/* see above  */
	       TARGET_HARD_FLOAT && (mode == DFmode || mode == DDmode)))
	return false;
#if TARGET_MACHO
      if (MACHO_DYNAMIC_NO_PIC_P || !flag_pic)
	return CONSTANT_P (x);
#endif
      /* Mach-O PIC code from here.  */
      if (GET_CODE (x) == CONST)
	x = XEXP (x, 0);

      /* SYMBOL_REFs need to be wrapped in an UNSPEC_MACHOPIC_OFFSET.  */
      if (SYMBOL_REF_P (x))
	return false;

      /* So this is OK if the wrapped object is const.  */
      if (GET_CODE (x) == UNSPEC
	  && XINT (x, 1) == UNSPEC_MACHOPIC_OFFSET)
	return CONSTANT_P (XVECEXP (x, 0, 0));
      return CONSTANT_P (x);
    }
  return false;
}

/* Try machine-dependent ways of modifying an illegitimate address
   to be legitimate.  If we find one, return the new, valid address.
   This is used from only one place: `memory_address' in explow.cc.

   OLDX is the address as it was before break_out_memory_refs was
   called.  In some cases it is useful to look at this to decide what
   needs to be done.

   It is always safe for this function to do nothing.  It exists to
   recognize opportunities to optimize the output.

   On RS/6000, first check for the sum of a register with a constant
   integer that is out of range.  If so, generate code to add the
   constant with the low-order 16 bits masked to the register and force
   this result into another register (this can be done with `cau').
   Then generate an address of REG+(CONST&0xffff), allowing for the
   possibility of bit 16 being a one.

   Then check for the sum of a register and something not constant, try to
   load the other things into a register and return the sum.  */
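
/* As a worked example of the splitting described above: for
   (plus (reg) (const_int 0x12345)), LOW_INT is the sign-extended low half
   0x2345 and HIGH_INT is 0x10000, so we add 0x10000 into a new register
   (an addis) and return (plus (new_reg) (const_int 0x2345)).  */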

static rtx
rs6000_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
			   machine_mode mode)
{
  unsigned int extra;

  if (!reg_offset_addressing_ok_p (mode)
      || mode_supports_dq_form (mode))
    {
      if (virtual_stack_registers_memory_p (x))
	return x;

      /* In theory we should not be seeing addresses of the form reg+0,
	 but just in case it is generated, optimize it away.  */
      if (GET_CODE (x) == PLUS && XEXP (x, 1) == const0_rtx)
	return force_reg (Pmode, XEXP (x, 0));

      /* For TImode with load/store quad, restrict addresses to just a single
	 pointer, so it works with both GPRs and VSX registers.  */
      /* Make sure both operands are registers.  */
      else if (GET_CODE (x) == PLUS
	       && (mode != TImode || !TARGET_VSX))
	return gen_rtx_PLUS (Pmode,
			     force_reg (Pmode, XEXP (x, 0)),
			     force_reg (Pmode, XEXP (x, 1)));
      else
	return force_reg (Pmode, x);
    }
  if (SYMBOL_REF_P (x) && !TARGET_MACHO)
    {
      enum tls_model model = SYMBOL_REF_TLS_MODEL (x);
      if (model != 0)
	return rs6000_legitimize_tls_address (x, model);
    }

  extra = 0;
  switch (mode)
    {
    case E_TFmode:
    case E_TDmode:
    case E_TImode:
    case E_PTImode:
    case E_IFmode:
    case E_KFmode:
      /* As in legitimate_offset_address_p we do not assume
	 worst-case.  The mode here is just a hint as to the registers
	 used.  A TImode is usually in gprs, but may actually be in
	 fprs.  Leave worst-case scenario for reload to handle via
	 insn constraints.  PTImode is only GPRs.  */
      extra = 8;
      break;
    default:
      break;
    }

  if (GET_CODE (x) == PLUS
      && REG_P (XEXP (x, 0))
      && CONST_INT_P (XEXP (x, 1))
      && ((unsigned HOST_WIDE_INT) (INTVAL (XEXP (x, 1)) + 0x8000)
	  >= 0x10000 - extra))
    {
      HOST_WIDE_INT high_int, low_int;
      rtx sum;
      low_int = ((INTVAL (XEXP (x, 1)) & 0xffff) ^ 0x8000) - 0x8000;
      if (low_int >= 0x8000 - extra)
	low_int = 0;
      high_int = INTVAL (XEXP (x, 1)) - low_int;
      sum = force_operand (gen_rtx_PLUS (Pmode, XEXP (x, 0),
					 gen_int_mode (high_int, Pmode)), 0);
      return plus_constant (Pmode, sum, low_int);
    }
  else if (GET_CODE (x) == PLUS
	   && REG_P (XEXP (x, 0))
	   && !CONST_INT_P (XEXP (x, 1))
	   && GET_MODE_NUNITS (mode) == 1
	   && (GET_MODE_SIZE (mode) <= UNITS_PER_WORD
	       || (/* ??? Assume floating point reg based on mode?  */
		   TARGET_HARD_FLOAT && (mode == DFmode || mode == DDmode)))
	   && !avoiding_indexed_address_p (mode))
    {
      return gen_rtx_PLUS (Pmode, XEXP (x, 0),
			   force_reg (Pmode, force_operand (XEXP (x, 1), 0)));
    }
  else if ((TARGET_ELF
#if TARGET_MACHO
	    || !MACHO_DYNAMIC_NO_PIC_P
#endif
	    )
	   && TARGET_32BIT
	   && TARGET_NO_TOC_OR_PCREL
	   && !flag_pic
	   && !CONST_INT_P (x)
	   && !CONST_WIDE_INT_P (x)
	   && !CONST_DOUBLE_P (x)
	   && CONSTANT_P (x)
	   && GET_MODE_NUNITS (mode) == 1
	   && (GET_MODE_SIZE (mode) <= UNITS_PER_WORD
	       || (/* ??? Assume floating point reg based on mode?  */
		   TARGET_HARD_FLOAT && (mode == DFmode || mode == DDmode))))
    {
      rtx reg = gen_reg_rtx (Pmode);
      if (TARGET_ELF)
	emit_insn (gen_elf_high (reg, x));
      else
	emit_insn (gen_macho_high (Pmode, reg, x));
      return gen_rtx_LO_SUM (Pmode, reg, x);
    }
  else if (TARGET_TOC
	   && SYMBOL_REF_P (x)
	   && constant_pool_expr_p (x)
	   && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (x), Pmode))
    return create_TOC_reference (x, NULL_RTX);
  else
    return x;
}

/* Debug version of rs6000_legitimize_address.  */
static rtx
rs6000_debug_legitimize_address (rtx x, rtx oldx, machine_mode mode)
{
  rtx ret;
  rtx_insn *insns;

  start_sequence ();
  ret = rs6000_legitimize_address (x, oldx, mode);
  insns = get_insns ();
  end_sequence ();

  if (ret != x)
    {
      fprintf (stderr,
	       "\nrs6000_legitimize_address: mode %s, old code %s, "
	       "new code %s, modified\n",
	       GET_MODE_NAME (mode), GET_RTX_NAME (GET_CODE (x)),
	       GET_RTX_NAME (GET_CODE (ret)));

      fprintf (stderr, "Original address:\n");
      debug_rtx (x);

      fprintf (stderr, "oldx:\n");
      debug_rtx (oldx);

      fprintf (stderr, "New address:\n");
      debug_rtx (ret);

      if (insns)
	{
	  fprintf (stderr, "Insns added:\n");
	  debug_rtx_list (insns, 20);
	}
    }
  else
    {
      fprintf (stderr,
	       "\nrs6000_legitimize_address: mode %s, code %s, no change:\n",
	       GET_MODE_NAME (mode), GET_RTX_NAME (GET_CODE (x)));

      debug_rtx (x);
    }

  if (insns)
    emit_insn (insns);

  return ret;
}

/* This is called from dwarf2out.cc via TARGET_ASM_OUTPUT_DWARF_DTPREL.
   We need to emit DTP-relative relocations.  */

static void rs6000_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
static void
rs6000_output_dwarf_dtprel (FILE *file, int size, rtx x)
{
  switch (size)
    {
    case 4:
      fputs ("\t.long\t", file);
      break;
    case 8:
      fputs (DOUBLE_INT_ASM_OP, file);
      break;
    default:
      gcc_unreachable ();
    }
  output_addr_const (file, x);
  if (TARGET_ELF)
    fputs ("@dtprel+0x8000", file);
}
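
/* For example, for a 4-byte entry on ELF this emits

	.long	<sym>@dtprel+0x8000

   where <sym> is the thread-local symbol printed by output_addr_const.  */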
9221 
9222 /* Return true if X is a symbol that refers to real (rather than emulated)
9223    TLS.  */
9224 
9225 static bool
rs6000_real_tls_symbol_ref_p(rtx x)9226 rs6000_real_tls_symbol_ref_p (rtx x)
9227 {
9228   return (SYMBOL_REF_P (x)
9229 	  && SYMBOL_REF_TLS_MODEL (x) >= TLS_MODEL_REAL);
9230 }
9231 
9232 /* In the name of slightly smaller debug output, and to cater to
9233    general assembler lossage, recognize various UNSPEC sequences
9234    and turn them back into a direct symbol reference.  */
9235 
9236 static rtx
rs6000_delegitimize_address(rtx orig_x)9237 rs6000_delegitimize_address (rtx orig_x)
9238 {
9239   rtx x, y, offset;
9240 
9241   /* UNSPEC_FUSION_GPR is created by the peephole2 for power8 fusion.  It
9242      encodes loading up the high part of the address of a TOC reference along
9243      with a load of a GPR using the same base register used for the load.  We
9244      return the original SYMBOL_REF.
9245 
9246 	(set (reg:INT1 <reg>
9247 	     (unspec:INT1 [<combined-address>] UNSPEC_FUSION_GPR)))
9248 
9249      UNSPEC_PCREL_OPT_LD_ADDR is used by the power10 PCREL_OPT pass.  These
9250      UNSPECs include the external SYMBOL_REF along with the value being loaded.
9251      We return the original SYMBOL_REF.
9252 
9253 	(parallel [(set (reg:DI <base-reg>)
9254 			(unspec:DI [(symbol_ref <symbol>)
9255 				    (const_int <marker>)]
9256 				   UNSPEC_PCREL_OPT_LD_ADDR))
9257 		   (set (reg:DI <load-reg>)
9258 			(unspec:DI [(const_int 0)]
9259 				   UNSPEC_PCREL_OPT_LD_DATA))])
9260 
9261      UNSPEC_PCREL_OPT_LD_SAME_REG is an alternative that is used if the
9262      GPR being loaded is the same as the GPR used to hold the external address.
9263 
9264 	(set (reg:DI <base-reg>)
9265 	     (unspec:DI [(symbol_ref <symbol>)
9266 			 (const_int <marker>)]
9267 			UNSPEC_PCREL_OPT_LD_SAME_REG))
9268 
9269      UNSPEC_PCREL_OPT_ST_ADDR is used by the power10 PCREL_OPT pass.  This
9270      UNSPEC include the external SYMBOL_REF along with the value being loaded.
9271      We return the original SYMBOL_REF.
9272 
9273 	(parallel [(set (reg:DI <base-reg>)
9274 			(unspec:DI [(symbol_ref <symbol>)
9275 				    (const_int <marker>)]
9276 				   UNSPEC_PCREL_OPT_ST_ADDR))
9277 		   (use (reg <store-reg>))])  */
9278 
9279   if (GET_CODE (orig_x) == UNSPEC)
9280     switch (XINT (orig_x, 1))
9281       {
9282       case UNSPEC_FUSION_GPR:
9283       case UNSPEC_PCREL_OPT_LD_ADDR:
9284       case UNSPEC_PCREL_OPT_LD_SAME_REG:
9285       case UNSPEC_PCREL_OPT_ST_ADDR:
9286 	orig_x = XVECEXP (orig_x, 0, 0);
9287 	break;
9288 
9289       default:
9290 	break;
9291       }
9292 
9293   orig_x = delegitimize_mem_from_attrs (orig_x);
9294 
9295   x = orig_x;
9296   if (MEM_P (x))
9297     x = XEXP (x, 0);
9298 
9299   y = x;
9300   if (TARGET_CMODEL != CMODEL_SMALL && GET_CODE (y) == LO_SUM)
9301     y = XEXP (y, 1);
9302 
9303   offset = NULL_RTX;
9304   if (GET_CODE (y) == PLUS
9305       && GET_MODE (y) == Pmode
9306       && CONST_INT_P (XEXP (y, 1)))
9307     {
9308       offset = XEXP (y, 1);
9309       y = XEXP (y, 0);
9310     }
9311 
9312   if (GET_CODE (y) == UNSPEC && XINT (y, 1) == UNSPEC_TOCREL)
9313     {
9314       y = XVECEXP (y, 0, 0);
9315 
9316 #ifdef HAVE_AS_TLS
9317       /* Do not associate thread-local symbols with the original
9318 	 constant pool symbol.  */
9319       if (TARGET_XCOFF
9320 	  && SYMBOL_REF_P (y)
9321 	  && CONSTANT_POOL_ADDRESS_P (y)
9322 	  && rs6000_real_tls_symbol_ref_p (get_pool_constant (y)))
9323 	return orig_x;
9324 #endif
9325 
9326       if (offset != NULL_RTX)
9327 	y = gen_rtx_PLUS (Pmode, y, offset);
9328       if (!MEM_P (orig_x))
9329 	return y;
9330       else
9331 	return replace_equiv_address_nv (orig_x, y);
9332     }
9333 
9334   if (TARGET_MACHO
9335       && GET_CODE (orig_x) == LO_SUM
9336       && GET_CODE (XEXP (orig_x, 1)) == CONST)
9337     {
9338       y = XEXP (XEXP (orig_x, 1), 0);
9339       if (GET_CODE (y) == UNSPEC && XINT (y, 1) == UNSPEC_MACHOPIC_OFFSET)
9340 	return XVECEXP (y, 0, 0);
9341     }
9342 
9343   return orig_x;
9344 }
9345 
9346 /* Return true if X shouldn't be emitted into the debug info.
9347    The linker doesn't like .toc section references from
9348    .debug_* sections, so reject .toc section symbols.  */
9349 
9350 static bool
rs6000_const_not_ok_for_debug_p(rtx x)9351 rs6000_const_not_ok_for_debug_p (rtx x)
9352 {
9353   if (GET_CODE (x) == UNSPEC)
9354     return true;
9355   if (SYMBOL_REF_P (x)
9356       && CONSTANT_POOL_ADDRESS_P (x))
9357     {
9358       rtx c = get_pool_constant (x);
9359       machine_mode cmode = get_pool_mode (x);
9360       if (ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (c, cmode))
9361 	return true;
9362     }
9363 
9364   return false;
9365 }
9366 
9367 /* Implement the TARGET_LEGITIMATE_COMBINED_INSN hook.  */
9368 
9369 static bool
rs6000_legitimate_combined_insn(rtx_insn * insn)9370 rs6000_legitimate_combined_insn (rtx_insn *insn)
9371 {
9372   int icode = INSN_CODE (insn);
9373 
9374   /* Reject creating doloop insns.  Combine should not be allowed
9375      to create these for a number of reasons:
9376      1) In a nested loop, if combine creates one of these in an
9377      outer loop and the register allocator happens to allocate ctr
9378      to the outer loop insn, then the inner loop can't use ctr.
9379      Inner loops ought to be more highly optimized.
9380      2) Combine often wants to create one of these from what was
9381      originally a three insn sequence, first combining the three
9382      insns to two, then to ctrsi/ctrdi.  When ctrsi/ctrdi is not
9383      allocated ctr, the splitter takes use back to the three insn
9384      sequence.  It's better to stop combine at the two insn
9385      sequence.
9386      3) Faced with not being able to allocate ctr for ctrsi/ctrdi
9387      insns, the register allocator sometimes uses floating point
9388      or vector registers for the pseudo.  Since ctrsi/ctrdi is a
9389      jump insn and output reloads are not implemented for jumps,
9390      the ctrsi/ctrdi splitters need to handle all possible cases.
9391      That's a pain, and it gets to be seriously difficult when a
9392      splitter that runs after reload needs memory to transfer from
9393      a gpr to fpr.  See PR70098 and PR71763 which are not fixed
9394      for the difficult case.  It's better to not create problems
9395      in the first place.  */
9396   if (icode != CODE_FOR_nothing
9397       && (icode == CODE_FOR_bdz_si
9398 	  || icode == CODE_FOR_bdz_di
9399 	  || icode == CODE_FOR_bdnz_si
9400 	  || icode == CODE_FOR_bdnz_di
9401 	  || icode == CODE_FOR_bdztf_si
9402 	  || icode == CODE_FOR_bdztf_di
9403 	  || icode == CODE_FOR_bdnztf_si
9404 	  || icode == CODE_FOR_bdnztf_di))
9405     return false;
9406 
9407   return true;
9408 }
9409 
9410 /* Construct the SYMBOL_REF for the tls_get_addr function.  */
9411 
9412 static GTY(()) rtx rs6000_tls_symbol;
9413 static rtx
9414 rs6000_tls_get_addr (void)
9415 {
9416   if (!rs6000_tls_symbol)
9417     rs6000_tls_symbol = init_one_libfunc ("__tls_get_addr");
9418 
9419   return rs6000_tls_symbol;
9420 }
9421 
9422 /* Construct the SYMBOL_REF for TLS GOT references.  */
9423 
9424 static GTY(()) rtx rs6000_got_symbol;
9425 rtx
9426 rs6000_got_sym (void)
9427 {
9428   if (!rs6000_got_symbol)
9429     {
9430       rs6000_got_symbol = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
9431       SYMBOL_REF_FLAGS (rs6000_got_symbol) |= SYMBOL_FLAG_LOCAL;
9432       SYMBOL_REF_FLAGS (rs6000_got_symbol) |= SYMBOL_FLAG_EXTERNAL;
9433     }
9434 
9435   return rs6000_got_symbol;
9436 }
9437 
9438 /* AIX Thread-Local Address support.  */
9439 
9440 static rtx
9441 rs6000_legitimize_tls_address_aix (rtx addr, enum tls_model model)
9442 {
9443   rtx sym, mem, tocref, tlsreg, tmpreg, dest;
9444   const char *name;
9445   char *tlsname;
9446 
9447   /* Place addr into TOC constant pool.  */
9448   sym = force_const_mem (GET_MODE (addr), addr);
9449 
9450   /* Output the TOC entry and create the MEM referencing the value.  */
9451   if (constant_pool_expr_p (XEXP (sym, 0))
9452       && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (XEXP (sym, 0)), Pmode))
9453     {
9454       tocref = create_TOC_reference (XEXP (sym, 0), NULL_RTX);
9455       mem = gen_const_mem (Pmode, tocref);
9456       set_mem_alias_set (mem, get_TOC_alias_set ());
9457     }
9458   else
9459     return sym;
9460 
9461   /* Use the global-dynamic model for local-dynamic as well.  */
9462   if (model == TLS_MODEL_GLOBAL_DYNAMIC
9463       || model == TLS_MODEL_LOCAL_DYNAMIC)
9464     {
9465       /* Create new TOC reference for @m symbol.  */
9466       name = XSTR (XVECEXP (XEXP (mem, 0), 0, 0), 0);
9467       tlsname = XALLOCAVEC (char, strlen (name) + 1);
9468       strcpy (tlsname, "*LCM");
9469       strcat (tlsname, name + 3);
9470       rtx modaddr = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (tlsname));
9471       SYMBOL_REF_FLAGS (modaddr) |= SYMBOL_FLAG_LOCAL;
9472       tocref = create_TOC_reference (modaddr, NULL_RTX);
9473       rtx modmem = gen_const_mem (Pmode, tocref);
9474       set_mem_alias_set (modmem, get_TOC_alias_set ());
9475 
9476       rtx modreg = gen_reg_rtx (Pmode);
9477       emit_insn (gen_rtx_SET (modreg, modmem));
9478 
9479       tmpreg = gen_reg_rtx (Pmode);
9480       emit_insn (gen_rtx_SET (tmpreg, mem));
9481 
9482       dest = gen_reg_rtx (Pmode);
9483       if (TARGET_32BIT)
9484 	emit_insn (gen_tls_get_addrsi (dest, modreg, tmpreg));
9485       else
9486 	emit_insn (gen_tls_get_addrdi (dest, modreg, tmpreg));
9487       return dest;
9488     }
9489   /* Obtain TLS pointer: 32 bit call or 64 bit GPR 13.  */
9490   else if (TARGET_32BIT)
9491     {
9492       tlsreg = gen_reg_rtx (SImode);
9493       emit_insn (gen_tls_get_tpointer (tlsreg));
9494     }
9495   else
9496     {
9497       tlsreg = gen_rtx_REG (DImode, 13);
9498       xcoff_tls_exec_model_detected = true;
9499     }
9500 
9501   /* Load the TOC value into temporary register.  */
9502   tmpreg = gen_reg_rtx (Pmode);
9503   emit_insn (gen_rtx_SET (tmpreg, mem));
9504   set_unique_reg_note (get_last_insn (), REG_EQUAL,
9505 		       gen_rtx_MINUS (Pmode, addr, tlsreg));
9506 
9507   /* Add TOC symbol value to TLS pointer.  */
9508   dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tmpreg, tlsreg));
9509 
9510   return dest;
9511 }
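
/* Rough shape of what the exec-model path above produces (an illustrative
   sketch, not a literal listing): the variable's address is given a TOC
   entry, a TOC-relative load brings it into a temporary, and the result
   is formed by adding the thread pointer (r13 directly on 64-bit, or the
   value fetched by tls_get_tpointer on 32-bit).  */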
9512 
9513 /* Passes the TLS arg value for the global-dynamic and local-dynamic
9514    emit_library_call_value calls in rs6000_legitimize_tls_address down
9515    to rs6000_call_aix and rs6000_call_sysv, which use it to emit the
9516    marker relocs put on __tls_get_addr calls.  */
9517 static rtx global_tlsarg;
9518 
9519 /* ADDR contains a thread-local SYMBOL_REF.  Generate code to compute
9520    this (thread-local) address.  */
9521 
9522 static rtx
9523 rs6000_legitimize_tls_address (rtx addr, enum tls_model model)
9524 {
9525   rtx dest, insn;
9526 
9527   if (TARGET_XCOFF)
9528     return rs6000_legitimize_tls_address_aix (addr, model);
9529 
9530   dest = gen_reg_rtx (Pmode);
9531   if (model == TLS_MODEL_LOCAL_EXEC
9532       && (rs6000_tls_size == 16 || rs6000_pcrel_p ()))
9533     {
9534       rtx tlsreg;
9535 
9536       if (TARGET_64BIT)
9537 	{
9538 	  tlsreg = gen_rtx_REG (Pmode, 13);
9539 	  insn = gen_tls_tprel_64 (dest, tlsreg, addr);
9540 	}
9541       else
9542 	{
9543 	  tlsreg = gen_rtx_REG (Pmode, 2);
9544 	  insn = gen_tls_tprel_32 (dest, tlsreg, addr);
9545 	}
9546       emit_insn (insn);
9547     }
9548   else if (model == TLS_MODEL_LOCAL_EXEC && rs6000_tls_size == 32)
9549     {
9550       rtx tlsreg, tmp;
9551 
9552       tmp = gen_reg_rtx (Pmode);
9553       if (TARGET_64BIT)
9554 	{
9555 	  tlsreg = gen_rtx_REG (Pmode, 13);
9556 	  insn = gen_tls_tprel_ha_64 (tmp, tlsreg, addr);
9557 	}
9558       else
9559 	{
9560 	  tlsreg = gen_rtx_REG (Pmode, 2);
9561 	  insn = gen_tls_tprel_ha_32 (tmp, tlsreg, addr);
9562 	}
9563       emit_insn (insn);
9564       if (TARGET_64BIT)
9565 	insn = gen_tls_tprel_lo_64 (dest, tmp, addr);
9566       else
9567 	insn = gen_tls_tprel_lo_32 (dest, tmp, addr);
9568       emit_insn (insn);
9569     }
9570   else
9571     {
9572       rtx got, tga, tmp1, tmp2;
9573 
9574       /* We currently use relocations like @got@tlsgd for tls, which
9575 	 means the linker will handle allocation of tls entries, placing
9576 	 them in the .got section.  So use a pointer to the .got section,
9577 	 not one to secondary TOC sections used by 64-bit -mminimal-toc,
9578 	 or to secondary GOT sections used by 32-bit -fPIC.  */
9579       if (rs6000_pcrel_p ())
9580 	got = const0_rtx;
9581       else if (TARGET_64BIT)
9582 	got = gen_rtx_REG (Pmode, 2);
9583       else
9584 	{
9585 	  if (flag_pic == 1)
9586 	    got = gen_rtx_REG (Pmode, RS6000_PIC_OFFSET_TABLE_REGNUM);
9587 	  else
9588 	    {
9589 	      rtx gsym = rs6000_got_sym ();
9590 	      got = gen_reg_rtx (Pmode);
9591 	      if (flag_pic == 0)
9592 		rs6000_emit_move (got, gsym, Pmode);
9593 	      else
9594 		{
9595 		  rtx mem, lab;
9596 
9597 		  tmp1 = gen_reg_rtx (Pmode);
9598 		  tmp2 = gen_reg_rtx (Pmode);
9599 		  mem = gen_const_mem (Pmode, tmp1);
9600 		  lab = gen_label_rtx ();
9601 		  emit_insn (gen_load_toc_v4_PIC_1b (gsym, lab));
9602 		  emit_move_insn (tmp1, gen_rtx_REG (Pmode, LR_REGNO));
9603 		  if (TARGET_LINK_STACK)
9604 		    emit_insn (gen_addsi3 (tmp1, tmp1, GEN_INT (4)));
9605 		  emit_move_insn (tmp2, mem);
9606 		  rtx_insn *last = emit_insn (gen_addsi3 (got, tmp1, tmp2));
9607 		  set_unique_reg_note (last, REG_EQUAL, gsym);
9608 		}
9609 	    }
9610 	}
9611 
9612       if (model == TLS_MODEL_GLOBAL_DYNAMIC)
9613 	{
9614 	  rtx arg = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, addr, got),
9615 				    UNSPEC_TLSGD);
9616 	  tga = rs6000_tls_get_addr ();
9617 	  rtx argreg = gen_rtx_REG (Pmode, 3);
9618 	  emit_insn (gen_rtx_SET (argreg, arg));
9619 	  global_tlsarg = arg;
9620 	  emit_library_call_value (tga, dest, LCT_CONST, Pmode, argreg, Pmode);
9621 	  global_tlsarg = NULL_RTX;
9622 
9623 	  /* Make a note so that the result of this call can be CSEd.  */
9624 	  rtvec vec = gen_rtvec (1, copy_rtx (arg));
9625 	  rtx uns = gen_rtx_UNSPEC (Pmode, vec, UNSPEC_TLS_GET_ADDR);
9626 	  set_unique_reg_note (get_last_insn (), REG_EQUAL, uns);
9627 	}
9628       else if (model == TLS_MODEL_LOCAL_DYNAMIC)
9629 	{
9630 	  rtx arg = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, got), UNSPEC_TLSLD);
9631 	  tga = rs6000_tls_get_addr ();
9632 	  tmp1 = gen_reg_rtx (Pmode);
9633 	  rtx argreg = gen_rtx_REG (Pmode, 3);
9634 	  emit_insn (gen_rtx_SET (argreg, arg));
9635 	  global_tlsarg = arg;
9636 	  emit_library_call_value (tga, tmp1, LCT_CONST, Pmode, argreg, Pmode);
9637 	  global_tlsarg = NULL_RTX;
9638 
9639 	  /* Make a note so that the result of this call can be CSEd.  */
9640 	  rtvec vec = gen_rtvec (1, copy_rtx (arg));
9641 	  rtx uns = gen_rtx_UNSPEC (Pmode, vec, UNSPEC_TLS_GET_ADDR);
9642 	  set_unique_reg_note (get_last_insn (), REG_EQUAL, uns);
9643 
9644 	  if (rs6000_tls_size == 16 || rs6000_pcrel_p ())
9645 	    {
9646 	      if (TARGET_64BIT)
9647 		insn = gen_tls_dtprel_64 (dest, tmp1, addr);
9648 	      else
9649 		insn = gen_tls_dtprel_32 (dest, tmp1, addr);
9650 	    }
9651 	  else if (rs6000_tls_size == 32)
9652 	    {
9653 	      tmp2 = gen_reg_rtx (Pmode);
9654 	      if (TARGET_64BIT)
9655 		insn = gen_tls_dtprel_ha_64 (tmp2, tmp1, addr);
9656 	      else
9657 		insn = gen_tls_dtprel_ha_32 (tmp2, tmp1, addr);
9658 	      emit_insn (insn);
9659 	      if (TARGET_64BIT)
9660 		insn = gen_tls_dtprel_lo_64 (dest, tmp2, addr);
9661 	      else
9662 		insn = gen_tls_dtprel_lo_32 (dest, tmp2, addr);
9663 	    }
9664 	  else
9665 	    {
9666 	      tmp2 = gen_reg_rtx (Pmode);
9667 	      if (TARGET_64BIT)
9668 		insn = gen_tls_got_dtprel_64 (tmp2, got, addr);
9669 	      else
9670 		insn = gen_tls_got_dtprel_32 (tmp2, got, addr);
9671 	      emit_insn (insn);
9672 	      insn = gen_rtx_SET (dest, gen_rtx_PLUS (Pmode, tmp2, tmp1));
9673 	    }
9674 	  emit_insn (insn);
9675 	}
9676       else
9677 	{
9678 	  /* IE, or 64-bit offset LE.  */
9679 	  tmp2 = gen_reg_rtx (Pmode);
9680 	  if (TARGET_64BIT)
9681 	    insn = gen_tls_got_tprel_64 (tmp2, got, addr);
9682 	  else
9683 	    insn = gen_tls_got_tprel_32 (tmp2, got, addr);
9684 	  emit_insn (insn);
9685 	  if (rs6000_pcrel_p ())
9686 	    {
9687 	      if (TARGET_64BIT)
9688 		insn = gen_tls_tls_pcrel_64 (dest, tmp2, addr);
9689 	      else
9690 		insn = gen_tls_tls_pcrel_32 (dest, tmp2, addr);
9691 	    }
9692 	  else if (TARGET_64BIT)
9693 	    insn = gen_tls_tls_64 (dest, tmp2, addr);
9694 	  else
9695 	    insn = gen_tls_tls_32 (dest, tmp2, addr);
9696 	  emit_insn (insn);
9697 	}
9698     }
9699 
9700   return dest;
9701 }
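
/* Illustrative sketch of the code the global-dynamic path above turns
   into on 64-bit ELF (the mnemonics and relocations are typical, not
   emitted verbatim here):

	addi 3,2,x@got@tlsgd		# load the UNSPEC_TLSGD argument
	bl __tls_get_addr(x@tlsgd)	# marker reloc from global_tlsarg
	nop

   The local-exec path with -mtls-size=16 instead reduces to a single
   tprel addition off r13.  The exact relocations depend on the ABI and
   the -mcmodel/-mpcrel settings.  */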
9702 
9703 /* Only create the global variable for the stack protect guard if we are using
9704    the global flavor of that guard.  */
9705 static tree
9706 rs6000_init_stack_protect_guard (void)
9707 {
9708   if (rs6000_stack_protector_guard == SSP_GLOBAL)
9709     return default_stack_protect_guard ();
9710 
9711   return NULL_TREE;
9712 }
9713 
9714 /* Implement TARGET_CANNOT_FORCE_CONST_MEM.  */
9715 
9716 static bool
9717 rs6000_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
9718 {
9719   if (GET_CODE (x) == HIGH
9720       && GET_CODE (XEXP (x, 0)) == UNSPEC)
9721     return true;
9722 
9723   /* A TLS symbol in the TOC cannot contain a sum.  */
9724   if (GET_CODE (x) == CONST
9725       && GET_CODE (XEXP (x, 0)) == PLUS
9726       && SYMBOL_REF_P (XEXP (XEXP (x, 0), 0))
9727       && SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0)) != 0)
9728     return true;
9729 
9730   /* Allow AIX TOC TLS symbols in the constant pool,
9731      but not ELF TLS symbols.  */
9732   return TARGET_ELF && tls_referenced_p (x);
9733 }
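
/* Example of the CONST case rejected above, as illustrative RTL
   (assuming "tls_var" has a nonzero SYMBOL_REF_TLS_MODEL):

     (const:DI (plus:DI (symbol_ref:DI ("tls_var"))
			(const_int 8)))

   A TLS symbol in the TOC cannot carry an addend, so such sums must not
   be forced into the constant pool.  */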
9734 
9735 /* Return true iff the given SYMBOL_REF refers to a constant pool entry
9736    that we have put in the TOC, or for cmodel=medium, if the SYMBOL_REF
9737    can be addressed relative to the toc pointer.  */
9738 
9739 static bool
9740 use_toc_relative_ref (rtx sym, machine_mode mode)
9741 {
9742   return ((constant_pool_expr_p (sym)
9743 	   && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (sym),
9744 					       get_pool_mode (sym)))
9745 	  || (TARGET_CMODEL == CMODEL_MEDIUM
9746 	      && SYMBOL_REF_LOCAL_P (sym)
9747 	      && GET_MODE_SIZE (mode) <= POWERPC64_TOC_POINTER_ALIGNMENT));
9748 }
9749 
9750 /* TARGET_LEGITIMATE_ADDRESS_P recognizes an RTL expression
9751    that is a valid memory address for an instruction.
9752    The MODE argument is the machine mode for the MEM expression
9753    that wants to use this address.
9754 
9755    On the RS/6000, there are four valid addresses: a SYMBOL_REF that
9756    refers to a constant pool entry of an address (or the sum of it
9757    plus a constant), a short (16-bit signed) constant plus a register,
9758    the sum of two registers, or a register indirect, possibly with an
9759    auto-increment.  For DFmode, DDmode and DImode with a constant plus
9760    register, we must ensure that both words are addressable, or that we
9761    are on PowerPC64 with a word-aligned offset.
9762 
9763    For modes spanning multiple registers (DFmode and DDmode in 32-bit GPRs,
9764    32-bit DImode, TImode, TFmode, TDmode), indexed addressing cannot be used
9765    because adjacent memory cells are accessed by adding word-sized offsets
9766    during assembly output.  */
9767 static bool
9768 rs6000_legitimate_address_p (machine_mode mode, rtx x, bool reg_ok_strict)
9769 {
9770   bool reg_offset_p = reg_offset_addressing_ok_p (mode);
9771   bool quad_offset_p = mode_supports_dq_form (mode);
9772 
9773   if (TARGET_ELF && RS6000_SYMBOL_REF_TLS_P (x))
9774     return 0;
9775 
9776   /* Handle unaligned altivec lvx/stvx type addresses.  */
9777   if (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode)
9778       && GET_CODE (x) == AND
9779       && CONST_INT_P (XEXP (x, 1))
9780       && INTVAL (XEXP (x, 1)) == -16)
9781     {
9782       x = XEXP (x, 0);
9783       return (legitimate_indirect_address_p (x, reg_ok_strict)
9784 	      || legitimate_indexed_address_p (x, reg_ok_strict)
9785 	      || virtual_stack_registers_memory_p (x));
9786     }
9787 
9788   if (legitimate_indirect_address_p (x, reg_ok_strict))
9789     return 1;
9790   if (TARGET_UPDATE
9791       && (GET_CODE (x) == PRE_INC || GET_CODE (x) == PRE_DEC)
9792       && mode_supports_pre_incdec_p (mode)
9793       && legitimate_indirect_address_p (XEXP (x, 0), reg_ok_strict))
9794     return 1;
9795 
9796   /* Handle prefixed addresses (PC-relative or 34-bit offset).  */
9797   if (address_is_prefixed (x, mode, NON_PREFIXED_DEFAULT))
9798     return 1;
9799 
9800   /* Handle restricted vector d-form offsets in ISA 3.0.  */
9801   if (quad_offset_p)
9802     {
9803       if (quad_address_p (x, mode, reg_ok_strict))
9804 	return 1;
9805     }
9806   else if (virtual_stack_registers_memory_p (x))
9807     return 1;
9808 
9809   else if (reg_offset_p)
9810     {
9811       if (legitimate_small_data_p (mode, x))
9812 	return 1;
9813       if (legitimate_constant_pool_address_p (x, mode,
9814 					     reg_ok_strict || lra_in_progress))
9815 	return 1;
9816     }
9817 
9818   /* For TImode, if we have TImode in VSX registers, only allow register
9819      indirect addresses.  This will allow the values to go in either GPRs
9820      or VSX registers without reloading.  The vector types would tend to
9821      go into VSX registers, so we allow REG+REG, while TImode seems
9822      somewhat split, in that some uses are GPR based, and some VSX based.  */
9823   /* FIXME: We could loosen this by changing the following to
9824        if (mode == TImode && TARGET_QUAD_MEMORY && TARGET_VSX)
9825      but currently we cannot allow REG+REG addressing for TImode.  See
9826      PR72827 for complete details on how this ends up hoodwinking DSE.  */
9827   if (mode == TImode && TARGET_VSX)
9828     return 0;
9829   /* If not REG_OK_STRICT (before reload) let pass any stack offset.  */
9830   if (! reg_ok_strict
9831       && reg_offset_p
9832       && GET_CODE (x) == PLUS
9833       && REG_P (XEXP (x, 0))
9834       && (XEXP (x, 0) == virtual_stack_vars_rtx
9835 	  || XEXP (x, 0) == arg_pointer_rtx)
9836       && CONST_INT_P (XEXP (x, 1)))
9837     return 1;
9838   if (rs6000_legitimate_offset_address_p (mode, x, reg_ok_strict, false))
9839     return 1;
9840   if (!FLOAT128_2REG_P (mode)
9841       && (TARGET_HARD_FLOAT
9842 	  || TARGET_POWERPC64
9843 	  || (mode != DFmode && mode != DDmode))
9844       && (TARGET_POWERPC64 || mode != DImode)
9845       && (mode != TImode || VECTOR_MEM_VSX_P (TImode))
9846       && mode != PTImode
9847       && !avoiding_indexed_address_p (mode)
9848       && legitimate_indexed_address_p (x, reg_ok_strict))
9849     return 1;
9850   if (TARGET_UPDATE && GET_CODE (x) == PRE_MODIFY
9851       && mode_supports_pre_modify_p (mode)
9852       && legitimate_indirect_address_p (XEXP (x, 0), reg_ok_strict)
9853       && (rs6000_legitimate_offset_address_p (mode, XEXP (x, 1),
9854 					      reg_ok_strict, false)
9855 	  || (!avoiding_indexed_address_p (mode)
9856 	      && legitimate_indexed_address_p (XEXP (x, 1), reg_ok_strict)))
9857       && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
9858     {
9859       /* There is no prefixed version of the load/store with update.  */
9860       rtx addr = XEXP (x, 1);
9861       return !address_is_prefixed (addr, mode, NON_PREFIXED_DEFAULT);
9862     }
9863   if (reg_offset_p && !quad_offset_p
9864       && legitimate_lo_sum_address_p (mode, x, reg_ok_strict))
9865     return 1;
9866   return 0;
9867 }
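
/* Sketch of address forms the predicate above accepts (illustrative,
   assuming 64-bit, non-strict checking, and an offsettable mode):

     (reg r9)				register indirect
     (plus (reg r9) (const_int 16))	16-bit signed d-form offset
     (plus (reg r9) (reg r10))		x-form indexed
     (plus (reg r9) (const_int 100000))	34-bit prefixed offset, with
					-mprefixed only
     (pre_inc (reg r9))			update form, when the mode allows

   TImode with VSX is deliberately limited to register indirect; see the
   PR72827 note above.  */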
9868 
9869 /* Debug version of rs6000_legitimate_address_p.  */
9870 static bool
9871 rs6000_debug_legitimate_address_p (machine_mode mode, rtx x,
9872 				   bool reg_ok_strict)
9873 {
9874   bool ret = rs6000_legitimate_address_p (mode, x, reg_ok_strict);
9875   fprintf (stderr,
9876 	   "\nrs6000_legitimate_address_p: return = %s, mode = %s, "
9877 	   "strict = %d, reload = %s, code = %s\n",
9878 	   ret ? "true" : "false",
9879 	   GET_MODE_NAME (mode),
9880 	   reg_ok_strict,
9881 	   (reload_completed ? "after" : "before"),
9882 	   GET_RTX_NAME (GET_CODE (x)));
9883   debug_rtx (x);
9884 
9885   return ret;
9886 }
9887 
9888 /* Implement TARGET_MODE_DEPENDENT_ADDRESS_P.  */
9889 
9890 static bool
9891 rs6000_mode_dependent_address_p (const_rtx addr,
9892 				 addr_space_t as ATTRIBUTE_UNUSED)
9893 {
9894   return rs6000_mode_dependent_address_ptr (addr);
9895 }
9896 
9897 /* Go to LABEL if ADDR (a legitimate address expression)
9898    has an effect that depends on the machine mode it is used for.
9899 
9900    On the RS/6000 this is true of all integral offsets (since AltiVec
9901    and VSX modes don't allow them) and of any pre-increment or decrement.
9902 
9903    ??? Except that due to conceptual problems in offsettable_address_p
9904    we can't really report the problems of integral offsets.  So leave
9905    this assuming that the adjustable offset must be valid for the
9906    sub-words of a TFmode operand, which is what we had before.  */
9907 
9908 static bool
9909 rs6000_mode_dependent_address (const_rtx addr)
9910 {
9911   switch (GET_CODE (addr))
9912     {
9913     case PLUS:
9914       /* Any offset from virtual_stack_vars_rtx and arg_pointer_rtx
9915 	 is considered a legitimate address before reload, so there
9916 	 are no offset restrictions in that case.  Note that this
9917 	 condition is safe in strict mode because any address involving
9918 	 virtual_stack_vars_rtx or arg_pointer_rtx would already have
9919 	 been rejected as illegitimate.  */
9920       if (XEXP (addr, 0) != virtual_stack_vars_rtx
9921 	  && XEXP (addr, 0) != arg_pointer_rtx
9922 	  && CONST_INT_P (XEXP (addr, 1)))
9923 	{
9924 	  HOST_WIDE_INT val = INTVAL (XEXP (addr, 1));
9925 	  HOST_WIDE_INT extra = TARGET_POWERPC64 ? 8 : 12;
9926 	  if (TARGET_PREFIXED)
9927 	    return !SIGNED_34BIT_OFFSET_EXTRA_P (val, extra);
9928 	  else
9929 	    return !SIGNED_16BIT_OFFSET_EXTRA_P (val, extra);
9930 	}
9931       break;
9932 
9933     case LO_SUM:
9934       /* Anything in the constant pool is sufficiently aligned that
9935 	 all bytes have the same high part address.  */
9936       return !legitimate_constant_pool_address_p (addr, QImode, false);
9937 
9938     /* Auto-increment cases are now treated generically in recog.cc.  */
9939     case PRE_MODIFY:
9940       return TARGET_UPDATE;
9941 
9942     /* AND is only allowed in Altivec loads.  */
9943     case AND:
9944       return true;
9945 
9946     default:
9947       break;
9948     }
9949 
9950   return false;
9951 }
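
/* Worked example for the PLUS case above, assuming
   SIGNED_16BIT_OFFSET_EXTRA_P (val, extra) tests
   -0x8000 <= val <= 0x7fff - extra: on 64-bit, extra is 8, so
   (plus (reg) (const_int 32756)) is mode-independent (32756 + 8 still
   fits in 16 bits), while (plus (reg) (const_int 32760)) is
   mode-dependent, since addressing the last word of a multi-word access
   would overflow the 16-bit displacement.  */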
9952 
9953 /* Debug version of rs6000_mode_dependent_address.  */
9954 static bool
9955 rs6000_debug_mode_dependent_address (const_rtx addr)
9956 {
9957   bool ret = rs6000_mode_dependent_address (addr);
9958 
9959   fprintf (stderr, "\nrs6000_mode_dependent_address: ret = %s\n",
9960 	   ret ? "true" : "false");
9961   debug_rtx (addr);
9962 
9963   return ret;
9964 }
9965 
9966 /* Implement FIND_BASE_TERM.  */
9967 
9968 rtx
9969 rs6000_find_base_term (rtx op)
9970 {
9971   rtx base;
9972 
9973   base = op;
9974   if (GET_CODE (base) == CONST)
9975     base = XEXP (base, 0);
9976   if (GET_CODE (base) == PLUS)
9977     base = XEXP (base, 0);
9978   if (GET_CODE (base) == UNSPEC)
9979     switch (XINT (base, 1))
9980       {
9981       case UNSPEC_TOCREL:
9982       case UNSPEC_MACHOPIC_OFFSET:
9983 	/* OP represents SYM [+ OFFSET] - ANCHOR.  SYM is the base term
9984 	   for aliasing purposes.  */
9985 	return XVECEXP (base, 0, 0);
9986       }
9987 
9988   return op;
9989 }
9990 
9991 /* More elaborate version of recog's offsettable_memref_p predicate
9992    that works around the ??? note of rs6000_mode_dependent_address.
9993    In particular it accepts
9994 
9995      (mem:DI (plus:SI (reg/f:SI 31 31) (const_int 32760 [0x7ff8])))
9996 
9997    in 32-bit mode, that the recog predicate rejects.  */
9998 
9999 static bool
10000 rs6000_offsettable_memref_p (rtx op, machine_mode reg_mode, bool strict)
10001 {
10002   bool worst_case;
10003 
10004   if (!MEM_P (op))
10005     return false;
10006 
10007   /* First mimic offsettable_memref_p.  */
10008   if (offsettable_address_p (strict, GET_MODE (op), XEXP (op, 0)))
10009     return true;
10010 
10011   /* offsettable_address_p invokes rs6000_mode_dependent_address, but
10012      the latter predicate knows nothing about the mode of the memory
10013      reference and, therefore, assumes that it is the largest supported
10014      mode (TFmode).  As a consequence, legitimate offsettable memory
10015      references are rejected.  rs6000_legitimate_offset_address_p contains
10016      the correct logic for the PLUS case of rs6000_mode_dependent_address,
10017      at least with a little bit of help here given that we know the
10018      actual registers used.  */
10019   worst_case = ((TARGET_POWERPC64 && GET_MODE_CLASS (reg_mode) == MODE_INT)
10020 		|| GET_MODE_SIZE (reg_mode) == 4);
10021   return rs6000_legitimate_offset_address_p (GET_MODE (op), XEXP (op, 0),
10022 					     strict, worst_case);
10023 }
10024 
10025 /* Determine the reassociation width to be used in reassociate_bb.
10026    This takes into account how many parallel operations we
10027    can actually do of a given type, and also the latency.
10028    P8:
10029      int add/sub 6/cycle
10030          mul 2/cycle
10031      vect add/sub/mul 2/cycle
10032      fp   add/sub/mul 2/cycle
10033      dfp  1/cycle
10034 */
10035 
10036 static int
10037 rs6000_reassociation_width (unsigned int opc ATTRIBUTE_UNUSED,
10038                             machine_mode mode)
10039 {
10040   switch (rs6000_tune)
10041     {
10042     case PROCESSOR_POWER8:
10043     case PROCESSOR_POWER9:
10044     case PROCESSOR_POWER10:
10045       if (DECIMAL_FLOAT_MODE_P (mode))
10046 	return 1;
10047       if (VECTOR_MODE_P (mode))
10048 	return 4;
10049       if (INTEGRAL_MODE_P (mode))
10050 	return 1;
10051       if (FLOAT_MODE_P (mode))
10052 	return 4;
10053       break;
10054     default:
10055       break;
10056     }
10057   return 1;
10058 }
10059 
10060 /* Change register usage conditional on target flags.  */
10061 static void
10062 rs6000_conditional_register_usage (void)
10063 {
10064   int i;
10065 
10066   if (TARGET_DEBUG_TARGET)
10067     fprintf (stderr, "rs6000_conditional_register_usage called\n");
10068 
10069   /* 64-bit AIX and Linux reserve GPR13 for thread-private data.  */
10070   if (TARGET_64BIT)
10071     fixed_regs[13] = call_used_regs[13] = 1;
10072 
10073   /* Conditionally disable FPRs.  */
10074   if (TARGET_SOFT_FLOAT)
10075     for (i = 32; i < 64; i++)
10076       fixed_regs[i] = call_used_regs[i] = 1;
10077 
10078   /* The TOC register is not killed across calls in a way that is
10079      visible to the compiler.  */
10080   if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
10081     call_used_regs[2] = 0;
10082 
10083   if (DEFAULT_ABI == ABI_V4 && flag_pic == 2)
10084     fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;
10085 
10086   if (DEFAULT_ABI == ABI_V4 && flag_pic == 1)
10087     fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM]
10088       = call_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;
10089 
10090   if (DEFAULT_ABI == ABI_DARWIN && flag_pic)
10091     fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM]
10092       = call_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;
10093 
10094   if (TARGET_TOC && TARGET_MINIMAL_TOC)
10095     fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;
10096 
10097   if (!TARGET_ALTIVEC && !TARGET_VSX)
10098     {
10099       for (i = FIRST_ALTIVEC_REGNO; i <= LAST_ALTIVEC_REGNO; ++i)
10100 	fixed_regs[i] = call_used_regs[i] = 1;
10101       call_used_regs[VRSAVE_REGNO] = 1;
10102     }
10103 
10104   if (TARGET_ALTIVEC || TARGET_VSX)
10105     global_regs[VSCR_REGNO] = 1;
10106 
10107   if (TARGET_ALTIVEC_ABI)
10108     {
10109       for (i = FIRST_ALTIVEC_REGNO; i < FIRST_ALTIVEC_REGNO + 20; ++i)
10110 	call_used_regs[i] = 1;
10111 
10112       /* AIX reserves VR20:31 in non-extended ABI mode.  */
10113       if (TARGET_XCOFF && !rs6000_aix_extabi)
10114 	for (i = FIRST_ALTIVEC_REGNO + 20; i < FIRST_ALTIVEC_REGNO + 32; ++i)
10115 	  fixed_regs[i] = call_used_regs[i] = 1;
10116     }
10117 }
10118 
10119 
10120 /* Output insns to set DEST equal to the constant SOURCE as a series of
10121    lis, ori and shl instructions and return TRUE.  */
10122 
10123 bool
10124 rs6000_emit_set_const (rtx dest, rtx source)
10125 {
10126   machine_mode mode = GET_MODE (dest);
10127   rtx temp, set;
10128   rtx_insn *insn;
10129   HOST_WIDE_INT c;
10130 
10131   gcc_checking_assert (CONST_INT_P (source));
10132   c = INTVAL (source);
10133   switch (mode)
10134     {
10135     case E_QImode:
10136     case E_HImode:
10137       emit_insn (gen_rtx_SET (dest, source));
10138       return true;
10139 
10140     case E_SImode:
10141       temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (SImode);
10142 
10143       emit_insn (gen_rtx_SET (copy_rtx (temp),
10144 			      GEN_INT (c & ~(HOST_WIDE_INT) 0xffff)));
10145       emit_insn (gen_rtx_SET (dest,
10146 			      gen_rtx_IOR (SImode, copy_rtx (temp),
10147 					   GEN_INT (c & 0xffff))));
10148       break;
10149 
10150     case E_DImode:
10151       if (!TARGET_POWERPC64)
10152 	{
10153 	  rtx hi, lo;
10154 
10155 	  hi = operand_subword_force (copy_rtx (dest), WORDS_BIG_ENDIAN == 0,
10156 				      DImode);
10157 	  lo = operand_subword_force (dest, WORDS_BIG_ENDIAN != 0,
10158 				      DImode);
10159 	  emit_move_insn (hi, GEN_INT (c >> 32));
10160 	  c = ((c & 0xffffffff) ^ 0x80000000) - 0x80000000;
10161 	  emit_move_insn (lo, GEN_INT (c));
10162 	}
10163       else
10164 	rs6000_emit_set_long_const (dest, c);
10165       break;
10166 
10167     default:
10168       gcc_unreachable ();
10169     }
10170 
10171   insn = get_last_insn ();
10172   set = single_set (insn);
10173   if (! CONSTANT_P (SET_SRC (set)))
10174     set_unique_reg_note (insn, REG_EQUAL, GEN_INT (c));
10175 
10176   return true;
10177 }
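
/* Illustrative SImode example: for c = 0x12345678 the two gen_rtx_SET
   calls in the E_SImode case above amount to

	lis  rT,0x1234		# rT <- 0x1234 << 16
	ori  rD,rT,0x5678	# rD <- rT | 0x5678

   (register names are placeholders; the function emits RTL, not
   assembly).  */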
10178 
10179 /* Subroutine of rs6000_emit_set_const, handling PowerPC64 DImode.
10180    Output insns to set DEST equal to the constant C as a series of
10181    lis, ori and shl instructions.  */
10182 
10183 static void
10184 rs6000_emit_set_long_const (rtx dest, HOST_WIDE_INT c)
10185 {
10186   rtx temp;
10187   HOST_WIDE_INT ud1, ud2, ud3, ud4;
10188 
10189   ud1 = c & 0xffff;
10190   c = c >> 16;
10191   ud2 = c & 0xffff;
10192   c = c >> 16;
10193   ud3 = c & 0xffff;
10194   c = c >> 16;
10195   ud4 = c & 0xffff;
10196 
10197   if ((ud4 == 0xffff && ud3 == 0xffff && ud2 == 0xffff && (ud1 & 0x8000))
10198       || (ud4 == 0 && ud3 == 0 && ud2 == 0 && ! (ud1 & 0x8000)))
10199     emit_move_insn (dest, GEN_INT ((ud1 ^ 0x8000) - 0x8000));
10200 
10201   else if ((ud4 == 0xffff && ud3 == 0xffff && (ud2 & 0x8000))
10202 	   || (ud4 == 0 && ud3 == 0 && ! (ud2 & 0x8000)))
10203     {
10204       temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
10205 
10206       emit_move_insn (ud1 != 0 ? copy_rtx (temp) : dest,
10207 		      GEN_INT (((ud2 << 16) ^ 0x80000000) - 0x80000000));
10208       if (ud1 != 0)
10209 	emit_move_insn (dest,
10210 			gen_rtx_IOR (DImode, copy_rtx (temp),
10211 				     GEN_INT (ud1)));
10212     }
10213   else if (ud3 == 0 && ud4 == 0)
10214     {
10215       temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
10216 
10217       gcc_assert (ud2 & 0x8000);
10218       emit_move_insn (copy_rtx (temp),
10219 		      GEN_INT (((ud2 << 16) ^ 0x80000000) - 0x80000000));
10220       if (ud1 != 0)
10221 	emit_move_insn (copy_rtx (temp),
10222 			gen_rtx_IOR (DImode, copy_rtx (temp),
10223 				     GEN_INT (ud1)));
10224       emit_move_insn (dest,
10225 		      gen_rtx_ZERO_EXTEND (DImode,
10226 					   gen_lowpart (SImode,
10227 							copy_rtx (temp))));
10228     }
10229   else if (ud1 == ud3 && ud2 == ud4)
10230     {
10231       temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
10232       HOST_WIDE_INT num = (ud2 << 16) | ud1;
10233       rs6000_emit_set_long_const (temp, (num ^ 0x80000000) - 0x80000000);
10234       rtx one = gen_rtx_AND (DImode, temp, GEN_INT (0xffffffff));
10235       rtx two = gen_rtx_ASHIFT (DImode, temp, GEN_INT (32));
10236       emit_move_insn (dest, gen_rtx_IOR (DImode, one, two));
10237     }
10238   else if ((ud4 == 0xffff && (ud3 & 0x8000))
10239 	   || (ud4 == 0 && ! (ud3 & 0x8000)))
10240     {
10241       temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
10242 
10243       emit_move_insn (copy_rtx (temp),
10244 		      GEN_INT (((ud3 << 16) ^ 0x80000000) - 0x80000000));
10245       if (ud2 != 0)
10246 	emit_move_insn (copy_rtx (temp),
10247 			gen_rtx_IOR (DImode, copy_rtx (temp),
10248 				     GEN_INT (ud2)));
10249       emit_move_insn (ud1 != 0 ? copy_rtx (temp) : dest,
10250 		      gen_rtx_ASHIFT (DImode, copy_rtx (temp),
10251 				      GEN_INT (16)));
10252       if (ud1 != 0)
10253 	emit_move_insn (dest,
10254 			gen_rtx_IOR (DImode, copy_rtx (temp),
10255 				     GEN_INT (ud1)));
10256     }
10257   else
10258     {
10259       temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
10260 
10261       emit_move_insn (copy_rtx (temp),
10262 		      GEN_INT (((ud4 << 16) ^ 0x80000000) - 0x80000000));
10263       if (ud3 != 0)
10264 	emit_move_insn (copy_rtx (temp),
10265 			gen_rtx_IOR (DImode, copy_rtx (temp),
10266 				     GEN_INT (ud3)));
10267 
10268       emit_move_insn (ud2 != 0 || ud1 != 0 ? copy_rtx (temp) : dest,
10269 		      gen_rtx_ASHIFT (DImode, copy_rtx (temp),
10270 				      GEN_INT (32)));
10271       if (ud2 != 0)
10272 	emit_move_insn (ud1 != 0 ? copy_rtx (temp) : dest,
10273 			gen_rtx_IOR (DImode, copy_rtx (temp),
10274 				     GEN_INT (ud2 << 16)));
10275       if (ud1 != 0)
10276 	emit_move_insn (dest,
10277 			gen_rtx_IOR (DImode, copy_rtx (temp),
10278 				     GEN_INT (ud1)));
10279     }
10280 }
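
/* Worked example of the general (final else) case above: for
   c = 0x123456789abcdef0 we get ud4 = 0x1234, ud3 = 0x5678,
   ud2 = 0x9abc and ud1 = 0xdef0, and the emitted moves correspond to

	lis   rT,0x1234
	ori   rT,rT,0x5678
	sldi  rT,rT,32
	oris  rT,rT,0x9abc
	ori   rD,rT,0xdef0

   i.e. five instructions in the worst case (mnemonics shown for
   illustration only).  */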
10281 
10282 /* Helper for the following.  Get rid of [r+r] memory refs
10283    in cases where it won't work (TImode, TFmode, TDmode, PTImode).  */
10284 
10285 static void
10286 rs6000_eliminate_indexed_memrefs (rtx operands[2])
10287 {
10288   if (MEM_P (operands[0])
10289       && !REG_P (XEXP (operands[0], 0))
10290       && ! legitimate_constant_pool_address_p (XEXP (operands[0], 0),
10291 					       GET_MODE (operands[0]), false))
10292     operands[0]
10293       = replace_equiv_address (operands[0],
10294 			       copy_addr_to_reg (XEXP (operands[0], 0)));
10295 
10296   if (MEM_P (operands[1])
10297       && !REG_P (XEXP (operands[1], 0))
10298       && ! legitimate_constant_pool_address_p (XEXP (operands[1], 0),
10299 					       GET_MODE (operands[1]), false))
10300     operands[1]
10301       = replace_equiv_address (operands[1],
10302 			       copy_addr_to_reg (XEXP (operands[1], 0)));
10303 }
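
/* Illustrative effect: an operand such as

     (mem:TF (plus:DI (reg:DI r3) (reg:DI r4)))

   is rewritten as (mem:TF (reg:DI rT)) preceded by a copy of the sum
   into the fresh base register rT, because the multi-word moves for
   these modes cannot use indexed addresses.  */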
10304 
10305 /* Generate a vector of constants to permute MODE for a little-endian
10306    storage operation by swapping the two halves of a vector.  */
10307 static rtvec
10308 rs6000_const_vec (machine_mode mode)
10309 {
10310   int i, subparts;
10311   rtvec v;
10312 
10313   switch (mode)
10314     {
10315     case E_V1TImode:
10316       subparts = 1;
10317       break;
10318     case E_V2DFmode:
10319     case E_V2DImode:
10320       subparts = 2;
10321       break;
10322     case E_V4SFmode:
10323     case E_V4SImode:
10324       subparts = 4;
10325       break;
10326     case E_V8HImode:
10327       subparts = 8;
10328       break;
10329     case E_V16QImode:
10330       subparts = 16;
10331       break;
10332     default:
10333       gcc_unreachable();
10334     }
10335 
10336   v = rtvec_alloc (subparts);
10337 
10338   for (i = 0; i < subparts / 2; ++i)
10339     RTVEC_ELT (v, i) = gen_rtx_CONST_INT (DImode, i + subparts / 2);
10340   for (i = subparts / 2; i < subparts; ++i)
10341     RTVEC_ELT (v, i) = gen_rtx_CONST_INT (DImode, i - subparts / 2);
10342 
10343   return v;
10344 }
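
/* Example: for V4SImode this returns the selector { 2, 3, 0, 1 }, which
   swaps the two doubleword halves of the vector, the same element
   permutation that lxvd2x/stxvd2x perform on little-endian.  */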
10345 
10346 /* Emit an lxvd2x, stxvd2x, or xxpermdi instruction for a VSX load or
10347    store operation.  */
10348 void
10349 rs6000_emit_le_vsx_permute (rtx dest, rtx source, machine_mode mode)
10350 {
10351   gcc_assert (!altivec_indexed_or_indirect_operand (dest, mode));
10352   gcc_assert (!altivec_indexed_or_indirect_operand (source, mode));
10353 
10354   /* Scalar permutations are easier to express in integer modes rather than
10355      floating-point modes, so cast them here.  We use V1TImode instead
10356      of TImode to ensure that the values don't go through GPRs.  */
10357   if (FLOAT128_VECTOR_P (mode))
10358     {
10359       dest = gen_lowpart (V1TImode, dest);
10360       source = gen_lowpart (V1TImode, source);
10361       mode = V1TImode;
10362     }
10363 
10364   /* Use ROTATE instead of VEC_SELECT if the mode contains only a single
10365      scalar.  */
10366   if (mode == TImode || mode == V1TImode)
10367     emit_insn (gen_rtx_SET (dest, gen_rtx_ROTATE (mode, source,
10368 						  GEN_INT (64))));
10369   else
10370     {
10371       rtx par = gen_rtx_PARALLEL (VOIDmode, rs6000_const_vec (mode));
10372       emit_insn (gen_rtx_SET (dest, gen_rtx_VEC_SELECT (mode, source, par)));
10373     }
10374 }
10375 
10376 /* Emit a little-endian load from vector memory location SOURCE to VSX
10377    register DEST in mode MODE.  The load is done with two permuting
10378    insns that represent an lxvd2x and an xxpermdi.  */
10379 void
10380 rs6000_emit_le_vsx_load (rtx dest, rtx source, machine_mode mode)
10381 {
10382   /* Use V2DImode to do swaps of types with 128-bit scalar parts (TImode,
10383      V1TImode).  */
10384   if (mode == TImode || mode == V1TImode)
10385     {
10386       mode = V2DImode;
10387       dest = gen_lowpart (V2DImode, dest);
10388       source = adjust_address (source, V2DImode, 0);
10389     }
10390 
10391   rtx tmp = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (dest) : dest;
10392   rs6000_emit_le_vsx_permute (tmp, source, mode);
10393   rs6000_emit_le_vsx_permute (dest, tmp, mode);
10394 }
10395 
10396 /* Emit a little-endian store to vector memory location DEST from VSX
10397    register SOURCE in mode MODE.  The store is done with two permuting
10398    insns that represent an xxpermdi and an stxvd2x.  */
10399 void
10400 rs6000_emit_le_vsx_store (rtx dest, rtx source, machine_mode mode)
10401 {
10402   /* This should never be called after LRA.  */
10403   gcc_assert (can_create_pseudo_p ());
10404 
10405   /* Use V2DImode to do swaps of types with 128-bit scalar parts (TImode,
10406      V1TImode).  */
10407   if (mode == TImode || mode == V1TImode)
10408     {
10409       mode = V2DImode;
10410       dest = adjust_address (dest, V2DImode, 0);
10411       source = gen_lowpart (V2DImode, source);
10412     }
10413 
10414   rtx tmp = gen_reg_rtx_and_attrs (source);
10415   rs6000_emit_le_vsx_permute (tmp, source, mode);
10416   rs6000_emit_le_vsx_permute (dest, tmp, mode);
10417 }
10418 
10419 /* Emit a sequence representing a little-endian VSX load or store,
10420    moving data from SOURCE to DEST in mode MODE.  This is done
10421    separately from rs6000_emit_move to ensure it is called only
10422    during expand.  LE VSX loads and stores introduced later are
10423    handled with a split.  The expand-time RTL generation allows
10424    us to optimize away redundant pairs of register-permutes.  */
10425 void
10426 rs6000_emit_le_vsx_move (rtx dest, rtx source, machine_mode mode)
10427 {
10428   gcc_assert (!BYTES_BIG_ENDIAN
10429 	      && VECTOR_MEM_VSX_P (mode)
10430 	      && !TARGET_P9_VECTOR
10431 	      && !gpr_or_gpr_p (dest, source)
10432 	      && (MEM_P (source) ^ MEM_P (dest)));
10433 
10434   if (MEM_P (source))
10435     {
10436       gcc_assert (REG_P (dest) || SUBREG_P (dest));
10437       rs6000_emit_le_vsx_load (dest, source, mode);
10438     }
10439   else
10440     {
10441       if (!REG_P (source))
10442 	source = force_reg (mode, source);
10443       rs6000_emit_le_vsx_store (dest, source, mode);
10444     }
10445 }
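
/* Sketch of the resulting sequences (illustrative): a little-endian
   load becomes

	lxvd2x   vT,0,rA	# doubleword-swapped load
	xxpermdi vD,vT,vT,2	# swap the halves back

   and a store is the mirror image (xxpermdi, then stxvd2x).  Emitting
   both permutes at expand time lets later passes cancel adjacent swap
   pairs.  */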
10446 
10447 /* Return whether a SFmode or SImode move can be done without converting one
10448    mode to another.  This arises when we have:
10449 
10450 	(SUBREG:SF (REG:SI ...))
10451 	(SUBREG:SI (REG:SF ...))
10452 
10453    and one of the values is in a floating point/vector register, where SFmode
10454    scalars are stored in DFmode format.  */
10455 
10456 bool
10457 valid_sf_si_move (rtx dest, rtx src, machine_mode mode)
10458 {
10459   if (TARGET_ALLOW_SF_SUBREG)
10460     return true;
10461 
10462   if (mode != SFmode && GET_MODE_CLASS (mode) != MODE_INT)
10463     return true;
10464 
10465   if (!SUBREG_P (src) || !sf_subreg_operand (src, mode))
10466     return true;
10467 
10468   /* Allow (set (SUBREG:SI (REG:SF)) (SUBREG:SI (REG:SF))).  */
10469   if (SUBREG_P (dest))
10470     {
10471       rtx dest_subreg = SUBREG_REG (dest);
10472       rtx src_subreg = SUBREG_REG (src);
10473       return GET_MODE (dest_subreg) == GET_MODE (src_subreg);
10474     }
10475 
10476   return false;
10477 }
10478 
10479 
10480 /* Helper function to change moves with:
10481 
10482 	(SUBREG:SF (REG:SI)) and
10483 	(SUBREG:SI (REG:SF))
10484 
10485    into separate UNSPEC insns.  In the PowerPC architecture, scalar SFmode
10486    values are stored as DFmode values in the VSX registers.  We need to convert
10487    the bits before we can use a direct move or operate on the bits in the
10488    vector register as an integer type.
10489 
10490    Skip things like (set (SUBREG:SI (...)) (SUBREG:SI (...))).  */
10491 
10492 static bool
10493 rs6000_emit_move_si_sf_subreg (rtx dest, rtx source, machine_mode mode)
10494 {
10495   if (TARGET_DIRECT_MOVE_64BIT && !reload_completed
10496       && (!SUBREG_P (dest) || !sf_subreg_operand (dest, mode))
10497       && SUBREG_P (source) && sf_subreg_operand (source, mode))
10498     {
10499       rtx inner_source = SUBREG_REG (source);
10500       machine_mode inner_mode = GET_MODE (inner_source);
10501 
10502       if (mode == SImode && inner_mode == SFmode)
10503 	{
10504 	  emit_insn (gen_movsi_from_sf (dest, inner_source));
10505 	  return true;
10506 	}
10507 
10508       if (mode == SFmode && inner_mode == SImode)
10509 	{
10510 	  emit_insn (gen_movsf_from_si (dest, inner_source));
10511 	  return true;
10512 	}
10513     }
10514 
10515   return false;
10516 }
10517 
10518 /* Emit a move from SOURCE to DEST in mode MODE.  */
10519 void
10520 rs6000_emit_move (rtx dest, rtx source, machine_mode mode)
10521 {
10522   rtx operands[2];
10523   operands[0] = dest;
10524   operands[1] = source;
10525 
10526   if (TARGET_DEBUG_ADDR)
10527     {
10528       fprintf (stderr,
10529 	       "\nrs6000_emit_move: mode = %s, lra_in_progress = %d, "
10530 	       "reload_completed = %d, can_create_pseudos = %d.\ndest:\n",
10531 	       GET_MODE_NAME (mode),
10532 	       lra_in_progress,
10533 	       reload_completed,
10534 	       can_create_pseudo_p ());
10535       debug_rtx (dest);
10536       fprintf (stderr, "source:\n");
10537       debug_rtx (source);
10538     }
10539 
10540   /* Check that we get CONST_WIDE_INT only when we should.  */
10541   if (CONST_WIDE_INT_P (operands[1])
10542       && GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT)
10543     gcc_unreachable ();
10544 
10545 #ifdef HAVE_AS_GNU_ATTRIBUTE
10546   /* If we use a long double type, set the flags in .gnu_attribute that say
10547      what the long double type is.  This is to allow the linker's warning
10548      message for the wrong long double to be useful, even if the function does
10549      not do a call (for example, doing a 128-bit add on power9 if the long
10550      double type is IEEE 128-bit).  Do not set this if __ibm128 or
10551      __float128 are used and they aren't the default long double type.  */
10552   if (rs6000_gnu_attr && (HAVE_LD_PPC_GNU_ATTR_LONG_DOUBLE || TARGET_64BIT))
10553     {
10554       if (TARGET_LONG_DOUBLE_128 && (mode == TFmode || mode == TCmode))
10555 	rs6000_passes_float = rs6000_passes_long_double = true;
10556 
10557       else if (!TARGET_LONG_DOUBLE_128 && (mode == DFmode || mode == DCmode))
10558 	rs6000_passes_float = rs6000_passes_long_double = true;
10559     }
10560 #endif
10561 
10562   /* See if we need to special case SImode/SFmode SUBREG moves.  */
10563   if ((mode == SImode || mode == SFmode) && SUBREG_P (source)
10564       && rs6000_emit_move_si_sf_subreg (dest, source, mode))
10565     return;
10566 
10567   /* Check if GCC is setting up a block move that will end up using FP
10568      registers as temporaries.  We must make sure this is acceptable.  */
10569   if (MEM_P (operands[0])
10570       && MEM_P (operands[1])
10571       && mode == DImode
10572       && (rs6000_slow_unaligned_access (DImode, MEM_ALIGN (operands[0]))
10573 	  || rs6000_slow_unaligned_access (DImode, MEM_ALIGN (operands[1])))
10574       && ! (rs6000_slow_unaligned_access (SImode,
10575 					  (MEM_ALIGN (operands[0]) > 32
10576 					   ? 32 : MEM_ALIGN (operands[0])))
10577 	    || rs6000_slow_unaligned_access (SImode,
10578 					     (MEM_ALIGN (operands[1]) > 32
10579 					      ? 32 : MEM_ALIGN (operands[1]))))
10580       && ! MEM_VOLATILE_P (operands [0])
10581       && ! MEM_VOLATILE_P (operands [1]))
10582     {
10583       emit_move_insn (adjust_address (operands[0], SImode, 0),
10584 		      adjust_address (operands[1], SImode, 0));
10585       emit_move_insn (adjust_address (copy_rtx (operands[0]), SImode, 4),
10586 		      adjust_address (copy_rtx (operands[1]), SImode, 4));
10587       return;
10588     }
10589 
10590   if (can_create_pseudo_p () && MEM_P (operands[0])
10591       && !gpc_reg_operand (operands[1], mode))
10592     operands[1] = force_reg (mode, operands[1]);
10593 
10594   /* Recognize the case where operand[1] is a reference to thread-local
10595      data and load its address to a register.  */
10596   if (tls_referenced_p (operands[1]))
10597     {
10598       enum tls_model model;
10599       rtx tmp = operands[1];
10600       rtx addend = NULL;
10601 
10602       if (GET_CODE (tmp) == CONST && GET_CODE (XEXP (tmp, 0)) == PLUS)
10603 	{
10604           addend = XEXP (XEXP (tmp, 0), 1);
10605 	  tmp = XEXP (XEXP (tmp, 0), 0);
10606 	}
10607 
10608       gcc_assert (SYMBOL_REF_P (tmp));
10609       model = SYMBOL_REF_TLS_MODEL (tmp);
10610       gcc_assert (model != 0);
10611 
10612       tmp = rs6000_legitimize_tls_address (tmp, model);
10613       if (addend)
10614 	{
10615 	  tmp = gen_rtx_PLUS (mode, tmp, addend);
10616 	  tmp = force_operand (tmp, operands[0]);
10617 	}
10618       operands[1] = tmp;
10619     }
10620 
10621   /* 128-bit constant floating-point values on Darwin should really be loaded
10622      as two parts.  However, this premature splitting is a problem when DFmode
10623      values can go into Altivec registers.  */
10624   if (TARGET_MACHO && CONST_DOUBLE_P (operands[1]) && FLOAT128_IBM_P (mode)
10625       && !reg_addr[DFmode].scalar_in_vmx_p)
10626     {
10627       rs6000_emit_move (simplify_gen_subreg (DFmode, operands[0], mode, 0),
10628 			simplify_gen_subreg (DFmode, operands[1], mode, 0),
10629 			DFmode);
10630       rs6000_emit_move (simplify_gen_subreg (DFmode, operands[0], mode,
10631 					     GET_MODE_SIZE (DFmode)),
10632 			simplify_gen_subreg (DFmode, operands[1], mode,
10633 					     GET_MODE_SIZE (DFmode)),
10634 			DFmode);
10635       return;
10636     }
10637 
10638   /* Transform (p0:DD, (SUBREG:DD p1:SD)) to ((SUBREG:SD p0:DD),
10639      p1:SD) if p1 is not of floating point class and p0 is spilled as
10640      we can have no analogous movsd_store for this.  */
10641   if (lra_in_progress && mode == DDmode
10642       && REG_P (operands[0]) && !HARD_REGISTER_P (operands[0])
10643       && reg_preferred_class (REGNO (operands[0])) == NO_REGS
10644       && SUBREG_P (operands[1]) && REG_P (SUBREG_REG (operands[1]))
10645       && GET_MODE (SUBREG_REG (operands[1])) == SDmode)
10646     {
10647       enum reg_class cl;
10648       int regno = REGNO (SUBREG_REG (operands[1]));
10649 
10650       if (!HARD_REGISTER_NUM_P (regno))
10651 	{
10652 	  cl = reg_preferred_class (regno);
10653 	  regno = reg_renumber[regno];
10654 	  if (regno < 0)
10655 	    regno = cl == NO_REGS ? -1 : ira_class_hard_regs[cl][1];
10656 	}
10657       if (regno >= 0 && ! FP_REGNO_P (regno))
10658 	{
10659 	  mode = SDmode;
10660 	  operands[0] = gen_lowpart_SUBREG (SDmode, operands[0]);
10661 	  operands[1] = SUBREG_REG (operands[1]);
10662 	}
10663     }
10664   if (lra_in_progress
10665       && mode == SDmode
10666       && REG_P (operands[0]) && !HARD_REGISTER_P (operands[0])
10667       && reg_preferred_class (REGNO (operands[0])) == NO_REGS
10668       && (REG_P (operands[1])
10669 	  || (SUBREG_P (operands[1]) && REG_P (SUBREG_REG (operands[1])))))
10670     {
10671       int regno = reg_or_subregno (operands[1]);
10672       enum reg_class cl;
10673 
10674       if (!HARD_REGISTER_NUM_P (regno))
10675 	{
10676 	  cl = reg_preferred_class (regno);
10677 	  gcc_assert (cl != NO_REGS);
10678 	  regno = reg_renumber[regno];
10679 	  if (regno < 0)
10680 	    regno = ira_class_hard_regs[cl][0];
10681 	}
10682       if (FP_REGNO_P (regno))
10683 	{
10684 	  if (GET_MODE (operands[0]) != DDmode)
10685 	    operands[0] = gen_rtx_SUBREG (DDmode, operands[0], 0);
10686 	  emit_insn (gen_movsd_store (operands[0], operands[1]));
10687 	}
10688       else if (INT_REGNO_P (regno))
10689 	emit_insn (gen_movsd_hardfloat (operands[0], operands[1]));
10690       else
10691 	gcc_unreachable();
10692       return;
10693     }
10694   /* Transform ((SUBREG:DD p0:SD), p1:DD) to (p0:SD, (SUBREG:SD
10695      p:DD)) if p0 is not of floating point class and p1 is spilled as
10696      we can have no analogous movsd_load for this.  */
10697   if (lra_in_progress && mode == DDmode
10698       && SUBREG_P (operands[0]) && REG_P (SUBREG_REG (operands[0]))
10699       && GET_MODE (SUBREG_REG (operands[0])) == SDmode
10700       && REG_P (operands[1]) && !HARD_REGISTER_P (operands[1])
10701       && reg_preferred_class (REGNO (operands[1])) == NO_REGS)
10702     {
10703       enum reg_class cl;
10704       int regno = REGNO (SUBREG_REG (operands[0]));
10705 
10706       if (!HARD_REGISTER_NUM_P (regno))
10707 	{
10708 	  cl = reg_preferred_class (regno);
10709 	  regno = reg_renumber[regno];
10710 	  if (regno < 0)
10711 	    regno = cl == NO_REGS ? -1 : ira_class_hard_regs[cl][0];
10712 	}
10713       if (regno >= 0 && ! FP_REGNO_P (regno))
10714 	{
10715 	  mode = SDmode;
10716 	  operands[0] = SUBREG_REG (operands[0]);
10717 	  operands[1] = gen_lowpart_SUBREG (SDmode, operands[1]);
10718 	}
10719     }
10720   if (lra_in_progress
10721       && mode == SDmode
10722       && (REG_P (operands[0])
10723 	  || (SUBREG_P (operands[0]) && REG_P (SUBREG_REG (operands[0]))))
10724       && REG_P (operands[1]) && !HARD_REGISTER_P (operands[1])
10725       && reg_preferred_class (REGNO (operands[1])) == NO_REGS)
10726     {
10727       int regno = reg_or_subregno (operands[0]);
10728       enum reg_class cl;
10729 
10730       if (!HARD_REGISTER_NUM_P (regno))
10731 	{
10732 	  cl = reg_preferred_class (regno);
10733 	  gcc_assert (cl != NO_REGS);
10734 	  regno = reg_renumber[regno];
10735 	  if (regno < 0)
10736 	    regno = ira_class_hard_regs[cl][0];
10737 	}
10738       if (FP_REGNO_P (regno))
10739 	{
10740 	  if (GET_MODE (operands[1]) != DDmode)
10741 	    operands[1] = gen_rtx_SUBREG (DDmode, operands[1], 0);
10742 	  emit_insn (gen_movsd_load (operands[0], operands[1]));
10743 	}
10744       else if (INT_REGNO_P (regno))
10745 	emit_insn (gen_movsd_hardfloat (operands[0], operands[1]));
10746       else
10747 	gcc_unreachable();
10748       return;
10749     }
10750 
10751   /* FIXME:  In the long term, this switch statement should go away
10752      and be replaced by a sequence of tests based on things like
10753      mode == Pmode.  */
10754   switch (mode)
10755     {
10756     case E_HImode:
10757     case E_QImode:
10758       if (CONSTANT_P (operands[1])
10759 	  && !CONST_INT_P (operands[1]))
10760 	operands[1] = force_const_mem (mode, operands[1]);
10761       break;
10762 
10763     case E_TFmode:
10764     case E_TDmode:
10765     case E_IFmode:
10766     case E_KFmode:
10767       if (FLOAT128_2REG_P (mode))
10768 	rs6000_eliminate_indexed_memrefs (operands);
10769       /* fall through */
10770 
10771     case E_DFmode:
10772     case E_DDmode:
10773     case E_SFmode:
10774     case E_SDmode:
10775       if (CONSTANT_P (operands[1])
10776 	  && ! easy_fp_constant (operands[1], mode))
10777 	operands[1] = force_const_mem (mode, operands[1]);
10778       break;
10779 
10780     case E_V16QImode:
10781     case E_V8HImode:
10782     case E_V4SFmode:
10783     case E_V4SImode:
10784     case E_V2DFmode:
10785     case E_V2DImode:
10786     case E_V1TImode:
10787       if (CONSTANT_P (operands[1])
10788 	  && !easy_vector_constant (operands[1], mode))
10789 	operands[1] = force_const_mem (mode, operands[1]);
10790       break;
10791 
10792     case E_OOmode:
10793     case E_XOmode:
10794       if (CONST_INT_P (operands[1]) && INTVAL (operands[1]) != 0)
10795 	error ("%qs is an opaque type, and you cannot set it to other values",
10796 	       (mode == OOmode) ? "__vector_pair" : "__vector_quad");
10797       break;
10798 
10799     case E_SImode:
10800     case E_DImode:
10801       /* Use the default pattern for the address of ELF small data.  */
10802       if (TARGET_ELF
10803 	  && mode == Pmode
10804 	  && DEFAULT_ABI == ABI_V4
10805 	  && (SYMBOL_REF_P (operands[1])
10806 	      || GET_CODE (operands[1]) == CONST)
10807 	  && small_data_operand (operands[1], mode))
10808 	{
10809 	  emit_insn (gen_rtx_SET (operands[0], operands[1]));
10810 	  return;
10811 	}
10812 
10813       /* Use the default pattern for loading up PC-relative addresses.  */
10814       if (TARGET_PCREL && mode == Pmode
10815 	  && pcrel_local_or_external_address (operands[1], Pmode))
10816 	{
10817 	  emit_insn (gen_rtx_SET (operands[0], operands[1]));
10818 	  return;
10819 	}
10820 
10821       if (DEFAULT_ABI == ABI_V4
10822 	  && mode == Pmode && mode == SImode
10823 	  && flag_pic == 1 && got_operand (operands[1], mode))
10824 	{
10825 	  emit_insn (gen_movsi_got (operands[0], operands[1]));
10826 	  return;
10827 	}
10828 
10829       if ((TARGET_ELF || DEFAULT_ABI == ABI_DARWIN)
10830 	  && TARGET_NO_TOC_OR_PCREL
10831 	  && ! flag_pic
10832 	  && mode == Pmode
10833 	  && CONSTANT_P (operands[1])
10834 	  && GET_CODE (operands[1]) != HIGH
10835 	  && !CONST_INT_P (operands[1]))
10836 	{
10837 	  rtx target = (!can_create_pseudo_p ()
10838 			? operands[0]
10839 			: gen_reg_rtx (mode));
10840 
10841 	  /* If this is a function address on -mcall-aixdesc,
10842 	     convert it to the address of the descriptor.  */
10843 	  if (DEFAULT_ABI == ABI_AIX
10844 	      && SYMBOL_REF_P (operands[1])
10845 	      && XSTR (operands[1], 0)[0] == '.')
10846 	    {
10847 	      const char *name = XSTR (operands[1], 0);
10848 	      rtx new_ref;
10849 	      while (*name == '.')
10850 		name++;
10851 	      new_ref = gen_rtx_SYMBOL_REF (Pmode, name);
10852 	      CONSTANT_POOL_ADDRESS_P (new_ref)
10853 		= CONSTANT_POOL_ADDRESS_P (operands[1]);
10854 	      SYMBOL_REF_FLAGS (new_ref) = SYMBOL_REF_FLAGS (operands[1]);
10855 	      SYMBOL_REF_USED (new_ref) = SYMBOL_REF_USED (operands[1]);
10856 	      SYMBOL_REF_DATA (new_ref) = SYMBOL_REF_DATA (operands[1]);
10857 	      operands[1] = new_ref;
10858 	    }
10859 
10860 	  if (DEFAULT_ABI == ABI_DARWIN)
10861 	    {
10862 #if TARGET_MACHO
10863 	      /* This is not PIC code, but could require the subset of
10864 		 indirections used by mdynamic-no-pic.  */
10865 	      if (MACHO_DYNAMIC_NO_PIC_P)
10866 		{
10867 		  /* Take care of any required data indirection.  */
10868 		  operands[1] = rs6000_machopic_legitimize_pic_address (
10869 				  operands[1], mode, operands[0]);
10870 		  if (operands[0] != operands[1])
10871 		    emit_insn (gen_rtx_SET (operands[0], operands[1]));
10872 		  return;
10873 		}
10874 #endif
10875 	      emit_insn (gen_macho_high (Pmode, target, operands[1]));
10876 	      emit_insn (gen_macho_low (Pmode, operands[0],
10877 					target, operands[1]));
10878 	      return;
10879 	    }
10880 
10881 	  emit_insn (gen_elf_high (target, operands[1]));
10882 	  emit_insn (gen_elf_low (operands[0], target, operands[1]));
10883 	  return;
10884 	}
10885 
10886       /* If this is a SYMBOL_REF that refers to a constant pool entry,
10887 	 and we have put it in the TOC, we just need to make a TOC-relative
10888 	 reference to it.  */
10889       if (TARGET_TOC
10890 	  && SYMBOL_REF_P (operands[1])
10891 	  && use_toc_relative_ref (operands[1], mode))
10892 	operands[1] = create_TOC_reference (operands[1], operands[0]);
10893       else if (mode == Pmode
10894 	       && CONSTANT_P (operands[1])
10895 	       && GET_CODE (operands[1]) != HIGH
10896 	       && ((REG_P (operands[0])
10897 		    && FP_REGNO_P (REGNO (operands[0])))
10898 		   || !CONST_INT_P (operands[1])
10899 		   || (num_insns_constant (operands[1], mode)
10900 		       > (TARGET_CMODEL != CMODEL_SMALL ? 3 : 2)))
10901 	       && !toc_relative_expr_p (operands[1], false, NULL, NULL)
10902 	       && (TARGET_CMODEL == CMODEL_SMALL
10903 		   || can_create_pseudo_p ()
10904 		   || (REG_P (operands[0])
10905 		       && INT_REG_OK_FOR_BASE_P (operands[0], true))))
10906 	{
10907 
10908 #if TARGET_MACHO
10909 	  /* Darwin uses a special PIC legitimizer.  */
10910 	  if (DEFAULT_ABI == ABI_DARWIN && MACHOPIC_INDIRECT)
10911 	    {
10912 	      operands[1] =
10913 		rs6000_machopic_legitimize_pic_address (operands[1], mode,
10914 							operands[0]);
10915 	      if (operands[0] != operands[1])
10916 		emit_insn (gen_rtx_SET (operands[0], operands[1]));
10917 	      return;
10918 	    }
10919 #endif
10920 
10921 	  /* If we are to limit the number of things we put in the TOC and
10922 	     this is a symbol plus a constant we can add in one insn,
10923 	     just put the symbol in the TOC and add the constant.  */
10924 	  if (GET_CODE (operands[1]) == CONST
10925 	      && TARGET_NO_SUM_IN_TOC
10926 	      && GET_CODE (XEXP (operands[1], 0)) == PLUS
10927 	      && add_operand (XEXP (XEXP (operands[1], 0), 1), mode)
10928 	      && (GET_CODE (XEXP (XEXP (operands[1], 0), 0)) == LABEL_REF
10929 		  || SYMBOL_REF_P (XEXP (XEXP (operands[1], 0), 0)))
10930 	      && ! side_effects_p (operands[0]))
10931 	    {
10932 	      rtx sym =
10933 		force_const_mem (mode, XEXP (XEXP (operands[1], 0), 0));
10934 	      rtx other = XEXP (XEXP (operands[1], 0), 1);
10935 
10936 	      sym = force_reg (mode, sym);
10937 	      emit_insn (gen_add3_insn (operands[0], sym, other));
10938 	      return;
10939 	    }
10940 
10941 	  operands[1] = force_const_mem (mode, operands[1]);
10942 
10943 	  if (TARGET_TOC
10944 	      && SYMBOL_REF_P (XEXP (operands[1], 0))
10945 	      && use_toc_relative_ref (XEXP (operands[1], 0), mode))
10946 	    {
10947 	      rtx tocref = create_TOC_reference (XEXP (operands[1], 0),
10948 						 operands[0]);
10949 	      operands[1] = gen_const_mem (mode, tocref);
10950 	      set_mem_alias_set (operands[1], get_TOC_alias_set ());
10951 	    }
10952 	}
10953       break;
10954 
10955     case E_TImode:
10956       if (!VECTOR_MEM_VSX_P (TImode))
10957 	rs6000_eliminate_indexed_memrefs (operands);
10958       break;
10959 
10960     case E_PTImode:
10961       rs6000_eliminate_indexed_memrefs (operands);
10962       break;
10963 
10964     default:
10965       fatal_insn ("bad move", gen_rtx_SET (dest, source));
10966     }
10967 
10968   /* Above, we may have called force_const_mem which may have returned
10969      an invalid address.  If we can, fix this up; otherwise, reload will
10970      have to deal with it.  */
10971   if (MEM_P (operands[1]))
10972     operands[1] = validize_mem (operands[1]);
10973 
10974   emit_insn (gen_rtx_SET (operands[0], operands[1]));
10975 }


/* Set up AIX/Darwin/64-bit Linux quad floating point routines.  */
static void
init_float128_ibm (machine_mode mode)
{
  if (!TARGET_XL_COMPAT)
    {
      set_optab_libfunc (add_optab, mode, "__gcc_qadd");
      set_optab_libfunc (sub_optab, mode, "__gcc_qsub");
      set_optab_libfunc (smul_optab, mode, "__gcc_qmul");
      set_optab_libfunc (sdiv_optab, mode, "__gcc_qdiv");

      if (!TARGET_HARD_FLOAT)
	{
	  set_optab_libfunc (neg_optab, mode, "__gcc_qneg");
	  set_optab_libfunc (eq_optab, mode, "__gcc_qeq");
	  set_optab_libfunc (ne_optab, mode, "__gcc_qne");
	  set_optab_libfunc (gt_optab, mode, "__gcc_qgt");
	  set_optab_libfunc (ge_optab, mode, "__gcc_qge");
	  set_optab_libfunc (lt_optab, mode, "__gcc_qlt");
	  set_optab_libfunc (le_optab, mode, "__gcc_qle");
	  set_optab_libfunc (unord_optab, mode, "__gcc_qunord");

	  set_conv_libfunc (sext_optab, mode, SFmode, "__gcc_stoq");
	  set_conv_libfunc (sext_optab, mode, DFmode, "__gcc_dtoq");
	  set_conv_libfunc (trunc_optab, SFmode, mode, "__gcc_qtos");
	  set_conv_libfunc (trunc_optab, DFmode, mode, "__gcc_qtod");
	  set_conv_libfunc (sfix_optab, SImode, mode, "__gcc_qtoi");
	  set_conv_libfunc (ufix_optab, SImode, mode, "__gcc_qtou");
	  set_conv_libfunc (sfloat_optab, mode, SImode, "__gcc_itoq");
	  set_conv_libfunc (ufloat_optab, mode, SImode, "__gcc_utoq");
	}
    }
  else
    {
      set_optab_libfunc (add_optab, mode, "_xlqadd");
      set_optab_libfunc (sub_optab, mode, "_xlqsub");
      set_optab_libfunc (smul_optab, mode, "_xlqmul");
      set_optab_libfunc (sdiv_optab, mode, "_xlqdiv");
    }

  /* Add various conversions for IFmode to use the traditional TFmode
     names.  */
  if (mode == IFmode)
    {
      set_conv_libfunc (sext_optab, mode, SDmode, "__dpd_extendsdtf");
      set_conv_libfunc (sext_optab, mode, DDmode, "__dpd_extendddtf");
      set_conv_libfunc (trunc_optab, mode, TDmode, "__dpd_trunctdtf");
      set_conv_libfunc (trunc_optab, SDmode, mode, "__dpd_trunctfsd");
      set_conv_libfunc (trunc_optab, DDmode, mode, "__dpd_trunctfdd");
      set_conv_libfunc (sext_optab, TDmode, mode, "__dpd_extendtftd");

      set_conv_libfunc (sfix_optab, DImode, mode, "__fixtfdi");
      set_conv_libfunc (ufix_optab, DImode, mode, "__fixunstfdi");

      set_conv_libfunc (sfloat_optab, mode, DImode, "__floatditf");
      set_conv_libfunc (ufloat_optab, mode, DImode, "__floatunditf");

      if (TARGET_POWERPC64)
	{
	  set_conv_libfunc (sfix_optab, TImode, mode, "__fixtfti");
	  set_conv_libfunc (ufix_optab, TImode, mode, "__fixunstfti");
	  set_conv_libfunc (sfloat_optab, mode, TImode, "__floattitf");
	  set_conv_libfunc (ufloat_optab, mode, TImode, "__floatuntitf");
	}
    }
}

/* Set up IEEE 128-bit floating point routines.  Use different names if the
   arguments can be passed in a vector register.  The historical PowerPC
   implementation of IEEE 128-bit floating point used _q_<op> for the names, so
   continue to use that if we aren't using vector registers to pass IEEE
   128-bit floating point.  */

static void
init_float128_ieee (machine_mode mode)
{
  if (FLOAT128_VECTOR_P (mode))
    {
      set_optab_libfunc (add_optab, mode, "__addkf3");
      set_optab_libfunc (sub_optab, mode, "__subkf3");
      set_optab_libfunc (neg_optab, mode, "__negkf2");
      set_optab_libfunc (smul_optab, mode, "__mulkf3");
      set_optab_libfunc (sdiv_optab, mode, "__divkf3");
      set_optab_libfunc (sqrt_optab, mode, "__sqrtkf2");
      set_optab_libfunc (abs_optab, mode, "__abskf2");
      set_optab_libfunc (powi_optab, mode, "__powikf2");

      set_optab_libfunc (eq_optab, mode, "__eqkf2");
      set_optab_libfunc (ne_optab, mode, "__nekf2");
      set_optab_libfunc (gt_optab, mode, "__gtkf2");
      set_optab_libfunc (ge_optab, mode, "__gekf2");
      set_optab_libfunc (lt_optab, mode, "__ltkf2");
      set_optab_libfunc (le_optab, mode, "__lekf2");
      set_optab_libfunc (unord_optab, mode, "__unordkf2");

      set_conv_libfunc (sext_optab, mode, SFmode, "__extendsfkf2");
      set_conv_libfunc (sext_optab, mode, DFmode, "__extenddfkf2");
      set_conv_libfunc (trunc_optab, SFmode, mode, "__trunckfsf2");
      set_conv_libfunc (trunc_optab, DFmode, mode, "__trunckfdf2");

      set_conv_libfunc (sext_optab, mode, IFmode, "__trunctfkf2");
      if (mode != TFmode && FLOAT128_IBM_P (TFmode))
	set_conv_libfunc (sext_optab, mode, TFmode, "__trunctfkf2");

      set_conv_libfunc (trunc_optab, IFmode, mode, "__extendkftf2");
      if (mode != TFmode && FLOAT128_IBM_P (TFmode))
	set_conv_libfunc (trunc_optab, TFmode, mode, "__extendkftf2");

      set_conv_libfunc (sext_optab, mode, SDmode, "__dpd_extendsdkf");
      set_conv_libfunc (sext_optab, mode, DDmode, "__dpd_extendddkf");
      set_conv_libfunc (trunc_optab, mode, TDmode, "__dpd_trunctdkf");
      set_conv_libfunc (trunc_optab, SDmode, mode, "__dpd_trunckfsd");
      set_conv_libfunc (trunc_optab, DDmode, mode, "__dpd_trunckfdd");
      set_conv_libfunc (sext_optab, TDmode, mode, "__dpd_extendkftd");

      set_conv_libfunc (sfix_optab, SImode, mode, "__fixkfsi");
      set_conv_libfunc (ufix_optab, SImode, mode, "__fixunskfsi");
      set_conv_libfunc (sfix_optab, DImode, mode, "__fixkfdi");
      set_conv_libfunc (ufix_optab, DImode, mode, "__fixunskfdi");

      set_conv_libfunc (sfloat_optab, mode, SImode, "__floatsikf");
      set_conv_libfunc (ufloat_optab, mode, SImode, "__floatunsikf");
      set_conv_libfunc (sfloat_optab, mode, DImode, "__floatdikf");
      set_conv_libfunc (ufloat_optab, mode, DImode, "__floatundikf");

      if (TARGET_POWERPC64)
	{
	  set_conv_libfunc (sfix_optab, TImode, mode, "__fixkfti_sw");
	  set_conv_libfunc (ufix_optab, TImode, mode, "__fixunskfti_sw");
	  set_conv_libfunc (sfloat_optab, mode, TImode, "__floattikf_sw");
	  set_conv_libfunc (ufloat_optab, mode, TImode, "__floatuntikf_sw");
	}
    }

  else
    {
      set_optab_libfunc (add_optab, mode, "_q_add");
      set_optab_libfunc (sub_optab, mode, "_q_sub");
      set_optab_libfunc (neg_optab, mode, "_q_neg");
      set_optab_libfunc (smul_optab, mode, "_q_mul");
      set_optab_libfunc (sdiv_optab, mode, "_q_div");
      if (TARGET_PPC_GPOPT)
	set_optab_libfunc (sqrt_optab, mode, "_q_sqrt");

      set_optab_libfunc (eq_optab, mode, "_q_feq");
      set_optab_libfunc (ne_optab, mode, "_q_fne");
      set_optab_libfunc (gt_optab, mode, "_q_fgt");
      set_optab_libfunc (ge_optab, mode, "_q_fge");
      set_optab_libfunc (lt_optab, mode, "_q_flt");
      set_optab_libfunc (le_optab, mode, "_q_fle");

      set_conv_libfunc (sext_optab, mode, SFmode, "_q_stoq");
      set_conv_libfunc (sext_optab, mode, DFmode, "_q_dtoq");
      set_conv_libfunc (trunc_optab, SFmode, mode, "_q_qtos");
      set_conv_libfunc (trunc_optab, DFmode, mode, "_q_qtod");
      set_conv_libfunc (sfix_optab, SImode, mode, "_q_qtoi");
      set_conv_libfunc (ufix_optab, SImode, mode, "_q_qtou");
      set_conv_libfunc (sfloat_optab, mode, SImode, "_q_itoq");
      set_conv_libfunc (ufloat_optab, mode, SImode, "_q_utoq");
    }
}

static void
rs6000_init_libfuncs (void)
{
  /* __float128 support.  */
  if (TARGET_FLOAT128_TYPE)
    {
      init_float128_ibm (IFmode);
      init_float128_ieee (KFmode);
    }

  /* AIX/Darwin/64-bit Linux quad floating point routines.  */
  if (TARGET_LONG_DOUBLE_128)
    {
      if (!TARGET_IEEEQUAD)
	init_float128_ibm (TFmode);

      /* IEEE 128-bit including 32-bit SVR4 quad floating point routines.  */
      else
	init_float128_ieee (TFmode);
    }
}

/* Emit a potentially record-form instruction, setting DST from SRC.
   If DOT is 0, that is all; otherwise, set CCREG to the result of the
   signed comparison of DST with zero.  If DOT is 1, the generated RTL
   doesn't care about the DST result; if DOT is 2, it does.  If CCREG
   is CR0 do a single dot insn (as a PARALLEL); otherwise, do a SET and
   a separate COMPARE.  */

void
rs6000_emit_dot_insn (rtx dst, rtx src, int dot, rtx ccreg)
{
  if (dot == 0)
    {
      emit_move_insn (dst, src);
      return;
    }

  if (cc_reg_not_cr0_operand (ccreg, CCmode))
    {
      emit_move_insn (dst, src);
      emit_move_insn (ccreg, gen_rtx_COMPARE (CCmode, dst, const0_rtx));
      return;
    }

  rtx ccset = gen_rtx_SET (ccreg, gen_rtx_COMPARE (CCmode, src, const0_rtx));
  if (dot == 1)
    {
      rtx clobber = gen_rtx_CLOBBER (VOIDmode, dst);
      emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, ccset, clobber)));
    }
  else
    {
      rtx set = gen_rtx_SET (dst, src);
      emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, ccset, set)));
    }
}
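
/* For illustration (a sketch of the emitted RTL shapes, not itself
   compiled): with DOT == 1 and CCREG being CR0, rs6000_emit_dot_insn
   emits the single PARALLEL

     (parallel [(set ccreg (compare:CC src (const_int 0)))
		(clobber dst)])

   which matches a record-form ("dot") instruction whose GPR result is
   unused, while DOT == 2 replaces the CLOBBER with (set dst src).  */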


/* A validation routine: say whether CODE, a condition code, and MODE
   match.  The other alternatives either don't make sense or should
   never be generated.  */

void
validate_condition_mode (enum rtx_code code, machine_mode mode)
{
  gcc_assert ((GET_RTX_CLASS (code) == RTX_COMPARE
	       || GET_RTX_CLASS (code) == RTX_COMM_COMPARE)
	      && GET_MODE_CLASS (mode) == MODE_CC);

  /* These don't make sense.  */
  gcc_assert ((code != GT && code != LT && code != GE && code != LE)
	      || mode != CCUNSmode);

  gcc_assert ((code != GTU && code != LTU && code != GEU && code != LEU)
	      || mode == CCUNSmode);

  gcc_assert (mode == CCFPmode
	      || (code != ORDERED && code != UNORDERED
		  && code != UNEQ && code != LTGT
		  && code != UNGT && code != UNLT
		  && code != UNGE && code != UNLE));

  /* These are invalid; the information is not there.  */
  gcc_assert (mode != CCEQmode || code == EQ || code == NE);
}


/* Return whether MASK (a CONST_INT) is a valid mask for any rlwinm,
   rldicl, rldicr, or rldic instruction in mode MODE.  If so, if E is
   not zero, store there the bit offset (counted from the right) where
   the single stretch of 1 bits begins; and similarly for B, the bit
   offset where it ends.  */

bool
rs6000_is_valid_mask (rtx mask, int *b, int *e, machine_mode mode)
{
  unsigned HOST_WIDE_INT val = INTVAL (mask);
  unsigned HOST_WIDE_INT bit;
  int nb, ne;
  int n = GET_MODE_PRECISION (mode);

  if (mode != DImode && mode != SImode)
    return false;

  if (INTVAL (mask) >= 0)
    {
      bit = val & -val;
      ne = exact_log2 (bit);
      nb = exact_log2 (val + bit);
    }
  else if (val + 1 == 0)
    {
      nb = n;
      ne = 0;
    }
  else if (val & 1)
    {
      val = ~val;
      bit = val & -val;
      nb = exact_log2 (bit);
      ne = exact_log2 (val + bit);
    }
  else
    {
      bit = val & -val;
      ne = exact_log2 (bit);
      if (val + bit == 0)
	nb = n;
      else
	nb = 0;
    }

  nb--;

  if (nb < 0 || ne < 0 || nb >= n || ne >= n)
    return false;

  if (b)
    *b = nb;
  if (e)
    *e = ne;

  return true;
}
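
/* A worked example of the above (illustrative only): MASK = 0xff0 in
   SImode is a single stretch of ones running from bit 4 through bit 11
   (counted from the right), so the function returns true with *B = 11
   and *E = 4.  The wrap-around SImode mask 0xff0000ff is also accepted
   (with *B = 7 and *E = 24); whether a wrapping mask is actually
   usable is left to the callers below.  */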

bool
rs6000_is_valid_rotate_dot_mask (rtx mask, machine_mode mode)
{
  int nb, ne;
  if (rs6000_is_valid_mask (mask, &nb, &ne, mode) && nb >= ne && ne > 0)
    {
      if (TARGET_64BIT)
	return true;
      /* *rotldi3_mask_dot requires for -m32 -mpowerpc64 that the mask is
	 <= 0x7fffffff.  */
      return (UINTVAL (mask) << (63 - nb)) <= 0x7fffffff;
    }

  return false;
}

/* Return whether MASK (a CONST_INT) is a valid mask for any rlwinm, rldicl,
   or rldicr instruction, to implement an AND with it in mode MODE.  */

bool
rs6000_is_valid_and_mask (rtx mask, machine_mode mode)
{
  int nb, ne;

  if (!rs6000_is_valid_mask (mask, &nb, &ne, mode))
    return false;

  /* For DImode, we need a rldicl, rldicr, or a rlwinm with mask that
     does not wrap.  */
  if (mode == DImode)
    return (ne == 0 || nb == 63 || (nb < 32 && ne <= nb));

  /* For SImode, rlwinm can do everything.  */
  if (mode == SImode)
    return (nb < 32 && ne < 32);

  return false;
}
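
/* For instance, the DImode mask 0xffffffff00000000 is accepted here
   (the stretch ends at bit 63, so rldicr can do the AND), and
   0xffff0000 is accepted via rlwinm (the stretch 16..31 lies below
   bit 32), but the wrapping DImode mask 0xff000000000000ff is
   rejected, since only the 32-bit rotate insns allow wrap-around
   masks.  */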

/* Return the instruction template for an AND with mask in mode MODE, with
   operands OPERANDS.  If DOT is true, make it a record-form instruction.  */

const char *
rs6000_insn_for_and_mask (machine_mode mode, rtx *operands, bool dot)
{
  int nb, ne;

  if (!rs6000_is_valid_mask (operands[2], &nb, &ne, mode))
    gcc_unreachable ();

  if (mode == DImode && ne == 0)
    {
      operands[3] = GEN_INT (63 - nb);
      if (dot)
	return "rldicl. %0,%1,0,%3";
      return "rldicl %0,%1,0,%3";
    }

  if (mode == DImode && nb == 63)
    {
      operands[3] = GEN_INT (63 - ne);
      if (dot)
	return "rldicr. %0,%1,0,%3";
      return "rldicr %0,%1,0,%3";
    }

  if (nb < 32 && ne < 32)
    {
      operands[3] = GEN_INT (31 - nb);
      operands[4] = GEN_INT (31 - ne);
      if (dot)
	return "rlwinm. %0,%1,0,%3,%4";
      return "rlwinm %0,%1,0,%3,%4";
    }

  gcc_unreachable ();
}
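
/* To make the template selection concrete: an AND with 0xffffffff in
   DImode has ne == 0 and nb == 31, so the code above returns
   "rldicl %0,%1,0,32", which clears the upper 32 bits; an AND with
   0xffff0000 instead falls through to "rlwinm %0,%1,0,0,15".  */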

/* Return whether MASK (a CONST_INT) is a valid mask for any rlw[i]nm,
   rld[i]cl, rld[i]cr, or rld[i]c instruction, to implement an AND with
   shift SHIFT (a ROTATE, ASHIFT, or LSHIFTRT) in mode MODE.  */

bool
rs6000_is_valid_shift_mask (rtx mask, rtx shift, machine_mode mode)
{
  int nb, ne;

  if (!rs6000_is_valid_mask (mask, &nb, &ne, mode))
    return false;

  int n = GET_MODE_PRECISION (mode);
  int sh = -1;

  if (CONST_INT_P (XEXP (shift, 1)))
    {
      sh = INTVAL (XEXP (shift, 1));
      if (sh < 0 || sh >= n)
	return false;
    }

  rtx_code code = GET_CODE (shift);

  /* Convert any shift by 0 to a rotate, to simplify below code.  */
  if (sh == 0)
    code = ROTATE;

  /* Convert rotate to simple shift if we can, to make analysis simpler.  */
  if (code == ROTATE && sh >= 0 && nb >= ne && ne >= sh)
    code = ASHIFT;
  if (code == ROTATE && sh >= 0 && nb >= ne && nb < sh)
    {
      code = LSHIFTRT;
      sh = n - sh;
    }

  /* DImode rotates need rld*.  */
  if (mode == DImode && code == ROTATE)
    return (nb == 63 || ne == 0 || ne == sh);

  /* SImode rotates need rlw*.  */
  if (mode == SImode && code == ROTATE)
    return (nb < 32 && ne < 32 && sh < 32);

  /* Wrap-around masks are only okay for rotates.  */
  if (ne > nb)
    return false;

  /* Variable shifts are only okay for rotates.  */
  if (sh < 0)
    return false;

  /* Don't allow ASHIFT if the mask is wrong for that.  */
  if (code == ASHIFT && ne < sh)
    return false;

  /* If we can do it with an rlw*, we can do it.  Don't allow LSHIFTRT
     if the mask is wrong for that.  */
  if (nb < 32 && ne < 32 && sh < 32
      && !(code == LSHIFTRT && nb >= 32 - sh))
    return true;

  /* If we can do it with an rld*, we can do it.  Don't allow LSHIFTRT
     if the mask is wrong for that.  */
  if (code == LSHIFTRT)
    sh = 64 - sh;
  if (nb == 63 || ne == 0 || ne == sh)
    return !(code == LSHIFTRT && nb >= sh);

  return false;
}
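
/* As an example, (ashift:SI x 16) under MASK = 0xffff0000 is accepted:
   sh == 16, the mask runs from bit 16 through bit 31 (ne == 16,
   nb == 31), the ASHIFT restriction ne >= sh holds, and the operation
   fits a single rlwinm.  The same mask with (lshiftrt:SI x 16) is
   rejected, because that shift only brings zero bits into the region
   the mask keeps.  */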

/* Return the instruction template for a shift with mask in mode MODE, with
   operands OPERANDS.  If DOT is true, make it a record-form instruction.  */

const char *
rs6000_insn_for_shift_mask (machine_mode mode, rtx *operands, bool dot)
{
  int nb, ne;

  if (!rs6000_is_valid_mask (operands[3], &nb, &ne, mode))
    gcc_unreachable ();

  if (mode == DImode && ne == 0)
    {
      if (GET_CODE (operands[4]) == LSHIFTRT && INTVAL (operands[2]))
	operands[2] = GEN_INT (64 - INTVAL (operands[2]));
      operands[3] = GEN_INT (63 - nb);
      if (dot)
	return "rld%I2cl. %0,%1,%2,%3";
      return "rld%I2cl %0,%1,%2,%3";
    }

  if (mode == DImode && nb == 63)
    {
      operands[3] = GEN_INT (63 - ne);
      if (dot)
	return "rld%I2cr. %0,%1,%2,%3";
      return "rld%I2cr %0,%1,%2,%3";
    }

  if (mode == DImode
      && GET_CODE (operands[4]) != LSHIFTRT
      && CONST_INT_P (operands[2])
      && ne == INTVAL (operands[2]))
    {
      operands[3] = GEN_INT (63 - nb);
      if (dot)
	return "rld%I2c. %0,%1,%2,%3";
      return "rld%I2c %0,%1,%2,%3";
    }

  if (nb < 32 && ne < 32)
    {
      if (GET_CODE (operands[4]) == LSHIFTRT && INTVAL (operands[2]))
	operands[2] = GEN_INT (32 - INTVAL (operands[2]));
      operands[3] = GEN_INT (31 - nb);
      operands[4] = GEN_INT (31 - ne);
      /* This insn can also be a 64-bit rotate with mask that really makes
	 it just a shift right (with mask); the %h below are to adjust for
	 that situation (shift count is >= 32 in that case).  */
      if (dot)
	return "rlw%I2nm. %0,%1,%h2,%3,%4";
      return "rlw%I2nm %0,%1,%h2,%3,%4";
    }

  gcc_unreachable ();
}
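
/* Concretely, an SImode logical shift right by 16 under mask 0xffff
   reaches the last case above: the shift count is rewritten as
   32 - 16 == 16 and the returned template prints as
   "rlwinm %0,%1,16,16,31", the canonical rotate-and-mask form of a
   32-bit shift right by 16.  */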

/* Return whether MASK (a CONST_INT) is a valid mask for any rlwimi or
   rldimi instruction, to implement an insert with shift SHIFT (a ROTATE,
   ASHIFT, or LSHIFTRT) in mode MODE.  */

bool
rs6000_is_valid_insert_mask (rtx mask, rtx shift, machine_mode mode)
{
  int nb, ne;

  if (!rs6000_is_valid_mask (mask, &nb, &ne, mode))
    return false;

  int n = GET_MODE_PRECISION (mode);

  int sh = INTVAL (XEXP (shift, 1));
  if (sh < 0 || sh >= n)
    return false;

  rtx_code code = GET_CODE (shift);

  /* Convert any shift by 0 to a rotate, to simplify below code.  */
  if (sh == 0)
    code = ROTATE;

  /* Convert rotate to simple shift if we can, to make analysis simpler.  */
  if (code == ROTATE && sh >= 0 && nb >= ne && ne >= sh)
    code = ASHIFT;
  if (code == ROTATE && sh >= 0 && nb >= ne && nb < sh)
    {
      code = LSHIFTRT;
      sh = n - sh;
    }

  /* DImode rotates need rldimi.  */
  if (mode == DImode && code == ROTATE)
    return (ne == sh);

  /* SImode rotates need rlwimi.  */
  if (mode == SImode && code == ROTATE)
    return (nb < 32 && ne < 32 && sh < 32);

  /* Wrap-around masks are only okay for rotates.  */
  if (ne > nb)
    return false;

  /* Don't allow ASHIFT if the mask is wrong for that.  */
  if (code == ASHIFT && ne < sh)
    return false;

  /* If we can do it with an rlwimi, we can do it.  Don't allow LSHIFTRT
     if the mask is wrong for that.  */
  if (nb < 32 && ne < 32 && sh < 32
      && !(code == LSHIFTRT && nb >= 32 - sh))
    return true;

  /* If we can do it with an rldimi, we can do it.  Don't allow LSHIFTRT
     if the mask is wrong for that.  */
  if (code == LSHIFTRT)
    sh = 64 - sh;
  if (ne == sh)
    return !(code == LSHIFTRT && nb >= sh);

  return false;
}

/* Return the instruction template for an insert with mask in mode MODE, with
   operands OPERANDS.  If DOT is true, make it a record-form instruction.  */

const char *
rs6000_insn_for_insert_mask (machine_mode mode, rtx *operands, bool dot)
{
  int nb, ne;

  if (!rs6000_is_valid_mask (operands[3], &nb, &ne, mode))
    gcc_unreachable ();

  /* Prefer rldimi because rlwimi is cracked.  */
  if (TARGET_POWERPC64
      && (!dot || mode == DImode)
      && GET_CODE (operands[4]) != LSHIFTRT
      && ne == INTVAL (operands[2]))
    {
      operands[3] = GEN_INT (63 - nb);
      if (dot)
	return "rldimi. %0,%1,%2,%3";
      return "rldimi %0,%1,%2,%3";
    }

  if (nb < 32 && ne < 32)
    {
      if (GET_CODE (operands[4]) == LSHIFTRT && INTVAL (operands[2]))
	operands[2] = GEN_INT (32 - INTVAL (operands[2]));
      operands[3] = GEN_INT (31 - nb);
      operands[4] = GEN_INT (31 - ne);
      if (dot)
	return "rlwimi. %0,%1,%2,%3,%4";
      return "rlwimi %0,%1,%2,%3,%4";
    }

  gcc_unreachable ();
}
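
/* As an example, inserting the low half of one register into the high
   half of another uses MASK = 0xffff0000 with a shift count of 16;
   then ne == 16 matches the shift count, so the 64-bit path above
   yields "rldimi %0,%1,16,32" and the 32-bit path yields
   "rlwimi %0,%1,16,0,15".  */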

/* Return whether an AND with C (a CONST_INT) in mode MODE can be done
   using two machine instructions.  */

bool
rs6000_is_valid_2insn_and (rtx c, machine_mode mode)
{
  /* There are two kinds of AND we can handle with two insns:
     1) those we can do with two rl* insns;
     2) ori[s];xori[s].

     We do not handle that last case yet.  */

  /* If there is just one stretch of ones, we can do it.  */
  if (rs6000_is_valid_mask (c, NULL, NULL, mode))
    return true;

  /* Otherwise, fill in the lowest "hole"; if we can do the result with
     one insn, we can do the whole thing with two.  */
  unsigned HOST_WIDE_INT val = INTVAL (c);
  unsigned HOST_WIDE_INT bit1 = val & -val;
  unsigned HOST_WIDE_INT bit2 = (val + bit1) & ~val;
  unsigned HOST_WIDE_INT val1 = (val + bit1) & val;
  unsigned HOST_WIDE_INT bit3 = val1 & -val1;
  return rs6000_is_valid_and_mask (GEN_INT (val + bit3 - bit2), mode);
}
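
/* Tracing the hole-filling arithmetic on C = 0xf0f0 (illustrative
   only): bit1 = 0x10, bit2 = 0x100, bit3 = 0x1000, so the tested mask
   is 0xf0f0 + 0x1000 - 0x100 == 0xfff0, a single stretch of ones.
   The AND is therefore doable in two insns; rs6000_emit_2insn_and
   below would AND first with 0xfffff0ff (a wrap-around rlwinm mask)
   and then with 0xfff0.  */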

/* Emit the two insns to do an AND in mode MODE, with operands OPERANDS.
   If EXPAND is true, split rotate-and-mask instructions we generate to
   their constituent parts as well (this is used during expand); if DOT
   is 1, make the last insn a record-form instruction clobbering the
   destination GPR and setting the CC reg (from operands[3]); if 2, set
   that GPR as well as the CC reg.  */

void
rs6000_emit_2insn_and (machine_mode mode, rtx *operands, bool expand, int dot)
{
  gcc_assert (!(expand && dot));

  unsigned HOST_WIDE_INT val = INTVAL (operands[2]);

  /* If it is one stretch of ones, it is DImode; shift left, mask, then
     shift right.  This generates better code than doing the masks without
     shifts, or shifting first right and then left.  */
  int nb, ne;
  if (rs6000_is_valid_mask (operands[2], &nb, &ne, mode) && nb >= ne)
    {
      gcc_assert (mode == DImode);

      int shift = 63 - nb;
      if (expand)
	{
	  rtx tmp1 = gen_reg_rtx (DImode);
	  rtx tmp2 = gen_reg_rtx (DImode);
	  emit_insn (gen_ashldi3 (tmp1, operands[1], GEN_INT (shift)));
	  emit_insn (gen_anddi3 (tmp2, tmp1, GEN_INT (val << shift)));
	  emit_insn (gen_lshrdi3 (operands[0], tmp2, GEN_INT (shift)));
	}
      else
	{
	  rtx tmp = gen_rtx_ASHIFT (mode, operands[1], GEN_INT (shift));
	  tmp = gen_rtx_AND (mode, tmp, GEN_INT (val << shift));
	  emit_move_insn (operands[0], tmp);
	  tmp = gen_rtx_LSHIFTRT (mode, operands[0], GEN_INT (shift));
	  rs6000_emit_dot_insn (operands[0], tmp, dot, dot ? operands[3] : 0);
	}
      return;
    }

  /* Otherwise, make a mask2 that cuts out the lowest "hole", and a mask1
     that does the rest.  */
  unsigned HOST_WIDE_INT bit1 = val & -val;
  unsigned HOST_WIDE_INT bit2 = (val + bit1) & ~val;
  unsigned HOST_WIDE_INT val1 = (val + bit1) & val;
  unsigned HOST_WIDE_INT bit3 = val1 & -val1;

  unsigned HOST_WIDE_INT mask1 = -bit3 + bit2 - 1;
  unsigned HOST_WIDE_INT mask2 = val + bit3 - bit2;

  gcc_assert (rs6000_is_valid_and_mask (GEN_INT (mask2), mode));

  /* Two "no-rotate"-and-mask instructions, for SImode.  */
  if (rs6000_is_valid_and_mask (GEN_INT (mask1), mode))
    {
      gcc_assert (mode == SImode);

      rtx reg = expand ? gen_reg_rtx (mode) : operands[0];
      rtx tmp = gen_rtx_AND (mode, operands[1], GEN_INT (mask1));
      emit_move_insn (reg, tmp);
      tmp = gen_rtx_AND (mode, reg, GEN_INT (mask2));
      rs6000_emit_dot_insn (operands[0], tmp, dot, dot ? operands[3] : 0);
      return;
    }

  gcc_assert (mode == DImode);

  /* Two "no-rotate"-and-mask instructions, for DImode: both are rlwinm
     insns; we have to do the first in SImode, because it wraps.  */
  if (mask2 <= 0xffffffff
      && rs6000_is_valid_and_mask (GEN_INT (mask1), SImode))
    {
      rtx reg = expand ? gen_reg_rtx (mode) : operands[0];
      rtx tmp = gen_rtx_AND (SImode, gen_lowpart (SImode, operands[1]),
			     GEN_INT (mask1));
      rtx reg_low = gen_lowpart (SImode, reg);
      emit_move_insn (reg_low, tmp);
      tmp = gen_rtx_AND (mode, reg, GEN_INT (mask2));
      rs6000_emit_dot_insn (operands[0], tmp, dot, dot ? operands[3] : 0);
      return;
    }

  /* Two rld* insns: rotate, clear the hole in the middle (which now is
     at the top end), rotate back and clear the other hole.  */
  int right = exact_log2 (bit3);
  int left = 64 - right;

  /* Rotate the mask too.  */
  mask1 = (mask1 >> right) | ((bit2 - 1) << left);

  if (expand)
    {
      rtx tmp1 = gen_reg_rtx (DImode);
      rtx tmp2 = gen_reg_rtx (DImode);
      rtx tmp3 = gen_reg_rtx (DImode);
      emit_insn (gen_rotldi3 (tmp1, operands[1], GEN_INT (left)));
      emit_insn (gen_anddi3 (tmp2, tmp1, GEN_INT (mask1)));
      emit_insn (gen_rotldi3 (tmp3, tmp2, GEN_INT (right)));
      emit_insn (gen_anddi3 (operands[0], tmp3, GEN_INT (mask2)));
    }
  else
    {
      rtx tmp = gen_rtx_ROTATE (mode, operands[1], GEN_INT (left));
      tmp = gen_rtx_AND (mode, tmp, GEN_INT (mask1));
      emit_move_insn (operands[0], tmp);
      tmp = gen_rtx_ROTATE (mode, operands[0], GEN_INT (right));
      tmp = gen_rtx_AND (mode, tmp, GEN_INT (mask2));
      rs6000_emit_dot_insn (operands[0], tmp, dot, dot ? operands[3] : 0);
    }
}

/* Return 1 if REGNO (reg1) == REGNO (reg2) - 1 making them candidates
   for lfq and stfq insns iff the registers are hard registers.  */

int
registers_ok_for_quad_peep (rtx reg1, rtx reg2)
{
  /* We might have been passed a SUBREG.  */
  if (!REG_P (reg1) || !REG_P (reg2))
    return 0;

  /* We might have been passed non floating point registers.  */
  if (!FP_REGNO_P (REGNO (reg1))
      || !FP_REGNO_P (REGNO (reg2)))
    return 0;

  return (REGNO (reg1) == REGNO (reg2) - 1);
}

/* Return 1 if addr1 and addr2 are suitable for lfq or stfq insn.
   addr1 and addr2 must be in consecutive memory locations
   (addr2 == addr1 + 8).  */

int
mems_ok_for_quad_peep (rtx mem1, rtx mem2)
{
  rtx addr1, addr2;
  unsigned int reg1, reg2;
  int offset1, offset2;

  /* The mems cannot be volatile.  */
  if (MEM_VOLATILE_P (mem1) || MEM_VOLATILE_P (mem2))
    return 0;

  addr1 = XEXP (mem1, 0);
  addr2 = XEXP (mem2, 0);

  /* Extract an offset (if used) from the first addr.  */
  if (GET_CODE (addr1) == PLUS)
    {
      /* If not a REG, return zero.  */
      if (!REG_P (XEXP (addr1, 0)))
	return 0;
      else
	{
	  reg1 = REGNO (XEXP (addr1, 0));
	  /* The offset must be constant!  */
	  if (!CONST_INT_P (XEXP (addr1, 1)))
	    return 0;
	  offset1 = INTVAL (XEXP (addr1, 1));
	}
    }
  else if (!REG_P (addr1))
    return 0;
  else
    {
      reg1 = REGNO (addr1);
      /* This was a simple (mem (reg)) expression.  Offset is 0.  */
      offset1 = 0;
    }

  /* And now for the second addr.  */
  if (GET_CODE (addr2) == PLUS)
    {
      /* If not a REG, return zero.  */
      if (!REG_P (XEXP (addr2, 0)))
	return 0;
      else
	{
	  reg2 = REGNO (XEXP (addr2, 0));
	  /* The offset must be constant.  */
	  if (!CONST_INT_P (XEXP (addr2, 1)))
	    return 0;
	  offset2 = INTVAL (XEXP (addr2, 1));
	}
    }
  else if (!REG_P (addr2))
    return 0;
  else
    {
      reg2 = REGNO (addr2);
      /* This was a simple (mem (reg)) expression.  Offset is 0.  */
      offset2 = 0;
    }

  /* Both of these must have the same base register.  */
  if (reg1 != reg2)
    return 0;

  /* The offset for the second addr must be 8 more than the first addr.  */
  if (offset2 != offset1 + 8)
    return 0;

  /* All the tests passed.  addr1 and addr2 are valid for lfq or stfq
     instructions.  */
  return 1;
}
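
/* For instance (with an arbitrary base register, say reg 9), the pair
   (mem (plus (reg 9) (const_int 16))) and
   (mem (plus (reg 9) (const_int 24))) passes all of the checks above:
   same base register and constant offsets differing by exactly 8, so
   the peephole may combine the two accesses into a single lfq or
   stfq.  */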

/* Implement TARGET_SECONDARY_MEMORY_NEEDED_MODE.  For SDmode values we
   need to use DDmode, in all other cases we can use the same mode.  */
static machine_mode
rs6000_secondary_memory_needed_mode (machine_mode mode)
{
  if (lra_in_progress && mode == SDmode)
    return DDmode;
  return mode;
}

/* Classify a register type.  Because the FMRGOW/FMRGEW instructions only work
   on traditional floating point registers, and the VMRGOW/VMRGEW instructions
   only work on the traditional altivec registers, note if an altivec register
   was chosen.  */

static enum rs6000_reg_type
register_to_reg_type (rtx reg, bool *is_altivec)
{
  HOST_WIDE_INT regno;
  enum reg_class rclass;

  if (SUBREG_P (reg))
    reg = SUBREG_REG (reg);

  if (!REG_P (reg))
    return NO_REG_TYPE;

  regno = REGNO (reg);
  if (!HARD_REGISTER_NUM_P (regno))
    {
      if (!lra_in_progress && !reload_completed)
	return PSEUDO_REG_TYPE;

      regno = true_regnum (reg);
      if (regno < 0 || !HARD_REGISTER_NUM_P (regno))
	return PSEUDO_REG_TYPE;
    }

  gcc_assert (regno >= 0);

  if (is_altivec && ALTIVEC_REGNO_P (regno))
    *is_altivec = true;

  rclass = rs6000_regno_regclass[regno];
  return reg_class_to_reg_type[(int)rclass];
}

/* Helper function to return the cost of adding a TOC entry address.  */

static inline int
rs6000_secondary_reload_toc_costs (addr_mask_type addr_mask)
{
  int ret;

  if (TARGET_CMODEL != CMODEL_SMALL)
    ret = ((addr_mask & RELOAD_REG_OFFSET) == 0) ? 1 : 2;

  else
    ret = (TARGET_MINIMAL_TOC) ? 6 : 3;

  return ret;
}

/* Helper function for rs6000_secondary_reload to determine whether the memory
   address (ADDR) with a given register class (RCLASS) and machine mode (MODE)
   needs reloading.  Return negative if the memory is not handled by the memory
   helper functions and a different reload method should be tried, 0 if no
   additional instructions are needed, and positive to give the extra cost for
   the memory.  */

static int
rs6000_secondary_reload_memory (rtx addr,
				enum reg_class rclass,
				machine_mode mode)
{
  int extra_cost = 0;
  rtx reg, and_arg, plus_arg0, plus_arg1;
  addr_mask_type addr_mask;
  const char *type = NULL;
  const char *fail_msg = NULL;

  if (GPR_REG_CLASS_P (rclass))
    addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_GPR];

  else if (rclass == FLOAT_REGS)
    addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_FPR];

  else if (rclass == ALTIVEC_REGS)
    addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_VMX];

  /* For the combined VSX_REGS, turn off Altivec AND -16.  */
  else if (rclass == VSX_REGS)
    addr_mask = (reg_addr[mode].addr_mask[RELOAD_REG_VMX]
		 & ~RELOAD_REG_AND_M16);

  /* If the register allocator hasn't made up its mind yet on the register
     class to use, settle on defaults to use.  */
  else if (rclass == NO_REGS)
    {
      addr_mask = (reg_addr[mode].addr_mask[RELOAD_REG_ANY]
		   & ~RELOAD_REG_AND_M16);

      if ((addr_mask & RELOAD_REG_MULTIPLE) != 0)
	addr_mask &= ~(RELOAD_REG_INDEXED
		       | RELOAD_REG_PRE_INCDEC
		       | RELOAD_REG_PRE_MODIFY);
    }

  else
    addr_mask = 0;

  /* If the register isn't valid in this register class, just return now.  */
  if ((addr_mask & RELOAD_REG_VALID) == 0)
    {
      if (TARGET_DEBUG_ADDR)
	{
	  fprintf (stderr,
		   "rs6000_secondary_reload_memory: mode = %s, class = %s, "
		   "not valid in class\n",
		   GET_MODE_NAME (mode), reg_class_names[rclass]);
	  debug_rtx (addr);
	}

      return -1;
    }

  switch (GET_CODE (addr))
    {
      /* Does the register class support auto update forms for this mode?  We
	 don't need a scratch register, since the powerpc only supports
	 PRE_INC, PRE_DEC, and PRE_MODIFY.  */
    case PRE_INC:
    case PRE_DEC:
      reg = XEXP (addr, 0);
      if (!base_reg_operand (addr, GET_MODE (reg)))
	{
	  fail_msg = "no base register #1";
	  extra_cost = -1;
	}

      else if ((addr_mask & RELOAD_REG_PRE_INCDEC) == 0)
	{
	  extra_cost = 1;
	  type = "update";
	}
      break;

    case PRE_MODIFY:
      reg = XEXP (addr, 0);
      plus_arg1 = XEXP (addr, 1);
      if (!base_reg_operand (reg, GET_MODE (reg))
	  || GET_CODE (plus_arg1) != PLUS
	  || !rtx_equal_p (reg, XEXP (plus_arg1, 0)))
	{
	  fail_msg = "bad PRE_MODIFY";
	  extra_cost = -1;
	}

      else if ((addr_mask & RELOAD_REG_PRE_MODIFY) == 0)
	{
	  extra_cost = 1;
	  type = "update";
	}
      break;

      /* Do we need to simulate AND -16 to clear the bottom address bits used
	 in VMX load/stores?  Only allow the AND for vector sizes.  */
    case AND:
      and_arg = XEXP (addr, 0);
      if (GET_MODE_SIZE (mode) != 16
	  || !CONST_INT_P (XEXP (addr, 1))
	  || INTVAL (XEXP (addr, 1)) != -16)
	{
	  fail_msg = "bad Altivec AND #1";
	  extra_cost = -1;
	}

      if (rclass != ALTIVEC_REGS)
	{
	  if (legitimate_indirect_address_p (and_arg, false))
	    extra_cost = 1;

	  else if (legitimate_indexed_address_p (and_arg, false))
	    extra_cost = 2;

	  else
	    {
	      fail_msg = "bad Altivec AND #2";
	      extra_cost = -1;
	    }

	  type = "and";
	}
      break;

      /* If this is an indirect address, make sure it is a base register.  */
    case REG:
    case SUBREG:
      if (!legitimate_indirect_address_p (addr, false))
	{
	  extra_cost = 1;
	  type = "move";
	}
      break;

      /* If this is an indexed address, make sure the register class can handle
	 indexed addresses for this mode.  */
    case PLUS:
      plus_arg0 = XEXP (addr, 0);
      plus_arg1 = XEXP (addr, 1);

      /* (plus (plus (reg) (constant)) (constant)) is generated during
	 push_reload processing, so handle it now.  */
      if (GET_CODE (plus_arg0) == PLUS && CONST_INT_P (plus_arg1))
	{
	  if ((addr_mask & RELOAD_REG_OFFSET) == 0)
	    {
	      extra_cost = 1;
	      type = "offset";
	    }
	}

      /* (plus (plus (reg) (constant)) (reg)) is also generated during
	 push_reload processing, so handle it now.  */
      else if (GET_CODE (plus_arg0) == PLUS && REG_P (plus_arg1))
	{
	  if ((addr_mask & RELOAD_REG_INDEXED) == 0)
	    {
	      extra_cost = 1;
	      type = "indexed #2";
	    }
	}

      else if (!base_reg_operand (plus_arg0, GET_MODE (plus_arg0)))
	{
	  fail_msg = "no base register #2";
	  extra_cost = -1;
	}

      else if (int_reg_operand (plus_arg1, GET_MODE (plus_arg1)))
	{
	  if ((addr_mask & RELOAD_REG_INDEXED) == 0
	      || !legitimate_indexed_address_p (addr, false))
	    {
	      extra_cost = 1;
	      type = "indexed";
	    }
	}

      else if ((addr_mask & RELOAD_REG_QUAD_OFFSET) != 0
	       && CONST_INT_P (plus_arg1))
	{
	  if (!quad_address_offset_p (INTVAL (plus_arg1)))
	    {
	      extra_cost = 1;
	      type = "vector d-form offset";
	    }
	}

      /* Make sure the register class can handle offset addresses.  */
      else if (rs6000_legitimate_offset_address_p (mode, addr, false, true))
	{
	  if ((addr_mask & RELOAD_REG_OFFSET) == 0)
	    {
	      extra_cost = 1;
	      type = "offset #2";
	    }
	}

      else
	{
	  fail_msg = "bad PLUS";
	  extra_cost = -1;
	}

      break;

    case LO_SUM:
      /* Quad offsets are restricted and can't handle normal addresses.  */
      if ((addr_mask & RELOAD_REG_QUAD_OFFSET) != 0)
	{
	  extra_cost = -1;
	  type = "vector d-form lo_sum";
	}

      else if (!legitimate_lo_sum_address_p (mode, addr, false))
	{
	  fail_msg = "bad LO_SUM";
	  extra_cost = -1;
	}

      if ((addr_mask & RELOAD_REG_OFFSET) == 0)
	{
	  extra_cost = 1;
	  type = "lo_sum";
	}
      break;

      /* Static addresses need to create a TOC entry.  */
    case CONST:
    case SYMBOL_REF:
    case LABEL_REF:
      if ((addr_mask & RELOAD_REG_QUAD_OFFSET) != 0)
	{
	  extra_cost = -1;
	  type = "vector d-form lo_sum #2";
	}

      else
	{
	  type = "address";
	  extra_cost = rs6000_secondary_reload_toc_costs (addr_mask);
	}
      break;

      /* TOC references look like offsetable memory.  */
    case UNSPEC:
      if (TARGET_CMODEL == CMODEL_SMALL || XINT (addr, 1) != UNSPEC_TOCREL)
	{
	  fail_msg = "bad UNSPEC";
	  extra_cost = -1;
	}

      else if ((addr_mask & RELOAD_REG_QUAD_OFFSET) != 0)
	{
	  extra_cost = -1;
	  type = "vector d-form lo_sum #3";
	}

      else if ((addr_mask & RELOAD_REG_OFFSET) == 0)
	{
	  extra_cost = 1;
	  type = "toc reference";
	}
      break;

    default:
	{
	  fail_msg = "bad address";
	  extra_cost = -1;
	}
    }

  if (TARGET_DEBUG_ADDR /* && extra_cost != 0 */)
    {
      if (extra_cost < 0)
	fprintf (stderr,
		 "rs6000_secondary_reload_memory error: mode = %s, "
		 "class = %s, addr_mask = '%s', %s\n",
		 GET_MODE_NAME (mode),
		 reg_class_names[rclass],
		 rs6000_debug_addr_mask (addr_mask, false),
		 (fail_msg != NULL) ? fail_msg : "<bad address>");

      else
	fprintf (stderr,
		 "rs6000_secondary_reload_memory: mode = %s, class = %s, "
		 "addr_mask = '%s', extra cost = %d, %s\n",
		 GET_MODE_NAME (mode),
		 reg_class_names[rclass],
		 rs6000_debug_addr_mask (addr_mask, false),
		 extra_cost,
		 (type) ? type : "<none>");

      debug_rtx (addr);
    }

  return extra_cost;
}

/* Helper function for rs6000_secondary_reload to return true if a move to a
   different register class is really a simple move.  */

static bool
rs6000_secondary_reload_simple_move (enum rs6000_reg_type to_type,
				     enum rs6000_reg_type from_type,
				     machine_mode mode)
{
  int size = GET_MODE_SIZE (mode);

  /* Add support for various direct moves available.  In this function, we only
     look at cases where we don't need any extra registers, and one or more
     simple move insns are issued.  Originally, small integers were not allowed
     in FPR/VSX registers.  Single precision binary floating point is not a
     simple move because we need to convert to the single precision memory
     layout.  The 4-byte SDmode can be moved.  TDmode values are disallowed
     since they need special direct move handling, which we do not support
     yet.  */
  if (TARGET_DIRECT_MOVE
      && ((to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE)
	  || (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE)))
    {
      if (TARGET_POWERPC64)
	{
	  /* ISA 2.07: MTVSRD or MFVSRD.  */
	  if (size == 8)
	    return true;

	  /* ISA 3.0: MTVSRDD or MFVSRD + MFVSRLD.  */
	  if (size == 16 && TARGET_P9_VECTOR && mode != TDmode)
	    return true;
	}

      /* ISA 2.07: MTVSRWZ or MFVSRWZ.  */
      if (TARGET_P8_VECTOR)
	{
	  if (mode == SImode)
	    return true;

	  if (TARGET_P9_VECTOR && (mode == HImode || mode == QImode))
	    return true;
	}

      /* ISA 2.07: MTVSRWZ or MFVSRWZ.  */
      if (mode == SDmode)
	return true;
    }

  /* Move to/from SPR.  */
  else if ((size == 4 || (TARGET_POWERPC64 && size == 8))
	   && ((to_type == GPR_REG_TYPE && from_type == SPR_REG_TYPE)
	       || (to_type == SPR_REG_TYPE && from_type == GPR_REG_TYPE)))
    return true;

  return false;
}

/* Direct move helper function for rs6000_secondary_reload.  Handle all of the
   special direct moves that involve allocating an extra register; return true
   if there is such a move, storing the insn code of the helper function and
   its extra cost in SRI, and false if not.  */

static bool
rs6000_secondary_reload_direct_move (enum rs6000_reg_type to_type,
				     enum rs6000_reg_type from_type,
				     machine_mode mode,
				     secondary_reload_info *sri,
				     bool altivec_p)
{
  bool ret = false;
  enum insn_code icode = CODE_FOR_nothing;
  int cost = 0;
  int size = GET_MODE_SIZE (mode);

  if (TARGET_POWERPC64 && size == 16)
    {
      /* Handle moving 128-bit values from GPRs to VSX registers on
	 ISA 2.07 (power8, power9) when running in 64-bit mode using
	 XXPERMDI to glue the two 64-bit values back together.  */
      if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE)
	{
	  cost = 3;			/* 2 mtvsrd's, 1 xxpermdi.  */
	  icode = reg_addr[mode].reload_vsx_gpr;
	}

      /* Handle moving 128-bit values from VSX registers to GPRs on ISA 2.07
	 when running in 64-bit mode using XXPERMDI to get access to the
	 bottom 64-bit value.  */
      else if (to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE)
	{
	  cost = 3;			/* 2 mfvsrd's, 1 xxpermdi.  */
	  icode = reg_addr[mode].reload_gpr_vsx;
	}
    }

  else if (TARGET_POWERPC64 && mode == SFmode)
    {
      if (to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE)
	{
	  cost = 3;			/* xscvdpspn, mfvsrd, and.  */
	  icode = reg_addr[mode].reload_gpr_vsx;
	}

      else if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE)
	{
	  cost = 2;			/* mtvsrz, xscvspdpn.  */
	  icode = reg_addr[mode].reload_vsx_gpr;
	}
    }

  else if (!TARGET_POWERPC64 && size == 8)
    {
      /* Handle moving 64-bit values from GPRs to floating point registers on
	 ISA 2.07 when running in 32-bit mode using FMRGOW to glue the two
	 32-bit values back together.  Altivec register classes must be handled
	 specially since a different instruction is used, and the secondary
	 reload support requires a single instruction class in the scratch
	 register constraint.  However, right now TFmode is not allowed in
	 Altivec registers, so the pattern will never match.  */
      if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE && !altivec_p)
	{
	  cost = 3;			/* 2 mtvsrwz's, 1 fmrgow.  */
	  icode = reg_addr[mode].reload_fpr_gpr;
	}
    }

  if (icode != CODE_FOR_nothing)
    {
      ret = true;
      if (sri)
	{
	  sri->icode = icode;
	  sri->extra_cost = cost;
	}
    }

  return ret;
}

/* Return whether a move between two register classes can be done either
   directly (simple move) or via a pattern that uses a single extra temporary
   (using ISA 2.07's direct move in this case).  */

static bool
rs6000_secondary_reload_move (enum rs6000_reg_type to_type,
			      enum rs6000_reg_type from_type,
			      machine_mode mode,
			      secondary_reload_info *sri,
			      bool altivec_p)
{
  /* Fall back to load/store reloads if either type is not a register.  */
  if (to_type == NO_REG_TYPE || from_type == NO_REG_TYPE)
    return false;

  /* If we haven't allocated registers yet, assume the move can be done for the
     standard register types.  */
  if ((to_type == PSEUDO_REG_TYPE && from_type == PSEUDO_REG_TYPE)
      || (to_type == PSEUDO_REG_TYPE && IS_STD_REG_TYPE (from_type))
      || (from_type == PSEUDO_REG_TYPE && IS_STD_REG_TYPE (to_type)))
    return true;

  /* Moves to the same set of registers are simple moves for non-specialized
     registers.  */
  if (to_type == from_type && IS_STD_REG_TYPE (to_type))
    return true;

  /* Check whether a simple move can be done directly.  */
  if (rs6000_secondary_reload_simple_move (to_type, from_type, mode))
    {
      if (sri)
	{
	  sri->icode = CODE_FOR_nothing;
	  sri->extra_cost = 0;
	}
      return true;
    }

  /* Now check if we can do it in a few steps.  */
  return rs6000_secondary_reload_direct_move (to_type, from_type, mode, sri,
					      altivec_p);
}
12382 
12383 /* Inform reload about cases where moving X with a mode MODE to a register in
12384    RCLASS requires an extra scratch or immediate register.  Return the class
12385    needed for the immediate register.
12386 
12387    For VSX and Altivec, we may need a register to convert sp+offset into
12388    reg+sp.
12389 
12390    For misaligned 64-bit gpr loads and stores we need a register to
12391    convert an offset address to indirect.  */

static reg_class_t
rs6000_secondary_reload (bool in_p,
			 rtx x,
			 reg_class_t rclass_i,
			 machine_mode mode,
			 secondary_reload_info *sri)
{
  enum reg_class rclass = (enum reg_class) rclass_i;
  reg_class_t ret = ALL_REGS;
  enum insn_code icode;
  bool default_p = false;
  bool done_p = false;

  /* Allow subreg of memory before/during reload.  */
  bool memory_p = (MEM_P (x)
		   || (!reload_completed && SUBREG_P (x)
		       && MEM_P (SUBREG_REG (x))));

  sri->icode = CODE_FOR_nothing;
  sri->t_icode = CODE_FOR_nothing;
  sri->extra_cost = 0;
  icode = ((in_p)
	   ? reg_addr[mode].reload_load
	   : reg_addr[mode].reload_store);

  if (REG_P (x) || register_operand (x, mode))
    {
      enum rs6000_reg_type to_type = reg_class_to_reg_type[(int)rclass];
      bool altivec_p = (rclass == ALTIVEC_REGS);
      enum rs6000_reg_type from_type = register_to_reg_type (x, &altivec_p);

      if (!in_p)
	std::swap (to_type, from_type);

      /* Can we do a direct move of some sort?  */
      if (rs6000_secondary_reload_move (to_type, from_type, mode, sri,
					altivec_p))
	{
	  icode = (enum insn_code)sri->icode;
	  default_p = false;
	  done_p = true;
	  ret = NO_REGS;
	}
    }

  /* Make sure 0.0 is not reloaded or forced into memory.  */
  if (x == CONST0_RTX (mode) && VSX_REG_CLASS_P (rclass))
    {
      ret = NO_REGS;
      default_p = false;
      done_p = true;
    }

  /* If this is a scalar floating point value and we want to load it into the
     traditional Altivec registers, do it via a traditional floating point
     register, unless we have D-form addressing.  Also make sure that
     non-zero constants use a FPR.  */
  if (!done_p && reg_addr[mode].scalar_in_vmx_p
      && !mode_supports_vmx_dform (mode)
      && (rclass == VSX_REGS || rclass == ALTIVEC_REGS)
      && (memory_p || CONST_DOUBLE_P (x)))
    {
      ret = FLOAT_REGS;
      default_p = false;
      done_p = true;
    }

  /* Handle reload of load/stores if we have reload helper functions.  */
  if (!done_p && icode != CODE_FOR_nothing && memory_p)
    {
      int extra_cost = rs6000_secondary_reload_memory (XEXP (x, 0), rclass,
						       mode);

      if (extra_cost >= 0)
	{
	  done_p = true;
	  ret = NO_REGS;
	  if (extra_cost > 0)
	    {
	      sri->extra_cost = extra_cost;
	      sri->icode = icode;
	    }
	}
    }

  /* Handle unaligned loads and stores of integer registers.  */
  if (!done_p && TARGET_POWERPC64
      && reg_class_to_reg_type[(int)rclass] == GPR_REG_TYPE
      && memory_p
      && GET_MODE_SIZE (GET_MODE (x)) >= UNITS_PER_WORD)
    {
      rtx addr = XEXP (x, 0);
      rtx off = address_offset (addr);

      if (off != NULL_RTX)
	{
	  unsigned int extra = GET_MODE_SIZE (GET_MODE (x)) - UNITS_PER_WORD;
	  unsigned HOST_WIDE_INT offset = INTVAL (off);

	  /* We need a secondary reload when our legitimate_address_p
	     says the address is good (as otherwise the entire address
	     will be reloaded), and the offset is not a multiple of
	     four or we have an address wrap.  Address wrap will only
	     occur for LO_SUMs since legitimate_offset_address_p
	     rejects addresses for 16-byte mems that will wrap.  */
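	  /* Illustrative example (values invented, not from the original
	     sources): for a 16-byte access EXTRA is 8, so a LO_SUM offset
	     of 0x7ffc gives ((0x7ffc & 0xffff) ^ 0x8000) = 0xfffc, which
	     is >= 0x10000 - 8 = 0xfff8; the last word would wrap past the
	     16-bit displacement, so a scratch register is needed.  A
	     misaligned offset such as 6 for an 8-byte access passes the
	     range test (6 + 0x8000 < 0x10000) but fails the alignment
	     test (6 & 3 != 0), so it too takes the secondary reload.  */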
	  if (GET_CODE (addr) == LO_SUM
	      ? (1 /* legitimate_address_p allows any offset for lo_sum */
		 && ((offset & 3) != 0
		     || ((offset & 0xffff) ^ 0x8000) >= 0x10000 - extra))
	      : (offset + 0x8000 < 0x10000 - extra /* legitimate_address_p */
		 && (offset & 3) != 0))
	    {
	      /* -m32 -mpowerpc64 needs to use a 32-bit scratch register.  */
	      if (in_p)
		sri->icode = ((TARGET_32BIT) ? CODE_FOR_reload_si_load
			      : CODE_FOR_reload_di_load);
	      else
		sri->icode = ((TARGET_32BIT) ? CODE_FOR_reload_si_store
			      : CODE_FOR_reload_di_store);
	      sri->extra_cost = 2;
	      ret = NO_REGS;
	      done_p = true;
	    }
	  else
	    default_p = true;
	}
      else
	default_p = true;
    }

  if (!done_p && !TARGET_POWERPC64
      && reg_class_to_reg_type[(int)rclass] == GPR_REG_TYPE
      && memory_p
      && GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD)
    {
      rtx addr = XEXP (x, 0);
      rtx off = address_offset (addr);

      if (off != NULL_RTX)
	{
	  unsigned int extra = GET_MODE_SIZE (GET_MODE (x)) - UNITS_PER_WORD;
	  unsigned HOST_WIDE_INT offset = INTVAL (off);

	  /* We need a secondary reload when our legitimate_address_p
	     says the address is good (as otherwise the entire address
	     will be reloaded), and we have a wrap.

	     legitimate_lo_sum_address_p allows LO_SUM addresses to
	     have any offset so test for wrap in the low 16 bits.

	     legitimate_offset_address_p checks for the range
	     [-0x8000,0x7fff] for mode size of 8 and [-0x8000,0x7ff7]
	     for mode size of 16.  We wrap at [0x7ffc,0x7fff] and
	     [0x7ff4,0x7fff] respectively, so test for the
	     intersection of these ranges, [0x7ffc,0x7fff] and
	     [0x7ff4,0x7ff7] respectively.

	     Note that the address we see here may have been
	     manipulated by legitimize_reload_address.  */
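	  /* Illustrative example (values invented): with 4-byte words and
	     an 8-byte access, EXTRA is 4 and the non-LO_SUM test reduces
	     to offset - 0x7ffc < 4, i.e. offsets 0x7ffc through 0x7fff.
	     There the first word is still addressable but the second
	     word's offset would exceed 0x7fff, so the address is moved
	     into a scratch register and used indirectly.  */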
	  if (GET_CODE (addr) == LO_SUM
	      ? ((offset & 0xffff) ^ 0x8000) >= 0x10000 - extra
	      : offset - (0x8000 - extra) < UNITS_PER_WORD)
	    {
	      if (in_p)
		sri->icode = CODE_FOR_reload_si_load;
	      else
		sri->icode = CODE_FOR_reload_si_store;
	      sri->extra_cost = 2;
	      ret = NO_REGS;
	      done_p = true;
	    }
	  else
	    default_p = true;
	}
      else
	default_p = true;
    }

  if (!done_p)
    default_p = true;

  if (default_p)
    ret = default_secondary_reload (in_p, x, rclass, mode, sri);

  gcc_assert (ret != ALL_REGS);

  if (TARGET_DEBUG_ADDR)
    {
      fprintf (stderr,
	       "\nrs6000_secondary_reload, return %s, in_p = %s, rclass = %s, "
	       "mode = %s",
	       reg_class_names[ret],
	       in_p ? "true" : "false",
	       reg_class_names[rclass],
	       GET_MODE_NAME (mode));

      if (reload_completed)
	fputs (", after reload", stderr);

      if (!done_p)
	fputs (", done_p not set", stderr);

      if (default_p)
	fputs (", default secondary reload", stderr);

      if (sri->icode != CODE_FOR_nothing)
	fprintf (stderr, ", reload func = %s, extra cost = %d",
		 insn_data[sri->icode].name, sri->extra_cost);

      else if (sri->extra_cost > 0)
	fprintf (stderr, ", extra cost = %d", sri->extra_cost);

      fputs ("\n", stderr);
      debug_rtx (x);
    }

  return ret;
}

/* Better tracing for rs6000_secondary_reload_inner.  */

static void
rs6000_secondary_reload_trace (int line, rtx reg, rtx mem, rtx scratch,
			       bool store_p)
{
  rtx set, clobber;

  gcc_assert (reg != NULL_RTX && mem != NULL_RTX && scratch != NULL_RTX);

  fprintf (stderr, "rs6000_secondary_reload_inner:%d, type = %s\n", line,
	   store_p ? "store" : "load");

  if (store_p)
    set = gen_rtx_SET (mem, reg);
  else
    set = gen_rtx_SET (reg, mem);

  clobber = gen_rtx_CLOBBER (VOIDmode, scratch);
  debug_rtx (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber)));
}

static void rs6000_secondary_reload_fail (int, rtx, rtx, rtx, bool)
  ATTRIBUTE_NORETURN;

static void
rs6000_secondary_reload_fail (int line, rtx reg, rtx mem, rtx scratch,
			      bool store_p)
{
  rs6000_secondary_reload_trace (line, reg, mem, scratch, store_p);
  gcc_unreachable ();
}

/* Fix up reload addresses for values in GPR, FPR, and VMX registers that have
   reload helper functions.  These were identified in
   rs6000_secondary_reload_memory, and if reload decided to use the secondary
   reload, it calls the insns:
	reload_<RELOAD:mode>_<P:mptrsize>_store
	reload_<RELOAD:mode>_<P:mptrsize>_load

   which in turn call this function, to do whatever is necessary to create
   valid addresses.  */
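/* As a sketch of that flow (assuming V4SI is in the RELOAD mode iterator,
   so that a reload_v4si_di_load expander exists), the load expander does
   little more than:

	rs6000_secondary_reload_inner (operands[0], operands[1],
				       operands[2], false);
	DONE;

   where operands[2] is the scratch base register that reload allocated
   through sri->icode in rs6000_secondary_reload above.  */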

void
rs6000_secondary_reload_inner (rtx reg, rtx mem, rtx scratch, bool store_p)
{
  int regno = true_regnum (reg);
  machine_mode mode = GET_MODE (reg);
  addr_mask_type addr_mask;
  rtx addr;
  rtx new_addr;
  rtx op_reg, op0, op1;
  rtx and_op;
  rtx cc_clobber;
  rtvec rv;

  if (regno < 0 || !HARD_REGISTER_NUM_P (regno) || !MEM_P (mem)
      || !base_reg_operand (scratch, GET_MODE (scratch)))
    rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);

  if (IN_RANGE (regno, FIRST_GPR_REGNO, LAST_GPR_REGNO))
    addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_GPR];

  else if (IN_RANGE (regno, FIRST_FPR_REGNO, LAST_FPR_REGNO))
    addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_FPR];

  else if (IN_RANGE (regno, FIRST_ALTIVEC_REGNO, LAST_ALTIVEC_REGNO))
    addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_VMX];

  else
    rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);

  /* Make sure the mode is valid in this register class.  */
  if ((addr_mask & RELOAD_REG_VALID) == 0)
    rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);

  if (TARGET_DEBUG_ADDR)
    rs6000_secondary_reload_trace (__LINE__, reg, mem, scratch, store_p);

  new_addr = addr = XEXP (mem, 0);
  switch (GET_CODE (addr))
    {
      /* Does the register class support auto update forms for this mode?  If
	 not, do the update now.  We don't need a scratch register, since the
	 powerpc only supports PRE_INC, PRE_DEC, and PRE_MODIFY.  */
    case PRE_INC:
    case PRE_DEC:
      op_reg = XEXP (addr, 0);
      if (!base_reg_operand (op_reg, Pmode))
	rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);

      if ((addr_mask & RELOAD_REG_PRE_INCDEC) == 0)
	{
	  int delta = GET_MODE_SIZE (mode);
	  if (GET_CODE (addr) == PRE_DEC)
	    delta = -delta;
	  emit_insn (gen_add2_insn (op_reg, GEN_INT (delta)));
	  new_addr = op_reg;
	}
      break;

    case PRE_MODIFY:
      op0 = XEXP (addr, 0);
      op1 = XEXP (addr, 1);
      if (!base_reg_operand (op0, Pmode)
	  || GET_CODE (op1) != PLUS
	  || !rtx_equal_p (op0, XEXP (op1, 0)))
	rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);

      if ((addr_mask & RELOAD_REG_PRE_MODIFY) == 0)
	{
	  emit_insn (gen_rtx_SET (op0, op1));
	  new_addr = reg;
	}
      break;

      /* Do we need to simulate AND -16 to clear the bottom address bits used
	 in VMX load/stores?  */
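      /* Illustrative sketch (register numbers invented): for an address
	 like (and (plus r3 r4) -16) in a class without RELOAD_REG_AND_M16,
	 the code below emits

		scratch = r3 + r4
		scratch = scratch & -16

	 with the AND wrapped in a PARALLEL carrying a CC scratch clobber,
	 to match the machine's and patterns, which may clobber a
	 condition register.  */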
    case AND:
      op0 = XEXP (addr, 0);
      op1 = XEXP (addr, 1);
      if ((addr_mask & RELOAD_REG_AND_M16) == 0)
	{
	  if (REG_P (op0) || SUBREG_P (op0))
	    op_reg = op0;

	  else if (GET_CODE (op1) == PLUS)
	    {
	      emit_insn (gen_rtx_SET (scratch, op1));
	      op_reg = scratch;
	    }

	  else
	    rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);

	  and_op = gen_rtx_AND (GET_MODE (scratch), op_reg, op1);
	  cc_clobber = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (CCmode));
	  rv = gen_rtvec (2, gen_rtx_SET (scratch, and_op), cc_clobber);
	  emit_insn (gen_rtx_PARALLEL (VOIDmode, rv));
	  new_addr = scratch;
	}
      break;

      /* If this is an indirect address, make sure it is a base register.  */
    case REG:
    case SUBREG:
      if (!base_reg_operand (addr, GET_MODE (addr)))
	{
	  emit_insn (gen_rtx_SET (scratch, addr));
	  new_addr = scratch;
	}
      break;

      /* If this is an indexed address, make sure the register class can handle
	 indexed addresses for this mode.  */
    case PLUS:
      op0 = XEXP (addr, 0);
      op1 = XEXP (addr, 1);
      if (!base_reg_operand (op0, Pmode))
	rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);

      else if (int_reg_operand (op1, Pmode))
	{
	  if ((addr_mask & RELOAD_REG_INDEXED) == 0)
	    {
	      emit_insn (gen_rtx_SET (scratch, addr));
	      new_addr = scratch;
	    }
	}

      else if (mode_supports_dq_form (mode) && CONST_INT_P (op1))
	{
	  if (((addr_mask & RELOAD_REG_QUAD_OFFSET) == 0)
	      || !quad_address_p (addr, mode, false))
	    {
	      emit_insn (gen_rtx_SET (scratch, addr));
	      new_addr = scratch;
	    }
	}

      /* Make sure the register class can handle offset addresses.  */
      else if (rs6000_legitimate_offset_address_p (mode, addr, false, true))
	{
	  if ((addr_mask & RELOAD_REG_OFFSET) == 0)
	    {
	      emit_insn (gen_rtx_SET (scratch, addr));
	      new_addr = scratch;
	    }
	}

      else
	rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);

      break;

    case LO_SUM:
      op0 = XEXP (addr, 0);
      op1 = XEXP (addr, 1);
      if (!base_reg_operand (op0, Pmode))
	rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);

      else if (int_reg_operand (op1, Pmode))
	{
	  if ((addr_mask & RELOAD_REG_INDEXED) == 0)
	    {
	      emit_insn (gen_rtx_SET (scratch, addr));
	      new_addr = scratch;
	    }
	}

      /* Quad offsets are restricted and can't handle normal addresses.  */
      else if (mode_supports_dq_form (mode))
	{
	  emit_insn (gen_rtx_SET (scratch, addr));
	  new_addr = scratch;
	}

      /* Make sure the register class can handle offset addresses.  */
      else if (legitimate_lo_sum_address_p (mode, addr, false))
	{
	  if ((addr_mask & RELOAD_REG_OFFSET) == 0)
	    {
	      emit_insn (gen_rtx_SET (scratch, addr));
	      new_addr = scratch;
	    }
	}

      else
	rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);

      break;

    case SYMBOL_REF:
    case CONST:
    case LABEL_REF:
      rs6000_emit_move (scratch, addr, Pmode);
      new_addr = scratch;
      break;

    default:
      rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
    }

  /* Adjust the address if it changed.  */
  if (addr != new_addr)
    {
      mem = replace_equiv_address_nv (mem, new_addr);
      if (TARGET_DEBUG_ADDR)
	fprintf (stderr, "\nrs6000_secondary_reload_inner, mem adjusted.\n");
    }

  /* Now create the move.  */
  if (store_p)
    emit_insn (gen_rtx_SET (mem, reg));
  else
    emit_insn (gen_rtx_SET (reg, mem));

  return;
}

/* Convert reloads involving 64-bit gprs and misaligned offset
   addressing, or multiple 32-bit gprs and offsets that are too large,
   to use indirect addressing.  */
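/* Illustrative example (values invented): a -m32 load of a DImode gpr
   pair from (plus r9 0x7ffe) cannot simply use two lwz instructions,
   because the second word's offset 0x8002 no longer fits a 16-bit
   displacement; the PLUS is computed into the scratch register first and
   the memory access is rewritten to the indirect form (mem (reg)).  */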

void
rs6000_secondary_reload_gpr (rtx reg, rtx mem, rtx scratch, bool store_p)
{
  int regno = true_regnum (reg);
  enum reg_class rclass;
  rtx addr;
  rtx scratch_or_premodify = scratch;

  if (TARGET_DEBUG_ADDR)
    {
      fprintf (stderr, "\nrs6000_secondary_reload_gpr, type = %s\n",
	       store_p ? "store" : "load");
      fprintf (stderr, "reg:\n");
      debug_rtx (reg);
      fprintf (stderr, "mem:\n");
      debug_rtx (mem);
      fprintf (stderr, "scratch:\n");
      debug_rtx (scratch);
    }

  gcc_assert (regno >= 0 && HARD_REGISTER_NUM_P (regno));
  gcc_assert (MEM_P (mem));
  rclass = REGNO_REG_CLASS (regno);
  gcc_assert (rclass == GENERAL_REGS || rclass == BASE_REGS);
  addr = XEXP (mem, 0);

  if (GET_CODE (addr) == PRE_MODIFY)
    {
      gcc_assert (REG_P (XEXP (addr, 0))
		  && GET_CODE (XEXP (addr, 1)) == PLUS
		  && XEXP (XEXP (addr, 1), 0) == XEXP (addr, 0));
      scratch_or_premodify = XEXP (addr, 0);
      addr = XEXP (addr, 1);
    }
  gcc_assert (GET_CODE (addr) == PLUS || GET_CODE (addr) == LO_SUM);

  rs6000_emit_move (scratch_or_premodify, addr, Pmode);

  mem = replace_equiv_address_nv (mem, scratch_or_premodify);

  /* Now create the move.  */
  if (store_p)
    emit_insn (gen_rtx_SET (mem, reg));
  else
    emit_insn (gen_rtx_SET (reg, mem));

  return;
}

/* Given an rtx X being reloaded into a reg required to be
   in class CLASS, return the class of reg to actually use.
   In general this is just CLASS; but on some machines
   in some cases it is preferable to use a more restrictive class.

   On the RS/6000, we have to return NO_REGS when we want to reload a
   floating-point CONST_DOUBLE to force it to be copied to memory.

   We also don't want to reload integer values into floating-point
   registers if we can at all help it.  In fact, this can
   cause reload to die, if it tries to generate a reload of CTR
   into a FP register and discovers it doesn't have the memory location
   required.

   ??? Would it be a good idea to have reload do the converse, that is
   try to reload floating modes into FP registers if possible?
 */

static enum reg_class
rs6000_preferred_reload_class (rtx x, enum reg_class rclass)
{
  machine_mode mode = GET_MODE (x);
  bool is_constant = CONSTANT_P (x);

  /* If a mode can't go in FPR/ALTIVEC/VSX registers, don't return a preferred
     reload class for it.  */
  if ((rclass == ALTIVEC_REGS || rclass == VSX_REGS)
      && (reg_addr[mode].addr_mask[RELOAD_REG_VMX] & RELOAD_REG_VALID) == 0)
    return NO_REGS;

  if ((rclass == FLOAT_REGS || rclass == VSX_REGS)
      && (reg_addr[mode].addr_mask[RELOAD_REG_FPR] & RELOAD_REG_VALID) == 0)
    return NO_REGS;

  /* For VSX, see if we should prefer FLOAT_REGS or ALTIVEC_REGS.  Do not allow
     the reloading of address expressions using PLUS into floating point
     registers.  */
  if (TARGET_VSX && VSX_REG_CLASS_P (rclass) && GET_CODE (x) != PLUS)
    {
      if (is_constant)
	{
	  /* Zero is always allowed in all VSX registers.  */
	  if (x == CONST0_RTX (mode))
	    return rclass;

	  /* If this is a vector constant that can be formed with a few Altivec
	     instructions, we want altivec registers.  */
	  if (GET_CODE (x) == CONST_VECTOR && easy_vector_constant (x, mode))
	    return ALTIVEC_REGS;

	  /* If this is an integer constant that can easily be loaded into
	     vector registers, allow it.  */
	  if (CONST_INT_P (x))
	    {
	      HOST_WIDE_INT value = INTVAL (x);

	      /* ISA 2.07 can generate -1 in all registers with XXLORC.  ISA
		 2.06 can generate it in the Altivec registers with
		 VSPLTI<x>.  */
	      if (value == -1)
		{
		  if (TARGET_P8_VECTOR)
		    return rclass;
		  else if (rclass == ALTIVEC_REGS || rclass == VSX_REGS)
		    return ALTIVEC_REGS;
		  else
		    return NO_REGS;
		}

	      /* ISA 3.0 can load -128..127 using the XXSPLTIB instruction and
		 a sign extend in the Altivec registers.  */
	      if (IN_RANGE (value, -128, 127) && TARGET_P9_VECTOR
		  && (rclass == ALTIVEC_REGS || rclass == VSX_REGS))
		return ALTIVEC_REGS;
	    }

	  /* Force constant to memory.  */
	  return NO_REGS;
	}

      /* D-form addressing can easily reload the value.  */
      if (mode_supports_vmx_dform (mode)
	  || mode_supports_dq_form (mode))
	return rclass;

      /* If this is a scalar floating point value and we don't have D-form
	 addressing, prefer the traditional floating point registers so that we
	 can use D-form (register+offset) addressing.  */
      if (rclass == VSX_REGS
	  && (mode == SFmode || GET_MODE_SIZE (mode) == 8))
	return FLOAT_REGS;

      /* Prefer the Altivec registers if Altivec is handling the vector
	 operations (i.e. V16QI, V8HI, and V4SI), or if we prefer Altivec
	 loads.  */
      if (VECTOR_UNIT_ALTIVEC_P (mode) || VECTOR_MEM_ALTIVEC_P (mode)
	  || mode == V1TImode)
	return ALTIVEC_REGS;

      return rclass;
    }

  if (is_constant || GET_CODE (x) == PLUS)
    {
      if (reg_class_subset_p (GENERAL_REGS, rclass))
	return GENERAL_REGS;
      if (reg_class_subset_p (BASE_REGS, rclass))
	return BASE_REGS;
      return NO_REGS;
    }

  /* For the vector pair and vector quad modes, prefer their natural register
     (VSX or FPR) rather than GPR registers.  For other integer types, prefer
     the GPR registers.  */
  if (rclass == GEN_OR_FLOAT_REGS)
    {
      if (mode == OOmode)
	return VSX_REGS;

      if (mode == XOmode)
	return FLOAT_REGS;

      if (GET_MODE_CLASS (mode) == MODE_INT)
	return GENERAL_REGS;
    }

  return rclass;
}

/* Debug version of rs6000_preferred_reload_class.  */
static enum reg_class
rs6000_debug_preferred_reload_class (rtx x, enum reg_class rclass)
{
  enum reg_class ret = rs6000_preferred_reload_class (x, rclass);

  fprintf (stderr,
	   "\nrs6000_preferred_reload_class, return %s, rclass = %s, "
	   "mode = %s, x:\n",
	   reg_class_names[ret], reg_class_names[rclass],
	   GET_MODE_NAME (GET_MODE (x)));
  debug_rtx (x);

  return ret;
}

/* If we are copying between FP or AltiVec registers and anything else, we need
   a memory location.  The exception is when we are targeting ppc64 and the
   instructions to move between the fprs and gprs are available.  Also, under
   VSX, you can copy vector registers from the FP register set to the Altivec
   register set and vice versa.  */
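/* For instance (an invented scenario, not from the sources): copying a
   DFmode value between the GPRs and an FPR without the direct-move
   facility has no register-to-register path, so the hook below answers
   true and the copy goes through a stack slot; with direct moves
   available the same copy needs no memory at all.  */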

static bool
rs6000_secondary_memory_needed (machine_mode mode,
				reg_class_t from_class,
				reg_class_t to_class)
{
  enum rs6000_reg_type from_type, to_type;
  bool altivec_p = ((from_class == ALTIVEC_REGS)
		    || (to_class == ALTIVEC_REGS));

  /* If a simple/direct move is available, we don't need secondary memory.  */
  from_type = reg_class_to_reg_type[(int)from_class];
  to_type = reg_class_to_reg_type[(int)to_class];

  if (rs6000_secondary_reload_move (to_type, from_type, mode,
				    (secondary_reload_info *)0, altivec_p))
    return false;

  /* If we have a floating point or vector register class, we need to use
     memory to transfer the data.  */
  if (IS_FP_VECT_REG_TYPE (from_type) || IS_FP_VECT_REG_TYPE (to_type))
    return true;

  return false;
}

/* Debug version of rs6000_secondary_memory_needed.  */
static bool
rs6000_debug_secondary_memory_needed (machine_mode mode,
				      reg_class_t from_class,
				      reg_class_t to_class)
{
  bool ret = rs6000_secondary_memory_needed (mode, from_class, to_class);

  fprintf (stderr,
	   "rs6000_secondary_memory_needed, return: %s, from_class = %s, "
	   "to_class = %s, mode = %s\n",
	   ret ? "true" : "false",
	   reg_class_names[from_class],
	   reg_class_names[to_class],
	   GET_MODE_NAME (mode));

  return ret;
}

/* Return the register class of a scratch register needed to copy IN into
   or out of a register in RCLASS in MODE.  If it can be done directly,
   NO_REGS is returned.  */
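/* A typical pair of outcomes (illustrative): on an ELF target, copying
   the address constant (symbol_ref "x") into FLOAT_REGS needs a
   BASE_REGS intermediate to form the address, while copying one FPR to
   another in DFmode can be done directly and yields NO_REGS.  */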

static enum reg_class
rs6000_secondary_reload_class (enum reg_class rclass, machine_mode mode,
			       rtx in)
{
  int regno;

  if (TARGET_ELF || (DEFAULT_ABI == ABI_DARWIN
#if TARGET_MACHO
		     && MACHOPIC_INDIRECT
#endif
		     ))
    {
      /* We cannot copy a symbolic operand directly into anything
	 other than BASE_REGS for TARGET_ELF.  So indicate that a
	 register from BASE_REGS is needed as an intermediate
	 register.

	 On Darwin, pic addresses require a load from memory, which
	 needs a base register.  */
      if (rclass != BASE_REGS
	  && (SYMBOL_REF_P (in)
	      || GET_CODE (in) == HIGH
	      || GET_CODE (in) == LABEL_REF
	      || GET_CODE (in) == CONST))
	return BASE_REGS;
    }

  if (REG_P (in))
    {
      regno = REGNO (in);
      if (!HARD_REGISTER_NUM_P (regno))
	{
	  regno = true_regnum (in);
	  if (!HARD_REGISTER_NUM_P (regno))
	    regno = -1;
	}
    }
  else if (SUBREG_P (in))
    {
      regno = true_regnum (in);
      if (!HARD_REGISTER_NUM_P (regno))
	regno = -1;
    }
  else
    regno = -1;

  /* If we have VSX register moves, prefer moving scalar values between
     Altivec registers and GPR by going via an FPR (and then via memory)
     instead of reloading the secondary memory address for Altivec moves.  */
  if (TARGET_VSX
      && GET_MODE_SIZE (mode) < 16
      && !mode_supports_vmx_dform (mode)
      && (((rclass == GENERAL_REGS || rclass == BASE_REGS)
           && (regno >= 0 && ALTIVEC_REGNO_P (regno)))
          || ((rclass == VSX_REGS || rclass == ALTIVEC_REGS)
              && (regno >= 0 && INT_REGNO_P (regno)))))
    return FLOAT_REGS;

  /* We can place anything into GENERAL_REGS and can put GENERAL_REGS
     into anything.  */
  if (rclass == GENERAL_REGS || rclass == BASE_REGS
      || (regno >= 0 && INT_REGNO_P (regno)))
    return NO_REGS;

  /* Constants, memory, and VSX registers can go into VSX registers (both the
     traditional floating point and the altivec registers).  */
  if (rclass == VSX_REGS
      && (regno == -1 || VSX_REGNO_P (regno)))
    return NO_REGS;

  /* Constants, memory, and FP registers can go into FP registers.  */
  if ((regno == -1 || FP_REGNO_P (regno))
      && (rclass == FLOAT_REGS || rclass == GEN_OR_FLOAT_REGS))
    return (mode != SDmode || lra_in_progress) ? NO_REGS : GENERAL_REGS;

  /* Memory, and AltiVec registers can go into AltiVec registers.  */
  if ((regno == -1 || ALTIVEC_REGNO_P (regno))
      && rclass == ALTIVEC_REGS)
    return NO_REGS;

  /* We can copy among the CR registers.  */
  if ((rclass == CR_REGS || rclass == CR0_REGS)
      && regno >= 0 && CR_REGNO_P (regno))
    return NO_REGS;

  /* Otherwise, we need GENERAL_REGS.  */
  return GENERAL_REGS;
}

/* Debug version of rs6000_secondary_reload_class.  */
static enum reg_class
rs6000_debug_secondary_reload_class (enum reg_class rclass,
				     machine_mode mode, rtx in)
{
  enum reg_class ret = rs6000_secondary_reload_class (rclass, mode, in);
  fprintf (stderr,
	   "\nrs6000_secondary_reload_class, return %s, rclass = %s, "
	   "mode = %s, input rtx:\n",
	   reg_class_names[ret], reg_class_names[rclass],
	   GET_MODE_NAME (mode));
  debug_rtx (in);

  return ret;
}

/* Implement TARGET_CAN_CHANGE_MODE_CLASS.  */

static bool
rs6000_can_change_mode_class (machine_mode from,
			      machine_mode to,
			      reg_class_t rclass)
{
  unsigned from_size = GET_MODE_SIZE (from);
  unsigned to_size = GET_MODE_SIZE (to);

  if (from_size != to_size)
    {
      enum reg_class xclass = (TARGET_VSX) ? VSX_REGS : FLOAT_REGS;

      if (reg_classes_intersect_p (xclass, rclass))
	{
	  unsigned to_nregs = hard_regno_nregs (FIRST_FPR_REGNO, to);
	  unsigned from_nregs = hard_regno_nregs (FIRST_FPR_REGNO, from);
	  bool to_float128_vector_p = FLOAT128_VECTOR_P (to);
	  bool from_float128_vector_p = FLOAT128_VECTOR_P (from);

	  /* Don't allow 64-bit types to overlap with 128-bit types that take a
	     single register under VSX because the scalar part of the register
	     is in the upper 64 bits, and not the lower 64 bits.  Types like
	     TFmode/TDmode that take 2 scalar registers can overlap.  128-bit
	     IEEE floating point can't overlap, and neither can small
	     values.  */

	  if (to_float128_vector_p && from_float128_vector_p)
	    return true;

	  else if (to_float128_vector_p || from_float128_vector_p)
	    return false;

	  /* TDmode in floating-mode registers must always go into a register
	     pair with the most significant word in the even-numbered register
	     to match ISA requirements.  In little-endian mode, this does not
	     match subreg numbering, so we cannot allow subregs.  */
	  if (!BYTES_BIG_ENDIAN && (to == TDmode || from == TDmode))
	    return false;

	  /* Allow SD<->DD changes, since SDmode values are stored in
	     the low half of the DDmode, just like target-independent
	     code expects.  We need to allow at least SD->DD since
	     rs6000_secondary_memory_needed_mode asks for that change
	     to be made for SD reloads.  */
	  if ((to == DDmode && from == SDmode)
	      || (to == SDmode && from == DDmode))
	    return true;

	  if (from_size < 8 || to_size < 8)
	    return false;

	  if (from_size == 8 && (8 * to_nregs) != to_size)
	    return false;

	  if (to_size == 8 && (8 * from_nregs) != from_size)
	    return false;

	  return true;
	}
      else
	return true;
    }

  /* Since the VSX register set includes traditional floating point registers
     and altivec registers, just check for the size being different instead of
     trying to check whether the modes are vector modes.  Otherwise it won't
     allow say DF and DI to change classes.  For types like TFmode and TDmode
     that take 2 64-bit registers, rather than a single 128-bit register, don't
     allow subregs of those types to other 128 bit types.  */
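  /* Worked example (illustrative): with FROM = DFmode, NUM_REGS is
     (8 + 15) / 16 = 1, so a change to TImode is rejected because TImode
     needs two FPRs, while DFmode <-> DImode is allowed since each fits
     in a single register and FROM_SIZE is 8.  */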
  if (TARGET_VSX && VSX_REG_CLASS_P (rclass))
    {
      unsigned num_regs = (from_size + 15) / 16;
      if (hard_regno_nregs (FIRST_FPR_REGNO, to) > num_regs
	  || hard_regno_nregs (FIRST_FPR_REGNO, from) > num_regs)
	return false;

      return (from_size == 8 || from_size == 16);
    }

  if (TARGET_ALTIVEC && rclass == ALTIVEC_REGS
      && (ALTIVEC_VECTOR_MODE (from) + ALTIVEC_VECTOR_MODE (to)) == 1)
    return false;

  return true;
}

/* Debug version of rs6000_can_change_mode_class.  */
static bool
rs6000_debug_can_change_mode_class (machine_mode from,
				    machine_mode to,
				    reg_class_t rclass)
{
  bool ret = rs6000_can_change_mode_class (from, to, rclass);

  fprintf (stderr,
	   "rs6000_can_change_mode_class, return %s, from = %s, "
	   "to = %s, rclass = %s\n",
	   ret ? "true" : "false",
	   GET_MODE_NAME (from), GET_MODE_NAME (to),
	   reg_class_names[rclass]);

  return ret;
}

/* Return a string to do a move operation of 128 bits of data.  */

const char *
rs6000_output_move_128bit (rtx operands[])
{
  rtx dest = operands[0];
  rtx src = operands[1];
  machine_mode mode = GET_MODE (dest);
  int dest_regno;
  int src_regno;
  bool dest_gpr_p, dest_fp_p, dest_vmx_p, dest_vsx_p;
  bool src_gpr_p, src_fp_p, src_vmx_p, src_vsx_p;

  if (REG_P (dest))
    {
      dest_regno = REGNO (dest);
      dest_gpr_p = INT_REGNO_P (dest_regno);
      dest_fp_p = FP_REGNO_P (dest_regno);
      dest_vmx_p = ALTIVEC_REGNO_P (dest_regno);
      dest_vsx_p = dest_fp_p | dest_vmx_p;
    }
  else
    {
      dest_regno = -1;
      dest_gpr_p = dest_fp_p = dest_vmx_p = dest_vsx_p = false;
    }

  if (REG_P (src))
    {
      src_regno = REGNO (src);
      src_gpr_p = INT_REGNO_P (src_regno);
      src_fp_p = FP_REGNO_P (src_regno);
      src_vmx_p = ALTIVEC_REGNO_P (src_regno);
      src_vsx_p = src_fp_p | src_vmx_p;
    }
  else
    {
      src_regno = -1;
      src_gpr_p = src_fp_p = src_vmx_p = src_vsx_p = false;
    }

  /* Register moves.  */
  if (dest_regno >= 0 && src_regno >= 0)
    {
      if (dest_gpr_p)
	{
	  if (src_gpr_p)
	    return "#";

	  if (TARGET_DIRECT_MOVE_128 && src_vsx_p)
	    return (WORDS_BIG_ENDIAN
		    ? "mfvsrd %0,%x1\n\tmfvsrld %L0,%x1"
		    : "mfvsrd %L0,%x1\n\tmfvsrld %0,%x1");

	  else if (TARGET_VSX && TARGET_DIRECT_MOVE && src_vsx_p)
	    return "#";
	}

      else if (TARGET_VSX && dest_vsx_p)
	{
	  if (src_vsx_p)
	    return "xxlor %x0,%x1,%x1";

	  else if (TARGET_DIRECT_MOVE_128 && src_gpr_p)
	    return (WORDS_BIG_ENDIAN
		    ? "mtvsrdd %x0,%1,%L1"
		    : "mtvsrdd %x0,%L1,%1");

	  else if (TARGET_DIRECT_MOVE && src_gpr_p)
	    return "#";
	}

      else if (TARGET_ALTIVEC && dest_vmx_p && src_vmx_p)
	return "vor %0,%1,%1";

      else if (dest_fp_p && src_fp_p)
	return "#";
    }

  /* Loads.  */
  else if (dest_regno >= 0 && MEM_P (src))
    {
      if (dest_gpr_p)
	{
	  if (TARGET_QUAD_MEMORY && quad_load_store_p (dest, src))
	    return "lq %0,%1";
	  else
	    return "#";
	}

      else if (TARGET_ALTIVEC && dest_vmx_p
	       && altivec_indexed_or_indirect_operand (src, mode))
	return "lvx %0,%y1";

      else if (TARGET_VSX && dest_vsx_p)
	{
	  if (mode_supports_dq_form (mode)
	      && quad_address_p (XEXP (src, 0), mode, true))
	    return "lxv %x0,%1";

	  else if (TARGET_P9_VECTOR)
	    return "lxvx %x0,%y1";

	  else if (mode == V16QImode || mode == V8HImode || mode == V4SImode)
	    return "lxvw4x %x0,%y1";

	  else
	    return "lxvd2x %x0,%y1";
	}

      else if (TARGET_ALTIVEC && dest_vmx_p)
	return "lvx %0,%y1";

      else if (dest_fp_p)
	return "#";
    }

  /* Stores.  */
  else if (src_regno >= 0 && MEM_P (dest))
    {
      if (src_gpr_p)
	{
	  if (TARGET_QUAD_MEMORY && quad_load_store_p (dest, src))
	    return "stq %1,%0";
	  else
	    return "#";
	}

      else if (TARGET_ALTIVEC && src_vmx_p
	       && altivec_indexed_or_indirect_operand (dest, mode))
	return "stvx %1,%y0";

      else if (TARGET_VSX && src_vsx_p)
	{
	  if (mode_supports_dq_form (mode)
	      && quad_address_p (XEXP (dest, 0), mode, true))
	    return "stxv %x1,%0";

	  else if (TARGET_P9_VECTOR)
	    return "stxvx %x1,%y0";

	  else if (mode == V16QImode || mode == V8HImode || mode == V4SImode)
	    return "stxvw4x %x1,%y0";

	  else
	    return "stxvd2x %x1,%y0";
	}

      else if (TARGET_ALTIVEC && src_vmx_p)
	return "stvx %1,%y0";

      else if (src_fp_p)
	return "#";
    }

  /* Constants.  */
  else if (dest_regno >= 0
	   && (CONST_INT_P (src)
	       || CONST_WIDE_INT_P (src)
	       || CONST_DOUBLE_P (src)
	       || GET_CODE (src) == CONST_VECTOR))
    {
      if (dest_gpr_p)
	return "#";

      else if ((dest_vmx_p && TARGET_ALTIVEC)
	       || (dest_vsx_p && TARGET_VSX))
	return output_vec_const_move (operands);
    }

  fatal_insn ("Bad 128-bit move", gen_rtx_SET (dest, src));
}

/* Validate a 128-bit move.  */
bool
rs6000_move_128bit_ok_p (rtx operands[])
{
  machine_mode mode = GET_MODE (operands[0]);
  return (gpc_reg_operand (operands[0], mode)
	  || gpc_reg_operand (operands[1], mode));
}

/* Return true if a 128-bit move needs to be split.  */
bool
rs6000_split_128bit_ok_p (rtx operands[])
{
  if (!reload_completed)
    return false;

  if (!gpr_or_gpr_p (operands[0], operands[1]))
    return false;

  if (quad_load_store_p (operands[0], operands[1]))
    return false;

  return true;
}


/* Given a comparison operation, return the bit number in CCR to test.  We
   know this is a valid comparison.

   SCC_P is 1 if this is for an scc.  That means that %D will have been
   used instead of %C, so the bits will be in different places.

   Return -1 if OP isn't a valid comparison for some reason.  */
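/* Worked example (illustrative rtl): for (gt (reg:CC 69) (const_int 0)),
   assuming CR0_REGNO is 68 so that register 69 is CR1, BASE_BIT is 4 and
   the GT case below returns bit 5.  For an scc GE test of the same
   register, the preceding cror will have copied the result into the
   unordered slot, so BASE_BIT + 3 = 7 is returned instead.  */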

int
ccr_bit (rtx op, int scc_p)
{
  enum rtx_code code = GET_CODE (op);
  machine_mode cc_mode;
  int cc_regnum;
  int base_bit;
  rtx reg;

  if (!COMPARISON_P (op))
    return -1;

  reg = XEXP (op, 0);

  if (!REG_P (reg) || !CR_REGNO_P (REGNO (reg)))
    return -1;

  cc_mode = GET_MODE (reg);
  cc_regnum = REGNO (reg);
  base_bit = 4 * (cc_regnum - CR0_REGNO);

  validate_condition_mode (code, cc_mode);

  /* When generating a sCOND operation, only positive conditions are
     allowed.  */
  if (scc_p)
    switch (code)
      {
      case EQ:
      case GT:
      case LT:
      case UNORDERED:
      case GTU:
      case LTU:
	break;
      default:
	return -1;
      }

  switch (code)
    {
    case NE:
      return scc_p ? base_bit + 3 : base_bit + 2;
    case EQ:
      return base_bit + 2;
    case GT:  case GTU:  case UNLE:
      return base_bit + 1;
    case LT:  case LTU:  case UNGE:
      return base_bit;
    case ORDERED:  case UNORDERED:
      return base_bit + 3;

    case GE:  case GEU:
      /* If scc, we will have done a cror to put the bit in the
	 unordered position.  So test that bit.  For integer, this is ! LT
	 unless this is an scc insn.  */
      return scc_p ? base_bit + 3 : base_bit;

    case LE:  case LEU:
      return scc_p ? base_bit + 3 : base_bit + 1;

    default:
      return -1;
    }
}

/* Return the GOT register.  */

rtx
rs6000_got_register (rtx value ATTRIBUTE_UNUSED)
{
  /* The second flow pass currently (June 1999) can't update
     regs_ever_live without disturbing other parts of the compiler, so
     update it here to make the prolog/epilogue code happy.  */
  if (!can_create_pseudo_p ()
      && !df_regs_ever_live_p (RS6000_PIC_OFFSET_TABLE_REGNUM))
    df_set_regs_ever_live (RS6000_PIC_OFFSET_TABLE_REGNUM, true);

  crtl->uses_pic_offset_table = 1;

  return pic_offset_table_rtx;
}

#define INT_P(X) (CONST_INT_P (X) && GET_MODE (X) == VOIDmode)

/* Write out a function code label.  */

void
rs6000_output_function_entry (FILE *file, const char *fname)
{
  if (fname[0] != '.')
    {
      switch (DEFAULT_ABI)
	{
	default:
	  gcc_unreachable ();

	case ABI_AIX:
	  if (DOT_SYMBOLS)
	    putc ('.', file);
	  else
	    ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "L.");
	  break;

	case ABI_ELFv2:
	case ABI_V4:
	case ABI_DARWIN:
	  break;
	}
    }

  RS6000_OUTPUT_BASENAME (file, fname);
}

/* Print an operand.  Recognize special options, documented below.  */

#if TARGET_ELF
/* Access to .sdata2 through r2 (see -msdata=eabi in invoke.texi) is
   only introduced by the linker, when applying the sda21
   relocation.  */
#define SMALL_DATA_RELOC ((rs6000_sdata == SDATA_EABI) ? "sda21" : "sdarel")
#define SMALL_DATA_REG ((rs6000_sdata == SDATA_EABI) ? 0 : 13)
#else
#define SMALL_DATA_RELOC "sda21"
#define SMALL_DATA_REG 0
#endif
void
print_operand (FILE *file, rtx x, int code)
{
  int i;
  unsigned HOST_WIDE_INT uval;

  switch (code)
    {
      /* %a is output_address.  */

      /* %c is output_addr_const if a CONSTANT_ADDRESS_P, otherwise
	 output_operand.  */

    case 'A':
      /* Write the MMA accumulator number associated with VSX register X.  */
      if (!REG_P (x) || !FP_REGNO_P (REGNO (x)) || (REGNO (x) % 4) != 0)
	output_operand_lossage ("invalid %%A value");
      else
	fprintf (file, "%d", (REGNO (x) - FIRST_FPR_REGNO) / 4);
      return;

    case 'D':
      /* Like 'J' but get to the GT bit only.  */
      if (!REG_P (x) || !CR_REGNO_P (REGNO (x)))
	{
	  output_operand_lossage ("invalid %%D value");
	  return;
	}

      /* Bit 1 is GT bit.  */
      i = 4 * (REGNO (x) - CR0_REGNO) + 1;

      /* Add one for shift count in rlinm for scc.  */
      fprintf (file, "%d", i + 1);
      return;

    case 'e':
      /* If the low 16 bits are 0, but some other bit is set, write 's'.  */
      if (! INT_P (x))
	{
	  output_operand_lossage ("invalid %%e value");
	  return;
	}

      uval = INTVAL (x);
      if ((uval & 0xffff) == 0 && uval != 0)
	putc ('s', file);
      return;

    case 'E':
      /* X is a CR register.  Print the number of the EQ bit of the CR.  */
      if (!REG_P (x) || !CR_REGNO_P (REGNO (x)))
	output_operand_lossage ("invalid %%E value");
      else
	fprintf (file, "%d", 4 * (REGNO (x) - CR0_REGNO) + 2);
      return;

    case 'f':
      /* X is a CR register.  Print the shift count needed to move it
	 to the high-order four bits.  */
      if (!REG_P (x) || !CR_REGNO_P (REGNO (x)))
	output_operand_lossage ("invalid %%f value");
      else
	fprintf (file, "%d", 4 * (REGNO (x) - CR0_REGNO));
      return;

    case 'F':
      /* Similar, but print the count for the rotate in the opposite
	 direction.  */
      if (!REG_P (x) || !CR_REGNO_P (REGNO (x)))
	output_operand_lossage ("invalid %%F value");
      else
	fprintf (file, "%d", 32 - 4 * (REGNO (x) - CR0_REGNO));
      return;

    case 'G':
      /* X is a constant integer.  If it is negative, print "m",
	 otherwise print "z".  This is to make an aze or ame insn.  */
      if (!CONST_INT_P (x))
	output_operand_lossage ("invalid %%G value");
      else if (INTVAL (x) >= 0)
	putc ('z', file);
      else
	putc ('m', file);
      return;

    case 'h':
      /* If constant, output low-order five bits.  Otherwise, write
	 normally.  */
      if (INT_P (x))
	fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 31);
      else
	print_operand (file, x, 0);
      return;

    case 'H':
      /* If constant, output low-order six bits.  Otherwise, write
	 normally.  */
      if (INT_P (x))
	fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 63);
      else
	print_operand (file, x, 0);
      return;

    case 'I':
      /* Print `i' if this is a constant, else nothing.  */
      if (INT_P (x))
	putc ('i', file);
      return;

    case 'j':
      /* Write the bit number in CCR for jump.  */
      i = ccr_bit (x, 0);
      if (i == -1)
	output_operand_lossage ("invalid %%j code");
      else
	fprintf (file, "%d", i);
      return;

    case 'J':
      /* Similar, but add one for shift count in rlinm for scc and pass
	 scc flag to `ccr_bit'.  */
      i = ccr_bit (x, 1);
      if (i == -1)
	output_operand_lossage ("invalid %%J code");
      else
	/* If we want bit 31, write a shift count of zero, not 32.  */
	fprintf (file, "%d", i == 31 ? 0 : i + 1);
      return;

    case 'k':
      /* X must be a constant.  Write the 1's complement of the
	 constant.  */
      if (! INT_P (x))
	output_operand_lossage ("invalid %%k value");
      else
	fprintf (file, HOST_WIDE_INT_PRINT_DEC, ~ INTVAL (x));
      return;

    case 'K':
      /* X must be a symbolic constant on ELF.  Write an
	 expression suitable for an 'addi' that adds in the low 16
	 bits of the MEM.  */
      if (GET_CODE (x) == CONST)
	{
	  if (GET_CODE (XEXP (x, 0)) != PLUS
	      || (!SYMBOL_REF_P (XEXP (XEXP (x, 0), 0))
		  && GET_CODE (XEXP (XEXP (x, 0), 0)) != LABEL_REF)
	      || !CONST_INT_P (XEXP (XEXP (x, 0), 1)))
	    output_operand_lossage ("invalid %%K value");
	}
      print_operand_address (file, x);
      fputs ("@l", file);
      return;

      /* %l is output_asm_label.  */

    case 'L':
      /* Write second word of DImode or DFmode reference.  Works on register
	 or non-indexed memory only.  */
      if (REG_P (x))
	fputs (reg_names[REGNO (x) + 1], file);
      else if (MEM_P (x))
	{
	  machine_mode mode = GET_MODE (x);
	  /* Handle possible auto-increment.  Since it is pre-increment and
	     we have already done it, we can just use an offset of word.  */
	  if (GET_CODE (XEXP (x, 0)) == PRE_INC
	      || GET_CODE (XEXP (x, 0)) == PRE_DEC)
	    output_address (mode, plus_constant (Pmode, XEXP (XEXP (x, 0), 0),
						 UNITS_PER_WORD));
	  else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
	    output_address (mode, plus_constant (Pmode, XEXP (XEXP (x, 0), 0),
						 UNITS_PER_WORD));
	  else
	    output_address (mode, XEXP (adjust_address_nv (x, SImode,
							   UNITS_PER_WORD),
				  0));

	  if (small_data_operand (x, GET_MODE (x)))
	    fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
		     reg_names[SMALL_DATA_REG]);
	}
      return;

    case 'N': /* Unused */
      /* Write the number of elements in the vector times 4.  */
      if (GET_CODE (x) != PARALLEL)
	output_operand_lossage ("invalid %%N value");
      else
	fprintf (file, "%d", XVECLEN (x, 0) * 4);
      return;

    case 'O': /* Unused */
      /* Similar, but subtract 1 first.  */
      if (GET_CODE (x) != PARALLEL)
	output_operand_lossage ("invalid %%O value");
      else
	fprintf (file, "%d", (XVECLEN (x, 0) - 1) * 4);
      return;

    case 'p':
      /* X is a CONST_INT that is a power of two.  Output the logarithm.  */
      if (! INT_P (x)
	  || INTVAL (x) < 0
	  || (i = exact_log2 (INTVAL (x))) < 0)
	output_operand_lossage ("invalid %%p value");
      else
	fprintf (file, "%d", i);
      return;

    case 'P':
      /* The operand must be an indirect memory reference.  The result
	 is the register name.  */
      if (!MEM_P (x) || !REG_P (XEXP (x, 0))
	  || REGNO (XEXP (x, 0)) >= 32)
	output_operand_lossage ("invalid %%P value");
      else
	fputs (reg_names[REGNO (XEXP (x, 0))], file);
      return;

    case 'q':
      /* This outputs the logical code corresponding to a boolean
	 expression.  The expression may have one or both operands
	 negated (if one, only the first one).  For condition register
	 logical operations, it will also treat the negated
	 CR codes as NOTs, but not handle NOTs of them.  */
      {
	const char *const *t = 0;
	const char *s;
	enum rtx_code code = GET_CODE (x);
	static const char * const tbl[3][3] = {
	  { "and", "andc", "nor" },
	  { "or", "orc", "nand" },
	  { "xor", "eqv", "xor" } };

	if (code == AND)
	  t = tbl[0];
	else if (code == IOR)
	  t = tbl[1];
	else if (code == XOR)
	  t = tbl[2];
	else
	  output_operand_lossage ("invalid %%q value");

	if (GET_CODE (XEXP (x, 0)) != NOT)
	  s = t[0];
	else
	  {
	    if (GET_CODE (XEXP (x, 1)) == NOT)
	      s = t[2];
	    else
	      s = t[1];
	  }

	fputs (s, file);
      }
      return;

    case 'Q':
      if (! TARGET_MFCRF)
	return;
      fputc (',', file);
      /* FALLTHRU */

    case 'R':
      /* X is a CR register.  Print the mask for `mtcrf'.  */
      if (!REG_P (x) || !CR_REGNO_P (REGNO (x)))
	output_operand_lossage ("invalid %%R value");
      else
	fprintf (file, "%d", 128 >> (REGNO (x) - CR0_REGNO));
      return;

    case 's':
      /* Low 5 bits of 32 - value.  */
      if (! INT_P (x))
	output_operand_lossage ("invalid %%s value");
      else
	fprintf (file, HOST_WIDE_INT_PRINT_DEC, (32 - INTVAL (x)) & 31);
      return;

    case 't':
      /* Like 'J' but get to the OVERFLOW/UNORDERED bit.  */
      if (!REG_P (x) || !CR_REGNO_P (REGNO (x)))
	{
	  output_operand_lossage ("invalid %%t value");
	  return;
	}

      /* Bit 3 is OV bit.  */
      i = 4 * (REGNO (x) - CR0_REGNO) + 3;

      /* If we want bit 31, write a shift count of zero, not 32.  */
      fprintf (file, "%d", i == 31 ? 0 : i + 1);
      return;

    case 'T':
      /* Print the symbolic name of a branch target register.  */
      if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PLTSEQ)
	x = XVECEXP (x, 0, 0);
      if (!REG_P (x) || (REGNO (x) != LR_REGNO
			 && REGNO (x) != CTR_REGNO))
	output_operand_lossage ("invalid %%T value");
      else if (REGNO (x) == LR_REGNO)
	fputs ("lr", file);
      else
	fputs ("ctr", file);
      return;

    case 'u':
      /* High-order or low-order 16 bits of constant, whichever is non-zero,
	 for use in unsigned operand.  */
      if (! INT_P (x))
	{
	  output_operand_lossage ("invalid %%u value");
	  return;
	}

      uval = INTVAL (x);
      if ((uval & 0xffff) == 0)
	uval >>= 16;

      fprintf (file, HOST_WIDE_INT_PRINT_HEX, uval & 0xffff);
      return;
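      /* For example (illustrative): 0xdead0000 has a zero low half, so
	 UVAL is shifted down and 0xdead is printed; 0x1234 is printed
	 unchanged.  */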

    case 'v':
      /* High-order 16 bits of constant for use in signed operand.  */
      if (! INT_P (x))
	output_operand_lossage ("invalid %%v value");
      else
	fprintf (file, HOST_WIDE_INT_PRINT_HEX,
		 (INTVAL (x) >> 16) & 0xffff);
      return;

    case 'U':
      /* Print `u' if this has an auto-increment or auto-decrement.  */
      if (MEM_P (x)
	  && (GET_CODE (XEXP (x, 0)) == PRE_INC
	      || GET_CODE (XEXP (x, 0)) == PRE_DEC
	      || GET_CODE (XEXP (x, 0)) == PRE_MODIFY))
	putc ('u', file);
      return;

    case 'V':
      /* Print the trap code for this operand.  */
      switch (GET_CODE (x))
	{
	case EQ:
	  fputs ("eq", file);   /* 4 */
	  break;
	case NE:
	  fputs ("ne", file);   /* 24 */
	  break;
	case LT:
	  fputs ("lt", file);   /* 16 */
	  break;
	case LE:
	  fputs ("le", file);   /* 20 */
	  break;
	case GT:
	  fputs ("gt", file);   /* 8 */
	  break;
	case GE:
	  fputs ("ge", file);   /* 12 */
	  break;
	case LTU:
	  fputs ("llt", file);  /* 2 */
	  break;
	case LEU:
	  fputs ("lle", file);  /* 6 */
	  break;
	case GTU:
	  fputs ("lgt", file);  /* 1 */
	  break;
	case GEU:
	  fputs ("lge", file);  /* 5 */
	  break;
	default:
	  output_operand_lossage ("invalid %%V value");
	}
      break;

    case 'w':
      /* If constant, low-order 16 bits of constant, signed.  Otherwise, write
	 normally.  */
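      /* For instance (illustrative), 0xffff prints as -1 and 0x7fff as
	 32767: the XOR/subtract pair sign-extends the low 16 bits.  */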
      if (INT_P (x))
	fprintf (file, HOST_WIDE_INT_PRINT_DEC,
		 ((INTVAL (x) & 0xffff) ^ 0x8000) - 0x8000);
      else
	print_operand (file, x, 0);
      return;

    case 'x':
      /* X is a FPR or Altivec register used in a VSX context.  */
      if (!REG_P (x) || !VSX_REGNO_P (REGNO (x)))
	output_operand_lossage ("invalid %%x value");
      else
	{
	  int reg = REGNO (x);
	  int vsx_reg = (FP_REGNO_P (reg)
			 ? reg - 32
			 : reg - FIRST_ALTIVEC_REGNO + 32);

#ifdef TARGET_REGNAMES
	  if (TARGET_REGNAMES)
	    fprintf (file, "%%vs%d", vsx_reg);
	  else
#endif
	    fprintf (file, "%d", vsx_reg);
	}
      return;

    case 'X':
      if (MEM_P (x)
	  && (legitimate_indexed_address_p (XEXP (x, 0), 0)
	      || (GET_CODE (XEXP (x, 0)) == PRE_MODIFY
		  && legitimate_indexed_address_p (XEXP (XEXP (x, 0), 1), 0))))
	putc ('x', file);
      return;

    case 'Y':
      /* Like 'L', for third word of TImode/PTImode.  */
      if (REG_P (x))
	fputs (reg_names[REGNO (x) + 2], file);
      else if (MEM_P (x))
	{
	  machine_mode mode = GET_MODE (x);
	  if (GET_CODE (XEXP (x, 0)) == PRE_INC
	      || GET_CODE (XEXP (x, 0)) == PRE_DEC)
	    output_address (mode, plus_constant (Pmode,
						 XEXP (XEXP (x, 0), 0), 8));
	  else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
	    output_address (mode, plus_constant (Pmode,
						 XEXP (XEXP (x, 0), 0), 8));
	  else
	    output_address (mode, XEXP (adjust_address_nv (x, SImode, 8), 0));
	  if (small_data_operand (x, GET_MODE (x)))
	    fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
		     reg_names[SMALL_DATA_REG]);
	}
      return;

    case 'z':
      if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PLTSEQ)
	x = XVECEXP (x, 0, 1);
      /* X is a SYMBOL_REF.  Write out the name preceded by a
	 period and without any trailing data in brackets.  Used for function
	 names.  If we are configured for System V (or the embedded ABI) on
	 the PowerPC, do not emit the period, since those systems do not use
	 TOCs and the like.  */
      if (!SYMBOL_REF_P (x))
	{
	  output_operand_lossage ("invalid %%z value");
	  return;
	}

      /* For macho, check to see if we need a stub.  */
      if (TARGET_MACHO)
	{
	  const char *name = XSTR (x, 0);
#if TARGET_MACHO
	  if (darwin_symbol_stubs
	      && MACHOPIC_INDIRECT
	      && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
	    name = machopic_indirection_name (x, /*stub_p=*/true);
#endif
	  assemble_name (file, name);
	}
      else if (!DOT_SYMBOLS)
	assemble_name (file, XSTR (x, 0));
      else
	rs6000_output_function_entry (file, XSTR (x, 0));
      return;

    case 'Z':
      /* Like 'L', for last word of TImode/PTImode.  */
      if (REG_P (x))
	fputs (reg_names[REGNO (x) + 3], file);
      else if (MEM_P (x))
	{
	  machine_mode mode = GET_MODE (x);
	  if (GET_CODE (XEXP (x, 0)) == PRE_INC
	      || GET_CODE (XEXP (x, 0)) == PRE_DEC)
	    output_address (mode, plus_constant (Pmode,
						 XEXP (XEXP (x, 0), 0), 12));
	  else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
	    output_address (mode, plus_constant (Pmode,
						 XEXP (XEXP (x, 0), 0), 12));
	  else
	    output_address (mode, XEXP (adjust_address_nv (x, SImode, 12), 0));
	  if (small_data_operand (x, GET_MODE (x)))
	    fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
		     reg_names[SMALL_DATA_REG]);
	}
      return;

      /* Print AltiVec memory operand.  */
    case 'y':
      {
	rtx tmp;

	gcc_assert (MEM_P (x));

	tmp = XEXP (x, 0);

	if (VECTOR_MEM_ALTIVEC_OR_VSX_P (GET_MODE (x))
	    && GET_CODE (tmp) == AND
	    && CONST_INT_P (XEXP (tmp, 1))
	    && INTVAL (XEXP (tmp, 1)) == -16)
	  tmp = XEXP (tmp, 0);
	else if (VECTOR_MEM_VSX_P (GET_MODE (x))
		 && GET_CODE (tmp) == PRE_MODIFY)
	  tmp = XEXP (tmp, 1);
	if (REG_P (tmp))
	  fprintf (file, "0,%s", reg_names[REGNO (tmp)]);
	else
	  {
	    if (GET_CODE (tmp) != PLUS
		|| !REG_P (XEXP (tmp, 0))
		|| !REG_P (XEXP (tmp, 1)))
	      {
		output_operand_lossage ("invalid %%y value, try using the 'Z' constraint");
		break;
	      }

	    if (REGNO (XEXP (tmp, 0)) == 0)
	      fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (tmp, 1)) ],
		       reg_names[ REGNO (XEXP (tmp, 0)) ]);
	    else
	      fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (tmp, 0)) ],
		       reg_names[ REGNO (XEXP (tmp, 1)) ]);
14200 	  }
14201 	break;
14202       }
14203 
14204     case 0:
14205       if (REG_P (x))
14206 	fprintf (file, "%s", reg_names[REGNO (x)]);
14207       else if (MEM_P (x))
14208 	{
14209 	  /* We need to handle PRE_INC and PRE_DEC here, since we need to
14210 	     know the width from the mode.  */
14211 	  if (GET_CODE (XEXP (x, 0)) == PRE_INC)
14212 	    fprintf (file, "%d(%s)", GET_MODE_SIZE (GET_MODE (x)),
14213 		     reg_names[REGNO (XEXP (XEXP (x, 0), 0))]);
14214 	  else if (GET_CODE (XEXP (x, 0)) == PRE_DEC)
14215 	    fprintf (file, "%d(%s)", - GET_MODE_SIZE (GET_MODE (x)),
14216 		     reg_names[REGNO (XEXP (XEXP (x, 0), 0))]);
14217 	  else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
14218 	    output_address (GET_MODE (x), XEXP (XEXP (x, 0), 1));
14219 	  else
14220 	    output_address (GET_MODE (x), XEXP (x, 0));
14221 	}
14222       else if (toc_relative_expr_p (x, false,
14223 				    &tocrel_base_oac, &tocrel_offset_oac))
14224 	/* This hack along with a corresponding hack in
14225 	   rs6000_output_addr_const_extra arranges to output addends
14226 	   where the assembler expects to find them.  eg.
14227 	   (plus (unspec [(symbol_ref ("x")) (reg 2)] tocrel) 4)
14228 	   without this hack would be output as "x@toc+4".  We
14229 	   want "x+4@toc".  */
14230 	output_addr_const (file, CONST_CAST_RTX (tocrel_base_oac));
14231       else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLSGD)
14232 	output_addr_const (file, XVECEXP (x, 0, 0));
14233       else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PLTSEQ)
14234 	output_addr_const (file, XVECEXP (x, 0, 1));
14235       else
14236 	output_addr_const (file, x);
14237       return;
14238 
14239     case '&':
14240       if (const char *name = get_some_local_dynamic_name ())
14241 	assemble_name (file, name);
14242       else
14243 	output_operand_lossage ("'%%&' used without any "
14244 				"local dynamic TLS references");
14245       return;
14246 
14247     default:
14248       output_operand_lossage ("invalid %%xn code");
14249     }
14250 }
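/* For illustration, these output modifiers appear in rs6000.md templates such
   as "lwz%U1%X1 %0,%1": %U1 appends "u" for update (pre-increment) addresses
   and %X1 appends "x" for indexed addresses, so a single pattern can emit
   lwz, lwzu, lwzx or lwzux depending on the address form.  */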
14251 
14252 /* Print the address of an operand.  */
14253 
14254 void
14255 print_operand_address (FILE *file, rtx x)
14256 {
14257   if (REG_P (x))
14258     fprintf (file, "0(%s)", reg_names[ REGNO (x) ]);
14259 
14260   /* Is it a PC-relative address?  */
14261   else if (TARGET_PCREL && pcrel_local_or_external_address (x, VOIDmode))
14262     {
14263       HOST_WIDE_INT offset;
14264 
14265       if (GET_CODE (x) == CONST)
14266 	x = XEXP (x, 0);
14267 
14268       if (GET_CODE (x) == PLUS)
14269 	{
14270 	  offset = INTVAL (XEXP (x, 1));
14271 	  x = XEXP (x, 0);
14272 	}
14273       else
14274 	offset = 0;
14275 
14276       output_addr_const (file, x);
14277 
14278       if (offset)
14279 	fprintf (file, "%+" PRId64, offset);
14280 
14281       if (SYMBOL_REF_P (x) && !SYMBOL_REF_LOCAL_P (x))
14282 	fprintf (file, "@got");
14283 
14284       fprintf (file, "@pcrel");
14285     }
14286   else if (SYMBOL_REF_P (x) || GET_CODE (x) == CONST
14287 	   || GET_CODE (x) == LABEL_REF)
14288     {
14289       output_addr_const (file, x);
14290       if (small_data_operand (x, GET_MODE (x)))
14291 	fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
14292 		 reg_names[SMALL_DATA_REG]);
14293       else
14294 	gcc_assert (!TARGET_TOC);
14295     }
14296   else if (GET_CODE (x) == PLUS && REG_P (XEXP (x, 0))
14297 	   && REG_P (XEXP (x, 1)))
14298     {
14299       if (REGNO (XEXP (x, 0)) == 0)
14300 	fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (x, 1)) ],
14301 		 reg_names[ REGNO (XEXP (x, 0)) ]);
14302       else
14303 	fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (x, 0)) ],
14304 		 reg_names[ REGNO (XEXP (x, 1)) ]);
14305     }
14306   else if (GET_CODE (x) == PLUS && REG_P (XEXP (x, 0))
14307 	   && CONST_INT_P (XEXP (x, 1)))
14308     fprintf (file, HOST_WIDE_INT_PRINT_DEC "(%s)",
14309 	     INTVAL (XEXP (x, 1)), reg_names[ REGNO (XEXP (x, 0)) ]);
14310 #if TARGET_MACHO
14311   else if (GET_CODE (x) == LO_SUM && REG_P (XEXP (x, 0))
14312 	   && CONSTANT_P (XEXP (x, 1)))
14313     {
14314       fprintf (file, "lo16(");
14315       output_addr_const (file, XEXP (x, 1));
14316       fprintf (file, ")(%s)", reg_names[ REGNO (XEXP (x, 0)) ]);
14317     }
14318 #endif
14319 #if TARGET_ELF
14320   else if (GET_CODE (x) == LO_SUM && REG_P (XEXP (x, 0))
14321 	   && CONSTANT_P (XEXP (x, 1)))
14322     {
14323       output_addr_const (file, XEXP (x, 1));
14324       fprintf (file, "@l(%s)", reg_names[ REGNO (XEXP (x, 0)) ]);
14325     }
14326 #endif
14327   else if (toc_relative_expr_p (x, false, &tocrel_base_oac, &tocrel_offset_oac))
14328     {
14329       /* This hack along with a corresponding hack in
14330 	 rs6000_output_addr_const_extra arranges to output addends
14331 	 where the assembler expects to find them.  eg.
14332 	 (lo_sum (reg 9)
14333 	 .       (plus (unspec [(symbol_ref ("x")) (reg 2)] tocrel) 8))
14334 	 without this hack would be output as "x@toc+8@l(9)".  We
14335 	 want "x+8@toc@l(9)".  */
14336       output_addr_const (file, CONST_CAST_RTX (tocrel_base_oac));
14337       if (GET_CODE (x) == LO_SUM)
14338 	fprintf (file, "@l(%s)", reg_names[REGNO (XEXP (x, 0))]);
14339       else
14340 	fprintf (file, "(%s)", reg_names[REGNO (XVECEXP (tocrel_base_oac, 0, 1))]);
14341     }
14342   else
14343     output_addr_const (file, x);
14344 }
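/* For illustration (default numeric register names assumed): a bare register
   prints as "0(9)", a reg+reg sum prints as "9,10" -- with r0 forced into the
   second slot, since a zero in the base position means literal 0 -- and a
   reg+constant sum prints as "16(9)".  */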
14345 
14346 /* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA.  */
14347 
14348 bool
14349 rs6000_output_addr_const_extra (FILE *file, rtx x)
14350 {
14351   if (GET_CODE (x) == UNSPEC)
14352     switch (XINT (x, 1))
14353       {
14354       case UNSPEC_TOCREL:
14355 	gcc_checking_assert (SYMBOL_REF_P (XVECEXP (x, 0, 0))
14356 			     && REG_P (XVECEXP (x, 0, 1))
14357 			     && REGNO (XVECEXP (x, 0, 1)) == TOC_REGISTER);
14358 	output_addr_const (file, XVECEXP (x, 0, 0));
14359 	if (x == tocrel_base_oac && tocrel_offset_oac != const0_rtx)
14360 	  {
14361 	    if (INTVAL (tocrel_offset_oac) >= 0)
14362 	      fprintf (file, "+");
14363 	    output_addr_const (file, CONST_CAST_RTX (tocrel_offset_oac));
14364 	  }
14365 	if (!TARGET_AIX || (TARGET_ELF && TARGET_MINIMAL_TOC))
14366 	  {
14367 	    putc ('-', file);
14368 	    assemble_name (file, toc_label_name);
14369 	    need_toc_init = 1;
14370 	  }
14371 	else if (TARGET_ELF)
14372 	  fputs ("@toc", file);
14373 	return true;
14374 
14375 #if TARGET_MACHO
14376       case UNSPEC_MACHOPIC_OFFSET:
14377 	output_addr_const (file, XVECEXP (x, 0, 0));
14378 	putc ('-', file);
14379 	machopic_output_function_base_name (file);
14380 	return true;
14381 #endif
14382       }
14383   return false;
14384 }
14385 
14386 /* Target hook for assembling integer objects.  The PowerPC version has
14387    to handle fixup entries for relocatable code if RELOCATABLE_NEEDS_FIXUP
14388    is defined.  It also needs to handle DI-mode objects on 64-bit
14389    targets.  */
14390 
14391 static bool
14392 rs6000_assemble_integer (rtx x, unsigned int size, int aligned_p)
14393 {
14394 #ifdef RELOCATABLE_NEEDS_FIXUP
14395   /* Special handling for SI values.  */
14396   if (RELOCATABLE_NEEDS_FIXUP && size == 4 && aligned_p)
14397     {
14398       static int recurse = 0;
14399 
14400       /* For -mrelocatable, we mark all addresses that need to be fixed up in
14401 	 the .fixup section.  Since the TOC section is already relocated, we
14402 	 don't need to mark it here.  We used to skip the text section, but it
14403 	 should never be valid for relocated addresses to be placed in the text
14404 	 section.  */
14405       if (DEFAULT_ABI == ABI_V4
14406 	  && (TARGET_RELOCATABLE || flag_pic > 1)
14407 	  && in_section != toc_section
14408 	  && !recurse
14409 	  && !CONST_SCALAR_INT_P (x)
14410 	  && CONSTANT_P (x))
14411 	{
14412 	  char buf[256];
14413 
14414 	  recurse = 1;
14415 	  ASM_GENERATE_INTERNAL_LABEL (buf, "LCP", fixuplabelno);
14416 	  fixuplabelno++;
14417 	  ASM_OUTPUT_LABEL (asm_out_file, buf);
14418 	  fprintf (asm_out_file, "\t.long\t(");
14419 	  output_addr_const (asm_out_file, x);
14420 	  fprintf (asm_out_file, ")@fixup\n");
14421 	  fprintf (asm_out_file, "\t.section\t\".fixup\",\"aw\"\n");
14422 	  ASM_OUTPUT_ALIGN (asm_out_file, 2);
14423 	  fprintf (asm_out_file, "\t.long\t");
14424 	  assemble_name (asm_out_file, buf);
14425 	  fprintf (asm_out_file, "\n\t.previous\n");
14426 	  recurse = 0;
14427 	  return true;
14428 	}
14429       /* Remove initial .'s to turn a -mcall-aixdesc function
14430 	 address into the address of the descriptor, not the function
14431 	 itself.  */
14432       else if (SYMBOL_REF_P (x)
14433 	       && XSTR (x, 0)[0] == '.'
14434 	       && DEFAULT_ABI == ABI_AIX)
14435 	{
14436 	  const char *name = XSTR (x, 0);
14437 	  while (*name == '.')
14438 	    name++;
14439 
14440 	  fprintf (asm_out_file, "\t.long\t%s\n", name);
14441 	  return true;
14442 	}
14443     }
14444 #endif /* RELOCATABLE_NEEDS_FIXUP */
14445   return default_assemble_integer (x, size, aligned_p);
14446 }
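/* A sketch of the -mrelocatable fixup output produced above (label number
   illustrative):
	.LCP0:
		.long	(sym)@fixup
		.section ".fixup","aw"
		.align	2
		.long	.LCP0
		.previous  */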
14447 
14448 /* Return a template string for assembly to emit when making an
14449    external call.  FUNOP is the call mem argument operand number.  */
14450 
14451 static const char *
14452 rs6000_call_template_1 (rtx *operands, unsigned int funop, bool sibcall)
14453 {
14454   /* -Wformat-overflow workaround, without which gcc thinks that %u
14455       might produce 10 digits.  */
14456   gcc_assert (funop <= MAX_RECOG_OPERANDS);
14457 
14458   char arg[12];
14459   arg[0] = 0;
14460   if (GET_CODE (operands[funop + 1]) == UNSPEC)
14461     {
14462       if (XINT (operands[funop + 1], 1) == UNSPEC_TLSGD)
14463 	sprintf (arg, "(%%%u@tlsgd)", funop + 1);
14464       else if (XINT (operands[funop + 1], 1) == UNSPEC_TLSLD)
14465 	sprintf (arg, "(%%&@tlsld)");
14466     }
14467 
14468   /* The magic 32768 offset here corresponds to the offset of
14469      r30 in .got2, as given by LCTOC1.  See sysv4.h:toc_section.  */
14470   char z[11];
14471   sprintf (z, "%%z%u%s", funop,
14472 	   (DEFAULT_ABI == ABI_V4 && TARGET_SECURE_PLT && flag_pic == 2
14473 	    ? "+32768" : ""));
14474 
14475   static char str[32];  /* 1 spare */
14476   if (rs6000_pcrel_p ())
14477     sprintf (str, "b%s %s@notoc%s", sibcall ? "" : "l", z, arg);
14478   else if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
14479     sprintf (str, "b%s %s%s%s", sibcall ? "" : "l", z, arg,
14480 	     sibcall ? "" : "\n\tnop");
14481   else if (DEFAULT_ABI == ABI_V4)
14482     sprintf (str, "b%s %s%s%s", sibcall ? "" : "l", z, arg,
14483 	     flag_pic ? "@plt" : "");
14484 #if TARGET_MACHO
14485   /* If/when we remove the mlongcall opt, we can share the AIX/ELFv2 case. */
14486    else if (DEFAULT_ABI == ABI_DARWIN)
14487     {
14488       /* The cookie is in operand func+2.  */
14489       gcc_checking_assert (GET_CODE (operands[funop + 2]) == CONST_INT);
14490       int cookie = INTVAL (operands[funop + 2]);
14491       if (cookie & CALL_LONG)
14492 	{
14493 	  tree funname = get_identifier (XSTR (operands[funop], 0));
14494 	  tree labelname = get_prev_label (funname);
14495 	  gcc_checking_assert (labelname && !sibcall);
14496 
14497 	  /* "jbsr foo, L42" is Mach-O for "Link as 'bl foo' if a 'bl'
14498 	     instruction will reach 'foo', otherwise link as 'bl L42'".
14499 	     "L42" should be a 'branch island', that will do a far jump to
14500 	     'foo'.  Branch islands are generated in
14501 	     macho_branch_islands().  */
14502 	  sprintf (str, "jbsr %%z%u,%.10s", funop,
14503 		   IDENTIFIER_POINTER (labelname));
14504 	}
14505       else
14506         /* Same as AIX or ELFv2, except to keep backwards compat, no nop
14507 	   after the call.  */
14508 	sprintf (str, "b%s %s%s", sibcall ? "" : "l", z, arg);
14509     }
14510 #endif
14511   else
14512     gcc_unreachable ();
14513   return str;
14514 }
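/* Illustrative results for funop == 1: pcrel targets get "bl %z1@notoc",
   AIX/ELFv2 get "bl %z1\n\tnop" (the nop is the TOC-restore slot filled in
   by the linker), and SysV PIC gets "bl %z1@plt".  */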
14515 
14516 const char *
14517 rs6000_call_template (rtx *operands, unsigned int funop)
14518 {
14519   return rs6000_call_template_1 (operands, funop, false);
14520 }
14521 
14522 const char *
14523 rs6000_sibcall_template (rtx *operands, unsigned int funop)
14524 {
14525   return rs6000_call_template_1 (operands, funop, true);
14526 }
14527 
14528 /* As above, for indirect calls.  */
14529 
14530 static const char *
14531 rs6000_indirect_call_template_1 (rtx *operands, unsigned int funop,
14532 				 bool sibcall)
14533 {
14534   /* -Wformat-overflow workaround, without which gcc thinks that %u
14535      might produce 10 digits.  Note that -Wformat-overflow will not
14536      currently warn here for str[], so do not rely on a warning to
14537      ensure str[] is correctly sized.  */
14538   gcc_assert (funop <= MAX_RECOG_OPERANDS);
14539 
14540   /* Currently, funop is either 0 or 1.  The maximum string is always
14541      a !speculate 64-bit __tls_get_addr call.
14542 
14543      ABI_ELFv2, pcrel:
14544      . 27	.reloc .,R_PPC64_TLSGD,%2\n\t
14545      . 35	.reloc .,R_PPC64_PLTSEQ_NOTOC,%z1\n\t
14546      .  9	crset 2\n\t
14547      . 27	.reloc .,R_PPC64_TLSGD,%2\n\t
14548      . 36	.reloc .,R_PPC64_PLTCALL_NOTOC,%z1\n\t
14549      .  8	beq%T1l-
14550      .---
14551      .142
14552 
14553      ABI_AIX:
14554      .  9	ld 2,%3\n\t
14555      . 27	.reloc .,R_PPC64_TLSGD,%2\n\t
14556      . 29	.reloc .,R_PPC64_PLTSEQ,%z1\n\t
14557      .  9	crset 2\n\t
14558      . 27	.reloc .,R_PPC64_TLSGD,%2\n\t
14559      . 30	.reloc .,R_PPC64_PLTCALL,%z1\n\t
14560      . 10	beq%T1l-\n\t
14561      . 10	ld 2,%4(1)
14562      .---
14563      .151
14564 
14565      ABI_ELFv2:
14566      . 27	.reloc .,R_PPC64_TLSGD,%2\n\t
14567      . 29	.reloc .,R_PPC64_PLTSEQ,%z1\n\t
14568      .  9	crset 2\n\t
14569      . 27	.reloc .,R_PPC64_TLSGD,%2\n\t
14570      . 30	.reloc .,R_PPC64_PLTCALL,%z1\n\t
14571      . 10	beq%T1l-\n\t
14572      . 10	ld 2,%3(1)
14573      .---
14574      .142
14575 
14576      ABI_V4:
14577      . 27	.reloc .,R_PPC64_TLSGD,%2\n\t
14578      . 35	.reloc .,R_PPC64_PLTSEQ,%z1+32768\n\t
14579      .  9	crset 2\n\t
14580      . 27	.reloc .,R_PPC64_TLSGD,%2\n\t
14581      . 36	.reloc .,R_PPC64_PLTCALL,%z1+32768\n\t
14582      .  8	beq%T1l-
14583      .---
14584      .141  */
14585   static char str[160];  /* 8 spare */
14586   char *s = str;
14587   const char *ptrload = TARGET_64BIT ? "d" : "wz";
14588 
14589   if (DEFAULT_ABI == ABI_AIX)
14590     s += sprintf (s,
14591 		  "l%s 2,%%%u\n\t",
14592 		  ptrload, funop + 3);
14593 
14594   /* We don't need the extra code to stop indirect call speculation if
14595      calling via LR.  */
14596   bool speculate = (TARGET_MACHO
14597 		    || rs6000_speculate_indirect_jumps
14598 		    || (REG_P (operands[funop])
14599 			&& REGNO (operands[funop]) == LR_REGNO));
14600 
14601   if (TARGET_PLTSEQ && GET_CODE (operands[funop]) == UNSPEC)
14602     {
14603       const char *rel64 = TARGET_64BIT ? "64" : "";
14604       char tls[29];
14605       tls[0] = 0;
14606       if (GET_CODE (operands[funop + 1]) == UNSPEC)
14607 	{
14608 	  if (XINT (operands[funop + 1], 1) == UNSPEC_TLSGD)
14609 	    sprintf (tls, ".reloc .,R_PPC%s_TLSGD,%%%u\n\t",
14610 		     rel64, funop + 1);
14611 	  else if (XINT (operands[funop + 1], 1) == UNSPEC_TLSLD)
14612 	    sprintf (tls, ".reloc .,R_PPC%s_TLSLD,%%&\n\t",
14613 		     rel64);
14614 	}
14615 
14616       const char *notoc = rs6000_pcrel_p () ? "_NOTOC" : "";
14617       const char *addend = (DEFAULT_ABI == ABI_V4 && TARGET_SECURE_PLT
14618 			    && flag_pic == 2 ? "+32768" : "");
14619       if (!speculate)
14620 	{
14621 	  s += sprintf (s,
14622 			"%s.reloc .,R_PPC%s_PLTSEQ%s,%%z%u%s\n\t",
14623 			tls, rel64, notoc, funop, addend);
14624 	  s += sprintf (s, "crset 2\n\t");
14625 	}
14626       s += sprintf (s,
14627 		    "%s.reloc .,R_PPC%s_PLTCALL%s,%%z%u%s\n\t",
14628 		    tls, rel64, notoc, funop, addend);
14629     }
14630   else if (!speculate)
14631     s += sprintf (s, "crset 2\n\t");
14632 
14633   if (rs6000_pcrel_p ())
14634     {
14635       if (speculate)
14636 	sprintf (s, "b%%T%ul", funop);
14637       else
14638 	sprintf (s, "beq%%T%ul-", funop);
14639     }
14640   else if (DEFAULT_ABI == ABI_AIX)
14641     {
14642       if (speculate)
14643 	sprintf (s,
14644 		 "b%%T%ul\n\t"
14645 		 "l%s 2,%%%u(1)",
14646 		 funop, ptrload, funop + 4);
14647       else
14648 	sprintf (s,
14649 		 "beq%%T%ul-\n\t"
14650 		 "l%s 2,%%%u(1)",
14651 		 funop, ptrload, funop + 4);
14652     }
14653   else if (DEFAULT_ABI == ABI_ELFv2)
14654     {
14655       if (speculate)
14656 	sprintf (s,
14657 		 "b%%T%ul\n\t"
14658 		 "l%s 2,%%%u(1)",
14659 		 funop, ptrload, funop + 3);
14660       else
14661 	sprintf (s,
14662 		 "beq%%T%ul-\n\t"
14663 		 "l%s 2,%%%u(1)",
14664 		 funop, ptrload, funop + 3);
14665     }
14666   else
14667     {
14668       if (speculate)
14669 	sprintf (s,
14670 		 "b%%T%u%s",
14671 		 funop, sibcall ? "" : "l");
14672       else
14673 	sprintf (s,
14674 		 "beq%%T%u%s-%s",
14675 		 funop, sibcall ? "" : "l", sibcall ? "\n\tb $" : "");
14676     }
14677   return str;
14678 }
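/* Illustrative result for ABI_ELFv2 with speculation allowed and funop == 1:
   the template is "b%T1l\n\tld 2,%4(1)", i.e. a bctrl (or blrl) followed by
   reloading the TOC pointer from its stack slot.  */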
14679 
14680 const char *
14681 rs6000_indirect_call_template (rtx *operands, unsigned int funop)
14682 {
14683   return rs6000_indirect_call_template_1 (operands, funop, false);
14684 }
14685 
14686 const char *
14687 rs6000_indirect_sibcall_template (rtx *operands, unsigned int funop)
14688 {
14689   return rs6000_indirect_call_template_1 (operands, funop, true);
14690 }
14691 
14692 #if HAVE_AS_PLTSEQ
14693 /* Output indirect call insns.  WHICH identifies the type of sequence.  */
14694 const char *
14695 rs6000_pltseq_template (rtx *operands, int which)
14696 {
14697   const char *rel64 = TARGET_64BIT ? "64" : "";
14698   char tls[30];
14699   tls[0] = 0;
14700   if (GET_CODE (operands[3]) == UNSPEC)
14701     {
14702       char off = which == RS6000_PLTSEQ_PLT_PCREL34 ? '8' : '4';
14703       if (XINT (operands[3], 1) == UNSPEC_TLSGD)
14704 	sprintf (tls, ".reloc .-%c,R_PPC%s_TLSGD,%%3\n\t",
14705 		 off, rel64);
14706       else if (XINT (operands[3], 1) == UNSPEC_TLSLD)
14707 	sprintf (tls, ".reloc .-%c,R_PPC%s_TLSLD,%%&\n\t",
14708 		 off, rel64);
14709     }
14710 
14711   gcc_assert (DEFAULT_ABI == ABI_ELFv2 || DEFAULT_ABI == ABI_V4);
14712   static char str[96];  /* 10 spare */
14713   char off = WORDS_BIG_ENDIAN ? '2' : '4';
14714   const char *addend = (DEFAULT_ABI == ABI_V4 && TARGET_SECURE_PLT
14715 			&& flag_pic == 2 ? "+32768" : "");
14716   switch (which)
14717     {
14718     case RS6000_PLTSEQ_TOCSAVE:
14719       sprintf (str,
14720 	       "st%s\n\t"
14721 	       "%s.reloc .-4,R_PPC%s_PLTSEQ,%%z2",
14722 	       TARGET_64BIT ? "d 2,24(1)" : "w 2,12(1)",
14723 	       tls, rel64);
14724       break;
14725     case RS6000_PLTSEQ_PLT16_HA:
14726       if (DEFAULT_ABI == ABI_V4 && !flag_pic)
14727 	sprintf (str,
14728 		 "lis %%0,0\n\t"
14729 		 "%s.reloc .-%c,R_PPC%s_PLT16_HA,%%z2",
14730 		 tls, off, rel64);
14731       else
14732 	sprintf (str,
14733 		 "addis %%0,%%1,0\n\t"
14734 		 "%s.reloc .-%c,R_PPC%s_PLT16_HA,%%z2%s",
14735 		 tls, off, rel64, addend);
14736       break;
14737     case RS6000_PLTSEQ_PLT16_LO:
14738       sprintf (str,
14739 	       "l%s %%0,0(%%1)\n\t"
14740 	       "%s.reloc .-%c,R_PPC%s_PLT16_LO%s,%%z2%s",
14741 	       TARGET_64BIT ? "d" : "wz",
14742 	       tls, off, rel64, TARGET_64BIT ? "_DS" : "", addend);
14743       break;
14744     case RS6000_PLTSEQ_MTCTR:
14745       sprintf (str,
14746 	       "mtctr %%1\n\t"
14747 	       "%s.reloc .-4,R_PPC%s_PLTSEQ,%%z2%s",
14748 	       tls, rel64, addend);
14749       break;
14750     case RS6000_PLTSEQ_PLT_PCREL34:
14751       sprintf (str,
14752 	       "pl%s %%0,0(0),1\n\t"
14753 	       "%s.reloc .-8,R_PPC%s_PLT_PCREL34_NOTOC,%%z2",
14754 	       TARGET_64BIT ? "d" : "wz",
14755 	       tls, rel64);
14756       break;
14757     default:
14758       gcc_unreachable ();
14759     }
14760   return str;
14761 }
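/* Illustrative expansion of the PLT16_HA case under 32-bit secure-PLT PIC:
   an addis with a zero immediate (e.g. "addis 9,30,0") followed by a .reloc
   directive pointing back at the immediate field (R_PPC_PLT16_HA against the
   callee, with the +32768 addend), so the linker can patch in the high part
   of the PLT entry address.  */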
14762 #endif
14763 
14764 #if defined (HAVE_GAS_HIDDEN) && !TARGET_MACHO
14765 /* Emit an assembler directive to set symbol visibility for DECL to
14766    VISIBILITY_TYPE.  */
14767 
14768 static void
14769 rs6000_assemble_visibility (tree decl, int vis)
14770 {
14771   if (TARGET_XCOFF)
14772     return;
14773 
14774   /* Functions need to have their entry point symbol visibility set as
14775      well as their descriptor symbol visibility.  */
14776   if (DEFAULT_ABI == ABI_AIX
14777       && DOT_SYMBOLS
14778       && TREE_CODE (decl) == FUNCTION_DECL)
14779     {
14780       static const char * const visibility_types[] = {
14781 	NULL, "protected", "hidden", "internal"
14782       };
14783 
14784       const char *name, *type;
14785 
14786       name = ((* targetm.strip_name_encoding)
14787 	      (IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl))));
14788       type = visibility_types[vis];
14789 
14790       fprintf (asm_out_file, "\t.%s\t%s\n", type, name);
14791       fprintf (asm_out_file, "\t.%s\t.%s\n", type, name);
14792     }
14793   else
14794     default_assemble_visibility (decl, vis);
14795 }
14796 #endif
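/* For illustration: a hidden AIX function "foo" gets both "\t.hidden\tfoo"
   (the descriptor symbol) and "\t.hidden\t.foo" (the code entry symbol) from
   the two directives emitted above.  */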
14797 
14798 /* Write PATCH_AREA_SIZE NOPs into the asm outfile FILE around a function
14799    entry.  If RECORD_P is true and the target supports named sections,
14800    the location of the NOPs will be recorded in a special object section
14801    called "__patchable_function_entries".  This routine may be called
14802    twice per function to put NOPs before and after the function
14803    entry.  */
14804 
14805 void
14806 rs6000_print_patchable_function_entry (FILE *file,
14807 				       unsigned HOST_WIDE_INT patch_area_size,
14808 				       bool record_p)
14809 {
14810   unsigned int flags = SECTION_WRITE | SECTION_RELRO;
14811   /* When an .opd section is emitted, the function symbol is emitted into
14812      the .opd section while default_print_patchable_function_entry_1 emits
14813      the patchable area into the function section itself.  Don't use
14814      SECTION_LINK_ORDER in that case.  */
14815   if (!(TARGET_64BIT && DEFAULT_ABI != ABI_ELFv2)
14816       && HAVE_GAS_SECTION_LINK_ORDER)
14817     flags |= SECTION_LINK_ORDER;
14818   default_print_patchable_function_entry_1 (file, patch_area_size, record_p,
14819 					    flags);
14820 }
14821 
14822 enum rtx_code
14823 rs6000_reverse_condition (machine_mode mode, enum rtx_code code)
14824 {
14825   /* Reversal of FP compares needs care -- an ordered compare
14826      becomes an unordered compare and vice versa.  */
14827   if (mode == CCFPmode
14828       && (!flag_finite_math_only
14829 	  || code == UNLT || code == UNLE || code == UNGT || code == UNGE
14830 	  || code == UNEQ || code == LTGT))
14831     return reverse_condition_maybe_unordered (code);
14832   else
14833     return reverse_condition (code);
14834 }
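/* For example, reversing GE in CCFPmode yields UNLT rather than LT: when
   either operand is a NaN, "not (a >= b)" is true, and only the unordered
   form of the reversed comparison preserves that.  */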
14835 
14836 /* Generate a compare for CODE.  Return a brand-new rtx that
14837    represents the result of the compare.  */
14838 
14839 static rtx
14840 rs6000_generate_compare (rtx cmp, machine_mode mode)
14841 {
14842   machine_mode comp_mode;
14843   rtx compare_result;
14844   enum rtx_code code = GET_CODE (cmp);
14845   rtx op0 = XEXP (cmp, 0);
14846   rtx op1 = XEXP (cmp, 1);
14847 
14848   if (!TARGET_FLOAT128_HW && FLOAT128_VECTOR_P (mode))
14849     comp_mode = CCmode;
14850   else if (FLOAT_MODE_P (mode))
14851     comp_mode = CCFPmode;
14852   else if (code == GTU || code == LTU
14853 	   || code == GEU || code == LEU)
14854     comp_mode = CCUNSmode;
14855   else if ((code == EQ || code == NE)
14856 	   && unsigned_reg_p (op0)
14857 	   && (unsigned_reg_p (op1)
14858 	       || (CONST_INT_P (op1) && INTVAL (op1) != 0)))
14859     /* These are unsigned values, perhaps there will be a later
14860        ordering compare that can be shared with this one.  */
14861     comp_mode = CCUNSmode;
14862   else
14863     comp_mode = CCmode;
14864 
14865   /* If we have an unsigned compare, make sure we don't have a signed value as
14866      an immediate.  */
14867   if (comp_mode == CCUNSmode && CONST_INT_P (op1)
14868       && INTVAL (op1) < 0)
14869     {
14870       op0 = copy_rtx_if_shared (op0);
14871       op1 = force_reg (GET_MODE (op0), op1);
14872       cmp = gen_rtx_fmt_ee (code, GET_MODE (cmp), op0, op1);
14873     }
14874 
14875   /* First, the compare.  */
14876   compare_result = gen_reg_rtx (comp_mode);
14877 
14878   /* IEEE 128-bit support in VSX registers when we do not have hardware
14879      support.  */
14880   if (!TARGET_FLOAT128_HW && FLOAT128_VECTOR_P (mode))
14881     {
14882       rtx libfunc = NULL_RTX;
14883       bool check_nan = false;
14884       rtx dest;
14885 
14886       switch (code)
14887 	{
14888 	case EQ:
14889 	case NE:
14890 	  libfunc = optab_libfunc (eq_optab, mode);
14891 	  break;
14892 
14893 	case GT:
14894 	case GE:
14895 	  libfunc = optab_libfunc (ge_optab, mode);
14896 	  break;
14897 
14898 	case LT:
14899 	case LE:
14900 	  libfunc = optab_libfunc (le_optab, mode);
14901 	  break;
14902 
14903 	case UNORDERED:
14904 	case ORDERED:
14905 	  libfunc = optab_libfunc (unord_optab, mode);
14906 	  code = (code == UNORDERED) ? NE : EQ;
14907 	  break;
14908 
14909 	case UNGE:
14910 	case UNGT:
14911 	  check_nan = true;
14912 	  libfunc = optab_libfunc (ge_optab, mode);
14913 	  code = (code == UNGE) ? GE : GT;
14914 	  break;
14915 
14916 	case UNLE:
14917 	case UNLT:
14918 	  check_nan = true;
14919 	  libfunc = optab_libfunc (le_optab, mode);
14920 	  code = (code == UNLE) ? LE : LT;
14921 	  break;
14922 
14923 	case UNEQ:
14924 	case LTGT:
14925 	  check_nan = true;
14926 	  libfunc = optab_libfunc (eq_optab, mode);
14927 	  code = (code == UNEQ) ? EQ : NE;
14928 	  break;
14929 
14930 	default:
14931 	  gcc_unreachable ();
14932 	}
14933 
14934       gcc_assert (libfunc);
14935 
14936       if (!check_nan)
14937 	dest = emit_library_call_value (libfunc, NULL_RTX, LCT_CONST,
14938 					SImode, op0, mode, op1, mode);
14939 
14940       /* The library signals an exception for signalling NaNs, so we need to
14941 	 handle isgreater, etc. by first checking isordered.  */
14942       else
14943 	{
14944 	  rtx ne_rtx, normal_dest, unord_dest;
14945 	  rtx unord_func = optab_libfunc (unord_optab, mode);
14946 	  rtx join_label = gen_label_rtx ();
14947 	  rtx join_ref = gen_rtx_LABEL_REF (VOIDmode, join_label);
14948 	  rtx unord_cmp = gen_reg_rtx (comp_mode);
14949 
14950 
14951 	  /* Test for either value being a NaN.  */
14952 	  gcc_assert (unord_func);
14953 	  unord_dest = emit_library_call_value (unord_func, NULL_RTX, LCT_CONST,
14954 						SImode, op0, mode, op1, mode);
14955 
14956 	  /* Set value (1) if either value is a NaN, and jump to the join
14957 	     label.  */
14958 	  dest = gen_reg_rtx (SImode);
14959 	  emit_move_insn (dest, const1_rtx);
14960 	  emit_insn (gen_rtx_SET (unord_cmp,
14961 				  gen_rtx_COMPARE (comp_mode, unord_dest,
14962 						   const0_rtx)));
14963 
14964 	  ne_rtx = gen_rtx_NE (comp_mode, unord_cmp, const0_rtx);
14965 	  emit_jump_insn (gen_rtx_SET (pc_rtx,
14966 				       gen_rtx_IF_THEN_ELSE (VOIDmode, ne_rtx,
14967 							     join_ref,
14968 							     pc_rtx)));
14969 
14970 	  /* Do the normal comparison, knowing that the values are not
14971 	     NaNs.  */
14972 	  normal_dest = emit_library_call_value (libfunc, NULL_RTX, LCT_CONST,
14973 						 SImode, op0, mode, op1, mode);
14974 
14975 	  emit_insn (gen_cstoresi4 (dest,
14976 				    gen_rtx_fmt_ee (code, SImode, normal_dest,
14977 						    const0_rtx),
14978 				    normal_dest, const0_rtx));
14979 
14980 	  /* Join NaN and non-NaN paths.  Compare dest against 0.  */
14981 	  emit_label (join_label);
14982 	  code = NE;
14983 	}
14984 
14985       emit_insn (gen_rtx_SET (compare_result,
14986 			      gen_rtx_COMPARE (comp_mode, dest, const0_rtx)));
14987     }
14988 
14989   else
14990     {
14991       /* Generate XLC-compatible TFmode compare as PARALLEL with extra
14992 	 CLOBBERs to match cmptf_internal2 pattern.  */
14993       if (comp_mode == CCFPmode && TARGET_XL_COMPAT
14994 	  && FLOAT128_IBM_P (GET_MODE (op0))
14995 	  && TARGET_HARD_FLOAT)
14996 	emit_insn (gen_rtx_PARALLEL (VOIDmode,
14997 	  gen_rtvec (10,
14998 		     gen_rtx_SET (compare_result,
14999 				  gen_rtx_COMPARE (comp_mode, op0, op1)),
15000 		     gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
15001 		     gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
15002 		     gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
15003 		     gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
15004 		     gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
15005 		     gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
15006 		     gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
15007 		     gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
15008 		     gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (Pmode)))));
15009       else if (GET_CODE (op1) == UNSPEC
15010 	       && XINT (op1, 1) == UNSPEC_SP_TEST)
15011 	{
15012 	  rtx op1b = XVECEXP (op1, 0, 0);
15013 	  comp_mode = CCEQmode;
15014 	  compare_result = gen_reg_rtx (CCEQmode);
15015 	  if (TARGET_64BIT)
15016 	    emit_insn (gen_stack_protect_testdi (compare_result, op0, op1b));
15017 	  else
15018 	    emit_insn (gen_stack_protect_testsi (compare_result, op0, op1b));
15019 	}
15020       else
15021 	emit_insn (gen_rtx_SET (compare_result,
15022 				gen_rtx_COMPARE (comp_mode, op0, op1)));
15023     }
15024 
15025   validate_condition_mode (code, GET_MODE (compare_result));
15026 
15027   return gen_rtx_fmt_ee (code, VOIDmode, compare_result, const0_rtx);
15028 }
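/* For illustration: comparing two unsigned registers with GTU selects
   CCUNSmode, emits (set (reg:CCUNS) (compare:CCUNS op0 op1)), and returns
   (gtu (reg:CCUNS) (const_int 0)) for the caller's branch or setcc
   expansion.  */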
15029 
15030 
15031 /* Return the diagnostic message string if the binary operation OP is
15032    not permitted on TYPE1 and TYPE2, NULL otherwise.  */
15033 
15034 static const char*
15035 rs6000_invalid_binary_op (int op ATTRIBUTE_UNUSED,
15036 			  const_tree type1,
15037 			  const_tree type2)
15038 {
15039   machine_mode mode1 = TYPE_MODE (type1);
15040   machine_mode mode2 = TYPE_MODE (type2);
15041 
15042   /* For complex modes, use the inner type.  */
15043   if (COMPLEX_MODE_P (mode1))
15044     mode1 = GET_MODE_INNER (mode1);
15045 
15046   if (COMPLEX_MODE_P (mode2))
15047     mode2 = GET_MODE_INNER (mode2);
15048 
15049   /* Don't allow IEEE 754R 128-bit binary floating point and IBM extended
15050      double to intermix unless -mfloat128-convert.  */
15051   if (mode1 == mode2)
15052     return NULL;
15053 
15054   if (!TARGET_FLOAT128_CVT)
15055     {
15056       if ((FLOAT128_IEEE_P (mode1) && FLOAT128_IBM_P (mode2))
15057 	  || (FLOAT128_IBM_P (mode1) && FLOAT128_IEEE_P (mode2)))
15058 	return N_("Invalid mixing of IEEE 128-bit and IBM 128-bit floating "
15059 		  "point types");
15060     }
15061 
15062   return NULL;
15063 }
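/* For example, without -mfloat128-convert an expression mixing the two
   128-bit formats, such as adding a __float128 to an __ibm128, is rejected
   with the message above, since the two formats have different
   representations.  */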
15064 
15065 
15066 /* Expand floating point conversion to/from __float128 and __ibm128.  */
15067 
15068 void
15069 rs6000_expand_float128_convert (rtx dest, rtx src, bool unsigned_p)
15070 {
15071   machine_mode dest_mode = GET_MODE (dest);
15072   machine_mode src_mode = GET_MODE (src);
15073   convert_optab cvt = unknown_optab;
15074   bool do_move = false;
15075   rtx libfunc = NULL_RTX;
15076   rtx dest2;
15077   typedef rtx (*rtx_2func_t) (rtx, rtx);
15078   rtx_2func_t hw_convert = (rtx_2func_t)0;
15079   size_t kf_or_tf;
15080 
15081   struct hw_conv_t {
15082     rtx_2func_t	from_df;
15083     rtx_2func_t from_sf;
15084     rtx_2func_t from_si_sign;
15085     rtx_2func_t from_si_uns;
15086     rtx_2func_t from_di_sign;
15087     rtx_2func_t from_di_uns;
15088     rtx_2func_t to_df;
15089     rtx_2func_t to_sf;
15090     rtx_2func_t to_si_sign;
15091     rtx_2func_t to_si_uns;
15092     rtx_2func_t to_di_sign;
15093     rtx_2func_t to_di_uns;
15094   } hw_conversions[2] = {
15095     /* conversions to/from KFmode */
15096     {
15097       gen_extenddfkf2_hw,		/* KFmode <- DFmode.  */
15098       gen_extendsfkf2_hw,		/* KFmode <- SFmode.  */
15099       gen_float_kfsi2_hw,		/* KFmode <- SImode (signed).  */
15100       gen_floatuns_kfsi2_hw,		/* KFmode <- SImode (unsigned).  */
15101       gen_float_kfdi2_hw,		/* KFmode <- DImode (signed).  */
15102       gen_floatuns_kfdi2_hw,		/* KFmode <- DImode (unsigned).  */
15103       gen_trunckfdf2_hw,		/* DFmode <- KFmode.  */
15104       gen_trunckfsf2_hw,		/* SFmode <- KFmode.  */
15105       gen_fix_kfsi2_hw,			/* SImode <- KFmode (signed).  */
15106       gen_fixuns_kfsi2_hw,		/* SImode <- KFmode (unsigned).  */
15107       gen_fix_kfdi2_hw,			/* DImode <- KFmode (signed).  */
15108       gen_fixuns_kfdi2_hw,		/* DImode <- KFmode (unsigned).  */
15109     },
15110 
15111     /* conversions to/from TFmode */
15112     {
15113       gen_extenddftf2_hw,		/* TFmode <- DFmode.  */
15114       gen_extendsftf2_hw,		/* TFmode <- SFmode.  */
15115       gen_float_tfsi2_hw,		/* TFmode <- SImode (signed).  */
15116       gen_floatuns_tfsi2_hw,		/* TFmode <- SImode (unsigned).  */
15117       gen_float_tfdi2_hw,		/* TFmode <- DImode (signed).  */
15118       gen_floatuns_tfdi2_hw,		/* TFmode <- DImode (unsigned).  */
15119       gen_trunctfdf2_hw,		/* DFmode <- TFmode.  */
15120       gen_trunctfsf2_hw,		/* SFmode <- TFmode.  */
15121       gen_fix_tfsi2_hw,			/* SImode <- TFmode (signed).  */
15122       gen_fixuns_tfsi2_hw,		/* SImode <- TFmode (unsigned).  */
15123       gen_fix_tfdi2_hw,			/* DImode <- TFmode (signed).  */
15124       gen_fixuns_tfdi2_hw,		/* DImode <- TFmode (unsigned).  */
15125     },
15126   };
15127 
15128   if (dest_mode == src_mode)
15129     gcc_unreachable ();
15130 
15131   /* Eliminate memory operations.  */
15132   if (MEM_P (src))
15133     src = force_reg (src_mode, src);
15134 
15135   if (MEM_P (dest))
15136     {
15137       rtx tmp = gen_reg_rtx (dest_mode);
15138       rs6000_expand_float128_convert (tmp, src, unsigned_p);
15139       rs6000_emit_move (dest, tmp, dest_mode);
15140       return;
15141     }
15142 
15143   /* Convert to IEEE 128-bit floating point.  */
15144   if (FLOAT128_IEEE_P (dest_mode))
15145     {
15146       if (dest_mode == KFmode)
15147 	kf_or_tf = 0;
15148       else if (dest_mode == TFmode)
15149 	kf_or_tf = 1;
15150       else
15151 	gcc_unreachable ();
15152 
15153       switch (src_mode)
15154 	{
15155 	case E_DFmode:
15156 	  cvt = sext_optab;
15157 	  hw_convert = hw_conversions[kf_or_tf].from_df;
15158 	  break;
15159 
15160 	case E_SFmode:
15161 	  cvt = sext_optab;
15162 	  hw_convert = hw_conversions[kf_or_tf].from_sf;
15163 	  break;
15164 
15165 	case E_KFmode:
15166 	case E_IFmode:
15167 	case E_TFmode:
15168 	  if (FLOAT128_IBM_P (src_mode))
15169 	    cvt = sext_optab;
15170 	  else
15171 	    do_move = true;
15172 	  break;
15173 
15174 	case E_SImode:
15175 	  if (unsigned_p)
15176 	    {
15177 	      cvt = ufloat_optab;
15178 	      hw_convert = hw_conversions[kf_or_tf].from_si_uns;
15179 	    }
15180 	  else
15181 	    {
15182 	      cvt = sfloat_optab;
15183 	      hw_convert = hw_conversions[kf_or_tf].from_si_sign;
15184 	    }
15185 	  break;
15186 
15187 	case E_DImode:
15188 	  if (unsigned_p)
15189 	    {
15190 	      cvt = ufloat_optab;
15191 	      hw_convert = hw_conversions[kf_or_tf].from_di_uns;
15192 	    }
15193 	  else
15194 	    {
15195 	      cvt = sfloat_optab;
15196 	      hw_convert = hw_conversions[kf_or_tf].from_di_sign;
15197 	    }
15198 	  break;
15199 
15200 	default:
15201 	  gcc_unreachable ();
15202 	}
15203     }
15204 
15205   /* Convert from IEEE 128-bit floating point.  */
15206   else if (FLOAT128_IEEE_P (src_mode))
15207     {
15208       if (src_mode == KFmode)
15209 	kf_or_tf = 0;
15210       else if (src_mode == TFmode)
15211 	kf_or_tf = 1;
15212       else
15213 	gcc_unreachable ();
15214 
15215       switch (dest_mode)
15216 	{
15217 	case E_DFmode:
15218 	  cvt = trunc_optab;
15219 	  hw_convert = hw_conversions[kf_or_tf].to_df;
15220 	  break;
15221 
15222 	case E_SFmode:
15223 	  cvt = trunc_optab;
15224 	  hw_convert = hw_conversions[kf_or_tf].to_sf;
15225 	  break;
15226 
15227 	case E_KFmode:
15228 	case E_IFmode:
15229 	case E_TFmode:
15230 	  if (FLOAT128_IBM_P (dest_mode))
15231 	    cvt = trunc_optab;
15232 	  else
15233 	    do_move = true;
15234 	  break;
15235 
15236 	case E_SImode:
15237 	  if (unsigned_p)
15238 	    {
15239 	      cvt = ufix_optab;
15240 	      hw_convert = hw_conversions[kf_or_tf].to_si_uns;
15241 	    }
15242 	  else
15243 	    {
15244 	      cvt = sfix_optab;
15245 	      hw_convert = hw_conversions[kf_or_tf].to_si_sign;
15246 	    }
15247 	  break;
15248 
15249 	case E_DImode:
15250 	  if (unsigned_p)
15251 	    {
15252 	      cvt = ufix_optab;
15253 	      hw_convert = hw_conversions[kf_or_tf].to_di_uns;
15254 	    }
15255 	  else
15256 	    {
15257 	      cvt = sfix_optab;
15258 	      hw_convert = hw_conversions[kf_or_tf].to_di_sign;
15259 	    }
15260 	  break;
15261 
15262 	default:
15263 	  gcc_unreachable ();
15264 	}
15265     }
15266 
15267   /* Both IBM format.  */
15268   else if (FLOAT128_IBM_P (dest_mode) && FLOAT128_IBM_P (src_mode))
15269     do_move = true;
15270 
15271   else
15272     gcc_unreachable ();
15273 
15274   /* Handle conversion between TFmode/KFmode/IFmode.  */
15275   if (do_move)
15276     emit_insn (gen_rtx_SET (dest, gen_rtx_FLOAT_EXTEND (dest_mode, src)));
15277 
15278   /* Handle conversion if we have hardware support.  */
15279   else if (TARGET_FLOAT128_HW && hw_convert)
15280     emit_insn ((hw_convert) (dest, src));
15281 
15282   /* Call an external function to do the conversion.  */
15283   else if (cvt != unknown_optab)
15284     {
15285       libfunc = convert_optab_libfunc (cvt, dest_mode, src_mode);
15286       gcc_assert (libfunc != NULL_RTX);
15287 
15288       dest2 = emit_library_call_value (libfunc, dest, LCT_CONST, dest_mode,
15289 				       src, src_mode);
15290 
15291       gcc_assert (dest2 != NULL_RTX);
15292       if (!rtx_equal_p (dest, dest2))
15293 	emit_move_insn (dest, dest2);
15294     }
15295 
15296   else
15297     gcc_unreachable ();
15298 
15299   return;
15300 }
15301 
15302 
15303 /* Emit RTL that sets a register to zero if OP1 and OP2 are equal.  SCRATCH
15304    can be used as that dest register.  Return the dest register.  */
15305 
15306 rtx
15307 rs6000_emit_eqne (machine_mode mode, rtx op1, rtx op2, rtx scratch)
15308 {
15309   if (op2 == const0_rtx)
15310     return op1;
15311 
15312   if (GET_CODE (scratch) == SCRATCH)
15313     scratch = gen_reg_rtx (mode);
15314 
15315   if (logical_operand (op2, mode))
15316     emit_insn (gen_rtx_SET (scratch, gen_rtx_XOR (mode, op1, op2)));
15317   else
15318     emit_insn (gen_rtx_SET (scratch,
15319 			    gen_rtx_PLUS (mode, op1, negate_rtx (mode, op2))));
15320 
15321   return scratch;
15322 }
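/* For illustration: with op2 = 5 (a logical_operand) this emits
   scratch = op1 ^ 5; with a non-logical constant such as op2 = -0x9000 it
   emits scratch = op1 + 0x9000.  Either way scratch is zero exactly when
   op1 == op2, ready for the caller's compare against zero.  */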
15323 
15324 /* Emit code doing a cror of two CR bits, for FP comparisons with a CODE that
15325    requires this.  The result is mode MODE.  */
15326 rtx
15327 rs6000_emit_fp_cror (rtx_code code, machine_mode mode, rtx x)
15328 {
15329   rtx cond[2];
15330   int n = 0;
15331   if (code == LTGT || code == LE || code == UNLT)
15332     cond[n++] = gen_rtx_fmt_ee (LT, mode, x, const0_rtx);
15333   if (code == LTGT || code == GE || code == UNGT)
15334     cond[n++] = gen_rtx_fmt_ee (GT, mode, x, const0_rtx);
15335   if (code == LE || code == GE || code == UNEQ)
15336     cond[n++] = gen_rtx_fmt_ee (EQ, mode, x, const0_rtx);
15337   if (code == UNLT || code == UNGT || code == UNEQ)
15338     cond[n++] = gen_rtx_fmt_ee (UNORDERED, mode, x, const0_rtx);
15339 
15340   gcc_assert (n == 2);
15341 
15342   rtx cc = gen_reg_rtx (CCEQmode);
15343   rtx logical = gen_rtx_IOR (mode, cond[0], cond[1]);
15344   emit_insn (gen_cceq_ior_compare (mode, cc, logical, cond[0], x, cond[1], x));
15345 
15346   return cc;
15347 }
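/* For example, code == LE selects the LT and EQ bits of the CR field and
   emits a cror of the two, since for ordered operands a <= b is
   (a < b) || (a == b).  */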
15348 
15349 void
15350 rs6000_emit_sCOND (machine_mode mode, rtx operands[])
15351 {
15352   rtx condition_rtx = rs6000_generate_compare (operands[1], mode);
15353   rtx_code cond_code = GET_CODE (condition_rtx);
15354 
15355   if (FLOAT_MODE_P (mode) && HONOR_NANS (mode)
15356       && !(FLOAT128_VECTOR_P (mode) && !TARGET_FLOAT128_HW))
15357     ;
15358   else if (cond_code == NE
15359 	   || cond_code == GE || cond_code == LE
15360 	   || cond_code == GEU || cond_code == LEU
15361 	   || cond_code == ORDERED || cond_code == UNGE || cond_code == UNLE)
15362     {
15363       rtx not_result = gen_reg_rtx (CCEQmode);
15364       rtx not_op, rev_cond_rtx;
15365       machine_mode cc_mode;
15366 
15367       cc_mode = GET_MODE (XEXP (condition_rtx, 0));
15368 
15369       rev_cond_rtx = gen_rtx_fmt_ee (rs6000_reverse_condition (cc_mode, cond_code),
15370 				     SImode, XEXP (condition_rtx, 0), const0_rtx);
15371       not_op = gen_rtx_COMPARE (CCEQmode, rev_cond_rtx, const0_rtx);
15372       emit_insn (gen_rtx_SET (not_result, not_op));
15373       condition_rtx = gen_rtx_EQ (VOIDmode, not_result, const0_rtx);
15374     }
15375 
15376   machine_mode op_mode = GET_MODE (XEXP (operands[1], 0));
15377   if (op_mode == VOIDmode)
15378     op_mode = GET_MODE (XEXP (operands[1], 1));
15379 
15380   if (TARGET_POWERPC64 && (op_mode == DImode || FLOAT_MODE_P (mode)))
15381     {
15382       PUT_MODE (condition_rtx, DImode);
15383       convert_move (operands[0], condition_rtx, 0);
15384     }
15385   else
15386     {
15387       PUT_MODE (condition_rtx, SImode);
15388       emit_insn (gen_rtx_SET (operands[0], condition_rtx));
15389     }
15390 }
15391 
15392 /* Emit a conditional branch: test the comparison in operands[0] and branch
   to the label in operands[3].  */
15393 
15394 void
15395 rs6000_emit_cbranch (machine_mode mode, rtx operands[])
15396 {
15397   rtx condition_rtx = rs6000_generate_compare (operands[0], mode);
15398   rtx loc_ref = gen_rtx_LABEL_REF (VOIDmode, operands[3]);
15399   rtx ite = gen_rtx_IF_THEN_ELSE (VOIDmode, condition_rtx, loc_ref, pc_rtx);
15400   emit_jump_insn (gen_rtx_SET (pc_rtx, ite));
15401 }
15402 
15403 /* Return the string to output a conditional branch to LABEL, which is
15404    the operand template of the label, or NULL if the branch is really a
15405    conditional return.
15406 
15407    OP is the conditional expression.  XEXP (OP, 0) is assumed to be a
15408    condition code register and its mode specifies what kind of
15409    comparison we made.
15410 
15411    REVERSED is nonzero if we should reverse the sense of the comparison.
15412 
15413    INSN is the insn.  */
15414 
15415 char *
15416 output_cbranch (rtx op, const char *label, int reversed, rtx_insn *insn)
15417 {
15418   static char string[64];
15419   enum rtx_code code = GET_CODE (op);
15420   rtx cc_reg = XEXP (op, 0);
15421   machine_mode mode = GET_MODE (cc_reg);
15422   int cc_regno = REGNO (cc_reg) - CR0_REGNO;
15423   int need_longbranch = label != NULL && get_attr_length (insn) == 8;
15424   int really_reversed = reversed ^ need_longbranch;
15425   char *s = string;
15426   const char *ccode;
15427   const char *pred;
15428   rtx note;
15429 
15430   validate_condition_mode (code, mode);
15431 
15432   /* Work out which way this really branches.  We could use
15433      reverse_condition_maybe_unordered here always but this
15434      makes the resulting assembler clearer.  */
15435   if (really_reversed)
15436     {
15437       /* Reversal of FP compares needs care -- an ordered compare
15438 	 becomes an unordered compare and vice versa.  */
15439       if (mode == CCFPmode)
15440 	code = reverse_condition_maybe_unordered (code);
15441       else
15442 	code = reverse_condition (code);
15443     }
15444 
15445   switch (code)
15446     {
15447       /* Not all of these are actually distinct opcodes, but
15448 	 we distinguish them for clarity of the resulting assembler.  */
15449     case NE: case LTGT:
15450       ccode = "ne"; break;
15451     case EQ: case UNEQ:
15452       ccode = "eq"; break;
15453     case GE: case GEU:
15454       ccode = "ge"; break;
15455     case GT: case GTU: case UNGT:
15456       ccode = "gt"; break;
15457     case LE: case LEU:
15458       ccode = "le"; break;
15459     case LT: case LTU: case UNLT:
15460       ccode = "lt"; break;
15461     case UNORDERED: ccode = "un"; break;
15462     case ORDERED: ccode = "nu"; break;
15463     case UNGE: ccode = "nl"; break;
15464     case UNLE: ccode = "ng"; break;
15465     default:
15466       gcc_unreachable ();
15467     }
15468 
15469   /* Maybe we have a guess as to how likely the branch is.  */
15470   pred = "";
15471   note = find_reg_note (insn, REG_BR_PROB, NULL_RTX);
15472   if (note != NULL_RTX)
15473     {
15474       /* PROB is the difference from 50%.  */
15475       int prob = profile_probability::from_reg_br_prob_note (XINT (note, 0))
15476 		   .to_reg_br_prob_base () - REG_BR_PROB_BASE / 2;
15477 
15478       /* Only hint for highly probable/improbable branches on newer cpus when
15479 	 we have real profile data, as static prediction overrides processor
15480 	 dynamic prediction.  For older cpus we may as well always hint, but
15481 	 assume not taken for branches that are very close to 50% as a
15482 	 mispredicted taken branch is more expensive than a
15483 	 mispredicted not-taken branch.  */
15484       if (rs6000_always_hint
15485 	  || (abs (prob) > REG_BR_PROB_BASE / 100 * 48
15486 	      && (profile_status_for_fn (cfun) != PROFILE_GUESSED)
15487 	      && br_prob_note_reliable_p (note)))
15488 	{
15489 	  if (abs (prob) > REG_BR_PROB_BASE / 20
15490 	      && ((prob > 0) ^ need_longbranch))
15491 	    pred = "+";
15492 	  else
15493 	    pred = "-";
15494 	}
15495     }
15496 
15497   if (label == NULL)
15498     s += sprintf (s, "b%slr%s ", ccode, pred);
15499   else
15500     s += sprintf (s, "b%s%s ", ccode, pred);
15501 
15502   /* We need to escape any '%' characters in the reg_names string.
15503      Assume they'd only be the first character....  */
15504   if (reg_names[cc_regno + CR0_REGNO][0] == '%')
15505     *s++ = '%';
15506   s += sprintf (s, "%s", reg_names[cc_regno + CR0_REGNO]);
15507 
15508   if (label != NULL)
15509     {
15510       /* If the branch distance was too far, we may have to use an
15511 	 unconditional branch to go the distance.  */
15512       if (need_longbranch)
15513 	s += sprintf (s, ",$+8\n\tb %s", label);
15514       else
15515 	s += sprintf (s, ",%s", label);
15516     }
15517 
15518   return string;
15519 }
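/* Illustrative outputs (register and label names assumed): a short branch
   predicted taken prints as "beq+ 0,.L5", while an out-of-range branch is
   reversed and bounced through an unconditional jump:
   "bne 0,$+8\n\tb .L5".  */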
15520 
15521 /* Return insn for VSX or Altivec comparisons.  */
15522 
15523 static rtx
15524 rs6000_emit_vector_compare_inner (enum rtx_code code, rtx op0, rtx op1)
15525 {
15526   rtx mask;
15527   machine_mode mode = GET_MODE (op0);
15528 
15529   switch (code)
15530     {
15531     default:
15532       break;
15533 
15534     case GE:
15535       if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
15536 	return NULL_RTX;
15537       /* FALLTHRU */
15538 
15539     case EQ:
15540     case GT:
15541     case GTU:
15542     case ORDERED:
15543     case UNORDERED:
15544     case UNEQ:
15545     case LTGT:
15546       mask = gen_reg_rtx (mode);
15547       emit_insn (gen_rtx_SET (mask, gen_rtx_fmt_ee (code, mode, op0, op1)));
15548       return mask;
15549     }
15550 
15551   return NULL_RTX;
15552 }
15553 
15554 /* Emit vector compare for operands OP0 and OP1 using code RCODE.
15555    DMODE is expected destination mode. This is a recursive function.  */
15556 
15557 static rtx
15558 rs6000_emit_vector_compare (enum rtx_code rcode,
15559 			    rtx op0, rtx op1,
15560 			    machine_mode dmode)
15561 {
15562   rtx mask;
15563   bool swap_operands = false;
15564   bool try_again = false;
15565 
15566   gcc_assert (VECTOR_UNIT_ALTIVEC_OR_VSX_P (dmode));
15567   gcc_assert (GET_MODE (op0) == GET_MODE (op1));
15568 
15569   /* See if the comparison works as is.  */
15570   mask = rs6000_emit_vector_compare_inner (rcode, op0, op1);
15571   if (mask)
15572     return mask;
15573 
15574   switch (rcode)
15575     {
15576     case LT:
15577       rcode = GT;
15578       swap_operands = true;
15579       try_again = true;
15580       break;
15581     case LTU:
15582       rcode = GTU;
15583       swap_operands = true;
15584       try_again = true;
15585       break;
15586     case NE:
15587     case UNLE:
15588     case UNLT:
15589     case UNGE:
15590     case UNGT:
15591       /* Invert condition and try again.
15592 	 e.g., A != B becomes ~(A==B).  */
15593       {
15594 	enum rtx_code rev_code;
15595 	enum insn_code nor_code;
15596 	rtx mask2;
15597 
15598 	rev_code = reverse_condition_maybe_unordered (rcode);
15599 	if (rev_code == UNKNOWN)
15600 	  return NULL_RTX;
15601 
15602 	nor_code = optab_handler (one_cmpl_optab, dmode);
15603 	if (nor_code == CODE_FOR_nothing)
15604 	  return NULL_RTX;
15605 
15606 	mask2 = rs6000_emit_vector_compare (rev_code, op0, op1, dmode);
15607 	if (!mask2)
15608 	  return NULL_RTX;
15609 
15610 	mask = gen_reg_rtx (dmode);
15611 	emit_insn (GEN_FCN (nor_code) (mask, mask2));
15612 	return mask;
15613       }
15614       break;
15615     case GE:
15616     case GEU:
15617     case LE:
15618     case LEU:
15619       /* Try GT/GTU/LT/LTU OR EQ */
15620       {
15621 	rtx c_rtx, eq_rtx;
15622 	enum insn_code ior_code;
15623 	enum rtx_code new_code;
15624 
15625 	switch (rcode)
15626 	  {
15627 	  case  GE:
15628 	    new_code = GT;
15629 	    break;
15630 
15631 	  case GEU:
15632 	    new_code = GTU;
15633 	    break;
15634 
15635 	  case LE:
15636 	    new_code = LT;
15637 	    break;
15638 
15639 	  case LEU:
15640 	    new_code = LTU;
15641 	    break;
15642 
15643 	  default:
15644 	    gcc_unreachable ();
15645 	  }
15646 
15647 	ior_code = optab_handler (ior_optab, dmode);
15648 	if (ior_code == CODE_FOR_nothing)
15649 	  return NULL_RTX;
15650 
15651 	c_rtx = rs6000_emit_vector_compare (new_code, op0, op1, dmode);
15652 	if (!c_rtx)
15653 	  return NULL_RTX;
15654 
15655 	eq_rtx = rs6000_emit_vector_compare (EQ, op0, op1, dmode);
15656 	if (!eq_rtx)
15657 	  return NULL_RTX;
15658 
15659 	mask = gen_reg_rtx (dmode);
15660 	emit_insn (GEN_FCN (ior_code) (mask, c_rtx, eq_rtx));
15661 	return mask;
15662       }
15663       break;
15664     default:
15665       return NULL_RTX;
15666     }
15667 
15668   if (try_again)
15669     {
15670       if (swap_operands)
15671 	std::swap (op0, op1);
15672 
15673       mask = rs6000_emit_vector_compare_inner (rcode, op0, op1);
15674       if (mask)
15675 	return mask;
15676     }
15677 
15678   /* You only get two chances.  */
15679   return NULL_RTX;
15680 }
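/* For illustration: on integer vectors only EQ, GT and GTU exist natively,
   so LE is built here as (LT or EQ) -- with LT itself reduced to a
   swapped-operand GT -- using two compares and a vector OR, while NE becomes
   the one's complement of EQ.  */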
15681 
15682 /* Emit vector conditional expression.  DEST is destination. OP_TRUE and
15683    OP_FALSE are two VEC_COND_EXPR operands.  CC_OP0 and CC_OP1 are the two
15684    operands for the relation operation COND.  */
15685 
15686 int
15687 rs6000_emit_vector_cond_expr (rtx dest, rtx op_true, rtx op_false,
15688 			      rtx cond, rtx cc_op0, rtx cc_op1)
15689 {
15690   machine_mode dest_mode = GET_MODE (dest);
15691   machine_mode mask_mode = GET_MODE (cc_op0);
15692   enum rtx_code rcode = GET_CODE (cond);
15693   rtx mask;
15694   bool invert_move = false;
15695 
15696   if (VECTOR_UNIT_NONE_P (dest_mode))
15697     return 0;
15698 
15699   gcc_assert (GET_MODE_SIZE (dest_mode) == GET_MODE_SIZE (mask_mode)
15700 	      && GET_MODE_NUNITS (dest_mode) == GET_MODE_NUNITS (mask_mode));
15701 
15702   switch (rcode)
15703     {
15704       /* Swap operands if we can, and fall back to doing the operation as
15705 	 specified, and doing a NOR to invert the test.  */
15706     case NE:
15707     case UNLE:
15708     case UNLT:
15709     case UNGE:
15710     case UNGT:
15711       /* Invert condition and try again.
15712 	 e.g., A  = (B != C) ? D : E becomes A = (B == C) ? E : D.  */
15713       invert_move = true;
15714       rcode = reverse_condition_maybe_unordered (rcode);
15715       if (rcode == UNKNOWN)
15716 	return 0;
15717       break;
15718 
15719     case GE:
15720     case LE:
15721       if (GET_MODE_CLASS (mask_mode) == MODE_VECTOR_INT)
15722 	{
15723 	  /* Invert condition to avoid compound test.  */
15724 	  invert_move = true;
15725 	  rcode = reverse_condition (rcode);
15726 	}
15727       break;
15728 
15729     case GTU:
15730     case GEU:
15731     case LTU:
15732     case LEU:
15733 
15734       /* Invert condition to avoid compound test if necessary.  */
15735       if (rcode == GEU || rcode == LEU)
15736 	{
15737 	  invert_move = true;
15738 	  rcode = reverse_condition (rcode);
15739 	}
15740       break;
15741 
15742     default:
15743       break;
15744     }
15745 
15746   /* Get the vector mask for the given relational operations.  */
15747   mask = rs6000_emit_vector_compare (rcode, cc_op0, cc_op1, mask_mode);
15748 
15749   if (!mask)
15750     return 0;
15751 
15752   if (mask_mode != dest_mode)
15753     mask = simplify_gen_subreg (dest_mode, mask, mask_mode, 0);
15754 
15755   if (invert_move)
15756     std::swap (op_true, op_false);
15757 
15758   /* The compare mask is -1/0 in each element; exploit constant -1/0 arms.  */
15759   if (GET_MODE_CLASS (dest_mode) == MODE_VECTOR_INT
15760       && (GET_CODE (op_true) == CONST_VECTOR
15761 	  || GET_CODE (op_false) == CONST_VECTOR))
15762     {
15763       rtx constant_0 = CONST0_RTX (dest_mode);
15764       rtx constant_m1 = CONSTM1_RTX (dest_mode);
15765 
15766       if (op_true == constant_m1 && op_false == constant_0)
15767 	{
15768 	  emit_move_insn (dest, mask);
15769 	  return 1;
15770 	}
15771 
15772       else if (op_true == constant_0 && op_false == constant_m1)
15773 	{
15774 	  emit_insn (gen_rtx_SET (dest, gen_rtx_NOT (dest_mode, mask)));
15775 	  return 1;
15776 	}
15777 
15778       /* If we can't use the vector comparison directly, perhaps we can use
15779 	 the mask for the true or false fields, instead of loading up a
15780 	 constant.  */
15781       if (op_true == constant_m1)
15782 	op_true = mask;
15783 
15784       if (op_false == constant_0)
15785 	op_false = mask;
15786     }
15787 
15788   if (!REG_P (op_true) && !SUBREG_P (op_true))
15789     op_true = force_reg (dest_mode, op_true);
15790 
15791   if (!REG_P (op_false) && !SUBREG_P (op_false))
15792     op_false = force_reg (dest_mode, op_false);
15793 
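  /* The RTL below is the standard select identity:
     dest = (mask & op_true) | (~mask & op_false), which is what the AltiVec
     vsel and VSX xxsel instructions implement.  */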
15794   rtx tmp = gen_rtx_IOR (dest_mode,
15795 			 gen_rtx_AND (dest_mode, gen_rtx_NOT (dest_mode, mask),
15796 				      op_false),
15797 			 gen_rtx_AND (dest_mode, mask, op_true));
15798   emit_insn (gen_rtx_SET (dest, tmp));
15799   return 1;
15800 }
15801 
15802 /* Possibly use the xsmaxc{dp,qp} and xsminc{dp,qp} instructions to implement
15803    a maximum or minimum with "C" semantics.
15804 
15805    Unless you use -ffast-math, you can't use these instructions to replace
15806    conditions that implicitly reverse the condition, because the comparison
15807    might generate a NaN or a signed zero.
15808 
15809    I.e. the following can be replaced all of the time:
15810 	ret = (op1 >  op2) ? op1 : op2;	; generate xsmaxcdp
15811 	ret = (op1 >= op2) ? op1 : op2;	; generate xsmaxcdp
15812 	ret = (op1 <  op2) ? op1 : op2;	; generate xsmincdp
15813 	ret = (op1 <= op2) ? op1 : op2;	; generate xsmincdp
15814 
15815    The following can be replaced only if -ffast-math is used:
15816 	ret = (op1 <  op2) ? op2 : op1;	; generate xsmaxcdp
15817 	ret = (op1 <= op2) ? op2 : op1;	; generate xsmaxcdp
15818 	ret = (op1 >  op2) ? op2 : op1;	; generate xsmincdp
15819 	ret = (op1 >= op2) ? op2 : op1;	; generate xsmincdp
15820 
15821    Move TRUE_COND to DEST if OP applied to the operands of the last comparison
15822    is nonzero/true, FALSE_COND if it is zero/false.
15823 
15824    Return false if we can't generate the appropriate minimum or maximum, and
15825    true if we did.  */
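/* A worked example of why the second group needs -ffast-math: with op1 = NaN,
   (op1 < op2) ? op2 : op1 evaluates to op1 (the NaN), while xsmaxcdp computes
   (op1 > op2) ? op1 : op2 and returns op2.  The two agree only when NaNs and
   signed zeros need not be honored.  */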
15826 
15827 static bool
15828 rs6000_maybe_emit_maxc_minc (rtx dest, rtx op, rtx true_cond, rtx false_cond)
15829 {
15830   enum rtx_code code = GET_CODE (op);
15831   rtx op0 = XEXP (op, 0);
15832   rtx op1 = XEXP (op, 1);
15833   machine_mode compare_mode = GET_MODE (op0);
15834   machine_mode result_mode = GET_MODE (dest);
15835 
15836   if (result_mode != compare_mode)
15837     return false;
15838 
15839   /* See the comments of this function; it expects GE/GT/LE/LT in
15840      the checks, but for the reversible equivalent UNLT/UNLE/UNGT/UNGE,
15841      we need to do the reversions first to make the following checks
15842      support fewer cases, like:
15843 
15844 	(a UNLT b) ? op1 : op2 =>  (a >= b) ? op2 : op1;
15845 	(a UNLE b) ? op1 : op2 =>  (a >  b) ? op2 : op1;
15846 	(a UNGT b) ? op1 : op2 =>  (a <= b) ? op2 : op1;
15847 	(a UNGE b) ? op1 : op2 =>  (a <  b) ? op2 : op1;
15848 
15849      By the way, if we see these UNLT/UNLE/UNGT/UNGE it's guaranteed
15850      that we have 4-way condition codes (LT/GT/EQ/UN), so we do not
15851      have to check for fast-math or the like.  */
15852   if (code == UNGE || code == UNGT || code == UNLE || code == UNLT)
15853     {
15854       code = reverse_condition_maybe_unordered (code);
15855       std::swap (true_cond, false_cond);
15856     }
15857 
15858   bool max_p;
15859   if (code == GE || code == GT)
15860     max_p = true;
15861   else if (code == LE || code == LT)
15862     max_p = false;
15863   else
15864     return false;
15865 
15866   if (rtx_equal_p (op0, true_cond) && rtx_equal_p (op1, false_cond))
15867     ;
15868 
15869   /* Only when NaNs and signed-zeros are not in effect, smax could be
15870      used for `op0 < op1 ? op1 : op0`, and smin could be used for
15871      `op0 > op1 ? op1 : op0`.  */
15872   else if (rtx_equal_p (op1, true_cond) && rtx_equal_p (op0, false_cond)
15873 	   && !HONOR_NANS (compare_mode) && !HONOR_SIGNED_ZEROS (compare_mode))
15874     max_p = !max_p;
15875 
15876   else
15877     return false;
15878 
15879   rs6000_emit_minmax (dest, max_p ? SMAX : SMIN, op0, op1);
15880   return true;
15881 }
15882 
15883 /* Possibly emit a floating point conditional move by generating a
15884    compare-and-set-mask instruction followed by an XXSEL select instruction.
15885 
15886    Move TRUE_COND to DEST if OP applied to the operands of the last comparison
15887    is nonzero/true, FALSE_COND if it is zero/false.
15888 
15889    Return false if the operation cannot be generated, and true if we could
15890    generate the instruction.  */
15891 
15892 static bool
15893 rs6000_maybe_emit_fp_cmove (rtx dest, rtx op, rtx true_cond, rtx false_cond)
15894 {
15895   enum rtx_code code = GET_CODE (op);
15896   rtx op0 = XEXP (op, 0);
15897   rtx op1 = XEXP (op, 1);
15898   machine_mode compare_mode = GET_MODE (op0);
15899   machine_mode result_mode = GET_MODE (dest);
15900   rtx compare_rtx;
15901   rtx cmove_rtx;
15902   rtx clobber_rtx;
15903 
15904   if (!can_create_pseudo_p ())
15905     return false;
15906 
15907   /* We allow the comparison to be either SFmode or DFmode and the true/false
15908      condition to be either SFmode or DFmode.  I.e. we allow:
15909 
15910 	float a, b;
15911 	double c, d, r;
15912 
15913 	r = (a == b) ? c : d;
15914 
15915     and:
15916 
15917 	double a, b;
15918 	float c, d, r;
15919 
15920 	r = (a == b) ? c : d;
15921 
15922     but we don't allow intermixing the IEEE 128-bit floating point types with
15923     the 32/64-bit scalar types.  */
15924 
15925   if (!(compare_mode == result_mode
15926 	|| (compare_mode == SFmode && result_mode == DFmode)
15927 	|| (compare_mode == DFmode && result_mode == SFmode)))
15928     return false;
15929 
15930   switch (code)
15931     {
15932     case EQ:
15933     case GE:
15934     case GT:
15935       break;
15936 
15937     case NE:
15938     case LT:
15939     case LE:
15940       code = swap_condition (code);
15941       std::swap (op0, op1);
15942       break;
15943 
15944     default:
15945       return false;
15946     }
15947 
15948   /* Generate:	[(parallel [(set (dest)
15949 				 (if_then_else (op (cmp1) (cmp2))
15950 					       (true)
15951 					       (false)))
15952 			    (clobber (scratch))])].  */
15953 
15954   compare_rtx = gen_rtx_fmt_ee (code, CCFPmode, op0, op1);
15955   cmove_rtx = gen_rtx_SET (dest,
15956 			   gen_rtx_IF_THEN_ELSE (result_mode,
15957 						 compare_rtx,
15958 						 true_cond,
15959 						 false_cond));
15960 
15961   clobber_rtx = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (V2DImode));
15962   emit_insn (gen_rtx_PARALLEL (VOIDmode,
15963 			       gen_rtvec (2, cmove_rtx, clobber_rtx)));
15964 
15965   return true;
15966 }
15967 
15968 /* Helper function to return true if the target has instructions to do a
15969    compare and set mask instruction that can be used with XXSEL to implement a
15970    conditional move.  It is also assumed that such a target supports the
15971    "C" minimum and maximum instructions. */
15972 
15973 static bool
15974 have_compare_and_set_mask (machine_mode mode)
15975 {
15976   switch (mode)
15977     {
15978     case E_SFmode:
15979     case E_DFmode:
15980       return TARGET_P9_MINMAX;
15981 
15982     case E_KFmode:
15983     case E_TFmode:
15984       return TARGET_POWER10 && TARGET_FLOAT128_HW && FLOAT128_IEEE_P (mode);
15985 
15986     default:
15987       break;
15988     }
15989 
15990   return false;
15991 }
15992 
15993 /* Emit a conditional move: move TRUE_COND to DEST if OP applied to the
15994    operands of the last comparison is nonzero/true, FALSE_COND if it
15995    is zero/false.  Return false if the hardware has no such operation.  */
15996 
15997 bool
15998 rs6000_emit_cmove (rtx dest, rtx op, rtx true_cond, rtx false_cond)
15999 {
16000   enum rtx_code code = GET_CODE (op);
16001   rtx op0 = XEXP (op, 0);
16002   rtx op1 = XEXP (op, 1);
16003   machine_mode compare_mode = GET_MODE (op0);
16004   machine_mode result_mode = GET_MODE (dest);
16005   rtx temp;
16006   bool is_against_zero;
16007 
16008   /* These modes should always match.  */
16009   if (GET_MODE (op1) != compare_mode
16010       /* In the isel case however, we can use a compare immediate, so
16011 	 op1 may be a small constant.  */
16012       && (!TARGET_ISEL || !short_cint_operand (op1, VOIDmode)))
16013     return false;
16014   if (GET_MODE (true_cond) != result_mode)
16015     return false;
16016   if (GET_MODE (false_cond) != result_mode)
16017     return false;
16018 
16019   /* See if we can use the "C" minimum, "C" maximum, and compare and set mask
16020      instructions.  */
16021   if (have_compare_and_set_mask (compare_mode)
16022       && have_compare_and_set_mask (result_mode))
16023     {
16024       if (rs6000_maybe_emit_maxc_minc (dest, op, true_cond, false_cond))
16025 	return true;
16026 
16027       if (rs6000_maybe_emit_fp_cmove (dest, op, true_cond, false_cond))
16028 	return true;
16029     }
16030 
16031   /* Don't allow using floating point comparisons for integer results for
16032      now.  */
16033   if (FLOAT_MODE_P (compare_mode) && !FLOAT_MODE_P (result_mode))
16034     return false;
16035 
16036   /* First, work out if the hardware can do this at all, or
16037      if it's too slow....  */
16038   if (!FLOAT_MODE_P (compare_mode))
16039     {
16040       if (TARGET_ISEL)
16041 	return rs6000_emit_int_cmove (dest, op, true_cond, false_cond);
16042       return false;
16043     }
16044 
16045   is_against_zero = op1 == CONST0_RTX (compare_mode);
16046 
16047   /* A floating-point subtract might overflow, underflow, or produce
16048      an inexact result, thus changing the floating-point flags, so it
16049      can't be generated if we care about that.  It's safe if one side
16050      of the construct is zero, since then no subtract will be
16051      generated.  */
16052   if (SCALAR_FLOAT_MODE_P (compare_mode)
16053       && flag_trapping_math && ! is_against_zero)
16054     return false;
16055 
16056   /* Eliminate half of the comparisons by switching operands; this
16057      makes the remaining code simpler.  */
16058   if (code == UNLT || code == UNGT || code == UNORDERED || code == NE
16059       || code == LTGT || code == LT || code == UNLE)
16060     {
16061       code = reverse_condition_maybe_unordered (code);
16062       temp = true_cond;
16063       true_cond = false_cond;
16064       false_cond = temp;
16065     }
16066 
16067   /* UNEQ and LTGT take four instructions for a comparison with zero,
16068      so it'll probably be faster to use a branch here too.  */
16069   if (code == UNEQ && HONOR_NANS (compare_mode))
16070     return false;
16071 
16072   /* We're going to try to implement comparisons by performing
16073      a subtract, then comparing against zero.  Unfortunately,
16074      Inf - Inf is NaN which is not zero, and so if we don't
16075      know that the operand is finite and the comparison
16076      would treat EQ differently from UNORDERED, we can't do it.  */
16077   if (HONOR_INFINITIES (compare_mode)
16078       && code != GT && code != UNGE
16079       && (!CONST_DOUBLE_P (op1)
16080 	  || real_isinf (CONST_DOUBLE_REAL_VALUE (op1)))
16081       /* Constructs of the form (a OP b ? a : b) are safe.  */
16082       && ((! rtx_equal_p (op0, false_cond) && ! rtx_equal_p (op1, false_cond))
16083 	  || (! rtx_equal_p (op0, true_cond)
16084 	      && ! rtx_equal_p (op1, true_cond))))
16085     return false;
16086 
16087   /* At this point we know we can use fsel.  */
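  /* fsel computes dest = (op0 >= 0.0) ? true_cond : false_cond, so the code
     below rewrites each remaining comparison as a test against zero: LE
     negates op0, ORDERED takes fabs (op0), EQ takes -fabs (op0), and UNGE/GT
     chain a second fsel on the negation.  */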
16088 
16089   /* Don't allow compare_mode other than SFmode or DFmode, for others there
16090      is no fsel instruction.  */
16091   if (compare_mode != SFmode && compare_mode != DFmode)
16092     return false;
16093 
16094   /* Reduce the comparison to a comparison against zero.  */
16095   if (! is_against_zero)
16096     {
16097       temp = gen_reg_rtx (compare_mode);
16098       emit_insn (gen_rtx_SET (temp, gen_rtx_MINUS (compare_mode, op0, op1)));
16099       op0 = temp;
16100       op1 = CONST0_RTX (compare_mode);
16101     }
16102 
16103   /* If we don't care about NaNs we can reduce some of the comparisons
16104      down to faster ones.  */
16105   if (! HONOR_NANS (compare_mode))
16106     switch (code)
16107       {
16108       case GT:
16109 	code = LE;
16110 	temp = true_cond;
16111 	true_cond = false_cond;
16112 	false_cond = temp;
16113 	break;
16114       case UNGE:
16115 	code = GE;
16116 	break;
16117       case UNEQ:
16118 	code = EQ;
16119 	break;
16120       default:
16121 	break;
16122       }
16123 
16124   /* Now, reduce everything down to a GE.  */
16125   switch (code)
16126     {
16127     case GE:
16128       break;
16129 
16130     case LE:
16131       temp = gen_reg_rtx (compare_mode);
16132       emit_insn (gen_rtx_SET (temp, gen_rtx_NEG (compare_mode, op0)));
16133       op0 = temp;
16134       break;
16135 
16136     case ORDERED:
16137       temp = gen_reg_rtx (compare_mode);
16138       emit_insn (gen_rtx_SET (temp, gen_rtx_ABS (compare_mode, op0)));
16139       op0 = temp;
16140       break;
16141 
16142     case EQ:
16143       temp = gen_reg_rtx (compare_mode);
16144       emit_insn (gen_rtx_SET (temp,
16145 			      gen_rtx_NEG (compare_mode,
16146 					   gen_rtx_ABS (compare_mode, op0))));
16147       op0 = temp;
16148       break;
16149 
16150     case UNGE:
16151       /* a UNGE 0 <-> (a GE 0 || -a UNLT 0) */
16152       temp = gen_reg_rtx (result_mode);
16153       emit_insn (gen_rtx_SET (temp,
16154 			      gen_rtx_IF_THEN_ELSE (result_mode,
16155 						    gen_rtx_GE (VOIDmode,
16156 								op0, op1),
16157 						    true_cond, false_cond)));
16158       false_cond = true_cond;
16159       true_cond = temp;
16160 
16161       temp = gen_reg_rtx (compare_mode);
16162       emit_insn (gen_rtx_SET (temp, gen_rtx_NEG (compare_mode, op0)));
16163       op0 = temp;
16164       break;
16165 
16166     case GT:
16167       /* a GT 0 <-> (a GE 0 && -a UNLT 0) */
16168       temp = gen_reg_rtx (result_mode);
16169       emit_insn (gen_rtx_SET (temp,
16170 			      gen_rtx_IF_THEN_ELSE (result_mode,
16171 						    gen_rtx_GE (VOIDmode,
16172 								op0, op1),
16173 						    true_cond, false_cond)));
16174       true_cond = false_cond;
16175       false_cond = temp;
16176 
16177       temp = gen_reg_rtx (compare_mode);
16178       emit_insn (gen_rtx_SET (temp, gen_rtx_NEG (compare_mode, op0)));
16179       op0 = temp;
16180       break;
16181 
16182     default:
16183       gcc_unreachable ();
16184     }
16185 
16186   emit_insn (gen_rtx_SET (dest,
16187 			  gen_rtx_IF_THEN_ELSE (result_mode,
16188 						gen_rtx_GE (VOIDmode,
16189 							    op0, op1),
16190 						true_cond, false_cond)));
16191   return true;
16192 }
16193 
16194 /* Same as above, but for ints (isel).  */
16195 
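/* Note that isel (isel RT,RA,RB,BC) merely copies RA to RT when CR bit BC is
   set and RB otherwise; the comparison that sets the CR bits must be emitted
   separately, which rs6000_generate_compare does below.  */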
16196 bool
16197 rs6000_emit_int_cmove (rtx dest, rtx op, rtx true_cond, rtx false_cond)
16198 {
16199   rtx condition_rtx, cr;
16200   machine_mode mode = GET_MODE (dest);
16201   enum rtx_code cond_code;
16202   rtx (*isel_func) (rtx, rtx, rtx, rtx, rtx);
16203   bool signedp;
16204 
16205   if (mode != SImode && (!TARGET_POWERPC64 || mode != DImode))
16206     return false;
16207 
16208   /* PR104335: We now need to expect CC-mode "comparisons"
16209      coming from ifcvt.  The following code expects proper
16210      comparisons, so we had better bail out here.  */
16211   if (GET_MODE_CLASS (GET_MODE (XEXP (op, 0))) == MODE_CC)
16212     return false;
16213 
16214   /* We still have to do the compare, because isel doesn't do a
16215      compare; it just looks at the CRx bits set by a previous compare
16216      instruction.  */
16217   condition_rtx = rs6000_generate_compare (op, mode);
16218   cond_code = GET_CODE (condition_rtx);
16219   cr = XEXP (condition_rtx, 0);
16220   signedp = GET_MODE (cr) == CCmode;
16221 
16222   isel_func = (mode == SImode
16223 	       ? (signedp ? gen_isel_signed_si : gen_isel_unsigned_si)
16224 	       : (signedp ? gen_isel_signed_di : gen_isel_unsigned_di));
16225 
16226   switch (cond_code)
16227     {
16228     case LT: case GT: case LTU: case GTU: case EQ:
16229       /* isel handles these directly.  */
16230       break;
16231 
16232     default:
16233       /* We need to swap the sense of the comparison.  */
16234       {
16235 	std::swap (false_cond, true_cond);
16236 	PUT_CODE (condition_rtx, reverse_condition (cond_code));
16237       }
16238       break;
16239     }
16240 
16241   false_cond = force_reg (mode, false_cond);
16242   if (true_cond != const0_rtx)
16243     true_cond = force_reg (mode, true_cond);
16244 
16245   emit_insn (isel_func (dest, condition_rtx, true_cond, false_cond, cr));
16246 
16247   return true;
16248 }
16249 
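/* Emit DEST = CODE (OP0, OP1), where CODE is one of SMAX, SMIN, UMAX or
   UMIN.  Use a direct min/max instruction where the target has one,
   otherwise fall back to a conditional move.  */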
16250 void
16251 rs6000_emit_minmax (rtx dest, enum rtx_code code, rtx op0, rtx op1)
16252 {
16253   machine_mode mode = GET_MODE (op0);
16254   enum rtx_code c;
16255   rtx target;
16256 
16257   /* VSX/altivec have direct min/max insns.  */
16258   if ((code == SMAX || code == SMIN)
16259       && (VECTOR_UNIT_ALTIVEC_OR_VSX_P (mode)
16260 	  || (mode == SFmode && VECTOR_UNIT_VSX_P (DFmode))
16261 	  || (TARGET_POWER10 && TARGET_FLOAT128_HW && FLOAT128_IEEE_P (mode))))
16262     {
16263       emit_insn (gen_rtx_SET (dest, gen_rtx_fmt_ee (code, mode, op0, op1)));
16264       return;
16265     }
16266 
16267   if (code == SMAX || code == SMIN)
16268     c = GE;
16269   else
16270     c = GEU;
16271 
16272   if (code == SMAX || code == UMAX)
16273     target = emit_conditional_move (dest, { c, op0, op1, mode },
16274 				    op0, op1, mode, 0);
16275   else
16276     target = emit_conditional_move (dest, { c, op0, op1, mode },
16277 				    op1, op0, mode, 0);
16278   gcc_assert (target);
16279   if (target != dest)
16280     emit_move_insn (dest, target);
16281 }
16282 
16283 /* A subroutine of the atomic operation splitters.  Jump to LABEL if
16284    COND is true.  Mark the jump as unlikely to be taken.  */
16285 
16286 static void
16287 emit_unlikely_jump (rtx cond, rtx label)
16288 {
16289   rtx x = gen_rtx_IF_THEN_ELSE (VOIDmode, cond, label, pc_rtx);
16290   rtx_insn *insn = emit_jump_insn (gen_rtx_SET (pc_rtx, x));
16291   add_reg_br_prob_note (insn, profile_probability::very_unlikely ());
16292 }
16293 
16294 /* A subroutine of the atomic operation splitters.  Emit a load-locked
16295    instruction in MODE.  For QI/HImode, possibly use a pattern that includes
16296    the zero_extend operation.  */
16297 
16298 static void
16299 emit_load_locked (machine_mode mode, rtx reg, rtx mem)
16300 {
16301   rtx (*fn) (rtx, rtx) = NULL;
16302 
16303   switch (mode)
16304     {
16305     case E_QImode:
16306       fn = gen_load_lockedqi;
16307       break;
16308     case E_HImode:
16309       fn = gen_load_lockedhi;
16310       break;
16311     case E_SImode:
16312       if (GET_MODE (mem) == QImode)
16313 	fn = gen_load_lockedqi_si;
16314       else if (GET_MODE (mem) == HImode)
16315 	fn = gen_load_lockedhi_si;
16316       else
16317 	fn = gen_load_lockedsi;
16318       break;
16319     case E_DImode:
16320       fn = gen_load_lockeddi;
16321       break;
16322     case E_TImode:
16323       fn = gen_load_lockedti;
16324       break;
16325     default:
16326       gcc_unreachable ();
16327     }
16328   emit_insn (fn (reg, mem));
16329 }
16330 
16331 /* A subroutine of the atomic operation splitters.  Emit a store-conditional
16332    instruction in MODE.  */
16333 
16334 static void
16335 emit_store_conditional (machine_mode mode, rtx res, rtx mem, rtx val)
16336 {
16337   rtx (*fn) (rtx, rtx, rtx) = NULL;
16338 
16339   switch (mode)
16340     {
16341     case E_QImode:
16342       fn = gen_store_conditionalqi;
16343       break;
16344     case E_HImode:
16345       fn = gen_store_conditionalhi;
16346       break;
16347     case E_SImode:
16348       fn = gen_store_conditionalsi;
16349       break;
16350     case E_DImode:
16351       fn = gen_store_conditionaldi;
16352       break;
16353     case E_TImode:
16354       fn = gen_store_conditionalti;
16355       break;
16356     default:
16357       gcc_unreachable ();
16358     }
16359 
16360   /* Emit sync before stwcx. to address PPC405 Erratum.  */
16361   if (PPC405_ERRATUM77)
16362     emit_insn (gen_hwsync ());
16363 
16364   emit_insn (fn (res, mem, val));
16365 }
16366 
16367 /* Expand barriers before and after a load_locked/store_cond sequence.  */
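/* The mapping used here and in rs6000_post_atomic_barrier: SEQ_CST gets a
   hwsync before and an isync after the sequence, RELEASE and ACQ_REL get a
   lwsync before, ACQUIRE and ACQ_REL get an isync after, and RELAXED and
   CONSUME get no barrier at all.  */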
16368 
16369 static rtx
16370 rs6000_pre_atomic_barrier (rtx mem, enum memmodel model)
16371 {
16372   rtx addr = XEXP (mem, 0);
16373 
16374   if (!legitimate_indirect_address_p (addr, reload_completed)
16375       && !legitimate_indexed_address_p (addr, reload_completed))
16376     {
16377       addr = force_reg (Pmode, addr);
16378       mem = replace_equiv_address_nv (mem, addr);
16379     }
16380 
16381   switch (model)
16382     {
16383     case MEMMODEL_RELAXED:
16384     case MEMMODEL_CONSUME:
16385     case MEMMODEL_ACQUIRE:
16386       break;
16387     case MEMMODEL_RELEASE:
16388     case MEMMODEL_ACQ_REL:
16389       emit_insn (gen_lwsync ());
16390       break;
16391     case MEMMODEL_SEQ_CST:
16392       emit_insn (gen_hwsync ());
16393       break;
16394     default:
16395       gcc_unreachable ();
16396     }
16397   return mem;
16398 }
16399 
16400 static void
16401 rs6000_post_atomic_barrier (enum memmodel model)
16402 {
16403   switch (model)
16404     {
16405     case MEMMODEL_RELAXED:
16406     case MEMMODEL_CONSUME:
16407     case MEMMODEL_RELEASE:
16408       break;
16409     case MEMMODEL_ACQUIRE:
16410     case MEMMODEL_ACQ_REL:
16411     case MEMMODEL_SEQ_CST:
16412       emit_insn (gen_isync ());
16413       break;
16414     default:
16415       gcc_unreachable ();
16416     }
16417 }
16418 
16419 /* A subroutine of the various atomic expanders.  For sub-word operations,
16420    we must adjust things to operate on SImode.  Given the original MEM,
16421    return a new aligned memory.  Also build and return the quantities by
16422    which to shift and mask.  */
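/* For example, a QImode operand whose address ends in binary 10 gives
   align = addr & -4, shift = (addr << 3) & 0x18 = 16 on little-endian (XORed
   with 0x18 to give 8 on big-endian), and mask = 0xff << shift.  */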
16423 
16424 static rtx
16425 rs6000_adjust_atomic_subword (rtx orig_mem, rtx *pshift, rtx *pmask)
16426 {
16427   rtx addr, align, shift, mask, mem;
16428   HOST_WIDE_INT shift_mask;
16429   machine_mode mode = GET_MODE (orig_mem);
16430 
16431   /* For smaller modes, we have to implement this via SImode.  */
16432   shift_mask = (mode == QImode ? 0x18 : 0x10);
16433 
16434   addr = XEXP (orig_mem, 0);
16435   addr = force_reg (GET_MODE (addr), addr);
16436 
16437   /* Aligned memory containing subword.  Generate a new memory.  We
16438      do not want any of the existing MEM_ATTR data, as we're now
16439      accessing memory outside the original object.  */
16440   align = expand_simple_binop (Pmode, AND, addr, GEN_INT (-4),
16441 			       NULL_RTX, 1, OPTAB_LIB_WIDEN);
16442   mem = gen_rtx_MEM (SImode, align);
16443   MEM_VOLATILE_P (mem) = MEM_VOLATILE_P (orig_mem);
16444   if (MEM_ALIAS_SET (orig_mem) == ALIAS_SET_MEMORY_BARRIER)
16445     set_mem_alias_set (mem, ALIAS_SET_MEMORY_BARRIER);
16446 
16447   /* Shift amount for subword relative to aligned word.  */
16448   shift = gen_reg_rtx (SImode);
16449   addr = gen_lowpart (SImode, addr);
16450   rtx tmp = gen_reg_rtx (SImode);
16451   emit_insn (gen_ashlsi3 (tmp, addr, GEN_INT (3)));
16452   emit_insn (gen_andsi3 (shift, tmp, GEN_INT (shift_mask)));
16453   if (BYTES_BIG_ENDIAN)
16454     shift = expand_simple_binop (SImode, XOR, shift, GEN_INT (shift_mask),
16455 			         shift, 1, OPTAB_LIB_WIDEN);
16456   *pshift = shift;
16457 
16458   /* Mask for insertion.  */
16459   mask = expand_simple_binop (SImode, ASHIFT, GEN_INT (GET_MODE_MASK (mode)),
16460 			      shift, NULL_RTX, 1, OPTAB_LIB_WIDEN);
16461   *pmask = mask;
16462 
16463   return mem;
16464 }
16465 
16466 /* A subroutine of the various atomic expanders.  For sub-word operands,
16467    combine OLDVAL and NEWVAL via MASK.  Returns a new pseudo.  */
16468 
16469 static rtx
16470 rs6000_mask_atomic_subword (rtx oldval, rtx newval, rtx mask)
16471 {
16472   rtx x;
16473 
16474   x = gen_reg_rtx (SImode);
16475   emit_insn (gen_rtx_SET (x, gen_rtx_AND (SImode,
16476 					  gen_rtx_NOT (SImode, mask),
16477 					  oldval)));
16478 
16479   x = expand_simple_binop (SImode, IOR, newval, x, x, 1, OPTAB_LIB_WIDEN);
16480 
16481   return x;
16482 }
16483 
16484 /* A subroutine of the various atomic expanders.  For sub-word operands,
16485    extract WIDE to NARROW via SHIFT.  */
16486 
16487 static void
16488 rs6000_finish_atomic_subword (rtx narrow, rtx wide, rtx shift)
16489 {
16490   wide = expand_simple_binop (SImode, LSHIFTRT, wide, shift,
16491 			      wide, 1, OPTAB_LIB_WIDEN);
16492   emit_move_insn (narrow, gen_lowpart (GET_MODE (narrow), wide));
16493 }
16494 
16495 /* Expand an atomic compare and swap operation.  */
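/* Roughly, a word-sized strong compare-and-swap expands to the classic
   larx/stcx. loop (a sketch, ignoring the memory-model barriers):

	loop:	lwarx   ret,0,mem
		cmpw    cr0,ret,oldval
		bne-    cr0,fail
		stwcx.  newval,0,mem
		bne-    cr0,loop
	fail:

   with lbarx/lharx/ldarx/lqarx and the matching store-conditionals used for
   the other sizes.  */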
16496 
16497 void
16498 rs6000_expand_atomic_compare_and_swap (rtx operands[])
16499 {
16500   rtx boolval, retval, mem, oldval, newval, cond;
16501   rtx label1, label2, x, mask, shift;
16502   machine_mode mode, orig_mode;
16503   enum memmodel mod_s, mod_f;
16504   bool is_weak;
16505 
16506   boolval = operands[0];
16507   retval = operands[1];
16508   mem = operands[2];
16509   oldval = operands[3];
16510   newval = operands[4];
16511   is_weak = (INTVAL (operands[5]) != 0);
16512   mod_s = memmodel_base (INTVAL (operands[6]));
16513   mod_f = memmodel_base (INTVAL (operands[7]));
16514   orig_mode = mode = GET_MODE (mem);
16515 
16516   mask = shift = NULL_RTX;
16517   if (mode == QImode || mode == HImode)
16518     {
16519       /* Before power8, we didn't have access to lbarx/lharx, so generate
16520 	 lwarx and shift/mask operations.  With power8, we need to do the
16521 	 comparison in SImode, but the store is still done in QI/HImode.  */
16522       oldval = convert_modes (SImode, mode, oldval, 1);
16523 
16524       if (!TARGET_SYNC_HI_QI)
16525 	{
16526 	  mem = rs6000_adjust_atomic_subword (mem, &shift, &mask);
16527 
16528 	  /* Shift and mask OLDVAL into position within the word.  */
16529 	  oldval = expand_simple_binop (SImode, ASHIFT, oldval, shift,
16530 					NULL_RTX, 1, OPTAB_LIB_WIDEN);
16531 
16532 	  /* Shift and mask NEWVAL into position within the word.  */
16533 	  newval = convert_modes (SImode, mode, newval, 1);
16534 	  newval = expand_simple_binop (SImode, ASHIFT, newval, shift,
16535 					NULL_RTX, 1, OPTAB_LIB_WIDEN);
16536 	}
16537 
16538       /* Prepare to adjust the return value.  */
16539       retval = gen_reg_rtx (SImode);
16540       mode = SImode;
16541     }
16542   else if (reg_overlap_mentioned_p (retval, oldval))
16543     oldval = copy_to_reg (oldval);
16544 
16545   if (mode != TImode && !reg_or_short_operand (oldval, mode))
16546     oldval = copy_to_mode_reg (mode, oldval);
16547 
16548   if (reg_overlap_mentioned_p (retval, newval))
16549     newval = copy_to_reg (newval);
16550 
16551   mem = rs6000_pre_atomic_barrier (mem, mod_s);
16552 
16553   label1 = NULL_RTX;
16554   if (!is_weak)
16555     {
16556       label1 = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
16557       emit_label (XEXP (label1, 0));
16558     }
16559   label2 = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
16560 
16561   emit_load_locked (mode, retval, mem);
16562 
16563   x = retval;
16564   if (mask)
16565     x = expand_simple_binop (SImode, AND, retval, mask,
16566 			     NULL_RTX, 1, OPTAB_LIB_WIDEN);
16567 
16568   cond = gen_reg_rtx (CCmode);
16569   /* If we have TImode, synthesize a comparison.  */
16570   if (mode != TImode)
16571     x = gen_rtx_COMPARE (CCmode, x, oldval);
16572   else
16573     {
16574       rtx xor1_result = gen_reg_rtx (DImode);
16575       rtx xor2_result = gen_reg_rtx (DImode);
16576       rtx or_result = gen_reg_rtx (DImode);
16577       rtx new_word0 = simplify_gen_subreg (DImode, x, TImode, 0);
16578       rtx new_word1 = simplify_gen_subreg (DImode, x, TImode, 8);
16579       rtx old_word0 = simplify_gen_subreg (DImode, oldval, TImode, 0);
16580       rtx old_word1 = simplify_gen_subreg (DImode, oldval, TImode, 8);
16581 
16582       emit_insn (gen_xordi3 (xor1_result, new_word0, old_word0));
16583       emit_insn (gen_xordi3 (xor2_result, new_word1, old_word1));
16584       emit_insn (gen_iordi3 (or_result, xor1_result, xor2_result));
16585       x = gen_rtx_COMPARE (CCmode, or_result, const0_rtx);
16586     }
16587 
16588   emit_insn (gen_rtx_SET (cond, x));
16589 
16590   x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
16591   emit_unlikely_jump (x, label2);
16592 
16593   x = newval;
16594   if (mask)
16595     x = rs6000_mask_atomic_subword (retval, newval, mask);
16596 
16597   emit_store_conditional (orig_mode, cond, mem, x);
16598 
16599   if (!is_weak)
16600     {
16601       x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
16602       emit_unlikely_jump (x, label1);
16603     }
16604 
16605   if (!is_mm_relaxed (mod_f))
16606     emit_label (XEXP (label2, 0));
16607 
16608   rs6000_post_atomic_barrier (mod_s);
16609 
16610   if (is_mm_relaxed (mod_f))
16611     emit_label (XEXP (label2, 0));
16612 
16613   if (shift)
16614     rs6000_finish_atomic_subword (operands[1], retval, shift);
16615   else if (mode != GET_MODE (operands[1]))
16616     convert_move (operands[1], retval, 1);
16617 
16618   /* In all cases, CR0 contains EQ on success, and NE on failure.  */
16619   x = gen_rtx_EQ (SImode, cond, const0_rtx);
16620   emit_insn (gen_rtx_SET (boolval, x));
16621 }
16622 
16623 /* Expand an atomic exchange operation.  */
16624 
16625 void
16626 rs6000_expand_atomic_exchange (rtx operands[])
16627 {
16628   rtx retval, mem, val, cond;
16629   machine_mode mode;
16630   enum memmodel model;
16631   rtx label, x, mask, shift;
16632 
16633   retval = operands[0];
16634   mem = operands[1];
16635   val = operands[2];
16636   model = memmodel_base (INTVAL (operands[3]));
16637   mode = GET_MODE (mem);
16638 
16639   mask = shift = NULL_RTX;
16640   if (!TARGET_SYNC_HI_QI && (mode == QImode || mode == HImode))
16641     {
16642       mem = rs6000_adjust_atomic_subword (mem, &shift, &mask);
16643 
16644       /* Shift and mask VAL into position within the word.  */
16645       val = convert_modes (SImode, mode, val, 1);
16646       val = expand_simple_binop (SImode, ASHIFT, val, shift,
16647 				 NULL_RTX, 1, OPTAB_LIB_WIDEN);
16648 
16649       /* Prepare to adjust the return value.  */
16650       retval = gen_reg_rtx (SImode);
16651       mode = SImode;
16652     }
16653 
16654   mem = rs6000_pre_atomic_barrier (mem, model);
16655 
16656   label = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
16657   emit_label (XEXP (label, 0));
16658 
16659   emit_load_locked (mode, retval, mem);
16660 
16661   x = val;
16662   if (mask)
16663     x = rs6000_mask_atomic_subword (retval, val, mask);
16664 
16665   cond = gen_reg_rtx (CCmode);
16666   emit_store_conditional (mode, cond, mem, x);
16667 
16668   x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
16669   emit_unlikely_jump (x, label);
16670 
16671   rs6000_post_atomic_barrier (model);
16672 
16673   if (shift)
16674     rs6000_finish_atomic_subword (operands[0], retval, shift);
16675 }
16676 
16677 /* Expand an atomic fetch-and-operate pattern.  CODE is the binary operation
16678    to perform.  MEM is the memory on which to operate.  VAL is the second
16679    operand of the binary operator.  BEFORE and AFTER are optional locations to
16680    return the value of MEM either before or after the operation.  MODEL_RTX
16681    is a CONST_INT containing the memory model to use.  */
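/* For instance, a fetch-and-add on a halfword without TARGET_SYNC_HI_QI
   becomes a lwarx/stwcx. loop on the containing word: VAL is shifted into
   place, the addition is done in SImode, and MASK splices the updated
   halfword back into the unmodified bytes before the store-conditional.  */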
16682 
16683 void
16684 rs6000_expand_atomic_op (enum rtx_code code, rtx mem, rtx val,
16685 			 rtx orig_before, rtx orig_after, rtx model_rtx)
16686 {
16687   enum memmodel model = memmodel_base (INTVAL (model_rtx));
16688   machine_mode mode = GET_MODE (mem);
16689   machine_mode store_mode = mode;
16690   rtx label, x, cond, mask, shift;
16691   rtx before = orig_before, after = orig_after;
16692 
16693   mask = shift = NULL_RTX;
16694   /* On power8, we want to use SImode for the operation.  On previous systems,
16695      do the operation on the containing word and shift/mask to get the proper
16696      byte or halfword.  */
16697   if (mode == QImode || mode == HImode)
16698     {
16699       if (TARGET_SYNC_HI_QI)
16700 	{
16701 	  val = convert_modes (SImode, mode, val, 1);
16702 
16703 	  /* Prepare to adjust the return value.  */
16704 	  before = gen_reg_rtx (SImode);
16705 	  if (after)
16706 	    after = gen_reg_rtx (SImode);
16707 	  mode = SImode;
16708 	}
16709       else
16710 	{
16711 	  mem = rs6000_adjust_atomic_subword (mem, &shift, &mask);
16712 
16713 	  /* Shift and mask VAL into position within the word.  */
16714 	  val = convert_modes (SImode, mode, val, 1);
16715 	  val = expand_simple_binop (SImode, ASHIFT, val, shift,
16716 				     NULL_RTX, 1, OPTAB_LIB_WIDEN);
16717 
16718 	  switch (code)
16719 	    {
16720 	    case IOR:
16721 	    case XOR:
16722 	      /* We've already zero-extended VAL.  That is sufficient to
16723 		 make certain that it does not affect other bits.  */
16724 	      mask = NULL;
16725 	      break;
16726 
16727 	    case AND:
16728 	      /* If we make certain that all of the other bits in VAL are
16729 		 set, that will be sufficient to not affect other bits.  */
16730 	      x = gen_rtx_NOT (SImode, mask);
16731 	      x = gen_rtx_IOR (SImode, x, val);
16732 	      emit_insn (gen_rtx_SET (val, x));
16733 	      mask = NULL;
16734 	      break;
16735 
16736 	    case NOT:
16737 	    case PLUS:
16738 	    case MINUS:
16739 	      /* These will all affect bits outside the field and need
16740 		 adjustment via MASK within the loop.  */
16741 	      break;
16742 
16743 	    default:
16744 	      gcc_unreachable ();
16745 	    }
16746 
16747 	  /* Prepare to adjust the return value.  */
16748 	  before = gen_reg_rtx (SImode);
16749 	  if (after)
16750 	    after = gen_reg_rtx (SImode);
16751 	  store_mode = mode = SImode;
16752 	}
16753     }
16754 
16755   mem = rs6000_pre_atomic_barrier (mem, model);
16756 
16757   label = gen_label_rtx ();
16758   emit_label (label);
16759   label = gen_rtx_LABEL_REF (VOIDmode, label);
16760 
16761   if (before == NULL_RTX)
16762     before = gen_reg_rtx (mode);
16763 
16764   emit_load_locked (mode, before, mem);
16765 
16766   if (code == NOT)
16767     {
16768       x = expand_simple_binop (mode, AND, before, val,
16769 			       NULL_RTX, 1, OPTAB_LIB_WIDEN);
16770       after = expand_simple_unop (mode, NOT, x, after, 1);
16771     }
16772   else
16773     {
16774       after = expand_simple_binop (mode, code, before, val,
16775 				   after, 1, OPTAB_LIB_WIDEN);
16776     }
16777 
16778   x = after;
16779   if (mask)
16780     {
16781       x = expand_simple_binop (SImode, AND, after, mask,
16782 			       NULL_RTX, 1, OPTAB_LIB_WIDEN);
16783       x = rs6000_mask_atomic_subword (before, x, mask);
16784     }
16785   else if (store_mode != mode)
16786     x = convert_modes (store_mode, mode, x, 1);
16787 
16788   cond = gen_reg_rtx (CCmode);
16789   emit_store_conditional (store_mode, cond, mem, x);
16790 
16791   x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
16792   emit_unlikely_jump (x, label);
16793 
16794   rs6000_post_atomic_barrier (model);
16795 
16796   if (shift)
16797     {
16798       /* QImode/HImode on machines without lbarx/lharx where we do a lwarx and
16799 	 then do the calculations in an SImode register.  */
16800       if (orig_before)
16801 	rs6000_finish_atomic_subword (orig_before, before, shift);
16802       if (orig_after)
16803 	rs6000_finish_atomic_subword (orig_after, after, shift);
16804     }
16805   else if (store_mode != mode)
16806     {
16807       /* QImode/HImode on machines with lbarx/lharx where we do the native
16808 	 operation and then do the calculations in an SImode register.  */
16809       if (orig_before)
16810 	convert_move (orig_before, before, 1);
16811       if (orig_after)
16812 	convert_move (orig_after, after, 1);
16813     }
16814   else if (orig_after && after != orig_after)
16815     emit_move_insn (orig_after, after);
16816 }
16817 
16818 static GTY(()) alias_set_type TOC_alias_set = -1;
16819 
16820 alias_set_type
16821 get_TOC_alias_set (void)
16822 {
16823   if (TOC_alias_set == -1)
16824     TOC_alias_set = new_alias_set ();
16825   return TOC_alias_set;
16826 }
16827 
16828 /* The mode the ABI uses for a word.  This is not the same as word_mode
16829    for -m32 -mpowerpc64.  This is used to implement various target hooks.  */
16830 
16831 static scalar_int_mode
16832 rs6000_abi_word_mode (void)
16833 {
16834   return TARGET_32BIT ? SImode : DImode;
16835 }
16836 
16837 /* Implement the TARGET_OFFLOAD_OPTIONS hook.  */
16838 static char *
16839 rs6000_offload_options (void)
16840 {
16841   if (TARGET_64BIT)
16842     return xstrdup ("-foffload-abi=lp64");
16843   else
16844     return xstrdup ("-foffload-abi=ilp32");
16845 }
16846 
16847 
16848 /* A quick summary of the various types of 'constant-pool tables'
16849    under PowerPC:
16850 
16851    Target	Flags		Name		One table per
16852    AIX		(none)		AIX TOC		object file
16853    AIX		-mfull-toc	AIX TOC		object file
16854    AIX		-mminimal-toc	AIX minimal TOC	translation unit
16855    SVR4/EABI	(none)		SVR4 SDATA	object file
16856    SVR4/EABI	-fpic		SVR4 pic	object file
16857    SVR4/EABI	-fPIC		SVR4 PIC	translation unit
16858    SVR4/EABI	-mrelocatable	EABI TOC	function
16859    SVR4/EABI	-maix		AIX TOC		object file
16860    SVR4/EABI	-maix -mminimal-toc
16861 				AIX minimal TOC	translation unit
16862 
16863    Name			Reg.	Set by	entries	      contains:
16864 					made by	 addrs?	fp?	sum?
16865 
16866    AIX TOC		2	crt0	as	 Y	option	option
16867    AIX minimal TOC	30	prolog	gcc	 Y	Y	option
16868    SVR4 SDATA		13	crt0	gcc	 N	Y	N
16869    SVR4 pic		30	prolog	ld	 Y	not yet	N
16870    SVR4 PIC		30	prolog	gcc	 Y	option	option
16871    EABI TOC		30	prolog	gcc	 Y	option	option
16872 
16873 */
16874 
16875 /* Hash functions for the hash table.  */
16876 
16877 static unsigned
16878 rs6000_hash_constant (rtx k)
16879 {
16880   enum rtx_code code = GET_CODE (k);
16881   machine_mode mode = GET_MODE (k);
16882   unsigned result = (code << 3) ^ mode;
16883   const char *format;
16884   int flen, fidx;
16885 
16886   format = GET_RTX_FORMAT (code);
16887   flen = strlen (format);
16888   fidx = 0;
16889 
16890   switch (code)
16891     {
16892     case LABEL_REF:
16893       return result * 1231 + (unsigned) INSN_UID (XEXP (k, 0));
16894 
16895     case CONST_WIDE_INT:
16896       {
16897 	int i;
16898 	flen = CONST_WIDE_INT_NUNITS (k);
16899 	for (i = 0; i < flen; i++)
16900 	  result = result * 613 + CONST_WIDE_INT_ELT (k, i);
16901 	return result;
16902       }
16903 
16904     case CONST_DOUBLE:
16905       return real_hash (CONST_DOUBLE_REAL_VALUE (k)) * result;
16906 
16907     case CODE_LABEL:
16908       fidx = 3;
16909       break;
16910 
16911     default:
16912       break;
16913     }
16914 
16915   for (; fidx < flen; fidx++)
16916     switch (format[fidx])
16917       {
16918       case 's':
16919 	{
16920 	  unsigned i, len;
16921 	  const char *str = XSTR (k, fidx);
16922 	  len = strlen (str);
16923 	  result = result * 613 + len;
16924 	  for (i = 0; i < len; i++)
16925 	    result = result * 613 + (unsigned) str[i];
16926 	  break;
16927 	}
16928       case 'u':
16929       case 'e':
16930 	result = result * 1231 + rs6000_hash_constant (XEXP (k, fidx));
16931 	break;
16932       case 'i':
16933       case 'n':
16934 	result = result * 613 + (unsigned) XINT (k, fidx);
16935 	break;
16936       case 'w':
16937 	if (sizeof (unsigned) >= sizeof (HOST_WIDE_INT))
16938 	  result = result * 613 + (unsigned) XWINT (k, fidx);
16939 	else
16940 	  {
16941 	    size_t i;
16942 	    for (i = 0; i < sizeof (HOST_WIDE_INT) / sizeof (unsigned); i++)
16943 	      result = result * 613 + (unsigned) (XWINT (k, fidx)
16944 						  >> CHAR_BIT * i);
16945 	  }
16946 	break;
16947       case '0':
16948 	break;
16949       default:
16950 	gcc_unreachable ();
16951       }
16952 
16953   return result;
16954 }
16955 
16956 hashval_t
16957 toc_hasher::hash (toc_hash_struct *thc)
16958 {
16959   return rs6000_hash_constant (thc->key) ^ thc->key_mode;
16960 }
16961 
16962 /* Compare H1 and H2 for equivalence.  */
16963 
16964 bool
16965 toc_hasher::equal (toc_hash_struct *h1, toc_hash_struct *h2)
16966 {
16967   rtx r1 = h1->key;
16968   rtx r2 = h2->key;
16969 
16970   if (h1->key_mode != h2->key_mode)
16971     return 0;
16972 
16973   return rtx_equal_p (r1, r2);
16974 }
16975 
16976 /* These are the names given by the C++ front-end to vtables, and
16977    vtable-like objects.  Ideally, this logic should not be here;
16978    instead, there should be some programmatic way of inquiring as
16979    to whether or not an object is a vtable.  */
16980 
16981 #define VTABLE_NAME_P(NAME)	  \
16982   (startswith (name, "_vt.")	  \
16983   || startswith (name, "_ZTV")	  \
16984   || startswith (name, "_ZTT")	  \
16985   || startswith (name, "_ZTI")	  \
16986   || startswith (name, "_ZTC"))
16987 
16988 #ifdef NO_DOLLAR_IN_LABEL
16989 /* Return a GGC-allocated character string translating dollar signs in
16990    input NAME to underscores.  Used by XCOFF ASM_OUTPUT_LABELREF.  */
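/* For example, "foo$bar$baz" becomes "foo_bar_baz", while a NAME with no
   '$', or one that begins with '$', is returned unchanged.  */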
16991 
16992 const char *
16993 rs6000_xcoff_strip_dollar (const char *name)
16994 {
16995   char *strip, *p;
16996   const char *q;
16997   size_t len;
16998 
16999   q = (const char *) strchr (name, '$');
17000 
17001   if (q == 0 || q == name)
17002     return name;
17003 
17004   len = strlen (name);
17005   strip = XALLOCAVEC (char, len + 1);
17006   strcpy (strip, name);
17007   p = strip + (q - name);
17008   while (p)
17009     {
17010       *p = '_';
17011       p = strchr (p + 1, '$');
17012     }
17013 
17014   return ggc_alloc_string (strip, len);
17015 }
17016 #endif
17017 
17018 void
17019 rs6000_output_symbol_ref (FILE *file, rtx x)
17020 {
17021   const char *name = XSTR (x, 0);
17022 
17023   /* Currently C++ toc references to vtables can be emitted before it
17024      is decided whether the vtable is public or private.  If this is
17025      the case, then the linker will eventually complain that there is
17026      a reference to an unknown section.  Thus, for vtables only,
17027      we emit the TOC reference to reference the identifier and not the
17028      symbol.  */
17029   if (VTABLE_NAME_P (name))
17030     {
17031       RS6000_OUTPUT_BASENAME (file, name);
17032     }
17033   else
17034     assemble_name (file, name);
17035 }
17036 
17037 /* Output a TOC entry.  We derive the entry name from what is being
17038    written.  */
17039 
17040 void
17041 output_toc (FILE *file, rtx x, int labelno, machine_mode mode)
17042 {
17043   char buf[256];
17044   const char *name = buf;
17045   rtx base = x;
17046   HOST_WIDE_INT offset = 0;
17047 
17048   gcc_assert (!TARGET_NO_TOC_OR_PCREL);
17049 
17050   /* When the linker won't eliminate them, don't output duplicate
17051      TOC entries (this happens on AIX if there is any kind of TOC,
17052      and on SVR4 under -fPIC or -mrelocatable).  Don't do this for
17053      CODE_LABELs.  */
17054   if (TARGET_TOC && GET_CODE (x) != LABEL_REF)
17055     {
17056       struct toc_hash_struct *h;
17057 
17058       /* Create toc_hash_table.  This can't be done at TARGET_OPTION_OVERRIDE
17059 	 time because GGC is not initialized at that point.  */
17060       if (toc_hash_table == NULL)
17061 	toc_hash_table = hash_table<toc_hasher>::create_ggc (1021);
17062 
17063       h = ggc_alloc<toc_hash_struct> ();
17064       h->key = x;
17065       h->key_mode = mode;
17066       h->labelno = labelno;
17067 
17068       toc_hash_struct **found = toc_hash_table->find_slot (h, INSERT);
17069       if (*found == NULL)
17070 	*found = h;
17071       else  /* This is indeed a duplicate.
17072 	       Set this label equal to that label.  */
17073 	{
17074 	  fputs ("\t.set ", file);
17075 	  ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LC");
17076 	  fprintf (file, "%d,", labelno);
17077 	  ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LC");
17078 	  fprintf (file, "%d\n", ((*found)->labelno));
17079 
17080 #ifdef HAVE_AS_TLS
17081 	  if (TARGET_XCOFF && SYMBOL_REF_P (x)
17082 	      && (SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_GLOBAL_DYNAMIC
17083 		  || SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC))
17084 	    {
17085 	      fputs ("\t.set ", file);
17086 	      ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LCM");
17087 	      fprintf (file, "%d,", labelno);
17088 	      ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LCM");
17089 	      fprintf (file, "%d\n", ((*found)->labelno));
17090 	    }
17091 #endif
17092 	  return;
17093 	}
17094     }
17095 
17096   /* If we're going to put a double constant in the TOC, make sure it's
17097      aligned properly when strict alignment is on.  */
17098   if ((CONST_DOUBLE_P (x) || CONST_WIDE_INT_P (x))
17099       && STRICT_ALIGNMENT
17100       && GET_MODE_BITSIZE (mode) >= 64
17101       && ! (TARGET_NO_FP_IN_TOC && ! TARGET_MINIMAL_TOC))
17102     ASM_OUTPUT_ALIGN (file, 3);
17104 
17105   (*targetm.asm_out.internal_label) (file, "LC", labelno);
17106 
17107   /* Handle FP constants specially.  Note that if we have a minimal
17108      TOC, things we put here aren't actually in the TOC, so we can allow
17109      FP constants.  */
17110   if (CONST_DOUBLE_P (x)
17111       && (GET_MODE (x) == TFmode || GET_MODE (x) == TDmode
17112 	  || GET_MODE (x) == IFmode || GET_MODE (x) == KFmode))
17113     {
17114       long k[4];
17115 
17116       if (DECIMAL_FLOAT_MODE_P (GET_MODE (x)))
17117 	REAL_VALUE_TO_TARGET_DECIMAL128 (*CONST_DOUBLE_REAL_VALUE (x), k);
17118       else
17119 	REAL_VALUE_TO_TARGET_LONG_DOUBLE (*CONST_DOUBLE_REAL_VALUE (x), k);
17120 
17121       if (TARGET_64BIT)
17122 	{
17123 	  if (TARGET_ELF || TARGET_MINIMAL_TOC)
17124 	    fputs (DOUBLE_INT_ASM_OP, file);
17125 	  else
17126 	    fprintf (file, "\t.tc FT_%lx_%lx_%lx_%lx[TC],",
17127 		     k[0] & 0xffffffff, k[1] & 0xffffffff,
17128 		     k[2] & 0xffffffff, k[3] & 0xffffffff);
17129 	  fprintf (file, "0x%lx%08lx,0x%lx%08lx\n",
17130 		   k[WORDS_BIG_ENDIAN ? 0 : 1] & 0xffffffff,
17131 		   k[WORDS_BIG_ENDIAN ? 1 : 0] & 0xffffffff,
17132 		   k[WORDS_BIG_ENDIAN ? 2 : 3] & 0xffffffff,
17133 		   k[WORDS_BIG_ENDIAN ? 3 : 2] & 0xffffffff);
17134 	  return;
17135 	}
17136       else
17137 	{
17138 	  if (TARGET_ELF || TARGET_MINIMAL_TOC)
17139 	    fputs ("\t.long ", file);
17140 	  else
17141 	    fprintf (file, "\t.tc FT_%lx_%lx_%lx_%lx[TC],",
17142 		     k[0] & 0xffffffff, k[1] & 0xffffffff,
17143 		     k[2] & 0xffffffff, k[3] & 0xffffffff);
17144 	  fprintf (file, "0x%lx,0x%lx,0x%lx,0x%lx\n",
17145 		   k[0] & 0xffffffff, k[1] & 0xffffffff,
17146 		   k[2] & 0xffffffff, k[3] & 0xffffffff);
17147 	  return;
17148 	}
17149     }
17150   else if (CONST_DOUBLE_P (x)
17151 	   && (GET_MODE (x) == DFmode || GET_MODE (x) == DDmode))
17152     {
17153       long k[2];
17154 
17155       if (DECIMAL_FLOAT_MODE_P (GET_MODE (x)))
17156 	REAL_VALUE_TO_TARGET_DECIMAL64 (*CONST_DOUBLE_REAL_VALUE (x), k);
17157       else
17158 	REAL_VALUE_TO_TARGET_DOUBLE (*CONST_DOUBLE_REAL_VALUE (x), k);
17159 
17160       if (TARGET_64BIT)
17161 	{
17162 	  if (TARGET_ELF || TARGET_MINIMAL_TOC)
17163 	    fputs (DOUBLE_INT_ASM_OP, file);
17164 	  else
17165 	    fprintf (file, "\t.tc FD_%lx_%lx[TC],",
17166 		     k[0] & 0xffffffff, k[1] & 0xffffffff);
17167 	  fprintf (file, "0x%lx%08lx\n",
17168 		   k[WORDS_BIG_ENDIAN ? 0 : 1] & 0xffffffff,
17169 		   k[WORDS_BIG_ENDIAN ? 1 : 0] & 0xffffffff);
17170 	  return;
17171 	}
17172       else
17173 	{
17174 	  if (TARGET_ELF || TARGET_MINIMAL_TOC)
17175 	    fputs ("\t.long ", file);
17176 	  else
17177 	    fprintf (file, "\t.tc FD_%lx_%lx[TC],",
17178 		     k[0] & 0xffffffff, k[1] & 0xffffffff);
17179 	  fprintf (file, "0x%lx,0x%lx\n",
17180 		   k[0] & 0xffffffff, k[1] & 0xffffffff);
17181 	  return;
17182 	}
17183     }
17184   else if (CONST_DOUBLE_P (x)
17185 	   && (GET_MODE (x) == SFmode || GET_MODE (x) == SDmode))
17186     {
17187       long l;
17188 
17189       if (DECIMAL_FLOAT_MODE_P (GET_MODE (x)))
17190 	REAL_VALUE_TO_TARGET_DECIMAL32 (*CONST_DOUBLE_REAL_VALUE (x), l);
17191       else
17192 	REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (x), l);
17193 
17194       if (TARGET_64BIT)
17195 	{
17196 	  if (TARGET_ELF || TARGET_MINIMAL_TOC)
17197 	    fputs (DOUBLE_INT_ASM_OP, file);
17198 	  else
17199 	    fprintf (file, "\t.tc FS_%lx[TC],", l & 0xffffffff);
17200 	  if (WORDS_BIG_ENDIAN)
17201 	    fprintf (file, "0x%lx00000000\n", l & 0xffffffff);
17202 	  else
17203 	    fprintf (file, "0x%lx\n", l & 0xffffffff);
17204 	  return;
17205 	}
17206       else
17207 	{
17208 	  if (TARGET_ELF || TARGET_MINIMAL_TOC)
17209 	    fputs ("\t.long ", file);
17210 	  else
17211 	    fprintf (file, "\t.tc FS_%lx[TC],", l & 0xffffffff);
17212 	  fprintf (file, "0x%lx\n", l & 0xffffffff);
17213 	  return;
17214 	}
17215     }
17216   else if (GET_MODE (x) == VOIDmode && CONST_INT_P (x))
17217     {
17218       unsigned HOST_WIDE_INT low;
17219       HOST_WIDE_INT high;
17220 
17221       low = INTVAL (x) & 0xffffffff;
17222       high = (HOST_WIDE_INT) INTVAL (x) >> 32;
17223 
17224       /* TOC entries are always Pmode-sized, so when big-endian
17225 	 smaller integer constants in the TOC need to be padded.
17226 	 (This is still a win over putting the constants in
17227 	 a separate constant pool, because then we'd have
17228 	 to have both a TOC entry _and_ the actual constant.)
17229 
17230 	 For a 32-bit target, CONST_INT values are loaded and shifted
17231 	 entirely within `low' and can be stored in one TOC entry.  */
17232 
17233       /* It would be easy to make this work, but it doesn't now.  */
17234       gcc_assert (!TARGET_64BIT || POINTER_SIZE >= GET_MODE_BITSIZE (mode));
17235 
17236       if (WORDS_BIG_ENDIAN && POINTER_SIZE > GET_MODE_BITSIZE (mode))
17237 	{
17238 	  low |= high << 32;
17239 	  low <<= POINTER_SIZE - GET_MODE_BITSIZE (mode);
17240 	  high = (HOST_WIDE_INT) low >> 32;
17241 	  low &= 0xffffffff;
17242 	}
17243 
17244       if (TARGET_64BIT)
17245 	{
17246 	  if (TARGET_ELF || TARGET_MINIMAL_TOC)
17247 	    fputs (DOUBLE_INT_ASM_OP, file);
17248 	  else
17249 	    fprintf (file, "\t.tc ID_%lx_%lx[TC],",
17250 		     (long) high & 0xffffffff, (long) low & 0xffffffff);
17251 	  fprintf (file, "0x%lx%08lx\n",
17252 		   (long) high & 0xffffffff, (long) low & 0xffffffff);
17253 	  return;
17254 	}
17255       else
17256 	{
17257 	  if (POINTER_SIZE < GET_MODE_BITSIZE (mode))
17258 	    {
17259 	      if (TARGET_ELF || TARGET_MINIMAL_TOC)
17260 		fputs ("\t.long ", file);
17261 	      else
17262 		fprintf (file, "\t.tc ID_%lx_%lx[TC],",
17263 			 (long) high & 0xffffffff, (long) low & 0xffffffff);
17264 	      fprintf (file, "0x%lx,0x%lx\n",
17265 		       (long) high & 0xffffffff, (long) low & 0xffffffff);
17266 	    }
17267 	  else
17268 	    {
17269 	      if (TARGET_ELF || TARGET_MINIMAL_TOC)
17270 		fputs ("\t.long ", file);
17271 	      else
17272 		fprintf (file, "\t.tc IS_%lx[TC],", (long) low & 0xffffffff);
17273 	      fprintf (file, "0x%lx\n", (long) low & 0xffffffff);
17274 	    }
17275 	  return;
17276 	}
17277     }
17278 
17279   if (GET_CODE (x) == CONST)
17280     {
17281       gcc_assert (GET_CODE (XEXP (x, 0)) == PLUS
17282 		  && CONST_INT_P (XEXP (XEXP (x, 0), 1)));
17283 
17284       base = XEXP (XEXP (x, 0), 0);
17285       offset = INTVAL (XEXP (XEXP (x, 0), 1));
17286     }
17287 
17288   switch (GET_CODE (base))
17289     {
17290     case SYMBOL_REF:
17291       name = XSTR (base, 0);
17292       break;
17293 
17294     case LABEL_REF:
17295       ASM_GENERATE_INTERNAL_LABEL (buf, "L",
17296 				   CODE_LABEL_NUMBER (XEXP (base, 0)));
17297       break;
17298 
17299     case CODE_LABEL:
17300       ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (base));
17301       break;
17302 
17303     default:
17304       gcc_unreachable ();
17305     }
17306 
17307   if (TARGET_ELF || TARGET_MINIMAL_TOC)
17308     fputs (TARGET_32BIT ? "\t.long " : DOUBLE_INT_ASM_OP, file);
17309   else
17310     {
17311       fputs ("\t.tc ", file);
17312       RS6000_OUTPUT_BASENAME (file, name);
17313 
17314       if (offset < 0)
17315 	fprintf (file, ".N" HOST_WIDE_INT_PRINT_UNSIGNED, - offset);
17316       else if (offset)
17317 	fprintf (file, ".P" HOST_WIDE_INT_PRINT_UNSIGNED, offset);
17318 
17319       /* Mark large TOC symbols on AIX with [TE] so they are mapped
17320 	 after other TOC symbols, reducing overflow of small TOC access
17321 	 to [TC] symbols.  */
17322       fputs (TARGET_XCOFF && TARGET_CMODEL != CMODEL_SMALL
17323 	     ? "[TE]," : "[TC],", file);
17324     }
17325 
17326   /* Currently C++ toc references to vtables can be emitted before it
17327      is decided whether the vtable is public or private.  If this is
17328      the case, then the linker will eventually complain that there is
17329      a TOC reference to an unknown section.  Thus, for vtables only,
17330      we emit the TOC reference to reference the symbol and not the
17331      section.  */
17332   if (VTABLE_NAME_P (name))
17333     {
17334       RS6000_OUTPUT_BASENAME (file, name);
17335       if (offset < 0)
17336 	fprintf (file, HOST_WIDE_INT_PRINT_DEC, offset);
17337       else if (offset > 0)
17338 	fprintf (file, "+" HOST_WIDE_INT_PRINT_DEC, offset);
17339     }
17340   else
17341     output_addr_const (file, x);
17342 
17343 #if HAVE_AS_TLS
17344   if (TARGET_XCOFF && SYMBOL_REF_P (base))
17345     {
17346       switch (SYMBOL_REF_TLS_MODEL (base))
17347 	{
17348 	case 0:
17349 	  break;
17350 	case TLS_MODEL_LOCAL_EXEC:
17351 	  fputs ("@le", file);
17352 	  break;
17353 	case TLS_MODEL_INITIAL_EXEC:
17354 	  fputs ("@ie", file);
17355 	  break;
17356 	/* Use global-dynamic for local-dynamic.  */
17357 	case TLS_MODEL_GLOBAL_DYNAMIC:
17358 	case TLS_MODEL_LOCAL_DYNAMIC:
17359 	  putc ('\n', file);
17360 	  (*targetm.asm_out.internal_label) (file, "LCM", labelno);
17361 	  fputs ("\t.tc .", file);
17362 	  RS6000_OUTPUT_BASENAME (file, name);
17363 	  fputs ("[TC],", file);
17364 	  output_addr_const (file, x);
17365 	  fputs ("@m", file);
17366 	  break;
17367 	default:
17368 	  gcc_unreachable ();
17369 	}
17370     }
17371 #endif
17372 
17373   putc ('\n', file);
17374 }
17375 
17376 /* Output an assembler pseudo-op to write an ASCII string of N characters
17377    starting at P to FILE.
17378 
17379    On the RS/6000, we have to do this using the .byte operation and
17380    write out special characters outside the quoted string.
17381    Also, the assembler is broken; very long strings are truncated,
17382    so we must artificially break them up early.  */
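/* For example (an illustration of the loop below, not part of the original
   comment), the four input bytes 'a', 'b', '"', '\n' are emitted as

	.byte "ab""
	.byte 10

   since a quote is written twice inside the quoted string and
   non-printable characters fall back to decimal .byte values.  */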
17383 
17384 void
17385 output_ascii (FILE *file, const char *p, int n)
17386 {
17387   char c;
17388   int i, count_string;
17389   const char *for_string = "\t.byte \"";
17390   const char *for_decimal = "\t.byte ";
17391   const char *to_close = NULL;
17392 
17393   count_string = 0;
17394   for (i = 0; i < n; i++)
17395     {
17396       c = *p++;
17397       if (c >= ' ' && c < 0177)
17398 	{
17399 	  if (for_string)
17400 	    fputs (for_string, file);
17401 	  putc (c, file);
17402 
17403 	  /* Write two quotes to get one.  */
17404 	  if (c == '"')
17405 	    {
17406 	      putc (c, file);
17407 	      ++count_string;
17408 	    }
17409 
17410 	  for_string = NULL;
17411 	  for_decimal = "\"\n\t.byte ";
17412 	  to_close = "\"\n";
17413 	  ++count_string;
17414 
17415 	  if (count_string >= 512)
17416 	    {
17417 	      fputs (to_close, file);
17418 
17419 	      for_string = "\t.byte \"";
17420 	      for_decimal = "\t.byte ";
17421 	      to_close = NULL;
17422 	      count_string = 0;
17423 	    }
17424 	}
17425       else
17426 	{
17427 	  if (for_decimal)
17428 	    fputs (for_decimal, file);
17429 	  fprintf (file, "%d", c);
17430 
17431 	  for_string = "\n\t.byte \"";
17432 	  for_decimal = ", ";
17433 	  to_close = "\n";
17434 	  count_string = 0;
17435 	}
17436     }
17437 
17438   /* Now close the string if we have written one.  Then end the line.  */
17439   if (to_close)
17440     fputs (to_close, file);
17441 }
17442 
17443 /* Generate a unique section name for FILENAME for a section type
17444    represented by SECTION_DESC.  Output goes into BUF.
17445 
17446    SECTION_DESC can be any string, as long as it is different for each
17447    possible section type.
17448 
17449    We name the section in the same manner as xlc.  The name begins with an
17450    underscore followed by the filename (after stripping any leading directory
17451    names) with the last period replaced by the string SECTION_DESC.  If
17452    FILENAME does not contain a period, SECTION_DESC is appended to the end of
17453    the name.  */
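/* For instance (hypothetical inputs), FILENAME "src/foo.c" with
   SECTION_DESC ".bss" yields "_foo.bss"; FILENAME "foo" (no period) also
   yields "_foo.bss", since SECTION_DESC is then simply appended.
   Non-alphanumeric characters in the base name are dropped.  */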
17454 
17455 void
17456 rs6000_gen_section_name (char **buf, const char *filename,
17457 			 const char *section_desc)
17458 {
17459   const char *q, *after_last_slash, *last_period = 0;
17460   char *p;
17461   int len;
17462 
17463   after_last_slash = filename;
17464   for (q = filename; *q; q++)
17465     {
17466       if (*q == '/')
17467 	after_last_slash = q + 1;
17468       else if (*q == '.')
17469 	last_period = q;
17470     }
17471 
17472   len = strlen (after_last_slash) + strlen (section_desc) + 2;
17473   *buf = (char *) xmalloc (len);
17474 
17475   p = *buf;
17476   *p++ = '_';
17477 
17478   for (q = after_last_slash; *q; q++)
17479     {
17480       if (q == last_period)
17481 	{
17482 	  strcpy (p, section_desc);
17483 	  p += strlen (section_desc);
17484 	  break;
17485 	}
17486 
17487       else if (ISALNUM (*q))
17488 	*p++ = *q;
17489     }
17490 
17491   if (last_period == 0)
17492     strcpy (p, section_desc);
17493   else
17494     *p = '\0';
17495 }
17496 
17497 /* Emit profile function.  */
17498 
17499 void
17500 output_profile_hook (int labelno ATTRIBUTE_UNUSED)
17501 {
17502   /* Non-standard profiling for kernels, which just saves LR then calls
17503      _mcount without worrying about arg saves.  The idea is to change
17504      the function prologue as little as possible as it isn't easy to
17505      account for arg save/restore code added just for _mcount.  */
17506   if (TARGET_PROFILE_KERNEL)
17507     return;
17508 
17509   if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
17510     {
17511 #ifndef NO_PROFILE_COUNTERS
17512 # define NO_PROFILE_COUNTERS 0
17513 #endif
17514       if (NO_PROFILE_COUNTERS)
17515 	emit_library_call (init_one_libfunc (RS6000_MCOUNT),
17516 			   LCT_NORMAL, VOIDmode);
17517       else
17518 	{
17519 	  char buf[30];
17520 	  const char *label_name;
17521 	  rtx fun;
17522 
17523 	  ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
17524 	  label_name = ggc_strdup ((*targetm.strip_name_encoding) (buf));
17525 	  fun = gen_rtx_SYMBOL_REF (Pmode, label_name);
17526 
17527 	  emit_library_call (init_one_libfunc (RS6000_MCOUNT),
17528 			     LCT_NORMAL, VOIDmode, fun, Pmode);
17529 	}
17530     }
17531   else if (DEFAULT_ABI == ABI_DARWIN)
17532     {
17533       const char *mcount_name = RS6000_MCOUNT;
17534       int caller_addr_regno = LR_REGNO;
17535 
17536       /* Be conservative and always set this, at least for now.  */
17537       crtl->uses_pic_offset_table = 1;
17538 
17539 #if TARGET_MACHO
17540       /* For PIC code, set up a stub and collect the caller's address
17541 	 from r0, which is where the prologue puts it.  */
17542       if (MACHOPIC_INDIRECT
17543 	  && crtl->uses_pic_offset_table)
17544 	caller_addr_regno = 0;
17545 #endif
17546       emit_library_call (gen_rtx_SYMBOL_REF (Pmode, mcount_name),
17547 			 LCT_NORMAL, VOIDmode,
17548 			 gen_rtx_REG (Pmode, caller_addr_regno), Pmode);
17549     }
17550 }
17551 
17552 /* Write function profiler code.  */
17553 
17554 void
17555 output_function_profiler (FILE *file, int labelno)
17556 {
17557   char buf[100];
17558 
17559   switch (DEFAULT_ABI)
17560     {
17561     default:
17562       gcc_unreachable ();
17563 
17564     case ABI_V4:
17565       if (!TARGET_32BIT)
17566 	{
17567 	  warning (0, "no profiling of 64-bit code for this ABI");
17568 	  return;
17569 	}
17570       ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
17571       fprintf (file, "\tmflr %s\n", reg_names[0]);
17572       if (NO_PROFILE_COUNTERS)
17573 	{
17574 	  asm_fprintf (file, "\tstw %s,4(%s)\n",
17575 		       reg_names[0], reg_names[1]);
17576 	}
17577       else if (TARGET_SECURE_PLT && flag_pic)
17578 	{
17579 	  if (TARGET_LINK_STACK)
17580 	    {
17581 	      char name[32];
17582 	      get_ppc476_thunk_name (name);
17583 	      asm_fprintf (file, "\tbl %s\n", name);
17584 	    }
17585 	  else
17586 	    asm_fprintf (file, "\tbcl 20,31,1f\n1:\n");
17587 	  asm_fprintf (file, "\tstw %s,4(%s)\n",
17588 		       reg_names[0], reg_names[1]);
17589 	  asm_fprintf (file, "\tmflr %s\n", reg_names[12]);
17590 	  asm_fprintf (file, "\taddis %s,%s,",
17591 		       reg_names[12], reg_names[12]);
17592 	  assemble_name (file, buf);
17593 	  asm_fprintf (file, "-1b@ha\n\tla %s,", reg_names[0]);
17594 	  assemble_name (file, buf);
17595 	  asm_fprintf (file, "-1b@l(%s)\n", reg_names[12]);
17596 	}
17597       else if (flag_pic == 1)
17598 	{
17599 	  fputs ("\tbl _GLOBAL_OFFSET_TABLE_@local-4\n", file);
17600 	  asm_fprintf (file, "\tstw %s,4(%s)\n",
17601 		       reg_names[0], reg_names[1]);
17602 	  asm_fprintf (file, "\tmflr %s\n", reg_names[12]);
17603 	  asm_fprintf (file, "\tlwz %s,", reg_names[0]);
17604 	  assemble_name (file, buf);
17605 	  asm_fprintf (file, "@got(%s)\n", reg_names[12]);
17606 	}
17607       else if (flag_pic > 1)
17608 	{
17609 	  asm_fprintf (file, "\tstw %s,4(%s)\n",
17610 		       reg_names[0], reg_names[1]);
17611 	  /* Now, we need to get the address of the label.  */
17612 	  if (TARGET_LINK_STACK)
17613 	    {
17614 	      char name[32];
17615 	      get_ppc476_thunk_name (name);
17616 	      asm_fprintf (file, "\tbl %s\n\tb 1f\n\t.long ", name);
17617 	      assemble_name (file, buf);
17618 	      fputs ("-.\n1:", file);
17619 	      asm_fprintf (file, "\tmflr %s\n", reg_names[11]);
17620 	      asm_fprintf (file, "\taddi %s,%s,4\n",
17621 			   reg_names[11], reg_names[11]);
17622 	    }
17623 	  else
17624 	    {
17625 	      fputs ("\tbcl 20,31,1f\n\t.long ", file);
17626 	      assemble_name (file, buf);
17627 	      fputs ("-.\n1:", file);
17628 	      asm_fprintf (file, "\tmflr %s\n", reg_names[11]);
17629 	    }
17630 	  asm_fprintf (file, "\tlwz %s,0(%s)\n",
17631 		       reg_names[0], reg_names[11]);
17632 	  asm_fprintf (file, "\tadd %s,%s,%s\n",
17633 		       reg_names[0], reg_names[0], reg_names[11]);
17634 	}
17635       else
17636 	{
17637 	  asm_fprintf (file, "\tlis %s,", reg_names[12]);
17638 	  assemble_name (file, buf);
17639 	  fputs ("@ha\n", file);
17640 	  asm_fprintf (file, "\tstw %s,4(%s)\n",
17641 		       reg_names[0], reg_names[1]);
17642 	  asm_fprintf (file, "\tla %s,", reg_names[0]);
17643 	  assemble_name (file, buf);
17644 	  asm_fprintf (file, "@l(%s)\n", reg_names[12]);
17645 	}
17646 
17647       /* ABI_V4 saves the static chain reg with ASM_OUTPUT_REG_PUSH.  */
17648       fprintf (file, "\tbl %s%s\n",
17649 	       RS6000_MCOUNT, flag_pic ? "@plt" : "");
17650       break;
17651 
17652     case ABI_AIX:
17653     case ABI_ELFv2:
17654     case ABI_DARWIN:
17655       /* Don't do anything, done in output_profile_hook ().  */
17656       break;
17657     }
17658 }
17659 
17660 
17661 
17662 /* The following variable holds the last issued insn.  */
17663 
17664 static rtx_insn *last_scheduled_insn;
17665 
17666 /* The following variable helps to balance the issuing of load and
17667    store instructions.  */
17668 
17669 static int load_store_pendulum;
17670 
17671 /* The following variable helps pair divide insns during scheduling.  */
17672 static int divide_cnt;
17673 /* The following variable helps pair and alternate vector and vector load
17674    insns during scheduling.  */
17675 static int vec_pairing;
17676 
17677 
17678 /* Power4 load update and store update instructions are cracked into a
17679    load or store and an integer insn which are executed in the same cycle.
17680    Branches have their own dispatch slot which does not count against the
17681    GCC issue rate, but it changes the program flow so there are no other
17682    instructions to issue in this cycle.  */
17683 
17684 static int
17685 rs6000_variable_issue_1 (rtx_insn *insn, int more)
17686 {
17687   last_scheduled_insn = insn;
17688   if (GET_CODE (PATTERN (insn)) == USE
17689       || GET_CODE (PATTERN (insn)) == CLOBBER)
17690     {
17691       cached_can_issue_more = more;
17692       return cached_can_issue_more;
17693     }
17694 
17695   if (insn_terminates_group_p (insn, current_group))
17696     {
17697       cached_can_issue_more = 0;
17698       return cached_can_issue_more;
17699     }
17700 
17701   /* Unrecognized insns have no reservation; leave the count unchanged.  */
17702   if (recog_memoized (insn) < 0)
17703     return more;
17704 
17705   if (rs6000_sched_groups)
17706     {
17707       if (is_microcoded_insn (insn))
17708         cached_can_issue_more = 0;
17709       else if (is_cracked_insn (insn))
17710         cached_can_issue_more = more > 2 ? more - 2 : 0;
17711       else
17712         cached_can_issue_more = more - 1;
17713 
17714       return cached_can_issue_more;
17715     }
17716 
17717   if (rs6000_tune == PROCESSOR_CELL && is_nonpipeline_insn (insn))
17718     return 0;
17719 
17720   cached_can_issue_more = more - 1;
17721   return cached_can_issue_more;
17722 }
17723 
17724 static int
17725 rs6000_variable_issue (FILE *stream, int verbose, rtx_insn *insn, int more)
17726 {
17727   int r = rs6000_variable_issue_1 (insn, more);
17728   if (verbose)
17729     fprintf (stream, "// rs6000_variable_issue (more = %d) = %d\n", more, r);
17730   return r;
17731 }
17732 
17733 /* Adjust the cost of a scheduling dependency.  Return the new cost
17734    of a DEP_TYPE dependency of INSN on DEP_INSN.  COST is the current cost.  */
17735 
17736 static int
17737 rs6000_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep_insn, int cost,
17738 		    unsigned int)
17739 {
17740   enum attr_type attr_type;
17741 
17742   if (recog_memoized (insn) < 0 || recog_memoized (dep_insn) < 0)
17743     return cost;
17744 
17745   switch (dep_type)
17746     {
17747     case REG_DEP_TRUE:
17748       {
17749         /* Data dependency; DEP_INSN writes a register that INSN reads
17750 	   some cycles later.  */
17751 
17752 	/* Separate a load from a narrower, dependent store.  */
17753 	if ((rs6000_sched_groups || rs6000_tune == PROCESSOR_POWER9
17754 	     || rs6000_tune == PROCESSOR_POWER10)
17755 	    && GET_CODE (PATTERN (insn)) == SET
17756 	    && GET_CODE (PATTERN (dep_insn)) == SET
17757 	    && MEM_P (XEXP (PATTERN (insn), 1))
17758 	    && MEM_P (XEXP (PATTERN (dep_insn), 0))
17759 	    && (GET_MODE_SIZE (GET_MODE (XEXP (PATTERN (insn), 1)))
17760 		> GET_MODE_SIZE (GET_MODE (XEXP (PATTERN (dep_insn), 0)))))
17761 	  return cost + 14;
17762 
17763         attr_type = get_attr_type (insn);
17764 
17765         switch (attr_type)
17766           {
17767           case TYPE_JMPREG:
17768             /* Tell the first scheduling pass about the latency between
17769                a mtctr and bctr (and mtlr and br/blr).  The first
17770                scheduling pass will not know about this latency since
17771                the mtctr instruction, which has the latency associated
17772                to it, will be generated by reload.  */
17773             return 4;
17774           case TYPE_BRANCH:
17775             /* Leave some extra cycles between a compare and its
17776                dependent branch, to inhibit expensive mispredicts.  */
17777             if ((rs6000_tune == PROCESSOR_PPC603
17778                  || rs6000_tune == PROCESSOR_PPC604
17779                  || rs6000_tune == PROCESSOR_PPC604e
17780                  || rs6000_tune == PROCESSOR_PPC620
17781                  || rs6000_tune == PROCESSOR_PPC630
17782                  || rs6000_tune == PROCESSOR_PPC750
17783                  || rs6000_tune == PROCESSOR_PPC7400
17784                  || rs6000_tune == PROCESSOR_PPC7450
17785                  || rs6000_tune == PROCESSOR_PPCE5500
17786                  || rs6000_tune == PROCESSOR_PPCE6500
17787                  || rs6000_tune == PROCESSOR_POWER4
17788                  || rs6000_tune == PROCESSOR_POWER5
17789 		 || rs6000_tune == PROCESSOR_POWER7
17790 		 || rs6000_tune == PROCESSOR_POWER8
17791 		 || rs6000_tune == PROCESSOR_POWER9
17792 		 || rs6000_tune == PROCESSOR_POWER10
17793                  || rs6000_tune == PROCESSOR_CELL)
17794                 && recog_memoized (dep_insn)
17795                 && (INSN_CODE (dep_insn) >= 0))
17796 
17797               switch (get_attr_type (dep_insn))
17798                 {
17799                 case TYPE_CMP:
17800                 case TYPE_FPCOMPARE:
17801                 case TYPE_CR_LOGICAL:
17802 		  return cost + 2;
17803                 case TYPE_EXTS:
17804                 case TYPE_MUL:
17805 		  if (get_attr_dot (dep_insn) == DOT_YES)
17806 		    return cost + 2;
17807 		  else
17808 		    break;
17809                 case TYPE_SHIFT:
17810 		  if (get_attr_dot (dep_insn) == DOT_YES
17811 		      && get_attr_var_shift (dep_insn) == VAR_SHIFT_NO)
17812 		    return cost + 2;
17813 		  else
17814 		    break;
17815 		default:
17816 		  break;
17817 		}
17818             break;
17819 
17820           case TYPE_STORE:
17821           case TYPE_FPSTORE:
17822             if ((rs6000_tune == PROCESSOR_POWER6)
17823                 && recog_memoized (dep_insn)
17824                 && (INSN_CODE (dep_insn) >= 0))
17825               {
17826 
17827                 if (GET_CODE (PATTERN (insn)) != SET)
17828                   /* If this happens, we have to extend this to schedule
17829                      optimally.  Return default for now.  */
17830                   return cost;
17831 
17832                 /* Adjust the cost for the case where the value written
17833                    by a fixed point operation is used as the address
17834                    gen value on a store. */
17835                 switch (get_attr_type (dep_insn))
17836                   {
17837                   case TYPE_LOAD:
17838                   case TYPE_CNTLZ:
17839                     {
17840                       if (! rs6000_store_data_bypass_p (dep_insn, insn))
17841                         return get_attr_sign_extend (dep_insn)
17842                                == SIGN_EXTEND_YES ? 6 : 4;
17843                       break;
17844                     }
17845                   case TYPE_SHIFT:
17846                     {
17847                       if (! rs6000_store_data_bypass_p (dep_insn, insn))
17848                         return get_attr_var_shift (dep_insn) == VAR_SHIFT_YES ?
17849                                6 : 3;
17850                       break;
17851 		    }
17852                   case TYPE_INTEGER:
17853                   case TYPE_ADD:
17854                   case TYPE_LOGICAL:
17855                   case TYPE_EXTS:
17856                   case TYPE_INSERT:
17857                     {
17858                       if (! rs6000_store_data_bypass_p (dep_insn, insn))
17859                         return 3;
17860                       break;
17861                     }
17862                   case TYPE_STORE:
17863                   case TYPE_FPLOAD:
17864                   case TYPE_FPSTORE:
17865                     {
17866                       if (get_attr_update (dep_insn) == UPDATE_YES
17867                           && ! rs6000_store_data_bypass_p (dep_insn, insn))
17868                         return 3;
17869                       break;
17870                     }
17871                   case TYPE_MUL:
17872                     {
17873                       if (! rs6000_store_data_bypass_p (dep_insn, insn))
17874                         return 17;
17875                       break;
17876                     }
17877                   case TYPE_DIV:
17878                     {
17879                       if (! rs6000_store_data_bypass_p (dep_insn, insn))
17880                         return get_attr_size (dep_insn) == SIZE_32 ? 45 : 57;
17881                       break;
17882                     }
17883                   default:
17884                     break;
17885                   }
17886               }
17887 	    break;
17888 
17889           case TYPE_LOAD:
17890             if ((rs6000_tune == PROCESSOR_POWER6)
17891                 && recog_memoized (dep_insn)
17892                 && (INSN_CODE (dep_insn) >= 0))
17893               {
17894 
17895                 /* Adjust the cost for the case where the value written
17896                    by a fixed point instruction is used within the address
17897                    gen portion of a subsequent load(u)(x) */
17898                 switch (get_attr_type (dep_insn))
17899                   {
17900                   case TYPE_LOAD:
17901                   case TYPE_CNTLZ:
17902                     {
17903                       if (set_to_load_agen (dep_insn, insn))
17904                         return get_attr_sign_extend (dep_insn)
17905                                == SIGN_EXTEND_YES ? 6 : 4;
17906                       break;
17907                     }
17908                   case TYPE_SHIFT:
17909                     {
17910                       if (set_to_load_agen (dep_insn, insn))
17911                         return get_attr_var_shift (dep_insn) == VAR_SHIFT_YES ?
17912                                6 : 3;
17913                       break;
17914 		    }
17915                   case TYPE_INTEGER:
17916                   case TYPE_ADD:
17917                   case TYPE_LOGICAL:
17918                   case TYPE_EXTS:
17919                   case TYPE_INSERT:
17920                     {
17921                       if (set_to_load_agen (dep_insn, insn))
17922                         return 3;
17923                       break;
17924                     }
17925                   case TYPE_STORE:
17926                   case TYPE_FPLOAD:
17927                   case TYPE_FPSTORE:
17928                     {
17929                       if (get_attr_update (dep_insn) == UPDATE_YES
17930                           && set_to_load_agen (dep_insn, insn))
17931                         return 3;
17932                       break;
17933                     }
17934                   case TYPE_MUL:
17935                     {
17936                       if (set_to_load_agen (dep_insn, insn))
17937                         return 17;
17938                       break;
17939                     }
17940                   case TYPE_DIV:
17941                     {
17942                       if (set_to_load_agen (dep_insn, insn))
17943                         return get_attr_size (dep_insn) == SIZE_32 ? 45 : 57;
17944                       break;
17945                     }
17946                   default:
17947                     break;
17948                   }
17949               }
17950             break;
17951 
17952           default:
17953             break;
17954           }
17955 
17956 	/* Fall out to return default cost.  */
17957       }
17958       break;
17959 
17960     case REG_DEP_OUTPUT:
17961       /* Output dependency; DEP_INSN writes a register that INSN writes some
17962 	 cycles later.  */
17963       if ((rs6000_tune == PROCESSOR_POWER6)
17964           && recog_memoized (dep_insn)
17965           && (INSN_CODE (dep_insn) >= 0))
17966         {
17967           attr_type = get_attr_type (insn);
17968 
17969           switch (attr_type)
17970             {
17971             case TYPE_FP:
17972             case TYPE_FPSIMPLE:
17973               if (get_attr_type (dep_insn) == TYPE_FP
17974 		  || get_attr_type (dep_insn) == TYPE_FPSIMPLE)
17975                 return 1;
17976               break;
17977             default:
17978               break;
17979             }
17980         }
17981       /* Fall through, no cost for output dependency.  */
17982       /* FALLTHRU */
17983 
17984     case REG_DEP_ANTI:
17985       /* Anti dependency; DEP_INSN reads a register that INSN writes some
17986 	 cycles later.  */
17987       return 0;
17988 
17989     default:
17990       gcc_unreachable ();
17991     }
17992 
17993   return cost;
17994 }
17995 
17996 /* Debug version of rs6000_adjust_cost.  */
17997 
17998 static int
17999 rs6000_debug_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep_insn,
18000 			  int cost, unsigned int dw)
18001 {
18002   int ret = rs6000_adjust_cost (insn, dep_type, dep_insn, cost, dw);
18003 
18004   if (ret != cost)
18005     {
18006       const char *dep;
18007 
18008       switch (dep_type)
18009 	{
18010 	default:	     dep = "unknown dependency";  break;
18011 	case REG_DEP_TRUE:   dep = "data dependency";	  break;
18012 	case REG_DEP_OUTPUT: dep = "output dependency";  break;
18013 	case REG_DEP_ANTI:   dep = "anti dependency";	  break;
18014 	}
18015 
18016       fprintf (stderr,
18017 	       "\nrs6000_adjust_cost, final cost = %d, orig cost = %d, "
18018 	       "%s, insn:\n", ret, cost, dep);
18019 
18020       debug_rtx (insn);
18021     }
18022 
18023   return ret;
18024 }
18025 
18026 /* Return true if INSN is microcoded.
18027    Return false otherwise.  */
18028 
18029 static bool
18030 is_microcoded_insn (rtx_insn *insn)
18031 {
18032   if (!insn || !NONDEBUG_INSN_P (insn)
18033       || GET_CODE (PATTERN (insn)) == USE
18034       || GET_CODE (PATTERN (insn)) == CLOBBER)
18035     return false;
18036 
18037   if (rs6000_tune == PROCESSOR_CELL)
18038     return get_attr_cell_micro (insn) == CELL_MICRO_ALWAYS;
18039 
18040   if (rs6000_sched_groups
18041       && (rs6000_tune == PROCESSOR_POWER4 || rs6000_tune == PROCESSOR_POWER5))
18042     {
18043       enum attr_type type = get_attr_type (insn);
18044       if ((type == TYPE_LOAD
18045 	   && get_attr_update (insn) == UPDATE_YES
18046 	   && get_attr_sign_extend (insn) == SIGN_EXTEND_YES)
18047 	  || ((type == TYPE_LOAD || type == TYPE_STORE)
18048 	      && get_attr_update (insn) == UPDATE_YES
18049 	      && get_attr_indexed (insn) == INDEXED_YES)
18050 	  || type == TYPE_MFCR)
18051 	return true;
18052     }
18053 
18054   return false;
18055 }
18056 
18057 /* Return true if INSN is cracked into 2 instructions by the processor
18058    (and therefore occupies 2 issue slots).  */
18059 
18060 static bool
18061 is_cracked_insn (rtx_insn *insn)
18062 {
18063   if (!insn || !NONDEBUG_INSN_P (insn)
18064       || GET_CODE (PATTERN (insn)) == USE
18065       || GET_CODE (PATTERN (insn)) == CLOBBER)
18066     return false;
18067 
18068   if (rs6000_sched_groups
18069       && (rs6000_tune == PROCESSOR_POWER4 || rs6000_tune == PROCESSOR_POWER5))
18070     {
18071       enum attr_type type = get_attr_type (insn);
18072       if ((type == TYPE_LOAD
18073 	   && get_attr_sign_extend (insn) == SIGN_EXTEND_YES
18074 	   && get_attr_update (insn) == UPDATE_NO)
18075 	  || (type == TYPE_LOAD
18076 	      && get_attr_sign_extend (insn) == SIGN_EXTEND_NO
18077 	      && get_attr_update (insn) == UPDATE_YES
18078 	      && get_attr_indexed (insn) == INDEXED_NO)
18079 	  || (type == TYPE_STORE
18080 	      && get_attr_update (insn) == UPDATE_YES
18081 	      && get_attr_indexed (insn) == INDEXED_NO)
18082 	  || ((type == TYPE_FPLOAD || type == TYPE_FPSTORE)
18083 	      && get_attr_update (insn) == UPDATE_YES)
18084 	  || (type == TYPE_CR_LOGICAL
18085 	      && get_attr_cr_logical_3op (insn) == CR_LOGICAL_3OP_YES)
18086 	  || (type == TYPE_EXTS
18087 	      && get_attr_dot (insn) == DOT_YES)
18088 	  || (type == TYPE_SHIFT
18089 	      && get_attr_dot (insn) == DOT_YES
18090 	      && get_attr_var_shift (insn) == VAR_SHIFT_NO)
18091 	  || (type == TYPE_MUL
18092 	      && get_attr_dot (insn) == DOT_YES)
18093 	  || type == TYPE_DIV
18094 	  || (type == TYPE_INSERT
18095 	      && get_attr_size (insn) == SIZE_32))
18096 	return true;
18097     }
18098 
18099   return false;
18100 }
18101 
18102 /* Return true if INSN can be issued only from
18103    the branch slot.  */
18104 
18105 static bool
18106 is_branch_slot_insn (rtx_insn *insn)
18107 {
18108   if (!insn || !NONDEBUG_INSN_P (insn)
18109       || GET_CODE (PATTERN (insn)) == USE
18110       || GET_CODE (PATTERN (insn)) == CLOBBER)
18111     return false;
18112 
18113   if (rs6000_sched_groups)
18114     {
18115       enum attr_type type = get_attr_type (insn);
18116       if (type == TYPE_BRANCH || type == TYPE_JMPREG)
18117 	return true;
18118       return false;
18119     }
18120 
18121   return false;
18122 }
18123 
18124 /* Return true if OUT_INSN sets a value that is used in the address
18125    generation computation of IN_INSN.  */
18126 static bool
18127 set_to_load_agen (rtx_insn *out_insn, rtx_insn *in_insn)
18128 {
18129   rtx out_set, in_set;
18130 
18131   /* For performance reasons, only handle the simple case where
18132      both insns are a single_set.  */
18133   out_set = single_set (out_insn);
18134   if (out_set)
18135     {
18136       in_set = single_set (in_insn);
18137       if (in_set)
18138         return reg_mentioned_p (SET_DEST (out_set), SET_SRC (in_set));
18139     }
18140 
18141   return false;
18142 }
18143 
18144 /* Try to determine the base/offset/size parts of the given MEM.
18145    Return true if successful, false if the values cannot all be
18146    determined.
18147 
18148    This function only looks for REG or REG+CONST address forms;
18149    a REG+REG address form returns false.  */
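/* For instance, a MEM of known size 8 whose address is
   (plus (plus (reg 3) (const_int 32)) (const_int 8)) gives base r3,
   offset 40 and size 8, the constant terms being accumulated.  */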
18150 
18151 static bool
18152 get_memref_parts (rtx mem, rtx *base, HOST_WIDE_INT *offset,
18153 		  HOST_WIDE_INT *size)
18154 {
18155   rtx addr_rtx;
18156   if (MEM_SIZE_KNOWN_P (mem))
18157     *size = MEM_SIZE (mem);
18158   else
18159     return false;
18160 
18161   addr_rtx = XEXP (mem, 0);
18162   if (GET_CODE (addr_rtx) == PRE_MODIFY)
18163     addr_rtx = XEXP (addr_rtx, 1);
18164 
18165   *offset = 0;
18166   while (GET_CODE (addr_rtx) == PLUS
18167 	 && CONST_INT_P (XEXP (addr_rtx, 1)))
18168     {
18169       *offset += INTVAL (XEXP (addr_rtx, 1));
18170       addr_rtx = XEXP (addr_rtx, 0);
18171     }
18172   if (!REG_P (addr_rtx))
18173     return false;
18174 
18175   *base = addr_rtx;
18176   return true;
18177 }
18178 
18179 /* If the target storage locations of arguments MEM1 and MEM2 are
18180    adjacent, then return the argument that has the lower address.
18181    Otherwise, return NULL_RTX.  */
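/* E.g., with MEM1 covering 8 bytes at (r9)+0 and MEM2 covering 8 bytes
   at (r9)+8, MEM1 is returned; were MEM2 at (r9)+16 instead, NULL_RTX
   would be returned, the accesses not being adjacent.  */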
18182 
18183 static rtx
18184 adjacent_mem_locations (rtx mem1, rtx mem2)
18185 {
18186   rtx reg1, reg2;
18187   HOST_WIDE_INT off1, size1, off2, size2;
18188 
18189   if (MEM_P (mem1)
18190       && MEM_P (mem2)
18191       && get_memref_parts (mem1, &reg1, &off1, &size1)
18192       && get_memref_parts (mem2, &reg2, &off2, &size2)
18193       && REGNO (reg1) == REGNO (reg2))
18194     {
18195       if (off1 + size1 == off2)
18196 	return mem1;
18197       else if (off2 + size2 == off1)
18198 	return mem2;
18199     }
18200 
18201   return NULL_RTX;
18202 }
18203 
18204 /* Return true if it can be determined that the two MEM locations
18205    overlap by at least 1 byte, based on base reg/offset/size.  */
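/* E.g., an 8-byte access at (r9)+0 overlaps a 4-byte access at (r9)+4
   (bytes 4-7 are shared), while 8-byte accesses at (r9)+0 and (r9)+8
   do not overlap.  */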
18206 
18207 static bool
18208 mem_locations_overlap (rtx mem1, rtx mem2)
18209 {
18210   rtx reg1, reg2;
18211   HOST_WIDE_INT off1, size1, off2, size2;
18212 
18213   if (get_memref_parts (mem1, &reg1, &off1, &size1)
18214       && get_memref_parts (mem2, &reg2, &off2, &size2))
18215     return ((REGNO (reg1) == REGNO (reg2))
18216 	    && (((off1 <= off2) && (off1 + size1 > off2))
18217 		|| ((off2 <= off1) && (off2 + size2 > off1))));
18218 
18219   return false;
18220 }
18221 
18222 /* Update the integer scheduling priority INSN_PRIORITY (INSN).
18223    Increase the priority to execute INSN earlier, reduce the
18224    priority to execute INSN later.  Do not define this hook if
18225    you do not need to adjust the scheduling priorities of
18226    insns.  */
18227 
18228 static int
18229 rs6000_adjust_priority (rtx_insn *insn ATTRIBUTE_UNUSED, int priority)
18230 {
18231   rtx load_mem, str_mem;
18232   /* On machines (like the 750) which have asymmetric integer units,
18233      where one integer unit can do multiply and divides and the other
18234      can't, reduce the priority of multiply/divide so it is scheduled
18235      before other integer operations.  */
18236 
18237 #if 0
18238   if (! INSN_P (insn))
18239     return priority;
18240 
18241   if (GET_CODE (PATTERN (insn)) == USE)
18242     return priority;
18243 
18244   switch (rs6000_tune) {
18245   case PROCESSOR_PPC750:
18246     switch (get_attr_type (insn))
18247       {
18248       default:
18249 	break;
18250 
18251       case TYPE_MUL:
18252       case TYPE_DIV:
18253 	fprintf (stderr, "priority was %#x (%d) before adjustment\n",
18254 		 priority, priority);
18255 	if (priority >= 0 && priority < 0x01000000)
18256 	  priority >>= 3;
18257 	break;
18258       }
18259   }
18260 #endif
18261 
18262   if (insn_must_be_first_in_group (insn)
18263       && reload_completed
18264       && current_sched_info->sched_max_insns_priority
18265       && rs6000_sched_restricted_insns_priority)
18266     {
18267 
18268       /* Prioritize insns that can be dispatched only in the first
18269 	 dispatch slot.  */
18270       if (rs6000_sched_restricted_insns_priority == 1)
18271 	/* Attach highest priority to insn. This means that in
18272 	   haifa-sched.cc:ready_sort(), dispatch-slot restriction considerations
18273 	   precede 'priority' (critical path) considerations.  */
18274 	return current_sched_info->sched_max_insns_priority;
18275       else if (rs6000_sched_restricted_insns_priority == 2)
18276 	/* Increase priority of insn by a minimal amount. This means that in
18277 	   haifa-sched.cc:ready_sort(), only 'priority' (critical path)
18278 	   considerations precede dispatch-slot restriction considerations.  */
18279 	return (priority + 1);
18280     }
18281 
18282   if (rs6000_tune == PROCESSOR_POWER6
18283       && ((load_store_pendulum == -2 && is_load_insn (insn, &load_mem))
18284           || (load_store_pendulum == 2 && is_store_insn (insn, &str_mem))))
18285     /* Attach highest priority to insn if the scheduler has just issued two
18286        stores and this instruction is a load, or two loads and this instruction
18287        is a store. Power6 wants loads and stores scheduled alternately
18288        when possible */
18289     return current_sched_info->sched_max_insns_priority;
18290 
18291   return priority;
18292 }
18293 
18294 /* Return true if the instruction is nonpipelined on the Cell. */
18295 static bool
18296 is_nonpipeline_insn (rtx_insn *insn)
18297 {
18298   enum attr_type type;
18299   if (!insn || !NONDEBUG_INSN_P (insn)
18300       || GET_CODE (PATTERN (insn)) == USE
18301       || GET_CODE (PATTERN (insn)) == CLOBBER)
18302     return false;
18303 
18304   type = get_attr_type (insn);
18305   if (type == TYPE_MUL
18306       || type == TYPE_DIV
18307       || type == TYPE_SDIV
18308       || type == TYPE_DDIV
18309       || type == TYPE_SSQRT
18310       || type == TYPE_DSQRT
18311       || type == TYPE_MFCR
18312       || type == TYPE_MFCRF
18313       || type == TYPE_MFJMPR)
18314     {
18315       return true;
18316     }
18317   return false;
18318 }
18319 
18320 
18321 /* Return how many instructions the machine can issue per cycle.  */
18322 
18323 static int
18324 rs6000_issue_rate (void)
18325 {
18326   /* Unless scheduling for register pressure, use an issue rate of 1
18327      for the first scheduling pass to decrease degradation.  */
18328   if (!reload_completed && !flag_sched_pressure)
18329     return 1;
18330 
18331   switch (rs6000_tune) {
18332   case PROCESSOR_RS64A:
18333   case PROCESSOR_PPC601: /* ? */
18334   case PROCESSOR_PPC7450:
18335     return 3;
18336   case PROCESSOR_PPC440:
18337   case PROCESSOR_PPC603:
18338   case PROCESSOR_PPC750:
18339   case PROCESSOR_PPC7400:
18340   case PROCESSOR_PPC8540:
18341   case PROCESSOR_PPC8548:
18342   case PROCESSOR_CELL:
18343   case PROCESSOR_PPCE300C2:
18344   case PROCESSOR_PPCE300C3:
18345   case PROCESSOR_PPCE500MC:
18346   case PROCESSOR_PPCE500MC64:
18347   case PROCESSOR_PPCE5500:
18348   case PROCESSOR_PPCE6500:
18349   case PROCESSOR_TITAN:
18350     return 2;
18351   case PROCESSOR_PPC476:
18352   case PROCESSOR_PPC604:
18353   case PROCESSOR_PPC604e:
18354   case PROCESSOR_PPC620:
18355   case PROCESSOR_PPC630:
18356     return 4;
18357   case PROCESSOR_POWER4:
18358   case PROCESSOR_POWER5:
18359   case PROCESSOR_POWER6:
18360   case PROCESSOR_POWER7:
18361     return 5;
18362   case PROCESSOR_POWER8:
18363     return 7;
18364   case PROCESSOR_POWER9:
18365     return 6;
18366   case PROCESSOR_POWER10:
18367     return 8;
18368   default:
18369     return 1;
18370   }
18371 }
18372 
18373 /* Return how many instructions to look ahead for better insn
18374    scheduling.  */
18375 
18376 static int
18377 rs6000_use_sched_lookahead (void)
18378 {
18379   switch (rs6000_tune)
18380     {
18381     case PROCESSOR_PPC8540:
18382     case PROCESSOR_PPC8548:
18383       return 4;
18384 
18385     case PROCESSOR_CELL:
18386       return (reload_completed ? 8 : 0);
18387 
18388     default:
18389       return 0;
18390     }
18391 }
18392 
18393 /* We are choosing an insn from the ready queue.  Return zero if INSN
18394    can be chosen.  */
18395 static int
18396 rs6000_use_sched_lookahead_guard (rtx_insn *insn, int ready_index)
18397 {
18398   if (ready_index == 0)
18399     return 0;
18400 
18401   if (rs6000_tune != PROCESSOR_CELL)
18402     return 0;
18403 
18404   gcc_assert (insn != NULL_RTX && INSN_P (insn));
18405 
18406   if (!reload_completed
18407       || is_nonpipeline_insn (insn)
18408       || is_microcoded_insn (insn))
18409     return 1;
18410 
18411   return 0;
18412 }
18413 
18414 /* Determine if PAT refers to memory. If so, set MEM_REF to the MEM rtx
18415    and return true.  */
18416 
18417 static bool
18418 find_mem_ref (rtx pat, rtx *mem_ref)
18419 {
18420   const char * fmt;
18421   int i, j;
18422 
18423   /* stack_tie does not produce any real memory traffic.  */
18424   if (tie_operand (pat, VOIDmode))
18425     return false;
18426 
18427   if (MEM_P (pat))
18428     {
18429       *mem_ref = pat;
18430       return true;
18431     }
18432 
18433   /* Recursively process the pattern.  */
18434   fmt = GET_RTX_FORMAT (GET_CODE (pat));
18435 
18436   for (i = GET_RTX_LENGTH (GET_CODE (pat)) - 1; i >= 0; i--)
18437     {
18438       if (fmt[i] == 'e')
18439 	{
18440 	  if (find_mem_ref (XEXP (pat, i), mem_ref))
18441 	    return true;
18442 	}
18443       else if (fmt[i] == 'E')
18444 	for (j = XVECLEN (pat, i) - 1; j >= 0; j--)
18445 	  {
18446 	    if (find_mem_ref (XVECEXP (pat, i, j), mem_ref))
18447 	      return true;
18448 	  }
18449     }
18450 
18451   return false;
18452 }
18453 
18454 /* Determine if PAT is a PATTERN of a load insn.  */
18455 
18456 static bool
18457 is_load_insn1 (rtx pat, rtx *load_mem)
18458 {
18459   if (!pat)
18460     return false;
18461 
18462   if (GET_CODE (pat) == SET)
18463     {
18464       if (REG_P (SET_DEST (pat)))
18465 	return find_mem_ref (SET_SRC (pat), load_mem);
18466       else
18467 	return false;
18468     }
18469 
18470   if (GET_CODE (pat) == PARALLEL)
18471     {
18472       int i;
18473 
18474       for (i = 0; i < XVECLEN (pat, 0); i++)
18475 	if (is_load_insn1 (XVECEXP (pat, 0, i), load_mem))
18476 	  return true;
18477     }
18478 
18479   return false;
18480 }
18481 
18482 /* Determine if INSN loads from memory.  */
18483 
18484 static bool
18485 is_load_insn (rtx insn, rtx *load_mem)
18486 {
18487   if (!insn || !INSN_P (insn))
18488     return false;
18489 
18490   if (CALL_P (insn))
18491     return false;
18492 
18493   return is_load_insn1 (PATTERN (insn), load_mem);
18494 }
18495 
18496 /* Determine if PAT is a PATTERN of a store insn.  */
18497 
18498 static bool
18499 is_store_insn1 (rtx pat, rtx *str_mem)
18500 {
18501   if (!pat)
18502     return false;
18503 
18504   if (GET_CODE (pat) == SET)
18505     {
18506       if (REG_P (SET_SRC (pat)) || SUBREG_P (SET_SRC (pat)))
18507 	return find_mem_ref (SET_DEST (pat), str_mem);
18508       else
18509 	return false;
18510     }
18511 
18512   if (GET_CODE (pat) == PARALLEL)
18513     {
18514       int i;
18515 
18516       for (i = 0; i < XVECLEN (pat, 0); i++)
18517 	if (is_store_insn1 (XVECEXP (pat, 0, i), str_mem))
18518 	  return true;
18519     }
18520 
18521   return false;
18522 }
18523 
18524 /* Determine if INSN stores to memory.  */
18525 
18526 static bool
18527 is_store_insn (rtx insn, rtx *str_mem)
18528 {
18529   if (!insn || !INSN_P (insn))
18530     return false;
18531 
18532   return is_store_insn1 (PATTERN (insn), str_mem);
18533 }
18534 
18535 /* Return whether TYPE is a Power9 pairable vector instruction type.  */
18536 
18537 static bool
18538 is_power9_pairable_vec_type (enum attr_type type)
18539 {
18540   switch (type)
18541     {
18542       case TYPE_VECSIMPLE:
18543       case TYPE_VECCOMPLEX:
18544       case TYPE_VECDIV:
18545       case TYPE_VECCMP:
18546       case TYPE_VECPERM:
18547       case TYPE_VECFLOAT:
18548       case TYPE_VECFDIV:
18549       case TYPE_VECDOUBLE:
18550 	return true;
18551       default:
18552 	break;
18553     }
18554   return false;
18555 }
18556 
18557 /* Returns whether the dependence between INSN and NEXT is considered
18558    costly by the given target.  */
18559 
18560 static bool
18561 rs6000_is_costly_dependence (dep_t dep, int cost, int distance)
18562 {
18563   rtx insn;
18564   rtx next;
18565   rtx load_mem, str_mem;
18566 
18567   /* If the flag is not enabled - no dependence is considered costly;
18568      allow all dependent insns in the same group.
18569      This is the most aggressive option.  */
18570   if (rs6000_sched_costly_dep == no_dep_costly)
18571     return false;
18572 
18573   /* If the flag is set to 1 - a dependence is always considered costly;
18574      do not allow dependent instructions in the same group.
18575      This is the most conservative option.  */
18576   if (rs6000_sched_costly_dep == all_deps_costly)
18577     return true;
18578 
18579   insn = DEP_PRO (dep);
18580   next = DEP_CON (dep);
18581 
18582   if (rs6000_sched_costly_dep == store_to_load_dep_costly
18583       && is_load_insn (next, &load_mem)
18584       && is_store_insn (insn, &str_mem))
18585     /* Prevent load after store in the same group.  */
18586     return true;
18587 
18588   if (rs6000_sched_costly_dep == true_store_to_load_dep_costly
18589       && is_load_insn (next, &load_mem)
18590       && is_store_insn (insn, &str_mem)
18591       && DEP_TYPE (dep) == REG_DEP_TRUE
18592       && mem_locations_overlap (str_mem, load_mem))
18593      /* Prevent load after store in the same group if it is a true
18594 	dependence.  */
18595      return true;
18596 
18597   /* The flag is set to X; dependences with latency >= X are considered costly,
18598      and will not be scheduled in the same group.  */
18599   if (rs6000_sched_costly_dep <= max_dep_latency
18600       && ((cost - distance) >= (int)rs6000_sched_costly_dep))
18601     return true;
18602 
18603   return false;
18604 }
18605 
18606 /* Return the next insn after INSN that is found before TAIL is reached,
18607    skipping any "non-active" insns - insns that will not actually occupy
18608    an issue slot.  Return NULL_RTX if such an insn is not found.  */
18609 
18610 static rtx_insn *
18611 get_next_active_insn (rtx_insn *insn, rtx_insn *tail)
18612 {
18613   if (insn == NULL_RTX || insn == tail)
18614     return NULL;
18615 
18616   while (1)
18617     {
18618       insn = NEXT_INSN (insn);
18619       if (insn == NULL_RTX || insn == tail)
18620 	return NULL;
18621 
18622       if (CALL_P (insn)
18623 	  || JUMP_P (insn) || JUMP_TABLE_DATA_P (insn)
18624 	  || (NONJUMP_INSN_P (insn)
18625 	      && GET_CODE (PATTERN (insn)) != USE
18626 	      && GET_CODE (PATTERN (insn)) != CLOBBER
18627 	      && INSN_CODE (insn) != CODE_FOR_stack_tie))
18628 	break;
18629     }
18630   return insn;
18631 }
18632 
18633 /* Move the instruction at POS to the end of the READY list.  */
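/* E.g., with READY = {A, B, C, D}, POS = 1 and LASTPOS = 3, the result
   is {A, C, D, B}: B is rotated to the end of the list, which is where
   the scheduler looks for the next insn to issue.  */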
18634 
18635 static void
18636 move_to_end_of_ready (rtx_insn **ready, int pos, int lastpos)
18637 {
18638   rtx_insn *tmp;
18639   int i;
18640 
18641   tmp = ready[pos];
18642   for (i = pos; i < lastpos; i++)
18643     ready[i] = ready[i + 1];
18644   ready[lastpos] = tmp;
18645 }
18646 
18647 /* Do Power6 specific sched_reorder2 reordering of ready list.  */
18648 
18649 static int
18650 power6_sched_reorder2 (rtx_insn **ready, int lastpos)
18651 {
18652   /* For Power6, we need to handle some special cases to try and keep the
18653      store queue from overflowing and triggering expensive flushes.
18654 
18655      This code monitors how load and store instructions are being issued
18656      and skews the ready list one way or the other to increase the likelihood
18657      that a desired instruction is issued at the proper time.
18658 
18659      A couple of things are done.  First, we maintain a "load_store_pendulum"
18660      to track the current state of load/store issue.
18661 
18662        - If the pendulum is at zero, then no loads or stores have been
18663 	 issued in the current cycle so we do nothing.
18664 
18665        - If the pendulum is 1, then a single load has been issued in this
18666 	 cycle and we attempt to locate another load in the ready list to
18667 	 issue with it.
18668 
18669        - If the pendulum is -2, then two stores have already been
18670 	 issued in this cycle, so we increase the priority of the first load
18671 	 in the ready list to increase its likelihood of being chosen first
18672 	 in the next cycle.
18673 
18674        - If the pendulum is -1, then a single store has been issued in this
18675 	 cycle and we attempt to locate another store in the ready list to
18676 	 issue with it, preferring a store to an adjacent memory location to
18677 	 facilitate store pairing in the store queue.
18678 
18679        - If the pendulum is 2, then two loads have already been
18680 	 issued in this cycle, so we increase the priority of the first store
18681 	 in the ready list to increase its likelihood of being chosen first
18682 	 in the next cycle.
18683 
18684        - If the pendulum < -2 or > 2, then do nothing.
18685 
18686        Note: This code covers the most common scenarios.  There exist
18687 	     non-load/store instructions which make use of the LSU and which
18688 	     would need to be accounted for to strictly model the behavior
18689 	     of the machine.  Those instructions are currently unaccounted
18690 	     for to help minimize compile time overhead of this code.
18691    */
18692   int pos;
18693   rtx load_mem, str_mem;
18694 
18695   if (is_store_insn (last_scheduled_insn, &str_mem))
18696     /* Issuing a store, swing the load_store_pendulum to the left */
18697     load_store_pendulum--;
18698   else if (is_load_insn (last_scheduled_insn, &load_mem))
18699     /* Issuing a load, swing the load_store_pendulum to the right */
18700     load_store_pendulum++;
18701   else
18702     return cached_can_issue_more;
18703 
18704   /* If the pendulum is balanced, or there is only one instruction on
18705      the ready list, then all is well, so return. */
18706   if ((load_store_pendulum == 0) || (lastpos <= 0))
18707     return cached_can_issue_more;
18708 
18709   if (load_store_pendulum == 1)
18710     {
18711       /* A load has been issued in this cycle.  Scan the ready list
18712 	 for another load to issue with it */
18713       pos = lastpos;
18714 
18715       while (pos >= 0)
18716 	{
18717 	  if (is_load_insn (ready[pos], &load_mem))
18718 	    {
18719 	      /* Found a load.  Move it to the head of the ready list,
18720 		 and adjust its priority so that it is more likely to
18721 		 stay there.  */
18722 	      move_to_end_of_ready (ready, pos, lastpos);
18723 
18724 	      if (!sel_sched_p ()
18725 		  && INSN_PRIORITY_KNOWN (ready[lastpos]))
18726 		INSN_PRIORITY (ready[lastpos])++;
18727 	      break;
18728 	    }
18729 	  pos--;
18730 	}
18731     }
18732   else if (load_store_pendulum == -2)
18733     {
18734       /* Two stores have been issued in this cycle.  Increase the
18735 	 priority of the first load in the ready list to favor it for
18736 	 issuing in the next cycle. */
18737       pos = lastpos;
18738 
18739       while (pos >= 0)
18740 	{
18741 	  if (is_load_insn (ready[pos], &load_mem)
18742 	      && !sel_sched_p ()
18743 	      && INSN_PRIORITY_KNOWN (ready[pos]))
18744 	    {
18745 	      INSN_PRIORITY (ready[pos])++;
18746 
18747 	      /* Adjust the pendulum to account for the fact that a load
18748 		 was found and increased in priority.  This is to prevent
18749 		 increasing the priority of multiple loads */
18750 	      load_store_pendulum--;
18751 
18752 	      break;
18753 	    }
18754 	  pos--;
18755 	}
18756     }
18757   else if (load_store_pendulum == -1)
18758     {
18759       /* A store has been issued in this cycle.  Scan the ready list for
18760 	 another store to issue with it, preferring a store to an adjacent
18761 	 memory location */
18762       int first_store_pos = -1;
18763 
18764       pos = lastpos;
18765 
18766       while (pos >= 0)
18767 	{
18768 	  if (is_store_insn (ready[pos], &str_mem))
18769 	    {
18770 	      rtx str_mem2;
18771 	      /* Maintain the index of the first store found on the
18772 		 list */
18773 	      if (first_store_pos == -1)
18774 		first_store_pos = pos;
18775 
18776 	      if (is_store_insn (last_scheduled_insn, &str_mem2)
18777 		  && adjacent_mem_locations (str_mem, str_mem2))
18778 		{
18779 		  /* Found an adjacent store.  Move it to the head of the
18780 		     ready list, and adjust its priority so that it is
18781 		     more likely to stay there.  */
18782 		  move_to_end_of_ready (ready, pos, lastpos);
18783 
18784 		  if (!sel_sched_p ()
18785 		      && INSN_PRIORITY_KNOWN (ready[lastpos]))
18786 		    INSN_PRIORITY (ready[lastpos])++;
18787 
18788 		  first_store_pos = -1;
18789 
18790 		  break;
18791 		}
18792 	    }
18793 	  pos--;
18794 	}
18795 
18796       if (first_store_pos >= 0)
18797 	{
18798 	  /* An adjacent store wasn't found, but a non-adjacent store was,
18799 	     so move the non-adjacent store to the front of the ready
18800 	     list, and adjust its priority so that it is more likely to
18801 	     stay there. */
18802 	  move_to_end_of_ready (ready, first_store_pos, lastpos);
18803 	  if (!sel_sched_p ()
18804 	      && INSN_PRIORITY_KNOWN (ready[lastpos]))
18805 	    INSN_PRIORITY (ready[lastpos])++;
18806 	}
18807     }
18808   else if (load_store_pendulum == 2)
18809     {
18810       /* Two loads have been issued in this cycle.  Increase the priority
18811 	 of the first store in the ready list to favor it for issuing in
18812 	 the next cycle. */
18813       pos = lastpos;
18814 
18815       while (pos >= 0)
18816 	{
18817 	  if (is_store_insn (ready[pos], &str_mem)
18818 	      && !sel_sched_p ()
18819 	      && INSN_PRIORITY_KNOWN (ready[pos]))
18820 	    {
18821 	      INSN_PRIORITY (ready[pos])++;
18822 
18823 	      /* Adjust the pendulum to account for the fact that a store
18824 		 was found and increased in priority.  This is to prevent
18825 		 increasing the priority of multiple stores */
18826 	      load_store_pendulum++;
18827 
18828 	      break;
18829 	    }
18830 	  pos--;
18831 	}
18832     }
18833 
18834   return cached_can_issue_more;
18835 }
18836 
18837 /* Do Power9 specific sched_reorder2 reordering of ready list.  */
18838 
18839 static int
18840 power9_sched_reorder2 (rtx_insn **ready, int lastpos)
18841 {
18842   int pos;
18843   enum attr_type type, type2;
18844 
18845   type = get_attr_type (last_scheduled_insn);
18846 
18847   /* Try to issue fixed point divides back-to-back in pairs so they will be
18848      routed to separate execution units and execute in parallel.  */
18849   if (type == TYPE_DIV && divide_cnt == 0)
18850     {
18851       /* First divide has been scheduled.  */
18852       divide_cnt = 1;
18853 
18854       /* Scan the ready list looking for another divide, if found move it
18855 	 to the end of the list so it is chosen next.  */
18856       pos = lastpos;
18857       while (pos >= 0)
18858 	{
18859 	  if (recog_memoized (ready[pos]) >= 0
18860 	      && get_attr_type (ready[pos]) == TYPE_DIV)
18861 	    {
18862 	      move_to_end_of_ready (ready, pos, lastpos);
18863 	      break;
18864 	    }
18865 	  pos--;
18866 	}
18867     }
18868   else
18869     {
18870       /* Last insn was the 2nd divide or not a divide, reset the counter.  */
18871       divide_cnt = 0;
18872 
18873       /* The best dispatch throughput for vector and vector load insns can be
18874 	 achieved by interleaving a vector and vector load such that they'll
18875 	 dispatch to the same superslice. If this pairing cannot be achieved
18876 	 then it is best to pair vector insns together and vector load insns
18877 	 together.
18878 
18879 	 To aid in this pairing, vec_pairing maintains the current state with
18880 	 the following values:
18881 
18882 	     0  : Initial state, no vecload/vector pairing has been started.
18883 
18884 	     1  : A vecload or vector insn has been issued and a candidate for
18885 		  pairing has been found and moved to the end of the ready
18886 		  list.  */
18887       if (type == TYPE_VECLOAD)
18888 	{
18889 	  /* Issued a vecload.  */
18890 	  if (vec_pairing == 0)
18891 	    {
18892 	      int vecload_pos = -1;
18893 	      /* We issued a single vecload, look for a vector insn to pair it
18894 		 with.  If one isn't found, try to pair another vecload.  */
18895 	      pos = lastpos;
18896 	      while (pos >= 0)
18897 		{
18898 		  if (recog_memoized (ready[pos]) >= 0)
18899 		    {
18900 		      type2 = get_attr_type (ready[pos]);
18901 		      if (is_power9_pairable_vec_type (type2))
18902 			{
18903 			  /* Found a vector insn to pair with, move it to the
18904 			     end of the ready list so it is scheduled next.  */
18905 			  move_to_end_of_ready (ready, pos, lastpos);
18906 			  vec_pairing = 1;
18907 			  return cached_can_issue_more;
18908 			}
18909 		      else if (type2 == TYPE_VECLOAD && vecload_pos == -1)
18910 			/* Remember position of first vecload seen.  */
18911 			vecload_pos = pos;
18912 		    }
18913 		  pos--;
18914 		}
18915 	      if (vecload_pos >= 0)
18916 		{
18917 		  /* Didn't find a vector to pair with but did find a vecload,
18918 		     move it to the end of the ready list.  */
18919 		  move_to_end_of_ready (ready, vecload_pos, lastpos);
18920 		  vec_pairing = 1;
18921 		  return cached_can_issue_more;
18922 		}
18923 	    }
18924 	}
18925       else if (is_power9_pairable_vec_type (type))
18926 	{
18927 	  /* Issued a vector operation.  */
18928 	  if (vec_pairing == 0)
18929 	    {
18930 	      int vec_pos = -1;
18931 	      /* We issued a single vector insn, look for a vecload to pair it
18932 		 with.  If one isn't found, try to pair another vector.  */
18933 	      pos = lastpos;
18934 	      while (pos >= 0)
18935 		{
18936 		  if (recog_memoized (ready[pos]) >= 0)
18937 		    {
18938 		      type2 = get_attr_type (ready[pos]);
18939 		      if (type2 == TYPE_VECLOAD)
18940 			{
18941 			  /* Found a vecload insn to pair with, move it to the
18942 			     end of the ready list so it is scheduled next.  */
18943 			  move_to_end_of_ready (ready, pos, lastpos);
18944 			  vec_pairing = 1;
18945 			  return cached_can_issue_more;
18946 			}
18947 		      else if (is_power9_pairable_vec_type (type2)
18948 			       && vec_pos == -1)
18949 			/* Remember position of first vector insn seen.  */
18950 			vec_pos = pos;
18951 		    }
18952 		  pos--;
18953 		}
18954 	      if (vec_pos >= 0)
18955 		{
18956 		  /* Didn't find a vecload to pair with but did find a vector
18957 		     insn, move it to the end of the ready list.  */
18958 		  move_to_end_of_ready (ready, vec_pos, lastpos);
18959 		  vec_pairing = 1;
18960 		  return cached_can_issue_more;
18961 		}
18962 	    }
18963 	}
18964 
18965       /* We've either finished a vec/vecload pair, couldn't find an insn to
18966 	 continue the current pair, or the last insn had nothing to do with
18967 	 pairing.  In any case, reset the state.  */
18968       vec_pairing = 0;
18969     }
18970 
18971   return cached_can_issue_more;
18972 }
18973 
18974 /* Determine if INSN is a store to memory that can be fused with a similar
18975    adjacent store.  */
18976 
18977 static bool
18978 is_fusable_store (rtx_insn *insn, rtx *str_mem)
18979 {
18980   /* Insn must be a non-prefixed base+disp form store.  */
18981   if (is_store_insn (insn, str_mem)
18982       && get_attr_prefixed (insn) == PREFIXED_NO
18983       && get_attr_update (insn) == UPDATE_NO
18984       && get_attr_indexed (insn) == INDEXED_NO)
18985     {
18986       /* Further restrictions by mode and size.  */
18987       if (!MEM_SIZE_KNOWN_P (*str_mem))
18988 	return false;
18989 
18990       machine_mode mode = GET_MODE (*str_mem);
18991       HOST_WIDE_INT size = MEM_SIZE (*str_mem);
18992 
18993       if (INTEGRAL_MODE_P (mode))
18994 	/* Must be word or dword size.  */
18995 	return (size == 4 || size == 8);
18996       else if (FLOAT_MODE_P (mode))
18997 	/* Must be dword size.  */
18998 	return (size == 8);
18999     }
19000 
19001   return false;
19002 }
19003 
19004 /* Do Power10 specific reordering of the ready list.  */
19005 
19006 static int
power10_sched_reorder(rtx_insn ** ready,int lastpos)19007 power10_sched_reorder (rtx_insn **ready, int lastpos)
19008 {
19009   rtx mem1;
19010 
19011   /* Do store fusion during sched2 only.  */
19012   if (!reload_completed)
19013     return cached_can_issue_more;
19014 
19015   /* If the prior insn finished off a store fusion pair then simply
19016      reset the counter and return, nothing more to do.  */
19017   if (load_store_pendulum != 0)
19018     {
19019       load_store_pendulum = 0;
19020       return cached_can_issue_more;
19021     }
19022 
19023   /* Try to pair certain store insns to adjacent memory locations
19024      so that the hardware will fuse them to a single operation.  */
19025   if (TARGET_P10_FUSION && is_fusable_store (last_scheduled_insn, &mem1))
19026     {
19027 
19028       /* A fusable store was just scheduled.  Scan the ready list for another
19029 	 store that it can fuse with.  */
19030       int pos = lastpos;
19031       while (pos >= 0)
19032 	{
19033 	  rtx mem2;
19034 	  /* GPR stores can be ascending or descending offsets, FPR/VSR stores
19035 	     must be ascending only.  */
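          /* adjacent_mem_locations is assumed here to return the
             lower-addressed MEM of an adjacent pair, so comparing its result
             against MEM1 below is what restricts FPR/VSR stores to the
             ascending order.  */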
          if (is_fusable_store (ready[pos], &mem2)
              && ((INTEGRAL_MODE_P (GET_MODE (mem1))
                   && adjacent_mem_locations (mem1, mem2))
                  || (FLOAT_MODE_P (GET_MODE (mem1))
                      && (adjacent_mem_locations (mem1, mem2) == mem1))))
            {
              /* Found a fusable store.  Move it to the end of the ready list
                 so it is scheduled next.  */
              move_to_end_of_ready (ready, pos, lastpos);

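              /* Mark that a fusion pair is now in flight; the check at the
                 top of this function resets the counter on the next call.  */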
              load_store_pendulum = -1;
              break;
            }
          pos--;
        }
    }

  return cached_can_issue_more;
}

/* We are about to begin issuing insns for this clock cycle.  */

static int
rs6000_sched_reorder (FILE *dump ATTRIBUTE_UNUSED, int sched_verbose,
                      rtx_insn **ready ATTRIBUTE_UNUSED,
                      int *pn_ready ATTRIBUTE_UNUSED,
                      int clock_var ATTRIBUTE_UNUSED)
{
  int n_ready = *pn_ready;

  if (sched_verbose)
    fprintf (dump, "// rs6000_sched_reorder :\n");

  /* Reorder the ready list if the next insn to issue is a nonpipelined
     insn.  */
  if (rs6000_tune == PROCESSOR_CELL && n_ready > 1)
    {
      if (is_nonpipeline_insn (ready[n_ready - 1])
          && (recog_memoized (ready[n_ready - 2]) > 0))
        /* Simply swap the top two insns on the ready list.  */
        std::swap (ready[n_ready - 1], ready[n_ready - 2]);
    }

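  /* For Power6, start each cycle with the load/store pendulum balanced;
     power6_sched_reorder2 adjusts it as loads and stores issue.  */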
  if (rs6000_tune == PROCESSOR_POWER6)
    load_store_pendulum = 0;

  /* Do Power10 dependent reordering.  */
  if (rs6000_tune == PROCESSOR_POWER10 && last_scheduled_insn)
    power10_sched_reorder (ready, n_ready - 1);

  return rs6000_issue_rate ();
}

/* Like rs6000_sched_reorder, but called after issuing each insn.  */

static int
rs6000_sched_reorder2 (FILE *dump, int sched_verbose, rtx_insn **ready,
                       int *pn_ready, int clock_var ATTRIBUTE_UNUSED)
{
  if (sched_verbose)
    fprintf (dump, "// rs6000_sched_reorder2 :\n");

  /* Do Power6 dependent reordering if necessary.  */
  if (rs6000_tune == PROCESSOR_POWER6 && last_scheduled_insn)
    return power6_sched_reorder2 (ready, *pn_ready - 1);

  /* Do Power9 dependent reordering if necessary.  */
  if (rs6000_tune == PROCESSOR_POWER9 && last_scheduled_insn
      && recog_memoized (last_scheduled_insn) >= 0)
    return power9_sched_reorder2 (ready, *pn_ready - 1);

  /* Do Power10 dependent reordering.  */
  if (rs6000_tune == PROCESSOR_POWER10 && last_scheduled_insn)
    return power10_sched_reorder (ready, *pn_ready - 1);

  return cached_can_issue_more;
}

/* Return whether the presence of INSN causes a dispatch group termination
   of group WHICH_GROUP.

   If WHICH_GROUP == current_group, this function will return true if INSN
   causes the termination of the current group (i.e., the dispatch group to
   which INSN belongs).  This means that INSN will be the last insn in the
   group it belongs to.

   If WHICH_GROUP == previous_group, this function will return true if INSN
   causes the termination of the previous group (i.e., the dispatch group
   that precedes the group to which INSN belongs).  This means that INSN
   will be the first insn in the group it belongs to.  */

static bool
insn_terminates_group_p (rtx_insn *insn, enum group_termination which_group)
{
  bool first, last;

  if (! insn)
    return false;

  first = insn_must_be_first_in_group (insn);
  last = insn_must_be_last_in_group (insn);

  if (first && last)
    return true;

  if (which_group == current_group)
    return last;
  else if (which_group == previous_group)
    return first;

  return false;
}


static bool
insn_must_be_first_in_group (rtx_insn *insn)
{
  enum attr_type type;

  if (!insn
      || NOTE_P (insn)
      || DEBUG_INSN_P (insn)
      || GET_CODE (PATTERN (insn)) == USE
      || GET_CODE (PATTERN (insn)) == CLOBBER)
    return false;

  switch (rs6000_tune)
    {
    case PROCESSOR_POWER5:
      if (is_cracked_insn (insn))
        return true;
      /* FALLTHRU */
    case PROCESSOR_POWER4:
      if (is_microcoded_insn (insn))
        return true;

      if (!rs6000_sched_groups)
        return false;

      type = get_attr_type (insn);

      switch (type)
        {
        case TYPE_MFCR:
        case TYPE_MFCRF:
        case TYPE_MTCR:
        case TYPE_CR_LOGICAL:
        case TYPE_MTJMPR:
        case TYPE_MFJMPR:
        case TYPE_DIV:
        case TYPE_LOAD_L:
        case TYPE_STORE_C:
        case TYPE_ISYNC:
        case TYPE_SYNC:
          return true;
        default:
          break;
        }
      break;
    case PROCESSOR_POWER6:
      type = get_attr_type (insn);

      switch (type)
        {
        case TYPE_EXTS:
        case TYPE_CNTLZ:
        case TYPE_TRAP:
        case TYPE_MUL:
        case TYPE_INSERT:
        case TYPE_FPCOMPARE:
        case TYPE_MFCR:
        case TYPE_MTCR:
        case TYPE_MFJMPR:
        case TYPE_MTJMPR:
        case TYPE_ISYNC:
        case TYPE_SYNC:
        case TYPE_LOAD_L:
        case TYPE_STORE_C:
          return true;
        case TYPE_SHIFT:
          if (get_attr_dot (insn) == DOT_NO
              || get_attr_var_shift (insn) == VAR_SHIFT_NO)
            return true;
          else
            break;
        case TYPE_DIV:
          if (get_attr_size (insn) == SIZE_32)
            return true;
          else
            break;
        case TYPE_LOAD:
        case TYPE_STORE:
        case TYPE_FPLOAD:
        case TYPE_FPSTORE:
          if (get_attr_update (insn) == UPDATE_YES)
            return true;
          else
            break;
        default:
          break;
        }
      break;
    case PROCESSOR_POWER7:
      type = get_attr_type (insn);

      switch (type)
        {
        case TYPE_CR_LOGICAL:
        case TYPE_MFCR:
        case TYPE_MFCRF:
        case TYPE_MTCR:
        case TYPE_DIV:
        case TYPE_ISYNC:
        case TYPE_LOAD_L:
        case TYPE_STORE_C:
        case TYPE_MFJMPR:
        case TYPE_MTJMPR:
          return true;
        case TYPE_MUL:
        case TYPE_SHIFT:
        case TYPE_EXTS:
          if (get_attr_dot (insn) == DOT_YES)
            return true;
          else
            break;
        case TYPE_LOAD:
          if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES
              || get_attr_update (insn) == UPDATE_YES)
            return true;
          else
            break;
        case TYPE_STORE:
        case TYPE_FPLOAD:
        case TYPE_FPSTORE:
          if (get_attr_update (insn) == UPDATE_YES)
            return true;
          else
            break;
        default:
          break;
        }
      break;
    case PROCESSOR_POWER8:
      type = get_attr_type (insn);

      switch (type)
        {
        case TYPE_CR_LOGICAL:
        case TYPE_MFCR:
        case TYPE_MFCRF:
        case TYPE_MTCR:
        case TYPE_SYNC:
        case TYPE_ISYNC:
        case TYPE_LOAD_L:
        case TYPE_STORE_C:
        case TYPE_VECSTORE:
        case TYPE_MFJMPR:
        case TYPE_MTJMPR:
          return true;
        case TYPE_SHIFT:
        case TYPE_EXTS:
        case TYPE_MUL:
          if (get_attr_dot (insn) == DOT_YES)
            return true;
          else
            break;
        case TYPE_LOAD:
          if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES
              || get_attr_update (insn) == UPDATE_YES)
            return true;
          else
            break;
        case TYPE_STORE:
          if (get_attr_update (insn) == UPDATE_YES
              && get_attr_indexed (insn) == INDEXED_YES)
            return true;
          else
            break;
        default:
          break;
        }
      break;
    default:
      break;
    }

  return false;
}

static bool
insn_must_be_last_in_group (rtx_insn *insn)
{
  enum attr_type type;

  if (!insn
      || NOTE_P (insn)
      || DEBUG_INSN_P (insn)
      || GET_CODE (PATTERN (insn)) == USE
      || GET_CODE (PATTERN (insn)) == CLOBBER)
    return false;

  switch (rs6000_tune)
    {
    case PROCESSOR_POWER4:
    case PROCESSOR_POWER5:
      if (is_microcoded_insn (insn))
        return true;

      if (is_branch_slot_insn (insn))
        return true;

      break;
    case PROCESSOR_POWER6:
      type = get_attr_type (insn);

      switch (type)
        {
        case TYPE_EXTS:
        case TYPE_CNTLZ:
        case TYPE_TRAP:
        case TYPE_MUL:
        case TYPE_FPCOMPARE:
        case TYPE_MFCR:
        case TYPE_MTCR:
        case TYPE_MFJMPR:
        case TYPE_MTJMPR:
        case TYPE_ISYNC:
        case TYPE_SYNC:
        case TYPE_LOAD_L:
        case TYPE_STORE_C:
          return true;
        case TYPE_SHIFT:
          if (get_attr_dot (insn) == DOT_NO
              || get_attr_var_shift (insn) == VAR_SHIFT_NO)
            return true;
          else
            break;
        case TYPE_DIV:
          if (get_attr_size (insn) == SIZE_32)
            return true;
          else
            break;
        default:
          break;
        }
      break;
    case PROCESSOR_POWER7:
      type = get_attr_type (insn);

      switch (type)
        {
        case TYPE_ISYNC:
        case TYPE_SYNC:
        case TYPE_LOAD_L:
        case TYPE_STORE_C:
          return true;
        case TYPE_LOAD:
          if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES
              && get_attr_update (insn) == UPDATE_YES)
            return true;
          else
            break;
        case TYPE_STORE:
          if (get_attr_update (insn) == UPDATE_YES
              && get_attr_indexed (insn) == INDEXED_YES)
            return true;
          else
            break;
        default:
          break;
        }
      break;
    case PROCESSOR_POWER8:
      type = get_attr_type (insn);

      switch (type)
        {
        case TYPE_MFCR:
        case TYPE_MTCR:
        case TYPE_ISYNC:
        case TYPE_SYNC:
        case TYPE_LOAD_L:
        case TYPE_STORE_C:
          return true;
        case TYPE_LOAD:
          if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES
              && get_attr_update (insn) == UPDATE_YES)
            return true;
          else
            break;
        case TYPE_STORE:
          if (get_attr_update (insn) == UPDATE_YES
              && get_attr_indexed (insn) == INDEXED_YES)
            return true;
          else
            break;
        default:
          break;
        }
      break;
    default:
      break;
    }

  return false;
}

/* Return true if it is recommended to keep NEXT_INSN "far" (in a separate
   dispatch group) from the insns in GROUP_INSNS.  Return false otherwise.  */

static bool
is_costly_group (rtx *group_insns, rtx next_insn)
{
  int i;
  int issue_rate = rs6000_issue_rate ();

  for (i = 0; i < issue_rate; i++)
    {
      sd_iterator_def sd_it;
      dep_t dep;
      rtx insn = group_insns[i];

      if (!insn)
        continue;

      FOR_EACH_DEP (insn, SD_LIST_RES_FORW, sd_it, dep)
        {
          rtx next = DEP_CON (dep);

          if (next == next_insn
              && rs6000_is_costly_dependence (dep, dep_cost (dep), 0))
            return true;
        }
    }

  return false;
}

/* Utility of the function redefine_groups.
   Check if it is too costly to schedule NEXT_INSN together with GROUP_INSNS
   in the same dispatch group.  If so, insert nops before NEXT_INSN, in order
   to keep it "far" (in a separate group) from GROUP_INSNS, following
   one of the following schemes, depending on the value of the flag
   -minsert-sched-nops = X:
   (1) X == sched_finish_regroup_exact: insert exactly as many nops as needed
       in order to force NEXT_INSN into a separate group.
   (2) X < sched_finish_regroup_exact: insert exactly X nops.
   GROUP_END, CAN_ISSUE_MORE and GROUP_COUNT record the state after nop
   insertion (has a group just ended, how many vacant issue slots remain in
   the last group, and how many dispatch groups were encountered so far).  */

static int
force_new_group (int sched_verbose, FILE *dump, rtx *group_insns,
                 rtx_insn *next_insn, bool *group_end, int can_issue_more,
                 int *group_count)
{
  rtx nop;
  bool force;
  int issue_rate = rs6000_issue_rate ();
  bool end = *group_end;
  int i;

  if (next_insn == NULL_RTX || DEBUG_INSN_P (next_insn))
    return can_issue_more;

  if (rs6000_sched_insert_nops > sched_finish_regroup_exact)
    return can_issue_more;

  force = is_costly_group (group_insns, next_insn);
  if (!force)
    return can_issue_more;

  if (sched_verbose > 6)
    fprintf (dump, "force: group count = %d, can_issue_more = %d\n",
             *group_count, can_issue_more);

  if (rs6000_sched_insert_nops == sched_finish_regroup_exact)
    {
      if (*group_end)
        can_issue_more = 0;

      /* Since only a branch can be issued in the last issue_slot, it is
         sufficient to insert 'can_issue_more - 1' nops if next_insn is not
         a branch.  If next_insn is a branch, we insert 'can_issue_more' nops;
         in this case the last nop will start a new group and the branch
         will be forced to the new group.  */
      if (can_issue_more && !is_branch_slot_insn (next_insn))
        can_issue_more--;

      /* Do we have a special group ending nop?  */
      if (rs6000_tune == PROCESSOR_POWER6 || rs6000_tune == PROCESSOR_POWER7
          || rs6000_tune == PROCESSOR_POWER8)
        {
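          /* A single group-ending nop terminates the dispatch group on
             these processors, so no further slot padding is needed.  */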
          nop = gen_group_ending_nop ();
          emit_insn_before (nop, next_insn);
          can_issue_more = 0;
        }
      else
        while (can_issue_more > 0)
          {
            nop = gen_nop ();
            emit_insn_before (nop, next_insn);
            can_issue_more--;
          }

      *group_end = true;
      return 0;
    }

  if (rs6000_sched_insert_nops < sched_finish_regroup_exact)
    {
      int n_nops = rs6000_sched_insert_nops;

      /* Nops can't be issued from the branch slot, so the effective
         issue_rate for nops is 'issue_rate - 1'.  */
      if (can_issue_more == 0)
        can_issue_more = issue_rate;
      can_issue_more--;
      if (can_issue_more == 0)
        {
          can_issue_more = issue_rate - 1;
          (*group_count)++;
          end = true;
          for (i = 0; i < issue_rate; i++)
            {
              group_insns[i] = 0;
            }
        }

      while (n_nops > 0)
        {
          nop = gen_nop ();
          emit_insn_before (nop, next_insn);
          if (can_issue_more == issue_rate - 1) /* new group begins */
            end = false;
          can_issue_more--;
          if (can_issue_more == 0)
            {
              can_issue_more = issue_rate - 1;
              (*group_count)++;
              end = true;
              for (i = 0; i < issue_rate; i++)
                {
                  group_insns[i] = 0;
                }
            }
          n_nops--;
        }

      /* Scale back relative to 'issue_rate' (instead of 'issue_rate - 1').  */
      can_issue_more++;

      /* Is next_insn going to start a new group?  */
      *group_end
        = (end
           || (can_issue_more == 1 && !is_branch_slot_insn (next_insn))
           || (can_issue_more <= 2 && is_cracked_insn (next_insn))
           || (can_issue_more < issue_rate
               && insn_terminates_group_p (next_insn, previous_group)));
      if (*group_end && end)
        (*group_count)--;

      if (sched_verbose > 6)
        fprintf (dump, "done force: group count = %d, can_issue_more = %d\n",
                 *group_count, can_issue_more);
      return can_issue_more;
    }

  return can_issue_more;
}

/* This function tries to synch the dispatch groups that the compiler "sees"
   with the dispatch groups that the processor dispatcher is expected to
   form in practice.  It tries to achieve this synchronization by forcing the
   estimated processor grouping on the compiler (as opposed to the function
   'pad_groups' which tries to force the scheduler's grouping on the
   processor).

   The function scans the insn sequence between PREV_HEAD_INSN and TAIL and
   examines the (estimated) dispatch groups that will be formed by the
   processor dispatcher.  It marks these group boundaries to reflect the
   estimated processor grouping, overriding the grouping that the scheduler
   had marked.  Depending on the value of the flag '-minsert-sched-nops'
   this function can force certain insns into separate groups or force a
   certain distance between them by inserting nops, for example, if there
   exists a "costly dependence" between the insns.

   The function estimates the group boundaries that the processor will form as
   follows:  It keeps track of how many vacant issue slots are available after
   each insn.  A subsequent insn will start a new group if one of the following
   4 cases applies:
   - no more vacant issue slots remain in the current dispatch group.
   - only the last issue slot, which is the branch slot, is vacant, but the
     next insn is not a branch.
   - only the last 2 or fewer issue slots, including the branch slot, are
     vacant, which means that a cracked insn (which occupies two issue slots)
     can't be issued in this group.
   - fewer than 'issue_rate' slots are vacant, and the next insn always needs
     to start a new group.  */

static int
redefine_groups (FILE *dump, int sched_verbose, rtx_insn *prev_head_insn,
                 rtx_insn *tail)
{
  rtx_insn *insn, *next_insn;
  int issue_rate;
  int can_issue_more;
  int slot, i;
  bool group_end;
  int group_count = 0;
  rtx *group_insns;

  /* Initialize.  */
  issue_rate = rs6000_issue_rate ();
  group_insns = XALLOCAVEC (rtx, issue_rate);
  for (i = 0; i < issue_rate; i++)
    {
      group_insns[i] = 0;
    }
  can_issue_more = issue_rate;
  slot = 0;
  insn = get_next_active_insn (prev_head_insn, tail);
  group_end = false;

  while (insn != NULL_RTX)
    {
      slot = (issue_rate - can_issue_more);
      group_insns[slot] = insn;
      can_issue_more =
        rs6000_variable_issue (dump, sched_verbose, insn, can_issue_more);
      if (insn_terminates_group_p (insn, current_group))
        can_issue_more = 0;

      next_insn = get_next_active_insn (insn, tail);
      if (next_insn == NULL_RTX)
        return group_count + 1;

      /* Is next_insn going to start a new group?  */
      group_end
        = (can_issue_more == 0
           || (can_issue_more == 1 && !is_branch_slot_insn (next_insn))
           || (can_issue_more <= 2 && is_cracked_insn (next_insn))
           || (can_issue_more < issue_rate
               && insn_terminates_group_p (next_insn, previous_group)));

      can_issue_more = force_new_group (sched_verbose, dump, group_insns,
                                        next_insn, &group_end, can_issue_more,
                                        &group_count);

      if (group_end)
        {
          group_count++;
          can_issue_more = 0;
          for (i = 0; i < issue_rate; i++)
            {
              group_insns[i] = 0;
            }
        }

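      /* An insn carrying TImode marks the start of a new dispatch group;
         update the marking on next_insn so it matches the grouping computed
         above (pad_groups reads these marks back).  */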
      if (GET_MODE (next_insn) == TImode && can_issue_more)
        PUT_MODE (next_insn, VOIDmode);
      else if (!can_issue_more && GET_MODE (next_insn) != TImode)
        PUT_MODE (next_insn, TImode);

      insn = next_insn;
      if (can_issue_more == 0)
        can_issue_more = issue_rate;
    } /* while */

  return group_count;
}

/* Scan the insn sequence between PREV_HEAD_INSN and TAIL and examine the
   dispatch group boundaries that the scheduler had marked.  Pad with nops
   any dispatch groups which have vacant issue slots, in order to force the
   scheduler's grouping on the processor dispatcher.  The function
   returns the number of dispatch groups found.  */

static int
pad_groups (FILE *dump, int sched_verbose, rtx_insn *prev_head_insn,
            rtx_insn *tail)
{
  rtx_insn *insn, *next_insn;
  rtx nop;
  int issue_rate;
  int can_issue_more;
  int group_end;
  int group_count = 0;

  /* Initialize issue_rate.  */
  issue_rate = rs6000_issue_rate ();
  can_issue_more = issue_rate;

  insn = get_next_active_insn (prev_head_insn, tail);
  next_insn = get_next_active_insn (insn, tail);

  while (insn != NULL_RTX)
    {
      can_issue_more =
        rs6000_variable_issue (dump, sched_verbose, insn, can_issue_more);

      group_end = (next_insn == NULL_RTX || GET_MODE (next_insn) == TImode);

      if (next_insn == NULL_RTX)
        break;

      if (group_end)
        {
          /* If the scheduler had marked group termination at this location
             (between insn and next_insn), and neither insn nor next_insn will
             force group termination, pad the group with nops to force group
             termination.  */
          if (can_issue_more
              && (rs6000_sched_insert_nops == sched_finish_pad_groups)
              && !insn_terminates_group_p (insn, current_group)
              && !insn_terminates_group_p (next_insn, previous_group))
            {
              if (!is_branch_slot_insn (next_insn))
                can_issue_more--;

              while (can_issue_more)
                {
                  nop = gen_nop ();
                  emit_insn_before (nop, next_insn);
                  can_issue_more--;
                }
            }

          can_issue_more = issue_rate;
          group_count++;
        }

      insn = next_insn;
      next_insn = get_next_active_insn (insn, tail);
    }

  return group_count;
}

/* We're beginning a new block.  Initialize data structures as necessary.  */

static void
rs6000_sched_init (FILE *dump ATTRIBUTE_UNUSED,
                   int sched_verbose ATTRIBUTE_UNUSED,
                   int max_ready ATTRIBUTE_UNUSED)
{
  last_scheduled_insn = NULL;
  load_store_pendulum = 0;
  divide_cnt = 0;
  vec_pairing = 0;
}

/* The following function is called at the end of scheduling BB.
   After reload, it inserts nops as needed to enforce the insn group
   bundling.  */

static void
rs6000_sched_finish (FILE *dump, int sched_verbose)
{
  int n_groups;

  if (sched_verbose)
    fprintf (dump, "=== Finishing schedule.\n");

  if (reload_completed && rs6000_sched_groups)
    {
      /* Do not run sched_finish hook when selective scheduling enabled.  */
      if (sel_sched_p ())
        return;

      if (rs6000_sched_insert_nops == sched_finish_none)
        return;

      if (rs6000_sched_insert_nops == sched_finish_pad_groups)
        n_groups = pad_groups (dump, sched_verbose,
                               current_sched_info->prev_head,
                               current_sched_info->next_tail);
      else
        n_groups = redefine_groups (dump, sched_verbose,
                                    current_sched_info->prev_head,
                                    current_sched_info->next_tail);

      if (sched_verbose >= 6)
        {
          fprintf (dump, "ngroups = %d\n", n_groups);
          print_rtl (dump, current_sched_info->prev_head);
          fprintf (dump, "Done finish_sched\n");
        }
    }
}

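/* Scheduler state to save and restore around the selective scheduler's
   context switches; mirrors the file-scope variables used by the hooks
   above.  */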
struct rs6000_sched_context
{
  short cached_can_issue_more;
  rtx_insn *last_scheduled_insn;
  int load_store_pendulum;
  int divide_cnt;
  int vec_pairing;
};

typedef struct rs6000_sched_context rs6000_sched_context_def;
typedef rs6000_sched_context_def *rs6000_sched_context_t;

/* Allocate store for new scheduling context.  */
static void *
rs6000_alloc_sched_context (void)
{
  return xmalloc (sizeof (rs6000_sched_context_def));
}

/* If CLEAN_P is true, initialize _SC with clean data; otherwise,
   initialize it from the current global context.  */
static void
rs6000_init_sched_context (void *_sc, bool clean_p)
{
  rs6000_sched_context_t sc = (rs6000_sched_context_t) _sc;

  if (clean_p)
    {
      sc->cached_can_issue_more = 0;
      sc->last_scheduled_insn = NULL;
      sc->load_store_pendulum = 0;
      sc->divide_cnt = 0;
      sc->vec_pairing = 0;
    }
  else
    {
      sc->cached_can_issue_more = cached_can_issue_more;
      sc->last_scheduled_insn = last_scheduled_insn;
      sc->load_store_pendulum = load_store_pendulum;
      sc->divide_cnt = divide_cnt;
      sc->vec_pairing = vec_pairing;
    }
}

/* Sets the global scheduling context to the one pointed to by _SC.  */
static void
rs6000_set_sched_context (void *_sc)
{
  rs6000_sched_context_t sc = (rs6000_sched_context_t) _sc;

  gcc_assert (sc != NULL);

  cached_can_issue_more = sc->cached_can_issue_more;
  last_scheduled_insn = sc->last_scheduled_insn;
  load_store_pendulum = sc->load_store_pendulum;
  divide_cnt = sc->divide_cnt;
  vec_pairing = sc->vec_pairing;
}

/* Free _SC.  */
static void
rs6000_free_sched_context (void *_sc)
{
  gcc_assert (_sc != NULL);

  free (_sc);
}

static bool
rs6000_sched_can_speculate_insn (rtx_insn *insn)
{
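  /* Division and square-root insns are rejected below, presumably because
     their long, data-dependent latencies make speculation unprofitable.  */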
  switch (get_attr_type (insn))
    {
    case TYPE_DIV:
    case TYPE_SDIV:
    case TYPE_DDIV:
    case TYPE_VECDIV:
    case TYPE_SSQRT:
    case TYPE_DSQRT:
      return false;

    default:
      return true;
    }
}

/* Length in units of the trampoline for entering a nested function.  */

int
rs6000_trampoline_size (void)
{
  int ret = 0;

  switch (DEFAULT_ABI)
    {
    default:
      gcc_unreachable ();

    case ABI_AIX:
      ret = (TARGET_32BIT) ? 12 : 24;
      break;

    case ABI_ELFv2:
      gcc_assert (!TARGET_32BIT);
      ret = 32;
      break;

    case ABI_DARWIN:
    case ABI_V4:
      ret = (TARGET_32BIT) ? 40 : 48;
      break;
    }

  return ret;
}

/* Emit RTL insns to initialize the variable parts of a trampoline.
   FNADDR is an RTX for the address of the function's pure code.
   CXT is an RTX for the static chain value for the function.  */

static void
rs6000_trampoline_init (rtx m_tramp, tree fndecl, rtx cxt)
{
  int regsize = (TARGET_32BIT) ? 4 : 8;
  rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
  rtx ctx_reg = force_reg (Pmode, cxt);
  rtx addr = force_reg (Pmode, XEXP (m_tramp, 0));

  switch (DEFAULT_ABI)
    {
    default:
      gcc_unreachable ();

    /* Under AIX, just build the three-word function descriptor.  */
    case ABI_AIX:
      {
        rtx fnmem, fn_reg, toc_reg;

        if (!TARGET_POINTERS_TO_NESTED_FUNCTIONS)
          error ("you cannot take the address of a nested function if you use "
                 "the %qs option", "-mno-pointers-to-nested-functions");

        fnmem = gen_const_mem (Pmode, force_reg (Pmode, fnaddr));
        fn_reg = gen_reg_rtx (Pmode);
        toc_reg = gen_reg_rtx (Pmode);

        /* Macro to shorten the code expansions below.  */
# define MEM_PLUS(MEM, OFFSET) adjust_address (MEM, Pmode, OFFSET)

        m_tramp = replace_equiv_address (m_tramp, addr);

        emit_move_insn (fn_reg, MEM_PLUS (fnmem, 0));
        emit_move_insn (toc_reg, MEM_PLUS (fnmem, regsize));
        emit_move_insn (MEM_PLUS (m_tramp, 0), fn_reg);
        emit_move_insn (MEM_PLUS (m_tramp, regsize), toc_reg);
        emit_move_insn (MEM_PLUS (m_tramp, 2*regsize), ctx_reg);

# undef MEM_PLUS
      }
      break;

    /* Under V.4/eabi/darwin, __trampoline_setup does the real work.  */
    case ABI_ELFv2:
    case ABI_DARWIN:
    case ABI_V4:
      emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__trampoline_setup"),
                         LCT_NORMAL, VOIDmode,
                         addr, Pmode,
                         GEN_INT (rs6000_trampoline_size ()), SImode,
                         fnaddr, Pmode,
                         ctx_reg, Pmode);
      break;
    }
}


/* Returns TRUE iff the target attribute indicated by ATTR_ID takes a plain
   identifier as an argument, so the front end shouldn't look it up.  */

static bool
rs6000_attribute_takes_identifier_p (const_tree attr_id)
{
  return is_attribute_p ("altivec", attr_id);
}

/* Handle the "altivec" attribute.  The attribute may have
   arguments as follows:

        __attribute__((altivec(vector__)))
        __attribute__((altivec(pixel__)))	(always followed by 'unsigned short')
        __attribute__((altivec(bool__)))	(always followed by 'unsigned')

   and may appear more than once (e.g., 'vector bool char') in a
   given declaration.  */

static tree
rs6000_handle_altivec_attribute (tree *node,
                                 tree name ATTRIBUTE_UNUSED,
                                 tree args,
                                 int flags ATTRIBUTE_UNUSED,
                                 bool *no_add_attrs)
{
  tree type = *node, result = NULL_TREE;
  machine_mode mode;
  int unsigned_p;
  char altivec_type
    = ((args && TREE_CODE (args) == TREE_LIST && TREE_VALUE (args)
        && TREE_CODE (TREE_VALUE (args)) == IDENTIFIER_NODE)
       ? *IDENTIFIER_POINTER (TREE_VALUE (args))
       : '?');

  while (POINTER_TYPE_P (type)
         || TREE_CODE (type) == FUNCTION_TYPE
         || TREE_CODE (type) == METHOD_TYPE
         || TREE_CODE (type) == ARRAY_TYPE)
    type = TREE_TYPE (type);

  mode = TYPE_MODE (type);

  /* Check for invalid AltiVec type qualifiers.  */
  if (type == long_double_type_node)
    error ("use of %<long double%> in AltiVec types is invalid");
  else if (type == boolean_type_node)
    error ("use of boolean types in AltiVec types is invalid");
  else if (TREE_CODE (type) == COMPLEX_TYPE)
    error ("use of %<complex%> in AltiVec types is invalid");
  else if (DECIMAL_FLOAT_MODE_P (mode))
    error ("use of decimal floating-point types in AltiVec types is invalid");
  else if (!TARGET_VSX)
    {
      if (type == long_unsigned_type_node || type == long_integer_type_node)
        {
          if (TARGET_64BIT)
            error ("use of %<long%> in AltiVec types is invalid for "
                   "64-bit code without %qs", "-mvsx");
          else if (rs6000_warn_altivec_long)
            warning (0, "use of %<long%> in AltiVec types is deprecated; "
                     "use %<int%>");
        }
      else if (type == long_long_unsigned_type_node
               || type == long_long_integer_type_node)
        error ("use of %<long long%> in AltiVec types is invalid without %qs",
               "-mvsx");
      else if (type == double_type_node)
        error ("use of %<double%> in AltiVec types is invalid without %qs",
               "-mvsx");
    }

  switch (altivec_type)
    {
    case 'v':
      unsigned_p = TYPE_UNSIGNED (type);
      switch (mode)
        {
        case E_TImode:
          result = (unsigned_p ? unsigned_V1TI_type_node : V1TI_type_node);
          break;
        case E_DImode:
          result = (unsigned_p ? unsigned_V2DI_type_node : V2DI_type_node);
          break;
        case E_SImode:
          result = (unsigned_p ? unsigned_V4SI_type_node : V4SI_type_node);
          break;
        case E_HImode:
          result = (unsigned_p ? unsigned_V8HI_type_node : V8HI_type_node);
          break;
        case E_QImode:
          result = (unsigned_p ? unsigned_V16QI_type_node : V16QI_type_node);
          break;
        case E_SFmode: result = V4SF_type_node; break;
        case E_DFmode: result = V2DF_type_node; break;
          /* If the user says 'vector int bool', we may be handed the 'bool'
             attribute _before_ the 'vector' attribute, and so select the
             proper type in the 'b' case below.  */
        case E_V4SImode: case E_V8HImode: case E_V16QImode: case E_V4SFmode:
        case E_V2DImode: case E_V2DFmode:
          result = type;
        default: break;
        }
      break;
    case 'b':
      switch (mode)
        {
        case E_TImode: case E_V1TImode: result = bool_V1TI_type_node; break;
        case E_DImode: case E_V2DImode: result = bool_V2DI_type_node; break;
        case E_SImode: case E_V4SImode: result = bool_V4SI_type_node; break;
        case E_HImode: case E_V8HImode: result = bool_V8HI_type_node; break;
        case E_QImode: case E_V16QImode: result = bool_V16QI_type_node;
        default: break;
        }
      break;
    case 'p':
      switch (mode)
        {
        case E_V8HImode: result = pixel_V8HI_type_node;
        default: break;
        }
    default: break;
    }

  /* Propagate qualifiers attached to the element type
     onto the vector type.  */
  if (result && result != type && TYPE_QUALS (type))
    result = build_qualified_type (result, TYPE_QUALS (type));

  *no_add_attrs = true;  /* No need to hang on to the attribute.  */

  if (result)
    *node = lang_hooks.types.reconstruct_complex_type (*node, result);

  return NULL_TREE;
}

/* AltiVec defines five built-in scalar types that serve as vector
   elements; we must teach the compiler how to mangle them.  The 128-bit
   floating point mangling is target-specific as well.  MMA defines
   two built-in types to be used as opaque vector types.  */

static const char *
rs6000_mangle_type (const_tree type)
{
  type = TYPE_MAIN_VARIANT (type);

  if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
      && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE
      && TREE_CODE (type) != OPAQUE_TYPE)
    return NULL;

  if (type == bool_char_type_node) return "U6__boolc";
  if (type == bool_short_type_node) return "U6__bools";
  if (type == pixel_type_node) return "u7__pixel";
  if (type == bool_int_type_node) return "U6__booli";
  if (type == bool_long_long_type_node) return "U6__boolx";

  if (SCALAR_FLOAT_TYPE_P (type) && FLOAT128_IBM_P (TYPE_MODE (type)))
    return "g";
  if (SCALAR_FLOAT_TYPE_P (type) && FLOAT128_IEEE_P (TYPE_MODE (type)))
    return "u9__ieee128";

  if (type == vector_pair_type_node)
    return "u13__vector_pair";
  if (type == vector_quad_type_node)
    return "u13__vector_quad";

  /* For all other types, use the default mangling.  */
  return NULL;
}

/* Handle a "longcall" or "shortcall" attribute; arguments as in
   struct attribute_spec.handler.  */

static tree
rs6000_handle_longcall_attribute (tree *node, tree name,
                                  tree args ATTRIBUTE_UNUSED,
                                  int flags ATTRIBUTE_UNUSED,
                                  bool *no_add_attrs)
{
  if (TREE_CODE (*node) != FUNCTION_TYPE
      && TREE_CODE (*node) != FIELD_DECL
      && TREE_CODE (*node) != TYPE_DECL)
    {
      warning (OPT_Wattributes, "%qE attribute only applies to functions",
               name);
      *no_add_attrs = true;
    }

  return NULL_TREE;
}

/* Set longcall attributes on all functions declared when
   rs6000_default_long_calls is true.  */
static void
rs6000_set_default_type_attributes (tree type)
{
  if (rs6000_default_long_calls
      && (TREE_CODE (type) == FUNCTION_TYPE
          || TREE_CODE (type) == METHOD_TYPE))
    TYPE_ATTRIBUTES (type) = tree_cons (get_identifier ("longcall"),
                                        NULL_TREE,
                                        TYPE_ATTRIBUTES (type));

#if TARGET_MACHO
  darwin_set_default_type_attributes (type);
#endif
}

/* Return a reference suitable for calling a function with the
   longcall attribute.  */

static rtx
rs6000_longcall_ref (rtx call_ref, rtx arg)
{
  /* System V adds '.' to the internal name, so skip any leading periods.  */
  const char *call_name = XSTR (call_ref, 0);
  if (*call_name == '.')
    {
      while (*call_name == '.')
        call_name++;

      tree node = get_identifier (call_name);
      call_ref = gen_rtx_SYMBOL_REF (VOIDmode, IDENTIFIER_POINTER (node));
    }

  if (TARGET_PLTSEQ)
    {
      rtx base = const0_rtx;
      int regno = 12;
      if (rs6000_pcrel_p ())
        {
          rtx reg = gen_rtx_REG (Pmode, regno);
          rtx u = gen_rtx_UNSPEC_VOLATILE (Pmode,
                                           gen_rtvec (3, base, call_ref, arg),
                                           UNSPECV_PLT_PCREL);
          emit_insn (gen_rtx_SET (reg, u));
          return reg;
        }

      if (DEFAULT_ABI == ABI_ELFv2)
        base = gen_rtx_REG (Pmode, TOC_REGISTER);
      else
        {
          if (flag_pic)
            base = gen_rtx_REG (Pmode, RS6000_PIC_OFFSET_TABLE_REGNUM);
          regno = 11;
        }
      /* Reg must match that used by linker PLT stubs.  For ELFv2, r12
         may be used by a function global entry point.  For SysV4, r11
         is used by __glink_PLTresolve lazy resolver entry.  */
      rtx reg = gen_rtx_REG (Pmode, regno);
      rtx hi = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, base, call_ref, arg),
                               UNSPEC_PLT16_HA);
      rtx lo = gen_rtx_UNSPEC_VOLATILE (Pmode,
                                        gen_rtvec (3, reg, call_ref, arg),
                                        UNSPECV_PLT16_LO);
      emit_insn (gen_rtx_SET (reg, hi));
      emit_insn (gen_rtx_SET (reg, lo));
      return reg;
    }

  return force_reg (Pmode, call_ref);
}

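/* Default the Microsoft bit-field layout off unless the target headers
   have opted in.  */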
#ifndef TARGET_USE_MS_BITFIELD_LAYOUT
#define TARGET_USE_MS_BITFIELD_LAYOUT 0
#endif

/* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
   struct attribute_spec.handler.  */
static tree
rs6000_handle_struct_attribute (tree *node, tree name,
                                tree args ATTRIBUTE_UNUSED,
                                int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
{
  tree *type = NULL;
  if (DECL_P (*node))
    {
      if (TREE_CODE (*node) == TYPE_DECL)
        type = &TREE_TYPE (*node);
    }
  else
    type = node;

  if (!(type && (TREE_CODE (*type) == RECORD_TYPE
                 || TREE_CODE (*type) == UNION_TYPE)))
    {
      warning (OPT_Wattributes, "%qE attribute ignored", name);
      *no_add_attrs = true;
    }

  else if ((is_attribute_p ("ms_struct", name)
            && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
           || ((is_attribute_p ("gcc_struct", name)
                && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
    {
      warning (OPT_Wattributes, "%qE incompatible attribute ignored",
               name);
      *no_add_attrs = true;
    }

  return NULL_TREE;
}

static bool
rs6000_ms_bitfield_layout_p (const_tree record_type)
{
  return ((TARGET_USE_MS_BITFIELD_LAYOUT
           && !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
          || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type)));
}

#ifdef USING_ELFOS_H

/* A get_unnamed_section callback, used for switching to toc_section.  */

static void
rs6000_elf_output_toc_section_asm_op (const char *data ATTRIBUTE_UNUSED)
{
  if ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
      && TARGET_MINIMAL_TOC)
    {
      if (!toc_initialized)
        {
          fprintf (asm_out_file, "%s\n", TOC_SECTION_ASM_OP);
          ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
          (*targetm.asm_out.internal_label) (asm_out_file, "LCTOC", 0);
          fprintf (asm_out_file, "\t.tc ");
          ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1[TC],");
          ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1");
          fprintf (asm_out_file, "\n");

          fprintf (asm_out_file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
          ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
          ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1");
          fprintf (asm_out_file, " = .+32768\n");
          toc_initialized = 1;
        }
      else
        fprintf (asm_out_file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
    }
  else if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
    {
      fprintf (asm_out_file, "%s\n", TOC_SECTION_ASM_OP);
      if (!toc_initialized)
        {
          ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
          toc_initialized = 1;
        }
    }
  else
    {
      fprintf (asm_out_file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
      if (!toc_initialized)
        {
          ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
          ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1");
          fprintf (asm_out_file, " = .+32768\n");
          toc_initialized = 1;
        }
    }
}

/* Implement TARGET_ASM_INIT_SECTIONS.  */

static void
rs6000_elf_asm_init_sections (void)
{
  toc_section
    = get_unnamed_section (0, rs6000_elf_output_toc_section_asm_op, NULL);

  sdata2_section
    = get_unnamed_section (SECTION_WRITE, output_section_asm_op,
                           SDATA2_SECTION_ASM_OP);
}

/* Implement TARGET_SELECT_RTX_SECTION.  */

static section *
rs6000_elf_select_rtx_section (machine_mode mode, rtx x,
                               unsigned HOST_WIDE_INT align)
{
  if (ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (x, mode))
    return toc_section;
  else
    return default_elf_select_rtx_section (mode, x, align);
}

/* For a SYMBOL_REF, set generic flags and then perform some
   target-specific processing.

   When the AIX ABI is requested on a non-AIX system, replace the
   function name with the real name (with a leading .) rather than the
   function descriptor name.  This saves a lot of overriding code to
   read the prefixes.  */

static void rs6000_elf_encode_section_info (tree, rtx, int) ATTRIBUTE_UNUSED;
static void
rs6000_elf_encode_section_info (tree decl, rtx rtl, int first)
{
  default_encode_section_info (decl, rtl, first);

  if (first
      && TREE_CODE (decl) == FUNCTION_DECL
      && !TARGET_AIX
      && DEFAULT_ABI == ABI_AIX)
    {
      rtx sym_ref = XEXP (rtl, 0);
      size_t len = strlen (XSTR (sym_ref, 0));
      char *str = XALLOCAVEC (char, len + 2);
      str[0] = '.';
      memcpy (str + 1, XSTR (sym_ref, 0), len + 1);
      XSTR (sym_ref, 0) = ggc_alloc_string (str, len + 1);
    }
}

static inline bool
compare_section_name (const char *section, const char *templ)
{
  int len;

  len = strlen (templ);
  return (strncmp (section, templ, len) == 0
          && (section[len] == 0 || section[len] == '.'));
}

bool
rs6000_elf_in_small_data_p (const_tree decl)
{
  if (rs6000_sdata == SDATA_NONE)
    return false;

  /* We want to merge strings, so we never consider them small data.  */
  if (TREE_CODE (decl) == STRING_CST)
    return false;

  /* Functions are never in the small data area.  */
  if (TREE_CODE (decl) == FUNCTION_DECL)
    return false;

  if (TREE_CODE (decl) == VAR_DECL && DECL_SECTION_NAME (decl))
    {
      const char *section = DECL_SECTION_NAME (decl);
      if (compare_section_name (section, ".sdata")
          || compare_section_name (section, ".sdata2")
          || compare_section_name (section, ".gnu.linkonce.s")
          || compare_section_name (section, ".sbss")
          || compare_section_name (section, ".sbss2")
          || compare_section_name (section, ".gnu.linkonce.sb")
          || strcmp (section, ".PPC.EMB.sdata0") == 0
          || strcmp (section, ".PPC.EMB.sbss0") == 0)
        return true;
    }
  else
    {
      /* If we are told not to put readonly data in sdata, then don't.  */
      if (TREE_READONLY (decl) && rs6000_sdata != SDATA_EABI
          && !rs6000_readonly_in_sdata)
        return false;

      HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (decl));

      if (size > 0
          && size <= g_switch_value
          /* If it's not public, and we're not going to reference it there,
             there's no need to put it in the small data section.  */
          && (rs6000_sdata != SDATA_DATA || TREE_PUBLIC (decl)))
        return true;
    }

  return false;
}

#endif /* USING_ELFOS_H */

/* Implement TARGET_USE_BLOCKS_FOR_CONSTANT_P.  */

static bool
rs6000_use_blocks_for_constant_p (machine_mode mode, const_rtx x)
{
  return !ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (x, mode);
}

/* Do not place thread-local symbol refs in the object blocks.  */

static bool
rs6000_use_blocks_for_decl_p (const_tree decl)
{
  return !DECL_THREAD_LOCAL_P (decl);
}

/* Return a REG that occurs in ADDR with coefficient 1.
   ADDR can be effectively incremented by incrementing REG.

   r0 is special and we must not select it as an address
   register by this routine since our caller will try to
   increment the returned register via an "la" instruction.  */

rtx
find_addr_reg (rtx addr)
{
  while (GET_CODE (addr) == PLUS)
    {
      if (REG_P (XEXP (addr, 0))
          && REGNO (XEXP (addr, 0)) != 0)
        addr = XEXP (addr, 0);
      else if (REG_P (XEXP (addr, 1))
               && REGNO (XEXP (addr, 1)) != 0)
        addr = XEXP (addr, 1);
      else if (CONSTANT_P (XEXP (addr, 0)))
        addr = XEXP (addr, 1);
      else if (CONSTANT_P (XEXP (addr, 1)))
        addr = XEXP (addr, 0);
      else
        gcc_unreachable ();
    }
  gcc_assert (REG_P (addr) && REGNO (addr) != 0);
  return addr;
}

void
rs6000_fatal_bad_address (rtx op)
{
  fatal_insn ("bad address", op);
}

#if TARGET_MACHO

vec<branch_island, va_gc> *branch_islands;

/* Remember to generate a branch island for far calls to the given
   function.  */

static void
add_compiler_branch_island (tree label_name, tree function_name,
                            int line_number)
{
  branch_island bi = {function_name, label_name, line_number};
  vec_safe_push (branch_islands, bi);
}

/* NO_PREVIOUS_DEF checks the branch island list for whether the function
   name is already there or not.  */

static int
no_previous_def (tree function_name)
{
  branch_island *bi;
  unsigned ix;

  FOR_EACH_VEC_SAFE_ELT (branch_islands, ix, bi)
    if (function_name == bi->function_name)
      return 0;
  return 1;
}

/* GET_PREV_LABEL gets the label name from the previous definition of
   the function.  */

static tree
get_prev_label (tree function_name)
{
  branch_island *bi;
  unsigned ix;

  FOR_EACH_VEC_SAFE_ELT (branch_islands, ix, bi)
    if (function_name == bi->function_name)
      return bi->label_name;
  return NULL_TREE;
}

/* Generate external symbol indirection stubs (PIC and non-PIC).  */

void
machopic_output_stub (FILE *file, const char *symb, const char *stub)
{
  unsigned int length;
  char *symbol_name, *lazy_ptr_name;
  char *local_label_0;
  static unsigned label = 0;

  /* Lose our funky encoding stuff so it doesn't contaminate the stub.  */
  symb = (*targetm.strip_name_encoding) (symb);

  length = strlen (symb);
  symbol_name = XALLOCAVEC (char, length + 32);
  GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);

  lazy_ptr_name = XALLOCAVEC (char, length + 32);
  GEN_LAZY_PTR_NAME_FOR_SYMBOL (lazy_ptr_name, symb, length);

  if (MACHOPIC_PURE)
    {
      switch_to_section (darwin_sections[machopic_picsymbol_stub1_section]);
      fprintf (file, "\t.align 5\n");

      fprintf (file, "%s:\n", stub);
      fprintf (file, "\t.indirect_symbol %s\n", symbol_name);

      label++;
      local_label_0 = XALLOCAVEC (char, 16);
      sprintf (local_label_0, "L%u$spb", label);

20605       fprintf (file, "\tmflr r0\n");
20606       fprintf (file, "\tbcl 20,31,%s\n", local_label_0);
20607       fprintf (file, "%s:\n\tmflr r11\n", local_label_0);
20608       fprintf (file, "\taddis r11,r11,ha16(%s-%s)\n",
20609 	       lazy_ptr_name, local_label_0);
20610       fprintf (file, "\tmtlr r0\n");
20611       fprintf (file, "\t%s r12,lo16(%s-%s)(r11)\n",
20612 	       (TARGET_64BIT ? "ldu" : "lwzu"),
20613 	       lazy_ptr_name, local_label_0);
20614       fprintf (file, "\tmtctr r12\n");
20615       fprintf (file, "\tbctr\n");
20616     }
20617   else /* mdynamic-no-pic or mkernel.  */
20618     {
20619       switch_to_section (darwin_sections[machopic_symbol_stub1_section]);
20620       fprintf (file, "\t.align 4\n");
20621 
20622       fprintf (file, "%s:\n", stub);
20623       fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
20624 
20625       fprintf (file, "\tlis r11,ha16(%s)\n", lazy_ptr_name);
20626       fprintf (file, "\t%s r12,lo16(%s)(r11)\n",
20627 	       (TARGET_64BIT ? "ldu" : "lwzu"),
20628 	       lazy_ptr_name);
20629       fprintf (file, "\tmtctr r12\n");
20630       fprintf (file, "\tbctr\n");
20631     }
20632 
20633   switch_to_section (darwin_sections[machopic_lazy_symbol_ptr_section]);
20634   fprintf (file, "%s:\n", lazy_ptr_name);
20635   fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
20636   fprintf (file, "%sdyld_stub_binding_helper\n",
20637 	   (TARGET_64BIT ? DOUBLE_INT_ASM_OP : "\t.long\t"));
20638 }
20639 
20640 /* Legitimize PIC addresses.  If the address is already
20641    position-independent, we return ORIG.  Newly generated
20642    position-independent addresses go into a reg.  This is REG if
20643    nonzero; otherwise we allocate register(s) as necessary.  */
20644 
20645 #define SMALL_INT(X) ((UINTVAL (X) + 0x8000) < 0x10000)
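/* SMALL_INT relies on unsigned wraparound: adding 0x8000 maps the
   signed range [-0x8000, 0x7fff] onto [0, 0xffff], so one unsigned
   compare tests whether X fits a signed 16-bit displacement.  E.g.
   0x7fff + 0x8000 = 0xffff is accepted, while 0x8000 + 0x8000 =
   0x10000 is rejected.  */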
20646 
20647 rtx
20648 rs6000_machopic_legitimize_pic_address (rtx orig, machine_mode mode,
20649 					rtx reg)
20650 {
20651   rtx base, offset;
20652 
20653   if (reg == NULL && !reload_completed)
20654     reg = gen_reg_rtx (Pmode);
20655 
20656   if (GET_CODE (orig) == CONST)
20657     {
20658       rtx reg_temp;
20659 
20660       if (GET_CODE (XEXP (orig, 0)) == PLUS
20661 	  && XEXP (XEXP (orig, 0), 0) == pic_offset_table_rtx)
20662 	return orig;
20663 
20664       gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
20665 
20666       /* Use a different reg for the intermediate value, as
20667 	 it will be marked UNCHANGING.  */
20668       reg_temp = !can_create_pseudo_p () ? reg : gen_reg_rtx (Pmode);
20669       base = rs6000_machopic_legitimize_pic_address (XEXP (XEXP (orig, 0), 0),
20670 						     Pmode, reg_temp);
20671       offset =
20672 	rs6000_machopic_legitimize_pic_address (XEXP (XEXP (orig, 0), 1),
20673 						Pmode, reg);
20674 
20675       if (CONST_INT_P (offset))
20676 	{
20677 	  if (SMALL_INT (offset))
20678 	    return plus_constant (Pmode, base, INTVAL (offset));
20679 	  else if (!reload_completed)
20680 	    offset = force_reg (Pmode, offset);
20681 	  else
20682 	    {
20683  	      rtx mem = force_const_mem (Pmode, orig);
20684 	      return machopic_legitimize_pic_address (mem, Pmode, reg);
20685 	    }
20686 	}
20687       return gen_rtx_PLUS (Pmode, base, offset);
20688     }
20689 
20690   /* Fall back on generic machopic code.  */
20691   return machopic_legitimize_pic_address (orig, mode, reg);
20692 }
20693 
20694 /* Output a .machine directive for the Darwin assembler, and call
20695    the generic start_file routine.  */
20696 
20697 static void
20698 rs6000_darwin_file_start (void)
20699 {
20700   static const struct
20701   {
20702     const char *arg;
20703     const char *name;
20704     HOST_WIDE_INT if_set;
20705   } mapping[] = {
20706     { "ppc64", "ppc64", MASK_64BIT },
20707     { "970", "ppc970", MASK_PPC_GPOPT | MASK_MFCRF | MASK_POWERPC64 },
20708     { "power4", "ppc970", 0 },
20709     { "G5", "ppc970", 0 },
20710     { "7450", "ppc7450", 0 },
20711     { "7400", "ppc7400", MASK_ALTIVEC },
20712     { "G4", "ppc7400", 0 },
20713     { "750", "ppc750", 0 },
20714     { "740", "ppc750", 0 },
20715     { "G3", "ppc750", 0 },
20716     { "604e", "ppc604e", 0 },
20717     { "604", "ppc604", 0 },
20718     { "603e", "ppc603", 0 },
20719     { "603", "ppc603", 0 },
20720     { "601", "ppc601", 0 },
20721     { NULL, "ppc", 0 } };
20722   const char *cpu_id = "";
20723   size_t i;
20724 
20725   rs6000_file_start ();
20726   darwin_file_start ();
20727 
20728   /* Determine the argument to -mcpu=.  Default to G3 if not specified.  */
20729 
20730   if (rs6000_default_cpu != 0 && rs6000_default_cpu[0] != '\0')
20731     cpu_id = rs6000_default_cpu;
20732 
20733   if (OPTION_SET_P (rs6000_cpu_index))
20734     cpu_id = processor_target_table[rs6000_cpu_index].name;
20735 
20736   /* Look through the mapping array.  Pick the first name that either
20737      matches the argument, has a bit set in IF_SET that is also set
20738      in the target flags, or has a NULL name.  */
20739 
20740   i = 0;
20741   while (mapping[i].arg != NULL
20742 	 && strcmp (mapping[i].arg, cpu_id) != 0
20743 	 && (mapping[i].if_set & rs6000_isa_flags) == 0)
20744     i++;
20745 
20746   fprintf (asm_out_file, "\t.machine %s\n", mapping[i].name);
20747 }
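/* For example, -mcpu=G4 ends the scan at the "7400"/"G4" rows (by the
   MASK_ALTIVEC bit or by name) and emits ".machine ppc7400"; a 64-bit
   compilation with no closer match stops at the first entry via
   MASK_64BIT and emits ".machine ppc64"; with nothing matching, the
   NULL sentinel yields ".machine ppc".  */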
20748 
20749 #endif /* TARGET_MACHO */
20750 
20751 #if TARGET_ELF
20752 static int
20753 rs6000_elf_reloc_rw_mask (void)
20754 {
20755   if (flag_pic)
20756     return 3;
20757   else if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
20758     return 2;
20759   else
20760     return 0;
20761 }
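/* The returned mask roughly follows varasm's reloc convention: bit 0
   covers relocations against local symbols and bit 1 relocations
   against global symbols.  So PIC code keeps anything with a
   relocation out of read-only sections (3), while the AIX/ELFv2 ABIs
   only force out data with global-symbol relocations (2).  */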
20762 
20763 /* Record an element in the table of global constructors.  SYMBOL is
20764    a SYMBOL_REF of the function to be called; PRIORITY is a number
20765    between 0 and MAX_INIT_PRIORITY.
20766 
20767    This differs from default_named_section_asm_out_constructor in
20768    that we have special handling for -mrelocatable.  */
20769 
20770 static void rs6000_elf_asm_out_constructor (rtx, int) ATTRIBUTE_UNUSED;
20771 static void
20772 rs6000_elf_asm_out_constructor (rtx symbol, int priority)
20773 {
20774   const char *section = ".ctors";
20775   char buf[18];
20776 
20777   if (priority != DEFAULT_INIT_PRIORITY)
20778     {
20779       sprintf (buf, ".ctors.%.5u",
20780 	       /* Invert the numbering so the linker puts us in the proper
20781 		  order; constructors are run from right to left, and the
20782 		  linker sorts in increasing order.  */
20783 	       MAX_INIT_PRIORITY - priority);
20784       section = buf;
20785     }
20786 
20787   switch_to_section (get_section (section, SECTION_WRITE, NULL));
20788   assemble_align (POINTER_SIZE);
20789 
20790   if (DEFAULT_ABI == ABI_V4
20791       && (TARGET_RELOCATABLE || flag_pic > 1))
20792     {
20793       fputs ("\t.long (", asm_out_file);
20794       output_addr_const (asm_out_file, symbol);
20795       fputs (")@fixup\n", asm_out_file);
20796     }
20797   else
20798     assemble_integer (symbol, POINTER_SIZE / BITS_PER_UNIT, POINTER_SIZE, 1);
20799 }
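/* As an example of the inversion above, a constructor with priority
   101 (run earliest) lands in section ".ctors.65434", MAX_INIT_PRIORITY
   being 65535; that name sorts toward the end of the link order, which
   the right-to-left .ctors walk reaches first.  */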
20800 
20801 static void rs6000_elf_asm_out_destructor (rtx, int) ATTRIBUTE_UNUSED;
20802 static void
20803 rs6000_elf_asm_out_destructor (rtx symbol, int priority)
20804 {
20805   const char *section = ".dtors";
20806   char buf[18];
20807 
20808   if (priority != DEFAULT_INIT_PRIORITY)
20809     {
20810       sprintf (buf, ".dtors.%.5u",
20811 	       /* Invert the numbering so the linker puts us in the proper
20812 		  order; constructors are run from right to left, and the
20813 		  linker sorts in increasing order.  */
20814 	       MAX_INIT_PRIORITY - priority);
20815       section = buf;
20816     }
20817 
20818   switch_to_section (get_section (section, SECTION_WRITE, NULL));
20819   assemble_align (POINTER_SIZE);
20820 
20821   if (DEFAULT_ABI == ABI_V4
20822       && (TARGET_RELOCATABLE || flag_pic > 1))
20823     {
20824       fputs ("\t.long (", asm_out_file);
20825       output_addr_const (asm_out_file, symbol);
20826       fputs (")@fixup\n", asm_out_file);
20827     }
20828   else
20829     assemble_integer (symbol, POINTER_SIZE / BITS_PER_UNIT, POINTER_SIZE, 1);
20830 }
20831 
20832 void
20833 rs6000_elf_declare_function_name (FILE *file, const char *name, tree decl)
20834 {
20835   if (TARGET_64BIT && DEFAULT_ABI != ABI_ELFv2)
20836     {
20837       fputs ("\t.section\t\".opd\",\"aw\"\n\t.align 3\n", file);
20838       ASM_OUTPUT_LABEL (file, name);
20839       fputs (DOUBLE_INT_ASM_OP, file);
20840       rs6000_output_function_entry (file, name);
20841       fputs (",.TOC.@tocbase,0\n\t.previous\n", file);
20842       if (DOT_SYMBOLS)
20843 	{
20844 	  fputs ("\t.size\t", file);
20845 	  assemble_name (file, name);
20846 	  fputs (",24\n\t.type\t.", file);
20847 	  assemble_name (file, name);
20848 	  fputs (",@function\n", file);
20849 	  if (TREE_PUBLIC (decl) && ! DECL_WEAK (decl))
20850 	    {
20851 	      fputs ("\t.globl\t.", file);
20852 	      assemble_name (file, name);
20853 	      putc ('\n', file);
20854 	    }
20855 	}
20856       else
20857 	ASM_OUTPUT_TYPE_DIRECTIVE (file, name, "function");
20858       ASM_DECLARE_RESULT (file, DECL_RESULT (decl));
20859       rs6000_output_function_entry (file, name);
20860       fputs (":\n", file);
20861       return;
20862     }
20863 
20864   int uses_toc;
20865   if (DEFAULT_ABI == ABI_V4
20866       && (TARGET_RELOCATABLE || flag_pic > 1)
20867       && !TARGET_SECURE_PLT
20868       && (!constant_pool_empty_p () || crtl->profile)
20869       && (uses_toc = uses_TOC ()))
20870     {
20871       char buf[256];
20872 
20873       if (uses_toc == 2)
20874 	switch_to_other_text_partition ();
20875       (*targetm.asm_out.internal_label) (file, "LCL", rs6000_pic_labelno);
20876 
20877       fprintf (file, "\t.long ");
20878       assemble_name (file, toc_label_name);
20879       need_toc_init = 1;
20880       putc ('-', file);
20881       ASM_GENERATE_INTERNAL_LABEL (buf, "LCF", rs6000_pic_labelno);
20882       assemble_name (file, buf);
20883       putc ('\n', file);
20884       if (uses_toc == 2)
20885 	switch_to_other_text_partition ();
20886     }
20887 
20888   ASM_OUTPUT_TYPE_DIRECTIVE (file, name, "function");
20889   ASM_DECLARE_RESULT (file, DECL_RESULT (decl));
20890 
20891   if (TARGET_CMODEL == CMODEL_LARGE
20892       && rs6000_global_entry_point_prologue_needed_p ())
20893     {
20894       char buf[256];
20895 
20896       (*targetm.asm_out.internal_label) (file, "LCL", rs6000_pic_labelno);
20897 
20898       fprintf (file, "\t.quad .TOC.-");
20899       ASM_GENERATE_INTERNAL_LABEL (buf, "LCF", rs6000_pic_labelno);
20900       assemble_name (file, buf);
20901       putc ('\n', file);
20902     }
20903 
20904   if (DEFAULT_ABI == ABI_AIX)
20905     {
20906       const char *desc_name, *orig_name;
20907 
20908       orig_name = (*targetm.strip_name_encoding) (name);
20909       desc_name = orig_name;
20910       while (*desc_name == '.')
20911 	desc_name++;
20912 
20913       if (TREE_PUBLIC (decl))
20914 	fprintf (file, "\t.globl %s\n", desc_name);
20915 
20916       fprintf (file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
20917       fprintf (file, "%s:\n", desc_name);
20918       fprintf (file, "\t.long %s\n", orig_name);
20919       fputs ("\t.long _GLOBAL_OFFSET_TABLE_\n", file);
20920       fputs ("\t.long 0\n", file);
20921       fprintf (file, "\t.previous\n");
20922     }
20923   ASM_OUTPUT_LABEL (file, name);
20924 }
20925 
20926 static void rs6000_elf_file_end (void) ATTRIBUTE_UNUSED;
20927 static void
20928 rs6000_elf_file_end (void)
20929 {
20930 #ifdef HAVE_AS_GNU_ATTRIBUTE
20931   /* ??? The value emitted depends on options active at file end.
20932      Assume anyone using #pragma or attributes that might change
20933      options knows what they are doing.  */
20934   if ((TARGET_64BIT || DEFAULT_ABI == ABI_V4)
20935       && rs6000_passes_float)
20936     {
20937       int fp;
20938 
20939       if (TARGET_HARD_FLOAT)
20940 	fp = 1;
20941       else
20942 	fp = 2;
20943       if (rs6000_passes_long_double)
20944 	{
20945 	  if (!TARGET_LONG_DOUBLE_128)
20946 	    fp |= 2 * 4;
20947 	  else if (TARGET_IEEEQUAD)
20948 	    fp |= 3 * 4;
20949 	  else
20950 	    fp |= 1 * 4;
20951 	}
20952       fprintf (asm_out_file, "\t.gnu_attribute 4, %d\n", fp);
20953     }
20954   if (TARGET_32BIT && DEFAULT_ABI == ABI_V4)
20955     {
20956       if (rs6000_passes_vector)
20957 	fprintf (asm_out_file, "\t.gnu_attribute 8, %d\n",
20958 		 (TARGET_ALTIVEC_ABI ? 2 : 1));
20959       if (rs6000_returns_struct)
20960 	fprintf (asm_out_file, "\t.gnu_attribute 12, %d\n",
20961 		 aix_struct_return ? 2 : 1);
20962     }
20963 #endif
20964 #if defined (POWERPC_LINUX) || defined (POWERPC_FREEBSD)
20965   if (TARGET_32BIT || DEFAULT_ABI == ABI_ELFv2)
20966     file_end_indicate_exec_stack ();
20967 #endif
20968 
20969   if (flag_split_stack)
20970     file_end_indicate_split_stack ();
20971 
20972   if (cpu_builtin_p)
20973     {
20974       /* We have expanded a CPU builtin, so we need to emit a reference to
20975 	 the special symbol that libc uses to declare that it supports the
20976 	 AT_PLATFORM and AT_HWCAP/AT_HWCAP2 TCB fields.  */
20977       switch_to_section (data_section);
20978       fprintf (asm_out_file, "\t.align %u\n", TARGET_32BIT ? 2 : 3);
20979       fprintf (asm_out_file, "\t%s %s\n",
20980 	       TARGET_32BIT ? ".long" : ".quad", tcb_verification_symbol);
20981     }
20982 }
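/* The Tag_GNU_Power_ABI_FP value computed above decomposes as
   (fp & 3): 1 = hard float, 2 = soft float; plus 4 * (1 = 128-bit IBM
   long double, 2 = 64-bit long double, 3 = 128-bit IEEE long double).
   A hard-float compile passing IBM extended long double therefore
   emits ".gnu_attribute 4, 5".  */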
20983 #endif
20984 
20985 #if TARGET_XCOFF
20986 
20987 #ifndef HAVE_XCOFF_DWARF_EXTRAS
20988 #define HAVE_XCOFF_DWARF_EXTRAS 0
20989 #endif
20990 
20991 static enum unwind_info_type
20992 rs6000_xcoff_debug_unwind_info (void)
20993 {
20994   return UI_NONE;
20995 }
20996 
20997 static void
20998 rs6000_xcoff_asm_output_anchor (rtx symbol)
20999 {
21000   char buffer[100];
21001 
21002   sprintf (buffer, "$ + " HOST_WIDE_INT_PRINT_DEC,
21003 	   SYMBOL_REF_BLOCK_OFFSET (symbol));
21004   fprintf (asm_out_file, "%s", SET_ASM_OP);
21005   RS6000_OUTPUT_BASENAME (asm_out_file, XSTR (symbol, 0));
21006   fprintf (asm_out_file, ",");
21007   RS6000_OUTPUT_BASENAME (asm_out_file, buffer);
21008   fprintf (asm_out_file, "\n");
21009 }
21010 
21011 static void
21012 rs6000_xcoff_asm_globalize_label (FILE *stream, const char *name)
21013 {
21014   fputs (GLOBAL_ASM_OP, stream);
21015   RS6000_OUTPUT_BASENAME (stream, name);
21016   putc ('\n', stream);
21017 }
21018 
21019 /* A get_unnamed_section callback, used for read-only sections.  DIRECTIVE
21020    is non-null when the private rodata section name should be used.  */
21021 
21022 static void
21023 rs6000_xcoff_output_readonly_section_asm_op (const char *directive)
21024 {
21025   fprintf (asm_out_file, "\t.csect %s[RO],%s\n",
21026 	   directive
21027 	   ? xcoff_private_rodata_section_name
21028 	   : xcoff_read_only_section_name,
21029 	   XCOFF_CSECT_DEFAULT_ALIGNMENT_STR);
21030 }
21031 
21032 /* Likewise for read-write sections.  */
21033 
21034 static void
21035 rs6000_xcoff_output_readwrite_section_asm_op (const char *)
21036 {
21037   fprintf (asm_out_file, "\t.csect %s[RW],%s\n",
21038 	   xcoff_private_data_section_name,
21039 	   XCOFF_CSECT_DEFAULT_ALIGNMENT_STR);
21040 }
21041 
21042 static void
21043 rs6000_xcoff_output_tls_section_asm_op (const char *directive)
21044 {
21045   fprintf (asm_out_file, "\t.csect %s[TL],%s\n",
21046 	   directive
21047 	   ? xcoff_private_data_section_name
21048 	   : xcoff_tls_data_section_name,
21049 	   XCOFF_CSECT_DEFAULT_ALIGNMENT_STR);
21050 }
21051 
21052 /* A get_unnamed_section callback, used for switching to toc_section.  */
21053 
21054 static void
21055 rs6000_xcoff_output_toc_section_asm_op (const char *data ATTRIBUTE_UNUSED)
21056 {
21057   if (TARGET_MINIMAL_TOC)
21058     {
21059       /* toc_section is always selected at least once from
21060 	 rs6000_xcoff_file_start, so this is guaranteed to
21061 	 always be defined once and only once in each file.  */
21062       if (!toc_initialized)
21063 	{
21064 	  fputs ("\t.toc\nLCTOC..1:\n", asm_out_file);
21065 	  fputs ("\t.tc toc_table[TC],toc_table[RW]\n", asm_out_file);
21066 	  toc_initialized = 1;
21067 	}
21068       fprintf (asm_out_file, "\t.csect toc_table[RW]%s\n",
21069 	       (TARGET_32BIT ? "" : ",3"));
21070     }
21071   else
21072     fputs ("\t.toc\n", asm_out_file);
21073 }
21074 
21075 /* Implement TARGET_ASM_INIT_SECTIONS.  */
21076 
21077 static void
21078 rs6000_xcoff_asm_init_sections (void)
21079 {
21080   read_only_data_section
21081     = get_unnamed_section (0, rs6000_xcoff_output_readonly_section_asm_op,
21082 			   NULL);
21083 
21084   private_data_section
21085     = get_unnamed_section (SECTION_WRITE,
21086 			   rs6000_xcoff_output_readwrite_section_asm_op,
21087 			   NULL);
21088 
21089   read_only_private_data_section
21090     = get_unnamed_section (0, rs6000_xcoff_output_readonly_section_asm_op,
21091 			   "");
21092 
21093   tls_data_section
21094     = get_unnamed_section (SECTION_TLS,
21095 			   rs6000_xcoff_output_tls_section_asm_op,
21096 			   NULL);
21097 
21098   tls_private_data_section
21099     = get_unnamed_section (SECTION_TLS,
21100 			   rs6000_xcoff_output_tls_section_asm_op,
21101 			   "");
21102 
21103   toc_section
21104     = get_unnamed_section (0, rs6000_xcoff_output_toc_section_asm_op, NULL);
21105 
21106   readonly_data_section = read_only_data_section;
21107 }
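/* The NULL vs. "" data arguments above are deliberate: they become the
   DIRECTIVE parameter of the asm_op callbacks, which only test them for
   null/non-null to pick between the shared and the per-file private
   section names.  */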
21108 
21109 static int
21110 rs6000_xcoff_reloc_rw_mask (void)
21111 {
21112   return 3;
21113 }
21114 
21115 static void
21116 rs6000_xcoff_asm_named_section (const char *name, unsigned int flags,
21117 				tree decl ATTRIBUTE_UNUSED)
21118 {
21119   int smclass;
21120   static const char * const suffix[7]
21121     = { "PR", "RO", "RW", "BS", "TL", "UL", "XO" };
21122 
21123   if (flags & SECTION_EXCLUDE)
21124     smclass = 6;
21125   else if (flags & SECTION_DEBUG)
21126     {
21127       fprintf (asm_out_file, "\t.dwsect %s\n", name);
21128       return;
21129     }
21130   else if (flags & SECTION_CODE)
21131     smclass = 0;
21132   else if (flags & SECTION_TLS)
21133     {
21134       if (flags & SECTION_BSS)
21135 	smclass = 5;
21136       else
21137 	smclass = 4;
21138     }
21139   else if (flags & SECTION_WRITE)
21140     {
21141       if (flags & SECTION_BSS)
21142 	smclass = 3;
21143       else
21144 	smclass = 2;
21145     }
21146   else
21147     smclass = 1;
21148 
21149   fprintf (asm_out_file, "\t.csect %s%s[%s],%u\n",
21150 	   (flags & SECTION_CODE) ? "." : "",
21151 	   name, suffix[smclass], flags & SECTION_ENTSIZE);
21152 }
21153 
21154 #define IN_NAMED_SECTION(DECL) \
21155   ((TREE_CODE (DECL) == FUNCTION_DECL || TREE_CODE (DECL) == VAR_DECL) \
21156    && DECL_SECTION_NAME (DECL) != NULL)
21157 
21158 static section *
21159 rs6000_xcoff_select_section (tree decl, int reloc,
21160 			     unsigned HOST_WIDE_INT align)
21161 {
21162   /* Place variables with alignment stricter than BIGGEST_ALIGNMENT into
21163      named section.  */
21164   if (align > BIGGEST_ALIGNMENT && VAR_OR_FUNCTION_DECL_P (decl))
21165     {
21166       resolve_unique_section (decl, reloc, true);
21167       if (IN_NAMED_SECTION (decl))
21168 	return get_named_section (decl, NULL, reloc);
21169     }
21170 
21171   if (decl_readonly_section (decl, reloc))
21172     {
21173       if (TREE_PUBLIC (decl))
21174 	return read_only_data_section;
21175       else
21176 	return read_only_private_data_section;
21177     }
21178   else
21179     {
21180 #if HAVE_AS_TLS
21181       if (TREE_CODE (decl) == VAR_DECL && DECL_THREAD_LOCAL_P (decl))
21182 	{
21183 	  if (bss_initializer_p (decl))
21184 	    return tls_comm_section;
21185 	  else if (TREE_PUBLIC (decl))
21186 	    return tls_data_section;
21187 	  else
21188 	    return tls_private_data_section;
21189 	}
21190       else
21191 #endif
21192 	if (TREE_PUBLIC (decl))
21193 	return data_section;
21194       else
21195 	return private_data_section;
21196     }
21197 }
21198 
21199 static void
21200 rs6000_xcoff_unique_section (tree decl, int reloc ATTRIBUTE_UNUSED)
21201 {
21202   const char *name;
21203 
21204   name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
21205   name = (*targetm.strip_name_encoding) (name);
21206   set_decl_section_name (decl, name);
21207 }
21208 
21209 /* Select section for constant in constant pool.
21210 
21211    On RS/6000, all constants are in the private read-only data area.
21212    However, if this is being placed in the TOC it must be output as a
21213    toc entry.  */
21214 
21215 static section *
21216 rs6000_xcoff_select_rtx_section (machine_mode mode, rtx x,
21217 				 unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED)
21218 {
21219   if (ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (x, mode))
21220     return toc_section;
21221   else
21222     return read_only_private_data_section;
21223 }
21224 
21225 /* Remove any trailing [DS] or the like from the symbol name.  */
21226 
21227 static const char *
21228 rs6000_xcoff_strip_name_encoding (const char *name)
21229 {
21230   size_t len;
21231   if (*name == '*')
21232     name++;
21233   len = strlen (name);
21234   if (name[len - 1] == ']')
21235     return ggc_alloc_string (name, len - 4);
21236   else
21237     return name;
21238 }
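/* E.g. "*foo[DS]" loses the leading '*' and the four trailing
   mapping-class characters, yielding "foo"; a name without a trailing
   ']' is returned unchanged.  */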
21239 
21240 /* Section attributes.  AIX is always PIC.  */
21241 
21242 static unsigned int
21243 rs6000_xcoff_section_type_flags (tree decl, const char *name, int reloc)
21244 {
21245   unsigned int align;
21246   unsigned int flags = default_section_type_flags (decl, name, reloc);
21247 
21248   if (decl && DECL_P (decl) && VAR_P (decl) && bss_initializer_p (decl))
21249     flags |= SECTION_BSS;
21250 
21251   /* Align to at least the minimum unit size (MIN_UNITS_PER_WORD).  */
21252   if (!decl || !DECL_P (decl))
21253     align = MIN_UNITS_PER_WORD;
21254   /* Align code CSECT to at least 32 bytes.  */
21255   else if ((flags & SECTION_CODE) != 0)
21256     align = MAX ((DECL_ALIGN (decl) / BITS_PER_UNIT), 32);
21257   else
21258     /* Increase alignment of large objects if not already stricter.  */
21259     align = MAX ((DECL_ALIGN (decl) / BITS_PER_UNIT),
21260 		 int_size_in_bytes (TREE_TYPE (decl)) > MIN_UNITS_PER_WORD
21261 		 ? UNITS_PER_FP_WORD : MIN_UNITS_PER_WORD);
21262 
21263   return flags | (exact_log2 (align) & SECTION_ENTSIZE);
21264 }
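/* The alignment travels in the low SECTION_ENTSIZE bits as a log2
   value; rs6000_xcoff_asm_named_section prints it back as the csect
   alignment operand, so a code csect aligned to 32 bytes comes out as
   ".csect .name[PR],5".  */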
21265 
21266 /* Output at beginning of assembler file.
21267 
21268    Initialize the section names for the RS/6000 at this point.
21269 
21270    Specify filename, including full path, to assembler.
21271 
21272    We want to go into the TOC section so at least one .toc will be emitted.
21273    Also, in order to output proper .bs/.es pairs, we need at least one static
21274    [RW] section emitted.
21275 
21276    Finally, declare mcount when profiling to make the assembler happy.  */
21277 
21278 static void
21279 rs6000_xcoff_file_start (void)
21280 {
21281   rs6000_gen_section_name (&xcoff_bss_section_name,
21282 			   main_input_filename, ".bss_");
21283   rs6000_gen_section_name (&xcoff_private_data_section_name,
21284 			   main_input_filename, ".rw_");
21285   rs6000_gen_section_name (&xcoff_private_rodata_section_name,
21286 			   main_input_filename, ".rop_");
21287   rs6000_gen_section_name (&xcoff_read_only_section_name,
21288 			   main_input_filename, ".ro_");
21289   rs6000_gen_section_name (&xcoff_tls_data_section_name,
21290 			   main_input_filename, ".tls_");
21291 
21292   fputs ("\t.file\t", asm_out_file);
21293   output_quoted_string (asm_out_file, main_input_filename);
21294   fputc ('\n', asm_out_file);
21295   if (write_symbols != NO_DEBUG)
21296     switch_to_section (private_data_section);
21297   switch_to_section (toc_section);
21298   switch_to_section (text_section);
21299   if (profile_flag)
21300     fprintf (asm_out_file, "\t.extern %s\n", RS6000_MCOUNT);
21301   rs6000_file_start ();
21302 }
21303 
21304 /* Output at end of assembler file.
21305    On the RS/6000, referencing data should automatically pull in text.  */
21306 
21307 static void
21308 rs6000_xcoff_file_end (void)
21309 {
21310   switch_to_section (text_section);
21311   if (xcoff_tls_exec_model_detected)
21312     {
21313       /* Add a .ref to __tls_get_addr to force libpthread dependency.  */
21314       fputs ("\t.extern __tls_get_addr\n\t.ref __tls_get_addr\n", asm_out_file);
21315     }
21316   fputs ("_section_.text:\n", asm_out_file);
21317   switch_to_section (data_section);
21318   fputs (TARGET_32BIT
21319 	 ? "\t.long _section_.text\n" : "\t.llong _section_.text\n",
21320 	 asm_out_file);
21321 
21322 }
21323 
21324 struct declare_alias_data
21325 {
21326   FILE *file;
21327   bool function_descriptor;
21328 };
21329 
21330 /* Declare alias N.  A helper function for call_for_symbol_and_aliases.  */
21331 
21332 static bool
21333 rs6000_declare_alias (struct symtab_node *n, void *d)
21334 {
21335   struct declare_alias_data *data = (struct declare_alias_data *)d;
21336   /* Main symbol is output specially, because varasm machinery does part of
21337      the job for us - we do not need to declare .globl/lglobs and such.  */
21338   if (!n->alias || n->weakref)
21339     return false;
21340 
21341   if (lookup_attribute ("ifunc", DECL_ATTRIBUTES (n->decl)))
21342     return false;
21343 
21344   /* Prevent assemble_alias from trying to use the .set pseudo operation,
21345      which does not behave as expected by the middle-end.  */
21346   TREE_ASM_WRITTEN (n->decl) = true;
21347 
21348   const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (n->decl));
21349   char *buffer = (char *) alloca (strlen (name) + 2);
21350   char *p;
21351   int dollar_inside = 0;
21352 
21353   strcpy (buffer, name);
21354   p = strchr (buffer, '$');
21355   while (p) {
21356     *p = '_';
21357     dollar_inside++;
21358     p = strchr (p + 1, '$');
21359   }
21360   if (TREE_PUBLIC (n->decl))
21361     {
21362       if (!RS6000_WEAK || !DECL_WEAK (n->decl))
21363 	{
21364           if (dollar_inside) {
21365 	      if (data->function_descriptor)
21366                 fprintf(data->file, "\t.rename .%s,\".%s\"\n", buffer, name);
21367 	      fprintf(data->file, "\t.rename %s,\"%s\"\n", buffer, name);
21368 	    }
21369 	  if (data->function_descriptor)
21370 	    {
21371 	      fputs ("\t.globl .", data->file);
21372 	      RS6000_OUTPUT_BASENAME (data->file, buffer);
21373 	      putc ('\n', data->file);
21374 	    }
21375 	  fputs ("\t.globl ", data->file);
21376 	  assemble_name (data->file, buffer);
21377 	  putc ('\n', data->file);
21378 	}
21379 #ifdef ASM_WEAKEN_DECL
21380       else if (DECL_WEAK (n->decl) && !data->function_descriptor)
21381 	ASM_WEAKEN_DECL (data->file, n->decl, name, NULL);
21382 #endif
21383     }
21384   else
21385     {
21386       if (dollar_inside)
21387 	{
21388 	  if (data->function_descriptor)
21389             fprintf(data->file, "\t.rename .%s,\".%s\"\n", buffer, name);
21390 	  fprintf(data->file, "\t.rename %s,\"%s\"\n", buffer, name);
21391 	}
21392       if (data->function_descriptor)
21393 	{
21394 	  fputs ("\t.lglobl .", data->file);
21395 	  RS6000_OUTPUT_BASENAME (data->file, buffer);
21396 	  putc ('\n', data->file);
21397 	}
21398       fputs ("\t.lglobl ", data->file);
21399       assemble_name (data->file, buffer);
21400       putc ('\n', data->file);
21401     }
21402   if (data->function_descriptor)
21403     putc ('.', data->file);
21404   ASM_OUTPUT_LABEL (data->file, buffer);
21405   return false;
21406 }
21407 
21408 
21409 #ifdef HAVE_GAS_HIDDEN
21410 /* Helper function to calculate visibility of a DECL
21411    and return the value as a const string.  */
21412 
21413 static const char *
21414 rs6000_xcoff_visibility (tree decl)
21415 {
21416   static const char * const visibility_types[] = {
21417     "", ",protected", ",hidden", ",internal"
21418   };
21419 
21420   enum symbol_visibility vis = DECL_VISIBILITY (decl);
21421   return visibility_types[vis];
21422 }
21423 #endif
21424 
21425 
21426 /* This macro produces the initial definition of a function name.
21427    On the RS/6000, we need to place an extra '.' in the function name and
21428    output the function descriptor.
21429    Dollar signs are converted to underscores.
21430 
21431    The csect for the function will have already been created when
21432    text_section was selected.  We do have to go back to that csect, however.
21433 
21434    The third and fourth parameters to the .function pseudo-op (2 and 0
21435    below) are placeholders which no longer have any use.
21436 
21437    Because the AIX assembler's .set command has unexpected semantics, we
21438    output all aliases as alternative labels in front of the definition.  */
21439 
21440 void
21441 rs6000_xcoff_declare_function_name (FILE *file, const char *name, tree decl)
21442 {
21443   char *buffer = (char *) alloca (strlen (name) + 1);
21444   char *p;
21445   int dollar_inside = 0;
21446   struct declare_alias_data data = {file, false};
21447 
21448   strcpy (buffer, name);
21449   p = strchr (buffer, '$');
21450   while (p) {
21451     *p = '_';
21452     dollar_inside++;
21453     p = strchr (p + 1, '$');
21454   }
21455   if (TREE_PUBLIC (decl))
21456     {
21457       if (!RS6000_WEAK || !DECL_WEAK (decl))
21458 	{
21459           if (dollar_inside) {
21460               fprintf(file, "\t.rename .%s,\".%s\"\n", buffer, name);
21461               fprintf(file, "\t.rename %s,\"%s\"\n", buffer, name);
21462 	    }
21463 	  fputs ("\t.globl .", file);
21464 	  RS6000_OUTPUT_BASENAME (file, buffer);
21465 #ifdef HAVE_GAS_HIDDEN
21466 	  fputs (rs6000_xcoff_visibility (decl), file);
21467 #endif
21468 	  putc ('\n', file);
21469 	}
21470     }
21471   else
21472     {
21473       if (dollar_inside) {
21474           fprintf(file, "\t.rename .%s,\".%s\"\n", buffer, name);
21475           fprintf(file, "\t.rename %s,\"%s\"\n", buffer, name);
21476 	}
21477       fputs ("\t.lglobl .", file);
21478       RS6000_OUTPUT_BASENAME (file, buffer);
21479       putc ('\n', file);
21480     }
21481 
21482   fputs ("\t.csect ", file);
21483   assemble_name (file, buffer);
21484   fputs (TARGET_32BIT ? "\n" : ",3\n", file);
21485 
21486   ASM_OUTPUT_LABEL (file, buffer);
21487 
21488   symtab_node::get (decl)->call_for_symbol_and_aliases (rs6000_declare_alias,
21489 							&data, true);
21490   fputs (TARGET_32BIT ? "\t.long ." : "\t.llong .", file);
21491   RS6000_OUTPUT_BASENAME (file, buffer);
21492   fputs (", TOC[tc0], 0\n", file);
21493 
21494   in_section = NULL;
21495   switch_to_section (function_section (decl));
21496   putc ('.', file);
21497   ASM_OUTPUT_LABEL (file, buffer);
21498 
21499   data.function_descriptor = true;
21500   symtab_node::get (decl)->call_for_symbol_and_aliases (rs6000_declare_alias,
21501 							&data, true);
21502   if (!DECL_IGNORED_P (decl))
21503     {
21504       if (write_symbols == DBX_DEBUG || write_symbols == XCOFF_DEBUG)
21505 	xcoffout_declare_function (file, decl, buffer);
21506       else if (dwarf_debuginfo_p ())
21507 	{
21508 	  name = (*targetm.strip_name_encoding) (name);
21509 	  fprintf (file, "\t.function .%s,.%s,2,0\n", name, name);
21510 	}
21511     }
21512   return;
21513 }
21514 
21515 
21516 /* Output assembly language to globalize a symbol from a DECL,
21517    possibly with visibility.  */
21518 
21519 void
21520 rs6000_xcoff_asm_globalize_decl_name (FILE *stream, tree decl)
21521 {
21522   const char *name = XSTR (XEXP (DECL_RTL (decl), 0), 0);
21523   fputs (GLOBAL_ASM_OP, stream);
21524   assemble_name (stream, name);
21525 #ifdef HAVE_GAS_HIDDEN
21526   fputs (rs6000_xcoff_visibility (decl), stream);
21527 #endif
21528   putc ('\n', stream);
21529 }
21530 
21531 /* Output assembly language to define a symbol as COMMON from a DECL,
21532    possibly with visibility.  */
21533 
21534 void
21535 rs6000_xcoff_asm_output_aligned_decl_common (FILE *stream,
21536 					     tree decl ATTRIBUTE_UNUSED,
21537 					     const char *name,
21538 					     unsigned HOST_WIDE_INT size,
21539 					     unsigned int align)
21540 {
21541   unsigned int align2 = 2;
21542 
21543   if (align == 0)
21544     align = DATA_ABI_ALIGNMENT (TREE_TYPE (decl), DECL_ALIGN (decl));
21545 
21546   if (align > 32)
21547     align2 = floor_log2 (align / BITS_PER_UNIT);
21548   else if (size > 4)
21549     align2 = 3;
21550 
21551   if (! DECL_COMMON (decl))
21552     {
21553       /* Forget section.  */
21554       in_section = NULL;
21555 
21556       /* Globalize TLS BSS.  */
21557       if (TREE_PUBLIC (decl) && DECL_THREAD_LOCAL_P (decl))
21558 	{
21559 	  fputs (GLOBAL_ASM_OP, stream);
21560 	  assemble_name (stream, name);
21561 	  fputc ('\n', stream);
21562 	}
21563 
21564       /* Switch to section and skip space.  */
21565       fputs ("\t.csect ", stream);
21566       assemble_name (stream, name);
21567       fprintf (stream, ",%u\n", align2);
21568       ASM_DECLARE_OBJECT_NAME (stream, name, decl);
21569       ASM_OUTPUT_SKIP (stream, size ? size : 1);
21570       return;
21571     }
21572 
21573   if (TREE_PUBLIC (decl))
21574     {
21575       fprintf (stream,
21576 	       "\t.comm %s," HOST_WIDE_INT_PRINT_UNSIGNED ",%u" ,
21577 	       name, size, align2);
21578 
21579 #ifdef HAVE_GAS_HIDDEN
21580       if (decl != NULL)
21581 	fputs (rs6000_xcoff_visibility (decl), stream);
21582 #endif
21583       putc ('\n', stream);
21584     }
21585   else
21586       fprintf (stream,
21587 	       "\t.lcomm %s," HOST_WIDE_INT_PRINT_UNSIGNED ",%s,%u\n",
21588 	       (*targetm.strip_name_encoding) (name), size, name, align2);
21589 }
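/* For instance, a public 16-byte common with 128-bit alignment gets
   align2 = floor_log2 (16) = 4 and is emitted as ".comm name,16,4";
   small commons fall back to align2 = 2, a 4-byte boundary.  */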
21590 
21591 /* This macro produces the initial definition of an object (variable) name.
21592    Because the AIX assembler's .set command has unexpected semantics, we
21593    output all aliases as alternative labels in front of the definition.  */
21594 
21595 void
21596 rs6000_xcoff_declare_object_name (FILE *file, const char *name, tree decl)
21597 {
21598   struct declare_alias_data data = {file, false};
21599   ASM_OUTPUT_LABEL (file, name);
21600   symtab_node::get_create (decl)->call_for_symbol_and_aliases (rs6000_declare_alias,
21601 							       &data, true);
21602 }
21603 
21604 /* Override the default 'SYMBOL-.' syntax with the AIX-compatible 'SYMBOL-$'.  */
21605 
21606 void
21607 rs6000_asm_output_dwarf_pcrel (FILE *file, int size, const char *label)
21608 {
21609   fputs (integer_asm_op (size, FALSE), file);
21610   assemble_name (file, label);
21611   fputs ("-$", file);
21612 }
21613 
21614 /* Output a symbol offset relative to the dbase for the current object.
21615    We use __gcc_unwind_dbase as an arbitrary base for dbase and assume
21616    signed offsets.
21617 
21618    __gcc_unwind_dbase is embedded in all executables/libraries through
21619    libgcc/config/rs6000/crtdbase.S.  */
21620 
21621 void
21622 rs6000_asm_output_dwarf_datarel (FILE *file, int size, const char *label)
21623 {
21624   fputs (integer_asm_op (size, FALSE), file);
21625   assemble_name (file, label);
21626   fputs("-__gcc_unwind_dbase", file);
21627 }
21628 
21629 #ifdef HAVE_AS_TLS
21630 static void
21631 rs6000_xcoff_encode_section_info (tree decl, rtx rtl, int first)
21632 {
21633   rtx symbol;
21634   int flags;
21635   const char *symname;
21636 
21637   default_encode_section_info (decl, rtl, first);
21638 
21639   /* Careful not to prod global register variables.  */
21640   if (!MEM_P (rtl))
21641     return;
21642   symbol = XEXP (rtl, 0);
21643   if (!SYMBOL_REF_P (symbol))
21644     return;
21645 
21646   flags = SYMBOL_REF_FLAGS (symbol);
21647 
21648   if (TREE_CODE (decl) == VAR_DECL && DECL_THREAD_LOCAL_P (decl))
21649     flags &= ~SYMBOL_FLAG_HAS_BLOCK_INFO;
21650 
21651   SYMBOL_REF_FLAGS (symbol) = flags;
21652 
21653   symname = XSTR (symbol, 0);
21654 
21655   /* Append CSECT mapping class, unless the symbol already is qualified.
21656      Aliases are implemented as labels, so the symbol name should not add
21657      a mapping class.  */
21658   if (decl
21659       && DECL_P (decl)
21660       && VAR_OR_FUNCTION_DECL_P (decl)
21661       && (symtab_node::get (decl) == NULL
21662 	  || symtab_node::get (decl)->alias == 0)
21663       && symname[strlen (symname) - 1] != ']')
21664     {
21665       const char *smclass = NULL;
21666 
21667       if (TREE_CODE (decl) == FUNCTION_DECL)
21668 	smclass = "[DS]";
21669       else if (DECL_THREAD_LOCAL_P (decl))
21670 	{
21671 	  if (bss_initializer_p (decl))
21672 	    smclass = "[UL]";
21673 	  else if (flag_data_sections)
21674 	    smclass = "[TL]";
21675 	}
21676       else if (DECL_EXTERNAL (decl))
21677 	smclass = "[UA]";
21678       else if (bss_initializer_p (decl))
21679 	smclass = "[BS]";
21680       else if (flag_data_sections)
21681 	{
21682 	  /* This must exactly match the logic of rs6000_xcoff_select_section.  */
21683 	  if (decl_readonly_section (decl, compute_reloc_for_var (decl)))
21684 	    smclass = "[RO]";
21685 	  else
21686 	    smclass = "[RW]";
21687 	}
21688 
21689       if (smclass != NULL)
21690 	{
21691 	  char *newname = XALLOCAVEC (char, strlen (symname) + 5);
21692 
21693 	  strcpy (newname, symname);
21694 	  strcat (newname, smclass);
21695 	  XSTR (symbol, 0) = ggc_strdup (newname);
21696 	}
21697     }
21698 }
21699 #endif /* HAVE_AS_TLS */
21700 #endif /* TARGET_XCOFF */
21701 
21702 void
21703 rs6000_asm_weaken_decl (FILE *stream, tree decl,
21704 			const char *name, const char *val)
21705 {
21706   fputs ("\t.weak\t", stream);
21707   assemble_name (stream, name);
21708   if (decl && TREE_CODE (decl) == FUNCTION_DECL
21709       && DEFAULT_ABI == ABI_AIX && DOT_SYMBOLS)
21710     {
21711 #if TARGET_XCOFF && HAVE_GAS_HIDDEN
21712       if (TARGET_XCOFF)
21713 	fputs (rs6000_xcoff_visibility (decl), stream);
21714 #endif
21715       fputs ("\n\t.weak\t.", stream);
21716       RS6000_OUTPUT_BASENAME (stream, name);
21717     }
21718 #if TARGET_XCOFF && HAVE_GAS_HIDDEN
21719   if (TARGET_XCOFF)
21720     fputs (rs6000_xcoff_visibility (decl), stream);
21721 #endif
21722   fputc ('\n', stream);
21723 
21724   if (val)
21725     {
21726 #ifdef ASM_OUTPUT_DEF
21727       ASM_OUTPUT_DEF (stream, name, val);
21728 #endif
21729       if (decl && TREE_CODE (decl) == FUNCTION_DECL
21730 	  && DEFAULT_ABI == ABI_AIX && DOT_SYMBOLS)
21731 	{
21732 	  fputs ("\t.set\t.", stream);
21733 	  RS6000_OUTPUT_BASENAME (stream, name);
21734 	  fputs (",.", stream);
21735 	  RS6000_OUTPUT_BASENAME (stream, val);
21736 	  fputc ('\n', stream);
21737 	}
21738     }
21739 }
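/* On AIX with dot symbols this weakens both the function descriptor
   and the entry point, so rs6000_asm_weaken_decl (f, decl, "foo", "bar")
   produces roughly:

	.weak foo
	.weak .foo
	<ASM_OUTPUT_DEF expansion for foo = bar>
	.set .foo,.bar

   The exact ASM_OUTPUT_DEF expansion is host-specific.  */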
21740 
21741 
21742 /* Return true if INSN should not be copied.  */
21743 
21744 static bool
21745 rs6000_cannot_copy_insn_p (rtx_insn *insn)
21746 {
21747   return recog_memoized (insn) >= 0
21748 	 && get_attr_cannot_copy (insn);
21749 }
21750 
21751 /* Compute a (partial) cost for rtx X.  Return true if the complete
21752    cost has been computed, and false if subexpressions should be
21753    scanned.  In either case, *TOTAL contains the cost result.  */
21754 
21755 static bool
21756 rs6000_rtx_costs (rtx x, machine_mode mode, int outer_code,
21757 		  int opno ATTRIBUTE_UNUSED, int *total, bool speed)
21758 {
21759   int code = GET_CODE (x);
21760 
21761   switch (code)
21762     {
21763       /* On the RS/6000, if it is valid in the insn, it is free.  */
21764     case CONST_INT:
21765       if (((outer_code == SET
21766 	    || outer_code == PLUS
21767 	    || outer_code == MINUS)
21768 	   && (satisfies_constraint_I (x)
21769 	       || satisfies_constraint_L (x)))
21770 	  || (outer_code == AND
21771 	      && (satisfies_constraint_K (x)
21772 		  || (mode == SImode
21773 		      ? satisfies_constraint_L (x)
21774 		      : satisfies_constraint_J (x))))
21775 	  || ((outer_code == IOR || outer_code == XOR)
21776 	      && (satisfies_constraint_K (x)
21777 		  || (mode == SImode
21778 		      ? satisfies_constraint_L (x)
21779 		      : satisfies_constraint_J (x))))
21780 	  || outer_code == ASHIFT
21781 	  || outer_code == ASHIFTRT
21782 	  || outer_code == LSHIFTRT
21783 	  || outer_code == ROTATE
21784 	  || outer_code == ROTATERT
21785 	  || outer_code == ZERO_EXTRACT
21786 	  || (outer_code == MULT
21787 	      && satisfies_constraint_I (x))
21788 	  || ((outer_code == DIV || outer_code == UDIV
21789 	       || outer_code == MOD || outer_code == UMOD)
21790 	      && exact_log2 (INTVAL (x)) >= 0)
21791 	  || (outer_code == COMPARE
21792 	      && (satisfies_constraint_I (x)
21793 		  || satisfies_constraint_K (x)))
21794 	  || ((outer_code == EQ || outer_code == NE)
21795 	      && (satisfies_constraint_I (x)
21796 		  || satisfies_constraint_K (x)
21797 		  || (mode == SImode
21798 		      ? satisfies_constraint_L (x)
21799 		      : satisfies_constraint_J (x))))
21800 	  || (outer_code == GTU
21801 	      && satisfies_constraint_I (x))
21802 	  || (outer_code == LTU
21803 	      && satisfies_constraint_P (x)))
21804 	{
21805 	  *total = 0;
21806 	  return true;
21807 	}
21808       else if ((outer_code == PLUS
21809 		&& reg_or_add_cint_operand (x, mode))
21810 	       || (outer_code == MINUS
21811 		   && reg_or_sub_cint_operand (x, mode))
21812 	       || ((outer_code == SET
21813 		    || outer_code == IOR
21814 		    || outer_code == XOR)
21815 		   && (INTVAL (x)
21816 		       & ~ (unsigned HOST_WIDE_INT) 0xffffffff) == 0))
21817 	{
21818 	  *total = COSTS_N_INSNS (1);
21819 	  return true;
21820 	}
21821       /* FALLTHRU */
21822 
21823     case CONST_DOUBLE:
21824     case CONST_WIDE_INT:
21825     case CONST:
21826     case HIGH:
21827     case SYMBOL_REF:
21828       *total = !speed ? COSTS_N_INSNS (1) + 1 : COSTS_N_INSNS (2);
21829       return true;
21830 
21831     case MEM:
21832       /* When optimizing for size, a MEM should be slightly more expensive
21833 	 than generating its address, e.g., (plus (reg) (const)).
21834 	 L1 cache latency is about two instructions.  */
21835       *total = !speed ? COSTS_N_INSNS (1) + 1 : COSTS_N_INSNS (2);
21836       if (rs6000_slow_unaligned_access (mode, MEM_ALIGN (x)))
21837 	*total += COSTS_N_INSNS (100);
21838       return true;
21839 
21840     case LABEL_REF:
21841       *total = 0;
21842       return true;
21843 
21844     case PLUS:
21845     case MINUS:
21846       if (FLOAT_MODE_P (mode))
21847 	*total = rs6000_cost->fp;
21848       else
21849 	*total = COSTS_N_INSNS (1);
21850       return false;
21851 
21852     case MULT:
21853       if (CONST_INT_P (XEXP (x, 1))
21854 	  && satisfies_constraint_I (XEXP (x, 1)))
21855 	{
21856 	  if (INTVAL (XEXP (x, 1)) >= -256
21857 	      && INTVAL (XEXP (x, 1)) <= 255)
21858 	    *total = rs6000_cost->mulsi_const9;
21859 	  else
21860 	    *total = rs6000_cost->mulsi_const;
21861 	}
21862       else if (mode == SFmode)
21863 	*total = rs6000_cost->fp;
21864       else if (FLOAT_MODE_P (mode))
21865 	*total = rs6000_cost->dmul;
21866       else if (mode == DImode)
21867 	*total = rs6000_cost->muldi;
21868       else
21869 	*total = rs6000_cost->mulsi;
21870       return false;
21871 
21872     case FMA:
21873       if (mode == SFmode)
21874 	*total = rs6000_cost->fp;
21875       else
21876 	*total = rs6000_cost->dmul;
21877       break;
21878 
21879     case DIV:
21880     case MOD:
21881       if (FLOAT_MODE_P (mode))
21882 	{
21883 	  *total = mode == DFmode ? rs6000_cost->ddiv
21884 				  : rs6000_cost->sdiv;
21885 	  return false;
21886 	}
21887       /* FALLTHRU */
21888 
21889     case UDIV:
21890     case UMOD:
21891       if (CONST_INT_P (XEXP (x, 1))
21892 	  && exact_log2 (INTVAL (XEXP (x, 1))) >= 0)
21893 	{
21894 	  if (code == DIV || code == MOD)
21895 	    /* Shift, addze */
21896 	    *total = COSTS_N_INSNS (2);
21897 	  else
21898 	    /* Shift */
21899 	    *total = COSTS_N_INSNS (1);
21900 	}
21901       else
21902 	{
21903 	  if (GET_MODE (XEXP (x, 1)) == DImode)
21904 	    *total = rs6000_cost->divdi;
21905 	  else
21906 	    *total = rs6000_cost->divsi;
21907 	}
21908       /* Add in shift and subtract for MOD unless we have a mod instruction. */
21909       if (!TARGET_MODULO && (code == MOD || code == UMOD))
21910 	*total += COSTS_N_INSNS (2);
21911       return false;
21912 
21913     case CTZ:
21914       *total = COSTS_N_INSNS (TARGET_CTZ ? 1 : 4);
21915       return false;
21916 
21917     case FFS:
21918       *total = COSTS_N_INSNS (4);
21919       return false;
21920 
21921     case POPCOUNT:
21922       *total = COSTS_N_INSNS (TARGET_POPCNTD ? 1 : 6);
21923       return false;
21924 
21925     case PARITY:
21926       *total = COSTS_N_INSNS (TARGET_CMPB ? 2 : 6);
21927       return false;
21928 
21929     case NOT:
21930       if (outer_code == AND || outer_code == IOR || outer_code == XOR)
21931 	*total = 0;
21932       else
21933 	*total = COSTS_N_INSNS (1);
21934       return false;
21935 
21936     case AND:
21937       if (CONST_INT_P (XEXP (x, 1)))
21938 	{
21939 	  rtx left = XEXP (x, 0);
21940 	  rtx_code left_code = GET_CODE (left);
21941 
21942 	  /* rotate-and-mask: 1 insn.  */
21943 	  if ((left_code == ROTATE
21944 	       || left_code == ASHIFT
21945 	       || left_code == LSHIFTRT)
21946 	      && rs6000_is_valid_shift_mask (XEXP (x, 1), left, mode))
21947 	    {
21948 	      *total = rtx_cost (XEXP (left, 0), mode, left_code, 0, speed);
21949 	      if (!CONST_INT_P (XEXP (left, 1)))
21950 		*total += rtx_cost (XEXP (left, 1), SImode, left_code, 1, speed);
21951 	      *total += COSTS_N_INSNS (1);
21952 	      return true;
21953 	    }
21954 
21955 	  /* rotate-and-mask (no rotate), andi., andis.: 1 insn.  */
21956 	  HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
21957 	  if (rs6000_is_valid_and_mask (XEXP (x, 1), mode)
21958 	      || (val & 0xffff) == val
21959 	      || (val & 0xffff0000) == val
21960 	      || ((val & 0xffff) == 0 && mode == SImode))
21961 	    {
21962 	      *total = rtx_cost (left, mode, AND, 0, speed);
21963 	      *total += COSTS_N_INSNS (1);
21964 	      return true;
21965 	    }
21966 
21967 	  /* 2 insns.  */
21968 	  if (rs6000_is_valid_2insn_and (XEXP (x, 1), mode))
21969 	    {
21970 	      *total = rtx_cost (left, mode, AND, 0, speed);
21971 	      *total += COSTS_N_INSNS (2);
21972 	      return true;
21973 	    }
21974 	}
21975 
21976       *total = COSTS_N_INSNS (1);
21977       return false;
21978 
21979     case IOR:
21980       /* FIXME */
21981       *total = COSTS_N_INSNS (1);
21982       return true;
21983 
21984     case CLZ:
21985     case XOR:
21986     case ZERO_EXTRACT:
21987       *total = COSTS_N_INSNS (1);
21988       return false;
21989 
21990     case ASHIFT:
21991       /* The EXTSWSLI instruction is a combined instruction.  Don't count both
21992 	 the sign extend and shift separately within the insn.  */
21993       if (TARGET_EXTSWSLI && mode == DImode
21994 	  && GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
21995 	  && GET_MODE (XEXP (XEXP (x, 0), 0)) == SImode)
21996 	{
21997 	  *total = 0;
21998 	  return false;
21999 	}
22000       /* fall through */
22001 
22002     case ASHIFTRT:
22003     case LSHIFTRT:
22004     case ROTATE:
22005     case ROTATERT:
22006       /* Handle mul_highpart.  */
22007       if (outer_code == TRUNCATE
22008 	  && GET_CODE (XEXP (x, 0)) == MULT)
22009 	{
22010 	  if (mode == DImode)
22011 	    *total = rs6000_cost->muldi;
22012 	  else
22013 	    *total = rs6000_cost->mulsi;
22014 	  return true;
22015 	}
22016       else if (outer_code == AND)
22017 	*total = 0;
22018       else
22019 	*total = COSTS_N_INSNS (1);
22020       return false;
22021 
22022     case SIGN_EXTEND:
22023     case ZERO_EXTEND:
22024       if (MEM_P (XEXP (x, 0)))
22025 	*total = 0;
22026       else
22027 	*total = COSTS_N_INSNS (1);
22028       return false;
22029 
22030     case COMPARE:
22031     case NEG:
22032     case ABS:
22033       if (!FLOAT_MODE_P (mode))
22034 	{
22035 	  *total = COSTS_N_INSNS (1);
22036 	  return false;
22037 	}
22038       /* FALLTHRU */
22039 
22040     case FLOAT:
22041     case UNSIGNED_FLOAT:
22042     case FIX:
22043     case UNSIGNED_FIX:
22044     case FLOAT_TRUNCATE:
22045       *total = rs6000_cost->fp;
22046       return false;
22047 
22048     case FLOAT_EXTEND:
22049       if (mode == DFmode)
22050 	*total = rs6000_cost->sfdf_convert;
22051       else
22052 	*total = rs6000_cost->fp;
22053       return false;
22054 
22055     case CALL:
22056     case IF_THEN_ELSE:
22057       if (!speed)
22058 	{
22059 	  *total = COSTS_N_INSNS (1);
22060 	  return true;
22061 	}
22062       else if (FLOAT_MODE_P (mode) && TARGET_PPC_GFXOPT && TARGET_HARD_FLOAT)
22063 	{
22064 	  *total = rs6000_cost->fp;
22065 	  return false;
22066 	}
22067       break;
22068 
22069     case NE:
22070     case EQ:
22071     case GTU:
22072     case LTU:
22073       /* Carry bit requires mode == Pmode.
22074 	 NEG or PLUS already counted so only add one.  */
22075       if (mode == Pmode
22076 	  && (outer_code == NEG || outer_code == PLUS))
22077 	{
22078 	  *total = COSTS_N_INSNS (1);
22079 	  return true;
22080 	}
22081       /* FALLTHRU */
22082 
22083     case GT:
22084     case LT:
22085     case UNORDERED:
22086       if (outer_code == SET)
22087 	{
22088 	  if (XEXP (x, 1) == const0_rtx)
22089 	    {
22090 	      *total = COSTS_N_INSNS (2);
22091 	      return true;
22092 	    }
22093 	  else
22094 	    {
22095 	      *total = COSTS_N_INSNS (3);
22096 	      return false;
22097 	    }
22098 	}
22099       /* CC COMPARE.  */
22100       if (outer_code == COMPARE)
22101 	{
22102 	  *total = 0;
22103 	  return true;
22104 	}
22105       break;
22106 
22107     case UNSPEC:
22108       if (XINT (x, 1) == UNSPECV_MMA_XXSETACCZ)
22109 	{
22110 	  *total = 0;
22111 	  return true;
22112 	}
22113       break;
22114 
22115     default:
22116       break;
22117     }
22118 
22119   return false;
22120 }
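/* Two illustrative CONST_INT cases from the code above: (plus (reg)
   (const_int 8)) costs 0, because the constant rides along inside an
   addi; a 32-bit constant such as 0x12345 in a SET satisfies neither
   the I nor the L constraint, falls through to the second test, and is
   charged COSTS_N_INSNS (1) for a lis/ori-style materialization.  */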
22121 
22122 /* Debug form of rs6000_rtx_costs that is selected if -mdebug=cost.  */
22123 
22124 static bool
22125 rs6000_debug_rtx_costs (rtx x, machine_mode mode, int outer_code,
22126 			int opno, int *total, bool speed)
22127 {
22128   bool ret = rs6000_rtx_costs (x, mode, outer_code, opno, total, speed);
22129 
22130   fprintf (stderr,
22131 	   "\nrs6000_rtx_costs, return = %s, mode = %s, outer_code = %s, "
22132 	   "opno = %d, total = %d, speed = %s, x:\n",
22133 	   ret ? "complete" : "scan inner",
22134 	   GET_MODE_NAME (mode),
22135 	   GET_RTX_NAME (outer_code),
22136 	   opno,
22137 	   *total,
22138 	   speed ? "true" : "false");
22139 
22140   debug_rtx (x);
22141 
22142   return ret;
22143 }
22144 
22145 static int
22146 rs6000_insn_cost (rtx_insn *insn, bool speed)
22147 {
22148   if (recog_memoized (insn) < 0)
22149     return 0;
22150 
22151   /* If we are optimizing for size, just use the length.  */
22152   if (!speed)
22153     return get_attr_length (insn);
22154 
22155   /* Use the cost if provided.  */
22156   int cost = get_attr_cost (insn);
22157   if (cost > 0)
22158     return cost;
22159 
22160   /* If the insn tells us how many insns there are, use that.  Otherwise use
22161      the length/4.  Adjust the insn length to remove the extra size that
22162      prefixed instructions take.  */
22163   int n = get_attr_num_insns (insn);
22164   if (n == 0)
22165     {
22166       int length = get_attr_length (insn);
22167       if (get_attr_prefixed (insn) == PREFIXED_YES)
22168 	{
22169 	  int adjust = 0;
22170 	  ADJUST_INSN_LENGTH (insn, adjust);
22171 	  length -= adjust;
22172 	}
22173 
22174       n = length / 4;
22175     }
22176 
22177   enum attr_type type = get_attr_type (insn);
22178 
22179   switch (type)
22180     {
22181     case TYPE_LOAD:
22182     case TYPE_FPLOAD:
22183     case TYPE_VECLOAD:
22184       cost = COSTS_N_INSNS (n + 1);
22185       break;
22186 
22187     case TYPE_MUL:
22188       switch (get_attr_size (insn))
22189 	{
22190 	case SIZE_8:
22191 	  cost = COSTS_N_INSNS (n - 1) + rs6000_cost->mulsi_const9;
22192 	  break;
22193 	case SIZE_16:
22194 	  cost = COSTS_N_INSNS (n - 1) + rs6000_cost->mulsi_const;
22195 	  break;
22196 	case SIZE_32:
22197 	  cost = COSTS_N_INSNS (n - 1) + rs6000_cost->mulsi;
22198 	  break;
22199 	case SIZE_64:
22200 	  cost = COSTS_N_INSNS (n - 1) + rs6000_cost->muldi;
22201 	  break;
22202 	default:
22203 	  gcc_unreachable ();
22204 	}
22205       break;
22206     case TYPE_DIV:
22207       switch (get_attr_size (insn))
22208 	{
22209 	case SIZE_32:
22210 	  cost = COSTS_N_INSNS (n - 1) + rs6000_cost->divsi;
22211 	  break;
22212 	case SIZE_64:
22213 	  cost = COSTS_N_INSNS (n - 1) + rs6000_cost->divdi;
22214 	  break;
22215 	default:
22216 	  gcc_unreachable ();
22217 	}
22218       break;
22219 
22220     case TYPE_FP:
22221       cost = n * rs6000_cost->fp;
22222       break;
22223     case TYPE_DMUL:
22224       cost = n * rs6000_cost->dmul;
22225       break;
22226     case TYPE_SDIV:
22227       cost = n * rs6000_cost->sdiv;
22228       break;
22229     case TYPE_DDIV:
22230       cost = n * rs6000_cost->ddiv;
22231       break;
22232 
22233     case TYPE_SYNC:
22234     case TYPE_LOAD_L:
22235     case TYPE_MFCR:
22236     case TYPE_MFCRF:
22237       cost = COSTS_N_INSNS (n + 2);
22238       break;
22239 
22240     default:
22241       cost = COSTS_N_INSNS (n);
22242     }
22243 
22244   return cost;
22245 }
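/* So, when optimizing for speed, a plain load with no num_insns or
   cost attribute (length 4, hence n = 1) is charged COSTS_N_INSNS (2):
   one instruction plus an extra insn-equivalent approximating the L1
   hit latency.  */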
22246 
22247 /* Debug form of ADDRESS_COST that is selected if -mdebug=cost.  */
22248 
22249 static int
22250 rs6000_debug_address_cost (rtx x, machine_mode mode,
22251 			   addr_space_t as, bool speed)
22252 {
22253   int ret = TARGET_ADDRESS_COST (x, mode, as, speed);
22254 
22255   fprintf (stderr, "\nrs6000_address_cost, return = %d, speed = %s, x:\n",
22256 	   ret, speed ? "true" : "false");
22257   debug_rtx (x);
22258 
22259   return ret;
22260 }
22261 
22262 
22263 /* A C expression returning the cost of moving data from a register of class
22264    CLASS1 to one of CLASS2.  */
22265 
22266 static int
22267 rs6000_register_move_cost (machine_mode mode,
22268 			   reg_class_t from, reg_class_t to)
22269 {
22270   int ret;
22271   reg_class_t rclass;
22272 
22273   if (TARGET_DEBUG_COST)
22274     dbg_cost_ctrl++;
22275 
22276   /* If we have VSX, we can easily move between FPR and Altivec registers;
22277      otherwise we can only easily move within classes.
22278      Do this first so we give best-case answers for union classes
22279      containing both gprs and vsx regs.  */
22280   HARD_REG_SET to_vsx, from_vsx;
22281   to_vsx = reg_class_contents[to] & reg_class_contents[VSX_REGS];
22282   from_vsx = reg_class_contents[from] & reg_class_contents[VSX_REGS];
22283   if (!hard_reg_set_empty_p (to_vsx)
22284       && !hard_reg_set_empty_p (from_vsx)
22285       && (TARGET_VSX
22286 	  || hard_reg_set_intersect_p (to_vsx, from_vsx)))
22287     {
22288       int reg = FIRST_FPR_REGNO;
22289       if (TARGET_VSX
22290 	  || (TEST_HARD_REG_BIT (to_vsx, FIRST_ALTIVEC_REGNO)
22291 	      && TEST_HARD_REG_BIT (from_vsx, FIRST_ALTIVEC_REGNO)))
22292 	reg = FIRST_ALTIVEC_REGNO;
22293       ret = 2 * hard_regno_nregs (reg, mode);
22294     }
22295 
22296   /* Moves from/to GENERAL_REGS.  */
22297   else if ((rclass = from, reg_classes_intersect_p (to, GENERAL_REGS))
22298 	   || (rclass = to, reg_classes_intersect_p (from, GENERAL_REGS)))
22299     {
22300       if (rclass == FLOAT_REGS || rclass == ALTIVEC_REGS || rclass == VSX_REGS)
22301 	{
22302 	  if (TARGET_DIRECT_MOVE)
22303 	    {
22304 	      /* Keep the cost for direct moves above that for within
22305 		 a register class even if the actual processor cost is
22306 		 comparable.  We do this because a direct move insn
22307 		 can't be a nop, whereas with ideal register
22308 		 allocation a move within the same class might turn
22309 		 out to be a nop.  */
22310 	      if (rs6000_tune == PROCESSOR_POWER9
22311 		  || rs6000_tune == PROCESSOR_POWER10)
22312 		ret = 3 * hard_regno_nregs (FIRST_GPR_REGNO, mode);
22313 	      else
22314 		ret = 4 * hard_regno_nregs (FIRST_GPR_REGNO, mode);
22315 	      /* SFmode requires a conversion when moving between gprs
22316 		 and vsx.  */
22317 	      if (mode == SFmode)
22318 		ret += 2;
22319 	    }
22320 	  else
22321 	    ret = (rs6000_memory_move_cost (mode, rclass, false)
22322 		   + rs6000_memory_move_cost (mode, GENERAL_REGS, false));
22323 	}
22324 
22325       /* It's more expensive to move CR_REGS than CR0_REGS because of the
22326 	 shift.  */
22327       else if (rclass == CR_REGS)
22328 	ret = 4;
22329 
22330       /* For those processors that have slow LR/CTR moves, make them more
22331          expensive than memory in order to bias spills to memory.  */
22332       else if ((rs6000_tune == PROCESSOR_POWER6
22333 		|| rs6000_tune == PROCESSOR_POWER7
22334 		|| rs6000_tune == PROCESSOR_POWER8
22335 		|| rs6000_tune == PROCESSOR_POWER9)
22336 	       && reg_class_subset_p (rclass, SPECIAL_REGS))
22337         ret = 6 * hard_regno_nregs (FIRST_GPR_REGNO, mode);
22338 
22339       else
22340 	/* A move will cost one instruction per GPR moved.  */
22341 	ret = 2 * hard_regno_nregs (FIRST_GPR_REGNO, mode);
22342     }
22343 
22344   /* Everything else has to go through GENERAL_REGS.  */
22345   else
22346     ret = (rs6000_register_move_cost (mode, GENERAL_REGS, to)
22347 	   + rs6000_register_move_cost (mode, from, GENERAL_REGS));
22348 
22349   if (TARGET_DEBUG_COST)
22350     {
22351       if (dbg_cost_ctrl == 1)
22352 	fprintf (stderr,
22353 		 "rs6000_register_move_cost: ret=%d, mode=%s, from=%s, to=%s\n",
22354 		 ret, GET_MODE_NAME (mode), reg_class_names[from],
22355 		 reg_class_names[to]);
22356       dbg_cost_ctrl--;
22357     }
22358 
22359   return ret;
22360 }
22361 
22362 /* A C expression returning the cost of moving data of MODE from a register to
22363    or from memory.  */
22364 
22365 static int
22366 rs6000_memory_move_cost (machine_mode mode, reg_class_t rclass,
22367 			 bool in ATTRIBUTE_UNUSED)
22368 {
22369   int ret;
22370 
22371   if (TARGET_DEBUG_COST)
22372     dbg_cost_ctrl++;
22373 
22374   if (reg_classes_intersect_p (rclass, GENERAL_REGS))
22375     ret = 4 * hard_regno_nregs (FIRST_GPR_REGNO, mode);
22376   else if (reg_classes_intersect_p (rclass, FLOAT_REGS)
22377 	   || reg_classes_intersect_p (rclass, VSX_REGS))
22378     ret = 4 * hard_regno_nregs (FIRST_FPR_REGNO, mode);
22379   else if (reg_classes_intersect_p (rclass, ALTIVEC_REGS))
22380     ret = 4 * hard_regno_nregs (FIRST_ALTIVEC_REGNO, mode);
22381   else
22382     ret = 4 + rs6000_register_move_cost (mode, rclass, GENERAL_REGS);
22383 
22384   if (TARGET_DEBUG_COST)
22385     {
22386       if (dbg_cost_ctrl == 1)
22387 	fprintf (stderr,
22388 		 "rs6000_memory_move_cost: ret=%d, mode=%s, rclass=%s, in=%d\n",
22389 		 ret, GET_MODE_NAME (mode), reg_class_names[rclass], in);
22390       dbg_cost_ctrl--;
22391     }
22392 
22393   return ret;
22394 }
22395 
22396 /* Implement TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS.
22397 
22398    The register allocator chooses GEN_OR_VSX_REGS for the allocno
22399    class if GENERAL_REGS and VSX_REGS cost is lower than the memory
22400    cost.  This happens a lot when TARGET_DIRECT_MOVE makes the register
22401    move cost between GENERAL_REGS and VSX_REGS low.
22402 
22403    It might seem reasonable to use a union class.  After all, if usage
22404    of vsr is low and gpr high, it might make sense to spill gpr to vsr
22405    rather than memory.  However, in cases where register pressure of
22406    both is high, like the cactus_adm spec test, allowing
22407    GEN_OR_VSX_REGS as the allocno class results in bad decisions in
22408    the first scheduling pass.  This is partly due to an allocno of
22409    GEN_OR_VSX_REGS wrongly contributing to the GENERAL_REGS pressure
22410    class, which gives too high a pressure for GENERAL_REGS and too low
22411    for VSX_REGS.  So, force a choice of the subclass here.
22412 
22413    The best class is also the union if GENERAL_REGS and VSX_REGS have
22414    the same cost.  In that case we do use GEN_OR_VSX_REGS as the
22415    allocno class, since trying to narrow down the class by regno mode
22416    is prone to error.  For example, SImode is allowed in VSX regs and
22417    in some cases (e.g. gcc.target/powerpc/p9-xxbr-3.c do_bswap32_vect)
22418    it would be wrong to choose an allocno of GENERAL_REGS based on
22419    SImode.  */
22420 
22421 static reg_class_t
22422 rs6000_ira_change_pseudo_allocno_class (int regno ATTRIBUTE_UNUSED,
22423 					reg_class_t allocno_class,
22424 					reg_class_t best_class)
22425 {
22426   switch (allocno_class)
22427     {
22428     case GEN_OR_VSX_REGS:
22429       /* best_class must be a subset of allocno_class.  */
22430       gcc_checking_assert (best_class == GEN_OR_VSX_REGS
22431 			   || best_class == GEN_OR_FLOAT_REGS
22432 			   || best_class == VSX_REGS
22433 			   || best_class == ALTIVEC_REGS
22434 			   || best_class == FLOAT_REGS
22435 			   || best_class == GENERAL_REGS
22436 			   || best_class == BASE_REGS);
22437       /* Use best_class but choose wider classes when copying from the
22438 	 wider class to best_class is cheap.  This mimics IRA choice
22439 	 of allocno class.  */
22440       if (best_class == BASE_REGS)
22441 	return GENERAL_REGS;
22442       if (TARGET_VSX && best_class == FLOAT_REGS)
22443 	return VSX_REGS;
22444       return best_class;
22445 
22446     case VSX_REGS:
22447       if (best_class == ALTIVEC_REGS)
22448 	return ALTIVEC_REGS;
22449 
22450     default:
22451       break;
22452     }
22453 
22454   return allocno_class;
22455 }
22456 
22457 /* Load up a constant.  If the mode is a vector mode, splat the value across
22458    all of the vector elements.  */
22459 
22460 static rtx
22461 rs6000_load_constant_and_splat (machine_mode mode, REAL_VALUE_TYPE dconst)
22462 {
22463   rtx reg;
22464 
22465   if (mode == SFmode || mode == DFmode)
22466     {
22467       rtx d = const_double_from_real_value (dconst, mode);
22468       reg = force_reg (mode, d);
22469     }
22470   else if (mode == V4SFmode)
22471     {
22472       rtx d = const_double_from_real_value (dconst, SFmode);
22473       rtvec v = gen_rtvec (4, d, d, d, d);
22474       reg = gen_reg_rtx (mode);
22475       rs6000_expand_vector_init (reg, gen_rtx_PARALLEL (mode, v));
22476     }
22477   else if (mode == V2DFmode)
22478     {
22479       rtx d = const_double_from_real_value (dconst, DFmode);
22480       rtvec v = gen_rtvec (2, d, d);
22481       reg = gen_reg_rtx (mode);
22482       rs6000_expand_vector_init (reg, gen_rtx_PARALLEL (mode, v));
22483     }
22484   else
22485     gcc_unreachable ();
22486 
22487   return reg;
22488 }
22489 
22490 /* Generate an FMA instruction.  */
22491 
22492 static void
22493 rs6000_emit_madd (rtx target, rtx m1, rtx m2, rtx a)
22494 {
22495   machine_mode mode = GET_MODE (target);
22496   rtx dst;
22497 
22498   dst = expand_ternary_op (mode, fma_optab, m1, m2, a, target, 0);
22499   gcc_assert (dst != NULL);
22500 
22501   if (dst != target)
22502     emit_move_insn (target, dst);
22503 }
22504 
22505 /* Generate a FNMSUB instruction: dst = -fma(m1, m2, -a).  */
22506 
22507 static void
22508 rs6000_emit_nmsub (rtx dst, rtx m1, rtx m2, rtx a)
22509 {
22510   machine_mode mode = GET_MODE (dst);
22511   rtx r;
22512 
22513   /* This is a tad more complicated, since the fnma_optab is for
22514      a different expression: fma(-m1, m2, a), which is the same
22515      thing except in the case of signed zeros.
22516 
22517      Fortunately we know that if FMA is supported that FNMSUB is
22518      also supported in the ISA.  Just expand it directly.  */
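
  /* Illustrative note (not from the original source): the two forms can
     differ only in the sign of an exact zero.  If m1 * m2 == a, then
     fma (-m1, m2, a) = -(m1 * m2) + a rounds to +0.0, whereas
     -fma (m1, m2, -a) = -((m1 * m2) - a) = -(+0.0) = -0.0.  */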
22519 
22520   gcc_assert (optab_handler (fma_optab, mode) != CODE_FOR_nothing);
22521 
22522   r = gen_rtx_NEG (mode, a);
22523   r = gen_rtx_FMA (mode, m1, m2, r);
22524   r = gen_rtx_NEG (mode, r);
22525   emit_insn (gen_rtx_SET (dst, r));
22526 }
22527 
22528 /* Newton-Raphson approximation of floating point divide DST = N/D.  If NOTE_P,
22529    add a reg_note saying that this was a division.  Support both scalar and
22530    vector divide.  Assumes no trapping math and finite arguments.  */
22531 
22532 void
22533 rs6000_emit_swdiv (rtx dst, rtx n, rtx d, bool note_p)
22534 {
22535   machine_mode mode = GET_MODE (dst);
22536   rtx one, x0, e0, x1, xprev, eprev, xnext, enext, u, v;
22537   int i;
22538 
22539   /* Low precision estimates guarantee 5 bits of accuracy.  High
22540      precision estimates guarantee 14 bits of accuracy.  SFmode
22541      requires 23 bits of accuracy.  DFmode requires 52 bits of
22542      accuracy.  Each pass at least doubles the accuracy, leading
22543      to the following.  */
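  /* Worked example (added for illustration): a high-precision estimate
     starts at 14 bits, so one pass reaches at least 28 bits, covering
     SFmode's 23; a low-precision estimate needs 5 -> 10 -> 20 -> 40 bits,
     i.e. three passes.  DFmode's 52 bits require one extra pass each.  */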
22544   int passes = (TARGET_RECIP_PRECISION) ? 1 : 3;
22545   if (mode == DFmode || mode == V2DFmode)
22546     passes++;
22547 
22548   enum insn_code code = optab_handler (smul_optab, mode);
22549   insn_gen_fn gen_mul = GEN_FCN (code);
22550 
22551   gcc_assert (code != CODE_FOR_nothing);
22552 
22553   one = rs6000_load_constant_and_splat (mode, dconst1);
22554 
22555   /* x0 = 1./d estimate */
22556   x0 = gen_reg_rtx (mode);
22557   emit_insn (gen_rtx_SET (x0, gen_rtx_UNSPEC (mode, gen_rtvec (1, d),
22558 					      UNSPEC_FRES)));
22559 
22560   /* Each iteration but the last calculates x_(i+1) = x_i * (2 - d * x_i).  */
22561   if (passes > 1) {
22562 
22563     /* e0 = 1. - d * x0  */
22564     e0 = gen_reg_rtx (mode);
22565     rs6000_emit_nmsub (e0, d, x0, one);
22566 
22567     /* x1 = x0 + e0 * x0  */
22568     x1 = gen_reg_rtx (mode);
22569     rs6000_emit_madd (x1, e0, x0, x0);
22570 
22571     for (i = 0, xprev = x1, eprev = e0; i < passes - 2;
22572 	 ++i, xprev = xnext, eprev = enext) {
22573 
22574       /* enext = eprev * eprev  */
22575       enext = gen_reg_rtx (mode);
22576       emit_insn (gen_mul (enext, eprev, eprev));
22577 
22578       /* xnext = xprev + enext * xprev  */
22579       xnext = gen_reg_rtx (mode);
22580       rs6000_emit_madd (xnext, enext, xprev, xprev);
22581     }
22582 
22583   } else
22584     xprev = x0;
22585 
22586   /* The last iteration calculates x_(i+1) = n * x_i * (2 - d * x_i).  */
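
  /* Algebraic check (added for illustration) of the three statements below:
	u = n * x;  v = n - d * u;  dst = v * x + u
	    = (n - d * n * x) * x + n * x
	    = n * x * (2 - d * x).  */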
22587 
22588   /* u = n * xprev  */
22589   u = gen_reg_rtx (mode);
22590   emit_insn (gen_mul (u, n, xprev));
22591 
22592   /* v = n - (d * u)  */
22593   v = gen_reg_rtx (mode);
22594   rs6000_emit_nmsub (v, d, u, n);
22595 
22596   /* dst = (v * xprev) + u  */
22597   rs6000_emit_madd (dst, v, xprev, u);
22598 
22599   if (note_p)
22600     add_reg_note (get_last_insn (), REG_EQUAL, gen_rtx_DIV (mode, n, d));
22601 }
22602 
22603 /* Goldschmidt's Algorithm for single/double-precision floating point
22604    sqrt and rsqrt.  Assumes no trapping math and finite arguments.  */
22605 
22606 void
22607 rs6000_emit_swsqrt (rtx dst, rtx src, bool recip)
22608 {
22609   machine_mode mode = GET_MODE (src);
22610   rtx e = gen_reg_rtx (mode);
22611   rtx g = gen_reg_rtx (mode);
22612   rtx h = gen_reg_rtx (mode);
22613 
22614   /* Low precision estimates guarantee 5 bits of accuracy.  High
22615      precision estimates guarantee 14 bits of accuracy.  SFmode
22616      requires 23 bits of accuracy.  DFmode requires 52 bits of
22617      accuracy.  Each pass at least doubles the accuracy, leading
22618      to the following.  */
22619   int passes = (TARGET_RECIP_PRECISION) ? 1 : 3;
22620   if (mode == DFmode || mode == V2DFmode)
22621     passes++;
22622 
22623   int i;
22624   rtx mhalf;
22625   enum insn_code code = optab_handler (smul_optab, mode);
22626   insn_gen_fn gen_mul = GEN_FCN (code);
22627 
22628   gcc_assert (code != CODE_FOR_nothing);
22629 
22630   mhalf = rs6000_load_constant_and_splat (mode, dconsthalf);
22631 
22632   /* e = rsqrt estimate */
22633   emit_insn (gen_rtx_SET (e, gen_rtx_UNSPEC (mode, gen_rtvec (1, src),
22634 					     UNSPEC_RSQRT)));
22635 
22636   /* If (src == 0.0) filter infinity to prevent NaN for sqrt(0.0).  */
22637   if (!recip)
22638     {
22639       rtx zero = force_reg (mode, CONST0_RTX (mode));
22640 
22641       if (mode == SFmode)
22642 	{
22643 	  rtx target = emit_conditional_move (e, { GT, src, zero, mode },
22644 					      e, zero, mode, 0);
22645 	  if (target != e)
22646 	    emit_move_insn (e, target);
22647 	}
22648       else
22649 	{
22650 	  rtx cond = gen_rtx_GT (VOIDmode, e, zero);
22651 	  rs6000_emit_vector_cond_expr (e, e, zero, cond, src, zero);
22652 	}
22653     }
22654 
22655   /* g = sqrt estimate.  */
22656   emit_insn (gen_mul (g, e, src));
22657   /* h = 1/(2*sqrt) estimate.  */
22658   emit_insn (gen_mul (h, e, mhalf));
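
  /* Sketch of the iteration that follows (added for illustration): with
     g ~ sqrt(src) and h ~ 1/(2*sqrt(src)), each pass computes the residual
     t = 1/2 - g*h and refines g' = g + g*t, h' = h + h*t.  The product g*h
     converges to 1/2, so g converges to sqrt(src) and 2*h to 1/sqrt(src).  */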
22659 
22660   if (recip)
22661     {
22662       if (passes == 1)
22663 	{
22664 	  rtx t = gen_reg_rtx (mode);
22665 	  rs6000_emit_nmsub (t, g, h, mhalf);
22666 	  /* Apply correction directly to 1/rsqrt estimate.  */
22667 	  rs6000_emit_madd (dst, e, t, e);
22668 	}
22669       else
22670 	{
22671 	  for (i = 0; i < passes; i++)
22672 	    {
22673 	      rtx t1 = gen_reg_rtx (mode);
22674 	      rtx g1 = gen_reg_rtx (mode);
22675 	      rtx h1 = gen_reg_rtx (mode);
22676 
22677 	      rs6000_emit_nmsub (t1, g, h, mhalf);
22678 	      rs6000_emit_madd (g1, g, t1, g);
22679 	      rs6000_emit_madd (h1, h, t1, h);
22680 
22681 	      g = g1;
22682 	      h = h1;
22683 	    }
22684 	  /* Multiply by 2 for 1/rsqrt.  */
22685 	  emit_insn (gen_add3_insn (dst, h, h));
22686 	}
22687     }
22688   else
22689     {
22690       rtx t = gen_reg_rtx (mode);
22691       rs6000_emit_nmsub (t, g, h, mhalf);
22692       rs6000_emit_madd (dst, g, t, g);
22693     }
22694 
22695   return;
22696 }
22697 
22698 /* Emit popcount intrinsic on TARGET_POPCNTB (Power5) and TARGET_POPCNTD
22699    (Power7) targets.  DST is the target, and SRC is the argument operand.  */
22700 
22701 void
22702 rs6000_emit_popcount (rtx dst, rtx src)
22703 {
22704   machine_mode mode = GET_MODE (dst);
22705   rtx tmp1, tmp2;
22706 
22707   /* Use the PPC ISA 2.06 popcnt{w,d} instruction if we can.  */
22708   if (TARGET_POPCNTD)
22709     {
22710       if (mode == SImode)
22711 	emit_insn (gen_popcntdsi2 (dst, src));
22712       else
22713 	emit_insn (gen_popcntddi2 (dst, src));
22714       return;
22715     }
22716 
22717   tmp1 = gen_reg_rtx (mode);
22718 
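  /* Explanatory note (added for illustration): popcntb leaves the popcount
     of each byte in that byte.  Multiplying by 0x01010101 sums the byte
     counts into the most significant byte -- e.g. 0x04040000 * 0x01010101
     has 4 + 4 + 0 + 0 = 8 in its top byte -- and the final right shift
     extracts that byte.  The DImode path does the same with eight bytes.  */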
22719   if (mode == SImode)
22720     {
22721       emit_insn (gen_popcntbsi2 (tmp1, src));
22722       tmp2 = expand_mult (SImode, tmp1, GEN_INT (0x01010101),
22723 			   NULL_RTX, 0);
22724       tmp2 = force_reg (SImode, tmp2);
22725       emit_insn (gen_lshrsi3 (dst, tmp2, GEN_INT (24)));
22726     }
22727   else
22728     {
22729       emit_insn (gen_popcntbdi2 (tmp1, src));
22730       tmp2 = expand_mult (DImode, tmp1,
22731 			  GEN_INT ((HOST_WIDE_INT)
22732 				   0x01010101 << 32 | 0x01010101),
22733 			  NULL_RTX, 0);
22734       tmp2 = force_reg (DImode, tmp2);
22735       emit_insn (gen_lshrdi3 (dst, tmp2, GEN_INT (56)));
22736     }
22737 }
22738 
22739 
22740 /* Emit parity intrinsic on TARGET_POPCNTB targets.  DST is the
22741    target, and SRC is the argument operand.  */
22742 
22743 void
22744 rs6000_emit_parity (rtx dst, rtx src)
22745 {
22746   machine_mode mode = GET_MODE (dst);
22747   rtx tmp;
22748 
22749   tmp = gen_reg_rtx (mode);
22750 
22751   /* Use the PPC ISA 2.05 prtyw/prtyd instruction if we can.  */
22752   if (TARGET_CMPB)
22753     {
22754       if (mode == SImode)
22755 	{
22756 	  emit_insn (gen_popcntbsi2 (tmp, src));
22757 	  emit_insn (gen_paritysi2_cmpb (dst, tmp));
22758 	}
22759       else
22760 	{
22761 	  emit_insn (gen_popcntbdi2 (tmp, src));
22762 	  emit_insn (gen_paritydi2_cmpb (dst, tmp));
22763 	}
22764       return;
22765     }
22766 
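  /* Explanatory note (added for illustration): both fallbacks below use
     parity (x) = popcount (x) & 1.  The shift+xor variant folds the
     per-byte counts from popcntb together; since bit 0 of a ^ b equals
     (a + b) & 1, each fold preserves the total count modulo 2.  */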
22767   if (mode == SImode)
22768     {
22769       /* Is mult+shift >= shift+xor+shift+xor?  */
22770       if (rs6000_cost->mulsi_const >= COSTS_N_INSNS (3))
22771 	{
22772 	  rtx tmp1, tmp2, tmp3, tmp4;
22773 
22774 	  tmp1 = gen_reg_rtx (SImode);
22775 	  emit_insn (gen_popcntbsi2 (tmp1, src));
22776 
22777 	  tmp2 = gen_reg_rtx (SImode);
22778 	  emit_insn (gen_lshrsi3 (tmp2, tmp1, GEN_INT (16)));
22779 	  tmp3 = gen_reg_rtx (SImode);
22780 	  emit_insn (gen_xorsi3 (tmp3, tmp1, tmp2));
22781 
22782 	  tmp4 = gen_reg_rtx (SImode);
22783 	  emit_insn (gen_lshrsi3 (tmp4, tmp3, GEN_INT (8)));
22784 	  emit_insn (gen_xorsi3 (tmp, tmp3, tmp4));
22785 	}
22786       else
22787 	rs6000_emit_popcount (tmp, src);
22788       emit_insn (gen_andsi3 (dst, tmp, const1_rtx));
22789     }
22790   else
22791     {
22792       /* Is mult+shift >= shift+xor+shift+xor+shift+xor?  */
22793       if (rs6000_cost->muldi >= COSTS_N_INSNS (5))
22794 	{
22795 	  rtx tmp1, tmp2, tmp3, tmp4, tmp5, tmp6;
22796 
22797 	  tmp1 = gen_reg_rtx (DImode);
22798 	  emit_insn (gen_popcntbdi2 (tmp1, src));
22799 
22800 	  tmp2 = gen_reg_rtx (DImode);
22801 	  emit_insn (gen_lshrdi3 (tmp2, tmp1, GEN_INT (32)));
22802 	  tmp3 = gen_reg_rtx (DImode);
22803 	  emit_insn (gen_xordi3 (tmp3, tmp1, tmp2));
22804 
22805 	  tmp4 = gen_reg_rtx (DImode);
22806 	  emit_insn (gen_lshrdi3 (tmp4, tmp3, GEN_INT (16)));
22807 	  tmp5 = gen_reg_rtx (DImode);
22808 	  emit_insn (gen_xordi3 (tmp5, tmp3, tmp4));
22809 
22810 	  tmp6 = gen_reg_rtx (DImode);
22811 	  emit_insn (gen_lshrdi3 (tmp6, tmp5, GEN_INT (8)));
22812 	  emit_insn (gen_xordi3 (tmp, tmp5, tmp6));
22813 	}
22814       else
22815         rs6000_emit_popcount (tmp, src);
22816       emit_insn (gen_anddi3 (dst, tmp, const1_rtx));
22817     }
22818 }
22819 
22820 /* Expand an Altivec constant permutation for little endian mode.
22821    OP0 and OP1 are the input vectors and TARGET is the output vector.
22822    SEL specifies the constant permutation vector.
22823 
22824    There are two issues: First, the two input operands must be
22825    swapped so that together they form a double-wide array in LE
22826    order.  Second, the vperm instruction has surprising behavior
22827    in LE mode:  it interprets the elements of the source vectors
22828    in BE mode ("left to right") and interprets the elements of
22829    the destination vector in LE mode ("right to left").  To
22830    correct for this, we must subtract each element of the permute
22831    control vector from 31.
22832 
22833    For example, suppose we want to concatenate vr10 = {0, 1, 2, 3}
22834    with vr11 = {4, 5, 6, 7} and extract {0, 2, 4, 6} using a vperm.
22835    We place {0,1,2,3,8,9,10,11,16,17,18,19,24,25,26,27} in vr12 to
22836    serve as the permute control vector.  Then, in BE mode,
22837 
22838      vperm 9,10,11,12
22839 
22840    places the desired result in vr9.  However, in LE mode the
22841    vector contents will be
22842 
22843      vr10 = 00000003 00000002 00000001 00000000
22844      vr11 = 00000007 00000006 00000005 00000004
22845 
22846    The result of the vperm using the same permute control vector is
22847 
22848      vr9  = 05000000 07000000 01000000 03000000
22849 
22850    That is, the leftmost 4 bytes of vr10 are interpreted as the
22851    source for the rightmost 4 bytes of vr9, and so on.
22852 
22853    If we change the permute control vector to
22854 
22855      vr12 = {31,30,29,28,23,22,21,20,15,14,13,12,7,6,5,4}
22856 
22857    and issue
22858 
22859      vperm 9,11,10,12
22860 
22861    we get the desired
22862 
22863    vr9  = 00000006 00000004 00000002 00000000.  */
22864 
22865 static void
22866 altivec_expand_vec_perm_const_le (rtx target, rtx op0, rtx op1,
22867 				  const vec_perm_indices &sel)
22868 {
22869   unsigned int i;
22870   rtx perm[16];
22871   rtx constv, unspec;
22872 
22873   /* Unpack and adjust the constant selector.  */
22874   for (i = 0; i < 16; ++i)
22875     {
22876       unsigned int elt = 31 - (sel[i] & 31);
22877       perm[i] = GEN_INT (elt);
22878     }
22879 
22880   /* Expand to a permute, swapping the inputs and using the
22881      adjusted selector.  */
22882   if (!REG_P (op0))
22883     op0 = force_reg (V16QImode, op0);
22884   if (!REG_P (op1))
22885     op1 = force_reg (V16QImode, op1);
22886 
22887   constv = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, perm));
22888   constv = force_reg (V16QImode, constv);
22889   unspec = gen_rtx_UNSPEC (V16QImode, gen_rtvec (3, op1, op0, constv),
22890 			   UNSPEC_VPERM);
22891   if (!REG_P (target))
22892     {
22893       rtx tmp = gen_reg_rtx (V16QImode);
22894       emit_move_insn (tmp, unspec);
22895       unspec = tmp;
22896     }
22897 
22898   emit_move_insn (target, unspec);
22899 }
22900 
22901 /* Similarly to altivec_expand_vec_perm_const_le, we must adjust the
22902    permute control vector.  But here it's not a constant, so we must
22903    generate a vector NAND or NOR to do the adjustment.  */
22904 
22905 void
22906 altivec_expand_vec_perm_le (rtx operands[4])
22907 {
22908   rtx notx, iorx, unspec;
22909   rtx target = operands[0];
22910   rtx op0 = operands[1];
22911   rtx op1 = operands[2];
22912   rtx sel = operands[3];
22913   rtx tmp = target;
22914   rtx norreg = gen_reg_rtx (V16QImode);
22915   machine_mode mode = GET_MODE (target);
22916 
22917   /* Get everything in regs so the pattern matches.  */
22918   if (!REG_P (op0))
22919     op0 = force_reg (mode, op0);
22920   if (!REG_P (op1))
22921     op1 = force_reg (mode, op1);
22922   if (!REG_P (sel))
22923     sel = force_reg (V16QImode, sel);
22924   if (!REG_P (target))
22925     tmp = gen_reg_rtx (mode);
22926 
22927   if (TARGET_P9_VECTOR)
22928     {
22929       unspec = gen_rtx_UNSPEC (mode, gen_rtvec (3, op1, op0, sel),
22930 			       UNSPEC_VPERMR);
22931     }
22932   else
22933     {
22934       /* Invert the selector with a VNAND if available, else a VNOR.
22935 	 The VNAND is preferred for future fusion opportunities.  */
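      /* Note (added for illustration): with identical operands both
	 (ior (not x) (not x)) and (and (not x) (not x)) compute ~x; they
	 are the canonical RTL forms that match the vnand and vnor
	 patterns, respectively.  */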
22936       notx = gen_rtx_NOT (V16QImode, sel);
22937       iorx = (TARGET_P8_VECTOR
22938 	      ? gen_rtx_IOR (V16QImode, notx, notx)
22939 	      : gen_rtx_AND (V16QImode, notx, notx));
22940       emit_insn (gen_rtx_SET (norreg, iorx));
22941 
22942       /* Permute with operands reversed and adjusted selector.  */
22943       unspec = gen_rtx_UNSPEC (mode, gen_rtvec (3, op1, op0, norreg),
22944 			       UNSPEC_VPERM);
22945     }
22946 
22947   /* Copy into target, possibly by way of a register.  */
22948   if (!REG_P (target))
22949     {
22950       emit_move_insn (tmp, unspec);
22951       unspec = tmp;
22952     }
22953 
22954   emit_move_insn (target, unspec);
22955 }
22956 
22957 /* Expand an Altivec constant permutation.  Return true if we match
22958    an efficient implementation; false to fall back to VPERM.
22959 
22960    OP0 and OP1 are the input vectors and TARGET is the output vector.
22961    SEL specifies the constant permutation vector.  */
22962 
22963 static bool
22964 altivec_expand_vec_perm_const (rtx target, rtx op0, rtx op1,
22965 			       const vec_perm_indices &sel)
22966 {
22967   struct altivec_perm_insn {
22968     HOST_WIDE_INT mask;
22969     enum insn_code impl;
22970     unsigned char perm[16];
22971   };
22972   static const struct altivec_perm_insn patterns[] = {
22973     {OPTION_MASK_ALTIVEC,
22974      CODE_FOR_altivec_vpkuhum_direct,
22975      {1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31}},
22976     {OPTION_MASK_ALTIVEC,
22977      CODE_FOR_altivec_vpkuwum_direct,
22978      {2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31}},
22979     {OPTION_MASK_ALTIVEC,
22980      BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghb_direct
22981 		      : CODE_FOR_altivec_vmrglb_direct,
22982      {0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23}},
22983     {OPTION_MASK_ALTIVEC,
22984      BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghh_direct
22985 		      : CODE_FOR_altivec_vmrglh_direct,
22986      {0, 1, 16, 17, 2, 3, 18, 19, 4, 5, 20, 21, 6, 7, 22, 23}},
22987     {OPTION_MASK_ALTIVEC,
22988      BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghw_direct_v4si
22989 		      : CODE_FOR_altivec_vmrglw_direct_v4si,
22990      {0, 1, 2, 3, 16, 17, 18, 19, 4, 5, 6, 7, 20, 21, 22, 23}},
22991     {OPTION_MASK_ALTIVEC,
22992      BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglb_direct
22993 		      : CODE_FOR_altivec_vmrghb_direct,
22994      {8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31}},
22995     {OPTION_MASK_ALTIVEC,
22996      BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglh_direct
22997 		      : CODE_FOR_altivec_vmrghh_direct,
22998      {8, 9, 24, 25, 10, 11, 26, 27, 12, 13, 28, 29, 14, 15, 30, 31}},
22999     {OPTION_MASK_ALTIVEC,
23000      BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglw_direct_v4si
23001 		      : CODE_FOR_altivec_vmrghw_direct_v4si,
23002      {8, 9, 10, 11, 24, 25, 26, 27, 12, 13, 14, 15, 28, 29, 30, 31}},
23003     {OPTION_MASK_P8_VECTOR,
23004      BYTES_BIG_ENDIAN ? CODE_FOR_p8_vmrgew_v4sf_direct
23005 		      : CODE_FOR_p8_vmrgow_v4sf_direct,
23006      {0, 1, 2, 3, 16, 17, 18, 19, 8, 9, 10, 11, 24, 25, 26, 27}},
23007     {OPTION_MASK_P8_VECTOR,
23008      BYTES_BIG_ENDIAN ? CODE_FOR_p8_vmrgow_v4sf_direct
23009 		      : CODE_FOR_p8_vmrgew_v4sf_direct,
23010      {4, 5, 6, 7, 20, 21, 22, 23, 12, 13, 14, 15, 28, 29, 30, 31}},
23011     {OPTION_MASK_VSX, CODE_FOR_vsx_xxpermdi_v16qi,
23012      {0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 20, 21, 22, 23}},
23013     {OPTION_MASK_VSX, CODE_FOR_vsx_xxpermdi_v16qi,
23014      {8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23}},
23015     {OPTION_MASK_VSX, CODE_FOR_vsx_xxpermdi_v16qi,
23016      {0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31}},
23017     {OPTION_MASK_VSX, CODE_FOR_vsx_xxpermdi_v16qi,
23018      {8, 9, 10, 11, 12, 13, 14, 15, 24, 25, 26, 27, 28, 29, 30, 31}}};
23019 
23020   unsigned int i, j, elt, which;
23021   unsigned char perm[16];
23022   rtx x;
23023   bool one_vec;
23024 
23025   /* Unpack the constant selector.  */
23026   for (i = which = 0; i < 16; ++i)
23027     {
23028       elt = sel[i] & 31;
23029       which |= (elt < 16 ? 1 : 2);
23030       perm[i] = elt;
23031     }
23032 
23033   /* Simplify the constant selector based on operands.  */
23034   switch (which)
23035     {
23036     default:
23037       gcc_unreachable ();
23038 
23039     case 3:
23040       one_vec = false;
23041       if (!rtx_equal_p (op0, op1))
23042 	break;
23043       /* FALLTHRU */
23044 
23045     case 2:
23046       for (i = 0; i < 16; ++i)
23047 	perm[i] &= 15;
23048       op0 = op1;
23049       one_vec = true;
23050       break;
23051 
23052     case 1:
23053       op1 = op0;
23054       one_vec = true;
23055       break;
23056     }
23057 
23058   /* Look for splat patterns.  */
23059   if (one_vec)
23060     {
23061       elt = perm[0];
23062 
23063       for (i = 0; i < 16; ++i)
23064 	if (perm[i] != elt)
23065 	  break;
23066       if (i == 16)
23067 	{
23068           if (!BYTES_BIG_ENDIAN)
23069             elt = 15 - elt;
23070 	  emit_insn (gen_altivec_vspltb_direct (target, op0, GEN_INT (elt)));
23071 	  return true;
23072 	}
23073 
23074       if (elt % 2 == 0)
23075 	{
23076 	  for (i = 0; i < 16; i += 2)
23077 	    if (perm[i] != elt || perm[i + 1] != elt + 1)
23078 	      break;
23079 	  if (i == 16)
23080 	    {
23081 	      int field = BYTES_BIG_ENDIAN ? elt / 2 : 7 - elt / 2;
23082 	      x = gen_reg_rtx (V8HImode);
23083 	      emit_insn (gen_altivec_vsplth_direct (x, gen_lowpart (V8HImode, op0),
23084 						    GEN_INT (field)));
23085 	      emit_move_insn (target, gen_lowpart (V16QImode, x));
23086 	      return true;
23087 	    }
23088 	}
23089 
23090       if (elt % 4 == 0)
23091 	{
23092 	  for (i = 0; i < 16; i += 4)
23093 	    if (perm[i] != elt
23094 		|| perm[i + 1] != elt + 1
23095 		|| perm[i + 2] != elt + 2
23096 		|| perm[i + 3] != elt + 3)
23097 	      break;
23098 	  if (i == 16)
23099 	    {
23100 	      int field = BYTES_BIG_ENDIAN ? elt / 4 : 3 - elt / 4;
23101 	      x = gen_reg_rtx (V4SImode);
23102 	      emit_insn (gen_altivec_vspltw_direct (x, gen_lowpart (V4SImode, op0),
23103 						    GEN_INT (field)));
23104 	      emit_move_insn (target, gen_lowpart (V16QImode, x));
23105 	      return true;
23106 	    }
23107 	}
23108     }
23109 
23110   /* Look for merge and pack patterns.  */
23111   for (j = 0; j < ARRAY_SIZE (patterns); ++j)
23112     {
23113       bool swapped;
23114 
23115       if ((patterns[j].mask & rs6000_isa_flags) == 0)
23116 	continue;
23117 
23118       elt = patterns[j].perm[0];
23119       if (perm[0] == elt)
23120 	swapped = false;
23121       else if (perm[0] == elt + 16)
23122 	swapped = true;
23123       else
23124 	continue;
23125       for (i = 1; i < 16; ++i)
23126 	{
23127 	  elt = patterns[j].perm[i];
23128 	  if (swapped)
23129 	    elt = (elt >= 16 ? elt - 16 : elt + 16);
23130 	  else if (one_vec && elt >= 16)
23131 	    elt -= 16;
23132 	  if (perm[i] != elt)
23133 	    break;
23134 	}
23135       if (i == 16)
23136 	{
23137 	  enum insn_code icode = patterns[j].impl;
23138 	  machine_mode omode = insn_data[icode].operand[0].mode;
23139 	  machine_mode imode = insn_data[icode].operand[1].mode;
23140 
23141 	  rtx perm_idx = GEN_INT (0);
23142 	  if (icode == CODE_FOR_vsx_xxpermdi_v16qi)
23143 	    {
23144 	      int perm_val = 0;
23145 	      if (one_vec)
23146 		{
23147 		  if (perm[0] == 8)
23148 		    perm_val |= 2;
23149 		  if (perm[8] == 8)
23150 		    perm_val |= 1;
23151 		}
23152 	      else
23153 		{
23154 		  if (perm[0] != 0)
23155 		    perm_val |= 2;
23156 		  if (perm[8] != 16)
23157 		    perm_val |= 1;
23158 		}
23159 	      perm_idx = GEN_INT (perm_val);
23160 	    }
23161 
23162 	  /* For little-endian, don't use vpkuwum and vpkuhum if the
23163 	     underlying vector type is not V4SI or V8HI, respectively.
23164 	     For example, using vpkuwum with a V8HI picks up the even
23165 	     halfwords (BE numbering) when the even halfwords (LE
23166 	     numbering) are what we need.  */
23167 	  if (!BYTES_BIG_ENDIAN
23168 	      && icode == CODE_FOR_altivec_vpkuwum_direct
23169 	      && ((REG_P (op0)
23170 		   && GET_MODE (op0) != V4SImode)
23171 		  || (SUBREG_P (op0)
23172 		      && GET_MODE (XEXP (op0, 0)) != V4SImode)))
23173 	    continue;
23174 	  if (!BYTES_BIG_ENDIAN
23175 	      && icode == CODE_FOR_altivec_vpkuhum_direct
23176 	      && ((REG_P (op0)
23177 		   && GET_MODE (op0) != V8HImode)
23178 		  || (SUBREG_P (op0)
23179 		      && GET_MODE (XEXP (op0, 0)) != V8HImode)))
23180 	    continue;
23181 
23182           /* For little-endian, the two input operands must be swapped
23183              (or swapped back) to ensure proper right-to-left numbering
23184              from 0 to 2N-1.  */
23185 	  if (swapped ^ !BYTES_BIG_ENDIAN
23186 	      && icode != CODE_FOR_vsx_xxpermdi_v16qi)
23187 	    std::swap (op0, op1);
23188 	  if (imode != V16QImode)
23189 	    {
23190 	      op0 = gen_lowpart (imode, op0);
23191 	      op1 = gen_lowpart (imode, op1);
23192 	    }
23193 	  if (omode == V16QImode)
23194 	    x = target;
23195 	  else
23196 	    x = gen_reg_rtx (omode);
23197 	  if (icode == CODE_FOR_vsx_xxpermdi_v16qi)
23198 	    emit_insn (GEN_FCN (icode) (x, op0, op1, perm_idx));
23199 	  else
23200 	    emit_insn (GEN_FCN (icode) (x, op0, op1));
23201 	  if (omode != V16QImode)
23202 	    emit_move_insn (target, gen_lowpart (V16QImode, x));
23203 	  return true;
23204 	}
23205     }
23206 
23207   if (!BYTES_BIG_ENDIAN)
23208     {
23209       altivec_expand_vec_perm_const_le (target, op0, op1, sel);
23210       return true;
23211     }
23212 
23213   return false;
23214 }
23215 
23216 /* Expand a VSX Permute Doubleword constant permutation.
23217    Return true if we match an efficient implementation.  */
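
/* Illustrative note: PERM0 and PERM1 index the doubleword concatenation of
   OP0 and OP1, so values 0 and 1 select the halves of OP0 and values 2 and 3
   the halves of OP1; e.g. PERM0 = 0 with PERM1 = 3 pairs element 0 of OP0
   with element 1 of OP1 in a single xxpermdi.  */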
23218 
23219 static bool
23220 rs6000_expand_vec_perm_const_1 (rtx target, rtx op0, rtx op1,
23221 				unsigned char perm0, unsigned char perm1)
23222 {
23223   rtx x;
23224 
23225   /* If both selectors come from the same operand, fold to single op.  */
23226   if ((perm0 & 2) == (perm1 & 2))
23227     {
23228       if (perm0 & 2)
23229 	op0 = op1;
23230       else
23231 	op1 = op0;
23232     }
23233   /* If both operands are equal, fold to simpler permutation.  */
23234   if (rtx_equal_p (op0, op1))
23235     {
23236       perm0 = perm0 & 1;
23237       perm1 = (perm1 & 1) + 2;
23238     }
23239   /* If the first selector comes from the second operand, swap.  */
23240   else if (perm0 & 2)
23241     {
23242       if (perm1 & 2)
23243 	return false;
23244       perm0 -= 2;
23245       perm1 += 2;
23246       std::swap (op0, op1);
23247     }
23248   /* If the second selector does not come from the second operand, fail.  */
23249   else if ((perm1 & 2) == 0)
23250     return false;
23251 
23252   /* Success! */
23253   if (target != NULL)
23254     {
23255       machine_mode vmode, dmode;
23256       rtvec v;
23257 
23258       vmode = GET_MODE (target);
23259       gcc_assert (GET_MODE_NUNITS (vmode) == 2);
23260       dmode = mode_for_vector (GET_MODE_INNER (vmode), 4).require ();
23261       x = gen_rtx_VEC_CONCAT (dmode, op0, op1);
23262       v = gen_rtvec (2, GEN_INT (perm0), GEN_INT (perm1));
23263       x = gen_rtx_VEC_SELECT (vmode, x, gen_rtx_PARALLEL (VOIDmode, v));
23264       emit_insn (gen_rtx_SET (target, x));
23265     }
23266   return true;
23267 }
23268 
23269 /* Implement TARGET_VECTORIZE_VEC_PERM_CONST.  */
23270 
23271 static bool
23272 rs6000_vectorize_vec_perm_const (machine_mode vmode, rtx target, rtx op0,
23273 				 rtx op1, const vec_perm_indices &sel)
23274 {
23275   bool testing_p = !target;
23276 
23277   /* AltiVec (and thus VSX) can handle arbitrary permutations.  */
23278   if (TARGET_ALTIVEC && testing_p)
23279     return true;
23280 
23281   if (op0)
23282     {
23283       rtx nop0 = force_reg (vmode, op0);
23284       if (op0 == op1)
23285         op1 = nop0;
23286       op0 = nop0;
23287     }
23288   if (op1)
23289     op1 = force_reg (vmode, op1);
23290 
23291   /* Check for ps_merge* or xxpermdi insns.  */
23292   if ((vmode == V2DFmode || vmode == V2DImode) && VECTOR_MEM_VSX_P (vmode))
23293     {
23294       if (testing_p)
23295 	{
23296 	  op0 = gen_raw_REG (vmode, LAST_VIRTUAL_REGISTER + 1);
23297 	  op1 = gen_raw_REG (vmode, LAST_VIRTUAL_REGISTER + 2);
23298 	}
23299       if (rs6000_expand_vec_perm_const_1 (target, op0, op1, sel[0], sel[1]))
23300 	return true;
23301     }
23302 
23303   if (TARGET_ALTIVEC)
23304     {
23305       /* Force the target-independent code to lower to V16QImode.  */
23306       if (vmode != V16QImode)
23307 	return false;
23308       if (altivec_expand_vec_perm_const (target, op0, op1, sel))
23309 	return true;
23310     }
23311 
23312   return false;
23313 }
23314 
23315 /* A subroutine for rs6000_expand_extract_even & rs6000_expand_interleave.
23316    OP0 and OP1 are the input vectors and TARGET is the output vector.
23317    PERM specifies the constant permutation vector.  */
23318 
23319 static void
23320 rs6000_do_expand_vec_perm (rtx target, rtx op0, rtx op1,
23321 			   machine_mode vmode, const vec_perm_builder &perm)
23322 {
23323   rtx x = expand_vec_perm_const (vmode, op0, op1, perm, BLKmode, target);
23324   if (x != target)
23325     emit_move_insn (target, x);
23326 }
23327 
23328 /* Expand an extract even operation.  */
23329 
23330 void
23331 rs6000_expand_extract_even (rtx target, rtx op0, rtx op1)
23332 {
23333   machine_mode vmode = GET_MODE (target);
23334   unsigned i, nelt = GET_MODE_NUNITS (vmode);
23335   vec_perm_builder perm (nelt, nelt, 1);
23336 
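  /* E.g. for nelt == 4 this builds the selector {0, 2, 4, 6}, taking the
     even elements of the OP0/OP1 concatenation.  */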
23337   for (i = 0; i < nelt; i++)
23338     perm.quick_push (i * 2);
23339 
23340   rs6000_do_expand_vec_perm (target, op0, op1, vmode, perm);
23341 }
23342 
23343 /* Expand a vector interleave operation.  */
23344 
23345 void
23346 rs6000_expand_interleave (rtx target, rtx op0, rtx op1, bool highp)
23347 {
23348   machine_mode vmode = GET_MODE (target);
23349   unsigned i, high, nelt = GET_MODE_NUNITS (vmode);
23350   vec_perm_builder perm (nelt, nelt, 1);
23351 
23352   high = (highp ? 0 : nelt / 2);
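  /* E.g. for nelt == 4 this builds {0, 4, 1, 5} when HIGHP and {2, 6, 3, 7}
     otherwise, pairing corresponding elements of the two inputs.  */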
23353   for (i = 0; i < nelt / 2; i++)
23354     {
23355       perm.quick_push (i + high);
23356       perm.quick_push (i + nelt + high);
23357     }
23358 
23359   rs6000_do_expand_vec_perm (target, op0, op1, vmode, perm);
23360 }
23361 
23362 /* Scale a V2DF vector SRC by 2**SCALE and place the result in TGT.  */
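/* (For example, SCALE = 4 multiplies each of the two lanes by 16.0.)  */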
23363 void
23364 rs6000_scale_v2df (rtx tgt, rtx src, int scale)
23365 {
23366   HOST_WIDE_INT hwi_scale (scale);
23367   REAL_VALUE_TYPE r_pow;
23368   rtvec v = rtvec_alloc (2);
23369   rtx elt;
23370   rtx scale_vec = gen_reg_rtx (V2DFmode);
23371   (void)real_powi (&r_pow, DFmode, &dconst2, hwi_scale);
23372   elt = const_double_from_real_value (r_pow, DFmode);
23373   RTVEC_ELT (v, 0) = elt;
23374   RTVEC_ELT (v, 1) = elt;
23375   rs6000_expand_vector_init (scale_vec, gen_rtx_PARALLEL (V2DFmode, v));
23376   emit_insn (gen_mulv2df3 (tgt, src, scale_vec));
23377 }
23378 
23379 /* Return an RTX representing where to find the function value of a
23380    function returning MODE.  */
23381 static rtx
23382 rs6000_complex_function_value (machine_mode mode)
23383 {
23384   unsigned int regno;
23385   rtx r1, r2;
23386   machine_mode inner = GET_MODE_INNER (mode);
23387   unsigned int inner_bytes = GET_MODE_UNIT_SIZE (mode);
23388 
23389   if (TARGET_FLOAT128_TYPE
23390       && (mode == KCmode
23391 	  || (mode == TCmode && TARGET_IEEEQUAD)))
23392     regno = ALTIVEC_ARG_RETURN;
23393 
23394   else if (FLOAT_MODE_P (mode) && TARGET_HARD_FLOAT)
23395     regno = FP_ARG_RETURN;
23396 
23397   else
23398     {
23399       regno = GP_ARG_RETURN;
23400 
23401       /* 32-bit is OK since it'll go in r3/r4.  */
23402       if (TARGET_32BIT && inner_bytes >= 4)
23403 	return gen_rtx_REG (mode, regno);
23404     }
23405 
23406   if (inner_bytes >= 8)
23407     return gen_rtx_REG (mode, regno);
23408 
23409   r1 = gen_rtx_EXPR_LIST (inner, gen_rtx_REG (inner, regno),
23410 			  const0_rtx);
23411   r2 = gen_rtx_EXPR_LIST (inner, gen_rtx_REG (inner, regno + 1),
23412 			  GEN_INT (inner_bytes));
23413   return gen_rtx_PARALLEL (mode, gen_rtvec (2, r1, r2));
23414 }
23415 
23416 /* Return an rtx describing a return value of MODE as a PARALLEL
23417    in N_ELTS registers, each of mode ELT_MODE, starting at REGNO,
23418    stride REG_STRIDE.  */
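
/* Illustrative example (assuming FP_ARG_RETURN is f1): an ELFv2 homogeneous
   aggregate of two doubles gives N_ELTS = 2, ELT_MODE = DFmode and stride 1,
   producing a PARALLEL of (reg:DF f1) at byte offset 0 and (reg:DF f2) at
   byte offset 8.  */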
23419 
23420 static rtx
23421 rs6000_parallel_return (machine_mode mode,
23422 			int n_elts, machine_mode elt_mode,
23423 			unsigned int regno, unsigned int reg_stride)
23424 {
23425   rtx par = gen_rtx_PARALLEL (mode, rtvec_alloc (n_elts));
23426 
23427   int i;
23428   for (i = 0; i < n_elts; i++)
23429     {
23430       rtx r = gen_rtx_REG (elt_mode, regno);
23431       rtx off = GEN_INT (i * GET_MODE_SIZE (elt_mode));
23432       XVECEXP (par, 0, i) = gen_rtx_EXPR_LIST (VOIDmode, r, off);
23433       regno += reg_stride;
23434     }
23435 
23436   return par;
23437 }
23438 
23439 /* Target hook for TARGET_FUNCTION_VALUE.
23440 
23441    An integer value is in r3 and a floating-point value is in fp1,
23442    unless -msoft-float.  */
23443 
23444 static rtx
23445 rs6000_function_value (const_tree valtype,
23446 		       const_tree fn_decl_or_type ATTRIBUTE_UNUSED,
23447 		       bool outgoing ATTRIBUTE_UNUSED)
23448 {
23449   machine_mode mode;
23450   unsigned int regno;
23451   machine_mode elt_mode;
23452   int n_elts;
23453 
23454   /* Special handling for structs in darwin64.  */
23455   if (TARGET_MACHO
23456       && rs6000_darwin64_struct_check_p (TYPE_MODE (valtype), valtype))
23457     {
23458       CUMULATIVE_ARGS valcum;
23459       rtx valret;
23460 
23461       valcum.words = 0;
23462       valcum.fregno = FP_ARG_MIN_REG;
23463       valcum.vregno = ALTIVEC_ARG_MIN_REG;
23464       /* Do a trial code generation as if this were going to be passed as
23465 	 an argument; if any part goes in memory, we return NULL.  */
23466       valret = rs6000_darwin64_record_arg (&valcum, valtype, true, /* retval= */ true);
23467       if (valret)
23468 	return valret;
23469       /* Otherwise fall through to standard ABI rules.  */
23470     }
23471 
23472   mode = TYPE_MODE (valtype);
23473 
23474   /* The ELFv2 ABI returns homogeneous VFP aggregates in registers.  */
23475   if (rs6000_discover_homogeneous_aggregate (mode, valtype, &elt_mode, &n_elts))
23476     {
23477       int first_reg, n_regs;
23478 
23479       if (SCALAR_FLOAT_MODE_NOT_VECTOR_P (elt_mode))
23480 	{
23481 	  /* _Decimal128 must use even/odd register pairs.  */
23482 	  first_reg = (elt_mode == TDmode) ? FP_ARG_RETURN + 1 : FP_ARG_RETURN;
23483 	  n_regs = (GET_MODE_SIZE (elt_mode) + 7) >> 3;
23484 	}
23485       else
23486 	{
23487 	  first_reg = ALTIVEC_ARG_RETURN;
23488 	  n_regs = 1;
23489 	}
23490 
23491       return rs6000_parallel_return (mode, n_elts, elt_mode, first_reg, n_regs);
23492     }
23493 
23494   /* Some return value types need to be split in the -mpowerpc64, 32-bit ABI.  */
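  /* E.g. (added for illustration) a DImode value gives count = 2 below:
     (reg:SI 3) at offset 0 and (reg:SI 4) at offset 4, since GP_ARG_RETURN
     is r3 and the register stride is 1.  */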
23495   if (TARGET_32BIT && TARGET_POWERPC64)
23496     switch (mode)
23497       {
23498       default:
23499 	break;
23500       case E_DImode:
23501       case E_SCmode:
23502       case E_DCmode:
23503       case E_TCmode:
23504 	int count = GET_MODE_SIZE (mode) / 4;
23505 	return rs6000_parallel_return (mode, count, SImode, GP_ARG_RETURN, 1);
23506       }
23507 
23508   if ((INTEGRAL_TYPE_P (valtype)
23509        && GET_MODE_BITSIZE (mode) < (TARGET_32BIT ? 32 : 64))
23510       || POINTER_TYPE_P (valtype))
23511     mode = TARGET_32BIT ? SImode : DImode;
23512 
23513   if (DECIMAL_FLOAT_MODE_P (mode) && TARGET_HARD_FLOAT)
23514     /* _Decimal128 must use an even/odd register pair.  */
23515     regno = (mode == TDmode) ? FP_ARG_RETURN + 1 : FP_ARG_RETURN;
23516   else if (SCALAR_FLOAT_TYPE_P (valtype) && TARGET_HARD_FLOAT
23517 	   && !FLOAT128_VECTOR_P (mode))
23518     regno = FP_ARG_RETURN;
23519   else if (TREE_CODE (valtype) == COMPLEX_TYPE
23520 	   && targetm.calls.split_complex_arg)
23521     return rs6000_complex_function_value (mode);
23522   /* VSX is a superset of Altivec and adds V2DImode/V2DFmode.  Since the same
23523      return register is used in both cases, and we won't see V2DImode/V2DFmode
23524      for pure altivec, combine the two cases.  */
23525   else if ((TREE_CODE (valtype) == VECTOR_TYPE || VECTOR_ALIGNMENT_P (mode))
23526 	   && TARGET_ALTIVEC && TARGET_ALTIVEC_ABI
23527 	   && ALTIVEC_OR_VSX_VECTOR_MODE (mode))
23528     regno = ALTIVEC_ARG_RETURN;
23529   else
23530     regno = GP_ARG_RETURN;
23531 
23532   return gen_rtx_REG (mode, regno);
23533 }
23534 
23535 /* Define how to find the value returned by a library function
23536    assuming the value has mode MODE.  */
23537 rtx
23538 rs6000_libcall_value (machine_mode mode)
23539 {
23540   unsigned int regno;
23541 
23542   /* A long long return value needs to be split in the -mpowerpc64, 32-bit ABI.  */
23543   if (TARGET_32BIT && TARGET_POWERPC64 && mode == DImode)
23544     return rs6000_parallel_return (mode, 2, SImode, GP_ARG_RETURN, 1);
23545 
23546   if (DECIMAL_FLOAT_MODE_P (mode) && TARGET_HARD_FLOAT)
23547     /* _Decimal128 must use an even/odd register pair.  */
23548     regno = (mode == TDmode) ? FP_ARG_RETURN + 1 : FP_ARG_RETURN;
23549   else if (SCALAR_FLOAT_MODE_NOT_VECTOR_P (mode) && TARGET_HARD_FLOAT)
23550     regno = FP_ARG_RETURN;
23551   /* VSX is a superset of Altivec and adds V2DImode/V2DFmode.  Since the same
23552      return register is used in both cases, and we won't see V2DImode/V2DFmode
23553      for pure altivec, combine the two cases.  */
23554   else if (ALTIVEC_OR_VSX_VECTOR_MODE (mode)
23555 	   && TARGET_ALTIVEC && TARGET_ALTIVEC_ABI)
23556     regno = ALTIVEC_ARG_RETURN;
23557   else if (COMPLEX_MODE_P (mode) && targetm.calls.split_complex_arg)
23558     return rs6000_complex_function_value (mode);
23559   else
23560     regno = GP_ARG_RETURN;
23561 
23562   return gen_rtx_REG (mode, regno);
23563 }
23564 
23565 /* Compute register pressure classes.  We implement the target hook to avoid
23566    IRA picking something like GEN_OR_FLOAT_REGS as a pressure class, which can
23567    lead to incorrect estimates of the number of available registers and
23568    therefore increased register pressure/spill.  */
23569 static int
23570 rs6000_compute_pressure_classes (enum reg_class *pressure_classes)
23571 {
23572   int n;
23573 
23574   n = 0;
23575   pressure_classes[n++] = GENERAL_REGS;
23576   if (TARGET_ALTIVEC)
23577     pressure_classes[n++] = ALTIVEC_REGS;
23578   if (TARGET_VSX)
23579     pressure_classes[n++] = VSX_REGS;
23580   else
23581     {
23582       if (TARGET_HARD_FLOAT)
23583 	pressure_classes[n++] = FLOAT_REGS;
23584     }
23585   pressure_classes[n++] = CR_REGS;
23586   pressure_classes[n++] = SPECIAL_REGS;
23587 
23588   return n;
23589 }
23590 
23591 /* Given FROM and TO register numbers, say whether this elimination is allowed.
23592    Frame pointer elimination is automatically handled.
23593 
23594    For the RS/6000, if frame pointer elimination is being done, we would like
23595    to convert ap into fp, not sp.
23596 
23597    We need r30 if -mminimal-toc was specified, and there are constant pool
23598    references.  */
23599 
23600 static bool
23601 rs6000_can_eliminate (const int from, const int to)
23602 {
23603   return (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM
23604 	  ? ! frame_pointer_needed
23605 	  : from == RS6000_PIC_OFFSET_TABLE_REGNUM
23606 	    ? ! TARGET_MINIMAL_TOC || TARGET_NO_TOC_OR_PCREL
23607 		|| constant_pool_empty_p ()
23608 	    : true);
23609 }
23610 
23611 /* Define the offset between two registers, FROM to be eliminated and its
23612    replacement TO, at the start of a routine.  */
23613 HOST_WIDE_INT
23614 rs6000_initial_elimination_offset (int from, int to)
23615 {
23616   rs6000_stack_t *info = rs6000_stack_info ();
23617   HOST_WIDE_INT offset;
23618 
23619   if (from == HARD_FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
23620     offset = info->push_p ? 0 : -info->total_size;
23621   else if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
23622     {
23623       offset = info->push_p ? 0 : -info->total_size;
23624       if (FRAME_GROWS_DOWNWARD)
23625 	offset += info->fixed_size + info->vars_size + info->parm_size;
23626     }
23627   else if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
23628     offset = FRAME_GROWS_DOWNWARD
23629 	     ? info->fixed_size + info->vars_size + info->parm_size
23630 	     : 0;
23631   else if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
23632     offset = info->total_size;
23633   else if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
23634     offset = info->push_p ? info->total_size : 0;
23635   else if (from == RS6000_PIC_OFFSET_TABLE_REGNUM)
23636     offset = 0;
23637   else
23638     gcc_unreachable ();
23639 
23640   return offset;
23641 }
23642 
23643 /* Fill in sizes of registers used by unwinder.  */
23644 
23645 static void
23646 rs6000_init_dwarf_reg_sizes_extra (tree address)
23647 {
23648   if (TARGET_MACHO && ! TARGET_ALTIVEC)
23649     {
23650       int i;
23651       machine_mode mode = TYPE_MODE (char_type_node);
23652       rtx addr = expand_expr (address, NULL_RTX, VOIDmode, EXPAND_NORMAL);
23653       rtx mem = gen_rtx_MEM (BLKmode, addr);
23654       rtx value = gen_int_mode (16, mode);
23655 
23656       /* On Darwin, libgcc may be built to run on both G3 and G4/5.
23657 	 The unwinder still needs to know the size of Altivec registers.  */
23658 
23659       for (i = FIRST_ALTIVEC_REGNO; i < LAST_ALTIVEC_REGNO+1; i++)
23660 	{
23661 	  int column = DWARF_REG_TO_UNWIND_COLUMN
23662 		(DWARF2_FRAME_REG_OUT (DWARF_FRAME_REGNUM (i), true));
23663 	  HOST_WIDE_INT offset = column * GET_MODE_SIZE (mode);
23664 
23665 	  emit_move_insn (adjust_address (mem, mode, offset), value);
23666 	}
23667     }
23668 }
23669 
23670 /* Map internal gcc register numbers to debug format register numbers.
23671    FORMAT specifies the type of debug register number to use:
23672      0 -- debug information, except for frame-related sections
23673      1 -- DWARF .debug_frame section
23674      2 -- DWARF .eh_frame section  */
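
/* Illustrative example derived from the tables below: with
   RS6000_USE_DWARF_NUMBERING, LR_REGNO maps to 108 in .debug_frame
   (format 1) but to the historical GCC number 65 in .eh_frame
   (format 2).  */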
23675 
23676 unsigned int
23677 rs6000_dbx_register_number (unsigned int regno, unsigned int format)
23678 {
23679   /* On some platforms, we use the standard DWARF register
23680      numbering for .debug_info and .debug_frame.  */
23681   if ((format == 0 && dwarf_debuginfo_p ()) || format == 1)
23682     {
23683 #ifdef RS6000_USE_DWARF_NUMBERING
23684       if (regno <= 31)
23685 	return regno;
23686       if (FP_REGNO_P (regno))
23687 	return regno - FIRST_FPR_REGNO + 32;
23688       if (ALTIVEC_REGNO_P (regno))
23689 	return regno - FIRST_ALTIVEC_REGNO + 1124;
23690       if (regno == LR_REGNO)
23691 	return 108;
23692       if (regno == CTR_REGNO)
23693 	return 109;
23694       if (regno == CA_REGNO)
23695 	return 101;  /* XER */
23696       /* Special handling for CR for .debug_frame: rs6000_emit_prologue has
23697 	 translated any combination of CR2, CR3, CR4 saves to a save of CR2.
23698 	 The actual code emitted saves the whole of CR, so we map CR2_REGNO
23699 	 to the DWARF reg for CR.  */
23700       if (format == 1 && regno == CR2_REGNO)
23701 	return 64;
23702       if (CR_REGNO_P (regno))
23703 	return regno - CR0_REGNO + 86;
23704       if (regno == VRSAVE_REGNO)
23705 	return 356;
23706       if (regno == VSCR_REGNO)
23707 	return 67;
23708 
23709       /* These do not make much sense.  */
23710       if (regno == FRAME_POINTER_REGNUM)
23711 	return 111;
23712       if (regno == ARG_POINTER_REGNUM)
23713 	return 67;
23714       if (regno == 64)
23715 	return 100;
23716 
23717       gcc_unreachable ();
23718 #endif
23719     }
23720 
23721   /* We use the GCC 7 (and before) internal number for non-DWARF debug
23722      information, and also for .eh_frame.  */
23723   /* Translate the regnos to their numbers in GCC 7 (and before).  */
23724   if (regno <= 31)
23725     return regno;
23726   if (FP_REGNO_P (regno))
23727     return regno - FIRST_FPR_REGNO + 32;
23728   if (ALTIVEC_REGNO_P (regno))
23729     return regno - FIRST_ALTIVEC_REGNO + 77;
23730   if (regno == LR_REGNO)
23731     return 65;
23732   if (regno == CTR_REGNO)
23733     return 66;
23734   if (regno == CA_REGNO)
23735     return 76;  /* XER */
23736   if (CR_REGNO_P (regno))
23737     return regno - CR0_REGNO + 68;
23738   if (regno == VRSAVE_REGNO)
23739     return 109;
23740   if (regno == VSCR_REGNO)
23741     return 110;
23742 
23743   if (regno == FRAME_POINTER_REGNUM)
23744     return 111;
23745   if (regno == ARG_POINTER_REGNUM)
23746     return 67;
23747   if (regno == 64)
23748     return 64;
23749 
23750   gcc_unreachable ();
23751 }
23752 
23753 /* Target hook for eh_return_filter_mode.  */
23754 static scalar_int_mode
23755 rs6000_eh_return_filter_mode (void)
23756 {
23757   return TARGET_32BIT ? SImode : word_mode;
23758 }
23759 
23760 /* Target hook for translate_mode_attribute.  */
23761 static machine_mode
23762 rs6000_translate_mode_attribute (machine_mode mode)
23763 {
23764   if ((FLOAT128_IEEE_P (mode)
23765        && ieee128_float_type_node == long_double_type_node)
23766       || (FLOAT128_IBM_P (mode)
23767 	  && ibm128_float_type_node == long_double_type_node))
23768     return COMPLEX_MODE_P (mode) ? E_TCmode : E_TFmode;
23769   return mode;
23770 }
23771 
23772 /* Target hook for scalar_mode_supported_p.  */
23773 static bool
23774 rs6000_scalar_mode_supported_p (scalar_mode mode)
23775 {
23776   /* -m32 does not support TImode.  This is the default, from
23777      default_scalar_mode_supported_p.  For -m32 -mpowerpc64 we want the
23778      same ABI as for -m32.  But default_scalar_mode_supported_p allows
23779      integer modes of precision 2 * BITS_PER_WORD, which matches TImode
23780      for -mpowerpc64.  */
23781   if (TARGET_32BIT && mode == TImode)
23782     return false;
23783 
23784   if (DECIMAL_FLOAT_MODE_P (mode))
23785     return default_decimal_float_supported_p ();
23786   else if (TARGET_FLOAT128_TYPE && (mode == KFmode || mode == IFmode))
23787     return true;
23788   else
23789     return default_scalar_mode_supported_p (mode);
23790 }
23791 
23792 /* Target hook for libgcc_floating_mode_supported_p.  */
23793 
23794 static bool
23795 rs6000_libgcc_floating_mode_supported_p (scalar_float_mode mode)
23796 {
23797   switch (mode)
23798     {
23799     case E_SFmode:
23800     case E_DFmode:
23801     case E_TFmode:
23802       return true;
23803 
23804       /* We only return true for KFmode if IEEE 128-bit types are supported, and
23805 	 if long double does not use the IEEE 128-bit format.  If long double
23806 	 uses the IEEE 128-bit format, it will use TFmode and not KFmode.
23807 	 Because the code will not use KFmode in that case, there will be aborts
23808 	 because it can't find KFmode in the Floatn types.  */
23809     case E_KFmode:
23810       return TARGET_FLOAT128_TYPE && !TARGET_IEEEQUAD;
23811 
23812     default:
23813       return false;
23814     }
23815 }
23816 
23817 /* Target hook for vector_mode_supported_p.  */
23818 static bool
23819 rs6000_vector_mode_supported_p (machine_mode mode)
23820 {
23821   /* There is no vector form for IEEE 128-bit.  If we return true for IEEE
23822      128-bit, the compiler might try to widen IEEE 128-bit to IBM
23823      double-double.  */
23824   if (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode) && !FLOAT128_IEEE_P (mode))
23825     return true;
23826 
23827   else
23828     return false;
23829 }
23830 
23831 /* Target hook for floatn_mode.  */
23832 static opt_scalar_float_mode
23833 rs6000_floatn_mode (int n, bool extended)
23834 {
23835   if (extended)
23836     {
23837       switch (n)
23838 	{
23839 	case 32:
23840 	  return DFmode;
23841 
23842 	case 64:
23843 	  if (TARGET_FLOAT128_TYPE)
23844 	    return (FLOAT128_IEEE_P (TFmode)) ? TFmode : KFmode;
23845 	  else
23846 	    return opt_scalar_float_mode ();
23847 
23848 	case 128:
23849 	  return opt_scalar_float_mode ();
23850 
23851 	default:
23852 	  /* Those are the only valid _FloatNx types.  */
23853 	  gcc_unreachable ();
23854 	}
23855     }
23856   else
23857     {
23858       switch (n)
23859 	{
23860 	case 32:
23861 	  return SFmode;
23862 
23863 	case 64:
23864 	  return DFmode;
23865 
23866 	case 128:
23867 	  if (TARGET_FLOAT128_TYPE)
23868 	    return (FLOAT128_IEEE_P (TFmode)) ? TFmode : KFmode;
23869 	  else
23870 	    return opt_scalar_float_mode ();
23871 
23872 	default:
23873 	  return opt_scalar_float_mode ();
23874 	}
23875     }
23876 
23877 }
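
/* A sketch of the resulting _FloatN/_FloatNx mappings, assuming
   -mfloat128 and the default -mabi=ibmlongdouble (so TFmode is IBM
   double-double):

     _Float32  -> SFmode	_Float32x  -> DFmode
     _Float64  -> DFmode	_Float64x  -> KFmode
     _Float128 -> KFmode	_Float128x -> (none)

   With -mabi=ieeelongdouble, TFmode is IEEE 128-bit, so _Float64x and
   _Float128 map to TFmode instead of KFmode.  */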
23878 
23879 /* Target hook for c_mode_for_suffix.  */
23880 static machine_mode
23881 rs6000_c_mode_for_suffix (char suffix)
23882 {
23883   if (TARGET_FLOAT128_TYPE)
23884     {
23885       if (suffix == 'q' || suffix == 'Q')
23886 	return (FLOAT128_IEEE_P (TFmode)) ? TFmode : KFmode;
23887 
23888       /* At the moment, we are not defining a suffix for IBM extended double.
23889 	 If/when the default for -mabi=ieeelongdouble is changed, and we want
23890 	 to support __ibm128 constants in legacy library code, we may need to
23891 	 re-evaluate this decision.  Currently, c-lex.cc only supports 'w' and
23892 	 'q' as machine dependent suffixes.  The x86_64 port uses 'w' for
23893 	 __float80 constants.  */
23894     }
23895 
23896   return VOIDmode;
23897 }
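
/* For example, with -mfloat128 the C front end accepts the 'q'/'Q'
   suffix for IEEE 128-bit constants:

     __float128 pi = 3.141592653589793238462643383279503q;

   and the constant gets KFmode (or TFmode when long double is already
   IEEE 128-bit).  */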
23898 
23899 /* Target hook for invalid_arg_for_unprototyped_fn.  */
23900 static const char *
23901 invalid_arg_for_unprototyped_fn (const_tree typelist, const_tree funcdecl, const_tree val)
23902 {
23903   return (!rs6000_darwin64_abi
23904 	  && typelist == 0
23905           && TREE_CODE (TREE_TYPE (val)) == VECTOR_TYPE
23906           && (funcdecl == NULL_TREE
23907               || (TREE_CODE (funcdecl) == FUNCTION_DECL
23908                   && DECL_BUILT_IN_CLASS (funcdecl) != BUILT_IN_MD)))
23909 	  ? N_("AltiVec argument passed to unprototyped function")
23910 	  : NULL;
23911 }
23912 
23913 /* For TARGET_SECURE_PLT 32-bit PIC code we can save PIC register
23914    setup by using __stack_chk_fail_local hidden function instead of
23915    calling __stack_chk_fail directly.  Otherwise it is better to call
23916    __stack_chk_fail directly.  */
23917 
23918 static tree ATTRIBUTE_UNUSED
23919 rs6000_stack_protect_fail (void)
23920 {
23921   return (DEFAULT_ABI == ABI_V4 && TARGET_SECURE_PLT && flag_pic)
23922 	 ? default_hidden_stack_protect_fail ()
23923 	 : default_external_stack_protect_fail ();
23924 }
23925 
23926 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook.  */
23927 
23928 #if TARGET_ELF
23929 static unsigned HOST_WIDE_INT
23930 rs6000_asan_shadow_offset (void)
23931 {
23932   return (unsigned HOST_WIDE_INT) 1 << (TARGET_64BIT ? 41 : 29);
23933 }
23934 #endif
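
/* For reference, with the default shadow scale of 3, ASan computes
   shadow addresses roughly as

     shadow = (addr >> 3) + rs6000_asan_shadow_offset ()

   i.e. + (1 << 41) for 64-bit and + (1 << 29) for 32-bit ELF targets.  */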
23935 
23936 /* Mask options that we want to support inside of attribute((target)) and
23937    #pragma GCC target operations.  Note, we do not include things like
23938    64/32-bit, endianness, hard/soft floating point, etc. that would have
23939    different calling sequences.  */
23940 
23941 struct rs6000_opt_mask {
23942   const char *name;		/* option name */
23943   HOST_WIDE_INT mask;		/* mask to set */
23944   bool invert;			/* invert sense of mask */
23945   bool valid_target;		/* option is a target option */
23946 };
23947 
23948 static struct rs6000_opt_mask const rs6000_opt_masks[] =
23949 {
23950   { "altivec",			OPTION_MASK_ALTIVEC,		false, true  },
23951   { "block-ops-unaligned-vsx",	OPTION_MASK_BLOCK_OPS_UNALIGNED_VSX,
23952 								false, true  },
23953   { "block-ops-vector-pair",	OPTION_MASK_BLOCK_OPS_VECTOR_PAIR,
23954 								false, true  },
23955   { "cmpb",			OPTION_MASK_CMPB,		false, true  },
23956   { "crypto",			OPTION_MASK_CRYPTO,		false, true  },
23957   { "direct-move",		OPTION_MASK_DIRECT_MOVE,	false, true  },
23958   { "dlmzb",			OPTION_MASK_DLMZB,		false, true  },
23959   { "efficient-unaligned-vsx",	OPTION_MASK_EFFICIENT_UNALIGNED_VSX,
23960 								false, true  },
23961   { "float128",			OPTION_MASK_FLOAT128_KEYWORD,	false, true  },
23962   { "float128-hardware",	OPTION_MASK_FLOAT128_HW,	false, true  },
23963   { "fprnd",			OPTION_MASK_FPRND,		false, true  },
23964   { "power10",			OPTION_MASK_POWER10,		false, true  },
23965   { "hard-dfp",			OPTION_MASK_DFP,		false, true  },
23966   { "htm",			OPTION_MASK_HTM,		false, true  },
23967   { "isel",			OPTION_MASK_ISEL,		false, true  },
23968   { "mfcrf",			OPTION_MASK_MFCRF,		false, true  },
23969   { "mfpgpr",			0,				false, true  },
23970   { "mma",			OPTION_MASK_MMA,		false, true  },
23971   { "modulo",			OPTION_MASK_MODULO,		false, true  },
23972   { "mulhw",			OPTION_MASK_MULHW,		false, true  },
23973   { "multiple",			OPTION_MASK_MULTIPLE,		false, true  },
23974   { "pcrel",			OPTION_MASK_PCREL,		false, true  },
23975   { "pcrel-opt",		OPTION_MASK_PCREL_OPT,		false, true  },
23976   { "popcntb",			OPTION_MASK_POPCNTB,		false, true  },
23977   { "popcntd",			OPTION_MASK_POPCNTD,		false, true  },
23978   { "power8-fusion",		OPTION_MASK_P8_FUSION,		false, true  },
23979   { "power8-fusion-sign",	OPTION_MASK_P8_FUSION_SIGN,	false, true  },
23980   { "power8-vector",		OPTION_MASK_P8_VECTOR,		false, true  },
23981   { "power9-minmax",		OPTION_MASK_P9_MINMAX,		false, true  },
23982   { "power9-misc",		OPTION_MASK_P9_MISC,		false, true  },
23983   { "power9-vector",		OPTION_MASK_P9_VECTOR,		false, true  },
23984   { "power10-fusion",		OPTION_MASK_P10_FUSION,		false, true  },
23985   { "powerpc-gfxopt",		OPTION_MASK_PPC_GFXOPT,		false, true  },
23986   { "powerpc-gpopt",		OPTION_MASK_PPC_GPOPT,		false, true  },
23987   { "prefixed",			OPTION_MASK_PREFIXED,		false, true  },
23988   { "quad-memory",		OPTION_MASK_QUAD_MEMORY,	false, true  },
23989   { "quad-memory-atomic",	OPTION_MASK_QUAD_MEMORY_ATOMIC,	false, true  },
23990   { "recip-precision",		OPTION_MASK_RECIP_PRECISION,	false, true  },
23991   { "save-toc-indirect",	OPTION_MASK_SAVE_TOC_INDIRECT,	false, true  },
23992   { "string",			0,				false, true  },
23993   { "update",			OPTION_MASK_NO_UPDATE,		true , true  },
23994   { "vsx",			OPTION_MASK_VSX,		false, true  },
23995 #ifdef OPTION_MASK_64BIT
23996 #if TARGET_AIX_OS
23997   { "aix64",			OPTION_MASK_64BIT,		false, false },
23998   { "aix32",			OPTION_MASK_64BIT,		true,  false },
23999 #else
24000   { "64",			OPTION_MASK_64BIT,		false, false },
24001   { "32",			OPTION_MASK_64BIT,		true,  false },
24002 #endif
24003 #endif
24004 #ifdef OPTION_MASK_EABI
24005   { "eabi",			OPTION_MASK_EABI,		false, false },
24006 #endif
24007 #ifdef OPTION_MASK_LITTLE_ENDIAN
24008   { "little",			OPTION_MASK_LITTLE_ENDIAN,	false, false },
24009   { "big",			OPTION_MASK_LITTLE_ENDIAN,	true,  false },
24010 #endif
24011 #ifdef OPTION_MASK_RELOCATABLE
24012   { "relocatable",		OPTION_MASK_RELOCATABLE,	false, false },
24013 #endif
24014 #ifdef OPTION_MASK_STRICT_ALIGN
24015   { "strict-align",		OPTION_MASK_STRICT_ALIGN,	false, false },
24016 #endif
24017   { "soft-float",		OPTION_MASK_SOFT_FLOAT,		false, false },
24018   { "string",			0,				false, false },
24019 };
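
/* The names in the table above are what users write in the target
   attribute and pragma, optionally prefixed with "no-", e.g.:

     __attribute__ ((target ("vsx,no-htm")))
     void vector_kernel (void);

     #pragma GCC push_options
     #pragma GCC target ("power9-vector")
     ...
     #pragma GCC pop_options  */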
24020 
24021 /* Builtin mask mapping for printing the flags.  */
24022 static struct rs6000_opt_mask const rs6000_builtin_mask_names[] =
24023 {
24024   { "altivec",		 RS6000_BTM_ALTIVEC,	false, false },
24025   { "vsx",		 RS6000_BTM_VSX,	false, false },
24026   { "fre",		 RS6000_BTM_FRE,	false, false },
24027   { "fres",		 RS6000_BTM_FRES,	false, false },
24028   { "frsqrte",		 RS6000_BTM_FRSQRTE,	false, false },
24029   { "frsqrtes",		 RS6000_BTM_FRSQRTES,	false, false },
24030   { "popcntd",		 RS6000_BTM_POPCNTD,	false, false },
24031   { "cell",		 RS6000_BTM_CELL,	false, false },
24032   { "power8-vector",	 RS6000_BTM_P8_VECTOR,	false, false },
24033   { "power9-vector",	 RS6000_BTM_P9_VECTOR,	false, false },
24034   { "power9-misc",	 RS6000_BTM_P9_MISC,	false, false },
24035   { "crypto",		 RS6000_BTM_CRYPTO,	false, false },
24036   { "htm",		 RS6000_BTM_HTM,	false, false },
24037   { "hard-dfp",		 RS6000_BTM_DFP,	false, false },
24038   { "hard-float",	 RS6000_BTM_HARD_FLOAT,	false, false },
24039   { "long-double-128",	 RS6000_BTM_LDBL128,	false, false },
24040   { "powerpc64",	 RS6000_BTM_POWERPC64,  false, false },
24041   { "float128",		 RS6000_BTM_FLOAT128,   false, false },
24042   { "float128-hw",	 RS6000_BTM_FLOAT128_HW, false, false },
24043   { "mma",		 RS6000_BTM_MMA,	false, false },
24044   { "power10",		 RS6000_BTM_P10,	false, false },
24045 };
24046 
24047 /* Option variables that we want to support inside attribute((target)) and
24048    #pragma GCC target operations.  */
24049 
24050 struct rs6000_opt_var {
24051   const char *name;		/* option name */
24052   size_t global_offset;		/* offset of the option in global_options.  */
24053   size_t target_offset;		/* offset of the option in target options.  */
24054 };
24055 
24056 static struct rs6000_opt_var const rs6000_opt_vars[] =
24057 {
24058   { "friz",
24059     offsetof (struct gcc_options, x_TARGET_FRIZ),
24060     offsetof (struct cl_target_option, x_TARGET_FRIZ), },
24061   { "avoid-indexed-addresses",
24062     offsetof (struct gcc_options, x_TARGET_AVOID_XFORM),
24063     offsetof (struct cl_target_option, x_TARGET_AVOID_XFORM) },
24064   { "longcall",
24065     offsetof (struct gcc_options, x_rs6000_default_long_calls),
24066     offsetof (struct cl_target_option, x_rs6000_default_long_calls), },
24067   { "optimize-swaps",
24068     offsetof (struct gcc_options, x_rs6000_optimize_swaps),
24069     offsetof (struct cl_target_option, x_rs6000_optimize_swaps), },
24070   { "allow-movmisalign",
24071     offsetof (struct gcc_options, x_TARGET_ALLOW_MOVMISALIGN),
24072     offsetof (struct cl_target_option, x_TARGET_ALLOW_MOVMISALIGN), },
24073   { "sched-groups",
24074     offsetof (struct gcc_options, x_TARGET_SCHED_GROUPS),
24075     offsetof (struct cl_target_option, x_TARGET_SCHED_GROUPS), },
24076   { "always-hint",
24077     offsetof (struct gcc_options, x_TARGET_ALWAYS_HINT),
24078     offsetof (struct cl_target_option, x_TARGET_ALWAYS_HINT), },
24079   { "align-branch-targets",
24080     offsetof (struct gcc_options, x_TARGET_ALIGN_BRANCH_TARGETS),
24081     offsetof (struct cl_target_option, x_TARGET_ALIGN_BRANCH_TARGETS), },
24082   { "sched-prolog",
24083     offsetof (struct gcc_options, x_TARGET_SCHED_PROLOG),
24084     offsetof (struct cl_target_option, x_TARGET_SCHED_PROLOG), },
24085   { "sched-epilog",
24086     offsetof (struct gcc_options, x_TARGET_SCHED_PROLOG),
24087     offsetof (struct cl_target_option, x_TARGET_SCHED_PROLOG), },
24088   { "speculate-indirect-jumps",
24089     offsetof (struct gcc_options, x_rs6000_speculate_indirect_jumps),
24090     offsetof (struct cl_target_option, x_rs6000_speculate_indirect_jumps), },
24091 };
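
/* These variable options accept the same "no-" prefix handling as the
   mask options above, e.g. (a sketch):

     __attribute__ ((target ("no-longcall,avoid-indexed-addresses")))
     void hot_loop (void);  */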
24092 
24093 /* Inner function to handle attribute((target("..."))) and #pragma GCC target
24094    parsing.  Return true if there were no errors.  */
24095 
24096 static bool
24097 rs6000_inner_target_options (tree args, bool attr_p)
24098 {
24099   bool ret = true;
24100 
24101   if (args == NULL_TREE)
24102     ;
24103 
24104   else if (TREE_CODE (args) == STRING_CST)
24105     {
24106       char *p = ASTRDUP (TREE_STRING_POINTER (args));
24107       char *q;
24108 
24109       while ((q = strtok (p, ",")) != NULL)
24110 	{
24111 	  bool error_p = false;
24112 	  bool not_valid_p = false;
24113 	  const char *cpu_opt = NULL;
24114 
24115 	  p = NULL;
24116 	  if (startswith (q, "cpu="))
24117 	    {
24118 	      int cpu_index = rs6000_cpu_name_lookup (q+4);
24119 	      if (cpu_index >= 0)
24120 		rs6000_cpu_index = cpu_index;
24121 	      else
24122 		{
24123 		  error_p = true;
24124 		  cpu_opt = q+4;
24125 		}
24126 	    }
24127 	  else if (startswith (q, "tune="))
24128 	    {
24129 	      int tune_index = rs6000_cpu_name_lookup (q+5);
24130 	      if (tune_index >= 0)
24131 		rs6000_tune_index = tune_index;
24132 	      else
24133 		{
24134 		  error_p = true;
24135 		  cpu_opt = q+5;
24136 		}
24137 	    }
24138 	  else
24139 	    {
24140 	      size_t i;
24141 	      bool invert = false;
24142 	      char *r = q;
24143 
24144 	      error_p = true;
24145 	      if (startswith (r, "no-"))
24146 		{
24147 		  invert = true;
24148 		  r += 3;
24149 		}
24150 
24151 	      for (i = 0; i < ARRAY_SIZE (rs6000_opt_masks); i++)
24152 		if (strcmp (r, rs6000_opt_masks[i].name) == 0)
24153 		  {
24154 		    HOST_WIDE_INT mask = rs6000_opt_masks[i].mask;
24155 
24156 		    if (!rs6000_opt_masks[i].valid_target)
24157 		      not_valid_p = true;
24158 		    else
24159 		      {
24160 			error_p = false;
24161 			rs6000_isa_flags_explicit |= mask;
24162 
24163 			/* VSX needs altivec, so -mvsx automagically sets
24164 			   altivec and disables -mavoid-indexed-addresses.  */
24165 			if (!invert)
24166 			  {
24167 			    if (mask == OPTION_MASK_VSX)
24168 			      {
24169 				mask |= OPTION_MASK_ALTIVEC;
24170 				TARGET_AVOID_XFORM = 0;
24171 			      }
24172 			  }
24173 
24174 			if (rs6000_opt_masks[i].invert)
24175 			  invert = !invert;
24176 
24177 			if (invert)
24178 			  rs6000_isa_flags &= ~mask;
24179 			else
24180 			  rs6000_isa_flags |= mask;
24181 		      }
24182 		    break;
24183 		  }
24184 
24185 	      if (error_p && !not_valid_p)
24186 		{
24187 		  for (i = 0; i < ARRAY_SIZE (rs6000_opt_vars); i++)
24188 		    if (strcmp (r, rs6000_opt_vars[i].name) == 0)
24189 		      {
24190 			size_t j = rs6000_opt_vars[i].global_offset;
24191 			*((int *) ((char *)&global_options + j)) = !invert;
24192 			error_p = false;
24193 			not_valid_p = false;
24194 			break;
24195 		      }
24196 		}
24197 	    }
24198 
24199 	  if (error_p)
24200 	    {
24201 	      const char *eprefix, *esuffix;
24202 
24203 	      ret = false;
24204 	      if (attr_p)
24205 		{
24206 		  eprefix = "__attribute__((__target__(";
24207 		  esuffix = ")))";
24208 		}
24209 	      else
24210 		{
24211 		  eprefix = "#pragma GCC target ";
24212 		  esuffix = "";
24213 		}
24214 
24215 	      if (cpu_opt)
24216 		error ("invalid cpu %qs for %s%qs%s", cpu_opt, eprefix,
24217 		       q, esuffix);
24218 	      else if (not_valid_p)
24219 		error ("%s%qs%s is not allowed", eprefix, q, esuffix);
24220 	      else
24221 		error ("%s%qs%s is invalid", eprefix, q, esuffix);
24222 	    }
24223 	}
24224     }
24225 
24226   else if (TREE_CODE (args) == TREE_LIST)
24227     {
24228       do
24229 	{
24230 	  tree value = TREE_VALUE (args);
24231 	  if (value)
24232 	    {
24233 	      bool ret2 = rs6000_inner_target_options (value, attr_p);
24234 	      if (!ret2)
24235 		ret = false;
24236 	    }
24237 	  args = TREE_CHAIN (args);
24238 	}
24239       while (args != NULL_TREE);
24240     }
24241 
24242   else
24243     {
24244       error ("attribute %<target%> argument not a string");
24245       return false;
24246     }
24247 
24248   return ret;
24249 }
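
/* For illustration, an unknown cpu value reaches the error path above
   and produces roughly:

     #pragma GCC target ("cpu=power42")
     => error: invalid cpu 'power42' for #pragma GCC target 'cpu=power42'  */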
24250 
24251 /* Print out the target options as a list for -mdebug=target.  */
24252 
24253 static void
24254 rs6000_debug_target_options (tree args, const char *prefix)
24255 {
24256   if (args == NULL_TREE)
24257     fprintf (stderr, "%s<NULL>", prefix);
24258 
24259   else if (TREE_CODE (args) == STRING_CST)
24260     {
24261       char *p = ASTRDUP (TREE_STRING_POINTER (args));
24262       char *q;
24263 
24264       while ((q = strtok (p, ",")) != NULL)
24265 	{
24266 	  p = NULL;
24267 	  fprintf (stderr, "%s\"%s\"", prefix, q);
24268 	  prefix = ", ";
24269 	}
24270     }
24271 
24272   else if (TREE_CODE (args) == TREE_LIST)
24273     {
24274       do
24275 	{
24276 	  tree value = TREE_VALUE (args);
24277 	  if (value)
24278 	    {
24279 	      rs6000_debug_target_options (value, prefix);
24280 	      prefix = ", ";
24281 	    }
24282 	  args = TREE_CHAIN (args);
24283 	}
24284       while (args != NULL_TREE);
24285     }
24286 
24287   else
24288     gcc_unreachable ();
24289 
24290   return;
24291 }
24292 
24293 
24294 /* Hook to validate attribute((target("..."))).  */
24295 
24296 static bool
24297 rs6000_valid_attribute_p (tree fndecl,
24298 			  tree ARG_UNUSED (name),
24299 			  tree args,
24300 			  int flags)
24301 {
24302   struct cl_target_option cur_target;
24303   bool ret;
24304   tree old_optimize;
24305   tree new_target, new_optimize;
24306   tree func_optimize;
24307 
24308   gcc_assert ((fndecl != NULL_TREE) && (args != NULL_TREE));
24309 
24310   if (TARGET_DEBUG_TARGET)
24311     {
24312       tree tname = DECL_NAME (fndecl);
24313       fprintf (stderr, "\n==================== rs6000_valid_attribute_p:\n");
24314       if (tname)
24315 	fprintf (stderr, "function: %.*s\n",
24316 		 (int) IDENTIFIER_LENGTH (tname),
24317 		 IDENTIFIER_POINTER (tname));
24318       else
24319 	fprintf (stderr, "function: unknown\n");
24320 
24321       fprintf (stderr, "args:");
24322       rs6000_debug_target_options (args, " ");
24323       fprintf (stderr, "\n");
24324 
24325       if (flags)
24326 	fprintf (stderr, "flags: 0x%x\n", flags);
24327 
24328       fprintf (stderr, "--------------------\n");
24329     }
24330 
24331   /* attribute((target("default"))) does nothing, beyond
24332      affecting multi-versioning.  */
24333   if (TREE_VALUE (args)
24334       && TREE_CODE (TREE_VALUE (args)) == STRING_CST
24335       && TREE_CHAIN (args) == NULL_TREE
24336       && strcmp (TREE_STRING_POINTER (TREE_VALUE (args)), "default") == 0)
24337     return true;
24338 
24339   old_optimize = build_optimization_node (&global_options,
24340 					  &global_options_set);
24341   func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
24342 
24343   /* If the function changed the optimization levels as well as setting target
24344      options, start with the optimizations specified.  */
24345   if (func_optimize && func_optimize != old_optimize)
24346     cl_optimization_restore (&global_options, &global_options_set,
24347 			     TREE_OPTIMIZATION (func_optimize));
24348 
24349   /* The target attributes may also change some optimization flags, so update
24350      the optimization options if necessary.  */
24351   cl_target_option_save (&cur_target, &global_options, &global_options_set);
24352   rs6000_cpu_index = rs6000_tune_index = -1;
24353   ret = rs6000_inner_target_options (args, true);
24354 
24355   /* Set up any additional state.  */
24356   if (ret)
24357     {
24358       ret = rs6000_option_override_internal (false);
24359       new_target = build_target_option_node (&global_options,
24360 					     &global_options_set);
24361     }
24362   else
24363     new_target = NULL;
24364 
24365   new_optimize = build_optimization_node (&global_options,
24366 					  &global_options_set);
24367 
24368   if (!new_target)
24369     ret = false;
24370 
24371   else if (fndecl)
24372     {
24373       DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;
24374 
24375       if (old_optimize != new_optimize)
24376 	DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
24377     }
24378 
24379   cl_target_option_restore (&global_options, &global_options_set, &cur_target);
24380 
24381   if (old_optimize != new_optimize)
24382     cl_optimization_restore (&global_options, &global_options_set,
24383 			     TREE_OPTIMIZATION (old_optimize));
24384 
24385   return ret;
24386 }
24387 
24388 
24389 /* Hook to validate the current #pragma GCC target and set the state, and
24390    update the macros based on what was changed.  If ARGS is NULL, then
24391    POP_TARGET is used to reset the options.  */
24392 
24393 bool
24394 rs6000_pragma_target_parse (tree args, tree pop_target)
24395 {
24396   tree prev_tree = build_target_option_node (&global_options,
24397 					     &global_options_set);
24398   tree cur_tree;
24399   struct cl_target_option *prev_opt, *cur_opt;
24400   HOST_WIDE_INT prev_flags, cur_flags, diff_flags;
24401   HOST_WIDE_INT prev_bumask, cur_bumask, diff_bumask;
24402 
24403   if (TARGET_DEBUG_TARGET)
24404     {
24405       fprintf (stderr, "\n==================== rs6000_pragma_target_parse\n");
24406       fprintf (stderr, "args:");
24407       rs6000_debug_target_options (args, " ");
24408       fprintf (stderr, "\n");
24409 
24410       if (pop_target)
24411 	{
24412 	  fprintf (stderr, "pop_target:\n");
24413 	  debug_tree (pop_target);
24414 	}
24415       else
24416 	fprintf (stderr, "pop_target: <NULL>\n");
24417 
24418       fprintf (stderr, "--------------------\n");
24419     }
24420 
24421   if (! args)
24422     {
24423       cur_tree = ((pop_target)
24424 		  ? pop_target
24425 		  : target_option_default_node);
24426       cl_target_option_restore (&global_options, &global_options_set,
24427 				TREE_TARGET_OPTION (cur_tree));
24428     }
24429   else
24430     {
24431       rs6000_cpu_index = rs6000_tune_index = -1;
24432       if (!rs6000_inner_target_options (args, false)
24433 	  || !rs6000_option_override_internal (false)
24434 	  || (cur_tree = build_target_option_node (&global_options,
24435 						   &global_options_set))
24436 	     == NULL_TREE)
24437 	{
24438 	  if (TARGET_DEBUG_BUILTIN || TARGET_DEBUG_TARGET)
24439 	    fprintf (stderr, "invalid pragma\n");
24440 
24441 	  return false;
24442 	}
24443     }
24444 
24445   target_option_current_node = cur_tree;
24446   rs6000_activate_target_options (target_option_current_node);
24447 
24448   /* If we have the preprocessor linked in (i.e. C or C++ languages), possibly
24449      change the macros that are defined.  */
24450   if (rs6000_target_modify_macros_ptr)
24451     {
24452       prev_opt    = TREE_TARGET_OPTION (prev_tree);
24453       prev_bumask = prev_opt->x_rs6000_builtin_mask;
24454       prev_flags  = prev_opt->x_rs6000_isa_flags;
24455 
24456       cur_opt     = TREE_TARGET_OPTION (cur_tree);
24457       cur_flags   = cur_opt->x_rs6000_isa_flags;
24458       cur_bumask  = cur_opt->x_rs6000_builtin_mask;
24459 
24460       diff_bumask = (prev_bumask ^ cur_bumask);
24461       diff_flags  = (prev_flags ^ cur_flags);
24462 
24463       if ((diff_flags != 0) || (diff_bumask != 0))
24464 	{
24465 	  /* Delete old macros.  */
24466 	  rs6000_target_modify_macros_ptr (false,
24467 					   prev_flags & diff_flags,
24468 					   prev_bumask & diff_bumask);
24469 
24470 	  /* Define new macros.  */
24471 	  rs6000_target_modify_macros_ptr (true,
24472 					   cur_flags & diff_flags,
24473 					   cur_bumask & diff_bumask);
24474 	}
24475     }
24476 
24477   return true;
24478 }
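
/* For example (a sketch): after

     #pragma GCC target ("cpu=power9")

   the hook above undefines and redefines preprocessor macros such as
   _ARCH_PWR9 and __POWER9_VECTOR__ so they track the new ISA flags.  */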
24479 
24480 
24481 /* Remember the last target of rs6000_set_current_function.  */
24482 static GTY(()) tree rs6000_previous_fndecl;
24483 
24484 /* Restore target's globals from NEW_TREE and invalidate the
24485    rs6000_previous_fndecl cache.  */
24486 
24487 void
24488 rs6000_activate_target_options (tree new_tree)
24489 {
24490   cl_target_option_restore (&global_options, &global_options_set,
24491 			    TREE_TARGET_OPTION (new_tree));
24492   if (TREE_TARGET_GLOBALS (new_tree))
24493     restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
24494   else if (new_tree == target_option_default_node)
24495     restore_target_globals (&default_target_globals);
24496   else
24497     TREE_TARGET_GLOBALS (new_tree) = save_target_globals_default_opts ();
24498   rs6000_previous_fndecl = NULL_TREE;
24499 }
24500 
24501 /* Establish appropriate back-end context for processing the function
24502    FNDECL.  The argument might be NULL to indicate processing at top
24503    level, outside of any function scope.  */
24504 static void
24505 rs6000_set_current_function (tree fndecl)
24506 {
24507   if (TARGET_DEBUG_TARGET)
24508     {
24509       fprintf (stderr, "\n==================== rs6000_set_current_function");
24510 
24511       if (fndecl)
24512 	fprintf (stderr, ", fndecl %s (%p)",
24513 		 (DECL_NAME (fndecl)
24514 		  ? IDENTIFIER_POINTER (DECL_NAME (fndecl))
24515 		  : "<unknown>"), (void *)fndecl);
24516 
24517       if (rs6000_previous_fndecl)
24518 	fprintf (stderr, ", prev_fndecl (%p)", (void *)rs6000_previous_fndecl);
24519 
24520       fprintf (stderr, "\n");
24521     }
24522 
24523   /* Only change the context if the function changes.  This hook is called
24524      several times in the course of compiling a function, and we don't want to
24525      slow things down too much or call target_reinit when it isn't safe.  */
24526   if (fndecl == rs6000_previous_fndecl)
24527     return;
24528 
24529   tree old_tree;
24530   if (rs6000_previous_fndecl == NULL_TREE)
24531     old_tree = target_option_current_node;
24532   else if (DECL_FUNCTION_SPECIFIC_TARGET (rs6000_previous_fndecl))
24533     old_tree = DECL_FUNCTION_SPECIFIC_TARGET (rs6000_previous_fndecl);
24534   else
24535     old_tree = target_option_default_node;
24536 
24537   tree new_tree;
24538   if (fndecl == NULL_TREE)
24539     {
24540       if (old_tree != target_option_current_node)
24541 	new_tree = target_option_current_node;
24542       else
24543 	new_tree = NULL_TREE;
24544     }
24545   else
24546     {
24547       new_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
24548       if (new_tree == NULL_TREE)
24549 	new_tree = target_option_default_node;
24550     }
24551 
24552   if (TARGET_DEBUG_TARGET)
24553     {
24554       if (new_tree)
24555 	{
24556 	  fprintf (stderr, "\nnew fndecl target specific options:\n");
24557 	  debug_tree (new_tree);
24558 	}
24559 
24560       if (old_tree)
24561 	{
24562 	  fprintf (stderr, "\nold fndecl target specific options:\n");
24563 	  debug_tree (old_tree);
24564 	}
24565 
24566       if (old_tree != NULL_TREE || new_tree != NULL_TREE)
24567 	fprintf (stderr, "--------------------\n");
24568     }
24569 
24570   if (new_tree && old_tree != new_tree)
24571     rs6000_activate_target_options (new_tree);
24572 
24573   if (fndecl)
24574     rs6000_previous_fndecl = fndecl;
24575 }
24576 
24577 
24578 /* Save the current options */
24579 
24580 static void
24581 rs6000_function_specific_save (struct cl_target_option *ptr,
24582 			       struct gcc_options *opts,
24583 			       struct gcc_options */* opts_set */)
24584 {
24585   ptr->x_rs6000_isa_flags = opts->x_rs6000_isa_flags;
24586   ptr->x_rs6000_isa_flags_explicit = opts->x_rs6000_isa_flags_explicit;
24587 }
24588 
24589 /* Restore the current options */
24590 
24591 static void
24592 rs6000_function_specific_restore (struct gcc_options *opts,
24593 				  struct gcc_options */* opts_set */,
24594 				  struct cl_target_option *ptr)
24595 
24596 {
24597   opts->x_rs6000_isa_flags = ptr->x_rs6000_isa_flags;
24598   opts->x_rs6000_isa_flags_explicit = ptr->x_rs6000_isa_flags_explicit;
24599   (void) rs6000_option_override_internal (false);
24600 }
24601 
24602 /* Print the current options */
24603 
24604 static void
24605 rs6000_function_specific_print (FILE *file, int indent,
24606 				struct cl_target_option *ptr)
24607 {
24608   rs6000_print_isa_options (file, indent, "Isa options set",
24609 			    ptr->x_rs6000_isa_flags);
24610 
24611   rs6000_print_isa_options (file, indent, "Isa options explicit",
24612 			    ptr->x_rs6000_isa_flags_explicit);
24613 }
24614 
24615 /* Helper function to print the current isa or misc options on a line.  */
24616 
24617 static void
24618 rs6000_print_options_internal (FILE *file,
24619 			       int indent,
24620 			       const char *string,
24621 			       HOST_WIDE_INT flags,
24622 			       const char *prefix,
24623 			       const struct rs6000_opt_mask *opts,
24624 			       size_t num_elements)
24625 {
24626   size_t i;
24627   size_t start_column = 0;
24628   size_t cur_column;
24629   size_t max_column = 120;
24630   size_t prefix_len = strlen (prefix);
24631   size_t comma_len = 0;
24632   const char *comma = "";
24633 
24634   if (indent)
24635     start_column += fprintf (file, "%*s", indent, "");
24636 
24637   if (!flags)
24638     {
24639       fprintf (file, DEBUG_FMT_S, string, "<none>");
24640       return;
24641     }
24642 
24643   start_column += fprintf (file, DEBUG_FMT_WX, string, flags);
24644 
24645   /* Print the various mask options.  */
24646   cur_column = start_column;
24647   for (i = 0; i < num_elements; i++)
24648     {
24649       bool invert = opts[i].invert;
24650       const char *name = opts[i].name;
24651       const char *no_str = "";
24652       HOST_WIDE_INT mask = opts[i].mask;
24653       size_t len = comma_len + prefix_len + strlen (name);
24654 
24655       if (!invert)
24656 	{
24657 	  if ((flags & mask) == 0)
24658 	    {
24659 	      no_str = "no-";
24660 	      len += strlen ("no-");
24661 	    }
24662 
24663 	  flags &= ~mask;
24664 	}
24665 
24666       else
24667 	{
24668 	  if ((flags & mask) != 0)
24669 	    {
24670 	      no_str = "no-";
24671 	      len += strlen ("no-");
24672 	    }
24673 
24674 	  flags |= mask;
24675 	}
24676 
24677       cur_column += len;
24678       if (cur_column > max_column)
24679 	{
24680 	  fprintf (file, ", \\\n%*s", (int)start_column, "");
24681 	  cur_column = start_column + len;
24682 	  comma = "";
24683 	}
24684 
24685       fprintf (file, "%s%s%s%s", comma, prefix, no_str, name);
24686       comma = ", ";
24687       comma_len = strlen (", ");
24688     }
24689 
24690   fputs ("\n", file);
24691 }
24692 
24693 /* Helper function to print the current isa options on a line.  */
24694 
24695 static void
24696 rs6000_print_isa_options (FILE *file, int indent, const char *string,
24697 			  HOST_WIDE_INT flags)
24698 {
24699   rs6000_print_options_internal (file, indent, string, flags, "-m",
24700 				 &rs6000_opt_masks[0],
24701 				 ARRAY_SIZE (rs6000_opt_masks));
24702 }
24703 
24704 static void
24705 rs6000_print_builtin_options (FILE *file, int indent, const char *string,
24706 			      HOST_WIDE_INT flags)
24707 {
24708   rs6000_print_options_internal (file, indent, string, flags, "",
24709 				 &rs6000_builtin_mask_names[0],
24710 				 ARRAY_SIZE (rs6000_builtin_mask_names));
24711 }
24712 
24713 /* If the user used -mno-vsx, we need turn off all of the implicit ISA 2.06,
24714    2.07, and 3.0 options that relate to the vector unit (-mdirect-move,
24715    -mupper-regs-df, etc.).
24716 
24717    If the user used -mno-power8-vector, we need to turn off all of the implicit
24718    ISA 2.07 and 3.0 options that relate to the vector unit.
24719 
24720    If the user used -mno-power9-vector, we need to turn off all of the implicit
24721    ISA 3.0 options that relate to the vector unit.
24722 
24723    This function does not handle explicit options such as the user specifying
24724    -mdirect-move.  These are handled in rs6000_option_override_internal, and
24725    the appropriate error is given if needed.
24726 
24727    We return a mask of all of the implicit options that should not be enabled
24728    by default.  */
24729 
24730 static HOST_WIDE_INT
24731 rs6000_disable_incompatible_switches (void)
24732 {
24733   HOST_WIDE_INT ignore_masks = rs6000_isa_flags_explicit;
24734   size_t i, j;
24735 
24736   static const struct {
24737     const HOST_WIDE_INT no_flag;	/* flag explicitly turned off.  */
24738     const HOST_WIDE_INT dep_flags;	/* flags that depend on this option.  */
24739     const char *const name;		/* name of the switch.  */
24740   } flags[] = {
24741     { OPTION_MASK_P9_VECTOR,	OTHER_P9_VECTOR_MASKS,	"power9-vector"	},
24742     { OPTION_MASK_P8_VECTOR,	OTHER_P8_VECTOR_MASKS,	"power8-vector"	},
24743     { OPTION_MASK_VSX,		OTHER_VSX_VECTOR_MASKS,	"vsx"		},
24744     { OPTION_MASK_ALTIVEC,	OTHER_ALTIVEC_MASKS,	"altivec"	},
24745   };
24746 
24747   for (i = 0; i < ARRAY_SIZE (flags); i++)
24748     {
24749       HOST_WIDE_INT no_flag = flags[i].no_flag;
24750 
24751       if ((rs6000_isa_flags & no_flag) == 0
24752 	  && (rs6000_isa_flags_explicit & no_flag) != 0)
24753 	{
24754 	  HOST_WIDE_INT dep_flags = flags[i].dep_flags;
24755 	  HOST_WIDE_INT set_flags = (rs6000_isa_flags_explicit
24756 				     & rs6000_isa_flags
24757 				     & dep_flags);
24758 
24759 	  if (set_flags)
24760 	    {
24761 	      for (j = 0; j < ARRAY_SIZE (rs6000_opt_masks); j++)
24762 		if ((set_flags & rs6000_opt_masks[j].mask) != 0)
24763 		  {
24764 		    set_flags &= ~rs6000_opt_masks[j].mask;
24765 		    error ("%<-mno-%s%> turns off %<-m%s%>",
24766 			   flags[i].name,
24767 			   rs6000_opt_masks[j].name);
24768 		  }
24769 
24770 	      gcc_assert (!set_flags);
24771 	    }
24772 
24773 	  rs6000_isa_flags &= ~dep_flags;
24774 	  ignore_masks |= no_flag | dep_flags;
24775 	}
24776     }
24777 
24778   return ignore_masks;
24779 }
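
/* For example, -mno-vsx combined with an explicit -mpower9-vector hits
   the error above ("'-mno-vsx' turns off '-mpower9-vector'"), while a
   plain -mno-vsx just silently drops the implicit VSX-dependent flags
   from the defaults.  */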
24780 
24781 
24782 /* Helper function for printing the function name when debugging.  */
24783 
24784 static const char *
24785 get_decl_name (tree fn)
24786 {
24787   tree name;
24788 
24789   if (!fn)
24790     return "<null>";
24791 
24792   name = DECL_NAME (fn);
24793   if (!name)
24794     return "<no-name>";
24795 
24796   return IDENTIFIER_POINTER (name);
24797 }
24798 
24799 /* Return the clone id of the target we are compiling code for in a target
24800    clone.  The clone id is ordered from 0 (default) to CLONE_MAX-1 and gives
24801    the priority list for the target clones (ordered from lowest to
24802    highest).  */
24803 
24804 static int
24805 rs6000_clone_priority (tree fndecl)
24806 {
24807   tree fn_opts = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
24808   HOST_WIDE_INT isa_masks;
24809   int ret = CLONE_DEFAULT;
24810   tree attrs = lookup_attribute ("target", DECL_ATTRIBUTES (fndecl));
24811   const char *attrs_str = NULL;
24812 
24813   attrs = TREE_VALUE (TREE_VALUE (attrs));
24814   attrs_str = TREE_STRING_POINTER (attrs);
24815 
24816   /* Return priority zero for default function.  Return the ISA needed for the
24817      function if it is not the default.  */
24818   if (strcmp (attrs_str, "default") != 0)
24819     {
24820       if (fn_opts == NULL_TREE)
24821 	fn_opts = target_option_default_node;
24822 
24823       if (!fn_opts || !TREE_TARGET_OPTION (fn_opts))
24824 	isa_masks = rs6000_isa_flags;
24825       else
24826 	isa_masks = TREE_TARGET_OPTION (fn_opts)->x_rs6000_isa_flags;
24827 
24828       for (ret = CLONE_MAX - 1; ret != 0; ret--)
24829 	if ((rs6000_clone_map[ret].isa_mask & isa_masks) != 0)
24830 	  break;
24831     }
24832 
24833   if (TARGET_DEBUG_TARGET)
24834     fprintf (stderr, "rs6000_get_function_version_priority (%s) => %d\n",
24835 	     get_decl_name (fndecl), ret);
24836 
24837   return ret;
24838 }
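
/* For example (a sketch), given

     __attribute__ ((target_clones ("cpu=power9,default")))
     double sum (double *p, unsigned long n);

   the "default" clone gets priority CLONE_DEFAULT (0) and the power9
   clone gets the highest CLONE_* index whose isa_mask is enabled in its
   target-specific ISA flags.  */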
24839 
24840 /* This compares the priority of target features in function DECL1 and DECL2.
24841    It returns positive value if DECL1 is higher priority, negative value if
24842    DECL2 is higher priority and 0 if they are the same.  Note, priorities are
24843    ordered from lowest (CLONE_DEFAULT) to highest (currently CLONE_ISA_3_0).  */
24844 
24845 static int
24846 rs6000_compare_version_priority (tree decl1, tree decl2)
24847 {
24848   int priority1 = rs6000_clone_priority (decl1);
24849   int priority2 = rs6000_clone_priority (decl2);
24850   int ret = priority1 - priority2;
24851 
24852   if (TARGET_DEBUG_TARGET)
24853     fprintf (stderr, "rs6000_compare_version_priority (%s, %s) => %d\n",
24854 	     get_decl_name (decl1), get_decl_name (decl2), ret);
24855 
24856   return ret;
24857 }
24858 
24859 /* Make a dispatcher declaration for the multi-versioned function DECL.
24860    Calls to DECL function will be replaced with calls to the dispatcher
24861    by the front-end.  Returns the decl of the dispatcher function.  */
24862 
24863 static tree
24864 rs6000_get_function_versions_dispatcher (void *decl)
24865 {
24866   tree fn = (tree) decl;
24867   struct cgraph_node *node = NULL;
24868   struct cgraph_node *default_node = NULL;
24869   struct cgraph_function_version_info *node_v = NULL;
24870   struct cgraph_function_version_info *first_v = NULL;
24871 
24872   tree dispatch_decl = NULL;
24873 
24874   struct cgraph_function_version_info *default_version_info = NULL;
24875   gcc_assert (fn != NULL && DECL_FUNCTION_VERSIONED (fn));
24876 
24877   if (TARGET_DEBUG_TARGET)
24878     fprintf (stderr, "rs6000_get_function_versions_dispatcher (%s)\n",
24879 	     get_decl_name (fn));
24880 
24881   node = cgraph_node::get (fn);
24882   gcc_assert (node != NULL);
24883 
24884   node_v = node->function_version ();
24885   gcc_assert (node_v != NULL);
24886 
24887   if (node_v->dispatcher_resolver != NULL)
24888     return node_v->dispatcher_resolver;
24889 
24890   /* Find the default version and make it the first node.  */
24891   first_v = node_v;
24892   /* Go to the beginning of the chain.  */
24893   while (first_v->prev != NULL)
24894     first_v = first_v->prev;
24895 
24896   default_version_info = first_v;
24897   while (default_version_info != NULL)
24898     {
24899       const tree decl2 = default_version_info->this_node->decl;
24900       if (is_function_default_version (decl2))
24901         break;
24902       default_version_info = default_version_info->next;
24903     }
24904 
24905   /* If there is no default node, just return NULL.  */
24906   if (default_version_info == NULL)
24907     return NULL;
24908 
24909   /* Make default info the first node.  */
24910   if (first_v != default_version_info)
24911     {
24912       default_version_info->prev->next = default_version_info->next;
24913       if (default_version_info->next)
24914         default_version_info->next->prev = default_version_info->prev;
24915       first_v->prev = default_version_info;
24916       default_version_info->next = first_v;
24917       default_version_info->prev = NULL;
24918     }
24919 
24920   default_node = default_version_info->this_node;
24921 
24922 #ifndef TARGET_LIBC_PROVIDES_HWCAP_IN_TCB
24923   error_at (DECL_SOURCE_LOCATION (default_node->decl),
24924 	    "%<target_clones%> attribute needs GLIBC (2.23 and newer) that "
24925 	    "exports hardware capability bits");
24926 #else
24927 
24928   if (targetm.has_ifunc_p ())
24929     {
24930       struct cgraph_function_version_info *it_v = NULL;
24931       struct cgraph_node *dispatcher_node = NULL;
24932       struct cgraph_function_version_info *dispatcher_version_info = NULL;
24933 
24934       /* Right now, the dispatching is done via ifunc.  */
24935       dispatch_decl = make_dispatcher_decl (default_node->decl);
24936 
24937       dispatcher_node = cgraph_node::get_create (dispatch_decl);
24938       gcc_assert (dispatcher_node != NULL);
24939       dispatcher_node->dispatcher_function = 1;
24940       dispatcher_version_info
24941 	= dispatcher_node->insert_new_function_version ();
24942       dispatcher_version_info->next = default_version_info;
24943       dispatcher_node->definition = 1;
24944 
24945       /* Set the dispatcher for all the versions.  */
24946       it_v = default_version_info;
24947       while (it_v != NULL)
24948 	{
24949 	  it_v->dispatcher_resolver = dispatch_decl;
24950 	  it_v = it_v->next;
24951 	}
24952     }
24953   else
24954     {
24955       error_at (DECL_SOURCE_LOCATION (default_node->decl),
24956 		"multiversioning needs %<ifunc%> which is not supported "
24957 		"on this target");
24958     }
24959 #endif
24960 
24961   return dispatch_decl;
24962 }
24963 
24964 /* Make the resolver function decl to dispatch the versions of a multi-
24965    versioned function, DEFAULT_DECL.  Create an empty basic block in the
24966    resolver and store the pointer in EMPTY_BB.  Return the decl of the resolver
24967    function.  */
24968 
24969 static tree
24970 make_resolver_func (const tree default_decl,
24971 		    const tree dispatch_decl,
24972 		    basic_block *empty_bb)
24973 {
24974   /* Make the resolver function static.  The resolver function returns
24975      void *.  */
24976   tree decl_name = clone_function_name (default_decl, "resolver");
24977   const char *resolver_name = IDENTIFIER_POINTER (decl_name);
24978   tree type = build_function_type_list (ptr_type_node, NULL_TREE);
24979   tree decl = build_fn_decl (resolver_name, type);
24980   SET_DECL_ASSEMBLER_NAME (decl, decl_name);
24981 
24982   DECL_NAME (decl) = decl_name;
24983   TREE_USED (decl) = 1;
24984   DECL_ARTIFICIAL (decl) = 1;
24985   DECL_IGNORED_P (decl) = 0;
24986   TREE_PUBLIC (decl) = 0;
24987   DECL_UNINLINABLE (decl) = 1;
24988 
24989   /* Resolver is not external, body is generated.  */
24990   DECL_EXTERNAL (decl) = 0;
24991   DECL_EXTERNAL (dispatch_decl) = 0;
24992 
24993   DECL_CONTEXT (decl) = NULL_TREE;
24994   DECL_INITIAL (decl) = make_node (BLOCK);
24995   DECL_STATIC_CONSTRUCTOR (decl) = 0;
24996 
24997   if (DECL_COMDAT_GROUP (default_decl)
24998       || TREE_PUBLIC (default_decl))
24999     {
25000       /* In this case, each translation unit with a call to this
25001 	 versioned function will put out a resolver.  Ensure it
25002 	 is comdat to keep just one copy.  */
25003       DECL_COMDAT (decl) = 1;
25004       make_decl_one_only (decl, DECL_ASSEMBLER_NAME (decl));
25005     }
25006   else
25007     TREE_PUBLIC (dispatch_decl) = 0;
25008 
25009   /* Build result decl and add to function_decl.  */
25010   tree t = build_decl (UNKNOWN_LOCATION, RESULT_DECL, NULL_TREE, ptr_type_node);
25011   DECL_CONTEXT (t) = decl;
25012   DECL_ARTIFICIAL (t) = 1;
25013   DECL_IGNORED_P (t) = 1;
25014   DECL_RESULT (decl) = t;
25015 
25016   gimplify_function_tree (decl);
25017   push_cfun (DECL_STRUCT_FUNCTION (decl));
25018   *empty_bb = init_lowered_empty_function (decl, false,
25019 					   profile_count::uninitialized ());
25020 
25021   cgraph_node::add_new_function (decl, true);
25022   symtab->call_cgraph_insertion_hooks (cgraph_node::get_create (decl));
25023 
25024   pop_cfun ();
25025 
25026   /* Mark dispatch_decl as "ifunc" with resolver as resolver_name.  */
25027   DECL_ATTRIBUTES (dispatch_decl)
25028     = make_attribute ("ifunc", resolver_name, DECL_ATTRIBUTES (dispatch_decl));
25029 
25030   cgraph_node::create_same_body_alias (dispatch_decl, decl);
25031 
25032   return decl;
25033 }
25034 
25035 /* This adds a condition to the basic_block NEW_BB in function FUNCTION_DECL to
25036    return a pointer to VERSION_DECL if we are running on a machine that
25037    supports the index CLONE_ISA hardware architecture bits.  This function will
25038    be called during version dispatch to decide which function version to
25039    execute.  It returns the basic block at the end, to which more conditions
25040    can be added.  */
25041 
25042 static basic_block
25043 add_condition_to_bb (tree function_decl, tree version_decl,
25044 		     int clone_isa, basic_block new_bb)
25045 {
25046   push_cfun (DECL_STRUCT_FUNCTION (function_decl));
25047 
25048   gcc_assert (new_bb != NULL);
25049   gimple_seq gseq = bb_seq (new_bb);
25050 
25051 
25052   tree convert_expr = build1 (CONVERT_EXPR, ptr_type_node,
25053 			      build_fold_addr_expr (version_decl));
25054   tree result_var = create_tmp_var (ptr_type_node);
25055   gimple *convert_stmt = gimple_build_assign (result_var, convert_expr);
25056   gimple *return_stmt = gimple_build_return (result_var);
25057 
25058   if (clone_isa == CLONE_DEFAULT)
25059     {
25060       gimple_seq_add_stmt (&gseq, convert_stmt);
25061       gimple_seq_add_stmt (&gseq, return_stmt);
25062       set_bb_seq (new_bb, gseq);
25063       gimple_set_bb (convert_stmt, new_bb);
25064       gimple_set_bb (return_stmt, new_bb);
25065       pop_cfun ();
25066       return new_bb;
25067     }
25068 
25069   tree bool_zero = build_int_cst (bool_int_type_node, 0);
25070   tree cond_var = create_tmp_var (bool_int_type_node);
25071   tree predicate_decl = rs6000_builtin_decls[(int) RS6000_BIF_CPU_SUPPORTS];
25072   const char *arg_str = rs6000_clone_map[clone_isa].name;
25073   tree predicate_arg = build_string_literal (strlen (arg_str) + 1, arg_str);
25074   gimple *call_cond_stmt = gimple_build_call (predicate_decl, 1, predicate_arg);
25075   gimple_call_set_lhs (call_cond_stmt, cond_var);
25076 
25077   gimple_set_block (call_cond_stmt, DECL_INITIAL (function_decl));
25078   gimple_set_bb (call_cond_stmt, new_bb);
25079   gimple_seq_add_stmt (&gseq, call_cond_stmt);
25080 
25081   gimple *if_else_stmt = gimple_build_cond (NE_EXPR, cond_var, bool_zero,
25082 					    NULL_TREE, NULL_TREE);
25083   gimple_set_block (if_else_stmt, DECL_INITIAL (function_decl));
25084   gimple_set_bb (if_else_stmt, new_bb);
25085   gimple_seq_add_stmt (&gseq, if_else_stmt);
25086 
25087   gimple_seq_add_stmt (&gseq, convert_stmt);
25088   gimple_seq_add_stmt (&gseq, return_stmt);
25089   set_bb_seq (new_bb, gseq);
25090 
25091   basic_block bb1 = new_bb;
25092   edge e12 = split_block (bb1, if_else_stmt);
25093   basic_block bb2 = e12->dest;
25094   e12->flags &= ~EDGE_FALLTHRU;
25095   e12->flags |= EDGE_TRUE_VALUE;
25096 
25097   edge e23 = split_block (bb2, return_stmt);
25098   gimple_set_bb (convert_stmt, bb2);
25099   gimple_set_bb (return_stmt, bb2);
25100 
25101   basic_block bb3 = e23->dest;
25102   make_edge (bb1, bb3, EDGE_FALSE_VALUE);
25103 
25104   remove_edge (e23);
25105   make_edge (bb2, EXIT_BLOCK_PTR_FOR_FN (cfun), 0);
25106 
25107   pop_cfun ();
25108   return bb3;
25109 }
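
/* Put together, the generated resolver body looks roughly like this
   sketch (clone and symbol names are illustrative):

     void *
     sum.resolver (void)
     {
       if (__builtin_cpu_supports ("arch_3_00"))
	 return (void *) sum_power9;
       return (void *) sum_default;
     }

   with one __builtin_cpu_supports test per non-default clone, emitted
   from highest priority to lowest.  */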
25110 
25111 /* This function generates the dispatch function for multi-versioned functions.
25112    DISPATCH_DECL is the function which will contain the dispatch logic.
25113    FNDECLS are the function choices for dispatch, and is a tree chain.
25114    EMPTY_BB is the basic block pointer in DISPATCH_DECL in which the dispatch
25115    code is generated.  */
25116 
25117 static int
25118 dispatch_function_versions (tree dispatch_decl,
25119 			    void *fndecls_p,
25120 			    basic_block *empty_bb)
25121 {
25122   int ix;
25123   tree ele;
25124   vec<tree> *fndecls;
25125   tree clones[CLONE_MAX];
25126 
25127   if (TARGET_DEBUG_TARGET)
25128     fputs ("dispatch_function_versions, top\n", stderr);
25129 
25130   gcc_assert (dispatch_decl != NULL
25131 	      && fndecls_p != NULL
25132 	      && empty_bb != NULL);
25133 
25134   /* fndecls_p is actually a vector.  */
25135   fndecls = static_cast<vec<tree> *> (fndecls_p);
25136 
25137   /* At least one more version other than the default.  */
25138   gcc_assert (fndecls->length () >= 2);
25139 
25140   /* The first version in the vector is the default decl.  */
25141   memset ((void *) clones, '\0', sizeof (clones));
25142   clones[CLONE_DEFAULT] = (*fndecls)[0];
25143 
25144   /* On the PowerPC, we do not need to call __builtin_cpu_init, which is a NOP
25145      on the PowerPC (on the x86_64, it is not a NOP).  The builtin function
25146      __builtin_cpu_supports ensures that the TOC fields are set up by requiring
25147      a recent glibc.  If we ever need to call __builtin_cpu_init, we would need
25148      to insert the code here to do the call.  */
25149 
25150   for (ix = 1; fndecls->iterate (ix, &ele); ++ix)
25151     {
25152       int priority = rs6000_clone_priority (ele);
25153       if (!clones[priority])
25154 	clones[priority] = ele;
25155     }
25156 
25157   for (ix = CLONE_MAX - 1; ix >= 0; ix--)
25158     if (clones[ix])
25159       {
25160 	if (TARGET_DEBUG_TARGET)
25161 	  fprintf (stderr, "dispatch_function_versions, clone %d, %s\n",
25162 		   ix, get_decl_name (clones[ix]));
25163 
25164 	*empty_bb = add_condition_to_bb (dispatch_decl, clones[ix], ix,
25165 					 *empty_bb);
25166       }
25167 
25168   return 0;
25169 }
25170 
25171 /* Generate the dispatching code body to dispatch multi-versioned function
25172    DECL.  The target hook is called to process the "target" attributes and
25173    provide the code to dispatch the right function at run-time.  NODE points
25174    to the dispatcher decl whose body will be created.  */
25175 
25176 static tree
25177 rs6000_generate_version_dispatcher_body (void *node_p)
25178 {
25179   tree resolver;
25180   basic_block empty_bb;
25181   struct cgraph_node *node = (cgraph_node *) node_p;
25182   struct cgraph_function_version_info *ninfo = node->function_version ();
25183 
25184   if (ninfo->dispatcher_resolver)
25185     return ninfo->dispatcher_resolver;
25186 
25187   /* node is going to be an alias, so remove the finalized bit.  */
25188   node->definition = false;
25189 
25190   /* The first version in the chain corresponds to the default version.  */
25191   ninfo->dispatcher_resolver = resolver
25192     = make_resolver_func (ninfo->next->this_node->decl, node->decl, &empty_bb);
25193 
25194   if (TARGET_DEBUG_TARGET)
25195     fprintf (stderr, "rs6000_get_function_versions_dispatcher, %s\n",
25196 	     get_decl_name (resolver));
25197 
25198   push_cfun (DECL_STRUCT_FUNCTION (resolver));
25199   auto_vec<tree, 2> fn_ver_vec;
25200 
25201   for (struct cgraph_function_version_info *vinfo = ninfo->next;
25202        vinfo;
25203        vinfo = vinfo->next)
25204     {
25205       struct cgraph_node *version = vinfo->this_node;
25206       /* Check for virtual functions here again, as by this time it should
25207 	 have been determined if this function needs a vtable index or
25208 	 not.  This happens for methods in derived classes that override
25209 	 virtual methods in base classes but are not explicitly marked as
25210 	 virtual.  */
25211       if (DECL_VINDEX (version->decl))
25212 	sorry ("Virtual function multiversioning not supported");
25213 
25214       fn_ver_vec.safe_push (version->decl);
25215     }
25216 
25217   dispatch_function_versions (resolver, &fn_ver_vec, &empty_bb);
25218   cgraph_edge::rebuild_edges ();
25219   pop_cfun ();
25220   return resolver;
25221 }
25222 
25223 /* Hook to decide if we need to scan function gimple statements to
25224    collect target specific information for inlining, and update the
25225    corresponding RS6000_FN_TARGET_INFO_* bit in INFO if we are able
25226    to predict which ISA feature is used at this time.  Return true
25227    if we need to scan, otherwise return false.  */
25228 
25229 static bool
25230 rs6000_need_ipa_fn_target_info (const_tree decl,
25231 				unsigned int &info ATTRIBUTE_UNUSED)
25232 {
25233   tree target = DECL_FUNCTION_SPECIFIC_TARGET (decl);
25234   if (!target)
25235     target = target_option_default_node;
25236   struct cl_target_option *opts = TREE_TARGET_OPTION (target);
25237 
25238   /* See PR102059; we only handle HTM for now, so we only do the
25239      subsequent scanning when the HTM feature is enabled.  */
25240   if (opts->x_rs6000_isa_flags & OPTION_MASK_HTM)
25241       return true;
25242 
25243   return false;
25244 }
25245 
25246 /* Hook to update target specific information INFO for inlining by
25247    checking the given STMT.  Return false if we don't need to scan
25248    any more, otherwise return true.  */
25249 
25250 static bool
25251 rs6000_update_ipa_fn_target_info (unsigned int &info, const gimple *stmt)
25252 {
25253   /* Assume inline asm can use any instruction features.  */
25254   if (gimple_code (stmt) == GIMPLE_ASM)
25255     {
25256       /* This should set any bits we are concerned with; for now,
25257 	 OPTION_MASK_HTM is the only bit we care about.  */
25258       info |= RS6000_FN_TARGET_INFO_HTM;
25259       return false;
25260     }
25261   else if (gimple_code (stmt) == GIMPLE_CALL)
25262     {
25263       tree fndecl = gimple_call_fndecl (stmt);
25264       if (fndecl && fndecl_built_in_p (fndecl, BUILT_IN_MD))
25265 	{
25266 	  enum rs6000_gen_builtins fcode
25267 	    = (enum rs6000_gen_builtins) DECL_MD_FUNCTION_CODE (fndecl);
25268 	  /* HTM bifs definitely exploit HTM insns.  */
25269 	  if (bif_is_htm (rs6000_builtin_info[fcode]))
25270 	    {
25271 	      info |= RS6000_FN_TARGET_INFO_HTM;
25272 	      return false;
25273 	    }
25274 	}
25275     }
25276 
25277   return true;
25278 }
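
/* For example, scanning a callee containing either of these (a sketch):

     __builtin_tbegin (0);	// HTM bif: sets RS6000_FN_TARGET_INFO_HTM
     asm ("tabort. 0");		// inline asm: conservatively assumed HTM

   sets the HTM bit in INFO and stops the scan by returning false.  */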
25279 
25280 /* Hook to determine if one function can safely inline another.  */
25281 
25282 static bool
25283 rs6000_can_inline_p (tree caller, tree callee)
25284 {
25285   bool ret = false;
25286   tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
25287   tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
25288 
25289   /* If the callee has no option attributes, then it is ok to inline.  */
25290   if (!callee_tree)
25291     ret = true;
25292 
25293   else
25294     {
25295       HOST_WIDE_INT caller_isa;
25296       struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
25297       HOST_WIDE_INT callee_isa = callee_opts->x_rs6000_isa_flags;
25298       HOST_WIDE_INT explicit_isa = callee_opts->x_rs6000_isa_flags_explicit;
25299 
25300       /* If the caller has option attributes, then use them.
25301 	 Otherwise, use the command line options.  */
25302       if (caller_tree)
25303 	caller_isa = TREE_TARGET_OPTION (caller_tree)->x_rs6000_isa_flags;
25304       else
25305 	caller_isa = rs6000_isa_flags;
25306 
25307       cgraph_node *callee_node = cgraph_node::get (callee);
25308       if (ipa_fn_summaries && ipa_fn_summaries->get (callee_node) != NULL)
25309 	{
25310 	  unsigned int info = ipa_fn_summaries->get (callee_node)->target_info;
25311 	  if ((info & RS6000_FN_TARGET_INFO_HTM) == 0)
25312 	    {
25313 	      callee_isa &= ~OPTION_MASK_HTM;
25314 	      explicit_isa &= ~OPTION_MASK_HTM;
25315 	    }
25316 	}
25317 
25318       /* Ignore -mpower8-fusion and -mpower10-fusion options for inlining
25319 	 purposes.  */
25320       callee_isa &= ~(OPTION_MASK_P8_FUSION | OPTION_MASK_P10_FUSION);
25321       explicit_isa &= ~(OPTION_MASK_P8_FUSION | OPTION_MASK_P10_FUSION);
25322 
25323       /* The callee's options must be a subset of the caller's options, i.e.
25324 	 a vsx function may inline an altivec function, but a no-vsx function
25325 	 must not inline a vsx function.  However, for those options that the
25326 	 callee has explicitly enabled or disabled, then we must enforce that
25327 	 the callee's and caller's options match exactly; see PR70010.  */
25328       if (((caller_isa & callee_isa) == callee_isa)
25329 	  && (caller_isa & explicit_isa) == (callee_isa & explicit_isa))
25330 	ret = true;
25331     }
25332 
25333   if (TARGET_DEBUG_TARGET)
25334     fprintf (stderr, "rs6000_can_inline_p:, caller %s, callee %s, %s inline\n",
25335 	     get_decl_name (caller), get_decl_name (callee),
25336 	     (ret ? "can" : "cannot"));
25337 
25338   return ret;
25339 }
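
/* For example: a callee compiled with just -maltivec can be inlined into
   a -mvsx caller (the callee's ISA is a subset), but a callee built with
   an explicit -mno-htm cannot be inlined into a caller that has HTM
   enabled, because explicitly set flags must match exactly (PR70010).  */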
25340 
25341 /* Allocate a stack temp and fix up the address so it meets the particular
25342    memory requirements (either offsettable or REG+REG addressing).  */
25343 
25344 rtx
25345 rs6000_allocate_stack_temp (machine_mode mode,
25346 			    bool offsettable_p,
25347 			    bool reg_reg_p)
25348 {
25349   rtx stack = assign_stack_temp (mode, GET_MODE_SIZE (mode));
25350   rtx addr = XEXP (stack, 0);
25351   int strict_p = reload_completed;
25352 
25353   if (!legitimate_indirect_address_p (addr, strict_p))
25354     {
25355       if (offsettable_p
25356 	  && !rs6000_legitimate_offset_address_p (mode, addr, strict_p, true))
25357 	stack = replace_equiv_address (stack, copy_addr_to_reg (addr));
25358 
25359       else if (reg_reg_p && !legitimate_indexed_address_p (addr, strict_p))
25360 	stack = replace_equiv_address (stack, copy_addr_to_reg (addr));
25361     }
25362 
25363   return stack;
25364 }
25365 
25366 /* Given a memory reference, if it is not a reg or reg+reg addressing,
25367    convert to such a form to deal with memory reference instructions
25368    like STFIWX and LDBRX that only take reg+reg addressing.  */
25369 
25370 rtx
25371 rs6000_force_indexed_or_indirect_mem (rtx x)
25372 {
25373   machine_mode mode = GET_MODE (x);
25374 
25375   gcc_assert (MEM_P (x));
25376   if (can_create_pseudo_p () && !indexed_or_indirect_operand (x, mode))
25377     {
25378       rtx addr = XEXP (x, 0);
25379       if (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
25380 	{
25381 	  rtx reg = XEXP (addr, 0);
25382 	  HOST_WIDE_INT size = GET_MODE_SIZE (GET_MODE (x));
25383 	  rtx size_rtx = GEN_INT ((GET_CODE (addr) == PRE_DEC) ? -size : size);
25384 	  gcc_assert (REG_P (reg));
25385 	  emit_insn (gen_add3_insn (reg, reg, size_rtx));
25386 	  addr = reg;
25387 	}
25388       else if (GET_CODE (addr) == PRE_MODIFY)
25389 	{
25390 	  rtx reg = XEXP (addr, 0);
25391 	  rtx expr = XEXP (addr, 1);
25392 	  gcc_assert (REG_P (reg));
25393 	  gcc_assert (GET_CODE (expr) == PLUS);
25394 	  emit_insn (gen_add3_insn (reg, XEXP (expr, 0), XEXP (expr, 1)));
25395 	  addr = reg;
25396 	}
25397 
25398       if (GET_CODE (addr) == PLUS)
25399 	{
25400 	  rtx op0 = XEXP (addr, 0);
25401 	  rtx op1 = XEXP (addr, 1);
25402 	  op0 = force_reg (Pmode, op0);
25403 	  op1 = force_reg (Pmode, op1);
25404 	  x = replace_equiv_address (x, gen_rtx_PLUS (Pmode, op0, op1));
25405 	}
25406       else
25407 	x = replace_equiv_address (x, force_reg (Pmode, addr));
25408     }
25409 
25410   return x;
25411 }
25412 
25413 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
25414 
25415    On the RS/6000, all integer constants are acceptable, though most won't be
25416    valid for particular insns.  Only easy FP constants are acceptable.  */
25417 
25418 static bool
25419 rs6000_legitimate_constant_p (machine_mode mode, rtx x)
25420 {
25421   if (TARGET_ELF && tls_referenced_p (x))
25422     return false;
25423 
25424   if (CONST_DOUBLE_P (x))
25425     return easy_fp_constant (x, mode);
25426 
25427   if (GET_CODE (x) == CONST_VECTOR)
25428     return easy_vector_constant (x, mode);
25429 
25430   return true;
25431 }
25432 
25433 #if TARGET_AIX_OS
25434 /* Implement TARGET_PRECOMPUTE_TLS_P.
25435 
25436    On AIX, TLS symbols are in the TOC, which is maintained in the
25437    constant pool.  AIX TOC TLS symbols need to be pre-computed, but
25438    must be considered legitimate constants.  */
25439 
25440 static bool
25441 rs6000_aix_precompute_tls_p (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
25442 {
25443   return tls_referenced_p (x);
25444 }
25445 #endif
25446 
25447 
25448 /* Return TRUE iff the sequence ending in LAST sets the static chain.  */
25449 
25450 static bool
25451 chain_already_loaded (rtx_insn *last)
25452 {
25453   for (; last != NULL; last = PREV_INSN (last))
25454     {
25455       if (NONJUMP_INSN_P (last))
25456 	{
25457 	  rtx patt = PATTERN (last);
25458 
25459 	  if (GET_CODE (patt) == SET)
25460 	    {
25461 	      rtx lhs = XEXP (patt, 0);
25462 
25463 	      if (REG_P (lhs) && REGNO (lhs) == STATIC_CHAIN_REGNUM)
25464 		return true;
25465 	    }
25466 	}
25467     }
25468   return false;
25469 }
25470 
25471 /* Expand code to perform a call under the AIX or ELFv2 ABI.  */
25472 
25473 void
25474 rs6000_call_aix (rtx value, rtx func_desc, rtx tlsarg, rtx cookie)
25475 {
25476   rtx func = func_desc;
25477   rtx toc_reg = gen_rtx_REG (Pmode, TOC_REGNUM);
25478   rtx toc_load = NULL_RTX;
25479   rtx toc_restore = NULL_RTX;
25480   rtx func_addr;
25481   rtx abi_reg = NULL_RTX;
25482   rtx call[5];
25483   int n_call;
25484   rtx insn;
25485   bool is_pltseq_longcall;
25486 
25487   if (global_tlsarg)
25488     tlsarg = global_tlsarg;
25489 
25490   /* Handle longcall attributes.  */
25491   is_pltseq_longcall = false;
25492   if ((INTVAL (cookie) & CALL_LONG) != 0
25493       && GET_CODE (func_desc) == SYMBOL_REF)
25494     {
25495       func = rs6000_longcall_ref (func_desc, tlsarg);
25496       if (TARGET_PLTSEQ)
25497 	is_pltseq_longcall = true;
25498     }
25499 
25500   /* Handle indirect calls.  */
25501   if (!SYMBOL_REF_P (func)
25502       || (DEFAULT_ABI == ABI_AIX && !SYMBOL_REF_FUNCTION_P (func)))
25503     {
25504       if (!rs6000_pcrel_p ())
25505 	{
25506 	  /* Save the TOC into its reserved slot before the call,
25507 	     and prepare to restore it after the call.  */
25508 	  rtx stack_toc_offset = GEN_INT (RS6000_TOC_SAVE_SLOT);
25509 	  rtx stack_toc_unspec = gen_rtx_UNSPEC (Pmode,
25510 						 gen_rtvec (1, stack_toc_offset),
25511 						 UNSPEC_TOCSLOT);
25512 	  toc_restore = gen_rtx_SET (toc_reg, stack_toc_unspec);
25513 
25514 	  /* Can we optimize saving the TOC in the prologue or
25515 	     do we need to do it at every call?  */
25516 	  if (TARGET_SAVE_TOC_INDIRECT && !cfun->calls_alloca)
25517 	    cfun->machine->save_toc_in_prologue = true;
25518 	  else
25519 	    {
25520 	      rtx stack_ptr = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
25521 	      rtx stack_toc_mem = gen_frame_mem (Pmode,
25522 						 gen_rtx_PLUS (Pmode, stack_ptr,
25523 							       stack_toc_offset));
25524 	      MEM_VOLATILE_P (stack_toc_mem) = 1;
25525 	      if (is_pltseq_longcall)
25526 		{
25527 		  rtvec v = gen_rtvec (3, toc_reg, func_desc, tlsarg);
25528 		  rtx mark_toc_reg = gen_rtx_UNSPEC (Pmode, v, UNSPEC_PLTSEQ);
25529 		  emit_insn (gen_rtx_SET (stack_toc_mem, mark_toc_reg));
25530 		}
25531 	      else
25532 		emit_move_insn (stack_toc_mem, toc_reg);
25533 	    }
25534 	}
25535 
25536       if (DEFAULT_ABI == ABI_ELFv2)
25537 	{
25538 	  /* A function pointer in the ELFv2 ABI is just a plain address, but
25539 	     the ABI requires it to be loaded into r12 before the call.  */
25540 	  func_addr = gen_rtx_REG (Pmode, 12);
25541 	  emit_move_insn (func_addr, func);
25542 	  abi_reg = func_addr;
25543 	  /* Indirect calls via CTR are strongly preferred over indirect
25544 	     calls via LR, so move the address there.  Needed to mark
25545 	     this insn for linker plt sequence editing too.  */
25546 	  func_addr = gen_rtx_REG (Pmode, CTR_REGNO);
25547 	  if (is_pltseq_longcall)
25548 	    {
25549 	      rtvec v = gen_rtvec (3, abi_reg, func_desc, tlsarg);
25550 	      rtx mark_func = gen_rtx_UNSPEC (Pmode, v, UNSPEC_PLTSEQ);
25551 	      emit_insn (gen_rtx_SET (func_addr, mark_func));
25552 	      v = gen_rtvec (2, func_addr, func_desc);
25553 	      func_addr = gen_rtx_UNSPEC (Pmode, v, UNSPEC_PLTSEQ);
25554 	    }
25555 	  else
25556 	    emit_move_insn (func_addr, abi_reg);
25557 	}
25558       else
25559 	{
25560 	  /* A function pointer under AIX is a pointer to a data area whose
25561 	     first word contains the actual address of the function, whose
25562 	     second word contains a pointer to its TOC, and whose third word
25563 	     contains a value to place in the static chain register (r11).
25564 	     Note that if we load the static chain, our "trampoline" need
25565 	     not have any executable code.  */
25566 
25567 	  /* Load up address of the actual function.  */
25568 	  func = force_reg (Pmode, func);
25569 	  func_addr = gen_reg_rtx (Pmode);
25570 	  emit_move_insn (func_addr, gen_rtx_MEM (Pmode, func));
25571 
25572 	  /* Indirect calls via CTR are strongly preferred over indirect
25573 	     calls via LR, so move the address there.  */
25574 	  rtx ctr_reg = gen_rtx_REG (Pmode, CTR_REGNO);
25575 	  emit_move_insn (ctr_reg, func_addr);
25576 	  func_addr = ctr_reg;
25577 
25578 	  /* Prepare to load the TOC of the called function.  Note that the
25579 	     TOC load must happen immediately before the actual call so
25580 	     that unwinding the TOC registers works correctly.  See the
25581 	     comment in frob_update_context.  */
25582 	  rtx func_toc_offset = GEN_INT (GET_MODE_SIZE (Pmode));
25583 	  rtx func_toc_mem = gen_rtx_MEM (Pmode,
25584 					  gen_rtx_PLUS (Pmode, func,
25585 							func_toc_offset));
25586 	  toc_load = gen_rtx_USE (VOIDmode, func_toc_mem);
25587 
25588 	  /* If we have a static chain, load it up.  But, if the call was
25589 	     originally direct, the 3rd word has not been written since no
25590 	     trampoline has been built, so we ought not to load it, lest we
25591 	     override a static chain value.  */
25592 	  if (!(GET_CODE (func_desc) == SYMBOL_REF
25593 		&& SYMBOL_REF_FUNCTION_P (func_desc))
25594 	      && TARGET_POINTERS_TO_NESTED_FUNCTIONS
25595 	      && !chain_already_loaded (get_current_sequence ()->next->last))
25596 	    {
25597 	      rtx sc_reg = gen_rtx_REG (Pmode, STATIC_CHAIN_REGNUM);
25598 	      rtx func_sc_offset = GEN_INT (2 * GET_MODE_SIZE (Pmode));
25599 	      rtx func_sc_mem = gen_rtx_MEM (Pmode,
25600 					     gen_rtx_PLUS (Pmode, func,
25601 							   func_sc_offset));
25602 	      emit_move_insn (sc_reg, func_sc_mem);
25603 	      abi_reg = sc_reg;
25604 	    }
25605 	}
25606     }
25607   else
25608     {
25609       /* No TOC register needed for calls from PC-relative callers.  */
25610       if (!rs6000_pcrel_p ())
25611 	/* Direct calls use the TOC: for local calls, the callee will
25612 	   assume the TOC register is set; for non-local calls, the
25613 	   PLT stub needs the TOC register.  */
25614 	abi_reg = toc_reg;
25615       func_addr = func;
25616     }
25617 
25618   /* Create the call.  */
25619   call[0] = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (SImode, func_addr), tlsarg);
25620   if (value != NULL_RTX)
25621     call[0] = gen_rtx_SET (value, call[0]);
25622   call[1] = gen_rtx_USE (VOIDmode, cookie);
25623   n_call = 2;
25624 
25625   if (toc_load)
25626     call[n_call++] = toc_load;
25627   if (toc_restore)
25628     call[n_call++] = toc_restore;
25629 
25630   call[n_call++] = gen_hard_reg_clobber (Pmode, LR_REGNO);
25631 
25632   insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (n_call, call));
25633   insn = emit_call_insn (insn);
25634 
25635   /* Mention all registers defined by the ABI to hold information
25636      as uses in CALL_INSN_FUNCTION_USAGE.  */
25637   if (abi_reg)
25638     use_reg (&CALL_INSN_FUNCTION_USAGE (insn), abi_reg);
25639 }
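
/* The descriptor words dereferenced in the AIX branch above can be pictured
   as the struct below.  This is an illustrative, compiled-out sketch; the
   struct and field names are invented, and only the word offsets matter.  */
#if 0
struct aix_func_desc
{
  void *code_addr;	/* Word 0: address of the function's actual code.  */
  void *toc_value;	/* Word 1: the function's TOC pointer (loaded into r2).  */
  void *static_chain;	/* Word 2: static chain value (loaded into r11), if any.  */
};
#endif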
25640 
25641 /* Expand code to perform a sibling call under the AIX or ELFv2 ABI.  */
25642 
25643 void
25644 rs6000_sibcall_aix (rtx value, rtx func_desc, rtx tlsarg, rtx cookie)
25645 {
25646   rtx call[2];
25647   rtx insn;
25648   rtx r12 = NULL_RTX;
25649   rtx func_addr = func_desc;
25650 
25651   if (global_tlsarg)
25652     tlsarg = global_tlsarg;
25653 
25654   /* Handle longcall attributes.  */
25655   if (INTVAL (cookie) & CALL_LONG && SYMBOL_REF_P (func_desc))
25656     {
25657       /* PCREL can do a sibling call to a longcall function
25658 	 because we don't need to restore the TOC register.  */
25659       gcc_assert (rs6000_pcrel_p ());
25660       func_desc = rs6000_longcall_ref (func_desc, tlsarg);
25661     }
25662   else
25663     gcc_assert (INTVAL (cookie) == 0);
25664 
25665   /* For ELFv2, r12 and CTR need to hold the function address
25666      for an indirect call.  */
25667   if (GET_CODE (func_desc) != SYMBOL_REF && DEFAULT_ABI == ABI_ELFv2)
25668     {
25669       r12 = gen_rtx_REG (Pmode, 12);
25670       emit_move_insn (r12, func_desc);
25671       func_addr = gen_rtx_REG (Pmode, CTR_REGNO);
25672       emit_move_insn (func_addr, r12);
25673     }
25674 
25675   /* Create the call.  */
25676   call[0] = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (SImode, func_addr), tlsarg);
25677   if (value != NULL_RTX)
25678     call[0] = gen_rtx_SET (value, call[0]);
25679 
25680   call[1] = simple_return_rtx;
25681 
25682   insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (2, call));
25683   insn = emit_call_insn (insn);
25684 
25685   /* Note use of the TOC register.  */
25686   if (!rs6000_pcrel_p ())
25687     use_reg (&CALL_INSN_FUNCTION_USAGE (insn),
25688 	     gen_rtx_REG (Pmode, TOC_REGNUM));
25689 
25690   /* Note use of r12.  */
25691   if (r12)
25692     use_reg (&CALL_INSN_FUNCTION_USAGE (insn), r12);
25693 }
25694 
25695 /* Expand code to perform a call under the SYSV4 ABI.  */
25696 
25697 void
25698 rs6000_call_sysv (rtx value, rtx func_desc, rtx tlsarg, rtx cookie)
25699 {
25700   rtx func = func_desc;
25701   rtx func_addr;
25702   rtx call[4];
25703   rtx insn;
25704   rtx abi_reg = NULL_RTX;
25705   int n;
25706 
25707   if (global_tlsarg)
25708     tlsarg = global_tlsarg;
25709 
25710   /* Handle longcall attributes.  */
25711   if ((INTVAL (cookie) & CALL_LONG) != 0
25712       && GET_CODE (func_desc) == SYMBOL_REF)
25713     {
25714       func = rs6000_longcall_ref (func_desc, tlsarg);
25715       /* If the longcall was implemented as an inline PLT call using
25716 	 PLT unspecs then func will be REG:r11.  If not, func will be
25717 	 a pseudo reg.  The inline PLT call sequence supports lazy
25718 	 linking (and longcalls to functions in dlopen'd libraries).
25719 	 The other style of longcall doesn't.  The lazy linking entry
25720 	 to the dynamic symbol resolver requires r11 be the function
25721 	 address (as it is for linker generated PLT stubs).  Ensure
25722 	 r11 stays valid to the bctrl by marking r11 used by the call.  */
25723       if (TARGET_PLTSEQ)
25724 	abi_reg = func;
25725     }
25726 
25727   /* Handle indirect calls.  */
25728   if (GET_CODE (func) != SYMBOL_REF)
25729     {
25730       func = force_reg (Pmode, func);
25731 
25732       /* Indirect calls via CTR are strongly preferred over indirect
25733 	 calls via LR, so move the address there.  That can't be left
25734 	 to reload because we want to mark every instruction in an
25735 	 inline PLT call sequence with a reloc, enabling the linker to
25736 	 edit the sequence back to a direct call when that makes sense.  */
25737       func_addr = gen_rtx_REG (Pmode, CTR_REGNO);
25738       if (abi_reg)
25739 	{
25740 	  rtvec v = gen_rtvec (3, func, func_desc, tlsarg);
25741 	  rtx mark_func = gen_rtx_UNSPEC (Pmode, v, UNSPEC_PLTSEQ);
25742 	  emit_insn (gen_rtx_SET (func_addr, mark_func));
25743 	  v = gen_rtvec (2, func_addr, func_desc);
25744 	  func_addr = gen_rtx_UNSPEC (Pmode, v, UNSPEC_PLTSEQ);
25745 	}
25746       else
25747 	emit_move_insn (func_addr, func);
25748     }
25749   else
25750     func_addr = func;
25751 
25752   /* Create the call.  */
25753   call[0] = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (SImode, func_addr), tlsarg);
25754   if (value != NULL_RTX)
25755     call[0] = gen_rtx_SET (value, call[0]);
25756 
25757   call[1] = gen_rtx_USE (VOIDmode, cookie);
25758   n = 2;
25759   if (TARGET_SECURE_PLT
25760       && flag_pic
25761       && GET_CODE (func_addr) == SYMBOL_REF
25762       && !SYMBOL_REF_LOCAL_P (func_addr))
25763     call[n++] = gen_rtx_USE (VOIDmode, pic_offset_table_rtx);
25764 
25765   call[n++] = gen_hard_reg_clobber (Pmode, LR_REGNO);
25766 
25767   insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (n, call));
25768   insn = emit_call_insn (insn);
25769   if (abi_reg)
25770     use_reg (&CALL_INSN_FUNCTION_USAGE (insn), abi_reg);
25771 }
25772 
25773 /* Expand code to perform a sibling call under the SysV4 ABI.  */
25774 
25775 void
25776 rs6000_sibcall_sysv (rtx value, rtx func_desc, rtx tlsarg, rtx cookie)
25777 {
25778   rtx func = func_desc;
25779   rtx func_addr;
25780   rtx call[3];
25781   rtx insn;
25782   rtx abi_reg = NULL_RTX;
25783 
25784   if (global_tlsarg)
25785     tlsarg = global_tlsarg;
25786 
25787   /* Handle longcall attributes.  */
25788   if ((INTVAL (cookie) & CALL_LONG) != 0
25789       && GET_CODE (func_desc) == SYMBOL_REF)
25790     {
25791       func = rs6000_longcall_ref (func_desc, tlsarg);
25792       /* If the longcall was implemented as an inline PLT call using
25793 	 PLT unspecs then func will be REG:r11.  If not, func will be
25794 	 a pseudo reg.  The inline PLT call sequence supports lazy
25795 	 linking (and longcalls to functions in dlopen'd libraries).
25796 	 The other style of longcall doesn't.  The lazy linking entry
25797 	 to the dynamic symbol resolver requires r11 be the function
25798 	 address (as it is for linker generated PLT stubs).  Ensure
25799 	 r11 stays valid to the bctr by marking r11 used by the call.  */
25800       if (TARGET_PLTSEQ)
25801 	abi_reg = func;
25802     }
25803 
25804   /* Handle indirect calls.  */
25805   if (GET_CODE (func) != SYMBOL_REF)
25806     {
25807       func = force_reg (Pmode, func);
25808 
25809       /* Indirect sibcalls must go via CTR.  That can't be left to
25810 	 reload because we want to mark every instruction in an inline
25811 	 PLT call sequence with a reloc, enabling the linker to edit
25812 	 the sequence back to a direct call when that makes sense.  */
25813       func_addr = gen_rtx_REG (Pmode, CTR_REGNO);
25814       if (abi_reg)
25815 	{
25816 	  rtvec v = gen_rtvec (3, func, func_desc, tlsarg);
25817 	  rtx mark_func = gen_rtx_UNSPEC (Pmode, v, UNSPEC_PLTSEQ);
25818 	  emit_insn (gen_rtx_SET (func_addr, mark_func));
25819 	  v = gen_rtvec (2, func_addr, func_desc);
25820 	  func_addr = gen_rtx_UNSPEC (Pmode, v, UNSPEC_PLTSEQ);
25821 	}
25822       else
25823 	emit_move_insn (func_addr, func);
25824     }
25825   else
25826     func_addr = func;
25827 
25828   /* Create the call.  */
25829   call[0] = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (SImode, func_addr), tlsarg);
25830   if (value != NULL_RTX)
25831     call[0] = gen_rtx_SET (value, call[0]);
25832 
25833   call[1] = gen_rtx_USE (VOIDmode, cookie);
25834   call[2] = simple_return_rtx;
25835 
25836   insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (3, call));
25837   insn = emit_call_insn (insn);
25838   if (abi_reg)
25839     use_reg (&CALL_INSN_FUNCTION_USAGE (insn), abi_reg);
25840 }
25841 
25842 #if TARGET_MACHO
25843 
25844 /* Expand code to perform a call under the Darwin ABI.
25845    Modulo handling of mlongcall, this is much the same as sysv.
25846    If/when the longcall optimisation is removed, we could drop this
25847    code and use the sysv case (taking care to avoid the tls stuff).
25848 
25849    We can use this for sibcalls too, if needed.  */
25850 
25851 void
25852 rs6000_call_darwin_1 (rtx value, rtx func_desc, rtx tlsarg,
25853 		      rtx cookie, bool sibcall)
25854 {
25855   rtx func = func_desc;
25856   rtx func_addr;
25857   rtx call[3];
25858   rtx insn;
25859   int cookie_val = INTVAL (cookie);
25860   bool make_island = false;
25861 
25862   /* Handle longcall attributes; there are two cases for Darwin:
25863      1) Newer linkers are capable of synthesising any branch islands needed.
25864      2) We need a helper branch island synthesised by the compiler.
25865      The second case has mostly been retired and we don't use it for m64.
25866      In fact, it is only an optimisation; we could just indirect as sysv
25867      does, but we keep it for backwards compatibility for now.
25868      If we're going to use this, then we need to keep the CALL_LONG bit set,
25869      so that we can pick up the special insn form later.  */
25870   if ((cookie_val & CALL_LONG) != 0
25871       && GET_CODE (func_desc) == SYMBOL_REF)
25872     {
25873       /* FIXME: the longcall opt should not hang off this flag, it is most
25874 	 likely incorrect for kernel-mode code-generation.  */
25875       if (darwin_symbol_stubs && TARGET_32BIT)
25876 	make_island = true; /* Do nothing yet, retain the CALL_LONG flag.  */
25877       else
25878 	{
25879 	  /* The linker is capable of doing this, but the user explicitly
25880 	     asked for -mlongcall, so we'll do the 'normal' version.  */
25881 	  func = rs6000_longcall_ref (func_desc, NULL_RTX);
25882 	  cookie_val &= ~CALL_LONG; /* Handled, zap it.  */
25883 	}
25884     }
25885 
25886   /* Handle indirect calls.  */
25887   if (GET_CODE (func) != SYMBOL_REF)
25888     {
25889       func = force_reg (Pmode, func);
25890 
25891       /* Indirect calls via CTR are strongly preferred over indirect
25892 	 calls via LR, and are required for indirect sibcalls, so move
25893 	 the address there.   */
25894       func_addr = gen_rtx_REG (Pmode, CTR_REGNO);
25895       emit_move_insn (func_addr, func);
25896     }
25897   else
25898     func_addr = func;
25899 
25900   /* Create the call.  */
25901   call[0] = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (SImode, func_addr), tlsarg);
25902   if (value != NULL_RTX)
25903     call[0] = gen_rtx_SET (value, call[0]);
25904 
25905   call[1] = gen_rtx_USE (VOIDmode, GEN_INT (cookie_val));
25906 
25907   if (sibcall)
25908     call[2] = simple_return_rtx;
25909   else
25910     call[2] = gen_hard_reg_clobber (Pmode, LR_REGNO);
25911 
25912   insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (3, call));
25913   insn = emit_call_insn (insn);
25914   /* Now we have the debug info in the insn, we can set up the branch island
25915      if we're using one.  */
25916   if (make_island)
25917     {
25918       tree funname = get_identifier (XSTR (func_desc, 0));
25919 
25920       if (no_previous_def (funname))
25921 	{
25922 	  rtx label_rtx = gen_label_rtx ();
25923 	  char *label_buf, temp_buf[256];
25924 	  ASM_GENERATE_INTERNAL_LABEL (temp_buf, "L",
25925 				       CODE_LABEL_NUMBER (label_rtx));
25926 	  label_buf = temp_buf[0] == '*' ? temp_buf + 1 : temp_buf;
25927 	  tree labelname = get_identifier (label_buf);
25928 	  add_compiler_branch_island (labelname, funname,
25929 				     insn_line ((const rtx_insn*)insn));
25930 	}
25931      }
25932 }
25933 #endif
25934 
25935 void
25936 rs6000_call_darwin (rtx value ATTRIBUTE_UNUSED, rtx func_desc ATTRIBUTE_UNUSED,
25937 		    rtx tlsarg ATTRIBUTE_UNUSED, rtx cookie ATTRIBUTE_UNUSED)
25938 {
25939 #if TARGET_MACHO
25940   rs6000_call_darwin_1 (value, func_desc, tlsarg, cookie, false);
25941 #else
25942   gcc_unreachable ();
25943 #endif
25944 }
25945 
25946 
25947 void
25948 rs6000_sibcall_darwin (rtx value ATTRIBUTE_UNUSED, rtx func_desc ATTRIBUTE_UNUSED,
25949 		       rtx tlsarg ATTRIBUTE_UNUSED, rtx cookie ATTRIBUTE_UNUSED)
25950 {
25951 #if TARGET_MACHO
25952   rs6000_call_darwin_1 (value, func_desc, tlsarg, cookie, true);
25953 #else
25954   gcc_unreachable ();
25955 #endif
25956 }
25957 
25958 /* Return whether we should generate PC-relative code for FNDECL.  */
25959 bool
25960 rs6000_fndecl_pcrel_p (const_tree fndecl)
25961 {
25962   if (DEFAULT_ABI != ABI_ELFv2)
25963     return false;
25964 
25965   struct cl_target_option *opts = target_opts_for_fn (fndecl);
25966 
25967   return ((opts->x_rs6000_isa_flags & OPTION_MASK_PCREL) != 0
25968 	  && TARGET_CMODEL == CMODEL_MEDIUM);
25969 }
25970 
25971 /* Return whether we should generate PC-relative code for *FN.  */
25972 bool
25973 rs6000_function_pcrel_p (struct function *fn)
25974 {
25975   if (DEFAULT_ABI != ABI_ELFv2)
25976     return false;
25977 
25978   /* Optimize the usual case.  */
25979   if (fn == cfun)
25980     return ((rs6000_isa_flags & OPTION_MASK_PCREL) != 0
25981 	    && TARGET_CMODEL == CMODEL_MEDIUM);
25982 
25983   return rs6000_fndecl_pcrel_p (fn->decl);
25984 }
25985 
25986 /* Return whether we should generate PC-relative code for the current
25987    function.  */
25988 bool
25989 rs6000_pcrel_p ()
25990 {
25991   return (DEFAULT_ABI == ABI_ELFv2
25992 	  && (rs6000_isa_flags & OPTION_MASK_PCREL) != 0
25993 	  && TARGET_CMODEL == CMODEL_MEDIUM);
25994 }
25995 
25996 
25997 /* Given an address (ADDR), a mode (MODE), and what the format of the
25998    non-prefixed address (NON_PREFIXED_FORMAT) is, return the instruction format
25999    for the address.  */
26000 
26001 enum insn_form
26002 address_to_insn_form (rtx addr,
26003 		      machine_mode mode,
26004 		      enum non_prefixed_form non_prefixed_format)
26005 {
26006   /* Single register is easy.  */
26007   if (REG_P (addr) || SUBREG_P (addr))
26008     return INSN_FORM_BASE_REG;
26009 
26010   /* If the non-prefixed instruction format doesn't support offset addressing,
26011      make sure only indexed addressing is allowed.
26012 
26013      We special case SDmode so that the register allocator does not try to move
26014      SDmode through GPR registers, but instead uses the 32-bit integer load and
26015      store instructions for the floating point registers.  */
26016   if (non_prefixed_format == NON_PREFIXED_X || (mode == SDmode && TARGET_DFP))
26017     {
26018       if (GET_CODE (addr) != PLUS)
26019 	return INSN_FORM_BAD;
26020 
26021       rtx op0 = XEXP (addr, 0);
26022       rtx op1 = XEXP (addr, 1);
26023       if (!REG_P (op0) && !SUBREG_P (op0))
26024 	return INSN_FORM_BAD;
26025 
26026       if (!REG_P (op1) && !SUBREG_P (op1))
26027 	return INSN_FORM_BAD;
26028 
26029       return INSN_FORM_X;
26030     }
26031 
26032   /* Deal with update forms.  */
26033   if (GET_RTX_CLASS (GET_CODE (addr)) == RTX_AUTOINC)
26034     return INSN_FORM_UPDATE;
26035 
26036   /* Handle PC-relative symbols and labels.  Check for both local and
26037      external symbols.  Assume labels are always local.  TLS symbols
26038      are not PC-relative for rs6000.  */
26039   if (TARGET_PCREL)
26040     {
26041       if (LABEL_REF_P (addr))
26042 	return INSN_FORM_PCREL_LOCAL;
26043 
26044       if (SYMBOL_REF_P (addr) && !SYMBOL_REF_TLS_MODEL (addr))
26045 	{
26046 	  if (!SYMBOL_REF_LOCAL_P (addr))
26047 	    return INSN_FORM_PCREL_EXTERNAL;
26048 	  else
26049 	    return INSN_FORM_PCREL_LOCAL;
26050 	}
26051     }
26052 
26053   if (GET_CODE (addr) == CONST)
26054     addr = XEXP (addr, 0);
26055 
26056   /* Recognize LO_SUM addresses used with TOC and 32-bit addressing.  */
26057   if (GET_CODE (addr) == LO_SUM)
26058     return INSN_FORM_LO_SUM;
26059 
26060   /* Everything below must be an offset address of some form.  */
26061   if (GET_CODE (addr) != PLUS)
26062     return INSN_FORM_BAD;
26063 
26064   rtx op0 = XEXP (addr, 0);
26065   rtx op1 = XEXP (addr, 1);
26066 
26067   /* Check for indexed addresses.  */
26068   if (REG_P (op1) || SUBREG_P (op1))
26069     {
26070       if (REG_P (op0) || SUBREG_P (op0))
26071 	return INSN_FORM_X;
26072 
26073       return INSN_FORM_BAD;
26074     }
26075 
26076   if (!CONST_INT_P (op1))
26077     return INSN_FORM_BAD;
26078 
26079   HOST_WIDE_INT offset = INTVAL (op1);
26080   if (!SIGNED_INTEGER_34BIT_P (offset))
26081     return INSN_FORM_BAD;
26082 
26083   /* Check for local and external PC-relative addresses.  Labels are always
26084      local.  TLS symbols are not PC-relative for rs6000.  */
26085   if (TARGET_PCREL)
26086     {
26087       if (LABEL_REF_P (op0))
26088 	return INSN_FORM_PCREL_LOCAL;
26089 
26090       if (SYMBOL_REF_P (op0) && !SYMBOL_REF_TLS_MODEL (op0))
26091 	{
26092 	  if (!SYMBOL_REF_LOCAL_P (op0))
26093 	    return INSN_FORM_PCREL_EXTERNAL;
26094 	  else
26095 	    return INSN_FORM_PCREL_LOCAL;
26096 	}
26097     }
26098 
26099   /* If it isn't PC-relative, the address must use a base register.  */
26100   if (!REG_P (op0) && !SUBREG_P (op0))
26101     return INSN_FORM_BAD;
26102 
26103   /* Large offsets must be prefixed.  */
26104   if (!SIGNED_INTEGER_16BIT_P (offset))
26105     {
26106       if (TARGET_PREFIXED)
26107 	return INSN_FORM_PREFIXED_NUMERIC;
26108 
26109       return INSN_FORM_BAD;
26110     }
26111 
26112   /* We have a 16-bit offset, see what default instruction format to use.  */
26113   if (non_prefixed_format == NON_PREFIXED_DEFAULT)
26114     {
26115       unsigned size = GET_MODE_SIZE (mode);
26116 
26117       /* On 64-bit systems, assume 64-bit integers need to use DS form
26118 	 addresses (for LD/STD).  VSX vectors need to use DQ form addresses
26119 	 (for LXV and STXV).  TImode is problematic in that its normal usage
26120 	 is expected to be GPRs where it wants a DS instruction format, but if
26121 	 it goes into the vector registers, it wants a DQ instruction
26122 	 format.  */
26123       if (TARGET_POWERPC64 && size >= 8 && GET_MODE_CLASS (mode) == MODE_INT)
26124 	non_prefixed_format = NON_PREFIXED_DS;
26125 
26126       else if (TARGET_VSX && size >= 16
26127 	       && (VECTOR_MODE_P (mode) || VECTOR_ALIGNMENT_P (mode)))
26128 	non_prefixed_format = NON_PREFIXED_DQ;
26129 
26130       else
26131 	non_prefixed_format = NON_PREFIXED_D;
26132     }
26133 
26134   /* Classify the D/DS/DQ-form addresses.  */
26135   switch (non_prefixed_format)
26136     {
26137       /* Instruction format D, all 16 bits are valid.  */
26138     case NON_PREFIXED_D:
26139       return INSN_FORM_D;
26140 
26141       /* Instruction format DS, bottom 2 bits must be 0.  */
26142     case NON_PREFIXED_DS:
26143       if ((offset & 3) == 0)
26144 	return INSN_FORM_DS;
26145 
26146       else if (TARGET_PREFIXED)
26147 	return INSN_FORM_PREFIXED_NUMERIC;
26148 
26149       else
26150 	return INSN_FORM_BAD;
26151 
26152       /* Instruction format DQ, bottom 4 bits must be 0.  */
26153     case NON_PREFIXED_DQ:
26154       if ((offset & 15) == 0)
26155 	return INSN_FORM_DQ;
26156 
26157       else if (TARGET_PREFIXED)
26158 	return INSN_FORM_PREFIXED_NUMERIC;
26159 
26160       else
26161 	return INSN_FORM_BAD;
26162 
26163     default:
26164       break;
26165     }
26166 
26167   return INSN_FORM_BAD;
26168 }
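
/* A hypothetical, compiled-out usage sketch showing how a few common
   addresses classify under address_to_insn_form.  The function name and
   register numbers are invented for illustration.  */
#if 0
static void
address_to_insn_form_examples (void)
{
  rtx r3 = gen_rtx_REG (Pmode, 3);
  rtx r4 = gen_rtx_REG (Pmode, 4);

  /* A bare register classifies as INSN_FORM_BASE_REG.  */
  address_to_insn_form (r3, DImode, NON_PREFIXED_DS);

  /* reg+reg classifies as INSN_FORM_X.  */
  address_to_insn_form (gen_rtx_PLUS (Pmode, r3, r4), DImode, NON_PREFIXED_DS);

  /* reg+8: the offset is a multiple of 4, so INSN_FORM_DS (LD/STD).  */
  address_to_insn_form (gen_rtx_PLUS (Pmode, r3, GEN_INT (8)),
			DImode, NON_PREFIXED_DS);

  /* reg+6: the offset is not a multiple of 4, so INSN_FORM_PREFIXED_NUMERIC
     when prefixed insns are available, INSN_FORM_BAD otherwise.  */
  address_to_insn_form (gen_rtx_PLUS (Pmode, r3, GEN_INT (6)),
			DImode, NON_PREFIXED_DS);
}
#endif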
26169 
26170 /* Given address rtx ADDR for a load of MODE, is this legitimate for a
26171    non-prefixed D-form or X-form instruction?  NON_PREFIXED_FORMAT is
26172    given NON_PREFIXED_D or NON_PREFIXED_DS to indicate whether we want
26173    a D-form or DS-form instruction.  X-form and base_reg are always
26174    allowed.  */
26175 bool
26176 address_is_non_pfx_d_or_x (rtx addr, machine_mode mode,
26177 			   enum non_prefixed_form non_prefixed_format)
26178 {
26179   enum insn_form result_form;
26180 
26181   result_form = address_to_insn_form (addr, mode, non_prefixed_format);
26182 
26183   switch (non_prefixed_format)
26184     {
26185     case NON_PREFIXED_D:
26186       switch (result_form)
26187 	{
26188 	case INSN_FORM_X:
26189 	case INSN_FORM_D:
26190 	case INSN_FORM_DS:
26191 	case INSN_FORM_BASE_REG:
26192 	  return true;
26193 	default:
26194 	  return false;
26195 	}
26196       break;
26197     case NON_PREFIXED_DS:
26198       switch (result_form)
26199 	{
26200 	case INSN_FORM_X:
26201 	case INSN_FORM_DS:
26202 	case INSN_FORM_BASE_REG:
26203 	  return true;
26204 	default:
26205 	  return false;
26206 	}
26207       break;
26208     default:
26209       break;
26210     }
26211   return false;
26212 }
26213 
26214 /* Return true if a REG with a given MODE is loaded from or stored into a MEM
26215    location that uses a non-prefixed D/DS/DQ-form address.  This is used to validate
26216    the load or store with the PCREL_OPT optimization to make sure it is an
26217    instruction that can be optimized.
26218 
26219    We need to specify the MODE separately from the REG to allow for loads that
26220    include zero/sign/float extension.  */
26221 
26222 bool
26223 pcrel_opt_valid_mem_p (rtx reg, machine_mode mode, rtx mem)
26224 {
26225   /* If the instruction is indexed only like LFIWAX/LXSIWAX we cannot do the
26226      PCREL_OPT optimization.  */
26227   enum non_prefixed_form non_prefixed = reg_to_non_prefixed (reg, mode);
26228   if (non_prefixed == NON_PREFIXED_X)
26229     return false;
26230 
26231   /* Check if this is a non-prefixed D/DS/DQ-form instruction.  */
26232   rtx addr = XEXP (mem, 0);
26233   enum insn_form iform = address_to_insn_form (addr, mode, non_prefixed);
26234   return (iform == INSN_FORM_BASE_REG
26235 	  || iform == INSN_FORM_D
26236 	  || iform == INSN_FORM_DS
26237 	  || iform == INSN_FORM_DQ);
26238 }
26239 
26240 /* Helper function to see if we're potentially looking at lfs/stfs.
26241    - PARALLEL containing a SET and a CLOBBER
26242    - stfs:
26243     - SET is from UNSPEC_SI_FROM_SF to MEM:SI
26244     - CLOBBER is a V4SF
26245    - lfs:
26246     - SET is from UNSPEC_SF_FROM_SI to REG:SF
26247     - CLOBBER is a DI
26248  */
26249 
26250 static bool
26251 is_lfs_stfs_insn (rtx_insn *insn)
26252 {
26253   rtx pattern = PATTERN (insn);
26254   if (GET_CODE (pattern) != PARALLEL)
26255     return false;
26256 
26257   /* This should be a parallel with exactly one set and one clobber.  */
26258   if (XVECLEN (pattern, 0) != 2)
26259     return false;
26260 
26261   rtx set = XVECEXP (pattern, 0, 0);
26262   if (GET_CODE (set) != SET)
26263     return false;
26264 
26265   rtx clobber = XVECEXP (pattern, 0, 1);
26266   if (GET_CODE (clobber) != CLOBBER)
26267     return false;
26268 
26269   /* All we care about is that the destination of the SET is a mem:SI,
26270      the source should be an UNSPEC_SI_FROM_SF, and the clobber
26271      should be a scratch:V4SF.  */
26272 
26273   rtx dest = SET_DEST (set);
26274   rtx src = SET_SRC (set);
26275   rtx scratch = SET_DEST (clobber);
26276 
26277   if (GET_CODE (src) != UNSPEC)
26278     return false;
26279 
26280   /* stfs case.  */
26281   if (XINT (src, 1) == UNSPEC_SI_FROM_SF
26282       && GET_CODE (dest) == MEM && GET_MODE (dest) == SImode
26283       && GET_CODE (scratch) == SCRATCH && GET_MODE (scratch) == V4SFmode)
26284     return true;
26285 
26286   /* lfs case.  */
26287   if (XINT (src, 1) == UNSPEC_SF_FROM_SI
26288       && GET_CODE (dest) == REG && GET_MODE (dest) == SFmode
26289       && GET_CODE (scratch) == SCRATCH && GET_MODE (scratch) == DImode)
26290     return true;
26291 
26292   return false;
26293 }
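
/* Sketch of the RTL shapes matched above (the operand placeholders are
   elided for illustration):

     stfs:  (parallel [(set (mem:SI ...)
			    (unspec:SI [...] UNSPEC_SI_FROM_SF))
		       (clobber (scratch:V4SF))])

     lfs:   (parallel [(set (reg:SF ...)
			    (unspec:SF [...] UNSPEC_SF_FROM_SI))
		       (clobber (scratch:DI))])  */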
26294 
26295 /* Helper function to take a REG and a MODE and turn it into the non-prefixed
26296    instruction format (D/DS/DQ) used for offset memory.  */
26297 
26298 enum non_prefixed_form
26299 reg_to_non_prefixed (rtx reg, machine_mode mode)
26300 {
26301   /* If it isn't a register, use the defaults.  */
26302   if (!REG_P (reg) && !SUBREG_P (reg))
26303     return NON_PREFIXED_DEFAULT;
26304 
26305   unsigned int r = reg_or_subregno (reg);
26306 
26307   /* If we have a pseudo, use the default instruction format.  */
26308   if (!HARD_REGISTER_NUM_P (r))
26309     return NON_PREFIXED_DEFAULT;
26310 
26311   unsigned size = GET_MODE_SIZE (mode);
26312 
26313   /* FPR registers use D-mode for scalars, and DQ-mode for vectors, IEEE
26314      128-bit floating point, and 128-bit integers.  Before power9, only indexed
26315      addressing was available for vectors.  */
26316   if (FP_REGNO_P (r))
26317     {
26318       if (mode == SFmode || size == 8 || FLOAT128_2REG_P (mode))
26319 	return NON_PREFIXED_D;
26320 
26321       else if (size < 8)
26322 	return NON_PREFIXED_X;
26323 
26324       else if (TARGET_VSX && size >= 16
26325 	       && (VECTOR_MODE_P (mode)
26326 		   || VECTOR_ALIGNMENT_P (mode)
26327 		   || mode == TImode || mode == CTImode))
26328 	return (TARGET_P9_VECTOR) ? NON_PREFIXED_DQ : NON_PREFIXED_X;
26329 
26330       else
26331 	return NON_PREFIXED_DEFAULT;
26332     }
26333 
26334   /* Altivec registers use DS-mode for scalars, and DQ-mode for vectors, IEEE
26335      128-bit floating point, and 128-bit integers.  Before power9, only indexed
26336      addressing was available.  */
26337   else if (ALTIVEC_REGNO_P (r))
26338     {
26339       if (!TARGET_P9_VECTOR)
26340 	return NON_PREFIXED_X;
26341 
26342       if (mode == SFmode || size == 8 || FLOAT128_2REG_P (mode))
26343 	return NON_PREFIXED_DS;
26344 
26345       else if (size < 8)
26346 	return NON_PREFIXED_X;
26347 
26348       else if (TARGET_VSX && size >= 16
26349 	       && (VECTOR_MODE_P (mode)
26350 		   || VECTOR_ALIGNMENT_P (mode)
26351 		   || mode == TImode || mode == CTImode))
26352 	return NON_PREFIXED_DQ;
26353 
26354       else
26355 	return NON_PREFIXED_DEFAULT;
26356     }
26357 
26358   /* GPR registers use DS-mode for 64-bit items on 64-bit systems, and D-mode
26359      otherwise.  Assume that any other register, such as LR, CRs, etc. will go
26360      through the GPR registers for memory operations.  */
26361   else if (TARGET_POWERPC64 && size >= 8)
26362     return NON_PREFIXED_DS;
26363 
26364   return NON_PREFIXED_D;
26365 }
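
/* Worked examples of the classification above: DFmode in an FPR yields
   NON_PREFIXED_D (LFD/STFD take D-form addresses); DImode in a GPR on a
   64-bit target yields NON_PREFIXED_DS (LD/STD); V2DImode in an Altivec
   register yields NON_PREFIXED_DQ on power9 and later (LXV/STXV) but
   NON_PREFIXED_X before that, since the older VMX loads and stores were
   indexed-only.  */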
26366 
26367 
26368 /* Whether a load instruction is a prefixed instruction.  This is called from
26369    the prefixed attribute processing.  */
26370 
26371 bool
26372 prefixed_load_p (rtx_insn *insn)
26373 {
26374   /* Validate the insn to make sure it is a normal load insn.  */
26375   extract_insn_cached (insn);
26376   if (recog_data.n_operands < 2)
26377     return false;
26378 
26379   rtx reg = recog_data.operand[0];
26380   rtx mem = recog_data.operand[1];
26381 
26382   if (!REG_P (reg) && !SUBREG_P (reg))
26383     return false;
26384 
26385   if (!MEM_P (mem))
26386     return false;
26387 
26388   /* Prefixed load instructions do not support update or indexed forms.  */
26389   if (get_attr_indexed (insn) == INDEXED_YES
26390       || get_attr_update (insn) == UPDATE_YES)
26391     return false;
26392 
26393   /* LWA uses the DS format instead of the D format that LWZ uses.  */
26394   enum non_prefixed_form non_prefixed;
26395   machine_mode reg_mode = GET_MODE (reg);
26396   machine_mode mem_mode = GET_MODE (mem);
26397 
26398   if (mem_mode == SImode && reg_mode == DImode
26399       && get_attr_sign_extend (insn) == SIGN_EXTEND_YES)
26400     non_prefixed = NON_PREFIXED_DS;
26401 
26402   else
26403     non_prefixed = reg_to_non_prefixed (reg, mem_mode);
26404 
26405   if (non_prefixed == NON_PREFIXED_X && is_lfs_stfs_insn (insn))
26406     return address_is_prefixed (XEXP (mem, 0), mem_mode, NON_PREFIXED_DEFAULT);
26407   else
26408     return address_is_prefixed (XEXP (mem, 0), mem_mode, non_prefixed);
26409 }
26410 
26411 /* Whether a store instruction is a prefixed instruction.  This is called from
26412    the prefixed attribute processing.  */
26413 
26414 bool
26415 prefixed_store_p (rtx_insn *insn)
26416 {
26417   /* Validate the insn to make sure it is a normal store insn.  */
26418   extract_insn_cached (insn);
26419   if (recog_data.n_operands < 2)
26420     return false;
26421 
26422   rtx mem = recog_data.operand[0];
26423   rtx reg = recog_data.operand[1];
26424 
26425   if (!REG_P (reg) && !SUBREG_P (reg))
26426     return false;
26427 
26428   if (!MEM_P (mem))
26429     return false;
26430 
26431   /* Prefixed store instructions do not support update or indexed forms.  */
26432   if (get_attr_indexed (insn) == INDEXED_YES
26433       || get_attr_update (insn) == UPDATE_YES)
26434     return false;
26435 
26436   machine_mode mem_mode = GET_MODE (mem);
26437   rtx addr = XEXP (mem, 0);
26438   enum non_prefixed_form non_prefixed = reg_to_non_prefixed (reg, mem_mode);
26439 
26440   /* Need to make sure we aren't looking at a stfs which doesn't look
26441      like the other things reg_to_non_prefixed/address_is_prefixed
26442      looks for.  */
26443   if (non_prefixed == NON_PREFIXED_X && is_lfs_stfs_insn (insn))
26444     return address_is_prefixed (addr, mem_mode, NON_PREFIXED_DEFAULT);
26445   else
26446     return address_is_prefixed (addr, mem_mode, non_prefixed);
26447 }
26448 
26449 /* Whether a load immediate or add instruction is a prefixed instruction.  This
26450    is called from the prefixed attribute processing.  */
26451 
26452 bool
26453 prefixed_paddi_p (rtx_insn *insn)
26454 {
26455   rtx set = single_set (insn);
26456   if (!set)
26457     return false;
26458 
26459   rtx dest = SET_DEST (set);
26460   rtx src = SET_SRC (set);
26461 
26462   if (!REG_P (dest) && !SUBREG_P (dest))
26463     return false;
26464 
26465   /* Is this a load immediate that can't be done with a simple ADDI or
26466      ADDIS?  */
26467   if (CONST_INT_P (src))
26468     return (satisfies_constraint_eI (src)
26469 	    && !satisfies_constraint_I (src)
26470 	    && !satisfies_constraint_L (src));
26471 
26472   /* Is this a PADDI instruction that can't be done with a simple ADDI or
26473      ADDIS?  */
26474   if (GET_CODE (src) == PLUS)
26475     {
26476       rtx op1 = XEXP (src, 1);
26477 
26478       return (CONST_INT_P (op1)
26479 	      && satisfies_constraint_eI (op1)
26480 	      && !satisfies_constraint_I (op1)
26481 	      && !satisfies_constraint_L (op1));
26482     }
26483 
26484   /* If not, is it a load of a PC-relative address?  */
26485   if (!TARGET_PCREL || GET_MODE (dest) != Pmode)
26486     return false;
26487 
26488   if (!SYMBOL_REF_P (src) && !LABEL_REF_P (src) && GET_CODE (src) != CONST)
26489     return false;
26490 
26491   enum insn_form iform = address_to_insn_form (src, Pmode,
26492 					       NON_PREFIXED_DEFAULT);
26493 
26494   return (iform == INSN_FORM_PCREL_EXTERNAL || iform == INSN_FORM_PCREL_LOCAL);
26495 }
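
/* For example (an illustrative constant): 0x12345 fits neither the 16-bit
   signed I constraint (ADDI/LI) nor the shifted L constraint (ADDIS/LIS),
   but does fit the 34-bit signed eI constraint, so loading it needs a
   prefixed PLI/PADDI and this function returns true for such an insn.  */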
26496 
26497 /* Whether the next instruction needs a 'p' prefix issued before the
26498    instruction is printed out.  */
26499 static bool prepend_p_to_next_insn;
26500 
26501 /* Define FINAL_PRESCAN_INSN if some processing needs to be done before
26502    outputting the assembler code.  On the PowerPC, we remember if the current
26503    insn is a prefixed insn where we need to emit a 'p' before the insn.
26504 
26505    In addition, if the insn is part of a PC-relative reference to an external
26506    label optimization, this is recorded also.  */
26507 void
26508 rs6000_final_prescan_insn (rtx_insn *insn, rtx [], int)
26509 {
26510   prepend_p_to_next_insn = (get_attr_maybe_prefixed (insn)
26511 			    == MAYBE_PREFIXED_YES
26512 			    && get_attr_prefixed (insn) == PREFIXED_YES);
26513   return;
26514 }
26515 
26516 /* Define ASM_OUTPUT_OPCODE to do anything special before emitting an opcode.
26517    We use it to emit a 'p' for prefixed insns that is set in
26518    FINAL_PRESCAN_INSN.  */
26519 void
26520 rs6000_asm_output_opcode (FILE *stream)
26521 {
26522   if (prepend_p_to_next_insn)
26523     {
26524       fprintf (stream, "p");
26525 
26526       /* Reset the flag in the case where there are separate insn lines in the
26527 	 sequence, so the 'p' is only emitted for the first line.  This shows up
26528 	 when we are doing the PCREL_OPT optimization, in that the label created
26529 	 with %r<n> would have a leading 'p' printed.  */
26530       prepend_p_to_next_insn = false;
26531     }
26532 
26533   return;
26534 }
26535 
26536 /* Emit the relocation to tie the next instruction to a previous instruction
26537    that loads up an external address.  This is used to do the PCREL_OPT
26538    optimization.  Note, the label is generated after the PLD of the got
26539    pc-relative address to allow for the assembler to insert NOPs before the PLD
26540    instruction.  The operand is a constant integer that is the label
26541    number.  */
26542 
26543 void
26544 output_pcrel_opt_reloc (rtx label_num)
26545 {
26546   rtx operands[1] = { label_num };
26547   output_asm_insn (".reloc .Lpcrel%0-8,R_PPC64_PCREL_OPT,.-(.Lpcrel%0-8)",
26548 		   operands);
26549 }
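
/* For example, with label number 5 the directive emitted above reads:

     .reloc .Lpcrel5-8,R_PPC64_PCREL_OPT,.-(.Lpcrel5-8)  */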
26550 
26551 /* Adjust the length of an INSN.  LENGTH is the currently-computed length and
26552    should be adjusted to reflect any required changes.  This macro is used when
26553    there is some systematic length adjustment required that would be difficult
26554    to express in the length attribute.
26555 
26556    On the PowerPC, we use this to adjust the length of an instruction if one or
26557    more prefixed instructions are generated, using the attribute
26558    max_prefixed_insns.  A prefixed instruction is 8 bytes instead of 4, but the
26559    hardware requires that a prefixed instruction does not cross a 64-byte
26560    boundary.  This means the compiler has to assume the length of the first
26561    prefixed instruction is 12 bytes instead of 8 bytes.  Since the length is
26562    already set for the non-prefixed instruction, we just need to update for the
26563    difference.  */
26564 
26565 int
26566 rs6000_adjust_insn_length (rtx_insn *insn, int length)
26567 {
26568   if (TARGET_PREFIXED && NONJUMP_INSN_P (insn))
26569     {
26570       rtx pattern = PATTERN (insn);
26571       if (GET_CODE (pattern) != USE && GET_CODE (pattern) != CLOBBER
26572 	  && get_attr_prefixed (insn) == PREFIXED_YES)
26573 	{
26574 	  int num_prefixed = get_attr_max_prefixed_insns (insn);
26575 	  length += 4 * (num_prefixed + 1);
26576 	}
26577     }
26578 
26579   return length;
26580 }
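
/* Worked example: an insn whose non-prefixed length was computed as 4 and
   which may emit one prefixed instruction gets 4 + 4 * (1 + 1) = 12 bytes,
   covering the 8-byte prefixed form plus a possible alignment NOP so the
   prefixed insn does not cross a 64-byte boundary.  */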
26581 
26582 
26583 #ifdef HAVE_GAS_HIDDEN
26584 # define USE_HIDDEN_LINKONCE 1
26585 #else
26586 # define USE_HIDDEN_LINKONCE 0
26587 #endif
26588 
26589 /* Fills in the label name that should be used for a 476 link stack thunk.  */
26590 
26591 void
26592 get_ppc476_thunk_name (char name[32])
26593 {
26594   gcc_assert (TARGET_LINK_STACK);
26595 
26596   if (USE_HIDDEN_LINKONCE)
26597     sprintf (name, "__ppc476.get_thunk");
26598   else
26599     ASM_GENERATE_INTERNAL_LABEL (name, "LPPC476_", 0);
26600 }
26601 
26602 /* This function emits the simple thunk routine that is used to preserve
26603    the link stack on the 476 cpu.  */
26604 
26605 static void rs6000_code_end (void) ATTRIBUTE_UNUSED;
26606 static void
26607 rs6000_code_end (void)
26608 {
26609   char name[32];
26610   tree decl;
26611 
26612   if (!TARGET_LINK_STACK)
26613     return;
26614 
26615   get_ppc476_thunk_name (name);
26616 
26617   decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL, get_identifier (name),
26618 		     build_function_type_list (void_type_node, NULL_TREE));
26619   DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
26620 				   NULL_TREE, void_type_node);
26621   TREE_PUBLIC (decl) = 1;
26622   TREE_STATIC (decl) = 1;
26623 
26624 #if RS6000_WEAK
26625   if (USE_HIDDEN_LINKONCE && !TARGET_XCOFF)
26626     {
26627       cgraph_node::create (decl)->set_comdat_group (DECL_ASSEMBLER_NAME (decl));
26628       targetm.asm_out.unique_section (decl, 0);
26629       switch_to_section (get_named_section (decl, NULL, 0));
26630       DECL_WEAK (decl) = 1;
26631       ASM_WEAKEN_DECL (asm_out_file, decl, name, 0);
26632       targetm.asm_out.globalize_label (asm_out_file, name);
26633       targetm.asm_out.assemble_visibility (decl, VISIBILITY_HIDDEN);
26634       ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
26635     }
26636   else
26637 #endif
26638     {
26639       switch_to_section (text_section);
26640       ASM_OUTPUT_LABEL (asm_out_file, name);
26641     }
26642 
26643   DECL_INITIAL (decl) = make_node (BLOCK);
26644   current_function_decl = decl;
26645   allocate_struct_function (decl, false);
26646   init_function_start (decl);
26647   first_function_block_is_cold = false;
26648   /* Make sure unwind info is emitted for the thunk if needed.  */
26649   final_start_function (emit_barrier (), asm_out_file, 1);
26650 
26651   fputs ("\tblr\n", asm_out_file);
26652 
26653   final_end_function ();
26654   init_insn_lengths ();
26655   free_after_compilation (cfun);
26656   set_cfun (NULL);
26657   current_function_decl = NULL;
26658 }
26659 
26660 /* Add r30 to hard reg set if the prologue sets it up and it is not
26661    pic_offset_table_rtx.  */
26662 
26663 static void
26664 rs6000_set_up_by_prologue (struct hard_reg_set_container *set)
26665 {
26666   if (!TARGET_SINGLE_PIC_BASE
26667       && TARGET_TOC
26668       && TARGET_MINIMAL_TOC
26669       && !constant_pool_empty_p ())
26670     add_to_hard_reg_set (&set->set, Pmode, RS6000_PIC_OFFSET_TABLE_REGNUM);
26671   if (cfun->machine->split_stack_argp_used)
26672     add_to_hard_reg_set (&set->set, Pmode, 12);
26673 
26674   /* Make sure the hard reg set doesn't include r2, which was possibly added
26675      via PIC_OFFSET_TABLE_REGNUM.  */
26676   if (TARGET_TOC)
26677     remove_from_hard_reg_set (&set->set, Pmode, TOC_REGNUM);
26678 }
26679 
26680 
26681 /* Helper function for rs6000_split_logical to emit a logical instruction after
26682    splitting the operation into single GPR registers.
26683 
26684    DEST is the destination register.
26685    OP1 and OP2 are the input source registers.
26686    CODE is the base operation (AND, IOR, XOR, NOT).
26687    MODE is the machine mode.
26688    If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT.
26689    If COMPLEMENT_OP1_P is true, wrap operand1 with NOT.
26690    If COMPLEMENT_OP2_P is true, wrap operand2 with NOT.  */
26691 
26692 static void
26693 rs6000_split_logical_inner (rtx dest,
26694 			    rtx op1,
26695 			    rtx op2,
26696 			    enum rtx_code code,
26697 			    machine_mode mode,
26698 			    bool complement_final_p,
26699 			    bool complement_op1_p,
26700 			    bool complement_op2_p)
26701 {
26702   rtx bool_rtx;
26703 
26704   /* Optimize AND of 0/0xffffffff and IOR/XOR of 0.  */
26705   if (op2 && CONST_INT_P (op2)
26706       && (mode == SImode || (mode == DImode && TARGET_POWERPC64))
26707       && !complement_final_p && !complement_op1_p && !complement_op2_p)
26708     {
26709       HOST_WIDE_INT mask = GET_MODE_MASK (mode);
26710       HOST_WIDE_INT value = INTVAL (op2) & mask;
26711 
26712       /* Optimize AND of 0 to just set 0.  Optimize AND of -1 to be a move.  */
26713       if (code == AND)
26714 	{
26715 	  if (value == 0)
26716 	    {
26717 	      emit_insn (gen_rtx_SET (dest, const0_rtx));
26718 	      return;
26719 	    }
26720 
26721 	  else if (value == mask)
26722 	    {
26723 	      if (!rtx_equal_p (dest, op1))
26724 		emit_insn (gen_rtx_SET (dest, op1));
26725 	      return;
26726 	    }
26727 	}
26728 
26729       /* Optimize IOR/XOR of 0 to be a simple move.  Split large operations
26730 	 into separate ORI/ORIS or XORI/XORIS instructions.  */
26731       else if (code == IOR || code == XOR)
26732 	{
26733 	  if (value == 0)
26734 	    {
26735 	      if (!rtx_equal_p (dest, op1))
26736 		emit_insn (gen_rtx_SET (dest, op1));
26737 	      return;
26738 	    }
26739 	}
26740     }
26741 
26742   if (code == AND && mode == SImode
26743       && !complement_final_p && !complement_op1_p && !complement_op2_p)
26744     {
26745       emit_insn (gen_andsi3 (dest, op1, op2));
26746       return;
26747     }
26748 
26749   if (complement_op1_p)
26750     op1 = gen_rtx_NOT (mode, op1);
26751 
26752   if (complement_op2_p)
26753     op2 = gen_rtx_NOT (mode, op2);
26754 
26755   /* For canonical RTL, if only one arm is inverted it is the first.  */
26756   if (!complement_op1_p && complement_op2_p)
26757     std::swap (op1, op2);
26758 
26759   bool_rtx = ((code == NOT)
26760 	      ? gen_rtx_NOT (mode, op1)
26761 	      : gen_rtx_fmt_ee (code, mode, op1, op2));
26762 
26763   if (complement_final_p)
26764     bool_rtx = gen_rtx_NOT (mode, bool_rtx);
26765 
26766   emit_insn (gen_rtx_SET (dest, bool_rtx));
26767 }
26768 
26769 /* Split a DImode AND/IOR/XOR with a constant on a 32-bit system.  These
26770    operations are split immediately during RTL generation to allow for more
26771    optimizations of the AND/IOR/XOR.
26772 
26773    OPERANDS is an array containing the destination and two input operands.
26774    CODE is the base operation (AND, IOR, XOR, NOT).
26775    MODE is the machine mode.
26776    If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT.
26777    If COMPLEMENT_OP1_P is true, wrap operand1 with NOT.
26778    If COMPLEMENT_OP2_P is true, wrap operand2 with NOT.
26779    CLOBBER_REG is either NULL or a scratch register of type CC to allow
26780    formation of the AND instructions.  */
26781 
26782 static void
26783 rs6000_split_logical_di (rtx operands[3],
26784 			 enum rtx_code code,
26785 			 bool complement_final_p,
26786 			 bool complement_op1_p,
26787 			 bool complement_op2_p)
26788 {
26789   const HOST_WIDE_INT lower_32bits = HOST_WIDE_INT_C(0xffffffff);
26790   const HOST_WIDE_INT upper_32bits = ~ lower_32bits;
26791   const HOST_WIDE_INT sign_bit = HOST_WIDE_INT_C(0x80000000);
26792   enum hi_lo { hi = 0, lo = 1 };
26793   rtx op0_hi_lo[2], op1_hi_lo[2], op2_hi_lo[2];
26794   size_t i;
26795 
26796   op0_hi_lo[hi] = gen_highpart (SImode, operands[0]);
26797   op1_hi_lo[hi] = gen_highpart (SImode, operands[1]);
26798   op0_hi_lo[lo] = gen_lowpart (SImode, operands[0]);
26799   op1_hi_lo[lo] = gen_lowpart (SImode, operands[1]);
26800 
26801   if (code == NOT)
26802     op2_hi_lo[hi] = op2_hi_lo[lo] = NULL_RTX;
26803   else
26804     {
26805       if (!CONST_INT_P (operands[2]))
26806 	{
26807 	  op2_hi_lo[hi] = gen_highpart_mode (SImode, DImode, operands[2]);
26808 	  op2_hi_lo[lo] = gen_lowpart (SImode, operands[2]);
26809 	}
26810       else
26811 	{
26812 	  HOST_WIDE_INT value = INTVAL (operands[2]);
26813 	  HOST_WIDE_INT value_hi_lo[2];
26814 
26815 	  gcc_assert (!complement_final_p);
26816 	  gcc_assert (!complement_op1_p);
26817 	  gcc_assert (!complement_op2_p);
26818 
26819 	  value_hi_lo[hi] = value >> 32;
26820 	  value_hi_lo[lo] = value & lower_32bits;
26821 
26822 	  for (i = 0; i < 2; i++)
26823 	    {
26824 	      HOST_WIDE_INT sub_value = value_hi_lo[i];
26825 
26826 	      if (sub_value & sign_bit)
26827 		sub_value |= upper_32bits;
26828 
26829 	      op2_hi_lo[i] = GEN_INT (sub_value);
26830 
26831 	      /* If this is an AND instruction, check to see if we need to load
26832 		 the value in a register.  */
26833 	      if (code == AND && sub_value != -1 && sub_value != 0
26834 		  && !and_operand (op2_hi_lo[i], SImode))
26835 		op2_hi_lo[i] = force_reg (SImode, op2_hi_lo[i]);
26836 	    }
26837 	}
26838     }
26839 
26840   for (i = 0; i < 2; i++)
26841     {
26842       /* Split large IOR/XOR operations.  */
26843       if ((code == IOR || code == XOR)
26844 	  && CONST_INT_P (op2_hi_lo[i])
26845 	  && !complement_final_p
26846 	  && !complement_op1_p
26847 	  && !complement_op2_p
26848 	  && !logical_const_operand (op2_hi_lo[i], SImode))
26849 	{
26850 	  HOST_WIDE_INT value = INTVAL (op2_hi_lo[i]);
26851 	  HOST_WIDE_INT hi_16bits = value & HOST_WIDE_INT_C(0xffff0000);
26852 	  HOST_WIDE_INT lo_16bits = value & HOST_WIDE_INT_C(0x0000ffff);
26853 	  rtx tmp = gen_reg_rtx (SImode);
26854 
26855 	  /* Make sure the constant is sign extended.  */
26856 	  if ((hi_16bits & sign_bit) != 0)
26857 	    hi_16bits |= upper_32bits;
26858 
26859 	  rs6000_split_logical_inner (tmp, op1_hi_lo[i], GEN_INT (hi_16bits),
26860 				      code, SImode, false, false, false);
26861 
26862 	  rs6000_split_logical_inner (op0_hi_lo[i], tmp, GEN_INT (lo_16bits),
26863 				      code, SImode, false, false, false);
26864 	}
26865       else
26866 	rs6000_split_logical_inner (op0_hi_lo[i], op1_hi_lo[i], op2_hi_lo[i],
26867 				    code, SImode, complement_final_p,
26868 				    complement_op1_p, complement_op2_p);
26869     }
26870 
26871   return;
26872 }
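
/* Illustrative example (the constant is invented): on a 32-bit target,
   splitting op0:DI = op1:DI ^ 0x12345678 makes the high half an XOR with 0,
   which collapses to a simple move, while the low half, whose constant has
   both 16-bit halves nonzero and so is not a logical_const_operand, is
   split into XORIS 0x1234 followed by XORI 0x5678 through a temporary.  */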

/* Split the insns that make up boolean operations operating on multiple GPR
   registers.  The boolean MD patterns ensure that the inputs either are
   exactly the same as the output registers, or there is no overlap.

   OPERANDS is an array containing the destination and two input operands.
   CODE is the base operation (AND, IOR, XOR, NOT).
   If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT.
   If COMPLEMENT_OP1_P is true, wrap operand1 with NOT.
   If COMPLEMENT_OP2_P is true, wrap operand2 with NOT.  */

void
rs6000_split_logical (rtx operands[3],
		      enum rtx_code code,
		      bool complement_final_p,
		      bool complement_op1_p,
		      bool complement_op2_p)
{
  machine_mode mode = GET_MODE (operands[0]);
  machine_mode sub_mode;
  rtx op0, op1, op2;
  int sub_size, regno0, regno1, nregs, i;

  /* If this is DImode, use the specialized version that can run before
     register allocation.  */
  if (mode == DImode && !TARGET_POWERPC64)
    {
      rs6000_split_logical_di (operands, code, complement_final_p,
			       complement_op1_p, complement_op2_p);
      return;
    }

  op0 = operands[0];
  op1 = operands[1];
  op2 = (code == NOT) ? NULL_RTX : operands[2];
  sub_mode = (TARGET_POWERPC64) ? DImode : SImode;
  sub_size = GET_MODE_SIZE (sub_mode);
  regno0 = REGNO (op0);
  regno1 = REGNO (op1);

  gcc_assert (reload_completed);
  gcc_assert (IN_RANGE (regno0, FIRST_GPR_REGNO, LAST_GPR_REGNO));
  gcc_assert (IN_RANGE (regno1, FIRST_GPR_REGNO, LAST_GPR_REGNO));

  nregs = rs6000_hard_regno_nregs[(int)mode][regno0];
  gcc_assert (nregs > 1);

  if (op2 && REG_P (op2))
    gcc_assert (IN_RANGE (REGNO (op2), FIRST_GPR_REGNO, LAST_GPR_REGNO));

  for (i = 0; i < nregs; i++)
    {
      int offset = i * sub_size;
      rtx sub_op0 = simplify_subreg (sub_mode, op0, mode, offset);
      rtx sub_op1 = simplify_subreg (sub_mode, op1, mode, offset);
      rtx sub_op2 = ((code == NOT)
		     ? NULL_RTX
		     : simplify_subreg (sub_mode, op2, mode, offset));

      rs6000_split_logical_inner (sub_op0, sub_op1, sub_op2, code, sub_mode,
				  complement_final_p, complement_op1_p,
				  complement_op2_p);
    }

  return;
}
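
/* For instance (a sketch, assuming a 64-bit target), a TImode AND occupies
   two GPRs, so the loop above decomposes it into two DImode operations on
   the constituent subregs:

	(set (subreg:DI op0 0) (and:DI (subreg:DI op1 0) (subreg:DI op2 0)))
	(set (subreg:DI op0 8) (and:DI (subreg:DI op1 8) (subreg:DI op2 8)))  */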

/* Emit instructions to move SRC to DST.  Called by splitters for
   multi-register moves.  It will emit at most one instruction for
   each register that is accessed; that is, it won't emit li/lis pairs
   (or equivalent for 64-bit code).  One of SRC or DST must be a hard
   register.  */

void
rs6000_split_multireg_move (rtx dst, rtx src)
{
  /* The register number of the first register being moved.  */
  int reg;
  /* The mode that is to be moved.  */
  machine_mode mode;
  /* The mode that the move is being done in, and its size.  */
  machine_mode reg_mode;
  int reg_mode_size;
  /* The number of registers that will be moved.  */
  int nregs;

  reg = REG_P (dst) ? REGNO (dst) : REGNO (src);
  mode = GET_MODE (dst);
  nregs = hard_regno_nregs (reg, mode);

  /* If we have a vector quad register for MMA, and this is a load or store,
     see if we can use vector paired load/stores.  */
  if (mode == XOmode && TARGET_MMA
      && (MEM_P (dst) || MEM_P (src)))
    {
      reg_mode = OOmode;
      nregs /= 2;
    }
  /* If we have a vector pair/quad mode, split it into two/four separate
     vectors.  */
  else if (mode == OOmode || mode == XOmode)
    reg_mode = V1TImode;
  else if (FP_REGNO_P (reg))
    reg_mode = DECIMAL_FLOAT_MODE_P (mode) ? DDmode :
	(TARGET_HARD_FLOAT ? DFmode : SFmode);
  else if (ALTIVEC_REGNO_P (reg))
    reg_mode = V16QImode;
  else
    reg_mode = word_mode;
  reg_mode_size = GET_MODE_SIZE (reg_mode);

  gcc_assert (reg_mode_size * nregs == GET_MODE_SIZE (mode));

  /* TDmode residing in FP registers is special, since the ISA requires that
     the lower-numbered word of a register pair is always the most significant
     word, even in little-endian mode.  This does not match the usual subreg
     semantics, so we cannot use simplify_gen_subreg in those cases.  Access
     the appropriate constituent registers "by hand" in little-endian mode.

     Note we do not need to check for destructive overlap here since TDmode
     can only reside in even/odd register pairs.  */
  if (FP_REGNO_P (reg) && DECIMAL_FLOAT_MODE_P (mode) && !BYTES_BIG_ENDIAN)
    {
      rtx p_src, p_dst;
      int i;

      for (i = 0; i < nregs; i++)
	{
	  if (REG_P (src) && FP_REGNO_P (REGNO (src)))
	    p_src = gen_rtx_REG (reg_mode, REGNO (src) + nregs - 1 - i);
	  else
	    p_src = simplify_gen_subreg (reg_mode, src, mode,
					 i * reg_mode_size);

	  if (REG_P (dst) && FP_REGNO_P (REGNO (dst)))
	    p_dst = gen_rtx_REG (reg_mode, REGNO (dst) + nregs - 1 - i);
	  else
	    p_dst = simplify_gen_subreg (reg_mode, dst, mode,
					 i * reg_mode_size);

	  emit_insn (gen_rtx_SET (p_dst, p_src));
	}

      return;
    }

  /* The __vector_pair and __vector_quad modes are multi-register
     modes, so if we have to load or store the registers, we have to be
     careful to properly swap them if we're in little endian mode
     below.  This means the last register gets the first memory
     location.  We also need to be careful of using the right register
     numbers if we are splitting XO to OO.  */
  if (mode == OOmode || mode == XOmode)
    {
      nregs = hard_regno_nregs (reg, mode);
      int reg_mode_nregs = hard_regno_nregs (reg, reg_mode);
      if (MEM_P (dst))
	{
	  unsigned offset = 0;
	  unsigned size = GET_MODE_SIZE (reg_mode);

	  /* If we are reading an accumulator register, we have to
	     deprime it before we can access it.  */
	  if (TARGET_MMA
	      && GET_MODE (src) == XOmode && FP_REGNO_P (REGNO (src)))
	    emit_insn (gen_mma_xxmfacc (src, src));

	  for (int i = 0; i < nregs; i += reg_mode_nregs)
	    {
	      unsigned subreg
		= WORDS_BIG_ENDIAN ? i : (nregs - reg_mode_nregs - i);
	      rtx dst2 = adjust_address (dst, reg_mode, offset);
	      rtx src2 = gen_rtx_REG (reg_mode, reg + subreg);
	      offset += size;
	      emit_insn (gen_rtx_SET (dst2, src2));
	    }

	  return;
	}

      if (MEM_P (src))
	{
	  unsigned offset = 0;
	  unsigned size = GET_MODE_SIZE (reg_mode);

	  for (int i = 0; i < nregs; i += reg_mode_nregs)
	    {
	      unsigned subreg
		= WORDS_BIG_ENDIAN ? i : (nregs - reg_mode_nregs - i);
	      rtx dst2 = gen_rtx_REG (reg_mode, reg + subreg);
	      rtx src2 = adjust_address (src, reg_mode, offset);
	      offset += size;
	      emit_insn (gen_rtx_SET (dst2, src2));
	    }

	  /* If we are writing an accumulator register, we have to
	     prime it after we've written it.  */
	  if (TARGET_MMA
	      && GET_MODE (dst) == XOmode && FP_REGNO_P (REGNO (dst)))
	    emit_insn (gen_mma_xxmtacc (dst, dst));

	  return;
	}

      if (GET_CODE (src) == UNSPEC
	  || GET_CODE (src) == UNSPEC_VOLATILE)
	{
	  gcc_assert (XINT (src, 1) == UNSPEC_VSX_ASSEMBLE
		      || XINT (src, 1) == UNSPECV_MMA_ASSEMBLE);
	  gcc_assert (REG_P (dst));
	  if (GET_MODE (src) == XOmode)
	    gcc_assert (FP_REGNO_P (REGNO (dst)));
	  if (GET_MODE (src) == OOmode)
	    gcc_assert (VSX_REGNO_P (REGNO (dst)));

	  int nvecs = XVECLEN (src, 0);
	  for (int i = 0; i < nvecs; i++)
	    {
	      rtx op;
	      int regno = reg + i;

	      if (WORDS_BIG_ENDIAN)
		{
		  op = XVECEXP (src, 0, i);

		  /* If we are loading an even VSX register and the memory location
		     is adjacent to the next register's memory location (if any),
		     then we can load them both with one LXVP instruction.  */
		  if ((regno & 1) == 0)
		    {
		      rtx op2 = XVECEXP (src, 0, i + 1);
		      if (adjacent_mem_locations (op, op2) == op)
			{
			  op = adjust_address (op, OOmode, 0);
			  /* Skip the next register, since we're going to
			     load it together with this register.  */
			  i++;
			}
		    }
		}
	      else
		{
		  op = XVECEXP (src, 0, nvecs - i - 1);

		  /* If we are loading an even VSX register and the memory location
		     is adjacent to the next register's memory location (if any),
		     then we can load them both with one LXVP instruction.  */
		  if ((regno & 1) == 0)
		    {
		      rtx op2 = XVECEXP (src, 0, nvecs - i - 2);
		      if (adjacent_mem_locations (op2, op) == op2)
			{
			  op = adjust_address (op2, OOmode, 0);
			  /* Skip the next register, since we're going to
			     load it together with this register.  */
			  i++;
			}
		    }
		}

	      rtx dst_i = gen_rtx_REG (GET_MODE (op), regno);
	      emit_insn (gen_rtx_SET (dst_i, op));
	    }

	  /* We are writing an accumulator register, so we have to
	     prime it after we've written it.  */
	  if (GET_MODE (src) == XOmode)
	    emit_insn (gen_mma_xxmtacc (dst, dst));

	  return;
	}

      /* Register -> register moves can use common code.  */
    }

  if (REG_P (src) && REG_P (dst) && (REGNO (src) < REGNO (dst)))
    {
      /* If we are reading an accumulator register, we have to
	 deprime it before we can access it.  */
      if (TARGET_MMA
	  && GET_MODE (src) == XOmode && FP_REGNO_P (REGNO (src)))
	emit_insn (gen_mma_xxmfacc (src, src));

      /* Move register range backwards, if we might have destructive
	 overlap.  */
      int i;
      /* XO/OO are opaque so cannot use subregs.  */
      if (mode == OOmode || mode == XOmode)
	{
	  for (i = nregs - 1; i >= 0; i--)
	    {
	      rtx dst_i = gen_rtx_REG (reg_mode, REGNO (dst) + i);
	      rtx src_i = gen_rtx_REG (reg_mode, REGNO (src) + i);
	      emit_insn (gen_rtx_SET (dst_i, src_i));
	    }
	}
      else
	{
	  for (i = nregs - 1; i >= 0; i--)
	    emit_insn (gen_rtx_SET (simplify_gen_subreg (reg_mode, dst, mode,
							 i * reg_mode_size),
				    simplify_gen_subreg (reg_mode, src, mode,
							 i * reg_mode_size)));
	}

      /* If we are writing an accumulator register, we have to
	 prime it after we've written it.  */
      if (TARGET_MMA
	  && GET_MODE (dst) == XOmode && FP_REGNO_P (REGNO (dst)))
	emit_insn (gen_mma_xxmtacc (dst, dst));
    }
  else
    {
      int i;
      int j = -1;
      bool used_update = false;
      rtx restore_basereg = NULL_RTX;

      if (MEM_P (src) && INT_REGNO_P (reg))
	{
	  rtx breg;

	  if (GET_CODE (XEXP (src, 0)) == PRE_INC
	      || GET_CODE (XEXP (src, 0)) == PRE_DEC)
	    {
	      rtx delta_rtx;
	      breg = XEXP (XEXP (src, 0), 0);
	      delta_rtx = (GET_CODE (XEXP (src, 0)) == PRE_INC
			   ? GEN_INT (GET_MODE_SIZE (GET_MODE (src)))
			   : GEN_INT (-GET_MODE_SIZE (GET_MODE (src))));
	      emit_insn (gen_add3_insn (breg, breg, delta_rtx));
	      src = replace_equiv_address (src, breg);
	    }
	  else if (!rs6000_offsettable_memref_p (src, reg_mode, true))
	    {
	      if (GET_CODE (XEXP (src, 0)) == PRE_MODIFY)
		{
		  rtx basereg = XEXP (XEXP (src, 0), 0);
		  if (TARGET_UPDATE)
		    {
		      rtx ndst = simplify_gen_subreg (reg_mode, dst, mode, 0);
		      emit_insn (gen_rtx_SET (ndst,
					      gen_rtx_MEM (reg_mode,
							   XEXP (src, 0))));
		      used_update = true;
		    }
		  else
		    emit_insn (gen_rtx_SET (basereg,
					    XEXP (XEXP (src, 0), 1)));
		  src = replace_equiv_address (src, basereg);
		}
	      else
		{
		  rtx basereg = gen_rtx_REG (Pmode, reg);
		  emit_insn (gen_rtx_SET (basereg, XEXP (src, 0)));
		  src = replace_equiv_address (src, basereg);
		}
	    }

	  breg = XEXP (src, 0);
	  if (GET_CODE (breg) == PLUS || GET_CODE (breg) == LO_SUM)
	    breg = XEXP (breg, 0);

	  /* If the base register we are using to address memory is
	     also a destination reg, then change that register last.  */
	  if (REG_P (breg)
	      && REGNO (breg) >= REGNO (dst)
	      && REGNO (breg) < REGNO (dst) + nregs)
	    j = REGNO (breg) - REGNO (dst);
	}
      else if (MEM_P (dst) && INT_REGNO_P (reg))
	{
	  rtx breg;

	  if (GET_CODE (XEXP (dst, 0)) == PRE_INC
	      || GET_CODE (XEXP (dst, 0)) == PRE_DEC)
	    {
	      rtx delta_rtx;
	      breg = XEXP (XEXP (dst, 0), 0);
	      delta_rtx = (GET_CODE (XEXP (dst, 0)) == PRE_INC
			   ? GEN_INT (GET_MODE_SIZE (GET_MODE (dst)))
			   : GEN_INT (-GET_MODE_SIZE (GET_MODE (dst))));

	      /* We have to update the breg before doing the store.
		 Use store with update, if available.  */

	      if (TARGET_UPDATE)
		{
		  rtx nsrc = simplify_gen_subreg (reg_mode, src, mode, 0);
		  emit_insn (TARGET_32BIT
			     ? (TARGET_POWERPC64
				? gen_movdi_si_update (breg, breg, delta_rtx, nsrc)
				: gen_movsi_si_update (breg, breg, delta_rtx, nsrc))
			     : gen_movdi_di_update (breg, breg, delta_rtx, nsrc));
		  used_update = true;
		}
	      else
		emit_insn (gen_add3_insn (breg, breg, delta_rtx));
	      dst = replace_equiv_address (dst, breg);
	    }
	  else if (!rs6000_offsettable_memref_p (dst, reg_mode, true)
		   && GET_CODE (XEXP (dst, 0)) != LO_SUM)
	    {
	      if (GET_CODE (XEXP (dst, 0)) == PRE_MODIFY)
		{
		  rtx basereg = XEXP (XEXP (dst, 0), 0);
		  if (TARGET_UPDATE)
		    {
		      rtx nsrc = simplify_gen_subreg (reg_mode, src, mode, 0);
		      emit_insn (gen_rtx_SET (gen_rtx_MEM (reg_mode,
							   XEXP (dst, 0)),
					      nsrc));
		      used_update = true;
		    }
		  else
		    emit_insn (gen_rtx_SET (basereg,
					    XEXP (XEXP (dst, 0), 1)));
		  dst = replace_equiv_address (dst, basereg);
		}
	      else
		{
		  rtx basereg = XEXP (XEXP (dst, 0), 0);
		  rtx offsetreg = XEXP (XEXP (dst, 0), 1);
		  gcc_assert (GET_CODE (XEXP (dst, 0)) == PLUS
			      && REG_P (basereg)
			      && REG_P (offsetreg)
			      && REGNO (basereg) != REGNO (offsetreg));
		  if (REGNO (basereg) == 0)
		    {
		      rtx tmp = offsetreg;
		      offsetreg = basereg;
		      basereg = tmp;
		    }
		  emit_insn (gen_add3_insn (basereg, basereg, offsetreg));
		  restore_basereg = gen_sub3_insn (basereg, basereg, offsetreg);
		  dst = replace_equiv_address (dst, basereg);
		}
	    }
	  else if (GET_CODE (XEXP (dst, 0)) != LO_SUM)
	    gcc_assert (rs6000_offsettable_memref_p (dst, reg_mode, true));
	}

      /* If we are reading an accumulator register, we have to
	 deprime it before we can access it.  */
      if (TARGET_MMA && REG_P (src)
	  && GET_MODE (src) == XOmode && FP_REGNO_P (REGNO (src)))
	emit_insn (gen_mma_xxmfacc (src, src));

      for (i = 0; i < nregs; i++)
	{
	  /* Calculate index to next subword.  */
	  ++j;
	  if (j == nregs)
	    j = 0;

	  /* If compiler already emitted move of first word by
	     store with update, no need to do anything.  */
	  if (j == 0 && used_update)
	    continue;

	  /* XO/OO are opaque so cannot use subregs.  */
	  if (mode == OOmode || mode == XOmode)
	    {
	      rtx dst_i = gen_rtx_REG (reg_mode, REGNO (dst) + j);
	      rtx src_i = gen_rtx_REG (reg_mode, REGNO (src) + j);
	      emit_insn (gen_rtx_SET (dst_i, src_i));
	    }
	  else
	    emit_insn (gen_rtx_SET (simplify_gen_subreg (reg_mode, dst, mode,
							 j * reg_mode_size),
				    simplify_gen_subreg (reg_mode, src, mode,
							 j * reg_mode_size)));
	}

      /* If we are writing an accumulator register, we have to
	 prime it after we've written it.  */
      if (TARGET_MMA && REG_P (dst)
	  && GET_MODE (dst) == XOmode && FP_REGNO_P (REGNO (dst)))
	emit_insn (gen_mma_xxmtacc (dst, dst));

      if (restore_basereg != NULL_RTX)
	emit_insn (restore_basereg);
    }
}

/* Return true if the peephole2 can combine a load involving a combination of
   an addis instruction and a load with an offset that can be fused together on
   a power8.  */

bool
fusion_gpr_load_p (rtx addis_reg,	/* register set via addis.  */
		   rtx addis_value,	/* addis value.  */
		   rtx target,		/* target register that is loaded.  */
		   rtx mem)		/* bottom part of the memory addr.  */
{
  rtx addr;
  rtx base_reg;

  /* Validate arguments.  */
  if (!base_reg_operand (addis_reg, GET_MODE (addis_reg)))
    return false;

  if (!base_reg_operand (target, GET_MODE (target)))
    return false;

  if (!fusion_gpr_addis (addis_value, GET_MODE (addis_value)))
    return false;

  /* Allow sign/zero extension.  */
  if (GET_CODE (mem) == ZERO_EXTEND
      || (GET_CODE (mem) == SIGN_EXTEND && TARGET_P8_FUSION_SIGN))
    mem = XEXP (mem, 0);

  if (!MEM_P (mem))
    return false;

  if (!fusion_gpr_mem_load (mem, GET_MODE (mem)))
    return false;

  addr = XEXP (mem, 0);			/* either PLUS or LO_SUM.  */
  if (GET_CODE (addr) != PLUS && GET_CODE (addr) != LO_SUM)
    return false;

  /* Validate that the register used to load the high value is either the
     register being loaded, or we can safely replace its use.

     This function is only called from the peephole2 pass and we assume that
     there are 2 instructions in the peephole (addis and load), so we want to
     check if the target register was not used in the memory address and the
     register to hold the addis result is dead after the peephole.  */
  if (REGNO (addis_reg) != REGNO (target))
    {
      if (reg_mentioned_p (target, mem))
	return false;

      if (!peep2_reg_dead_p (2, addis_reg))
	return false;

      /* If the target register being loaded is the stack pointer, we must
         avoid loading any other value into it, even temporarily.  */
      if (REG_P (target) && REGNO (target) == STACK_POINTER_REGNUM)
	return false;
    }

  base_reg = XEXP (addr, 0);
  return REGNO (addis_reg) == REGNO (base_reg);
}
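
/* The insn pair the peephole matches looks like this (a sketch, with
   hypothetical register numbers):

	(set (reg:DI 9) (plus:DI (reg:DI 2) (high part)))	; addis 9,2,...
	(set (reg:DI 10) (mem:DI (plus:DI (reg:DI 9) (low part))))

   The checks above allow the addis result (reg 9) to be rewritten to the
   load target (reg 10) when reg 9 is dead after the second insn.  */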

/* During the peephole2 pass, adjust and expand the insns for a load fusion
   sequence.  We adjust the addis register to use the target register.  If the
   load sign extends, we adjust the code to do the zero extending load, and an
   explicit sign extension later since the fusion only covers zero extending
   loads.

   The operands are:
	operands[0]	register set with addis (to be replaced with target)
	operands[1]	value set via addis
	operands[2]	target register being loaded
	operands[3]	D-form memory reference using operands[0].  */

void
expand_fusion_gpr_load (rtx *operands)
{
  rtx addis_value = operands[1];
  rtx target = operands[2];
  rtx orig_mem = operands[3];
  rtx new_addr, new_mem, orig_addr, offset;
  enum rtx_code plus_or_lo_sum;
  machine_mode target_mode = GET_MODE (target);
  machine_mode extend_mode = target_mode;
  machine_mode ptr_mode = Pmode;
  enum rtx_code extend = UNKNOWN;

  if (GET_CODE (orig_mem) == ZERO_EXTEND
      || (TARGET_P8_FUSION_SIGN && GET_CODE (orig_mem) == SIGN_EXTEND))
    {
      extend = GET_CODE (orig_mem);
      orig_mem = XEXP (orig_mem, 0);
      target_mode = GET_MODE (orig_mem);
    }

  gcc_assert (MEM_P (orig_mem));

  orig_addr = XEXP (orig_mem, 0);
  plus_or_lo_sum = GET_CODE (orig_addr);
  gcc_assert (plus_or_lo_sum == PLUS || plus_or_lo_sum == LO_SUM);

  offset = XEXP (orig_addr, 1);
  new_addr = gen_rtx_fmt_ee (plus_or_lo_sum, ptr_mode, addis_value, offset);
  new_mem = replace_equiv_address_nv (orig_mem, new_addr, false);

  if (extend != UNKNOWN)
    new_mem = gen_rtx_fmt_e (ZERO_EXTEND, extend_mode, new_mem);

  new_mem = gen_rtx_UNSPEC (extend_mode, gen_rtvec (1, new_mem),
			    UNSPEC_FUSION_GPR);
  emit_insn (gen_rtx_SET (target, new_mem));

  if (extend == SIGN_EXTEND)
    {
      int sub_off = ((BYTES_BIG_ENDIAN)
		     ? GET_MODE_SIZE (extend_mode) - GET_MODE_SIZE (target_mode)
		     : 0);
      rtx sign_reg
	= simplify_subreg (target_mode, target, extend_mode, sub_off);

      emit_insn (gen_rtx_SET (target,
			      gen_rtx_SIGN_EXTEND (extend_mode, sign_reg)));
    }

  return;
}
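
/* So a sign-extending fused load ends up as two insns (a sketch):

	(set (reg:DI tgt) (unspec:DI [(zero_extend:DI (mem:HI ...))]
				     UNSPEC_FUSION_GPR))
	(set (reg:DI tgt) (sign_extend:DI (subreg:HI (reg:DI tgt) ...)))

   since only the zero-extending form of the load participates in the
   fusion sequence.  */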

/* Emit the addis instruction that will be part of a fused instruction
   sequence.  */

void
emit_fusion_addis (rtx target, rtx addis_value)
{
  rtx fuse_ops[10];
  const char *addis_str = NULL;

  /* Emit the addis instruction.  */
  fuse_ops[0] = target;
  if (satisfies_constraint_L (addis_value))
    {
      fuse_ops[1] = addis_value;
      addis_str = "lis %0,%v1";
    }

  else if (GET_CODE (addis_value) == PLUS)
    {
      rtx op0 = XEXP (addis_value, 0);
      rtx op1 = XEXP (addis_value, 1);

      if (REG_P (op0) && CONST_INT_P (op1)
	  && satisfies_constraint_L (op1))
	{
	  fuse_ops[1] = op0;
	  fuse_ops[2] = op1;
	  addis_str = "addis %0,%1,%v2";
	}
    }

  else if (GET_CODE (addis_value) == HIGH)
    {
      rtx value = XEXP (addis_value, 0);
      if (GET_CODE (value) == UNSPEC && XINT (value, 1) == UNSPEC_TOCREL)
	{
	  fuse_ops[1] = XVECEXP (value, 0, 0);		/* symbol ref.  */
	  fuse_ops[2] = XVECEXP (value, 0, 1);		/* TOC register.  */
	  if (TARGET_ELF)
	    addis_str = "addis %0,%2,%1@toc@ha";

	  else if (TARGET_XCOFF)
	    addis_str = "addis %0,%1@u(%2)";

	  else
	    gcc_unreachable ();
	}

      else if (GET_CODE (value) == PLUS)
	{
	  rtx op0 = XEXP (value, 0);
	  rtx op1 = XEXP (value, 1);

	  if (GET_CODE (op0) == UNSPEC
	      && XINT (op0, 1) == UNSPEC_TOCREL
	      && CONST_INT_P (op1))
	    {
	      fuse_ops[1] = XVECEXP (op0, 0, 0);	/* symbol ref.  */
	      fuse_ops[2] = XVECEXP (op0, 0, 1);	/* TOC register.  */
	      fuse_ops[3] = op1;
	      if (TARGET_ELF)
		addis_str = "addis %0,%2,%1+%3@toc@ha";

	      else if (TARGET_XCOFF)
		addis_str = "addis %0,%1+%3@u(%2)";

	      else
		gcc_unreachable ();
	    }
	}

      else if (satisfies_constraint_L (value))
	{
	  fuse_ops[1] = value;
	  addis_str = "lis %0,%v1";
	}

      else if (TARGET_ELF && !TARGET_POWERPC64 && CONSTANT_P (value))
	{
	  fuse_ops[1] = value;
	  addis_str = "lis %0,%1@ha";
	}
    }

  if (!addis_str)
    fatal_insn ("Could not generate addis value for fusion", addis_value);

  output_asm_insn (addis_str, fuse_ops);
}

/* Emit a D-form load or store instruction that is the second instruction
   of a fusion sequence.  */

static void
emit_fusion_load (rtx load_reg, rtx addis_reg, rtx offset, const char *insn_str)
{
  rtx fuse_ops[10];
  char insn_template[80];

  fuse_ops[0] = load_reg;
  fuse_ops[1] = addis_reg;

  if (CONST_INT_P (offset) && satisfies_constraint_I (offset))
    {
      sprintf (insn_template, "%s %%0,%%2(%%1)", insn_str);
      fuse_ops[2] = offset;
      output_asm_insn (insn_template, fuse_ops);
    }

  else if (GET_CODE (offset) == UNSPEC
	   && XINT (offset, 1) == UNSPEC_TOCREL)
    {
      if (TARGET_ELF)
	sprintf (insn_template, "%s %%0,%%2@toc@l(%%1)", insn_str);

      else if (TARGET_XCOFF)
	sprintf (insn_template, "%s %%0,%%2@l(%%1)", insn_str);

      else
	gcc_unreachable ();

      fuse_ops[2] = XVECEXP (offset, 0, 0);
      output_asm_insn (insn_template, fuse_ops);
    }

  else if (GET_CODE (offset) == PLUS
	   && GET_CODE (XEXP (offset, 0)) == UNSPEC
	   && XINT (XEXP (offset, 0), 1) == UNSPEC_TOCREL
	   && CONST_INT_P (XEXP (offset, 1)))
    {
      rtx tocrel_unspec = XEXP (offset, 0);
      if (TARGET_ELF)
	sprintf (insn_template, "%s %%0,%%2+%%3@toc@l(%%1)", insn_str);

      else if (TARGET_XCOFF)
	sprintf (insn_template, "%s %%0,%%2+%%3@l(%%1)", insn_str);

      else
	gcc_unreachable ();

      fuse_ops[2] = XVECEXP (tocrel_unspec, 0, 0);
      fuse_ops[3] = XEXP (offset, 1);
      output_asm_insn (insn_template, fuse_ops);
    }

  else if (TARGET_ELF && !TARGET_POWERPC64 && CONSTANT_P (offset))
    {
      sprintf (insn_template, "%s %%0,%%2@l(%%1)", insn_str);

      fuse_ops[2] = offset;
      output_asm_insn (insn_template, fuse_ops);
    }

  else
    fatal_insn ("Unable to generate load/store offset for fusion", offset);

  return;
}

/* Given an address, convert it into the addis and load offset parts.  Addresses
   created during the peephole2 process look like:
	(lo_sum (high (unspec [(sym)] UNSPEC_TOCREL))
		(unspec [(...)] UNSPEC_TOCREL))  */

static void
fusion_split_address (rtx addr, rtx *p_hi, rtx *p_lo)
{
  rtx hi, lo;

  if (GET_CODE (addr) == PLUS || GET_CODE (addr) == LO_SUM)
    {
      hi = XEXP (addr, 0);
      lo = XEXP (addr, 1);
    }
  else
    gcc_unreachable ();

  *p_hi = hi;
  *p_lo = lo;
}

/* Return a string to fuse an addis instruction with a gpr load to the same
   register that we loaded up the addis instruction.  The address that is used
   is the logical address that was formed during peephole2:
	(lo_sum (high) (low-part))

   The code is complicated, so we call output_asm_insn directly, and just
   return "".  */

const char *
emit_fusion_gpr_load (rtx target, rtx mem)
{
  rtx addis_value;
  rtx addr;
  rtx load_offset;
  const char *load_str = NULL;
  machine_mode mode;

  if (GET_CODE (mem) == ZERO_EXTEND)
    mem = XEXP (mem, 0);

  gcc_assert (REG_P (target) && MEM_P (mem));

  addr = XEXP (mem, 0);
  fusion_split_address (addr, &addis_value, &load_offset);

  /* Now emit the load instruction to the same register.  */
  mode = GET_MODE (mem);
  switch (mode)
    {
    case E_QImode:
      load_str = "lbz";
      break;

    case E_HImode:
      load_str = "lhz";
      break;

    case E_SImode:
    case E_SFmode:
      load_str = "lwz";
      break;

    case E_DImode:
    case E_DFmode:
      gcc_assert (TARGET_POWERPC64);
      load_str = "ld";
      break;

    default:
      fatal_insn ("Bad GPR fusion", gen_rtx_SET (target, mem));
    }

  /* Emit the addis instruction.  */
  emit_fusion_addis (target, addis_value);

  /* Emit the D-form load instruction.  */
  emit_fusion_load (target, target, load_offset, load_str);

  return "";
}
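
/* For a TOC-relative load on ELF this prints a back-to-back pair such as
   (a sketch, with a hypothetical symbol and register):

	addis 9,2,sym@toc@ha
	lwz 9,sym@toc@l(9)

   which the power8 front end can fuse into a single dispatch group.  */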

/* This is not inside an #ifdef RS6000_GLIBC_ATOMIC_FENV because gengtype
   ignores it then.  */
static GTY(()) tree atomic_hold_decl;
static GTY(()) tree atomic_clear_decl;
static GTY(()) tree atomic_update_decl;

/* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV hook.  */
static void
rs6000_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
{
  if (!TARGET_HARD_FLOAT)
    {
#ifdef RS6000_GLIBC_ATOMIC_FENV
      if (atomic_hold_decl == NULL_TREE)
	{
	  atomic_hold_decl
	    = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
			  get_identifier ("__atomic_feholdexcept"),
			  build_function_type_list (void_type_node,
						    double_ptr_type_node,
						    NULL_TREE));
	  TREE_PUBLIC (atomic_hold_decl) = 1;
	  DECL_EXTERNAL (atomic_hold_decl) = 1;
	}

      if (atomic_clear_decl == NULL_TREE)
	{
	  atomic_clear_decl
	    = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
			  get_identifier ("__atomic_feclearexcept"),
			  build_function_type_list (void_type_node,
						    NULL_TREE));
	  TREE_PUBLIC (atomic_clear_decl) = 1;
	  DECL_EXTERNAL (atomic_clear_decl) = 1;
	}

      tree const_double = build_qualified_type (double_type_node,
						TYPE_QUAL_CONST);
      tree const_double_ptr = build_pointer_type (const_double);
      if (atomic_update_decl == NULL_TREE)
	{
	  atomic_update_decl
	    = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
			  get_identifier ("__atomic_feupdateenv"),
			  build_function_type_list (void_type_node,
						    const_double_ptr,
						    NULL_TREE));
	  TREE_PUBLIC (atomic_update_decl) = 1;
	  DECL_EXTERNAL (atomic_update_decl) = 1;
	}

      tree fenv_var = create_tmp_var_raw (double_type_node);
      TREE_ADDRESSABLE (fenv_var) = 1;
      tree fenv_addr = build1 (ADDR_EXPR, double_ptr_type_node,
			       build4 (TARGET_EXPR, double_type_node, fenv_var,
				       void_node, NULL_TREE, NULL_TREE));

      *hold = build_call_expr (atomic_hold_decl, 1, fenv_addr);
      *clear = build_call_expr (atomic_clear_decl, 0);
      *update = build_call_expr (atomic_update_decl, 1,
				 fold_convert (const_double_ptr, fenv_addr));
#endif
      return;
    }

  tree mffs = rs6000_builtin_decls[RS6000_BIF_MFFS];
  tree mtfsf = rs6000_builtin_decls[RS6000_BIF_MTFSF];
  tree call_mffs = build_call_expr (mffs, 0);

  /* Generates the equivalent of feholdexcept (&fenv_var)

     *fenv_var = __builtin_mffs ();
     double fenv_hold;
     *(uint64_t*)&fenv_hold = *(uint64_t*)fenv_var & 0xffffffff00000007LL;
     __builtin_mtfsf (0xff, fenv_hold);  */

  /* Mask to clear everything except for the rounding modes and non-IEEE
     arithmetic flag.  */
  const unsigned HOST_WIDE_INT hold_exception_mask
    = HOST_WIDE_INT_C (0xffffffff00000007);
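
  /* The low three bits of the FPSCR image are the non-IEEE mode bit (NI)
     and the two rounding-mode bits (RN), hence the 0x7: those bits survive
     the hold operation while the rest of the lower word is zeroed.  */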

  tree fenv_var = create_tmp_var_raw (double_type_node);

  tree hold_mffs = build4 (TARGET_EXPR, double_type_node, fenv_var, call_mffs,
			   NULL_TREE, NULL_TREE);

  tree fenv_llu = build1 (VIEW_CONVERT_EXPR, uint64_type_node, fenv_var);
  tree fenv_llu_and = build2 (BIT_AND_EXPR, uint64_type_node, fenv_llu,
			      build_int_cst (uint64_type_node,
					     hold_exception_mask));

  tree fenv_hold_mtfsf = build1 (VIEW_CONVERT_EXPR, double_type_node,
				 fenv_llu_and);

  tree hold_mtfsf = build_call_expr (mtfsf, 2,
				     build_int_cst (unsigned_type_node, 0xff),
				     fenv_hold_mtfsf);

  *hold = build2 (COMPOUND_EXPR, void_type_node, hold_mffs, hold_mtfsf);

  /* Generates the equivalent of feclearexcept (FE_ALL_EXCEPT):

     double fenv_clear = __builtin_mffs ();
     *(uint64_t*)&fenv_clear &= 0xffffffff00000000LL;
     __builtin_mtfsf (0xff, fenv_clear);  */

  /* Mask to clear everything in the lower 32 bits of the FPSCR image
     (the exception status, enable, and mode bits).  */
  const unsigned HOST_WIDE_INT clear_exception_mask
    = HOST_WIDE_INT_C (0xffffffff00000000);

  tree fenv_clear = create_tmp_var_raw (double_type_node);

  tree clear_mffs = build4 (TARGET_EXPR, double_type_node, fenv_clear,
			    call_mffs, NULL_TREE, NULL_TREE);

  tree fenv_clean_llu = build1 (VIEW_CONVERT_EXPR, uint64_type_node, fenv_clear);
  tree fenv_clear_llu_and = build2 (BIT_AND_EXPR, uint64_type_node,
				    fenv_clean_llu,
				    build_int_cst (uint64_type_node,
						   clear_exception_mask));

  tree fenv_clear_mtfsf = build1 (VIEW_CONVERT_EXPR, double_type_node,
				  fenv_clear_llu_and);

  tree clear_mtfsf = build_call_expr (mtfsf, 2,
				      build_int_cst (unsigned_type_node, 0xff),
				      fenv_clear_mtfsf);

  *clear = build2 (COMPOUND_EXPR, void_type_node, clear_mffs, clear_mtfsf);

  /* Generates the equivalent of feupdateenv (&fenv_var)

     double old_fenv = __builtin_mffs ();
     double fenv_update;
     *(uint64_t*)&fenv_update = (*(uint64_t*)&old_fenv & 0xffffffff1fffff00LL) |
                                (*(uint64_t*)fenv_var & 0x1ff80fff);
     __builtin_mtfsf (0xff, fenv_update);  */

  const unsigned HOST_WIDE_INT update_exception_mask
    = HOST_WIDE_INT_C (0xffffffff1fffff00);
  const unsigned HOST_WIDE_INT new_exception_mask
    = HOST_WIDE_INT_C (0x1ff80fff);

  tree old_fenv = create_tmp_var_raw (double_type_node);
  tree update_mffs = build4 (TARGET_EXPR, double_type_node, old_fenv,
			     call_mffs, NULL_TREE, NULL_TREE);

  tree old_llu = build1 (VIEW_CONVERT_EXPR, uint64_type_node, old_fenv);
  tree old_llu_and = build2 (BIT_AND_EXPR, uint64_type_node, old_llu,
			     build_int_cst (uint64_type_node,
					    update_exception_mask));

  tree new_llu_and = build2 (BIT_AND_EXPR, uint64_type_node, fenv_llu,
			     build_int_cst (uint64_type_node,
					    new_exception_mask));

  tree new_llu_mask = build2 (BIT_IOR_EXPR, uint64_type_node,
			      old_llu_and, new_llu_and);

  tree fenv_update_mtfsf = build1 (VIEW_CONVERT_EXPR, double_type_node,
				   new_llu_mask);

  tree update_mtfsf = build_call_expr (mtfsf, 2,
				       build_int_cst (unsigned_type_node, 0xff),
				       fenv_update_mtfsf);

  *update = build2 (COMPOUND_EXPR, void_type_node, update_mffs, update_mtfsf);
}

void
rs6000_generate_float2_double_code (rtx dst, rtx src1, rtx src2)
{
  rtx rtx_tmp0, rtx_tmp1, rtx_tmp2, rtx_tmp3;

  rtx_tmp0 = gen_reg_rtx (V2DFmode);
  rtx_tmp1 = gen_reg_rtx (V2DFmode);

  /* The destination of the vmrgew instruction layout is:
     rtx_tmp2[0] rtx_tmp3[0] rtx_tmp2[1] rtx_tmp3[1].
     Setup rtx_tmp0 and rtx_tmp1 to ensure the order of the elements after the
     vmrgew instruction will be correct.  */
  if (BYTES_BIG_ENDIAN)
    {
      emit_insn (gen_vsx_xxpermdi_v2df_be (rtx_tmp0, src1, src2,
					   GEN_INT (0)));
      emit_insn (gen_vsx_xxpermdi_v2df_be (rtx_tmp1, src1, src2,
					   GEN_INT (3)));
    }
  else
    {
      emit_insn (gen_vsx_xxpermdi_v2df (rtx_tmp0, src1, src2, GEN_INT (3)));
      emit_insn (gen_vsx_xxpermdi_v2df (rtx_tmp1, src1, src2, GEN_INT (0)));
    }

  rtx_tmp2 = gen_reg_rtx (V4SFmode);
  rtx_tmp3 = gen_reg_rtx (V4SFmode);

  emit_insn (gen_vsx_xvcvdpsp (rtx_tmp2, rtx_tmp0));
  emit_insn (gen_vsx_xvcvdpsp (rtx_tmp3, rtx_tmp1));

  if (BYTES_BIG_ENDIAN)
    emit_insn (gen_p8_vmrgew_v4sf (dst, rtx_tmp2, rtx_tmp3));
  else
    emit_insn (gen_p8_vmrgew_v4sf (dst, rtx_tmp3, rtx_tmp2));
}
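
/* A sketch of the data flow: for V2DF inputs a = {a0, a1} and b = {b0, b1},
   the xxpermdi pair builds {a0, b0} and {a1, b1}, xvcvdpsp converts each
   pair to single precision in words 0 and 2, and vmrgew interleaves the
   even words so that dst = {f(a0), f(a1), f(b0), f(b1)} in element
   order.  */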

void
rs6000_generate_float2_code (bool signed_convert, rtx dst, rtx src1, rtx src2)
{
  rtx rtx_tmp0, rtx_tmp1, rtx_tmp2, rtx_tmp3;

  rtx_tmp0 = gen_reg_rtx (V2DImode);
  rtx_tmp1 = gen_reg_rtx (V2DImode);

  /* The destination of the vmrgew instruction layout is:
     rtx_tmp2[0] rtx_tmp3[0] rtx_tmp2[1] rtx_tmp3[1].
     Setup rtx_tmp0 and rtx_tmp1 to ensure the order of the elements after the
     vmrgew instruction will be correct.  */
  if (BYTES_BIG_ENDIAN)
    {
      emit_insn (gen_vsx_xxpermdi_v2di_be (rtx_tmp0, src1, src2, GEN_INT (0)));
      emit_insn (gen_vsx_xxpermdi_v2di_be (rtx_tmp1, src1, src2, GEN_INT (3)));
    }
  else
    {
      emit_insn (gen_vsx_xxpermdi_v2di (rtx_tmp0, src1, src2, GEN_INT (3)));
      emit_insn (gen_vsx_xxpermdi_v2di (rtx_tmp1, src1, src2, GEN_INT (0)));
    }

  rtx_tmp2 = gen_reg_rtx (V4SFmode);
  rtx_tmp3 = gen_reg_rtx (V4SFmode);

  if (signed_convert)
    {
      emit_insn (gen_vsx_xvcvsxdsp (rtx_tmp2, rtx_tmp0));
      emit_insn (gen_vsx_xvcvsxdsp (rtx_tmp3, rtx_tmp1));
    }
  else
    {
      emit_insn (gen_vsx_xvcvuxdsp (rtx_tmp2, rtx_tmp0));
      emit_insn (gen_vsx_xvcvuxdsp (rtx_tmp3, rtx_tmp1));
    }

  if (BYTES_BIG_ENDIAN)
    emit_insn (gen_p8_vmrgew_v4sf (dst, rtx_tmp2, rtx_tmp3));
  else
    emit_insn (gen_p8_vmrgew_v4sf (dst, rtx_tmp3, rtx_tmp2));
}

void
rs6000_generate_vsigned2_code (bool signed_convert, rtx dst, rtx src1,
			       rtx src2)
{
  rtx rtx_tmp0, rtx_tmp1, rtx_tmp2, rtx_tmp3;

  rtx_tmp0 = gen_reg_rtx (V2DFmode);
  rtx_tmp1 = gen_reg_rtx (V2DFmode);

  emit_insn (gen_vsx_xxpermdi_v2df (rtx_tmp0, src1, src2, GEN_INT (0)));
  emit_insn (gen_vsx_xxpermdi_v2df (rtx_tmp1, src1, src2, GEN_INT (3)));

  rtx_tmp2 = gen_reg_rtx (V4SImode);
  rtx_tmp3 = gen_reg_rtx (V4SImode);

  if (signed_convert)
    {
      emit_insn (gen_vsx_xvcvdpsxws (rtx_tmp2, rtx_tmp0));
      emit_insn (gen_vsx_xvcvdpsxws (rtx_tmp3, rtx_tmp1));
    }
  else
    {
      emit_insn (gen_vsx_xvcvdpuxws (rtx_tmp2, rtx_tmp0));
      emit_insn (gen_vsx_xvcvdpuxws (rtx_tmp3, rtx_tmp1));
    }

  emit_insn (gen_p8_vmrgew_v4si (dst, rtx_tmp2, rtx_tmp3));
}

/* Implement the TARGET_OPTAB_SUPPORTED_P hook.  */

static bool
rs6000_optab_supported_p (int op, machine_mode mode1, machine_mode,
			  optimization_type opt_type)
{
  switch (op)
    {
    case rsqrt_optab:
      return (opt_type == OPTIMIZE_FOR_SPEED
	      && RS6000_RECIP_AUTO_RSQRTE_P (mode1));

    default:
      return true;
    }
}

/* Implement TARGET_CONSTANT_ALIGNMENT.  */

static HOST_WIDE_INT
rs6000_constant_alignment (const_tree exp, HOST_WIDE_INT align)
{
  if (TREE_CODE (exp) == STRING_CST
      && (STRICT_ALIGNMENT || !optimize_size))
    return MAX (align, BITS_PER_WORD);
  return align;
}

/* Implement TARGET_STARTING_FRAME_OFFSET.  */

static HOST_WIDE_INT
rs6000_starting_frame_offset (void)
{
  if (FRAME_GROWS_DOWNWARD)
    return 0;
  return RS6000_STARTING_FRAME_OFFSET;
}

/* Internal function to return the built-in function id for the complex
   multiply operation for a given mode.  */

static inline built_in_function
complex_multiply_builtin_code (machine_mode mode)
{
  gcc_assert (IN_RANGE (mode, MIN_MODE_COMPLEX_FLOAT, MAX_MODE_COMPLEX_FLOAT));
  int func = BUILT_IN_COMPLEX_MUL_MIN + mode - MIN_MODE_COMPLEX_FLOAT;
  return (built_in_function) func;
}

/* Internal function to return the built-in function id for the complex divide
   operation for a given mode.  */

static inline built_in_function
complex_divide_builtin_code (machine_mode mode)
{
  gcc_assert (IN_RANGE (mode, MIN_MODE_COMPLEX_FLOAT, MAX_MODE_COMPLEX_FLOAT));
  int func = BUILT_IN_COMPLEX_DIV_MIN + mode - MIN_MODE_COMPLEX_FLOAT;
  return (built_in_function) func;
}
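
/* For example (a sketch): for KCmode (complex IEEE 128-bit float), the
   returned id is BUILT_IN_COMPLEX_MUL_MIN (or BUILT_IN_COMPLEX_DIV_MIN)
   plus the offset of KCmode among the complex float modes, i.e. the
   builtin whose libcall name is mangled to __mulkc3/__divkc3 below.  */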

/* On 64-bit Linux and Freebsd systems, possibly switch the long double library
   function names from <foo>l to <foo>f128 if the default long double type is
   IEEE 128-bit.  Typically, with the C and C++ languages, the standard math.h
   include file switches the names on systems that support long double as IEEE
   128-bit, but that doesn't work if the user uses __builtin_<foo>l directly.
   In the future, glibc will export names like __ieee128_sinf128 and we can
   switch to using those instead of using sinf128, which pollutes the user's
   namespace.

   This will switch the names for Fortran math functions as well (which doesn't
   use math.h).  However, Fortran needs other changes to the compiler and
   library before you can switch the real*16 type at compile time.

   We use the TARGET_MANGLE_DECL_ASSEMBLER_NAME hook to change this name.  We
   only do this transformation if the __float128 type is enabled.  This
   prevents us from doing the transformation on older 32-bit ports that might
   have enabled using IEEE 128-bit floating point as the default long double
   type.

   We also use the TARGET_MANGLE_DECL_ASSEMBLER_NAME hook to change the
   function names used for complex multiply and divide to the appropriate
   names.  */

static tree
rs6000_mangle_decl_assembler_name (tree decl, tree id)
{
  /* Handle complex multiply/divide.  For IEEE 128-bit, use __mulkc3 or
     __divkc3 and for IBM 128-bit use __multc3 and __divtc3.  */
  if (TARGET_FLOAT128_TYPE
      && TREE_CODE (decl) == FUNCTION_DECL
      && DECL_IS_UNDECLARED_BUILTIN (decl)
      && DECL_BUILT_IN_CLASS (decl) == BUILT_IN_NORMAL)
    {
      built_in_function id = DECL_FUNCTION_CODE (decl);
      const char *newname = NULL;

      if (id == complex_multiply_builtin_code (KCmode))
	newname = "__mulkc3";

      else if (id == complex_multiply_builtin_code (ICmode))
	newname = "__multc3";

      else if (id == complex_multiply_builtin_code (TCmode))
	newname = (TARGET_IEEEQUAD) ? "__mulkc3" : "__multc3";

      else if (id == complex_divide_builtin_code (KCmode))
	newname = "__divkc3";

      else if (id == complex_divide_builtin_code (ICmode))
	newname = "__divtc3";

      else if (id == complex_divide_builtin_code (TCmode))
	newname = (TARGET_IEEEQUAD) ? "__divkc3" : "__divtc3";

      if (newname)
	{
	  if (TARGET_DEBUG_BUILTIN)
	    fprintf (stderr, "Map complex mul/div => %s\n", newname);

	  return get_identifier (newname);
	}
    }

  /* Map long double built-in functions if long double is IEEE 128-bit.  */
  if (TARGET_FLOAT128_TYPE && TARGET_IEEEQUAD && TARGET_LONG_DOUBLE_128
      && TREE_CODE (decl) == FUNCTION_DECL
      && DECL_IS_UNDECLARED_BUILTIN (decl)
      && DECL_BUILT_IN_CLASS (decl) == BUILT_IN_NORMAL)
    {
      size_t len = IDENTIFIER_LENGTH (id);
      const char *name = IDENTIFIER_POINTER (id);
      char *newname = NULL;

      /* See if it is one of the built-in functions with an unusual name.  */
      switch (DECL_FUNCTION_CODE (decl))
	{
	case BUILT_IN_DREML:
	  newname = xstrdup ("__remainderieee128");
	  break;

	case BUILT_IN_GAMMAL:
	  newname = xstrdup ("__lgammaieee128");
	  break;

	case BUILT_IN_GAMMAL_R:
	case BUILT_IN_LGAMMAL_R:
	  newname = xstrdup ("__lgammaieee128_r");
	  break;

	case BUILT_IN_NEXTTOWARD:
	  newname = xstrdup ("__nexttoward_to_ieee128");
	  break;

	case BUILT_IN_NEXTTOWARDF:
	  newname = xstrdup ("__nexttowardf_to_ieee128");
	  break;

	case BUILT_IN_NEXTTOWARDL:
	  newname = xstrdup ("__nexttowardieee128");
	  break;

	case BUILT_IN_POW10L:
	  newname = xstrdup ("__exp10ieee128");
	  break;

	case BUILT_IN_SCALBL:
	  newname = xstrdup ("__scalbieee128");
	  break;

	case BUILT_IN_SIGNIFICANDL:
	  newname = xstrdup ("__significandieee128");
	  break;

	case BUILT_IN_SINCOSL:
	  newname = xstrdup ("__sincosieee128");
	  break;

	default:
	  break;
	}

      /* Update the __builtin_*printf and __builtin_*scanf functions.  */
      if (!newname)
	{
	  size_t printf_len = strlen ("printf");
	  size_t scanf_len = strlen ("scanf");
	  size_t printf_chk_len = strlen ("printf_chk");

	  if (len >= printf_len
	      && strcmp (name + len - printf_len, "printf") == 0)
	    newname = xasprintf ("__%sieee128", name);

	  else if (len >= scanf_len
		   && strcmp (name + len - scanf_len, "scanf") == 0)
	    newname = xasprintf ("__isoc99_%sieee128", name);

	  else if (len >= printf_chk_len
		   && strcmp (name + len - printf_chk_len, "printf_chk") == 0)
	    newname = xasprintf ("%sieee128", name);

	  else if (name[len - 1] == 'l')
	    {
	      bool uses_ieee128_p = false;
	      tree type = TREE_TYPE (decl);
	      machine_mode ret_mode = TYPE_MODE (type);

	      /* See if the function returns an IEEE 128-bit floating point type
		 or complex type.  */
	      if (ret_mode == TFmode || ret_mode == TCmode)
		uses_ieee128_p = true;
	      else
		{
		  function_args_iterator args_iter;
		  tree arg;

		  /* See if the function passes an IEEE 128-bit floating point
		     type or complex type.  */
		  FOREACH_FUNCTION_ARGS (type, arg, args_iter)
		    {
		      machine_mode arg_mode = TYPE_MODE (arg);
		      if (arg_mode == TFmode || arg_mode == TCmode)
			{
			  uses_ieee128_p = true;
			  break;
			}
		    }
		}

	      /* If we passed or returned an IEEE 128-bit floating point type,
		 change the name.  Use __<name>ieee128, instead of <name>l.  */
	      if (uses_ieee128_p)
		newname = xasprintf ("__%.*sieee128", (int)(len - 1), name);
	    }
	}

      if (newname)
	{
	  if (TARGET_DEBUG_BUILTIN)
	    fprintf (stderr, "Map %s => %s\n", name, newname);

	  id = get_identifier (newname);
	  free (newname);
	}
    }

  return id;
}
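
/* For example (an illustrative mapping, assuming -mabi=ieeelongdouble):
   __builtin_sinl resolves to "sinl", whose trailing 'l' and TFmode return
   type cause it to be renamed "__sinieee128", while __builtin_printf
   becomes "__printfieee128" via the printf suffix rule above.  */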

/* Predict whether the given loop in gimple will be transformed in the RTL
   doloop_optimize pass.  */

static bool
rs6000_predict_doloop_p (struct loop *loop)
{
  gcc_assert (loop);

  /* On rs6000, targetm.can_use_doloop_p is actually
     can_use_doloop_if_innermost.  Just ensure the loop is innermost.  */
  if (loop->inner != NULL)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "Predict doloop failure due to"
			    " loop nesting.\n");
      return false;
    }

  return true;
}

/* Implement TARGET_PREFERRED_DOLOOP_MODE.  */

static machine_mode
rs6000_preferred_doloop_mode (machine_mode)
{
  return word_mode;
}

/* Implement TARGET_CANNOT_SUBSTITUTE_MEM_EQUIV_P.  */

static bool
rs6000_cannot_substitute_mem_equiv_p (rtx mem)
{
  gcc_assert (MEM_P (mem));

  /* curr_insn_transform()'s handling of subregs cannot handle altivec AND:
     type addresses, so don't allow MEMs with those address types to be
     substituted as an equivalent expression.  See PR93974 for details.  */
  if (GET_CODE (XEXP (mem, 0)) == AND)
    return true;

  return false;
}

/* Implement TARGET_INVALID_CONVERSION.  */

static const char *
rs6000_invalid_conversion (const_tree fromtype, const_tree totype)
{
  /* Make sure we're working with the canonical types.  */
  if (TYPE_CANONICAL (fromtype) != NULL_TREE)
    fromtype = TYPE_CANONICAL (fromtype);
  if (TYPE_CANONICAL (totype) != NULL_TREE)
    totype = TYPE_CANONICAL (totype);

  machine_mode frommode = TYPE_MODE (fromtype);
  machine_mode tomode = TYPE_MODE (totype);

  if (frommode != tomode)
    {
      /* Do not allow conversions to/from XOmode and OOmode types.  */
      if (frommode == XOmode)
	return N_("invalid conversion from type %<__vector_quad%>");
      if (tomode == XOmode)
	return N_("invalid conversion to type %<__vector_quad%>");
      if (frommode == OOmode)
	return N_("invalid conversion from type %<__vector_pair%>");
      if (tomode == OOmode)
	return N_("invalid conversion to type %<__vector_pair%>");
    }

  /* Conversion allowed.  */
  return NULL;
}

/* Convert a SFmode constant to the integer bit pattern.  */

long
rs6000_const_f32_to_i32 (rtx operand)
{
  long value;
  const struct real_value *rv = CONST_DOUBLE_REAL_VALUE (operand);

  gcc_assert (GET_MODE (operand) == SFmode);
  REAL_VALUE_TO_TARGET_SINGLE (*rv, value);
  return value;
}
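
/* For instance, the SFmode constant 1.0f comes back as 0x3f800000 and
   -0.0f as 0x80000000, i.e. the IEEE single-precision image of the
   value.  */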

void
rs6000_emit_xxspltidp_v2df (rtx dst, long value)
{
  if (((value & 0x7F800000) == 0) && ((value & 0x7FFFFF) != 0))
    inform (input_location,
	    "the result for the xxspltidp instruction "
	    "is undefined for subnormal input values");
  emit_insn (gen_xxspltidp_v2df_inst (dst, GEN_INT (value)));
}

/* Implement TARGET_ASM_GENERATE_PIC_ADDR_DIFF_VEC.  */

static bool
rs6000_gen_pic_addr_diff_vec (void)
{
  return rs6000_relative_jumptables;
}

void
rs6000_output_addr_vec_elt (FILE *file, int value)
{
  const char *directive = TARGET_64BIT ? DOUBLE_INT_ASM_OP : "\t.long\t";
  char buf[100];

  fprintf (file, "%s", directive);
  ASM_GENERATE_INTERNAL_LABEL (buf, "L", value);
  assemble_name (file, buf);
  fprintf (file, "\n");
}


/* Copy an integer constant to the vector constant structure.  */

static void
constant_int_to_128bit_vector (rtx op,
			       machine_mode mode,
			       size_t byte_num,
			       vec_const_128bit_type *info)
{
  unsigned HOST_WIDE_INT uvalue = UINTVAL (op);
  unsigned bitsize = GET_MODE_BITSIZE (mode);

  for (int shift = bitsize - 8; shift >= 0; shift -= 8)
    info->bytes[byte_num++] = (uvalue >> shift) & 0xff;
}
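
/* As an example (a sketch): with byte_num 0 and the DImode value
   0x0102030405060708, the loop stores bytes 0x01 .. 0x08 into
   info->bytes[0..7] in big-endian order; the caller splats that pattern
   to fill all 16 bytes when the constant came from a scalar.  */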

/* Copy a floating point constant to the vector constant structure.  */

static void
constant_fp_to_128bit_vector (rtx op,
			      machine_mode mode,
			      size_t byte_num,
			      vec_const_128bit_type *info)
{
  unsigned bitsize = GET_MODE_BITSIZE (mode);
  unsigned num_words = bitsize / 32;
  const REAL_VALUE_TYPE *rtype = CONST_DOUBLE_REAL_VALUE (op);
  long real_words[VECTOR_128BIT_WORDS];

  /* Make sure we don't overflow the real_words array and that it is
     filled completely.  */
  gcc_assert (num_words <= VECTOR_128BIT_WORDS && (bitsize % 32) == 0);

  real_to_target (real_words, rtype, mode);

  /* Iterate over each 32-bit word in the floating point constant.  The
     real_to_target function emits the words in target endian order.  We
     need to rearrange them so that the bytes are written in big endian
     order.  */
  for (unsigned num = 0; num < num_words; num++)
    {
      unsigned endian_num = (BYTES_BIG_ENDIAN
			     ? num
			     : num_words - 1 - num);

      unsigned uvalue = real_words[endian_num];
      for (int shift = 32 - 8; shift >= 0; shift -= 8)
	info->bytes[byte_num++] = (uvalue >> shift) & 0xff;
    }

  /* Mark that this constant involves floating point.  */
  info->fp_constant_p = true;
}
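
/* Worked example (editor's illustration): the DFmode constant 1.0 has the
   64-bit pattern 0x3ff0000000000000, which real_to_target returns as the
   two 32-bit words { 0x3ff00000, 0x00000000 } on a big-endian target and
   in the reverse order on a little-endian one.  After the endian fixup
   above, bytes[] always holds 3f f0 00 00 00 00 00 00, most-significant
   byte first.  */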

/* Convert a vector constant OP with mode MODE to a vector 128-bit constant
   structure INFO.

   Break the constant out into bytes, half words, words, and double words.
   Return true if we have successfully converted the constant.

   We handle CONST_INT, CONST_DOUBLE, CONST_VECTOR, and VEC_DUPLICATE of
   constants.  Integer and floating point scalar constants are splatted to
   fill out the vector.  */

bool
vec_const_128bit_to_bytes (rtx op,
			   machine_mode mode,
			   vec_const_128bit_type *info)
{
  /* Initialize the constant structure.  */
  memset ((void *)info, 0, sizeof (vec_const_128bit_type));

  /* Assume CONST_INTs are DImode.  */
  if (mode == VOIDmode)
    mode = CONST_INT_P (op) ? DImode : GET_MODE (op);

  if (mode == VOIDmode)
    return false;

  unsigned size = GET_MODE_SIZE (mode);
  bool splat_p = false;

  if (size > VECTOR_128BIT_BYTES)
    return false;

  /* Set up the bits.  */
  switch (GET_CODE (op))
    {
      /* Integer constants, default to double word.  */
    case CONST_INT:
      {
	constant_int_to_128bit_vector (op, mode, 0, info);
	splat_p = true;
	break;
      }

      /* Floating point constants.  */
    case CONST_DOUBLE:
      {
	/* Fail if the floating point constant is the wrong mode.  */
	if (GET_MODE (op) != mode)
	  return false;

	/* SFmode constants stored as scalars are kept in DFmode format.  */
	if (mode == SFmode)
	  {
	    mode = DFmode;
	    size = GET_MODE_SIZE (DFmode);
	  }

	constant_fp_to_128bit_vector (op, mode, 0, info);
	splat_p = true;
	break;
      }

      /* Vector constants, iterate over each element.  On little endian
	 systems, we have to reverse the element numbers.  */
    case CONST_VECTOR:
      {
	/* Fail if the vector constant is the wrong mode or size.  */
	if (GET_MODE (op) != mode
	    || GET_MODE_SIZE (mode) != VECTOR_128BIT_BYTES)
	  return false;

	machine_mode ele_mode = GET_MODE_INNER (mode);
	size_t ele_size = GET_MODE_SIZE (ele_mode);
	size_t nunits = GET_MODE_NUNITS (mode);

	for (size_t num = 0; num < nunits; num++)
	  {
	    rtx ele = CONST_VECTOR_ELT (op, num);
	    size_t byte_num = (BYTES_BIG_ENDIAN
			       ? num
			       : nunits - 1 - num) * ele_size;

	    if (CONST_INT_P (ele))
	      constant_int_to_128bit_vector (ele, ele_mode, byte_num, info);
	    else if (CONST_DOUBLE_P (ele))
	      constant_fp_to_128bit_vector (ele, ele_mode, byte_num, info);
	    else
	      return false;
	  }

	break;
      }

      /* Treat VEC_DUPLICATE of a constant just like a vector constant.
	 Since we are duplicating the element, we don't have to worry about
	 endian issues.  */
    case VEC_DUPLICATE:
      {
	/* Fail if the vector duplicate is the wrong mode or size.  */
	if (GET_MODE (op) != mode
	    || GET_MODE_SIZE (mode) != VECTOR_128BIT_BYTES)
	  return false;

	machine_mode ele_mode = GET_MODE_INNER (mode);
	size_t ele_size = GET_MODE_SIZE (ele_mode);
	rtx ele = XEXP (op, 0);
	size_t nunits = GET_MODE_NUNITS (mode);

	if (!CONST_INT_P (ele) && !CONST_DOUBLE_P (ele))
	  return false;

	for (size_t num = 0; num < nunits; num++)
	  {
	    size_t byte_num = num * ele_size;

	    if (CONST_INT_P (ele))
	      constant_int_to_128bit_vector (ele, ele_mode, byte_num, info);
	    else
	      constant_fp_to_128bit_vector (ele, ele_mode, byte_num, info);
	  }

	break;
      }

      /* Anything else, just return failure.  */
    default:
      return false;
    }

  /* Splat the constant to fill 128 bits if desired.  */
  if (splat_p && size < VECTOR_128BIT_BYTES)
    {
      if ((VECTOR_128BIT_BYTES % size) != 0)
	return false;

      for (size_t offset = size;
	   offset < VECTOR_128BIT_BYTES;
	   offset += size)
	memcpy ((void *) &info->bytes[offset],
		(void *) &info->bytes[0],
		size);
    }

  /* Remember the original size.  */
  info->original_size = size;

  /* Determine if the bytes are all the same.  */
  unsigned char first_byte = info->bytes[0];
  info->all_bytes_same = true;
  for (size_t i = 1; i < VECTOR_128BIT_BYTES; i++)
    if (first_byte != info->bytes[i])
      {
	info->all_bytes_same = false;
	break;
      }

  /* Pack half words together & determine if all of the half words are the
     same.  */
  for (size_t i = 0; i < VECTOR_128BIT_HALF_WORDS; i++)
    info->half_words[i] = ((info->bytes[i * 2] << 8)
			   | info->bytes[(i * 2) + 1]);

  unsigned short first_hword = info->half_words[0];
  info->all_half_words_same = true;
  for (size_t i = 1; i < VECTOR_128BIT_HALF_WORDS; i++)
    if (first_hword != info->half_words[i])
      {
	info->all_half_words_same = false;
	break;
      }

  /* Pack words together & determine if all of the words are the same.  */
  for (size_t i = 0; i < VECTOR_128BIT_WORDS; i++)
    info->words[i] = ((info->bytes[i * 4] << 24)
		      | (info->bytes[(i * 4) + 1] << 16)
		      | (info->bytes[(i * 4) + 2] << 8)
		      | info->bytes[(i * 4) + 3]);

  info->all_words_same
    = (info->words[0] == info->words[1]
       && info->words[0] == info->words[2]
       && info->words[0] == info->words[3]);

  /* Pack double words together & determine if all of the double words are
     the same.  */
  for (size_t i = 0; i < VECTOR_128BIT_DOUBLE_WORDS; i++)
    {
      unsigned HOST_WIDE_INT d_word = 0;
      for (size_t j = 0; j < 8; j++)
	d_word = (d_word << 8) | info->bytes[(i * 8) + j];

      info->double_words[i] = d_word;
    }

  info->all_double_words_same
    = (info->double_words[0] == info->double_words[1]);

  return true;
}
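
/* Usage sketch (editor's illustration; OP is a hypothetical local holding
   a V4SImode CONST_VECTOR whose four elements are all GEN_INT (5)):

       vec_const_128bit_type info;
       if (vec_const_128bit_to_bytes (op, V4SImode, &info))
	 gcc_assert (info.all_words_same && info.words[0] == 5);

   This would succeed with every words[] entry equal to 5, all_bytes_same
   and all_half_words_same false (the bytes alternate between 0x00 and
   0x05), and all_double_words_same true.  */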

/* Determine if an IEEE 128-bit constant can be loaded with LXVKQ.  Return
   zero if the LXVKQ instruction cannot be used.  Otherwise return the
   immediate value to be used with the LXVKQ instruction.  */

unsigned
constant_generates_lxvkq (vec_const_128bit_type *vsx_const)
{
  /* The instruction is only supported if we have power10 code generation,
     IEEE 128-bit floating point hardware, and VSX registers.  */
  if (!TARGET_IEEE128_CONSTANT || !TARGET_FLOAT128_HW || !TARGET_POWER10
      || !TARGET_VSX)
    return 0;

  /* All of the constants that are generated by LXVKQ have the bottom 3
     words set to 0.  */
  if (vsx_const->words[1] != 0
      || vsx_const->words[2] != 0
      || vsx_const->words[3] != 0)
    return 0;

  /* See if we have a match for the first word.  */
  switch (vsx_const->words[0])
    {
    case 0x3FFF0000U: return 1;		/* IEEE 128-bit +1.0.  */
    case 0x40000000U: return 2;		/* IEEE 128-bit +2.0.  */
    case 0x40008000U: return 3;		/* IEEE 128-bit +3.0.  */
    case 0x40010000U: return 4;		/* IEEE 128-bit +4.0.  */
    case 0x40014000U: return 5;		/* IEEE 128-bit +5.0.  */
    case 0x40018000U: return 6;		/* IEEE 128-bit +6.0.  */
    case 0x4001C000U: return 7;		/* IEEE 128-bit +7.0.  */
    case 0x7FFF0000U: return 8;		/* IEEE 128-bit +Infinity.  */
    case 0x7FFF8000U: return 9;		/* IEEE 128-bit quiet NaN.  */
    case 0x80000000U: return 16;	/* IEEE 128-bit -0.0.  */
    case 0xBFFF0000U: return 17;	/* IEEE 128-bit -1.0.  */
    case 0xC0000000U: return 18;	/* IEEE 128-bit -2.0.  */
    case 0xC0008000U: return 19;	/* IEEE 128-bit -3.0.  */
    case 0xC0010000U: return 20;	/* IEEE 128-bit -4.0.  */
    case 0xC0014000U: return 21;	/* IEEE 128-bit -5.0.  */
    case 0xC0018000U: return 22;	/* IEEE 128-bit -6.0.  */
    case 0xC001C000U: return 23;	/* IEEE 128-bit -7.0.  */
    case 0xFFFF0000U: return 24;	/* IEEE 128-bit -Infinity.  */

      /* Anything else cannot be loaded.  */
    default:
      break;
    }

  return 0;
}
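
/* Worked encoding check (editor's illustration): IEEE binary128 has a
   15-bit exponent with bias 16383 (0x3fff).  For +1.0 the exponent field
   is 0x3fff and the mantissa is zero, so the top 32 bits are 0x3FFF0000
   and the rest are zero, matching the "return 1" entry above.  +2.0 bumps
   the exponent field to 0x4000 (top word 0x40000000), and +3.0 == 1.5 *
   2^1 additionally sets the top explicit mantissa bit (0x40008000).  */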

/* Determine if a vector constant can be loaded with XXSPLTIW.  Return zero
   if the XXSPLTIW instruction cannot be used.  Otherwise return the
   immediate value to be used with the XXSPLTIW instruction.  */

unsigned
constant_generates_xxspltiw (vec_const_128bit_type *vsx_const)
{
  if (!TARGET_SPLAT_WORD_CONSTANT || !TARGET_PREFIXED || !TARGET_VSX)
    return 0;

  if (!vsx_const->all_words_same)
    return 0;

  /* If we can use XXSPLTIB, don't generate XXSPLTIW.  */
  if (vsx_const->all_bytes_same)
    return 0;

  /* See if we can use VSPLTISH or VSPLTISW.  */
  if (vsx_const->all_half_words_same)
    {
      unsigned short h_word = vsx_const->half_words[0];
      short sign_h_word = ((h_word & 0xffff) ^ 0x8000) - 0x8000;
      if (EASY_VECTOR_15 (sign_h_word))
	return 0;
    }

  unsigned int word = vsx_const->words[0];
  int sign_word = ((word & 0xffffffff) ^ 0x80000000) - 0x80000000;
  if (EASY_VECTOR_15 (sign_word))
    return 0;

  return vsx_const->words[0];
}
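
/* Example (editor's illustration): a V4SImode splat of 0x12345678 has all
   four words the same, but neither all bytes nor all half words the same,
   and 0x12345678 is far outside the signed -16..15 range that
   EASY_VECTOR_15 accepts for VSPLTISW, so the function returns 0x12345678
   as the XXSPLTIW immediate.  A splat of 7, by contrast, returns 0
   because the cheaper VSPLTISW can load it.  */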

/* Determine if a vector constant can be loaded with XXSPLTIDP.  Return
   zero if the XXSPLTIDP instruction cannot be used.  Otherwise return the
   immediate value to be used with the XXSPLTIDP instruction.  */

unsigned
constant_generates_xxspltidp (vec_const_128bit_type *vsx_const)
{
  if (!TARGET_SPLAT_FLOAT_CONSTANT || !TARGET_PREFIXED || !TARGET_VSX)
    return 0;

  /* Reject if the two 64-bit segments are not the same.  */
  if (!vsx_const->all_double_words_same)
    return 0;

  /* If the bytes, half words, or words are all the same, don't use
     XXSPLTIDP.  Use a simpler instruction (XXSPLTIB, VSPLTISB, VSPLTISH,
     or VSPLTISW).  */
  if (vsx_const->all_bytes_same
      || vsx_const->all_half_words_same
      || vsx_const->all_words_same)
    return 0;

  unsigned HOST_WIDE_INT value = vsx_const->double_words[0];

  /* Avoid values that look like DFmode NaNs, except for the normal NaN bit
     pattern and the signalling NaN bit pattern.  Recognize infinity and
     negative infinity.  */

  /* Bit representation of DFmode normal quiet NaN.  */
#define RS6000_CONST_DF_NAN	HOST_WIDE_INT_UC (0x7ff8000000000000)

  /* Bit representation of DFmode normal signaling NaN.  */
#define RS6000_CONST_DF_NANS	HOST_WIDE_INT_UC (0x7ff4000000000000)

  /* Bit representation of DFmode positive infinity.  */
#define RS6000_CONST_DF_INF	HOST_WIDE_INT_UC (0x7ff0000000000000)

  /* Bit representation of DFmode negative infinity.  */
#define RS6000_CONST_DF_NEG_INF	HOST_WIDE_INT_UC (0xfff0000000000000)

  if (value != RS6000_CONST_DF_NAN
      && value != RS6000_CONST_DF_NANS
      && value != RS6000_CONST_DF_INF
      && value != RS6000_CONST_DF_NEG_INF)
    {
      /* The IEEE 754 64-bit floating format has 1 bit for the sign, 11
	 bits for the exponent, and 52 bits for the mantissa (not counting
	 the hidden bit used for normal numbers).  NaN values have the
	 exponent set to all 1 bits, and the mantissa non-zero (mantissa ==
	 0 is infinity).  */

      int df_exponent = (value >> 52) & 0x7ff;
      unsigned HOST_WIDE_INT
	df_mantissa = value & ((HOST_WIDE_INT_1U << 52) - HOST_WIDE_INT_1U);

      if (df_exponent == 0x7ff && df_mantissa != 0)	/* Other NaNs.  */
	return 0;

      /* Avoid values that are DFmode subnormal values.  Subnormal numbers
	 have the exponent all 0 bits, and the mantissa non-zero.  If the
	 value is subnormal, then the hidden bit in the mantissa is not
	 set.  */
      if (df_exponent == 0 && df_mantissa != 0)		/* Subnormal.  */
	return 0;
    }

  /* Change the representation to a DFmode constant.  */
  long df_words[2] = { vsx_const->words[0], vsx_const->words[1] };

  /* real_from_target takes the target words in target order.  */
  if (!BYTES_BIG_ENDIAN)
    std::swap (df_words[0], df_words[1]);

  REAL_VALUE_TYPE rv_type;
  real_from_target (&rv_type, df_words, DFmode);

  const REAL_VALUE_TYPE *rv = &rv_type;

  /* Validate that the number can be stored as an SFmode value.  */
  if (!exact_real_truncate (SFmode, rv))
    return 0;

  /* Validate that the number is not an SFmode subnormal value (exponent is
     0, mantissa field is non-zero), which is undefined for the XXSPLTIDP
     instruction.  */
  long sf_value;
  real_to_target (&sf_value, rv, SFmode);

  /* IEEE 754 32-bit values have 1 bit for the sign, 8 bits for the
     exponent, and 23 bits for the mantissa.  Subnormal numbers have the
     exponent all 0 bits, and the mantissa non-zero.  */
  long sf_exponent = (sf_value >> 23) & 0xFF;
  long sf_mantissa = sf_value & 0x7FFFFF;

  if (sf_exponent == 0 && sf_mantissa != 0)
    return 0;

  /* Return the immediate to be used.  */
  return sf_value;
}
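
/* Worked example (editor's illustration): a V2DFmode splat of 1.0 gives
   double_words[0] == double_words[1] == 0x3ff0000000000000.  That value
   is neither a NaN, an infinity, nor a subnormal, it truncates exactly to
   the SFmode value 1.0f, and the resulting bit pattern 0x3f800000 is a
   normal number, so 0x3f800000 is returned as the XXSPLTIDP immediate.
   A splat of 0.1, by contrast, fails exact_real_truncate (0.1 is not
   exactly representable in SFmode) and returns 0.  */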

/* We have only two opaque types: the __vector_quad and __vector_pair
   built-in types.  They are target specific and only available when MMA
   is supported.  If MMA is supported, this function simply returns false.
   Otherwise, it checks whether the given gimple STMT is an assignment,
   asm, or call statement that uses either of these two opaque types
   unexpectedly; if so, it raises an error message and returns true,
   otherwise it returns false.  */

bool
rs6000_opaque_type_invalid_use_p (gimple *stmt)
{
  if (TARGET_MMA)
    return false;

  /* If the given TYPE is one of the MMA opaque types, emit the
     corresponding error message and return true, otherwise return
     false.  */
  auto check_and_error_invalid_use = [](tree type)
  {
    tree mv = TYPE_MAIN_VARIANT (type);
    if (mv == vector_quad_type_node)
      {
	error ("type %<__vector_quad%> requires the %qs option", "-mmma");
	return true;
      }
    else if (mv == vector_pair_type_node)
      {
	error ("type %<__vector_pair%> requires the %qs option", "-mmma");
	return true;
      }
    return false;
  };

  if (stmt)
    {
      /* The usage of MMA opaque types is very limited for now; checking
	 gassign, gasm and gcall is enough so far.  */
      if (gassign *ga = dyn_cast<gassign *> (stmt))
	{
	  tree lhs = gimple_assign_lhs (ga);
	  tree type = TREE_TYPE (lhs);
	  if (check_and_error_invalid_use (type))
	    return true;
	}
      else if (gasm *gs = dyn_cast<gasm *> (stmt))
	{
	  unsigned ninputs = gimple_asm_ninputs (gs);
	  for (unsigned i = 0; i < ninputs; i++)
	    {
	      tree op = gimple_asm_input_op (gs, i);
	      tree val = TREE_VALUE (op);
	      tree type = TREE_TYPE (val);
	      if (check_and_error_invalid_use (type))
		return true;
	    }
	  unsigned noutputs = gimple_asm_noutputs (gs);
	  for (unsigned i = 0; i < noutputs; i++)
	    {
	      tree op = gimple_asm_output_op (gs, i);
	      tree val = TREE_VALUE (op);
	      tree type = TREE_TYPE (val);
	      if (check_and_error_invalid_use (type))
		return true;
	    }
	}
      else if (gcall *gc = dyn_cast<gcall *> (stmt))
	{
	  unsigned nargs = gimple_call_num_args (gc);
	  for (unsigned i = 0; i < nargs; i++)
	    {
	      tree arg = gimple_call_arg (gc, i);
	      tree type = TREE_TYPE (arg);
	      if (check_and_error_invalid_use (type))
		return true;
	    }
	}
    }

  return false;
}
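
/* Example of a rejected use (editor's illustration): compiling the
   following without -mmma would trip the gassign check above, since the
   assignment's left-hand side has the opaque __vector_pair type:

       __vector_pair f (__vector_pair *p)
       {
	 __vector_pair vp = *p;
	 return vp;
       }

   producing "error: type '__vector_pair' requires the '-mmma' option".  */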

struct gcc_target targetm = TARGET_INITIALIZER;

#include "gt-rs6000.h"